1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * fs/f2fs/segment.c
4 *
5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6 * http://www.samsung.com/
7 */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/prefetch.h>
13 #include <linux/kthread.h>
14 #include <linux/swap.h>
15 #include <linux/timer.h>
16 #include <linux/freezer.h>
17 #include <linux/sched/signal.h>
18
19 #include "f2fs.h"
20 #include "segment.h"
21 #include "node.h"
22 #include "gc.h"
23 #include "trace.h"
24 #include <trace/events/f2fs.h>
25
26 #define __reverse_ffz(x) __reverse_ffs(~(x))
27
28 static struct kmem_cache *discard_entry_slab;
29 static struct kmem_cache *discard_cmd_slab;
30 static struct kmem_cache *sit_entry_set_slab;
31 static struct kmem_cache *inmem_entry_slab;
32
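/*
 * Preset discard policies, indexed by DPOLICY_*.  Each row supplies a
 * policy's issue intervals, granularity and io-awareness settings, plus
 * three {max_requests, interval} sub-policies consumed by
 * select_sub_discard_policy().  (Descriptive comment; the positional
 * initializers below must match this kernel's struct discard_policy.)
 */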
33 static struct discard_policy dpolicys[MAX_DPOLICY] = {
34 {DPOLICY_BG, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
35 MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_BG,
36 {{1, 0}, {0, 0}, {0, 0}}},
37 {DPOLICY_BALANCE, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
38 MAX_PLIST_NUM - 1, true, true, false, false, DISCARD_GRAN_BL,
39 {{1, 0}, {2, 50}, {0, 0}}},
40 {DPOLICY_FORCE, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
41 MAX_PLIST_NUM - 1, true, true, false, false, DISCARD_GRAN_FORCE,
42 {{1, 0}, {2, 50}, {4, 2000}}},
43 {DPOLICY_FSTRIM, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
44 MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_FORCE,
45 {{8, 0}, {8, 0}, {8, 0}}},
46 {DPOLICY_UMOUNT, 0, DEF_MID_DISCARD_ISSUE_TIME, DEF_MAX_DISCARD_ISSUE_TIME,
47 MAX_PLIST_NUM, false, true, false, false, DISCARD_GRAN_BG,
48 {{UINT_MAX, 0}, {0, 0}, {0, 0}}}
49 };
50
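/*
 * Build an unsigned long from @str with the first byte as the most
 * significant one, so that the byte-oriented, bit-reversed bitmaps used by
 * f2fs_set_bit() can be scanned with the word-based helpers below.
 */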
51 static unsigned long __reverse_ulong(unsigned char *str)
52 {
53 unsigned long tmp = 0;
54 int shift = 24, idx = 0;
55
56 #if BITS_PER_LONG == 64
57 shift = 56;
58 #endif
59 while (shift >= 0) {
60 tmp |= (unsigned long)str[idx++] << shift;
61 shift -= BITS_PER_BYTE;
62 }
63 return tmp;
64 }
65
66 /*
67 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
68 * MSB and LSB are reversed in a byte by f2fs_set_bit.
69 */
70 static inline unsigned long __reverse_ffs(unsigned long word)
71 {
72 int num = 0;
73
74 #if BITS_PER_LONG == 64
75 if ((word & 0xffffffff00000000UL) == 0)
76 num += 32;
77 else
78 word >>= 32;
79 #endif
80 if ((word & 0xffff0000) == 0)
81 num += 16;
82 else
83 word >>= 16;
84
85 if ((word & 0xff00) == 0)
86 num += 8;
87 else
88 word >>= 8;
89
90 if ((word & 0xf0) == 0)
91 num += 4;
92 else
93 word >>= 4;
94
95 if ((word & 0xc) == 0)
96 num += 2;
97 else
98 word >>= 2;
99
100 if ((word & 0x2) == 0)
101 num += 1;
102 return num;
103 }
104
105 /*
106 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
107 * f2fs_set_bit makes MSB and LSB reversed in a byte.
108 * @size must be an integral multiple of BITS_PER_LONG.
109 * Example:
110 * MSB <--> LSB
111 * f2fs_set_bit(0, bitmap) => 1000 0000
112 * f2fs_set_bit(7, bitmap) => 0000 0001
113 */
114 unsigned long find_rev_next_bit(const unsigned long *addr,
115 unsigned long size, unsigned long offset)
116 {
117 const unsigned long *p = addr + BIT_WORD(offset);
118 unsigned long result = size;
119 unsigned long tmp;
120
121 if (offset >= size)
122 return size;
123
124 size -= (offset & ~(BITS_PER_LONG - 1));
125 offset %= BITS_PER_LONG;
126
127 while (1) {
128 if (*p == 0)
129 goto pass;
130
131 tmp = __reverse_ulong((unsigned char *)p);
132
133 tmp &= ~0UL >> offset;
134 if (size < BITS_PER_LONG)
135 tmp &= (~0UL << (BITS_PER_LONG - size));
136 if (tmp)
137 goto found;
138 pass:
139 if (size <= BITS_PER_LONG)
140 break;
141 size -= BITS_PER_LONG;
142 offset = 0;
143 p++;
144 }
145 return result;
146 found:
147 return result - size + __reverse_ffs(tmp);
148 }
149
150 unsigned long find_rev_next_zero_bit(const unsigned long *addr,
151 unsigned long size, unsigned long offset)
152 {
153 const unsigned long *p = addr + BIT_WORD(offset);
154 unsigned long result = size;
155 unsigned long tmp;
156
157 if (offset >= size)
158 return size;
159
160 size -= (offset & ~(BITS_PER_LONG - 1));
161 offset %= BITS_PER_LONG;
162
163 while (1) {
164 if (*p == ~0UL)
165 goto pass;
166
167 tmp = __reverse_ulong((unsigned char *)p);
168
169 if (offset)
170 tmp |= ~0UL << (BITS_PER_LONG - offset);
171 if (size < BITS_PER_LONG)
172 tmp |= ~0UL >> size;
173 if (tmp != ~0UL)
174 goto found;
175 pass:
176 if (size <= BITS_PER_LONG)
177 break;
178 size -= BITS_PER_LONG;
179 offset = 0;
180 p++;
181 }
182 return result;
183 found:
184 return result - size + __reverse_ffz(tmp);
185 }
186
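/*
 * Return true when allocation should switch to SSR (slack space recycling):
 * never in LFS mode, always under urgent GC or a disabled checkpoint,
 * otherwise only when the free sections can no longer cover the dirty
 * node/dentry/imeta sections plus the SSR and reserved margins.
 */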
187 bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
188 {
189 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
190 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
191 int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
192
193 if (f2fs_lfs_mode(sbi))
194 return false;
195 if (sbi->gc_mode == GC_URGENT_HIGH)
196 return true;
197 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
198 return true;
199
200 return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
201 SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
202 }
203
204 #ifdef CONFIG_F2FS_GRADING_SSR
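/*
 * Grading-SSR variant of f2fs_need_SSR(): besides the generic checks, it
 * weighs the remaining free space for the given log type (hot/warm data or
 * node) against the per-type lower limits and waterlines in
 * hot_cold_params, taking write contiguity (@contig_level) into account.
 */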
205 static bool need_ssr_by_type(struct f2fs_sb_info *sbi, int type, int contig_level)
206 {
207 int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
208 int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
209 int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
210 u64 valid_blocks = sbi->total_valid_block_count;
211 u64 total_blocks = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
212 u64 left_space = (total_blocks - valid_blocks) << 2;
213 unsigned int free_segs = free_segments(sbi);
214 unsigned int ovp_segments = overprovision_segments(sbi);
215 unsigned int lower_limit = 0;
216 unsigned int waterline = 0;
217 int dirty_sum = node_secs + 2 * dent_secs + imeta_secs;
218
219 if (sbi->hot_cold_params.enable == GRADING_SSR_OFF)
220 return f2fs_need_SSR(sbi);
221 if (f2fs_lfs_mode(sbi))
222 return false;
223 if (sbi->gc_mode == GC_URGENT_HIGH)
224 return true;
225 if (contig_level == SEQ_256BLKS && type == CURSEG_WARM_DATA &&
226 free_sections(sbi) > dirty_sum + 3 * reserved_sections(sbi) / 2)
227 return false;
228 if (free_sections(sbi) <= (unsigned int)(dirty_sum + 2 * reserved_sections(sbi)))
229 return true;
230 if (contig_level >= SEQ_32BLKS || total_blocks <= SSR_MIN_BLKS_LIMIT)
231 return false;
232
233 left_space -= ovp_segments * KBS_PER_SEGMENT;
234 if (unlikely(left_space == 0))
235 return false;
236
237 switch (type) {
238 case CURSEG_HOT_DATA:
239 lower_limit = sbi->hot_cold_params.hot_data_lower_limit;
240 waterline = sbi->hot_cold_params.hot_data_waterline;
241 break;
242 case CURSEG_WARM_DATA:
243 lower_limit = sbi->hot_cold_params.warm_data_lower_limit;
244 waterline = sbi->hot_cold_params.warm_data_waterline;
245 break;
246 case CURSEG_HOT_NODE:
247 lower_limit = sbi->hot_cold_params.hot_node_lower_limit;
248 waterline = sbi->hot_cold_params.hot_node_waterline;
249 break;
250 case CURSEG_WARM_NODE:
251 lower_limit = sbi->hot_cold_params.warm_node_lower_limit;
252 waterline = sbi->hot_cold_params.warm_node_waterline;
253 break;
254 default:
255 return false;
256 }
257
258 if (left_space > lower_limit)
259 return false;
260
261 if (div_u64((free_segs - ovp_segments) * 100, (left_space / KBS_PER_SEGMENT))
262 <= waterline) {
263 trace_f2fs_grading_ssr_allocate(
264 (le64_to_cpu(sbi->raw_super->block_count) - sbi->total_valid_block_count),
265 free_segments(sbi), contig_level);
266 return true;
267 } else {
268 return false;
269 }
270 }
271 #endif
272
273 void f2fs_register_inmem_page(struct inode *inode, struct page *page)
274 {
275 struct inmem_pages *new;
276
277 f2fs_trace_pid(page);
278
279 f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE);
280
281 new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
282
283 /* add atomic page indices to the list */
284 new->page = page;
285 INIT_LIST_HEAD(&new->list);
286
287 /* increase reference count with clean state */
288 get_page(page);
289 mutex_lock(&F2FS_I(inode)->inmem_lock);
290 list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages);
291 inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
292 mutex_unlock(&F2FS_I(inode)->inmem_lock);
293
294 trace_f2fs_register_inmem_page(page, INMEM);
295 }
296
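/*
 * Walk @head and release the in-memory (atomic) pages on it.
 * @drop:    drop the pages without committing them
 * @recover: restore the on-disk block address recorded in ->old_addr
 * @trylock: use trylock_page() to avoid deadlocking against inmem_lock
 */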
297 static int __revoke_inmem_pages(struct inode *inode,
298 struct list_head *head, bool drop, bool recover,
299 bool trylock)
300 {
301 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
302 struct inmem_pages *cur, *tmp;
303 int err = 0;
304
305 list_for_each_entry_safe(cur, tmp, head, list) {
306 struct page *page = cur->page;
307
308 if (drop)
309 trace_f2fs_commit_inmem_page(page, INMEM_DROP);
310
311 if (trylock) {
312 /*
313 * to avoid deadlock in between page lock and
314 * inmem_lock.
315 */
316 if (!trylock_page(page))
317 continue;
318 } else {
319 lock_page(page);
320 }
321
322 f2fs_wait_on_page_writeback(page, DATA, true, true);
323
324 if (recover) {
325 struct dnode_of_data dn;
326 struct node_info ni;
327
328 trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
329 retry:
330 set_new_dnode(&dn, inode, NULL, NULL, 0);
331 err = f2fs_get_dnode_of_data(&dn, page->index,
332 LOOKUP_NODE);
333 if (err) {
334 if (err == -ENOMEM) {
335 congestion_wait(BLK_RW_ASYNC,
336 DEFAULT_IO_TIMEOUT);
337 cond_resched();
338 goto retry;
339 }
340 err = -EAGAIN;
341 goto next;
342 }
343
344 err = f2fs_get_node_info(sbi, dn.nid, &ni);
345 if (err) {
346 f2fs_put_dnode(&dn);
347 return err;
348 }
349
350 if (cur->old_addr == NEW_ADDR) {
351 f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
352 f2fs_update_data_blkaddr(&dn, NEW_ADDR);
353 } else
354 f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
355 cur->old_addr, ni.version, true, true);
356 f2fs_put_dnode(&dn);
357 }
358 next:
359 /* we don't need to invalidate this in the successful case */
360 if (drop || recover) {
361 ClearPageUptodate(page);
362 clear_cold_data(page);
363 }
364 f2fs_clear_page_private(page);
365 f2fs_put_page(page, 1);
366
367 list_del(&cur->list);
368 kmem_cache_free(inmem_entry_slab, cur);
369 dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
370 }
371 return err;
372 }
373
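/*
 * Drop the in-memory pages of every atomic file on the sb.  When called
 * from the GC path (@gc_failure), only inodes that already failed atomic
 * GC are revoked, and the scan stops after one pass over the list.
 */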
374 void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
375 {
376 struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
377 struct inode *inode;
378 struct f2fs_inode_info *fi;
379 unsigned int count = sbi->atomic_files;
380 unsigned int looped = 0;
381 next:
382 spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
383 if (list_empty(head)) {
384 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
385 return;
386 }
387 fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
388 inode = igrab(&fi->vfs_inode);
389 if (inode)
390 list_move_tail(&fi->inmem_ilist, head);
391 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
392
393 if (inode) {
394 if (gc_failure) {
395 if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
396 goto skip;
397 }
398 set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
399 f2fs_drop_inmem_pages(inode);
400 skip:
401 iput(inode);
402 }
403 congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
404 cond_resched();
405 if (gc_failure) {
406 if (++looped >= count)
407 return;
408 }
409 goto next;
410 }
411
412 void f2fs_drop_inmem_pages(struct inode *inode)
413 {
414 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
415 struct f2fs_inode_info *fi = F2FS_I(inode);
416
417 do {
418 mutex_lock(&fi->inmem_lock);
419 if (list_empty(&fi->inmem_pages)) {
420 fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
421
422 spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
423 if (!list_empty(&fi->inmem_ilist))
424 list_del_init(&fi->inmem_ilist);
425 if (f2fs_is_atomic_file(inode)) {
426 clear_inode_flag(inode, FI_ATOMIC_FILE);
427 sbi->atomic_files--;
428 }
429 spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
430
431 mutex_unlock(&fi->inmem_lock);
432 break;
433 }
434 __revoke_inmem_pages(inode, &fi->inmem_pages,
435 true, false, true);
436 mutex_unlock(&fi->inmem_lock);
437 } while (1);
438 }
439
440 void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
441 {
442 struct f2fs_inode_info *fi = F2FS_I(inode);
443 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
444 struct list_head *head = &fi->inmem_pages;
445 struct inmem_pages *cur = NULL;
446
447 f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));
448
449 mutex_lock(&fi->inmem_lock);
450 list_for_each_entry(cur, head, list) {
451 if (cur->page == page)
452 break;
453 }
454
455 f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
456 list_del(&cur->list);
457 mutex_unlock(&fi->inmem_lock);
458
459 dec_page_count(sbi, F2FS_INMEM_PAGES);
460 kmem_cache_free(inmem_entry_slab, cur);
461
462 ClearPageUptodate(page);
463 f2fs_clear_page_private(page);
464 f2fs_put_page(page, 0);
465
466 trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
467 }
468
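/*
 * Write back all registered atomic pages of @inode.  Successfully written
 * pages are moved to a local revoke list so that, on failure, their old
 * block addresses can be restored by __revoke_inmem_pages().
 */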
469 static int __f2fs_commit_inmem_pages(struct inode *inode)
470 {
471 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
472 struct f2fs_inode_info *fi = F2FS_I(inode);
473 struct inmem_pages *cur, *tmp;
474 struct f2fs_io_info fio = {
475 .sbi = sbi,
476 .ino = inode->i_ino,
477 .type = DATA,
478 .op = REQ_OP_WRITE,
479 .op_flags = REQ_SYNC | REQ_PRIO,
480 .io_type = FS_DATA_IO,
481 };
482 struct list_head revoke_list;
483 bool submit_bio = false;
484 int err = 0;
485
486 INIT_LIST_HEAD(&revoke_list);
487
488 list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
489 struct page *page = cur->page;
490
491 lock_page(page);
492 if (page->mapping == inode->i_mapping) {
493 trace_f2fs_commit_inmem_page(page, INMEM);
494
495 f2fs_wait_on_page_writeback(page, DATA, true, true);
496
497 set_page_dirty(page);
498 if (clear_page_dirty_for_io(page)) {
499 inode_dec_dirty_pages(inode);
500 f2fs_remove_dirty_inode(inode);
501 }
502 retry:
503 fio.page = page;
504 fio.old_blkaddr = NULL_ADDR;
505 fio.encrypted_page = NULL;
506 fio.need_lock = LOCK_DONE;
507 err = f2fs_do_write_data_page(&fio);
508 if (err) {
509 if (err == -ENOMEM) {
510 congestion_wait(BLK_RW_ASYNC,
511 DEFAULT_IO_TIMEOUT);
512 cond_resched();
513 goto retry;
514 }
515 unlock_page(page);
516 break;
517 }
518 /* record old blkaddr for revoking */
519 cur->old_addr = fio.old_blkaddr;
520 submit_bio = true;
521 }
522 unlock_page(page);
523 list_move_tail(&cur->list, &revoke_list);
524 }
525
526 if (submit_bio)
527 f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
528
529 if (err) {
530 /*
531 * Try to revoke all committed pages, but this can still fail due to
532 * lack of memory or some other reason.  If that happens, EAGAIN is
533 * returned, meaning the transaction is no longer consistent and the
534 * caller should use its journal to recover, or rewrite and commit the
535 * last transaction.  For any other error code, the revoke was already
536 * completed by the filesystem itself.
537 */
538 err = __revoke_inmem_pages(inode, &revoke_list,
539 false, true, false);
540
541 /* drop all uncommitted pages */
542 __revoke_inmem_pages(inode, &fi->inmem_pages,
543 true, false, false);
544 } else {
545 __revoke_inmem_pages(inode, &revoke_list,
546 false, false, false);
547 }
548
549 return err;
550 }
551
552 int f2fs_commit_inmem_pages(struct inode *inode)
553 {
554 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
555 struct f2fs_inode_info *fi = F2FS_I(inode);
556 int err;
557
558 f2fs_balance_fs(sbi, true);
559
560 down_write(&fi->i_gc_rwsem[WRITE]);
561
562 f2fs_lock_op(sbi);
563 set_inode_flag(inode, FI_ATOMIC_COMMIT);
564
565 mutex_lock(&fi->inmem_lock);
566 err = __f2fs_commit_inmem_pages(inode);
567 mutex_unlock(&fi->inmem_lock);
568
569 clear_inode_flag(inode, FI_ATOMIC_COMMIT);
570
571 f2fs_unlock_op(sbi);
572 up_write(&fi->i_gc_rwsem[WRITE]);
573
574 return err;
575 }
576
577 /*
578 * This function balances dirty node and dentry pages.
579 * In addition, it controls garbage collection.
580 */
581 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
582 {
583 if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
584 f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
585 f2fs_stop_checkpoint(sbi, false);
586 }
587
588 /* background balancing (f2fs_balance_fs_bg) may still be pending */
589 if (need && excess_cached_nats(sbi))
590 f2fs_balance_fs_bg(sbi, false);
591
592 if (!f2fs_is_checkpoint_ready(sbi))
593 return;
594
595 /*
596 * We should do GC, or end up doing a checkpoint, if there are too many
597 * dirty dir/node pages and not enough free segments.
598 */
599 if (has_not_enough_free_secs(sbi, 0, 0)) {
600 if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
601 sbi->gc_thread->f2fs_gc_task) {
602 DEFINE_WAIT(wait);
603
604 prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
605 TASK_UNINTERRUPTIBLE);
606 wake_up(&sbi->gc_thread->gc_wait_queue_head);
607 io_schedule();
608 finish_wait(&sbi->gc_thread->fggc_wq, &wait);
609 } else {
610 down_write(&sbi->gc_lock);
611 f2fs_gc(sbi, false, false, false, NULL_SEGNO);
612 }
613 }
614 }
615
616 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
617 {
618 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
619 return;
620
621 /* try to shrink the extent cache when there is not enough memory */
622 if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
623 f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
624
625 /* check the # of cached NAT entries */
626 if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
627 f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
628
629 if (!f2fs_available_free_memory(sbi, FREE_NIDS))
630 f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
631 else
632 f2fs_build_free_nids(sbi, false, false);
633
634 if (!is_idle(sbi, REQ_TIME) &&
635 (!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
636 return;
637
638 /* checkpoint is the only way to shrink partial cached entries */
639 if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
640 !f2fs_available_free_memory(sbi, INO_ENTRIES) ||
641 excess_prefree_segs(sbi) ||
642 excess_dirty_nats(sbi) ||
643 excess_dirty_nodes(sbi) ||
644 f2fs_time_over(sbi, CP_TIME)) {
645 if (test_opt(sbi, DATA_FLUSH) && from_bg) {
646 struct blk_plug plug;
647
648 mutex_lock(&sbi->flush_lock);
649
650 blk_start_plug(&plug);
651 f2fs_sync_dirty_inodes(sbi, FILE_INODE);
652 blk_finish_plug(&plug);
653
654 mutex_unlock(&sbi->flush_lock);
655 }
656 f2fs_sync_fs(sbi->sb, true);
657 stat_inc_bg_cp_count(sbi->stat_info);
658 }
659 }
660
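/* Issue an empty REQ_PREFLUSH bio to @bdev and wait for it to complete. */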
661 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
662 struct block_device *bdev)
663 {
664 struct bio *bio;
665 int ret;
666
667 bio = f2fs_bio_alloc(sbi, 0, false);
668 if (!bio)
669 return -ENOMEM;
670
671 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
672 bio_set_dev(bio, bdev);
673 ret = submit_bio_wait(bio);
674 bio_put(bio);
675
676 trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
677 test_opt(sbi, FLUSH_MERGE), ret);
678 return ret;
679 }
680
681 static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
682 {
683 int ret = 0;
684 int i;
685
686 if (!f2fs_is_multi_device(sbi))
687 return __submit_flush_wait(sbi, sbi->sb->s_bdev);
688
689 for (i = 0; i < sbi->s_ndevs; i++) {
690 if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
691 continue;
692 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
693 if (ret)
694 break;
695 }
696 return ret;
697 }
698
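/*
 * Flush-merge worker: batches queued flush requests, issues a single
 * preflush per batch and completes every waiter with the same result.
 */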
699 static int issue_flush_thread(void *data)
700 {
701 struct f2fs_sb_info *sbi = data;
702 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
703 wait_queue_head_t *q = &fcc->flush_wait_queue;
704 repeat:
705 if (kthread_should_stop())
706 return 0;
707
708 sb_start_intwrite(sbi->sb);
709
710 if (!llist_empty(&fcc->issue_list)) {
711 struct flush_cmd *cmd, *next;
712 int ret;
713
714 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
715 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
716
717 cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
718
719 ret = submit_flush_wait(sbi, cmd->ino);
720 atomic_inc(&fcc->issued_flush);
721
722 llist_for_each_entry_safe(cmd, next,
723 fcc->dispatch_list, llnode) {
724 cmd->ret = ret;
725 complete(&cmd->wait);
726 }
727 fcc->dispatch_list = NULL;
728 }
729
730 sb_end_intwrite(sbi->sb);
731
732 wait_event_interruptible(*q,
733 kthread_should_stop() || !llist_empty(&fcc->issue_list));
734 goto repeat;
735 }
736
737 int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
738 {
739 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
740 struct flush_cmd cmd;
741 int ret;
742
743 if (test_opt(sbi, NOBARRIER))
744 return 0;
745
746 if (!test_opt(sbi, FLUSH_MERGE)) {
747 atomic_inc(&fcc->queued_flush);
748 ret = submit_flush_wait(sbi, ino);
749 atomic_dec(&fcc->queued_flush);
750 atomic_inc(&fcc->issued_flush);
751 return ret;
752 }
753
754 if (atomic_inc_return(&fcc->queued_flush) == 1 ||
755 f2fs_is_multi_device(sbi)) {
756 ret = submit_flush_wait(sbi, ino);
757 atomic_dec(&fcc->queued_flush);
758
759 atomic_inc(&fcc->issued_flush);
760 return ret;
761 }
762
763 cmd.ino = ino;
764 init_completion(&cmd.wait);
765
766 llist_add(&cmd.llnode, &fcc->issue_list);
767
768 /* update issue_list before we wake up issue_flush thread */
769 smp_mb();
770
771 if (waitqueue_active(&fcc->flush_wait_queue))
772 wake_up(&fcc->flush_wait_queue);
773
774 if (fcc->f2fs_issue_flush) {
775 wait_for_completion(&cmd.wait);
776 atomic_dec(&fcc->queued_flush);
777 } else {
778 struct llist_node *list;
779
780 list = llist_del_all(&fcc->issue_list);
781 if (!list) {
782 wait_for_completion(&cmd.wait);
783 atomic_dec(&fcc->queued_flush);
784 } else {
785 struct flush_cmd *tmp, *next;
786
787 ret = submit_flush_wait(sbi, ino);
788
789 llist_for_each_entry_safe(tmp, next, list, llnode) {
790 if (tmp == &cmd) {
791 cmd.ret = ret;
792 atomic_dec(&fcc->queued_flush);
793 continue;
794 }
795 tmp->ret = ret;
796 complete(&tmp->wait);
797 }
798 }
799 }
800
801 return cmd.ret;
802 }
803
804 int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
805 {
806 dev_t dev = sbi->sb->s_bdev->bd_dev;
807 struct flush_cmd_control *fcc;
808 int err = 0;
809
810 if (SM_I(sbi)->fcc_info) {
811 fcc = SM_I(sbi)->fcc_info;
812 if (fcc->f2fs_issue_flush)
813 return err;
814 goto init_thread;
815 }
816
817 fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
818 if (!fcc)
819 return -ENOMEM;
820 atomic_set(&fcc->issued_flush, 0);
821 atomic_set(&fcc->queued_flush, 0);
822 init_waitqueue_head(&fcc->flush_wait_queue);
823 init_llist_head(&fcc->issue_list);
824 SM_I(sbi)->fcc_info = fcc;
825 if (!test_opt(sbi, FLUSH_MERGE))
826 return err;
827
828 init_thread:
829 fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
830 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
831 if (IS_ERR(fcc->f2fs_issue_flush)) {
832 err = PTR_ERR(fcc->f2fs_issue_flush);
833 kfree(fcc);
834 SM_I(sbi)->fcc_info = NULL;
835 return err;
836 }
837
838 return err;
839 }
840
841 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
842 {
843 struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
844
845 if (fcc && fcc->f2fs_issue_flush) {
846 struct task_struct *flush_thread = fcc->f2fs_issue_flush;
847
848 fcc->f2fs_issue_flush = NULL;
849 kthread_stop(flush_thread);
850 }
851 if (free) {
852 kfree(fcc);
853 SM_I(sbi)->fcc_info = NULL;
854 }
855 }
856
857 int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
858 {
859 int ret = 0, i;
860
861 if (!f2fs_is_multi_device(sbi))
862 return 0;
863
864 if (test_opt(sbi, NOBARRIER))
865 return 0;
866
867 for (i = 1; i < sbi->s_ndevs; i++) {
868 if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
869 continue;
870 ret = __submit_flush_wait(sbi, FDEV(i).bdev);
871 if (ret)
872 break;
873
874 spin_lock(&sbi->dev_lock);
875 f2fs_clear_bit(i, (char *)&sbi->dirty_device);
876 spin_unlock(&sbi->dev_lock);
877 }
878
879 return ret;
880 }
881
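/*
 * Mark @segno in dirty_segmap[@dirty_type]; for DIRTY the per-type map and,
 * on large-section layouts, dirty_secmap are updated as well.
 */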
882 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
883 enum dirty_type dirty_type)
884 {
885 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
886
887 /* need not be added */
888 if (IS_CURSEG(sbi, segno))
889 return;
890
891 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
892 dirty_i->nr_dirty[dirty_type]++;
893
894 if (dirty_type == DIRTY) {
895 struct seg_entry *sentry = get_seg_entry(sbi, segno);
896 enum dirty_type t = sentry->type;
897
898 if (unlikely(t >= DIRTY)) {
899 f2fs_bug_on(sbi, 1);
900 return;
901 }
902 if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
903 dirty_i->nr_dirty[t]++;
904
905 if (__is_large_section(sbi)) {
906 unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
907 block_t valid_blocks =
908 get_valid_blocks(sbi, segno, true);
909
910 f2fs_bug_on(sbi, unlikely(!valid_blocks ||
911 valid_blocks == BLKS_PER_SEC(sbi)));
912
913 if (!IS_CURSEC(sbi, secno))
914 set_bit(secno, dirty_i->dirty_secmap);
915 }
916 }
917 }
918
919 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
920 enum dirty_type dirty_type)
921 {
922 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
923 block_t valid_blocks;
924
925 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
926 dirty_i->nr_dirty[dirty_type]--;
927
928 if (dirty_type == DIRTY) {
929 struct seg_entry *sentry = get_seg_entry(sbi, segno);
930 enum dirty_type t = sentry->type;
931
932 if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
933 dirty_i->nr_dirty[t]--;
934
935 valid_blocks = get_valid_blocks(sbi, segno, true);
936 if (valid_blocks == 0) {
937 clear_bit(GET_SEC_FROM_SEG(sbi, segno),
938 dirty_i->victim_secmap);
939 #ifdef CONFIG_F2FS_CHECK_FS
940 clear_bit(segno, SIT_I(sbi)->invalid_segmap);
941 #endif
942 }
943 if (__is_large_section(sbi)) {
944 unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
945
946 if (!valid_blocks ||
947 valid_blocks == BLKS_PER_SEC(sbi)) {
948 clear_bit(secno, dirty_i->dirty_secmap);
949 return;
950 }
951
952 if (!IS_CURSEC(sbi, secno))
953 set_bit(secno, dirty_i->dirty_secmap);
954 }
955 }
956 }
957
958 /*
959 * This must not fail with an error such as -ENOMEM, since adding a dirty
960 * entry to the seglist is not a critical operation.
961 * If the given segment is one of the current working segments, it is not added.
962 */
963 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
964 {
965 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
966 unsigned short valid_blocks, ckpt_valid_blocks;
967 unsigned int usable_blocks;
968
969 if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
970 return;
971
972 usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
973 mutex_lock(&dirty_i->seglist_lock);
974
975 valid_blocks = get_valid_blocks(sbi, segno, false);
976 ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
977
978 if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
979 ckpt_valid_blocks == usable_blocks)) {
980 __locate_dirty_segment(sbi, segno, PRE);
981 __remove_dirty_segment(sbi, segno, DIRTY);
982 } else if (valid_blocks < usable_blocks) {
983 __locate_dirty_segment(sbi, segno, DIRTY);
984 } else {
985 /* Recovery routine with SSR needs this */
986 __remove_dirty_segment(sbi, segno, DIRTY);
987 }
988
989 mutex_unlock(&dirty_i->seglist_lock);
990 }
991
992 /* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
993 void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
994 {
995 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
996 unsigned int segno;
997
998 mutex_lock(&dirty_i->seglist_lock);
999 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
1000 if (get_valid_blocks(sbi, segno, false))
1001 continue;
1002 if (IS_CURSEG(sbi, segno))
1003 continue;
1004 __locate_dirty_segment(sbi, segno, PRE);
1005 __remove_dirty_segment(sbi, segno, DIRTY);
1006 }
1007 mutex_unlock(&dirty_i->seglist_lock);
1008 }
1009
1010 block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
1011 {
1012 int ovp_hole_segs =
1013 (overprovision_segments(sbi) - reserved_segments(sbi));
1014 block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
1015 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1016 block_t holes[2] = {0, 0}; /* DATA and NODE */
1017 block_t unusable;
1018 struct seg_entry *se;
1019 unsigned int segno;
1020
1021 mutex_lock(&dirty_i->seglist_lock);
1022 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
1023 se = get_seg_entry(sbi, segno);
1024 if (IS_NODESEG(se->type))
1025 holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
1026 se->valid_blocks;
1027 else
1028 holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
1029 se->valid_blocks;
1030 }
1031 mutex_unlock(&dirty_i->seglist_lock);
1032
1033 unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
1034 if (unusable > ovp_holes)
1035 return unusable - ovp_holes;
1036 return 0;
1037 }
1038
1039 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
1040 {
1041 int ovp_hole_segs =
1042 (overprovision_segments(sbi) - reserved_segments(sbi));
1043 if (unusable > F2FS_OPTION(sbi).unusable_cap)
1044 return -EAGAIN;
1045 if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
1046 dirty_segments(sbi) > ovp_hole_segs)
1047 return -EAGAIN;
1048 return 0;
1049 }
1050
1051 /* This is only used by SBI_CP_DISABLED */
1052 static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
1053 {
1054 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1055 unsigned int segno = 0;
1056
1057 mutex_lock(&dirty_i->seglist_lock);
1058 for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
1059 if (get_valid_blocks(sbi, segno, false))
1060 continue;
1061 if (get_ckpt_valid_blocks(sbi, segno, false))
1062 continue;
1063 mutex_unlock(&dirty_i->seglist_lock);
1064 return segno;
1065 }
1066 mutex_unlock(&dirty_i->seglist_lock);
1067 return NULL_SEGNO;
1068 }
1069
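/*
 * Allocate a discard command for [start, start + len) on @bdev and queue it
 * on the pending list that matches its length (plist_idx(len)).
 */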
1070 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
1071 struct block_device *bdev, block_t lstart,
1072 block_t start, block_t len)
1073 {
1074 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1075 struct list_head *pend_list;
1076 struct discard_cmd *dc;
1077
1078 f2fs_bug_on(sbi, !len);
1079
1080 pend_list = &dcc->pend_list[plist_idx(len)];
1081
1082 dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
1083 INIT_LIST_HEAD(&dc->list);
1084 dc->bdev = bdev;
1085 dc->lstart = lstart;
1086 dc->start = start;
1087 dc->len = len;
1088 dc->ref = 0;
1089 dc->state = D_PREP;
1090 dc->queued = 0;
1091 dc->error = 0;
1092 init_completion(&dc->wait);
1093 list_add_tail(&dc->list, pend_list);
1094 spin_lock_init(&dc->lock);
1095 dc->bio_ref = 0;
1096 atomic_inc(&dcc->discard_cmd_cnt);
1097 dcc->undiscard_blks += len;
1098
1099 return dc;
1100 }
1101
1102 static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
1103 struct block_device *bdev, block_t lstart,
1104 block_t start, block_t len,
1105 struct rb_node *parent, struct rb_node **p,
1106 bool leftmost)
1107 {
1108 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1109 struct discard_cmd *dc;
1110
1111 dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
1112
1113 rb_link_node(&dc->rb_node, parent, p);
1114 rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
1115
1116 return dc;
1117 }
1118
1119 static void __detach_discard_cmd(struct discard_cmd_control *dcc,
1120 struct discard_cmd *dc)
1121 {
1122 if (dc->state == D_DONE)
1123 atomic_sub(dc->queued, &dcc->queued_discard);
1124
1125 list_del(&dc->list);
1126 rb_erase_cached(&dc->rb_node, &dcc->root);
1127 dcc->undiscard_blks -= dc->len;
1128
1129 kmem_cache_free(discard_cmd_slab, dc);
1130
1131 atomic_dec(&dcc->discard_cmd_cnt);
1132 }
1133
1134 static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
1135 struct discard_cmd *dc)
1136 {
1137 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1138 unsigned long flags;
1139
1140 trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
1141
1142 spin_lock_irqsave(&dc->lock, flags);
1143 if (dc->bio_ref) {
1144 spin_unlock_irqrestore(&dc->lock, flags);
1145 return;
1146 }
1147 spin_unlock_irqrestore(&dc->lock, flags);
1148
1149 f2fs_bug_on(sbi, dc->ref);
1150
1151 if (dc->error == -EOPNOTSUPP)
1152 dc->error = 0;
1153
1154 if (dc->error)
1155 printk_ratelimited(
1156 "%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
1157 KERN_INFO, sbi->sb->s_id,
1158 dc->lstart, dc->start, dc->len, dc->error);
1159 __detach_discard_cmd(dcc, dc);
1160 }
1161
1162 static void f2fs_submit_discard_endio(struct bio *bio)
1163 {
1164 struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1165 unsigned long flags;
1166
1167 spin_lock_irqsave(&dc->lock, flags);
1168 if (!dc->error)
1169 dc->error = blk_status_to_errno(bio->bi_status);
1170 dc->bio_ref--;
1171 if (!dc->bio_ref && dc->state == D_SUBMIT) {
1172 dc->state = D_DONE;
1173 complete_all(&dc->wait);
1174 }
1175 spin_unlock_irqrestore(&dc->lock, flags);
1176 bio_put(bio);
1177 }
1178
1179 static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1180 block_t start, block_t end)
1181 {
1182 #ifdef CONFIG_F2FS_CHECK_FS
1183 struct seg_entry *sentry;
1184 unsigned int segno;
1185 block_t blk = start;
1186 unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
1187 unsigned long *map;
1188
1189 while (blk < end) {
1190 segno = GET_SEGNO(sbi, blk);
1191 sentry = get_seg_entry(sbi, segno);
1192 offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1193
1194 if (end < START_BLOCK(sbi, segno + 1))
1195 size = GET_BLKOFF_FROM_SEG0(sbi, end);
1196 else
1197 size = max_blocks;
1198 map = (unsigned long *)(sentry->cur_valid_map);
1199 offset = find_rev_next_bit(map, size, offset);
1200 f2fs_bug_on(sbi, offset != size);
1201 blk = START_BLOCK(sbi, segno + 1);
1202 }
1203 #endif
1204 }
1205
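/*
 * Initialise @policy from the preset table above; for FSTRIM the caller's
 * @granularity overrides the preset one.  The selected type is also
 * recorded in the discard command control.
 */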
1206 static void __init_discard_policy(struct f2fs_sb_info *sbi,
1207 struct discard_policy *policy,
1208 int discard_type, unsigned int granularity)
1209 {
1210 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1211
1212 if (discard_type == DPOLICY_BG) {
1213 *policy = dpolicys[DPOLICY_BG];
1214 } else if (discard_type == DPOLICY_BALANCE) {
1215 *policy = dpolicys[DPOLICY_BALANCE];
1216 } else if (discard_type == DPOLICY_FORCE) {
1217 *policy = dpolicys[DPOLICY_FORCE];
1218 } else if (discard_type == DPOLICY_FSTRIM) {
1219 *policy = dpolicys[DPOLICY_FSTRIM];
1220 if (policy->granularity != granularity)
1221 policy->granularity = granularity;
1222 } else if (discard_type == DPOLICY_UMOUNT) {
1223 *policy = dpolicys[DPOLICY_UMOUNT];
1224 }
1225 dcc->discard_type = discard_type;
1226 }
1227
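/*
 * Map a pending-list index to the small/mid/big sub-policy of @dpolicy;
 * FSTRIM always uses the big sub-policy.
 */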
1228 static void select_sub_discard_policy(struct discard_sub_policy **spolicy,
1229 int index, struct discard_policy *dpolicy)
1230 {
1231 if (dpolicy->type == DPOLICY_FSTRIM) {
1232 *spolicy = &dpolicy->sub_policy[SUB_POLICY_BIG];
1233 return;
1234 }
1235
1236 if ((index + 1) >= DISCARD_GRAN_BG)
1237 *spolicy = &dpolicy->sub_policy[SUB_POLICY_BIG];
1238 else if ((index + 1) >= DISCARD_GRAN_BL)
1239 *spolicy = &dpolicy->sub_policy[SUB_POLICY_MID];
1240 else
1241 *spolicy = &dpolicy->sub_policy[SUB_POLICY_SMALL];
1242 }
1243
1244 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1245 struct block_device *bdev, block_t lstart,
1246 block_t start, block_t len);
1247 /* this function is copied from blkdev_issue_discard() in block/blk-lib.c */
1248 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1249 struct discard_policy *dpolicy,
1250 int spolicy_index,
1251 struct discard_cmd *dc,
1252 unsigned int *issued)
1253 {
1254 struct block_device *bdev = dc->bdev;
1255 struct request_queue *q = bdev_get_queue(bdev);
1256 unsigned int max_discard_blocks =
1257 SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1258 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1259 struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1260 &(dcc->fstrim_list) : &(dcc->wait_list);
1261 int flag = dpolicy->sync ? REQ_SYNC : 0;
1262 struct discard_sub_policy *spolicy = NULL;
1263 block_t lstart, start, len, total_len;
1264 int err = 0;
1265
1266 select_sub_discard_policy(&spolicy, spolicy_index, dpolicy);
1267
1268 if (dc->state != D_PREP)
1269 return 0;
1270
1271 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1272 return 0;
1273
1274 trace_f2fs_issue_discard(bdev, dc->start, dc->len);
1275
1276 lstart = dc->lstart;
1277 start = dc->start;
1278 len = dc->len;
1279 total_len = len;
1280
1281 dc->len = 0;
1282
1283 while (total_len && *issued < spolicy->max_requests && !err) {
1284 struct bio *bio = NULL;
1285 unsigned long flags;
1286 bool last = true;
1287
1288 if (len > max_discard_blocks) {
1289 len = max_discard_blocks;
1290 last = false;
1291 }
1292
1293 (*issued)++;
1294 if (*issued == spolicy->max_requests)
1295 last = true;
1296
1297 dc->len += len;
1298
1299 if (time_to_inject(sbi, FAULT_DISCARD)) {
1300 f2fs_show_injection_info(sbi, FAULT_DISCARD);
1301 err = -EIO;
1302 goto submit;
1303 }
1304 err = __blkdev_issue_discard(bdev,
1305 SECTOR_FROM_BLOCK(start),
1306 SECTOR_FROM_BLOCK(len),
1307 GFP_NOFS, 0, &bio);
1308 submit:
1309 if (err) {
1310 spin_lock_irqsave(&dc->lock, flags);
1311 if (dc->state == D_PARTIAL)
1312 dc->state = D_SUBMIT;
1313 spin_unlock_irqrestore(&dc->lock, flags);
1314
1315 break;
1316 }
1317
1318 f2fs_bug_on(sbi, !bio);
1319
1320 /*
1321 * must be set before submission, so the completion path cannot
1322 * observe D_DONE right away
1323 */
1324 spin_lock_irqsave(&dc->lock, flags);
1325 if (last)
1326 dc->state = D_SUBMIT;
1327 else
1328 dc->state = D_PARTIAL;
1329 dc->bio_ref++;
1330 spin_unlock_irqrestore(&dc->lock, flags);
1331
1332 atomic_inc(&dcc->queued_discard);
1333 dc->queued++;
1334 list_move_tail(&dc->list, wait_list);
1335
1336 /* sanity check on discard range */
1337 __check_sit_bitmap(sbi, lstart, lstart + len);
1338
1339 bio->bi_private = dc;
1340 bio->bi_end_io = f2fs_submit_discard_endio;
1341 bio->bi_opf |= flag;
1342 submit_bio(bio);
1343
1344 atomic_inc(&dcc->issued_discard);
1345
1346 f2fs_update_iostat(sbi, FS_DISCARD, 1);
1347
1348 lstart += len;
1349 start += len;
1350 total_len -= len;
1351 len = total_len;
1352 }
1353
1354 if (!err && len) {
1355 dcc->undiscard_blks -= len;
1356 __update_discard_tree_range(sbi, bdev, lstart, start, len);
1357 }
1358 return err;
1359 }
1360
1361 static void __insert_discard_tree(struct f2fs_sb_info *sbi,
1362 struct block_device *bdev, block_t lstart,
1363 block_t start, block_t len,
1364 struct rb_node **insert_p,
1365 struct rb_node *insert_parent)
1366 {
1367 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1368 struct rb_node **p;
1369 struct rb_node *parent = NULL;
1370 bool leftmost = true;
1371
1372 if (insert_p && insert_parent) {
1373 parent = insert_parent;
1374 p = insert_p;
1375 goto do_insert;
1376 }
1377
1378 p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
1379 lstart, &leftmost);
1380 do_insert:
1381 __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
1382 p, leftmost);
1383 }
1384
1385 static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1386 struct discard_cmd *dc)
1387 {
1388 list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
1389 }
1390
1391 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1392 struct discard_cmd *dc, block_t blkaddr)
1393 {
1394 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1395 struct discard_info di = dc->di;
1396 bool modified = false;
1397
1398 if (dc->state == D_DONE || dc->len == 1) {
1399 __remove_discard_cmd(sbi, dc);
1400 return;
1401 }
1402
1403 dcc->undiscard_blks -= di.len;
1404
1405 if (blkaddr > di.lstart) {
1406 dc->len = blkaddr - dc->lstart;
1407 dcc->undiscard_blks += dc->len;
1408 __relocate_discard_cmd(dcc, dc);
1409 modified = true;
1410 }
1411
1412 if (blkaddr < di.lstart + di.len - 1) {
1413 if (modified) {
1414 __insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
1415 di.start + blkaddr + 1 - di.lstart,
1416 di.lstart + di.len - 1 - blkaddr,
1417 NULL, NULL);
1418 } else {
1419 dc->lstart++;
1420 dc->len--;
1421 dc->start++;
1422 dcc->undiscard_blks += dc->len;
1423 __relocate_discard_cmd(dcc, dc);
1424 }
1425 }
1426 }
1427
1428 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1429 struct block_device *bdev, block_t lstart,
1430 block_t start, block_t len)
1431 {
1432 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1433 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1434 struct discard_cmd *dc;
1435 struct discard_info di = {0};
1436 struct rb_node **insert_p = NULL, *insert_parent = NULL;
1437 struct request_queue *q = bdev_get_queue(bdev);
1438 unsigned int max_discard_blocks =
1439 SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1440 block_t end = lstart + len;
1441
1442 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1443 NULL, lstart,
1444 (struct rb_entry **)&prev_dc,
1445 (struct rb_entry **)&next_dc,
1446 &insert_p, &insert_parent, true, NULL);
1447 if (dc)
1448 prev_dc = dc;
1449
1450 if (!prev_dc) {
1451 di.lstart = lstart;
1452 di.len = next_dc ? next_dc->lstart - lstart : len;
1453 di.len = min(di.len, len);
1454 di.start = start;
1455 }
1456
1457 while (1) {
1458 struct rb_node *node;
1459 bool merged = false;
1460 struct discard_cmd *tdc = NULL;
1461
1462 if (prev_dc) {
1463 di.lstart = prev_dc->lstart + prev_dc->len;
1464 if (di.lstart < lstart)
1465 di.lstart = lstart;
1466 if (di.lstart >= end)
1467 break;
1468
1469 if (!next_dc || next_dc->lstart > end)
1470 di.len = end - di.lstart;
1471 else
1472 di.len = next_dc->lstart - di.lstart;
1473 di.start = start + di.lstart - lstart;
1474 }
1475
1476 if (!di.len)
1477 goto next;
1478
1479 if (prev_dc && prev_dc->state == D_PREP &&
1480 prev_dc->bdev == bdev &&
1481 __is_discard_back_mergeable(&di, &prev_dc->di,
1482 max_discard_blocks)) {
1483 prev_dc->di.len += di.len;
1484 dcc->undiscard_blks += di.len;
1485 __relocate_discard_cmd(dcc, prev_dc);
1486 di = prev_dc->di;
1487 tdc = prev_dc;
1488 merged = true;
1489 }
1490
1491 if (next_dc && next_dc->state == D_PREP &&
1492 next_dc->bdev == bdev &&
1493 __is_discard_front_mergeable(&di, &next_dc->di,
1494 max_discard_blocks)) {
1495 next_dc->di.lstart = di.lstart;
1496 next_dc->di.len += di.len;
1497 next_dc->di.start = di.start;
1498 dcc->undiscard_blks += di.len;
1499 __relocate_discard_cmd(dcc, next_dc);
1500 if (tdc)
1501 __remove_discard_cmd(sbi, tdc);
1502 merged = true;
1503 }
1504
1505 if (!merged) {
1506 __insert_discard_tree(sbi, bdev, di.lstart, di.start,
1507 di.len, NULL, NULL);
1508 }
1509 next:
1510 prev_dc = next_dc;
1511 if (!prev_dc)
1512 break;
1513
1514 node = rb_next(&prev_dc->rb_node);
1515 next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1516 }
1517 }
1518
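/*
 * Record a discard request in the rb-tree and pending lists under cmd_lock;
 * the actual bios are issued later by the discard thread or fstrim.
 */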
1519 static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
1520 struct block_device *bdev, block_t blkstart, block_t blklen)
1521 {
1522 block_t lblkstart = blkstart;
1523
1524 if (!f2fs_bdev_support_discard(bdev))
1525 return 0;
1526
1527 trace_f2fs_queue_discard(bdev, blkstart, blklen);
1528
1529 if (f2fs_is_multi_device(sbi)) {
1530 int devi = f2fs_target_device_index(sbi, blkstart);
1531
1532 blkstart -= FDEV(devi).start_blk;
1533 }
1534 mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1535 __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1536 mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1537 return 0;
1538 }
1539
1540 static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1541 struct discard_policy *dpolicy,
1542 int spolicy_index)
1543 {
1544 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1545 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1546 struct rb_node **insert_p = NULL, *insert_parent = NULL;
1547 struct discard_cmd *dc;
1548 struct blk_plug plug;
1549 unsigned int pos = dcc->next_pos;
1550 unsigned int issued = 0;
1551 bool io_interrupted = false;
1552 struct discard_sub_policy *spolicy = NULL;
1553
1554 select_sub_discard_policy(&spolicy, spolicy_index, dpolicy);
1555 mutex_lock(&dcc->cmd_lock);
1556
1557 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1558 NULL, pos,
1559 (struct rb_entry **)&prev_dc,
1560 (struct rb_entry **)&next_dc,
1561 &insert_p, &insert_parent, true, NULL);
1562 if (!dc)
1563 dc = next_dc;
1564
1565 blk_start_plug(&plug);
1566
1567 while (dc) {
1568 struct rb_node *node;
1569 int err = 0;
1570
1571 if (dc->state != D_PREP)
1572 goto next;
1573
1574 if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1575 io_interrupted = true;
1576 break;
1577 }
1578
1579 dcc->next_pos = dc->lstart + dc->len;
1580 err = __submit_discard_cmd(sbi, dpolicy, spolicy_index, dc, &issued);
1581
1582 if (issued >= spolicy->max_requests)
1583 break;
1584 next:
1585 node = rb_next(&dc->rb_node);
1586 if (err)
1587 __remove_discard_cmd(sbi, dc);
1588 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1589 }
1590
1591 blk_finish_plug(&plug);
1592
1593 if (!dc)
1594 dcc->next_pos = 0;
1595
1596 mutex_unlock(&dcc->cmd_lock);
1597
1598 if (!issued && io_interrupted)
1599 issued = -1;
1600
1601 return issued;
1602 }
1603 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1604 struct discard_policy *dpolicy);
1605
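/*
 * Issue pending discard commands, starting from the largest-granularity
 * pending list, honouring the policy's granularity floor, io-awareness and
 * per-sub-policy request cap.  Returns the number of requests issued, or
 * -1 if the pass was interrupted by foreground I/O before issuing any.
 */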
1606 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1607 struct discard_policy *dpolicy)
1608 {
1609 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1610 struct list_head *pend_list;
1611 struct discard_cmd *dc, *tmp;
1612 struct blk_plug plug;
1613 int i, issued;
1614 bool io_interrupted = false;
1615 struct discard_sub_policy *spolicy = NULL;
1616
1617 if (dpolicy->timeout)
1618 f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
1619
1620 /* only do this check under CONFIG_F2FS_CHECK_FS, as it may be time-consuming */
1621 if (unlikely(dcc->rbtree_check)) {
1622 mutex_lock(&dcc->cmd_lock);
1623 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false));
1624 mutex_unlock(&dcc->cmd_lock);
1625 }
1626 retry:
1627 blk_start_plug(&plug);
1628 issued = 0;
1629 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1630 if (dpolicy->timeout &&
1631 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1632 break;
1633
1634 if (i + 1 < dpolicy->granularity)
1635 break;
1636
1637 select_sub_discard_policy(&spolicy, i, dpolicy);
1638
1639 if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered) {
1640 issued = __issue_discard_cmd_orderly(sbi, dpolicy, i);
1641 blk_finish_plug(&plug);
1642 return issued;
1643 }
1644
1645 pend_list = &dcc->pend_list[i];
1646
1647 mutex_lock(&dcc->cmd_lock);
1648 if (list_empty(pend_list))
1649 goto next;
1650 if (unlikely(dcc->rbtree_check))
1651 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
1652 &dcc->root, false));
1653 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1654 f2fs_bug_on(sbi, dc->state != D_PREP);
1655
1656 if (dpolicy->timeout &&
1657 f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1658 break;
1659
1660 if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1661 !is_idle(sbi, DISCARD_TIME)) {
1662 io_interrupted = true;
1663 goto skip;
1664 }
1665 __submit_discard_cmd(sbi, dpolicy, i, dc, &issued);
1666 skip:
1667 if (issued >= spolicy->max_requests)
1668 break;
1669 }
1670 next:
1671 mutex_unlock(&dcc->cmd_lock);
1672
1673 if (issued >= spolicy->max_requests || io_interrupted)
1674 break;
1675 }
1676
1677 blk_finish_plug(&plug);
1678 if (spolicy)
1679 dpolicy->min_interval = spolicy->interval;
1680
1681 if (dpolicy->type == DPOLICY_UMOUNT && issued) {
1682 __wait_all_discard_cmd(sbi, dpolicy);
1683 goto retry;
1684 }
1685
1686 if (!issued && io_interrupted)
1687 issued = -1;
1688
1689 return issued;
1690 }
1691
1692 static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1693 {
1694 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1695 struct list_head *pend_list;
1696 struct discard_cmd *dc, *tmp;
1697 int i;
1698 bool dropped = false;
1699
1700 mutex_lock(&dcc->cmd_lock);
1701 for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1702 pend_list = &dcc->pend_list[i];
1703 list_for_each_entry_safe(dc, tmp, pend_list, list) {
1704 f2fs_bug_on(sbi, dc->state != D_PREP);
1705 __remove_discard_cmd(sbi, dc);
1706 dropped = true;
1707 }
1708 }
1709 mutex_unlock(&dcc->cmd_lock);
1710
1711 return dropped;
1712 }
1713
1714 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1715 {
1716 __drop_discard_cmd(sbi);
1717 }
1718
1719 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1720 struct discard_cmd *dc)
1721 {
1722 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1723 unsigned int len = 0;
1724
1725 wait_for_completion_io(&dc->wait);
1726 mutex_lock(&dcc->cmd_lock);
1727 f2fs_bug_on(sbi, dc->state != D_DONE);
1728 dc->ref--;
1729 if (!dc->ref) {
1730 if (!dc->error)
1731 len = dc->len;
1732 __remove_discard_cmd(sbi, dc);
1733 }
1734 mutex_unlock(&dcc->cmd_lock);
1735
1736 return len;
1737 }
1738
1739 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1740 struct discard_policy *dpolicy,
1741 block_t start, block_t end)
1742 {
1743 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1744 struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1745 &(dcc->fstrim_list) : &(dcc->wait_list);
1746 struct discard_cmd *dc, *tmp;
1747 bool need_wait;
1748 unsigned int trimmed = 0;
1749
1750 next:
1751 need_wait = false;
1752
1753 mutex_lock(&dcc->cmd_lock);
1754 list_for_each_entry_safe(dc, tmp, wait_list, list) {
1755 if (dc->lstart + dc->len <= start || end <= dc->lstart)
1756 continue;
1757 if (dc->len < dpolicy->granularity)
1758 continue;
1759 if (dc->state == D_DONE && !dc->ref) {
1760 wait_for_completion_io(&dc->wait);
1761 if (!dc->error)
1762 trimmed += dc->len;
1763 __remove_discard_cmd(sbi, dc);
1764 } else {
1765 dc->ref++;
1766 need_wait = true;
1767 break;
1768 }
1769 }
1770 mutex_unlock(&dcc->cmd_lock);
1771
1772 if (need_wait) {
1773 trimmed += __wait_one_discard_bio(sbi, dc);
1774 goto next;
1775 }
1776
1777 return trimmed;
1778 }
1779
1780 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1781 struct discard_policy *dpolicy)
1782 {
1783 struct discard_policy dp;
1784 unsigned int discard_blks;
1785
1786 if (dpolicy)
1787 return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1788
1789 /* wait all */
1790 __init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1791 discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1792 __init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1793 discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1794
1795 return discard_blks;
1796 }
1797
1798 /* This should be covered by global mutex, &sit_i->sentry_lock */
1799 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1800 {
1801 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1802 struct discard_cmd *dc;
1803 bool need_wait = false;
1804
1805 mutex_lock(&dcc->cmd_lock);
1806 dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
1807 NULL, blkaddr);
1808 if (dc) {
1809 if (dc->state == D_PREP) {
1810 __punch_discard_cmd(sbi, dc, blkaddr);
1811 } else {
1812 dc->ref++;
1813 need_wait = true;
1814 }
1815 }
1816 mutex_unlock(&dcc->cmd_lock);
1817
1818 if (need_wait)
1819 __wait_one_discard_bio(sbi, dc);
1820 }
1821
1822 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1823 {
1824 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1825
1826 if (dcc && dcc->f2fs_issue_discard) {
1827 struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1828
1829 dcc->f2fs_issue_discard = NULL;
1830 kthread_stop(discard_thread);
1831 }
1832 }
1833
1834 /* This comes from f2fs_put_super */
1835 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1836 {
1837 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1838 struct discard_policy dpolicy;
1839 bool dropped;
1840
1841 __init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT, 0);
1842 __issue_discard_cmd(sbi, &dpolicy);
1843 dropped = __drop_discard_cmd(sbi);
1844
1845 /* just to make sure there are no pending discard commands */
1846 __wait_all_discard_cmd(sbi, NULL);
1847
1848 f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1849 return dropped;
1850 }
1851
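/*
 * Pick a discard policy for the background thread based on how much free
 * space the filesystem and the device still have: BG when both are above
 * their thresholds, FORCE when both are below, BALANCE otherwise.
 */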
1852 static int select_discard_type(struct f2fs_sb_info *sbi)
1853 {
1854 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1855 block_t user_block_count = sbi->user_block_count;
1856 block_t ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
1857 block_t fs_available_blocks = user_block_count -
1858 valid_user_blocks(sbi) + ovp_count;
1859 int discard_type;
1860
1861 if (fs_available_blocks >= fs_free_space_threshold(sbi) &&
1862 fs_available_blocks - dcc->undiscard_blks >=
1863 device_free_space_threshold(sbi)) {
1864 discard_type = DPOLICY_BG;
1865 } else if (fs_available_blocks < fs_free_space_threshold(sbi) &&
1866 fs_available_blocks - dcc->undiscard_blks <
1867 device_free_space_threshold(sbi)) {
1868 discard_type = DPOLICY_FORCE;
1869 } else {
1870 discard_type = DPOLICY_BALANCE;
1871 }
1872 return discard_type;
1873 }
1874
1875 static int issue_discard_thread(void *data)
1876 {
1877 struct f2fs_sb_info *sbi = data;
1878 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1879 wait_queue_head_t *q = &dcc->discard_wait_queue;
1880 struct discard_policy dpolicy;
1881 unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
1882 int issued, discard_type;
1883
1884 set_freezable();
1885
1886 do {
1887 discard_type = select_discard_type(sbi);
1888 __init_discard_policy(sbi, &dpolicy, discard_type, 0);
1889
1890 wait_event_interruptible_timeout(*q,
1891 kthread_should_stop() || freezing(current) ||
1892 dcc->discard_wake,
1893 msecs_to_jiffies(wait_ms));
1894
1895 if (dcc->discard_wake)
1896 dcc->discard_wake = 0;
1897
1898 /* clean up pending candidates before going to sleep */
1899 if (atomic_read(&dcc->queued_discard))
1900 __wait_all_discard_cmd(sbi, NULL);
1901
1902 if (try_to_freeze())
1903 continue;
1904 if (f2fs_readonly(sbi->sb))
1905 continue;
1906 if (kthread_should_stop())
1907 return 0;
1908 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
1909 wait_ms = dpolicy.max_interval;
1910 continue;
1911 }
1912
1913 if (sbi->gc_mode == GC_URGENT_HIGH)
1914 __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 0);
1915
1916 sb_start_intwrite(sbi->sb);
1917
1918 issued = __issue_discard_cmd(sbi, &dpolicy);
1919 if (issued > 0) {
1920 __wait_all_discard_cmd(sbi, &dpolicy);
1921 wait_ms = dpolicy.min_interval;
1922 } else if (issued == -1) {
1923 wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1924 if (!wait_ms)
1925 wait_ms = dpolicy.mid_interval;
1926 } else {
1927 wait_ms = dpolicy.max_interval;
1928 }
1929
1930 sb_end_intwrite(sbi->sb);
1931
1932 } while (!kthread_should_stop());
1933 return 0;
1934 }
1935
1936 #ifdef CONFIG_BLK_DEV_ZONED
1937 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1938 struct block_device *bdev, block_t blkstart, block_t blklen)
1939 {
1940 sector_t sector, nr_sects;
1941 block_t lblkstart = blkstart;
1942 int devi = 0;
1943
1944 if (f2fs_is_multi_device(sbi)) {
1945 devi = f2fs_target_device_index(sbi, blkstart);
1946 if (blkstart < FDEV(devi).start_blk ||
1947 blkstart > FDEV(devi).end_blk) {
1948 f2fs_err(sbi, "Invalid block %x", blkstart);
1949 return -EIO;
1950 }
1951 blkstart -= FDEV(devi).start_blk;
1952 }
1953
1954 /* For sequential zones, reset the zone write pointer */
1955 if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1956 sector = SECTOR_FROM_BLOCK(blkstart);
1957 nr_sects = SECTOR_FROM_BLOCK(blklen);
1958
1959 if (sector & (bdev_zone_sectors(bdev) - 1) ||
1960 nr_sects != bdev_zone_sectors(bdev)) {
1961 f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1962 devi, sbi->s_ndevs ? FDEV(devi).path : "",
1963 blkstart, blklen);
1964 return -EIO;
1965 }
1966 trace_f2fs_issue_reset_zone(bdev, blkstart);
1967 return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
1968 sector, nr_sects, GFP_NOFS);
1969 }
1970
1971 /* For conventional zones, use regular discard if supported */
1972 return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1973 }
1974 #endif
1975
1976 static int __issue_discard_async(struct f2fs_sb_info *sbi,
1977 struct block_device *bdev, block_t blkstart, block_t blklen)
1978 {
1979 #ifdef CONFIG_BLK_DEV_ZONED
1980 if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
1981 return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1982 #endif
1983 return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1984 }
1985
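/*
 * Split [blkstart, blkstart + blklen) at device boundaries, queue a discard
 * for each device-local range, and mark each block in the per-segment
 * discard map.
 */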
1986 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1987 block_t blkstart, block_t blklen)
1988 {
1989 sector_t start = blkstart, len = 0;
1990 struct block_device *bdev;
1991 struct seg_entry *se;
1992 unsigned int offset;
1993 block_t i;
1994 int err = 0;
1995
1996 bdev = f2fs_target_device(sbi, blkstart, NULL);
1997
1998 for (i = blkstart; i < blkstart + blklen; i++, len++) {
1999 if (i != start) {
2000 struct block_device *bdev2 =
2001 f2fs_target_device(sbi, i, NULL);
2002
2003 if (bdev2 != bdev) {
2004 err = __issue_discard_async(sbi, bdev,
2005 start, len);
2006 if (err)
2007 return err;
2008 bdev = bdev2;
2009 start = i;
2010 len = 0;
2011 }
2012 }
2013
2014 se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
2015 offset = GET_BLKOFF_FROM_SEG0(sbi, i);
2016
2017 if (!f2fs_test_and_set_bit(offset, se->discard_map))
2018 sbi->discard_blks--;
2019 }
2020
2021 if (len)
2022 err = __issue_discard_async(sbi, bdev, start, len);
2023 return err;
2024 }
2025
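/*
 * Collect discard candidates for the segment given by cpc->trim_start and
 * queue them on the pending entry list. When check_only is true, only
 * report whether any candidate exists without queueing anything.
 */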
2026 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
2027 bool check_only)
2028 {
2029 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2030 int max_blocks = sbi->blocks_per_seg;
2031 struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
2032 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2033 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2034 unsigned long *discard_map = (unsigned long *)se->discard_map;
2035 unsigned long *dmap = SIT_I(sbi)->tmp_map;
2036 unsigned int start = 0, end = -1;
2037 bool force = (cpc->reason & CP_DISCARD);
2038 struct discard_entry *de = NULL;
2039 struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
2040 int i;
2041
2042 if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
2043 return false;
2044
2045 if (!force) {
2046 if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
2047 SM_I(sbi)->dcc_info->nr_discards >=
2048 SM_I(sbi)->dcc_info->max_discards)
2049 return false;
2050 }
2051
2052 /* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
2053 for (i = 0; i < entries; i++)
2054 dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
2055 (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
2056
2057 while (force || SM_I(sbi)->dcc_info->nr_discards <=
2058 SM_I(sbi)->dcc_info->max_discards) {
2059 start = find_rev_next_bit(dmap, max_blocks, end + 1);
2060 if (start >= max_blocks)
2061 break;
2062
2063 end = find_rev_next_zero_bit(dmap, max_blocks, start + 1);
2064 if (force && start && end != max_blocks
2065 && (end - start) < cpc->trim_minlen)
2066 continue;
2067
2068 if (check_only)
2069 return true;
2070
2071 if (!de) {
2072 de = f2fs_kmem_cache_alloc(discard_entry_slab,
2073 GFP_F2FS_ZERO);
2074 de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
2075 list_add_tail(&de->list, head);
2076 }
2077
2078 for (i = start; i < end; i++)
2079 __set_bit_le(i, (void *)de->discard_map);
2080
2081 SM_I(sbi)->dcc_info->nr_discards += end - start;
2082 }
2083 return false;
2084 }
2085
2086 static void release_discard_addr(struct discard_entry *entry)
2087 {
2088 list_del(&entry->list);
2089 kmem_cache_free(discard_entry_slab, entry);
2090 }
2091
2092 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
2093 {
2094 struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
2095 struct discard_entry *entry, *this;
2096
2097 /* drop caches */
2098 list_for_each_entry_safe(entry, this, head, list)
2099 release_discard_addr(entry);
2100 }
2101
2102 /*
2103 * Should call f2fs_clear_prefree_segments after checkpoint is done.
2104 */
2105 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
2106 {
2107 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2108 unsigned int segno;
2109
2110 mutex_lock(&dirty_i->seglist_lock);
2111 for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
2112 __set_test_and_free(sbi, segno, false);
2113 mutex_unlock(&dirty_i->seglist_lock);
2114 }
2115
2116 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
2117 struct cp_control *cpc)
2118 {
2119 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2120 struct list_head *head = &dcc->entry_list;
2121 struct discard_entry *entry, *this;
2122 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2123 unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
2124 unsigned int start = 0, end = -1;
2125 unsigned int secno, start_segno;
2126 bool force = (cpc->reason & CP_DISCARD);
2127 bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
2128
2129 mutex_lock(&dirty_i->seglist_lock);
2130
2131 while (1) {
2132 int i;
2133
2134 if (need_align && end != -1)
2135 end--;
2136 start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
2137 if (start >= MAIN_SEGS(sbi))
2138 break;
2139 end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
2140 start + 1);
2141
2142 if (need_align) {
2143 start = rounddown(start, sbi->segs_per_sec);
2144 end = roundup(end, sbi->segs_per_sec);
2145 }
2146
2147 for (i = start; i < end; i++) {
2148 if (test_and_clear_bit(i, prefree_map))
2149 dirty_i->nr_dirty[PRE]--;
2150 }
2151
2152 if (!f2fs_realtime_discard_enable(sbi))
2153 continue;
2154
2155 if (force && start >= cpc->trim_start &&
2156 (end - 1) <= cpc->trim_end)
2157 continue;
2158
2159 if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) {
2160 f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
2161 (end - start) << sbi->log_blocks_per_seg);
2162 continue;
2163 }
2164 next:
2165 secno = GET_SEC_FROM_SEG(sbi, start);
2166 start_segno = GET_SEG_FROM_SEC(sbi, secno);
2167 if (!IS_CURSEC(sbi, secno) &&
2168 !get_valid_blocks(sbi, start, true))
2169 f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
2170 sbi->segs_per_sec << sbi->log_blocks_per_seg);
2171
2172 start = start_segno + sbi->segs_per_sec;
2173 if (start < end)
2174 goto next;
2175 else
2176 end = start - 1;
2177 }
2178 mutex_unlock(&dirty_i->seglist_lock);
2179
2180 /* send small discards */
2181 list_for_each_entry_safe(entry, this, head, list) {
2182 unsigned int cur_pos = 0, next_pos, len, total_len = 0;
2183 bool is_valid = test_bit_le(0, entry->discard_map);
2184
2185 find_next:
2186 if (is_valid) {
2187 next_pos = find_next_zero_bit_le(entry->discard_map,
2188 sbi->blocks_per_seg, cur_pos);
2189 len = next_pos - cur_pos;
2190
2191 if (f2fs_sb_has_blkzoned(sbi) ||
2192 (force && len < cpc->trim_minlen))
2193 goto skip;
2194
2195 f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2196 len);
2197 total_len += len;
2198 } else {
2199 next_pos = find_next_bit_le(entry->discard_map,
2200 sbi->blocks_per_seg, cur_pos);
2201 }
2202 skip:
2203 cur_pos = next_pos;
2204 is_valid = !is_valid;
2205
2206 if (cur_pos < sbi->blocks_per_seg)
2207 goto find_next;
2208
2209 release_discard_addr(entry);
2210 dcc->nr_discards -= total_len;
2211 }
2212
2213 wake_up_discard_thread(sbi, false);
2214 }
2215
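/* Set up the discard command control structure and start the discard thread. */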
2216 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2217 {
2218 dev_t dev = sbi->sb->s_bdev->bd_dev;
2219 struct discard_cmd_control *dcc;
2220 int err = 0, i;
2221
2222 if (SM_I(sbi)->dcc_info) {
2223 dcc = SM_I(sbi)->dcc_info;
2224 goto init_thread;
2225 }
2226
2227 dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2228 if (!dcc)
2229 return -ENOMEM;
2230
2231 dcc->discard_granularity = DISCARD_GRAN_BG;
2232 INIT_LIST_HEAD(&dcc->entry_list);
2233 for (i = 0; i < MAX_PLIST_NUM; i++)
2234 INIT_LIST_HEAD(&dcc->pend_list[i]);
2235 INIT_LIST_HEAD(&dcc->wait_list);
2236 INIT_LIST_HEAD(&dcc->fstrim_list);
2237 mutex_init(&dcc->cmd_lock);
2238 atomic_set(&dcc->issued_discard, 0);
2239 atomic_set(&dcc->queued_discard, 0);
2240 atomic_set(&dcc->discard_cmd_cnt, 0);
2241 dcc->nr_discards = 0;
2242 dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
2243 dcc->undiscard_blks = 0;
2244 dcc->next_pos = 0;
2245 dcc->root = RB_ROOT_CACHED;
2246 dcc->rbtree_check = false;
2247
2248 init_waitqueue_head(&dcc->discard_wait_queue);
2249 SM_I(sbi)->dcc_info = dcc;
2250 init_thread:
2251 dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2252 "f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2253 if (IS_ERR(dcc->f2fs_issue_discard)) {
2254 err = PTR_ERR(dcc->f2fs_issue_discard);
2255 kfree(dcc);
2256 SM_I(sbi)->dcc_info = NULL;
2257 return err;
2258 }
2259
2260 return err;
2261 }
2262
2263 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2264 {
2265 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2266
2267 if (!dcc)
2268 return;
2269
2270 f2fs_stop_discard_thread(sbi);
2271
2272 /*
2273 * Recovery can cache discard commands, so the error path of
2274 * fill_super() needs to give them a chance to be handled.
2275 */
2276 if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
2277 f2fs_issue_discard_timeout(sbi);
2278
2279 kfree(dcc);
2280 SM_I(sbi)->dcc_info = NULL;
2281 }
2282
2283 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2284 {
2285 struct sit_info *sit_i = SIT_I(sbi);
2286
2287 if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2288 sit_i->dirty_sentries++;
2289 return false;
2290 }
2291
2292 return true;
2293 }
2294
2295 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2296 unsigned int segno, int modified)
2297 {
2298 struct seg_entry *se = get_seg_entry(sbi, segno);
2299 se->type = type;
2300 if (modified)
2301 __mark_sit_entry_dirty(sbi, segno);
2302 }
2303
2304 static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
2305 block_t blkaddr)
2306 {
2307 unsigned int segno = GET_SEGNO(sbi, blkaddr);
2308
2309 if (segno == NULL_SEGNO)
2310 return 0;
2311 return get_seg_entry(sbi, segno)->mtime;
2312 }
2313
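/*
 * Fold the given (or current) modification time into the segment's running
 * average mtime, weighted by the number of valid blocks.
 */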
2314 static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
2315 unsigned long long old_mtime)
2316 {
2317 struct seg_entry *se;
2318 unsigned int segno = GET_SEGNO(sbi, blkaddr);
2319 unsigned long long ctime = get_mtime(sbi, false);
2320 unsigned long long mtime = old_mtime ? old_mtime : ctime;
2321
2322 if (segno == NULL_SEGNO)
2323 return;
2324
2325 se = get_seg_entry(sbi, segno);
2326
2327 if (!se->mtime)
2328 se->mtime = mtime;
2329 else
2330 se->mtime = div_u64(se->mtime * se->valid_blocks + mtime,
2331 se->valid_blocks + 1);
2332
2333 if (ctime > SIT_I(sbi)->max_mtime)
2334 SIT_I(sbi)->max_mtime = ctime;
2335 }
2336
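/*
 * Apply a +1/-1 valid-block delta for @blkaddr to its in-memory SIT entry:
 * update the valid/ckpt/discard bitmaps and per-segment counters, and mark
 * the entry dirty so it is flushed at the next checkpoint.
 */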
2337 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2338 {
2339 struct seg_entry *se;
2340 unsigned int segno, offset;
2341 long int new_vblocks;
2342 bool exist;
2343 #ifdef CONFIG_F2FS_CHECK_FS
2344 bool mir_exist;
2345 #endif
2346
2347 segno = GET_SEGNO(sbi, blkaddr);
2348
2349 se = get_seg_entry(sbi, segno);
2350 new_vblocks = se->valid_blocks + del;
2351 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2352
2353 f2fs_bug_on(sbi, (new_vblocks < 0 ||
2354 (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
2355
2356 se->valid_blocks = new_vblocks;
2357
2358 /* Update valid block bitmap */
2359 if (del > 0) {
2360 exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2361 #ifdef CONFIG_F2FS_CHECK_FS
2362 mir_exist = f2fs_test_and_set_bit(offset,
2363 se->cur_valid_map_mir);
2364 if (unlikely(exist != mir_exist)) {
2365 f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
2366 blkaddr, exist);
2367 f2fs_bug_on(sbi, 1);
2368 }
2369 #endif
2370 if (unlikely(exist)) {
2371 f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
2372 blkaddr);
2373 f2fs_bug_on(sbi, 1);
2374 se->valid_blocks--;
2375 del = 0;
2376 }
2377
2378 if (!f2fs_test_and_set_bit(offset, se->discard_map))
2379 sbi->discard_blks--;
2380
2381 /*
2382 * SSR should never reuse block which is checkpointed
2383 * or newly invalidated.
2384 */
2385 if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2386 if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2387 se->ckpt_valid_blocks++;
2388 }
2389 } else {
2390 exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2391 #ifdef CONFIG_F2FS_CHECK_FS
2392 mir_exist = f2fs_test_and_clear_bit(offset,
2393 se->cur_valid_map_mir);
2394 if (unlikely(exist != mir_exist)) {
2395 f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
2396 blkaddr, exist);
2397 f2fs_bug_on(sbi, 1);
2398 }
2399 #endif
2400 if (unlikely(!exist)) {
2401 f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
2402 blkaddr);
2403 f2fs_bug_on(sbi, 1);
2404 se->valid_blocks++;
2405 del = 0;
2406 } else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2407 /*
2408 * If checkpoints are off, we must not reuse data that
2409 * was used in the previous checkpoint. If it was used
2410 * before, we must track that to know how much space we
2411 * really have.
2412 */
2413 if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
2414 spin_lock(&sbi->stat_lock);
2415 sbi->unusable_block_count++;
2416 spin_unlock(&sbi->stat_lock);
2417 }
2418 }
2419
2420 if (f2fs_test_and_clear_bit(offset, se->discard_map))
2421 sbi->discard_blks++;
2422 }
2423 if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2424 se->ckpt_valid_blocks += del;
2425
2426 __mark_sit_entry_dirty(sbi, segno);
2427
2428 /* update total number of valid blocks to be written in ckpt area */
2429 SIT_I(sbi)->written_valid_blocks += del;
2430
2431 if (__is_large_section(sbi))
2432 get_sec_entry(sbi, segno)->valid_blocks += del;
2433 }
2434
2435 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2436 {
2437 unsigned int segno = GET_SEGNO(sbi, addr);
2438 struct sit_info *sit_i = SIT_I(sbi);
2439
2440 f2fs_bug_on(sbi, addr == NULL_ADDR);
2441 if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
2442 return;
2443
2444 invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
2445
2446 /* add it into sit main buffer */
2447 down_write(&sit_i->sentry_lock);
2448
2449 update_segment_mtime(sbi, addr, 0);
2450 update_sit_entry(sbi, addr, -1);
2451
2452 /* add it into dirty seglist */
2453 locate_dirty_segment(sbi, segno);
2454
2455 up_write(&sit_i->sentry_lock);
2456 }
2457
2458 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2459 {
2460 struct sit_info *sit_i = SIT_I(sbi);
2461 unsigned int segno, offset;
2462 struct seg_entry *se;
2463 bool is_cp = false;
2464
2465 if (!__is_valid_data_blkaddr(blkaddr))
2466 return true;
2467
2468 down_read(&sit_i->sentry_lock);
2469
2470 segno = GET_SEGNO(sbi, blkaddr);
2471 se = get_seg_entry(sbi, segno);
2472 offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2473
2474 if (f2fs_test_bit(offset, se->ckpt_valid_map))
2475 is_cp = true;
2476
2477 up_read(&sit_i->sentry_lock);
2478
2479 return is_cp;
2480 }
2481
2482 /*
2483 * This function must be called with the curseg_mutex lock held
2484 */
2485 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
2486 struct f2fs_summary *sum)
2487 {
2488 struct curseg_info *curseg = CURSEG_I(sbi, type);
2489 void *addr = curseg->sum_blk;
2490 addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
2491 memcpy(addr, sum, sizeof(struct f2fs_summary));
2492 }
2493
2494 /*
2495 * Calculate the number of current summary pages for writing
2496 */
2497 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2498 {
2499 int valid_sum_count = 0;
2500 int i, sum_in_page;
2501
2502 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2503 if (sbi->ckpt->alloc_type[i] == SSR)
2504 valid_sum_count += sbi->blocks_per_seg;
2505 else {
2506 if (for_ra)
2507 valid_sum_count += le16_to_cpu(
2508 F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2509 else
2510 valid_sum_count += curseg_blkoff(sbi, i);
2511 }
2512 }
2513
2514 sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2515 SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2516 if (valid_sum_count <= sum_in_page)
2517 return 1;
2518 else if ((valid_sum_count - sum_in_page) <=
2519 (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2520 return 2;
2521 return 3;
2522 }
2523
2524 /*
2525 * Caller should put this summary page
2526 */
2527 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2528 {
2529 if (unlikely(f2fs_cp_error(sbi)))
2530 return ERR_PTR(-EIO);
2531 return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
2532 }
2533
2534 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2535 void *src, block_t blk_addr)
2536 {
2537 struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2538
2539 memcpy(page_address(page), src, PAGE_SIZE);
2540 set_page_dirty(page);
2541 f2fs_put_page(page, 1);
2542 }
2543
2544 static void write_sum_page(struct f2fs_sb_info *sbi,
2545 struct f2fs_summary_block *sum_blk, block_t blk_addr)
2546 {
2547 f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2548 }
2549
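/*
 * Copy the in-memory summary block of the given log (journal, entries and
 * footer) into a meta page at @blk_addr and mark it dirty for writeback.
 */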
2550 static void write_current_sum_page(struct f2fs_sb_info *sbi,
2551 int type, block_t blk_addr)
2552 {
2553 struct curseg_info *curseg = CURSEG_I(sbi, type);
2554 struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2555 struct f2fs_summary_block *src = curseg->sum_blk;
2556 struct f2fs_summary_block *dst;
2557
2558 dst = (struct f2fs_summary_block *)page_address(page);
2559 memset(dst, 0, PAGE_SIZE);
2560
2561 mutex_lock(&curseg->curseg_mutex);
2562
2563 down_read(&curseg->journal_rwsem);
2564 memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2565 up_read(&curseg->journal_rwsem);
2566
2567 memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2568 memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2569
2570 mutex_unlock(&curseg->curseg_mutex);
2571
2572 set_page_dirty(page);
2573 f2fs_put_page(page, 1);
2574 }
2575
2576 static int is_next_segment_free(struct f2fs_sb_info *sbi,
2577 struct curseg_info *curseg, int type)
2578 {
2579 unsigned int segno = curseg->segno + 1;
2580 struct free_segmap_info *free_i = FREE_I(sbi);
2581
2582 if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2583 return !test_bit(segno, free_i->free_segmap);
2584 return 0;
2585 }
2586
2587 /*
2588 * Find a new segment in the free segment bitmap, following the requested
2589 * allocation direction. This function must succeed; otherwise it is a BUG.
2590 */
2591 static void get_new_segment(struct f2fs_sb_info *sbi,
2592 unsigned int *newseg, bool new_sec, int dir)
2593 {
2594 struct free_segmap_info *free_i = FREE_I(sbi);
2595 unsigned int segno, secno, zoneno;
2596 unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2597 unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2598 unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2599 unsigned int left_start = hint;
2600 bool init = true;
2601 int go_left = 0;
2602 int i;
2603
2604 spin_lock(&free_i->segmap_lock);
2605
2606 if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
2607 segno = find_next_zero_bit(free_i->free_segmap,
2608 GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2609 if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2610 goto got_it;
2611 }
2612 find_other_zone:
2613 secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2614 if (secno >= MAIN_SECS(sbi)) {
2615 if (dir == ALLOC_RIGHT) {
2616 secno = find_next_zero_bit(free_i->free_secmap,
2617 MAIN_SECS(sbi), 0);
2618 f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2619 } else {
2620 go_left = 1;
2621 left_start = hint - 1;
2622 }
2623 }
2624 if (go_left == 0)
2625 goto skip_left;
2626
2627 while (test_bit(left_start, free_i->free_secmap)) {
2628 if (left_start > 0) {
2629 left_start--;
2630 continue;
2631 }
2632 left_start = find_next_zero_bit(free_i->free_secmap,
2633 MAIN_SECS(sbi), 0);
2634 f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2635 break;
2636 }
2637 secno = left_start;
2638 skip_left:
2639 segno = GET_SEG_FROM_SEC(sbi, secno);
2640 zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2641
2642 /* give up on finding another zone */
2643 if (!init)
2644 goto got_it;
2645 if (sbi->secs_per_zone == 1)
2646 goto got_it;
2647 if (zoneno == old_zoneno)
2648 goto got_it;
2649 if (dir == ALLOC_LEFT) {
2650 if (!go_left && zoneno + 1 >= total_zones)
2651 goto got_it;
2652 if (go_left && zoneno == 0)
2653 goto got_it;
2654 }
2655 for (i = 0; i < NR_CURSEG_TYPE; i++)
2656 if (CURSEG_I(sbi, i)->zone == zoneno)
2657 break;
2658
2659 if (i < NR_CURSEG_TYPE) {
2660 /* zone is in use, try another */
2661 if (go_left)
2662 hint = zoneno * sbi->secs_per_zone - 1;
2663 else if (zoneno + 1 >= total_zones)
2664 hint = 0;
2665 else
2666 hint = (zoneno + 1) * sbi->secs_per_zone;
2667 init = false;
2668 goto find_other_zone;
2669 }
2670 got_it:
2671 /* set it as dirty segment in free segmap */
2672 f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2673 __set_inuse(sbi, segno);
2674 *newseg = segno;
2675 spin_unlock(&free_i->segmap_lock);
2676 }
2677
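/*
 * Switch the current segment to curseg->next_segno and reinitialize its
 * summary footer and SIT entry type.
 */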
2678 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2679 {
2680 struct curseg_info *curseg = CURSEG_I(sbi, type);
2681 struct summary_footer *sum_footer;
2682 unsigned short seg_type = curseg->seg_type;
2683
2684 curseg->inited = true;
2685 curseg->segno = curseg->next_segno;
2686 curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2687 curseg->next_blkoff = 0;
2688 curseg->next_segno = NULL_SEGNO;
2689
2690 sum_footer = &(curseg->sum_blk->footer);
2691 memset(sum_footer, 0, sizeof(struct summary_footer));
2692
2693 sanity_check_seg_type(sbi, seg_type);
2694
2695 if (IS_DATASEG(seg_type))
2696 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2697 if (IS_NODESEG(seg_type))
2698 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2699 __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
2700 }
2701
2702 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2703 {
2704 struct curseg_info *curseg = CURSEG_I(sbi, type);
2705 unsigned short seg_type = curseg->seg_type;
2706
2707 sanity_check_seg_type(sbi, seg_type);
2708
2709 /* if segs_per_sec is larger than 1, we need to keep the original policy. */
2710 if (__is_large_section(sbi))
2711 return curseg->segno;
2712
2713 /* an in-memory log may not be located on any segment after mount */
2714 if (!curseg->inited)
2715 return 0;
2716
2717 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2718 return 0;
2719
2720 if (test_opt(sbi, NOHEAP) &&
2721 (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
2722 return 0;
2723
2724 if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2725 return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2726
2727 /* find segments from 0 to reuse freed segments */
2728 if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2729 return 0;
2730
2731 return curseg->segno;
2732 }
2733
2734 /*
2735 * Allocate a current working segment.
2736 * This function always allocates a free segment in LFS manner.
2737 */
2738 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2739 {
2740 struct curseg_info *curseg = CURSEG_I(sbi, type);
2741 unsigned short seg_type = curseg->seg_type;
2742 unsigned int segno = curseg->segno;
2743 int dir = ALLOC_LEFT;
2744
2745 if (curseg->inited)
2746 write_sum_page(sbi, curseg->sum_blk,
2747 GET_SUM_BLOCK(sbi, segno));
2748 if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
2749 dir = ALLOC_RIGHT;
2750
2751 if (test_opt(sbi, NOHEAP))
2752 dir = ALLOC_RIGHT;
2753
2754 segno = __get_next_segno(sbi, type);
2755 get_new_segment(sbi, &segno, new_sec, dir);
2756 curseg->next_segno = segno;
2757 reset_curseg(sbi, type, 1);
2758 curseg->alloc_type = LFS;
2759 }
2760
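/*
 * Find the next free block offset in an SSR segment by OR-ing the current
 * and checkpointed valid bitmaps and scanning for the first clear bit at
 * or after @start.
 */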
2761 static void __next_free_blkoff(struct f2fs_sb_info *sbi,
2762 struct curseg_info *seg, block_t start)
2763 {
2764 struct seg_entry *se = get_seg_entry(sbi, seg->segno);
2765 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2766 unsigned long *target_map = SIT_I(sbi)->tmp_map;
2767 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2768 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2769 int i, pos;
2770
2771 for (i = 0; i < entries; i++)
2772 target_map[i] = ckpt_map[i] | cur_map[i];
2773
2774 pos = find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2775
2776 seg->next_blkoff = pos;
2777 }
2778
2779 /*
2780 * If a segment is written in LFS manner, the next block offset is simply
2781 * obtained by increasing the current block offset. However, if a segment is
2782 * written in SSR manner, the next block offset is obtained by calling __next_free_blkoff
2783 */
2784 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
2785 struct curseg_info *seg)
2786 {
2787 if (seg->alloc_type == SSR)
2788 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
2789 else
2790 seg->next_blkoff++;
2791 }
2792
2793 bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
2794 {
2795 struct seg_entry *se = get_seg_entry(sbi, segno);
2796 int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2797 unsigned long *target_map = SIT_I(sbi)->tmp_map;
2798 unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2799 unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2800 int i, pos;
2801
2802 for (i = 0; i < entries; i++)
2803 target_map[i] = ckpt_map[i] | cur_map[i];
2804
2805 pos = find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, 0);
2806
2807 return pos < sbi->blocks_per_seg;
2808 }
2809
2810 /*
2811 * This function always allocates a used segment (from the dirty seglist) in
2812 * SSR manner, so it should recover the existing segment information of valid blocks
2813 */
2814 static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
2815 {
2816 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2817 struct curseg_info *curseg = CURSEG_I(sbi, type);
2818 unsigned int new_segno = curseg->next_segno;
2819 struct f2fs_summary_block *sum_node;
2820 struct page *sum_page;
2821
2822 if (flush)
2823 write_sum_page(sbi, curseg->sum_blk,
2824 GET_SUM_BLOCK(sbi, curseg->segno));
2825
2826 __set_test_and_inuse(sbi, new_segno);
2827
2828 mutex_lock(&dirty_i->seglist_lock);
2829 __remove_dirty_segment(sbi, new_segno, PRE);
2830 __remove_dirty_segment(sbi, new_segno, DIRTY);
2831 mutex_unlock(&dirty_i->seglist_lock);
2832
2833 reset_curseg(sbi, type, 1);
2834 curseg->alloc_type = SSR;
2835 __next_free_blkoff(sbi, curseg, 0);
2836
2837 sum_page = f2fs_get_sum_page(sbi, new_segno);
2838 if (IS_ERR(sum_page)) {
2839 /* GC won't be able to use stale summary pages by cp_error */
2840 memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
2841 return;
2842 }
2843 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2844 memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2845 f2fs_put_page(sum_page, 1);
2846 }
2847
2848 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2849 int alloc_mode, unsigned long long age);
2850
2851 static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
2852 int target_type, int alloc_mode,
2853 unsigned long long age)
2854 {
2855 struct curseg_info *curseg = CURSEG_I(sbi, type);
2856
2857 curseg->seg_type = target_type;
2858
2859 if (get_ssr_segment(sbi, type, alloc_mode, age)) {
2860 struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
2861
2862 curseg->seg_type = se->type;
2863 change_curseg(sbi, type, true);
2864 } else {
2865 /* allocate cold segment by default */
2866 curseg->seg_type = CURSEG_COLD_DATA;
2867 new_curseg(sbi, type, true);
2868 }
2869 stat_inc_seg_type(sbi, curseg);
2870 }
2871
2872 static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
2873 {
2874 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
2875
2876 if (!sbi->am.atgc_enabled)
2877 return;
2878
2879 down_read(&SM_I(sbi)->curseg_lock);
2880
2881 mutex_lock(&curseg->curseg_mutex);
2882 down_write(&SIT_I(sbi)->sentry_lock);
2883
2884 get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
2885
2886 up_write(&SIT_I(sbi)->sentry_lock);
2887 mutex_unlock(&curseg->curseg_mutex);
2888
2889 up_read(&SM_I(sbi)->curseg_lock);
2890
2891 }
2892 void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
2893 {
2894 __f2fs_init_atgc_curseg(sbi);
2895 }
2896
2897 static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2898 {
2899 struct curseg_info *curseg = CURSEG_I(sbi, type);
2900
2901 mutex_lock(&curseg->curseg_mutex);
2902 if (!curseg->inited)
2903 goto out;
2904
2905 if (get_valid_blocks(sbi, curseg->segno, false)) {
2906 write_sum_page(sbi, curseg->sum_blk,
2907 GET_SUM_BLOCK(sbi, curseg->segno));
2908 } else {
2909 mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2910 __set_test_and_free(sbi, curseg->segno, true);
2911 mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
2912 }
2913 out:
2914 mutex_unlock(&curseg->curseg_mutex);
2915 }
2916
2917 void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
2918 {
2919 __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
2920
2921 if (sbi->am.atgc_enabled)
2922 __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
2923 }
2924
2925 static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
2926 {
2927 struct curseg_info *curseg = CURSEG_I(sbi, type);
2928
2929 mutex_lock(&curseg->curseg_mutex);
2930 if (!curseg->inited)
2931 goto out;
2932 if (get_valid_blocks(sbi, curseg->segno, false))
2933 goto out;
2934
2935 mutex_lock(&DIRTY_I(sbi)->seglist_lock);
2936 __set_test_and_inuse(sbi, curseg->segno);
2937 mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
2938 out:
2939 mutex_unlock(&curseg->curseg_mutex);
2940 }
2941
2942 void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
2943 {
2944 __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
2945
2946 if (sbi->am.atgc_enabled)
2947 __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
2948 }
2949
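/*
 * Pick a victim segment for SSR allocation: try the requested temperature
 * first, then the other logs of the same kind (data or node), and finally
 * fall back to any free segment when checkpointing is disabled.
 */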
2950 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
2951 int alloc_mode, unsigned long long age)
2952 {
2953 struct curseg_info *curseg = CURSEG_I(sbi, type);
2954 const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
2955 unsigned segno = NULL_SEGNO;
2956 unsigned short seg_type = curseg->seg_type;
2957 int i, cnt;
2958 bool reversed = false;
2959
2960 sanity_check_seg_type(sbi, seg_type);
2961
2962 /* f2fs_need_SSR() already forces to do this */
2963 if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
2964 curseg->next_segno = segno;
2965 return 1;
2966 }
2967
2968 /* For node segments, let's do SSR more intensively */
2969 if (IS_NODESEG(seg_type)) {
2970 if (seg_type >= CURSEG_WARM_NODE) {
2971 reversed = true;
2972 i = CURSEG_COLD_NODE;
2973 } else {
2974 i = CURSEG_HOT_NODE;
2975 }
2976 cnt = NR_CURSEG_NODE_TYPE;
2977 } else {
2978 if (seg_type >= CURSEG_WARM_DATA) {
2979 reversed = true;
2980 i = CURSEG_COLD_DATA;
2981 } else {
2982 i = CURSEG_HOT_DATA;
2983 }
2984 cnt = NR_CURSEG_DATA_TYPE;
2985 }
2986
2987 for (; cnt-- > 0; reversed ? i-- : i++) {
2988 if (i == seg_type)
2989 continue;
2990 if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
2991 curseg->next_segno = segno;
2992 return 1;
2993 }
2994 }
2995
2996 /* find valid_blocks=0 in dirty list */
2997 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2998 segno = get_free_segment(sbi);
2999 if (segno != NULL_SEGNO) {
3000 curseg->next_segno = segno;
3001 return 1;
3002 }
3003 }
3004 return 0;
3005 }
3006
3007 /*
3008 * Flush out the current segment and replace it with a new segment.
3009 * This function must succeed; otherwise it is a BUG.
3010 */
3011 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
3012 int type, bool force, int contig_level)
3013 {
3014 struct curseg_info *curseg = CURSEG_I(sbi, type);
3015
3016 if (force)
3017 new_curseg(sbi, type, true);
3018 else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
3019 curseg->seg_type == CURSEG_WARM_NODE)
3020 new_curseg(sbi, type, false);
3021 else if (curseg->alloc_type == LFS &&
3022 is_next_segment_free(sbi, curseg, type) &&
3023 likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
3024 new_curseg(sbi, type, false);
3025 #ifdef CONFIG_F2FS_GRADING_SSR
3026 else if (need_ssr_by_type(sbi, type, contig_level) && get_ssr_segment(sbi, type, SSR, 0))
3027 #else
3028 else if (f2fs_need_SSR(sbi) &&
3029 get_ssr_segment(sbi, type, SSR, 0))
3030 #endif
3031 change_curseg(sbi, type, true);
3032 else
3033 new_curseg(sbi, type, false);
3034
3035 stat_inc_seg_type(sbi, curseg);
3036 }
3037
3038 void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
3039 unsigned int start, unsigned int end)
3040 {
3041 struct curseg_info *curseg = CURSEG_I(sbi, type);
3042 unsigned int segno;
3043
3044 down_read(&SM_I(sbi)->curseg_lock);
3045 mutex_lock(&curseg->curseg_mutex);
3046 down_write(&SIT_I(sbi)->sentry_lock);
3047
3048 segno = CURSEG_I(sbi, type)->segno;
3049 if (segno < start || segno > end)
3050 goto unlock;
3051
3052 if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
3053 change_curseg(sbi, type, true);
3054 else
3055 new_curseg(sbi, type, true);
3056
3057 stat_inc_seg_type(sbi, curseg);
3058
3059 locate_dirty_segment(sbi, segno);
3060 unlock:
3061 up_write(&SIT_I(sbi)->sentry_lock);
3062
3063 if (segno != curseg->segno)
3064 f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
3065 type, segno, curseg->segno);
3066
3067 mutex_unlock(&curseg->curseg_mutex);
3068 up_read(&SM_I(sbi)->curseg_lock);
3069 }
3070
3071 static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
3072 bool new_sec)
3073 {
3074 struct curseg_info *curseg = CURSEG_I(sbi, type);
3075 unsigned int old_segno;
3076
3077 if (!curseg->inited)
3078 goto alloc;
3079
3080 if (curseg->next_blkoff ||
3081 get_valid_blocks(sbi, curseg->segno, new_sec))
3082 goto alloc;
3083
3084 if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
3085 return;
3086 alloc:
3087 old_segno = curseg->segno;
3088 SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true, SEQ_NONE);
3089 locate_dirty_segment(sbi, old_segno);
3090 }
3091
3092 static void __allocate_new_section(struct f2fs_sb_info *sbi, int type)
3093 {
3094 __allocate_new_segment(sbi, type, true);
3095 }
3096
3097 void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type)
3098 {
3099 down_read(&SM_I(sbi)->curseg_lock);
3100 down_write(&SIT_I(sbi)->sentry_lock);
3101 __allocate_new_section(sbi, type);
3102 up_write(&SIT_I(sbi)->sentry_lock);
3103 up_read(&SM_I(sbi)->curseg_lock);
3104 }
3105
3106 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
3107 {
3108 int i;
3109
3110 down_read(&SM_I(sbi)->curseg_lock);
3111 down_write(&SIT_I(sbi)->sentry_lock);
3112 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
3113 __allocate_new_segment(sbi, i, false);
3114 up_write(&SIT_I(sbi)->sentry_lock);
3115 up_read(&SM_I(sbi)->curseg_lock);
3116 }
3117
3118 static const struct segment_allocation default_salloc_ops = {
3119 .allocate_segment = allocate_segment_by_default,
3120 };
3121
3122 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
3123 struct cp_control *cpc)
3124 {
3125 __u64 trim_start = cpc->trim_start;
3126 bool has_candidate = false;
3127
3128 down_write(&SIT_I(sbi)->sentry_lock);
3129 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
3130 if (add_discard_addrs(sbi, cpc, true)) {
3131 has_candidate = true;
3132 break;
3133 }
3134 }
3135 up_write(&SIT_I(sbi)->sentry_lock);
3136
3137 cpc->trim_start = trim_start;
3138 return has_candidate;
3139 }
3140
3141 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
3142 struct discard_policy *dpolicy,
3143 unsigned int start, unsigned int end)
3144 {
3145 struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
3146 struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
3147 struct rb_node **insert_p = NULL, *insert_parent = NULL;
3148 struct discard_cmd *dc;
3149 struct blk_plug plug;
3150 struct discard_sub_policy *spolicy = NULL;
3151 int issued;
3152 unsigned int trimmed = 0;
3153 /* fstrim issues up to 8 discards at a time, without interruption */
3154 select_sub_discard_policy(&spolicy, 0, dpolicy);
3155
3156 if (dcc->rbtree_check) {
3157 mutex_lock(&dcc->cmd_lock);
3158 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi, &dcc->root, false));
3159 mutex_unlock(&dcc->cmd_lock);
3160 }
3161
3162 next:
3163 issued = 0;
3164
3165 mutex_lock(&dcc->cmd_lock);
3166 if (unlikely(dcc->rbtree_check))
3167 f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
3168 &dcc->root, false));
3169
3170 dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
3171 NULL, start,
3172 (struct rb_entry **)&prev_dc,
3173 (struct rb_entry **)&next_dc,
3174 &insert_p, &insert_parent, true, NULL);
3175 if (!dc)
3176 dc = next_dc;
3177
3178 blk_start_plug(&plug);
3179
3180 while (dc && dc->lstart <= end) {
3181 struct rb_node *node;
3182 int err = 0;
3183
3184 if (dc->len < dpolicy->granularity)
3185 goto skip;
3186
3187 if (dc->state != D_PREP) {
3188 list_move_tail(&dc->list, &dcc->fstrim_list);
3189 goto skip;
3190 }
3191
3192 err = __submit_discard_cmd(sbi, dpolicy, 0, dc, &issued);
3193
3194 if (issued >= spolicy->max_requests) {
3195 start = dc->lstart + dc->len;
3196
3197 if (err)
3198 __remove_discard_cmd(sbi, dc);
3199
3200 blk_finish_plug(&plug);
3201 mutex_unlock(&dcc->cmd_lock);
3202 trimmed += __wait_all_discard_cmd(sbi, NULL);
3203 congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
3204 goto next;
3205 }
3206 skip:
3207 node = rb_next(&dc->rb_node);
3208 if (err)
3209 __remove_discard_cmd(sbi, dc);
3210 dc = rb_entry_safe(node, struct discard_cmd, rb_node);
3211
3212 if (fatal_signal_pending(current))
3213 break;
3214 }
3215
3216 blk_finish_plug(&plug);
3217 mutex_unlock(&dcc->cmd_lock);
3218
3219 return trimmed;
3220 }
3221
3222 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
3223 {
3224 __u64 start = F2FS_BYTES_TO_BLK(range->start);
3225 __u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
3226 unsigned int start_segno, end_segno;
3227 block_t start_block, end_block;
3228 struct cp_control cpc;
3229 struct discard_policy dpolicy;
3230 unsigned long long trimmed = 0;
3231 int err = 0;
3232 bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
3233
3234 if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
3235 return -EINVAL;
3236
3237 if (end < MAIN_BLKADDR(sbi))
3238 goto out;
3239
3240 if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
3241 f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
3242 return -EFSCORRUPTED;
3243 }
3244
3245 /* start/end segment number in main_area */
3246 start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
3247 end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
3248 GET_SEGNO(sbi, end);
3249 if (need_align) {
3250 start_segno = rounddown(start_segno, sbi->segs_per_sec);
3251 end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
3252 }
3253
3254 cpc.reason = CP_DISCARD;
3255 cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
3256 cpc.trim_start = start_segno;
3257 cpc.trim_end = end_segno;
3258
3259 if (sbi->discard_blks == 0)
3260 goto out;
3261
3262 down_write(&sbi->gc_lock);
3263 err = f2fs_write_checkpoint(sbi, &cpc);
3264 up_write(&sbi->gc_lock);
3265 if (err)
3266 goto out;
3267
3268 /*
3269 * We filed discard candidates, but we don't actually need to wait for
3270 * all of them, since they will be issued during idle time by the runtime
3271 * discard option. The user configuration apparently relies on runtime
3272 * discard or periodic fstrim rather than waiting here.
3273 */
3274 if (f2fs_realtime_discard_enable(sbi))
3275 goto out;
3276
3277 start_block = START_BLOCK(sbi, start_segno);
3278 end_block = START_BLOCK(sbi, end_segno + 1);
3279
3280 __init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
3281 trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
3282 start_block, end_block);
3283
3284 trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
3285 start_block, end_block);
3286 out:
3287 if (!err)
3288 range->len = F2FS_BLK_TO_BYTES(trimmed);
3289 return err;
3290 }
3291
3292 static bool __has_curseg_space(struct f2fs_sb_info *sbi,
3293 struct curseg_info *curseg)
3294 {
3295 return curseg->next_blkoff < f2fs_usable_blks_in_seg(sbi,
3296 curseg->segno);
3297 }
3298
3299 int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
3300 {
3301 switch (hint) {
3302 case WRITE_LIFE_SHORT:
3303 return CURSEG_HOT_DATA;
3304 case WRITE_LIFE_EXTREME:
3305 return CURSEG_COLD_DATA;
3306 default:
3307 return CURSEG_WARM_DATA;
3308 }
3309 }
3310
3311 /* This returns write hints for each segment type. These hints will be
3312 * passed down to the block layer. There are mapping tables which depend on
3313 * the mount option 'whint_mode'.
3314 *
3315 * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
3316 *
3317 * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
3318 *
3319 * User F2FS Block
3320 * ---- ---- -----
3321 * META WRITE_LIFE_NOT_SET
3322 * HOT_NODE "
3323 * WARM_NODE "
3324 * COLD_NODE "
3325 * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
3326 * extension list " "
3327 *
3328 * -- buffered io
3329 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
3330 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
3331 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
3332 * WRITE_LIFE_NONE " "
3333 * WRITE_LIFE_MEDIUM " "
3334 * WRITE_LIFE_LONG " "
3335 *
3336 * -- direct io
3337 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
3338 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
3339 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
3340 * WRITE_LIFE_NONE " WRITE_LIFE_NONE
3341 * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
3342 * WRITE_LIFE_LONG " WRITE_LIFE_LONG
3343 *
3344 * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
3345 *
3346 * User F2FS Block
3347 * ---- ---- -----
3348 * META WRITE_LIFE_MEDIUM;
3349 * HOT_NODE WRITE_LIFE_NOT_SET
3350 * WARM_NODE "
3351 * COLD_NODE WRITE_LIFE_NONE
3352 * ioctl(COLD) COLD_DATA WRITE_LIFE_EXTREME
3353 * extension list " "
3354 *
3355 * -- buffered io
3356 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
3357 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
3358 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_LONG
3359 * WRITE_LIFE_NONE " "
3360 * WRITE_LIFE_MEDIUM " "
3361 * WRITE_LIFE_LONG " "
3362 *
3363 * -- direct io
3364 * WRITE_LIFE_EXTREME COLD_DATA WRITE_LIFE_EXTREME
3365 * WRITE_LIFE_SHORT HOT_DATA WRITE_LIFE_SHORT
3366 * WRITE_LIFE_NOT_SET WARM_DATA WRITE_LIFE_NOT_SET
3367 * WRITE_LIFE_NONE " WRITE_LIFE_NONE
3368 * WRITE_LIFE_MEDIUM " WRITE_LIFE_MEDIUM
3369 * WRITE_LIFE_LONG " WRITE_LIFE_LONG
3370 */
3371
3372 enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
3373 enum page_type type, enum temp_type temp)
3374 {
3375 if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
3376 if (type == DATA) {
3377 if (temp == WARM)
3378 return WRITE_LIFE_NOT_SET;
3379 else if (temp == HOT)
3380 return WRITE_LIFE_SHORT;
3381 else if (temp == COLD)
3382 return WRITE_LIFE_EXTREME;
3383 } else {
3384 return WRITE_LIFE_NOT_SET;
3385 }
3386 } else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
3387 if (type == DATA) {
3388 if (temp == WARM)
3389 return WRITE_LIFE_LONG;
3390 else if (temp == HOT)
3391 return WRITE_LIFE_SHORT;
3392 else if (temp == COLD)
3393 return WRITE_LIFE_EXTREME;
3394 } else if (type == NODE) {
3395 if (temp == WARM || temp == HOT)
3396 return WRITE_LIFE_NOT_SET;
3397 else if (temp == COLD)
3398 return WRITE_LIFE_NONE;
3399 } else if (type == META) {
3400 return WRITE_LIFE_MEDIUM;
3401 }
3402 }
3403 return WRITE_LIFE_NOT_SET;
3404 }
3405
3406 static int __get_segment_type_2(struct f2fs_io_info *fio)
3407 {
3408 if (fio->type == DATA)
3409 return CURSEG_HOT_DATA;
3410 else
3411 return CURSEG_HOT_NODE;
3412 }
3413
3414 static int __get_segment_type_4(struct f2fs_io_info *fio)
3415 {
3416 if (fio->type == DATA) {
3417 struct inode *inode = fio->page->mapping->host;
3418
3419 if (S_ISDIR(inode->i_mode))
3420 return CURSEG_HOT_DATA;
3421 else
3422 return CURSEG_COLD_DATA;
3423 } else {
3424 if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3425 return CURSEG_WARM_NODE;
3426 else
3427 return CURSEG_COLD_NODE;
3428 }
3429 }
3430
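/*
 * Six-log temperature selection: cold or compressed data goes to the cold
 * (or ATGC) log, hot/atomic/volatile files go to the hot data log, and other
 * data follows the inode's write hint; direct node pages go to the hot or
 * warm node logs and indirect node pages to the cold node log.
 */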
3431 static int __get_segment_type_6(struct f2fs_io_info *fio)
3432 {
3433 if (fio->type == DATA) {
3434 struct inode *inode = fio->page->mapping->host;
3435
3436 if (is_cold_data(fio->page)) {
3437 if (fio->sbi->am.atgc_enabled)
3438 return CURSEG_ALL_DATA_ATGC;
3439 else
3440 return CURSEG_COLD_DATA;
3441 }
3442 if (file_is_cold(inode) || f2fs_compressed_file(inode))
3443 return CURSEG_COLD_DATA;
3444 if (file_is_hot(inode) ||
3445 is_inode_flag_set(inode, FI_HOT_DATA) ||
3446 f2fs_is_atomic_file(inode) ||
3447 f2fs_is_volatile_file(inode))
3448 return CURSEG_HOT_DATA;
3449 return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
3450 } else {
3451 if (IS_DNODE(fio->page))
3452 return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3453 CURSEG_HOT_NODE;
3454 return CURSEG_COLD_NODE;
3455 }
3456 }
3457
3458 static int __get_segment_type(struct f2fs_io_info *fio)
3459 {
3460 int type = 0;
3461
3462 switch (F2FS_OPTION(fio->sbi).active_logs) {
3463 case 2:
3464 type = __get_segment_type_2(fio);
3465 break;
3466 case 4:
3467 type = __get_segment_type_4(fio);
3468 break;
3469 case 6:
3470 type = __get_segment_type_6(fio);
3471 break;
3472 default:
3473 f2fs_bug_on(fio->sbi, true);
3474 }
3475
3476 if (IS_HOT(type))
3477 fio->temp = HOT;
3478 else if (IS_WARM(type))
3479 fio->temp = WARM;
3480 else
3481 fio->temp = COLD;
3482 return type;
3483 }
3484
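/*
 * Allocate the next block of the current segment for @type, record the
 * summary entry, update SIT and mtime information for the old and new block
 * addresses, and move on to a new segment when the current one is full.
 */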
3485 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3486 block_t old_blkaddr, block_t *new_blkaddr,
3487 struct f2fs_summary *sum, int type,
3488 struct f2fs_io_info *fio, int contig_level)
3489 {
3490 struct sit_info *sit_i = SIT_I(sbi);
3491 struct curseg_info *curseg = CURSEG_I(sbi, type);
3492 unsigned long long old_mtime;
3493 bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
3494 struct seg_entry *se = NULL;
3495 #ifdef CONFIG_F2FS_GRADING_SSR
3496 struct inode *inode = NULL;
3497 #endif
3498 int contig = SEQ_NONE;
3499
3500 down_read(&SM_I(sbi)->curseg_lock);
3501
3502 mutex_lock(&curseg->curseg_mutex);
3503 down_write(&sit_i->sentry_lock);
3504
3505 if (from_gc) {
3506 f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
3507 se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
3508 sanity_check_seg_type(sbi, se->type);
3509 f2fs_bug_on(sbi, IS_NODESEG(se->type));
3510 }
3511 *new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3512
3513 f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
3514
3515 f2fs_wait_discard_bio(sbi, *new_blkaddr);
3516
3517 /*
3518 * __add_sum_entry must be called with curseg_mutex held
3519 * because this function updates a summary entry in the
3520 * current summary block.
3521 */
3522 __add_sum_entry(sbi, type, sum);
3523
3524 __refresh_next_blkoff(sbi, curseg);
3525
3526 stat_inc_block_count(sbi, curseg);
3527
3528 if (from_gc) {
3529 old_mtime = get_segment_mtime(sbi, old_blkaddr);
3530 } else {
3531 update_segment_mtime(sbi, old_blkaddr, 0);
3532 old_mtime = 0;
3533 }
3534 update_segment_mtime(sbi, *new_blkaddr, old_mtime);
3535
3536 /*
3537 * SIT information should be updated before segment allocation,
3538 * since SSR needs latest valid block information.
3539 */
3540 update_sit_entry(sbi, *new_blkaddr, 1);
3541 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
3542 update_sit_entry(sbi, old_blkaddr, -1);
3543
3544 if (!__has_curseg_space(sbi, curseg)) {
3545 if (from_gc) {
3546 get_atssr_segment(sbi, type, se->type,
3547 AT_SSR, se->mtime);
3548 } else {
3549 #ifdef CONFIG_F2FS_GRADING_SSR
3550 if (contig_level != SEQ_NONE) {
3551 contig = contig_level;
3552 goto allocate_label;
3553 }
3554
3555 if (page && page->mapping && page->mapping != NODE_MAPPING(sbi) &&
3556 page->mapping != META_MAPPING(sbi)) {
3557 inode = page->mapping->host;
3558 contig = check_io_seq(get_dirty_pages(inode));
3559 }
3560 allocate_label:
3561 #endif
3562 sit_i->s_ops->allocate_segment(sbi, type, false, contig);
3563 }
3564 }
3565 /*
3566 * Segment dirty status should be updated after segment allocation,
3567 * so we only need to update the status once, after the previous
3568 * segment has been closed.
3569 */
3570 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3571 locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3572
3573 up_write(&sit_i->sentry_lock);
3574
3575 if (page && IS_NODESEG(type)) {
3576 fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3577
3578 f2fs_inode_chksum_set(sbi, page);
3579 }
3580
3581 if (fio) {
3582 struct f2fs_bio_info *io;
3583
3584 if (F2FS_IO_ALIGNED(sbi))
3585 fio->retry = false;
3586
3587 INIT_LIST_HEAD(&fio->list);
3588 fio->in_list = true;
3589 io = sbi->write_io[fio->type] + fio->temp;
3590 spin_lock(&io->io_lock);
3591 list_add_tail(&fio->list, &io->io_list);
3592 spin_unlock(&io->io_lock);
3593 }
3594
3595 mutex_unlock(&curseg->curseg_mutex);
3596
3597 up_read(&SM_I(sbi)->curseg_lock);
3598 }
3599
3600 static void update_device_state(struct f2fs_io_info *fio)
3601 {
3602 struct f2fs_sb_info *sbi = fio->sbi;
3603 unsigned int devidx;
3604
3605 if (!f2fs_is_multi_device(sbi))
3606 return;
3607
3608 devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
3609
3610 /* update device state for fsync */
3611 f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
3612
3613 /* update device state for checkpoint */
3614 if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3615 spin_lock(&sbi->dev_lock);
3616 f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3617 spin_unlock(&sbi->dev_lock);
3618 }
3619 }
3620
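/*
 * Allocate a new block for the page and submit the write; if submission
 * asks for a retry, allocate again from the new position.
 */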
3621 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3622 {
3623 int type = __get_segment_type(fio);
3624 bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
3625
3626 if (keep_order)
3627 down_read(&fio->sbi->io_order_lock);
3628 reallocate:
3629 f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3630 &fio->new_blkaddr, sum, type, fio, SEQ_NONE);
3631 if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3632 invalidate_mapping_pages(META_MAPPING(fio->sbi),
3633 fio->old_blkaddr, fio->old_blkaddr);
3634
3635 	/* write out the dirty page to the bdev */
3636 f2fs_submit_page_write(fio);
3637 if (fio->retry) {
3638 fio->old_blkaddr = fio->new_blkaddr;
3639 goto reallocate;
3640 }
3641
3642 update_device_state(fio);
3643
3644 if (keep_order)
3645 up_read(&fio->sbi->io_order_lock);
3646 }
3647
f2fs_do_write_meta_page(struct f2fs_sb_info * sbi,struct page * page,enum iostat_type io_type)3648 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3649 enum iostat_type io_type)
3650 {
3651 struct f2fs_io_info fio = {
3652 .sbi = sbi,
3653 .type = META,
3654 .temp = HOT,
3655 .op = REQ_OP_WRITE,
3656 .op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3657 .old_blkaddr = page->index,
3658 .new_blkaddr = page->index,
3659 .page = page,
3660 .encrypted_page = NULL,
3661 .in_list = false,
3662 };
3663
3664 if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3665 fio.op_flags &= ~REQ_META;
3666
3667 set_page_writeback(page);
3668 ClearPageError(page);
3669 f2fs_submit_page_write(&fio);
3670
3671 stat_inc_meta_count(sbi, page->index);
3672 f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
3673 }
3674
f2fs_do_write_node_page(unsigned int nid,struct f2fs_io_info * fio)3675 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3676 {
3677 struct f2fs_summary sum;
3678
3679 set_summary(&sum, nid, 0, 0);
3680 do_write_page(&sum, fio);
3681
3682 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3683 }
3684
f2fs_outplace_write_data(struct dnode_of_data * dn,struct f2fs_io_info * fio)3685 void f2fs_outplace_write_data(struct dnode_of_data *dn,
3686 struct f2fs_io_info *fio)
3687 {
3688 struct f2fs_sb_info *sbi = fio->sbi;
3689 struct f2fs_summary sum;
3690
3691 f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3692 set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3693 do_write_page(&sum, fio);
3694 f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3695
3696 f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
3697 }
3698
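/*
 * In-place update path: rewrite the block at its existing address.
 * The target segment must be a data segment; otherwise the filesystem
 * is flagged for fsck and the write is rejected.
 */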
f2fs_inplace_write_data(struct f2fs_io_info * fio)3699 int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3700 {
3701 int err;
3702 struct f2fs_sb_info *sbi = fio->sbi;
3703 unsigned int segno;
3704
3705 fio->new_blkaddr = fio->old_blkaddr;
3706 /* i/o temperature is needed for passing down write hints */
3707 __get_segment_type(fio);
3708
3709 segno = GET_SEGNO(sbi, fio->new_blkaddr);
3710
3711 if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3712 set_sbi_flag(sbi, SBI_NEED_FSCK);
3713 f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
3714 __func__, segno);
3715 return -EFSCORRUPTED;
3716 }
3717
3718 stat_inc_inplace_blocks(fio->sbi);
3719
3720 if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE)))
3721 err = f2fs_merge_page_bio(fio);
3722 else
3723 err = f2fs_submit_page_bio(fio);
3724 if (!err) {
3725 update_device_state(fio);
3726 f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3727 }
3728
3729 return err;
3730 }
3731
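/*
 * Return the curseg type whose current segment is @segno, or
 * NO_CHECK_TYPE if no current segment matches.
 */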
__f2fs_get_curseg(struct f2fs_sb_info * sbi,unsigned int segno)3732 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3733 unsigned int segno)
3734 {
3735 int i;
3736
3737 for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3738 if (CURSEG_I(sbi, i)->segno == segno)
3739 break;
3740 }
3741 return i;
3742 }
3743
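/*
 * Move a block mapping to @new_blkaddr on behalf of recovery or GC:
 * switch the chosen curseg onto the target segment, record the summary,
 * and update SIT and dirty-segment state for both the old and new
 * addresses.  When @recover_curseg is set, the original curseg position
 * is restored afterwards.
 */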
f2fs_do_replace_block(struct f2fs_sb_info * sbi,struct f2fs_summary * sum,block_t old_blkaddr,block_t new_blkaddr,bool recover_curseg,bool recover_newaddr,bool from_gc)3744 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3745 block_t old_blkaddr, block_t new_blkaddr,
3746 bool recover_curseg, bool recover_newaddr,
3747 bool from_gc)
3748 {
3749 struct sit_info *sit_i = SIT_I(sbi);
3750 struct curseg_info *curseg;
3751 unsigned int segno, old_cursegno;
3752 struct seg_entry *se;
3753 int type;
3754 unsigned short old_blkoff;
3755
3756 segno = GET_SEGNO(sbi, new_blkaddr);
3757 se = get_seg_entry(sbi, segno);
3758 type = se->type;
3759
3760 down_write(&SM_I(sbi)->curseg_lock);
3761
3762 if (!recover_curseg) {
3763 /* for recovery flow */
3764 if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3765 if (old_blkaddr == NULL_ADDR)
3766 type = CURSEG_COLD_DATA;
3767 else
3768 type = CURSEG_WARM_DATA;
3769 }
3770 } else {
3771 if (IS_CURSEG(sbi, segno)) {
3772 			/* se->type is volatile due to SSR allocation */
3773 type = __f2fs_get_curseg(sbi, segno);
3774 f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3775 } else {
3776 type = CURSEG_WARM_DATA;
3777 }
3778 }
3779
3780 f2fs_bug_on(sbi, !IS_DATASEG(type));
3781 curseg = CURSEG_I(sbi, type);
3782
3783 mutex_lock(&curseg->curseg_mutex);
3784 down_write(&sit_i->sentry_lock);
3785
3786 old_cursegno = curseg->segno;
3787 old_blkoff = curseg->next_blkoff;
3788
3789 /* change the current segment */
3790 if (segno != curseg->segno) {
3791 curseg->next_segno = segno;
3792 change_curseg(sbi, type, true);
3793 }
3794
3795 curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3796 __add_sum_entry(sbi, type, sum);
3797
3798 if (!recover_curseg || recover_newaddr) {
3799 if (!from_gc)
3800 update_segment_mtime(sbi, new_blkaddr, 0);
3801 update_sit_entry(sbi, new_blkaddr, 1);
3802 }
3803 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3804 invalidate_mapping_pages(META_MAPPING(sbi),
3805 old_blkaddr, old_blkaddr);
3806 if (!from_gc)
3807 update_segment_mtime(sbi, old_blkaddr, 0);
3808 update_sit_entry(sbi, old_blkaddr, -1);
3809 }
3810
3811 locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3812 locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3813
3814 locate_dirty_segment(sbi, old_cursegno);
3815
3816 if (recover_curseg) {
3817 if (old_cursegno != curseg->segno) {
3818 curseg->next_segno = old_cursegno;
3819 change_curseg(sbi, type, true);
3820 }
3821 curseg->next_blkoff = old_blkoff;
3822 }
3823
3824 up_write(&sit_i->sentry_lock);
3825 mutex_unlock(&curseg->curseg_mutex);
3826 up_write(&SM_I(sbi)->curseg_lock);
3827 }
3828
f2fs_replace_block(struct f2fs_sb_info * sbi,struct dnode_of_data * dn,block_t old_addr,block_t new_addr,unsigned char version,bool recover_curseg,bool recover_newaddr)3829 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3830 block_t old_addr, block_t new_addr,
3831 unsigned char version, bool recover_curseg,
3832 bool recover_newaddr)
3833 {
3834 struct f2fs_summary sum;
3835
3836 set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3837
3838 f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3839 recover_curseg, recover_newaddr, false);
3840
3841 f2fs_update_data_blkaddr(dn, new_addr);
3842 }
3843
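/*
 * If the page is under writeback, flush any cached (merged) bios that may
 * still hold it, then wait either for writeback to finish (@ordered) or
 * for the page to become stable.
 */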
f2fs_wait_on_page_writeback(struct page * page,enum page_type type,bool ordered,bool locked)3844 void f2fs_wait_on_page_writeback(struct page *page,
3845 enum page_type type, bool ordered, bool locked)
3846 {
3847 if (PageWriteback(page)) {
3848 struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3849
3850 /* submit cached LFS IO */
3851 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3852 		/* submit cached IPU IO */
3853 f2fs_submit_merged_ipu_write(sbi, NULL, page);
3854 if (ordered) {
3855 wait_on_page_writeback(page);
3856 f2fs_bug_on(sbi, locked && PageWriteback(page));
3857 } else {
3858 wait_for_stable_page(page);
3859 }
3860 }
3861 }
3862
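/*
 * For inodes that need post-read processing, wait for writeback of the
 * page cached in the meta mapping for @blkaddr, if any.
 */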
f2fs_wait_on_block_writeback(struct inode * inode,block_t blkaddr)3863 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3864 {
3865 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3866 struct page *cpage;
3867
3868 if (!f2fs_post_read_required(inode))
3869 return;
3870
3871 if (!__is_valid_data_blkaddr(blkaddr))
3872 return;
3873
3874 cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3875 if (cpage) {
3876 f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3877 f2fs_put_page(cpage, 1);
3878 }
3879 }
3880
f2fs_wait_on_block_writeback_range(struct inode * inode,block_t blkaddr,block_t len)3881 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3882 block_t len)
3883 {
3884 block_t i;
3885
3886 for (i = 0; i < len; i++)
3887 f2fs_wait_on_block_writeback(inode, blkaddr + i);
3888 }
3889
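/*
 * Restore the NAT and SIT journals plus the hot/warm/cold data summaries
 * from the compacted summary blocks written at checkpoint time.
 */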
read_compacted_summaries(struct f2fs_sb_info * sbi)3890 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3891 {
3892 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3893 struct curseg_info *seg_i;
3894 unsigned char *kaddr;
3895 struct page *page;
3896 block_t start;
3897 int i, j, offset;
3898
3899 start = start_sum_block(sbi);
3900
3901 page = f2fs_get_meta_page(sbi, start++);
3902 if (IS_ERR(page))
3903 return PTR_ERR(page);
3904 kaddr = (unsigned char *)page_address(page);
3905
3906 /* Step 1: restore nat cache */
3907 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3908 memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3909
3910 /* Step 2: restore sit cache */
3911 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3912 memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3913 offset = 2 * SUM_JOURNAL_SIZE;
3914
3915 /* Step 3: restore summary entries */
3916 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3917 unsigned short blk_off;
3918 unsigned int segno;
3919
3920 seg_i = CURSEG_I(sbi, i);
3921 segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3922 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3923 seg_i->next_segno = segno;
3924 reset_curseg(sbi, i, 0);
3925 seg_i->alloc_type = ckpt->alloc_type[i];
3926 seg_i->next_blkoff = blk_off;
3927
3928 if (seg_i->alloc_type == SSR)
3929 blk_off = sbi->blocks_per_seg;
3930
3931 for (j = 0; j < blk_off; j++) {
3932 struct f2fs_summary *s;
3933 s = (struct f2fs_summary *)(kaddr + offset);
3934 seg_i->sum_blk->entries[j] = *s;
3935 offset += SUMMARY_SIZE;
3936 if (offset + SUMMARY_SIZE <= PAGE_SIZE -
3937 SUM_FOOTER_SIZE)
3938 continue;
3939
3940 f2fs_put_page(page, 1);
3941 page = NULL;
3942
3943 page = f2fs_get_meta_page(sbi, start++);
3944 if (IS_ERR(page))
3945 return PTR_ERR(page);
3946 kaddr = (unsigned char *)page_address(page);
3947 offset = 0;
3948 }
3949 }
3950 f2fs_put_page(page, 1);
3951 return 0;
3952 }
3953
read_normal_summaries(struct f2fs_sb_info * sbi,int type)3954 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
3955 {
3956 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3957 struct f2fs_summary_block *sum;
3958 struct curseg_info *curseg;
3959 struct page *new;
3960 unsigned short blk_off;
3961 unsigned int segno = 0;
3962 block_t blk_addr = 0;
3963 int err = 0;
3964
3965 /* get segment number and block addr */
3966 if (IS_DATASEG(type)) {
3967 segno = le32_to_cpu(ckpt->cur_data_segno[type]);
3968 blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
3969 CURSEG_HOT_DATA]);
3970 if (__exist_node_summaries(sbi))
3971 blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
3972 else
3973 blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
3974 } else {
3975 segno = le32_to_cpu(ckpt->cur_node_segno[type -
3976 CURSEG_HOT_NODE]);
3977 blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
3978 CURSEG_HOT_NODE]);
3979 if (__exist_node_summaries(sbi))
3980 blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
3981 type - CURSEG_HOT_NODE);
3982 else
3983 blk_addr = GET_SUM_BLOCK(sbi, segno);
3984 }
3985
3986 new = f2fs_get_meta_page(sbi, blk_addr);
3987 if (IS_ERR(new))
3988 return PTR_ERR(new);
3989 sum = (struct f2fs_summary_block *)page_address(new);
3990
3991 if (IS_NODESEG(type)) {
3992 if (__exist_node_summaries(sbi)) {
3993 struct f2fs_summary *ns = &sum->entries[0];
3994 int i;
3995 for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
3996 ns->version = 0;
3997 ns->ofs_in_node = 0;
3998 }
3999 } else {
4000 err = f2fs_restore_node_summary(sbi, segno, sum);
4001 if (err)
4002 goto out;
4003 }
4004 }
4005
4006 /* set uncompleted segment to curseg */
4007 curseg = CURSEG_I(sbi, type);
4008 mutex_lock(&curseg->curseg_mutex);
4009
4010 /* update journal info */
4011 down_write(&curseg->journal_rwsem);
4012 memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
4013 up_write(&curseg->journal_rwsem);
4014
4015 memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
4016 memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
4017 curseg->next_segno = segno;
4018 reset_curseg(sbi, type, 0);
4019 curseg->alloc_type = ckpt->alloc_type[type];
4020 curseg->next_blkoff = blk_off;
4021 mutex_unlock(&curseg->curseg_mutex);
4022 out:
4023 f2fs_put_page(new, 1);
4024 return err;
4025 }
4026
restore_curseg_summaries(struct f2fs_sb_info * sbi)4027 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
4028 {
4029 struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
4030 struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
4031 int type = CURSEG_HOT_DATA;
4032 int err;
4033
4034 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
4035 int npages = f2fs_npages_for_summary_flush(sbi, true);
4036
4037 if (npages >= 2)
4038 f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
4039 META_CP, true);
4040
4041 /* restore for compacted data summary */
4042 err = read_compacted_summaries(sbi);
4043 if (err)
4044 return err;
4045 type = CURSEG_HOT_NODE;
4046 }
4047
4048 if (__exist_node_summaries(sbi))
4049 f2fs_ra_meta_pages(sbi,
4050 sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
4051 NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
4052
4053 for (; type <= CURSEG_COLD_NODE; type++) {
4054 err = read_normal_summaries(sbi, type);
4055 if (err)
4056 return err;
4057 }
4058
4059 /* sanity check for summary blocks */
4060 if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
4061 sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
4062 f2fs_err(sbi, "invalid journal entries nats %u sits %u\n",
4063 nats_in_cursum(nat_j), sits_in_cursum(sit_j));
4064 return -EINVAL;
4065 }
4066
4067 return 0;
4068 }
4069
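/*
 * Counterpart of read_compacted_summaries(): pack the NAT journal, the
 * SIT journal and the data summary entries into consecutive meta pages
 * starting at @blkaddr.
 */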
write_compacted_summaries(struct f2fs_sb_info * sbi,block_t blkaddr)4070 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
4071 {
4072 struct page *page;
4073 unsigned char *kaddr;
4074 struct f2fs_summary *summary;
4075 struct curseg_info *seg_i;
4076 int written_size = 0;
4077 int i, j;
4078
4079 page = f2fs_grab_meta_page(sbi, blkaddr++);
4080 kaddr = (unsigned char *)page_address(page);
4081 memset(kaddr, 0, PAGE_SIZE);
4082
4083 /* Step 1: write nat cache */
4084 seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
4085 memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
4086 written_size += SUM_JOURNAL_SIZE;
4087
4088 /* Step 2: write sit cache */
4089 seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
4090 memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
4091 written_size += SUM_JOURNAL_SIZE;
4092
4093 /* Step 3: write summary entries */
4094 for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
4095 unsigned short blkoff;
4096 seg_i = CURSEG_I(sbi, i);
4097 if (sbi->ckpt->alloc_type[i] == SSR)
4098 blkoff = sbi->blocks_per_seg;
4099 else
4100 blkoff = curseg_blkoff(sbi, i);
4101
4102 for (j = 0; j < blkoff; j++) {
4103 if (!page) {
4104 page = f2fs_grab_meta_page(sbi, blkaddr++);
4105 kaddr = (unsigned char *)page_address(page);
4106 memset(kaddr, 0, PAGE_SIZE);
4107 written_size = 0;
4108 }
4109 summary = (struct f2fs_summary *)(kaddr + written_size);
4110 *summary = seg_i->sum_blk->entries[j];
4111 written_size += SUMMARY_SIZE;
4112
4113 if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
4114 SUM_FOOTER_SIZE)
4115 continue;
4116
4117 set_page_dirty(page);
4118 f2fs_put_page(page, 1);
4119 page = NULL;
4120 }
4121 }
4122 if (page) {
4123 set_page_dirty(page);
4124 f2fs_put_page(page, 1);
4125 }
4126 }
4127
write_normal_summaries(struct f2fs_sb_info * sbi,block_t blkaddr,int type)4128 static void write_normal_summaries(struct f2fs_sb_info *sbi,
4129 block_t blkaddr, int type)
4130 {
4131 int i, end;
4132 if (IS_DATASEG(type))
4133 end = type + NR_CURSEG_DATA_TYPE;
4134 else
4135 end = type + NR_CURSEG_NODE_TYPE;
4136
4137 for (i = type; i < end; i++)
4138 write_current_sum_page(sbi, i, blkaddr + (i - type));
4139 }
4140
f2fs_write_data_summaries(struct f2fs_sb_info * sbi,block_t start_blk)4141 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4142 {
4143 if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
4144 write_compacted_summaries(sbi, start_blk);
4145 else
4146 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
4147 }
4148
f2fs_write_node_summaries(struct f2fs_sb_info * sbi,block_t start_blk)4149 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
4150 {
4151 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
4152 }
4153
f2fs_lookup_journal_in_cursum(struct f2fs_journal * journal,int type,unsigned int val,int alloc)4154 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
4155 unsigned int val, int alloc)
4156 {
4157 int i;
4158
4159 if (type == NAT_JOURNAL) {
4160 for (i = 0; i < nats_in_cursum(journal); i++) {
4161 if (le32_to_cpu(nid_in_journal(journal, i)) == val)
4162 return i;
4163 }
4164 if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
4165 return update_nats_in_cursum(journal, 1);
4166 } else if (type == SIT_JOURNAL) {
4167 for (i = 0; i < sits_in_cursum(journal); i++)
4168 if (le32_to_cpu(segno_in_journal(journal, i)) == val)
4169 return i;
4170 if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
4171 return update_sits_in_cursum(journal, 1);
4172 }
4173 return -1;
4174 }
4175
get_current_sit_page(struct f2fs_sb_info * sbi,unsigned int segno)4176 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
4177 unsigned int segno)
4178 {
4179 return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
4180 }
4181
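/*
 * Copy the current in-memory SIT information for the block containing
 * @start into the "next" SIT page of the ping-pong pair, mark it dirty,
 * and flip the SIT bitmap so that page becomes the current one.
 */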
get_next_sit_page(struct f2fs_sb_info * sbi,unsigned int start)4182 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
4183 unsigned int start)
4184 {
4185 struct sit_info *sit_i = SIT_I(sbi);
4186 struct page *page;
4187 pgoff_t src_off, dst_off;
4188
4189 src_off = current_sit_addr(sbi, start);
4190 dst_off = next_sit_addr(sbi, src_off);
4191
4192 page = f2fs_grab_meta_page(sbi, dst_off);
4193 seg_info_to_sit_page(sbi, page, start);
4194
4195 set_page_dirty(page);
4196 set_to_next_sit(sit_i, start);
4197
4198 return page;
4199 }
4200
grab_sit_entry_set(void)4201 static struct sit_entry_set *grab_sit_entry_set(void)
4202 {
4203 struct sit_entry_set *ses =
4204 f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
4205
4206 ses->entry_cnt = 0;
4207 INIT_LIST_HEAD(&ses->set_list);
4208 return ses;
4209 }
4210
release_sit_entry_set(struct sit_entry_set * ses)4211 static void release_sit_entry_set(struct sit_entry_set *ses)
4212 {
4213 list_del(&ses->set_list);
4214 kmem_cache_free(sit_entry_set_slab, ses);
4215 }
4216
adjust_sit_entry_set(struct sit_entry_set * ses,struct list_head * head)4217 static void adjust_sit_entry_set(struct sit_entry_set *ses,
4218 struct list_head *head)
4219 {
4220 struct sit_entry_set *next = ses;
4221
4222 if (list_is_last(&ses->set_list, head))
4223 return;
4224
4225 list_for_each_entry_continue(next, head, set_list)
4226 if (ses->entry_cnt <= next->entry_cnt)
4227 break;
4228
4229 list_move_tail(&ses->set_list, &next->set_list);
4230 }
4231
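/*
 * Account one dirty segment in the sit_entry_set covering its SIT block,
 * allocating a new set if needed and keeping the list sorted by entry
 * count.
 */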
add_sit_entry(unsigned int segno,struct list_head * head)4232 static void add_sit_entry(unsigned int segno, struct list_head *head)
4233 {
4234 struct sit_entry_set *ses;
4235 unsigned int start_segno = START_SEGNO(segno);
4236
4237 list_for_each_entry(ses, head, set_list) {
4238 if (ses->start_segno == start_segno) {
4239 ses->entry_cnt++;
4240 adjust_sit_entry_set(ses, head);
4241 return;
4242 }
4243 }
4244
4245 ses = grab_sit_entry_set();
4246
4247 ses->start_segno = start_segno;
4248 ses->entry_cnt++;
4249 list_add(&ses->set_list, head);
4250 }
4251
add_sits_in_set(struct f2fs_sb_info * sbi)4252 static void add_sits_in_set(struct f2fs_sb_info *sbi)
4253 {
4254 struct f2fs_sm_info *sm_info = SM_I(sbi);
4255 struct list_head *set_list = &sm_info->sit_entry_set;
4256 unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
4257 unsigned int segno;
4258
4259 for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
4260 add_sit_entry(segno, set_list);
4261 }
4262
remove_sits_in_journal(struct f2fs_sb_info * sbi)4263 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
4264 {
4265 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4266 struct f2fs_journal *journal = curseg->journal;
4267 int i;
4268
4269 down_write(&curseg->journal_rwsem);
4270 for (i = 0; i < sits_in_cursum(journal); i++) {
4271 unsigned int segno;
4272 bool dirtied;
4273
4274 segno = le32_to_cpu(segno_in_journal(journal, i));
4275 dirtied = __mark_sit_entry_dirty(sbi, segno);
4276
4277 if (!dirtied)
4278 add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
4279 }
4280 update_sits_in_cursum(journal, -i);
4281 up_write(&curseg->journal_rwsem);
4282 }
4283
4284 /*
4285 * CP calls this function, which flushes SIT entries including sit_journal,
4286 * and moves prefree segs to free segs.
4287 */
f2fs_flush_sit_entries(struct f2fs_sb_info * sbi,struct cp_control * cpc)4288 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
4289 {
4290 struct sit_info *sit_i = SIT_I(sbi);
4291 unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
4292 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4293 struct f2fs_journal *journal = curseg->journal;
4294 struct sit_entry_set *ses, *tmp;
4295 struct list_head *head = &SM_I(sbi)->sit_entry_set;
4296 bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
4297 struct seg_entry *se;
4298
4299 down_write(&sit_i->sentry_lock);
4300
4301 if (!sit_i->dirty_sentries)
4302 goto out;
4303
4304 	/*
4305 	 * temporarily add and account the sit entries of the dirty
4306 	 * bitmap in sit entry sets
4307 	 */
4308 add_sits_in_set(sbi);
4309
4310 	/*
4311 	 * if there is not enough space in the journal to store dirty sit
4312 	 * entries, remove all entries from the journal and account
4313 	 * them in the sit entry set.
4314 	 */
4315 if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
4316 !to_journal)
4317 remove_sits_in_journal(sbi);
4318
4319 /*
4320 * there are two steps to flush sit entries:
4321 * #1, flush sit entries to journal in current cold data summary block.
4322 * #2, flush sit entries to sit page.
4323 */
4324 list_for_each_entry_safe(ses, tmp, head, set_list) {
4325 struct page *page = NULL;
4326 struct f2fs_sit_block *raw_sit = NULL;
4327 unsigned int start_segno = ses->start_segno;
4328 unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
4329 (unsigned long)MAIN_SEGS(sbi));
4330 unsigned int segno = start_segno;
4331
4332 if (to_journal &&
4333 !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
4334 to_journal = false;
4335
4336 if (to_journal) {
4337 down_write(&curseg->journal_rwsem);
4338 } else {
4339 page = get_next_sit_page(sbi, start_segno);
4340 raw_sit = page_address(page);
4341 }
4342
4343 /* flush dirty sit entries in region of current sit set */
4344 for_each_set_bit_from(segno, bitmap, end) {
4345 int offset, sit_offset;
4346
4347 se = get_seg_entry(sbi, segno);
4348 #ifdef CONFIG_F2FS_CHECK_FS
4349 if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
4350 SIT_VBLOCK_MAP_SIZE))
4351 f2fs_bug_on(sbi, 1);
4352 #endif
4353
4354 /* add discard candidates */
4355 if (!(cpc->reason & CP_DISCARD)) {
4356 cpc->trim_start = segno;
4357 add_discard_addrs(sbi, cpc, false);
4358 }
4359
4360 if (to_journal) {
4361 offset = f2fs_lookup_journal_in_cursum(journal,
4362 SIT_JOURNAL, segno, 1);
4363 f2fs_bug_on(sbi, offset < 0);
4364 segno_in_journal(journal, offset) =
4365 cpu_to_le32(segno);
4366 seg_info_to_raw_sit(se,
4367 &sit_in_journal(journal, offset));
4368 check_block_count(sbi, segno,
4369 &sit_in_journal(journal, offset));
4370 } else {
4371 sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
4372 seg_info_to_raw_sit(se,
4373 &raw_sit->entries[sit_offset]);
4374 check_block_count(sbi, segno,
4375 &raw_sit->entries[sit_offset]);
4376 }
4377
4378 __clear_bit(segno, bitmap);
4379 sit_i->dirty_sentries--;
4380 ses->entry_cnt--;
4381 }
4382
4383 if (to_journal)
4384 up_write(&curseg->journal_rwsem);
4385 else
4386 f2fs_put_page(page, 1);
4387
4388 f2fs_bug_on(sbi, ses->entry_cnt);
4389 release_sit_entry_set(ses);
4390 }
4391
4392 f2fs_bug_on(sbi, !list_empty(head));
4393 f2fs_bug_on(sbi, sit_i->dirty_sentries);
4394 out:
4395 if (cpc->reason & CP_DISCARD) {
4396 __u64 trim_start = cpc->trim_start;
4397
4398 for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
4399 add_discard_addrs(sbi, cpc, false);
4400
4401 cpc->trim_start = trim_start;
4402 }
4403 up_write(&sit_i->sentry_lock);
4404
4405 set_prefree_as_free_segments(sbi);
4406 }
4407
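/*
 * Allocate and initialize the in-memory SIT: per-segment entries, the
 * shared validity/discard bitmaps they point into, optional per-section
 * entries, and a copy of the on-disk SIT bitmap from the checkpoint.
 */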
build_sit_info(struct f2fs_sb_info * sbi)4408 static int build_sit_info(struct f2fs_sb_info *sbi)
4409 {
4410 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4411 struct sit_info *sit_i;
4412 unsigned int sit_segs, start;
4413 char *src_bitmap, *bitmap;
4414 unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
4415
4416 /* allocate memory for SIT information */
4417 sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
4418 if (!sit_i)
4419 return -ENOMEM;
4420
4421 SM_I(sbi)->sit_info = sit_i;
4422
4423 sit_i->sentries =
4424 f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
4425 MAIN_SEGS(sbi)),
4426 GFP_KERNEL);
4427 if (!sit_i->sentries)
4428 return -ENOMEM;
4429
4430 main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4431 sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
4432 GFP_KERNEL);
4433 if (!sit_i->dirty_sentries_bitmap)
4434 return -ENOMEM;
4435
4436 #ifdef CONFIG_F2FS_CHECK_FS
4437 bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4;
4438 #else
4439 bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3;
4440 #endif
4441 sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4442 if (!sit_i->bitmap)
4443 return -ENOMEM;
4444
4445 bitmap = sit_i->bitmap;
4446
4447 for (start = 0; start < MAIN_SEGS(sbi); start++) {
4448 sit_i->sentries[start].cur_valid_map = bitmap;
4449 bitmap += SIT_VBLOCK_MAP_SIZE;
4450
4451 sit_i->sentries[start].ckpt_valid_map = bitmap;
4452 bitmap += SIT_VBLOCK_MAP_SIZE;
4453
4454 #ifdef CONFIG_F2FS_CHECK_FS
4455 sit_i->sentries[start].cur_valid_map_mir = bitmap;
4456 bitmap += SIT_VBLOCK_MAP_SIZE;
4457 #endif
4458
4459 sit_i->sentries[start].discard_map = bitmap;
4460 bitmap += SIT_VBLOCK_MAP_SIZE;
4461 }
4462
4463 sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4464 if (!sit_i->tmp_map)
4465 return -ENOMEM;
4466
4467 if (__is_large_section(sbi)) {
4468 sit_i->sec_entries =
4469 f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4470 MAIN_SECS(sbi)),
4471 GFP_KERNEL);
4472 if (!sit_i->sec_entries)
4473 return -ENOMEM;
4474 }
4475
4476 	/* get information related to SIT */
4477 sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4478
4479 	/* set up the SIT bitmap from the checkpoint pack */
4480 sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4481 src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4482
4483 sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
4484 if (!sit_i->sit_bitmap)
4485 return -ENOMEM;
4486
4487 #ifdef CONFIG_F2FS_CHECK_FS
4488 sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
4489 sit_bitmap_size, GFP_KERNEL);
4490 if (!sit_i->sit_bitmap_mir)
4491 return -ENOMEM;
4492
4493 sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
4494 main_bitmap_size, GFP_KERNEL);
4495 if (!sit_i->invalid_segmap)
4496 return -ENOMEM;
4497 #endif
4498
4499 /* init SIT information */
4500 sit_i->s_ops = &default_salloc_ops;
4501
4502 sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4503 sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
4504 sit_i->written_valid_blocks = 0;
4505 sit_i->bitmap_size = sit_bitmap_size;
4506 sit_i->dirty_sentries = 0;
4507 sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4508 sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4509 sit_i->mounted_time = ktime_get_boottime_seconds();
4510 init_rwsem(&sit_i->sentry_lock);
4511 return 0;
4512 }
4513
build_free_segmap(struct f2fs_sb_info * sbi)4514 static int build_free_segmap(struct f2fs_sb_info *sbi)
4515 {
4516 struct free_segmap_info *free_i;
4517 unsigned int bitmap_size, sec_bitmap_size;
4518
4519 /* allocate memory for free segmap information */
4520 free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4521 if (!free_i)
4522 return -ENOMEM;
4523
4524 SM_I(sbi)->free_info = free_i;
4525
4526 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4527 free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4528 if (!free_i->free_segmap)
4529 return -ENOMEM;
4530
4531 sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4532 free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4533 if (!free_i->free_secmap)
4534 return -ENOMEM;
4535
4536 /* set all segments as dirty temporarily */
4537 memset(free_i->free_segmap, 0xff, bitmap_size);
4538 memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4539
4540 /* init free segmap information */
4541 free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4542 free_i->free_segments = 0;
4543 free_i->free_sections = 0;
4544 spin_lock_init(&free_i->segmap_lock);
4545 return 0;
4546 }
4547
build_curseg(struct f2fs_sb_info * sbi)4548 static int build_curseg(struct f2fs_sb_info *sbi)
4549 {
4550 struct curseg_info *array;
4551 int i;
4552
4553 array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
4554 sizeof(*array)), GFP_KERNEL);
4555 if (!array)
4556 return -ENOMEM;
4557
4558 SM_I(sbi)->curseg_array = array;
4559
4560 for (i = 0; i < NO_CHECK_TYPE; i++) {
4561 mutex_init(&array[i].curseg_mutex);
4562 array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4563 if (!array[i].sum_blk)
4564 return -ENOMEM;
4565 init_rwsem(&array[i].journal_rwsem);
4566 array[i].journal = f2fs_kzalloc(sbi,
4567 sizeof(struct f2fs_journal), GFP_KERNEL);
4568 if (!array[i].journal)
4569 return -ENOMEM;
4570 if (i < NR_PERSISTENT_LOG)
4571 array[i].seg_type = CURSEG_HOT_DATA + i;
4572 else if (i == CURSEG_COLD_DATA_PINNED)
4573 array[i].seg_type = CURSEG_COLD_DATA;
4574 else if (i == CURSEG_ALL_DATA_ATGC)
4575 array[i].seg_type = CURSEG_COLD_DATA;
4576 array[i].segno = NULL_SEGNO;
4577 array[i].next_blkoff = 0;
4578 array[i].inited = false;
4579 }
4580 return restore_curseg_summaries(sbi);
4581 }
4582
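/*
 * Load the on-disk SIT blocks and the SIT journal to populate the
 * in-memory segment entries, discard maps and per-section counters,
 * then cross-check the accumulated node block count against
 * valid_node_count().
 */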
build_sit_entries(struct f2fs_sb_info * sbi)4583 static int build_sit_entries(struct f2fs_sb_info *sbi)
4584 {
4585 struct sit_info *sit_i = SIT_I(sbi);
4586 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4587 struct f2fs_journal *journal = curseg->journal;
4588 struct seg_entry *se;
4589 struct f2fs_sit_entry sit;
4590 int sit_blk_cnt = SIT_BLK_CNT(sbi);
4591 unsigned int i, start, end;
4592 unsigned int readed, start_blk = 0;
4593 int err = 0;
4594 block_t total_node_blocks = 0;
4595
4596 do {
4597 readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
4598 META_SIT, true);
4599
4600 start = start_blk * sit_i->sents_per_block;
4601 end = (start_blk + readed) * sit_i->sents_per_block;
4602
4603 for (; start < end && start < MAIN_SEGS(sbi); start++) {
4604 struct f2fs_sit_block *sit_blk;
4605 struct page *page;
4606
4607 se = &sit_i->sentries[start];
4608 page = get_current_sit_page(sbi, start);
4609 if (IS_ERR(page))
4610 return PTR_ERR(page);
4611 sit_blk = (struct f2fs_sit_block *)page_address(page);
4612 sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4613 f2fs_put_page(page, 1);
4614
4615 err = check_block_count(sbi, start, &sit);
4616 if (err)
4617 return err;
4618 seg_info_from_raw_sit(se, &sit);
4619 if (IS_NODESEG(se->type))
4620 total_node_blocks += se->valid_blocks;
4621
4622 /* build discard map only one time */
4623 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4624 memset(se->discard_map, 0xff,
4625 SIT_VBLOCK_MAP_SIZE);
4626 } else {
4627 memcpy(se->discard_map,
4628 se->cur_valid_map,
4629 SIT_VBLOCK_MAP_SIZE);
4630 sbi->discard_blks +=
4631 sbi->blocks_per_seg -
4632 se->valid_blocks;
4633 }
4634
4635 if (__is_large_section(sbi))
4636 get_sec_entry(sbi, start)->valid_blocks +=
4637 se->valid_blocks;
4638 }
4639 start_blk += readed;
4640 } while (start_blk < sit_blk_cnt);
4641
4642 down_read(&curseg->journal_rwsem);
4643 for (i = 0; i < sits_in_cursum(journal); i++) {
4644 unsigned int old_valid_blocks;
4645
4646 start = le32_to_cpu(segno_in_journal(journal, i));
4647 if (start >= MAIN_SEGS(sbi)) {
4648 f2fs_err(sbi, "Wrong journal entry on segno %u",
4649 start);
4650 err = -EFSCORRUPTED;
4651 break;
4652 }
4653
4654 se = &sit_i->sentries[start];
4655 sit = sit_in_journal(journal, i);
4656
4657 old_valid_blocks = se->valid_blocks;
4658 if (IS_NODESEG(se->type))
4659 total_node_blocks -= old_valid_blocks;
4660
4661 err = check_block_count(sbi, start, &sit);
4662 if (err)
4663 break;
4664 seg_info_from_raw_sit(se, &sit);
4665 if (IS_NODESEG(se->type))
4666 total_node_blocks += se->valid_blocks;
4667
4668 if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4669 memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4670 } else {
4671 memcpy(se->discard_map, se->cur_valid_map,
4672 SIT_VBLOCK_MAP_SIZE);
4673 sbi->discard_blks += old_valid_blocks;
4674 sbi->discard_blks -= se->valid_blocks;
4675 }
4676
4677 if (__is_large_section(sbi)) {
4678 get_sec_entry(sbi, start)->valid_blocks +=
4679 se->valid_blocks;
4680 get_sec_entry(sbi, start)->valid_blocks -=
4681 old_valid_blocks;
4682 }
4683 }
4684 up_read(&curseg->journal_rwsem);
4685
4686 if (!err && total_node_blocks != valid_node_count(sbi)) {
4687 f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
4688 total_node_blocks, valid_node_count(sbi));
4689 err = -EFSCORRUPTED;
4690 }
4691
4692 return err;
4693 }
4694
init_free_segmap(struct f2fs_sb_info * sbi)4695 static void init_free_segmap(struct f2fs_sb_info *sbi)
4696 {
4697 unsigned int start;
4698 int type;
4699 struct seg_entry *sentry;
4700
4701 for (start = 0; start < MAIN_SEGS(sbi); start++) {
4702 if (f2fs_usable_blks_in_seg(sbi, start) == 0)
4703 continue;
4704 sentry = get_seg_entry(sbi, start);
4705 if (!sentry->valid_blocks)
4706 __set_free(sbi, start);
4707 else
4708 SIT_I(sbi)->written_valid_blocks +=
4709 sentry->valid_blocks;
4710 }
4711
4712 	/* mark the current segments as in-use */
4713 for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4714 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4715 __set_test_and_inuse(sbi, curseg_t->segno);
4716 }
4717 }
4718
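/*
 * Walk the in-use segments and mark the partially valid ones dirty; for
 * large sections, also flag partially valid sections in dirty_secmap.
 */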
init_dirty_segmap(struct f2fs_sb_info * sbi)4719 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4720 {
4721 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4722 struct free_segmap_info *free_i = FREE_I(sbi);
4723 unsigned int segno = 0, offset = 0, secno;
4724 block_t valid_blocks, usable_blks_in_seg;
4725 block_t blks_per_sec = BLKS_PER_SEC(sbi);
4726
4727 while (1) {
4728 /* find dirty segment based on free segmap */
4729 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4730 if (segno >= MAIN_SEGS(sbi))
4731 break;
4732 offset = segno + 1;
4733 valid_blocks = get_valid_blocks(sbi, segno, false);
4734 usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
4735 if (valid_blocks == usable_blks_in_seg || !valid_blocks)
4736 continue;
4737 if (valid_blocks > usable_blks_in_seg) {
4738 f2fs_bug_on(sbi, 1);
4739 continue;
4740 }
4741 mutex_lock(&dirty_i->seglist_lock);
4742 __locate_dirty_segment(sbi, segno, DIRTY);
4743 mutex_unlock(&dirty_i->seglist_lock);
4744 }
4745
4746 if (!__is_large_section(sbi))
4747 return;
4748
4749 mutex_lock(&dirty_i->seglist_lock);
4750 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
4751 valid_blocks = get_valid_blocks(sbi, segno, true);
4752 secno = GET_SEC_FROM_SEG(sbi, segno);
4753
4754 if (!valid_blocks || valid_blocks == blks_per_sec)
4755 continue;
4756 if (IS_CURSEC(sbi, secno))
4757 continue;
4758 set_bit(secno, dirty_i->dirty_secmap);
4759 }
4760 mutex_unlock(&dirty_i->seglist_lock);
4761 }
4762
init_victim_secmap(struct f2fs_sb_info * sbi)4763 static int init_victim_secmap(struct f2fs_sb_info *sbi)
4764 {
4765 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4766 unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4767
4768 dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4769 if (!dirty_i->victim_secmap)
4770 return -ENOMEM;
4771 return 0;
4772 }
4773
build_dirty_segmap(struct f2fs_sb_info * sbi)4774 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4775 {
4776 struct dirty_seglist_info *dirty_i;
4777 unsigned int bitmap_size, i;
4778
4779 /* allocate memory for dirty segments list information */
4780 dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4781 GFP_KERNEL);
4782 if (!dirty_i)
4783 return -ENOMEM;
4784
4785 SM_I(sbi)->dirty_info = dirty_i;
4786 mutex_init(&dirty_i->seglist_lock);
4787
4788 bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4789
4790 for (i = 0; i < NR_DIRTY_TYPE; i++) {
4791 dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4792 GFP_KERNEL);
4793 if (!dirty_i->dirty_segmap[i])
4794 return -ENOMEM;
4795 }
4796
4797 if (__is_large_section(sbi)) {
4798 bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4799 dirty_i->dirty_secmap = f2fs_kvzalloc(sbi,
4800 bitmap_size, GFP_KERNEL);
4801 if (!dirty_i->dirty_secmap)
4802 return -ENOMEM;
4803 }
4804
4805 init_dirty_segmap(sbi);
4806 return init_victim_secmap(sbi);
4807 }
4808
sanity_check_curseg(struct f2fs_sb_info * sbi)4809 static int sanity_check_curseg(struct f2fs_sb_info *sbi)
4810 {
4811 int i;
4812
4813 /*
4814 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
4815 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
4816 */
4817 for (i = 0; i < NR_PERSISTENT_LOG; i++) {
4818 struct curseg_info *curseg = CURSEG_I(sbi, i);
4819 struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
4820 unsigned int blkofs = curseg->next_blkoff;
4821
4822 sanity_check_seg_type(sbi, curseg->seg_type);
4823
4824 if (f2fs_test_bit(blkofs, se->cur_valid_map))
4825 goto out;
4826
4827 if (curseg->alloc_type == SSR)
4828 continue;
4829
4830 for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
4831 if (!f2fs_test_bit(blkofs, se->cur_valid_map))
4832 continue;
4833 out:
4834 f2fs_err(sbi,
4835 "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
4836 i, curseg->segno, curseg->alloc_type,
4837 curseg->next_blkoff, blkofs);
4838 return -EFSCORRUPTED;
4839 }
4840 }
4841 return 0;
4842 }
4843
4844 #ifdef CONFIG_BLK_DEV_ZONED
4845
check_zone_write_pointer(struct f2fs_sb_info * sbi,struct f2fs_dev_info * fdev,struct blk_zone * zone)4846 static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
4847 struct f2fs_dev_info *fdev,
4848 struct blk_zone *zone)
4849 {
4850 unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
4851 block_t zone_block, wp_block, last_valid_block;
4852 unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4853 int i, s, b, ret;
4854 struct seg_entry *se;
4855
4856 if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4857 return 0;
4858
4859 wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
4860 wp_segno = GET_SEGNO(sbi, wp_block);
4861 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
4862 zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
4863 zone_segno = GET_SEGNO(sbi, zone_block);
4864 zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
4865
4866 if (zone_segno >= MAIN_SEGS(sbi))
4867 return 0;
4868
4869 	/*
4870 	 * Skip checking the zones that cursegs point to, since
4871 	 * fix_curseg_write_pointer() checks them.
4872 	 */
4873 for (i = 0; i < NO_CHECK_TYPE; i++)
4874 if (zone_secno == GET_SEC_FROM_SEG(sbi,
4875 CURSEG_I(sbi, i)->segno))
4876 return 0;
4877
4878 /*
4879 * Get last valid block of the zone.
4880 */
4881 last_valid_block = zone_block - 1;
4882 for (s = sbi->segs_per_sec - 1; s >= 0; s--) {
4883 segno = zone_segno + s;
4884 se = get_seg_entry(sbi, segno);
4885 for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
4886 if (f2fs_test_bit(b, se->cur_valid_map)) {
4887 last_valid_block = START_BLOCK(sbi, segno) + b;
4888 break;
4889 }
4890 if (last_valid_block >= zone_block)
4891 break;
4892 }
4893
4894 	/*
4895 	 * If the last valid block is beyond the write pointer, report the
4896 	 * inconsistency. This inconsistency does not cause a write error
4897 	 * because the zone will not be selected for write operations until
4898 	 * it gets discarded. Just report it.
4899 	 */
4900 if (last_valid_block >= wp_block) {
4901 f2fs_notice(sbi, "Valid block beyond write pointer: "
4902 "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
4903 GET_SEGNO(sbi, last_valid_block),
4904 GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
4905 wp_segno, wp_blkoff);
4906 return 0;
4907 }
4908
4909 /*
4910 * If there is no valid block in the zone and if write pointer is
4911 * not at zone start, reset the write pointer.
4912 */
4913 if (last_valid_block + 1 == zone_block && zone->wp != zone->start) {
4914 f2fs_notice(sbi,
4915 "Zone without valid block has non-zero write "
4916 "pointer. Reset the write pointer: wp[0x%x,0x%x]",
4917 wp_segno, wp_blkoff);
4918 ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
4919 zone->len >> log_sectors_per_block);
4920 if (ret) {
4921 f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
4922 fdev->path, ret);
4923 return ret;
4924 }
4925 }
4926
4927 return 0;
4928 }
4929
get_target_zoned_dev(struct f2fs_sb_info * sbi,block_t zone_blkaddr)4930 static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
4931 block_t zone_blkaddr)
4932 {
4933 int i;
4934
4935 for (i = 0; i < sbi->s_ndevs; i++) {
4936 if (!bdev_is_zoned(FDEV(i).bdev))
4937 continue;
4938 if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
4939 zone_blkaddr <= FDEV(i).end_blk))
4940 return &FDEV(i);
4941 }
4942
4943 return NULL;
4944 }
4945
report_one_zone_cb(struct blk_zone * zone,unsigned int idx,void * data)4946 static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
4947 void *data) {
4948 memcpy(data, zone, sizeof(struct blk_zone));
4949 return 0;
4950 }
4951
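/*
 * If a curseg on a zoned device is not aligned with its zone's write
 * pointer, move the curseg to a freshly allocated section and make sure
 * the new zone is empty, resetting (discarding) it if necessary.
 */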
fix_curseg_write_pointer(struct f2fs_sb_info * sbi,int type)4952 static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
4953 {
4954 struct curseg_info *cs = CURSEG_I(sbi, type);
4955 struct f2fs_dev_info *zbd;
4956 struct blk_zone zone;
4957 unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
4958 block_t cs_zone_block, wp_block;
4959 unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4960 sector_t zone_sector;
4961 int err;
4962
4963 cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
4964 cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
4965
4966 zbd = get_target_zoned_dev(sbi, cs_zone_block);
4967 if (!zbd)
4968 return 0;
4969
4970 /* report zone for the sector the curseg points to */
4971 zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
4972 << log_sectors_per_block;
4973 err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
4974 report_one_zone_cb, &zone);
4975 if (err != 1) {
4976 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
4977 zbd->path, err);
4978 return err;
4979 }
4980
4981 if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4982 return 0;
4983
4984 wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
4985 wp_segno = GET_SEGNO(sbi, wp_block);
4986 wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
4987 wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
4988
4989 if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
4990 wp_sector_off == 0)
4991 return 0;
4992
4993 f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
4994 "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
4995 type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
4996
4997 f2fs_notice(sbi, "Assign new section to curseg[%d]: "
4998 "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
4999 allocate_segment_by_default(sbi, type, true, SEQ_NONE);
5000
5001 	/* check the consistency of the zone the curseg pointed to */
5002 if (check_zone_write_pointer(sbi, zbd, &zone))
5003 return -EIO;
5004
5005 /* check newly assigned zone */
5006 cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
5007 cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
5008
5009 zbd = get_target_zoned_dev(sbi, cs_zone_block);
5010 if (!zbd)
5011 return 0;
5012
5013 zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
5014 << log_sectors_per_block;
5015 err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
5016 report_one_zone_cb, &zone);
5017 if (err != 1) {
5018 f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
5019 zbd->path, err);
5020 return err;
5021 }
5022
5023 if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
5024 return 0;
5025
5026 if (zone.wp != zone.start) {
5027 f2fs_notice(sbi,
5028 "New zone for curseg[%d] is not yet discarded. "
5029 "Reset the zone: curseg[0x%x,0x%x]",
5030 type, cs->segno, cs->next_blkoff);
5031 err = __f2fs_issue_discard_zone(sbi, zbd->bdev,
5032 zone_sector >> log_sectors_per_block,
5033 zone.len >> log_sectors_per_block);
5034 if (err) {
5035 f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
5036 zbd->path, err);
5037 return err;
5038 }
5039 }
5040
5041 return 0;
5042 }
5043
f2fs_fix_curseg_write_pointer(struct f2fs_sb_info * sbi)5044 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5045 {
5046 int i, ret;
5047
5048 for (i = 0; i < NR_PERSISTENT_LOG; i++) {
5049 ret = fix_curseg_write_pointer(sbi, i);
5050 if (ret)
5051 return ret;
5052 }
5053
5054 return 0;
5055 }
5056
5057 struct check_zone_write_pointer_args {
5058 struct f2fs_sb_info *sbi;
5059 struct f2fs_dev_info *fdev;
5060 };
5061
check_zone_write_pointer_cb(struct blk_zone * zone,unsigned int idx,void * data)5062 static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
5063 void *data) {
5064 struct check_zone_write_pointer_args *args;
5065 args = (struct check_zone_write_pointer_args *)data;
5066
5067 return check_zone_write_pointer(args->sbi, args->fdev, zone);
5068 }
5069
f2fs_check_write_pointer(struct f2fs_sb_info * sbi)5070 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5071 {
5072 int i, ret;
5073 struct check_zone_write_pointer_args args;
5074
5075 for (i = 0; i < sbi->s_ndevs; i++) {
5076 if (!bdev_is_zoned(FDEV(i).bdev))
5077 continue;
5078
5079 args.sbi = sbi;
5080 args.fdev = &FDEV(i);
5081 ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
5082 check_zone_write_pointer_cb, &args);
5083 if (ret < 0)
5084 return ret;
5085 }
5086
5087 return 0;
5088 }
5089
is_conv_zone(struct f2fs_sb_info * sbi,unsigned int zone_idx,unsigned int dev_idx)5090 static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx,
5091 unsigned int dev_idx)
5092 {
5093 if (!bdev_is_zoned(FDEV(dev_idx).bdev))
5094 return true;
5095 return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq);
5096 }
5097
5098 /* Return the zone index in the given device */
get_zone_idx(struct f2fs_sb_info * sbi,unsigned int secno,int dev_idx)5099 static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno,
5100 int dev_idx)
5101 {
5102 block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
5103
5104 return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >>
5105 sbi->log_blocks_per_blkz;
5106 }
5107
5108 /*
5109 * Return the number of usable segments in a section, based on the
5110 * corresponding zone's capacity. A zone is equal to a section.
5111 */
f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info * sbi,unsigned int segno)5112 static inline unsigned int f2fs_usable_zone_segs_in_sec(
5113 struct f2fs_sb_info *sbi, unsigned int segno)
5114 {
5115 unsigned int dev_idx, zone_idx, unusable_segs_in_sec;
5116
5117 dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno));
5118 zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx);
5119
5120 /* Conventional zone's capacity is always equal to zone size */
5121 if (is_conv_zone(sbi, zone_idx, dev_idx))
5122 return sbi->segs_per_sec;
5123
5124 /*
5125 * If the zone_capacity_blocks array is NULL, then zone capacity
5126 * is equal to the zone size for all zones
5127 */
5128 if (!FDEV(dev_idx).zone_capacity_blocks)
5129 return sbi->segs_per_sec;
5130
5131 	/* Get the number of segments beyond the zone capacity */
5132 unusable_segs_in_sec = (sbi->blocks_per_blkz -
5133 FDEV(dev_idx).zone_capacity_blocks[zone_idx]) >>
5134 sbi->log_blocks_per_seg;
5135 return sbi->segs_per_sec - unusable_segs_in_sec;
5136 }
5137
5138 /*
5139 * Return the number of usable blocks in a segment. The number of blocks
5140 * returned is always equal to the number of blocks in a segment for
5141 * segments fully contained within a sequential zone capacity or a
5142 * conventional zone. For segments partially contained in a sequential
5143 * zone capacity, the number of usable blocks up to the zone capacity
5144 * is returned. 0 is returned in all other cases.
5145 */
f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info * sbi,unsigned int segno)5146 static inline unsigned int f2fs_usable_zone_blks_in_seg(
5147 struct f2fs_sb_info *sbi, unsigned int segno)
5148 {
5149 block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
5150 unsigned int zone_idx, dev_idx, secno;
5151
5152 secno = GET_SEC_FROM_SEG(sbi, segno);
5153 seg_start = START_BLOCK(sbi, segno);
5154 dev_idx = f2fs_target_device_index(sbi, seg_start);
5155 zone_idx = get_zone_idx(sbi, secno, dev_idx);
5156
5157 /*
5158 * Conventional zone's capacity is always equal to zone size,
5159 * so, blocks per segment is unchanged.
5160 */
5161 if (is_conv_zone(sbi, zone_idx, dev_idx))
5162 return sbi->blocks_per_seg;
5163
5164 if (!FDEV(dev_idx).zone_capacity_blocks)
5165 return sbi->blocks_per_seg;
5166
5167 sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
5168 sec_cap_blkaddr = sec_start_blkaddr +
5169 FDEV(dev_idx).zone_capacity_blocks[zone_idx];
5170
5171 /*
5172 * If segment starts before zone capacity and spans beyond
5173 * zone capacity, then usable blocks are from seg start to
5174 * zone capacity. If the segment starts after the zone capacity,
5175 * then there are no usable blocks.
5176 */
5177 if (seg_start >= sec_cap_blkaddr)
5178 return 0;
5179 if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
5180 return sec_cap_blkaddr - seg_start;
5181
5182 return sbi->blocks_per_seg;
5183 }
5184 #else
f2fs_fix_curseg_write_pointer(struct f2fs_sb_info * sbi)5185 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
5186 {
5187 return 0;
5188 }
5189
f2fs_check_write_pointer(struct f2fs_sb_info * sbi)5190 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
5191 {
5192 return 0;
5193 }
5194
f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info * sbi,unsigned int segno)5195 static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
5196 unsigned int segno)
5197 {
5198 return 0;
5199 }
5200
f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info * sbi,unsigned int segno)5201 static inline unsigned int f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info *sbi,
5202 unsigned int segno)
5203 {
5204 return 0;
5205 }
5206 #endif
f2fs_usable_blks_in_seg(struct f2fs_sb_info * sbi,unsigned int segno)5207 unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
5208 unsigned int segno)
5209 {
5210 if (f2fs_sb_has_blkzoned(sbi))
5211 return f2fs_usable_zone_blks_in_seg(sbi, segno);
5212
5213 return sbi->blocks_per_seg;
5214 }
5215
f2fs_usable_segs_in_sec(struct f2fs_sb_info * sbi,unsigned int segno)5216 unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
5217 unsigned int segno)
5218 {
5219 if (f2fs_sb_has_blkzoned(sbi))
5220 return f2fs_usable_zone_segs_in_sec(sbi, segno);
5221
5222 return sbi->segs_per_sec;
5223 }
5224
5225 /*
5226 * Update min, max modified time for cost-benefit GC algorithm
5227 */
init_min_max_mtime(struct f2fs_sb_info * sbi)5228 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
5229 {
5230 struct sit_info *sit_i = SIT_I(sbi);
5231 unsigned int segno;
5232
5233 down_write(&sit_i->sentry_lock);
5234
5235 sit_i->min_mtime = ULLONG_MAX;
5236
5237 for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
5238 unsigned int i;
5239 unsigned long long mtime = 0;
5240
5241 for (i = 0; i < sbi->segs_per_sec; i++)
5242 mtime += get_seg_entry(sbi, segno + i)->mtime;
5243
5244 mtime = div_u64(mtime, sbi->segs_per_sec);
5245
5246 if (sit_i->min_mtime > mtime)
5247 sit_i->min_mtime = mtime;
5248 }
5249 sit_i->max_mtime = get_mtime(sbi, false);
5250 sit_i->dirty_max_mtime = 0;
5251 up_write(&sit_i->sentry_lock);
5252 }
5253
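/*
 * Build the segment manager at mount time: read SM parameters from the
 * superblock/checkpoint, start the flush and discard controls, then
 * construct SIT info, free/dirty segmaps and current segments, and
 * finally sanity-check the cursegs and seed min/max mtime for GC.
 */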
f2fs_build_segment_manager(struct f2fs_sb_info * sbi)5254 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
5255 {
5256 struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
5257 struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
5258 struct f2fs_sm_info *sm_info;
5259 int err;
5260
5261 sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
5262 if (!sm_info)
5263 return -ENOMEM;
5264
5265 /* init sm info */
5266 sbi->sm_info = sm_info;
5267 sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
5268 sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
5269 sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
5270 sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
5271 sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
5272 sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
5273 sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
5274 sm_info->rec_prefree_segments = sm_info->main_segments *
5275 DEF_RECLAIM_PREFREE_SEGMENTS / 100;
5276 if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
5277 sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
5278
5279 if (!f2fs_lfs_mode(sbi))
5280 sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
5281 sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
5282 sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
5283 sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
5284 sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
5285 sm_info->min_ssr_sections = reserved_sections(sbi);
5286
5287 INIT_LIST_HEAD(&sm_info->sit_entry_set);
5288
5289 init_rwsem(&sm_info->curseg_lock);
5290
5291 if (!f2fs_readonly(sbi->sb)) {
5292 err = f2fs_create_flush_cmd_control(sbi);
5293 if (err)
5294 return err;
5295 }
5296
5297 err = create_discard_cmd_control(sbi);
5298 if (err)
5299 return err;
5300
5301 err = build_sit_info(sbi);
5302 if (err)
5303 return err;
5304 err = build_free_segmap(sbi);
5305 if (err)
5306 return err;
5307 err = build_curseg(sbi);
5308 if (err)
5309 return err;
5310
5311 /* reinit free segmap based on SIT */
5312 err = build_sit_entries(sbi);
5313 if (err)
5314 return err;
5315
5316 init_free_segmap(sbi);
5317 err = build_dirty_segmap(sbi);
5318 if (err)
5319 return err;
5320
5321 err = sanity_check_curseg(sbi);
5322 if (err)
5323 return err;
5324
5325 init_min_max_mtime(sbi);
5326 return 0;
5327 }
5328
discard_dirty_segmap(struct f2fs_sb_info * sbi,enum dirty_type dirty_type)5329 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
5330 enum dirty_type dirty_type)
5331 {
5332 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5333
5334 mutex_lock(&dirty_i->seglist_lock);
5335 kvfree(dirty_i->dirty_segmap[dirty_type]);
5336 dirty_i->nr_dirty[dirty_type] = 0;
5337 mutex_unlock(&dirty_i->seglist_lock);
5338 }
5339
destroy_victim_secmap(struct f2fs_sb_info * sbi)5340 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
5341 {
5342 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5343 kvfree(dirty_i->victim_secmap);
5344 }
5345
destroy_dirty_segmap(struct f2fs_sb_info * sbi)5346 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
5347 {
5348 struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
5349 int i;
5350
5351 if (!dirty_i)
5352 return;
5353
5354 /* discard pre-free/dirty segments list */
5355 for (i = 0; i < NR_DIRTY_TYPE; i++)
5356 discard_dirty_segmap(sbi, i);
5357
5358 if (__is_large_section(sbi)) {
5359 mutex_lock(&dirty_i->seglist_lock);
5360 kvfree(dirty_i->dirty_secmap);
5361 mutex_unlock(&dirty_i->seglist_lock);
5362 }
5363
5364 destroy_victim_secmap(sbi);
5365 SM_I(sbi)->dirty_info = NULL;
5366 kfree(dirty_i);
5367 }
5368
destroy_curseg(struct f2fs_sb_info * sbi)5369 static void destroy_curseg(struct f2fs_sb_info *sbi)
5370 {
5371 struct curseg_info *array = SM_I(sbi)->curseg_array;
5372 int i;
5373
5374 if (!array)
5375 return;
5376 SM_I(sbi)->curseg_array = NULL;
5377 for (i = 0; i < NR_CURSEG_TYPE; i++) {
5378 kfree(array[i].sum_blk);
5379 kfree(array[i].journal);
5380 }
5381 kfree(array);
5382 }
5383
destroy_free_segmap(struct f2fs_sb_info * sbi)5384 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
5385 {
5386 struct free_segmap_info *free_i = SM_I(sbi)->free_info;
5387 if (!free_i)
5388 return;
5389 SM_I(sbi)->free_info = NULL;
5390 kvfree(free_i->free_segmap);
5391 kvfree(free_i->free_secmap);
5392 kfree(free_i);
5393 }
5394
destroy_sit_info(struct f2fs_sb_info * sbi)5395 static void destroy_sit_info(struct f2fs_sb_info *sbi)
5396 {
5397 struct sit_info *sit_i = SIT_I(sbi);
5398
5399 if (!sit_i)
5400 return;
5401
5402 if (sit_i->sentries)
5403 kvfree(sit_i->bitmap);
5404 kfree(sit_i->tmp_map);
5405
5406 kvfree(sit_i->sentries);
5407 kvfree(sit_i->sec_entries);
5408 kvfree(sit_i->dirty_sentries_bitmap);
5409
5410 SM_I(sbi)->sit_info = NULL;
5411 kvfree(sit_i->sit_bitmap);
5412 #ifdef CONFIG_F2FS_CHECK_FS
5413 kvfree(sit_i->sit_bitmap_mir);
5414 kvfree(sit_i->invalid_segmap);
5415 #endif
5416 kfree(sit_i);
5417 }
5418
f2fs_destroy_segment_manager(struct f2fs_sb_info * sbi)5419 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
5420 {
5421 struct f2fs_sm_info *sm_info = SM_I(sbi);
5422
5423 if (!sm_info)
5424 return;
5425 f2fs_destroy_flush_cmd_control(sbi, true);
5426 destroy_discard_cmd_control(sbi);
5427 destroy_dirty_segmap(sbi);
5428 destroy_curseg(sbi);
5429 destroy_free_segmap(sbi);
5430 destroy_sit_info(sbi);
5431 sbi->sm_info = NULL;
5432 kfree(sm_info);
5433 }
5434
f2fs_create_segment_manager_caches(void)5435 int __init f2fs_create_segment_manager_caches(void)
5436 {
5437 discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
5438 sizeof(struct discard_entry));
5439 if (!discard_entry_slab)
5440 goto fail;
5441
5442 discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
5443 sizeof(struct discard_cmd));
5444 if (!discard_cmd_slab)
5445 goto destroy_discard_entry;
5446
5447 sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
5448 sizeof(struct sit_entry_set));
5449 if (!sit_entry_set_slab)
5450 goto destroy_discard_cmd;
5451
5452 inmem_entry_slab = f2fs_kmem_cache_create("f2fs_inmem_page_entry",
5453 sizeof(struct inmem_pages));
5454 if (!inmem_entry_slab)
5455 goto destroy_sit_entry_set;
5456 return 0;
5457
5458 destroy_sit_entry_set:
5459 kmem_cache_destroy(sit_entry_set_slab);
5460 destroy_discard_cmd:
5461 kmem_cache_destroy(discard_cmd_slab);
5462 destroy_discard_entry:
5463 kmem_cache_destroy(discard_entry_slab);
5464 fail:
5465 return -ENOMEM;
5466 }
5467
f2fs_destroy_segment_manager_caches(void)5468 void f2fs_destroy_segment_manager_caches(void)
5469 {
5470 kmem_cache_destroy(sit_entry_set_slab);
5471 kmem_cache_destroy(discard_cmd_slab);
5472 kmem_cache_destroy(discard_entry_slab);
5473 kmem_cache_destroy(inmem_entry_slab);
5474 }
5475