1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * fs/f2fs/segment.c
4  *
5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6  *             http://www.samsung.com/
7  */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/bio.h>
11 #include <linux/blkdev.h>
12 #include <linux/prefetch.h>
13 #include <linux/kthread.h>
14 #include <linux/swap.h>
15 #include <linux/timer.h>
16 #include <linux/freezer.h>
17 #include <linux/sched/signal.h>
18 
19 #include "f2fs.h"
20 #include "segment.h"
21 #include "node.h"
22 #include "gc.h"
23 #include "trace.h"
24 #include <trace/events/f2fs.h>
25 
26 #define __reverse_ffz(x) __reverse_ffs(~(x))
27 
28 static struct kmem_cache *discard_entry_slab;
29 static struct kmem_cache *discard_cmd_slab;
30 static struct kmem_cache *sit_entry_set_slab;
31 static struct kmem_cache *inmem_entry_slab;
32 
33 static unsigned long __reverse_ulong(unsigned char *str)
34 {
35 	unsigned long tmp = 0;
36 	int shift = 24, idx = 0;
37 
38 #if BITS_PER_LONG == 64
39 	shift = 56;
40 #endif
41 	while (shift >= 0) {
42 		tmp |= (unsigned long)str[idx++] << shift;
43 		shift -= BITS_PER_BYTE;
44 	}
45 	return tmp;
46 }
47 
48 /*
49  * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
50  * MSB and LSB are reversed in a byte by f2fs_set_bit.
51  */
52 static inline unsigned long __reverse_ffs(unsigned long word)
53 {
54 	int num = 0;
55 
56 #if BITS_PER_LONG == 64
57 	if ((word & 0xffffffff00000000UL) == 0)
58 		num += 32;
59 	else
60 		word >>= 32;
61 #endif
62 	if ((word & 0xffff0000) == 0)
63 		num += 16;
64 	else
65 		word >>= 16;
66 
67 	if ((word & 0xff00) == 0)
68 		num += 8;
69 	else
70 		word >>= 8;
71 
72 	if ((word & 0xf0) == 0)
73 		num += 4;
74 	else
75 		word >>= 4;
76 
77 	if ((word & 0xc) == 0)
78 		num += 2;
79 	else
80 		word >>= 2;
81 
82 	if ((word & 0x2) == 0)
83 		num += 1;
84 	return num;
85 }
86 
87 /*
88  * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
89  * f2fs_set_bit makes MSB and LSB reversed in a byte.
90  * @size must be a multiple of BITS_PER_LONG.
91  * Example:
92  *                             MSB <--> LSB
93  *   f2fs_set_bit(0, bitmap) => 1000 0000
94  *   f2fs_set_bit(7, bitmap) => 0000 0001
95  */
96 static unsigned long __find_rev_next_bit(const unsigned long *addr,
97 			unsigned long size, unsigned long offset)
98 {
99 	const unsigned long *p = addr + BIT_WORD(offset);
100 	unsigned long result = size;
101 	unsigned long tmp;
102 
103 	if (offset >= size)
104 		return size;
105 
106 	size -= (offset & ~(BITS_PER_LONG - 1));
107 	offset %= BITS_PER_LONG;
108 
109 	while (1) {
110 		if (*p == 0)
111 			goto pass;
112 
113 		tmp = __reverse_ulong((unsigned char *)p);
114 
115 		tmp &= ~0UL >> offset;
116 		if (size < BITS_PER_LONG)
117 			tmp &= (~0UL << (BITS_PER_LONG - size));
118 		if (tmp)
119 			goto found;
120 pass:
121 		if (size <= BITS_PER_LONG)
122 			break;
123 		size -= BITS_PER_LONG;
124 		offset = 0;
125 		p++;
126 	}
127 	return result;
128 found:
129 	return result - size + __reverse_ffs(tmp);
130 }
131 
132 static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
133 			unsigned long size, unsigned long offset)
134 {
135 	const unsigned long *p = addr + BIT_WORD(offset);
136 	unsigned long result = size;
137 	unsigned long tmp;
138 
139 	if (offset >= size)
140 		return size;
141 
142 	size -= (offset & ~(BITS_PER_LONG - 1));
143 	offset %= BITS_PER_LONG;
144 
145 	while (1) {
146 		if (*p == ~0UL)
147 			goto pass;
148 
149 		tmp = __reverse_ulong((unsigned char *)p);
150 
151 		if (offset)
152 			tmp |= ~0UL << (BITS_PER_LONG - offset);
153 		if (size < BITS_PER_LONG)
154 			tmp |= ~0UL >> size;
155 		if (tmp != ~0UL)
156 			goto found;
157 pass:
158 		if (size <= BITS_PER_LONG)
159 			break;
160 		size -= BITS_PER_LONG;
161 		offset = 0;
162 		p++;
163 	}
164 	return result;
165 found:
166 	return result - size + __reverse_ffz(tmp);
167 }
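
/*
 * Illustrative sketch, not part of the original file: a hypothetical unused
 * helper showing how the reversed bit order behaves with the helpers above.
 * Per the example above, f2fs_set_bit(5, ...) sets 0000 0100 in byte 0
 * (MSB <--> LSB), so the forward scan finds bit 5 while bit 0 (the MSB of
 * byte 0) is still the first zero bit.
 */
static void __maybe_unused __example_rev_bitmap(void)
{
	unsigned long bitmap[1] = { 0 };

	f2fs_set_bit(5, (char *)bitmap);

	WARN_ON(__find_rev_next_bit(bitmap, BITS_PER_LONG, 0) != 5);
	WARN_ON(__find_rev_next_zero_bit(bitmap, BITS_PER_LONG, 0) != 0);
}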
168 
169 bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
170 {
171 	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
172 	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
173 	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);
174 
175 	if (f2fs_lfs_mode(sbi))
176 		return false;
177 	if (sbi->gc_mode == GC_URGENT)
178 		return true;
179 	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
180 		return true;
181 
182 	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
183 			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
184 }
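
/*
 * Worked example with illustrative numbers: ignoring the LFS, GC-urgent and
 * CP-disabled cases above, with 4 dirty node sections, 2 dirty dentry
 * sections, 1 dirty imeta section, min_ssr_sections = 2 and 5 reserved
 * sections, SSR is needed once free_sections(sbi) drops to
 * 4 + 2 * 2 + 1 + 2 + 5 = 16 or fewer.
 */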
185 
186 void f2fs_register_inmem_page(struct inode *inode, struct page *page)
187 {
188 	struct inmem_pages *new;
189 
190 	f2fs_trace_pid(page);
191 
192 	f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
193 
194 	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
195 
196 	/* add atomic page indices to the list */
197 	new->page = page;
198 	INIT_LIST_HEAD(&new->list);
199 
200 	/* increase reference count with clean state */
201 	get_page(page);
202 	mutex_lock(&F2FS_I(inode)->inmem_lock);
203 	list_add_tail(&new->list, &F2FS_I(inode)->inmem_pages);
204 	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
205 	mutex_unlock(&F2FS_I(inode)->inmem_lock);
206 
207 	trace_f2fs_register_inmem_page(page, INMEM);
208 }
209 
210 static int __revoke_inmem_pages(struct inode *inode,
211 				struct list_head *head, bool drop, bool recover,
212 				bool trylock)
213 {
214 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
215 	struct inmem_pages *cur, *tmp;
216 	int err = 0;
217 
218 	list_for_each_entry_safe(cur, tmp, head, list) {
219 		struct page *page = cur->page;
220 
221 		if (drop)
222 			trace_f2fs_commit_inmem_page(page, INMEM_DROP);
223 
224 		if (trylock) {
225 			/*
226 			 * to avoid deadlock in between page lock and
227 			 * inmem_lock.
228 			 */
229 			if (!trylock_page(page))
230 				continue;
231 		} else {
232 			lock_page(page);
233 		}
234 
235 		f2fs_wait_on_page_writeback(page, DATA, true, true);
236 
237 		if (recover) {
238 			struct dnode_of_data dn;
239 			struct node_info ni;
240 
241 			trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
242 retry:
243 			set_new_dnode(&dn, inode, NULL, NULL, 0);
244 			err = f2fs_get_dnode_of_data(&dn, page->index,
245 								LOOKUP_NODE);
246 			if (err) {
247 				if (err == -ENOMEM) {
248 					congestion_wait(BLK_RW_ASYNC,
249 							DEFAULT_IO_TIMEOUT);
250 					cond_resched();
251 					goto retry;
252 				}
253 				err = -EAGAIN;
254 				goto next;
255 			}
256 
257 			err = f2fs_get_node_info(sbi, dn.nid, &ni);
258 			if (err) {
259 				f2fs_put_dnode(&dn);
260 				return err;
261 			}
262 
263 			if (cur->old_addr == NEW_ADDR) {
264 				f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
265 				f2fs_update_data_blkaddr(&dn, NEW_ADDR);
266 			} else
267 				f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
268 					cur->old_addr, ni.version, true, true);
269 			f2fs_put_dnode(&dn);
270 		}
271 next:
272 		/* we don't need to invalidate this in the successful case */
273 		if (drop || recover) {
274 			ClearPageUptodate(page);
275 			clear_cold_data(page);
276 		}
277 		f2fs_clear_page_private(page);
278 		f2fs_put_page(page, 1);
279 
280 		list_del(&cur->list);
281 		kmem_cache_free(inmem_entry_slab, cur);
282 		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
283 	}
284 	return err;
285 }
286 
287 void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
288 {
289 	struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
290 	struct inode *inode;
291 	struct f2fs_inode_info *fi;
292 	unsigned int count = sbi->atomic_files;
293 	unsigned int looped = 0;
294 next:
295 	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
296 	if (list_empty(head)) {
297 		spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
298 		return;
299 	}
300 	fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
301 	inode = igrab(&fi->vfs_inode);
302 	if (inode)
303 		list_move_tail(&fi->inmem_ilist, head);
304 	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
305 
306 	if (inode) {
307 		if (gc_failure) {
308 			if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
309 				goto skip;
310 		}
311 		set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
312 		f2fs_drop_inmem_pages(inode);
313 skip:
314 		iput(inode);
315 	}
316 	congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
317 	cond_resched();
318 	if (gc_failure) {
319 		if (++looped >= count)
320 			return;
321 	}
322 	goto next;
323 }
324 
325 void f2fs_drop_inmem_pages(struct inode *inode)
326 {
327 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
328 	struct f2fs_inode_info *fi = F2FS_I(inode);
329 
330 	while (!list_empty(&fi->inmem_pages)) {
331 		mutex_lock(&fi->inmem_lock);
332 		__revoke_inmem_pages(inode, &fi->inmem_pages,
333 						true, false, true);
334 		mutex_unlock(&fi->inmem_lock);
335 	}
336 
337 	fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
338 
339 	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
340 	if (!list_empty(&fi->inmem_ilist))
341 		list_del_init(&fi->inmem_ilist);
342 	if (f2fs_is_atomic_file(inode)) {
343 		clear_inode_flag(inode, FI_ATOMIC_FILE);
344 		sbi->atomic_files--;
345 	}
346 	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
347 }
348 
349 void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
350 {
351 	struct f2fs_inode_info *fi = F2FS_I(inode);
352 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
353 	struct list_head *head = &fi->inmem_pages;
354 	struct inmem_pages *cur = NULL;
355 	struct inmem_pages *tmp;
356 
357 	f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));
358 
359 	mutex_lock(&fi->inmem_lock);
360 	list_for_each_entry(tmp, head, list) {
361 		if (tmp->page == page) {
362 			cur = tmp;
363 			break;
364 		}
365 	}
366 
367 	f2fs_bug_on(sbi, !cur);
368 	list_del(&cur->list);
369 	mutex_unlock(&fi->inmem_lock);
370 
371 	dec_page_count(sbi, F2FS_INMEM_PAGES);
372 	kmem_cache_free(inmem_entry_slab, cur);
373 
374 	ClearPageUptodate(page);
375 	f2fs_clear_page_private(page);
376 	f2fs_put_page(page, 0);
377 
378 	trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
379 }
380 
381 static int __f2fs_commit_inmem_pages(struct inode *inode)
382 {
383 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
384 	struct f2fs_inode_info *fi = F2FS_I(inode);
385 	struct inmem_pages *cur, *tmp;
386 	struct f2fs_io_info fio = {
387 		.sbi = sbi,
388 		.ino = inode->i_ino,
389 		.type = DATA,
390 		.op = REQ_OP_WRITE,
391 		.op_flags = REQ_SYNC | REQ_PRIO,
392 		.io_type = FS_DATA_IO,
393 	};
394 	struct list_head revoke_list;
395 	bool submit_bio = false;
396 	int err = 0;
397 
398 	INIT_LIST_HEAD(&revoke_list);
399 
400 	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
401 		struct page *page = cur->page;
402 
403 		lock_page(page);
404 		if (page->mapping == inode->i_mapping) {
405 			trace_f2fs_commit_inmem_page(page, INMEM);
406 
407 			f2fs_wait_on_page_writeback(page, DATA, true, true);
408 
409 			set_page_dirty(page);
410 			if (clear_page_dirty_for_io(page)) {
411 				inode_dec_dirty_pages(inode);
412 				f2fs_remove_dirty_inode(inode);
413 			}
414 retry:
415 			fio.page = page;
416 			fio.old_blkaddr = NULL_ADDR;
417 			fio.encrypted_page = NULL;
418 			fio.need_lock = LOCK_DONE;
419 			err = f2fs_do_write_data_page(&fio);
420 			if (err) {
421 				if (err == -ENOMEM) {
422 					congestion_wait(BLK_RW_ASYNC,
423 							DEFAULT_IO_TIMEOUT);
424 					cond_resched();
425 					goto retry;
426 				}
427 				unlock_page(page);
428 				break;
429 			}
430 			/* record old blkaddr for revoking */
431 			cur->old_addr = fio.old_blkaddr;
432 			submit_bio = true;
433 		}
434 		unlock_page(page);
435 		list_move_tail(&cur->list, &revoke_list);
436 	}
437 
438 	if (submit_bio)
439 		f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
440 
441 	if (err) {
442 		/*
443 		 * Try to revoke all committed pages.  This can still fail due
444 		 * to lack of memory or other reasons; in that case EAGAIN is
445 		 * returned, meaning the transaction is no longer intact and
446 		 * the caller should use its journal to recover, or rewrite and
447 		 * commit the last transaction.  For any other error number,
448 		 * revoking was done by the filesystem itself.
449 		 */
450 		err = __revoke_inmem_pages(inode, &revoke_list,
451 						false, true, false);
452 
453 		/* drop all uncommitted pages */
454 		__revoke_inmem_pages(inode, &fi->inmem_pages,
455 						true, false, false);
456 	} else {
457 		__revoke_inmem_pages(inode, &revoke_list,
458 						false, false, false);
459 	}
460 
461 	return err;
462 }
463 
464 int f2fs_commit_inmem_pages(struct inode *inode)
465 {
466 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
467 	struct f2fs_inode_info *fi = F2FS_I(inode);
468 	int err;
469 
470 	f2fs_balance_fs(sbi, true);
471 
472 	down_write(&fi->i_gc_rwsem[WRITE]);
473 
474 	f2fs_lock_op(sbi);
475 	set_inode_flag(inode, FI_ATOMIC_COMMIT);
476 
477 	mutex_lock(&fi->inmem_lock);
478 	err = __f2fs_commit_inmem_pages(inode);
479 	mutex_unlock(&fi->inmem_lock);
480 
481 	clear_inode_flag(inode, FI_ATOMIC_COMMIT);
482 
483 	f2fs_unlock_op(sbi);
484 	up_write(&fi->i_gc_rwsem[WRITE]);
485 
486 	return err;
487 }
488 
489 /*
490  * This function balances dirty node and dentry pages.
491  * In addition, it controls garbage collection.
492  */
493 void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
494 {
495 	if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
496 		f2fs_show_injection_info(sbi, FAULT_CHECKPOINT);
497 		f2fs_stop_checkpoint(sbi, false);
498 	}
499 
500 	/* balance_fs_bg() can be left pending */
501 	if (need && excess_cached_nats(sbi))
502 		f2fs_balance_fs_bg(sbi, false);
503 
504 	if (!f2fs_is_checkpoint_ready(sbi))
505 		return;
506 
507 	/*
508 	 * We should do GC or end up with checkpoint, if there are so many dirty
509 	 * dir/node pages without enough free segments.
510 	 */
511 	if (has_not_enough_free_secs(sbi, 0, 0)) {
512 		down_write(&sbi->gc_lock);
513 		f2fs_gc(sbi, false, false, NULL_SEGNO);
514 	}
515 }
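
/*
 * Callers such as f2fs_commit_inmem_pages() above invoke this before taking
 * f2fs_lock_op(), since the foreground GC triggered here may itself write a
 * checkpoint.
 */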
516 
517 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
518 {
519 	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
520 		return;
521 
522 	/* try to shrink extent cache when there is not enough memory */
523 	if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
524 		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
525 
526 	/* check the # of cached NAT entries */
527 	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
528 		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
529 
530 	if (!f2fs_available_free_memory(sbi, FREE_NIDS))
531 		f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
532 	else
533 		f2fs_build_free_nids(sbi, false, false);
534 
535 	if (!is_idle(sbi, REQ_TIME) &&
536 		(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
537 		return;
538 
539 	/* checkpoint is the only way to shrink partial cached entries */
540 	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
541 			!f2fs_available_free_memory(sbi, INO_ENTRIES) ||
542 			excess_prefree_segs(sbi) ||
543 			excess_dirty_nats(sbi) ||
544 			excess_dirty_nodes(sbi) ||
545 			f2fs_time_over(sbi, CP_TIME)) {
546 		if (test_opt(sbi, DATA_FLUSH) && from_bg) {
547 			struct blk_plug plug;
548 
549 			mutex_lock(&sbi->flush_lock);
550 
551 			blk_start_plug(&plug);
552 			f2fs_sync_dirty_inodes(sbi, FILE_INODE);
553 			blk_finish_plug(&plug);
554 
555 			mutex_unlock(&sbi->flush_lock);
556 		}
557 		f2fs_sync_fs(sbi->sb, true);
558 		stat_inc_bg_cp_count(sbi->stat_info);
559 	}
560 }
561 
562 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
563 				struct block_device *bdev)
564 {
565 	struct bio *bio;
566 	int ret;
567 
568 	bio = f2fs_bio_alloc(sbi, 0, false);
569 	if (!bio)
570 		return -ENOMEM;
571 
572 	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
573 	bio_set_dev(bio, bdev);
574 	ret = submit_bio_wait(bio);
575 	bio_put(bio);
576 
577 	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
578 				test_opt(sbi, FLUSH_MERGE), ret);
579 	return ret;
580 }
581 
582 static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
583 {
584 	int ret = 0;
585 	int i;
586 
587 	if (!f2fs_is_multi_device(sbi))
588 		return __submit_flush_wait(sbi, sbi->sb->s_bdev);
589 
590 	for (i = 0; i < sbi->s_ndevs; i++) {
591 		if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
592 			continue;
593 		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
594 		if (ret)
595 			break;
596 	}
597 	return ret;
598 }
599 
600 static int issue_flush_thread(void *data)
601 {
602 	struct f2fs_sb_info *sbi = data;
603 	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
604 	wait_queue_head_t *q = &fcc->flush_wait_queue;
605 repeat:
606 	if (kthread_should_stop())
607 		return 0;
608 
609 	sb_start_intwrite(sbi->sb);
610 
611 	if (!llist_empty(&fcc->issue_list)) {
612 		struct flush_cmd *cmd, *next;
613 		int ret;
614 
615 		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
616 		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
617 
618 		cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);
619 
620 		ret = submit_flush_wait(sbi, cmd->ino);
621 		atomic_inc(&fcc->issued_flush);
622 
623 		llist_for_each_entry_safe(cmd, next,
624 					  fcc->dispatch_list, llnode) {
625 			cmd->ret = ret;
626 			complete(&cmd->wait);
627 		}
628 		fcc->dispatch_list = NULL;
629 	}
630 
631 	sb_end_intwrite(sbi->sb);
632 
633 	wait_event_interruptible(*q,
634 		kthread_should_stop() || !llist_empty(&fcc->issue_list));
635 	goto repeat;
636 }
637 
638 int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
639 {
640 	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
641 	struct flush_cmd cmd;
642 	int ret;
643 
644 	if (test_opt(sbi, NOBARRIER))
645 		return 0;
646 
647 	if (!test_opt(sbi, FLUSH_MERGE)) {
648 		atomic_inc(&fcc->queued_flush);
649 		ret = submit_flush_wait(sbi, ino);
650 		atomic_dec(&fcc->queued_flush);
651 		atomic_inc(&fcc->issued_flush);
652 		return ret;
653 	}
654 
655 	if (atomic_inc_return(&fcc->queued_flush) == 1 ||
656 	    f2fs_is_multi_device(sbi)) {
657 		ret = submit_flush_wait(sbi, ino);
658 		atomic_dec(&fcc->queued_flush);
659 
660 		atomic_inc(&fcc->issued_flush);
661 		return ret;
662 	}
663 
664 	cmd.ino = ino;
665 	init_completion(&cmd.wait);
666 
667 	llist_add(&cmd.llnode, &fcc->issue_list);
668 
669 	/* update issue_list before we wake up issue_flush thread */
670 	smp_mb();
671 
672 	if (waitqueue_active(&fcc->flush_wait_queue))
673 		wake_up(&fcc->flush_wait_queue);
674 
675 	if (fcc->f2fs_issue_flush) {
676 		wait_for_completion(&cmd.wait);
677 		atomic_dec(&fcc->queued_flush);
678 	} else {
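		/* the issue_flush thread is not running: drain the list here */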
679 		struct llist_node *list;
680 
681 		list = llist_del_all(&fcc->issue_list);
682 		if (!list) {
683 			wait_for_completion(&cmd.wait);
684 			atomic_dec(&fcc->queued_flush);
685 		} else {
686 			struct flush_cmd *tmp, *next;
687 
688 			ret = submit_flush_wait(sbi, ino);
689 
690 			llist_for_each_entry_safe(tmp, next, list, llnode) {
691 				if (tmp == &cmd) {
692 					cmd.ret = ret;
693 					atomic_dec(&fcc->queued_flush);
694 					continue;
695 				}
696 				tmp->ret = ret;
697 				complete(&tmp->wait);
698 			}
699 		}
700 	}
701 
702 	return cmd.ret;
703 }
704 
705 int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
706 {
707 	dev_t dev = sbi->sb->s_bdev->bd_dev;
708 	struct flush_cmd_control *fcc;
709 	int err = 0;
710 
711 	if (SM_I(sbi)->fcc_info) {
712 		fcc = SM_I(sbi)->fcc_info;
713 		if (fcc->f2fs_issue_flush)
714 			return err;
715 		goto init_thread;
716 	}
717 
718 	fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
719 	if (!fcc)
720 		return -ENOMEM;
721 	atomic_set(&fcc->issued_flush, 0);
722 	atomic_set(&fcc->queued_flush, 0);
723 	init_waitqueue_head(&fcc->flush_wait_queue);
724 	init_llist_head(&fcc->issue_list);
725 	SM_I(sbi)->fcc_info = fcc;
726 	if (!test_opt(sbi, FLUSH_MERGE))
727 		return err;
728 
729 init_thread:
730 	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
731 				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
732 	if (IS_ERR(fcc->f2fs_issue_flush)) {
733 		err = PTR_ERR(fcc->f2fs_issue_flush);
734 		kvfree(fcc);
735 		SM_I(sbi)->fcc_info = NULL;
736 		return err;
737 	}
738 
739 	return err;
740 }
741 
742 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
743 {
744 	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
745 
746 	if (fcc && fcc->f2fs_issue_flush) {
747 		struct task_struct *flush_thread = fcc->f2fs_issue_flush;
748 
749 		fcc->f2fs_issue_flush = NULL;
750 		kthread_stop(flush_thread);
751 	}
752 	if (free) {
753 		kvfree(fcc);
754 		SM_I(sbi)->fcc_info = NULL;
755 	}
756 }
757 
758 int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
759 {
760 	int ret = 0, i;
761 
762 	if (!f2fs_is_multi_device(sbi))
763 		return 0;
764 
765 	for (i = 1; i < sbi->s_ndevs; i++) {
766 		if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
767 			continue;
768 		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
769 		if (ret)
770 			break;
771 
772 		spin_lock(&sbi->dev_lock);
773 		f2fs_clear_bit(i, (char *)&sbi->dirty_device);
774 		spin_unlock(&sbi->dev_lock);
775 	}
776 
777 	return ret;
778 }
779 
780 static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
781 		enum dirty_type dirty_type)
782 {
783 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
784 
785 	/* need not be added */
786 	if (IS_CURSEG(sbi, segno))
787 		return;
788 
789 	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
790 		dirty_i->nr_dirty[dirty_type]++;
791 
792 	if (dirty_type == DIRTY) {
793 		struct seg_entry *sentry = get_seg_entry(sbi, segno);
794 		enum dirty_type t = sentry->type;
795 
796 		if (unlikely(t >= DIRTY)) {
797 			f2fs_bug_on(sbi, 1);
798 			return;
799 		}
800 		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
801 			dirty_i->nr_dirty[t]++;
802 	}
803 }
804 
805 static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
806 		enum dirty_type dirty_type)
807 {
808 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
809 
810 	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
811 		dirty_i->nr_dirty[dirty_type]--;
812 
813 	if (dirty_type == DIRTY) {
814 		struct seg_entry *sentry = get_seg_entry(sbi, segno);
815 		enum dirty_type t = sentry->type;
816 
817 		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
818 			dirty_i->nr_dirty[t]--;
819 
820 		if (get_valid_blocks(sbi, segno, true) == 0) {
821 			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
822 						dirty_i->victim_secmap);
823 #ifdef CONFIG_F2FS_CHECK_FS
824 			clear_bit(segno, SIT_I(sbi)->invalid_segmap);
825 #endif
826 		}
827 	}
828 }
829 
830 /*
831  * No error such as -ENOMEM should occur here.
832  * Adding dirty entry into seglist is not critical operation.
833  * If a given segment is one of current working segments, it won't be added.
834  */
835 static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
836 {
837 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
838 	unsigned short valid_blocks, ckpt_valid_blocks;
839 
840 	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
841 		return;
842 
843 	mutex_lock(&dirty_i->seglist_lock);
844 
845 	valid_blocks = get_valid_blocks(sbi, segno, false);
846 	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
847 
848 	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
849 				ckpt_valid_blocks == sbi->blocks_per_seg)) {
850 		__locate_dirty_segment(sbi, segno, PRE);
851 		__remove_dirty_segment(sbi, segno, DIRTY);
852 	} else if (valid_blocks < sbi->blocks_per_seg) {
853 		__locate_dirty_segment(sbi, segno, DIRTY);
854 	} else {
855 		/* Recovery routine with SSR needs this */
856 		__remove_dirty_segment(sbi, segno, DIRTY);
857 	}
858 
859 	mutex_unlock(&dirty_i->seglist_lock);
860 }
861 
862 /* Move dirty segments that are now empty to prefree; takes seglist_lock internally */
863 void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
864 {
865 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
866 	unsigned int segno;
867 
868 	mutex_lock(&dirty_i->seglist_lock);
869 	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
870 		if (get_valid_blocks(sbi, segno, false))
871 			continue;
872 		if (IS_CURSEG(sbi, segno))
873 			continue;
874 		__locate_dirty_segment(sbi, segno, PRE);
875 		__remove_dirty_segment(sbi, segno, DIRTY);
876 	}
877 	mutex_unlock(&dirty_i->seglist_lock);
878 }
879 
880 block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
881 {
882 	int ovp_hole_segs =
883 		(overprovision_segments(sbi) - reserved_segments(sbi));
884 	block_t ovp_holes = ovp_hole_segs << sbi->log_blocks_per_seg;
885 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
886 	block_t holes[2] = {0, 0};	/* DATA and NODE */
887 	block_t unusable;
888 	struct seg_entry *se;
889 	unsigned int segno;
890 
891 	mutex_lock(&dirty_i->seglist_lock);
892 	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
893 		se = get_seg_entry(sbi, segno);
894 		if (IS_NODESEG(se->type))
895 			holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
896 		else
897 			holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
898 	}
899 	mutex_unlock(&dirty_i->seglist_lock);
900 
901 	unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
902 	if (unusable > ovp_holes)
903 		return unusable - ovp_holes;
904 	return 0;
905 }
906 
907 int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable)
908 {
909 	int ovp_hole_segs =
910 		(overprovision_segments(sbi) - reserved_segments(sbi));
911 	if (unusable > F2FS_OPTION(sbi).unusable_cap)
912 		return -EAGAIN;
913 	if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
914 		dirty_segments(sbi) > ovp_hole_segs)
915 		return -EAGAIN;
916 	return 0;
917 }
918 
919 /* This is only used by SBI_CP_DISABLED */
920 static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
921 {
922 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
923 	unsigned int segno = 0;
924 
925 	mutex_lock(&dirty_i->seglist_lock);
926 	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
927 		if (get_valid_blocks(sbi, segno, false))
928 			continue;
929 		if (get_ckpt_valid_blocks(sbi, segno))
930 			continue;
931 		mutex_unlock(&dirty_i->seglist_lock);
932 		return segno;
933 	}
934 	mutex_unlock(&dirty_i->seglist_lock);
935 	return NULL_SEGNO;
936 }
937 
938 static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
939 		struct block_device *bdev, block_t lstart,
940 		block_t start, block_t len)
941 {
942 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
943 	struct list_head *pend_list;
944 	struct discard_cmd *dc;
945 
946 	f2fs_bug_on(sbi, !len);
947 
948 	pend_list = &dcc->pend_list[plist_idx(len)];
949 
950 	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
951 	INIT_LIST_HEAD(&dc->list);
952 	dc->bdev = bdev;
953 	dc->lstart = lstart;
954 	dc->start = start;
955 	dc->len = len;
956 	dc->ref = 0;
957 	dc->state = D_PREP;
958 	dc->queued = 0;
959 	dc->error = 0;
960 	init_completion(&dc->wait);
961 	list_add_tail(&dc->list, pend_list);
962 	spin_lock_init(&dc->lock);
963 	dc->bio_ref = 0;
964 	atomic_inc(&dcc->discard_cmd_cnt);
965 	dcc->undiscard_blks += len;
966 
967 	return dc;
968 }
969 
970 static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
971 				struct block_device *bdev, block_t lstart,
972 				block_t start, block_t len,
973 				struct rb_node *parent, struct rb_node **p,
974 				bool leftmost)
975 {
976 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
977 	struct discard_cmd *dc;
978 
979 	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);
980 
981 	rb_link_node(&dc->rb_node, parent, p);
982 	rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
983 
984 	return dc;
985 }
986 
987 static void __detach_discard_cmd(struct discard_cmd_control *dcc,
988 							struct discard_cmd *dc)
989 {
990 	if (dc->state == D_DONE)
991 		atomic_sub(dc->queued, &dcc->queued_discard);
992 
993 	list_del(&dc->list);
994 	rb_erase_cached(&dc->rb_node, &dcc->root);
995 	dcc->undiscard_blks -= dc->len;
996 
997 	kmem_cache_free(discard_cmd_slab, dc);
998 
999 	atomic_dec(&dcc->discard_cmd_cnt);
1000 }
1001 
1002 static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
1003 							struct discard_cmd *dc)
1004 {
1005 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1006 	unsigned long flags;
1007 
1008 	trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);
1009 
1010 	spin_lock_irqsave(&dc->lock, flags);
1011 	if (dc->bio_ref) {
1012 		spin_unlock_irqrestore(&dc->lock, flags);
1013 		return;
1014 	}
1015 	spin_unlock_irqrestore(&dc->lock, flags);
1016 
1017 	f2fs_bug_on(sbi, dc->ref);
1018 
1019 	if (dc->error == -EOPNOTSUPP)
1020 		dc->error = 0;
1021 
1022 	if (dc->error)
1023 		printk_ratelimited(
1024 			"%sF2FS-fs (%s): Issue discard(%u, %u, %u) failed, ret: %d",
1025 			KERN_INFO, sbi->sb->s_id,
1026 			dc->lstart, dc->start, dc->len, dc->error);
1027 	__detach_discard_cmd(dcc, dc);
1028 }
1029 
1030 static void f2fs_submit_discard_endio(struct bio *bio)
1031 {
1032 	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
1033 	unsigned long flags;
1034 
1035 	spin_lock_irqsave(&dc->lock, flags);
1036 	if (!dc->error)
1037 		dc->error = blk_status_to_errno(bio->bi_status);
1038 	dc->bio_ref--;
1039 	if (!dc->bio_ref && dc->state == D_SUBMIT) {
1040 		dc->state = D_DONE;
1041 		complete_all(&dc->wait);
1042 	}
1043 	spin_unlock_irqrestore(&dc->lock, flags);
1044 	bio_put(bio);
1045 }
1046 
1047 static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
1048 				block_t start, block_t end)
1049 {
1050 #ifdef CONFIG_F2FS_CHECK_FS
1051 	struct seg_entry *sentry;
1052 	unsigned int segno;
1053 	block_t blk = start;
1054 	unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
1055 	unsigned long *map;
1056 
1057 	while (blk < end) {
1058 		segno = GET_SEGNO(sbi, blk);
1059 		sentry = get_seg_entry(sbi, segno);
1060 		offset = GET_BLKOFF_FROM_SEG0(sbi, blk);
1061 
1062 		if (end < START_BLOCK(sbi, segno + 1))
1063 			size = GET_BLKOFF_FROM_SEG0(sbi, end);
1064 		else
1065 			size = max_blocks;
1066 		map = (unsigned long *)(sentry->cur_valid_map);
1067 		offset = __find_rev_next_bit(map, size, offset);
1068 		f2fs_bug_on(sbi, offset != size);
1069 		blk = START_BLOCK(sbi, segno + 1);
1070 	}
1071 #endif
1072 }
1073 
1074 static void __init_discard_policy(struct f2fs_sb_info *sbi,
1075 				struct discard_policy *dpolicy,
1076 				int discard_type, unsigned int granularity)
1077 {
1078 	/* common policy */
1079 	dpolicy->type = discard_type;
1080 	dpolicy->sync = true;
1081 	dpolicy->ordered = false;
1082 	dpolicy->granularity = granularity;
1083 
1084 	dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
1085 	dpolicy->io_aware_gran = MAX_PLIST_NUM;
1086 	dpolicy->timeout = false;
1087 
1088 	if (discard_type == DPOLICY_BG) {
1089 		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1090 		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1091 		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1092 		dpolicy->io_aware = true;
1093 		dpolicy->sync = false;
1094 		dpolicy->ordered = true;
1095 		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
1096 			dpolicy->granularity = 1;
1097 			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1098 		}
1099 	} else if (discard_type == DPOLICY_FORCE) {
1100 		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1101 		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1102 		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
1103 		dpolicy->io_aware = false;
1104 	} else if (discard_type == DPOLICY_FSTRIM) {
1105 		dpolicy->io_aware = false;
1106 	} else if (discard_type == DPOLICY_UMOUNT) {
1107 		dpolicy->io_aware = false;
1108 		/* we need to issue all to keep CP_TRIMMED_FLAG */
1109 		dpolicy->granularity = 1;
1110 		dpolicy->timeout = true;
1111 	}
1112 }
1113 
1114 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1115 				struct block_device *bdev, block_t lstart,
1116 				block_t start, block_t len);
1117 /* this function is copied from blkdev_issue_discard from block/blk-lib.c */
1118 static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
1119 						struct discard_policy *dpolicy,
1120 						struct discard_cmd *dc,
1121 						unsigned int *issued)
1122 {
1123 	struct block_device *bdev = dc->bdev;
1124 	struct request_queue *q = bdev_get_queue(bdev);
1125 	unsigned int max_discard_blocks =
1126 			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1127 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1128 	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1129 					&(dcc->fstrim_list) : &(dcc->wait_list);
1130 	int flag = dpolicy->sync ? REQ_SYNC : 0;
1131 	block_t lstart, start, len, total_len;
1132 	int err = 0;
1133 
1134 	if (dc->state != D_PREP)
1135 		return 0;
1136 
1137 	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1138 		return 0;
1139 
1140 	trace_f2fs_issue_discard(bdev, dc->start, dc->len);
1141 
1142 	lstart = dc->lstart;
1143 	start = dc->start;
1144 	len = dc->len;
1145 	total_len = len;
1146 
1147 	dc->len = 0;
1148 
1149 	while (total_len && *issued < dpolicy->max_requests && !err) {
1150 		struct bio *bio = NULL;
1151 		unsigned long flags;
1152 		bool last = true;
1153 
1154 		if (len > max_discard_blocks) {
1155 			len = max_discard_blocks;
1156 			last = false;
1157 		}
1158 
1159 		(*issued)++;
1160 		if (*issued == dpolicy->max_requests)
1161 			last = true;
1162 
1163 		dc->len += len;
1164 
1165 		if (time_to_inject(sbi, FAULT_DISCARD)) {
1166 			f2fs_show_injection_info(sbi, FAULT_DISCARD);
1167 			err = -EIO;
1168 			goto submit;
1169 		}
1170 		err = __blkdev_issue_discard(bdev,
1171 					SECTOR_FROM_BLOCK(start),
1172 					SECTOR_FROM_BLOCK(len),
1173 					GFP_NOFS, 0, &bio);
1174 submit:
1175 		if (err) {
1176 			spin_lock_irqsave(&dc->lock, flags);
1177 			if (dc->state == D_PARTIAL)
1178 				dc->state = D_SUBMIT;
1179 			spin_unlock_irqrestore(&dc->lock, flags);
1180 
1181 			break;
1182 		}
1183 
1184 		f2fs_bug_on(sbi, !bio);
1185 
1186 		/*
1187 		 * must be updated before submission so the endio path
1188 		 * cannot mark this command D_DONE right away
1189 		 */
1190 		spin_lock_irqsave(&dc->lock, flags);
1191 		if (last)
1192 			dc->state = D_SUBMIT;
1193 		else
1194 			dc->state = D_PARTIAL;
1195 		dc->bio_ref++;
1196 		spin_unlock_irqrestore(&dc->lock, flags);
1197 
1198 		atomic_inc(&dcc->queued_discard);
1199 		dc->queued++;
1200 		list_move_tail(&dc->list, wait_list);
1201 
1202 		/* sanity check on discard range */
1203 		__check_sit_bitmap(sbi, lstart, lstart + len);
1204 
1205 		bio->bi_private = dc;
1206 		bio->bi_end_io = f2fs_submit_discard_endio;
1207 		bio->bi_opf |= flag;
1208 		submit_bio(bio);
1209 
1210 		atomic_inc(&dcc->issued_discard);
1211 
1212 		f2fs_update_iostat(sbi, FS_DISCARD, 1);
1213 
1214 		lstart += len;
1215 		start += len;
1216 		total_len -= len;
1217 		len = total_len;
1218 	}
1219 
1220 	if (!err && len) {
1221 		dcc->undiscard_blks -= len;
1222 		__update_discard_tree_range(sbi, bdev, lstart, start, len);
1223 	}
1224 	return err;
1225 }
1226 
1227 static void __insert_discard_tree(struct f2fs_sb_info *sbi,
1228 				struct block_device *bdev, block_t lstart,
1229 				block_t start, block_t len,
1230 				struct rb_node **insert_p,
1231 				struct rb_node *insert_parent)
1232 {
1233 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1234 	struct rb_node **p;
1235 	struct rb_node *parent = NULL;
1236 	bool leftmost = true;
1237 
1238 	if (insert_p && insert_parent) {
1239 		parent = insert_parent;
1240 		p = insert_p;
1241 		goto do_insert;
1242 	}
1243 
1244 	p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
1245 							lstart, &leftmost);
1246 do_insert:
1247 	__attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
1248 								p, leftmost);
1249 }
1250 
1251 static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
1252 						struct discard_cmd *dc)
1253 {
1254 	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
1255 }
1256 
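/*
 * Shrink or split a queued discard command so that it no longer covers
 * @blkaddr, which is about to be reused.
 */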
1257 static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
1258 				struct discard_cmd *dc, block_t blkaddr)
1259 {
1260 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1261 	struct discard_info di = dc->di;
1262 	bool modified = false;
1263 
1264 	if (dc->state == D_DONE || dc->len == 1) {
1265 		__remove_discard_cmd(sbi, dc);
1266 		return;
1267 	}
1268 
1269 	dcc->undiscard_blks -= di.len;
1270 
1271 	if (blkaddr > di.lstart) {
1272 		dc->len = blkaddr - dc->lstart;
1273 		dcc->undiscard_blks += dc->len;
1274 		__relocate_discard_cmd(dcc, dc);
1275 		modified = true;
1276 	}
1277 
1278 	if (blkaddr < di.lstart + di.len - 1) {
1279 		if (modified) {
1280 			__insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
1281 					di.start + blkaddr + 1 - di.lstart,
1282 					di.lstart + di.len - 1 - blkaddr,
1283 					NULL, NULL);
1284 		} else {
1285 			dc->lstart++;
1286 			dc->len--;
1287 			dc->start++;
1288 			dcc->undiscard_blks += dc->len;
1289 			__relocate_discard_cmd(dcc, dc);
1290 		}
1291 	}
1292 }
1293 
1294 static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
1295 				struct block_device *bdev, block_t lstart,
1296 				block_t start, block_t len)
1297 {
1298 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1299 	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1300 	struct discard_cmd *dc;
1301 	struct discard_info di = {0};
1302 	struct rb_node **insert_p = NULL, *insert_parent = NULL;
1303 	struct request_queue *q = bdev_get_queue(bdev);
1304 	unsigned int max_discard_blocks =
1305 			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1306 	block_t end = lstart + len;
1307 
1308 	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1309 					NULL, lstart,
1310 					(struct rb_entry **)&prev_dc,
1311 					(struct rb_entry **)&next_dc,
1312 					&insert_p, &insert_parent, true, NULL);
1313 	if (dc)
1314 		prev_dc = dc;
1315 
1316 	if (!prev_dc) {
1317 		di.lstart = lstart;
1318 		di.len = next_dc ? next_dc->lstart - lstart : len;
1319 		di.len = min(di.len, len);
1320 		di.start = start;
1321 	}
1322 
1323 	while (1) {
1324 		struct rb_node *node;
1325 		bool merged = false;
1326 		struct discard_cmd *tdc = NULL;
1327 
1328 		if (prev_dc) {
1329 			di.lstart = prev_dc->lstart + prev_dc->len;
1330 			if (di.lstart < lstart)
1331 				di.lstart = lstart;
1332 			if (di.lstart >= end)
1333 				break;
1334 
1335 			if (!next_dc || next_dc->lstart > end)
1336 				di.len = end - di.lstart;
1337 			else
1338 				di.len = next_dc->lstart - di.lstart;
1339 			di.start = start + di.lstart - lstart;
1340 		}
1341 
1342 		if (!di.len)
1343 			goto next;
1344 
1345 		if (prev_dc && prev_dc->state == D_PREP &&
1346 			prev_dc->bdev == bdev &&
1347 			__is_discard_back_mergeable(&di, &prev_dc->di,
1348 							max_discard_blocks)) {
1349 			prev_dc->di.len += di.len;
1350 			dcc->undiscard_blks += di.len;
1351 			__relocate_discard_cmd(dcc, prev_dc);
1352 			di = prev_dc->di;
1353 			tdc = prev_dc;
1354 			merged = true;
1355 		}
1356 
1357 		if (next_dc && next_dc->state == D_PREP &&
1358 			next_dc->bdev == bdev &&
1359 			__is_discard_front_mergeable(&di, &next_dc->di,
1360 							max_discard_blocks)) {
1361 			next_dc->di.lstart = di.lstart;
1362 			next_dc->di.len += di.len;
1363 			next_dc->di.start = di.start;
1364 			dcc->undiscard_blks += di.len;
1365 			__relocate_discard_cmd(dcc, next_dc);
1366 			if (tdc)
1367 				__remove_discard_cmd(sbi, tdc);
1368 			merged = true;
1369 		}
1370 
1371 		if (!merged) {
1372 			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
1373 							di.len, NULL, NULL);
1374 		}
1375  next:
1376 		prev_dc = next_dc;
1377 		if (!prev_dc)
1378 			break;
1379 
1380 		node = rb_next(&prev_dc->rb_node);
1381 		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1382 	}
1383 }
1384 
1385 static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
1386 		struct block_device *bdev, block_t blkstart, block_t blklen)
1387 {
1388 	block_t lblkstart = blkstart;
1389 
1390 	if (!f2fs_bdev_support_discard(bdev))
1391 		return 0;
1392 
1393 	trace_f2fs_queue_discard(bdev, blkstart, blklen);
1394 
1395 	if (f2fs_is_multi_device(sbi)) {
1396 		int devi = f2fs_target_device_index(sbi, blkstart);
1397 
1398 		blkstart -= FDEV(devi).start_blk;
1399 	}
1400 	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1401 	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1402 	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1403 	return 0;
1404 }
1405 
1406 static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
1407 					struct discard_policy *dpolicy)
1408 {
1409 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1410 	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
1411 	struct rb_node **insert_p = NULL, *insert_parent = NULL;
1412 	struct discard_cmd *dc;
1413 	struct blk_plug plug;
1414 	unsigned int pos = dcc->next_pos;
1415 	unsigned int issued = 0;
1416 	bool io_interrupted = false;
1417 
1418 	mutex_lock(&dcc->cmd_lock);
1419 	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1420 					NULL, pos,
1421 					(struct rb_entry **)&prev_dc,
1422 					(struct rb_entry **)&next_dc,
1423 					&insert_p, &insert_parent, true, NULL);
1424 	if (!dc)
1425 		dc = next_dc;
1426 
1427 	blk_start_plug(&plug);
1428 
1429 	while (dc) {
1430 		struct rb_node *node;
1431 		int err = 0;
1432 
1433 		if (dc->state != D_PREP)
1434 			goto next;
1435 
1436 		if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
1437 			io_interrupted = true;
1438 			break;
1439 		}
1440 
1441 		dcc->next_pos = dc->lstart + dc->len;
1442 		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
1443 
1444 		if (issued >= dpolicy->max_requests)
1445 			break;
1446 next:
1447 		node = rb_next(&dc->rb_node);
1448 		if (err)
1449 			__remove_discard_cmd(sbi, dc);
1450 		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
1451 	}
1452 
1453 	blk_finish_plug(&plug);
1454 
1455 	if (!dc)
1456 		dcc->next_pos = 0;
1457 
1458 	mutex_unlock(&dcc->cmd_lock);
1459 
1460 	if (!issued && io_interrupted)
1461 		issued = -1;
1462 
1463 	return issued;
1464 }
1465 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1466 					struct discard_policy *dpolicy);
1467 
1468 static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
1469 					struct discard_policy *dpolicy)
1470 {
1471 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1472 	struct list_head *pend_list;
1473 	struct discard_cmd *dc, *tmp;
1474 	struct blk_plug plug;
1475 	int i, issued;
1476 	bool io_interrupted = false;
1477 
1478 	if (dpolicy->timeout)
1479 		f2fs_update_time(sbi, UMOUNT_DISCARD_TIMEOUT);
1480 
1481 retry:
1482 	issued = 0;
1483 	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1484 		if (dpolicy->timeout &&
1485 				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1486 			break;
1487 
1488 		if (i + 1 < dpolicy->granularity)
1489 			break;
1490 
1491 		if (i + 1 < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
1492 			return __issue_discard_cmd_orderly(sbi, dpolicy);
1493 
1494 		pend_list = &dcc->pend_list[i];
1495 
1496 		mutex_lock(&dcc->cmd_lock);
1497 		if (list_empty(pend_list))
1498 			goto next;
1499 		if (unlikely(dcc->rbtree_check))
1500 			f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
1501 								&dcc->root));
1502 		blk_start_plug(&plug);
1503 		list_for_each_entry_safe(dc, tmp, pend_list, list) {
1504 			f2fs_bug_on(sbi, dc->state != D_PREP);
1505 
1506 			if (dpolicy->timeout &&
1507 				f2fs_time_over(sbi, UMOUNT_DISCARD_TIMEOUT))
1508 				break;
1509 
1510 			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1511 						!is_idle(sbi, DISCARD_TIME)) {
1512 				io_interrupted = true;
1513 				break;
1514 			}
1515 
1516 			__submit_discard_cmd(sbi, dpolicy, dc, &issued);
1517 
1518 			if (issued >= dpolicy->max_requests)
1519 				break;
1520 		}
1521 		blk_finish_plug(&plug);
1522 next:
1523 		mutex_unlock(&dcc->cmd_lock);
1524 
1525 		if (issued >= dpolicy->max_requests || io_interrupted)
1526 			break;
1527 	}
1528 
1529 	if (dpolicy->type == DPOLICY_UMOUNT && issued) {
1530 		__wait_all_discard_cmd(sbi, dpolicy);
1531 		goto retry;
1532 	}
1533 
1534 	if (!issued && io_interrupted)
1535 		issued = -1;
1536 
1537 	return issued;
1538 }
1539 
1540 static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1541 {
1542 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1543 	struct list_head *pend_list;
1544 	struct discard_cmd *dc, *tmp;
1545 	int i;
1546 	bool dropped = false;
1547 
1548 	mutex_lock(&dcc->cmd_lock);
1549 	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1550 		pend_list = &dcc->pend_list[i];
1551 		list_for_each_entry_safe(dc, tmp, pend_list, list) {
1552 			f2fs_bug_on(sbi, dc->state != D_PREP);
1553 			__remove_discard_cmd(sbi, dc);
1554 			dropped = true;
1555 		}
1556 	}
1557 	mutex_unlock(&dcc->cmd_lock);
1558 
1559 	return dropped;
1560 }
1561 
1562 void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1563 {
1564 	__drop_discard_cmd(sbi);
1565 }
1566 
1567 static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
1568 							struct discard_cmd *dc)
1569 {
1570 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1571 	unsigned int len = 0;
1572 
1573 	wait_for_completion_io(&dc->wait);
1574 	mutex_lock(&dcc->cmd_lock);
1575 	f2fs_bug_on(sbi, dc->state != D_DONE);
1576 	dc->ref--;
1577 	if (!dc->ref) {
1578 		if (!dc->error)
1579 			len = dc->len;
1580 		__remove_discard_cmd(sbi, dc);
1581 	}
1582 	mutex_unlock(&dcc->cmd_lock);
1583 
1584 	return len;
1585 }
1586 
1587 static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
1588 						struct discard_policy *dpolicy,
1589 						block_t start, block_t end)
1590 {
1591 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1592 	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
1593 					&(dcc->fstrim_list) : &(dcc->wait_list);
1594 	struct discard_cmd *dc, *tmp;
1595 	bool need_wait;
1596 	unsigned int trimmed = 0;
1597 
1598 next:
1599 	need_wait = false;
1600 
1601 	mutex_lock(&dcc->cmd_lock);
1602 	list_for_each_entry_safe(dc, tmp, wait_list, list) {
1603 		if (dc->lstart + dc->len <= start || end <= dc->lstart)
1604 			continue;
1605 		if (dc->len < dpolicy->granularity)
1606 			continue;
1607 		if (dc->state == D_DONE && !dc->ref) {
1608 			wait_for_completion_io(&dc->wait);
1609 			if (!dc->error)
1610 				trimmed += dc->len;
1611 			__remove_discard_cmd(sbi, dc);
1612 		} else {
1613 			dc->ref++;
1614 			need_wait = true;
1615 			break;
1616 		}
1617 	}
1618 	mutex_unlock(&dcc->cmd_lock);
1619 
1620 	if (need_wait) {
1621 		trimmed += __wait_one_discard_bio(sbi, dc);
1622 		goto next;
1623 	}
1624 
1625 	return trimmed;
1626 }
1627 
1628 static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
1629 						struct discard_policy *dpolicy)
1630 {
1631 	struct discard_policy dp;
1632 	unsigned int discard_blks;
1633 
1634 	if (dpolicy)
1635 		return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1636 
1637 	/* wait all */
1638 	__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1639 	discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1640 	__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1641 	discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1642 
1643 	return discard_blks;
1644 }
1645 
1646 /* This should be covered by global mutex, &sit_i->sentry_lock */
1647 static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1648 {
1649 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1650 	struct discard_cmd *dc;
1651 	bool need_wait = false;
1652 
1653 	mutex_lock(&dcc->cmd_lock);
1654 	dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
1655 							NULL, blkaddr);
1656 	if (dc) {
1657 		if (dc->state == D_PREP) {
1658 			__punch_discard_cmd(sbi, dc, blkaddr);
1659 		} else {
1660 			dc->ref++;
1661 			need_wait = true;
1662 		}
1663 	}
1664 	mutex_unlock(&dcc->cmd_lock);
1665 
1666 	if (need_wait)
1667 		__wait_one_discard_bio(sbi, dc);
1668 }
1669 
1670 void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1671 {
1672 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1673 
1674 	if (dcc && dcc->f2fs_issue_discard) {
1675 		struct task_struct *discard_thread = dcc->f2fs_issue_discard;
1676 
1677 		dcc->f2fs_issue_discard = NULL;
1678 		kthread_stop(discard_thread);
1679 	}
1680 }
1681 
1682 /* This comes from f2fs_put_super */
1683 bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1684 {
1685 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1686 	struct discard_policy dpolicy;
1687 	bool dropped;
1688 
1689 	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
1690 					dcc->discard_granularity);
1691 	__issue_discard_cmd(sbi, &dpolicy);
1692 	dropped = __drop_discard_cmd(sbi);
1693 
1694 	/* just to make sure there are no pending discard commands */
1695 	__wait_all_discard_cmd(sbi, NULL);
1696 
1697 	f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1698 	return dropped;
1699 }
1700 
1701 static int issue_discard_thread(void *data)
1702 {
1703 	struct f2fs_sb_info *sbi = data;
1704 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1705 	wait_queue_head_t *q = &dcc->discard_wait_queue;
1706 	struct discard_policy dpolicy;
1707 	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
1708 	int issued;
1709 
1710 	set_freezable();
1711 
1712 	do {
1713 		__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
1714 					dcc->discard_granularity);
1715 
1716 		wait_event_interruptible_timeout(*q,
1717 				kthread_should_stop() || freezing(current) ||
1718 				dcc->discard_wake,
1719 				msecs_to_jiffies(wait_ms));
1720 
1721 		if (dcc->discard_wake)
1722 			dcc->discard_wake = 0;
1723 
1724 		/* clean up pending candidates before going to sleep */
1725 		if (atomic_read(&dcc->queued_discard))
1726 			__wait_all_discard_cmd(sbi, NULL);
1727 
1728 		if (try_to_freeze())
1729 			continue;
1730 		if (f2fs_readonly(sbi->sb))
1731 			continue;
1732 		if (kthread_should_stop())
1733 			return 0;
1734 		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
1735 			wait_ms = dpolicy.max_interval;
1736 			continue;
1737 		}
1738 
1739 		if (sbi->gc_mode == GC_URGENT)
1740 			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
1741 
1742 		sb_start_intwrite(sbi->sb);
1743 
1744 		issued = __issue_discard_cmd(sbi, &dpolicy);
1745 		if (issued > 0) {
1746 			__wait_all_discard_cmd(sbi, &dpolicy);
1747 			wait_ms = dpolicy.min_interval;
1748 		} else if (issued == -1) {
1749 			wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
1750 			if (!wait_ms)
1751 				wait_ms = dpolicy.mid_interval;
1752 		} else {
1753 			wait_ms = dpolicy.max_interval;
1754 		}
1755 
1756 		sb_end_intwrite(sbi->sb);
1757 
1758 	} while (!kthread_should_stop());
1759 	return 0;
1760 }
1761 
1762 #ifdef CONFIG_BLK_DEV_ZONED
1763 static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
1764 		struct block_device *bdev, block_t blkstart, block_t blklen)
1765 {
1766 	sector_t sector, nr_sects;
1767 	block_t lblkstart = blkstart;
1768 	int devi = 0;
1769 
1770 	if (f2fs_is_multi_device(sbi)) {
1771 		devi = f2fs_target_device_index(sbi, blkstart);
1772 		if (blkstart < FDEV(devi).start_blk ||
1773 		    blkstart > FDEV(devi).end_blk) {
1774 			f2fs_err(sbi, "Invalid block %x", blkstart);
1775 			return -EIO;
1776 		}
1777 		blkstart -= FDEV(devi).start_blk;
1778 	}
1779 
1780 	/* For sequential zones, reset the zone write pointer */
1781 	if (f2fs_blkz_is_seq(sbi, devi, blkstart)) {
1782 		sector = SECTOR_FROM_BLOCK(blkstart);
1783 		nr_sects = SECTOR_FROM_BLOCK(blklen);
1784 
1785 		if (sector & (bdev_zone_sectors(bdev) - 1) ||
1786 				nr_sects != bdev_zone_sectors(bdev)) {
1787 			f2fs_err(sbi, "(%d) %s: Unaligned zone reset attempted (block %x + %x)",
1788 				 devi, sbi->s_ndevs ? FDEV(devi).path : "",
1789 				 blkstart, blklen);
1790 			return -EIO;
1791 		}
1792 		trace_f2fs_issue_reset_zone(bdev, blkstart);
1793 		return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
1794 					sector, nr_sects, GFP_NOFS);
1795 	}
1796 
1797 	/* For conventional zones, use regular discard if supported */
1798 	return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1799 }
1800 #endif
1801 
1802 static int __issue_discard_async(struct f2fs_sb_info *sbi,
1803 		struct block_device *bdev, block_t blkstart, block_t blklen)
1804 {
1805 #ifdef CONFIG_BLK_DEV_ZONED
1806 	if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
1807 		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
1808 #endif
1809 	return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
1810 }
1811 
1812 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1813 				block_t blkstart, block_t blklen)
1814 {
1815 	sector_t start = blkstart, len = 0;
1816 	struct block_device *bdev;
1817 	struct seg_entry *se;
1818 	unsigned int offset;
1819 	block_t i;
1820 	int err = 0;
1821 
1822 	bdev = f2fs_target_device(sbi, blkstart, NULL);
1823 
1824 	for (i = blkstart; i < blkstart + blklen; i++, len++) {
1825 		if (i != start) {
1826 			struct block_device *bdev2 =
1827 				f2fs_target_device(sbi, i, NULL);
1828 
1829 			if (bdev2 != bdev) {
1830 				err = __issue_discard_async(sbi, bdev,
1831 						start, len);
1832 				if (err)
1833 					return err;
1834 				bdev = bdev2;
1835 				start = i;
1836 				len = 0;
1837 			}
1838 		}
1839 
1840 		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
1841 		offset = GET_BLKOFF_FROM_SEG0(sbi, i);
1842 
1843 		if (!f2fs_test_and_set_bit(offset, se->discard_map))
1844 			sbi->discard_blks--;
1845 	}
1846 
1847 	if (len)
1848 		err = __issue_discard_async(sbi, bdev, start, len);
1849 	return err;
1850 }
1851 
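/*
 * Scan one segment's validity bitmaps for block ranges that can be
 * discarded and queue them as discard entries; with check_only, only
 * report whether at least one candidate exists.
 */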
1852 static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
1853 							bool check_only)
1854 {
1855 	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
1856 	int max_blocks = sbi->blocks_per_seg;
1857 	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
1858 	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
1859 	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1860 	unsigned long *discard_map = (unsigned long *)se->discard_map;
1861 	unsigned long *dmap = SIT_I(sbi)->tmp_map;
1862 	unsigned int start = 0, end = -1;
1863 	bool force = (cpc->reason & CP_DISCARD);
1864 	struct discard_entry *de = NULL;
1865 	struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
1866 	int i;
1867 
1868 	if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
1869 		return false;
1870 
1871 	if (!force) {
1872 		if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
1873 			SM_I(sbi)->dcc_info->nr_discards >=
1874 				SM_I(sbi)->dcc_info->max_discards)
1875 			return false;
1876 	}
1877 
1878 	/* SIT_VBLOCK_MAP_SIZE should be a multiple of sizeof(unsigned long) */
1879 	for (i = 0; i < entries; i++)
1880 		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
1881 				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
1882 
1883 	while (force || SM_I(sbi)->dcc_info->nr_discards <=
1884 				SM_I(sbi)->dcc_info->max_discards) {
1885 		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
1886 		if (start >= max_blocks)
1887 			break;
1888 
1889 		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
1890 		if (force && start && end != max_blocks
1891 					&& (end - start) < cpc->trim_minlen)
1892 			continue;
1893 
1894 		if (check_only)
1895 			return true;
1896 
1897 		if (!de) {
1898 			de = f2fs_kmem_cache_alloc(discard_entry_slab,
1899 								GFP_F2FS_ZERO);
1900 			de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
1901 			list_add_tail(&de->list, head);
1902 		}
1903 
1904 		for (i = start; i < end; i++)
1905 			__set_bit_le(i, (void *)de->discard_map);
1906 
1907 		SM_I(sbi)->dcc_info->nr_discards += end - start;
1908 	}
1909 	return false;
1910 }
1911 
1912 static void release_discard_addr(struct discard_entry *entry)
1913 {
1914 	list_del(&entry->list);
1915 	kmem_cache_free(discard_entry_slab, entry);
1916 }
1917 
1918 void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
1919 {
1920 	struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1921 	struct discard_entry *entry, *this;
1922 
1923 	/* drop caches */
1924 	list_for_each_entry_safe(entry, this, head, list)
1925 		release_discard_addr(entry);
1926 }
1927 
1928 /*
1929  * Should call f2fs_clear_prefree_segments after checkpoint is done.
1930  */
1931 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
1932 {
1933 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1934 	unsigned int segno;
1935 
1936 	mutex_lock(&dirty_i->seglist_lock);
1937 	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
1938 		__set_test_and_free(sbi, segno);
1939 	mutex_unlock(&dirty_i->seglist_lock);
1940 }
1941 
1942 void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
1943 						struct cp_control *cpc)
1944 {
1945 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1946 	struct list_head *head = &dcc->entry_list;
1947 	struct discard_entry *entry, *this;
1948 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1949 	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
1950 	unsigned int start = 0, end = -1;
1951 	unsigned int secno, start_segno;
1952 	bool force = (cpc->reason & CP_DISCARD);
1953 	bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
1954 
1955 	mutex_lock(&dirty_i->seglist_lock);
1956 
1957 	while (1) {
1958 		int i;
1959 
1960 		if (need_align && end != -1)
1961 			end--;
1962 		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
1963 		if (start >= MAIN_SEGS(sbi))
1964 			break;
1965 		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
1966 								start + 1);
1967 
1968 		if (need_align) {
1969 			start = rounddown(start, sbi->segs_per_sec);
1970 			end = roundup(end, sbi->segs_per_sec);
1971 		}
1972 
1973 		for (i = start; i < end; i++) {
1974 			if (test_and_clear_bit(i, prefree_map))
1975 				dirty_i->nr_dirty[PRE]--;
1976 		}
1977 
1978 		if (!f2fs_realtime_discard_enable(sbi))
1979 			continue;
1980 
1981 		if (force && start >= cpc->trim_start &&
1982 					(end - 1) <= cpc->trim_end)
1983 				continue;
1984 
1985 		if (!f2fs_lfs_mode(sbi) || !__is_large_section(sbi)) {
1986 			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
1987 				(end - start) << sbi->log_blocks_per_seg);
1988 			continue;
1989 		}
1990 next:
1991 		secno = GET_SEC_FROM_SEG(sbi, start);
1992 		start_segno = GET_SEG_FROM_SEC(sbi, secno);
1993 		if (!IS_CURSEC(sbi, secno) &&
1994 			!get_valid_blocks(sbi, start, true))
1995 			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
1996 				sbi->segs_per_sec << sbi->log_blocks_per_seg);
1997 
1998 		start = start_segno + sbi->segs_per_sec;
1999 		if (start < end)
2000 			goto next;
2001 		else
2002 			end = start - 1;
2003 	}
2004 	mutex_unlock(&dirty_i->seglist_lock);
2005 
2006 	/* send small discards */
2007 	list_for_each_entry_safe(entry, this, head, list) {
2008 		unsigned int cur_pos = 0, next_pos, len, total_len = 0;
2009 		bool is_valid = test_bit_le(0, entry->discard_map);
2010 
2011 find_next:
2012 		if (is_valid) {
2013 			next_pos = find_next_zero_bit_le(entry->discard_map,
2014 					sbi->blocks_per_seg, cur_pos);
2015 			len = next_pos - cur_pos;
2016 
2017 			if (f2fs_sb_has_blkzoned(sbi) ||
2018 			    (force && len < cpc->trim_minlen))
2019 				goto skip;
2020 
2021 			f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
2022 									len);
2023 			total_len += len;
2024 		} else {
2025 			next_pos = find_next_bit_le(entry->discard_map,
2026 					sbi->blocks_per_seg, cur_pos);
2027 		}
2028 skip:
2029 		cur_pos = next_pos;
2030 		is_valid = !is_valid;
2031 
2032 		if (cur_pos < sbi->blocks_per_seg)
2033 			goto find_next;
2034 
2035 		release_discard_addr(entry);
2036 		dcc->nr_discards -= total_len;
2037 	}
2038 
2039 	wake_up_discard_thread(sbi, false);
2040 }
2041 
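/*
 * Allocate and initialize the per-sb discard command control structure
 * (if not already present) and start the background discard thread.
 */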
2042 static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
2043 {
2044 	dev_t dev = sbi->sb->s_bdev->bd_dev;
2045 	struct discard_cmd_control *dcc;
2046 	int err = 0, i;
2047 
2048 	if (SM_I(sbi)->dcc_info) {
2049 		dcc = SM_I(sbi)->dcc_info;
2050 		goto init_thread;
2051 	}
2052 
2053 	dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2054 	if (!dcc)
2055 		return -ENOMEM;
2056 
2057 	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2058 	INIT_LIST_HEAD(&dcc->entry_list);
2059 	for (i = 0; i < MAX_PLIST_NUM; i++)
2060 		INIT_LIST_HEAD(&dcc->pend_list[i]);
2061 	INIT_LIST_HEAD(&dcc->wait_list);
2062 	INIT_LIST_HEAD(&dcc->fstrim_list);
2063 	mutex_init(&dcc->cmd_lock);
2064 	atomic_set(&dcc->issued_discard, 0);
2065 	atomic_set(&dcc->queued_discard, 0);
2066 	atomic_set(&dcc->discard_cmd_cnt, 0);
2067 	dcc->nr_discards = 0;
2068 	dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
2069 	dcc->undiscard_blks = 0;
2070 	dcc->next_pos = 0;
2071 	dcc->root = RB_ROOT_CACHED;
2072 	dcc->rbtree_check = false;
2073 
2074 	init_waitqueue_head(&dcc->discard_wait_queue);
2075 	SM_I(sbi)->dcc_info = dcc;
2076 init_thread:
2077 	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
2078 				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
2079 	if (IS_ERR(dcc->f2fs_issue_discard)) {
2080 		err = PTR_ERR(dcc->f2fs_issue_discard);
2081 		kvfree(dcc);
2082 		SM_I(sbi)->dcc_info = NULL;
2083 		return err;
2084 	}
2085 
2086 	return err;
2087 }
2088 
2089 static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2090 {
2091 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2092 
2093 	if (!dcc)
2094 		return;
2095 
2096 	f2fs_stop_discard_thread(sbi);
2097 
2098 	/*
2099 	 * Recovery can cache discard commands, so in the error path of
2100 	 * fill_super() we must give them a chance to be handled.
2101 	 */
2102 	if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
2103 		f2fs_issue_discard_timeout(sbi);
2104 
2105 	kvfree(dcc);
2106 	SM_I(sbi)->dcc_info = NULL;
2107 }
2108 
2109 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
2110 {
2111 	struct sit_info *sit_i = SIT_I(sbi);
2112 
2113 	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
2114 		sit_i->dirty_sentries++;
2115 		return false;
2116 	}
2117 
2118 	return true;
2119 }
2120 
2121 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
2122 					unsigned int segno, int modified)
2123 {
2124 	struct seg_entry *se = get_seg_entry(sbi, segno);
2125 	se->type = type;
2126 	if (modified)
2127 		__mark_sit_entry_dirty(sbi, segno);
2128 }
2129 
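/*
 * Apply a single-block change (del = +1 on allocation, -1 on invalidation)
 * to the SIT entry covering blkaddr, keeping the current/checkpoint
 * validity bitmaps, discard map and per-segment counters consistent.
 */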
2130 static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
2131 {
2132 	struct seg_entry *se;
2133 	unsigned int segno, offset;
2134 	long int new_vblocks;
2135 	bool exist;
2136 #ifdef CONFIG_F2FS_CHECK_FS
2137 	bool mir_exist;
2138 #endif
2139 
2140 	segno = GET_SEGNO(sbi, blkaddr);
2141 
2142 	se = get_seg_entry(sbi, segno);
2143 	new_vblocks = se->valid_blocks + del;
2144 	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2145 
2146 	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
2147 				(new_vblocks > sbi->blocks_per_seg)));
2148 
2149 	se->valid_blocks = new_vblocks;
2150 	se->mtime = get_mtime(sbi, false);
2151 	if (se->mtime > SIT_I(sbi)->max_mtime)
2152 		SIT_I(sbi)->max_mtime = se->mtime;
2153 
2154 	/* Update valid block bitmap */
2155 	if (del > 0) {
2156 		exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
2157 #ifdef CONFIG_F2FS_CHECK_FS
2158 		mir_exist = f2fs_test_and_set_bit(offset,
2159 						se->cur_valid_map_mir);
2160 		if (unlikely(exist != mir_exist)) {
2161 			f2fs_err(sbi, "Inconsistent error when setting bitmap, blk:%u, old bit:%d",
2162 				 blkaddr, exist);
2163 			f2fs_bug_on(sbi, 1);
2164 		}
2165 #endif
2166 		if (unlikely(exist)) {
2167 			f2fs_err(sbi, "Bitmap was wrongly set, blk:%u",
2168 				 blkaddr);
2169 			f2fs_bug_on(sbi, 1);
2170 			se->valid_blocks--;
2171 			del = 0;
2172 		}
2173 
2174 		if (!f2fs_test_and_set_bit(offset, se->discard_map))
2175 			sbi->discard_blks--;
2176 
2177 		/*
2178 		 * SSR should never reuse a block which is checkpointed
2179 		 * or newly invalidated.
2180 		 */
2181 		if (!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2182 			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
2183 				se->ckpt_valid_blocks++;
2184 		}
2185 	} else {
2186 		exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
2187 #ifdef CONFIG_F2FS_CHECK_FS
2188 		mir_exist = f2fs_test_and_clear_bit(offset,
2189 						se->cur_valid_map_mir);
2190 		if (unlikely(exist != mir_exist)) {
2191 			f2fs_err(sbi, "Inconsistent error when clearing bitmap, blk:%u, old bit:%d",
2192 				 blkaddr, exist);
2193 			f2fs_bug_on(sbi, 1);
2194 		}
2195 #endif
2196 		if (unlikely(!exist)) {
2197 			f2fs_err(sbi, "Bitmap was wrongly cleared, blk:%u",
2198 				 blkaddr);
2199 			f2fs_bug_on(sbi, 1);
2200 			se->valid_blocks++;
2201 			del = 0;
2202 		} else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2203 			/*
2204 			 * If checkpoints are off, we must not reuse data that
2205 			 * was used in the previous checkpoint. If it was used
2206 			 * before, we must track that to know how much space we
2207 			 * really have.
2208 			 */
2209 			if (f2fs_test_bit(offset, se->ckpt_valid_map)) {
2210 				spin_lock(&sbi->stat_lock);
2211 				sbi->unusable_block_count++;
2212 				spin_unlock(&sbi->stat_lock);
2213 			}
2214 		}
2215 
2216 		if (f2fs_test_and_clear_bit(offset, se->discard_map))
2217 			sbi->discard_blks++;
2218 	}
2219 	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
2220 		se->ckpt_valid_blocks += del;
2221 
2222 	__mark_sit_entry_dirty(sbi, segno);
2223 
2224 	/* update total number of valid blocks to be written in ckpt area */
2225 	SIT_I(sbi)->written_valid_blocks += del;
2226 
2227 	if (__is_large_section(sbi))
2228 		get_sec_entry(sbi, segno)->valid_blocks += del;
2229 }
2230 
2231 void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
2232 {
2233 	unsigned int segno = GET_SEGNO(sbi, addr);
2234 	struct sit_info *sit_i = SIT_I(sbi);
2235 
2236 	f2fs_bug_on(sbi, addr == NULL_ADDR);
2237 	if (addr == NEW_ADDR || addr == COMPRESS_ADDR)
2238 		return;
2239 
2240 	invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);
2241 
2242 	/* add it into sit main buffer */
2243 	down_write(&sit_i->sentry_lock);
2244 
2245 	update_sit_entry(sbi, addr, -1);
2246 
2247 	/* add it into dirty seglist */
2248 	locate_dirty_segment(sbi, segno);
2249 
2250 	up_write(&sit_i->sentry_lock);
2251 }
2252 
2253 bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2254 {
2255 	struct sit_info *sit_i = SIT_I(sbi);
2256 	unsigned int segno, offset;
2257 	struct seg_entry *se;
2258 	bool is_cp = false;
2259 
2260 	if (!__is_valid_data_blkaddr(blkaddr))
2261 		return true;
2262 
2263 	down_read(&sit_i->sentry_lock);
2264 
2265 	segno = GET_SEGNO(sbi, blkaddr);
2266 	se = get_seg_entry(sbi, segno);
2267 	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
2268 
2269 	if (f2fs_test_bit(offset, se->ckpt_valid_map))
2270 		is_cp = true;
2271 
2272 	up_read(&sit_i->sentry_lock);
2273 
2274 	return is_cp;
2275 }
2276 
2277 /*
2278  * This function must be called with the curseg_mutex lock held
2279  */
2280 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
2281 					struct f2fs_summary *sum)
2282 {
2283 	struct curseg_info *curseg = CURSEG_I(sbi, type);
2284 	void *addr = curseg->sum_blk;
2285 	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
2286 	memcpy(addr, sum, sizeof(struct f2fs_summary));
2287 }
2288 
2289 /*
2290  * Calculate the number of current summary pages for writing
2291  */
2292 int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
2293 {
2294 	int valid_sum_count = 0;
2295 	int i, sum_in_page;
2296 
2297 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2298 		if (sbi->ckpt->alloc_type[i] == SSR)
2299 			valid_sum_count += sbi->blocks_per_seg;
2300 		else {
2301 			if (for_ra)
2302 				valid_sum_count += le16_to_cpu(
2303 					F2FS_CKPT(sbi)->cur_data_blkoff[i]);
2304 			else
2305 				valid_sum_count += curseg_blkoff(sbi, i);
2306 		}
2307 	}
2308 
2309 	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2310 			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
2311 	if (valid_sum_count <= sum_in_page)
2312 		return 1;
2313 	else if ((valid_sum_count - sum_in_page) <=
2314 		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
2315 		return 2;
2316 	return 3;
2317 }
2318 
2319 /*
2320  * Caller should put this summary page
2321  */
2322 struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
2323 {
2324 	if (unlikely(f2fs_cp_error(sbi)))
2325 		return ERR_PTR(-EIO);
2326 	return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
2327 }
2328 
2329 void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
2330 					void *src, block_t blk_addr)
2331 {
2332 	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2333 
2334 	memcpy(page_address(page), src, PAGE_SIZE);
2335 	set_page_dirty(page);
2336 	f2fs_put_page(page, 1);
2337 }
2338 
2339 static void write_sum_page(struct f2fs_sb_info *sbi,
2340 			struct f2fs_summary_block *sum_blk, block_t blk_addr)
2341 {
2342 	f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
2343 }
2344 
2345 static void write_current_sum_page(struct f2fs_sb_info *sbi,
2346 						int type, block_t blk_addr)
2347 {
2348 	struct curseg_info *curseg = CURSEG_I(sbi, type);
2349 	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2350 	struct f2fs_summary_block *src = curseg->sum_blk;
2351 	struct f2fs_summary_block *dst;
2352 
2353 	dst = (struct f2fs_summary_block *)page_address(page);
2354 	memset(dst, 0, PAGE_SIZE);
2355 
2356 	mutex_lock(&curseg->curseg_mutex);
2357 
2358 	down_read(&curseg->journal_rwsem);
2359 	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
2360 	up_read(&curseg->journal_rwsem);
2361 
2362 	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
2363 	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
2364 
2365 	mutex_unlock(&curseg->curseg_mutex);
2366 
2367 	set_page_dirty(page);
2368 	f2fs_put_page(page, 1);
2369 }
2370 
2371 static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
2372 {
2373 	struct curseg_info *curseg = CURSEG_I(sbi, type);
2374 	unsigned int segno = curseg->segno + 1;
2375 	struct free_segmap_info *free_i = FREE_I(sbi);
2376 
2377 	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
2378 		return !test_bit(segno, free_i->free_segmap);
2379 	return 0;
2380 }
2381 
2382 /*
2383  * Find a new segment in the free segment bitmap, searching in the requested order.
2384  * This function must succeed; failing to find a free segment is a BUG.
2385  */
2386 static void get_new_segment(struct f2fs_sb_info *sbi,
2387 			unsigned int *newseg, bool new_sec, int dir)
2388 {
2389 	struct free_segmap_info *free_i = FREE_I(sbi);
2390 	unsigned int segno, secno, zoneno;
2391 	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2392 	unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
2393 	unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
2394 	unsigned int left_start = hint;
2395 	bool init = true;
2396 	int go_left = 0;
2397 	int i;
2398 
2399 	spin_lock(&free_i->segmap_lock);
2400 
2401 	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
2402 		segno = find_next_zero_bit(free_i->free_segmap,
2403 			GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
2404 		if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
2405 			goto got_it;
2406 	}
2407 find_other_zone:
2408 	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
2409 	if (secno >= MAIN_SECS(sbi)) {
2410 		if (dir == ALLOC_RIGHT) {
2411 			secno = find_next_zero_bit(free_i->free_secmap,
2412 							MAIN_SECS(sbi), 0);
2413 			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
2414 		} else {
2415 			go_left = 1;
2416 			left_start = hint - 1;
2417 		}
2418 	}
2419 	if (go_left == 0)
2420 		goto skip_left;
2421 
2422 	while (test_bit(left_start, free_i->free_secmap)) {
2423 		if (left_start > 0) {
2424 			left_start--;
2425 			continue;
2426 		}
2427 		left_start = find_next_zero_bit(free_i->free_secmap,
2428 							MAIN_SECS(sbi), 0);
2429 		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
2430 		break;
2431 	}
2432 	secno = left_start;
2433 skip_left:
2434 	segno = GET_SEG_FROM_SEC(sbi, secno);
2435 	zoneno = GET_ZONE_FROM_SEC(sbi, secno);
2436 
2437 	/* give up on finding another zone */
2438 	if (!init)
2439 		goto got_it;
2440 	if (sbi->secs_per_zone == 1)
2441 		goto got_it;
2442 	if (zoneno == old_zoneno)
2443 		goto got_it;
2444 	if (dir == ALLOC_LEFT) {
2445 		if (!go_left && zoneno + 1 >= total_zones)
2446 			goto got_it;
2447 		if (go_left && zoneno == 0)
2448 			goto got_it;
2449 	}
2450 	for (i = 0; i < NR_CURSEG_TYPE; i++)
2451 		if (CURSEG_I(sbi, i)->zone == zoneno)
2452 			break;
2453 
2454 	if (i < NR_CURSEG_TYPE) {
2455 		/* zone is in use, try another */
2456 		if (go_left)
2457 			hint = zoneno * sbi->secs_per_zone - 1;
2458 		else if (zoneno + 1 >= total_zones)
2459 			hint = 0;
2460 		else
2461 			hint = (zoneno + 1) * sbi->secs_per_zone;
2462 		init = false;
2463 		goto find_other_zone;
2464 	}
2465 got_it:
2466 	/* set it as dirty segment in free segmap */
2467 	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
2468 	__set_inuse(sbi, segno);
2469 	*newseg = segno;
2470 	spin_unlock(&free_i->segmap_lock);
2471 }
2472 
2473 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
2474 {
2475 	struct curseg_info *curseg = CURSEG_I(sbi, type);
2476 	struct summary_footer *sum_footer;
2477 
2478 	curseg->segno = curseg->next_segno;
2479 	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
2480 	curseg->next_blkoff = 0;
2481 	curseg->next_segno = NULL_SEGNO;
2482 
2483 	sum_footer = &(curseg->sum_blk->footer);
2484 	memset(sum_footer, 0, sizeof(struct summary_footer));
2485 	if (IS_DATASEG(type))
2486 		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
2487 	if (IS_NODESEG(type))
2488 		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
2489 	__set_sit_entry_type(sbi, type, curseg->segno, modified);
2490 }
2491 
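/*
 * Pick the hint segment from which get_new_segment() starts searching,
 * honouring the NOHEAP and alloc_mode=reuse policies.
 */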
2492 static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
2493 {
2494 	/* if segs_per_sec is larger than 1, we need to keep the original policy. */
2495 	if (__is_large_section(sbi))
2496 		return CURSEG_I(sbi, type)->segno;
2497 
2498 	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2499 		return 0;
2500 
2501 	if (test_opt(sbi, NOHEAP) &&
2502 		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
2503 		return 0;
2504 
2505 	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
2506 		return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2507 
2508 	/* find segments from 0 to reuse freed segments */
2509 	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2510 		return 0;
2511 
2512 	return CURSEG_I(sbi, type)->segno;
2513 }
2514 
2515 /*
2516  * Allocate a current working segment.
2517  * This function always allocates a free segment in LFS manner.
2518  */
2519 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
2520 {
2521 	struct curseg_info *curseg = CURSEG_I(sbi, type);
2522 	unsigned int segno = curseg->segno;
2523 	int dir = ALLOC_LEFT;
2524 
2525 	write_sum_page(sbi, curseg->sum_blk,
2526 				GET_SUM_BLOCK(sbi, segno));
2527 	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
2528 		dir = ALLOC_RIGHT;
2529 
2530 	if (test_opt(sbi, NOHEAP))
2531 		dir = ALLOC_RIGHT;
2532 
2533 	segno = __get_next_segno(sbi, type);
2534 	get_new_segment(sbi, &segno, new_sec, dir);
2535 	curseg->next_segno = segno;
2536 	reset_curseg(sbi, type, 1);
2537 	curseg->alloc_type = LFS;
2538 }
2539 
2540 static void __next_free_blkoff(struct f2fs_sb_info *sbi,
2541 			struct curseg_info *seg, block_t start)
2542 {
2543 	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
2544 	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
2545 	unsigned long *target_map = SIT_I(sbi)->tmp_map;
2546 	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
2547 	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
2548 	int i, pos;
2549 
2550 	for (i = 0; i < entries; i++)
2551 		target_map[i] = ckpt_map[i] | cur_map[i];
2552 
2553 	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
2554 
2555 	seg->next_blkoff = pos;
2556 }
2557 
2558 /*
2559  * If a segment is written in LFS manner, the next block offset is obtained
2560  * simply by incrementing the current block offset. If a segment is written in
2561  * SSR manner, the next block offset is found by calling __next_free_blkoff.
2562  */
2563 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
2564 				struct curseg_info *seg)
2565 {
2566 	if (seg->alloc_type == SSR)
2567 		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
2568 	else
2569 		seg->next_blkoff++;
2570 }
2571 
2572 /*
2573  * This function always allocates a used segment (from the dirty seglist) in SSR
2574  * manner, so it must restore the existing summary information of its valid blocks.
2575  */
2576 static void change_curseg(struct f2fs_sb_info *sbi, int type)
2577 {
2578 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2579 	struct curseg_info *curseg = CURSEG_I(sbi, type);
2580 	unsigned int new_segno = curseg->next_segno;
2581 	struct f2fs_summary_block *sum_node;
2582 	struct page *sum_page;
2583 
2584 	write_sum_page(sbi, curseg->sum_blk,
2585 				GET_SUM_BLOCK(sbi, curseg->segno));
2586 	__set_test_and_inuse(sbi, new_segno);
2587 
2588 	mutex_lock(&dirty_i->seglist_lock);
2589 	__remove_dirty_segment(sbi, new_segno, PRE);
2590 	__remove_dirty_segment(sbi, new_segno, DIRTY);
2591 	mutex_unlock(&dirty_i->seglist_lock);
2592 
2593 	reset_curseg(sbi, type, 1);
2594 	curseg->alloc_type = SSR;
2595 	__next_free_blkoff(sbi, curseg, 0);
2596 
2597 	sum_page = f2fs_get_sum_page(sbi, new_segno);
2598 	if (IS_ERR(sum_page)) {
2599 		/* GC won't be able to use stale summary pages by cp_error */
2600 		memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
2601 		return;
2602 	}
2603 	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
2604 	memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
2605 	f2fs_put_page(sum_page, 1);
2606 }
2607 
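/*
 * Choose a partially valid segment to reuse for SSR allocation: first try
 * the requested log type, then the other logs of the same kind (node or
 * data), and finally any free segment when checkpointing is disabled.
 */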
2608 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
2609 {
2610 	struct curseg_info *curseg = CURSEG_I(sbi, type);
2611 	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
2612 	unsigned segno = NULL_SEGNO;
2613 	int i, cnt;
2614 	bool reversed = false;
2615 
2616 	/* f2fs_need_SSR() already forces to do this */
2617 	if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
2618 		curseg->next_segno = segno;
2619 		return 1;
2620 	}
2621 
2622 	/* For node segments, let's do SSR more intensively */
2623 	if (IS_NODESEG(type)) {
2624 		if (type >= CURSEG_WARM_NODE) {
2625 			reversed = true;
2626 			i = CURSEG_COLD_NODE;
2627 		} else {
2628 			i = CURSEG_HOT_NODE;
2629 		}
2630 		cnt = NR_CURSEG_NODE_TYPE;
2631 	} else {
2632 		if (type >= CURSEG_WARM_DATA) {
2633 			reversed = true;
2634 			i = CURSEG_COLD_DATA;
2635 		} else {
2636 			i = CURSEG_HOT_DATA;
2637 		}
2638 		cnt = NR_CURSEG_DATA_TYPE;
2639 	}
2640 
2641 	for (; cnt-- > 0; reversed ? i-- : i++) {
2642 		if (i == type)
2643 			continue;
2644 		if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
2645 			curseg->next_segno = segno;
2646 			return 1;
2647 		}
2648 	}
2649 
2650 	/* find valid_blocks=0 in dirty list */
2651 	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
2652 		segno = get_free_segment(sbi);
2653 		if (segno != NULL_SEGNO) {
2654 			curseg->next_segno = segno;
2655 			return 1;
2656 		}
2657 	}
2658 	return 0;
2659 }
2660 
2661 /*
2662  * Flush out the current segment and replace it with a new segment.
2663  * This function must succeed; otherwise it is a BUG.
2664  */
2665 static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
2666 						int type, bool force)
2667 {
2668 	struct curseg_info *curseg = CURSEG_I(sbi, type);
2669 
2670 	if (force)
2671 		new_curseg(sbi, type, true);
2672 	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
2673 					type == CURSEG_WARM_NODE)
2674 		new_curseg(sbi, type, false);
2675 	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
2676 			likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
2677 		new_curseg(sbi, type, false);
2678 	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
2679 		change_curseg(sbi, type);
2680 	else
2681 		new_curseg(sbi, type, false);
2682 
2683 	stat_inc_seg_type(sbi, curseg);
2684 }
2685 
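/*
 * During resize, move a current segment that falls inside [start, end]
 * out of that range by allocating a replacement segment.
 */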
2686 void allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
2687 					unsigned int start, unsigned int end)
2688 {
2689 	struct curseg_info *curseg = CURSEG_I(sbi, type);
2690 	unsigned int segno;
2691 
2692 	down_read(&SM_I(sbi)->curseg_lock);
2693 	mutex_lock(&curseg->curseg_mutex);
2694 	down_write(&SIT_I(sbi)->sentry_lock);
2695 
2696 	segno = CURSEG_I(sbi, type)->segno;
2697 	if (segno < start || segno > end)
2698 		goto unlock;
2699 
2700 	if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
2701 		change_curseg(sbi, type);
2702 	else
2703 		new_curseg(sbi, type, true);
2704 
2705 	stat_inc_seg_type(sbi, curseg);
2706 
2707 	locate_dirty_segment(sbi, segno);
2708 unlock:
2709 	up_write(&SIT_I(sbi)->sentry_lock);
2710 
2711 	if (segno != curseg->segno)
2712 		f2fs_notice(sbi, "For resize: curseg of type %d: %u ==> %u",
2713 			    type, segno, curseg->segno);
2714 
2715 	mutex_unlock(&curseg->curseg_mutex);
2716 	up_read(&SM_I(sbi)->curseg_lock);
2717 }
2718 
2719 void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi, int type)
2720 {
2721 	struct curseg_info *curseg;
2722 	unsigned int old_segno;
2723 	int i;
2724 
2725 	down_write(&SIT_I(sbi)->sentry_lock);
2726 
2727 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
2728 		if (type != NO_CHECK_TYPE && i != type)
2729 			continue;
2730 
2731 		curseg = CURSEG_I(sbi, i);
2732 		if (type == NO_CHECK_TYPE || curseg->next_blkoff ||
2733 				get_valid_blocks(sbi, curseg->segno, false) ||
2734 				get_ckpt_valid_blocks(sbi, curseg->segno)) {
2735 			old_segno = curseg->segno;
2736 			SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
2737 			locate_dirty_segment(sbi, old_segno);
2738 		}
2739 	}
2740 
2741 	up_write(&SIT_I(sbi)->sentry_lock);
2742 }
2743 
2744 static const struct segment_allocation default_salloc_ops = {
2745 	.allocate_segment = allocate_segment_by_default,
2746 };
2747 
2748 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
2749 						struct cp_control *cpc)
2750 {
2751 	__u64 trim_start = cpc->trim_start;
2752 	bool has_candidate = false;
2753 
2754 	down_write(&SIT_I(sbi)->sentry_lock);
2755 	for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
2756 		if (add_discard_addrs(sbi, cpc, true)) {
2757 			has_candidate = true;
2758 			break;
2759 		}
2760 	}
2761 	up_write(&SIT_I(sbi)->sentry_lock);
2762 
2763 	cpc->trim_start = trim_start;
2764 	return has_candidate;
2765 }
2766 
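/*
 * Walk the rbtree of cached discard commands overlapping [start, end],
 * submit the ones still in D_PREP state, and throttle by waiting whenever
 * dpolicy->max_requests commands have been issued; used by FITRIM.
 */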
2767 static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
2768 					struct discard_policy *dpolicy,
2769 					unsigned int start, unsigned int end)
2770 {
2771 	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
2772 	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
2773 	struct rb_node **insert_p = NULL, *insert_parent = NULL;
2774 	struct discard_cmd *dc;
2775 	struct blk_plug plug;
2776 	int issued;
2777 	unsigned int trimmed = 0;
2778 
2779 next:
2780 	issued = 0;
2781 
2782 	mutex_lock(&dcc->cmd_lock);
2783 	if (unlikely(dcc->rbtree_check))
2784 		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
2785 								&dcc->root));
2786 
2787 	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
2788 					NULL, start,
2789 					(struct rb_entry **)&prev_dc,
2790 					(struct rb_entry **)&next_dc,
2791 					&insert_p, &insert_parent, true, NULL);
2792 	if (!dc)
2793 		dc = next_dc;
2794 
2795 	blk_start_plug(&plug);
2796 
2797 	while (dc && dc->lstart <= end) {
2798 		struct rb_node *node;
2799 		int err = 0;
2800 
2801 		if (dc->len < dpolicy->granularity)
2802 			goto skip;
2803 
2804 		if (dc->state != D_PREP) {
2805 			list_move_tail(&dc->list, &dcc->fstrim_list);
2806 			goto skip;
2807 		}
2808 
2809 		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
2810 
2811 		if (issued >= dpolicy->max_requests) {
2812 			start = dc->lstart + dc->len;
2813 
2814 			if (err)
2815 				__remove_discard_cmd(sbi, dc);
2816 
2817 			blk_finish_plug(&plug);
2818 			mutex_unlock(&dcc->cmd_lock);
2819 			trimmed += __wait_all_discard_cmd(sbi, NULL);
2820 			congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
2821 			goto next;
2822 		}
2823 skip:
2824 		node = rb_next(&dc->rb_node);
2825 		if (err)
2826 			__remove_discard_cmd(sbi, dc);
2827 		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
2828 
2829 		if (fatal_signal_pending(current))
2830 			break;
2831 	}
2832 
2833 	blk_finish_plug(&plug);
2834 	mutex_unlock(&dcc->cmd_lock);
2835 
2836 	return trimmed;
2837 }
2838 
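/*
 * FITRIM entry point: write a checkpoint to turn prefree segments into
 * discard candidates, then (unless runtime discard is enabled) issue and
 * wait for the discard commands covering the requested range.
 */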
2839 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
2840 {
2841 	__u64 start = F2FS_BYTES_TO_BLK(range->start);
2842 	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
2843 	unsigned int start_segno, end_segno;
2844 	block_t start_block, end_block;
2845 	struct cp_control cpc;
2846 	struct discard_policy dpolicy;
2847 	unsigned long long trimmed = 0;
2848 	int err = 0;
2849 	bool need_align = f2fs_lfs_mode(sbi) && __is_large_section(sbi);
2850 
2851 	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
2852 		return -EINVAL;
2853 
2854 	if (end < MAIN_BLKADDR(sbi))
2855 		goto out;
2856 
2857 	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
2858 		f2fs_warn(sbi, "Found FS corruption, run fsck to fix.");
2859 		return -EFSCORRUPTED;
2860 	}
2861 
2862 	/* start/end segment number in main_area */
2863 	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
2864 	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
2865 						GET_SEGNO(sbi, end);
2866 	if (need_align) {
2867 		start_segno = rounddown(start_segno, sbi->segs_per_sec);
2868 		end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
2869 	}
2870 
2871 	cpc.reason = CP_DISCARD;
2872 	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
2873 	cpc.trim_start = start_segno;
2874 	cpc.trim_end = end_segno;
2875 
2876 	if (sbi->discard_blks == 0)
2877 		goto out;
2878 
2879 	down_write(&sbi->gc_lock);
2880 	err = f2fs_write_checkpoint(sbi, &cpc);
2881 	up_write(&sbi->gc_lock);
2882 	if (err)
2883 		goto out;
2884 
2885 	/*
2886 	 * We filed discard candidates, but we don't need to wait for all of
2887 	 * them here: with the runtime discard option enabled they will be
2888 	 * issued during idle time. A configuration is expected to use either
2889 	 * runtime discard or periodic fstrim, not both.
2890 	 */
2891 	if (f2fs_realtime_discard_enable(sbi))
2892 		goto out;
2893 
2894 	start_block = START_BLOCK(sbi, start_segno);
2895 	end_block = START_BLOCK(sbi, end_segno + 1);
2896 
2897 	__init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
2898 	trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
2899 					start_block, end_block);
2900 
2901 	trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
2902 					start_block, end_block);
2903 out:
2904 	if (!err)
2905 		range->len = F2FS_BLK_TO_BYTES(trimmed);
2906 	return err;
2907 }
2908 
2909 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
2910 {
2911 	struct curseg_info *curseg = CURSEG_I(sbi, type);
2912 	if (curseg->next_blkoff < sbi->blocks_per_seg)
2913 		return true;
2914 	return false;
2915 }
2916 
2917 int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
2918 {
2919 	switch (hint) {
2920 	case WRITE_LIFE_SHORT:
2921 		return CURSEG_HOT_DATA;
2922 	case WRITE_LIFE_EXTREME:
2923 		return CURSEG_COLD_DATA;
2924 	default:
2925 		return CURSEG_WARM_DATA;
2926 	}
2927 }
2928 
2929 /* This returns write hints for each segment type. These hints will be
2930  * passed down to the block layer. There are mapping tables which depend on
2931  * the mount option 'whint_mode'.
2932  *
2933  * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
2934  *
2935  * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
2936  *
2937  * User                  F2FS                     Block
2938  * ----                  ----                     -----
2939  *                       META                     WRITE_LIFE_NOT_SET
2940  *                       HOT_NODE                 "
2941  *                       WARM_NODE                "
2942  *                       COLD_NODE                "
2943  * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
2944  * extension list        "                        "
2945  *
2946  * -- buffered io
2947  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2948  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2949  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2950  * WRITE_LIFE_NONE       "                        "
2951  * WRITE_LIFE_MEDIUM     "                        "
2952  * WRITE_LIFE_LONG       "                        "
2953  *
2954  * -- direct io
2955  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2956  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2957  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2958  * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
2959  * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
2960  * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
2961  *
2962  * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
2963  *
2964  * User                  F2FS                     Block
2965  * ----                  ----                     -----
2966  *                       META                     WRITE_LIFE_MEDIUM;
2967  *                       HOT_NODE                 WRITE_LIFE_NOT_SET
2968  *                       WARM_NODE                "
2969  *                       COLD_NODE                WRITE_LIFE_NONE
2970  * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
2971  * extension list        "                        "
2972  *
2973  * -- buffered io
2974  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2975  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2976  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
2977  * WRITE_LIFE_NONE       "                        "
2978  * WRITE_LIFE_MEDIUM     "                        "
2979  * WRITE_LIFE_LONG       "                        "
2980  *
2981  * -- direct io
2982  * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
2983  * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
2984  * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
2985  * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
2986  * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
2987  * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
2988  */
2989 
2990 enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
2991 				enum page_type type, enum temp_type temp)
2992 {
2993 	if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
2994 		if (type == DATA) {
2995 			if (temp == WARM)
2996 				return WRITE_LIFE_NOT_SET;
2997 			else if (temp == HOT)
2998 				return WRITE_LIFE_SHORT;
2999 			else if (temp == COLD)
3000 				return WRITE_LIFE_EXTREME;
3001 		} else {
3002 			return WRITE_LIFE_NOT_SET;
3003 		}
3004 	} else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
3005 		if (type == DATA) {
3006 			if (temp == WARM)
3007 				return WRITE_LIFE_LONG;
3008 			else if (temp == HOT)
3009 				return WRITE_LIFE_SHORT;
3010 			else if (temp == COLD)
3011 				return WRITE_LIFE_EXTREME;
3012 		} else if (type == NODE) {
3013 			if (temp == WARM || temp == HOT)
3014 				return WRITE_LIFE_NOT_SET;
3015 			else if (temp == COLD)
3016 				return WRITE_LIFE_NONE;
3017 		} else if (type == META) {
3018 			return WRITE_LIFE_MEDIUM;
3019 		}
3020 	}
3021 	return WRITE_LIFE_NOT_SET;
3022 }
3023 
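/*
 * The __get_segment_type_{2,4,6} helpers map a write to one of the active
 * logs; which helper is used depends on the active_logs mount option.
 */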
3024 static int __get_segment_type_2(struct f2fs_io_info *fio)
3025 {
3026 	if (fio->type == DATA)
3027 		return CURSEG_HOT_DATA;
3028 	else
3029 		return CURSEG_HOT_NODE;
3030 }
3031 
3032 static int __get_segment_type_4(struct f2fs_io_info *fio)
3033 {
3034 	if (fio->type == DATA) {
3035 		struct inode *inode = fio->page->mapping->host;
3036 
3037 		if (S_ISDIR(inode->i_mode))
3038 			return CURSEG_HOT_DATA;
3039 		else
3040 			return CURSEG_COLD_DATA;
3041 	} else {
3042 		if (IS_DNODE(fio->page) && is_cold_node(fio->page))
3043 			return CURSEG_WARM_NODE;
3044 		else
3045 			return CURSEG_COLD_NODE;
3046 	}
3047 }
3048 
3049 static int __get_segment_type_6(struct f2fs_io_info *fio)
3050 {
3051 	if (fio->type == DATA) {
3052 		struct inode *inode = fio->page->mapping->host;
3053 
3054 		if (is_cold_data(fio->page) || file_is_cold(inode) ||
3055 				f2fs_compressed_file(inode))
3056 			return CURSEG_COLD_DATA;
3057 		if (file_is_hot(inode) ||
3058 				is_inode_flag_set(inode, FI_HOT_DATA) ||
3059 				f2fs_is_atomic_file(inode) ||
3060 				f2fs_is_volatile_file(inode))
3061 			return CURSEG_HOT_DATA;
3062 		return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
3063 	} else {
3064 		if (IS_DNODE(fio->page))
3065 			return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
3066 						CURSEG_HOT_NODE;
3067 		return CURSEG_COLD_NODE;
3068 	}
3069 }
3070 
3071 static int __get_segment_type(struct f2fs_io_info *fio)
3072 {
3073 	int type = 0;
3074 
3075 	switch (F2FS_OPTION(fio->sbi).active_logs) {
3076 	case 2:
3077 		type = __get_segment_type_2(fio);
3078 		break;
3079 	case 4:
3080 		type = __get_segment_type_4(fio);
3081 		break;
3082 	case 6:
3083 		type = __get_segment_type_6(fio);
3084 		break;
3085 	default:
3086 		f2fs_bug_on(fio->sbi, true);
3087 	}
3088 
3089 	if (IS_HOT(type))
3090 		fio->temp = HOT;
3091 	else if (IS_WARM(type))
3092 		fio->temp = WARM;
3093 	else
3094 		fio->temp = COLD;
3095 	return type;
3096 }
3097 
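/*
 * Reserve the next free block of the current segment for this write:
 * record the summary entry, update SIT information for the new (and old)
 * block address, move to a new segment if the current one is full, and
 * optionally queue the fio on the per-temperature write list.
 */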
3098 void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
3099 		block_t old_blkaddr, block_t *new_blkaddr,
3100 		struct f2fs_summary *sum, int type,
3101 		struct f2fs_io_info *fio, bool add_list)
3102 {
3103 	struct sit_info *sit_i = SIT_I(sbi);
3104 	struct curseg_info *curseg = CURSEG_I(sbi, type);
3105 	bool put_pin_sem = false;
3106 
3107 	if (type == CURSEG_COLD_DATA) {
3108 		/* GC during CURSEG_COLD_DATA_PINNED allocation */
3109 		if (down_read_trylock(&sbi->pin_sem)) {
3110 			put_pin_sem = true;
3111 		} else {
3112 			type = CURSEG_WARM_DATA;
3113 			curseg = CURSEG_I(sbi, type);
3114 		}
3115 	} else if (type == CURSEG_COLD_DATA_PINNED) {
3116 		type = CURSEG_COLD_DATA;
3117 	}
3118 
3119 	/*
3120 	 * We need to wait for node_write to avoid block allocation during
3121 	 * checkpoint. This can only happen for quota writes, which can cause
3122 	 * the discard race condition below.
3123 	 */
3124 	if (IS_DATASEG(type))
3125 		down_write(&sbi->node_write);
3126 
3127 	down_read(&SM_I(sbi)->curseg_lock);
3128 
3129 	mutex_lock(&curseg->curseg_mutex);
3130 	down_write(&sit_i->sentry_lock);
3131 
3132 	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
3133 
3134 	f2fs_wait_discard_bio(sbi, *new_blkaddr);
3135 
3136 	/*
3137 	 * __add_sum_entry must be called with curseg_mutex held
3138 	 * because this function updates a summary entry in the
3139 	 * current summary block.
3140 	 */
3141 	__add_sum_entry(sbi, type, sum);
3142 
3143 	__refresh_next_blkoff(sbi, curseg);
3144 
3145 	stat_inc_block_count(sbi, curseg);
3146 
3147 	/*
3148 	 * SIT information should be updated before segment allocation,
3149 	 * since SSR needs latest valid block information.
3150 	 */
3151 	update_sit_entry(sbi, *new_blkaddr, 1);
3152 	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
3153 		update_sit_entry(sbi, old_blkaddr, -1);
3154 
3155 	if (!__has_curseg_space(sbi, type))
3156 		sit_i->s_ops->allocate_segment(sbi, type, false);
3157 
3158 	/*
3159 	 * Segment dirty status should be updated after segment allocation,
3160 	 * so we only need to update the status once, after the previous
3161 	 * segment has been closed.
3162 	 */
3163 	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3164 	locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3165 
3166 	up_write(&sit_i->sentry_lock);
3167 
3168 	if (page && IS_NODESEG(type)) {
3169 		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));
3170 
3171 		f2fs_inode_chksum_set(sbi, page);
3172 	}
3173 
3174 	if (F2FS_IO_ALIGNED(sbi))
3175 		fio->retry = false;
3176 
3177 	if (add_list) {
3178 		struct f2fs_bio_info *io;
3179 
3180 		INIT_LIST_HEAD(&fio->list);
3181 		fio->in_list = true;
3182 		io = sbi->write_io[fio->type] + fio->temp;
3183 		spin_lock(&io->io_lock);
3184 		list_add_tail(&fio->list, &io->io_list);
3185 		spin_unlock(&io->io_lock);
3186 	}
3187 
3188 	mutex_unlock(&curseg->curseg_mutex);
3189 
3190 	up_read(&SM_I(sbi)->curseg_lock);
3191 
3192 	if (IS_DATASEG(type))
3193 		up_write(&sbi->node_write);
3194 
3195 	if (put_pin_sem)
3196 		up_read(&sbi->pin_sem);
3197 }
3198 
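/*
 * On multi-device setups, remember which device received this write so
 * that later fsync and checkpoint can flush the right device.
 */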
3199 static void update_device_state(struct f2fs_io_info *fio)
3200 {
3201 	struct f2fs_sb_info *sbi = fio->sbi;
3202 	unsigned int devidx;
3203 
3204 	if (!f2fs_is_multi_device(sbi))
3205 		return;
3206 
3207 	devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
3208 
3209 	/* update device state for fsync */
3210 	f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
3211 
3212 	/* update device state for checkpoint */
3213 	if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3214 		spin_lock(&sbi->dev_lock);
3215 		f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3216 		spin_unlock(&sbi->dev_lock);
3217 	}
3218 }
3219 
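/*
 * Allocate an on-disk block for the page and submit the write, retrying
 * the allocation when the submission path requests it.
 */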
3220 static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3221 {
3222 	int type = __get_segment_type(fio);
3223 	bool keep_order = (f2fs_lfs_mode(fio->sbi) && type == CURSEG_COLD_DATA);
3224 
3225 	if (keep_order)
3226 		down_read(&fio->sbi->io_order_lock);
3227 reallocate:
3228 	f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3229 			&fio->new_blkaddr, sum, type, fio, true);
3230 	if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
3231 		invalidate_mapping_pages(META_MAPPING(fio->sbi),
3232 					fio->old_blkaddr, fio->old_blkaddr);
3233 
3234 	/* writeout dirty page into bdev */
3235 	f2fs_submit_page_write(fio);
3236 	if (fio->retry) {
3237 		fio->old_blkaddr = fio->new_blkaddr;
3238 		goto reallocate;
3239 	}
3240 
3241 	update_device_state(fio);
3242 
3243 	if (keep_order)
3244 		up_read(&fio->sbi->io_order_lock);
3245 }
3246 
3247 void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
3248 					enum iostat_type io_type)
3249 {
3250 	struct f2fs_io_info fio = {
3251 		.sbi = sbi,
3252 		.type = META,
3253 		.temp = HOT,
3254 		.op = REQ_OP_WRITE,
3255 		.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3256 		.old_blkaddr = page->index,
3257 		.new_blkaddr = page->index,
3258 		.page = page,
3259 		.encrypted_page = NULL,
3260 		.in_list = false,
3261 	};
3262 
3263 	if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
3264 		fio.op_flags &= ~REQ_META;
3265 
3266 	set_page_writeback(page);
3267 	ClearPageError(page);
3268 	f2fs_submit_page_write(&fio);
3269 
3270 	stat_inc_meta_count(sbi, page->index);
3271 	f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
3272 }
3273 
3274 void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
3275 {
3276 	struct f2fs_summary sum;
3277 
3278 	set_summary(&sum, nid, 0, 0);
3279 	do_write_page(&sum, fio);
3280 
3281 	f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3282 }
3283 
3284 void f2fs_outplace_write_data(struct dnode_of_data *dn,
3285 					struct f2fs_io_info *fio)
3286 {
3287 	struct f2fs_sb_info *sbi = fio->sbi;
3288 	struct f2fs_summary sum;
3289 
3290 	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3291 	set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3292 	do_write_page(&sum, fio);
3293 	f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
3294 
3295 	f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
3296 }
3297 
3298 int f2fs_inplace_write_data(struct f2fs_io_info *fio)
3299 {
3300 	int err;
3301 	struct f2fs_sb_info *sbi = fio->sbi;
3302 	unsigned int segno;
3303 
3304 	fio->new_blkaddr = fio->old_blkaddr;
3305 	/* i/o temperature is needed for passing down write hints */
3306 	__get_segment_type(fio);
3307 
3308 	segno = GET_SEGNO(sbi, fio->new_blkaddr);
3309 
3310 	if (!IS_DATASEG(get_seg_entry(sbi, segno)->type)) {
3311 		set_sbi_flag(sbi, SBI_NEED_FSCK);
3312 		f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
3313 			  __func__, segno);
3314 		return -EFSCORRUPTED;
3315 	}
3316 
3317 	stat_inc_inplace_blocks(fio->sbi);
3318 
3319 	if (fio->bio && !(SM_I(sbi)->ipu_policy & (1 << F2FS_IPU_NOCACHE)))
3320 		err = f2fs_merge_page_bio(fio);
3321 	else
3322 		err = f2fs_submit_page_bio(fio);
3323 	if (!err) {
3324 		update_device_state(fio);
3325 		f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
3326 	}
3327 
3328 	return err;
3329 }
3330 
3331 static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
3332 						unsigned int segno)
3333 {
3334 	int i;
3335 
3336 	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
3337 		if (CURSEG_I(sbi, i)->segno == segno)
3338 			break;
3339 	}
3340 	return i;
3341 }
3342 
3343 void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3344 				block_t old_blkaddr, block_t new_blkaddr,
3345 				bool recover_curseg, bool recover_newaddr)
3346 {
3347 	struct sit_info *sit_i = SIT_I(sbi);
3348 	struct curseg_info *curseg;
3349 	unsigned int segno, old_cursegno;
3350 	struct seg_entry *se;
3351 	int type;
3352 	unsigned short old_blkoff;
3353 
3354 	segno = GET_SEGNO(sbi, new_blkaddr);
3355 	se = get_seg_entry(sbi, segno);
3356 	type = se->type;
3357 
3358 	down_write(&SM_I(sbi)->curseg_lock);
3359 
3360 	if (!recover_curseg) {
3361 		/* for recovery flow */
3362 		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
3363 			if (old_blkaddr == NULL_ADDR)
3364 				type = CURSEG_COLD_DATA;
3365 			else
3366 				type = CURSEG_WARM_DATA;
3367 		}
3368 	} else {
3369 		if (IS_CURSEG(sbi, segno)) {
3370 			/* se->type is volatile due to SSR allocation */
3371 			type = __f2fs_get_curseg(sbi, segno);
3372 			f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
3373 		} else {
3374 			type = CURSEG_WARM_DATA;
3375 		}
3376 	}
3377 
3378 	f2fs_bug_on(sbi, !IS_DATASEG(type));
3379 	curseg = CURSEG_I(sbi, type);
3380 
3381 	mutex_lock(&curseg->curseg_mutex);
3382 	down_write(&sit_i->sentry_lock);
3383 
3384 	old_cursegno = curseg->segno;
3385 	old_blkoff = curseg->next_blkoff;
3386 
3387 	/* change the current segment */
3388 	if (segno != curseg->segno) {
3389 		curseg->next_segno = segno;
3390 		change_curseg(sbi, type);
3391 	}
3392 
3393 	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3394 	__add_sum_entry(sbi, type, sum);
3395 
3396 	if (!recover_curseg || recover_newaddr)
3397 		update_sit_entry(sbi, new_blkaddr, 1);
3398 	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
3399 		invalidate_mapping_pages(META_MAPPING(sbi),
3400 					old_blkaddr, old_blkaddr);
3401 		update_sit_entry(sbi, old_blkaddr, -1);
3402 	}
3403 
3404 	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
3405 	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));
3406 
3407 	locate_dirty_segment(sbi, old_cursegno);
3408 
3409 	if (recover_curseg) {
3410 		if (old_cursegno != curseg->segno) {
3411 			curseg->next_segno = old_cursegno;
3412 			change_curseg(sbi, type);
3413 		}
3414 		curseg->next_blkoff = old_blkoff;
3415 	}
3416 
3417 	up_write(&sit_i->sentry_lock);
3418 	mutex_unlock(&curseg->curseg_mutex);
3419 	up_write(&SM_I(sbi)->curseg_lock);
3420 }
3421 
3422 void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
3423 				block_t old_addr, block_t new_addr,
3424 				unsigned char version, bool recover_curseg,
3425 				bool recover_newaddr)
3426 {
3427 	struct f2fs_summary sum;
3428 
3429 	set_summary(&sum, dn->nid, dn->ofs_in_node, version);
3430 
3431 	f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3432 					recover_curseg, recover_newaddr);
3433 
3434 	f2fs_update_data_blkaddr(dn, new_addr);
3435 }
3436 
3437 void f2fs_wait_on_page_writeback(struct page *page,
3438 				enum page_type type, bool ordered, bool locked)
3439 {
3440 	if (PageWriteback(page)) {
3441 		struct f2fs_sb_info *sbi = F2FS_P_SB(page);
3442 
3443 		/* submit cached LFS IO */
3444 		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3445 		/* submit cached IPU IO */
3446 		f2fs_submit_merged_ipu_write(sbi, NULL, page);
3447 		if (ordered) {
3448 			wait_on_page_writeback(page);
3449 			f2fs_bug_on(sbi, locked && PageWriteback(page));
3450 		} else {
3451 			wait_for_stable_page(page);
3452 		}
3453 	}
3454 }
3455 
3456 void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3457 {
3458 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3459 	struct page *cpage;
3460 
3461 	if (!f2fs_post_read_required(inode))
3462 		return;
3463 
3464 	if (!__is_valid_data_blkaddr(blkaddr))
3465 		return;
3466 
3467 	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
3468 	if (cpage) {
3469 		f2fs_wait_on_page_writeback(cpage, DATA, true, true);
3470 		f2fs_put_page(cpage, 1);
3471 	}
3472 }
3473 
3474 void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
3475 								block_t len)
3476 {
3477 	block_t i;
3478 
3479 	for (i = 0; i < len; i++)
3480 		f2fs_wait_on_block_writeback(inode, blkaddr + i);
3481 }
3482 
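/*
 * Compacted summaries pack the NAT journal, the SIT journal and the
 * summary entries of the three data cursegs back to back in the
 * checkpoint pack, spilling into additional meta pages as needed.
 */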
3483 static int read_compacted_summaries(struct f2fs_sb_info *sbi)
3484 {
3485 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3486 	struct curseg_info *seg_i;
3487 	unsigned char *kaddr;
3488 	struct page *page;
3489 	block_t start;
3490 	int i, j, offset;
3491 
3492 	start = start_sum_block(sbi);
3493 
3494 	page = f2fs_get_meta_page(sbi, start++);
3495 	if (IS_ERR(page))
3496 		return PTR_ERR(page);
3497 	kaddr = (unsigned char *)page_address(page);
3498 
3499 	/* Step 1: restore nat cache */
3500 	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3501 	memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
3502 
3503 	/* Step 2: restore sit cache */
3504 	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3505 	memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
3506 	offset = 2 * SUM_JOURNAL_SIZE;
3507 
3508 	/* Step 3: restore summary entries */
3509 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3510 		unsigned short blk_off;
3511 		unsigned int segno;
3512 
3513 		seg_i = CURSEG_I(sbi, i);
3514 		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
3515 		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
3516 		seg_i->next_segno = segno;
3517 		reset_curseg(sbi, i, 0);
3518 		seg_i->alloc_type = ckpt->alloc_type[i];
3519 		seg_i->next_blkoff = blk_off;
3520 
3521 		if (seg_i->alloc_type == SSR)
3522 			blk_off = sbi->blocks_per_seg;
3523 
3524 		for (j = 0; j < blk_off; j++) {
3525 			struct f2fs_summary *s;
3526 			s = (struct f2fs_summary *)(kaddr + offset);
3527 			seg_i->sum_blk->entries[j] = *s;
3528 			offset += SUMMARY_SIZE;
3529 			if (offset + SUMMARY_SIZE <= PAGE_SIZE -
3530 						SUM_FOOTER_SIZE)
3531 				continue;
3532 
3533 			f2fs_put_page(page, 1);
3534 			page = NULL;
3535 
3536 			page = f2fs_get_meta_page(sbi, start++);
3537 			if (IS_ERR(page))
3538 				return PTR_ERR(page);
3539 			kaddr = (unsigned char *)page_address(page);
3540 			offset = 0;
3541 		}
3542 	}
3543 	f2fs_put_page(page, 1);
3544 	return 0;
3545 }
3546 
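/*
 * Restore one curseg from its on-disk summary block: locate the block
 * either in the checkpoint pack or in the SSA area, rebuild node
 * summaries if they were not written at checkpoint time, and reload
 * the journal, summary entries and allocation state of the curseg.
 */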
3547 static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
3548 {
3549 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
3550 	struct f2fs_summary_block *sum;
3551 	struct curseg_info *curseg;
3552 	struct page *new;
3553 	unsigned short blk_off;
3554 	unsigned int segno = 0;
3555 	block_t blk_addr = 0;
3556 	int err = 0;
3557 
3558 	/* get segment number and block addr */
3559 	if (IS_DATASEG(type)) {
3560 		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
3561 		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
3562 							CURSEG_HOT_DATA]);
3563 		if (__exist_node_summaries(sbi))
3564 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
3565 		else
3566 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
3567 	} else {
3568 		segno = le32_to_cpu(ckpt->cur_node_segno[type -
3569 							CURSEG_HOT_NODE]);
3570 		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
3571 							CURSEG_HOT_NODE]);
3572 		if (__exist_node_summaries(sbi))
3573 			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
3574 							type - CURSEG_HOT_NODE);
3575 		else
3576 			blk_addr = GET_SUM_BLOCK(sbi, segno);
3577 	}
3578 
3579 	new = f2fs_get_meta_page(sbi, blk_addr);
3580 	if (IS_ERR(new))
3581 		return PTR_ERR(new);
3582 	sum = (struct f2fs_summary_block *)page_address(new);
3583 
3584 	if (IS_NODESEG(type)) {
3585 		if (__exist_node_summaries(sbi)) {
3586 			struct f2fs_summary *ns = &sum->entries[0];
3587 			int i;
3588 			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
3589 				ns->version = 0;
3590 				ns->ofs_in_node = 0;
3591 			}
3592 		} else {
3593 			err = f2fs_restore_node_summary(sbi, segno, sum);
3594 			if (err)
3595 				goto out;
3596 		}
3597 	}
3598 
3599 	/* set the uncompleted segment as curseg */
3600 	curseg = CURSEG_I(sbi, type);
3601 	mutex_lock(&curseg->curseg_mutex);
3602 
3603 	/* update journal info */
3604 	down_write(&curseg->journal_rwsem);
3605 	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
3606 	up_write(&curseg->journal_rwsem);
3607 
3608 	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
3609 	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
3610 	curseg->next_segno = segno;
3611 	reset_curseg(sbi, type, 0);
3612 	curseg->alloc_type = ckpt->alloc_type[type];
3613 	curseg->next_blkoff = blk_off;
3614 	mutex_unlock(&curseg->curseg_mutex);
3615 out:
3616 	f2fs_put_page(new, 1);
3617 	return err;
3618 }
3619 
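/*
 * Restore all active cursegs at mount time: read the compacted data
 * summaries first if CP_COMPACT_SUM_FLAG is set, then the remaining
 * normal summary blocks, and finally sanity check the NAT/SIT journals.
 */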
3620 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
3621 {
3622 	struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
3623 	struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
3624 	int type = CURSEG_HOT_DATA;
3625 	int err;
3626 
3627 	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
3628 		int npages = f2fs_npages_for_summary_flush(sbi, true);
3629 
3630 		if (npages >= 2)
3631 			f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3632 							META_CP, true);
3633 
3634 		/* restore for compacted data summary */
3635 		err = read_compacted_summaries(sbi);
3636 		if (err)
3637 			return err;
3638 		type = CURSEG_HOT_NODE;
3639 	}
3640 
3641 	if (__exist_node_summaries(sbi))
3642 		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
3643 					NR_CURSEG_TYPE - type, META_CP, true);
3644 
3645 	for (; type <= CURSEG_COLD_NODE; type++) {
3646 		err = read_normal_summaries(sbi, type);
3647 		if (err)
3648 			return err;
3649 	}
3650 
3651 	/* sanity check for summary blocks */
3652 	if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
3653 			sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES) {
3654 		f2fs_err(sbi, "invalid journal entries nats %u sits %u",
3655 			 nats_in_cursum(nat_j), sits_in_cursum(sit_j));
3656 		return -EINVAL;
3657 	}
3658 
3659 	return 0;
3660 }
3661 
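/*
 * Write-side counterpart of read_compacted_summaries(): dump the NAT
 * journal, the SIT journal and the data curseg summary entries into
 * consecutive meta pages starting at @blkaddr.
 */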
3662 static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
3663 {
3664 	struct page *page;
3665 	unsigned char *kaddr;
3666 	struct f2fs_summary *summary;
3667 	struct curseg_info *seg_i;
3668 	int written_size = 0;
3669 	int i, j;
3670 
3671 	page = f2fs_grab_meta_page(sbi, blkaddr++);
3672 	kaddr = (unsigned char *)page_address(page);
3673 	memset(kaddr, 0, PAGE_SIZE);
3674 
3675 	/* Step 1: write nat cache */
3676 	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3677 	memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
3678 	written_size += SUM_JOURNAL_SIZE;
3679 
3680 	/* Step 2: write sit cache */
3681 	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3682 	memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
3683 	written_size += SUM_JOURNAL_SIZE;
3684 
3685 	/* Step 3: write summary entries */
3686 	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
3687 		unsigned short blkoff;
3688 		seg_i = CURSEG_I(sbi, i);
3689 		if (sbi->ckpt->alloc_type[i] == SSR)
3690 			blkoff = sbi->blocks_per_seg;
3691 		else
3692 			blkoff = curseg_blkoff(sbi, i);
3693 
3694 		for (j = 0; j < blkoff; j++) {
3695 			if (!page) {
3696 				page = f2fs_grab_meta_page(sbi, blkaddr++);
3697 				kaddr = (unsigned char *)page_address(page);
3698 				memset(kaddr, 0, PAGE_SIZE);
3699 				written_size = 0;
3700 			}
3701 			summary = (struct f2fs_summary *)(kaddr + written_size);
3702 			*summary = seg_i->sum_blk->entries[j];
3703 			written_size += SUMMARY_SIZE;
3704 
3705 			if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
3706 							SUM_FOOTER_SIZE)
3707 				continue;
3708 
3709 			set_page_dirty(page);
3710 			f2fs_put_page(page, 1);
3711 			page = NULL;
3712 		}
3713 	}
3714 	if (page) {
3715 		set_page_dirty(page);
3716 		f2fs_put_page(page, 1);
3717 	}
3718 }
3719 
3720 static void write_normal_summaries(struct f2fs_sb_info *sbi,
3721 					block_t blkaddr, int type)
3722 {
3723 	int i, end;
3724 	if (IS_DATASEG(type))
3725 		end = type + NR_CURSEG_DATA_TYPE;
3726 	else
3727 		end = type + NR_CURSEG_NODE_TYPE;
3728 
3729 	for (i = type; i < end; i++)
3730 		write_current_sum_page(sbi, i, blkaddr + (i - type));
3731 }
3732 
3733 void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3734 {
3735 	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
3736 		write_compacted_summaries(sbi, start_blk);
3737 	else
3738 		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
3739 }
3740 
3741 void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
3742 {
3743 	write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
3744 }
3745 
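/*
 * Look up @val (a nid or a segno) in the NAT/SIT journal of the current
 * summary block.  Returns the index of the matching slot, allocates a
 * new slot if @alloc is set and space remains, or returns -1 otherwise.
 */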
3746 int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
3747 					unsigned int val, int alloc)
3748 {
3749 	int i;
3750 
3751 	if (type == NAT_JOURNAL) {
3752 		for (i = 0; i < nats_in_cursum(journal); i++) {
3753 			if (le32_to_cpu(nid_in_journal(journal, i)) == val)
3754 				return i;
3755 		}
3756 		if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
3757 			return update_nats_in_cursum(journal, 1);
3758 	} else if (type == SIT_JOURNAL) {
3759 		for (i = 0; i < sits_in_cursum(journal); i++)
3760 			if (le32_to_cpu(segno_in_journal(journal, i)) == val)
3761 				return i;
3762 		if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
3763 			return update_sits_in_cursum(journal, 1);
3764 	}
3765 	return -1;
3766 }
3767 
3768 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
3769 					unsigned int segno)
3770 {
3771 	return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
3772 }
3773 
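/*
 * SIT blocks are double buffered between the two SIT packs: copy the
 * in-memory segment info for @start into the alternate pack location,
 * mark the page dirty and flip the pack selection bit.
 */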
3774 static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
3775 					unsigned int start)
3776 {
3777 	struct sit_info *sit_i = SIT_I(sbi);
3778 	struct page *page;
3779 	pgoff_t src_off, dst_off;
3780 
3781 	src_off = current_sit_addr(sbi, start);
3782 	dst_off = next_sit_addr(sbi, src_off);
3783 
3784 	page = f2fs_grab_meta_page(sbi, dst_off);
3785 	seg_info_to_sit_page(sbi, page, start);
3786 
3787 	set_page_dirty(page);
3788 	set_to_next_sit(sit_i, start);
3789 
3790 	return page;
3791 }
3792 
3793 static struct sit_entry_set *grab_sit_entry_set(void)
3794 {
3795 	struct sit_entry_set *ses =
3796 			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
3797 
3798 	ses->entry_cnt = 0;
3799 	INIT_LIST_HEAD(&ses->set_list);
3800 	return ses;
3801 }
3802 
3803 static void release_sit_entry_set(struct sit_entry_set *ses)
3804 {
3805 	list_del(&ses->set_list);
3806 	kmem_cache_free(sit_entry_set_slab, ses);
3807 }
3808 
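/*
 * Keep the sit entry set list sorted by ascending entry_cnt;
 * f2fs_flush_sit_entries() walks the list in order and falls back from
 * the SIT journal to SIT pages once an entry set no longer fits.
 */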
3809 static void adjust_sit_entry_set(struct sit_entry_set *ses,
3810 						struct list_head *head)
3811 {
3812 	struct sit_entry_set *next = ses;
3813 
3814 	if (list_is_last(&ses->set_list, head))
3815 		return;
3816 
3817 	list_for_each_entry_continue(next, head, set_list)
3818 		if (ses->entry_cnt <= next->entry_cnt)
3819 			break;
3820 
3821 	list_move_tail(&ses->set_list, &next->set_list);
3822 }
3823 
3824 static void add_sit_entry(unsigned int segno, struct list_head *head)
3825 {
3826 	struct sit_entry_set *ses;
3827 	unsigned int start_segno = START_SEGNO(segno);
3828 
3829 	list_for_each_entry(ses, head, set_list) {
3830 		if (ses->start_segno == start_segno) {
3831 			ses->entry_cnt++;
3832 			adjust_sit_entry_set(ses, head);
3833 			return;
3834 		}
3835 	}
3836 
3837 	ses = grab_sit_entry_set();
3838 
3839 	ses->start_segno = start_segno;
3840 	ses->entry_cnt++;
3841 	list_add(&ses->set_list, head);
3842 }
3843 
3844 static void add_sits_in_set(struct f2fs_sb_info *sbi)
3845 {
3846 	struct f2fs_sm_info *sm_info = SM_I(sbi);
3847 	struct list_head *set_list = &sm_info->sit_entry_set;
3848 	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
3849 	unsigned int segno;
3850 
3851 	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
3852 		add_sit_entry(segno, set_list);
3853 }
3854 
3855 static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
3856 {
3857 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3858 	struct f2fs_journal *journal = curseg->journal;
3859 	int i;
3860 
3861 	down_write(&curseg->journal_rwsem);
3862 	for (i = 0; i < sits_in_cursum(journal); i++) {
3863 		unsigned int segno;
3864 		bool dirtied;
3865 
3866 		segno = le32_to_cpu(segno_in_journal(journal, i));
3867 		dirtied = __mark_sit_entry_dirty(sbi, segno);
3868 
3869 		if (!dirtied)
3870 			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
3871 	}
3872 	update_sits_in_cursum(journal, -i);
3873 	up_write(&curseg->journal_rwsem);
3874 }
3875 
3876 /*
3877  * CP calls this function, which flushes SIT entries including sit_journal,
3878  * and moves prefree segs to free segs.
3879  */
3880 void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
3881 {
3882 	struct sit_info *sit_i = SIT_I(sbi);
3883 	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
3884 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3885 	struct f2fs_journal *journal = curseg->journal;
3886 	struct sit_entry_set *ses, *tmp;
3887 	struct list_head *head = &SM_I(sbi)->sit_entry_set;
3888 	bool to_journal = !is_sbi_flag_set(sbi, SBI_IS_RESIZEFS);
3889 	struct seg_entry *se;
3890 
3891 	down_write(&sit_i->sentry_lock);
3892 
3893 	if (!sit_i->dirty_sentries)
3894 		goto out;
3895 
3896 	/*
3897 	 * temporarily add and account the sit entries marked in the dirty
3898 	 * bitmap in sit entry sets
3899 	 */
3900 	add_sits_in_set(sbi);
3901 
3902 	/*
3903 	 * if there is not enough space in the journal to store the dirty
3904 	 * sit entries, remove all entries from the journal and account them
3905 	 * in the sit entry sets instead.
3906 	 */
3907 	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL) ||
3908 								!to_journal)
3909 		remove_sits_in_journal(sbi);
3910 
3911 	/*
3912 	 * there are two steps to flush sit entries:
3913 	 * #1, flush sit entries to journal in current cold data summary block.
3914 	 * #2, flush sit entries to sit page.
3915 	 */
3916 	list_for_each_entry_safe(ses, tmp, head, set_list) {
3917 		struct page *page = NULL;
3918 		struct f2fs_sit_block *raw_sit = NULL;
3919 		unsigned int start_segno = ses->start_segno;
3920 		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
3921 						(unsigned long)MAIN_SEGS(sbi));
3922 		unsigned int segno = start_segno;
3923 
3924 		if (to_journal &&
3925 			!__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
3926 			to_journal = false;
3927 
3928 		if (to_journal) {
3929 			down_write(&curseg->journal_rwsem);
3930 		} else {
3931 			page = get_next_sit_page(sbi, start_segno);
3932 			raw_sit = page_address(page);
3933 		}
3934 
3935 		/* flush dirty sit entries in region of current sit set */
3936 		for_each_set_bit_from(segno, bitmap, end) {
3937 			int offset, sit_offset;
3938 
3939 			se = get_seg_entry(sbi, segno);
3940 #ifdef CONFIG_F2FS_CHECK_FS
3941 			if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
3942 						SIT_VBLOCK_MAP_SIZE))
3943 				f2fs_bug_on(sbi, 1);
3944 #endif
3945 
3946 			/* add discard candidates */
3947 			if (!(cpc->reason & CP_DISCARD)) {
3948 				cpc->trim_start = segno;
3949 				add_discard_addrs(sbi, cpc, false);
3950 			}
3951 
3952 			if (to_journal) {
3953 				offset = f2fs_lookup_journal_in_cursum(journal,
3954 							SIT_JOURNAL, segno, 1);
3955 				f2fs_bug_on(sbi, offset < 0);
3956 				segno_in_journal(journal, offset) =
3957 							cpu_to_le32(segno);
3958 				seg_info_to_raw_sit(se,
3959 					&sit_in_journal(journal, offset));
3960 				check_block_count(sbi, segno,
3961 					&sit_in_journal(journal, offset));
3962 			} else {
3963 				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
3964 				seg_info_to_raw_sit(se,
3965 						&raw_sit->entries[sit_offset]);
3966 				check_block_count(sbi, segno,
3967 						&raw_sit->entries[sit_offset]);
3968 			}
3969 
3970 			__clear_bit(segno, bitmap);
3971 			sit_i->dirty_sentries--;
3972 			ses->entry_cnt--;
3973 		}
3974 
3975 		if (to_journal)
3976 			up_write(&curseg->journal_rwsem);
3977 		else
3978 			f2fs_put_page(page, 1);
3979 
3980 		f2fs_bug_on(sbi, ses->entry_cnt);
3981 		release_sit_entry_set(ses);
3982 	}
3983 
3984 	f2fs_bug_on(sbi, !list_empty(head));
3985 	f2fs_bug_on(sbi, sit_i->dirty_sentries);
3986 out:
3987 	if (cpc->reason & CP_DISCARD) {
3988 		__u64 trim_start = cpc->trim_start;
3989 
3990 		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
3991 			add_discard_addrs(sbi, cpc, false);
3992 
3993 		cpc->trim_start = trim_start;
3994 	}
3995 	up_write(&sit_i->sentry_lock);
3996 
3997 	set_prefree_as_free_segments(sbi);
3998 }
3999 
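/*
 * Allocate the in-memory SIT: one seg_entry per main segment, whose
 * cur_valid_map / ckpt_valid_map / discard_map (plus a mirror map when
 * CONFIG_F2FS_CHECK_FS is set) are carved out of a single bitmap
 * allocation, plus a copy of the on-disk SIT bitmap from the checkpoint.
 */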
4000 static int build_sit_info(struct f2fs_sb_info *sbi)
4001 {
4002 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4003 	struct sit_info *sit_i;
4004 	unsigned int sit_segs, start;
4005 	char *src_bitmap, *bitmap;
4006 	unsigned int bitmap_size, main_bitmap_size, sit_bitmap_size;
4007 
4008 	/* allocate memory for SIT information */
4009 	sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
4010 	if (!sit_i)
4011 		return -ENOMEM;
4012 
4013 	SM_I(sbi)->sit_info = sit_i;
4014 
4015 	sit_i->sentries =
4016 		f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
4017 					      MAIN_SEGS(sbi)),
4018 			      GFP_KERNEL);
4019 	if (!sit_i->sentries)
4020 		return -ENOMEM;
4021 
4022 	main_bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4023 	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, main_bitmap_size,
4024 								GFP_KERNEL);
4025 	if (!sit_i->dirty_sentries_bitmap)
4026 		return -ENOMEM;
4027 
4028 #ifdef CONFIG_F2FS_CHECK_FS
4029 	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 4;
4030 #else
4031 	bitmap_size = MAIN_SEGS(sbi) * SIT_VBLOCK_MAP_SIZE * 3;
4032 #endif
4033 	sit_i->bitmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4034 	if (!sit_i->bitmap)
4035 		return -ENOMEM;
4036 
4037 	bitmap = sit_i->bitmap;
4038 
4039 	for (start = 0; start < MAIN_SEGS(sbi); start++) {
4040 		sit_i->sentries[start].cur_valid_map = bitmap;
4041 		bitmap += SIT_VBLOCK_MAP_SIZE;
4042 
4043 		sit_i->sentries[start].ckpt_valid_map = bitmap;
4044 		bitmap += SIT_VBLOCK_MAP_SIZE;
4045 
4046 #ifdef CONFIG_F2FS_CHECK_FS
4047 		sit_i->sentries[start].cur_valid_map_mir = bitmap;
4048 		bitmap += SIT_VBLOCK_MAP_SIZE;
4049 #endif
4050 
4051 		sit_i->sentries[start].discard_map = bitmap;
4052 		bitmap += SIT_VBLOCK_MAP_SIZE;
4053 	}
4054 
4055 	sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
4056 	if (!sit_i->tmp_map)
4057 		return -ENOMEM;
4058 
4059 	if (__is_large_section(sbi)) {
4060 		sit_i->sec_entries =
4061 			f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
4062 						      MAIN_SECS(sbi)),
4063 				      GFP_KERNEL);
4064 		if (!sit_i->sec_entries)
4065 			return -ENOMEM;
4066 	}
4067 
4068 	/* get information related to SIT */
4069 	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
4070 
4071 	/* setup SIT bitmap from checkpoint pack */
4072 	sit_bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
4073 	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
4074 
4075 	sit_i->sit_bitmap = kmemdup(src_bitmap, sit_bitmap_size, GFP_KERNEL);
4076 	if (!sit_i->sit_bitmap)
4077 		return -ENOMEM;
4078 
4079 #ifdef CONFIG_F2FS_CHECK_FS
4080 	sit_i->sit_bitmap_mir = kmemdup(src_bitmap,
4081 					sit_bitmap_size, GFP_KERNEL);
4082 	if (!sit_i->sit_bitmap_mir)
4083 		return -ENOMEM;
4084 
4085 	sit_i->invalid_segmap = f2fs_kvzalloc(sbi,
4086 					main_bitmap_size, GFP_KERNEL);
4087 	if (!sit_i->invalid_segmap)
4088 		return -ENOMEM;
4089 #endif
4090 
4091 	/* init SIT information */
4092 	sit_i->s_ops = &default_salloc_ops;
4093 
4094 	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
4095 	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
4096 	sit_i->written_valid_blocks = 0;
4097 	sit_i->bitmap_size = sit_bitmap_size;
4098 	sit_i->dirty_sentries = 0;
4099 	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
4100 	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
4101 	sit_i->mounted_time = ktime_get_boottime_seconds();
4102 	init_rwsem(&sit_i->sentry_lock);
4103 	return 0;
4104 }
4105 
4106 static int build_free_segmap(struct f2fs_sb_info *sbi)
4107 {
4108 	struct free_segmap_info *free_i;
4109 	unsigned int bitmap_size, sec_bitmap_size;
4110 
4111 	/* allocate memory for free segmap information */
4112 	free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
4113 	if (!free_i)
4114 		return -ENOMEM;
4115 
4116 	SM_I(sbi)->free_info = free_i;
4117 
4118 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4119 	free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
4120 	if (!free_i->free_segmap)
4121 		return -ENOMEM;
4122 
4123 	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4124 	free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
4125 	if (!free_i->free_secmap)
4126 		return -ENOMEM;
4127 
4128 	/* set all segments as dirty temporarily */
4129 	memset(free_i->free_segmap, 0xff, bitmap_size);
4130 	memset(free_i->free_secmap, 0xff, sec_bitmap_size);
4131 
4132 	/* init free segmap information */
4133 	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
4134 	free_i->free_segments = 0;
4135 	free_i->free_sections = 0;
4136 	spin_lock_init(&free_i->segmap_lock);
4137 	return 0;
4138 }
4139 
4140 static int build_curseg(struct f2fs_sb_info *sbi)
4141 {
4142 	struct curseg_info *array;
4143 	int i;
4144 
4145 	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
4146 			     GFP_KERNEL);
4147 	if (!array)
4148 		return -ENOMEM;
4149 
4150 	SM_I(sbi)->curseg_array = array;
4151 
4152 	for (i = 0; i < NR_CURSEG_TYPE; i++) {
4153 		mutex_init(&array[i].curseg_mutex);
4154 		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
4155 		if (!array[i].sum_blk)
4156 			return -ENOMEM;
4157 		init_rwsem(&array[i].journal_rwsem);
4158 		array[i].journal = f2fs_kzalloc(sbi,
4159 				sizeof(struct f2fs_journal), GFP_KERNEL);
4160 		if (!array[i].journal)
4161 			return -ENOMEM;
4162 		array[i].segno = NULL_SEGNO;
4163 		array[i].next_blkoff = 0;
4164 	}
4165 	return restore_curseg_summaries(sbi);
4166 }
4167 
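/*
 * Build in-memory SIT entries: read every SIT block from the current
 * pack, then override the affected entries with the newer copies held
 * in the SIT journal, and finally cross check the node block count
 * against the checkpointed value.
 */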
4168 static int build_sit_entries(struct f2fs_sb_info *sbi)
4169 {
4170 	struct sit_info *sit_i = SIT_I(sbi);
4171 	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
4172 	struct f2fs_journal *journal = curseg->journal;
4173 	struct seg_entry *se;
4174 	struct f2fs_sit_entry sit;
4175 	int sit_blk_cnt = SIT_BLK_CNT(sbi);
4176 	unsigned int i, start, end;
4177 	unsigned int readed, start_blk = 0;
4178 	int err = 0;
4179 	block_t total_node_blocks = 0;
4180 
4181 	do {
4182 		readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
4183 							META_SIT, true);
4184 
4185 		start = start_blk * sit_i->sents_per_block;
4186 		end = (start_blk + readed) * sit_i->sents_per_block;
4187 
4188 		for (; start < end && start < MAIN_SEGS(sbi); start++) {
4189 			struct f2fs_sit_block *sit_blk;
4190 			struct page *page;
4191 
4192 			se = &sit_i->sentries[start];
4193 			page = get_current_sit_page(sbi, start);
4194 			if (IS_ERR(page))
4195 				return PTR_ERR(page);
4196 			sit_blk = (struct f2fs_sit_block *)page_address(page);
4197 			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
4198 			f2fs_put_page(page, 1);
4199 
4200 			err = check_block_count(sbi, start, &sit);
4201 			if (err)
4202 				return err;
4203 			seg_info_from_raw_sit(se, &sit);
4204 			if (IS_NODESEG(se->type))
4205 				total_node_blocks += se->valid_blocks;
4206 
4207 			/* build discard map only one time */
4208 			if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4209 				memset(se->discard_map, 0xff,
4210 					SIT_VBLOCK_MAP_SIZE);
4211 			} else {
4212 				memcpy(se->discard_map,
4213 					se->cur_valid_map,
4214 					SIT_VBLOCK_MAP_SIZE);
4215 				sbi->discard_blks +=
4216 					sbi->blocks_per_seg -
4217 					se->valid_blocks;
4218 			}
4219 
4220 			if (__is_large_section(sbi))
4221 				get_sec_entry(sbi, start)->valid_blocks +=
4222 							se->valid_blocks;
4223 		}
4224 		start_blk += readed;
4225 	} while (start_blk < sit_blk_cnt);
4226 
4227 	down_read(&curseg->journal_rwsem);
4228 	for (i = 0; i < sits_in_cursum(journal); i++) {
4229 		unsigned int old_valid_blocks;
4230 
4231 		start = le32_to_cpu(segno_in_journal(journal, i));
4232 		if (start >= MAIN_SEGS(sbi)) {
4233 			f2fs_err(sbi, "Wrong journal entry on segno %u",
4234 				 start);
4235 			err = -EFSCORRUPTED;
4236 			break;
4237 		}
4238 
4239 		se = &sit_i->sentries[start];
4240 		sit = sit_in_journal(journal, i);
4241 
4242 		old_valid_blocks = se->valid_blocks;
4243 		if (IS_NODESEG(se->type))
4244 			total_node_blocks -= old_valid_blocks;
4245 
4246 		err = check_block_count(sbi, start, &sit);
4247 		if (err)
4248 			break;
4249 		seg_info_from_raw_sit(se, &sit);
4250 		if (IS_NODESEG(se->type))
4251 			total_node_blocks += se->valid_blocks;
4252 
4253 		if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
4254 			memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
4255 		} else {
4256 			memcpy(se->discard_map, se->cur_valid_map,
4257 						SIT_VBLOCK_MAP_SIZE);
4258 			sbi->discard_blks += old_valid_blocks;
4259 			sbi->discard_blks -= se->valid_blocks;
4260 		}
4261 
4262 		if (__is_large_section(sbi)) {
4263 			get_sec_entry(sbi, start)->valid_blocks +=
4264 							se->valid_blocks;
4265 			get_sec_entry(sbi, start)->valid_blocks -=
4266 							old_valid_blocks;
4267 		}
4268 	}
4269 	up_read(&curseg->journal_rwsem);
4270 
4271 	if (!err && total_node_blocks != valid_node_count(sbi)) {
4272 		f2fs_err(sbi, "SIT is corrupted node# %u vs %u",
4273 			 total_node_blocks, valid_node_count(sbi));
4274 		err = -EFSCORRUPTED;
4275 	}
4276 
4277 	return err;
4278 }
4279 
4280 static void init_free_segmap(struct f2fs_sb_info *sbi)
4281 {
4282 	unsigned int start;
4283 	int type;
4284 
4285 	for (start = 0; start < MAIN_SEGS(sbi); start++) {
4286 		struct seg_entry *sentry = get_seg_entry(sbi, start);
4287 		if (!sentry->valid_blocks)
4288 			__set_free(sbi, start);
4289 		else
4290 			SIT_I(sbi)->written_valid_blocks +=
4291 						sentry->valid_blocks;
4292 	}
4293 
4294 	/* mark the current segments as in-use */
4295 	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
4296 		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
4297 		__set_test_and_inuse(sbi, curseg_t->segno);
4298 	}
4299 }
4300 
4301 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
4302 {
4303 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4304 	struct free_segmap_info *free_i = FREE_I(sbi);
4305 	unsigned int segno = 0, offset = 0;
4306 	unsigned short valid_blocks;
4307 
4308 	while (1) {
4309 		/* find dirty segment based on free segmap */
4310 		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
4311 		if (segno >= MAIN_SEGS(sbi))
4312 			break;
4313 		offset = segno + 1;
4314 		valid_blocks = get_valid_blocks(sbi, segno, false);
4315 		if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
4316 			continue;
4317 		if (valid_blocks > sbi->blocks_per_seg) {
4318 			f2fs_bug_on(sbi, 1);
4319 			continue;
4320 		}
4321 		mutex_lock(&dirty_i->seglist_lock);
4322 		__locate_dirty_segment(sbi, segno, DIRTY);
4323 		mutex_unlock(&dirty_i->seglist_lock);
4324 	}
4325 }
4326 
4327 static int init_victim_secmap(struct f2fs_sb_info *sbi)
4328 {
4329 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4330 	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
4331 
4332 	dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4333 	if (!dirty_i->victim_secmap)
4334 		return -ENOMEM;
4335 	return 0;
4336 }
4337 
4338 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
4339 {
4340 	struct dirty_seglist_info *dirty_i;
4341 	unsigned int bitmap_size, i;
4342 
4343 	/* allocate memory for dirty segments list information */
4344 	dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
4345 								GFP_KERNEL);
4346 	if (!dirty_i)
4347 		return -ENOMEM;
4348 
4349 	SM_I(sbi)->dirty_info = dirty_i;
4350 	mutex_init(&dirty_i->seglist_lock);
4351 
4352 	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
4353 
4354 	for (i = 0; i < NR_DIRTY_TYPE; i++) {
4355 		dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
4356 								GFP_KERNEL);
4357 		if (!dirty_i->dirty_segmap[i])
4358 			return -ENOMEM;
4359 	}
4360 
4361 	init_dirty_segmap(sbi);
4362 	return init_victim_secmap(sbi);
4363 }
4364 
4365 static int sanity_check_curseg(struct f2fs_sb_info *sbi)
4366 {
4367 	int i;
4368 
4369 	/*
4370 	 * In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
4371 	 * In LFS curseg, all blkaddr after .next_blkoff should be unused.
4372 	 */
4373 	for (i = 0; i < NO_CHECK_TYPE; i++) {
4374 		struct curseg_info *curseg = CURSEG_I(sbi, i);
4375 		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
4376 		unsigned int blkofs = curseg->next_blkoff;
4377 
4378 		if (f2fs_sb_has_readonly(sbi) &&
4379 			i != CURSEG_HOT_DATA && i != CURSEG_HOT_NODE)
4380 			continue;
4381 
4382 		if (f2fs_test_bit(blkofs, se->cur_valid_map))
4383 			goto out;
4384 
4385 		if (curseg->alloc_type == SSR)
4386 			continue;
4387 
4388 		for (blkofs += 1; blkofs < sbi->blocks_per_seg; blkofs++) {
4389 			if (!f2fs_test_bit(blkofs, se->cur_valid_map))
4390 				continue;
4391 out:
4392 			f2fs_err(sbi,
4393 				 "Current segment's next free block offset is inconsistent with bitmap, logtype:%u, segno:%u, type:%u, next_blkoff:%u, blkofs:%u",
4394 				 i, curseg->segno, curseg->alloc_type,
4395 				 curseg->next_blkoff, blkofs);
4396 			return -EFSCORRUPTED;
4397 		}
4398 	}
4399 	return 0;
4400 }
4401 
4402 #ifdef CONFIG_BLK_DEV_ZONED
4403 
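/*
 * Check that the write pointer of a sequential zone is consistent with
 * the valid block bitmap: report valid blocks found beyond the write
 * pointer, and reset the write pointer of zones that hold no valid
 * blocks at all.
 */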
4404 static int check_zone_write_pointer(struct f2fs_sb_info *sbi,
4405 				    struct f2fs_dev_info *fdev,
4406 				    struct blk_zone *zone)
4407 {
4408 	unsigned int wp_segno, wp_blkoff, zone_secno, zone_segno, segno;
4409 	block_t zone_block, wp_block, last_valid_block;
4410 	unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4411 	int i, s, b, ret;
4412 	struct seg_entry *se;
4413 
4414 	if (zone->type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4415 		return 0;
4416 
4417 	wp_block = fdev->start_blk + (zone->wp >> log_sectors_per_block);
4418 	wp_segno = GET_SEGNO(sbi, wp_block);
4419 	wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
4420 	zone_block = fdev->start_blk + (zone->start >> log_sectors_per_block);
4421 	zone_segno = GET_SEGNO(sbi, zone_block);
4422 	zone_secno = GET_SEC_FROM_SEG(sbi, zone_segno);
4423 
4424 	if (zone_segno >= MAIN_SEGS(sbi))
4425 		return 0;
4426 
4427 	/*
4428 	 * Skip checking the zones that cursegs point to, since
4429 	 * fix_curseg_write_pointer() checks them.
4430 	 */
4431 	for (i = 0; i < NO_CHECK_TYPE; i++)
4432 		if (zone_secno == GET_SEC_FROM_SEG(sbi,
4433 						   CURSEG_I(sbi, i)->segno))
4434 			return 0;
4435 
4436 	/*
4437 	 * Get last valid block of the zone.
4438 	 */
4439 	last_valid_block = zone_block - 1;
4440 	for (s = sbi->segs_per_sec - 1; s >= 0; s--) {
4441 		segno = zone_segno + s;
4442 		se = get_seg_entry(sbi, segno);
4443 		for (b = sbi->blocks_per_seg - 1; b >= 0; b--)
4444 			if (f2fs_test_bit(b, se->cur_valid_map)) {
4445 				last_valid_block = START_BLOCK(sbi, segno) + b;
4446 				break;
4447 			}
4448 		if (last_valid_block >= zone_block)
4449 			break;
4450 	}
4451 
4452 	/*
4453 	 * If last valid block is beyond the write pointer, report the
4454 	 * inconsistency. This inconsistency does not cause a write error
4455 	 * because the zone will not be selected for write operations until
4456 	 * it gets discarded. Just report it.
4457 	 */
4458 	if (last_valid_block >= wp_block) {
4459 		f2fs_notice(sbi, "Valid block beyond write pointer: "
4460 			    "valid block[0x%x,0x%x] wp[0x%x,0x%x]",
4461 			    GET_SEGNO(sbi, last_valid_block),
4462 			    GET_BLKOFF_FROM_SEG0(sbi, last_valid_block),
4463 			    wp_segno, wp_blkoff);
4464 		return 0;
4465 	}
4466 
4467 	/*
4468 	 * If there is no valid block in the zone and if write pointer is
4469 	 * not at zone start, reset the write pointer.
4470 	 */
4471 	if (last_valid_block + 1 == zone_block && zone->wp != zone->start) {
4472 		f2fs_notice(sbi,
4473 			    "Zone without valid block has non-zero write "
4474 			    "pointer. Reset the write pointer: wp[0x%x,0x%x]",
4475 			    wp_segno, wp_blkoff);
4476 		ret = __f2fs_issue_discard_zone(sbi, fdev->bdev, zone_block,
4477 					zone->len >> log_sectors_per_block);
4478 		if (ret) {
4479 			f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
4480 				 fdev->path, ret);
4481 			return ret;
4482 		}
4483 	}
4484 
4485 	return 0;
4486 }
4487 
4488 static struct f2fs_dev_info *get_target_zoned_dev(struct f2fs_sb_info *sbi,
4489 						  block_t zone_blkaddr)
4490 {
4491 	int i;
4492 
4493 	for (i = 0; i < sbi->s_ndevs; i++) {
4494 		if (!bdev_is_zoned(FDEV(i).bdev))
4495 			continue;
4496 		if (sbi->s_ndevs == 1 || (FDEV(i).start_blk <= zone_blkaddr &&
4497 				zone_blkaddr <= FDEV(i).end_blk))
4498 			return &FDEV(i);
4499 	}
4500 
4501 	return NULL;
4502 }
4503 
4504 static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
4505 			      void *data) {
4506 	memcpy(data, zone, sizeof(struct blk_zone));
4507 	return 0;
4508 }
4509 
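/*
 * If the curseg of @type does not sit exactly on the device write
 * pointer of its zone, allocate a fresh section for it and, when the
 * newly assigned zone is not empty, reset that zone first.
 */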
4510 static int fix_curseg_write_pointer(struct f2fs_sb_info *sbi, int type)
4511 {
4512 	struct curseg_info *cs = CURSEG_I(sbi, type);
4513 	struct f2fs_dev_info *zbd;
4514 	struct blk_zone zone;
4515 	unsigned int cs_section, wp_segno, wp_blkoff, wp_sector_off;
4516 	block_t cs_zone_block, wp_block;
4517 	unsigned int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT;
4518 	sector_t zone_sector;
4519 	int err;
4520 
4521 	cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
4522 	cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
4523 
4524 	zbd = get_target_zoned_dev(sbi, cs_zone_block);
4525 	if (!zbd)
4526 		return 0;
4527 
4528 	/* report zone for the sector the curseg points to */
4529 	zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
4530 		<< log_sectors_per_block;
4531 	err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
4532 				  report_one_zone_cb, &zone);
4533 	if (err != 1) {
4534 		f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
4535 			 zbd->path, err);
4536 		return err;
4537 	}
4538 
4539 	if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4540 		return 0;
4541 
4542 	wp_block = zbd->start_blk + (zone.wp >> log_sectors_per_block);
4543 	wp_segno = GET_SEGNO(sbi, wp_block);
4544 	wp_blkoff = wp_block - START_BLOCK(sbi, wp_segno);
4545 	wp_sector_off = zone.wp & GENMASK(log_sectors_per_block - 1, 0);
4546 
4547 	if (cs->segno == wp_segno && cs->next_blkoff == wp_blkoff &&
4548 		wp_sector_off == 0)
4549 		return 0;
4550 
4551 	f2fs_notice(sbi, "Unaligned curseg[%d] with write pointer: "
4552 		    "curseg[0x%x,0x%x] wp[0x%x,0x%x]",
4553 		    type, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff);
4554 
4555 	f2fs_notice(sbi, "Assign new section to curseg[%d]: "
4556 		    "curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
4557 	allocate_segment_by_default(sbi, type, true);
4558 
4559 	/* check consistency of the zone the curseg pointed to */
4560 	if (check_zone_write_pointer(sbi, zbd, &zone))
4561 		return -EIO;
4562 
4563 	/* check newly assigned zone */
4564 	cs_section = GET_SEC_FROM_SEG(sbi, cs->segno);
4565 	cs_zone_block = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, cs_section));
4566 
4567 	zbd = get_target_zoned_dev(sbi, cs_zone_block);
4568 	if (!zbd)
4569 		return 0;
4570 
4571 	zone_sector = (sector_t)(cs_zone_block - zbd->start_blk)
4572 		<< log_sectors_per_block;
4573 	err = blkdev_report_zones(zbd->bdev, zone_sector, 1,
4574 				  report_one_zone_cb, &zone);
4575 	if (err != 1) {
4576 		f2fs_err(sbi, "Report zone failed: %s errno=(%d)",
4577 			 zbd->path, err);
4578 		return err;
4579 	}
4580 
4581 	if (zone.type != BLK_ZONE_TYPE_SEQWRITE_REQ)
4582 		return 0;
4583 
4584 	if (zone.wp != zone.start) {
4585 		f2fs_notice(sbi,
4586 			    "New zone for curseg[%d] is not yet discarded. "
4587 			    "Reset the zone: curseg[0x%x,0x%x]",
4588 			    type, cs->segno, cs->next_blkoff);
4589 		err = __f2fs_issue_discard_zone(sbi, zbd->bdev,
4590 				zone_sector >> log_sectors_per_block,
4591 				zone.len >> log_sectors_per_block);
4592 		if (err) {
4593 			f2fs_err(sbi, "Discard zone failed: %s (errno=%d)",
4594 				 zbd->path, err);
4595 			return err;
4596 		}
4597 	}
4598 
4599 	return 0;
4600 }
4601 
4602 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
4603 {
4604 	int i, ret;
4605 
4606 	for (i = 0; i < NO_CHECK_TYPE; i++) {
4607 		ret = fix_curseg_write_pointer(sbi, i);
4608 		if (ret)
4609 			return ret;
4610 	}
4611 
4612 	return 0;
4613 }
4614 
4615 struct check_zone_write_pointer_args {
4616 	struct f2fs_sb_info *sbi;
4617 	struct f2fs_dev_info *fdev;
4618 };
4619 
4620 static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
4621 				      void *data) {
4622 	struct check_zone_write_pointer_args *args;
4623 	args = (struct check_zone_write_pointer_args *)data;
4624 
4625 	return check_zone_write_pointer(args->sbi, args->fdev, zone);
4626 }
4627 
4628 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
4629 {
4630 	int i, ret;
4631 	struct check_zone_write_pointer_args args;
4632 
4633 	for (i = 0; i < sbi->s_ndevs; i++) {
4634 		if (!bdev_is_zoned(FDEV(i).bdev))
4635 			continue;
4636 
4637 		args.sbi = sbi;
4638 		args.fdev = &FDEV(i);
4639 		ret = blkdev_report_zones(FDEV(i).bdev, 0, BLK_ALL_ZONES,
4640 					  check_zone_write_pointer_cb, &args);
4641 		if (ret < 0)
4642 			return ret;
4643 	}
4644 
4645 	return 0;
4646 }
4647 #else
4648 int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
4649 {
4650 	return 0;
4651 }
4652 
4653 int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
4654 {
4655 	return 0;
4656 }
4657 #endif
4658 
4659 /*
4660  * Update min, max modified time for cost-benefit GC algorithm
4661  */
4662 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
4663 {
4664 	struct sit_info *sit_i = SIT_I(sbi);
4665 	unsigned int segno;
4666 
4667 	down_write(&sit_i->sentry_lock);
4668 
4669 	sit_i->min_mtime = ULLONG_MAX;
4670 
4671 	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
4672 		unsigned int i;
4673 		unsigned long long mtime = 0;
4674 
4675 		for (i = 0; i < sbi->segs_per_sec; i++)
4676 			mtime += get_seg_entry(sbi, segno + i)->mtime;
4677 
4678 		mtime = div_u64(mtime, sbi->segs_per_sec);
4679 
4680 		if (sit_i->min_mtime > mtime)
4681 			sit_i->min_mtime = mtime;
4682 	}
4683 	sit_i->max_mtime = get_mtime(sbi, false);
4684 	up_write(&sit_i->sentry_lock);
4685 }
4686 
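/*
 * Build the segment manager at mount time: set up the tunables and the
 * flush/discard command controls, then the SIT, free segmap, cursegs
 * and dirty segmap in that order.
 */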
4687 int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
4688 {
4689 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
4690 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
4691 	struct f2fs_sm_info *sm_info;
4692 	int err;
4693 
4694 	sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
4695 	if (!sm_info)
4696 		return -ENOMEM;
4697 
4698 	/* init sm info */
4699 	sbi->sm_info = sm_info;
4700 	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
4701 	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
4702 	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
4703 	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
4704 	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
4705 	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
4706 	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
4707 	sm_info->rec_prefree_segments = sm_info->main_segments *
4708 					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
4709 	if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
4710 		sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;
4711 
4712 	if (!f2fs_lfs_mode(sbi))
4713 		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
4714 	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
4715 	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
4716 	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
4717 	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
4718 	sm_info->min_ssr_sections = reserved_sections(sbi);
4719 
4720 	INIT_LIST_HEAD(&sm_info->sit_entry_set);
4721 
4722 	init_rwsem(&sm_info->curseg_lock);
4723 
4724 	if (!f2fs_readonly(sbi->sb)) {
4725 		err = f2fs_create_flush_cmd_control(sbi);
4726 		if (err)
4727 			return err;
4728 	}
4729 
4730 	err = create_discard_cmd_control(sbi);
4731 	if (err)
4732 		return err;
4733 
4734 	err = build_sit_info(sbi);
4735 	if (err)
4736 		return err;
4737 	err = build_free_segmap(sbi);
4738 	if (err)
4739 		return err;
4740 	err = build_curseg(sbi);
4741 	if (err)
4742 		return err;
4743 
4744 	/* reinit free segmap based on SIT */
4745 	err = build_sit_entries(sbi);
4746 	if (err)
4747 		return err;
4748 
4749 	init_free_segmap(sbi);
4750 	err = build_dirty_segmap(sbi);
4751 	if (err)
4752 		return err;
4753 
4754 	err = sanity_check_curseg(sbi);
4755 	if (err)
4756 		return err;
4757 
4758 	init_min_max_mtime(sbi);
4759 	return 0;
4760 }
4761 
4762 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
4763 		enum dirty_type dirty_type)
4764 {
4765 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4766 
4767 	mutex_lock(&dirty_i->seglist_lock);
4768 	kvfree(dirty_i->dirty_segmap[dirty_type]);
4769 	dirty_i->nr_dirty[dirty_type] = 0;
4770 	mutex_unlock(&dirty_i->seglist_lock);
4771 }
4772 
4773 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
4774 {
4775 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4776 	kvfree(dirty_i->victim_secmap);
4777 }
4778 
4779 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
4780 {
4781 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4782 	int i;
4783 
4784 	if (!dirty_i)
4785 		return;
4786 
4787 	/* discard pre-free/dirty segments list */
4788 	for (i = 0; i < NR_DIRTY_TYPE; i++)
4789 		discard_dirty_segmap(sbi, i);
4790 
4791 	destroy_victim_secmap(sbi);
4792 	SM_I(sbi)->dirty_info = NULL;
4793 	kvfree(dirty_i);
4794 }
4795 
4796 static void destroy_curseg(struct f2fs_sb_info *sbi)
4797 {
4798 	struct curseg_info *array = SM_I(sbi)->curseg_array;
4799 	int i;
4800 
4801 	if (!array)
4802 		return;
4803 	SM_I(sbi)->curseg_array = NULL;
4804 	for (i = 0; i < NR_CURSEG_TYPE; i++) {
4805 		kvfree(array[i].sum_blk);
4806 		kvfree(array[i].journal);
4807 	}
4808 	kvfree(array);
4809 }
4810 
4811 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
4812 {
4813 	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
4814 	if (!free_i)
4815 		return;
4816 	SM_I(sbi)->free_info = NULL;
4817 	kvfree(free_i->free_segmap);
4818 	kvfree(free_i->free_secmap);
4819 	kvfree(free_i);
4820 }
4821 
4822 static void destroy_sit_info(struct f2fs_sb_info *sbi)
4823 {
4824 	struct sit_info *sit_i = SIT_I(sbi);
4825 
4826 	if (!sit_i)
4827 		return;
4828 
4829 	if (sit_i->sentries)
4830 		kvfree(sit_i->bitmap);
4831 	kvfree(sit_i->tmp_map);
4832 
4833 	kvfree(sit_i->sentries);
4834 	kvfree(sit_i->sec_entries);
4835 	kvfree(sit_i->dirty_sentries_bitmap);
4836 
4837 	SM_I(sbi)->sit_info = NULL;
4838 	kvfree(sit_i->sit_bitmap);
4839 #ifdef CONFIG_F2FS_CHECK_FS
4840 	kvfree(sit_i->sit_bitmap_mir);
4841 	kvfree(sit_i->invalid_segmap);
4842 #endif
4843 	kvfree(sit_i);
4844 }
4845 
4846 void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
4847 {
4848 	struct f2fs_sm_info *sm_info = SM_I(sbi);
4849 
4850 	if (!sm_info)
4851 		return;
4852 	f2fs_destroy_flush_cmd_control(sbi, true);
4853 	destroy_discard_cmd_control(sbi);
4854 	destroy_dirty_segmap(sbi);
4855 	destroy_curseg(sbi);
4856 	destroy_free_segmap(sbi);
4857 	destroy_sit_info(sbi);
4858 	sbi->sm_info = NULL;
4859 	kvfree(sm_info);
4860 }
4861 
4862 int __init f2fs_create_segment_manager_caches(void)
4863 {
4864 	discard_entry_slab = f2fs_kmem_cache_create("f2fs_discard_entry",
4865 			sizeof(struct discard_entry));
4866 	if (!discard_entry_slab)
4867 		goto fail;
4868 
4869 	discard_cmd_slab = f2fs_kmem_cache_create("f2fs_discard_cmd",
4870 			sizeof(struct discard_cmd));
4871 	if (!discard_cmd_slab)
4872 		goto destroy_discard_entry;
4873 
4874 	sit_entry_set_slab = f2fs_kmem_cache_create("f2fs_sit_entry_set",
4875 			sizeof(struct sit_entry_set));
4876 	if (!sit_entry_set_slab)
4877 		goto destroy_discard_cmd;
4878 
4879 	inmem_entry_slab = f2fs_kmem_cache_create("f2fs_inmem_page_entry",
4880 			sizeof(struct inmem_pages));
4881 	if (!inmem_entry_slab)
4882 		goto destroy_sit_entry_set;
4883 	return 0;
4884 
4885 destroy_sit_entry_set:
4886 	kmem_cache_destroy(sit_entry_set_slab);
4887 destroy_discard_cmd:
4888 	kmem_cache_destroy(discard_cmd_slab);
4889 destroy_discard_entry:
4890 	kmem_cache_destroy(discard_entry_slab);
4891 fail:
4892 	return -ENOMEM;
4893 }
4894 
4895 void f2fs_destroy_segment_manager_caches(void)
4896 {
4897 	kmem_cache_destroy(sit_entry_set_slab);
4898 	kmem_cache_destroy(discard_cmd_slab);
4899 	kmem_cache_destroy(discard_entry_slab);
4900 	kmem_cache_destroy(inmem_entry_slab);
4901 }
4902