1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * fs/f2fs/data.c
4 *
5 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
6 * http://www.samsung.com/
7 */
8 #include <linux/fs.h>
9 #include <linux/f2fs_fs.h>
10 #include <linux/buffer_head.h>
11 #include <linux/mpage.h>
12 #include <linux/writeback.h>
13 #include <linux/backing-dev.h>
14 #include <linux/pagevec.h>
15 #include <linux/blkdev.h>
16 #include <linux/bio.h>
17 #include <linux/swap.h>
18 #include <linux/prefetch.h>
19 #include <linux/uio.h>
20 #include <linux/cleancache.h>
21 #include <linux/sched/signal.h>
22
23 #include "f2fs.h"
24 #include "node.h"
25 #include "segment.h"
26 #include "trace.h"
27 #include <trace/events/f2fs.h>
28 #include <trace/events/android_fs.h>
29
30 #define NUM_PREALLOC_POST_READ_CTXS 128
31
32 static struct kmem_cache *bio_post_read_ctx_cache;
33 static struct kmem_cache *bio_entry_slab;
34 static mempool_t *bio_post_read_ctx_pool;
35 static struct bio_set f2fs_bioset;
36
37 #define F2FS_BIO_POOL_SIZE NR_CURSEG_TYPE
38
39 int __init f2fs_init_bioset(void)
40 {
41 if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
42 0, BIOSET_NEED_BVECS))
43 return -ENOMEM;
44 return 0;
45 }
46
47 void f2fs_destroy_bioset(void)
48 {
49 bioset_exit(&f2fs_bioset);
50 }
51
52 static inline struct bio *__f2fs_bio_alloc(gfp_t gfp_mask,
53 unsigned int nr_iovecs)
54 {
55 return bio_alloc_bioset(gfp_mask, nr_iovecs, &f2fs_bioset);
56 }
57
58 struct bio *f2fs_bio_alloc(struct f2fs_sb_info *sbi, int npages, bool noio)
59 {
60 if (noio) {
61 /* No failure on bio allocation */
62 return __f2fs_bio_alloc(GFP_NOIO, npages);
63 }
64
65 if (time_to_inject(sbi, FAULT_ALLOC_BIO)) {
66 f2fs_show_injection_info(sbi, FAULT_ALLOC_BIO);
67 return NULL;
68 }
69
70 return __f2fs_bio_alloc(GFP_KERNEL, npages);
71 }
72
73 static bool __is_cp_guaranteed(struct page *page)
74 {
75 struct address_space *mapping = page->mapping;
76 struct inode *inode;
77 struct f2fs_sb_info *sbi;
78
79 if (!mapping)
80 return false;
81
82 if (f2fs_is_compressed_page(page))
83 return false;
84
85 inode = mapping->host;
86 sbi = F2FS_I_SB(inode);
87
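/*
 * Pages whose writeback must complete before a checkpoint finishes:
 * meta/node pages, directory data, atomic-file and quota-file data,
 * and cold data are all written under checkpoint guarantee.
 */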
88 if (inode->i_ino == F2FS_META_INO(sbi) ||
89 inode->i_ino == F2FS_NODE_INO(sbi) ||
90 S_ISDIR(inode->i_mode) ||
91 (S_ISREG(inode->i_mode) &&
92 (f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
93 is_cold_data(page))
94 return true;
95 return false;
96 }
97
98 static enum count_type __read_io_type(struct page *page)
99 {
100 struct address_space *mapping = page_file_mapping(page);
101
102 if (mapping) {
103 struct inode *inode = mapping->host;
104 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
105
106 if (inode->i_ino == F2FS_META_INO(sbi))
107 return F2FS_RD_META;
108
109 if (inode->i_ino == F2FS_NODE_INO(sbi))
110 return F2FS_RD_NODE;
111 }
112 return F2FS_RD_DATA;
113 }
114
115 /* postprocessing steps for read bios */
116 enum bio_post_read_step {
117 STEP_DECRYPT,
118 STEP_DECOMPRESS_NOWQ, /* handle normal cluster data inplace */
119 STEP_DECOMPRESS, /* handle compressed cluster data in workqueue */
120 STEP_VERITY,
121 };
122
123 struct bio_post_read_ctx {
124 struct bio *bio;
125 struct f2fs_sb_info *sbi;
126 struct work_struct work;
127 unsigned int enabled_steps;
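/* bitmask of bio_post_read_step values, e.g. (1 << STEP_DECRYPT) */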
128 };
129
130 static void __read_end_io(struct bio *bio, bool compr, bool verity)
131 {
132 struct page *page;
133 struct bio_vec *bv;
134 struct bvec_iter_all iter_all;
135
136 bio_for_each_segment_all(bv, bio, iter_all) {
137 page = bv->bv_page;
138
139 #ifdef CONFIG_F2FS_FS_COMPRESSION
140 if (compr && f2fs_is_compressed_page(page)) {
141 f2fs_decompress_pages(bio, page, verity);
142 continue;
143 }
144 if (verity)
145 continue;
146 #endif
147
148 /* PG_error was set if any post_read step failed */
149 if (bio->bi_status || PageError(page)) {
150 ClearPageUptodate(page);
151 /* will re-read again later */
152 ClearPageError(page);
153 } else {
154 SetPageUptodate(page);
155 }
156 dec_page_count(F2FS_P_SB(page), __read_io_type(page));
157 unlock_page(page);
158 }
159 }
160
161 static void f2fs_release_read_bio(struct bio *bio);
162 static void __f2fs_read_end_io(struct bio *bio, bool compr, bool verity)
163 {
164 if (!compr)
165 __read_end_io(bio, false, verity);
166 f2fs_release_read_bio(bio);
167 }
168
169 static void f2fs_decompress_bio(struct bio *bio, bool verity)
170 {
171 __read_end_io(bio, true, verity);
172 }
173
174 static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
175
176 static void f2fs_decrypt_work(struct bio_post_read_ctx *ctx)
177 {
178 fscrypt_decrypt_bio(ctx->bio);
179 }
180
181 static void f2fs_decompress_work(struct bio_post_read_ctx *ctx)
182 {
183 f2fs_decompress_bio(ctx->bio, ctx->enabled_steps & (1 << STEP_VERITY));
184 }
185
186 #ifdef CONFIG_F2FS_FS_COMPRESSION
187 static void f2fs_verify_pages(struct page **rpages, unsigned int cluster_size)
188 {
189 f2fs_decompress_end_io(rpages, cluster_size, false, true);
190 }
191
192 static void f2fs_verify_bio(struct bio *bio)
193 {
194 struct bio_vec *bv;
195 struct bvec_iter_all iter_all;
196
197 bio_for_each_segment_all(bv, bio, iter_all) {
198 struct page *page = bv->bv_page;
199 struct decompress_io_ctx *dic;
200
201 dic = (struct decompress_io_ctx *)page_private(page);
202
203 if (dic) {
204 if (refcount_dec_not_one(&dic->ref))
205 continue;
206 f2fs_verify_pages(dic->rpages,
207 dic->cluster_size);
208 f2fs_free_dic(dic);
209 continue;
210 }
211
212 if (bio->bi_status || PageError(page))
213 goto clear_uptodate;
214
215 if (fsverity_verify_page(page)) {
216 SetPageUptodate(page);
217 goto unlock;
218 }
219 clear_uptodate:
220 ClearPageUptodate(page);
221 ClearPageError(page);
222 unlock:
223 dec_page_count(F2FS_P_SB(page), __read_io_type(page));
224 unlock_page(page);
225 }
226 }
227 #endif
228
229 static void f2fs_verity_work(struct work_struct *work)
230 {
231 struct bio_post_read_ctx *ctx =
232 container_of(work, struct bio_post_read_ctx, work);
233 struct bio *bio = ctx->bio;
234 #ifdef CONFIG_F2FS_FS_COMPRESSION
235 unsigned int enabled_steps = ctx->enabled_steps;
236 #endif
237
238 /*
239 * fsverity_verify_bio() may call readpages() again, and while verity
240 * will be disabled for this, decryption may still be needed, resulting
241 * in another bio_post_read_ctx being allocated. So to prevent
242 * deadlocks we need to release the current ctx to the mempool first.
243 * This assumes that verity is the last post-read step.
244 */
245 mempool_free(ctx, bio_post_read_ctx_pool);
246 bio->bi_private = NULL;
247
248 #ifdef CONFIG_F2FS_FS_COMPRESSION
249 /* previous step is decompression */
250 if (enabled_steps & (1 << STEP_DECOMPRESS)) {
251 f2fs_verify_bio(bio);
252 f2fs_release_read_bio(bio);
253 return;
254 }
255 #endif
256
257 fsverity_verify_bio(bio);
258 __f2fs_read_end_io(bio, false, false);
259 }
260
261 static void f2fs_post_read_work(struct work_struct *work)
262 {
263 struct bio_post_read_ctx *ctx =
264 container_of(work, struct bio_post_read_ctx, work);
265
266 if (ctx->enabled_steps & (1 << STEP_DECRYPT))
267 f2fs_decrypt_work(ctx);
268
269 if (ctx->enabled_steps & (1 << STEP_DECOMPRESS))
270 f2fs_decompress_work(ctx);
271
272 if (ctx->enabled_steps & (1 << STEP_VERITY)) {
273 INIT_WORK(&ctx->work, f2fs_verity_work);
274 fsverity_enqueue_verify_work(&ctx->work);
275 return;
276 }
277
278 __f2fs_read_end_io(ctx->bio,
279 ctx->enabled_steps & (1 << STEP_DECOMPRESS), false);
280 }
281
282 static void f2fs_enqueue_post_read_work(struct f2fs_sb_info *sbi,
283 struct work_struct *work)
284 {
285 queue_work(sbi->post_read_wq, work);
286 }
287
288 static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
289 {
290 /*
291 * We use different work queues for decryption and for verity because
292 * verity may require reading metadata pages that need decryption, and
293 * we shouldn't recurse to the same workqueue.
294 */
295
296 if (ctx->enabled_steps & (1 << STEP_DECRYPT) ||
297 ctx->enabled_steps & (1 << STEP_DECOMPRESS)) {
298 INIT_WORK(&ctx->work, f2fs_post_read_work);
299 f2fs_enqueue_post_read_work(ctx->sbi, &ctx->work);
300 return;
301 }
302
303 if (ctx->enabled_steps & (1 << STEP_VERITY)) {
304 INIT_WORK(&ctx->work, f2fs_verity_work);
305 fsverity_enqueue_verify_work(&ctx->work);
306 return;
307 }
308
309 __f2fs_read_end_io(ctx->bio, false, false);
310 }
311
312 static bool f2fs_bio_post_read_required(struct bio *bio)
313 {
314 return bio->bi_private;
315 }
316
317 static void f2fs_read_end_io(struct bio *bio)
318 {
319 struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
320
321 if (time_to_inject(sbi, FAULT_READ_IO)) {
322 f2fs_show_injection_info(sbi, FAULT_READ_IO);
323 bio->bi_status = BLK_STS_IOERR;
324 }
325
326 if (f2fs_bio_post_read_required(bio)) {
327 struct bio_post_read_ctx *ctx = bio->bi_private;
328
329 bio_post_read_processing(ctx);
330 return;
331 }
332
333 __f2fs_read_end_io(bio, false, false);
334 }
335
336 static void f2fs_write_end_io(struct bio *bio)
337 {
338 struct f2fs_sb_info *sbi = bio->bi_private;
339 struct bio_vec *bvec;
340 struct bvec_iter_all iter_all;
341
342 if (time_to_inject(sbi, FAULT_WRITE_IO)) {
343 f2fs_show_injection_info(sbi, FAULT_WRITE_IO);
344 bio->bi_status = BLK_STS_IOERR;
345 }
346
347 bio_for_each_segment_all(bvec, bio, iter_all) {
348 struct page *page = bvec->bv_page;
349 enum count_type type = WB_DATA_TYPE(page);
350
351 if (IS_DUMMY_WRITTEN_PAGE(page)) {
352 set_page_private(page, (unsigned long)NULL);
353 ClearPagePrivate(page);
354 unlock_page(page);
355 mempool_free(page, sbi->write_io_dummy);
356
357 if (unlikely(bio->bi_status))
358 f2fs_stop_checkpoint(sbi, true);
359 continue;
360 }
361
362 fscrypt_finalize_bounce_page(&page);
363
364 #ifdef CONFIG_F2FS_FS_COMPRESSION
365 if (f2fs_is_compressed_page(page)) {
366 f2fs_compress_write_end_io(bio, page);
367 continue;
368 }
369 #endif
370
371 if (unlikely(bio->bi_status)) {
372 mapping_set_error(page->mapping, -EIO);
373 if (type == F2FS_WB_CP_DATA)
374 f2fs_stop_checkpoint(sbi, true);
375 }
376
377 f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
378 page->index != nid_of_node(page));
379
380 dec_page_count(sbi, type);
381 if (f2fs_in_warm_node_list(sbi, page))
382 f2fs_del_fsync_node_entry(sbi, page);
383 clear_cold_data(page);
384 end_page_writeback(page);
385 }
386 if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
387 wq_has_sleeper(&sbi->cp_wait))
388 wake_up(&sbi->cp_wait);
389
390 bio_put(bio);
391 }
392
393 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
394 block_t blk_addr, struct bio *bio)
395 {
396 struct block_device *bdev = sbi->sb->s_bdev;
397 int i;
398
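/*
 * On a multi-device volume, pick the device that covers blk_addr and
 * translate the address to be relative to that device's start block.
 */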
399 if (f2fs_is_multi_device(sbi)) {
400 for (i = 0; i < sbi->s_ndevs; i++) {
401 if (FDEV(i).start_blk <= blk_addr &&
402 FDEV(i).end_blk >= blk_addr) {
403 blk_addr -= FDEV(i).start_blk;
404 bdev = FDEV(i).bdev;
405 break;
406 }
407 }
408 }
409 if (bio) {
410 bio_set_dev(bio, bdev);
411 bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
412 }
413 return bdev;
414 }
415
416 int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
417 {
418 int i;
419
420 if (!f2fs_is_multi_device(sbi))
421 return 0;
422
423 for (i = 0; i < sbi->s_ndevs; i++)
424 if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
425 return i;
426 return 0;
427 }
428
429 /*
430 * Return true if pre_bio's bdev is the same as its target device.
431 */
432 static bool __same_bdev(struct f2fs_sb_info *sbi,
433 block_t blk_addr, struct bio *bio)
434 {
435 struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
436 return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
437 }
438
439 static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
440 {
441 struct f2fs_sb_info *sbi = fio->sbi;
442 struct bio *bio;
443
444 bio = f2fs_bio_alloc(sbi, npages, true);
445
446 f2fs_target_device(sbi, fio->new_blkaddr, bio);
447 if (is_read_io(fio->op)) {
448 bio->bi_end_io = f2fs_read_end_io;
449 bio->bi_private = NULL;
450 } else {
451 bio->bi_end_io = f2fs_write_end_io;
452 bio->bi_private = sbi;
453 bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
454 fio->type, fio->temp);
455 }
456 if (fio->io_wbc)
457 wbc_init_bio(fio->io_wbc, bio);
458
459 return bio;
460 }
461
462 static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
463 pgoff_t first_idx,
464 const struct f2fs_io_info *fio,
465 gfp_t gfp_mask)
466 {
467 /*
468 * The f2fs garbage collector sets ->encrypted_page when it wants to
469 * read/write raw data without encryption.
470 */
471 if (!fio || !fio->encrypted_page)
472 fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
473 else if (fscrypt_inode_should_skip_dm_default_key(inode))
474 bio_set_skip_dm_default_key(bio);
475 }
476
477 static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
478 pgoff_t next_idx,
479 const struct f2fs_io_info *fio)
480 {
481 /*
482 * The f2fs garbage collector sets ->encrypted_page when it wants to
483 * read/write raw data without encryption.
484 */
485 if (fio && fio->encrypted_page)
486 return !bio_has_crypt_ctx(bio) &&
487 (bio_should_skip_dm_default_key(bio) ==
488 fscrypt_inode_should_skip_dm_default_key(inode));
489
490 return fscrypt_mergeable_bio(bio, inode, next_idx);
491 }
492
493 static inline void __submit_bio(struct f2fs_sb_info *sbi,
494 struct bio *bio, enum page_type type)
495 {
496 if (!is_read_io(bio_op(bio))) {
497 unsigned int start;
498
499 if (type != DATA && type != NODE)
500 goto submit_io;
501
502 if (f2fs_lfs_mode(sbi) && current->plug)
503 blk_finish_plug(current->plug);
504
505 if (!F2FS_IO_ALIGNED(sbi))
506 goto submit_io;
507
508 start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
509 start %= F2FS_IO_SIZE(sbi);
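/*
 * start now holds how many blocks spill into the last, partially
 * filled IO unit; pad that unit with dummy pages below.
 */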
510
511 if (start == 0)
512 goto submit_io;
513
514 /* fill dummy pages */
515 for (; start < F2FS_IO_SIZE(sbi); start++) {
516 struct page *page =
517 mempool_alloc(sbi->write_io_dummy,
518 GFP_NOIO | __GFP_NOFAIL);
519 f2fs_bug_on(sbi, !page);
520
521 zero_user_segment(page, 0, PAGE_SIZE);
522 SetPagePrivate(page);
523 set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
524 lock_page(page);
525 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
526 f2fs_bug_on(sbi, 1);
527 }
528 /*
529 * In the NODE case, we lose the next block address chain, so we
530 * need to do a checkpoint in f2fs_sync_file.
531 */
532 if (type == NODE)
533 set_sbi_flag(sbi, SBI_NEED_CP);
534 }
535 submit_io:
536 if (is_read_io(bio_op(bio)))
537 trace_f2fs_submit_read_bio(sbi->sb, type, bio);
538 else
539 trace_f2fs_submit_write_bio(sbi->sb, type, bio);
540 submit_bio(bio);
541 }
542
543 void f2fs_submit_bio(struct f2fs_sb_info *sbi,
544 struct bio *bio, enum page_type type)
545 {
546 __submit_bio(sbi, bio, type);
547 }
548
549 static void __attach_io_flag(struct f2fs_io_info *fio)
550 {
551 struct f2fs_sb_info *sbi = fio->sbi;
552 unsigned int temp_mask = (1 << NR_TEMP_TYPE) - 1;
553 unsigned int io_flag, fua_flag, meta_flag;
554
555 if (fio->type == DATA)
556 io_flag = sbi->data_io_flag;
557 else if (fio->type == NODE)
558 io_flag = sbi->node_io_flag;
559 else
560 return;
561
562 fua_flag = io_flag & temp_mask;
563 meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
564
565 /*
566 * data/node io flag bits per temp:
567 * REQ_META | REQ_FUA |
568 * 5 | 4 | 3 | 2 | 1 | 0 |
569 * Cold | Warm | Hot | Cold | Warm | Hot |
570 */
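/* e.g. io_flag == 0x09 sets both REQ_FUA and REQ_META for hot pages only */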
571 if ((1 << fio->temp) & meta_flag)
572 fio->op_flags |= REQ_META;
573 if ((1 << fio->temp) & fua_flag)
574 fio->op_flags |= REQ_FUA;
575 }
576
577 static void __submit_merged_bio(struct f2fs_bio_info *io)
578 {
579 struct f2fs_io_info *fio = &io->fio;
580
581 if (!io->bio)
582 return;
583
584 __attach_io_flag(fio);
585 bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
586
587 if (is_read_io(fio->op))
588 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
589 else
590 trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
591
592 __submit_bio(io->sbi, io->bio, fio->type);
593 io->bio = NULL;
594 }
595
596 static bool __has_merged_page(struct bio *bio, struct inode *inode,
597 struct page *page, nid_t ino)
598 {
599 struct bio_vec *bvec;
600 struct bvec_iter_all iter_all;
601
602 if (!bio)
603 return false;
604
605 if (!inode && !page && !ino)
606 return true;
607
608 bio_for_each_segment_all(bvec, bio, iter_all) {
609 struct page *target = bvec->bv_page;
610
611 if (fscrypt_is_bounce_page(target)) {
612 target = fscrypt_pagecache_page(target);
613 if (IS_ERR(target))
614 continue;
615 }
616 if (f2fs_is_compressed_page(target)) {
617 target = f2fs_compress_control_page(target);
618 if (IS_ERR(target))
619 continue;
620 }
621
622 if (inode && inode == target->mapping->host)
623 return true;
624 if (page && page == target)
625 return true;
626 if (ino && ino == ino_of_node(target))
627 return true;
628 }
629
630 return false;
631 }
632
633 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
634 enum page_type type, enum temp_type temp)
635 {
636 enum page_type btype = PAGE_TYPE_OF_BIO(type);
637 struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
638
639 down_write(&io->io_rwsem);
640
641 /* change META to META_FLUSH in the checkpoint procedure */
642 if (type >= META_FLUSH) {
643 io->fio.type = META_FLUSH;
644 io->fio.op = REQ_OP_WRITE;
645 io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
646 if (!test_opt(sbi, NOBARRIER))
647 io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
648 }
649 __submit_merged_bio(io);
650 up_write(&io->io_rwsem);
651 }
652
653 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
654 struct inode *inode, struct page *page,
655 nid_t ino, enum page_type type, bool force)
656 {
657 enum temp_type temp;
658 bool ret = true;
659
660 for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
661 if (!force) {
662 enum page_type btype = PAGE_TYPE_OF_BIO(type);
663 struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
664
665 down_read(&io->io_rwsem);
666 ret = __has_merged_page(io->bio, inode, page, ino);
667 up_read(&io->io_rwsem);
668 }
669 if (ret)
670 __f2fs_submit_merged_write(sbi, type, temp);
671
672 /* TODO: use HOT temp only for meta pages now. */
673 if (type >= META)
674 break;
675 }
676 }
677
678 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
679 {
680 __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
681 }
682
683 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
684 struct inode *inode, struct page *page,
685 nid_t ino, enum page_type type)
686 {
687 __submit_merged_write_cond(sbi, inode, page, ino, type, false);
688 }
689
690 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
691 {
692 f2fs_submit_merged_write(sbi, DATA);
693 f2fs_submit_merged_write(sbi, NODE);
694 f2fs_submit_merged_write(sbi, META);
695 }
696
697 /*
698 * Fill the locked page with data located in the block address.
699 * A caller needs to unlock the page on failure.
700 */
701 int f2fs_submit_page_bio(struct f2fs_io_info *fio)
702 {
703 struct bio *bio;
704 struct page *page = fio->encrypted_page ?
705 fio->encrypted_page : fio->page;
706
707 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
708 fio->is_por ? META_POR : (__is_meta_io(fio) ?
709 META_GENERIC : DATA_GENERIC_ENHANCE)))
710 return -EFSCORRUPTED;
711
712 trace_f2fs_submit_page_bio(page, fio);
713 f2fs_trace_ios(fio, 0);
714
715 /* Allocate a new bio */
716 bio = __bio_alloc(fio, 1);
717
718 f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
719 fio->page->index, fio, GFP_NOIO);
720
721 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
722 bio_put(bio);
723 return -EFAULT;
724 }
725
726 if (fio->io_wbc && !is_read_io(fio->op))
727 wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
728
729 __attach_io_flag(fio);
730 bio_set_op_attrs(bio, fio->op, fio->op_flags);
731
732 inc_page_count(fio->sbi, is_read_io(fio->op) ?
733 __read_io_type(page): WB_DATA_TYPE(fio->page));
734
735 __submit_bio(fio->sbi, bio, fio->type);
736 return 0;
737 }
738
739 static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
740 block_t last_blkaddr, block_t cur_blkaddr)
741 {
742 if (last_blkaddr + 1 != cur_blkaddr)
743 return false;
744 return __same_bdev(sbi, cur_blkaddr, bio);
745 }
746
747 static bool io_type_is_mergeable(struct f2fs_bio_info *io,
748 struct f2fs_io_info *fio)
749 {
750 if (io->fio.op != fio->op)
751 return false;
752 return io->fio.op_flags == fio->op_flags;
753 }
754
755 static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
756 struct f2fs_bio_info *io,
757 struct f2fs_io_info *fio,
758 block_t last_blkaddr,
759 block_t cur_blkaddr)
760 {
761 if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
762 unsigned int filled_blocks =
763 F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
764 unsigned int io_size = F2FS_IO_SIZE(sbi);
765 unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;
766
767 /* IOs in the bio are aligned and there is not enough space left in the bio vecs */
768 if (!(filled_blocks % io_size) && left_vecs < io_size)
769 return false;
770 }
771 if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
772 return false;
773 return io_type_is_mergeable(io, fio);
774 }
775
776 static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
777 struct page *page, enum temp_type temp)
778 {
779 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
780 struct bio_entry *be;
781
782 be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS);
783 be->bio = bio;
784 bio_get(bio);
785
786 if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
787 f2fs_bug_on(sbi, 1);
788
789 down_write(&io->bio_list_lock);
790 list_add_tail(&be->list, &io->bio_list);
791 up_write(&io->bio_list_lock);
792 }
793
794 static void del_bio_entry(struct bio_entry *be)
795 {
796 list_del(&be->list);
797 kmem_cache_free(bio_entry_slab, be);
798 }
799
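/*
 * Try to merge @page into the in-place-update bio tracked in *bio.  Returns
 * 0 on success; otherwise the bio is submitted or dropped, *bio is cleared
 * and -EAGAIN is returned so the caller can allocate a new bio.
 */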
800 static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
801 struct page *page)
802 {
803 struct f2fs_sb_info *sbi = fio->sbi;
804 enum temp_type temp;
805 bool found = false;
806 int ret = -EAGAIN;
807
808 for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
809 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
810 struct list_head *head = &io->bio_list;
811 struct bio_entry *be;
812
813 down_write(&io->bio_list_lock);
814 list_for_each_entry(be, head, list) {
815 if (be->bio != *bio)
816 continue;
817
818 found = true;
819
820 if (page_is_mergeable(sbi, *bio, *fio->last_block,
821 fio->new_blkaddr) &&
822 f2fs_crypt_mergeable_bio(*bio,
823 fio->page->mapping->host,
824 fio->page->index, fio) &&
825 bio_add_page(*bio, page, PAGE_SIZE, 0) ==
826 PAGE_SIZE) {
827 ret = 0;
828 break;
829 }
830
831 /* page can't be merged into bio; submit the bio */
832 del_bio_entry(be);
833 __submit_bio(sbi, *bio, DATA);
834 break;
835 }
836 up_write(&io->bio_list_lock);
837 }
838
839 if (ret) {
840 bio_put(*bio);
841 *bio = NULL;
842 }
843
844 return ret;
845 }
846
847 void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
848 struct bio **bio, struct page *page)
849 {
850 enum temp_type temp;
851 bool found = false;
852 struct bio *target = bio ? *bio : NULL;
853
854 for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
855 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
856 struct list_head *head = &io->bio_list;
857 struct bio_entry *be;
858
859 if (list_empty(head))
860 continue;
861
862 down_read(&io->bio_list_lock);
863 list_for_each_entry(be, head, list) {
864 if (target)
865 found = (target == be->bio);
866 else
867 found = __has_merged_page(be->bio, NULL,
868 page, 0);
869 if (found)
870 break;
871 }
872 up_read(&io->bio_list_lock);
873
874 if (!found)
875 continue;
876
877 found = false;
878
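/*
 * Re-scan under the write lock: the list may have changed after the
 * read lock was dropped above.
 */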
879 down_write(&io->bio_list_lock);
880 list_for_each_entry(be, head, list) {
881 if (target)
882 found = (target == be->bio);
883 else
884 found = __has_merged_page(be->bio, NULL,
885 page, 0);
886 if (found) {
887 target = be->bio;
888 del_bio_entry(be);
889 break;
890 }
891 }
892 up_write(&io->bio_list_lock);
893 }
894
895 if (found)
896 __submit_bio(sbi, target, DATA);
897 if (bio && *bio) {
898 bio_put(*bio);
899 *bio = NULL;
900 }
901 }
902
903 int f2fs_merge_page_bio(struct f2fs_io_info *fio)
904 {
905 struct bio *bio = *fio->bio;
906 struct page *page = fio->encrypted_page ?
907 fio->encrypted_page : fio->page;
908
909 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
910 __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
911 return -EFSCORRUPTED;
912
913 trace_f2fs_submit_page_bio(page, fio);
914 f2fs_trace_ios(fio, 0);
915
916 alloc_new:
917 if (!bio) {
918 bio = __bio_alloc(fio, BIO_MAX_PAGES);
919 f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
920 fio->page->index, fio,
921 GFP_NOIO);
922 __attach_io_flag(fio);
923 bio_set_op_attrs(bio, fio->op, fio->op_flags);
924
925 add_bio_entry(fio->sbi, bio, page, fio->temp);
926 } else {
927 if (add_ipu_page(fio, &bio, page))
928 goto alloc_new;
929 }
930
931 if (fio->io_wbc)
932 wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
933
934 inc_page_count(fio->sbi, WB_DATA_TYPE(page));
935
936 *fio->last_block = fio->new_blkaddr;
937 *fio->bio = bio;
938
939 return 0;
940 }
941
942 void f2fs_submit_page_write(struct f2fs_io_info *fio)
943 {
944 struct f2fs_sb_info *sbi = fio->sbi;
945 enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
946 struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
947 struct page *bio_page;
948
949 f2fs_bug_on(sbi, is_read_io(fio->op));
950
951 down_write(&io->io_rwsem);
952 next:
953 if (fio->in_list) {
954 spin_lock(&io->io_lock);
955 if (list_empty(&io->io_list)) {
956 spin_unlock(&io->io_lock);
957 goto out;
958 }
959 fio = list_first_entry(&io->io_list,
960 struct f2fs_io_info, list);
961 list_del(&fio->list);
962 spin_unlock(&io->io_lock);
963 }
964
965 verify_fio_blkaddr(fio);
966
967 if (fio->encrypted_page)
968 bio_page = fio->encrypted_page;
969 else if (fio->compressed_page)
970 bio_page = fio->compressed_page;
971 else
972 bio_page = fio->page;
973
974 /* set submitted = true as a return value */
975 fio->submitted = true;
976
977 inc_page_count(sbi, WB_DATA_TYPE(bio_page));
978
979 if (io->bio &&
980 (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
981 fio->new_blkaddr) ||
982 !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
983 fio->page->index, fio)))
984 __submit_merged_bio(io);
985 alloc_new:
986 if (io->bio == NULL) {
987 if (F2FS_IO_ALIGNED(sbi) &&
988 (fio->type == DATA || fio->type == NODE) &&
989 fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
990 dec_page_count(sbi, WB_DATA_TYPE(bio_page));
991 fio->retry = true;
992 goto skip;
993 }
994 io->bio = __bio_alloc(fio, BIO_MAX_PAGES);
995 f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
996 fio->page->index, fio,
997 GFP_NOIO);
998 io->fio = *fio;
999 }
1000
1001 if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
1002 __submit_merged_bio(io);
1003 goto alloc_new;
1004 }
1005
1006 if (fio->io_wbc)
1007 wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
1008
1009 io->last_block_in_bio = fio->new_blkaddr;
1010 f2fs_trace_ios(fio, 0);
1011
1012 trace_f2fs_submit_page_write(fio->page, fio);
1013 skip:
1014 if (fio->in_list)
1015 goto next;
1016 out:
1017 if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
1018 !f2fs_is_checkpoint_ready(sbi))
1019 __submit_merged_bio(io);
1020 up_write(&io->io_rwsem);
1021 }
1022
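/*
 * fs-verity only covers data blocks within i_size; pages past EOF (e.g. the
 * Merkle tree blocks) are read without going through the verity step.
 */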
1023 static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
1024 {
1025 return fsverity_active(inode) &&
1026 idx < DIV_ROUND_UP(inode->i_size, PAGE_SIZE);
1027 }
1028
1029 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
1030 unsigned nr_pages, unsigned op_flag,
1031 pgoff_t first_idx, bool for_write)
1032 {
1033 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1034 struct bio *bio;
1035 struct bio_post_read_ctx *ctx;
1036 unsigned int post_read_steps = 0;
1037
1038 bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES),
1039 for_write);
1040 if (!bio)
1041 return ERR_PTR(-ENOMEM);
1042
1043 f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
1044
1045 f2fs_target_device(sbi, blkaddr, bio);
1046 bio->bi_end_io = f2fs_read_end_io;
1047 bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
1048
1049 if (fscrypt_inode_uses_fs_layer_crypto(inode))
1050 post_read_steps |= 1 << STEP_DECRYPT;
1051 if (f2fs_compressed_file(inode))
1052 post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
1053 if (f2fs_need_verity(inode, first_idx))
1054 post_read_steps |= 1 << STEP_VERITY;
1055
1056 if (post_read_steps) {
1057 /* Due to the mempool, this never fails. */
1058 ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
1059 ctx->bio = bio;
1060 ctx->sbi = sbi;
1061 ctx->enabled_steps = post_read_steps;
1062 bio->bi_private = ctx;
1063 }
1064
1065 return bio;
1066 }
1067
1068 static void f2fs_release_read_bio(struct bio *bio)
1069 {
1070 if (bio->bi_private)
1071 mempool_free(bio->bi_private, bio_post_read_ctx_pool);
1072 bio_put(bio);
1073 }
1074
1075 /* This can handle encryption stuffs */
1076 static int f2fs_submit_page_read(struct inode *inode, struct page *page,
1077 block_t blkaddr, bool for_write)
1078 {
1079 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1080 struct bio *bio;
1081
1082 bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0, page->index, for_write);
1083 if (IS_ERR(bio))
1084 return PTR_ERR(bio);
1085
1086 /* wait for GCed page writeback via META_MAPPING */
1087 f2fs_wait_on_block_writeback(inode, blkaddr);
1088
1089 if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
1090 bio_put(bio);
1091 return -EFAULT;
1092 }
1093 ClearPageError(page);
1094 inc_page_count(sbi, F2FS_RD_DATA);
1095 f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
1096 __submit_bio(sbi, bio, DATA);
1097 return 0;
1098 }
1099
1100 static void __set_data_blkaddr(struct dnode_of_data *dn)
1101 {
1102 struct f2fs_node *rn = F2FS_NODE(dn->node_page);
1103 __le32 *addr_array;
1104 int base = 0;
1105
1106 if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
1107 base = get_extra_isize(dn->inode);
1108
1109 /* Get physical address of data block */
1110 addr_array = blkaddr_in_node(rn);
1111 addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
1112 }
1113
1114 /*
1115 * Lock ordering for the change of data block address:
1116 * ->data_page
1117 * ->node_page
1118 * update block addresses in the node page
1119 */
1120 void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
1121 {
1122 f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1123 __set_data_blkaddr(dn);
1124 if (set_page_dirty(dn->node_page))
1125 dn->node_changed = true;
1126 }
1127
1128 void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
1129 {
1130 dn->data_blkaddr = blkaddr;
1131 f2fs_set_data_blkaddr(dn);
1132 f2fs_update_extent_cache(dn);
1133 }
1134
1135 /* dn->ofs_in_node will be returned with up-to-date last block pointer */
1136 int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
1137 {
1138 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1139 int err;
1140
1141 if (!count)
1142 return 0;
1143
1144 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1145 return -EPERM;
1146 if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
1147 return err;
1148
1149 trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
1150 dn->ofs_in_node, count);
1151
1152 f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1153
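/* reserve @count new blocks, skipping slots that are already allocated */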
1154 for (; count > 0; dn->ofs_in_node++) {
1155 block_t blkaddr = f2fs_data_blkaddr(dn);
1156 if (blkaddr == NULL_ADDR) {
1157 dn->data_blkaddr = NEW_ADDR;
1158 __set_data_blkaddr(dn);
1159 count--;
1160 }
1161 }
1162
1163 if (set_page_dirty(dn->node_page))
1164 dn->node_changed = true;
1165 return 0;
1166 }
1167
1168 /* Should keep dn->ofs_in_node unchanged */
1169 int f2fs_reserve_new_block(struct dnode_of_data *dn)
1170 {
1171 unsigned int ofs_in_node = dn->ofs_in_node;
1172 int ret;
1173
1174 ret = f2fs_reserve_new_blocks(dn, 1);
1175 dn->ofs_in_node = ofs_in_node;
1176 return ret;
1177 }
1178
1179 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
1180 {
1181 bool need_put = dn->inode_page ? false : true;
1182 int err;
1183
1184 err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
1185 if (err)
1186 return err;
1187
1188 if (dn->data_blkaddr == NULL_ADDR)
1189 err = f2fs_reserve_new_block(dn);
1190 if (err || need_put)
1191 f2fs_put_dnode(dn);
1192 return err;
1193 }
1194
1195 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
1196 {
1197 struct extent_info ei = {0,0,0};
1198 struct inode *inode = dn->inode;
1199
1200 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
1201 dn->data_blkaddr = ei.blk + index - ei.fofs;
1202 return 0;
1203 }
1204
1205 return f2fs_reserve_block(dn, index);
1206 }
1207
1208 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
1209 int op_flags, bool for_write)
1210 {
1211 struct address_space *mapping = inode->i_mapping;
1212 struct dnode_of_data dn;
1213 struct page *page;
1214 struct extent_info ei = {0,0,0};
1215 int err;
1216
1217 page = f2fs_grab_cache_page(mapping, index, for_write);
1218 if (!page)
1219 return ERR_PTR(-ENOMEM);
1220
1221 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
1222 dn.data_blkaddr = ei.blk + index - ei.fofs;
1223 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
1224 DATA_GENERIC_ENHANCE_READ)) {
1225 err = -EFSCORRUPTED;
1226 goto put_err;
1227 }
1228 goto got_it;
1229 }
1230
1231 set_new_dnode(&dn, inode, NULL, NULL, 0);
1232 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
1233 if (err)
1234 goto put_err;
1235 f2fs_put_dnode(&dn);
1236
1237 if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
1238 err = -ENOENT;
1239 goto put_err;
1240 }
1241 if (dn.data_blkaddr != NEW_ADDR &&
1242 !f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
1243 dn.data_blkaddr,
1244 DATA_GENERIC_ENHANCE)) {
1245 err = -EFSCORRUPTED;
1246 goto put_err;
1247 }
1248 got_it:
1249 if (PageUptodate(page)) {
1250 unlock_page(page);
1251 return page;
1252 }
1253
1254 /*
1255 * A new dentry page is allocated but not able to be written, since its
1256 * new inode page couldn't be allocated due to -ENOSPC.
1257 * In such a case, its blkaddr remains NEW_ADDR.
1258 * see, f2fs_add_link -> f2fs_get_new_data_page ->
1259 * f2fs_init_inode_metadata.
1260 */
1261 if (dn.data_blkaddr == NEW_ADDR) {
1262 zero_user_segment(page, 0, PAGE_SIZE);
1263 if (!PageUptodate(page))
1264 SetPageUptodate(page);
1265 unlock_page(page);
1266 return page;
1267 }
1268
1269 err = f2fs_submit_page_read(inode, page, dn.data_blkaddr, for_write);
1270 if (err)
1271 goto put_err;
1272 return page;
1273
1274 put_err:
1275 f2fs_put_page(page, 1);
1276 return ERR_PTR(err);
1277 }
1278
1279 struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
1280 {
1281 struct address_space *mapping = inode->i_mapping;
1282 struct page *page;
1283
1284 page = find_get_page(mapping, index);
1285 if (page && PageUptodate(page))
1286 return page;
1287 f2fs_put_page(page, 0);
1288
1289 page = f2fs_get_read_data_page(inode, index, 0, false);
1290 if (IS_ERR(page))
1291 return page;
1292
1293 if (PageUptodate(page))
1294 return page;
1295
1296 wait_on_page_locked(page);
1297 if (unlikely(!PageUptodate(page))) {
1298 f2fs_put_page(page, 0);
1299 return ERR_PTR(-EIO);
1300 }
1301 return page;
1302 }
1303
1304 /*
1305 * If it tries to access a hole, return an error.
1306 * Because the callers (functions in dir.c and GC) should be able to know
1307 * whether this page exists or not.
1308 */
1309 struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
1310 bool for_write)
1311 {
1312 struct address_space *mapping = inode->i_mapping;
1313 struct page *page;
1314 repeat:
1315 page = f2fs_get_read_data_page(inode, index, 0, for_write);
1316 if (IS_ERR(page))
1317 return page;
1318
1319 /* wait for read completion */
1320 lock_page(page);
1321 if (unlikely(page->mapping != mapping)) {
1322 f2fs_put_page(page, 1);
1323 goto repeat;
1324 }
1325 if (unlikely(!PageUptodate(page))) {
1326 f2fs_put_page(page, 1);
1327 return ERR_PTR(-EIO);
1328 }
1329 return page;
1330 }
1331
1332 /*
1333 * Caller ensures that this data page is never allocated.
1334 * A new zero-filled data page is allocated in the page cache.
1335 *
1336 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
1337 * f2fs_unlock_op().
1338 * Note that ipage is set only by make_empty_dir, and if any error occurs,
1339 * ipage should be released by this function.
1340 */
1341 struct page *f2fs_get_new_data_page(struct inode *inode,
1342 struct page *ipage, pgoff_t index, bool new_i_size)
1343 {
1344 struct address_space *mapping = inode->i_mapping;
1345 struct page *page;
1346 struct dnode_of_data dn;
1347 int err;
1348
1349 page = f2fs_grab_cache_page(mapping, index, true);
1350 if (!page) {
1351 /*
1352 * before exiting, we should make sure ipage will be released
1353 * if any error occurs.
1354 */
1355 f2fs_put_page(ipage, 1);
1356 return ERR_PTR(-ENOMEM);
1357 }
1358
1359 set_new_dnode(&dn, inode, ipage, NULL, 0);
1360 err = f2fs_reserve_block(&dn, index);
1361 if (err) {
1362 f2fs_put_page(page, 1);
1363 return ERR_PTR(err);
1364 }
1365 if (!ipage)
1366 f2fs_put_dnode(&dn);
1367
1368 if (PageUptodate(page))
1369 goto got_it;
1370
1371 if (dn.data_blkaddr == NEW_ADDR) {
1372 zero_user_segment(page, 0, PAGE_SIZE);
1373 if (!PageUptodate(page))
1374 SetPageUptodate(page);
1375 } else {
1376 f2fs_put_page(page, 1);
1377
1378 /* if ipage exists, blkaddr should be NEW_ADDR */
1379 f2fs_bug_on(F2FS_I_SB(inode), ipage);
1380 page = f2fs_get_lock_data_page(inode, index, true);
1381 if (IS_ERR(page))
1382 return page;
1383 }
1384 got_it:
1385 if (new_i_size && i_size_read(inode) <
1386 ((loff_t)(index + 1) << PAGE_SHIFT))
1387 f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
1388 return page;
1389 }
1390
1391 static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
1392 {
1393 struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1394 struct f2fs_summary sum;
1395 struct node_info ni;
1396 block_t old_blkaddr;
1397 blkcnt_t count = 1;
1398 int err;
1399
1400 if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1401 return -EPERM;
1402
1403 err = f2fs_get_node_info(sbi, dn->nid, &ni);
1404 if (err)
1405 return err;
1406
1407 dn->data_blkaddr = f2fs_data_blkaddr(dn);
1408 if (dn->data_blkaddr != NULL_ADDR)
1409 goto alloc;
1410
1411 if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
1412 return err;
1413
1414 alloc:
1415 set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1416 old_blkaddr = dn->data_blkaddr;
1417 f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
1418 &sum, seg_type, NULL, false);
1419 if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
1420 invalidate_mapping_pages(META_MAPPING(sbi),
1421 old_blkaddr, old_blkaddr);
1422 f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
1423
1424 /*
1425 * i_size will be updated by direct_IO. Otherwise, we'll get stale
1426 * data from an unwritten block via dio_read.
1427 */
1428 return 0;
1429 }
1430
1431 int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
1432 {
1433 struct inode *inode = file_inode(iocb->ki_filp);
1434 struct f2fs_map_blocks map;
1435 int flag;
1436 int err = 0;
1437 bool direct_io = iocb->ki_flags & IOCB_DIRECT;
1438
1439 map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
1440 map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
1441 if (map.m_len > map.m_lblk)
1442 map.m_len -= map.m_lblk;
1443 else
1444 map.m_len = 0;
1445
1446 map.m_next_pgofs = NULL;
1447 map.m_next_extent = NULL;
1448 map.m_seg_type = NO_CHECK_TYPE;
1449 map.m_may_create = true;
1450
1451 if (direct_io) {
1452 map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
1453 flag = f2fs_force_buffered_io(inode, iocb, from) ?
1454 F2FS_GET_BLOCK_PRE_AIO :
1455 F2FS_GET_BLOCK_PRE_DIO;
1456 goto map_blocks;
1457 }
1458 if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
1459 err = f2fs_convert_inline_inode(inode);
1460 if (err)
1461 return err;
1462 }
1463 if (f2fs_has_inline_data(inode))
1464 return err;
1465
1466 flag = F2FS_GET_BLOCK_PRE_AIO;
1467
1468 map_blocks:
1469 err = f2fs_map_blocks(inode, &map, 1, flag);
1470 if (map.m_len > 0 && err == -ENOSPC) {
1471 if (!direct_io)
1472 set_inode_flag(inode, FI_NO_PREALLOC);
1473 err = 0;
1474 }
1475 return err;
1476 }
1477
1478 void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
1479 {
1480 if (flag == F2FS_GET_BLOCK_PRE_AIO) {
1481 if (lock)
1482 down_read(&sbi->node_change);
1483 else
1484 up_read(&sbi->node_change);
1485 } else {
1486 if (lock)
1487 f2fs_lock_op(sbi);
1488 else
1489 f2fs_unlock_op(sbi);
1490 }
1491 }
1492
1493 /*
1494 * f2fs_map_blocks() tries to find or build a mapping relationship that
1495 * maps contiguous logical blocks to physical blocks, and returns such
1496 * info via the f2fs_map_blocks structure.
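 *
 * When @create is non-zero, missing blocks may be reserved or allocated
 * according to @flag (F2FS_GET_BLOCK_*); otherwise holes are left unmapped.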
1497 */
1498 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
1499 int create, int flag)
1500 {
1501 unsigned int maxblocks = map->m_len;
1502 struct dnode_of_data dn;
1503 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1504 int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
1505 pgoff_t pgofs, end_offset, end;
1506 int err = 0, ofs = 1;
1507 unsigned int ofs_in_node, last_ofs_in_node;
1508 blkcnt_t prealloc;
1509 struct extent_info ei = {0,0,0};
1510 block_t blkaddr;
1511 unsigned int start_pgofs;
1512
1513 if (!maxblocks)
1514 return 0;
1515
1516 map->m_len = 0;
1517 map->m_flags = 0;
1518
1519 /* it only supports block size == page size */
1520 pgofs = (pgoff_t)map->m_lblk;
1521 end = pgofs + maxblocks;
1522
1523 if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
1524 if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
1525 map->m_may_create)
1526 goto next_dnode;
1527
1528 map->m_pblk = ei.blk + pgofs - ei.fofs;
1529 map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
1530 map->m_flags = F2FS_MAP_MAPPED;
1531 if (map->m_next_extent)
1532 *map->m_next_extent = pgofs + map->m_len;
1533
1534 /* for hardware encryption, but to avoid potential issues in the future */
1535 if (flag == F2FS_GET_BLOCK_DIO)
1536 f2fs_wait_on_block_writeback_range(inode,
1537 map->m_pblk, map->m_len);
1538 goto out;
1539 }
1540
1541 next_dnode:
1542 if (map->m_may_create)
1543 __do_map_lock(sbi, flag, true);
1544
1545 /* When reading holes, we need its node page */
1546 set_new_dnode(&dn, inode, NULL, NULL, 0);
1547 err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
1548 if (err) {
1549 if (flag == F2FS_GET_BLOCK_BMAP)
1550 map->m_pblk = 0;
1551
1552 if (err == -ENOENT) {
1553 /*
1554 * There is one exceptional case that read_node_page()
1555 * may return -ENOENT because the filesystem has been
1556 * shut down or hit cp_error, so force the error
1557 * number to EIO in such a case.
1558 */
1559 if (map->m_may_create &&
1560 (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
1561 f2fs_cp_error(sbi))) {
1562 err = -EIO;
1563 goto unlock_out;
1564 }
1565
1566 err = 0;
1567 if (map->m_next_pgofs)
1568 *map->m_next_pgofs =
1569 f2fs_get_next_page_offset(&dn, pgofs);
1570 if (map->m_next_extent)
1571 *map->m_next_extent =
1572 f2fs_get_next_page_offset(&dn, pgofs);
1573 }
1574 goto unlock_out;
1575 }
1576
1577 start_pgofs = pgofs;
1578 prealloc = 0;
1579 last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
1580 end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1581
1582 next_block:
1583 blkaddr = f2fs_data_blkaddr(&dn);
1584
1585 if (__is_valid_data_blkaddr(blkaddr) &&
1586 !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
1587 err = -EFSCORRUPTED;
1588 goto sync_out;
1589 }
1590
1591 if (__is_valid_data_blkaddr(blkaddr)) {
1592 /* use out-of-place update for direct IO under LFS mode */
1593 if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
1594 map->m_may_create) {
1595 err = __allocate_data_block(&dn, map->m_seg_type);
1596 if (err)
1597 goto sync_out;
1598 blkaddr = dn.data_blkaddr;
1599 set_inode_flag(inode, FI_APPEND_WRITE);
1600 }
1601 } else {
1602 if (create) {
1603 if (unlikely(f2fs_cp_error(sbi))) {
1604 err = -EIO;
1605 goto sync_out;
1606 }
1607 if (flag == F2FS_GET_BLOCK_PRE_AIO) {
1608 if (blkaddr == NULL_ADDR) {
1609 prealloc++;
1610 last_ofs_in_node = dn.ofs_in_node;
1611 }
1612 } else {
1613 WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
1614 flag != F2FS_GET_BLOCK_DIO);
1615 err = __allocate_data_block(&dn,
1616 map->m_seg_type);
1617 if (!err)
1618 set_inode_flag(inode, FI_APPEND_WRITE);
1619 }
1620 if (err)
1621 goto sync_out;
1622 map->m_flags |= F2FS_MAP_NEW;
1623 blkaddr = dn.data_blkaddr;
1624 } else {
1625 if (flag == F2FS_GET_BLOCK_BMAP) {
1626 map->m_pblk = 0;
1627 goto sync_out;
1628 }
1629 if (flag == F2FS_GET_BLOCK_PRECACHE)
1630 goto sync_out;
1631 if (flag == F2FS_GET_BLOCK_FIEMAP &&
1632 blkaddr == NULL_ADDR) {
1633 if (map->m_next_pgofs)
1634 *map->m_next_pgofs = pgofs + 1;
1635 goto sync_out;
1636 }
1637 if (flag != F2FS_GET_BLOCK_FIEMAP) {
1638 /* for defragment case */
1639 if (map->m_next_pgofs)
1640 *map->m_next_pgofs = pgofs + 1;
1641 goto sync_out;
1642 }
1643 }
1644 }
1645
1646 if (flag == F2FS_GET_BLOCK_PRE_AIO)
1647 goto skip;
1648
1649 if (map->m_len == 0) {
1650 /* preallocated unwritten block should be mapped for fiemap. */
1651 if (blkaddr == NEW_ADDR)
1652 map->m_flags |= F2FS_MAP_UNWRITTEN;
1653 map->m_flags |= F2FS_MAP_MAPPED;
1654
1655 map->m_pblk = blkaddr;
1656 map->m_len = 1;
1657 } else if ((map->m_pblk != NEW_ADDR &&
1658 blkaddr == (map->m_pblk + ofs)) ||
1659 (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
1660 flag == F2FS_GET_BLOCK_PRE_DIO) {
1661 ofs++;
1662 map->m_len++;
1663 } else {
1664 goto sync_out;
1665 }
1666
1667 skip:
1668 dn.ofs_in_node++;
1669 pgofs++;
1670
1671 /* preallocate blocks in batch for one dnode page */
1672 if (flag == F2FS_GET_BLOCK_PRE_AIO &&
1673 (pgofs == end || dn.ofs_in_node == end_offset)) {
1674
1675 dn.ofs_in_node = ofs_in_node;
1676 err = f2fs_reserve_new_blocks(&dn, prealloc);
1677 if (err)
1678 goto sync_out;
1679
1680 map->m_len += dn.ofs_in_node - ofs_in_node;
1681 if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
1682 err = -ENOSPC;
1683 goto sync_out;
1684 }
1685 dn.ofs_in_node = end_offset;
1686 }
1687
1688 if (pgofs >= end)
1689 goto sync_out;
1690 else if (dn.ofs_in_node < end_offset)
1691 goto next_block;
1692
1693 if (flag == F2FS_GET_BLOCK_PRECACHE) {
1694 if (map->m_flags & F2FS_MAP_MAPPED) {
1695 unsigned int ofs = start_pgofs - map->m_lblk;
1696
1697 f2fs_update_extent_cache_range(&dn,
1698 start_pgofs, map->m_pblk + ofs,
1699 map->m_len - ofs);
1700 }
1701 }
1702
1703 f2fs_put_dnode(&dn);
1704
1705 if (map->m_may_create) {
1706 __do_map_lock(sbi, flag, false);
1707 f2fs_balance_fs(sbi, dn.node_changed);
1708 }
1709 goto next_dnode;
1710
1711 sync_out:
1712
1713 /* for hardware encryption, but to avoid potential issues in the future */
1714 if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
1715 f2fs_wait_on_block_writeback_range(inode,
1716 map->m_pblk, map->m_len);
1717
1718 if (flag == F2FS_GET_BLOCK_PRECACHE) {
1719 if (map->m_flags & F2FS_MAP_MAPPED) {
1720 unsigned int ofs = start_pgofs - map->m_lblk;
1721
1722 f2fs_update_extent_cache_range(&dn,
1723 start_pgofs, map->m_pblk + ofs,
1724 map->m_len - ofs);
1725 }
1726 if (map->m_next_extent)
1727 *map->m_next_extent = pgofs + 1;
1728 }
1729 f2fs_put_dnode(&dn);
1730 unlock_out:
1731 if (map->m_may_create) {
1732 __do_map_lock(sbi, flag, false);
1733 f2fs_balance_fs(sbi, dn.node_changed);
1734 }
1735 out:
1736 trace_f2fs_map_blocks(inode, map, err);
1737 return err;
1738 }
1739
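/*
 * Check whether the whole range [pos, pos + len) is already mapped within
 * i_size, i.e. a write to it would purely overwrite existing blocks.
 */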
1740 bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
1741 {
1742 struct f2fs_map_blocks map;
1743 block_t last_lblk;
1744 int err;
1745
1746 if (pos + len > i_size_read(inode))
1747 return false;
1748
1749 map.m_lblk = F2FS_BYTES_TO_BLK(pos);
1750 map.m_next_pgofs = NULL;
1751 map.m_next_extent = NULL;
1752 map.m_seg_type = NO_CHECK_TYPE;
1753 map.m_may_create = false;
1754 last_lblk = F2FS_BLK_ALIGN(pos + len);
1755
1756 while (map.m_lblk < last_lblk) {
1757 map.m_len = last_lblk - map.m_lblk;
1758 err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
1759 if (err || map.m_len == 0)
1760 return false;
1761 map.m_lblk += map.m_len;
1762 }
1763 return true;
1764 }
1765
1766 static int __get_data_block(struct inode *inode, sector_t iblock,
1767 struct buffer_head *bh, int create, int flag,
1768 pgoff_t *next_pgofs, int seg_type, bool may_write)
1769 {
1770 struct f2fs_map_blocks map;
1771 int err;
1772
1773 map.m_lblk = iblock;
1774 map.m_len = bh->b_size >> inode->i_blkbits;
1775 map.m_next_pgofs = next_pgofs;
1776 map.m_next_extent = NULL;
1777 map.m_seg_type = seg_type;
1778 map.m_may_create = may_write;
1779
1780 err = f2fs_map_blocks(inode, &map, create, flag);
1781 if (!err) {
1782 map_bh(bh, inode->i_sb, map.m_pblk);
1783 bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
1784 bh->b_size = (u64)map.m_len << inode->i_blkbits;
1785 }
1786 return err;
1787 }
1788
1789 static int get_data_block(struct inode *inode, sector_t iblock,
1790 struct buffer_head *bh_result, int create, int flag,
1791 pgoff_t *next_pgofs)
1792 {
1793 return __get_data_block(inode, iblock, bh_result, create,
1794 flag, next_pgofs,
1795 NO_CHECK_TYPE, create);
1796 }
1797
1798 static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
1799 struct buffer_head *bh_result, int create)
1800 {
1801 return __get_data_block(inode, iblock, bh_result, create,
1802 F2FS_GET_BLOCK_DIO, NULL,
1803 f2fs_rw_hint_to_seg_type(inode->i_write_hint),
1804 IS_SWAPFILE(inode) ? false : true);
1805 }
1806
1807 static int get_data_block_dio(struct inode *inode, sector_t iblock,
1808 struct buffer_head *bh_result, int create)
1809 {
1810 return __get_data_block(inode, iblock, bh_result, create,
1811 F2FS_GET_BLOCK_DIO, NULL,
1812 f2fs_rw_hint_to_seg_type(inode->i_write_hint),
1813 false);
1814 }
1815
1816 static int get_data_block_bmap(struct inode *inode, sector_t iblock,
1817 struct buffer_head *bh_result, int create)
1818 {
1819 /* Block number less than F2FS MAX BLOCKS */
1820 if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
1821 return -EFBIG;
1822
1823 return __get_data_block(inode, iblock, bh_result, create,
1824 F2FS_GET_BLOCK_BMAP, NULL,
1825 NO_CHECK_TYPE, create);
1826 }
1827
1828 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
1829 {
1830 return (offset >> inode->i_blkbits);
1831 }
1832
1833 static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
1834 {
1835 return (blk << inode->i_blkbits);
1836 }
1837
1838 static int f2fs_xattr_fiemap(struct inode *inode,
1839 struct fiemap_extent_info *fieinfo)
1840 {
1841 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1842 struct page *page;
1843 struct node_info ni;
1844 __u64 phys = 0, len;
1845 __u32 flags;
1846 nid_t xnid = F2FS_I(inode)->i_xattr_nid;
1847 int err = 0;
1848
1849 if (f2fs_has_inline_xattr(inode)) {
1850 int offset;
1851
1852 page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
1853 inode->i_ino, false);
1854 if (!page)
1855 return -ENOMEM;
1856
1857 err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
1858 if (err) {
1859 f2fs_put_page(page, 1);
1860 return err;
1861 }
1862
1863 phys = (__u64)blk_to_logical(inode, ni.blk_addr);
1864 offset = offsetof(struct f2fs_inode, i_addr) +
1865 sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1866 get_inline_xattr_addrs(inode));
1867
1868 phys += offset;
1869 len = inline_xattr_size(inode);
1870
1871 f2fs_put_page(page, 1);
1872
1873 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
1874
1875 if (!xnid)
1876 flags |= FIEMAP_EXTENT_LAST;
1877
1878 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1879 if (err || err == 1)
1880 return err;
1881 }
1882
1883 if (xnid) {
1884 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
1885 if (!page)
1886 return -ENOMEM;
1887
1888 err = f2fs_get_node_info(sbi, xnid, &ni);
1889 if (err) {
1890 f2fs_put_page(page, 1);
1891 return err;
1892 }
1893
1894 phys = (__u64)blk_to_logical(inode, ni.blk_addr);
1895 len = inode->i_sb->s_blocksize;
1896
1897 f2fs_put_page(page, 1);
1898
1899 flags = FIEMAP_EXTENT_LAST;
1900 }
1901
1902 if (phys)
1903 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1904
1905 return (err < 0 ? err : 0);
1906 }
1907
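/*
 * Upper bound on the number of data blocks addressable by one inode:
 * direct pointers in the inode, two direct node blocks, two indirect
 * node blocks and one double-indirect node block.
 */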
1908 static loff_t max_inode_blocks(struct inode *inode)
1909 {
1910 loff_t result = ADDRS_PER_INODE(inode);
1911 loff_t leaf_count = ADDRS_PER_BLOCK(inode);
1912
1913 /* two direct node blocks */
1914 result += (leaf_count * 2);
1915
1916 /* two indirect node blocks */
1917 leaf_count *= NIDS_PER_BLOCK;
1918 result += (leaf_count * 2);
1919
1920 /* one double indirect node block */
1921 leaf_count *= NIDS_PER_BLOCK;
1922 result += leaf_count;
1923
1924 return result;
1925 }
1926
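/*
 * ->fiemap(): walk the range with F2FS_GET_BLOCK_FIEMAP lookups, merging
 * each mapped run into an extent and skipping holes via next_pgofs.
 * A compressed cluster is reported as one FIEMAP_EXTENT_ENCODED extent.
 */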
1927 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1928 u64 start, u64 len)
1929 {
1930 struct buffer_head map_bh;
1931 sector_t start_blk, last_blk;
1932 pgoff_t next_pgofs;
1933 u64 logical = 0, phys = 0, size = 0;
1934 u32 flags = 0;
1935 int ret = 0;
1936 bool compr_cluster = false;
1937 unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
1938
1939 if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
1940 ret = f2fs_precache_extents(inode);
1941 if (ret)
1942 return ret;
1943 }
1944
1945 ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
1946 if (ret)
1947 return ret;
1948
1949 inode_lock(inode);
1950
1951 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
1952 ret = f2fs_xattr_fiemap(inode, fieinfo);
1953 goto out;
1954 }
1955
1956 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
1957 ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
1958 if (ret != -EAGAIN)
1959 goto out;
1960 }
1961
1962 if (logical_to_blk(inode, len) == 0)
1963 len = blk_to_logical(inode, 1);
1964
1965 start_blk = logical_to_blk(inode, start);
1966 last_blk = logical_to_blk(inode, start + len - 1);
1967
1968 next:
1969 memset(&map_bh, 0, sizeof(struct buffer_head));
1970 map_bh.b_size = len;
1971
1972 if (compr_cluster)
1973 map_bh.b_size = blk_to_logical(inode, cluster_size - 1);
1974
1975 ret = get_data_block(inode, start_blk, &map_bh, 0,
1976 F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
1977 if (ret)
1978 goto out;
1979
1980 /* HOLE */
1981 if (!buffer_mapped(&map_bh)) {
1982 start_blk = next_pgofs;
1983
1984 if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
1985 max_inode_blocks(inode)))
1986 goto prep_next;
1987
1988 flags |= FIEMAP_EXTENT_LAST;
1989 }
1990
1991 if (size) {
1992 if (IS_ENCRYPTED(inode))
1993 flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
1994
1995 ret = fiemap_fill_next_extent(fieinfo, logical,
1996 phys, size, flags);
1997 if (ret)
1998 goto out;
1999 size = 0;
2000 }
2001
2002 if (start_blk > last_blk)
2003 goto out;
2004
2005 if (compr_cluster) {
2006 compr_cluster = false;
2007
2008
2009 logical = blk_to_logical(inode, start_blk - 1);
2010 phys = blk_to_logical(inode, map_bh.b_blocknr);
2011 size = blk_to_logical(inode, cluster_size);
2012
2013 flags |= FIEMAP_EXTENT_ENCODED;
2014
2015 start_blk += cluster_size - 1;
2016
2017 if (start_blk > last_blk)
2018 goto out;
2019
2020 goto prep_next;
2021 }
2022
2023 if (map_bh.b_blocknr == COMPRESS_ADDR) {
2024 compr_cluster = true;
2025 start_blk++;
2026 goto prep_next;
2027 }
2028
2029 logical = blk_to_logical(inode, start_blk);
2030 phys = blk_to_logical(inode, map_bh.b_blocknr);
2031 size = map_bh.b_size;
2032 flags = 0;
2033 if (buffer_unwritten(&map_bh))
2034 flags = FIEMAP_EXTENT_UNWRITTEN;
2035
2036 start_blk += logical_to_blk(inode, size);
2037
2038 prep_next:
2039 cond_resched();
2040 if (fatal_signal_pending(current))
2041 ret = -EINTR;
2042 else
2043 goto next;
2044 out:
2045 if (ret == 1)
2046 ret = 0;
2047
2048 inode_unlock(inode);
2049 return ret;
2050 }
2051
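/*
 * When fs-verity is enabled or being enabled on this inode, reads may need
 * to go past i_size (e.g. Merkle tree pages stored beyond EOF), so allow up
 * to s_maxbytes; otherwise cap reads at i_size.
 */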
2052 static inline loff_t f2fs_readpage_limit(struct inode *inode)
2053 {
2054 if (IS_ENABLED(CONFIG_FS_VERITY) &&
2055 (IS_VERITY(inode) || f2fs_verity_in_progress(inode)))
2056 return inode->i_sb->s_maxbytes;
2057
2058 return i_size_read(inode);
2059 }
2060
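/*
 * Read one page: map its block (reusing the previous mapping result where
 * possible) and append it to *bio_ret, submitting and reallocating the bio
 * whenever the new block cannot be merged with the pending one.
 */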
2061 static int f2fs_read_single_page(struct inode *inode, struct page *page,
2062 unsigned nr_pages,
2063 struct f2fs_map_blocks *map,
2064 struct bio **bio_ret,
2065 sector_t *last_block_in_bio,
2066 bool is_readahead)
2067 {
2068 struct bio *bio = *bio_ret;
2069 const unsigned blkbits = inode->i_blkbits;
2070 const unsigned blocksize = 1 << blkbits;
2071 sector_t block_in_file;
2072 sector_t last_block;
2073 sector_t last_block_in_file;
2074 sector_t block_nr;
2075 int ret = 0;
2076
2077 block_in_file = (sector_t)page_index(page);
2078 last_block = block_in_file + nr_pages;
2079 last_block_in_file = (f2fs_readpage_limit(inode) + blocksize - 1) >>
2080 blkbits;
2081 if (last_block > last_block_in_file)
2082 last_block = last_block_in_file;
2083
2084 /* just zeroing out page which is beyond EOF */
2085 if (block_in_file >= last_block)
2086 goto zero_out;
2087 /*
2088 * Map blocks using the previous result first.
2089 */
2090 if ((map->m_flags & F2FS_MAP_MAPPED) &&
2091 block_in_file > map->m_lblk &&
2092 block_in_file < (map->m_lblk + map->m_len))
2093 goto got_it;
2094
2095 /*
2096 * Then do more f2fs_map_blocks() calls until we are
2097 * done with this page.
2098 */
2099 map->m_lblk = block_in_file;
2100 map->m_len = last_block - block_in_file;
2101
2102 ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
2103 if (ret)
2104 goto out;
2105 got_it:
2106 if ((map->m_flags & F2FS_MAP_MAPPED)) {
2107 block_nr = map->m_pblk + block_in_file - map->m_lblk;
2108 SetPageMappedToDisk(page);
2109
2110 if (!PageUptodate(page) && (!PageSwapCache(page) &&
2111 !cleancache_get_page(page))) {
2112 SetPageUptodate(page);
2113 goto confused;
2114 }
2115
2116 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
2117 DATA_GENERIC_ENHANCE_READ)) {
2118 ret = -EFSCORRUPTED;
2119 goto out;
2120 }
2121 } else {
2122 zero_out:
2123 zero_user_segment(page, 0, PAGE_SIZE);
2124 if (f2fs_need_verity(inode, page->index) &&
2125 !fsverity_verify_page(page)) {
2126 ret = -EIO;
2127 goto out;
2128 }
2129 if (!PageUptodate(page))
2130 SetPageUptodate(page);
2131 unlock_page(page);
2132 goto out;
2133 }
2134
2135 /*
2136 * This page will go to BIO. Do we need to send this
2137 * BIO off first?
2138 */
2139 if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
2140 *last_block_in_bio, block_nr) ||
2141 !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
2142 submit_and_realloc:
2143 __submit_bio(F2FS_I_SB(inode), bio, DATA);
2144 bio = NULL;
2145 }
2146 if (bio == NULL) {
2147 bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
2148 is_readahead ? REQ_RAHEAD : 0, page->index,
2149 false);
2150 if (IS_ERR(bio)) {
2151 ret = PTR_ERR(bio);
2152 bio = NULL;
2153 goto out;
2154 }
2155 }
2156
2157 /*
2158 * If the page is under writeback, we need to wait for
2159 * its completion to see the correct decrypted data.
2160 */
2161 f2fs_wait_on_block_writeback(inode, block_nr);
2162
2163 if (bio_add_page(bio, page, blocksize, 0) < blocksize)
2164 goto submit_and_realloc;
2165
2166 inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
2167 f2fs_update_iostat(F2FS_I_SB(inode), FS_DATA_READ_IO, F2FS_BLKSIZE);
2168 ClearPageError(page);
2169 *last_block_in_bio = block_nr;
2170 goto out;
2171 confused:
2172 if (bio) {
2173 __submit_bio(F2FS_I_SB(inode), bio, DATA);
2174 bio = NULL;
2175 }
2176 unlock_page(page);
2177 out:
2178 *bio_ret = bio;
2179 return ret;
2180 }
2181
2182 #ifdef CONFIG_F2FS_FS_COMPRESSION
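/*
 * Read a compressed cluster: drop pages beyond EOF, count the valid
 * compressed blocks following the COMPRESS_ADDR marker, and queue them into
 * a bio tagged with STEP_DECOMPRESS so decompression runs in the post-read
 * workqueue.
 */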
2183 int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
2184 unsigned nr_pages, sector_t *last_block_in_bio,
2185 bool is_readahead, bool for_write)
2186 {
2187 struct dnode_of_data dn;
2188 struct inode *inode = cc->inode;
2189 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2190 struct bio *bio = *bio_ret;
2191 unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
2192 sector_t last_block_in_file;
2193 const unsigned blkbits = inode->i_blkbits;
2194 const unsigned blocksize = 1 << blkbits;
2195 struct decompress_io_ctx *dic = NULL;
2196 int i;
2197 int ret = 0;
2198
2199 f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
2200
2201 last_block_in_file = (f2fs_readpage_limit(inode) +
2202 blocksize - 1) >> blkbits;
2203
2204 /* get rid of pages beyond EOF */
2205 for (i = 0; i < cc->cluster_size; i++) {
2206 struct page *page = cc->rpages[i];
2207
2208 if (!page)
2209 continue;
2210 if ((sector_t)page->index >= last_block_in_file) {
2211 zero_user_segment(page, 0, PAGE_SIZE);
2212 if (!PageUptodate(page))
2213 SetPageUptodate(page);
2214 } else if (!PageUptodate(page)) {
2215 continue;
2216 }
2217 unlock_page(page);
2218 cc->rpages[i] = NULL;
2219 cc->nr_rpages--;
2220 }
2221
2222 /* we are done since all pages are beyond EOF */
2223 if (f2fs_cluster_is_empty(cc))
2224 goto out;
2225
2226 set_new_dnode(&dn, inode, NULL, NULL, 0);
2227 ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
2228 if (ret)
2229 goto out;
2230
2231 /* cluster was overwritten as normal cluster */
2232 if (dn.data_blkaddr != COMPRESS_ADDR)
2233 goto out;
2234
2235 for (i = 1; i < cc->cluster_size; i++) {
2236 block_t blkaddr;
2237
2238 blkaddr = data_blkaddr(dn.inode, dn.node_page,
2239 dn.ofs_in_node + i);
2240
2241 if (!__is_valid_data_blkaddr(blkaddr))
2242 break;
2243
2244 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
2245 ret = -EFAULT;
2246 goto out_put_dnode;
2247 }
2248 cc->nr_cpages++;
2249 }
2250
2251 /* nothing to decompress */
2252 if (cc->nr_cpages == 0) {
2253 ret = 0;
2254 goto out_put_dnode;
2255 }
2256
2257 dic = f2fs_alloc_dic(cc);
2258 if (IS_ERR(dic)) {
2259 ret = PTR_ERR(dic);
2260 goto out_put_dnode;
2261 }
2262
2263 for (i = 0; i < dic->nr_cpages; i++) {
2264 struct page *page = dic->cpages[i];
2265 block_t blkaddr;
2266 struct bio_post_read_ctx *ctx;
2267
2268 blkaddr = data_blkaddr(dn.inode, dn.node_page,
2269 dn.ofs_in_node + i + 1);
2270
2271 if (bio && (!page_is_mergeable(sbi, bio,
2272 *last_block_in_bio, blkaddr) ||
2273 !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
2274 submit_and_realloc:
2275 __submit_bio(sbi, bio, DATA);
2276 bio = NULL;
2277 }
2278
2279 if (!bio) {
2280 bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
2281 is_readahead ? REQ_RAHEAD : 0,
2282 page->index, for_write);
2283 if (IS_ERR(bio)) {
2284 ret = PTR_ERR(bio);
2285 dic->failed = true;
2286 if (refcount_sub_and_test(dic->nr_cpages - i,
2287 &dic->ref)) {
2288 f2fs_decompress_end_io(dic->rpages,
2289 cc->cluster_size, true,
2290 false);
2291 f2fs_free_dic(dic);
2292 }
2293 f2fs_put_dnode(&dn);
2294 *bio_ret = NULL;
2295 return ret;
2296 }
2297 }
2298
2299 f2fs_wait_on_block_writeback(inode, blkaddr);
2300
2301 if (bio_add_page(bio, page, blocksize, 0) < blocksize)
2302 goto submit_and_realloc;
2303
2304 /* tag STEP_DECOMPRESS to handle IO in wq */
2305 ctx = bio->bi_private;
2306 if (!(ctx->enabled_steps & (1 << STEP_DECOMPRESS)))
2307 ctx->enabled_steps |= 1 << STEP_DECOMPRESS;
2308
2309 inc_page_count(sbi, F2FS_RD_DATA);
2310 f2fs_update_iostat(sbi, FS_DATA_READ_IO, F2FS_BLKSIZE);
2311 f2fs_update_iostat(sbi, FS_CDATA_READ_IO, F2FS_BLKSIZE);
2312 ClearPageError(page);
2313 *last_block_in_bio = blkaddr;
2314 }
2315
2316 f2fs_put_dnode(&dn);
2317
2318 *bio_ret = bio;
2319 return 0;
2320
2321 out_put_dnode:
2322 f2fs_put_dnode(&dn);
2323 out:
2324 f2fs_decompress_end_io(cc->rpages, cc->cluster_size, true, false);
2325 *bio_ret = bio;
2326 return ret;
2327 }
2328 #endif
2329
2330 /*
2331 * This function was originally taken from fs/mpage.c, and customized for f2fs.
2332 * The major change stems from block_size == page_size being the default in f2fs.
2333 *
2334 * Note that the aops->readpages() function is ONLY used for read-ahead. If
2335 * this function ever deviates from doing just read-ahead, it should either
2336 * use ->readpage() or do the necessary surgery to decouple ->readpages()
2337 * from read-ahead.
2338 */
2339 int f2fs_mpage_readpages(struct address_space *mapping,
2340 struct list_head *pages, struct page *page,
2341 unsigned nr_pages, bool is_readahead)
2342 {
2343 struct bio *bio = NULL;
2344 sector_t last_block_in_bio = 0;
2345 struct inode *inode = mapping->host;
2346 struct f2fs_map_blocks map;
2347 #ifdef CONFIG_F2FS_FS_COMPRESSION
2348 struct compress_ctx cc = {
2349 .inode = inode,
2350 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
2351 .cluster_size = F2FS_I(inode)->i_cluster_size,
2352 .cluster_idx = NULL_CLUSTER,
2353 .rpages = NULL,
2354 .cpages = NULL,
2355 .nr_rpages = 0,
2356 .nr_cpages = 0,
2357 };
2358 #endif
2359 unsigned max_nr_pages = nr_pages;
2360 int ret = 0;
2361
2362 /* nr_pages can only be zero when called from f2fs_merkle_tree_readahead() in old kernels. */
2363 if (!nr_pages)
2364 return 0;
2365
2366 map.m_pblk = 0;
2367 map.m_lblk = 0;
2368 map.m_len = 0;
2369 map.m_flags = 0;
2370 map.m_next_pgofs = NULL;
2371 map.m_next_extent = NULL;
2372 map.m_seg_type = NO_CHECK_TYPE;
2373 map.m_may_create = false;
2374
2375 for (; nr_pages; nr_pages--) {
2376 if (pages) {
2377 page = list_last_entry(pages, struct page, lru);
2378
2379 prefetchw(&page->flags);
2380 list_del(&page->lru);
2381 if (add_to_page_cache_lru(page, mapping,
2382 page_index(page),
2383 readahead_gfp_mask(mapping)))
2384 goto next_page;
2385 }
2386
2387 #ifdef CONFIG_F2FS_FS_COMPRESSION
2388 if (f2fs_compressed_file(inode)) {
2389 /* there are remaining compressed pages, submit them */
2390 if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
2391 ret = f2fs_read_multi_pages(&cc, &bio,
2392 max_nr_pages,
2393 &last_block_in_bio,
2394 is_readahead, false);
2395 f2fs_destroy_compress_ctx(&cc);
2396 if (ret)
2397 goto set_error_page;
2398 }
2399 ret = f2fs_is_compressed_cluster(inode, page->index);
2400 if (ret < 0)
2401 goto set_error_page;
2402 else if (!ret)
2403 goto read_single_page;
2404
2405 ret = f2fs_init_compress_ctx(&cc);
2406 if (ret)
2407 goto set_error_page;
2408
2409 f2fs_compress_ctx_add_page(&cc, page);
2410
2411 goto next_page;
2412 }
2413 read_single_page:
2414 #endif
2415
2416 ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
2417 &bio, &last_block_in_bio, is_readahead);
2418 if (ret) {
2419 #ifdef CONFIG_F2FS_FS_COMPRESSION
2420 set_error_page:
2421 #endif
2422 SetPageError(page);
2423 zero_user_segment(page, 0, PAGE_SIZE);
2424 unlock_page(page);
2425 }
2426 next_page:
2427 if (pages)
2428 put_page(page);
2429
2430 #ifdef CONFIG_F2FS_FS_COMPRESSION
2431 if (f2fs_compressed_file(inode)) {
2432 /* last page */
2433 if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
2434 ret = f2fs_read_multi_pages(&cc, &bio,
2435 max_nr_pages,
2436 &last_block_in_bio,
2437 is_readahead, false);
2438 f2fs_destroy_compress_ctx(&cc);
2439 }
2440 }
2441 #endif
2442 }
2443 BUG_ON(pages && !list_empty(pages));
2444 if (bio)
2445 __submit_bio(F2FS_I_SB(inode), bio, DATA);
2446 return pages ? 0 : ret;
2447 }
2448
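/* ->readpage() entry: try inline data first, then fall back to the mpage read path. */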
2449 static int f2fs_read_data_page(struct file *file, struct page *page)
2450 {
2451 struct inode *inode = page_file_mapping(page)->host;
2452 int ret = -EAGAIN;
2453
2454 trace_f2fs_readpage(page, DATA);
2455
2456 if (!f2fs_is_compress_backend_ready(inode)) {
2457 unlock_page(page);
2458 return -EOPNOTSUPP;
2459 }
2460
2461 /* If the file has inline data, try to read it directly */
2462 if (f2fs_has_inline_data(inode))
2463 ret = f2fs_read_inline_data(inode, page);
2464 if (ret == -EAGAIN)
2465 ret = f2fs_mpage_readpages(page_file_mapping(page),
2466 NULL, page, 1, false);
2467 return ret;
2468 }
2469
2470 static int f2fs_read_data_pages(struct file *file,
2471 struct address_space *mapping,
2472 struct list_head *pages, unsigned nr_pages)
2473 {
2474 struct inode *inode = mapping->host;
2475 struct page *page = list_last_entry(pages, struct page, lru);
2476
2477 trace_f2fs_readpages(inode, page, nr_pages);
2478
2479 if (!f2fs_is_compress_backend_ready(inode))
2480 return 0;
2481
2482 /* If the file has inline data, skip readpages */
2483 if (f2fs_has_inline_data(inode))
2484 return 0;
2485
2486 return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
2487 }
2488
2489 int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
2490 {
2491 struct inode *inode = fio->page->mapping->host;
2492 struct page *mpage, *page;
2493 gfp_t gfp_flags = GFP_NOFS;
2494
2495 if (!f2fs_encrypted_file(inode))
2496 return 0;
2497
2498 page = fio->compressed_page ? fio->compressed_page : fio->page;
2499
2500 /* wait for GCed page writeback via META_MAPPING */
2501 f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
2502
2503 if (fscrypt_inode_uses_inline_crypto(inode))
2504 return 0;
2505
2506 retry_encrypt:
2507 fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
2508 PAGE_SIZE, 0, gfp_flags);
2509 if (IS_ERR(fio->encrypted_page)) {
2510 /* flush pending IOs and wait for a while in the ENOMEM case */
2511 if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
2512 f2fs_flush_merged_writes(fio->sbi);
2513 congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT);
2514 gfp_flags |= __GFP_NOFAIL;
2515 goto retry_encrypt;
2516 }
2517 return PTR_ERR(fio->encrypted_page);
2518 }
2519
2520 mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
2521 if (mpage) {
2522 if (PageUptodate(mpage))
2523 memcpy(page_address(mpage),
2524 page_address(fio->encrypted_page), PAGE_SIZE);
2525 f2fs_put_page(mpage, 1);
2526 }
2527 return 0;
2528 }
2529
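/*
 * Decide whether this write may be done in place. ipu_policy is a bitmap;
 * for example, a policy of (1 << F2FS_IPU_SSR | 1 << F2FS_IPU_UTIL) allows
 * IPU either when SSR allocation is needed or when utilization exceeds
 * min_ipu_util. Checkpoint-disabled state also forces IPU for data that is
 * not yet checkpointed.
 */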
2530 static inline bool check_inplace_update_policy(struct inode *inode,
2531 struct f2fs_io_info *fio)
2532 {
2533 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2534 unsigned int policy = SM_I(sbi)->ipu_policy;
2535
2536 if (policy & (0x1 << F2FS_IPU_FORCE))
2537 return true;
2538 if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
2539 return true;
2540 if (policy & (0x1 << F2FS_IPU_UTIL) &&
2541 utilization(sbi) > SM_I(sbi)->min_ipu_util)
2542 return true;
2543 if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
2544 utilization(sbi) > SM_I(sbi)->min_ipu_util)
2545 return true;
2546
2547 /*
2548 * IPU for rewrite async pages
2549 */
2550 if (policy & (0x1 << F2FS_IPU_ASYNC) &&
2551 fio && fio->op == REQ_OP_WRITE &&
2552 !(fio->op_flags & REQ_SYNC) &&
2553 !IS_ENCRYPTED(inode))
2554 return true;
2555
2556 /* this is only set during fdatasync */
2557 if (policy & (0x1 << F2FS_IPU_FSYNC) &&
2558 is_inode_flag_set(inode, FI_NEED_IPU))
2559 return true;
2560
2561 if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2562 !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2563 return true;
2564
2565 return false;
2566 }
2567
2568 bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
2569 {
2570 if (f2fs_is_pinned_file(inode))
2571 return true;
2572
2573 /* if this is cold file, we should overwrite to avoid fragmentation */
2574 if (file_is_cold(inode))
2575 return true;
2576
2577 return check_inplace_update_policy(inode, fio);
2578 }
2579
2580 bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
2581 {
2582 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2583
2584 /* The below cases were checked when setting it. */
2585 if (f2fs_is_pinned_file(inode))
2586 return false;
2587 if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
2588 return true;
2589 if (f2fs_lfs_mode(sbi))
2590 return true;
2591 if (S_ISDIR(inode->i_mode))
2592 return true;
2593 if (IS_NOQUOTA(inode))
2594 return true;
2595 if (f2fs_is_atomic_file(inode))
2596 return true;
2597 if (fio) {
2598 if (is_cold_data(fio->page))
2599 return true;
2600 if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
2601 return true;
2602 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2603 f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2604 return true;
2605 }
2606 return false;
2607 }
2608
2609 static inline bool need_inplace_update(struct f2fs_io_info *fio)
2610 {
2611 struct inode *inode = fio->page->mapping->host;
2612
2613 if (f2fs_should_update_outplace(inode, fio))
2614 return false;
2615
2616 return f2fs_should_update_inplace(inode, fio);
2617 }
2618
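/*
 * Write one data page: prefer an in-place update when the policy allows it
 * and the old block address is valid; otherwise allocate a new block and
 * write out of place (LFS-style), updating the inode flags accordingly.
 */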
2619 int f2fs_do_write_data_page(struct f2fs_io_info *fio)
2620 {
2621 struct page *page = fio->page;
2622 struct inode *inode = page->mapping->host;
2623 struct dnode_of_data dn;
2624 struct extent_info ei = {0,0,0};
2625 struct node_info ni;
2626 bool ipu_force = false;
2627 int err = 0;
2628
2629 set_new_dnode(&dn, inode, NULL, NULL, 0);
2630 if (need_inplace_update(fio) &&
2631 f2fs_lookup_extent_cache(inode, page->index, &ei)) {
2632 fio->old_blkaddr = ei.blk + page->index - ei.fofs;
2633
2634 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2635 DATA_GENERIC_ENHANCE))
2636 return -EFSCORRUPTED;
2637
2638 ipu_force = true;
2639 fio->need_lock = LOCK_DONE;
2640 goto got_it;
2641 }
2642
2643 /* avoid a deadlock between the page lock and f2fs_lock_op */
2644 if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
2645 return -EAGAIN;
2646
2647 err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
2648 if (err)
2649 goto out;
2650
2651 fio->old_blkaddr = dn.data_blkaddr;
2652
2653 /* This page is already truncated */
2654 if (fio->old_blkaddr == NULL_ADDR) {
2655 ClearPageUptodate(page);
2656 clear_cold_data(page);
2657 goto out_writepage;
2658 }
2659 got_it:
2660 if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2661 !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2662 DATA_GENERIC_ENHANCE)) {
2663 err = -EFSCORRUPTED;
2664 goto out_writepage;
2665 }
2666 /*
2667 * If the current allocation needs SSR, it is better to do in-place
2668 * writes for the updated data.
2669 */
2670 if (ipu_force ||
2671 (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2672 need_inplace_update(fio))) {
2673 err = f2fs_encrypt_one_page(fio);
2674 if (err)
2675 goto out_writepage;
2676
2677 set_page_writeback(page);
2678 ClearPageError(page);
2679 f2fs_put_dnode(&dn);
2680 if (fio->need_lock == LOCK_REQ)
2681 f2fs_unlock_op(fio->sbi);
2682 err = f2fs_inplace_write_data(fio);
2683 if (err) {
2684 if (fscrypt_inode_uses_fs_layer_crypto(inode))
2685 fscrypt_finalize_bounce_page(&fio->encrypted_page);
2686 if (PageWriteback(page))
2687 end_page_writeback(page);
2688 } else {
2689 set_inode_flag(inode, FI_UPDATE_WRITE);
2690 }
2691 trace_f2fs_do_write_data_page(fio->page, IPU);
2692 return err;
2693 }
2694
2695 if (fio->need_lock == LOCK_RETRY) {
2696 if (!f2fs_trylock_op(fio->sbi)) {
2697 err = -EAGAIN;
2698 goto out_writepage;
2699 }
2700 fio->need_lock = LOCK_REQ;
2701 }
2702
2703 err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
2704 if (err)
2705 goto out_writepage;
2706
2707 fio->version = ni.version;
2708
2709 err = f2fs_encrypt_one_page(fio);
2710 if (err)
2711 goto out_writepage;
2712
2713 set_page_writeback(page);
2714 ClearPageError(page);
2715
2716 if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
2717 f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
2718
2719 /* LFS mode write path */
2720 f2fs_outplace_write_data(&dn, fio);
2721 trace_f2fs_do_write_data_page(page, OPU);
2722 set_inode_flag(inode, FI_APPEND_WRITE);
2723 if (page->index == 0)
2724 set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
2725 out_writepage:
2726 f2fs_put_dnode(&dn);
2727 out:
2728 if (fio->need_lock == LOCK_REQ)
2729 f2fs_unlock_op(fio->sbi);
2730 return err;
2731 }
2732
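/*
 * Write a single dirty page: zero the tail beyond EOF, try the inline data
 * path, and handle the checkpoint-controlled dentry/quota cases; the page
 * is redirtied on transient failures such as -EAGAIN.
 */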
2733 int f2fs_write_single_data_page(struct page *page, int *submitted,
2734 struct bio **bio,
2735 sector_t *last_block,
2736 struct writeback_control *wbc,
2737 enum iostat_type io_type,
2738 int compr_blocks)
2739 {
2740 struct inode *inode = page->mapping->host;
2741 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2742 loff_t i_size = i_size_read(inode);
2743 const pgoff_t end_index = ((unsigned long long)i_size)
2744 >> PAGE_SHIFT;
2745 loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
2746 unsigned offset = 0;
2747 bool need_balance_fs = false;
2748 int err = 0;
2749 struct f2fs_io_info fio = {
2750 .sbi = sbi,
2751 .ino = inode->i_ino,
2752 .type = DATA,
2753 .op = REQ_OP_WRITE,
2754 .op_flags = wbc_to_write_flags(wbc),
2755 .old_blkaddr = NULL_ADDR,
2756 .page = page,
2757 .encrypted_page = NULL,
2758 .submitted = false,
2759 .compr_blocks = compr_blocks,
2760 .need_lock = LOCK_RETRY,
2761 .io_type = io_type,
2762 .io_wbc = wbc,
2763 .bio = bio,
2764 .last_block = last_block,
2765 };
2766
2767 trace_f2fs_writepage(page, DATA);
2768
2769 /* we should bypass data pages to let the kworker jobs proceed */
2770 if (unlikely(f2fs_cp_error(sbi))) {
2771 mapping_set_error(page->mapping, -EIO);
2772 /*
2773 * don't drop any dirty dentry pages, to keep the latest
2774 * directory structure.
2775 */
2776 if (S_ISDIR(inode->i_mode) &&
2777 !is_sbi_flag_set(sbi, SBI_IS_CLOSE))
2778 goto redirty_out;
2779 goto out;
2780 }
2781
2782 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
2783 goto redirty_out;
2784
2785 if (page->index < end_index ||
2786 f2fs_verity_in_progress(inode) ||
2787 compr_blocks)
2788 goto write;
2789
2790 /*
2791 * If the offset is out-of-range of file size,
2792 * this page does not have to be written to disk.
2793 */
2794 offset = i_size & (PAGE_SIZE - 1);
2795 if ((page->index >= end_index + 1) || !offset)
2796 goto out;
2797
2798 zero_user_segment(page, offset, PAGE_SIZE);
2799 write:
2800 if (f2fs_is_drop_cache(inode))
2801 goto out;
2802 /* we should not write the 0'th page, which holds the journal header */
2803 if (f2fs_is_volatile_file(inode) && (!page->index ||
2804 (!wbc->for_reclaim &&
2805 f2fs_available_free_memory(sbi, BASE_CHECK))))
2806 goto redirty_out;
2807
2808 /* Dentry/quota blocks are controlled by checkpoint */
2809 if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) {
2810 fio.need_lock = LOCK_DONE;
2811 err = f2fs_do_write_data_page(&fio);
2812 goto done;
2813 }
2814
2815 if (!wbc->for_reclaim)
2816 need_balance_fs = true;
2817 else if (has_not_enough_free_secs(sbi, 0, 0))
2818 goto redirty_out;
2819 else
2820 set_inode_flag(inode, FI_HOT_DATA);
2821
2822 err = -EAGAIN;
2823 if (f2fs_has_inline_data(inode)) {
2824 err = f2fs_write_inline_data(inode, page);
2825 if (!err)
2826 goto out;
2827 }
2828
2829 if (err == -EAGAIN) {
2830 err = f2fs_do_write_data_page(&fio);
2831 if (err == -EAGAIN) {
2832 fio.need_lock = LOCK_REQ;
2833 err = f2fs_do_write_data_page(&fio);
2834 }
2835 }
2836
2837 if (err) {
2838 file_set_keep_isize(inode);
2839 } else {
2840 spin_lock(&F2FS_I(inode)->i_size_lock);
2841 if (F2FS_I(inode)->last_disk_size < psize)
2842 F2FS_I(inode)->last_disk_size = psize;
2843 spin_unlock(&F2FS_I(inode)->i_size_lock);
2844 }
2845
2846 done:
2847 if (err && err != -ENOENT)
2848 goto redirty_out;
2849
2850 out:
2851 inode_dec_dirty_pages(inode);
2852 if (err) {
2853 ClearPageUptodate(page);
2854 clear_cold_data(page);
2855 }
2856
2857 if (wbc->for_reclaim) {
2858 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
2859 clear_inode_flag(inode, FI_HOT_DATA);
2860 f2fs_remove_dirty_inode(inode);
2861 submitted = NULL;
2862 }
2863 unlock_page(page);
2864 if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
2865 !F2FS_I(inode)->cp_task)
2866 f2fs_balance_fs(sbi, need_balance_fs);
2867
2868 if (unlikely(f2fs_cp_error(sbi))) {
2869 f2fs_submit_merged_write(sbi, DATA);
2870 f2fs_submit_merged_ipu_write(sbi, bio, NULL);
2871 submitted = NULL;
2872 }
2873
2874 if (submitted)
2875 *submitted = fio.submitted ? 1 : 0;
2876
2877 return 0;
2878
2879 redirty_out:
2880 redirty_page_for_writepage(wbc, page);
2881 /*
2882 * pageout() in MM translates EAGAIN into handle_write_error()
2883 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
2884 * file_write_and_wait_range() will then see the EIO error, which is
2885 * critical for fsync() to report the atomic_write failure to user space.
2886 */
2887 if (!err || wbc->for_reclaim)
2888 return AOP_WRITEPAGE_ACTIVATE;
2889 unlock_page(page);
2890 return err;
2891 }
2892
2893 static int f2fs_write_data_page(struct page *page,
2894 struct writeback_control *wbc)
2895 {
2896 #ifdef CONFIG_F2FS_FS_COMPRESSION
2897 struct inode *inode = page->mapping->host;
2898
2899 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
2900 goto out;
2901
2902 if (f2fs_compressed_file(inode)) {
2903 if (f2fs_is_compressed_cluster(inode, page->index)) {
2904 redirty_page_for_writepage(wbc, page);
2905 return AOP_WRITEPAGE_ACTIVATE;
2906 }
2907 }
2908 out:
2909 #endif
2910
2911 return f2fs_write_single_data_page(page, NULL, NULL, NULL,
2912 wbc, FS_DATA_IO, 0);
2913 }
2914
2915 /*
2916 * This function was copied from write_cache_pages() in mm/page-writeback.c.
2917 * The major change is that cold data pages are written in a separate step
2918 * from warm/hot data pages.
2919 */
2920 static int f2fs_write_cache_pages(struct address_space *mapping,
2921 struct writeback_control *wbc,
2922 enum iostat_type io_type)
2923 {
2924 int ret = 0;
2925 int done = 0, retry = 0;
2926 struct pagevec pvec;
2927 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
2928 struct bio *bio = NULL;
2929 sector_t last_block;
2930 #ifdef CONFIG_F2FS_FS_COMPRESSION
2931 struct inode *inode = mapping->host;
2932 struct compress_ctx cc = {
2933 .inode = inode,
2934 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
2935 .cluster_size = F2FS_I(inode)->i_cluster_size,
2936 .cluster_idx = NULL_CLUSTER,
2937 .rpages = NULL,
2938 .nr_rpages = 0,
2939 .cpages = NULL,
2940 .rbuf = NULL,
2941 .cbuf = NULL,
2942 .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
2943 .private = NULL,
2944 };
2945 #endif
2946 int nr_pages;
2947 pgoff_t uninitialized_var(writeback_index);
2948 pgoff_t index;
2949 pgoff_t end; /* Inclusive */
2950 pgoff_t done_index;
2951 int range_whole = 0;
2952 xa_mark_t tag;
2953 int nwritten = 0;
2954 int submitted = 0;
2955 int i;
2956
2957 pagevec_init(&pvec);
2958
2959 if (get_dirty_pages(mapping->host) <=
2960 SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
2961 set_inode_flag(mapping->host, FI_HOT_DATA);
2962 else
2963 clear_inode_flag(mapping->host, FI_HOT_DATA);
2964
2965 if (wbc->range_cyclic) {
2966 writeback_index = mapping->writeback_index; /* prev offset */
2967 index = writeback_index;
2968 end = -1;
2969 } else {
2970 index = wbc->range_start >> PAGE_SHIFT;
2971 end = wbc->range_end >> PAGE_SHIFT;
2972 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2973 range_whole = 1;
2974 }
2975 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2976 tag = PAGECACHE_TAG_TOWRITE;
2977 else
2978 tag = PAGECACHE_TAG_DIRTY;
2979 retry:
2980 retry = 0;
2981 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
2982 tag_pages_for_writeback(mapping, index, end);
2983 done_index = index;
2984 while (!done && !retry && (index <= end)) {
2985 nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
2986 tag);
2987 if (nr_pages == 0)
2988 break;
2989
2990 for (i = 0; i < nr_pages; i++) {
2991 struct page *page = pvec.pages[i];
2992 bool need_readd;
2993 readd:
2994 need_readd = false;
2995 #ifdef CONFIG_F2FS_FS_COMPRESSION
2996 if (f2fs_compressed_file(inode)) {
2997 ret = f2fs_init_compress_ctx(&cc);
2998 if (ret) {
2999 done = 1;
3000 break;
3001 }
3002
3003 if (!f2fs_cluster_can_merge_page(&cc,
3004 page->index)) {
3005 ret = f2fs_write_multi_pages(&cc,
3006 &submitted, wbc, io_type);
3007 if (!ret)
3008 need_readd = true;
3009 goto result;
3010 }
3011
3012 if (unlikely(f2fs_cp_error(sbi)))
3013 goto lock_page;
3014
3015 if (f2fs_cluster_is_empty(&cc)) {
3016 void *fsdata = NULL;
3017 struct page *pagep;
3018 int ret2;
3019
3020 ret2 = f2fs_prepare_compress_overwrite(
3021 inode, &pagep,
3022 page->index, &fsdata);
3023 if (ret2 < 0) {
3024 ret = ret2;
3025 done = 1;
3026 break;
3027 } else if (ret2 &&
3028 !f2fs_compress_write_end(inode,
3029 fsdata, page->index,
3030 1)) {
3031 retry = 1;
3032 break;
3033 }
3034 } else {
3035 goto lock_page;
3036 }
3037 }
3038 #endif
3039 /* give a priority to WB_SYNC threads */
3040 if (atomic_read(&sbi->wb_sync_req[DATA]) &&
3041 wbc->sync_mode == WB_SYNC_NONE) {
3042 done = 1;
3043 break;
3044 }
3045 #ifdef CONFIG_F2FS_FS_COMPRESSION
3046 lock_page:
3047 #endif
3048 done_index = page->index;
3049 retry_write:
3050 lock_page(page);
3051
3052 if (unlikely(page->mapping != mapping)) {
3053 continue_unlock:
3054 unlock_page(page);
3055 continue;
3056 }
3057
3058 if (!PageDirty(page)) {
3059 /* someone wrote it for us */
3060 goto continue_unlock;
3061 }
3062
3063 if (PageWriteback(page)) {
3064 if (wbc->sync_mode != WB_SYNC_NONE)
3065 f2fs_wait_on_page_writeback(page,
3066 DATA, true, true);
3067 else
3068 goto continue_unlock;
3069 }
3070
3071 if (!clear_page_dirty_for_io(page))
3072 goto continue_unlock;
3073
3074 #ifdef CONFIG_F2FS_FS_COMPRESSION
3075 if (f2fs_compressed_file(inode)) {
3076 get_page(page);
3077 f2fs_compress_ctx_add_page(&cc, page);
3078 continue;
3079 }
3080 #endif
3081 ret = f2fs_write_single_data_page(page, &submitted,
3082 &bio, &last_block, wbc, io_type, 0);
3083 if (ret == AOP_WRITEPAGE_ACTIVATE)
3084 unlock_page(page);
3085 #ifdef CONFIG_F2FS_FS_COMPRESSION
3086 result:
3087 #endif
3088 nwritten += submitted;
3089 wbc->nr_to_write -= submitted;
3090
3091 if (unlikely(ret)) {
3092 /*
3093 * keep nr_to_write, since vfs uses this to
3094 * get # of written pages.
3095 */
3096 if (ret == AOP_WRITEPAGE_ACTIVATE) {
3097 ret = 0;
3098 goto next;
3099 } else if (ret == -EAGAIN) {
3100 ret = 0;
3101 if (wbc->sync_mode == WB_SYNC_ALL) {
3102 cond_resched();
3103 congestion_wait(BLK_RW_ASYNC,
3104 DEFAULT_IO_TIMEOUT);
3105 goto retry_write;
3106 }
3107 goto next;
3108 }
3109 done_index = page->index + 1;
3110 done = 1;
3111 break;
3112 }
3113
3114 if (wbc->nr_to_write <= 0 &&
3115 wbc->sync_mode == WB_SYNC_NONE) {
3116 done = 1;
3117 break;
3118 }
3119 next:
3120 if (need_readd)
3121 goto readd;
3122 }
3123 pagevec_release(&pvec);
3124 cond_resched();
3125 }
3126 #ifdef CONFIG_F2FS_FS_COMPRESSION
3127 /* flush remaining pages in the compress cluster */
3128 if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
3129 ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
3130 nwritten += submitted;
3131 wbc->nr_to_write -= submitted;
3132 if (ret) {
3133 done = 1;
3134 retry = 0;
3135 }
3136 }
3137 #endif
3138 if (retry) {
3139 index = 0;
3140 end = -1;
3141 goto retry;
3142 }
3143 if (wbc->range_cyclic && !done)
3144 done_index = 0;
3145 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
3146 mapping->writeback_index = done_index;
3147
3148 if (nwritten)
3149 f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
3150 NULL, 0, DATA);
3151 /* submit cached bio of IPU write */
3152 if (bio)
3153 f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
3154
3155 return ret;
3156 }
3157
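/*
 * Serialize writepages through sbi->writepages for the cases that benefit
 * from it: compressed files, non-WB_SYNC_ALL writeback, and inodes with at
 * least min_seq_blocks dirty pages. The checkpoint task and quota files are
 * excluded to avoid deadlock in the data flush path.
 */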
3158 static inline bool __should_serialize_io(struct inode *inode,
3159 struct writeback_control *wbc)
3160 {
3161 /* to avoid deadlock in path of data flush */
3162 if (F2FS_I(inode)->cp_task)
3163 return false;
3164
3165 if (!S_ISREG(inode->i_mode))
3166 return false;
3167 if (IS_NOQUOTA(inode))
3168 return false;
3169
3170 if (f2fs_compressed_file(inode))
3171 return true;
3172 if (wbc->sync_mode != WB_SYNC_ALL)
3173 return true;
3174 if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
3175 return true;
3176 return false;
3177 }
3178
3179 static int __f2fs_write_data_pages(struct address_space *mapping,
3180 struct writeback_control *wbc,
3181 enum iostat_type io_type)
3182 {
3183 struct inode *inode = mapping->host;
3184 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3185 struct blk_plug plug;
3186 int ret;
3187 bool locked = false;
3188
3189 /* deal with chardevs and other special files */
3190 if (!mapping->a_ops->writepage)
3191 return 0;
3192
3193 /* skip writing if there is no dirty page in this inode */
3194 if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
3195 return 0;
3196
3197 /* during POR, we don't need to trigger writepage at all. */
3198 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
3199 goto skip_write;
3200
3201 if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
3202 wbc->sync_mode == WB_SYNC_NONE &&
3203 get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
3204 f2fs_available_free_memory(sbi, DIRTY_DENTS))
3205 goto skip_write;
3206
3207 /* skip writing during file defragment */
3208 if (is_inode_flag_set(inode, FI_DO_DEFRAG))
3209 goto skip_write;
3210
3211 trace_f2fs_writepages(mapping->host, wbc, DATA);
3212
3213 /* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
3214 if (wbc->sync_mode == WB_SYNC_ALL)
3215 atomic_inc(&sbi->wb_sync_req[DATA]);
3216 else if (atomic_read(&sbi->wb_sync_req[DATA])) {
3217 /* to avoid potential deadlock */
3218 if (current->plug)
3219 blk_finish_plug(current->plug);
3220 goto skip_write;
3221 }
3222
3223 if (__should_serialize_io(inode, wbc)) {
3224 mutex_lock(&sbi->writepages);
3225 locked = true;
3226 }
3227
3228 blk_start_plug(&plug);
3229 ret = f2fs_write_cache_pages(mapping, wbc, io_type);
3230 blk_finish_plug(&plug);
3231
3232 if (locked)
3233 mutex_unlock(&sbi->writepages);
3234
3235 if (wbc->sync_mode == WB_SYNC_ALL)
3236 atomic_dec(&sbi->wb_sync_req[DATA]);
3237 /*
3238 * if some pages were truncated, we cannot guarantee that mapping->host
3239 * can be used to detect pending bios.
3240 */
3241
3242 f2fs_remove_dirty_inode(inode);
3243 return ret;
3244
3245 skip_write:
3246 wbc->pages_skipped += get_dirty_pages(inode);
3247 trace_f2fs_writepages(mapping->host, wbc, DATA);
3248 return 0;
3249 }
3250
3251 static int f2fs_write_data_pages(struct address_space *mapping,
3252 struct writeback_control *wbc)
3253 {
3254 struct inode *inode = mapping->host;
3255
3256 return __f2fs_write_data_pages(mapping, wbc,
3257 F2FS_I(inode)->cp_task == current ?
3258 FS_CP_DATA_IO : FS_DATA_IO);
3259 }
3260
3261 static void f2fs_write_failed(struct address_space *mapping, loff_t to)
3262 {
3263 struct inode *inode = mapping->host;
3264 loff_t i_size = i_size_read(inode);
3265
3266 if (IS_NOQUOTA(inode))
3267 return;
3268
3269 /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
3270 if (to > i_size && !f2fs_verity_in_progress(inode)) {
3271 down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3272 down_write(&F2FS_I(inode)->i_mmap_sem);
3273
3274 truncate_pagecache(inode, i_size);
3275 f2fs_truncate_blocks(inode, i_size, true);
3276
3277 up_write(&F2FS_I(inode)->i_mmap_sem);
3278 up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3279 }
3280 }
3281
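/*
 * Find the block backing the page being written (converting or reading
 * inline data as needed) so that f2fs_write_begin() knows whether it must
 * zero, read, or simply reuse the page; the fast path skips the lookup for
 * full-page overwrites with preallocated blocks.
 */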
3282 static int prepare_write_begin(struct f2fs_sb_info *sbi,
3283 struct page *page, loff_t pos, unsigned len,
3284 block_t *blk_addr, bool *node_changed)
3285 {
3286 struct inode *inode = page->mapping->host;
3287 pgoff_t index = page->index;
3288 struct dnode_of_data dn;
3289 struct page *ipage;
3290 bool locked = false;
3291 struct extent_info ei = {0,0,0};
3292 int err = 0;
3293 int flag;
3294
3295 /*
3296 * we already allocated all the blocks, so we don't need to get
3297 * the block addresses when there is no need to fill the page.
3298 */
3299 if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
3300 !is_inode_flag_set(inode, FI_NO_PREALLOC) &&
3301 !f2fs_verity_in_progress(inode))
3302 return 0;
3303
3304 /* f2fs_lock_op avoids race between write CP and convert_inline_page */
3305 if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
3306 flag = F2FS_GET_BLOCK_DEFAULT;
3307 else
3308 flag = F2FS_GET_BLOCK_PRE_AIO;
3309
3310 if (f2fs_has_inline_data(inode) ||
3311 (pos & PAGE_MASK) >= i_size_read(inode)) {
3312 __do_map_lock(sbi, flag, true);
3313 locked = true;
3314 }
3315
3316 restart:
3317 /* check inline_data */
3318 ipage = f2fs_get_node_page(sbi, inode->i_ino);
3319 if (IS_ERR(ipage)) {
3320 err = PTR_ERR(ipage);
3321 goto unlock_out;
3322 }
3323
3324 set_new_dnode(&dn, inode, ipage, ipage, 0);
3325
3326 if (f2fs_has_inline_data(inode)) {
3327 if (pos + len <= MAX_INLINE_DATA(inode)) {
3328 f2fs_do_read_inline_data(page, ipage);
3329 set_inode_flag(inode, FI_DATA_EXIST);
3330 if (inode->i_nlink)
3331 set_inline_node(ipage);
3332 } else {
3333 err = f2fs_convert_inline_page(&dn, page);
3334 if (err)
3335 goto out;
3336 if (dn.data_blkaddr == NULL_ADDR)
3337 err = f2fs_get_block(&dn, index);
3338 }
3339 } else if (locked) {
3340 err = f2fs_get_block(&dn, index);
3341 } else {
3342 if (f2fs_lookup_extent_cache(inode, index, &ei)) {
3343 dn.data_blkaddr = ei.blk + index - ei.fofs;
3344 } else {
3345 /* hole case */
3346 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3347 if (err || dn.data_blkaddr == NULL_ADDR) {
3348 f2fs_put_dnode(&dn);
3349 __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
3350 true);
3351 WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
3352 locked = true;
3353 goto restart;
3354 }
3355 }
3356 }
3357
3358 /* convert_inline_page can make node_changed */
3359 *blk_addr = dn.data_blkaddr;
3360 *node_changed = dn.node_changed;
3361 out:
3362 f2fs_put_dnode(&dn);
3363 unlock_out:
3364 if (locked)
3365 __do_map_lock(sbi, flag, false);
3366 return err;
3367 }
3368
3369 static int f2fs_write_begin(struct file *file, struct address_space *mapping,
3370 loff_t pos, unsigned len, unsigned flags,
3371 struct page **pagep, void **fsdata)
3372 {
3373 struct inode *inode = mapping->host;
3374 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3375 struct page *page = NULL;
3376 pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
3377 bool need_balance = false, drop_atomic = false;
3378 block_t blkaddr = NULL_ADDR;
3379 int err = 0;
3380
3381 /*
3382 * Should avoid quota operations which can make deadlock:
3383 * kswapd -> f2fs_evict_inode -> dquot_drop ->
3384 * f2fs_dquot_commit -> f2fs_write_begin ->
3385 * d_obtain_alias -> __d_alloc -> kmem_cache_alloc(GFP_KERNEL)
3386 */
3387 if (trace_android_fs_datawrite_start_enabled() && !IS_NOQUOTA(inode)) {
3388 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
3389
3390 path = android_fstrace_get_pathname(pathbuf,
3391 MAX_TRACE_PATHBUF_LEN,
3392 inode);
3393 trace_android_fs_datawrite_start(inode, pos, len,
3394 current->pid, path,
3395 current->comm);
3396 }
3397 trace_f2fs_write_begin(inode, pos, len, flags);
3398
3399 if (!f2fs_is_checkpoint_ready(sbi)) {
3400 err = -ENOSPC;
3401 goto fail;
3402 }
3403
3404 if ((f2fs_is_atomic_file(inode) &&
3405 !f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
3406 is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
3407 err = -ENOMEM;
3408 drop_atomic = true;
3409 goto fail;
3410 }
3411
3412 /*
3413 * We should check this at this moment to avoid deadlock on inode page
3414 * and #0 page. The locking rule for inline_data conversion should be:
3415 * lock_page(page #0) -> lock_page(inode_page)
3416 */
3417 if (index != 0) {
3418 err = f2fs_convert_inline_inode(inode);
3419 if (err)
3420 goto fail;
3421 }
3422
3423 #ifdef CONFIG_F2FS_FS_COMPRESSION
3424 if (f2fs_compressed_file(inode)) {
3425 int ret;
3426
3427 *fsdata = NULL;
3428
3429 ret = f2fs_prepare_compress_overwrite(inode, pagep,
3430 index, fsdata);
3431 if (ret < 0) {
3432 err = ret;
3433 goto fail;
3434 } else if (ret) {
3435 return 0;
3436 }
3437 }
3438 #endif
3439
3440 repeat:
3441 /*
3442 * Do not use grab_cache_page_write_begin() to avoid a deadlock due to
3443 * wait_for_stable_page. We will wait on it below with our own IO control.
3444 */
3445 page = f2fs_pagecache_get_page(mapping, index,
3446 FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
3447 if (!page) {
3448 err = -ENOMEM;
3449 goto fail;
3450 }
3451
3452 /* TODO: cluster can be compressed due to race with .writepage */
3453
3454 *pagep = page;
3455
3456 err = prepare_write_begin(sbi, page, pos, len,
3457 &blkaddr, &need_balance);
3458 if (err)
3459 goto fail;
3460
3461 if (need_balance && !IS_NOQUOTA(inode) &&
3462 has_not_enough_free_secs(sbi, 0, 0)) {
3463 unlock_page(page);
3464 f2fs_balance_fs(sbi, true);
3465 lock_page(page);
3466 if (page->mapping != mapping) {
3467 /* The page got truncated from under us */
3468 f2fs_put_page(page, 1);
3469 goto repeat;
3470 }
3471 }
3472
3473 f2fs_wait_on_page_writeback(page, DATA, false, true);
3474
3475 if (len == PAGE_SIZE || PageUptodate(page))
3476 return 0;
3477
3478 if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
3479 !f2fs_verity_in_progress(inode)) {
3480 zero_user_segment(page, len, PAGE_SIZE);
3481 return 0;
3482 }
3483
3484 if (blkaddr == NEW_ADDR) {
3485 zero_user_segment(page, 0, PAGE_SIZE);
3486 SetPageUptodate(page);
3487 } else {
3488 if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
3489 DATA_GENERIC_ENHANCE_READ)) {
3490 err = -EFSCORRUPTED;
3491 goto fail;
3492 }
3493 err = f2fs_submit_page_read(inode, page, blkaddr, true);
3494 if (err)
3495 goto fail;
3496
3497 lock_page(page);
3498 if (unlikely(page->mapping != mapping)) {
3499 f2fs_put_page(page, 1);
3500 goto repeat;
3501 }
3502 if (unlikely(!PageUptodate(page))) {
3503 err = -EIO;
3504 goto fail;
3505 }
3506 }
3507 return 0;
3508
3509 fail:
3510 f2fs_put_page(page, 1);
3511 f2fs_write_failed(mapping, pos + len);
3512 if (drop_atomic)
3513 f2fs_drop_inmem_pages_all(sbi, false);
3514 return err;
3515 }
3516
3517 static int f2fs_write_end(struct file *file,
3518 struct address_space *mapping,
3519 loff_t pos, unsigned len, unsigned copied,
3520 struct page *page, void *fsdata)
3521 {
3522 struct inode *inode = page->mapping->host;
3523
3524 trace_android_fs_datawrite_end(inode, pos, len);
3525 trace_f2fs_write_end(inode, pos, len, copied);
3526
3527 /*
3528 * This should come from len == PAGE_SIZE, and we expect copied
3529 * to be PAGE_SIZE. Otherwise, we treat it as zero copied and
3530 * let generic_perform_write() try to copy the data again via copied=0.
3531 */
3532 if (!PageUptodate(page)) {
3533 if (unlikely(copied != len))
3534 copied = 0;
3535 else
3536 SetPageUptodate(page);
3537 }
3538
3539 #ifdef CONFIG_F2FS_FS_COMPRESSION
3540 /* overwrite compressed file */
3541 if (f2fs_compressed_file(inode) && fsdata) {
3542 f2fs_compress_write_end(inode, fsdata, page->index, copied);
3543 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3544 return copied;
3545 }
3546 #endif
3547
3548 if (!copied)
3549 goto unlock_out;
3550
3551 set_page_dirty(page);
3552
3553 if (pos + copied > i_size_read(inode) &&
3554 !f2fs_verity_in_progress(inode))
3555 f2fs_i_size_write(inode, pos + copied);
3556 unlock_out:
3557 f2fs_put_page(page, 1);
3558 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3559 return copied;
3560 }
3561
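/*
 * Returns 0 when the request is aligned for direct IO, 1 to fall back to
 * buffered IO (also used for reads entirely past EOF), and -EINVAL when the
 * alignment cannot be met even at the device's logical block size.
 */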
3562 static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
3563 loff_t offset)
3564 {
3565 unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
3566 unsigned blkbits = i_blkbits;
3567 unsigned blocksize_mask = (1 << blkbits) - 1;
3568 unsigned long align = offset | iov_iter_alignment(iter);
3569 struct block_device *bdev = inode->i_sb->s_bdev;
3570
3571 if (iov_iter_rw(iter) == READ && offset >= i_size_read(inode))
3572 return 1;
3573
3574 if (align & blocksize_mask) {
3575 if (bdev)
3576 blkbits = blksize_bits(bdev_logical_block_size(bdev));
3577 blocksize_mask = (1 << blkbits) - 1;
3578 if (align & blocksize_mask)
3579 return -EINVAL;
3580 return 1;
3581 }
3582 return 0;
3583 }
3584
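/*
 * Private DIO completion: the original bi_end_io/bi_private are saved in
 * struct f2fs_private_dio at submit time and restored here, after the
 * in-flight DIO page count has been dropped.
 */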
3585 static void f2fs_dio_end_io(struct bio *bio)
3586 {
3587 struct f2fs_private_dio *dio = bio->bi_private;
3588
3589 dec_page_count(F2FS_I_SB(dio->inode),
3590 dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
3591
3592 bio->bi_private = dio->orig_private;
3593 bio->bi_end_io = dio->orig_end_io;
3594
3595 kvfree(dio);
3596
3597 bio_endio(bio);
3598 }
3599
3600 static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
3601 loff_t file_offset)
3602 {
3603 struct f2fs_private_dio *dio;
3604 bool write = (bio_op(bio) == REQ_OP_WRITE);
3605
3606 dio = f2fs_kzalloc(F2FS_I_SB(inode),
3607 sizeof(struct f2fs_private_dio), GFP_NOFS);
3608 if (!dio)
3609 goto out;
3610
3611 dio->inode = inode;
3612 dio->orig_end_io = bio->bi_end_io;
3613 dio->orig_private = bio->bi_private;
3614 dio->write = write;
3615
3616 bio->bi_end_io = f2fs_dio_end_io;
3617 bio->bi_private = dio;
3618
3619 inc_page_count(F2FS_I_SB(inode),
3620 write ? F2FS_DIO_WRITE : F2FS_DIO_READ);
3621
3622 submit_bio(bio);
3623 return;
3624 out:
3625 bio->bi_status = BLK_STS_IOERR;
3626 bio_endio(bio);
3627 }
3628
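/*
 * ->direct_IO(): takes i_gc_rwsem (plus the READ side for out-of-place DIO
 * writes), then hands the request to __blockdev_direct_IO() with the f2fs
 * block-mapping callbacks and the bio submission hook above.
 */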
3629 static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
3630 {
3631 struct address_space *mapping = iocb->ki_filp->f_mapping;
3632 struct inode *inode = mapping->host;
3633 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3634 struct f2fs_inode_info *fi = F2FS_I(inode);
3635 size_t count = iov_iter_count(iter);
3636 loff_t offset = iocb->ki_pos;
3637 int rw = iov_iter_rw(iter);
3638 int err;
3639 enum rw_hint hint = iocb->ki_hint;
3640 int whint_mode = F2FS_OPTION(sbi).whint_mode;
3641 bool do_opu;
3642
3643 err = check_direct_IO(inode, iter, offset);
3644 if (err)
3645 return err < 0 ? err : 0;
3646
3647 if (f2fs_force_buffered_io(inode, iocb, iter))
3648 return 0;
3649
3650 do_opu = allow_outplace_dio(inode, iocb, iter);
3651
3652 trace_f2fs_direct_IO_enter(inode, offset, count, rw);
3653
3654 if (trace_android_fs_dataread_start_enabled() &&
3655 (rw == READ)) {
3656 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
3657
3658 path = android_fstrace_get_pathname(pathbuf,
3659 MAX_TRACE_PATHBUF_LEN,
3660 inode);
3661 trace_android_fs_dataread_start(inode, offset,
3662 count, current->pid, path,
3663 current->comm);
3664 }
3665 if (trace_android_fs_datawrite_start_enabled() &&
3666 (rw == WRITE)) {
3667 char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
3668
3669 path = android_fstrace_get_pathname(pathbuf,
3670 MAX_TRACE_PATHBUF_LEN,
3671 inode);
3672 trace_android_fs_datawrite_start(inode, offset, count,
3673 current->pid, path,
3674 current->comm);
3675 }
3676
3677 if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
3678 iocb->ki_hint = WRITE_LIFE_NOT_SET;
3679
3680 if (iocb->ki_flags & IOCB_NOWAIT) {
3681 if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
3682 iocb->ki_hint = hint;
3683 err = -EAGAIN;
3684 goto out;
3685 }
3686 if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
3687 up_read(&fi->i_gc_rwsem[rw]);
3688 iocb->ki_hint = hint;
3689 err = -EAGAIN;
3690 goto out;
3691 }
3692 } else {
3693 down_read(&fi->i_gc_rwsem[rw]);
3694 if (do_opu)
3695 down_read(&fi->i_gc_rwsem[READ]);
3696 }
3697
3698 err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
3699 iter, rw == WRITE ? get_data_block_dio_write :
3700 get_data_block_dio, NULL, f2fs_dio_submit_bio,
3701 rw == WRITE ? DIO_LOCKING | DIO_SKIP_HOLES :
3702 DIO_SKIP_HOLES);
3703
3704 if (do_opu)
3705 up_read(&fi->i_gc_rwsem[READ]);
3706
3707 up_read(&fi->i_gc_rwsem[rw]);
3708
3709 if (rw == WRITE) {
3710 if (whint_mode == WHINT_MODE_OFF)
3711 iocb->ki_hint = hint;
3712 if (err > 0) {
3713 f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
3714 err);
3715 if (!do_opu)
3716 set_inode_flag(inode, FI_UPDATE_WRITE);
3717 } else if (err < 0) {
3718 f2fs_write_failed(mapping, offset + count);
3719 }
3720 } else {
3721 if (err > 0)
3722 f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
3723 }
3724
3725 out:
3726 if (trace_android_fs_dataread_start_enabled() &&
3727 (rw == READ))
3728 trace_android_fs_dataread_end(inode, offset, count);
3729 if (trace_android_fs_datawrite_start_enabled() &&
3730 (rw == WRITE))
3731 trace_android_fs_datawrite_end(inode, offset, count);
3732
3733 trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
3734
3735 return err;
3736 }
3737
3738 void f2fs_invalidate_page(struct page *page, unsigned int offset,
3739 unsigned int length)
3740 {
3741 struct inode *inode = page->mapping->host;
3742 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3743
3744 if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
3745 (offset % PAGE_SIZE || length != PAGE_SIZE))
3746 return;
3747
3748 if (PageDirty(page)) {
3749 if (inode->i_ino == F2FS_META_INO(sbi)) {
3750 dec_page_count(sbi, F2FS_DIRTY_META);
3751 } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
3752 dec_page_count(sbi, F2FS_DIRTY_NODES);
3753 } else {
3754 inode_dec_dirty_pages(inode);
3755 f2fs_remove_dirty_inode(inode);
3756 }
3757 }
3758
3759 clear_cold_data(page);
3760
3761 if (IS_ATOMIC_WRITTEN_PAGE(page))
3762 return f2fs_drop_inmem_page(inode, page);
3763
3764 f2fs_clear_page_private(page);
3765 }
3766
3767 int f2fs_release_page(struct page *page, gfp_t wait)
3768 {
3769 /* If this is dirty page, keep PagePrivate */
3770 if (PageDirty(page))
3771 return 0;
3772
3773 /* This is atomic written page, keep Private */
3774 if (IS_ATOMIC_WRITTEN_PAGE(page))
3775 return 0;
3776
3777 clear_cold_data(page);
3778 f2fs_clear_page_private(page);
3779 return 1;
3780 }
3781
3782 static int f2fs_set_data_page_dirty(struct page *page)
3783 {
3784 struct inode *inode = page_file_mapping(page)->host;
3785
3786 trace_f2fs_set_page_dirty(page, DATA);
3787
3788 if (!PageUptodate(page))
3789 SetPageUptodate(page);
3790 if (PageSwapCache(page))
3791 return __set_page_dirty_nobuffers(page);
3792
3793 if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
3794 if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
3795 f2fs_register_inmem_page(inode, page);
3796 return 1;
3797 }
3798 /*
3799 * This page has already been registered, so just
3800 * return here.
3801 */
3802 return 0;
3803 }
3804
3805 if (!PageDirty(page)) {
3806 __set_page_dirty_nobuffers(page);
3807 f2fs_update_dirty_page(inode, page);
3808 return 1;
3809 }
3810 return 0;
3811 }
3812
3813
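/*
 * bmap() for a block inside a compression-enabled file: only meaningful
 * when the cluster was stored uncompressed; a compressed cluster has no 1:1
 * block mapping, so 0 is returned.
 */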
3814 static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
3815 {
3816 #ifdef CONFIG_F2FS_FS_COMPRESSION
3817 struct dnode_of_data dn;
3818 sector_t start_idx, blknr = 0;
3819 int ret;
3820
3821 start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
3822
3823 set_new_dnode(&dn, inode, NULL, NULL, 0);
3824 ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
3825 if (ret)
3826 return 0;
3827
3828 if (dn.data_blkaddr != COMPRESS_ADDR) {
3829 dn.ofs_in_node += block - start_idx;
3830 blknr = f2fs_data_blkaddr(&dn);
3831 if (!__is_valid_data_blkaddr(blknr))
3832 blknr = 0;
3833 }
3834
3835 f2fs_put_dnode(&dn);
3836
3837 return blknr;
3838 #else
3839 return -EOPNOTSUPP;
3840 #endif
3841 }
3842
3843
3844 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
3845 {
3846 struct inode *inode = mapping->host;
3847
3848 if (f2fs_has_inline_data(inode))
3849 return 0;
3850
3851 /* make sure allocating whole blocks */
3852 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
3853 filemap_write_and_wait(mapping);
3854
3855 if (f2fs_compressed_file(inode))
3856 return f2fs_bmap_compress(inode, block);
3857
3858 return generic_block_bmap(mapping, block, get_data_block_bmap);
3859 }
3860
3861 #ifdef CONFIG_MIGRATION
3862 #include <linux/migrate.h>
3863
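/*
 * ->migratepage() hook: move a data page to a new page frame.  Pages
 * belonging to an in-flight atomic write are only migrated in
 * MIGRATE_SYNC mode, under inmem_lock, and their entry in the in-memory
 * page list is redirected to the new page.  Page-private state and page
 * contents are then transferred to the new page.
 */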
int f2fs_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	int rc, extra_count;
	struct f2fs_inode_info *fi = F2FS_I(mapping->host);
	bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);

	BUG_ON(PageWriteback(page));

	/* migrating an atomic written page is safe with the inmem_lock held */
	if (atomic_written) {
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		if (!mutex_trylock(&fi->inmem_lock))
			return -EAGAIN;
	}

	/* one extra reference was held for atomic_write page */
	extra_count = atomic_written ? 1 : 0;
	rc = migrate_page_move_mapping(mapping, newpage,
				page, extra_count);
	if (rc != MIGRATEPAGE_SUCCESS) {
		if (atomic_written)
			mutex_unlock(&fi->inmem_lock);
		return rc;
	}

	if (atomic_written) {
		struct inmem_pages *cur;

		list_for_each_entry(cur, &fi->inmem_pages, list)
			if (cur->page == page) {
				cur->page = newpage;
				break;
			}
		mutex_unlock(&fi->inmem_lock);
		put_page(page);
		get_page(newpage);
	}

	if (PagePrivate(page)) {
		f2fs_set_page_private(newpage, page_private(page));
		f2fs_clear_page_private(page);
	}

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif

#ifdef CONFIG_SWAP
/* Copied from generic_swapfile_activate() to check for any holes */
static int check_swap_activate(struct swap_info_struct *sis,
				struct file *swap_file, sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned blocks_per_page;
	unsigned long page_no;
	unsigned blkbits;
	sector_t probe_block;
	sector_t last_block;
	sector_t lowest_block = -1;
	sector_t highest_block = 0;
	int nr_extents = 0;
	int ret;

	blkbits = inode->i_blkbits;
	blocks_per_page = PAGE_SIZE >> blkbits;

	/*
	 * Map all the blocks into the extent list.  This code doesn't try
	 * to be very smart.
	 */
	probe_block = 0;
	page_no = 0;
	last_block = i_size_read(inode) >> blkbits;
	while ((probe_block + blocks_per_page) <= last_block &&
			page_no < sis->max) {
		unsigned block_in_page;
		sector_t first_block;

		cond_resched();

		first_block = bmap(inode, probe_block);
		if (first_block == 0)
			goto bad_bmap;

		/*
		 * It must be PAGE_SIZE-aligned on disk
		 */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {
			sector_t block;

			block = bmap(inode, probe_block + block_in_page);
			if (block == 0)
				goto bad_bmap;
			if (block != first_block + block_in_page) {
				/* Discontiguity */
				probe_block++;
				goto reprobe;
			}
		}

		first_block >>= (PAGE_SHIFT - blkbits);
		if (page_no) {	/* exclude the header page */
			if (first_block < lowest_block)
				lowest_block = first_block;
			if (first_block > highest_block)
				highest_block = first_block;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, page_no, 1, first_block);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		page_no++;
		probe_block += blocks_per_page;
reprobe:
		continue;
	}
	ret = nr_extents;
	*span = 1 + highest_block - lowest_block;
	if (page_no == 0)
		page_no = 1;	/* force Empty message */
	sis->max = page_no;
	sis->pages = page_no - 1;
	sis->highest_bit = page_no - 1;
out:
	return ret;
bad_bmap:
	pr_err("swapon: swapfile has holes\n");
	return -EINVAL;
}

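/*
 * ->swap_activate() hook: a swap file must be a regular file on a
 * writable filesystem, with inline data converted out and compression
 * disabled, and it must be fully allocated without holes.  On success
 * the file is pinned and its extents are precached.
 */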
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	struct inode *inode = file_inode(file);
	int ret;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
		return -EROFS;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	if (f2fs_disable_compressed_file(inode))
		return -EINVAL;

	ret = check_swap_activate(sis, file, span);
	if (ret < 0)
		return ret;

	set_inode_flag(inode, FI_PIN_FILE);
	f2fs_precache_extents(inode);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return ret;
}

static void f2fs_swap_deactivate(struct file *file)
{
	struct inode *inode = file_inode(file);

	clear_inode_flag(inode, FI_PIN_FILE);
}
#else
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	return -EOPNOTSUPP;
}

static void f2fs_swap_deactivate(struct file *file)
{
}
#endif

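/* address_space operations used for f2fs data pages */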
const struct address_space_operations f2fs_dblock_aops = {
	.readpage = f2fs_read_data_page,
	.readpages = f2fs_read_data_pages,
	.writepage = f2fs_write_data_page,
	.writepages = f2fs_write_data_pages,
	.write_begin = f2fs_write_begin,
	.write_end = f2fs_write_end,
	.set_page_dirty = f2fs_set_data_page_dirty,
	.invalidatepage = f2fs_invalidate_page,
	.releasepage = f2fs_release_page,
	.direct_IO = f2fs_direct_IO,
	.bmap = f2fs_bmap,
	.swap_activate = f2fs_swap_activate,
	.swap_deactivate = f2fs_swap_deactivate,
#ifdef CONFIG_MIGRATION
	.migratepage = f2fs_migrate_page,
#endif
};

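/*
 * Clear the PAGECACHE_TAG_DIRTY mark for @page in its mapping's xarray,
 * under the i_pages lock, without touching the page flags themselves.
 */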
void f2fs_clear_page_cache_dirty_tag(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	__xa_clear_mark(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

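/*
 * Set up the slab cache and mempool backing struct bio_post_read_ctx,
 * which carries per-bio state for the post-read steps (decryption,
 * decompression, verity).
 */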
int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache =
		kmem_cache_create("f2fs_bio_post_read_ctx",
				sizeof(struct bio_post_read_ctx), 0, 0, NULL);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}

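/*
 * Allocate the per-superblock workqueue used for post-read work; it is
 * only needed when encryption, verity or compression is enabled.
 */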
int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (!f2fs_sb_has_encrypt(sbi) &&
		!f2fs_sb_has_verity(sbi) &&
		!f2fs_sb_has_compression(sbi))
		return 0;

	sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
						WQ_UNBOUND | WQ_HIGHPRI,
						num_online_cpus());
	if (!sbi->post_read_wq)
		return -ENOMEM;
	return 0;
}

void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (sbi->post_read_wq)
		destroy_workqueue(sbi->post_read_wq);
}

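/* Create the slab cache backing struct bio_entry allocations. */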
int __init f2fs_init_bio_entry_cache(void)
{
	bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
			sizeof(struct bio_entry));
	if (!bio_entry_slab)
		return -ENOMEM;
	return 0;
}

void f2fs_destroy_bio_entry_cache(void)
{
	kmem_cache_destroy(bio_entry_slab);
}