// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/blk-crypto.h>
#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
#include <linux/sched/signal.h>
#include <linux/fiemap.h>
#include <linux/iomap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "iostat.h"
#include <trace/events/f2fs.h>

#define NUM_PREALLOC_POST_READ_CTXS	128

static struct kmem_cache *bio_post_read_ctx_cache;
static struct kmem_cache *bio_entry_slab;
static mempool_t *bio_post_read_ctx_pool;
static struct bio_set f2fs_bioset;

#define	F2FS_BIO_POOL_SIZE	NR_CURSEG_TYPE

int __init f2fs_init_bioset(void)
{
	return bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
					0, BIOSET_NEED_BVECS);
}

void f2fs_destroy_bioset(void)
{
	bioset_exit(&f2fs_bioset);
}

static bool __is_cp_guaranteed(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode;
	struct f2fs_sb_info *sbi;

	if (!mapping)
		return false;

	inode = mapping->host;
	sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_META_INO(sbi) ||
			inode->i_ino == F2FS_NODE_INO(sbi) ||
			S_ISDIR(inode->i_mode))
		return true;

	if (f2fs_is_compressed_page(page))
		return false;
	if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
			page_private_gcing(page))
		return true;
	return false;
}

static enum count_type __read_io_type(struct page *page)
{
	struct address_space *mapping = page_file_mapping(page);

	if (mapping) {
		struct inode *inode = mapping->host;
		struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

		if (inode->i_ino == F2FS_META_INO(sbi))
			return F2FS_RD_META;

		if (inode->i_ino == F2FS_NODE_INO(sbi))
			return F2FS_RD_NODE;
	}
	return F2FS_RD_DATA;
}

/* postprocessing steps for read bios */
enum bio_post_read_step {
#ifdef CONFIG_FS_ENCRYPTION
	STEP_DECRYPT	= BIT(0),
#else
	STEP_DECRYPT	= 0,	/* compile out the decryption-related code */
#endif
#ifdef CONFIG_F2FS_FS_COMPRESSION
	STEP_DECOMPRESS	= BIT(1),
#else
	STEP_DECOMPRESS	= 0,	/* compile out the decompression-related code */
#endif
#ifdef CONFIG_FS_VERITY
	STEP_VERITY	= BIT(2),
#else
	STEP_VERITY	= 0,	/* compile out the verity-related code */
#endif
};

struct bio_post_read_ctx {
	struct bio *bio;
	struct f2fs_sb_info *sbi;
	struct work_struct work;
	unsigned int enabled_steps;
	/*
	 * decompression_attempted keeps track of whether
	 * f2fs_end_read_compressed_page() has been called on the pages in the
	 * bio that belong to a compressed cluster yet.
	 */
	bool decompression_attempted;
	block_t fs_blkaddr;
};
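
/*
 * A read bio that needs any of the post-read steps above carries a
 * bio_post_read_ctx in bio->bi_private.  The steps are applied in order:
 * decryption and decompression on the per-sb post_read_wq, then verity on
 * the fs-verity workqueue (see f2fs_read_end_io() and f2fs_post_read_work()
 * below).  Roughly:
 *
 *	f2fs_read_end_io()
 *	  -> f2fs_post_read_work()          (STEP_DECRYPT / STEP_DECOMPRESS)
 *	       -> f2fs_verify_and_finish_bio()
 *	            -> f2fs_verify_bio()    (STEP_VERITY)
 *	                 -> f2fs_finish_read_bio()
 */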
/*
 * Update and unlock a bio's pages, and free the bio.
 *
 * This marks pages up-to-date only if there was no error in the bio (I/O error,
 * decryption error, or verity error), as indicated by bio->bi_status.
 *
 * "Compressed pages" (pagecache pages backed by a compressed cluster on-disk)
 * aren't marked up-to-date here, as decompression is done on a per-compression-
 * cluster basis rather than a per-bio basis.  Instead, we only need to do two
 * things for each compressed page here: call f2fs_end_read_compressed_page()
 * with failed=true if an error occurred before it would have normally gotten
 * called (i.e., I/O error or decryption error, but *not* verity error), and
 * release the bio's reference to the decompress_io_ctx of the page's cluster.
 */
static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
{
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;
	struct bio_post_read_ctx *ctx = bio->bi_private;

	bio_for_each_segment_all(bv, bio, iter_all) {
		struct page *page = bv->bv_page;

		if (f2fs_is_compressed_page(page)) {
			if (ctx && !ctx->decompression_attempted)
				f2fs_end_read_compressed_page(page, true, 0,
							in_task);
			f2fs_put_page_dic(page, in_task);
			continue;
		}

		if (bio->bi_status)
			ClearPageUptodate(page);
		else
			SetPageUptodate(page);
		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
		unlock_page(page);
	}

	if (ctx)
		mempool_free(ctx, bio_post_read_ctx_pool);
	bio_put(bio);
}

static void f2fs_verify_bio(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);
	struct bio *bio = ctx->bio;
	bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);

	/*
	 * fsverity_verify_bio() may call readpages() again, and while verity
	 * will be disabled for this, decryption and/or decompression may still
	 * be needed, resulting in another bio_post_read_ctx being allocated.
	 * So to prevent deadlocks we need to release the current ctx to the
	 * mempool first.  This assumes that verity is the last post-read step.
	 */
	mempool_free(ctx, bio_post_read_ctx_pool);
	bio->bi_private = NULL;

	/*
	 * Verify the bio's pages with fs-verity.  Exclude compressed pages,
	 * as those were handled separately by f2fs_end_read_compressed_page().
	 */
	if (may_have_compressed_pages) {
		struct bio_vec *bv;
		struct bvec_iter_all iter_all;

		bio_for_each_segment_all(bv, bio, iter_all) {
			struct page *page = bv->bv_page;

			if (!f2fs_is_compressed_page(page) &&
			    !fsverity_verify_page(page)) {
				bio->bi_status = BLK_STS_IOERR;
				break;
			}
		}
	} else {
		fsverity_verify_bio(bio);
	}

	f2fs_finish_read_bio(bio, true);
}

/*
 * If the bio's data needs to be verified with fs-verity, then enqueue the
 * verity work for the bio.  Otherwise finish the bio now.
 *
 * Note that to avoid deadlocks, the verity work can't be done on the
 * decryption/decompression workqueue.  This is because verifying the data pages
 * can involve reading verity metadata pages from the file, and these verity
 * metadata pages may be encrypted and/or compressed.
 */
static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
{
	struct bio_post_read_ctx *ctx = bio->bi_private;

	if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
		INIT_WORK(&ctx->work, f2fs_verify_bio);
		fsverity_enqueue_verify_work(&ctx->work);
	} else {
		f2fs_finish_read_bio(bio, in_task);
	}
}

/*
 * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
 * remaining page was read by @ctx->bio.
 *
 * Note that a bio may span clusters (even a mix of compressed and uncompressed
 * clusters) or be for just part of a cluster.  STEP_DECOMPRESS just indicates
 * that the bio includes at least one compressed page.  The actual decompression
 * is done on a per-cluster basis, not a per-bio basis.
 */
static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
		bool in_task)
{
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;
	bool all_compressed = true;
	block_t blkaddr = ctx->fs_blkaddr;

	bio_for_each_segment_all(bv, ctx->bio, iter_all) {
		struct page *page = bv->bv_page;

		if (f2fs_is_compressed_page(page))
			f2fs_end_read_compressed_page(page, false, blkaddr,
						      in_task);
		else
			all_compressed = false;

		blkaddr++;
	}

	ctx->decompression_attempted = true;

	/*
	 * Optimization: if all the bio's pages are compressed, then scheduling
	 * the per-bio verity work is unnecessary, as verity will be fully
	 * handled at the compression cluster level.
	 */
	if (all_compressed)
		ctx->enabled_steps &= ~STEP_VERITY;
}

static void f2fs_post_read_work(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);
	struct bio *bio = ctx->bio;

	if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) {
		f2fs_finish_read_bio(bio, true);
		return;
	}

	if (ctx->enabled_steps & STEP_DECOMPRESS)
		f2fs_handle_step_decompress(ctx, true);

	f2fs_verify_and_finish_bio(bio, true);
}

static void f2fs_read_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
	struct bio_post_read_ctx *ctx;
	bool intask = in_task();

	iostat_update_and_unbind_ctx(bio);
	ctx = bio->bi_private;

	if (time_to_inject(sbi, FAULT_READ_IO))
		bio->bi_status = BLK_STS_IOERR;

	if (bio->bi_status) {
		f2fs_finish_read_bio(bio, intask);
		return;
	}

	if (ctx) {
		unsigned int enabled_steps = ctx->enabled_steps &
					(STEP_DECRYPT | STEP_DECOMPRESS);

		/*
		 * If decompression is the only post-read step needed (i.e. no
		 * decryption), handle it here rather than punting to the
		 * post-read workqueue, unless low-memory mode requires the
		 * workqueue.
		 */
		if (enabled_steps == STEP_DECOMPRESS &&
					!f2fs_low_mem_mode(sbi)) {
			f2fs_handle_step_decompress(ctx, intask);
		} else if (enabled_steps) {
			INIT_WORK(&ctx->work, f2fs_post_read_work);
			queue_work(ctx->sbi->post_read_wq, &ctx->work);
			return;
		}
	}

	f2fs_verify_and_finish_bio(bio, intask);
}

static void f2fs_write_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	iostat_update_and_unbind_ctx(bio);
	sbi = bio->bi_private;

	if (time_to_inject(sbi, FAULT_WRITE_IO))
		bio->bi_status = BLK_STS_IOERR;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		enum count_type type = WB_DATA_TYPE(page);

		if (page_private_dummy(page)) {
			clear_page_private_dummy(page);
			unlock_page(page);
			mempool_free(page, sbi->write_io_dummy);

			if (unlikely(bio->bi_status))
				f2fs_stop_checkpoint(sbi, true,
						STOP_CP_REASON_WRITE_FAIL);
			continue;
		}

		fscrypt_finalize_bounce_page(&page);

#ifdef CONFIG_F2FS_FS_COMPRESSION
		if (f2fs_is_compressed_page(page)) {
			f2fs_compress_write_end_io(bio, page);
			continue;
		}
#endif

		if (unlikely(bio->bi_status)) {
			mapping_set_error(page->mapping, -EIO);
			if (type == F2FS_WB_CP_DATA)
				f2fs_stop_checkpoint(sbi, true,
						STOP_CP_REASON_WRITE_FAIL);
		}

		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
					page->index != nid_of_node(page));

		dec_page_count(sbi, type);
		if (f2fs_in_warm_node_list(sbi, page))
			f2fs_del_fsync_node_entry(sbi, page);
		clear_page_private_gcing(page);
		end_page_writeback(page);
	}
	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				wq_has_sleeper(&sbi->cp_wait))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
		block_t blk_addr, sector_t *sector)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	int i;

	if (f2fs_is_multi_device(sbi)) {
		for (i = 0; i < sbi->s_ndevs; i++) {
			if (FDEV(i).start_blk <= blk_addr &&
			    FDEV(i).end_blk >= blk_addr) {
				blk_addr -= FDEV(i).start_blk;
				bdev = FDEV(i).bdev;
				break;
			}
		}
	}

	if (sector)
		*sector = SECTOR_FROM_BLOCK(blk_addr);
	return bdev;
}

int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	for (i = 0; i < sbi->s_ndevs; i++)
		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
			return i;
	return 0;
}

static unsigned int f2fs_io_flags(struct f2fs_io_info *fio)
{
	unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0);
	unsigned int fua_flag, meta_flag, io_flag;
	unsigned int op_flags = 0;

	if (fio->op != REQ_OP_WRITE)
		return 0;
	if (fio->type == DATA)
		io_flag = fio->sbi->data_io_flag;
	else if (fio->type == NODE)
		io_flag = fio->sbi->node_io_flag;
	else
		return 0;

	fua_flag = io_flag & temp_mask;
	meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;

	/*
	 * data/node io flag bits per temp:
	 *      REQ_META     |      REQ_FUA      |
	 *    5 |    4 |   3 |    2 |    1 |   0 |
	 * Cold | Warm | Hot | Cold | Warm | Hot |
	 */
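	/*
	 * Worked example (assuming NR_TEMP_TYPE == 3, i.e. Hot/Warm/Cold):
	 * io_flag == 0x09 == 0b001001 gives fua_flag == 0b001 and
	 * meta_flag == 0b001, so hot writes get REQ_FUA | REQ_META while
	 * warm and cold writes get neither.
	 */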
	if (BIT(fio->temp) & meta_flag)
		op_flags |= REQ_META;
	if (BIT(fio->temp) & fua_flag)
		op_flags |= REQ_FUA;
	return op_flags;
}

static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	struct block_device *bdev;
	sector_t sector;
	struct bio *bio;

	bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
	bio = bio_alloc_bioset(GFP_NOIO, npages, &f2fs_bioset);
	bio_set_dev(bio, bdev);
	bio_set_op_attrs(bio, fio->op, fio->op_flags | f2fs_io_flags(fio));
	bio->bi_iter.bi_sector = sector;
	if (is_read_io(fio->op)) {
		bio->bi_end_io = f2fs_read_end_io;
		bio->bi_private = NULL;
	} else {
		bio->bi_end_io = f2fs_write_end_io;
		bio->bi_private = sbi;
	}
	iostat_alloc_and_bind_ctx(sbi, bio, NULL);

	if (fio->io_wbc)
		wbc_init_bio(fio->io_wbc, bio);

	return bio;
}

static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
				  pgoff_t first_idx,
				  const struct f2fs_io_info *fio,
				  gfp_t gfp_mask)
{
	/*
	 * The f2fs garbage collector sets ->encrypted_page when it wants to
	 * read/write raw data without encryption.
	 */
	if (!fio || !fio->encrypted_page)
		fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
	else if (fscrypt_inode_should_skip_dm_default_key(inode))
		bio_set_skip_dm_default_key(bio);
}

static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
				     pgoff_t next_idx,
				     const struct f2fs_io_info *fio)
{
	/*
	 * The f2fs garbage collector sets ->encrypted_page when it wants to
	 * read/write raw data without encryption.
	 */
	if (fio && fio->encrypted_page)
		return !bio_has_crypt_ctx(bio) &&
			(bio_should_skip_dm_default_key(bio) ==
			 fscrypt_inode_should_skip_dm_default_key(inode));

	return fscrypt_mergeable_bio(bio, inode, next_idx);
}

void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
				 enum page_type type)
{
	WARN_ON_ONCE(!is_read_io(bio_op(bio)));
	trace_f2fs_submit_read_bio(sbi->sb, type, bio);

	iostat_update_submit_ctx(bio, type);
	submit_bio(bio);
}

static void f2fs_align_write_bio(struct f2fs_sb_info *sbi, struct bio *bio)
{
	unsigned int start =
		(bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS) % F2FS_IO_SIZE(sbi);

	if (start == 0)
		return;

	/* fill dummy pages */
	for (; start < F2FS_IO_SIZE(sbi); start++) {
		struct page *page =
			mempool_alloc(sbi->write_io_dummy,
				      GFP_NOIO | __GFP_NOFAIL);
		f2fs_bug_on(sbi, !page);

		lock_page(page);

		zero_user_segment(page, 0, PAGE_SIZE);
		set_page_private_dummy(page);

		if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
			f2fs_bug_on(sbi, 1);
	}
}

static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio,
				  enum page_type type)
{
	WARN_ON_ONCE(is_read_io(bio_op(bio)));

	if (type == DATA || type == NODE) {
		if (f2fs_lfs_mode(sbi) && current->plug)
			blk_finish_plug(current->plug);

		if (F2FS_IO_ALIGNED(sbi)) {
			f2fs_align_write_bio(sbi, bio);
			/*
			 * In the NODE case, we lose the next block address
			 * chain, so we need to do a checkpoint in
			 * f2fs_sync_file.
			 */
			if (type == NODE)
				set_sbi_flag(sbi, SBI_NEED_CP);
		}
	}

	trace_f2fs_submit_write_bio(sbi->sb, type, bio);
	iostat_update_submit_ctx(bio, type);
	submit_bio(bio);
}

static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;

	if (!io->bio)
		return;

	if (is_read_io(fio->op)) {
		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
		f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
	} else {
		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
		f2fs_submit_write_bio(io->sbi, io->bio, fio->type);
	}
	io->bio = NULL;
}

static bool __has_merged_page(struct bio *bio, struct inode *inode,
						struct page *page, nid_t ino)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	if (!bio)
		return false;

	if (!inode && !page && !ino)
		return true;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *target = bvec->bv_page;

		if (fscrypt_is_bounce_page(target)) {
			target = fscrypt_pagecache_page(target);
			if (IS_ERR(target))
				continue;
		}
		if (f2fs_is_compressed_page(target)) {
			target = f2fs_compress_control_page(target);
			if (IS_ERR(target))
				continue;
		}

		if (inode && inode == target->mapping->host)
			return true;
		if (page && page == target)
			return true;
		if (ino && ino == ino_of_node(target))
			return true;
	}

	return false;
}
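
/*
 * Merged write I/O is staged in sbi->write_io[type][temp]: one f2fs_bio_info
 * per page type (DATA/NODE/META) and temperature (hot/warm/cold), each holding
 * an open bio that later writes may be merged into under io_rwsem.  META uses
 * only a single slot (see f2fs_init_write_merge_io() below).
 */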
int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = 0; i < NR_PAGE_TYPE; i++) {
		int n = (i == META) ? 1 : NR_TEMP_TYPE;
		int j;

		sbi->write_io[i] = f2fs_kmalloc(sbi,
				array_size(n, sizeof(struct f2fs_bio_info)),
				GFP_KERNEL);
		if (!sbi->write_io[i])
			return -ENOMEM;

		for (j = HOT; j < n; j++) {
			init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
			sbi->write_io[i][j].sbi = sbi;
			sbi->write_io[i][j].bio = NULL;
			spin_lock_init(&sbi->write_io[i][j].io_lock);
			INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
			INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
			init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
		}
	}

	return 0;
}

static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

	f2fs_down_write(&io->io_rwsem);

	if (!io->bio)
		goto unlock_out;

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
		if (!test_opt(sbi, NOBARRIER))
			io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
	}
	__submit_merged_bio(io);
unlock_out:
	f2fs_up_write(&io->io_rwsem);
}

static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type, bool force)
{
	enum temp_type temp;
	bool ret = true;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
		if (!force) {
			enum page_type btype = PAGE_TYPE_OF_BIO(type);
			struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

			f2fs_down_read(&io->io_rwsem);
			ret = __has_merged_page(io->bio, inode, page, ino);
			f2fs_up_read(&io->io_rwsem);
		}
		if (ret)
			__f2fs_submit_merged_write(sbi, type, temp);

		/* TODO: use HOT temp only for meta pages now. */
		if (type >= META)
			break;
	}
}

void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}

void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
}

void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_write(sbi, DATA);
	f2fs_submit_merged_write(sbi, NODE);
	f2fs_submit_merged_write(sbi, META);
}

/*
 * Fill the locked page with data located in the block address.
 * A caller needs to unlock the page on failure.
 */
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			fio->is_por ? META_POR : (__is_meta_io(fio) ?
			META_GENERIC : DATA_GENERIC_ENHANCE))) {
		f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
		return -EFSCORRUPTED;
	}

	trace_f2fs_submit_page_bio(page, fio);

	/* Allocate a new bio */
	bio = __bio_alloc(fio, 1);

	f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
			       fio->page->index, fio, GFP_NOIO);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}

	if (fio->io_wbc && !is_read_io(fio->op))
		wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

	inc_page_count(fio->sbi, is_read_io(fio->op) ?
			__read_io_type(page) : WB_DATA_TYPE(fio->page));

	if (is_read_io(bio_op(bio)))
		f2fs_submit_read_bio(fio->sbi, bio, fio->type);
	else
		f2fs_submit_write_bio(fio->sbi, bio, fio->type);
	return 0;
}

static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
				block_t last_blkaddr, block_t cur_blkaddr)
{
	if (unlikely(sbi->max_io_bytes &&
			bio->bi_iter.bi_size >= sbi->max_io_bytes))
		return false;
	if (last_blkaddr + 1 != cur_blkaddr)
		return false;
	return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL);
}

static bool io_type_is_mergeable(struct f2fs_bio_info *io,
						struct f2fs_io_info *fio)
{
	if (io->fio.op != fio->op)
		return false;
	return io->fio.op_flags == fio->op_flags;
}

static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
					struct f2fs_bio_info *io,
					struct f2fs_io_info *fio,
					block_t last_blkaddr,
					block_t cur_blkaddr)
{
	if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
		unsigned int filled_blocks =
				F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
		unsigned int io_size = F2FS_IO_SIZE(sbi);
		unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;

		/*
		 * The bio is aligned to the I/O unit, but there aren't enough
		 * bvecs left for another full unit.
		 */
		if (!(filled_blocks % io_size) && left_vecs < io_size)
			return false;
	}
	if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
		return false;
	return io_type_is_mergeable(io, fio);
}

static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
				struct page *page, enum temp_type temp)
{
	struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
	struct bio_entry *be;

	be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
	be->bio = bio;
	bio_get(bio);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
		f2fs_bug_on(sbi, 1);

	f2fs_down_write(&io->bio_list_lock);
	list_add_tail(&be->list, &io->bio_list);
	f2fs_up_write(&io->bio_list_lock);
}

static void del_bio_entry(struct bio_entry *be)
{
	list_del(&be->list);
	kmem_cache_free(bio_entry_slab, be);
}
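
/*
 * In-place-update (IPU) data writes keep their open bios on the
 * per-temperature io->bio_list as bio_entry objects (protected by
 * bio_list_lock), so that later IPU pages can be merged into them
 * (add_ipu_page()) or the pending bios can be flushed by
 * f2fs_submit_merged_ipu_write().
 */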
static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
							struct page *page)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum temp_type temp;
	bool found = false;
	int ret = -EAGAIN;

	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		f2fs_down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (be->bio != *bio)
				continue;

			found = true;

			f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
							    *fio->last_block,
							    fio->new_blkaddr));
			if (f2fs_crypt_mergeable_bio(*bio,
					fio->page->mapping->host,
					fio->page->index, fio) &&
			    bio_add_page(*bio, page, PAGE_SIZE, 0) ==
					PAGE_SIZE) {
				ret = 0;
				break;
			}

			/* page can't be merged into bio; submit the bio */
			del_bio_entry(be);
			f2fs_submit_write_bio(sbi, *bio, DATA);
			break;
		}
		f2fs_up_write(&io->bio_list_lock);
	}

	if (ret) {
		bio_put(*bio);
		*bio = NULL;
	}

	return ret;
}

void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
					struct bio **bio, struct page *page)
{
	enum temp_type temp;
	bool found = false;
	struct bio *target = bio ? *bio : NULL;

	f2fs_bug_on(sbi, !target && !page);

	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		if (list_empty(head))
			continue;

		f2fs_down_read(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found)
				break;
		}
		f2fs_up_read(&io->bio_list_lock);

		if (!found)
			continue;

		found = false;

		f2fs_down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found) {
				target = be->bio;
				del_bio_entry(be);
				break;
			}
		}
		f2fs_up_write(&io->bio_list_lock);
	}

	if (found)
		f2fs_submit_write_bio(sbi, target, DATA);
	if (bio && *bio) {
		bio_put(*bio);
		*bio = NULL;
	}
}

int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio = *fio->bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC)) {
		f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR);
		return -EFSCORRUPTED;
	}

	trace_f2fs_submit_page_bio(page, fio);

	if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
						fio->new_blkaddr))
		f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
alloc_new:
	if (!bio) {
		bio = __bio_alloc(fio, BIO_MAX_VECS);
		f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
				       fio->page->index, fio, GFP_NOIO);

		add_bio_entry(fio->sbi, bio, page, fio->temp);
	} else {
		if (add_ipu_page(fio, &bio, page))
			goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

	inc_page_count(fio->sbi, WB_DATA_TYPE(page));

	*fio->last_block = fio->new_blkaddr;
	*fio->bio = bio;

	return 0;
}

void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
	struct page *bio_page;

	f2fs_bug_on(sbi, is_read_io(fio->op));

	f2fs_down_write(&io->io_rwsem);
next:
	if (fio->in_list) {
		spin_lock(&io->io_lock);
		if (list_empty(&io->io_list)) {
			spin_unlock(&io->io_lock);
			goto out;
		}
		fio = list_first_entry(&io->io_list,
						struct f2fs_io_info, list);
		list_del(&fio->list);
		spin_unlock(&io->io_lock);
	}

	verify_fio_blkaddr(fio);

	if (fio->encrypted_page)
		bio_page = fio->encrypted_page;
	else if (fio->compressed_page)
		bio_page = fio->compressed_page;
	else
		bio_page = fio->page;

	/* set submitted = true as a return value */
	fio->submitted = 1;

	inc_page_count(sbi, WB_DATA_TYPE(bio_page));

	if (io->bio &&
	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
			      fio->new_blkaddr) ||
	     !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
				       bio_page->index, fio)))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		if (F2FS_IO_ALIGNED(sbi) &&
				(fio->type == DATA || fio->type == NODE) &&
				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
			fio->retry = 1;
			goto skip;
		}
		io->bio = __bio_alloc(fio, BIO_MAX_VECS);
		f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
				       bio_page->index, fio, GFP_NOIO);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

	io->last_block_in_bio = fio->new_blkaddr;

	trace_f2fs_submit_page_write(fio->page, fio);
skip:
	if (fio->in_list)
		goto next;
out:
	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
				!f2fs_is_checkpoint_ready(sbi))
		__submit_merged_bio(io);
	f2fs_up_write(&io->io_rwsem);
}

static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
				      unsigned nr_pages, unsigned op_flag,
				      pgoff_t first_idx, bool for_write)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;
	struct bio_post_read_ctx *ctx = NULL;
	unsigned int post_read_steps = 0;
	sector_t sector;
	struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);

	bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
			       bio_max_segs(nr_pages), &f2fs_bioset);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	bio_set_dev(bio, bdev);
	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);
	bio->bi_iter.bi_sector = sector;
	f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
	bio->bi_end_io = f2fs_read_end_io;

	if (fscrypt_inode_uses_fs_layer_crypto(inode))
		post_read_steps |= STEP_DECRYPT;

	if (f2fs_need_verity(inode, first_idx))
		post_read_steps |= STEP_VERITY;

	/*
	 * STEP_DECOMPRESS is handled specially, since a compressed file might
	 * contain both compressed and uncompressed clusters.  We'll allocate a
	 * bio_post_read_ctx if the file is compressed, but the caller is
	 * responsible for enabling STEP_DECOMPRESS if it's actually needed.
	 */

	if (post_read_steps || f2fs_compressed_file(inode)) {
		/* Due to the mempool, this never fails. */
		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
		ctx->bio = bio;
		ctx->sbi = sbi;
		ctx->enabled_steps = post_read_steps;
		ctx->fs_blkaddr = blkaddr;
		ctx->decompression_attempted = false;
		bio->bi_private = ctx;
	}
	iostat_alloc_and_bind_ctx(sbi, bio, ctx);

	return bio;
}

/* This can handle encryption-related setup */
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
				 block_t blkaddr, int op_flags,
				 bool for_write)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;

	bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
					page->index, for_write);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, blkaddr);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}
	inc_page_count(sbi, F2FS_RD_DATA);
	f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
	f2fs_submit_read_bio(sbi, bio, DATA);
	return 0;
}
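
/*
 * The on-disk block address of a data block lives in the address array of the
 * node page that owns it (blkaddr_in_node()).  For inodes with the extra
 * attribute area, the array is offset by get_extra_isize(), which is what the
 * "base" below accounts for.
 */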
static void __set_data_blkaddr(struct dnode_of_data *dn)
{
	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
	__le32 *addr_array;
	int base = 0;

	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
		base = get_extra_isize(dn->inode);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
}

/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
	__set_data_blkaddr(dn);
	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
}

void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	dn->data_blkaddr = blkaddr;
	f2fs_set_data_blkaddr(dn);
	f2fs_update_read_extent_cache(dn);
}

/* dn->ofs_in_node will be returned with up-to-date last block pointer */
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int err;

	if (!count)
		return 0;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
						dn->ofs_in_node, count);

	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);

	for (; count > 0; dn->ofs_in_node++) {
		block_t blkaddr = f2fs_data_blkaddr(dn);

		if (blkaddr == NULL_ADDR) {
			dn->data_blkaddr = NEW_ADDR;
			__set_data_blkaddr(dn);
			count--;
		}
	}

	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
	return 0;
}

/* Should keep dn->ofs_in_node unchanged */
int f2fs_reserve_new_block(struct dnode_of_data *dn)
{
	unsigned int ofs_in_node = dn->ofs_in_node;
	int ret;

	ret = f2fs_reserve_new_blocks(dn, 1);
	dn->ofs_in_node = ofs_in_node;
	return ret;
}

int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = f2fs_reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}
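
/*
 * Three ways to get a data page, used below:
 *  - f2fs_get_read_data_page() grabs the page and submits the read I/O without
 *    waiting for completion;
 *  - f2fs_find_data_page() returns an unlocked, up-to-date page, waiting for
 *    the read to finish if necessary;
 *  - f2fs_get_lock_data_page() returns the page locked and up-to-date, and
 *    fails with an error on holes.
 */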
struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
				     int op_flags, bool for_write,
				     pgoff_t *next_pgofs)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	int err;

	page = f2fs_grab_cache_page(mapping, index, for_write);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (f2fs_lookup_read_extent_cache_block(inode, index,
						&dn.data_blkaddr)) {
		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
						DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			f2fs_handle_error(F2FS_I_SB(inode),
						ERROR_INVALID_BLKADDR);
			goto put_err;
		}
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err) {
		if (err == -ENOENT && next_pgofs)
			*next_pgofs = f2fs_get_next_page_offset(&dn, index);
		goto put_err;
	}
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		err = -ENOENT;
		if (next_pgofs)
			*next_pgofs = index + 1;
		goto put_err;
	}
	if (dn.data_blkaddr != NEW_ADDR &&
			!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
						dn.data_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		f2fs_handle_error(F2FS_I_SB(inode),
					ERROR_INVALID_BLKADDR);
		goto put_err;
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In that case, its blkaddr stays NEW_ADDR.
	 * See f2fs_add_link -> f2fs_get_new_data_page ->
	 * f2fs_init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
						op_flags, for_write);
	if (err)
		goto put_err;
	return page;

put_err:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index,
					pgoff_t *next_pgofs)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	page = f2fs_get_read_data_page(inode, index, 0, false, next_pgofs);
	if (IS_ERR(page))
		return page;

	if (PageUptodate(page))
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * If it tries to access a hole, return an error, because the callers
 * (functions in dir.c and GC) need to know whether this page exists or not.
 */
struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
							bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
repeat:
	page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL);
	if (IS_ERR(page))
		return page;

	/* wait for read completion */
	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that, ipage is set only by make_empty_dir, and if any error occurs,
 * ipage should be released by this function.
 */
struct page *f2fs_get_new_data_page(struct inode *inode,
		struct page *ipage, pgoff_t index, bool new_i_size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page) {
		/*
		 * before exiting, we should make sure ipage will be released
		 * if any error occurs.
		 */
		f2fs_put_page(ipage, 1);
		return ERR_PTR(-ENOMEM);
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	if (!ipage)
		f2fs_put_dnode(&dn);

	if (PageUptodate(page))
		goto got_it;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
	} else {
		f2fs_put_page(page, 1);

		/* if ipage exists, blkaddr should be NEW_ADDR */
		f2fs_bug_on(F2FS_I_SB(inode), ipage);
		page = f2fs_get_lock_data_page(inode, index, true);
		if (IS_ERR(page))
			return page;
	}
got_it:
	if (new_i_size && i_size_read(inode) <
				((loff_t)(index + 1) << PAGE_SHIFT))
		f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
	return page;
}

static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_summary sum;
	struct node_info ni;
	block_t old_blkaddr;
	blkcnt_t count = 1;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;

	err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
	if (err)
		return err;

	dn->data_blkaddr = f2fs_data_blkaddr(dn);
	if (dn->data_blkaddr == NULL_ADDR) {
		err = inc_valid_block_count(sbi, dn->inode, &count);
		if (unlikely(err))
			return err;
	}

	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	old_blkaddr = dn->data_blkaddr;
	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
				&sum, seg_type, NULL);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
		f2fs_invalidate_compress_page(sbi, old_blkaddr);
	}
	f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
	return 0;
}

static void f2fs_map_lock(struct f2fs_sb_info *sbi, int flag)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		f2fs_down_read(&sbi->node_change);
	else
		f2fs_lock_op(sbi);
}

static void f2fs_map_unlock(struct f2fs_sb_info *sbi, int flag)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		f2fs_up_read(&sbi->node_change);
	else
		f2fs_unlock_op(sbi);
}

int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int err = 0;

	f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
	if (!f2fs_lookup_read_extent_cache_block(dn->inode, index,
						&dn->data_blkaddr))
		err = f2fs_reserve_block(dn, index);
	f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);

	return err;
}

static int f2fs_map_no_dnode(struct inode *inode,
		struct f2fs_map_blocks *map, struct dnode_of_data *dn,
		pgoff_t pgoff)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	/*
	 * There is one exceptional case where read_node_page() may return
	 * -ENOENT because the filesystem has been shut down or hit cp_error;
	 * return -EIO in that case.
	 */
	if (map->m_may_create &&
	    (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || f2fs_cp_error(sbi)))
		return -EIO;

	if (map->m_next_pgofs)
		*map->m_next_pgofs = f2fs_get_next_page_offset(dn, pgoff);
	if (map->m_next_extent)
		*map->m_next_extent = f2fs_get_next_page_offset(dn, pgoff);
	return 0;
}

static bool f2fs_map_blocks_cached(struct inode *inode,
		struct f2fs_map_blocks *map, int flag)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int maxblocks = map->m_len;
	pgoff_t pgoff = (pgoff_t)map->m_lblk;
	struct extent_info ei = {};

	if (!f2fs_lookup_read_extent_cache(inode, pgoff, &ei))
		return false;

	map->m_pblk = ei.blk + pgoff - ei.fofs;
	map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgoff);
	map->m_flags = F2FS_MAP_MAPPED;
	if (map->m_next_extent)
		*map->m_next_extent = pgoff + map->m_len;

	/* for hardware encryption, but to avoid potential issue in future */
	if (flag == F2FS_GET_BLOCK_DIO)
		f2fs_wait_on_block_writeback_range(inode,
					map->m_pblk, map->m_len);

	if (f2fs_allow_multi_device_dio(sbi, flag)) {
		int bidx = f2fs_target_device_index(sbi, map->m_pblk);
		struct f2fs_dev_info *dev = &sbi->devs[bidx];

		map->m_bdev = dev->bdev;
		map->m_pblk -= dev->start_blk;
		map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk);
	} else {
		map->m_bdev = inode->i_sb->s_bdev;
	}
	return true;
}

/*
 * f2fs_map_blocks() tries to find or build a mapping that maps contiguous
 * logical blocks to physical blocks, and returns such info via the
 * f2fs_map_blocks structure.
 */
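/*
 * On input, m_lblk/m_len describe the logical range to map; on return,
 * m_pblk/m_len describe the longest mapped run found there and m_flags carries
 * F2FS_MAP_MAPPED, plus F2FS_MAP_NEW for freshly allocated blocks or
 * F2FS_MAP_DELALLOC for reserved-but-unwritten (NEW_ADDR) blocks.  When a hole
 * is hit, *m_next_pgofs is set so the caller can skip ahead.
 */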
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
{
	unsigned int maxblocks = map->m_len;
	struct dnode_of_data dn;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
	pgoff_t pgofs, end_offset, end;
	int err = 0, ofs = 1;
	unsigned int ofs_in_node, last_ofs_in_node;
	blkcnt_t prealloc;
	block_t blkaddr;
	unsigned int start_pgofs;
	int bidx = 0;
	bool is_hole;

	if (!maxblocks)
		return 0;

	if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
		goto out;

	map->m_bdev = inode->i_sb->s_bdev;
	map->m_multidev_dio =
		f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);

	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs = (pgoff_t)map->m_lblk;
	end = pgofs + maxblocks;

next_dnode:
	if (map->m_may_create)
		f2fs_map_lock(sbi, flag);

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
	if (err) {
		if (flag == F2FS_GET_BLOCK_BMAP)
			map->m_pblk = 0;
		if (err == -ENOENT)
			err = f2fs_map_no_dnode(inode, map, &dn, pgofs);
		goto unlock_out;
	}

	start_pgofs = pgofs;
	prealloc = 0;
	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);

next_block:
	blkaddr = f2fs_data_blkaddr(&dn);
	is_hole = !__is_valid_data_blkaddr(blkaddr);
	if (!is_hole &&
	    !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR);
		goto sync_out;
	}

	/* use out-of-place update for direct IO under LFS mode */
	if (map->m_may_create &&
	    (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) {
		if (unlikely(f2fs_cp_error(sbi))) {
			err = -EIO;
			goto sync_out;
		}

		switch (flag) {
		case F2FS_GET_BLOCK_PRE_AIO:
			if (blkaddr == NULL_ADDR) {
				prealloc++;
				last_ofs_in_node = dn.ofs_in_node;
			}
			break;
		case F2FS_GET_BLOCK_PRE_DIO:
		case F2FS_GET_BLOCK_DIO:
			err = __allocate_data_block(&dn, map->m_seg_type);
			if (err)
				goto sync_out;
			if (flag == F2FS_GET_BLOCK_PRE_DIO)
				file_need_truncate(inode);
			set_inode_flag(inode, FI_APPEND_WRITE);
			break;
		default:
			WARN_ON_ONCE(1);
			err = -EIO;
			goto sync_out;
		}

		blkaddr = dn.data_blkaddr;
		if (is_hole)
			map->m_flags |= F2FS_MAP_NEW;
	} else if (is_hole) {
		if (f2fs_compressed_file(inode) &&
		    f2fs_sanity_check_cluster(&dn) &&
		    (flag != F2FS_GET_BLOCK_FIEMAP ||
		     IS_ENABLED(CONFIG_F2FS_CHECK_FS))) {
			err = -EFSCORRUPTED;
			f2fs_handle_error(sbi,
					ERROR_CORRUPTED_CLUSTER);
			goto sync_out;
		}

		switch (flag) {
		case F2FS_GET_BLOCK_PRECACHE:
			goto sync_out;
		case F2FS_GET_BLOCK_BMAP:
			map->m_pblk = 0;
			goto sync_out;
		case F2FS_GET_BLOCK_FIEMAP:
			if (blkaddr == NULL_ADDR) {
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
			break;
		default:
			/* for defragment case */
			if (map->m_next_pgofs)
				*map->m_next_pgofs = pgofs + 1;
			goto sync_out;
		}
	}

	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		goto skip;

	if (map->m_multidev_dio)
		bidx = f2fs_target_device_index(sbi, blkaddr);

	if (map->m_len == 0) {
		/* reserved delalloc block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_DELALLOC;
		map->m_flags |= F2FS_MAP_MAPPED;

		map->m_pblk = blkaddr;
		map->m_len = 1;

		if (map->m_multidev_dio)
			map->m_bdev = FDEV(bidx).bdev;
	} else if ((map->m_pblk != NEW_ADDR &&
			blkaddr == (map->m_pblk + ofs)) ||
			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
			flag == F2FS_GET_BLOCK_PRE_DIO) {
		if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
			goto sync_out;
		ofs++;
		map->m_len++;
	} else {
		goto sync_out;
	}

skip:
	dn.ofs_in_node++;
	pgofs++;

	/* preallocate blocks in batch for one dnode page */
	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
			(pgofs == end || dn.ofs_in_node == end_offset)) {

		dn.ofs_in_node = ofs_in_node;
		err = f2fs_reserve_new_blocks(&dn, prealloc);
		if (err)
			goto sync_out;

		map->m_len += dn.ofs_in_node - ofs_in_node;
		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
			err = -ENOSPC;
			goto sync_out;
		}
		dn.ofs_in_node = end_offset;
	}

	if (pgofs >= end)
		goto sync_out;
	else if (dn.ofs_in_node < end_offset)
		goto next_block;

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_read_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
	}

	f2fs_put_dnode(&dn);

	if (map->m_may_create) {
		f2fs_map_unlock(sbi, flag);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
	goto next_dnode;

sync_out:

	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
		/*
		 * for hardware encryption, but to avoid potential issue
		 * in future
		 */
		f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);

		if (map->m_multidev_dio) {
			block_t blk_addr = map->m_pblk;

			bidx = f2fs_target_device_index(sbi, map->m_pblk);

			map->m_bdev = FDEV(bidx).bdev;
			map->m_pblk -= FDEV(bidx).start_blk;

			if (map->m_may_create)
				f2fs_update_device_state(sbi, inode->i_ino,
							blk_addr, map->m_len);

			f2fs_bug_on(sbi, blk_addr + map->m_len >
						FDEV(bidx).end_blk + 1);
		}
	}

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_read_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + 1;
	}
	f2fs_put_dnode(&dn);
unlock_out:
	if (map->m_may_create) {
		f2fs_map_unlock(sbi, flag);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
out:
	trace_f2fs_map_blocks(inode, map, flag, err);
	return err;
}
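
/*
 * Return true only if the byte range [pos, pos + len) lies within i_size and
 * every block in it is already mapped, checked with read-only
 * (m_may_create == false) f2fs_map_blocks() calls.  Callers can use this to
 * detect writes that purely overwrite existing blocks and so need no new
 * allocation.
 */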
bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
{
	struct f2fs_map_blocks map;
	block_t last_lblk;
	int err;

	if (pos + len > i_size_read(inode))
		return false;

	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;
	last_lblk = F2FS_BLK_ALIGN(pos + len);

	while (map.m_lblk < last_lblk) {
		map.m_len = last_lblk - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
		if (err || map.m_len == 0)
			return false;
		map.m_lblk += map.m_len;
	}
	return true;
}

static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
{
	return (bytes >> inode->i_blkbits);
}

static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
{
	return (blks << inode->i_blkbits);
}

static int f2fs_xattr_fiemap(struct inode *inode,
				struct fiemap_extent_info *fieinfo)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page;
	struct node_info ni;
	__u64 phys = 0, len;
	__u32 flags;
	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
	int err = 0;

	if (f2fs_has_inline_xattr(inode)) {
		int offset;

		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
						inode->i_ino, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = blks_to_bytes(inode, ni.blk_addr);
		offset = offsetof(struct f2fs_inode, i_addr) +
					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
					get_inline_xattr_addrs(inode));

		phys += offset;
		len = inline_xattr_size(inode);

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;

		if (!xnid)
			flags |= FIEMAP_EXTENT_LAST;

		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
		if (err)
			return err;
	}

	if (xnid) {
		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, xnid, &ni, false);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = blks_to_bytes(inode, ni.blk_addr);
		len = inode->i_sb->s_blocksize;

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_LAST;
	}

	if (phys) {
		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
	}

	return (err < 0 ? err : 0);
}
err : 0); 1900 } 1901 max_inode_blocks(struct inode * inode)1902 static loff_t max_inode_blocks(struct inode *inode) 1903 { 1904 loff_t result = ADDRS_PER_INODE(inode); 1905 loff_t leaf_count = ADDRS_PER_BLOCK(inode); 1906 1907 /* two direct node blocks */ 1908 result += (leaf_count * 2); 1909 1910 /* two indirect node blocks */ 1911 leaf_count *= NIDS_PER_BLOCK; 1912 result += (leaf_count * 2); 1913 1914 /* one double indirect node block */ 1915 leaf_count *= NIDS_PER_BLOCK; 1916 result += leaf_count; 1917 1918 return result; 1919 } 1920 f2fs_fiemap(struct inode * inode,struct fiemap_extent_info * fieinfo,u64 start,u64 len)1921 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 1922 u64 start, u64 len) 1923 { 1924 struct f2fs_map_blocks map; 1925 sector_t start_blk, last_blk; 1926 pgoff_t next_pgofs; 1927 u64 logical = 0, phys = 0, size = 0; 1928 u32 flags = 0; 1929 int ret = 0; 1930 bool compr_cluster = false, compr_appended; 1931 unsigned int cluster_size = F2FS_I(inode)->i_cluster_size; 1932 unsigned int count_in_cluster = 0; 1933 loff_t maxbytes; 1934 1935 if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) { 1936 ret = f2fs_precache_extents(inode); 1937 if (ret) 1938 return ret; 1939 } 1940 1941 ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR); 1942 if (ret) 1943 return ret; 1944 1945 inode_lock(inode); 1946 1947 maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS; 1948 if (start > maxbytes) { 1949 ret = -EFBIG; 1950 goto out; 1951 } 1952 1953 if (len > maxbytes || (maxbytes - len) < start) 1954 len = maxbytes - start; 1955 1956 if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { 1957 ret = f2fs_xattr_fiemap(inode, fieinfo); 1958 goto out; 1959 } 1960 1961 if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) { 1962 ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len); 1963 if (ret != -EAGAIN) 1964 goto out; 1965 } 1966 1967 if (bytes_to_blks(inode, len) == 0) 1968 len = blks_to_bytes(inode, 1); 1969 1970 start_blk = bytes_to_blks(inode, start); 1971 last_blk = bytes_to_blks(inode, start + len - 1); 1972 1973 next: 1974 memset(&map, 0, sizeof(map)); 1975 map.m_lblk = start_blk; 1976 map.m_len = bytes_to_blks(inode, len); 1977 map.m_next_pgofs = &next_pgofs; 1978 map.m_seg_type = NO_CHECK_TYPE; 1979 1980 if (compr_cluster) { 1981 map.m_lblk += 1; 1982 map.m_len = cluster_size - count_in_cluster; 1983 } 1984 1985 ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP); 1986 if (ret) 1987 goto out; 1988 1989 /* HOLE */ 1990 if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) { 1991 start_blk = next_pgofs; 1992 1993 if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode, 1994 max_inode_blocks(inode))) 1995 goto prep_next; 1996 1997 flags |= FIEMAP_EXTENT_LAST; 1998 } 1999 2000 compr_appended = false; 2001 /* In a case of compressed cluster, append this to the last extent */ 2002 if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) || 2003 !(map.m_flags & F2FS_MAP_FLAGS))) { 2004 compr_appended = true; 2005 goto skip_fill; 2006 } 2007 2008 if (size) { 2009 flags |= FIEMAP_EXTENT_MERGED; 2010 if (IS_ENCRYPTED(inode)) 2011 flags |= FIEMAP_EXTENT_DATA_ENCRYPTED; 2012 2013 ret = fiemap_fill_next_extent(fieinfo, logical, 2014 phys, size, flags); 2015 trace_f2fs_fiemap(inode, logical, phys, size, flags, ret); 2016 if (ret) 2017 goto out; 2018 size = 0; 2019 } 2020 2021 if (start_blk > last_blk) 2022 goto out; 2023 2024 skip_fill: 2025 if (map.m_pblk == COMPRESS_ADDR) { 2026 compr_cluster = true; 2027 count_in_cluster = 1; 2028 } else if 
(compr_appended) { 2029 unsigned int appended_blks = cluster_size - 2030 count_in_cluster + 1; 2031 size += blks_to_bytes(inode, appended_blks); 2032 start_blk += appended_blks; 2033 compr_cluster = false; 2034 } else { 2035 logical = blks_to_bytes(inode, start_blk); 2036 phys = __is_valid_data_blkaddr(map.m_pblk) ? 2037 blks_to_bytes(inode, map.m_pblk) : 0; 2038 size = blks_to_bytes(inode, map.m_len); 2039 flags = 0; 2040 2041 if (compr_cluster) { 2042 flags = FIEMAP_EXTENT_ENCODED; 2043 count_in_cluster += map.m_len; 2044 if (count_in_cluster == cluster_size) { 2045 compr_cluster = false; 2046 size += blks_to_bytes(inode, 1); 2047 } 2048 } else if (map.m_flags & F2FS_MAP_DELALLOC) { 2049 flags = FIEMAP_EXTENT_UNWRITTEN; 2050 } 2051 2052 start_blk += bytes_to_blks(inode, size); 2053 } 2054 2055 prep_next: 2056 cond_resched(); 2057 if (fatal_signal_pending(current)) 2058 ret = -EINTR; 2059 else 2060 goto next; 2061 out: 2062 if (ret == 1) 2063 ret = 0; 2064 2065 inode_unlock(inode); 2066 return ret; 2067 } 2068 f2fs_readpage_limit(struct inode * inode)2069 static inline loff_t f2fs_readpage_limit(struct inode *inode) 2070 { 2071 if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode)) 2072 return inode->i_sb->s_maxbytes; 2073 2074 return i_size_read(inode); 2075 } 2076 f2fs_read_single_page(struct inode * inode,struct page * page,unsigned nr_pages,struct f2fs_map_blocks * map,struct bio ** bio_ret,sector_t * last_block_in_bio,bool is_readahead)2077 static int f2fs_read_single_page(struct inode *inode, struct page *page, 2078 unsigned nr_pages, 2079 struct f2fs_map_blocks *map, 2080 struct bio **bio_ret, 2081 sector_t *last_block_in_bio, 2082 bool is_readahead) 2083 { 2084 struct bio *bio = *bio_ret; 2085 const unsigned blocksize = blks_to_bytes(inode, 1); 2086 sector_t block_in_file; 2087 sector_t last_block; 2088 sector_t last_block_in_file; 2089 sector_t block_nr; 2090 int ret = 0; 2091 2092 block_in_file = (sector_t)page_index(page); 2093 last_block = block_in_file + nr_pages; 2094 last_block_in_file = bytes_to_blks(inode, 2095 f2fs_readpage_limit(inode) + blocksize - 1); 2096 if (last_block > last_block_in_file) 2097 last_block = last_block_in_file; 2098 2099 /* just zeroing out page which is beyond EOF */ 2100 if (block_in_file >= last_block) 2101 goto zero_out; 2102 /* 2103 * Map blocks using the previous result first. 2104 */ 2105 if ((map->m_flags & F2FS_MAP_MAPPED) && 2106 block_in_file > map->m_lblk && 2107 block_in_file < (map->m_lblk + map->m_len)) 2108 goto got_it; 2109 2110 /* 2111 * Then do more f2fs_map_blocks() calls until we are 2112 * done with this page. 
2113 */ 2114 map->m_lblk = block_in_file; 2115 map->m_len = last_block - block_in_file; 2116 2117 ret = f2fs_map_blocks(inode, map, F2FS_GET_BLOCK_DEFAULT); 2118 if (ret) 2119 goto out; 2120 got_it: 2121 if ((map->m_flags & F2FS_MAP_MAPPED)) { 2122 block_nr = map->m_pblk + block_in_file - map->m_lblk; 2123 SetPageMappedToDisk(page); 2124 2125 if (!PageUptodate(page) && (!PageSwapCache(page) && 2126 !cleancache_get_page(page))) { 2127 SetPageUptodate(page); 2128 goto confused; 2129 } 2130 2131 if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr, 2132 DATA_GENERIC_ENHANCE_READ)) { 2133 ret = -EFSCORRUPTED; 2134 f2fs_handle_error(F2FS_I_SB(inode), 2135 ERROR_INVALID_BLKADDR); 2136 goto out; 2137 } 2138 } else { 2139 zero_out: 2140 zero_user_segment(page, 0, PAGE_SIZE); 2141 if (f2fs_need_verity(inode, page->index) && 2142 !fsverity_verify_page(page)) { 2143 ret = -EIO; 2144 goto out; 2145 } 2146 if (!PageUptodate(page)) 2147 SetPageUptodate(page); 2148 unlock_page(page); 2149 goto out; 2150 } 2151 2152 /* 2153 * This page will go to BIO. Do we need to send this 2154 * BIO off first? 2155 */ 2156 if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio, 2157 *last_block_in_bio, block_nr) || 2158 !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { 2159 submit_and_realloc: 2160 f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); 2161 bio = NULL; 2162 } 2163 if (bio == NULL) { 2164 bio = f2fs_grab_read_bio(inode, block_nr, nr_pages, 2165 is_readahead ? REQ_RAHEAD : 0, page->index, 2166 false); 2167 if (IS_ERR(bio)) { 2168 ret = PTR_ERR(bio); 2169 bio = NULL; 2170 goto out; 2171 } 2172 } 2173 2174 /* 2175 * If the page is under writeback, we need to wait for 2176 * its completion to see the correct decrypted data. 2177 */ 2178 f2fs_wait_on_block_writeback(inode, block_nr); 2179 2180 if (bio_add_page(bio, page, blocksize, 0) < blocksize) 2181 goto submit_and_realloc; 2182 2183 inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA); 2184 f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO, 2185 F2FS_BLKSIZE); 2186 *last_block_in_bio = block_nr; 2187 goto out; 2188 confused: 2189 if (bio) { 2190 f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); 2191 bio = NULL; 2192 } 2193 unlock_page(page); 2194 out: 2195 *bio_ret = bio; 2196 return ret; 2197 } 2198 2199 #ifdef CONFIG_F2FS_FS_COMPRESSION f2fs_read_multi_pages(struct compress_ctx * cc,struct bio ** bio_ret,unsigned nr_pages,sector_t * last_block_in_bio,bool is_readahead,bool for_write)2200 int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret, 2201 unsigned nr_pages, sector_t *last_block_in_bio, 2202 bool is_readahead, bool for_write) 2203 { 2204 struct dnode_of_data dn; 2205 struct inode *inode = cc->inode; 2206 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2207 struct bio *bio = *bio_ret; 2208 unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size; 2209 sector_t last_block_in_file; 2210 const unsigned blocksize = blks_to_bytes(inode, 1); 2211 struct decompress_io_ctx *dic = NULL; 2212 struct extent_info ei = {}; 2213 bool from_dnode = true; 2214 int i; 2215 int ret = 0; 2216 2217 f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc)); 2218 2219 last_block_in_file = bytes_to_blks(inode, 2220 f2fs_readpage_limit(inode) + blocksize - 1); 2221 2222 /* get rid of pages beyond EOF */ 2223 for (i = 0; i < cc->cluster_size; i++) { 2224 struct page *page = cc->rpages[i]; 2225 2226 if (!page) 2227 continue; 2228 if ((sector_t)page->index >= last_block_in_file) { 2229 zero_user_segment(page, 0, PAGE_SIZE); 2230 if (!PageUptodate(page)) 
2231 SetPageUptodate(page); 2232 } else if (!PageUptodate(page)) { 2233 continue; 2234 } 2235 unlock_page(page); 2236 if (for_write) 2237 put_page(page); 2238 cc->rpages[i] = NULL; 2239 cc->nr_rpages--; 2240 } 2241 2242 /* we are done since all pages are beyond EOF */ 2243 if (f2fs_cluster_is_empty(cc)) 2244 goto out; 2245 2246 if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei)) 2247 from_dnode = false; 2248 2249 if (!from_dnode) 2250 goto skip_reading_dnode; 2251 2252 set_new_dnode(&dn, inode, NULL, NULL, 0); 2253 ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); 2254 if (ret) 2255 goto out; 2256 2257 if (unlikely(f2fs_cp_error(sbi))) { 2258 ret = -EIO; 2259 goto out_put_dnode; 2260 } 2261 f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR); 2262 2263 skip_reading_dnode: 2264 for (i = 1; i < cc->cluster_size; i++) { 2265 block_t blkaddr; 2266 2267 blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page, 2268 dn.ofs_in_node + i) : 2269 ei.blk + i - 1; 2270 2271 if (!__is_valid_data_blkaddr(blkaddr)) 2272 break; 2273 2274 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) { 2275 ret = -EFAULT; 2276 goto out_put_dnode; 2277 } 2278 cc->nr_cpages++; 2279 2280 if (!from_dnode && i >= ei.c_len) 2281 break; 2282 } 2283 2284 /* nothing to decompress */ 2285 if (cc->nr_cpages == 0) { 2286 ret = 0; 2287 goto out_put_dnode; 2288 } 2289 2290 dic = f2fs_alloc_dic(cc); 2291 if (IS_ERR(dic)) { 2292 ret = PTR_ERR(dic); 2293 goto out_put_dnode; 2294 } 2295 2296 for (i = 0; i < cc->nr_cpages; i++) { 2297 struct page *page = dic->cpages[i]; 2298 block_t blkaddr; 2299 struct bio_post_read_ctx *ctx; 2300 2301 blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page, 2302 dn.ofs_in_node + i + 1) : 2303 ei.blk + i; 2304 2305 f2fs_wait_on_block_writeback(inode, blkaddr); 2306 2307 if (f2fs_load_compressed_page(sbi, page, blkaddr)) { 2308 if (atomic_dec_and_test(&dic->remaining_pages)) 2309 f2fs_decompress_cluster(dic, true); 2310 continue; 2311 } 2312 2313 if (bio && (!page_is_mergeable(sbi, bio, 2314 *last_block_in_bio, blkaddr) || 2315 !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) { 2316 submit_and_realloc: 2317 f2fs_submit_read_bio(sbi, bio, DATA); 2318 bio = NULL; 2319 } 2320 2321 if (!bio) { 2322 bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages, 2323 is_readahead ? REQ_RAHEAD : 0, 2324 page->index, for_write); 2325 if (IS_ERR(bio)) { 2326 ret = PTR_ERR(bio); 2327 f2fs_decompress_end_io(dic, ret, true); 2328 f2fs_put_dnode(&dn); 2329 *bio_ret = NULL; 2330 return ret; 2331 } 2332 } 2333 2334 if (bio_add_page(bio, page, blocksize, 0) < blocksize) 2335 goto submit_and_realloc; 2336 2337 ctx = get_post_read_ctx(bio); 2338 ctx->enabled_steps |= STEP_DECOMPRESS; 2339 refcount_inc(&dic->refcnt); 2340 2341 inc_page_count(sbi, F2FS_RD_DATA); 2342 f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE); 2343 *last_block_in_bio = blkaddr; 2344 } 2345 2346 if (from_dnode) 2347 f2fs_put_dnode(&dn); 2348 2349 *bio_ret = bio; 2350 return 0; 2351 2352 out_put_dnode: 2353 if (from_dnode) 2354 f2fs_put_dnode(&dn); 2355 out: 2356 for (i = 0; i < cc->cluster_size; i++) { 2357 if (cc->rpages[i]) { 2358 ClearPageUptodate(cc->rpages[i]); 2359 unlock_page(cc->rpages[i]); 2360 } 2361 } 2362 *bio_ret = bio; 2363 return ret; 2364 } 2365 #endif 2366 2367 /* 2368 * This function was originally taken from fs/mpage.c, and customized for f2fs. 2369 * Major change was from block_size == page_size in f2fs by default. 
2370 */ f2fs_mpage_readpages(struct inode * inode,struct readahead_control * rac,struct page * page)2371 static int f2fs_mpage_readpages(struct inode *inode, 2372 struct readahead_control *rac, struct page *page) 2373 { 2374 struct bio *bio = NULL; 2375 sector_t last_block_in_bio = 0; 2376 struct f2fs_map_blocks map; 2377 #ifdef CONFIG_F2FS_FS_COMPRESSION 2378 struct compress_ctx cc = { 2379 .inode = inode, 2380 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, 2381 .cluster_size = F2FS_I(inode)->i_cluster_size, 2382 .cluster_idx = NULL_CLUSTER, 2383 .rpages = NULL, 2384 .cpages = NULL, 2385 .nr_rpages = 0, 2386 .nr_cpages = 0, 2387 }; 2388 pgoff_t nc_cluster_idx = NULL_CLUSTER; 2389 #endif 2390 unsigned nr_pages = rac ? readahead_count(rac) : 1; 2391 unsigned max_nr_pages = nr_pages; 2392 int ret = 0; 2393 2394 map.m_pblk = 0; 2395 map.m_lblk = 0; 2396 map.m_len = 0; 2397 map.m_flags = 0; 2398 map.m_next_pgofs = NULL; 2399 map.m_next_extent = NULL; 2400 map.m_seg_type = NO_CHECK_TYPE; 2401 map.m_may_create = false; 2402 2403 for (; nr_pages; nr_pages--) { 2404 if (rac) { 2405 page = readahead_page(rac); 2406 prefetchw(&page->flags); 2407 } 2408 2409 #ifdef CONFIG_F2FS_FS_COMPRESSION 2410 if (f2fs_compressed_file(inode)) { 2411 /* there are remained compressed pages, submit them */ 2412 if (!f2fs_cluster_can_merge_page(&cc, page->index)) { 2413 ret = f2fs_read_multi_pages(&cc, &bio, 2414 max_nr_pages, 2415 &last_block_in_bio, 2416 rac != NULL, false); 2417 f2fs_destroy_compress_ctx(&cc, false); 2418 if (ret) 2419 goto set_error_page; 2420 } 2421 if (cc.cluster_idx == NULL_CLUSTER) { 2422 if (nc_cluster_idx == 2423 page->index >> cc.log_cluster_size) { 2424 goto read_single_page; 2425 } 2426 2427 ret = f2fs_is_compressed_cluster(inode, page->index); 2428 if (ret < 0) 2429 goto set_error_page; 2430 else if (!ret) { 2431 nc_cluster_idx = 2432 page->index >> cc.log_cluster_size; 2433 goto read_single_page; 2434 } 2435 2436 nc_cluster_idx = NULL_CLUSTER; 2437 } 2438 ret = f2fs_init_compress_ctx(&cc); 2439 if (ret) 2440 goto set_error_page; 2441 2442 f2fs_compress_ctx_add_page(&cc, page); 2443 2444 goto next_page; 2445 } 2446 read_single_page: 2447 #endif 2448 2449 ret = f2fs_read_single_page(inode, page, max_nr_pages, &map, 2450 &bio, &last_block_in_bio, rac); 2451 if (ret) { 2452 #ifdef CONFIG_F2FS_FS_COMPRESSION 2453 set_error_page: 2454 #endif 2455 zero_user_segment(page, 0, PAGE_SIZE); 2456 unlock_page(page); 2457 } 2458 #ifdef CONFIG_F2FS_FS_COMPRESSION 2459 next_page: 2460 #endif 2461 if (rac) 2462 put_page(page); 2463 2464 #ifdef CONFIG_F2FS_FS_COMPRESSION 2465 if (f2fs_compressed_file(inode)) { 2466 /* last page */ 2467 if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) { 2468 ret = f2fs_read_multi_pages(&cc, &bio, 2469 max_nr_pages, 2470 &last_block_in_bio, 2471 rac != NULL, false); 2472 f2fs_destroy_compress_ctx(&cc, false); 2473 } 2474 } 2475 #endif 2476 } 2477 if (bio) 2478 f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA); 2479 return ret; 2480 } 2481 f2fs_read_data_page(struct file * file,struct page * page)2482 static int f2fs_read_data_page(struct file *file, struct page *page) 2483 { 2484 struct inode *inode = page_file_mapping(page)->host; 2485 int ret = -EAGAIN; 2486 2487 trace_f2fs_readpage(page, DATA); 2488 2489 if (!f2fs_is_compress_backend_ready(inode)) { 2490 unlock_page(page); 2491 return -EOPNOTSUPP; 2492 } 2493 2494 /* If the file has inline data, try to read it directly */ 2495 if (f2fs_has_inline_data(inode)) 2496 ret = f2fs_read_inline_data(inode, page); 2497 
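	/*
	 * ret was initialized to -EAGAIN above, so the "no inline data"
	 * case falls through to the regular read path. f2fs_read_inline_data()
	 * is also expected to return -EAGAIN when it cannot serve the page
	 * from the inode page (e.g. a racing inline-to-regular conversion),
	 * in which case we likewise fall back to f2fs_mpage_readpages().
	 */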
if (ret == -EAGAIN) 2498 ret = f2fs_mpage_readpages(inode, NULL, page); 2499 return ret; 2500 } 2501 f2fs_readahead(struct readahead_control * rac)2502 static void f2fs_readahead(struct readahead_control *rac) 2503 { 2504 struct inode *inode = rac->mapping->host; 2505 2506 trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac)); 2507 2508 if (!f2fs_is_compress_backend_ready(inode)) 2509 return; 2510 2511 /* If the file has inline data, skip readpages */ 2512 if (f2fs_has_inline_data(inode)) 2513 return; 2514 2515 f2fs_mpage_readpages(inode, rac, NULL); 2516 } 2517 f2fs_encrypt_one_page(struct f2fs_io_info * fio)2518 int f2fs_encrypt_one_page(struct f2fs_io_info *fio) 2519 { 2520 struct inode *inode = fio->page->mapping->host; 2521 struct page *mpage, *page; 2522 gfp_t gfp_flags = GFP_NOFS; 2523 2524 if (!f2fs_encrypted_file(inode)) 2525 return 0; 2526 2527 page = fio->compressed_page ? fio->compressed_page : fio->page; 2528 2529 /* wait for GCed page writeback via META_MAPPING */ 2530 f2fs_wait_on_block_writeback(inode, fio->old_blkaddr); 2531 2532 if (fscrypt_inode_uses_inline_crypto(inode)) 2533 return 0; 2534 2535 retry_encrypt: 2536 fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page, 2537 PAGE_SIZE, 0, gfp_flags); 2538 if (IS_ERR(fio->encrypted_page)) { 2539 /* flush pending IOs and wait for a while in the ENOMEM case */ 2540 if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { 2541 f2fs_flush_merged_writes(fio->sbi); 2542 congestion_wait(BLK_RW_ASYNC, DEFAULT_IO_TIMEOUT); 2543 gfp_flags |= __GFP_NOFAIL; 2544 goto retry_encrypt; 2545 } 2546 return PTR_ERR(fio->encrypted_page); 2547 } 2548 2549 mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr); 2550 if (mpage) { 2551 if (PageUptodate(mpage)) 2552 memcpy(page_address(mpage), 2553 page_address(fio->encrypted_page), PAGE_SIZE); 2554 f2fs_put_page(mpage, 1); 2555 } 2556 return 0; 2557 } 2558 check_inplace_update_policy(struct inode * inode,struct f2fs_io_info * fio)2559 static inline bool check_inplace_update_policy(struct inode *inode, 2560 struct f2fs_io_info *fio) 2561 { 2562 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2563 2564 if (IS_F2FS_IPU_HONOR_OPU_WRITE(sbi) && 2565 is_inode_flag_set(inode, FI_OPU_WRITE)) 2566 return false; 2567 if (IS_F2FS_IPU_FORCE(sbi)) 2568 return true; 2569 if (IS_F2FS_IPU_SSR(sbi) && f2fs_need_SSR(sbi)) 2570 return true; 2571 if (IS_F2FS_IPU_UTIL(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util) 2572 return true; 2573 if (IS_F2FS_IPU_SSR_UTIL(sbi) && f2fs_need_SSR(sbi) && 2574 utilization(sbi) > SM_I(sbi)->min_ipu_util) 2575 return true; 2576 2577 /* 2578 * IPU for rewrite async pages 2579 */ 2580 if (IS_F2FS_IPU_ASYNC(sbi) && fio && fio->op == REQ_OP_WRITE && 2581 !(fio->op_flags & REQ_SYNC) && !IS_ENCRYPTED(inode)) 2582 return true; 2583 2584 /* this is only set during fdatasync */ 2585 if (IS_F2FS_IPU_FSYNC(sbi) && is_inode_flag_set(inode, FI_NEED_IPU)) 2586 return true; 2587 2588 if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) && 2589 !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) 2590 return true; 2591 2592 return false; 2593 } 2594 f2fs_should_update_inplace(struct inode * inode,struct f2fs_io_info * fio)2595 bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio) 2596 { 2597 /* swap file is migrating in aligned write mode */ 2598 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) 2599 return false; 2600 2601 if (f2fs_is_pinned_file(inode)) 2602 return true; 2603 2604 /* if this is cold file, we should overwrite to avoid 
fragmentation */ 2605 if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE)) 2606 return true; 2607 2608 return check_inplace_update_policy(inode, fio); 2609 } 2610 f2fs_should_update_outplace(struct inode * inode,struct f2fs_io_info * fio)2611 bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) 2612 { 2613 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2614 2615 /* The below cases were checked when setting it. */ 2616 if (f2fs_is_pinned_file(inode)) 2617 return false; 2618 if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK)) 2619 return true; 2620 if (f2fs_lfs_mode(sbi)) 2621 return true; 2622 if (S_ISDIR(inode->i_mode)) 2623 return true; 2624 if (IS_NOQUOTA(inode)) 2625 return true; 2626 if (f2fs_is_atomic_file(inode)) 2627 return true; 2628 2629 /* swap file is migrating in aligned write mode */ 2630 if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) 2631 return true; 2632 2633 if (is_inode_flag_set(inode, FI_OPU_WRITE)) 2634 return true; 2635 2636 if (fio) { 2637 if (page_private_gcing(fio->page)) 2638 return true; 2639 if (page_private_dummy(fio->page)) 2640 return true; 2641 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) && 2642 f2fs_is_checkpointed_data(sbi, fio->old_blkaddr))) 2643 return true; 2644 } 2645 return false; 2646 } 2647 need_inplace_update(struct f2fs_io_info * fio)2648 static inline bool need_inplace_update(struct f2fs_io_info *fio) 2649 { 2650 struct inode *inode = fio->page->mapping->host; 2651 2652 if (f2fs_should_update_outplace(inode, fio)) 2653 return false; 2654 2655 return f2fs_should_update_inplace(inode, fio); 2656 } 2657 f2fs_do_write_data_page(struct f2fs_io_info * fio)2658 int f2fs_do_write_data_page(struct f2fs_io_info *fio) 2659 { 2660 struct page *page = fio->page; 2661 struct inode *inode = page->mapping->host; 2662 struct dnode_of_data dn; 2663 struct node_info ni; 2664 bool ipu_force = false; 2665 int err = 0; 2666 2667 /* Use COW inode to make dnode_of_data for atomic write */ 2668 if (f2fs_is_atomic_file(inode)) 2669 set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0); 2670 else 2671 set_new_dnode(&dn, inode, NULL, NULL, 0); 2672 2673 if (need_inplace_update(fio) && 2674 f2fs_lookup_read_extent_cache_block(inode, page->index, 2675 &fio->old_blkaddr)) { 2676 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, 2677 DATA_GENERIC_ENHANCE)) { 2678 f2fs_handle_error(fio->sbi, 2679 ERROR_INVALID_BLKADDR); 2680 return -EFSCORRUPTED; 2681 } 2682 2683 ipu_force = true; 2684 fio->need_lock = LOCK_DONE; 2685 goto got_it; 2686 } 2687 2688 /* Deadlock due to between page->lock and f2fs_lock_op */ 2689 if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi)) 2690 return -EAGAIN; 2691 2692 err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE); 2693 if (err) 2694 goto out; 2695 2696 fio->old_blkaddr = dn.data_blkaddr; 2697 2698 /* This page is already truncated */ 2699 if (fio->old_blkaddr == NULL_ADDR) { 2700 ClearPageUptodate(page); 2701 clear_page_private_gcing(page); 2702 goto out_writepage; 2703 } 2704 got_it: 2705 if (__is_valid_data_blkaddr(fio->old_blkaddr) && 2706 !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr, 2707 DATA_GENERIC_ENHANCE)) { 2708 err = -EFSCORRUPTED; 2709 f2fs_handle_error(fio->sbi, ERROR_INVALID_BLKADDR); 2710 goto out_writepage; 2711 } 2712 2713 /* 2714 * If current allocation needs SSR, 2715 * it had better in-place writes for updated data. 
2716 */ 2717 if (ipu_force || 2718 (__is_valid_data_blkaddr(fio->old_blkaddr) && 2719 need_inplace_update(fio))) { 2720 err = f2fs_encrypt_one_page(fio); 2721 if (err) 2722 goto out_writepage; 2723 2724 set_page_writeback(page); 2725 f2fs_put_dnode(&dn); 2726 if (fio->need_lock == LOCK_REQ) 2727 f2fs_unlock_op(fio->sbi); 2728 err = f2fs_inplace_write_data(fio); 2729 if (err) { 2730 if (fscrypt_inode_uses_fs_layer_crypto(inode)) 2731 fscrypt_finalize_bounce_page(&fio->encrypted_page); 2732 if (PageWriteback(page)) 2733 end_page_writeback(page); 2734 } else { 2735 set_inode_flag(inode, FI_UPDATE_WRITE); 2736 } 2737 trace_f2fs_do_write_data_page(fio->page, IPU); 2738 return err; 2739 } 2740 2741 if (fio->need_lock == LOCK_RETRY) { 2742 if (!f2fs_trylock_op(fio->sbi)) { 2743 err = -EAGAIN; 2744 goto out_writepage; 2745 } 2746 fio->need_lock = LOCK_REQ; 2747 } 2748 2749 err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false); 2750 if (err) 2751 goto out_writepage; 2752 2753 fio->version = ni.version; 2754 2755 err = f2fs_encrypt_one_page(fio); 2756 if (err) 2757 goto out_writepage; 2758 2759 set_page_writeback(page); 2760 2761 if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR) 2762 f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false); 2763 2764 /* LFS mode write path */ 2765 f2fs_outplace_write_data(&dn, fio); 2766 trace_f2fs_do_write_data_page(page, OPU); 2767 set_inode_flag(inode, FI_APPEND_WRITE); 2768 if (page->index == 0) 2769 set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); 2770 out_writepage: 2771 f2fs_put_dnode(&dn); 2772 out: 2773 if (fio->need_lock == LOCK_REQ) 2774 f2fs_unlock_op(fio->sbi); 2775 return err; 2776 } 2777 f2fs_write_single_data_page(struct page * page,int * submitted,struct bio ** bio,sector_t * last_block,struct writeback_control * wbc,enum iostat_type io_type,int compr_blocks,bool allow_balance)2778 int f2fs_write_single_data_page(struct page *page, int *submitted, 2779 struct bio **bio, 2780 sector_t *last_block, 2781 struct writeback_control *wbc, 2782 enum iostat_type io_type, 2783 int compr_blocks, 2784 bool allow_balance) 2785 { 2786 struct inode *inode = page->mapping->host; 2787 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 2788 loff_t i_size = i_size_read(inode); 2789 const pgoff_t end_index = ((unsigned long long)i_size) 2790 >> PAGE_SHIFT; 2791 loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT; 2792 unsigned offset = 0; 2793 bool need_balance_fs = false; 2794 int err = 0; 2795 struct f2fs_io_info fio = { 2796 .sbi = sbi, 2797 .ino = inode->i_ino, 2798 .type = DATA, 2799 .op = REQ_OP_WRITE, 2800 .op_flags = wbc_to_write_flags(wbc), 2801 .old_blkaddr = NULL_ADDR, 2802 .page = page, 2803 .encrypted_page = NULL, 2804 .submitted = 0, 2805 .compr_blocks = compr_blocks, 2806 .need_lock = LOCK_RETRY, 2807 .post_read = f2fs_post_read_required(inode) ? 1 : 0, 2808 .io_type = io_type, 2809 .io_wbc = wbc, 2810 .bio = bio, 2811 .last_block = last_block, 2812 }; 2813 2814 trace_f2fs_writepage(page, DATA); 2815 2816 /* we should bypass data pages to proceed the kworker jobs */ 2817 if (unlikely(f2fs_cp_error(sbi))) { 2818 mapping_set_error(page->mapping, -EIO); 2819 /* 2820 * don't drop any dirty dentry pages for keeping lastest 2821 * directory structure. 
2822 */ 2823 if (S_ISDIR(inode->i_mode) && 2824 !is_sbi_flag_set(sbi, SBI_IS_CLOSE)) 2825 goto redirty_out; 2826 goto out; 2827 } 2828 2829 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 2830 goto redirty_out; 2831 2832 if (page->index < end_index || 2833 f2fs_verity_in_progress(inode) || 2834 compr_blocks) 2835 goto write; 2836 2837 /* 2838 * If the offset is out-of-range of file size, 2839 * this page does not have to be written to disk. 2840 */ 2841 offset = i_size & (PAGE_SIZE - 1); 2842 if ((page->index >= end_index + 1) || !offset) 2843 goto out; 2844 2845 zero_user_segment(page, offset, PAGE_SIZE); 2846 write: 2847 if (f2fs_is_drop_cache(inode)) 2848 goto out; 2849 2850 /* Dentry/quota blocks are controlled by checkpoint */ 2851 if (S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) { 2852 /* 2853 * We need to wait for node_write to avoid block allocation during 2854 * checkpoint. This can only happen to quota writes which can cause 2855 * the below discard race condition. 2856 */ 2857 if (IS_NOQUOTA(inode)) 2858 f2fs_down_read(&sbi->node_write); 2859 2860 fio.need_lock = LOCK_DONE; 2861 err = f2fs_do_write_data_page(&fio); 2862 2863 if (IS_NOQUOTA(inode)) 2864 f2fs_up_read(&sbi->node_write); 2865 2866 goto done; 2867 } 2868 2869 if (!wbc->for_reclaim) 2870 need_balance_fs = true; 2871 else if (has_not_enough_free_secs(sbi, 0, 0)) 2872 goto redirty_out; 2873 else 2874 set_inode_flag(inode, FI_HOT_DATA); 2875 2876 err = -EAGAIN; 2877 if (f2fs_has_inline_data(inode)) { 2878 err = f2fs_write_inline_data(inode, page); 2879 if (!err) 2880 goto out; 2881 } 2882 2883 if (err == -EAGAIN) { 2884 err = f2fs_do_write_data_page(&fio); 2885 if (err == -EAGAIN) { 2886 fio.need_lock = LOCK_REQ; 2887 err = f2fs_do_write_data_page(&fio); 2888 } 2889 } 2890 2891 if (err) { 2892 file_set_keep_isize(inode); 2893 } else { 2894 spin_lock(&F2FS_I(inode)->i_size_lock); 2895 if (F2FS_I(inode)->last_disk_size < psize) 2896 F2FS_I(inode)->last_disk_size = psize; 2897 spin_unlock(&F2FS_I(inode)->i_size_lock); 2898 } 2899 2900 done: 2901 if (err && err != -ENOENT) 2902 goto redirty_out; 2903 2904 out: 2905 inode_dec_dirty_pages(inode); 2906 if (err) { 2907 ClearPageUptodate(page); 2908 clear_page_private_gcing(page); 2909 } 2910 2911 if (wbc->for_reclaim) { 2912 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA); 2913 clear_inode_flag(inode, FI_HOT_DATA); 2914 f2fs_remove_dirty_inode(inode); 2915 submitted = NULL; 2916 } 2917 unlock_page(page); 2918 if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) && 2919 !F2FS_I(inode)->wb_task && allow_balance) 2920 f2fs_balance_fs(sbi, need_balance_fs); 2921 2922 if (unlikely(f2fs_cp_error(sbi))) { 2923 f2fs_submit_merged_write(sbi, DATA); 2924 if (bio && *bio) 2925 f2fs_submit_merged_ipu_write(sbi, bio, NULL); 2926 submitted = NULL; 2927 } 2928 2929 if (submitted) 2930 *submitted = fio.submitted; 2931 2932 return 0; 2933 2934 redirty_out: 2935 redirty_page_for_writepage(wbc, page); 2936 /* 2937 * pageout() in MM translates EAGAIN, so calls handle_write_error() 2938 * -> mapping_set_error() -> set_bit(AS_EIO, ...). 2939 * file_write_and_wait_range() will see EIO error, which is critical 2940 * to return value of fsync() followed by atomic_write failure to user. 
2941 */ 2942 if (!err || wbc->for_reclaim) 2943 return AOP_WRITEPAGE_ACTIVATE; 2944 unlock_page(page); 2945 return err; 2946 } 2947 f2fs_write_data_page(struct page * page,struct writeback_control * wbc)2948 static int f2fs_write_data_page(struct page *page, 2949 struct writeback_control *wbc) 2950 { 2951 #ifdef CONFIG_F2FS_FS_COMPRESSION 2952 struct inode *inode = page->mapping->host; 2953 2954 if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) 2955 goto out; 2956 2957 if (f2fs_compressed_file(inode)) { 2958 if (f2fs_is_compressed_cluster(inode, page->index)) { 2959 redirty_page_for_writepage(wbc, page); 2960 return AOP_WRITEPAGE_ACTIVATE; 2961 } 2962 } 2963 out: 2964 #endif 2965 2966 return f2fs_write_single_data_page(page, NULL, NULL, NULL, 2967 wbc, FS_DATA_IO, 0, true); 2968 } 2969 2970 /* 2971 * This function was copied from write_cache_pages from mm/page-writeback.c. 2972 * The major change is making write step of cold data page separately from 2973 * warm/hot data page. 2974 */ f2fs_write_cache_pages(struct address_space * mapping,struct writeback_control * wbc,enum iostat_type io_type)2975 static int f2fs_write_cache_pages(struct address_space *mapping, 2976 struct writeback_control *wbc, 2977 enum iostat_type io_type) 2978 { 2979 int ret = 0; 2980 int done = 0, retry = 0; 2981 struct page *pages[F2FS_ONSTACK_PAGES]; 2982 struct f2fs_sb_info *sbi = F2FS_M_SB(mapping); 2983 struct bio *bio = NULL; 2984 sector_t last_block; 2985 #ifdef CONFIG_F2FS_FS_COMPRESSION 2986 struct inode *inode = mapping->host; 2987 struct compress_ctx cc = { 2988 .inode = inode, 2989 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size, 2990 .cluster_size = F2FS_I(inode)->i_cluster_size, 2991 .cluster_idx = NULL_CLUSTER, 2992 .rpages = NULL, 2993 .nr_rpages = 0, 2994 .cpages = NULL, 2995 .valid_nr_cpages = 0, 2996 .rbuf = NULL, 2997 .cbuf = NULL, 2998 .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size, 2999 .private = NULL, 3000 }; 3001 #endif 3002 int nr_pages; 3003 pgoff_t index; 3004 pgoff_t end; /* Inclusive */ 3005 pgoff_t done_index; 3006 int range_whole = 0; 3007 xa_mark_t tag; 3008 int nwritten = 0; 3009 int submitted = 0; 3010 int i; 3011 3012 if (get_dirty_pages(mapping->host) <= 3013 SM_I(F2FS_M_SB(mapping))->min_hot_blocks) 3014 set_inode_flag(mapping->host, FI_HOT_DATA); 3015 else 3016 clear_inode_flag(mapping->host, FI_HOT_DATA); 3017 3018 if (wbc->range_cyclic) { 3019 index = mapping->writeback_index; /* prev offset */ 3020 end = -1; 3021 } else { 3022 index = wbc->range_start >> PAGE_SHIFT; 3023 end = wbc->range_end >> PAGE_SHIFT; 3024 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) 3025 range_whole = 1; 3026 } 3027 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 3028 tag = PAGECACHE_TAG_TOWRITE; 3029 else 3030 tag = PAGECACHE_TAG_DIRTY; 3031 retry: 3032 retry = 0; 3033 if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) 3034 tag_pages_for_writeback(mapping, index, end); 3035 done_index = index; 3036 while (!done && !retry && (index <= end)) { 3037 nr_pages = find_get_pages_range_tag(mapping, &index, end, 3038 tag, F2FS_ONSTACK_PAGES, pages); 3039 if (nr_pages == 0) 3040 break; 3041 3042 for (i = 0; i < nr_pages; i++) { 3043 struct page *page = pages[i]; 3044 bool need_readd; 3045 readd: 3046 need_readd = false; 3047 #ifdef CONFIG_F2FS_FS_COMPRESSION 3048 if (f2fs_compressed_file(inode)) { 3049 void *fsdata = NULL; 3050 struct page *pagep; 3051 int ret2; 3052 3053 ret = f2fs_init_compress_ctx(&cc); 3054 if (ret) { 3055 done = 1; 3056 break; 3057 } 3058 3059 if 
(!f2fs_cluster_can_merge_page(&cc, 3060 page->index)) { 3061 ret = f2fs_write_multi_pages(&cc, 3062 &submitted, wbc, io_type); 3063 if (!ret) 3064 need_readd = true; 3065 goto result; 3066 } 3067 3068 if (unlikely(f2fs_cp_error(sbi))) 3069 goto lock_page; 3070 3071 if (!f2fs_cluster_is_empty(&cc)) 3072 goto lock_page; 3073 3074 if (f2fs_all_cluster_page_ready(&cc, 3075 pages, i, nr_pages, true)) 3076 goto lock_page; 3077 3078 ret2 = f2fs_prepare_compress_overwrite( 3079 inode, &pagep, 3080 page->index, &fsdata); 3081 if (ret2 < 0) { 3082 ret = ret2; 3083 done = 1; 3084 break; 3085 } else if (ret2 && 3086 (!f2fs_compress_write_end(inode, 3087 fsdata, page->index, 1) || 3088 !f2fs_all_cluster_page_ready(&cc, 3089 pages, i, nr_pages, false))) { 3090 retry = 1; 3091 break; 3092 } 3093 } 3094 #endif 3095 /* give a priority to WB_SYNC threads */ 3096 if (atomic_read(&sbi->wb_sync_req[DATA]) && 3097 wbc->sync_mode == WB_SYNC_NONE) { 3098 done = 1; 3099 break; 3100 } 3101 #ifdef CONFIG_F2FS_FS_COMPRESSION 3102 lock_page: 3103 #endif 3104 done_index = page->index; 3105 retry_write: 3106 lock_page(page); 3107 3108 if (unlikely(page->mapping != mapping)) { 3109 continue_unlock: 3110 unlock_page(page); 3111 continue; 3112 } 3113 3114 if (!PageDirty(page)) { 3115 /* someone wrote it for us */ 3116 goto continue_unlock; 3117 } 3118 3119 if (PageWriteback(page)) { 3120 if (wbc->sync_mode == WB_SYNC_NONE) 3121 goto continue_unlock; 3122 f2fs_wait_on_page_writeback(page, DATA, true, true); 3123 } 3124 3125 if (!clear_page_dirty_for_io(page)) 3126 goto continue_unlock; 3127 3128 #ifdef CONFIG_F2FS_FS_COMPRESSION 3129 if (f2fs_compressed_file(inode)) { 3130 get_page(page); 3131 f2fs_compress_ctx_add_page(&cc, page); 3132 continue; 3133 } 3134 #endif 3135 ret = f2fs_write_single_data_page(page, &submitted, 3136 &bio, &last_block, wbc, io_type, 3137 0, true); 3138 if (ret == AOP_WRITEPAGE_ACTIVATE) 3139 unlock_page(page); 3140 #ifdef CONFIG_F2FS_FS_COMPRESSION 3141 result: 3142 #endif 3143 nwritten += submitted; 3144 wbc->nr_to_write -= submitted; 3145 3146 if (unlikely(ret)) { 3147 /* 3148 * keep nr_to_write, since vfs uses this to 3149 * get # of written pages. 
3150 */ 3151 if (ret == AOP_WRITEPAGE_ACTIVATE) { 3152 ret = 0; 3153 goto next; 3154 } else if (ret == -EAGAIN) { 3155 ret = 0; 3156 if (wbc->sync_mode == WB_SYNC_ALL) { 3157 f2fs_io_schedule_timeout( 3158 DEFAULT_IO_TIMEOUT); 3159 goto retry_write; 3160 } 3161 goto next; 3162 } 3163 done_index = page->index + 1; 3164 done = 1; 3165 break; 3166 } 3167 3168 if (wbc->nr_to_write <= 0 && 3169 wbc->sync_mode == WB_SYNC_NONE) { 3170 done = 1; 3171 break; 3172 } 3173 next: 3174 if (need_readd) 3175 goto readd; 3176 } 3177 release_pages(pages, nr_pages); 3178 cond_resched(); 3179 } 3180 #ifdef CONFIG_F2FS_FS_COMPRESSION 3181 /* flush remained pages in compress cluster */ 3182 if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) { 3183 ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type); 3184 nwritten += submitted; 3185 wbc->nr_to_write -= submitted; 3186 if (ret) { 3187 done = 1; 3188 retry = 0; 3189 } 3190 } 3191 if (f2fs_compressed_file(inode)) 3192 f2fs_destroy_compress_ctx(&cc, false); 3193 #endif 3194 if (retry) { 3195 index = 0; 3196 end = -1; 3197 goto retry; 3198 } 3199 if (wbc->range_cyclic && !done) 3200 done_index = 0; 3201 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) 3202 mapping->writeback_index = done_index; 3203 3204 if (nwritten) 3205 f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host, 3206 NULL, 0, DATA); 3207 /* submit cached bio of IPU write */ 3208 if (bio) 3209 f2fs_submit_merged_ipu_write(sbi, &bio, NULL); 3210 3211 return ret; 3212 } 3213 __should_serialize_io(struct inode * inode,struct writeback_control * wbc)3214 static inline bool __should_serialize_io(struct inode *inode, 3215 struct writeback_control *wbc) 3216 { 3217 /* to avoid deadlock in path of data flush */ 3218 if (F2FS_I(inode)->wb_task) 3219 return false; 3220 3221 if (!S_ISREG(inode->i_mode)) 3222 return false; 3223 if (IS_NOQUOTA(inode)) 3224 return false; 3225 3226 if (f2fs_need_compress_data(inode)) 3227 return true; 3228 if (wbc->sync_mode != WB_SYNC_ALL) 3229 return true; 3230 if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks) 3231 return true; 3232 return false; 3233 } 3234 __f2fs_write_data_pages(struct address_space * mapping,struct writeback_control * wbc,enum iostat_type io_type)3235 static int __f2fs_write_data_pages(struct address_space *mapping, 3236 struct writeback_control *wbc, 3237 enum iostat_type io_type) 3238 { 3239 struct inode *inode = mapping->host; 3240 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3241 struct blk_plug plug; 3242 int ret; 3243 bool locked = false; 3244 3245 /* deal with chardevs and other special file */ 3246 if (!mapping->a_ops->writepage) 3247 return 0; 3248 3249 /* skip writing if there is no dirty page in this inode */ 3250 if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE) 3251 return 0; 3252 3253 /* during POR, we don't need to trigger writepage at all. 
*/ 3254 if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING))) 3255 goto skip_write; 3256 3257 if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) && 3258 wbc->sync_mode == WB_SYNC_NONE && 3259 get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) && 3260 f2fs_available_free_memory(sbi, DIRTY_DENTS)) 3261 goto skip_write; 3262 3263 /* skip writing in file defragment preparing stage */ 3264 if (is_inode_flag_set(inode, FI_SKIP_WRITES)) 3265 goto skip_write; 3266 3267 trace_f2fs_writepages(mapping->host, wbc, DATA); 3268 3269 /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ 3270 if (wbc->sync_mode == WB_SYNC_ALL) 3271 atomic_inc(&sbi->wb_sync_req[DATA]); 3272 else if (atomic_read(&sbi->wb_sync_req[DATA])) { 3273 /* to avoid potential deadlock */ 3274 if (current->plug) 3275 blk_finish_plug(current->plug); 3276 goto skip_write; 3277 } 3278 3279 if (__should_serialize_io(inode, wbc)) { 3280 mutex_lock(&sbi->writepages); 3281 locked = true; 3282 } 3283 3284 blk_start_plug(&plug); 3285 ret = f2fs_write_cache_pages(mapping, wbc, io_type); 3286 blk_finish_plug(&plug); 3287 3288 if (locked) 3289 mutex_unlock(&sbi->writepages); 3290 3291 if (wbc->sync_mode == WB_SYNC_ALL) 3292 atomic_dec(&sbi->wb_sync_req[DATA]); 3293 /* 3294 * if some pages were truncated, we cannot guarantee its mapping->host 3295 * to detect pending bios. 3296 */ 3297 3298 f2fs_remove_dirty_inode(inode); 3299 return ret; 3300 3301 skip_write: 3302 wbc->pages_skipped += get_dirty_pages(inode); 3303 trace_f2fs_writepages(mapping->host, wbc, DATA); 3304 return 0; 3305 } 3306 f2fs_write_data_pages(struct address_space * mapping,struct writeback_control * wbc)3307 static int f2fs_write_data_pages(struct address_space *mapping, 3308 struct writeback_control *wbc) 3309 { 3310 struct inode *inode = mapping->host; 3311 3312 return __f2fs_write_data_pages(mapping, wbc, 3313 F2FS_I(inode)->cp_task == current ? 3314 FS_CP_DATA_IO : FS_DATA_IO); 3315 } 3316 f2fs_write_failed(struct inode * inode,loff_t to)3317 void f2fs_write_failed(struct inode *inode, loff_t to) 3318 { 3319 loff_t i_size = i_size_read(inode); 3320 3321 if (IS_NOQUOTA(inode)) 3322 return; 3323 3324 /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */ 3325 if (to > i_size && !f2fs_verity_in_progress(inode)) { 3326 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 3327 filemap_invalidate_lock(inode->i_mapping); 3328 3329 truncate_pagecache(inode, i_size); 3330 f2fs_truncate_blocks(inode, i_size, true); 3331 3332 filemap_invalidate_unlock(inode->i_mapping); 3333 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 3334 } 3335 } 3336 prepare_write_begin(struct f2fs_sb_info * sbi,struct page * page,loff_t pos,unsigned len,block_t * blk_addr,bool * node_changed)3337 static int prepare_write_begin(struct f2fs_sb_info *sbi, 3338 struct page *page, loff_t pos, unsigned len, 3339 block_t *blk_addr, bool *node_changed) 3340 { 3341 struct inode *inode = page->mapping->host; 3342 pgoff_t index = page->index; 3343 struct dnode_of_data dn; 3344 struct page *ipage; 3345 bool locked = false; 3346 int flag = F2FS_GET_BLOCK_PRE_AIO; 3347 int err = 0; 3348 3349 /* 3350 * If a whole page is being written and we already preallocated all the 3351 * blocks, then there is no need to get a block address now. 
3352 */ 3353 if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL)) 3354 return 0; 3355 3356 /* f2fs_lock_op avoids race between write CP and convert_inline_page */ 3357 if (f2fs_has_inline_data(inode)) { 3358 if (pos + len > MAX_INLINE_DATA(inode)) 3359 flag = F2FS_GET_BLOCK_DEFAULT; 3360 f2fs_map_lock(sbi, flag); 3361 locked = true; 3362 } else if ((pos & PAGE_MASK) >= i_size_read(inode)) { 3363 f2fs_map_lock(sbi, flag); 3364 locked = true; 3365 } 3366 3367 restart: 3368 /* check inline_data */ 3369 ipage = f2fs_get_node_page(sbi, inode->i_ino); 3370 if (IS_ERR(ipage)) { 3371 err = PTR_ERR(ipage); 3372 goto unlock_out; 3373 } 3374 3375 set_new_dnode(&dn, inode, ipage, ipage, 0); 3376 3377 if (f2fs_has_inline_data(inode)) { 3378 if (pos + len <= MAX_INLINE_DATA(inode)) { 3379 f2fs_do_read_inline_data(page, ipage); 3380 set_inode_flag(inode, FI_DATA_EXIST); 3381 if (inode->i_nlink) 3382 set_page_private_inline(ipage); 3383 goto out; 3384 } 3385 err = f2fs_convert_inline_page(&dn, page); 3386 if (err || dn.data_blkaddr != NULL_ADDR) 3387 goto out; 3388 } 3389 3390 if (!f2fs_lookup_read_extent_cache_block(inode, index, 3391 &dn.data_blkaddr)) { 3392 if (locked) { 3393 err = f2fs_reserve_block(&dn, index); 3394 goto out; 3395 } 3396 3397 /* hole case */ 3398 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 3399 if (!err && dn.data_blkaddr != NULL_ADDR) 3400 goto out; 3401 f2fs_put_dnode(&dn); 3402 f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO); 3403 WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO); 3404 locked = true; 3405 goto restart; 3406 } 3407 out: 3408 if (!err) { 3409 /* convert_inline_page can make node_changed */ 3410 *blk_addr = dn.data_blkaddr; 3411 *node_changed = dn.node_changed; 3412 } 3413 f2fs_put_dnode(&dn); 3414 unlock_out: 3415 if (locked) 3416 f2fs_map_unlock(sbi, flag); 3417 return err; 3418 } 3419 __find_data_block(struct inode * inode,pgoff_t index,block_t * blk_addr)3420 static int __find_data_block(struct inode *inode, pgoff_t index, 3421 block_t *blk_addr) 3422 { 3423 struct dnode_of_data dn; 3424 struct page *ipage; 3425 int err = 0; 3426 3427 ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino); 3428 if (IS_ERR(ipage)) 3429 return PTR_ERR(ipage); 3430 3431 set_new_dnode(&dn, inode, ipage, ipage, 0); 3432 3433 if (!f2fs_lookup_read_extent_cache_block(inode, index, 3434 &dn.data_blkaddr)) { 3435 /* hole case */ 3436 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE); 3437 if (err) { 3438 dn.data_blkaddr = NULL_ADDR; 3439 err = 0; 3440 } 3441 } 3442 *blk_addr = dn.data_blkaddr; 3443 f2fs_put_dnode(&dn); 3444 return err; 3445 } 3446 __reserve_data_block(struct inode * inode,pgoff_t index,block_t * blk_addr,bool * node_changed)3447 static int __reserve_data_block(struct inode *inode, pgoff_t index, 3448 block_t *blk_addr, bool *node_changed) 3449 { 3450 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3451 struct dnode_of_data dn; 3452 struct page *ipage; 3453 int err = 0; 3454 3455 f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO); 3456 3457 ipage = f2fs_get_node_page(sbi, inode->i_ino); 3458 if (IS_ERR(ipage)) { 3459 err = PTR_ERR(ipage); 3460 goto unlock_out; 3461 } 3462 set_new_dnode(&dn, inode, ipage, ipage, 0); 3463 3464 if (!f2fs_lookup_read_extent_cache_block(dn.inode, index, 3465 &dn.data_blkaddr)) 3466 err = f2fs_reserve_block(&dn, index); 3467 3468 *blk_addr = dn.data_blkaddr; 3469 *node_changed = dn.node_changed; 3470 f2fs_put_dnode(&dn); 3471 3472 unlock_out: 3473 f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO); 3474 return err; 3475 } 3476 
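/*
 * Sketch of the copy-on-write block selection done by
 * prepare_atomic_write_begin() below (a summary only; see the function
 * body for the authoritative logic):
 *
 *	if the write position is beyond i_size
 *		reserve a new block in the COW inode
 *	else if the COW inode already maps this index
 *		reuse that block (use_cow = true)
 *	else if FI_ATOMIC_REPLACE is set
 *		reserve a new block in the COW inode
 *	else
 *		look up the original inode's block, reserve a COW block,
 *		and return the original block address so that write_begin
 *		can read it in for a partial-page overwrite
 *
 * The eventual writeback of atomic files is redirected to the COW inode,
 * since f2fs_do_write_data_page() builds its dnode from cow_inode for
 * atomic files.
 */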
prepare_atomic_write_begin(struct f2fs_sb_info * sbi,struct page * page,loff_t pos,unsigned int len,block_t * blk_addr,bool * node_changed,bool * use_cow)3477 static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi, 3478 struct page *page, loff_t pos, unsigned int len, 3479 block_t *blk_addr, bool *node_changed, bool *use_cow) 3480 { 3481 struct inode *inode = page->mapping->host; 3482 struct inode *cow_inode = F2FS_I(inode)->cow_inode; 3483 pgoff_t index = page->index; 3484 int err = 0; 3485 block_t ori_blk_addr = NULL_ADDR; 3486 3487 /* If pos is beyond the end of file, reserve a new block in COW inode */ 3488 if ((pos & PAGE_MASK) >= i_size_read(inode)) 3489 goto reserve_block; 3490 3491 /* Look for the block in COW inode first */ 3492 err = __find_data_block(cow_inode, index, blk_addr); 3493 if (err) { 3494 return err; 3495 } else if (*blk_addr != NULL_ADDR) { 3496 *use_cow = true; 3497 return 0; 3498 } 3499 3500 if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE)) 3501 goto reserve_block; 3502 3503 /* Look for the block in the original inode */ 3504 err = __find_data_block(inode, index, &ori_blk_addr); 3505 if (err) 3506 return err; 3507 3508 reserve_block: 3509 /* Finally, we should reserve a new block in COW inode for the update */ 3510 err = __reserve_data_block(cow_inode, index, blk_addr, node_changed); 3511 if (err) 3512 return err; 3513 inc_atomic_write_cnt(inode); 3514 3515 if (ori_blk_addr != NULL_ADDR) 3516 *blk_addr = ori_blk_addr; 3517 return 0; 3518 } 3519 f2fs_write_begin(struct file * file,struct address_space * mapping,loff_t pos,unsigned len,unsigned flags,struct page ** pagep,void ** fsdata)3520 static int f2fs_write_begin(struct file *file, struct address_space *mapping, 3521 loff_t pos, unsigned len, unsigned flags, 3522 struct page **pagep, void **fsdata) 3523 { 3524 struct inode *inode = mapping->host; 3525 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3526 struct page *page = NULL; 3527 pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT; 3528 bool need_balance = false; 3529 bool use_cow = false; 3530 block_t blkaddr = NULL_ADDR; 3531 int err = 0; 3532 3533 trace_f2fs_write_begin(inode, pos, len, flags); 3534 3535 if (!f2fs_is_checkpoint_ready(sbi)) { 3536 err = -ENOSPC; 3537 goto fail; 3538 } 3539 3540 /* 3541 * We should check this at this moment to avoid deadlock on inode page 3542 * and #0 page. The locking rule for inline_data conversion should be: 3543 * lock_page(page #0) -> lock_page(inode_page) 3544 */ 3545 if (index != 0) { 3546 err = f2fs_convert_inline_inode(inode); 3547 if (err) 3548 goto fail; 3549 } 3550 3551 #ifdef CONFIG_F2FS_FS_COMPRESSION 3552 if (f2fs_compressed_file(inode)) { 3553 int ret; 3554 3555 *fsdata = NULL; 3556 3557 if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode))) 3558 goto repeat; 3559 3560 ret = f2fs_prepare_compress_overwrite(inode, pagep, 3561 index, fsdata); 3562 if (ret < 0) { 3563 err = ret; 3564 goto fail; 3565 } else if (ret) { 3566 return 0; 3567 } 3568 } 3569 #endif 3570 3571 repeat: 3572 /* 3573 * Do not use grab_cache_page_write_begin() to avoid deadlock due to 3574 * wait_for_stable_page. Will wait that below with our IO control. 
3575 */ 3576 page = f2fs_pagecache_get_page(mapping, index, 3577 FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS); 3578 if (!page) { 3579 err = -ENOMEM; 3580 goto fail; 3581 } 3582 3583 /* TODO: cluster can be compressed due to race with .writepage */ 3584 3585 *pagep = page; 3586 3587 if (f2fs_is_atomic_file(inode)) 3588 err = prepare_atomic_write_begin(sbi, page, pos, len, 3589 &blkaddr, &need_balance, &use_cow); 3590 else 3591 err = prepare_write_begin(sbi, page, pos, len, 3592 &blkaddr, &need_balance); 3593 if (err) 3594 goto fail; 3595 3596 if (need_balance && !IS_NOQUOTA(inode) && 3597 has_not_enough_free_secs(sbi, 0, 0)) { 3598 unlock_page(page); 3599 f2fs_balance_fs(sbi, true); 3600 lock_page(page); 3601 if (page->mapping != mapping) { 3602 /* The page got truncated from under us */ 3603 f2fs_put_page(page, 1); 3604 goto repeat; 3605 } 3606 } 3607 3608 f2fs_wait_on_page_writeback(page, DATA, false, true); 3609 3610 if (len == PAGE_SIZE || PageUptodate(page)) 3611 return 0; 3612 3613 if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) && 3614 !f2fs_verity_in_progress(inode)) { 3615 zero_user_segment(page, len, PAGE_SIZE); 3616 return 0; 3617 } 3618 3619 if (blkaddr == NEW_ADDR) { 3620 zero_user_segment(page, 0, PAGE_SIZE); 3621 SetPageUptodate(page); 3622 } else { 3623 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, 3624 DATA_GENERIC_ENHANCE_READ)) { 3625 err = -EFSCORRUPTED; 3626 f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); 3627 goto fail; 3628 } 3629 err = f2fs_submit_page_read(use_cow ? 3630 F2FS_I(inode)->cow_inode : inode, page, 3631 blkaddr, 0, true); 3632 if (err) 3633 goto fail; 3634 3635 lock_page(page); 3636 if (unlikely(page->mapping != mapping)) { 3637 f2fs_put_page(page, 1); 3638 goto repeat; 3639 } 3640 if (unlikely(!PageUptodate(page))) { 3641 err = -EIO; 3642 goto fail; 3643 } 3644 } 3645 return 0; 3646 3647 fail: 3648 f2fs_put_page(page, 1); 3649 f2fs_write_failed(inode, pos + len); 3650 return err; 3651 } 3652 f2fs_write_end(struct file * file,struct address_space * mapping,loff_t pos,unsigned len,unsigned copied,struct page * page,void * fsdata)3653 static int f2fs_write_end(struct file *file, 3654 struct address_space *mapping, 3655 loff_t pos, unsigned len, unsigned copied, 3656 struct page *page, void *fsdata) 3657 { 3658 struct inode *inode = page->mapping->host; 3659 3660 trace_f2fs_write_end(inode, pos, len, copied); 3661 3662 /* 3663 * This should be come from len == PAGE_SIZE, and we expect copied 3664 * should be PAGE_SIZE. Otherwise, we treat it with zero copied and 3665 * let generic_perform_write() try to copy data again through copied=0. 
3666 */ 3667 if (!PageUptodate(page)) { 3668 if (unlikely(copied != len)) 3669 copied = 0; 3670 else 3671 SetPageUptodate(page); 3672 } 3673 3674 #ifdef CONFIG_F2FS_FS_COMPRESSION 3675 /* overwrite compressed file */ 3676 if (f2fs_compressed_file(inode) && fsdata) { 3677 f2fs_compress_write_end(inode, fsdata, page->index, copied); 3678 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 3679 3680 if (pos + copied > i_size_read(inode) && 3681 !f2fs_verity_in_progress(inode)) 3682 f2fs_i_size_write(inode, pos + copied); 3683 return copied; 3684 } 3685 #endif 3686 3687 if (!copied) 3688 goto unlock_out; 3689 3690 set_page_dirty(page); 3691 3692 if (pos + copied > i_size_read(inode) && 3693 !f2fs_verity_in_progress(inode)) { 3694 f2fs_i_size_write(inode, pos + copied); 3695 if (f2fs_is_atomic_file(inode)) 3696 f2fs_i_size_write(F2FS_I(inode)->cow_inode, 3697 pos + copied); 3698 } 3699 unlock_out: 3700 f2fs_put_page(page, 1); 3701 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); 3702 return copied; 3703 } 3704 f2fs_invalidate_page(struct page * page,unsigned int offset,unsigned int length)3705 void f2fs_invalidate_page(struct page *page, unsigned int offset, 3706 unsigned int length) 3707 { 3708 struct inode *inode = page->mapping->host; 3709 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3710 3711 if (inode->i_ino >= F2FS_ROOT_INO(sbi) && 3712 (offset % PAGE_SIZE || length != PAGE_SIZE)) 3713 return; 3714 3715 if (PageDirty(page)) { 3716 if (inode->i_ino == F2FS_META_INO(sbi)) { 3717 dec_page_count(sbi, F2FS_DIRTY_META); 3718 } else if (inode->i_ino == F2FS_NODE_INO(sbi)) { 3719 dec_page_count(sbi, F2FS_DIRTY_NODES); 3720 } else { 3721 inode_dec_dirty_pages(inode); 3722 f2fs_remove_dirty_inode(inode); 3723 } 3724 } 3725 clear_page_private_all(page); 3726 } 3727 f2fs_release_page(struct page * page,gfp_t wait)3728 int f2fs_release_page(struct page *page, gfp_t wait) 3729 { 3730 /* If this is dirty page, keep PagePrivate */ 3731 if (PageDirty(page)) 3732 return 0; 3733 3734 clear_page_private_all(page); 3735 return 1; 3736 } 3737 f2fs_set_data_page_dirty(struct page * page)3738 static int f2fs_set_data_page_dirty(struct page *page) 3739 { 3740 struct inode *inode = page_file_mapping(page)->host; 3741 3742 trace_f2fs_set_page_dirty(page, DATA); 3743 3744 if (!PageUptodate(page)) 3745 SetPageUptodate(page); 3746 if (PageSwapCache(page)) 3747 return __set_page_dirty_nobuffers(page); 3748 3749 if (__set_page_dirty_nobuffers(page)) { 3750 f2fs_update_dirty_page(inode, page); 3751 return 1; 3752 } 3753 return 0; 3754 } 3755 3756 f2fs_bmap_compress(struct inode * inode,sector_t block)3757 static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block) 3758 { 3759 #ifdef CONFIG_F2FS_FS_COMPRESSION 3760 struct dnode_of_data dn; 3761 sector_t start_idx, blknr = 0; 3762 int ret; 3763 3764 start_idx = round_down(block, F2FS_I(inode)->i_cluster_size); 3765 3766 set_new_dnode(&dn, inode, NULL, NULL, 0); 3767 ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE); 3768 if (ret) 3769 return 0; 3770 3771 if (dn.data_blkaddr != COMPRESS_ADDR) { 3772 dn.ofs_in_node += block - start_idx; 3773 blknr = f2fs_data_blkaddr(&dn); 3774 if (!__is_valid_data_blkaddr(blknr)) 3775 blknr = 0; 3776 } 3777 3778 f2fs_put_dnode(&dn); 3779 return blknr; 3780 #else 3781 return 0; 3782 #endif 3783 } 3784 3785 f2fs_bmap(struct address_space * mapping,sector_t block)3786 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block) 3787 { 3788 struct inode *inode = mapping->host; 3789 sector_t blknr = 0; 3790 3791 if 
(f2fs_has_inline_data(inode)) 3792 goto out; 3793 3794 /* make sure allocating whole blocks */ 3795 if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) 3796 filemap_write_and_wait(mapping); 3797 3798 /* Block number less than F2FS MAX BLOCKS */ 3799 if (unlikely(block >= max_file_blocks(inode))) 3800 goto out; 3801 3802 if (f2fs_compressed_file(inode)) { 3803 blknr = f2fs_bmap_compress(inode, block); 3804 } else { 3805 struct f2fs_map_blocks map; 3806 3807 memset(&map, 0, sizeof(map)); 3808 map.m_lblk = block; 3809 map.m_len = 1; 3810 map.m_next_pgofs = NULL; 3811 map.m_seg_type = NO_CHECK_TYPE; 3812 3813 if (!f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_BMAP)) 3814 blknr = map.m_pblk; 3815 } 3816 out: 3817 trace_f2fs_bmap(inode, block, blknr); 3818 return blknr; 3819 } 3820 3821 #ifdef CONFIG_MIGRATION 3822 #include <linux/migrate.h> 3823 f2fs_migrate_page(struct address_space * mapping,struct page * newpage,struct page * page,enum migrate_mode mode)3824 int f2fs_migrate_page(struct address_space *mapping, 3825 struct page *newpage, struct page *page, enum migrate_mode mode) 3826 { 3827 int rc, extra_count = 0; 3828 3829 BUG_ON(PageWriteback(page)); 3830 3831 rc = migrate_page_move_mapping(mapping, newpage, 3832 page, extra_count); 3833 if (rc != MIGRATEPAGE_SUCCESS) 3834 return rc; 3835 3836 /* guarantee to start from no stale private field */ 3837 set_page_private(newpage, 0); 3838 if (PagePrivate(page)) { 3839 set_page_private(newpage, page_private(page)); 3840 SetPagePrivate(newpage); 3841 get_page(newpage); 3842 3843 set_page_private(page, 0); 3844 ClearPagePrivate(page); 3845 put_page(page); 3846 } 3847 3848 if (mode != MIGRATE_SYNC_NO_COPY) 3849 migrate_page_copy(newpage, page); 3850 else 3851 migrate_page_states(newpage, page); 3852 3853 return MIGRATEPAGE_SUCCESS; 3854 } 3855 #endif 3856 3857 #ifdef CONFIG_SWAP f2fs_migrate_blocks(struct inode * inode,block_t start_blk,unsigned int blkcnt)3858 static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, 3859 unsigned int blkcnt) 3860 { 3861 struct f2fs_sb_info *sbi = F2FS_I_SB(inode); 3862 unsigned int blkofs; 3863 unsigned int blk_per_sec = BLKS_PER_SEC(sbi); 3864 unsigned int secidx = start_blk / blk_per_sec; 3865 unsigned int end_sec = secidx + blkcnt / blk_per_sec; 3866 int ret = 0; 3867 3868 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]); 3869 filemap_invalidate_lock(inode->i_mapping); 3870 3871 set_inode_flag(inode, FI_ALIGNED_WRITE); 3872 set_inode_flag(inode, FI_OPU_WRITE); 3873 3874 for (; secidx < end_sec; secidx++) { 3875 f2fs_down_write(&sbi->pin_sem); 3876 3877 f2fs_lock_op(sbi); 3878 f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); 3879 f2fs_unlock_op(sbi); 3880 3881 set_inode_flag(inode, FI_SKIP_WRITES); 3882 3883 for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { 3884 struct page *page; 3885 unsigned int blkidx = secidx * blk_per_sec + blkofs; 3886 3887 page = f2fs_get_lock_data_page(inode, blkidx, true); 3888 if (IS_ERR(page)) { 3889 f2fs_up_write(&sbi->pin_sem); 3890 ret = PTR_ERR(page); 3891 goto done; 3892 } 3893 3894 set_page_dirty(page); 3895 f2fs_put_page(page, 1); 3896 } 3897 3898 clear_inode_flag(inode, FI_SKIP_WRITES); 3899 3900 ret = filemap_fdatawrite(inode->i_mapping); 3901 3902 f2fs_up_write(&sbi->pin_sem); 3903 3904 if (ret) 3905 break; 3906 } 3907 3908 done: 3909 clear_inode_flag(inode, FI_SKIP_WRITES); 3910 clear_inode_flag(inode, FI_OPU_WRITE); 3911 clear_inode_flag(inode, FI_ALIGNED_WRITE); 3912 3913 filemap_invalidate_unlock(inode->i_mapping); 3914 
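	/*
	 * i_gc_rwsem[WRITE] is dropped last because it was taken before the
	 * invalidate lock at the top of this function. On success the caller,
	 * check_swap_activate(), retries the block mapping for the range that
	 * was just migrated.
	 */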

#ifdef CONFIG_SWAP
static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
							unsigned int blkcnt)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int blkofs;
	unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
	unsigned int secidx = start_blk / blk_per_sec;
	unsigned int end_sec = secidx + blkcnt / blk_per_sec;
	int ret = 0;

	f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	filemap_invalidate_lock(inode->i_mapping);

	set_inode_flag(inode, FI_ALIGNED_WRITE);
	set_inode_flag(inode, FI_OPU_WRITE);

	for (; secidx < end_sec; secidx++) {
		f2fs_down_write(&sbi->pin_sem);

		f2fs_lock_op(sbi);
		f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
		f2fs_unlock_op(sbi);

		set_inode_flag(inode, FI_SKIP_WRITES);

		for (blkofs = 0; blkofs < blk_per_sec; blkofs++) {
			struct page *page;
			unsigned int blkidx = secidx * blk_per_sec + blkofs;

			page = f2fs_get_lock_data_page(inode, blkidx, true);
			if (IS_ERR(page)) {
				f2fs_up_write(&sbi->pin_sem);
				ret = PTR_ERR(page);
				goto done;
			}

			set_page_dirty(page);
			f2fs_put_page(page, 1);
		}

		clear_inode_flag(inode, FI_SKIP_WRITES);

		ret = filemap_fdatawrite(inode->i_mapping);

		f2fs_up_write(&sbi->pin_sem);

		if (ret)
			break;
	}

done:
	clear_inode_flag(inode, FI_SKIP_WRITES);
	clear_inode_flag(inode, FI_OPU_WRITE);
	clear_inode_flag(inode, FI_ALIGNED_WRITE);

	filemap_invalidate_unlock(inode->i_mapping);
	f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

	return ret;
}

static int check_swap_activate(struct swap_info_struct *sis,
				struct file *swap_file, sector_t *span)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	sector_t cur_lblock;
	sector_t last_lblock;
	sector_t pblock;
	sector_t lowest_pblock = -1;
	sector_t highest_pblock = 0;
	int nr_extents = 0;
	unsigned long nr_pblocks;
	unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
	unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1;
	unsigned int not_aligned = 0;
	int ret = 0;

	/*
	 * Map all the blocks into the extent list.  This code doesn't try
	 * to be very smart.
	 */
	cur_lblock = 0;
	last_lblock = bytes_to_blks(inode, i_size_read(inode));

	while (cur_lblock < last_lblock && cur_lblock < sis->max) {
		struct f2fs_map_blocks map;
retry:
		cond_resched();

		memset(&map, 0, sizeof(map));
		map.m_lblk = cur_lblock;
		map.m_len = last_lblock - cur_lblock;
		map.m_next_pgofs = NULL;
		map.m_next_extent = NULL;
		map.m_seg_type = NO_CHECK_TYPE;
		map.m_may_create = false;

		ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
		if (ret)
			goto out;

		/* hole */
		if (!(map.m_flags & F2FS_MAP_FLAGS)) {
			f2fs_err(sbi, "Swapfile has holes");
			ret = -EINVAL;
			goto out;
		}

		pblock = map.m_pblk;
		nr_pblocks = map.m_len;

		if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
				nr_pblocks & sec_blks_mask) {
			not_aligned++;

			nr_pblocks = roundup(nr_pblocks, blks_per_sec);
			if (cur_lblock + nr_pblocks > sis->max)
				nr_pblocks -= blks_per_sec;

			if (!nr_pblocks) {
				/* this extent is last one */
				nr_pblocks = map.m_len;
				f2fs_warn(sbi, "Swapfile: last extent is not aligned to section");
				goto next;
			}

			ret = f2fs_migrate_blocks(inode, cur_lblock,
							nr_pblocks);
			if (ret)
				goto out;
			goto retry;
		}
next:
		if (cur_lblock + nr_pblocks >= sis->max)
			nr_pblocks = sis->max - cur_lblock;

		if (cur_lblock) {	/* exclude the header page */
			if (pblock < lowest_pblock)
				lowest_pblock = pblock;
			if (pblock + nr_pblocks - 1 > highest_pblock)
				highest_pblock = pblock + nr_pblocks - 1;
		}

		/*
		 * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
		 */
		ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
		if (ret < 0)
			goto out;
		nr_extents += ret;
		cur_lblock += nr_pblocks;
	}
	ret = nr_extents;
	*span = 1 + highest_pblock - lowest_pblock;
	if (cur_lblock == 0)
		cur_lblock = 1;	/* force Empty message */
	sis->max = cur_lblock;
	sis->pages = cur_lblock - 1;
	sis->highest_bit = cur_lblock - 1;
out:
	if (not_aligned)
		f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)",
			  not_aligned, blks_per_sec * F2FS_BLKSIZE);
	return ret;
}
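
/*
 * f2fs_swap_activate() below is what swapon(2) eventually calls through
 * mm/swapfile.c.  Before handing the extent list built by
 * check_swap_activate() back to the swap core it requires:
 *
 *	- a regular file on a read-write image,
 *	- a filesystem not mounted in LFS mode (no in-place rewrite there),
 *	- inline data converted and compression disabled on the inode,
 *
 * and on success it pins the inode (FI_PIN_FILE) so GC never relocates the
 * blocks backing the swapfile.
 */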

static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	struct inode *inode = file_inode(file);
	int ret;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
		return -EROFS;

	if (f2fs_lfs_mode(F2FS_I_SB(inode))) {
		f2fs_err(F2FS_I_SB(inode),
			"Swapfile not supported in LFS mode");
		return -EINVAL;
	}

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	if (!f2fs_disable_compressed_file(inode))
		return -EINVAL;

	f2fs_precache_extents(inode);

	ret = check_swap_activate(sis, file, span);
	if (ret < 0)
		return ret;

	stat_inc_swapfile_inode(inode);
	set_inode_flag(inode, FI_PIN_FILE);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return ret;
}

static void f2fs_swap_deactivate(struct file *file)
{
	struct inode *inode = file_inode(file);

	stat_dec_swapfile_inode(inode);
	clear_inode_flag(inode, FI_PIN_FILE);
}
#else
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	return -EOPNOTSUPP;
}

static void f2fs_swap_deactivate(struct file *file)
{
}
#endif

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readahead	= f2fs_readahead,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
	.direct_IO	= noop_direct_IO,
	.bmap		= f2fs_bmap,
	.swap_activate  = f2fs_swap_activate,
	.swap_deactivate = f2fs_swap_deactivate,
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
};

void f2fs_clear_page_cache_dirty_tag(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	__xa_clear_mark(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache =
		kmem_cache_create("f2fs_bio_post_read_ctx",
				 sizeof(struct bio_post_read_ctx), 0, 0, NULL);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}
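
/*
 * Besides the global slab and mempool set up above, each superblock gets
 * its own workqueue (allocated below) on which the post-read steps
 * (decryption, decompression and fs-verity) run outside of bio completion
 * context.  It is only created when at least one of those features is
 * enabled on the image, so plain images pay nothing for it.
 *
 * Typical hand-off from a read completion (sketch only; the work callback
 * name is a placeholder, ctx is a struct bio_post_read_ctx):
 *
 *	INIT_WORK(&ctx->work, post_read_work_fn);
 *	queue_work(ctx->sbi->post_read_wq, &ctx->work);
 */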

int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (!f2fs_sb_has_encrypt(sbi) &&
		!f2fs_sb_has_verity(sbi) &&
		!f2fs_sb_has_compression(sbi))
		return 0;

	sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
						 WQ_UNBOUND | WQ_HIGHPRI,
						 num_online_cpus());
	return sbi->post_read_wq ? 0 : -ENOMEM;
}

void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
{
	if (sbi->post_read_wq)
		destroy_workqueue(sbi->post_read_wq);
}

int __init f2fs_init_bio_entry_cache(void)
{
	bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
			sizeof(struct bio_entry));
	return bio_entry_slab ? 0 : -ENOMEM;
}

void f2fs_destroy_bio_entry_cache(void)
{
	kmem_cache_destroy(bio_entry_slab);
}

static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
			    unsigned int flags, struct iomap *iomap,
			    struct iomap *srcmap)
{
	struct f2fs_map_blocks map = {};
	pgoff_t next_pgofs = 0;
	int err;

	map.m_lblk = bytes_to_blks(inode, offset);
	map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
	map.m_next_pgofs = &next_pgofs;
	map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
	if (flags & IOMAP_WRITE)
		map.m_may_create = true;

	err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO);
	if (err)
		return err;

	iomap->offset = blks_to_bytes(inode, map.m_lblk);

	/*
	 * When inline encryption is enabled, sometimes I/O to an encrypted file
	 * has to be broken up to guarantee DUN contiguity.  Handle this by
	 * limiting the length of the mapping returned.
	 */
	map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);

	/*
	 * We should never see delalloc or compressed extents here based on
	 * prior flushing and checks.
	 */
	if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
		return -EINVAL;
	if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
		return -EINVAL;

	if (map.m_pblk != NULL_ADDR) {
		iomap->length = blks_to_bytes(inode, map.m_len);
		iomap->type = IOMAP_MAPPED;
		iomap->flags |= IOMAP_F_MERGED;
		iomap->bdev = map.m_bdev;
		iomap->addr = blks_to_bytes(inode, map.m_pblk);
	} else {
		if (flags & IOMAP_WRITE)
			return -ENOTBLK;
		iomap->length = blks_to_bytes(inode, next_pgofs) -
				iomap->offset;
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
	}

	if (map.m_flags & F2FS_MAP_NEW)
		iomap->flags |= IOMAP_F_NEW;
	if ((inode->i_state & I_DIRTY_DATASYNC) ||
	    offset + length > i_size_read(inode))
		iomap->flags |= IOMAP_F_DIRTY;

	return 0;
}

const struct iomap_ops f2fs_iomap_ops = {
	.iomap_begin	= f2fs_iomap_begin,
};
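
/*
 * f2fs_iomap_ops only provides ->iomap_begin: nothing needs to be committed
 * afterwards because f2fs uses iomap solely for direct I/O here, and block
 * allocation for writes is done up front in ->iomap_begin via
 * f2fs_map_blocks() with m_may_create set.  The direct I/O path hands this
 * table to the generic iomap machinery, roughly as sketched below (argument
 * names after the ops pointer are placeholders for the dio ops and flags
 * used by that path):
 *
 *	ret = iomap_dio_rw(iocb, iter, &f2fs_iomap_ops, dio_ops,
 *			   dio_flags, ...);
 */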