1 /*
2  * fs/f2fs/data.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/buffer_head.h>
14 #include <linux/mpage.h>
15 #include <linux/writeback.h>
16 #include <linux/backing-dev.h>
17 #include <linux/pagevec.h>
18 #include <linux/blkdev.h>
19 #include <linux/bio.h>
20 #include <linux/prefetch.h>
21 #include <linux/uio.h>
22 #include <linux/cleancache.h>
23 
24 #include "f2fs.h"
25 #include "node.h"
26 #include "segment.h"
27 #include "trace.h"
28 #include <trace/events/f2fs.h>
29 #include <trace/events/android_fs.h>
30 
31 #define NUM_PREALLOC_POST_READ_CTXS	128
32 
33 static struct kmem_cache *bio_post_read_ctx_cache;
34 static mempool_t *bio_post_read_ctx_pool;
35 
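/*
 * Writeback of these pages is guaranteed by checkpoint: pages of the meta
 * and node inodes, directory data, and cold data pages.
 */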
36 static bool __is_cp_guaranteed(struct page *page)
37 {
38 	struct address_space *mapping = page->mapping;
39 	struct inode *inode;
40 	struct f2fs_sb_info *sbi;
41 
42 	if (!mapping)
43 		return false;
44 
45 	inode = mapping->host;
46 	sbi = F2FS_I_SB(inode);
47 
48 	if (inode->i_ino == F2FS_META_INO(sbi) ||
49 			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
50 			S_ISDIR(inode->i_mode) ||
51 			is_cold_data(page))
52 		return true;
53 	return false;
54 }
55 
56 /* postprocessing steps for read bios */
57 enum bio_post_read_step {
58 	STEP_INITIAL = 0,
59 	STEP_DECRYPT,
60 };
61 
62 struct bio_post_read_ctx {
63 	struct bio *bio;
64 	struct work_struct work;
65 	unsigned int cur_step;
66 	unsigned int enabled_steps;
67 };
68 
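/*
 * Read-bio completion: mark each page uptodate or error, unlock it, then
 * free the post-read context (if any) and the bio itself.
 */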
69 static void __read_end_io(struct bio *bio)
70 {
71 	struct page *page;
72 	struct bio_vec *bv;
73 	int i;
74 
75 	bio_for_each_segment_all(bv, bio, i) {
76 		page = bv->bv_page;
77 
78 		/* PG_error was set if any post_read step failed */
79 		if (bio->bi_error || PageError(page)) {
80 			ClearPageUptodate(page);
81 			SetPageError(page);
82 		} else {
83 			SetPageUptodate(page);
84 		}
85 		unlock_page(page);
86 	}
87 	if (bio->bi_private)
88 		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
89 	bio_put(bio);
90 }
91 
92 static void bio_post_read_processing(struct bio_post_read_ctx *ctx);
93 
94 static void decrypt_work(struct work_struct *work)
95 {
96 	struct bio_post_read_ctx *ctx =
97 		container_of(work, struct bio_post_read_ctx, work);
98 
99 	fscrypt_decrypt_bio(ctx->bio);
100 
101 	bio_post_read_processing(ctx);
102 }
103 
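/*
 * Advance to the next enabled post-read step. Decryption is deferred to the
 * fscrypt workqueue; once all steps are done, complete the bio.
 */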
104 static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
105 {
106 	switch (++ctx->cur_step) {
107 	case STEP_DECRYPT:
108 		if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
109 			INIT_WORK(&ctx->work, decrypt_work);
110 			fscrypt_enqueue_decrypt_work(&ctx->work);
111 			return;
112 		}
113 		ctx->cur_step++;
114 		/* fall-through */
115 	default:
116 		__read_end_io(ctx->bio);
117 	}
118 }
119 
120 static bool f2fs_bio_post_read_required(struct bio *bio)
121 {
122 	return bio->bi_private && !bio->bi_error;
123 }
124 
125 static void f2fs_read_end_io(struct bio *bio)
126 {
127 #ifdef CONFIG_F2FS_FAULT_INJECTION
128 	if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
129 		f2fs_show_injection_info(FAULT_IO);
130 		bio->bi_error = -EIO;
131 	}
132 #endif
133 
134 	if (f2fs_bio_post_read_required(bio)) {
135 		struct bio_post_read_ctx *ctx = bio->bi_private;
136 
137 		ctx->cur_step = STEP_INITIAL;
138 		bio_post_read_processing(ctx);
139 		return;
140 	}
141 
142 	__read_end_io(bio);
143 }
144 
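/*
 * Write-bio completion: release dummy padding pages, record I/O errors
 * (stopping checkpoint for CP data), drop writeback counts, and wake up any
 * checkpoint waiter once all CP data writeback has drained.
 */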
145 static void f2fs_write_end_io(struct bio *bio)
146 {
147 	struct f2fs_sb_info *sbi = bio->bi_private;
148 	struct bio_vec *bvec;
149 	int i;
150 
151 	bio_for_each_segment_all(bvec, bio, i) {
152 		struct page *page = bvec->bv_page;
153 		enum count_type type = WB_DATA_TYPE(page);
154 
155 		if (IS_DUMMY_WRITTEN_PAGE(page)) {
156 			set_page_private(page, (unsigned long)NULL);
157 			ClearPagePrivate(page);
158 			unlock_page(page);
159 			mempool_free(page, sbi->write_io_dummy);
160 
161 			if (unlikely(bio->bi_error))
162 				f2fs_stop_checkpoint(sbi, true);
163 			continue;
164 		}
165 
166 		fscrypt_pullback_bio_page(&page, true);
167 
168 		if (unlikely(bio->bi_error)) {
169 			mapping_set_error(page->mapping, -EIO);
170 			if (type == F2FS_WB_CP_DATA)
171 				f2fs_stop_checkpoint(sbi, true);
172 		}
173 
174 		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
175 					page->index != nid_of_node(page));
176 
177 		dec_page_count(sbi, type);
178 		clear_cold_data(page);
179 		end_page_writeback(page);
180 	}
181 	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
182 				wq_has_sleeper(&sbi->cp_wait))
183 		wake_up(&sbi->cp_wait);
184 
185 	bio_put(bio);
186 }
187 
188 /*
189  * Map blk_addr to its target device; if a bio is given, set its bdev and sector.
190  */
191 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
192 				block_t blk_addr, struct bio *bio)
193 {
194 	struct block_device *bdev = sbi->sb->s_bdev;
195 	int i;
196 
197 	for (i = 0; i < sbi->s_ndevs; i++) {
198 		if (FDEV(i).start_blk <= blk_addr &&
199 					FDEV(i).end_blk >= blk_addr) {
200 			blk_addr -= FDEV(i).start_blk;
201 			bdev = FDEV(i).bdev;
202 			break;
203 		}
204 	}
205 	if (bio) {
206 		bio->bi_bdev = bdev;
207 		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
208 	}
209 	return bdev;
210 }
211 
212 int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
213 {
214 	int i;
215 
216 	for (i = 0; i < sbi->s_ndevs; i++)
217 		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
218 			return i;
219 	return 0;
220 }
221 
222 static bool __same_bdev(struct f2fs_sb_info *sbi,
223 				block_t blk_addr, struct bio *bio)
224 {
225 	return f2fs_target_device(sbi, blk_addr, NULL) == bio->bi_bdev;
226 }
227 
228 /*
229  * Low-level block read/write IO operations.
230  */
231 static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
232 				struct writeback_control *wbc,
233 				int npages, bool is_read,
234 				enum page_type type, enum temp_type temp)
235 {
236 	struct bio *bio;
237 
238 	bio = f2fs_bio_alloc(sbi, npages, true);
239 
240 	f2fs_target_device(sbi, blk_addr, bio);
241 	if (is_read) {
242 		bio->bi_end_io = f2fs_read_end_io;
243 		bio->bi_private = NULL;
244 	} else {
245 		bio->bi_end_io = f2fs_write_end_io;
246 		bio->bi_private = sbi;
247 		bio->bi_write_hint = io_type_to_rw_hint(sbi, type, temp);
248 	}
249 	if (wbc)
250 		wbc_init_bio(wbc, bio);
251 
252 	return bio;
253 }
254 
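/*
 * Submit a bio. DATA/NODE write bios are padded with locked dummy pages up to
 * the F2FS_IO_SIZE(sbi) boundary so merged writes stay aligned; a padded NODE
 * bio breaks the next-block address chain, so SBI_NEED_CP is set.
 */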
255 static inline void __submit_bio(struct f2fs_sb_info *sbi,
256 				struct bio *bio, enum page_type type)
257 {
258 	if (!is_read_io(bio_op(bio))) {
259 		unsigned int start;
260 
261 		if (type != DATA && type != NODE)
262 			goto submit_io;
263 
264 		if (f2fs_sb_has_blkzoned(sbi->sb) && current->plug)
265 			blk_finish_plug(current->plug);
266 
267 		start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
268 		start %= F2FS_IO_SIZE(sbi);
269 
270 		if (start == 0)
271 			goto submit_io;
272 
273 		/* fill dummy pages */
274 		for (; start < F2FS_IO_SIZE(sbi); start++) {
275 			struct page *page =
276 				mempool_alloc(sbi->write_io_dummy,
277 					GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
278 			f2fs_bug_on(sbi, !page);
279 
280 			SetPagePrivate(page);
281 			set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
282 			lock_page(page);
283 			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
284 				f2fs_bug_on(sbi, 1);
285 		}
286 		/*
287 		 * In the NODE case, we lose the next block address chain, so we
288 		 * need to do a checkpoint in f2fs_sync_file.
289 		 */
290 		if (type == NODE)
291 			set_sbi_flag(sbi, SBI_NEED_CP);
292 	}
293 submit_io:
294 	if (is_read_io(bio_op(bio)))
295 		trace_f2fs_submit_read_bio(sbi->sb, type, bio);
296 	else
297 		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
298 	submit_bio(bio);
299 }
300 
301 static void __submit_merged_bio(struct f2fs_bio_info *io)
302 {
303 	struct f2fs_io_info *fio = &io->fio;
304 
305 	if (!io->bio)
306 		return;
307 
308 	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
309 
310 	if (is_read_io(fio->op))
311 		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
312 	else
313 		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
314 
315 	__submit_bio(io->sbi, io->bio, fio->type);
316 	io->bio = NULL;
317 }
318 
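/*
 * Check whether the pending merged bio already holds a page at @idx that
 * belongs to the given inode (or node ino).
 */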
319 static bool __has_merged_page(struct f2fs_bio_info *io,
320 				struct inode *inode, nid_t ino, pgoff_t idx)
321 {
322 	struct bio_vec *bvec;
323 	struct page *target;
324 	int i;
325 
326 	if (!io->bio)
327 		return false;
328 
329 	if (!inode && !ino)
330 		return true;
331 
332 	bio_for_each_segment_all(bvec, io->bio, i) {
333 
334 		if (bvec->bv_page->mapping)
335 			target = bvec->bv_page;
336 		else
337 			target = fscrypt_control_page(bvec->bv_page);
338 
339 		if (idx != target->index)
340 			continue;
341 
342 		if (inode && inode == target->mapping->host)
343 			return true;
344 		if (ino && ino == ino_of_node(target))
345 			return true;
346 	}
347 
348 	return false;
349 }
350 
351 static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
352 				nid_t ino, pgoff_t idx, enum page_type type)
353 {
354 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
355 	enum temp_type temp;
356 	struct f2fs_bio_info *io;
357 	bool ret = false;
358 
359 	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
360 		io = sbi->write_io[btype] + temp;
361 
362 		down_read(&io->io_rwsem);
363 		ret = __has_merged_page(io, inode, ino, idx);
364 		up_read(&io->io_rwsem);
365 
366 		/* TODO: use HOT temp only for meta pages now. */
367 		if (ret || btype == META)
368 			break;
369 	}
370 	return ret;
371 }
372 
373 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
374 				enum page_type type, enum temp_type temp)
375 {
376 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
377 	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
378 
379 	down_write(&io->io_rwsem);
380 
381 	/* change META to META_FLUSH in the checkpoint procedure */
382 	if (type >= META_FLUSH) {
383 		io->fio.type = META_FLUSH;
384 		io->fio.op = REQ_OP_WRITE;
385 		io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
386 		if (!test_opt(sbi, NOBARRIER))
387 			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
388 	}
389 	__submit_merged_bio(io);
390 	up_write(&io->io_rwsem);
391 }
392 
393 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
394 				struct inode *inode, nid_t ino, pgoff_t idx,
395 				enum page_type type, bool force)
396 {
397 	enum temp_type temp;
398 
399 	if (!force && !has_merged_page(sbi, inode, ino, idx, type))
400 		return;
401 
402 	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
403 
404 		__f2fs_submit_merged_write(sbi, type, temp);
405 
406 		/* TODO: use HOT temp only for meta pages now. */
407 		if (type >= META)
408 			break;
409 	}
410 }
411 
412 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
413 {
414 	__submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
415 }
416 
417 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
418 				struct inode *inode, nid_t ino, pgoff_t idx,
419 				enum page_type type)
420 {
421 	__submit_merged_write_cond(sbi, inode, ino, idx, type, false);
422 }
423 
424 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
425 {
426 	f2fs_submit_merged_write(sbi, DATA);
427 	f2fs_submit_merged_write(sbi, NODE);
428 	f2fs_submit_merged_write(sbi, META);
429 }
430 
431 /*
432  * Fill the locked page with data located in the block address.
433  * A caller needs to unlock the page on failure.
434  */
435 int f2fs_submit_page_bio(struct f2fs_io_info *fio)
436 {
437 	struct bio *bio;
438 	struct page *page = fio->encrypted_page ?
439 			fio->encrypted_page : fio->page;
440 
441 	verify_block_addr(fio, fio->new_blkaddr);
442 	trace_f2fs_submit_page_bio(page, fio);
443 	f2fs_trace_ios(fio, 0);
444 
445 	/* Allocate a new bio */
446 	bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
447 				1, is_read_io(fio->op), fio->type, fio->temp);
448 
449 	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
450 		bio_put(bio);
451 		return -EFAULT;
452 	}
453 	bio_set_op_attrs(bio, fio->op, fio->op_flags);
454 
455 	__submit_bio(fio->sbi, bio, fio->type);
456 
457 	if (!is_read_io(fio->op))
458 		inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page));
459 	return 0;
460 }
461 
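/*
 * Add the page to the per-type, per-temperature merged write bio. The bio is
 * flushed and reallocated whenever the new block is not contiguous, the op
 * flags differ, or the target device changes.
 */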
462 int f2fs_submit_page_write(struct f2fs_io_info *fio)
463 {
464 	struct f2fs_sb_info *sbi = fio->sbi;
465 	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
466 	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
467 	struct page *bio_page;
468 	int err = 0;
469 
470 	f2fs_bug_on(sbi, is_read_io(fio->op));
471 
472 	down_write(&io->io_rwsem);
473 next:
474 	if (fio->in_list) {
475 		spin_lock(&io->io_lock);
476 		if (list_empty(&io->io_list)) {
477 			spin_unlock(&io->io_lock);
478 			goto out_fail;
479 		}
480 		fio = list_first_entry(&io->io_list,
481 						struct f2fs_io_info, list);
482 		list_del(&fio->list);
483 		spin_unlock(&io->io_lock);
484 	}
485 
486 	if (fio->old_blkaddr != NEW_ADDR)
487 		verify_block_addr(fio, fio->old_blkaddr);
488 	verify_block_addr(fio, fio->new_blkaddr);
489 
490 	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
491 
492 	/* set submitted = true as a return value */
493 	fio->submitted = true;
494 
495 	inc_page_count(sbi, WB_DATA_TYPE(bio_page));
496 
497 	if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
498 	    (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
499 			!__same_bdev(sbi, fio->new_blkaddr, io->bio)))
500 		__submit_merged_bio(io);
501 alloc_new:
502 	if (io->bio == NULL) {
503 		if ((fio->type == DATA || fio->type == NODE) &&
504 				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
505 			err = -EAGAIN;
506 			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
507 			goto out_fail;
508 		}
509 		io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
510 						BIO_MAX_PAGES, false,
511 						fio->type, fio->temp);
512 		io->fio = *fio;
513 	}
514 
515 	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
516 		__submit_merged_bio(io);
517 		goto alloc_new;
518 	}
519 
520 	if (fio->io_wbc)
521 		wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE);
522 
523 	io->last_block_in_bio = fio->new_blkaddr;
524 	f2fs_trace_ios(fio, 0);
525 
526 	trace_f2fs_submit_page_write(fio->page, fio);
527 
528 	if (fio->in_list)
529 		goto next;
530 out_fail:
531 	up_write(&io->io_rwsem);
532 	return err;
533 }
534 
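/*
 * Allocate a read bio aimed at @blkaddr; for encrypted files, attach a
 * post-read context so the data is decrypted on completion.
 */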
535 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
536 							 unsigned nr_pages)
537 {
538 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
539 	struct bio *bio;
540 	struct bio_post_read_ctx *ctx;
541 	unsigned int post_read_steps = 0;
542 
543 	bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
544 	if (!bio)
545 		return ERR_PTR(-ENOMEM);
546 	f2fs_target_device(sbi, blkaddr, bio);
547 	bio->bi_end_io = f2fs_read_end_io;
548 	bio_set_op_attrs(bio, REQ_OP_READ, 0);
549 
550 	if (f2fs_encrypted_file(inode))
551 		post_read_steps |= 1 << STEP_DECRYPT;
552 	if (post_read_steps) {
553 		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
554 		if (!ctx) {
555 			bio_put(bio);
556 			return ERR_PTR(-ENOMEM);
557 		}
558 		ctx->bio = bio;
559 		ctx->enabled_steps = post_read_steps;
560 		bio->bi_private = ctx;
561 
562 		/* wait for the page to be moved by cleaning */
563 		f2fs_wait_on_block_writeback(sbi, blkaddr);
564 	}
565 
566 	return bio;
567 }
568 
569 /* This can handle encrypted pages as well */
570 static int f2fs_submit_page_read(struct inode *inode, struct page *page,
571 							block_t blkaddr)
572 {
573 	struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1);
574 
575 	if (IS_ERR(bio))
576 		return PTR_ERR(bio);
577 
578 	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
579 		bio_put(bio);
580 		return -EFAULT;
581 	}
582 	__submit_bio(F2FS_I_SB(inode), bio, DATA);
583 	return 0;
584 }
585 
586 static void __set_data_blkaddr(struct dnode_of_data *dn)
587 {
588 	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
589 	__le32 *addr_array;
590 	int base = 0;
591 
592 	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
593 		base = get_extra_isize(dn->inode);
594 
595 	/* Get physical address of data block */
596 	addr_array = blkaddr_in_node(rn);
597 	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
598 }
599 
600 /*
601  * Lock ordering for the change of data block address:
602  * ->data_page
603  *  ->node_page
604  *    update block addresses in the node page
605  */
606 void set_data_blkaddr(struct dnode_of_data *dn)
607 {
608 	f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
609 	__set_data_blkaddr(dn);
610 	if (set_page_dirty(dn->node_page))
611 		dn->node_changed = true;
612 }
613 
614 void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
615 {
616 	dn->data_blkaddr = blkaddr;
617 	set_data_blkaddr(dn);
618 	f2fs_update_extent_cache(dn);
619 }
620 
621 /* dn->ofs_in_node will be returned with up-to-date last block pointer */
622 int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
623 {
624 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
625 	int err;
626 
627 	if (!count)
628 		return 0;
629 
630 	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
631 		return -EPERM;
632 	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
633 		return err;
634 
635 	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
636 						dn->ofs_in_node, count);
637 
638 	f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
639 
640 	for (; count > 0; dn->ofs_in_node++) {
641 		block_t blkaddr = datablock_addr(dn->inode,
642 					dn->node_page, dn->ofs_in_node);
643 		if (blkaddr == NULL_ADDR) {
644 			dn->data_blkaddr = NEW_ADDR;
645 			__set_data_blkaddr(dn);
646 			count--;
647 		}
648 	}
649 
650 	if (set_page_dirty(dn->node_page))
651 		dn->node_changed = true;
652 	return 0;
653 }
654 
655 /* Should keep dn->ofs_in_node unchanged */
656 int reserve_new_block(struct dnode_of_data *dn)
657 {
658 	unsigned int ofs_in_node = dn->ofs_in_node;
659 	int ret;
660 
661 	ret = reserve_new_blocks(dn, 1);
662 	dn->ofs_in_node = ofs_in_node;
663 	return ret;
664 }
665 
666 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
667 {
668 	bool need_put = dn->inode_page ? false : true;
669 	int err;
670 
671 	err = get_dnode_of_data(dn, index, ALLOC_NODE);
672 	if (err)
673 		return err;
674 
675 	if (dn->data_blkaddr == NULL_ADDR)
676 		err = reserve_new_block(dn);
677 	if (err || need_put)
678 		f2fs_put_dnode(dn);
679 	return err;
680 }
681 
682 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
683 {
684 	struct extent_info ei  = {0,0,0};
685 	struct inode *inode = dn->inode;
686 
687 	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
688 		dn->data_blkaddr = ei.blk + index - ei.fofs;
689 		return 0;
690 	}
691 
692 	return f2fs_reserve_block(dn, index);
693 }
694 
695 struct page *get_read_data_page(struct inode *inode, pgoff_t index,
696 						int op_flags, bool for_write)
697 {
698 	struct address_space *mapping = inode->i_mapping;
699 	struct dnode_of_data dn;
700 	struct page *page;
701 	struct extent_info ei = {0,0,0};
702 	int err;
703 
704 	page = f2fs_grab_cache_page(mapping, index, for_write);
705 	if (!page)
706 		return ERR_PTR(-ENOMEM);
707 
708 	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
709 		dn.data_blkaddr = ei.blk + index - ei.fofs;
710 		goto got_it;
711 	}
712 
713 	set_new_dnode(&dn, inode, NULL, NULL, 0);
714 	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
715 	if (err)
716 		goto put_err;
717 	f2fs_put_dnode(&dn);
718 
719 	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
720 		err = -ENOENT;
721 		goto put_err;
722 	}
723 got_it:
724 	if (PageUptodate(page)) {
725 		unlock_page(page);
726 		return page;
727 	}
728 
729 	/*
730 	 * A new dentry page is allocated but not able to be written, since its
731 	 * new inode page couldn't be allocated due to -ENOSPC.
732 	 * In such a case, its blkaddr can remain NEW_ADDR.
733 	 * See f2fs_add_link -> get_new_data_page -> init_inode_metadata.
734 	 */
735 	if (dn.data_blkaddr == NEW_ADDR) {
736 		zero_user_segment(page, 0, PAGE_SIZE);
737 		if (!PageUptodate(page))
738 			SetPageUptodate(page);
739 		unlock_page(page);
740 		return page;
741 	}
742 
743 	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr);
744 	if (err)
745 		goto put_err;
746 	return page;
747 
748 put_err:
749 	f2fs_put_page(page, 1);
750 	return ERR_PTR(err);
751 }
752 
753 struct page *find_data_page(struct inode *inode, pgoff_t index)
754 {
755 	struct address_space *mapping = inode->i_mapping;
756 	struct page *page;
757 
758 	page = find_get_page(mapping, index);
759 	if (page && PageUptodate(page))
760 		return page;
761 	f2fs_put_page(page, 0);
762 
763 	page = get_read_data_page(inode, index, 0, false);
764 	if (IS_ERR(page))
765 		return page;
766 
767 	if (PageUptodate(page))
768 		return page;
769 
770 	wait_on_page_locked(page);
771 	if (unlikely(!PageUptodate(page))) {
772 		f2fs_put_page(page, 0);
773 		return ERR_PTR(-EIO);
774 	}
775 	return page;
776 }
777 
778 /*
779  * If it tries to access a hole, return an error.
780  * The callers (functions in dir.c and GC) need to know
781  * whether this page exists or not.
782  */
783 struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
784 							bool for_write)
785 {
786 	struct address_space *mapping = inode->i_mapping;
787 	struct page *page;
788 repeat:
789 	page = get_read_data_page(inode, index, 0, for_write);
790 	if (IS_ERR(page))
791 		return page;
792 
793 	/* wait for read completion */
794 	lock_page(page);
795 	if (unlikely(page->mapping != mapping)) {
796 		f2fs_put_page(page, 1);
797 		goto repeat;
798 	}
799 	if (unlikely(!PageUptodate(page))) {
800 		f2fs_put_page(page, 1);
801 		return ERR_PTR(-EIO);
802 	}
803 	return page;
804 }
805 
806 /*
807  * Caller ensures that this data page is never allocated.
808  * A new zero-filled data page is allocated in the page cache.
809  *
810  * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
811  * f2fs_unlock_op().
812  * Note that ipage is set only by make_empty_dir, and if any error occurs,
813  * ipage should be released by this function.
814  */
815 struct page *get_new_data_page(struct inode *inode,
816 		struct page *ipage, pgoff_t index, bool new_i_size)
817 {
818 	struct address_space *mapping = inode->i_mapping;
819 	struct page *page;
820 	struct dnode_of_data dn;
821 	int err;
822 
823 	page = f2fs_grab_cache_page(mapping, index, true);
824 	if (!page) {
825 		/*
826 		 * before exiting, we should make sure ipage will be released
827 		 * if any error occurs.
828 		 */
829 		f2fs_put_page(ipage, 1);
830 		return ERR_PTR(-ENOMEM);
831 	}
832 
833 	set_new_dnode(&dn, inode, ipage, NULL, 0);
834 	err = f2fs_reserve_block(&dn, index);
835 	if (err) {
836 		f2fs_put_page(page, 1);
837 		return ERR_PTR(err);
838 	}
839 	if (!ipage)
840 		f2fs_put_dnode(&dn);
841 
842 	if (PageUptodate(page))
843 		goto got_it;
844 
845 	if (dn.data_blkaddr == NEW_ADDR) {
846 		zero_user_segment(page, 0, PAGE_SIZE);
847 		if (!PageUptodate(page))
848 			SetPageUptodate(page);
849 	} else {
850 		f2fs_put_page(page, 1);
851 
852 		/* if ipage exists, blkaddr should be NEW_ADDR */
853 		f2fs_bug_on(F2FS_I_SB(inode), ipage);
854 		page = get_lock_data_page(inode, index, true);
855 		if (IS_ERR(page))
856 			return page;
857 	}
858 got_it:
859 	if (new_i_size && i_size_read(inode) <
860 				((loff_t)(index + 1) << PAGE_SHIFT))
861 		f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
862 	return page;
863 }
864 
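/*
 * Allocate a new data block for dn->ofs_in_node, update the node page with
 * the new address, and extend i_size if the block lies beyond it.
 */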
865 static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
866 {
867 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
868 	struct f2fs_summary sum;
869 	struct node_info ni;
870 	pgoff_t fofs;
871 	blkcnt_t count = 1;
872 	int err;
873 
874 	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
875 		return -EPERM;
876 
877 	dn->data_blkaddr = datablock_addr(dn->inode,
878 				dn->node_page, dn->ofs_in_node);
879 	if (dn->data_blkaddr == NEW_ADDR)
880 		goto alloc;
881 
882 	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
883 		return err;
884 
885 alloc:
886 	get_node_info(sbi, dn->nid, &ni);
887 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
888 
889 	allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
890 					&sum, seg_type, NULL, false);
891 	set_data_blkaddr(dn);
892 
893 	/* update i_size */
894 	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
895 							dn->ofs_in_node;
896 	if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
897 		f2fs_i_size_write(dn->inode,
898 				((loff_t)(fofs + 1) << PAGE_SHIFT));
899 	return 0;
900 }
901 
902 int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
903 {
904 	struct inode *inode = file_inode(iocb->ki_filp);
905 	struct f2fs_map_blocks map;
906 	int flag;
907 	int err = 0;
908 	bool direct_io = iocb->ki_flags & IOCB_DIRECT;
909 
910 	/* convert inline data for Direct I/O*/
911 	if (direct_io) {
912 		err = f2fs_convert_inline_inode(inode);
913 		if (err)
914 			return err;
915 	}
916 
917 	if (is_inode_flag_set(inode, FI_NO_PREALLOC))
918 		return 0;
919 
920 	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
921 	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
922 	if (map.m_len > map.m_lblk)
923 		map.m_len -= map.m_lblk;
924 	else
925 		map.m_len = 0;
926 
927 	map.m_next_pgofs = NULL;
928 	map.m_next_extent = NULL;
929 	map.m_seg_type = NO_CHECK_TYPE;
930 
931 	if (direct_io) {
932 		map.m_seg_type = rw_hint_to_seg_type(iocb->ki_hint);
933 		flag = f2fs_force_buffered_io(inode, WRITE) ?
934 					F2FS_GET_BLOCK_PRE_AIO :
935 					F2FS_GET_BLOCK_PRE_DIO;
936 		goto map_blocks;
937 	}
938 	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
939 		err = f2fs_convert_inline_inode(inode);
940 		if (err)
941 			return err;
942 	}
943 	if (f2fs_has_inline_data(inode))
944 		return err;
945 
946 	flag = F2FS_GET_BLOCK_PRE_AIO;
947 
948 map_blocks:
949 	err = f2fs_map_blocks(inode, &map, 1, flag);
950 	if (map.m_len > 0 && err == -ENOSPC) {
951 		if (!direct_io)
952 			set_inode_flag(inode, FI_NO_PREALLOC);
953 		err = 0;
954 	}
955 	return err;
956 }
957 
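/*
 * F2FS_GET_BLOCK_PRE_AIO only takes sbi->node_change for reading; every other
 * mapping flag takes the full f2fs_lock_op()/f2fs_unlock_op() pair.
 */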
958 static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
959 {
960 	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
961 		if (lock)
962 			down_read(&sbi->node_change);
963 		else
964 			up_read(&sbi->node_change);
965 	} else {
966 		if (lock)
967 			f2fs_lock_op(sbi);
968 		else
969 			f2fs_unlock_op(sbi);
970 	}
971 }
972 
973 /*
974  * f2fs_map_blocks() now supports readahead/bmap/rw direct I/O with the
975  * f2fs_map_blocks structure.
976  * If original data blocks are allocated, then give them to blockdev.
977  * Otherwise,
978  *     a. preallocate requested block addresses
979  *     b. do not use extent cache for better performance
980  *     c. give the block addresses to blockdev
981  */
982 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
983 						int create, int flag)
984 {
985 	unsigned int maxblocks = map->m_len;
986 	struct dnode_of_data dn;
987 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
988 	int mode = create ? ALLOC_NODE : LOOKUP_NODE;
989 	pgoff_t pgofs, end_offset, end;
990 	int err = 0, ofs = 1;
991 	unsigned int ofs_in_node, last_ofs_in_node;
992 	blkcnt_t prealloc;
993 	struct extent_info ei = {0,0,0};
994 	block_t blkaddr;
995 	unsigned int start_pgofs;
996 
997 	if (!maxblocks)
998 		return 0;
999 
1000 	map->m_len = 0;
1001 	map->m_flags = 0;
1002 
1003 	/* it only supports block size == page size */
1004 	pgofs =	(pgoff_t)map->m_lblk;
1005 	end = pgofs + maxblocks;
1006 
1007 	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
1008 		map->m_pblk = ei.blk + pgofs - ei.fofs;
1009 		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
1010 		map->m_flags = F2FS_MAP_MAPPED;
1011 		if (map->m_next_extent)
1012 			*map->m_next_extent = pgofs + map->m_len;
1013 		goto out;
1014 	}
1015 
1016 next_dnode:
1017 	if (create)
1018 		__do_map_lock(sbi, flag, true);
1019 
1020 	/* When reading holes, we need its node page */
1021 	set_new_dnode(&dn, inode, NULL, NULL, 0);
1022 	err = get_dnode_of_data(&dn, pgofs, mode);
1023 	if (err) {
1024 		if (flag == F2FS_GET_BLOCK_BMAP)
1025 			map->m_pblk = 0;
1026 		if (err == -ENOENT) {
1027 			err = 0;
1028 			if (map->m_next_pgofs)
1029 				*map->m_next_pgofs =
1030 					get_next_page_offset(&dn, pgofs);
1031 			if (map->m_next_extent)
1032 				*map->m_next_extent =
1033 					get_next_page_offset(&dn, pgofs);
1034 		}
1035 		goto unlock_out;
1036 	}
1037 
1038 	start_pgofs = pgofs;
1039 	prealloc = 0;
1040 	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
1041 	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1042 
1043 next_block:
1044 	blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
1045 
1046 	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
1047 		if (create) {
1048 			if (unlikely(f2fs_cp_error(sbi))) {
1049 				err = -EIO;
1050 				goto sync_out;
1051 			}
1052 			if (flag == F2FS_GET_BLOCK_PRE_AIO) {
1053 				if (blkaddr == NULL_ADDR) {
1054 					prealloc++;
1055 					last_ofs_in_node = dn.ofs_in_node;
1056 				}
1057 			} else {
1058 				err = __allocate_data_block(&dn,
1059 							map->m_seg_type);
1060 				if (!err)
1061 					set_inode_flag(inode, FI_APPEND_WRITE);
1062 			}
1063 			if (err)
1064 				goto sync_out;
1065 			map->m_flags |= F2FS_MAP_NEW;
1066 			blkaddr = dn.data_blkaddr;
1067 		} else {
1068 			if (flag == F2FS_GET_BLOCK_BMAP) {
1069 				map->m_pblk = 0;
1070 				goto sync_out;
1071 			}
1072 			if (flag == F2FS_GET_BLOCK_PRECACHE)
1073 				goto sync_out;
1074 			if (flag == F2FS_GET_BLOCK_FIEMAP &&
1075 						blkaddr == NULL_ADDR) {
1076 				if (map->m_next_pgofs)
1077 					*map->m_next_pgofs = pgofs + 1;
1078 				goto sync_out;
1079 			}
1080 			if (flag != F2FS_GET_BLOCK_FIEMAP) {
1081 				/* for defragment case */
1082 				if (map->m_next_pgofs)
1083 					*map->m_next_pgofs = pgofs + 1;
1084 				goto sync_out;
1085 			}
1086 		}
1087 	}
1088 
1089 	if (flag == F2FS_GET_BLOCK_PRE_AIO)
1090 		goto skip;
1091 
1092 	if (map->m_len == 0) {
1093 		/* preallocated unwritten block should be mapped for fiemap. */
1094 		if (blkaddr == NEW_ADDR)
1095 			map->m_flags |= F2FS_MAP_UNWRITTEN;
1096 		map->m_flags |= F2FS_MAP_MAPPED;
1097 
1098 		map->m_pblk = blkaddr;
1099 		map->m_len = 1;
1100 	} else if ((map->m_pblk != NEW_ADDR &&
1101 			blkaddr == (map->m_pblk + ofs)) ||
1102 			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
1103 			flag == F2FS_GET_BLOCK_PRE_DIO) {
1104 		ofs++;
1105 		map->m_len++;
1106 	} else {
1107 		goto sync_out;
1108 	}
1109 
1110 skip:
1111 	dn.ofs_in_node++;
1112 	pgofs++;
1113 
1114 	/* preallocate blocks in batch for one dnode page */
1115 	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
1116 			(pgofs == end || dn.ofs_in_node == end_offset)) {
1117 
1118 		dn.ofs_in_node = ofs_in_node;
1119 		err = reserve_new_blocks(&dn, prealloc);
1120 		if (err)
1121 			goto sync_out;
1122 
1123 		map->m_len += dn.ofs_in_node - ofs_in_node;
1124 		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
1125 			err = -ENOSPC;
1126 			goto sync_out;
1127 		}
1128 		dn.ofs_in_node = end_offset;
1129 	}
1130 
1131 	if (pgofs >= end)
1132 		goto sync_out;
1133 	else if (dn.ofs_in_node < end_offset)
1134 		goto next_block;
1135 
1136 	if (flag == F2FS_GET_BLOCK_PRECACHE) {
1137 		if (map->m_flags & F2FS_MAP_MAPPED) {
1138 			unsigned int ofs = start_pgofs - map->m_lblk;
1139 
1140 			f2fs_update_extent_cache_range(&dn,
1141 				start_pgofs, map->m_pblk + ofs,
1142 				map->m_len - ofs);
1143 		}
1144 	}
1145 
1146 	f2fs_put_dnode(&dn);
1147 
1148 	if (create) {
1149 		__do_map_lock(sbi, flag, false);
1150 		f2fs_balance_fs(sbi, dn.node_changed);
1151 	}
1152 	goto next_dnode;
1153 
1154 sync_out:
1155 	if (flag == F2FS_GET_BLOCK_PRECACHE) {
1156 		if (map->m_flags & F2FS_MAP_MAPPED) {
1157 			unsigned int ofs = start_pgofs - map->m_lblk;
1158 
1159 			f2fs_update_extent_cache_range(&dn,
1160 				start_pgofs, map->m_pblk + ofs,
1161 				map->m_len - ofs);
1162 		}
1163 		if (map->m_next_extent)
1164 			*map->m_next_extent = pgofs + 1;
1165 	}
1166 	f2fs_put_dnode(&dn);
1167 unlock_out:
1168 	if (create) {
1169 		__do_map_lock(sbi, flag, false);
1170 		f2fs_balance_fs(sbi, dn.node_changed);
1171 	}
1172 out:
1173 	trace_f2fs_map_blocks(inode, map, err);
1174 	return err;
1175 }
1176 
1177 bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
1178 {
1179 	struct f2fs_map_blocks map;
1180 	block_t last_lblk;
1181 	int err;
1182 
1183 	if (pos + len > i_size_read(inode))
1184 		return false;
1185 
1186 	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
1187 	map.m_next_pgofs = NULL;
1188 	map.m_next_extent = NULL;
1189 	map.m_seg_type = NO_CHECK_TYPE;
1190 	last_lblk = F2FS_BLK_ALIGN(pos + len);
1191 
1192 	while (map.m_lblk < last_lblk) {
1193 		map.m_len = last_lblk - map.m_lblk;
1194 		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
1195 		if (err || map.m_len == 0)
1196 			return false;
1197 		map.m_lblk += map.m_len;
1198 	}
1199 	return true;
1200 }
1201 
1202 static int __get_data_block(struct inode *inode, sector_t iblock,
1203 			struct buffer_head *bh, int create, int flag,
1204 			pgoff_t *next_pgofs, int seg_type)
1205 {
1206 	struct f2fs_map_blocks map;
1207 	int err;
1208 
1209 	map.m_lblk = iblock;
1210 	map.m_len = bh->b_size >> inode->i_blkbits;
1211 	map.m_next_pgofs = next_pgofs;
1212 	map.m_next_extent = NULL;
1213 	map.m_seg_type = seg_type;
1214 
1215 	err = f2fs_map_blocks(inode, &map, create, flag);
1216 	if (!err) {
1217 		map_bh(bh, inode->i_sb, map.m_pblk);
1218 		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
1219 		bh->b_size = (u64)map.m_len << inode->i_blkbits;
1220 	}
1221 	return err;
1222 }
1223 
1224 static int get_data_block(struct inode *inode, sector_t iblock,
1225 			struct buffer_head *bh_result, int create, int flag,
1226 			pgoff_t *next_pgofs)
1227 {
1228 	return __get_data_block(inode, iblock, bh_result, create,
1229 							flag, next_pgofs,
1230 							NO_CHECK_TYPE);
1231 }
1232 
1233 static int get_data_block_dio(struct inode *inode, sector_t iblock,
1234 			struct buffer_head *bh_result, int create)
1235 {
1236 	return __get_data_block(inode, iblock, bh_result, create,
1237 						F2FS_GET_BLOCK_DEFAULT, NULL,
1238 						rw_hint_to_seg_type(
1239 							inode->i_write_hint));
1240 }
1241 
1242 static int get_data_block_bmap(struct inode *inode, sector_t iblock,
1243 			struct buffer_head *bh_result, int create)
1244 {
1245 	/* Block number less than F2FS MAX BLOCKS */
1246 	if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
1247 		return -EFBIG;
1248 
1249 	return __get_data_block(inode, iblock, bh_result, create,
1250 						F2FS_GET_BLOCK_BMAP, NULL,
1251 						NO_CHECK_TYPE);
1252 }
1253 
1254 static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
1255 {
1256 	return (offset >> inode->i_blkbits);
1257 }
1258 
1259 static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
1260 {
1261 	return (blk << inode->i_blkbits);
1262 }
1263 
1264 static int f2fs_xattr_fiemap(struct inode *inode,
1265 				struct fiemap_extent_info *fieinfo)
1266 {
1267 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1268 	struct page *page;
1269 	struct node_info ni;
1270 	__u64 phys = 0, len;
1271 	__u32 flags;
1272 	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
1273 	int err = 0;
1274 
1275 	if (f2fs_has_inline_xattr(inode)) {
1276 		int offset;
1277 
1278 		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
1279 						inode->i_ino, false);
1280 		if (!page)
1281 			return -ENOMEM;
1282 
1283 		get_node_info(sbi, inode->i_ino, &ni);
1284 
1285 		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
1286 		offset = offsetof(struct f2fs_inode, i_addr) +
1287 					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1288 					get_inline_xattr_addrs(inode));
1289 
1290 		phys += offset;
1291 		len = inline_xattr_size(inode);
1292 
1293 		f2fs_put_page(page, 1);
1294 
1295 		flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
1296 
1297 		if (!xnid)
1298 			flags |= FIEMAP_EXTENT_LAST;
1299 
1300 		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1301 		if (err || err == 1)
1302 			return err;
1303 	}
1304 
1305 	if (xnid) {
1306 		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
1307 		if (!page)
1308 			return -ENOMEM;
1309 
1310 		get_node_info(sbi, xnid, &ni);
1311 
1312 		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
1313 		len = inode->i_sb->s_blocksize;
1314 
1315 		f2fs_put_page(page, 1);
1316 
1317 		flags = FIEMAP_EXTENT_LAST;
1318 	}
1319 
1320 	if (phys)
1321 		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1322 
1323 	return (err < 0 ? err : 0);
1324 }
1325 
1326 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
1327 		u64 start, u64 len)
1328 {
1329 	struct buffer_head map_bh;
1330 	sector_t start_blk, last_blk;
1331 	pgoff_t next_pgofs;
1332 	u64 logical = 0, phys = 0, size = 0;
1333 	u32 flags = 0;
1334 	int ret = 0;
1335 
1336 	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
1337 		ret = f2fs_precache_extents(inode);
1338 		if (ret)
1339 			return ret;
1340 	}
1341 
1342 	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
1343 	if (ret)
1344 		return ret;
1345 
1346 	inode_lock(inode);
1347 
1348 	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
1349 		ret = f2fs_xattr_fiemap(inode, fieinfo);
1350 		goto out;
1351 	}
1352 
1353 	if (f2fs_has_inline_data(inode)) {
1354 		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
1355 		if (ret != -EAGAIN)
1356 			goto out;
1357 	}
1358 
1359 	if (logical_to_blk(inode, len) == 0)
1360 		len = blk_to_logical(inode, 1);
1361 
1362 	start_blk = logical_to_blk(inode, start);
1363 	last_blk = logical_to_blk(inode, start + len - 1);
1364 
1365 next:
1366 	memset(&map_bh, 0, sizeof(struct buffer_head));
1367 	map_bh.b_size = len;
1368 
1369 	ret = get_data_block(inode, start_blk, &map_bh, 0,
1370 					F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
1371 	if (ret)
1372 		goto out;
1373 
1374 	/* HOLE */
1375 	if (!buffer_mapped(&map_bh)) {
1376 		start_blk = next_pgofs;
1377 
1378 		if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
1379 					F2FS_I_SB(inode)->max_file_blocks))
1380 			goto prep_next;
1381 
1382 		flags |= FIEMAP_EXTENT_LAST;
1383 	}
1384 
1385 	if (size) {
1386 		if (f2fs_encrypted_inode(inode))
1387 			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
1388 
1389 		ret = fiemap_fill_next_extent(fieinfo, logical,
1390 				phys, size, flags);
1391 	}
1392 
1393 	if (start_blk > last_blk || ret)
1394 		goto out;
1395 
1396 	logical = blk_to_logical(inode, start_blk);
1397 	phys = blk_to_logical(inode, map_bh.b_blocknr);
1398 	size = map_bh.b_size;
1399 	flags = 0;
1400 	if (buffer_unwritten(&map_bh))
1401 		flags = FIEMAP_EXTENT_UNWRITTEN;
1402 
1403 	start_blk += logical_to_blk(inode, size);
1404 
1405 prep_next:
1406 	cond_resched();
1407 	if (fatal_signal_pending(current))
1408 		ret = -EINTR;
1409 	else
1410 		goto next;
1411 out:
1412 	if (ret == 1)
1413 		ret = 0;
1414 
1415 	inode_unlock(inode);
1416 	return ret;
1417 }
1418 
1419 /*
1420  * This function was originally taken from fs/mpage.c, and customized for f2fs.
1421  * Major change was from block_size == page_size in f2fs by default.
1422  */
1423 static int f2fs_mpage_readpages(struct address_space *mapping,
1424 			struct list_head *pages, struct page *page,
1425 			unsigned nr_pages)
1426 {
1427 	struct bio *bio = NULL;
1428 	sector_t last_block_in_bio = 0;
1429 	struct inode *inode = mapping->host;
1430 	const unsigned blkbits = inode->i_blkbits;
1431 	const unsigned blocksize = 1 << blkbits;
1432 	sector_t block_in_file;
1433 	sector_t last_block;
1434 	sector_t last_block_in_file;
1435 	sector_t block_nr;
1436 	struct f2fs_map_blocks map;
1437 
1438 	map.m_pblk = 0;
1439 	map.m_lblk = 0;
1440 	map.m_len = 0;
1441 	map.m_flags = 0;
1442 	map.m_next_pgofs = NULL;
1443 	map.m_next_extent = NULL;
1444 	map.m_seg_type = NO_CHECK_TYPE;
1445 
1446 	for (; nr_pages; nr_pages--) {
1447 		if (pages) {
1448 			page = list_last_entry(pages, struct page, lru);
1449 
1450 			prefetchw(&page->flags);
1451 			list_del(&page->lru);
1452 			if (add_to_page_cache_lru(page, mapping,
1453 						  page->index,
1454 						  readahead_gfp_mask(mapping)))
1455 				goto next_page;
1456 		}
1457 
1458 		block_in_file = (sector_t)page->index;
1459 		last_block = block_in_file + nr_pages;
1460 		last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
1461 								blkbits;
1462 		if (last_block > last_block_in_file)
1463 			last_block = last_block_in_file;
1464 
1465 		/*
1466 		 * Map blocks using the previous result first.
1467 		 */
1468 		if ((map.m_flags & F2FS_MAP_MAPPED) &&
1469 				block_in_file > map.m_lblk &&
1470 				block_in_file < (map.m_lblk + map.m_len))
1471 			goto got_it;
1472 
1473 		/*
1474 		 * Then do more f2fs_map_blocks() calls until we are
1475 		 * done with this page.
1476 		 */
1477 		map.m_flags = 0;
1478 
1479 		if (block_in_file < last_block) {
1480 			map.m_lblk = block_in_file;
1481 			map.m_len = last_block - block_in_file;
1482 
1483 			if (f2fs_map_blocks(inode, &map, 0,
1484 						F2FS_GET_BLOCK_DEFAULT))
1485 				goto set_error_page;
1486 		}
1487 got_it:
1488 		if ((map.m_flags & F2FS_MAP_MAPPED)) {
1489 			block_nr = map.m_pblk + block_in_file - map.m_lblk;
1490 			SetPageMappedToDisk(page);
1491 
1492 			if (!PageUptodate(page) && !cleancache_get_page(page)) {
1493 				SetPageUptodate(page);
1494 				goto confused;
1495 			}
1496 		} else {
1497 			zero_user_segment(page, 0, PAGE_SIZE);
1498 			if (!PageUptodate(page))
1499 				SetPageUptodate(page);
1500 			unlock_page(page);
1501 			goto next_page;
1502 		}
1503 
1504 		/*
1505 		 * This page will go to BIO.  Do we need to send this
1506 		 * BIO off first?
1507 		 */
1508 		if (bio && (last_block_in_bio != block_nr - 1 ||
1509 			!__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
1510 submit_and_realloc:
1511 			__submit_bio(F2FS_I_SB(inode), bio, DATA);
1512 			bio = NULL;
1513 		}
1514 		if (bio == NULL) {
1515 			bio = f2fs_grab_read_bio(inode, block_nr, nr_pages);
1516 			if (IS_ERR(bio)) {
1517 				bio = NULL;
1518 				goto set_error_page;
1519 			}
1520 		}
1521 
1522 		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
1523 			goto submit_and_realloc;
1524 
1525 		last_block_in_bio = block_nr;
1526 		goto next_page;
1527 set_error_page:
1528 		SetPageError(page);
1529 		zero_user_segment(page, 0, PAGE_SIZE);
1530 		unlock_page(page);
1531 		goto next_page;
1532 confused:
1533 		if (bio) {
1534 			__submit_bio(F2FS_I_SB(inode), bio, DATA);
1535 			bio = NULL;
1536 		}
1537 		unlock_page(page);
1538 next_page:
1539 		if (pages)
1540 			put_page(page);
1541 	}
1542 	BUG_ON(pages && !list_empty(pages));
1543 	if (bio)
1544 		__submit_bio(F2FS_I_SB(inode), bio, DATA);
1545 	return 0;
1546 }
1547 
1548 static int f2fs_read_data_page(struct file *file, struct page *page)
1549 {
1550 	struct inode *inode = page->mapping->host;
1551 	int ret = -EAGAIN;
1552 
1553 	trace_f2fs_readpage(page, DATA);
1554 
1555 	/* If the file has inline data, try to read it directly */
1556 	if (f2fs_has_inline_data(inode))
1557 		ret = f2fs_read_inline_data(inode, page);
1558 	if (ret == -EAGAIN)
1559 		ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
1560 	return ret;
1561 }
1562 
1563 static int f2fs_read_data_pages(struct file *file,
1564 			struct address_space *mapping,
1565 			struct list_head *pages, unsigned nr_pages)
1566 {
1567 	struct inode *inode = mapping->host;
1568 	struct page *page = list_last_entry(pages, struct page, lru);
1569 
1570 	trace_f2fs_readpages(inode, page, nr_pages);
1571 
1572 	/* If the file has inline data, skip readpages */
1573 	if (f2fs_has_inline_data(inode))
1574 		return 0;
1575 
1576 	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
1577 }
1578 
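/*
 * Encrypt the data page into fio->encrypted_page. On -ENOMEM, flush the
 * merged writes, wait briefly, and retry with __GFP_NOFAIL.
 */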
1579 static int encrypt_one_page(struct f2fs_io_info *fio)
1580 {
1581 	struct inode *inode = fio->page->mapping->host;
1582 	gfp_t gfp_flags = GFP_NOFS;
1583 
1584 	if (!f2fs_encrypted_file(inode))
1585 		return 0;
1586 
1587 	/* wait for GCed page writeback via META_MAPPING */
1588 	f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr);
1589 
1590 retry_encrypt:
1591 	fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
1592 			PAGE_SIZE, 0, fio->page->index, gfp_flags);
1593 	if (!IS_ERR(fio->encrypted_page))
1594 		return 0;
1595 
1596 	/* flush pending IOs and wait for a while in the ENOMEM case */
1597 	if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
1598 		f2fs_flush_merged_writes(fio->sbi);
1599 		congestion_wait(BLK_RW_ASYNC, HZ/50);
1600 		gfp_flags |= __GFP_NOFAIL;
1601 		goto retry_encrypt;
1602 	}
1603 	return PTR_ERR(fio->encrypted_page);
1604 }
1605 
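/*
 * Evaluate the configured IPU policy bits to decide whether this write may be
 * done in place.
 */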
1606 static inline bool check_inplace_update_policy(struct inode *inode,
1607 				struct f2fs_io_info *fio)
1608 {
1609 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1610 	unsigned int policy = SM_I(sbi)->ipu_policy;
1611 
1612 	if (policy & (0x1 << F2FS_IPU_FORCE))
1613 		return true;
1614 	if (policy & (0x1 << F2FS_IPU_SSR) && need_SSR(sbi))
1615 		return true;
1616 	if (policy & (0x1 << F2FS_IPU_UTIL) &&
1617 			utilization(sbi) > SM_I(sbi)->min_ipu_util)
1618 		return true;
1619 	if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && need_SSR(sbi) &&
1620 			utilization(sbi) > SM_I(sbi)->min_ipu_util)
1621 		return true;
1622 
1623 	/*
1624 	 * IPU for rewrite async pages
1625 	 */
1626 	if (policy & (0x1 << F2FS_IPU_ASYNC) &&
1627 			fio && fio->op == REQ_OP_WRITE &&
1628 			!(fio->op_flags & REQ_SYNC) &&
1629 			!f2fs_encrypted_inode(inode))
1630 		return true;
1631 
1632 	/* this is only set during fdatasync */
1633 	if (policy & (0x1 << F2FS_IPU_FSYNC) &&
1634 			is_inode_flag_set(inode, FI_NEED_IPU))
1635 		return true;
1636 
1637 	return false;
1638 }
1639 
1640 bool should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
1641 {
1642 	if (f2fs_is_pinned_file(inode))
1643 		return true;
1644 
1645 	/* if this is cold file, we should overwrite to avoid fragmentation */
1646 	if (file_is_cold(inode))
1647 		return true;
1648 
1649 	return check_inplace_update_policy(inode, fio);
1650 }
1651 
1652 bool should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
1653 {
1654 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1655 
1656 	if (test_opt(sbi, LFS))
1657 		return true;
1658 	if (S_ISDIR(inode->i_mode))
1659 		return true;
1660 	if (f2fs_is_atomic_file(inode))
1661 		return true;
1662 	if (fio) {
1663 		if (is_cold_data(fio->page))
1664 			return true;
1665 		if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
1666 			return true;
1667 	}
1668 	return false;
1669 }
1670 
1671 static inline bool need_inplace_update(struct f2fs_io_info *fio)
1672 {
1673 	struct inode *inode = fio->page->mapping->host;
1674 
1675 	if (should_update_outplace(inode, fio))
1676 		return false;
1677 
1678 	return should_update_inplace(inode, fio);
1679 }
1680 
1681 static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio)
1682 {
1683 	if (fio->old_blkaddr == NEW_ADDR)
1684 		return false;
1685 	if (fio->old_blkaddr == NULL_ADDR)
1686 		return false;
1687 	return true;
1688 }
1689 
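/*
 * Write one data page: in place (IPU) when the policy allows and the old
 * block address is valid, otherwise out of place through write_data_page().
 */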
1690 int do_write_data_page(struct f2fs_io_info *fio)
1691 {
1692 	struct page *page = fio->page;
1693 	struct inode *inode = page->mapping->host;
1694 	struct dnode_of_data dn;
1695 	struct extent_info ei = {0,0,0};
1696 	bool ipu_force = false;
1697 	int err = 0;
1698 
1699 	set_new_dnode(&dn, inode, NULL, NULL, 0);
1700 	if (need_inplace_update(fio) &&
1701 			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
1702 		fio->old_blkaddr = ei.blk + page->index - ei.fofs;
1703 
1704 		if (valid_ipu_blkaddr(fio)) {
1705 			ipu_force = true;
1706 			fio->need_lock = LOCK_DONE;
1707 			goto got_it;
1708 		}
1709 	}
1710 
1711 	/* Avoid a deadlock between page->lock and f2fs_lock_op */
1712 	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
1713 		return -EAGAIN;
1714 
1715 	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
1716 	if (err)
1717 		goto out;
1718 
1719 	fio->old_blkaddr = dn.data_blkaddr;
1720 
1721 	/* This page is already truncated */
1722 	if (fio->old_blkaddr == NULL_ADDR) {
1723 		ClearPageUptodate(page);
1724 		goto out_writepage;
1725 	}
1726 got_it:
1727 	/*
1728 	 * If the current allocation needs SSR,
1729 	 * it is better to do in-place writes for the updated data.
1730 	 */
1731 	if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) {
1732 		err = encrypt_one_page(fio);
1733 		if (err)
1734 			goto out_writepage;
1735 
1736 		set_page_writeback(page);
1737 		ClearPageError(page);
1738 		f2fs_put_dnode(&dn);
1739 		if (fio->need_lock == LOCK_REQ)
1740 			f2fs_unlock_op(fio->sbi);
1741 		err = rewrite_data_page(fio);
1742 		trace_f2fs_do_write_data_page(fio->page, IPU);
1743 		set_inode_flag(inode, FI_UPDATE_WRITE);
1744 		return err;
1745 	}
1746 
1747 	if (fio->need_lock == LOCK_RETRY) {
1748 		if (!f2fs_trylock_op(fio->sbi)) {
1749 			err = -EAGAIN;
1750 			goto out_writepage;
1751 		}
1752 		fio->need_lock = LOCK_REQ;
1753 	}
1754 
1755 	err = encrypt_one_page(fio);
1756 	if (err)
1757 		goto out_writepage;
1758 
1759 	set_page_writeback(page);
1760 	ClearPageError(page);
1761 
1762 	/* LFS mode write path */
1763 	write_data_page(&dn, fio);
1764 	trace_f2fs_do_write_data_page(page, OPU);
1765 	set_inode_flag(inode, FI_APPEND_WRITE);
1766 	if (page->index == 0)
1767 		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
1768 out_writepage:
1769 	f2fs_put_dnode(&dn);
1770 out:
1771 	if (fio->need_lock == LOCK_REQ)
1772 		f2fs_unlock_op(fio->sbi);
1773 	return err;
1774 }
1775 
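/*
 * Common writepage worker: zero the tail of the last page, write inline data
 * directly when possible, otherwise go through do_write_data_page(), and
 * redirty the page when the write must be retried.
 */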
1776 static int __write_data_page(struct page *page, bool *submitted,
1777 				struct writeback_control *wbc,
1778 				enum iostat_type io_type)
1779 {
1780 	struct inode *inode = page->mapping->host;
1781 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1782 	loff_t i_size = i_size_read(inode);
1783 	const pgoff_t end_index = ((unsigned long long) i_size)
1784 							>> PAGE_SHIFT;
1785 	loff_t psize = (page->index + 1) << PAGE_SHIFT;
1786 	unsigned offset = 0;
1787 	bool need_balance_fs = false;
1788 	int err = 0;
1789 	struct f2fs_io_info fio = {
1790 		.sbi = sbi,
1791 		.ino = inode->i_ino,
1792 		.type = DATA,
1793 		.op = REQ_OP_WRITE,
1794 		.op_flags = wbc_to_write_flags(wbc),
1795 		.old_blkaddr = NULL_ADDR,
1796 		.page = page,
1797 		.encrypted_page = NULL,
1798 		.submitted = false,
1799 		.need_lock = LOCK_RETRY,
1800 		.io_type = io_type,
1801 		.io_wbc = wbc,
1802 	};
1803 
1804 	trace_f2fs_writepage(page, DATA);
1805 
1806 	/* we should bypass data pages to let the kworker jobs proceed */
1807 	if (unlikely(f2fs_cp_error(sbi))) {
1808 		mapping_set_error(page->mapping, -EIO);
1809 		goto out;
1810 	}
1811 
1812 	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1813 		goto redirty_out;
1814 
1815 	if (page->index < end_index)
1816 		goto write;
1817 
1818 	/*
1819 	 * If the offset is out-of-range of file size,
1820 	 * this page does not have to be written to disk.
1821 	 */
1822 	offset = i_size & (PAGE_SIZE - 1);
1823 	if ((page->index >= end_index + 1) || !offset)
1824 		goto out;
1825 
1826 	zero_user_segment(page, offset, PAGE_SIZE);
1827 write:
1828 	if (f2fs_is_drop_cache(inode))
1829 		goto out;
1830 	/* we should not write the 0'th page, which holds the journal header */
1831 	if (f2fs_is_volatile_file(inode) && (!page->index ||
1832 			(!wbc->for_reclaim &&
1833 			available_free_memory(sbi, BASE_CHECK))))
1834 		goto redirty_out;
1835 
1836 	/* Dentry blocks are controlled by checkpoint */
1837 	if (S_ISDIR(inode->i_mode)) {
1838 		fio.need_lock = LOCK_DONE;
1839 		err = do_write_data_page(&fio);
1840 		goto done;
1841 	}
1842 
1843 	if (!wbc->for_reclaim)
1844 		need_balance_fs = true;
1845 	else if (has_not_enough_free_secs(sbi, 0, 0))
1846 		goto redirty_out;
1847 	else
1848 		set_inode_flag(inode, FI_HOT_DATA);
1849 
1850 	err = -EAGAIN;
1851 	if (f2fs_has_inline_data(inode)) {
1852 		err = f2fs_write_inline_data(inode, page);
1853 		if (!err)
1854 			goto out;
1855 	}
1856 
1857 	if (err == -EAGAIN) {
1858 		err = do_write_data_page(&fio);
1859 		if (err == -EAGAIN) {
1860 			fio.need_lock = LOCK_REQ;
1861 			err = do_write_data_page(&fio);
1862 		}
1863 	}
1864 
1865 	if (err) {
1866 		file_set_keep_isize(inode);
1867 	} else {
1868 		down_write(&F2FS_I(inode)->i_sem);
1869 		if (F2FS_I(inode)->last_disk_size < psize)
1870 			F2FS_I(inode)->last_disk_size = psize;
1871 		up_write(&F2FS_I(inode)->i_sem);
1872 	}
1873 
1874 done:
1875 	if (err && err != -ENOENT)
1876 		goto redirty_out;
1877 
1878 out:
1879 	inode_dec_dirty_pages(inode);
1880 	if (err)
1881 		ClearPageUptodate(page);
1882 
1883 	if (wbc->for_reclaim) {
1884 		f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
1885 		clear_inode_flag(inode, FI_HOT_DATA);
1886 		remove_dirty_inode(inode);
1887 		submitted = NULL;
1888 	}
1889 
1890 	unlock_page(page);
1891 	if (!S_ISDIR(inode->i_mode))
1892 		f2fs_balance_fs(sbi, need_balance_fs);
1893 
1894 	if (unlikely(f2fs_cp_error(sbi))) {
1895 		f2fs_submit_merged_write(sbi, DATA);
1896 		submitted = NULL;
1897 	}
1898 
1899 	if (submitted)
1900 		*submitted = fio.submitted;
1901 
1902 	return 0;
1903 
1904 redirty_out:
1905 	redirty_page_for_writepage(wbc, page);
1906 	if (!err)
1907 		return AOP_WRITEPAGE_ACTIVATE;
1908 	unlock_page(page);
1909 	return err;
1910 }
1911 
1912 static int f2fs_write_data_page(struct page *page,
1913 					struct writeback_control *wbc)
1914 {
1915 	return __write_data_page(page, NULL, wbc, FS_DATA_IO);
1916 }
1917 
1918 /*
1919  * This function was copied from write_cache_pages() in mm/page-writeback.c.
1920  * The major change is that cold data pages are written in a separate step
1921  * from warm/hot data pages.
1922  */
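/*
 * Added note on the other f2fs-specific differences visible below: the inode
 * is hinted as FI_HOT_DATA when it has no more than min_hot_blocks dirty
 * pages, WB_SYNC_NONE writeback bails out early while a WB_SYNC_ALL writer
 * holds wb_sync_req, and an -EAGAIN result from __write_data_page() is
 * retried under WB_SYNC_ALL after a short congestion wait.
 */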
1923 static int f2fs_write_cache_pages(struct address_space *mapping,
1924 					struct writeback_control *wbc,
1925 					enum iostat_type io_type)
1926 {
1927 	int ret = 0;
1928 	int done = 0;
1929 	struct pagevec pvec;
1930 	int nr_pages;
1931 	pgoff_t uninitialized_var(writeback_index);
1932 	pgoff_t index;
1933 	pgoff_t end;		/* Inclusive */
1934 	pgoff_t done_index;
1935 	pgoff_t last_idx = ULONG_MAX;
1936 	int cycled;
1937 	int range_whole = 0;
1938 	int tag;
1939 
1940 	pagevec_init(&pvec, 0);
1941 
1942 	if (get_dirty_pages(mapping->host) <=
1943 				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
1944 		set_inode_flag(mapping->host, FI_HOT_DATA);
1945 	else
1946 		clear_inode_flag(mapping->host, FI_HOT_DATA);
1947 
1948 	if (wbc->range_cyclic) {
1949 		writeback_index = mapping->writeback_index; /* prev offset */
1950 		index = writeback_index;
1951 		if (index == 0)
1952 			cycled = 1;
1953 		else
1954 			cycled = 0;
1955 		end = -1;
1956 	} else {
1957 		index = wbc->range_start >> PAGE_SHIFT;
1958 		end = wbc->range_end >> PAGE_SHIFT;
1959 		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1960 			range_whole = 1;
1961 		cycled = 1; /* ignore range_cyclic tests */
1962 	}
1963 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
1964 		tag = PAGECACHE_TAG_TOWRITE;
1965 	else
1966 		tag = PAGECACHE_TAG_DIRTY;
1967 retry:
1968 	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
1969 		tag_pages_for_writeback(mapping, index, end);
1970 	done_index = index;
1971 	while (!done && (index <= end)) {
1972 		int i;
1973 
1974 		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
1975 			      min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
1976 		if (nr_pages == 0)
1977 			break;
1978 
1979 		for (i = 0; i < nr_pages; i++) {
1980 			struct page *page = pvec.pages[i];
1981 			bool submitted = false;
1982 
1983 			if (page->index > end) {
1984 				done = 1;
1985 				break;
1986 			}
1987 
1988 			done_index = page->index;
1989 retry_write:
1990 			lock_page(page);
1991 
1992 			if (unlikely(page->mapping != mapping)) {
1993 continue_unlock:
1994 				unlock_page(page);
1995 				continue;
1996 			}
1997 
1998 			if (!PageDirty(page)) {
1999 				/* someone wrote it for us */
2000 				goto continue_unlock;
2001 			}
2002 
2003 			if (PageWriteback(page)) {
2004 				if (wbc->sync_mode != WB_SYNC_NONE)
2005 					f2fs_wait_on_page_writeback(page,
2006 								DATA, true);
2007 				else
2008 					goto continue_unlock;
2009 			}
2010 
2011 			BUG_ON(PageWriteback(page));
2012 			if (!clear_page_dirty_for_io(page))
2013 				goto continue_unlock;
2014 
2015 			ret = __write_data_page(page, &submitted, wbc, io_type);
2016 			if (unlikely(ret)) {
2017 				/*
2018 				 * keep nr_to_write, since vfs uses this to
2019 				 * get # of written pages.
2020 				 */
2021 				if (ret == AOP_WRITEPAGE_ACTIVATE) {
2022 					unlock_page(page);
2023 					ret = 0;
2024 					continue;
2025 				} else if (ret == -EAGAIN) {
2026 					ret = 0;
2027 					if (wbc->sync_mode == WB_SYNC_ALL) {
2028 						cond_resched();
2029 						congestion_wait(BLK_RW_ASYNC,
2030 									HZ/50);
2031 						goto retry_write;
2032 					}
2033 					continue;
2034 				}
2035 				done_index = page->index + 1;
2036 				done = 1;
2037 				break;
2038 			} else if (submitted) {
2039 				last_idx = page->index;
2040 			}
2041 
2042 			/* give priority to WB_SYNC threads */
2043 			if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) ||
2044 					--wbc->nr_to_write <= 0) &&
2045 					wbc->sync_mode == WB_SYNC_NONE) {
2046 				done = 1;
2047 				break;
2048 			}
2049 		}
2050 		pagevec_release(&pvec);
2051 		cond_resched();
2052 	}
2053 
2054 	if (!cycled && !done) {
2055 		cycled = 1;
2056 		index = 0;
2057 		end = writeback_index - 1;
2058 		goto retry;
2059 	}
2060 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2061 		mapping->writeback_index = done_index;
2062 
2063 	if (last_idx != ULONG_MAX)
2064 		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
2065 						0, last_idx, DATA);
2066 
2067 	return ret;
2068 }
2069 
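/*
 * Added summary of the function below: writeback entry point shared by
 * regular and checkpoint-driven data IO.  Writeback is skipped while
 * power-off recovery is in progress, during file defragmentation, for
 * directories with only a few dirty dentry pages under WB_SYNC_NONE, and for
 * WB_SYNC_NONE callers while a WB_SYNC_ALL writer holds wb_sync_req; skipped
 * pages are accounted in wbc->pages_skipped.
 */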
2070 int __f2fs_write_data_pages(struct address_space *mapping,
2071 						struct writeback_control *wbc,
2072 						enum iostat_type io_type)
2073 {
2074 	struct inode *inode = mapping->host;
2075 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2076 	struct blk_plug plug;
2077 	int ret;
2078 
2079 	/* deal with chardevs and other special files */
2080 	if (!mapping->a_ops->writepage)
2081 		return 0;
2082 
2083 	/* skip writing if there is no dirty page in this inode */
2084 	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
2085 		return 0;
2086 
2087 	/* during POR, we don't need to trigger writepage at all. */
2088 	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
2089 		goto skip_write;
2090 
2091 	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
2092 			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
2093 			available_free_memory(sbi, DIRTY_DENTS))
2094 		goto skip_write;
2095 
2096 	/* skip writing during file defragmentation */
2097 	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
2098 		goto skip_write;
2099 
2100 	trace_f2fs_writepages(mapping->host, wbc, DATA);
2101 
2102 	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
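	/*
	 * Added note: this pairs with the wb_sync_req check in
	 * f2fs_write_cache_pages(): while a WB_SYNC_ALL writer is in flight,
	 * WB_SYNC_NONE writeback is skipped here and cut short there, so sync
	 * and async requests do not split each other's merged IOs.
	 */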
2103 	if (wbc->sync_mode == WB_SYNC_ALL)
2104 		atomic_inc(&sbi->wb_sync_req);
2105 	else if (atomic_read(&sbi->wb_sync_req))
2106 		goto skip_write;
2107 
2108 	blk_start_plug(&plug);
2109 	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
2110 	blk_finish_plug(&plug);
2111 
2112 	if (wbc->sync_mode == WB_SYNC_ALL)
2113 		atomic_dec(&sbi->wb_sync_req);
2114 	/*
2115 	 * If some pages were truncated, we cannot rely on their mapping->host
2116 	 * to detect pending bios.
2117 	 */
2118 
2119 	remove_dirty_inode(inode);
2120 	return ret;
2121 
2122 skip_write:
2123 	wbc->pages_skipped += get_dirty_pages(inode);
2124 	trace_f2fs_writepages(mapping->host, wbc, DATA);
2125 	return 0;
2126 }
2127 
2128 static int f2fs_write_data_pages(struct address_space *mapping,
2129 			    struct writeback_control *wbc)
2130 {
2131 	struct inode *inode = mapping->host;
2132 
2133 	return __f2fs_write_data_pages(mapping, wbc,
2134 			F2FS_I(inode)->cp_task == current ?
2135 			FS_CP_DATA_IO : FS_DATA_IO);
2136 }
2137 
2138 static void f2fs_write_failed(struct address_space *mapping, loff_t to)
2139 {
2140 	struct inode *inode = mapping->host;
2141 	loff_t i_size = i_size_read(inode);
2142 
2143 	if (to > i_size) {
2144 		down_write(&F2FS_I(inode)->i_mmap_sem);
2145 		truncate_pagecache(inode, i_size);
2146 		truncate_blocks(inode, i_size, true);
2147 		up_write(&F2FS_I(inode)->i_mmap_sem);
2148 	}
2149 }
2150 
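/*
 * Added summary of the function below: resolve the on-disk block address for
 * the page being prepared by f2fs_write_begin().  Whole-page writes return
 * immediately when blocks were preallocated; inline-data inodes are either
 * served from the inode page or converted; otherwise the address comes from
 * the extent cache or a dnode lookup, taking the F2FS_GET_BLOCK_PRE_AIO map
 * lock whenever a new block may have to be allocated.  The result is
 * returned through blk_addr and node_changed.
 */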
2151 static int prepare_write_begin(struct f2fs_sb_info *sbi,
2152 			struct page *page, loff_t pos, unsigned len,
2153 			block_t *blk_addr, bool *node_changed)
2154 {
2155 	struct inode *inode = page->mapping->host;
2156 	pgoff_t index = page->index;
2157 	struct dnode_of_data dn;
2158 	struct page *ipage;
2159 	bool locked = false;
2160 	struct extent_info ei = {0,0,0};
2161 	int err = 0;
2162 
2163 	/*
2164 	 * we already allocated all the blocks, so we don't need to get
2165 	 * the block addresses when there is no need to fill the page.
2166 	 */
2167 	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
2168 			!is_inode_flag_set(inode, FI_NO_PREALLOC))
2169 		return 0;
2170 
2171 	if (f2fs_has_inline_data(inode) ||
2172 			(pos & PAGE_MASK) >= i_size_read(inode)) {
2173 		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
2174 		locked = true;
2175 	}
2176 restart:
2177 	/* check inline_data */
2178 	ipage = get_node_page(sbi, inode->i_ino);
2179 	if (IS_ERR(ipage)) {
2180 		err = PTR_ERR(ipage);
2181 		goto unlock_out;
2182 	}
2183 
2184 	set_new_dnode(&dn, inode, ipage, ipage, 0);
2185 
2186 	if (f2fs_has_inline_data(inode)) {
2187 		if (pos + len <= MAX_INLINE_DATA(inode)) {
2188 			read_inline_data(page, ipage);
2189 			set_inode_flag(inode, FI_DATA_EXIST);
2190 			if (inode->i_nlink)
2191 				set_inline_node(ipage);
2192 		} else {
2193 			err = f2fs_convert_inline_page(&dn, page);
2194 			if (err)
2195 				goto out;
2196 			if (dn.data_blkaddr == NULL_ADDR)
2197 				err = f2fs_get_block(&dn, index);
2198 		}
2199 	} else if (locked) {
2200 		err = f2fs_get_block(&dn, index);
2201 	} else {
2202 		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
2203 			dn.data_blkaddr = ei.blk + index - ei.fofs;
2204 		} else {
2205 			/* hole case */
2206 			err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
2207 			if (err || dn.data_blkaddr == NULL_ADDR) {
2208 				f2fs_put_dnode(&dn);
2209 				__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
2210 								true);
2211 				locked = true;
2212 				goto restart;
2213 			}
2214 		}
2215 	}
2216 
2217 	/* convert_inline_page can make node_changed */
2218 	*blk_addr = dn.data_blkaddr;
2219 	*node_changed = dn.node_changed;
2220 out:
2221 	f2fs_put_dnode(&dn);
2222 unlock_out:
2223 	if (locked)
2224 		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
2225 	return err;
2226 }
2227 
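/*
 * Added summary of the function below: ->write_begin() for f2fs.  Grab or
 * create the page cache page, resolve its block address via
 * prepare_write_begin(), balance the filesystem if the dnode changed while
 * free sections are low, and make the page uptodate for partial writes,
 * either by zeroing it (NEW_ADDR) or by reading the existing block.  Inline
 * data is converted up front for any index other than 0 to preserve the
 * page #0 -> inode page lock order.
 */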
2228 static int f2fs_write_begin(struct file *file, struct address_space *mapping,
2229 		loff_t pos, unsigned len, unsigned flags,
2230 		struct page **pagep, void **fsdata)
2231 {
2232 	struct inode *inode = mapping->host;
2233 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2234 	struct page *page = NULL;
2235 	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
2236 	bool need_balance = false, drop_atomic = false;
2237 	block_t blkaddr = NULL_ADDR;
2238 	int err = 0;
2239 
2240 	if (trace_android_fs_datawrite_start_enabled()) {
2241 		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
2242 
2243 		path = android_fstrace_get_pathname(pathbuf,
2244 						    MAX_TRACE_PATHBUF_LEN,
2245 						    inode);
2246 		trace_android_fs_datawrite_start(inode, pos, len,
2247 						 current->pid, path,
2248 						 current->comm);
2249 	}
2250 	trace_f2fs_write_begin(inode, pos, len, flags);
2251 
2252 	if (f2fs_is_atomic_file(inode) &&
2253 			!available_free_memory(sbi, INMEM_PAGES)) {
2254 		err = -ENOMEM;
2255 		drop_atomic = true;
2256 		goto fail;
2257 	}
2258 
2259 	/*
2260 	 * We should check this at this moment to avoid deadlock on inode page
2261 	 * and #0 page. The locking rule for inline_data conversion should be:
2262 	 * lock_page(page #0) -> lock_page(inode_page)
2263 	 */
2264 	if (index != 0) {
2265 		err = f2fs_convert_inline_inode(inode);
2266 		if (err)
2267 			goto fail;
2268 	}
2269 repeat:
2270 	/*
2271 	 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
2272 	 * wait_for_stable_page. We will wait for that below under our own IO control.
2273 	 */
2274 	page = f2fs_pagecache_get_page(mapping, index,
2275 				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
2276 	if (!page) {
2277 		err = -ENOMEM;
2278 		goto fail;
2279 	}
2280 
2281 	*pagep = page;
2282 
2283 	err = prepare_write_begin(sbi, page, pos, len,
2284 					&blkaddr, &need_balance);
2285 	if (err)
2286 		goto fail;
2287 
2288 	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
2289 		unlock_page(page);
2290 		f2fs_balance_fs(sbi, true);
2291 		lock_page(page);
2292 		if (page->mapping != mapping) {
2293 			/* The page got truncated from under us */
2294 			f2fs_put_page(page, 1);
2295 			goto repeat;
2296 		}
2297 	}
2298 
2299 	f2fs_wait_on_page_writeback(page, DATA, false);
2300 
2301 	/* wait for GCed page writeback via META_MAPPING */
2302 	if (f2fs_post_read_required(inode))
2303 		f2fs_wait_on_block_writeback(sbi, blkaddr);
2304 
2305 	if (len == PAGE_SIZE || PageUptodate(page))
2306 		return 0;
2307 
2308 	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
2309 		zero_user_segment(page, len, PAGE_SIZE);
2310 		return 0;
2311 	}
2312 
2313 	if (blkaddr == NEW_ADDR) {
2314 		zero_user_segment(page, 0, PAGE_SIZE);
2315 		SetPageUptodate(page);
2316 	} else {
2317 		err = f2fs_submit_page_read(inode, page, blkaddr);
2318 		if (err)
2319 			goto fail;
2320 
2321 		lock_page(page);
2322 		if (unlikely(page->mapping != mapping)) {
2323 			f2fs_put_page(page, 1);
2324 			goto repeat;
2325 		}
2326 		if (unlikely(!PageUptodate(page))) {
2327 			err = -EIO;
2328 			goto fail;
2329 		}
2330 	}
2331 	return 0;
2332 
2333 fail:
2334 	f2fs_put_page(page, 1);
2335 	f2fs_write_failed(mapping, pos + len);
2336 	if (drop_atomic)
2337 		drop_inmem_pages_all(sbi);
2338 	return err;
2339 }
2340 
2341 static int f2fs_write_end(struct file *file,
2342 			struct address_space *mapping,
2343 			loff_t pos, unsigned len, unsigned copied,
2344 			struct page *page, void *fsdata)
2345 {
2346 	struct inode *inode = page->mapping->host;
2347 
2348 	trace_android_fs_datawrite_end(inode, pos, len);
2349 	trace_f2fs_write_end(inode, pos, len, copied);
2350 
2351 	/*
2352 	 * This should come from len == PAGE_SIZE, so we expect copied to be
2353 	 * PAGE_SIZE as well. Otherwise, treat it as zero bytes copied and let
2354 	 * generic_perform_write() try to copy the data again via copied=0.
2355 	 */
2356 	if (!PageUptodate(page)) {
2357 		if (unlikely(copied != len))
2358 			copied = 0;
2359 		else
2360 			SetPageUptodate(page);
2361 	}
2362 	if (!copied)
2363 		goto unlock_out;
2364 
2365 	set_page_dirty(page);
2366 
2367 	if (pos + copied > i_size_read(inode))
2368 		f2fs_i_size_write(inode, pos + copied);
2369 unlock_out:
2370 	f2fs_put_page(page, 1);
2371 	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
2372 	return copied;
2373 }
2374 
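/*
 * Added note: direct IO requires block-aligned file offsets and iovecs.  For
 * f2fs's 4KB block size the mask is 0xfff, so e.g. an offset of 512 bytes,
 * or a user buffer whose address or length is not 4KB-aligned, makes this
 * return -EINVAL.
 */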
2375 static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
2376 			   loff_t offset)
2377 {
2378 	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
2379 
2380 	if (offset & blocksize_mask)
2381 		return -EINVAL;
2382 
2383 	if (iov_iter_alignment(iter) & blocksize_mask)
2384 		return -EINVAL;
2385 
2386 	return 0;
2387 }
2388 
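/*
 * Added summary of the function below: ->direct_IO() for f2fs.  After the
 * alignment check, cases that must go through the page cache
 * (f2fs_force_buffered_io()) return 0 so the upper layers fall back to
 * buffered IO.  The transfer itself runs with dio_rwsem held for read so
 * that block addresses stay stable for the duration of the IO; with
 * IOCB_NOWAIT the semaphore is only trylocked and -EAGAIN is returned when
 * it is contended.  For writes under WHINT_MODE_OFF the write hint is
 * cleared for the duration of the IO and restored afterwards.
 */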
2389 static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
2390 {
2391 	struct address_space *mapping = iocb->ki_filp->f_mapping;
2392 	struct inode *inode = mapping->host;
2393 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2394 	size_t count = iov_iter_count(iter);
2395 	loff_t offset = iocb->ki_pos;
2396 	int rw = iov_iter_rw(iter);
2397 	int err;
2398 	enum rw_hint hint = iocb->ki_hint;
2399 	int whint_mode = F2FS_OPTION(sbi).whint_mode;
2400 
2401 	err = check_direct_IO(inode, iter, offset);
2402 	if (err)
2403 		return err;
2404 
2405 	if (f2fs_force_buffered_io(inode, rw))
2406 		return 0;
2407 
2408 	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
2409 
2410 	if (trace_android_fs_dataread_start_enabled() &&
2411 	    (rw == READ)) {
2412 		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
2413 
2414 		path = android_fstrace_get_pathname(pathbuf,
2415 						    MAX_TRACE_PATHBUF_LEN,
2416 						    inode);
2417 		trace_android_fs_dataread_start(inode, offset,
2418 						count, current->pid, path,
2419 						current->comm);
2420 	}
2421 	if (trace_android_fs_datawrite_start_enabled() &&
2422 	    (rw == WRITE)) {
2423 		char *path, pathbuf[MAX_TRACE_PATHBUF_LEN];
2424 
2425 		path = android_fstrace_get_pathname(pathbuf,
2426 						    MAX_TRACE_PATHBUF_LEN,
2427 						    inode);
2428 		trace_android_fs_datawrite_start(inode, offset, count,
2429 						 current->pid, path,
2430 						 current->comm);
2431 	}
2432 	if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
2433 		iocb->ki_hint = WRITE_LIFE_NOT_SET;
2434 
2435 	if (!down_read_trylock(&F2FS_I(inode)->dio_rwsem[rw])) {
2436 		if (iocb->ki_flags & IOCB_NOWAIT) {
2437 			iocb->ki_hint = hint;
2438 			err = -EAGAIN;
2439 			goto out;
2440 		}
2441 		down_read(&F2FS_I(inode)->dio_rwsem[rw]);
2442 	}
2443 
2444 	err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
2445 	up_read(&F2FS_I(inode)->dio_rwsem[rw]);
2446 
2447 	if (rw == WRITE) {
2448 		if (whint_mode == WHINT_MODE_OFF)
2449 			iocb->ki_hint = hint;
2450 		if (err > 0) {
2451 			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
2452 									err);
2453 			set_inode_flag(inode, FI_UPDATE_WRITE);
2454 		} else if (err < 0) {
2455 			f2fs_write_failed(mapping, offset + count);
2456 		}
2457 	}
2458 out:
2459 	if (trace_android_fs_dataread_start_enabled() &&
2460 	    (rw == READ))
2461 		trace_android_fs_dataread_end(inode, offset, count);
2462 	if (trace_android_fs_datawrite_start_enabled() &&
2463 	    (rw == WRITE))
2464 		trace_android_fs_datawrite_end(inode, offset, count);
2465 
2466 	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);
2467 
2468 	return err;
2469 }
2470 
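/*
 * Added note on the function below: ->invalidatepage() ignores partial
 * invalidations for inodes other than meta/node.  A full-page invalidation
 * drops the per-type dirty page accounting, and atomic-written pages are
 * handed to drop_inmem_page() instead of simply having their private flag
 * cleared.
 */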
2471 void f2fs_invalidate_page(struct page *page, unsigned int offset,
2472 							unsigned int length)
2473 {
2474 	struct inode *inode = page->mapping->host;
2475 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2476 
2477 	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
2478 		(offset % PAGE_SIZE || length != PAGE_SIZE))
2479 		return;
2480 
2481 	if (PageDirty(page)) {
2482 		if (inode->i_ino == F2FS_META_INO(sbi)) {
2483 			dec_page_count(sbi, F2FS_DIRTY_META);
2484 		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
2485 			dec_page_count(sbi, F2FS_DIRTY_NODES);
2486 		} else {
2487 			inode_dec_dirty_pages(inode);
2488 			remove_dirty_inode(inode);
2489 		}
2490 	}
2491 
2492 	/* This is an atomically written page; keep it Private */
2493 	if (IS_ATOMIC_WRITTEN_PAGE(page))
2494 		return drop_inmem_page(inode, page);
2495 
2496 	set_page_private(page, 0);
2497 	ClearPagePrivate(page);
2498 }
2499 
2500 int f2fs_release_page(struct page *page, gfp_t wait)
2501 {
2502 	/* If this is a dirty page, keep PagePrivate */
2503 	if (PageDirty(page))
2504 		return 0;
2505 
2506 	/* This is an atomically written page; keep it Private */
2507 	if (IS_ATOMIC_WRITTEN_PAGE(page))
2508 		return 0;
2509 
2510 	set_page_private(page, 0);
2511 	ClearPagePrivate(page);
2512 	return 1;
2513 }
2514 
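/*
 * Added note on the function below: ->set_page_dirty() for data pages.  For
 * an atomic-write file whose commit has not started yet, a newly dirtied
 * page is registered as an in-memory page via register_inmem_page() rather
 * than being tagged dirty in the page cache, so it is written out only when
 * the atomic write is committed.
 */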
2515 static int f2fs_set_data_page_dirty(struct page *page)
2516 {
2517 	struct address_space *mapping = page->mapping;
2518 	struct inode *inode = mapping->host;
2519 
2520 	trace_f2fs_set_page_dirty(page, DATA);
2521 
2522 	if (!PageUptodate(page))
2523 		SetPageUptodate(page);
2524 
2525 	if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
2526 		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
2527 			register_inmem_page(inode, page);
2528 			return 1;
2529 		}
2530 		/*
2531 		 * This page has already been registered, so we just
2532 		 * return here.
2533 		 */
2534 		return 0;
2535 	}
2536 
2537 	if (!PageDirty(page)) {
2538 		__set_page_dirty_nobuffers(page);
2539 		update_dirty_page(inode, page);
2540 		return 1;
2541 	}
2542 	return 0;
2543 }
2544 
2545 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
2546 {
2547 	struct inode *inode = mapping->host;
2548 
2549 	if (f2fs_has_inline_data(inode))
2550 		return 0;
2551 
2552 	/* make sure all the blocks are allocated */
2553 	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
2554 		filemap_write_and_wait(mapping);
2555 
2556 	return generic_block_bmap(mapping, block, get_data_block_bmap);
2557 }
2558 
2559 #ifdef CONFIG_MIGRATION
2560 #include <linux/migrate.h>
2561 
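/*
 * Added note on the function below: page migration callback.  Atomic-written
 * pages are migrated only in MIGRATE_SYNC mode and under inmem_lock, since
 * the matching inmem_pages entry must be switched from the old page to the
 * new one; extra_count compensates for the PagePrivate reference convention
 * that f2fs does not follow (see the comment below).
 */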
2562 int f2fs_migrate_page(struct address_space *mapping,
2563 		struct page *newpage, struct page *page, enum migrate_mode mode)
2564 {
2565 	int rc, extra_count;
2566 	struct f2fs_inode_info *fi = F2FS_I(mapping->host);
2567 	bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);
2568 
2569 	BUG_ON(PageWriteback(page));
2570 
2571 	/* migrating an atomically written page is safe with the inmem_lock held */
2572 	if (atomic_written) {
2573 		if (mode != MIGRATE_SYNC)
2574 			return -EBUSY;
2575 		if (!mutex_trylock(&fi->inmem_lock))
2576 			return -EAGAIN;
2577 	}
2578 
2579 	/*
2580 	 * A reference is expected if PagePrivate is set when the mapping is
2581 	 * moved; however, F2FS breaks this rule to maintain dirty page counts
2582 	 * when truncating pages. Adjusting 'extra_count' here makes it work.
2583 	 */
2584 	extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
2585 	rc = migrate_page_move_mapping(mapping, newpage,
2586 				page, NULL, mode, extra_count);
2587 	if (rc != MIGRATEPAGE_SUCCESS) {
2588 		if (atomic_written)
2589 			mutex_unlock(&fi->inmem_lock);
2590 		return rc;
2591 	}
2592 
2593 	if (atomic_written) {
2594 		struct inmem_pages *cur;
2595 		list_for_each_entry(cur, &fi->inmem_pages, list)
2596 			if (cur->page == page) {
2597 				cur->page = newpage;
2598 				break;
2599 			}
2600 		mutex_unlock(&fi->inmem_lock);
2601 		put_page(page);
2602 		get_page(newpage);
2603 	}
2604 
2605 	if (PagePrivate(page))
2606 		SetPagePrivate(newpage);
2607 	set_page_private(newpage, page_private(page));
2608 
2609 	migrate_page_copy(newpage, page);
2610 
2611 	return MIGRATEPAGE_SUCCESS;
2612 }
2613 #endif
2614 
2615 const struct address_space_operations f2fs_dblock_aops = {
2616 	.readpage	= f2fs_read_data_page,
2617 	.readpages	= f2fs_read_data_pages,
2618 	.writepage	= f2fs_write_data_page,
2619 	.writepages	= f2fs_write_data_pages,
2620 	.write_begin	= f2fs_write_begin,
2621 	.write_end	= f2fs_write_end,
2622 	.set_page_dirty	= f2fs_set_data_page_dirty,
2623 	.invalidatepage	= f2fs_invalidate_page,
2624 	.releasepage	= f2fs_release_page,
2625 	.direct_IO	= f2fs_direct_IO,
2626 	.bmap		= f2fs_bmap,
2627 #ifdef CONFIG_MIGRATION
2628 	.migratepage    = f2fs_migrate_page,
2629 #endif
2630 };
2631 
2632 int __init f2fs_init_post_read_processing(void)
2633 {
2634 	bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
2635 	if (!bio_post_read_ctx_cache)
2636 		goto fail;
2637 	bio_post_read_ctx_pool =
2638 		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
2639 					 bio_post_read_ctx_cache);
2640 	if (!bio_post_read_ctx_pool)
2641 		goto fail_free_cache;
2642 	return 0;
2643 
2644 fail_free_cache:
2645 	kmem_cache_destroy(bio_post_read_ctx_cache);
2646 fail:
2647 	return -ENOMEM;
2648 }
2649 
2650 void __exit f2fs_destroy_post_read_processing(void)
2651 {
2652 	mempool_destroy(bio_post_read_ctx_pool);
2653 	kmem_cache_destroy(bio_post_read_ctx_cache);
2654 }
2655