1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   * fs/mpage.c
4   *
5   * Copyright (C) 2002, Linus Torvalds.
6   *
7   * Contains functions related to preparing and submitting BIOs which contain
8   * multiple pagecache pages.
9   *
10   * 15May2002	Andrew Morton
11   *		Initial version
12   * 27Jun2002	axboe@suse.de
13   *		use bio_add_page() to build bio's just the right size
14   */
15  
16  #include <linux/kernel.h>
17  #include <linux/export.h>
18  #include <linux/mm.h>
19  #include <linux/kdev_t.h>
20  #include <linux/gfp.h>
21  #include <linux/bio.h>
22  #include <linux/fs.h>
23  #include <linux/buffer_head.h>
24  #include <linux/blkdev.h>
25  #include <linux/highmem.h>
26  #include <linux/prefetch.h>
27  #include <linux/mpage.h>
28  #include <linux/mm_inline.h>
29  #include <linux/writeback.h>
30  #include <linux/backing-dev.h>
31  #include <linux/pagevec.h>
32  #include <linux/cleancache.h>
33  #include "internal.h"
34  
35  /*
36   * I/O completion handler for multipage BIOs.
37   *
38   * The mpage code never puts partial pages into a BIO (except for end-of-file).
39   * If a page does not map to a contiguous run of blocks then it simply falls
40   * back to block_read_full_page().
41   *
42   * Why is this?  If a page's completion depends on a number of different BIOs
43   * which can complete in any order (or at the same time) then determining the
44   * status of that page is hard.  See end_buffer_async_read() for the details.
45   * There is no point in duplicating all that complexity.
46   */
47  static void mpage_end_io(struct bio *bio)
48  {
49  	struct bio_vec *bv;
50  	struct bvec_iter_all iter_all;
51  
52  	bio_for_each_segment_all(bv, bio, iter_all) {
53  		struct page *page = bv->bv_page;
54  		page_endio(page, bio_op(bio),
55  			   blk_status_to_errno(bio->bi_status));
56  	}
57  
58  	bio_put(bio);
59  }
60  
61  static struct bio *mpage_bio_submit(int op, int op_flags, struct bio *bio)
62  {
63  	bio->bi_end_io = mpage_end_io;
64  	bio_set_op_attrs(bio, op, op_flags);
65  	guard_bio_eod(bio);
66  	submit_bio(bio);
67  	return NULL;
68  }
69  
70  static struct bio *
71  mpage_alloc(struct block_device *bdev,
72  		sector_t first_sector, int nr_vecs,
73  		gfp_t gfp_flags)
74  {
75  	struct bio *bio;
76  
77  	/* Restrict the given (page cache) mask for slab allocations */
78  	gfp_flags &= GFP_KERNEL;
79  	bio = bio_alloc(gfp_flags, nr_vecs);
80  
81  	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
82  		while (!bio && (nr_vecs /= 2))
83  			bio = bio_alloc(gfp_flags, nr_vecs);
84  	}
85  
86  	if (bio) {
87  		bio_set_dev(bio, bdev);
88  		bio->bi_iter.bi_sector = first_sector;
89  	}
90  	return bio;
91  }
92  
93  /*
94   * Support function for mpage_readahead.  The fs-supplied get_block might
95   * return an up-to-date buffer.  This is used to map that buffer into
96   * the page, which allows readpage to avoid triggering a duplicate call
97   * to get_block.
98   *
99   * The idea is to avoid adding buffers to pages that don't already have
100   * them.  So when the buffer is up to date and the page size == block size,
101   * this marks the page up to date instead of adding new buffers.
102   */
103  static void
104  map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
105  {
106  	struct inode *inode = page->mapping->host;
107  	struct buffer_head *page_bh, *head;
108  	int block = 0;
109  
110  	if (!page_has_buffers(page)) {
111  		/*
112  		 * don't make any buffers if there is only one buffer on
113  		 * the page and the page just needs to be set up to date
114  		 */
115  		if (inode->i_blkbits == PAGE_SHIFT &&
116  		    buffer_uptodate(bh)) {
117  			SetPageUptodate(page);
118  			return;
119  		}
120  		create_empty_buffers(page, i_blocksize(inode), 0);
121  	}
122  	head = page_buffers(page);
123  	page_bh = head;
124  	do {
125  		if (block == page_block) {
126  			page_bh->b_state = bh->b_state;
127  			page_bh->b_bdev = bh->b_bdev;
128  			page_bh->b_blocknr = bh->b_blocknr;
129  			break;
130  		}
131  		page_bh = page_bh->b_this_page;
132  		block++;
133  	} while (page_bh != head);
134  }
135  
136  struct mpage_readpage_args {
137  	struct bio *bio;
138  	struct page *page;
139  	unsigned int nr_pages;
140  	bool is_readahead;
141  	sector_t last_block_in_bio;
142  	struct buffer_head map_bh;
143  	unsigned long first_logical_block;
144  	get_block_t *get_block;
145  };
146  
147  /*
148   * This is the worker routine which does all the work of mapping the disk
149   * blocks, constructs the largest possible BIOs and submits them for IO
150   * when the blocks are not contiguous on the disk.
151   *
152   * We pass a buffer_head back and forth and use its buffer_mapped() flag to
153   * represent the validity of its disk mapping and to decide when to do the next
154   * get_block() call.
155   */
156  static struct bio *do_mpage_readpage(struct mpage_readpage_args *args)
157  {
158  	struct page *page = args->page;
159  	struct inode *inode = page->mapping->host;
160  	const unsigned blkbits = inode->i_blkbits;
161  	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
162  	const unsigned blocksize = 1 << blkbits;
163  	struct buffer_head *map_bh = &args->map_bh;
164  	sector_t block_in_file;
165  	sector_t last_block;
166  	sector_t last_block_in_file;
167  	sector_t blocks[MAX_BUF_PER_PAGE];
168  	unsigned page_block;
169  	unsigned first_hole = blocks_per_page;
170  	struct block_device *bdev = NULL;
171  	int length;
172  	int fully_mapped = 1;
173  	int op_flags;
174  	unsigned nblocks;
175  	unsigned relative_block;
176  	gfp_t gfp;
177  
178  	if (args->is_readahead) {
179  		op_flags = REQ_RAHEAD;
180  		gfp = readahead_gfp_mask(page->mapping);
181  	} else {
182  		op_flags = 0;
183  		gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
184  	}
185  
186  	if (page_has_buffers(page))
187  		goto confused;
188  
189  	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
190  	last_block = block_in_file + args->nr_pages * blocks_per_page;
191  	last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
192  	if (last_block > last_block_in_file)
193  		last_block = last_block_in_file;
194  	page_block = 0;
195  
196  	/*
197  	 * Map blocks using the result from the previous get_blocks call first.
198  	 */
199  	nblocks = map_bh->b_size >> blkbits;
200  	if (buffer_mapped(map_bh) &&
201  			block_in_file > args->first_logical_block &&
202  			block_in_file < (args->first_logical_block + nblocks)) {
203  		unsigned map_offset = block_in_file - args->first_logical_block;
204  		unsigned last = nblocks - map_offset;
205  
206  		for (relative_block = 0; ; relative_block++) {
207  			if (relative_block == last) {
208  				clear_buffer_mapped(map_bh);
209  				break;
210  			}
211  			if (page_block == blocks_per_page)
212  				break;
213  			blocks[page_block] = map_bh->b_blocknr + map_offset +
214  						relative_block;
215  			page_block++;
216  			block_in_file++;
217  		}
218  		bdev = map_bh->b_bdev;
219  	}
220  
221  	/*
222  	 * Then do more get_blocks calls until we are done with this page.
223  	 */
224  	map_bh->b_page = page;
225  	while (page_block < blocks_per_page) {
226  		map_bh->b_state = 0;
227  		map_bh->b_size = 0;
228  
229  		if (block_in_file < last_block) {
230  			map_bh->b_size = (last_block-block_in_file) << blkbits;
231  			if (args->get_block(inode, block_in_file, map_bh, 0))
232  				goto confused;
233  			args->first_logical_block = block_in_file;
234  		}
235  
236  		if (!buffer_mapped(map_bh)) {
237  			fully_mapped = 0;
238  			if (first_hole == blocks_per_page)
239  				first_hole = page_block;
240  			page_block++;
241  			block_in_file++;
242  			continue;
243  		}
244  
245  		/* some filesystems will copy data into the page during
246  		 * the get_block call, in which case we don't want to
247  		 * read it again.  map_buffer_to_page copies the data
248  		 * we just collected from get_block into the page's buffers
249  		 * so readpage doesn't have to repeat the get_block call
250  		 */
251  		if (buffer_uptodate(map_bh)) {
252  			map_buffer_to_page(page, map_bh, page_block);
253  			goto confused;
254  		}
255  
256  		if (first_hole != blocks_per_page)
257  			goto confused;		/* hole -> non-hole */
258  
259  		/* Contiguous blocks? */
260  		if (page_block && blocks[page_block-1] != map_bh->b_blocknr-1)
261  			goto confused;
262  		nblocks = map_bh->b_size >> blkbits;
263  		for (relative_block = 0; ; relative_block++) {
264  			if (relative_block == nblocks) {
265  				clear_buffer_mapped(map_bh);
266  				break;
267  			} else if (page_block == blocks_per_page)
268  				break;
269  			blocks[page_block] = map_bh->b_blocknr+relative_block;
270  			page_block++;
271  			block_in_file++;
272  		}
273  		bdev = map_bh->b_bdev;
274  	}
275  
276  	if (first_hole != blocks_per_page) {
277  		zero_user_segment(page, first_hole << blkbits, PAGE_SIZE);
278  		if (first_hole == 0) {
279  			SetPageUptodate(page);
280  			unlock_page(page);
281  			goto out;
282  		}
283  	} else if (fully_mapped) {
284  		SetPageMappedToDisk(page);
285  	}
286  
287  	if (fully_mapped && blocks_per_page == 1 && !PageUptodate(page) &&
288  	    cleancache_get_page(page) == 0) {
289  		SetPageUptodate(page);
290  		goto confused;
291  	}
292  
293  	/*
294  	 * This page will go to BIO.  Do we need to send this BIO off first?
295  	 */
296  	if (args->bio && (args->last_block_in_bio != blocks[0] - 1))
297  		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
298  
299  alloc_new:
300  	if (args->bio == NULL) {
301  		if (first_hole == blocks_per_page) {
302  			if (!bdev_read_page(bdev, blocks[0] << (blkbits - 9),
303  								page))
304  				goto out;
305  		}
306  		args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
307  					min_t(int, args->nr_pages,
308  					      BIO_MAX_PAGES),
309  					gfp);
310  		if (args->bio == NULL)
311  			goto confused;
312  	}
313  
314  	length = first_hole << blkbits;
315  	if (bio_add_page(args->bio, page, length, 0) < length) {
316  		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
317  		goto alloc_new;
318  	}
319  
320  	relative_block = block_in_file - args->first_logical_block;
321  	nblocks = map_bh->b_size >> blkbits;
322  	if ((buffer_boundary(map_bh) && relative_block == nblocks) ||
323  	    (first_hole != blocks_per_page))
324  		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
325  	else
326  		args->last_block_in_bio = blocks[blocks_per_page - 1];
327  out:
328  	return args->bio;
329  
330  confused:
331  	if (args->bio)
332  		args->bio = mpage_bio_submit(REQ_OP_READ, op_flags, args->bio);
333  	if (!PageUptodate(page))
334  		block_read_full_page(page, args->get_block);
335  	else
336  		unlock_page(page);
337  	goto out;
338  }
339  
340  /**
341   * mpage_readahead - start reads against pages
342   * @rac: Describes which pages to read.
343   * @get_block: The filesystem's block mapper function.
344   *
345   * This function walks the pages and the blocks within each page, building and
346   * emitting large BIOs.
347   *
348   * If anything unusual happens, such as:
349   *
350   * - encountering a page which has buffers
351   * - encountering a page which has a non-hole after a hole
352   * - encountering a page with non-contiguous blocks
353   *
354   * then this code just gives up and calls the buffer_head-based read function.
355   * It does handle a page which has holes at the end - that is a common case:
356   * the end-of-file on blocksize < PAGE_SIZE setups.
357   *
358   * BH_Boundary explanation:
359   *
360   * There is a problem.  The mpage read code assembles several pages, gets all
361   * their disk mappings, and then submits them all.  That's fine, but obtaining
362   * the disk mappings may require I/O.  Reads of indirect blocks, for example.
363   *
364   * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
365   * submitted in the following order:
366   *
367   * 	12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
368   *
369   * because the indirect block has to be read to get the mappings of blocks
370   * 13,14,15,16.  Obviously, this impacts performance.
371   *
372   * So what we do is allow the filesystem's get_block() function to set
373   * BH_Boundary when it maps block 11.  BH_Boundary says: mapping of the block
374   * after this one will require I/O against a block which is probably close to
375   * this one.  So you should push what I/O you have currently accumulated.
376   *
377   * This all causes the disk requests to be issued in the correct order.
378   */
379  void mpage_readahead(struct readahead_control *rac, get_block_t get_block)
380  {
381  	struct page *page;
382  	struct mpage_readpage_args args = {
383  		.get_block = get_block,
384  		.is_readahead = true,
385  	};
386  
387  	while ((page = readahead_page(rac))) {
388  		prefetchw(&page->flags);
389  		args.page = page;
390  		args.nr_pages = readahead_count(rac);
391  		args.bio = do_mpage_readpage(&args);
392  		put_page(page);
393  	}
394  	if (args.bio)
395  		mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio);
396  }
397  EXPORT_SYMBOL(mpage_readahead);
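/*
 * Illustrative sketch (not part of this file): a block-based filesystem
 * typically exposes mpage_readahead() through its address_space_operations
 * by passing its own get_block_t.  ext2, for example, does roughly:
 *
 *	static void ext2_readahead(struct readahead_control *rac)
 *	{
 *		mpage_readahead(rac, ext2_get_block);
 *	}
 *
 *	const struct address_space_operations ext2_aops = {
 *		...
 *		.readahead	= ext2_readahead,
 *		...
 *	};
 *
 * The get_block_t callback is also where the BH_Boundary hint described
 * above is raised, via set_buffer_boundary() on the result buffer_head.
 */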
398  
399  /*
400   * This isn't called much at all
401   */
402  int mpage_readpage(struct page *page, get_block_t get_block)
403  {
404  	struct mpage_readpage_args args = {
405  		.page = page,
406  		.nr_pages = 1,
407  		.get_block = get_block,
408  	};
409  
410  	args.bio = do_mpage_readpage(&args);
411  	if (args.bio)
412  		mpage_bio_submit(REQ_OP_READ, 0, args.bio);
413  	return 0;
414  }
415  EXPORT_SYMBOL(mpage_readpage);
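/*
 * Illustrative sketch (not part of this file): the single-page variant is
 * wired up the same way, e.g. ext2 does roughly:
 *
 *	static int ext2_readpage(struct file *file, struct page *page)
 *	{
 *		return mpage_readpage(page, ext2_get_block);
 *	}
 *
 * installed as the ->readpage method in ext2_aops.
 */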
416  
417  /*
418   * Writing is not so simple.
419   *
420   * If the page has buffers then they will be used for obtaining the disk
421   * mapping.  We only support pages which are fully mapped-and-dirty, with a
422   * special case for pages which are unmapped at the end: end-of-file.
423   *
424   * If the page has no buffers (preferred) then the page is mapped here.
425   *
426   * If all blocks are found to be contiguous then the page can go into the
427   * BIO.  Otherwise fall back to the mapping's writepage().
428   *
429   * FIXME: This code wants an estimate of how many pages are still to be
430   * written, so it can intelligently allocate a suitably-sized BIO.  For now,
431   * just allocate full-size (BIO_MAX_PAGES) BIOs.
432   */
433  
434  struct mpage_data {
435  	struct bio *bio;
436  	sector_t last_block_in_bio;
437  	get_block_t *get_block;
438  	unsigned use_writepage;
439  };
440  
441  /*
442   * We have our BIO, so we can now mark the buffers clean.  Make
443   * sure to only clean buffers which we know we'll be writing.
444   */
445  static void clean_buffers(struct page *page, unsigned first_unmapped)
446  {
447  	unsigned buffer_counter = 0;
448  	struct buffer_head *bh, *head;
449  	if (!page_has_buffers(page))
450  		return;
451  	head = page_buffers(page);
452  	bh = head;
453  
454  	do {
455  		if (buffer_counter++ == first_unmapped)
456  			break;
457  		clear_buffer_dirty(bh);
458  		bh = bh->b_this_page;
459  	} while (bh != head);
460  
461  	/*
462  	 * We cannot drop the bh if the page is not uptodate, or else a
463  	 * concurrent readpage would fail to serialize with the bh and would
464  	 * read from disk before we reach the platter.
465  	 */
466  	if (buffer_heads_over_limit && PageUptodate(page))
467  		try_to_free_buffers(page);
468  }
469  
470  /*
471   * For situations where we want to clean all buffers attached to a page.
472   * We don't need to calculate how many buffers are attached to the page,
473   * we just need to specify a number larger than the maximum number of buffers.
474   */
475  void clean_page_buffers(struct page *page)
476  {
477  	clean_buffers(page, ~0U);
478  }
479  
480  static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
481  		      void *data)
482  {
483  	struct mpage_data *mpd = data;
484  	struct bio *bio = mpd->bio;
485  	struct address_space *mapping = page->mapping;
486  	struct inode *inode = page->mapping->host;
487  	const unsigned blkbits = inode->i_blkbits;
488  	unsigned long end_index;
489  	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
490  	sector_t last_block;
491  	sector_t block_in_file;
492  	sector_t blocks[MAX_BUF_PER_PAGE];
493  	unsigned page_block;
494  	unsigned first_unmapped = blocks_per_page;
495  	struct block_device *bdev = NULL;
496  	int boundary = 0;
497  	sector_t boundary_block = 0;
498  	struct block_device *boundary_bdev = NULL;
499  	int length;
500  	struct buffer_head map_bh;
501  	loff_t i_size = i_size_read(inode);
502  	int ret = 0;
503  	int op_flags = wbc_to_write_flags(wbc);
504  
505  	if (page_has_buffers(page)) {
506  		struct buffer_head *head = page_buffers(page);
507  		struct buffer_head *bh = head;
508  
509  		/* If they're all mapped and dirty, do it */
510  		page_block = 0;
511  		do {
512  			BUG_ON(buffer_locked(bh));
513  			if (!buffer_mapped(bh)) {
514  				/*
515  				 * unmapped dirty buffers are created by
516  				 * __set_page_dirty_buffers -> mmapped data
517  				 */
518  				if (buffer_dirty(bh))
519  					goto confused;
520  				if (first_unmapped == blocks_per_page)
521  					first_unmapped = page_block;
522  				continue;
523  			}
524  
525  			if (first_unmapped != blocks_per_page)
526  				goto confused;	/* hole -> non-hole */
527  
528  			if (!buffer_dirty(bh) || !buffer_uptodate(bh))
529  				goto confused;
530  			if (page_block) {
531  				if (bh->b_blocknr != blocks[page_block-1] + 1)
532  					goto confused;
533  			}
534  			blocks[page_block++] = bh->b_blocknr;
535  			boundary = buffer_boundary(bh);
536  			if (boundary) {
537  				boundary_block = bh->b_blocknr;
538  				boundary_bdev = bh->b_bdev;
539  			}
540  			bdev = bh->b_bdev;
541  		} while ((bh = bh->b_this_page) != head);
542  
543  		if (first_unmapped)
544  			goto page_is_mapped;
545  
546  		/*
547  		 * Page has buffers, but they are all unmapped. The page was
548  		 * created by pagein or read over a hole which was handled by
549  		 * block_read_full_page().  If this address_space is also
550  		 * using mpage_readahead then this can rarely happen.
551  		 */
552  		goto confused;
553  	}
554  
555  	/*
556  	 * The page has no buffers: map it to disk
557  	 */
558  	BUG_ON(!PageUptodate(page));
559  	block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
560  	last_block = (i_size - 1) >> blkbits;
561  	map_bh.b_page = page;
562  	for (page_block = 0; page_block < blocks_per_page; ) {
563  
564  		map_bh.b_state = 0;
565  		map_bh.b_size = 1 << blkbits;
566  		if (mpd->get_block(inode, block_in_file, &map_bh, 1))
567  			goto confused;
568  		if (buffer_new(&map_bh))
569  			clean_bdev_bh_alias(&map_bh);
570  		if (buffer_boundary(&map_bh)) {
571  			boundary_block = map_bh.b_blocknr;
572  			boundary_bdev = map_bh.b_bdev;
573  		}
574  		if (page_block) {
575  			if (map_bh.b_blocknr != blocks[page_block-1] + 1)
576  				goto confused;
577  		}
578  		blocks[page_block++] = map_bh.b_blocknr;
579  		boundary = buffer_boundary(&map_bh);
580  		bdev = map_bh.b_bdev;
581  		if (block_in_file == last_block)
582  			break;
583  		block_in_file++;
584  	}
585  	BUG_ON(page_block == 0);
586  
587  	first_unmapped = page_block;
588  
589  page_is_mapped:
590  	end_index = i_size >> PAGE_SHIFT;
591  	if (page->index >= end_index) {
592  		/*
593  		 * The page straddles i_size.  It must be zeroed out on each
594  		 * and every writepage invocation because it may be mmapped.
595  		 * "A file is mapped in multiples of the page size.  For a file
596  		 * that is not a multiple of the page size, the remaining memory
597  		 * is zeroed when mapped, and writes to that region are not
598  		 * written out to the file."
599  		 */
600  		unsigned offset = i_size & (PAGE_SIZE - 1);
601  
602  		if (page->index > end_index || !offset)
603  			goto confused;
604  		zero_user_segment(page, offset, PAGE_SIZE);
605  	}
606  
607  	/*
608  	 * This page will go to BIO.  Do we need to send this BIO off first?
609  	 */
610  	if (bio && mpd->last_block_in_bio != blocks[0] - 1)
611  		bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
612  
613  alloc_new:
614  	if (bio == NULL) {
615  		if (first_unmapped == blocks_per_page) {
616  			if (!bdev_write_page(bdev, blocks[0] << (blkbits - 9),
617  								page, wbc))
618  				goto out;
619  		}
620  		bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
621  				BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
622  		if (bio == NULL)
623  			goto confused;
624  
625  		wbc_init_bio(wbc, bio);
626  		bio->bi_write_hint = inode->i_write_hint;
627  	}
628  
629  	/*
630  	 * Must try to add the page before marking the buffer clean or
631  	 * the confused fail path above (OOM) will be very confused when
632  	 * it finds all bh marked clean (i.e. it will not write anything)
633  	 */
634  	wbc_account_cgroup_owner(wbc, page, PAGE_SIZE);
635  	length = first_unmapped << blkbits;
636  	if (bio_add_page(bio, page, length, 0) < length) {
637  		bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
638  		goto alloc_new;
639  	}
640  
641  	clean_buffers(page, first_unmapped);
642  
643  	BUG_ON(PageWriteback(page));
644  	set_page_writeback(page);
645  	unlock_page(page);
646  	if (boundary || (first_unmapped != blocks_per_page)) {
647  		bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
648  		if (boundary_block) {
649  			write_boundary_block(boundary_bdev,
650  					boundary_block, 1 << blkbits);
651  		}
652  	} else {
653  		mpd->last_block_in_bio = blocks[blocks_per_page - 1];
654  	}
655  	goto out;
656  
657  confused:
658  	if (bio)
659  		bio = mpage_bio_submit(REQ_OP_WRITE, op_flags, bio);
660  
661  	if (mpd->use_writepage) {
662  		ret = mapping->a_ops->writepage(page, wbc);
663  	} else {
664  		ret = -EAGAIN;
665  		goto out;
666  	}
667  	/*
668  	 * The caller has a ref on the inode, so *mapping is stable
669  	 */
670  	mapping_set_error(mapping, ret);
671  out:
672  	mpd->bio = bio;
673  	return ret;
674  }
675  
676  /**
677   * mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them
678   * @mapping: address space structure to write
679   * @wbc: subtract the number of written pages from *@wbc->nr_to_write
680   * @get_block: the filesystem's block mapper function.
681   *             If this is NULL then use a_ops->writepage.  Otherwise, go
682   *             direct-to-BIO.
683   *
684   * This is a library function, which implements the writepages()
685   * address_space_operation.
686   *
687   * If a page is already under I/O, generic_writepages() skips it, even
688   * if it's dirty.  This is desirable behaviour for memory-cleaning writeback,
689   * but it is INCORRECT for data-integrity system calls such as fsync().  fsync()
690   * and msync() need to guarantee that all the data which was dirty at the time
691   * the call was made gets new I/O started against it.  If wbc->sync_mode is
692   * WB_SYNC_ALL then we were called for data integrity and we must wait for
693   * existing IO to complete.
694   */
695  int
696  mpage_writepages(struct address_space *mapping,
697  		struct writeback_control *wbc, get_block_t get_block)
698  {
699  	struct blk_plug plug;
700  	int ret;
701  
702  	blk_start_plug(&plug);
703  
704  	if (!get_block)
705  		ret = generic_writepages(mapping, wbc);
706  	else {
707  		struct mpage_data mpd = {
708  			.bio = NULL,
709  			.last_block_in_bio = 0,
710  			.get_block = get_block,
711  			.use_writepage = 1,
712  		};
713  
714  		ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd);
715  		if (mpd.bio) {
716  			int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
717  				  REQ_SYNC : 0);
718  			mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
719  		}
720  	}
721  	blk_finish_plug(&plug);
722  	return ret;
723  }
724  EXPORT_SYMBOL(mpage_writepages);
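/*
 * Illustrative sketch (not part of this file): a filesystem that wants
 * direct-to-BIO writeback passes its get_block_t here instead of relying
 * on ->writepage.  ext2, for example, does roughly:
 *
 *	static int ext2_writepages(struct address_space *mapping,
 *				   struct writeback_control *wbc)
 *	{
 *		return mpage_writepages(mapping, wbc, ext2_get_block);
 *	}
 *
 * installed as the ->writepages method in ext2_aops.
 */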
725  
726  int mpage_writepage(struct page *page, get_block_t get_block,
727  	struct writeback_control *wbc)
728  {
729  	struct mpage_data mpd = {
730  		.bio = NULL,
731  		.last_block_in_bio = 0,
732  		.get_block = get_block,
733  		.use_writepage = 0,
734  	};
735  	int ret = __mpage_writepage(page, wbc, &mpd);
736  	if (mpd.bio) {
737  		int op_flags = (wbc->sync_mode == WB_SYNC_ALL ?
738  			  REQ_SYNC : 0);
739  		mpage_bio_submit(REQ_OP_WRITE, op_flags, mpd.bio);
740  	}
741  	return ret;
742  }
743  EXPORT_SYMBOL(mpage_writepage);
744