• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * mm/truncate.c - code for taking down pages from address_spaces
3  *
4  * Copyright (C) 2002, Linus Torvalds
5  *
6  * 10Sep2002	Andrew Morton
7  *		Initial version.
8  */
9 
10 #include <linux/kernel.h>
11 #include <linux/backing-dev.h>
12 #include <linux/gfp.h>
13 #include <linux/mm.h>
14 #include <linux/swap.h>
15 #include <linux/export.h>
16 #include <linux/pagemap.h>
17 #include <linux/highmem.h>
18 #include <linux/pagevec.h>
19 #include <linux/task_io_accounting_ops.h>
20 #include <linux/buffer_head.h>	/* grr. try_to_release_page,
21 				   do_invalidatepage */
22 #include <linux/cleancache.h>
23 #include <linux/rmap.h>
24 #include "internal.h"
25 
26 
27 /**
28  * do_invalidatepage - invalidate part or all of a page
29  * @page: the page which is affected
30  * @offset: start of the range to invalidate
31  * @length: length of the range to invalidate
32  *
33  * do_invalidatepage() is called when all or part of the page has become
34  * invalidated by a truncate operation.
35  *
36  * do_invalidatepage() does not have to release all buffers, but it must
37  * ensure that no dirty buffer is left outside @offset and that no I/O
38  * is underway against any of the blocks which are outside the truncation
39  * point.  Because the caller is about to free (and possibly reuse) those
40  * blocks on-disk.
41  */
do_invalidatepage(struct page * page,unsigned int offset,unsigned int length)42 void do_invalidatepage(struct page *page, unsigned int offset,
43 		       unsigned int length)
44 {
45 	void (*invalidatepage)(struct page *, unsigned int, unsigned int);
46 
47 	invalidatepage = page->mapping->a_ops->invalidatepage;
48 #ifdef CONFIG_BLOCK
49 	if (!invalidatepage)
50 		invalidatepage = block_invalidatepage;
51 #endif
52 	if (invalidatepage)
53 		(*invalidatepage)(page, offset, length);
54 }
55 
truncate_partial_page(struct page * page,unsigned partial)56 static inline void truncate_partial_page(struct page *page, unsigned partial)
57 {
58 	zero_user_segment(page, partial, PAGE_CACHE_SIZE);
59 	cleancache_invalidate_page(page->mapping, page);
60 	if (page_has_private(page))
61 		do_invalidatepage(page, partial, PAGE_CACHE_SIZE - partial);
62 }
63 
64 /*
65  * This cancels just the dirty bit on the kernel page itself, it
66  * does NOT actually remove dirty bits on any mmap's that may be
67  * around. It also leaves the page tagged dirty, so any sync
68  * activity will still find it on the dirty lists, and in particular,
69  * clear_page_dirty_for_io() will still look at the dirty bits in
70  * the VM.
71  *
72  * Doing this should *normally* only ever be done when a page
73  * is truncated, and is not actually mapped anywhere at all. However,
74  * fs/buffer.c does this when it notices that somebody has cleaned
75  * out all the buffers on a page without actually doing it through
76  * the VM. Can you say "ext3 is horribly ugly"? Tought you could.
77  */
cancel_dirty_page(struct page * page,unsigned int account_size)78 void cancel_dirty_page(struct page *page, unsigned int account_size)
79 {
80 	if (TestClearPageDirty(page)) {
81 		struct address_space *mapping = page->mapping;
82 		if (mapping && mapping_cap_account_dirty(mapping)) {
83 			dec_zone_page_state(page, NR_FILE_DIRTY);
84 			dec_bdi_stat(mapping->backing_dev_info,
85 					BDI_RECLAIMABLE);
86 			if (account_size)
87 				task_io_account_cancelled_write(account_size);
88 		}
89 	}
90 }
91 EXPORT_SYMBOL(cancel_dirty_page);
92 
93 /*
94  * If truncate cannot remove the fs-private metadata from the page, the page
95  * becomes orphaned.  It will be left on the LRU and may even be mapped into
96  * user pagetables if we're racing with filemap_fault().
97  *
98  * We need to bale out if page->mapping is no longer equal to the original
99  * mapping.  This happens a) when the VM reclaimed the page while we waited on
100  * its lock, b) when a concurrent invalidate_mapping_pages got there first and
101  * c) when tmpfs swizzles a page between a tmpfs inode and swapper_space.
102  */
103 static int
truncate_complete_page(struct address_space * mapping,struct page * page)104 truncate_complete_page(struct address_space *mapping, struct page *page)
105 {
106 	if (page->mapping != mapping)
107 		return -EIO;
108 
109 	if (page_has_private(page))
110 		do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
111 
112 	cancel_dirty_page(page, PAGE_CACHE_SIZE);
113 
114 	ClearPageMappedToDisk(page);
115 	delete_from_page_cache(page);
116 	return 0;
117 }
118 
119 /*
120  * This is for invalidate_mapping_pages().  That function can be called at
121  * any time, and is not supposed to throw away dirty pages.  But pages can
122  * be marked dirty at any time too, so use remove_mapping which safely
123  * discards clean, unused pages.
124  *
125  * Returns non-zero if the page was successfully invalidated.
126  */
127 static int
invalidate_complete_page(struct address_space * mapping,struct page * page)128 invalidate_complete_page(struct address_space *mapping, struct page *page)
129 {
130 	int ret;
131 
132 	if (page->mapping != mapping)
133 		return 0;
134 
135 	if (page_has_private(page) && !try_to_release_page(page, 0))
136 		return 0;
137 
138 	ret = remove_mapping(mapping, page);
139 
140 	return ret;
141 }
142 
truncate_inode_page(struct address_space * mapping,struct page * page)143 int truncate_inode_page(struct address_space *mapping, struct page *page)
144 {
145 	if (page_mapped(page)) {
146 		unmap_mapping_range(mapping,
147 				   (loff_t)page->index << PAGE_CACHE_SHIFT,
148 				   PAGE_CACHE_SIZE, 0);
149 	}
150 	return truncate_complete_page(mapping, page);
151 }
152 
153 /*
154  * Used to get rid of pages on hardware memory corruption.
155  */
generic_error_remove_page(struct address_space * mapping,struct page * page)156 int generic_error_remove_page(struct address_space *mapping, struct page *page)
157 {
158 	if (!mapping)
159 		return -EINVAL;
160 	/*
161 	 * Only punch for normal data pages for now.
162 	 * Handling other types like directories would need more auditing.
163 	 */
164 	if (!S_ISREG(mapping->host->i_mode))
165 		return -EIO;
166 	return truncate_inode_page(mapping, page);
167 }
168 EXPORT_SYMBOL(generic_error_remove_page);
169 
/*
 * Safely invalidate one page from its pagecache mapping.
 * Only clean, unused pages are dropped.  The page must be locked.
 *
 * Returns 1 if the page is successfully invalidated, otherwise 0.
 */
int invalidate_inode_page(struct page *page)
{
	struct address_space *mapping = page_mapping(page);

	/*
	 * Leave the page alone if it has no mapping, is dirty, is under
	 * writeback, or is mapped into page tables.
	 */
	if (!mapping || PageDirty(page) || PageWriteback(page) ||
	    page_mapped(page))
		return 0;

	return invalidate_complete_page(mapping, page);
}
187 
188 /**
189  * truncate_inode_pages_range - truncate range of pages specified by start & end byte offsets
190  * @mapping: mapping to truncate
191  * @lstart: offset from which to truncate
192  * @lend: offset to which to truncate
193  *
194  * Truncate the page cache, removing the pages that are between
195  * specified offsets (and zeroing out partial page
196  * (if lstart is not page aligned)).
197  *
198  * Truncate takes two passes - the first pass is nonblocking.  It will not
199  * block on page locks and it will not block on writeback.  The second pass
200  * will wait.  This is to prevent as much IO as possible in the affected region.
201  * The first pass will remove most pages, so the search cost of the second pass
202  * is low.
203  *
204  * We pass down the cache-hot hint to the page freeing code.  Even if the
205  * mapping is large, it is probably the case that the final pages are the most
206  * recently touched, and freeing happens in ascending file offset order.
207  */
truncate_inode_pages_range(struct address_space * mapping,loff_t lstart,loff_t lend)208 void truncate_inode_pages_range(struct address_space *mapping,
209 				loff_t lstart, loff_t lend)
210 {
211 	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
212 	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
213 	struct pagevec pvec;
214 	pgoff_t index;
215 	pgoff_t end;
216 	int i;
217 
218 	cleancache_invalidate_inode(mapping);
219 	if (mapping->nrpages == 0)
220 		return;
221 
222 	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
223 	end = (lend >> PAGE_CACHE_SHIFT);
224 
225 	pagevec_init(&pvec, 0);
226 	index = start;
227 	while (index <= end && pagevec_lookup(&pvec, mapping, index,
228 			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
229 		mem_cgroup_uncharge_start();
230 		for (i = 0; i < pagevec_count(&pvec); i++) {
231 			struct page *page = pvec.pages[i];
232 
233 			/* We rely upon deletion not changing page->index */
234 			index = page->index;
235 			if (index > end)
236 				break;
237 
238 			if (!trylock_page(page))
239 				continue;
240 			WARN_ON(page->index != index);
241 			if (PageWriteback(page)) {
242 				unlock_page(page);
243 				continue;
244 			}
245 			truncate_inode_page(mapping, page);
246 			unlock_page(page);
247 		}
248 		pagevec_release(&pvec);
249 		mem_cgroup_uncharge_end();
250 		cond_resched();
251 		index++;
252 	}
253 
254 	if (partial) {
255 		struct page *page = find_lock_page(mapping, start - 1);
256 		if (page) {
257 			wait_on_page_writeback(page);
258 			truncate_partial_page(page, partial);
259 			unlock_page(page);
260 			page_cache_release(page);
261 		}
262 	}
263 
264 	index = start;
265 	for ( ; ; ) {
266 		cond_resched();
267 		if (!pagevec_lookup(&pvec, mapping, index,
268 			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
269 			if (index == start)
270 				break;
271 			index = start;
272 			continue;
273 		}
274 		if (index == start && pvec.pages[0]->index > end) {
275 			pagevec_release(&pvec);
276 			break;
277 		}
278 		mem_cgroup_uncharge_start();
279 		for (i = 0; i < pagevec_count(&pvec); i++) {
280 			struct page *page = pvec.pages[i];
281 
282 			/* We rely upon deletion not changing page->index */
283 			index = page->index;
284 			if (index > end)
285 				break;
286 
287 			lock_page(page);
288 			WARN_ON(page->index != index);
289 			wait_on_page_writeback(page);
290 			truncate_inode_page(mapping, page);
291 			unlock_page(page);
292 		}
293 		pagevec_release(&pvec);
294 		mem_cgroup_uncharge_end();
295 		index++;
296 	}
297 	cleancache_invalidate_inode(mapping);
298 }
299 EXPORT_SYMBOL(truncate_inode_pages_range);
300 
301 /**
302  * truncate_inode_pages - truncate *all* the pages from an offset
303  * @mapping: mapping to truncate
304  * @lstart: offset from which to truncate
305  *
306  * Called under (and serialised by) inode->i_mutex.
307  *
308  * Note: When this function returns, there can be a page in the process of
309  * deletion (inside __delete_from_page_cache()) in the specified range.  Thus
310  * mapping->nrpages can be non-zero when this function returns even after
311  * truncation of the whole mapping.
312  */
truncate_inode_pages(struct address_space * mapping,loff_t lstart)313 void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
314 {
315 	truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
316 }
317 EXPORT_SYMBOL(truncate_inode_pages);
318 
319 /**
320  * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
321  * @mapping: the address_space which holds the pages to invalidate
322  * @start: the offset 'from' which to invalidate
323  * @end: the offset 'to' which to invalidate (inclusive)
324  *
325  * This function only removes the unlocked pages, if you want to
326  * remove all the pages of one inode, you must call truncate_inode_pages.
327  *
328  * invalidate_mapping_pages() will not block on IO activity. It will not
329  * invalidate pages which are dirty, locked, under writeback or mapped into
330  * pagetables.
331  */
invalidate_mapping_pages(struct address_space * mapping,pgoff_t start,pgoff_t end)332 unsigned long invalidate_mapping_pages(struct address_space *mapping,
333 		pgoff_t start, pgoff_t end)
334 {
335 	struct pagevec pvec;
336 	pgoff_t index = start;
337 	unsigned long ret;
338 	unsigned long count = 0;
339 	int i;
340 
341 	/*
342 	 * Note: this function may get called on a shmem/tmpfs mapping:
343 	 * pagevec_lookup() might then return 0 prematurely (because it
344 	 * got a gangful of swap entries); but it's hardly worth worrying
345 	 * about - it can rarely have anything to free from such a mapping
346 	 * (most pages are dirty), and already skips over any difficulties.
347 	 */
348 
349 	pagevec_init(&pvec, 0);
350 	while (index <= end && pagevec_lookup(&pvec, mapping, index,
351 			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
352 		mem_cgroup_uncharge_start();
353 		for (i = 0; i < pagevec_count(&pvec); i++) {
354 			struct page *page = pvec.pages[i];
355 
356 			/* We rely upon deletion not changing page->index */
357 			index = page->index;
358 			if (index > end)
359 				break;
360 
361 			if (!trylock_page(page))
362 				continue;
363 			WARN_ON(page->index != index);
364 			ret = invalidate_inode_page(page);
365 			unlock_page(page);
366 			/*
367 			 * Invalidation is a hint that the page is no longer
368 			 * of interest and try to speed up its reclaim.
369 			 */
370 			if (!ret)
371 				deactivate_page(page);
372 			count += ret;
373 		}
374 		pagevec_release(&pvec);
375 		mem_cgroup_uncharge_end();
376 		cond_resched();
377 		index++;
378 	}
379 	return count;
380 }
381 EXPORT_SYMBOL(invalidate_mapping_pages);
382 
383 /*
384  * This is like invalidate_complete_page(), except it ignores the page's
385  * refcount.  We do this because invalidate_inode_pages2() needs stronger
386  * invalidation guarantees, and cannot afford to leave pages behind because
387  * shrink_page_list() has a temp ref on them, or because they're transiently
388  * sitting in the lru_cache_add() pagevecs.
389  */
390 static int
invalidate_complete_page2(struct address_space * mapping,struct page * page)391 invalidate_complete_page2(struct address_space *mapping, struct page *page)
392 {
393 	if (page->mapping != mapping)
394 		return 0;
395 
396 	if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
397 		return 0;
398 
399 	spin_lock_irq(&mapping->tree_lock);
400 	if (PageDirty(page))
401 		goto failed;
402 
403 	BUG_ON(page_has_private(page));
404 	__delete_from_page_cache(page);
405 	spin_unlock_irq(&mapping->tree_lock);
406 	mem_cgroup_uncharge_cache_page(page);
407 
408 	if (mapping->a_ops->freepage)
409 		mapping->a_ops->freepage(page);
410 
411 	page_cache_release(page);	/* pagecache ref */
412 	return 1;
413 failed:
414 	spin_unlock_irq(&mapping->tree_lock);
415 	return 0;
416 }
417 
do_launder_page(struct address_space * mapping,struct page * page)418 static int do_launder_page(struct address_space *mapping, struct page *page)
419 {
420 	if (!PageDirty(page))
421 		return 0;
422 	if (page->mapping != mapping || mapping->a_ops->launder_page == NULL)
423 		return 0;
424 	return mapping->a_ops->launder_page(page);
425 }
426 
427 /**
428  * invalidate_inode_pages2_range - remove range of pages from an address_space
429  * @mapping: the address_space
430  * @start: the page offset 'from' which to invalidate
431  * @end: the page offset 'to' which to invalidate (inclusive)
432  *
433  * Any pages which are found to be mapped into pagetables are unmapped prior to
434  * invalidation.
435  *
436  * Returns -EBUSY if any pages could not be invalidated.
437  */
invalidate_inode_pages2_range(struct address_space * mapping,pgoff_t start,pgoff_t end)438 int invalidate_inode_pages2_range(struct address_space *mapping,
439 				  pgoff_t start, pgoff_t end)
440 {
441 	struct pagevec pvec;
442 	pgoff_t index;
443 	int i;
444 	int ret = 0;
445 	int ret2 = 0;
446 	int did_range_unmap = 0;
447 
448 	cleancache_invalidate_inode(mapping);
449 	pagevec_init(&pvec, 0);
450 	index = start;
451 	while (index <= end && pagevec_lookup(&pvec, mapping, index,
452 			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
453 		mem_cgroup_uncharge_start();
454 		for (i = 0; i < pagevec_count(&pvec); i++) {
455 			struct page *page = pvec.pages[i];
456 
457 			/* We rely upon deletion not changing page->index */
458 			index = page->index;
459 			if (index > end)
460 				break;
461 
462 			lock_page(page);
463 			WARN_ON(page->index != index);
464 			if (page->mapping != mapping) {
465 				unlock_page(page);
466 				continue;
467 			}
468 			wait_on_page_writeback(page);
469 			if (page_mapped(page)) {
470 				if (!did_range_unmap) {
471 					/*
472 					 * Zap the rest of the file in one hit.
473 					 */
474 					unmap_mapping_range(mapping,
475 					   (loff_t)index << PAGE_CACHE_SHIFT,
476 					   (loff_t)(1 + end - index)
477 							 << PAGE_CACHE_SHIFT,
478 					    0);
479 					did_range_unmap = 1;
480 				} else {
481 					/*
482 					 * Just zap this page
483 					 */
484 					unmap_mapping_range(mapping,
485 					   (loff_t)index << PAGE_CACHE_SHIFT,
486 					   PAGE_CACHE_SIZE, 0);
487 				}
488 			}
489 			BUG_ON(page_mapped(page));
490 			ret2 = do_launder_page(mapping, page);
491 			if (ret2 == 0) {
492 				if (!invalidate_complete_page2(mapping, page))
493 					ret2 = -EBUSY;
494 			}
495 			if (ret2 < 0)
496 				ret = ret2;
497 			unlock_page(page);
498 		}
499 		pagevec_release(&pvec);
500 		mem_cgroup_uncharge_end();
501 		cond_resched();
502 		index++;
503 	}
504 	cleancache_invalidate_inode(mapping);
505 	return ret;
506 }
507 EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
508 
509 /**
510  * invalidate_inode_pages2 - remove all pages from an address_space
511  * @mapping: the address_space
512  *
513  * Any pages which are found to be mapped into pagetables are unmapped prior to
514  * invalidation.
515  *
516  * Returns -EBUSY if any pages could not be invalidated.
517  */
invalidate_inode_pages2(struct address_space * mapping)518 int invalidate_inode_pages2(struct address_space *mapping)
519 {
520 	return invalidate_inode_pages2_range(mapping, 0, -1);
521 }
522 EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
523 
524 /**
525  * truncate_pagecache - unmap and remove pagecache that has been truncated
526  * @inode: inode
527  * @oldsize: old file size
528  * @newsize: new file size
529  *
530  * inode's new i_size must already be written before truncate_pagecache
531  * is called.
532  *
533  * This function should typically be called before the filesystem
534  * releases resources associated with the freed range (eg. deallocates
535  * blocks). This way, pagecache will always stay logically coherent
536  * with on-disk format, and the filesystem would not have to deal with
537  * situations such as writepage being called for a page that has already
538  * had its underlying blocks deallocated.
539  */
truncate_pagecache(struct inode * inode,loff_t oldsize,loff_t newsize)540 void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize)
541 {
542 	struct address_space *mapping = inode->i_mapping;
543 	loff_t holebegin = round_up(newsize, PAGE_SIZE);
544 
545 	/*
546 	 * unmap_mapping_range is called twice, first simply for
547 	 * efficiency so that truncate_inode_pages does fewer
548 	 * single-page unmaps.  However after this first call, and
549 	 * before truncate_inode_pages finishes, it is possible for
550 	 * private pages to be COWed, which remain after
551 	 * truncate_inode_pages finishes, hence the second
552 	 * unmap_mapping_range call must be made for correctness.
553 	 */
554 	unmap_mapping_range(mapping, holebegin, 0, 1);
555 	truncate_inode_pages(mapping, newsize);
556 	unmap_mapping_range(mapping, holebegin, 0, 1);
557 }
558 EXPORT_SYMBOL(truncate_pagecache);
559 
560 /**
561  * truncate_setsize - update inode and pagecache for a new file size
562  * @inode: inode
563  * @newsize: new file size
564  *
565  * truncate_setsize updates i_size and performs pagecache truncation (if
566  * necessary) to @newsize. It will be typically be called from the filesystem's
567  * setattr function when ATTR_SIZE is passed in.
568  *
569  * Must be called with inode_mutex held and before all filesystem specific
570  * block truncation has been performed.
571  */
truncate_setsize(struct inode * inode,loff_t newsize)572 void truncate_setsize(struct inode *inode, loff_t newsize)
573 {
574 	loff_t oldsize = inode->i_size;
575 
576 	i_size_write(inode, newsize);
577 	if (newsize > oldsize)
578 		pagecache_isize_extended(inode, oldsize, newsize);
579 	truncate_pagecache(inode, oldsize, newsize);
580 }
581 EXPORT_SYMBOL(truncate_setsize);
582 
583 /**
584  * pagecache_isize_extended - update pagecache after extension of i_size
585  * @inode:	inode for which i_size was extended
586  * @from:	original inode size
587  * @to:		new inode size
588  *
589  * Handle extension of inode size either caused by extending truncate or by
590  * write starting after current i_size. We mark the page straddling current
591  * i_size RO so that page_mkwrite() is called on the nearest write access to
592  * the page.  This way filesystem can be sure that page_mkwrite() is called on
593  * the page before user writes to the page via mmap after the i_size has been
594  * changed.
595  *
596  * The function must be called after i_size is updated so that page fault
597  * coming after we unlock the page will already see the new i_size.
598  * The function must be called while we still hold i_mutex - this not only
599  * makes sure i_size is stable but also that userspace cannot observe new
600  * i_size value before we are prepared to store mmap writes at new inode size.
601  */
pagecache_isize_extended(struct inode * inode,loff_t from,loff_t to)602 void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to)
603 {
604 	int bsize = 1 << inode->i_blkbits;
605 	loff_t rounded_from;
606 	struct page *page;
607 	pgoff_t index;
608 
609 	WARN_ON(!mutex_is_locked(&inode->i_mutex));
610 	WARN_ON(to > inode->i_size);
611 
612 	if (from >= to || bsize == PAGE_CACHE_SIZE)
613 		return;
614 	/* Page straddling @from will not have any hole block created? */
615 	rounded_from = round_up(from, bsize);
616 	if (to <= rounded_from || !(rounded_from & (PAGE_CACHE_SIZE - 1)))
617 		return;
618 
619 	index = from >> PAGE_CACHE_SHIFT;
620 	page = find_lock_page(inode->i_mapping, index);
621 	/* Page not cached? Nothing to do */
622 	if (!page)
623 		return;
624 	/*
625 	 * See clear_page_dirty_for_io() for details why set_page_dirty()
626 	 * is needed.
627 	 */
628 	if (page_mkclean(page))
629 		set_page_dirty(page);
630 	unlock_page(page);
631 	page_cache_release(page);
632 }
633 EXPORT_SYMBOL(pagecache_isize_extended);
634 
635 /**
636  * truncate_pagecache_range - unmap and remove pagecache that is hole-punched
637  * @inode: inode
638  * @lstart: offset of beginning of hole
639  * @lend: offset of last byte of hole
640  *
641  * This function should typically be called before the filesystem
642  * releases resources associated with the freed range (eg. deallocates
643  * blocks). This way, pagecache will always stay logically coherent
644  * with on-disk format, and the filesystem would not have to deal with
645  * situations such as writepage being called for a page that has already
646  * had its underlying blocks deallocated.
647  */
truncate_pagecache_range(struct inode * inode,loff_t lstart,loff_t lend)648 void truncate_pagecache_range(struct inode *inode, loff_t lstart, loff_t lend)
649 {
650 	struct address_space *mapping = inode->i_mapping;
651 	loff_t unmap_start = round_up(lstart, PAGE_SIZE);
652 	loff_t unmap_end = round_down(1 + lend, PAGE_SIZE) - 1;
653 	/*
654 	 * This rounding is currently just for example: unmap_mapping_range
655 	 * expands its hole outwards, whereas we want it to contract the hole
656 	 * inwards.  However, existing callers of truncate_pagecache_range are
657 	 * doing their own page rounding first; and truncate_inode_pages_range
658 	 * currently BUGs if lend is not pagealigned-1 (it handles partial
659 	 * page at start of hole, but not partial page at end of hole).  Note
660 	 * unmap_mapping_range allows holelen 0 for all, and we allow lend -1.
661 	 */
662 
663 	/*
664 	 * Unlike in truncate_pagecache, unmap_mapping_range is called only
665 	 * once (before truncating pagecache), and without "even_cows" flag:
666 	 * hole-punching should not remove private COWed pages from the hole.
667 	 */
668 	if ((u64)unmap_end > (u64)unmap_start)
669 		unmap_mapping_range(mapping, unmap_start,
670 				    1 + unmap_end - unmap_start, 0);
671 	truncate_inode_pages_range(mapping, lstart, lend);
672 }
673 EXPORT_SYMBOL(truncate_pagecache_range);
674