/**
 * compress.c - NTFS kernel compressed attributes handling.
 *		Part of the Linux-NTFS project.
 *
 * Copyright (c) 2001-2004 Anton Altaparmakov
 * Copyright (c) 2002 Richard Russon
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program/include file is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (in the main directory of the Linux-NTFS
 * distribution in the file COPYING); if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>

#include "attrib.h"
#include "inode.h"
#include "debug.h"
#include "ntfs.h"

/**
 * ntfs_compression_constants - enum of constants used in the compression code
 */
typedef enum {
	/* Token types and access mask. */
	NTFS_SYMBOL_TOKEN	=	0,
	NTFS_PHRASE_TOKEN	=	1,
	NTFS_TOKEN_MASK		=	1,

	/* Compression sub-block constants. */
	NTFS_SB_SIZE_MASK	=	0x0fff,
	NTFS_SB_SIZE		=	0x1000,
	NTFS_SB_IS_COMPRESSED	=	0x8000,

	/*
	 * The maximum compression block size is by definition 16 * the cluster
	 * size, with the maximum supported cluster size being 4kiB. Thus the
	 * maximum compression buffer size is 64kiB, so we use this when
	 * initializing the compression buffer.
	 */
	NTFS_MAX_CB_SIZE	= 64 * 1024,
} ntfs_compression_constants;
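
/*
 * For orientation, the layout of a sub-block (sb) inside a compression
 * block, as implied by the constants above and by the parsing code in
 * ntfs_decompress() (an illustrative summary derived from this file, not
 * a quote from the on-disk format documentation):
 *
 *	le16 header;	// bits 0-11: size of the whole sb, including the
 *			//	header, minus 3 (hence 0xfff for a full,
 *			//	uncompressed 4096 byte sb)
 *			// bit 15: set if the sb data is compressed
 *	u8 data[];	// the (un)compressed data itself
 *
 * A zero header terminates the compression block early.
 */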

/**
 * ntfs_compression_buffer - one buffer for the decompression engine
 */
static u8 *ntfs_compression_buffer = NULL;

/**
 * ntfs_cb_lock - spinlock which protects ntfs_compression_buffer
 */
static DEFINE_SPINLOCK(ntfs_cb_lock);

/**
 * allocate_compression_buffers - allocate the decompression buffers
 *
 * Caller has to hold the ntfs_lock mutex.
 *
 * Return 0 on success or -ENOMEM if the allocations failed.
 */
int allocate_compression_buffers(void)
{
	BUG_ON(ntfs_compression_buffer);

	ntfs_compression_buffer = vmalloc(NTFS_MAX_CB_SIZE);
	if (!ntfs_compression_buffer)
		return -ENOMEM;
	return 0;
}

/**
 * free_compression_buffers - free the decompression buffers
 *
 * Caller has to hold the ntfs_lock mutex.
 */
void free_compression_buffers(void)
{
	BUG_ON(!ntfs_compression_buffer);
	vfree(ntfs_compression_buffer);
	ntfs_compression_buffer = NULL;
}
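
/*
 * A sketch of how callers are expected to pair the two functions above
 * (the real call sites live in the mount/umount code; the reference
 * counter name below is an assumption for illustration):
 *
 *	mutex_lock(&ntfs_lock);
 *	if (!ntfs_nr_compression_users++)
 *		err = allocate_compression_buffers();
 *	mutex_unlock(&ntfs_lock);
 *	...
 *	mutex_lock(&ntfs_lock);
 *	if (!--ntfs_nr_compression_users)
 *		free_compression_buffers();
 *	mutex_unlock(&ntfs_lock);
 */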

/**
 * zero_partial_compressed_page - zero out of bounds compressed page region
 */
static void zero_partial_compressed_page(struct page *page,
		const s64 initialized_size)
{
	u8 *kp = page_address(page);
	unsigned int kp_ofs;

	ntfs_debug("Zeroing page region outside initialized size.");
	if (((s64)page->index << PAGE_CACHE_SHIFT) >= initialized_size) {
		/*
		 * FIXME: Using clear_page() will become wrong when we get
		 * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem.
		 */
		clear_page(kp);
		return;
	}
	kp_ofs = initialized_size & ~PAGE_CACHE_MASK;
	memset(kp + kp_ofs, 0, PAGE_CACHE_SIZE - kp_ofs);
	return;
}

/**
 * handle_bounds_compressed_page - test for & handle out of bounds compressed page
 */
static inline void handle_bounds_compressed_page(struct page *page,
		const loff_t i_size, const s64 initialized_size)
{
	if ((page->index >= (initialized_size >> PAGE_CACHE_SHIFT)) &&
			(initialized_size < i_size))
		zero_partial_compressed_page(page, initialized_size);
	return;
}
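
/*
 * Worked example for the bounds handling above (all numbers invented for
 * illustration): with 4096 byte pages, i_size = 0x2800 and
 * initialized_size = 0x1800, page index 1 covers file bytes
 * 0x1000-0x1fff, so zero_partial_compressed_page() clears it from page
 * offset 0x800 (= 0x1800 & ~PAGE_CACHE_MASK) onwards, while page index 2
 * lies entirely beyond the initialized size and is cleared in full.
 */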

/**
 * ntfs_decompress - decompress a compression block into an array of pages
 * @dest_pages:		destination array of pages
 * @dest_index:		current index into @dest_pages (IN/OUT)
 * @dest_ofs:		current offset within @dest_pages[@dest_index] (IN/OUT)
 * @dest_max_index:	maximum index into @dest_pages (IN)
 * @dest_max_ofs:	maximum offset within @dest_pages[@dest_max_index] (IN)
 * @xpage:		the target page (-1 if none) (IN)
 * @xpage_done:		set to 1 if xpage was completed successfully (IN/OUT)
 * @cb_start:		compression block to decompress (IN)
 * @cb_size:		size of compression block @cb_start in bytes (IN)
 * @i_size:		file size when we started the read (IN)
 * @initialized_size:	initialized file size when we started the read (IN)
 *
 * The caller must have disabled preemption. ntfs_decompress() reenables it when
 * the critical section is finished.
 *
 * This decompresses the compression block @cb_start into the array of
 * destination pages @dest_pages starting at index @dest_index into @dest_pages
 * and at offset @dest_ofs into the page @dest_pages[@dest_index].
 *
 * When the page @dest_pages[@xpage] is completed, @xpage_done is set to 1.
 * If @xpage is -1 or @xpage has not been completed, @xpage_done is not modified.
 *
 * @cb_start is a pointer to the compression block which needs decompressing
 * and @cb_size is the size of @cb_start in bytes (8-64kiB).
 *
 * Return 0 on success or -EOVERFLOW on error in the compressed stream.
 * @xpage_done indicates whether the target page (@dest_pages[@xpage]) was
 * completed during the decompression of the compression block (@cb_start).
 *
 * Warning: This function *REQUIRES* PAGE_CACHE_SIZE >= 4096 or it will blow up
 * unpredictably! You have been warned!
 *
 * Note to hackers: This function may not sleep until it has finished accessing
 * the compression block @cb_start as it is a per-CPU buffer.
 */
static int ntfs_decompress(struct page *dest_pages[], int *dest_index,
		int *dest_ofs, const int dest_max_index, const int dest_max_ofs,
		const int xpage, char *xpage_done, u8 *const cb_start,
		const u32 cb_size, const loff_t i_size,
		const s64 initialized_size)
{
	/*
	 * Pointers into the compressed data, i.e. the compression block (cb),
	 * and the therein contained sub-blocks (sb).
	 */
	u8 *cb_end = cb_start + cb_size; /* End of cb. */
	u8 *cb = cb_start;	/* Current position in cb. */
	u8 *cb_sb_start = cb;	/* Beginning of the current sb in the cb. */
	u8 *cb_sb_end;		/* End of current sb / beginning of next sb. */

	/* Variables for uncompressed data / destination. */
	struct page *dp;	/* Current destination page being worked on. */
	u8 *dp_addr;		/* Current pointer into dp. */
	u8 *dp_sb_start;	/* Start of current sub-block in dp. */
	u8 *dp_sb_end;		/* End of current sb in dp (dp_sb_start +
				   NTFS_SB_SIZE). */
	u16 do_sb_start;	/* @dest_ofs when starting this sub-block. */
	u16 do_sb_end;		/* @dest_ofs of end of this sb (do_sb_start +
				   NTFS_SB_SIZE). */

	/* Variables for tag and token parsing. */
	u8 tag;			/* Current tag. */
	int token;		/* Loop counter for the eight tokens in tag. */

	/* Need this because we can't sleep, so need two stages. */
	int completed_pages[dest_max_index - *dest_index + 1];
	int nr_completed_pages = 0;

	/* Default error code. */
	int err = -EOVERFLOW;

	ntfs_debug("Entering, cb_size = 0x%x.", cb_size);
do_next_sb:
	ntfs_debug("Beginning sub-block at offset = 0x%zx in the cb.",
			cb - cb_start);
	/*
	 * Have we reached the end of the compression block or the end of the
	 * decompressed data?  The latter can happen for example if the current
	 * position in the compression block is one byte before its end so the
	 * first two checks do not detect it.
	 */
	if (cb == cb_end || !le16_to_cpup((le16*)cb) ||
			(*dest_index == dest_max_index &&
			*dest_ofs == dest_max_ofs)) {
		int i;

		ntfs_debug("Completed. Returning success (0).");
		err = 0;
return_error:
		/* We can sleep from now on, so we drop lock. */
		spin_unlock(&ntfs_cb_lock);
		/* Second stage: finalize completed pages. */
		if (nr_completed_pages > 0) {
			for (i = 0; i < nr_completed_pages; i++) {
				int di = completed_pages[i];

				dp = dest_pages[di];
				/*
				 * If we are outside the initialized size, zero
				 * the out of bounds page range.
				 */
				handle_bounds_compressed_page(dp, i_size,
						initialized_size);
				flush_dcache_page(dp);
				kunmap(dp);
				SetPageUptodate(dp);
				unlock_page(dp);
				if (di == xpage)
					*xpage_done = 1;
				else
					page_cache_release(dp);
				dest_pages[di] = NULL;
			}
		}
		return err;
	}

	/* Set up offsets for the current sub-block destination. */
	do_sb_start = *dest_ofs;
	do_sb_end = do_sb_start + NTFS_SB_SIZE;

	/* Check that we are still within allowed boundaries. */
	if (*dest_index == dest_max_index && do_sb_end > dest_max_ofs)
		goto return_overflow;

	/* Does the minimum size of a compressed sb overflow valid range? */
	if (cb + 6 > cb_end)
		goto return_overflow;

	/* Set up the current sub-block source pointers and validate range. */
	cb_sb_start = cb;
	cb_sb_end = cb_sb_start + (le16_to_cpup((le16*)cb) & NTFS_SB_SIZE_MASK)
			+ 3;
	if (cb_sb_end > cb_end)
		goto return_overflow;

	/* Get the current destination page. */
	dp = dest_pages[*dest_index];
	if (!dp) {
		/* No page present. Skip decompression of this sub-block. */
		cb = cb_sb_end;

		/* Advance destination position to next sub-block. */
		*dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_CACHE_MASK;
		if (!*dest_ofs && (++*dest_index > dest_max_index))
			goto return_overflow;
		goto do_next_sb;
	}

	/* We have a valid destination page. Set up the destination pointers. */
	dp_addr = (u8*)page_address(dp) + do_sb_start;

	/* Now, we are ready to process the current sub-block (sb). */
	if (!(le16_to_cpup((le16*)cb) & NTFS_SB_IS_COMPRESSED)) {
		ntfs_debug("Found uncompressed sub-block.");
		/* This sb is not compressed, just copy it into destination. */

		/* Advance source position to first data byte. */
		cb += 2;

		/* An uncompressed sb must be full size. */
		if (cb_sb_end - cb != NTFS_SB_SIZE)
			goto return_overflow;

		/* Copy the block and advance the source position. */
		memcpy(dp_addr, cb, NTFS_SB_SIZE);
		cb += NTFS_SB_SIZE;

		/* Advance destination position to next sub-block. */
		*dest_ofs += NTFS_SB_SIZE;
		if (!(*dest_ofs &= ~PAGE_CACHE_MASK)) {
finalize_page:
			/*
			 * First stage: add current page index to array of
			 * completed pages.
			 */
			completed_pages[nr_completed_pages++] = *dest_index;
			if (++*dest_index > dest_max_index)
				goto return_overflow;
		}
		goto do_next_sb;
	}
	ntfs_debug("Found compressed sub-block.");
	/* This sb is compressed, decompress it into destination. */

	/* Set up destination pointers. */
	dp_sb_start = dp_addr;
	dp_sb_end = dp_sb_start + NTFS_SB_SIZE;

	/* Forward to the first tag in the sub-block. */
	cb += 2;
do_next_tag:
	if (cb == cb_sb_end) {
		/* Check if the decompressed sub-block was not full-length. */
		if (dp_addr < dp_sb_end) {
			int nr_bytes = do_sb_end - *dest_ofs;

			ntfs_debug("Filling incomplete sub-block with "
					"zeroes.");
			/* Zero remainder and update destination position. */
			memset(dp_addr, 0, nr_bytes);
			*dest_ofs += nr_bytes;
		}
		/* We have finished the current sub-block. */
		if (!(*dest_ofs &= ~PAGE_CACHE_MASK))
			goto finalize_page;
		goto do_next_sb;
	}

	/* Check we are still in range. */
	if (cb > cb_sb_end || dp_addr > dp_sb_end)
		goto return_overflow;

	/* Get the next tag and advance to first token. */
	tag = *cb++;

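	/*
	 * A note on the tag format (example value invented): each tag byte
	 * describes the eight tokens that follow it, least significant bit
	 * first.  A clear bit marks a literal symbol token and a set bit a
	 * phrase (back-reference) token, so a tag of 0x03 would make the
	 * next two tokens phrases and the remaining six literals.
	 */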
	/* Parse the eight tokens described by the tag. */
	for (token = 0; token < 8; token++, tag >>= 1) {
		u16 lg, pt, length, max_non_overlap;
		register u16 i;
		u8 *dp_back_addr;

		/* Check if we are done / still in range. */
		if (cb >= cb_sb_end || dp_addr > dp_sb_end)
			break;

		/* Determine token type and parse appropriately. */
		if ((tag & NTFS_TOKEN_MASK) == NTFS_SYMBOL_TOKEN) {
			/*
			 * We have a symbol token, copy the symbol across, and
			 * advance the source and destination positions.
			 */
			*dp_addr++ = *cb++;
			++*dest_ofs;

			/* Continue with the next token. */
			continue;
		}

		/*
		 * We have a phrase token. Make sure it is not the first token
		 * in the sb as this is illegal and would confuse the code
		 * below.
		 */
		if (dp_addr == dp_sb_start)
			goto return_overflow;

		/*
		 * Determine the number of bytes to go back (p) and the number
		 * of bytes to copy (l). We use an optimized algorithm in which
		 * we first calculate log2(current destination position in sb),
		 * which allows determination of l and p in O(1) rather than
		 * O(n). We just need an arch-optimized log2() function now.
		 */
		lg = 0;
		for (i = *dest_ofs - do_sb_start - 1; i >= 0x10; i >>= 1)
			lg++;

		/* Get the phrase token into pt. */
		pt = le16_to_cpup((le16*)cb);

		/*
		 * Calculate starting position of the byte sequence in
		 * the destination using the fact that p = (pt >> (12 - lg)) + 1
		 * and make sure we don't go too far back.
		 */
		dp_back_addr = dp_addr - (pt >> (12 - lg)) - 1;
		if (dp_back_addr < dp_sb_start)
			goto return_overflow;

		/* Now calculate the length of the byte sequence. */
		length = (pt & (0xfff >> lg)) + 3;
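
		/*
		 * Worked example (values invented for illustration): if the
		 * current position is 0x50 bytes past do_sb_start, the loop
		 * above leaves lg = 3 (0x4f needs three right shifts to drop
		 * below 0x10), so a phrase token pt = 0x1234 decodes to
		 * p = (0x1234 >> 9) + 1 = 10 bytes back and
		 * l = (0x1234 & 0x1ff) + 3 = 55 bytes to copy.  As l > p,
		 * the copy overlaps and is done byte by byte below.
		 */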

		/* Advance destination position and verify it is in range. */
		*dest_ofs += length;
		if (*dest_ofs > do_sb_end)
			goto return_overflow;

		/* The number of non-overlapping bytes. */
		max_non_overlap = dp_addr - dp_back_addr;

		if (length <= max_non_overlap) {
			/* The byte sequence doesn't overlap, just copy it. */
			memcpy(dp_addr, dp_back_addr, length);

			/* Advance destination pointer. */
			dp_addr += length;
		} else {
			/*
			 * The byte sequence does overlap, copy non-overlapping
			 * part and then do a slow byte by byte copy for the
			 * overlapping part. Also, advance the destination
			 * pointer.
			 */
			memcpy(dp_addr, dp_back_addr, max_non_overlap);
			dp_addr += max_non_overlap;
			dp_back_addr += max_non_overlap;
			length -= max_non_overlap;
			while (length--)
				*dp_addr++ = *dp_back_addr++;
		}

		/* Advance source position and continue with the next token. */
		cb += 2;
	}

	/* No tokens left in the current tag. Continue with the next tag. */
	goto do_next_tag;

return_overflow:
	ntfs_error(NULL, "Failed. Returning -EOVERFLOW.");
	goto return_error;
}

/**
 * ntfs_read_compressed_block - read a compressed block into the page cache
 * @page:	locked page in the compression block(s) we need to read
 *
 * When we are called the page has already been verified to be locked and the
 * attribute is known to be non-resident, not encrypted, but compressed.
 *
 * 1. Determine which compression block(s) @page is in.
 * 2. Get hold of all pages corresponding to this/these compression block(s).
 * 3. Read the (first) compression block.
 * 4. Decompress it into the corresponding pages.
 * 5. Throw the compressed data away and proceed to 3. for the next compression
 *    block or return success if no more compression blocks are left.
 *
 * Warning: We have to be careful what we do about existing pages. They might
 * have been written to so that we would lose data if we were to just overwrite
 * them with the out-of-date uncompressed data.
 *
 * FIXME: For PAGE_CACHE_SIZE > cb_size we are not doing the Right Thing(TM) at
 * the end of the file I think. We need to detect this case and zero the out
 * of bounds remainder of the page in question and mark it as handled. At the
 * moment we would just return -EIO on such a page. This bug will only become
 * apparent if pages are above 8kiB and the NTFS volume only uses 512 byte
 * clusters so is probably not going to be seen by anyone. Still this should
 * be fixed. (AIA)
 *
 * FIXME: Again for PAGE_CACHE_SIZE > cb_size we are screwing up both in
 * handling sparse and compressed cbs. (AIA)
 *
 * FIXME: At the moment we don't do any zeroing out in the case that
 * initialized_size is less than data_size. This should be safe because of the
 * nature of the compression algorithm used. Just in case we check and output
 * an error message in read inode if the two sizes are not equal for a
 * compressed file. (AIA)
 */
int ntfs_read_compressed_block(struct page *page)
{
	loff_t i_size;
	s64 initialized_size;
	struct address_space *mapping = page->mapping;
	ntfs_inode *ni = NTFS_I(mapping->host);
	ntfs_volume *vol = ni->vol;
	struct super_block *sb = vol->sb;
	runlist_element *rl;
	unsigned long flags, block_size = sb->s_blocksize;
	unsigned char block_size_bits = sb->s_blocksize_bits;
	u8 *cb, *cb_pos, *cb_end;
	struct buffer_head **bhs;
	unsigned long offset, index = page->index;
	u32 cb_size = ni->itype.compressed.block_size;
	u64 cb_size_mask = cb_size - 1UL;
	VCN vcn;
	LCN lcn;
	/* The first wanted vcn (minimum alignment is PAGE_CACHE_SIZE). */
	VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >>
			vol->cluster_size_bits;
	/*
	 * The first vcn after the last wanted vcn (minimum alignment is again
	 * PAGE_CACHE_SIZE).
	 */
	VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1)
			& ~cb_size_mask) >> vol->cluster_size_bits;
	/* Number of compression blocks (cbs) in the wanted vcn range. */
	unsigned int nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits
			>> ni->itype.compressed.block_size_bits;
	/*
	 * Number of pages required to store the uncompressed data from all
	 * compression blocks (cbs) overlapping @page. Due to alignment
	 * guarantees of start_vcn and end_vcn, no need to round up here.
	 */
	unsigned int nr_pages = (end_vcn - start_vcn) <<
			vol->cluster_size_bits >> PAGE_CACHE_SHIFT;
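	/*
	 * For scale (an illustrative configuration, not a requirement): with
	 * 4kiB clusters and the usual 16 cluster compression blocks,
	 * cb_size = 64kiB, so with 4kiB pages a single cb spans 16 pages and
	 * nr_pages = 16 when exactly one cb overlaps @page.
	 */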
	unsigned int xpage, max_page, cur_page, cur_ofs, i;
	unsigned int cb_clusters, cb_max_ofs;
	int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0;
	struct page **pages;
	unsigned char xpage_done = 0;

	ntfs_debug("Entering, page->index = 0x%lx, cb_size = 0x%x, nr_pages = "
			"%i.", index, cb_size, nr_pages);
	/*
	 * Bad things happen if we get here for anything that is not an
	 * unnamed $DATA attribute.
	 */
	BUG_ON(ni->type != AT_DATA);
	BUG_ON(ni->name_len);

	pages = kmalloc(nr_pages * sizeof(struct page *), GFP_NOFS);

	/* Allocate memory to store the buffer heads we need. */
	bhs_size = cb_size / block_size * sizeof(struct buffer_head *);
	bhs = kmalloc(bhs_size, GFP_NOFS);

	if (unlikely(!pages || !bhs)) {
		kfree(bhs);
		kfree(pages);
		unlock_page(page);
		ntfs_error(vol->sb, "Failed to allocate internal buffers.");
		return -ENOMEM;
	}

	/*
	 * We have already been given one page, this is the one we must do.
	 * Once again, the alignment guarantees keep it simple.
	 */
	offset = start_vcn << vol->cluster_size_bits >> PAGE_CACHE_SHIFT;
	xpage = index - offset;
	pages[xpage] = page;
	/*
	 * The remaining pages need to be allocated and inserted into the page
	 * cache, alignment guarantees keep all the below much simpler. (-8
	 */
	read_lock_irqsave(&ni->size_lock, flags);
	i_size = i_size_read(VFS_I(ni));
	initialized_size = ni->initialized_size;
	read_unlock_irqrestore(&ni->size_lock, flags);
	max_page = ((i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
			offset;
	/* Is the page fully outside i_size? (truncate in progress) */
	if (xpage >= max_page) {
		kfree(bhs);
		kfree(pages);
		zero_user(page, 0, PAGE_CACHE_SIZE);
		ntfs_debug("Compressed read outside i_size - truncated?");
		SetPageUptodate(page);
		unlock_page(page);
		return 0;
	}
	if (nr_pages < max_page)
		max_page = nr_pages;
	for (i = 0; i < max_page; i++, offset++) {
		if (i != xpage)
			pages[i] = grab_cache_page_nowait(mapping, offset);
		page = pages[i];
		if (page) {
			/*
			 * We only (re)read the page if it isn't already read
			 * in and/or dirty or we would be losing data or at
			 * least wasting our time.
			 */
			if (!PageDirty(page) && (!PageUptodate(page) ||
					PageError(page))) {
				ClearPageError(page);
				kmap(page);
				continue;
			}
			unlock_page(page);
			page_cache_release(page);
			pages[i] = NULL;
		}
	}

	/*
	 * We have the runlist, and all the destination pages we need to fill.
	 * Now read the first compression block.
	 */
	cur_page = 0;
	cur_ofs = 0;
	cb_clusters = ni->itype.compressed.block_clusters;
do_next_cb:
	nr_cbs--;
	nr_bhs = 0;

	/* Read all cb buffer heads one cluster at a time. */
	rl = NULL;
	for (vcn = start_vcn, start_vcn += cb_clusters; vcn < start_vcn;
			vcn++) {
		bool is_retry = false;

		if (!rl) {
lock_retry_remap:
			down_read(&ni->runlist.lock);
			rl = ni->runlist.rl;
		}
		if (likely(rl != NULL)) {
			/* Seek to element containing target vcn. */
			while (rl->length && rl[1].vcn <= vcn)
				rl++;
			lcn = ntfs_rl_vcn_to_lcn(rl, vcn);
		} else
			lcn = LCN_RL_NOT_MAPPED;
		ntfs_debug("Reading vcn = 0x%llx, lcn = 0x%llx.",
				(unsigned long long)vcn,
				(unsigned long long)lcn);
		if (lcn < 0) {
			/*
			 * When we reach the first sparse cluster we have
			 * finished with the cb.
			 */
			if (lcn == LCN_HOLE)
				break;
			if (is_retry || lcn != LCN_RL_NOT_MAPPED)
				goto rl_err;
			is_retry = true;
			/*
			 * Attempt to map runlist, dropping lock for the
			 * duration.
			 */
			up_read(&ni->runlist.lock);
			if (!ntfs_map_runlist(ni, vcn))
				goto lock_retry_remap;
			goto map_rl_err;
		}
		block = lcn << vol->cluster_size_bits >> block_size_bits;
		/* Read the lcn from device in chunks of block_size bytes. */
		max_block = block + (vol->cluster_size >> block_size_bits);
		do {
			ntfs_debug("block = 0x%x.", block);
			if (unlikely(!(bhs[nr_bhs] = sb_getblk(sb, block))))
				goto getblk_err;
			nr_bhs++;
		} while (++block < max_block);
	}

	/* Release the lock if we took it. */
	if (rl)
		up_read(&ni->runlist.lock);

	/* Set up and initiate io on all buffer heads. */
	for (i = 0; i < nr_bhs; i++) {
		struct buffer_head *tbh = bhs[i];

		if (!trylock_buffer(tbh))
			continue;
		if (unlikely(buffer_uptodate(tbh))) {
			unlock_buffer(tbh);
			continue;
		}
		get_bh(tbh);
		tbh->b_end_io = end_buffer_read_sync;
		submit_bh(READ, tbh);
	}

	/* Wait for io completion on all buffer heads. */
	for (i = 0; i < nr_bhs; i++) {
		struct buffer_head *tbh = bhs[i];

		if (buffer_uptodate(tbh))
			continue;
		wait_on_buffer(tbh);
		/*
		 * We need an optimization barrier here, otherwise we start
		 * hitting the below fixup code when accessing a loopback
		 * mounted ntfs partition. This indicates either there is a
		 * race condition in the loop driver or, more likely, gcc
		 * overoptimises the code without the barrier and it doesn't
		 * do the Right Thing(TM).
		 */
		barrier();
		if (unlikely(!buffer_uptodate(tbh))) {
			ntfs_warning(vol->sb, "Buffer is unlocked but not "
					"uptodate! Unplugging the disk queue "
					"and rescheduling.");
			get_bh(tbh);
			io_schedule();
			put_bh(tbh);
			if (unlikely(!buffer_uptodate(tbh)))
				goto read_err;
			ntfs_warning(vol->sb, "Buffer is now uptodate. Good.");
		}
	}

	/*
	 * Get the compression buffer. We must not sleep any more
	 * until we are finished with it.
	 */
	spin_lock(&ntfs_cb_lock);
	cb = ntfs_compression_buffer;

	BUG_ON(!cb);

	cb_pos = cb;
	cb_end = cb + cb_size;

	/* Copy the buffer heads into the contiguous buffer. */
	for (i = 0; i < nr_bhs; i++) {
		memcpy(cb_pos, bhs[i]->b_data, block_size);
		cb_pos += block_size;
	}

	/* Just a precaution. */
	if (cb_pos + 2 <= cb + cb_size)
		*(u16*)cb_pos = 0;

	/* Reset cb_pos back to the beginning. */
	cb_pos = cb;

	/* We now have both source (if present) and destination. */
	ntfs_debug("Successfully read the compression block.");

	/* The last page and maximum offset within it for the current cb. */
	cb_max_page = (cur_page << PAGE_CACHE_SHIFT) + cur_ofs + cb_size;
	cb_max_ofs = cb_max_page & ~PAGE_CACHE_MASK;
	cb_max_page >>= PAGE_CACHE_SHIFT;

	/* Catch end of file inside a compression block. */
	if (cb_max_page > max_page)
		cb_max_page = max_page;
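
	/*
	 * Illustrative numbers for the arithmetic above: with 4kiB pages and
	 * a 64kiB cb starting at cur_page = 0, cur_ofs = 0, we get
	 * cb_max_page = 16 and cb_max_ofs = 0, i.e. the cb ends exactly on a
	 * page boundary; a non-zero cb_max_ofs would mean the cb ends that
	 * many bytes into pages[cb_max_page].
	 */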

	if (vcn == start_vcn - cb_clusters) {
		/* Sparse cb, zero out page range overlapping the cb. */
		ntfs_debug("Found sparse compression block.");
		/* We can sleep from now on, so we drop lock. */
		spin_unlock(&ntfs_cb_lock);
		if (cb_max_ofs)
			cb_max_page--;
		for (; cur_page < cb_max_page; cur_page++) {
			page = pages[cur_page];
			if (page) {
				/*
				 * FIXME: Using clear_page() will become wrong
				 * when we get PAGE_CACHE_SIZE != PAGE_SIZE but
				 * for now there is no problem.
				 */
				if (likely(!cur_ofs))
					clear_page(page_address(page));
				else
					memset(page_address(page) + cur_ofs, 0,
							PAGE_CACHE_SIZE -
							cur_ofs);
				flush_dcache_page(page);
				kunmap(page);
				SetPageUptodate(page);
				unlock_page(page);
				if (cur_page == xpage)
					xpage_done = 1;
				else
					page_cache_release(page);
				pages[cur_page] = NULL;
			}
			cb_pos += PAGE_CACHE_SIZE - cur_ofs;
			cur_ofs = 0;
			if (cb_pos >= cb_end)
				break;
		}
		/* If we have a partial final page, deal with it now. */
		if (cb_max_ofs && cb_pos < cb_end) {
			page = pages[cur_page];
			if (page)
				memset(page_address(page) + cur_ofs, 0,
						cb_max_ofs - cur_ofs);
			/*
			 * No need to update cb_pos at this stage:
			 *	cb_pos += cb_max_ofs - cur_ofs;
			 */
			cur_ofs = cb_max_ofs;
		}
	} else if (vcn == start_vcn) {
		/* We can't sleep so we need two stages. */
		unsigned int cur2_page = cur_page;
		unsigned int cur_ofs2 = cur_ofs;
		u8 *cb_pos2 = cb_pos;

		ntfs_debug("Found uncompressed compression block.");
		/* Uncompressed cb, copy it to the destination pages. */
		/*
		 * TODO: As a big optimization, we could detect this case
		 * before we read all the pages and use block_read_full_page()
		 * on all full pages instead (we still have to treat partial
		 * pages specially but at least we are getting rid of the
		 * synchronous io for the majority of pages).
		 * Or if we choose not to do the read-ahead/-behind stuff, we
		 * could just return block_read_full_page(pages[xpage]) as long
		 * as PAGE_CACHE_SIZE <= cb_size.
		 */
		if (cb_max_ofs)
			cb_max_page--;
		/* First stage: copy data into destination pages. */
		for (; cur_page < cb_max_page; cur_page++) {
			page = pages[cur_page];
			if (page)
				memcpy(page_address(page) + cur_ofs, cb_pos,
						PAGE_CACHE_SIZE - cur_ofs);
			cb_pos += PAGE_CACHE_SIZE - cur_ofs;
			cur_ofs = 0;
			if (cb_pos >= cb_end)
				break;
		}
		/* If we have a partial final page, deal with it now. */
		if (cb_max_ofs && cb_pos < cb_end) {
			page = pages[cur_page];
			if (page)
				memcpy(page_address(page) + cur_ofs, cb_pos,
						cb_max_ofs - cur_ofs);
			cb_pos += cb_max_ofs - cur_ofs;
			cur_ofs = cb_max_ofs;
		}
		/* We can sleep from now on, so drop lock. */
		spin_unlock(&ntfs_cb_lock);
		/* Second stage: finalize pages. */
		for (; cur2_page < cb_max_page; cur2_page++) {
			page = pages[cur2_page];
			if (page) {
				/*
				 * If we are outside the initialized size, zero
				 * the out of bounds page range.
				 */
				handle_bounds_compressed_page(page, i_size,
						initialized_size);
				flush_dcache_page(page);
				kunmap(page);
				SetPageUptodate(page);
				unlock_page(page);
				if (cur2_page == xpage)
					xpage_done = 1;
				else
					page_cache_release(page);
				pages[cur2_page] = NULL;
			}
			cb_pos2 += PAGE_CACHE_SIZE - cur_ofs2;
			cur_ofs2 = 0;
			if (cb_pos2 >= cb_end)
				break;
		}
	} else {
		/* Compressed cb, decompress it into the destination page(s). */
		unsigned int prev_cur_page = cur_page;

		ntfs_debug("Found compressed compression block.");
		err = ntfs_decompress(pages, &cur_page, &cur_ofs,
				cb_max_page, cb_max_ofs, xpage, &xpage_done,
				cb_pos, cb_size - (cb_pos - cb), i_size,
				initialized_size);
		/*
		 * We can sleep from now on, lock already dropped by
		 * ntfs_decompress().
		 */
		if (err) {
			ntfs_error(vol->sb, "ntfs_decompress() failed in inode "
					"0x%lx with error code %i. Skipping "
					"this compression block.",
					ni->mft_no, -err);
			/* Release the unfinished pages. */
			for (; prev_cur_page < cur_page; prev_cur_page++) {
				page = pages[prev_cur_page];
				if (page) {
					flush_dcache_page(page);
					kunmap(page);
					unlock_page(page);
					if (prev_cur_page != xpage)
						page_cache_release(page);
					pages[prev_cur_page] = NULL;
				}
			}
		}
	}

	/* Release the buffer heads. */
	for (i = 0; i < nr_bhs; i++)
		brelse(bhs[i]);

	/* Do we have more work to do? */
	if (nr_cbs)
		goto do_next_cb;

	/* We no longer need the list of buffer heads. */
	kfree(bhs);

	/* Clean up if we have any pages left. Should never happen. */
	for (cur_page = 0; cur_page < max_page; cur_page++) {
		page = pages[cur_page];
		if (page) {
			ntfs_error(vol->sb, "Still have pages left! "
					"Terminating them with extreme "
					"prejudice.  Inode 0x%lx, page index "
					"0x%lx.", ni->mft_no, page->index);
			flush_dcache_page(page);
			kunmap(page);
			unlock_page(page);
			if (cur_page != xpage)
				page_cache_release(page);
			pages[cur_page] = NULL;
		}
	}

	/* We no longer need the list of pages. */
	kfree(pages);

	/* If we have completed the requested page, we return success. */
	if (likely(xpage_done))
		return 0;

	ntfs_debug("Failed. Returning error code %s.", err == -EOVERFLOW ?
			"EOVERFLOW" : (!err ? "EIO" : "unknown error"));
	return err < 0 ? err : -EIO;

read_err:
	ntfs_error(vol->sb, "IO error while reading compressed data.");
	/* Release the buffer heads. */
	for (i = 0; i < nr_bhs; i++)
		brelse(bhs[i]);
	goto err_out;

map_rl_err:
	ntfs_error(vol->sb, "ntfs_map_runlist() failed. Cannot read "
			"compression block.");
	goto err_out;

rl_err:
	up_read(&ni->runlist.lock);
	ntfs_error(vol->sb, "ntfs_rl_vcn_to_lcn() failed. Cannot read "
			"compression block.");
	goto err_out;

getblk_err:
	up_read(&ni->runlist.lock);
	ntfs_error(vol->sb, "getblk() failed. Cannot read compression block.");

err_out:
	kfree(bhs);
	for (i = cur_page; i < max_page; i++) {
		page = pages[i];
		if (page) {
			flush_dcache_page(page);
			kunmap(page);
			unlock_page(page);
			if (i != xpage)
				page_cache_release(page);
		}
	}
	kfree(pages);
	return -EIO;
}