• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1  /**
2   * dir.c - NTFS kernel directory operations. Part of the Linux-NTFS project.
3   *
4   * Copyright (c) 2001-2007 Anton Altaparmakov
5   * Copyright (c) 2002 Richard Russon
6   *
7   * This program/include file is free software; you can redistribute it and/or
8   * modify it under the terms of the GNU General Public License as published
9   * by the Free Software Foundation; either version 2 of the License, or
10   * (at your option) any later version.
11   *
12   * This program/include file is distributed in the hope that it will be
13   * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
14   * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU General Public License for more details.
16   *
17   * You should have received a copy of the GNU General Public License
18   * along with this program (in the main directory of the Linux-NTFS
19   * distribution in the file COPYING); if not, write to the Free Software
20   * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
21   */
22  
23  #include <linux/buffer_head.h>
24  #include <linux/slab.h>
25  
26  #include "dir.h"
27  #include "aops.h"
28  #include "attrib.h"
29  #include "mft.h"
30  #include "debug.h"
31  #include "ntfs.h"
32  
33  /**
34   * The little endian Unicode string $I30 as a global constant.
35   */
36  ntfschar I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'),
37  		cpu_to_le16('3'),	cpu_to_le16('0'), 0 };
38  
39  /**
40   * ntfs_lookup_inode_by_name - find an inode in a directory given its name
41   * @dir_ni:	ntfs inode of the directory in which to search for the name
42   * @uname:	Unicode name for which to search in the directory
43   * @uname_len:	length of the name @uname in Unicode characters
44   * @res:	return the found file name if necessary (see below)
45   *
46   * Look for an inode with name @uname in the directory with inode @dir_ni.
47   * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
48   * the Unicode name. If the name is found in the directory, the corresponding
49   * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
50   * is a 64-bit number containing the sequence number.
51   *
52   * On error, a negative value is returned corresponding to the error code. In
53   * particular if the inode is not found -ENOENT is returned. Note that you
54   * can't just check the return value for being negative, you have to check the
55   * inode number for being negative which you can extract using MREC(return
56   * value).
57   *
58   * Note, @uname_len does not include the (optional) terminating NULL character.
59   *
60   * Note, we look for a case sensitive match first but we also look for a case
61   * insensitive match at the same time. If we find a case insensitive match, we
62   * save that for the case that we don't find an exact match, where we return
63   * the case insensitive match and setup @res (which we allocate!) with the mft
64   * reference, the file name type, length and with a copy of the little endian
65   * Unicode file name itself. If we match a file name which is in the DOS name
66   * space, we only return the mft reference and file name type in @res.
67   * ntfs_lookup() then uses this to find the long file name in the inode itself.
68   * This is to avoid polluting the dcache with short file names. We want them to
69   * work but we don't care for how quickly one can access them. This also fixes
70   * the dcache aliasing issues.
71   *
72   * Locking:  - Caller must hold i_mutex on the directory.
73   *	     - Each page cache page in the index allocation mapping must be
74   *	       locked whilst being accessed otherwise we may find a corrupt
75   *	       page due to it being under ->writepage at the moment which
76   *	       applies the mst protection fixups before writing out and then
77   *	       removes them again after the write is complete after which it
78   *	       unlocks the page.
79   */
ntfs_lookup_inode_by_name(ntfs_inode * dir_ni,const ntfschar * uname,const int uname_len,ntfs_name ** res)80  MFT_REF ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
81  		const int uname_len, ntfs_name **res)
82  {
83  	ntfs_volume *vol = dir_ni->vol;
84  	struct super_block *sb = vol->sb;
85  	MFT_RECORD *m;
86  	INDEX_ROOT *ir;
87  	INDEX_ENTRY *ie;
88  	INDEX_ALLOCATION *ia;
89  	u8 *index_end;
90  	u64 mref;
91  	ntfs_attr_search_ctx *ctx;
92  	int err, rc;
93  	VCN vcn, old_vcn;
94  	struct address_space *ia_mapping;
95  	struct page *page;
96  	u8 *kaddr;
97  	ntfs_name *name = NULL;
98  
99  	BUG_ON(!S_ISDIR(VFS_I(dir_ni)->i_mode));
100  	BUG_ON(NInoAttr(dir_ni));
101  	/* Get hold of the mft record for the directory. */
102  	m = map_mft_record(dir_ni);
103  	if (IS_ERR(m)) {
104  		ntfs_error(sb, "map_mft_record() failed with error code %ld.",
105  				-PTR_ERR(m));
106  		return ERR_MREF(PTR_ERR(m));
107  	}
108  	ctx = ntfs_attr_get_search_ctx(dir_ni, m);
109  	if (unlikely(!ctx)) {
110  		err = -ENOMEM;
111  		goto err_out;
112  	}
113  	/* Find the index root attribute in the mft record. */
114  	err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
115  			0, ctx);
116  	if (unlikely(err)) {
117  		if (err == -ENOENT) {
118  			ntfs_error(sb, "Index root attribute missing in "
119  					"directory inode 0x%lx.",
120  					dir_ni->mft_no);
121  			err = -EIO;
122  		}
123  		goto err_out;
124  	}
125  	/* Get to the index root value (it's been verified in read_inode). */
126  	ir = (INDEX_ROOT*)((u8*)ctx->attr +
127  			le16_to_cpu(ctx->attr->data.resident.value_offset));
128  	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
129  	/* The first index entry. */
130  	ie = (INDEX_ENTRY*)((u8*)&ir->index +
131  			le32_to_cpu(ir->index.entries_offset));
132  	/*
133  	 * Loop until we exceed valid memory (corruption case) or until we
134  	 * reach the last entry.
135  	 */
136  	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
137  		/* Bounds checks. */
138  		if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
139  				sizeof(INDEX_ENTRY_HEADER) > index_end ||
140  				(u8*)ie + le16_to_cpu(ie->key_length) >
141  				index_end)
142  			goto dir_err_out;
143  		/*
144  		 * The last entry cannot contain a name. It can however contain
145  		 * a pointer to a child node in the B+tree so we just break out.
146  		 */
147  		if (ie->flags & INDEX_ENTRY_END)
148  			break;
149  		/*
150  		 * We perform a case sensitive comparison and if that matches
151  		 * we are done and return the mft reference of the inode (i.e.
152  		 * the inode number together with the sequence number for
153  		 * consistency checking). We convert it to cpu format before
154  		 * returning.
155  		 */
156  		if (ntfs_are_names_equal(uname, uname_len,
157  				(ntfschar*)&ie->key.file_name.file_name,
158  				ie->key.file_name.file_name_length,
159  				CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
160  found_it:
161  			/*
162  			 * We have a perfect match, so we don't need to care
163  			 * about having matched imperfectly before, so we can
164  			 * free name and set *res to NULL.
165  			 * However, if the perfect match is a short file name,
166  			 * we need to signal this through *res, so that
167  			 * ntfs_lookup() can fix dcache aliasing issues.
168  			 * As an optimization we just reuse an existing
169  			 * allocation of *res.
170  			 */
171  			if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
172  				if (!name) {
173  					name = kmalloc(sizeof(ntfs_name),
174  							GFP_NOFS);
175  					if (!name) {
176  						err = -ENOMEM;
177  						goto err_out;
178  					}
179  				}
180  				name->mref = le64_to_cpu(
181  						ie->data.dir.indexed_file);
182  				name->type = FILE_NAME_DOS;
183  				name->len = 0;
184  				*res = name;
185  			} else {
186  				kfree(name);
187  				*res = NULL;
188  			}
189  			mref = le64_to_cpu(ie->data.dir.indexed_file);
190  			ntfs_attr_put_search_ctx(ctx);
191  			unmap_mft_record(dir_ni);
192  			return mref;
193  		}
194  		/*
195  		 * For a case insensitive mount, we also perform a case
196  		 * insensitive comparison (provided the file name is not in the
197  		 * POSIX namespace). If the comparison matches, and the name is
198  		 * in the WIN32 namespace, we cache the filename in *res so
199  		 * that the caller, ntfs_lookup(), can work on it. If the
200  		 * comparison matches, and the name is in the DOS namespace, we
201  		 * only cache the mft reference and the file name type (we set
202  		 * the name length to zero for simplicity).
203  		 */
204  		if (!NVolCaseSensitive(vol) &&
205  				ie->key.file_name.file_name_type &&
206  				ntfs_are_names_equal(uname, uname_len,
207  				(ntfschar*)&ie->key.file_name.file_name,
208  				ie->key.file_name.file_name_length,
209  				IGNORE_CASE, vol->upcase, vol->upcase_len)) {
210  			int name_size = sizeof(ntfs_name);
211  			u8 type = ie->key.file_name.file_name_type;
212  			u8 len = ie->key.file_name.file_name_length;
213  
214  			/* Only one case insensitive matching name allowed. */
215  			if (name) {
216  				ntfs_error(sb, "Found already allocated name "
217  						"in phase 1. Please run chkdsk "
218  						"and if that doesn't find any "
219  						"errors please report you saw "
220  						"this message to "
221  						"linux-ntfs-dev@lists."
222  						"sourceforge.net.");
223  				goto dir_err_out;
224  			}
225  
226  			if (type != FILE_NAME_DOS)
227  				name_size += len * sizeof(ntfschar);
228  			name = kmalloc(name_size, GFP_NOFS);
229  			if (!name) {
230  				err = -ENOMEM;
231  				goto err_out;
232  			}
233  			name->mref = le64_to_cpu(ie->data.dir.indexed_file);
234  			name->type = type;
235  			if (type != FILE_NAME_DOS) {
236  				name->len = len;
237  				memcpy(name->name, ie->key.file_name.file_name,
238  						len * sizeof(ntfschar));
239  			} else
240  				name->len = 0;
241  			*res = name;
242  		}
243  		/*
244  		 * Not a perfect match, need to do full blown collation so we
245  		 * know which way in the B+tree we have to go.
246  		 */
247  		rc = ntfs_collate_names(uname, uname_len,
248  				(ntfschar*)&ie->key.file_name.file_name,
249  				ie->key.file_name.file_name_length, 1,
250  				IGNORE_CASE, vol->upcase, vol->upcase_len);
251  		/*
252  		 * If uname collates before the name of the current entry, there
253  		 * is definitely no such name in this index but we might need to
254  		 * descend into the B+tree so we just break out of the loop.
255  		 */
256  		if (rc == -1)
257  			break;
258  		/* The names are not equal, continue the search. */
259  		if (rc)
260  			continue;
261  		/*
262  		 * Names match with case insensitive comparison, now try the
263  		 * case sensitive comparison, which is required for proper
264  		 * collation.
265  		 */
266  		rc = ntfs_collate_names(uname, uname_len,
267  				(ntfschar*)&ie->key.file_name.file_name,
268  				ie->key.file_name.file_name_length, 1,
269  				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
270  		if (rc == -1)
271  			break;
272  		if (rc)
273  			continue;
274  		/*
275  		 * Perfect match, this will never happen as the
276  		 * ntfs_are_names_equal() call will have gotten a match but we
277  		 * still treat it correctly.
278  		 */
279  		goto found_it;
280  	}
281  	/*
282  	 * We have finished with this index without success. Check for the
283  	 * presence of a child node and if not present return -ENOENT, unless
284  	 * we have got a matching name cached in name in which case return the
285  	 * mft reference associated with it.
286  	 */
287  	if (!(ie->flags & INDEX_ENTRY_NODE)) {
288  		if (name) {
289  			ntfs_attr_put_search_ctx(ctx);
290  			unmap_mft_record(dir_ni);
291  			return name->mref;
292  		}
293  		ntfs_debug("Entry not found.");
294  		err = -ENOENT;
295  		goto err_out;
296  	} /* Child node present, descend into it. */
297  	/* Consistency check: Verify that an index allocation exists. */
298  	if (!NInoIndexAllocPresent(dir_ni)) {
299  		ntfs_error(sb, "No index allocation attribute but index entry "
300  				"requires one. Directory inode 0x%lx is "
301  				"corrupt or driver bug.", dir_ni->mft_no);
302  		goto err_out;
303  	}
304  	/* Get the starting vcn of the index_block holding the child node. */
305  	vcn = sle64_to_cpup((sle64*)((u8*)ie + le16_to_cpu(ie->length) - 8));
306  	ia_mapping = VFS_I(dir_ni)->i_mapping;
307  	/*
308  	 * We are done with the index root and the mft record. Release them,
309  	 * otherwise we deadlock with ntfs_map_page().
310  	 */
311  	ntfs_attr_put_search_ctx(ctx);
312  	unmap_mft_record(dir_ni);
313  	m = NULL;
314  	ctx = NULL;
315  descend_into_child_node:
316  	/*
317  	 * Convert vcn to index into the index allocation attribute in units
318  	 * of PAGE_CACHE_SIZE and map the page cache page, reading it from
319  	 * disk if necessary.
320  	 */
321  	page = ntfs_map_page(ia_mapping, vcn <<
322  			dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT);
323  	if (IS_ERR(page)) {
324  		ntfs_error(sb, "Failed to map directory index page, error %ld.",
325  				-PTR_ERR(page));
326  		err = PTR_ERR(page);
327  		goto err_out;
328  	}
329  	lock_page(page);
330  	kaddr = (u8*)page_address(page);
331  fast_descend_into_child_node:
332  	/* Get to the index allocation block. */
333  	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
334  			dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK));
335  	/* Bounds checks. */
336  	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
337  		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
338  				"inode 0x%lx or driver bug.", dir_ni->mft_no);
339  		goto unm_err_out;
340  	}
341  	/* Catch multi sector transfer fixup errors. */
342  	if (unlikely(!ntfs_is_indx_record(ia->magic))) {
343  		ntfs_error(sb, "Directory index record with vcn 0x%llx is "
344  				"corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
345  				(unsigned long long)vcn, dir_ni->mft_no);
346  		goto unm_err_out;
347  	}
348  	if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
349  		ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
350  				"different from expected VCN (0x%llx). "
351  				"Directory inode 0x%lx is corrupt or driver "
352  				"bug.", (unsigned long long)
353  				sle64_to_cpu(ia->index_block_vcn),
354  				(unsigned long long)vcn, dir_ni->mft_no);
355  		goto unm_err_out;
356  	}
357  	if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
358  			dir_ni->itype.index.block_size) {
359  		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
360  				"0x%lx has a size (%u) differing from the "
361  				"directory specified size (%u). Directory "
362  				"inode is corrupt or driver bug.",
363  				(unsigned long long)vcn, dir_ni->mft_no,
364  				le32_to_cpu(ia->index.allocated_size) + 0x18,
365  				dir_ni->itype.index.block_size);
366  		goto unm_err_out;
367  	}
368  	index_end = (u8*)ia + dir_ni->itype.index.block_size;
369  	if (index_end > kaddr + PAGE_CACHE_SIZE) {
370  		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
371  				"0x%lx crosses page boundary. Impossible! "
372  				"Cannot access! This is probably a bug in the "
373  				"driver.", (unsigned long long)vcn,
374  				dir_ni->mft_no);
375  		goto unm_err_out;
376  	}
377  	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
378  	if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
379  		ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
380  				"inode 0x%lx exceeds maximum size.",
381  				(unsigned long long)vcn, dir_ni->mft_no);
382  		goto unm_err_out;
383  	}
384  	/* The first index entry. */
385  	ie = (INDEX_ENTRY*)((u8*)&ia->index +
386  			le32_to_cpu(ia->index.entries_offset));
387  	/*
388  	 * Iterate similar to above big loop but applied to index buffer, thus
389  	 * loop until we exceed valid memory (corruption case) or until we
390  	 * reach the last entry.
391  	 */
392  	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
393  		/* Bounds check. */
394  		if ((u8*)ie < (u8*)ia || (u8*)ie +
395  				sizeof(INDEX_ENTRY_HEADER) > index_end ||
396  				(u8*)ie + le16_to_cpu(ie->key_length) >
397  				index_end) {
398  			ntfs_error(sb, "Index entry out of bounds in "
399  					"directory inode 0x%lx.",
400  					dir_ni->mft_no);
401  			goto unm_err_out;
402  		}
403  		/*
404  		 * The last entry cannot contain a name. It can however contain
405  		 * a pointer to a child node in the B+tree so we just break out.
406  		 */
407  		if (ie->flags & INDEX_ENTRY_END)
408  			break;
409  		/*
410  		 * We perform a case sensitive comparison and if that matches
411  		 * we are done and return the mft reference of the inode (i.e.
412  		 * the inode number together with the sequence number for
413  		 * consistency checking). We convert it to cpu format before
414  		 * returning.
415  		 */
416  		if (ntfs_are_names_equal(uname, uname_len,
417  				(ntfschar*)&ie->key.file_name.file_name,
418  				ie->key.file_name.file_name_length,
419  				CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
420  found_it2:
421  			/*
422  			 * We have a perfect match, so we don't need to care
423  			 * about having matched imperfectly before, so we can
424  			 * free name and set *res to NULL.
425  			 * However, if the perfect match is a short file name,
426  			 * we need to signal this through *res, so that
427  			 * ntfs_lookup() can fix dcache aliasing issues.
428  			 * As an optimization we just reuse an existing
429  			 * allocation of *res.
430  			 */
431  			if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
432  				if (!name) {
433  					name = kmalloc(sizeof(ntfs_name),
434  							GFP_NOFS);
435  					if (!name) {
436  						err = -ENOMEM;
437  						goto unm_err_out;
438  					}
439  				}
440  				name->mref = le64_to_cpu(
441  						ie->data.dir.indexed_file);
442  				name->type = FILE_NAME_DOS;
443  				name->len = 0;
444  				*res = name;
445  			} else {
446  				kfree(name);
447  				*res = NULL;
448  			}
449  			mref = le64_to_cpu(ie->data.dir.indexed_file);
450  			unlock_page(page);
451  			ntfs_unmap_page(page);
452  			return mref;
453  		}
454  		/*
455  		 * For a case insensitive mount, we also perform a case
456  		 * insensitive comparison (provided the file name is not in the
457  		 * POSIX namespace). If the comparison matches, and the name is
458  		 * in the WIN32 namespace, we cache the filename in *res so
459  		 * that the caller, ntfs_lookup(), can work on it. If the
460  		 * comparison matches, and the name is in the DOS namespace, we
461  		 * only cache the mft reference and the file name type (we set
462  		 * the name length to zero for simplicity).
463  		 */
464  		if (!NVolCaseSensitive(vol) &&
465  				ie->key.file_name.file_name_type &&
466  				ntfs_are_names_equal(uname, uname_len,
467  				(ntfschar*)&ie->key.file_name.file_name,
468  				ie->key.file_name.file_name_length,
469  				IGNORE_CASE, vol->upcase, vol->upcase_len)) {
470  			int name_size = sizeof(ntfs_name);
471  			u8 type = ie->key.file_name.file_name_type;
472  			u8 len = ie->key.file_name.file_name_length;
473  
474  			/* Only one case insensitive matching name allowed. */
475  			if (name) {
476  				ntfs_error(sb, "Found already allocated name "
477  						"in phase 2. Please run chkdsk "
478  						"and if that doesn't find any "
479  						"errors please report you saw "
480  						"this message to "
481  						"linux-ntfs-dev@lists."
482  						"sourceforge.net.");
483  				unlock_page(page);
484  				ntfs_unmap_page(page);
485  				goto dir_err_out;
486  			}
487  
488  			if (type != FILE_NAME_DOS)
489  				name_size += len * sizeof(ntfschar);
490  			name = kmalloc(name_size, GFP_NOFS);
491  			if (!name) {
492  				err = -ENOMEM;
493  				goto unm_err_out;
494  			}
495  			name->mref = le64_to_cpu(ie->data.dir.indexed_file);
496  			name->type = type;
497  			if (type != FILE_NAME_DOS) {
498  				name->len = len;
499  				memcpy(name->name, ie->key.file_name.file_name,
500  						len * sizeof(ntfschar));
501  			} else
502  				name->len = 0;
503  			*res = name;
504  		}
505  		/*
506  		 * Not a perfect match, need to do full blown collation so we
507  		 * know which way in the B+tree we have to go.
508  		 */
509  		rc = ntfs_collate_names(uname, uname_len,
510  				(ntfschar*)&ie->key.file_name.file_name,
511  				ie->key.file_name.file_name_length, 1,
512  				IGNORE_CASE, vol->upcase, vol->upcase_len);
513  		/*
514  		 * If uname collates before the name of the current entry, there
515  		 * is definitely no such name in this index but we might need to
516  		 * descend into the B+tree so we just break out of the loop.
517  		 */
518  		if (rc == -1)
519  			break;
520  		/* The names are not equal, continue the search. */
521  		if (rc)
522  			continue;
523  		/*
524  		 * Names match with case insensitive comparison, now try the
525  		 * case sensitive comparison, which is required for proper
526  		 * collation.
527  		 */
528  		rc = ntfs_collate_names(uname, uname_len,
529  				(ntfschar*)&ie->key.file_name.file_name,
530  				ie->key.file_name.file_name_length, 1,
531  				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
532  		if (rc == -1)
533  			break;
534  		if (rc)
535  			continue;
536  		/*
537  		 * Perfect match, this will never happen as the
538  		 * ntfs_are_names_equal() call will have gotten a match but we
539  		 * still treat it correctly.
540  		 */
541  		goto found_it2;
542  	}
543  	/*
544  	 * We have finished with this index buffer without success. Check for
545  	 * the presence of a child node.
546  	 */
547  	if (ie->flags & INDEX_ENTRY_NODE) {
548  		if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
549  			ntfs_error(sb, "Index entry with child node found in "
550  					"a leaf node in directory inode 0x%lx.",
551  					dir_ni->mft_no);
552  			goto unm_err_out;
553  		}
554  		/* Child node present, descend into it. */
555  		old_vcn = vcn;
556  		vcn = sle64_to_cpup((sle64*)((u8*)ie +
557  				le16_to_cpu(ie->length) - 8));
558  		if (vcn >= 0) {
559  			/* If vcn is in the same page cache page as old_vcn we
560  			 * recycle the mapped page. */
561  			if (old_vcn << vol->cluster_size_bits >>
562  					PAGE_CACHE_SHIFT == vcn <<
563  					vol->cluster_size_bits >>
564  					PAGE_CACHE_SHIFT)
565  				goto fast_descend_into_child_node;
566  			unlock_page(page);
567  			ntfs_unmap_page(page);
568  			goto descend_into_child_node;
569  		}
570  		ntfs_error(sb, "Negative child node vcn in directory inode "
571  				"0x%lx.", dir_ni->mft_no);
572  		goto unm_err_out;
573  	}
574  	/*
575  	 * No child node present, return -ENOENT, unless we have got a matching
576  	 * name cached in name in which case return the mft reference
577  	 * associated with it.
578  	 */
579  	if (name) {
580  		unlock_page(page);
581  		ntfs_unmap_page(page);
582  		return name->mref;
583  	}
584  	ntfs_debug("Entry not found.");
585  	err = -ENOENT;
586  unm_err_out:
587  	unlock_page(page);
588  	ntfs_unmap_page(page);
589  err_out:
590  	if (!err)
591  		err = -EIO;
592  	if (ctx)
593  		ntfs_attr_put_search_ctx(ctx);
594  	if (m)
595  		unmap_mft_record(dir_ni);
596  	if (name) {
597  		kfree(name);
598  		*res = NULL;
599  	}
600  	return ERR_MREF(err);
601  dir_err_out:
602  	ntfs_error(sb, "Corrupt directory.  Aborting lookup.");
603  	goto err_out;
604  }
605  
606  #if 0
607  
608  // TODO: (AIA)
609  // The algorithm embedded in this code will be required for the time when we
610  // want to support adding of entries to directories, where we require correct
611  // collation of file names in order not to cause corruption of the filesystem.
612  
613  /**
614   * ntfs_lookup_inode_by_name - find an inode in a directory given its name
615   * @dir_ni:	ntfs inode of the directory in which to search for the name
616   * @uname:	Unicode name for which to search in the directory
617   * @uname_len:	length of the name @uname in Unicode characters
618   *
619   * Look for an inode with name @uname in the directory with inode @dir_ni.
620   * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
621   * the Unicode name. If the name is found in the directory, the corresponding
622   * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
623   * is a 64-bit number containing the sequence number.
624   *
625   * On error, a negative value is returned corresponding to the error code. In
626   * particular if the inode is not found -ENOENT is returned. Note that you
627   * can't just check the return value for being negative, you have to check the
628   * inode number for being negative which you can extract using MREC(return
629   * value).
630   *
631   * Note, @uname_len does not include the (optional) terminating NULL character.
632   */
633  u64 ntfs_lookup_inode_by_name(ntfs_inode *dir_ni, const ntfschar *uname,
634  		const int uname_len)
635  {
636  	ntfs_volume *vol = dir_ni->vol;
637  	struct super_block *sb = vol->sb;
638  	MFT_RECORD *m;
639  	INDEX_ROOT *ir;
640  	INDEX_ENTRY *ie;
641  	INDEX_ALLOCATION *ia;
642  	u8 *index_end;
643  	u64 mref;
644  	ntfs_attr_search_ctx *ctx;
645  	int err, rc;
646  	IGNORE_CASE_BOOL ic;
647  	VCN vcn, old_vcn;
648  	struct address_space *ia_mapping;
649  	struct page *page;
650  	u8 *kaddr;
651  
652  	/* Get hold of the mft record for the directory. */
653  	m = map_mft_record(dir_ni);
654  	if (IS_ERR(m)) {
655  		ntfs_error(sb, "map_mft_record() failed with error code %ld.",
656  				-PTR_ERR(m));
657  		return ERR_MREF(PTR_ERR(m));
658  	}
659  	ctx = ntfs_attr_get_search_ctx(dir_ni, m);
660  	if (!ctx) {
661  		err = -ENOMEM;
662  		goto err_out;
663  	}
664  	/* Find the index root attribute in the mft record. */
665  	err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
666  			0, ctx);
667  	if (unlikely(err)) {
668  		if (err == -ENOENT) {
669  			ntfs_error(sb, "Index root attribute missing in "
670  					"directory inode 0x%lx.",
671  					dir_ni->mft_no);
672  			err = -EIO;
673  		}
674  		goto err_out;
675  	}
676  	/* Get to the index root value (it's been verified in read_inode). */
677  	ir = (INDEX_ROOT*)((u8*)ctx->attr +
678  			le16_to_cpu(ctx->attr->data.resident.value_offset));
679  	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
680  	/* The first index entry. */
681  	ie = (INDEX_ENTRY*)((u8*)&ir->index +
682  			le32_to_cpu(ir->index.entries_offset));
683  	/*
684  	 * Loop until we exceed valid memory (corruption case) or until we
685  	 * reach the last entry.
686  	 */
687  	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
688  		/* Bounds checks. */
689  		if ((u8*)ie < (u8*)ctx->mrec || (u8*)ie +
690  				sizeof(INDEX_ENTRY_HEADER) > index_end ||
691  				(u8*)ie + le16_to_cpu(ie->key_length) >
692  				index_end)
693  			goto dir_err_out;
694  		/*
695  		 * The last entry cannot contain a name. It can however contain
696  		 * a pointer to a child node in the B+tree so we just break out.
697  		 */
698  		if (ie->flags & INDEX_ENTRY_END)
699  			break;
700  		/*
701  		 * If the current entry has a name type of POSIX, the name is
702  		 * case sensitive and not otherwise. This has the effect of us
703  		 * not being able to access any POSIX file names which collate
704  		 * after the non-POSIX one when they only differ in case, but
705  		 * anyone doing screwy stuff like that deserves to burn in
706  		 * hell... Doing that kind of stuff on NT4 actually causes
707  		 * corruption on the partition even when using SP6a and Linux
708  		 * is not involved at all.
709  		 */
710  		ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
711  				CASE_SENSITIVE;
712  		/*
713  		 * If the names match perfectly, we are done and return the
714  		 * mft reference of the inode (i.e. the inode number together
715  		 * with the sequence number for consistency checking. We
716  		 * convert it to cpu format before returning.
717  		 */
718  		if (ntfs_are_names_equal(uname, uname_len,
719  				(ntfschar*)&ie->key.file_name.file_name,
720  				ie->key.file_name.file_name_length, ic,
721  				vol->upcase, vol->upcase_len)) {
722  found_it:
723  			mref = le64_to_cpu(ie->data.dir.indexed_file);
724  			ntfs_attr_put_search_ctx(ctx);
725  			unmap_mft_record(dir_ni);
726  			return mref;
727  		}
728  		/*
729  		 * Not a perfect match, need to do full blown collation so we
730  		 * know which way in the B+tree we have to go.
731  		 */
732  		rc = ntfs_collate_names(uname, uname_len,
733  				(ntfschar*)&ie->key.file_name.file_name,
734  				ie->key.file_name.file_name_length, 1,
735  				IGNORE_CASE, vol->upcase, vol->upcase_len);
736  		/*
737  		 * If uname collates before the name of the current entry, there
738  		 * is definitely no such name in this index but we might need to
739  		 * descend into the B+tree so we just break out of the loop.
740  		 */
741  		if (rc == -1)
742  			break;
743  		/* The names are not equal, continue the search. */
744  		if (rc)
745  			continue;
746  		/*
747  		 * Names match with case insensitive comparison, now try the
748  		 * case sensitive comparison, which is required for proper
749  		 * collation.
750  		 */
751  		rc = ntfs_collate_names(uname, uname_len,
752  				(ntfschar*)&ie->key.file_name.file_name,
753  				ie->key.file_name.file_name_length, 1,
754  				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
755  		if (rc == -1)
756  			break;
757  		if (rc)
758  			continue;
759  		/*
760  		 * Perfect match, this will never happen as the
761  		 * ntfs_are_names_equal() call will have gotten a match but we
762  		 * still treat it correctly.
763  		 */
764  		goto found_it;
765  	}
766  	/*
767  	 * We have finished with this index without success. Check for the
768  	 * presence of a child node.
769  	 */
770  	if (!(ie->flags & INDEX_ENTRY_NODE)) {
771  		/* No child node, return -ENOENT. */
772  		err = -ENOENT;
773  		goto err_out;
774  	} /* Child node present, descend into it. */
775  	/* Consistency check: Verify that an index allocation exists. */
776  	if (!NInoIndexAllocPresent(dir_ni)) {
777  		ntfs_error(sb, "No index allocation attribute but index entry "
778  				"requires one. Directory inode 0x%lx is "
779  				"corrupt or driver bug.", dir_ni->mft_no);
780  		goto err_out;
781  	}
782  	/* Get the starting vcn of the index_block holding the child node. */
783  	vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
784  	ia_mapping = VFS_I(dir_ni)->i_mapping;
785  	/*
786  	 * We are done with the index root and the mft record. Release them,
787  	 * otherwise we deadlock with ntfs_map_page().
788  	 */
789  	ntfs_attr_put_search_ctx(ctx);
790  	unmap_mft_record(dir_ni);
791  	m = NULL;
792  	ctx = NULL;
793  descend_into_child_node:
794  	/*
795  	 * Convert vcn to index into the index allocation attribute in units
796  	 * of PAGE_CACHE_SIZE and map the page cache page, reading it from
797  	 * disk if necessary.
798  	 */
799  	page = ntfs_map_page(ia_mapping, vcn <<
800  			dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT);
801  	if (IS_ERR(page)) {
802  		ntfs_error(sb, "Failed to map directory index page, error %ld.",
803  				-PTR_ERR(page));
804  		err = PTR_ERR(page);
805  		goto err_out;
806  	}
807  	lock_page(page);
808  	kaddr = (u8*)page_address(page);
809  fast_descend_into_child_node:
810  	/* Get to the index allocation block. */
811  	ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
812  			dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK));
813  	/* Bounds checks. */
814  	if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
815  		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
816  				"inode 0x%lx or driver bug.", dir_ni->mft_no);
817  		goto unm_err_out;
818  	}
819  	/* Catch multi sector transfer fixup errors. */
820  	if (unlikely(!ntfs_is_indx_record(ia->magic))) {
821  		ntfs_error(sb, "Directory index record with vcn 0x%llx is "
822  				"corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
823  				(unsigned long long)vcn, dir_ni->mft_no);
824  		goto unm_err_out;
825  	}
826  	if (sle64_to_cpu(ia->index_block_vcn) != vcn) {
827  		ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
828  				"different from expected VCN (0x%llx). "
829  				"Directory inode 0x%lx is corrupt or driver "
830  				"bug.", (unsigned long long)
831  				sle64_to_cpu(ia->index_block_vcn),
832  				(unsigned long long)vcn, dir_ni->mft_no);
833  		goto unm_err_out;
834  	}
835  	if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
836  			dir_ni->itype.index.block_size) {
837  		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
838  				"0x%lx has a size (%u) differing from the "
839  				"directory specified size (%u). Directory "
840  				"inode is corrupt or driver bug.",
841  				(unsigned long long)vcn, dir_ni->mft_no,
842  				le32_to_cpu(ia->index.allocated_size) + 0x18,
843  				dir_ni->itype.index.block_size);
844  		goto unm_err_out;
845  	}
846  	index_end = (u8*)ia + dir_ni->itype.index.block_size;
847  	if (index_end > kaddr + PAGE_CACHE_SIZE) {
848  		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
849  				"0x%lx crosses page boundary. Impossible! "
850  				"Cannot access! This is probably a bug in the "
851  				"driver.", (unsigned long long)vcn,
852  				dir_ni->mft_no);
853  		goto unm_err_out;
854  	}
855  	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
856  	if (index_end > (u8*)ia + dir_ni->itype.index.block_size) {
857  		ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
858  				"inode 0x%lx exceeds maximum size.",
859  				(unsigned long long)vcn, dir_ni->mft_no);
860  		goto unm_err_out;
861  	}
862  	/* The first index entry. */
863  	ie = (INDEX_ENTRY*)((u8*)&ia->index +
864  			le32_to_cpu(ia->index.entries_offset));
865  	/*
866  	 * Iterate similar to above big loop but applied to index buffer, thus
867  	 * loop until we exceed valid memory (corruption case) or until we
868  	 * reach the last entry.
869  	 */
870  	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
871  		/* Bounds check. */
872  		if ((u8*)ie < (u8*)ia || (u8*)ie +
873  				sizeof(INDEX_ENTRY_HEADER) > index_end ||
874  				(u8*)ie + le16_to_cpu(ie->key_length) >
875  				index_end) {
876  			ntfs_error(sb, "Index entry out of bounds in "
877  					"directory inode 0x%lx.",
878  					dir_ni->mft_no);
879  			goto unm_err_out;
880  		}
881  		/*
882  		 * The last entry cannot contain a name. It can however contain
883  		 * a pointer to a child node in the B+tree so we just break out.
884  		 */
885  		if (ie->flags & INDEX_ENTRY_END)
886  			break;
887  		/*
888  		 * If the current entry has a name type of POSIX, the name is
889  		 * case sensitive and not otherwise. This has the effect of us
890  		 * not being able to access any POSIX file names which collate
891  		 * after the non-POSIX one when they only differ in case, but
892  		 * anyone doing screwy stuff like that deserves to burn in
893  		 * hell... Doing that kind of stuff on NT4 actually causes
894  		 * corruption on the partition even when using SP6a and Linux
895  		 * is not involved at all.
896  		 */
897  		ic = ie->key.file_name.file_name_type ? IGNORE_CASE :
898  				CASE_SENSITIVE;
899  		/*
900  		 * If the names match perfectly, we are done and return the
901  		 * mft reference of the inode (i.e. the inode number together
902  		 * with the sequence number for consistency checking. We
903  		 * convert it to cpu format before returning.
904  		 */
905  		if (ntfs_are_names_equal(uname, uname_len,
906  				(ntfschar*)&ie->key.file_name.file_name,
907  				ie->key.file_name.file_name_length, ic,
908  				vol->upcase, vol->upcase_len)) {
909  found_it2:
910  			mref = le64_to_cpu(ie->data.dir.indexed_file);
911  			unlock_page(page);
912  			ntfs_unmap_page(page);
913  			return mref;
914  		}
915  		/*
916  		 * Not a perfect match, need to do full blown collation so we
917  		 * know which way in the B+tree we have to go.
918  		 */
919  		rc = ntfs_collate_names(uname, uname_len,
920  				(ntfschar*)&ie->key.file_name.file_name,
921  				ie->key.file_name.file_name_length, 1,
922  				IGNORE_CASE, vol->upcase, vol->upcase_len);
923  		/*
924  		 * If uname collates before the name of the current entry, there
925  		 * is definitely no such name in this index but we might need to
926  		 * descend into the B+tree so we just break out of the loop.
927  		 */
928  		if (rc == -1)
929  			break;
930  		/* The names are not equal, continue the search. */
931  		if (rc)
932  			continue;
933  		/*
934  		 * Names match with case insensitive comparison, now try the
935  		 * case sensitive comparison, which is required for proper
936  		 * collation.
937  		 */
938  		rc = ntfs_collate_names(uname, uname_len,
939  				(ntfschar*)&ie->key.file_name.file_name,
940  				ie->key.file_name.file_name_length, 1,
941  				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
942  		if (rc == -1)
943  			break;
944  		if (rc)
945  			continue;
946  		/*
947  		 * Perfect match, this will never happen as the
948  		 * ntfs_are_names_equal() call will have gotten a match but we
949  		 * still treat it correctly.
950  		 */
951  		goto found_it2;
952  	}
953  	/*
954  	 * We have finished with this index buffer without success. Check for
955  	 * the presence of a child node.
956  	 */
957  	if (ie->flags & INDEX_ENTRY_NODE) {
958  		if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
959  			ntfs_error(sb, "Index entry with child node found in "
960  					"a leaf node in directory inode 0x%lx.",
961  					dir_ni->mft_no);
962  			goto unm_err_out;
963  		}
964  		/* Child node present, descend into it. */
965  		old_vcn = vcn;
966  		vcn = sle64_to_cpup((u8*)ie + le16_to_cpu(ie->length) - 8);
967  		if (vcn >= 0) {
968  			/* If vcn is in the same page cache page as old_vcn we
969  			 * recycle the mapped page. */
970  			if (old_vcn << vol->cluster_size_bits >>
971  					PAGE_CACHE_SHIFT == vcn <<
972  					vol->cluster_size_bits >>
973  					PAGE_CACHE_SHIFT)
974  				goto fast_descend_into_child_node;
975  			unlock_page(page);
976  			ntfs_unmap_page(page);
977  			goto descend_into_child_node;
978  		}
979  		ntfs_error(sb, "Negative child node vcn in directory inode "
980  				"0x%lx.", dir_ni->mft_no);
981  		goto unm_err_out;
982  	}
983  	/* No child node, return -ENOENT. */
984  	ntfs_debug("Entry not found.");
985  	err = -ENOENT;
986  unm_err_out:
987  	unlock_page(page);
988  	ntfs_unmap_page(page);
989  err_out:
990  	if (!err)
991  		err = -EIO;
992  	if (ctx)
993  		ntfs_attr_put_search_ctx(ctx);
994  	if (m)
995  		unmap_mft_record(dir_ni);
996  	return ERR_MREF(err);
997  dir_err_out:
998  	ntfs_error(sb, "Corrupt directory. Aborting lookup.");
999  	goto err_out;
1000  }
1001  
1002  #endif
1003  
1004  /**
1005   * ntfs_filldir - ntfs specific filldir method
1006   * @vol:	current ntfs volume
1007   * @fpos:	position in the directory
1008   * @ndir:	ntfs inode of current directory
1009   * @ia_page:	page in which the index allocation buffer @ie is in resides
1010   * @ie:		current index entry
1011   * @name:	buffer to use for the converted name
1012   * @dirent:	vfs filldir callback context
1013   * @filldir:	vfs filldir callback
1014   *
1015   * Convert the Unicode @name to the loaded NLS and pass it to the @filldir
1016   * callback.
1017   *
1018   * If @ia_page is not NULL it is the locked page containing the index
1019   * allocation block containing the index entry @ie.
1020   *
1021   * Note, we drop (and then reacquire) the page lock on @ia_page across the
1022   * @filldir() call otherwise we would deadlock with NFSd when it calls ->lookup
1023   * since ntfs_lookup() will lock the same page.  As an optimization, we do not
1024   * retake the lock if we are returning a non-zero value as ntfs_readdir()
1025   * would need to drop the lock immediately anyway.
1026   */
ntfs_filldir(ntfs_volume * vol,loff_t fpos,ntfs_inode * ndir,struct page * ia_page,INDEX_ENTRY * ie,u8 * name,void * dirent,filldir_t filldir)1027  static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos,
1028  		ntfs_inode *ndir, struct page *ia_page, INDEX_ENTRY *ie,
1029  		u8 *name, void *dirent, filldir_t filldir)
1030  {
1031  	unsigned long mref;
1032  	int name_len, rc;
1033  	unsigned dt_type;
1034  	FILE_NAME_TYPE_FLAGS name_type;
1035  
1036  	name_type = ie->key.file_name.file_name_type;
1037  	if (name_type == FILE_NAME_DOS) {
1038  		ntfs_debug("Skipping DOS name space entry.");
1039  		return 0;
1040  	}
1041  	if (MREF_LE(ie->data.dir.indexed_file) == FILE_root) {
1042  		ntfs_debug("Skipping root directory self reference entry.");
1043  		return 0;
1044  	}
1045  	if (MREF_LE(ie->data.dir.indexed_file) < FILE_first_user &&
1046  			!NVolShowSystemFiles(vol)) {
1047  		ntfs_debug("Skipping system file.");
1048  		return 0;
1049  	}
1050  	name_len = ntfs_ucstonls(vol, (ntfschar*)&ie->key.file_name.file_name,
1051  			ie->key.file_name.file_name_length, &name,
1052  			NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
1053  	if (name_len <= 0) {
1054  		ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
1055  				(long long)MREF_LE(ie->data.dir.indexed_file));
1056  		return 0;
1057  	}
1058  	if (ie->key.file_name.file_attributes &
1059  			FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT)
1060  		dt_type = DT_DIR;
1061  	else
1062  		dt_type = DT_REG;
1063  	mref = MREF_LE(ie->data.dir.indexed_file);
1064  	/*
1065  	 * Drop the page lock otherwise we deadlock with NFS when it calls
1066  	 * ->lookup since ntfs_lookup() will lock the same page.
1067  	 */
1068  	if (ia_page)
1069  		unlock_page(ia_page);
1070  	ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode "
1071  			"0x%lx, DT_%s.", name, name_len, fpos, mref,
1072  			dt_type == DT_DIR ? "DIR" : "REG");
1073  	rc = filldir(dirent, name, name_len, fpos, mref, dt_type);
1074  	/* Relock the page but not if we are aborting ->readdir. */
1075  	if (!rc && ia_page)
1076  		lock_page(ia_page);
1077  	return rc;
1078  }
1079  
1080  /*
1081   * We use the same basic approach as the old NTFS driver, i.e. we parse the
1082   * index root entries and then the index allocation entries that are marked
1083   * as in use in the index bitmap.
1084   *
1085   * While this will return the names in random order this doesn't matter for
1086   * ->readdir but OTOH results in a faster ->readdir.
1087   *
1088   * VFS calls ->readdir without BKL but with i_mutex held. This protects the VFS
1089   * parts (e.g. ->f_pos and ->i_size, and it also protects against directory
1090   * modifications).
1091   *
1092   * Locking:  - Caller must hold i_mutex on the directory.
1093   *	     - Each page cache page in the index allocation mapping must be
1094   *	       locked whilst being accessed otherwise we may find a corrupt
1095   *	       page due to it being under ->writepage at the moment which
1096   *	       applies the mst protection fixups before writing out and then
1097   *	       removes them again after the write is complete after which it
1098   *	       unlocks the page.
1099   */
ntfs_readdir(struct file * filp,void * dirent,filldir_t filldir)1100  static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1101  {
1102  	s64 ia_pos, ia_start, prev_ia_pos, bmp_pos;
1103  	loff_t fpos, i_size;
1104  	struct inode *bmp_vi, *vdir = filp->f_path.dentry->d_inode;
1105  	struct super_block *sb = vdir->i_sb;
1106  	ntfs_inode *ndir = NTFS_I(vdir);
1107  	ntfs_volume *vol = NTFS_SB(sb);
1108  	MFT_RECORD *m;
1109  	INDEX_ROOT *ir = NULL;
1110  	INDEX_ENTRY *ie;
1111  	INDEX_ALLOCATION *ia;
1112  	u8 *name = NULL;
1113  	int rc, err, ir_pos, cur_bmp_pos;
1114  	struct address_space *ia_mapping, *bmp_mapping;
1115  	struct page *bmp_page = NULL, *ia_page = NULL;
1116  	u8 *kaddr, *bmp, *index_end;
1117  	ntfs_attr_search_ctx *ctx;
1118  
1119  	fpos = filp->f_pos;
1120  	ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.",
1121  			vdir->i_ino, fpos);
1122  	rc = err = 0;
1123  	/* Are we at end of dir yet? */
1124  	i_size = i_size_read(vdir);
1125  	if (fpos >= i_size + vol->mft_record_size)
1126  		goto done;
1127  	/* Emulate . and .. for all directories. */
1128  	if (!fpos) {
1129  		ntfs_debug("Calling filldir for . with len 1, fpos 0x0, "
1130  				"inode 0x%lx, DT_DIR.", vdir->i_ino);
1131  		rc = filldir(dirent, ".", 1, fpos, vdir->i_ino, DT_DIR);
1132  		if (rc)
1133  			goto done;
1134  		fpos++;
1135  	}
1136  	if (fpos == 1) {
1137  		ntfs_debug("Calling filldir for .. with len 2, fpos 0x1, "
1138  				"inode 0x%lx, DT_DIR.",
1139  				(unsigned long)parent_ino(filp->f_path.dentry));
1140  		rc = filldir(dirent, "..", 2, fpos,
1141  				parent_ino(filp->f_path.dentry), DT_DIR);
1142  		if (rc)
1143  			goto done;
1144  		fpos++;
1145  	}
1146  	m = NULL;
1147  	ctx = NULL;
1148  	/*
1149  	 * Allocate a buffer to store the current name being processed
1150  	 * converted to format determined by current NLS.
1151  	 */
1152  	name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);
1153  	if (unlikely(!name)) {
1154  		err = -ENOMEM;
1155  		goto err_out;
1156  	}
1157  	/* Are we jumping straight into the index allocation attribute? */
1158  	if (fpos >= vol->mft_record_size)
1159  		goto skip_index_root;
1160  	/* Get hold of the mft record for the directory. */
1161  	m = map_mft_record(ndir);
1162  	if (IS_ERR(m)) {
1163  		err = PTR_ERR(m);
1164  		m = NULL;
1165  		goto err_out;
1166  	}
1167  	ctx = ntfs_attr_get_search_ctx(ndir, m);
1168  	if (unlikely(!ctx)) {
1169  		err = -ENOMEM;
1170  		goto err_out;
1171  	}
1172  	/* Get the offset into the index root attribute. */
1173  	ir_pos = (s64)fpos;
1174  	/* Find the index root attribute in the mft record. */
1175  	err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
1176  			0, ctx);
1177  	if (unlikely(err)) {
1178  		ntfs_error(sb, "Index root attribute missing in directory "
1179  				"inode 0x%lx.", vdir->i_ino);
1180  		goto err_out;
1181  	}
1182  	/*
1183  	 * Copy the index root attribute value to a buffer so that we can put
1184  	 * the search context and unmap the mft record before calling the
1185  	 * filldir() callback.  We need to do this because of NFSd which calls
1186  	 * ->lookup() from its filldir callback() and this causes NTFS to
1187  	 * deadlock as ntfs_lookup() maps the mft record of the directory and
1188  	 * we have got it mapped here already.  The only solution is for us to
1189  	 * unmap the mft record here so that a call to ntfs_lookup() is able to
1190  	 * map the mft record without deadlocking.
1191  	 */
1192  	rc = le32_to_cpu(ctx->attr->data.resident.value_length);
1193  	ir = kmalloc(rc, GFP_NOFS);
1194  	if (unlikely(!ir)) {
1195  		err = -ENOMEM;
1196  		goto err_out;
1197  	}
1198  	/* Copy the index root value (it has been verified in read_inode). */
1199  	memcpy(ir, (u8*)ctx->attr +
1200  			le16_to_cpu(ctx->attr->data.resident.value_offset), rc);
1201  	ntfs_attr_put_search_ctx(ctx);
1202  	unmap_mft_record(ndir);
1203  	ctx = NULL;
1204  	m = NULL;
1205  	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
1206  	/* The first index entry. */
1207  	ie = (INDEX_ENTRY*)((u8*)&ir->index +
1208  			le32_to_cpu(ir->index.entries_offset));
1209  	/*
1210  	 * Loop until we exceed valid memory (corruption case) or until we
1211  	 * reach the last entry or until filldir tells us it has had enough
1212  	 * or signals an error (both covered by the rc test).
1213  	 */
1214  	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
1215  		ntfs_debug("In index root, offset 0x%zx.", (u8*)ie - (u8*)ir);
1216  		/* Bounds checks. */
1217  		if (unlikely((u8*)ie < (u8*)ir || (u8*)ie +
1218  				sizeof(INDEX_ENTRY_HEADER) > index_end ||
1219  				(u8*)ie + le16_to_cpu(ie->key_length) >
1220  				index_end))
1221  			goto err_out;
1222  		/* The last entry cannot contain a name. */
1223  		if (ie->flags & INDEX_ENTRY_END)
1224  			break;
1225  		/* Skip index root entry if continuing previous readdir. */
1226  		if (ir_pos > (u8*)ie - (u8*)ir)
1227  			continue;
1228  		/* Advance the position even if going to skip the entry. */
1229  		fpos = (u8*)ie - (u8*)ir;
1230  		/* Submit the name to the filldir callback. */
1231  		rc = ntfs_filldir(vol, fpos, ndir, NULL, ie, name, dirent,
1232  				filldir);
1233  		if (rc) {
1234  			kfree(ir);
1235  			goto abort;
1236  		}
1237  	}
1238  	/* We are done with the index root and can free the buffer. */
1239  	kfree(ir);
1240  	ir = NULL;
1241  	/* If there is no index allocation attribute we are finished. */
1242  	if (!NInoIndexAllocPresent(ndir))
1243  		goto EOD;
1244  	/* Advance fpos to the beginning of the index allocation. */
1245  	fpos = vol->mft_record_size;
1246  skip_index_root:
1247  	kaddr = NULL;
1248  	prev_ia_pos = -1LL;
1249  	/* Get the offset into the index allocation attribute. */
1250  	ia_pos = (s64)fpos - vol->mft_record_size;
1251  	ia_mapping = vdir->i_mapping;
1252  	ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino);
1253  	bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4);
1254  	if (IS_ERR(bmp_vi)) {
1255  		ntfs_error(sb, "Failed to get bitmap attribute.");
1256  		err = PTR_ERR(bmp_vi);
1257  		goto err_out;
1258  	}
1259  	bmp_mapping = bmp_vi->i_mapping;
1260  	/* Get the starting bitmap bit position and sanity check it. */
1261  	bmp_pos = ia_pos >> ndir->itype.index.block_size_bits;
1262  	if (unlikely(bmp_pos >> 3 >= i_size_read(bmp_vi))) {
1263  		ntfs_error(sb, "Current index allocation position exceeds "
1264  				"index bitmap size.");
1265  		goto iput_err_out;
1266  	}
1267  	/* Get the starting bit position in the current bitmap page. */
1268  	cur_bmp_pos = bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1);
1269  	bmp_pos &= ~(u64)((PAGE_CACHE_SIZE * 8) - 1);
1270  get_next_bmp_page:
1271  	ntfs_debug("Reading bitmap with page index 0x%llx, bit ofs 0x%llx",
1272  			(unsigned long long)bmp_pos >> (3 + PAGE_CACHE_SHIFT),
1273  			(unsigned long long)bmp_pos &
1274  			(unsigned long long)((PAGE_CACHE_SIZE * 8) - 1));
1275  	bmp_page = ntfs_map_page(bmp_mapping,
1276  			bmp_pos >> (3 + PAGE_CACHE_SHIFT));
1277  	if (IS_ERR(bmp_page)) {
1278  		ntfs_error(sb, "Reading index bitmap failed.");
1279  		err = PTR_ERR(bmp_page);
1280  		bmp_page = NULL;
1281  		goto iput_err_out;
1282  	}
1283  	bmp = (u8*)page_address(bmp_page);
1284  	/* Find next index block in use. */
1285  	while (!(bmp[cur_bmp_pos >> 3] & (1 << (cur_bmp_pos & 7)))) {
1286  find_next_index_buffer:
1287  		cur_bmp_pos++;
1288  		/*
1289  		 * If we have reached the end of the bitmap page, get the next
1290  		 * page, and put away the old one.
1291  		 */
1292  		if (unlikely((cur_bmp_pos >> 3) >= PAGE_CACHE_SIZE)) {
1293  			ntfs_unmap_page(bmp_page);
1294  			bmp_pos += PAGE_CACHE_SIZE * 8;
1295  			cur_bmp_pos = 0;
1296  			goto get_next_bmp_page;
1297  		}
1298  		/* If we have reached the end of the bitmap, we are done. */
1299  		if (unlikely(((bmp_pos + cur_bmp_pos) >> 3) >= i_size))
1300  			goto unm_EOD;
1301  		ia_pos = (bmp_pos + cur_bmp_pos) <<
1302  				ndir->itype.index.block_size_bits;
1303  	}
1304  	ntfs_debug("Handling index buffer 0x%llx.",
1305  			(unsigned long long)bmp_pos + cur_bmp_pos);
1306  	/* If the current index buffer is in the same page we reuse the page. */
1307  	if ((prev_ia_pos & (s64)PAGE_CACHE_MASK) !=
1308  			(ia_pos & (s64)PAGE_CACHE_MASK)) {
1309  		prev_ia_pos = ia_pos;
1310  		if (likely(ia_page != NULL)) {
1311  			unlock_page(ia_page);
1312  			ntfs_unmap_page(ia_page);
1313  		}
1314  		/*
1315  		 * Map the page cache page containing the current ia_pos,
1316  		 * reading it from disk if necessary.
1317  		 */
1318  		ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_CACHE_SHIFT);
1319  		if (IS_ERR(ia_page)) {
1320  			ntfs_error(sb, "Reading index allocation data failed.");
1321  			err = PTR_ERR(ia_page);
1322  			ia_page = NULL;
1323  			goto err_out;
1324  		}
1325  		lock_page(ia_page);
1326  		kaddr = (u8*)page_address(ia_page);
1327  	}
1328  	/* Get the current index buffer. */
1329  	ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_CACHE_MASK &
1330  			~(s64)(ndir->itype.index.block_size - 1)));
1331  	/* Bounds checks. */
1332  	if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE)) {
1333  		ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
1334  				"inode 0x%lx or driver bug.", vdir->i_ino);
1335  		goto err_out;
1336  	}
1337  	/* Catch multi sector transfer fixup errors. */
1338  	if (unlikely(!ntfs_is_indx_record(ia->magic))) {
1339  		ntfs_error(sb, "Directory index record with vcn 0x%llx is "
1340  				"corrupt.  Corrupt inode 0x%lx.  Run chkdsk.",
1341  				(unsigned long long)ia_pos >>
1342  				ndir->itype.index.vcn_size_bits, vdir->i_ino);
1343  		goto err_out;
1344  	}
1345  	if (unlikely(sle64_to_cpu(ia->index_block_vcn) != (ia_pos &
1346  			~(s64)(ndir->itype.index.block_size - 1)) >>
1347  			ndir->itype.index.vcn_size_bits)) {
1348  		ntfs_error(sb, "Actual VCN (0x%llx) of index buffer is "
1349  				"different from expected VCN (0x%llx). "
1350  				"Directory inode 0x%lx is corrupt or driver "
1351  				"bug. ", (unsigned long long)
1352  				sle64_to_cpu(ia->index_block_vcn),
1353  				(unsigned long long)ia_pos >>
1354  				ndir->itype.index.vcn_size_bits, vdir->i_ino);
1355  		goto err_out;
1356  	}
1357  	if (unlikely(le32_to_cpu(ia->index.allocated_size) + 0x18 !=
1358  			ndir->itype.index.block_size)) {
1359  		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
1360  				"0x%lx has a size (%u) differing from the "
1361  				"directory specified size (%u). Directory "
1362  				"inode is corrupt or driver bug.",
1363  				(unsigned long long)ia_pos >>
1364  				ndir->itype.index.vcn_size_bits, vdir->i_ino,
1365  				le32_to_cpu(ia->index.allocated_size) + 0x18,
1366  				ndir->itype.index.block_size);
1367  		goto err_out;
1368  	}
1369  	index_end = (u8*)ia + ndir->itype.index.block_size;
1370  	if (unlikely(index_end > kaddr + PAGE_CACHE_SIZE)) {
1371  		ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
1372  				"0x%lx crosses page boundary. Impossible! "
1373  				"Cannot access! This is probably a bug in the "
1374  				"driver.", (unsigned long long)ia_pos >>
1375  				ndir->itype.index.vcn_size_bits, vdir->i_ino);
1376  		goto err_out;
1377  	}
1378  	ia_start = ia_pos & ~(s64)(ndir->itype.index.block_size - 1);
1379  	index_end = (u8*)&ia->index + le32_to_cpu(ia->index.index_length);
1380  	if (unlikely(index_end > (u8*)ia + ndir->itype.index.block_size)) {
1381  		ntfs_error(sb, "Size of index buffer (VCN 0x%llx) of directory "
1382  				"inode 0x%lx exceeds maximum size.",
1383  				(unsigned long long)ia_pos >>
1384  				ndir->itype.index.vcn_size_bits, vdir->i_ino);
1385  		goto err_out;
1386  	}
1387  	/* The first index entry in this index buffer. */
1388  	ie = (INDEX_ENTRY*)((u8*)&ia->index +
1389  			le32_to_cpu(ia->index.entries_offset));
1390  	/*
1391  	 * Loop until we exceed valid memory (corruption case) or until we
1392  	 * reach the last entry or until filldir tells us it has had enough
1393  	 * or signals an error (both covered by the rc test).
1394  	 */
1395  	for (;; ie = (INDEX_ENTRY*)((u8*)ie + le16_to_cpu(ie->length))) {
1396  		ntfs_debug("In index allocation, offset 0x%llx.",
1397  				(unsigned long long)ia_start +
1398  				(unsigned long long)((u8*)ie - (u8*)ia));
1399  		/* Bounds checks. */
1400  		if (unlikely((u8*)ie < (u8*)ia || (u8*)ie +
1401  				sizeof(INDEX_ENTRY_HEADER) > index_end ||
1402  				(u8*)ie + le16_to_cpu(ie->key_length) >
1403  				index_end))
1404  			goto err_out;
1405  		/* The last entry cannot contain a name. */
1406  		if (ie->flags & INDEX_ENTRY_END)
1407  			break;
1408  		/* Skip index block entry if continuing previous readdir. */
1409  		if (ia_pos - ia_start > (u8*)ie - (u8*)ia)
1410  			continue;
1411  		/* Advance the position even if going to skip the entry. */
1412  		fpos = (u8*)ie - (u8*)ia +
1413  				(sle64_to_cpu(ia->index_block_vcn) <<
1414  				ndir->itype.index.vcn_size_bits) +
1415  				vol->mft_record_size;
1416  		/*
1417  		 * Submit the name to the @filldir callback.  Note,
1418  		 * ntfs_filldir() drops the lock on @ia_page but it retakes it
1419  		 * before returning, unless a non-zero value is returned in
1420  		 * which case the page is left unlocked.
1421  		 */
1422  		rc = ntfs_filldir(vol, fpos, ndir, ia_page, ie, name, dirent,
1423  				filldir);
1424  		if (rc) {
1425  			/* @ia_page is already unlocked in this case. */
1426  			ntfs_unmap_page(ia_page);
1427  			ntfs_unmap_page(bmp_page);
1428  			iput(bmp_vi);
1429  			goto abort;
1430  		}
1431  	}
1432  	goto find_next_index_buffer;
1433  unm_EOD:
1434  	if (ia_page) {
1435  		unlock_page(ia_page);
1436  		ntfs_unmap_page(ia_page);
1437  	}
1438  	ntfs_unmap_page(bmp_page);
1439  	iput(bmp_vi);
1440  EOD:
1441  	/* We are finished, set fpos to EOD. */
1442  	fpos = i_size + vol->mft_record_size;
1443  abort:
1444  	kfree(name);
1445  done:
1446  #ifdef DEBUG
1447  	if (!rc)
1448  		ntfs_debug("EOD, fpos 0x%llx, returning 0.", fpos);
1449  	else
1450  		ntfs_debug("filldir returned %i, fpos 0x%llx, returning 0.",
1451  				rc, fpos);
1452  #endif
1453  	filp->f_pos = fpos;
1454  	return 0;
1455  err_out:
1456  	if (bmp_page) {
1457  		ntfs_unmap_page(bmp_page);
1458  iput_err_out:
1459  		iput(bmp_vi);
1460  	}
1461  	if (ia_page) {
1462  		unlock_page(ia_page);
1463  		ntfs_unmap_page(ia_page);
1464  	}
1465  	kfree(ir);
1466  	kfree(name);
1467  	if (ctx)
1468  		ntfs_attr_put_search_ctx(ctx);
1469  	if (m)
1470  		unmap_mft_record(ndir);
1471  	if (!err)
1472  		err = -EIO;
1473  	ntfs_debug("Failed. Returning error code %i.", -err);
1474  	filp->f_pos = fpos;
1475  	return err;
1476  }
1477  
1478  /**
1479   * ntfs_dir_open - called when an inode is about to be opened
1480   * @vi:		inode to be opened
1481   * @filp:	file structure describing the inode
1482   *
1483   * Limit directory size to the page cache limit on architectures where unsigned
1484   * long is 32-bits. This is the most we can do for now without overflowing the
1485   * page cache page index. Doing it this way means we don't run into problems
1486   * because of existing too large directories. It would be better to allow the
1487   * user to read the accessible part of the directory but I doubt very much
1488   * anyone is going to hit this check on a 32-bit architecture, so there is no
1489   * point in adding the extra complexity required to support this.
1490   *
1491   * On 64-bit architectures, the check is hopefully optimized away by the
1492   * compiler.
1493   */
ntfs_dir_open(struct inode * vi,struct file * filp)1494  static int ntfs_dir_open(struct inode *vi, struct file *filp)
1495  {
1496  	if (sizeof(unsigned long) < 8) {
1497  		if (i_size_read(vi) > MAX_LFS_FILESIZE)
1498  			return -EFBIG;
1499  	}
1500  	return 0;
1501  }
1502  
1503  #ifdef NTFS_RW
1504  
1505  /**
1506   * ntfs_dir_fsync - sync a directory to disk
1507   * @filp:	directory to be synced
1508   * @dentry:	dentry describing the directory to sync
1509   * @datasync:	if non-zero only flush user data and not metadata
1510   *
1511   * Data integrity sync of a directory to disk.  Used for fsync, fdatasync, and
1512   * msync system calls.  This function is based on file.c::ntfs_file_fsync().
1513   *
1514   * Write the mft record and all associated extent mft records as well as the
1515   * $INDEX_ALLOCATION and $BITMAP attributes and then sync the block device.
1516   *
1517   * If @datasync is true, we do not wait on the inode(s) to be written out
1518   * but we always wait on the page cache pages to be written out.
1519   *
1520   * Note: In the past @filp could be NULL so we ignore it as we don't need it
1521   * anyway.
1522   *
1523   * Locking: Caller must hold i_mutex on the inode.
1524   *
1525   * TODO: We should probably also write all attribute/index inodes associated
1526   * with this inode but since we have no simple way of getting to them we ignore
1527   * this problem for now.  We do write the $BITMAP attribute if it is present
1528   * which is the important one for a directory so things are not too bad.
1529   */
ntfs_dir_fsync(struct file * filp,loff_t start,loff_t end,int datasync)1530  static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
1531  			  int datasync)
1532  {
1533  	struct inode *bmp_vi, *vi = filp->f_mapping->host;
1534  	int err, ret;
1535  	ntfs_attr na;
1536  
1537  	ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
1538  
1539  	err = filemap_write_and_wait_range(vi->i_mapping, start, end);
1540  	if (err)
1541  		return err;
1542  	mutex_lock(&vi->i_mutex);
1543  
1544  	BUG_ON(!S_ISDIR(vi->i_mode));
1545  	/* If the bitmap attribute inode is in memory sync it, too. */
1546  	na.mft_no = vi->i_ino;
1547  	na.type = AT_BITMAP;
1548  	na.name = I30;
1549  	na.name_len = 4;
1550  	bmp_vi = ilookup5(vi->i_sb, vi->i_ino, (test_t)ntfs_test_inode, &na);
1551  	if (bmp_vi) {
1552   		write_inode_now(bmp_vi, !datasync);
1553  		iput(bmp_vi);
1554  	}
1555  	ret = __ntfs_write_inode(vi, 1);
1556  	write_inode_now(vi, !datasync);
1557  	err = sync_blockdev(vi->i_sb->s_bdev);
1558  	if (unlikely(err && !ret))
1559  		ret = err;
1560  	if (likely(!ret))
1561  		ntfs_debug("Done.");
1562  	else
1563  		ntfs_warning(vi->i_sb, "Failed to f%ssync inode 0x%lx.  Error "
1564  				"%u.", datasync ? "data" : "", vi->i_ino, -ret);
1565  	mutex_unlock(&vi->i_mutex);
1566  	return ret;
1567  }
1568  
1569  #endif /* NTFS_RW */
1570  
1571  const struct file_operations ntfs_dir_ops = {
1572  	.llseek		= generic_file_llseek,	/* Seek inside directory. */
1573  	.read		= generic_read_dir,	/* Return -EISDIR. */
1574  	.readdir	= ntfs_readdir,		/* Read directory contents. */
1575  #ifdef NTFS_RW
1576  	.fsync		= ntfs_dir_fsync,	/* Sync a directory to disk. */
1577  	/*.aio_fsync	= ,*/			/* Sync all outstanding async
1578  						   i/o operations on a kiocb. */
1579  #endif /* NTFS_RW */
1580  	/*.ioctl	= ,*/			/* Perform function on the
1581  						   mounted filesystem. */
1582  	.open		= ntfs_dir_open,	/* Open directory. */
1583  };
1584