• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  linux/fs/ext4/super.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  from
10  *
11  *  linux/fs/minix/inode.c
12  *
13  *  Copyright (C) 1991, 1992  Linus Torvalds
14  *
15  *  Big-endian to little-endian byte-swapping/bitmaps by
16  *        David S. Miller (davem@caip.rutgers.edu), 1995
17  */
18 
19 #include <linux/module.h>
20 #include <linux/string.h>
21 #include <linux/fs.h>
22 #include <linux/time.h>
23 #include <linux/jbd2.h>
24 #include <linux/slab.h>
25 #include <linux/init.h>
26 #include <linux/blkdev.h>
27 #include <linux/parser.h>
28 #include <linux/smp_lock.h>
29 #include <linux/buffer_head.h>
30 #include <linux/exportfs.h>
31 #include <linux/vfs.h>
32 #include <linux/random.h>
33 #include <linux/mount.h>
34 #include <linux/namei.h>
35 #include <linux/quotaops.h>
36 #include <linux/seq_file.h>
37 #include <linux/proc_fs.h>
38 #include <linux/marker.h>
39 #include <linux/log2.h>
40 #include <linux/crc16.h>
41 #include <asm/uaccess.h>
42 
43 #include "ext4.h"
44 #include "ext4_jbd2.h"
45 #include "xattr.h"
46 #include "acl.h"
47 #include "namei.h"
48 #include "group.h"
49 
50 struct proc_dir_entry *ext4_proc_root;
51 
52 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
53 			     unsigned long journal_devnum);
54 static int ext4_commit_super(struct super_block *sb,
55 			      struct ext4_super_block *es, int sync);
56 static void ext4_mark_recovery_complete(struct super_block *sb,
57 					struct ext4_super_block *es);
58 static void ext4_clear_journal_err(struct super_block *sb,
59 				   struct ext4_super_block *es);
60 static int ext4_sync_fs(struct super_block *sb, int wait);
61 static const char *ext4_decode_error(struct super_block *sb, int errno,
62 				     char nbuf[16]);
63 static int ext4_remount(struct super_block *sb, int *flags, char *data);
64 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
65 static int ext4_unfreeze(struct super_block *sb);
66 static void ext4_write_super(struct super_block *sb);
67 static int ext4_freeze(struct super_block *sb);
68 
69 
ext4_block_bitmap(struct super_block * sb,struct ext4_group_desc * bg)70 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
71 			       struct ext4_group_desc *bg)
72 {
73 	return le32_to_cpu(bg->bg_block_bitmap_lo) |
74 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
75 		(ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
76 }
77 
ext4_inode_bitmap(struct super_block * sb,struct ext4_group_desc * bg)78 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
79 			       struct ext4_group_desc *bg)
80 {
81 	return le32_to_cpu(bg->bg_inode_bitmap_lo) |
82 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
83 		(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
84 }
85 
ext4_inode_table(struct super_block * sb,struct ext4_group_desc * bg)86 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
87 			      struct ext4_group_desc *bg)
88 {
89 	return le32_to_cpu(bg->bg_inode_table_lo) |
90 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
91 		(ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
92 }
93 
ext4_free_blks_count(struct super_block * sb,struct ext4_group_desc * bg)94 __u32 ext4_free_blks_count(struct super_block *sb,
95 			      struct ext4_group_desc *bg)
96 {
97 	return le16_to_cpu(bg->bg_free_blocks_count_lo) |
98 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
99 		(__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
100 }
101 
ext4_free_inodes_count(struct super_block * sb,struct ext4_group_desc * bg)102 __u32 ext4_free_inodes_count(struct super_block *sb,
103 			      struct ext4_group_desc *bg)
104 {
105 	return le16_to_cpu(bg->bg_free_inodes_count_lo) |
106 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
107 		(__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
108 }
109 
ext4_used_dirs_count(struct super_block * sb,struct ext4_group_desc * bg)110 __u32 ext4_used_dirs_count(struct super_block *sb,
111 			      struct ext4_group_desc *bg)
112 {
113 	return le16_to_cpu(bg->bg_used_dirs_count_lo) |
114 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
115 		(__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
116 }
117 
ext4_itable_unused_count(struct super_block * sb,struct ext4_group_desc * bg)118 __u32 ext4_itable_unused_count(struct super_block *sb,
119 			      struct ext4_group_desc *bg)
120 {
121 	return le16_to_cpu(bg->bg_itable_unused_lo) |
122 		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
123 		(__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
124 }
125 
/*
 * Store @blk as the block-bitmap location in group descriptor @bg.
 *
 * The low 32 bits are always written.  The high 32 bits are written
 * only when the descriptor size of @sb is at least
 * EXT4_MIN_DESC_SIZE_64BIT; otherwise the _hi field is left untouched.
 */
void ext4_block_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
}
133 
/*
 * Store @blk as the inode-bitmap location in group descriptor @bg.
 *
 * The low 32 bits are always written.  The high 32 bits are written
 * only when the descriptor size of @sb is at least
 * EXT4_MIN_DESC_SIZE_64BIT; otherwise the _hi field is left untouched.
 */
void ext4_inode_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
}
141 
/*
 * Store @blk as the inode-table location in group descriptor @bg.
 *
 * The low 32 bits are always written.  The high 32 bits are written
 * only when the descriptor size of @sb is at least
 * EXT4_MIN_DESC_SIZE_64BIT; otherwise the _hi field is left untouched.
 */
void ext4_inode_table_set(struct super_block *sb,
			  struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
}
149 
/*
 * Store @count as the free-blocks counter of group descriptor @bg.
 *
 * The low 16 bits are always written.  The high 16 bits are written
 * only when the descriptor size of @sb is at least
 * EXT4_MIN_DESC_SIZE_64BIT; otherwise the _hi field is left untouched.
 */
void ext4_free_blks_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
}
157 
/*
 * Store @count as the free-inodes counter of group descriptor @bg.
 *
 * The low 16 bits are always written.  The high 16 bits are written
 * only when the descriptor size of @sb is at least
 * EXT4_MIN_DESC_SIZE_64BIT; otherwise the _hi field is left untouched.
 */
void ext4_free_inodes_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
}
165 
/*
 * Store @count as the used-directories counter of group descriptor @bg.
 *
 * The low 16 bits are always written.  The high 16 bits are written
 * only when the descriptor size of @sb is at least
 * EXT4_MIN_DESC_SIZE_64BIT; otherwise the _hi field is left untouched.
 */
void ext4_used_dirs_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
}
173 
/*
 * Store @count as the "itable unused" counter of group descriptor @bg.
 *
 * The low 16 bits are always written.  The high 16 bits are written
 * only when the descriptor size of @sb is at least
 * EXT4_MIN_DESC_SIZE_64BIT; otherwise the _hi field is left untouched.
 */
void ext4_itable_unused_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);

	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;

	bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}
181 
182 /*
183  * Wrappers for jbd2_journal_start/end.
184  *
185  * The only special thing we need to do here is to make sure that all
186  * journal_end calls result in the superblock being marked dirty, so
187  * that sync() will call the filesystem's write_super callback if
188  * appropriate.
189  */
ext4_journal_start_sb(struct super_block * sb,int nblocks)190 handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
191 {
192 	journal_t *journal;
193 
194 	if (sb->s_flags & MS_RDONLY)
195 		return ERR_PTR(-EROFS);
196 
197 	/* Special case here: if the journal has aborted behind our
198 	 * backs (eg. EIO in the commit thread), then we still need to
199 	 * take the FS itself readonly cleanly. */
200 	journal = EXT4_SB(sb)->s_journal;
201 	if (journal) {
202 		if (is_journal_aborted(journal)) {
203 			ext4_abort(sb, __func__,
204 				   "Detected aborted journal");
205 			return ERR_PTR(-EROFS);
206 		}
207 		return jbd2_journal_start(journal, nblocks);
208 	}
209 	/*
210 	 * We're not journaling, return the appropriate indication.
211 	 */
212 	current->journal_info = EXT4_NOJOURNAL_HANDLE;
213 	return current->journal_info;
214 }
215 
216 /*
217  * The only special thing we need to do here is to make sure that all
218  * jbd2_journal_stop calls result in the superblock being marked dirty, so
219  * that sync() will call the filesystem's write_super callback if
220  * appropriate.
221  */
__ext4_journal_stop(const char * where,handle_t * handle)222 int __ext4_journal_stop(const char *where, handle_t *handle)
223 {
224 	struct super_block *sb;
225 	int err;
226 	int rc;
227 
228 	if (!ext4_handle_valid(handle)) {
229 		/*
230 		 * Do this here since we don't call jbd2_journal_stop() in
231 		 * no-journal mode.
232 		 */
233 		current->journal_info = NULL;
234 		return 0;
235 	}
236 	sb = handle->h_transaction->t_journal->j_private;
237 	err = handle->h_err;
238 	rc = jbd2_journal_stop(handle);
239 
240 	if (!err)
241 		err = rc;
242 	if (err)
243 		__ext4_std_error(sb, where, err);
244 	return err;
245 }
246 
/*
 * Record @err on @handle and abort the handle.
 *
 * @caller and @err_fn are only used in the log message.  The error is
 * stored in handle->h_err unless one is already recorded there.  If the
 * handle is already aborted nothing is logged and no further abort is
 * issued.  The handle must be a valid journal handle (BUG otherwise).
 */
void ext4_journal_abort_handle(const char *caller, const char *err_fn,
		struct buffer_head *bh, handle_t *handle, int err)
{
	char msgbuf[16];
	const char *reason = ext4_decode_error(NULL, err, msgbuf);

	BUG_ON(!ext4_handle_valid(handle));

	if (bh)
		BUFFER_TRACE(bh, "abort");

	/* Keep the first error seen on this handle. */
	if (!handle->h_err)
		handle->h_err = err;

	if (!is_handle_aborted(handle)) {
		printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
		       caller, reason, err_fn);
		jbd2_journal_abort_handle(handle);
	}
}
269 
270 /* Deal with the reporting of failure conditions on a filesystem such as
271  * inconsistencies detected or read IO failures.
272  *
273  * On ext2, we can store the error state of the filesystem in the
274  * superblock.  That is not possible on ext4, because we may have other
275  * write ordering constraints on the superblock which prevent us from
276  * writing it out straight away; and given that the journal is about to
277  * be aborted, we can't rely on the current, or future, transactions to
278  * write out the superblock safely.
279  *
280  * We'll just use the jbd2_journal_abort() error code to record an error in
281  * the journal instead.  On recovery, the journal will compain about
282  * that error until we've noted it down and cleared it.
283  */
284 
ext4_handle_error(struct super_block * sb)285 static void ext4_handle_error(struct super_block *sb)
286 {
287 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
288 
289 	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
290 	es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
291 
292 	if (sb->s_flags & MS_RDONLY)
293 		return;
294 
295 	if (!test_opt(sb, ERRORS_CONT)) {
296 		journal_t *journal = EXT4_SB(sb)->s_journal;
297 
298 		EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
299 		if (journal)
300 			jbd2_journal_abort(journal, -EIO);
301 	}
302 	if (test_opt(sb, ERRORS_RO)) {
303 		printk(KERN_CRIT "Remounting filesystem read-only\n");
304 		sb->s_flags |= MS_RDONLY;
305 	}
306 	ext4_commit_super(sb, es, 1);
307 	if (test_opt(sb, ERRORS_PANIC))
308 		panic("EXT4-fs (device %s): panic forced after error\n",
309 			sb->s_id);
310 }
311 
ext4_error(struct super_block * sb,const char * function,const char * fmt,...)312 void ext4_error(struct super_block *sb, const char *function,
313 		const char *fmt, ...)
314 {
315 	va_list args;
316 
317 	va_start(args, fmt);
318 	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
319 	vprintk(fmt, args);
320 	printk("\n");
321 	va_end(args);
322 
323 	ext4_handle_error(sb);
324 }
325 
ext4_decode_error(struct super_block * sb,int errno,char nbuf[16])326 static const char *ext4_decode_error(struct super_block *sb, int errno,
327 				     char nbuf[16])
328 {
329 	char *errstr = NULL;
330 
331 	switch (errno) {
332 	case -EIO:
333 		errstr = "IO failure";
334 		break;
335 	case -ENOMEM:
336 		errstr = "Out of memory";
337 		break;
338 	case -EROFS:
339 		if (!sb || EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT)
340 			errstr = "Journal has aborted";
341 		else
342 			errstr = "Readonly filesystem";
343 		break;
344 	default:
345 		/* If the caller passed in an extra buffer for unknown
346 		 * errors, textualise them now.  Else we just return
347 		 * NULL. */
348 		if (nbuf) {
349 			/* Check for truncated error codes... */
350 			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
351 				errstr = nbuf;
352 		}
353 		break;
354 	}
355 
356 	return errstr;
357 }
358 
359 /* __ext4_std_error decodes expected errors from journaling functions
360  * automatically and invokes the appropriate error response.  */
361 
__ext4_std_error(struct super_block * sb,const char * function,int errno)362 void __ext4_std_error(struct super_block *sb, const char *function, int errno)
363 {
364 	char nbuf[16];
365 	const char *errstr;
366 
367 	/* Special case: if the error is EROFS, and we're not already
368 	 * inside a transaction, then there's really no point in logging
369 	 * an error. */
370 	if (errno == -EROFS && journal_current_handle() == NULL &&
371 	    (sb->s_flags & MS_RDONLY))
372 		return;
373 
374 	errstr = ext4_decode_error(sb, errno, nbuf);
375 	printk(KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n",
376 	       sb->s_id, function, errstr);
377 
378 	ext4_handle_error(sb);
379 }
380 
381 /*
382  * ext4_abort is a much stronger failure handler than ext4_error.  The
383  * abort function may be used to deal with unrecoverable failures such
384  * as journal IO errors or ENOMEM at a critical moment in log management.
385  *
386  * We unconditionally force the filesystem into an ABORT|READONLY state,
387  * unless the error response on the fs has been set to panic in which
388  * case we take the easy way out and panic immediately.
389  */
390 
ext4_abort(struct super_block * sb,const char * function,const char * fmt,...)391 void ext4_abort(struct super_block *sb, const char *function,
392 		const char *fmt, ...)
393 {
394 	va_list args;
395 
396 	printk(KERN_CRIT "ext4_abort called.\n");
397 
398 	va_start(args, fmt);
399 	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
400 	vprintk(fmt, args);
401 	printk("\n");
402 	va_end(args);
403 
404 	if (test_opt(sb, ERRORS_PANIC))
405 		panic("EXT4-fs panic from previous error\n");
406 
407 	if (sb->s_flags & MS_RDONLY)
408 		return;
409 
410 	printk(KERN_CRIT "Remounting filesystem read-only\n");
411 	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
412 	sb->s_flags |= MS_RDONLY;
413 	EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT;
414 	if (EXT4_SB(sb)->s_journal)
415 		jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO);
416 }
417 
ext4_warning(struct super_block * sb,const char * function,const char * fmt,...)418 void ext4_warning(struct super_block *sb, const char *function,
419 		  const char *fmt, ...)
420 {
421 	va_list args;
422 
423 	va_start(args, fmt);
424 	printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ",
425 	       sb->s_id, function);
426 	vprintk(fmt, args);
427 	printk("\n");
428 	va_end(args);
429 }
430 
/*
 * Report a filesystem error for group @grp.
 *
 * NOTE(review): the __releases/__acquires annotations and the
 * unlock/lock pair below suggest the caller holds the group lock on
 * entry and still holds it on return -- confirm against callers.
 *
 * With errors=continue the error flag is set and the superblock is
 * committed (without waiting) while the group lock is left alone.
 * Otherwise the group lock is dropped around ext4_handle_error() and
 * re-taken afterwards.
 */
void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
				const char *function, const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	va_list ap;

	printk(KERN_CRIT "EXT4-fs error (device %s): %s: ", sb->s_id, function);
	va_start(ap, fmt);
	vprintk(fmt, ap);
	va_end(ap);
	printk("\n");

	if (!test_opt(sb, ERRORS_CONT)) {
		ext4_unlock_group(sb, grp);
		ext4_handle_error(sb);
		/*
		 * Relocking the group may be dangerous, but
		 * ext4_handle_error() has by now either remounted the
		 * filesystem read-only or panicked.
		 */
		ext4_lock_group(sb, grp);
		return;
	}

	sbi->s_mount_state |= EXT4_ERROR_FS;
	es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
	ext4_commit_super(sb, es, 0);
}
467 
468 
ext4_update_dynamic_rev(struct super_block * sb)469 void ext4_update_dynamic_rev(struct super_block *sb)
470 {
471 	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
472 
473 	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
474 		return;
475 
476 	ext4_warning(sb, __func__,
477 		     "updating to rev %d because of new feature flag, "
478 		     "running e2fsck is recommended",
479 		     EXT4_DYNAMIC_REV);
480 
481 	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
482 	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
483 	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
484 	/* leave es->s_feature_*compat flags alone */
485 	/* es->s_uuid will be set by e2fsck if empty */
486 
487 	/*
488 	 * The rest of the superblock fields should be zero, and if not it
489 	 * means they are likely already in use, so leave them alone.  We
490 	 * can leave it up to e2fsck to clean up any inconsistencies there.
491 	 */
492 }
493 
494 /*
495  * Open the external journal device
496  */
ext4_blkdev_get(dev_t dev)497 static struct block_device *ext4_blkdev_get(dev_t dev)
498 {
499 	struct block_device *bdev;
500 	char b[BDEVNAME_SIZE];
501 
502 	bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
503 	if (IS_ERR(bdev))
504 		goto fail;
505 	return bdev;
506 
507 fail:
508 	printk(KERN_ERR "EXT4-fs: failed to open journal device %s: %ld\n",
509 			__bdevname(dev, b), PTR_ERR(bdev));
510 	return NULL;
511 }
512 
513 /*
514  * Release the journal device
515  */
ext4_blkdev_put(struct block_device * bdev)516 static int ext4_blkdev_put(struct block_device *bdev)
517 {
518 	bd_release(bdev);
519 	return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
520 }
521 
ext4_blkdev_remove(struct ext4_sb_info * sbi)522 static int ext4_blkdev_remove(struct ext4_sb_info *sbi)
523 {
524 	struct block_device *bdev;
525 	int ret = -ENODEV;
526 
527 	bdev = sbi->journal_bdev;
528 	if (bdev) {
529 		ret = ext4_blkdev_put(bdev);
530 		sbi->journal_bdev = NULL;
531 	}
532 	return ret;
533 }
534 
orphan_list_entry(struct list_head * l)535 static inline struct inode *orphan_list_entry(struct list_head *l)
536 {
537 	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
538 }
539 
dump_orphan_list(struct super_block * sb,struct ext4_sb_info * sbi)540 static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
541 {
542 	struct list_head *l;
543 
544 	printk(KERN_ERR "sb orphan head is %d\n",
545 	       le32_to_cpu(sbi->s_es->s_last_orphan));
546 
547 	printk(KERN_ERR "sb_info orphan list:\n");
548 	list_for_each(l, &sbi->s_orphan) {
549 		struct inode *inode = orphan_list_entry(l);
550 		printk(KERN_ERR "  "
551 		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
552 		       inode->i_sb->s_id, inode->i_ino, inode,
553 		       inode->i_mode, inode->i_nlink,
554 		       NEXT_ORPHAN(inode));
555 	}
556 }
557 
ext4_put_super(struct super_block * sb)558 static void ext4_put_super(struct super_block *sb)
559 {
560 	struct ext4_sb_info *sbi = EXT4_SB(sb);
561 	struct ext4_super_block *es = sbi->s_es;
562 	int i, err;
563 
564 	ext4_mb_release(sb);
565 	ext4_ext_release(sb);
566 	ext4_xattr_put_super(sb);
567 	if (sbi->s_journal) {
568 		err = jbd2_journal_destroy(sbi->s_journal);
569 		sbi->s_journal = NULL;
570 		if (err < 0)
571 			ext4_abort(sb, __func__,
572 				   "Couldn't clean up the journal");
573 	}
574 	if (!(sb->s_flags & MS_RDONLY)) {
575 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
576 		es->s_state = cpu_to_le16(sbi->s_mount_state);
577 		ext4_commit_super(sb, es, 1);
578 	}
579 	if (sbi->s_proc) {
580 		remove_proc_entry("inode_readahead_blks", sbi->s_proc);
581 		remove_proc_entry(sb->s_id, ext4_proc_root);
582 	}
583 
584 	for (i = 0; i < sbi->s_gdb_count; i++)
585 		brelse(sbi->s_group_desc[i]);
586 	kfree(sbi->s_group_desc);
587 	kfree(sbi->s_flex_groups);
588 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
589 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
590 	percpu_counter_destroy(&sbi->s_dirs_counter);
591 	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
592 	brelse(sbi->s_sbh);
593 #ifdef CONFIG_QUOTA
594 	for (i = 0; i < MAXQUOTAS; i++)
595 		kfree(sbi->s_qf_names[i]);
596 #endif
597 
598 	/* Debugging code just in case the in-memory inode orphan list
599 	 * isn't empty.  The on-disk one can be non-empty if we've
600 	 * detected an error and taken the fs readonly, but the
601 	 * in-memory list had better be clean by this point. */
602 	if (!list_empty(&sbi->s_orphan))
603 		dump_orphan_list(sb, sbi);
604 	J_ASSERT(list_empty(&sbi->s_orphan));
605 
606 	invalidate_bdev(sb->s_bdev);
607 	if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
608 		/*
609 		 * Invalidate the journal device's buffers.  We don't want them
610 		 * floating about in memory - the physical journal device may
611 		 * hotswapped, and it breaks the `ro-after' testing code.
612 		 */
613 		sync_blockdev(sbi->journal_bdev);
614 		invalidate_bdev(sbi->journal_bdev);
615 		ext4_blkdev_remove(sbi);
616 	}
617 	sb->s_fs_info = NULL;
618 	kfree(sbi);
619 	return;
620 }
621 
622 static struct kmem_cache *ext4_inode_cachep;
623 
624 /*
625  * Called inside transaction, so use GFP_NOFS
626  */
ext4_alloc_inode(struct super_block * sb)627 static struct inode *ext4_alloc_inode(struct super_block *sb)
628 {
629 	struct ext4_inode_info *ei;
630 
631 	ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS);
632 	if (!ei)
633 		return NULL;
634 #ifdef CONFIG_EXT4_FS_POSIX_ACL
635 	ei->i_acl = EXT4_ACL_NOT_CACHED;
636 	ei->i_default_acl = EXT4_ACL_NOT_CACHED;
637 #endif
638 	ei->vfs_inode.i_version = 1;
639 	ei->vfs_inode.i_data.writeback_index = 0;
640 	memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache));
641 	INIT_LIST_HEAD(&ei->i_prealloc_list);
642 	spin_lock_init(&ei->i_prealloc_lock);
643 	/*
644 	 * Note:  We can be called before EXT4_SB(sb)->s_journal is set,
645 	 * therefore it can be null here.  Don't check it, just initialize
646 	 * jinode.
647 	 */
648 	jbd2_journal_init_jbd_inode(&ei->jinode, &ei->vfs_inode);
649 	ei->i_reserved_data_blocks = 0;
650 	ei->i_reserved_meta_blocks = 0;
651 	ei->i_allocated_meta_blocks = 0;
652 	ei->i_delalloc_reserved_flag = 0;
653 	spin_lock_init(&(ei->i_block_reservation_lock));
654 	return &ei->vfs_inode;
655 }
656 
ext4_destroy_inode(struct inode * inode)657 static void ext4_destroy_inode(struct inode *inode)
658 {
659 	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
660 		printk("EXT4 Inode %p: orphan list check failed!\n",
661 			EXT4_I(inode));
662 		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
663 				EXT4_I(inode), sizeof(struct ext4_inode_info),
664 				true);
665 		dump_stack();
666 	}
667 	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
668 }
669 
init_once(void * foo)670 static void init_once(void *foo)
671 {
672 	struct ext4_inode_info *ei = (struct ext4_inode_info *) foo;
673 
674 	INIT_LIST_HEAD(&ei->i_orphan);
675 #ifdef CONFIG_EXT4_FS_XATTR
676 	init_rwsem(&ei->xattr_sem);
677 #endif
678 	init_rwsem(&ei->i_data_sem);
679 	inode_init_once(&ei->vfs_inode);
680 }
681 
init_inodecache(void)682 static int init_inodecache(void)
683 {
684 	ext4_inode_cachep = kmem_cache_create("ext4_inode_cache",
685 					     sizeof(struct ext4_inode_info),
686 					     0, (SLAB_RECLAIM_ACCOUNT|
687 						SLAB_MEM_SPREAD),
688 					     init_once);
689 	if (ext4_inode_cachep == NULL)
690 		return -ENOMEM;
691 	return 0;
692 }
693 
destroy_inodecache(void)694 static void destroy_inodecache(void)
695 {
696 	kmem_cache_destroy(ext4_inode_cachep);
697 }
698 
ext4_clear_inode(struct inode * inode)699 static void ext4_clear_inode(struct inode *inode)
700 {
701 #ifdef CONFIG_EXT4_FS_POSIX_ACL
702 	if (EXT4_I(inode)->i_acl &&
703 			EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) {
704 		posix_acl_release(EXT4_I(inode)->i_acl);
705 		EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED;
706 	}
707 	if (EXT4_I(inode)->i_default_acl &&
708 			EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) {
709 		posix_acl_release(EXT4_I(inode)->i_default_acl);
710 		EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED;
711 	}
712 #endif
713 	ext4_discard_preallocations(inode);
714 	if (EXT4_JOURNAL(inode))
715 		jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal,
716 				       &EXT4_I(inode)->jinode);
717 }
718 
/*
 * Append the quota-related mount options of @sb to @seq: the journaled
 * quota format, the user/group journaled quota file names, and the
 * usrquota/grpquota flags.  Emits nothing when CONFIG_QUOTA is not
 * defined.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *usr_name = sbi->s_qf_names[USRQUOTA];
	char *grp_name = sbi->s_qf_names[GRPQUOTA];

	if (sbi->s_jquota_fmt) {
		/* Any format other than QFMT_VFS_OLD is shown as "vfsv0". */
		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			seq_printf(seq, ",jqfmt=%s", "vfsold");
		else
			seq_printf(seq, ",jqfmt=%s", "vfsv0");
	}

	if (usr_name)
		seq_printf(seq, ",usrjquota=%s", usr_name);

	if (grp_name)
		seq_printf(seq, ",grpjquota=%s", grp_name);

	if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA)
		seq_puts(seq, ",usrquota");

	if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)
		seq_puts(seq, ",grpquota");
#endif
}
742 
743 /*
744  * Show an option if
745  *  - it's set to a non-default value OR
746  *  - if the per-sb default is different from the global default
747  */
ext4_show_options(struct seq_file * seq,struct vfsmount * vfs)748 static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
749 {
750 	int def_errors;
751 	unsigned long def_mount_opts;
752 	struct super_block *sb = vfs->mnt_sb;
753 	struct ext4_sb_info *sbi = EXT4_SB(sb);
754 	struct ext4_super_block *es = sbi->s_es;
755 
756 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
757 	def_errors     = le16_to_cpu(es->s_errors);
758 
759 	if (sbi->s_sb_block != 1)
760 		seq_printf(seq, ",sb=%llu", sbi->s_sb_block);
761 	if (test_opt(sb, MINIX_DF))
762 		seq_puts(seq, ",minixdf");
763 	if (test_opt(sb, GRPID) && !(def_mount_opts & EXT4_DEFM_BSDGROUPS))
764 		seq_puts(seq, ",grpid");
765 	if (!test_opt(sb, GRPID) && (def_mount_opts & EXT4_DEFM_BSDGROUPS))
766 		seq_puts(seq, ",nogrpid");
767 	if (sbi->s_resuid != EXT4_DEF_RESUID ||
768 	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID) {
769 		seq_printf(seq, ",resuid=%u", sbi->s_resuid);
770 	}
771 	if (sbi->s_resgid != EXT4_DEF_RESGID ||
772 	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID) {
773 		seq_printf(seq, ",resgid=%u", sbi->s_resgid);
774 	}
775 	if (test_opt(sb, ERRORS_RO)) {
776 		if (def_errors == EXT4_ERRORS_PANIC ||
777 		    def_errors == EXT4_ERRORS_CONTINUE) {
778 			seq_puts(seq, ",errors=remount-ro");
779 		}
780 	}
781 	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
782 		seq_puts(seq, ",errors=continue");
783 	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
784 		seq_puts(seq, ",errors=panic");
785 	if (test_opt(sb, NO_UID32) && !(def_mount_opts & EXT4_DEFM_UID16))
786 		seq_puts(seq, ",nouid32");
787 	if (test_opt(sb, DEBUG) && !(def_mount_opts & EXT4_DEFM_DEBUG))
788 		seq_puts(seq, ",debug");
789 	if (test_opt(sb, OLDALLOC))
790 		seq_puts(seq, ",oldalloc");
791 #ifdef CONFIG_EXT4_FS_XATTR
792 	if (test_opt(sb, XATTR_USER) &&
793 		!(def_mount_opts & EXT4_DEFM_XATTR_USER))
794 		seq_puts(seq, ",user_xattr");
795 	if (!test_opt(sb, XATTR_USER) &&
796 	    (def_mount_opts & EXT4_DEFM_XATTR_USER)) {
797 		seq_puts(seq, ",nouser_xattr");
798 	}
799 #endif
800 #ifdef CONFIG_EXT4_FS_POSIX_ACL
801 	if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL))
802 		seq_puts(seq, ",acl");
803 	if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL))
804 		seq_puts(seq, ",noacl");
805 #endif
806 	if (!test_opt(sb, RESERVATION))
807 		seq_puts(seq, ",noreservation");
808 	if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
809 		seq_printf(seq, ",commit=%u",
810 			   (unsigned) (sbi->s_commit_interval / HZ));
811 	}
812 	if (sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME) {
813 		seq_printf(seq, ",min_batch_time=%u",
814 			   (unsigned) sbi->s_min_batch_time);
815 	}
816 	if (sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) {
817 		seq_printf(seq, ",max_batch_time=%u",
818 			   (unsigned) sbi->s_min_batch_time);
819 	}
820 
821 	/*
822 	 * We're changing the default of barrier mount option, so
823 	 * let's always display its mount state so it's clear what its
824 	 * status is.
825 	 */
826 	seq_puts(seq, ",barrier=");
827 	seq_puts(seq, test_opt(sb, BARRIER) ? "1" : "0");
828 	if (test_opt(sb, JOURNAL_ASYNC_COMMIT))
829 		seq_puts(seq, ",journal_async_commit");
830 	if (test_opt(sb, NOBH))
831 		seq_puts(seq, ",nobh");
832 	if (test_opt(sb, I_VERSION))
833 		seq_puts(seq, ",i_version");
834 	if (!test_opt(sb, DELALLOC))
835 		seq_puts(seq, ",nodelalloc");
836 
837 
838 	if (sbi->s_stripe)
839 		seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
840 	/*
841 	 * journal mode get enabled in different ways
842 	 * So just print the value even if we didn't specify it
843 	 */
844 	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
845 		seq_puts(seq, ",data=journal");
846 	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
847 		seq_puts(seq, ",data=ordered");
848 	else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
849 		seq_puts(seq, ",data=writeback");
850 
851 	if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
852 		seq_printf(seq, ",inode_readahead_blks=%u",
853 			   sbi->s_inode_readahead_blks);
854 
855 	if (test_opt(sb, DATA_ERR_ABORT))
856 		seq_puts(seq, ",data_err=abort");
857 
858 	ext4_show_quota_options(seq, sb);
859 	return 0;
860 }
861 
862 
/*
 * Look up inode number @ino on @sb on behalf of the file-handle
 * decoders.
 *
 * Returns ERR_PTR(-ESTALE) when @ino lies below EXT4_FIRST_INO (and is
 * not the root inode), when it exceeds s_inodes_count, or when a
 * non-zero @generation does not match the inode's i_generation.  Any
 * error from ext4_iget() is passed through unchanged.
 */
static struct inode *ext4_nfs_get_inode(struct super_block *sb,
		u64 ino, u32 generation)
{
	struct inode *inode;

	if ((ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) ||
	    ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))
		return ERR_PTR(-ESTALE);

	/*
	 * iget isn't really right if the inode is currently unallocated;
	 * this relies on ext4_iget() reporting such inodes as errors.
	 *
	 * Currently we don't know the generation for parent directory, so
	 * a generation of 0 means "accept any".
	 */
	inode = ext4_iget(sb, ino);
	if (!IS_ERR(inode) && generation &&
	    inode->i_generation != generation) {
		iput(inode);
		inode = ERR_PTR(-ESTALE);
	}

	return inode;
}
891 
ext4_fh_to_dentry(struct super_block * sb,struct fid * fid,int fh_len,int fh_type)892 static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
893 		int fh_len, int fh_type)
894 {
895 	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
896 				    ext4_nfs_get_inode);
897 }
898 
ext4_fh_to_parent(struct super_block * sb,struct fid * fid,int fh_len,int fh_type)899 static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
900 		int fh_len, int fh_type)
901 {
902 	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
903 				    ext4_nfs_get_inode);
904 }
905 
906 /*
907  * Try to release metadata pages (indirect blocks, directories) which are
908  * mapped via the block device.  Since these pages could have journal heads
909  * which would prevent try_to_free_buffers() from freeing them, we must use
910  * jbd2 layer's try_to_free_buffers() function to release them.
911  */
bdev_try_to_free_page(struct super_block * sb,struct page * page,gfp_t wait)912 static int bdev_try_to_free_page(struct super_block *sb, struct page *page, gfp_t wait)
913 {
914 	journal_t *journal = EXT4_SB(sb)->s_journal;
915 
916 	WARN_ON(PageChecked(page));
917 	if (!page_has_buffers(page))
918 		return 0;
919 	if (journal)
920 		return jbd2_journal_try_to_free_buffers(journal, page,
921 							wait & ~__GFP_WAIT);
922 	return try_to_free_buffers(page);
923 }
924 
925 #ifdef CONFIG_QUOTA
/* Printable name of quota type @t, for log messages. */
#define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group")
/*
 * Pick the identifier <on>USRJQUOTA or <on>GRPJQUOTA according to quota
 * type @t.  The prefix has to be pasted bare: writing "(on)##X" would
 * paste the closing parenthesis onto X, which is not a valid
 * preprocessing token and makes every expansion fail to compile.
 */
#define QTYPE2MOPT(on, t) \
	((t) == USRQUOTA ? (on##USRJQUOTA) : (on##GRPJQUOTA))
928 
929 static int ext4_dquot_initialize(struct inode *inode, int type);
930 static int ext4_dquot_drop(struct inode *inode);
931 static int ext4_write_dquot(struct dquot *dquot);
932 static int ext4_acquire_dquot(struct dquot *dquot);
933 static int ext4_release_dquot(struct dquot *dquot);
934 static int ext4_mark_dquot_dirty(struct dquot *dquot);
935 static int ext4_write_info(struct super_block *sb, int type);
936 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
937 				char *path, int remount);
938 static int ext4_quota_on_mount(struct super_block *sb, int type);
939 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
940 			       size_t len, loff_t off);
941 static ssize_t ext4_quota_write(struct super_block *sb, int type,
942 				const char *data, size_t len, loff_t off);
943 
944 static struct dquot_operations ext4_quota_operations = {
945 	.initialize	= ext4_dquot_initialize,
946 	.drop		= ext4_dquot_drop,
947 	.alloc_space	= dquot_alloc_space,
948 	.alloc_inode	= dquot_alloc_inode,
949 	.free_space	= dquot_free_space,
950 	.free_inode	= dquot_free_inode,
951 	.transfer	= dquot_transfer,
952 	.write_dquot	= ext4_write_dquot,
953 	.acquire_dquot	= ext4_acquire_dquot,
954 	.release_dquot	= ext4_release_dquot,
955 	.mark_dirty	= ext4_mark_dquot_dirty,
956 	.write_info	= ext4_write_info,
957 	.alloc_dquot	= dquot_alloc,
958 	.destroy_dquot	= dquot_destroy,
959 };
960 
961 static struct quotactl_ops ext4_qctl_operations = {
962 	.quota_on	= ext4_quota_on,
963 	.quota_off	= vfs_quota_off,
964 	.quota_sync	= vfs_quota_sync,
965 	.get_info	= vfs_get_dqinfo,
966 	.set_info	= vfs_set_dqinfo,
967 	.get_dqblk	= vfs_get_dqblk,
968 	.set_dqblk	= vfs_set_dqblk
969 };
970 #endif
971 
972 static const struct super_operations ext4_sops = {
973 	.alloc_inode	= ext4_alloc_inode,
974 	.destroy_inode	= ext4_destroy_inode,
975 	.write_inode	= ext4_write_inode,
976 	.dirty_inode	= ext4_dirty_inode,
977 	.delete_inode	= ext4_delete_inode,
978 	.put_super	= ext4_put_super,
979 	.write_super	= ext4_write_super,
980 	.sync_fs	= ext4_sync_fs,
981 	.freeze_fs	= ext4_freeze,
982 	.unfreeze_fs	= ext4_unfreeze,
983 	.statfs		= ext4_statfs,
984 	.remount_fs	= ext4_remount,
985 	.clear_inode	= ext4_clear_inode,
986 	.show_options	= ext4_show_options,
987 #ifdef CONFIG_QUOTA
988 	.quota_read	= ext4_quota_read,
989 	.quota_write	= ext4_quota_write,
990 #endif
991 	.bdev_try_to_free_page = bdev_try_to_free_page,
992 };
993 
994 static const struct export_operations ext4_export_ops = {
995 	.fh_to_dentry = ext4_fh_to_dentry,
996 	.fh_to_parent = ext4_fh_to_parent,
997 	.get_parent = ext4_get_parent,
998 };
999 
/*
 * Token identifiers for mount options.  The enumerators keep their
 * original order, so their numeric values are unchanged.
 */
enum {
	/* statfs flavour and group-id inheritance */
	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
	/* reserved blocks owner, superblock location, error policy */
	Opt_resgid, Opt_resuid, Opt_sb,
	Opt_err_cont, Opt_err_panic, Opt_err_ro,
	/* misc behaviour switches */
	Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov,
	Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
	Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh,
	/* journal tuning */
	Opt_commit, Opt_min_batch_time, Opt_max_batch_time,
	Opt_journal_update, Opt_journal_dev,
	Opt_journal_checksum, Opt_journal_async_commit,
	/* abort and data journaling mode */
	Opt_abort,
	Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_data_err_abort, Opt_data_err_ignore,
	/* quota */
	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
	/* remaining options; Opt_err terminates the match table */
	Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
	Opt_grpquota, Opt_i_version,
	Opt_stripe, Opt_delalloc, Opt_nodelalloc,
	Opt_inode_readahead_blks, Opt_journal_ioprio
};
1018 
1019 static const match_table_t tokens = {
1020 	{Opt_bsd_df, "bsddf"},
1021 	{Opt_minix_df, "minixdf"},
1022 	{Opt_grpid, "grpid"},
1023 	{Opt_grpid, "bsdgroups"},
1024 	{Opt_nogrpid, "nogrpid"},
1025 	{Opt_nogrpid, "sysvgroups"},
1026 	{Opt_resgid, "resgid=%u"},
1027 	{Opt_resuid, "resuid=%u"},
1028 	{Opt_sb, "sb=%u"},
1029 	{Opt_err_cont, "errors=continue"},
1030 	{Opt_err_panic, "errors=panic"},
1031 	{Opt_err_ro, "errors=remount-ro"},
1032 	{Opt_nouid32, "nouid32"},
1033 	{Opt_debug, "debug"},
1034 	{Opt_oldalloc, "oldalloc"},
1035 	{Opt_orlov, "orlov"},
1036 	{Opt_user_xattr, "user_xattr"},
1037 	{Opt_nouser_xattr, "nouser_xattr"},
1038 	{Opt_acl, "acl"},
1039 	{Opt_noacl, "noacl"},
1040 	{Opt_reservation, "reservation"},
1041 	{Opt_noreservation, "noreservation"},
1042 	{Opt_noload, "noload"},
1043 	{Opt_nobh, "nobh"},
1044 	{Opt_bh, "bh"},
1045 	{Opt_commit, "commit=%u"},
1046 	{Opt_min_batch_time, "min_batch_time=%u"},
1047 	{Opt_max_batch_time, "max_batch_time=%u"},
1048 	{Opt_journal_update, "journal=update"},
1049 	{Opt_journal_dev, "journal_dev=%u"},
1050 	{Opt_journal_checksum, "journal_checksum"},
1051 	{Opt_journal_async_commit, "journal_async_commit"},
1052 	{Opt_abort, "abort"},
1053 	{Opt_data_journal, "data=journal"},
1054 	{Opt_data_ordered, "data=ordered"},
1055 	{Opt_data_writeback, "data=writeback"},
1056 	{Opt_data_err_abort, "data_err=abort"},
1057 	{Opt_data_err_ignore, "data_err=ignore"},
1058 	{Opt_offusrjquota, "usrjquota="},
1059 	{Opt_usrjquota, "usrjquota=%s"},
1060 	{Opt_offgrpjquota, "grpjquota="},
1061 	{Opt_grpjquota, "grpjquota=%s"},
1062 	{Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1063 	{Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1064 	{Opt_grpquota, "grpquota"},
1065 	{Opt_noquota, "noquota"},
1066 	{Opt_quota, "quota"},
1067 	{Opt_usrquota, "usrquota"},
1068 	{Opt_barrier, "barrier=%u"},
1069 	{Opt_i_version, "i_version"},
1070 	{Opt_stripe, "stripe=%u"},
1071 	{Opt_resize, "resize"},
1072 	{Opt_delalloc, "delalloc"},
1073 	{Opt_nodelalloc, "nodelalloc"},
1074 	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
1075 	{Opt_journal_ioprio, "journal_ioprio=%u"},
1076 	{Opt_err, NULL},
1077 };
1078 
get_sb_block(void ** data)1079 static ext4_fsblk_t get_sb_block(void **data)
1080 {
1081 	ext4_fsblk_t	sb_block;
1082 	char		*options = (char *) *data;
1083 
1084 	if (!options || strncmp(options, "sb=", 3) != 0)
1085 		return 1;	/* Default location */
1086 	options += 3;
1087 	/*todo: use simple_strtoll with >32bit ext4 */
1088 	sb_block = simple_strtoul(options, &options, 0);
1089 	if (*options && *options != ',') {
1090 		printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n",
1091 		       (char *) *data);
1092 		return 1;
1093 	}
1094 	if (*options == ',')
1095 		options++;
1096 	*data = (void *) options;
1097 	return sb_block;
1098 }
1099 
/* I/O priority value built from class IOPRIO_CLASS_BE and level 3. */
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1101 
/*
 * parse_options() - apply a comma-separated mount option string to @sb.
 *
 * @options:        option string; it is split in place with strsep().
 *                  NULL means "no options" and succeeds immediately.
 * @sb:             superblock whose ext4_sb_info receives the settings.
 * @journal_devnum: written when "journal_dev=" is given.
 * @journal_ioprio: written when "journal_ioprio=" is given with a value
 *                  in the range 0..7.
 * @n_blocks_count: written when "resize" is accepted.
 * @is_remount:     non-zero when called for a remount; some options are
 *                  then refused and others are only allowed then.
 *
 * Returns 1 when every option was accepted, 0 as soon as one is rejected.
 * Options processed before the rejected one have already been applied to
 * the ext4_sb_info at that point.
 */
static int parse_options(char *options, struct super_block *sb,
			 unsigned long *journal_devnum,
			 unsigned int *journal_ioprio,
			 ext4_fsblk_t *n_blocks_count, int is_remount)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *p;
	substring_t args[MAX_OPT_ARGS];
	int data_opt = 0;
	int option;
#ifdef CONFIG_QUOTA
	int qtype, qfmt;
	char *qname;
#endif

	if (!options)
		return 1;

	while ((p = strsep(&options, ",")) != NULL) {
		int token;
		if (!*p)
			continue;

		/*
		 * Patterns without a value placeholder leave args[]
		 * untouched, so clear the first slot to be able to tell
		 * "no value captured" from stale stack contents.
		 */
		args[0].to = args[0].from = NULL;
		token = match_token(p, tokens, args);
		switch (token) {
		case Opt_bsd_df:
			clear_opt(sbi->s_mount_opt, MINIX_DF);
			break;
		case Opt_minix_df:
			set_opt(sbi->s_mount_opt, MINIX_DF);
			break;
		case Opt_grpid:
			set_opt(sbi->s_mount_opt, GRPID);
			break;
		case Opt_nogrpid:
			clear_opt(sbi->s_mount_opt, GRPID);
			break;
		case Opt_resuid:
			if (match_int(&args[0], &option))
				return 0;
			sbi->s_resuid = option;
			break;
		case Opt_resgid:
			if (match_int(&args[0], &option))
				return 0;
			sbi->s_resgid = option;
			break;
		case Opt_sb:
			/* handled by get_sb_block() instead of here */
			break;
		case Opt_err_panic:
			clear_opt(sbi->s_mount_opt, ERRORS_CONT);
			clear_opt(sbi->s_mount_opt, ERRORS_RO);
			set_opt(sbi->s_mount_opt, ERRORS_PANIC);
			break;
		case Opt_err_ro:
			clear_opt(sbi->s_mount_opt, ERRORS_CONT);
			clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
			set_opt(sbi->s_mount_opt, ERRORS_RO);
			break;
		case Opt_err_cont:
			clear_opt(sbi->s_mount_opt, ERRORS_RO);
			clear_opt(sbi->s_mount_opt, ERRORS_PANIC);
			set_opt(sbi->s_mount_opt, ERRORS_CONT);
			break;
		case Opt_nouid32:
			set_opt(sbi->s_mount_opt, NO_UID32);
			break;
		case Opt_debug:
			set_opt(sbi->s_mount_opt, DEBUG);
			break;
		case Opt_oldalloc:
			set_opt(sbi->s_mount_opt, OLDALLOC);
			break;
		case Opt_orlov:
			clear_opt(sbi->s_mount_opt, OLDALLOC);
			break;
#ifdef CONFIG_EXT4_FS_XATTR
		case Opt_user_xattr:
			set_opt(sbi->s_mount_opt, XATTR_USER);
			break;
		case Opt_nouser_xattr:
			clear_opt(sbi->s_mount_opt, XATTR_USER);
			break;
#else
		case Opt_user_xattr:
		case Opt_nouser_xattr:
			printk(KERN_ERR "EXT4 (no)user_xattr options "
			       "not supported\n");
			break;
#endif
#ifdef CONFIG_EXT4_FS_POSIX_ACL
		case Opt_acl:
			set_opt(sbi->s_mount_opt, POSIX_ACL);
			break;
		case Opt_noacl:
			clear_opt(sbi->s_mount_opt, POSIX_ACL);
			break;
#else
		case Opt_acl:
		case Opt_noacl:
			printk(KERN_ERR "EXT4 (no)acl options "
			       "not supported\n");
			break;
#endif
		case Opt_reservation:
			set_opt(sbi->s_mount_opt, RESERVATION);
			break;
		case Opt_noreservation:
			clear_opt(sbi->s_mount_opt, RESERVATION);
			break;
		case Opt_journal_update:
			/* @@@ FIXME */
			/* Eventually we will want to be able to create
			   a journal file here.  For now, only allow the
			   user to specify an existing inode to be the
			   journal file. */
			if (is_remount) {
				printk(KERN_ERR "EXT4-fs: cannot specify "
				       "journal on remount\n");
				return 0;
			}
			set_opt(sbi->s_mount_opt, UPDATE_JOURNAL);
			break;
		case Opt_journal_dev:
			if (is_remount) {
				printk(KERN_ERR "EXT4-fs: cannot specify "
				       "journal on remount\n");
				return 0;
			}
			if (match_int(&args[0], &option))
				return 0;
			*journal_devnum = option;
			break;
		case Opt_journal_checksum:
			set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
			break;
		case Opt_journal_async_commit:
			/* async commit also turns journal checksums on */
			set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
			set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
			break;
		case Opt_noload:
			set_opt(sbi->s_mount_opt, NOLOAD);
			break;
		case Opt_commit:
			if (match_int(&args[0], &option))
				return 0;
			if (option < 0)
				return 0;
			/* 0 selects the jbd2 default commit age */
			if (option == 0)
				option = JBD2_DEFAULT_MAX_COMMIT_AGE;
			sbi->s_commit_interval = HZ * option;
			break;
		case Opt_max_batch_time:
			if (match_int(&args[0], &option))
				return 0;
			if (option < 0)
				return 0;
			/* 0 selects the ext4 default */
			if (option == 0)
				option = EXT4_DEF_MAX_BATCH_TIME;
			sbi->s_max_batch_time = option;
			break;
		case Opt_min_batch_time:
			if (match_int(&args[0], &option))
				return 0;
			if (option < 0)
				return 0;
			sbi->s_min_batch_time = option;
			break;
		case Opt_data_journal:
			data_opt = EXT4_MOUNT_JOURNAL_DATA;
			goto datacheck;
		case Opt_data_ordered:
			data_opt = EXT4_MOUNT_ORDERED_DATA;
			goto datacheck;
		case Opt_data_writeback:
			data_opt = EXT4_MOUNT_WRITEBACK_DATA;
		datacheck:
			/*
			 * On remount the data mode may be restated but not
			 * changed; on a fresh mount it replaces the current
			 * DATA_FLAGS bits.
			 */
			if (is_remount) {
				if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS)
						!= data_opt) {
					printk(KERN_ERR
						"EXT4-fs: cannot change data "
						"mode on remount\n");
					return 0;
				}
			} else {
				sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS;
				sbi->s_mount_opt |= data_opt;
			}
			break;
		case Opt_data_err_abort:
			set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
			break;
		case Opt_data_err_ignore:
			clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
			break;
#ifdef CONFIG_QUOTA
		case Opt_usrjquota:
			qtype = USRQUOTA;
			goto set_qf_name;
		case Opt_grpjquota:
			qtype = GRPQUOTA;
set_qf_name:
			if (sb_any_quota_loaded(sb) &&
			    !sbi->s_qf_names[qtype]) {
				printk(KERN_ERR
				       "EXT4-fs: Cannot change journaled "
				       "quota options when quota turned on.\n");
				return 0;
			}
			qname = match_strdup(&args[0]);
			if (!qname) {
				printk(KERN_ERR
					"EXT4-fs: not enough memory for "
					"storing quotafile name.\n");
				return 0;
			}
			if (sbi->s_qf_names[qtype]) {
				if (strcmp(sbi->s_qf_names[qtype], qname)) {
					printk(KERN_ERR
						"EXT4-fs: %s quota file already "
						"specified.\n",
						QTYPE2NAME(qtype));
					kfree(qname);
					return 0;
				}
				/*
				 * The same name was given again.  Keep the
				 * copy that is already stored and drop the
				 * new one; overwriting the pointer here
				 * would leak the stored string.
				 */
				kfree(qname);
			} else {
				if (strchr(qname, '/')) {
					printk(KERN_ERR
						"EXT4-fs: quotafile must be on "
						"filesystem root.\n");
					kfree(qname);
					return 0;
				}
				sbi->s_qf_names[qtype] = qname;
			}
			set_opt(sbi->s_mount_opt, QUOTA);
			break;
		case Opt_offusrjquota:
			qtype = USRQUOTA;
			goto clear_qf_name;
		case Opt_offgrpjquota:
			qtype = GRPQUOTA;
clear_qf_name:
			if (sb_any_quota_loaded(sb) &&
			    sbi->s_qf_names[qtype]) {
				printk(KERN_ERR "EXT4-fs: Cannot change "
					"journaled quota options when "
					"quota turned on.\n");
				return 0;
			}
			/*
			 * The space will be released later when all options
			 * are confirmed to be correct
			 */
			sbi->s_qf_names[qtype] = NULL;
			break;
		case Opt_jqfmt_vfsold:
			qfmt = QFMT_VFS_OLD;
			goto set_qf_format;
		case Opt_jqfmt_vfsv0:
			qfmt = QFMT_VFS_V0;
set_qf_format:
			if (sb_any_quota_loaded(sb) &&
			    sbi->s_jquota_fmt != qfmt) {
				printk(KERN_ERR "EXT4-fs: Cannot change "
					"journaled quota options when "
					"quota turned on.\n");
				return 0;
			}
			sbi->s_jquota_fmt = qfmt;
			break;
		case Opt_quota:
		case Opt_usrquota:
			set_opt(sbi->s_mount_opt, QUOTA);
			set_opt(sbi->s_mount_opt, USRQUOTA);
			break;
		case Opt_grpquota:
			set_opt(sbi->s_mount_opt, QUOTA);
			set_opt(sbi->s_mount_opt, GRPQUOTA);
			break;
		case Opt_noquota:
			if (sb_any_quota_loaded(sb)) {
				printk(KERN_ERR "EXT4-fs: Cannot change quota "
					"options when quota turned on.\n");
				return 0;
			}
			clear_opt(sbi->s_mount_opt, QUOTA);
			clear_opt(sbi->s_mount_opt, USRQUOTA);
			clear_opt(sbi->s_mount_opt, GRPQUOTA);
			break;
#else
		case Opt_quota:
		case Opt_usrquota:
		case Opt_grpquota:
			printk(KERN_ERR
				"EXT4-fs: quota options not supported.\n");
			break;
		case Opt_usrjquota:
		case Opt_grpjquota:
		case Opt_offusrjquota:
		case Opt_offgrpjquota:
		case Opt_jqfmt_vfsold:
		case Opt_jqfmt_vfsv0:
			printk(KERN_ERR
				"EXT4-fs: journaled quota options not "
				"supported.\n");
			break;
		case Opt_noquota:
			break;
#endif
		case Opt_abort:
			set_opt(sbi->s_mount_opt, ABORT);
			break;
		case Opt_barrier:
			if (match_int(&args[0], &option))
				return 0;
			if (option)
				set_opt(sbi->s_mount_opt, BARRIER);
			else
				clear_opt(sbi->s_mount_opt, BARRIER);
			break;
		case Opt_ignore:
			break;
		case Opt_resize:
			if (!is_remount) {
				printk(KERN_ERR "EXT4-fs: resize option only "
					"available for remount\n");
				return 0;
			}
			/*
			 * NOTE(review): the "resize" pattern in the tokens
			 * table has no value placeholder, so no value is
			 * ever captured for it.  Refuse cleanly in that
			 * case instead of parsing an unset substring.
			 */
			if (!args[0].from || match_int(&args[0], &option) != 0)
				return 0;
			*n_blocks_count = option;
			break;
		case Opt_nobh:
			set_opt(sbi->s_mount_opt, NOBH);
			break;
		case Opt_bh:
			clear_opt(sbi->s_mount_opt, NOBH);
			break;
		case Opt_i_version:
			set_opt(sbi->s_mount_opt, I_VERSION);
			sb->s_flags |= MS_I_VERSION;
			break;
		case Opt_nodelalloc:
			clear_opt(sbi->s_mount_opt, DELALLOC);
			break;
		case Opt_stripe:
			if (match_int(&args[0], &option))
				return 0;
			if (option < 0)
				return 0;
			sbi->s_stripe = option;
			break;
		case Opt_delalloc:
			set_opt(sbi->s_mount_opt, DELALLOC);
			break;
		case Opt_inode_readahead_blks:
			if (match_int(&args[0], &option))
				return 0;
			if (option < 0 || option > (1 << 30))
				return 0;
			sbi->s_inode_readahead_blks = option;
			break;
		case Opt_journal_ioprio:
			if (match_int(&args[0], &option))
				return 0;
			/* out-of-range priorities are ignored, not rejected */
			if (option < 0 || option > 7)
				break;
			*journal_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE,
							    option);
			break;
		default:
			printk(KERN_ERR
			       "EXT4-fs: Unrecognized mount option \"%s\" "
			       "or missing value\n", p);
			return 0;
		}
	}
#ifdef CONFIG_QUOTA
	/*
	 * Cross-check the quota settings once every option has been seen:
	 * a journaled quota file name overrides the matching old-style
	 * flag, the two styles may not be mixed across quota types, and a
	 * journaled quota format must be given exactly when a journaled
	 * quota file is configured.
	 */
	if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
		if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) &&
		     sbi->s_qf_names[USRQUOTA])
			clear_opt(sbi->s_mount_opt, USRQUOTA);

		if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) &&
		     sbi->s_qf_names[GRPQUOTA])
			clear_opt(sbi->s_mount_opt, GRPQUOTA);

		if ((sbi->s_qf_names[USRQUOTA] &&
				(sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) ||
		    (sbi->s_qf_names[GRPQUOTA] &&
				(sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) {
			printk(KERN_ERR "EXT4-fs: old and new quota "
					"format mixing.\n");
			return 0;
		}

		if (!sbi->s_jquota_fmt) {
			printk(KERN_ERR "EXT4-fs: journaled quota format "
					"not specified.\n");
			return 0;
		}
	} else {
		if (sbi->s_jquota_fmt) {
			printk(KERN_ERR "EXT4-fs: journaled quota format "
					"specified with no journaling "
					"enabled.\n");
			return 0;
		}
	}
#endif
	return 1;
}
1516 
ext4_setup_super(struct super_block * sb,struct ext4_super_block * es,int read_only)1517 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1518 			    int read_only)
1519 {
1520 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1521 	int res = 0;
1522 
1523 	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
1524 		printk(KERN_ERR "EXT4-fs warning: revision level too high, "
1525 		       "forcing read-only mode\n");
1526 		res = MS_RDONLY;
1527 	}
1528 	if (read_only)
1529 		return res;
1530 	if (!(sbi->s_mount_state & EXT4_VALID_FS))
1531 		printk(KERN_WARNING "EXT4-fs warning: mounting unchecked fs, "
1532 		       "running e2fsck is recommended\n");
1533 	else if ((sbi->s_mount_state & EXT4_ERROR_FS))
1534 		printk(KERN_WARNING
1535 		       "EXT4-fs warning: mounting fs with errors, "
1536 		       "running e2fsck is recommended\n");
1537 	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1538 		 le16_to_cpu(es->s_mnt_count) >=
1539 		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
1540 		printk(KERN_WARNING
1541 		       "EXT4-fs warning: maximal mount count reached, "
1542 		       "running e2fsck is recommended\n");
1543 	else if (le32_to_cpu(es->s_checkinterval) &&
1544 		(le32_to_cpu(es->s_lastcheck) +
1545 			le32_to_cpu(es->s_checkinterval) <= get_seconds()))
1546 		printk(KERN_WARNING
1547 		       "EXT4-fs warning: checktime reached, "
1548 		       "running e2fsck is recommended\n");
1549 	if (!sbi->s_journal)
1550 		es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
1551 	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1552 		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
1553 	le16_add_cpu(&es->s_mnt_count, 1);
1554 	es->s_mtime = cpu_to_le32(get_seconds());
1555 	ext4_update_dynamic_rev(sb);
1556 	if (sbi->s_journal)
1557 		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
1558 
1559 	ext4_commit_super(sb, es, 1);
1560 	if (test_opt(sb, DEBUG))
1561 		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
1562 				"bpg=%lu, ipg=%lu, mo=%04lx]\n",
1563 			sb->s_blocksize,
1564 			sbi->s_groups_count,
1565 			EXT4_BLOCKS_PER_GROUP(sb),
1566 			EXT4_INODES_PER_GROUP(sb),
1567 			sbi->s_mount_opt);
1568 
1569 	if (EXT4_SB(sb)->s_journal) {
1570 		printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n",
1571 		       sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" :
1572 		       "external", EXT4_SB(sb)->s_journal->j_devname);
1573 	} else {
1574 		printk(KERN_INFO "EXT4 FS on %s, no journal\n", sb->s_id);
1575 	}
1576 	return res;
1577 }
1578 
ext4_fill_flex_info(struct super_block * sb)1579 static int ext4_fill_flex_info(struct super_block *sb)
1580 {
1581 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1582 	struct ext4_group_desc *gdp = NULL;
1583 	struct buffer_head *bh;
1584 	ext4_group_t flex_group_count;
1585 	ext4_group_t flex_group;
1586 	int groups_per_flex = 0;
1587 	int i;
1588 
1589 	if (!sbi->s_es->s_log_groups_per_flex) {
1590 		sbi->s_log_groups_per_flex = 0;
1591 		return 1;
1592 	}
1593 
1594 	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1595 	groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1596 
1597 	/* We allocate both existing and potentially added groups */
1598 	flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1599 			((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
1600 			      EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex;
1601 	sbi->s_flex_groups = kzalloc(flex_group_count *
1602 				     sizeof(struct flex_groups), GFP_KERNEL);
1603 	if (sbi->s_flex_groups == NULL) {
1604 		printk(KERN_ERR "EXT4-fs: not enough memory for "
1605 				"%u flex groups\n", flex_group_count);
1606 		goto failed;
1607 	}
1608 
1609 	for (i = 0; i < sbi->s_groups_count; i++) {
1610 		gdp = ext4_get_group_desc(sb, i, &bh);
1611 
1612 		flex_group = ext4_flex_group(sbi, i);
1613 		sbi->s_flex_groups[flex_group].free_inodes +=
1614 			ext4_free_inodes_count(sb, gdp);
1615 		sbi->s_flex_groups[flex_group].free_blocks +=
1616 			ext4_free_blks_count(sb, gdp);
1617 	}
1618 
1619 	return 1;
1620 failed:
1621 	return 0;
1622 }
1623 
ext4_group_desc_csum(struct ext4_sb_info * sbi,__u32 block_group,struct ext4_group_desc * gdp)1624 __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
1625 			    struct ext4_group_desc *gdp)
1626 {
1627 	__u16 crc = 0;
1628 
1629 	if (sbi->s_es->s_feature_ro_compat &
1630 	    cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
1631 		int offset = offsetof(struct ext4_group_desc, bg_checksum);
1632 		__le32 le_group = cpu_to_le32(block_group);
1633 
1634 		crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
1635 		crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
1636 		crc = crc16(crc, (__u8 *)gdp, offset);
1637 		offset += sizeof(gdp->bg_checksum); /* skip checksum */
1638 		/* for checksum of struct ext4_group_desc do the rest...*/
1639 		if ((sbi->s_es->s_feature_incompat &
1640 		     cpu_to_le32(EXT4_FEATURE_INCOMPAT_64BIT)) &&
1641 		    offset < le16_to_cpu(sbi->s_es->s_desc_size))
1642 			crc = crc16(crc, (__u8 *)gdp + offset,
1643 				    le16_to_cpu(sbi->s_es->s_desc_size) -
1644 					offset);
1645 	}
1646 
1647 	return cpu_to_le16(crc);
1648 }
1649 
/*
 * Check the stored checksum of group descriptor @gdp.
 *
 * Returns 1 when the GDT_CSUM feature is off or the stored checksum
 * equals the computed one, 0 on a mismatch.
 */
int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
				struct ext4_group_desc *gdp)
{
	if (!(sbi->s_es->s_feature_ro_compat &
	      cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
		return 1;

	return gdp->bg_checksum == ext4_group_desc_csum(sbi, block_group, gdp);
}
1660 
1661 /* Called at mount-time, super-block is locked */
ext4_check_descriptors(struct super_block * sb)1662 static int ext4_check_descriptors(struct super_block *sb)
1663 {
1664 	struct ext4_sb_info *sbi = EXT4_SB(sb);
1665 	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
1666 	ext4_fsblk_t last_block;
1667 	ext4_fsblk_t block_bitmap;
1668 	ext4_fsblk_t inode_bitmap;
1669 	ext4_fsblk_t inode_table;
1670 	int flexbg_flag = 0;
1671 	ext4_group_t i;
1672 
1673 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
1674 		flexbg_flag = 1;
1675 
1676 	ext4_debug("Checking group descriptors");
1677 
1678 	for (i = 0; i < sbi->s_groups_count; i++) {
1679 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1680 
1681 		if (i == sbi->s_groups_count - 1 || flexbg_flag)
1682 			last_block = ext4_blocks_count(sbi->s_es) - 1;
1683 		else
1684 			last_block = first_block +
1685 				(EXT4_BLOCKS_PER_GROUP(sb) - 1);
1686 
1687 		block_bitmap = ext4_block_bitmap(sb, gdp);
1688 		if (block_bitmap < first_block || block_bitmap > last_block) {
1689 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1690 			       "Block bitmap for group %u not in group "
1691 			       "(block %llu)!\n", i, block_bitmap);
1692 			return 0;
1693 		}
1694 		inode_bitmap = ext4_inode_bitmap(sb, gdp);
1695 		if (inode_bitmap < first_block || inode_bitmap > last_block) {
1696 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1697 			       "Inode bitmap for group %u not in group "
1698 			       "(block %llu)!\n", i, inode_bitmap);
1699 			return 0;
1700 		}
1701 		inode_table = ext4_inode_table(sb, gdp);
1702 		if (inode_table < first_block ||
1703 		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
1704 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1705 			       "Inode table for group %u not in group "
1706 			       "(block %llu)!\n", i, inode_table);
1707 			return 0;
1708 		}
1709 		spin_lock(sb_bgl_lock(sbi, i));
1710 		if (!ext4_group_desc_csum_verify(sbi, i, gdp)) {
1711 			printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: "
1712 			       "Checksum for group %u failed (%u!=%u)\n",
1713 			       i, le16_to_cpu(ext4_group_desc_csum(sbi, i,
1714 			       gdp)), le16_to_cpu(gdp->bg_checksum));
1715 			if (!(sb->s_flags & MS_RDONLY)) {
1716 				spin_unlock(sb_bgl_lock(sbi, i));
1717 				return 0;
1718 			}
1719 		}
1720 		spin_unlock(sb_bgl_lock(sbi, i));
1721 		if (!flexbg_flag)
1722 			first_block += EXT4_BLOCKS_PER_GROUP(sb);
1723 	}
1724 
1725 	ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
1726 	sbi->s_es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb));
1727 	return 1;
1728 }
1729 
1730 /* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
1731  * the superblock) which were deleted from all directories, but held open by
1732  * a process at the time of a crash.  We walk the list and try to delete these
1733  * inodes at recovery time (only with a read-write filesystem).
1734  *
1735  * In order to keep the orphan inode chain consistent during traversal (in
1736  * case of crash during recovery), we link each inode into the superblock
1737  * orphan list_head and handle it the same way as an inode deletion during
1738  * normal operation (which journals the operations for us).
1739  *
1740  * We only do an iget() and an iput() on each inode, which is very safe if we
1741  * accidentally point at an in-use or already deleted inode.  The worst that
1742  * can happen in this case is that we get a "bit already cleared" message from
1743  * ext4_free_inode().  The only reason we would point at a wrong inode is if
1744  * e2fsck was run on this filesystem, and it must have already done the orphan
1745  * inode cleanup for us, so we can safely abort without any further action.
1746  */
ext4_orphan_cleanup(struct super_block * sb,struct ext4_super_block * es)1747 static void ext4_orphan_cleanup(struct super_block *sb,
1748 				struct ext4_super_block *es)
1749 {
1750 	unsigned int s_flags = sb->s_flags;
1751 	int nr_orphans = 0, nr_truncates = 0;
1752 #ifdef CONFIG_QUOTA
1753 	int i;
1754 #endif
1755 	if (!es->s_last_orphan) {
1756 		jbd_debug(4, "no orphan inodes to clean up\n");
1757 		return;
1758 	}
1759 
1760 	if (bdev_read_only(sb->s_bdev)) {
1761 		printk(KERN_ERR "EXT4-fs: write access "
1762 			"unavailable, skipping orphan cleanup.\n");
1763 		return;
1764 	}
1765 
1766 	if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
1767 		if (es->s_last_orphan)
1768 			jbd_debug(1, "Errors on filesystem, "
1769 				  "clearing orphan list.\n");
1770 		es->s_last_orphan = 0;
1771 		jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1772 		return;
1773 	}
1774 
1775 	if (s_flags & MS_RDONLY) {
1776 		printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n",
1777 		       sb->s_id);
1778 		sb->s_flags &= ~MS_RDONLY;
1779 	}
1780 #ifdef CONFIG_QUOTA
1781 	/* Needed for iput() to work correctly and not trash data */
1782 	sb->s_flags |= MS_ACTIVE;
1783 	/* Turn on quotas so that they are updated correctly */
1784 	for (i = 0; i < MAXQUOTAS; i++) {
1785 		if (EXT4_SB(sb)->s_qf_names[i]) {
1786 			int ret = ext4_quota_on_mount(sb, i);
1787 			if (ret < 0)
1788 				printk(KERN_ERR
1789 					"EXT4-fs: Cannot turn on journaled "
1790 					"quota: error %d\n", ret);
1791 		}
1792 	}
1793 #endif
1794 
1795 	while (es->s_last_orphan) {
1796 		struct inode *inode;
1797 
1798 		inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1799 		if (IS_ERR(inode)) {
1800 			es->s_last_orphan = 0;
1801 			break;
1802 		}
1803 
1804 		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
1805 		DQUOT_INIT(inode);
1806 		if (inode->i_nlink) {
1807 			printk(KERN_DEBUG
1808 				"%s: truncating inode %lu to %lld bytes\n",
1809 				__func__, inode->i_ino, inode->i_size);
1810 			jbd_debug(2, "truncating inode %lu to %lld bytes\n",
1811 				  inode->i_ino, inode->i_size);
1812 			ext4_truncate(inode);
1813 			nr_truncates++;
1814 		} else {
1815 			printk(KERN_DEBUG
1816 				"%s: deleting unreferenced inode %lu\n",
1817 				__func__, inode->i_ino);
1818 			jbd_debug(2, "deleting unreferenced inode %lu\n",
1819 				  inode->i_ino);
1820 			nr_orphans++;
1821 		}
1822 		iput(inode);  /* The delete magic happens here! */
1823 	}
1824 
1825 #define PLURAL(x) (x), ((x) == 1) ? "" : "s"
1826 
1827 	if (nr_orphans)
1828 		printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n",
1829 		       sb->s_id, PLURAL(nr_orphans));
1830 	if (nr_truncates)
1831 		printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n",
1832 		       sb->s_id, PLURAL(nr_truncates));
1833 #ifdef CONFIG_QUOTA
1834 	/* Turn quotas off */
1835 	for (i = 0; i < MAXQUOTAS; i++) {
1836 		if (sb_dqopt(sb)->files[i])
1837 			vfs_quota_off(sb, i, 0);
1838 	}
1839 #endif
1840 	sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1841 }
1842 /*
1843  * Maximal extent format file size.
1844  * Resulting logical blkno at s_maxbytes must fit in our on-disk
1845  * extent format containers, within a sector_t, and within i_blocks
1846  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
1847  * so that won't be a limiting factor.
1848  *
1849  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
1850  */
ext4_max_size(int blkbits,int has_huge_files)1851 static loff_t ext4_max_size(int blkbits, int has_huge_files)
1852 {
1853 	loff_t res;
1854 	loff_t upper_limit = MAX_LFS_FILESIZE;
1855 
1856 	/* small i_blocks in vfs inode? */
1857 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1858 		/*
1859 		 * CONFIG_LBD is not enabled implies the inode
1860 		 * i_block represent total blocks in 512 bytes
1861 		 * 32 == size of vfs inode i_blocks * 8
1862 		 */
1863 		upper_limit = (1LL << 32) - 1;
1864 
1865 		/* total blocks in file system block size */
1866 		upper_limit >>= (blkbits - 9);
1867 		upper_limit <<= blkbits;
1868 	}
1869 
1870 	/* 32-bit extent-start container, ee_block */
1871 	res = 1LL << 32;
1872 	res <<= blkbits;
1873 	res -= 1;
1874 
1875 	/* Sanity check against vm- & vfs- imposed limits */
1876 	if (res > upper_limit)
1877 		res = upper_limit;
1878 
1879 	return res;
1880 }
1881 
1882 /*
1883  * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
1884  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
1885  * We need to be 1 filesystem block less than the 2^48 sector limit.
1886  */
ext4_max_bitmap_size(int bits,int has_huge_files)1887 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
1888 {
1889 	loff_t res = EXT4_NDIR_BLOCKS;
1890 	int meta_blocks;
1891 	loff_t upper_limit;
1892 	/* This is calculated to be the largest file size for a
1893 	 * dense, bitmapped file such that the total number of
1894 	 * sectors in the file, including data and all indirect blocks,
1895 	 * does not exceed 2^48 -1
1896 	 * __u32 i_blocks_lo and _u16 i_blocks_high representing the
1897 	 * total number of  512 bytes blocks of the file
1898 	 */
1899 
1900 	if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) {
1901 		/*
1902 		 * !has_huge_files or CONFIG_LBD is not enabled
1903 		 * implies the inode i_block represent total blocks in
1904 		 * 512 bytes 32 == size of vfs inode i_blocks * 8
1905 		 */
1906 		upper_limit = (1LL << 32) - 1;
1907 
1908 		/* total blocks in file system block size */
1909 		upper_limit >>= (bits - 9);
1910 
1911 	} else {
1912 		/*
1913 		 * We use 48 bit ext4_inode i_blocks
1914 		 * With EXT4_HUGE_FILE_FL set the i_blocks
1915 		 * represent total number of blocks in
1916 		 * file system block size
1917 		 */
1918 		upper_limit = (1LL << 48) - 1;
1919 
1920 	}
1921 
1922 	/* indirect blocks */
1923 	meta_blocks = 1;
1924 	/* double indirect blocks */
1925 	meta_blocks += 1 + (1LL << (bits-2));
1926 	/* tripple indirect blocks */
1927 	meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
1928 
1929 	upper_limit -= meta_blocks;
1930 	upper_limit <<= bits;
1931 
1932 	res += 1LL << (bits-2);
1933 	res += 1LL << (2*(bits-2));
1934 	res += 1LL << (3*(bits-2));
1935 	res <<= bits;
1936 	if (res > upper_limit)
1937 		res = upper_limit;
1938 
1939 	if (res > MAX_LFS_FILESIZE)
1940 		res = MAX_LFS_FILESIZE;
1941 
1942 	return res;
1943 }
1944 
/*
 * Return the block number holding group-descriptor block @nr.
 *
 * Without the META_BG feature, or for descriptor blocks below
 * s_first_meta_bg, the descriptor blocks directly follow the superblock
 * at @logical_sb_block.  Otherwise the block is placed at the start of
 * group (s_desc_per_block * nr), one block further in when that group
 * carries a superblock copy.
 */
static ext4_fsblk_t descriptor_loc(struct super_block *sb,
				ext4_fsblk_t logical_sb_block, int nr)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_group_t meta_start = le32_to_cpu(sbi->s_es->s_first_meta_bg);
	ext4_group_t group;

	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) ||
	    nr < meta_start)
		return logical_sb_block + nr + 1;

	group = sbi->s_desc_per_block * nr;

	/* step over the superblock copy when the group has one */
	if (ext4_bg_has_super(sb, group))
		return 1 + ext4_group_first_block_no(sb, group);

	return ext4_group_first_block_no(sb, group);
}
1962 
1963 /**
1964  * ext4_get_stripe_size: Get the stripe size.
1965  * @sbi: In memory super block info
1966  *
1967  * If we have specified it via mount option, then
1968  * use the mount option value. If the value specified at mount time is
1969  * greater than the blocks per group use the super block value.
1970  * If the super block value is greater than blocks per group return 0.
1971  * Allocator needs it be less than blocks per group.
1972  *
1973  */
ext4_get_stripe_size(struct ext4_sb_info * sbi)1974 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
1975 {
1976 	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
1977 	unsigned long stripe_width =
1978 			le32_to_cpu(sbi->s_es->s_raid_stripe_width);
1979 
1980 	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
1981 		return sbi->s_stripe;
1982 
1983 	if (stripe_width <= sbi->s_blocks_per_group)
1984 		return stripe_width;
1985 
1986 	if (stride <= sbi->s_blocks_per_group)
1987 		return stride;
1988 
1989 	return 0;
1990 }
1991 
ext4_fill_super(struct super_block * sb,void * data,int silent)1992 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
1993 				__releases(kernel_lock)
1994 				__acquires(kernel_lock)
1995 
1996 {
1997 	struct buffer_head *bh;
1998 	struct ext4_super_block *es = NULL;
1999 	struct ext4_sb_info *sbi;
2000 	ext4_fsblk_t block;
2001 	ext4_fsblk_t sb_block = get_sb_block(&data);
2002 	ext4_fsblk_t logical_sb_block;
2003 	unsigned long offset = 0;
2004 	unsigned long journal_devnum = 0;
2005 	unsigned long def_mount_opts;
2006 	struct inode *root;
2007 	char *cp;
2008 	const char *descr;
2009 	int ret = -EINVAL;
2010 	int blocksize;
2011 	unsigned int db_count;
2012 	unsigned int i;
2013 	int needs_recovery, has_huge_files;
2014 	int features;
2015 	__u64 blocks_count;
2016 	int err;
2017 	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
2018 
2019 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
2020 	if (!sbi)
2021 		return -ENOMEM;
2022 	sb->s_fs_info = sbi;
2023 	sbi->s_mount_opt = 0;
2024 	sbi->s_resuid = EXT4_DEF_RESUID;
2025 	sbi->s_resgid = EXT4_DEF_RESGID;
2026 	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
2027 	sbi->s_sb_block = sb_block;
2028 
2029 	unlock_kernel();
2030 
2031 	/* Cleanup superblock name */
2032 	for (cp = sb->s_id; (cp = strchr(cp, '/'));)
2033 		*cp = '!';
2034 
2035 	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
2036 	if (!blocksize) {
2037 		printk(KERN_ERR "EXT4-fs: unable to set blocksize\n");
2038 		goto out_fail;
2039 	}
2040 
2041 	/*
2042 	 * The ext4 superblock will not be buffer aligned for other than 1kB
2043 	 * block sizes.  We need to calculate the offset from buffer start.
2044 	 */
2045 	if (blocksize != EXT4_MIN_BLOCK_SIZE) {
2046 		logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2047 		offset = do_div(logical_sb_block, blocksize);
2048 	} else {
2049 		logical_sb_block = sb_block;
2050 	}
2051 
2052 	if (!(bh = sb_bread(sb, logical_sb_block))) {
2053 		printk(KERN_ERR "EXT4-fs: unable to read superblock\n");
2054 		goto out_fail;
2055 	}
2056 	/*
2057 	 * Note: s_es must be initialized as soon as possible because
2058 	 *       some ext4 macro-instructions depend on its value
2059 	 */
2060 	es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
2061 	sbi->s_es = es;
2062 	sb->s_magic = le16_to_cpu(es->s_magic);
2063 	if (sb->s_magic != EXT4_SUPER_MAGIC)
2064 		goto cantfind_ext4;
2065 
2066 	/* Set defaults before we parse the mount options */
2067 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
2068 	if (def_mount_opts & EXT4_DEFM_DEBUG)
2069 		set_opt(sbi->s_mount_opt, DEBUG);
2070 	if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
2071 		set_opt(sbi->s_mount_opt, GRPID);
2072 	if (def_mount_opts & EXT4_DEFM_UID16)
2073 		set_opt(sbi->s_mount_opt, NO_UID32);
2074 #ifdef CONFIG_EXT4_FS_XATTR
2075 	if (def_mount_opts & EXT4_DEFM_XATTR_USER)
2076 		set_opt(sbi->s_mount_opt, XATTR_USER);
2077 #endif
2078 #ifdef CONFIG_EXT4_FS_POSIX_ACL
2079 	if (def_mount_opts & EXT4_DEFM_ACL)
2080 		set_opt(sbi->s_mount_opt, POSIX_ACL);
2081 #endif
2082 	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
2083 		sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
2084 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
2085 		sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
2086 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
2087 		sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA;
2088 
2089 	if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC)
2090 		set_opt(sbi->s_mount_opt, ERRORS_PANIC);
2091 	else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE)
2092 		set_opt(sbi->s_mount_opt, ERRORS_CONT);
2093 	else
2094 		set_opt(sbi->s_mount_opt, ERRORS_RO);
2095 
2096 	sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
2097 	sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
2098 	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
2099 	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
2100 	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
2101 
2102 	set_opt(sbi->s_mount_opt, RESERVATION);
2103 	set_opt(sbi->s_mount_opt, BARRIER);
2104 
2105 	/*
2106 	 * enable delayed allocation by default
2107 	 * Use -o nodelalloc to turn it off
2108 	 */
2109 	set_opt(sbi->s_mount_opt, DELALLOC);
2110 
2111 
2112 	if (!parse_options((char *) data, sb, &journal_devnum,
2113 			   &journal_ioprio, NULL, 0))
2114 		goto failed_mount;
2115 
2116 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
2117 		((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
2118 
2119 	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
2120 	    (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) ||
2121 	     EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
2122 	     EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U)))
2123 		printk(KERN_WARNING
2124 		       "EXT4-fs warning: feature flags set on rev 0 fs, "
2125 		       "running e2fsck is recommended\n");
2126 
2127 	/*
2128 	 * Check feature flags regardless of the revision level, since we
2129 	 * previously didn't change the revision level when setting the flags,
2130 	 * so there is a chance incompat flags are set on a rev 0 filesystem.
2131 	 */
2132 	features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP);
2133 	if (features) {
2134 		printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of "
2135 		       "unsupported optional features (%x).\n", sb->s_id,
2136 			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
2137 			~EXT4_FEATURE_INCOMPAT_SUPP));
2138 		goto failed_mount;
2139 	}
2140 	features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP);
2141 	if (!(sb->s_flags & MS_RDONLY) && features) {
2142 		printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of "
2143 		       "unsupported optional features (%x).\n", sb->s_id,
2144 			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
2145 			~EXT4_FEATURE_RO_COMPAT_SUPP));
2146 		goto failed_mount;
2147 	}
2148 	has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb,
2149 				    EXT4_FEATURE_RO_COMPAT_HUGE_FILE);
2150 	if (has_huge_files) {
2151 		/*
2152 		 * Large file size enabled file system can only be
2153 		 * mount if kernel is build with CONFIG_LBD
2154 		 */
2155 		if (sizeof(root->i_blocks) < sizeof(u64) &&
2156 				!(sb->s_flags & MS_RDONLY)) {
2157 			printk(KERN_ERR "EXT4-fs: %s: Filesystem with huge "
2158 					"files cannot be mounted read-write "
2159 					"without CONFIG_LBD.\n", sb->s_id);
2160 			goto failed_mount;
2161 		}
2162 	}
2163 	blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
2164 
2165 	if (blocksize < EXT4_MIN_BLOCK_SIZE ||
2166 	    blocksize > EXT4_MAX_BLOCK_SIZE) {
2167 		printk(KERN_ERR
2168 		       "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n",
2169 		       blocksize, sb->s_id);
2170 		goto failed_mount;
2171 	}
2172 
2173 	if (sb->s_blocksize != blocksize) {
2174 
2175 		/* Validate the filesystem blocksize */
2176 		if (!sb_set_blocksize(sb, blocksize)) {
2177 			printk(KERN_ERR "EXT4-fs: bad block size %d.\n",
2178 					blocksize);
2179 			goto failed_mount;
2180 		}
2181 
2182 		brelse(bh);
2183 		logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
2184 		offset = do_div(logical_sb_block, blocksize);
2185 		bh = sb_bread(sb, logical_sb_block);
2186 		if (!bh) {
2187 			printk(KERN_ERR
2188 			       "EXT4-fs: Can't read superblock on 2nd try.\n");
2189 			goto failed_mount;
2190 		}
2191 		es = (struct ext4_super_block *)(((char *)bh->b_data) + offset);
2192 		sbi->s_es = es;
2193 		if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
2194 			printk(KERN_ERR
2195 			       "EXT4-fs: Magic mismatch, very weird !\n");
2196 			goto failed_mount;
2197 		}
2198 	}
2199 
2200 	sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
2201 						      has_huge_files);
2202 	sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
2203 
2204 	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
2205 		sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
2206 		sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
2207 	} else {
2208 		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
2209 		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
2210 		if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
2211 		    (!is_power_of_2(sbi->s_inode_size)) ||
2212 		    (sbi->s_inode_size > blocksize)) {
2213 			printk(KERN_ERR
2214 			       "EXT4-fs: unsupported inode size: %d\n",
2215 			       sbi->s_inode_size);
2216 			goto failed_mount;
2217 		}
2218 		if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE)
2219 			sb->s_time_gran = 1 << (EXT4_EPOCH_BITS - 2);
2220 	}
2221 	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
2222 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) {
2223 		if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
2224 		    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
2225 		    !is_power_of_2(sbi->s_desc_size)) {
2226 			printk(KERN_ERR
2227 			       "EXT4-fs: unsupported descriptor size %lu\n",
2228 			       sbi->s_desc_size);
2229 			goto failed_mount;
2230 		}
2231 	} else
2232 		sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
2233 	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
2234 	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
2235 	if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
2236 		goto cantfind_ext4;
2237 	sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
2238 	if (sbi->s_inodes_per_block == 0)
2239 		goto cantfind_ext4;
2240 	sbi->s_itb_per_group = sbi->s_inodes_per_group /
2241 					sbi->s_inodes_per_block;
2242 	sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
2243 	sbi->s_sbh = bh;
2244 	sbi->s_mount_state = le16_to_cpu(es->s_state);
2245 	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
2246 	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
2247 	for (i = 0; i < 4; i++)
2248 		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
2249 	sbi->s_def_hash_version = es->s_def_hash_version;
2250 	i = le32_to_cpu(es->s_flags);
2251 	if (i & EXT2_FLAGS_UNSIGNED_HASH)
2252 		sbi->s_hash_unsigned = 3;
2253 	else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
2254 #ifdef __CHAR_UNSIGNED__
2255 		es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
2256 		sbi->s_hash_unsigned = 3;
2257 #else
2258 		es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
2259 #endif
2260 		sb->s_dirt = 1;
2261 	}
2262 
2263 	if (sbi->s_blocks_per_group > blocksize * 8) {
2264 		printk(KERN_ERR
2265 		       "EXT4-fs: #blocks per group too big: %lu\n",
2266 		       sbi->s_blocks_per_group);
2267 		goto failed_mount;
2268 	}
2269 	if (sbi->s_inodes_per_group > blocksize * 8) {
2270 		printk(KERN_ERR
2271 		       "EXT4-fs: #inodes per group too big: %lu\n",
2272 		       sbi->s_inodes_per_group);
2273 		goto failed_mount;
2274 	}
2275 
2276 	if (ext4_blocks_count(es) >
2277 		    (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
2278 		printk(KERN_ERR "EXT4-fs: filesystem on %s:"
2279 			" too large to mount safely\n", sb->s_id);
2280 		if (sizeof(sector_t) < 8)
2281 			printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not "
2282 					"enabled\n");
2283 		goto failed_mount;
2284 	}
2285 
2286 	if (EXT4_BLOCKS_PER_GROUP(sb) == 0)
2287 		goto cantfind_ext4;
2288 
2289         /*
2290          * It makes no sense for the first data block to be beyond the end
2291          * of the filesystem.
2292          */
2293         if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
2294                 printk(KERN_WARNING "EXT4-fs: bad geometry: first data"
2295 		       "block %u is beyond end of filesystem (%llu)\n",
2296 		       le32_to_cpu(es->s_first_data_block),
2297 		       ext4_blocks_count(es));
2298 		goto failed_mount;
2299 	}
2300 	blocks_count = (ext4_blocks_count(es) -
2301 			le32_to_cpu(es->s_first_data_block) +
2302 			EXT4_BLOCKS_PER_GROUP(sb) - 1);
2303 	do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
2304 	if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
2305 		printk(KERN_WARNING "EXT4-fs: groups count too large: %u "
2306 		       "(block count %llu, first data block %u, "
2307 		       "blocks per group %lu)\n", sbi->s_groups_count,
2308 		       ext4_blocks_count(es),
2309 		       le32_to_cpu(es->s_first_data_block),
2310 		       EXT4_BLOCKS_PER_GROUP(sb));
2311 		goto failed_mount;
2312 	}
2313 	sbi->s_groups_count = blocks_count;
2314 	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
2315 		   EXT4_DESC_PER_BLOCK(sb);
2316 	sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *),
2317 				    GFP_KERNEL);
2318 	if (sbi->s_group_desc == NULL) {
2319 		printk(KERN_ERR "EXT4-fs: not enough memory\n");
2320 		goto failed_mount;
2321 	}
2322 
2323 #ifdef CONFIG_PROC_FS
2324 	if (ext4_proc_root)
2325 		sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root);
2326 
2327 	if (sbi->s_proc)
2328 		proc_create_data("inode_readahead_blks", 0644, sbi->s_proc,
2329 				 &ext4_ui_proc_fops,
2330 				 &sbi->s_inode_readahead_blks);
2331 #endif
2332 
2333 	bgl_lock_init(&sbi->s_blockgroup_lock);
2334 
2335 	for (i = 0; i < db_count; i++) {
2336 		block = descriptor_loc(sb, logical_sb_block, i);
2337 		sbi->s_group_desc[i] = sb_bread(sb, block);
2338 		if (!sbi->s_group_desc[i]) {
2339 			printk(KERN_ERR "EXT4-fs: "
2340 			       "can't read group descriptor %d\n", i);
2341 			db_count = i;
2342 			goto failed_mount2;
2343 		}
2344 	}
2345 	if (!ext4_check_descriptors(sb)) {
2346 		printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n");
2347 		goto failed_mount2;
2348 	}
2349 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
2350 		if (!ext4_fill_flex_info(sb)) {
2351 			printk(KERN_ERR
2352 			       "EXT4-fs: unable to initialize "
2353 			       "flex_bg meta info!\n");
2354 			goto failed_mount2;
2355 		}
2356 
2357 	sbi->s_gdb_count = db_count;
2358 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
2359 	spin_lock_init(&sbi->s_next_gen_lock);
2360 
2361 	err = percpu_counter_init(&sbi->s_freeblocks_counter,
2362 			ext4_count_free_blocks(sb));
2363 	if (!err) {
2364 		err = percpu_counter_init(&sbi->s_freeinodes_counter,
2365 				ext4_count_free_inodes(sb));
2366 	}
2367 	if (!err) {
2368 		err = percpu_counter_init(&sbi->s_dirs_counter,
2369 				ext4_count_dirs(sb));
2370 	}
2371 	if (!err) {
2372 		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
2373 	}
2374 	if (err) {
2375 		printk(KERN_ERR "EXT4-fs: insufficient memory\n");
2376 		goto failed_mount3;
2377 	}
2378 
2379 	sbi->s_stripe = ext4_get_stripe_size(sbi);
2380 
2381 	/*
2382 	 * set up enough so that it can read an inode
2383 	 */
2384 	sb->s_op = &ext4_sops;
2385 	sb->s_export_op = &ext4_export_ops;
2386 	sb->s_xattr = ext4_xattr_handlers;
2387 #ifdef CONFIG_QUOTA
2388 	sb->s_qcop = &ext4_qctl_operations;
2389 	sb->dq_op = &ext4_quota_operations;
2390 #endif
2391 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
2392 
2393 	sb->s_root = NULL;
2394 
2395 	needs_recovery = (es->s_last_orphan != 0 ||
2396 			  EXT4_HAS_INCOMPAT_FEATURE(sb,
2397 				    EXT4_FEATURE_INCOMPAT_RECOVER));
2398 
2399 	/*
2400 	 * The first inode we look at is the journal inode.  Don't try
2401 	 * root first: it may be modified in the journal!
2402 	 */
2403 	if (!test_opt(sb, NOLOAD) &&
2404 	    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2405 		if (ext4_load_journal(sb, es, journal_devnum))
2406 			goto failed_mount3;
2407 		if (!(sb->s_flags & MS_RDONLY) &&
2408 		    EXT4_SB(sb)->s_journal->j_failed_commit) {
2409 			printk(KERN_CRIT "EXT4-fs error (device %s): "
2410 			       "ext4_fill_super: Journal transaction "
2411 			       "%u is corrupt\n", sb->s_id,
2412 			       EXT4_SB(sb)->s_journal->j_failed_commit);
2413 			if (test_opt(sb, ERRORS_RO)) {
2414 				printk(KERN_CRIT
2415 				       "Mounting filesystem read-only\n");
2416 				sb->s_flags |= MS_RDONLY;
2417 				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2418 				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2419 			}
2420 			if (test_opt(sb, ERRORS_PANIC)) {
2421 				EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2422 				es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2423 				ext4_commit_super(sb, es, 1);
2424 				goto failed_mount4;
2425 			}
2426 		}
2427 	} else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2428 	      EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2429 		printk(KERN_ERR "EXT4-fs: required journal recovery "
2430 		       "suppressed and not mounted read-only\n");
2431 		goto failed_mount4;
2432 	} else {
2433 		clear_opt(sbi->s_mount_opt, DATA_FLAGS);
2434 		set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
2435 		sbi->s_journal = NULL;
2436 		needs_recovery = 0;
2437 		goto no_journal;
2438 	}
2439 
2440 	if (ext4_blocks_count(es) > 0xffffffffULL &&
2441 	    !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
2442 				       JBD2_FEATURE_INCOMPAT_64BIT)) {
2443 		printk(KERN_ERR "EXT4-fs: Failed to set 64-bit journal feature\n");
2444 		goto failed_mount4;
2445 	}
2446 
2447 	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
2448 		jbd2_journal_set_features(sbi->s_journal,
2449 				JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2450 				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2451 	} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
2452 		jbd2_journal_set_features(sbi->s_journal,
2453 				JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
2454 		jbd2_journal_clear_features(sbi->s_journal, 0, 0,
2455 				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2456 	} else {
2457 		jbd2_journal_clear_features(sbi->s_journal,
2458 				JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2459 				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2460 	}
2461 
2462 	/* We have now updated the journal if required, so we can
2463 	 * validate the data journaling mode. */
2464 	switch (test_opt(sb, DATA_FLAGS)) {
2465 	case 0:
2466 		/* No mode set, assume a default based on the journal
2467 		 * capabilities: ORDERED_DATA if the journal can
2468 		 * cope, else JOURNAL_DATA
2469 		 */
2470 		if (jbd2_journal_check_available_features
2471 		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE))
2472 			set_opt(sbi->s_mount_opt, ORDERED_DATA);
2473 		else
2474 			set_opt(sbi->s_mount_opt, JOURNAL_DATA);
2475 		break;
2476 
2477 	case EXT4_MOUNT_ORDERED_DATA:
2478 	case EXT4_MOUNT_WRITEBACK_DATA:
2479 		if (!jbd2_journal_check_available_features
2480 		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
2481 			printk(KERN_ERR "EXT4-fs: Journal does not support "
2482 			       "requested data journaling mode\n");
2483 			goto failed_mount4;
2484 		}
2485 	default:
2486 		break;
2487 	}
2488 	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
2489 
2490 no_journal:
2491 
2492 	if (test_opt(sb, NOBH)) {
2493 		if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) {
2494 			printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - "
2495 				"its supported only with writeback mode\n");
2496 			clear_opt(sbi->s_mount_opt, NOBH);
2497 		}
2498 	}
2499 	/*
2500 	 * The jbd2_journal_load will have done any necessary log recovery,
2501 	 * so we can safely mount the rest of the filesystem now.
2502 	 */
2503 
2504 	root = ext4_iget(sb, EXT4_ROOT_INO);
2505 	if (IS_ERR(root)) {
2506 		printk(KERN_ERR "EXT4-fs: get root inode failed\n");
2507 		ret = PTR_ERR(root);
2508 		goto failed_mount4;
2509 	}
2510 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
2511 		iput(root);
2512 		printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n");
2513 		goto failed_mount4;
2514 	}
2515 	sb->s_root = d_alloc_root(root);
2516 	if (!sb->s_root) {
2517 		printk(KERN_ERR "EXT4-fs: get root dentry failed\n");
2518 		iput(root);
2519 		ret = -ENOMEM;
2520 		goto failed_mount4;
2521 	}
2522 
2523 	ext4_setup_super(sb, es, sb->s_flags & MS_RDONLY);
2524 
2525 	/* determine the minimum size of new large inodes, if present */
2526 	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
2527 		sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2528 						     EXT4_GOOD_OLD_INODE_SIZE;
2529 		if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
2530 				       EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE)) {
2531 			if (sbi->s_want_extra_isize <
2532 			    le16_to_cpu(es->s_want_extra_isize))
2533 				sbi->s_want_extra_isize =
2534 					le16_to_cpu(es->s_want_extra_isize);
2535 			if (sbi->s_want_extra_isize <
2536 			    le16_to_cpu(es->s_min_extra_isize))
2537 				sbi->s_want_extra_isize =
2538 					le16_to_cpu(es->s_min_extra_isize);
2539 		}
2540 	}
2541 	/* Check if enough inode space is available */
2542 	if (EXT4_GOOD_OLD_INODE_SIZE + sbi->s_want_extra_isize >
2543 							sbi->s_inode_size) {
2544 		sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
2545 						       EXT4_GOOD_OLD_INODE_SIZE;
2546 		printk(KERN_INFO "EXT4-fs: required extra inode space not"
2547 			"available.\n");
2548 	}
2549 
2550 	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
2551 		printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - "
2552 				"requested data journaling mode\n");
2553 		clear_opt(sbi->s_mount_opt, DELALLOC);
2554 	} else if (test_opt(sb, DELALLOC))
2555 		printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n");
2556 
2557 	ext4_ext_init(sb);
2558 	err = ext4_mb_init(sb, needs_recovery);
2559 	if (err) {
2560 		printk(KERN_ERR "EXT4-fs: failed to initalize mballoc (%d)\n",
2561 		       err);
2562 		goto failed_mount4;
2563 	}
2564 
2565 	/*
2566 	 * akpm: core read_super() calls in here with the superblock locked.
2567 	 * That deadlocks, because orphan cleanup needs to lock the superblock
2568 	 * in numerous places.  Here we just pop the lock - it's relatively
2569 	 * harmless, because we are now ready to accept write_super() requests,
2570 	 * and aviro says that's the only reason for hanging onto the
2571 	 * superblock lock.
2572 	 */
2573 	EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
2574 	ext4_orphan_cleanup(sb, es);
2575 	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
2576 	if (needs_recovery) {
2577 		printk(KERN_INFO "EXT4-fs: recovery complete.\n");
2578 		ext4_mark_recovery_complete(sb, es);
2579 	}
2580 	if (EXT4_SB(sb)->s_journal) {
2581 		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2582 			descr = " journalled data mode";
2583 		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2584 			descr = " ordered data mode";
2585 		else
2586 			descr = " writeback data mode";
2587 	} else
2588 		descr = "out journal";
2589 
2590 	printk(KERN_INFO "EXT4-fs: mounted filesystem %s with%s\n",
2591 	       sb->s_id, descr);
2592 
2593 	lock_kernel();
2594 	return 0;
2595 
2596 cantfind_ext4:
2597 	if (!silent)
2598 		printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n",
2599 		       sb->s_id);
2600 	goto failed_mount;
2601 
2602 failed_mount4:
2603 	printk(KERN_ERR "EXT4-fs (device %s): mount failed\n", sb->s_id);
2604 	if (sbi->s_journal) {
2605 		jbd2_journal_destroy(sbi->s_journal);
2606 		sbi->s_journal = NULL;
2607 	}
2608 failed_mount3:
2609 	percpu_counter_destroy(&sbi->s_freeblocks_counter);
2610 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
2611 	percpu_counter_destroy(&sbi->s_dirs_counter);
2612 	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
2613 failed_mount2:
2614 	for (i = 0; i < db_count; i++)
2615 		brelse(sbi->s_group_desc[i]);
2616 	kfree(sbi->s_group_desc);
2617 failed_mount:
2618 	if (sbi->s_proc) {
2619 		remove_proc_entry("inode_readahead_blks", sbi->s_proc);
2620 		remove_proc_entry(sb->s_id, ext4_proc_root);
2621 	}
2622 #ifdef CONFIG_QUOTA
2623 	for (i = 0; i < MAXQUOTAS; i++)
2624 		kfree(sbi->s_qf_names[i]);
2625 #endif
2626 	ext4_blkdev_remove(sbi);
2627 	brelse(bh);
2628 out_fail:
2629 	sb->s_fs_info = NULL;
2630 	kfree(sbi);
2631 	lock_kernel();
2632 	return ret;
2633 }
2634 
2635 /*
2636  * Setup any per-fs journal parameters now.  We'll do this both on
2637  * initial mount, once the journal has been initialised but before we've
2638  * done any recovery; and again on any subsequent remount.
2639  */
ext4_init_journal_params(struct super_block * sb,journal_t * journal)2640 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
2641 {
2642 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2643 
2644 	journal->j_commit_interval = sbi->s_commit_interval;
2645 	journal->j_min_batch_time = sbi->s_min_batch_time;
2646 	journal->j_max_batch_time = sbi->s_max_batch_time;
2647 
2648 	spin_lock(&journal->j_state_lock);
2649 	if (test_opt(sb, BARRIER))
2650 		journal->j_flags |= JBD2_BARRIER;
2651 	else
2652 		journal->j_flags &= ~JBD2_BARRIER;
2653 	if (test_opt(sb, DATA_ERR_ABORT))
2654 		journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
2655 	else
2656 		journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
2657 	spin_unlock(&journal->j_state_lock);
2658 }
2659 
ext4_get_journal(struct super_block * sb,unsigned int journal_inum)2660 static journal_t *ext4_get_journal(struct super_block *sb,
2661 				   unsigned int journal_inum)
2662 {
2663 	struct inode *journal_inode;
2664 	journal_t *journal;
2665 
2666 	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2667 
2668 	/* First, test for the existence of a valid inode on disk.  Bad
2669 	 * things happen if we iget() an unused inode, as the subsequent
2670 	 * iput() will try to delete it. */
2671 
2672 	journal_inode = ext4_iget(sb, journal_inum);
2673 	if (IS_ERR(journal_inode)) {
2674 		printk(KERN_ERR "EXT4-fs: no journal found.\n");
2675 		return NULL;
2676 	}
2677 	if (!journal_inode->i_nlink) {
2678 		make_bad_inode(journal_inode);
2679 		iput(journal_inode);
2680 		printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n");
2681 		return NULL;
2682 	}
2683 
2684 	jbd_debug(2, "Journal inode found at %p: %lld bytes\n",
2685 		  journal_inode, journal_inode->i_size);
2686 	if (!S_ISREG(journal_inode->i_mode)) {
2687 		printk(KERN_ERR "EXT4-fs: invalid journal inode.\n");
2688 		iput(journal_inode);
2689 		return NULL;
2690 	}
2691 
2692 	journal = jbd2_journal_init_inode(journal_inode);
2693 	if (!journal) {
2694 		printk(KERN_ERR "EXT4-fs: Could not load journal inode\n");
2695 		iput(journal_inode);
2696 		return NULL;
2697 	}
2698 	journal->j_private = sb;
2699 	ext4_init_journal_params(sb, journal);
2700 	return journal;
2701 }
2702 
ext4_get_dev_journal(struct super_block * sb,dev_t j_dev)2703 static journal_t *ext4_get_dev_journal(struct super_block *sb,
2704 				       dev_t j_dev)
2705 {
2706 	struct buffer_head *bh;
2707 	journal_t *journal;
2708 	ext4_fsblk_t start;
2709 	ext4_fsblk_t len;
2710 	int hblock, blocksize;
2711 	ext4_fsblk_t sb_block;
2712 	unsigned long offset;
2713 	struct ext4_super_block *es;
2714 	struct block_device *bdev;
2715 
2716 	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2717 
2718 	bdev = ext4_blkdev_get(j_dev);
2719 	if (bdev == NULL)
2720 		return NULL;
2721 
2722 	if (bd_claim(bdev, sb)) {
2723 		printk(KERN_ERR
2724 			"EXT4-fs: failed to claim external journal device.\n");
2725 		blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
2726 		return NULL;
2727 	}
2728 
2729 	blocksize = sb->s_blocksize;
2730 	hblock = bdev_hardsect_size(bdev);
2731 	if (blocksize < hblock) {
2732 		printk(KERN_ERR
2733 			"EXT4-fs: blocksize too small for journal device.\n");
2734 		goto out_bdev;
2735 	}
2736 
2737 	sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
2738 	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
2739 	set_blocksize(bdev, blocksize);
2740 	if (!(bh = __bread(bdev, sb_block, blocksize))) {
2741 		printk(KERN_ERR "EXT4-fs: couldn't read superblock of "
2742 		       "external journal\n");
2743 		goto out_bdev;
2744 	}
2745 
2746 	es = (struct ext4_super_block *) (((char *)bh->b_data) + offset);
2747 	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
2748 	    !(le32_to_cpu(es->s_feature_incompat) &
2749 	      EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
2750 		printk(KERN_ERR "EXT4-fs: external journal has "
2751 					"bad superblock\n");
2752 		brelse(bh);
2753 		goto out_bdev;
2754 	}
2755 
2756 	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
2757 		printk(KERN_ERR "EXT4-fs: journal UUID does not match\n");
2758 		brelse(bh);
2759 		goto out_bdev;
2760 	}
2761 
2762 	len = ext4_blocks_count(es);
2763 	start = sb_block + 1;
2764 	brelse(bh);	/* we're done with the superblock */
2765 
2766 	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
2767 					start, len, blocksize);
2768 	if (!journal) {
2769 		printk(KERN_ERR "EXT4-fs: failed to create device journal\n");
2770 		goto out_bdev;
2771 	}
2772 	journal->j_private = sb;
2773 	ll_rw_block(READ, 1, &journal->j_sb_buffer);
2774 	wait_on_buffer(journal->j_sb_buffer);
2775 	if (!buffer_uptodate(journal->j_sb_buffer)) {
2776 		printk(KERN_ERR "EXT4-fs: I/O error on journal device\n");
2777 		goto out_journal;
2778 	}
2779 	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
2780 		printk(KERN_ERR "EXT4-fs: External journal has more than one "
2781 					"user (unsupported) - %d\n",
2782 			be32_to_cpu(journal->j_superblock->s_nr_users));
2783 		goto out_journal;
2784 	}
2785 	EXT4_SB(sb)->journal_bdev = bdev;
2786 	ext4_init_journal_params(sb, journal);
2787 	return journal;
2788 out_journal:
2789 	jbd2_journal_destroy(journal);
2790 out_bdev:
2791 	ext4_blkdev_put(bdev);
2792 	return NULL;
2793 }
2794 
ext4_load_journal(struct super_block * sb,struct ext4_super_block * es,unsigned long journal_devnum)2795 static int ext4_load_journal(struct super_block *sb,
2796 			     struct ext4_super_block *es,
2797 			     unsigned long journal_devnum)
2798 {
2799 	journal_t *journal;
2800 	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
2801 	dev_t journal_dev;
2802 	int err = 0;
2803 	int really_read_only;
2804 
2805 	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2806 
2807 	if (journal_devnum &&
2808 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2809 		printk(KERN_INFO "EXT4-fs: external journal device major/minor "
2810 			"numbers have changed\n");
2811 		journal_dev = new_decode_dev(journal_devnum);
2812 	} else
2813 		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
2814 
2815 	really_read_only = bdev_read_only(sb->s_bdev);
2816 
2817 	/*
2818 	 * Are we loading a blank journal or performing recovery after a
2819 	 * crash?  For recovery, we need to check in advance whether we
2820 	 * can get read-write access to the device.
2821 	 */
2822 
2823 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2824 		if (sb->s_flags & MS_RDONLY) {
2825 			printk(KERN_INFO "EXT4-fs: INFO: recovery "
2826 					"required on readonly filesystem.\n");
2827 			if (really_read_only) {
2828 				printk(KERN_ERR "EXT4-fs: write access "
2829 					"unavailable, cannot proceed.\n");
2830 				return -EROFS;
2831 			}
2832 			printk(KERN_INFO "EXT4-fs: write access will "
2833 			       "be enabled during recovery.\n");
2834 		}
2835 	}
2836 
2837 	if (journal_inum && journal_dev) {
2838 		printk(KERN_ERR "EXT4-fs: filesystem has both journal "
2839 		       "and inode journals!\n");
2840 		return -EINVAL;
2841 	}
2842 
2843 	if (journal_inum) {
2844 		if (!(journal = ext4_get_journal(sb, journal_inum)))
2845 			return -EINVAL;
2846 	} else {
2847 		if (!(journal = ext4_get_dev_journal(sb, journal_dev)))
2848 			return -EINVAL;
2849 	}
2850 
2851 	if (journal->j_flags & JBD2_BARRIER)
2852 		printk(KERN_INFO "EXT4-fs: barriers enabled\n");
2853 	else
2854 		printk(KERN_INFO "EXT4-fs: barriers disabled\n");
2855 
2856 	if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
2857 		err = jbd2_journal_update_format(journal);
2858 		if (err)  {
2859 			printk(KERN_ERR "EXT4-fs: error updating journal.\n");
2860 			jbd2_journal_destroy(journal);
2861 			return err;
2862 		}
2863 	}
2864 
2865 	if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER))
2866 		err = jbd2_journal_wipe(journal, !really_read_only);
2867 	if (!err)
2868 		err = jbd2_journal_load(journal);
2869 
2870 	if (err) {
2871 		printk(KERN_ERR "EXT4-fs: error loading journal.\n");
2872 		jbd2_journal_destroy(journal);
2873 		return err;
2874 	}
2875 
2876 	EXT4_SB(sb)->s_journal = journal;
2877 	ext4_clear_journal_err(sb, es);
2878 
2879 	if (journal_devnum &&
2880 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2881 		es->s_journal_dev = cpu_to_le32(journal_devnum);
2882 		sb->s_dirt = 1;
2883 
2884 		/* Make sure we flush the recovery flag to disk. */
2885 		ext4_commit_super(sb, es, 1);
2886 	}
2887 
2888 	return 0;
2889 }
2890 
ext4_commit_super(struct super_block * sb,struct ext4_super_block * es,int sync)2891 static int ext4_commit_super(struct super_block *sb,
2892 			      struct ext4_super_block *es, int sync)
2893 {
2894 	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
2895 	int error = 0;
2896 
2897 	if (!sbh)
2898 		return error;
2899 	if (buffer_write_io_error(sbh)) {
2900 		/*
2901 		 * Oh, dear.  A previous attempt to write the
2902 		 * superblock failed.  This could happen because the
2903 		 * USB device was yanked out.  Or it could happen to
2904 		 * be a transient write error and maybe the block will
2905 		 * be remapped.  Nothing we can do but to retry the
2906 		 * write and hope for the best.
2907 		 */
2908 		printk(KERN_ERR "EXT4-fs: previous I/O error to "
2909 		       "superblock detected for %s.\n", sb->s_id);
2910 		clear_buffer_write_io_error(sbh);
2911 		set_buffer_uptodate(sbh);
2912 	}
2913 	es->s_wtime = cpu_to_le32(get_seconds());
2914 	ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
2915 					&EXT4_SB(sb)->s_freeblocks_counter));
2916 	es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
2917 					&EXT4_SB(sb)->s_freeinodes_counter));
2918 
2919 	BUFFER_TRACE(sbh, "marking dirty");
2920 	mark_buffer_dirty(sbh);
2921 	if (sync) {
2922 		error = sync_dirty_buffer(sbh);
2923 		if (error)
2924 			return error;
2925 
2926 		error = buffer_write_io_error(sbh);
2927 		if (error) {
2928 			printk(KERN_ERR "EXT4-fs: I/O error while writing "
2929 			       "superblock for %s.\n", sb->s_id);
2930 			clear_buffer_write_io_error(sbh);
2931 			set_buffer_uptodate(sbh);
2932 		}
2933 	}
2934 	return error;
2935 }
2936 
2937 
2938 /*
2939  * Have we just finished recovery?  If so, and if we are mounting (or
2940  * remounting) the filesystem readonly, then we will end up with a
2941  * consistent fs on disk.  Record that fact.
2942  */
ext4_mark_recovery_complete(struct super_block * sb,struct ext4_super_block * es)2943 static void ext4_mark_recovery_complete(struct super_block *sb,
2944 					struct ext4_super_block *es)
2945 {
2946 	journal_t *journal = EXT4_SB(sb)->s_journal;
2947 
2948 	if (!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2949 		BUG_ON(journal != NULL);
2950 		return;
2951 	}
2952 	jbd2_journal_lock_updates(journal);
2953 	if (jbd2_journal_flush(journal) < 0)
2954 		goto out;
2955 
2956 	lock_super(sb);
2957 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) &&
2958 	    sb->s_flags & MS_RDONLY) {
2959 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
2960 		sb->s_dirt = 0;
2961 		ext4_commit_super(sb, es, 1);
2962 	}
2963 	unlock_super(sb);
2964 
2965 out:
2966 	jbd2_journal_unlock_updates(journal);
2967 }
2968 
2969 /*
2970  * If we are mounting (or read-write remounting) a filesystem whose journal
2971  * has recorded an error from a previous lifetime, move that error to the
2972  * main filesystem now.
2973  */
ext4_clear_journal_err(struct super_block * sb,struct ext4_super_block * es)2974 static void ext4_clear_journal_err(struct super_block *sb,
2975 				   struct ext4_super_block *es)
2976 {
2977 	journal_t *journal;
2978 	int j_errno;
2979 	const char *errstr;
2980 
2981 	BUG_ON(!EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL));
2982 
2983 	journal = EXT4_SB(sb)->s_journal;
2984 
2985 	/*
2986 	 * Now check for any error status which may have been recorded in the
2987 	 * journal by a prior ext4_error() or ext4_abort()
2988 	 */
2989 
2990 	j_errno = jbd2_journal_errno(journal);
2991 	if (j_errno) {
2992 		char nbuf[16];
2993 
2994 		errstr = ext4_decode_error(sb, j_errno, nbuf);
2995 		ext4_warning(sb, __func__, "Filesystem error recorded "
2996 			     "from previous mount: %s", errstr);
2997 		ext4_warning(sb, __func__, "Marking fs in need of "
2998 			     "filesystem check.");
2999 
3000 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
3001 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
3002 		ext4_commit_super(sb, es, 1);
3003 
3004 		jbd2_journal_clear_err(journal);
3005 	}
3006 }
3007 
3008 /*
3009  * Force the running and committing transactions to commit,
3010  * and wait on the commit.
3011  */
ext4_force_commit(struct super_block * sb)3012 int ext4_force_commit(struct super_block *sb)
3013 {
3014 	journal_t *journal;
3015 	int ret = 0;
3016 
3017 	if (sb->s_flags & MS_RDONLY)
3018 		return 0;
3019 
3020 	journal = EXT4_SB(sb)->s_journal;
3021 	if (journal) {
3022 		sb->s_dirt = 0;
3023 		ret = ext4_journal_force_commit(journal);
3024 	}
3025 
3026 	return ret;
3027 }
3028 
3029 /*
3030  * Ext4 always journals updates to the superblock itself, so we don't
3031  * have to propagate any other updates to the superblock on disk at this
3032  * point.  (We can probably nuke this function altogether, and remove
3033  * any mention to sb->s_dirt in all of fs/ext4; eventual cleanup...)
3034  */
ext4_write_super(struct super_block * sb)3035 static void ext4_write_super(struct super_block *sb)
3036 {
3037 	if (EXT4_SB(sb)->s_journal) {
3038 		if (mutex_trylock(&sb->s_lock) != 0)
3039 			BUG();
3040 		sb->s_dirt = 0;
3041 	} else {
3042 		ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3043 	}
3044 }
3045 
ext4_sync_fs(struct super_block * sb,int wait)3046 static int ext4_sync_fs(struct super_block *sb, int wait)
3047 {
3048 	int ret = 0;
3049 	tid_t target;
3050 
3051 	trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait);
3052 	sb->s_dirt = 0;
3053 	if (EXT4_SB(sb)->s_journal) {
3054 		if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal,
3055 					      &target)) {
3056 			if (wait)
3057 				jbd2_log_wait_commit(EXT4_SB(sb)->s_journal,
3058 						     target);
3059 		}
3060 	} else {
3061 		ext4_commit_super(sb, EXT4_SB(sb)->s_es, wait);
3062 	}
3063 	return ret;
3064 }
3065 
3066 /*
3067  * LVM calls this function before a (read-only) snapshot is created.  This
3068  * gives us a chance to flush the journal completely and mark the fs clean.
3069  */
ext4_freeze(struct super_block * sb)3070 static int ext4_freeze(struct super_block *sb)
3071 {
3072 	int error = 0;
3073 	journal_t *journal;
3074 	sb->s_dirt = 0;
3075 
3076 	if (!(sb->s_flags & MS_RDONLY)) {
3077 		journal = EXT4_SB(sb)->s_journal;
3078 
3079 		if (journal) {
3080 			/* Now we set up the journal barrier. */
3081 			jbd2_journal_lock_updates(journal);
3082 
3083 			/*
3084 			 * We don't want to clear needs_recovery flag when we
3085 			 * failed to flush the journal.
3086 			 */
3087 			error = jbd2_journal_flush(journal);
3088 			if (error < 0)
3089 				goto out;
3090 		}
3091 
3092 		/* Journal blocked and flushed, clear needs_recovery flag. */
3093 		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3094 		error = ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3095 		if (error)
3096 			goto out;
3097 	}
3098 	return 0;
3099 out:
3100 	jbd2_journal_unlock_updates(journal);
3101 	return error;
3102 }
3103 
3104 /*
3105  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
3106  * flag here, even though the filesystem is not technically dirty yet.
3107  */
ext4_unfreeze(struct super_block * sb)3108 static int ext4_unfreeze(struct super_block *sb)
3109 {
3110 	if (EXT4_SB(sb)->s_journal && !(sb->s_flags & MS_RDONLY)) {
3111 		lock_super(sb);
3112 		/* Reser the needs_recovery flag before the fs is unlocked. */
3113 		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
3114 		ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1);
3115 		unlock_super(sb);
3116 		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3117 	}
3118 	return 0;
3119 }
3120 
ext4_remount(struct super_block * sb,int * flags,char * data)3121 static int ext4_remount(struct super_block *sb, int *flags, char *data)
3122 {
3123 	struct ext4_super_block *es;
3124 	struct ext4_sb_info *sbi = EXT4_SB(sb);
3125 	ext4_fsblk_t n_blocks_count = 0;
3126 	unsigned long old_sb_flags;
3127 	struct ext4_mount_options old_opts;
3128 	ext4_group_t g;
3129 	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
3130 	int err;
3131 #ifdef CONFIG_QUOTA
3132 	int i;
3133 #endif
3134 
3135 	/* Store the original options */
3136 	old_sb_flags = sb->s_flags;
3137 	old_opts.s_mount_opt = sbi->s_mount_opt;
3138 	old_opts.s_resuid = sbi->s_resuid;
3139 	old_opts.s_resgid = sbi->s_resgid;
3140 	old_opts.s_commit_interval = sbi->s_commit_interval;
3141 	old_opts.s_min_batch_time = sbi->s_min_batch_time;
3142 	old_opts.s_max_batch_time = sbi->s_max_batch_time;
3143 #ifdef CONFIG_QUOTA
3144 	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
3145 	for (i = 0; i < MAXQUOTAS; i++)
3146 		old_opts.s_qf_names[i] = sbi->s_qf_names[i];
3147 #endif
3148 	if (sbi->s_journal && sbi->s_journal->j_task->io_context)
3149 		journal_ioprio = sbi->s_journal->j_task->io_context->ioprio;
3150 
3151 	/*
3152 	 * Allow the "check" option to be passed as a remount option.
3153 	 */
3154 	if (!parse_options(data, sb, NULL, &journal_ioprio,
3155 			   &n_blocks_count, 1)) {
3156 		err = -EINVAL;
3157 		goto restore_opts;
3158 	}
3159 
3160 	if (sbi->s_mount_opt & EXT4_MOUNT_ABORT)
3161 		ext4_abort(sb, __func__, "Abort forced by user");
3162 
3163 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
3164 		((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0);
3165 
3166 	es = sbi->s_es;
3167 
3168 	if (sbi->s_journal) {
3169 		ext4_init_journal_params(sb, sbi->s_journal);
3170 		set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
3171 	}
3172 
3173 	if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
3174 		n_blocks_count > ext4_blocks_count(es)) {
3175 		if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) {
3176 			err = -EROFS;
3177 			goto restore_opts;
3178 		}
3179 
3180 		if (*flags & MS_RDONLY) {
3181 			/*
3182 			 * First of all, the unconditional stuff we have to do
3183 			 * to disable replay of the journal when we next remount
3184 			 */
3185 			sb->s_flags |= MS_RDONLY;
3186 
3187 			/*
3188 			 * OK, test if we are remounting a valid rw partition
3189 			 * readonly, and if so set the rdonly flag and then
3190 			 * mark the partition as valid again.
3191 			 */
3192 			if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
3193 			    (sbi->s_mount_state & EXT4_VALID_FS))
3194 				es->s_state = cpu_to_le16(sbi->s_mount_state);
3195 
3196 			/*
3197 			 * We have to unlock super so that we can wait for
3198 			 * transactions.
3199 			 */
3200 			if (sbi->s_journal) {
3201 				unlock_super(sb);
3202 				ext4_mark_recovery_complete(sb, es);
3203 				lock_super(sb);
3204 			}
3205 		} else {
3206 			int ret;
3207 			if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb,
3208 					~EXT4_FEATURE_RO_COMPAT_SUPP))) {
3209 				printk(KERN_WARNING "EXT4-fs: %s: couldn't "
3210 				       "remount RDWR because of unsupported "
3211 				       "optional features (%x).\n", sb->s_id,
3212 				(le32_to_cpu(sbi->s_es->s_feature_ro_compat) &
3213 					~EXT4_FEATURE_RO_COMPAT_SUPP));
3214 				err = -EROFS;
3215 				goto restore_opts;
3216 			}
3217 
3218 			/*
3219 			 * Make sure the group descriptor checksums
3220 			 * are sane.  If they aren't, refuse to
3221 			 * remount r/w.
3222 			 */
3223 			for (g = 0; g < sbi->s_groups_count; g++) {
3224 				struct ext4_group_desc *gdp =
3225 					ext4_get_group_desc(sb, g, NULL);
3226 
3227 				if (!ext4_group_desc_csum_verify(sbi, g, gdp)) {
3228 					printk(KERN_ERR
3229 	       "EXT4-fs: ext4_remount: "
3230 		"Checksum for group %u failed (%u!=%u)\n",
3231 		g, le16_to_cpu(ext4_group_desc_csum(sbi, g, gdp)),
3232 					       le16_to_cpu(gdp->bg_checksum));
3233 					err = -EINVAL;
3234 					goto restore_opts;
3235 				}
3236 			}
3237 
3238 			/*
3239 			 * If we have an unprocessed orphan list hanging
3240 			 * around from a previously readonly bdev mount,
3241 			 * require a full umount/remount for now.
3242 			 */
3243 			if (es->s_last_orphan) {
3244 				printk(KERN_WARNING "EXT4-fs: %s: couldn't "
3245 				       "remount RDWR because of unprocessed "
3246 				       "orphan inode list.  Please "
3247 				       "umount/remount instead.\n",
3248 				       sb->s_id);
3249 				err = -EINVAL;
3250 				goto restore_opts;
3251 			}
3252 
3253 			/*
3254 			 * Mounting a RDONLY partition read-write, so reread
3255 			 * and store the current valid flag.  (It may have
3256 			 * been changed by e2fsck since we originally mounted
3257 			 * the partition.)
3258 			 */
3259 			if (sbi->s_journal)
3260 				ext4_clear_journal_err(sb, es);
3261 			sbi->s_mount_state = le16_to_cpu(es->s_state);
3262 			if ((err = ext4_group_extend(sb, es, n_blocks_count)))
3263 				goto restore_opts;
3264 			if (!ext4_setup_super(sb, es, 0))
3265 				sb->s_flags &= ~MS_RDONLY;
3266 		}
3267 	}
3268 	if (sbi->s_journal == NULL)
3269 		ext4_commit_super(sb, es, 1);
3270 
3271 #ifdef CONFIG_QUOTA
3272 	/* Release old quota file names */
3273 	for (i = 0; i < MAXQUOTAS; i++)
3274 		if (old_opts.s_qf_names[i] &&
3275 		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3276 			kfree(old_opts.s_qf_names[i]);
3277 #endif
3278 	return 0;
3279 restore_opts:
3280 	sb->s_flags = old_sb_flags;
3281 	sbi->s_mount_opt = old_opts.s_mount_opt;
3282 	sbi->s_resuid = old_opts.s_resuid;
3283 	sbi->s_resgid = old_opts.s_resgid;
3284 	sbi->s_commit_interval = old_opts.s_commit_interval;
3285 	sbi->s_min_batch_time = old_opts.s_min_batch_time;
3286 	sbi->s_max_batch_time = old_opts.s_max_batch_time;
3287 #ifdef CONFIG_QUOTA
3288 	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
3289 	for (i = 0; i < MAXQUOTAS; i++) {
3290 		if (sbi->s_qf_names[i] &&
3291 		    old_opts.s_qf_names[i] != sbi->s_qf_names[i])
3292 			kfree(sbi->s_qf_names[i]);
3293 		sbi->s_qf_names[i] = old_opts.s_qf_names[i];
3294 	}
3295 #endif
3296 	return err;
3297 }
3298 
ext4_statfs(struct dentry * dentry,struct kstatfs * buf)3299 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3300 {
3301 	struct super_block *sb = dentry->d_sb;
3302 	struct ext4_sb_info *sbi = EXT4_SB(sb);
3303 	struct ext4_super_block *es = sbi->s_es;
3304 	u64 fsid;
3305 
3306 	if (test_opt(sb, MINIX_DF)) {
3307 		sbi->s_overhead_last = 0;
3308 	} else if (sbi->s_blocks_last != ext4_blocks_count(es)) {
3309 		ext4_group_t ngroups = sbi->s_groups_count, i;
3310 		ext4_fsblk_t overhead = 0;
3311 		smp_rmb();
3312 
3313 		/*
3314 		 * Compute the overhead (FS structures).  This is constant
3315 		 * for a given filesystem unless the number of block groups
3316 		 * changes so we cache the previous value until it does.
3317 		 */
3318 
3319 		/*
3320 		 * All of the blocks before first_data_block are
3321 		 * overhead
3322 		 */
3323 		overhead = le32_to_cpu(es->s_first_data_block);
3324 
3325 		/*
3326 		 * Add the overhead attributed to the superblock and
3327 		 * block group descriptors.  If the sparse superblocks
3328 		 * feature is turned on, then not all groups have this.
3329 		 */
3330 		for (i = 0; i < ngroups; i++) {
3331 			overhead += ext4_bg_has_super(sb, i) +
3332 				ext4_bg_num_gdb(sb, i);
3333 			cond_resched();
3334 		}
3335 
3336 		/*
3337 		 * Every block group has an inode bitmap, a block
3338 		 * bitmap, and an inode table.
3339 		 */
3340 		overhead += ngroups * (2 + sbi->s_itb_per_group);
3341 		sbi->s_overhead_last = overhead;
3342 		smp_wmb();
3343 		sbi->s_blocks_last = ext4_blocks_count(es);
3344 	}
3345 
3346 	buf->f_type = EXT4_SUPER_MAGIC;
3347 	buf->f_bsize = sb->s_blocksize;
3348 	buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3349 	buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3350 		       percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3351 	ext4_free_blocks_count_set(es, buf->f_bfree);
3352 	buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3353 	if (buf->f_bfree < ext4_r_blocks_count(es))
3354 		buf->f_bavail = 0;
3355 	buf->f_files = le32_to_cpu(es->s_inodes_count);
3356 	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
3357 	es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
3358 	buf->f_namelen = EXT4_NAME_LEN;
3359 	fsid = le64_to_cpup((void *)es->s_uuid) ^
3360 	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
3361 	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
3362 	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
3363 	return 0;
3364 }
3365 
/* Helper function for writing quotas on sync - we need to start transaction before quota file
 * is locked for write. Otherwise there are possible deadlocks:
 * Process 1                         Process 2
 * ext4_create()                     quota_sync()
 *   jbd2_journal_start()                   write_dquot()
 *   DQUOT_INIT()                        down(dqio_mutex)
 *     down(dqio_mutex)                    jbd2_journal_start()
 *
 */
3375 
3376 #ifdef CONFIG_QUOTA
3377 
dquot_to_inode(struct dquot * dquot)3378 static inline struct inode *dquot_to_inode(struct dquot *dquot)
3379 {
3380 	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
3381 }
3382 
ext4_dquot_initialize(struct inode * inode,int type)3383 static int ext4_dquot_initialize(struct inode *inode, int type)
3384 {
3385 	handle_t *handle;
3386 	int ret, err;
3387 
3388 	/* We may create quota structure so we need to reserve enough blocks */
3389 	handle = ext4_journal_start(inode, 2*EXT4_QUOTA_INIT_BLOCKS(inode->i_sb));
3390 	if (IS_ERR(handle))
3391 		return PTR_ERR(handle);
3392 	ret = dquot_initialize(inode, type);
3393 	err = ext4_journal_stop(handle);
3394 	if (!ret)
3395 		ret = err;
3396 	return ret;
3397 }
3398 
ext4_dquot_drop(struct inode * inode)3399 static int ext4_dquot_drop(struct inode *inode)
3400 {
3401 	handle_t *handle;
3402 	int ret, err;
3403 
3404 	/* We may delete quota structure so we need to reserve enough blocks */
3405 	handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb));
3406 	if (IS_ERR(handle)) {
3407 		/*
3408 		 * We call dquot_drop() anyway to at least release references
3409 		 * to quota structures so that umount does not hang.
3410 		 */
3411 		dquot_drop(inode);
3412 		return PTR_ERR(handle);
3413 	}
3414 	ret = dquot_drop(inode);
3415 	err = ext4_journal_stop(handle);
3416 	if (!ret)
3417 		ret = err;
3418 	return ret;
3419 }
3420 
ext4_write_dquot(struct dquot * dquot)3421 static int ext4_write_dquot(struct dquot *dquot)
3422 {
3423 	int ret, err;
3424 	handle_t *handle;
3425 	struct inode *inode;
3426 
3427 	inode = dquot_to_inode(dquot);
3428 	handle = ext4_journal_start(inode,
3429 					EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
3430 	if (IS_ERR(handle))
3431 		return PTR_ERR(handle);
3432 	ret = dquot_commit(dquot);
3433 	err = ext4_journal_stop(handle);
3434 	if (!ret)
3435 		ret = err;
3436 	return ret;
3437 }
3438 
ext4_acquire_dquot(struct dquot * dquot)3439 static int ext4_acquire_dquot(struct dquot *dquot)
3440 {
3441 	int ret, err;
3442 	handle_t *handle;
3443 
3444 	handle = ext4_journal_start(dquot_to_inode(dquot),
3445 					EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
3446 	if (IS_ERR(handle))
3447 		return PTR_ERR(handle);
3448 	ret = dquot_acquire(dquot);
3449 	err = ext4_journal_stop(handle);
3450 	if (!ret)
3451 		ret = err;
3452 	return ret;
3453 }
3454 
ext4_release_dquot(struct dquot * dquot)3455 static int ext4_release_dquot(struct dquot *dquot)
3456 {
3457 	int ret, err;
3458 	handle_t *handle;
3459 
3460 	handle = ext4_journal_start(dquot_to_inode(dquot),
3461 					EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
3462 	if (IS_ERR(handle)) {
3463 		/* Release dquot anyway to avoid endless cycle in dqput() */
3464 		dquot_release(dquot);
3465 		return PTR_ERR(handle);
3466 	}
3467 	ret = dquot_release(dquot);
3468 	err = ext4_journal_stop(handle);
3469 	if (!ret)
3470 		ret = err;
3471 	return ret;
3472 }
3473 
ext4_mark_dquot_dirty(struct dquot * dquot)3474 static int ext4_mark_dquot_dirty(struct dquot *dquot)
3475 {
3476 	/* Are we journaling quotas? */
3477 	if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
3478 	    EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
3479 		dquot_mark_dquot_dirty(dquot);
3480 		return ext4_write_dquot(dquot);
3481 	} else {
3482 		return dquot_mark_dquot_dirty(dquot);
3483 	}
3484 }
3485 
ext4_write_info(struct super_block * sb,int type)3486 static int ext4_write_info(struct super_block *sb, int type)
3487 {
3488 	int ret, err;
3489 	handle_t *handle;
3490 
3491 	/* Data block + inode block */
3492 	handle = ext4_journal_start(sb->s_root->d_inode, 2);
3493 	if (IS_ERR(handle))
3494 		return PTR_ERR(handle);
3495 	ret = dquot_commit_info(sb, type);
3496 	err = ext4_journal_stop(handle);
3497 	if (!ret)
3498 		ret = err;
3499 	return ret;
3500 }
3501 
3502 /*
3503  * Turn on quotas during mount time - we need to find
3504  * the quota file and such...
3505  */
ext4_quota_on_mount(struct super_block * sb,int type)3506 static int ext4_quota_on_mount(struct super_block *sb, int type)
3507 {
3508 	return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type],
3509 			EXT4_SB(sb)->s_jquota_fmt, type);
3510 }
3511 
3512 /*
3513  * Standard function to be called on quota_on
3514  */
ext4_quota_on(struct super_block * sb,int type,int format_id,char * name,int remount)3515 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
3516 			 char *name, int remount)
3517 {
3518 	int err;
3519 	struct path path;
3520 
3521 	if (!test_opt(sb, QUOTA))
3522 		return -EINVAL;
3523 	/* When remounting, no checks are needed and in fact, name is NULL */
3524 	if (remount)
3525 		return vfs_quota_on(sb, type, format_id, name, remount);
3526 
3527 	err = kern_path(name, LOOKUP_FOLLOW, &path);
3528 	if (err)
3529 		return err;
3530 
3531 	/* Quotafile not on the same filesystem? */
3532 	if (path.mnt->mnt_sb != sb) {
3533 		path_put(&path);
3534 		return -EXDEV;
3535 	}
3536 	/* Journaling quota? */
3537 	if (EXT4_SB(sb)->s_qf_names[type]) {
3538 		/* Quotafile not in fs root? */
3539 		if (path.dentry->d_parent != sb->s_root)
3540 			printk(KERN_WARNING
3541 				"EXT4-fs: Quota file not on filesystem root. "
3542 				"Journaled quota will not work.\n");
3543 	}
3544 
3545 	/*
3546 	 * When we journal data on quota file, we have to flush journal to see
3547 	 * all updates to the file when we bypass pagecache...
3548 	 */
3549 	if (EXT4_SB(sb)->s_journal &&
3550 	    ext4_should_journal_data(path.dentry->d_inode)) {
3551 		/*
3552 		 * We don't need to lock updates but journal_flush() could
3553 		 * otherwise be livelocked...
3554 		 */
3555 		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
3556 		err = jbd2_journal_flush(EXT4_SB(sb)->s_journal);
3557 		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
3558 		if (err) {
3559 			path_put(&path);
3560 			return err;
3561 		}
3562 	}
3563 
3564 	err = vfs_quota_on_path(sb, type, format_id, &path);
3565 	path_put(&path);
3566 	return err;
3567 }
3568 
3569 /* Read data from quotafile - avoid pagecache and such because we cannot afford
3570  * acquiring the locks... As quota files are never truncated and quota code
3571  * itself serializes the operations (and noone else should touch the files)
3572  * we don't have to be afraid of races */
ext4_quota_read(struct super_block * sb,int type,char * data,size_t len,loff_t off)3573 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
3574 			       size_t len, loff_t off)
3575 {
3576 	struct inode *inode = sb_dqopt(sb)->files[type];
3577 	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3578 	int err = 0;
3579 	int offset = off & (sb->s_blocksize - 1);
3580 	int tocopy;
3581 	size_t toread;
3582 	struct buffer_head *bh;
3583 	loff_t i_size = i_size_read(inode);
3584 
3585 	if (off > i_size)
3586 		return 0;
3587 	if (off+len > i_size)
3588 		len = i_size-off;
3589 	toread = len;
3590 	while (toread > 0) {
3591 		tocopy = sb->s_blocksize - offset < toread ?
3592 				sb->s_blocksize - offset : toread;
3593 		bh = ext4_bread(NULL, inode, blk, 0, &err);
3594 		if (err)
3595 			return err;
3596 		if (!bh)	/* A hole? */
3597 			memset(data, 0, tocopy);
3598 		else
3599 			memcpy(data, bh->b_data+offset, tocopy);
3600 		brelse(bh);
3601 		offset = 0;
3602 		toread -= tocopy;
3603 		data += tocopy;
3604 		blk++;
3605 	}
3606 	return len;
3607 }
3608 
3609 /* Write to quotafile (we know the transaction is already started and has
3610  * enough credits) */
ext4_quota_write(struct super_block * sb,int type,const char * data,size_t len,loff_t off)3611 static ssize_t ext4_quota_write(struct super_block *sb, int type,
3612 				const char *data, size_t len, loff_t off)
3613 {
3614 	struct inode *inode = sb_dqopt(sb)->files[type];
3615 	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
3616 	int err = 0;
3617 	int offset = off & (sb->s_blocksize - 1);
3618 	int tocopy;
3619 	int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL;
3620 	size_t towrite = len;
3621 	struct buffer_head *bh;
3622 	handle_t *handle = journal_current_handle();
3623 
3624 	if (EXT4_SB(sb)->s_journal && !handle) {
3625 		printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)"
3626 			" cancelled because transaction is not started.\n",
3627 			(unsigned long long)off, (unsigned long long)len);
3628 		return -EIO;
3629 	}
3630 	mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
3631 	while (towrite > 0) {
3632 		tocopy = sb->s_blocksize - offset < towrite ?
3633 				sb->s_blocksize - offset : towrite;
3634 		bh = ext4_bread(handle, inode, blk, 1, &err);
3635 		if (!bh)
3636 			goto out;
3637 		if (journal_quota) {
3638 			err = ext4_journal_get_write_access(handle, bh);
3639 			if (err) {
3640 				brelse(bh);
3641 				goto out;
3642 			}
3643 		}
3644 		lock_buffer(bh);
3645 		memcpy(bh->b_data+offset, data, tocopy);
3646 		flush_dcache_page(bh->b_page);
3647 		unlock_buffer(bh);
3648 		if (journal_quota)
3649 			err = ext4_handle_dirty_metadata(handle, NULL, bh);
3650 		else {
3651 			/* Always do at least ordered writes for quotas */
3652 			err = ext4_jbd2_file_inode(handle, inode);
3653 			mark_buffer_dirty(bh);
3654 		}
3655 		brelse(bh);
3656 		if (err)
3657 			goto out;
3658 		offset = 0;
3659 		towrite -= tocopy;
3660 		data += tocopy;
3661 		blk++;
3662 	}
3663 out:
3664 	if (len == towrite) {
3665 		mutex_unlock(&inode->i_mutex);
3666 		return err;
3667 	}
3668 	if (inode->i_size < off+len-towrite) {
3669 		i_size_write(inode, off+len-towrite);
3670 		EXT4_I(inode)->i_disksize = inode->i_size;
3671 	}
3672 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
3673 	ext4_mark_inode_dirty(handle, inode);
3674 	mutex_unlock(&inode->i_mutex);
3675 	return len - towrite;
3676 }
3677 
3678 #endif
3679 
ext4_get_sb(struct file_system_type * fs_type,int flags,const char * dev_name,void * data,struct vfsmount * mnt)3680 static int ext4_get_sb(struct file_system_type *fs_type,
3681 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3682 {
3683 	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3684 }
3685 
3686 #ifdef CONFIG_PROC_FS
ext4_ui_proc_show(struct seq_file * m,void * v)3687 static int ext4_ui_proc_show(struct seq_file *m, void *v)
3688 {
3689 	unsigned int *p = m->private;
3690 
3691 	seq_printf(m, "%u\n", *p);
3692 	return 0;
3693 }
3694 
ext4_ui_proc_open(struct inode * inode,struct file * file)3695 static int ext4_ui_proc_open(struct inode *inode, struct file *file)
3696 {
3697 	return single_open(file, ext4_ui_proc_show, PDE(inode)->data);
3698 }
3699 
ext4_ui_proc_write(struct file * file,const char __user * buf,size_t cnt,loff_t * ppos)3700 static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf,
3701 			       size_t cnt, loff_t *ppos)
3702 {
3703 	unsigned long *p = PDE(file->f_path.dentry->d_inode)->data;
3704 	char str[32];
3705 
3706 	if (cnt >= sizeof(str))
3707 		return -EINVAL;
3708 	if (copy_from_user(str, buf, cnt))
3709 		return -EFAULT;
3710 
3711 	*p = simple_strtoul(str, NULL, 0);
3712 	return cnt;
3713 }
3714 
3715 const struct file_operations ext4_ui_proc_fops = {
3716 	.owner		= THIS_MODULE,
3717 	.open		= ext4_ui_proc_open,
3718 	.read		= seq_read,
3719 	.llseek		= seq_lseek,
3720 	.release	= single_release,
3721 	.write		= ext4_ui_proc_write,
3722 };
3723 #endif
3724 
3725 static struct file_system_type ext4_fs_type = {
3726 	.owner		= THIS_MODULE,
3727 	.name		= "ext4",
3728 	.get_sb		= ext4_get_sb,
3729 	.kill_sb	= kill_block_super,
3730 	.fs_flags	= FS_REQUIRES_DEV,
3731 };
3732 
3733 #ifdef CONFIG_EXT4DEV_COMPAT
ext4dev_get_sb(struct file_system_type * fs_type,int flags,const char * dev_name,void * data,struct vfsmount * mnt)3734 static int ext4dev_get_sb(struct file_system_type *fs_type,
3735 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
3736 {
3737 	printk(KERN_WARNING "EXT4-fs: Update your userspace programs "
3738 	       "to mount using ext4\n");
3739 	printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility "
3740 	       "will go away by 2.6.31\n");
3741 	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt);
3742 }
3743 
3744 static struct file_system_type ext4dev_fs_type = {
3745 	.owner		= THIS_MODULE,
3746 	.name		= "ext4dev",
3747 	.get_sb		= ext4dev_get_sb,
3748 	.kill_sb	= kill_block_super,
3749 	.fs_flags	= FS_REQUIRES_DEV,
3750 };
3751 MODULE_ALIAS("ext4dev");
3752 #endif
3753 
init_ext4_fs(void)3754 static int __init init_ext4_fs(void)
3755 {
3756 	int err;
3757 
3758 	ext4_proc_root = proc_mkdir("fs/ext4", NULL);
3759 	err = init_ext4_mballoc();
3760 	if (err)
3761 		return err;
3762 
3763 	err = init_ext4_xattr();
3764 	if (err)
3765 		goto out2;
3766 	err = init_inodecache();
3767 	if (err)
3768 		goto out1;
3769 	err = register_filesystem(&ext4_fs_type);
3770 	if (err)
3771 		goto out;
3772 #ifdef CONFIG_EXT4DEV_COMPAT
3773 	err = register_filesystem(&ext4dev_fs_type);
3774 	if (err) {
3775 		unregister_filesystem(&ext4_fs_type);
3776 		goto out;
3777 	}
3778 #endif
3779 	return 0;
3780 out:
3781 	destroy_inodecache();
3782 out1:
3783 	exit_ext4_xattr();
3784 out2:
3785 	exit_ext4_mballoc();
3786 	return err;
3787 }
3788 
exit_ext4_fs(void)3789 static void __exit exit_ext4_fs(void)
3790 {
3791 	unregister_filesystem(&ext4_fs_type);
3792 #ifdef CONFIG_EXT4DEV_COMPAT
3793 	unregister_filesystem(&ext4dev_fs_type);
3794 #endif
3795 	destroy_inodecache();
3796 	exit_ext4_xattr();
3797 	exit_ext4_mballoc();
3798 	remove_proc_entry("fs/ext4", NULL);
3799 }
3800 
3801 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
3802 MODULE_DESCRIPTION("Fourth Extended Filesystem");
3803 MODULE_LICENSE("GPL");
3804 module_init(init_ext4_fs)
3805 module_exit(exit_ext4_fs)
3806