// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/super.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/inode.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/module.h>
#include <linux/string.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/parser.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/vfs.h>
#include <linux/random.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
#include <linux/seq_file.h>
#include <linux/ctype.h>
#include <linux/log2.h>
#include <linux/crc16.h>
#include <linux/dax.h>
#include <linux/cleancache.h>
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include <linux/unicode.h>
#include <linux/part_stat.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/fsnotify.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>

#include "ext4.h"
#include "ext4_extents.h"	/* Needed for trace points definition */
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
#include "mballoc.h"
#include "fsmap.h"

#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>

static struct ext4_lazy_init *ext4_li_info;
static DEFINE_MUTEX(ext4_li_mtx);
static struct ratelimit_state ext4_mount_msg_ratelimit;

static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
			     unsigned long journal_devnum);
static int ext4_show_options(struct seq_file *seq, struct dentry *root);
static void ext4_update_super(struct super_block *sb);
static int ext4_commit_super(struct super_block *sb);
static int ext4_mark_recovery_complete(struct super_block *sb,
					struct ext4_super_block *es);
static int ext4_clear_journal_err(struct super_block *sb,
				  struct ext4_super_block *es);
static int ext4_sync_fs(struct super_block *sb, int wait);
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
static int ext4_unfreeze(struct super_block *sb);
static int ext4_freeze(struct super_block *sb);
static inline int ext2_feature_set_ok(struct super_block *sb);
static inline int ext3_feature_set_ok(struct super_block *sb);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
static struct inode *ext4_get_journal_inode(struct super_block *sb,
					    unsigned int journal_inum);
static int ext4_validate_options(struct fs_context *fc);
static int ext4_check_opt_consistency(struct fs_context *fc,
				      struct super_block *sb);
static void ext4_apply_options(struct fs_context *fc, struct super_block *sb);
static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param);
static int ext4_get_tree(struct fs_context *fc);
static int ext4_reconfigure(struct fs_context *fc);
static void ext4_fc_free(struct fs_context *fc);
static int ext4_init_fs_context(struct fs_context *fc);
static const struct fs_parameter_spec ext4_param_specs[];

/*
 * Lock ordering
 *
 * page fault path:
 * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
 *   -> page lock -> i_data_sem (rw)
 *
 * buffered write path:
 * sb_start_write -> i_mutex -> mmap_lock
 * sb_start_write -> i_mutex -> transaction start -> page lock ->
 *   i_data_sem (rw)
 *
 * truncate:
 * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
 *   page lock
 * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
 *   i_data_sem (rw)
 *
 * direct IO:
 * sb_start_write -> i_mutex -> mmap_lock
 * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
 *
 * writepages:
 * transaction start -> page lock(s) -> i_data_sem (rw)
 */
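
/*
 * Illustrative sketch (not part of the original source): the buffered
 * write ordering documented above, written out as the nesting a writer
 * would follow.  All real work and error handling are omitted; this
 * only demonstrates the lock order.
 */
static void buffered_write_lock_order_sketch(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;
	handle_t *handle;

	sb_start_write(sb);		/* freeze protection first */
	inode_lock(inode);		/* "i_mutex" in the comment above */
	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
	if (!IS_ERR(handle)) {
		/* ... then page lock, then i_data_sem (rw), then the work ... */
		ext4_journal_stop(handle);
	}
	inode_unlock(inode);
	sb_end_write(sb);
}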

static const struct fs_context_operations ext4_context_ops = {
	.parse_param	= ext4_parse_param,
	.get_tree	= ext4_get_tree,
	.reconfigure	= ext4_reconfigure,
	.free		= ext4_fc_free,
};


#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static struct file_system_type ext2_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "ext2",
	.init_fs_context	= ext4_init_fs_context,
	.parameters		= ext4_param_specs,
	.kill_sb		= kill_block_super,
	.fs_flags		= FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext2");
MODULE_ALIAS("ext2");
#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
#else
#define IS_EXT2_SB(sb) (0)
#endif


static struct file_system_type ext3_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "ext3",
	.init_fs_context	= ext4_init_fs_context,
	.parameters		= ext4_param_specs,
	.kill_sb		= kill_block_super,
	.fs_flags		= FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)


static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
				  bh_end_io_t *end_io)
{
	/*
	 * buffer's verified bit is no longer valid after reading from
	 * disk again due to write out error, clear it to make sure we
	 * recheck the buffer contents.
	 */
	clear_buffer_verified(bh);

	bh->b_end_io = end_io ? end_io : end_buffer_read_sync;
	get_bh(bh);
	submit_bh(REQ_OP_READ | op_flags, bh);
}

void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
			 bh_end_io_t *end_io)
{
	BUG_ON(!buffer_locked(bh));

	if (ext4_buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return;
	}
	__ext4_read_bh(bh, op_flags, end_io);
}

int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io)
{
	BUG_ON(!buffer_locked(bh));

	if (ext4_buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	__ext4_read_bh(bh, op_flags, end_io);

	wait_on_buffer(bh);
	if (buffer_uptodate(bh))
		return 0;
	return -EIO;
}

int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
{
	lock_buffer(bh);
	if (!wait) {
		ext4_read_bh_nowait(bh, op_flags, NULL);
		return 0;
	}
	return ext4_read_bh(bh, op_flags, NULL);
}
/*
 * This works like __bread_gfp() except it uses ERR_PTR for error
 * returns.  Currently with sb_bread it's impossible to distinguish
 * between ENOMEM and EIO situations (since both result in a NULL
 * return).
 */
static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
					       sector_t block,
					       blk_opf_t op_flags, gfp_t gfp)
{
	struct buffer_head *bh;
	int ret;

	bh = sb_getblk_gfp(sb, block, gfp);
	if (bh == NULL)
		return ERR_PTR(-ENOMEM);
	if (ext4_buffer_uptodate(bh))
		return bh;

	ret = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
	if (ret) {
		put_bh(bh);
		return ERR_PTR(ret);
	}
	return bh;
}

struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
				   blk_opf_t op_flags)
{
	return __ext4_sb_bread_gfp(sb, block, op_flags, __GFP_MOVABLE);
}
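
/*
 * Illustrative usage sketch (not part of the original source): unlike
 * sb_bread(), callers of ext4_sb_bread() must test the result with
 * IS_ERR() rather than against NULL, which is what keeps -ENOMEM and
 * -EIO distinguishable.
 */
static int read_one_block_sketch(struct super_block *sb, sector_t block)
{
	struct buffer_head *bh = ext4_sb_bread(sb, block, 0);

	if (IS_ERR(bh))
		return PTR_ERR(bh);	/* -ENOMEM or -EIO, never NULL */
	/* ... consume bh->b_data ... */
	brelse(bh);
	return 0;
}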

struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
					    sector_t block)
{
	return __ext4_sb_bread_gfp(sb, block, 0, 0);
}

void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
{
	struct buffer_head *bh = sb_getblk_gfp(sb, block, 0);

	if (likely(bh)) {
		if (trylock_buffer(bh))
			ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL);
		brelse(bh);
	}
}

static int ext4_verify_csum_type(struct super_block *sb,
				 struct ext4_super_block *es)
{
	if (!ext4_has_feature_metadata_csum(sb))
		return 1;

	return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
}

__le32 ext4_superblock_csum(struct super_block *sb,
			    struct ext4_super_block *es)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int offset = offsetof(struct ext4_super_block, s_checksum);
	__u32 csum;

	csum = ext4_chksum(sbi, ~0, (char *)es, offset);

	return cpu_to_le32(csum);
}

static int ext4_superblock_csum_verify(struct super_block *sb,
				       struct ext4_super_block *es)
{
	if (!ext4_has_metadata_csum(sb))
		return 1;

	return es->s_checksum == ext4_superblock_csum(sb, es);
}

void ext4_superblock_csum_set(struct super_block *sb)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	if (!ext4_has_metadata_csum(sb))
		return;

	es->s_checksum = ext4_superblock_csum(sb, es);
}

ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
			       struct ext4_group_desc *bg)
{
	return le32_to_cpu(bg->bg_block_bitmap_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
}

ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
			       struct ext4_group_desc *bg)
{
	return le32_to_cpu(bg->bg_inode_bitmap_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
}

ext4_fsblk_t ext4_inode_table(struct super_block *sb,
			      struct ext4_group_desc *bg)
{
	return le32_to_cpu(bg->bg_inode_table_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
}

__u32 ext4_free_group_clusters(struct super_block *sb,
			       struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_free_blocks_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
}

__u32 ext4_free_inodes_count(struct super_block *sb,
			      struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_free_inodes_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
}

__u32 ext4_used_dirs_count(struct super_block *sb,
			      struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_used_dirs_count_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
}

__u32 ext4_itable_unused_count(struct super_block *sb,
			      struct ext4_group_desc *bg)
{
	return le16_to_cpu(bg->bg_itable_unused_lo) |
		(EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
		 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
}
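
/*
 * Worked example (not part of the original source): with 64-bit group
 * descriptors, the accessors above reassemble a value from its low and
 * high halves as lo | ((u64)hi << 32).  For instance, a block bitmap
 * at block 0x123456789 is stored as lo = 0x23456789 and hi = 0x1, and
 * the split round-trips losslessly:
 */
static void bg_split_roundtrip_sketch(void)
{
	ext4_fsblk_t blk = 0x123456789ULL;
	__le32 lo = cpu_to_le32((u32)blk);	/* 0x23456789 */
	__le32 hi = cpu_to_le32(blk >> 32);	/* 0x00000001 */

	WARN_ON((le32_to_cpu(lo) |
		 ((ext4_fsblk_t)le32_to_cpu(hi) << 32)) != blk);
}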

void ext4_block_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
}

void ext4_inode_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_bitmap_lo  = cpu_to_le32((u32)blk);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
}

void ext4_inode_table_set(struct super_block *sb,
			  struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
}

void ext4_free_group_clusters_set(struct super_block *sb,
				  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
}

void ext4_free_inodes_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
}

void ext4_used_dirs_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
}

void ext4_itable_unused_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT)
		bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}

static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
{
	now = clamp_val(now, 0, (1ull << 40) - 1);

	*lo = cpu_to_le32(lower_32_bits(now));
	*hi = upper_32_bits(now);
}

static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
{
	return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
}
#define ext4_update_tstamp(es, tstamp) \
	__ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
			     ktime_get_real_seconds())
#define ext4_get_tstamp(es, tstamp) \
	__ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
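
/*
 * Worked example (not part of the original source): superblock
 * timestamps carry 40 bits - 32 in the __le32 field plus 8 more in the
 * _hi byte - and clamp_val() above keeps larger values from wrapping.
 */
static void tstamp_roundtrip_sketch(void)
{
	time64_t t = 0x51234abcdLL;	/* needs more than 32 bits */
	__le32 lo;
	__u8 hi;

	__ext4_update_tstamp(&lo, &hi, t);	/* lo = 0x1234abcd, hi = 0x5 */
	WARN_ON(__ext4_get_tstamp(&lo, &hi) != t);
}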

/*
 * The del_gendisk() function uninitializes the disk-specific data
 * structures, including the bdi structure, without telling anyone
 * else.  Once this happens, any attempt to call mark_buffer_dirty()
 * (for example, by ext4_commit_super) will cause a kernel OOPS.
 * This is a kludge to prevent these oopses until we can put in a proper
 * hook in del_gendisk() to inform the VFS and file system layers.
 */
static int block_device_ejected(struct super_block *sb)
{
	struct inode *bd_inode = sb->s_bdev->bd_inode;
	struct backing_dev_info *bdi = inode_to_bdi(bd_inode);

	return bdi->dev == NULL;
}

static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
	struct super_block		*sb = journal->j_private;
	struct ext4_sb_info		*sbi = EXT4_SB(sb);
	int				error = is_journal_aborted(journal);
	struct ext4_journal_cb_entry	*jce;

	BUG_ON(txn->t_state == T_FINISHED);

	ext4_process_freed_data(sb, txn->t_tid);

	spin_lock(&sbi->s_md_lock);
	while (!list_empty(&txn->t_private_list)) {
		jce = list_entry(txn->t_private_list.next,
				 struct ext4_journal_cb_entry, jce_list);
		list_del_init(&jce->jce_list);
		spin_unlock(&sbi->s_md_lock);
		jce->jce_func(sb, jce, error);
		spin_lock(&sbi->s_md_lock);
	}
	spin_unlock(&sbi->s_md_lock);
}
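
/*
 * Illustrative sketch (not part of the original source): entries land
 * on t_private_list via ext4_journal_callback_add() while a handle is
 * open; the commit callback above then invokes each jce_func exactly
 * once after the transaction commits.  The wrapper type and payload
 * here are hypothetical.
 */
struct commit_cb_sketch {
	struct ext4_journal_cb_entry jce;	/* embedded list entry */
	int payload;
};

static void commit_cb_sketch_fn(struct super_block *sb,
				struct ext4_journal_cb_entry *jce, int error)
{
	struct commit_cb_sketch *d =
		container_of(jce, struct commit_cb_sketch, jce);

	kfree(d);	/* runs from ext4_journal_commit_callback() */
}
/* While a handle is open:
 * ext4_journal_callback_add(handle, commit_cb_sketch_fn, &d->jce);
 */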

/*
 * This writepage callback for write_cache_pages()
 * takes care of a few cases after page cleaning.
 *
 * write_cache_pages() already checks for dirty pages and calls
 * clear_page_dirty_for_io(), which we want in order to write-protect
 * the pages.
 *
 * However, we may have to redirty a page (see below.)
 */
static int ext4_journalled_writepage_callback(struct page *page,
					      struct writeback_control *wbc,
					      void *data)
{
	transaction_t *transaction = (transaction_t *) data;
	struct buffer_head *bh, *head;
	struct journal_head *jh;

	bh = head = page_buffers(page);
	do {
		/*
		 * We have to redirty a page in these cases:
		 * 1) If buffer is dirty, it means the page was dirty because it
		 * contains a buffer that needs checkpointing. So the dirty bit
		 * needs to be preserved so that checkpointing writes the buffer
		 * properly.
		 * 2) If buffer is not part of the committing transaction
		 * (we may have just accidentally come across this buffer because
		 * inode range tracking is not exact) or if the currently running
		 * transaction already contains this buffer as well, the dirty
		 * bit needs to be preserved so that the buffer gets
		 * write-protected properly on the running transaction's commit.
		 */
		jh = bh2jh(bh);
		if (buffer_dirty(bh) ||
		    (jh && (jh->b_transaction != transaction ||
			    jh->b_next_transaction))) {
			redirty_page_for_writepage(wbc, page);
			goto out;
		}
	} while ((bh = bh->b_this_page) != head);

out:
	return AOP_WRITEPAGE_ACTIVATE;
}

static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
	struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
	struct writeback_control wbc = {
		.sync_mode =  WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = jinode->i_dirty_start,
		.range_end = jinode->i_dirty_end,
	};

	return write_cache_pages(mapping, &wbc,
				 ext4_journalled_writepage_callback,
				 jinode->i_transaction);
}

static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
{
	int ret;

	if (ext4_should_journal_data(jinode->i_vfs_inode))
		ret = ext4_journalled_submit_inode_data_buffers(jinode);
	else
		ret = jbd2_journal_submit_inode_data_buffers(jinode);

	return ret;
}

static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
{
	int ret = 0;

	if (!ext4_should_journal_data(jinode->i_vfs_inode))
		ret = jbd2_journal_finish_inode_data_buffers(jinode);

	return ret;
}

static bool system_going_down(void)
{
	return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
		|| system_state == SYSTEM_RESTART;
}

struct ext4_err_translation {
	int code;
	int errno;
};

#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }

static struct ext4_err_translation err_translation[] = {
	EXT4_ERR_TRANSLATE(EIO),
	EXT4_ERR_TRANSLATE(ENOMEM),
	EXT4_ERR_TRANSLATE(EFSBADCRC),
	EXT4_ERR_TRANSLATE(EFSCORRUPTED),
	EXT4_ERR_TRANSLATE(ENOSPC),
	EXT4_ERR_TRANSLATE(ENOKEY),
	EXT4_ERR_TRANSLATE(EROFS),
	EXT4_ERR_TRANSLATE(EFBIG),
	EXT4_ERR_TRANSLATE(EEXIST),
	EXT4_ERR_TRANSLATE(ERANGE),
	EXT4_ERR_TRANSLATE(EOVERFLOW),
	EXT4_ERR_TRANSLATE(EBUSY),
	EXT4_ERR_TRANSLATE(ENOTDIR),
	EXT4_ERR_TRANSLATE(ENOTEMPTY),
	EXT4_ERR_TRANSLATE(ESHUTDOWN),
	EXT4_ERR_TRANSLATE(EFAULT),
};

static int ext4_errno_to_code(int errno)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(err_translation); i++)
		if (err_translation[i].errno == errno)
			return err_translation[i].code;
	return EXT4_ERR_UNKNOWN;
}
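
/*
 * Illustrative sketch (not part of the original source): the table maps
 * in-kernel errnos to the stable EXT4_ERR_* codes recorded on disk;
 * anything not listed collapses to EXT4_ERR_UNKNOWN.
 */
static void errno_to_code_sketch(void)
{
	WARN_ON(ext4_errno_to_code(EIO) != EXT4_ERR_EIO);
	WARN_ON(ext4_errno_to_code(E2BIG) != EXT4_ERR_UNKNOWN); /* not in table */
}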

static void save_error_info(struct super_block *sb, int error,
			    __u32 ino, __u64 block,
			    const char *func, unsigned int line)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	/* We default to EFSCORRUPTED error... */
	if (error == 0)
		error = EFSCORRUPTED;

	spin_lock(&sbi->s_error_lock);
	sbi->s_add_error_count++;
	sbi->s_last_error_code = error;
	sbi->s_last_error_line = line;
	sbi->s_last_error_ino = ino;
	sbi->s_last_error_block = block;
	sbi->s_last_error_func = func;
	sbi->s_last_error_time = ktime_get_real_seconds();
	if (!sbi->s_first_error_time) {
		sbi->s_first_error_code = error;
		sbi->s_first_error_line = line;
		sbi->s_first_error_ino = ino;
		sbi->s_first_error_block = block;
		sbi->s_first_error_func = func;
		sbi->s_first_error_time = sbi->s_last_error_time;
	}
	spin_unlock(&sbi->s_error_lock);
}

/* Deal with the reporting of failure conditions on a filesystem such as
 * inconsistencies detected or read IO failures.
 *
 * On ext2, we can store the error state of the filesystem in the
 * superblock.  That is not possible on ext4, because we may have other
 * write ordering constraints on the superblock which prevent us from
 * writing it out straight away; and given that the journal is about to
 * be aborted, we can't rely on the current, or future, transactions to
 * write out the superblock safely.
 *
 * We'll just use the jbd2_journal_abort() error code to record an error in
 * the journal instead.  On recovery, the journal will complain about
 * that error until we've noted it down and cleared it.
 *
 * If force_ro is set, we unconditionally force the filesystem into an
 * ABORT|READONLY state, unless the error response on the fs has been set to
 * panic in which case we take the easy way out and panic immediately. This is
 * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
 * at a critical moment in log management.
 */
static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
			      __u32 ino, __u64 block,
			      const char *func, unsigned int line)
{
	journal_t *journal = EXT4_SB(sb)->s_journal;
	bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);

	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
	if (test_opt(sb, WARN_ON_ERROR))
		WARN_ON_ONCE(1);

	if (!continue_fs && !sb_rdonly(sb)) {
		ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
		if (journal)
			jbd2_journal_abort(journal, -EIO);
	}

	if (!bdev_read_only(sb->s_bdev)) {
		save_error_info(sb, error, ino, block, func, line);
		/*
		 * In case the fs should keep running, we need to writeout
		 * superblock through the journal. Due to lock ordering
		 * constraints, it may not be safe to do it right here so we
		 * defer superblock flushing to a workqueue.
		 */
		if (continue_fs && journal)
			schedule_work(&EXT4_SB(sb)->s_error_work);
		else
			ext4_commit_super(sb);
	}

	/*
	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
	 * could panic during 'reboot -f' as the underlying device got already
	 * disabled.
	 */
	if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
		panic("EXT4-fs (device %s): panic forced after error\n",
			sb->s_id);
	}

	if (sb_rdonly(sb) || continue_fs)
		return;

	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
	/*
	 * Make sure updated value of ->s_mount_flags will be visible before
	 * ->s_flags update
	 */
	smp_wmb();
	sb->s_flags |= SB_RDONLY;
}

static void flush_stashed_error_work(struct work_struct *work)
{
	struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
						s_error_work);
	journal_t *journal = sbi->s_journal;
	handle_t *handle;

	/*
	 * If the journal is still running, we have to write out superblock
	 * through the journal to avoid collisions of other journalled sb
	 * updates.
	 *
	 * We use directly jbd2 functions here to avoid recursing back into
	 * ext4 error handling code during handling of previous errors.
	 */
	if (!sb_rdonly(sbi->s_sb) && journal) {
		struct buffer_head *sbh = sbi->s_sbh;
		handle = jbd2_journal_start(journal, 1);
		if (IS_ERR(handle))
			goto write_directly;
		if (jbd2_journal_get_write_access(handle, sbh)) {
			jbd2_journal_stop(handle);
			goto write_directly;
		}
		ext4_update_super(sbi->s_sb);
		if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
			ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
				 "superblock detected");
			clear_buffer_write_io_error(sbh);
			set_buffer_uptodate(sbh);
		}

		if (jbd2_journal_dirty_metadata(handle, sbh)) {
			jbd2_journal_stop(handle);
			goto write_directly;
		}
		jbd2_journal_stop(handle);
		ext4_notify_error_sysfs(sbi);
		return;
	}
write_directly:
	/*
	 * Write through journal failed. Write sb directly to get error info
	 * out and hope for the best.
	 */
	ext4_commit_super(sbi->s_sb);
	ext4_notify_error_sysfs(sbi);
}

#define ext4_error_ratelimit(sb)					\
		___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state),	\
			     "EXT4-fs error")

void __ext4_error(struct super_block *sb, const char *function,
		  unsigned int line, bool force_ro, int error, __u64 block,
		  const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
		return;

	trace_ext4_error(sb, function, line);
	if (ext4_error_ratelimit(sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_CRIT
		       "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
		       sb->s_id, function, line, current->comm, &vaf);
		va_end(args);
	}
	fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED);

	ext4_handle_error(sb, force_ro, error, 0, block, function, line);
}

void __ext4_error_inode(struct inode *inode, const char *function,
			unsigned int line, ext4_fsblk_t block, int error,
			const char *fmt, ...)
{
	va_list args;
	struct va_format vaf;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return;

	trace_ext4_error(inode->i_sb, function, line);
	if (ext4_error_ratelimit(inode->i_sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		if (block)
			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
			       "inode #%lu: block %llu: comm %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       block, current->comm, &vaf);
		else
			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
			       "inode #%lu: comm %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       current->comm, &vaf);
		va_end(args);
	}
	fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED);

	ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
			  function, line);
}

void __ext4_error_file(struct file *file, const char *function,
		       unsigned int line, ext4_fsblk_t block,
		       const char *fmt, ...)
{
	va_list args;
	struct va_format vaf;
	struct inode *inode = file_inode(file);
	char pathname[80], *path;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return;

	trace_ext4_error(inode->i_sb, function, line);
	if (ext4_error_ratelimit(inode->i_sb)) {
		path = file_path(file, pathname, sizeof(pathname));
		if (IS_ERR(path))
			path = "(unknown)";
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		if (block)
			printk(KERN_CRIT
			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
			       "block %llu: comm %s: path %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       block, current->comm, path, &vaf);
		else
			printk(KERN_CRIT
			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
			       "comm %s: path %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       current->comm, path, &vaf);
		va_end(args);
	}
	fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED);

	ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
			  function, line);
}

const char *ext4_decode_error(struct super_block *sb, int errno,
			      char nbuf[16])
{
	char *errstr = NULL;

	switch (errno) {
	case -EFSCORRUPTED:
		errstr = "Corrupt filesystem";
		break;
	case -EFSBADCRC:
		errstr = "Filesystem failed CRC";
		break;
	case -EIO:
		errstr = "IO failure";
		break;
	case -ENOMEM:
		errstr = "Out of memory";
		break;
	case -EROFS:
		if (!sb || (EXT4_SB(sb)->s_journal &&
			    EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
			errstr = "Journal has aborted";
		else
			errstr = "Readonly filesystem";
		break;
	default:
		/* If the caller passed in an extra buffer for unknown
		 * errors, textualise them now.  Else we just return
		 * NULL. */
		if (nbuf) {
			/* Check for truncated error codes... */
			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
				errstr = nbuf;
		}
		break;
	}

	return errstr;
}
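
/*
 * Illustrative usage sketch (not part of the original source): known
 * codes decode to fixed strings, while anything else is formatted into
 * the caller-supplied 16-byte buffer.
 */
static void decode_error_sketch(struct super_block *sb)
{
	char nbuf[16];

	printk(KERN_DEBUG "%s\n", ext4_decode_error(sb, -EIO, nbuf));	/* "IO failure" */
	printk(KERN_DEBUG "%s\n", ext4_decode_error(sb, -E2BIG, nbuf));	/* e.g. "error 7" */
}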

/* __ext4_std_error decodes expected errors from journaling functions
 * automatically and invokes the appropriate error response.  */

void __ext4_std_error(struct super_block *sb, const char *function,
		      unsigned int line, int errno)
{
	char nbuf[16];
	const char *errstr;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
		return;

	/* Special case: if the error is EROFS, and we're not already
	 * inside a transaction, then there's really no point in logging
	 * an error. */
	if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
		return;

	if (ext4_error_ratelimit(sb)) {
		errstr = ext4_decode_error(sb, errno, nbuf);
		printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
		       sb->s_id, function, line, errstr);
	}
	fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED);

	ext4_handle_error(sb, false, -errno, 0, 0, function, line);
}

void __ext4_msg(struct super_block *sb,
		const char *prefix, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (sb) {
		atomic_inc(&EXT4_SB(sb)->s_msg_count);
		if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state),
				  "EXT4-fs"))
			return;
	}

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	if (sb)
		printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
	else
		printk("%sEXT4-fs: %pV\n", prefix, &vaf);
	va_end(args);
}

static int ext4_warning_ratelimit(struct super_block *sb)
{
	atomic_inc(&EXT4_SB(sb)->s_warning_count);
	return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
			    "EXT4-fs warning");
}

void __ext4_warning(struct super_block *sb, const char *function,
		    unsigned int line, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (!ext4_warning_ratelimit(sb))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
	       sb->s_id, function, line, &vaf);
	va_end(args);
}

void __ext4_warning_inode(const struct inode *inode, const char *function,
			  unsigned int line, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (!ext4_warning_ratelimit(inode->i_sb))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
	       "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
	       function, line, inode->i_ino, current->comm, &vaf);
	va_end(args);
}

void __ext4_grp_locked_error(const char *function, unsigned int line,
			     struct super_block *sb, ext4_group_t grp,
			     unsigned long ino, ext4_fsblk_t block,
			     const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
	struct va_format vaf;
	va_list args;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
		return;

	trace_ext4_error(sb, function, line);
	if (ext4_error_ratelimit(sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
		       sb->s_id, function, line, grp);
		if (ino)
			printk(KERN_CONT "inode %lu: ", ino);
		if (block)
			printk(KERN_CONT "block %llu:",
			       (unsigned long long) block);
		printk(KERN_CONT "%pV\n", &vaf);
		va_end(args);
	}

	if (test_opt(sb, ERRORS_CONT)) {
		if (test_opt(sb, WARN_ON_ERROR))
			WARN_ON_ONCE(1);
		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
		if (!bdev_read_only(sb->s_bdev)) {
			save_error_info(sb, EFSCORRUPTED, ino, block, function,
					line);
			schedule_work(&EXT4_SB(sb)->s_error_work);
		}
		return;
	}
	ext4_unlock_group(sb, grp);
	ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
	/*
	 * We only get here in the ERRORS_RO case; relocking the group
	 * may be dangerous, but nothing bad will happen since the
	 * filesystem will have already been marked read/only and the
	 * journal has been aborted.  We return 1 as a hint to callers
	 * who might want to use the return value from
	 * ext4_grp_locked_error() to distinguish between the
	 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
	 * aggressively from the ext4 function in question, with a
	 * more appropriate error code.
	 */
	ext4_lock_group(sb, grp);
	return;
}

void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
				     ext4_group_t group,
				     unsigned int flags)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
	int ret;

	if (!grp || !gdp)
		return;
	if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
		ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
					    &grp->bb_state);
		if (!ret)
			percpu_counter_sub(&sbi->s_freeclusters_counter,
					   grp->bb_free);
	}

	if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
		ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
					    &grp->bb_state);
		if (!ret && gdp) {
			int count;

			count = ext4_free_inodes_count(sb, gdp);
			percpu_counter_sub(&sbi->s_freeinodes_counter,
					   count);
		}
	}
}

void ext4_update_dynamic_rev(struct super_block *sb)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
		return;

	ext4_warning(sb,
		     "updating to rev %d because of new feature flag, "
		     "running e2fsck is recommended",
		     EXT4_DYNAMIC_REV);

	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
	/* leave es->s_feature_*compat flags alone */
	/* es->s_uuid will be set by e2fsck if empty */

	/*
	 * The rest of the superblock fields should be zero, and if not it
	 * means they are likely already in use, so leave them alone.  We
	 * can leave it up to e2fsck to clean up any inconsistencies there.
	 */
}

/*
 * Open the external journal device
 */
static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
{
	struct block_device *bdev;

	bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
	if (IS_ERR(bdev))
		goto fail;
	return bdev;

fail:
	ext4_msg(sb, KERN_ERR,
		 "failed to open journal device unknown-block(%u,%u) %ld",
		 MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
	return NULL;
}

/*
 * Release the journal device
 */
static void ext4_blkdev_put(struct block_device *bdev)
{
	blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}

static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
{
	struct block_device *bdev;
	bdev = sbi->s_journal_bdev;
	if (bdev) {
		/*
		 * Invalidate the journal device's buffers.  We don't want them
		 * floating about in memory - the physical journal device may
		 * have been hotswapped, and it breaks the `ro-after' testing
		 * code.
		 */
		invalidate_bdev(bdev);
		ext4_blkdev_put(bdev);
		sbi->s_journal_bdev = NULL;
	}
}

static inline struct inode *orphan_list_entry(struct list_head *l)
{
	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
}

static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
{
	struct list_head *l;

	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
		 le32_to_cpu(sbi->s_es->s_last_orphan));

	printk(KERN_ERR "sb_info orphan list:\n");
	list_for_each(l, &sbi->s_orphan) {
		struct inode *inode = orphan_list_entry(l);
		printk(KERN_ERR "  "
		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
		       inode->i_sb->s_id, inode->i_ino, inode,
		       inode->i_mode, inode->i_nlink,
		       NEXT_ORPHAN(inode));
	}
}

#ifdef CONFIG_QUOTA
static int ext4_quota_off(struct super_block *sb, int type);

static inline void ext4_quota_off_umount(struct super_block *sb)
{
	int type;

	/* Use our quota_off function to clear inode flags etc. */
	for (type = 0; type < EXT4_MAXQUOTAS; type++)
		ext4_quota_off(sb, type);
}

/*
 * This is a helper function which is used in the mount/remount
 * codepaths (which hold s_umount) to fetch the quota file name.
 */
static inline char *get_qf_name(struct super_block *sb,
				struct ext4_sb_info *sbi,
				int type)
{
	return rcu_dereference_protected(sbi->s_qf_names[type],
					 lockdep_is_held(&sb->s_umount));
}
#else
static inline void ext4_quota_off_umount(struct super_block *sb)
{
}
#endif

static void ext4_put_super(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	struct buffer_head **group_desc;
	struct flex_groups **flex_groups;
	int aborted = 0;
	int i, err;
	/*
	 * Unregister sysfs before destroying the jbd2 journal: the
	 * attr_journal_task attribute could otherwise still be read via
	 * sysfs after sbi->s_journal->j_task has become NULL.
	 * Also unregister sysfs before flushing sbi->s_error_work: a user
	 * may read /proc/fs/ext4/xx/mb_groups during umount; if a metadata
	 * read then fails verification, error work gets queued, and
	 * flush_stashed_error_work() would call start_this_handle() and
	 * trigger a BUG_ON.
	 */
	ext4_unregister_sysfs(sb);

	if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount"))
		ext4_msg(sb, KERN_INFO, "unmounting filesystem.");

	ext4_unregister_li_request(sb);
	ext4_quota_off_umount(sb);

	flush_work(&sbi->s_error_work);
	destroy_workqueue(sbi->rsv_conversion_wq);
	ext4_release_orphan_info(sb);

	if (sbi->s_journal) {
		aborted = is_journal_aborted(sbi->s_journal);
		err = jbd2_journal_destroy(sbi->s_journal);
		sbi->s_journal = NULL;
		if ((err < 0) && !aborted) {
			ext4_abort(sb, -err, "Couldn't clean up the journal");
		}
	}

	ext4_es_unregister_shrinker(sbi);
	del_timer_sync(&sbi->s_err_report);
	ext4_release_system_zone(sb);
	ext4_mb_release(sb);
	ext4_ext_release(sb);

	if (!sb_rdonly(sb) && !aborted) {
		ext4_clear_feature_journal_needs_recovery(sb);
		ext4_clear_feature_orphan_present(sb);
		es->s_state = cpu_to_le16(sbi->s_mount_state);
	}
	if (!sb_rdonly(sb))
		ext4_commit_super(sb);

	rcu_read_lock();
	group_desc = rcu_dereference(sbi->s_group_desc);
	for (i = 0; i < sbi->s_gdb_count; i++)
		brelse(group_desc[i]);
	kvfree(group_desc);
	flex_groups = rcu_dereference(sbi->s_flex_groups);
	if (flex_groups) {
		for (i = 0; i < sbi->s_flex_groups_allocated; i++)
			kvfree(flex_groups[i]);
		kvfree(flex_groups);
	}
	rcu_read_unlock();
	percpu_counter_destroy(&sbi->s_freeclusters_counter);
	percpu_counter_destroy(&sbi->s_freeinodes_counter);
	percpu_counter_destroy(&sbi->s_dirs_counter);
	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
	percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
	percpu_free_rwsem(&sbi->s_writepages_rwsem);
#ifdef CONFIG_QUOTA
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(get_qf_name(sb, sbi, i));
#endif

	/* Debugging code just in case the in-memory inode orphan list
	 * isn't empty.  The on-disk one can be non-empty if we've
	 * detected an error and taken the fs readonly, but the
	 * in-memory list had better be clean by this point. */
	if (!list_empty(&sbi->s_orphan))
		dump_orphan_list(sb, sbi);
	ASSERT(list_empty(&sbi->s_orphan));

	sync_blockdev(sb->s_bdev);
	invalidate_bdev(sb->s_bdev);
	if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) {
		sync_blockdev(sbi->s_journal_bdev);
		ext4_blkdev_remove(sbi);
	}

	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
	sbi->s_ea_inode_cache = NULL;

	ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
	sbi->s_ea_block_cache = NULL;

	ext4_stop_mmpd(sbi);

	brelse(sbi->s_sbh);
	sb->s_fs_info = NULL;
	/*
	 * Now that we are completely done shutting down the
	 * superblock, we need to actually destroy the kobject.
	 */
	kobject_put(&sbi->s_kobj);
	wait_for_completion(&sbi->s_kobj_unregister);
	if (sbi->s_chksum_driver)
		crypto_free_shash(sbi->s_chksum_driver);
	kfree(sbi->s_blockgroup_lock);
	fs_put_dax(sbi->s_daxdev, NULL);
	fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
#if IS_ENABLED(CONFIG_UNICODE)
	utf8_unload(sb->s_encoding);
#endif
	kfree(sbi);
}

static struct kmem_cache *ext4_inode_cachep;

/*
 * Called inside transaction, so use GFP_NOFS
 */
static struct inode *ext4_alloc_inode(struct super_block *sb)
{
	struct ext4_inode_info *ei;

	ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS);
	if (!ei)
		return NULL;

	inode_set_iversion(&ei->vfs_inode, 1);
	ei->i_flags = 0;
	spin_lock_init(&ei->i_raw_lock);
	INIT_LIST_HEAD(&ei->i_prealloc_list);
	atomic_set(&ei->i_prealloc_active, 0);
	spin_lock_init(&ei->i_prealloc_lock);
	ext4_es_init_tree(&ei->i_es_tree);
	rwlock_init(&ei->i_es_lock);
	INIT_LIST_HEAD(&ei->i_es_list);
	ei->i_es_all_nr = 0;
	ei->i_es_shk_nr = 0;
	ei->i_es_shrink_lblk = 0;
	ei->i_reserved_data_blocks = 0;
	spin_lock_init(&(ei->i_block_reservation_lock));
	ext4_init_pending_tree(&ei->i_pending_tree);
#ifdef CONFIG_QUOTA
	ei->i_reserved_quota = 0;
	memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
#endif
	ei->jinode = NULL;
	INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
	spin_lock_init(&ei->i_completed_io_lock);
	ei->i_sync_tid = 0;
	ei->i_datasync_tid = 0;
	atomic_set(&ei->i_unwritten, 0);
	INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
	ext4_fc_init_inode(&ei->vfs_inode);
	mutex_init(&ei->i_fc_lock);
	return &ei->vfs_inode;
}

static int ext4_drop_inode(struct inode *inode)
{
	int drop = generic_drop_inode(inode);

	if (!drop)
		drop = fscrypt_drop_inode(inode);

	trace_ext4_drop_inode(inode, drop);
	return drop;
}

static void ext4_free_in_core_inode(struct inode *inode)
{
	fscrypt_free_inode(inode);
	if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
		pr_warn("%s: inode %ld still in fc list",
			__func__, inode->i_ino);
	}
	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}

static void ext4_destroy_inode(struct inode *inode)
{
	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
		ext4_msg(inode->i_sb, KERN_ERR,
			 "Inode %lu (%p): orphan list check failed!",
			 inode->i_ino, EXT4_I(inode));
		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
				EXT4_I(inode), sizeof(struct ext4_inode_info),
				true);
		dump_stack();
	}

	if (EXT4_I(inode)->i_reserved_data_blocks)
		ext4_msg(inode->i_sb, KERN_ERR,
			 "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
			 inode->i_ino, EXT4_I(inode),
			 EXT4_I(inode)->i_reserved_data_blocks);
}

static void init_once(void *foo)
{
	struct ext4_inode_info *ei = foo;

	INIT_LIST_HEAD(&ei->i_orphan);
	init_rwsem(&ei->xattr_sem);
	init_rwsem(&ei->i_data_sem);
	inode_init_once(&ei->vfs_inode);
	ext4_fc_init_inode(&ei->vfs_inode);
}
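
/*
 * Note (not part of the original source): as a slab constructor,
 * init_once() runs when an object is first set up in the cache, not on
 * every ext4_alloc_inode() call, so it only initializes state that must
 * stay valid across object reuse (list heads and locks); per-allocation
 * state is reset in ext4_alloc_inode() instead.
 */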

static int __init init_inodecache(void)
{
	ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
				sizeof(struct ext4_inode_info), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
					SLAB_ACCOUNT),
				offsetof(struct ext4_inode_info, i_data),
				sizeof_field(struct ext4_inode_info, i_data),
				init_once);
	if (ext4_inode_cachep == NULL)
		return -ENOMEM;
	return 0;
}

static void destroy_inodecache(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(ext4_inode_cachep);
}

void ext4_clear_inode(struct inode *inode)
{
	ext4_fc_del(inode);
	invalidate_inode_buffers(inode);
	clear_inode(inode);
	ext4_discard_preallocations(inode, 0);
	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
	dquot_drop(inode);
	if (EXT4_I(inode)->jinode) {
		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
					       EXT4_I(inode)->jinode);
		jbd2_free_inode(EXT4_I(inode)->jinode);
		EXT4_I(inode)->jinode = NULL;
	}
	fscrypt_put_encryption_info(inode);
	fsverity_cleanup_inode(inode);
}

static struct inode *ext4_nfs_get_inode(struct super_block *sb,
					u64 ino, u32 generation)
{
	struct inode *inode;

	/*
	 * Currently we don't know the generation for parent directory, so
	 * a generation of 0 means "accept any"
	 */
	inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
	if (IS_ERR(inode))
		return ERR_CAST(inode);
	if (generation && inode->i_generation != generation) {
		iput(inode);
		return ERR_PTR(-ESTALE);
	}

	return inode;
}

static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				    ext4_nfs_get_inode);
}

static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				    ext4_nfs_get_inode);
}

static int ext4_nfs_commit_metadata(struct inode *inode)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL
	};

	trace_ext4_nfs_commit_metadata(inode);
	return ext4_write_inode(inode, &wbc);
}

#ifdef CONFIG_QUOTA
static const char * const quotatypes[] = INITQFNAMES;
#define QTYPE2NAME(t) (quotatypes[t])

static int ext4_write_dquot(struct dquot *dquot);
static int ext4_acquire_dquot(struct dquot *dquot);
static int ext4_release_dquot(struct dquot *dquot);
static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
			 const struct path *path);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
			       size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off);
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
			     unsigned int flags);

static struct dquot **ext4_get_dquots(struct inode *inode)
{
	return EXT4_I(inode)->i_dquot;
}

static const struct dquot_operations ext4_quota_operations = {
	.get_reserved_space	= ext4_get_reserved_space,
	.write_dquot		= ext4_write_dquot,
	.acquire_dquot		= ext4_acquire_dquot,
	.release_dquot		= ext4_release_dquot,
	.mark_dirty		= ext4_mark_dquot_dirty,
	.write_info		= ext4_write_info,
	.alloc_dquot		= dquot_alloc,
	.destroy_dquot		= dquot_destroy,
	.get_projid		= ext4_get_projid,
	.get_inode_usage	= ext4_get_inode_usage,
	.get_next_id		= dquot_get_next_id,
};

static const struct quotactl_ops ext4_qctl_operations = {
	.quota_on	= ext4_quota_on,
	.quota_off	= ext4_quota_off,
	.quota_sync	= dquot_quota_sync,
	.get_state	= dquot_get_state,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.set_dqblk	= dquot_set_dqblk,
	.get_nextdqblk	= dquot_get_next_dqblk,
};
#endif

static const struct super_operations ext4_sops = {
	.alloc_inode	= ext4_alloc_inode,
	.free_inode	= ext4_free_in_core_inode,
	.destroy_inode	= ext4_destroy_inode,
	.write_inode	= ext4_write_inode,
	.dirty_inode	= ext4_dirty_inode,
	.drop_inode	= ext4_drop_inode,
	.evict_inode	= ext4_evict_inode,
	.put_super	= ext4_put_super,
	.sync_fs	= ext4_sync_fs,
	.freeze_fs	= ext4_freeze,
	.unfreeze_fs	= ext4_unfreeze,
	.statfs		= ext4_statfs,
	.show_options	= ext4_show_options,
#ifdef CONFIG_QUOTA
	.quota_read	= ext4_quota_read,
	.quota_write	= ext4_quota_write,
	.get_dquots	= ext4_get_dquots,
#endif
};

static const struct export_operations ext4_export_ops = {
	.fh_to_dentry = ext4_fh_to_dentry,
	.fh_to_parent = ext4_fh_to_parent,
	.get_parent = ext4_get_parent,
	.commit_metadata = ext4_nfs_commit_metadata,
};

enum {
	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
	Opt_resgid, Opt_resuid, Opt_sb,
	Opt_nouid32, Opt_debug, Opt_removed,
	Opt_user_xattr, Opt_acl,
	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
	Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
	Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
	Opt_inlinecrypt,
	Opt_usrjquota, Opt_grpjquota, Opt_quota,
	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
	Opt_usrquota, Opt_grpquota, Opt_prjquota,
	Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
	Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize,
	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
	Opt_inode_readahead_blks, Opt_journal_ioprio,
	Opt_dioread_nolock, Opt_dioread_lock,
	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
	Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
	Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
#ifdef CONFIG_EXT4_DEBUG
	Opt_fc_debug_max_replay, Opt_fc_debug_force
#endif
};

static const struct constant_table ext4_param_errors[] = {
	{"continue",	EXT4_MOUNT_ERRORS_CONT},
	{"panic",	EXT4_MOUNT_ERRORS_PANIC},
	{"remount-ro",	EXT4_MOUNT_ERRORS_RO},
	{}
};

static const struct constant_table ext4_param_data[] = {
	{"journal",	EXT4_MOUNT_JOURNAL_DATA},
	{"ordered",	EXT4_MOUNT_ORDERED_DATA},
	{"writeback",	EXT4_MOUNT_WRITEBACK_DATA},
	{}
};

static const struct constant_table ext4_param_data_err[] = {
	{"abort",	Opt_data_err_abort},
	{"ignore",	Opt_data_err_ignore},
	{}
};

static const struct constant_table ext4_param_jqfmt[] = {
	{"vfsold",	QFMT_VFS_OLD},
	{"vfsv0",	QFMT_VFS_V0},
	{"vfsv1",	QFMT_VFS_V1},
	{}
};

static const struct constant_table ext4_param_dax[] = {
	{"always",	Opt_dax_always},
	{"inode",	Opt_dax_inode},
	{"never",	Opt_dax_never},
	{}
};

/* String parameter that allows empty argument */
#define fsparam_string_empty(NAME, OPT) \
	__fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)
1639 
1640 /*
1641  * Mount option specification
1642  * We don't use fsparam_flag_no because of the way we set the
1643  * options and the way we show them in _ext4_show_options(). To
1644  * keep the changes to a minimum, let's keep the negative options
1645  * separate for now.
1646  */
1647 static const struct fs_parameter_spec ext4_param_specs[] = {
1648 	fsparam_flag	("bsddf",		Opt_bsd_df),
1649 	fsparam_flag	("minixdf",		Opt_minix_df),
1650 	fsparam_flag	("grpid",		Opt_grpid),
1651 	fsparam_flag	("bsdgroups",		Opt_grpid),
1652 	fsparam_flag	("nogrpid",		Opt_nogrpid),
1653 	fsparam_flag	("sysvgroups",		Opt_nogrpid),
1654 	fsparam_u32	("resgid",		Opt_resgid),
1655 	fsparam_u32	("resuid",		Opt_resuid),
1656 	fsparam_u32	("sb",			Opt_sb),
1657 	fsparam_enum	("errors",		Opt_errors, ext4_param_errors),
1658 	fsparam_flag	("nouid32",		Opt_nouid32),
1659 	fsparam_flag	("debug",		Opt_debug),
1660 	fsparam_flag	("oldalloc",		Opt_removed),
1661 	fsparam_flag	("orlov",		Opt_removed),
1662 	fsparam_flag	("user_xattr",		Opt_user_xattr),
1663 	fsparam_flag	("acl",			Opt_acl),
1664 	fsparam_flag	("norecovery",		Opt_noload),
1665 	fsparam_flag	("noload",		Opt_noload),
1666 	fsparam_flag	("bh",			Opt_removed),
1667 	fsparam_flag	("nobh",		Opt_removed),
1668 	fsparam_u32	("commit",		Opt_commit),
1669 	fsparam_u32	("min_batch_time",	Opt_min_batch_time),
1670 	fsparam_u32	("max_batch_time",	Opt_max_batch_time),
1671 	fsparam_u32	("journal_dev",		Opt_journal_dev),
1672 	fsparam_bdev	("journal_path",	Opt_journal_path),
1673 	fsparam_flag	("journal_checksum",	Opt_journal_checksum),
1674 	fsparam_flag	("nojournal_checksum",	Opt_nojournal_checksum),
1675 	fsparam_flag	("journal_async_commit",Opt_journal_async_commit),
1676 	fsparam_flag	("abort",		Opt_abort),
1677 	fsparam_enum	("data",		Opt_data, ext4_param_data),
1678 	fsparam_enum	("data_err",		Opt_data_err,
1679 						ext4_param_data_err),
1680 	fsparam_string_empty
1681 			("usrjquota",		Opt_usrjquota),
1682 	fsparam_string_empty
1683 			("grpjquota",		Opt_grpjquota),
1684 	fsparam_enum	("jqfmt",		Opt_jqfmt, ext4_param_jqfmt),
1685 	fsparam_flag	("grpquota",		Opt_grpquota),
1686 	fsparam_flag	("quota",		Opt_quota),
1687 	fsparam_flag	("noquota",		Opt_noquota),
1688 	fsparam_flag	("usrquota",		Opt_usrquota),
1689 	fsparam_flag	("prjquota",		Opt_prjquota),
1690 	fsparam_flag	("barrier",		Opt_barrier),
1691 	fsparam_u32	("barrier",		Opt_barrier),
1692 	fsparam_flag	("nobarrier",		Opt_nobarrier),
1693 	fsparam_flag	("i_version",		Opt_removed),
1694 	fsparam_flag	("dax",			Opt_dax),
1695 	fsparam_enum	("dax",			Opt_dax_type, ext4_param_dax),
1696 	fsparam_u32	("stripe",		Opt_stripe),
1697 	fsparam_flag	("delalloc",		Opt_delalloc),
1698 	fsparam_flag	("nodelalloc",		Opt_nodelalloc),
1699 	fsparam_flag	("warn_on_error",	Opt_warn_on_error),
1700 	fsparam_flag	("nowarn_on_error",	Opt_nowarn_on_error),
1701 	fsparam_u32	("debug_want_extra_isize",
1702 						Opt_debug_want_extra_isize),
1703 	fsparam_flag	("mblk_io_submit",	Opt_removed),
1704 	fsparam_flag	("nomblk_io_submit",	Opt_removed),
1705 	fsparam_flag	("block_validity",	Opt_block_validity),
1706 	fsparam_flag	("noblock_validity",	Opt_noblock_validity),
1707 	fsparam_u32	("inode_readahead_blks",
1708 						Opt_inode_readahead_blks),
1709 	fsparam_u32	("journal_ioprio",	Opt_journal_ioprio),
1710 	fsparam_u32	("auto_da_alloc",	Opt_auto_da_alloc),
1711 	fsparam_flag	("auto_da_alloc",	Opt_auto_da_alloc),
1712 	fsparam_flag	("noauto_da_alloc",	Opt_noauto_da_alloc),
1713 	fsparam_flag	("dioread_nolock",	Opt_dioread_nolock),
1714 	fsparam_flag	("nodioread_nolock",	Opt_dioread_lock),
1715 	fsparam_flag	("dioread_lock",	Opt_dioread_lock),
1716 	fsparam_flag	("discard",		Opt_discard),
1717 	fsparam_flag	("nodiscard",		Opt_nodiscard),
1718 	fsparam_u32	("init_itable",		Opt_init_itable),
1719 	fsparam_flag	("init_itable",		Opt_init_itable),
1720 	fsparam_flag	("noinit_itable",	Opt_noinit_itable),
1721 #ifdef CONFIG_EXT4_DEBUG
1722 	fsparam_flag	("fc_debug_force",	Opt_fc_debug_force),
1723 	fsparam_u32	("fc_debug_max_replay",	Opt_fc_debug_max_replay),
1724 #endif
1725 	fsparam_u32	("max_dir_size_kb",	Opt_max_dir_size_kb),
1726 	fsparam_flag	("test_dummy_encryption",
1727 						Opt_test_dummy_encryption),
1728 	fsparam_string	("test_dummy_encryption",
1729 						Opt_test_dummy_encryption),
1730 	fsparam_flag	("inlinecrypt",		Opt_inlinecrypt),
1731 	fsparam_flag	("nombcache",		Opt_nombcache),
1732 	fsparam_flag	("no_mbcache",		Opt_nombcache),	/* for backward compatibility */
1733 	fsparam_flag	("prefetch_block_bitmaps",
1734 						Opt_removed),
1735 	fsparam_flag	("no_prefetch_block_bitmaps",
1736 						Opt_no_prefetch_block_bitmaps),
1737 	fsparam_s32	("mb_optimize_scan",	Opt_mb_optimize_scan),
1738 	fsparam_string	("check",		Opt_removed),	/* mount option from ext2/3 */
1739 	fsparam_flag	("nocheck",		Opt_removed),	/* mount option from ext2/3 */
1740 	fsparam_flag	("reservation",		Opt_removed),	/* mount option from ext2/3 */
1741 	fsparam_flag	("noreservation",	Opt_removed),	/* mount option from ext2/3 */
1742 	fsparam_u32	("journal",		Opt_removed),	/* mount option from ext2/3 */
1743 	{}
1744 };
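
/*
 * Note that a few keys are deliberately listed twice above ("barrier",
 * "dax", "auto_da_alloc", "init_itable", "test_dummy_encryption"): once
 * as a bare flag and once taking an argument, so that both the "-o dax"
 * and "-o dax=always" spellings are accepted by fs_parse().
 */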
1745 
1746 #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
1747 
1748 #define MOPT_SET	0x0001
1749 #define MOPT_CLEAR	0x0002
1750 #define MOPT_NOSUPPORT	0x0004
1751 #define MOPT_EXPLICIT	0x0008
1752 #ifdef CONFIG_QUOTA
1753 #define MOPT_Q		0
1754 #define MOPT_QFMT	0x0010
1755 #else
1756 #define MOPT_Q		MOPT_NOSUPPORT
1757 #define MOPT_QFMT	MOPT_NOSUPPORT
1758 #endif
1759 #define MOPT_NO_EXT2	0x0020
1760 #define MOPT_NO_EXT3	0x0040
1761 #define MOPT_EXT4_ONLY	(MOPT_NO_EXT2 | MOPT_NO_EXT3)
1762 #define MOPT_SKIP	0x0080
1763 #define	MOPT_2		0x0100
1764 
1765 static const struct mount_opts {
1766 	int	token;
1767 	int	mount_opt;
1768 	int	flags;
1769 } ext4_mount_opts[] = {
1770 	{Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
1771 	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
1772 	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
1773 	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
1774 	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
1775 	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
1776 	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
1777 	 MOPT_EXT4_ONLY | MOPT_SET},
1778 	{Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
1779 	 MOPT_EXT4_ONLY | MOPT_CLEAR},
1780 	{Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
1781 	{Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
1782 	{Opt_delalloc, EXT4_MOUNT_DELALLOC,
1783 	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1784 	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
1785 	 MOPT_EXT4_ONLY | MOPT_CLEAR},
1786 	{Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
1787 	{Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
1788 	{Opt_commit, 0, MOPT_NO_EXT2},
1789 	{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1790 	 MOPT_EXT4_ONLY | MOPT_CLEAR},
1791 	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
1792 	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1793 	{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
1794 				    EXT4_MOUNT_JOURNAL_CHECKSUM),
1795 	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
1796 	{Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
1797 	{Opt_data_err, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_NO_EXT2},
1798 	{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
1799 	{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
1800 	{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
1801 	{Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
1802 	{Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
1803 	{Opt_dax_type, 0, MOPT_EXT4_ONLY},
1804 	{Opt_journal_dev, 0, MOPT_NO_EXT2},
1805 	{Opt_journal_path, 0, MOPT_NO_EXT2},
1806 	{Opt_journal_ioprio, 0, MOPT_NO_EXT2},
1807 	{Opt_data, 0, MOPT_NO_EXT2},
1808 	{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
1809 #ifdef CONFIG_EXT4_FS_POSIX_ACL
1810 	{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
1811 #else
1812 	{Opt_acl, 0, MOPT_NOSUPPORT},
1813 #endif
1814 	{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
1815 	{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
1816 	{Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
1817 	{Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
1818 							MOPT_SET | MOPT_Q},
1819 	{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
1820 							MOPT_SET | MOPT_Q},
1821 	{Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
1822 							MOPT_SET | MOPT_Q},
1823 	{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
1824 		       EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
1825 							MOPT_CLEAR | MOPT_Q},
1826 	{Opt_usrjquota, 0, MOPT_Q},
1827 	{Opt_grpjquota, 0, MOPT_Q},
1828 	{Opt_jqfmt, 0, MOPT_QFMT},
1829 	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
1830 	{Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
1831 	 MOPT_SET},
1832 #ifdef CONFIG_EXT4_DEBUG
1833 	{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
1834 	 MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
1835 #endif
1836 	{Opt_err, 0, 0}
1837 };
1838 
1839 #if IS_ENABLED(CONFIG_UNICODE)
1840 static const struct ext4_sb_encodings {
1841 	__u16 magic;
1842 	char *name;
1843 	unsigned int version;
1844 } ext4_sb_encoding_map[] = {
1845 	{EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
1846 };
1847 
1848 static const struct ext4_sb_encodings *
1849 ext4_sb_read_encoding(const struct ext4_super_block *es)
1850 {
1851 	__u16 magic = le16_to_cpu(es->s_encoding);
1852 	int i;
1853 
1854 	for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
1855 		if (magic == ext4_sb_encoding_map[i].magic)
1856 			return &ext4_sb_encoding_map[i];
1857 
1858 	return NULL;
1859 }
1860 #endif
1861 
1862 #define EXT4_SPEC_JQUOTA			(1 <<  0)
1863 #define EXT4_SPEC_JQFMT				(1 <<  1)
1864 #define EXT4_SPEC_DATAJ				(1 <<  2)
1865 #define EXT4_SPEC_SB_BLOCK			(1 <<  3)
1866 #define EXT4_SPEC_JOURNAL_DEV			(1 <<  4)
1867 #define EXT4_SPEC_JOURNAL_IOPRIO		(1 <<  5)
1868 #define EXT4_SPEC_s_want_extra_isize		(1 <<  7)
1869 #define EXT4_SPEC_s_max_batch_time		(1 <<  8)
1870 #define EXT4_SPEC_s_min_batch_time		(1 <<  9)
1871 #define EXT4_SPEC_s_inode_readahead_blks	(1 << 10)
1872 #define EXT4_SPEC_s_li_wait_mult		(1 << 11)
1873 #define EXT4_SPEC_s_max_dir_size_kb		(1 << 12)
1874 #define EXT4_SPEC_s_stripe			(1 << 13)
1875 #define EXT4_SPEC_s_resuid			(1 << 14)
1876 #define EXT4_SPEC_s_resgid			(1 << 15)
1877 #define EXT4_SPEC_s_commit_interval		(1 << 16)
1878 #define EXT4_SPEC_s_fc_debug_max_replay		(1 << 17)
1879 #define EXT4_SPEC_s_sb_block			(1 << 18)
1880 #define EXT4_SPEC_mb_optimize_scan		(1 << 19)
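
/*
 * Each EXT4_SPEC_* bit above records that the corresponding value was
 * actually supplied by the user, letting ext4_apply_options() and the
 * consistency checks distinguish "not specified" from "specified as
 * zero". (Bit 6 is currently unused.)
 */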
1881 
1882 struct ext4_fs_context {
1883 	char		*s_qf_names[EXT4_MAXQUOTAS];
1884 	struct fscrypt_dummy_policy dummy_enc_policy;
1885 	int		s_jquota_fmt;	/* Format of quota to use */
1886 #ifdef CONFIG_EXT4_DEBUG
1887 	int s_fc_debug_max_replay;
1888 #endif
1889 	unsigned short	qname_spec;
1890 	unsigned long	vals_s_flags;	/* Bits to set in s_flags */
1891 	unsigned long	mask_s_flags;	/* Bits changed in s_flags */
1892 	unsigned long	journal_devnum;
1893 	unsigned long	s_commit_interval;
1894 	unsigned long	s_stripe;
1895 	unsigned int	s_inode_readahead_blks;
1896 	unsigned int	s_want_extra_isize;
1897 	unsigned int	s_li_wait_mult;
1898 	unsigned int	s_max_dir_size_kb;
1899 	unsigned int	journal_ioprio;
1900 	unsigned int	vals_s_mount_opt;
1901 	unsigned int	mask_s_mount_opt;
1902 	unsigned int	vals_s_mount_opt2;
1903 	unsigned int	mask_s_mount_opt2;
1904 	unsigned long	vals_s_mount_flags;
1905 	unsigned long	mask_s_mount_flags;
1906 	unsigned int	opt_flags;	/* MOPT flags */
1907 	unsigned int	spec;
1908 	u32		s_max_batch_time;
1909 	u32		s_min_batch_time;
1910 	kuid_t		s_resuid;
1911 	kgid_t		s_resgid;
1912 	ext4_fsblk_t	s_sb_block;
1913 };
1914 
1915 static void ext4_fc_free(struct fs_context *fc)
1916 {
1917 	struct ext4_fs_context *ctx = fc->fs_private;
1918 	int i;
1919 
1920 	if (!ctx)
1921 		return;
1922 
1923 	for (i = 0; i < EXT4_MAXQUOTAS; i++)
1924 		kfree(ctx->s_qf_names[i]);
1925 
1926 	fscrypt_free_dummy_policy(&ctx->dummy_enc_policy);
1927 	kfree(ctx);
1928 }
1929 
1930 int ext4_init_fs_context(struct fs_context *fc)
1931 {
1932 	struct ext4_fs_context *ctx;
1933 
1934 	ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
1935 	if (!ctx)
1936 		return -ENOMEM;
1937 
1938 	fc->fs_private = ctx;
1939 	fc->ops = &ext4_context_ops;
1940 
1941 	return 0;
1942 }
1943 
1944 #ifdef CONFIG_QUOTA
1945 /*
1946  * Note the name of the specified quota file.
1947  */
1948 static int note_qf_name(struct fs_context *fc, int qtype,
1949 		       struct fs_parameter *param)
1950 {
1951 	struct ext4_fs_context *ctx = fc->fs_private;
1952 	char *qname;
1953 
1954 	if (param->size < 1) {
1955 		ext4_msg(NULL, KERN_ERR, "Missing quota name");
1956 		return -EINVAL;
1957 	}
1958 	if (strchr(param->string, '/')) {
1959 		ext4_msg(NULL, KERN_ERR,
1960 			 "quotafile must be on filesystem root");
1961 		return -EINVAL;
1962 	}
1963 	if (ctx->s_qf_names[qtype]) {
1964 		if (strcmp(ctx->s_qf_names[qtype], param->string) != 0) {
1965 			ext4_msg(NULL, KERN_ERR,
1966 				 "%s quota file already specified",
1967 				 QTYPE2NAME(qtype));
1968 			return -EINVAL;
1969 		}
1970 		return 0;
1971 	}
1972 
1973 	qname = kmemdup_nul(param->string, param->size, GFP_KERNEL);
1974 	if (!qname) {
1975 		ext4_msg(NULL, KERN_ERR,
1976 			 "Not enough memory for storing quotafile name");
1977 		return -ENOMEM;
1978 	}
1979 	ctx->s_qf_names[qtype] = qname;
1980 	ctx->qname_spec |= 1 << qtype;
1981 	ctx->spec |= EXT4_SPEC_JQUOTA;
1982 	return 0;
1983 }
1984 
1985 /*
1986  * Clear the name of the specified quota file.
1987  */
1988 static int unnote_qf_name(struct fs_context *fc, int qtype)
1989 {
1990 	struct ext4_fs_context *ctx = fc->fs_private;
1991 
1992 	/* kfree(NULL) is a no-op, so no NULL check is needed */
1993 	kfree(ctx->s_qf_names[qtype]);
1994 
1995 	ctx->s_qf_names[qtype] = NULL;
1996 	ctx->qname_spec |= 1 << qtype;
1997 	ctx->spec |= EXT4_SPEC_JQUOTA;
1998 	return 0;
1999 }
2000 #endif
2001 
2002 static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param,
2003 					    struct ext4_fs_context *ctx)
2004 {
2005 	int err;
2006 
2007 	if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
2008 		ext4_msg(NULL, KERN_WARNING,
2009 			 "test_dummy_encryption option not supported");
2010 		return -EINVAL;
2011 	}
2012 	err = fscrypt_parse_test_dummy_encryption(param,
2013 						  &ctx->dummy_enc_policy);
2014 	if (err == -EINVAL) {
2015 		ext4_msg(NULL, KERN_WARNING,
2016 			 "Value of option \"%s\" is unrecognized", param->key);
2017 	} else if (err == -EEXIST) {
2018 		ext4_msg(NULL, KERN_WARNING,
2019 			 "Conflicting test_dummy_encryption options");
2020 		return -EINVAL;
2021 	}
2022 	return err;
2023 }
2024 
2025 #define EXT4_SET_CTX(name)						\
2026 static inline void ctx_set_##name(struct ext4_fs_context *ctx,		\
2027 				  unsigned long flag)			\
2028 {									\
2029 	ctx->mask_s_##name |= flag;					\
2030 	ctx->vals_s_##name |= flag;					\
2031 }
2032 
2033 #define EXT4_CLEAR_CTX(name)						\
2034 static inline void ctx_clear_##name(struct ext4_fs_context *ctx,	\
2035 				    unsigned long flag)			\
2036 {									\
2037 	ctx->mask_s_##name |= flag;					\
2038 	ctx->vals_s_##name &= ~flag;					\
2039 }
2040 
2041 #define EXT4_TEST_CTX(name)						\
2042 static inline unsigned long						\
2043 ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag)	\
2044 {									\
2045 	return (ctx->vals_s_##name & flag);				\
2046 }
2047 
2048 EXT4_SET_CTX(flags); /* set only */
2049 EXT4_SET_CTX(mount_opt);
2050 EXT4_CLEAR_CTX(mount_opt);
2051 EXT4_TEST_CTX(mount_opt);
2052 EXT4_SET_CTX(mount_opt2);
2053 EXT4_CLEAR_CTX(mount_opt2);
2054 EXT4_TEST_CTX(mount_opt2);
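
/*
 * For illustration, EXT4_SET_CTX(mount_opt) above expands to:
 *
 *	static inline void ctx_set_mount_opt(struct ext4_fs_context *ctx,
 *					     unsigned long flag)
 *	{
 *		ctx->mask_s_mount_opt |= flag;
 *		ctx->vals_s_mount_opt |= flag;
 *	}
 *
 * Each generated helper records both that a bit was touched (mask_s_*)
 * and the value it was given (vals_s_*), so that ext4_apply_options()
 * can later apply only the bits the user actually changed.
 */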
2055 
2056 static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit)
2057 {
2058 	set_bit(bit, &ctx->mask_s_mount_flags);
2059 	set_bit(bit, &ctx->vals_s_mount_flags);
2060 }
2061 
2062 static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
2063 {
2064 	struct ext4_fs_context *ctx = fc->fs_private;
2065 	struct fs_parse_result result;
2066 	const struct mount_opts *m;
2067 	int is_remount;
2068 	kuid_t uid;
2069 	kgid_t gid;
2070 	int token;
2071 
2072 	token = fs_parse(fc, ext4_param_specs, param, &result);
2073 	if (token < 0)
2074 		return token;
2075 	is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2076 
2077 	for (m = ext4_mount_opts; m->token != Opt_err; m++)
2078 		if (token == m->token)
2079 			break;
2080 
2081 	ctx->opt_flags |= m->flags;
2082 
2083 	if (m->flags & MOPT_EXPLICIT) {
2084 		if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
2085 			ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC);
2086 		} else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
2087 			ctx_set_mount_opt2(ctx,
2088 				       EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM);
2089 		} else
2090 			return -EINVAL;
2091 	}
2092 
2093 	if (m->flags & MOPT_NOSUPPORT) {
2094 		ext4_msg(NULL, KERN_ERR, "%s option not supported",
2095 			 param->key);
2096 		return 0;
2097 	}
2098 
2099 	switch (token) {
2100 #ifdef CONFIG_QUOTA
2101 	case Opt_usrjquota:
2102 		if (!*param->string)
2103 			return unnote_qf_name(fc, USRQUOTA);
2104 		else
2105 			return note_qf_name(fc, USRQUOTA, param);
2106 	case Opt_grpjquota:
2107 		if (!*param->string)
2108 			return unnote_qf_name(fc, GRPQUOTA);
2109 		else
2110 			return note_qf_name(fc, GRPQUOTA, param);
2111 #endif
2112 	case Opt_sb:
2113 		if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
2114 			ext4_msg(NULL, KERN_WARNING,
2115 				 "Ignoring %s option on remount", param->key);
2116 		} else {
2117 			ctx->s_sb_block = result.uint_32;
2118 			ctx->spec |= EXT4_SPEC_s_sb_block;
2119 		}
2120 		return 0;
2121 	case Opt_removed:
2122 		ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
2123 			 param->key);
2124 		return 0;
2125 	case Opt_abort:
2126 		ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED);
2127 		return 0;
2128 	case Opt_inlinecrypt:
2129 #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
2130 		ctx_set_flags(ctx, SB_INLINECRYPT);
2131 #else
2132 		ext4_msg(NULL, KERN_ERR, "inline encryption not supported");
2133 #endif
2134 		return 0;
2135 	case Opt_errors:
2136 		ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK);
2137 		ctx_set_mount_opt(ctx, result.uint_32);
2138 		return 0;
2139 #ifdef CONFIG_QUOTA
2140 	case Opt_jqfmt:
2141 		ctx->s_jquota_fmt = result.uint_32;
2142 		ctx->spec |= EXT4_SPEC_JQFMT;
2143 		return 0;
2144 #endif
2145 	case Opt_data:
2146 		ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2147 		ctx_set_mount_opt(ctx, result.uint_32);
2148 		ctx->spec |= EXT4_SPEC_DATAJ;
2149 		return 0;
2150 	case Opt_commit:
2151 		if (result.uint_32 == 0)
2152 			result.uint_32 = JBD2_DEFAULT_MAX_COMMIT_AGE;
2153 		else if (result.uint_32 > INT_MAX / HZ) {
2154 			ext4_msg(NULL, KERN_ERR,
2155 				 "Invalid commit interval %d, "
2156 				 "must be smaller than %d",
2157 				 result.uint_32, INT_MAX / HZ);
2158 			return -EINVAL;
2159 		}
2160 		ctx->s_commit_interval = HZ * result.uint_32;
2161 		ctx->spec |= EXT4_SPEC_s_commit_interval;
2162 		return 0;
2163 	case Opt_debug_want_extra_isize:
2164 		if ((result.uint_32 & 1) || (result.uint_32 < 4)) {
2165 			ext4_msg(NULL, KERN_ERR,
2166 				 "Invalid want_extra_isize %d", result.uint_32);
2167 			return -EINVAL;
2168 		}
2169 		ctx->s_want_extra_isize = result.uint_32;
2170 		ctx->spec |= EXT4_SPEC_s_want_extra_isize;
2171 		return 0;
2172 	case Opt_max_batch_time:
2173 		ctx->s_max_batch_time = result.uint_32;
2174 		ctx->spec |= EXT4_SPEC_s_max_batch_time;
2175 		return 0;
2176 	case Opt_min_batch_time:
2177 		ctx->s_min_batch_time = result.uint_32;
2178 		ctx->spec |= EXT4_SPEC_s_min_batch_time;
2179 		return 0;
2180 	case Opt_inode_readahead_blks:
2181 		if (result.uint_32 &&
2182 		    (result.uint_32 > (1 << 30) ||
2183 		     !is_power_of_2(result.uint_32))) {
2184 			ext4_msg(NULL, KERN_ERR,
2185 				 "EXT4-fs: inode_readahead_blks must be "
2186 				 "0 or a power of 2 smaller than 2^31");
2187 			return -EINVAL;
2188 		}
2189 		ctx->s_inode_readahead_blks = result.uint_32;
2190 		ctx->spec |= EXT4_SPEC_s_inode_readahead_blks;
2191 		return 0;
2192 	case Opt_init_itable:
2193 		ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE);
2194 		ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
2195 		if (param->type == fs_value_is_string)
2196 			ctx->s_li_wait_mult = result.uint_32;
2197 		ctx->spec |= EXT4_SPEC_s_li_wait_mult;
2198 		return 0;
2199 	case Opt_max_dir_size_kb:
2200 		ctx->s_max_dir_size_kb = result.uint_32;
2201 		ctx->spec |= EXT4_SPEC_s_max_dir_size_kb;
2202 		return 0;
2203 #ifdef CONFIG_EXT4_DEBUG
2204 	case Opt_fc_debug_max_replay:
2205 		ctx->s_fc_debug_max_replay = result.uint_32;
2206 		ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay;
2207 		return 0;
2208 #endif
2209 	case Opt_stripe:
2210 		ctx->s_stripe = result.uint_32;
2211 		ctx->spec |= EXT4_SPEC_s_stripe;
2212 		return 0;
2213 	case Opt_resuid:
2214 		uid = make_kuid(current_user_ns(), result.uint_32);
2215 		if (!uid_valid(uid)) {
2216 			ext4_msg(NULL, KERN_ERR, "Invalid uid value %d",
2217 				 result.uint_32);
2218 			return -EINVAL;
2219 		}
2220 		ctx->s_resuid = uid;
2221 		ctx->spec |= EXT4_SPEC_s_resuid;
2222 		return 0;
2223 	case Opt_resgid:
2224 		gid = make_kgid(current_user_ns(), result.uint_32);
2225 		if (!gid_valid(gid)) {
2226 			ext4_msg(NULL, KERN_ERR, "Invalid gid value %d",
2227 				 result.uint_32);
2228 			return -EINVAL;
2229 		}
2230 		ctx->s_resgid = gid;
2231 		ctx->spec |= EXT4_SPEC_s_resgid;
2232 		return 0;
2233 	case Opt_journal_dev:
2234 		if (is_remount) {
2235 			ext4_msg(NULL, KERN_ERR,
2236 				 "Cannot specify journal on remount");
2237 			return -EINVAL;
2238 		}
2239 		ctx->journal_devnum = result.uint_32;
2240 		ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2241 		return 0;
2242 	case Opt_journal_path:
2243 	{
2244 		struct inode *journal_inode;
2245 		struct path path;
2246 		int error;
2247 
2248 		if (is_remount) {
2249 			ext4_msg(NULL, KERN_ERR,
2250 				 "Cannot specify journal on remount");
2251 			return -EINVAL;
2252 		}
2253 
2254 		error = fs_lookup_param(fc, param, 1, LOOKUP_FOLLOW, &path);
2255 		if (error) {
2256 			ext4_msg(NULL, KERN_ERR, "error: could not find "
2257 				 "journal device path");
2258 			return -EINVAL;
2259 		}
2260 
2261 		journal_inode = d_inode(path.dentry);
2262 		ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev);
2263 		ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
2264 		path_put(&path);
2265 		return 0;
2266 	}
2267 	case Opt_journal_ioprio:
2268 		if (result.uint_32 > 7) {
2269 			ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority"
2270 				 " (must be 0-7)");
2271 			return -EINVAL;
2272 		}
2273 		ctx->journal_ioprio =
2274 			IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32);
2275 		ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO;
2276 		return 0;
2277 	case Opt_test_dummy_encryption:
2278 		return ext4_parse_test_dummy_encryption(param, ctx);
2279 	case Opt_dax:
2280 	case Opt_dax_type:
2281 #ifdef CONFIG_FS_DAX
2282 	{
2283 		int type = (token == Opt_dax) ?
2284 			   Opt_dax : result.uint_32;
2285 
2286 		switch (type) {
2287 		case Opt_dax:
2288 		case Opt_dax_always:
2289 			ctx_set_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2290 			ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2291 			break;
2292 		case Opt_dax_never:
2293 			ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2294 			ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2295 			break;
2296 		case Opt_dax_inode:
2297 			ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
2298 			ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
2299 			/* Strictly for printing options */
2300 			ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE);
2301 			break;
2302 		}
2303 		return 0;
2304 	}
2305 #else
2306 		ext4_msg(NULL, KERN_INFO, "dax option not supported");
2307 		return -EINVAL;
2308 #endif
2309 	case Opt_data_err:
2310 		if (result.uint_32 == Opt_data_err_abort)
2311 			ctx_set_mount_opt(ctx, m->mount_opt);
2312 		else if (result.uint_32 == Opt_data_err_ignore)
2313 			ctx_clear_mount_opt(ctx, m->mount_opt);
2314 		return 0;
2315 	case Opt_mb_optimize_scan:
2316 		if (result.int_32 == 1) {
2317 			ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2318 			ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2319 		} else if (result.int_32 == 0) {
2320 			ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
2321 			ctx->spec |= EXT4_SPEC_mb_optimize_scan;
2322 		} else {
2323 			ext4_msg(NULL, KERN_WARNING,
2324 				 "mb_optimize_scan should be set to 0 or 1.");
2325 			return -EINVAL;
2326 		}
2327 		return 0;
2328 	}
2329 
2330 	/*
2331 	 * At this point we should only be getting options requiring MOPT_SET
2332 	 * or MOPT_CLEAR. Anything else is a bug.
2333 	 */
2334 	if (m->token == Opt_err) {
2335 		ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s",
2336 			 param->key);
2337 		WARN_ON(1);
2338 		return -EINVAL;
2339 	}
2340 
2341 	else {
2342 		unsigned int set = 0;
2343 
2344 		if ((param->type == fs_value_is_flag) ||
2345 		    result.uint_32 > 0)
2346 			set = 1;
2347 
2348 		if (m->flags & MOPT_CLEAR)
2349 			set = !set;
2350 		else if (unlikely(!(m->flags & MOPT_SET))) {
2351 			ext4_msg(NULL, KERN_WARNING,
2352 				 "buggy handling of option %s",
2353 				 param->key);
2354 			WARN_ON(1);
2355 			return -EINVAL;
2356 		}
2357 		if (m->flags & MOPT_2) {
2358 			if (set != 0)
2359 				ctx_set_mount_opt2(ctx, m->mount_opt);
2360 			else
2361 				ctx_clear_mount_opt2(ctx, m->mount_opt);
2362 		} else {
2363 			if (set != 0)
2364 				ctx_set_mount_opt(ctx, m->mount_opt);
2365 			else
2366 				ctx_clear_mount_opt(ctx, m->mount_opt);
2367 		}
2368 	}
2369 
2370 	return 0;
2371 }
2372 
2373 static int parse_options(struct fs_context *fc, char *options)
2374 {
2375 	struct fs_parameter param;
2376 	int ret;
2377 	char *key;
2378 
2379 	if (!options)
2380 		return 0;
2381 
2382 	while ((key = strsep(&options, ",")) != NULL) {
2383 		if (*key) {
2384 			size_t v_len = 0;
2385 			char *value = strchr(key, '=');
2386 
2387 			param.type = fs_value_is_flag;
2388 			param.string = NULL;
2389 
2390 			if (value) {
2391 				if (value == key)
2392 					continue;
2393 
2394 				*value++ = 0;
2395 				v_len = strlen(value);
2396 				param.string = kmemdup_nul(value, v_len,
2397 							   GFP_KERNEL);
2398 				if (!param.string)
2399 					return -ENOMEM;
2400 				param.type = fs_value_is_string;
2401 			}
2402 
2403 			param.key = key;
2404 			param.size = v_len;
2405 
2406 			ret = ext4_parse_param(fc, &param);
2407 			/* kfree(NULL) is a no-op */
2408 			kfree(param.string);
2409 			if (ret < 0)
2410 				return ret;
2411 		}
2412 	}
2413 
2414 	ret = ext4_validate_options(fc);
2415 	if (ret < 0)
2416 		return ret;
2417 
2418 	return 0;
2419 }
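
/*
 * As an example of the flow above, an options string such as
 * "journal_ioprio=3,data=ordered" is split on ',' into "journal_ioprio=3"
 * and "data=ordered"; each piece is then split at the first '=' into a
 * key and an optional value and handed to ext4_parse_param() as a
 * synthetic struct fs_parameter.
 */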
2420 
2421 static int parse_apply_sb_mount_options(struct super_block *sb,
2422 					struct ext4_fs_context *m_ctx)
2423 {
2424 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2425 	char *s_mount_opts = NULL;
2426 	struct ext4_fs_context *s_ctx = NULL;
2427 	struct fs_context *fc = NULL;
2428 	int ret = -ENOMEM;
2429 
2430 	if (!sbi->s_es->s_mount_opts[0])
2431 		return 0;
2432 
2433 	s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
2434 				sizeof(sbi->s_es->s_mount_opts),
2435 				GFP_KERNEL);
2436 	if (!s_mount_opts)
2437 		return ret;
2438 
2439 	fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
2440 	if (!fc)
2441 		goto out_free;
2442 
2443 	s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
2444 	if (!s_ctx)
2445 		goto out_free;
2446 
2447 	fc->fs_private = s_ctx;
2448 	fc->s_fs_info = sbi;
2449 
2450 	ret = parse_options(fc, s_mount_opts);
2451 	if (ret < 0)
2452 		goto parse_failed;
2453 
2454 	ret = ext4_check_opt_consistency(fc, sb);
2455 	if (ret < 0) {
2456 parse_failed:
2457 		ext4_msg(sb, KERN_WARNING,
2458 			 "failed to parse options in superblock: %s",
2459 			 s_mount_opts);
2460 		ret = 0;
2461 		goto out_free;
2462 	}
2463 
2464 	if (s_ctx->spec & EXT4_SPEC_JOURNAL_DEV)
2465 		m_ctx->journal_devnum = s_ctx->journal_devnum;
2466 	if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)
2467 		m_ctx->journal_ioprio = s_ctx->journal_ioprio;
2468 
2469 	ext4_apply_options(fc, sb);
2470 	ret = 0;
2471 
2472 out_free:
2473 	if (fc) {
2474 		ext4_fc_free(fc);
2475 		kfree(fc);
2476 	}
2477 	kfree(s_mount_opts);
2478 	return ret;
2479 }
2480 
2481 static void ext4_apply_quota_options(struct fs_context *fc,
2482 				     struct super_block *sb)
2483 {
2484 #ifdef CONFIG_QUOTA
2485 	bool quota_feature = ext4_has_feature_quota(sb);
2486 	struct ext4_fs_context *ctx = fc->fs_private;
2487 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2488 	char *qname;
2489 	int i;
2490 
2491 	if (quota_feature)
2492 		return;
2493 
2494 	if (ctx->spec & EXT4_SPEC_JQUOTA) {
2495 		for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2496 			if (!(ctx->qname_spec & (1 << i)))
2497 				continue;
2498 
2499 			qname = ctx->s_qf_names[i]; /* May be NULL */
2500 			if (qname)
2501 				set_opt(sb, QUOTA);
2502 			ctx->s_qf_names[i] = NULL;
2503 			qname = rcu_replace_pointer(sbi->s_qf_names[i], qname,
2504 						lockdep_is_held(&sb->s_umount));
2505 			if (qname)
2506 				kfree_rcu(qname);
2507 		}
2508 	}
2509 
2510 	if (ctx->spec & EXT4_SPEC_JQFMT)
2511 		sbi->s_jquota_fmt = ctx->s_jquota_fmt;
2512 #endif
2513 }
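
/*
 * A note on the pointer dance above: sbi->s_qf_names[] is read under RCU
 * (see ext4_show_quota_options()), so a new name is swapped in with
 * rcu_replace_pointer() and the old one is freed with kfree_rcu() only
 * after a grace period, rather than by plain assignment and kfree().
 */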
2514 
2515 /*
2516  * Check quota settings consistency.
2517  */
2518 static int ext4_check_quota_consistency(struct fs_context *fc,
2519 					struct super_block *sb)
2520 {
2521 #ifdef CONFIG_QUOTA
2522 	struct ext4_fs_context *ctx = fc->fs_private;
2523 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2524 	bool quota_feature = ext4_has_feature_quota(sb);
2525 	bool quota_loaded = sb_any_quota_loaded(sb);
2526 	bool usr_qf_name, grp_qf_name, usrquota, grpquota;
2527 	int quota_flags, i;
2528 
2529 	/*
2530 	 * We do the test below only for project quotas. 'usrquota' and
2531 	 * 'grpquota' mount options are allowed even without quota feature
2532 	 * to support legacy quotas in quota files.
2533 	 */
2534 	if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) &&
2535 	    !ext4_has_feature_project(sb)) {
2536 		ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. "
2537 			 "Cannot enable project quota enforcement.");
2538 		return -EINVAL;
2539 	}
2540 
2541 	quota_flags = EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
2542 		      EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA;
2543 	if (quota_loaded &&
2544 	    ctx->mask_s_mount_opt & quota_flags &&
2545 	    !ctx_test_mount_opt(ctx, quota_flags))
2546 		goto err_quota_change;
2547 
2548 	if (ctx->spec & EXT4_SPEC_JQUOTA) {
2549 
2550 		for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2551 			if (!(ctx->qname_spec & (1 << i)))
2552 				continue;
2553 
2554 			if (quota_loaded &&
2555 			    !!sbi->s_qf_names[i] != !!ctx->s_qf_names[i])
2556 				goto err_jquota_change;
2557 
2558 			if (sbi->s_qf_names[i] && ctx->s_qf_names[i] &&
2559 			    strcmp(get_qf_name(sb, sbi, i),
2560 				   ctx->s_qf_names[i]) != 0)
2561 				goto err_jquota_specified;
2562 		}
2563 
2564 		if (quota_feature) {
2565 			ext4_msg(NULL, KERN_INFO,
2566 				 "Journaled quota options ignored when "
2567 				 "QUOTA feature is enabled");
2568 			return 0;
2569 		}
2570 	}
2571 
2572 	if (ctx->spec & EXT4_SPEC_JQFMT) {
2573 		if (sbi->s_jquota_fmt != ctx->s_jquota_fmt && quota_loaded)
2574 			goto err_jquota_change;
2575 		if (quota_feature) {
2576 			ext4_msg(NULL, KERN_INFO, "Quota format mount options "
2577 				 "ignored when QUOTA feature is enabled");
2578 			return 0;
2579 		}
2580 	}
2581 
2582 	/* Make sure we don't mix old and new quota format */
2583 	usr_qf_name = (get_qf_name(sb, sbi, USRQUOTA) ||
2584 		       ctx->s_qf_names[USRQUOTA]);
2585 	grp_qf_name = (get_qf_name(sb, sbi, GRPQUOTA) ||
2586 		       ctx->s_qf_names[GRPQUOTA]);
2587 
2588 	usrquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
2589 		    test_opt(sb, USRQUOTA));
2590 
2591 	grpquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) ||
2592 		    test_opt(sb, GRPQUOTA));
2593 
2594 	if (usr_qf_name) {
2595 		ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
2596 		usrquota = false;
2597 	}
2598 	if (grp_qf_name) {
2599 		ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
2600 		grpquota = false;
2601 	}
2602 
2603 	if (usr_qf_name || grp_qf_name) {
2604 		if (usrquota || grpquota) {
2605 			ext4_msg(NULL, KERN_ERR, "old and new quota "
2606 				 "format mixing");
2607 			return -EINVAL;
2608 		}
2609 
2610 		if (!(ctx->spec & EXT4_SPEC_JQFMT || sbi->s_jquota_fmt)) {
2611 			ext4_msg(NULL, KERN_ERR, "journaled quota format "
2612 				 "not specified");
2613 			return -EINVAL;
2614 		}
2615 	}
2616 
2617 	return 0;
2618 
2619 err_quota_change:
2620 	ext4_msg(NULL, KERN_ERR,
2621 		 "Cannot change quota options when quota turned on");
2622 	return -EINVAL;
2623 err_jquota_change:
2624 	ext4_msg(NULL, KERN_ERR, "Cannot change journaled quota "
2625 		 "options when quota turned on");
2626 	return -EINVAL;
2627 err_jquota_specified:
2628 	ext4_msg(NULL, KERN_ERR, "%s quota file already specified",
2629 		 QTYPE2NAME(i));
2630 	return -EINVAL;
2631 #else
2632 	return 0;
2633 #endif
2634 }
2635 
2636 static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
2637 					    struct super_block *sb)
2638 {
2639 	const struct ext4_fs_context *ctx = fc->fs_private;
2640 	const struct ext4_sb_info *sbi = EXT4_SB(sb);
2641 
2642 	if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy))
2643 		return 0;
2644 
2645 	if (!ext4_has_feature_encrypt(sb)) {
2646 		ext4_msg(NULL, KERN_WARNING,
2647 			 "test_dummy_encryption requires encrypt feature");
2648 		return -EINVAL;
2649 	}
2650 	/*
2651 	 * This mount option is just for testing, and it's not worthwhile to
2652 	 * implement the extra complexity (e.g. RCU protection) that would be
2653 	 * needed to allow it to be set or changed during remount.  We do allow
2654 	 * it to be specified during remount, but only if there is no change.
2655 	 */
2656 	if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
2657 		if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2658 						 &ctx->dummy_enc_policy))
2659 			return 0;
2660 		ext4_msg(NULL, KERN_WARNING,
2661 			 "Can't set or change test_dummy_encryption on remount");
2662 		return -EINVAL;
2663 	}
2664 	/* Also make sure s_mount_opts didn't contain a conflicting value. */
2665 	if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) {
2666 		if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
2667 						 &ctx->dummy_enc_policy))
2668 			return 0;
2669 		ext4_msg(NULL, KERN_WARNING,
2670 			 "Conflicting test_dummy_encryption options");
2671 		return -EINVAL;
2672 	}
2673 	return 0;
2674 }
2675 
2676 static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx,
2677 					     struct super_block *sb)
2678 {
2679 	if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) ||
2680 	    /* if already set, it was already verified to be the same */
2681 	    fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy))
2682 		return;
2683 	EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy;
2684 	memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy));
2685 	ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
2686 }
2687 
2688 static int ext4_check_opt_consistency(struct fs_context *fc,
2689 				      struct super_block *sb)
2690 {
2691 	struct ext4_fs_context *ctx = fc->fs_private;
2692 	struct ext4_sb_info *sbi = fc->s_fs_info;
2693 	int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
2694 	int err;
2695 
2696 	if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
2697 		ext4_msg(NULL, KERN_ERR,
2698 			 "Mount option(s) incompatible with ext2");
2699 		return -EINVAL;
2700 	}
2701 	if ((ctx->opt_flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
2702 		ext4_msg(NULL, KERN_ERR,
2703 			 "Mount option(s) incompatible with ext3");
2704 		return -EINVAL;
2705 	}
2706 
2707 	if (ctx->s_want_extra_isize >
2708 	    (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE)) {
2709 		ext4_msg(NULL, KERN_ERR,
2710 			 "Invalid want_extra_isize %d",
2711 			 ctx->s_want_extra_isize);
2712 		return -EINVAL;
2713 	}
2714 
2715 	if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DIOREAD_NOLOCK)) {
2716 		int blocksize =
2717 			BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
2718 		if (blocksize < PAGE_SIZE)
2719 			ext4_msg(NULL, KERN_WARNING, "Warning: mounting with an "
2720 				 "experimental mount option 'dioread_nolock' "
2721 				 "for blocksize < PAGE_SIZE");
2722 	}
2723 
2724 	err = ext4_check_test_dummy_encryption(fc, sb);
2725 	if (err)
2726 		return err;
2727 
2728 	if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) {
2729 		if (!sbi->s_journal) {
2730 			ext4_msg(NULL, KERN_WARNING,
2731 				 "Remounting file system with no journal "
2732 				 "so ignoring journalled data option");
2733 			ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
2734 		} else if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS) !=
2735 			   test_opt(sb, DATA_FLAGS)) {
2736 			ext4_msg(NULL, KERN_ERR, "Cannot change data mode "
2737 				 "on remount");
2738 			return -EINVAL;
2739 		}
2740 	}
2741 
2742 	if (is_remount) {
2743 		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2744 		    (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
2745 			ext4_msg(NULL, KERN_ERR, "can't mount with "
2746 				 "both data=journal and dax");
2747 			return -EINVAL;
2748 		}
2749 
2750 		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
2751 		    (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2752 		     (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
2753 fail_dax_change_remount:
2754 			ext4_msg(NULL, KERN_ERR, "can't change "
2755 				 "dax mount option while remounting");
2756 			return -EINVAL;
2757 		} else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER) &&
2758 			 (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2759 			  (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) {
2760 			goto fail_dax_change_remount;
2761 		} else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE) &&
2762 			   ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
2763 			    (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
2764 			    !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) {
2765 			goto fail_dax_change_remount;
2766 		}
2767 	}
2768 
2769 	return ext4_check_quota_consistency(fc, sb);
2770 }
2771 
2772 static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
2773 {
2774 	struct ext4_fs_context *ctx = fc->fs_private;
2775 	struct ext4_sb_info *sbi = fc->s_fs_info;
2776 
2777 	sbi->s_mount_opt &= ~ctx->mask_s_mount_opt;
2778 	sbi->s_mount_opt |= ctx->vals_s_mount_opt;
2779 	sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
2780 	sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
2781 	sbi->s_mount_flags &= ~ctx->mask_s_mount_flags;
2782 	sbi->s_mount_flags |= ctx->vals_s_mount_flags;
2783 	sb->s_flags &= ~ctx->mask_s_flags;
2784 	sb->s_flags |= ctx->vals_s_flags;
2785 
2786 #define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; })
2787 	APPLY(s_commit_interval);
2788 	APPLY(s_stripe);
2789 	APPLY(s_max_batch_time);
2790 	APPLY(s_min_batch_time);
2791 	APPLY(s_want_extra_isize);
2792 	APPLY(s_inode_readahead_blks);
2793 	APPLY(s_max_dir_size_kb);
2794 	APPLY(s_li_wait_mult);
2795 	APPLY(s_resgid);
2796 	APPLY(s_resuid);
2797 
2798 #ifdef CONFIG_EXT4_DEBUG
2799 	APPLY(s_fc_debug_max_replay);
2800 #endif
2801 
2802 	ext4_apply_quota_options(fc, sb);
2803 	ext4_apply_test_dummy_encryption(ctx, sb);
2804 }
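
/*
 * For reference, APPLY(s_stripe) above expands to roughly:
 *
 *	if (ctx->spec & EXT4_SPEC_s_stripe)
 *		sbi->s_stripe = ctx->s_stripe;
 *
 * i.e. a superblock-info field is only overwritten when the matching
 * mount option was actually parsed into the context.
 */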
2805 
2806 
2807 static int ext4_validate_options(struct fs_context *fc)
2808 {
2809 #ifdef CONFIG_QUOTA
2810 	struct ext4_fs_context *ctx = fc->fs_private;
2811 	char *usr_qf_name, *grp_qf_name;
2812 
2813 	usr_qf_name = ctx->s_qf_names[USRQUOTA];
2814 	grp_qf_name = ctx->s_qf_names[GRPQUOTA];
2815 
2816 	if (usr_qf_name || grp_qf_name) {
2817 		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) && usr_qf_name)
2818 			ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
2819 
2820 		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) && grp_qf_name)
2821 			ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
2822 
2823 		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
2824 		    ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA)) {
2825 			ext4_msg(NULL, KERN_ERR, "old and new quota "
2826 				 "format mixing");
2827 			return -EINVAL;
2828 		}
2829 	}
2830 #endif
2831 	return 1;
2832 }
2833 
2834 static inline void ext4_show_quota_options(struct seq_file *seq,
2835 					   struct super_block *sb)
2836 {
2837 #if defined(CONFIG_QUOTA)
2838 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2839 	char *usr_qf_name, *grp_qf_name;
2840 
2841 	if (sbi->s_jquota_fmt) {
2842 		char *fmtname = "";
2843 
2844 		switch (sbi->s_jquota_fmt) {
2845 		case QFMT_VFS_OLD:
2846 			fmtname = "vfsold";
2847 			break;
2848 		case QFMT_VFS_V0:
2849 			fmtname = "vfsv0";
2850 			break;
2851 		case QFMT_VFS_V1:
2852 			fmtname = "vfsv1";
2853 			break;
2854 		}
2855 		seq_printf(seq, ",jqfmt=%s", fmtname);
2856 	}
2857 
2858 	rcu_read_lock();
2859 	usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
2860 	grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
2861 	if (usr_qf_name)
2862 		seq_show_option(seq, "usrjquota", usr_qf_name);
2863 	if (grp_qf_name)
2864 		seq_show_option(seq, "grpjquota", grp_qf_name);
2865 	rcu_read_unlock();
2866 #endif
2867 }
2868 
2869 static const char *token2str(int token)
2870 {
2871 	const struct fs_parameter_spec *spec;
2872 
2873 	for (spec = ext4_param_specs; spec->name != NULL; spec++)
2874 		if (spec->opt == token && !spec->type)
2875 			break;
2876 	return spec->name;
2877 }
2878 
2879 /*
2880  * Show an option if
2881  *  - it's set to a non-default value, OR
2882  *  - the per-sb default is different from the global default
2883  */
2884 static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
2885 			      int nodefs)
2886 {
2887 	struct ext4_sb_info *sbi = EXT4_SB(sb);
2888 	struct ext4_super_block *es = sbi->s_es;
2889 	int def_errors;
2890 	const struct mount_opts *m;
2891 	char sep = nodefs ? '\n' : ',';
2892 
2893 #define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
2894 #define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)
2895 
2896 	if (sbi->s_sb_block != 1)
2897 		SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);
2898 
2899 	for (m = ext4_mount_opts; m->token != Opt_err; m++) {
2900 		int want_set = m->flags & MOPT_SET;
2901 		int opt_2 = m->flags & MOPT_2;
2902 		unsigned int mount_opt, def_mount_opt;
2903 
2904 		if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
2905 		    m->flags & MOPT_SKIP)
2906 			continue;
2907 
2908 		if (opt_2) {
2909 			mount_opt = sbi->s_mount_opt2;
2910 			def_mount_opt = sbi->s_def_mount_opt2;
2911 		} else {
2912 			mount_opt = sbi->s_mount_opt;
2913 			def_mount_opt = sbi->s_def_mount_opt;
2914 		}
2915 		/* skip if same as the default */
2916 		if (!nodefs && !(m->mount_opt & (mount_opt ^ def_mount_opt)))
2917 			continue;
2918 		/* select Opt_noFoo vs Opt_Foo */
2919 		if ((want_set &&
2920 		     (mount_opt & m->mount_opt) != m->mount_opt) ||
2921 		    (!want_set && (mount_opt & m->mount_opt)))
2922 			continue;
2923 		SEQ_OPTS_PRINT("%s", token2str(m->token));
2924 	}
2925 
2926 	if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
2927 	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
2928 		SEQ_OPTS_PRINT("resuid=%u",
2929 				from_kuid_munged(&init_user_ns, sbi->s_resuid));
2930 	if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
2931 	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
2932 		SEQ_OPTS_PRINT("resgid=%u",
2933 				from_kgid_munged(&init_user_ns, sbi->s_resgid));
2934 	def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
2935 	if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
2936 		SEQ_OPTS_PUTS("errors=remount-ro");
2937 	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
2938 		SEQ_OPTS_PUTS("errors=continue");
2939 	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
2940 		SEQ_OPTS_PUTS("errors=panic");
2941 	if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
2942 		SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
2943 	if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
2944 		SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
2945 	if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
2946 		SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
2947 	if (nodefs || sbi->s_stripe)
2948 		SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
2949 	if (nodefs || EXT4_MOUNT_DATA_FLAGS &
2950 			(sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
2951 		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
2952 			SEQ_OPTS_PUTS("data=journal");
2953 		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
2954 			SEQ_OPTS_PUTS("data=ordered");
2955 		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
2956 			SEQ_OPTS_PUTS("data=writeback");
2957 	}
2958 	if (nodefs ||
2959 	    sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
2960 		SEQ_OPTS_PRINT("inode_readahead_blks=%u",
2961 			       sbi->s_inode_readahead_blks);
2962 
2963 	if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
2964 		       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
2965 		SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
2966 	if (nodefs || sbi->s_max_dir_size_kb)
2967 		SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
2968 	if (test_opt(sb, DATA_ERR_ABORT))
2969 		SEQ_OPTS_PUTS("data_err=abort");
2970 
2971 	fscrypt_show_test_dummy_encryption(seq, sep, sb);
2972 
2973 	if (sb->s_flags & SB_INLINECRYPT)
2974 		SEQ_OPTS_PUTS("inlinecrypt");
2975 
2976 	if (test_opt(sb, DAX_ALWAYS)) {
2977 		if (IS_EXT2_SB(sb))
2978 			SEQ_OPTS_PUTS("dax");
2979 		else
2980 			SEQ_OPTS_PUTS("dax=always");
2981 	} else if (test_opt2(sb, DAX_NEVER)) {
2982 		SEQ_OPTS_PUTS("dax=never");
2983 	} else if (test_opt2(sb, DAX_INODE)) {
2984 		SEQ_OPTS_PUTS("dax=inode");
2985 	}
2986 
2987 	if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
2988 			!test_opt2(sb, MB_OPTIMIZE_SCAN)) {
2989 		SEQ_OPTS_PUTS("mb_optimize_scan=0");
2990 	} else if (sbi->s_groups_count < MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
2991 			test_opt2(sb, MB_OPTIMIZE_SCAN)) {
2992 		SEQ_OPTS_PUTS("mb_optimize_scan=1");
2993 	}
2994 
2995 	ext4_show_quota_options(seq, sb);
2996 	return 0;
2997 }
2998 
2999 static int ext4_show_options(struct seq_file *seq, struct dentry *root)
3000 {
3001 	return _ext4_show_options(seq, root->d_sb, 0);
3002 }
3003 
3004 int ext4_seq_options_show(struct seq_file *seq, void *offset)
3005 {
3006 	struct super_block *sb = seq->private;
3007 	int rc;
3008 
3009 	seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
3010 	rc = _ext4_show_options(seq, sb, 1);
3011 	seq_puts(seq, "\n");
3012 	return rc;
3013 }
3014 
3015 static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
3016 			    int read_only)
3017 {
3018 	struct ext4_sb_info *sbi = EXT4_SB(sb);
3019 	int err = 0;
3020 
3021 	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
3022 		ext4_msg(sb, KERN_ERR, "revision level too high, "
3023 			 "forcing read-only mode");
3024 		err = -EROFS;
3025 		goto done;
3026 	}
3027 	if (read_only)
3028 		goto done;
3029 	if (!(sbi->s_mount_state & EXT4_VALID_FS))
3030 		ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
3031 			 "running e2fsck is recommended");
3032 	else if (sbi->s_mount_state & EXT4_ERROR_FS)
3033 		ext4_msg(sb, KERN_WARNING,
3034 			 "warning: mounting fs with errors, "
3035 			 "running e2fsck is recommended");
3036 	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
3037 		 le16_to_cpu(es->s_mnt_count) >=
3038 		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
3039 		ext4_msg(sb, KERN_WARNING,
3040 			 "warning: maximal mount count reached, "
3041 			 "running e2fsck is recommended");
3042 	else if (le32_to_cpu(es->s_checkinterval) &&
3043 		 (ext4_get_tstamp(es, s_lastcheck) +
3044 		  le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
3045 		ext4_msg(sb, KERN_WARNING,
3046 			 "warning: checktime reached, "
3047 			 "running e2fsck is recommended");
3048 	if (!sbi->s_journal)
3049 		es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
3050 	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
3051 		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
3052 	le16_add_cpu(&es->s_mnt_count, 1);
3053 	ext4_update_tstamp(es, s_mtime);
3054 	if (sbi->s_journal) {
3055 		ext4_set_feature_journal_needs_recovery(sb);
3056 		if (ext4_has_feature_orphan_file(sb))
3057 			ext4_set_feature_orphan_present(sb);
3058 	}
3059 
3060 	err = ext4_commit_super(sb);
3061 done:
3062 	if (test_opt(sb, DEBUG))
3063 		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
3064 				"bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
3065 			sb->s_blocksize,
3066 			sbi->s_groups_count,
3067 			EXT4_BLOCKS_PER_GROUP(sb),
3068 			EXT4_INODES_PER_GROUP(sb),
3069 			sbi->s_mount_opt, sbi->s_mount_opt2);
3070 
3071 	cleancache_init_fs(sb);
3072 	return err;
3073 }
3074 
3075 int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
3076 {
3077 	struct ext4_sb_info *sbi = EXT4_SB(sb);
3078 	struct flex_groups **old_groups, **new_groups;
3079 	int size, i, j;
3080 
3081 	if (!sbi->s_log_groups_per_flex)
3082 		return 0;
3083 
3084 	size = ext4_flex_group(sbi, ngroup - 1) + 1;
3085 	if (size <= sbi->s_flex_groups_allocated)
3086 		return 0;
3087 
3088 	new_groups = kvzalloc(roundup_pow_of_two(size *
3089 			      sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
3090 	if (!new_groups) {
3091 		ext4_msg(sb, KERN_ERR,
3092 			 "not enough memory for %d flex group pointers", size);
3093 		return -ENOMEM;
3094 	}
3095 	for (i = sbi->s_flex_groups_allocated; i < size; i++) {
3096 		new_groups[i] = kvzalloc(roundup_pow_of_two(
3097 					 sizeof(struct flex_groups)),
3098 					 GFP_KERNEL);
3099 		if (!new_groups[i]) {
3100 			for (j = sbi->s_flex_groups_allocated; j < i; j++)
3101 				kvfree(new_groups[j]);
3102 			kvfree(new_groups);
3103 			ext4_msg(sb, KERN_ERR,
3104 				 "not enough memory for %d flex groups", size);
3105 			return -ENOMEM;
3106 		}
3107 	}
3108 	rcu_read_lock();
3109 	old_groups = rcu_dereference(sbi->s_flex_groups);
3110 	if (old_groups)
3111 		memcpy(new_groups, old_groups,
3112 		       (sbi->s_flex_groups_allocated *
3113 			sizeof(struct flex_groups *)));
3114 	rcu_read_unlock();
3115 	rcu_assign_pointer(sbi->s_flex_groups, new_groups);
3116 	sbi->s_flex_groups_allocated = size;
3117 	if (old_groups)
3118 		ext4_kvfree_array_rcu(old_groups);
3119 	return 0;
3120 }
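
/*
 * The resize above follows the usual RCU copy-then-publish pattern: the
 * larger pointer array is filled in, the old contents are copied across
 * under rcu_read_lock(), the new array is published with
 * rcu_assign_pointer(), and the old array is freed only after a grace
 * period via ext4_kvfree_array_rcu().
 */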
3121 
3122 static int ext4_fill_flex_info(struct super_block *sb)
3123 {
3124 	struct ext4_sb_info *sbi = EXT4_SB(sb);
3125 	struct ext4_group_desc *gdp = NULL;
3126 	struct flex_groups *fg;
3127 	ext4_group_t flex_group;
3128 	int i, err;
3129 
3130 	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
3131 	if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
3132 		sbi->s_log_groups_per_flex = 0;
3133 		return 1;
3134 	}
3135 
3136 	err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
3137 	if (err)
3138 		goto failed;
3139 
3140 	for (i = 0; i < sbi->s_groups_count; i++) {
3141 		gdp = ext4_get_group_desc(sb, i, NULL);
3142 
3143 		flex_group = ext4_flex_group(sbi, i);
3144 		fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
3145 		atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
3146 		atomic64_add(ext4_free_group_clusters(sb, gdp),
3147 			     &fg->free_clusters);
3148 		atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
3149 	}
3150 
3151 	return 1;
3152 failed:
3153 	return 0;
3154 }
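
/*
 * Note the inverted return convention above: ext4_fill_flex_info()
 * returns 1 on success and 0 on failure, unlike most ext4 helpers.
 */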
3155 
3156 static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
3157 				   struct ext4_group_desc *gdp)
3158 {
3159 	int offset = offsetof(struct ext4_group_desc, bg_checksum);
3160 	__u16 crc = 0;
3161 	__le32 le_group = cpu_to_le32(block_group);
3162 	struct ext4_sb_info *sbi = EXT4_SB(sb);
3163 
3164 	if (ext4_has_metadata_csum(sbi->s_sb)) {
3165 		/* Use new metadata_csum algorithm */
3166 		__u32 csum32;
3167 		__u16 dummy_csum = 0;
3168 
3169 		csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
3170 				     sizeof(le_group));
3171 		csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
3172 		csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
3173 				     sizeof(dummy_csum));
3174 		offset += sizeof(dummy_csum);
3175 		if (offset < sbi->s_desc_size)
3176 			csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
3177 					     sbi->s_desc_size - offset);
3178 
3179 		crc = csum32 & 0xFFFF;
3180 		goto out;
3181 	}
3182 
3183 	/* old crc16 code */
3184 	if (!ext4_has_feature_gdt_csum(sb))
3185 		return 0;
3186 
3187 	crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
3188 	crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
3189 	crc = crc16(crc, (__u8 *)gdp, offset);
3190 	offset += sizeof(gdp->bg_checksum); /* skip checksum */
3191 	/* for checksum of struct ext4_group_desc do the rest...*/
3192 	if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
3193 		crc = crc16(crc, (__u8 *)gdp + offset,
3194 			    sbi->s_desc_size - offset);
3195 
3196 out:
3197 	return cpu_to_le16(crc);
3198 }
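
/*
 * Illustrative sketch (not part of the driver): the general technique
 * used by the metadata_csum branch above when a structure embeds its
 * own checksum.  The stored field is replaced by a zero "dummy" in the
 * running crc so the on-disk value never influences the computation,
 * and the 32-bit crc32c is folded to the 16-bit on-disk field by
 * masking.  The "example_" name is hypothetical.
 */
static inline __le16 example_csum_with_hole(struct ext4_sb_info *sbi,
					    const void *obj, unsigned int size,
					    unsigned int csum_off)
{
	__u16 dummy_csum = 0;
	__u32 csum32;

	/* bytes up to the embedded checksum field */
	csum32 = ext4_chksum(sbi, sbi->s_csum_seed, obj, csum_off);
	/* zeroes standing in for the stored checksum itself */
	csum32 = ext4_chksum(sbi, csum32, &dummy_csum, sizeof(dummy_csum));
	/* remaining bytes after the field */
	csum32 = ext4_chksum(sbi, csum32,
			     (const __u8 *)obj + csum_off + sizeof(dummy_csum),
			     size - csum_off - sizeof(dummy_csum));
	/* fold the 32-bit crc32c down to the 16-bit on-disk field */
	return cpu_to_le16(csum32 & 0xFFFF);
}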
3199 
3200 int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
3201 				struct ext4_group_desc *gdp)
3202 {
3203 	if (ext4_has_group_desc_csum(sb) &&
3204 	    (gdp->bg_checksum != ext4_group_desc_csum(sb, block_group, gdp)))
3205 		return 0;
3206 
3207 	return 1;
3208 }
3209 
3210 void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
3211 			      struct ext4_group_desc *gdp)
3212 {
3213 	if (!ext4_has_group_desc_csum(sb))
3214 		return;
3215 	gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
3216 }
3217 
3218 /* Called at mount-time, super-block is locked */
3219 static int ext4_check_descriptors(struct super_block *sb,
3220 				  ext4_fsblk_t sb_block,
3221 				  ext4_group_t *first_not_zeroed)
3222 {
3223 	struct ext4_sb_info *sbi = EXT4_SB(sb);
3224 	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
3225 	ext4_fsblk_t last_block;
3226 	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
3227 	ext4_fsblk_t block_bitmap;
3228 	ext4_fsblk_t inode_bitmap;
3229 	ext4_fsblk_t inode_table;
3230 	int flexbg_flag = 0;
3231 	ext4_group_t i, grp = sbi->s_groups_count;
3232 
3233 	if (ext4_has_feature_flex_bg(sb))
3234 		flexbg_flag = 1;
3235 
3236 	ext4_debug("Checking group descriptors");
3237 
3238 	for (i = 0; i < sbi->s_groups_count; i++) {
3239 		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
3240 
3241 		if (i == sbi->s_groups_count - 1 || flexbg_flag)
3242 			last_block = ext4_blocks_count(sbi->s_es) - 1;
3243 		else
3244 			last_block = first_block +
3245 				(EXT4_BLOCKS_PER_GROUP(sb) - 1);
3246 
3247 		if ((grp == sbi->s_groups_count) &&
3248 		   !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3249 			grp = i;
3250 
3251 		block_bitmap = ext4_block_bitmap(sb, gdp);
3252 		if (block_bitmap == sb_block) {
3253 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3254 				 "Block bitmap for group %u overlaps "
3255 				 "superblock", i);
3256 			if (!sb_rdonly(sb))
3257 				return 0;
3258 		}
3259 		if (block_bitmap >= sb_block + 1 &&
3260 		    block_bitmap <= last_bg_block) {
3261 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3262 				 "Block bitmap for group %u overlaps "
3263 				 "block group descriptors", i);
3264 			if (!sb_rdonly(sb))
3265 				return 0;
3266 		}
3267 		if (block_bitmap < first_block || block_bitmap > last_block) {
3268 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3269 			       "Block bitmap for group %u not in group "
3270 			       "(block %llu)!", i, block_bitmap);
3271 			return 0;
3272 		}
3273 		inode_bitmap = ext4_inode_bitmap(sb, gdp);
3274 		if (inode_bitmap == sb_block) {
3275 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3276 				 "Inode bitmap for group %u overlaps "
3277 				 "superblock", i);
3278 			if (!sb_rdonly(sb))
3279 				return 0;
3280 		}
3281 		if (inode_bitmap >= sb_block + 1 &&
3282 		    inode_bitmap <= last_bg_block) {
3283 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3284 				 "Inode bitmap for group %u overlaps "
3285 				 "block group descriptors", i);
3286 			if (!sb_rdonly(sb))
3287 				return 0;
3288 		}
3289 		if (inode_bitmap < first_block || inode_bitmap > last_block) {
3290 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3291 			       "Inode bitmap for group %u not in group "
3292 			       "(block %llu)!", i, inode_bitmap);
3293 			return 0;
3294 		}
3295 		inode_table = ext4_inode_table(sb, gdp);
3296 		if (inode_table == sb_block) {
3297 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3298 				 "Inode table for group %u overlaps "
3299 				 "superblock", i);
3300 			if (!sb_rdonly(sb))
3301 				return 0;
3302 		}
3303 		if (inode_table >= sb_block + 1 &&
3304 		    inode_table <= last_bg_block) {
3305 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3306 				 "Inode table for group %u overlaps "
3307 				 "block group descriptors", i);
3308 			if (!sb_rdonly(sb))
3309 				return 0;
3310 		}
3311 		if (inode_table < first_block ||
3312 		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
3313 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3314 			       "Inode table for group %u not in group "
3315 			       "(block %llu)!", i, inode_table);
3316 			return 0;
3317 		}
3318 		ext4_lock_group(sb, i);
3319 		if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
3320 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
3321 				 "Checksum for group %u failed (%u!=%u)",
3322 				 i, le16_to_cpu(ext4_group_desc_csum(sb, i,
3323 				     gdp)), le16_to_cpu(gdp->bg_checksum));
3324 			if (!sb_rdonly(sb)) {
3325 				ext4_unlock_group(sb, i);
3326 				return 0;
3327 			}
3328 		}
3329 		ext4_unlock_group(sb, i);
3330 		if (!flexbg_flag)
3331 			first_block += EXT4_BLOCKS_PER_GROUP(sb);
3332 	}
3333 	if (NULL != first_not_zeroed)
3334 		*first_not_zeroed = grp;
3335 	return 1;
3336 }
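
/*
 * Worked example (illustrative) of the invariants checked above: with
 * 4KiB blocks and no flex_bg, group i spans blocks
 * [first_block, first_block + 32767].  Its block bitmap, inode bitmap
 * and inode table (s_itb_per_group blocks) must all fall inside that
 * range, and must not collide with the primary superblock at sb_block
 * or with its group descriptors in
 * (sb_block, sb_block + ext4_bg_num_gdb(sb, 0)].
 */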
3337 
3338 /*
3339  * Maximal extent format file size.
3340  * Resulting logical blkno at s_maxbytes must fit in our on-disk
3341  * extent format containers, within a sector_t, and within i_blocks
3342  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
3343  * so that won't be a limiting factor.
3344  *
3345  * However, there is another limiting factor: we store extents as a
3346  * starting block plus a length, so the length of the extent covering
3347  * the maximum file size must also fit into the on-disk containers.
3348  * Since the length is always one unit bigger than the largest offset
3349  * (because we count 0 as well), we lower s_maxbytes by one fs block.
3350  *
3351  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
3352  */
3353 static loff_t ext4_max_size(int blkbits, int has_huge_files)
3354 {
3355 	loff_t res;
3356 	loff_t upper_limit = MAX_LFS_FILESIZE;
3357 
3358 	BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
3359 
3360 	if (!has_huge_files) {
3361 		upper_limit = (1LL << 32) - 1;
3362 
3363 		/* total blocks in file system block size */
3364 		upper_limit >>= (blkbits - 9);
3365 		upper_limit <<= blkbits;
3366 	}
3367 
3368 	/*
3369 	 * 32-bit extent-start container, ee_block. We lower the maxbytes
3370 	 * by one fs block, so ee_len can cover the extent of maximum file
3371 	 * size
3372 	 */
3373 	res = (1LL << 32) - 1;
3374 	res <<= blkbits;
3375 
3376 	/* Sanity check against vm- & vfs- imposed limits */
3377 	if (res > upper_limit)
3378 		res = upper_limit;
3379 
3380 	return res;
3381 }
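
/*
 * Worked example (illustrative): with 4KiB blocks (blkbits == 12) and
 * the huge_file feature, res = ((1LL << 32) - 1) << 12, i.e. 16 TiB
 * minus one 4KiB block.  Without huge_file, i_blocks counts 512-byte
 * sectors, so upper_limit = (((1LL << 32) - 1) >> 3) << 12, roughly
 * 2 TiB, and that becomes the cap instead.
 */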
3382 
3383 /*
3384  * Maximal bitmap file size.  There is a direct, and {,double-,triple-}indirect
3385  * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
3386  * We need to be 1 filesystem block less than the 2^48 sector limit.
3387  */
3388 static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
3389 {
3390 	loff_t upper_limit, res = EXT4_NDIR_BLOCKS;
3391 	int meta_blocks;
3392 	unsigned int ppb = 1 << (bits - 2);
3393 
3394 	/*
3395 	 * This is calculated to be the largest file size for a dense, block
3396 	 * mapped file such that the file's total number of 512-byte sectors,
3397 	 * including data and all indirect blocks, does not exceed (2^48 - 1).
3398 	 *
3399  * __u32 i_blocks_lo and __u16 i_blocks_high represent the total
3400 	 * number of 512-byte sectors of the file.
3401 	 */
3402 	if (!has_huge_files) {
3403 		/*
3404 		 * !has_huge_files implies that the inode i_blocks field
3405 		 * represents the total file size in 512-byte sectors,
3406 		 * limited to 2^32 of them
3407 		 */
3408 		upper_limit = (1LL << 32) - 1;
3409 
3410 		/* total blocks in file system block size */
3411 		upper_limit >>= (bits - 9);
3412 
3413 	} else {
3414 		/*
3415 		 * We use the 48-bit ext4_inode i_blocks.
3416 		 * With EXT4_HUGE_FILE_FL set, i_blocks
3417 		 * represents the total number of blocks in
3418 		 * file system block size units
3419 		 */
3420 		upper_limit = (1LL << 48) - 1;
3421 
3422 	}
3423 
3424 	/* Compute how many blocks we can address by block tree */
3425 	res += ppb;
3426 	res += ppb * ppb;
3427 	res += ((loff_t)ppb) * ppb * ppb;
3428 	/* Compute how many metadata blocks are needed */
3429 	meta_blocks = 1;
3430 	meta_blocks += 1 + ppb;
3431 	meta_blocks += 1 + ppb + ppb * ppb;
3432 	/* Does block tree limit file size? */
3433 	if (res + meta_blocks <= upper_limit)
3434 		goto check_lfs;
3435 
3436 	res = upper_limit;
3437 	/* How many metadata blocks are needed for addressing upper_limit? */
3438 	upper_limit -= EXT4_NDIR_BLOCKS;
3439 	/* indirect blocks */
3440 	meta_blocks = 1;
3441 	upper_limit -= ppb;
3442 	/* double indirect blocks */
3443 	if (upper_limit < ppb * ppb) {
3444 		meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb);
3445 		res -= meta_blocks;
3446 		goto check_lfs;
3447 	}
3448 	meta_blocks += 1 + ppb;
3449 	upper_limit -= ppb * ppb;
3450 	/* triple indirect blocks for the rest */
3451 	meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) +
3452 		DIV_ROUND_UP_ULL(upper_limit, ppb*ppb);
3453 	res -= meta_blocks;
3454 check_lfs:
3455 	res <<= bits;
3456 	if (res > MAX_LFS_FILESIZE)
3457 		res = MAX_LFS_FILESIZE;
3458 
3459 	return res;
3460 }
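
/*
 * Worked example (illustrative): with 4KiB blocks (bits == 12),
 * ppb == 1 << 10 == 1024 block numbers per indirect block, so the
 * block tree addresses 12 + 1024 + 1024^2 + 1024^3 blocks, a bit over
 * 2^30 blocks (~4 TiB of data).  Without huge_file, the i_blocks
 * sector limit caps upper_limit at (2^32 - 1) >> 3 blocks (~2 TiB),
 * so res is clamped there and the metadata blocks needed to address
 * that range are subtracted from it.
 */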
3461 
3462 static ext4_fsblk_t descriptor_loc(struct super_block *sb,
3463 				   ext4_fsblk_t logical_sb_block, int nr)
3464 {
3465 	struct ext4_sb_info *sbi = EXT4_SB(sb);
3466 	ext4_group_t bg, first_meta_bg;
3467 	int has_super = 0;
3468 
3469 	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
3470 
3471 	if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
3472 		return logical_sb_block + nr + 1;
3473 	bg = sbi->s_desc_per_block * nr;
3474 	if (ext4_bg_has_super(sb, bg))
3475 		has_super = 1;
3476 
3477 	/*
3478 	 * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
3479 	 * block 2, not 1.  If s_first_data_block == 0 (bigalloc is enabled
3480 	 * on modern mke2fs or blksize > 1k on older mke2fs) then we must
3481 	 * compensate.
3482 	 */
3483 	if (sb->s_blocksize == 1024 && nr == 0 &&
3484 	    le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
3485 		has_super++;
3486 
3487 	return (has_super + ext4_group_first_block_no(sb, bg));
3488 }
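
/*
 * Worked example (illustrative): on a meta_bg filesystem with 4KiB
 * blocks and 64-byte descriptors, s_desc_per_block == 64.  Descriptor
 * block nr == 2 (with first_meta_bg <= 2) then lives in block group
 * bg == 128, at that group's first block, plus one if group 128
 * carries a sparse_super backup of the superblock.
 */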
3489 
3490 /**
3491  * ext4_get_stripe_size: Get the stripe size.
3492  * @sbi: In memory super block info
3493  *
3494  * If the stripe size was specified via a mount option and is no
3495  * larger than the blocks per group, use it.  Otherwise fall back to
3496  * the superblock stripe width, then the superblock stride, applying
3497  * the same limit.  If no candidate fits, return 0; the allocator
3498  * needs the stripe size to be less than the blocks per group.
3499  *
3500  */
3501 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
3502 {
3503 	unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
3504 	unsigned long stripe_width =
3505 			le32_to_cpu(sbi->s_es->s_raid_stripe_width);
3506 	int ret;
3507 
3508 	if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
3509 		ret = sbi->s_stripe;
3510 	else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
3511 		ret = stripe_width;
3512 	else if (stride && stride <= sbi->s_blocks_per_group)
3513 		ret = stride;
3514 	else
3515 		ret = 0;
3516 
3517 	/*
3518 	 * If the stripe width is 1, this makes no sense and
3519 	 * we set it to 0 to turn off stripe handling code.
3520 	 */
3521 	if (ret <= 1)
3522 		ret = 0;
3523 
3524 	return ret;
3525 }
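
/*
 * Precedence summary (illustrative): an explicit "stripe=" mount
 * option wins, then the superblock s_raid_stripe_width, then
 * s_raid_stride; a candidate larger than the blocks per group is
 * skipped, and a final value of 0 or 1 disables striping entirely.
 */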
3526 
3527 /*
3528  * Check whether this filesystem can be mounted based on
3529  * the features present and the RDONLY/RDWR mount requested.
3530  * Returns 1 if this filesystem can be mounted as requested,
3531  * 0 if it cannot be.
3532  */
3533 int ext4_feature_set_ok(struct super_block *sb, int readonly)
3534 {
3535 	if (ext4_has_unknown_ext4_incompat_features(sb)) {
3536 		ext4_msg(sb, KERN_ERR,
3537 			"Couldn't mount because of "
3538 			"unsupported optional features (%x)",
3539 			(le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
3540 			~EXT4_FEATURE_INCOMPAT_SUPP));
3541 		return 0;
3542 	}
3543 
3544 #if !IS_ENABLED(CONFIG_UNICODE)
3545 	if (ext4_has_feature_casefold(sb)) {
3546 		ext4_msg(sb, KERN_ERR,
3547 			 "Filesystem with casefold feature cannot be "
3548 			 "mounted without CONFIG_UNICODE");
3549 		return 0;
3550 	}
3551 #endif
3552 
3553 	if (readonly)
3554 		return 1;
3555 
3556 	if (ext4_has_feature_readonly(sb)) {
3557 		ext4_msg(sb, KERN_INFO, "filesystem is read-only");
3558 		sb->s_flags |= SB_RDONLY;
3559 		return 1;
3560 	}
3561 
3562 	/* Check that feature set is OK for a read-write mount */
3563 	if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
3564 		ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
3565 			 "unsupported optional features (%x)",
3566 			 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
3567 				~EXT4_FEATURE_RO_COMPAT_SUPP));
3568 		return 0;
3569 	}
3570 	if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
3571 		ext4_msg(sb, KERN_ERR,
3572 			 "Can't support bigalloc feature without "
3573 			 "extents feature\n");
3574 		return 0;
3575 	}
3576 
3577 #if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
3578 	if (!readonly && (ext4_has_feature_quota(sb) ||
3579 			  ext4_has_feature_project(sb))) {
3580 		ext4_msg(sb, KERN_ERR,
3581 			 "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
3582 		return 0;
3583 	}
3584 #endif  /* CONFIG_QUOTA */
3585 	return 1;
3586 }
3587 
3588 /*
3589  * This function is called once a day if we have errors logged
3590  * on the file system
3591  */
3592 static void print_daily_error_info(struct timer_list *t)
3593 {
3594 	struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
3595 	struct super_block *sb = sbi->s_sb;
3596 	struct ext4_super_block *es = sbi->s_es;
3597 
3598 	if (es->s_error_count)
3599 		/* fsck newer than v1.41.13 is needed to clean this condition. */
3600 		ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
3601 			 le32_to_cpu(es->s_error_count));
3602 	if (es->s_first_error_time) {
3603 		printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
3604 		       sb->s_id,
3605 		       ext4_get_tstamp(es, s_first_error_time),
3606 		       (int) sizeof(es->s_first_error_func),
3607 		       es->s_first_error_func,
3608 		       le32_to_cpu(es->s_first_error_line));
3609 		if (es->s_first_error_ino)
3610 			printk(KERN_CONT ": inode %u",
3611 			       le32_to_cpu(es->s_first_error_ino));
3612 		if (es->s_first_error_block)
3613 			printk(KERN_CONT ": block %llu", (unsigned long long)
3614 			       le64_to_cpu(es->s_first_error_block));
3615 		printk(KERN_CONT "\n");
3616 	}
3617 	if (es->s_last_error_time) {
3618 		printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
3619 		       sb->s_id,
3620 		       ext4_get_tstamp(es, s_last_error_time),
3621 		       (int) sizeof(es->s_last_error_func),
3622 		       es->s_last_error_func,
3623 		       le32_to_cpu(es->s_last_error_line));
3624 		if (es->s_last_error_ino)
3625 			printk(KERN_CONT ": inode %u",
3626 			       le32_to_cpu(es->s_last_error_ino));
3627 		if (es->s_last_error_block)
3628 			printk(KERN_CONT ": block %llu", (unsigned long long)
3629 			       le64_to_cpu(es->s_last_error_block));
3630 		printk(KERN_CONT "\n");
3631 	}
3632 	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
3633 }
3634 
3635 /* Find next suitable group and run ext4_init_inode_table */
3636 static int ext4_run_li_request(struct ext4_li_request *elr)
3637 {
3638 	struct ext4_group_desc *gdp = NULL;
3639 	struct super_block *sb = elr->lr_super;
3640 	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
3641 	ext4_group_t group = elr->lr_next_group;
3642 	unsigned int prefetch_ios = 0;
3643 	int ret = 0;
3644 	u64 start_time;
3645 
3646 	if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
3647 		elr->lr_next_group = ext4_mb_prefetch(sb, group,
3648 				EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
3649 		if (prefetch_ios)
3650 			ext4_mb_prefetch_fini(sb, elr->lr_next_group,
3651 					      prefetch_ios);
3652 		trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
3653 					    prefetch_ios);
3654 		if (group >= elr->lr_next_group) {
3655 			ret = 1;
3656 			if (elr->lr_first_not_zeroed != ngroups &&
3657 			    !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
3658 				elr->lr_next_group = elr->lr_first_not_zeroed;
3659 				elr->lr_mode = EXT4_LI_MODE_ITABLE;
3660 				ret = 0;
3661 			}
3662 		}
3663 		return ret;
3664 	}
3665 
3666 	for (; group < ngroups; group++) {
3667 		gdp = ext4_get_group_desc(sb, group, NULL);
3668 		if (!gdp) {
3669 			ret = 1;
3670 			break;
3671 		}
3672 
3673 		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3674 			break;
3675 	}
3676 
3677 	if (group >= ngroups)
3678 		ret = 1;
3679 
3680 	if (!ret) {
3681 		start_time = ktime_get_real_ns();
3682 		ret = ext4_init_inode_table(sb, group,
3683 					    elr->lr_timeout ? 0 : 1);
3684 		trace_ext4_lazy_itable_init(sb, group);
3685 		if (elr->lr_timeout == 0) {
3686 			elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) *
3687 				EXT4_SB(elr->lr_super)->s_li_wait_mult);
3688 		}
3689 		elr->lr_next_sched = jiffies + elr->lr_timeout;
3690 		elr->lr_next_group = group + 1;
3691 	}
3692 	return ret;
3693 }
3694 
3695 /*
3696  * Remove the request from the request list and free the
3697  * request structure.  Must be called with li_list_mtx held.
3698  */
3699 static void ext4_remove_li_request(struct ext4_li_request *elr)
3700 {
3701 	if (!elr)
3702 		return;
3703 
3704 	list_del(&elr->lr_request);
3705 	EXT4_SB(elr->lr_super)->s_li_request = NULL;
3706 	kfree(elr);
3707 }
3708 
3709 static void ext4_unregister_li_request(struct super_block *sb)
3710 {
3711 	mutex_lock(&ext4_li_mtx);
3712 	if (!ext4_li_info) {
3713 		mutex_unlock(&ext4_li_mtx);
3714 		return;
3715 	}
3716 
3717 	mutex_lock(&ext4_li_info->li_list_mtx);
3718 	ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
3719 	mutex_unlock(&ext4_li_info->li_list_mtx);
3720 	mutex_unlock(&ext4_li_mtx);
3721 }
3722 
3723 static struct task_struct *ext4_lazyinit_task;
3724 
3725 /*
3726  * This is the function where the ext4lazyinit thread lives. It walks
3727  * through the request list searching for the next scheduled filesystem.
3728  * When such a fs is found, run the lazy initialization request
3729  * (ext4_run_li_request) and keep track of the time spent in this
3730  * function. Based on that time we compute the next schedule time of
3731  * the request. When walking through the list is complete, compute the
3732  * next wakeup time and put the thread to sleep.
3733  */
3734 static int ext4_lazyinit_thread(void *arg)
3735 {
3736 	struct ext4_lazy_init *eli = arg;
3737 	struct list_head *pos, *n;
3738 	struct ext4_li_request *elr;
3739 	unsigned long next_wakeup, cur;
3740 
3741 	BUG_ON(NULL == eli);
3742 	set_freezable();
3743 
3744 cont_thread:
3745 	while (true) {
3746 		next_wakeup = MAX_JIFFY_OFFSET;
3747 
3748 		mutex_lock(&eli->li_list_mtx);
3749 		if (list_empty(&eli->li_request_list)) {
3750 			mutex_unlock(&eli->li_list_mtx);
3751 			goto exit_thread;
3752 		}
3753 		list_for_each_safe(pos, n, &eli->li_request_list) {
3754 			int err = 0;
3755 			int progress = 0;
3756 			elr = list_entry(pos, struct ext4_li_request,
3757 					 lr_request);
3758 
3759 			if (time_before(jiffies, elr->lr_next_sched)) {
3760 				if (time_before(elr->lr_next_sched, next_wakeup))
3761 					next_wakeup = elr->lr_next_sched;
3762 				continue;
3763 			}
3764 			if (down_read_trylock(&elr->lr_super->s_umount)) {
3765 				if (sb_start_write_trylock(elr->lr_super)) {
3766 					progress = 1;
3767 					/*
3768 					 * We hold sb->s_umount, sb can not
3769 					 * be removed from the list, it is
3770 					 * now safe to drop li_list_mtx
3771 					 */
3772 					mutex_unlock(&eli->li_list_mtx);
3773 					err = ext4_run_li_request(elr);
3774 					sb_end_write(elr->lr_super);
3775 					mutex_lock(&eli->li_list_mtx);
3776 					n = pos->next;
3777 				}
3778 				up_read((&elr->lr_super->s_umount));
3779 			}
3780 			/* error, remove the lazy_init job */
3781 			if (err) {
3782 				ext4_remove_li_request(elr);
3783 				continue;
3784 			}
3785 			if (!progress) {
3786 				elr->lr_next_sched = jiffies +
3787 					prandom_u32_max(EXT4_DEF_LI_MAX_START_DELAY * HZ);
3788 			}
3789 			if (time_before(elr->lr_next_sched, next_wakeup))
3790 				next_wakeup = elr->lr_next_sched;
3791 		}
3792 		mutex_unlock(&eli->li_list_mtx);
3793 
3794 		try_to_freeze();
3795 
3796 		cur = jiffies;
3797 		if ((time_after_eq(cur, next_wakeup)) ||
3798 		    (MAX_JIFFY_OFFSET == next_wakeup)) {
3799 			cond_resched();
3800 			continue;
3801 		}
3802 
3803 		schedule_timeout_interruptible(next_wakeup - cur);
3804 
3805 		if (kthread_should_stop()) {
3806 			ext4_clear_request_list();
3807 			goto exit_thread;
3808 		}
3809 	}
3810 
3811 exit_thread:
3812 	/*
3813 	 * It looks like the request list is empty, but we need
3814 	 * to check it under the li_list_mtx lock, to prevent any
3815 	 * additions into it, and of course we should lock ext4_li_mtx
3816 	 * to atomically free the list and ext4_li_info, because at
3817  * this point another ext4 filesystem could be registering a
3818  * new one.
3819 	 */
3820 	mutex_lock(&ext4_li_mtx);
3821 	mutex_lock(&eli->li_list_mtx);
3822 	if (!list_empty(&eli->li_request_list)) {
3823 		mutex_unlock(&eli->li_list_mtx);
3824 		mutex_unlock(&ext4_li_mtx);
3825 		goto cont_thread;
3826 	}
3827 	mutex_unlock(&eli->li_list_mtx);
3828 	kfree(ext4_li_info);
3829 	ext4_li_info = NULL;
3830 	mutex_unlock(&ext4_li_mtx);
3831 
3832 	return 0;
3833 }
3834 
3835 static void ext4_clear_request_list(void)
3836 {
3837 	struct list_head *pos, *n;
3838 	struct ext4_li_request *elr;
3839 
3840 	mutex_lock(&ext4_li_info->li_list_mtx);
3841 	list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3842 		elr = list_entry(pos, struct ext4_li_request,
3843 				 lr_request);
3844 		ext4_remove_li_request(elr);
3845 	}
3846 	mutex_unlock(&ext4_li_info->li_list_mtx);
3847 }
3848 
3849 static int ext4_run_lazyinit_thread(void)
3850 {
3851 	ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3852 					 ext4_li_info, "ext4lazyinit");
3853 	if (IS_ERR(ext4_lazyinit_task)) {
3854 		int err = PTR_ERR(ext4_lazyinit_task);
3855 		ext4_clear_request_list();
3856 		kfree(ext4_li_info);
3857 		ext4_li_info = NULL;
3858 		printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3859 				 "initialization thread\n",
3860 				 err);
3861 		return err;
3862 	}
3863 	ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3864 	return 0;
3865 }
3866 
3867 /*
3868  * Check whether it makes sense to run the itable init thread or not.
3869  * If there is at least one uninitialized inode table, return the
3870  * corresponding group number; otherwise the loop goes through all
3871  * groups and returns the total number of groups.
3872  */
3873 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3874 {
3875 	ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3876 	struct ext4_group_desc *gdp = NULL;
3877 
3878 	if (!ext4_has_group_desc_csum(sb))
3879 		return ngroups;
3880 
3881 	for (group = 0; group < ngroups; group++) {
3882 		gdp = ext4_get_group_desc(sb, group, NULL);
3883 		if (!gdp)
3884 			continue;
3885 
3886 		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3887 			break;
3888 	}
3889 
3890 	return group;
3891 }
3892 
3893 static int ext4_li_info_new(void)
3894 {
3895 	struct ext4_lazy_init *eli = NULL;
3896 
3897 	eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3898 	if (!eli)
3899 		return -ENOMEM;
3900 
3901 	INIT_LIST_HEAD(&eli->li_request_list);
3902 	mutex_init(&eli->li_list_mtx);
3903 
3904 	eli->li_state |= EXT4_LAZYINIT_QUIT;
3905 
3906 	ext4_li_info = eli;
3907 
3908 	return 0;
3909 }
3910 
3911 static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
3912 					    ext4_group_t start)
3913 {
3914 	struct ext4_li_request *elr;
3915 
3916 	elr = kzalloc(sizeof(*elr), GFP_KERNEL);
3917 	if (!elr)
3918 		return NULL;
3919 
3920 	elr->lr_super = sb;
3921 	elr->lr_first_not_zeroed = start;
3922 	if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) {
3923 		elr->lr_mode = EXT4_LI_MODE_ITABLE;
3924 		elr->lr_next_group = start;
3925 	} else {
3926 		elr->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
3927 	}
3928 
3929 	/*
3930 	 * Randomize first schedule time of the request to
3931 	 * spread the inode table initialization requests
3932 	 * better.
3933 	 */
3934 	elr->lr_next_sched = jiffies + prandom_u32_max(
3935 				EXT4_DEF_LI_MAX_START_DELAY * HZ);
3936 	return elr;
3937 }
3938 
3939 int ext4_register_li_request(struct super_block *sb,
3940 			     ext4_group_t first_not_zeroed)
3941 {
3942 	struct ext4_sb_info *sbi = EXT4_SB(sb);
3943 	struct ext4_li_request *elr = NULL;
3944 	ext4_group_t ngroups = sbi->s_groups_count;
3945 	int ret = 0;
3946 
3947 	mutex_lock(&ext4_li_mtx);
3948 	if (sbi->s_li_request != NULL) {
3949 		/*
3950 		 * Reset timeout so it can be computed again, because
3951 		 * s_li_wait_mult might have changed.
3952 		 */
3953 		sbi->s_li_request->lr_timeout = 0;
3954 		goto out;
3955 	}
3956 
3957 	if (sb_rdonly(sb) ||
3958 	    (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
3959 	     (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE))))
3960 		goto out;
3961 
3962 	elr = ext4_li_request_new(sb, first_not_zeroed);
3963 	if (!elr) {
3964 		ret = -ENOMEM;
3965 		goto out;
3966 	}
3967 
3968 	if (NULL == ext4_li_info) {
3969 		ret = ext4_li_info_new();
3970 		if (ret)
3971 			goto out;
3972 	}
3973 
3974 	mutex_lock(&ext4_li_info->li_list_mtx);
3975 	list_add(&elr->lr_request, &ext4_li_info->li_request_list);
3976 	mutex_unlock(&ext4_li_info->li_list_mtx);
3977 
3978 	sbi->s_li_request = elr;
3979 	/*
3980 	 * set elr to NULL here since it has been inserted into
3981 	 * the request_list; its removal and freeing are
3982 	 * handled by ext4_clear_request_list from now on.
3983 	 */
3984 	elr = NULL;
3985 
3986 	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
3987 		ret = ext4_run_lazyinit_thread();
3988 		if (ret)
3989 			goto out;
3990 	}
3991 out:
3992 	mutex_unlock(&ext4_li_mtx);
3993 	if (ret)
3994 		kfree(elr);
3995 	return ret;
3996 }
3997 
3998 /*
3999  * We do not need to lock anything since this is called on
4000  * module unload.
4001  */
4002 static void ext4_destroy_lazyinit_thread(void)
4003 {
4004 	/*
4005 	 * If thread exited earlier
4006 	 * there's nothing to be done.
4007 	 */
4008 	if (!ext4_li_info || !ext4_lazyinit_task)
4009 		return;
4010 
4011 	kthread_stop(ext4_lazyinit_task);
4012 }
4013 
4014 static int set_journal_csum_feature_set(struct super_block *sb)
4015 {
4016 	int ret = 1;
4017 	int compat, incompat;
4018 	struct ext4_sb_info *sbi = EXT4_SB(sb);
4019 
4020 	if (ext4_has_metadata_csum(sb)) {
4021 		/* journal checksum v3 */
4022 		compat = 0;
4023 		incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
4024 	} else {
4025 		/* journal checksum v1 */
4026 		compat = JBD2_FEATURE_COMPAT_CHECKSUM;
4027 		incompat = 0;
4028 	}
4029 
4030 	jbd2_journal_clear_features(sbi->s_journal,
4031 			JBD2_FEATURE_COMPAT_CHECKSUM, 0,
4032 			JBD2_FEATURE_INCOMPAT_CSUM_V3 |
4033 			JBD2_FEATURE_INCOMPAT_CSUM_V2);
4034 	if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4035 		ret = jbd2_journal_set_features(sbi->s_journal,
4036 				compat, 0,
4037 				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
4038 				incompat);
4039 	} else if (test_opt(sb, JOURNAL_CHECKSUM)) {
4040 		ret = jbd2_journal_set_features(sbi->s_journal,
4041 				compat, 0,
4042 				incompat);
4043 		jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4044 				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4045 	} else {
4046 		jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4047 				JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4048 	}
4049 
4050 	return ret;
4051 }
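
/*
 * Selection summary (illustrative): filesystems with metadata_csum use
 * journal checksum v3 (JBD2_FEATURE_INCOMPAT_CSUM_V3); everything else
 * uses the v1 JBD2_FEATURE_COMPAT_CHECKSUM.  journal_async_commit
 * layers JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT on top of the chosen
 * checksum flavour; plain journal_checksum sets only the checksum
 * feature and clears ASYNC_COMMIT; with neither option, ASYNC_COMMIT
 * is cleared and no checksum feature is set.
 */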
4052 
4053 /*
4054  * Note: calculating the overhead so we can be compatible with
4055  * historical BSD practice is quite difficult in the face of
4056  * clusters/bigalloc.  This is because multiple metadata blocks from
4057  * different block groups can end up in the same allocation cluster.
4058  * Calculating the exact overhead in the face of clustered allocation
4059  * requires either O(all block bitmaps) in memory or O(number of block
4060  * groups**2) in time.  We will still calculate the overhead for
4061  * older file systems --- and if we come across a bigalloc file
4062  * system with zero in s_overhead_clusters the estimate will be close to
4063  * correct especially for very large cluster sizes --- but for newer
4064  * file systems, it's better to calculate this figure once at mkfs
4065  * time, and store it in the superblock.  If the superblock value is
4066  * present (even for non-bigalloc file systems), we will use it.
4067  */
4068 static int count_overhead(struct super_block *sb, ext4_group_t grp,
4069 			  char *buf)
4070 {
4071 	struct ext4_sb_info	*sbi = EXT4_SB(sb);
4072 	struct ext4_group_desc	*gdp;
4073 	ext4_fsblk_t		first_block, last_block, b;
4074 	ext4_group_t		i, ngroups = ext4_get_groups_count(sb);
4075 	int			s, j, count = 0;
4076 	int			has_super = ext4_bg_has_super(sb, grp);
4077 
4078 	if (!ext4_has_feature_bigalloc(sb))
4079 		return (has_super + ext4_bg_num_gdb(sb, grp) +
4080 			(has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
4081 			sbi->s_itb_per_group + 2);
4082 
4083 	first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
4084 		(grp * EXT4_BLOCKS_PER_GROUP(sb));
4085 	last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
4086 	for (i = 0; i < ngroups; i++) {
4087 		gdp = ext4_get_group_desc(sb, i, NULL);
4088 		b = ext4_block_bitmap(sb, gdp);
4089 		if (b >= first_block && b <= last_block) {
4090 			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4091 			count++;
4092 		}
4093 		b = ext4_inode_bitmap(sb, gdp);
4094 		if (b >= first_block && b <= last_block) {
4095 			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
4096 			count++;
4097 		}
4098 		b = ext4_inode_table(sb, gdp);
4099 		if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
4100 			for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
4101 				int c = EXT4_B2C(sbi, b - first_block);
4102 				ext4_set_bit(c, buf);
4103 				count++;
4104 			}
4105 		if (i != grp)
4106 			continue;
4107 		s = 0;
4108 		if (ext4_bg_has_super(sb, grp)) {
4109 			ext4_set_bit(s++, buf);
4110 			count++;
4111 		}
4112 		j = ext4_bg_num_gdb(sb, grp);
4113 		if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
4114 			ext4_error(sb, "Invalid number of block group "
4115 				   "descriptor blocks: %d", j);
4116 			j = EXT4_BLOCKS_PER_GROUP(sb) - s;
4117 		}
4118 		count += j;
4119 		for (; j > 0; j--)
4120 			ext4_set_bit(EXT4_B2C(sbi, s++), buf);
4121 	}
4122 	if (!count)
4123 		return 0;
4124 	return EXT4_CLUSTERS_PER_GROUP(sb) -
4125 		ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
4126 }
4127 
4128 /*
4129  * Compute the overhead and stash it in sbi->s_overhead
4130  */
4131 int ext4_calculate_overhead(struct super_block *sb)
4132 {
4133 	struct ext4_sb_info *sbi = EXT4_SB(sb);
4134 	struct ext4_super_block *es = sbi->s_es;
4135 	struct inode *j_inode;
4136 	unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
4137 	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
4138 	ext4_fsblk_t overhead = 0;
4139 	char *buf = (char *) get_zeroed_page(GFP_NOFS);
4140 
4141 	if (!buf)
4142 		return -ENOMEM;
4143 
4144 	/*
4145 	 * Compute the overhead (FS structures).  This is constant
4146 	 * for a given filesystem unless the number of block groups
4147 	 * changes so we cache the previous value until it does.
4148 	 */
4149 
4150 	/*
4151 	 * All of the blocks before first_data_block are overhead
4152 	 */
4153 	overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));
4154 
4155 	/*
4156 	 * Add the overhead found in each block group
4157 	 */
4158 	for (i = 0; i < ngroups; i++) {
4159 		int blks;
4160 
4161 		blks = count_overhead(sb, i, buf);
4162 		overhead += blks;
4163 		if (blks)
4164 			memset(buf, 0, PAGE_SIZE);
4165 		cond_resched();
4166 	}
4167 
4168 	/*
4169 	 * Add the internal journal blocks whether the journal has been
4170 	 * loaded or not
4171 	 */
4172 	if (sbi->s_journal && !sbi->s_journal_bdev)
4173 		overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
4174 	else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
4175 		/* j_inum for internal journal is non-zero */
4176 		j_inode = ext4_get_journal_inode(sb, j_inum);
4177 		if (j_inode) {
4178 			j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
4179 			overhead += EXT4_NUM_B2C(sbi, j_blocks);
4180 			iput(j_inode);
4181 		} else {
4182 			ext4_msg(sb, KERN_ERR, "can't get journal size");
4183 		}
4184 	}
4185 	sbi->s_overhead = overhead;
4186 	smp_wmb();
4187 	free_page((unsigned long) buf);
4188 	return 0;
4189 }
4190 
4191 static void ext4_set_resv_clusters(struct super_block *sb)
4192 {
4193 	ext4_fsblk_t resv_clusters;
4194 	struct ext4_sb_info *sbi = EXT4_SB(sb);
4195 
4196 	/*
4197 	 * There's no need to reserve anything when we aren't using extents.
4198 	 * The space estimates are exact, there are no unwritten extents,
4199 	 * hole punching doesn't need new metadata... This is needed especially
4200 	 * to keep ext2/3 backward compatibility.
4201 	 */
4202 	if (!ext4_has_feature_extents(sb))
4203 		return;
4204 	/*
4205 	 * By default we reserve 2% or 4096 clusters, whichever is smaller.
4206 	 * This should cover the situations where we cannot afford to run
4207 	 * out of space, such as punching a hole or converting
4208 	 * unwritten extents in the delalloc path. In most cases such an
4209 	 * allocation requires only one or two blocks; higher numbers are
4210 	 * very rare.
4211 	 */
4212 	resv_clusters = (ext4_blocks_count(sbi->s_es) >>
4213 			 sbi->s_cluster_bits);
4214 
4215 	do_div(resv_clusters, 50);
4216 	resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
4217 
4218 	atomic64_set(&sbi->s_resv_clusters, resv_clusters);
4219 }
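
/*
 * Worked example (illustrative): on a 1 TiB filesystem with 4KiB
 * clusters there are ~2^28 clusters; 2% of that (~5.4M clusters) far
 * exceeds the 4096-cluster cap, so 4096 clusters (16 MiB) are
 * reserved.  On a 512 MiB filesystem the 2% figure (~2600 clusters,
 * ~10 MiB) is smaller and is used instead.
 */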
4220 
4221 static const char *ext4_quota_mode(struct super_block *sb)
4222 {
4223 #ifdef CONFIG_QUOTA
4224 	if (!ext4_quota_capable(sb))
4225 		return "none";
4226 
4227 	if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb))
4228 		return "journalled";
4229 	else
4230 		return "writeback";
4231 #else
4232 	return "disabled";
4233 #endif
4234 }
4235 
4236 static void ext4_setup_csum_trigger(struct super_block *sb,
4237 				    enum ext4_journal_trigger_type type,
4238 				    void (*trigger)(
4239 					struct jbd2_buffer_trigger_type *type,
4240 					struct buffer_head *bh,
4241 					void *mapped_data,
4242 					size_t size))
4243 {
4244 	struct ext4_sb_info *sbi = EXT4_SB(sb);
4245 
4246 	sbi->s_journal_triggers[type].sb = sb;
4247 	sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
4248 }
4249 
4250 static void ext4_free_sbi(struct ext4_sb_info *sbi)
4251 {
4252 	if (!sbi)
4253 		return;
4254 
4255 	kfree(sbi->s_blockgroup_lock);
4256 	fs_put_dax(sbi->s_daxdev, NULL);
4257 	kfree(sbi);
4258 }
4259 
4260 static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
4261 {
4262 	struct ext4_sb_info *sbi;
4263 
4264 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
4265 	if (!sbi)
4266 		return NULL;
4267 
4268 	sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
4269 					   NULL, NULL);
4270 
4271 	sbi->s_blockgroup_lock =
4272 		kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
4273 
4274 	if (!sbi->s_blockgroup_lock)
4275 		goto err_out;
4276 
4277 	sb->s_fs_info = sbi;
4278 	sbi->s_sb = sb;
4279 	return sbi;
4280 err_out:
4281 	fs_put_dax(sbi->s_daxdev, NULL);
4282 	kfree(sbi);
4283 	return NULL;
4284 }
4285 
4286 static void ext4_set_def_opts(struct super_block *sb,
4287 			      struct ext4_super_block *es)
4288 {
4289 	unsigned long def_mount_opts;
4290 
4291 	/* Set defaults before we parse the mount options */
4292 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
4293 	set_opt(sb, INIT_INODE_TABLE);
4294 	if (def_mount_opts & EXT4_DEFM_DEBUG)
4295 		set_opt(sb, DEBUG);
4296 	if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
4297 		set_opt(sb, GRPID);
4298 	if (def_mount_opts & EXT4_DEFM_UID16)
4299 		set_opt(sb, NO_UID32);
4300 	/* xattr user namespace & acls are now defaulted on */
4301 	set_opt(sb, XATTR_USER);
4302 #ifdef CONFIG_EXT4_FS_POSIX_ACL
4303 	set_opt(sb, POSIX_ACL);
4304 #endif
4305 	if (ext4_has_feature_fast_commit(sb))
4306 		set_opt2(sb, JOURNAL_FAST_COMMIT);
4307 	/* don't forget to enable journal_csum when metadata_csum is enabled. */
4308 	if (ext4_has_metadata_csum(sb))
4309 		set_opt(sb, JOURNAL_CHECKSUM);
4310 
4311 	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
4312 		set_opt(sb, JOURNAL_DATA);
4313 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
4314 		set_opt(sb, ORDERED_DATA);
4315 	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
4316 		set_opt(sb, WRITEBACK_DATA);
4317 
4318 	if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC)
4319 		set_opt(sb, ERRORS_PANIC);
4320 	else if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE)
4321 		set_opt(sb, ERRORS_CONT);
4322 	else
4323 		set_opt(sb, ERRORS_RO);
4324 	/* block_validity enabled by default; disable with noblock_validity */
4325 	set_opt(sb, BLOCK_VALIDITY);
4326 	if (def_mount_opts & EXT4_DEFM_DISCARD)
4327 		set_opt(sb, DISCARD);
4328 
4329 	if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
4330 		set_opt(sb, BARRIER);
4331 
4332 	/*
4333 	 * enable delayed allocation by default
4334 	 * Use -o nodelalloc to turn it off
4335 	 */
4336 	if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
4337 	    ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
4338 		set_opt(sb, DELALLOC);
4339 
4340 	if (sb->s_blocksize == PAGE_SIZE)
4341 		set_opt(sb, DIOREAD_NOLOCK);
4342 }
4343 
4344 static int ext4_handle_clustersize(struct super_block *sb)
4345 {
4346 	struct ext4_sb_info *sbi = EXT4_SB(sb);
4347 	struct ext4_super_block *es = sbi->s_es;
4348 	int clustersize;
4349 
4350 	/* Handle clustersize */
4351 	clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
4352 	if (ext4_has_feature_bigalloc(sb)) {
4353 		if (clustersize < sb->s_blocksize) {
4354 			ext4_msg(sb, KERN_ERR,
4355 				 "cluster size (%d) smaller than "
4356 				 "block size (%lu)", clustersize, sb->s_blocksize);
4357 			return -EINVAL;
4358 		}
4359 		sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
4360 			le32_to_cpu(es->s_log_block_size);
4361 		sbi->s_clusters_per_group =
4362 			le32_to_cpu(es->s_clusters_per_group);
4363 		if (sbi->s_clusters_per_group > sb->s_blocksize * 8) {
4364 			ext4_msg(sb, KERN_ERR,
4365 				 "#clusters per group too big: %lu",
4366 				 sbi->s_clusters_per_group);
4367 			return -EINVAL;
4368 		}
4369 		if (sbi->s_blocks_per_group !=
4370 		    (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) {
4371 			ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
4372 				 "clusters per group (%lu) inconsistent",
4373 				 sbi->s_blocks_per_group,
4374 				 sbi->s_clusters_per_group);
4375 			return -EINVAL;
4376 		}
4377 	} else {
4378 		if (clustersize != sb->s_blocksize) {
4379 			ext4_msg(sb, KERN_ERR,
4380 				 "fragment/cluster size (%d) != "
4381 				 "block size (%lu)", clustersize, sb->s_blocksize);
4382 			return -EINVAL;
4383 		}
4384 		if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
4385 			ext4_msg(sb, KERN_ERR,
4386 				 "#blocks per group too big: %lu",
4387 				 sbi->s_blocks_per_group);
4388 			return -EINVAL;
4389 		}
4390 		sbi->s_clusters_per_group = sbi->s_blocks_per_group;
4391 		sbi->s_cluster_bits = 0;
4392 	}
4393 	sbi->s_cluster_ratio = clustersize / sb->s_blocksize;
4394 
4395 	/* Do we have standard group size of clustersize * 8 blocks ? */
4396 	if (sbi->s_blocks_per_group == clustersize << 3)
4397 		set_opt2(sb, STD_GROUP_SIZE);
4398 
4399 	return 0;
4400 }
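
/*
 * Worked example (illustrative): a bigalloc filesystem with 4KiB
 * blocks and 64KiB clusters has s_cluster_bits == 4 and
 * s_cluster_ratio == 16, so s_blocks_per_group must equal
 * 16 * s_clusters_per_group.  Without bigalloc the cluster size must
 * equal the block size and the ratio is 1.  STD_GROUP_SIZE marks the
 * common layout where a group spans clustersize * 8 blocks, i.e. the
 * block bitmap fills exactly one cluster.
 */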
4401 
4402 static void ext4_fast_commit_init(struct super_block *sb)
4403 {
4404 	struct ext4_sb_info *sbi = EXT4_SB(sb);
4405 
4406 	/* Initialize fast commit stuff */
4407 	atomic_set(&sbi->s_fc_subtid, 0);
4408 	INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
4409 	INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
4410 	INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
4411 	INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
4412 	sbi->s_fc_bytes = 0;
4413 	ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
4414 	sbi->s_fc_ineligible_tid = 0;
4415 	spin_lock_init(&sbi->s_fc_lock);
4416 	memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
4417 	sbi->s_fc_replay_state.fc_regions = NULL;
4418 	sbi->s_fc_replay_state.fc_regions_size = 0;
4419 	sbi->s_fc_replay_state.fc_regions_used = 0;
4420 	sbi->s_fc_replay_state.fc_regions_valid = 0;
4421 	sbi->s_fc_replay_state.fc_modified_inodes = NULL;
4422 	sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
4423 	sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
4424 }
4425 
4426 static int ext4_inode_info_init(struct super_block *sb,
4427 				struct ext4_super_block *es)
4428 {
4429 	struct ext4_sb_info *sbi = EXT4_SB(sb);
4430 
4431 	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
4432 		sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
4433 		sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
4434 	} else {
4435 		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
4436 		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
4437 		if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
4438 			ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
4439 				 sbi->s_first_ino);
4440 			return -EINVAL;
4441 		}
4442 		if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
4443 		    (!is_power_of_2(sbi->s_inode_size)) ||
4444 		    (sbi->s_inode_size > sb->s_blocksize)) {
4445 			ext4_msg(sb, KERN_ERR,
4446 			       "unsupported inode size: %d",
4447 			       sbi->s_inode_size);
4448 			ext4_msg(sb, KERN_ERR, "blocksize: %lu", sb->s_blocksize);
4449 			return -EINVAL;
4450 		}
4451 		/*
4452 		 * i_atime_extra is the last extra field available for
4453 		 * [acm]times in struct ext4_inode. Checking for that
4454 		 * field should suffice to ensure we have extra space
4455 		 * for all three.
4456 		 */
4457 		if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
4458 			sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
4459 			sb->s_time_gran = 1;
4460 			sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
4461 		} else {
4462 			sb->s_time_gran = NSEC_PER_SEC;
4463 			sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
4464 		}
4465 		sb->s_time_min = EXT4_TIMESTAMP_MIN;
4466 	}
4467 
4468 	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
4469 		sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
4470 			EXT4_GOOD_OLD_INODE_SIZE;
4471 		if (ext4_has_feature_extra_isize(sb)) {
4472 			unsigned v, max = (sbi->s_inode_size -
4473 					   EXT4_GOOD_OLD_INODE_SIZE);
4474 
4475 			v = le16_to_cpu(es->s_want_extra_isize);
4476 			if (v > max) {
4477 				ext4_msg(sb, KERN_ERR,
4478 					 "bad s_want_extra_isize: %d", v);
4479 				return -EINVAL;
4480 			}
4481 			if (sbi->s_want_extra_isize < v)
4482 				sbi->s_want_extra_isize = v;
4483 
4484 			v = le16_to_cpu(es->s_min_extra_isize);
4485 			if (v > max) {
4486 				ext4_msg(sb, KERN_ERR,
4487 					 "bad s_min_extra_isize: %d", v);
4488 				return -EINVAL;
4489 			}
4490 			if (sbi->s_want_extra_isize < v)
4491 				sbi->s_want_extra_isize = v;
4492 		}
4493 	}
4494 
4495 	return 0;
4496 }
4497 
4498 #if IS_ENABLED(CONFIG_UNICODE)
4499 static int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
4500 {
4501 	const struct ext4_sb_encodings *encoding_info;
4502 	struct unicode_map *encoding;
4503 	__u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);
4504 
4505 	if (!ext4_has_feature_casefold(sb) || sb->s_encoding)
4506 		return 0;
4507 
4508 	encoding_info = ext4_sb_read_encoding(es);
4509 	if (!encoding_info) {
4510 		ext4_msg(sb, KERN_ERR,
4511 			"Encoding requested by superblock is unknown");
4512 		return -EINVAL;
4513 	}
4514 
4515 	encoding = utf8_load(encoding_info->version);
4516 	if (IS_ERR(encoding)) {
4517 		ext4_msg(sb, KERN_ERR,
4518 			"can't mount with superblock charset: %s-%u.%u.%u "
4519 			"not supported by the kernel. flags: 0x%x.",
4520 			encoding_info->name,
4521 			unicode_major(encoding_info->version),
4522 			unicode_minor(encoding_info->version),
4523 			unicode_rev(encoding_info->version),
4524 			encoding_flags);
4525 		return -EINVAL;
4526 	}
4527 	ext4_msg(sb, KERN_INFO, "Using encoding defined by superblock: "
4528 		"%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
4529 		unicode_major(encoding_info->version),
4530 		unicode_minor(encoding_info->version),
4531 		unicode_rev(encoding_info->version),
4532 		encoding_flags);
4533 
4534 	sb->s_encoding = encoding;
4535 	sb->s_encoding_flags = encoding_flags;
4536 
4537 	return 0;
4538 }
4539 #else
4540 static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
4541 {
4542 	return 0;
4543 }
4544 #endif
4545 
4546 static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_block *es)
4547 {
4548 	struct ext4_sb_info *sbi = EXT4_SB(sb);
4549 
4550 	/* Warn if metadata_csum and gdt_csum are both set. */
4551 	if (ext4_has_feature_metadata_csum(sb) &&
4552 	    ext4_has_feature_gdt_csum(sb))
4553 		ext4_warning(sb, "metadata_csum and uninit_bg are "
4554 			     "redundant flags; please run fsck.");
4555 
4556 	/* Check for a known checksum algorithm */
4557 	if (!ext4_verify_csum_type(sb, es)) {
4558 		ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4559 			 "unknown checksum algorithm.");
4560 		return -EINVAL;
4561 	}
4562 	ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
4563 				ext4_orphan_file_block_trigger);
4564 
4565 	/* Load the checksum driver */
4566 	sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
4567 	if (IS_ERR(sbi->s_chksum_driver)) {
4568 		int ret = PTR_ERR(sbi->s_chksum_driver);
4569 		ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
4570 		sbi->s_chksum_driver = NULL;
4571 		return ret;
4572 	}
4573 
4574 	/* Check superblock checksum */
4575 	if (!ext4_superblock_csum_verify(sb, es)) {
4576 		ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
4577 			 "invalid superblock checksum.  Run e2fsck?");
4578 		return -EFSBADCRC;
4579 	}
4580 
4581 	/* Precompute checksum seed for all metadata */
4582 	if (ext4_has_feature_csum_seed(sb))
4583 		sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
4584 	else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
4585 		sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
4586 					       sizeof(es->s_uuid));
4587 	return 0;
4588 }
4589 
4590 static int ext4_check_feature_compatibility(struct super_block *sb,
4591 					    struct ext4_super_block *es,
4592 					    int silent)
4593 {
4594 	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
4595 	    (ext4_has_compat_features(sb) ||
4596 	     ext4_has_ro_compat_features(sb) ||
4597 	     ext4_has_incompat_features(sb)))
4598 		ext4_msg(sb, KERN_WARNING,
4599 		       "feature flags set on rev 0 fs, "
4600 		       "running e2fsck is recommended");
4601 
4602 	if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
4603 		set_opt2(sb, HURD_COMPAT);
4604 		if (ext4_has_feature_64bit(sb)) {
4605 			ext4_msg(sb, KERN_ERR,
4606 				 "The Hurd can't support 64-bit file systems");
4607 			return -EINVAL;
4608 		}
4609 
4610 		/*
4611 		 * ea_inode feature uses l_i_version field which is not
4612 		 * available in HURD_COMPAT mode.
4613 		 */
4614 		if (ext4_has_feature_ea_inode(sb)) {
4615 			ext4_msg(sb, KERN_ERR,
4616 				 "ea_inode feature is not supported for Hurd");
4617 			return -EINVAL;
4618 		}
4619 	}
4620 
4621 	if (IS_EXT2_SB(sb)) {
4622 		if (ext2_feature_set_ok(sb))
4623 			ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
4624 				 "using the ext4 subsystem");
4625 		else {
4626 			/*
4627 			 * If we're probing, be silent if this looks like
4628 			 * it's actually an ext[34] filesystem.
4629 			 */
4630 			if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4631 				return -EINVAL;
4632 			ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
4633 				 "to feature incompatibilities");
4634 			return -EINVAL;
4635 		}
4636 	}
4637 
4638 	if (IS_EXT3_SB(sb)) {
4639 		if (ext3_feature_set_ok(sb))
4640 			ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
4641 				 "using the ext4 subsystem");
4642 		else {
4643 			/*
4644 			 * If we're probing, be silent if this looks like
4645 			 * it's actually an ext4 filesystem.
4646 			 */
4647 			if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
4648 				return -EINVAL;
4649 			ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
4650 				 "to feature incompatibilities");
4651 			return -EINVAL;
4652 		}
4653 	}
4654 
4655 	/*
4656 	 * Check feature flags regardless of the revision level, since we
4657 	 * previously didn't change the revision level when setting the flags,
4658 	 * so there is a chance incompat flags are set on a rev 0 filesystem.
4659 	 */
4660 	if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
4661 		return -EINVAL;
4662 
4663 	return 0;
4664 }
4665 
4666 static int ext4_geometry_check(struct super_block *sb,
4667 			       struct ext4_super_block *es)
4668 {
4669 	struct ext4_sb_info *sbi = EXT4_SB(sb);
4670 	__u64 blocks_count;
4671 
4672 	/* check blocks count against device size */
4673 	blocks_count = sb_bdev_nr_blocks(sb);
4674 	if (blocks_count && ext4_blocks_count(es) > blocks_count) {
4675 		ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
4676 		       "exceeds size of device (%llu blocks)",
4677 		       ext4_blocks_count(es), blocks_count);
4678 		return -EINVAL;
4679 	}
4680 
4681 	/*
4682 	 * It makes no sense for the first data block to be beyond the end
4683 	 * of the filesystem.
4684 	 */
4685 	if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
4686 		ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4687 			 "block %u is beyond end of filesystem (%llu)",
4688 			 le32_to_cpu(es->s_first_data_block),
4689 			 ext4_blocks_count(es));
4690 		return -EINVAL;
4691 	}
4692 	if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
4693 	    (sbi->s_cluster_ratio == 1)) {
4694 		ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
4695 			 "block is 0 with a 1k block and cluster size");
4696 		return -EINVAL;
4697 	}
4698 
4699 	blocks_count = (ext4_blocks_count(es) -
4700 			le32_to_cpu(es->s_first_data_block) +
4701 			EXT4_BLOCKS_PER_GROUP(sb) - 1);
4702 	do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
4703 	if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
4704 		ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
4705 		       "(block count %llu, first data block %u, "
4706 		       "blocks per group %lu)", blocks_count,
4707 		       ext4_blocks_count(es),
4708 		       le32_to_cpu(es->s_first_data_block),
4709 		       EXT4_BLOCKS_PER_GROUP(sb));
4710 		return -EINVAL;
4711 	}
4712 	sbi->s_groups_count = blocks_count;
4713 	sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
4714 			(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
4715 	if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
4716 	    le32_to_cpu(es->s_inodes_count)) {
4717 		ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
4718 			 le32_to_cpu(es->s_inodes_count),
4719 			 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
4720 		return -EINVAL;
4721 	}
4722 
4723 	return 0;
4724 }
4725 
4726 static void ext4_group_desc_free(struct ext4_sb_info *sbi)
4727 {
4728 	struct buffer_head **group_desc;
4729 	int i;
4730 
4731 	rcu_read_lock();
4732 	group_desc = rcu_dereference(sbi->s_group_desc);
4733 	for (i = 0; i < sbi->s_gdb_count; i++)
4734 		brelse(group_desc[i]);
4735 	kvfree(group_desc);
4736 	rcu_read_unlock();
4737 }
4738 
4739 static int ext4_group_desc_init(struct super_block *sb,
4740 				struct ext4_super_block *es,
4741 				ext4_fsblk_t logical_sb_block,
4742 				ext4_group_t *first_not_zeroed)
4743 {
4744 	struct ext4_sb_info *sbi = EXT4_SB(sb);
4745 	unsigned int db_count;
4746 	ext4_fsblk_t block;
4747 	int ret;
4748 	int i;
4749 
4750 	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
4751 		   EXT4_DESC_PER_BLOCK(sb);
4752 	if (ext4_has_feature_meta_bg(sb)) {
4753 		if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
4754 			ext4_msg(sb, KERN_WARNING,
4755 				 "first meta block group too large: %u "
4756 				 "(group descriptor block count %u)",
4757 				 le32_to_cpu(es->s_first_meta_bg), db_count);
4758 			return -EINVAL;
4759 		}
4760 	}
4761 	rcu_assign_pointer(sbi->s_group_desc,
4762 			   kvmalloc_array(db_count,
4763 					  sizeof(struct buffer_head *),
4764 					  GFP_KERNEL));
4765 	if (sbi->s_group_desc == NULL) {
4766 		ext4_msg(sb, KERN_ERR, "not enough memory");
4767 		return -ENOMEM;
4768 	}
4769 
4770 	bgl_lock_init(sbi->s_blockgroup_lock);
4771 
4772 	/* Pre-read the descriptors into the buffer cache */
4773 	for (i = 0; i < db_count; i++) {
4774 		block = descriptor_loc(sb, logical_sb_block, i);
4775 		ext4_sb_breadahead_unmovable(sb, block);
4776 	}
4777 
4778 	for (i = 0; i < db_count; i++) {
4779 		struct buffer_head *bh;
4780 
4781 		block = descriptor_loc(sb, logical_sb_block, i);
4782 		bh = ext4_sb_bread_unmovable(sb, block);
4783 		if (IS_ERR(bh)) {
4784 			ext4_msg(sb, KERN_ERR,
4785 			       "can't read group descriptor %d", i);
4786 			sbi->s_gdb_count = i;
4787 			ret = PTR_ERR(bh);
4788 			goto out;
4789 		}
4790 		rcu_read_lock();
4791 		rcu_dereference(sbi->s_group_desc)[i] = bh;
4792 		rcu_read_unlock();
4793 	}
4794 	sbi->s_gdb_count = db_count;
4795 	if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) {
4796 		ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
4797 		ret = -EFSCORRUPTED;
4798 		goto out;
4799 	}
4800 	return 0;
4801 out:
4802 	ext4_group_desc_free(sbi);
4803 	return ret;
4804 }
4805 
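/*
 * Load the journal (inode- or device-backed), enable the journal
 * features implied by the mount options, and validate the requested
 * data journaling mode against what the journal actually supports.
 */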
4806 static int ext4_load_and_init_journal(struct super_block *sb,
4807 				      struct ext4_super_block *es,
4808 				      struct ext4_fs_context *ctx)
4809 {
4810 	struct ext4_sb_info *sbi = EXT4_SB(sb);
4811 	int err;
4812 
4813 	err = ext4_load_journal(sb, es, ctx->journal_devnum);
4814 	if (err)
4815 		return err;
4816 
4817 	if (ext4_has_feature_64bit(sb) &&
4818 	    !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4819 				       JBD2_FEATURE_INCOMPAT_64BIT)) {
4820 		ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
4821 		goto out;
4822 	}
4823 
4824 	if (!set_journal_csum_feature_set(sb)) {
4825 		ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
4826 			 "feature set");
4827 		goto out;
4828 	}
4829 
4830 	if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
4831 		!jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
4832 					  JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
4833 		ext4_msg(sb, KERN_ERR,
4834 			"Failed to set fast commit journal feature");
4835 		goto out;
4836 	}
4837 
4838 	/* We have now updated the journal if required, so we can
4839 	 * validate the data journaling mode. */
4840 	switch (test_opt(sb, DATA_FLAGS)) {
4841 	case 0:
4842 		/* No mode set, assume a default based on the journal
4843 		 * capabilities: ORDERED_DATA if the journal can
4844 		 * cope, else JOURNAL_DATA
4845 		 */
4846 		if (jbd2_journal_check_available_features
4847 		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4848 			set_opt(sb, ORDERED_DATA);
4849 			sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
4850 		} else {
4851 			set_opt(sb, JOURNAL_DATA);
4852 			sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
4853 		}
4854 		break;
4855 
4856 	case EXT4_MOUNT_ORDERED_DATA:
4857 	case EXT4_MOUNT_WRITEBACK_DATA:
4858 		if (!jbd2_journal_check_available_features
4859 		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
4860 			ext4_msg(sb, KERN_ERR, "Journal does not support "
4861 			       "requested data journaling mode");
4862 			goto out;
4863 		}
4864 		break;
4865 	default:
4866 		break;
4867 	}
4868 
4869 	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
4870 	    test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4871 		ext4_msg(sb, KERN_ERR, "can't mount with "
4872 			"journal_async_commit in data=ordered mode");
4873 		goto out;
4874 	}
4875 
4876 	set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
4877 
4878 	sbi->s_journal->j_submit_inode_data_buffers =
4879 		ext4_journal_submit_inode_data_buffers;
4880 	sbi->s_journal->j_finish_inode_data_buffers =
4881 		ext4_journal_finish_inode_data_buffers;
4882 
4883 	return 0;
4884 
4885 out:
4886 	/* flush s_error_work before journal destroy. */
4887 	flush_work(&sbi->s_error_work);
4888 	jbd2_journal_destroy(sbi->s_journal);
4889 	sbi->s_journal = NULL;
4890 	return -EINVAL;
4891 }
4892 
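/*
 * data=journal is incompatible with delayed allocation, dioread_nolock,
 * O_DIRECT and fast commits: drop the implied options and refuse mounts
 * that explicitly combine data=journal with delalloc or DAX.
 */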
4893 static int ext4_journal_data_mode_check(struct super_block *sb)
4894 {
4895 	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4896 		printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with "
4897 			    "data=journal disables delayed allocation, "
4898 			    "dioread_nolock, O_DIRECT and fast_commit support!\n");
4899 		/* can't mount with both data=journal and dioread_nolock. */
4900 		clear_opt(sb, DIOREAD_NOLOCK);
4901 		clear_opt2(sb, JOURNAL_FAST_COMMIT);
4902 		if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4903 			ext4_msg(sb, KERN_ERR, "can't mount with "
4904 				 "both data=journal and delalloc");
4905 			return -EINVAL;
4906 		}
4907 		if (test_opt(sb, DAX_ALWAYS)) {
4908 			ext4_msg(sb, KERN_ERR, "can't mount with "
4909 				 "both data=journal and dax");
4910 			return -EINVAL;
4911 		}
4912 		if (ext4_has_feature_encrypt(sb)) {
4913 			ext4_msg(sb, KERN_WARNING,
4914 				 "encrypted files will use data=ordered "
4915 				 "instead of data journaling mode");
4916 		}
4917 		if (test_opt(sb, DELALLOC))
4918 			clear_opt(sb, DELALLOC);
4919 	} else {
4920 		sb->s_iflags |= SB_I_CGROUPWB;
4921 	}
4922 
4923 	return 0;
4924 }
4925 
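/*
 * Read and verify the on-disk superblock.  If it declares a block size
 * different from the one used for the initial read, switch the device
 * block size and re-read the superblock at its new location.  On
 * success *lsb holds the logical block containing the superblock and
 * sbi->s_sbh the buffer head pinning it.
 */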
4926 static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb,
4927 			   int silent)
4928 {
4929 	struct ext4_sb_info *sbi = EXT4_SB(sb);
4930 	struct ext4_super_block *es;
4931 	ext4_fsblk_t logical_sb_block;
4932 	unsigned long offset = 0;
4933 	struct buffer_head *bh;
4934 	int ret = -EINVAL;
4935 	int blocksize;
4936 
4937 	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
4938 	if (!blocksize) {
4939 		ext4_msg(sb, KERN_ERR, "unable to set blocksize");
4940 		return -EINVAL;
4941 	}
4942 
4943 	/*
4944 	 * The ext4 superblock will not be buffer aligned for block sizes
4945 	 * other than 1kB.  We need to calculate the offset from buffer start.
4946 	 */
4947 	if (blocksize != EXT4_MIN_BLOCK_SIZE) {
4948 		logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
4949 		offset = do_div(logical_sb_block, blocksize);
4950 	} else {
4951 		logical_sb_block = sbi->s_sb_block;
4952 	}
4953 
4954 	bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
4955 	if (IS_ERR(bh)) {
4956 		ext4_msg(sb, KERN_ERR, "unable to read superblock");
4957 		return PTR_ERR(bh);
4958 	}
4959 	/*
4960 	 * Note: s_es must be initialized as soon as possible because
4961 	 *       some ext4 macros depend on its value
4962 	 */
4963 	es = (struct ext4_super_block *) (bh->b_data + offset);
4964 	sbi->s_es = es;
4965 	sb->s_magic = le16_to_cpu(es->s_magic);
4966 	if (sb->s_magic != EXT4_SUPER_MAGIC) {
4967 		if (!silent)
4968 			ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
4969 		goto out;
4970 	}
4971 
4972 	if (le32_to_cpu(es->s_log_block_size) >
4973 	    (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4974 		ext4_msg(sb, KERN_ERR,
4975 			 "Invalid log block size: %u",
4976 			 le32_to_cpu(es->s_log_block_size));
4977 		goto out;
4978 	}
4979 	if (le32_to_cpu(es->s_log_cluster_size) >
4980 	    (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
4981 		ext4_msg(sb, KERN_ERR,
4982 			 "Invalid log cluster size: %u",
4983 			 le32_to_cpu(es->s_log_cluster_size));
4984 		goto out;
4985 	}
4986 
4987 	blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
4988 
4989 	/*
4990 	 * If the default block size is not the same as the real block size,
4991 	 * we need to reload it.
4992 	 */
4993 	if (sb->s_blocksize == blocksize) {
4994 		*lsb = logical_sb_block;
4995 		sbi->s_sbh = bh;
4996 		return 0;
4997 	}
4998 
4999 	/*
5000 	 * bh must be released before kill_bdev(), otherwise neither
5001 	 * the buffer nor its page will be freed.  kill_bdev() is
5002 	 * called by sb_set_blocksize().
5003 	 */
5004 	brelse(bh);
5005 	/* Validate the filesystem blocksize */
5006 	if (!sb_set_blocksize(sb, blocksize)) {
5007 		ext4_msg(sb, KERN_ERR, "bad block size %d",
5008 				blocksize);
5009 		bh = NULL;
5010 		goto out;
5011 	}
5012 
5013 	logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
5014 	offset = do_div(logical_sb_block, blocksize);
5015 	bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
5016 	if (IS_ERR(bh)) {
5017 		ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try");
5018 		ret = PTR_ERR(bh);
5019 		bh = NULL;
5020 		goto out;
5021 	}
5022 	es = (struct ext4_super_block *)(bh->b_data + offset);
5023 	sbi->s_es = es;
5024 	if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
5025 		ext4_msg(sb, KERN_ERR, "Magic mismatch, very weird!");
5026 		goto out;
5027 	}
5028 	*lsb = logical_sb_block;
5029 	sbi->s_sbh = bh;
5030 	return 0;
5031 out:
5032 	brelse(bh);
5033 	return ret;
5034 }
5035 
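/*
 * Do the bulk of the mount work: load and validate the superblock,
 * apply mount options, initialize the journal, read the root inode,
 * and bring up the allocators, percpu counters and background
 * machinery.  Each failed_mount* label unwinds exactly the state that
 * was set up before it.
 */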
5036 static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
5037 {
5038 	struct ext4_super_block *es = NULL;
5039 	struct ext4_sb_info *sbi = EXT4_SB(sb);
5040 	struct flex_groups **flex_groups;
5041 	ext4_fsblk_t block;
5042 	ext4_fsblk_t logical_sb_block;
5043 	struct inode *root;
5044 	int ret = -ENOMEM;
5045 	unsigned int i;
5046 	int needs_recovery, has_huge_files;
5047 	int err = 0;
5048 	ext4_group_t first_not_zeroed;
5049 	struct ext4_fs_context *ctx = fc->fs_private;
5050 	int silent = fc->sb_flags & SB_SILENT;
5051 
5052 	/* Set defaults for the variables that will be set during parsing */
5053 	if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO))
5054 		ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
5055 
5056 	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
5057 	sbi->s_sectors_written_start =
5058 		part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
5059 
5060 	/* -EINVAL is default */
5061 	ret = -EINVAL;
5062 	err = ext4_load_super(sb, &logical_sb_block, silent);
5063 	if (err)
5064 		goto out_fail;
5065 
5066 	es = sbi->s_es;
5067 	sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);
5068 
5069 	err = ext4_init_metadata_csum(sb, es);
5070 	if (err)
5071 		goto failed_mount;
5072 
5073 	ext4_set_def_opts(sb, es);
5074 
5075 	sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
5076 	sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
5077 	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
5078 	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
5079 	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;
5080 
5081 	/*
5082 	 * set default s_li_wait_mult for lazyinit, for the case there is
5083 	 * no mount option specified.
5084 	 */
5085 	sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
5086 
5087 	if (ext4_inode_info_init(sb, es))
5088 		goto failed_mount;
5089 
5090 	err = parse_apply_sb_mount_options(sb, ctx);
5091 	if (err < 0)
5092 		goto failed_mount;
5093 
5094 	sbi->s_def_mount_opt = sbi->s_mount_opt;
5095 	sbi->s_def_mount_opt2 = sbi->s_mount_opt2;
5096 
5097 	err = ext4_check_opt_consistency(fc, sb);
5098 	if (err < 0)
5099 		goto failed_mount;
5100 
5101 	ext4_apply_options(fc, sb);
5102 
5103 	if (ext4_encoding_init(sb, es))
5104 		goto failed_mount;
5105 
5106 	if (ext4_journal_data_mode_check(sb))
5107 		goto failed_mount;
5108 
5109 	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
5110 		(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);
5111 
5112 	/* i_version is always enabled now */
5113 	sb->s_flags |= SB_I_VERSION;
5114 
5115 	if (ext4_check_feature_compatibility(sb, es, silent))
5116 		goto failed_mount;
5117 
5118 	if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) {
5119 		ext4_msg(sb, KERN_ERR,
5120 			 "Number of reserved GDT blocks insanely large: %d",
5121 			 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
5122 		goto failed_mount;
5123 	}
5124 
5125 	if (sbi->s_daxdev) {
5126 		if (sb->s_blocksize == PAGE_SIZE)
5127 			set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
5128 		else
5129 			ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX");
5130 	}
5131 
5132 	if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
5133 		if (ext4_has_feature_inline_data(sb)) {
5134 			ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
5135 					" that may contain inline data");
5136 			goto failed_mount;
5137 		}
5138 		if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
5139 			ext4_msg(sb, KERN_ERR,
5140 				"DAX unsupported by block device.");
5141 			goto failed_mount;
5142 		}
5143 	}
5144 
5145 	if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
5146 		ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
5147 			 es->s_encryption_level);
5148 		goto failed_mount;
5149 	}
5150 
5151 	has_huge_files = ext4_has_feature_huge_file(sb);
5152 	sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
5153 						      has_huge_files);
5154 	sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);
5155 
5156 	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
5157 	if (ext4_has_feature_64bit(sb)) {
5158 		if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
5159 		    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
5160 		    !is_power_of_2(sbi->s_desc_size)) {
5161 			ext4_msg(sb, KERN_ERR,
5162 			       "unsupported descriptor size %lu",
5163 			       sbi->s_desc_size);
5164 			goto failed_mount;
5165 		}
5166 	} else
5167 		sbi->s_desc_size = EXT4_MIN_DESC_SIZE;
5168 
5169 	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
5170 	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
5171 
5172 	sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb);
5173 	if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) {
5174 		if (!silent)
5175 			ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
5176 		goto failed_mount;
5177 	}
5178 	if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
5179 	    sbi->s_inodes_per_group > sb->s_blocksize * 8) {
5180 		ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu",
5181 			 sbi->s_inodes_per_group);
5182 		goto failed_mount;
5183 	}
5184 	sbi->s_itb_per_group = sbi->s_inodes_per_group /
5185 					sbi->s_inodes_per_block;
5186 	sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb);
5187 	sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
5188 	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
5189 	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
5190 
5191 	for (i = 0; i < 4; i++)
5192 		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
5193 	sbi->s_def_hash_version = es->s_def_hash_version;
5194 	if (ext4_has_feature_dir_index(sb)) {
5195 		i = le32_to_cpu(es->s_flags);
5196 		if (i & EXT2_FLAGS_UNSIGNED_HASH)
5197 			sbi->s_hash_unsigned = 3;
5198 		else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
5199 #ifdef __CHAR_UNSIGNED__
5200 			if (!sb_rdonly(sb))
5201 				es->s_flags |=
5202 					cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
5203 			sbi->s_hash_unsigned = 3;
5204 #else
5205 			if (!sb_rdonly(sb))
5206 				es->s_flags |=
5207 					cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
5208 #endif
5209 		}
5210 	}
5211 
5212 	if (ext4_handle_clustersize(sb))
5213 		goto failed_mount;
5214 
5215 	/*
5216 	 * Test whether we have more sectors than will fit in sector_t,
5217 	 * and whether the max offset is addressable by the page cache.
5218 	 */
5219 	err = generic_check_addressable(sb->s_blocksize_bits,
5220 					ext4_blocks_count(es));
5221 	if (err) {
5222 		ext4_msg(sb, KERN_ERR, "filesystem"
5223 			 " too large to mount safely on this system");
5224 		goto failed_mount;
5225 	}
5226 
5227 	if (ext4_geometry_check(sb, es))
5228 		goto failed_mount;
5229 
5230 	err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
5231 	if (err)
5232 		goto failed_mount;
5233 
5234 	timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
5235 	spin_lock_init(&sbi->s_error_lock);
5236 	INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);
5237 
5238 	/* Register extent status tree shrinker */
5239 	if (ext4_es_register_shrinker(sbi))
5240 		goto failed_mount3;
5241 
5242 	sbi->s_stripe = ext4_get_stripe_size(sbi);
5243 	sbi->s_extent_max_zeroout_kb = 32;
5244 
5245 	/*
5246 	 * set up enough so that it can read an inode
5247 	 */
5248 	sb->s_op = &ext4_sops;
5249 	sb->s_export_op = &ext4_export_ops;
5250 	sb->s_xattr = ext4_xattr_handlers;
5251 #ifdef CONFIG_FS_ENCRYPTION
5252 	sb->s_cop = &ext4_cryptops;
5253 #endif
5254 #ifdef CONFIG_FS_VERITY
5255 	sb->s_vop = &ext4_verityops;
5256 #endif
5257 #ifdef CONFIG_QUOTA
5258 	sb->dq_op = &ext4_quota_operations;
5259 	if (ext4_has_feature_quota(sb))
5260 		sb->s_qcop = &dquot_quotactl_sysfile_ops;
5261 	else
5262 		sb->s_qcop = &ext4_qctl_operations;
5263 	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
5264 #endif
5265 	memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));
5266 
5267 	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
5268 	mutex_init(&sbi->s_orphan_lock);
5269 
5270 	ext4_fast_commit_init(sb);
5271 
5272 	sb->s_root = NULL;
5273 
5274 	needs_recovery = (es->s_last_orphan != 0 ||
5275 			  ext4_has_feature_orphan_present(sb) ||
5276 			  ext4_has_feature_journal_needs_recovery(sb));
5277 
5278 	if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) {
5279 		err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block));
5280 		if (err)
5281 			goto failed_mount3a;
5282 	}
5283 
5284 	/*
5285 	 * The first inode we look at is the journal inode.  Don't try
5286 	 * root first: it may be modified in the journal!
5287 	 */
5288 	if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
5289 		err = ext4_load_and_init_journal(sb, es, ctx);
5290 		if (err)
5291 			goto failed_mount3a;
5292 	} else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
5293 		   ext4_has_feature_journal_needs_recovery(sb)) {
5294 		ext4_msg(sb, KERN_ERR, "required journal recovery "
5295 		       "suppressed and not mounted read-only");
5296 		goto failed_mount3a;
5297 	} else {
5298 		/* Nojournal mode, all journal mount options are illegal */
5299 		if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
5300 			ext4_msg(sb, KERN_ERR, "can't mount with "
5301 				 "journal_async_commit, fs mounted w/o journal");
5302 			goto failed_mount3a;
5303 		}
5304 
5305 		if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
5306 			ext4_msg(sb, KERN_ERR, "can't mount with "
5307 				 "journal_checksum, fs mounted w/o journal");
5308 			goto failed_mount3a;
5309 		}
5310 		if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
5311 			ext4_msg(sb, KERN_ERR, "can't mount with "
5312 				 "commit=%lu, fs mounted w/o journal",
5313 				 sbi->s_commit_interval / HZ);
5314 			goto failed_mount3a;
5315 		}
5316 		if (EXT4_MOUNT_DATA_FLAGS &
5317 		    (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
5318 			ext4_msg(sb, KERN_ERR, "can't mount with "
5319 				 "data=, fs mounted w/o journal");
5320 			goto failed_mount3a;
5321 		}
5322 		sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
5323 		clear_opt(sb, JOURNAL_CHECKSUM);
5324 		clear_opt(sb, DATA_FLAGS);
5325 		clear_opt2(sb, JOURNAL_FAST_COMMIT);
5326 		sbi->s_journal = NULL;
5327 		needs_recovery = 0;
5328 	}
5329 
5330 	if (!test_opt(sb, NO_MBCACHE)) {
5331 		sbi->s_ea_block_cache = ext4_xattr_create_cache();
5332 		if (!sbi->s_ea_block_cache) {
5333 			ext4_msg(sb, KERN_ERR,
5334 				 "Failed to create ea_block_cache");
5335 			goto failed_mount_wq;
5336 		}
5337 
5338 		if (ext4_has_feature_ea_inode(sb)) {
5339 			sbi->s_ea_inode_cache = ext4_xattr_create_cache();
5340 			if (!sbi->s_ea_inode_cache) {
5341 				ext4_msg(sb, KERN_ERR,
5342 					 "Failed to create ea_inode_cache");
5343 				goto failed_mount_wq;
5344 			}
5345 		}
5346 	}
5347 
5348 	/*
5349 	 * Get the # of file system overhead blocks from the
5350 	 * superblock if present.
5351 	 */
5352 	sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
5353 	/* ignore the precalculated value if it is ridiculous */
5354 	if (sbi->s_overhead > ext4_blocks_count(es))
5355 		sbi->s_overhead = 0;
5356 	/*
5357 	 * If the bigalloc feature is not enabled recalculating the
5358 	 * overhead doesn't take long, so we might as well just redo
5359 	 * it to make sure we are using the correct value.
5360 	 */
5361 	if (!ext4_has_feature_bigalloc(sb))
5362 		sbi->s_overhead = 0;
5363 	if (sbi->s_overhead == 0) {
5364 		err = ext4_calculate_overhead(sb);
5365 		if (err)
5366 			goto failed_mount_wq;
5367 	}
5368 
5369 	/*
5370 	 * The maximum number of concurrent works can be high and
5371 	 * concurrency isn't really necessary.  Limit it to 1.
5372 	 */
5373 	EXT4_SB(sb)->rsv_conversion_wq =
5374 		alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
5375 	if (!EXT4_SB(sb)->rsv_conversion_wq) {
5376 		printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
5377 		ret = -ENOMEM;
5378 		goto failed_mount4;
5379 	}
5380 
5381 	/*
5382 	 * The jbd2_journal_load will have done any necessary log recovery,
5383 	 * so we can safely mount the rest of the filesystem now.
5384 	 */
5385 
5386 	root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
5387 	if (IS_ERR(root)) {
5388 		ext4_msg(sb, KERN_ERR, "get root inode failed");
5389 		ret = PTR_ERR(root);
5390 		root = NULL;
5391 		goto failed_mount4;
5392 	}
5393 	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
5394 		ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
5395 		iput(root);
5396 		goto failed_mount4;
5397 	}
5398 
5399 	sb->s_root = d_make_root(root);
5400 	if (!sb->s_root) {
5401 		ext4_msg(sb, KERN_ERR, "get root dentry failed");
5402 		ret = -ENOMEM;
5403 		goto failed_mount4;
5404 	}
5405 
5406 	ret = ext4_setup_super(sb, es, sb_rdonly(sb));
5407 	if (ret == -EROFS) {
5408 		sb->s_flags |= SB_RDONLY;
5409 		ret = 0;
5410 	} else if (ret)
5411 		goto failed_mount4a;
5412 
5413 	ext4_set_resv_clusters(sb);
5414 
5415 	if (test_opt(sb, BLOCK_VALIDITY)) {
5416 		err = ext4_setup_system_zone(sb);
5417 		if (err) {
5418 			ext4_msg(sb, KERN_ERR, "failed to initialize system "
5419 				 "zone (%d)", err);
5420 			goto failed_mount4a;
5421 		}
5422 	}
5423 	ext4_fc_replay_cleanup(sb);
5424 
5425 	ext4_ext_init(sb);
5426 
5427 	/*
5428 	 * Enable optimize_scan if number of groups is > threshold. This can be
5429 	 * turned off by passing "mb_optimize_scan=0". This can also be
5430 	 * turned on forcefully by passing "mb_optimize_scan=1".
5431 	 */
5432 	if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) {
5433 		if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
5434 			set_opt2(sb, MB_OPTIMIZE_SCAN);
5435 		else
5436 			clear_opt2(sb, MB_OPTIMIZE_SCAN);
5437 	}
5438 
5439 	err = ext4_mb_init(sb);
5440 	if (err) {
5441 		ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
5442 			 err);
5443 		goto failed_mount5;
5444 	}
5445 
5446 	/*
5447 	 * We can only set up the journal commit callback once
5448 	 * mballoc is initialized
5449 	 */
5450 	if (sbi->s_journal)
5451 		sbi->s_journal->j_commit_callback =
5452 			ext4_journal_commit_callback;
5453 
5454 	block = ext4_count_free_clusters(sb);
5455 	ext4_free_blocks_count_set(sbi->s_es,
5456 				   EXT4_C2B(sbi, block));
5457 	err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
5458 				  GFP_KERNEL);
5459 	if (!err) {
5460 		unsigned long freei = ext4_count_free_inodes(sb);
5461 		sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
5462 		err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
5463 					  GFP_KERNEL);
5464 	}
5465 	if (!err)
5466 		err = percpu_counter_init(&sbi->s_dirs_counter,
5467 					  ext4_count_dirs(sb), GFP_KERNEL);
5468 	if (!err)
5469 		err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
5470 					  GFP_KERNEL);
5471 	if (!err)
5472 		err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
5473 					  GFP_KERNEL);
5474 	if (!err)
5475 		err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
5476 
5477 	if (err) {
5478 		ext4_msg(sb, KERN_ERR, "insufficient memory");
5479 		goto failed_mount6;
5480 	}
5481 
5482 	if (ext4_has_feature_flex_bg(sb))
5483 		if (!ext4_fill_flex_info(sb)) {
5484 			ext4_msg(sb, KERN_ERR,
5485 			       "unable to initialize "
5486 			       "flex_bg meta info!");
5487 			ret = -ENOMEM;
5488 			goto failed_mount6;
5489 		}
5490 
5491 	err = ext4_register_li_request(sb, first_not_zeroed);
5492 	if (err)
5493 		goto failed_mount6;
5494 
5495 	err = ext4_register_sysfs(sb);
5496 	if (err)
5497 		goto failed_mount7;
5498 
5499 	err = ext4_init_orphan_info(sb);
5500 	if (err)
5501 		goto failed_mount8;
5502 #ifdef CONFIG_QUOTA
5503 	/* Enable quota usage during mount. */
5504 	if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
5505 		err = ext4_enable_quotas(sb);
5506 		if (err)
5507 			goto failed_mount9;
5508 	}
5509 #endif  /* CONFIG_QUOTA */
5510 
5511 	/*
5512 	 * Save the original bdev mapping's wb_err value which could be
5513 	 * used to detect the metadata async write error.
5514 	 */
5515 	spin_lock_init(&sbi->s_bdev_wb_lock);
5516 	errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
5517 				 &sbi->s_bdev_wb_err);
5518 	sb->s_bdev->bd_super = sb;
5519 	EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
5520 	ext4_orphan_cleanup(sb, es);
5521 	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
5522 	/*
5523 	 * Update the checksum after updating free space/inode counters and
5524 	 * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect
5525 	 * checksum in the buffer cache until it is written out and
5526 	 * e2fsprogs programs trying to open a file system immediately
5527 	 * after it is mounted can fail.
5528 	 */
5529 	ext4_superblock_csum_set(sb);
5530 	if (needs_recovery) {
5531 		ext4_msg(sb, KERN_INFO, "recovery complete");
5532 		err = ext4_mark_recovery_complete(sb, es);
5533 		if (err)
5534 			goto failed_mount10;
5535 	}
5536 
5537 	if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
5538 		ext4_msg(sb, KERN_WARNING,
5539 			 "mounting with \"discard\" option, but the device does not support discard");
5540 
5541 	if (es->s_error_count)
5542 		mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
5543 
5544 	/* Enable message ratelimiting. Default is 10 messages per 5 secs. */
5545 	ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
5546 	ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
5547 	ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
5548 	atomic_set(&sbi->s_warning_count, 0);
5549 	atomic_set(&sbi->s_msg_count, 0);
5550 
5551 	return 0;
5552 
5553 failed_mount10:
5554 	ext4_quota_off_umount(sb);
5555 failed_mount9: __maybe_unused
5556 	ext4_release_orphan_info(sb);
5557 failed_mount8:
5558 	ext4_unregister_sysfs(sb);
5559 	kobject_put(&sbi->s_kobj);
5560 failed_mount7:
5561 	ext4_unregister_li_request(sb);
5562 failed_mount6:
5563 	ext4_mb_release(sb);
5564 	rcu_read_lock();
5565 	flex_groups = rcu_dereference(sbi->s_flex_groups);
5566 	if (flex_groups) {
5567 		for (i = 0; i < sbi->s_flex_groups_allocated; i++)
5568 			kvfree(flex_groups[i]);
5569 		kvfree(flex_groups);
5570 	}
5571 	rcu_read_unlock();
5572 	percpu_counter_destroy(&sbi->s_freeclusters_counter);
5573 	percpu_counter_destroy(&sbi->s_freeinodes_counter);
5574 	percpu_counter_destroy(&sbi->s_dirs_counter);
5575 	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
5576 	percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
5577 	percpu_free_rwsem(&sbi->s_writepages_rwsem);
5578 failed_mount5:
5579 	ext4_ext_release(sb);
5580 	ext4_release_system_zone(sb);
5581 failed_mount4a:
5582 	dput(sb->s_root);
5583 	sb->s_root = NULL;
5584 failed_mount4:
5585 	ext4_msg(sb, KERN_ERR, "mount failed");
5586 	if (EXT4_SB(sb)->rsv_conversion_wq)
5587 		destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
5588 failed_mount_wq:
5589 	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
5590 	sbi->s_ea_inode_cache = NULL;
5591 
5592 	ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
5593 	sbi->s_ea_block_cache = NULL;
5594 
5595 	if (sbi->s_journal) {
5596 		/* flush s_error_work before journal destroy. */
5597 		flush_work(&sbi->s_error_work);
5598 		jbd2_journal_destroy(sbi->s_journal);
5599 		sbi->s_journal = NULL;
5600 	}
5601 failed_mount3a:
5602 	ext4_es_unregister_shrinker(sbi);
5603 failed_mount3:
5604 	/* flush s_error_work before sbi destroy */
5605 	flush_work(&sbi->s_error_work);
5606 	del_timer_sync(&sbi->s_err_report);
5607 	ext4_stop_mmpd(sbi);
5608 	ext4_group_desc_free(sbi);
5609 failed_mount:
5610 	if (sbi->s_chksum_driver)
5611 		crypto_free_shash(sbi->s_chksum_driver);
5612 
5613 #if IS_ENABLED(CONFIG_UNICODE)
5614 	utf8_unload(sb->s_encoding);
5615 #endif
5616 
5617 #ifdef CONFIG_QUOTA
5618 	for (i = 0; i < EXT4_MAXQUOTAS; i++)
5619 		kfree(get_qf_name(sb, sbi, i));
5620 #endif
5621 	fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
5622 	/* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
5623 	brelse(sbi->s_sbh);
5624 	ext4_blkdev_remove(sbi);
5625 out_fail:
5626 	invalidate_bdev(sb->s_bdev);
5627 	sb->s_fs_info = NULL;
5628 	return err ? err : ret;
5629 }
5630 
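/*
 * fs_context mount entry point: allocate the per-filesystem info, run
 * __ext4_fill_super(), and log the resulting data journaling mode.
 */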
5631 static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
5632 {
5633 	struct ext4_fs_context *ctx = fc->fs_private;
5634 	struct ext4_sb_info *sbi;
5635 	const char *descr;
5636 	int ret;
5637 
5638 	sbi = ext4_alloc_sbi(sb);
5639 	if (!sbi)
5640 		return -ENOMEM;
5641 
5642 	fc->s_fs_info = sbi;
5643 
5644 	/* Cleanup superblock name */
5645 	strreplace(sb->s_id, '/', '!');
5646 
5647 	sbi->s_sb_block = 1;	/* Default super block location */
5648 	if (ctx->spec & EXT4_SPEC_s_sb_block)
5649 		sbi->s_sb_block = ctx->s_sb_block;
5650 
5651 	ret = __ext4_fill_super(fc, sb);
5652 	if (ret < 0)
5653 		goto free_sbi;
5654 
5655 	if (sbi->s_journal) {
5656 		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
5657 			descr = " journalled data mode";
5658 		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
5659 			descr = " ordered data mode";
5660 		else
5661 			descr = " writeback data mode";
5662 	} else
5663 		descr = "out journal";
5664 
5665 	if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
5666 		ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
5667 			 "Quota mode: %s.", descr, ext4_quota_mode(sb));
5668 
5669 	/* Update the s_overhead_clusters if necessary */
5670 	ext4_update_overhead(sb, false);
5671 	return 0;
5672 
5673 free_sbi:
5674 	ext4_free_sbi(sbi);
5675 	fc->s_fs_info = NULL;
5676 	return ret;
5677 }
5678 
5679 static int ext4_get_tree(struct fs_context *fc)
5680 {
5681 	return get_tree_bdev(fc, ext4_fill_super);
5682 }
5683 
5684 /*
5685  * Setup any per-fs journal parameters now.  We'll do this both on
5686  * initial mount, once the journal has been initialised but before we've
5687  * done any recovery; and again on any subsequent remount.
5688  */
5689 static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
5690 {
5691 	struct ext4_sb_info *sbi = EXT4_SB(sb);
5692 
5693 	journal->j_commit_interval = sbi->s_commit_interval;
5694 	journal->j_min_batch_time = sbi->s_min_batch_time;
5695 	journal->j_max_batch_time = sbi->s_max_batch_time;
5696 	ext4_fc_init(sb, journal);
5697 
5698 	write_lock(&journal->j_state_lock);
5699 	if (test_opt(sb, BARRIER))
5700 		journal->j_flags |= JBD2_BARRIER;
5701 	else
5702 		journal->j_flags &= ~JBD2_BARRIER;
5703 	if (test_opt(sb, DATA_ERR_ABORT))
5704 		journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR;
5705 	else
5706 		journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR;
5707 	write_unlock(&journal->j_state_lock);
5708 }
5709 
5710 static struct inode *ext4_get_journal_inode(struct super_block *sb,
5711 					     unsigned int journal_inum)
5712 {
5713 	struct inode *journal_inode;
5714 
5715 	/*
5716 	 * Test for the existence of a valid inode on disk.  Bad things
5717 	 * happen if we iget() an unused inode, as the subsequent iput()
5718 	 * will try to delete it.
5719 	 */
5720 	journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
5721 	if (IS_ERR(journal_inode)) {
5722 		ext4_msg(sb, KERN_ERR, "no journal found");
5723 		return NULL;
5724 	}
5725 	if (!journal_inode->i_nlink) {
5726 		make_bad_inode(journal_inode);
5727 		iput(journal_inode);
5728 		ext4_msg(sb, KERN_ERR, "journal inode is deleted");
5729 		return NULL;
5730 	}
5731 
5732 	ext4_debug("Journal inode found at %p: %lld bytes\n",
5733 		  journal_inode, journal_inode->i_size);
5734 	if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
5735 		ext4_msg(sb, KERN_ERR, "invalid journal inode");
5736 		iput(journal_inode);
5737 		return NULL;
5738 	}
5739 	return journal_inode;
5740 }
5741 
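/*
 * Open the journal stored in an inode of this filesystem and apply the
 * journal parameters derived from the mount options.
 */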
5742 static journal_t *ext4_get_journal(struct super_block *sb,
5743 				   unsigned int journal_inum)
5744 {
5745 	struct inode *journal_inode;
5746 	journal_t *journal;
5747 
5748 	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5749 		return NULL;
5750 
5751 	journal_inode = ext4_get_journal_inode(sb, journal_inum);
5752 	if (!journal_inode)
5753 		return NULL;
5754 
5755 	journal = jbd2_journal_init_inode(journal_inode);
5756 	if (!journal) {
5757 		ext4_msg(sb, KERN_ERR, "Could not load journal inode");
5758 		iput(journal_inode);
5759 		return NULL;
5760 	}
5761 	journal->j_private = sb;
5762 	ext4_init_journal_params(sb, journal);
5763 	return journal;
5764 }
5765 
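/*
 * Open a journal kept on an external block device.  The device must
 * carry a journal-dev superblock whose UUID matches the journal UUID
 * recorded in our own superblock, and must not be shared with any
 * other filesystem.
 */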
5766 static journal_t *ext4_get_dev_journal(struct super_block *sb,
5767 				       dev_t j_dev)
5768 {
5769 	struct buffer_head *bh;
5770 	journal_t *journal;
5771 	ext4_fsblk_t start;
5772 	ext4_fsblk_t len;
5773 	int hblock, blocksize;
5774 	ext4_fsblk_t sb_block;
5775 	unsigned long offset;
5776 	struct ext4_super_block *es;
5777 	struct block_device *bdev;
5778 
5779 	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5780 		return NULL;
5781 
5782 	bdev = ext4_blkdev_get(j_dev, sb);
5783 	if (bdev == NULL)
5784 		return NULL;
5785 
5786 	blocksize = sb->s_blocksize;
5787 	hblock = bdev_logical_block_size(bdev);
5788 	if (blocksize < hblock) {
5789 		ext4_msg(sb, KERN_ERR,
5790 			"blocksize too small for journal device");
5791 		goto out_bdev;
5792 	}
5793 
5794 	sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
5795 	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
5796 	set_blocksize(bdev, blocksize);
5797 	if (!(bh = __bread(bdev, sb_block, blocksize))) {
5798 		ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
5799 		       "external journal");
5800 		goto out_bdev;
5801 	}
5802 
5803 	es = (struct ext4_super_block *) (bh->b_data + offset);
5804 	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
5805 	    !(le32_to_cpu(es->s_feature_incompat) &
5806 	      EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
5807 		ext4_msg(sb, KERN_ERR, "external journal has "
5808 					"bad superblock");
5809 		brelse(bh);
5810 		goto out_bdev;
5811 	}
5812 
5813 	if ((le32_to_cpu(es->s_feature_ro_compat) &
5814 	     EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
5815 	    es->s_checksum != ext4_superblock_csum(sb, es)) {
5816 		ext4_msg(sb, KERN_ERR, "external journal has "
5817 				       "corrupt superblock");
5818 		brelse(bh);
5819 		goto out_bdev;
5820 	}
5821 
5822 	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
5823 		ext4_msg(sb, KERN_ERR, "journal UUID does not match");
5824 		brelse(bh);
5825 		goto out_bdev;
5826 	}
5827 
5828 	len = ext4_blocks_count(es);
5829 	start = sb_block + 1;
5830 	brelse(bh);	/* we're done with the superblock */
5831 
5832 	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
5833 					start, len, blocksize);
5834 	if (!journal) {
5835 		ext4_msg(sb, KERN_ERR, "failed to create device journal");
5836 		goto out_bdev;
5837 	}
5838 	journal->j_private = sb;
5839 	if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, true)) {
5840 		ext4_msg(sb, KERN_ERR, "I/O error on journal device");
5841 		goto out_journal;
5842 	}
5843 	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
5844 		ext4_msg(sb, KERN_ERR, "External journal has more than one "
5845 					"user (unsupported) - %d",
5846 			be32_to_cpu(journal->j_superblock->s_nr_users));
5847 		goto out_journal;
5848 	}
5849 	EXT4_SB(sb)->s_journal_bdev = bdev;
5850 	ext4_init_journal_params(sb, journal);
5851 	return journal;
5852 
5853 out_journal:
5854 	jbd2_journal_destroy(journal);
5855 out_bdev:
5856 	ext4_blkdev_put(bdev);
5857 	return NULL;
5858 }
5859 
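/*
 * Locate and load the journal (inode or external device), refusing
 * combinations that cannot work read-only.  jbd2_journal_load() may
 * replay the log; the superblock error record is saved and restored
 * around it so recovery cannot clobber it.  A changed journal location
 * is written back to the superblock afterwards.
 */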
5860 static int ext4_load_journal(struct super_block *sb,
5861 			     struct ext4_super_block *es,
5862 			     unsigned long journal_devnum)
5863 {
5864 	journal_t *journal;
5865 	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
5866 	dev_t journal_dev;
5867 	int err = 0;
5868 	int really_read_only;
5869 	int journal_dev_ro;
5870 
5871 	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5872 		return -EFSCORRUPTED;
5873 
5874 	if (journal_devnum &&
5875 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
5876 		ext4_msg(sb, KERN_INFO, "external journal device major/minor "
5877 			"numbers have changed");
5878 		journal_dev = new_decode_dev(journal_devnum);
5879 	} else
5880 		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
5881 
5882 	if (journal_inum && journal_dev) {
5883 		ext4_msg(sb, KERN_ERR,
5884 			 "filesystem has both journal inode and journal device!");
5885 		return -EINVAL;
5886 	}
5887 
5888 	if (journal_inum) {
5889 		journal = ext4_get_journal(sb, journal_inum);
5890 		if (!journal)
5891 			return -EINVAL;
5892 	} else {
5893 		journal = ext4_get_dev_journal(sb, journal_dev);
5894 		if (!journal)
5895 			return -EINVAL;
5896 	}
5897 
5898 	journal_dev_ro = bdev_read_only(journal->j_dev);
5899 	really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;
5900 
5901 	if (journal_dev_ro && !sb_rdonly(sb)) {
5902 		ext4_msg(sb, KERN_ERR,
5903 			 "journal device read-only, try mounting with '-o ro'");
5904 		err = -EROFS;
5905 		goto err_out;
5906 	}
5907 
5908 	/*
5909 	 * Are we loading a blank journal or performing recovery after a
5910 	 * crash?  For recovery, we need to check in advance whether we
5911 	 * can get read-write access to the device.
5912 	 */
5913 	if (ext4_has_feature_journal_needs_recovery(sb)) {
5914 		if (sb_rdonly(sb)) {
5915 			ext4_msg(sb, KERN_INFO, "INFO: recovery "
5916 					"required on readonly filesystem");
5917 			if (really_read_only) {
5918 				ext4_msg(sb, KERN_ERR, "write access "
5919 					"unavailable, cannot proceed "
5920 					"(try mounting with noload)");
5921 				err = -EROFS;
5922 				goto err_out;
5923 			}
5924 			ext4_msg(sb, KERN_INFO, "write access will "
5925 			       "be enabled during recovery");
5926 		}
5927 	}
5928 
5929 	if (!(journal->j_flags & JBD2_BARRIER))
5930 		ext4_msg(sb, KERN_INFO, "barriers disabled");
5931 
5932 	if (!ext4_has_feature_journal_needs_recovery(sb))
5933 		err = jbd2_journal_wipe(journal, !really_read_only);
5934 	if (!err) {
5935 		char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
5936 		if (save)
5937 			memcpy(save, ((char *) es) +
5938 			       EXT4_S_ERR_START, EXT4_S_ERR_LEN);
5939 		err = jbd2_journal_load(journal);
5940 		if (save)
5941 			memcpy(((char *) es) + EXT4_S_ERR_START,
5942 			       save, EXT4_S_ERR_LEN);
5943 		kfree(save);
5944 	}
5945 
5946 	if (err) {
5947 		ext4_msg(sb, KERN_ERR, "error loading journal");
5948 		goto err_out;
5949 	}
5950 
5951 	EXT4_SB(sb)->s_journal = journal;
5952 	err = ext4_clear_journal_err(sb, es);
5953 	if (err) {
5954 		EXT4_SB(sb)->s_journal = NULL;
5955 		jbd2_journal_destroy(journal);
5956 		return err;
5957 	}
5958 
5959 	if (!really_read_only && journal_devnum &&
5960 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
5961 		es->s_journal_dev = cpu_to_le32(journal_devnum);
5962 		ext4_commit_super(sb);
5963 	}
5964 	if (!really_read_only && journal_inum &&
5965 	    journal_inum != le32_to_cpu(es->s_journal_inum)) {
5966 		es->s_journal_inum = cpu_to_le32(journal_inum);
5967 		ext4_commit_super(sb);
5968 	}
5969 
5970 	return 0;
5971 
5972 err_out:
5973 	jbd2_journal_destroy(journal);
5974 	return err;
5975 }
5976 
5977 /* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
5978 static void ext4_update_super(struct super_block *sb)
5979 {
5980 	struct ext4_sb_info *sbi = EXT4_SB(sb);
5981 	struct ext4_super_block *es = sbi->s_es;
5982 	struct buffer_head *sbh = sbi->s_sbh;
5983 
5984 	lock_buffer(sbh);
5985 	/*
5986 	 * If the file system is mounted read-only, don't update the
5987 	 * superblock write time.  This avoids updating the superblock
5988 	 * write time when we are mounting the root file system
5989 	 * read/only but we need to replay the journal; at that point,
5990 	 * for people who are east of GMT and who make their clock
5991 	 * tick in localtime for Windows bug-for-bug compatibility,
5992 	 * the clock is set in the future, and this will cause e2fsck
5993 	 * to complain and force a full file system check.
5994 	 */
5995 	if (!(sb->s_flags & SB_RDONLY))
5996 		ext4_update_tstamp(es, s_wtime);
5997 	es->s_kbytes_written =
5998 		cpu_to_le64(sbi->s_kbytes_written +
5999 		    ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
6000 		      sbi->s_sectors_written_start) >> 1));
6001 	if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
6002 		ext4_free_blocks_count_set(es,
6003 			EXT4_C2B(sbi, percpu_counter_sum_positive(
6004 				&sbi->s_freeclusters_counter)));
6005 	if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
6006 		es->s_free_inodes_count =
6007 			cpu_to_le32(percpu_counter_sum_positive(
6008 				&sbi->s_freeinodes_counter));
6009 	/* Copy error information to the on-disk superblock */
6010 	spin_lock(&sbi->s_error_lock);
6011 	if (sbi->s_add_error_count > 0) {
6012 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6013 		if (!es->s_first_error_time && !es->s_first_error_time_hi) {
6014 			__ext4_update_tstamp(&es->s_first_error_time,
6015 					     &es->s_first_error_time_hi,
6016 					     sbi->s_first_error_time);
6017 			strncpy(es->s_first_error_func, sbi->s_first_error_func,
6018 				sizeof(es->s_first_error_func));
6019 			es->s_first_error_line =
6020 				cpu_to_le32(sbi->s_first_error_line);
6021 			es->s_first_error_ino =
6022 				cpu_to_le32(sbi->s_first_error_ino);
6023 			es->s_first_error_block =
6024 				cpu_to_le64(sbi->s_first_error_block);
6025 			es->s_first_error_errcode =
6026 				ext4_errno_to_code(sbi->s_first_error_code);
6027 		}
6028 		__ext4_update_tstamp(&es->s_last_error_time,
6029 				     &es->s_last_error_time_hi,
6030 				     sbi->s_last_error_time);
6031 		strncpy(es->s_last_error_func, sbi->s_last_error_func,
6032 			sizeof(es->s_last_error_func));
6033 		es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
6034 		es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
6035 		es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
6036 		es->s_last_error_errcode =
6037 				ext4_errno_to_code(sbi->s_last_error_code);
6038 		/*
6039 		 * Start the daily error reporting function if it hasn't been
6040 		 * started already
6041 		 */
6042 		if (!es->s_error_count)
6043 			mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
6044 		le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
6045 		sbi->s_add_error_count = 0;
6046 	}
6047 	spin_unlock(&sbi->s_error_lock);
6048 
6049 	ext4_superblock_csum_set(sb);
6050 	unlock_buffer(sbh);
6051 }
6052 
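/*
 * Write the superblock buffer out to disk synchronously, clearing any
 * previously recorded write I/O error on the buffer before
 * resubmitting it.
 */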
6053 static int ext4_commit_super(struct super_block *sb)
6054 {
6055 	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
6056 
6057 	if (!sbh)
6058 		return -EINVAL;
6059 	if (block_device_ejected(sb))
6060 		return -ENODEV;
6061 
6062 	ext4_update_super(sb);
6063 
6064 	lock_buffer(sbh);
6065 	/* Buffer got discarded which means block device got invalidated */
6066 	if (!buffer_mapped(sbh)) {
6067 		unlock_buffer(sbh);
6068 		return -EIO;
6069 	}
6070 
6071 	if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
6072 		/*
6073 		 * Oh, dear.  A previous attempt to write the
6074 		 * superblock failed.  This could happen because the
6075 		 * USB device was yanked out.  Or it could happen to
6076 		 * be a transient write error and maybe the block will
6077 		 * be remapped.  Nothing we can do but to retry the
6078 		 * write and hope for the best.
6079 		 */
6080 		ext4_msg(sb, KERN_ERR, "previous I/O error to "
6081 		       "superblock detected");
6082 		clear_buffer_write_io_error(sbh);
6083 		set_buffer_uptodate(sbh);
6084 	}
6085 	get_bh(sbh);
6086 	/* Clear potential dirty bit if it was journalled update */
6087 	clear_buffer_dirty(sbh);
6088 	sbh->b_end_io = end_buffer_write_sync;
6089 	submit_bh(REQ_OP_WRITE | REQ_SYNC |
6090 		  (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
6091 	wait_on_buffer(sbh);
6092 	if (buffer_write_io_error(sbh)) {
6093 		ext4_msg(sb, KERN_ERR, "I/O error while writing "
6094 		       "superblock");
6095 		clear_buffer_write_io_error(sbh);
6096 		set_buffer_uptodate(sbh);
6097 		return -EIO;
6098 	}
6099 	return 0;
6100 }
6101 
6102 /*
6103  * Have we just finished recovery?  If so, and if we are mounting (or
6104  * remounting) the filesystem readonly, then we will end up with a
6105  * consistent fs on disk.  Record that fact.
6106  */
6107 static int ext4_mark_recovery_complete(struct super_block *sb,
6108 				       struct ext4_super_block *es)
6109 {
6110 	int err;
6111 	journal_t *journal = EXT4_SB(sb)->s_journal;
6112 
6113 	if (!ext4_has_feature_journal(sb)) {
6114 		if (journal != NULL) {
6115 			ext4_error(sb, "Journal got removed while the fs was "
6116 				   "mounted!");
6117 			return -EFSCORRUPTED;
6118 		}
6119 		return 0;
6120 	}
6121 	jbd2_journal_lock_updates(journal);
6122 	err = jbd2_journal_flush(journal, 0);
6123 	if (err < 0)
6124 		goto out;
6125 
6126 	if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
6127 	    ext4_has_feature_orphan_present(sb))) {
6128 		if (!ext4_orphan_file_empty(sb)) {
6129 			ext4_error(sb, "Orphan file not empty on read-only fs.");
6130 			err = -EFSCORRUPTED;
6131 			goto out;
6132 		}
6133 		ext4_clear_feature_journal_needs_recovery(sb);
6134 		ext4_clear_feature_orphan_present(sb);
6135 		ext4_commit_super(sb);
6136 	}
6137 out:
6138 	jbd2_journal_unlock_updates(journal);
6139 	return err;
6140 }
6141 
6142 /*
6143  * If we are mounting (or read-write remounting) a filesystem whose journal
6144  * has recorded an error from a previous lifetime, move that error to the
6145  * main filesystem now.
6146  */
6147 static int ext4_clear_journal_err(struct super_block *sb,
6148 				   struct ext4_super_block *es)
6149 {
6150 	journal_t *journal;
6151 	int j_errno;
6152 	const char *errstr;
6153 
6154 	if (!ext4_has_feature_journal(sb)) {
6155 		ext4_error(sb, "Journal got removed while the fs was mounted!");
6156 		return -EFSCORRUPTED;
6157 	}
6158 
6159 	journal = EXT4_SB(sb)->s_journal;
6160 
6161 	/*
6162 	 * Now check for any error status which may have been recorded in the
6163 	 * journal by a prior ext4_error() or ext4_abort()
6164 	 */
6165 
6166 	j_errno = jbd2_journal_errno(journal);
6167 	if (j_errno) {
6168 		char nbuf[16];
6169 
6170 		errstr = ext4_decode_error(sb, j_errno, nbuf);
6171 		ext4_warning(sb, "Filesystem error recorded "
6172 			     "from previous mount: %s", errstr);
6173 		ext4_warning(sb, "Marking fs in need of filesystem check.");
6174 
6175 		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
6176 		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6177 		ext4_commit_super(sb);
6178 
6179 		jbd2_journal_clear_err(journal);
6180 		jbd2_journal_update_sb_errno(journal);
6181 	}
6182 	return 0;
6183 }
6184 
6185 /*
6186  * Force the running and committing transactions to commit,
6187  * and wait on the commit.
6188  */
6189 int ext4_force_commit(struct super_block *sb)
6190 {
6191 	journal_t *journal;
6192 
6193 	if (sb_rdonly(sb))
6194 		return 0;
6195 
6196 	journal = EXT4_SB(sb)->s_journal;
6197 	return ext4_journal_force_commit(journal);
6198 }
6199 
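/*
 * Sync the filesystem: flush pending reserved-extent conversions and
 * dquots, start (and optionally wait for) a journal commit, and issue
 * a cache flush if the commit will not send one for us.
 */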
6200 static int ext4_sync_fs(struct super_block *sb, int wait)
6201 {
6202 	int ret = 0;
6203 	tid_t target;
6204 	bool needs_barrier = false;
6205 	struct ext4_sb_info *sbi = EXT4_SB(sb);
6206 
6207 	if (unlikely(ext4_forced_shutdown(sbi)))
6208 		return 0;
6209 
6210 	trace_ext4_sync_fs(sb, wait);
6211 	flush_workqueue(sbi->rsv_conversion_wq);
6212 	/*
6213 	 * Writeback quota in non-journalled quota case - journalled quota has
6214 	 * no dirty dquots
6215 	 */
6216 	dquot_writeback_dquots(sb, -1);
6217 	/*
6218 	 * Data writeback is possible w/o journal transaction, so barrier must
6219 	 * be sent at the end of the function. But we can skip it if
6220 	 * transaction_commit will do it for us.
6221 	 */
6222 	if (sbi->s_journal) {
6223 		target = jbd2_get_latest_transaction(sbi->s_journal);
6224 		if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
6225 		    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
6226 			needs_barrier = true;
6227 
6228 		if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
6229 			if (wait)
6230 				ret = jbd2_log_wait_commit(sbi->s_journal,
6231 							   target);
6232 		}
6233 	} else if (wait && test_opt(sb, BARRIER))
6234 		needs_barrier = true;
6235 	if (needs_barrier) {
6236 		int err;
6237 		err = blkdev_issue_flush(sb->s_bdev);
6238 		if (!ret)
6239 			ret = err;
6240 	}
6241 
6242 	return ret;
6243 }
6244 
6245 /*
6246  * LVM calls this function before a (read-only) snapshot is created.  This
6247  * gives us a chance to flush the journal completely and mark the fs clean.
6248  *
6249  * Note that this function alone cannot bring the filesystem to a clean
6250  * state; it relies on the upper layer to stop all data & metadata
6251  * modifications.
6252  */
6253 static int ext4_freeze(struct super_block *sb)
6254 {
6255 	int error = 0;
6256 	journal_t *journal;
6257 
6258 	if (sb_rdonly(sb))
6259 		return 0;
6260 
6261 	journal = EXT4_SB(sb)->s_journal;
6262 
6263 	if (journal) {
6264 		/* Now we set up the journal barrier. */
6265 		jbd2_journal_lock_updates(journal);
6266 
6267 		/*
6268 		 * Don't clear the needs_recovery flag if we failed to
6269 		 * flush the journal.
6270 		 */
6271 		error = jbd2_journal_flush(journal, 0);
6272 		if (error < 0)
6273 			goto out;
6274 
6275 		/* Journal blocked and flushed, clear needs_recovery flag. */
6276 		ext4_clear_feature_journal_needs_recovery(sb);
6277 		if (ext4_orphan_file_empty(sb))
6278 			ext4_clear_feature_orphan_present(sb);
6279 	}
6280 
6281 	error = ext4_commit_super(sb);
6282 out:
6283 	if (journal)
6284 		/* we rely on upper layer to stop further updates */
6285 		jbd2_journal_unlock_updates(journal);
6286 	return error;
6287 }
6288 
6289 /*
6290  * Called by LVM after the snapshot is done.  We need to reset the RECOVER
6291  * flag here, even though the filesystem is not technically dirty yet.
6292  */
6293 static int ext4_unfreeze(struct super_block *sb)
6294 {
6295 	if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb)))
6296 		return 0;
6297 
6298 	if (EXT4_SB(sb)->s_journal) {
6299 		/* Reset the needs_recovery flag before the fs is unlocked. */
6300 		ext4_set_feature_journal_needs_recovery(sb);
6301 		if (ext4_has_feature_orphan_file(sb))
6302 			ext4_set_feature_orphan_present(sb);
6303 	}
6304 
6305 	ext4_commit_super(sb);
6306 	return 0;
6307 }
6308 
6309 /*
6310  * Structure to save mount options for ext4_remount's benefit
6311  */
6312 struct ext4_mount_options {
6313 	unsigned long s_mount_opt;
6314 	unsigned long s_mount_opt2;
6315 	kuid_t s_resuid;
6316 	kgid_t s_resgid;
6317 	unsigned long s_commit_interval;
6318 	u32 s_min_batch_time, s_max_batch_time;
6319 #ifdef CONFIG_QUOTA
6320 	int s_jquota_fmt;
6321 	char *s_qf_names[EXT4_MAXQUOTAS];
6322 #endif
6323 };
6324 
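/*
 * Core of remount: snapshot the current options so they can be
 * restored on failure, apply the new ones, and handle ro<->rw
 * transitions including journal error state, MMP and quota.
 */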
6325 static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
6326 {
6327 	struct ext4_fs_context *ctx = fc->fs_private;
6328 	struct ext4_super_block *es;
6329 	struct ext4_sb_info *sbi = EXT4_SB(sb);
6330 	unsigned long old_sb_flags;
6331 	struct ext4_mount_options old_opts;
6332 	ext4_group_t g;
6333 	int err = 0;
6334 #ifdef CONFIG_QUOTA
6335 	int enable_quota = 0;
6336 	int i, j;
6337 	char *to_free[EXT4_MAXQUOTAS];
6338 #endif
6339 
6340 
6341 	/* Store the original options */
6342 	old_sb_flags = sb->s_flags;
6343 	old_opts.s_mount_opt = sbi->s_mount_opt;
6344 	old_opts.s_mount_opt2 = sbi->s_mount_opt2;
6345 	old_opts.s_resuid = sbi->s_resuid;
6346 	old_opts.s_resgid = sbi->s_resgid;
6347 	old_opts.s_commit_interval = sbi->s_commit_interval;
6348 	old_opts.s_min_batch_time = sbi->s_min_batch_time;
6349 	old_opts.s_max_batch_time = sbi->s_max_batch_time;
6350 #ifdef CONFIG_QUOTA
6351 	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
6352 	for (i = 0; i < EXT4_MAXQUOTAS; i++)
6353 		if (sbi->s_qf_names[i]) {
6354 			char *qf_name = get_qf_name(sb, sbi, i);
6355 
6356 			old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
6357 			if (!old_opts.s_qf_names[i]) {
6358 				for (j = 0; j < i; j++)
6359 					kfree(old_opts.s_qf_names[j]);
6360 				return -ENOMEM;
6361 			}
6362 		} else
6363 			old_opts.s_qf_names[i] = NULL;
6364 #endif
6365 	if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) {
6366 		if (sbi->s_journal && sbi->s_journal->j_task->io_context)
6367 			ctx->journal_ioprio =
6368 				sbi->s_journal->j_task->io_context->ioprio;
6369 		else
6370 			ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
6371 
6372 	}
6373 
	ext4_apply_options(fc, sb);

	if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
	    test_opt(sb, JOURNAL_CHECKSUM)) {
		ext4_msg(sb, KERN_ERR, "changing journal_checksum "
			 "during remount not supported; ignoring");
		sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
	}

	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
		if (test_opt2(sb, EXPLICIT_DELALLOC)) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				 "both data=journal and delalloc");
			err = -EINVAL;
			goto restore_opts;
		}
		if (test_opt(sb, DIOREAD_NOLOCK)) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				 "both data=journal and dioread_nolock");
			err = -EINVAL;
			goto restore_opts;
		}
	} else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
		if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				"journal_async_commit in data=ordered mode");
			err = -EINVAL;
			goto restore_opts;
		}
	}

	if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
		ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
		err = -EINVAL;
		goto restore_opts;
	}

	if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
		ext4_abort(sb, ESHUTDOWN, "Abort forced by user");

	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
		(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);

	es = sbi->s_es;

	if (sbi->s_journal) {
		ext4_init_journal_params(sb, sbi->s_journal);
		set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
	}

	/* Flush outstanding errors before changing fs state */
	flush_work(&sbi->s_error_work);

	if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
		if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
			err = -EROFS;
			goto restore_opts;
		}

		if (fc->sb_flags & SB_RDONLY) {
			err = sync_filesystem(sb);
			if (err < 0)
				goto restore_opts;
			err = dquot_suspend(sb, -1);
			if (err < 0)
				goto restore_opts;

			/*
			 * First of all, the unconditional stuff we have to do
			 * to disable replay of the journal when we next remount
			 */
			sb->s_flags |= SB_RDONLY;

			/*
			 * OK, test if we are remounting a valid rw partition
			 * readonly, and if so set the rdonly flag and then
			 * mark the partition as valid again.
			 */
			if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
			    (sbi->s_mount_state & EXT4_VALID_FS))
				es->s_state = cpu_to_le16(sbi->s_mount_state);

			if (sbi->s_journal) {
				/*
				 * We let remount-ro finish even if marking fs
				 * as clean failed...
				 */
				ext4_mark_recovery_complete(sb, es);
			}
		} else {
			/* Make sure we can mount this feature set readwrite */
			if (ext4_has_feature_readonly(sb) ||
			    !ext4_feature_set_ok(sb, 0)) {
				err = -EROFS;
				goto restore_opts;
			}
			/*
			 * Make sure the group descriptor checksums
			 * are sane.  If they aren't, refuse to remount r/w.
			 */
			for (g = 0; g < sbi->s_groups_count; g++) {
				struct ext4_group_desc *gdp =
					ext4_get_group_desc(sb, g, NULL);

				if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
					ext4_msg(sb, KERN_ERR,
	       "ext4_remount: Checksum for group %u failed (%u!=%u)",
		g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
					       le16_to_cpu(gdp->bg_checksum));
					err = -EFSBADCRC;
					goto restore_opts;
				}
			}

			/*
			 * If we have an unprocessed orphan list hanging
			 * around from a previously readonly bdev mount,
			 * require a full umount/remount for now.
			 */
			if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
				ext4_msg(sb, KERN_WARNING, "Couldn't "
				       "remount RDWR because of unprocessed "
				       "orphan inode list.  Please "
				       "umount/remount instead");
				err = -EINVAL;
				goto restore_opts;
			}

			/*
			 * Mounting a RDONLY partition read-write, so reread
			 * and store the current valid flag.  (It may have
			 * been changed by e2fsck since we originally mounted
			 * the partition.)
			 */
			if (sbi->s_journal) {
				err = ext4_clear_journal_err(sb, es);
				if (err)
					goto restore_opts;
			}
			sbi->s_mount_state = (le16_to_cpu(es->s_state) &
					      ~EXT4_FC_REPLAY);

			err = ext4_setup_super(sb, es, 0);
			if (err)
				goto restore_opts;

			sb->s_flags &= ~SB_RDONLY;
			if (ext4_has_feature_mmp(sb)) {
				err = ext4_multi_mount_protect(sb,
						le64_to_cpu(es->s_mmp_block));
				if (err)
					goto restore_opts;
			}
#ifdef CONFIG_QUOTA
			enable_quota = 1;
#endif
		}
	}

	/*
	 * Handle creation of system zone data early because it can fail.
	 * Releasing of existing data is done when we are sure remount will
	 * succeed.
	 */
	if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
		err = ext4_setup_system_zone(sb);
		if (err)
			goto restore_opts;
	}

	if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
		err = ext4_commit_super(sb);
		if (err)
			goto restore_opts;
	}

#ifdef CONFIG_QUOTA
	if (enable_quota) {
		if (sb_any_quota_suspended(sb))
			dquot_resume(sb, -1);
		else if (ext4_has_feature_quota(sb)) {
			err = ext4_enable_quotas(sb);
			if (err)
				goto restore_opts;
		}
	}
	/* Release old quota file names */
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(old_opts.s_qf_names[i]);
#endif
	if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
		ext4_release_system_zone(sb);

	/*
	 * Reinitialize lazy itable initialization thread based on
	 * current settings
	 */
	if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
		ext4_unregister_li_request(sb);
	else {
		ext4_group_t first_not_zeroed;
		first_not_zeroed = ext4_has_uninit_itable(sb);
		ext4_register_li_request(sb, first_not_zeroed);
	}

	if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
		ext4_stop_mmpd(sbi);

	return 0;

restore_opts:
	/*
	 * If there was a failing r/w to ro transition, we may need to
	 * re-enable quota
	 */
	if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) &&
	    sb_any_quota_suspended(sb))
		dquot_resume(sb, -1);
	sb->s_flags = old_sb_flags;
	sbi->s_mount_opt = old_opts.s_mount_opt;
	sbi->s_mount_opt2 = old_opts.s_mount_opt2;
	sbi->s_resuid = old_opts.s_resuid;
	sbi->s_resgid = old_opts.s_resgid;
	sbi->s_commit_interval = old_opts.s_commit_interval;
	sbi->s_min_batch_time = old_opts.s_min_batch_time;
	sbi->s_max_batch_time = old_opts.s_max_batch_time;
	if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
		ext4_release_system_zone(sb);
#ifdef CONFIG_QUOTA
	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
		to_free[i] = get_qf_name(sb, sbi, i);
		rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
	}
	synchronize_rcu();
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(to_free[i]);
#endif
	if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
		ext4_stop_mmpd(sbi);
	return err;
}

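/*
 * Entry point for the new mount API's reconfigure (remount) path: verify
 * that the requested options are consistent with the existing super_block,
 * then apply them via __ext4_remount().
 */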
static int ext4_reconfigure(struct fs_context *fc)
{
	struct super_block *sb = fc->root->d_sb;
	int ret;

	fc->s_fs_info = EXT4_SB(sb);

	ret = ext4_check_opt_consistency(fc, sb);
	if (ret < 0)
		return ret;

	ret = __ext4_remount(fc, sb);
	if (ret < 0)
		return ret;

	ext4_msg(sb, KERN_INFO, "re-mounted. Quota mode: %s.",
		 ext4_quota_mode(sb));

	return 0;
}

#ifdef CONFIG_QUOTA
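/*
 * Clamp the block and inode counts reported by statfs() to the project
 * quota limits of @projid, so a directory tree under a project quota
 * looks like a filesystem of that size.
 */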
static int ext4_statfs_project(struct super_block *sb,
			       kprojid_t projid, struct kstatfs *buf)
{
	struct kqid qid;
	struct dquot *dquot;
	u64 limit;
	u64 curblock;

	qid = make_kqid_projid(projid);
	dquot = dqget(sb, qid);
	if (IS_ERR(dquot))
		return PTR_ERR(dquot);
	spin_lock(&dquot->dq_dqb_lock);

	limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
			     dquot->dq_dqb.dqb_bhardlimit);
	limit >>= sb->s_blocksize_bits;

	if (limit && buf->f_blocks > limit) {
		curblock = (dquot->dq_dqb.dqb_curspace +
			    dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
		buf->f_blocks = limit;
		buf->f_bfree = buf->f_bavail =
			(buf->f_blocks > curblock) ?
			 (buf->f_blocks - curblock) : 0;
	}

	limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
			     dquot->dq_dqb.dqb_ihardlimit);
	if (limit && buf->f_files > limit) {
		buf->f_files = limit;
		buf->f_ffree =
			(buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
			 (buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
	}

	spin_unlock(&dquot->dq_dqb_lock);
	dqput(dquot);
	return 0;
}
#endif

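/*
 * Fill in filesystem statistics for statfs(2).  Unless minixdf is in
 * effect, metadata overhead is subtracted from the block counts; the
 * result may be further clamped by project quota limits (see above).
 */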
static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	ext4_fsblk_t overhead = 0, resv_blocks;
	s64 bfree;
	resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));

	if (!test_opt(sb, MINIX_DF))
		overhead = sbi->s_overhead;

	buf->f_type = EXT4_SUPER_MAGIC;
	buf->f_bsize = sb->s_blocksize;
	buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
	bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
		percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
	/* prevent underflow in case little free space is available */
	buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
	buf->f_bavail = buf->f_bfree -
			(ext4_r_blocks_count(es) + resv_blocks);
	if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
		buf->f_bavail = 0;
	buf->f_files = le32_to_cpu(es->s_inodes_count);
	buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
	buf->f_namelen = EXT4_NAME_LEN;
	buf->f_fsid = uuid_to_fsid(es->s_uuid);

#ifdef CONFIG_QUOTA
	if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
	    sb_has_quota_limits_enabled(sb, PRJQUOTA))
		ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
#endif
	return 0;
}


#ifdef CONFIG_QUOTA

/*
 * Helper functions so that transaction is started before we acquire dqio_sem
 * to keep correct lock ordering of transaction > dqio_sem
 */
static inline struct inode *dquot_to_inode(struct dquot *dquot)
{
	return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
}

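/*
 * Commit a dquot's in-memory usage and limits to the quota file from
 * within a journal transaction.
 */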
static int ext4_write_dquot(struct dquot *dquot)
{
	int ret, err;
	handle_t *handle;
	struct inode *inode;

	inode = dquot_to_inode(dquot);
	handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
				    EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	ret = dquot_commit(dquot);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;
}

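/*
 * Read in and initialize a dquot from the quota file under a journal
 * transaction, since initializing a new dquot may allocate quota file
 * blocks.
 */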
static int ext4_acquire_dquot(struct dquot *dquot)
{
	int ret, err;
	handle_t *handle;

	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
				    EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	ret = dquot_acquire(dquot);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;
}

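/*
 * Release a dquot that is no longer in use.  If the transaction cannot
 * be started, the dquot is still released to avoid an endless cycle in
 * dqput().
 */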
static int ext4_release_dquot(struct dquot *dquot)
{
	int ret, err;
	handle_t *handle;

	handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
				    EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
	if (IS_ERR(handle)) {
		/* Release dquot anyway to avoid endless cycle in dqput() */
		dquot_release(dquot);
		return PTR_ERR(handle);
	}
	ret = dquot_release(dquot);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;
}

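/*
 * With journalled quota, a dirty dquot is written back immediately so
 * that the journal, rather than the dirty list, guarantees its
 * persistence.
 */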
static int ext4_mark_dquot_dirty(struct dquot *dquot)
{
	struct super_block *sb = dquot->dq_sb;

	if (ext4_is_quota_journalled(sb)) {
		dquot_mark_dquot_dirty(dquot);
		return ext4_write_dquot(dquot);
	} else {
		return dquot_mark_dquot_dirty(dquot);
	}
}

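/*
 * Write quota file header information (e.g. grace times) under a journal
 * transaction.
 */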
static int ext4_write_info(struct super_block *sb, int type)
{
	int ret, err;
	handle_t *handle;

	/* Data block + inode block */
	handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2);
	if (IS_ERR(handle))
		return PTR_ERR(handle);
	ret = dquot_commit_info(sb, type);
	err = ext4_journal_stop(handle);
	if (!ret)
		ret = err;
	return ret;
}

static void lockdep_set_quota_inode(struct inode *inode, int subclass)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	/* The first argument of lockdep_set_subclass has to be
	 * *exactly* the same as the argument to init_rwsem() --- in
	 * this case, in init_once() --- or lockdep gets unhappy
	 * because the name of the lock is set using the
	 * stringification of the argument to init_rwsem().
	 */
	(void) ei;	/* shut up clang warning if !CONFIG_LOCKDEP */
	lockdep_set_subclass(&ei->i_data_sem, subclass);
}

/*
 * Standard function called when quotas are turned on via quotactl(2)
 * (Q_QUOTAON) with a visible quota file.
 */
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
			 const struct path *path)
{
	int err;

	if (!test_opt(sb, QUOTA))
		return -EINVAL;

	/* Quotafile not on the same filesystem? */
	if (path->dentry->d_sb != sb)
		return -EXDEV;

	/* Quota already enabled for this file? */
	if (IS_NOQUOTA(d_inode(path->dentry)))
		return -EBUSY;

	/* Journaling quota? */
	if (EXT4_SB(sb)->s_qf_names[type]) {
		/* Quotafile not in fs root? */
		if (path->dentry->d_parent != sb->s_root)
			ext4_msg(sb, KERN_WARNING,
				"Quota file not on filesystem root. "
				"Journaled quota will not work");
		sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
	} else {
		/*
		 * Clear the flag just in case mount options changed since
		 * last time.
		 */
		sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
	}

	/*
	 * When we journal data on quota file, we have to flush journal to see
	 * all updates to the file when we bypass pagecache...
	 */
	if (EXT4_SB(sb)->s_journal &&
	    ext4_should_journal_data(d_inode(path->dentry))) {
		/*
		 * We don't need to lock updates but journal_flush() could
		 * otherwise be livelocked...
		 */
		jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
		err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
		jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
		if (err)
			return err;
	}

	lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
	err = dquot_quota_on(sb, type, format_id, path);
	if (!err) {
		struct inode *inode = d_inode(path->dentry);
		handle_t *handle;

		/*
		 * Set inode flags to prevent userspace from messing with quota
		 * files. If this fails, we return success anyway since quotas
		 * are already enabled and this is not a hard failure.
		 */
		inode_lock(inode);
		handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
		if (IS_ERR(handle))
			goto unlock_inode;
		EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
		inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
				S_NOATIME | S_IMMUTABLE);
		err = ext4_mark_inode_dirty(handle, inode);
		ext4_journal_stop(handle);
	unlock_inode:
		inode_unlock(inode);
		if (err)
			dquot_quota_off(sb, type);
	}
	if (err)
		lockdep_set_quota_inode(path->dentry->d_inode,
					     I_DATA_SEM_NORMAL);
	return err;
}

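/*
 * Verify that the quota inode number stored in the superblock is the
 * expected reserved inode for the given quota type.
 */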
static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
{
	switch (type) {
	case USRQUOTA:
		return qf_inum == EXT4_USR_QUOTA_INO;
	case GRPQUOTA:
		return qf_inum == EXT4_GRP_QUOTA_INO;
	case PRJQUOTA:
		return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
	default:
		BUG();
	}
}

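/*
 * Enable quotas for one quota type backed by a hidden system inode (the
 * quota feature), as opposed to a visible quota file named in the mount
 * options.
 */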
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
			     unsigned int flags)
{
	int err;
	struct inode *qf_inode;
	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
	};

	BUG_ON(!ext4_has_feature_quota(sb));

	if (!qf_inums[type])
		return -EPERM;

	if (!ext4_check_quota_inum(type, qf_inums[type])) {
		ext4_error(sb, "Bad quota inum: %lu, type: %d",
				qf_inums[type], type);
		return -EUCLEAN;
	}

	qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
	if (IS_ERR(qf_inode)) {
		ext4_error(sb, "Bad quota inode: %lu, type: %d",
				qf_inums[type], type);
		return PTR_ERR(qf_inode);
	}

	/* Don't account quota for quota files to avoid recursion */
	qf_inode->i_flags |= S_NOQUOTA;
	lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
	err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
	if (err)
		lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
	iput(qf_inode);

	return err;
}

/* Enable usage tracking for all quota types. */
int ext4_enable_quotas(struct super_block *sb)
{
	int type, err = 0;
	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
	};
	bool quota_mopt[EXT4_MAXQUOTAS] = {
		test_opt(sb, USRQUOTA),
		test_opt(sb, GRPQUOTA),
		test_opt(sb, PRJQUOTA),
	};

	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
	for (type = 0; type < EXT4_MAXQUOTAS; type++) {
		if (qf_inums[type]) {
			err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
				DQUOT_USAGE_ENABLED |
				(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
			if (err) {
				ext4_warning(sb,
					"Failed to enable quota tracking "
					"(type=%d, err=%d, ino=%lu). "
					"Please run e2fsck to fix.", type,
					err, qf_inums[type]);
				for (type--; type >= 0; type--) {
					struct inode *inode;

					inode = sb_dqopt(sb)->files[type];
					if (inode)
						inode = igrab(inode);
					dquot_quota_off(sb, type);
					if (inode) {
						lockdep_set_quota_inode(inode,
							I_DATA_SEM_NORMAL);
						iput(inode);
					}
				}

				return err;
			}
		}
	}
	return 0;
}

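/*
 * Disable quotas for one type and, for visible quota files, restore the
 * inode flags and timestamps that were modified in ext4_quota_on().
 */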
static int ext4_quota_off(struct super_block *sb, int type)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	handle_t *handle;
	int err;

	/* Force all delayed allocation blocks to be allocated.
	 * Caller already holds s_umount sem */
	if (test_opt(sb, DELALLOC))
		sync_filesystem(sb);

	if (!inode || !igrab(inode))
		goto out;

	err = dquot_quota_off(sb, type);
	if (err || ext4_has_feature_quota(sb))
		goto out_put;

	inode_lock(inode);
	/*
	 * Update modification times of quota files when userspace can
	 * start looking at them. If we fail, we return success anyway since
	 * this is not a hard failure and quotas are already disabled.
	 */
	handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto out_unlock;
	}
	EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
	inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
	inode->i_mtime = inode->i_ctime = current_time(inode);
	err = ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);
out_unlock:
	inode_unlock(inode);
out_put:
	lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
	iput(inode);
	return err;
out:
	return dquot_quota_off(sb, type);
}

/* Read data from quotafile - avoid pagecache and such because we cannot afford
 * acquiring the locks... As quota files are never truncated and quota code
 * itself serializes the operations (and no one else should touch the files)
 * we don't have to be afraid of races */
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
			       size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
	int offset = off & (sb->s_blocksize - 1);
	int tocopy;
	size_t toread;
	struct buffer_head *bh;
	loff_t i_size = i_size_read(inode);

	if (off > i_size)
		return 0;
	if (off+len > i_size)
		len = i_size-off;
	toread = len;
	while (toread > 0) {
		tocopy = sb->s_blocksize - offset < toread ?
				sb->s_blocksize - offset : toread;
		bh = ext4_bread(NULL, inode, blk, 0);
		if (IS_ERR(bh))
			return PTR_ERR(bh);
		if (!bh)	/* A hole? */
			memset(data, 0, tocopy);
		else
			memcpy(data, bh->b_data+offset, tocopy);
		brelse(bh);
		offset = 0;
		toread -= tocopy;
		data += tocopy;
		blk++;
	}
	return len;
}

/* Write to quotafile (we know the transaction is already started and has
 * enough credits) */
static ssize_t ext4_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
	int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
	int retries = 0;
	struct buffer_head *bh;
	handle_t *handle = journal_current_handle();

	if (!handle) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			" cancelled because transaction is not started",
			(unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}
	/*
	 * Since we account only one data block in transaction credits,
	 * then it is impossible to cross a block boundary.
	 */
	if (sb->s_blocksize - offset < len) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			" cancelled because not block aligned",
			(unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}

	do {
		bh = ext4_bread(handle, inode, blk,
				EXT4_GET_BLOCKS_CREATE |
				EXT4_GET_BLOCKS_METADATA_NOFAIL);
	} while (PTR_ERR(bh) == -ENOSPC &&
		 ext4_should_retry_alloc(inode->i_sb, &retries));
	if (IS_ERR(bh))
		return PTR_ERR(bh);
	if (!bh)
		goto out;
	BUFFER_TRACE(bh, "get write access");
	err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
	if (err) {
		brelse(bh);
		return err;
	}
	lock_buffer(bh);
	memcpy(bh->b_data+offset, data, len);
	flush_dcache_page(bh->b_page);
	unlock_buffer(bh);
	err = ext4_handle_dirty_metadata(handle, NULL, bh);
	brelse(bh);
out:
	if (inode->i_size < off + len) {
		i_size_write(inode, off + len);
		EXT4_I(inode)->i_disksize = inode->i_size;
		err2 = ext4_mark_inode_dirty(handle, inode);
		if (unlikely(err2 && !err))
			err = err2;
	}
	return err ? err : len;
}
#endif

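/*
 * When ext4 is built to service ext2/ext3 filesystems (and the native
 * drivers are not built), register it under those filesystem names too.
 */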
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
static inline void register_as_ext2(void)
{
	int err = register_filesystem(&ext2_fs_type);
	if (err)
		printk(KERN_WARNING
		       "EXT4-fs: Unable to register as ext2 (%d)\n", err);
}

static inline void unregister_as_ext2(void)
{
	unregister_filesystem(&ext2_fs_type);
}

static inline int ext2_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext2_incompat_features(sb))
		return 0;
	if (sb_rdonly(sb))
		return 1;
	if (ext4_has_unknown_ext2_ro_compat_features(sb))
		return 0;
	return 1;
}
#else
static inline void register_as_ext2(void) { }
static inline void unregister_as_ext2(void) { }
static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
#endif

static inline void register_as_ext3(void)
{
	int err = register_filesystem(&ext3_fs_type);
	if (err)
		printk(KERN_WARNING
		       "EXT4-fs: Unable to register as ext3 (%d)\n", err);
}

static inline void unregister_as_ext3(void)
{
	unregister_filesystem(&ext3_fs_type);
}

static inline int ext3_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext3_incompat_features(sb))
		return 0;
	if (!ext4_has_feature_journal(sb))
		return 0;
	if (sb_rdonly(sb))
		return 1;
	if (ext4_has_unknown_ext3_ro_compat_features(sb))
		return 0;
	return 1;
}

static struct file_system_type ext4_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "ext4",
	.init_fs_context	= ext4_init_fs_context,
	.parameters		= ext4_param_specs,
	.kill_sb		= kill_block_super,
	.fs_flags		= FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("ext4");

/* Shared across all ext4 file systems */
wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];

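/*
 * Module initialization: set up the internal caches and subsystems, then
 * register the ext4 (and compat ext2/ext3) filesystem types.  The error
 * unwinding below mirrors the setup order.
 */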
static int __init ext4_init_fs(void)
{
	int i, err;

	ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
	ext4_li_info = NULL;

	/* Build-time check for flags consistency */
	ext4_check_flag_values();

	for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
		init_waitqueue_head(&ext4__ioend_wq[i]);

	err = ext4_init_es();
	if (err)
		return err;

	err = ext4_init_pending();
	if (err)
		goto out7;

	err = ext4_init_post_read_processing();
	if (err)
		goto out6;

	err = ext4_init_pageio();
	if (err)
		goto out5;

	err = ext4_init_system_zone();
	if (err)
		goto out4;

	err = ext4_init_sysfs();
	if (err)
		goto out3;

	err = ext4_init_mballoc();
	if (err)
		goto out2;
	err = init_inodecache();
	if (err)
		goto out1;

	err = ext4_fc_init_dentry_cache();
	if (err)
		goto out05;

	register_as_ext3();
	register_as_ext2();
	err = register_filesystem(&ext4_fs_type);
	if (err)
		goto out;

	return 0;
out:
	unregister_as_ext2();
	unregister_as_ext3();
	ext4_fc_destroy_dentry_cache();
out05:
	destroy_inodecache();
out1:
	ext4_exit_mballoc();
out2:
	ext4_exit_sysfs();
out3:
	ext4_exit_system_zone();
out4:
	ext4_exit_pageio();
out5:
	ext4_exit_post_read_processing();
out6:
	ext4_exit_pending();
out7:
	ext4_exit_es();

	return err;
}

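/*
 * Module teardown: unregister the filesystem types and tear down caches
 * and subsystems in reverse order of initialization.
 */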
static void __exit ext4_exit_fs(void)
{
	ext4_destroy_lazyinit_thread();
	unregister_as_ext2();
	unregister_as_ext3();
	unregister_filesystem(&ext4_fs_type);
	ext4_fc_destroy_dentry_cache();
	destroy_inodecache();
	ext4_exit_mballoc();
	ext4_exit_sysfs();
	ext4_exit_system_zone();
	ext4_exit_pageio();
	ext4_exit_post_read_processing();
	ext4_exit_es();
	ext4_exit_pending();
}

MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
MODULE_SOFTDEP("pre: crc32c");
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)