1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * linux/fs/ext4/super.c
4 *
5 * Copyright (C) 1992, 1993, 1994, 1995
6 * Remy Card (card@masi.ibp.fr)
7 * Laboratoire MASI - Institut Blaise Pascal
8 * Universite Pierre et Marie Curie (Paris VI)
9 *
10 * from
11 *
12 * linux/fs/minix/inode.c
13 *
14 * Copyright (C) 1991, 1992 Linus Torvalds
15 *
16 * Big-endian to little-endian byte-swapping/bitmaps by
17 * David S. Miller (davem@caip.rutgers.edu), 1995
18 */
19
20 #include <linux/module.h>
21 #include <linux/string.h>
22 #include <linux/fs.h>
23 #include <linux/time.h>
24 #include <linux/vmalloc.h>
25 #include <linux/slab.h>
26 #include <linux/init.h>
27 #include <linux/blkdev.h>
28 #include <linux/backing-dev.h>
29 #include <linux/parser.h>
30 #include <linux/buffer_head.h>
31 #include <linux/exportfs.h>
32 #include <linux/vfs.h>
33 #include <linux/random.h>
34 #include <linux/mount.h>
35 #include <linux/namei.h>
36 #include <linux/quotaops.h>
37 #include <linux/seq_file.h>
38 #include <linux/ctype.h>
39 #include <linux/log2.h>
40 #include <linux/crc16.h>
41 #include <linux/dax.h>
42 #include <linux/cleancache.h>
43 #include <linux/uaccess.h>
44 #include <linux/iversion.h>
45 #include <linux/unicode.h>
46 #include <linux/part_stat.h>
47 #include <linux/kthread.h>
48 #include <linux/freezer.h>
49 #include <linux/fsnotify.h>
50 #include <linux/fs_context.h>
51 #include <linux/fs_parser.h>
52
53 #include "ext4.h"
54 #include "ext4_extents.h" /* Needed for trace points definition */
55 #include "ext4_jbd2.h"
56 #include "xattr.h"
57 #include "acl.h"
58 #include "mballoc.h"
59 #include "fsmap.h"
60
61 #define CREATE_TRACE_POINTS
62 #include <trace/events/ext4.h>
63
64 static struct ext4_lazy_init *ext4_li_info;
65 static DEFINE_MUTEX(ext4_li_mtx);
66 static struct ratelimit_state ext4_mount_msg_ratelimit;
67
68 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
69 unsigned long journal_devnum);
70 static int ext4_show_options(struct seq_file *seq, struct dentry *root);
71 static void ext4_update_super(struct super_block *sb);
72 static int ext4_commit_super(struct super_block *sb);
73 static int ext4_mark_recovery_complete(struct super_block *sb,
74 struct ext4_super_block *es);
75 static int ext4_clear_journal_err(struct super_block *sb,
76 struct ext4_super_block *es);
77 static int ext4_sync_fs(struct super_block *sb, int wait);
78 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
79 static int ext4_unfreeze(struct super_block *sb);
80 static int ext4_freeze(struct super_block *sb);
81 static inline int ext2_feature_set_ok(struct super_block *sb);
82 static inline int ext3_feature_set_ok(struct super_block *sb);
83 static void ext4_destroy_lazyinit_thread(void);
84 static void ext4_unregister_li_request(struct super_block *sb);
85 static void ext4_clear_request_list(void);
86 static struct inode *ext4_get_journal_inode(struct super_block *sb,
87 unsigned int journal_inum);
88 static int ext4_validate_options(struct fs_context *fc);
89 static int ext4_check_opt_consistency(struct fs_context *fc,
90 struct super_block *sb);
91 static void ext4_apply_options(struct fs_context *fc, struct super_block *sb);
92 static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param);
93 static int ext4_get_tree(struct fs_context *fc);
94 static int ext4_reconfigure(struct fs_context *fc);
95 static void ext4_fc_free(struct fs_context *fc);
96 static int ext4_init_fs_context(struct fs_context *fc);
97 static const struct fs_parameter_spec ext4_param_specs[];
98
99 /*
100 * Lock ordering
101 *
102 * page fault path:
103 * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
104 * -> page lock -> i_data_sem (rw)
105 *
106 * buffered write path:
107 * sb_start_write -> i_mutex -> mmap_lock
108 * sb_start_write -> i_mutex -> transaction start -> page lock ->
109 * i_data_sem (rw)
110 *
111 * truncate:
112 * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
113 * page lock
114 * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
115 * i_data_sem (rw)
116 *
117 * direct IO:
118 * sb_start_write -> i_mutex -> mmap_lock
119 * sb_start_write -> i_mutex -> transaction start -> i_data_sem (rw)
120 *
121 * writepages:
122 * transaction start -> page lock(s) -> i_data_sem (rw)
123 */
124
/* Operations for the fs_context-based mount API (parse/mount/remount/free). */
static const struct fs_context_operations ext4_context_ops = {
	.parse_param	= ext4_parse_param,
	.get_tree	= ext4_get_tree,
	.reconfigure	= ext4_reconfigure,
	.free		= ext4_fc_free,
};
131
132
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
/*
 * When no dedicated ext2 driver is built but CONFIG_EXT4_USE_FOR_EXT2 is
 * enabled, register an "ext2" filesystem type that is serviced by ext4.
 */
static struct file_system_type ext2_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "ext2",
	.init_fs_context	= ext4_init_fs_context,
	.parameters		= ext4_param_specs,
	.kill_sb		= kill_block_super,
	.fs_flags		= FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext2");
MODULE_ALIAS("ext2");
/* True when @sb's block device was claimed through the ext2 compat type. */
#define IS_EXT2_SB(sb) ((sb)->s_bdev->bd_holder == &ext2_fs_type)
#else
#define IS_EXT2_SB(sb) (0)
#endif
148
149
/* "ext3" filesystem type, always serviced by the ext4 driver. */
static struct file_system_type ext3_fs_type = {
	.owner			= THIS_MODULE,
	.name			= "ext3",
	.init_fs_context	= ext4_init_fs_context,
	.parameters		= ext4_param_specs,
	.kill_sb		= kill_block_super,
	.fs_flags		= FS_REQUIRES_DEV,
};
MODULE_ALIAS_FS("ext3");
MODULE_ALIAS("ext3");
/* True when @sb's block device was claimed through the ext3 compat type. */
#define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type)
161
162
/*
 * Submit a read for the (already locked) buffer @bh.  Completion runs
 * @end_io if supplied, otherwise the generic end_buffer_read_sync().
 */
static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags,
				  bh_end_io_t *end_io)
{
	/*
	 * buffer's verified bit is no longer valid after reading from
	 * disk again due to write out error, clear it to make sure we
	 * recheck the buffer contents.
	 */
	clear_buffer_verified(bh);

	if (end_io)
		bh->b_end_io = end_io;
	else
		bh->b_end_io = end_buffer_read_sync;
	get_bh(bh);
	submit_bh(REQ_OP_READ | op_flags, bh);
}
177
/*
 * Kick off an asynchronous read of the locked buffer @bh, unless it is
 * already up to date, in which case the buffer is simply unlocked.
 */
void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags,
			 bh_end_io_t *end_io)
{
	BUG_ON(!buffer_locked(bh));

	if (!ext4_buffer_uptodate(bh)) {
		__ext4_read_bh(bh, op_flags, end_io);
		return;
	}
	unlock_buffer(bh);
}
189
/*
 * Synchronously read the locked buffer @bh.  Returns 0 when the buffer
 * ends up uptodate (including the already-uptodate fast path), -EIO
 * otherwise.
 */
int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io)
{
	BUG_ON(!buffer_locked(bh));

	if (ext4_buffer_uptodate(bh)) {
		unlock_buffer(bh);
		return 0;
	}

	__ext4_read_bh(bh, op_flags, end_io);
	wait_on_buffer(bh);
	return buffer_uptodate(bh) ? 0 : -EIO;
}
206
/*
 * Lock @bh and read it: synchronously (propagating the result) when
 * @wait is set, asynchronously (always returning 0) otherwise.
 */
int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait)
{
	lock_buffer(bh);
	if (wait)
		return ext4_read_bh(bh, op_flags, NULL);
	ext4_read_bh_nowait(bh, op_flags, NULL);
	return 0;
}
216
217 /*
218 * This works like __bread_gfp() except it uses ERR_PTR for error
219 * returns. Currently with sb_bread it's impossible to distinguish
220 * between ENOMEM and EIO situations (since both result in a NULL
221 * return.
222 */
static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb,
					       sector_t block,
					       blk_opf_t op_flags, gfp_t gfp)
{
	struct buffer_head *bh = sb_getblk_gfp(sb, block, gfp);
	int err;

	if (!bh)
		return ERR_PTR(-ENOMEM);
	/* Fast path: buffer already valid, no I/O needed. */
	if (ext4_buffer_uptodate(bh))
		return bh;

	err = ext4_read_bh_lock(bh, REQ_META | op_flags, true);
	if (err) {
		put_bh(bh);
		return ERR_PTR(err);
	}
	return bh;
}
243
/*
 * Read metadata block @block, allowing the buffer's page to come from
 * movable memory.  Returns the buffer or an ERR_PTR on failure.
 */
struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block,
				  blk_opf_t op_flags)
{
	return __ext4_sb_bread_gfp(sb, block, op_flags, __GFP_MOVABLE);
}
249
/*
 * Like ext4_sb_bread() but without the __GFP_MOVABLE allocation hint.
 * Returns the buffer or an ERR_PTR on failure.
 */
struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb,
					    sector_t block)
{
	return __ext4_sb_bread_gfp(sb, block, 0, 0);
}
255
/*
 * Best-effort readahead of @block: issue an asynchronous REQ_RAHEAD read
 * if the buffer lock can be taken without blocking.  Failures are ignored.
 */
void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block)
{
	struct buffer_head *bh = sb_getblk_gfp(sb, block, 0);

	if (unlikely(!bh))
		return;
	if (trylock_buffer(bh))
		ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL);
	brelse(bh);
}
266
ext4_verify_csum_type(struct super_block * sb,struct ext4_super_block * es)267 static int ext4_verify_csum_type(struct super_block *sb,
268 struct ext4_super_block *es)
269 {
270 if (!ext4_has_feature_metadata_csum(sb))
271 return 1;
272
273 return es->s_checksum_type == EXT4_CRC32C_CHKSUM;
274 }
275
ext4_superblock_csum(struct super_block * sb,struct ext4_super_block * es)276 __le32 ext4_superblock_csum(struct super_block *sb,
277 struct ext4_super_block *es)
278 {
279 struct ext4_sb_info *sbi = EXT4_SB(sb);
280 int offset = offsetof(struct ext4_super_block, s_checksum);
281 __u32 csum;
282
283 csum = ext4_chksum(sbi, ~0, (char *)es, offset);
284
285 return cpu_to_le32(csum);
286 }
287
ext4_superblock_csum_verify(struct super_block * sb,struct ext4_super_block * es)288 static int ext4_superblock_csum_verify(struct super_block *sb,
289 struct ext4_super_block *es)
290 {
291 if (!ext4_has_metadata_csum(sb))
292 return 1;
293
294 return es->s_checksum == ext4_superblock_csum(sb, es);
295 }
296
ext4_superblock_csum_set(struct super_block * sb)297 void ext4_superblock_csum_set(struct super_block *sb)
298 {
299 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
300
301 if (!ext4_has_metadata_csum(sb))
302 return;
303
304 es->s_checksum = ext4_superblock_csum(sb, es);
305 }
306
ext4_block_bitmap(struct super_block * sb,struct ext4_group_desc * bg)307 ext4_fsblk_t ext4_block_bitmap(struct super_block *sb,
308 struct ext4_group_desc *bg)
309 {
310 return le32_to_cpu(bg->bg_block_bitmap_lo) |
311 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
312 (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0);
313 }
314
ext4_inode_bitmap(struct super_block * sb,struct ext4_group_desc * bg)315 ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb,
316 struct ext4_group_desc *bg)
317 {
318 return le32_to_cpu(bg->bg_inode_bitmap_lo) |
319 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
320 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0);
321 }
322
ext4_inode_table(struct super_block * sb,struct ext4_group_desc * bg)323 ext4_fsblk_t ext4_inode_table(struct super_block *sb,
324 struct ext4_group_desc *bg)
325 {
326 return le32_to_cpu(bg->bg_inode_table_lo) |
327 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
328 (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0);
329 }
330
ext4_free_group_clusters(struct super_block * sb,struct ext4_group_desc * bg)331 __u32 ext4_free_group_clusters(struct super_block *sb,
332 struct ext4_group_desc *bg)
333 {
334 return le16_to_cpu(bg->bg_free_blocks_count_lo) |
335 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
336 (__u32)le16_to_cpu(bg->bg_free_blocks_count_hi) << 16 : 0);
337 }
338
ext4_free_inodes_count(struct super_block * sb,struct ext4_group_desc * bg)339 __u32 ext4_free_inodes_count(struct super_block *sb,
340 struct ext4_group_desc *bg)
341 {
342 return le16_to_cpu(bg->bg_free_inodes_count_lo) |
343 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
344 (__u32)le16_to_cpu(bg->bg_free_inodes_count_hi) << 16 : 0);
345 }
346
ext4_used_dirs_count(struct super_block * sb,struct ext4_group_desc * bg)347 __u32 ext4_used_dirs_count(struct super_block *sb,
348 struct ext4_group_desc *bg)
349 {
350 return le16_to_cpu(bg->bg_used_dirs_count_lo) |
351 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
352 (__u32)le16_to_cpu(bg->bg_used_dirs_count_hi) << 16 : 0);
353 }
354
ext4_itable_unused_count(struct super_block * sb,struct ext4_group_desc * bg)355 __u32 ext4_itable_unused_count(struct super_block *sb,
356 struct ext4_group_desc *bg)
357 {
358 return le16_to_cpu(bg->bg_itable_unused_lo) |
359 (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ?
360 (__u32)le16_to_cpu(bg->bg_itable_unused_hi) << 16 : 0);
361 }
362
/* Store the block-bitmap location; the hi half only exists on 64-bit fs. */
void ext4_block_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_block_bitmap_lo = cpu_to_le32((u32)blk);
	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;
	bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32);
}
370
/* Store the inode-bitmap location; the hi half only exists on 64-bit fs. */
void ext4_inode_bitmap_set(struct super_block *sb,
			   struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_bitmap_lo = cpu_to_le32((u32)blk);
	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;
	bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32);
}
378
/* Store the inode-table location; the hi half only exists on 64-bit fs. */
void ext4_inode_table_set(struct super_block *sb,
			  struct ext4_group_desc *bg, ext4_fsblk_t blk)
{
	bg->bg_inode_table_lo = cpu_to_le32((u32)blk);
	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;
	bg->bg_inode_table_hi = cpu_to_le32(blk >> 32);
}
386
/* Store the free-cluster count; the hi half only exists on 64-bit fs. */
void ext4_free_group_clusters_set(struct super_block *sb,
				  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_blocks_count_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;
	bg->bg_free_blocks_count_hi = cpu_to_le16(count >> 16);
}
394
/* Store the free-inode count; the hi half only exists on 64-bit fs. */
void ext4_free_inodes_set(struct super_block *sb,
			  struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_free_inodes_count_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;
	bg->bg_free_inodes_count_hi = cpu_to_le16(count >> 16);
}
402
/* Store the used-directory count; the hi half only exists on 64-bit fs. */
void ext4_used_dirs_set(struct super_block *sb,
			struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_used_dirs_count_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;
	bg->bg_used_dirs_count_hi = cpu_to_le16(count >> 16);
}
410
/* Store the unused-itable count; the hi half only exists on 64-bit fs. */
void ext4_itable_unused_set(struct super_block *sb,
			    struct ext4_group_desc *bg, __u32 count)
{
	bg->bg_itable_unused_lo = cpu_to_le16((__u16)count);
	if (EXT4_DESC_SIZE(sb) < EXT4_MIN_DESC_SIZE_64BIT)
		return;
	bg->bg_itable_unused_hi = cpu_to_le16(count >> 16);
}
418
/*
 * Split @now into a 40-bit on-disk timestamp: low 32 bits little-endian
 * in *@lo, top 8 bits in *@hi.  Values outside [0, 2^40) are clamped.
 */
static void __ext4_update_tstamp(__le32 *lo, __u8 *hi, time64_t now)
{
	now = clamp_val(now, 0, (1ull << 40) - 1);

	*hi = upper_32_bits(now);
	*lo = cpu_to_le32(lower_32_bits(now));
}
426
__ext4_get_tstamp(__le32 * lo,__u8 * hi)427 static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
428 {
429 return ((time64_t)(*hi) << 32) + le32_to_cpu(*lo);
430 }
431 #define ext4_update_tstamp(es, tstamp) \
432 __ext4_update_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi, \
433 ktime_get_real_seconds())
434 #define ext4_get_tstamp(es, tstamp) \
435 __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
436
437 /*
438 * The del_gendisk() function uninitializes the disk-specific data
439 * structures, including the bdi structure, without telling anyone
440 * else. Once this happens, any attempt to call mark_buffer_dirty()
441 * (for example, by ext4_commit_super), will cause a kernel OOPS.
442 * This is a kludge to prevent these oops until we can put in a proper
443 * hook in del_gendisk() to inform the VFS and file system layers.
444 */
block_device_ejected(struct super_block * sb)445 static int block_device_ejected(struct super_block *sb)
446 {
447 struct inode *bd_inode = sb->s_bdev->bd_inode;
448 struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
449
450 return bdi->dev == NULL;
451 }
452
/*
 * jbd2 commit callback: release data blocks freed in the committed
 * transaction and run every callback queued on its private list.
 */
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int error = is_journal_aborted(journal);
	struct ext4_journal_cb_entry *jce;

	BUG_ON(txn->t_state == T_FINISHED);

	ext4_process_freed_data(sb, txn->t_tid);

	spin_lock(&sbi->s_md_lock);
	while (!list_empty(&txn->t_private_list)) {
		jce = list_entry(txn->t_private_list.next,
				 struct ext4_journal_cb_entry, jce_list);
		list_del_init(&jce->jce_list);
		/* Invoke the callback without s_md_lock held. */
		spin_unlock(&sbi->s_md_lock);
		jce->jce_func(sb, jce, error);
		spin_lock(&sbi->s_md_lock);
	}
	spin_unlock(&sbi->s_md_lock);
}
475
476 /*
477 * This writepage callback for write_cache_pages()
478 * takes care of a few cases after page cleaning.
479 *
480 * write_cache_pages() already checks for dirty pages
481 * and calls clear_page_dirty_for_io(), which we want,
482 * to write protect the pages.
483 *
484 * However, we may have to redirty a page (see below.)
485 */
static int ext4_journalled_writepage_callback(struct page *page,
					      struct writeback_control *wbc,
					      void *data)
{
	/* @data is the committing transaction, see the caller below. */
	transaction_t *transaction = (transaction_t *) data;
	struct buffer_head *bh, *head;
	struct journal_head *jh;

	bh = head = page_buffers(page);
	do {
		/*
		 * We have to redirty a page in these cases:
		 * 1) If buffer is dirty, it means the page was dirty because it
		 * contains a buffer that needs checkpointing. So the dirty bit
		 * needs to be preserved so that checkpointing writes the buffer
		 * properly.
		 * 2) If buffer is not part of the committing transaction
		 * (we may have just accidentally come across this buffer because
		 * inode range tracking is not exact) or if the currently running
		 * transaction already contains this buffer as well, dirty bit
		 * needs to be preserved so that the buffer gets writeprotected
		 * properly on running transaction's commit.
		 */
		jh = bh2jh(bh);
		if (buffer_dirty(bh) ||
		    (jh && (jh->b_transaction != transaction ||
			    jh->b_next_transaction))) {
			redirty_page_for_writepage(wbc, page);
			goto out;
		}
	} while ((bh = bh->b_this_page) != head);

out:
	/* Tell write_cache_pages() not to write the page itself. */
	return AOP_WRITEPAGE_ACTIVATE;
}
521
ext4_journalled_submit_inode_data_buffers(struct jbd2_inode * jinode)522 static int ext4_journalled_submit_inode_data_buffers(struct jbd2_inode *jinode)
523 {
524 struct address_space *mapping = jinode->i_vfs_inode->i_mapping;
525 struct writeback_control wbc = {
526 .sync_mode = WB_SYNC_ALL,
527 .nr_to_write = LONG_MAX,
528 .range_start = jinode->i_dirty_start,
529 .range_end = jinode->i_dirty_end,
530 };
531
532 return write_cache_pages(mapping, &wbc,
533 ext4_journalled_writepage_callback,
534 jinode->i_transaction);
535 }
536
ext4_journal_submit_inode_data_buffers(struct jbd2_inode * jinode)537 static int ext4_journal_submit_inode_data_buffers(struct jbd2_inode *jinode)
538 {
539 int ret;
540
541 if (ext4_should_journal_data(jinode->i_vfs_inode))
542 ret = ext4_journalled_submit_inode_data_buffers(jinode);
543 else
544 ret = jbd2_journal_submit_inode_data_buffers(jinode);
545
546 return ret;
547 }
548
ext4_journal_finish_inode_data_buffers(struct jbd2_inode * jinode)549 static int ext4_journal_finish_inode_data_buffers(struct jbd2_inode *jinode)
550 {
551 int ret = 0;
552
553 if (!ext4_should_journal_data(jinode->i_vfs_inode))
554 ret = jbd2_journal_finish_inode_data_buffers(jinode);
555
556 return ret;
557 }
558
system_going_down(void)559 static bool system_going_down(void)
560 {
561 return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF
562 || system_state == SYSTEM_RESTART;
563 }
564
/* One entry mapping a kernel errno to its EXT4_ERR_* code. */
struct ext4_err_translation {
	int code;	/* EXT4_ERR_* value */
	int errno;	/* positive kernel errno */
};

#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }

/* Errnos that have a dedicated EXT4_ERR_* representation. */
static struct ext4_err_translation err_translation[] = {
	EXT4_ERR_TRANSLATE(EIO),
	EXT4_ERR_TRANSLATE(ENOMEM),
	EXT4_ERR_TRANSLATE(EFSBADCRC),
	EXT4_ERR_TRANSLATE(EFSCORRUPTED),
	EXT4_ERR_TRANSLATE(ENOSPC),
	EXT4_ERR_TRANSLATE(ENOKEY),
	EXT4_ERR_TRANSLATE(EROFS),
	EXT4_ERR_TRANSLATE(EFBIG),
	EXT4_ERR_TRANSLATE(EEXIST),
	EXT4_ERR_TRANSLATE(ERANGE),
	EXT4_ERR_TRANSLATE(EOVERFLOW),
	EXT4_ERR_TRANSLATE(EBUSY),
	EXT4_ERR_TRANSLATE(ENOTDIR),
	EXT4_ERR_TRANSLATE(ENOTEMPTY),
	EXT4_ERR_TRANSLATE(ESHUTDOWN),
	EXT4_ERR_TRANSLATE(EFAULT),
};
590
ext4_errno_to_code(int errno)591 static int ext4_errno_to_code(int errno)
592 {
593 int i;
594
595 for (i = 0; i < ARRAY_SIZE(err_translation); i++)
596 if (err_translation[i].errno == errno)
597 return err_translation[i].code;
598 return EXT4_ERR_UNKNOWN;
599 }
600
/*
 * Record an error's details (code, inode, block, call site, time) in the
 * in-memory sb info.  "last error" fields are overwritten on every call;
 * "first error" fields are written only once.
 */
static void save_error_info(struct super_block *sb, int error,
			    __u32 ino, __u64 block,
			    const char *func, unsigned int line)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	/* We default to EFSCORRUPTED error... */
	if (error == 0)
		error = EFSCORRUPTED;

	spin_lock(&sbi->s_error_lock);
	sbi->s_add_error_count++;
	sbi->s_last_error_code = error;
	sbi->s_last_error_line = line;
	sbi->s_last_error_ino = ino;
	sbi->s_last_error_block = block;
	sbi->s_last_error_func = func;
	sbi->s_last_error_time = ktime_get_real_seconds();
	/* A zero s_first_error_time means no error was recorded before. */
	if (!sbi->s_first_error_time) {
		sbi->s_first_error_code = error;
		sbi->s_first_error_line = line;
		sbi->s_first_error_ino = ino;
		sbi->s_first_error_block = block;
		sbi->s_first_error_func = func;
		sbi->s_first_error_time = sbi->s_last_error_time;
	}
	spin_unlock(&sbi->s_error_lock);
}
629
630 /* Deal with the reporting of failure conditions on a filesystem such as
631 * inconsistencies detected or read IO failures.
632 *
633 * On ext2, we can store the error state of the filesystem in the
634 * superblock. That is not possible on ext4, because we may have other
635 * write ordering constraints on the superblock which prevent us from
636 * writing it out straight away; and given that the journal is about to
637 * be aborted, we can't rely on the current, or future, transactions to
638 * write out the superblock safely.
639 *
640 * We'll just use the jbd2_journal_abort() error code to record an error in
641 * the journal instead. On recovery, the journal will complain about
642 * that error until we've noted it down and cleared it.
643 *
644 * If force_ro is set, we unconditionally force the filesystem into an
645 * ABORT|READONLY state, unless the error response on the fs has been set to
646 * panic in which case we take the easy way out and panic immediately. This is
647 * used to deal with unrecoverable failures such as journal IO errors or ENOMEM
648 * at a critical moment in log management.
649 */
static void ext4_handle_error(struct super_block *sb, bool force_ro, int error,
			      __u32 ino, __u64 block,
			      const char *func, unsigned int line)
{
	journal_t *journal = EXT4_SB(sb)->s_journal;
	bool continue_fs = !force_ro && test_opt(sb, ERRORS_CONT);

	EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
	if (test_opt(sb, WARN_ON_ERROR))
		WARN_ON_ONCE(1);

	/* Unless we may keep going, abort the journal before going read-only. */
	if (!continue_fs && !sb_rdonly(sb)) {
		ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED);
		if (journal)
			jbd2_journal_abort(journal, -EIO);
	}

	if (!bdev_read_only(sb->s_bdev)) {
		save_error_info(sb, error, ino, block, func, line);
		/*
		 * In case the fs should keep running, we need to writeout
		 * superblock through the journal. Due to lock ordering
		 * constraints, it may not be safe to do it right here so we
		 * defer superblock flushing to a workqueue.
		 */
		if (continue_fs && journal)
			schedule_work(&EXT4_SB(sb)->s_error_work);
		else
			ext4_commit_super(sb);
	}

	/*
	 * We force ERRORS_RO behavior when system is rebooting. Otherwise we
	 * could panic during 'reboot -f' as the underlying device got already
	 * disabled.
	 */
	if (test_opt(sb, ERRORS_PANIC) && !system_going_down()) {
		panic("EXT4-fs (device %s): panic forced after error\n",
		      sb->s_id);
	}

	if (sb_rdonly(sb) || continue_fs)
		return;

	ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only");
	/*
	 * Make sure updated value of ->s_mount_flags will be visible before
	 * ->s_flags update
	 */
	smp_wmb();
	sb->s_flags |= SB_RDONLY;
}
702
/*
 * Workqueue handler deferred from ext4_handle_error(): flush the recorded
 * error info to the on-disk superblock, preferably through the journal,
 * falling back to a direct write when that fails.
 */
static void flush_stashed_error_work(struct work_struct *work)
{
	struct ext4_sb_info *sbi = container_of(work, struct ext4_sb_info,
						s_error_work);
	journal_t *journal = sbi->s_journal;
	handle_t *handle;

	/*
	 * If the journal is still running, we have to write out superblock
	 * through the journal to avoid collisions of other journalled sb
	 * updates.
	 *
	 * We use directly jbd2 functions here to avoid recursing back into
	 * ext4 error handling code during handling of previous errors.
	 */
	if (!sb_rdonly(sbi->s_sb) && journal) {
		struct buffer_head *sbh = sbi->s_sbh;

		handle = jbd2_journal_start(journal, 1);
		if (IS_ERR(handle))
			goto write_directly;
		if (jbd2_journal_get_write_access(handle, sbh)) {
			jbd2_journal_stop(handle);
			goto write_directly;
		}
		ext4_update_super(sbi->s_sb);
		/*
		 * A prior failed write leaves the buffer !uptodate or with the
		 * write_io_error bit set; clear both so this write can proceed.
		 */
		if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
			ext4_msg(sbi->s_sb, KERN_ERR, "previous I/O error to "
				 "superblock detected");
			clear_buffer_write_io_error(sbh);
			set_buffer_uptodate(sbh);
		}

		if (jbd2_journal_dirty_metadata(handle, sbh)) {
			jbd2_journal_stop(handle);
			goto write_directly;
		}
		jbd2_journal_stop(handle);
		ext4_notify_error_sysfs(sbi);
		return;
	}
write_directly:
	/*
	 * Write through journal failed. Write sb directly to get error info
	 * out and hope for the best.
	 */
	ext4_commit_super(sbi->s_sb);
	ext4_notify_error_sysfs(sbi);
}
751
752 #define ext4_error_ratelimit(sb) \
753 ___ratelimit(&(EXT4_SB(sb)->s_err_ratelimit_state), \
754 "EXT4-fs error")
755
/*
 * Report a filesystem-level error: log a rate-limited message, notify
 * fsnotify listeners, and invoke the error policy (continue, remount
 * read-only, or panic) via ext4_handle_error().
 */
void __ext4_error(struct super_block *sb, const char *function,
		  unsigned int line, bool force_ro, int error, __u64 block,
		  const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	/* Nothing useful to do once the fs has been forcibly shut down. */
	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
		return;

	trace_ext4_error(sb, function, line);
	if (ext4_error_ratelimit(sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		printk(KERN_CRIT
		       "EXT4-fs error (device %s): %s:%d: comm %s: %pV\n",
		       sb->s_id, function, line, current->comm, &vaf);
		va_end(args);
	}
	fsnotify_sb_error(sb, NULL, error ? error : EFSCORRUPTED);

	ext4_handle_error(sb, force_ro, error, 0, block, function, line);
}
780
/*
 * Like __ext4_error() but for an error attributed to a specific inode;
 * the logged message includes the inode number and, when non-zero, the
 * block number.
 */
void __ext4_error_inode(struct inode *inode, const char *function,
			unsigned int line, ext4_fsblk_t block, int error,
			const char *fmt, ...)
{
	va_list args;
	struct va_format vaf;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return;

	trace_ext4_error(inode->i_sb, function, line);
	if (ext4_error_ratelimit(inode->i_sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		if (block)
			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
			       "inode #%lu: block %llu: comm %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       block, current->comm, &vaf);
		else
			printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: "
			       "inode #%lu: comm %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       current->comm, &vaf);
		va_end(args);
	}
	fsnotify_sb_error(inode->i_sb, inode, error ? error : EFSCORRUPTED);

	ext4_handle_error(inode->i_sb, false, error, inode->i_ino, block,
			  function, line);
}
813
/*
 * Like __ext4_error_inode() but for an error attributed to an open file;
 * the logged message additionally includes the file's path (best effort)
 * and the error is always treated as EFSCORRUPTED.
 */
void __ext4_error_file(struct file *file, const char *function,
		       unsigned int line, ext4_fsblk_t block,
		       const char *fmt, ...)
{
	va_list args;
	struct va_format vaf;
	struct inode *inode = file_inode(file);
	char pathname[80], *path;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return;

	trace_ext4_error(inode->i_sb, function, line);
	if (ext4_error_ratelimit(inode->i_sb)) {
		path = file_path(file, pathname, sizeof(pathname));
		if (IS_ERR(path))
			path = "(unknown)";
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		if (block)
			printk(KERN_CRIT
			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
			       "block %llu: comm %s: path %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       block, current->comm, path, &vaf);
		else
			printk(KERN_CRIT
			       "EXT4-fs error (device %s): %s:%d: inode #%lu: "
			       "comm %s: path %s: %pV\n",
			       inode->i_sb->s_id, function, line, inode->i_ino,
			       current->comm, path, &vaf);
		va_end(args);
	}
	fsnotify_sb_error(inode->i_sb, inode, EFSCORRUPTED);

	ext4_handle_error(inode->i_sb, false, EFSCORRUPTED, inode->i_ino, block,
			  function, line);
}
853
/*
 * Translate a (negative) errno into a human-readable string.  Unknown
 * codes are formatted into the caller-provided @nbuf (at least 16 bytes)
 * if one is given; otherwise NULL is returned.
 */
const char *ext4_decode_error(struct super_block *sb, int errno,
			      char nbuf[16])
{
	char *errstr = NULL;

	switch (errno) {
	case -EFSCORRUPTED:
		errstr = "Corrupt filesystem";
		break;
	case -EFSBADCRC:
		errstr = "Filesystem failed CRC";
		break;
	case -EIO:
		errstr = "IO failure";
		break;
	case -ENOMEM:
		errstr = "Out of memory";
		break;
	case -EROFS:
		/* Report an aborted journal rather than a generic EROFS. */
		if (!sb || (EXT4_SB(sb)->s_journal &&
			    EXT4_SB(sb)->s_journal->j_flags & JBD2_ABORT))
			errstr = "Journal has aborted";
		else
			errstr = "Readonly filesystem";
		break;
	default:
		/* If the caller passed in an extra buffer for unknown
		 * errors, textualise them now.  Else we just return
		 * NULL. */
		if (nbuf) {
			/* Check for truncated error codes... */
			/*
			 * NOTE(review): snprintf() with size 16 always
			 * NUL-terminates; ">= 0" only rejects output errors,
			 * not truncation — confirm that is the intent.
			 */
			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
				errstr = nbuf;
		}
		break;
	}

	return errstr;
}
893
894 /* __ext4_std_error decodes expected errors from journaling functions
895 * automatically and invokes the appropriate error response. */
896
void __ext4_std_error(struct super_block *sb, const char *function,
		      unsigned int line, int errno)
{
	char nbuf[16];
	const char *errstr;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
		return;

	/* Special case: if the error is EROFS, and we're not already
	 * inside a transaction, then there's really no point in logging
	 * an error. */
	if (errno == -EROFS && journal_current_handle() == NULL && sb_rdonly(sb))
		return;

	if (ext4_error_ratelimit(sb)) {
		errstr = ext4_decode_error(sb, errno, nbuf);
		printk(KERN_CRIT "EXT4-fs error (device %s) in %s:%d: %s\n",
		       sb->s_id, function, line, errstr);
	}
	fsnotify_sb_error(sb, NULL, errno ? errno : EFSCORRUPTED);

	/* @errno is negative here; hand the positive code to the handler. */
	ext4_handle_error(sb, false, -errno, 0, 0, function, line);
}
921
/*
 * Print an ext4 message, tagged with the device name when a superblock
 * is supplied.  @prefix is a printk level string (KERN_*).  Messages
 * against a specific sb are counted and ratelimited per-sb; messages
 * with a NULL sb (early mount/module paths) are never ratelimited.
 */
void __ext4_msg(struct super_block *sb,
		const char *prefix, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (sb) {
		atomic_inc(&EXT4_SB(sb)->s_msg_count);
		if (!___ratelimit(&(EXT4_SB(sb)->s_msg_ratelimit_state),
				  "EXT4-fs"))
			return;
	}

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	if (sb)
		printk("%sEXT4-fs (%s): %pV\n", prefix, sb->s_id, &vaf);
	else
		printk("%sEXT4-fs: %pV\n", prefix, &vaf);
	va_end(args);
}
944
/*
 * Account a warning against @sb and return non-zero when the warning
 * should actually be printed (i.e. it was not suppressed by ratelimit).
 */
static int ext4_warning_ratelimit(struct super_block *sb)
{
	atomic_inc(&EXT4_SB(sb)->s_warning_count);
	return ___ratelimit(&(EXT4_SB(sb)->s_warning_ratelimit_state),
			    "EXT4-fs warning");
}
951
/*
 * Print a ratelimited, non-fatal warning for @sb, annotated with the
 * reporting function and line.  Does not alter filesystem state.
 */
void __ext4_warning(struct super_block *sb, const char *function,
		    unsigned int line, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (!ext4_warning_ratelimit(sb))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: %pV\n",
	       sb->s_id, function, line, &vaf);
	va_end(args);
}
968
/*
 * As __ext4_warning(), but for a specific inode: additionally reports
 * the inode number and the current task's comm.  Shares the per-sb
 * warning ratelimit state.
 */
void __ext4_warning_inode(const struct inode *inode, const char *function,
			  unsigned int line, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	if (!ext4_warning_ratelimit(inode->i_sb))
		return;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;
	printk(KERN_WARNING "EXT4-fs warning (device %s): %s:%d: "
	       "inode #%lu: comm %s: %pV\n", inode->i_sb->s_id,
	       function, line, inode->i_ino, current->comm, &vaf);
	va_end(args);
}
986
/*
 * Report a corruption error found while holding the group lock for @grp.
 * In the ERRORS_CONT case the error info is saved and committed from a
 * workqueue (we cannot start a transaction here with the group locked).
 * Otherwise the group lock is dropped around ext4_handle_error() and
 * re-taken before returning, as annotated below.
 */
void __ext4_grp_locked_error(const char *function, unsigned int line,
			     struct super_block *sb, ext4_group_t grp,
			     unsigned long ino, ext4_fsblk_t block,
			     const char *fmt, ...)
__releases(bitlock)
__acquires(bitlock)
{
	struct va_format vaf;
	va_list args;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(sb))))
		return;

	trace_ext4_error(sb, function, line);
	if (ext4_error_ratelimit(sb)) {
		va_start(args, fmt);
		vaf.fmt = fmt;
		vaf.va = &args;
		/* Built up piecewise with KERN_CONT: group, then optional
		 * inode and block, then the caller's formatted message. */
		printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ",
		       sb->s_id, function, line, grp);
		if (ino)
			printk(KERN_CONT "inode %lu: ", ino);
		if (block)
			printk(KERN_CONT "block %llu:",
			       (unsigned long long) block);
		printk(KERN_CONT "%pV\n", &vaf);
		va_end(args);
	}

	if (test_opt(sb, ERRORS_CONT)) {
		if (test_opt(sb, WARN_ON_ERROR))
			WARN_ON_ONCE(1);
		EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
		if (!bdev_read_only(sb->s_bdev)) {
			/* Defer the superblock update to the error work
			 * queue; we hold the group lock here. */
			save_error_info(sb, EFSCORRUPTED, ino, block, function,
					line);
			schedule_work(&EXT4_SB(sb)->s_error_work);
		}
		return;
	}
	ext4_unlock_group(sb, grp);
	ext4_handle_error(sb, false, EFSCORRUPTED, ino, block, function, line);
	/*
	 * We only get here in the ERRORS_RO case; relocking the group
	 * may be dangerous, but nothing bad will happen since the
	 * filesystem will have already been marked read/only and the
	 * journal has been aborted.  We return 1 as a hint to callers
	 * who might what to use the return value from
	 * ext4_grp_locked_error() to distinguish between the
	 * ERRORS_CONT and ERRORS_RO case, and perhaps return more
	 * aggressively from the ext4 function in question, with a
	 * more appropriate error code.
	 */
	ext4_lock_group(sb, grp);
	return;
}
1043
/*
 * Mark a block group's block and/or inode bitmap (per @flags) as
 * corrupted.  The first time a given bitmap is marked, the group's
 * free-cluster / free-inode contribution is subtracted from the global
 * percpu counters so the corrupt group's space is no longer advertised.
 * Marking is idempotent: ext4_test_and_set_bit() guards against
 * double-subtraction on repeated calls.
 */
void ext4_mark_group_bitmap_corrupted(struct super_block *sb,
				      ext4_group_t group,
				      unsigned int flags)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
	int ret;

	if (!grp || !gdp)
		return;
	if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) {
		ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
					    &grp->bb_state);
		if (!ret)
			percpu_counter_sub(&sbi->s_freeclusters_counter,
					   grp->bb_free);
	}

	if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) {
		ret = ext4_test_and_set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
					    &grp->bb_state);
		/* gdp is known non-NULL here (checked above), so the old
		 * "!ret && gdp" re-check was redundant and is dropped. */
		if (!ret) {
			int count;

			count = ext4_free_inodes_count(sb, gdp);
			percpu_counter_sub(&sbi->s_freeinodes_counter,
					   count);
		}
	}
}
1075
/*
 * Upgrade an EXT4_GOOD_OLD_REV on-disk superblock to EXT4_DYNAMIC_REV so
 * that extended feature flags and inode sizes can be used.  No-op when
 * the superblock is already at a dynamic revision.
 */
void ext4_update_dynamic_rev(struct super_block *sb)
{
	struct ext4_super_block *es = EXT4_SB(sb)->s_es;

	if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV)
		return;

	ext4_warning(sb,
		     "updating to rev %d because of new feature flag, "
		     "running e2fsck is recommended",
		     EXT4_DYNAMIC_REV);

	/* Old-rev filesystems used implicit values; make them explicit. */
	es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO);
	es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE);
	es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV);
	/* leave es->s_feature_*compat flags alone */
	/* es->s_uuid will be set by e2fsck if empty */

	/*
	 * The rest of the superblock fields should be zero, and if not it
	 * means they are likely already in use, so leave them alone.  We
	 * can leave it up to e2fsck to clean up any inconsistencies there.
	 */
}
1100
1101 /*
1102 * Open the external journal device
1103 */
ext4_blkdev_get(dev_t dev,struct super_block * sb)1104 static struct block_device *ext4_blkdev_get(dev_t dev, struct super_block *sb)
1105 {
1106 struct block_device *bdev;
1107
1108 bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL, sb);
1109 if (IS_ERR(bdev))
1110 goto fail;
1111 return bdev;
1112
1113 fail:
1114 ext4_msg(sb, KERN_ERR,
1115 "failed to open journal device unknown-block(%u,%u) %ld",
1116 MAJOR(dev), MINOR(dev), PTR_ERR(bdev));
1117 return NULL;
1118 }
1119
1120 /*
1121 * Release the journal device
1122 */
ext4_blkdev_put(struct block_device * bdev)1123 static void ext4_blkdev_put(struct block_device *bdev)
1124 {
1125 blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
1126 }
1127
ext4_blkdev_remove(struct ext4_sb_info * sbi)1128 static void ext4_blkdev_remove(struct ext4_sb_info *sbi)
1129 {
1130 struct block_device *bdev;
1131 bdev = sbi->s_journal_bdev;
1132 if (bdev) {
1133 /*
1134 * Invalidate the journal device's buffers. We don't want them
1135 * floating about in memory - the physical journal device may
1136 * hotswapped, and it breaks the `ro-after' testing code.
1137 */
1138 invalidate_bdev(bdev);
1139 ext4_blkdev_put(bdev);
1140 sbi->s_journal_bdev = NULL;
1141 }
1142 }
1143
/* Map an entry on the sb orphan list back to its VFS inode. */
static inline struct inode *orphan_list_entry(struct list_head *l)
{
	return &list_entry(l, struct ext4_inode_info, i_orphan)->vfs_inode;
}
1148
/*
 * Debug helper: dump the on-disk orphan head and every inode still on
 * the in-memory orphan list.  Called from ext4_put_super() when the
 * list is unexpectedly non-empty at unmount.
 */
static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi)
{
	struct list_head *l;

	ext4_msg(sb, KERN_ERR, "sb orphan head is %d",
		 le32_to_cpu(sbi->s_es->s_last_orphan));

	printk(KERN_ERR "sb_info orphan list:\n");
	list_for_each(l, &sbi->s_orphan) {
		struct inode *inode = orphan_list_entry(l);
		printk(KERN_ERR "  "
		       "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
		       inode->i_sb->s_id, inode->i_ino, inode,
		       inode->i_mode, inode->i_nlink,
		       NEXT_ORPHAN(inode));
	}
}
1166
1167 #ifdef CONFIG_QUOTA
1168 static int ext4_quota_off(struct super_block *sb, int type);
1169
/* Turn off every enabled quota type during unmount. */
static inline void ext4_quota_off_umount(struct super_block *sb)
{
	int type;

	/* Use our quota_off function to clear inode flags etc. */
	for (type = 0; type < EXT4_MAXQUOTAS; type++)
		ext4_quota_off(sb, type);
}
1178
1179 /*
1180 * This is a helper function which is used in the mount/remount
1181 * codepaths (which holds s_umount) to fetch the quota file name.
1182 */
get_qf_name(struct super_block * sb,struct ext4_sb_info * sbi,int type)1183 static inline char *get_qf_name(struct super_block *sb,
1184 struct ext4_sb_info *sbi,
1185 int type)
1186 {
1187 return rcu_dereference_protected(sbi->s_qf_names[type],
1188 lockdep_is_held(&sb->s_umount));
1189 }
1190 #else
/* No-op stand-in when quota support is not compiled in. */
static inline void ext4_quota_off_umount(struct super_block *sb)
{
}
1194 #endif
1195
/*
 * Final unmount teardown: flush pending work, destroy the journal,
 * write back a clean superblock, and free all in-memory ext4 state.
 * The ordering of the steps below is significant; see the inline
 * comments.
 */
static void ext4_put_super(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	struct buffer_head **group_desc;
	struct flex_groups **flex_groups;
	int aborted = 0;
	int i, err;

	/*
	 * Unregister sysfs before destroying jbd2 journal.
	 * Since we could still access attr_journal_task attribute via sysfs
	 * path which could have sbi->s_journal->j_task as NULL
	 * Unregister sysfs before flush sbi->s_error_work.
	 * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If
	 * read metadata verify failed then will queue error work.
	 * flush_stashed_error_work will call start_this_handle may trigger
	 * BUG_ON.
	 */
	ext4_unregister_sysfs(sb);

	if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount"))
		ext4_msg(sb, KERN_INFO, "unmounting filesystem.");

	ext4_unregister_li_request(sb);
	ext4_quota_off_umount(sb);

	flush_work(&sbi->s_error_work);
	destroy_workqueue(sbi->rsv_conversion_wq);
	ext4_release_orphan_info(sb);

	/* Destroy the journal; only complain if it had not already aborted. */
	if (sbi->s_journal) {
		aborted = is_journal_aborted(sbi->s_journal);
		err = jbd2_journal_destroy(sbi->s_journal);
		sbi->s_journal = NULL;
		if ((err < 0) && !aborted) {
			ext4_abort(sb, -err, "Couldn't clean up the journal");
		}
	}

	ext4_es_unregister_shrinker(sbi);
	del_timer_sync(&sbi->s_err_report);
	ext4_release_system_zone(sb);
	ext4_mb_release(sb);
	ext4_ext_release(sb);

	/* A clean r/w unmount no longer needs journal recovery or orphan
	 * processing on the next mount. */
	if (!sb_rdonly(sb) && !aborted) {
		ext4_clear_feature_journal_needs_recovery(sb);
		ext4_clear_feature_orphan_present(sb);
		es->s_state = cpu_to_le16(sbi->s_mount_state);
	}
	if (!sb_rdonly(sb))
		ext4_commit_super(sb);

	/* Free the RCU-managed group descriptor and flex group arrays. */
	rcu_read_lock();
	group_desc = rcu_dereference(sbi->s_group_desc);
	for (i = 0; i < sbi->s_gdb_count; i++)
		brelse(group_desc[i]);
	kvfree(group_desc);
	flex_groups = rcu_dereference(sbi->s_flex_groups);
	if (flex_groups) {
		for (i = 0; i < sbi->s_flex_groups_allocated; i++)
			kvfree(flex_groups[i]);
		kvfree(flex_groups);
	}
	rcu_read_unlock();
	percpu_counter_destroy(&sbi->s_freeclusters_counter);
	percpu_counter_destroy(&sbi->s_freeinodes_counter);
	percpu_counter_destroy(&sbi->s_dirs_counter);
	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
	percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
	percpu_free_rwsem(&sbi->s_writepages_rwsem);
#ifdef CONFIG_QUOTA
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(get_qf_name(sb, sbi, i));
#endif

	/* Debugging code just in case the in-memory inode orphan list
	 * isn't empty.  The on-disk one can be non-empty if we've
	 * detected an error and taken the fs readonly, but the
	 * in-memory list had better be clean by this point. */
	if (!list_empty(&sbi->s_orphan))
		dump_orphan_list(sb, sbi);
	ASSERT(list_empty(&sbi->s_orphan));

	sync_blockdev(sb->s_bdev);
	invalidate_bdev(sb->s_bdev);
	if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) {
		/* Flush and release a distinct external journal device. */
		sync_blockdev(sbi->s_journal_bdev);
		ext4_blkdev_remove(sbi);
	}

	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
	sbi->s_ea_inode_cache = NULL;

	ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
	sbi->s_ea_block_cache = NULL;

	ext4_stop_mmpd(sbi);

	brelse(sbi->s_sbh);
	sb->s_fs_info = NULL;
	/*
	 * Now that we are completely done shutting down the
	 * superblock, we need to actually destroy the kobject.
	 */
	kobject_put(&sbi->s_kobj);
	wait_for_completion(&sbi->s_kobj_unregister);
	if (sbi->s_chksum_driver)
		crypto_free_shash(sbi->s_chksum_driver);
	kfree(sbi->s_blockgroup_lock);
	fs_put_dax(sbi->s_daxdev, NULL);
	fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
#if IS_ENABLED(CONFIG_UNICODE)
	utf8_unload(sb->s_encoding);
#endif
	kfree(sbi);
}
1314
1315 static struct kmem_cache *ext4_inode_cachep;
1316
1317 /*
1318 * Called inside transaction, so use GFP_NOFS
1319 */
ext4_alloc_inode(struct super_block * sb)1320 static struct inode *ext4_alloc_inode(struct super_block *sb)
1321 {
1322 struct ext4_inode_info *ei;
1323
1324 ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS);
1325 if (!ei)
1326 return NULL;
1327
1328 inode_set_iversion(&ei->vfs_inode, 1);
1329 ei->i_flags = 0;
1330 spin_lock_init(&ei->i_raw_lock);
1331 INIT_LIST_HEAD(&ei->i_prealloc_list);
1332 atomic_set(&ei->i_prealloc_active, 0);
1333 spin_lock_init(&ei->i_prealloc_lock);
1334 ext4_es_init_tree(&ei->i_es_tree);
1335 rwlock_init(&ei->i_es_lock);
1336 INIT_LIST_HEAD(&ei->i_es_list);
1337 ei->i_es_all_nr = 0;
1338 ei->i_es_shk_nr = 0;
1339 ei->i_es_shrink_lblk = 0;
1340 ei->i_reserved_data_blocks = 0;
1341 spin_lock_init(&(ei->i_block_reservation_lock));
1342 ext4_init_pending_tree(&ei->i_pending_tree);
1343 #ifdef CONFIG_QUOTA
1344 ei->i_reserved_quota = 0;
1345 memset(&ei->i_dquot, 0, sizeof(ei->i_dquot));
1346 #endif
1347 ei->jinode = NULL;
1348 INIT_LIST_HEAD(&ei->i_rsv_conversion_list);
1349 spin_lock_init(&ei->i_completed_io_lock);
1350 ei->i_sync_tid = 0;
1351 ei->i_datasync_tid = 0;
1352 atomic_set(&ei->i_unwritten, 0);
1353 INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
1354 ext4_fc_init_inode(&ei->vfs_inode);
1355 mutex_init(&ei->i_fc_lock);
1356 return &ei->vfs_inode;
1357 }
1358
/*
 * ->drop_inode: decide whether the inode should be evicted when its
 * last reference is dropped.  Falls back to fscrypt's policy when the
 * generic rule says keep it.  Returns non-zero to drop.
 */
static int ext4_drop_inode(struct inode *inode)
{
	int drop = generic_drop_inode(inode);

	if (!drop)
		drop = fscrypt_drop_inode(inode);

	trace_ext4_drop_inode(inode, drop);
	return drop;
}
1369
/*
 * ->free_inode: RCU-delayed final free of the in-memory inode.  Warns
 * if the inode is still linked on the fast-commit list, which would
 * indicate a use-after-free hazard.
 */
static void ext4_free_in_core_inode(struct inode *inode)
{
	fscrypt_free_inode(inode);
	if (!list_empty(&(EXT4_I(inode)->i_fc_list))) {
		pr_warn("%s: inode %ld still in fc list",
			__func__, inode->i_ino);
	}
	kmem_cache_free(ext4_inode_cachep, EXT4_I(inode));
}
1379
/*
 * ->destroy_inode: sanity checks before the inode memory is freed.
 * Dumps diagnostics if the inode is still on the orphan list or still
 * has reserved delalloc blocks; the actual free happens in
 * ext4_free_in_core_inode().
 */
static void ext4_destroy_inode(struct inode *inode)
{
	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
		ext4_msg(inode->i_sb, KERN_ERR,
			 "Inode %lu (%p): orphan list check failed!",
			 inode->i_ino, EXT4_I(inode));
		print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
				EXT4_I(inode), sizeof(struct ext4_inode_info),
				true);
		dump_stack();
	}

	if (EXT4_I(inode)->i_reserved_data_blocks)
		ext4_msg(inode->i_sb, KERN_ERR,
			 "Inode %lu (%p): i_reserved_data_blocks (%u) not cleared!",
			 inode->i_ino, EXT4_I(inode),
			 EXT4_I(inode)->i_reserved_data_blocks);
}
1398
/*
 * Slab constructor: runs once per slab object (not per allocation), so
 * only state that survives free/realloc cycles belongs here.
 */
static void init_once(void *foo)
{
	struct ext4_inode_info *ei = foo;

	INIT_LIST_HEAD(&ei->i_orphan);
	init_rwsem(&ei->xattr_sem);
	init_rwsem(&ei->i_data_sem);
	inode_init_once(&ei->vfs_inode);
	ext4_fc_init_inode(&ei->vfs_inode);
}
1409
/*
 * Create the ext4 inode slab cache.  The usercopy whitelist is limited
 * to the i_data array.  Returns 0 or -ENOMEM.
 */
static int __init init_inodecache(void)
{
	ext4_inode_cachep = kmem_cache_create_usercopy("ext4_inode_cache",
				sizeof(struct ext4_inode_info), 0,
				(SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD|
					SLAB_ACCOUNT),
				offsetof(struct ext4_inode_info, i_data),
				sizeof_field(struct ext4_inode_info, i_data),
				init_once);
	if (ext4_inode_cachep == NULL)
		return -ENOMEM;
	return 0;
}
1423
/* Destroy the inode slab cache at module unload. */
static void destroy_inodecache(void)
{
	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
	kmem_cache_destroy(ext4_inode_cachep);
}
1433
/*
 * Tear down all per-inode ext4 state during eviction: fast-commit
 * tracking, preallocations, the extent status tree, quota, the jbd2
 * inode, and fscrypt/fsverity context.
 */
void ext4_clear_inode(struct inode *inode)
{
	ext4_fc_del(inode);
	invalidate_inode_buffers(inode);
	clear_inode(inode);
	ext4_discard_preallocations(inode, 0);
	ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
	dquot_drop(inode);
	if (EXT4_I(inode)->jinode) {
		/* Release the jbd2 inode tracking ordered-data writeback. */
		jbd2_journal_release_jbd_inode(EXT4_JOURNAL(inode),
					       EXT4_I(inode)->jinode);
		jbd2_free_inode(EXT4_I(inode)->jinode);
		EXT4_I(inode)->jinode = NULL;
	}
	fscrypt_put_encryption_info(inode);
	fsverity_cleanup_inode(inode);
}
1451
/*
 * Look up an inode for NFS file handle decoding.  EXT4_IGET_HANDLE makes
 * ext4_iget() reject special/invalid inode numbers that a forged handle
 * could contain.  Returns the inode or an ERR_PTR (-ESTALE on a
 * generation mismatch).
 */
static struct inode *ext4_nfs_get_inode(struct super_block *sb,
					u64 ino, u32 generation)
{
	struct inode *inode;

	/*
	 * Currently we don't know the generation for parent directory, so
	 * a generation of 0 means "accept any"
	 */
	inode = ext4_iget(sb, ino, EXT4_IGET_HANDLE);
	if (IS_ERR(inode))
		return ERR_CAST(inode);
	if (generation && inode->i_generation != generation) {
		iput(inode);
		return ERR_PTR(-ESTALE);
	}

	return inode;
}
1471
/* Decode an NFS file handle to a dentry via the generic helper. */
static struct dentry *ext4_fh_to_dentry(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				    ext4_nfs_get_inode);
}
1478
/* Decode the parent directory from an NFS file handle. */
static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				    ext4_nfs_get_inode);
}
1485
/*
 * ->commit_metadata for NFS: synchronously write the inode's metadata
 * so the server can guarantee stability before replying.
 */
static int ext4_nfs_commit_metadata(struct inode *inode)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL
	};

	trace_ext4_nfs_commit_metadata(inode);
	return ext4_write_inode(inode, &wbc);
}
1495
1496 #ifdef CONFIG_QUOTA
1497 static const char * const quotatypes[] = INITQFNAMES;
1498 #define QTYPE2NAME(t) (quotatypes[t])
1499
1500 static int ext4_write_dquot(struct dquot *dquot);
1501 static int ext4_acquire_dquot(struct dquot *dquot);
1502 static int ext4_release_dquot(struct dquot *dquot);
1503 static int ext4_mark_dquot_dirty(struct dquot *dquot);
1504 static int ext4_write_info(struct super_block *sb, int type);
1505 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
1506 const struct path *path);
1507 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
1508 size_t len, loff_t off);
1509 static ssize_t ext4_quota_write(struct super_block *sb, int type,
1510 const char *data, size_t len, loff_t off);
1511 static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
1512 unsigned int flags);
1513
/* ->get_dquots: hand the VFS quota core our per-inode dquot pointers. */
static struct dquot **ext4_get_dquots(struct inode *inode)
{
	return EXT4_I(inode)->i_dquot;
}
1518
/* Low-level dquot callbacks: ext4-specific journalled updates plus the
 * generic dquot allocators. */
static const struct dquot_operations ext4_quota_operations = {
	.get_reserved_space	= ext4_get_reserved_space,
	.write_dquot		= ext4_write_dquot,
	.acquire_dquot		= ext4_acquire_dquot,
	.release_dquot		= ext4_release_dquot,
	.mark_dirty		= ext4_mark_dquot_dirty,
	.write_info		= ext4_write_info,
	.alloc_dquot		= dquot_alloc,
	.destroy_dquot		= dquot_destroy,
	.get_projid		= ext4_get_projid,
	.get_inode_usage	= ext4_get_inode_usage,
	.get_next_id		= dquot_get_next_id,
};

/* quotactl(2) entry points; mostly the generic dquot implementations. */
static const struct quotactl_ops ext4_qctl_operations = {
	.quota_on	= ext4_quota_on,
	.quota_off	= ext4_quota_off,
	.quota_sync	= dquot_quota_sync,
	.get_state	= dquot_get_state,
	.set_info	= dquot_set_dqinfo,
	.get_dqblk	= dquot_get_dqblk,
	.set_dqblk	= dquot_set_dqblk,
	.get_nextdqblk	= dquot_get_next_dqblk,
};
1543 #endif
1544
/* Superblock operations wired into the VFS for every ext4 mount. */
static const struct super_operations ext4_sops = {
	.alloc_inode	= ext4_alloc_inode,
	.free_inode	= ext4_free_in_core_inode,
	.destroy_inode	= ext4_destroy_inode,
	.write_inode	= ext4_write_inode,
	.dirty_inode	= ext4_dirty_inode,
	.drop_inode	= ext4_drop_inode,
	.evict_inode	= ext4_evict_inode,
	.put_super	= ext4_put_super,
	.sync_fs	= ext4_sync_fs,
	.freeze_fs	= ext4_freeze,
	.unfreeze_fs	= ext4_unfreeze,
	.statfs		= ext4_statfs,
	.show_options	= ext4_show_options,
#ifdef CONFIG_QUOTA
	.quota_read	= ext4_quota_read,
	.quota_write	= ext4_quota_write,
	.get_dquots	= ext4_get_dquots,
#endif
};

/* NFS export operations (file handle encode/decode). */
static const struct export_operations ext4_export_ops = {
	.fh_to_dentry = ext4_fh_to_dentry,
	.fh_to_parent = ext4_fh_to_parent,
	.get_parent = ext4_get_parent,
	.commit_metadata = ext4_nfs_commit_metadata,
};
1572
/*
 * Token values for every recognized mount option.  Opt_removed marks
 * options that are accepted (for compatibility) but ignored.
 */
enum {
	Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid,
	Opt_resgid, Opt_resuid, Opt_sb,
	Opt_nouid32, Opt_debug, Opt_removed,
	Opt_user_xattr, Opt_acl,
	Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload,
	Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev,
	Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit,
	Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_data_err_abort, Opt_data_err_ignore, Opt_test_dummy_encryption,
	Opt_inlinecrypt,
	Opt_usrjquota, Opt_grpjquota, Opt_quota,
	Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err,
	Opt_usrquota, Opt_grpquota, Opt_prjquota,
	Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never,
	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error,
	Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize,
	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
	Opt_inode_readahead_blks, Opt_journal_ioprio,
	Opt_dioread_nolock, Opt_dioread_lock,
	Opt_discard, Opt_nodiscard, Opt_init_itable, Opt_noinit_itable,
	Opt_max_dir_size_kb, Opt_nojournal_checksum, Opt_nombcache,
	Opt_no_prefetch_block_bitmaps, Opt_mb_optimize_scan,
	Opt_errors, Opt_data, Opt_data_err, Opt_jqfmt, Opt_dax_type,
#ifdef CONFIG_EXT4_DEBUG
	Opt_fc_debug_max_replay, Opt_fc_debug_force
#endif
};
1601
/* Keyword -> value tables for the enum-valued mount options below. */
static const struct constant_table ext4_param_errors[] = {
	{"continue",	EXT4_MOUNT_ERRORS_CONT},
	{"panic",	EXT4_MOUNT_ERRORS_PANIC},
	{"remount-ro",	EXT4_MOUNT_ERRORS_RO},
	{}
};

static const struct constant_table ext4_param_data[] = {
	{"journal",	EXT4_MOUNT_JOURNAL_DATA},
	{"ordered",	EXT4_MOUNT_ORDERED_DATA},
	{"writeback",	EXT4_MOUNT_WRITEBACK_DATA},
	{}
};

static const struct constant_table ext4_param_data_err[] = {
	{"abort",	Opt_data_err_abort},
	{"ignore",	Opt_data_err_ignore},
	{}
};

static const struct constant_table ext4_param_jqfmt[] = {
	{"vfsold",	QFMT_VFS_OLD},
	{"vfsv0",	QFMT_VFS_V0},
	{"vfsv1",	QFMT_VFS_V1},
	{}
};

static const struct constant_table ext4_param_dax[] = {
	{"always",	Opt_dax_always},
	{"inode",	Opt_dax_inode},
	{"never",	Opt_dax_never},
	{}
};
1635
/* String parameter that allows empty argument */
#define fsparam_string_empty(NAME, OPT) \
	__fsparam(fs_param_is_string, NAME, OPT, fs_param_can_be_empty, NULL)

/*
 * Mount option specification
 * We don't use fsparam_flag_no because of the way we set the
 * options and the way we show them in _ext4_show_options(). To
 * keep the changes to a minimum, let's keep the negative options
 * separate for now.
 *
 * Note: some names appear twice on purpose (e.g. "barrier", "dax",
 * "auto_da_alloc", "init_itable", "test_dummy_encryption") so that both
 * the bare-flag and with-argument spellings are accepted.
 */
static const struct fs_parameter_spec ext4_param_specs[] = {
	fsparam_flag	("bsddf",		Opt_bsd_df),
	fsparam_flag	("minixdf",		Opt_minix_df),
	fsparam_flag	("grpid",		Opt_grpid),
	fsparam_flag	("bsdgroups",		Opt_grpid),
	fsparam_flag	("nogrpid",		Opt_nogrpid),
	fsparam_flag	("sysvgroups",		Opt_nogrpid),
	fsparam_u32	("resgid",		Opt_resgid),
	fsparam_u32	("resuid",		Opt_resuid),
	fsparam_u32	("sb",			Opt_sb),
	fsparam_enum	("errors",		Opt_errors, ext4_param_errors),
	fsparam_flag	("nouid32",		Opt_nouid32),
	fsparam_flag	("debug",		Opt_debug),
	fsparam_flag	("oldalloc",		Opt_removed),
	fsparam_flag	("orlov",		Opt_removed),
	fsparam_flag	("user_xattr",		Opt_user_xattr),
	fsparam_flag	("acl",			Opt_acl),
	fsparam_flag	("norecovery",		Opt_noload),
	fsparam_flag	("noload",		Opt_noload),
	fsparam_flag	("bh",			Opt_removed),
	fsparam_flag	("nobh",		Opt_removed),
	fsparam_u32	("commit",		Opt_commit),
	fsparam_u32	("min_batch_time",	Opt_min_batch_time),
	fsparam_u32	("max_batch_time",	Opt_max_batch_time),
	fsparam_u32	("journal_dev",		Opt_journal_dev),
	fsparam_bdev	("journal_path",	Opt_journal_path),
	fsparam_flag	("journal_checksum",	Opt_journal_checksum),
	fsparam_flag	("nojournal_checksum",	Opt_nojournal_checksum),
	fsparam_flag	("journal_async_commit",Opt_journal_async_commit),
	fsparam_flag	("abort",		Opt_abort),
	fsparam_enum	("data",		Opt_data, ext4_param_data),
	fsparam_enum	("data_err",		Opt_data_err,
						ext4_param_data_err),
	fsparam_string_empty
			("usrjquota",		Opt_usrjquota),
	fsparam_string_empty
			("grpjquota",		Opt_grpjquota),
	fsparam_enum	("jqfmt",		Opt_jqfmt, ext4_param_jqfmt),
	fsparam_flag	("grpquota",		Opt_grpquota),
	fsparam_flag	("quota",		Opt_quota),
	fsparam_flag	("noquota",		Opt_noquota),
	fsparam_flag	("usrquota",		Opt_usrquota),
	fsparam_flag	("prjquota",		Opt_prjquota),
	fsparam_flag	("barrier",		Opt_barrier),
	fsparam_u32	("barrier",		Opt_barrier),
	fsparam_flag	("nobarrier",		Opt_nobarrier),
	fsparam_flag	("i_version",		Opt_removed),
	fsparam_flag	("dax",			Opt_dax),
	fsparam_enum	("dax",			Opt_dax_type, ext4_param_dax),
	fsparam_u32	("stripe",		Opt_stripe),
	fsparam_flag	("delalloc",		Opt_delalloc),
	fsparam_flag	("nodelalloc",		Opt_nodelalloc),
	fsparam_flag	("warn_on_error",	Opt_warn_on_error),
	fsparam_flag	("nowarn_on_error",	Opt_nowarn_on_error),
	fsparam_u32	("debug_want_extra_isize",
						Opt_debug_want_extra_isize),
	fsparam_flag	("mblk_io_submit",	Opt_removed),
	fsparam_flag	("nomblk_io_submit",	Opt_removed),
	fsparam_flag	("block_validity",	Opt_block_validity),
	fsparam_flag	("noblock_validity",	Opt_noblock_validity),
	fsparam_u32	("inode_readahead_blks",
						Opt_inode_readahead_blks),
	fsparam_u32	("journal_ioprio",	Opt_journal_ioprio),
	fsparam_u32	("auto_da_alloc",	Opt_auto_da_alloc),
	fsparam_flag	("auto_da_alloc",	Opt_auto_da_alloc),
	fsparam_flag	("noauto_da_alloc",	Opt_noauto_da_alloc),
	fsparam_flag	("dioread_nolock",	Opt_dioread_nolock),
	fsparam_flag	("nodioread_nolock",	Opt_dioread_lock),
	fsparam_flag	("dioread_lock",	Opt_dioread_lock),
	fsparam_flag	("discard",		Opt_discard),
	fsparam_flag	("nodiscard",		Opt_nodiscard),
	fsparam_u32	("init_itable",		Opt_init_itable),
	fsparam_flag	("init_itable",		Opt_init_itable),
	fsparam_flag	("noinit_itable",	Opt_noinit_itable),
#ifdef CONFIG_EXT4_DEBUG
	fsparam_flag	("fc_debug_force",	Opt_fc_debug_force),
	fsparam_u32	("fc_debug_max_replay",	Opt_fc_debug_max_replay),
#endif
	fsparam_u32	("max_dir_size_kb",	Opt_max_dir_size_kb),
	fsparam_flag	("test_dummy_encryption",
						Opt_test_dummy_encryption),
	fsparam_string	("test_dummy_encryption",
						Opt_test_dummy_encryption),
	fsparam_flag	("inlinecrypt",		Opt_inlinecrypt),
	fsparam_flag	("nombcache",		Opt_nombcache),
	fsparam_flag	("no_mbcache",		Opt_nombcache),	/* for backward compatibility */
	fsparam_flag	("prefetch_block_bitmaps",
						Opt_removed),
	fsparam_flag	("no_prefetch_block_bitmaps",
						Opt_no_prefetch_block_bitmaps),
	fsparam_s32	("mb_optimize_scan",	Opt_mb_optimize_scan),
	fsparam_string	("check",		Opt_removed),	/* mount option from ext2/3 */
	fsparam_flag	("nocheck",		Opt_removed),	/* mount option from ext2/3 */
	fsparam_flag	("reservation",		Opt_removed),	/* mount option from ext2/3 */
	fsparam_flag	("noreservation",	Opt_removed),	/* mount option from ext2/3 */
	fsparam_u32	("journal",		Opt_removed),	/* mount option from ext2/3 */
	{}
};
1745
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))

/* Flags describing how each entry in ext4_mount_opts is applied: */
#define MOPT_SET	0x0001	/* set mount_opt bits */
#define MOPT_CLEAR	0x0002	/* clear mount_opt bits */
#define MOPT_NOSUPPORT	0x0004	/* option recognized but not supported */
#define MOPT_EXPLICIT	0x0008	/* record that the user set it explicitly */
#ifdef CONFIG_QUOTA
#define MOPT_Q		0
#define MOPT_QFMT	0x0010	/* quota format selection */
#else
#define MOPT_Q		MOPT_NOSUPPORT
#define MOPT_QFMT	MOPT_NOSUPPORT
#endif
#define MOPT_NO_EXT2	0x0020	/* invalid when mounted as ext2 */
#define MOPT_NO_EXT3	0x0040	/* invalid when mounted as ext3 */
#define MOPT_EXT4_ONLY	(MOPT_NO_EXT2 | MOPT_NO_EXT3)
#define MOPT_SKIP	0x0080
#define	MOPT_2		0x0100	/* bits live in s_mount_opt2 */
1764
/*
 * Maps each option token to the mount_opt bit(s) it affects and the
 * MOPT_* flags describing how (set/clear, ext4-only, quota, ...).
 */
static const struct mount_opts {
	int	token;
	int	mount_opt;
	int	flags;
} ext4_mount_opts[] = {
	{Opt_minix_df, EXT4_MOUNT_MINIX_DF, MOPT_SET},
	{Opt_bsd_df, EXT4_MOUNT_MINIX_DF, MOPT_CLEAR},
	{Opt_grpid, EXT4_MOUNT_GRPID, MOPT_SET},
	{Opt_nogrpid, EXT4_MOUNT_GRPID, MOPT_CLEAR},
	{Opt_block_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_SET},
	{Opt_noblock_validity, EXT4_MOUNT_BLOCK_VALIDITY, MOPT_CLEAR},
	{Opt_dioread_nolock, EXT4_MOUNT_DIOREAD_NOLOCK,
	 MOPT_EXT4_ONLY | MOPT_SET},
	{Opt_dioread_lock, EXT4_MOUNT_DIOREAD_NOLOCK,
	 MOPT_EXT4_ONLY | MOPT_CLEAR},
	{Opt_discard, EXT4_MOUNT_DISCARD, MOPT_SET},
	{Opt_nodiscard, EXT4_MOUNT_DISCARD, MOPT_CLEAR},
	{Opt_delalloc, EXT4_MOUNT_DELALLOC,
	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
	{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
	 MOPT_EXT4_ONLY | MOPT_CLEAR},
	{Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET},
	{Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR},
	{Opt_commit, 0, MOPT_NO_EXT2},
	{Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
	 MOPT_EXT4_ONLY | MOPT_CLEAR},
	{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
	{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
				    EXT4_MOUNT_JOURNAL_CHECKSUM),
	 MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
	{Opt_noload, EXT4_MOUNT_NOLOAD, MOPT_NO_EXT2 | MOPT_SET},
	{Opt_data_err, EXT4_MOUNT_DATA_ERR_ABORT, MOPT_NO_EXT2},
	{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
	{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
	{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
	{Opt_auto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_CLEAR},
	{Opt_noinit_itable, EXT4_MOUNT_INIT_INODE_TABLE, MOPT_CLEAR},
	{Opt_dax_type, 0, MOPT_EXT4_ONLY},
	{Opt_journal_dev, 0, MOPT_NO_EXT2},
	{Opt_journal_path, 0, MOPT_NO_EXT2},
	{Opt_journal_ioprio, 0, MOPT_NO_EXT2},
	{Opt_data, 0, MOPT_NO_EXT2},
	{Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
#ifdef CONFIG_EXT4_FS_POSIX_ACL
	{Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
#else
	{Opt_acl, 0, MOPT_NOSUPPORT},
#endif
	{Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET},
	{Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET},
	{Opt_quota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA, MOPT_SET | MOPT_Q},
	{Opt_usrquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA,
							MOPT_SET | MOPT_Q},
	{Opt_grpquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_GRPQUOTA,
							MOPT_SET | MOPT_Q},
	{Opt_prjquota, EXT4_MOUNT_QUOTA | EXT4_MOUNT_PRJQUOTA,
							MOPT_SET | MOPT_Q},
	{Opt_noquota, (EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
		       EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA),
							MOPT_CLEAR | MOPT_Q},
	{Opt_usrjquota, 0, MOPT_Q},
	{Opt_grpjquota, 0, MOPT_Q},
	{Opt_jqfmt, 0, MOPT_QFMT},
	{Opt_nombcache, EXT4_MOUNT_NO_MBCACHE, MOPT_SET},
	{Opt_no_prefetch_block_bitmaps, EXT4_MOUNT_NO_PREFETCH_BLOCK_BITMAPS,
	 MOPT_SET},
#ifdef CONFIG_EXT4_DEBUG
	{Opt_fc_debug_force, EXT4_MOUNT2_JOURNAL_FAST_COMMIT,
	 MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY},
#endif
	{Opt_err, 0, 0}
};
1838
#if IS_ENABLED(CONFIG_UNICODE)
/*
 * Map of the on-disk s_encoding magic number to the encoding name and
 * Unicode version.  Currently only UTF-8 (Unicode 12.1.0) is known.
 */
static const struct ext4_sb_encodings {
	__u16 magic;		/* on-disk s_encoding value */
	char *name;		/* encoding name, e.g. "utf8" */
	unsigned int version;	/* Unicode version for casefolding */
} ext4_sb_encoding_map[] = {
	{EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)},
};

/*
 * Look up the superblock's filename-encoding magic in
 * ext4_sb_encoding_map.  Returns the matching entry, or NULL if the
 * on-disk value is unknown to this kernel.
 */
static const struct ext4_sb_encodings *
ext4_sb_read_encoding(const struct ext4_super_block *es)
{
	/* s_encoding is stored little-endian on disk */
	__u16 magic = le16_to_cpu(es->s_encoding);
	int i;

	for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++)
		if (magic == ext4_sb_encoding_map[i].magic)
			return &ext4_sb_encoding_map[i];

	return NULL;
}
#endif
1861
/*
 * EXT4_SPEC_* bits record which fields of struct ext4_fs_context were
 * explicitly specified by the user, so that ext4_apply_options() copies
 * only those to the superblock.
 * NOTE(review): bit 6 appears to be unused here — confirm against the
 * full file before reusing it.
 */
#define EXT4_SPEC_JQUOTA			(1 <<  0)
#define EXT4_SPEC_JQFMT				(1 <<  1)
#define EXT4_SPEC_DATAJ				(1 <<  2)
#define EXT4_SPEC_SB_BLOCK			(1 <<  3)
#define EXT4_SPEC_JOURNAL_DEV			(1 <<  4)
#define EXT4_SPEC_JOURNAL_IOPRIO		(1 <<  5)
#define EXT4_SPEC_s_want_extra_isize		(1 <<  7)
#define EXT4_SPEC_s_max_batch_time		(1 <<  8)
#define EXT4_SPEC_s_min_batch_time		(1 <<  9)
#define EXT4_SPEC_s_inode_readahead_blks	(1 << 10)
#define EXT4_SPEC_s_li_wait_mult		(1 << 11)
#define EXT4_SPEC_s_max_dir_size_kb		(1 << 12)
#define EXT4_SPEC_s_stripe			(1 << 13)
#define EXT4_SPEC_s_resuid			(1 << 14)
#define EXT4_SPEC_s_resgid			(1 << 15)
#define EXT4_SPEC_s_commit_interval		(1 << 16)
#define EXT4_SPEC_s_fc_debug_max_replay		(1 << 17)
#define EXT4_SPEC_s_sb_block			(1 << 18)
#define EXT4_SPEC_mb_optimize_scan		(1 << 19)

/*
 * Parsed-but-not-yet-applied mount options.  Each mask_*/vals_* pair
 * records both which bits the user touched (mask) and their new values
 * (vals); untouched bits on the superblock are left alone when applying.
 */
struct ext4_fs_context {
	char		*s_qf_names[EXT4_MAXQUOTAS];	/* journaled quota file names */
	struct fscrypt_dummy_policy dummy_enc_policy;
	int		s_jquota_fmt;	/* Format of quota to use */
#ifdef CONFIG_EXT4_DEBUG
	int		s_fc_debug_max_replay;
#endif
	unsigned short	qname_spec;	/* which s_qf_names[] were given */
	unsigned long	vals_s_flags;	/* Bits to set in s_flags */
	unsigned long	mask_s_flags;	/* Bits changed in s_flags */
	unsigned long	journal_devnum;
	unsigned long	s_commit_interval;
	unsigned long	s_stripe;
	unsigned int	s_inode_readahead_blks;
	unsigned int	s_want_extra_isize;
	unsigned int	s_li_wait_mult;
	unsigned int	s_max_dir_size_kb;
	unsigned int	journal_ioprio;
	unsigned int	vals_s_mount_opt;
	unsigned int	mask_s_mount_opt;
	unsigned int	vals_s_mount_opt2;
	unsigned int	mask_s_mount_opt2;
	unsigned long	vals_s_mount_flags;
	unsigned long	mask_s_mount_flags;
	unsigned int	opt_flags;	/* MOPT flags */
	unsigned int	spec;		/* EXT4_SPEC_* bits */
	u32		s_max_batch_time;
	u32		s_min_batch_time;
	kuid_t		s_resuid;
	kgid_t		s_resgid;
	ext4_fsblk_t	s_sb_block;
};
1914
ext4_fc_free(struct fs_context * fc)1915 static void ext4_fc_free(struct fs_context *fc)
1916 {
1917 struct ext4_fs_context *ctx = fc->fs_private;
1918 int i;
1919
1920 if (!ctx)
1921 return;
1922
1923 for (i = 0; i < EXT4_MAXQUOTAS; i++)
1924 kfree(ctx->s_qf_names[i]);
1925
1926 fscrypt_free_dummy_policy(&ctx->dummy_enc_policy);
1927 kfree(ctx);
1928 }
1929
ext4_init_fs_context(struct fs_context * fc)1930 int ext4_init_fs_context(struct fs_context *fc)
1931 {
1932 struct ext4_fs_context *ctx;
1933
1934 ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
1935 if (!ctx)
1936 return -ENOMEM;
1937
1938 fc->fs_private = ctx;
1939 fc->ops = &ext4_context_ops;
1940
1941 return 0;
1942 }
1943
#ifdef CONFIG_QUOTA
/*
 * Note the name of the specified quota file.
 *
 * Validates and records a journaled quota file name in the parse
 * context.  Re-specifying the same name is accepted silently; a
 * different name for an already-noted qtype is an error.
 *
 * Returns 0 on success, -EINVAL on bad input, -ENOMEM on allocation
 * failure.
 */
static int note_qf_name(struct fs_context *fc, int qtype,
			struct fs_parameter *param)
{
	struct ext4_fs_context *ctx = fc->fs_private;
	char *qname;

	if (param->size < 1) {
		ext4_msg(NULL, KERN_ERR, "Missing quota name");
		return -EINVAL;
	}
	/* Quota files must live in the filesystem root (no path components) */
	if (strchr(param->string, '/')) {
		ext4_msg(NULL, KERN_ERR,
			 "quotafile must be on filesystem root");
		return -EINVAL;
	}
	if (ctx->s_qf_names[qtype]) {
		if (strcmp(ctx->s_qf_names[qtype], param->string) != 0) {
			ext4_msg(NULL, KERN_ERR,
				 "%s quota file already specified",
				 QTYPE2NAME(qtype));
			return -EINVAL;
		}
		/* Same name given twice: nothing to do */
		return 0;
	}

	qname = kmemdup_nul(param->string, param->size, GFP_KERNEL);
	if (!qname) {
		ext4_msg(NULL, KERN_ERR,
			 "Not enough memory for storing quotafile name");
		return -ENOMEM;
	}
	ctx->s_qf_names[qtype] = qname;
	ctx->qname_spec |= 1 << qtype;
	ctx->spec |= EXT4_SPEC_JQUOTA;
	return 0;
}
1984
1985 /*
1986 * Clear the name of the specified quota file.
1987 */
unnote_qf_name(struct fs_context * fc,int qtype)1988 static int unnote_qf_name(struct fs_context *fc, int qtype)
1989 {
1990 struct ext4_fs_context *ctx = fc->fs_private;
1991
1992 if (ctx->s_qf_names[qtype])
1993 kfree(ctx->s_qf_names[qtype]);
1994
1995 ctx->s_qf_names[qtype] = NULL;
1996 ctx->qname_spec |= 1 << qtype;
1997 ctx->spec |= EXT4_SPEC_JQUOTA;
1998 return 0;
1999 }
2000 #endif
2001
/*
 * Parse the test_dummy_encryption mount option into the context's dummy
 * policy.  Only available when CONFIG_FS_ENCRYPTION is enabled.
 *
 * Returns 0 on success or a negative errno; -EEXIST from the fscrypt
 * helper (conflicting repeated option) is converted to -EINVAL.
 */
static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param,
					    struct ext4_fs_context *ctx)
{
	int err;

	if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
		ext4_msg(NULL, KERN_WARNING,
			 "test_dummy_encryption option not supported");
		return -EINVAL;
	}
	err = fscrypt_parse_test_dummy_encryption(param,
						  &ctx->dummy_enc_policy);
	if (err == -EINVAL) {
		ext4_msg(NULL, KERN_WARNING,
			 "Value of option \"%s\" is unrecognized", param->key);
	} else if (err == -EEXIST) {
		ext4_msg(NULL, KERN_WARNING,
			 "Conflicting test_dummy_encryption options");
		return -EINVAL;
	}
	return err;
}
2024
/*
 * Generate ctx_{set,clear,test}_<name>() helpers.  Each setter/clearer
 * records the change in two words: mask_s_<name> accumulates which bits
 * were touched, vals_s_<name> holds the values to apply.  Only bits in
 * the mask are later copied onto the superblock (ext4_apply_options()).
 */
#define EXT4_SET_CTX(name)						\
static inline void ctx_set_##name(struct ext4_fs_context *ctx,		\
				  unsigned long flag)			\
{									\
	ctx->mask_s_##name |= flag;					\
	ctx->vals_s_##name |= flag;					\
}

#define EXT4_CLEAR_CTX(name)						\
static inline void ctx_clear_##name(struct ext4_fs_context *ctx,	\
				    unsigned long flag)			\
{									\
	ctx->mask_s_##name |= flag;					\
	ctx->vals_s_##name &= ~flag;					\
}

#define EXT4_TEST_CTX(name)						\
static inline unsigned long						\
ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag)	\
{									\
	return (ctx->vals_s_##name & flag);				\
}

EXT4_SET_CTX(flags); /* set only */
EXT4_SET_CTX(mount_opt);
EXT4_CLEAR_CTX(mount_opt);
EXT4_TEST_CTX(mount_opt);
EXT4_SET_CTX(mount_opt2);
EXT4_CLEAR_CTX(mount_opt2);
EXT4_TEST_CTX(mount_opt2);

/*
 * mount_flags is handled separately because its values are bit numbers
 * (used with set_bit()) rather than bit masks like the helpers above.
 */
static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit)
{
	set_bit(bit, &ctx->mask_s_mount_flags);
	set_bit(bit, &ctx->vals_s_mount_flags);
}
2061
/*
 * Parse one mount parameter into the ext4_fs_context.
 *
 * Options needing special handling (values, validation, side effects)
 * are dealt with in the switch; anything that falls through is a plain
 * MOPT_SET/MOPT_CLEAR flag driven by the ext4_mount_opts table.
 *
 * Returns 0 on success or a negative errno.  Note that unsupported
 * options (MOPT_NOSUPPORT) are reported but deliberately return 0.
 */
static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
	struct ext4_fs_context *ctx = fc->fs_private;
	struct fs_parse_result result;
	const struct mount_opts *m;
	int is_remount;
	kuid_t uid;
	kgid_t gid;
	int token;

	token = fs_parse(fc, ext4_param_specs, param, &result);
	if (token < 0)
		return token;
	is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;

	/* Find the table entry; may stop at the Opt_err sentinel. */
	for (m = ext4_mount_opts; m->token != Opt_err; m++)
		if (token == m->token)
			break;

	ctx->opt_flags |= m->flags;

	if (m->flags & MOPT_EXPLICIT) {
		/* Remember that the user spelled this option out explicitly */
		if (m->mount_opt & EXT4_MOUNT_DELALLOC) {
			ctx_set_mount_opt2(ctx, EXT4_MOUNT2_EXPLICIT_DELALLOC);
		} else if (m->mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) {
			ctx_set_mount_opt2(ctx,
					   EXT4_MOUNT2_EXPLICIT_JOURNAL_CHECKSUM);
		} else
			return -EINVAL;
	}

	if (m->flags & MOPT_NOSUPPORT) {
		ext4_msg(NULL, KERN_ERR, "%s option not supported",
			 param->key);
		return 0;
	}

	switch (token) {
#ifdef CONFIG_QUOTA
	case Opt_usrjquota:
		/* Empty value means "unset the quota file name" */
		if (!*param->string)
			return unnote_qf_name(fc, USRQUOTA);
		else
			return note_qf_name(fc, USRQUOTA, param);
	case Opt_grpjquota:
		if (!*param->string)
			return unnote_qf_name(fc, GRPQUOTA);
		else
			return note_qf_name(fc, GRPQUOTA, param);
#endif
	case Opt_sb:
		if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
			ext4_msg(NULL, KERN_WARNING,
				 "Ignoring %s option on remount", param->key);
		} else {
			ctx->s_sb_block = result.uint_32;
			ctx->spec |= EXT4_SPEC_s_sb_block;
		}
		return 0;
	case Opt_removed:
		ext4_msg(NULL, KERN_WARNING, "Ignoring removed %s option",
			 param->key);
		return 0;
	case Opt_abort:
		ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED);
		return 0;
	case Opt_inlinecrypt:
#ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT
		ctx_set_flags(ctx, SB_INLINECRYPT);
#else
		ext4_msg(NULL, KERN_ERR, "inline encryption not supported");
#endif
		return 0;
	case Opt_errors:
		/* errors= modes are mutually exclusive: clear, then set */
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_ERRORS_MASK);
		ctx_set_mount_opt(ctx, result.uint_32);
		return 0;
#ifdef CONFIG_QUOTA
	case Opt_jqfmt:
		ctx->s_jquota_fmt = result.uint_32;
		ctx->spec |= EXT4_SPEC_JQFMT;
		return 0;
#endif
	case Opt_data:
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
		ctx_set_mount_opt(ctx, result.uint_32);
		ctx->spec |= EXT4_SPEC_DATAJ;
		return 0;
	case Opt_commit:
		if (result.uint_32 == 0)
			result.uint_32 = JBD2_DEFAULT_MAX_COMMIT_AGE;
		else if (result.uint_32 > INT_MAX / HZ) {
			/* would overflow when converted to jiffies below */
			ext4_msg(NULL, KERN_ERR,
				 "Invalid commit interval %d, "
				 "must be smaller than %d",
				 result.uint_32, INT_MAX / HZ);
			return -EINVAL;
		}
		ctx->s_commit_interval = HZ * result.uint_32;
		ctx->spec |= EXT4_SPEC_s_commit_interval;
		return 0;
	case Opt_debug_want_extra_isize:
		/* must be even and at least 4 */
		if ((result.uint_32 & 1) || (result.uint_32 < 4)) {
			ext4_msg(NULL, KERN_ERR,
				 "Invalid want_extra_isize %d", result.uint_32);
			return -EINVAL;
		}
		ctx->s_want_extra_isize = result.uint_32;
		ctx->spec |= EXT4_SPEC_s_want_extra_isize;
		return 0;
	case Opt_max_batch_time:
		ctx->s_max_batch_time = result.uint_32;
		ctx->spec |= EXT4_SPEC_s_max_batch_time;
		return 0;
	case Opt_min_batch_time:
		ctx->s_min_batch_time = result.uint_32;
		ctx->spec |= EXT4_SPEC_s_min_batch_time;
		return 0;
	case Opt_inode_readahead_blks:
		if (result.uint_32 &&
		    (result.uint_32 > (1 << 30) ||
		     !is_power_of_2(result.uint_32))) {
			ext4_msg(NULL, KERN_ERR,
				 "EXT4-fs: inode_readahead_blks must be "
				 "0 or a power of 2 smaller than 2^31");
			return -EINVAL;
		}
		ctx->s_inode_readahead_blks = result.uint_32;
		ctx->spec |= EXT4_SPEC_s_inode_readahead_blks;
		return 0;
	case Opt_init_itable:
		ctx_set_mount_opt(ctx, EXT4_MOUNT_INIT_INODE_TABLE);
		ctx->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
		/* an explicit value overrides the default wait multiplier */
		if (param->type == fs_value_is_string)
			ctx->s_li_wait_mult = result.uint_32;
		ctx->spec |= EXT4_SPEC_s_li_wait_mult;
		return 0;
	case Opt_max_dir_size_kb:
		ctx->s_max_dir_size_kb = result.uint_32;
		ctx->spec |= EXT4_SPEC_s_max_dir_size_kb;
		return 0;
#ifdef CONFIG_EXT4_DEBUG
	case Opt_fc_debug_max_replay:
		ctx->s_fc_debug_max_replay = result.uint_32;
		ctx->spec |= EXT4_SPEC_s_fc_debug_max_replay;
		return 0;
#endif
	case Opt_stripe:
		ctx->s_stripe = result.uint_32;
		ctx->spec |= EXT4_SPEC_s_stripe;
		return 0;
	case Opt_resuid:
		uid = make_kuid(current_user_ns(), result.uint_32);
		if (!uid_valid(uid)) {
			ext4_msg(NULL, KERN_ERR, "Invalid uid value %d",
				 result.uint_32);
			return -EINVAL;
		}
		ctx->s_resuid = uid;
		ctx->spec |= EXT4_SPEC_s_resuid;
		return 0;
	case Opt_resgid:
		gid = make_kgid(current_user_ns(), result.uint_32);
		if (!gid_valid(gid)) {
			ext4_msg(NULL, KERN_ERR, "Invalid gid value %d",
				 result.uint_32);
			return -EINVAL;
		}
		ctx->s_resgid = gid;
		ctx->spec |= EXT4_SPEC_s_resgid;
		return 0;
	case Opt_journal_dev:
		if (is_remount) {
			ext4_msg(NULL, KERN_ERR,
				 "Cannot specify journal on remount");
			return -EINVAL;
		}
		ctx->journal_devnum = result.uint_32;
		ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
		return 0;
	case Opt_journal_path:
	{
		struct inode *journal_inode;
		struct path path;
		int error;

		if (is_remount) {
			ext4_msg(NULL, KERN_ERR,
				 "Cannot specify journal on remount");
			return -EINVAL;
		}

		error = fs_lookup_param(fc, param, 1, LOOKUP_FOLLOW, &path);
		if (error) {
			ext4_msg(NULL, KERN_ERR, "error: could not find "
				 "journal device path");
			return -EINVAL;
		}

		/* Resolve the path to the journal block device's dev_t */
		journal_inode = d_inode(path.dentry);
		ctx->journal_devnum = new_encode_dev(journal_inode->i_rdev);
		ctx->spec |= EXT4_SPEC_JOURNAL_DEV;
		path_put(&path);
		return 0;
	}
	case Opt_journal_ioprio:
		if (result.uint_32 > 7) {
			ext4_msg(NULL, KERN_ERR, "Invalid journal IO priority"
				 " (must be 0-7)");
			return -EINVAL;
		}
		ctx->journal_ioprio =
			IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, result.uint_32);
		ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO;
		return 0;
	case Opt_test_dummy_encryption:
		return ext4_parse_test_dummy_encryption(param, ctx);
	case Opt_dax:
	case Opt_dax_type:
#ifdef CONFIG_FS_DAX
	{
		/* plain "dax" is equivalent to "dax=always" */
		int type = (token == Opt_dax) ?
			   Opt_dax : result.uint_32;

		switch (type) {
		case Opt_dax:
		case Opt_dax_always:
			ctx_set_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
			ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
			break;
		case Opt_dax_never:
			ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
			ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
			break;
		case Opt_dax_inode:
			ctx_clear_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS);
			ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER);
			/* Strictly for printing options */
			ctx_set_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE);
			break;
		}
		return 0;
	}
#else
	ext4_msg(NULL, KERN_INFO, "dax option not supported");
	return -EINVAL;
#endif
	case Opt_data_err:
		if (result.uint_32 == Opt_data_err_abort)
			ctx_set_mount_opt(ctx, m->mount_opt);
		else if (result.uint_32 == Opt_data_err_ignore)
			ctx_clear_mount_opt(ctx, m->mount_opt);
		return 0;
	case Opt_mb_optimize_scan:
		if (result.int_32 == 1) {
			ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
			ctx->spec |= EXT4_SPEC_mb_optimize_scan;
		} else if (result.int_32 == 0) {
			ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN);
			ctx->spec |= EXT4_SPEC_mb_optimize_scan;
		} else {
			ext4_msg(NULL, KERN_WARNING,
				 "mb_optimize_scan should be set to 0 or 1.");
			return -EINVAL;
		}
		return 0;
	}

	/*
	 * At this point we should only be getting options requiring MOPT_SET,
	 * or MOPT_CLEAR. Anything else is a bug
	 */
	if (m->token == Opt_err) {
		ext4_msg(NULL, KERN_WARNING, "buggy handling of option %s",
			 param->key);
		WARN_ON(1);
		return -EINVAL;
	}

	else {
		unsigned int set = 0;

		/* Bare flags and non-zero values mean "turn the option on" */
		if ((param->type == fs_value_is_flag) ||
		    result.uint_32 > 0)
			set = 1;

		/* MOPT_CLEAR options invert the sense of the value */
		if (m->flags & MOPT_CLEAR)
			set = !set;
		else if (unlikely(!(m->flags & MOPT_SET))) {
			ext4_msg(NULL, KERN_WARNING,
				 "buggy handling of option %s",
				 param->key);
			WARN_ON(1);
			return -EINVAL;
		}
		/* MOPT_2 selects the s_mount_opt2 word instead of s_mount_opt */
		if (m->flags & MOPT_2) {
			if (set != 0)
				ctx_set_mount_opt2(ctx, m->mount_opt);
			else
				ctx_clear_mount_opt2(ctx, m->mount_opt);
		} else {
			if (set != 0)
				ctx_set_mount_opt(ctx, m->mount_opt);
			else
				ctx_clear_mount_opt(ctx, m->mount_opt);
		}
	}

	return 0;
}
2372
parse_options(struct fs_context * fc,char * options)2373 static int parse_options(struct fs_context *fc, char *options)
2374 {
2375 struct fs_parameter param;
2376 int ret;
2377 char *key;
2378
2379 if (!options)
2380 return 0;
2381
2382 while ((key = strsep(&options, ",")) != NULL) {
2383 if (*key) {
2384 size_t v_len = 0;
2385 char *value = strchr(key, '=');
2386
2387 param.type = fs_value_is_flag;
2388 param.string = NULL;
2389
2390 if (value) {
2391 if (value == key)
2392 continue;
2393
2394 *value++ = 0;
2395 v_len = strlen(value);
2396 param.string = kmemdup_nul(value, v_len,
2397 GFP_KERNEL);
2398 if (!param.string)
2399 return -ENOMEM;
2400 param.type = fs_value_is_string;
2401 }
2402
2403 param.key = key;
2404 param.size = v_len;
2405
2406 ret = ext4_parse_param(fc, ¶m);
2407 if (param.string)
2408 kfree(param.string);
2409 if (ret < 0)
2410 return ret;
2411 }
2412 }
2413
2414 ret = ext4_validate_options(fc);
2415 if (ret < 0)
2416 return ret;
2417
2418 return 0;
2419 }
2420
/*
 * Parse and apply the mount options stored in the superblock's
 * s_mount_opts field.  These are parsed into a throw-away fs_context so
 * they can be validated before being applied; failures here are only
 * warned about (the mount proceeds), since on-disk options are advisory.
 *
 * The string is copied first because parse_options() modifies it in
 * place via strsep().  Journal device/ioprio settings are propagated to
 * the caller's context (m_ctx) so the real mount sees them.
 *
 * Returns 0 on success (including ignored parse failures) or -ENOMEM.
 */
static int parse_apply_sb_mount_options(struct super_block *sb,
					struct ext4_fs_context *m_ctx)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *s_mount_opts = NULL;
	struct ext4_fs_context *s_ctx = NULL;
	struct fs_context *fc = NULL;
	int ret = -ENOMEM;

	if (!sbi->s_es->s_mount_opts[0])
		return 0;

	/* NUL-terminated copy; s_mount_opts on disk may not be terminated */
	s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
				sizeof(sbi->s_es->s_mount_opts),
				GFP_KERNEL);
	if (!s_mount_opts)
		return ret;

	fc = kzalloc(sizeof(struct fs_context), GFP_KERNEL);
	if (!fc)
		goto out_free;

	s_ctx = kzalloc(sizeof(struct ext4_fs_context), GFP_KERNEL);
	if (!s_ctx)
		goto out_free;

	fc->fs_private = s_ctx;
	fc->s_fs_info = sbi;

	ret = parse_options(fc, s_mount_opts);
	if (ret < 0)
		goto parse_failed;

	ret = ext4_check_opt_consistency(fc, sb);
	if (ret < 0) {
		/* note: also the target of the goto above */
parse_failed:
		ext4_msg(sb, KERN_WARNING,
			 "failed to parse options in superblock: %s",
			 s_mount_opts);
		/* superblock options are best-effort: ignore the failure */
		ret = 0;
		goto out_free;
	}

	if (s_ctx->spec & EXT4_SPEC_JOURNAL_DEV)
		m_ctx->journal_devnum = s_ctx->journal_devnum;
	if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)
		m_ctx->journal_ioprio = s_ctx->journal_ioprio;

	ext4_apply_options(fc, sb);
	ret = 0;

out_free:
	if (fc) {
		/* frees s_ctx (fc->fs_private) and its quota names too */
		ext4_fc_free(fc);
		kfree(fc);
	}
	kfree(s_mount_opts);
	return ret;
}
2480
/*
 * Transfer parsed quota options from the fs_context to the superblock.
 * Journaled quota names are handed over (ownership moves to sbi) via an
 * RCU pointer swap; old names are freed after a grace period.  Does
 * nothing when the QUOTA feature is enabled, since journaled quota
 * options do not apply then (see ext4_check_quota_consistency()).
 */
static void ext4_apply_quota_options(struct fs_context *fc,
				     struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	bool quota_feature = ext4_has_feature_quota(sb);
	struct ext4_fs_context *ctx = fc->fs_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *qname;
	int i;

	if (quota_feature)
		return;

	if (ctx->spec & EXT4_SPEC_JQUOTA) {
		for (i = 0; i < EXT4_MAXQUOTAS; i++) {
			if (!(ctx->qname_spec & (1 << i)))
				continue;

			qname = ctx->s_qf_names[i]; /* May be NULL */
			if (qname)
				set_opt(sb, QUOTA);
			/* ownership of qname moves to sbi below */
			ctx->s_qf_names[i] = NULL;
			qname = rcu_replace_pointer(sbi->s_qf_names[i], qname,
						    lockdep_is_held(&sb->s_umount));
			/* free the previous name after an RCU grace period */
			if (qname)
				kfree_rcu(qname);
		}
	}

	if (ctx->spec & EXT4_SPEC_JQFMT)
		sbi->s_jquota_fmt = ctx->s_jquota_fmt;
#endif
}
2514
2515 /*
2516 * Check quota settings consistency.
2517 */
/*
 * Check quota settings consistency.
 *
 * Verifies the requested quota options against the superblock's current
 * state: no changing quota options while quota is active, no conflicting
 * journaled quota file names, no mixing of old (journaled-file) and new
 * (usrquota/grpquota) formats, and a journaled format must be specified
 * when journaled quota files are used.
 *
 * Returns 0 if consistent, -EINVAL otherwise.  Always 0 without
 * CONFIG_QUOTA.
 */
static int ext4_check_quota_consistency(struct fs_context *fc,
					struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	struct ext4_fs_context *ctx = fc->fs_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	bool quota_feature = ext4_has_feature_quota(sb);
	bool quota_loaded = sb_any_quota_loaded(sb);
	bool usr_qf_name, grp_qf_name, usrquota, grpquota;
	int quota_flags, i;

	/*
	 * We do the test below only for project quotas. 'usrquota' and
	 * 'grpquota' mount options are allowed even without quota feature
	 * to support legacy quotas in quota files.
	 */
	if (ctx_test_mount_opt(ctx, EXT4_MOUNT_PRJQUOTA) &&
	    !ext4_has_feature_project(sb)) {
		ext4_msg(NULL, KERN_ERR, "Project quota feature not enabled. "
			 "Cannot enable project quota enforcement.");
		return -EINVAL;
	}

	/* Turning any quota option *off* while quota is loaded is refused */
	quota_flags = EXT4_MOUNT_QUOTA | EXT4_MOUNT_USRQUOTA |
		      EXT4_MOUNT_GRPQUOTA | EXT4_MOUNT_PRJQUOTA;
	if (quota_loaded &&
	    ctx->mask_s_mount_opt & quota_flags &&
	    !ctx_test_mount_opt(ctx, quota_flags))
		goto err_quota_change;

	if (ctx->spec & EXT4_SPEC_JQUOTA) {

		for (i = 0; i < EXT4_MAXQUOTAS; i++) {
			if (!(ctx->qname_spec & (1 << i)))
				continue;

			/* can't add or remove a quota file while loaded */
			if (quota_loaded &&
			    !!sbi->s_qf_names[i] != !!ctx->s_qf_names[i])
				goto err_jquota_change;

			/* can't rename an already-configured quota file */
			if (sbi->s_qf_names[i] && ctx->s_qf_names[i] &&
			    strcmp(get_qf_name(sb, sbi, i),
				   ctx->s_qf_names[i]) != 0)
				goto err_jquota_specified;
		}

		if (quota_feature) {
			ext4_msg(NULL, KERN_INFO,
				 "Journaled quota options ignored when "
				 "QUOTA feature is enabled");
			return 0;
		}
	}

	if (ctx->spec & EXT4_SPEC_JQFMT) {
		if (sbi->s_jquota_fmt != ctx->s_jquota_fmt && quota_loaded)
			goto err_jquota_change;
		if (quota_feature) {
			ext4_msg(NULL, KERN_INFO, "Quota format mount options "
				 "ignored when QUOTA feature is enabled");
			return 0;
		}
	}

	/* Make sure we don't mix old and new quota format */
	usr_qf_name = (get_qf_name(sb, sbi, USRQUOTA) ||
		       ctx->s_qf_names[USRQUOTA]);
	grp_qf_name = (get_qf_name(sb, sbi, GRPQUOTA) ||
		       ctx->s_qf_names[GRPQUOTA]);

	usrquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
		    test_opt(sb, USRQUOTA));

	grpquota = (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) ||
		    test_opt(sb, GRPQUOTA));

	/* a journaled quota file name supersedes the plain-quota flags */
	if (usr_qf_name) {
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);
		usrquota = false;
	}
	if (grp_qf_name) {
		ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);
		grpquota = false;
	}

	if (usr_qf_name || grp_qf_name) {
		if (usrquota || grpquota) {
			ext4_msg(NULL, KERN_ERR, "old and new quota "
				 "format mixing");
			return -EINVAL;
		}

		if (!(ctx->spec & EXT4_SPEC_JQFMT || sbi->s_jquota_fmt)) {
			ext4_msg(NULL, KERN_ERR, "journaled quota format "
				 "not specified");
			return -EINVAL;
		}
	}

	return 0;

err_quota_change:
	ext4_msg(NULL, KERN_ERR,
		 "Cannot change quota options when quota turned on");
	return -EINVAL;
err_jquota_change:
	ext4_msg(NULL, KERN_ERR, "Cannot change journaled quota "
		 "options when quota turned on");
	return -EINVAL;
err_jquota_specified:
	ext4_msg(NULL, KERN_ERR, "%s quota file already specified",
		 QTYPE2NAME(i));
	return -EINVAL;
#else
	return 0;
#endif
}
2635
/*
 * Validate the test_dummy_encryption option against the superblock: the
 * encrypt feature must be enabled, and on remount (or when s_mount_opts
 * already set a policy) the policy must be identical to the current one.
 *
 * Returns 0 if the option is absent or acceptable, -EINVAL otherwise.
 */
static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
					    struct super_block *sb)
{
	const struct ext4_fs_context *ctx = fc->fs_private;
	const struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy))
		return 0;

	if (!ext4_has_feature_encrypt(sb)) {
		ext4_msg(NULL, KERN_WARNING,
			 "test_dummy_encryption requires encrypt feature");
		return -EINVAL;
	}
	/*
	 * This mount option is just for testing, and it's not worthwhile to
	 * implement the extra complexity (e.g. RCU protection) that would be
	 * needed to allow it to be set or changed during remount. We do allow
	 * it to be specified during remount, but only if there is no change.
	 */
	if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
		if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
						 &ctx->dummy_enc_policy))
			return 0;
		ext4_msg(NULL, KERN_WARNING,
			 "Can't set or change test_dummy_encryption on remount");
		return -EINVAL;
	}
	/* Also make sure s_mount_opts didn't contain a conflicting value. */
	if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) {
		if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
						 &ctx->dummy_enc_policy))
			return 0;
		ext4_msg(NULL, KERN_WARNING,
			 "Conflicting test_dummy_encryption options");
		return -EINVAL;
	}
	return 0;
}
2675
/*
 * Move the parsed dummy encryption policy from the context into the
 * superblock (ownership transfer: the context's copy is zeroed so it is
 * not double-freed by ext4_fc_free()).  No-op if no policy was given or
 * one is already installed (pre-verified identical by
 * ext4_check_test_dummy_encryption()).
 */
static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx,
					     struct super_block *sb)
{
	if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) ||
	    /* if already set, it was already verified to be the same */
	    fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy))
		return;
	EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy;
	memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy));
	ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
}
2687
/*
 * Validate the parsed options against the superblock before they are
 * applied: ext2/ext3 compatibility, want_extra_isize bounds, dummy
 * encryption, data= journalling mode changes on remount, dax transition
 * restrictions on remount, and finally quota consistency.
 *
 * Returns 0 if everything is consistent, a negative errno otherwise.
 */
static int ext4_check_opt_consistency(struct fs_context *fc,
				      struct super_block *sb)
{
	struct ext4_fs_context *ctx = fc->fs_private;
	struct ext4_sb_info *sbi = fc->s_fs_info;
	int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE;
	int err;

	if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) {
		ext4_msg(NULL, KERN_ERR,
			 "Mount option(s) incompatible with ext2");
		return -EINVAL;
	}
	if ((ctx->opt_flags & MOPT_NO_EXT3) && IS_EXT3_SB(sb)) {
		ext4_msg(NULL, KERN_ERR,
			 "Mount option(s) incompatible with ext3");
		return -EINVAL;
	}

	/* extra isize must fit in the space beyond the good-old inode */
	if (ctx->s_want_extra_isize >
	    (sbi->s_inode_size - EXT4_GOOD_OLD_INODE_SIZE)) {
		ext4_msg(NULL, KERN_ERR,
			 "Invalid want_extra_isize %d",
			 ctx->s_want_extra_isize);
		return -EINVAL;
	}

	if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DIOREAD_NOLOCK)) {
		int blocksize =
			BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
		if (blocksize < PAGE_SIZE)
			ext4_msg(NULL, KERN_WARNING, "Warning: mounting with an "
				 "experimental mount option 'dioread_nolock' "
				 "for blocksize < PAGE_SIZE");
	}

	err = ext4_check_test_dummy_encryption(fc, sb);
	if (err)
		return err;

	if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) {
		if (!sbi->s_journal) {
			ext4_msg(NULL, KERN_WARNING,
				 "Remounting file system with no journal "
				 "so ignoring journalled data option");
			ctx_clear_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS);
		} else if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DATA_FLAGS) !=
			   test_opt(sb, DATA_FLAGS)) {
			ext4_msg(NULL, KERN_ERR, "Cannot change data mode "
				 "on remount");
			return -EINVAL;
		}
	}

	if (is_remount) {
		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
		    (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)) {
			ext4_msg(NULL, KERN_ERR, "can't mount with "
				 "both data=journal and dax");
			return -EINVAL;
		}

		/*
		 * The dax mode (always/never/inode) requested on remount must
		 * match the currently active mode; the label below is reached
		 * from the two later branches as well.
		 */
		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_DAX_ALWAYS) &&
		    (!(sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
		     (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER))) {
fail_dax_change_remount:
			ext4_msg(NULL, KERN_ERR, "can't change "
				 "dax mount option while remounting");
			return -EINVAL;
		} else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_NEVER) &&
			 (!(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
			  (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS))) {
			goto fail_dax_change_remount;
		} else if (ctx_test_mount_opt2(ctx, EXT4_MOUNT2_DAX_INODE) &&
			   ((sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) ||
			    (sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_NEVER) ||
			    !(sbi->s_mount_opt2 & EXT4_MOUNT2_DAX_INODE))) {
			goto fail_dax_change_remount;
		}
	}

	return ext4_check_quota_consistency(fc, sb);
}
2771
/*
 * Apply the parsed options to the superblock.  Only bits recorded in
 * each mask_* word and only fields flagged in ctx->spec are touched;
 * everything else on the superblock is left as-is.
 */
static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
{
	struct ext4_fs_context *ctx = fc->fs_private;
	struct ext4_sb_info *sbi = fc->s_fs_info;

	/* clear the touched bits, then install their new values */
	sbi->s_mount_opt &= ~ctx->mask_s_mount_opt;
	sbi->s_mount_opt |= ctx->vals_s_mount_opt;
	sbi->s_mount_opt2 &= ~ctx->mask_s_mount_opt2;
	sbi->s_mount_opt2 |= ctx->vals_s_mount_opt2;
	sbi->s_mount_flags &= ~ctx->mask_s_mount_flags;
	sbi->s_mount_flags |= ctx->vals_s_mount_flags;
	sb->s_flags &= ~ctx->mask_s_flags;
	sb->s_flags |= ctx->vals_s_flags;

/* copy field X from ctx to sbi iff the user explicitly specified it */
#define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; })
	APPLY(s_commit_interval);
	APPLY(s_stripe);
	APPLY(s_max_batch_time);
	APPLY(s_min_batch_time);
	APPLY(s_want_extra_isize);
	APPLY(s_inode_readahead_blks);
	APPLY(s_max_dir_size_kb);
	APPLY(s_li_wait_mult);
	APPLY(s_resgid);
	APPLY(s_resuid);

#ifdef CONFIG_EXT4_DEBUG
	APPLY(s_fc_debug_max_replay);
#endif

	ext4_apply_quota_options(fc, sb);
	ext4_apply_test_dummy_encryption(ctx, sb);
}
2805
2806
/*
 * Sanity-check the options accumulated in the context, independent of
 * any superblock: reject mixing journaled quota files with the plain
 * usrquota/grpquota flags (a quota file name wins over the flag for the
 * same quota type).
 *
 * Note: returns 1 on success (not 0), -EINVAL on inconsistency; the
 * caller (parse_options()) only checks for a negative value.
 */
static int ext4_validate_options(struct fs_context *fc)
{
#ifdef CONFIG_QUOTA
	struct ext4_fs_context *ctx = fc->fs_private;
	char *usr_qf_name, *grp_qf_name;

	usr_qf_name = ctx->s_qf_names[USRQUOTA];
	grp_qf_name = ctx->s_qf_names[GRPQUOTA];

	if (usr_qf_name || grp_qf_name) {
		/* a journaled quota file overrides the matching flag */
		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) && usr_qf_name)
			ctx_clear_mount_opt(ctx, EXT4_MOUNT_USRQUOTA);

		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA) && grp_qf_name)
			ctx_clear_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA);

		if (ctx_test_mount_opt(ctx, EXT4_MOUNT_USRQUOTA) ||
		    ctx_test_mount_opt(ctx, EXT4_MOUNT_GRPQUOTA)) {
			ext4_msg(NULL, KERN_ERR, "old and new quota "
				 "format mixing");
			return -EINVAL;
		}
	}
#endif
	return 1;
}
2833
/*
 * Emit the quota-related mount options (jqfmt=, usrjquota=, grpjquota=)
 * into @seq.  No-op when CONFIG_QUOTA is disabled.  The quota file
 * names are read under RCU because remount may replace them.
 */
static inline void ext4_show_quota_options(struct seq_file *seq,
					   struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	char *usr_qf_name, *grp_qf_name;

	if (sbi->s_jquota_fmt) {
		const char *fmtname;

		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			fmtname = "vfsold";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V0)
			fmtname = "vfsv0";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V1)
			fmtname = "vfsv1";
		else
			fmtname = "";
		seq_printf(seq, ",jqfmt=%s", fmtname);
	}

	rcu_read_lock();
	usr_qf_name = rcu_dereference(sbi->s_qf_names[USRQUOTA]);
	grp_qf_name = rcu_dereference(sbi->s_qf_names[GRPQUOTA]);
	if (usr_qf_name)
		seq_show_option(seq, "usrjquota", usr_qf_name);
	if (grp_qf_name)
		seq_show_option(seq, "grpjquota", grp_qf_name);
	rcu_read_unlock();
#endif
}
2868
token2str(int token)2869 static const char *token2str(int token)
2870 {
2871 const struct fs_parameter_spec *spec;
2872
2873 for (spec = ext4_param_specs; spec->name != NULL; spec++)
2874 if (spec->opt == token && !spec->type)
2875 break;
2876 return spec->name;
2877 }
2878
2879 /*
2880 * Show an option if
2881 * - it's set to a non-default value OR
2882 * - if the per-sb default is different from the global default
2883 */
/*
 * Core of option display.  With nodefs == 0 (for /proc/mounts) only
 * options differing from the defaults are printed, separated by ',';
 * with nodefs != 0 (for the sysfs "options" file) everything is
 * printed, one option per line.  Always returns 0.
 */
static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
			      int nodefs)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int def_errors;
	const struct mount_opts *m;
	char sep = nodefs ? '\n' : ',';

#define SEQ_OPTS_PUTS(str) seq_printf(seq, "%c" str, sep)
#define SEQ_OPTS_PRINT(str, arg) seq_printf(seq, "%c" str, sep, arg)

	if (sbi->s_sb_block != 1)
		SEQ_OPTS_PRINT("sb=%llu", sbi->s_sb_block);

	/* Walk the flag-style options table. */
	for (m = ext4_mount_opts; m->token != Opt_err; m++) {
		int want_set = m->flags & MOPT_SET;
		int opt_2 = m->flags & MOPT_2;
		unsigned int mount_opt, def_mount_opt;

		/* Only plain set/clear flags are shown here. */
		if (((m->flags & (MOPT_SET|MOPT_CLEAR)) == 0) ||
		    m->flags & MOPT_SKIP)
			continue;

		/* MOPT_2 flags live in the secondary option word. */
		if (opt_2) {
			mount_opt = sbi->s_mount_opt2;
			def_mount_opt = sbi->s_def_mount_opt2;
		} else {
			mount_opt = sbi->s_mount_opt;
			def_mount_opt = sbi->s_def_mount_opt;
		}
		/* skip if same as the default */
		if (!nodefs && !(m->mount_opt & (mount_opt ^ def_mount_opt)))
			continue;
		/* select Opt_noFoo vs Opt_Foo */
		if ((want_set &&
		     (mount_opt & m->mount_opt) != m->mount_opt) ||
		    (!want_set && (mount_opt & m->mount_opt)))
			continue;
		SEQ_OPTS_PRINT("%s", token2str(m->token));
	}

	/* Reserved-block owner, if it differs from the on-disk default. */
	if (nodefs || !uid_eq(sbi->s_resuid, make_kuid(&init_user_ns, EXT4_DEF_RESUID)) ||
	    le16_to_cpu(es->s_def_resuid) != EXT4_DEF_RESUID)
		SEQ_OPTS_PRINT("resuid=%u",
				from_kuid_munged(&init_user_ns, sbi->s_resuid));
	if (nodefs || !gid_eq(sbi->s_resgid, make_kgid(&init_user_ns, EXT4_DEF_RESGID)) ||
	    le16_to_cpu(es->s_def_resgid) != EXT4_DEF_RESGID)
		SEQ_OPTS_PRINT("resgid=%u",
				from_kgid_munged(&init_user_ns, sbi->s_resgid));
	/* -1 (matches nothing) forces all errors= variants out in nodefs mode */
	def_errors = nodefs ? -1 : le16_to_cpu(es->s_errors);
	if (test_opt(sb, ERRORS_RO) && def_errors != EXT4_ERRORS_RO)
		SEQ_OPTS_PUTS("errors=remount-ro");
	if (test_opt(sb, ERRORS_CONT) && def_errors != EXT4_ERRORS_CONTINUE)
		SEQ_OPTS_PUTS("errors=continue");
	if (test_opt(sb, ERRORS_PANIC) && def_errors != EXT4_ERRORS_PANIC)
		SEQ_OPTS_PUTS("errors=panic");
	if (nodefs || sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ)
		SEQ_OPTS_PRINT("commit=%lu", sbi->s_commit_interval / HZ);
	if (nodefs || sbi->s_min_batch_time != EXT4_DEF_MIN_BATCH_TIME)
		SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time);
	if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME)
		SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time);
	if (nodefs || sbi->s_stripe)
		SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe);
	/* data= journaling mode, if it differs from the mkfs-time default. */
	if (nodefs || EXT4_MOUNT_DATA_FLAGS &
			(sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
			SEQ_OPTS_PUTS("data=journal");
		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
			SEQ_OPTS_PUTS("data=ordered");
		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
			SEQ_OPTS_PUTS("data=writeback");
	}
	if (nodefs ||
	    sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS)
		SEQ_OPTS_PRINT("inode_readahead_blks=%u",
			       sbi->s_inode_readahead_blks);

	if (test_opt(sb, INIT_INODE_TABLE) && (nodefs ||
		       (sbi->s_li_wait_mult != EXT4_DEF_LI_WAIT_MULT)))
		SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
	if (nodefs || sbi->s_max_dir_size_kb)
		SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
	if (test_opt(sb, DATA_ERR_ABORT))
		SEQ_OPTS_PUTS("data_err=abort");

	fscrypt_show_test_dummy_encryption(seq, sep, sb);

	if (sb->s_flags & SB_INLINECRYPT)
		SEQ_OPTS_PUTS("inlinecrypt");

	/* "dax" is the ext2-compatible spelling of dax=always. */
	if (test_opt(sb, DAX_ALWAYS)) {
		if (IS_EXT2_SB(sb))
			SEQ_OPTS_PUTS("dax");
		else
			SEQ_OPTS_PUTS("dax=always");
	} else if (test_opt2(sb, DAX_NEVER)) {
		SEQ_OPTS_PUTS("dax=never");
	} else if (test_opt2(sb, DAX_INODE)) {
		SEQ_OPTS_PUTS("dax=inode");
	}

	/* Show mb_optimize_scan only when it overrides the size heuristic. */
	if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
	    !test_opt2(sb, MB_OPTIMIZE_SCAN)) {
		SEQ_OPTS_PUTS("mb_optimize_scan=0");
	} else if (sbi->s_groups_count < MB_DEFAULT_LINEAR_SCAN_THRESHOLD &&
		   test_opt2(sb, MB_OPTIMIZE_SCAN)) {
		SEQ_OPTS_PUTS("mb_optimize_scan=1");
	}

	ext4_show_quota_options(seq, sb);
	return 0;
}
2998
ext4_show_options(struct seq_file * seq,struct dentry * root)2999 static int ext4_show_options(struct seq_file *seq, struct dentry *root)
3000 {
3001 return _ext4_show_options(seq, root->d_sb, 0);
3002 }
3003
ext4_seq_options_show(struct seq_file * seq,void * offset)3004 int ext4_seq_options_show(struct seq_file *seq, void *offset)
3005 {
3006 struct super_block *sb = seq->private;
3007 int rc;
3008
3009 seq_puts(seq, sb_rdonly(sb) ? "ro" : "rw");
3010 rc = _ext4_show_options(seq, sb, 1);
3011 seq_puts(seq, "\n");
3012 return rc;
3013 }
3014
/*
 * Finalize superblock state at mount time: emit fsck-recommended
 * warnings, bump the mount count and mtime, set the journal-recovery
 * feature flags, and commit the superblock.  Read-only mounts skip all
 * on-disk updates.  Returns 0 on success, -EROFS when the revision
 * level is unsupported, or the error from ext4_commit_super().
 */
static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
			    int read_only)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int err = 0;

	if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) {
		ext4_msg(sb, KERN_ERR, "revision level too high, "
			 "forcing read-only mode");
		err = -EROFS;
		goto done;
	}
	if (read_only)
		goto done;
	/* The following warnings are mutually exclusive, checked in order. */
	if (!(sbi->s_mount_state & EXT4_VALID_FS))
		ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
			 "running e2fsck is recommended");
	else if (sbi->s_mount_state & EXT4_ERROR_FS)
		ext4_msg(sb, KERN_WARNING,
			 "warning: mounting fs with errors, "
			 "running e2fsck is recommended");
	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
		 le16_to_cpu(es->s_mnt_count) >=
		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
		ext4_msg(sb, KERN_WARNING,
			 "warning: maximal mount count reached, "
			 "running e2fsck is recommended");
	else if (le32_to_cpu(es->s_checkinterval) &&
		 (ext4_get_tstamp(es, s_lastcheck) +
		  le32_to_cpu(es->s_checkinterval) <= ktime_get_real_seconds()))
		ext4_msg(sb, KERN_WARNING,
			 "warning: checktime reached, "
			 "running e2fsck is recommended");
	/* Without a journal, clear EXT4_VALID_FS in the on-disk state. */
	if (!sbi->s_journal)
		es->s_state &= cpu_to_le16(~EXT4_VALID_FS);
	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
		es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
	le16_add_cpu(&es->s_mnt_count, 1);
	ext4_update_tstamp(es, s_mtime);
	if (sbi->s_journal) {
		ext4_set_feature_journal_needs_recovery(sb);
		if (ext4_has_feature_orphan_file(sb))
			ext4_set_feature_orphan_present(sb);
	}

	err = ext4_commit_super(sb);
done:
	if (test_opt(sb, DEBUG))
		printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%u, "
				"bpg=%lu, ipg=%lu, mo=%04x, mo2=%04x]\n",
			sb->s_blocksize,
			sbi->s_groups_count,
			EXT4_BLOCKS_PER_GROUP(sb),
			EXT4_INODES_PER_GROUP(sb),
			sbi->s_mount_opt, sbi->s_mount_opt2);

	cleancache_init_fs(sb);
	return err;
}
3074
/*
 * Grow the sbi->s_flex_groups pointer array so it covers the flex group
 * containing block group @ngroup - 1.  A new top-level array is built,
 * the existing entries are copied over, and it is published with
 * rcu_assign_pointer(); the old array is freed after an RCU grace
 * period.  Returns 0 on success (or if no growth was needed), -ENOMEM
 * on allocation failure.
 */
int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct flex_groups **old_groups, **new_groups;
	int size, i, j;

	/* flex_bg disabled: nothing to allocate. */
	if (!sbi->s_log_groups_per_flex)
		return 0;

	size = ext4_flex_group(sbi, ngroup - 1) + 1;
	if (size <= sbi->s_flex_groups_allocated)
		return 0;

	new_groups = kvzalloc(roundup_pow_of_two(size *
			      sizeof(*sbi->s_flex_groups)), GFP_KERNEL);
	if (!new_groups) {
		ext4_msg(sb, KERN_ERR,
			 "not enough memory for %d flex group pointers", size);
		return -ENOMEM;
	}
	/* Allocate only the newly needed per-flex-group structures. */
	for (i = sbi->s_flex_groups_allocated; i < size; i++) {
		new_groups[i] = kvzalloc(roundup_pow_of_two(
					 sizeof(struct flex_groups)),
					 GFP_KERNEL);
		if (!new_groups[i]) {
			/* Unwind this function's allocations only. */
			for (j = sbi->s_flex_groups_allocated; j < i; j++)
				kvfree(new_groups[j]);
			kvfree(new_groups);
			ext4_msg(sb, KERN_ERR,
				 "not enough memory for %d flex groups", size);
			return -ENOMEM;
		}
	}
	/* Copy existing entries, then publish the new array for readers. */
	rcu_read_lock();
	old_groups = rcu_dereference(sbi->s_flex_groups);
	if (old_groups)
		memcpy(new_groups, old_groups,
		       (sbi->s_flex_groups_allocated *
			sizeof(struct flex_groups *)));
	rcu_read_unlock();
	rcu_assign_pointer(sbi->s_flex_groups, new_groups);
	sbi->s_flex_groups_allocated = size;
	if (old_groups)
		ext4_kvfree_array_rcu(old_groups);
	return 0;
}
3121
/*
 * Populate the per-flex-group counters (free inodes, free clusters,
 * used directories) from the on-disk group descriptors at mount time.
 * Returns 1 on success — including when flex_bg accounting is disabled —
 * and 0 on allocation failure (note the unconventional 1/0 convention).
 */
static int ext4_fill_flex_info(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_desc *gdp = NULL;
	struct flex_groups *fg;
	ext4_group_t flex_group;
	int i, err;

	sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
	/* Out-of-range on-disk values disable flex_bg accounting entirely. */
	if (sbi->s_log_groups_per_flex < 1 || sbi->s_log_groups_per_flex > 31) {
		sbi->s_log_groups_per_flex = 0;
		return 1;
	}

	err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count);
	if (err)
		goto failed;

	for (i = 0; i < sbi->s_groups_count; i++) {
		/* NOTE(review): gdp is used unchecked — presumably
		 * ext4_get_group_desc() cannot fail here because the
		 * descriptors were validated earlier in mount; confirm. */
		gdp = ext4_get_group_desc(sb, i, NULL);

		flex_group = ext4_flex_group(sbi, i);
		fg = sbi_array_rcu_deref(sbi, s_flex_groups, flex_group);
		atomic_add(ext4_free_inodes_count(sb, gdp), &fg->free_inodes);
		atomic64_add(ext4_free_group_clusters(sb, gdp),
			     &fg->free_clusters);
		atomic_add(ext4_used_dirs_count(sb, gdp), &fg->used_dirs);
	}

	return 1;
failed:
	return 0;
}
3155
/*
 * Compute the checksum of group descriptor @gdp for group @block_group.
 * With metadata_csum: ext4_chksum() over (group number || descriptor
 * with the checksum field replaced by zero || descriptor tail), then
 * truncated to 16 bits.  Otherwise, with gdt_csum: crc16 over
 * (fs UUID || group number || descriptor, skipping the checksum field).
 * Returns 0 when neither checksum feature is enabled.
 */
static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
				   struct ext4_group_desc *gdp)
{
	int offset = offsetof(struct ext4_group_desc, bg_checksum);
	__u16 crc = 0;
	__le32 le_group = cpu_to_le32(block_group);
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (ext4_has_metadata_csum(sbi->s_sb)) {
		/* Use new metadata_csum algorithm */
		__u32 csum32;
		__u16 dummy_csum = 0;

		csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
				     sizeof(le_group));
		csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
		/* Feed a zero in place of the checksum field itself. */
		csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
				     sizeof(dummy_csum));
		offset += sizeof(dummy_csum);
		if (offset < sbi->s_desc_size)
			csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
					     sbi->s_desc_size - offset);

		crc = csum32 & 0xFFFF;
		goto out;
	}

	/* old crc16 code */
	if (!ext4_has_feature_gdt_csum(sb))
		return 0;

	crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
	crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
	crc = crc16(crc, (__u8 *)gdp, offset);
	offset += sizeof(gdp->bg_checksum); /* skip checksum */
	/* for checksum of struct ext4_group_desc do the rest...*/
	if (ext4_has_feature_64bit(sb) && offset < sbi->s_desc_size)
		crc = crc16(crc, (__u8 *)gdp + offset,
			    sbi->s_desc_size - offset);

out:
	return cpu_to_le16(crc);
}
3199
/*
 * Verify the stored checksum of group descriptor @gdp.  Returns 1 when
 * the checksum matches (or no checksum feature is enabled), 0 on
 * mismatch.
 */
int ext4_group_desc_csum_verify(struct super_block *sb, __u32 block_group,
				struct ext4_group_desc *gdp)
{
	if (!ext4_has_group_desc_csum(sb))
		return 1;

	return gdp->bg_checksum == ext4_group_desc_csum(sb, block_group, gdp);
}
3209
/*
 * Recompute and store the checksum of group descriptor @gdp; no-op when
 * neither checksum feature is enabled.
 */
void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
			      struct ext4_group_desc *gdp)
{
	if (ext4_has_group_desc_csum(sb))
		gdp->bg_checksum = ext4_group_desc_csum(sb, block_group, gdp);
}
3217
3218 /* Called at mount-time, super-block is locked */
/* Called at mount-time, super-block is locked */
/*
 * Validate every group descriptor: the block bitmap, inode bitmap and
 * inode table must not overlap the superblock or the descriptor blocks
 * and must lie within their group (or anywhere in the fs with flex_bg),
 * and the descriptor checksum must verify.  Overlap failures are fatal
 * only for read-write mounts; location and checksum failures follow the
 * same rule except out-of-group placement, which is always fatal.
 * On success *first_not_zeroed receives the first group whose inode
 * table has not been zeroed (ngroups if all are).  Returns 1 if the
 * descriptors are acceptable, 0 otherwise.
 */
static int ext4_check_descriptors(struct super_block *sb,
				  ext4_fsblk_t sb_block,
				  ext4_group_t *first_not_zeroed)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
	ext4_fsblk_t last_block;
	ext4_fsblk_t last_bg_block = sb_block + ext4_bg_num_gdb(sb, 0);
	ext4_fsblk_t block_bitmap;
	ext4_fsblk_t inode_bitmap;
	ext4_fsblk_t inode_table;
	int flexbg_flag = 0;
	ext4_group_t i, grp = sbi->s_groups_count;

	if (ext4_has_feature_flex_bg(sb))
		flexbg_flag = 1;

	ext4_debug("Checking group descriptors");

	for (i = 0; i < sbi->s_groups_count; i++) {
		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);

		/* With flex_bg, metadata may live anywhere in the fs. */
		if (i == sbi->s_groups_count - 1 || flexbg_flag)
			last_block = ext4_blocks_count(sbi->s_es) - 1;
		else
			last_block = first_block +
				(EXT4_BLOCKS_PER_GROUP(sb) - 1);

		/* Remember the first group with an un-zeroed inode table. */
		if ((grp == sbi->s_groups_count) &&
		   !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
			grp = i;

		block_bitmap = ext4_block_bitmap(sb, gdp);
		if (block_bitmap == sb_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Block bitmap for group %u overlaps "
				 "superblock", i);
			if (!sb_rdonly(sb))
				return 0;
		}
		if (block_bitmap >= sb_block + 1 &&
		    block_bitmap <= last_bg_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Block bitmap for group %u overlaps "
				 "block group descriptors", i);
			if (!sb_rdonly(sb))
				return 0;
		}
		if (block_bitmap < first_block || block_bitmap > last_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
			       "Block bitmap for group %u not in group "
			       "(block %llu)!", i, block_bitmap);
			return 0;
		}
		inode_bitmap = ext4_inode_bitmap(sb, gdp);
		if (inode_bitmap == sb_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Inode bitmap for group %u overlaps "
				 "superblock", i);
			if (!sb_rdonly(sb))
				return 0;
		}
		if (inode_bitmap >= sb_block + 1 &&
		    inode_bitmap <= last_bg_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Inode bitmap for group %u overlaps "
				 "block group descriptors", i);
			if (!sb_rdonly(sb))
				return 0;
		}
		if (inode_bitmap < first_block || inode_bitmap > last_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
			       "Inode bitmap for group %u not in group "
			       "(block %llu)!", i, inode_bitmap);
			return 0;
		}
		inode_table = ext4_inode_table(sb, gdp);
		if (inode_table == sb_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Inode table for group %u overlaps "
				 "superblock", i);
			if (!sb_rdonly(sb))
				return 0;
		}
		if (inode_table >= sb_block + 1 &&
		    inode_table <= last_bg_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Inode table for group %u overlaps "
				 "block group descriptors", i);
			if (!sb_rdonly(sb))
				return 0;
		}
		/* The inode table spans s_itb_per_group blocks. */
		if (inode_table < first_block ||
		    inode_table + sbi->s_itb_per_group - 1 > last_block) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
			       "Inode table for group %u not in group "
			       "(block %llu)!", i, inode_table);
			return 0;
		}
		/* Checksum must be read under the group lock. */
		ext4_lock_group(sb, i);
		if (!ext4_group_desc_csum_verify(sb, i, gdp)) {
			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
				 "Checksum for group %u failed (%u!=%u)",
				 i, le16_to_cpu(ext4_group_desc_csum(sb, i,
				     gdp)), le16_to_cpu(gdp->bg_checksum));
			if (!sb_rdonly(sb)) {
				ext4_unlock_group(sb, i);
				return 0;
			}
		}
		ext4_unlock_group(sb, i);
		if (!flexbg_flag)
			first_block += EXT4_BLOCKS_PER_GROUP(sb);
	}
	if (NULL != first_not_zeroed)
		*first_not_zeroed = grp;
	return 1;
}
3337
3338 /*
3339 * Maximal extent format file size.
3340 * Resulting logical blkno at s_maxbytes must fit in our on-disk
3341 * extent format containers, within a sector_t, and within i_blocks
3342 * in the vfs. ext4 inode has 48 bits of i_block in fsblock units,
3343 * so that won't be a limiting factor.
3344 *
3345 * However there is other limiting factor. We do store extents in the form
3346 * of starting block and length, hence the resulting length of the extent
3347 * covering maximum file size must fit into on-disk format containers as
3348 * well. Given that length is always by 1 unit bigger than max unit (because
3349 * we count 0 as well) we have to lower the s_maxbytes by one fs block.
3350 *
3351 * Note, this does *not* consider any metadata overhead for vfs i_blocks.
3352 */
ext4_max_size(int blkbits,int has_huge_files)3353 static loff_t ext4_max_size(int blkbits, int has_huge_files)
3354 {
3355 loff_t res;
3356 loff_t upper_limit = MAX_LFS_FILESIZE;
3357
3358 BUILD_BUG_ON(sizeof(blkcnt_t) < sizeof(u64));
3359
3360 if (!has_huge_files) {
3361 upper_limit = (1LL << 32) - 1;
3362
3363 /* total blocks in file system block size */
3364 upper_limit >>= (blkbits - 9);
3365 upper_limit <<= blkbits;
3366 }
3367
3368 /*
3369 * 32-bit extent-start container, ee_block. We lower the maxbytes
3370 * by one fs block, so ee_len can cover the extent of maximum file
3371 * size
3372 */
3373 res = (1LL << 32) - 1;
3374 res <<= blkbits;
3375
3376 /* Sanity check against vm- & vfs- imposed limits */
3377 if (res > upper_limit)
3378 res = upper_limit;
3379
3380 return res;
3381 }
3382
3383 /*
3384 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
3385 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
3386 * We need to be 1 filesystem block less than the 2^48 sector limit.
3387 */
/*
 * Maximal bitmap file size. There is a direct, and {,double-,triple-}indirect
 * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks.
 * We need to be 1 filesystem block less than the 2^48 sector limit.
 *
 * @bits is the filesystem block size in bits; @has_huge_files selects
 * whether i_blocks counts 512-byte sectors (32-bit) or fs blocks (48-bit).
 * Returns the largest file size, in bytes, addressable by the indirect
 * block tree without exceeding the i_blocks limit or MAX_LFS_FILESIZE.
 */
static loff_t ext4_max_bitmap_size(int bits, int has_huge_files)
{
	loff_t upper_limit, res = EXT4_NDIR_BLOCKS;
	int meta_blocks;
	/* pointers per block: each indirect block holds 2^(bits-2) entries */
	unsigned int ppb = 1 << (bits - 2);

	/*
	 * This is calculated to be the largest file size for a dense, block
	 * mapped file such that the file's total number of 512-byte sectors,
	 * including data and all indirect blocks, does not exceed (2^48 - 1).
	 *
	 * __u32 i_blocks_lo and _u16 i_blocks_high represent the total
	 * number of 512-byte sectors of the file.
	 */
	if (!has_huge_files) {
		/*
		 * !has_huge_files or implies that the inode i_block field
		 * represents total file blocks in 2^32 512-byte sectors ==
		 * size of vfs inode i_blocks * 8
		 */
		upper_limit = (1LL << 32) - 1;

		/* total blocks in file system block size */
		upper_limit >>= (bits - 9);

	} else {
		/*
		 * We use 48 bit ext4_inode i_blocks
		 * With EXT4_HUGE_FILE_FL set the i_blocks
		 * represent total number of blocks in
		 * file system block size
		 */
		upper_limit = (1LL << 48) - 1;

	}

	/* Compute how many blocks we can address by block tree */
	res += ppb;
	res += ppb * ppb;
	res += ((loff_t)ppb) * ppb * ppb;
	/* Compute how many metadata blocks are needed */
	meta_blocks = 1;
	meta_blocks += 1 + ppb;
	meta_blocks += 1 + ppb + ppb * ppb;
	/* Does block tree limit file size? */
	if (res + meta_blocks <= upper_limit)
		goto check_lfs;

	/* The i_blocks limit binds: data + metadata must fit upper_limit. */
	res = upper_limit;
	/* How many metadata blocks are needed for addressing upper_limit? */
	upper_limit -= EXT4_NDIR_BLOCKS;
	/* indirect blocks */
	meta_blocks = 1;
	upper_limit -= ppb;
	/* double indirect blocks */
	if (upper_limit < ppb * ppb) {
		meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb);
		res -= meta_blocks;
		goto check_lfs;
	}
	meta_blocks += 1 + ppb;
	upper_limit -= ppb * ppb;
	/* tripple indirect blocks for the rest */
	meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) +
		DIV_ROUND_UP_ULL(upper_limit, ppb*ppb);
	res -= meta_blocks;
check_lfs:
	/* Convert from blocks to bytes and clamp to the VFS page-cache limit. */
	res <<= bits;
	if (res > MAX_LFS_FILESIZE)
		res = MAX_LFS_FILESIZE;

	return res;
}
3461
/*
 * Return the block number of the @nr'th group-descriptor block,
 * given the superblock's logical block.  Without meta_bg (or below
 * s_first_meta_bg) the descriptors follow the superblock directly;
 * with meta_bg each descriptor block sits at the start of its own
 * block group, just after any backup superblock.
 */
static ext4_fsblk_t descriptor_loc(struct super_block *sb,
				   ext4_fsblk_t logical_sb_block, int nr)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	ext4_group_t bg, first_meta_bg;
	int offset = 0;

	first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);

	if (!ext4_has_feature_meta_bg(sb) || nr < first_meta_bg)
		return logical_sb_block + nr + 1;

	bg = sbi->s_desc_per_block * nr;
	if (ext4_bg_has_super(sb, bg))
		offset = 1;

	/*
	 * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
	 * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled
	 * on modern mke2fs or blksize > 1k on older mke2fs) then we must
	 * compensate.
	 */
	if (sb->s_blocksize == 1024 && nr == 0 &&
	    le32_to_cpu(sbi->s_es->s_first_data_block) == 0)
		offset++;

	return ext4_group_first_block_no(sb, bg) + offset;
}
3489
3490 /**
3491 * ext4_get_stripe_size: Get the stripe size.
3492 * @sbi: In memory super block info
3493 *
3494 * If we have specified it via mount option, then
3495 * use the mount option value. If the value specified at mount time is
3496 * greater than the blocks per group use the super block value.
3497 * If the super block value is greater than blocks per group return 0.
3498 * Allocator needs it be less than blocks per group.
3499 *
3500 */
ext4_get_stripe_size(struct ext4_sb_info * sbi)3501 static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
3502 {
3503 unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride);
3504 unsigned long stripe_width =
3505 le32_to_cpu(sbi->s_es->s_raid_stripe_width);
3506 int ret;
3507
3508 if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group)
3509 ret = sbi->s_stripe;
3510 else if (stripe_width && stripe_width <= sbi->s_blocks_per_group)
3511 ret = stripe_width;
3512 else if (stride && stride <= sbi->s_blocks_per_group)
3513 ret = stride;
3514 else
3515 ret = 0;
3516
3517 /*
3518 * If the stripe width is 1, this makes no sense and
3519 * we set it to 0 to turn off stripe handling code.
3520 */
3521 if (ret <= 1)
3522 ret = 0;
3523
3524 return ret;
3525 }
3526
3527 /*
3528 * Check whether this filesystem can be mounted based on
3529 * the features present and the RDONLY/RDWR mount requested.
3530 * Returns 1 if this filesystem can be mounted as requested,
3531 * 0 if it cannot be.
3532 */
ext4_feature_set_ok(struct super_block * sb,int readonly)3533 int ext4_feature_set_ok(struct super_block *sb, int readonly)
3534 {
3535 if (ext4_has_unknown_ext4_incompat_features(sb)) {
3536 ext4_msg(sb, KERN_ERR,
3537 "Couldn't mount because of "
3538 "unsupported optional features (%x)",
3539 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_incompat) &
3540 ~EXT4_FEATURE_INCOMPAT_SUPP));
3541 return 0;
3542 }
3543
3544 #if !IS_ENABLED(CONFIG_UNICODE)
3545 if (ext4_has_feature_casefold(sb)) {
3546 ext4_msg(sb, KERN_ERR,
3547 "Filesystem with casefold feature cannot be "
3548 "mounted without CONFIG_UNICODE");
3549 return 0;
3550 }
3551 #endif
3552
3553 if (readonly)
3554 return 1;
3555
3556 if (ext4_has_feature_readonly(sb)) {
3557 ext4_msg(sb, KERN_INFO, "filesystem is read-only");
3558 sb->s_flags |= SB_RDONLY;
3559 return 1;
3560 }
3561
3562 /* Check that feature set is OK for a read-write mount */
3563 if (ext4_has_unknown_ext4_ro_compat_features(sb)) {
3564 ext4_msg(sb, KERN_ERR, "couldn't mount RDWR because of "
3565 "unsupported optional features (%x)",
3566 (le32_to_cpu(EXT4_SB(sb)->s_es->s_feature_ro_compat) &
3567 ~EXT4_FEATURE_RO_COMPAT_SUPP));
3568 return 0;
3569 }
3570 if (ext4_has_feature_bigalloc(sb) && !ext4_has_feature_extents(sb)) {
3571 ext4_msg(sb, KERN_ERR,
3572 "Can't support bigalloc feature without "
3573 "extents feature\n");
3574 return 0;
3575 }
3576
3577 #if !IS_ENABLED(CONFIG_QUOTA) || !IS_ENABLED(CONFIG_QFMT_V2)
3578 if (!readonly && (ext4_has_feature_quota(sb) ||
3579 ext4_has_feature_project(sb))) {
3580 ext4_msg(sb, KERN_ERR,
3581 "The kernel was not built with CONFIG_QUOTA and CONFIG_QFMT_V2");
3582 return 0;
3583 }
3584 #endif /* CONFIG_QUOTA */
3585 return 1;
3586 }
3587
3588 /*
3589 * This function is called once a day if we have errors logged
3590 * on the file system
3591 */
/*
 * This function is called once a day if we have errors logged
 * on the file system
 */
/*
 * Timer callback: report the superblock's error count plus the first
 * and most recent recorded errors (function, line, inode, block), then
 * re-arm itself for 24 hours later.  Uses KERN_CONT printks to build
 * each report on a single log line.
 */
static void print_daily_error_info(struct timer_list *t)
{
	struct ext4_sb_info *sbi = from_timer(sbi, t, s_err_report);
	struct super_block *sb = sbi->s_sb;
	struct ext4_super_block *es = sbi->s_es;

	if (es->s_error_count)
		/* fsck newer than v1.41.13 is needed to clean this condition. */
		ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
			 le32_to_cpu(es->s_error_count));
	if (es->s_first_error_time) {
		printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %llu: %.*s:%d",
		       sb->s_id,
		       ext4_get_tstamp(es, s_first_error_time),
		       (int) sizeof(es->s_first_error_func),
		       es->s_first_error_func,
		       le32_to_cpu(es->s_first_error_line));
		if (es->s_first_error_ino)
			printk(KERN_CONT ": inode %u",
			       le32_to_cpu(es->s_first_error_ino));
		if (es->s_first_error_block)
			printk(KERN_CONT ": block %llu", (unsigned long long)
			       le64_to_cpu(es->s_first_error_block));
		printk(KERN_CONT "\n");
	}
	if (es->s_last_error_time) {
		printk(KERN_NOTICE "EXT4-fs (%s): last error at time %llu: %.*s:%d",
		       sb->s_id,
		       ext4_get_tstamp(es, s_last_error_time),
		       (int) sizeof(es->s_last_error_func),
		       es->s_last_error_func,
		       le32_to_cpu(es->s_last_error_line));
		if (es->s_last_error_ino)
			printk(KERN_CONT ": inode %u",
			       le32_to_cpu(es->s_last_error_ino));
		if (es->s_last_error_block)
			printk(KERN_CONT ": block %llu", (unsigned long long)
			       le64_to_cpu(es->s_last_error_block));
		printk(KERN_CONT "\n");
	}
	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
}
3634
3635 /* Find next suitable group and run ext4_init_inode_table */
/* Find next suitable group and run ext4_init_inode_table */
/*
 * Run one step of a lazy-init request.  In PREFETCH_BBITMAP mode,
 * prefetch a batch of block bitmaps; once prefetching wraps, either
 * finish (return 1) or switch the request to ITABLE mode.  In ITABLE
 * mode, find the next group whose inode table is not yet zeroed,
 * initialize it, and schedule the next run — scaling the delay by
 * s_li_wait_mult based on how long the first initialization took.
 * Returns 0 to keep the request scheduled, nonzero when it is done
 * (or on error from ext4_init_inode_table()).
 */
static int ext4_run_li_request(struct ext4_li_request *elr)
{
	struct ext4_group_desc *gdp = NULL;
	struct super_block *sb = elr->lr_super;
	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
	ext4_group_t group = elr->lr_next_group;
	unsigned int prefetch_ios = 0;
	int ret = 0;
	u64 start_time;

	if (elr->lr_mode == EXT4_LI_MODE_PREFETCH_BBITMAP) {
		elr->lr_next_group = ext4_mb_prefetch(sb, group,
				EXT4_SB(sb)->s_mb_prefetch, &prefetch_ios);
		if (prefetch_ios)
			ext4_mb_prefetch_fini(sb, elr->lr_next_group,
					      prefetch_ios);
		trace_ext4_prefetch_bitmaps(sb, group, elr->lr_next_group,
					    prefetch_ios);
		/* next_group <= group means prefetching wrapped around. */
		if (group >= elr->lr_next_group) {
			ret = 1;
			if (elr->lr_first_not_zeroed != ngroups &&
			    !sb_rdonly(sb) && test_opt(sb, INIT_INODE_TABLE)) {
				elr->lr_next_group = elr->lr_first_not_zeroed;
				elr->lr_mode = EXT4_LI_MODE_ITABLE;
				ret = 0;
			}
		}
		return ret;
	}

	/* ITABLE mode: skip groups whose inode table is already zeroed. */
	for (; group < ngroups; group++) {
		gdp = ext4_get_group_desc(sb, group, NULL);
		if (!gdp) {
			ret = 1;
			break;
		}

		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
			break;
	}

	if (group >= ngroups)
		ret = 1;

	if (!ret) {
		start_time = ktime_get_real_ns();
		ret = ext4_init_inode_table(sb, group,
					    elr->lr_timeout ? 0 : 1);
		trace_ext4_lazy_itable_init(sb, group);
		/* Derive the inter-run delay from the first run's duration. */
		if (elr->lr_timeout == 0) {
			elr->lr_timeout = nsecs_to_jiffies((ktime_get_real_ns() - start_time) *
				EXT4_SB(elr->lr_super)->s_li_wait_mult);
		}
		elr->lr_next_sched = jiffies + elr->lr_timeout;
		elr->lr_next_group = group + 1;
	}
	return ret;
}
3694
3695 /*
3696 * Remove lr_request from the list_request and free the
3697 * request structure. Should be called with li_list_mtx held
3698 */
ext4_remove_li_request(struct ext4_li_request * elr)3699 static void ext4_remove_li_request(struct ext4_li_request *elr)
3700 {
3701 if (!elr)
3702 return;
3703
3704 list_del(&elr->lr_request);
3705 EXT4_SB(elr->lr_super)->s_li_request = NULL;
3706 kfree(elr);
3707 }
3708
ext4_unregister_li_request(struct super_block * sb)3709 static void ext4_unregister_li_request(struct super_block *sb)
3710 {
3711 mutex_lock(&ext4_li_mtx);
3712 if (!ext4_li_info) {
3713 mutex_unlock(&ext4_li_mtx);
3714 return;
3715 }
3716
3717 mutex_lock(&ext4_li_info->li_list_mtx);
3718 ext4_remove_li_request(EXT4_SB(sb)->s_li_request);
3719 mutex_unlock(&ext4_li_info->li_list_mtx);
3720 mutex_unlock(&ext4_li_mtx);
3721 }
3722
3723 static struct task_struct *ext4_lazyinit_task;
3724
3725 /*
3726 * This is the function where ext4lazyinit thread lives. It walks
3727 * through the request list searching for next scheduled filesystem.
3728 * When such a fs is found, run the lazy initialization request
3729 * (ext4_rn_li_request) and keep track of the time spend in this
3730 * function. Based on that time we compute next schedule time of
3731 * the request. When walking through the list is complete, compute
3732 * next waking time and put itself into sleep.
3733 */
/*
 * This is the function where ext4lazyinit thread lives. It walks
 * through the request list searching for next scheduled filesystem.
 * When such a fs is found, run the lazy initialization request
 * (ext4_rn_li_request) and keep track of the time spend in this
 * function. Based on that time we compute next schedule time of
 * the request. When walking through the list is complete, compute
 * next waking time and put itself into sleep.
 */
static int ext4_lazyinit_thread(void *arg)
{
	struct ext4_lazy_init *eli = arg;
	struct list_head *pos, *n;
	struct ext4_li_request *elr;
	unsigned long next_wakeup, cur;

	BUG_ON(NULL == eli);
	set_freezable();

cont_thread:
	while (true) {
		next_wakeup = MAX_JIFFY_OFFSET;

		mutex_lock(&eli->li_list_mtx);
		if (list_empty(&eli->li_request_list)) {
			mutex_unlock(&eli->li_list_mtx);
			goto exit_thread;
		}
		list_for_each_safe(pos, n, &eli->li_request_list) {
			int err = 0;
			int progress = 0;
			elr = list_entry(pos, struct ext4_li_request,
					 lr_request);

			/* Not due yet: just fold into the next wakeup time. */
			if (time_before(jiffies, elr->lr_next_sched)) {
				if (time_before(elr->lr_next_sched, next_wakeup))
					next_wakeup = elr->lr_next_sched;
				continue;
			}
			if (down_read_trylock(&elr->lr_super->s_umount)) {
				if (sb_start_write_trylock(elr->lr_super)) {
					progress = 1;
					/*
					 * We hold sb->s_umount, sb can not
					 * be removed from the list, it is
					 * now safe to drop li_list_mtx
					 */
					mutex_unlock(&eli->li_list_mtx);
					err = ext4_run_li_request(elr);
					sb_end_write(elr->lr_super);
					mutex_lock(&eli->li_list_mtx);
					/*
					 * Re-read the successor: the list may
					 * have changed while the mutex was
					 * dropped.
					 */
					n = pos->next;
				}
				up_read((&elr->lr_super->s_umount));
			}
			/* error, remove the lazy_init job */
			if (err) {
				ext4_remove_li_request(elr);
				continue;
			}
			if (!progress) {
				/* Couldn't get the locks: retry after a
				 * random delay to avoid lock contention. */
				elr->lr_next_sched = jiffies +
					prandom_u32_max(EXT4_DEF_LI_MAX_START_DELAY * HZ);
			}
			if (time_before(elr->lr_next_sched, next_wakeup))
				next_wakeup = elr->lr_next_sched;
		}
		mutex_unlock(&eli->li_list_mtx);

		try_to_freeze();

		cur = jiffies;
		/* Something is already due (or nothing scheduled): loop again. */
		if ((time_after_eq(cur, next_wakeup)) ||
		    (MAX_JIFFY_OFFSET == next_wakeup)) {
			cond_resched();
			continue;
		}

		schedule_timeout_interruptible(next_wakeup - cur);

		if (kthread_should_stop()) {
			ext4_clear_request_list();
			goto exit_thread;
		}
	}

exit_thread:
	/*
	 * It looks like the request list is empty, but we need
	 * to check it under the li_list_mtx lock, to prevent any
	 * additions into it, and of course we should lock ext4_li_mtx
	 * to atomically free the list and ext4_li_info, because at
	 * this point another ext4 filesystem could be registering
	 * new one.
	 */
	mutex_lock(&ext4_li_mtx);
	mutex_lock(&eli->li_list_mtx);
	if (!list_empty(&eli->li_request_list)) {
		mutex_unlock(&eli->li_list_mtx);
		mutex_unlock(&ext4_li_mtx);
		goto cont_thread;
	}
	mutex_unlock(&eli->li_list_mtx);
	kfree(ext4_li_info);
	ext4_li_info = NULL;
	mutex_unlock(&ext4_li_mtx);

	return 0;
}
3834
ext4_clear_request_list(void)3835 static void ext4_clear_request_list(void)
3836 {
3837 struct list_head *pos, *n;
3838 struct ext4_li_request *elr;
3839
3840 mutex_lock(&ext4_li_info->li_list_mtx);
3841 list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
3842 elr = list_entry(pos, struct ext4_li_request,
3843 lr_request);
3844 ext4_remove_li_request(elr);
3845 }
3846 mutex_unlock(&ext4_li_info->li_list_mtx);
3847 }
3848
ext4_run_lazyinit_thread(void)3849 static int ext4_run_lazyinit_thread(void)
3850 {
3851 ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
3852 ext4_li_info, "ext4lazyinit");
3853 if (IS_ERR(ext4_lazyinit_task)) {
3854 int err = PTR_ERR(ext4_lazyinit_task);
3855 ext4_clear_request_list();
3856 kfree(ext4_li_info);
3857 ext4_li_info = NULL;
3858 printk(KERN_CRIT "EXT4-fs: error %d creating inode table "
3859 "initialization thread\n",
3860 err);
3861 return err;
3862 }
3863 ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
3864 return 0;
3865 }
3866
/*
 * Check whether it makes sense to run the itable init. thread or not.
 * If there is at least one uninitialized inode table, return the
 * corresponding group number, else the loop goes through all groups
 * and returns the total number of groups.
 */
ext4_has_uninit_itable(struct super_block * sb)3873 static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
3874 {
3875 ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
3876 struct ext4_group_desc *gdp = NULL;
3877
3878 if (!ext4_has_group_desc_csum(sb))
3879 return ngroups;
3880
3881 for (group = 0; group < ngroups; group++) {
3882 gdp = ext4_get_group_desc(sb, group, NULL);
3883 if (!gdp)
3884 continue;
3885
3886 if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
3887 break;
3888 }
3889
3890 return group;
3891 }
3892
ext4_li_info_new(void)3893 static int ext4_li_info_new(void)
3894 {
3895 struct ext4_lazy_init *eli = NULL;
3896
3897 eli = kzalloc(sizeof(*eli), GFP_KERNEL);
3898 if (!eli)
3899 return -ENOMEM;
3900
3901 INIT_LIST_HEAD(&eli->li_request_list);
3902 mutex_init(&eli->li_list_mtx);
3903
3904 eli->li_state |= EXT4_LAZYINIT_QUIT;
3905
3906 ext4_li_info = eli;
3907
3908 return 0;
3909 }
3910
/*
 * Allocate a lazy init request for @sb.  @start is the first group
 * whose inode table has not been zeroed yet.  Returns the new request
 * or NULL on allocation failure.
 */
static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
						   ext4_group_t start)
{
	struct ext4_li_request *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (req == NULL)
		return NULL;

	req->lr_super = sb;
	req->lr_first_not_zeroed = start;
	if (!test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS)) {
		req->lr_mode = EXT4_LI_MODE_PREFETCH_BBITMAP;
	} else {
		req->lr_mode = EXT4_LI_MODE_ITABLE;
		req->lr_next_group = start;
	}

	/*
	 * Randomize first schedule time of the request to
	 * spread the inode table initialization requests
	 * better.
	 */
	req->lr_next_sched = jiffies +
		prandom_u32_max(EXT4_DEF_LI_MAX_START_DELAY * HZ);

	return req;
}
3938
/*
 * Register an inode table lazy init / block bitmap prefetch request for
 * @sb and make sure the ext4lazyinit thread is running to service it.
 *
 * @first_not_zeroed: first group whose inode table still needs zeroing
 *		      (equals s_groups_count when none do).
 *
 * Returns 0 on success (including the no-work cases) or a negative errno.
 */
int ext4_register_li_request(struct super_block *sb,
			     ext4_group_t first_not_zeroed)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_li_request *elr = NULL;
	ext4_group_t ngroups = sbi->s_groups_count;
	int ret = 0;

	mutex_lock(&ext4_li_mtx);
	if (sbi->s_li_request != NULL) {
		/*
		 * Reset timeout so it can be computed again, because
		 * s_li_wait_mult might have changed.
		 */
		sbi->s_li_request->lr_timeout = 0;
		goto out;
	}

	/*
	 * Nothing to do for a read-only mount, or when bitmap prefetch is
	 * disabled and there is no itable work left (all tables zeroed, or
	 * INIT_INODE_TABLE is not set).
	 */
	if (sb_rdonly(sb) ||
	    (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) &&
	     (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE))))
		goto out;

	elr = ext4_li_request_new(sb, first_not_zeroed);
	if (!elr) {
		ret = -ENOMEM;
		goto out;
	}

	/* First request ever: create the global bookkeeping structure. */
	if (NULL == ext4_li_info) {
		ret = ext4_li_info_new();
		if (ret)
			goto out;
	}

	mutex_lock(&ext4_li_info->li_list_mtx);
	list_add(&elr->lr_request, &ext4_li_info->li_request_list);
	mutex_unlock(&ext4_li_info->li_list_mtx);

	sbi->s_li_request = elr;
	/*
	 * set elr to NULL here since it has been inserted to
	 * the request_list and the removal and free of it is
	 * handled by ext4_clear_request_list from now on.
	 */
	elr = NULL;

	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
		ret = ext4_run_lazyinit_thread();
		if (ret)
			goto out;
	}
out:
	mutex_unlock(&ext4_li_mtx);
	if (ret)
		kfree(elr);	/* only non-NULL if it was never queued */
	return ret;
}
3997
3998 /*
3999 * We do not need to lock anything since this is called on
4000 * module unload.
4001 */
ext4_destroy_lazyinit_thread(void)4002 static void ext4_destroy_lazyinit_thread(void)
4003 {
4004 /*
4005 * If thread exited earlier
4006 * there's nothing to be done.
4007 */
4008 if (!ext4_li_info || !ext4_lazyinit_task)
4009 return;
4010
4011 kthread_stop(ext4_lazyinit_task);
4012 }
4013
set_journal_csum_feature_set(struct super_block * sb)4014 static int set_journal_csum_feature_set(struct super_block *sb)
4015 {
4016 int ret = 1;
4017 int compat, incompat;
4018 struct ext4_sb_info *sbi = EXT4_SB(sb);
4019
4020 if (ext4_has_metadata_csum(sb)) {
4021 /* journal checksum v3 */
4022 compat = 0;
4023 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
4024 } else {
4025 /* journal checksum v1 */
4026 compat = JBD2_FEATURE_COMPAT_CHECKSUM;
4027 incompat = 0;
4028 }
4029
4030 jbd2_journal_clear_features(sbi->s_journal,
4031 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
4032 JBD2_FEATURE_INCOMPAT_CSUM_V3 |
4033 JBD2_FEATURE_INCOMPAT_CSUM_V2);
4034 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
4035 ret = jbd2_journal_set_features(sbi->s_journal,
4036 compat, 0,
4037 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
4038 incompat);
4039 } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
4040 ret = jbd2_journal_set_features(sbi->s_journal,
4041 compat, 0,
4042 incompat);
4043 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4044 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4045 } else {
4046 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
4047 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
4048 }
4049
4050 return ret;
4051 }
4052
4053 /*
4054 * Note: calculating the overhead so we can be compatible with
4055 * historical BSD practice is quite difficult in the face of
4056 * clusters/bigalloc. This is because multiple metadata blocks from
4057 * different block group can end up in the same allocation cluster.
4058 * Calculating the exact overhead in the face of clustered allocation
4059 * requires either O(all block bitmaps) in memory or O(number of block
4060 * groups**2) in time. We will still calculate the superblock for
4061 * older file systems --- and if we come across with a bigalloc file
4062 * system with zero in s_overhead_clusters the estimate will be close to
4063 * correct especially for very large cluster sizes --- but for newer
4064 * file systems, it's better to calculate this figure once at mkfs
4065 * time, and store it in the superblock. If the superblock value is
4066 * present (even for non-bigalloc file systems), we will use it.
4067 */
/*
 * Count the filesystem metadata clusters belonging to group @grp.
 * @buf is a caller-supplied zeroed bitmap page used to mark clusters so
 * that metadata from several groups landing in the same cluster
 * (possible with bigalloc) is not double counted.
 */
static int count_overhead(struct super_block *sb, ext4_group_t grp,
			  char *buf)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_desc *gdp;
	ext4_fsblk_t first_block, last_block, b;
	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
	int s, j, count = 0;
	int has_super = ext4_bg_has_super(sb, grp);

	/*
	 * Without bigalloc, block == cluster and the overhead is a simple
	 * closed-form sum: superblock copy, group descriptor blocks,
	 * reserved GDT blocks, inode table, and the two bitmaps.
	 */
	if (!ext4_has_feature_bigalloc(sb))
		return (has_super + ext4_bg_num_gdb(sb, grp) +
			(has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) +
			sbi->s_itb_per_group + 2);

	/* Block range covered by group @grp. */
	first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
		(grp * EXT4_BLOCKS_PER_GROUP(sb));
	last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
	for (i = 0; i < ngroups; i++) {
		gdp = ext4_get_group_desc(sb, i, NULL);
		/* Any group's block bitmap may live inside @grp's range. */
		b = ext4_block_bitmap(sb, gdp);
		if (b >= first_block && b <= last_block) {
			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
			count++;
		}
		b = ext4_inode_bitmap(sb, gdp);
		if (b >= first_block && b <= last_block) {
			ext4_set_bit(EXT4_B2C(sbi, b - first_block), buf);
			count++;
		}
		b = ext4_inode_table(sb, gdp);
		if (b >= first_block && b + sbi->s_itb_per_group <= last_block)
			for (j = 0; j < sbi->s_itb_per_group; j++, b++) {
				int c = EXT4_B2C(sbi, b - first_block);
				ext4_set_bit(c, buf);
				count++;
			}
		if (i != grp)
			continue;
		/* Blocks at the start of @grp itself: superblock + GDT. */
		s = 0;
		if (ext4_bg_has_super(sb, grp)) {
			ext4_set_bit(s++, buf);
			count++;
		}
		j = ext4_bg_num_gdb(sb, grp);
		if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
			ext4_error(sb, "Invalid number of block group "
				   "descriptor blocks: %d", j);
			j = EXT4_BLOCKS_PER_GROUP(sb) - s;
		}
		count += j;
		for (; j > 0; j--)
			ext4_set_bit(EXT4_B2C(sbi, s++), buf);
	}
	if (!count)
		return 0;
	/* Overhead in clusters = total clusters minus free bits in @buf. */
	return EXT4_CLUSTERS_PER_GROUP(sb) -
		ext4_count_free(buf, EXT4_CLUSTERS_PER_GROUP(sb) / 8);
}
4127
4128 /*
4129 * Compute the overhead and stash it in sbi->s_overhead
4130 */
/*
 * Compute the overhead and stash it in sbi->s_overhead.
 * Returns 0 on success or -ENOMEM if the scratch page cannot be
 * allocated.  The result is in clusters.
 */
int ext4_calculate_overhead(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	struct inode *j_inode;
	unsigned int j_blocks, j_inum = le32_to_cpu(es->s_journal_inum);
	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
	ext4_fsblk_t overhead = 0;
	/* Scratch bitmap page reused (and re-zeroed) for every group. */
	char *buf = (char *) get_zeroed_page(GFP_NOFS);

	if (!buf)
		return -ENOMEM;

	/*
	 * Compute the overhead (FS structures). This is constant
	 * for a given filesystem unless the number of block groups
	 * changes so we cache the previous value until it does.
	 */

	/*
	 * All of the blocks before first_data_block are overhead
	 */
	overhead = EXT4_B2C(sbi, le32_to_cpu(es->s_first_data_block));

	/*
	 * Add the overhead found in each block group
	 */
	for (i = 0; i < ngroups; i++) {
		int blks;

		blks = count_overhead(sb, i, buf);
		overhead += blks;
		/* count_overhead() marked bits in buf; clear for next group. */
		if (blks)
			memset(buf, 0, PAGE_SIZE);
		cond_resched();
	}

	/*
	 * Add the internal journal blocks whether the journal has been
	 * loaded or not
	 */
	if (sbi->s_journal && !sbi->s_journal_bdev)
		overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_total_len);
	else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) {
		/* j_inum for internal journal is non-zero */
		j_inode = ext4_get_journal_inode(sb, j_inum);
		if (j_inode) {
			j_blocks = j_inode->i_size >> sb->s_blocksize_bits;
			overhead += EXT4_NUM_B2C(sbi, j_blocks);
			iput(j_inode);
		} else {
			ext4_msg(sb, KERN_ERR, "can't get journal size");
		}
	}
	sbi->s_overhead = overhead;
	/*
	 * NOTE(review): barrier orders the s_overhead store; presumably it
	 * pairs with lockless readers of s_overhead elsewhere — confirm.
	 */
	smp_wmb();
	free_page((unsigned long) buf);
	return 0;
}
4190
ext4_set_resv_clusters(struct super_block * sb)4191 static void ext4_set_resv_clusters(struct super_block *sb)
4192 {
4193 ext4_fsblk_t resv_clusters;
4194 struct ext4_sb_info *sbi = EXT4_SB(sb);
4195
4196 /*
4197 * There's no need to reserve anything when we aren't using extents.
4198 * The space estimates are exact, there are no unwritten extents,
4199 * hole punching doesn't need new metadata... This is needed especially
4200 * to keep ext2/3 backward compatibility.
4201 */
4202 if (!ext4_has_feature_extents(sb))
4203 return;
4204 /*
4205 * By default we reserve 2% or 4096 clusters, whichever is smaller.
4206 * This should cover the situations where we can not afford to run
4207 * out of space like for example punch hole, or converting
4208 * unwritten extents in delalloc path. In most cases such
4209 * allocation would require 1, or 2 blocks, higher numbers are
4210 * very rare.
4211 */
4212 resv_clusters = (ext4_blocks_count(sbi->s_es) >>
4213 sbi->s_cluster_bits);
4214
4215 do_div(resv_clusters, 50);
4216 resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
4217
4218 atomic64_set(&sbi->s_resv_clusters, resv_clusters);
4219 }
4220
/* Human-readable quota mode string for mount-time logging. */
static const char *ext4_quota_mode(struct super_block *sb)
{
#ifdef CONFIG_QUOTA
	if (!ext4_quota_capable(sb))
		return "none";
	if (EXT4_SB(sb)->s_journal && ext4_is_quota_journalled(sb))
		return "journalled";
	return "writeback";
#else
	return "disabled";
#endif
}
4235
/*
 * Install @trigger as the jbd2 frozen-buffer callback for journal
 * trigger slot @type, remembering @sb alongside it.
 */
static void ext4_setup_csum_trigger(struct super_block *sb,
				    enum ext4_journal_trigger_type type,
				    void (*trigger)(
					struct jbd2_buffer_trigger_type *type,
					struct buffer_head *bh,
					void *mapped_data,
					size_t size))
{
	EXT4_SB(sb)->s_journal_triggers[type].sb = sb;
	EXT4_SB(sb)->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
}
4249
ext4_free_sbi(struct ext4_sb_info * sbi)4250 static void ext4_free_sbi(struct ext4_sb_info *sbi)
4251 {
4252 if (!sbi)
4253 return;
4254
4255 kfree(sbi->s_blockgroup_lock);
4256 fs_put_dax(sbi->s_daxdev, NULL);
4257 kfree(sbi);
4258 }
4259
ext4_alloc_sbi(struct super_block * sb)4260 static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb)
4261 {
4262 struct ext4_sb_info *sbi;
4263
4264 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
4265 if (!sbi)
4266 return NULL;
4267
4268 sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off,
4269 NULL, NULL);
4270
4271 sbi->s_blockgroup_lock =
4272 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
4273
4274 if (!sbi->s_blockgroup_lock)
4275 goto err_out;
4276
4277 sb->s_fs_info = sbi;
4278 sbi->s_sb = sb;
4279 return sbi;
4280 err_out:
4281 fs_put_dax(sbi->s_daxdev, NULL);
4282 kfree(sbi);
4283 return NULL;
4284 }
4285
/*
 * Establish the default mount options for @sb, combining hard-coded
 * ext4 defaults with the s_default_mount_opts and s_errors fields
 * stored in the on-disk superblock.  Runs before mount options from
 * the command line are parsed, so anything set here can be overridden.
 */
static void ext4_set_def_opts(struct super_block *sb,
			      struct ext4_super_block *es)
{
	unsigned long def_mount_opts;

	/* Set defaults before we parse the mount options */
	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
	set_opt(sb, INIT_INODE_TABLE);
	if (def_mount_opts & EXT4_DEFM_DEBUG)
		set_opt(sb, DEBUG);
	if (def_mount_opts & EXT4_DEFM_BSDGROUPS)
		set_opt(sb, GRPID);
	if (def_mount_opts & EXT4_DEFM_UID16)
		set_opt(sb, NO_UID32);
	/* xattr user namespace & acls are now defaulted on */
	set_opt(sb, XATTR_USER);
#ifdef CONFIG_EXT4_FS_POSIX_ACL
	set_opt(sb, POSIX_ACL);
#endif
	if (ext4_has_feature_fast_commit(sb))
		set_opt2(sb, JOURNAL_FAST_COMMIT);
	/* don't forget to enable journal_csum when metadata_csum is enabled. */
	if (ext4_has_metadata_csum(sb))
		set_opt(sb, JOURNAL_CHECKSUM);

	/* Default journalling mode as recorded in the superblock. */
	if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
		set_opt(sb, JOURNAL_DATA);
	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
		set_opt(sb, ORDERED_DATA);
	else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK)
		set_opt(sb, WRITEBACK_DATA);

	/* On-error behavior from the superblock; default is errors=remount-ro. */
	if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC)
		set_opt(sb, ERRORS_PANIC);
	else if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE)
		set_opt(sb, ERRORS_CONT);
	else
		set_opt(sb, ERRORS_RO);
	/* block_validity enabled by default; disable with noblock_validity */
	set_opt(sb, BLOCK_VALIDITY);
	if (def_mount_opts & EXT4_DEFM_DISCARD)
		set_opt(sb, DISCARD);

	if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0)
		set_opt(sb, BARRIER);

	/*
	 * enable delayed allocation by default
	 * Use -o nodelalloc to turn it off
	 */
	if (!IS_EXT3_SB(sb) && !IS_EXT2_SB(sb) &&
	    ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0))
		set_opt(sb, DELALLOC);

	if (sb->s_blocksize == PAGE_SIZE)
		set_opt(sb, DIOREAD_NOLOCK);
}
4343
/*
 * Validate the cluster size recorded in the superblock and derive the
 * cluster-related fields in sbi (s_cluster_bits, s_clusters_per_group,
 * s_cluster_ratio).  Without bigalloc the cluster size must equal the
 * block size.  Returns 0 on success or -EINVAL on inconsistent values.
 */
static int ext4_handle_clustersize(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	int clustersize;

	/* Handle clustersize */
	clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size);
	if (ext4_has_feature_bigalloc(sb)) {
		if (clustersize < sb->s_blocksize) {
			ext4_msg(sb, KERN_ERR,
				 "cluster size (%d) smaller than "
				 "block size (%lu)", clustersize, sb->s_blocksize);
			return -EINVAL;
		}
		/* log2(cluster size / block size) */
		sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
			le32_to_cpu(es->s_log_block_size);
		sbi->s_clusters_per_group =
			le32_to_cpu(es->s_clusters_per_group);
		/* Cluster bitmap must fit into a single block. */
		if (sbi->s_clusters_per_group > sb->s_blocksize * 8) {
			ext4_msg(sb, KERN_ERR,
				 "#clusters per group too big: %lu",
				 sbi->s_clusters_per_group);
			return -EINVAL;
		}
		if (sbi->s_blocks_per_group !=
		    (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) {
			ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and "
				 "clusters per group (%lu) inconsistent",
				 sbi->s_blocks_per_group,
				 sbi->s_clusters_per_group);
			return -EINVAL;
		}
	} else {
		if (clustersize != sb->s_blocksize) {
			ext4_msg(sb, KERN_ERR,
				 "fragment/cluster size (%d) != "
				 "block size (%lu)", clustersize, sb->s_blocksize);
			return -EINVAL;
		}
		/* Block bitmap must fit into a single block. */
		if (sbi->s_blocks_per_group > sb->s_blocksize * 8) {
			ext4_msg(sb, KERN_ERR,
				 "#blocks per group too big: %lu",
				 sbi->s_blocks_per_group);
			return -EINVAL;
		}
		/* No bigalloc: one cluster per block. */
		sbi->s_clusters_per_group = sbi->s_blocks_per_group;
		sbi->s_cluster_bits = 0;
	}
	sbi->s_cluster_ratio = clustersize / sb->s_blocksize;

	/* Do we have standard group size of clustersize * 8 blocks ? */
	if (sbi->s_blocks_per_group == clustersize << 3)
		set_opt2(sb, STD_GROUP_SIZE);

	return 0;
}
4401
/*
 * Reset all in-memory fast commit state to an empty, pristine state:
 * the main and staging inode/dentry queues, the byte counter, the
 * ineligibility tracking, the statistics, and the replay scratch state
 * (whose tables are allocated on demand later).
 */
static void ext4_fast_commit_init(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	/* Initialize fast commit stuff */
	atomic_set(&sbi->s_fc_subtid, 0);
	INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]);
	INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]);
	INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]);
	INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]);
	sbi->s_fc_bytes = 0;
	ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
	sbi->s_fc_ineligible_tid = 0;
	spin_lock_init(&sbi->s_fc_lock);
	memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
	/* Replay state: no region / modified-inode tables allocated yet. */
	sbi->s_fc_replay_state.fc_regions = NULL;
	sbi->s_fc_replay_state.fc_regions_size = 0;
	sbi->s_fc_replay_state.fc_regions_used = 0;
	sbi->s_fc_replay_state.fc_regions_valid = 0;
	sbi->s_fc_replay_state.fc_modified_inodes = NULL;
	sbi->s_fc_replay_state.fc_modified_inodes_size = 0;
	sbi->s_fc_replay_state.fc_modified_inodes_used = 0;
}
4425
/*
 * Validate the on-disk inode size / first inode and derive the related
 * sbi fields (s_inode_size, s_first_ino, s_want_extra_isize) plus the
 * VFS timestamp granularity and range.  Returns 0 on success or
 * -EINVAL for invalid superblock values.
 */
static int ext4_inode_info_init(struct super_block *sb,
				struct ext4_super_block *es)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) {
		/* Rev 0 filesystems have fixed inode size / first inode. */
		sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE;
		sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO;
	} else {
		sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
		sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
		if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) {
			ext4_msg(sb, KERN_ERR, "invalid first ino: %u",
				 sbi->s_first_ino);
			return -EINVAL;
		}
		/* Inode size must be a power of two within [128, blocksize]. */
		if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) ||
		    (!is_power_of_2(sbi->s_inode_size)) ||
		    (sbi->s_inode_size > sb->s_blocksize)) {
			ext4_msg(sb, KERN_ERR,
			       "unsupported inode size: %d",
			       sbi->s_inode_size);
			ext4_msg(sb, KERN_ERR, "blocksize: %lu", sb->s_blocksize);
			return -EINVAL;
		}
		/*
		 * i_atime_extra is the last extra field available for
		 * [acm]times in struct ext4_inode. Checking for that
		 * field should suffice to ensure we have extra space
		 * for all three.
		 */
		if (sbi->s_inode_size >= offsetof(struct ext4_inode, i_atime_extra) +
			sizeof(((struct ext4_inode *)0)->i_atime_extra)) {
			sb->s_time_gran = 1;
			sb->s_time_max = EXT4_EXTRA_TIMESTAMP_MAX;
		} else {
			/* No extra fields: second granularity, 32-bit range. */
			sb->s_time_gran = NSEC_PER_SEC;
			sb->s_time_max = EXT4_NON_EXTRA_TIMESTAMP_MAX;
		}
		sb->s_time_min = EXT4_TIMESTAMP_MIN;
	}

	if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) {
		sbi->s_want_extra_isize = sizeof(struct ext4_inode) -
			EXT4_GOOD_OLD_INODE_SIZE;
		if (ext4_has_feature_extra_isize(sb)) {
			unsigned v, max = (sbi->s_inode_size -
					   EXT4_GOOD_OLD_INODE_SIZE);

			/* Honor the superblock's wishes if they fit. */
			v = le16_to_cpu(es->s_want_extra_isize);
			if (v > max) {
				ext4_msg(sb, KERN_ERR,
					 "bad s_want_extra_isize: %d", v);
				return -EINVAL;
			}
			if (sbi->s_want_extra_isize < v)
				sbi->s_want_extra_isize = v;

			v = le16_to_cpu(es->s_min_extra_isize);
			if (v > max) {
				ext4_msg(sb, KERN_ERR,
					 "bad s_min_extra_isize: %d", v);
				return -EINVAL;
			}
			if (sbi->s_want_extra_isize < v)
				sbi->s_want_extra_isize = v;
		}
	}

	return 0;
}
4497
#if IS_ENABLED(CONFIG_UNICODE)
/*
 * If the casefold feature is enabled, load the unicode charmap named by
 * the superblock and attach it to @sb.  A no-op if casefold is off or
 * an encoding is already installed.  Returns 0 on success or -EINVAL
 * if the encoding is unknown or not supported by this kernel.
 */
static int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
{
	const struct ext4_sb_encodings *encoding_info;
	struct unicode_map *encoding;
	__u16 encoding_flags = le16_to_cpu(es->s_encoding_flags);

	if (!ext4_has_feature_casefold(sb) || sb->s_encoding)
		return 0;

	/* Translate the on-disk encoding identifier to a table entry. */
	encoding_info = ext4_sb_read_encoding(es);
	if (!encoding_info) {
		ext4_msg(sb, KERN_ERR,
			 "Encoding requested by superblock is unknown");
		return -EINVAL;
	}

	encoding = utf8_load(encoding_info->version);
	if (IS_ERR(encoding)) {
		ext4_msg(sb, KERN_ERR,
			 "can't mount with superblock charset: %s-%u.%u.%u "
			 "not supported by the kernel. flags: 0x%x.",
			 encoding_info->name,
			 unicode_major(encoding_info->version),
			 unicode_minor(encoding_info->version),
			 unicode_rev(encoding_info->version),
			 encoding_flags);
		return -EINVAL;
	}
	ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: "
		 "%s-%u.%u.%u with flags 0x%hx", encoding_info->name,
		 unicode_major(encoding_info->version),
		 unicode_minor(encoding_info->version),
		 unicode_rev(encoding_info->version),
		 encoding_flags);

	sb->s_encoding = encoding;
	sb->s_encoding_flags = encoding_flags;

	return 0;
}
#else
/* Unicode support compiled out: nothing to initialize. */
static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es)
{
	return 0;
}
#endif
4545
/*
 * Set up metadata checksumming for @sb: verify the checksum type, load
 * the crc32c driver, verify the superblock's own checksum and derive
 * the per-filesystem checksum seed.  Returns 0 on success, -EINVAL for
 * an unknown checksum algorithm, -EFSBADCRC for a bad superblock
 * checksum, or the crypto layer's error if crc32c is unavailable.
 */
static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_block *es)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	/* Warn if metadata_csum and gdt_csum are both set. */
	if (ext4_has_feature_metadata_csum(sb) &&
	    ext4_has_feature_gdt_csum(sb))
		ext4_warning(sb, "metadata_csum and uninit_bg are "
			     "redundant flags; please run fsck.");

	/* Check for a known checksum algorithm */
	if (!ext4_verify_csum_type(sb, es)) {
		ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
			 "unknown checksum algorithm.");
		return -EINVAL;
	}
	ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
				ext4_orphan_file_block_trigger);

	/* Load the checksum driver */
	sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
	if (IS_ERR(sbi->s_chksum_driver)) {
		int ret = PTR_ERR(sbi->s_chksum_driver);
		ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver.");
		sbi->s_chksum_driver = NULL;
		return ret;
	}

	/* Check superblock checksum */
	if (!ext4_superblock_csum_verify(sb, es)) {
		ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with "
			 "invalid superblock checksum. Run e2fsck?");
		return -EFSBADCRC;
	}

	/* Precompute checksum seed for all metadata */
	if (ext4_has_feature_csum_seed(sb))
		sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed);
	else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb))
		/* Seed derived from the filesystem UUID. */
		sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
					       sizeof(es->s_uuid));
	return 0;
}
4589
/*
 * Verify that this kernel can mount @sb given its feature flags and
 * creator OS, and that the feature set matches the filesystem type the
 * user asked for (ext2/ext3 via the compat entry points).  Returns 0
 * if the mount may proceed, -EINVAL otherwise.  @silent suppresses the
 * error message during cross-fstype probing.
 */
static int ext4_check_feature_compatibility(struct super_block *sb,
					    struct ext4_super_block *es,
					    int silent)
{
	if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV &&
	    (ext4_has_compat_features(sb) ||
	     ext4_has_ro_compat_features(sb) ||
	     ext4_has_incompat_features(sb)))
		ext4_msg(sb, KERN_WARNING,
			 "feature flags set on rev 0 fs, "
			 "running e2fsck is recommended");

	if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) {
		set_opt2(sb, HURD_COMPAT);
		if (ext4_has_feature_64bit(sb)) {
			ext4_msg(sb, KERN_ERR,
				 "The Hurd can't support 64-bit file systems");
			return -EINVAL;
		}

		/*
		 * ea_inode feature uses l_i_version field which is not
		 * available in HURD_COMPAT mode.
		 */
		if (ext4_has_feature_ea_inode(sb)) {
			ext4_msg(sb, KERN_ERR,
				 "ea_inode feature is not supported for Hurd");
			return -EINVAL;
		}
	}

	/* Mounted through the ext2 compat entry point? */
	if (IS_EXT2_SB(sb)) {
		if (ext2_feature_set_ok(sb))
			ext4_msg(sb, KERN_INFO, "mounting ext2 file system "
				 "using the ext4 subsystem");
		else {
			/*
			 * If we're probing be silent, if this looks like
			 * it's actually an ext[34] filesystem.
			 */
			if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
				return -EINVAL;
			ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due "
				 "to feature incompatibilities");
			return -EINVAL;
		}
	}

	/* Mounted through the ext3 compat entry point? */
	if (IS_EXT3_SB(sb)) {
		if (ext3_feature_set_ok(sb))
			ext4_msg(sb, KERN_INFO, "mounting ext3 file system "
				 "using the ext4 subsystem");
		else {
			/*
			 * If we're probing be silent, if this looks like
			 * it's actually an ext4 filesystem.
			 */
			if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb)))
				return -EINVAL;
			ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due "
				 "to feature incompatibilities");
			return -EINVAL;
		}
	}

	/*
	 * Check feature flags regardless of the revision level, since we
	 * previously didn't change the revision level when setting the flags,
	 * so there is a chance incompat flags are set on a rev 0 filesystem.
	 */
	if (!ext4_feature_set_ok(sb, (sb_rdonly(sb))))
		return -EINVAL;

	return 0;
}
4665
/*
 * Sanity-check the filesystem geometry recorded in the superblock
 * (block count vs. device size, first data block, group count, inode
 * count) and derive sbi->s_groups_count / s_blockfile_groups.
 * Returns 0 when the geometry is plausible, -EINVAL otherwise.
 */
static int ext4_geometry_check(struct super_block *sb,
			       struct ext4_super_block *es)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	__u64 blocks_count;

	/* check blocks count against device size */
	blocks_count = sb_bdev_nr_blocks(sb);
	if (blocks_count && ext4_blocks_count(es) > blocks_count) {
		ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu "
			 "exceeds size of device (%llu blocks)",
			 ext4_blocks_count(es), blocks_count);
		return -EINVAL;
	}

	/*
	 * It makes no sense for the first data block to be beyond the end
	 * of the filesystem.
	 */
	if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) {
		ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
			 "block %u is beyond end of filesystem (%llu)",
			 le32_to_cpu(es->s_first_data_block),
			 ext4_blocks_count(es));
		return -EINVAL;
	}
	/* First data block may only be 0 when the block size exceeds 1k. */
	if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) &&
	    (sbi->s_cluster_ratio == 1)) {
		ext4_msg(sb, KERN_WARNING, "bad geometry: first data "
			 "block is 0 with a 1k block and cluster size");
		return -EINVAL;
	}

	/* Number of groups, rounding the final partial group up. */
	blocks_count = (ext4_blocks_count(es) -
			le32_to_cpu(es->s_first_data_block) +
			EXT4_BLOCKS_PER_GROUP(sb) - 1);
	do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb));
	/* Guard s_groups_count (and gdb indexing) against 32-bit overflow. */
	if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) {
		ext4_msg(sb, KERN_WARNING, "groups count too large: %llu "
			 "(block count %llu, first data block %u, "
			 "blocks per group %lu)", blocks_count,
			 ext4_blocks_count(es),
			 le32_to_cpu(es->s_first_data_block),
			 EXT4_BLOCKS_PER_GROUP(sb));
		return -EINVAL;
	}
	sbi->s_groups_count = blocks_count;
	sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
			(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
	/* Total inode count must equal groups * inodes-per-group exactly. */
	if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
	    le32_to_cpu(es->s_inodes_count)) {
		ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
			 le32_to_cpu(es->s_inodes_count),
			 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
		return -EINVAL;
	}

	return 0;
}
4725
ext4_group_desc_free(struct ext4_sb_info * sbi)4726 static void ext4_group_desc_free(struct ext4_sb_info *sbi)
4727 {
4728 struct buffer_head **group_desc;
4729 int i;
4730
4731 rcu_read_lock();
4732 group_desc = rcu_dereference(sbi->s_group_desc);
4733 for (i = 0; i < sbi->s_gdb_count; i++)
4734 brelse(group_desc[i]);
4735 kvfree(group_desc);
4736 rcu_read_unlock();
4737 }
4738
/*
 * Read all group descriptor blocks into the buffer cache, publish the
 * buffer-head array via RCU in sbi->s_group_desc, and validate the
 * descriptors.  On success *first_not_zeroed is set by
 * ext4_check_descriptors().  Returns 0, or a negative errno with all
 * partially-read state released.
 */
static int ext4_group_desc_init(struct super_block *sb,
				struct ext4_super_block *es,
				ext4_fsblk_t logical_sb_block,
				ext4_group_t *first_not_zeroed)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned int db_count;
	ext4_fsblk_t block;
	int ret;
	int i;

	/* Number of blocks needed to hold all group descriptors. */
	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
		   EXT4_DESC_PER_BLOCK(sb);
	if (ext4_has_feature_meta_bg(sb)) {
		if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
			ext4_msg(sb, KERN_WARNING,
				 "first meta block group too large: %u "
				 "(group descriptor block count %u)",
				 le32_to_cpu(es->s_first_meta_bg), db_count);
			return -EINVAL;
		}
	}
	rcu_assign_pointer(sbi->s_group_desc,
			   kvmalloc_array(db_count,
					  sizeof(struct buffer_head *),
					  GFP_KERNEL));
	if (sbi->s_group_desc == NULL) {
		ext4_msg(sb, KERN_ERR, "not enough memory");
		return -ENOMEM;
	}

	bgl_lock_init(sbi->s_blockgroup_lock);

	/* Pre-read the descriptors into the buffer cache */
	for (i = 0; i < db_count; i++) {
		block = descriptor_loc(sb, logical_sb_block, i);
		ext4_sb_breadahead_unmovable(sb, block);
	}

	for (i = 0; i < db_count; i++) {
		struct buffer_head *bh;

		block = descriptor_loc(sb, logical_sb_block, i);
		bh = ext4_sb_bread_unmovable(sb, block);
		if (IS_ERR(bh)) {
			ext4_msg(sb, KERN_ERR,
				 "can't read group descriptor %d", i);
			/*
			 * Record how many bhs were obtained so the
			 * cleanup path releases exactly those.
			 */
			sbi->s_gdb_count = i;
			ret = PTR_ERR(bh);
			goto out;
		}
		rcu_read_lock();
		rcu_dereference(sbi->s_group_desc)[i] = bh;
		rcu_read_unlock();
	}
	sbi->s_gdb_count = db_count;
	if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) {
		ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
		ret = -EFSCORRUPTED;
		goto out;
	}
	return 0;
out:
	/* Frees the bhs read so far plus the descriptor array. */
	ext4_group_desc_free(sbi);
	return ret;
}
4805
/*
 * Load the journal (inode or external device per ctx->journal_devnum),
 * negotiate the jbd2 feature bits the mount options require (64bit,
 * checksums, fast commit), and pick/validate the data journaling mode.
 *
 * Returns 0 on success.  On any failure after the journal was loaded,
 * flushes s_error_work and destroys the journal before returning
 * -EINVAL, leaving sbi->s_journal NULL.
 */
static int ext4_load_and_init_journal(struct super_block *sb,
				      struct ext4_super_block *es,
				      struct ext4_fs_context *ctx)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	int err;

	err = ext4_load_journal(sb, es, ctx->journal_devnum);
	if (err)
		return err;

	/* a 64bit filesystem needs a journal that can record 64-bit blocks */
	if (ext4_has_feature_64bit(sb) &&
	    !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
				       JBD2_FEATURE_INCOMPAT_64BIT)) {
		ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature");
		goto out;
	}

	if (!set_journal_csum_feature_set(sb)) {
		ext4_msg(sb, KERN_ERR, "Failed to set journal checksum "
			 "feature set");
		goto out;
	}

	if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
		!jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0,
					  JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) {
		ext4_msg(sb, KERN_ERR,
			"Failed to set fast commit journal feature");
		goto out;
	}

	/* We have now updated the journal if required, so we can
	 * validate the data journaling mode. */
	switch (test_opt(sb, DATA_FLAGS)) {
	case 0:
		/* No mode set, assume a default based on the journal
		 * capabilities: ORDERED_DATA if the journal can
		 * cope, else JOURNAL_DATA
		 */
		if (jbd2_journal_check_available_features
		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
			set_opt(sb, ORDERED_DATA);
			sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA;
		} else {
			set_opt(sb, JOURNAL_DATA);
			sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA;
		}
		break;

	case EXT4_MOUNT_ORDERED_DATA:
	case EXT4_MOUNT_WRITEBACK_DATA:
		/* both ordered and writeback modes require revoke support */
		if (!jbd2_journal_check_available_features
		    (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) {
			ext4_msg(sb, KERN_ERR, "Journal does not support "
			       "requested data journaling mode");
			goto out;
		}
		break;
	default:
		break;
	}

	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA &&
	    test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
		ext4_msg(sb, KERN_ERR, "can't mount with "
			"journal_async_commit in data=ordered mode");
		goto out;
	}

	set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);

	/* hook ext4's ordered-data writeback helpers into jbd2 */
	sbi->s_journal->j_submit_inode_data_buffers =
		ext4_journal_submit_inode_data_buffers;
	sbi->s_journal->j_finish_inode_data_buffers =
		ext4_journal_finish_inode_data_buffers;

	return 0;

out:
	/* flush s_error_work before journal destroy. */
	flush_work(&sbi->s_error_work);
	jbd2_journal_destroy(sbi->s_journal);
	sbi->s_journal = NULL;
	return -EINVAL;
}
4892
ext4_journal_data_mode_check(struct super_block * sb)4893 static int ext4_journal_data_mode_check(struct super_block *sb)
4894 {
4895 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4896 printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with "
4897 "data=journal disables delayed allocation, "
4898 "dioread_nolock, O_DIRECT and fast_commit support!\n");
4899 /* can't mount with both data=journal and dioread_nolock. */
4900 clear_opt(sb, DIOREAD_NOLOCK);
4901 clear_opt2(sb, JOURNAL_FAST_COMMIT);
4902 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4903 ext4_msg(sb, KERN_ERR, "can't mount with "
4904 "both data=journal and delalloc");
4905 return -EINVAL;
4906 }
4907 if (test_opt(sb, DAX_ALWAYS)) {
4908 ext4_msg(sb, KERN_ERR, "can't mount with "
4909 "both data=journal and dax");
4910 return -EINVAL;
4911 }
4912 if (ext4_has_feature_encrypt(sb)) {
4913 ext4_msg(sb, KERN_WARNING,
4914 "encrypted files will use data=ordered "
4915 "instead of data journaling mode");
4916 }
4917 if (test_opt(sb, DELALLOC))
4918 clear_opt(sb, DELALLOC);
4919 } else {
4920 sb->s_iflags |= SB_I_CGROUPWB;
4921 }
4922
4923 return 0;
4924 }
4925
/*
 * Read and minimally validate the on-disk superblock, switching the
 * block device to the filesystem's real block size if it differs from
 * the initial minimum-size guess.
 *
 * @sb:	    superblock being mounted (sbi->s_sb_block gives the sb location
 *	    in 1k units)
 * @lsb:    out: logical block number the superblock was read from, in
 *	    filesystem-blocksize units
 * @silent: suppress the "can't find ext4" message on probe failures
 *
 * On success returns 0 with sbi->s_es and sbi->s_sbh set (the buffer
 * head is kept referenced); returns a negative errno otherwise.
 */
static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb,
			   int silent)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es;
	ext4_fsblk_t logical_sb_block;
	unsigned long offset = 0;
	struct buffer_head *bh;
	int ret = -EINVAL;
	int blocksize;

	blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE);
	if (!blocksize) {
		ext4_msg(sb, KERN_ERR, "unable to set blocksize");
		return -EINVAL;
	}

	/*
	 * The ext4 superblock will not be buffer aligned for other than 1kB
	 * block sizes.  We need to calculate the offset from buffer start.
	 */
	if (blocksize != EXT4_MIN_BLOCK_SIZE) {
		logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
		offset = do_div(logical_sb_block, blocksize);
	} else {
		logical_sb_block = sbi->s_sb_block;
	}

	bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
	if (IS_ERR(bh)) {
		ext4_msg(sb, KERN_ERR, "unable to read superblock");
		return PTR_ERR(bh);
	}
	/*
	 * Note: s_es must be initialized as soon as possible because
	 * some ext4 macro-instructions depend on its value
	 */
	es = (struct ext4_super_block *) (bh->b_data + offset);
	sbi->s_es = es;
	sb->s_magic = le16_to_cpu(es->s_magic);
	if (sb->s_magic != EXT4_SUPER_MAGIC) {
		if (!silent)
			ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
		goto out;
	}

	/* reject block/cluster size exponents outside the supported range */
	if (le32_to_cpu(es->s_log_block_size) >
	    (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
		ext4_msg(sb, KERN_ERR,
			 "Invalid log block size: %u",
			 le32_to_cpu(es->s_log_block_size));
		goto out;
	}
	if (le32_to_cpu(es->s_log_cluster_size) >
	    (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
		ext4_msg(sb, KERN_ERR,
			 "Invalid log cluster size: %u",
			 le32_to_cpu(es->s_log_cluster_size));
		goto out;
	}

	blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);

	/*
	 * If the default block size is not the same as the real block size,
	 * we need to reload it.
	 */
	if (sb->s_blocksize == blocksize) {
		*lsb = logical_sb_block;
		sbi->s_sbh = bh;
		return 0;
	}

	/*
	 * bh must be released before kill_bdev(), otherwise
	 * it won't be freed and its page also. kill_bdev()
	 * is called by sb_set_blocksize().
	 */
	brelse(bh);
	/* Validate the filesystem blocksize */
	if (!sb_set_blocksize(sb, blocksize)) {
		ext4_msg(sb, KERN_ERR, "bad block size %d",
			 blocksize);
		bh = NULL;
		goto out;
	}

	/* re-read the superblock at its location in the new block size */
	logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE;
	offset = do_div(logical_sb_block, blocksize);
	bh = ext4_sb_bread_unmovable(sb, logical_sb_block);
	if (IS_ERR(bh)) {
		ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try");
		ret = PTR_ERR(bh);
		bh = NULL;
		goto out;
	}
	es = (struct ext4_super_block *)(bh->b_data + offset);
	sbi->s_es = es;
	if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) {
		ext4_msg(sb, KERN_ERR, "Magic mismatch, very weird!");
		goto out;
	}
	*lsb = logical_sb_block;
	sbi->s_sbh = bh;
	return 0;
out:
	brelse(bh);
	return ret;
}
5035
/*
 * Core of the ext4 mount path: load and validate the superblock,
 * apply mount options, read group descriptors, load the journal,
 * read the root inode, and bring up all per-filesystem machinery
 * (mballoc, counters, sysfs, quotas, orphan processing).
 *
 * Returns 0 on success or a negative errno.  Teardown on failure is
 * driven by the failed_mount* labels at the bottom, which unwind in
 * the reverse order of setup; the label targeted at each failure point
 * determines exactly what has been initialized so far.
 */
static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb)
{
	struct ext4_super_block *es = NULL;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct flex_groups **flex_groups;
	ext4_fsblk_t block;
	ext4_fsblk_t logical_sb_block;
	struct inode *root;
	int ret = -ENOMEM;
	unsigned int i;
	int needs_recovery, has_huge_files;
	int err = 0;
	ext4_group_t first_not_zeroed;
	struct ext4_fs_context *ctx = fc->fs_private;
	int silent = fc->sb_flags & SB_SILENT;

	/* Set defaults for the variables that will be set during parsing */
	if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO))
		ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;

	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
	/* baseline for "lifetime writes" accounting exposed via sysfs */
	sbi->s_sectors_written_start =
		part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);

	/* -EINVAL is default */
	ret = -EINVAL;
	err = ext4_load_super(sb, &logical_sb_block, silent);
	if (err)
		goto out_fail;

	es = sbi->s_es;
	sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written);

	err = ext4_init_metadata_csum(sb, es);
	if (err)
		goto failed_mount;

	ext4_set_def_opts(sb, es);

	sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid));
	sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid));
	sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ;
	sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME;
	sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME;

	/*
	 * set default s_li_wait_mult for lazyinit, for the case there is
	 * no mount option specified.
	 */
	sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;

	if (ext4_inode_info_init(sb, es))
		goto failed_mount;

	err = parse_apply_sb_mount_options(sb, ctx);
	if (err < 0)
		goto failed_mount;

	/* remember the defaults so remount can tell what the user changed */
	sbi->s_def_mount_opt = sbi->s_mount_opt;
	sbi->s_def_mount_opt2 = sbi->s_mount_opt2;

	err = ext4_check_opt_consistency(fc, sb);
	if (err < 0)
		goto failed_mount;

	ext4_apply_options(fc, sb);

	if (ext4_encoding_init(sb, es))
		goto failed_mount;

	if (ext4_journal_data_mode_check(sb))
		goto failed_mount;

	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
		(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);

	/* i_version is always enabled now */
	sb->s_flags |= SB_I_VERSION;

	if (ext4_check_feature_compatibility(sb, es, silent))
		goto failed_mount;

	if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) {
		ext4_msg(sb, KERN_ERR,
			 "Number of reserved GDT blocks insanely large: %d",
			 le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
		goto failed_mount;
	}

	/* DAX only works when the fs block size matches the page size */
	if (sbi->s_daxdev) {
		if (sb->s_blocksize == PAGE_SIZE)
			set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
		else
			ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n");
	}

	if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
		if (ext4_has_feature_inline_data(sb)) {
			ext4_msg(sb, KERN_ERR, "Cannot use DAX on a filesystem"
					" that may contain inline data");
			goto failed_mount;
		}
		if (!test_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags)) {
			ext4_msg(sb, KERN_ERR,
				"DAX unsupported by block device.");
			goto failed_mount;
		}
	}

	if (ext4_has_feature_encrypt(sb) && es->s_encryption_level) {
		ext4_msg(sb, KERN_ERR, "Unsupported encryption level %d",
			 es->s_encryption_level);
		goto failed_mount;
	}

	has_huge_files = ext4_has_feature_huge_file(sb);
	sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits,
						      has_huge_files);
	sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files);

	sbi->s_desc_size = le16_to_cpu(es->s_desc_size);
	if (ext4_has_feature_64bit(sb)) {
		/* 64bit filesystems carry a variable descriptor size */
		if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT ||
		    sbi->s_desc_size > EXT4_MAX_DESC_SIZE ||
		    !is_power_of_2(sbi->s_desc_size)) {
			ext4_msg(sb, KERN_ERR,
			       "unsupported descriptor size %lu",
			       sbi->s_desc_size);
			goto failed_mount;
		}
	} else
		sbi->s_desc_size = EXT4_MIN_DESC_SIZE;

	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);

	sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb);
	if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) {
		if (!silent)
			ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
		goto failed_mount;
	}
	/* inode bitmap is one block, so at most blocksize*8 inodes/group */
	if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
	    sbi->s_inodes_per_group > sb->s_blocksize * 8) {
		ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
			 sbi->s_inodes_per_group);
		goto failed_mount;
	}
	sbi->s_itb_per_group = sbi->s_inodes_per_group /
					sbi->s_inodes_per_block;
	sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb);
	sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY;
	sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
	sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));

	for (i = 0; i < 4; i++)
		sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
	sbi->s_def_hash_version = es->s_def_hash_version;
	if (ext4_has_feature_dir_index(sb)) {
		i = le32_to_cpu(es->s_flags);
		if (i & EXT2_FLAGS_UNSIGNED_HASH)
			sbi->s_hash_unsigned = 3;
		else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
			/*
			 * Neither hash-signedness flag set: stamp the
			 * superblock with this platform's char signedness
			 * so htree hashing stays stable across platforms.
			 */
#ifdef __CHAR_UNSIGNED__
			if (!sb_rdonly(sb))
				es->s_flags |=
					cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
			sbi->s_hash_unsigned = 3;
#else
			if (!sb_rdonly(sb))
				es->s_flags |=
					cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
#endif
		}
	}

	if (ext4_handle_clustersize(sb))
		goto failed_mount;

	/*
	 * Test whether we have more sectors than will fit in sector_t,
	 * and whether the max offset is addressable by the page cache.
	 */
	err = generic_check_addressable(sb->s_blocksize_bits,
					ext4_blocks_count(es));
	if (err) {
		ext4_msg(sb, KERN_ERR, "filesystem"
			 " too large to mount safely on this system");
		goto failed_mount;
	}

	if (ext4_geometry_check(sb, es))
		goto failed_mount;

	err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed);
	if (err)
		goto failed_mount;

	timer_setup(&sbi->s_err_report, print_daily_error_info, 0);
	spin_lock_init(&sbi->s_error_lock);
	INIT_WORK(&sbi->s_error_work, flush_stashed_error_work);

	/* Register extent status tree shrinker */
	if (ext4_es_register_shrinker(sbi))
		goto failed_mount3;

	sbi->s_stripe = ext4_get_stripe_size(sbi);
	sbi->s_extent_max_zeroout_kb = 32;

	/*
	 * set up enough so that it can read an inode
	 */
	sb->s_op = &ext4_sops;
	sb->s_export_op = &ext4_export_ops;
	sb->s_xattr = ext4_xattr_handlers;
#ifdef CONFIG_FS_ENCRYPTION
	sb->s_cop = &ext4_cryptops;
#endif
#ifdef CONFIG_FS_VERITY
	sb->s_vop = &ext4_verityops;
#endif
#ifdef CONFIG_QUOTA
	sb->dq_op = &ext4_quota_operations;
	if (ext4_has_feature_quota(sb))
		sb->s_qcop = &dquot_quotactl_sysfile_ops;
	else
		sb->s_qcop = &ext4_qctl_operations;
	sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP | QTYPE_MASK_PRJ;
#endif
	memcpy(&sb->s_uuid, es->s_uuid, sizeof(es->s_uuid));

	INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
	mutex_init(&sbi->s_orphan_lock);

	ext4_fast_commit_init(sb);

	sb->s_root = NULL;

	needs_recovery = (es->s_last_orphan != 0 ||
			  ext4_has_feature_orphan_present(sb) ||
			  ext4_has_feature_journal_needs_recovery(sb));

	if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb)) {
		err = ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block));
		if (err)
			goto failed_mount3a;
	}

	/*
	 * The first inode we look at is the journal inode.  Don't try
	 * root first: it may be modified in the journal!
	 */
	if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
		err = ext4_load_and_init_journal(sb, es, ctx);
		if (err)
			goto failed_mount3a;
	} else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) &&
		   ext4_has_feature_journal_needs_recovery(sb)) {
		ext4_msg(sb, KERN_ERR, "required journal recovery "
		       "suppressed and not mounted read-only");
		goto failed_mount3a;
	} else {
		/* Nojournal mode, all journal mount options are illegal */
		if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				 "journal_async_commit, fs mounted w/o journal");
			goto failed_mount3a;
		}

		if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				 "journal_checksum, fs mounted w/o journal");
			goto failed_mount3a;
		}
		if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				 "commit=%lu, fs mounted w/o journal",
				 sbi->s_commit_interval / HZ);
			goto failed_mount3a;
		}
		if (EXT4_MOUNT_DATA_FLAGS &
		    (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				 "data=, fs mounted w/o journal");
			goto failed_mount3a;
		}
		sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
		clear_opt(sb, JOURNAL_CHECKSUM);
		clear_opt(sb, DATA_FLAGS);
		clear_opt2(sb, JOURNAL_FAST_COMMIT);
		sbi->s_journal = NULL;
		needs_recovery = 0;
	}

	if (!test_opt(sb, NO_MBCACHE)) {
		sbi->s_ea_block_cache = ext4_xattr_create_cache();
		if (!sbi->s_ea_block_cache) {
			ext4_msg(sb, KERN_ERR,
				 "Failed to create ea_block_cache");
			goto failed_mount_wq;
		}

		if (ext4_has_feature_ea_inode(sb)) {
			sbi->s_ea_inode_cache = ext4_xattr_create_cache();
			if (!sbi->s_ea_inode_cache) {
				ext4_msg(sb, KERN_ERR,
					 "Failed to create ea_inode_cache");
				goto failed_mount_wq;
			}
		}
	}

	/*
	 * Get the # of file system overhead blocks from the
	 * superblock if present.
	 */
	sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters);
	/* ignore the precalculated value if it is ridiculous */
	if (sbi->s_overhead > ext4_blocks_count(es))
		sbi->s_overhead = 0;
	/*
	 * If the bigalloc feature is not enabled recalculating the
	 * overhead doesn't take long, so we might as well just redo
	 * it to make sure we are using the correct value.
	 */
	if (!ext4_has_feature_bigalloc(sb))
		sbi->s_overhead = 0;
	if (sbi->s_overhead == 0) {
		err = ext4_calculate_overhead(sb);
		if (err)
			goto failed_mount_wq;
	}

	/*
	 * The maximum number of concurrent works can be high and
	 * concurrency isn't really necessary. Limit it to 1.
	 */
	EXT4_SB(sb)->rsv_conversion_wq =
		alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
	if (!EXT4_SB(sb)->rsv_conversion_wq) {
		printk(KERN_ERR "EXT4-fs: failed to create workqueue\n");
		ret = -ENOMEM;
		goto failed_mount4;
	}

	/*
	 * The jbd2_journal_load will have done any necessary log recovery,
	 * so we can safely mount the rest of the filesystem now.
	 */

	root = ext4_iget(sb, EXT4_ROOT_INO, EXT4_IGET_SPECIAL);
	if (IS_ERR(root)) {
		ext4_msg(sb, KERN_ERR, "get root inode failed");
		ret = PTR_ERR(root);
		root = NULL;
		goto failed_mount4;
	}
	if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
		ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck");
		iput(root);
		goto failed_mount4;
	}

	sb->s_root = d_make_root(root);
	if (!sb->s_root) {
		ext4_msg(sb, KERN_ERR, "get root dentry failed");
		ret = -ENOMEM;
		goto failed_mount4;
	}

	ret = ext4_setup_super(sb, es, sb_rdonly(sb));
	if (ret == -EROFS) {
		/* fall back to read-only rather than failing the mount */
		sb->s_flags |= SB_RDONLY;
		ret = 0;
	} else if (ret)
		goto failed_mount4a;

	ext4_set_resv_clusters(sb);

	if (test_opt(sb, BLOCK_VALIDITY)) {
		err = ext4_setup_system_zone(sb);
		if (err) {
			ext4_msg(sb, KERN_ERR, "failed to initialize system "
				 "zone (%d)", err);
			goto failed_mount4a;
		}
	}
	ext4_fc_replay_cleanup(sb);

	ext4_ext_init(sb);

	/*
	 * Enable optimize_scan if number of groups is > threshold. This can be
	 * turned off by passing "mb_optimize_scan=0". This can also be
	 * turned on forcefully by passing "mb_optimize_scan=1".
	 */
	if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) {
		if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD)
			set_opt2(sb, MB_OPTIMIZE_SCAN);
		else
			clear_opt2(sb, MB_OPTIMIZE_SCAN);
	}

	err = ext4_mb_init(sb);
	if (err) {
		ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
			 err);
		goto failed_mount5;
	}

	/*
	 * We can only set up the journal commit callback once
	 * mballoc is initialized
	 */
	if (sbi->s_journal)
		sbi->s_journal->j_commit_callback =
			ext4_journal_commit_callback;

	block = ext4_count_free_clusters(sb);
	ext4_free_blocks_count_set(sbi->s_es,
				   EXT4_C2B(sbi, block));
	err = percpu_counter_init(&sbi->s_freeclusters_counter, block,
				  GFP_KERNEL);
	if (!err) {
		unsigned long freei = ext4_count_free_inodes(sb);
		sbi->s_es->s_free_inodes_count = cpu_to_le32(freei);
		err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
					  GFP_KERNEL);
	}
	if (!err)
		err = percpu_counter_init(&sbi->s_dirs_counter,
					  ext4_count_dirs(sb), GFP_KERNEL);
	if (!err)
		err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
					  GFP_KERNEL);
	if (!err)
		err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
					  GFP_KERNEL);
	if (!err)
		err = percpu_init_rwsem(&sbi->s_writepages_rwsem);

	if (err) {
		ext4_msg(sb, KERN_ERR, "insufficient memory");
		goto failed_mount6;
	}

	if (ext4_has_feature_flex_bg(sb))
		if (!ext4_fill_flex_info(sb)) {
			ext4_msg(sb, KERN_ERR,
			       "unable to initialize "
			       "flex_bg meta info!");
			ret = -ENOMEM;
			goto failed_mount6;
		}

	err = ext4_register_li_request(sb, first_not_zeroed);
	if (err)
		goto failed_mount6;

	err = ext4_register_sysfs(sb);
	if (err)
		goto failed_mount7;

	err = ext4_init_orphan_info(sb);
	if (err)
		goto failed_mount8;
#ifdef CONFIG_QUOTA
	/* Enable quota usage during mount. */
	if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
		err = ext4_enable_quotas(sb);
		if (err)
			goto failed_mount9;
	}
#endif  /* CONFIG_QUOTA */

	/*
	 * Save the original bdev mapping's wb_err value which could be
	 * used to detect the metadata async write error.
	 */
	spin_lock_init(&sbi->s_bdev_wb_lock);
	errseq_check_and_advance(&sb->s_bdev->bd_inode->i_mapping->wb_err,
				 &sbi->s_bdev_wb_err);
	sb->s_bdev->bd_super = sb;
	EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
	ext4_orphan_cleanup(sb, es);
	EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
	/*
	 * Update the checksum after updating free space/inode counters and
	 * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect
	 * checksum in the buffer cache until it is written out and
	 * e2fsprogs programs trying to open a file system immediately
	 * after it is mounted can fail.
	 */
	ext4_superblock_csum_set(sb);
	if (needs_recovery) {
		ext4_msg(sb, KERN_INFO, "recovery complete");
		err = ext4_mark_recovery_complete(sb, es);
		if (err)
			goto failed_mount10;
	}

	if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev))
		ext4_msg(sb, KERN_WARNING,
			 "mounting with \"discard\" option, but the device does not support discard");

	if (es->s_error_count)
		mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */

	/* Enable message ratelimiting. Default is 10 messages per 5 secs. */
	ratelimit_state_init(&sbi->s_err_ratelimit_state, 5 * HZ, 10);
	ratelimit_state_init(&sbi->s_warning_ratelimit_state, 5 * HZ, 10);
	ratelimit_state_init(&sbi->s_msg_ratelimit_state, 5 * HZ, 10);
	atomic_set(&sbi->s_warning_count, 0);
	atomic_set(&sbi->s_msg_count, 0);

	return 0;

	/* Error unwinding: each label undoes everything set up after the
	 * previous label's target, in reverse order of initialization. */
failed_mount10:
	ext4_quota_off_umount(sb);
failed_mount9: __maybe_unused
	ext4_release_orphan_info(sb);
failed_mount8:
	ext4_unregister_sysfs(sb);
	kobject_put(&sbi->s_kobj);
failed_mount7:
	ext4_unregister_li_request(sb);
failed_mount6:
	ext4_mb_release(sb);
	rcu_read_lock();
	flex_groups = rcu_dereference(sbi->s_flex_groups);
	if (flex_groups) {
		for (i = 0; i < sbi->s_flex_groups_allocated; i++)
			kvfree(flex_groups[i]);
		kvfree(flex_groups);
	}
	rcu_read_unlock();
	percpu_counter_destroy(&sbi->s_freeclusters_counter);
	percpu_counter_destroy(&sbi->s_freeinodes_counter);
	percpu_counter_destroy(&sbi->s_dirs_counter);
	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
	percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
	percpu_free_rwsem(&sbi->s_writepages_rwsem);
failed_mount5:
	ext4_ext_release(sb);
	ext4_release_system_zone(sb);
failed_mount4a:
	dput(sb->s_root);
	sb->s_root = NULL;
failed_mount4:
	ext4_msg(sb, KERN_ERR, "mount failed");
	if (EXT4_SB(sb)->rsv_conversion_wq)
		destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
	ext4_xattr_destroy_cache(sbi->s_ea_inode_cache);
	sbi->s_ea_inode_cache = NULL;

	ext4_xattr_destroy_cache(sbi->s_ea_block_cache);
	sbi->s_ea_block_cache = NULL;

	if (sbi->s_journal) {
		/* flush s_error_work before journal destroy. */
		flush_work(&sbi->s_error_work);
		jbd2_journal_destroy(sbi->s_journal);
		sbi->s_journal = NULL;
	}
failed_mount3a:
	ext4_es_unregister_shrinker(sbi);
failed_mount3:
	/* flush s_error_work before sbi destroy */
	flush_work(&sbi->s_error_work);
	del_timer_sync(&sbi->s_err_report);
	ext4_stop_mmpd(sbi);
	ext4_group_desc_free(sbi);
failed_mount:
	if (sbi->s_chksum_driver)
		crypto_free_shash(sbi->s_chksum_driver);

#if IS_ENABLED(CONFIG_UNICODE)
	utf8_unload(sb->s_encoding);
#endif

#ifdef CONFIG_QUOTA
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(get_qf_name(sb, sbi, i));
#endif
	fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy);
	/* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */
	brelse(sbi->s_sbh);
	ext4_blkdev_remove(sbi);
out_fail:
	invalidate_bdev(sb->s_bdev);
	sb->s_fs_info = NULL;
	return err ? err : ret;
}
5630
/*
 * fs_context fill_super callback: allocate the per-fs ext4_sb_info,
 * run the real mount work in __ext4_fill_super(), and log a (rate
 * limited) summary of the resulting data journaling and quota modes.
 * Returns 0 on success; on failure the sb_info is freed and detached.
 */
static int ext4_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct ext4_fs_context *ctx = fc->fs_private;
	struct ext4_sb_info *sbi;
	const char *descr;
	int ret;

	sbi = ext4_alloc_sbi(sb);
	if (!sbi)
		return -ENOMEM;

	fc->s_fs_info = sbi;

	/* Cleanup superblock name */
	strreplace(sb->s_id, '/', '!');

	sbi->s_sb_block = 1;	/* Default super block location */
	if (ctx->spec & EXT4_SPEC_s_sb_block)
		sbi->s_sb_block = ctx->s_sb_block;

	ret = __ext4_fill_super(fc, sb);
	if (ret < 0)
		goto free_sbi;

	/* pick the human-readable journaling-mode suffix for the log line */
	if (sbi->s_journal) {
		if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
			descr = " journalled data mode";
		else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
			descr = " ordered data mode";
		else
			descr = " writeback data mode";
	} else
		descr = "out journal";

	if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
		ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
			 "Quota mode: %s.", descr, ext4_quota_mode(sb));

	/* Update the s_overhead_clusters if necessary */
	ext4_update_overhead(sb, false);
	return 0;

free_sbi:
	ext4_free_sbi(sbi);
	fc->s_fs_info = NULL;
	return ret;
}
5678
/* fs_context get_tree callback: mount on a block device via ext4_fill_super */
static int ext4_get_tree(struct fs_context *fc)
{
	return get_tree_bdev(fc, ext4_fill_super);
}
5683
5684 /*
5685 * Setup any per-fs journal parameters now. We'll do this both on
5686 * initial mount, once the journal has been initialised but before we've
5687 * done any recovery; and again on any subsequent remount.
5688 */
static void ext4_init_journal_params(struct super_block *sb, journal_t *journal)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned long set = 0, clear = 0;

	/* Propagate commit timing knobs from the superblock info. */
	journal->j_commit_interval = sbi->s_commit_interval;
	journal->j_min_batch_time = sbi->s_min_batch_time;
	journal->j_max_batch_time = sbi->s_max_batch_time;
	ext4_fc_init(sb, journal);

	/* Translate mount options into journal flag bits to set/clear. */
	if (test_opt(sb, BARRIER))
		set |= JBD2_BARRIER;
	else
		clear |= JBD2_BARRIER;
	if (test_opt(sb, DATA_ERR_ABORT))
		set |= JBD2_ABORT_ON_SYNCDATA_ERR;
	else
		clear |= JBD2_ABORT_ON_SYNCDATA_ERR;

	/* j_flags is protected by j_state_lock. */
	write_lock(&journal->j_state_lock);
	journal->j_flags = (journal->j_flags & ~clear) | set;
	write_unlock(&journal->j_state_lock);
}
5709
ext4_get_journal_inode(struct super_block * sb,unsigned int journal_inum)5710 static struct inode *ext4_get_journal_inode(struct super_block *sb,
5711 unsigned int journal_inum)
5712 {
5713 struct inode *journal_inode;
5714
5715 /*
5716 * Test for the existence of a valid inode on disk. Bad things
5717 * happen if we iget() an unused inode, as the subsequent iput()
5718 * will try to delete it.
5719 */
5720 journal_inode = ext4_iget(sb, journal_inum, EXT4_IGET_SPECIAL);
5721 if (IS_ERR(journal_inode)) {
5722 ext4_msg(sb, KERN_ERR, "no journal found");
5723 return NULL;
5724 }
5725 if (!journal_inode->i_nlink) {
5726 make_bad_inode(journal_inode);
5727 iput(journal_inode);
5728 ext4_msg(sb, KERN_ERR, "journal inode is deleted");
5729 return NULL;
5730 }
5731
5732 ext4_debug("Journal inode found at %p: %lld bytes\n",
5733 journal_inode, journal_inode->i_size);
5734 if (!S_ISREG(journal_inode->i_mode) || IS_ENCRYPTED(journal_inode)) {
5735 ext4_msg(sb, KERN_ERR, "invalid journal inode");
5736 iput(journal_inode);
5737 return NULL;
5738 }
5739 return journal_inode;
5740 }
5741
ext4_get_journal(struct super_block * sb,unsigned int journal_inum)5742 static journal_t *ext4_get_journal(struct super_block *sb,
5743 unsigned int journal_inum)
5744 {
5745 struct inode *journal_inode;
5746 journal_t *journal;
5747
5748 if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
5749 return NULL;
5750
5751 journal_inode = ext4_get_journal_inode(sb, journal_inum);
5752 if (!journal_inode)
5753 return NULL;
5754
5755 journal = jbd2_journal_init_inode(journal_inode);
5756 if (!journal) {
5757 ext4_msg(sb, KERN_ERR, "Could not load journal inode");
5758 iput(journal_inode);
5759 return NULL;
5760 }
5761 journal->j_private = sb;
5762 ext4_init_journal_params(sb, journal);
5763 return journal;
5764 }
5765
/*
 * Open and validate an external journal device and build a jbd2 journal
 * on it.  On success the bdev reference is stashed in s_journal_bdev and
 * the journal_t is returned; on any failure the bdev is released and
 * NULL is returned.
 */
static journal_t *ext4_get_dev_journal(struct super_block *sb,
				       dev_t j_dev)
{
	struct buffer_head *bh;
	journal_t *journal;
	ext4_fsblk_t start;
	ext4_fsblk_t len;
	int hblock, blocksize;
	ext4_fsblk_t sb_block;
	unsigned long offset;
	struct ext4_super_block *es;
	struct block_device *bdev;

	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
		return NULL;

	bdev = ext4_blkdev_get(j_dev, sb);
	if (bdev == NULL)
		return NULL;

	/* The device's logical sector size must not exceed our block size. */
	blocksize = sb->s_blocksize;
	hblock = bdev_logical_block_size(bdev);
	if (blocksize < hblock) {
		ext4_msg(sb, KERN_ERR,
			"blocksize too small for journal device");
		goto out_bdev;
	}

	/*
	 * Locate the ext4 superblock of the journal device: it lives at
	 * byte offset EXT4_MIN_BLOCK_SIZE, expressed here as a block
	 * number plus an intra-block offset for the current blocksize.
	 */
	sb_block = EXT4_MIN_BLOCK_SIZE / blocksize;
	offset = EXT4_MIN_BLOCK_SIZE % blocksize;
	set_blocksize(bdev, blocksize);
	if (!(bh = __bread(bdev, sb_block, blocksize))) {
		ext4_msg(sb, KERN_ERR, "couldn't read superblock of "
		       "external journal");
		goto out_bdev;
	}

	/* The device must carry a superblock with the JOURNAL_DEV feature. */
	es = (struct ext4_super_block *) (bh->b_data + offset);
	if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) ||
	    !(le32_to_cpu(es->s_feature_incompat) &
	      EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) {
		ext4_msg(sb, KERN_ERR, "external journal has "
					"bad superblock");
		brelse(bh);
		goto out_bdev;
	}

	/* Verify the superblock checksum when metadata_csum is advertised. */
	if ((le32_to_cpu(es->s_feature_ro_compat) &
	     EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
	    es->s_checksum != ext4_superblock_csum(sb, es)) {
		ext4_msg(sb, KERN_ERR, "external journal has "
				       "corrupt superblock");
		brelse(bh);
		goto out_bdev;
	}

	/* The journal device must match the UUID recorded in our sb. */
	if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
		ext4_msg(sb, KERN_ERR, "journal UUID does not match");
		brelse(bh);
		goto out_bdev;
	}

	len = ext4_blocks_count(es);
	start = sb_block + 1;
	brelse(bh);	/* we're done with the superblock */

	journal = jbd2_journal_init_dev(bdev, sb->s_bdev,
					start, len, blocksize);
	if (!journal) {
		ext4_msg(sb, KERN_ERR, "failed to create device journal");
		goto out_bdev;
	}
	journal->j_private = sb;
	/* Read the journal superblock in so we can inspect s_nr_users. */
	if (ext4_read_bh_lock(journal->j_sb_buffer, REQ_META | REQ_PRIO, true)) {
		ext4_msg(sb, KERN_ERR, "I/O error on journal device");
		goto out_journal;
	}
	/* Sharing one external journal between filesystems is unsupported. */
	if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
		ext4_msg(sb, KERN_ERR, "External journal has more than one "
					"user (unsupported) - %d",
			be32_to_cpu(journal->j_superblock->s_nr_users));
		goto out_journal;
	}
	EXT4_SB(sb)->s_journal_bdev = bdev;
	ext4_init_journal_params(sb, journal);
	return journal;

out_journal:
	jbd2_journal_destroy(journal);
out_bdev:
	ext4_blkdev_put(bdev);
	return NULL;
}
5859
/*
 * Locate and load the filesystem's journal — either the internal journal
 * inode or an external journal device (having both configured at once is
 * invalid).  On success the journal is installed in EXT4_SB(sb)->s_journal
 * and any error state the journal recorded is migrated to the superblock.
 *
 * @journal_devnum: journal device number from the journal_dev= mount
 *		    option; 0 means "use what the superblock says".
 */
static int ext4_load_journal(struct super_block *sb,
			     struct ext4_super_block *es,
			     unsigned long journal_devnum)
{
	journal_t *journal;
	unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
	dev_t journal_dev;
	int err = 0;
	int really_read_only;
	int journal_dev_ro;

	if (WARN_ON_ONCE(!ext4_has_feature_journal(sb)))
		return -EFSCORRUPTED;

	/* A journal_dev mount option overrides the on-disk device number. */
	if (journal_devnum &&
	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
		ext4_msg(sb, KERN_INFO, "external journal device major/minor "
			"numbers have changed");
		journal_dev = new_decode_dev(journal_devnum);
	} else
		journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));

	if (journal_inum && journal_dev) {
		ext4_msg(sb, KERN_ERR,
			 "filesystem has both journal inode and journal device!");
		return -EINVAL;
	}

	if (journal_inum) {
		journal = ext4_get_journal(sb, journal_inum);
		if (!journal)
			return -EINVAL;
	} else {
		journal = ext4_get_dev_journal(sb, journal_dev);
		if (!journal)
			return -EINVAL;
	}

	journal_dev_ro = bdev_read_only(journal->j_dev);
	really_read_only = bdev_read_only(sb->s_bdev) | journal_dev_ro;

	/* A read-only journal device cannot back a read-write mount. */
	if (journal_dev_ro && !sb_rdonly(sb)) {
		ext4_msg(sb, KERN_ERR,
			 "journal device read-only, try mounting with '-o ro'");
		err = -EROFS;
		goto err_out;
	}

	/*
	 * Are we loading a blank journal or performing recovery after a
	 * crash? For recovery, we need to check in advance whether we
	 * can get read-write access to the device.
	 */
	if (ext4_has_feature_journal_needs_recovery(sb)) {
		if (sb_rdonly(sb)) {
			ext4_msg(sb, KERN_INFO, "INFO: recovery "
					"required on readonly filesystem");
			if (really_read_only) {
				ext4_msg(sb, KERN_ERR, "write access "
					"unavailable, cannot proceed "
					"(try mounting with noload)");
				err = -EROFS;
				goto err_out;
			}
			ext4_msg(sb, KERN_INFO, "write access will "
			       "be enabled during recovery");
		}
	}

	if (!(journal->j_flags & JBD2_BARRIER))
		ext4_msg(sb, KERN_INFO, "barriers disabled");

	/* No recovery needed: discard any stale journal contents. */
	if (!ext4_has_feature_journal_needs_recovery(sb))
		err = jbd2_journal_wipe(journal, !really_read_only);
	if (!err) {
		/*
		 * Journal replay may overwrite the error-reporting region
		 * of the in-memory superblock; save it across
		 * jbd2_journal_load() and restore it afterwards.  A failed
		 * kmalloc() just skips the preservation (best effort).
		 */
		char *save = kmalloc(EXT4_S_ERR_LEN, GFP_KERNEL);
		if (save)
			memcpy(save, ((char *) es) +
			       EXT4_S_ERR_START, EXT4_S_ERR_LEN);
		err = jbd2_journal_load(journal);
		if (save)
			memcpy(((char *) es) + EXT4_S_ERR_START,
			       save, EXT4_S_ERR_LEN);
		kfree(save);
	}

	if (err) {
		ext4_msg(sb, KERN_ERR, "error loading journal");
		goto err_out;
	}

	EXT4_SB(sb)->s_journal = journal;
	err = ext4_clear_journal_err(sb, es);
	if (err) {
		EXT4_SB(sb)->s_journal = NULL;
		jbd2_journal_destroy(journal);
		return err;
	}

	/* Persist a changed journal location so the next mount finds it. */
	if (!really_read_only && journal_devnum &&
	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
		es->s_journal_dev = cpu_to_le32(journal_devnum);
		ext4_commit_super(sb);
	}
	if (!really_read_only && journal_inum &&
	    journal_inum != le32_to_cpu(es->s_journal_inum)) {
		es->s_journal_inum = cpu_to_le32(journal_inum);
		ext4_commit_super(sb);
	}

	return 0;

err_out:
	jbd2_journal_destroy(journal);
	return err;
}
5976
/* Copy state of EXT4_SB(sb) into buffer for on-disk superblock */
static void ext4_update_super(struct super_block *sb)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_super_block *es = sbi->s_es;
	struct buffer_head *sbh = sbi->s_sbh;

	/* Serialize against other writers of the superblock buffer. */
	lock_buffer(sbh);
	/*
	 * If the file system is mounted read-only, don't update the
	 * superblock write time.  This avoids updating the superblock
	 * write time when we are mounting the root file system
	 * read/only but we need to replay the journal; at that point,
	 * for people who are east of GMT and who make their clock
	 * tick in localtime for Windows bug-for-bug compatibility,
	 * the clock is set in the future, and this will cause e2fsck
	 * to complain and force a full file system check.
	 */
	if (!(sb->s_flags & SB_RDONLY))
		ext4_update_tstamp(es, s_wtime);
	/* Lifetime write stats: sectors since mount, >> 1 converts to KiB. */
	es->s_kbytes_written =
		cpu_to_le64(sbi->s_kbytes_written +
		    ((part_stat_read(sb->s_bdev, sectors[STAT_WRITE]) -
		      sbi->s_sectors_written_start) >> 1));
	/* Counters may not exist yet during early mount; check first. */
	if (percpu_counter_initialized(&sbi->s_freeclusters_counter))
		ext4_free_blocks_count_set(es,
			EXT4_C2B(sbi, percpu_counter_sum_positive(
				&sbi->s_freeclusters_counter)));
	if (percpu_counter_initialized(&sbi->s_freeinodes_counter))
		es->s_free_inodes_count =
			cpu_to_le32(percpu_counter_sum_positive(
				&sbi->s_freeinodes_counter));
	/* Copy error information to the on-disk superblock */
	spin_lock(&sbi->s_error_lock);
	if (sbi->s_add_error_count > 0) {
		es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
		/* Record "first error" details only once per filesystem. */
		if (!es->s_first_error_time && !es->s_first_error_time_hi) {
			__ext4_update_tstamp(&es->s_first_error_time,
					     &es->s_first_error_time_hi,
					     sbi->s_first_error_time);
			/*
			 * NOTE(review): strncpy() into a fixed-size on-disk
			 * field; presumably the field is not required to be
			 * NUL-terminated on disk — confirm before switching
			 * to a terminating copy helper.
			 */
			strncpy(es->s_first_error_func, sbi->s_first_error_func,
				sizeof(es->s_first_error_func));
			es->s_first_error_line =
				cpu_to_le32(sbi->s_first_error_line);
			es->s_first_error_ino =
				cpu_to_le32(sbi->s_first_error_ino);
			es->s_first_error_block =
				cpu_to_le64(sbi->s_first_error_block);
			es->s_first_error_errcode =
				ext4_errno_to_code(sbi->s_first_error_code);
		}
		/* "Last error" details are refreshed on every update. */
		__ext4_update_tstamp(&es->s_last_error_time,
				     &es->s_last_error_time_hi,
				     sbi->s_last_error_time);
		strncpy(es->s_last_error_func, sbi->s_last_error_func,
			sizeof(es->s_last_error_func));
		es->s_last_error_line = cpu_to_le32(sbi->s_last_error_line);
		es->s_last_error_ino = cpu_to_le32(sbi->s_last_error_ino);
		es->s_last_error_block = cpu_to_le64(sbi->s_last_error_block);
		es->s_last_error_errcode =
				ext4_errno_to_code(sbi->s_last_error_code);
		/*
		 * Start the daily error reporting function if it hasn't been
		 * started already
		 */
		if (!es->s_error_count)
			mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);
		le32_add_cpu(&es->s_error_count, sbi->s_add_error_count);
		sbi->s_add_error_count = 0;
	}
	spin_unlock(&sbi->s_error_lock);

	/* Recompute the checksum after all fields above are final. */
	ext4_superblock_csum_set(sb);
	unlock_buffer(sbh);
}
6052
/*
 * Synchronously write the primary on-disk superblock.  Refreshes the
 * in-memory copy via ext4_update_super() first.  Returns 0 on success
 * or a negative errno (missing buffer, ejected device, or I/O error).
 */
static int ext4_commit_super(struct super_block *sb)
{
	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;

	if (!sbh)
		return -EINVAL;
	if (block_device_ejected(sb))
		return -ENODEV;

	ext4_update_super(sb);

	lock_buffer(sbh);
	/* Buffer got discarded which means block device got invalidated */
	if (!buffer_mapped(sbh)) {
		unlock_buffer(sbh);
		return -EIO;
	}

	if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
		/*
		 * Oh, dear.  A previous attempt to write the
		 * superblock failed.  This could happen because the
		 * USB device was yanked out.  Or it could happen to
		 * be a transient write error and maybe the block will
		 * be remapped.  Nothing we can do but to retry the
		 * write and hope for the best.
		 */
		ext4_msg(sb, KERN_ERR, "previous I/O error to "
		       "superblock detected");
		clear_buffer_write_io_error(sbh);
		set_buffer_uptodate(sbh);
	}
	get_bh(sbh);
	/* Clear potential dirty bit if it was journalled update */
	clear_buffer_dirty(sbh);
	sbh->b_end_io = end_buffer_write_sync;
	/* REQ_FUA makes the write durable when barriers are enabled. */
	submit_bh(REQ_OP_WRITE | REQ_SYNC |
		  (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
	wait_on_buffer(sbh);
	if (buffer_write_io_error(sbh)) {
		ext4_msg(sb, KERN_ERR, "I/O error while writing "
		       "superblock");
		clear_buffer_write_io_error(sbh);
		set_buffer_uptodate(sbh);
		return -EIO;
	}
	return 0;
}
6101
/*
 * Have we just finished recovery?  If so, and if we are mounting (or
 * remounting) the filesystem readonly, then we will end up with a
 * consistent fs on disk.  Record that fact.
 */
static int ext4_mark_recovery_complete(struct super_block *sb,
				       struct ext4_super_block *es)
{
	int err;
	journal_t *journal = EXT4_SB(sb)->s_journal;

	if (!ext4_has_feature_journal(sb)) {
		/* A live journal without the feature flag is corruption. */
		if (journal != NULL) {
			ext4_error(sb, "Journal got removed while the fs was "
				   "mounted!");
			return -EFSCORRUPTED;
		}
		return 0;
	}
	/* Quiesce the journal and push everything out to disk. */
	jbd2_journal_lock_updates(journal);
	err = jbd2_journal_flush(journal, 0);
	if (err < 0)
		goto out;

	/*
	 * On a read-only fs with a fully flushed journal we can clear the
	 * recovery/orphan flags so e2fsck and future mounts see a clean fs.
	 */
	if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
	    ext4_has_feature_orphan_present(sb))) {
		if (!ext4_orphan_file_empty(sb)) {
			ext4_error(sb, "Orphan file not empty on read-only fs.");
			err = -EFSCORRUPTED;
			goto out;
		}
		ext4_clear_feature_journal_needs_recovery(sb);
		ext4_clear_feature_orphan_present(sb);
		ext4_commit_super(sb);
	}
out:
	jbd2_journal_unlock_updates(journal);
	return err;
}
6141
6142 /*
6143 * If we are mounting (or read-write remounting) a filesystem whose journal
6144 * has recorded an error from a previous lifetime, move that error to the
6145 * main filesystem now.
6146 */
ext4_clear_journal_err(struct super_block * sb,struct ext4_super_block * es)6147 static int ext4_clear_journal_err(struct super_block *sb,
6148 struct ext4_super_block *es)
6149 {
6150 journal_t *journal;
6151 int j_errno;
6152 const char *errstr;
6153
6154 if (!ext4_has_feature_journal(sb)) {
6155 ext4_error(sb, "Journal got removed while the fs was mounted!");
6156 return -EFSCORRUPTED;
6157 }
6158
6159 journal = EXT4_SB(sb)->s_journal;
6160
6161 /*
6162 * Now check for any error status which may have been recorded in the
6163 * journal by a prior ext4_error() or ext4_abort()
6164 */
6165
6166 j_errno = jbd2_journal_errno(journal);
6167 if (j_errno) {
6168 char nbuf[16];
6169
6170 errstr = ext4_decode_error(sb, j_errno, nbuf);
6171 ext4_warning(sb, "Filesystem error recorded "
6172 "from previous mount: %s", errstr);
6173 ext4_warning(sb, "Marking fs in need of filesystem check.");
6174
6175 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
6176 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
6177 ext4_commit_super(sb);
6178
6179 jbd2_journal_clear_err(journal);
6180 jbd2_journal_update_sb_errno(journal);
6181 }
6182 return 0;
6183 }
6184
6185 /*
6186 * Force the running and committing transactions to commit,
6187 * and wait on the commit.
6188 */
ext4_force_commit(struct super_block * sb)6189 int ext4_force_commit(struct super_block *sb)
6190 {
6191 journal_t *journal;
6192
6193 if (sb_rdonly(sb))
6194 return 0;
6195
6196 journal = EXT4_SB(sb)->s_journal;
6197 return ext4_journal_force_commit(journal);
6198 }
6199
/*
 * VFS ->sync_fs: flush pending conversions, quota, and the journal; issue
 * a cache-flush barrier when no transaction commit will do it for us.
 * Returns 0 or the first error encountered.
 */
static int ext4_sync_fs(struct super_block *sb, int wait)
{
	int ret = 0;
	tid_t target;
	bool needs_barrier = false;
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	if (unlikely(ext4_forced_shutdown(sbi)))
		return 0;

	trace_ext4_sync_fs(sb, wait);
	/* Drain pending unwritten-extent conversions first. */
	flush_workqueue(sbi->rsv_conversion_wq);
	/*
	 * Writeback quota in non-journalled quota case - journalled quota has
	 * no dirty dquots
	 */
	dquot_writeback_dquots(sb, -1);
	/*
	 * Data writeback is possible w/o journal transaction, so barrier must
	 * being sent at the end of the function. But we can skip it if
	 * transaction_commit will do it for us.
	 */
	if (sbi->s_journal) {
		target = jbd2_get_latest_transaction(sbi->s_journal);
		if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
		    !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
			needs_barrier = true;

		if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
			if (wait)
				ret = jbd2_log_wait_commit(sbi->s_journal,
							   target);
		}
	} else if (wait && test_opt(sb, BARRIER))
		needs_barrier = true;
	if (needs_barrier) {
		int err;
		err = blkdev_issue_flush(sb->s_bdev);
		/* Preserve any earlier error; the flush error is secondary. */
		if (!ret)
			ret = err;
	}

	return ret;
}
6244
/*
 * LVM calls this function before a (read-only) snapshot is created.  This
 * gives us a chance to flush the journal completely and mark the fs clean.
 *
 * Note that only this function cannot bring a filesystem to be in a clean
 * state independently. It relies on upper layer to stop all data & metadata
 * modifications.
 */
static int ext4_freeze(struct super_block *sb)
{
	int error = 0;
	journal_t *journal;

	/* Read-only filesystems are already quiescent. */
	if (sb_rdonly(sb))
		return 0;

	journal = EXT4_SB(sb)->s_journal;

	if (journal) {
		/* Now we set up the journal barrier. */
		jbd2_journal_lock_updates(journal);

		/*
		 * Don't clear the needs_recovery flag if we failed to
		 * flush the journal.
		 */
		error = jbd2_journal_flush(journal, 0);
		if (error < 0)
			goto out;

		/* Journal blocked and flushed, clear needs_recovery flag. */
		ext4_clear_feature_journal_needs_recovery(sb);
		if (ext4_orphan_file_empty(sb))
			ext4_clear_feature_orphan_present(sb);
	}

	/* Persist the (now clean) superblock state for the snapshot. */
	error = ext4_commit_super(sb);
out:
	if (journal)
		/* we rely on upper layer to stop further updates */
		jbd2_journal_unlock_updates(journal);
	return error;
}
6288
6289 /*
6290 * Called by LVM after the snapshot is done. We need to reset the RECOVER
6291 * flag here, even though the filesystem is not technically dirty yet.
6292 */
ext4_unfreeze(struct super_block * sb)6293 static int ext4_unfreeze(struct super_block *sb)
6294 {
6295 if (sb_rdonly(sb) || ext4_forced_shutdown(EXT4_SB(sb)))
6296 return 0;
6297
6298 if (EXT4_SB(sb)->s_journal) {
6299 /* Reset the needs_recovery flag before the fs is unlocked. */
6300 ext4_set_feature_journal_needs_recovery(sb);
6301 if (ext4_has_feature_orphan_file(sb))
6302 ext4_set_feature_orphan_present(sb);
6303 }
6304
6305 ext4_commit_super(sb);
6306 return 0;
6307 }
6308
/*
 * Structure to save mount options for ext4_remount's benefit
 * (saved copies of the corresponding ext4_sb_info fields, restored
 * verbatim by __ext4_remount()'s restore_opts path on failure).
 */
struct ext4_mount_options {
	unsigned long s_mount_opt;		/* copy of sbi->s_mount_opt */
	unsigned long s_mount_opt2;		/* copy of sbi->s_mount_opt2 */
	kuid_t s_resuid;			/* copy of sbi->s_resuid */
	kgid_t s_resgid;			/* copy of sbi->s_resgid */
	unsigned long s_commit_interval;	/* copy of sbi->s_commit_interval */
	u32 s_min_batch_time, s_max_batch_time;	/* jbd2 batching tunables */
#ifdef CONFIG_QUOTA
	int s_jquota_fmt;			/* copy of sbi->s_jquota_fmt */
	char *s_qf_names[EXT4_MAXQUOTAS];	/* kstrdup'd quota file names */
#endif
};
6324
/*
 * Core of remount: apply the new options from @fc to @sb, handling the
 * ro<->rw transitions.  On any failure every option saved in old_opts is
 * restored via the restore_opts path, so the mount is left unchanged.
 */
static int __ext4_remount(struct fs_context *fc, struct super_block *sb)
{
	struct ext4_fs_context *ctx = fc->fs_private;
	struct ext4_super_block *es;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	unsigned long old_sb_flags;
	struct ext4_mount_options old_opts;
	ext4_group_t g;
	int err = 0;
#ifdef CONFIG_QUOTA
	int enable_quota = 0;
	int i, j;
	char *to_free[EXT4_MAXQUOTAS];
#endif


	/* Store the original options */
	old_sb_flags = sb->s_flags;
	old_opts.s_mount_opt = sbi->s_mount_opt;
	old_opts.s_mount_opt2 = sbi->s_mount_opt2;
	old_opts.s_resuid = sbi->s_resuid;
	old_opts.s_resgid = sbi->s_resgid;
	old_opts.s_commit_interval = sbi->s_commit_interval;
	old_opts.s_min_batch_time = sbi->s_min_batch_time;
	old_opts.s_max_batch_time = sbi->s_max_batch_time;
#ifdef CONFIG_QUOTA
	old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
	/* Duplicate quota file names so restore_opts can put them back. */
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		if (sbi->s_qf_names[i]) {
			char *qf_name = get_qf_name(sb, sbi, i);

			old_opts.s_qf_names[i] = kstrdup(qf_name, GFP_KERNEL);
			if (!old_opts.s_qf_names[i]) {
				for (j = 0; j < i; j++)
					kfree(old_opts.s_qf_names[j]);
				return -ENOMEM;
			}
		} else
			old_opts.s_qf_names[i] = NULL;
#endif
	/* Default journal ioprio to the current value if none was given. */
	if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) {
		if (sbi->s_journal && sbi->s_journal->j_task->io_context)
			ctx->journal_ioprio =
				sbi->s_journal->j_task->io_context->ioprio;
		else
			ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO;

	}

	ext4_apply_options(fc, sb);

	/* journal_checksum can't be toggled at runtime; revert the bit. */
	if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
	    test_opt(sb, JOURNAL_CHECKSUM)) {
		ext4_msg(sb, KERN_ERR, "changing journal_checksum "
			 "during remount not supported; ignoring");
		sbi->s_mount_opt ^= EXT4_MOUNT_JOURNAL_CHECKSUM;
	}

	/* Reject option combinations that are invalid for the data mode. */
	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
		if (test_opt2(sb, EXPLICIT_DELALLOC)) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				 "both data=journal and delalloc");
			err = -EINVAL;
			goto restore_opts;
		}
		if (test_opt(sb, DIOREAD_NOLOCK)) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				 "both data=journal and dioread_nolock");
			err = -EINVAL;
			goto restore_opts;
		}
	} else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) {
		if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
			ext4_msg(sb, KERN_ERR, "can't mount with "
				"journal_async_commit in data=ordered mode");
			err = -EINVAL;
			goto restore_opts;
		}
	}

	if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_NO_MBCACHE) {
		ext4_msg(sb, KERN_ERR, "can't enable nombcache during remount");
		err = -EINVAL;
		goto restore_opts;
	}

	if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED))
		ext4_abort(sb, ESHUTDOWN, "Abort forced by user");

	sb->s_flags = (sb->s_flags & ~SB_POSIXACL) |
		(test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0);

	es = sbi->s_es;

	if (sbi->s_journal) {
		ext4_init_journal_params(sb, sbi->s_journal);
		set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio);
	}

	/* Flush outstanding errors before changing fs state */
	flush_work(&sbi->s_error_work);

	/* Handle a requested read-only <-> read-write transition. */
	if ((bool)(fc->sb_flags & SB_RDONLY) != sb_rdonly(sb)) {
		if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) {
			err = -EROFS;
			goto restore_opts;
		}

		if (fc->sb_flags & SB_RDONLY) {
			err = sync_filesystem(sb);
			if (err < 0)
				goto restore_opts;
			err = dquot_suspend(sb, -1);
			if (err < 0)
				goto restore_opts;

			/*
			 * First of all, the unconditional stuff we have to do
			 * to disable replay of the journal when we next remount
			 */
			sb->s_flags |= SB_RDONLY;

			/*
			 * OK, test if we are remounting a valid rw partition
			 * readonly, and if so set the rdonly flag and then
			 * mark the partition as valid again.
			 */
			if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) &&
			    (sbi->s_mount_state & EXT4_VALID_FS))
				es->s_state = cpu_to_le16(sbi->s_mount_state);

			if (sbi->s_journal) {
				/*
				 * We let remount-ro finish even if marking fs
				 * as clean failed...
				 */
				ext4_mark_recovery_complete(sb, es);
			}
		} else {
			/* Make sure we can mount this feature set readwrite */
			if (ext4_has_feature_readonly(sb) ||
			    !ext4_feature_set_ok(sb, 0)) {
				err = -EROFS;
				goto restore_opts;
			}
			/*
			 * Make sure the group descriptor checksums
			 * are sane. If they aren't, refuse to remount r/w.
			 */
			for (g = 0; g < sbi->s_groups_count; g++) {
				struct ext4_group_desc *gdp =
					ext4_get_group_desc(sb, g, NULL);

				if (!ext4_group_desc_csum_verify(sb, g, gdp)) {
					ext4_msg(sb, KERN_ERR,
	"ext4_remount: Checksum for group %u failed (%u!=%u)",
		g, le16_to_cpu(ext4_group_desc_csum(sb, g, gdp)),
					       le16_to_cpu(gdp->bg_checksum));
					err = -EFSBADCRC;
					goto restore_opts;
				}
			}

			/*
			 * If we have an unprocessed orphan list hanging
			 * around from a previously readonly bdev mount,
			 * require a full umount/remount for now.
			 */
			if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
				ext4_msg(sb, KERN_WARNING, "Couldn't "
				       "remount RDWR because of unprocessed "
				       "orphan inode list.  Please "
				       "umount/remount instead");
				err = -EINVAL;
				goto restore_opts;
			}

			/*
			 * Mounting a RDONLY partition read-write, so reread
			 * and store the current valid flag.  (It may have
			 * been changed by e2fsck since we originally mounted
			 * the partition.)
			 */
			if (sbi->s_journal) {
				err = ext4_clear_journal_err(sb, es);
				if (err)
					goto restore_opts;
			}
			sbi->s_mount_state = (le16_to_cpu(es->s_state) &
					      ~EXT4_FC_REPLAY);

			err = ext4_setup_super(sb, es, 0);
			if (err)
				goto restore_opts;

			sb->s_flags &= ~SB_RDONLY;
			if (ext4_has_feature_mmp(sb)) {
				err = ext4_multi_mount_protect(sb,
						le64_to_cpu(es->s_mmp_block));
				if (err)
					goto restore_opts;
			}
#ifdef CONFIG_QUOTA
			enable_quota = 1;
#endif
		}
	}

	/*
	 * Handle creation of system zone data early because it can fail.
	 * Releasing of existing data is done when we are sure remount will
	 * succeed.
	 */
	if (test_opt(sb, BLOCK_VALIDITY) && !sbi->s_system_blks) {
		err = ext4_setup_system_zone(sb);
		if (err)
			goto restore_opts;
	}

	/* No journal to carry the state change: write the sb directly. */
	if (sbi->s_journal == NULL && !(old_sb_flags & SB_RDONLY)) {
		err = ext4_commit_super(sb);
		if (err)
			goto restore_opts;
	}

#ifdef CONFIG_QUOTA
	if (enable_quota) {
		if (sb_any_quota_suspended(sb))
			dquot_resume(sb, -1);
		else if (ext4_has_feature_quota(sb)) {
			err = ext4_enable_quotas(sb);
			if (err)
				goto restore_opts;
		}
	}
	/* Release old quota file names */
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(old_opts.s_qf_names[i]);
#endif
	if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
		ext4_release_system_zone(sb);

	/*
	 * Reinitialize lazy itable initialization thread based on
	 * current settings
	 */
	if (sb_rdonly(sb) || !test_opt(sb, INIT_INODE_TABLE))
		ext4_unregister_li_request(sb);
	else {
		ext4_group_t first_not_zeroed;
		first_not_zeroed = ext4_has_uninit_itable(sb);
		ext4_register_li_request(sb, first_not_zeroed);
	}

	if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
		ext4_stop_mmpd(sbi);

	return 0;

restore_opts:
	/*
	 * If there was a failing r/w to ro transition, we may need to
	 * re-enable quota
	 */
	if ((sb->s_flags & SB_RDONLY) && !(old_sb_flags & SB_RDONLY) &&
	    sb_any_quota_suspended(sb))
		dquot_resume(sb, -1);
	sb->s_flags = old_sb_flags;
	sbi->s_mount_opt = old_opts.s_mount_opt;
	sbi->s_mount_opt2 = old_opts.s_mount_opt2;
	sbi->s_resuid = old_opts.s_resuid;
	sbi->s_resgid = old_opts.s_resgid;
	sbi->s_commit_interval = old_opts.s_commit_interval;
	sbi->s_min_batch_time = old_opts.s_min_batch_time;
	sbi->s_max_batch_time = old_opts.s_max_batch_time;
	if (!test_opt(sb, BLOCK_VALIDITY) && sbi->s_system_blks)
		ext4_release_system_zone(sb);
#ifdef CONFIG_QUOTA
	sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
	/* Swap the quota names back under RCU before freeing the new ones. */
	for (i = 0; i < EXT4_MAXQUOTAS; i++) {
		to_free[i] = get_qf_name(sb, sbi, i);
		rcu_assign_pointer(sbi->s_qf_names[i], old_opts.s_qf_names[i]);
	}
	synchronize_rcu();
	for (i = 0; i < EXT4_MAXQUOTAS; i++)
		kfree(to_free[i]);
#endif
	if (!ext4_has_feature_mmp(sb) || sb_rdonly(sb))
		ext4_stop_mmpd(sbi);
	return err;
}
6616
ext4_reconfigure(struct fs_context * fc)6617 static int ext4_reconfigure(struct fs_context *fc)
6618 {
6619 struct super_block *sb = fc->root->d_sb;
6620 int ret;
6621
6622 fc->s_fs_info = EXT4_SB(sb);
6623
6624 ret = ext4_check_opt_consistency(fc, sb);
6625 if (ret < 0)
6626 return ret;
6627
6628 ret = __ext4_remount(fc, sb);
6629 if (ret < 0)
6630 return ret;
6631
6632 ext4_msg(sb, KERN_INFO, "re-mounted. Quota mode: %s.",
6633 ext4_quota_mode(sb));
6634
6635 return 0;
6636 }
6637
6638 #ifdef CONFIG_QUOTA
/*
 * Clamp the statfs figures in @buf to the project quota limits of
 * @projid, so a project-limited directory tree reports its quota as
 * the filesystem size.
 */
static int ext4_statfs_project(struct super_block *sb,
			       kprojid_t projid, struct kstatfs *buf)
{
	struct dquot *dquot;
	u64 limit;
	u64 used;

	dquot = dqget(sb, make_kqid_projid(projid));
	if (IS_ERR(dquot))
		return PTR_ERR(dquot);
	spin_lock(&dquot->dq_dqb_lock);

	/* Block limit: the tighter of soft and hard, in fs blocks. */
	limit = min_not_zero(dquot->dq_dqb.dqb_bsoftlimit,
			     dquot->dq_dqb.dqb_bhardlimit);
	limit >>= sb->s_blocksize_bits;

	if (limit && buf->f_blocks > limit) {
		used = (dquot->dq_dqb.dqb_curspace +
			dquot->dq_dqb.dqb_rsvspace) >> sb->s_blocksize_bits;
		buf->f_blocks = limit;
		buf->f_bfree = buf->f_bavail =
			(buf->f_blocks > used) ?
			(buf->f_blocks - used) : 0;
	}

	/* Inode limit: same clamping for the file counts. */
	limit = min_not_zero(dquot->dq_dqb.dqb_isoftlimit,
			     dquot->dq_dqb.dqb_ihardlimit);
	if (limit && buf->f_files > limit) {
		buf->f_files = limit;
		buf->f_ffree =
			(buf->f_files > dquot->dq_dqb.dqb_curinodes) ?
			(buf->f_files - dquot->dq_dqb.dqb_curinodes) : 0;
	}

	spin_unlock(&dquot->dq_dqb_lock);
	dqput(dquot);
	return 0;
}
6679 #endif
6680
ext4_statfs(struct dentry * dentry,struct kstatfs * buf)6681 static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
6682 {
6683 struct super_block *sb = dentry->d_sb;
6684 struct ext4_sb_info *sbi = EXT4_SB(sb);
6685 struct ext4_super_block *es = sbi->s_es;
6686 ext4_fsblk_t overhead = 0, resv_blocks;
6687 s64 bfree;
6688 resv_blocks = EXT4_C2B(sbi, atomic64_read(&sbi->s_resv_clusters));
6689
6690 if (!test_opt(sb, MINIX_DF))
6691 overhead = sbi->s_overhead;
6692
6693 buf->f_type = EXT4_SUPER_MAGIC;
6694 buf->f_bsize = sb->s_blocksize;
6695 buf->f_blocks = ext4_blocks_count(es) - EXT4_C2B(sbi, overhead);
6696 bfree = percpu_counter_sum_positive(&sbi->s_freeclusters_counter) -
6697 percpu_counter_sum_positive(&sbi->s_dirtyclusters_counter);
6698 /* prevent underflow in case that few free space is available */
6699 buf->f_bfree = EXT4_C2B(sbi, max_t(s64, bfree, 0));
6700 buf->f_bavail = buf->f_bfree -
6701 (ext4_r_blocks_count(es) + resv_blocks);
6702 if (buf->f_bfree < (ext4_r_blocks_count(es) + resv_blocks))
6703 buf->f_bavail = 0;
6704 buf->f_files = le32_to_cpu(es->s_inodes_count);
6705 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
6706 buf->f_namelen = EXT4_NAME_LEN;
6707 buf->f_fsid = uuid_to_fsid(es->s_uuid);
6708
6709 #ifdef CONFIG_QUOTA
6710 if (ext4_test_inode_flag(dentry->d_inode, EXT4_INODE_PROJINHERIT) &&
6711 sb_has_quota_limits_enabled(sb, PRJQUOTA))
6712 ext4_statfs_project(sb, EXT4_I(dentry->d_inode)->i_projid, buf);
6713 #endif
6714 return 0;
6715 }
6716
6717
6718 #ifdef CONFIG_QUOTA
6719
6720 /*
6721 * Helper functions so that transaction is started before we acquire dqio_sem
6722 * to keep correct lock ordering of transaction > dqio_sem
6723 */
dquot_to_inode(struct dquot * dquot)6724 static inline struct inode *dquot_to_inode(struct dquot *dquot)
6725 {
6726 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_id.type];
6727 }
6728
ext4_write_dquot(struct dquot * dquot)6729 static int ext4_write_dquot(struct dquot *dquot)
6730 {
6731 int ret, err;
6732 handle_t *handle;
6733 struct inode *inode;
6734
6735 inode = dquot_to_inode(dquot);
6736 handle = ext4_journal_start(inode, EXT4_HT_QUOTA,
6737 EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
6738 if (IS_ERR(handle))
6739 return PTR_ERR(handle);
6740 ret = dquot_commit(dquot);
6741 err = ext4_journal_stop(handle);
6742 if (!ret)
6743 ret = err;
6744 return ret;
6745 }
6746
ext4_acquire_dquot(struct dquot * dquot)6747 static int ext4_acquire_dquot(struct dquot *dquot)
6748 {
6749 int ret, err;
6750 handle_t *handle;
6751
6752 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
6753 EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb));
6754 if (IS_ERR(handle))
6755 return PTR_ERR(handle);
6756 ret = dquot_acquire(dquot);
6757 err = ext4_journal_stop(handle);
6758 if (!ret)
6759 ret = err;
6760 return ret;
6761 }
6762
ext4_release_dquot(struct dquot * dquot)6763 static int ext4_release_dquot(struct dquot *dquot)
6764 {
6765 int ret, err;
6766 handle_t *handle;
6767
6768 handle = ext4_journal_start(dquot_to_inode(dquot), EXT4_HT_QUOTA,
6769 EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb));
6770 if (IS_ERR(handle)) {
6771 /* Release dquot anyway to avoid endless cycle in dqput() */
6772 dquot_release(dquot);
6773 return PTR_ERR(handle);
6774 }
6775 ret = dquot_release(dquot);
6776 err = ext4_journal_stop(handle);
6777 if (!ret)
6778 ret = err;
6779 return ret;
6780 }
6781
ext4_mark_dquot_dirty(struct dquot * dquot)6782 static int ext4_mark_dquot_dirty(struct dquot *dquot)
6783 {
6784 struct super_block *sb = dquot->dq_sb;
6785
6786 if (ext4_is_quota_journalled(sb)) {
6787 dquot_mark_dquot_dirty(dquot);
6788 return ext4_write_dquot(dquot);
6789 } else {
6790 return dquot_mark_dquot_dirty(dquot);
6791 }
6792 }
6793
ext4_write_info(struct super_block * sb,int type)6794 static int ext4_write_info(struct super_block *sb, int type)
6795 {
6796 int ret, err;
6797 handle_t *handle;
6798
6799 /* Data block + inode block */
6800 handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2);
6801 if (IS_ERR(handle))
6802 return PTR_ERR(handle);
6803 ret = dquot_commit_info(sb, type);
6804 err = ext4_journal_stop(handle);
6805 if (!ret)
6806 ret = err;
6807 return ret;
6808 }
6809
/*
 * Switch the lockdep subclass of an inode's i_data_sem between the normal
 * and the quota-file class, so lock ordering against quota locks can be
 * validated without false positives.
 */
static void lockdep_set_quota_inode(struct inode *inode, int subclass)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	/* The first argument of lockdep_set_subclass has to be
	 * *exactly* the same as the argument to init_rwsem() --- in
	 * this case, in init_once() --- or lockdep gets unhappy
	 * because the name of the lock is set using the
	 * stringification of the argument to init_rwsem().
	 */
	(void) ei;	/* shut up clang warning if !CONFIG_LOCKDEP */
	lockdep_set_subclass(&ei->i_data_sem, subclass);
}
6823
6824 /*
6825 * Standard function to be called on quota_on
6826 */
ext4_quota_on(struct super_block * sb,int type,int format_id,const struct path * path)6827 static int ext4_quota_on(struct super_block *sb, int type, int format_id,
6828 const struct path *path)
6829 {
6830 int err;
6831
6832 if (!test_opt(sb, QUOTA))
6833 return -EINVAL;
6834
6835 /* Quotafile not on the same filesystem? */
6836 if (path->dentry->d_sb != sb)
6837 return -EXDEV;
6838
6839 /* Quota already enabled for this file? */
6840 if (IS_NOQUOTA(d_inode(path->dentry)))
6841 return -EBUSY;
6842
6843 /* Journaling quota? */
6844 if (EXT4_SB(sb)->s_qf_names[type]) {
6845 /* Quotafile not in fs root? */
6846 if (path->dentry->d_parent != sb->s_root)
6847 ext4_msg(sb, KERN_WARNING,
6848 "Quota file not on filesystem root. "
6849 "Journaled quota will not work");
6850 sb_dqopt(sb)->flags |= DQUOT_NOLIST_DIRTY;
6851 } else {
6852 /*
6853 * Clear the flag just in case mount options changed since
6854 * last time.
6855 */
6856 sb_dqopt(sb)->flags &= ~DQUOT_NOLIST_DIRTY;
6857 }
6858
6859 /*
6860 * When we journal data on quota file, we have to flush journal to see
6861 * all updates to the file when we bypass pagecache...
6862 */
6863 if (EXT4_SB(sb)->s_journal &&
6864 ext4_should_journal_data(d_inode(path->dentry))) {
6865 /*
6866 * We don't need to lock updates but journal_flush() could
6867 * otherwise be livelocked...
6868 */
6869 jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
6870 err = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
6871 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
6872 if (err)
6873 return err;
6874 }
6875
6876 lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
6877 err = dquot_quota_on(sb, type, format_id, path);
6878 if (!err) {
6879 struct inode *inode = d_inode(path->dentry);
6880 handle_t *handle;
6881
6882 /*
6883 * Set inode flags to prevent userspace from messing with quota
6884 * files. If this fails, we return success anyway since quotas
6885 * are already enabled and this is not a hard failure.
6886 */
6887 inode_lock(inode);
6888 handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
6889 if (IS_ERR(handle))
6890 goto unlock_inode;
6891 EXT4_I(inode)->i_flags |= EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL;
6892 inode_set_flags(inode, S_NOATIME | S_IMMUTABLE,
6893 S_NOATIME | S_IMMUTABLE);
6894 err = ext4_mark_inode_dirty(handle, inode);
6895 ext4_journal_stop(handle);
6896 unlock_inode:
6897 inode_unlock(inode);
6898 if (err)
6899 dquot_quota_off(sb, type);
6900 }
6901 if (err)
6902 lockdep_set_quota_inode(path->dentry->d_inode,
6903 I_DATA_SEM_NORMAL);
6904 return err;
6905 }
6906
/*
 * Validate the quota inode number recorded in the superblock for @type:
 * user and group quota live in fixed reserved inodes, while project quota
 * may use any non-reserved inode.  BUG() on an unknown quota type.
 */
static inline bool ext4_check_quota_inum(int type, unsigned long qf_inum)
{
	switch (type) {
	case USRQUOTA:
		return qf_inum == EXT4_USR_QUOTA_INO;
	case GRPQUOTA:
		return qf_inum == EXT4_GRP_QUOTA_INO;
	case PRJQUOTA:
		return qf_inum >= EXT4_GOOD_OLD_FIRST_INO;
	default:
		BUG();
	}
}
6920
/*
 * Enable quota stored in a hidden system inode (the "quota" feature):
 * look up the reserved quota inode number from the superblock, sanity
 * check it, and hand the inode to the generic quota code.
 */
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
			     unsigned int flags)
{
	int err;
	struct inode *qf_inode;
	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
	};

	BUG_ON(!ext4_has_feature_quota(sb));

	/* no inode number recorded: this quota type is not provisioned */
	if (!qf_inums[type])
		return -EPERM;

	if (!ext4_check_quota_inum(type, qf_inums[type])) {
		ext4_error(sb, "Bad quota inum: %lu, type: %d",
			   qf_inums[type], type);
		return -EUCLEAN;
	}

	qf_inode = ext4_iget(sb, qf_inums[type], EXT4_IGET_SPECIAL);
	if (IS_ERR(qf_inode)) {
		ext4_error(sb, "Bad quota inode: %lu, type: %d",
			   qf_inums[type], type);
		return PTR_ERR(qf_inode);
	}

	/* Don't account quota for quota files to avoid recursion */
	qf_inode->i_flags |= S_NOQUOTA;
	lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
	err = dquot_load_quota_inode(qf_inode, type, format_id, flags);
	if (err)
		lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
	/* quota code holds its own reference on success; drop ours */
	iput(qf_inode);

	return err;
}
6960
/*
 * Enable usage tracking for all quota types that have a system quota
 * inode.  Limits enforcement is additionally switched on for types
 * requested via mount options.  On failure, every type enabled so far
 * is rolled back before returning the error.
 */
int ext4_enable_quotas(struct super_block *sb)
{
	int type, err = 0;
	unsigned long qf_inums[EXT4_MAXQUOTAS] = {
		le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum),
		le32_to_cpu(EXT4_SB(sb)->s_es->s_prj_quota_inum)
	};
	/* which types asked for limit enforcement on the command line */
	bool quota_mopt[EXT4_MAXQUOTAS] = {
		test_opt(sb, USRQUOTA),
		test_opt(sb, GRPQUOTA),
		test_opt(sb, PRJQUOTA),
	};

	sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE | DQUOT_NOLIST_DIRTY;
	for (type = 0; type < EXT4_MAXQUOTAS; type++) {
		if (qf_inums[type]) {
			err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
				DQUOT_USAGE_ENABLED |
				(quota_mopt[type] ? DQUOT_LIMITS_ENABLED : 0));
			if (err) {
				ext4_warning(sb,
					"Failed to enable quota tracking "
					"(type=%d, err=%d, ino=%lu). "
					"Please run e2fsck to fix.", type,
					err, qf_inums[type]);
				/* roll back all previously enabled types */
				for (type--; type >= 0; type--) {
					struct inode *inode;

					/*
					 * Grab an extra reference so we can
					 * still fix up lockdep state after
					 * dquot_quota_off() drops its own.
					 */
					inode = sb_dqopt(sb)->files[type];
					if (inode)
						inode = igrab(inode);
					dquot_quota_off(sb, type);
					if (inode) {
						lockdep_set_quota_inode(inode,
							I_DATA_SEM_NORMAL);
						iput(inode);
					}
				}

				return err;
			}
		}
	}
	return 0;
}
7008
/*
 * Disable quota for @type: turn it off in the generic layer and, for
 * user-visible (non system-inode) quota files, clear the protective
 * NOATIME/IMMUTABLE flags and bump the timestamps.
 */
static int ext4_quota_off(struct super_block *sb, int type)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	handle_t *handle;
	int err;

	/* Force all delayed allocation blocks to be allocated.
	 * Caller already holds s_umount sem */
	if (test_opt(sb, DELALLOC))
		sync_filesystem(sb);

	/* no quota file, or it is already going away */
	if (!inode || !igrab(inode))
		goto out;

	err = dquot_quota_off(sb, type);
	/* hidden system quota inodes need no flag/timestamp fixup */
	if (err || ext4_has_feature_quota(sb))
		goto out_put;

	inode_lock(inode);
	/*
	 * Update modification times of quota files when userspace can
	 * start looking at them. If we fail, we return success anyway since
	 * this is not a hard failure and quotas are already disabled.
	 */
	handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto out_unlock;
	}
	EXT4_I(inode)->i_flags &= ~(EXT4_NOATIME_FL | EXT4_IMMUTABLE_FL);
	inode_set_flags(inode, 0, S_NOATIME | S_IMMUTABLE);
	inode->i_mtime = inode->i_ctime = current_time(inode);
	err = ext4_mark_inode_dirty(handle, inode);
	ext4_journal_stop(handle);
out_unlock:
	inode_unlock(inode);
out_put:
	/* quota is off; restore the normal i_data_sem lockdep class */
	lockdep_set_quota_inode(inode, I_DATA_SEM_NORMAL);
	iput(inode);
	return err;
out:
	return dquot_quota_off(sb, type);
}
7052
7053 /* Read data from quotafile - avoid pagecache and such because we cannot afford
7054 * acquiring the locks... As quota files are never truncated and quota code
7055 * itself serializes the operations (and no one else should touch the files)
7056 * we don't have to be afraid of races */
ext4_quota_read(struct super_block * sb,int type,char * data,size_t len,loff_t off)7057 static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
7058 size_t len, loff_t off)
7059 {
7060 struct inode *inode = sb_dqopt(sb)->files[type];
7061 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
7062 int offset = off & (sb->s_blocksize - 1);
7063 int tocopy;
7064 size_t toread;
7065 struct buffer_head *bh;
7066 loff_t i_size = i_size_read(inode);
7067
7068 if (off > i_size)
7069 return 0;
7070 if (off+len > i_size)
7071 len = i_size-off;
7072 toread = len;
7073 while (toread > 0) {
7074 tocopy = sb->s_blocksize - offset < toread ?
7075 sb->s_blocksize - offset : toread;
7076 bh = ext4_bread(NULL, inode, blk, 0);
7077 if (IS_ERR(bh))
7078 return PTR_ERR(bh);
7079 if (!bh) /* A hole? */
7080 memset(data, 0, tocopy);
7081 else
7082 memcpy(data, bh->b_data+offset, tocopy);
7083 brelse(bh);
7084 offset = 0;
7085 toread -= tocopy;
7086 data += tocopy;
7087 blk++;
7088 }
7089 return len;
7090 }
7091
/* Write to quotafile (we know the transaction is already started and has
 * enough credits) */
static ssize_t ext4_quota_write(struct super_block *sb, int type,
				const char *data, size_t len, loff_t off)
{
	struct inode *inode = sb_dqopt(sb)->files[type];
	ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
	int err = 0, err2 = 0, offset = off & (sb->s_blocksize - 1);
	int retries = 0;
	struct buffer_head *bh;
	handle_t *handle = journal_current_handle();

	/* writes must happen inside the caller's running transaction */
	if (!handle) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			" cancelled because transaction is not started",
			(unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}
	/*
	 * Since we account only one data block in transaction credits,
	 * then it is impossible to cross a block boundary.
	 */
	if (sb->s_blocksize - offset < len) {
		ext4_msg(sb, KERN_WARNING, "Quota write (off=%llu, len=%llu)"
			" cancelled because not block aligned",
			(unsigned long long)off, (unsigned long long)len);
		return -EIO;
	}

	/* map (allocating if necessary) the block; retry on ENOSPC */
	do {
		bh = ext4_bread(handle, inode, blk,
				EXT4_GET_BLOCKS_CREATE |
				EXT4_GET_BLOCKS_METADATA_NOFAIL);
	} while (PTR_ERR(bh) == -ENOSPC &&
		 ext4_should_retry_alloc(inode->i_sb, &retries));
	if (IS_ERR(bh))
		return PTR_ERR(bh);
	if (!bh)
		goto out;
	BUFFER_TRACE(bh, "get write access");
	err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
	if (err) {
		brelse(bh);
		return err;
	}
	/* copy under the buffer lock, then journal the modified block */
	lock_buffer(bh);
	memcpy(bh->b_data+offset, data, len);
	flush_dcache_page(bh->b_page);
	unlock_buffer(bh);
	err = ext4_handle_dirty_metadata(handle, NULL, bh);
	brelse(bh);
out:
	/* extend the quota file size if we wrote past the old EOF */
	if (inode->i_size < off + len) {
		i_size_write(inode, off + len);
		EXT4_I(inode)->i_disksize = inode->i_size;
		err2 = ext4_mark_inode_dirty(handle, inode);
		if (unlikely(err2 && !err))
			err = err2;
	}
	return err ? err : len;
}
7153 #endif
7154
#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT2)
/*
 * When the dedicated ext2 driver is not built but CONFIG_EXT4_USE_FOR_EXT2
 * is set, ext4 services "ext2" mounts by registering an extra fs type.
 */
static inline void register_as_ext2(void)
{
	int err = register_filesystem(&ext2_fs_type);
	if (err)
		printk(KERN_WARNING
		       "EXT4-fs: Unable to register as ext2 (%d)\n", err);
}

static inline void unregister_as_ext2(void)
{
	unregister_filesystem(&ext2_fs_type);
}

/*
 * Return 1 if the on-disk feature set is one the ext2 driver could have
 * handled: no unknown ext2 incompat features, and unknown ro-compat
 * features only forbid read-write mounts.
 */
static inline int ext2_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext2_incompat_features(sb))
		return 0;
	if (sb_rdonly(sb))
		return 1;
	if (ext4_has_unknown_ext2_ro_compat_features(sb))
		return 0;
	return 1;
}
#else
/* The real ext2 driver is available: leave "ext2" mounts to it. */
static inline void register_as_ext2(void) { }
static inline void unregister_as_ext2(void) { }
static inline int ext2_feature_set_ok(struct super_block *sb) { return 0; }
#endif
7184
register_as_ext3(void)7185 static inline void register_as_ext3(void)
7186 {
7187 int err = register_filesystem(&ext3_fs_type);
7188 if (err)
7189 printk(KERN_WARNING
7190 "EXT4-fs: Unable to register as ext3 (%d)\n", err);
7191 }
7192
/* Drop the "ext3" alias registered by register_as_ext3(). */
static inline void unregister_as_ext3(void)
{
	unregister_filesystem(&ext3_fs_type);
}
7197
/*
 * Return 1 if this filesystem could have been mounted by the ext3
 * driver: no unknown ext3 incompat features, a journal present, and
 * unknown ro-compat features only rule out read-write mounts.
 */
static inline int ext3_feature_set_ok(struct super_block *sb)
{
	if (ext4_has_unknown_ext3_incompat_features(sb) ||
	    !ext4_has_feature_journal(sb))
		return 0;
	if (!sb_rdonly(sb) && ext4_has_unknown_ext3_ro_compat_features(sb))
		return 0;
	return 1;
}
7210
/* The canonical "ext4" filesystem type, using the new fs_context mount API. */
static struct file_system_type ext4_fs_type = {
	.owner = THIS_MODULE,
	.name = "ext4",
	.init_fs_context = ext4_init_fs_context,
	.parameters = ext4_param_specs,
	.kill_sb = kill_block_super,
	.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("ext4");

/* Shared across all ext4 file systems */
wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
7223
/*
 * Module init: bring up ext4's subsystems in dependency order, then
 * register the ext2/ext3/ext4 filesystem types.  On any failure the
 * goto ladder below unwinds everything initialized so far, in reverse.
 */
static int __init ext4_init_fs(void)
{
	int i, err;

	ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
	ext4_li_info = NULL;

	/* Build-time check for flags consistency */
	ext4_check_flag_values();

	for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
		init_waitqueue_head(&ext4__ioend_wq[i]);

	/* extent status tree cache */
	err = ext4_init_es();
	if (err)
		return err;

	/* pending-reservation tracking */
	err = ext4_init_pending();
	if (err)
		goto out7;

	/* post-read (verity/crypto) processing infrastructure */
	err = ext4_init_post_read_processing();
	if (err)
		goto out6;

	err = ext4_init_pageio();
	if (err)
		goto out5;

	err = ext4_init_system_zone();
	if (err)
		goto out4;

	err = ext4_init_sysfs();
	if (err)
		goto out3;

	err = ext4_init_mballoc();
	if (err)
		goto out2;
	err = init_inodecache();
	if (err)
		goto out1;

	/* fast-commit dentry tracking cache */
	err = ext4_fc_init_dentry_cache();
	if (err)
		goto out05;

	/* register compat aliases first, the real "ext4" type last */
	register_as_ext3();
	register_as_ext2();
	err = register_filesystem(&ext4_fs_type);
	if (err)
		goto out;

	return 0;
out:
	unregister_as_ext2();
	unregister_as_ext3();
	ext4_fc_destroy_dentry_cache();
out05:
	destroy_inodecache();
out1:
	ext4_exit_mballoc();
out2:
	ext4_exit_sysfs();
out3:
	ext4_exit_system_zone();
out4:
	ext4_exit_pageio();
out5:
	ext4_exit_post_read_processing();
out6:
	ext4_exit_pending();
out7:
	ext4_exit_es();

	return err;
}
7302
/*
 * Module unload: stop the background lazyinit thread first so nothing
 * races with teardown, unregister all filesystem types, then release
 * the caches and subsystems brought up by ext4_init_fs().
 */
static void __exit ext4_exit_fs(void)
{
	ext4_destroy_lazyinit_thread();
	unregister_as_ext2();
	unregister_as_ext3();
	unregister_filesystem(&ext4_fs_type);
	ext4_fc_destroy_dentry_cache();
	destroy_inodecache();
	ext4_exit_mballoc();
	ext4_exit_sysfs();
	ext4_exit_system_zone();
	ext4_exit_pageio();
	ext4_exit_post_read_processing();
	ext4_exit_es();
	ext4_exit_pending();
}
7319
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
/* metadata checksumming needs crc32c available before ext4 loads */
MODULE_SOFTDEP("pre: crc32c");
module_init(ext4_init_fs)
module_exit(ext4_exit_fs)
7326