diff options
Diffstat (limited to 'fs/ext4/super.c')
-rw-r--r-- | fs/ext4/super.c | 1827 |
1 files changed, 966 insertions, 861 deletions
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9a936ecbaa3b..7cdd2138c897 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -39,7 +39,6 @@ #include <linux/log2.h> #include <linux/crc16.h> #include <linux/dax.h> -#include <linux/cleancache.h> #include <linux/uaccess.h> #include <linux/iversion.h> #include <linux/unicode.h> @@ -88,7 +87,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, static int ext4_validate_options(struct fs_context *fc); static int ext4_check_opt_consistency(struct fs_context *fc, struct super_block *sb); -static int ext4_apply_options(struct fs_context *fc, struct super_block *sb); +static void ext4_apply_options(struct fs_context *fc, struct super_block *sb); static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param); static int ext4_get_tree(struct fs_context *fc); static int ext4_reconfigure(struct fs_context *fc); @@ -160,7 +159,7 @@ MODULE_ALIAS("ext3"); #define IS_EXT3_SB(sb) ((sb)->s_bdev->bd_holder == &ext3_fs_type) -static inline void __ext4_read_bh(struct buffer_head *bh, int op_flags, +static inline void __ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io) { /* @@ -172,10 +171,10 @@ static inline void __ext4_read_bh(struct buffer_head *bh, int op_flags, bh->b_end_io = end_io ? end_io : end_buffer_read_sync; get_bh(bh); - submit_bh(REQ_OP_READ, op_flags, bh); + submit_bh(REQ_OP_READ | op_flags, bh); } -void ext4_read_bh_nowait(struct buffer_head *bh, int op_flags, +void ext4_read_bh_nowait(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io) { BUG_ON(!buffer_locked(bh)); @@ -187,7 +186,7 @@ void ext4_read_bh_nowait(struct buffer_head *bh, int op_flags, __ext4_read_bh(bh, op_flags, end_io); } -int ext4_read_bh(struct buffer_head *bh, int op_flags, bh_end_io_t *end_io) +int ext4_read_bh(struct buffer_head *bh, blk_opf_t op_flags, bh_end_io_t *end_io) { BUG_ON(!buffer_locked(bh)); @@ -204,21 +203,14 @@ int ext4_read_bh(struct buffer_head *bh, int op_flags, bh_end_io_t *end_io) return -EIO; } -int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait) +int ext4_read_bh_lock(struct buffer_head *bh, blk_opf_t op_flags, bool wait) { - if (trylock_buffer(bh)) { - if (wait) - return ext4_read_bh(bh, op_flags, NULL); + lock_buffer(bh); + if (!wait) { ext4_read_bh_nowait(bh, op_flags, NULL); return 0; } - if (wait) { - wait_on_buffer(bh); - if (buffer_uptodate(bh)) - return 0; - return -EIO; - } - return 0; + return ext4_read_bh(bh, op_flags, NULL); } /* @@ -228,8 +220,8 @@ int ext4_read_bh_lock(struct buffer_head *bh, int op_flags, bool wait) * return. */ static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb, - sector_t block, int op_flags, - gfp_t gfp) + sector_t block, + blk_opf_t op_flags, gfp_t gfp) { struct buffer_head *bh; int ret; @@ -249,7 +241,7 @@ static struct buffer_head *__ext4_sb_bread_gfp(struct super_block *sb, } struct buffer_head *ext4_sb_bread(struct super_block *sb, sector_t block, - int op_flags) + blk_opf_t op_flags) { return __ext4_sb_bread_gfp(sb, block, op_flags, __GFP_MOVABLE); } @@ -265,7 +257,8 @@ void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block) struct buffer_head *bh = sb_getblk_gfp(sb, block, 0); if (likely(bh)) { - ext4_read_bh_lock(bh, REQ_RAHEAD, false); + if (trylock_buffer(bh)) + ext4_read_bh_nowait(bh, REQ_RAHEAD, NULL); brelse(bh); } } @@ -1200,20 +1193,28 @@ static void ext4_put_super(struct super_block *sb) int aborted = 0; int i, err; - ext4_unregister_li_request(sb); - ext4_quota_off_umount(sb); - - flush_work(&sbi->s_error_work); - destroy_workqueue(sbi->rsv_conversion_wq); - ext4_release_orphan_info(sb); - /* * Unregister sysfs before destroying jbd2 journal. * Since we could still access attr_journal_task attribute via sysfs * path which could have sbi->s_journal->j_task as NULL + * Unregister sysfs before flush sbi->s_error_work. + * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If + * read metadata verify failed then will queue error work. + * flush_stashed_error_work will call start_this_handle may trigger + * BUG_ON. */ ext4_unregister_sysfs(sb); + if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs unmount")) + ext4_msg(sb, KERN_INFO, "unmounting filesystem."); + + ext4_unregister_li_request(sb); + ext4_quota_off_umount(sb); + + flush_work(&sbi->s_error_work); + destroy_workqueue(sbi->rsv_conversion_wq); + ext4_release_orphan_info(sb); + if (sbi->s_journal) { aborted = is_journal_aborted(sbi->s_journal); err = jbd2_journal_destroy(sbi->s_journal); @@ -1300,9 +1301,9 @@ static void ext4_put_super(struct super_block *sb) if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->s_blockgroup_lock); - fs_put_dax(sbi->s_daxdev); + fs_put_dax(sbi->s_daxdev, NULL); fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif kfree(sbi); @@ -1317,7 +1318,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) { struct ext4_inode_info *ei; - ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); + ei = alloc_inode_sb(sb, ext4_inode_cachep, GFP_NOFS); if (!ei) return NULL; @@ -1393,7 +1394,7 @@ static void ext4_destroy_inode(struct inode *inode) static void init_once(void *foo) { - struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; + struct ext4_inode_info *ei = foo; INIT_LIST_HEAD(&ei->i_orphan); init_rwsem(&ei->xattr_sem); @@ -1488,128 +1489,6 @@ static int ext4_nfs_commit_metadata(struct inode *inode) return ext4_write_inode(inode, &wbc); } -#ifdef CONFIG_FS_ENCRYPTION -static int ext4_get_context(struct inode *inode, void *ctx, size_t len) -{ - return ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, len); -} - -static int ext4_set_context(struct inode *inode, const void *ctx, size_t len, - void *fs_data) -{ - handle_t *handle = fs_data; - int res, res2, credits, retries = 0; - - /* - * Encrypting the root directory is not allowed because e2fsck expects - * lost+found to exist and be unencrypted, and encrypting the root - * directory would imply encrypting the lost+found directory as well as - * the filename "lost+found" itself. - */ - if (inode->i_ino == EXT4_ROOT_INO) - return -EPERM; - - if (WARN_ON_ONCE(IS_DAX(inode) && i_size_read(inode))) - return -EINVAL; - - if (ext4_test_inode_flag(inode, EXT4_INODE_DAX)) - return -EOPNOTSUPP; - - res = ext4_convert_inline_data(inode); - if (res) - return res; - - /* - * If a journal handle was specified, then the encryption context is - * being set on a new inode via inheritance and is part of a larger - * transaction to create the inode. Otherwise the encryption context is - * being set on an existing inode in its own transaction. Only in the - * latter case should the "retry on ENOSPC" logic be used. - */ - - if (handle) { - res = ext4_xattr_set_handle(handle, inode, - EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, - ctx, len, 0); - if (!res) { - ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); - ext4_clear_inode_state(inode, - EXT4_STATE_MAY_INLINE_DATA); - /* - * Update inode->i_flags - S_ENCRYPTED will be enabled, - * S_DAX may be disabled - */ - ext4_set_inode_flags(inode, false); - } - return res; - } - - res = dquot_initialize(inode); - if (res) - return res; -retry: - res = ext4_xattr_set_credits(inode, len, false /* is_create */, - &credits); - if (res) - return res; - - handle = ext4_journal_start(inode, EXT4_HT_MISC, credits); - if (IS_ERR(handle)) - return PTR_ERR(handle); - - res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION, - EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, - ctx, len, 0); - if (!res) { - ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); - /* - * Update inode->i_flags - S_ENCRYPTED will be enabled, - * S_DAX may be disabled - */ - ext4_set_inode_flags(inode, false); - res = ext4_mark_inode_dirty(handle, inode); - if (res) - EXT4_ERROR_INODE(inode, "Failed to mark inode dirty"); - } - res2 = ext4_journal_stop(handle); - - if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry; - if (!res) - res = res2; - return res; -} - -static const union fscrypt_policy *ext4_get_dummy_policy(struct super_block *sb) -{ - return EXT4_SB(sb)->s_dummy_enc_policy.policy; -} - -static bool ext4_has_stable_inodes(struct super_block *sb) -{ - return ext4_has_feature_stable_inodes(sb); -} - -static void ext4_get_ino_and_lblk_bits(struct super_block *sb, - int *ino_bits_ret, int *lblk_bits_ret) -{ - *ino_bits_ret = 8 * sizeof(EXT4_SB(sb)->s_es->s_inodes_count); - *lblk_bits_ret = 8 * sizeof(ext4_lblk_t); -} - -static const struct fscrypt_operations ext4_cryptops = { - .key_prefix = "ext4:", - .get_context = ext4_get_context, - .set_context = ext4_set_context, - .get_dummy_policy = ext4_get_dummy_policy, - .empty_dir = ext4_empty_dir, - .has_stable_inodes = ext4_has_stable_inodes, - .get_ino_and_lblk_bits = ext4_get_ino_and_lblk_bits, -}; -#endif - #ifdef CONFIG_QUOTA static const char * const quotatypes[] = INITQFNAMES; #define QTYPE2NAME(t) (quotatypes[t]) @@ -1691,7 +1570,7 @@ enum { Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, Opt_resgid, Opt_resuid, Opt_sb, Opt_nouid32, Opt_debug, Opt_removed, - Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, + Opt_user_xattr, Opt_acl, Opt_auto_da_alloc, Opt_noauto_da_alloc, Opt_noload, Opt_commit, Opt_min_batch_time, Opt_max_batch_time, Opt_journal_dev, Opt_journal_path, Opt_journal_checksum, Opt_journal_async_commit, @@ -1700,7 +1579,7 @@ enum { Opt_inlinecrypt, Opt_usrjquota, Opt_grpjquota, Opt_quota, Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, - Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_i_version, + Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_dax, Opt_dax_always, Opt_dax_inode, Opt_dax_never, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_warn_on_error, Opt_nowarn_on_error, Opt_mblk_io_submit, Opt_debug_want_extra_isize, @@ -1777,9 +1656,7 @@ static const struct fs_parameter_spec ext4_param_specs[] = { fsparam_flag ("oldalloc", Opt_removed), fsparam_flag ("orlov", Opt_removed), fsparam_flag ("user_xattr", Opt_user_xattr), - fsparam_flag ("nouser_xattr", Opt_nouser_xattr), fsparam_flag ("acl", Opt_acl), - fsparam_flag ("noacl", Opt_noacl), fsparam_flag ("norecovery", Opt_noload), fsparam_flag ("noload", Opt_noload), fsparam_flag ("bh", Opt_removed), @@ -1809,7 +1686,7 @@ static const struct fs_parameter_spec ext4_param_specs[] = { fsparam_flag ("barrier", Opt_barrier), fsparam_u32 ("barrier", Opt_barrier), fsparam_flag ("nobarrier", Opt_nobarrier), - fsparam_flag ("i_version", Opt_i_version), + fsparam_flag ("i_version", Opt_removed), fsparam_flag ("dax", Opt_dax), fsparam_enum ("dax", Opt_dax_type, ext4_param_dax), fsparam_u32 ("stripe", Opt_stripe), @@ -1863,11 +1740,6 @@ static const struct fs_parameter_spec ext4_param_specs[] = { }; #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3)) -#define DEFAULT_MB_OPTIMIZE_SCAN (-1) - -static const char deprecated_msg[] = - "Mount option \"%s\" will be removed by %s\n" - "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n"; #define MOPT_SET 0x0001 #define MOPT_CLEAR 0x0002 @@ -1909,6 +1781,7 @@ static const struct mount_opts { MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_warn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_SET}, {Opt_nowarn_on_error, EXT4_MOUNT_WARN_ON_ERROR, MOPT_CLEAR}, + {Opt_commit, 0, MOPT_NO_EXT2}, {Opt_nojournal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, @@ -1929,13 +1802,10 @@ static const struct mount_opts { {Opt_journal_ioprio, 0, MOPT_NO_EXT2}, {Opt_data, 0, MOPT_NO_EXT2}, {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET}, - {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR}, #ifdef CONFIG_EXT4_FS_POSIX_ACL {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET}, - {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR}, #else {Opt_acl, 0, MOPT_NOSUPPORT}, - {Opt_noacl, 0, MOPT_NOSUPPORT}, #endif {Opt_nouid32, EXT4_MOUNT_NO_UID32, MOPT_SET}, {Opt_debug, EXT4_MOUNT_DEBUG, MOPT_SET}, @@ -1962,61 +1832,35 @@ static const struct mount_opts { {Opt_err, 0, 0} }; -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) static const struct ext4_sb_encodings { __u16 magic; char *name; - char *version; + unsigned int version; } ext4_sb_encoding_map[] = { - {EXT4_ENC_UTF8_12_1, "utf8", "12.1.0"}, + {EXT4_ENC_UTF8_12_1, "utf8", UNICODE_AGE(12, 1, 0)}, }; -static int ext4_sb_read_encoding(const struct ext4_super_block *es, - const struct ext4_sb_encodings **encoding, - __u16 *flags) +static const struct ext4_sb_encodings * +ext4_sb_read_encoding(const struct ext4_super_block *es) { __u16 magic = le16_to_cpu(es->s_encoding); int i; for (i = 0; i < ARRAY_SIZE(ext4_sb_encoding_map); i++) if (magic == ext4_sb_encoding_map[i].magic) - break; - - if (i >= ARRAY_SIZE(ext4_sb_encoding_map)) - return -EINVAL; - - *encoding = &ext4_sb_encoding_map[i]; - *flags = le16_to_cpu(es->s_encoding_flags); + return &ext4_sb_encoding_map[i]; - return 0; + return NULL; } #endif -static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg) -{ -#ifdef CONFIG_FS_ENCRYPTION - struct ext4_sb_info *sbi = EXT4_SB(sb); - int err; - - err = fscrypt_set_test_dummy_encryption(sb, arg, - &sbi->s_dummy_enc_policy); - if (err) { - ext4_msg(sb, KERN_WARNING, - "Error while setting test dummy encryption [%d]", err); - return err; - } - ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled"); -#endif - return 0; -} - #define EXT4_SPEC_JQUOTA (1 << 0) #define EXT4_SPEC_JQFMT (1 << 1) #define EXT4_SPEC_DATAJ (1 << 2) #define EXT4_SPEC_SB_BLOCK (1 << 3) #define EXT4_SPEC_JOURNAL_DEV (1 << 4) #define EXT4_SPEC_JOURNAL_IOPRIO (1 << 5) -#define EXT4_SPEC_DUMMY_ENCRYPTION (1 << 6) #define EXT4_SPEC_s_want_extra_isize (1 << 7) #define EXT4_SPEC_s_max_batch_time (1 << 8) #define EXT4_SPEC_s_min_batch_time (1 << 9) @@ -2029,12 +1873,12 @@ static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg) #define EXT4_SPEC_s_commit_interval (1 << 16) #define EXT4_SPEC_s_fc_debug_max_replay (1 << 17) #define EXT4_SPEC_s_sb_block (1 << 18) +#define EXT4_SPEC_mb_optimize_scan (1 << 19) struct ext4_fs_context { char *s_qf_names[EXT4_MAXQUOTAS]; - char *test_dummy_enc_arg; + struct fscrypt_dummy_policy dummy_enc_policy; int s_jquota_fmt; /* Format of quota to use */ - int mb_optimize_scan; #ifdef CONFIG_EXT4_DEBUG int s_fc_debug_max_replay; #endif @@ -2053,8 +1897,8 @@ struct ext4_fs_context { unsigned int mask_s_mount_opt; unsigned int vals_s_mount_opt2; unsigned int mask_s_mount_opt2; - unsigned int vals_s_mount_flags; - unsigned int mask_s_mount_flags; + unsigned long vals_s_mount_flags; + unsigned long mask_s_mount_flags; unsigned int opt_flags; /* MOPT flags */ unsigned int spec; u32 s_max_batch_time; @@ -2075,7 +1919,7 @@ static void ext4_fc_free(struct fs_context *fc) for (i = 0; i < EXT4_MAXQUOTAS; i++) kfree(ctx->s_qf_names[i]); - kfree(ctx->test_dummy_enc_arg); + fscrypt_free_dummy_policy(&ctx->dummy_enc_policy); kfree(ctx); } @@ -2151,29 +1995,65 @@ static int unnote_qf_name(struct fs_context *fc, int qtype) } #endif +static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param, + struct ext4_fs_context *ctx) +{ + int err; + + if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) { + ext4_msg(NULL, KERN_WARNING, + "test_dummy_encryption option not supported"); + return -EINVAL; + } + err = fscrypt_parse_test_dummy_encryption(param, + &ctx->dummy_enc_policy); + if (err == -EINVAL) { + ext4_msg(NULL, KERN_WARNING, + "Value of option \"%s\" is unrecognized", param->key); + } else if (err == -EEXIST) { + ext4_msg(NULL, KERN_WARNING, + "Conflicting test_dummy_encryption options"); + return -EINVAL; + } + return err; +} + #define EXT4_SET_CTX(name) \ static inline void ctx_set_##name(struct ext4_fs_context *ctx, \ unsigned long flag) \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name |= flag; \ -} \ +} + +#define EXT4_CLEAR_CTX(name) \ static inline void ctx_clear_##name(struct ext4_fs_context *ctx, \ unsigned long flag) \ { \ ctx->mask_s_##name |= flag; \ ctx->vals_s_##name &= ~flag; \ -} \ +} + +#define EXT4_TEST_CTX(name) \ static inline unsigned long \ ctx_test_##name(struct ext4_fs_context *ctx, unsigned long flag) \ { \ return (ctx->vals_s_##name & flag); \ -} \ +} -EXT4_SET_CTX(flags); +EXT4_SET_CTX(flags); /* set only */ EXT4_SET_CTX(mount_opt); +EXT4_CLEAR_CTX(mount_opt); +EXT4_TEST_CTX(mount_opt); EXT4_SET_CTX(mount_opt2); -EXT4_SET_CTX(mount_flags); +EXT4_CLEAR_CTX(mount_opt2); +EXT4_TEST_CTX(mount_opt2); + +static inline void ctx_set_mount_flag(struct ext4_fs_context *ctx, int bit) +{ + set_bit(bit, &ctx->mask_s_mount_flags); + set_bit(bit, &ctx->vals_s_mount_flags); +} static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) { @@ -2225,10 +2105,6 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) else return note_qf_name(fc, GRPQUOTA, param); #endif - case Opt_noacl: - case Opt_nouser_xattr: - ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "3.5"); - break; case Opt_sb: if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { ext4_msg(NULL, KERN_WARNING, @@ -2243,12 +2119,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) param->key); return 0; case Opt_abort: - ctx_set_mount_flags(ctx, EXT4_MF_FS_ABORTED); - return 0; - case Opt_i_version: - ext4_msg(NULL, KERN_WARNING, deprecated_msg, param->key, "5.20"); - ext4_msg(NULL, KERN_WARNING, "Use iversion instead\n"); - ctx_set_flags(ctx, SB_I_VERSION); + ctx_set_mount_flag(ctx, EXT4_MF_FS_ABORTED); return 0; case Opt_inlinecrypt: #ifdef CONFIG_FS_ENCRYPTION_INLINE_CRYPT @@ -2400,28 +2271,7 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO; return 0; case Opt_test_dummy_encryption: -#ifdef CONFIG_FS_ENCRYPTION - if (param->type == fs_value_is_flag) { - ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION; - ctx->test_dummy_enc_arg = NULL; - return 0; - } - if (*param->string && - !(!strcmp(param->string, "v1") || - !strcmp(param->string, "v2"))) { - ext4_msg(NULL, KERN_WARNING, - "Value of option \"%s\" is unrecognized", - param->key); - return -EINVAL; - } - ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION; - ctx->test_dummy_enc_arg = kmemdup_nul(param->string, param->size, - GFP_KERNEL); -#else - ext4_msg(NULL, KERN_WARNING, - "Test dummy encryption mount option ignored"); -#endif - return 0; + return ext4_parse_test_dummy_encryption(param, ctx); case Opt_dax: case Opt_dax_type: #ifdef CONFIG_FS_DAX @@ -2459,12 +2309,17 @@ static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param) ctx_clear_mount_opt(ctx, m->mount_opt); return 0; case Opt_mb_optimize_scan: - if (result.int_32 != 0 && result.int_32 != 1) { + if (result.int_32 == 1) { + ctx_set_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN); + ctx->spec |= EXT4_SPEC_mb_optimize_scan; + } else if (result.int_32 == 0) { + ctx_clear_mount_opt2(ctx, EXT4_MOUNT2_MB_OPTIMIZE_SCAN); + ctx->spec |= EXT4_SPEC_mb_optimize_scan; + } else { ext4_msg(NULL, KERN_WARNING, "mb_optimize_scan should be set to 0 or 1."); return -EINVAL; } - ctx->mb_optimize_scan = result.int_32; return 0; } @@ -2607,11 +2462,14 @@ parse_failed: if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO) m_ctx->journal_ioprio = s_ctx->journal_ioprio; - ret = ext4_apply_options(fc, sb); + ext4_apply_options(fc, sb); + ret = 0; out_free: - kfree(s_ctx); - kfree(fc); + if (fc) { + ext4_fc_free(fc); + kfree(fc); + } kfree(s_mount_opts); return ret; } @@ -2771,12 +2629,76 @@ err_jquota_specified: #endif } +static int ext4_check_test_dummy_encryption(const struct fs_context *fc, + struct super_block *sb) +{ + const struct ext4_fs_context *ctx = fc->fs_private; + const struct ext4_sb_info *sbi = EXT4_SB(sb); + int err; + + if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy)) + return 0; + + if (!ext4_has_feature_encrypt(sb)) { + ext4_msg(NULL, KERN_WARNING, + "test_dummy_encryption requires encrypt feature"); + return -EINVAL; + } + /* + * This mount option is just for testing, and it's not worthwhile to + * implement the extra complexity (e.g. RCU protection) that would be + * needed to allow it to be set or changed during remount. We do allow + * it to be specified during remount, but only if there is no change. + */ + if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) { + if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy, + &ctx->dummy_enc_policy)) + return 0; + ext4_msg(NULL, KERN_WARNING, + "Can't set or change test_dummy_encryption on remount"); + return -EINVAL; + } + /* Also make sure s_mount_opts didn't contain a conflicting value. */ + if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) { + if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy, + &ctx->dummy_enc_policy)) + return 0; + ext4_msg(NULL, KERN_WARNING, + "Conflicting test_dummy_encryption options"); + return -EINVAL; + } + /* + * fscrypt_add_test_dummy_key() technically changes the super_block, so + * technically it should be delayed until ext4_apply_options() like the + * other changes. But since we never get here for remounts (see above), + * and this is the last chance to report errors, we do it here. + */ + err = fscrypt_add_test_dummy_key(sb, &ctx->dummy_enc_policy); + if (err) + ext4_msg(NULL, KERN_WARNING, + "Error adding test dummy encryption key [%d]", err); + return err; +} + +static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx, + struct super_block *sb) +{ + if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) || + /* if already set, it was already verified to be the same */ + fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy)) + return; + EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy; + memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy)); + ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled"); +} + static int ext4_check_opt_consistency(struct fs_context *fc, struct super_block *sb) { struct ext4_fs_context *ctx = fc->fs_private; struct ext4_sb_info *sbi = fc->s_fs_info; int is_remount = fc->purpose == FS_CONTEXT_FOR_RECONFIGURE; + int err; if ((ctx->opt_flags & MOPT_NO_EXT2) && IS_EXT2_SB(sb)) { ext4_msg(NULL, KERN_ERR, @@ -2806,20 +2728,9 @@ static int ext4_check_opt_consistency(struct fs_context *fc, "for blocksize < PAGE_SIZE"); } -#ifdef CONFIG_FS_ENCRYPTION - /* - * This mount option is just for testing, and it's not worthwhile to - * implement the extra complexity (e.g. RCU protection) that would be - * needed to allow it to be set or changed during remount. We do allow - * it to be specified during remount, but only if there is no change. - */ - if ((ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION) && - is_remount && !sbi->s_dummy_enc_policy.policy) { - ext4_msg(NULL, KERN_WARNING, - "Can't set test_dummy_encryption on remount"); - return -1; - } -#endif + err = ext4_check_test_dummy_encryption(fc, sb); + if (err) + return err; if ((ctx->spec & EXT4_SPEC_DATAJ) && is_remount) { if (!sbi->s_journal) { @@ -2865,11 +2776,10 @@ fail_dax_change_remount: return ext4_check_quota_consistency(fc, sb); } -static int ext4_apply_options(struct fs_context *fc, struct super_block *sb) +static void ext4_apply_options(struct fs_context *fc, struct super_block *sb) { struct ext4_fs_context *ctx = fc->fs_private; struct ext4_sb_info *sbi = fc->s_fs_info; - int ret = 0; sbi->s_mount_opt &= ~ctx->mask_s_mount_opt; sbi->s_mount_opt |= ctx->vals_s_mount_opt; @@ -2880,14 +2790,6 @@ static int ext4_apply_options(struct fs_context *fc, struct super_block *sb) sb->s_flags &= ~ctx->mask_s_flags; sb->s_flags |= ctx->vals_s_flags; - /* - * i_version differs from common mount option iversion so we have - * to let vfs know that it was set, otherwise it would get cleared - * on remount - */ - if (ctx->mask_s_flags & SB_I_VERSION) - fc->sb_flags |= SB_I_VERSION; - #define APPLY(X) ({ if (ctx->spec & EXT4_SPEC_##X) sbi->X = ctx->X; }) APPLY(s_commit_interval); APPLY(s_stripe); @@ -2905,11 +2807,7 @@ static int ext4_apply_options(struct fs_context *fc, struct super_block *sb) #endif ext4_apply_quota_options(fc, sb); - - if (ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION) - ret = ext4_set_test_dummy_encryption(sb, ctx->test_dummy_enc_arg); - - return ret; + ext4_apply_test_dummy_encryption(ctx, sb); } @@ -3040,8 +2938,6 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); - if (sb->s_flags & SB_I_VERSION) - SEQ_OPTS_PUTS("i_version"); if (nodefs || sbi->s_stripe) SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); if (nodefs || EXT4_MOUNT_DATA_FLAGS & @@ -3081,6 +2977,15 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, } else if (test_opt2(sb, DAX_INODE)) { SEQ_OPTS_PUTS("dax=inode"); } + + if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD && + !test_opt2(sb, MB_OPTIMIZE_SCAN)) { + SEQ_OPTS_PUTS("mb_optimize_scan=0"); + } else if (sbi->s_groups_count < MB_DEFAULT_LINEAR_SCAN_THRESHOLD && + test_opt2(sb, MB_OPTIMIZE_SCAN)) { + SEQ_OPTS_PUTS("mb_optimize_scan=1"); + } + ext4_show_quota_options(seq, sb); return 0; } @@ -3156,8 +3061,6 @@ done: EXT4_BLOCKS_PER_GROUP(sb), EXT4_INODES_PER_GROUP(sb), sbi->s_mount_opt, sbi->s_mount_opt2); - - cleancache_init_fs(sb); return err; } @@ -3478,8 +3381,9 @@ static loff_t ext4_max_size(int blkbits, int has_huge_files) */ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) { - unsigned long long upper_limit, res = EXT4_NDIR_BLOCKS; + loff_t upper_limit, res = EXT4_NDIR_BLOCKS; int meta_blocks; + unsigned int ppb = 1 << (bits - 2); /* * This is calculated to be the largest file size for a dense, block @@ -3511,27 +3415,42 @@ static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) } + /* Compute how many blocks we can address by block tree */ + res += ppb; + res += ppb * ppb; + res += ((loff_t)ppb) * ppb * ppb; + /* Compute how many metadata blocks are needed */ + meta_blocks = 1; + meta_blocks += 1 + ppb; + meta_blocks += 1 + ppb + ppb * ppb; + /* Does block tree limit file size? */ + if (res + meta_blocks <= upper_limit) + goto check_lfs; + + res = upper_limit; + /* How many metadata blocks are needed for addressing upper_limit? */ + upper_limit -= EXT4_NDIR_BLOCKS; /* indirect blocks */ meta_blocks = 1; + upper_limit -= ppb; /* double indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)); - /* tripple indirect blocks */ - meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2))); - - upper_limit -= meta_blocks; - upper_limit <<= bits; - - res += 1LL << (bits-2); - res += 1LL << (2*(bits-2)); - res += 1LL << (3*(bits-2)); + if (upper_limit < ppb * ppb) { + meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb); + res -= meta_blocks; + goto check_lfs; + } + meta_blocks += 1 + ppb; + upper_limit -= ppb * ppb; + /* tripple indirect blocks for the rest */ + meta_blocks += 1 + DIV_ROUND_UP_ULL(upper_limit, ppb) + + DIV_ROUND_UP_ULL(upper_limit, ppb*ppb); + res -= meta_blocks; +check_lfs: res <<= bits; - if (res > upper_limit) - res = upper_limit; - if (res > MAX_LFS_FILESIZE) res = MAX_LFS_FILESIZE; - return (loff_t)res; + return res; } static ext4_fsblk_t descriptor_loc(struct super_block *sb, @@ -3616,7 +3535,7 @@ int ext4_feature_set_ok(struct super_block *sb, int readonly) return 0; } -#ifndef CONFIG_UNICODE +#if !IS_ENABLED(CONFIG_UNICODE) if (ext4_has_feature_casefold(sb)) { ext4_msg(sb, KERN_ERR, "Filesystem with casefold feature cannot be " @@ -3808,12 +3727,13 @@ static struct task_struct *ext4_lazyinit_task; */ static int ext4_lazyinit_thread(void *arg) { - struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg; + struct ext4_lazy_init *eli = arg; struct list_head *pos, *n; struct ext4_li_request *elr; unsigned long next_wakeup, cur; BUG_ON(NULL == eli); + set_freezable(); cont_thread: while (true) { @@ -3858,8 +3778,7 @@ cont_thread: } if (!progress) { elr->lr_next_sched = jiffies + - (prandom_u32() - % (EXT4_DEF_LI_MAX_START_DELAY * HZ)); + prandom_u32_max(EXT4_DEF_LI_MAX_START_DELAY * HZ); } if (time_before(elr->lr_next_sched, next_wakeup)) next_wakeup = elr->lr_next_sched; @@ -4006,8 +3925,8 @@ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb, * spread the inode table initialization requests * better. */ - elr->lr_next_sched = jiffies + (prandom_u32() % - (EXT4_DEF_LI_MAX_START_DELAY * HZ)); + elr->lr_next_sched = jiffies + prandom_u32_max( + EXT4_DEF_LI_MAX_START_DELAY * HZ); return elr; } @@ -4029,9 +3948,9 @@ int ext4_register_li_request(struct super_block *sb, goto out; } - if (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) && - (first_not_zeroed == ngroups || sb_rdonly(sb) || - !test_opt(sb, INIT_INODE_TABLE))) + if (sb_rdonly(sb) || + (test_opt(sb, NO_PREFETCH_BLOCK_BITMAPS) && + (first_not_zeroed == ngroups || !test_opt(sb, INIT_INODE_TABLE)))) goto out; elr = ext4_li_request_new(sb, first_not_zeroed); @@ -4148,9 +4067,11 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp, ext4_fsblk_t first_block, last_block, b; ext4_group_t i, ngroups = ext4_get_groups_count(sb); int s, j, count = 0; + int has_super = ext4_bg_has_super(sb, grp); if (!ext4_has_feature_bigalloc(sb)) - return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) + + return (has_super + ext4_bg_num_gdb(sb, grp) + + (has_super ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0) + sbi->s_itb_per_group + 2); first_block = le32_to_cpu(sbi->s_es->s_first_data_block) + @@ -4326,7 +4247,7 @@ static void ext4_free_sbi(struct ext4_sb_info *sbi) return; kfree(sbi->s_blockgroup_lock); - fs_put_dax(sbi->s_daxdev); + fs_put_dax(sbi->s_daxdev, NULL); kfree(sbi); } @@ -4338,7 +4259,8 @@ static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb) if (!sbi) return NULL; - sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev); + sbi->s_daxdev = fs_dax_get_by_bdev(sb->s_bdev, &sbi->s_dax_part_off, + NULL, NULL); sbi->s_blockgroup_lock = kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); @@ -4350,117 +4272,15 @@ static struct ext4_sb_info *ext4_alloc_sbi(struct super_block *sb) sbi->s_sb = sb; return sbi; err_out: - fs_put_dax(sbi->s_daxdev); + fs_put_dax(sbi->s_daxdev, NULL); kfree(sbi); return NULL; } -static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) +static void ext4_set_def_opts(struct super_block *sb, + struct ext4_super_block *es) { - struct buffer_head *bh, **group_desc; - struct ext4_super_block *es = NULL; - struct ext4_sb_info *sbi = EXT4_SB(sb); - struct flex_groups **flex_groups; - ext4_fsblk_t block; - ext4_fsblk_t logical_sb_block; - unsigned long offset = 0; unsigned long def_mount_opts; - struct inode *root; - int ret = -ENOMEM; - int blocksize, clustersize; - unsigned int db_count; - unsigned int i; - int needs_recovery, has_huge_files; - __u64 blocks_count; - int err = 0; - ext4_group_t first_not_zeroed; - struct ext4_fs_context *ctx = fc->fs_private; - int silent = fc->sb_flags & SB_SILENT; - - /* Set defaults for the variables that will be set during parsing */ - ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; - ctx->mb_optimize_scan = DEFAULT_MB_OPTIMIZE_SCAN; - - sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; - sbi->s_sectors_written_start = - part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); - - /* -EINVAL is default */ - ret = -EINVAL; - blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); - if (!blocksize) { - ext4_msg(sb, KERN_ERR, "unable to set blocksize"); - goto out_fail; - } - - /* - * The ext4 superblock will not be buffer aligned for other than 1kB - * block sizes. We need to calculate the offset from buffer start. - */ - if (blocksize != EXT4_MIN_BLOCK_SIZE) { - logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; - offset = do_div(logical_sb_block, blocksize); - } else { - logical_sb_block = sbi->s_sb_block; - } - - bh = ext4_sb_bread_unmovable(sb, logical_sb_block); - if (IS_ERR(bh)) { - ext4_msg(sb, KERN_ERR, "unable to read superblock"); - ret = PTR_ERR(bh); - goto out_fail; - } - /* - * Note: s_es must be initialized as soon as possible because - * some ext4 macro-instructions depend on its value - */ - es = (struct ext4_super_block *) (bh->b_data + offset); - sbi->s_es = es; - sb->s_magic = le16_to_cpu(es->s_magic); - if (sb->s_magic != EXT4_SUPER_MAGIC) - goto cantfind_ext4; - sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); - - /* Warn if metadata_csum and gdt_csum are both set. */ - if (ext4_has_feature_metadata_csum(sb) && - ext4_has_feature_gdt_csum(sb)) - ext4_warning(sb, "metadata_csum and uninit_bg are " - "redundant flags; please run fsck."); - - /* Check for a known checksum algorithm */ - if (!ext4_verify_csum_type(sb, es)) { - ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " - "unknown checksum algorithm."); - silent = 1; - goto cantfind_ext4; - } - ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE, - ext4_orphan_file_block_trigger); - - /* Load the checksum driver */ - sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(sbi->s_chksum_driver)) { - ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); - ret = PTR_ERR(sbi->s_chksum_driver); - sbi->s_chksum_driver = NULL; - goto failed_mount; - } - - /* Check superblock checksum */ - if (!ext4_superblock_csum_verify(sb, es)) { - ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " - "invalid superblock checksum. Run e2fsck?"); - silent = 1; - ret = -EFSBADCRC; - goto cantfind_ext4; - } - - /* Precompute checksum seed for all metadata */ - if (ext4_has_feature_csum_seed(sb)) - sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); - else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb)) - sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, - sizeof(es->s_uuid)); /* Set defaults before we parse the mount options */ def_mount_opts = le32_to_cpu(es->s_default_mount_opts); @@ -4489,9 +4309,9 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) set_opt(sb, WRITEBACK_DATA); - if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) + if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_PANIC) set_opt(sb, ERRORS_PANIC); - else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_CONTINUE) + else if (le16_to_cpu(es->s_errors) == EXT4_ERRORS_CONTINUE) set_opt(sb, ERRORS_CONT); else set_opt(sb, ERRORS_RO); @@ -4500,12 +4320,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (def_mount_opts & EXT4_DEFM_DISCARD) set_opt(sb, DISCARD); - sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); - sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); - sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; - sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; - sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; - if ((def_mount_opts & EXT4_DEFM_NOBARRIER) == 0) set_opt(sb, BARRIER); @@ -4517,31 +4331,96 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) ((def_mount_opts & EXT4_DEFM_NODELALLOC) == 0)) set_opt(sb, DELALLOC); - /* - * set default s_li_wait_mult for lazyinit, for the case there is - * no mount option specified. - */ - sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; + if (sb->s_blocksize == PAGE_SIZE) + set_opt(sb, DIOREAD_NOLOCK); +} - if (le32_to_cpu(es->s_log_block_size) > - (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log block size: %u", - le32_to_cpu(es->s_log_block_size)); - goto failed_mount; - } - if (le32_to_cpu(es->s_log_cluster_size) > - (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { - ext4_msg(sb, KERN_ERR, - "Invalid log cluster size: %u", - le32_to_cpu(es->s_log_cluster_size)); - goto failed_mount; +static int ext4_handle_clustersize(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + int clustersize; + + /* Handle clustersize */ + clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); + if (ext4_has_feature_bigalloc(sb)) { + if (clustersize < sb->s_blocksize) { + ext4_msg(sb, KERN_ERR, + "cluster size (%d) smaller than " + "block size (%lu)", clustersize, sb->s_blocksize); + return -EINVAL; + } + sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - + le32_to_cpu(es->s_log_block_size); + sbi->s_clusters_per_group = + le32_to_cpu(es->s_clusters_per_group); + if (sbi->s_clusters_per_group > sb->s_blocksize * 8) { + ext4_msg(sb, KERN_ERR, + "#clusters per group too big: %lu", + sbi->s_clusters_per_group); + return -EINVAL; + } + if (sbi->s_blocks_per_group != + (sbi->s_clusters_per_group * (clustersize / sb->s_blocksize))) { + ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " + "clusters per group (%lu) inconsistent", + sbi->s_blocks_per_group, + sbi->s_clusters_per_group); + return -EINVAL; + } + } else { + if (clustersize != sb->s_blocksize) { + ext4_msg(sb, KERN_ERR, + "fragment/cluster size (%d) != " + "block size (%lu)", clustersize, sb->s_blocksize); + return -EINVAL; + } + if (sbi->s_blocks_per_group > sb->s_blocksize * 8) { + ext4_msg(sb, KERN_ERR, + "#blocks per group too big: %lu", + sbi->s_blocks_per_group); + return -EINVAL; + } + sbi->s_clusters_per_group = sbi->s_blocks_per_group; + sbi->s_cluster_bits = 0; } + sbi->s_cluster_ratio = clustersize / sb->s_blocksize; - blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + /* Do we have standard group size of clustersize * 8 blocks ? */ + if (sbi->s_blocks_per_group == clustersize << 3) + set_opt2(sb, STD_GROUP_SIZE); - if (blocksize == PAGE_SIZE) - set_opt(sb, DIOREAD_NOLOCK); + return 0; +} + +static void ext4_fast_commit_init(struct super_block *sb) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + + /* Initialize fast commit stuff */ + atomic_set(&sbi->s_fc_subtid, 0); + INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]); + INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]); + INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]); + INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]); + sbi->s_fc_bytes = 0; + ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); + sbi->s_fc_ineligible_tid = 0; + spin_lock_init(&sbi->s_fc_lock); + memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); + sbi->s_fc_replay_state.fc_regions = NULL; + sbi->s_fc_replay_state.fc_regions_size = 0; + sbi->s_fc_replay_state.fc_regions_used = 0; + sbi->s_fc_replay_state.fc_regions_valid = 0; + sbi->s_fc_replay_state.fc_modified_inodes = NULL; + sbi->s_fc_replay_state.fc_modified_inodes_size = 0; + sbi->s_fc_replay_state.fc_modified_inodes_used = 0; +} + +static int ext4_inode_info_init(struct super_block *sb, + struct ext4_super_block *es) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; @@ -4552,16 +4431,16 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (sbi->s_first_ino < EXT4_GOOD_OLD_FIRST_INO) { ext4_msg(sb, KERN_ERR, "invalid first ino: %u", sbi->s_first_ino); - goto failed_mount; + return -EINVAL; } if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || (!is_power_of_2(sbi->s_inode_size)) || - (sbi->s_inode_size > blocksize)) { + (sbi->s_inode_size > sb->s_blocksize)) { ext4_msg(sb, KERN_ERR, "unsupported inode size: %d", sbi->s_inode_size); - ext4_msg(sb, KERN_ERR, "blocksize: %d", blocksize); - goto failed_mount; + ext4_msg(sb, KERN_ERR, "blocksize: %lu", sb->s_blocksize); + return -EINVAL; } /* * i_atime_extra is the last extra field available for @@ -4579,6 +4458,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) } sb->s_time_min = EXT4_TIMESTAMP_MIN; } + if (sbi->s_inode_size > EXT4_GOOD_OLD_INODE_SIZE) { sbi->s_want_extra_isize = sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE; @@ -4590,7 +4470,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (v > max) { ext4_msg(sb, KERN_ERR, "bad s_want_extra_isize: %d", v); - goto failed_mount; + return -EINVAL; } if (sbi->s_want_extra_isize < v) sbi->s_want_extra_isize = v; @@ -4599,87 +4479,112 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (v > max) { ext4_msg(sb, KERN_ERR, "bad s_min_extra_isize: %d", v); - goto failed_mount; + return -EINVAL; } if (sbi->s_want_extra_isize < v) sbi->s_want_extra_isize = v; } } - err = parse_apply_sb_mount_options(sb, ctx); - if (err < 0) - goto failed_mount; + return 0; +} - sbi->s_def_mount_opt = sbi->s_mount_opt; +#if IS_ENABLED(CONFIG_UNICODE) +static int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es) +{ + const struct ext4_sb_encodings *encoding_info; + struct unicode_map *encoding; + __u16 encoding_flags = le16_to_cpu(es->s_encoding_flags); - err = ext4_check_opt_consistency(fc, sb); - if (err < 0) - goto failed_mount; + if (!ext4_has_feature_casefold(sb) || sb->s_encoding) + return 0; - err = ext4_apply_options(fc, sb); - if (err < 0) - goto failed_mount; + encoding_info = ext4_sb_read_encoding(es); + if (!encoding_info) { + ext4_msg(sb, KERN_ERR, + "Encoding requested by superblock is unknown"); + return -EINVAL; + } -#ifdef CONFIG_UNICODE - if (ext4_has_feature_casefold(sb) && !sb->s_encoding) { - const struct ext4_sb_encodings *encoding_info; - struct unicode_map *encoding; - __u16 encoding_flags; + encoding = utf8_load(encoding_info->version); + if (IS_ERR(encoding)) { + ext4_msg(sb, KERN_ERR, + "can't mount with superblock charset: %s-%u.%u.%u " + "not supported by the kernel. flags: 0x%x.", + encoding_info->name, + unicode_major(encoding_info->version), + unicode_minor(encoding_info->version), + unicode_rev(encoding_info->version), + encoding_flags); + return -EINVAL; + } + ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: " + "%s-%u.%u.%u with flags 0x%hx", encoding_info->name, + unicode_major(encoding_info->version), + unicode_minor(encoding_info->version), + unicode_rev(encoding_info->version), + encoding_flags); - if (ext4_sb_read_encoding(es, &encoding_info, - &encoding_flags)) { - ext4_msg(sb, KERN_ERR, - "Encoding requested by superblock is unknown"); - goto failed_mount; - } + sb->s_encoding = encoding; + sb->s_encoding_flags = encoding_flags; - encoding = utf8_load(encoding_info->version); - if (IS_ERR(encoding)) { - ext4_msg(sb, KERN_ERR, - "can't mount with superblock charset: %s-%s " - "not supported by the kernel. flags: 0x%x.", - encoding_info->name, encoding_info->version, - encoding_flags); - goto failed_mount; - } - ext4_msg(sb, KERN_INFO,"Using encoding defined by superblock: " - "%s-%s with flags 0x%hx", encoding_info->name, - encoding_info->version?:"\b", encoding_flags); + return 0; +} +#else +static inline int ext4_encoding_init(struct super_block *sb, struct ext4_super_block *es) +{ + return 0; +} +#endif + +static int ext4_init_metadata_csum(struct super_block *sb, struct ext4_super_block *es) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); - sb->s_encoding = encoding; - sb->s_encoding_flags = encoding_flags; + /* Warn if metadata_csum and gdt_csum are both set. */ + if (ext4_has_feature_metadata_csum(sb) && + ext4_has_feature_gdt_csum(sb)) + ext4_warning(sb, "metadata_csum and uninit_bg are " + "redundant flags; please run fsck."); + + /* Check for a known checksum algorithm */ + if (!ext4_verify_csum_type(sb, es)) { + ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " + "unknown checksum algorithm."); + return -EINVAL; } -#endif + ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE, + ext4_orphan_file_block_trigger); - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { - printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with data=journal disables delayed allocation, dioread_nolock, O_DIRECT and fast_commit support!\n"); - /* can't mount with both data=journal and dioread_nolock. */ - clear_opt(sb, DIOREAD_NOLOCK); - clear_opt2(sb, JOURNAL_FAST_COMMIT); - if (test_opt2(sb, EXPLICIT_DELALLOC)) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "both data=journal and delalloc"); - goto failed_mount; - } - if (test_opt(sb, DAX_ALWAYS)) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "both data=journal and dax"); - goto failed_mount; - } - if (ext4_has_feature_encrypt(sb)) { - ext4_msg(sb, KERN_WARNING, - "encrypted files will use data=ordered " - "instead of data journaling mode"); - } - if (test_opt(sb, DELALLOC)) - clear_opt(sb, DELALLOC); - } else { - sb->s_iflags |= SB_I_CGROUPWB; + /* Load the checksum driver */ + sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); + if (IS_ERR(sbi->s_chksum_driver)) { + int ret = PTR_ERR(sbi->s_chksum_driver); + ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); + sbi->s_chksum_driver = NULL; + return ret; } - sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | - (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); + /* Check superblock checksum */ + if (!ext4_superblock_csum_verify(sb, es)) { + ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " + "invalid superblock checksum. Run e2fsck?"); + return -EFSBADCRC; + } + + /* Precompute checksum seed for all metadata */ + if (ext4_has_feature_csum_seed(sb)) + sbi->s_csum_seed = le32_to_cpu(es->s_checksum_seed); + else if (ext4_has_metadata_csum(sb) || ext4_has_feature_ea_inode(sb)) + sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, + sizeof(es->s_uuid)); + return 0; +} +static int ext4_check_feature_compatibility(struct super_block *sb, + struct ext4_super_block *es, + int silent) +{ if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && (ext4_has_compat_features(sb) || ext4_has_ro_compat_features(sb) || @@ -4693,7 +4598,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (ext4_has_feature_64bit(sb)) { ext4_msg(sb, KERN_ERR, "The Hurd can't support 64-bit file systems"); - goto failed_mount; + return -EINVAL; } /* @@ -4703,7 +4608,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (ext4_has_feature_ea_inode(sb)) { ext4_msg(sb, KERN_ERR, "ea_inode feature is not supported for Hurd"); - goto failed_mount; + return -EINVAL; } } @@ -4717,10 +4622,10 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) * it's actually an ext[34] filesystem. */ if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb))) - goto failed_mount; + return -EINVAL; ext4_msg(sb, KERN_ERR, "couldn't mount as ext2 due " "to feature incompatibilities"); - goto failed_mount; + return -EINVAL; } } @@ -4734,10 +4639,10 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) * it's actually an ext4 filesystem. */ if (silent && ext4_feature_set_ok(sb, sb_rdonly(sb))) - goto failed_mount; + return -EINVAL; ext4_msg(sb, KERN_ERR, "couldn't mount as ext3 due " "to feature incompatibilities"); - goto failed_mount; + return -EINVAL; } } @@ -4747,18 +4652,475 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) * so there is a chance incompat flags are set on a rev 0 filesystem. */ if (!ext4_feature_set_ok(sb, (sb_rdonly(sb)))) + return -EINVAL; + + return 0; +} + +static int ext4_geometry_check(struct super_block *sb, + struct ext4_super_block *es) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + __u64 blocks_count; + + /* check blocks count against device size */ + blocks_count = sb_bdev_nr_blocks(sb); + if (blocks_count && ext4_blocks_count(es) > blocks_count) { + ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " + "exceeds size of device (%llu blocks)", + ext4_blocks_count(es), blocks_count); + return -EINVAL; + } + + /* + * It makes no sense for the first data block to be beyond the end + * of the filesystem. + */ + if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { + ext4_msg(sb, KERN_WARNING, "bad geometry: first data " + "block %u is beyond end of filesystem (%llu)", + le32_to_cpu(es->s_first_data_block), + ext4_blocks_count(es)); + return -EINVAL; + } + if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) && + (sbi->s_cluster_ratio == 1)) { + ext4_msg(sb, KERN_WARNING, "bad geometry: first data " + "block is 0 with a 1k block and cluster size"); + return -EINVAL; + } + + blocks_count = (ext4_blocks_count(es) - + le32_to_cpu(es->s_first_data_block) + + EXT4_BLOCKS_PER_GROUP(sb) - 1); + do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); + if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { + ext4_msg(sb, KERN_WARNING, "groups count too large: %llu " + "(block count %llu, first data block %u, " + "blocks per group %lu)", blocks_count, + ext4_blocks_count(es), + le32_to_cpu(es->s_first_data_block), + EXT4_BLOCKS_PER_GROUP(sb)); + return -EINVAL; + } + sbi->s_groups_count = blocks_count; + sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, + (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); + if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != + le32_to_cpu(es->s_inodes_count)) { + ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", + le32_to_cpu(es->s_inodes_count), + ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); + return -EINVAL; + } + + return 0; +} + +static void ext4_group_desc_free(struct ext4_sb_info *sbi) +{ + struct buffer_head **group_desc; + int i; + + rcu_read_lock(); + group_desc = rcu_dereference(sbi->s_group_desc); + for (i = 0; i < sbi->s_gdb_count; i++) + brelse(group_desc[i]); + kvfree(group_desc); + rcu_read_unlock(); +} + +static int ext4_group_desc_init(struct super_block *sb, + struct ext4_super_block *es, + ext4_fsblk_t logical_sb_block, + ext4_group_t *first_not_zeroed) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + unsigned int db_count; + ext4_fsblk_t block; + int ret; + int i; + + db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / + EXT4_DESC_PER_BLOCK(sb); + if (ext4_has_feature_meta_bg(sb)) { + if (le32_to_cpu(es->s_first_meta_bg) > db_count) { + ext4_msg(sb, KERN_WARNING, + "first meta block group too large: %u " + "(group descriptor block count %u)", + le32_to_cpu(es->s_first_meta_bg), db_count); + return -EINVAL; + } + } + rcu_assign_pointer(sbi->s_group_desc, + kvmalloc_array(db_count, + sizeof(struct buffer_head *), + GFP_KERNEL)); + if (sbi->s_group_desc == NULL) { + ext4_msg(sb, KERN_ERR, "not enough memory"); + return -ENOMEM; + } + + bgl_lock_init(sbi->s_blockgroup_lock); + + /* Pre-read the descriptors into the buffer cache */ + for (i = 0; i < db_count; i++) { + block = descriptor_loc(sb, logical_sb_block, i); + ext4_sb_breadahead_unmovable(sb, block); + } + + for (i = 0; i < db_count; i++) { + struct buffer_head *bh; + + block = descriptor_loc(sb, logical_sb_block, i); + bh = ext4_sb_bread_unmovable(sb, block); + if (IS_ERR(bh)) { + ext4_msg(sb, KERN_ERR, + "can't read group descriptor %d", i); + sbi->s_gdb_count = i; + ret = PTR_ERR(bh); + goto out; + } + rcu_read_lock(); + rcu_dereference(sbi->s_group_desc)[i] = bh; + rcu_read_unlock(); + } + sbi->s_gdb_count = db_count; + if (!ext4_check_descriptors(sb, logical_sb_block, first_not_zeroed)) { + ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); + ret = -EFSCORRUPTED; + goto out; + } + return 0; +out: + ext4_group_desc_free(sbi); + return ret; +} + +static int ext4_load_and_init_journal(struct super_block *sb, + struct ext4_super_block *es, + struct ext4_fs_context *ctx) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + int err; + + err = ext4_load_journal(sb, es, ctx->journal_devnum); + if (err) + return err; + + if (ext4_has_feature_64bit(sb) && + !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_64BIT)) { + ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); + goto out; + } + + if (!set_journal_csum_feature_set(sb)) { + ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " + "feature set"); + goto out; + } + + if (test_opt2(sb, JOURNAL_FAST_COMMIT) && + !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, + JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) { + ext4_msg(sb, KERN_ERR, + "Failed to set fast commit journal feature"); + goto out; + } + + /* We have now updated the journal if required, so we can + * validate the data journaling mode. */ + switch (test_opt(sb, DATA_FLAGS)) { + case 0: + /* No mode set, assume a default based on the journal + * capabilities: ORDERED_DATA if the journal can + * cope, else JOURNAL_DATA + */ + if (jbd2_journal_check_available_features + (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { + set_opt(sb, ORDERED_DATA); + sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA; + } else { + set_opt(sb, JOURNAL_DATA); + sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; + } + break; + + case EXT4_MOUNT_ORDERED_DATA: + case EXT4_MOUNT_WRITEBACK_DATA: + if (!jbd2_journal_check_available_features + (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { + ext4_msg(sb, KERN_ERR, "Journal does not support " + "requested data journaling mode"); + goto out; + } + break; + default: + break; + } + + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA && + test_opt(sb, JOURNAL_ASYNC_COMMIT)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "journal_async_commit in data=ordered mode"); + goto out; + } + + set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio); + + sbi->s_journal->j_submit_inode_data_buffers = + ext4_journal_submit_inode_data_buffers; + sbi->s_journal->j_finish_inode_data_buffers = + ext4_journal_finish_inode_data_buffers; + + return 0; + +out: + /* flush s_error_work before journal destroy. */ + flush_work(&sbi->s_error_work); + jbd2_journal_destroy(sbi->s_journal); + sbi->s_journal = NULL; + return -EINVAL; +} + +static int ext4_journal_data_mode_check(struct super_block *sb) +{ + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { + printk_once(KERN_WARNING "EXT4-fs: Warning: mounting with " + "data=journal disables delayed allocation, " + "dioread_nolock, O_DIRECT and fast_commit support!\n"); + /* can't mount with both data=journal and dioread_nolock. */ + clear_opt(sb, DIOREAD_NOLOCK); + clear_opt2(sb, JOURNAL_FAST_COMMIT); + if (test_opt2(sb, EXPLICIT_DELALLOC)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and delalloc"); + return -EINVAL; + } + if (test_opt(sb, DAX_ALWAYS)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and dax"); + return -EINVAL; + } + if (ext4_has_feature_encrypt(sb)) { + ext4_msg(sb, KERN_WARNING, + "encrypted files will use data=ordered " + "instead of data journaling mode"); + } + if (test_opt(sb, DELALLOC)) + clear_opt(sb, DELALLOC); + } else { + sb->s_iflags |= SB_I_CGROUPWB; + } + + return 0; +} + +static int ext4_load_super(struct super_block *sb, ext4_fsblk_t *lsb, + int silent) +{ + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es; + ext4_fsblk_t logical_sb_block; + unsigned long offset = 0; + struct buffer_head *bh; + int ret = -EINVAL; + int blocksize; + + blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); + if (!blocksize) { + ext4_msg(sb, KERN_ERR, "unable to set blocksize"); + return -EINVAL; + } + + /* + * The ext4 superblock will not be buffer aligned for other than 1kB + * block sizes. We need to calculate the offset from buffer start. + */ + if (blocksize != EXT4_MIN_BLOCK_SIZE) { + logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; + offset = do_div(logical_sb_block, blocksize); + } else { + logical_sb_block = sbi->s_sb_block; + } + + bh = ext4_sb_bread_unmovable(sb, logical_sb_block); + if (IS_ERR(bh)) { + ext4_msg(sb, KERN_ERR, "unable to read superblock"); + return PTR_ERR(bh); + } + /* + * Note: s_es must be initialized as soon as possible because + * some ext4 macro-instructions depend on its value + */ + es = (struct ext4_super_block *) (bh->b_data + offset); + sbi->s_es = es; + sb->s_magic = le16_to_cpu(es->s_magic); + if (sb->s_magic != EXT4_SUPER_MAGIC) { + if (!silent) + ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); + goto out; + } + + if (le32_to_cpu(es->s_log_block_size) > + (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log block size: %u", + le32_to_cpu(es->s_log_block_size)); + goto out; + } + if (le32_to_cpu(es->s_log_cluster_size) > + (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) { + ext4_msg(sb, KERN_ERR, + "Invalid log cluster size: %u", + le32_to_cpu(es->s_log_cluster_size)); + goto out; + } + + blocksize = EXT4_MIN_BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + + /* + * If the default block size is not the same as the real block size, + * we need to reload it. + */ + if (sb->s_blocksize == blocksize) { + *lsb = logical_sb_block; + sbi->s_sbh = bh; + return 0; + } + + /* + * bh must be released before kill_bdev(), otherwise + * it won't be freed and its page also. kill_bdev() + * is called by sb_set_blocksize(). + */ + brelse(bh); + /* Validate the filesystem blocksize */ + if (!sb_set_blocksize(sb, blocksize)) { + ext4_msg(sb, KERN_ERR, "bad block size %d", + blocksize); + bh = NULL; + goto out; + } + + logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; + offset = do_div(logical_sb_block, blocksize); + bh = ext4_sb_bread_unmovable(sb, logical_sb_block); + if (IS_ERR(bh)) { + ext4_msg(sb, KERN_ERR, "Can't read superblock on 2nd try"); + ret = PTR_ERR(bh); + bh = NULL; + goto out; + } + es = (struct ext4_super_block *)(bh->b_data + offset); + sbi->s_es = es; + if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { + ext4_msg(sb, KERN_ERR, "Magic mismatch, very weird!"); + goto out; + } + *lsb = logical_sb_block; + sbi->s_sbh = bh; + return 0; +out: + brelse(bh); + return ret; +} + +static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) +{ + struct ext4_super_block *es = NULL; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct flex_groups **flex_groups; + ext4_fsblk_t block; + ext4_fsblk_t logical_sb_block; + struct inode *root; + int ret = -ENOMEM; + unsigned int i; + int needs_recovery, has_huge_files; + int err = 0; + ext4_group_t first_not_zeroed; + struct ext4_fs_context *ctx = fc->fs_private; + int silent = fc->sb_flags & SB_SILENT; + + /* Set defaults for the variables that will be set during parsing */ + if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) + ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; + + sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; + sbi->s_sectors_written_start = + part_stat_read(sb->s_bdev, sectors[STAT_WRITE]); + + /* -EINVAL is default */ + ret = -EINVAL; + err = ext4_load_super(sb, &logical_sb_block, silent); + if (err) + goto out_fail; + + es = sbi->s_es; + sbi->s_kbytes_written = le64_to_cpu(es->s_kbytes_written); + + err = ext4_init_metadata_csum(sb, es); + if (err) goto failed_mount; - if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) { + ext4_set_def_opts(sb, es); + + sbi->s_resuid = make_kuid(&init_user_ns, le16_to_cpu(es->s_def_resuid)); + sbi->s_resgid = make_kgid(&init_user_ns, le16_to_cpu(es->s_def_resgid)); + sbi->s_commit_interval = JBD2_DEFAULT_MAX_COMMIT_AGE * HZ; + sbi->s_min_batch_time = EXT4_DEF_MIN_BATCH_TIME; + sbi->s_max_batch_time = EXT4_DEF_MAX_BATCH_TIME; + + /* + * set default s_li_wait_mult for lazyinit, for the case there is + * no mount option specified. + */ + sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; + + if (ext4_inode_info_init(sb, es)) + goto failed_mount; + + err = parse_apply_sb_mount_options(sb, ctx); + if (err < 0) + goto failed_mount; + + sbi->s_def_mount_opt = sbi->s_mount_opt; + + err = ext4_check_opt_consistency(fc, sb); + if (err < 0) + goto failed_mount; + + ext4_apply_options(fc, sb); + + if (ext4_encoding_init(sb, es)) + goto failed_mount; + + if (ext4_journal_data_mode_check(sb)) + goto failed_mount; + + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); + + /* i_version is always enabled now */ + sb->s_flags |= SB_I_VERSION; + + if (ext4_check_feature_compatibility(sb, es, silent)) + goto failed_mount; + + if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (sb->s_blocksize / 4)) { ext4_msg(sb, KERN_ERR, "Number of reserved GDT blocks insanely large: %d", le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks)); goto failed_mount; } - if (dax_supported(sbi->s_daxdev, sb->s_bdev, blocksize, 0, - bdev_nr_sectors(sb->s_bdev))) - set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); + if (sbi->s_daxdev) { + if (sb->s_blocksize == PAGE_SIZE) + set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags); + else + ext4_msg(sb, KERN_ERR, "unsupported blocksize for DAX\n"); + } if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) { if (ext4_has_feature_inline_data(sb)) { @@ -4779,40 +5141,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) goto failed_mount; } - if (sb->s_blocksize != blocksize) { - /* - * bh must be released before kill_bdev(), otherwise - * it won't be freed and its page also. kill_bdev() - * is called by sb_set_blocksize(). - */ - brelse(bh); - /* Validate the filesystem blocksize */ - if (!sb_set_blocksize(sb, blocksize)) { - ext4_msg(sb, KERN_ERR, "bad block size %d", - blocksize); - bh = NULL; - goto failed_mount; - } - - logical_sb_block = sbi->s_sb_block * EXT4_MIN_BLOCK_SIZE; - offset = do_div(logical_sb_block, blocksize); - bh = ext4_sb_bread_unmovable(sb, logical_sb_block); - if (IS_ERR(bh)) { - ext4_msg(sb, KERN_ERR, - "Can't read superblock on 2nd try"); - ret = PTR_ERR(bh); - bh = NULL; - goto failed_mount; - } - es = (struct ext4_super_block *)(bh->b_data + offset); - sbi->s_es = es; - if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { - ext4_msg(sb, KERN_ERR, - "Magic mismatch, very weird!"); - goto failed_mount; - } - } - has_huge_files = ext4_has_feature_huge_file(sb); sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, has_huge_files); @@ -4834,20 +5162,22 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); - sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); - if (sbi->s_inodes_per_block == 0) - goto cantfind_ext4; + sbi->s_inodes_per_block = sb->s_blocksize / EXT4_INODE_SIZE(sb); + if (sbi->s_inodes_per_block == 0 || sbi->s_blocks_per_group == 0) { + if (!silent) + ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); + goto failed_mount; + } if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || - sbi->s_inodes_per_group > blocksize * 8) { + sbi->s_inodes_per_group > sb->s_blocksize * 8) { ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n", sbi->s_inodes_per_group); goto failed_mount; } sbi->s_itb_per_group = sbi->s_inodes_per_group / sbi->s_inodes_per_block; - sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); - sbi->s_sbh = bh; - sbi->s_mount_state = le16_to_cpu(es->s_state); + sbi->s_desc_per_block = sb->s_blocksize / EXT4_DESC_SIZE(sb); + sbi->s_mount_state = le16_to_cpu(es->s_state) & ~EXT4_FC_REPLAY; sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); @@ -4872,54 +5202,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) } } - /* Handle clustersize */ - clustersize = BLOCK_SIZE << le32_to_cpu(es->s_log_cluster_size); - if (ext4_has_feature_bigalloc(sb)) { - if (clustersize < blocksize) { - ext4_msg(sb, KERN_ERR, - "cluster size (%d) smaller than " - "block size (%d)", clustersize, blocksize); - goto failed_mount; - } - sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) - - le32_to_cpu(es->s_log_block_size); - sbi->s_clusters_per_group = - le32_to_cpu(es->s_clusters_per_group); - if (sbi->s_clusters_per_group > blocksize * 8) { - ext4_msg(sb, KERN_ERR, - "#clusters per group too big: %lu", - sbi->s_clusters_per_group); - goto failed_mount; - } - if (sbi->s_blocks_per_group != - (sbi->s_clusters_per_group * (clustersize / blocksize))) { - ext4_msg(sb, KERN_ERR, "blocks per group (%lu) and " - "clusters per group (%lu) inconsistent", - sbi->s_blocks_per_group, - sbi->s_clusters_per_group); - goto failed_mount; - } - } else { - if (clustersize != blocksize) { - ext4_msg(sb, KERN_ERR, - "fragment/cluster size (%d) != " - "block size (%d)", clustersize, blocksize); - goto failed_mount; - } - if (sbi->s_blocks_per_group > blocksize * 8) { - ext4_msg(sb, KERN_ERR, - "#blocks per group too big: %lu", - sbi->s_blocks_per_group); - goto failed_mount; - } - sbi->s_clusters_per_group = sbi->s_blocks_per_group; - sbi->s_cluster_bits = 0; - } - sbi->s_cluster_ratio = clustersize / blocksize; - - /* Do we have standard group size of clustersize * 8 blocks ? */ - if (sbi->s_blocks_per_group == clustersize << 3) - set_opt2(sb, STD_GROUP_SIZE); + if (ext4_handle_clustersize(sb)) + goto failed_mount; /* * Test whether we have more sectors than will fit in sector_t, @@ -4933,111 +5217,12 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) goto failed_mount; } - if (EXT4_BLOCKS_PER_GROUP(sb) == 0) - goto cantfind_ext4; - - /* check blocks count against device size */ - blocks_count = sb_bdev_nr_blocks(sb); - if (blocks_count && ext4_blocks_count(es) > blocks_count) { - ext4_msg(sb, KERN_WARNING, "bad geometry: block count %llu " - "exceeds size of device (%llu blocks)", - ext4_blocks_count(es), blocks_count); + if (ext4_geometry_check(sb, es)) goto failed_mount; - } - /* - * It makes no sense for the first data block to be beyond the end - * of the filesystem. - */ - if (le32_to_cpu(es->s_first_data_block) >= ext4_blocks_count(es)) { - ext4_msg(sb, KERN_WARNING, "bad geometry: first data " - "block %u is beyond end of filesystem (%llu)", - le32_to_cpu(es->s_first_data_block), - ext4_blocks_count(es)); - goto failed_mount; - } - if ((es->s_first_data_block == 0) && (es->s_log_block_size == 0) && - (sbi->s_cluster_ratio == 1)) { - ext4_msg(sb, KERN_WARNING, "bad geometry: first data " - "block is 0 with a 1k block and cluster size"); - goto failed_mount; - } - - blocks_count = (ext4_blocks_count(es) - - le32_to_cpu(es->s_first_data_block) + - EXT4_BLOCKS_PER_GROUP(sb) - 1); - do_div(blocks_count, EXT4_BLOCKS_PER_GROUP(sb)); - if (blocks_count > ((uint64_t)1<<32) - EXT4_DESC_PER_BLOCK(sb)) { - ext4_msg(sb, KERN_WARNING, "groups count too large: %llu " - "(block count %llu, first data block %u, " - "blocks per group %lu)", blocks_count, - ext4_blocks_count(es), - le32_to_cpu(es->s_first_data_block), - EXT4_BLOCKS_PER_GROUP(sb)); - goto failed_mount; - } - sbi->s_groups_count = blocks_count; - sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count, - (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); - if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) != - le32_to_cpu(es->s_inodes_count)) { - ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu", - le32_to_cpu(es->s_inodes_count), - ((u64)sbi->s_groups_count * sbi->s_inodes_per_group)); - ret = -EINVAL; - goto failed_mount; - } - db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / - EXT4_DESC_PER_BLOCK(sb); - if (ext4_has_feature_meta_bg(sb)) { - if (le32_to_cpu(es->s_first_meta_bg) > db_count) { - ext4_msg(sb, KERN_WARNING, - "first meta block group too large: %u " - "(group descriptor block count %u)", - le32_to_cpu(es->s_first_meta_bg), db_count); - goto failed_mount; - } - } - rcu_assign_pointer(sbi->s_group_desc, - kvmalloc_array(db_count, - sizeof(struct buffer_head *), - GFP_KERNEL)); - if (sbi->s_group_desc == NULL) { - ext4_msg(sb, KERN_ERR, "not enough memory"); - ret = -ENOMEM; + err = ext4_group_desc_init(sb, es, logical_sb_block, &first_not_zeroed); + if (err) goto failed_mount; - } - - bgl_lock_init(sbi->s_blockgroup_lock); - - /* Pre-read the descriptors into the buffer cache */ - for (i = 0; i < db_count; i++) { - block = descriptor_loc(sb, logical_sb_block, i); - ext4_sb_breadahead_unmovable(sb, block); - } - - for (i = 0; i < db_count; i++) { - struct buffer_head *bh; - - block = descriptor_loc(sb, logical_sb_block, i); - bh = ext4_sb_bread_unmovable(sb, block); - if (IS_ERR(bh)) { - ext4_msg(sb, KERN_ERR, - "can't read group descriptor %d", i); - db_count = i; - ret = PTR_ERR(bh); - goto failed_mount2; - } - rcu_read_lock(); - rcu_dereference(sbi->s_group_desc)[i] = bh; - rcu_read_unlock(); - } - sbi->s_gdb_count = db_count; - if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) { - ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); - ret = -EFSCORRUPTED; - goto failed_mount2; - } timer_setup(&sbi->s_err_report, print_daily_error_info, 0); spin_lock_init(&sbi->s_error_lock); @@ -5075,24 +5260,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ mutex_init(&sbi->s_orphan_lock); - /* Initialize fast commit stuff */ - atomic_set(&sbi->s_fc_subtid, 0); - INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_MAIN]); - INIT_LIST_HEAD(&sbi->s_fc_q[FC_Q_STAGING]); - INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_MAIN]); - INIT_LIST_HEAD(&sbi->s_fc_dentry_q[FC_Q_STAGING]); - sbi->s_fc_bytes = 0; - ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE); - ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING); - spin_lock_init(&sbi->s_fc_lock); - memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats)); - sbi->s_fc_replay_state.fc_regions = NULL; - sbi->s_fc_replay_state.fc_regions_size = 0; - sbi->s_fc_replay_state.fc_regions_used = 0; - sbi->s_fc_replay_state.fc_regions_valid = 0; - sbi->s_fc_replay_state.fc_modified_inodes = NULL; - sbi->s_fc_replay_state.fc_modified_inodes_size = 0; - sbi->s_fc_replay_state.fc_modified_inodes_used = 0; + ext4_fast_commit_init(sb); sb->s_root = NULL; @@ -5109,37 +5277,37 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) * root first: it may be modified in the journal! */ if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) { - err = ext4_load_journal(sb, es, ctx->journal_devnum); + err = ext4_load_and_init_journal(sb, es, ctx); if (err) goto failed_mount3a; } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) && ext4_has_feature_journal_needs_recovery(sb)) { ext4_msg(sb, KERN_ERR, "required journal recovery " "suppressed and not mounted read-only"); - goto failed_mount_wq; + goto failed_mount3a; } else { /* Nojournal mode, all journal mount options are illegal */ if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { ext4_msg(sb, KERN_ERR, "can't mount with " "journal_checksum, fs mounted w/o journal"); - goto failed_mount_wq; + goto failed_mount3a; } if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { ext4_msg(sb, KERN_ERR, "can't mount with " "journal_async_commit, fs mounted w/o journal"); - goto failed_mount_wq; + goto failed_mount3a; } if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { ext4_msg(sb, KERN_ERR, "can't mount with " "commit=%lu, fs mounted w/o journal", sbi->s_commit_interval / HZ); - goto failed_mount_wq; + goto failed_mount3a; } if (EXT4_MOUNT_DATA_FLAGS & (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) { ext4_msg(sb, KERN_ERR, "can't mount with " "data=, fs mounted w/o journal"); - goto failed_mount_wq; + goto failed_mount3a; } sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM; clear_opt(sb, JOURNAL_CHECKSUM); @@ -5147,76 +5315,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) clear_opt2(sb, JOURNAL_FAST_COMMIT); sbi->s_journal = NULL; needs_recovery = 0; - goto no_journal; - } - - if (ext4_has_feature_64bit(sb) && - !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, - JBD2_FEATURE_INCOMPAT_64BIT)) { - ext4_msg(sb, KERN_ERR, "Failed to set 64-bit journal feature"); - goto failed_mount_wq; - } - - if (!set_journal_csum_feature_set(sb)) { - ext4_msg(sb, KERN_ERR, "Failed to set journal checksum " - "feature set"); - goto failed_mount_wq; - } - - if (test_opt2(sb, JOURNAL_FAST_COMMIT) && - !jbd2_journal_set_features(EXT4_SB(sb)->s_journal, 0, 0, - JBD2_FEATURE_INCOMPAT_FAST_COMMIT)) { - ext4_msg(sb, KERN_ERR, - "Failed to set fast commit journal feature"); - goto failed_mount_wq; } - /* We have now updated the journal if required, so we can - * validate the data journaling mode. */ - switch (test_opt(sb, DATA_FLAGS)) { - case 0: - /* No mode set, assume a default based on the journal - * capabilities: ORDERED_DATA if the journal can - * cope, else JOURNAL_DATA - */ - if (jbd2_journal_check_available_features - (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { - set_opt(sb, ORDERED_DATA); - sbi->s_def_mount_opt |= EXT4_MOUNT_ORDERED_DATA; - } else { - set_opt(sb, JOURNAL_DATA); - sbi->s_def_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; - } - break; - - case EXT4_MOUNT_ORDERED_DATA: - case EXT4_MOUNT_WRITEBACK_DATA: - if (!jbd2_journal_check_available_features - (sbi->s_journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)) { - ext4_msg(sb, KERN_ERR, "Journal does not support " - "requested data journaling mode"); - goto failed_mount_wq; - } - break; - default: - break; - } - - if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA && - test_opt(sb, JOURNAL_ASYNC_COMMIT)) { - ext4_msg(sb, KERN_ERR, "can't mount with " - "journal_async_commit in data=ordered mode"); - goto failed_mount_wq; - } - - set_task_ioprio(sbi->s_journal->j_task, ctx->journal_ioprio); - - sbi->s_journal->j_submit_inode_data_buffers = - ext4_journal_submit_inode_data_buffers; - sbi->s_journal->j_finish_inode_data_buffers = - ext4_journal_finish_inode_data_buffers; - -no_journal: if (!test_opt(sb, NO_MBCACHE)) { sbi->s_ea_block_cache = ext4_xattr_create_cache(); if (!sbi->s_ea_block_cache) { @@ -5235,24 +5335,27 @@ no_journal: } } - if (ext4_has_feature_verity(sb) && blocksize != PAGE_SIZE) { + if (ext4_has_feature_verity(sb) && sb->s_blocksize != PAGE_SIZE) { ext4_msg(sb, KERN_ERR, "Unsupported blocksize for fs-verity"); goto failed_mount_wq; } - if (DUMMY_ENCRYPTION_ENABLED(sbi) && !sb_rdonly(sb) && - !ext4_has_feature_encrypt(sb)) { - ext4_set_feature_encrypt(sb); - ext4_commit_super(sb); - } - /* * Get the # of file system overhead blocks from the * superblock if present. */ - if (es->s_overhead_clusters) - sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); - else { + sbi->s_overhead = le32_to_cpu(es->s_overhead_clusters); + /* ignore the precalculated value if it is ridiculous */ + if (sbi->s_overhead > ext4_blocks_count(es)) + sbi->s_overhead = 0; + /* + * If the bigalloc feature is not enabled recalculating the + * overhead doesn't take long, so we might as well just redo + * it to make sure we are using the correct value. + */ + if (!ext4_has_feature_bigalloc(sb)) + sbi->s_overhead = 0; + if (sbi->s_overhead == 0) { err = ext4_calculate_overhead(sb); if (err) goto failed_mount_wq; @@ -5321,12 +5424,12 @@ no_journal: * turned off by passing "mb_optimize_scan=0". This can also be * turned on forcefully by passing "mb_optimize_scan=1". */ - if (ctx->mb_optimize_scan == 1) - set_opt2(sb, MB_OPTIMIZE_SCAN); - else if (ctx->mb_optimize_scan == 0) - clear_opt2(sb, MB_OPTIMIZE_SCAN); - else if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) - set_opt2(sb, MB_OPTIMIZE_SCAN); + if (!(ctx->spec & EXT4_SPEC_mb_optimize_scan)) { + if (sbi->s_groups_count >= MB_DEFAULT_LINEAR_SCAN_THRESHOLD) + set_opt2(sb, MB_OPTIMIZE_SCAN); + else + clear_opt2(sb, MB_OPTIMIZE_SCAN); + } err = ext4_mb_init(sb); if (err) { @@ -5354,14 +5457,6 @@ no_journal: err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, GFP_KERNEL); } - /* - * Update the checksum after updating free space/inode - * counters. Otherwise the superblock can have an incorrect - * checksum in the buffer cache until it is written out and - * e2fsprogs programs trying to open a file system immediately - * after it is mounted can fail. - */ - ext4_superblock_csum_set(sb); if (!err) err = percpu_counter_init(&sbi->s_dirs_counter, ext4_count_dirs(sb), GFP_KERNEL); @@ -5419,6 +5514,14 @@ no_journal: EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; ext4_orphan_cleanup(sb, es); EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; + /* + * Update the checksum after updating free space/inode counters and + * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect + * checksum in the buffer cache until it is written out and + * e2fsprogs programs trying to open a file system immediately + * after it is mounted can fail. + */ + ext4_superblock_csum_set(sb); if (needs_recovery) { ext4_msg(sb, KERN_INFO, "recovery complete"); err = ext4_mark_recovery_complete(sb, es); @@ -5426,13 +5529,9 @@ no_journal: goto failed_mount9; } - if (test_opt(sb, DISCARD)) { - struct request_queue *q = bdev_get_queue(sb->s_bdev); - if (!blk_queue_discard(q)) - ext4_msg(sb, KERN_WARNING, - "mounting with \"discard\" option, but " - "the device does not support discard"); - } + if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev)) + ext4_msg(sb, KERN_WARNING, + "mounting with \"discard\" option, but the device does not support discard"); if (es->s_error_count) mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ @@ -5446,11 +5545,6 @@ no_journal: return 0; -cantfind_ext4: - if (!silent) - ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem"); - goto failed_mount; - failed_mount9: ext4_release_orphan_info(sb); failed_mount8: @@ -5504,18 +5598,12 @@ failed_mount3: flush_work(&sbi->s_error_work); del_timer_sync(&sbi->s_err_report); ext4_stop_mmpd(sbi); -failed_mount2: - rcu_read_lock(); - group_desc = rcu_dereference(sbi->s_group_desc); - for (i = 0; i < db_count; i++) - brelse(group_desc[i]); - kvfree(group_desc); - rcu_read_unlock(); + ext4_group_desc_free(sbi); failed_mount: if (sbi->s_chksum_driver) crypto_free_shash(sbi->s_chksum_driver); -#ifdef CONFIG_UNICODE +#if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif @@ -5525,7 +5613,7 @@ failed_mount: #endif fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); /* ext4_blkdev_remove() calls kill_bdev(), release bh before it. */ - brelse(bh); + brelse(sbi->s_sbh); ext4_blkdev_remove(sbi); out_fail: sb->s_fs_info = NULL; @@ -5541,7 +5629,7 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc) sbi = ext4_alloc_sbi(sb); if (!sbi) - ret = -ENOMEM; + return -ENOMEM; fc->s_fs_info = sbi; @@ -5570,6 +5658,8 @@ static int ext4_fill_super(struct super_block *sb, struct fs_context *fc) ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " "Quota mode: %s.", descr, ext4_quota_mode(sb)); + /* Update the s_overhead_clusters if necessary */ + ext4_update_overhead(sb, false); return 0; free_sbi: @@ -5631,7 +5721,7 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb, return NULL; } - jbd_debug(2, "Journal inode found at %p: %lld bytes\n", + ext4_debug("Journal inode found at %p: %lld bytes\n", journal_inode, journal_inode->i_size); if (!S_ISREG(journal_inode->i_mode)) { ext4_msg(sb, KERN_ERR, "invalid journal inode"); @@ -5952,7 +6042,6 @@ static void ext4_update_super(struct super_block *sb) static int ext4_commit_super(struct super_block *sb) { struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; - int error = 0; if (!sbh) return -EINVAL; @@ -5961,6 +6050,13 @@ static int ext4_commit_super(struct super_block *sb) ext4_update_super(sb); + lock_buffer(sbh); + /* Buffer got discarded which means block device got invalidated */ + if (!buffer_mapped(sbh)) { + unlock_buffer(sbh); + return -EIO; + } + if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) { /* * Oh, dear. A previous attempt to write the @@ -5975,17 +6071,21 @@ static int ext4_commit_super(struct super_block *sb) clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); } - BUFFER_TRACE(sbh, "marking dirty"); - mark_buffer_dirty(sbh); - error = __sync_dirty_buffer(sbh, - REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0)); + get_bh(sbh); + /* Clear potential dirty bit if it was journalled update */ + clear_buffer_dirty(sbh); + sbh->b_end_io = end_buffer_write_sync; + submit_bh(REQ_OP_WRITE | REQ_SYNC | + (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh); + wait_on_buffer(sbh); if (buffer_write_io_error(sbh)) { ext4_msg(sb, KERN_ERR, "I/O error while writing " "superblock"); clear_buffer_write_io_error(sbh); set_buffer_uptodate(sbh); + return -EIO; } - return error; + return 0; } /* @@ -6226,7 +6326,6 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) char *to_free[EXT4_MAXQUOTAS]; #endif - ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; /* Store the original options */ old_sb_flags = sb->s_flags; @@ -6252,9 +6351,14 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) } else old_opts.s_qf_names[i] = NULL; #endif - if (sbi->s_journal && sbi->s_journal->j_task->io_context) - ctx->journal_ioprio = - sbi->s_journal->j_task->io_context->ioprio; + if (!(ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)) { + if (sbi->s_journal && sbi->s_journal->j_task->io_context) + ctx->journal_ioprio = + sbi->s_journal->j_task->io_context->ioprio; + else + ctx->journal_ioprio = DEFAULT_JOURNAL_IOPRIO; + + } ext4_apply_options(fc, sb); @@ -6395,7 +6499,8 @@ static int __ext4_remount(struct fs_context *fc, struct super_block *sb) if (err) goto restore_opts; } - sbi->s_mount_state = le16_to_cpu(es->s_state); + sbi->s_mount_state = (le16_to_cpu(es->s_state) & + ~EXT4_FC_REPLAY); err = ext4_setup_super(sb, es, 0); if (err) @@ -6674,7 +6779,7 @@ static int ext4_write_info(struct super_block *sb, int type) handle_t *handle; /* Data block + inode block */ - handle = ext4_journal_start(d_inode(sb->s_root), EXT4_HT_QUOTA, 2); + handle = ext4_journal_start_sb(sb, EXT4_HT_QUOTA, 2); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit_info(sb, type); |