aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4/super.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/super.c')
-rw-r--r--fs/ext4/super.c251
1 files changed, 59 insertions, 192 deletions
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index dfa09a277b56..136940af00b8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -80,7 +80,6 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data);
static inline int ext2_feature_set_ok(struct super_block *sb);
static inline int ext3_feature_set_ok(struct super_block *sb);
-static int ext4_feature_set_ok(struct super_block *sb, int readonly);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
static void ext4_clear_request_list(void);
@@ -90,12 +89,9 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
/*
* Lock ordering
*
- * Note the difference between i_mmap_sem (EXT4_I(inode)->i_mmap_sem) and
- * i_mmap_rwsem (inode->i_mmap_rwsem)!
- *
* page fault path:
- * mmap_lock -> sb_start_pagefault -> i_mmap_sem (r) -> transaction start ->
- * page lock -> i_data_sem (rw)
+ * mmap_lock -> sb_start_pagefault -> invalidate_lock (r) -> transaction start
+ * -> page lock -> i_data_sem (rw)
*
* buffered write path:
* sb_start_write -> i_mutex -> mmap_lock
@@ -103,8 +99,9 @@ static struct inode *ext4_get_journal_inode(struct super_block *sb,
* i_data_sem (rw)
*
* truncate:
- * sb_start_write -> i_mutex -> i_mmap_sem (w) -> i_mmap_rwsem (w) -> page lock
- * sb_start_write -> i_mutex -> i_mmap_sem (w) -> transaction start ->
+ * sb_start_write -> i_mutex -> invalidate_lock (w) -> i_mmap_rwsem (w) ->
+ * page lock
+ * sb_start_write -> i_mutex -> invalidate_lock (w) -> transaction start ->
* i_data_sem (rw)
*
* direct IO:
@@ -1175,6 +1172,7 @@ static void ext4_put_super(struct super_block *sb)
flush_work(&sbi->s_error_work);
destroy_workqueue(sbi->rsv_conversion_wq);
+ ext4_release_orphan_info(sb);
/*
* Unregister sysfs before destroying jbd2 journal.
@@ -1200,6 +1198,7 @@ static void ext4_put_super(struct super_block *sb)
if (!sb_rdonly(sb) && !aborted) {
ext4_clear_feature_journal_needs_recovery(sb);
+ ext4_clear_feature_orphan_present(sb);
es->s_state = cpu_to_le16(sbi->s_mount_state);
}
if (!sb_rdonly(sb))
@@ -1360,7 +1359,6 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&ei->i_orphan);
init_rwsem(&ei->xattr_sem);
init_rwsem(&ei->i_data_sem);
- init_rwsem(&ei->i_mmap_sem);
inode_init_once(&ei->vfs_inode);
ext4_fc_init_inode(&ei->vfs_inode);
}
@@ -1585,14 +1583,12 @@ static int ext4_mark_dquot_dirty(struct dquot *dquot);
static int ext4_write_info(struct super_block *sb, int type);
static int ext4_quota_on(struct super_block *sb, int type, int format_id,
const struct path *path);
-static int ext4_quota_on_mount(struct super_block *sb, int type);
static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
size_t len, loff_t off);
static ssize_t ext4_quota_write(struct super_block *sb, int type,
const char *data, size_t len, loff_t off);
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
unsigned int flags);
-static int ext4_enable_quotas(struct super_block *sb);
static struct dquot **ext4_get_dquots(struct inode *inode)
{
@@ -2687,8 +2683,11 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
le16_add_cpu(&es->s_mnt_count, 1);
ext4_update_tstamp(es, s_mtime);
- if (sbi->s_journal)
+ if (sbi->s_journal) {
ext4_set_feature_journal_needs_recovery(sb);
+ if (ext4_has_feature_orphan_file(sb))
+ ext4_set_feature_orphan_present(sb);
+ }
err = ext4_commit_super(sb);
done:
@@ -2970,169 +2969,6 @@ static int ext4_check_descriptors(struct super_block *sb,
return 1;
}
-/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
- * the superblock) which were deleted from all directories, but held open by
- * a process at the time of a crash. We walk the list and try to delete these
- * inodes at recovery time (only with a read-write filesystem).
- *
- * In order to keep the orphan inode chain consistent during traversal (in
- * case of crash during recovery), we link each inode into the superblock
- * orphan list_head and handle it the same way as an inode deletion during
- * normal operation (which journals the operations for us).
- *
- * We only do an iget() and an iput() on each inode, which is very safe if we
- * accidentally point at an in-use or already deleted inode. The worst that
- * can happen in this case is that we get a "bit already cleared" message from
- * ext4_free_inode(). The only reason we would point at a wrong inode is if
- * e2fsck was run on this filesystem, and it must have already done the orphan
- * inode cleanup for us, so we can safely abort without any further action.
- */
-static void ext4_orphan_cleanup(struct super_block *sb,
- struct ext4_super_block *es)
-{
- unsigned int s_flags = sb->s_flags;
- int ret, nr_orphans = 0, nr_truncates = 0;
-#ifdef CONFIG_QUOTA
- int quota_update = 0;
- int i;
-#endif
- if (!es->s_last_orphan) {
- jbd_debug(4, "no orphan inodes to clean up\n");
- return;
- }
-
- if (bdev_read_only(sb->s_bdev)) {
- ext4_msg(sb, KERN_ERR, "write access "
- "unavailable, skipping orphan cleanup");
- return;
- }
-
- /* Check if feature set would not allow a r/w mount */
- if (!ext4_feature_set_ok(sb, 0)) {
- ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to "
- "unknown ROCOMPAT features");
- return;
- }
-
- if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
- /* don't clear list on RO mount w/ errors */
- if (es->s_last_orphan && !(s_flags & SB_RDONLY)) {
- ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
- "clearing orphan list.\n");
- es->s_last_orphan = 0;
- }
- jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
- return;
- }
-
- if (s_flags & SB_RDONLY) {
- ext4_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
- sb->s_flags &= ~SB_RDONLY;
- }
-#ifdef CONFIG_QUOTA
- /*
- * Turn on quotas which were not enabled for read-only mounts if
- * filesystem has quota feature, so that they are updated correctly.
- */
- if (ext4_has_feature_quota(sb) && (s_flags & SB_RDONLY)) {
- int ret = ext4_enable_quotas(sb);
-
- if (!ret)
- quota_update = 1;
- else
- ext4_msg(sb, KERN_ERR,
- "Cannot turn on quotas: error %d", ret);
- }
-
- /* Turn on journaled quotas used for old sytle */
- for (i = 0; i < EXT4_MAXQUOTAS; i++) {
- if (EXT4_SB(sb)->s_qf_names[i]) {
- int ret = ext4_quota_on_mount(sb, i);
-
- if (!ret)
- quota_update = 1;
- else
- ext4_msg(sb, KERN_ERR,
- "Cannot turn on journaled "
- "quota: type %d: error %d", i, ret);
- }
- }
-#endif
-
- while (es->s_last_orphan) {
- struct inode *inode;
-
- /*
- * We may have encountered an error during cleanup; if
- * so, skip the rest.
- */
- if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
- jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
- es->s_last_orphan = 0;
- break;
- }
-
- inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
- if (IS_ERR(inode)) {
- es->s_last_orphan = 0;
- break;
- }
-
- list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
- dquot_initialize(inode);
- if (inode->i_nlink) {
- if (test_opt(sb, DEBUG))
- ext4_msg(sb, KERN_DEBUG,
- "%s: truncating inode %lu to %lld bytes",
- __func__, inode->i_ino, inode->i_size);
- jbd_debug(2, "truncating inode %lu to %lld bytes\n",
- inode->i_ino, inode->i_size);
- inode_lock(inode);
- truncate_inode_pages(inode->i_mapping, inode->i_size);
- ret = ext4_truncate(inode);
- if (ret) {
- /*
- * We need to clean up the in-core orphan list
- * manually if ext4_truncate() failed to get a
- * transaction handle.
- */
- ext4_orphan_del(NULL, inode);
- ext4_std_error(inode->i_sb, ret);
- }
- inode_unlock(inode);
- nr_truncates++;
- } else {
- if (test_opt(sb, DEBUG))
- ext4_msg(sb, KERN_DEBUG,
- "%s: deleting unreferenced inode %lu",
- __func__, inode->i_ino);
- jbd_debug(2, "deleting unreferenced inode %lu\n",
- inode->i_ino);
- nr_orphans++;
- }
- iput(inode); /* The delete magic happens here! */
- }
-
-#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
-
- if (nr_orphans)
- ext4_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
- PLURAL(nr_orphans));
- if (nr_truncates)
- ext4_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
- PLURAL(nr_truncates));
-#ifdef CONFIG_QUOTA
- /* Turn off quotas if they were enabled for orphan cleanup */
- if (quota_update) {
- for (i = 0; i < EXT4_MAXQUOTAS; i++) {
- if (sb_dqopt(sb)->files[i])
- dquot_quota_off(sb, i);
- }
- }
-#endif
- sb->s_flags = s_flags; /* Restore SB_RDONLY status */
-}
-
/*
* Maximal extent format file size.
* Resulting logical blkno at s_maxbytes must fit in our on-disk
@@ -3312,7 +3148,7 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi)
* Returns 1 if this filesystem can be mounted as requested,
* 0 if it cannot be.
*/
-static int ext4_feature_set_ok(struct super_block *sb, int readonly)
+int ext4_feature_set_ok(struct super_block *sb, int readonly)
{
if (ext4_has_unknown_ext4_incompat_features(sb)) {
ext4_msg(sb, KERN_ERR,
@@ -4014,6 +3850,20 @@ static const char *ext4_quota_mode(struct super_block *sb)
#endif
}
+static void ext4_setup_csum_trigger(struct super_block *sb,
+ enum ext4_journal_trigger_type type,
+ void (*trigger)(
+ struct jbd2_buffer_trigger_type *type,
+ struct buffer_head *bh,
+ void *mapped_data,
+ size_t size))
+{
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+ sbi->s_journal_triggers[type].sb = sb;
+ sbi->s_journal_triggers[type].tr_triggers.t_frozen = trigger;
+}
+
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
{
struct dax_device *dax_dev = fs_dax_get_by_bdev(sb->s_bdev);
@@ -4112,6 +3962,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
silent = 1;
goto cantfind_ext4;
}
+ ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE,
+ ext4_orphan_file_block_trigger);
/* Load the checksum driver */
sbi->s_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
@@ -4776,6 +4628,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sb->s_root = NULL;
needs_recovery = (es->s_last_orphan != 0 ||
+ ext4_has_feature_orphan_present(sb) ||
ext4_has_feature_journal_needs_recovery(sb));
if (ext4_has_feature_mmp(sb) && !sb_rdonly(sb))
@@ -5032,6 +4885,14 @@ no_journal:
err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
GFP_KERNEL);
}
+ /*
+ * Update the checksum after updating free space/inode
+ * counters. Otherwise the superblock can have an incorrect
+ * checksum in the buffer cache until it is written out and
+ * e2fsprogs programs trying to open a file system immediately
+ * after it is mounted can fail.
+ */
+ ext4_superblock_csum_set(sb);
if (!err)
err = percpu_counter_init(&sbi->s_dirs_counter,
ext4_count_dirs(sb), GFP_KERNEL);
@@ -5066,12 +4927,15 @@ no_journal:
if (err)
goto failed_mount7;
+ err = ext4_init_orphan_info(sb);
+ if (err)
+ goto failed_mount8;
#ifdef CONFIG_QUOTA
/* Enable quota usage during mount. */
if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) {
err = ext4_enable_quotas(sb);
if (err)
- goto failed_mount8;
+ goto failed_mount9;
}
#endif /* CONFIG_QUOTA */
@@ -5090,7 +4954,7 @@ no_journal:
ext4_msg(sb, KERN_INFO, "recovery complete");
err = ext4_mark_recovery_complete(sb, es);
if (err)
- goto failed_mount8;
+ goto failed_mount9;
}
if (EXT4_SB(sb)->s_journal) {
if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
@@ -5136,6 +5000,8 @@ cantfind_ext4:
ext4_msg(sb, KERN_ERR, "VFS: Can't find ext4 filesystem");
goto failed_mount;
+failed_mount9:
+ ext4_release_orphan_info(sb);
failed_mount8:
ext4_unregister_sysfs(sb);
kobject_put(&sbi->s_kobj);
@@ -5646,8 +5512,15 @@ static int ext4_mark_recovery_complete(struct super_block *sb,
if (err < 0)
goto out;
- if (ext4_has_feature_journal_needs_recovery(sb) && sb_rdonly(sb)) {
+ if (sb_rdonly(sb) && (ext4_has_feature_journal_needs_recovery(sb) ||
+ ext4_has_feature_orphan_present(sb))) {
+ if (!ext4_orphan_file_empty(sb)) {
+ ext4_error(sb, "Orphan file not empty on read-only fs.");
+ err = -EFSCORRUPTED;
+ goto out;
+ }
ext4_clear_feature_journal_needs_recovery(sb);
+ ext4_clear_feature_orphan_present(sb);
ext4_commit_super(sb);
}
out:
@@ -5790,6 +5663,8 @@ static int ext4_freeze(struct super_block *sb)
/* Journal blocked and flushed, clear needs_recovery flag. */
ext4_clear_feature_journal_needs_recovery(sb);
+ if (ext4_orphan_file_empty(sb))
+ ext4_clear_feature_orphan_present(sb);
}
error = ext4_commit_super(sb);
@@ -5812,6 +5687,8 @@ static int ext4_unfreeze(struct super_block *sb)
if (EXT4_SB(sb)->s_journal) {
/* Reset the needs_recovery flag before the fs is unlocked. */
ext4_set_feature_journal_needs_recovery(sb);
+ if (ext4_has_feature_orphan_file(sb))
+ ext4_set_feature_orphan_present(sb);
}
ext4_commit_super(sb);
@@ -6015,7 +5892,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
* around from a previously readonly bdev mount,
* require a full umount/remount for now.
*/
- if (es->s_last_orphan) {
+ if (es->s_last_orphan || !ext4_orphan_file_empty(sb)) {
ext4_msg(sb, KERN_WARNING, "Couldn't "
"remount RDWR because of unprocessed "
"orphan inode list. Please "
@@ -6312,16 +6189,6 @@ static int ext4_write_info(struct super_block *sb, int type)
return ret;
}
-/*
- * Turn on quotas during mount time - we need to find
- * the quota file and such...
- */
-static int ext4_quota_on_mount(struct super_block *sb, int type)
-{
- return dquot_quota_on_mount(sb, get_qf_name(sb, EXT4_SB(sb), type),
- EXT4_SB(sb)->s_jquota_fmt, type);
-}
-
static void lockdep_set_quota_inode(struct inode *inode, int subclass)
{
struct ext4_inode_info *ei = EXT4_I(inode);
@@ -6451,7 +6318,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
}
/* Enable usage tracking for all quota types. */
-static int ext4_enable_quotas(struct super_block *sb)
+int ext4_enable_quotas(struct super_block *sb)
{
int type, err = 0;
unsigned long qf_inums[EXT4_MAXQUOTAS] = {
@@ -6609,7 +6476,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
if (!bh)
goto out;
BUFFER_TRACE(bh, "get write access");
- err = ext4_journal_get_write_access(handle, bh);
+ err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
if (err) {
brelse(bh);
return err;