aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-03-12 15:03:21 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-03-12 15:03:21 -0700
commita5adcfcad55d5f034b33f79f1a873229d1e77b24 (patch)
treee9548efcccb8f5ed3e120b0ca36ad04de116cdb7
parentMerge tag 'ceph-for-5.1-rc1' of git://github.com/ceph/ceph-client (diff)
parentjbd2: jbd2_get_transaction does not need to return a value (diff)
downloadlinux-dev-a5adcfcad55d5f034b33f79f1a873229d1e77b24.tar.xz
linux-dev-a5adcfcad55d5f034b33f79f1a873229d1e77b24.zip
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: "A large number of bug fixes and cleanups. One new feature to allow users to more easily find the jbd2 journal thread for a particular ext4 file system" * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits) jbd2: jbd2_get_transaction does not need to return a value jbd2: fix invalid descriptor block checksum ext4: fix bigalloc cluster freeing when hole punching under load ext4: add sysfs attr /sys/fs/ext4/<disk>/journal_task ext4: Change debugging support help prefix from EXT4 to Ext4 ext4: fix compile error when using BUFFER_TRACE jbd2: fix compile warning when using JBUFFER_TRACE ext4: fix some error pointer dereferences ext4: annotate more implicit fall throughs ext4: annotate implicit fall throughs ext4: don't update s_rev_level if not required jbd2: fold jbd2_superblock_csum_{verify,set} into their callers jbd2: fix race when writing superblock ext4: fix crash during online resizing ext4: disallow files with EXT4_JOURNAL_DATA_FL from EXT4_IOC_SWAP_BOOT ext4: add mask of ext4 flags to swap ext4: update quota information while swapping boot loader inode ext4: cleanup pagecache before swap i_data ext4: fix check of inode in swap_inode_boot_loader ext4: unlock unused_pages timely when doing writeback ...
-rw-r--r--Documentation/ABI/testing/sysfs-fs-ext47
-rw-r--r--fs/ext4/Kconfig2
-rw-r--r--fs/ext4/ext4.h9
-rw-r--r--fs/ext4/extents.c29
-rw-r--r--fs/ext4/hash.c2
-rw-r--r--fs/ext4/indirect.c6
-rw-r--r--fs/ext4/inode.c21
-rw-r--r--fs/ext4/ioctl.c101
-rw-r--r--fs/ext4/mballoc.c7
-rw-r--r--fs/ext4/page-io.c4
-rw-r--r--fs/ext4/resize.c3
-rw-r--r--fs/ext4/super.c1
-rw-r--r--fs/ext4/sysfs.c13
-rw-r--r--fs/ext4/xattr.c3
-rw-r--r--fs/jbd2/checkpoint.c17
-rw-r--r--fs/jbd2/commit.c6
-rw-r--r--fs/jbd2/journal.c90
-rw-r--r--fs/jbd2/transaction.c83
18 files changed, 257 insertions, 147 deletions
diff --git a/Documentation/ABI/testing/sysfs-fs-ext4 b/Documentation/ABI/testing/sysfs-fs-ext4
index c631253cf85c..78604db56279 100644
--- a/Documentation/ABI/testing/sysfs-fs-ext4
+++ b/Documentation/ABI/testing/sysfs-fs-ext4
@@ -109,3 +109,10 @@ Description:
write operation (since a 4k random write might turn
into a much larger write due to the zeroout
operation).
+
+What: /sys/fs/ext4/<disk>/journal_task
+Date: February 2019
+Contact: "Theodore Ts'o" <tytso@mit.edu>
+Description:
+ This file is read-only and shows the pid of journal thread in
+ current pid-namespace or 0 if task is unreachable.
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index 031e5a82d556..06f77ca7f36e 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -97,7 +97,7 @@ config EXT4_FS_SECURITY
extended attributes for file security labels, say N.
config EXT4_DEBUG
- bool "EXT4 debugging support"
+ bool "Ext4 debugging support"
depends on EXT4_FS
help
Enables run-time debugging support for the ext4 filesystem.
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5012ddb6daf9..82ffdacdc7fa 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -425,6 +425,9 @@ struct flex_groups {
/* Flags that are appropriate for non-directories/regular files. */
#define EXT4_OTHER_FLMASK (EXT4_NODUMP_FL | EXT4_NOATIME_FL)
+/* The only flags that should be swapped */
+#define EXT4_FL_SHOULD_SWAP (EXT4_HUGE_FILE_FL | EXT4_EXTENTS_FL)
+
/* Mask out flags that are inappropriate for the given type of inode. */
static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
{
@@ -1661,6 +1664,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
#define EXT4_FEATURE_INCOMPAT_INLINE_DATA 0x8000 /* data in inode */
#define EXT4_FEATURE_INCOMPAT_ENCRYPT 0x10000
+extern void ext4_update_dynamic_rev(struct super_block *sb);
+
#define EXT4_FEATURE_COMPAT_FUNCS(name, flagname) \
static inline bool ext4_has_feature_##name(struct super_block *sb) \
{ \
@@ -1669,6 +1674,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \
} \
static inline void ext4_set_feature_##name(struct super_block *sb) \
{ \
+ ext4_update_dynamic_rev(sb); \
EXT4_SB(sb)->s_es->s_feature_compat |= \
cpu_to_le32(EXT4_FEATURE_COMPAT_##flagname); \
} \
@@ -1686,6 +1692,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \
} \
static inline void ext4_set_feature_##name(struct super_block *sb) \
{ \
+ ext4_update_dynamic_rev(sb); \
EXT4_SB(sb)->s_es->s_feature_ro_compat |= \
cpu_to_le32(EXT4_FEATURE_RO_COMPAT_##flagname); \
} \
@@ -1703,6 +1710,7 @@ static inline bool ext4_has_feature_##name(struct super_block *sb) \
} \
static inline void ext4_set_feature_##name(struct super_block *sb) \
{ \
+ ext4_update_dynamic_rev(sb); \
EXT4_SB(sb)->s_es->s_feature_incompat |= \
cpu_to_le32(EXT4_FEATURE_INCOMPAT_##flagname); \
} \
@@ -2666,7 +2674,6 @@ do { \
#endif
-extern void ext4_update_dynamic_rev(struct super_block *sb);
extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb,
__u32 compat);
extern int ext4_update_rocompat_feature(handle_t *handle,
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 79d986dbf5af..0f89f5190cd7 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2956,14 +2956,17 @@ again:
if (err < 0)
goto out;
- } else if (sbi->s_cluster_ratio > 1 && end >= ex_end) {
+ } else if (sbi->s_cluster_ratio > 1 && end >= ex_end &&
+ partial.state == initial) {
/*
- * If there's an extent to the right its first cluster
- * contains the immediate right boundary of the
- * truncated/punched region. Set partial_cluster to
- * its negative value so it won't be freed if shared
- * with the current extent. The end < ee_block case
- * is handled in ext4_ext_rm_leaf().
+ * If we're punching, there's an extent to the right.
+ * If the partial cluster hasn't been set, set it to
+ * that extent's first cluster and its state to nofree
+ * so it won't be freed should it contain blocks to be
+ * removed. If it's already set (tofree/nofree), we're
+ * retrying and keep the original partial cluster info
+ * so a cluster marked tofree as a result of earlier
+ * extent removal is not lost.
*/
lblk = ex_end + 1;
err = ext4_ext_search_right(inode, path, &lblk, &pblk,
@@ -4048,18 +4051,8 @@ out:
} else
allocated = ret;
map->m_flags |= EXT4_MAP_NEW;
- /*
- * if we allocated more blocks than requested
- * we need to make sure we unmap the extra block
- * allocated. The actual needed block will get
- * unmapped later when we find the buffer_head marked
- * new.
- */
- if (allocated > map->m_len) {
- clean_bdev_aliases(inode->i_sb->s_bdev, newblock + map->m_len,
- allocated - map->m_len);
+ if (allocated > map->m_len)
allocated = map->m_len;
- }
map->m_len = allocated;
map_out:
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c
index e22dcfab308b..46b24da33a28 100644
--- a/fs/ext4/hash.c
+++ b/fs/ext4/hash.c
@@ -231,6 +231,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
break;
case DX_HASH_HALF_MD4_UNSIGNED:
str2hashbuf = str2hashbuf_unsigned;
+ /* fall through */
case DX_HASH_HALF_MD4:
p = name;
while (len > 0) {
@@ -244,6 +245,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo)
break;
case DX_HASH_TEA_UNSIGNED:
str2hashbuf = str2hashbuf_unsigned;
+ /* fall through */
case DX_HASH_TEA:
p = name;
while (len > 0) {
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index bf7fa1507e81..c2225f0d31b5 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -1183,18 +1183,21 @@ do_indirects:
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
i_data[EXT4_IND_BLOCK] = 0;
}
+ /* fall through */
case EXT4_IND_BLOCK:
nr = i_data[EXT4_DIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
i_data[EXT4_DIND_BLOCK] = 0;
}
+ /* fall through */
case EXT4_DIND_BLOCK:
nr = i_data[EXT4_TIND_BLOCK];
if (nr) {
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
i_data[EXT4_TIND_BLOCK] = 0;
}
+ /* fall through */
case EXT4_TIND_BLOCK:
;
}
@@ -1433,6 +1436,7 @@ do_indirects:
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1);
i_data[EXT4_IND_BLOCK] = 0;
}
+ /* fall through */
case EXT4_IND_BLOCK:
if (++n >= n2)
return 0;
@@ -1441,6 +1445,7 @@ do_indirects:
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2);
i_data[EXT4_DIND_BLOCK] = 0;
}
+ /* fall through */
case EXT4_DIND_BLOCK:
if (++n >= n2)
return 0;
@@ -1449,6 +1454,7 @@ do_indirects:
ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3);
i_data[EXT4_TIND_BLOCK] = 0;
}
+ /* fall through */
case EXT4_TIND_BLOCK:
;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4356ef6d728e..b54b261ded36 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -391,7 +391,7 @@ void ext4_da_update_reserve_space(struct inode *inode,
* inode's preallocations.
*/
if ((ei->i_reserved_data_blocks == 0) &&
- (atomic_read(&inode->i_writecount) == 0))
+ !inode_is_open_for_write(inode))
ext4_discard_preallocations(inode);
}
@@ -678,8 +678,6 @@ found:
if (flags & EXT4_GET_BLOCKS_ZERO &&
map->m_flags & EXT4_MAP_MAPPED &&
map->m_flags & EXT4_MAP_NEW) {
- clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
- map->m_len);
ret = ext4_issue_zeroout(inode, map->m_lblk,
map->m_pblk, map->m_len);
if (ret) {
@@ -1194,7 +1192,6 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
if (err)
break;
if (buffer_new(bh)) {
- clean_bdev_bh_alias(bh);
if (PageUptodate(page)) {
clear_buffer_new(bh);
set_buffer_uptodate(bh);
@@ -2489,10 +2486,6 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
}
BUG_ON(map->m_len == 0);
- if (map->m_flags & EXT4_MAP_NEW) {
- clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk,
- map->m_len);
- }
return 0;
}
@@ -2835,12 +2828,12 @@ retry:
goto unplug;
}
ret = mpage_prepare_extent_to_map(&mpd);
+ /* Unlock pages we didn't use */
+ mpage_release_unused_pages(&mpd, false);
/* Submit prepared bio */
ext4_io_submit(&mpd.io_submit);
ext4_put_io_end_defer(mpd.io_submit.io_end);
mpd.io_submit.io_end = NULL;
- /* Unlock pages we didn't use */
- mpage_release_unused_pages(&mpd, false);
if (ret < 0)
goto unplug;
@@ -2908,10 +2901,11 @@ retry:
handle = NULL;
mpd.do_map = 0;
}
- /* Submit prepared bio */
- ext4_io_submit(&mpd.io_submit);
/* Unlock pages we didn't use */
mpage_release_unused_pages(&mpd, give_up_on_write);
+ /* Submit prepared bio */
+ ext4_io_submit(&mpd.io_submit);
+
/*
* Drop our io_end reference we got from init. We have
* to be careful and use deferred io_end finishing if
@@ -5349,7 +5343,6 @@ static int ext4_do_update_inode(handle_t *handle,
err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
if (err)
goto out_brelse;
- ext4_update_dynamic_rev(sb);
ext4_set_feature_large_file(sb);
ext4_handle_sync(handle);
err = ext4_handle_dirty_super(handle, sb);
@@ -6000,7 +5993,7 @@ int ext4_expand_extra_isize(struct inode *inode,
ext4_write_lock_xattr(inode, &no_expand);
- BUFFER_TRACE(iloc.bh, "get_write_access");
+ BUFFER_TRACE(iloc->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, iloc->bh);
if (error) {
brelse(iloc->bh);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index d26bcac291bb..3c4f8bb59f8a 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -63,18 +63,20 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
loff_t isize;
struct ext4_inode_info *ei1;
struct ext4_inode_info *ei2;
+ unsigned long tmp;
ei1 = EXT4_I(inode1);
ei2 = EXT4_I(inode2);
swap(inode1->i_version, inode2->i_version);
- swap(inode1->i_blocks, inode2->i_blocks);
- swap(inode1->i_bytes, inode2->i_bytes);
swap(inode1->i_atime, inode2->i_atime);
swap(inode1->i_mtime, inode2->i_mtime);
memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
- swap(ei1->i_flags, ei2->i_flags);
+ tmp = ei1->i_flags & EXT4_FL_SHOULD_SWAP;
+ ei1->i_flags = (ei2->i_flags & EXT4_FL_SHOULD_SWAP) |
+ (ei1->i_flags & ~EXT4_FL_SHOULD_SWAP);
+ ei2->i_flags = tmp | (ei2->i_flags & ~EXT4_FL_SHOULD_SWAP);
swap(ei1->i_disksize, ei2->i_disksize);
ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
@@ -115,28 +117,42 @@ static long swap_inode_boot_loader(struct super_block *sb,
int err;
struct inode *inode_bl;
struct ext4_inode_info *ei_bl;
-
- if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode) ||
- IS_SWAPFILE(inode) || IS_ENCRYPTED(inode) ||
- ext4_has_inline_data(inode))
- return -EINVAL;
-
- if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) ||
- !inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN))
- return -EPERM;
+ qsize_t size, size_bl, diff;
+ blkcnt_t blocks;
+ unsigned short bytes;
inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO, EXT4_IGET_SPECIAL);
if (IS_ERR(inode_bl))
return PTR_ERR(inode_bl);
ei_bl = EXT4_I(inode_bl);
- filemap_flush(inode->i_mapping);
- filemap_flush(inode_bl->i_mapping);
-
/* Protect orig inodes against a truncate and make sure,
* that only 1 swap_inode_boot_loader is running. */
lock_two_nondirectories(inode, inode_bl);
+ if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode) ||
+ IS_SWAPFILE(inode) || IS_ENCRYPTED(inode) ||
+ (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) ||
+ ext4_has_inline_data(inode)) {
+ err = -EINVAL;
+ goto journal_err_out;
+ }
+
+ if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) ||
+ !inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) {
+ err = -EPERM;
+ goto journal_err_out;
+ }
+
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ err = filemap_write_and_wait(inode->i_mapping);
+ if (err)
+ goto err_out;
+
+ err = filemap_write_and_wait(inode_bl->i_mapping);
+ if (err)
+ goto err_out;
+
/* Wait for all existing dio workers */
inode_dio_wait(inode);
inode_dio_wait(inode_bl);
@@ -147,7 +163,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2);
if (IS_ERR(handle)) {
err = -EINVAL;
- goto journal_err_out;
+ goto err_out;
}
/* Protect extent tree against block allocations via delalloc */
@@ -170,6 +186,13 @@ static long swap_inode_boot_loader(struct super_block *sb,
memset(ei_bl->i_data, 0, sizeof(ei_bl->i_data));
}
+ err = dquot_initialize(inode);
+ if (err)
+ goto err_out1;
+
+ size = (qsize_t)(inode->i_blocks) * (1 << 9) + inode->i_bytes;
+ size_bl = (qsize_t)(inode_bl->i_blocks) * (1 << 9) + inode_bl->i_bytes;
+ diff = size - size_bl;
swap_inode_data(inode, inode_bl);
inode->i_ctime = inode_bl->i_ctime = current_time(inode);
@@ -183,27 +206,51 @@ static long swap_inode_boot_loader(struct super_block *sb,
err = ext4_mark_inode_dirty(handle, inode);
if (err < 0) {
+ /* No need to update quota information. */
ext4_warning(inode->i_sb,
"couldn't mark inode #%lu dirty (err %d)",
inode->i_ino, err);
/* Revert all changes: */
swap_inode_data(inode, inode_bl);
ext4_mark_inode_dirty(handle, inode);
- } else {
- err = ext4_mark_inode_dirty(handle, inode_bl);
- if (err < 0) {
- ext4_warning(inode_bl->i_sb,
- "couldn't mark inode #%lu dirty (err %d)",
- inode_bl->i_ino, err);
- /* Revert all changes: */
- swap_inode_data(inode, inode_bl);
- ext4_mark_inode_dirty(handle, inode);
- ext4_mark_inode_dirty(handle, inode_bl);
- }
+ goto err_out1;
+ }
+
+ blocks = inode_bl->i_blocks;
+ bytes = inode_bl->i_bytes;
+ inode_bl->i_blocks = inode->i_blocks;
+ inode_bl->i_bytes = inode->i_bytes;
+ err = ext4_mark_inode_dirty(handle, inode_bl);
+ if (err < 0) {
+ /* No need to update quota information. */
+ ext4_warning(inode_bl->i_sb,
+ "couldn't mark inode #%lu dirty (err %d)",
+ inode_bl->i_ino, err);
+ goto revert;
+ }
+
+ /* Bootloader inode should not be counted into quota information. */
+ if (diff > 0)
+ dquot_free_space(inode, diff);
+ else
+ err = dquot_alloc_space(inode, -1 * diff);
+
+ if (err < 0) {
+revert:
+ /* Revert all changes: */
+ inode_bl->i_blocks = blocks;
+ inode_bl->i_bytes = bytes;
+ swap_inode_data(inode, inode_bl);
+ ext4_mark_inode_dirty(handle, inode);
+ ext4_mark_inode_dirty(handle, inode_bl);
}
+
+err_out1:
ext4_journal_stop(handle);
ext4_double_up_write_data_sem(inode, inode_bl);
+err_out:
+ up_write(&EXT4_I(inode)->i_mmap_sem);
journal_err_out:
unlock_two_nondirectories(inode, inode_bl);
iput(inode_bl);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index e2248083cdca..6fb76d408093 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4176,9 +4176,8 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac)
isize = (i_size_read(ac->ac_inode) + ac->ac_sb->s_blocksize - 1)
>> bsbits;
- if ((size == isize) &&
- !ext4_fs_is_busy(sbi) &&
- (atomic_read(&ac->ac_inode->i_writecount) == 0)) {
+ if ((size == isize) && !ext4_fs_is_busy(sbi) &&
+ !inode_is_open_for_write(ac->ac_inode)) {
ac->ac_flags |= EXT4_MB_HINT_NOPREALLOC;
return;
}
@@ -4258,7 +4257,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
(unsigned) ar->goal, ac->ac_flags, ac->ac_2order,
(unsigned) ar->lleft, (unsigned) ar->pleft,
(unsigned) ar->lright, (unsigned) ar->pright,
- atomic_read(&ar->inode->i_writecount) ? "" : "non-");
+ inode_is_open_for_write(ar->inode) ? "" : "non-");
return 0;
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 6f5305e9a6ac..3e9298e6a705 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -468,10 +468,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
ext4_io_submit(io);
continue;
}
- if (buffer_new(bh)) {
+ if (buffer_new(bh))
clear_buffer_new(bh);
- clean_bdev_bh_alias(bh);
- }
set_buffer_async_write(bh);
nr_to_submit++;
} while ((bh = bh->b_this_page) != head);
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 48421de803b7..3d9b18505c0c 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1960,7 +1960,8 @@ retry:
le16_to_cpu(es->s_reserved_gdt_blocks);
n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb);
n_blocks_count = (ext4_fsblk_t)n_group *
- EXT4_BLOCKS_PER_GROUP(sb);
+ EXT4_BLOCKS_PER_GROUP(sb) +
+ le32_to_cpu(es->s_first_data_block);
n_group--; /* set to last group number */
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 60da0a6e4d86..f5b828bf1299 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2249,7 +2249,6 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT);
le16_add_cpu(&es->s_mnt_count, 1);
ext4_update_tstamp(es, s_mtime);
- ext4_update_dynamic_rev(sb);
if (sbi->s_journal)
ext4_set_feature_journal_needs_recovery(sb);
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 5e4e78fc0b3a..616c075da062 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -30,6 +30,7 @@ typedef enum {
attr_feature,
attr_pointer_ui,
attr_pointer_atomic,
+ attr_journal_task,
} attr_id_t;
typedef enum {
@@ -125,6 +126,14 @@ static ssize_t trigger_test_error(struct ext4_sb_info *sbi,
return count;
}
+static ssize_t journal_task_show(struct ext4_sb_info *sbi, char *buf)
+{
+ if (!sbi->s_journal)
+ return snprintf(buf, PAGE_SIZE, "<none>\n");
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+ task_pid_vnr(sbi->s_journal->j_task));
+}
+
#define EXT4_ATTR(_name,_mode,_id) \
static struct ext4_attr ext4_attr_##_name = { \
.attr = {.name = __stringify(_name), .mode = _mode }, \
@@ -188,6 +197,7 @@ EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
EXT4_ATTR(first_error_time, 0444, first_error_time);
EXT4_ATTR(last_error_time, 0444, last_error_time);
+EXT4_ATTR(journal_task, 0444, journal_task);
static unsigned int old_bump_val = 128;
EXT4_ATTR_PTR(max_writeback_mb_bump, 0444, pointer_ui, &old_bump_val);
@@ -217,6 +227,7 @@ static struct attribute *ext4_attrs[] = {
ATTR_LIST(errors_count),
ATTR_LIST(first_error_time),
ATTR_LIST(last_error_time),
+ ATTR_LIST(journal_task),
NULL,
};
@@ -304,6 +315,8 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
return print_tstamp(buf, sbi->s_es, s_first_error_time);
case attr_last_error_time:
return print_tstamp(buf, sbi->s_es, s_last_error_time);
+ case attr_journal_task:
+ return journal_task_show(sbi, buf);
}
return 0;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 86ed9c686249..dc82e7757f67 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -829,6 +829,7 @@ int ext4_get_inode_usage(struct inode *inode, qsize_t *usage)
bh = ext4_sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl, REQ_PRIO);
if (IS_ERR(bh)) {
ret = PTR_ERR(bh);
+ bh = NULL;
goto out;
}
@@ -2903,6 +2904,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
if (error == -EIO)
EXT4_ERROR_INODE(inode, "block %llu read error",
EXT4_I(inode)->i_file_acl);
+ bh = NULL;
goto cleanup;
}
error = ext4_xattr_check_block(inode, bh);
@@ -3059,6 +3061,7 @@ ext4_xattr_block_cache_find(struct inode *inode,
if (IS_ERR(bh)) {
if (PTR_ERR(bh) == -ENOMEM)
return NULL;
+ bh = NULL;
EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long)ce->e_value);
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 26f8d7e46462..02e0b79753e7 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -113,7 +113,7 @@ void __jbd2_log_wait_for_space(journal_t *journal)
nblocks = jbd2_space_needed(journal);
while (jbd2_log_space_left(journal) < nblocks) {
write_unlock(&journal->j_state_lock);
- mutex_lock(&journal->j_checkpoint_mutex);
+ mutex_lock_io(&journal->j_checkpoint_mutex);
/*
* Test again, another process may have checkpointed while we
@@ -276,9 +276,22 @@ restart:
"JBD2: %s: Waiting for Godot: block %llu\n",
journal->j_devname, (unsigned long long) bh->b_blocknr);
+ if (batch_count)
+ __flush_batch(journal, &batch_count);
jbd2_log_start_commit(journal, tid);
+ /*
+ * jbd2_journal_commit_transaction() may want
+ * to take the checkpoint_mutex if JBD2_FLUSHED
+ * is set, jbd2_update_log_tail() called by
+ * jbd2_journal_commit_transaction() may also take
+ * checkpoint_mutex. So we need to temporarily
+ * drop it.
+ */
+ mutex_unlock(&journal->j_checkpoint_mutex);
jbd2_log_wait_commit(journal, tid);
- goto retry;
+ mutex_lock_io(&journal->j_checkpoint_mutex);
+ spin_lock(&journal->j_list_lock);
+ goto restart;
}
if (!buffer_dirty(bh)) {
if (unlikely(buffer_write_io_error(bh)) && !result)
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 2eb55c3361a8..efd0ce9489ae 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -694,9 +694,11 @@ void jbd2_journal_commit_transaction(journal_t *journal)
the last tag we set up. */
tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
-
- jbd2_descriptor_block_csum_set(journal, descriptor);
start_journal_io:
+ if (descriptor)
+ jbd2_descriptor_block_csum_set(journal,
+ descriptor);
+
for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i];
/*
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 8ef6b6daaa7a..382c030cc78b 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -142,22 +142,6 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
return cpu_to_be32(csum);
}
-static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
-{
- if (!jbd2_journal_has_csum_v2or3(j))
- return 1;
-
- return sb->s_checksum == jbd2_superblock_csum(j, sb);
-}
-
-static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
-{
- if (!jbd2_journal_has_csum_v2or3(j))
- return;
-
- sb->s_checksum = jbd2_superblock_csum(j, sb);
-}
-
/*
* Helper function used to manage commit timeouts
*/
@@ -1356,6 +1340,10 @@ static int journal_reset(journal_t *journal)
return jbd2_journal_start_thread(journal);
}
+/*
+ * This function expects that the caller will have locked the journal
+ * buffer head, and will return with it unlocked
+ */
static int jbd2_write_superblock(journal_t *journal, int write_flags)
{
struct buffer_head *bh = journal->j_sb_buffer;
@@ -1365,7 +1353,6 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
trace_jbd2_write_superblock(journal, write_flags);
if (!(journal->j_flags & JBD2_BARRIER))
write_flags &= ~(REQ_FUA | REQ_PREFLUSH);
- lock_buffer(bh);
if (buffer_write_io_error(bh)) {
/*
* Oh, dear. A previous attempt to write the journal
@@ -1381,7 +1368,8 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags)
clear_buffer_write_io_error(bh);
set_buffer_uptodate(bh);
}
- jbd2_superblock_csum_set(journal, sb);
+ if (jbd2_journal_has_csum_v2or3(journal))
+ sb->s_checksum = jbd2_superblock_csum(journal, sb);
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
ret = submit_bh(REQ_OP_WRITE, write_flags, bh);
@@ -1424,6 +1412,7 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
tail_block, tail_tid);
+ lock_buffer(journal->j_sb_buffer);
sb->s_sequence = cpu_to_be32(tail_tid);
sb->s_start = cpu_to_be32(tail_block);
@@ -1454,18 +1443,17 @@ static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
journal_superblock_t *sb = journal->j_superblock;
BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
- read_lock(&journal->j_state_lock);
- /* Is it already empty? */
- if (sb->s_start == 0) {
- read_unlock(&journal->j_state_lock);
+ lock_buffer(journal->j_sb_buffer);
+ if (sb->s_start == 0) { /* Is it already empty? */
+ unlock_buffer(journal->j_sb_buffer);
return;
}
+
jbd_debug(1, "JBD2: Marking journal as empty (seq %d)\n",
journal->j_tail_sequence);
sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
sb->s_start = cpu_to_be32(0);
- read_unlock(&journal->j_state_lock);
jbd2_write_superblock(journal, write_op);
@@ -1488,9 +1476,8 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
journal_superblock_t *sb = journal->j_superblock;
int errcode;
- read_lock(&journal->j_state_lock);
+ lock_buffer(journal->j_sb_buffer);
errcode = journal->j_errno;
- read_unlock(&journal->j_state_lock);
if (errcode == -ESHUTDOWN)
errcode = 0;
jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
@@ -1595,17 +1582,18 @@ static int journal_get_superblock(journal_t *journal)
}
}
- /* Check superblock checksum */
- if (!jbd2_superblock_csum_verify(journal, sb)) {
- printk(KERN_ERR "JBD2: journal checksum error\n");
- err = -EFSBADCRC;
- goto out;
- }
+ if (jbd2_journal_has_csum_v2or3(journal)) {
+ /* Check superblock checksum */
+ if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
+ printk(KERN_ERR "JBD2: journal checksum error\n");
+ err = -EFSBADCRC;
+ goto out;
+ }
- /* Precompute checksum seed for all metadata */
- if (jbd2_journal_has_csum_v2or3(journal))
+ /* Precompute checksum seed for all metadata */
journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
sizeof(sb->s_uuid));
+ }
set_buffer_verified(bh);
@@ -1894,28 +1882,27 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
sb = journal->j_superblock;
+ /* Load the checksum driver if necessary */
+ if ((journal->j_chksum_driver == NULL) &&
+ INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
+ journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
+ if (IS_ERR(journal->j_chksum_driver)) {
+ printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
+ journal->j_chksum_driver = NULL;
+ return 0;
+ }
+ /* Precompute checksum seed for all metadata */
+ journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
+ sizeof(sb->s_uuid));
+ }
+
+ lock_buffer(journal->j_sb_buffer);
+
/* If enabling v3 checksums, update superblock */
if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
sb->s_feature_compat &=
~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
-
- /* Load the checksum driver */
- if (journal->j_chksum_driver == NULL) {
- journal->j_chksum_driver = crypto_alloc_shash("crc32c",
- 0, 0);
- if (IS_ERR(journal->j_chksum_driver)) {
- printk(KERN_ERR "JBD2: Cannot load crc32c "
- "driver.\n");
- journal->j_chksum_driver = NULL;
- return 0;
- }
-
- /* Precompute checksum seed for all metadata */
- journal->j_csum_seed = jbd2_chksum(journal, ~0,
- sb->s_uuid,
- sizeof(sb->s_uuid));
- }
}
/* If enabling v1 checksums, downgrade superblock */
@@ -1927,6 +1914,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
sb->s_feature_compat |= cpu_to_be32(compat);
sb->s_feature_ro_compat |= cpu_to_be32(ro);
sb->s_feature_incompat |= cpu_to_be32(incompat);
+ unlock_buffer(journal->j_sb_buffer);
return 1;
#undef COMPAT_FEATURE_ON
@@ -2067,7 +2055,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
err = jbd2_journal_skip_recovery(journal);
if (write) {
/* Lock to make assertions happy... */
- mutex_lock(&journal->j_checkpoint_mutex);
+ mutex_lock_io(&journal->j_checkpoint_mutex);
jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index cc35537232f2..f940d31c2adc 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -63,7 +63,7 @@ void jbd2_journal_free_transaction(transaction_t *transaction)
/*
* jbd2_get_transaction: obtain a new transaction_t object.
*
- * Simply allocate and initialise a new transaction. Create it in
+ * Simply initialise a new transaction. Initialize it in
* RUNNING state and add it to the current journal (which should not
* have an existing running transaction: we only make a new transaction
* once we have started to commit the old one).
@@ -75,8 +75,8 @@ void jbd2_journal_free_transaction(transaction_t *transaction)
*
*/
-static transaction_t *
-jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
+static void jbd2_get_transaction(journal_t *journal,
+ transaction_t *transaction)
{
transaction->t_journal = journal;
transaction->t_state = T_RUNNING;
@@ -100,8 +100,6 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
transaction->t_max_wait = 0;
transaction->t_start = jiffies;
transaction->t_requested = 0;
-
- return transaction;
}
/*
@@ -1252,11 +1250,12 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
struct journal_head *jh;
char *committed_data = NULL;
- JBUFFER_TRACE(jh, "entry");
if (jbd2_write_access_granted(handle, bh, true))
return 0;
jh = jbd2_journal_add_journal_head(bh);
+ JBUFFER_TRACE(jh, "entry");
+
/*
* Do this first --- it can drop the journal lock, so we want to
* make sure that obtaining the committed_data is done
@@ -1367,15 +1366,17 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
if (is_handle_aborted(handle))
return -EROFS;
- if (!buffer_jbd(bh)) {
- ret = -EUCLEAN;
- goto out;
- }
+ if (!buffer_jbd(bh))
+ return -EUCLEAN;
+
/*
* We don't grab jh reference here since the buffer must be part
* of the running transaction.
*/
jh = bh2jh(bh);
+ jbd_debug(5, "journal_head %p\n", jh);
+ JBUFFER_TRACE(jh, "entry");
+
/*
* This and the following assertions are unreliable since we may see jh
* in inconsistent state unless we grab bh_state lock. But this is
@@ -1409,9 +1410,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
}
journal = transaction->t_journal;
- jbd_debug(5, "journal_head %p\n", jh);
- JBUFFER_TRACE(jh, "entry");
-
jbd_lock_bh_state(bh);
if (jh->b_modified == 0) {
@@ -1597,9 +1595,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
__jbd2_journal_unfile_buffer(jh);
if (!buffer_jbd(bh)) {
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(bh);
- __bforget(bh);
- goto drop;
+ goto not_jbd;
}
}
spin_unlock(&journal->j_list_lock);
@@ -1609,14 +1605,21 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
/* However, if the buffer is still owned by a prior
* (committing) transaction, we can't drop it yet... */
JBUFFER_TRACE(jh, "belongs to older transaction");
- /* ... but we CAN drop it from the new transaction if we
- * have also modified it since the original commit. */
+ /* ... but we CAN drop it from the new transaction through
+ * marking the buffer as freed and set j_next_transaction to
+ * the new transaction, so that not only the commit code
+ * knows it should clear dirty bits when it is done with the
+ * buffer, but also the buffer can be checkpointed only
+ * after the new transaction commits. */
- if (jh->b_next_transaction) {
- J_ASSERT(jh->b_next_transaction == transaction);
+ set_buffer_freed(bh);
+
+ if (!jh->b_next_transaction) {
spin_lock(&journal->j_list_lock);
- jh->b_next_transaction = NULL;
+ jh->b_next_transaction = transaction;
spin_unlock(&journal->j_list_lock);
+ } else {
+ J_ASSERT(jh->b_next_transaction == transaction);
/*
* only drop a reference if this transaction modified
@@ -1625,9 +1628,40 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
if (was_modified)
drop_reserve = 1;
}
+ } else {
+ /*
+ * Finally, if the buffer is not belongs to any
+ * transaction, we can just drop it now if it has no
+ * checkpoint.
+ */
+ spin_lock(&journal->j_list_lock);
+ if (!jh->b_cp_transaction) {
+ JBUFFER_TRACE(jh, "belongs to none transaction");
+ spin_unlock(&journal->j_list_lock);
+ goto not_jbd;
+ }
+
+ /*
+ * Otherwise, if the buffer has been written to disk,
+ * it is safe to remove the checkpoint and drop it.
+ */
+ if (!buffer_dirty(bh)) {
+ __jbd2_journal_remove_checkpoint(jh);
+ spin_unlock(&journal->j_list_lock);
+ goto not_jbd;
+ }
+
+ /*
+ * The buffer is still not written to disk, we should
+ * attach this buffer to current transaction so that the
+ * buffer can be checkpointed only after the current
+ * transaction commits.
+ */
+ clear_buffer_dirty(bh);
+ __jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
+ spin_unlock(&journal->j_list_lock);
}
-not_jbd:
jbd_unlock_bh_state(bh);
__brelse(bh);
drop:
@@ -1636,6 +1670,11 @@ drop:
handle->h_buffer_credits++;
}
return err;
+
+not_jbd:
+ jbd_unlock_bh_state(bh);
+ __bforget(bh);
+ goto drop;
}
/**