From 6e06ae88edae77379bef7c0cb7d3c2dd88676867 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Sun, 12 Jul 2015 18:11:30 -0400
Subject: jbd2: speedup jbd2_journal_dirty_metadata()

It is often the case that we mark buffer as having dirty metadata when
the buffer is already in that state (frequent for bitmaps, inode table
blocks, superblock). Thus it is unnecessary to contend on grabbing
journal head reference and bh_state lock. Avoid that by checking whether
any modification to the buffer is needed before grabbing any locks or
references.

[ Note: this is a fixed version of commit 2143c1965a761, which was
  reverted in ebeaa8ddb3663b5 due to a false positive triggering of an
  assertion check. -- Ted ]

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/transaction.c | 38 ++++++++++++++++++++++++++++++++------
 1 file changed, 32 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index f3d06174b051..a6eec9747c74 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1280,8 +1280,6 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
 	triggers->t_abort(triggers, jh2bh(jh));
 }
 
-
-
 /**
  * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
  * @handle: transaction to add buffer to.
@@ -1314,12 +1312,41 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 
 	if (is_handle_aborted(handle))
 		return -EROFS;
-	journal = transaction->t_journal;
-	jh = jbd2_journal_grab_journal_head(bh);
-	if (!jh) {
+	if (!buffer_jbd(bh)) {
 		ret = -EUCLEAN;
 		goto out;
 	}
+	/*
+	 * We don't grab jh reference here since the buffer must be part
+	 * of the running transaction.
+	 */
+	jh = bh2jh(bh);
+	/*
+	 * This and the following assertions are unreliable since we may see jh
+	 * in inconsistent state unless we grab bh_state lock. But this is
+	 * crucial to catch bugs so let's do a reliable check until the
+	 * lockless handling is fully proven.
+	 */
+	if (jh->b_transaction != transaction &&
+	    jh->b_next_transaction != transaction) {
+		jbd_lock_bh_state(bh);
+		J_ASSERT_JH(jh, jh->b_transaction == transaction ||
+				jh->b_next_transaction == transaction);
+		jbd_unlock_bh_state(bh);
+	}
+	if (jh->b_modified == 1) {
+		/* If it's in our transaction it must be in BJ_Metadata list. */
+		if (jh->b_transaction == transaction &&
+		    jh->b_jlist != BJ_Metadata) {
+			jbd_lock_bh_state(bh);
+			J_ASSERT_JH(jh, jh->b_transaction != transaction ||
+					jh->b_jlist == BJ_Metadata);
+			jbd_unlock_bh_state(bh);
+		}
+		goto out;
+	}
+
+	journal = transaction->t_journal;
 	jbd_debug(5, "journal_head %p\n", jh);
 	JBUFFER_TRACE(jh, "entry");
 
@@ -1410,7 +1437,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 	spin_unlock(&journal->j_list_lock);
 out_unlock_bh:
 	jbd_unlock_bh_state(bh);
-	jbd2_journal_put_journal_head(jh);
 out:
 	JBUFFER_TRACE(jh, "exit");
 	return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 806c24adf74ec02543e4dcad989c0336f9fe82c4 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 17 Jul 2015 11:16:47 -0400
Subject: ext4 crypto: use a jbd2 transaction when adding a crypto policy

Start a jbd2 transaction, and mark the inode dirty on the inode under
that transaction after setting the encrypt flag.  Otherwise if the
directory isn't modified after setting the crypto policy, the
encrypted flag might not survive the inode getting pushed out from
memory, or the the file system getting unmounted and remounted.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/crypto_policy.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c
index 02c4e5df7afb..a640ec2c4b13 100644
--- a/fs/ext4/crypto_policy.c
+++ b/fs/ext4/crypto_policy.c
@@ -12,6 +12,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 
+#include "ext4_jbd2.h"
 #include "ext4.h"
 #include "xattr.h"
 
@@ -49,7 +50,8 @@ static int ext4_create_encryption_context_from_policy(
 	struct inode *inode, const struct ext4_encryption_policy *policy)
 {
 	struct ext4_encryption_context ctx;
-	int res = 0;
+	handle_t *handle;
+	int res, res2;
 
 	res = ext4_convert_inline_data(inode);
 	if (res)
@@ -78,11 +80,22 @@ static int ext4_create_encryption_context_from_policy(
 	BUILD_BUG_ON(sizeof(ctx.nonce) != EXT4_KEY_DERIVATION_NONCE_SIZE);
 	get_random_bytes(ctx.nonce, EXT4_KEY_DERIVATION_NONCE_SIZE);
 
+	handle = ext4_journal_start(inode, EXT4_HT_MISC,
+				    ext4_jbd2_credits_xattr(inode));
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
 	res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION,
 			     EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
 			     sizeof(ctx), 0);
-	if (!res)
+	if (!res) {
 		ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
+		res = ext4_mark_inode_dirty(handle, inode);
+		if (res)
+			EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");
+	}
+	res2 = ext4_journal_stop(handle);
+	if (!res)
+		res = res2;
 	return res;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 27977b69e493c9b259eb0490534e0f74bc325ba8 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Fri, 17 Jul 2015 11:33:16 -0400
Subject: ext4 crypto: check for too-short encrypted file names

An encrypted file name should never be shorter than an 16 bytes, the
AES block size.  The 3.10 crypto layer will oops and crash the kernel
if ciphertext shorter than the block size is passed to it.

Fortunately, in modern kernels the crypto layer will not crash the
kernel in this scenario, but nevertheless, it represents a corrupted
directory, and we should detect it and mark the file system as
corrupted so that e2fsck can fix this.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/crypto_fname.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
index 7dc4eb55913c..86ee996a2bd4 100644
--- a/fs/ext4/crypto_fname.c
+++ b/fs/ext4/crypto_fname.c
@@ -329,6 +329,10 @@ int _ext4_fname_disk_to_usr(struct inode *inode,
 			return oname->len;
 		}
 	}
+	if (iname->len < EXT4_CRYPTO_BLOCK_SIZE) {
+		EXT4_ERROR_INODE(inode, "encrypted inode too small");
+		return -EUCLEAN;
+	}
 	if (EXT4_I(inode)->i_crypt_info)
 		return ext4_fname_decrypt(inode, iname, oname);
 
-- 
cgit v1.2.3-59-g8ed1b


From 5a33911fa5ecd7395115df2e27fbd22b73357ac5 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 21 Jul 2015 23:50:24 -0400
Subject: ext4: replace ext4_io_submit->io_op with ->io_wbc

ext4_io_submit_init() takes the pointer to writeback_control to test
its sync_mode and determine between WRITE and WRITE_SYNC and records
the result in ->io_op.  This patch makes it record the pointer
directly and moves the test to ext4_io_submit().

This doesn't cause any noticeable differences now but having
writeback_control available throughout IO submission path will be
depended upon by the planned cgroup writeback support.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/ext4.h    | 2 +-
 fs/ext4/page-io.c | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f5e9f04220c1..32071f5c1c26 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -187,7 +187,7 @@ typedef struct ext4_io_end {
 } ext4_io_end_t;
 
 struct ext4_io_submit {
-	int			io_op;
+	struct writeback_control *io_wbc;
 	struct bio		*io_bio;
 	ext4_io_end_t		*io_end;
 	sector_t		io_next_block;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 5602450f03f6..a917bfe3e70c 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -357,8 +357,10 @@ void ext4_io_submit(struct ext4_io_submit *io)
 	struct bio *bio = io->io_bio;
 
 	if (bio) {
+		int io_op = io->io_wbc->sync_mode == WB_SYNC_ALL ?
+			    WRITE_SYNC : WRITE;
 		bio_get(io->io_bio);
-		submit_bio(io->io_op, io->io_bio);
+		submit_bio(io_op, io->io_bio);
 		bio_put(io->io_bio);
 	}
 	io->io_bio = NULL;
@@ -367,7 +369,7 @@ void ext4_io_submit(struct ext4_io_submit *io)
 void ext4_io_submit_init(struct ext4_io_submit *io,
 			 struct writeback_control *wbc)
 {
-	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?  WRITE_SYNC : WRITE);
+	io->io_wbc = wbc;
 	io->io_bio = NULL;
 	io->io_end = NULL;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 001e4a8775f6e8ad52a89e0072f09aee47d5d252 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Tue, 21 Jul 2015 23:51:26 -0400
Subject: ext4: implement cgroup writeback support

For ordered and writeback data modes, all data IOs go through
ext4_io_submit.  This patch adds cgroup writeback support by invoking
wbc_init_bio() from io_submit_init_bio() and wbc_account_io() in
io_submit_add_bh().  Journal data which is written by jbd2 worker is
left alone by this patch and will always be written out from the root
cgroup.

ext4_fill_super() is updated to set MS_CGROUPWB when data mode is
either ordered or writeback.  In journaled data mode, most IOs become
synchronous through the journal and enabling cgroup writeback support
doesn't make much sense or difference.  Journaled data mode is left
alone.

Lightly tested with sequential data write workload.  Behaves as
expected.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/page-io.c | 2 ++
 fs/ext4/super.c   | 2 ++
 2 files changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index a917bfe3e70c..58ab2e3dd114 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -383,6 +383,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
 	bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
 	if (!bio)
 		return -ENOMEM;
+	wbc_init_bio(io->io_wbc, bio);
 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
 	bio->bi_bdev = bh->b_bdev;
 	bio->bi_end_io = ext4_end_bio;
@@ -411,6 +412,7 @@ submit_and_retry:
 	ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
 	if (ret != bh->b_size)
 		goto submit_and_retry;
+	wbc_account_io(io->io_wbc, page, bh->b_size);
 	io->io_next_block++;
 	return 0;
 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 58987b5c514b..d2c9a7985fd7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3643,6 +3643,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		}
 		if (test_opt(sb, DELALLOC))
 			clear_opt(sb, DELALLOC);
+	} else {
+		sb->s_iflags |= SB_I_CGROUPWB;
 	}
 
 	sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
-- 
cgit v1.2.3-59-g8ed1b


From 5ba92bcf0dd63b35a85e90c3016989733b239bb5 Mon Sep 17 00:00:00 2001
From: Carlos Maiolino <cmaiolino@redhat.com>
Date: Tue, 21 Jul 2015 23:57:59 -0400
Subject: ext4: reject journal options for ext2 mounts

There is no reason to allow ext2 filesystems be mounted with journal
mount options. So, this patch adds them to the MOPT_NO_EXT2 mount
options list.

Signed-off-by: Carlos Maiolino <cmaiolino@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/super.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index d2c9a7985fd7..14909cd91515 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1394,9 +1394,9 @@ static const struct mount_opts {
 	{Opt_stripe, 0, MOPT_GTE0},
 	{Opt_resuid, 0, MOPT_GTE0},
 	{Opt_resgid, 0, MOPT_GTE0},
-	{Opt_journal_dev, 0, MOPT_GTE0},
-	{Opt_journal_path, 0, MOPT_STRING},
-	{Opt_journal_ioprio, 0, MOPT_GTE0},
+	{Opt_journal_dev, 0, MOPT_NO_EXT2 | MOPT_GTE0},
+	{Opt_journal_path, 0, MOPT_NO_EXT2 | MOPT_STRING},
+	{Opt_journal_ioprio, 0, MOPT_NO_EXT2 | MOPT_GTE0},
 	{Opt_data_journal, EXT4_MOUNT_JOURNAL_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
 	{Opt_data_ordered, EXT4_MOUNT_ORDERED_DATA, MOPT_NO_EXT2 | MOPT_DATAJ},
 	{Opt_data_writeback, EXT4_MOUNT_WRITEBACK_DATA,
-- 
cgit v1.2.3-59-g8ed1b


From d76d99b219e1a233a720775c0451c310d34594e8 Mon Sep 17 00:00:00 2001
From: Laurent Navet <laurent.navet@gmail.com>
Date: Wed, 22 Jul 2015 00:08:08 -0400
Subject: ext4 crypto: exit cleanly if ext4_derive_key_aes() fails

Return value of ext4_derive_key_aes() is stored but not used.
Add test to exit cleanly if ext4_derive_key_aes() fail.
Also fix coverity CID 1309760.

Signed-off-by: Laurent Navet <laurent.navet@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/crypto_key.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index 442d24e8efc0..ce75bc8b9aef 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -220,6 +220,8 @@ retry:
 	BUG_ON(master_key->size != EXT4_AES_256_XTS_KEY_SIZE);
 	res = ext4_derive_key_aes(ctx.nonce, master_key->raw,
 				  raw_key);
+	if (res)
+		goto out;
 got_key:
 	ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
 	if (!ctfm || IS_ERR(ctfm)) {
-- 
cgit v1.2.3-59-g8ed1b


From bb9a4e7e824e998070c2a2d1d4c67bc971ab72b8 Mon Sep 17 00:00:00 2001
From: Laurent Navet <laurent.navet@gmail.com>
Date: Wed, 22 Jul 2015 00:09:45 -0400
Subject: ext4 crypto: fix spelling typo in comment

Signed-off-by: Laurent Navet <laurent.navet@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/crypto_key.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index ce75bc8b9aef..1d510c11b100 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -30,7 +30,7 @@ static void derive_crypt_complete(struct crypto_async_request *req, int rc)
 
 /**
  * ext4_derive_key_aes() - Derive a key using AES-128-ECB
- * @deriving_key: Encryption key used for derivatio.
+ * @deriving_key: Encryption key used for derivation.
  * @source_key:   Source key to which to apply derivation.
  * @derived_key:  Derived key.
  *
-- 
cgit v1.2.3-59-g8ed1b


From 564bc402526e437729ecafe3c3511f7cab9f0327 Mon Sep 17 00:00:00 2001
From: Daeho Jeong <daeho.jeong@samsung.com>
Date: Thu, 23 Jul 2015 09:46:11 -0400
Subject: ext4, jbd2: add REQ_FUA flag when recording an error in the
 superblock

When an error condition is detected, an error status should be recorded into
superblocks of EXT4 or JBD2. However, the write request is submitted now
without REQ_FUA flag, even in "barrier=1" mode, which is followed by
panic() function in "errors=panic" mode. On mobile devices which make
whole system reset as soon as kernel panic occurs, this write request
containing an error flag will disappear just from storage cache without
written to the physical cells. Therefore, when next start, even forever,
the error flag cannot be shown in both superblocks, and e2fsck cannot fix
the filesystem problems automatically, unless e2fsck is executed in
force checking mode.

[ Changed use test_opt(sb, BARRIER) of checking the journal flags -- TYT ]

Signed-off-by: Daeho Jeong <daeho.jeong@samsung.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/super.c   | 3 ++-
 fs/jbd2/journal.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 14909cd91515..a51db9ca90fd 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4667,7 +4667,8 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 	ext4_superblock_csum_set(sb);
 	mark_buffer_dirty(sbh);
 	if (sync) {
-		error = sync_dirty_buffer(sbh);
+		error = __sync_dirty_buffer(sbh,
+			test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC);
 		if (error)
 			return error;
 
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 4ff3fad4e9e3..fe1b4bdecdfa 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1456,7 +1456,7 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
 	sb->s_errno    = cpu_to_be32(journal->j_errno);
 	read_unlock(&journal->j_state_lock);
 
-	jbd2_write_superblock(journal, WRITE_SYNC);
+	jbd2_write_superblock(journal, WRITE_FUA);
 }
 EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
 
-- 
cgit v1.2.3-59-g8ed1b


From 841df7df196237ea63233f0f9eaa41db53afd70f Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.com>
Date: Tue, 28 Jul 2015 14:57:14 -0400
Subject: jbd2: avoid infinite loop when destroying aborted journal

Commit 6f6a6fda2945 "jbd2: fix ocfs2 corrupt when updating journal
superblock fails" changed jbd2_cleanup_journal_tail() to return EIO
when the journal is aborted. That makes logic in
jbd2_log_do_checkpoint() bail out which is fine, except that
jbd2_journal_destroy() expects jbd2_log_do_checkpoint() to always make
a progress in cleaning the journal. Without it jbd2_journal_destroy()
just loops in an infinite loop.

Fix jbd2_journal_destroy() to cleanup journal checkpoint lists of
jbd2_log_do_checkpoint() fails with error.

Reported-by: Eryu Guan <guaneryu@gmail.com>
Tested-by: Eryu Guan <guaneryu@gmail.com>
Fixes: 6f6a6fda294506dfe0e3e0a253bb2d2923f28f0a
Signed-off-by: Jan Kara <jack@suse.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/checkpoint.c | 39 +++++++++++++++++++++++++++++++++------
 fs/jbd2/commit.c     |  2 +-
 fs/jbd2/journal.c    | 11 ++++++++++-
 include/linux/jbd2.h |  3 ++-
 4 files changed, 46 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 4227dc4f7437..8c44654ce274 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -417,12 +417,12 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
  * journal_clean_one_cp_list
  *
  * Find all the written-back checkpoint buffers in the given list and
- * release them.
+ * release them. If 'destroy' is set, clean all buffers unconditionally.
  *
  * Called with j_list_lock held.
  * Returns 1 if we freed the transaction, 0 otherwise.
  */
-static int journal_clean_one_cp_list(struct journal_head *jh)
+static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
 {
 	struct journal_head *last_jh;
 	struct journal_head *next_jh = jh;
@@ -436,7 +436,10 @@ static int journal_clean_one_cp_list(struct journal_head *jh)
 	do {
 		jh = next_jh;
 		next_jh = jh->b_cpnext;
-		ret = __try_to_free_cp_buf(jh);
+		if (!destroy)
+			ret = __try_to_free_cp_buf(jh);
+		else
+			ret = __jbd2_journal_remove_checkpoint(jh) + 1;
 		if (!ret)
 			return freed;
 		if (ret == 2)
@@ -459,10 +462,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh)
  * journal_clean_checkpoint_list
  *
  * Find all the written-back checkpoint buffers in the journal and release them.
+ * If 'destroy' is set, release all buffers unconditionally.
  *
  * Called with j_list_lock held.
  */
-void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
 	int ret;
@@ -476,7 +480,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
 	do {
 		transaction = next_transaction;
 		next_transaction = transaction->t_cpnext;
-		ret = journal_clean_one_cp_list(transaction->t_checkpoint_list);
+		ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
+						destroy);
 		/*
 		 * This function only frees up some memory if possible so we
 		 * dont have an obligation to finish processing. Bail out if
@@ -492,7 +497,7 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
 		 * we can possibly see not yet submitted buffers on io_list
 		 */
 		ret = journal_clean_one_cp_list(transaction->
-				t_checkpoint_io_list);
+				t_checkpoint_io_list, destroy);
 		if (need_resched())
 			return;
 		/*
@@ -505,6 +510,28 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
 	} while (transaction != last_transaction);
 }
 
+/*
+ * Remove buffers from all checkpoint lists as journal is aborted and we just
+ * need to free memory
+ */
+void jbd2_journal_destroy_checkpoint(journal_t *journal)
+{
+	/*
+	 * We loop because __jbd2_journal_clean_checkpoint_list() may abort
+	 * early due to a need of rescheduling.
+	 */
+	while (1) {
+		spin_lock(&journal->j_list_lock);
+		if (!journal->j_checkpoint_transactions) {
+			spin_unlock(&journal->j_list_lock);
+			break;
+		}
+		__jbd2_journal_clean_checkpoint_list(journal, true);
+		spin_unlock(&journal->j_list_lock);
+		cond_resched();
+	}
+}
+
 /*
  * journal_remove_checkpoint: called after a buffer has been committed
  * to disk (either by being write-back flushed to disk, or being
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index b73e0215baa7..362e5f614450 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -510,7 +510,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	 * frees some memory
 	 */
 	spin_lock(&journal->j_list_lock);
-	__jbd2_journal_clean_checkpoint_list(journal);
+	__jbd2_journal_clean_checkpoint_list(journal, false);
 	spin_unlock(&journal->j_list_lock);
 
 	jbd_debug(3, "JBD2: commit phase 1\n");
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index fe1b4bdecdfa..8270fe9e3641 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1693,8 +1693,17 @@ int jbd2_journal_destroy(journal_t *journal)
 	while (journal->j_checkpoint_transactions != NULL) {
 		spin_unlock(&journal->j_list_lock);
 		mutex_lock(&journal->j_checkpoint_mutex);
-		jbd2_log_do_checkpoint(journal);
+		err = jbd2_log_do_checkpoint(journal);
 		mutex_unlock(&journal->j_checkpoint_mutex);
+		/*
+		 * If checkpointing failed, just free the buffers to avoid
+		 * looping forever
+		 */
+		if (err) {
+			jbd2_journal_destroy_checkpoint(journal);
+			spin_lock(&journal->j_list_lock);
+			break;
+		}
 		spin_lock(&journal->j_list_lock);
 	}
 
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index edb640ae9a94..eb1cebed3f36 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1042,8 +1042,9 @@ void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block);
 extern void jbd2_journal_commit_transaction(journal_t *);
 
 /* Checkpoint list management */
-void __jbd2_journal_clean_checkpoint_list(journal_t *journal);
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy);
 int __jbd2_journal_remove_checkpoint(struct journal_head *);
+void jbd2_journal_destroy_checkpoint(journal_t *journal);
 void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
 
-- 
cgit v1.2.3-59-g8ed1b


From 911af577de4e444622d46500c1f9a37ab4335d3a Mon Sep 17 00:00:00 2001
From: Eryu Guan <guaneryu@gmail.com>
Date: Tue, 28 Jul 2015 15:08:41 -0400
Subject: ext4: update c/mtime on truncate up

Commit 3da40c7b0898 ("ext4: only call ext4_truncate when size <= isize")
introduced a bug that c/mtime is not updated on truncate up.

Fix the issue by setting c/mtime explicitly in the truncate up case.

Note that ftruncate(2) is not affected, so you won't see this bug using
truncate(1) and xfs_io(1).

Signed-off-by: Zirong Lang <zorro.lang@gmail.com>
Signed-off-by: Eryu Guan <guaneryu@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/inode.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cecf9aa10811..2442eb065e19 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4725,6 +4725,14 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 				error = ext4_orphan_add(handle, inode);
 				orphan = 1;
 			}
+			/*
+			 * Update c/mtime on truncate up, ext4_truncate() will
+			 * update c/mtime in shrink case below
+			 */
+			if (!shrink) {
+				inode->i_mtime = ext4_current_time(inode);
+				inode->i_ctime = inode->i_mtime;
+			}
 			down_write(&EXT4_I(inode)->i_data_sem);
 			EXT4_I(inode)->i_disksize = attr->ia_size;
 			rc = ext4_mark_inode_dirty(handle, inode);
-- 
cgit v1.2.3-59-g8ed1b


From 923ae471773a6b324815ef3e3c5e787818ae129a Mon Sep 17 00:00:00 2001
From: "zilong.liu" <liuziloong@gmail.com>
Date: Tue, 28 Jul 2015 15:12:18 -0400
Subject: ext4 crypto: remove duplicate header file

Remove key.h which is included twice in crypto_fname.c

Signed-off-by: zilong.liu <liuziloong@gmail.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/crypto_fname.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
index 86ee996a2bd4..847f919c84d9 100644
--- a/fs/ext4/crypto_fname.c
+++ b/fs/ext4/crypto_fname.c
@@ -19,7 +19,6 @@
 #include <linux/gfp.h>
 #include <linux/kernel.h>
 #include <linux/key.h>
-#include <linux/key.h>
 #include <linux/list.h>
 #include <linux/mempool.h>
 #include <linux/random.h>
-- 
cgit v1.2.3-59-g8ed1b


From 6d3ec14d703c660c4baf8d726538b5415e23b4fb Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Tue, 4 Aug 2015 11:21:52 -0400
Subject: jbd2: limit number of reserved credits

Currently there is no limitation on number of reserved credits we can
ask for. If we ask for more reserved credits than 1/2 of maximum
transaction size, or if total number of credits exceeds the maximum
transaction size per operation (which is currently only possible with
the former) we will spin forever in start_this_handle().

Fix this by adding this limitation at the start of start_this_handle().

This patch also removes the credit limitation 1/2 of maximum transaction
size, since we really only want to limit the number of reserved credits.
There is not much point to limit the credits if there is still space in
the journal.

This accidentally also fixes the online resize, where due to the
limitation of the journal credits we're unable to grow file systems with
1k block size and size between 16M and 32M. It has been partially fixed
by 2c869b262a10ca99cb866d04087d75311587a30c, but not entirely.

Thanks Jan Kara for helping me getting the correct fix.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/jbd2/transaction.c | 36 +++++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index a6eec9747c74..6b8338ec2464 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -204,6 +204,20 @@ static int add_transaction_credits(journal_t *journal, int blocks,
 		 * attach this handle to a new transaction.
 		 */
 		atomic_sub(total, &t->t_outstanding_credits);
+
+		/*
+		 * Is the number of reserved credits in the current transaction too
+		 * big to fit this handle? Wait until reserved credits are freed.
+		 */
+		if (atomic_read(&journal->j_reserved_credits) + total >
+		    journal->j_max_transaction_buffers) {
+			read_unlock(&journal->j_state_lock);
+			wait_event(journal->j_wait_reserved,
+				   atomic_read(&journal->j_reserved_credits) + total <=
+				   journal->j_max_transaction_buffers);
+			return 1;
+		}
+
 		wait_transaction_locked(journal);
 		return 1;
 	}
@@ -262,20 +276,24 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
 	int		rsv_blocks = 0;
 	unsigned long ts = jiffies;
 
+	if (handle->h_rsv_handle)
+		rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
+
 	/*
-	 * 1/2 of transaction can be reserved so we can practically handle
-	 * only 1/2 of maximum transaction size per operation
+	 * Limit the number of reserved credits to 1/2 of maximum transaction
+	 * size and limit the number of total credits to not exceed maximum
+	 * transaction size per operation.
 	 */
-	if (WARN_ON(blocks > journal->j_max_transaction_buffers / 2)) {
-		printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n",
-		       current->comm, blocks,
-		       journal->j_max_transaction_buffers / 2);
+	if ((rsv_blocks > journal->j_max_transaction_buffers / 2) ||
+	    (rsv_blocks + blocks > journal->j_max_transaction_buffers)) {
+		printk(KERN_ERR "JBD2: %s wants too many credits "
+		       "credits:%d rsv_credits:%d max:%d\n",
+		       current->comm, blocks, rsv_blocks,
+		       journal->j_max_transaction_buffers);
+		WARN_ON(1);
 		return -ENOSPC;
 	}
 
-	if (handle->h_rsv_handle)
-		rsv_blocks = handle->h_rsv_handle->h_buffer_credits;
-
 alloc_transaction:
 	if (!journal->j_running_transaction) {
 		/*
-- 
cgit v1.2.3-59-g8ed1b


From c642dc9e1aaed953597e7092d7df329e6234096e Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Sat, 15 Aug 2015 10:45:06 -0400
Subject: ext4: don't manipulate recovery flag when freezing no-journal fs

At some point along this sequence of changes:

f6e63f9 ext4: fold ext4_nojournal_sops into ext4_sops
bb04457 ext4: support freezing ext2 (nojournal) file systems
9ca9238 ext4: Use separate super_operations structure for no_journal filesystems

ext4 started setting needs_recovery on filesystems without journals
when they are unfrozen.  This makes no sense, and in fact confuses
blkid to the point where it doesn't recognize the filesystem at all.

(freeze ext2; unfreeze ext2; run blkid; see no output; run dumpe2fs,
see needs_recovery set on fs w/ no journal).

To fix this, don't manipulate the INCOMPAT_RECOVER feature on
filesystems without journals.

Reported-by: Stu Mark <smark@datto.com>
Reviewed-by: Jan Kara <jack@suse.com>
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 fs/ext4/super.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a51db9ca90fd..70c52ea0a27c 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4836,10 +4836,11 @@ static int ext4_freeze(struct super_block *sb)
 		error = jbd2_journal_flush(journal);
 		if (error < 0)
 			goto out;
+
+		/* Journal blocked and flushed, clear needs_recovery flag. */
+		EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 	}
 
-	/* Journal blocked and flushed, clear needs_recovery flag. */
-	EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
 	error = ext4_commit_super(sb, 1);
 out:
 	if (journal)
@@ -4857,8 +4858,11 @@ static int ext4_unfreeze(struct super_block *sb)
 	if (sb->s_flags & MS_RDONLY)
 		return 0;
 
-	/* Reset the needs_recovery flag before the fs is unlocked. */
-	EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+	if (EXT4_SB(sb)->s_journal) {
+		/* Reset the needs_recovery flag before the fs is unlocked. */
+		EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+	}
+
 	ext4_commit_super(sb, 1);
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 9810446836ab5a4b34a749bb77f3eb5836f982cc Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 15 Aug 2015 11:30:31 -0400
Subject: ext4: simplify some code in read_mmp_block()

My static check complains because we have:

	if (!*bh)
		return -ENOMEM;
	if (*bh) {

The second check is unnecessary.

I've simplified this code by moving the "if (!*bh)" checks around.  Also
Andreas Dilger says we should probably print a warning if sb_getblk()
fails.

[ Restructured the code so that we print a warning message as well if
  the mmp block doesn't check out, and to print the error code to
  disambiguate between the error cases.  - TYT ]

Reviewed-by: Andreas Dilger <adilger@dilger.ca>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mmp.c | 46 +++++++++++++++++++++++++---------------------
 1 file changed, 25 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 8313ca3324ec..048c52af2fb6 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -69,6 +69,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
 			  ext4_fsblk_t mmp_block)
 {
 	struct mmp_struct *mmp;
+	int ret;
 
 	if (*bh)
 		clear_buffer_uptodate(*bh);
@@ -76,33 +77,36 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
 	/* This would be sb_bread(sb, mmp_block), except we need to be sure
 	 * that the MD RAID device cache has been bypassed, and that the read
 	 * is not blocked in the elevator. */
-	if (!*bh)
+	if (!*bh) {
 		*bh = sb_getblk(sb, mmp_block);
-	if (!*bh)
-		return -ENOMEM;
-	if (*bh) {
-		get_bh(*bh);
-		lock_buffer(*bh);
-		(*bh)->b_end_io = end_buffer_read_sync;
-		submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
-		wait_on_buffer(*bh);
-		if (!buffer_uptodate(*bh)) {
-			brelse(*bh);
-			*bh = NULL;
+		if (!*bh) {
+			ret = -ENOMEM;
+			goto warn_exit;
 		}
 	}
-	if (unlikely(!*bh)) {
-		ext4_warning(sb, "Error while reading MMP block %llu",
-			     mmp_block);
-		return -EIO;
+
+	get_bh(*bh);
+	lock_buffer(*bh);
+	(*bh)->b_end_io = end_buffer_read_sync;
+	submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
+	wait_on_buffer(*bh);
+	if (!buffer_uptodate(*bh)) {
+		brelse(*bh);
+		*bh = NULL;
+		ret = -EIO;
+		goto warn_exit;
 	}
 
 	mmp = (struct mmp_struct *)((*bh)->b_data);
-	if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC ||
-	    !ext4_mmp_csum_verify(sb, mmp))
-		return -EINVAL;
-
-	return 0;
+	if (le32_to_cpu(mmp->mmp_magic) == EXT4_MMP_MAGIC &&
+	    ext4_mmp_csum_verify(sb, mmp))
+		return 0;
+	ret = -EINVAL;
+
+warn_exit:
+	ext4_warning(sb, "Error %d while reading MMP block %llu",
+		     ret, mmp_block);
+	return ret;
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From da0b5e40ab10f44019a1ecec09fadfcd5bdb76b6 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 15 Aug 2015 11:38:13 -0400
Subject: ext4: silence a format string false positive

Static checkers complain that the format string should be "%s".  It does
not make a difference for the current code.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/mmp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 048c52af2fb6..6eb1a619890c 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -115,7 +115,7 @@ warn_exit:
 void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
 		    const char *function, unsigned int line, const char *msg)
 {
-	__ext4_warning(sb, function, line, msg);
+	__ext4_warning(sb, function, line, "%s", msg);
 	__ext4_warning(sb, function, line,
 		       "MMP failure info: last update time: %llu, last update "
 		       "node: %s, last update device: %s\n",
-- 
cgit v1.2.3-59-g8ed1b


From e294a5371b2e0bd22d4a917d4c354a52a7057b6e Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 15 Aug 2015 14:59:44 -0400
Subject: ext4: ratelimit the file system mounted message

The xfstests ext4/305 will mount and unmount the same file system over
4,000 times, and each one of these will cause a system log message.
Ratelimit this message since if we are getting more than a few dozen
of these messages, they probably aren't going to be helpful.

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
---
 fs/ext4/super.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 70c52ea0a27c..45658c1bc324 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -60,6 +60,7 @@ static struct ext4_lazy_init *ext4_li_info;
 static struct mutex ext4_li_mtx;
 static struct ext4_features *ext4_feat;
 static int ext4_mballoc_ready;
+static struct ratelimit_state ext4_mount_msg_ratelimit;
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
@@ -4277,9 +4278,10 @@ no_journal:
 				 "the device does not support discard");
 	}
 
-	ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
-		 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
-		 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
+	if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
+		ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
+			 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
+			 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);
 
 	if (es->s_error_count)
 		mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
@@ -5617,6 +5619,7 @@ static int __init ext4_init_fs(void)
 {
 	int i, err;
 
+	ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
 	ext4_li_info = NULL;
 	mutex_init(&ext4_li_mtx);
 
-- 
cgit v1.2.3-59-g8ed1b


From bdfe0cbd746aa9b2509c2f6d6be17193cf7facd7 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sun, 16 Aug 2015 10:03:57 -0400
Subject: Revert "ext4: remove block_device_ejected"

This reverts commit 08439fec266c3cc5702953b4f54bdf5649357de0.

Unfortunately we still need to test for bdi->dev to avoid a crash when a
USB stick is yanked out while a file system is mounted:

   usb 2-2: USB disconnect, device number 2
   Buffer I/O error on dev sdb1, logical block 15237120, lost sync page write
   JBD2: Error -5 detected when updating journal superblock for sdb1-8.
   BUG: unable to handle kernel paging request at 34beb000
   IP: [<c136ce88>] __percpu_counter_add+0x18/0xc0
   *pdpt = 0000000023db9001 *pde = 0000000000000000
   Oops: 0000 [#1] SMP
   CPU: 0 PID: 4083 Comm: umount Tainted: G     U     OE   4.1.1-040101-generic #201507011435
   Hardware name: LENOVO 7675CTO/7675CTO, BIOS 7NETC2WW (2.22 ) 03/22/2011
   task: ebf06b50 ti: ebebc000 task.ti: ebebc000
   EIP: 0060:[<c136ce88>] EFLAGS: 00010082 CPU: 0
   EIP is at __percpu_counter_add+0x18/0xc0
   EAX: f21c8e88 EBX: f21c8e88 ECX: 00000000 EDX: 00000001
   ESI: 00000001 EDI: 00000000 EBP: ebebde60 ESP: ebebde40
    DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068
   CR0: 8005003b CR2: 34beb000 CR3: 33354200 CR4: 000007f0
   Stack:
    c1abe100 edcb0098 edcb00ec ffffffff f21c8e68 ffffffff f21c8e68 f286d160
    ebebde84 c1160454 00000010 00000282 f72a77f8 00000984 f72a77f8 f286d160
    f286d170 ebebdea0 c11e613f 00000000 00000282 f72a77f8 edd7f4d0 00000000
   Call Trace:
    [<c1160454>] account_page_dirtied+0x74/0x110
    [<c11e613f>] __set_page_dirty+0x3f/0xb0
    [<c11e6203>] mark_buffer_dirty+0x53/0xc0
    [<c124a0cb>] ext4_commit_super+0x17b/0x250
    [<c124ac71>] ext4_put_super+0xc1/0x320
    [<c11f04ba>] ? fsnotify_unmount_inodes+0x1aa/0x1c0
    [<c11cfeda>] ? evict_inodes+0xca/0xe0
    [<c11b925a>] generic_shutdown_super+0x6a/0xe0
    [<c10a1df0>] ? prepare_to_wait_event+0xd0/0xd0
    [<c1165a50>] ? unregister_shrinker+0x40/0x50
    [<c11b92f6>] kill_block_super+0x26/0x70
    [<c11b94f5>] deactivate_locked_super+0x45/0x80
    [<c11ba007>] deactivate_super+0x47/0x60
    [<c11d2b39>] cleanup_mnt+0x39/0x80
    [<c11d2bc0>] __cleanup_mnt+0x10/0x20
    [<c1080b51>] task_work_run+0x91/0xd0
    [<c1011e3c>] do_notify_resume+0x7c/0x90
    [<c1720da5>] work_notify
   Code: 8b 55 e8 e9 f4 fe ff ff 90 90 90 90 90 90 90 90 90 90 90 55 89 e5 83 ec 20 89 5d f4 89 c3 89 75 f8 89 d6 89 7d fc 89 cf 8b 48 14 <64> 8b 01 89 45 ec 89 c2 8b 45 08 c1 fa 1f 01 75 ec 89 55 f0 89
   EIP: [<c136ce88>] __percpu_counter_add+0x18/0xc0 SS:ESP 0068:ebebde40
   CR2: 0000000034beb000
   ---[ end trace dd564a7bea834ecd ]---

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=101011

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@vger.kernel.org
---
 fs/ext4/super.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 45658c1bc324..bd3ff923dd59 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -326,6 +326,22 @@ static void save_error_info(struct super_block *sb, const char *func,
 	ext4_commit_super(sb, 1);
 }
 
+/*
+ * The del_gendisk() function uninitializes the disk-specific data
+ * structures, including the bdi structure, without telling anyone
+ * else.  Once this happens, any attempt to call mark_buffer_dirty()
+ * (for example, by ext4_commit_super), will cause a kernel OOPS.
+ * This is a kludge to prevent these oops until we can put in a proper
+ * hook in del_gendisk() to inform the VFS and file system layers.
+ */
+static int block_device_ejected(struct super_block *sb)
+{
+	struct inode *bd_inode = sb->s_bdev->bd_inode;
+	struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
+
+	return bdi->dev == NULL;
+}
+
 static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
 {
 	struct super_block		*sb = journal->j_private;
@@ -4621,7 +4637,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 	struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
 	int error = 0;
 
-	if (!sbh)
+	if (!sbh || block_device_ejected(sb))
 		return error;
 	if (buffer_write_io_error(sbh)) {
 		/*
-- 
cgit v1.2.3-59-g8ed1b