about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- fs/gfs2/acl.c 7
-rw-r--r-- fs/gfs2/aops.c 11
-rw-r--r-- fs/gfs2/bmap.c 9
-rw-r--r-- fs/gfs2/dir.c 3
-rw-r--r-- fs/gfs2/file.c 43
-rw-r--r-- fs/gfs2/glock.c 137
-rw-r--r-- fs/gfs2/glops.c 157
-rw-r--r-- fs/gfs2/incore.h 27
-rw-r--r-- fs/gfs2/inode.c 53
-rw-r--r-- fs/gfs2/lock_dlm.c 52
-rw-r--r-- fs/gfs2/log.c 288
-rw-r--r-- fs/gfs2/log.h 1
-rw-r--r-- fs/gfs2/lops.c 14
-rw-r--r-- fs/gfs2/meta_io.c 3
-rw-r--r-- fs/gfs2/ops_fstype.c 59
-rw-r--r-- fs/gfs2/quota.c 76
-rw-r--r-- fs/gfs2/quota.h 4
-rw-r--r-- fs/gfs2/recovery.c 12
-rw-r--r-- fs/gfs2/rgrp.c 88
-rw-r--r-- fs/gfs2/rgrp.h 4
-rw-r--r-- fs/gfs2/super.c 112
-rw-r--r-- fs/gfs2/super.h 1
-rw-r--r-- fs/gfs2/sys.c 5
-rw-r--r-- fs/gfs2/trans.c 4
-rw-r--r-- fs/gfs2/util.c 419
-rw-r--r-- fs/gfs2/util.h 76
-rw-r--r-- fs/gfs2/xattr.c 12
27 files changed, 1168 insertions, 509 deletions
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 09e6be8aa036..2e939f5fe751 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -21,6 +21,7 @@
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
+#include "quota.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
@@ -116,14 +117,14 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (acl && acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode)))
return -E2BIG;
- ret = gfs2_rsqa_alloc(ip);
+ ret = gfs2_qa_get(ip);
if (ret)
return ret;
if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
if (ret)
- return ret;
+ goto out;
need_unlock = true;
}
@@ -143,5 +144,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
unlock:
if (need_unlock)
gfs2_glock_dq_uninit(&gh);
+out:
+ gfs2_qa_put(ip);
return ret;
}
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ba83b49ce18c..786c1ce8f030 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -805,11 +805,16 @@ int gfs2_releasepage(struct page *page, gfp_t gfp_mask)
bd = bh->b_private;
if (bd) {
gfs2_assert_warn(sdp, bd->bd_bh == bh);
- if (!list_empty(&bd->bd_list))
- list_del_init(&bd->bd_list);
bd->bd_bh = NULL;
bh->b_private = NULL;
- kmem_cache_free(gfs2_bufdata_cachep, bd);
+ /*
+ * The bd may still be queued as a revoke, in which
+ * case we must not dequeue nor free it.
+ */
+ if (!bd->bd_blkno && !list_empty(&bd->bd_list))
+ list_del_init(&bd->bd_list);
+ if (list_empty(&bd->bd_list))
+ kmem_cache_free(gfs2_bufdata_cachep, bd);
}
bh = bh->b_this_page;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 08f6fbb3655e..936a8ec6b48e 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -2183,7 +2183,7 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
inode_dio_wait(inode);
- ret = gfs2_rsqa_alloc(ip);
+ ret = gfs2_qa_get(ip);
if (ret)
goto out;
@@ -2194,7 +2194,8 @@ int gfs2_setattr_size(struct inode *inode, u64 newsize)
ret = do_shrink(inode, newsize);
out:
- gfs2_rsqa_delete(ip, NULL);
+ gfs2_rs_delete(ip, NULL);
+ gfs2_qa_put(ip);
return ret;
}
@@ -2223,7 +2224,7 @@ void gfs2_free_journal_extents(struct gfs2_jdesc *jd)
struct gfs2_journal_extent *jext;
while(!list_empty(&jd->extent_list)) {
- jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list);
+ jext = list_first_entry(&jd->extent_list, struct gfs2_journal_extent, list);
list_del(&jext->list);
kfree(jext);
}
@@ -2244,7 +2245,7 @@ static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 b
struct gfs2_journal_extent *jext;
if (!list_empty(&jd->extent_list)) {
- jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list);
+ jext = list_last_entry(&jd->extent_list, struct gfs2_journal_extent, list);
if ((jext->dblock + jext->blocks) == dblock) {
jext->blocks += blocks;
return 0;
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index c8b62577e2f2..c3f7732415be 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -2028,7 +2028,8 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len,
error = gfs2_trans_begin(sdp,
rg_blocks + (DIV_ROUND_UP(size, sdp->sd_jbsize) + 1) +
- RES_DINODE + RES_STATFS + RES_QUOTA, l_blocks);
+ RES_DINODE + RES_STATFS + RES_QUOTA, RES_DINODE +
+ l_blocks);
if (error)
goto out_rg_gunlock;
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index cb26be6f4351..fe305e4bfd37 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -458,10 +458,6 @@ static vm_fault_t gfs2_page_mkwrite(struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);
- ret = gfs2_rsqa_alloc(ip);
- if (ret)
- goto out;
-
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
if (ret)
@@ -558,7 +554,6 @@ out_uninit:
set_page_dirty(page);
wait_for_stable_page(page);
}
-out:
sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(ret);
}
@@ -635,7 +630,17 @@ int gfs2_open_common(struct inode *inode, struct file *file)
gfs2_assert_warn(GFS2_SB(inode), !file->private_data);
file->private_data = fp;
+ if (file->f_mode & FMODE_WRITE) {
+ ret = gfs2_qa_get(GFS2_I(inode));
+ if (ret)
+ goto fail;
+ }
return 0;
+
+fail:
+ kfree(file->private_data);
+ file->private_data = NULL;
+ return ret;
}
/**
@@ -690,10 +695,10 @@ static int gfs2_release(struct inode *inode, struct file *file)
kfree(file->private_data);
file->private_data = NULL;
- if (!(file->f_mode & FMODE_WRITE))
- return 0;
-
- gfs2_rsqa_delete(ip, &inode->i_writecount);
+ if (file->f_mode & FMODE_WRITE) {
+ gfs2_rs_delete(ip, &inode->i_writecount);
+ gfs2_qa_put(ip);
+ }
return 0;
}
@@ -849,10 +854,6 @@ static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct gfs2_inode *ip = GFS2_I(inode);
ssize_t ret;
- ret = gfs2_rsqa_alloc(ip);
- if (ret)
- return ret;
-
gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from));
if (iocb->ki_flags & IOCB_APPEND) {
@@ -1149,17 +1150,11 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t le
if (mode & FALLOC_FL_PUNCH_HOLE) {
ret = __gfs2_punch_hole(file, offset, len);
} else {
- ret = gfs2_rsqa_alloc(ip);
- if (ret)
- goto out_putw;
-
ret = __gfs2_fallocate(file, mode, offset, len);
-
if (ret)
gfs2_rs_deltree(&ip->i_res);
}
-out_putw:
put_write_access(inode);
out_unlock:
gfs2_glock_dq(&gh);
@@ -1173,16 +1168,12 @@ static ssize_t gfs2_file_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
{
- int error;
- struct gfs2_inode *ip = GFS2_I(out->f_mapping->host);
-
- error = gfs2_rsqa_alloc(ip);
- if (error)
- return (ssize_t)error;
+ ssize_t ret;
gfs2_size_hint(out, *ppos, len);
- return iter_file_splice_write(pipe, out, ppos, len, flags);
+ ret = iter_file_splice_write(pipe, out, ppos, len, flags);
+ return ret;
}
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index d0eceaff3cea..29f9b6684b74 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -133,6 +133,33 @@ static void gfs2_glock_dealloc(struct rcu_head *rcu)
}
}
+/**
+ * glock_blocked_by_withdraw - determine if we can still use a glock
+ * @gl: the glock
+ *
+ * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted
+ * when we're withdrawn. For example, to maintain metadata integrity, we should
+ * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like
+ * iopen or the transaction glocks may be safely used because none of their
+ * metadata goes through the journal. So in general, we should disallow all
+ * glocks that are journaled, and allow all the others. One exception is:
+ * we need to allow our active journal to be promoted and demoted so others
+ * may recover it and we can reacquire it when they're done.
+ */
+static bool glock_blocked_by_withdraw(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+ if (likely(!gfs2_withdrawn(sdp)))
+ return false;
+ if (gl->gl_ops->go_flags & GLOF_NONDISK)
+ return false;
+ if (!sdp->sd_jdesc ||
+ gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr)
+ return false;
+ return true;
+}
+
void gfs2_glock_free(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
@@ -244,7 +271,7 @@ static void __gfs2_glock_put(struct gfs2_glock *gl)
gfs2_glock_remove_from_lru(gl);
spin_unlock(&gl->gl_lockref.lock);
GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
- GLOCK_BUG_ON(gl, mapping && mapping->nrpages);
+ GLOCK_BUG_ON(gl, mapping && mapping->nrpages && !gfs2_withdrawn(sdp));
trace_gfs2_glock_put(gl);
sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}
@@ -281,7 +308,7 @@ void gfs2_glock_put(struct gfs2_glock *gl)
static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
{
- const struct gfs2_holder *gh_head = list_entry(gl->gl_holders.next, const struct gfs2_holder, gh_list);
+ const struct gfs2_holder *gh_head = list_first_entry(&gl->gl_holders, const struct gfs2_holder, gh_list);
if ((gh->gh_state == LM_ST_EXCLUSIVE ||
gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head)
return 0;
@@ -549,8 +576,8 @@ __acquires(&gl->gl_lockref.lock)
unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
int ret;
- if (unlikely(gfs2_withdrawn(sdp)) &&
- target != LM_ST_UNLOCKED)
+ if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
+ gh && !(gh->gh_flags & LM_FLAG_NOEXP))
return;
lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
LM_FLAG_PRIORITY);
@@ -575,13 +602,64 @@ __acquires(&gl->gl_lockref.lock)
(lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
clear_bit(GLF_BLOCKING, &gl->gl_flags);
spin_unlock(&gl->gl_lockref.lock);
- if (glops->go_sync)
- glops->go_sync(gl);
- if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
+ if (glops->go_sync) {
+ ret = glops->go_sync(gl);
+ /* If we had a problem syncing (due to io errors or whatever),
+ * we should not invalidate the metadata or tell dlm to
+ * release the glock to other nodes.
+ */
+ if (ret) {
+ if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
+ fs_err(sdp, "Error %d syncing glock \n", ret);
+ gfs2_dump_glock(NULL, gl, true);
+ }
+ return;
+ }
+ }
+ if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
+ /*
+ * The call to go_sync should have cleared out the ail list.
+ * If there are still items, we have a problem. We ought to
+ * withdraw, but we can't because the withdraw code also uses
+ * glocks. Warn about the error, dump the glock, then fall
+ * through and wait for logd to do the withdraw for us.
+ */
+ if ((atomic_read(&gl->gl_ail_count) != 0) &&
+ (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
+ gfs2_assert_warn(sdp, !atomic_read(&gl->gl_ail_count));
+ gfs2_dump_glock(NULL, gl, true);
+ }
glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
- clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
+ clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
+ }
gfs2_glock_hold(gl);
+ /*
+ * Check for an error encountered since we called go_sync and go_inval.
+ * If so, we can't withdraw from the glock code because the withdraw
+ * code itself uses glocks (see function signal_our_withdraw) to
+ * change the mount to read-only. Most importantly, we must not call
+ * dlm to unlock the glock until the journal is in a known good state
+ * (after journal replay) otherwise other nodes may use the object
+ * (rgrp or dinode) and then later, journal replay will corrupt the
+ * file system. The best we can do here is wait for the logd daemon
+ * to see sd_log_error and withdraw, and in the meantime, requeue the
+ * work for later.
+ *
+ * However, if we're just unlocking the lock (say, for unmount, when
+ * gfs2_gl_hash_clear calls clear_glock) and recovery is complete
+ * then it's okay to tell dlm to unlock it.
+ */
+ if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp)))
+ gfs2_withdraw_delayed(sdp);
+ if (glock_blocked_by_withdraw(gl)) {
+ if (target != LM_ST_UNLOCKED ||
+ test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags)) {
+ gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
+ goto out;
+ }
+ }
+
if (sdp->sd_lockstruct.ls_ops->lm_lock) {
/* lock_dlm */
ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
@@ -590,8 +668,7 @@ __acquires(&gl->gl_lockref.lock)
test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) {
finish_xmote(gl, target);
gfs2_glock_queue_work(gl, 0);
- }
- else if (ret) {
+ } else if (ret) {
fs_err(sdp, "lm_lock ret %d\n", ret);
GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp));
}
@@ -599,7 +676,7 @@ __acquires(&gl->gl_lockref.lock)
finish_xmote(gl, target);
gfs2_glock_queue_work(gl, 0);
}
-
+out:
spin_lock(&gl->gl_lockref.lock);
}
@@ -613,7 +690,7 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
struct gfs2_holder *gh;
if (!list_empty(&gl->gl_holders)) {
- gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
+ gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
if (test_bit(HIF_HOLDER, &gh->gh_iflags))
return gh;
}
@@ -645,6 +722,9 @@ __acquires(&gl->gl_lockref.lock)
goto out_unlock;
if (nonblock)
goto out_sched;
+ smp_mb();
+ if (atomic_read(&gl->gl_revokes) != 0)
+ goto out_sched;
set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
gl->gl_target = gl->gl_demote_state;
@@ -1160,7 +1240,7 @@ fail:
}
list_add_tail(&gh->gh_list, insert_pt);
do_cancel:
- gh = list_entry(gl->gl_holders.next, struct gfs2_holder, gh_list);
+ gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
spin_unlock(&gl->gl_lockref.lock);
if (sdp->sd_lockstruct.ls_ops->lm_cancel)
@@ -1194,10 +1274,9 @@ trap_recursive:
int gfs2_glock_nq(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
- struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
int error = 0;
- if (unlikely(gfs2_withdrawn(sdp)))
+ if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP))
return -EIO;
if (test_bit(GLF_LRU, &gl->gl_flags))
@@ -1241,24 +1320,32 @@ int gfs2_glock_poll(struct gfs2_holder *gh)
void gfs2_glock_dq(struct gfs2_holder *gh)
{
struct gfs2_glock *gl = gh->gh_gl;
- const struct gfs2_glock_operations *glops = gl->gl_ops;
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
unsigned delay = 0;
int fast_path = 0;
spin_lock(&gl->gl_lockref.lock);
+ /*
+ * If we're in the process of file system withdraw, we cannot just
+ * dequeue any glocks until our journal is recovered, lest we
+ * introduce file system corruption. We need two exceptions to this
+ * rule: We need to allow unlocking of nondisk glocks and the glock
+ * for our own journal that needs recovery.
+ */
+ if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
+ glock_blocked_by_withdraw(gl) &&
+ gh->gh_gl != sdp->sd_jinode_gl) {
+ sdp->sd_glock_dqs_held++;
+ might_sleep();
+ wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
+ TASK_UNINTERRUPTIBLE);
+ }
if (gh->gh_flags & GL_NOCACHE)
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
list_del_init(&gh->gh_list);
clear_bit(HIF_HOLDER, &gh->gh_iflags);
if (find_first_holder(gl) == NULL) {
- if (glops->go_unlock) {
- GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags));
- spin_unlock(&gl->gl_lockref.lock);
- glops->go_unlock(gh);
- spin_lock(&gl->gl_lockref.lock);
- clear_bit(GLF_LOCK, &gl->gl_flags);
- }
if (list_empty(&gl->gl_holders) &&
!test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
!test_bit(GLF_DEMOTE, &gl->gl_flags))
@@ -1555,7 +1642,7 @@ __acquires(&lru_lock)
list_sort(NULL, list, glock_cmp);
while(!list_empty(list)) {
- gl = list_entry(list->next, struct gfs2_glock, gl_lru);
+ gl = list_first_entry(list, struct gfs2_glock, gl_lru);
list_del_init(&gl->gl_lru);
if (!spin_trylock(&gl->gl_lockref.lock)) {
add_back_to_lru:
@@ -1596,7 +1683,7 @@ static long gfs2_scan_glock_lru(int nr)
spin_lock(&lru_lock);
while ((nr-- >= 0) && !list_empty(&lru_list)) {
- gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
+ gl = list_first_entry(&lru_list, struct gfs2_glock, gl_lru);
/* Test for being demotable */
if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 061d22e1ceb6..9e9c7a4b8c66 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -29,6 +29,8 @@
struct workqueue_struct *gfs2_freeze_wq;
+extern struct workqueue_struct *gfs2_control_wq;
+
static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
{
fs_err(gl->gl_name.ln_sbd,
@@ -39,7 +41,8 @@ static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
fs_err(gl->gl_name.ln_sbd, "AIL glock %u:%llu mapping %p\n",
gl->gl_name.ln_type, gl->gl_name.ln_number,
gfs2_glock2aspace(gl));
- gfs2_lm_withdraw(gl->gl_name.ln_sbd, "AIL error\n");
+ gfs2_lm(gl->gl_name.ln_sbd, "AIL error\n");
+ gfs2_withdraw(gl->gl_name.ln_sbd);
}
/**
@@ -79,34 +82,62 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
}
-static void gfs2_ail_empty_gl(struct gfs2_glock *gl)
+static int gfs2_ail_empty_gl(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct gfs2_trans tr;
+ int ret;
memset(&tr, 0, sizeof(tr));
INIT_LIST_HEAD(&tr.tr_buf);
INIT_LIST_HEAD(&tr.tr_databuf);
tr.tr_revokes = atomic_read(&gl->gl_ail_count);
- if (!tr.tr_revokes)
- return;
+ if (!tr.tr_revokes) {
+ bool have_revokes;
+ bool log_in_flight;
+
+ /*
+ * We have nothing on the ail, but there could be revokes on
+ * the sdp revoke queue, in which case, we still want to flush
+ * the log and wait for it to finish.
+ *
+ * If the sdp revoke list is empty too, we might still have an
+ * io outstanding for writing revokes, so we should wait for
+ * it before returning.
+ *
+ * If none of these conditions are true, our revokes are all
+ * flushed and we can return.
+ */
+ gfs2_log_lock(sdp);
+ have_revokes = !list_empty(&sdp->sd_log_revokes);
+ log_in_flight = atomic_read(&sdp->sd_log_in_flight);
+ gfs2_log_unlock(sdp);
+ if (have_revokes)
+ goto flush;
+ if (log_in_flight)
+ log_flush_wait(sdp);
+ return 0;
+ }
/* A shortened, inline version of gfs2_trans_begin()
* tr->alloced is not set since the transaction structure is
* on the stack */
tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes);
tr.tr_ip = _RET_IP_;
- if (gfs2_log_reserve(sdp, tr.tr_reserved) < 0)
- return;
+ ret = gfs2_log_reserve(sdp, tr.tr_reserved);
+ if (ret < 0)
+ return ret;
WARN_ON_ONCE(current->journal_info);
current->journal_info = &tr;
__gfs2_ail_flush(gl, 0, tr.tr_revokes);
gfs2_trans_end(sdp);
+flush:
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_AIL_EMPTY_GL);
+ return 0;
}
void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
@@ -140,35 +171,32 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
* return to caller to demote/unlock the glock until I/O is complete.
*/
-static void rgrp_go_sync(struct gfs2_glock *gl)
+static int rgrp_go_sync(struct gfs2_glock *gl)
{
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
struct address_space *mapping = &sdp->sd_aspace;
- struct gfs2_rgrpd *rgd;
+ struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
int error;
- spin_lock(&gl->gl_lockref.lock);
- rgd = gl->gl_object;
- if (rgd)
- gfs2_rgrp_brelse(rgd);
- spin_unlock(&gl->gl_lockref.lock);
-
if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
- return;
+ return 0;
GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);
gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_RGRP_GO_SYNC);
filemap_fdatawrite_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
error = filemap_fdatawait_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
+ WARN_ON_ONCE(error);
mapping_set_error(mapping, error);
- gfs2_ail_empty_gl(gl);
+ if (!error)
+ error = gfs2_ail_empty_gl(gl);
spin_lock(&gl->gl_lockref.lock);
rgd = gl->gl_object;
if (rgd)
gfs2_free_clones(rgd);
spin_unlock(&gl->gl_lockref.lock);
+ return error;
}
/**
@@ -191,7 +219,6 @@ static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
gfs2_rgrp_brelse(rgd);
WARN_ON_ONCE(!(flags & DIO_METADATA));
- gfs2_assert_withdraw(sdp, !atomic_read(&gl->gl_ail_count));
truncate_inode_pages_range(mapping, gl->gl_vm.start, gl->gl_vm.end);
if (rgd)
@@ -236,12 +263,12 @@ static void gfs2_clear_glop_pending(struct gfs2_inode *ip)
*
*/
-static void inode_go_sync(struct gfs2_glock *gl)
+static int inode_go_sync(struct gfs2_glock *gl)
{
struct gfs2_inode *ip = gfs2_glock2inode(gl);
int isreg = ip && S_ISREG(ip->i_inode.i_mode);
struct address_space *metamapping = gfs2_glock2aspace(gl);
- int error;
+ int error = 0;
if (isreg) {
if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
@@ -274,6 +301,7 @@ static void inode_go_sync(struct gfs2_glock *gl)
out:
gfs2_clear_glop_pending(ip);
+ return error;
}
/**
@@ -291,8 +319,6 @@ static void inode_go_inval(struct gfs2_glock *gl, int flags)
{
struct gfs2_inode *ip = gfs2_glock2inode(gl);
- gfs2_assert_withdraw(gl->gl_name.ln_sbd, !atomic_read(&gl->gl_ail_count));
-
if (flags & DIO_METADATA) {
struct address_space *mapping = gfs2_glock2aspace(gl);
truncate_inode_pages(mapping, 0);
@@ -496,24 +522,29 @@ static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
*
*/
-static void freeze_go_sync(struct gfs2_glock *gl)
+static int freeze_go_sync(struct gfs2_glock *gl)
{
int error = 0;
struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
- if (gl->gl_state == LM_ST_SHARED &&
+ if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) &&
test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
error = freeze_super(sdp->sd_vfs);
if (error) {
fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
error);
+ if (gfs2_withdrawn(sdp)) {
+ atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
+ return 0;
+ }
gfs2_assert_withdraw(sdp, 0);
}
queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
GFS2_LFC_FREEZE_GO_SYNC);
}
+ return 0;
}
/**
@@ -582,8 +613,76 @@ static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
}
}
+/**
+ * inode_go_free - wake up anyone waiting for dlm's unlock ast to free it
+ * @gl: glock being freed
+ *
+ * For now, this is only used for the journal inode glock. In withdraw
+ * situations, we need to wait for the glock to be freed so that we know
+ * other nodes may proceed with recovery / journal replay.
+ */
+static void inode_go_free(struct gfs2_glock *gl)
+{
+ /* Note that we cannot reference gl_object because it's already set
+ * to NULL by this point in its lifecycle. */
+ if (!test_bit(GLF_FREEING, &gl->gl_flags))
+ return;
+ clear_bit_unlock(GLF_FREEING, &gl->gl_flags);
+ wake_up_bit(&gl->gl_flags, GLF_FREEING);
+}
+
+/**
+ * nondisk_go_callback - used to signal when a node did a withdraw
+ * @gl: the nondisk glock
+ * @remote: true if this came from a different cluster node
+ *
+ */
+static void nondisk_go_callback(struct gfs2_glock *gl, bool remote)
+{
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+ /* Ignore the callback unless it's from another node, and it's the
+ live lock. */
+ if (!remote || gl->gl_name.ln_number != GFS2_LIVE_LOCK)
+ return;
+
+ /* First order of business is to cancel the demote request. We don't
+ * really want to demote a nondisk glock. At best it's just to inform
+ * us of another node's withdraw. We'll keep it in SH mode. */
+ clear_bit(GLF_DEMOTE, &gl->gl_flags);
+ clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
+
+ /* Ignore the unlock if we're withdrawn, unmounting, or in recovery. */
+ if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) ||
+ test_bit(SDF_WITHDRAWN, &sdp->sd_flags) ||
+ test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags))
+ return;
+
+ /* We only care when a node wants us to unlock, because that means
+ * they want a journal recovered. */
+ if (gl->gl_demote_state != LM_ST_UNLOCKED)
+ return;
+
+ if (sdp->sd_args.ar_spectator) {
+ fs_warn(sdp, "Spectator node cannot recover journals.\n");
+ return;
+ }
+
+ fs_warn(sdp, "Some node has withdrawn; checking for recovery.\n");
+ set_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
+ /*
+ * We can't call remote_withdraw or gfs2_recover_journal directly here
+ * because this is called from the glock unlock function and the
+ * remote_withdraw needs to enqueue and dequeue the same "live" glock
+ * we were called from. So we queue it to the control work queue in
+ * lock_dlm.
+ */
+ queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);
+}
+
const struct gfs2_glock_operations gfs2_meta_glops = {
.go_type = LM_TYPE_META,
+ .go_flags = GLOF_NONDISK,
};
const struct gfs2_glock_operations gfs2_inode_glops = {
@@ -594,13 +693,13 @@ const struct gfs2_glock_operations gfs2_inode_glops = {
.go_dump = inode_go_dump,
.go_type = LM_TYPE_INODE,
.go_flags = GLOF_ASPACE | GLOF_LRU,
+ .go_free = inode_go_free,
};
const struct gfs2_glock_operations gfs2_rgrp_glops = {
.go_sync = rgrp_go_sync,
.go_inval = rgrp_go_inval,
.go_lock = gfs2_rgrp_go_lock,
- .go_unlock = gfs2_rgrp_go_unlock,
.go_dump = gfs2_rgrp_dump,
.go_type = LM_TYPE_RGRP,
.go_flags = GLOF_LVB,
@@ -611,30 +710,34 @@ const struct gfs2_glock_operations gfs2_freeze_glops = {
.go_xmote_bh = freeze_go_xmote_bh,
.go_demote_ok = freeze_go_demote_ok,
.go_type = LM_TYPE_NONDISK,
+ .go_flags = GLOF_NONDISK,
};
const struct gfs2_glock_operations gfs2_iopen_glops = {
.go_type = LM_TYPE_IOPEN,
.go_callback = iopen_go_callback,
- .go_flags = GLOF_LRU,
+ .go_flags = GLOF_LRU | GLOF_NONDISK,
};
const struct gfs2_glock_operations gfs2_flock_glops = {
.go_type = LM_TYPE_FLOCK,
- .go_flags = GLOF_LRU,
+ .go_flags = GLOF_LRU | GLOF_NONDISK,
};
const struct gfs2_glock_operations gfs2_nondisk_glops = {
.go_type = LM_TYPE_NONDISK,
+ .go_flags = GLOF_NONDISK,
+ .go_callback = nondisk_go_callback,
};
const struct gfs2_glock_operations gfs2_quota_glops = {
.go_type = LM_TYPE_QUOTA,
- .go_flags = GLOF_LVB | GLOF_LRU,
+ .go_flags = GLOF_LVB | GLOF_LRU | GLOF_NONDISK,
};
const struct gfs2_glock_operations gfs2_journal_glops = {
.go_type = LM_TYPE_JOURNAL,
+ .go_flags = GLOF_NONDISK,
};
const struct gfs2_glock_operations *gfs2_glops_list[] = {
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 9fd88ed18807..84a824293a78 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -234,20 +234,21 @@ struct lm_lockname {
struct gfs2_glock_operations {
- void (*go_sync) (struct gfs2_glock *gl);
+ int (*go_sync) (struct gfs2_glock *gl);
int (*go_xmote_bh) (struct gfs2_glock *gl, struct gfs2_holder *gh);
void (*go_inval) (struct gfs2_glock *gl, int flags);
int (*go_demote_ok) (const struct gfs2_glock *gl);
int (*go_lock) (struct gfs2_holder *gh);
- void (*go_unlock) (struct gfs2_holder *gh);
void (*go_dump)(struct seq_file *seq, struct gfs2_glock *gl,
const char *fs_id_buf);
void (*go_callback)(struct gfs2_glock *gl, bool remote);
+ void (*go_free)(struct gfs2_glock *gl);
const int go_type;
const unsigned long go_flags;
-#define GLOF_ASPACE 1
-#define GLOF_LVB 2
-#define GLOF_LRU 4
+#define GLOF_ASPACE 1 /* address space attached */
+#define GLOF_LVB 2 /* Lock Value Block attached */
+#define GLOF_LRU 4 /* LRU managed */
+#define GLOF_NONDISK 8 /* not I/O related */
};
enum {
@@ -294,6 +295,7 @@ struct gfs2_qadata { /* quota allocation data */
struct gfs2_quota_data *qa_qd[2 * GFS2_MAXQUOTAS];
struct gfs2_holder qa_qd_ghs[2 * GFS2_MAXQUOTAS];
unsigned int qa_qd_num;
+ int qa_ref;
};
/* Resource group multi-block reservation, in order of appearance:
@@ -343,6 +345,7 @@ enum {
GLF_OBJECT = 14, /* Used only for tracing */
GLF_BLOCKING = 15,
GLF_INODE_CREATING = 16, /* Inode creation occurring */
+ GLF_FREEING = 18, /* Wait for glock to be freed */
};
struct gfs2_glock {
@@ -542,6 +545,7 @@ struct gfs2_jdesc {
struct list_head jd_revoke_list;
unsigned int jd_replay_tail;
+ u64 jd_no_addr;
};
struct gfs2_statfs_change_host {
@@ -616,8 +620,12 @@ enum {
SDF_RORECOVERY = 7, /* read only recovery */
SDF_SKIP_DLM_UNLOCK = 8,
SDF_FORCE_AIL_FLUSH = 9,
- SDF_AIL1_IO_ERROR = 10,
- SDF_FS_FROZEN = 11,
+ SDF_FS_FROZEN = 10,
+ SDF_WITHDRAWING = 11, /* Will withdraw eventually */
+ SDF_WITHDRAW_IN_PROG = 12, /* Withdraw is in progress */
+ SDF_REMOTE_WITHDRAW = 13, /* Performing remote recovery */
+ SDF_WITHDRAW_RECOVERY = 14, /* Wait for journal recovery when we are
+ withdrawing */
};
enum gfs2_freeze_state {
@@ -768,6 +776,7 @@ struct gfs2_sbd {
struct gfs2_jdesc *sd_jdesc;
struct gfs2_holder sd_journal_gh;
struct gfs2_holder sd_jinode_gh;
+ struct gfs2_glock *sd_jinode_gl;
struct gfs2_holder sd_sc_gh;
struct gfs2_holder sd_qc_gh;
@@ -828,7 +837,8 @@ struct gfs2_sbd {
atomic_t sd_log_in_flight;
struct bio *sd_log_bio;
wait_queue_head_t sd_log_flush_wait;
- int sd_log_error;
+ int sd_log_error; /* First log error */
+ wait_queue_head_t sd_withdraw_wait;
atomic_t sd_reserving_log;
wait_queue_head_t sd_reserving_log_wait;
@@ -852,6 +862,7 @@ struct gfs2_sbd {
unsigned long sd_last_warning;
struct dentry *debugfs_dir; /* debugfs directory */
+ unsigned long sd_glock_dqs_held;
};
static inline void gfs2_glstats_inc(struct gfs2_glock *gl, int which)
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 8294851a9dd9..70b2d3a1e866 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -144,7 +144,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
if (unlikely(error))
- goto fail_put;
+ goto fail;
if (type == DT_UNKNOWN || blktype != GFS2_BLKST_FREE) {
/*
@@ -155,13 +155,13 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE,
GL_SKIP, &i_gh);
if (error)
- goto fail_put;
+ goto fail;
if (blktype != GFS2_BLKST_FREE) {
error = gfs2_check_blk_type(sdp, no_addr,
blktype);
if (error)
- goto fail_put;
+ goto fail;
}
}
@@ -169,7 +169,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
set_bit(GIF_INVALID, &ip->i_flags);
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (unlikely(error))
- goto fail_put;
+ goto fail;
glock_set_object(ip->i_iopen_gh.gh_gl, ip);
gfs2_glock_put(io_gl);
io_gl = NULL;
@@ -182,7 +182,7 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
/* Inode glock must be locked already */
error = gfs2_inode_refresh(GFS2_I(inode));
if (error)
- goto fail_refresh;
+ goto fail;
} else {
ip->i_no_formal_ino = no_formal_ino;
inode->i_mode = DT2IF(type);
@@ -197,17 +197,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
gfs2_glock_dq_uninit(&i_gh);
return inode;
-fail_refresh:
- ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
- gfs2_glock_dq_uninit(&ip->i_iopen_gh);
-fail_put:
+fail:
if (io_gl)
gfs2_glock_put(io_gl);
- glock_clear_object(ip->i_gl, ip);
if (gfs2_holder_initialized(&i_gh))
gfs2_glock_dq_uninit(&i_gh);
-fail:
iget_failed(inode);
return ERR_PTR(error);
}
@@ -594,13 +588,13 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (!name->len || name->len > GFS2_FNAMESIZE)
return -ENAMETOOLONG;
- error = gfs2_rsqa_alloc(dip);
+ error = gfs2_qa_get(dip);
if (error)
return error;
error = gfs2_rindex_update(sdp);
if (error)
- return error;
+ goto fail;
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
if (error)
@@ -647,7 +641,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
goto fail_gunlock;
ip = GFS2_I(inode);
- error = gfs2_rsqa_alloc(ip);
+ error = gfs2_qa_get(ip);
if (error)
goto fail_free_acls;
@@ -782,11 +776,13 @@ fail_gunlock2:
clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
gfs2_glock_put(io_gl);
fail_free_inode:
+ gfs2_qa_put(ip);
if (ip->i_gl) {
glock_clear_object(ip->i_gl, ip);
gfs2_glock_put(ip->i_gl);
}
- gfs2_rsqa_delete(ip, NULL);
+ gfs2_rs_delete(ip, NULL);
+ gfs2_qa_put(ip);
fail_free_acls:
posix_acl_release(default_acl);
posix_acl_release(acl);
@@ -804,6 +800,7 @@ fail_gunlock:
if (gfs2_holder_initialized(ghs + 1))
gfs2_glock_dq_uninit(ghs + 1);
fail:
+ gfs2_qa_put(dip);
return error;
}
@@ -905,7 +902,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
if (S_ISDIR(inode->i_mode))
return -EPERM;
- error = gfs2_rsqa_alloc(dip);
+ error = gfs2_qa_get(dip);
if (error)
return error;
@@ -1008,6 +1005,7 @@ out_gunlock:
out_child:
gfs2_glock_dq(ghs);
out_parent:
+ gfs2_qa_put(ip);
gfs2_holder_uninit(ghs);
gfs2_holder_uninit(ghs + 1);
return error;
@@ -1368,7 +1366,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
if (error)
return error;
- error = gfs2_rsqa_alloc(ndip);
+ error = gfs2_qa_get(ndip);
if (error)
return error;
@@ -1568,6 +1566,7 @@ out_gunlock_r:
if (gfs2_holder_initialized(&r_gh))
gfs2_glock_dq_uninit(&r_gh);
out:
+ gfs2_qa_put(ndip);
return error;
}
@@ -1879,10 +1878,9 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
ouid = nuid = NO_UID_QUOTA_CHANGE;
if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid))
ogid = ngid = NO_GID_QUOTA_CHANGE;
-
- error = gfs2_rsqa_alloc(ip);
+ error = gfs2_qa_get(ip);
if (error)
- goto out;
+ return error;
error = gfs2_rindex_update(sdp);
if (error)
@@ -1920,6 +1918,7 @@ out_end_trans:
out_gunlock_q:
gfs2_quota_unlock(ip);
out:
+ gfs2_qa_put(ip);
return error;
}
@@ -1941,21 +1940,21 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
struct gfs2_holder i_gh;
int error;
- error = gfs2_rsqa_alloc(ip);
+ error = gfs2_qa_get(ip);
if (error)
return error;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
if (error)
- return error;
+ goto out;
error = -EPERM;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
- goto out;
+ goto error;
error = setattr_prepare(dentry, attr);
if (error)
- goto out;
+ goto error;
if (attr->ia_valid & ATTR_SIZE)
error = gfs2_setattr_size(inode, attr->ia_size);
@@ -1967,10 +1966,12 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
error = posix_acl_chmod(inode, inode->i_mode);
}
-out:
+error:
if (!error)
mark_inode_dirty(inode);
gfs2_glock_dq_uninit(&i_gh);
+out:
+ gfs2_qa_put(ip);
return error;
}
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 7c7197343ee2..9f2b5609f225 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -16,6 +16,8 @@
#include "incore.h"
#include "glock.h"
+#include "glops.h"
+#include "recovery.h"
#include "util.h"
#include "sys.h"
#include "trace_gfs2.h"
@@ -124,6 +126,8 @@ static void gdlm_ast(void *arg)
switch (gl->gl_lksb.sb_status) {
case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
+ if (gl->gl_ops->go_free)
+ gl->gl_ops->go_free(gl);
gfs2_glock_free(gl);
return;
case -DLM_ECANCEL: /* Cancel while getting lock */
@@ -323,6 +327,7 @@ static void gdlm_cancel(struct gfs2_glock *gl)
/*
* dlm/gfs2 recovery coordination using dlm_recover callbacks
*
+ * 0. gfs2 checks for another cluster node withdraw, needing journal replay
* 1. dlm_controld sees lockspace members change
* 2. dlm_controld blocks dlm-kernel locking activity
* 3. dlm_controld within dlm-kernel notifies gfs2 (recover_prep)
@@ -571,6 +576,28 @@ static int control_lock(struct gfs2_sbd *sdp, int mode, uint32_t flags)
&ls->ls_control_lksb, "control_lock");
}
+/**
+ * remote_withdraw - react to a node withdrawing from the file system
+ * @sdp: The superblock
+ *
+ * Walks sd_jindex_list and calls gfs2_recover_journal() on every journal
+ * whose jid differs from sd_lockstruct.ls_jid (presumably this node's own
+ * journal), stopping at the first non-zero return.  The number of journals
+ * that returned 0 and the last return value are always logged via fs_err().
+ */
+static void remote_withdraw(struct gfs2_sbd *sdp)
+{
+ struct gfs2_jdesc *jd;
+ int ret = 0, count = 0;
+
+ list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) {
+ if (jd->jd_jid == sdp->sd_lockstruct.ls_jid)
+ continue;
+ ret = gfs2_recover_journal(jd, true);
+ if (ret)
+ break;
+ count++;
+ }
+
+ /* Log how many journals were processed and the final return code */
+ fs_err(sdp, "Journals checked: %d, ret = %d.\n", count, ret);
+}
+
static void gfs2_control_func(struct work_struct *work)
{
struct gfs2_sbd *sdp = container_of(work, struct gfs2_sbd, sd_control_work.work);
@@ -581,6 +608,13 @@ static void gfs2_control_func(struct work_struct *work)
int recover_size;
int i, error;
+ /* First check for other nodes that may have done a withdraw. */
+ if (test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags)) {
+ remote_withdraw(sdp);
+ clear_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
+ return;
+ }
+
spin_lock(&ls->ls_recover_spin);
/*
* No MOUNT_DONE means we're still mounting; control_mount()
@@ -1079,6 +1113,10 @@ static void gdlm_recover_prep(void *arg)
struct gfs2_sbd *sdp = arg;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ if (gfs2_withdrawn(sdp)) {
+ fs_err(sdp, "recover_prep ignored due to withdraw.\n");
+ return;
+ }
spin_lock(&ls->ls_recover_spin);
ls->ls_recover_block = ls->ls_recover_start;
set_bit(DFL_DLM_RECOVERY, &ls->ls_recover_flags);
@@ -1101,6 +1139,11 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot *slot)
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
int jid = slot->slot - 1;
+ if (gfs2_withdrawn(sdp)) {
+ fs_err(sdp, "recover_slot jid %d ignored due to withdraw.\n",
+ jid);
+ return;
+ }
spin_lock(&ls->ls_recover_spin);
if (ls->ls_recover_size < jid + 1) {
fs_err(sdp, "recover_slot jid %d gen %u short size %d\n",
@@ -1125,6 +1168,10 @@ static void gdlm_recover_done(void *arg, struct dlm_slot *slots, int num_slots,
struct gfs2_sbd *sdp = arg;
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ if (gfs2_withdrawn(sdp)) {
+ fs_err(sdp, "recover_done ignored due to withdraw.\n");
+ return;
+ }
/* ensure the ls jid arrays are large enough */
set_recover_size(sdp, slots, num_slots);
@@ -1152,6 +1199,11 @@ static void gdlm_recovery_result(struct gfs2_sbd *sdp, unsigned int jid,
{
struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ if (gfs2_withdrawn(sdp)) {
+ fs_err(sdp, "recovery_result jid %d ignored due to withdraw.\n",
+ jid);
+ return;
+ }
if (test_bit(DFL_NO_DLM_OPS, &ls->ls_recover_flags))
return;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 00a2e721a374..3a75843ae580 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -88,8 +88,7 @@ static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
struct writeback_control *wbc,
- struct gfs2_trans *tr,
- bool *withdraw)
+ struct gfs2_trans *tr)
__releases(&sdp->sd_ail_lock)
__acquires(&sdp->sd_ail_lock)
{
@@ -97,6 +96,7 @@ __acquires(&sdp->sd_ail_lock)
struct address_space *mapping;
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;
+ int ret = 0;
list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) {
bh = bd->bd_bh;
@@ -104,16 +104,21 @@ __acquires(&sdp->sd_ail_lock)
gfs2_assert(sdp, bd->bd_tr == tr);
if (!buffer_busy(bh)) {
- if (!buffer_uptodate(bh) &&
- !test_and_set_bit(SDF_AIL1_IO_ERROR,
- &sdp->sd_flags)) {
+ if (buffer_uptodate(bh)) {
+ list_move(&bd->bd_ail_st_list,
+ &tr->tr_ail2_list);
+ continue;
+ }
+ if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
gfs2_io_error_bh(sdp, bh);
- *withdraw = true;
+ gfs2_withdraw_delayed(sdp);
}
- list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
- continue;
}
+ if (gfs2_withdrawn(sdp)) {
+ gfs2_remove_from_ail(bd);
+ continue;
+ }
if (!buffer_dirty(bh))
continue;
if (gl == bd->bd_gl)
@@ -124,16 +129,50 @@ __acquires(&sdp->sd_ail_lock)
if (!mapping)
continue;
spin_unlock(&sdp->sd_ail_lock);
- generic_writepages(mapping, wbc);
+ ret = generic_writepages(mapping, wbc);
spin_lock(&sdp->sd_ail_lock);
- if (wbc->nr_to_write <= 0)
+ if (ret || wbc->nr_to_write <= 0)
break;
- return 1;
+ return -EBUSY;
}
- return 0;
+ return ret;
}
+/*
+ * dump_ail_list - log the state of every buffer still on the AIL1 lists
+ * @sdp: the filesystem
+ *
+ * Diagnostic helper used by gfs2_ail1_flush() once it has been running for
+ * ten minutes.  For each transaction on sd_ail1_list, and each bufdata on
+ * that transaction's tr_ail1_list, one record is emitted with the bufdata
+ * block number and, when a buffer_head is attached, its block number and
+ * state flags.  The lists are only read, never modified.
+ */
+static void dump_ail_list(struct gfs2_sbd *sdp)
+{
+ struct gfs2_trans *tr;
+ struct gfs2_bufdata *bd;
+ struct buffer_head *bh;
+
+ /* t= reports whether current->journal_info is set for the caller */
+ fs_err(sdp, "Error: In gfs2_ail1_flush for ten minutes! t=%d\n",
+ current->journal_info ? 1 : 0);
+
+ list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
+ list_for_each_entry_reverse(bd, &tr->tr_ail1_list,
+ bd_ail_st_list) {
+ bh = bd->bd_bh;
+ fs_err(sdp, "bd %p: blk:0x%llx bh=%p ", bd,
+ (unsigned long long)bd->bd_blkno, bh);
+ /* No buffer_head: terminate the record and skip the flag dump */
+ if (!bh) {
+ fs_err(sdp, "\n");
+ continue;
+ }
+ fs_err(sdp, "0x%llx up2:%d dirt:%d lkd:%d req:%d "
+ "map:%d new:%d ar:%d aw:%d delay:%d "
+ "io err:%d unwritten:%d dfr:%d pin:%d esc:%d\n",
+ (unsigned long long)bh->b_blocknr,
+ buffer_uptodate(bh), buffer_dirty(bh),
+ buffer_locked(bh), buffer_req(bh),
+ buffer_mapped(bh), buffer_new(bh),
+ buffer_async_read(bh), buffer_async_write(bh),
+ buffer_delay(bh), buffer_write_io_error(bh),
+ buffer_unwritten(bh),
+ buffer_defer_completion(bh),
+ buffer_pinned(bh), buffer_escaped(bh));
+ }
+ }
+}
/**
* gfs2_ail1_flush - start writeback of some ail1 entries
@@ -149,23 +188,36 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
struct list_head *head = &sdp->sd_ail1_list;
struct gfs2_trans *tr;
struct blk_plug plug;
- bool withdraw = false;
+ int ret;
+ unsigned long flush_start = jiffies;
trace_gfs2_ail_flush(sdp, wbc, 1);
blk_start_plug(&plug);
spin_lock(&sdp->sd_ail_lock);
restart:
+ ret = 0;
+ if (time_after(jiffies, flush_start + (HZ * 600))) {
+ dump_ail_list(sdp);
+ goto out;
+ }
list_for_each_entry_reverse(tr, head, tr_list) {
if (wbc->nr_to_write <= 0)
break;
- if (gfs2_ail1_start_one(sdp, wbc, tr, &withdraw) &&
- !gfs2_withdrawn(sdp))
- goto restart;
+ ret = gfs2_ail1_start_one(sdp, wbc, tr);
+ if (ret) {
+ if (ret == -EBUSY)
+ goto restart;
+ break;
+ }
}
+out:
spin_unlock(&sdp->sd_ail_lock);
blk_finish_plug(&plug);
- if (withdraw)
- gfs2_lm_withdraw(sdp, NULL);
+ if (ret) {
+ gfs2_lm(sdp, "gfs2_ail1_start_one (generic_writepages) "
+ "returned: %d\n", ret);
+ gfs2_withdraw(sdp);
+ }
trace_gfs2_ail_flush(sdp, wbc, 0);
}
@@ -189,12 +241,13 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
/**
* gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
* @sdp: the filesystem
- * @ai: the AIL entry
+ * @tr: the transaction
+ * @max_revokes: If nonzero, issue revokes for the bd items for written buffers
*
*/
static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
- bool *withdraw)
+ int *max_revokes)
{
struct gfs2_bufdata *bd, *s;
struct buffer_head *bh;
@@ -203,12 +256,32 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
bd_ail_st_list) {
bh = bd->bd_bh;
gfs2_assert(sdp, bd->bd_tr == tr);
- if (buffer_busy(bh))
+ /*
+ * If another process flagged an io error, e.g. writing to the
+ * journal, error all other bhs and move them off the ail1 to
+ * prevent a tight loop when unmount tries to flush ail1,
+ * regardless of whether they're still busy. If no outside
+ * errors were found and the buffer is busy, move to the next.
+ * If the ail buffer is not busy and caught an error, flag it
+ * for others.
+ */
+ if (!sdp->sd_log_error && buffer_busy(bh))
continue;
if (!buffer_uptodate(bh) &&
- !test_and_set_bit(SDF_AIL1_IO_ERROR, &sdp->sd_flags)) {
+ !cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
gfs2_io_error_bh(sdp, bh);
- *withdraw = true;
+ gfs2_withdraw_delayed(sdp);
+ }
+ /*
+ * If we have space for revokes and the bd is no longer on any
+ * buf list, we can just add a revoke for it immediately and
+ * avoid having to put it on the ail2 list, where it would need
+ * to be revoked later.
+ */
+ if (*max_revokes && list_empty(&bd->bd_list)) {
+ gfs2_add_revoke(sdp, bd);
+ (*max_revokes)--;
+ continue;
}
list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
}
@@ -217,20 +290,20 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
/**
* gfs2_ail1_empty - Try to empty the ail1 lists
* @sdp: The superblock
+ * @max_revokes: If non-zero, add revokes where appropriate
*
* Tries to empty the ail1 lists, starting with the oldest first
*/
-static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
+static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
{
struct gfs2_trans *tr, *s;
int oldest_tr = 1;
int ret;
- bool withdraw = false;
spin_lock(&sdp->sd_ail_lock);
list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
- gfs2_ail1_empty_one(sdp, tr, &withdraw);
+ gfs2_ail1_empty_one(sdp, tr, &max_revokes);
if (list_empty(&tr->tr_ail1_list) && oldest_tr)
list_move(&tr->tr_list, &sdp->sd_ail2_list);
else
@@ -239,8 +312,10 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp)
ret = list_empty(&sdp->sd_ail1_list);
spin_unlock(&sdp->sd_ail_lock);
- if (withdraw)
- gfs2_lm_withdraw(sdp, "fatal: I/O error(s)\n");
+ if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) {
+ gfs2_lm(sdp, "fatal: I/O error(s)\n");
+ gfs2_withdraw(sdp);
+ }
return ret;
}
@@ -268,20 +343,17 @@ static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
}
/**
- * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced
- * @sdp: the filesystem
- * @ai: the AIL entry
- *
+ * gfs2_ail_empty_tr - empty one of the ail lists for a transaction
*/
-static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
+static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+ struct list_head *head)
{
- struct list_head *head = &tr->tr_ail2_list;
struct gfs2_bufdata *bd;
while (!list_empty(head)) {
- bd = list_entry(head->prev, struct gfs2_bufdata,
- bd_ail_st_list);
+ bd = list_first_entry(head, struct gfs2_bufdata,
+ bd_ail_st_list);
gfs2_assert(sdp, bd->bd_tr == tr);
gfs2_remove_from_ail(bd);
}
@@ -303,7 +375,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
if (!rm)
continue;
- gfs2_ail2_empty_one(sdp, tr);
+ gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
list_del(&tr->tr_list);
gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
@@ -487,7 +559,7 @@ static unsigned int current_tail(struct gfs2_sbd *sdp)
if (list_empty(&sdp->sd_ail1_list)) {
tail = sdp->sd_log_head;
} else {
- tr = list_entry(sdp->sd_ail1_list.prev, struct gfs2_trans,
+ tr = list_last_entry(&sdp->sd_ail1_list, struct gfs2_trans,
tr_list);
tail = tr->tr_first;
}
@@ -512,7 +584,7 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
}
-static void log_flush_wait(struct gfs2_sbd *sdp)
+void log_flush_wait(struct gfs2_sbd *sdp)
{
DEFINE_WAIT(wait);
@@ -549,7 +621,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
spin_lock(&sdp->sd_ordered_lock);
list_sort(NULL, &sdp->sd_log_ordered, &ip_cmp);
while (!list_empty(&sdp->sd_log_ordered)) {
- ip = list_entry(sdp->sd_log_ordered.next, struct gfs2_inode, i_ordered);
+ ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered);
if (ip->i_inode.i_mapping->nrpages == 0) {
test_and_clear_bit(GIF_ORDERED, &ip->i_flags);
list_del(&ip->i_ordered);
@@ -570,7 +642,7 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
spin_lock(&sdp->sd_ordered_lock);
while (!list_empty(&sdp->sd_log_ordered)) {
- ip = list_entry(sdp->sd_log_ordered.next, struct gfs2_inode, i_ordered);
+ ip = list_first_entry(&sdp->sd_log_ordered, struct gfs2_inode, i_ordered);
list_del(&ip->i_ordered);
WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
if (ip->i_inode.i_mapping->nrpages == 0)
@@ -616,27 +688,24 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
}
}
+/**
+ * gfs2_write_revokes - Add as many revokes to the system transaction as we can
+ * @sdp: The GFS2 superblock
+ *
+ * Our usual strategy is to defer writing revokes as much as we can in the hope
+ * that we'll eventually overwrite the journal, which will make those revokes
+ * go away. This changes when we flush the log: at that point, there will
+ * likely be some left-over space in the last revoke block of that transaction.
+ * We can fill that space with additional revokes for blocks that have already
+ * been written back. This will basically come at no cost now, and will save
+ * us from having to keep track of those blocks on the AIL2 list later.
+ */
void gfs2_write_revokes(struct gfs2_sbd *sdp)
{
- struct gfs2_trans *tr;
- struct gfs2_bufdata *bd, *tmp;
- int have_revokes = 0;
+ /* number of revokes we still have room for */
int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
- gfs2_ail1_empty(sdp);
- spin_lock(&sdp->sd_ail_lock);
- list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
- list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) {
- if (list_empty(&bd->bd_list)) {
- have_revokes = 1;
- goto done;
- }
- }
- }
-done:
- spin_unlock(&sdp->sd_ail_lock);
- if (have_revokes == 0)
- return;
+ gfs2_log_lock(sdp);
while (sdp->sd_log_num_revoke > max_revokes)
max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
max_revokes -= sdp->sd_log_num_revoke;
@@ -647,20 +716,7 @@ done:
if (!sdp->sd_log_blks_reserved)
atomic_dec(&sdp->sd_log_blks_free);
}
- gfs2_log_lock(sdp);
- spin_lock(&sdp->sd_ail_lock);
- list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
- list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) {
- if (max_revokes == 0)
- goto out_of_blocks;
- if (!list_empty(&bd->bd_list))
- continue;
- gfs2_add_revoke(sdp, bd);
- max_revokes--;
- }
- }
-out_of_blocks:
- spin_unlock(&sdp->sd_ail_lock);
+ gfs2_ail1_empty(sdp, max_revokes);
gfs2_log_unlock(sdp);
if (!sdp->sd_log_num_revoke) {
@@ -787,6 +843,40 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
}
/**
+ * ail_drain - drain the ail lists after a withdraw
+ * @sdp: Pointer to GFS2 superblock
+ *
+ * Removes every transaction from sd_ail1_list and sd_ail2_list, empties the
+ * per-transaction bufdata lists via gfs2_ail_empty_tr(), and kfree()s each
+ * transaction.  All of the work is done while holding sd_ail_lock.
+ */
+static void ail_drain(struct gfs2_sbd *sdp)
+{
+ struct gfs2_trans *tr;
+
+ spin_lock(&sdp->sd_ail_lock);
+ /*
+ * For transactions on the sd_ail1_list we need to drain both the
+ * ail1 and ail2 lists. That's because function gfs2_ail1_start_one
+ * (temporarily) moves items from its tr_ail1 list to tr_ail2 list
+ * before revokes are sent for that block. Items on the sd_ail2_list
+ * should have already gotten beyond that point, so for those
+ * transactions only the tr_ail2 list is drained.
+ */
+ while (!list_empty(&sdp->sd_ail1_list)) {
+ tr = list_first_entry(&sdp->sd_ail1_list, struct gfs2_trans,
+ tr_list);
+ gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list);
+ gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
+ list_del(&tr->tr_list);
+ kfree(tr);
+ }
+ while (!list_empty(&sdp->sd_ail2_list)) {
+ tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans,
+ tr_list);
+ gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
+ list_del(&tr->tr_list);
+ kfree(tr);
+ }
+ spin_unlock(&sdp->sd_ail_lock);
+}
+
+/**
* gfs2_log_flush - flush incore transaction(s)
* @sdp: the filesystem
* @gl: The glock structure to flush. If NULL, flush the whole incore log
@@ -796,11 +886,18 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
{
- struct gfs2_trans *tr;
+ struct gfs2_trans *tr = NULL;
enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
down_write(&sdp->sd_log_flush_lock);
+ /*
+ * Do this check while holding the log_flush_lock to prevent new
+ * buffers from being added to the ail via gfs2_pin()
+ */
+ if (gfs2_withdrawn(sdp))
+ goto out;
+
/* Log might have been flushed while we waited for the flush lock */
if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) {
up_write(&sdp->sd_log_flush_lock);
@@ -819,17 +916,27 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
INIT_LIST_HEAD(&tr->tr_ail2_list);
tr->tr_first = sdp->sd_log_flush_head;
if (unlikely (state == SFS_FROZEN))
- gfs2_assert_withdraw(sdp, !tr->tr_num_buf_new && !tr->tr_num_databuf_new);
+ if (gfs2_assert_withdraw_delayed(sdp,
+ !tr->tr_num_buf_new && !tr->tr_num_databuf_new))
+ goto out;
}
if (unlikely(state == SFS_FROZEN))
- gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
- gfs2_assert_withdraw(sdp,
- sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke);
+ if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke))
+ goto out;
+ if (gfs2_assert_withdraw_delayed(sdp,
+ sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke))
+ goto out;
gfs2_ordered_write(sdp);
+ if (gfs2_withdrawn(sdp))
+ goto out;
lops_before_commit(sdp, tr);
+ if (gfs2_withdrawn(sdp))
+ goto out;
gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
+ if (gfs2_withdrawn(sdp))
+ goto out;
if (sdp->sd_log_head != sdp->sd_log_flush_head) {
log_flush_wait(sdp);
@@ -839,6 +946,8 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
trace_gfs2_log_blocks(sdp, -1);
log_write_header(sdp, flags);
}
+ if (gfs2_withdrawn(sdp))
+ goto out;
lops_after_commit(sdp, tr);
gfs2_log_lock(sdp);
@@ -859,9 +968,11 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
for (;;) {
gfs2_ail1_start(sdp);
gfs2_ail1_wait(sdp);
- if (gfs2_ail1_empty(sdp))
+ if (gfs2_ail1_empty(sdp, 0))
break;
}
+ if (gfs2_withdrawn(sdp))
+ goto out;
atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
trace_gfs2_log_blocks(sdp, -1);
log_write_header(sdp, flags);
@@ -874,6 +985,12 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
}
+out:
+ if (gfs2_withdrawn(sdp)) {
+ ail_drain(sdp); /* frees all transactions */
+ tr = NULL;
+ }
+
trace_gfs2_log_flush(sdp, 0, flags);
up_write(&sdp->sd_log_flush_lock);
@@ -1016,16 +1133,17 @@ int gfs2_logd(void *data)
/* Check for errors writing to the journal */
if (sdp->sd_log_error) {
- gfs2_lm_withdraw(sdp,
- "GFS2: fsid=%s: error %d: "
- "withdrawing the file system to "
- "prevent further damage.\n",
- sdp->sd_fsname, sdp->sd_log_error);
+ gfs2_lm(sdp,
+ "GFS2: fsid=%s: error %d: "
+ "withdrawing the file system to "
+ "prevent further damage.\n",
+ sdp->sd_fsname, sdp->sd_log_error);
+ gfs2_withdraw(sdp);
}
did_flush = false;
if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
- gfs2_ail1_empty(sdp);
+ gfs2_ail1_empty(sdp, 0);
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_LOGD_JFLUSH_REQD);
did_flush = true;
@@ -1034,7 +1152,7 @@ int gfs2_logd(void *data)
if (gfs2_ail_flush_reqd(sdp)) {
gfs2_ail1_start(sdp);
gfs2_ail1_wait(sdp);
- gfs2_ail1_empty(sdp);
+ gfs2_ail1_empty(sdp, 0);
gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
GFS2_LFC_LOGD_AIL_FLUSH_REQD);
did_flush = true;
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index c0a65e5a126b..c1cd6ae17659 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -73,6 +73,7 @@ extern void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl,
u32 type);
extern void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *trans);
extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc);
+extern void log_flush_wait(struct gfs2_sbd *sdp);
extern int gfs2_logd(void *data);
extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c
index c090d5ad3f22..5ea96757afc4 100644
--- a/fs/gfs2/lops.c
+++ b/fs/gfs2/lops.c
@@ -203,8 +203,12 @@ static void gfs2_end_log_write(struct bio *bio)
struct bvec_iter_all iter_all;
if (bio->bi_status) {
- fs_err(sdp, "Error %d writing to journal, jid=%u\n",
- bio->bi_status, sdp->sd_jdesc->jd_jid);
+ if (!cmpxchg(&sdp->sd_log_error, 0, (int)bio->bi_status))
+ fs_err(sdp, "Error %d writing to journal, jid=%u\n",
+ bio->bi_status, sdp->sd_jdesc->jd_jid);
+ gfs2_withdraw_delayed(sdp);
+ /* prevent more writes to the journal */
+ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
wake_up(&sdp->sd_logd_waitq);
}
@@ -730,7 +734,7 @@ static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
head = &tr->tr_buf;
while (!list_empty(head)) {
- bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
+ bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
list_del_init(&bd->bd_list);
gfs2_unpin(sdp, bd->bd_bh, tr);
}
@@ -900,7 +904,7 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
struct gfs2_glock *gl;
while (!list_empty(head)) {
- bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
+ bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
list_del_init(&bd->bd_list);
gl = bd->bd_gl;
gfs2_glock_remove_revoke(gl);
@@ -1079,7 +1083,7 @@ static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
head = &tr->tr_databuf;
while (!list_empty(head)) {
- bd = list_entry(head->next, struct gfs2_bufdata, bd_list);
+ bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
list_del_init(&bd->bd_list);
gfs2_unpin(sdp, bd->bd_bh, tr);
}
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 0c3772974030..4b72abcf83b2 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -251,7 +251,8 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags,
struct buffer_head *bh, *bhs[2];
int num = 0;
- if (unlikely(gfs2_withdrawn(sdp))) {
+ if (unlikely(gfs2_withdrawn(sdp)) &&
+ (!sdp->sd_jdesc || (blkno != sdp->sd_jdesc->jd_no_addr))) {
*bhp = NULL;
return -EIO;
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index a1a8ef7ed3fd..e2b69ffcc6a8 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -552,6 +552,8 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
mutex_lock(&sdp->sd_jindex_mutex);
for (;;) {
+ struct gfs2_inode *jip;
+
error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, ji_gh);
if (error)
break;
@@ -591,6 +593,8 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
spin_lock(&sdp->sd_jindex_spin);
jd->jd_jid = sdp->sd_journals++;
+ jip = GFS2_I(jd->jd_inode);
+ jd->jd_no_addr = jip->i_no_addr;
list_add_tail(&jd->jd_list, &sdp->sd_jindex_list);
spin_unlock(&sdp->sd_jindex_spin);
}
@@ -600,48 +604,6 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh)
return error;
}
-/**
- * check_journal_clean - Make sure a journal is clean for a spectator mount
- * @sdp: The GFS2 superblock
- * @jd: The journal descriptor
- *
- * Returns: 0 if the journal is clean or locked, else an error
- */
-static int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd)
-{
- int error;
- struct gfs2_holder j_gh;
- struct gfs2_log_header_host head;
- struct gfs2_inode *ip;
-
- ip = GFS2_I(jd->jd_inode);
- error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
- GL_EXACT | GL_NOCACHE, &j_gh);
- if (error) {
- fs_err(sdp, "Error locking journal for spectator mount.\n");
- return -EPERM;
- }
- error = gfs2_jdesc_check(jd);
- if (error) {
- fs_err(sdp, "Error checking journal for spectator mount.\n");
- goto out_unlock;
- }
- error = gfs2_find_jhead(jd, &head, false);
- if (error) {
- fs_err(sdp, "Error parsing journal for spectator mount.\n");
- goto out_unlock;
- }
- if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
- error = -EPERM;
- fs_err(sdp, "jid=%u: Journal is dirty, so the first mounter "
- "must not be a spectator.\n", jd->jd_jid);
- }
-
-out_unlock:
- gfs2_glock_dq_uninit(&j_gh);
- return error;
-}
-
static int init_journal(struct gfs2_sbd *sdp, int undo)
{
struct inode *master = d_inode(sdp->sd_master_dir);
@@ -694,7 +656,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
error = gfs2_glock_nq_num(sdp, sdp->sd_lockstruct.ls_jid,
&gfs2_journal_glops,
- LM_ST_EXCLUSIVE, LM_FLAG_NOEXP,
+ LM_ST_EXCLUSIVE,
+ LM_FLAG_NOEXP | GL_NOCACHE,
&sdp->sd_journal_gh);
if (error) {
fs_err(sdp, "can't acquire journal glock: %d\n", error);
@@ -702,6 +665,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
}
ip = GFS2_I(sdp->sd_jdesc->jd_inode);
+ sdp->sd_jinode_gl = ip->i_gl;
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED,
LM_FLAG_NOEXP | GL_EXACT | GL_NOCACHE,
&sdp->sd_jinode_gh);
@@ -732,7 +696,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
struct gfs2_jdesc *jd = gfs2_jdesc_find(sdp, x);
if (sdp->sd_args.ar_spectator) {
- error = check_journal_clean(sdp, jd);
+ error = check_journal_clean(sdp, jd, true);
if (error)
goto fail_jinode_gh;
continue;
@@ -762,10 +726,13 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
return 0;
fail_jinode_gh:
- if (!sdp->sd_args.ar_spectator)
+ /* A withdraw may have done dq/uninit so now we need to check it */
+ if (!sdp->sd_args.ar_spectator &&
+ gfs2_holder_initialized(&sdp->sd_jinode_gh))
gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
fail_journal_gh:
- if (!sdp->sd_args.ar_spectator)
+ if (!sdp->sd_args.ar_spectator &&
+ gfs2_holder_initialized(&sdp->sd_journal_gh))
gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
fail_jindex:
gfs2_jindex_free(sdp);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index e9f93045eb01..cc0c4b5800be 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -115,7 +115,7 @@ static void gfs2_qd_dispose(struct list_head *list)
struct gfs2_sbd *sdp;
while (!list_empty(list)) {
- qd = list_entry(list->next, struct gfs2_quota_data, qd_lru);
+ qd = list_first_entry(list, struct gfs2_quota_data, qd_lru);
sdp = qd->qd_gl->gl_name.ln_sbd;
list_del(&qd->qd_lru);
@@ -525,11 +525,11 @@ static void qdsb_put(struct gfs2_quota_data *qd)
}
/**
- * gfs2_qa_alloc - make sure we have a quota allocations data structure,
- * if necessary
+ * gfs2_qa_get - make sure we have a quota allocations data structure,
+ * if necessary
* @ip: the inode for this reservation
*/
-int gfs2_qa_alloc(struct gfs2_inode *ip)
+int gfs2_qa_get(struct gfs2_inode *ip)
{
int error = 0;
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
@@ -540,17 +540,21 @@ int gfs2_qa_alloc(struct gfs2_inode *ip)
down_write(&ip->i_rw_mutex);
if (ip->i_qadata == NULL) {
ip->i_qadata = kmem_cache_zalloc(gfs2_qadata_cachep, GFP_NOFS);
- if (!ip->i_qadata)
+ if (!ip->i_qadata) {
error = -ENOMEM;
+ goto out;
+ }
}
+ ip->i_qadata->qa_ref++;
+out:
up_write(&ip->i_rw_mutex);
return error;
}
-void gfs2_qa_delete(struct gfs2_inode *ip, atomic_t *wcount)
+void gfs2_qa_put(struct gfs2_inode *ip)
{
down_write(&ip->i_rw_mutex);
- if (ip->i_qadata && ((wcount == NULL) || (atomic_read(wcount) <= 1))) {
+ if (ip->i_qadata && --ip->i_qadata->qa_ref == 0) {
kmem_cache_free(gfs2_qadata_cachep, ip->i_qadata);
ip->i_qadata = NULL;
}
@@ -566,27 +570,27 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF)
return 0;
- if (ip->i_qadata == NULL) {
- error = gfs2_rsqa_alloc(ip);
- if (error)
- return error;
- }
+ error = gfs2_qa_get(ip);
+ if (error)
+ return error;
qd = ip->i_qadata->qa_qd;
if (gfs2_assert_warn(sdp, !ip->i_qadata->qa_qd_num) ||
- gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags)))
- return -EIO;
+ gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags))) {
+ error = -EIO;
+ goto out;
+ }
error = qdsb_get(sdp, make_kqid_uid(ip->i_inode.i_uid), qd);
if (error)
- goto out;
+ goto out_unhold;
ip->i_qadata->qa_qd_num++;
qd++;
error = qdsb_get(sdp, make_kqid_gid(ip->i_inode.i_gid), qd);
if (error)
- goto out;
+ goto out_unhold;
ip->i_qadata->qa_qd_num++;
qd++;
@@ -594,7 +598,7 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
!uid_eq(uid, ip->i_inode.i_uid)) {
error = qdsb_get(sdp, make_kqid_uid(uid), qd);
if (error)
- goto out;
+ goto out_unhold;
ip->i_qadata->qa_qd_num++;
qd++;
}
@@ -603,14 +607,15 @@ int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid)
!gid_eq(gid, ip->i_inode.i_gid)) {
error = qdsb_get(sdp, make_kqid_gid(gid), qd);
if (error)
- goto out;
+ goto out_unhold;
ip->i_qadata->qa_qd_num++;
qd++;
}
-out:
+out_unhold:
if (error)
gfs2_quota_unhold(ip);
+out:
return error;
}
@@ -621,6 +626,7 @@ void gfs2_quota_unhold(struct gfs2_inode *ip)
if (ip->i_qadata == NULL)
return;
+
gfs2_assert_warn(sdp, !test_bit(GIF_QD_LOCKED, &ip->i_flags));
for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
@@ -628,6 +634,7 @@ void gfs2_quota_unhold(struct gfs2_inode *ip)
ip->i_qadata->qa_qd[x] = NULL;
}
ip->i_qadata->qa_qd_num = 0;
+ gfs2_qa_put(ip);
}
static int sort_qd(const void *a, const void *b)
@@ -876,7 +883,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
unsigned int nalloc = 0, blocks;
int error;
- error = gfs2_rsqa_alloc(ip);
+ error = gfs2_qa_get(ip);
if (error)
return error;
@@ -884,8 +891,10 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
&data_blocks, &ind_blocks);
ghs = kmalloc_array(num_qd, sizeof(struct gfs2_holder), GFP_NOFS);
- if (!ghs)
- return -ENOMEM;
+ if (!ghs) {
+ error = -ENOMEM;
+ goto out;
+ }
sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL);
inode_lock(&ip->i_inode);
@@ -893,12 +902,12 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda)
error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE,
GL_NOCACHE, &ghs[qx]);
if (error)
- goto out;
+ goto out_dq;
}
error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh);
if (error)
- goto out;
+ goto out_dq;
for (x = 0; x < num_qd; x++) {
offset = qd2offset(qda[x]);
@@ -950,13 +959,15 @@ out_ipres:
gfs2_inplace_release(ip);
out_alloc:
gfs2_glock_dq_uninit(&i_gh);
-out:
+out_dq:
while (qx--)
gfs2_glock_dq_uninit(&ghs[qx]);
inode_unlock(&ip->i_inode);
kfree(ghs);
gfs2_log_flush(ip->i_gl->gl_name.ln_sbd, ip->i_gl,
GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_DO_SYNC);
+out:
+ gfs2_qa_put(ip);
return error;
}
@@ -1259,6 +1270,7 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change,
if (ip->i_diskflags & GFS2_DIF_SYSTEM)
return;
+ BUG_ON(ip->i_qadata->qa_ref <= 0);
for (x = 0; x < ip->i_qadata->qa_qd_num; x++) {
qd = ip->i_qadata->qa_qd[x];
@@ -1441,7 +1453,7 @@ void gfs2_quota_cleanup(struct gfs2_sbd *sdp)
spin_lock(&qd_lock);
while (!list_empty(head)) {
- qd = list_entry(head->prev, struct gfs2_quota_data, qd_list);
+ qd = list_last_entry(head, struct gfs2_quota_data, qd_list);
list_del(&qd->qd_list);
@@ -1476,8 +1488,8 @@ static void quotad_error(struct gfs2_sbd *sdp, const char *msg, int error)
if (error == 0 || error == -EROFS)
return;
if (!gfs2_withdrawn(sdp)) {
- fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
- sdp->sd_log_error = error;
+ if (!cmpxchg(&sdp->sd_log_error, 0, error))
+ fs_err(sdp, "gfs2_quotad: %s error %d\n", msg, error);
wake_up(&sdp->sd_logd_waitq);
}
}
@@ -1504,7 +1516,7 @@ static void quotad_check_trunc_list(struct gfs2_sbd *sdp)
ip = NULL;
spin_lock(&sdp->sd_trunc_lock);
if (!list_empty(&sdp->sd_trunc_list)) {
- ip = list_entry(sdp->sd_trunc_list.next,
+ ip = list_first_entry(&sdp->sd_trunc_list,
struct gfs2_inode, i_trunc_list);
list_del_init(&ip->i_trunc_list);
}
@@ -1541,6 +1553,8 @@ int gfs2_quotad(void *data)
while (!kthread_should_stop()) {
+ if (gfs2_withdrawn(sdp))
+ goto bypass;
/* Update the master statfs file */
if (sdp->sd_statfs_force_sync) {
int error = gfs2_statfs_sync(sdp->sd_vfs, 0);
@@ -1561,6 +1575,7 @@ int gfs2_quotad(void *data)
try_to_freeze();
+bypass:
t = min(quotad_timeo, statfs_timeo);
prepare_to_wait(&sdp->sd_quota_wait, &wait, TASK_INTERRUPTIBLE);
@@ -1674,7 +1689,7 @@ static int gfs2_set_dqblk(struct super_block *sb, struct kqid qid,
if (error)
return error;
- error = gfs2_rsqa_alloc(ip);
+ error = gfs2_qa_get(ip);
if (error)
goto out_put;
@@ -1743,6 +1758,7 @@ out_i:
out_q:
gfs2_glock_dq_uninit(&q_gh);
out_unlockput:
+ gfs2_qa_put(ip);
inode_unlock(&ip->i_inode);
out_put:
qd_put(qd);
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h
index 765627d9a91e..7f9ca8ef40fc 100644
--- a/fs/gfs2/quota.h
+++ b/fs/gfs2/quota.h
@@ -15,8 +15,8 @@ struct gfs2_sbd;
#define NO_UID_QUOTA_CHANGE INVALID_UID
#define NO_GID_QUOTA_CHANGE INVALID_GID
-extern int gfs2_qa_alloc(struct gfs2_inode *ip);
-extern void gfs2_qa_delete(struct gfs2_inode *ip, atomic_t *wcount);
+extern int gfs2_qa_get(struct gfs2_inode *ip);
+extern void gfs2_qa_put(struct gfs2_inode *ip);
extern int gfs2_quota_hold(struct gfs2_inode *ip, kuid_t uid, kgid_t gid);
extern void gfs2_quota_unhold(struct gfs2_inode *ip);
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c
index 85f830e56945..96c345f49273 100644
--- a/fs/gfs2/recovery.c
+++ b/fs/gfs2/recovery.c
@@ -111,7 +111,7 @@ void gfs2_revoke_clean(struct gfs2_jdesc *jd)
struct gfs2_revoke_replay *rr;
while (!list_empty(head)) {
- rr = list_entry(head->next, struct gfs2_revoke_replay, rr_list);
+ rr = list_first_entry(head, struct gfs2_revoke_replay, rr_list);
list_del(&rr->rr_list);
kfree(rr);
}
@@ -305,6 +305,11 @@ void gfs2_recover_func(struct work_struct *work)
int error = 0;
int jlocked = 0;
+ if (gfs2_withdrawn(sdp)) {
+ fs_err(sdp, "jid=%u: Recovery not attempted due to withdraw.\n",
+ jd->jd_jid);
+ goto fail;
+ }
t_start = ktime_get();
if (sdp->sd_args.ar_spectator)
goto fail;
@@ -393,6 +398,10 @@ void gfs2_recover_func(struct work_struct *work)
fs_info(sdp, "jid=%u: Replaying journal...0x%x to 0x%x\n",
jd->jd_jid, head.lh_tail, head.lh_blkno);
+ /* We take the sd_log_flush_lock here primarily to prevent log
+ * flushes and simultaneous journal replays from stomping on
+ * each other wrt sd_log_bio. */
+ down_read(&sdp->sd_log_flush_lock);
for (pass = 0; pass < 2; pass++) {
lops_before_scan(jd, &head, pass);
error = foreach_descriptor(jd, head.lh_tail,
@@ -403,6 +412,7 @@ void gfs2_recover_func(struct work_struct *work)
}
clean_journal(jd, &head);
+ up_read(&sdp->sd_log_flush_lock);
gfs2_glock_dq_uninit(&thaw_gh);
t_rep = ktime_get();
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index e7bf91ec231c..a321c34e3d6e 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -457,24 +457,24 @@ void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd)
}
if (count[0] != rgd->rd_free) {
- if (gfs2_consist_rgrpd(rgd))
- fs_err(sdp, "free data mismatch: %u != %u\n",
- count[0], rgd->rd_free);
+ gfs2_lm(sdp, "free data mismatch: %u != %u\n",
+ count[0], rgd->rd_free);
+ gfs2_consist_rgrpd(rgd);
return;
}
tmp = rgd->rd_data - rgd->rd_free - rgd->rd_dinodes;
if (count[1] != tmp) {
- if (gfs2_consist_rgrpd(rgd))
- fs_err(sdp, "used data mismatch: %u != %u\n",
- count[1], tmp);
+ gfs2_lm(sdp, "used data mismatch: %u != %u\n",
+ count[1], tmp);
+ gfs2_consist_rgrpd(rgd);
return;
}
if (count[2] + count[3] != rgd->rd_dinodes) {
- if (gfs2_consist_rgrpd(rgd))
- fs_err(sdp, "used metadata mismatch: %u != %u\n",
- count[2] + count[3], rgd->rd_dinodes);
+ gfs2_lm(sdp, "used metadata mismatch: %u != %u\n",
+ count[2] + count[3], rgd->rd_dinodes);
+ gfs2_consist_rgrpd(rgd);
return;
}
}
@@ -590,16 +590,6 @@ void gfs2_free_clones(struct gfs2_rgrpd *rgd)
}
}
-/**
- * gfs2_rsqa_alloc - make sure we have a reservation assigned to the inode
- * plus a quota allocations data structure, if necessary
- * @ip: the inode for this reservation
- */
-int gfs2_rsqa_alloc(struct gfs2_inode *ip)
-{
- return gfs2_qa_alloc(ip);
-}
-
static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs,
const char *fs_id_buf)
{
@@ -672,18 +662,17 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
}
/**
- * gfs2_rsqa_delete - delete a multi-block reservation and quota allocation
+ * gfs2_rs_delete - delete a multi-block reservation
* @ip: The inode for this reservation
* @wcount: The inode's write count, or NULL
*
*/
-void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount)
+void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount)
{
down_write(&ip->i_rw_mutex);
if ((wcount == NULL) || (atomic_read(wcount) <= 1))
gfs2_rs_deltree(&ip->i_res);
up_write(&ip->i_rw_mutex);
- gfs2_qa_delete(ip, wcount);
}
/**
@@ -720,8 +709,12 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
rb_erase(n, &sdp->sd_rindex_tree);
if (gl) {
- glock_clear_object(gl, rgd);
+ if (gl->gl_state != LM_ST_UNLOCKED) {
+ gfs2_glock_cb(gl, LM_ST_UNLOCKED);
+ flush_delayed_work(&gl->gl_work);
+ }
gfs2_rgrp_brelse(rgd);
+ glock_clear_object(gl, rgd);
gfs2_glock_put(gl);
}
@@ -733,17 +726,6 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
}
}
-static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)
-{
- struct gfs2_sbd *sdp = rgd->rd_sbd;
-
- fs_info(sdp, "ri_addr = %llu\n", (unsigned long long)rgd->rd_addr);
- fs_info(sdp, "ri_length = %u\n", rgd->rd_length);
- fs_info(sdp, "ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0);
- fs_info(sdp, "ri_data = %u\n", rgd->rd_data);
- fs_info(sdp, "ri_bitbytes = %u\n", rgd->rd_bitbytes);
-}
-
/**
* gfs2_compute_bitstructs - Compute the bitmap sizes
* @rgd: The resource group descriptor
@@ -814,11 +796,20 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)
}
bi = rgd->rd_bits + (length - 1);
if ((bi->bi_start + bi->bi_bytes) * GFS2_NBBY != rgd->rd_data) {
- if (gfs2_consist_rgrpd(rgd)) {
- gfs2_rindex_print(rgd);
- fs_err(sdp, "start=%u len=%u offset=%u\n",
- bi->bi_start, bi->bi_bytes, bi->bi_offset);
- }
+ gfs2_lm(sdp,
+ "ri_addr = %llu\n"
+ "ri_length = %u\n"
+ "ri_data0 = %llu\n"
+ "ri_data = %u\n"
+ "ri_bitbytes = %u\n"
+ "start=%u len=%u offset=%u\n",
+ (unsigned long long)rgd->rd_addr,
+ rgd->rd_length,
+ (unsigned long long)rgd->rd_data0,
+ rgd->rd_data,
+ rgd->rd_bitbytes,
+ bi->bi_start, bi->bi_bytes, bi->bi_offset);
+ gfs2_consist_rgrpd(rgd);
return -EIO;
}
@@ -1286,23 +1277,6 @@ void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd)
bi->bi_bh = NULL;
}
}
-
-}
-
-/**
- * gfs2_rgrp_go_unlock - Unlock a rgrp glock
- * @gh: The glock holder for the resource group
- *
- */
-
-void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)
-{
- struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;
- int demote_requested = test_bit(GLF_DEMOTE, &gh->gh_gl->gl_flags) |
- test_bit(GLF_PENDING_DEMOTE, &gh->gh_gl->gl_flags);
-
- if (rgd && demote_requested)
- gfs2_rgrp_brelse(rgd);
}
int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset,
@@ -1832,10 +1806,8 @@ static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip
struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 };
while (1) {
- down_write(&sdp->sd_log_flush_lock);
error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL,
true);
- up_write(&sdp->sd_log_flush_lock);
if (error == -ENOSPC)
break;
if (WARN_ON_ONCE(error))
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h
index c14a673ae36f..a1d7e14fc55b 100644
--- a/fs/gfs2/rgrp.h
+++ b/fs/gfs2/rgrp.h
@@ -33,7 +33,6 @@ extern int gfs2_rindex_update(struct gfs2_sbd *sdp);
extern void gfs2_free_clones(struct gfs2_rgrpd *rgd);
extern int gfs2_rgrp_go_lock(struct gfs2_holder *gh);
extern void gfs2_rgrp_brelse(struct gfs2_rgrpd *rgd);
-extern void gfs2_rgrp_go_unlock(struct gfs2_holder *gh);
extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip);
@@ -45,9 +44,8 @@ extern void gfs2_inplace_release(struct gfs2_inode *ip);
extern int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *n,
bool dinode, u64 *generation);
-extern int gfs2_rsqa_alloc(struct gfs2_inode *ip);
extern void gfs2_rs_deltree(struct gfs2_blkreserv *rs);
-extern void gfs2_rsqa_delete(struct gfs2_inode *ip, atomic_t *wcount);
+extern void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount);
extern void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
u64 bstart, u32 blen, int meta);
extern void gfs2_free_meta(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 68cc7c291a81..37fc41632aa2 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -61,11 +61,13 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp)
sdp->sd_journals = 0;
spin_unlock(&sdp->sd_jindex_spin);
+ sdp->sd_jdesc = NULL;
while (!list_empty(&list)) {
- jd = list_entry(list.next, struct gfs2_jdesc, jd_list);
+ jd = list_first_entry(&list, struct gfs2_jdesc, jd_list);
gfs2_free_journal_extents(jd);
list_del(&jd->jd_list);
iput(jd->jd_inode);
+ jd->jd_inode = NULL;
kfree(jd);
}
}
@@ -171,9 +173,13 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
goto fail_threads;
j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
+ if (gfs2_withdrawn(sdp)) {
+ error = -EIO;
+ goto fail;
+ }
error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
- if (error)
+ if (error || gfs2_withdrawn(sdp))
goto fail;
if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
@@ -187,7 +193,7 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
gfs2_log_pointers_init(sdp, head.lh_blkno);
error = gfs2_quota_init(sdp);
- if (error)
+ if (error || gfs2_withdrawn(sdp))
goto fail;
set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
@@ -446,7 +452,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
out:
while (!list_empty(&list)) {
- lfcc = list_entry(list.next, struct lfcc, list);
+ lfcc = list_first_entry(&list, struct lfcc, list);
list_del(&lfcc->list);
gfs2_glock_dq_uninit(&lfcc->gh);
kfree(lfcc);
@@ -599,34 +605,63 @@ out:
int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
{
struct gfs2_holder freeze_gh;
- int error;
-
- error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, GL_NOCACHE,
- &freeze_gh);
- if (error && !gfs2_withdrawn(sdp))
- return error;
+ int error = 0;
+ int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+
+ gfs2_holder_mark_uninitialized(&freeze_gh);
+ if (sdp->sd_freeze_gl &&
+ !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
+ if (!log_write_allowed) {
+ error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
+ LM_ST_SHARED, GL_NOCACHE |
+ LM_FLAG_TRY, &freeze_gh);
+ if (error == GLR_TRYFAILED)
+ error = 0;
+ } else {
+ error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
+ LM_ST_SHARED, GL_NOCACHE,
+ &freeze_gh);
+ if (error && !gfs2_withdrawn(sdp))
+ return error;
+ }
+ }
flush_workqueue(gfs2_delete_workqueue);
- if (sdp->sd_quotad_process)
+ if (!log_write_allowed && current == sdp->sd_quotad_process)
+ fs_warn(sdp, "The quotad daemon is withdrawing.\n");
+ else if (sdp->sd_quotad_process)
kthread_stop(sdp->sd_quotad_process);
sdp->sd_quotad_process = NULL;
- if (sdp->sd_logd_process)
+
+ if (!log_write_allowed && current == sdp->sd_logd_process)
+ fs_warn(sdp, "The logd daemon is withdrawing.\n");
+ else if (sdp->sd_logd_process)
kthread_stop(sdp->sd_logd_process);
sdp->sd_logd_process = NULL;
- gfs2_quota_sync(sdp->sd_vfs, 0);
- gfs2_statfs_sync(sdp->sd_vfs, 0);
-
- gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
- GFS2_LFC_MAKE_FS_RO);
- wait_event(sdp->sd_reserving_log_wait, atomic_read(&sdp->sd_reserving_log) == 0);
- gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);
+ if (log_write_allowed) {
+ gfs2_quota_sync(sdp->sd_vfs, 0);
+ gfs2_statfs_sync(sdp->sd_vfs, 0);
+ gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
+ GFS2_LFC_MAKE_FS_RO);
+ wait_event(sdp->sd_reserving_log_wait,
+ atomic_read(&sdp->sd_reserving_log) == 0);
+ gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) ==
+ sdp->sd_jdesc->jd_blocks);
+ } else {
+ wait_event_timeout(sdp->sd_reserving_log_wait,
+ atomic_read(&sdp->sd_reserving_log) == 0,
+ HZ * 5);
+ }
if (gfs2_holder_initialized(&freeze_gh))
gfs2_glock_dq_uninit(&freeze_gh);
gfs2_quota_cleanup(sdp);
+ if (!log_write_allowed)
+ sdp->sd_vfs->s_flags |= SB_RDONLY;
+
return error;
}
@@ -677,8 +712,10 @@ restart:
gfs2_glock_put(sdp->sd_freeze_gl);
if (!sdp->sd_args.ar_spectator) {
- gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
- gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
+ if (gfs2_holder_initialized(&sdp->sd_journal_gh))
+ gfs2_glock_dq_uninit(&sdp->sd_journal_gh);
+ if (gfs2_holder_initialized(&sdp->sd_jinode_gh))
+ gfs2_glock_dq_uninit(&sdp->sd_jinode_gh);
gfs2_glock_dq_uninit(&sdp->sd_sc_gh);
gfs2_glock_dq_uninit(&sdp->sd_qc_gh);
iput(sdp->sd_sc_inode);
@@ -1356,14 +1393,6 @@ out_unlock:
if (gfs2_rs_active(&ip->i_res))
gfs2_rs_deltree(&ip->i_res);
- if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
- glock_clear_object(ip->i_iopen_gh.gh_gl, ip);
- if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
- ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- gfs2_glock_dq(&ip->i_iopen_gh);
- }
- gfs2_holder_uninit(&ip->i_iopen_gh);
- }
if (gfs2_holder_initialized(&gh)) {
glock_clear_object(ip->i_gl, ip);
gfs2_glock_dq_uninit(&gh);
@@ -1372,22 +1401,30 @@ out_unlock:
fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
out:
truncate_inode_pages_final(&inode->i_data);
- gfs2_rsqa_delete(ip, NULL);
+ if (ip->i_qadata)
+ gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0);
+ gfs2_rs_delete(ip, NULL);
+ gfs2_qa_put(ip);
gfs2_ordered_del_inode(ip);
clear_inode(inode);
gfs2_dir_hash_inval(ip);
- glock_clear_object(ip->i_gl, ip);
- wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
- gfs2_glock_add_to_lru(ip->i_gl);
- gfs2_glock_put_eventually(ip->i_gl);
- ip->i_gl = NULL;
+ if (ip->i_gl) {
+ glock_clear_object(ip->i_gl, ip);
+ wait_on_bit_io(&ip->i_flags, GIF_GLOP_PENDING, TASK_UNINTERRUPTIBLE);
+ gfs2_glock_add_to_lru(ip->i_gl);
+ gfs2_glock_put_eventually(ip->i_gl);
+ ip->i_gl = NULL;
+ }
if (gfs2_holder_initialized(&ip->i_iopen_gh)) {
struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
glock_clear_object(gl, ip);
- ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
+ if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
+ ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
+ gfs2_glock_dq(&ip->i_iopen_gh);
+ }
gfs2_glock_hold(gl);
- gfs2_glock_dq_uninit(&ip->i_iopen_gh);
+ gfs2_holder_uninit(&ip->i_iopen_gh);
gfs2_glock_put_eventually(gl);
}
}
@@ -1401,6 +1438,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
return NULL;
ip->i_flags = 0;
ip->i_gl = NULL;
+ gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);
memset(&ip->i_res, 0, sizeof(ip->i_res));
RB_CLEAR_NODE(&ip->i_res.rs_node);
ip->i_rahead = 0;
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h
index b8bf811a1305..51900554ed81 100644
--- a/fs/gfs2/super.h
+++ b/fs/gfs2/super.h
@@ -26,7 +26,6 @@ extern void gfs2_jindex_free(struct gfs2_sbd *sdp);
extern struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid);
extern int gfs2_jdesc_check(struct gfs2_jdesc *jd);
-
extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
struct gfs2_inode **ipp);
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c
index 8ccb68f4ed16..d28c41bd69b0 100644
--- a/fs/gfs2/sys.c
+++ b/fs/gfs2/sys.c
@@ -136,7 +136,8 @@ static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len)
if (val != 1)
return -EINVAL;
- gfs2_lm_withdraw(sdp, "withdrawing from cluster at user's request\n");
+ gfs2_lm(sdp, "withdrawing from cluster at user's request\n");
+ gfs2_withdraw(sdp);
return len;
}
@@ -434,6 +435,8 @@ int gfs2_recover_set(struct gfs2_sbd *sdp, unsigned jid)
* never clear the DFL_BLOCK_LOCKS flag, so all our locks would
* permanently stop working.
*/
+ if (!sdp->sd_jdesc)
+ goto out;
if (sdp->sd_jdesc->jd_jid == jid && !sdp->sd_args.ar_spectator)
goto out;
rv = -ENOENT;
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index a685637a5b55..ffe840505082 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -228,6 +228,10 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh)
fs_info(sdp, "GFS2:adding buf while frozen\n");
gfs2_assert_withdraw(sdp, 0);
}
+ if (unlikely(gfs2_withdrawn(sdp))) {
+ fs_info(sdp, "GFS2:adding buf while withdrawn! 0x%llx\n",
+ (unsigned long long)bd->bd_bh->b_blocknr);
+ }
gfs2_pin(sdp, bd->bd_bh);
mh->__pad0 = cpu_to_be64(0);
mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index ec600b487498..9b64d40ab379 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -11,12 +11,18 @@
#include <linux/buffer_head.h>
#include <linux/crc32.h>
#include <linux/gfs2_ondisk.h>
+#include <linux/delay.h>
#include <linux/uaccess.h>
#include "gfs2.h"
#include "incore.h"
#include "glock.h"
+#include "glops.h"
+#include "log.h"
+#include "lops.h"
+#include "recovery.h"
#include "rgrp.h"
+#include "super.h"
#include "util.h"
struct kmem_cache *gfs2_glock_cachep __read_mostly;
@@ -33,32 +39,257 @@ void gfs2_assert_i(struct gfs2_sbd *sdp)
fs_emerg(sdp, "fatal assertion failed\n");
}
-int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...)
+/**
+ * check_journal_clean - Make sure a journal is clean for a spectator mount
+ * @sdp: The GFS2 superblock
+ * @jd: The journal descriptor
+ * @verbose: If true, report each failure with fs_err(); if false, stay silent
+ *
+ * Takes the journal inode's glock in shared mode, validates the journal with
+ * gfs2_jdesc_check(), locates the log head and checks that it carries the
+ * GFS2_LOG_HEAD_UNMOUNT flag. The glock is dropped again before returning.
+ *
+ * Returns: 0 if the journal is clean, -EPERM if the journal glock could not
+ * be acquired or the journal is dirty, otherwise the error returned by
+ * gfs2_jdesc_check() or gfs2_find_jhead()
+ */
+int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+ bool verbose)
+{
+ int error;
+ struct gfs2_holder j_gh;
+ struct gfs2_log_header_host head;
+ struct gfs2_inode *ip;
+
+ ip = GFS2_I(jd->jd_inode);
+ error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_NOEXP |
+ GL_EXACT | GL_NOCACHE, &j_gh);
+ if (error) {
+ if (verbose)
+ fs_err(sdp, "Error %d locking journal for spectator "
+ "mount.\n", error);
+ /* Any locking failure is reported to the caller as -EPERM */
+ return -EPERM;
+ }
+ error = gfs2_jdesc_check(jd);
+ if (error) {
+ if (verbose)
+ fs_err(sdp, "Error checking journal for spectator "
+ "mount.\n");
+ goto out_unlock;
+ }
+ error = gfs2_find_jhead(jd, &head, false);
+ if (error) {
+ if (verbose)
+ fs_err(sdp, "Error parsing journal for spectator "
+ "mount.\n");
+ goto out_unlock;
+ }
+ /* A log head without the UNMOUNT flag means the journal is dirty */
+ if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT)) {
+ error = -EPERM;
+ if (verbose)
+ fs_err(sdp, "jid=%u: Journal is dirty, so the first "
+ "mounter must not be a spectator.\n",
+ jd->jd_jid);
+ }
+
+out_unlock:
+ gfs2_glock_dq_uninit(&j_gh);
+ return error;
+}
+
+/**
+ * signal_our_withdraw - give up our journal so that it can be recovered
+ * @sdp: The GFS2 superblock
+ *
+ * Does nothing if SDF_NORECOVERY is set or there is no journal descriptor.
+ * Otherwise it sets SDF_WITHDRAW_RECOVERY, clears SDF_JOURNAL_LIVE, makes
+ * the file system read-only if it is not already, dequeues the journal and
+ * journal-inode glock holders and drops the journal inode. With a real lock
+ * protocol it then re-requests the "live" glock to trigger recovery, looks
+ * the journal inode up again and polls check_journal_clean() up to 10 times.
+ * SDF_WITHDRAW_RECOVERY is cleared and its waiters are woken before
+ * returning.
+ */
+static void signal_our_withdraw(struct gfs2_sbd *sdp)
+{
+ struct gfs2_glock *gl = sdp->sd_live_gh.gh_gl;
+ struct inode *inode;
+ struct gfs2_inode *ip;
+ u64 no_formal_ino;
+ int ret = 0;
+ int tries;
+
+ /*
+ * gfs2_jindex_free() resets sd_jdesc to NULL, so it must be checked
+ * before the journal inode is looked at.
+ */
+ if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc)
+ return;
+
+ inode = sdp->sd_jdesc->jd_inode;
+ ip = GFS2_I(inode);
+ /* Remember the formal inode number; it is needed for the re-lookup */
+ no_formal_ino = ip->i_no_formal_ino;
+
+ /* Prevent any glock dq until withdraw recovery is complete */
+ set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
+ /*
+ * Don't tell dlm we're bailing until we have no more buffers in the
+ * wind. If journal had an IO error, the log code should just purge
+ * the outstanding buffers rather than submitting new IO. Making the
+ * file system read-only will flush the journal, etc.
+ *
+ * During a normal unmount, gfs2_make_fs_ro calls gfs2_log_shutdown
+ * which clears SDF_JOURNAL_LIVE. In a withdraw, we must not write
+ * any UNMOUNT log header, so we can't call gfs2_log_shutdown, and
+ * therefore we need to clear SDF_JOURNAL_LIVE manually.
+ */
+ clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+ if (!sb_rdonly(sdp->sd_vfs))
+ ret = gfs2_make_fs_ro(sdp);
+
+ /*
+ * Drop the glock for our journal so another node can recover it.
+ */
+ if (gfs2_holder_initialized(&sdp->sd_journal_gh)) {
+ gfs2_glock_dq_wait(&sdp->sd_journal_gh);
+ gfs2_holder_uninit(&sdp->sd_journal_gh);
+ }
+ sdp->sd_jinode_gh.gh_flags |= GL_NOCACHE;
+ gfs2_glock_dq(&sdp->sd_jinode_gh);
+ if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) {
+ /* Make sure gfs2_unfreeze works if partially-frozen */
+ flush_workqueue(gfs2_freeze_wq);
+ atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
+ thaw_super(sdp->sd_vfs);
+ } else {
+ wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
+ }
+
+ /*
+ * holder_uninit to force glock_put, to force dlm to let go
+ */
+ gfs2_holder_uninit(&sdp->sd_jinode_gh);
+
+ /*
+ * Note: We need to be careful here:
+ * Our iput of jd_inode will evict it. The evict will dequeue its
+ * glock, but the glock dq will wait for the withdraw unless we have
+ * exception code in glock_dq.
+ */
+ iput(inode);
+ /*
+ * Wait until the journal inode's glock is freed. This allows try locks
+ * on other nodes to be successful, otherwise we remain the owner of
+ * the glock as far as dlm is concerned.
+ *
+ * NOTE(review): gl was taken from sd_live_gh above, i.e. it is the
+ * "live" glock, not the journal inode's glock this comment talks
+ * about - confirm which glock is meant here.
+ */
+ if (gl->gl_ops->go_free) {
+ set_bit(GLF_FREEING, &gl->gl_flags);
+ wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
+ }
+
+ if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
+ clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
+ goto skip_recovery;
+ }
+ /*
+ * Dequeue the "live" glock, but keep a reference so it's never freed.
+ */
+ gfs2_glock_hold(gl);
+ gfs2_glock_dq_wait(&sdp->sd_live_gh);
+ /*
+ * We enqueue the "live" glock in EX so that all other nodes
+ * get a demote request and act on it. We don't really want the
+ * lock in EX, so we send a "try" lock with 1CB to produce a callback.
+ */
+ fs_warn(sdp, "Requesting recovery of jid %d.\n",
+ sdp->sd_lockstruct.ls_jid);
+ gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | LM_FLAG_NOEXP,
+ &sdp->sd_live_gh);
+ /*
+ * NOTE(review): msleep() takes milliseconds; if GL_GLOCK_MAX_HOLD is
+ * expressed in jiffies this sleeps for the wrong time - confirm.
+ */
+ msleep(GL_GLOCK_MAX_HOLD);
+ /*
+ * This will likely fail in a cluster, but succeed standalone:
+ */
+ ret = gfs2_glock_nq(&sdp->sd_live_gh);
+
+ /*
+ * If we actually got the "live" lock in EX mode, there are no other
+ * nodes available to replay our journal. So we try to replay it
+ * ourselves. We hold the "live" glock to prevent other mounters
+ * during recovery, then just dequeue it and reacquire it in our
+ * normal SH mode. Just in case the problem that caused us to
+ * withdraw prevents us from recovering our journal (e.g. io errors
+ * and such) we still check if the journal is clean before proceeding
+ * but we may wait forever until another mounter does the recovery.
+ */
+ if (ret == 0) {
+ fs_warn(sdp, "No other mounters found. Trying to recover our "
+ "own journal jid %d.\n", sdp->sd_lockstruct.ls_jid);
+ if (gfs2_recover_journal(sdp->sd_jdesc, 1))
+ fs_warn(sdp, "Unable to recover our journal jid %d.\n",
+ sdp->sd_lockstruct.ls_jid);
+ gfs2_glock_dq_wait(&sdp->sd_live_gh);
+ gfs2_holder_reinit(LM_ST_SHARED, LM_FLAG_NOEXP | GL_EXACT,
+ &sdp->sd_live_gh);
+ gfs2_glock_nq(&sdp->sd_live_gh);
+ }
+
+ gfs2_glock_queue_put(gl); /* drop the extra reference we acquired */
+ clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
+
+ /*
+ * At this point our journal is evicted, so we need to get a new inode
+ * for it. Once done, we need to call gfs2_find_jhead which
+ * calls gfs2_map_journal_extents to map it for us again.
+ *
+ * Note that we don't really want it to look up a FREE block. The
+ * GFS2_BLKST_FREE simply overrides a block check in gfs2_inode_lookup
+ * which would otherwise fail because it requires grabbing an rgrp
+ * glock, which would fail with -EIO because we're withdrawing.
+ */
+ inode = gfs2_inode_lookup(sdp->sd_vfs, DT_UNKNOWN,
+ sdp->sd_jdesc->jd_no_addr, no_formal_ino,
+ GFS2_BLKST_FREE);
+ if (IS_ERR(inode)) {
+ fs_warn(sdp, "Reprocessing of jid %d failed with %ld.\n",
+ sdp->sd_lockstruct.ls_jid, PTR_ERR(inode));
+ goto skip_recovery;
+ }
+ sdp->sd_jdesc->jd_inode = inode;
+
+ /*
+ * Now wait until recovery is complete.
+ */
+ for (tries = 0; tries < 10; tries++) {
+ ret = check_journal_clean(sdp, sdp->sd_jdesc, false);
+ if (!ret)
+ break;
+ /*
+ * Wait one second between tries. msleep() takes milliseconds,
+ * so passing HZ would make the delay depend on CONFIG_HZ.
+ */
+ ssleep(1);
+ fs_warn(sdp, "Waiting for journal recovery jid %d.\n",
+ sdp->sd_lockstruct.ls_jid);
+ }
+skip_recovery:
+ if (!ret)
+ fs_warn(sdp, "Journal recovery complete for jid %d.\n",
+ sdp->sd_lockstruct.ls_jid);
+ else
+ fs_warn(sdp, "Journal recovery skipped for %d until next "
+ "mount.\n", sdp->sd_lockstruct.ls_jid);
+ fs_warn(sdp, "Glock dequeues delayed: %lu\n", sdp->sd_glock_dqs_held);
+ sdp->sd_glock_dqs_held = 0;
+ wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY);
+}
+
+void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...)
{
- struct lm_lockstruct *ls = &sdp->sd_lockstruct;
- const struct lm_lockops *lm = ls->ls_ops;
- va_list args;
struct va_format vaf;
+ va_list args;
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
- test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags))
- return 0;
-
- if (fmt) {
- va_start(args, fmt);
+ test_bit(SDF_WITHDRAWN, &sdp->sd_flags))
+ return;
+
+ va_start(args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &args;
+ fs_err(sdp, "%pV", &vaf);
+ va_end(args);
+}
- vaf.fmt = fmt;
- vaf.va = &args;
+int gfs2_withdraw(struct gfs2_sbd *sdp)
+{
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+ const struct lm_lockops *lm = ls->ls_ops;
- fs_err(sdp, "%pV", &vaf);
+ if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW &&
+ test_and_set_bit(SDF_WITHDRAWN, &sdp->sd_flags)) {
+ if (!test_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags))
+ return -1;
- va_end(args);
+ wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG,
+ TASK_UNINTERRUPTIBLE);
+ return -1;
}
+ set_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
+
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) {
fs_err(sdp, "about to withdraw this file system\n");
BUG_ON(sdp->sd_args.ar_debug);
+ signal_our_withdraw(sdp);
+
kobject_uevent(&sdp->sd_kobj, KOBJ_OFFLINE);
if (!strcmp(sdp->sd_lockstruct.ls_ops->lm_proto_name, "lock_dlm"))
@@ -69,8 +300,11 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...)
lm->lm_unmount(sdp);
}
set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
- fs_err(sdp, "withdrawn\n");
+ fs_err(sdp, "File system withdrawn\n");
dump_stack();
+ clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&sdp->sd_flags, SDF_WITHDRAW_IN_PROG);
}
if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
@@ -81,35 +315,45 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...)
/**
* gfs2_assert_withdraw_i - Cause the machine to withdraw if @assertion is false
- * Returns: -1 if this call withdrew the machine,
- * -2 if it was already withdrawn
*/
-int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
- const char *function, char *file, unsigned int line)
+void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
+ const char *function, char *file, unsigned int line,
+ bool delayed)
{
- int me;
- me = gfs2_lm_withdraw(sdp,
- "fatal: assertion \"%s\" failed\n"
- " function = %s, file = %s, line = %u\n",
- assertion, function, file, line);
+ if (gfs2_withdrawn(sdp))
+ return;
+
+ fs_err(sdp,
+ "fatal: assertion \"%s\" failed\n"
+ " function = %s, file = %s, line = %u\n",
+ assertion, function, file, line);
+
+ /*
+ * If errors=panic was specified on mount, it won't help to delay the
+ * withdraw.
+ */
+ if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC)
+ delayed = false;
+
+ if (delayed)
+ gfs2_withdraw_delayed(sdp);
+ else
+ gfs2_withdraw(sdp);
dump_stack();
- return (me) ? -1 : -2;
}
/**
* gfs2_assert_warn_i - Print a message to the console if @assertion is false
- * Returns: -1 if we printed something
- * -2 if we didn't
*/
-int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
- const char *function, char *file, unsigned int line)
+void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
+ const char *function, char *file, unsigned int line)
{
if (time_before(jiffies,
sdp->sd_last_warning +
gfs2_tune_get(sdp, gt_complain_secs) * HZ))
- return -2;
+ return;
if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW)
fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n",
@@ -127,69 +371,59 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
sdp->sd_fsname, function, file, line);
sdp->sd_last_warning = jiffies;
-
- return -1;
}
/**
* gfs2_consist_i - Flag a filesystem consistency error and withdraw
- * Returns: -1 if this call withdrew the machine,
- * 0 if it was already withdrawn
*/
-int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function,
- char *file, unsigned int line)
+void gfs2_consist_i(struct gfs2_sbd *sdp, const char *function,
+ char *file, unsigned int line)
{
- int rv;
- rv = gfs2_lm_withdraw(sdp,
- "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n",
- function, file, line);
- return rv;
+ gfs2_lm(sdp,
+ "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n",
+ function, file, line);
+ gfs2_withdraw(sdp);
}
/**
* gfs2_consist_inode_i - Flag an inode consistency error and withdraw
- * Returns: -1 if this call withdrew the machine,
- * 0 if it was already withdrawn
*/
-int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
- const char *function, char *file, unsigned int line)
+void gfs2_consist_inode_i(struct gfs2_inode *ip,
+ const char *function, char *file, unsigned int line)
{
struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
- int rv;
- rv = gfs2_lm_withdraw(sdp,
- "fatal: filesystem consistency error\n"
- " inode = %llu %llu\n"
- " function = %s, file = %s, line = %u\n",
- (unsigned long long)ip->i_no_formal_ino,
- (unsigned long long)ip->i_no_addr,
- function, file, line);
- return rv;
+
+ gfs2_lm(sdp,
+ "fatal: filesystem consistency error\n"
+ " inode = %llu %llu\n"
+ " function = %s, file = %s, line = %u\n",
+ (unsigned long long)ip->i_no_formal_ino,
+ (unsigned long long)ip->i_no_addr,
+ function, file, line);
+ gfs2_withdraw(sdp);
}
/**
* gfs2_consist_rgrpd_i - Flag a RG consistency error and withdraw
- * Returns: -1 if this call withdrew the machine,
- * 0 if it was already withdrawn
*/
-int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
- const char *function, char *file, unsigned int line)
+void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
+ const char *function, char *file, unsigned int line)
{
struct gfs2_sbd *sdp = rgd->rd_sbd;
char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
- int rv;
sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
gfs2_rgrp_dump(NULL, rgd->rd_gl, fs_id_buf);
- rv = gfs2_lm_withdraw(sdp,
- "fatal: filesystem consistency error\n"
- " RG = %llu\n"
- " function = %s, file = %s, line = %u\n",
- (unsigned long long)rgd->rd_addr,
- function, file, line);
- return rv;
+ gfs2_lm(sdp,
+ "fatal: filesystem consistency error\n"
+ " RG = %llu\n"
+ " function = %s, file = %s, line = %u\n",
+ (unsigned long long)rgd->rd_addr,
+ function, file, line);
+ gfs2_withdraw(sdp);
}
/**
@@ -203,12 +437,14 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
unsigned int line)
{
int me;
- me = gfs2_lm_withdraw(sdp,
- "fatal: invalid metadata block\n"
- " bh = %llu (%s)\n"
- " function = %s, file = %s, line = %u\n",
- (unsigned long long)bh->b_blocknr, type,
- function, file, line);
+
+ gfs2_lm(sdp,
+ "fatal: invalid metadata block\n"
+ " bh = %llu (%s)\n"
+ " function = %s, file = %s, line = %u\n",
+ (unsigned long long)bh->b_blocknr, type,
+ function, file, line);
+ me = gfs2_withdraw(sdp);
return (me) ? -1 : -2;
}
@@ -223,12 +459,14 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
char *file, unsigned int line)
{
int me;
- me = gfs2_lm_withdraw(sdp,
- "fatal: invalid metadata block\n"
- " bh = %llu (type: exp=%u, found=%u)\n"
- " function = %s, file = %s, line = %u\n",
- (unsigned long long)bh->b_blocknr, type, t,
- function, file, line);
+
+ gfs2_lm(sdp,
+ "fatal: invalid metadata block\n"
+ " bh = %llu (type: exp=%u, found=%u)\n"
+ " function = %s, file = %s, line = %u\n",
+ (unsigned long long)bh->b_blocknr, type, t,
+ function, file, line);
+ me = gfs2_withdraw(sdp);
return (me) ? -1 : -2;
}
@@ -241,12 +479,11 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file,
unsigned int line)
{
- int rv;
- rv = gfs2_lm_withdraw(sdp,
- "fatal: I/O error\n"
- " function = %s, file = %s, line = %u\n",
- function, file, line);
- return rv;
+ gfs2_lm(sdp,
+ "fatal: I/O error\n"
+ " function = %s, file = %s, line = %u\n",
+ function, file, line);
+ return gfs2_withdraw(sdp);
}
/**
@@ -258,14 +495,14 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
const char *function, char *file, unsigned int line,
bool withdraw)
{
- if (!gfs2_withdrawn(sdp))
- fs_err(sdp,
- "fatal: I/O error\n"
- " block = %llu\n"
- " function = %s, file = %s, line = %u\n",
- (unsigned long long)bh->b_blocknr,
- function, file, line);
+ if (gfs2_withdrawn(sdp))
+ return;
+
+ fs_err(sdp, "fatal: I/O error\n"
+ " block = %llu\n"
+ " function = %s, file = %s, line = %u\n",
+ (unsigned long long)bh->b_blocknr, function, file, line);
if (withdraw)
- gfs2_lm_withdraw(sdp, NULL);
+ gfs2_withdraw(sdp);
}
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index f2702bc9837c..a3542560da6f 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -36,41 +36,59 @@ do { \
} while (0)
-int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
- const char *function, char *file, unsigned int line);
+void gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion,
+ const char *function, char *file, unsigned int line,
+ bool delayed);
#define gfs2_assert_withdraw(sdp, assertion) \
-((likely(assertion)) ? 0 : gfs2_assert_withdraw_i((sdp), #assertion, \
- __func__, __FILE__, __LINE__))
-
-
-int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
- const char *function, char *file, unsigned int line);
+ ({ \
+ bool _bool = (assertion); \
+ if (unlikely(!_bool)) \
+ gfs2_assert_withdraw_i((sdp), #assertion, \
+ __func__, __FILE__, __LINE__, false); \
+ !_bool; \
+ })
+
+#define gfs2_assert_withdraw_delayed(sdp, assertion) \
+ ({ \
+ bool _bool = (assertion); \
+ if (unlikely(!_bool)) \
+ gfs2_assert_withdraw_i((sdp), #assertion, \
+ __func__, __FILE__, __LINE__, true); \
+ !_bool; \
+ })
+
+void gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion,
+ const char *function, char *file, unsigned int line);
#define gfs2_assert_warn(sdp, assertion) \
-((likely(assertion)) ? 0 : gfs2_assert_warn_i((sdp), #assertion, \
- __func__, __FILE__, __LINE__))
-
+ ({ \
+ bool _bool = (assertion); \
+ if (unlikely(!_bool)) \
+ gfs2_assert_warn_i((sdp), #assertion, \
+ __func__, __FILE__, __LINE__); \
+ !_bool; \
+ })
-int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide,
- const char *function, char *file, unsigned int line);
+void gfs2_consist_i(struct gfs2_sbd *sdp,
+ const char *function, char *file, unsigned int line);
#define gfs2_consist(sdp) \
-gfs2_consist_i((sdp), 0, __func__, __FILE__, __LINE__)
+gfs2_consist_i((sdp), __func__, __FILE__, __LINE__)
-int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide,
- const char *function, char *file, unsigned int line);
+void gfs2_consist_inode_i(struct gfs2_inode *ip,
+ const char *function, char *file, unsigned int line);
#define gfs2_consist_inode(ip) \
-gfs2_consist_inode_i((ip), 0, __func__, __FILE__, __LINE__)
+gfs2_consist_inode_i((ip), __func__, __FILE__, __LINE__)
-int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide,
- const char *function, char *file, unsigned int line);
+void gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd,
+ const char *function, char *file, unsigned int line);
#define gfs2_consist_rgrpd(rgd) \
-gfs2_consist_rgrpd_i((rgd), 0, __func__, __FILE__, __LINE__)
+gfs2_consist_rgrpd_i((rgd), __func__, __FILE__, __LINE__)
int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -129,6 +147,9 @@ static inline void gfs2_metatype_set(struct buffer_head *bh, u16 type,
int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
char *file, unsigned int line);
+extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+ bool verbose);
+
#define gfs2_io_error(sdp) \
gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__);
@@ -165,18 +186,29 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt,
}
/**
+ * gfs2_withdraw_delayed - withdraw as soon as possible without deadlocks
+ * @sdp: the superblock
+ */
+static inline void gfs2_withdraw_delayed(struct gfs2_sbd *sdp)
+{
+ set_bit(SDF_WITHDRAWING, &sdp->sd_flags);
+}
+
+/**
* gfs2_withdrawn - test whether the file system is withdrawing or withdrawn
* @sdp: the superblock
*/
static inline bool gfs2_withdrawn(struct gfs2_sbd *sdp)
{
- return test_bit(SDF_WITHDRAWN, &sdp->sd_flags);
+ return test_bit(SDF_WITHDRAWN, &sdp->sd_flags) ||
+ test_bit(SDF_WITHDRAWING, &sdp->sd_flags);
}
#define gfs2_tune_get(sdp, field) \
gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field)
__printf(2, 3)
-int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...);
+void gfs2_lm(struct gfs2_sbd *sdp, const char *fmt, ...);
+int gfs2_withdraw(struct gfs2_sbd *sdp);
#endif /* __UTIL_DOT_H__ */
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index bbe593d16bea..9d7667bc4292 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1222,7 +1222,7 @@ static int gfs2_xattr_set(const struct xattr_handler *handler,
struct gfs2_holder gh;
int ret;
- ret = gfs2_rsqa_alloc(ip);
+ ret = gfs2_qa_get(ip);
if (ret)
return ret;
@@ -1231,15 +1231,19 @@ static int gfs2_xattr_set(const struct xattr_handler *handler,
if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
if (ret)
- return ret;
+ goto out;
} else {
- if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE))
- return -EIO;
+ if (WARN_ON_ONCE(ip->i_gl->gl_state != LM_ST_EXCLUSIVE)) {
+ ret = -EIO;
+ goto out;
+ }
gfs2_holder_mark_uninitialized(&gh);
}
ret = __gfs2_xattr_set(inode, name, value, size, flags, handler->flags);
if (gfs2_holder_initialized(&gh))
gfs2_glock_dq_uninit(&gh);
+out:
+ gfs2_qa_put(ip);
return ret;
}