aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_trans_ail.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_trans_ail.c')
-rw-r--r--fs/xfs/xfs_trans_ail.c257
1 files changed, 166 insertions, 91 deletions
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 00cc5b8734be..f51df7d94ef7 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -17,6 +17,7 @@
#include "xfs_errortag.h"
#include "xfs_error.h"
#include "xfs_log.h"
+#include "xfs_log_priv.h"
#ifdef DEBUG
/*
@@ -32,6 +33,7 @@ STATIC void
xfs_ail_check(
struct xfs_ail *ailp,
struct xfs_log_item *lip)
+ __must_hold(&ailp->ail_lock)
{
struct xfs_log_item *prev_lip;
struct xfs_log_item *next_lip;
@@ -108,17 +110,25 @@ xfs_ail_next(
* We need the AIL lock in order to get a coherent read of the lsn of the last
* item in the AIL.
*/
+static xfs_lsn_t
+__xfs_ail_min_lsn(
+ struct xfs_ail *ailp)
+{
+ struct xfs_log_item *lip = xfs_ail_min(ailp);
+
+ if (lip)
+ return lip->li_lsn;
+ return 0;
+}
+
xfs_lsn_t
xfs_ail_min_lsn(
struct xfs_ail *ailp)
{
- xfs_lsn_t lsn = 0;
- struct xfs_log_item *lip;
+ xfs_lsn_t lsn;
spin_lock(&ailp->ail_lock);
- lip = xfs_ail_min(ailp);
- if (lip)
- lsn = lip->li_lsn;
+ lsn = __xfs_ail_min_lsn(ailp);
spin_unlock(&ailp->ail_lock);
return lsn;
@@ -336,6 +346,49 @@ xfs_ail_delete(
xfs_trans_ail_cursor_clear(ailp, lip);
}
+/*
+ * Requeue a failed buffer for writeback.
+ *
+ * We clear the log item failed state here as well, but we have to be careful
+ * about reference counts because the only active reference counts on the buffer
+ * may be the failed log items. Hence if we clear the log item failed state
+ * before queuing the buffer for IO we can release all active references to
+ * the buffer and free it, leading to use after free problems in
+ * xfs_buf_delwri_queue. It makes no difference to the buffer or log items which
+ * order we process them in - the buffer is locked, and we own the buffer list
+ * so nothing on them is going to change while we are performing this action.
+ *
+ * Hence we can safely queue the buffer for IO before we clear the failed log
+ * item state, therefore always having an active reference to the buffer and
+ * avoiding the transient zero-reference state that leads to use-after-free.
+ */
+static inline int
+xfsaild_resubmit_item(
+ struct xfs_log_item *lip,
+ struct list_head *buffer_list)
+{
+ struct xfs_buf *bp = lip->li_buf;
+
+ if (!xfs_buf_trylock(bp))
+ return XFS_ITEM_LOCKED;
+
+ if (!xfs_buf_delwri_queue(bp, buffer_list)) {
+ xfs_buf_unlock(bp);
+ return XFS_ITEM_FLUSHING;
+ }
+
+ /* protected by ail_lock */
+ list_for_each_entry(lip, &bp->b_li_list, li_bio_list) {
+ if (bp->b_flags & _XBF_INODES)
+ clear_bit(XFS_LI_FAILED, &lip->li_flags);
+ else
+ xfs_clear_li_failed(lip);
+ }
+
+ xfs_buf_unlock(bp);
+ return XFS_ITEM_SUCCESS;
+}
+
static inline uint
xfsaild_push_item(
struct xfs_ail *ailp,
@@ -345,7 +398,7 @@ xfsaild_push_item(
* If log item pinning is enabled, skip the push and track the item as
* pinned. This can help induce head-behind-tail conditions.
*/
- if (XFS_TEST_ERROR(false, ailp->ail_mount, XFS_ERRTAG_LOG_ITEM_PIN))
+ if (XFS_TEST_ERROR(false, ailp->ail_log->l_mp, XFS_ERRTAG_LOG_ITEM_PIN))
return XFS_ITEM_PINNED;
/*
@@ -356,6 +409,8 @@ xfsaild_push_item(
*/
if (!lip->li_ops->iop_push)
return XFS_ITEM_PINNED;
+ if (test_bit(XFS_LI_FAILED, &lip->li_flags))
+ return xfsaild_resubmit_item(lip, &ailp->ail_buf_list);
return lip->li_ops->iop_push(lip, &ailp->ail_buf_list);
}
@@ -363,7 +418,7 @@ static long
xfsaild_push(
struct xfs_ail *ailp)
{
- xfs_mount_t *mp = ailp->ail_mount;
+ struct xfs_mount *mp = ailp->ail_log->l_mp;
struct xfs_ail_cursor cur;
struct xfs_log_item *lip;
xfs_lsn_t lsn;
@@ -375,8 +430,12 @@ xfsaild_push(
/*
* If we encountered pinned items or did not finish writing out all
- * buffers the last time we ran, force the log first and wait for it
- * before pushing again.
+ * buffers the last time we ran, force a background CIL push to get the
+ * items unpinned in the near future. We do not wait on the CIL push as
+ * that could stall us for seconds if there is enough background IO
+ * load. Stalling for that long when the tail of the log is pinned and
+ * needs flushing will hard stop the transaction subsystem when log
+ * space runs out.
*/
if (ailp->ail_log_flush && ailp->ail_last_pushed_lsn == 0 &&
(!list_empty_careful(&ailp->ail_buf_list) ||
@@ -384,26 +443,32 @@ xfsaild_push(
ailp->ail_log_flush = 0;
XFS_STATS_INC(mp, xs_push_ail_flush);
- xfs_log_force(mp, XFS_LOG_SYNC);
+ xlog_cil_flush(ailp->ail_log);
}
spin_lock(&ailp->ail_lock);
- /* barrier matches the ail_target update in xfs_ail_push() */
- smp_rmb();
- target = ailp->ail_target;
- ailp->ail_target_prev = target;
+ /*
+ * If we have a sync push waiter, we always have to push till the AIL is
+ * empty. Update the target to point to the end of the AIL so that
+ * capture updates that occur after the sync push waiter has gone to
+ * sleep.
+ */
+ if (waitqueue_active(&ailp->ail_empty)) {
+ lip = xfs_ail_max(ailp);
+ if (lip)
+ target = lip->li_lsn;
+ } else {
+ /* barrier matches the ail_target update in xfs_ail_push() */
+ smp_rmb();
+ target = ailp->ail_target;
+ ailp->ail_target_prev = target;
+ }
+ /* we're done if the AIL is empty or our push has reached the end */
lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->ail_last_pushed_lsn);
- if (!lip) {
- /*
- * If the AIL is empty or our push has reached the end we are
- * done now.
- */
- xfs_trans_ail_cursor_done(&cur);
- spin_unlock(&ailp->ail_lock);
+ if (!lip)
goto out_done;
- }
XFS_STATS_INC(mp, xs_push_ail);
@@ -432,7 +497,7 @@ xfsaild_push(
* inode buffer is locked because we already pushed the
* updates to it as part of inode clustering.
*
- * We do not want to to stop flushing just because lots
+ * We do not want to stop flushing just because lots
* of items are already being flushed, but we need to
* re-try the flushing relatively soon if most of the
* AIL is being flushed.
@@ -467,7 +532,7 @@ xfsaild_push(
/*
* Are there too many items we can't do anything with?
*
- * If we we are skipping too many items because we can't flush
+ * If we are skipping too many items because we can't flush
* them or they are already being flushed, we back off and
* given them time to complete whatever operation is being
* done. i.e. remove pressure from the AIL while we can't make
@@ -485,6 +550,8 @@ xfsaild_push(
break;
lsn = lip->li_lsn;
}
+
+out_done:
xfs_trans_ail_cursor_done(&cur);
spin_unlock(&ailp->ail_lock);
@@ -492,7 +559,6 @@ xfsaild_push(
ailp->ail_log_flush++;
if (!count || XFS_LSN_CMP(lsn, target) >= 0) {
-out_done:
/*
* We reached the target or the AIL is empty, so wait a bit
* longer for I/O to complete and remove pushed items from the
@@ -529,15 +595,16 @@ xfsaild(
{
struct xfs_ail *ailp = data;
long tout = 0; /* milliseconds */
+ unsigned int noreclaim_flag;
- current->flags |= PF_MEMALLOC;
+ noreclaim_flag = memalloc_noreclaim_save();
set_freezable();
while (1) {
if (tout && tout <= 20)
- set_current_state(TASK_KILLABLE);
+ set_current_state(TASK_KILLABLE|TASK_FREEZABLE);
else
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
/*
* Check kthread_should_stop() after we set the task state to
@@ -565,7 +632,7 @@ xfsaild(
* opportunity to release such buffers from the queue.
*/
ASSERT(list_empty(&ailp->ail_buf_list) ||
- XFS_FORCED_SHUTDOWN(ailp->ail_mount));
+ xlog_is_shutdown(ailp->ail_log));
xfs_buf_delwri_cancel(&ailp->ail_buf_list);
break;
}
@@ -583,16 +650,17 @@ xfsaild(
*/
smp_rmb();
if (!xfs_ail_min(ailp) &&
- ailp->ail_target == ailp->ail_target_prev) {
+ ailp->ail_target == ailp->ail_target_prev &&
+ list_empty(&ailp->ail_buf_list)) {
spin_unlock(&ailp->ail_lock);
- freezable_schedule();
+ schedule();
tout = 0;
continue;
}
spin_unlock(&ailp->ail_lock);
if (tout)
- freezable_schedule_timeout(msecs_to_jiffies(tout));
+ schedule_timeout(msecs_to_jiffies(tout));
__set_current_state(TASK_RUNNING);
@@ -601,6 +669,7 @@ xfsaild(
tout = xfsaild_push(ailp);
}
+ memalloc_noreclaim_restore(noreclaim_flag);
return 0;
}
@@ -626,7 +695,7 @@ xfs_ail_push(
struct xfs_log_item *lip;
lip = xfs_ail_min(ailp);
- if (!lip || XFS_FORCED_SHUTDOWN(ailp->ail_mount) ||
+ if (!lip || xlog_is_shutdown(ailp->ail_log) ||
XFS_LSN_CMP(threshold_lsn, ailp->ail_target) <= 0)
return;
@@ -661,13 +730,11 @@ void
xfs_ail_push_all_sync(
struct xfs_ail *ailp)
{
- struct xfs_log_item *lip;
DEFINE_WAIT(wait);
spin_lock(&ailp->ail_lock);
- while ((lip = xfs_ail_max(ailp)) != NULL) {
+ while (xfs_ail_max(ailp) != NULL) {
prepare_to_wait(&ailp->ail_empty, &wait, TASK_UNINTERRUPTIBLE);
- ailp->ail_target = lip->li_lsn;
wake_up_process(ailp->ail_task);
spin_unlock(&ailp->ail_lock);
schedule();
@@ -678,6 +745,28 @@ xfs_ail_push_all_sync(
finish_wait(&ailp->ail_empty, &wait);
}
+void
+xfs_ail_update_finish(
+ struct xfs_ail *ailp,
+ xfs_lsn_t old_lsn) __releases(ailp->ail_lock)
+{
+ struct xlog *log = ailp->ail_log;
+
+ /* if the tail lsn hasn't changed, don't do updates or wakeups. */
+ if (!old_lsn || old_lsn == __xfs_ail_min_lsn(ailp)) {
+ spin_unlock(&ailp->ail_lock);
+ return;
+ }
+
+ if (!xlog_is_shutdown(log))
+ xlog_assign_tail_lsn_locked(log->l_mp);
+
+ if (list_empty(&ailp->ail_head))
+ wake_up_all(&ailp->ail_empty);
+ spin_unlock(&ailp->ail_lock);
+ xfs_log_space_wake(log->l_mp);
+}
+
/*
* xfs_trans_ail_update - bulk AIL insertion operation.
*
@@ -709,7 +798,7 @@ xfs_trans_ail_update_bulk(
xfs_lsn_t lsn) __releases(ailp->ail_lock)
{
struct xfs_log_item *mlip;
- int mlip_changed = 0;
+ xfs_lsn_t tail_lsn = 0;
int i;
LIST_HEAD(tmp);
@@ -724,9 +813,10 @@ xfs_trans_ail_update_bulk(
continue;
trace_xfs_ail_move(lip, lip->li_lsn, lsn);
+ if (mlip == lip && !tail_lsn)
+ tail_lsn = lip->li_lsn;
+
xfs_ail_delete(ailp, lip);
- if (mlip == lip)
- mlip_changed = 1;
} else {
trace_xfs_ail_insert(lip, 0, lsn);
}
@@ -737,85 +827,70 @@ xfs_trans_ail_update_bulk(
if (!list_empty(&tmp))
xfs_ail_splice(ailp, cur, &tmp, lsn);
- if (mlip_changed) {
- if (!XFS_FORCED_SHUTDOWN(ailp->ail_mount))
- xlog_assign_tail_lsn_locked(ailp->ail_mount);
- spin_unlock(&ailp->ail_lock);
+ xfs_ail_update_finish(ailp, tail_lsn);
+}
- xfs_log_space_wake(ailp->ail_mount);
- } else {
- spin_unlock(&ailp->ail_lock);
- }
+/* Insert a log item into the AIL. */
+void
+xfs_trans_ail_insert(
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip,
+ xfs_lsn_t lsn)
+{
+ spin_lock(&ailp->ail_lock);
+ xfs_trans_ail_update_bulk(ailp, NULL, &lip, 1, lsn);
}
-bool
+/*
+ * Delete one log item from the AIL.
+ *
+ * If this item was at the tail of the AIL, return the LSN of the log item so
+ * that we can use it to check if the LSN of the tail of the log has moved
+ * when finishing up the AIL delete process in xfs_ail_update_finish().
+ */
+xfs_lsn_t
xfs_ail_delete_one(
struct xfs_ail *ailp,
struct xfs_log_item *lip)
{
struct xfs_log_item *mlip = xfs_ail_min(ailp);
+ xfs_lsn_t lsn = lip->li_lsn;
trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);
xfs_ail_delete(ailp, lip);
- xfs_clear_li_failed(lip);
clear_bit(XFS_LI_IN_AIL, &lip->li_flags);
lip->li_lsn = 0;
- return mlip == lip;
+ if (mlip == lip)
+ return lsn;
+ return 0;
}
-/**
- * Remove a log items from the AIL
- *
- * @xfs_trans_ail_delete_bulk takes an array of log items that all need to
- * removed from the AIL. The caller is already holding the AIL lock, and done
- * all the checks necessary to ensure the items passed in via @log_items are
- * ready for deletion. This includes checking that the items are in the AIL.
- *
- * For each log item to be removed, unlink it from the AIL, clear the IN_AIL
- * flag from the item and reset the item's lsn to 0. If we remove the first
- * item in the AIL, update the log tail to match the new minimum LSN in the
- * AIL.
- *
- * This function will not drop the AIL lock until all items are removed from
- * the AIL to minimise the amount of lock traffic on the AIL. This does not
- * greatly increase the AIL hold time, but does significantly reduce the amount
- * of traffic on the lock, especially during IO completion.
- *
- * This function must be called with the AIL lock held. The lock is dropped
- * before returning.
- */
void
xfs_trans_ail_delete(
- struct xfs_ail *ailp,
struct xfs_log_item *lip,
- int shutdown_type) __releases(ailp->ail_lock)
+ int shutdown_type)
{
- struct xfs_mount *mp = ailp->ail_mount;
- bool mlip_changed;
+ struct xfs_ail *ailp = lip->li_ailp;
+ struct xlog *log = ailp->ail_log;
+ xfs_lsn_t tail_lsn;
+ spin_lock(&ailp->ail_lock);
if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
spin_unlock(&ailp->ail_lock);
- if (!XFS_FORCED_SHUTDOWN(mp)) {
- xfs_alert_tag(mp, XFS_PTAG_AILDELETE,
+ if (shutdown_type && !xlog_is_shutdown(log)) {
+ xfs_alert_tag(log->l_mp, XFS_PTAG_AILDELETE,
"%s: attempting to delete a log item that is not in the AIL",
__func__);
- xfs_force_shutdown(mp, shutdown_type);
+ xlog_force_shutdown(log, shutdown_type);
}
return;
}
- mlip_changed = xfs_ail_delete_one(ailp, lip);
- if (mlip_changed) {
- if (!XFS_FORCED_SHUTDOWN(mp))
- xlog_assign_tail_lsn_locked(mp);
- if (list_empty(&ailp->ail_head))
- wake_up_all(&ailp->ail_empty);
- }
-
- spin_unlock(&ailp->ail_lock);
- if (mlip_changed)
- xfs_log_space_wake(ailp->ail_mount);
+ /* xfs_ail_update_finish() drops the AIL lock */
+ xfs_clear_li_failed(lip);
+ tail_lsn = xfs_ail_delete_one(ailp, lip);
+ xfs_ail_update_finish(ailp, tail_lsn);
}
int
@@ -828,7 +903,7 @@ xfs_trans_ail_init(
if (!ailp)
return -ENOMEM;
- ailp->ail_mount = mp;
+ ailp->ail_log = mp->m_log;
INIT_LIST_HEAD(&ailp->ail_head);
INIT_LIST_HEAD(&ailp->ail_cursors);
spin_lock_init(&ailp->ail_lock);
@@ -836,7 +911,7 @@ xfs_trans_ail_init(
init_waitqueue_head(&ailp->ail_empty);
ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
- ailp->ail_mount->m_super->s_id);
+ mp->m_super->s_id);
if (IS_ERR(ailp->ail_task))
goto out_free_ailp;