aboutsummaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_buf_item.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_buf_item.c')
-rw-r--r--fs/xfs/xfs_buf_item.c462
1 files changed, 285 insertions, 177 deletions
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index eac97ef81e2a..a8d0ed911196 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -20,7 +20,6 @@
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
-#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
@@ -123,11 +122,11 @@ xfs_buf_item_log_check(
ASSERT(bip->bli_logged != NULL);
bp = bip->bli_buf;
- ASSERT(XFS_BUF_COUNT(bp) > 0);
+ ASSERT(bp->b_length > 0);
ASSERT(bp->b_addr != NULL);
orig = bip->bli_orig;
buffer = bp->b_addr;
- for (x = 0; x < XFS_BUF_COUNT(bp); x++) {
+ for (x = 0; x < BBTOB(bp->b_length); x++) {
if (orig[x] != buffer[x] && !btst(bip->bli_logged, x)) {
xfs_emerg(bp->b_mount,
"%s: bip %x buffer %x orig %x index %d",
@@ -154,33 +153,25 @@ STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
* If the XFS_BLI_STALE flag has been set, then log nothing.
*/
STATIC uint
-xfs_buf_item_size(
- struct xfs_log_item *lip)
+xfs_buf_item_size_segment(
+ struct xfs_buf_log_item *bip,
+ struct xfs_buf_log_format *blfp)
{
- struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
uint nvecs;
int next_bit;
int last_bit;
- ASSERT(atomic_read(&bip->bli_refcount) > 0);
- if (bip->bli_flags & XFS_BLI_STALE) {
- /*
- * The buffer is stale, so all we need to log
- * is the buf log format structure with the
- * cancel flag in it.
- */
- trace_xfs_buf_item_size_stale(bip);
- ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
- return 1;
- }
+ last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
+ if (last_bit == -1)
+ return 0;
+
+ /*
+ * initial count for a dirty buffer is 2 vectors - the format structure
+ * and the first dirty region.
+ */
+ nvecs = 2;
- ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
- nvecs = 1;
- last_bit = xfs_next_bit(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size, 0);
- ASSERT(last_bit != -1);
- nvecs++;
while (last_bit != -1) {
/*
* This takes the bit number to start looking from and
@@ -188,16 +179,15 @@ xfs_buf_item_size(
* if there are no more bits set or the start bit is
* beyond the end of the bitmap.
*/
- next_bit = xfs_next_bit(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size,
- last_bit + 1);
+ next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
+ last_bit + 1);
/*
* If we run out of bits, leave the loop,
* else if we find a new set of bits bump the number of vecs,
* else keep scanning the current set of bits.
*/
if (next_bit == -1) {
- last_bit = -1;
+ break;
} else if (next_bit != last_bit + 1) {
last_bit = next_bit;
nvecs++;
@@ -211,22 +201,73 @@ xfs_buf_item_size(
}
}
- trace_xfs_buf_item_size(bip);
return nvecs;
}
/*
- * This is called to fill in the vector of log iovecs for the
- * given log buf item. It fills the first entry with a buf log
- * format structure, and the rest point to contiguous chunks
- * within the buffer.
+ * This returns the number of log iovecs needed to log the given buf log item.
+ *
+ * It calculates this as 1 iovec for the buf log format structure and 1 for each
+ * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged
+ * in a single iovec.
+ *
+ * Discontiguous buffers need a format structure per region that that is being
+ * logged. This makes the changes in the buffer appear to log recovery as though
+ * they came from separate buffers, just like would occur if multiple buffers
+ * were used instead of a single discontiguous buffer. This enables
+ * discontiguous buffers to be in-memory constructs, completely transparent to
+ * what ends up on disk.
+ *
+ * If the XFS_BLI_STALE flag has been set, then log nothing but the buf log
+ * format structures.
*/
-STATIC void
-xfs_buf_item_format(
- struct xfs_log_item *lip,
- struct xfs_log_iovec *vecp)
+STATIC uint
+xfs_buf_item_size(
+ struct xfs_log_item *lip)
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ uint nvecs;
+ int i;
+
+ ASSERT(atomic_read(&bip->bli_refcount) > 0);
+ if (bip->bli_flags & XFS_BLI_STALE) {
+ /*
+ * The buffer is stale, so all we need to log
+ * is the buf log format structure with the
+ * cancel flag in it.
+ */
+ trace_xfs_buf_item_size_stale(bip);
+ ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
+ return bip->bli_format_count;
+ }
+
+ ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
+
+ /*
+ * the vector count is based on the number of buffer vectors we have
+ * dirty bits in. This will only be greater than one when we have a
+ * compound buffer with more than one segment dirty. Hence for compound
+ * buffers we need to track which segment the dirty bits correspond to,
+ * and when we move from one segment to the next increment the vector
+ * count for the extra buf log format structure that will need to be
+ * written.
+ */
+ nvecs = 0;
+ for (i = 0; i < bip->bli_format_count; i++) {
+ nvecs += xfs_buf_item_size_segment(bip, &bip->bli_formats[i]);
+ }
+
+ trace_xfs_buf_item_size(bip);
+ return nvecs;
+}
+
+static struct xfs_log_iovec *
+xfs_buf_item_format_segment(
+ struct xfs_buf_log_item *bip,
+ struct xfs_log_iovec *vecp,
+ uint offset,
+ struct xfs_buf_log_format *blfp)
+{
struct xfs_buf *bp = bip->bli_buf;
uint base_size;
uint nvecs;
@@ -236,40 +277,22 @@ xfs_buf_item_format(
uint nbits;
uint buffer_offset;
- ASSERT(atomic_read(&bip->bli_refcount) > 0);
- ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
- (bip->bli_flags & XFS_BLI_STALE));
+ /* copy the flags across from the base format item */
+ blfp->blf_flags = bip->bli_format.blf_flags;
/*
- * The size of the base structure is the size of the
- * declared structure plus the space for the extra words
- * of the bitmap. We subtract one from the map size, because
- * the first element of the bitmap is accounted for in the
- * size of the base structure.
+ * Base size is the actual size of the ondisk structure - it reflects
+ * the actual size of the dirty bitmap rather than the size of the in
+ * memory structure.
*/
- base_size =
- (uint)(sizeof(xfs_buf_log_format_t) +
- ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
- vecp->i_addr = &bip->bli_format;
+ base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
+ (blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
+ vecp->i_addr = blfp;
vecp->i_len = base_size;
vecp->i_type = XLOG_REG_TYPE_BFORMAT;
vecp++;
nvecs = 1;
- /*
- * If it is an inode buffer, transfer the in-memory state to the
- * format flags and clear the in-memory state. We do not transfer
- * this state if the inode buffer allocation has not yet been committed
- * to the log as setting the XFS_BLI_INODE_BUF flag will prevent
- * correct replay of the inode allocation.
- */
- if (bip->bli_flags & XFS_BLI_INODE_BUF) {
- if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
- xfs_log_item_in_current_chkpt(lip)))
- bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
- bip->bli_flags &= ~XFS_BLI_INODE_BUF;
- }
-
if (bip->bli_flags & XFS_BLI_STALE) {
/*
* The buffer is stale, so all we need to log
@@ -277,16 +300,15 @@ xfs_buf_item_format(
* cancel flag in it.
*/
trace_xfs_buf_item_format_stale(bip);
- ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
- bip->bli_format.blf_size = nvecs;
- return;
+ ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
+ blfp->blf_size = nvecs;
+ return vecp;
}
/*
* Fill in an iovec for each set of contiguous chunks.
*/
- first_bit = xfs_next_bit(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size, 0);
+ first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
ASSERT(first_bit != -1);
last_bit = first_bit;
nbits = 1;
@@ -297,9 +319,8 @@ xfs_buf_item_format(
* if there are no more bits set or the start bit is
* beyond the end of the bitmap.
*/
- next_bit = xfs_next_bit(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size,
- (uint)last_bit + 1);
+ next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
+ (uint)last_bit + 1);
/*
* If we run out of bits fill in the last iovec and get
* out of the loop.
@@ -310,14 +331,14 @@ xfs_buf_item_format(
* keep counting and scanning.
*/
if (next_bit == -1) {
- buffer_offset = first_bit * XFS_BLF_CHUNK;
+ buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK;
nvecs++;
break;
} else if (next_bit != last_bit + 1) {
- buffer_offset = first_bit * XFS_BLF_CHUNK;
+ buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK;
@@ -326,14 +347,17 @@ xfs_buf_item_format(
first_bit = next_bit;
last_bit = next_bit;
nbits = 1;
- } else if (xfs_buf_offset(bp, next_bit << XFS_BLF_SHIFT) !=
- (xfs_buf_offset(bp, last_bit << XFS_BLF_SHIFT) +
+ } else if (xfs_buf_offset(bp, offset +
+ (next_bit << XFS_BLF_SHIFT)) !=
+ (xfs_buf_offset(bp, offset +
+ (last_bit << XFS_BLF_SHIFT)) +
XFS_BLF_CHUNK)) {
- buffer_offset = first_bit * XFS_BLF_CHUNK;
+ buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK;
-/* You would think we need to bump the nvecs here too, but we do not
+/*
+ * You would think we need to bump the nvecs here too, but we do not
* this number is used by recovery, and it gets confused by the boundary
* split here
* nvecs++;
@@ -348,6 +372,48 @@ xfs_buf_item_format(
}
}
bip->bli_format.blf_size = nvecs;
+ return vecp;
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given log buf item. It fills the first entry with a buf log
+ * format structure, and the rest point to contiguous chunks
+ * within the buffer.
+ */
+STATIC void
+xfs_buf_item_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_iovec *vecp)
+{
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ struct xfs_buf *bp = bip->bli_buf;
+ uint offset = 0;
+ int i;
+
+ ASSERT(atomic_read(&bip->bli_refcount) > 0);
+ ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
+ (bip->bli_flags & XFS_BLI_STALE));
+
+ /*
+ * If it is an inode buffer, transfer the in-memory state to the
+ * format flags and clear the in-memory state. We do not transfer
+ * this state if the inode buffer allocation has not yet been committed
+ * to the log as setting the XFS_BLI_INODE_BUF flag will prevent
+ * correct replay of the inode allocation.
+ */
+ if (bip->bli_flags & XFS_BLI_INODE_BUF) {
+ if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
+ xfs_log_item_in_current_chkpt(lip)))
+ bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
+ bip->bli_flags &= ~XFS_BLI_INODE_BUF;
+ }
+
+ for (i = 0; i < bip->bli_format_count; i++) {
+ vecp = xfs_buf_item_format_segment(bip, vecp, offset,
+ &bip->bli_formats[i]);
+ offset += bp->b_maps[i].bm_len;
+ }
/*
* Check to make sure everything is consistent.
@@ -418,7 +484,6 @@ xfs_buf_item_unpin(
if (freed && stale) {
ASSERT(bip->bli_flags & XFS_BLI_STALE);
ASSERT(xfs_buf_islocked(bp));
- ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
ASSERT(XFS_BUF_ISSTALE(bp));
ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
@@ -455,42 +520,42 @@ xfs_buf_item_unpin(
bp->b_iodone = NULL;
} else {
spin_lock(&ailp->xa_lock);
- xfs_trans_ail_delete(ailp, (xfs_log_item_t *)bip);
+ xfs_trans_ail_delete(ailp, lip, SHUTDOWN_LOG_IO_ERROR);
xfs_buf_item_relse(bp);
ASSERT(bp->b_fspriv == NULL);
}
xfs_buf_relse(bp);
+ } else if (freed && remove) {
+ xfs_buf_lock(bp);
+ xfs_buf_ioerror(bp, EIO);
+ XFS_BUF_UNDONE(bp);
+ xfs_buf_stale(bp);
+ xfs_buf_ioend(bp, 0);
}
}
-/*
- * This is called to attempt to lock the buffer associated with this
- * buf log item. Don't sleep on the buffer lock. If we can't get
- * the lock right away, return 0. If we can get the lock, take a
- * reference to the buffer. If this is a delayed write buffer that
- * needs AIL help to be written back, invoke the pushbuf routine
- * rather than the normal success path.
- */
STATIC uint
-xfs_buf_item_trylock(
- struct xfs_log_item *lip)
+xfs_buf_item_push(
+ struct xfs_log_item *lip,
+ struct list_head *buffer_list)
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
+ uint rval = XFS_ITEM_SUCCESS;
if (xfs_buf_ispinned(bp))
return XFS_ITEM_PINNED;
if (!xfs_buf_trylock(bp))
return XFS_ITEM_LOCKED;
- /* take a reference to the buffer. */
- xfs_buf_hold(bp);
-
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
- trace_xfs_buf_item_trylock(bip);
- if (XFS_BUF_ISDELAYWRITE(bp))
- return XFS_ITEM_PUSHBUF;
- return XFS_ITEM_SUCCESS;
+
+ trace_xfs_buf_item_push(bip);
+
+ if (!xfs_buf_delwri_queue(bp, buffer_list))
+ rval = XFS_ITEM_FLUSHING;
+ xfs_buf_unlock(bp);
+ return rval;
}
/*
@@ -603,49 +668,6 @@ xfs_buf_item_committed(
return lsn;
}
-/*
- * The buffer is locked, but is not a delayed write buffer. This happens
- * if we race with IO completion and hence we don't want to try to write it
- * again. Just release the buffer.
- */
-STATIC void
-xfs_buf_item_push(
- struct xfs_log_item *lip)
-{
- struct xfs_buf_log_item *bip = BUF_ITEM(lip);
- struct xfs_buf *bp = bip->bli_buf;
-
- ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
- ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
-
- trace_xfs_buf_item_push(bip);
-
- xfs_buf_relse(bp);
-}
-
-/*
- * The buffer is locked and is a delayed write buffer. Promote the buffer
- * in the delayed write queue as the caller knows that they must invoke
- * the xfsbufd to get this buffer written. We have to unlock the buffer
- * to allow the xfsbufd to write it, too.
- */
-STATIC bool
-xfs_buf_item_pushbuf(
- struct xfs_log_item *lip)
-{
- struct xfs_buf_log_item *bip = BUF_ITEM(lip);
- struct xfs_buf *bp = bip->bli_buf;
-
- ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
- ASSERT(XFS_BUF_ISDELAYWRITE(bp));
-
- trace_xfs_buf_item_pushbuf(bip);
-
- xfs_buf_delwri_promote(bp);
- xfs_buf_relse(bp);
- return true;
-}
-
STATIC void
xfs_buf_item_committing(
struct xfs_log_item *lip,
@@ -661,14 +683,41 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_format = xfs_buf_item_format,
.iop_pin = xfs_buf_item_pin,
.iop_unpin = xfs_buf_item_unpin,
- .iop_trylock = xfs_buf_item_trylock,
.iop_unlock = xfs_buf_item_unlock,
.iop_committed = xfs_buf_item_committed,
.iop_push = xfs_buf_item_push,
- .iop_pushbuf = xfs_buf_item_pushbuf,
.iop_committing = xfs_buf_item_committing
};
+STATIC int
+xfs_buf_item_get_format(
+ struct xfs_buf_log_item *bip,
+ int count)
+{
+ ASSERT(bip->bli_formats == NULL);
+ bip->bli_format_count = count;
+
+ if (count == 1) {
+ bip->bli_formats = &bip->bli_format;
+ return 0;
+ }
+
+ bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format),
+ KM_SLEEP);
+ if (!bip->bli_formats)
+ return ENOMEM;
+ return 0;
+}
+
+STATIC void
+xfs_buf_item_free_format(
+ struct xfs_buf_log_item *bip)
+{
+ if (bip->bli_formats != &bip->bli_format) {
+ kmem_free(bip->bli_formats);
+ bip->bli_formats = NULL;
+ }
+}
/*
* Allocate a new buf log item to go with the given buffer.
@@ -686,6 +735,8 @@ xfs_buf_item_init(
xfs_buf_log_item_t *bip;
int chunks;
int map_size;
+ int error;
+ int i;
/*
* Check to see if there is already a buf log item for
@@ -697,24 +748,33 @@ xfs_buf_item_init(
if (lip != NULL && lip->li_type == XFS_LI_BUF)
return;
- /*
- * chunks is the number of XFS_BLF_CHUNK size pieces
- * the buffer can be divided into. Make sure not to
- * truncate any pieces. map_size is the size of the
- * bitmap needed to describe the chunks of the buffer.
- */
- chunks = (int)((XFS_BUF_COUNT(bp) + (XFS_BLF_CHUNK - 1)) >> XFS_BLF_SHIFT);
- map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT);
-
- bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone,
- KM_SLEEP);
+ bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP);
xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
bip->bli_buf = bp;
xfs_buf_hold(bp);
- bip->bli_format.blf_type = XFS_LI_BUF;
- bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
- bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp));
- bip->bli_format.blf_map_size = map_size;
+
+ /*
+ * chunks is the number of XFS_BLF_CHUNK size pieces the buffer
+ * can be divided into. Make sure not to truncate any pieces.
+ * map_size is the size of the bitmap needed to describe the
+ * chunks of the buffer.
+ *
+ * Discontiguous buffer support follows the layout of the underlying
+ * buffer. This makes the implementation as simple as possible.
+ */
+ error = xfs_buf_item_get_format(bip, bp->b_map_count);
+ ASSERT(error == 0);
+
+ for (i = 0; i < bip->bli_format_count; i++) {
+ chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
+ XFS_BLF_CHUNK);
+ map_size = DIV_ROUND_UP(chunks, NBWORD);
+
+ bip->bli_formats[i].blf_type = XFS_LI_BUF;
+ bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn;
+ bip->bli_formats[i].blf_len = bp->b_maps[i].bm_len;
+ bip->bli_formats[i].blf_map_size = map_size;
+ }
#ifdef XFS_TRANS_DEBUG
/*
@@ -725,9 +785,9 @@ xfs_buf_item_init(
* the buffer to indicate which bytes the callers have asked
* to have logged.
*/
- bip->bli_orig = (char *)kmem_alloc(XFS_BUF_COUNT(bp), KM_SLEEP);
- memcpy(bip->bli_orig, bp->b_addr, XFS_BUF_COUNT(bp));
- bip->bli_logged = (char *)kmem_zalloc(XFS_BUF_COUNT(bp) / NBBY, KM_SLEEP);
+ bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP);
+ memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length));
+ bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP);
#endif
/*
@@ -745,10 +805,11 @@ xfs_buf_item_init(
* item's bitmap.
*/
void
-xfs_buf_item_log(
- xfs_buf_log_item_t *bip,
+xfs_buf_item_log_segment(
+ struct xfs_buf_log_item *bip,
uint first,
- uint last)
+ uint last,
+ uint *map)
{
uint first_bit;
uint last_bit;
@@ -761,12 +822,6 @@ xfs_buf_item_log(
uint mask;
/*
- * Mark the item as having some dirty data for
- * quick reference in xfs_buf_item_dirty.
- */
- bip->bli_flags |= XFS_BLI_DIRTY;
-
- /*
* Convert byte offsets to bit numbers.
*/
first_bit = first >> XFS_BLF_SHIFT;
@@ -782,7 +837,7 @@ xfs_buf_item_log(
* to set a bit in.
*/
word_num = first_bit >> BIT_TO_WORD_SHIFT;
- wordp = &(bip->bli_format.blf_data_map[word_num]);
+ wordp = &map[word_num];
/*
* Calculate the starting bit in the first word.
@@ -829,6 +884,51 @@ xfs_buf_item_log(
xfs_buf_item_log_debug(bip, first, last);
}
+/*
+ * Mark bytes first through last inclusive as dirty in the buf
+ * item's bitmap.
+ */
+void
+xfs_buf_item_log(
+ xfs_buf_log_item_t *bip,
+ uint first,
+ uint last)
+{
+ int i;
+ uint start;
+ uint end;
+ struct xfs_buf *bp = bip->bli_buf;
+
+ /*
+ * Mark the item as having some dirty data for
+ * quick reference in xfs_buf_item_dirty.
+ */
+ bip->bli_flags |= XFS_BLI_DIRTY;
+
+ /*
+ * walk each buffer segment and mark them dirty appropriately.
+ */
+ start = 0;
+ for (i = 0; i < bip->bli_format_count; i++) {
+ if (start > last)
+ break;
+ end = start + BBTOB(bp->b_maps[i].bm_len);
+ if (first > end) {
+ start += BBTOB(bp->b_maps[i].bm_len);
+ continue;
+ }
+ if (first < start)
+ first = start;
+ if (end > last)
+ end = last;
+
+ xfs_buf_item_log_segment(bip, first, end,
+ &bip->bli_formats[i].blf_data_map[0]);
+
+ start += bp->b_maps[i].bm_len;
+ }
+}
+
/*
* Return 1 if the buffer has some data that has been logged (at any
@@ -850,6 +950,7 @@ xfs_buf_item_free(
kmem_free(bip->bli_logged);
#endif /* XFS_TRANS_DEBUG */
+ xfs_buf_item_free_format(bip);
kmem_zone_free(xfs_buf_item_zone, bip);
}
@@ -984,20 +1085,27 @@ xfs_buf_iodone_callbacks(
* If the write was asynchronous then no one will be looking for the
* error. Clear the error state and write the buffer out again.
*
- * During sync or umount we'll write all pending buffers again
- * synchronous, which will catch these errors if they keep hanging
- * around.
+ * XXX: This helps against transient write errors, but we need to find
+ * a way to shut the filesystem down if the writes keep failing.
+ *
+ * In practice we'll shut the filesystem down soon as non-transient
+ * erorrs tend to affect the whole device and a failing log write
+ * will make us give up. But we really ought to do better here.
*/
if (XFS_BUF_ISASYNC(bp)) {
+ ASSERT(bp->b_iodone != NULL);
+
+ trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
+
xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
if (!XFS_BUF_ISSTALE(bp)) {
- xfs_buf_delwri_queue(bp);
- XFS_BUF_DONE(bp);
+ bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
+ xfs_buf_iorequest(bp);
+ } else {
+ xfs_buf_relse(bp);
}
- ASSERT(bp->b_iodone != NULL);
- trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
- xfs_buf_relse(bp);
+
return;
}
@@ -1045,6 +1153,6 @@ xfs_buf_iodone(
* Either way, AIL is useless if we're forcing a shutdown.
*/
spin_lock(&ailp->xa_lock);
- xfs_trans_ail_delete(ailp, lip);
+ xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE);
xfs_buf_item_free(BUF_ITEM(lip));
}