about summary refs log tree commit diff stats
path: root/fs/fs-writeback.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/fs-writeback.c')
-rw-r--r-- fs/fs-writeback.c | 132
1 files changed, 77 insertions, 55 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index acfb55834af2..e91980f49388 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1442,9 +1442,15 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
}
/*
- * Write out an inode and its dirty pages. Do not update the writeback list
- * linkage. That is left to the caller. The caller is also responsible for
- * setting I_SYNC flag and calling inode_sync_complete() to clear it.
+ * Write out an inode and its dirty pages (or some of its dirty pages, depending
+ * on @wbc->nr_to_write), and clear the relevant dirty flags from i_state.
+ *
+ * This doesn't remove the inode from the writeback list it is on, except
+ * potentially to move it from b_dirty_time to b_dirty due to timestamp
+ * expiration. The caller is otherwise responsible for writeback list handling.
+ *
+ * The caller is also responsible for setting the I_SYNC flag beforehand and
+ * calling inode_sync_complete() to clear it afterwards.
*/
static int
__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
@@ -1474,21 +1480,26 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
}
/*
- * Some filesystems may redirty the inode during the writeback
- * due to delalloc, clear dirty metadata flags right before
- * write_inode()
+ * If the inode has dirty timestamps and we need to write them, call
+ * mark_inode_dirty_sync() to notify the filesystem about it and to
+ * change I_DIRTY_TIME into I_DIRTY_SYNC.
*/
- spin_lock(&inode->i_lock);
-
- dirty = inode->i_state & I_DIRTY;
if ((inode->i_state & I_DIRTY_TIME) &&
- ((dirty & I_DIRTY_INODE) ||
- wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
+ (wbc->sync_mode == WB_SYNC_ALL ||
time_after(jiffies, inode->dirtied_time_when +
dirtytime_expire_interval * HZ))) {
- dirty |= I_DIRTY_TIME;
trace_writeback_lazytime(inode);
+ mark_inode_dirty_sync(inode);
}
+
+ /*
+ * Get and clear the dirty flags from i_state. This needs to be done
+ * after calling writepages because some filesystems may redirty the
+ * inode during writepages due to delalloc. It also needs to be done
+ * after handling timestamp expiration, as that may dirty the inode too.
+ */
+ spin_lock(&inode->i_lock);
+ dirty = inode->i_state & I_DIRTY;
inode->i_state &= ~dirty;
/*
@@ -1509,8 +1520,6 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
spin_unlock(&inode->i_lock);
- if (dirty & I_DIRTY_TIME)
- mark_inode_dirty_sync(inode);
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & ~I_DIRTY_PAGES) {
int err = write_inode(inode, wbc);
@@ -1522,12 +1531,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
}
/*
- * Write out an inode's dirty pages. Either the caller has an active reference
- * on the inode or the inode has I_WILL_FREE set.
+ * Write out an inode's dirty data and metadata on-demand, i.e. separately from
+ * the regular batched writeback done by the flusher threads in
+ * writeback_sb_inodes(). @wbc controls various aspects of the write, such as
+ * whether it is a data-integrity sync (%WB_SYNC_ALL) or not (%WB_SYNC_NONE).
*
- * This function is designed to be called for writing back one inode which
- * we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
- * and does more profound writeback list handling in writeback_sb_inodes().
+ * To prevent the inode from going away, either the caller must have a reference
+ * to the inode, or the inode must have I_WILL_FREE or I_FREEING set.
*/
static int writeback_single_inode(struct inode *inode,
struct writeback_control *wbc)
@@ -1542,23 +1552,23 @@ static int writeback_single_inode(struct inode *inode,
WARN_ON(inode->i_state & I_WILL_FREE);
if (inode->i_state & I_SYNC) {
- if (wbc->sync_mode != WB_SYNC_ALL)
- goto out;
/*
- * It's a data-integrity sync. We must wait. Since callers hold
- * inode reference or inode has I_WILL_FREE set, it cannot go
- * away under us.
+ * Writeback is already running on the inode. For WB_SYNC_NONE,
+ * that's enough and we can just return. For WB_SYNC_ALL, we
+ * must wait for the existing writeback to complete, then do
+ * writeback again if there's anything left.
*/
+ if (wbc->sync_mode != WB_SYNC_ALL)
+ goto out;
__inode_wait_for_writeback(inode);
}
WARN_ON(inode->i_state & I_SYNC);
/*
- * Skip inode if it is clean and we have no outstanding writeback in
- * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
- * function since flusher thread may be doing for example sync in
- * parallel and if we move the inode, it could get skipped. So here we
- * make sure inode is on some writeback list and leave it there unless
- * we have completely cleaned the inode.
+ * If the inode is already fully clean, then there's nothing to do.
+ *
+ * For data-integrity syncs we also need to check whether any pages are
+ * still under writeback, e.g. due to prior WB_SYNC_NONE writeback. If
+ * there are any such pages, we'll need to wait for them.
*/
if (!(inode->i_state & I_DIRTY_ALL) &&
(wbc->sync_mode != WB_SYNC_ALL ||
@@ -1574,8 +1584,9 @@ static int writeback_single_inode(struct inode *inode,
wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
/*
- * If inode is clean, remove it from writeback lists. Otherwise don't
- * touch it. See comment above for explanation.
+ * If the inode is now fully clean, then it can be safely removed from
+ * its writeback list (if any). Otherwise the flusher threads are
+ * responsible for the writeback lists.
*/
if (!(inode->i_state & I_DIRTY_ALL))
inode_io_list_del_locked(inode, wb);
@@ -2217,23 +2228,24 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
}
/**
- * __mark_inode_dirty - internal function
+ * __mark_inode_dirty - internal function to mark an inode dirty
*
* @inode: inode to mark
- * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
+ * @flags: what kind of dirty, e.g. I_DIRTY_SYNC. This can be a combination of
+ * multiple I_DIRTY_* flags, except that I_DIRTY_TIME can't be combined
+ * with I_DIRTY_PAGES.
*
- * Mark an inode as dirty. Callers should use mark_inode_dirty or
- * mark_inode_dirty_sync.
+ * Mark an inode as dirty. We notify the filesystem, then update the inode's
+ * dirty flags. Then, if needed we add the inode to the appropriate dirty list.
*
- * Put the inode on the super block's dirty list.
+ * Most callers should use mark_inode_dirty() or mark_inode_dirty_sync()
+ * instead of calling this directly.
*
- * CAREFUL! We mark it dirty unconditionally, but move it onto the
- * dirty list only if it is hashed or if it refers to a blockdev.
- * If it was not hashed, it will never be added to the dirty list
- * even if it is later hashed, as it will have been marked dirty already.
+ * CAREFUL! We only add the inode to the dirty list if it is hashed or if it
+ * refers to a blockdev. Unhashed inodes will never be added to the dirty list
+ * even if they are later hashed, as they will have been marked dirty already.
*
- * In short, make sure you hash any inodes _before_ you start marking
- * them dirty.
+ * In short, ensure you hash any inodes _before_ you start marking them dirty.
*
* Note that for blockdevs, inode->dirtied_when represents the dirtying time of
* the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
@@ -2245,25 +2257,34 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
void __mark_inode_dirty(struct inode *inode, int flags)
{
struct super_block *sb = inode->i_sb;
- int dirtytime;
+ int dirtytime = 0;
trace_writeback_mark_inode_dirty(inode, flags);
- /*
- * Don't do this for I_DIRTY_PAGES - that doesn't actually
- * dirty the inode itself
- */
- if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) {
+ if (flags & I_DIRTY_INODE) {
+ /*
+ * Notify the filesystem about the inode being dirtied, so that
+ * (if needed) it can update on-disk fields and journal the
+ * inode. This is only needed when the inode itself is being
+ * dirtied now. I.e. it's only needed for I_DIRTY_INODE, not
+ * for just I_DIRTY_PAGES or I_DIRTY_TIME.
+ */
trace_writeback_dirty_inode_start(inode, flags);
-
if (sb->s_op->dirty_inode)
- sb->s_op->dirty_inode(inode, flags);
-
+ sb->s_op->dirty_inode(inode, flags & I_DIRTY_INODE);
trace_writeback_dirty_inode(inode, flags);
- }
- if (flags & I_DIRTY_INODE)
+
+ /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
flags &= ~I_DIRTY_TIME;
- dirtytime = flags & I_DIRTY_TIME;
+ } else {
+ /*
+ * Else it's either I_DIRTY_PAGES, I_DIRTY_TIME, or nothing.
+ * (We don't support setting both I_DIRTY_PAGES and I_DIRTY_TIME
+ * in one call to __mark_inode_dirty().)
+ */
+ dirtytime = flags & I_DIRTY_TIME;
+ WARN_ON_ONCE(dirtytime && flags != I_DIRTY_TIME);
+ }
/*
* Paired with smp_mb() in __writeback_single_inode() for the
@@ -2286,6 +2307,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
inode_attach_wb(inode, NULL);
+ /* I_DIRTY_INODE supersedes I_DIRTY_TIME. */
if (flags & I_DIRTY_INODE)
inode->i_state &= ~I_DIRTY_TIME;
inode->i_state |= flags;