author		Christian Brauner <brauner@kernel.org>	2025-02-10 12:46:43 +0100
committer	Christian Brauner <brauner@kernel.org>	2025-02-10 12:46:43 +0100
commit		30f530096166202cf70e1b7d1de5a8cdfba42af1 (patch)
tree		d94361dd053a6f1031c4ae13f7cf01ced1c129cd
parent		Merge patch series "iomap: allow the file system to submit the writeback bios" (diff)
parent		iomap: advance the iter directly on zero range (diff)
Merge patch series "iomap: incremental per-operation iter advance"
Brian Foster <bfoster@redhat.com> says:

This is a first pass at supporting more incremental, per-operation iomap_iter advancement. The motivation for this is folio_batch support for zero range, where the fs provides a batch of folios to process in certain situations. Since the batch may not be logically contiguous, processing loops require a bit more flexibility than the typical offset-based iteration.

The current iteration model basically has the operation _iter() handler lift the pos/length with respect to the current iomap out of the iomap_iter, process it locally, then return the result to be stored in iter.processed. The latter is overloaded with error status, so the handler must decide whether to return an error or a partial completion (i.e. consider a short write). iomap_iter() then uses the result to advance the iter and look up the next iomap.

The updated model proposed in this series is to allow an operation to advance the iter itself as subranges are processed and then return success or failure in iter.processed. Note that, at least initially, this is implemented as an optional mode to minimize churn. This series converts the operations that use iomap_write_begin(): buffered write, unshare, and zero range.

The main advantage of this is that the future folio_batch work can be plumbed down into the folio get path more naturally, and the associated codepath can advance the iter itself when appropriate rather than require each operation to manage the gaps in the range being processed. Some secondary advantages are a little less boilerplate code for walking ranges and clearer semantics for partial completions in the event of errors, etc.

* patches from https://lore.kernel.org/r/20250207143253.314068-1-bfoster@redhat.com:
  iomap: advance the iter directly on zero range
  iomap: advance the iter directly on unshare range
  iomap: advance the iter directly on buffered writes
  iomap: support incremental iomap_iter advances
  iomap: export iomap_iter_advance() and return remaining length
  iomap: lift iter termination logic from iomap_iter_advance()
  iomap: lift error code check out of iomap_iter_advance()
  iomap: refactor iomap_iter() length check and tracepoint
  iomap: split out iomap check and reset logic from iter advance
  iomap: factor out iomap length helper

Link: https://lore.kernel.org/r/20250207143253.314068-1-bfoster@redhat.com
Signed-off-by: Christian Brauner <brauner@kernel.org>
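To illustrate the updated model, here is a minimal sketch of a converted operation handler, modeled on the iomap_unshare_iter()/iomap_zero_iter() conversions in this series. The handler advances the iter itself with iomap_iter_advance() as each subrange completes and stores only a status in iter.processed. The names iomap_frob_iter() and frobnicate_folio_range() are hypothetical, used only for illustration; they are not part of the iomap API.

	static loff_t iomap_frob_iter(struct iomap_iter *iter)
	{
		/* bytes remaining in the current mapping */
		u64 bytes = iomap_length(iter);
		int status = 0;

		do {
			loff_t pos = iter->pos;

			bytes = min_t(u64, SIZE_MAX, bytes);

			/* hypothetical per-range operation; returns 0 or -errno */
			status = frobnicate_folio_range(iter, pos, bytes);
			if (status)
				break;

			/*
			 * Consume 'bytes'; on return it holds the length still
			 * remaining in the current mapping.
			 */
			status = iomap_iter_advance(iter, &bytes);
			if (status)
				break;
		} while (bytes > 0);

		return status;
	}

The calling side keeps the usual idiom of looping while (iomap_iter(&iter, ops) > 0) and storing the handler's return value in iter.processed; under this mode a successful handler returns 0 because the iter has already been advanced, rather than returning a byte count for iomap_iter() to apply.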
-rw-r--r--	fs/iomap/buffered-io.c	 67
-rw-r--r--	fs/iomap/iter.c		102
-rw-r--r--	include/linux/iomap.h	 32
3 files changed, 122 insertions, 79 deletions
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 3458f97d1b1e..8368a4ae716f 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -905,8 +905,6 @@ static bool iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len,
static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
{
- loff_t length = iomap_length(iter);
- loff_t pos = iter->pos;
ssize_t total_written = 0;
long status = 0;
struct address_space *mapping = iter->inode->i_mapping;
@@ -919,7 +917,8 @@ static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i)
size_t offset; /* Offset into folio */
size_t bytes; /* Bytes to write to folio */
size_t copied; /* Bytes copied from user */
- size_t written; /* Bytes have been written */
+ u64 written; /* Bytes have been written */
+ loff_t pos = iter->pos;
bytes = iov_iter_count(i);
retry:
@@ -930,8 +929,8 @@ retry:
if (unlikely(status))
break;
- if (bytes > length)
- bytes = length;
+ if (bytes > iomap_length(iter))
+ bytes = iomap_length(iter);
/*
* Bring in the user page that we'll copy from _first_.
@@ -1002,17 +1001,12 @@ retry:
goto retry;
}
} else {
- pos += written;
total_written += written;
- length -= written;
+ iomap_iter_advance(iter, &written);
}
- } while (iov_iter_count(i) && length);
+ } while (iov_iter_count(i) && iomap_length(iter));
- if (status == -EAGAIN) {
- iov_iter_revert(i, total_written);
- return -EAGAIN;
- }
- return total_written ? total_written : status;
+ return total_written ? 0 : status;
}
ssize_t
@@ -1269,20 +1263,19 @@ EXPORT_SYMBOL_GPL(iomap_write_delalloc_release);
static loff_t iomap_unshare_iter(struct iomap_iter *iter)
{
struct iomap *iomap = &iter->iomap;
- loff_t pos = iter->pos;
- loff_t length = iomap_length(iter);
- loff_t written = 0;
+ u64 bytes = iomap_length(iter);
+ int status;
if (!iomap_want_unshare_iter(iter))
- return length;
+ return iomap_iter_advance(iter, &bytes);
do {
struct folio *folio;
- int status;
size_t offset;
- size_t bytes = min_t(u64, SIZE_MAX, length);
+ loff_t pos = iter->pos;
bool ret;
+ bytes = min_t(u64, SIZE_MAX, bytes);
status = iomap_write_begin(iter, pos, bytes, &folio);
if (unlikely(status))
return status;
@@ -1300,14 +1293,14 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
cond_resched();
- pos += bytes;
- written += bytes;
- length -= bytes;
-
balance_dirty_pages_ratelimited(iter->inode->i_mapping);
- } while (length > 0);
- return written;
+ status = iomap_iter_advance(iter, &bytes);
+ if (status)
+ break;
+ } while (bytes > 0);
+
+ return status;
}
int
@@ -1348,17 +1341,16 @@ static inline int iomap_zero_iter_flush_and_stale(struct iomap_iter *i)
static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
{
- loff_t pos = iter->pos;
- loff_t length = iomap_length(iter);
- loff_t written = 0;
+ u64 bytes = iomap_length(iter);
+ int status;
do {
struct folio *folio;
- int status;
size_t offset;
- size_t bytes = min_t(u64, SIZE_MAX, length);
+ loff_t pos = iter->pos;
bool ret;
+ bytes = min_t(u64, SIZE_MAX, bytes);
status = iomap_write_begin(iter, pos, bytes, &folio);
if (status)
return status;
@@ -1379,14 +1371,14 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
if (WARN_ON_ONCE(!ret))
return -EIO;
- pos += bytes;
- length -= bytes;
- written += bytes;
- } while (length > 0);
+ status = iomap_iter_advance(iter, &bytes);
+ if (status)
+ break;
+ } while (bytes > 0);
if (did_zero)
*did_zero = true;
- return written;
+ return status;
}
int
@@ -1440,11 +1432,14 @@ iomap_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
if (srcmap->type == IOMAP_HOLE ||
srcmap->type == IOMAP_UNWRITTEN) {
- loff_t proc = iomap_length(&iter);
+ s64 proc;
if (range_dirty) {
range_dirty = false;
proc = iomap_zero_iter_flush_and_stale(&iter);
+ } else {
+ u64 length = iomap_length(&iter);
+ proc = iomap_iter_advance(&iter, &length);
}
iter.processed = proc;
continue;
diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c
index 3790918646af..0ebcabc7df52 100644
--- a/fs/iomap/iter.c
+++ b/fs/iomap/iter.c
@@ -7,40 +7,25 @@
#include <linux/iomap.h>
#include "trace.h"
-/*
- * Advance to the next range we need to map.
- *
- * If the iomap is marked IOMAP_F_STALE, it means the existing map was not fully
- * processed - it was aborted because the extent the iomap spanned may have been
- * changed during the operation. In this case, the iteration behaviour is to
- * remap the unprocessed range of the iter, and that means we may need to remap
- * even when we've made no progress (i.e. iter->processed = 0). Hence the
- * "finished iterating" case needs to distinguish between
- * (processed = 0) meaning we are done and (processed = 0 && stale) meaning we
- * need to remap the entire remaining range.
- */
-static inline int iomap_iter_advance(struct iomap_iter *iter)
+static inline void iomap_iter_reset_iomap(struct iomap_iter *iter)
{
- bool stale = iter->iomap.flags & IOMAP_F_STALE;
- int ret = 1;
-
- /* handle the previous iteration (if any) */
- if (iter->iomap.length) {
- if (iter->processed < 0)
- return iter->processed;
- if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
- return -EIO;
- iter->pos += iter->processed;
- iter->len -= iter->processed;
- if (!iter->len || (!iter->processed && !stale))
- ret = 0;
- }
-
- /* clear the per iteration state */
iter->processed = 0;
memset(&iter->iomap, 0, sizeof(iter->iomap));
memset(&iter->srcmap, 0, sizeof(iter->srcmap));
- return ret;
+}
+
+/*
+ * Advance the current iterator position and output the length remaining for the
+ * current mapping.
+ */
+int iomap_iter_advance(struct iomap_iter *iter, u64 *count)
+{
+ if (WARN_ON_ONCE(*count > iomap_length(iter)))
+ return -EIO;
+ iter->pos += *count;
+ iter->len -= *count;
+ *count = iomap_length(iter);
+ return 0;
}
static inline void iomap_iter_done(struct iomap_iter *iter)
@@ -50,6 +35,8 @@ static inline void iomap_iter_done(struct iomap_iter *iter)
WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos);
WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_STALE);
+ iter->iter_start_pos = iter->pos;
+
trace_iomap_iter_dstmap(iter->inode, &iter->iomap);
if (iter->srcmap.type != IOMAP_HOLE)
trace_iomap_iter_srcmap(iter->inode, &iter->srcmap);
@@ -72,21 +59,62 @@ static inline void iomap_iter_done(struct iomap_iter *iter)
*/
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops)
{
+ bool stale = iter->iomap.flags & IOMAP_F_STALE;
+ ssize_t advanced = iter->processed > 0 ? iter->processed : 0;
+ u64 olen = iter->len;
+ s64 processed;
int ret;
- if (iter->iomap.length && ops->iomap_end) {
- ret = ops->iomap_end(iter->inode, iter->pos, iomap_length(iter),
- iter->processed > 0 ? iter->processed : 0,
- iter->flags, &iter->iomap);
- if (ret < 0 && !iter->processed)
+ trace_iomap_iter(iter, ops, _RET_IP_);
+
+ if (!iter->iomap.length)
+ goto begin;
+
+ /*
+ * If iter.processed is zero, the op may still have advanced the iter
+ * itself. Calculate the advanced and original length bytes based on how
+ * far pos has advanced for ->iomap_end().
+ */
+ if (!advanced) {
+ advanced = iter->pos - iter->iter_start_pos;
+ olen += advanced;
+ }
+
+ if (ops->iomap_end) {
+ ret = ops->iomap_end(iter->inode, iter->iter_start_pos,
+ iomap_length_trim(iter, iter->iter_start_pos,
+ olen),
+ advanced, iter->flags, &iter->iomap);
+ if (ret < 0 && !advanced)
return ret;
}
- trace_iomap_iter(iter, ops, _RET_IP_);
- ret = iomap_iter_advance(iter);
+ processed = iter->processed;
+ if (processed < 0) {
+ iomap_iter_reset_iomap(iter);
+ return processed;
+ }
+
+ /*
+ * Advance the iter and clear state from the previous iteration. This
+ * passes iter->processed because that reflects the bytes processed but
+ * not yet advanced by the iter handler.
+ *
+ * Use iter->len to determine whether to continue onto the next mapping.
+ * Explicitly terminate in the case where the current iter has not
+ * advanced at all (i.e. no work was done for some reason) unless the
+ * mapping has been marked stale and needs to be reprocessed.
+ */
+ ret = iomap_iter_advance(iter, &processed);
+ if (!ret && iter->len > 0)
+ ret = 1;
+ if (ret > 0 && !advanced && !stale)
+ ret = 0;
+ iomap_iter_reset_iomap(iter);
if (ret <= 0)
return ret;
+begin:
ret = ops->iomap_begin(iter->inode, iter->pos, iter->len, iter->flags,
&iter->iomap, &iter->srcmap);
if (ret < 0)
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 022d7f338c68..e180dacf434c 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -218,8 +218,11 @@ struct iomap_ops {
* calls to iomap_iter(). Treat as read-only in the body.
* @len: The remaining length of the file segment we're operating on.
* It is updated at the same time as @pos.
- * @processed: The number of bytes processed by the body in the most recent
- * iteration, or a negative errno. 0 causes the iteration to stop.
+ * @iter_start_pos: The original start pos for the current iomap. Used for
+ * incremental iter advance.
+ * @processed: The number of bytes the most recent iteration needs iomap_iter()
+ * to advance the iter, zero if the iter was already advanced, or a
+ * negative errno for an error during the operation.
* @flags: Zero or more of the iomap_begin flags above.
* @iomap: Map describing the I/O iteration
* @srcmap: Source map for COW operations
@@ -228,6 +231,7 @@ struct iomap_iter {
struct inode *inode;
loff_t pos;
u64 len;
+ loff_t iter_start_pos;
s64 processed;
unsigned flags;
struct iomap iomap;
@@ -236,20 +240,36 @@ struct iomap_iter {
};
int iomap_iter(struct iomap_iter *iter, const struct iomap_ops *ops);
+int iomap_iter_advance(struct iomap_iter *iter, u64 *count);
/**
- * iomap_length - length of the current iomap iteration
+ * iomap_length_trim - trimmed length of the current iomap iteration
* @iter: iteration structure
+ * @pos: File position to trim from.
+ * @len: Length of the mapping to trim to.
*
- * Returns the length that the operation applies to for the current iteration.
+ * Returns a trimmed length that the operation applies to for the current
+ * iteration.
*/
-static inline u64 iomap_length(const struct iomap_iter *iter)
+static inline u64 iomap_length_trim(const struct iomap_iter *iter, loff_t pos,
+ u64 len)
{
u64 end = iter->iomap.offset + iter->iomap.length;
if (iter->srcmap.type != IOMAP_HOLE)
end = min(end, iter->srcmap.offset + iter->srcmap.length);
- return min(iter->len, end - iter->pos);
+ return min(len, end - pos);
+}
+
+/**
+ * iomap_length - length of the current iomap iteration
+ * @iter: iteration structure
+ *
+ * Returns the length that the operation applies to for the current iteration.
+ */
+static inline u64 iomap_length(const struct iomap_iter *iter)
+{
+ return iomap_length_trim(iter, iter->pos, iter->len);
}
/**