aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/scrub/common.c1
-rw-r--r--fs/xfs/scrub/repair.h3
-rw-r--r--fs/xfs/scrub/rtsummary.c33
-rw-r--r--fs/xfs/scrub/rtsummary.h37
-rw-r--r--fs/xfs/scrub/rtsummary_repair.c177
-rw-r--r--fs/xfs/scrub/scrub.c11
-rw-r--r--fs/xfs/scrub/scrub.h7
-rw-r--r--fs/xfs/scrub/tempexch.h21
-rw-r--r--fs/xfs/scrub/tempfile.c388
-rw-r--r--fs/xfs/scrub/tempfile.h15
-rw-r--r--fs/xfs/scrub/trace.h40
12 files changed, 715 insertions, 19 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index ae8488ab4d6b..5e3ac7ec8fa5 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -212,6 +212,7 @@ xfs-y += $(addprefix scrub/, \
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
rtbitmap_repair.o \
+ rtsummary_repair.o \
)
xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index a27d33b6f464..a2da2bef509a 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -31,6 +31,7 @@
#include "xfs_ag.h"
#include "xfs_error.h"
#include "xfs_quota.h"
+#include "xfs_exchmaps.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index ce082d941459..0e2b695ab8f6 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -126,8 +126,10 @@ int xrep_fscounters(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xrep_rtbitmap(struct xfs_scrub *sc);
+int xrep_rtsummary(struct xfs_scrub *sc);
#else
# define xrep_rtbitmap xrep_notsupported
+# define xrep_rtsummary xrep_notsupported
#endif /* CONFIG_XFS_RT */
#ifdef CONFIG_XFS_QUOTA
@@ -212,6 +214,7 @@ xrep_setup_nothing(
#define xrep_quotacheck xrep_notsupported
#define xrep_nlinks xrep_notsupported
#define xrep_fscounters xrep_notsupported
+#define xrep_rtsummary xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index 5055092bd9e8..3fee603f5244 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -17,10 +17,14 @@
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "xfs_sb.h"
+#include "xfs_exchmaps.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/xfile.h"
+#include "scrub/repair.h"
+#include "scrub/tempexch.h"
+#include "scrub/rtsummary.h"
/*
* Realtime Summary
@@ -32,18 +36,6 @@
* (potentially large) amount of data in pageable memory.
*/
-struct xchk_rtsummary {
- struct xfs_rtalloc_args args;
-
- uint64_t rextents;
- uint64_t rbmblocks;
- uint64_t rsumsize;
- unsigned int rsumlevels;
-
- /* Memory buffer for the summary comparison. */
- union xfs_suminfo_raw words[];
-};
-
/* Set us up to check the rtsummary file. */
int
xchk_setup_rtsummary(
@@ -60,6 +52,12 @@ xchk_setup_rtsummary(
return -ENOMEM;
sc->buf = rts;
+ if (xchk_could_repair(sc)) {
+ error = xrep_setup_rtsummary(sc, rts);
+ if (error)
+ return error;
+ }
+
/*
* Create an xfile to construct a new rtsummary file. The xfile allows
* us to avoid pinning kernel memory for this purpose.
@@ -70,7 +68,7 @@ xchk_setup_rtsummary(
if (error)
return error;
- error = xchk_trans_alloc(sc, 0);
+ error = xchk_trans_alloc(sc, rts->resblks);
if (error)
return error;
@@ -135,7 +133,7 @@ xfsum_store(
sumoff << XFS_WORDLOG);
}
-static inline int
+inline int
xfsum_copyout(
struct xfs_scrub *sc,
xfs_rtsumoff_t sumoff,
@@ -362,7 +360,12 @@ xchk_rtsummary(
error = xchk_rtsum_compare(sc);
out_rbm:
- /* Unlock the rtbitmap since we're done with it. */
+ /*
+ * Unlock the rtbitmap since we're done with it. All other writers of
+ * the rt free space metadata grab the bitmap and summary ILOCKs in
+ * that order, so we're still protected against allocation activities
+ * even if we continue on to the repair function.
+ */
xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
return error;
}
diff --git a/fs/xfs/scrub/rtsummary.h b/fs/xfs/scrub/rtsummary.h
new file mode 100644
index 000000000000..e1d50304d8d4
--- /dev/null
+++ b/fs/xfs/scrub/rtsummary.h
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_SCRUB_RTSUMMARY_H__
+#define __XFS_SCRUB_RTSUMMARY_H__
+
+struct xchk_rtsummary {
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+ struct xrep_tempexch tempexch;
+#endif
+ struct xfs_rtalloc_args args;
+
+ uint64_t rextents;
+ uint64_t rbmblocks;
+ uint64_t rsumsize;
+ unsigned int rsumlevels;
+ unsigned int resblks;
+
+ /* suminfo position of xfile as we write buffers to disk. */
+ xfs_rtsumoff_t prep_wordoff;
+
+ /* Memory buffer for the summary comparison. */
+ union xfs_suminfo_raw words[];
+};
+
+int xfsum_copyout(struct xfs_scrub *sc, xfs_rtsumoff_t sumoff,
+ union xfs_suminfo_raw *rawinfo, unsigned int nr_words);
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+int xrep_setup_rtsummary(struct xfs_scrub *sc, struct xchk_rtsummary *rts);
+#else
+# define xrep_setup_rtsummary(sc, rts) (0)
+#endif /* CONFIG_XFS_ONLINE_REPAIR */
+
+#endif /* __XFS_SCRUB_RTSUMMARY_H__ */
diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c
new file mode 100644
index 000000000000..c8bb6c4f15d0
--- /dev/null
+++ b/fs/xfs/scrub/rtsummary_repair.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
+#include "xfs_bit.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_exchmaps.h"
+#include "xfs_rtbitmap.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/tempfile.h"
+#include "scrub/tempexch.h"
+#include "scrub/reap.h"
+#include "scrub/xfile.h"
+#include "scrub/rtsummary.h"
+
+/* Set us up to repair the rtsummary file. */
+int
+xrep_setup_rtsummary(
+ struct xfs_scrub *sc,
+ struct xchk_rtsummary *rts)
+{
+ struct xfs_mount *mp = sc->mp;
+ unsigned long long blocks;
+ int error;
+
+ error = xrep_tempfile_create(sc, S_IFREG);
+ if (error)
+ return error;
+
+ /*
+ * If we're doing a repair, we reserve enough blocks to write out a
+ * completely new summary file, plus twice as many blocks as we would
+ * need if we can only allocate one block per data fork mapping. This
+ * should cover the preallocation of the temporary file and exchanging
+ * the extent mappings.
+ *
+ * We cannot use xfs_exchmaps_estimate because we have not yet
+ * constructed the replacement rtsummary and therefore do not know how
+ * many extents it will use. By the time we do, we will have a dirty
+ * transaction (which we cannot drop because we cannot drop the
+ * rtsummary ILOCK) and cannot ask for more reservation.
+ */
+ blocks = XFS_B_TO_FSB(mp, mp->m_rsumsize);
+ blocks += xfs_bmbt_calc_size(mp, blocks) * 2;
+ if (blocks > UINT_MAX)
+ return -EOPNOTSUPP;
+
+ rts->resblks += blocks;
+
+ /*
+ * Grab support for atomic file content exchanges before we allocate
+ * any transactions or grab ILOCKs.
+ */
+ return xrep_tempexch_enable(sc);
+}
+
+static int
+xrep_rtsummary_prep_buf(
+ struct xfs_scrub *sc,
+ struct xfs_buf *bp,
+ void *data)
+{
+ struct xchk_rtsummary *rts = data;
+ struct xfs_mount *mp = sc->mp;
+ union xfs_suminfo_raw *ondisk;
+ int error;
+
+ rts->args.mp = sc->mp;
+ rts->args.tp = sc->tp;
+ rts->args.sumbp = bp;
+ ondisk = xfs_rsumblock_infoptr(&rts->args, 0);
+ rts->args.sumbp = NULL;
+
+ bp->b_ops = &xfs_rtbuf_ops;
+
+ error = xfsum_copyout(sc, rts->prep_wordoff, ondisk, mp->m_blockwsize);
+ if (error)
+ return error;
+
+ rts->prep_wordoff += mp->m_blockwsize;
+ xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTSUMMARY_BUF);
+ return 0;
+}
+
+/* Repair the realtime summary. */
+int
+xrep_rtsummary(
+ struct xfs_scrub *sc)
+{
+ struct xchk_rtsummary *rts = sc->buf;
+ struct xfs_mount *mp = sc->mp;
+ xfs_filblks_t rsumblocks;
+ int error;
+
+ /* We require the rmapbt to rebuild anything. */
+ if (!xfs_has_rmapbt(mp))
+ return -EOPNOTSUPP;
+
+ /* Walk away if we disagree on the size of the rt bitmap. */
+ if (rts->rbmblocks != mp->m_sb.sb_rbmblocks)
+ return 0;
+
+ /* Make sure any problems with the fork are fixed. */
+ error = xrep_metadata_inode_forks(sc);
+ if (error)
+ return error;
+
+ /*
+ * Try to take ILOCK_EXCL of the temporary file. We had better be the
+ * only ones holding onto this inode, but we can't block while holding
+ * the rtsummary file's ILOCK_EXCL.
+ */
+ while (!xrep_tempfile_ilock_nowait(sc)) {
+ if (xchk_should_terminate(sc, &error))
+ return error;
+ delay(1);
+ }
+
+ /* Make sure we have space allocated for the entire summary file. */
+ rsumblocks = XFS_B_TO_FSB(mp, rts->rsumsize);
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+ xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+ error = xrep_tempfile_prealloc(sc, 0, rsumblocks);
+ if (error)
+ return error;
+
+ /* Last chance to abort before we start committing fixes. */
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ /* Copy the rtsummary file that we generated. */
+ error = xrep_tempfile_copyin(sc, 0, rsumblocks,
+ xrep_rtsummary_prep_buf, rts);
+ if (error)
+ return error;
+ error = xrep_tempfile_set_isize(sc, rts->rsumsize);
+ if (error)
+ return error;
+
+ /*
+ * Now exchange the contents. Nothing in repair uses the temporary
+ * buffer, so we can reuse it for the tempfile exchrange information.
+ */
+ error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, &rts->tempexch);
+ if (error)
+ return error;
+
+ error = xrep_tempexch_contents(sc, &rts->tempexch);
+ if (error)
+ return error;
+
+ /* Reset incore state and blow out the summary cache. */
+ if (mp->m_rsum_cache)
+ memset(mp->m_rsum_cache, 0xFF, mp->m_sb.sb_rbmblocks);
+
+ mp->m_rsumlevels = rts->rsumlevels;
+ mp->m_rsumsize = rts->rsumsize;
+
+ /* Free the old rtsummary blocks if they're not in use. */
+ return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
+}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index d9012e9a6afd..62a064c1a5d3 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -18,6 +18,7 @@
#include "xfs_buf_mem.h"
#include "xfs_rmap.h"
#include "xfs_exchrange.h"
+#include "xfs_exchmaps.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -149,14 +150,15 @@ xchk_probe(
/* Scrub setup and teardown */
+#define FSGATES_MASK (XCHK_FSGATES_ALL | XREP_FSGATES_ALL)
static inline void
xchk_fsgates_disable(
struct xfs_scrub *sc)
{
- if (!(sc->flags & XCHK_FSGATES_ALL))
+ if (!(sc->flags & FSGATES_MASK))
return;
- trace_xchk_fsgates_disable(sc, sc->flags & XCHK_FSGATES_ALL);
+ trace_xchk_fsgates_disable(sc, sc->flags & FSGATES_MASK);
if (sc->flags & XCHK_FSGATES_DRAIN)
xfs_drain_wait_disable();
@@ -170,8 +172,9 @@ xchk_fsgates_disable(
if (sc->flags & XCHK_FSGATES_RMAP)
xfs_rmap_hook_disable();
- sc->flags &= ~XCHK_FSGATES_ALL;
+ sc->flags &= ~FSGATES_MASK;
}
+#undef FSGATES_MASK
/* Free all the resources and finish the transactions. */
STATIC int
@@ -352,7 +355,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.type = ST_FS,
.setup = xchk_setup_rtsummary,
.scrub = xchk_rtsummary,
- .repair = xrep_notsupported,
+ .repair = xrep_rtsummary,
},
[XFS_SCRUB_TYPE_UQUOTA] = { /* user quota */
.type = ST_FS,
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index e37d8599718e..d38f0b30416c 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -131,6 +131,7 @@ struct xfs_scrub {
#define XCHK_FSGATES_QUOTA (1U << 4) /* quota live update enabled */
#define XCHK_FSGATES_DIRENTS (1U << 5) /* directory live update enabled */
#define XCHK_FSGATES_RMAP (1U << 6) /* rmapbt live update enabled */
+#define XREP_FSGATES_EXCHANGE_RANGE (1U << 29) /* uses file content exchange */
#define XREP_RESET_PERAG_RESV (1U << 30) /* must reset AG space reservation */
#define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */
@@ -145,6 +146,12 @@ struct xfs_scrub {
XCHK_FSGATES_DIRENTS | \
XCHK_FSGATES_RMAP)
+/*
+ * The sole XREP_FSGATES* flag reflects a log intent item that is protected
+ * by a log-incompat feature flag. No code patching in use here.
+ */
+#define XREP_FSGATES_ALL (XREP_FSGATES_EXCHANGE_RANGE)
+
/* Metadata scrubbers */
int xchk_tester(struct xfs_scrub *sc);
int xchk_superblock(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/tempexch.h b/fs/xfs/scrub/tempexch.h
new file mode 100644
index 000000000000..98222b684b6a
--- /dev/null
+++ b/fs/xfs/scrub/tempexch.h
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2022-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_SCRUB_TEMPEXCH_H__
+#define __XFS_SCRUB_TEMPEXCH_H__
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+struct xrep_tempexch {
+ struct xfs_exchmaps_req req;
+};
+
+int xrep_tempexch_enable(struct xfs_scrub *sc);
+int xrep_tempexch_trans_reserve(struct xfs_scrub *sc, int whichfork,
+ struct xrep_tempexch *ti);
+
+int xrep_tempexch_contents(struct xfs_scrub *sc, struct xrep_tempexch *ti);
+#endif /* CONFIG_XFS_ONLINE_REPAIR */
+
+#endif /* __XFS_SCRUB_TEMPEXCH_H__ */
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
index 68d245749bc1..7791336ca820 100644
--- a/fs/xfs/scrub/tempfile.c
+++ b/fs/xfs/scrub/tempfile.c
@@ -14,14 +14,20 @@
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
+#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_exchrange.h"
+#include "xfs_exchmaps.h"
+#include "xfs_defer.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
+#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/tempfile.h"
+#include "scrub/tempexch.h"
+#include "scrub/xfile.h"
/*
* Create a temporary file for reconstructing metadata, with the intention of
@@ -249,3 +255,385 @@ xrep_tempfile_rele(
xchk_irele(sc, sc->tempip);
sc->tempip = NULL;
}
+
+/*
+ * Make sure that the given range of the data fork of the temporary file is
+ * mapped to written blocks. The caller must ensure that both inodes are
+ * joined to the transaction.
+ */
+int
+xrep_tempfile_prealloc(
+ struct xfs_scrub *sc,
+ xfs_fileoff_t off,
+ xfs_filblks_t len)
+{
+ struct xfs_bmbt_irec map;
+ xfs_fileoff_t end = off + len;
+ int error;
+
+ ASSERT(sc->tempip != NULL);
+ ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));
+
+ for (; off < end; off = map.br_startoff + map.br_blockcount) {
+ int nmaps = 1;
+
+ /*
+ * If we have a real extent mapping this block then we're
+ * in ok shape.
+ */
+ error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
+ XFS_DATA_FORK);
+ if (error)
+ return error;
+ if (nmaps == 0) {
+ ASSERT(nmaps != 0);
+ return -EFSCORRUPTED;
+ }
+
+ if (xfs_bmap_is_written_extent(&map))
+ continue;
+
+ /*
+ * If we find a delalloc reservation then something is very
+ * very wrong. Bail out.
+ */
+ if (map.br_startblock == DELAYSTARTBLOCK)
+ return -EFSCORRUPTED;
+
+ /*
+ * Make sure this block has a real zeroed extent allocated to
+ * it.
+ */
+ nmaps = 1;
+ error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
+ XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
+ &nmaps);
+ if (error)
+ return error;
+ if (nmaps != 1)
+ return -EFSCORRUPTED;
+
+ trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);
+
+ /* Commit new extent and all deferred work. */
+ error = xfs_defer_finish(&sc->tp);
+ if (error)
+ return error;
+ }
+
+ return 0;
+}
+
+/*
+ * Write data to each block of a file. The given range of the tempfile's data
+ * fork must already be populated with written extents.
+ */
+int
+xrep_tempfile_copyin(
+ struct xfs_scrub *sc,
+ xfs_fileoff_t off,
+ xfs_filblks_t len,
+ xrep_tempfile_copyin_fn prep_fn,
+ void *data)
+{
+ LIST_HEAD(buffers_list);
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_buf *bp;
+ xfs_fileoff_t flush_mask;
+ xfs_fileoff_t end = off + len;
+ loff_t pos = XFS_FSB_TO_B(mp, off);
+ int error = 0;
+
+ ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));
+
+ /* Flush buffers to disk every 512K */
+ flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;
+
+ for (; off < end; off++, pos += mp->m_sb.sb_blocksize) {
+ struct xfs_bmbt_irec map;
+ int nmaps = 1;
+
+ /* Read block mapping for this file block. */
+ error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
+ if (error)
+ goto out_err;
+ if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
+ error = -EFSCORRUPTED;
+ goto out_err;
+ }
+
+ /* Get the metadata buffer for this offset in the file. */
+ error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, map.br_startblock),
+ mp->m_bsize, 0, &bp);
+ if (error)
+ goto out_err;
+
+ trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map);
+
+ /* Read in a block's worth of data from the xfile. */
+ error = prep_fn(sc, bp, data);
+ if (error) {
+ xfs_trans_brelse(sc->tp, bp);
+ goto out_err;
+ }
+
+ /* Queue buffer, and flush if we have too much dirty data. */
+ xfs_buf_delwri_queue_here(bp, &buffers_list);
+ xfs_trans_brelse(sc->tp, bp);
+
+ if (!(off & flush_mask)) {
+ error = xfs_buf_delwri_submit(&buffers_list);
+ if (error)
+ goto out_err;
+ }
+ }
+
+ /*
+ * Write the new blocks to disk. If the ordered list isn't empty after
+ * that, then something went wrong and we have to fail. This should
+ * never happen, but we'll check anyway.
+ */
+ error = xfs_buf_delwri_submit(&buffers_list);
+ if (error)
+ goto out_err;
+
+ if (!list_empty(&buffers_list)) {
+ ASSERT(list_empty(&buffers_list));
+ error = -EIO;
+ goto out_err;
+ }
+
+ return 0;
+
+out_err:
+ xfs_buf_delwri_cancel(&buffers_list);
+ return error;
+}
+
+/*
+ * Set the temporary file's size. Caller must join the tempfile to the scrub
+ * transaction and is responsible for adjusting block mappings as needed.
+ */
+int
+xrep_tempfile_set_isize(
+ struct xfs_scrub *sc,
+ unsigned long long isize)
+{
+ if (sc->tempip->i_disk_size == isize)
+ return 0;
+
+ sc->tempip->i_disk_size = isize;
+ i_size_write(VFS_I(sc->tempip), isize);
+ return xrep_tempfile_roll_trans(sc);
+}
+
+/*
+ * Roll a repair transaction involving the temporary file. Caller must join
+ * both the temporary file and the file being scrubbed to the transaction.
+ * This function return with both inodes joined to a new scrub transaction,
+ * or the usual negative errno.
+ */
+int
+xrep_tempfile_roll_trans(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
+ error = xrep_roll_trans(sc);
+ if (error)
+ return error;
+
+ xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+ return 0;
+}
+
+/* Enable file content exchanges. */
+int
+xrep_tempexch_enable(
+ struct xfs_scrub *sc)
+{
+ if (sc->flags & XREP_FSGATES_EXCHANGE_RANGE)
+ return 0;
+
+ if (!xfs_has_exchange_range(sc->mp))
+ return -EOPNOTSUPP;
+
+ trace_xchk_fsgates_enable(sc, XREP_FSGATES_EXCHANGE_RANGE);
+
+ sc->flags |= XREP_FSGATES_EXCHANGE_RANGE;
+ return 0;
+}
+
+/*
+ * Fill out the mapping exchange request in preparation for atomically
+ * committing the contents of a metadata file that we've rebuilt in the temp
+ * file.
+ */
+STATIC int
+xrep_tempexch_prep_request(
+ struct xfs_scrub *sc,
+ int whichfork,
+ struct xrep_tempexch *tx)
+{
+ struct xfs_exchmaps_req *req = &tx->req;
+
+ memset(tx, 0, sizeof(struct xrep_tempexch));
+
+ /* COW forks don't exist on disk. */
+ if (whichfork == XFS_COW_FORK) {
+ ASSERT(0);
+ return -EINVAL;
+ }
+
+ /* Both files should have the relevant forks. */
+ if (!xfs_ifork_ptr(sc->ip, whichfork) ||
+ !xfs_ifork_ptr(sc->tempip, whichfork)) {
+ ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL);
+ ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL);
+ return -EINVAL;
+ }
+
+ /* Exchange all mappings in both forks. */
+ req->ip1 = sc->tempip;
+ req->ip2 = sc->ip;
+ req->startoff1 = 0;
+ req->startoff2 = 0;
+ switch (whichfork) {
+ case XFS_ATTR_FORK:
+ req->flags |= XFS_EXCHMAPS_ATTR_FORK;
+ break;
+ case XFS_DATA_FORK:
+ /* Always exchange sizes when exchanging data fork mappings. */
+ req->flags |= XFS_EXCHMAPS_SET_SIZES;
+ break;
+ }
+ req->blockcount = XFS_MAX_FILEOFF;
+
+ return 0;
+}
+
+/*
+ * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip
+ * this if quota enforcement is disabled or if both inodes' dquots are the
+ * same. The qretry structure must be initialized to zeroes before the first
+ * call to this function.
+ */
+STATIC int
+xrep_tempexch_reserve_quota(
+ struct xfs_scrub *sc,
+ const struct xrep_tempexch *tx)
+{
+ struct xfs_trans *tp = sc->tp;
+ const struct xfs_exchmaps_req *req = &tx->req;
+ int64_t ddelta, rdelta;
+ int error;
+
+ /*
+ * Don't bother with a quota reservation if we're not enforcing them
+ * or the two inodes have the same dquots.
+ */
+ if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 ||
+ (req->ip1->i_udquot == req->ip2->i_udquot &&
+ req->ip1->i_gdquot == req->ip2->i_gdquot &&
+ req->ip1->i_pdquot == req->ip2->i_pdquot))
+ return 0;
+
+ /*
+ * Quota reservation for each file comes from two sources. First, we
+ * need to account for any net gain in mapped blocks during the
+ * exchange. Second, we need reservation for the gross gain in mapped
+ * blocks so that we don't trip over any quota block reservation
+ * assertions. We must reserve the gross gain because the quota code
+ * subtracts from bcount the number of blocks that we unmap; it does
+ * not add that quantity back to the quota block reservation.
+ */
+ ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount);
+ rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount);
+ error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
+ ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount,
+ true);
+ if (error)
+ return error;
+
+ ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount);
+ rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount);
+ return xfs_trans_reserve_quota_nblks(tp, req->ip2,
+ ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount,
+ true);
+}
+
+/*
+ * Prepare an existing transaction for an atomic file contents exchange.
+ *
+ * This function fills out the mapping exchange request and resource estimation
+ * structures in preparation for exchanging the contents of a metadata file
+ * that has been rebuilt in the temp file. Next, it reserves space and quota
+ * for the transaction.
+ *
+ * The caller must hold ILOCK_EXCL of the scrub target file and the temporary
+ * file. The caller must join both inodes to the transaction with no unlock
+ * flags, and is responsible for dropping both ILOCKs when appropriate. Only
+ * use this when those ILOCKs cannot be dropped.
+ */
+int
+xrep_tempexch_trans_reserve(
+ struct xfs_scrub *sc,
+ int whichfork,
+ struct xrep_tempexch *tx)
+{
+ int error;
+
+ ASSERT(sc->tp != NULL);
+ xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL);
+
+ error = xrep_tempexch_prep_request(sc, whichfork, tx);
+ if (error)
+ return error;
+
+ error = xfs_exchmaps_estimate(&tx->req);
+ if (error)
+ return error;
+
+ error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0);
+ if (error)
+ return error;
+
+ return xrep_tempexch_reserve_quota(sc, tx);
+}
+
+/*
+ * Exchange file mappings (and hence file contents) between the file being
+ * repaired and the temporary file. Returns with both inodes locked and joined
+ * to a clean scrub transaction.
+ */
+int
+xrep_tempexch_contents(
+ struct xfs_scrub *sc,
+ struct xrep_tempexch *tx)
+{
+ int error;
+
+ ASSERT(sc->flags & XREP_FSGATES_EXCHANGE_RANGE);
+
+ xfs_exchange_mappings(sc->tp, &tx->req);
+ error = xfs_defer_finish(&sc->tp);
+ if (error)
+ return error;
+
+ /*
+ * If we exchanged the ondisk sizes of two metadata files, we must
+ * exchanged the incore sizes as well.
+ */
+ if (tx->req.flags & XFS_EXCHMAPS_SET_SIZES) {
+ loff_t temp;
+
+ temp = i_size_read(VFS_I(sc->ip));
+ i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
+ i_size_write(VFS_I(sc->tempip), temp);
+ }
+
+ return 0;
+}
diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h
index e165e0a3faf6..7980f9c4de55 100644
--- a/fs/xfs/scrub/tempfile.h
+++ b/fs/xfs/scrub/tempfile.h
@@ -17,6 +17,21 @@ void xrep_tempfile_iounlock(struct xfs_scrub *sc);
void xrep_tempfile_ilock(struct xfs_scrub *sc);
bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc);
void xrep_tempfile_iunlock(struct xfs_scrub *sc);
+
+int xrep_tempfile_prealloc(struct xfs_scrub *sc, xfs_fileoff_t off,
+ xfs_filblks_t len);
+
+enum xfs_blft;
+
+typedef int (*xrep_tempfile_copyin_fn)(struct xfs_scrub *sc,
+ struct xfs_buf *bp, void *data);
+
+int xrep_tempfile_copyin(struct xfs_scrub *sc, xfs_fileoff_t off,
+ xfs_filblks_t len, xrep_tempfile_copyin_fn fn, void *data);
+
+int xrep_tempfile_set_isize(struct xfs_scrub *sc, unsigned long long isize);
+
+int xrep_tempfile_roll_trans(struct xfs_scrub *sc);
#else
static inline void xrep_tempfile_iolock_both(struct xfs_scrub *sc)
{
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index cbd70ecd3011..8d05f2adae3d 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -114,6 +114,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY);
{ XCHK_FSGATES_QUOTA, "fsgates_quota" }, \
{ XCHK_FSGATES_DIRENTS, "fsgates_dirents" }, \
{ XCHK_FSGATES_RMAP, "fsgates_rmap" }, \
+ { XREP_FSGATES_EXCHANGE_RANGE, "fsgates_exchrange" }, \
{ XREP_RESET_PERAG_RESV, "reset_perag_resv" }, \
{ XREP_ALREADY_FIXED, "already_fixed" }
@@ -2314,6 +2315,45 @@ TRACE_EVENT(xrep_tempfile_create,
__entry->temp_inum)
);
+DECLARE_EVENT_CLASS(xrep_tempfile_class,
+ TP_PROTO(struct xfs_scrub *sc, int whichfork,
+ struct xfs_bmbt_irec *irec),
+ TP_ARGS(sc, whichfork, irec),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(int, whichfork)
+ __field(xfs_fileoff_t, lblk)
+ __field(xfs_filblks_t, len)
+ __field(xfs_fsblock_t, pblk)
+ __field(int, state)
+ ),
+ TP_fast_assign(
+ __entry->dev = sc->mp->m_super->s_dev;
+ __entry->ino = sc->tempip->i_ino;
+ __entry->whichfork = whichfork;
+ __entry->lblk = irec->br_startoff;
+ __entry->len = irec->br_blockcount;
+ __entry->pblk = irec->br_startblock;
+ __entry->state = irec->br_state;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx whichfork %s fileoff 0x%llx fsbcount 0x%llx startblock 0x%llx state %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
+ __entry->lblk,
+ __entry->len,
+ __entry->pblk,
+ __entry->state)
+);
+#define DEFINE_XREP_TEMPFILE_EVENT(name) \
+DEFINE_EVENT(xrep_tempfile_class, name, \
+ TP_PROTO(struct xfs_scrub *sc, int whichfork, \
+ struct xfs_bmbt_irec *irec), \
+ TP_ARGS(sc, whichfork, irec))
+DEFINE_XREP_TEMPFILE_EVENT(xrep_tempfile_prealloc);
+DEFINE_XREP_TEMPFILE_EVENT(xrep_tempfile_copyin);
+
TRACE_EVENT(xreap_ifork_extent,
TP_PROTO(struct xfs_scrub *sc, struct xfs_inode *ip, int whichfork,
const struct xfs_bmbt_irec *irec),