-rw-r--r--  fs/xfs/Makefile                    |    3
-rw-r--r--  fs/xfs/libxfs/xfs_btree.c          |   73
-rw-r--r--  fs/xfs/libxfs/xfs_btree.h          |    8
-rw-r--r--  fs/xfs/libxfs/xfs_btree_mem.c      |    1
-rw-r--r--  fs/xfs/libxfs/xfs_btree_staging.c  |    1
-rw-r--r--  fs/xfs/libxfs/xfs_defer.h          |    1
-rw-r--r--  fs/xfs/libxfs/xfs_exchmaps.c       |    4
-rw-r--r--  fs/xfs/libxfs/xfs_format.h         |   28
-rw-r--r--  fs/xfs/libxfs/xfs_fs.h             |    7
-rw-r--r--  fs/xfs/libxfs/xfs_health.h         |    4
-rw-r--r--  fs/xfs/libxfs/xfs_inode_buf.c      |   32
-rw-r--r--  fs/xfs/libxfs/xfs_inode_fork.c     |   25
-rw-r--r--  fs/xfs/libxfs/xfs_log_format.h     |    6
-rw-r--r--  fs/xfs/libxfs/xfs_log_recover.h    |    2
-rw-r--r--  fs/xfs/libxfs/xfs_metafile.c       |   18
-rw-r--r--  fs/xfs/libxfs/xfs_metafile.h       |    2
-rw-r--r--  fs/xfs/libxfs/xfs_ondisk.h         |    2
-rw-r--r--  fs/xfs/libxfs/xfs_refcount.c       |    6
-rw-r--r--  fs/xfs/libxfs/xfs_rmap.c           |  171
-rw-r--r--  fs/xfs/libxfs/xfs_rmap.h           |   12
-rw-r--r--  fs/xfs/libxfs/xfs_rtbitmap.c       |    2
-rw-r--r--  fs/xfs/libxfs/xfs_rtbitmap.h       |    9
-rw-r--r--  fs/xfs/libxfs/xfs_rtgroup.c        |   53
-rw-r--r--  fs/xfs/libxfs/xfs_rtgroup.h        |   49
-rw-r--r--  fs/xfs/libxfs/xfs_rtrmap_btree.c   | 1011
-rw-r--r--  fs/xfs/libxfs/xfs_rtrmap_btree.h   |  210
-rw-r--r--  fs/xfs/libxfs/xfs_sb.c             |    6
-rw-r--r--  fs/xfs/libxfs/xfs_shared.h         |   14
-rw-r--r--  fs/xfs/libxfs/xfs_trans_resv.c     |   12
-rw-r--r--  fs/xfs/libxfs/xfs_trans_space.h    |   13
-rw-r--r--  fs/xfs/scrub/alloc_repair.c        |    5
-rw-r--r--  fs/xfs/scrub/bmap.c                |  108
-rw-r--r--  fs/xfs/scrub/bmap_repair.c         |  129
-rw-r--r--  fs/xfs/scrub/common.c              |  160
-rw-r--r--  fs/xfs/scrub/common.h              |   23
-rw-r--r--  fs/xfs/scrub/health.c              |    1
-rw-r--r--  fs/xfs/scrub/inode.c               |   10
-rw-r--r--  fs/xfs/scrub/inode_repair.c        |  136
-rw-r--r--  fs/xfs/scrub/metapath.c            |    3
-rw-r--r--  fs/xfs/scrub/newbt.c               |   42
-rw-r--r--  fs/xfs/scrub/newbt.h               |    1
-rw-r--r--  fs/xfs/scrub/reap.c                |   41
-rw-r--r--  fs/xfs/scrub/reap.h                |    2
-rw-r--r--  fs/xfs/scrub/repair.c              |  191
-rw-r--r--  fs/xfs/scrub/repair.h              |   17
-rw-r--r--  fs/xfs/scrub/rgsuper.c             |    6
-rw-r--r--  fs/xfs/scrub/rmap_repair.c         |   84
-rw-r--r--  fs/xfs/scrub/rtbitmap.c            |   75
-rw-r--r--  fs/xfs/scrub/rtbitmap.h            |   55
-rw-r--r--  fs/xfs/scrub/rtbitmap_repair.c     |  429
-rw-r--r--  fs/xfs/scrub/rtrmap.c              |  271
-rw-r--r--  fs/xfs/scrub/rtrmap_repair.c       |  903
-rw-r--r--  fs/xfs/scrub/rtsummary.c           |   17
-rw-r--r--  fs/xfs/scrub/rtsummary_repair.c    |    3
-rw-r--r--  fs/xfs/scrub/scrub.c               |   11
-rw-r--r--  fs/xfs/scrub/scrub.h               |   14
-rw-r--r--  fs/xfs/scrub/stats.c               |    1
-rw-r--r--  fs/xfs/scrub/tempexch.h            |    2
-rw-r--r--  fs/xfs/scrub/tempfile.c            |   20
-rw-r--r--  fs/xfs/scrub/trace.c               |    1
-rw-r--r--  fs/xfs/scrub/trace.h               |  228
-rw-r--r--  fs/xfs/xfs_buf.c                   |    1
-rw-r--r--  fs/xfs/xfs_buf_item_recover.c      |    4
-rw-r--r--  fs/xfs/xfs_drain.c                 |   20
-rw-r--r--  fs/xfs/xfs_drain.h                 |    7
-rw-r--r--  fs/xfs/xfs_fsmap.c                 |  174
-rw-r--r--  fs/xfs/xfs_fsops.c                 |   11
-rw-r--r--  fs/xfs/xfs_health.c                |    1
-rw-r--r--  fs/xfs/xfs_inode.c                 |   19
-rw-r--r--  fs/xfs/xfs_inode_item.c            |    2
-rw-r--r--  fs/xfs/xfs_inode_item_recover.c    |   44
-rw-r--r--  fs/xfs/xfs_log_recover.c           |    2
-rw-r--r--  fs/xfs/xfs_mount.c                 |    5
-rw-r--r--  fs/xfs/xfs_mount.h                 |    9
-rw-r--r--  fs/xfs/xfs_notify_failure.c        |  230
-rw-r--r--  fs/xfs/xfs_notify_failure.h        |   11
-rw-r--r--  fs/xfs/xfs_qm.c                    |    8
-rw-r--r--  fs/xfs/xfs_rmap_item.c             |  216
-rw-r--r--  fs/xfs/xfs_rtalloc.c               |   82
-rw-r--r--  fs/xfs/xfs_rtalloc.h               |   10
-rw-r--r--  fs/xfs/xfs_stats.c                 |    4
-rw-r--r--  fs/xfs/xfs_stats.h                 |    2
-rw-r--r--  fs/xfs/xfs_super.c                 |    6
-rw-r--r--  fs/xfs/xfs_super.h                 |    1
-rw-r--r--  fs/xfs/xfs_trace.h                 |  104
 85 files changed, 5381 insertions(+), 366 deletions(-)
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index ed9b0dabc1f1..338e10f81b7b 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -51,6 +51,7 @@ xfs-y += $(addprefix libxfs/, \
xfs_rmap_btree.o \
xfs_refcount.o \
xfs_refcount_btree.o \
+ xfs_rtrmap_btree.o \
xfs_sb.o \
xfs_symlink_remote.o \
xfs_trans_inode.o \
@@ -193,6 +194,7 @@ xfs-$(CONFIG_XFS_ONLINE_SCRUB_STATS) += scrub/stats.o
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
rgsuper.o \
rtbitmap.o \
+ rtrmap.o \
rtsummary.o \
)
@@ -232,6 +234,7 @@ xfs-y += $(addprefix scrub/, \
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
rtbitmap_repair.o \
+ rtrmap_repair.o \
rtsummary_repair.o \
)
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 5ab201ef041e..36ab06f8a3bc 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -30,6 +30,11 @@
#include "xfs_health.h"
#include "xfs_buf_mem.h"
#include "xfs_btree_mem.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_bmap.h"
+#include "xfs_rmap.h"
+#include "xfs_quota.h"
+#include "xfs_metafile.h"
/*
* Btree magic numbers.
@@ -5527,6 +5532,9 @@ xfs_btree_init_cur_caches(void)
error = xfs_refcountbt_init_cur_cache();
if (error)
goto err;
+ error = xfs_rtrmapbt_init_cur_cache();
+ if (error)
+ goto err;
return 0;
err:
@@ -5543,6 +5551,7 @@ xfs_btree_destroy_cur_caches(void)
xfs_bmbt_destroy_cur_cache();
xfs_rmapbt_destroy_cur_cache();
xfs_refcountbt_destroy_cur_cache();
+ xfs_rtrmapbt_destroy_cur_cache();
}
/* Move the btree cursor before the first record. */
@@ -5571,3 +5580,67 @@ xfs_btree_goto_left_edge(
return 0;
}
+
+/* Allocate a block for an inode-rooted metadata btree. */
+int
+xfs_btree_alloc_metafile_block(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *start,
+ union xfs_btree_ptr *new,
+ int *stat)
+{
+ struct xfs_alloc_arg args = {
+ .mp = cur->bc_mp,
+ .tp = cur->bc_tp,
+ .resv = XFS_AG_RESV_METAFILE,
+ .minlen = 1,
+ .maxlen = 1,
+ .prod = 1,
+ };
+ struct xfs_inode *ip = cur->bc_ino.ip;
+ int error;
+
+ ASSERT(xfs_is_metadir_inode(ip));
+
+ xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, cur->bc_ino.whichfork);
+ error = xfs_alloc_vextent_start_ag(&args,
+ XFS_INO_TO_FSB(cur->bc_mp, ip->i_ino));
+ if (error)
+ return error;
+ if (args.fsbno == NULLFSBLOCK) {
+ *stat = 0;
+ return 0;
+ }
+ ASSERT(args.len == 1);
+
+ xfs_metafile_resv_alloc_space(ip, &args);
+
+ new->l = cpu_to_be64(args.fsbno);
+ *stat = 1;
+ return 0;
+}
+
+/* Free a block from an inode-rooted metadata btree. */
+int
+xfs_btree_free_metafile_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_inode *ip = cur->bc_ino.ip;
+ struct xfs_trans *tp = cur->bc_tp;
+ xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, xfs_buf_daddr(bp));
+ int error;
+
+ ASSERT(xfs_is_metadir_inode(ip));
+
+ xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_ino.whichfork);
+ error = xfs_free_extent_later(tp, fsbno, 1, &oinfo, XFS_AG_RESV_METAFILE,
+ 0);
+ if (error)
+ return error;
+
+ xfs_metafile_resv_free_space(ip, tp, 1);
+ return 0;
+}
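
These two helpers are designed to be wired directly into the ops table of an
inode-rooted metadata btree; the rtrmapbt added later in this patch does
exactly that. A minimal sketch of the wiring (the "examplebt" name is
hypothetical; unrelated callbacks omitted):

	static const struct xfs_btree_ops xfs_examplebt_ops = {
		.name		= "example",
		.type		= XFS_BTREE_TYPE_INODE,
		.alloc_block	= xfs_btree_alloc_metafile_block,
		.free_block	= xfs_btree_free_metafile_block,
	};

Note the paired space accounting: allocation charges the metadata file's
reservation via xfs_metafile_resv_alloc_space(), and the free path returns
the block to that reservation with xfs_metafile_resv_free_space() once
xfs_free_extent_later() has queued the deferred free.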
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 3b8c2ccad908..dbc047b2fb2c 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -135,7 +135,7 @@ struct xfs_btree_ops {
/* offset of btree stats array */
unsigned int statoff;
- /* sick mask for health reporting (only for XFS_BTREE_TYPE_AG) */
+ /* sick mask for health reporting (not for bmap btrees) */
unsigned int sick_mask;
/* cursor operations */
@@ -703,4 +703,10 @@ xfs_btree_at_iroot(
level == cur->bc_nlevels - 1;
}
+int xfs_btree_alloc_metafile_block(struct xfs_btree_cur *cur,
+ const union xfs_btree_ptr *start, union xfs_btree_ptr *newp,
+ int *stat);
+int xfs_btree_free_metafile_block(struct xfs_btree_cur *cur,
+ struct xfs_buf *bp);
+
#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_btree_mem.c b/fs/xfs/libxfs/xfs_btree_mem.c
index df3d613675a1..f2f7b4305413 100644
--- a/fs/xfs/libxfs/xfs_btree_mem.c
+++ b/fs/xfs/libxfs/xfs_btree_mem.c
@@ -18,6 +18,7 @@
#include "xfs_ag.h"
#include "xfs_buf_item.h"
#include "xfs_trace.h"
+#include "xfs_rtgroup.h"
/* Set the root of an in-memory btree. */
void
diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c
index 58c146b5c9d4..5ed84f9cc877 100644
--- a/fs/xfs/libxfs/xfs_btree_staging.c
+++ b/fs/xfs/libxfs/xfs_btree_staging.c
@@ -134,6 +134,7 @@ xfs_btree_stage_ifakeroot(
cur->bc_ino.ifake = ifake;
cur->bc_nlevels = ifake->if_levels;
cur->bc_ino.forksize = ifake->if_fork_size;
+ cur->bc_ino.whichfork = XFS_STAGING_FORK;
cur->bc_flags |= XFS_BTREE_STAGING;
}
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index ec51b8465e61..1e2477eaa5a8 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -69,6 +69,7 @@ struct xfs_defer_op_type {
extern const struct xfs_defer_op_type xfs_bmap_update_defer_type;
extern const struct xfs_defer_op_type xfs_refcount_update_defer_type;
extern const struct xfs_defer_op_type xfs_rmap_update_defer_type;
+extern const struct xfs_defer_op_type xfs_rtrmap_update_defer_type;
extern const struct xfs_defer_op_type xfs_extent_free_defer_type;
extern const struct xfs_defer_op_type xfs_agfl_free_defer_type;
extern const struct xfs_defer_op_type xfs_rtextent_free_defer_type;
diff --git a/fs/xfs/libxfs/xfs_exchmaps.c b/fs/xfs/libxfs/xfs_exchmaps.c
index 2021396651de..3f1d6a98c118 100644
--- a/fs/xfs/libxfs/xfs_exchmaps.c
+++ b/fs/xfs/libxfs/xfs_exchmaps.c
@@ -662,7 +662,9 @@ xfs_exchmaps_rmapbt_blocks(
if (!xfs_has_rmapbt(mp))
return 0;
if (XFS_IS_REALTIME_INODE(req->ip1))
- return 0;
+ return howmany_64(req->nr_exchanges,
+ XFS_MAX_CONTIG_RTRMAPS_PER_BLOCK(mp)) *
+ XFS_RTRMAPADD_SPACE_RES(mp);
return howmany_64(req->nr_exchanges,
XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp)) *
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 4d47a3e723aa..fba4e59aded4 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -857,6 +857,7 @@ enum xfs_metafile_type {
XFS_METAFILE_PRJQUOTA, /* project quota */
XFS_METAFILE_RTBITMAP, /* rt bitmap */
XFS_METAFILE_RTSUMMARY, /* rt summary */
+ XFS_METAFILE_RTRMAP, /* rt rmap */
XFS_METAFILE_MAX
} __packed;
@@ -868,7 +869,8 @@ enum xfs_metafile_type {
{ XFS_METAFILE_GRPQUOTA, "grpquota" }, \
{ XFS_METAFILE_PRJQUOTA, "prjquota" }, \
{ XFS_METAFILE_RTBITMAP, "rtbitmap" }, \
- { XFS_METAFILE_RTSUMMARY, "rtsummary" }
+ { XFS_METAFILE_RTSUMMARY, "rtsummary" }, \
+ { XFS_METAFILE_RTRMAP, "rtrmap" }
/*
* On-disk inode structure.
@@ -997,7 +999,8 @@ enum xfs_dinode_fmt {
XFS_DINODE_FMT_LOCAL, /* bulk data */
XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */
XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */
- XFS_DINODE_FMT_UUID /* added long ago, but never used */
+ XFS_DINODE_FMT_UUID, /* added long ago, but never used */
+ XFS_DINODE_FMT_META_BTREE, /* metadata btree */
};
#define XFS_INODE_FORMAT_STR \
@@ -1005,7 +1008,8 @@ enum xfs_dinode_fmt {
{ XFS_DINODE_FMT_LOCAL, "local" }, \
{ XFS_DINODE_FMT_EXTENTS, "extent" }, \
{ XFS_DINODE_FMT_BTREE, "btree" }, \
- { XFS_DINODE_FMT_UUID, "uuid" }
+ { XFS_DINODE_FMT_UUID, "uuid" }, \
+ { XFS_DINODE_FMT_META_BTREE, "meta_btree" }
/*
* Max values for extnum and aextnum.
@@ -1726,6 +1730,24 @@ typedef __be32 xfs_rmap_ptr_t;
XFS_IBT_BLOCK(mp) + 1)
/*
+ * Realtime Reverse mapping btree format definitions
+ *
+ * This is a btree for reverse mapping records for realtime volumes
+ */
+#define XFS_RTRMAP_CRC_MAGIC 0x4d415052 /* 'MAPR' */
+
+/*
+ * rtrmap root header, on-disk form only.
+ */
+struct xfs_rtrmap_root {
+ __be16 bb_level; /* 0 is a leaf */
+ __be16 bb_numrecs; /* current # of data records */
+};
+
+/* inode-based btree pointer type */
+typedef __be64 xfs_rtrmap_ptr_t;
+
+/*
* Reference Count Btree format definitions
*
*/
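
Together with the pointer typedef above, this means the rtrmap inode's data
fork holds the 4-byte root header followed immediately by the btree payload.
A rough layout sketch (assuming, as in other inode-rooted XFS btrees, that
node-level keys precede the pointer array):

	leaf root (bb_level == 0): [xfs_rtrmap_root][xfs_rmap_rec x bb_numrecs]
	node root (bb_level > 0):  [xfs_rtrmap_root][key pairs][xfs_rtrmap_ptr_t x bb_numrecs]

The xfs_rtrmapbt_droot_maxrecs() helper added later in this patch computes
how many records or key/pointer sets fit in the space the fork provides.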
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 41ce4d3d650e..d42d3a5617e3 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -737,9 +737,10 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_DIRTREE 28 /* directory tree structure */
#define XFS_SCRUB_TYPE_METAPATH 29 /* metadata directory tree paths */
#define XFS_SCRUB_TYPE_RGSUPER 30 /* realtime superblock */
+#define XFS_SCRUB_TYPE_RTRMAPBT 31 /* rtgroup reverse mapping btree */
/* Number of scrub subcommands. */
-#define XFS_SCRUB_TYPE_NR 31
+#define XFS_SCRUB_TYPE_NR 32
/*
* This special type code only applies to the vectored scrub implementation.
@@ -829,9 +830,10 @@ struct xfs_scrub_vec_head {
#define XFS_SCRUB_METAPATH_USRQUOTA (5) /* user quota */
#define XFS_SCRUB_METAPATH_GRPQUOTA (6) /* group quota */
#define XFS_SCRUB_METAPATH_PRJQUOTA (7) /* project quota */
+#define XFS_SCRUB_METAPATH_RTRMAPBT (8) /* realtime reverse mapping */
/* Number of metapath sm_ino values */
-#define XFS_SCRUB_METAPATH_NR (8)
+#define XFS_SCRUB_METAPATH_NR (9)
/*
* ioctl limits
@@ -993,6 +995,7 @@ struct xfs_rtgroup_geometry {
#define XFS_RTGROUP_GEOM_SICK_SUPER (1U << 0) /* superblock */
#define XFS_RTGROUP_GEOM_SICK_BITMAP (1U << 1) /* rtbitmap */
#define XFS_RTGROUP_GEOM_SICK_SUMMARY (1U << 2) /* rtsummary */
+#define XFS_RTGROUP_GEOM_SICK_RMAPBT (1U << 3) /* reverse mappings */
/*
* ioctl commands that are used by Linux filesystems
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index d34986ac18c3..5c8a0aff6ba6 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -70,6 +70,7 @@ struct xfs_rtgroup;
#define XFS_SICK_RG_SUPER (1 << 0) /* rt group superblock */
#define XFS_SICK_RG_BITMAP (1 << 1) /* rt group bitmap */
#define XFS_SICK_RG_SUMMARY (1 << 2) /* rt groups summary */
+#define XFS_SICK_RG_RMAPBT (1 << 3) /* reverse mappings */
/* Observable health issues for AG metadata. */
#define XFS_SICK_AG_SB (1 << 0) /* superblock */
@@ -115,7 +116,8 @@ struct xfs_rtgroup;
#define XFS_SICK_RG_PRIMARY (XFS_SICK_RG_SUPER | \
XFS_SICK_RG_BITMAP | \
- XFS_SICK_RG_SUMMARY)
+ XFS_SICK_RG_SUMMARY | \
+ XFS_SICK_RG_RMAPBT)
#define XFS_SICK_AG_PRIMARY (XFS_SICK_AG_SB | \
XFS_SICK_AG_AGF | \
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 424861fbf1bd..17cb91b89fca 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -441,6 +441,25 @@ xfs_dinode_verify_fork(
if (di_nextents > max_extents)
return __this_address;
break;
+ case XFS_DINODE_FMT_META_BTREE:
+ if (!xfs_has_metadir(mp))
+ return __this_address;
+ if (!(dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA)))
+ return __this_address;
+ switch (be16_to_cpu(dip->di_metatype)) {
+ case XFS_METAFILE_RTRMAP:
+ /*
+ * growfs must create the rtrmap inodes before adding a
+ * realtime volume to the filesystem, so we cannot use
+ * the rtrmapbt predicate here.
+ */
+ if (!xfs_has_rmapbt(mp))
+ return __this_address;
+ break;
+ default:
+ return __this_address;
+ }
+ break;
default:
return __this_address;
}
@@ -460,6 +479,10 @@ xfs_dinode_verify_forkoff(
if (dip->di_forkoff != (roundup(sizeof(xfs_dev_t), 8) >> 3))
return __this_address;
break;
+ case XFS_DINODE_FMT_META_BTREE:
+ if (!xfs_has_metadir(mp) || !xfs_has_parent(mp))
+ return __this_address;
+ fallthrough;
case XFS_DINODE_FMT_LOCAL: /* fall through ... */
case XFS_DINODE_FMT_EXTENTS: /* fall through ... */
case XFS_DINODE_FMT_BTREE:
@@ -637,9 +660,6 @@ xfs_dinode_verify(
if (mode && nextents + naextents > nblocks)
return __this_address;
- if (nextents + naextents == 0 && nblocks != 0)
- return __this_address;
-
if (S_ISDIR(mode) && nextents > mp->m_dir_geo->max_extents)
return __this_address;
@@ -743,6 +763,12 @@ xfs_dinode_verify(
return fa;
}
+ /* metadata inodes containing btrees always have zero extent count */
+ if (XFS_DFORK_FORMAT(dip, XFS_DATA_FORK) != XFS_DINODE_FMT_META_BTREE) {
+ if (nextents + naextents == 0 && nblocks != 0)
+ return __this_address;
+ }
+
return NULL;
}
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 60853bac289a..d9b3c182cb40 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -27,6 +27,7 @@
#include "xfs_errortag.h"
#include "xfs_health.h"
#include "xfs_symlink_remote.h"
+#include "xfs_rtrmap_btree.h"
struct kmem_cache *xfs_ifork_cache;
@@ -267,6 +268,14 @@ xfs_iformat_data_fork(
return xfs_iformat_extents(ip, dip, XFS_DATA_FORK);
case XFS_DINODE_FMT_BTREE:
return xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
+ case XFS_DINODE_FMT_META_BTREE:
+ switch (ip->i_metatype) {
+ case XFS_METAFILE_RTRMAP:
+ return xfs_iformat_rtrmap(ip, dip);
+ default:
+ break;
+ }
+ fallthrough;
default:
xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
dip, sizeof(*dip), __this_address);
@@ -601,6 +610,22 @@ xfs_iflush_fork(
}
break;
+ case XFS_DINODE_FMT_META_BTREE:
+ ASSERT(whichfork == XFS_DATA_FORK);
+
+ if (!(iip->ili_fields & brootflag[whichfork]))
+ break;
+
+ switch (ip->i_metatype) {
+ case XFS_METAFILE_RTRMAP:
+ xfs_iflush_rtrmap(ip, dip);
+ break;
+ default:
+ ASSERT(0);
+ break;
+ }
+ break;
+
default:
ASSERT(0);
break;
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 15dec19b6c32..a7e0e479454d 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -250,6 +250,8 @@ typedef struct xfs_trans_header {
#define XFS_LI_XMD 0x1249 /* mapping exchange done */
#define XFS_LI_EFI_RT 0x124a /* realtime extent free intent */
#define XFS_LI_EFD_RT 0x124b /* realtime extent free done */
+#define XFS_LI_RUI_RT 0x124c /* realtime rmap update intent */
+#define XFS_LI_RUD_RT 0x124d /* realtime rmap update done */
#define XFS_LI_TYPE_DESC \
{ XFS_LI_EFI, "XFS_LI_EFI" }, \
@@ -271,7 +273,9 @@ typedef struct xfs_trans_header {
{ XFS_LI_XMI, "XFS_LI_XMI" }, \
{ XFS_LI_XMD, "XFS_LI_XMD" }, \
{ XFS_LI_EFI_RT, "XFS_LI_EFI_RT" }, \
- { XFS_LI_EFD_RT, "XFS_LI_EFD_RT" }
+ { XFS_LI_EFD_RT, "XFS_LI_EFD_RT" }, \
+ { XFS_LI_RUI_RT, "XFS_LI_RUI_RT" }, \
+ { XFS_LI_RUD_RT, "XFS_LI_RUD_RT" }
/*
* Inode Log Item Format definitions.
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 5397a8ff004d..abc705aff26d 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -79,6 +79,8 @@ extern const struct xlog_recover_item_ops xlog_xmi_item_ops;
extern const struct xlog_recover_item_ops xlog_xmd_item_ops;
extern const struct xlog_recover_item_ops xlog_rtefi_item_ops;
extern const struct xlog_recover_item_ops xlog_rtefd_item_ops;
+extern const struct xlog_recover_item_ops xlog_rtrui_item_ops;
+extern const struct xlog_recover_item_ops xlog_rtrud_item_ops;
/*
* Macros, structures, prototypes for internal log manager use.
diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c
index e151663cc9ef..2f5f554a36d4 100644
--- a/fs/xfs/libxfs/xfs_metafile.c
+++ b/fs/xfs/libxfs/xfs_metafile.c
@@ -22,6 +22,24 @@
#include "xfs_error.h"
#include "xfs_alloc.h"
+static const struct {
+ enum xfs_metafile_type mtype;
+ const char *name;
+} xfs_metafile_type_strs[] = { XFS_METAFILE_TYPE_STR };
+
+const char *
+xfs_metafile_type_str(enum xfs_metafile_type metatype)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(xfs_metafile_type_strs); i++) {
+ if (xfs_metafile_type_strs[i].mtype == metatype)
+ return xfs_metafile_type_strs[i].name;
+ }
+
+ return NULL;
+}
+
/* Set up an inode to be recognized as a metadata directory inode. */
void
xfs_metafile_set_iflag(
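
The new helper is a linear scan of the name table that XFS_METAFILE_TYPE_STR
(extended in xfs_format.h above) expands into, and it returns NULL for
unrecognized types. A small usage sketch, mirroring the rtgroup lockdep
printer later in this patch:

	printk(KERN_CONT " metatype=%s", xfs_metafile_type_str(ip->i_metatype));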
diff --git a/fs/xfs/libxfs/xfs_metafile.h b/fs/xfs/libxfs/xfs_metafile.h
index 8d8f08a6071c..95af4b52e5a7 100644
--- a/fs/xfs/libxfs/xfs_metafile.h
+++ b/fs/xfs/libxfs/xfs_metafile.h
@@ -6,6 +6,8 @@
#ifndef __XFS_METAFILE_H__
#define __XFS_METAFILE_H__
+const char *xfs_metafile_type_str(enum xfs_metafile_type metatype);
+
/* All metadata files must have these flags set. */
#define XFS_METAFILE_DIFLAGS (XFS_DIFLAG_IMMUTABLE | \
XFS_DIFLAG_SYNC | \
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index ad0dedf00f18..07e2f5fb3a94 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -83,6 +83,8 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(union xfs_rtword_raw, 4);
XFS_CHECK_STRUCT_SIZE(union xfs_suminfo_raw, 4);
XFS_CHECK_STRUCT_SIZE(struct xfs_rtbuf_blkinfo, 48);
+ XFS_CHECK_STRUCT_SIZE(xfs_rtrmap_ptr_t, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rtrmap_root, 4);
/*
* m68k has problems with struct xfs_attr_leaf_name_remote, but we pad
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 2dbab68b4fe6..bbb86dc9a25c 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1831,8 +1831,7 @@ xfs_refcount_alloc_cow_extent(
__xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);
/* Add rmap entry */
- xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
- XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
+ xfs_rmap_alloc_extent(tp, false, fsb, len, XFS_RMAP_OWN_COW);
}
/* Forget a CoW staging event in the refcount btree. */
@@ -1848,8 +1847,7 @@ xfs_refcount_free_cow_extent(
return;
/* Remove rmap entry */
- xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
- XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
+ xfs_rmap_free_extent(tp, false, fsb, len, XFS_RMAP_OWN_COW);
__xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
}
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index d0df68dc3131..f8415fd96cc2 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -25,6 +25,8 @@
#include "xfs_ag.h"
#include "xfs_health.h"
#include "xfs_rmap_item.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtrmap_btree.h"
struct kmem_cache *xfs_rmap_intent_cache;
@@ -264,11 +266,70 @@ xfs_rmap_check_irec(
return NULL;
}
+static xfs_failaddr_t
+xfs_rtrmap_check_meta_irec(
+ struct xfs_rtgroup *rtg,
+ const struct xfs_rmap_irec *irec)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+
+ if (irec->rm_offset != 0)
+ return __this_address;
+ if (irec->rm_flags & XFS_RMAP_UNWRITTEN)
+ return __this_address;
+
+ switch (irec->rm_owner) {
+ case XFS_RMAP_OWN_FS:
+ if (irec->rm_startblock != 0)
+ return __this_address;
+ if (irec->rm_blockcount != mp->m_sb.sb_rextsize)
+ return __this_address;
+ return NULL;
+ default:
+ return __this_address;
+ }
+
+ return NULL;
+}
+
+static xfs_failaddr_t
+xfs_rtrmap_check_inode_irec(
+ struct xfs_rtgroup *rtg,
+ const struct xfs_rmap_irec *irec)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+
+ if (!xfs_verify_ino(mp, irec->rm_owner))
+ return __this_address;
+ if (!xfs_verify_rgbext(rtg, irec->rm_startblock, irec->rm_blockcount))
+ return __this_address;
+ if (!xfs_verify_fileext(mp, irec->rm_offset, irec->rm_blockcount))
+ return __this_address;
+ return NULL;
+}
+
+xfs_failaddr_t
+xfs_rtrmap_check_irec(
+ struct xfs_rtgroup *rtg,
+ const struct xfs_rmap_irec *irec)
+{
+ if (irec->rm_blockcount == 0)
+ return __this_address;
+ if (irec->rm_flags & (XFS_RMAP_BMBT_BLOCK | XFS_RMAP_ATTR_FORK))
+ return __this_address;
+ if (XFS_RMAP_NON_INODE_OWNER(irec->rm_owner))
+ return xfs_rtrmap_check_meta_irec(rtg, irec);
+ return xfs_rtrmap_check_inode_irec(rtg, irec);
+}
+
static inline xfs_failaddr_t
xfs_rmap_check_btrec(
struct xfs_btree_cur *cur,
const struct xfs_rmap_irec *irec)
{
+ if (xfs_btree_is_rtrmap(cur->bc_ops) ||
+ xfs_btree_is_mem_rtrmap(cur->bc_ops))
+ return xfs_rtrmap_check_irec(to_rtg(cur->bc_group), irec);
return xfs_rmap_check_irec(to_perag(cur->bc_group), irec);
}
@@ -283,6 +344,10 @@ xfs_rmap_complain_bad_rec(
if (xfs_btree_is_mem_rmap(cur->bc_ops))
xfs_warn(mp,
"In-Memory Reverse Mapping BTree record corruption detected at %pS!", fa);
+ else if (xfs_btree_is_rtrmap(cur->bc_ops))
+ xfs_warn(mp,
+ "RT Reverse Mapping BTree record corruption in rtgroup %u detected at %pS!",
+ cur->bc_group->xg_gno, fa);
else
xfs_warn(mp,
"Reverse Mapping BTree record corruption in AG %d detected at %pS!",
@@ -525,7 +590,7 @@ xfs_rmap_free_check_owner(
struct xfs_btree_cur *cur,
uint64_t ltoff,
struct xfs_rmap_irec *rec,
- xfs_filblks_t len,
+ xfs_extlen_t len,
uint64_t owner,
uint64_t offset,
unsigned int flags)
@@ -2556,6 +2621,47 @@ __xfs_rmap_finish_intent(
}
}
+static int
+xfs_rmap_finish_init_cursor(
+ struct xfs_trans *tp,
+ struct xfs_rmap_intent *ri,
+ struct xfs_btree_cur **pcur)
+{
+ struct xfs_perag *pag = to_perag(ri->ri_group);
+ struct xfs_buf *agbp = NULL;
+ int error;
+
+ /*
+ * Refresh the freelist before we start changing the rmapbt, because a
+ * shape change could cause us to allocate blocks.
+ */
+ error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
+ if (error) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
+ return error;
+ }
+ if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) {
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
+ return -EFSCORRUPTED;
+ }
+ *pcur = xfs_rmapbt_init_cursor(tp->t_mountp, tp, agbp, pag);
+ return 0;
+}
+
+static int
+xfs_rtrmap_finish_init_cursor(
+ struct xfs_trans *tp,
+ struct xfs_rmap_intent *ri,
+ struct xfs_btree_cur **pcur)
+{
+ struct xfs_rtgroup *rtg = to_rtg(ri->ri_group);
+
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
+ xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);
+ *pcur = xfs_rtrmapbt_init_cursor(tp, rtg);
+ return 0;
+}
+
/*
* Process one of the deferred rmap operations. We pass back the
* btree cursor to maintain our lock on the rmapbt between calls.
@@ -2571,8 +2677,6 @@ xfs_rmap_finish_one(
{
struct xfs_owner_info oinfo;
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_btree_cur *rcur = *pcur;
- struct xfs_buf *agbp = NULL;
xfs_agblock_t bno;
bool unwritten;
int error = 0;
@@ -2586,38 +2690,26 @@ xfs_rmap_finish_one(
* If we haven't gotten a cursor or the cursor AG doesn't match
* the startblock, get one now.
*/
- if (rcur != NULL && rcur->bc_group != ri->ri_group) {
- xfs_btree_del_cursor(rcur, 0);
- rcur = NULL;
+ if (*pcur != NULL && (*pcur)->bc_group != ri->ri_group) {
+ xfs_btree_del_cursor(*pcur, 0);
*pcur = NULL;
}
- if (rcur == NULL) {
- struct xfs_perag *pag = to_perag(ri->ri_group);
-
- /*
- * Refresh the freelist before we start changing the
- * rmapbt, because a shape change could cause us to
- * allocate blocks.
- */
- error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
- if (error) {
- xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
+ if (*pcur == NULL) {
+ if (ri->ri_group->xg_type == XG_TYPE_RTG)
+ error = xfs_rtrmap_finish_init_cursor(tp, ri, pcur);
+ else
+ error = xfs_rmap_finish_init_cursor(tp, ri, pcur);
+ if (error)
return error;
- }
- if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) {
- xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
- return -EFSCORRUPTED;
- }
-
- *pcur = rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
}
xfs_rmap_ino_owner(&oinfo, ri->ri_owner, ri->ri_whichfork,
ri->ri_bmap.br_startoff);
unwritten = ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN;
- bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, ri->ri_bmap.br_startblock);
- error = __xfs_rmap_finish_intent(rcur, ri->ri_type, bno,
+ bno = xfs_fsb_to_gbno(mp, ri->ri_bmap.br_startblock,
+ ri->ri_group->xg_type);
+ error = __xfs_rmap_finish_intent(*pcur, ri->ri_type, bno,
ri->ri_bmap.br_blockcount, &oinfo, unwritten);
if (error)
return error;
@@ -2647,6 +2739,7 @@ __xfs_rmap_add(
struct xfs_trans *tp,
enum xfs_rmap_intent_type type,
uint64_t owner,
+ bool isrt,
int whichfork,
struct xfs_bmbt_irec *bmap)
{
@@ -2658,6 +2751,7 @@ __xfs_rmap_add(
ri->ri_owner = owner;
ri->ri_whichfork = whichfork;
ri->ri_bmap = *bmap;
+ ri->ri_realtime = isrt;
xfs_rmap_defer_add(tp, ri);
}
@@ -2671,6 +2765,7 @@ xfs_rmap_map_extent(
struct xfs_bmbt_irec *PREV)
{
enum xfs_rmap_intent_type type = XFS_RMAP_MAP;
+ bool isrt = xfs_ifork_is_realtime(ip, whichfork);
if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
return;
@@ -2678,7 +2773,7 @@ xfs_rmap_map_extent(
if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
type = XFS_RMAP_MAP_SHARED;
- __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
+ __xfs_rmap_add(tp, type, ip->i_ino, isrt, whichfork, PREV);
}
/* Unmap an extent out of a file. */
@@ -2690,6 +2785,7 @@ xfs_rmap_unmap_extent(
struct xfs_bmbt_irec *PREV)
{
enum xfs_rmap_intent_type type = XFS_RMAP_UNMAP;
+ bool isrt = xfs_ifork_is_realtime(ip, whichfork);
if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
return;
@@ -2697,7 +2793,7 @@ xfs_rmap_unmap_extent(
if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
type = XFS_RMAP_UNMAP_SHARED;
- __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
+ __xfs_rmap_add(tp, type, ip->i_ino, isrt, whichfork, PREV);
}
/*
@@ -2715,6 +2811,7 @@ xfs_rmap_convert_extent(
struct xfs_bmbt_irec *PREV)
{
enum xfs_rmap_intent_type type = XFS_RMAP_CONVERT;
+ bool isrt = xfs_ifork_is_realtime(ip, whichfork);
if (!xfs_rmap_update_is_needed(mp, whichfork))
return;
@@ -2722,15 +2819,15 @@ xfs_rmap_convert_extent(
if (whichfork != XFS_ATTR_FORK && xfs_is_reflink_inode(ip))
type = XFS_RMAP_CONVERT_SHARED;
- __xfs_rmap_add(tp, type, ip->i_ino, whichfork, PREV);
+ __xfs_rmap_add(tp, type, ip->i_ino, isrt, whichfork, PREV);
}
/* Schedule the creation of an rmap for non-file data. */
void
xfs_rmap_alloc_extent(
struct xfs_trans *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
+ bool isrt,
+ xfs_fsblock_t fsbno,
xfs_extlen_t len,
uint64_t owner)
{
@@ -2739,20 +2836,20 @@ xfs_rmap_alloc_extent(
if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK))
return;
- bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno);
+ bmap.br_startblock = fsbno;
bmap.br_blockcount = len;
bmap.br_startoff = 0;
bmap.br_state = XFS_EXT_NORM;
- __xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, XFS_DATA_FORK, &bmap);
+ __xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, isrt, XFS_DATA_FORK, &bmap);
}
/* Schedule the deletion of an rmap for non-file data. */
void
xfs_rmap_free_extent(
struct xfs_trans *tp,
- xfs_agnumber_t agno,
- xfs_agblock_t bno,
+ bool isrt,
+ xfs_fsblock_t fsbno,
xfs_extlen_t len,
uint64_t owner)
{
@@ -2761,12 +2858,12 @@ xfs_rmap_free_extent(
if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK))
return;
- bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno);
+ bmap.br_startblock = fsbno;
bmap.br_blockcount = len;
bmap.br_startoff = 0;
bmap.br_state = XFS_EXT_NORM;
- __xfs_rmap_add(tp, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap);
+ __xfs_rmap_add(tp, XFS_RMAP_FREE, owner, isrt, XFS_DATA_FORK, &bmap);
}
/* Compare rmap records. Returns -1 if a < b, 1 if a > b, and 0 if equal. */
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 96b4321d8310..5f39f6e53cd1 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -7,6 +7,7 @@
#define __XFS_RMAP_H__
struct xfs_perag;
+struct xfs_rtgroup;
static inline void
xfs_rmap_ino_bmbt_owner(
@@ -174,6 +175,7 @@ struct xfs_rmap_intent {
uint64_t ri_owner;
struct xfs_bmbt_irec ri_bmap;
struct xfs_group *ri_group;
+ bool ri_realtime;
};
/* functions for updating the rmapbt based on bmbt map/unmap operations */
@@ -184,10 +186,10 @@ void xfs_rmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip,
void xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_trans *tp,
struct xfs_inode *ip, int whichfork,
struct xfs_bmbt_irec *imap);
-void xfs_rmap_alloc_extent(struct xfs_trans *tp, xfs_agnumber_t agno,
- xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner);
-void xfs_rmap_free_extent(struct xfs_trans *tp, xfs_agnumber_t agno,
- xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner);
+void xfs_rmap_alloc_extent(struct xfs_trans *tp, bool isrt, xfs_fsblock_t fsbno,
+ xfs_extlen_t len, uint64_t owner);
+void xfs_rmap_free_extent(struct xfs_trans *tp, bool isrt, xfs_fsblock_t fsbno,
+ xfs_extlen_t len, uint64_t owner);
int xfs_rmap_finish_one(struct xfs_trans *tp, struct xfs_rmap_intent *ri,
struct xfs_btree_cur **pcur);
@@ -206,6 +208,8 @@ xfs_failaddr_t xfs_rmap_btrec_to_irec(const union xfs_btree_rec *rec,
struct xfs_rmap_irec *irec);
xfs_failaddr_t xfs_rmap_check_irec(struct xfs_perag *pag,
const struct xfs_rmap_irec *irec);
+xfs_failaddr_t xfs_rtrmap_check_irec(struct xfs_rtgroup *rtg,
+ const struct xfs_rmap_irec *irec);
int xfs_rmap_has_records(struct xfs_btree_cur *cur, xfs_agblock_t bno,
xfs_extlen_t len, enum xbtree_recpacking *outcome);
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 4ddfb7e395b3..770adf60dd73 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -1055,7 +1055,7 @@ xfs_rtfree_extent(
xfs_rtxlen_t len) /* length of extent freed */
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
+ struct xfs_inode *rbmip = rtg_bitmap(rtg);
struct xfs_rtalloc_args args = {
.mp = mp,
.tp = tp,
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h
index 16563a44bd13..22e5d9cd95f4 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.h
+++ b/fs/xfs/libxfs/xfs_rtbitmap.h
@@ -135,6 +135,15 @@ xfs_rtb_to_rtx(
return div_u64(rtbno, mp->m_sb.sb_rextsize);
}
+/* Return the offset of an rtgroup block number within an rt extent. */
+static inline xfs_extlen_t
+xfs_rgbno_to_rtxoff(
+ struct xfs_mount *mp,
+ xfs_rgblock_t rgbno)
+{
+ return rgbno % mp->m_sb.sb_rextsize;
+}
+
/* Return the offset of an rt block number within an rt extent. */
static inline xfs_extlen_t
xfs_rtb_to_rtxoff(
diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c
index 4f3bfc884aff..b7ed2d27d545 100644
--- a/fs/xfs/libxfs/xfs_rtgroup.c
+++ b/fs/xfs/libxfs/xfs_rtgroup.c
@@ -33,6 +33,7 @@
#include "xfs_rtbitmap.h"
#include "xfs_metafile.h"
#include "xfs_metadir.h"
+#include "xfs_rtrmap_btree.h"
/* Find the first usable fsblock in this rtgroup. */
static inline uint32_t
@@ -197,11 +198,14 @@ xfs_rtgroup_lock(
* Lock both realtime free space metadata inodes for a freespace
* update.
*/
- xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL);
- xfs_ilock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL);
+ xfs_ilock(rtg_bitmap(rtg), XFS_ILOCK_EXCL);
+ xfs_ilock(rtg_summary(rtg), XFS_ILOCK_EXCL);
} else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
- xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED);
+ xfs_ilock(rtg_bitmap(rtg), XFS_ILOCK_SHARED);
}
+
+ if ((rtglock_flags & XFS_RTGLOCK_RMAP) && rtg_rmap(rtg))
+ xfs_ilock(rtg_rmap(rtg), XFS_ILOCK_EXCL);
}
/* Unlock metadata inodes associated with this rt group. */
@@ -214,11 +218,14 @@ xfs_rtgroup_unlock(
ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) ||
!(rtglock_flags & XFS_RTGLOCK_BITMAP));
+ if ((rtglock_flags & XFS_RTGLOCK_RMAP) && rtg_rmap(rtg))
+ xfs_iunlock(rtg_rmap(rtg), XFS_ILOCK_EXCL);
+
if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
- xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL);
- xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL);
+ xfs_iunlock(rtg_summary(rtg), XFS_ILOCK_EXCL);
+ xfs_iunlock(rtg_bitmap(rtg), XFS_ILOCK_EXCL);
} else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
- xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED);
+ xfs_iunlock(rtg_bitmap(rtg), XFS_ILOCK_SHARED);
}
}
@@ -236,11 +243,12 @@ xfs_rtgroup_trans_join(
ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED));
if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
- xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_BITMAP],
- XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_SUMMARY],
- XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, rtg_bitmap(rtg), XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, rtg_summary(rtg), XFS_ILOCK_EXCL);
}
+
+ if ((rtglock_flags & XFS_RTGLOCK_RMAP) && rtg_rmap(rtg))
+ xfs_trans_ijoin(tp, rtg_rmap(rtg), XFS_ILOCK_EXCL);
}
/* Retrieve rt group geometry. */
@@ -284,7 +292,8 @@ xfs_rtginode_ilock_print_fn(
const struct xfs_inode *ip =
container_of(m, struct xfs_inode, i_lock.dep_map);
- printk(KERN_CONT " rgno=%u", ip->i_projid);
+ printk(KERN_CONT " rgno=%u metatype=%s", ip->i_projid,
+ xfs_metafile_type_str(ip->i_metatype));
}
/*
@@ -316,6 +325,8 @@ struct xfs_rtginode_ops {
unsigned int sick; /* rtgroup sickness flag */
+ unsigned int fmt_mask; /* all valid data fork formats */
+
/* Does the fs have this feature? */
bool (*enabled)(struct xfs_mount *mp);
@@ -331,14 +342,31 @@ static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = {
.name = "bitmap",
.metafile_type = XFS_METAFILE_RTBITMAP,
.sick = XFS_SICK_RG_BITMAP,
+ .fmt_mask = (1U << XFS_DINODE_FMT_EXTENTS) |
+ (1U << XFS_DINODE_FMT_BTREE),
.create = xfs_rtbitmap_create,
},
[XFS_RTGI_SUMMARY] = {
.name = "summary",
.metafile_type = XFS_METAFILE_RTSUMMARY,
.sick = XFS_SICK_RG_SUMMARY,
+ .fmt_mask = (1U << XFS_DINODE_FMT_EXTENTS) |
+ (1U << XFS_DINODE_FMT_BTREE),
.create = xfs_rtsummary_create,
},
+ [XFS_RTGI_RMAP] = {
+ .name = "rmap",
+ .metafile_type = XFS_METAFILE_RTRMAP,
+ .sick = XFS_SICK_RG_RMAPBT,
+ .fmt_mask = 1U << XFS_DINODE_FMT_META_BTREE,
+ /*
+ * growfs must create the rtrmap inodes before adding a
+ * realtime volume to the filesystem, so we cannot use the
+ * rtrmapbt predicate here.
+ */
+ .enabled = xfs_has_rmapbt,
+ .create = xfs_rtrmapbt_create,
+ },
};
/* Return the shortname of this rtgroup inode. */
@@ -435,8 +463,7 @@ xfs_rtginode_load(
return error;
}
- if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
- ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
+ if (XFS_IS_CORRUPT(mp, !((1U << ip->i_df.if_format) & ops->fmt_mask))) {
xfs_irele(ip);
xfs_rtginode_mark_sick(rtg, type);
return -EFSCORRUPTED;
diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h
index 2d7822644eff..6ff222a05367 100644
--- a/fs/xfs/libxfs/xfs_rtgroup.h
+++ b/fs/xfs/libxfs/xfs_rtgroup.h
@@ -14,6 +14,7 @@ struct xfs_trans;
enum xfs_rtg_inodes {
XFS_RTGI_BITMAP, /* allocation bitmap */
XFS_RTGI_SUMMARY, /* allocation summary */
+ XFS_RTGI_RMAP, /* rmap btree inode */
XFS_RTGI_MAX,
};
@@ -64,6 +65,21 @@ static inline xfs_rgnumber_t rtg_rgno(const struct xfs_rtgroup *rtg)
return rtg->rtg_group.xg_gno;
}
+static inline struct xfs_inode *rtg_bitmap(const struct xfs_rtgroup *rtg)
+{
+ return rtg->rtg_inodes[XFS_RTGI_BITMAP];
+}
+
+static inline struct xfs_inode *rtg_summary(const struct xfs_rtgroup *rtg)
+{
+ return rtg->rtg_inodes[XFS_RTGI_SUMMARY];
+}
+
+static inline struct xfs_inode *rtg_rmap(const struct xfs_rtgroup *rtg)
+{
+ return rtg->rtg_inodes[XFS_RTGI_RMAP];
+}
+
/* Passive rtgroup references */
static inline struct xfs_rtgroup *
xfs_rtgroup_get(
@@ -122,6 +138,32 @@ xfs_rtgroup_next(
return xfs_rtgroup_next_range(mp, rtg, 0, mp->m_sb.sb_rgcount - 1);
}
+static inline bool
+xfs_verify_rgbno(
+ struct xfs_rtgroup *rtg,
+ xfs_rgblock_t rgbno)
+{
+ ASSERT(xfs_has_rtgroups(rtg_mount(rtg)));
+
+ return xfs_verify_gbno(rtg_group(rtg), rgbno);
+}
+
+/*
+ * Check that [@rgbno,@len] is a valid extent range in @rtg.
+ *
+ * Must only be used for RTG-enabled file systems.
+ */
+static inline bool
+xfs_verify_rgbext(
+ struct xfs_rtgroup *rtg,
+ xfs_rgblock_t rgbno,
+ xfs_extlen_t len)
+{
+ ASSERT(xfs_has_rtgroups(rtg_mount(rtg)));
+
+ return xfs_verify_gbext(rtg_group(rtg), rgbno, len);
+}
+
static inline xfs_rtblock_t
xfs_rgbno_to_rtb(
struct xfs_rtgroup *rtg,
@@ -223,9 +265,12 @@ int xfs_update_last_rtgroup_size(struct xfs_mount *mp,
#define XFS_RTGLOCK_BITMAP (1U << 0)
/* Lock the rt bitmap inode in shared mode */
#define XFS_RTGLOCK_BITMAP_SHARED (1U << 1)
+/* Lock the rt rmap inode in exclusive mode */
+#define XFS_RTGLOCK_RMAP (1U << 2)
#define XFS_RTGLOCK_ALL_FLAGS (XFS_RTGLOCK_BITMAP | \
- XFS_RTGLOCK_BITMAP_SHARED)
+ XFS_RTGLOCK_BITMAP_SHARED | \
+ XFS_RTGLOCK_RMAP)
void xfs_rtgroup_lock(struct xfs_rtgroup *rtg, unsigned int rtglock_flags);
void xfs_rtgroup_unlock(struct xfs_rtgroup *rtg, unsigned int rtglock_flags);
@@ -248,6 +293,8 @@ int xfs_rtginode_create(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type,
bool init);
void xfs_rtginode_irele(struct xfs_inode **ipp);
static inline const char *xfs_rtginode_path(xfs_rgnumber_t rgno,
enum xfs_rtg_inodes type)
{
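
With XFS_RTGLOCK_RMAP in place, a caller updating a group's rt rmap btree
follows the same lock-then-join pattern already used for the bitmap inodes;
the xfs_rtrmap_finish_init_cursor() helper added to xfs_rmap.c above is the
canonical sequence:

	xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
	xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_RMAP);
	cur = xfs_rtrmapbt_init_cursor(tp, rtg);

Both lock helpers skip the inode when rtg_rmap() returns NULL, which is the
case on filesystems that lack the rmapbt feature.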
diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.c b/fs/xfs/libxfs/xfs_rtrmap_btree.c
new file mode 100644
index 000000000000..b90901e39e92
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rtrmap_btree.c
@@ -0,0 +1,1011 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_inode.h"
+#include "xfs_trans.h"
+#include "xfs_alloc.h"
+#include "xfs_btree.h"
+#include "xfs_btree_staging.h"
+#include "xfs_metafile.h"
+#include "xfs_rmap.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_error.h"
+#include "xfs_extent_busy.h"
+#include "xfs_rtgroup.h"
+#include "xfs_bmap.h"
+#include "xfs_health.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
+
+static struct kmem_cache *xfs_rtrmapbt_cur_cache;
+
+/*
+ * Realtime Reverse Map btree.
+ *
+ * This is a btree used to track the owner(s) of a given extent in the realtime
+ * device. See the comments in xfs_rmap_btree.c for more information.
+ *
+ * This tree is basically the same as the regular rmap btree except that it
+ * is rooted in an inode and does not live in free space.
+ */
+
+static struct xfs_btree_cur *
+xfs_rtrmapbt_dup_cursor(
+ struct xfs_btree_cur *cur)
+{
+ return xfs_rtrmapbt_init_cursor(cur->bc_tp, to_rtg(cur->bc_group));
+}
+
+STATIC int
+xfs_rtrmapbt_get_minrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ if (level == cur->bc_nlevels - 1) {
+ struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
+
+ return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
+ level == 0) / 2;
+ }
+
+ return cur->bc_mp->m_rtrmap_mnr[level != 0];
+}
+
+STATIC int
+xfs_rtrmapbt_get_maxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ if (level == cur->bc_nlevels - 1) {
+ struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
+
+ return xfs_rtrmapbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
+ level == 0);
+ }
+
+ return cur->bc_mp->m_rtrmap_mxr[level != 0];
+}
+
+/* Calculate number of records in the ondisk realtime rmap btree inode root. */
+unsigned int
+xfs_rtrmapbt_droot_maxrecs(
+ unsigned int blocklen,
+ bool leaf)
+{
+ blocklen -= sizeof(struct xfs_rtrmap_root);
+
+ if (leaf)
+ return blocklen / sizeof(struct xfs_rmap_rec);
+ return blocklen / (2 * sizeof(struct xfs_rmap_key) +
+ sizeof(xfs_rtrmap_ptr_t));
+}
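
/*
 * Illustrative numbers: struct xfs_rmap_rec packs to 24 bytes on disk and
 * struct xfs_rmap_key to 20, so a hypothetical 512-byte fork minus the
 * 4-byte root header leaves 508 bytes: room for 508 / 24 = 21 leaf
 * records, or 508 / (2 * 20 + 8) = 10 key/pointer sets in a node root.
 */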
+
+/*
+ * Get the maximum records we could store in the on-disk format.
+ *
+ * For non-root nodes this is equivalent to xfs_rtrmapbt_get_maxrecs, but
+ * for the root node this checks the available space in the dinode fork
+ * so that we can resize the in-memory buffer to match it. After a
+ * resize to the maximum size this function returns the same value
+ * as xfs_rtrmapbt_get_maxrecs for the root node, too.
+ */
+STATIC int
+xfs_rtrmapbt_get_dmaxrecs(
+ struct xfs_btree_cur *cur,
+ int level)
+{
+ if (level != cur->bc_nlevels - 1)
+ return cur->bc_mp->m_rtrmap_mxr[level != 0];
+ return xfs_rtrmapbt_droot_maxrecs(cur->bc_ino.forksize, level == 0);
+}
+
+/*
+ * Convert the ondisk record's offset field into the ondisk key's offset field.
+ * Fork and bmbt are significant parts of the rmap record key, but written
+ * status is merely a record attribute.
+ */
+static inline __be64 ondisk_rec_offset_to_key(const union xfs_btree_rec *rec)
+{
+ return rec->rmap.rm_offset & ~cpu_to_be64(XFS_RMAP_OFF_UNWRITTEN);
+}
+
+STATIC void
+xfs_rtrmapbt_init_key_from_rec(
+ union xfs_btree_key *key,
+ const union xfs_btree_rec *rec)
+{
+ key->rmap.rm_startblock = rec->rmap.rm_startblock;
+ key->rmap.rm_owner = rec->rmap.rm_owner;
+ key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
+}
+
+STATIC void
+xfs_rtrmapbt_init_high_key_from_rec(
+ union xfs_btree_key *key,
+ const union xfs_btree_rec *rec)
+{
+ uint64_t off;
+ int adj;
+
+ adj = be32_to_cpu(rec->rmap.rm_blockcount) - 1;
+
+ key->rmap.rm_startblock = rec->rmap.rm_startblock;
+ be32_add_cpu(&key->rmap.rm_startblock, adj);
+ key->rmap.rm_owner = rec->rmap.rm_owner;
+ key->rmap.rm_offset = ondisk_rec_offset_to_key(rec);
+ if (XFS_RMAP_NON_INODE_OWNER(be64_to_cpu(rec->rmap.rm_owner)) ||
+ XFS_RMAP_IS_BMBT_BLOCK(be64_to_cpu(rec->rmap.rm_offset)))
+ return;
+ off = be64_to_cpu(key->rmap.rm_offset);
+ off = (XFS_RMAP_OFF(off) + adj) | (off & ~XFS_RMAP_OFF_MASK);
+ key->rmap.rm_offset = cpu_to_be64(off);
+}
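
/*
 * Worked example (illustrative): a record with rm_startblock = 10 and
 * rm_blockcount = 5 covers blocks 10-14, so adj = 4 and the high key gets
 * rm_startblock = 14.  Plain inode owners advance the file offset by the
 * same adj; bmbt blocks and non-inode owners keep the offset untouched.
 */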
+
+STATIC void
+xfs_rtrmapbt_init_rec_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_rec *rec)
+{
+ rec->rmap.rm_startblock = cpu_to_be32(cur->bc_rec.r.rm_startblock);
+ rec->rmap.rm_blockcount = cpu_to_be32(cur->bc_rec.r.rm_blockcount);
+ rec->rmap.rm_owner = cpu_to_be64(cur->bc_rec.r.rm_owner);
+ rec->rmap.rm_offset = cpu_to_be64(
+ xfs_rmap_irec_offset_pack(&cur->bc_rec.r));
+}
+
+STATIC void
+xfs_rtrmapbt_init_ptr_from_cur(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr)
+{
+ ptr->l = 0;
+}
+
+/*
+ * Mask the appropriate parts of the ondisk key field for a key comparison.
+ * Fork and bmbt are significant parts of the rmap record key, but written
+ * status is merely a record attribute.
+ */
+static inline uint64_t offset_keymask(uint64_t offset)
+{
+ return offset & ~XFS_RMAP_OFF_UNWRITTEN;
+}
+
+STATIC int64_t
+xfs_rtrmapbt_key_diff(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key)
+{
+ struct xfs_rmap_irec *rec = &cur->bc_rec.r;
+ const struct xfs_rmap_key *kp = &key->rmap;
+ __u64 x, y;
+ int64_t d;
+
+ d = (int64_t)be32_to_cpu(kp->rm_startblock) - rec->rm_startblock;
+ if (d)
+ return d;
+
+ x = be64_to_cpu(kp->rm_owner);
+ y = rec->rm_owner;
+ if (x > y)
+ return 1;
+ else if (y > x)
+ return -1;
+
+ x = offset_keymask(be64_to_cpu(kp->rm_offset));
+ y = offset_keymask(xfs_rmap_irec_offset_pack(rec));
+ if (x > y)
+ return 1;
+ else if (y > x)
+ return -1;
+ return 0;
+}
+
+STATIC int64_t
+xfs_rtrmapbt_diff_two_keys(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *k1,
+ const union xfs_btree_key *k2,
+ const union xfs_btree_key *mask)
+{
+ const struct xfs_rmap_key *kp1 = &k1->rmap;
+ const struct xfs_rmap_key *kp2 = &k2->rmap;
+ int64_t d;
+ __u64 x, y;
+
+ /* Doesn't make sense to mask off the physical space part */
+ ASSERT(!mask || mask->rmap.rm_startblock);
+
+ d = (int64_t)be32_to_cpu(kp1->rm_startblock) -
+ be32_to_cpu(kp2->rm_startblock);
+ if (d)
+ return d;
+
+ if (!mask || mask->rmap.rm_owner) {
+ x = be64_to_cpu(kp1->rm_owner);
+ y = be64_to_cpu(kp2->rm_owner);
+ if (x > y)
+ return 1;
+ else if (y > x)
+ return -1;
+ }
+
+ if (!mask || mask->rmap.rm_offset) {
+ /* Doesn't make sense to allow offset but not owner */
+ ASSERT(!mask || mask->rmap.rm_owner);
+
+ x = offset_keymask(be64_to_cpu(kp1->rm_offset));
+ y = offset_keymask(be64_to_cpu(kp2->rm_offset));
+ if (x > y)
+ return 1;
+ else if (y > x)
+ return -1;
+ }
+
+ return 0;
+}
+
+static xfs_failaddr_t
+xfs_rtrmapbt_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ xfs_failaddr_t fa;
+ int level;
+
+ if (!xfs_verify_magic(bp, block->bb_magic))
+ return __this_address;
+
+ if (!xfs_has_rmapbt(mp))
+ return __this_address;
+ fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
+ if (fa)
+ return fa;
+ level = be16_to_cpu(block->bb_level);
+ if (level > mp->m_rtrmap_maxlevels)
+ return __this_address;
+
+ return xfs_btree_fsblock_verify(bp, mp->m_rtrmap_mxr[level != 0]);
+}
+
+static void
+xfs_rtrmapbt_read_verify(
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+
+ if (!xfs_btree_fsblock_verify_crc(bp))
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ else {
+ fa = xfs_rtrmapbt_verify(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ }
+
+ if (bp->b_error)
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+}
+
+static void
+xfs_rtrmapbt_write_verify(
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+
+ fa = xfs_rtrmapbt_verify(bp);
+ if (fa) {
+ trace_xfs_btree_corrupt(bp, _RET_IP_);
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ return;
+ }
+ xfs_btree_fsblock_calc_crc(bp);
+
+}
+
+const struct xfs_buf_ops xfs_rtrmapbt_buf_ops = {
+ .name = "xfs_rtrmapbt",
+ .magic = { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
+ .verify_read = xfs_rtrmapbt_read_verify,
+ .verify_write = xfs_rtrmapbt_write_verify,
+ .verify_struct = xfs_rtrmapbt_verify,
+};
+
+STATIC int
+xfs_rtrmapbt_keys_inorder(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *k1,
+ const union xfs_btree_key *k2)
+{
+ uint32_t x;
+ uint32_t y;
+ uint64_t a;
+ uint64_t b;
+
+ x = be32_to_cpu(k1->rmap.rm_startblock);
+ y = be32_to_cpu(k2->rmap.rm_startblock);
+ if (x < y)
+ return 1;
+ else if (x > y)
+ return 0;
+ a = be64_to_cpu(k1->rmap.rm_owner);
+ b = be64_to_cpu(k2->rmap.rm_owner);
+ if (a < b)
+ return 1;
+ else if (a > b)
+ return 0;
+ a = offset_keymask(be64_to_cpu(k1->rmap.rm_offset));
+ b = offset_keymask(be64_to_cpu(k2->rmap.rm_offset));
+ if (a <= b)
+ return 1;
+ return 0;
+}
+
+STATIC int
+xfs_rtrmapbt_recs_inorder(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_rec *r1,
+ const union xfs_btree_rec *r2)
+{
+ uint32_t x;
+ uint32_t y;
+ uint64_t a;
+ uint64_t b;
+
+ x = be32_to_cpu(r1->rmap.rm_startblock);
+ y = be32_to_cpu(r2->rmap.rm_startblock);
+ if (x < y)
+ return 1;
+ else if (x > y)
+ return 0;
+ a = be64_to_cpu(r1->rmap.rm_owner);
+ b = be64_to_cpu(r2->rmap.rm_owner);
+ if (a < b)
+ return 1;
+ else if (a > b)
+ return 0;
+ a = offset_keymask(be64_to_cpu(r1->rmap.rm_offset));
+ b = offset_keymask(be64_to_cpu(r2->rmap.rm_offset));
+ if (a <= b)
+ return 1;
+ return 0;
+}
+
+STATIC enum xbtree_key_contig
+xfs_rtrmapbt_keys_contiguous(
+ struct xfs_btree_cur *cur,
+ const union xfs_btree_key *key1,
+ const union xfs_btree_key *key2,
+ const union xfs_btree_key *mask)
+{
+ ASSERT(!mask || mask->rmap.rm_startblock);
+
+ /*
+ * We only support checking contiguity of the physical space component.
+ * If any callers ever need more specificity than that, they'll have to
+ * implement it here.
+ */
+ ASSERT(!mask || (!mask->rmap.rm_owner && !mask->rmap.rm_offset));
+
+ return xbtree_key_contig(be32_to_cpu(key1->rmap.rm_startblock),
+ be32_to_cpu(key2->rmap.rm_startblock));
+}
+
+static inline void
+xfs_rtrmapbt_move_ptrs(
+ struct xfs_mount *mp,
+ struct xfs_btree_block *broot,
+ short old_size,
+ size_t new_size,
+ unsigned int numrecs)
+{
+ void *dptr;
+ void *sptr;
+
+ sptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, old_size);
+ dptr = xfs_rtrmap_broot_ptr_addr(mp, broot, 1, new_size);
+ memmove(dptr, sptr, numrecs * sizeof(xfs_rtrmap_ptr_t));
+}
+
+static struct xfs_btree_block *
+xfs_rtrmapbt_broot_realloc(
+ struct xfs_btree_cur *cur,
+ unsigned int new_numrecs)
+{
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
+ struct xfs_btree_block *broot;
+ unsigned int new_size;
+ unsigned int old_size = ifp->if_broot_bytes;
+ const unsigned int level = cur->bc_nlevels - 1;
+
+ new_size = xfs_rtrmap_broot_space_calc(mp, level, new_numrecs);
+
+ /* Handle the nop case quietly. */
+ if (new_size == old_size)
+ return ifp->if_broot;
+
+ if (new_size > old_size) {
+ unsigned int old_numrecs;
+
+ /*
+ * If there wasn't any memory allocated before, just allocate
+ * it now and get out.
+ */
+ if (old_size == 0)
+ return xfs_broot_realloc(ifp, new_size);
+
+ /*
+ * If there is already an existing if_broot, then we need to
+ * realloc it and possibly move the node block pointers because
+ * those are not butted up against the btree block header.
+ */
+ old_numrecs = xfs_rtrmapbt_maxrecs(mp, old_size, level == 0);
+ broot = xfs_broot_realloc(ifp, new_size);
+ if (level > 0)
+ xfs_rtrmapbt_move_ptrs(mp, broot, old_size, new_size,
+ old_numrecs);
+ goto out_broot;
+ }
+
+ /*
+ * We're reducing numrecs. If we're going all the way to zero, just
+ * free the block.
+ */
+ ASSERT(ifp->if_broot != NULL && old_size > 0);
+ if (new_size == 0)
+ return xfs_broot_realloc(ifp, 0);
+
+ /*
+ * Shrink the btree root by possibly moving the rtrmapbt pointers,
+ * since they are not butted up against the btree block header. Then
+ * reallocate broot.
+ */
+ if (level > 0)
+ xfs_rtrmapbt_move_ptrs(mp, ifp->if_broot, old_size, new_size,
+ new_numrecs);
+ broot = xfs_broot_realloc(ifp, new_size);
+
+out_broot:
+ ASSERT(xfs_rtrmap_droot_space(broot) <=
+ xfs_inode_fork_size(cur->bc_ino.ip, cur->bc_ino.whichfork));
+ return broot;
+}
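
/*
 * A sketch of why the pointer motion is needed: in a node-format incore
 * root the key array sits just after the block header, but
 * xfs_rtrmap_broot_ptr_addr() places the pointer array at an offset
 * derived from the root's total size, so resizing the root moves the
 * pointers:
 *
 *	old: [hdr][keys][ptrs @ offset(old_size)]
 *	new: [hdr][keys][......ptrs @ offset(new_size)]
 */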
+
+const struct xfs_btree_ops xfs_rtrmapbt_ops = {
+ .name = "rtrmap",
+ .type = XFS_BTREE_TYPE_INODE,
+ .geom_flags = XFS_BTGEO_OVERLAPPING |
+ XFS_BTGEO_IROOT_RECORDS,
+
+ .rec_len = sizeof(struct xfs_rmap_rec),
+ /* Overlapping btree; 2 keys per pointer. */
+ .key_len = 2 * sizeof(struct xfs_rmap_key),
+ .ptr_len = XFS_BTREE_LONG_PTR_LEN,
+
+ .lru_refs = XFS_RMAP_BTREE_REF,
+ .statoff = XFS_STATS_CALC_INDEX(xs_rtrmap_2),
+ .sick_mask = XFS_SICK_RG_RMAPBT,
+
+ .dup_cursor = xfs_rtrmapbt_dup_cursor,
+ .alloc_block = xfs_btree_alloc_metafile_block,
+ .free_block = xfs_btree_free_metafile_block,
+ .get_minrecs = xfs_rtrmapbt_get_minrecs,
+ .get_maxrecs = xfs_rtrmapbt_get_maxrecs,
+ .get_dmaxrecs = xfs_rtrmapbt_get_dmaxrecs,
+ .init_key_from_rec = xfs_rtrmapbt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_rtrmapbt_init_high_key_from_rec,
+ .init_rec_from_cur = xfs_rtrmapbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfs_rtrmapbt_init_ptr_from_cur,
+ .key_diff = xfs_rtrmapbt_key_diff,
+ .buf_ops = &xfs_rtrmapbt_buf_ops,
+ .diff_two_keys = xfs_rtrmapbt_diff_two_keys,
+ .keys_inorder = xfs_rtrmapbt_keys_inorder,
+ .recs_inorder = xfs_rtrmapbt_recs_inorder,
+ .keys_contiguous = xfs_rtrmapbt_keys_contiguous,
+ .broot_realloc = xfs_rtrmapbt_broot_realloc,
+};
+
+/* Allocate a new rt rmap btree cursor. */
+struct xfs_btree_cur *
+xfs_rtrmapbt_init_cursor(
+ struct xfs_trans *tp,
+ struct xfs_rtgroup *rtg)
+{
+ struct xfs_inode *ip = rtg_rmap(rtg);
+ struct xfs_mount *mp = rtg_mount(rtg);
+ struct xfs_btree_cur *cur;
+
+ xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
+
+ cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_ops,
+ mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);
+
+ cur->bc_ino.ip = ip;
+ cur->bc_group = xfs_group_hold(rtg_group(rtg));
+ cur->bc_ino.whichfork = XFS_DATA_FORK;
+ cur->bc_nlevels = be16_to_cpu(ip->i_df.if_broot->bb_level) + 1;
+ cur->bc_ino.forksize = xfs_inode_fork_size(ip, XFS_DATA_FORK);
+
+ return cur;
+}
+
+#ifdef CONFIG_XFS_BTREE_IN_MEM
+/*
+ * Validate an in-memory realtime rmap btree block. Callers are allowed to
+ * generate an in-memory btree even if the ondisk feature is not enabled.
+ */
+static xfs_failaddr_t
+xfs_rtrmapbt_mem_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ xfs_failaddr_t fa;
+ unsigned int level;
+ unsigned int maxrecs;
+
+ if (!xfs_verify_magic(bp, block->bb_magic))
+ return __this_address;
+
+ fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
+ if (fa)
+ return fa;
+
+ level = be16_to_cpu(block->bb_level);
+ if (xfs_has_rmapbt(mp)) {
+ if (level >= mp->m_rtrmap_maxlevels)
+ return __this_address;
+ } else {
+ if (level >= xfs_rtrmapbt_maxlevels_ondisk())
+ return __this_address;
+ }
+
+ maxrecs = xfs_rtrmapbt_maxrecs(mp, XFBNO_BLOCKSIZE, level == 0);
+ return xfs_btree_memblock_verify(bp, maxrecs);
+}
+
+static void
+xfs_rtrmapbt_mem_rw_verify(
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa = xfs_rtrmapbt_mem_verify(bp);
+
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+}
+
+/* skip crc checks on in-memory btrees to save time */
+static const struct xfs_buf_ops xfs_rtrmapbt_mem_buf_ops = {
+ .name = "xfs_rtrmapbt_mem",
+ .magic = { 0, cpu_to_be32(XFS_RTRMAP_CRC_MAGIC) },
+ .verify_read = xfs_rtrmapbt_mem_rw_verify,
+ .verify_write = xfs_rtrmapbt_mem_rw_verify,
+ .verify_struct = xfs_rtrmapbt_mem_verify,
+};
+
+const struct xfs_btree_ops xfs_rtrmapbt_mem_ops = {
+ .type = XFS_BTREE_TYPE_MEM,
+ .geom_flags = XFS_BTGEO_OVERLAPPING,
+
+ .rec_len = sizeof(struct xfs_rmap_rec),
+ /* Overlapping btree; 2 keys per pointer. */
+ .key_len = 2 * sizeof(struct xfs_rmap_key),
+ .ptr_len = XFS_BTREE_LONG_PTR_LEN,
+
+ .lru_refs = XFS_RMAP_BTREE_REF,
+ .statoff = XFS_STATS_CALC_INDEX(xs_rtrmap_mem_2),
+
+ .dup_cursor = xfbtree_dup_cursor,
+ .set_root = xfbtree_set_root,
+ .alloc_block = xfbtree_alloc_block,
+ .free_block = xfbtree_free_block,
+ .get_minrecs = xfbtree_get_minrecs,
+ .get_maxrecs = xfbtree_get_maxrecs,
+ .init_key_from_rec = xfs_rtrmapbt_init_key_from_rec,
+ .init_high_key_from_rec = xfs_rtrmapbt_init_high_key_from_rec,
+ .init_rec_from_cur = xfs_rtrmapbt_init_rec_from_cur,
+ .init_ptr_from_cur = xfbtree_init_ptr_from_cur,
+ .key_diff = xfs_rtrmapbt_key_diff,
+ .buf_ops = &xfs_rtrmapbt_mem_buf_ops,
+ .diff_two_keys = xfs_rtrmapbt_diff_two_keys,
+ .keys_inorder = xfs_rtrmapbt_keys_inorder,
+ .recs_inorder = xfs_rtrmapbt_recs_inorder,
+ .keys_contiguous = xfs_rtrmapbt_keys_contiguous,
+};
+
+/* Create a cursor for an in-memory btree. */
+struct xfs_btree_cur *
+xfs_rtrmapbt_mem_cursor(
+ struct xfs_rtgroup *rtg,
+ struct xfs_trans *tp,
+ struct xfbtree *xfbt)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+ struct xfs_btree_cur *cur;
+
+ cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrmapbt_mem_ops,
+ mp->m_rtrmap_maxlevels, xfs_rtrmapbt_cur_cache);
+ cur->bc_mem.xfbtree = xfbt;
+ cur->bc_nlevels = xfbt->nlevels;
+ cur->bc_group = xfs_group_hold(rtg_group(rtg));
+ return cur;
+}
+
+/* Create an in-memory realtime rmap btree. */
+int
+xfs_rtrmapbt_mem_init(
+ struct xfs_mount *mp,
+ struct xfbtree *xfbt,
+ struct xfs_buftarg *btp,
+ xfs_rgnumber_t rgno)
+{
+ xfbt->owner = rgno;
+ return xfbtree_init(mp, xfbt, btp, &xfs_rtrmapbt_mem_ops);
+}
+#endif /* CONFIG_XFS_BTREE_IN_MEM */
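
/*
 * Illustrative usage sketch (assumes a caller-created xfile buftarg btp;
 * xfbtree_destroy() is the generic in-memory btree teardown):
 *
 *	struct xfbtree xfbt;
 *	struct xfs_btree_cur *cur;
 *	int error;
 *
 *	error = xfs_rtrmapbt_mem_init(mp, &xfbt, btp, rtg_rgno(rtg));
 *	if (error)
 *		return error;
 *	cur = xfs_rtrmapbt_mem_cursor(rtg, tp, &xfbt);
 *	(stage and query rmap records here)
 *	xfs_btree_del_cursor(cur, error);
 *	xfbtree_destroy(&xfbt);
 */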
+
+/*
+ * Install a new rt reverse mapping btree root. Caller is responsible for
+ * invalidating and freeing the old btree blocks.
+ */
+void
+xfs_rtrmapbt_commit_staged_btree(
+ struct xfs_btree_cur *cur,
+ struct xfs_trans *tp)
+{
+ struct xbtree_ifakeroot *ifake = cur->bc_ino.ifake;
+ struct xfs_ifork *ifp;
+ int flags = XFS_ILOG_CORE | XFS_ILOG_DBROOT;
+
+ ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
+ ASSERT(ifake->if_fork->if_format == XFS_DINODE_FMT_META_BTREE);
+
+ /*
+ * Free any resources hanging off the real fork, then shallow-copy the
+ * staging fork's contents into the real fork to transfer everything
+ * we just built.
+ */
+ ifp = xfs_ifork_ptr(cur->bc_ino.ip, XFS_DATA_FORK);
+ xfs_idestroy_fork(ifp);
+ memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork));
+
+ cur->bc_ino.ip->i_projid = cur->bc_group->xg_gno;
+ xfs_trans_log_inode(tp, cur->bc_ino.ip, flags);
+ xfs_btree_commit_ifakeroot(cur, tp, XFS_DATA_FORK);
+}
+
+/* Calculate the number of records in an rt reverse mapping btree block. */
+static inline unsigned int
+xfs_rtrmapbt_block_maxrecs(
+ unsigned int blocklen,
+ bool leaf)
+{
+ if (leaf)
+ return blocklen / sizeof(struct xfs_rmap_rec);
+ return blocklen /
+ (2 * sizeof(struct xfs_rmap_key) + sizeof(xfs_rtrmap_ptr_t));
+}
+
+/*
+ * Calculate the number of records that fit in an rt reverse mapping btree
+ * block, accounting for the space taken up by the block header.
+ */
+unsigned int
+xfs_rtrmapbt_maxrecs(
+ struct xfs_mount *mp,
+ unsigned int blocklen,
+ bool leaf)
+{
+ blocklen -= XFS_RTRMAP_BLOCK_LEN;
+ return xfs_rtrmapbt_block_maxrecs(blocklen, leaf);
+}
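
Worked example, assuming the usual ondisk sizes (24-byte struct xfs_rmap_rec, 20-byte packed struct xfs_rmap_key, 8-byte block pointer, 72-byte long-format CRC block header): with 4096-byte blocks, blocklen becomes 4096 - 72 = 4024, so a leaf holds 4024 / 24 = 167 records and an interior node holds 4024 / (2 * 20 + 8) = 83 key/pointer sets.
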
+
+/* Compute the max possible height for realtime reverse mapping btrees. */
+unsigned int
+xfs_rtrmapbt_maxlevels_ondisk(void)
+{
+ unsigned int minrecs[2];
+ unsigned int blocklen;
+
+ blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
+
+ minrecs[0] = xfs_rtrmapbt_block_maxrecs(blocklen, true) / 2;
+ minrecs[1] = xfs_rtrmapbt_block_maxrecs(blocklen, false) / 2;
+
+ /* We need at most one record for every block in an rt group. */
+ return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_RGBLOCKS);
+}
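
Continuing that example at XFS_MIN_CRC_BLOCKSIZE (1024 bytes): blocklen = 1024 - 72 = 952, so maxrecs is 39 for leaves and 19 for nodes, and minrecs is {19, 9}. xfs_btree_compute_maxlevels() then returns the smallest height at which 19 * 9^(height - 1) record slots cover XFS_MAX_RGBLOCKS; assuming the rt group block limit is on the order of 2^31, that works out to roughly 10 levels.
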
+
+int __init
+xfs_rtrmapbt_init_cur_cache(void)
+{
+ xfs_rtrmapbt_cur_cache = kmem_cache_create("xfs_rtrmapbt_cur",
+ xfs_btree_cur_sizeof(xfs_rtrmapbt_maxlevels_ondisk()),
+ 0, 0, NULL);
+
+ if (!xfs_rtrmapbt_cur_cache)
+ return -ENOMEM;
+ return 0;
+}
+
+void
+xfs_rtrmapbt_destroy_cur_cache(void)
+{
+ kmem_cache_destroy(xfs_rtrmapbt_cur_cache);
+ xfs_rtrmapbt_cur_cache = NULL;
+}
+
+/* Compute the maximum height of an rt reverse mapping btree. */
+void
+xfs_rtrmapbt_compute_maxlevels(
+ struct xfs_mount *mp)
+{
+ unsigned int d_maxlevels, r_maxlevels;
+
+ if (!xfs_has_rtrmapbt(mp)) {
+ mp->m_rtrmap_maxlevels = 0;
+ return;
+ }
+
+ /*
+ * The realtime rmapbt lives on the data device, which means that its
+ * maximum height is constrained by the size of the data device and
+ * the height required to store one rmap record for each block in an
+ * rt group.
+ */
+ d_maxlevels = xfs_btree_space_to_height(mp->m_rtrmap_mnr,
+ mp->m_sb.sb_dblocks);
+ r_maxlevels = xfs_btree_compute_maxlevels(mp->m_rtrmap_mnr,
+ mp->m_groups[XG_TYPE_RTG].blocks);
+
+ /* Add one level to handle the inode root level. */
+ mp->m_rtrmap_maxlevels = min(d_maxlevels, r_maxlevels) + 1;
+}
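
A hypothetical instance of the min() above: with minrecs of {83, 41} from 4096-byte blocks and an rt group of 2^20 blocks, r_maxlevels is 4, since 83 * 41^2 (about 140,000) leaf slots fall short of 2^20 but 83 * 41^3 (about 5.7 million) do not; if the data device is large enough that d_maxlevels exceeds 4, the final m_rtrmap_maxlevels is 4 + 1 = 5 once the inode root level is added.
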
+
+/* Calculate the rtrmap btree size for some records. */
+unsigned long long
+xfs_rtrmapbt_calc_size(
+ struct xfs_mount *mp,
+ unsigned long long len)
+{
+ return xfs_btree_calc_size(mp->m_rtrmap_mnr, len);
+}
+
+/*
+ * Calculate the maximum rmap btree size.
+ */
+static unsigned long long
+xfs_rtrmapbt_max_size(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtblocks)
+{
+ /* Bail out if we're uninitialized, which can happen in mkfs. */
+ if (mp->m_rtrmap_mxr[0] == 0)
+ return 0;
+
+ return xfs_rtrmapbt_calc_size(mp, rtblocks);
+}
+
+/*
+ * Figure out how many blocks to reserve and how many are used by this btree.
+ */
+xfs_filblks_t
+xfs_rtrmapbt_calc_reserves(
+ struct xfs_mount *mp)
+{
+ uint32_t blocks = mp->m_groups[XG_TYPE_RTG].blocks;
+
+ if (!xfs_has_rtrmapbt(mp))
+ return 0;
+
+ /* Reserve 1% of the rtgroup or enough for 1 block per record. */
+ return max_t(xfs_filblks_t, blocks / 100,
+ xfs_rtrmapbt_max_size(mp, blocks));
+}
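
For instance, in a hypothetical 1,000,000-block rt group with minrecs of {83, 41}: the 1% clause yields 10,000 blocks, while xfs_rtrmapbt_calc_size() (which sizes the tree at minimal fanout) yields about 1,000,000 / 83 = 12,049 leaves plus roughly 300 interior blocks, so the worst-case btree size wins and about 12,350 blocks are reserved.
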
+
+/* Convert on-disk form of btree root to in-memory form. */
+STATIC void
+xfs_rtrmapbt_from_disk(
+ struct xfs_inode *ip,
+ struct xfs_rtrmap_root *dblock,
+ unsigned int dblocklen,
+ struct xfs_btree_block *rblock)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_rmap_key *fkp;
+ __be64 *fpp;
+ struct xfs_rmap_key *tkp;
+ __be64 *tpp;
+ struct xfs_rmap_rec *frp;
+ struct xfs_rmap_rec *trp;
+ unsigned int rblocklen = xfs_rtrmap_broot_space(mp, dblock);
+ unsigned int numrecs;
+ unsigned int maxrecs;
+
+ xfs_btree_init_block(mp, rblock, &xfs_rtrmapbt_ops, 0, 0, ip->i_ino);
+
+ rblock->bb_level = dblock->bb_level;
+ rblock->bb_numrecs = dblock->bb_numrecs;
+ numrecs = be16_to_cpu(dblock->bb_numrecs);
+
+ if (be16_to_cpu(rblock->bb_level) > 0) {
+ maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
+ fkp = xfs_rtrmap_droot_key_addr(dblock, 1);
+ tkp = xfs_rtrmap_key_addr(rblock, 1);
+ fpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
+ tpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
+ memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
+ memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
+ } else {
+ frp = xfs_rtrmap_droot_rec_addr(dblock, 1);
+ trp = xfs_rtrmap_rec_addr(rblock, 1);
+ memcpy(trp, frp, sizeof(*frp) * numrecs);
+ }
+}
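
xfs_rtrmapbt_from_disk() and the xfs_rtrmapbt_to_disk() counterpart below copy between the same two root layouts, sketched here for an interior root; the header sizes assume the 72-byte incore long-format CRC block header and a 4-byte ondisk struct xfs_rtrmap_root (two __be16 fields):

	incore root (rblocklen bytes)         ondisk root (dblocklen bytes)
	+----------------------------+        +----------------------------+
	| btree block header (72)    |        | xfs_rtrmap_root hdr (4)    |
	| lo/hi key pairs 1..numrecs | <----> | lo/hi key pairs 1..numrecs |
	| (unused key slots)         |        | (unused key slots)         |
	| ptrs 1..numrecs, based at  |        | ptrs 1..numrecs, based at  |
	|   maxrecs * 2 * keysize    |        |   maxrecs * 2 * keysize    |
	+----------------------------+        +----------------------------+

Because the pointer array is based past the maximum number of key slots for the respective block length, both copy routines must recompute maxrecs for the ondisk size before locating the pointers.
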
+
+/* Load a realtime reverse mapping btree root in from disk. */
+int
+xfs_iformat_rtrmap(
+ struct xfs_inode *ip,
+ struct xfs_dinode *dip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_rtrmap_root *dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
+ struct xfs_btree_block *broot;
+ unsigned int numrecs;
+ unsigned int level;
+ int dsize;
+
+ /*
+ * growfs must create the rtrmap inodes before adding a realtime volume
+ * to the filesystem, so we cannot use the rtrmapbt predicate here.
+ */
+ if (!xfs_has_rmapbt(ip->i_mount)) {
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
+ return -EFSCORRUPTED;
+ }
+
+ dsize = XFS_DFORK_SIZE(dip, mp, XFS_DATA_FORK);
+ numrecs = be16_to_cpu(dfp->bb_numrecs);
+ level = be16_to_cpu(dfp->bb_level);
+
+ if (level > mp->m_rtrmap_maxlevels ||
+ xfs_rtrmap_droot_space_calc(level, numrecs) > dsize) {
+ xfs_inode_mark_sick(ip, XFS_SICK_INO_CORE);
+ return -EFSCORRUPTED;
+ }
+
+ broot = xfs_broot_alloc(xfs_ifork_ptr(ip, XFS_DATA_FORK),
+ xfs_rtrmap_broot_space_calc(mp, level, numrecs));
+ if (broot)
+ xfs_rtrmapbt_from_disk(ip, dfp, dsize, broot);
+ return 0;
+}
+
+/* Convert in-memory form of btree root to on-disk form. */
+void
+xfs_rtrmapbt_to_disk(
+ struct xfs_mount *mp,
+ struct xfs_btree_block *rblock,
+ unsigned int rblocklen,
+ struct xfs_rtrmap_root *dblock,
+ unsigned int dblocklen)
+{
+ struct xfs_rmap_key *fkp;
+ __be64 *fpp;
+ struct xfs_rmap_key *tkp;
+ __be64 *tpp;
+ struct xfs_rmap_rec *frp;
+ struct xfs_rmap_rec *trp;
+ unsigned int numrecs;
+ unsigned int maxrecs;
+
+ ASSERT(rblock->bb_magic == cpu_to_be32(XFS_RTRMAP_CRC_MAGIC));
+ ASSERT(uuid_equal(&rblock->bb_u.l.bb_uuid, &mp->m_sb.sb_meta_uuid));
+ ASSERT(rblock->bb_u.l.bb_blkno == cpu_to_be64(XFS_BUF_DADDR_NULL));
+ ASSERT(rblock->bb_u.l.bb_leftsib == cpu_to_be64(NULLFSBLOCK));
+ ASSERT(rblock->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK));
+
+ dblock->bb_level = rblock->bb_level;
+ dblock->bb_numrecs = rblock->bb_numrecs;
+ numrecs = be16_to_cpu(rblock->bb_numrecs);
+
+ if (be16_to_cpu(rblock->bb_level) > 0) {
+ maxrecs = xfs_rtrmapbt_droot_maxrecs(dblocklen, false);
+ fkp = xfs_rtrmap_key_addr(rblock, 1);
+ tkp = xfs_rtrmap_droot_key_addr(dblock, 1);
+ fpp = xfs_rtrmap_broot_ptr_addr(mp, rblock, 1, rblocklen);
+ tpp = xfs_rtrmap_droot_ptr_addr(dblock, 1, maxrecs);
+ memcpy(tkp, fkp, 2 * sizeof(*fkp) * numrecs);
+ memcpy(tpp, fpp, sizeof(*fpp) * numrecs);
+ } else {
+ frp = xfs_rtrmap_rec_addr(rblock, 1);
+ trp = xfs_rtrmap_droot_rec_addr(dblock, 1);
+ memcpy(trp, frp, sizeof(*frp) * numrecs);
+ }
+}
+
+/* Flush a realtime reverse mapping btree root out to disk. */
+void
+xfs_iflush_rtrmap(
+ struct xfs_inode *ip,
+ struct xfs_dinode *dip)
+{
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
+ struct xfs_rtrmap_root *dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
+
+ ASSERT(ifp->if_broot != NULL);
+ ASSERT(ifp->if_broot_bytes > 0);
+ ASSERT(xfs_rtrmap_droot_space(ifp->if_broot) <=
+ xfs_inode_fork_size(ip, XFS_DATA_FORK));
+ xfs_rtrmapbt_to_disk(ip->i_mount, ifp->if_broot, ifp->if_broot_bytes,
+ dfp, XFS_DFORK_SIZE(dip, ip->i_mount, XFS_DATA_FORK));
+}
+
+/*
+ * Create a realtime rmap btree inode.
+ */
+int
+xfs_rtrmapbt_create(
+ struct xfs_rtgroup *rtg,
+ struct xfs_inode *ip,
+ struct xfs_trans *tp,
+ bool init)
+{
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_btree_block *broot;
+
+ ifp->if_format = XFS_DINODE_FMT_META_BTREE;
+ ASSERT(ifp->if_broot_bytes == 0);
+ ASSERT(ifp->if_bytes == 0);
+
+ /* Initialize the empty incore btree root. */
+ broot = xfs_broot_realloc(ifp, xfs_rtrmap_broot_space_calc(mp, 0, 0));
+ if (broot)
+ xfs_btree_init_block(mp, broot, &xfs_rtrmapbt_ops, 0, 0,
+ ip->i_ino);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE | XFS_ILOG_DBROOT);
+
+ return 0;
+}
+
+/*
+ * Initialize an rmap for a realtime superblock using the potentially updated
+ * rt geometry in the provided @mp.
+ */
+int
+xfs_rtrmapbt_init_rtsb(
+ struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
+ struct xfs_trans *tp)
+{
+ struct xfs_rmap_irec rmap = {
+ .rm_blockcount = mp->m_sb.sb_rextsize,
+ .rm_owner = XFS_RMAP_OWN_FS,
+ };
+ struct xfs_btree_cur *cur;
+ int error;
+
+ ASSERT(xfs_has_rtsb(mp));
+ ASSERT(rtg_rgno(rtg) == 0);
+
+ cur = xfs_rtrmapbt_init_cursor(tp, rtg);
+ error = xfs_rmap_map_raw(cur, &rmap);
+ xfs_btree_del_cursor(cur, error);
+ return error;
+}
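
Note that the designated initializer leaves rm_startblock and rm_offset at zero, so the staged record maps the first sb_rextsize blocks of rtgroup 0 -- that is, the realtime superblock -- to XFS_RMAP_OWN_FS.
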
diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.h b/fs/xfs/libxfs/xfs_rtrmap_btree.h
new file mode 100644
index 000000000000..9d0915089891
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rtrmap_btree.h
@@ -0,0 +1,210 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_RTRMAP_BTREE_H__
+#define __XFS_RTRMAP_BTREE_H__
+
+struct xfs_buf;
+struct xfs_btree_cur;
+struct xfs_mount;
+struct xbtree_ifakeroot;
+struct xfs_rtgroup;
+struct xfbtree;
+
+/* realtime rmaps only exist on crc-enabled filesystems */
+#define XFS_RTRMAP_BLOCK_LEN XFS_BTREE_LBLOCK_CRC_LEN
+
+struct xfs_btree_cur *xfs_rtrmapbt_init_cursor(struct xfs_trans *tp,
+ struct xfs_rtgroup *rtg);
+struct xfs_btree_cur *xfs_rtrmapbt_stage_cursor(struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg, struct xfs_inode *ip,
+ struct xbtree_ifakeroot *ifake);
+void xfs_rtrmapbt_commit_staged_btree(struct xfs_btree_cur *cur,
+ struct xfs_trans *tp);
+unsigned int xfs_rtrmapbt_maxrecs(struct xfs_mount *mp, unsigned int blocklen,
+ bool leaf);
+void xfs_rtrmapbt_compute_maxlevels(struct xfs_mount *mp);
+unsigned int xfs_rtrmapbt_droot_maxrecs(unsigned int blocklen, bool leaf);
+
+/*
+ * Addresses of records, keys, and pointers within an incore rtrmapbt block.
+ *
+ * (note that some of these may appear unused, but they are used in userspace)
+ */
+static inline struct xfs_rmap_rec *
+xfs_rtrmap_rec_addr(
+ struct xfs_btree_block *block,
+ unsigned int index)
+{
+ return (struct xfs_rmap_rec *)
+ ((char *)block + XFS_RTRMAP_BLOCK_LEN +
+ (index - 1) * sizeof(struct xfs_rmap_rec));
+}
+
+static inline struct xfs_rmap_key *
+xfs_rtrmap_key_addr(
+ struct xfs_btree_block *block,
+ unsigned int index)
+{
+ return (struct xfs_rmap_key *)
+ ((char *)block + XFS_RTRMAP_BLOCK_LEN +
+ (index - 1) * 2 * sizeof(struct xfs_rmap_key));
+}
+
+static inline struct xfs_rmap_key *
+xfs_rtrmap_high_key_addr(
+ struct xfs_btree_block *block,
+ unsigned int index)
+{
+ return (struct xfs_rmap_key *)
+ ((char *)block + XFS_RTRMAP_BLOCK_LEN +
+ sizeof(struct xfs_rmap_key) +
+ (index - 1) * 2 * sizeof(struct xfs_rmap_key));
+}
+
+static inline xfs_rtrmap_ptr_t *
+xfs_rtrmap_ptr_addr(
+ struct xfs_btree_block *block,
+ unsigned int index,
+ unsigned int maxrecs)
+{
+ return (xfs_rtrmap_ptr_t *)
+ ((char *)block + XFS_RTRMAP_BLOCK_LEN +
+ maxrecs * 2 * sizeof(struct xfs_rmap_key) +
+ (index - 1) * sizeof(xfs_rtrmap_ptr_t));
+}
+
+unsigned int xfs_rtrmapbt_maxlevels_ondisk(void);
+
+int __init xfs_rtrmapbt_init_cur_cache(void);
+void xfs_rtrmapbt_destroy_cur_cache(void);
+
+xfs_filblks_t xfs_rtrmapbt_calc_reserves(struct xfs_mount *mp);
+
+/* Addresses of key, pointers, and records within an ondisk rtrmapbt block. */
+
+static inline struct xfs_rmap_rec *
+xfs_rtrmap_droot_rec_addr(
+ struct xfs_rtrmap_root *block,
+ unsigned int index)
+{
+ return (struct xfs_rmap_rec *)
+ ((char *)(block + 1) +
+ (index - 1) * sizeof(struct xfs_rmap_rec));
+}
+
+static inline struct xfs_rmap_key *
+xfs_rtrmap_droot_key_addr(
+ struct xfs_rtrmap_root *block,
+ unsigned int index)
+{
+ return (struct xfs_rmap_key *)
+ ((char *)(block + 1) +
+ (index - 1) * 2 * sizeof(struct xfs_rmap_key));
+}
+
+static inline xfs_rtrmap_ptr_t *
+xfs_rtrmap_droot_ptr_addr(
+ struct xfs_rtrmap_root *block,
+ unsigned int index,
+ unsigned int maxrecs)
+{
+ return (xfs_rtrmap_ptr_t *)
+ ((char *)(block + 1) +
+ maxrecs * 2 * sizeof(struct xfs_rmap_key) +
+ (index - 1) * sizeof(xfs_rtrmap_ptr_t));
+}
+
+/*
+ * Addresses of pointers within the incore btree root.
+ *
+ * These are to be used when we know the size of the block and
+ * we don't have a cursor.
+ */
+static inline xfs_rtrmap_ptr_t *
+xfs_rtrmap_broot_ptr_addr(
+ struct xfs_mount *mp,
+ struct xfs_btree_block *bb,
+ unsigned int index,
+ unsigned int block_size)
+{
+ return xfs_rtrmap_ptr_addr(bb, index,
+ xfs_rtrmapbt_maxrecs(mp, block_size, false));
+}
+
+/*
+ * Compute the space required for the incore btree root containing the given
+ * number of records.
+ */
+static inline size_t
+xfs_rtrmap_broot_space_calc(
+ struct xfs_mount *mp,
+ unsigned int level,
+ unsigned int nrecs)
+{
+ size_t sz = XFS_RTRMAP_BLOCK_LEN;
+
+ if (level > 0)
+ return sz + nrecs * (2 * sizeof(struct xfs_rmap_key) +
+ sizeof(xfs_rtrmap_ptr_t));
+ return sz + nrecs * sizeof(struct xfs_rmap_rec);
+}
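
Spot-checking the arithmetic under the same size assumptions as earlier: a level-0 incore root holding 10 records needs 72 + 10 * 24 = 312 bytes, while a level-1 root with 10 key/pointer sets needs 72 + 10 * (2 * 20 + 8) = 552 bytes. The ondisk variant below is identical except for its assumed 4-byte header, giving 244 and 484 bytes respectively.
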
+
+/*
+ * Compute the space required for the incore btree root given the ondisk
+ * btree root block.
+ */
+static inline size_t
+xfs_rtrmap_broot_space(struct xfs_mount *mp, struct xfs_rtrmap_root *bb)
+{
+ return xfs_rtrmap_broot_space_calc(mp, be16_to_cpu(bb->bb_level),
+ be16_to_cpu(bb->bb_numrecs));
+}
+
+/* Compute the space required for the ondisk root block. */
+static inline size_t
+xfs_rtrmap_droot_space_calc(
+ unsigned int level,
+ unsigned int nrecs)
+{
+ size_t sz = sizeof(struct xfs_rtrmap_root);
+
+ if (level > 0)
+ return sz + nrecs * (2 * sizeof(struct xfs_rmap_key) +
+ sizeof(xfs_rtrmap_ptr_t));
+ return sz + nrecs * sizeof(struct xfs_rmap_rec);
+}
+
+/*
+ * Compute the space required for the ondisk root block given an incore root
+ * block.
+ */
+static inline size_t
+xfs_rtrmap_droot_space(struct xfs_btree_block *bb)
+{
+ return xfs_rtrmap_droot_space_calc(be16_to_cpu(bb->bb_level),
+ be16_to_cpu(bb->bb_numrecs));
+}
+
+int xfs_iformat_rtrmap(struct xfs_inode *ip, struct xfs_dinode *dip);
+void xfs_rtrmapbt_to_disk(struct xfs_mount *mp, struct xfs_btree_block *rblock,
+ unsigned int rblocklen, struct xfs_rtrmap_root *dblock,
+ unsigned int dblocklen);
+void xfs_iflush_rtrmap(struct xfs_inode *ip, struct xfs_dinode *dip);
+
+int xfs_rtrmapbt_create(struct xfs_rtgroup *rtg, struct xfs_inode *ip,
+ struct xfs_trans *tp, bool init);
+int xfs_rtrmapbt_init_rtsb(struct xfs_mount *mp, struct xfs_rtgroup *rtg,
+ struct xfs_trans *tp);
+
+unsigned long long xfs_rtrmapbt_calc_size(struct xfs_mount *mp,
+ unsigned long long len);
+
+struct xfs_btree_cur *xfs_rtrmapbt_mem_cursor(struct xfs_rtgroup *rtg,
+ struct xfs_trans *tp, struct xfbtree *xfbtree);
+int xfs_rtrmapbt_mem_init(struct xfs_mount *mp, struct xfbtree *xfbtree,
+ struct xfs_buftarg *btp, xfs_rgnumber_t rgno);
+
+#endif /* __XFS_RTRMAP_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 3b5623611eba..83fb14b4074c 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -28,6 +28,7 @@
#include "xfs_rtbitmap.h"
#include "xfs_exchrange.h"
#include "xfs_rtgroup.h"
+#include "xfs_rtrmap_btree.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -1215,6 +1216,11 @@ xfs_sb_mount_common(
mp->m_rmap_mnr[0] = mp->m_rmap_mxr[0] / 2;
mp->m_rmap_mnr[1] = mp->m_rmap_mxr[1] / 2;
+ mp->m_rtrmap_mxr[0] = xfs_rtrmapbt_maxrecs(mp, sbp->sb_blocksize, true);
+ mp->m_rtrmap_mxr[1] = xfs_rtrmapbt_maxrecs(mp, sbp->sb_blocksize, false);
+ mp->m_rtrmap_mnr[0] = mp->m_rtrmap_mxr[0] / 2;
+ mp->m_rtrmap_mnr[1] = mp->m_rtrmap_mxr[1] / 2;
+
mp->m_refc_mxr[0] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, true);
mp->m_refc_mxr[1] = xfs_refcountbt_maxrecs(mp, sbp->sb_blocksize, false);
mp->m_refc_mnr[0] = mp->m_refc_mxr[0] / 2;
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index e7efdb9ceaf3..960716c387cc 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -42,6 +42,7 @@ extern const struct xfs_buf_ops xfs_rtbitmap_buf_ops;
extern const struct xfs_buf_ops xfs_rtsummary_buf_ops;
extern const struct xfs_buf_ops xfs_rtbuf_ops;
extern const struct xfs_buf_ops xfs_rtsb_buf_ops;
+extern const struct xfs_buf_ops xfs_rtrmapbt_buf_ops;
extern const struct xfs_buf_ops xfs_sb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
@@ -55,6 +56,8 @@ extern const struct xfs_btree_ops xfs_bmbt_ops;
extern const struct xfs_btree_ops xfs_refcountbt_ops;
extern const struct xfs_btree_ops xfs_rmapbt_ops;
extern const struct xfs_btree_ops xfs_rmapbt_mem_ops;
+extern const struct xfs_btree_ops xfs_rtrmapbt_ops;
+extern const struct xfs_btree_ops xfs_rtrmapbt_mem_ops;
static inline bool xfs_btree_is_bno(const struct xfs_btree_ops *ops)
{
@@ -96,10 +99,21 @@ static inline bool xfs_btree_is_mem_rmap(const struct xfs_btree_ops *ops)
{
return ops == &xfs_rmapbt_mem_ops;
}
+
+static inline bool xfs_btree_is_mem_rtrmap(const struct xfs_btree_ops *ops)
+{
+ return ops == &xfs_rtrmapbt_mem_ops;
+}
#else
# define xfs_btree_is_mem_rmap(...) (false)
+# define xfs_btree_is_mem_rtrmap(...) (false)
#endif
+static inline bool xfs_btree_is_rtrmap(const struct xfs_btree_ops *ops)
+{
+ return ops == &xfs_rtrmapbt_ops;
+}
+
/* log size calculation functions */
int xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes);
int xfs_log_calc_minimum_size(struct xfs_mount *);
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index bab402340b5d..f3392eb2d7f4 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -213,7 +213,9 @@ xfs_calc_inode_chunk_res(
* Per-extent log reservation for the btree changes involved in freeing or
* allocating a realtime extent. We have to be able to log as many rtbitmap
* blocks as needed to mark inuse XFS_BMBT_MAX_EXTLEN blocks' worth of realtime
- * extents, as well as the realtime summary block.
+ * extents, as well as the realtime summary block (t1). Realtime rmap btree
+ * operations happen in a second transaction, so factor in a couple of rtrmapbt
+ * splits (t2).
*/
static unsigned int
xfs_rtalloc_block_count(
@@ -222,10 +224,16 @@ xfs_rtalloc_block_count(
{
unsigned int rtbmp_blocks;
xfs_rtxlen_t rtxlen;
+ unsigned int t1, t2 = 0;
rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN);
rtbmp_blocks = xfs_rtbitmap_blockcount_len(mp, rtxlen);
- return (rtbmp_blocks + 1) * num_ops;
+ t1 = (rtbmp_blocks + 1) * num_ops;
+
+ if (xfs_has_rmapbt(mp))
+ t2 = num_ops * (2 * mp->m_rtrmap_maxlevels - 1);
+
+ return max(t1, t2);
}
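
To make the max(t1, t2) concrete with hypothetical numbers: if XFS_MAX_BMBT_EXTLEN spans 9 rtbitmap blocks and m_rtrmap_maxlevels is 10, then num_ops = 2 gives t1 = (9 + 1) * 2 = 20 and t2 = 2 * (2 * 10 - 1) = 38, so the rtrmapbt split estimate dominates the reservation.
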
/*
diff --git a/fs/xfs/libxfs/xfs_trans_space.h b/fs/xfs/libxfs/xfs_trans_space.h
index 1155ff2d37e2..d89b570aafcc 100644
--- a/fs/xfs/libxfs/xfs_trans_space.h
+++ b/fs/xfs/libxfs/xfs_trans_space.h
@@ -14,6 +14,19 @@
#define XFS_MAX_CONTIG_BMAPS_PER_BLOCK(mp) \
(((mp)->m_bmap_dmxr[0]) - ((mp)->m_bmap_dmnr[0]))
+/* Worst case number of realtime rmaps that can be held in a block. */
+#define XFS_MAX_CONTIG_RTRMAPS_PER_BLOCK(mp) \
+ (((mp)->m_rtrmap_mxr[0]) - ((mp)->m_rtrmap_mnr[0]))
+
+/* Adding one realtime rmap could split every level to the top of the tree. */
+#define XFS_RTRMAPADD_SPACE_RES(mp) ((mp)->m_rtrmap_maxlevels)
+
+/* Blocks we might need to add "b" realtime rmaps to a tree. */
+#define XFS_NRTRMAPADD_SPACE_RES(mp, b) \
+ ((((b) + XFS_MAX_CONTIG_RTRMAPS_PER_BLOCK(mp) - 1) / \
+ XFS_MAX_CONTIG_RTRMAPS_PER_BLOCK(mp)) * \
+ XFS_RTRMAPADD_SPACE_RES(mp))
+
/* Worst case number of rmaps that can be held in a block. */
#define XFS_MAX_CONTIG_RMAPS_PER_BLOCK(mp) \
(((mp)->m_rmap_mxr[0]) - ((mp)->m_rmap_mnr[0]))
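
Worked example for the new macros, reusing the hypothetical per-block figures from the btree sizing discussion (mxr[0] = 167, mnr[0] = 83, maxlevels = 10): XFS_MAX_CONTIG_RTRMAPS_PER_BLOCK is 167 - 83 = 84, so adding b = 200 rmaps reserves ((200 + 84 - 1) / 84) * 10 = 3 * 10 = 30 blocks via XFS_NRTRMAPADD_SPACE_RES.
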
diff --git a/fs/xfs/scrub/alloc_repair.c b/fs/xfs/scrub/alloc_repair.c
index 0433363a90b6..bed6a09aa791 100644
--- a/fs/xfs/scrub/alloc_repair.c
+++ b/fs/xfs/scrub/alloc_repair.c
@@ -542,8 +542,9 @@ xrep_abt_dispose_one(
/* Add a deferred rmap for each extent we used. */
if (resv->used > 0)
- xfs_rmap_alloc_extent(sc->tp, pag_agno(pag), resv->agbno,
- resv->used, XFS_RMAP_OWN_AG);
+ xfs_rmap_alloc_extent(sc->tp, false,
+ xfs_agbno_to_fsb(pag, resv->agbno), resv->used,
+ XFS_RMAP_OWN_AG);
/*
* For each reserved btree block we didn't use, add it to the free
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 7e00312225ed..f6077b0cba8a 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -21,6 +21,8 @@
#include "xfs_rmap_btree.h"
#include "xfs_rtgroup.h"
#include "xfs_health.h"
+#include "xfs_rtalloc.h"
+#include "xfs_rtrmap_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
@@ -143,15 +145,22 @@ static inline bool
xchk_bmap_get_rmap(
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec,
- xfs_agblock_t agbno,
+ xfs_agblock_t bno,
uint64_t owner,
struct xfs_rmap_irec *rmap)
{
+ struct xfs_btree_cur **curp = &info->sc->sa.rmap_cur;
xfs_fileoff_t offset;
unsigned int rflags = 0;
int has_rmap;
int error;
+ if (xfs_ifork_is_realtime(info->sc->ip, info->whichfork))
+ curp = &info->sc->sr.rmap_cur;
+
+ if (*curp == NULL)
+ return false;
+
if (info->whichfork == XFS_ATTR_FORK)
rflags |= XFS_RMAP_ATTR_FORK;
if (irec->br_state == XFS_EXT_UNWRITTEN)
@@ -172,13 +181,13 @@ xchk_bmap_get_rmap(
* range rmap lookup to make sure we get the correct owner/offset.
*/
if (info->is_shared) {
- error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
- owner, offset, rflags, rmap, &has_rmap);
+ error = xfs_rmap_lookup_le_range(*curp, bno, owner, offset,
+ rflags, rmap, &has_rmap);
} else {
- error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
- owner, offset, rflags, rmap, &has_rmap);
+ error = xfs_rmap_lookup_le(*curp, bno, owner, offset,
+ rflags, rmap, &has_rmap);
}
- if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
+ if (!xchk_should_check_xref(info->sc, &error, curp))
return false;
if (!has_rmap)
@@ -192,29 +201,29 @@ STATIC void
xchk_bmap_xref_rmap(
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec,
- xfs_agblock_t agbno)
+ xfs_agblock_t bno)
{
struct xfs_rmap_irec rmap;
unsigned long long rmap_end;
uint64_t owner = info->sc->ip->i_ino;
- if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
+ if (xchk_skip_xref(info->sc->sm))
return;
/* Find the rmap record for this irec. */
- if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
+ if (!xchk_bmap_get_rmap(info, irec, bno, owner, &rmap))
return;
/*
* The rmap must be an exact match for this incore file mapping record,
* which may have arisen from multiple ondisk records.
*/
- if (rmap.rm_startblock != agbno)
+ if (rmap.rm_startblock != bno)
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
- if (rmap_end != agbno + irec->br_blockcount)
+ if (rmap_end != bno + irec->br_blockcount)
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
@@ -259,7 +268,7 @@ STATIC void
xchk_bmap_xref_rmap_cow(
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec,
- xfs_agblock_t agbno)
+ xfs_agblock_t bno)
{
struct xfs_rmap_irec rmap;
unsigned long long rmap_end;
@@ -269,7 +278,7 @@ xchk_bmap_xref_rmap_cow(
return;
/* Find the rmap record for this irec. */
- if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
+ if (!xchk_bmap_get_rmap(info, irec, bno, owner, &rmap))
return;
/*
@@ -277,12 +286,12 @@ xchk_bmap_xref_rmap_cow(
* can start before and end after the physical space allocated to this
* mapping. There are no offsets to check.
*/
- if (rmap.rm_startblock > agbno)
+ if (rmap.rm_startblock > bno)
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
- if (rmap_end < agbno + irec->br_blockcount)
+ if (rmap_end < bno + irec->br_blockcount)
xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
@@ -315,6 +324,8 @@ xchk_bmap_rt_iextent_xref(
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec)
{
+ struct xfs_owner_info oinfo;
+ xfs_rgblock_t rgbno;
int error;
error = xchk_rtgroup_init_existing(info->sc,
@@ -324,10 +335,28 @@ xchk_bmap_rt_iextent_xref(
irec->br_startoff, &error))
return;
- xchk_rtgroup_lock(&info->sc->sr, XCHK_RTGLOCK_ALL);
+ error = xchk_rtgroup_lock(info->sc, &info->sc->sr, XCHK_RTGLOCK_ALL);
+ if (!xchk_fblock_process_error(info->sc, info->whichfork,
+ irec->br_startoff, &error))
+ goto out_free;
+
xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
irec->br_blockcount);
+ if (!xfs_has_rtrmapbt(info->sc->mp))
+ goto out_cur;
+
+ rgbno = xfs_rtb_to_rgbno(info->sc->mp, irec->br_startblock);
+ xchk_bmap_xref_rmap(info, irec, rgbno);
+
+ xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino, info->whichfork,
+ irec->br_startoff);
+ xchk_xref_is_only_rt_owned_by(info->sc, rgbno,
+ irec->br_blockcount, &oinfo);
+
+out_cur:
+ xchk_rtgroup_btcur_free(&info->sc->sr);
+out_free:
xchk_rtgroup_free(info->sc, &info->sc->sr);
}
@@ -614,8 +643,7 @@ xchk_bmap_check_rmap(
xchk_fblock_set_corrupt(sc, sbcri->whichfork,
check_rec.rm_offset);
if (irec.br_startblock !=
- xfs_agbno_to_fsb(to_perag(cur->bc_group),
- check_rec.rm_startblock))
+ xfs_gbno_to_fsb(cur->bc_group, check_rec.rm_startblock))
xchk_fblock_set_corrupt(sc, sbcri->whichfork,
check_rec.rm_offset);
if (irec.br_blockcount > check_rec.rm_blockcount)
@@ -669,6 +697,30 @@ xchk_bmap_check_ag_rmaps(
return error;
}
+/* Make sure each rt rmap has a corresponding bmbt entry. */
+STATIC int
+xchk_bmap_check_rt_rmaps(
+ struct xfs_scrub *sc,
+ struct xfs_rtgroup *rtg)
+{
+ struct xchk_bmap_check_rmap_info sbcri;
+ struct xfs_btree_cur *cur;
+ int error;
+
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
+ cur = xfs_rtrmapbt_init_cursor(sc->tp, rtg);
+
+ sbcri.sc = sc;
+ sbcri.whichfork = XFS_DATA_FORK;
+ error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
+ if (error == -ECANCELED)
+ error = 0;
+
+ xfs_btree_del_cursor(cur, error);
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
+ return error;
+}
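
As elsewhere in scrub, xfs_rmap_query_all() propagates the callback's return value, and the xchk_bmap_check_rmap() callback returns -ECANCELED to stop the walk once it has flagged a corruption; filtering that value out here keeps a deliberate early exit from being reported as a scan failure.
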
+
/*
* Decide if we want to scan the reverse mappings to determine if the attr
* fork /really/ has zero space mappings.
@@ -723,10 +775,6 @@ xchk_bmap_check_empty_datafork(
{
struct xfs_ifork *ifp = &ip->i_df;
- /* Don't support realtime rmap checks yet. */
- if (XFS_IS_REALTIME_INODE(ip))
- return false;
-
/*
* If the dinode repair found a bad data fork, it will reset the fork
* to extents format with zero records and wait for the this scrubber
@@ -777,6 +825,21 @@ xchk_bmap_check_rmaps(
struct xfs_perag *pag = NULL;
int error;
+ if (xfs_ifork_is_realtime(sc->ip, whichfork)) {
+ struct xfs_rtgroup *rtg = NULL;
+
+ while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) {
+ error = xchk_bmap_check_rt_rmaps(sc, rtg);
+ if (error ||
+ (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
+ xfs_rtgroup_rele(rtg);
+ return error;
+ }
+ }
+
+ return 0;
+ }
+
while ((pag = xfs_perag_next(sc->mp, pag))) {
error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
if (error ||
@@ -983,6 +1046,7 @@ xchk_bmap(
case XFS_DINODE_FMT_UUID:
case XFS_DINODE_FMT_DEV:
case XFS_DINODE_FMT_LOCAL:
+ case XFS_DINODE_FMT_META_BTREE:
/* No mappings to check. */
if (whichfork == XFS_COW_FORK)
xchk_fblock_set_corrupt(sc, whichfork, 0);
diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c
index 7c4955482641..fd64bdf4e138 100644
--- a/fs/xfs/scrub/bmap_repair.c
+++ b/fs/xfs/scrub/bmap_repair.c
@@ -25,11 +25,13 @@
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
+#include "xfs_rtrmap_btree.h"
#include "xfs_refcount.h"
#include "xfs_quota.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "xfs_reflink.h"
+#include "xfs_rtgroup.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -359,6 +361,112 @@ xrep_bmap_scan_ag(
return error;
}
+#ifdef CONFIG_XFS_RT
+/* Check for any obvious errors or conflicts in the file mapping. */
+STATIC int
+xrep_bmap_check_rtfork_rmap(
+ struct xfs_scrub *sc,
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec)
+{
+ /* xattr extents are never stored on realtime devices */
+ if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
+ return -EFSCORRUPTED;
+
+ /* bmbt blocks are never stored on realtime devices */
+ if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
+ return -EFSCORRUPTED;
+
+ /* Data extents for non-rt files are never stored on the rt device. */
+ if (!XFS_IS_REALTIME_INODE(sc->ip))
+ return -EFSCORRUPTED;
+
+ /* Check the file offsets and physical extents. */
+ if (!xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
+ return -EFSCORRUPTED;
+
+ /* Check that this is within the rtgroup. */
+ if (!xfs_verify_rgbext(to_rtg(cur->bc_group), rec->rm_startblock,
+ rec->rm_blockcount))
+ return -EFSCORRUPTED;
+
+ /* Make sure this isn't free space. */
+ return xrep_require_rtext_inuse(sc, rec->rm_startblock,
+ rec->rm_blockcount);
+}
+
+/* Record realtime extents that belong to this inode's fork. */
+STATIC int
+xrep_bmap_walk_rtrmap(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_bmap *rb = priv;
+ int error = 0;
+
+ if (xchk_should_terminate(rb->sc, &error))
+ return error;
+
+ /* Skip extents which are not owned by this inode and fork. */
+ if (rec->rm_owner != rb->sc->ip->i_ino)
+ return 0;
+
+ error = xrep_bmap_check_rtfork_rmap(rb->sc, cur, rec);
+ if (error)
+ return error;
+
+ /*
+ * Record all blocks allocated to this file even if the extent isn't
+ * for the fork we're rebuilding so that we can reset di_nblocks later.
+ */
+ rb->nblocks += rec->rm_blockcount;
+
+ /* If this rmap isn't for the fork we want, we're done. */
+ if (rb->whichfork == XFS_DATA_FORK &&
+ (rec->rm_flags & XFS_RMAP_ATTR_FORK))
+ return 0;
+ if (rb->whichfork == XFS_ATTR_FORK &&
+ !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
+ return 0;
+
+ return xrep_bmap_from_rmap(rb, rec->rm_offset,
+ xfs_rgbno_to_rtb(to_rtg(cur->bc_group),
+ rec->rm_startblock),
+ rec->rm_blockcount,
+ rec->rm_flags & XFS_RMAP_UNWRITTEN);
+}
+
+/* Scan the realtime reverse mappings to build the new extent map. */
+STATIC int
+xrep_bmap_scan_rtgroup(
+ struct xrep_bmap *rb,
+ struct xfs_rtgroup *rtg)
+{
+ struct xfs_scrub *sc = rb->sc;
+ int error;
+
+ if (!xfs_has_rtrmapbt(sc->mp))
+ return 0;
+
+ error = xrep_rtgroup_init(sc, rtg, &sc->sr,
+ XFS_RTGLOCK_RMAP | XFS_RTGLOCK_BITMAP_SHARED);
+ if (error)
+ return error;
+
+ error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_bmap_walk_rtrmap, rb);
+ xchk_rtgroup_btcur_free(&sc->sr);
+ xchk_rtgroup_free(sc, &sc->sr);
+ return error;
+}
+#else
+static inline int
+xrep_bmap_scan_rtgroup(struct xrep_bmap *rb, struct xfs_rtgroup *rtg)
+{
+ return -EFSCORRUPTED;
+}
+#endif
+
/* Find the delalloc extents from the old incore extent tree. */
STATIC int
xrep_bmap_find_delalloc(
@@ -410,6 +518,22 @@ xrep_bmap_find_mappings(
struct xfs_perag *pag = NULL;
int error = 0;
+ /*
+ * Iterate the rtrmaps for extents. Metadata files never have content
+ * on the realtime device, so there's no need to scan the rt groups for
+ * metadir inodes.
+ */
+ if (!xfs_is_metadir_inode(sc->ip)) {
+ struct xfs_rtgroup *rtg = NULL;
+
+ while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) {
+ error = xrep_bmap_scan_rtgroup(rb, rtg);
+ if (error) {
+ xfs_rtgroup_rele(rtg);
+ return error;
+ }
+ }
+ }
+
/* Iterate the rmaps for extents. */
while ((pag = xfs_perag_next(sc->mp, pag))) {
error = xrep_bmap_scan_ag(rb, pag);
@@ -731,6 +855,7 @@ xrep_bmap_check_inputs(
case XFS_DINODE_FMT_DEV:
case XFS_DINODE_FMT_LOCAL:
case XFS_DINODE_FMT_UUID:
+ case XFS_DINODE_FMT_META_BTREE:
return -ECANCELED;
case XFS_DINODE_FMT_EXTENTS:
case XFS_DINODE_FMT_BTREE:
@@ -753,10 +878,6 @@ xrep_bmap_check_inputs(
return -EINVAL;
}
- /* Don't know how to rebuild realtime data forks. */
- if (XFS_IS_REALTIME_INODE(sc->ip))
- return -EOPNOTSUPP;
-
return 0;
}
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 5cbd94b56582..06cb61e63498 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -35,6 +35,8 @@
#include "xfs_exchmaps.h"
#include "xfs_rtbitmap.h"
#include "xfs_rtgroup.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_bmap_util.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -719,20 +721,105 @@ xchk_rtgroup_init(
return 0;
}
-void
+/* Lock all the rt group metadata inode ILOCKs and wait for intents. */
+int
xchk_rtgroup_lock(
+ struct xfs_scrub *sc,
struct xchk_rt *sr,
unsigned int rtglock_flags)
{
- xfs_rtgroup_lock(sr->rtg, rtglock_flags);
+ int error = 0;
+
+ ASSERT(sr->rtg != NULL);
+
+ /*
+ * If we're /only/ locking the rtbitmap in shared mode, then we're
+ * obviously not trying to compare records in two metadata inodes.
+ * There's no need to drain intents here because the caller (most
+ * likely the rgsuper scanner) doesn't need that level of consistency.
+ */
+ if (rtglock_flags == XFS_RTGLOCK_BITMAP_SHARED) {
+ xfs_rtgroup_lock(sr->rtg, rtglock_flags);
+ sr->rtlock_flags = rtglock_flags;
+ return 0;
+ }
+
+ do {
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ xfs_rtgroup_lock(sr->rtg, rtglock_flags);
+
+ /*
+ * If we've grabbed a non-metadata file for scrubbing, we
+ * assume that holding its ILOCK will suffice to coordinate
+ * with any rt intent chains involving this inode.
+ */
+ if (sc->ip && !xfs_is_internal_inode(sc->ip))
+ break;
+
+ /*
+ * Decide if the rt group is quiet enough for all metadata to
+ * be consistent with each other. Regular file IO doesn't get
+ * to lock all the rt inodes at the same time, which means that
+ * there could be other threads in the middle of processing a
+ * chain of deferred ops.
+ *
+ * We just locked all the metadata inodes for this rt group;
+ * now take a look to see if there are any intents in progress.
+ * If there are, drop the rt group inode locks and wait for the
+ * intents to drain. Since we hold the rt group inode locks
+ * for the duration of the scrub, this is the only time we have
+ * to sample the intents counter; any threads increasing it
+ * after this point can't possibly be in the middle of a chain
+ * of rt metadata updates.
+ *
+ * Obviously, this should be slanted against scrub and in favor
+ * of runtime threads.
+ */
+ if (!xfs_group_intent_busy(rtg_group(sr->rtg)))
+ break;
+
+ xfs_rtgroup_unlock(sr->rtg, rtglock_flags);
+
+ if (!(sc->flags & XCHK_FSGATES_DRAIN))
+ return -ECHRNG;
+ error = xfs_group_intent_drain(rtg_group(sr->rtg));
+ if (error) {
+ if (error == -ERESTARTSYS)
+ error = -EINTR;
+ return error;
+ }
+ } while (1);
+
sr->rtlock_flags = rtglock_flags;
+
+ if (xfs_has_rtrmapbt(sc->mp) && (rtglock_flags & XFS_RTGLOCK_RMAP))
+ sr->rmap_cur = xfs_rtrmapbt_init_cursor(sc->tp, sr->rtg);
+
+ return 0;
+}
+
+/*
+ * Free all the btree cursors and other incore data relating to the realtime
+ * group. This has to be done /before/ committing (or cancelling) the scrub
+ * transaction.
+ */
+void
+xchk_rtgroup_btcur_free(
+ struct xchk_rt *sr)
+{
+ if (sr->rmap_cur)
+ xfs_btree_del_cursor(sr->rmap_cur, XFS_BTREE_ERROR);
+
+ sr->rmap_cur = NULL;
}
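
A sketch of the ordering these helpers impose on a hypothetical rt scrubber; real callers thread this through their setup and teardown paths, and the scrub transaction is committed or cancelled between the two teardown calls:

	error = xchk_rtgroup_init_existing(sc, rgno, &sc->sr);
	if (error)
		return error;
	error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
	if (error)
		goto out_free;

	/* ... compare records against sc->sr.rmap_cur ... */

	xchk_rtgroup_btcur_free(&sc->sr);	/* before tp commit/cancel */
out_free:
	xchk_rtgroup_free(sc, &sc->sr);		/* after tp commit/cancel */
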
/*
* Unlock the realtime group. This must be done /after/ committing (or
* cancelling) the scrub transaction.
*/
-static void
+void
xchk_rtgroup_unlock(
struct xchk_rt *sr)
{
@@ -812,6 +899,14 @@ xchk_setup_fs(
return xchk_trans_alloc(sc, resblks);
}
+/* Set us up with a transaction and an empty context to repair rt metadata. */
+int
+xchk_setup_rt(
+ struct xfs_scrub *sc)
+{
+ return xchk_trans_alloc(sc, xrep_calc_rtgroup_resblks(sc));
+}
+
/* Set us up with AG headers and btree cursors. */
int
xchk_setup_ag_btree(
@@ -1379,7 +1474,7 @@ xchk_fsgates_enable(
trace_xchk_fsgates_enable(sc, scrub_fsgates);
if (scrub_fsgates & XCHK_FSGATES_DRAIN)
- xfs_drain_wait_enable();
+ xfs_defer_drain_wait_enable();
if (scrub_fsgates & XCHK_FSGATES_QUOTA)
xfs_dqtrx_hook_enable();
@@ -1573,3 +1668,60 @@ xchk_inode_rootdir_inum(const struct xfs_inode *ip)
return mp->m_metadirip->i_ino;
return mp->m_rootip->i_ino;
}
+
+static int
+xchk_meta_btree_count_blocks(
+ struct xfs_scrub *sc,
+ xfs_extnum_t *nextents,
+ xfs_filblks_t *count)
+{
+ struct xfs_btree_cur *cur;
+ int error;
+
+ if (!sc->sr.rtg) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ switch (sc->ip->i_metatype) {
+ case XFS_METAFILE_RTRMAP:
+ cur = xfs_rtrmapbt_init_cursor(sc->tp, sc->sr.rtg);
+ break;
+ default:
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ error = xfs_btree_count_blocks(cur, count);
+ xfs_btree_del_cursor(cur, error);
+ if (!error) {
+ *nextents = 0;
+ (*count)--; /* don't count the btree iroot */
+ }
+ return error;
+}
+
+/* Count the blocks used by a file, even if it's a metadata inode. */
+int
+xchk_inode_count_blocks(
+ struct xfs_scrub *sc,
+ int whichfork,
+ xfs_extnum_t *nextents,
+ xfs_filblks_t *count)
+{
+ struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork);
+
+ if (!ifp) {
+ *nextents = 0;
+ *count = 0;
+ return 0;
+ }
+
+ if (ifp->if_format == XFS_DINODE_FMT_META_BTREE) {
+ ASSERT(whichfork == XFS_DATA_FORK);
+ return xchk_meta_btree_count_blocks(sc, nextents, count);
+ }
+
+ return xfs_bmap_count_blocks(sc->tp, sc->ip, whichfork, nextents,
+ count);
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 9ff3cafd8679..50ac6cca18fe 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -63,6 +63,7 @@ static inline int xchk_setup_nothing(struct xfs_scrub *sc)
/* Setup functions */
int xchk_setup_agheader(struct xfs_scrub *sc);
int xchk_setup_fs(struct xfs_scrub *sc);
+int xchk_setup_rt(struct xfs_scrub *sc);
int xchk_setup_ag_allocbt(struct xfs_scrub *sc);
int xchk_setup_ag_iallocbt(struct xfs_scrub *sc);
int xchk_setup_ag_rmapbt(struct xfs_scrub *sc);
@@ -80,10 +81,12 @@ int xchk_setup_metapath(struct xfs_scrub *sc);
int xchk_setup_rtbitmap(struct xfs_scrub *sc);
int xchk_setup_rtsummary(struct xfs_scrub *sc);
int xchk_setup_rgsuperblock(struct xfs_scrub *sc);
+int xchk_setup_rtrmapbt(struct xfs_scrub *sc);
#else
# define xchk_setup_rtbitmap xchk_setup_nothing
# define xchk_setup_rtsummary xchk_setup_nothing
# define xchk_setup_rgsuperblock xchk_setup_nothing
+# define xchk_setup_rtrmapbt xchk_setup_nothing
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_ino_dqattach(struct xfs_scrub *sc);
@@ -125,7 +128,8 @@ xchk_ag_init_existing(
#ifdef CONFIG_XFS_RT
/* All the locks we need to check an rtgroup. */
-#define XCHK_RTGLOCK_ALL (XFS_RTGLOCK_BITMAP)
+#define XCHK_RTGLOCK_ALL (XFS_RTGLOCK_BITMAP | \
+ XFS_RTGLOCK_RMAP)
int xchk_rtgroup_init(struct xfs_scrub *sc, xfs_rgnumber_t rgno,
struct xchk_rt *sr);
@@ -141,12 +145,17 @@ xchk_rtgroup_init_existing(
return error == -ENOENT ? -EFSCORRUPTED : error;
}
-void xchk_rtgroup_lock(struct xchk_rt *sr, unsigned int rtglock_flags);
+int xchk_rtgroup_lock(struct xfs_scrub *sc, struct xchk_rt *sr,
+ unsigned int rtglock_flags);
+void xchk_rtgroup_unlock(struct xchk_rt *sr);
+void xchk_rtgroup_btcur_free(struct xchk_rt *sr);
void xchk_rtgroup_free(struct xfs_scrub *sc, struct xchk_rt *sr);
#else
# define xchk_rtgroup_init(sc, rgno, sr) (-EFSCORRUPTED)
# define xchk_rtgroup_init_existing(sc, rgno, sr) (-EFSCORRUPTED)
-# define xchk_rtgroup_lock(sc, lockflags) do { } while (0)
+# define xchk_rtgroup_lock(sc, sr, lockflags) (-EFSCORRUPTED)
+# define xchk_rtgroup_unlock(sr) do { } while (0)
+# define xchk_rtgroup_btcur_free(sr) do { } while (0)
# define xchk_rtgroup_free(sc, sr) do { } while (0)
#endif /* CONFIG_XFS_RT */
@@ -257,6 +266,12 @@ int xchk_metadata_inode_forks(struct xfs_scrub *sc);
(sc)->mp->m_super->s_id, \
(sc)->ip ? (sc)->ip->i_ino : (sc)->sm->sm_ino, \
##__VA_ARGS__)
+#define xchk_xfile_rtgroup_descr(sc, fmt, ...) \
+ kasprintf(XCHK_GFP_FLAGS, "XFS (%s): rtgroup 0x%x " fmt, \
+ (sc)->mp->m_super->s_id, \
+ (sc)->sr.rtg ? \
+ rtg_rgno((sc)->sr.rtg) : (sc)->sm->sm_agno, \
+ ##__VA_ARGS__)
/*
* Setting up a hook to wait for intents to drain is costly -- we have to take
@@ -274,6 +289,8 @@ void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks);
int xchk_inode_is_allocated(struct xfs_scrub *sc, xfs_agino_t agino,
bool *inuse);
+int xchk_inode_count_blocks(struct xfs_scrub *sc, int whichfork,
+ xfs_extnum_t *nextents, xfs_filblks_t *count);
bool xchk_inode_is_dirtree_root(const struct xfs_inode *ip);
bool xchk_inode_is_sb_rooted(const struct xfs_inode *ip);
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index ccc6ca5934ca..bcc4244e3b55 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -114,6 +114,7 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_DIRTREE] = { XHG_INO, XFS_SICK_INO_DIRTREE },
[XFS_SCRUB_TYPE_METAPATH] = { XHG_FS, XFS_SICK_FS_METAPATH },
[XFS_SCRUB_TYPE_RGSUPER] = { XHG_RTGROUP, XFS_SICK_RG_SUPER },
+ [XFS_SCRUB_TYPE_RTRMAPBT] = { XHG_RTGROUP, XFS_SICK_RG_RMAPBT },
};
/* Return the health status mask for this scrub type. */
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 25ee66e7649d..8e702121dc86 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -502,6 +502,10 @@ xchk_dinode(
if (!S_ISREG(mode) && !S_ISDIR(mode))
xchk_ino_set_corrupt(sc, ino);
break;
+ case XFS_DINODE_FMT_META_BTREE:
+ if (!S_ISREG(mode))
+ xchk_ino_set_corrupt(sc, ino);
+ break;
case XFS_DINODE_FMT_UUID:
default:
xchk_ino_set_corrupt(sc, ino);
@@ -686,15 +690,13 @@ xchk_inode_xref_bmap(
return;
/* Walk all the extents to check nextents/naextents/nblocks. */
- error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
- &nextents, &count);
+ error = xchk_inode_count_blocks(sc, XFS_DATA_FORK, &nextents, &count);
if (!xchk_should_check_xref(sc, &error, NULL))
return;
if (nextents < xfs_dfork_data_extents(dip))
xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino);
- error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
- &nextents, &acount);
+ error = xchk_inode_count_blocks(sc, XFS_ATTR_FORK, &nextents, &acount);
if (!xchk_should_check_xref(sc, &error, NULL))
return;
if (nextents != xfs_dfork_attr_extents(dip))
diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
index 5a58ddd27bd2..d7e3f033b160 100644
--- a/fs/xfs/scrub/inode_repair.c
+++ b/fs/xfs/scrub/inode_repair.c
@@ -38,6 +38,8 @@
#include "xfs_log_priv.h"
#include "xfs_health.h"
#include "xfs_symlink_remote.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtrmap_btree.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -773,17 +775,71 @@ xrep_dinode_count_ag_rmaps(
return error;
}
+/* Count extents and blocks for an inode given an rt rmap. */
+STATIC int
+xrep_dinode_walk_rtrmap(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_inode *ri = priv;
+ int error = 0;
+
+ if (xchk_should_terminate(ri->sc, &error))
+ return error;
+
+ /* We only care about this inode. */
+ if (rec->rm_owner != ri->sc->sm->sm_ino)
+ return 0;
+
+ if (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))
+ return -EFSCORRUPTED;
+
+ ri->rt_blocks += rec->rm_blockcount;
+ ri->rt_extents++;
+ return 0;
+}
+
+/* Count extents and blocks for an inode from all realtime rmap data. */
+STATIC int
+xrep_dinode_count_rtgroup_rmaps(
+ struct xrep_inode *ri,
+ struct xfs_rtgroup *rtg)
+{
+ struct xfs_scrub *sc = ri->sc;
+ int error;
+
+ error = xrep_rtgroup_init(sc, rtg, &sc->sr, XFS_RTGLOCK_RMAP);
+ if (error)
+ return error;
+
+ error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_dinode_walk_rtrmap,
+ ri);
+ xchk_rtgroup_btcur_free(&sc->sr);
+ xchk_rtgroup_free(sc, &sc->sr);
+ return error;
+}
+
/* Count extents and blocks for a given inode from all rmap data. */
STATIC int
xrep_dinode_count_rmaps(
struct xrep_inode *ri)
{
struct xfs_perag *pag = NULL;
+ struct xfs_rtgroup *rtg = NULL;
int error;
- if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp))
+ if (!xfs_has_rmapbt(ri->sc->mp))
return -EOPNOTSUPP;
+ while ((rtg = xfs_rtgroup_next(ri->sc->mp, rtg))) {
+ error = xrep_dinode_count_rtgroup_rmaps(ri, rtg);
+ if (error) {
+ xfs_rtgroup_rele(rtg);
+ return error;
+ }
+ }
+
while ((pag = xfs_perag_next(ri->sc->mp, pag))) {
error = xrep_dinode_count_ag_rmaps(ri, pag);
if (error) {
@@ -888,6 +944,55 @@ xrep_dinode_bad_bmbt_fork(
return false;
}
+/* Return true if this rtrmapbt-format ifork looks like garbage. */
+STATIC bool
+xrep_dinode_bad_rtrmapbt_fork(
+ struct xfs_scrub *sc,
+ struct xfs_dinode *dip,
+ unsigned int dfork_size)
+{
+ struct xfs_rtrmap_root *dfp;
+ unsigned int nrecs;
+ unsigned int level;
+
+ if (dfork_size < sizeof(struct xfs_rtrmap_root))
+ return true;
+
+ dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
+ nrecs = be16_to_cpu(dfp->bb_numrecs);
+ level = be16_to_cpu(dfp->bb_level);
+
+ if (level > sc->mp->m_rtrmap_maxlevels)
+ return true;
+ if (xfs_rtrmap_droot_space_calc(level, nrecs) > dfork_size)
+ return true;
+ if (level > 0 && nrecs == 0)
+ return true;
+
+ return false;
+}
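
For example, with a hypothetical 400-byte data fork, a root claiming level 1 and 9 records would need xfs_rtrmap_droot_space_calc(1, 9) = 4 + 9 * (2 * 20 + 8) = 436 bytes (under the usual key/pointer size assumptions), which exceeds dfork_size, so the fork is declared garbage.
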
+
+/* Check a metadata-btree fork. */
+STATIC bool
+xrep_dinode_bad_metabt_fork(
+ struct xfs_scrub *sc,
+ struct xfs_dinode *dip,
+ unsigned int dfork_size,
+ int whichfork)
+{
+ if (whichfork != XFS_DATA_FORK)
+ return true;
+
+ switch (be16_to_cpu(dip->di_metatype)) {
+ case XFS_METAFILE_RTRMAP:
+ return xrep_dinode_bad_rtrmapbt_fork(sc, dip, dfork_size);
+ default:
+ return true;
+ }
+
+ return false;
+}
+
/*
* Check the data fork for things that will fail the ifork verifiers or the
* ifork formatters.
@@ -968,6 +1073,11 @@ xrep_dinode_check_dfork(
XFS_DATA_FORK))
return true;
break;
+ case XFS_DINODE_FMT_META_BTREE:
+ if (xrep_dinode_bad_metabt_fork(sc, dip, dfork_size,
+ XFS_DATA_FORK))
+ return true;
+ break;
default:
return true;
}
@@ -1088,6 +1198,11 @@ xrep_dinode_check_afork(
XFS_ATTR_FORK))
return true;
break;
+ case XFS_DINODE_FMT_META_BTREE:
+ if (xrep_dinode_bad_metabt_fork(sc, dip, afork_size,
+ XFS_ATTR_FORK))
+ return true;
+ break;
default:
return true;
}
@@ -1135,6 +1250,7 @@ xrep_dinode_ensure_forkoff(
uint16_t mode)
{
struct xfs_bmdr_block *bmdr;
+ struct xfs_rtrmap_root *rmdr;
struct xfs_scrub *sc = ri->sc;
xfs_extnum_t attr_extents, data_extents;
size_t bmdr_minsz = xfs_bmdr_space_calc(1);
@@ -1241,6 +1357,17 @@ xrep_dinode_ensure_forkoff(
bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
dfork_min = xfs_bmap_broot_space(sc->mp, bmdr);
break;
+ case XFS_DINODE_FMT_META_BTREE:
+ switch (be16_to_cpu(dip->di_metatype)) {
+ case XFS_METAFILE_RTRMAP:
+ rmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
+ dfork_min = xfs_rtrmap_broot_space(sc->mp, rmdr);
+ break;
+ default:
+ dfork_min = 0;
+ break;
+ }
+ break;
default:
dfork_min = 0;
break;
@@ -1500,8 +1627,7 @@ xrep_inode_blockcounts(
trace_xrep_inode_blockcounts(sc);
/* Set data fork counters from the data fork mappings. */
- error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK,
- &nextents, &count);
+ error = xchk_inode_count_blocks(sc, XFS_DATA_FORK, &nextents, &count);
if (error)
return error;
if (xfs_is_reflink_inode(sc->ip)) {
@@ -1525,8 +1651,8 @@ xrep_inode_blockcounts(
/* Set attr fork counters from the attr fork mappings. */
ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK);
if (ifp) {
- error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK,
- &nextents, &acount);
+ error = xchk_inode_count_blocks(sc, XFS_ATTR_FORK, &nextents,
+ &acount);
if (error)
return error;
if (count >= sc->mp->m_sb.sb_dblocks)
diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c
index c678cba1ffc3..74d71373e7ed 100644
--- a/fs/xfs/scrub/metapath.c
+++ b/fs/xfs/scrub/metapath.c
@@ -21,6 +21,7 @@
#include "xfs_trans_space.h"
#include "xfs_attr.h"
#include "xfs_rtgroup.h"
+#include "xfs_rtrmap_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -246,6 +247,8 @@ xchk_setup_metapath(
return xchk_setup_metapath_dqinode(sc, XFS_DQTYPE_GROUP);
case XFS_SCRUB_METAPATH_PRJQUOTA:
return xchk_setup_metapath_dqinode(sc, XFS_DQTYPE_PROJ);
+ case XFS_SCRUB_METAPATH_RTRMAPBT:
+ return xchk_setup_metapath_rtginode(sc, XFS_RTGI_RMAP);
default:
return -ENOENT;
}
diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c
index 70af27d98734..ac38f5843090 100644
--- a/fs/xfs/scrub/newbt.c
+++ b/fs/xfs/scrub/newbt.c
@@ -19,6 +19,8 @@
#include "xfs_rmap.h"
#include "xfs_ag.h"
#include "xfs_defer.h"
+#include "xfs_metafile.h"
+#include "xfs_quota.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -121,6 +123,43 @@ xrep_newbt_init_inode(
}
/*
+ * Initialize accounting resources for staging a new metadata inode btree.
+ * If the metadata file has a space reservation, the caller must adjust that
+ * reservation when committing the new ondisk btree.
+ */
+int
+xrep_newbt_init_metadir_inode(
+ struct xrep_newbt *xnr,
+ struct xfs_scrub *sc)
+{
+ struct xfs_owner_info oinfo;
+ struct xfs_ifork *ifp;
+
+ ASSERT(xfs_is_metadir_inode(sc->ip));
+
+ xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);
+
+ ifp = kmem_cache_zalloc(xfs_ifork_cache, XCHK_GFP_FLAGS);
+ if (!ifp)
+ return -ENOMEM;
+
+ /*
+ * Allocate new metadir btree blocks with XFS_AG_RESV_NONE because the
+ * inode metadata space reservation can only account for space that is
+ * charged to i_nblocks. We do not want to change the inode core fields
+ * until we're ready to commit the new tree, so we allocate the blocks
+ * as if they were regular file blocks. This exposes us to a higher
+ * risk of the repair being cancelled due to ENOSPC.
+ */
+ xrep_newbt_init_ag(xnr, sc, &oinfo,
+ XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
+ XFS_AG_RESV_NONE);
+ xnr->ifake.if_fork = ifp;
+ xnr->ifake.if_fork_size = xfs_inode_fork_size(sc->ip, XFS_DATA_FORK);
+ return 0;
+}
+
+/*
* Initialize accounting resources for staging a new btree. Callers are
* expected to add their own reservations (and clean them up) manually.
*/
@@ -224,6 +263,7 @@ xrep_newbt_alloc_ag_blocks(
int error = 0;
ASSERT(sc->sa.pag != NULL);
+ ASSERT(xnr->resv != XFS_AG_RESV_METAFILE);
while (nr_blocks > 0) {
struct xfs_alloc_arg args = {
@@ -297,6 +337,8 @@ xrep_newbt_alloc_file_blocks(
struct xfs_mount *mp = sc->mp;
int error = 0;
+ ASSERT(xnr->resv != XFS_AG_RESV_METAFILE);
+
while (nr_blocks > 0) {
struct xfs_alloc_arg args = {
.tp = sc->tp,
diff --git a/fs/xfs/scrub/newbt.h b/fs/xfs/scrub/newbt.h
index 3d804d31af24..5ce785599287 100644
--- a/fs/xfs/scrub/newbt.h
+++ b/fs/xfs/scrub/newbt.h
@@ -63,6 +63,7 @@ void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct xfs_scrub *sc,
enum xfs_ag_resv_type resv);
int xrep_newbt_init_inode(struct xrep_newbt *xnr, struct xfs_scrub *sc,
int whichfork, const struct xfs_owner_info *oinfo);
+int xrep_newbt_init_metadir_inode(struct xrep_newbt *xnr, struct xfs_scrub *sc);
int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
int xrep_newbt_add_extent(struct xrep_newbt *xnr, struct xfs_perag *pag,
xfs_agblock_t agbno, xfs_extlen_t len);
diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c
index 08230952053b..4d7f1b82dc55 100644
--- a/fs/xfs/scrub/reap.c
+++ b/fs/xfs/scrub/reap.c
@@ -33,6 +33,7 @@
#include "xfs_attr.h"
#include "xfs_attr_remote.h"
#include "xfs_defer.h"
+#include "xfs_metafile.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -390,6 +391,8 @@ xreap_agextent_iter(
xfs_fsblock_t fsbno;
int error = 0;
+ ASSERT(rs->resv != XFS_AG_RESV_METAFILE);
+
fsbno = xfs_agbno_to_fsb(sc->sa.pag, agbno);
/*
@@ -676,6 +679,44 @@ xrep_reap_fsblocks(
}
/*
+ * Dispose of every block of an old metadata btree that used to be rooted in a
+ * metadata directory file.
+ */
+int
+xrep_reap_metadir_fsblocks(
+ struct xfs_scrub *sc,
+ struct xfsb_bitmap *bitmap)
+{
+ /*
+ * Reap old metadir btree blocks with XFS_AG_RESV_NONE because the old
+ * blocks are no longer mapped by the inode, and the inode metadata
+ * space reservation can only account for space that is charged to
+ * i_nblocks.
+ */
+ struct xfs_owner_info oinfo;
+ struct xreap_state rs = {
+ .sc = sc,
+ .oinfo = &oinfo,
+ .resv = XFS_AG_RESV_NONE,
+ };
+ int error;
+
+ ASSERT(xfs_has_rmapbt(sc->mp));
+ ASSERT(sc->ip != NULL);
+ ASSERT(xfs_is_metadir_inode(sc->ip));
+
+ xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, XFS_DATA_FORK);
+
+ error = xfsb_bitmap_walk(bitmap, xreap_fsmeta_extent, &rs);
+ if (error)
+ return error;
+
+ if (xreap_dirty(&rs))
+ return xrep_defer_finish(sc);
+
+ return 0;
+}
+
+/*
* Metadata files are not supposed to share blocks with anything else.
* If blocks are shared, we remove the reverse mapping (thus reducing the
* crosslink factor); if blocks are not shared, we also need to free them.
diff --git a/fs/xfs/scrub/reap.h b/fs/xfs/scrub/reap.h
index 3f2f1775e29d..70e5e6bbb8d3 100644
--- a/fs/xfs/scrub/reap.h
+++ b/fs/xfs/scrub/reap.h
@@ -14,6 +14,8 @@ int xrep_reap_agblocks(struct xfs_scrub *sc, struct xagb_bitmap *bitmap,
int xrep_reap_fsblocks(struct xfs_scrub *sc, struct xfsb_bitmap *bitmap,
const struct xfs_owner_info *oinfo);
int xrep_reap_ifork(struct xfs_scrub *sc, struct xfs_inode *ip, int whichfork);
+int xrep_reap_metadir_fsblocks(struct xfs_scrub *sc,
+ struct xfsb_bitmap *bitmap);
/* Buffer cache scan context. */
struct xrep_bufscan {
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 91c8bc055a4f..61e414c81253 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -37,6 +37,11 @@
#include "xfs_da_btree.h"
#include "xfs_attr.h"
#include "xfs_dir2.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtalloc.h"
+#include "xfs_metafile.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -62,6 +67,7 @@ xrep_attempt(
trace_xrep_attempt(XFS_I(file_inode(sc->file)), sc->sm, error);
xchk_ag_btcur_free(&sc->sa);
+ xchk_rtgroup_btcur_free(&sc->sr);
/* Repair whatever's broken. */
ASSERT(sc->ops->repair);
@@ -378,6 +384,41 @@ xrep_calc_ag_resblks(
return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
}
+#ifdef CONFIG_XFS_RT
+/*
+ * Figure out how many blocks to reserve for an rtgroup repair. We calculate
+ * the worst case estimate for the number of blocks we'd need to rebuild one of
+ * any type of per-rtgroup btree.
+ */
+xfs_extlen_t
+xrep_calc_rtgroup_resblks(
+ struct xfs_scrub *sc)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_scrub_metadata *sm = sc->sm;
+ uint64_t usedlen;
+ xfs_extlen_t rmapbt_sz = 0;
+
+ if (!(sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
+ return 0;
+ if (!xfs_has_rtgroups(mp)) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ usedlen = xfs_rtbxlen_to_blen(mp, xfs_rtgroup_extents(mp, sm->sm_agno));
+ ASSERT(usedlen <= XFS_MAX_RGBLOCKS);
+
+ if (xfs_has_rmapbt(mp))
+ rmapbt_sz = xfs_rtrmapbt_calc_size(mp, usedlen);
+
+ trace_xrep_calc_rtgroup_resblks_btsize(mp, sm->sm_agno, usedlen,
+ rmapbt_sz);
+
+ return rmapbt_sz;
+}
+#endif /* CONFIG_XFS_RT */
+
/*
* Reconstructing per-AG Btrees
*
@@ -954,6 +995,22 @@ xrep_ag_init(
}
#ifdef CONFIG_XFS_RT
+/* Initialize all the btree cursors for a RT repair. */
+void
+xrep_rtgroup_btcur_init(
+ struct xfs_scrub *sc,
+ struct xchk_rt *sr)
+{
+ struct xfs_mount *mp = sc->mp;
+
+ ASSERT(sr->rtg != NULL);
+
+ if (sc->sm->sm_type != XFS_SCRUB_TYPE_RTRMAPBT &&
+ (sr->rtlock_flags & XFS_RTGLOCK_RMAP) &&
+ xfs_has_rtrmapbt(mp))
+ sr->rmap_cur = xfs_rtrmapbt_init_cursor(sc->tp, sr->rtg);
+}
+
/*
* Given a reference to a rtgroup structure, lock rtgroup btree inodes and
* create btree cursors. Must only be called to repair a regular rt file.
@@ -972,6 +1029,33 @@ xrep_rtgroup_init(
/* Grab our own passive reference from the caller's ref. */
sr->rtg = xfs_rtgroup_hold(rtg);
+ xrep_rtgroup_btcur_init(sc, sr);
+ return 0;
+}
+
+/* Ensure that all rt blocks in the given range are not marked free. */
+int
+xrep_require_rtext_inuse(
+ struct xfs_scrub *sc,
+ xfs_rgblock_t rgbno,
+ xfs_filblks_t len)
+{
+ struct xfs_mount *mp = sc->mp;
+ xfs_rtxnum_t startrtx;
+ xfs_rtxnum_t endrtx;
+ bool is_free = false;
+ int error;
+
+ startrtx = xfs_rgbno_to_rtx(mp, rgbno);
+ endrtx = xfs_rgbno_to_rtx(mp, rgbno + len - 1);
+
+ error = xfs_rtalloc_extent_is_free(sc->sr.rtg, sc->tp, startrtx,
+ endrtx - startrtx + 1, &is_free);
+ if (error)
+ return error;
+ if (is_free)
+ return -EFSCORRUPTED;
+
return 0;
}
#endif /* CONFIG_XFS_RT */
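
xrep_require_rtext_inuse() rounds the block range out to whole rt extents before asking the bitmap whether any of it is free. A minimal sketch of that conversion, assuming a made-up extent size; note the inclusive endrtx, hence the +1 on the count:

    /* Hedged sketch: rt block range -> rt extent range. */
    #include <stdint.h>
    #include <stdio.h>

    #define REXTSIZE 16 /* assumed blocks per rt extent */

    int main(void)
    {
    	uint64_t rgbno = 100, len = 40;
    	uint64_t startrtx = rgbno / REXTSIZE;
    	uint64_t endrtx = (rgbno + len - 1) / REXTSIZE;

    	/* Inclusive range, matching the +1 in the in-use query above. */
    	printf("query rtx %llu, count %llu\n",
    	       (unsigned long long)startrtx,
    	       (unsigned long long)(endrtx - startrtx + 1));
    	return 0;
    }
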
@@ -1237,3 +1321,110 @@ xrep_buf_verify_struct(
return fa == NULL;
}
+
+/* Check the sanity of an rmap record for a metadata btree inode. */
+int
+xrep_check_ino_btree_mapping(
+ struct xfs_scrub *sc,
+ const struct xfs_rmap_irec *rec)
+{
+ enum xbtree_recpacking outcome;
+ int error;
+
+ /*
+ * Metadata btree inodes never have extended attributes, and all blocks
+ * should have the bmbt block flag set.
+ */
+ if ((rec->rm_flags & XFS_RMAP_ATTR_FORK) ||
+ !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
+ return -EFSCORRUPTED;
+
+ /* Make sure the block is within the AG. */
+ if (!xfs_verify_agbext(sc->sa.pag, rec->rm_startblock,
+ rec->rm_blockcount))
+ return -EFSCORRUPTED;
+
+ /* Make sure this isn't free space. */
+ error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
+ rec->rm_blockcount, &outcome);
+ if (error)
+ return error;
+ if (outcome != XBTREE_RECPACKING_EMPTY)
+ return -EFSCORRUPTED;
+
+ return 0;
+}
+
+/*
+ * Reset the block count of the inode being repaired, and adjust the dquot
+ * block usage to match. The inode must not have an xattr fork.
+ */
+void
+xrep_inode_set_nblocks(
+ struct xfs_scrub *sc,
+ int64_t new_blocks)
+{
+ int64_t delta = new_blocks - sc->ip->i_nblocks;
+
+ sc->ip->i_nblocks = new_blocks;
+
+ xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
+ if (delta != 0)
+ xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT,
+ delta);
+}
+
+/* Reset the block reservation for a metadata inode. */
+int
+xrep_reset_metafile_resv(
+ struct xfs_scrub *sc)
+{
+ struct xfs_inode *ip = sc->ip;
+ int64_t delta;
+ int error;
+
+ delta = ip->i_nblocks + ip->i_delayed_blks - ip->i_meta_resv_asked;
+ if (delta == 0)
+ return 0;
+
+ /*
+ * Too many blocks have been reserved, transfer some from the incore
+ * reservation back to the filesystem.
+ */
+ if (delta > 0) {
+ int64_t give_back;
+
+ give_back = min_t(uint64_t, delta, ip->i_delayed_blks);
+ if (give_back > 0) {
+ xfs_mod_delalloc(ip, 0, -give_back);
+ xfs_add_fdblocks(ip->i_mount, give_back);
+ ip->i_delayed_blks -= give_back;
+ }
+
+ return 0;
+ }
+
+ /*
+ * Not enough reservation; try to take some blocks from the filesystem
+ * to the metadata inode. @delta is negative here, so invert the sign.
+ */
+ delta = -delta;
+ error = xfs_dec_fdblocks(sc->mp, delta, true);
+ while (error == -ENOSPC) {
+ delta--;
+ if (delta == 0) {
+ xfs_warn(sc->mp,
+"Insufficient free space to reset space reservation for inode 0x%llx after repair.",
+ ip->i_ino);
+ return 0;
+ }
+ error = xfs_dec_fdblocks(sc->mp, delta, true);
+ }
+ if (error)
+ return error;
+
+ xfs_mod_delalloc(ip, 0, delta);
+ ip->i_delayed_blks += delta;
+ return 0;
+}
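
The control flow above boils down to one signed delta: positive means give blocks back (bounded by the delalloc count), negative means take blocks from the filesystem, backing off one block at a time on ENOSPC. A hedged userspace sketch with a toy free-space counter standing in for xfs_dec_fdblocks()/xfs_add_fdblocks():

    /* Hedged sketch of the metafile reservation reset (fake free pool). */
    #include <stdint.h>
    #include <stdio.h>

    static int64_t fdblocks = 5; /* pretend free space */

    static int dec_fdblocks(int64_t n) /* stand-in for xfs_dec_fdblocks() */
    {
    	if (n > fdblocks)
    		return -1; /* -ENOSPC */
    	fdblocks -= n;
    	return 0;
    }

    int main(void)
    {
    	int64_t nblocks = 90, delayed = 2, asked = 100;
    	int64_t delta = nblocks + delayed - asked;

    	if (delta > 0) {
    		/* Over-reserved: give back what the delalloc count allows. */
    		int64_t give_back = delta < delayed ? delta : delayed;
    		fdblocks += give_back;
    		delayed -= give_back;
    	} else if (delta < 0) {
    		/* Under-reserved: take blocks, backing off on ENOSPC. */
    		delta = -delta;
    		while (delta && dec_fdblocks(delta))
    			delta--;
    		delayed += delta;
    	}
    	printf("delayed=%lld fdblocks=%lld\n",
    	       (long long)delayed, (long long)fdblocks);
    	return 0;
    }
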
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index b649da1a93eb..ac5962732d26 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -97,6 +97,7 @@ int xrep_setup_parent(struct xfs_scrub *sc);
int xrep_setup_nlinks(struct xfs_scrub *sc);
int xrep_setup_symlink(struct xfs_scrub *sc, unsigned int *resblks);
int xrep_setup_dirtree(struct xfs_scrub *sc);
+int xrep_setup_rtrmapbt(struct xfs_scrub *sc);
/* Repair setup functions */
int xrep_setup_ag_allocbt(struct xfs_scrub *sc);
@@ -110,10 +111,18 @@ int xrep_ag_init(struct xfs_scrub *sc, struct xfs_perag *pag,
#ifdef CONFIG_XFS_RT
int xrep_rtgroup_init(struct xfs_scrub *sc, struct xfs_rtgroup *rtg,
struct xchk_rt *sr, unsigned int rtglock_flags);
+void xrep_rtgroup_btcur_init(struct xfs_scrub *sc, struct xchk_rt *sr);
+int xrep_require_rtext_inuse(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
+ xfs_filblks_t len);
+xfs_extlen_t xrep_calc_rtgroup_resblks(struct xfs_scrub *sc);
#else
# define xrep_rtgroup_init(sc, rtg, sr, lockflags) (-ENOSYS)
+# define xrep_calc_rtgroup_resblks(sc) (0)
#endif /* CONFIG_XFS_RT */
+int xrep_check_ino_btree_mapping(struct xfs_scrub *sc,
+ const struct xfs_rmap_irec *rec);
+
/* Metadata revalidators */
int xrep_revalidate_allocbt(struct xfs_scrub *sc);
@@ -147,10 +156,12 @@ int xrep_metapath(struct xfs_scrub *sc);
int xrep_rtbitmap(struct xfs_scrub *sc);
int xrep_rtsummary(struct xfs_scrub *sc);
int xrep_rgsuperblock(struct xfs_scrub *sc);
+int xrep_rtrmapbt(struct xfs_scrub *sc);
#else
# define xrep_rtbitmap xrep_notsupported
# define xrep_rtsummary xrep_notsupported
# define xrep_rgsuperblock xrep_notsupported
+# define xrep_rtrmapbt xrep_notsupported
#endif /* CONFIG_XFS_RT */
#ifdef CONFIG_XFS_QUOTA
@@ -169,6 +180,8 @@ int xrep_trans_alloc_hook_dummy(struct xfs_mount *mp, void **cookiep,
void xrep_trans_cancel_hook_dummy(void **cookiep, struct xfs_trans *tp);
bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
+void xrep_inode_set_nblocks(struct xfs_scrub *sc, int64_t new_blocks);
+int xrep_reset_metafile_resv(struct xfs_scrub *sc);
#else
@@ -192,6 +205,8 @@ xrep_calc_ag_resblks(
return 0;
}
+#define xrep_calc_rtgroup_resblks xrep_calc_ag_resblks
+
static inline int
xrep_reset_perag_resv(
struct xfs_scrub *sc)
@@ -219,6 +234,7 @@ xrep_setup_nothing(
#define xrep_setup_nlinks xrep_setup_nothing
#define xrep_setup_dirtree xrep_setup_nothing
#define xrep_setup_metapath xrep_setup_nothing
+#define xrep_setup_rtrmapbt xrep_setup_nothing
#define xrep_setup_inode(sc, imap) ((void)0)
@@ -256,6 +272,7 @@ static inline int xrep_setup_symlink(struct xfs_scrub *sc, unsigned int *x)
#define xrep_dirtree xrep_notsupported
#define xrep_metapath xrep_notsupported
#define xrep_rgsuperblock xrep_notsupported
+#define xrep_rtrmapbt xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rgsuper.c b/fs/xfs/scrub/rgsuper.c
index 463b3573bb76..d189732d0e24 100644
--- a/fs/xfs/scrub/rgsuper.c
+++ b/fs/xfs/scrub/rgsuper.c
@@ -13,6 +13,7 @@
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
+#include "xfs_rmap.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
@@ -34,6 +35,7 @@ xchk_rgsuperblock_xref(
return;
xchk_xref_is_used_rt_space(sc, xfs_rgbno_to_rtb(sc->sr.rtg, 0), 1);
+ xchk_xref_is_only_rt_owned_by(sc, 0, 1, &XFS_RMAP_OINFO_FS);
}
int
@@ -61,7 +63,9 @@ xchk_rgsuperblock(
if (!xchk_xref_process_error(sc, 0, 0, &error))
return error;
- xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP_SHARED);
+ error = xchk_rtgroup_lock(sc, &sc->sr, XFS_RTGLOCK_BITMAP_SHARED);
+ if (error)
+ return error;
/*
* Since we already validated the rt superblock at mount time, we don't
diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c
index a0a227d183d2..c2c7b76cc25a 100644
--- a/fs/xfs/scrub/rmap_repair.c
+++ b/fs/xfs/scrub/rmap_repair.c
@@ -31,6 +31,8 @@
#include "xfs_refcount.h"
#include "xfs_refcount_btree.h"
#include "xfs_ag.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_rtgroup.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -499,6 +501,63 @@ xrep_rmap_scan_iext(
return xrep_rmap_stash_accumulated(rf);
}
+static int
+xrep_rmap_scan_meta_btree(
+ struct xrep_rmap_ifork *rf,
+ struct xfs_inode *ip)
+{
+ struct xfs_scrub *sc = rf->rr->sc;
+ struct xfs_rtgroup *rtg = NULL;
+ struct xfs_btree_cur *cur = NULL;
+ enum xfs_rtg_inodes type;
+ int error;
+
+ if (rf->whichfork != XFS_DATA_FORK)
+ return -EFSCORRUPTED;
+
+ switch (ip->i_metatype) {
+ case XFS_METAFILE_RTRMAP:
+ type = XFS_RTGI_RMAP;
+ break;
+ default:
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) {
+ if (ip == rtg->rtg_inodes[type])
+ goto found;
+ }
+
+ /*
+ * We should never find an rt metadata btree inode that isn't
+ * associated with an rtgroup yet has ondisk blocks allocated to it.
+ */
+ if (ip->i_nblocks) {
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
+
+found:
+ switch (ip->i_metatype) {
+ case XFS_METAFILE_RTRMAP:
+ cur = xfs_rtrmapbt_init_cursor(sc->tp, rtg);
+ break;
+ default:
+ ASSERT(0);
+ error = -EFSCORRUPTED;
+ goto out_rtg;
+ }
+
+ error = xrep_rmap_scan_iroot_btree(rf, cur);
+ xfs_btree_del_cursor(cur, error);
+out_rtg:
+ xfs_rtgroup_rele(rtg);
+ return error;
+}
+
/* Find all the extents from a given AG in an inode fork. */
STATIC int
xrep_rmap_scan_ifork(
@@ -512,14 +571,14 @@ xrep_rmap_scan_ifork(
.whichfork = whichfork,
};
struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork);
+ bool mappings_done;
int error = 0;
if (!ifp)
return 0;
- if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
- bool mappings_done;
-
+ switch (ifp->if_format) {
+ case XFS_DINODE_FMT_BTREE:
/*
* Scan the bmap btree for data device mappings. This includes
* the btree blocks themselves, even if this is a realtime
@@ -528,15 +587,18 @@ xrep_rmap_scan_ifork(
error = xrep_rmap_scan_bmbt(&rf, ip, &mappings_done);
if (error || mappings_done)
return error;
- } else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
- return 0;
+ fallthrough;
+ case XFS_DINODE_FMT_EXTENTS:
+ /* Scan incore extent cache if this isn't a realtime file. */
+ if (xfs_ifork_is_realtime(ip, whichfork))
+ return 0;
+
+ return xrep_rmap_scan_iext(&rf, ifp);
+ case XFS_DINODE_FMT_META_BTREE:
+ return xrep_rmap_scan_meta_btree(&rf, ip);
}
- /* Scan incore extent cache if this isn't a realtime file. */
- if (xfs_ifork_is_realtime(ip, whichfork))
- return 0;
-
- return xrep_rmap_scan_iext(&rf, ifp);
+ return 0;
}
/*
@@ -1552,7 +1614,7 @@ xrep_rmapbt_live_update(
if (!xrep_rmapbt_want_live_update(&rr->iscan, &p->oinfo))
goto out_unlock;
- trace_xrep_rmap_live_update(rr->sc->sa.pag, action, p);
+ trace_xrep_rmap_live_update(pag_group(rr->sc->sa.pag), action, p);
error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp);
if (error)
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 376a36fd9a9c..28c90a31f4c3 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -9,17 +9,24 @@
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
+#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_rtbitmap.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
#include "xfs_bit.h"
+#include "xfs_rtgroup.h"
#include "xfs_sb.h"
+#include "xfs_rmap.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_exchmaps.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
+#include "scrub/tempexch.h"
#include "scrub/rtbitmap.h"
+#include "scrub/btree.h"
/* Set us up with the realtime metadata locked. */
int
@@ -30,10 +37,15 @@ xchk_setup_rtbitmap(
struct xchk_rtbitmap *rtb;
int error;
- rtb = kzalloc(sizeof(struct xchk_rtbitmap), XCHK_GFP_FLAGS);
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+
+ rtb = kzalloc(struct_size(rtb, words, xchk_rtbitmap_wordcnt(sc)),
+ XCHK_GFP_FLAGS);
if (!rtb)
return -ENOMEM;
sc->buf = rtb;
+ rtb->sc = sc;
error = xchk_rtgroup_init(sc, sc->sm->sm_agno, &sc->sr);
if (error)
@@ -49,8 +61,7 @@ xchk_setup_rtbitmap(
if (error)
return error;
- error = xchk_install_live_inode(sc,
- sc->sr.rtg->rtg_inodes[XFS_RTGI_BITMAP]);
+ error = xchk_install_live_inode(sc, rtg_bitmap(sc->sr.rtg));
if (error)
return error;
@@ -58,12 +69,15 @@ xchk_setup_rtbitmap(
if (error)
return error;
+ error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
+ if (error)
+ return error;
+
/*
* Now that we've locked the rtbitmap, we can't race with growfsrt
* trying to expand the bitmap or change the size of the rt volume.
* Hence it is safe to compute and check the geometry values.
*/
- xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP);
if (mp->m_sb.sb_rblocks) {
rtb->rextents = xfs_blen_to_rtbxlen(mp, mp->m_sb.sb_rblocks);
rtb->rextslog = xfs_compute_rextslog(rtb->rextents);
@@ -73,7 +87,30 @@ xchk_setup_rtbitmap(
return 0;
}
-/* Realtime bitmap. */
+/* Per-rtgroup bitmap contents. */
+
+/* Cross-reference rtbitmap entries with other metadata. */
+STATIC void
+xchk_rtbitmap_xref(
+ struct xchk_rtbitmap *rtb,
+ xfs_rtblock_t startblock,
+ xfs_rtblock_t blockcount)
+{
+ struct xfs_scrub *sc = rtb->sc;
+ xfs_rgblock_t rgbno = xfs_rtb_to_rgbno(sc->mp, startblock);
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+ if (!sc->sr.rmap_cur)
+ return;
+
+ xchk_xref_has_no_rt_owner(sc, rgbno, blockcount);
+
+ if (rtb->next_free_rgbno < rgbno)
+ xchk_xref_has_rt_owner(sc, rtb->next_free_rgbno,
+ rgbno - rtb->next_free_rgbno);
+ rtb->next_free_rgbno = rgbno + blockcount;
+}
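
The cross-reference walks the free extents in ascending order and checks that every gap between them is fully accounted for in the rtrmap. A toy version of that gap-tracking loop, with a print standing in for xchk_xref_has_rt_owner():

    /* Hedged sketch: cross-checking free-extent gaps against ownership. */
    #include <stdbool.h>
    #include <stdio.h>

    struct ext { unsigned start, len; };

    /* Free extents in ascending order, as the bitmap walk produces them. */
    static const struct ext free_exts[] = { {0, 4}, {10, 2}, {20, 5} };

    /* Stand-in for xchk_xref_has_rt_owner(): the gap must be fully mapped. */
    static bool gap_fully_owned(unsigned start, unsigned len)
    {
    	printf("expect rmappings for [%u, %u)\n", start, start + len);
    	return true;
    }

    int main(void)
    {
    	unsigned next_free = 0; /* mirrors rtb->next_free_rgbno */

    	for (unsigned i = 0; i < 3; i++) {
    		if (next_free < free_exts[i].start)
    			gap_fully_owned(next_free,
    					free_exts[i].start - next_free);
    		next_free = free_exts[i].start + free_exts[i].len;
    	}
    	return 0;
    }
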
/* Scrub a free extent record from the realtime bitmap. */
STATIC int
@@ -83,7 +120,8 @@ xchk_rtbitmap_rec(
const struct xfs_rtalloc_rec *rec,
void *priv)
{
- struct xfs_scrub *sc = priv;
+ struct xchk_rtbitmap *rtb = priv;
+ struct xfs_scrub *sc = rtb->sc;
xfs_rtblock_t startblock;
xfs_filblks_t blockcount;
@@ -92,6 +130,12 @@ xchk_rtbitmap_rec(
if (!xfs_verify_rtbext(rtg_mount(rtg), startblock, blockcount))
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+
+ xchk_rtbitmap_xref(rtb, startblock, blockcount);
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return -ECANCELED;
+
return 0;
}
@@ -139,15 +183,16 @@ xchk_rtbitmap_check_extents(
return error;
}
-/* Scrub the realtime bitmap. */
+/* Scrub this group's realtime bitmap. */
int
xchk_rtbitmap(
struct xfs_scrub *sc)
{
struct xfs_mount *mp = sc->mp;
struct xfs_rtgroup *rtg = sc->sr.rtg;
- struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
+ struct xfs_inode *rbmip = rtg_bitmap(rtg);
struct xchk_rtbitmap *rtb = sc->buf;
+ xfs_rgblock_t last_rgbno;
int error;
/* Is sb_rextents correct? */
@@ -200,10 +245,20 @@ xchk_rtbitmap(
if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
return error;
- error = xfs_rtalloc_query_all(rtg, sc->tp, xchk_rtbitmap_rec, sc);
+ rtb->next_free_rgbno = 0;
+ error = xfs_rtalloc_query_all(rtg, sc->tp, xchk_rtbitmap_rec, rtb);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
return error;
+ /*
+ * Check that there are rmap records for all rt extents between the end of
+ * the last free extent we saw and the last possible extent in the rt
+ * group.
+ */
+ last_rgbno = rtg->rtg_extents * mp->m_sb.sb_rextsize - 1;
+ if (rtb->next_free_rgbno < last_rgbno)
+ xchk_xref_has_rt_owner(sc, rtb->next_free_rgbno,
+ last_rgbno - rtb->next_free_rgbno);
return 0;
}
@@ -215,7 +270,7 @@ xchk_xref_is_used_rt_space(
xfs_extlen_t len)
{
struct xfs_rtgroup *rtg = sc->sr.rtg;
- struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
+ struct xfs_inode *rbmip = rtg_bitmap(rtg);
xfs_rtxnum_t startext;
xfs_rtxnum_t endext;
bool is_free;
diff --git a/fs/xfs/scrub/rtbitmap.h b/fs/xfs/scrub/rtbitmap.h
index 85304ff019e1..fe52b877253d 100644
--- a/fs/xfs/scrub/rtbitmap.h
+++ b/fs/xfs/scrub/rtbitmap.h
@@ -6,17 +6,72 @@
#ifndef __XFS_SCRUB_RTBITMAP_H__
#define __XFS_SCRUB_RTBITMAP_H__
+/*
+ * We use an xfile to construct new bitmap blocks for the portion of the
+ * rtbitmap file that we're replacing. Whereas the ondisk bitmap must be
+ * accessed through the buffer cache, the xfile bitmap supports direct
+ * word-level accesses. Therefore, we create a small abstraction for linear
+ * access.
+ */
+typedef unsigned long long xrep_wordoff_t;
+typedef unsigned int xrep_wordcnt_t;
+
+/* Mask to round an rtx down to the nearest bitmap word. */
+#define XREP_RTBMP_WORDMASK ((1ULL << XFS_NBWORDLOG) - 1)
+
struct xchk_rtbitmap {
+ struct xfs_scrub *sc;
+
uint64_t rextents;
uint64_t rbmblocks;
unsigned int rextslog;
unsigned int resblks;
+
+ /* The next free rt group block number that we expect to see. */
+ xfs_rgblock_t next_free_rgbno;
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+ /* stuff for staging a new bitmap */
+ struct xfs_rtalloc_args args;
+ struct xrep_tempexch tempexch;
+#endif
+
+ /* The next rtgroup block we expect to see during our rtrmapbt walk. */
+ xfs_rgblock_t next_rgbno;
+
+ /* rtgroup lock flags */
+ unsigned int rtglock_flags;
+
+ /* rtword position of xfile as we write buffers to disk. */
+ xrep_wordoff_t prep_wordoff;
+
+ /* In-memory rtbitmap for repair. */
+ union xfs_rtword_raw words[];
};
#ifdef CONFIG_XFS_ONLINE_REPAIR
int xrep_setup_rtbitmap(struct xfs_scrub *sc, struct xchk_rtbitmap *rtb);
+
+/*
+ * How big should the words[] buffer be?
+ *
+ * For repairs, we want a full fsblock worth of space so that we can memcpy a
+ * buffer full of 1s into the xfile bitmap. The xfile bitmap doesn't have
+ * rtbitmap block headers, so we don't use blockwsize. Scrub doesn't use the
+ * words buffer at all.
+ */
+static inline unsigned int
+xchk_rtbitmap_wordcnt(
+ struct xfs_scrub *sc)
+{
+ if (xchk_could_repair(sc))
+ return sc->mp->m_sb.sb_blocksize >> XFS_WORDLOG;
+ return 0;
+}
#else
# define xrep_setup_rtbitmap(sc, rtb) (0)
+# define xchk_rtbitmap_wordcnt(sc) (0)
#endif /* CONFIG_XFS_ONLINE_REPAIR */
#endif /* __XFS_SCRUB_RTBITMAP_H__ */
diff --git a/fs/xfs/scrub/rtbitmap_repair.c b/fs/xfs/scrub/rtbitmap_repair.c
index 0fef98e9f834..c6e33834c5ae 100644
--- a/fs/xfs/scrub/rtbitmap_repair.c
+++ b/fs/xfs/scrub/rtbitmap_repair.c
@@ -12,32 +12,65 @@
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
+#include "xfs_rtalloc.h"
#include "xfs_inode.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
+#include "xfs_rmap.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_exchmaps.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
+#include "xfs_extent_busy.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/xfile.h"
+#include "scrub/tempfile.h"
+#include "scrub/tempexch.h"
+#include "scrub/reap.h"
#include "scrub/rtbitmap.h"
-/* Set up to repair the realtime bitmap file metadata. */
+/* rt bitmap content repairs */
+
+/* Set up to repair the realtime bitmap for this group. */
int
xrep_setup_rtbitmap(
struct xfs_scrub *sc,
struct xchk_rtbitmap *rtb)
{
struct xfs_mount *mp = sc->mp;
- unsigned long long blocks = 0;
+ char *descr;
+ unsigned long long blocks = mp->m_sb.sb_rbmblocks;
+ int error;
+
+ error = xrep_tempfile_create(sc, S_IFREG);
+ if (error)
+ return error;
+
+ /* Create an xfile to hold our reconstructed bitmap. */
+ descr = xchk_xfile_rtgroup_descr(sc, "bitmap file");
+ error = xfile_create(descr, blocks * mp->m_sb.sb_blocksize, &sc->xfile);
+ kfree(descr);
+ if (error)
+ return error;
/*
- * Reserve enough blocks to write out a completely new bmbt for a
- * maximally fragmented bitmap file. We do not hold the rtbitmap
- * ILOCK yet, so this is entirely speculative.
+ * Reserve enough blocks to write out a completely new bitmap file,
+ * plus twice as many blocks as we would need if we can only allocate
+ * one block per data fork mapping. This should cover the
+ * preallocation of the temporary file and exchanging the extent
+ * mappings.
+ *
+ * We cannot use xfs_exchmaps_estimate because we have not yet
+ * constructed the replacement bitmap and therefore do not know how
+ * many extents it will use. By the time we do, we will have a dirty
+ * transaction (which we cannot drop because we cannot drop the
+ * rtbitmap ILOCK) and cannot ask for more reservation.
*/
- blocks = xfs_bmbt_calc_size(mp, mp->m_sb.sb_rbmblocks);
+ blocks += xfs_bmbt_calc_size(mp, blocks) * 2;
if (blocks > UINT_MAX)
return -EOPNOTSUPP;
@@ -45,6 +78,304 @@ xrep_setup_rtbitmap(
return 0;
}
+static inline xrep_wordoff_t
+rtx_to_wordoff(
+ struct xfs_mount *mp,
+ xfs_rtxnum_t rtx)
+{
+ return rtx >> XFS_NBWORDLOG;
+}
+
+static inline xrep_wordcnt_t
+rtxlen_to_wordcnt(
+ xfs_rtxlen_t rtxlen)
+{
+ return rtxlen >> XFS_NBWORDLOG;
+}
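
These helpers are plain shift arithmetic. A standalone sketch, assuming the usual 32-bit rt bitmap words (so XFS_NBWORDLOG is 5 and XFS_WORDLOG is 2 -- treat those constants as an assumption of this sketch):

    /* Hedged sketch: rtx <-> bitmap word math, assuming 32-bit words. */
    #include <stdint.h>
    #include <stdio.h>

    #define NBWORDLOG 5 /* log2(bits per word), i.e. XFS_NBWORDLOG */
    #define WORDLOG   2 /* log2(bytes per word), i.e. XFS_WORDLOG */

    int main(void)
    {
    	uint64_t rtx = 1000;
    	uint64_t wordoff = rtx >> NBWORDLOG;          /* word holding rtx */
    	unsigned bit = rtx & ((1U << NBWORDLOG) - 1); /* bit within word */
    	uint64_t byteoff = wordoff << WORDLOG;        /* xfile byte offset */

    	printf("rtx %llu -> word %llu bit %u (byte %llu)\n",
    	       (unsigned long long)rtx, (unsigned long long)wordoff,
    	       bit, (unsigned long long)byteoff);
    	return 0;
    }
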
+
+/* Helper functions to record rtwords in an xfile. */
+
+static inline int
+xfbmp_load(
+ struct xchk_rtbitmap *rtb,
+ xrep_wordoff_t wordoff,
+ xfs_rtword_t *word)
+{
+ union xfs_rtword_raw urk;
+ int error;
+
+ ASSERT(xfs_has_rtgroups(rtb->sc->mp));
+
+ error = xfile_load(rtb->sc->xfile, &urk,
+ sizeof(union xfs_rtword_raw),
+ wordoff << XFS_WORDLOG);
+ if (error)
+ return error;
+
+ *word = be32_to_cpu(urk.rtg);
+ return 0;
+}
+
+static inline int
+xfbmp_store(
+ struct xchk_rtbitmap *rtb,
+ xrep_wordoff_t wordoff,
+ const xfs_rtword_t word)
+{
+ union xfs_rtword_raw urk;
+
+ ASSERT(xfs_has_rtgroups(rtb->sc->mp));
+
+ urk.rtg = cpu_to_be32(word);
+ return xfile_store(rtb->sc->xfile, &urk,
+ sizeof(union xfs_rtword_raw),
+ wordoff << XFS_WORDLOG);
+}
+
+static inline int
+xfbmp_copyin(
+ struct xchk_rtbitmap *rtb,
+ xrep_wordoff_t wordoff,
+ const union xfs_rtword_raw *word,
+ xrep_wordcnt_t nr_words)
+{
+ return xfile_store(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
+ wordoff << XFS_WORDLOG);
+}
+
+static inline int
+xfbmp_copyout(
+ struct xchk_rtbitmap *rtb,
+ xrep_wordoff_t wordoff,
+ union xfs_rtword_raw *word,
+ xrep_wordcnt_t nr_words)
+{
+ return xfile_load(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
+ wordoff << XFS_WORDLOG);
+}
+
+/* Perform a logical OR operation on an rtword in the incore bitmap. */
+static int
+xrep_rtbitmap_or(
+ struct xchk_rtbitmap *rtb,
+ xrep_wordoff_t wordoff,
+ xfs_rtword_t mask)
+{
+ xfs_rtword_t word;
+ int error;
+
+ error = xfbmp_load(rtb, wordoff, &word);
+ if (error)
+ return error;
+
+ trace_xrep_rtbitmap_or(rtb->sc->mp, wordoff, mask, word);
+
+ return xfbmp_store(rtb, wordoff, word | mask);
+}
+
+/*
+ * Mark as free every rt extent between the next rt block we expected to see
+ * in the rtrmap records and the given rt block.
+ */
+STATIC int
+xrep_rtbitmap_mark_free(
+ struct xchk_rtbitmap *rtb,
+ xfs_rgblock_t rgbno)
+{
+ struct xfs_mount *mp = rtb->sc->mp;
+ struct xfs_rtgroup *rtg = rtb->sc->sr.rtg;
+ xfs_rtxnum_t startrtx;
+ xfs_rtxnum_t nextrtx;
+ xrep_wordoff_t wordoff, nextwordoff;
+ unsigned int bit;
+ unsigned int bufwsize;
+ xfs_extlen_t mod;
+ xfs_rtword_t mask;
+ int error;
+
+ if (!xfs_verify_rgbext(rtg, rtb->next_rgbno, rgbno - rtb->next_rgbno))
+ return -EFSCORRUPTED;
+
+ /*
+ * Convert rt blocks to rt extents. The block range we find must be
+ * aligned to an rtextent boundary on both ends.
+ */
+ startrtx = xfs_rgbno_to_rtx(mp, rtb->next_rgbno);
+ mod = xfs_rgbno_to_rtxoff(mp, rtb->next_rgbno);
+ if (mod)
+ return -EFSCORRUPTED;
+
+ nextrtx = xfs_rgbno_to_rtx(mp, rgbno - 1) + 1;
+ mod = xfs_rgbno_to_rtxoff(mp, rgbno - 1);
+ if (mod != mp->m_sb.sb_rextsize - 1)
+ return -EFSCORRUPTED;
+
+ trace_xrep_rtbitmap_record_free(mp, startrtx, nextrtx - 1);
+
+ /* Set bits as needed to round startrtx up to the nearest word. */
+ bit = startrtx & XREP_RTBMP_WORDMASK;
+ if (bit) {
+ xfs_rtblock_t len = nextrtx - startrtx;
+ unsigned int lastbit;
+
+ lastbit = min(bit + len, XFS_NBWORD);
+ mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;
+
+ error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, startrtx),
+ mask);
+ if (error || lastbit - bit == len)
+ return error;
+ startrtx += XFS_NBWORD - bit;
+ }
+
+ /* Set bits as needed to round nextrtx down to the nearest word. */
+ bit = nextrtx & XREP_RTBMP_WORDMASK;
+ if (bit) {
+ mask = ((xfs_rtword_t)1 << bit) - 1;
+
+ error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, nextrtx),
+ mask);
+ if (error || startrtx + bit == nextrtx)
+ return error;
+ nextrtx -= bit;
+ }
+
+ trace_xrep_rtbitmap_record_free_bulk(mp, startrtx, nextrtx - 1);
+
+ /* Set all the words in between, up to a whole fs block at once. */
+ wordoff = rtx_to_wordoff(mp, startrtx);
+ nextwordoff = rtx_to_wordoff(mp, nextrtx);
+ bufwsize = mp->m_sb.sb_blocksize >> XFS_WORDLOG;
+
+ while (wordoff < nextwordoff) {
+ xrep_wordoff_t rem;
+ xrep_wordcnt_t wordcnt;
+
+ wordcnt = min_t(xrep_wordcnt_t, nextwordoff - wordoff,
+ bufwsize);
+
+ /*
+ * Try to keep us aligned to the rtwords buffer to reduce the
+ * number of xfile writes.
+ */
+ rem = wordoff & (bufwsize - 1);
+ if (rem)
+ wordcnt = min_t(xrep_wordcnt_t, wordcnt,
+ bufwsize - rem);
+
+ error = xfbmp_copyin(rtb, wordoff, rtb->words, wordcnt);
+ if (error)
+ return error;
+
+ wordoff += wordcnt;
+ }
+
+ return 0;
+}
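
The head/tail masking above is the classic set-bits-in-a-range pattern. The same masks, folded into a simple per-word loop over a toy bitmap (no xfile, no fs-block batching):

    /* Hedged sketch: marking a bit range free with word masks. */
    #include <stdint.h>
    #include <stdio.h>

    #define NBWORD 32
    static uint32_t bmp[4]; /* 128-bit toy bitmap */

    /* Set bits [start, end); equivalent to the head/bulk/tail logic above. */
    static void set_range(unsigned start, unsigned end)
    {
    	while (start < end) {
    		unsigned bit = start % NBWORD;
    		unsigned n = NBWORD - bit;

    		if (n > end - start)
    			n = end - start;
    		/* Guard: (1U << 32) is undefined, so special-case full words. */
    		bmp[start / NBWORD] |=
    			(n == NBWORD ? ~0U : ((1U << n) - 1)) << bit;
    		start += n;
    	}
    }

    int main(void)
    {
    	set_range(5, 70);
    	for (int i = 0; i < 4; i++)
    		printf("word %d: %08x\n", i, bmp[i]);
    	return 0;
    }
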
+
+/* Set free space in the rtbitmap based on rtrmapbt records. */
+STATIC int
+xrep_rtbitmap_walk_rtrmap(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xchk_rtbitmap *rtb = priv;
+ int error = 0;
+
+ if (xchk_should_terminate(rtb->sc, &error))
+ return error;
+
+ if (rtb->next_rgbno < rec->rm_startblock) {
+ error = xrep_rtbitmap_mark_free(rtb, rec->rm_startblock);
+ if (error)
+ return error;
+ }
+
+ rtb->next_rgbno = max(rtb->next_rgbno,
+ rec->rm_startblock + rec->rm_blockcount);
+ return 0;
+}
+
+/*
+ * Walk the rtrmapbt to find all the gaps between records, and mark the gaps
+ * in the realtime bitmap that we're computing.
+ */
+STATIC int
+xrep_rtbitmap_find_freespace(
+ struct xchk_rtbitmap *rtb)
+{
+ struct xfs_scrub *sc = rtb->sc;
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_rtgroup *rtg = sc->sr.rtg;
+ uint64_t blockcount;
+ int error;
+
+ /* Prepare a buffer of ones so that we can accelerate bulk setting. */
+ memset(rtb->words, 0xFF, mp->m_sb.sb_blocksize);
+
+ xrep_rtgroup_btcur_init(sc, &sc->sr);
+ error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap,
+ rtb);
+ if (error)
+ goto out;
+
+ /*
+ * Mark as free every possible rt extent from the last one we saw to
+ * the end of the rt group.
+ */
+ blockcount = rtg->rtg_extents * mp->m_sb.sb_rextsize;
+ if (rtb->next_rgbno < blockcount) {
+ error = xrep_rtbitmap_mark_free(rtb, blockcount);
+ if (error)
+ goto out;
+ }
+
+out:
+ xchk_rtgroup_btcur_free(&sc->sr);
+ return error;
+}
+
+static int
+xrep_rtbitmap_prep_buf(
+ struct xfs_scrub *sc,
+ struct xfs_buf *bp,
+ void *data)
+{
+ struct xchk_rtbitmap *rtb = data;
+ struct xfs_mount *mp = sc->mp;
+ union xfs_rtword_raw *ondisk;
+ int error;
+
+ rtb->args.mp = sc->mp;
+ rtb->args.tp = sc->tp;
+ rtb->args.rbmbp = bp;
+ ondisk = xfs_rbmblock_wordptr(&rtb->args, 0);
+ rtb->args.rbmbp = NULL;
+
+ error = xfbmp_copyout(rtb, rtb->prep_wordoff, ondisk,
+ mp->m_blockwsize);
+ if (error)
+ return error;
+
+ if (xfs_has_rtgroups(sc->mp)) {
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
+ hdr->rt_owner = cpu_to_be64(sc->ip->i_ino);
+ hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp));
+ hdr->rt_lsn = 0;
+ uuid_copy(&hdr->rt_uuid, &sc->mp->m_sb.sb_meta_uuid);
+ bp->b_ops = &xfs_rtbitmap_buf_ops;
+ } else {
+ bp->b_ops = &xfs_rtbuf_ops;
+ }
+
+ rtb->prep_wordoff += mp->m_blockwsize;
+ xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTBITMAP_BUF);
+ return 0;
+}
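
Each new bitmap block gets stamped with a self-describing header before it goes to disk. A loose userspace sketch of that stamping -- the struct layout and magic value here are illustrative, not the ondisk xfs_rtbuf_blkinfo format:

    /* Hedged sketch: stamping a self-describing block header (toy layout). */
    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    struct blk_hdr {            /* loosely modeled on xfs_rtbuf_blkinfo */
    	uint32_t magic;     /* identifies the block type */
    	uint64_t owner;     /* inode that owns this block */
    	uint64_t blkno;     /* daddr, so misplaced writes are detectable */
    	uint64_t lsn;       /* zeroed here; set at writeback in the kernel */
    	uint8_t  uuid[16];  /* ties the block to one filesystem */
    };

    int main(void)
    {
    	uint8_t block[512] = { 0 };
    	struct blk_hdr hdr = {
    		.magic = 0x52544249, /* arbitrary stand-in magic */
    		.owner = 133,
    		.blkno = 8192,
    	};

    	memcpy(block, &hdr, sizeof(hdr)); /* header leads the block */
    	printf("stamped block for owner %llu\n",
    	       (unsigned long long)hdr.owner);
    	return 0;
    }
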
+
/*
* Make sure that the given range of the data fork of the realtime file is
* mapped to written blocks. The caller must ensure that the inode is joined
@@ -160,9 +491,18 @@ xrep_rtbitmap(
{
struct xchk_rtbitmap *rtb = sc->buf;
struct xfs_mount *mp = sc->mp;
+ struct xfs_group *xg = rtg_group(sc->sr.rtg);
unsigned long long blocks = 0;
+ unsigned int busy_gen;
int error;
+ /* We require the realtime rmapbt to rebuild anything. */
+ if (!xfs_has_rtrmapbt(sc->mp))
+ return -EOPNOTSUPP;
+ /* We require atomic file exchange range to rebuild anything. */
+ if (!xfs_has_exchange_range(sc->mp))
+ return -EOPNOTSUPP;
+
/* Impossibly large rtbitmap means we can't touch the filesystem. */
if (rtb->rbmblocks > U32_MAX)
return 0;
@@ -195,6 +535,79 @@ xrep_rtbitmap(
if (error)
return error;
- /* Fix inconsistent bitmap geometry */
- return xrep_rtbitmap_geometry(sc, rtb);
+ /*
+ * Fix inconsistent bitmap geometry. This function returns with a
+ * clean scrub transaction.
+ */
+ error = xrep_rtbitmap_geometry(sc, rtb);
+ if (error)
+ return error;
+
+ /*
+ * Make sure the busy extent list is clear because we can't put extents
+ * on there twice.
+ */
+ if (!xfs_extent_busy_list_empty(xg, &busy_gen)) {
+ error = xfs_extent_busy_flush(sc->tp, xg, busy_gen, 0);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Generate the new rtbitmap data. We don't need the rtbmp information
+ * once this call is finished.
+ */
+ error = xrep_rtbitmap_find_freespace(rtb);
+ if (error)
+ return error;
+
+ /*
+ * Try to take ILOCK_EXCL of the temporary file. We had better be the
+ * only ones holding onto this inode, but we can't block while holding
+ * the rtbitmap file's ILOCK_EXCL.
+ */
+ while (!xrep_tempfile_ilock_nowait(sc)) {
+ if (xchk_should_terminate(sc, &error))
+ return error;
+ delay(1);
+ }
+
+ /*
+ * Make sure we have space allocated for the part of the bitmap
+ * file that corresponds to this group. We already joined sc->ip.
+ */
+ xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+ error = xrep_tempfile_prealloc(sc, 0, rtb->rbmblocks);
+ if (error)
+ return error;
+
+ /* Last chance to abort before we start committing fixes. */
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ /* Copy the bitmap file that we generated. */
+ error = xrep_tempfile_copyin(sc, 0, rtb->rbmblocks,
+ xrep_rtbitmap_prep_buf, rtb);
+ if (error)
+ return error;
+ error = xrep_tempfile_set_isize(sc,
+ XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks));
+ if (error)
+ return error;
+
+ /*
+ * Now exchange the data fork contents. We're done with the temporary
+ * buffer, so we can reuse it for the tempfile exchmaps information.
+ */
+ error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0,
+ rtb->rbmblocks, &rtb->tempexch);
+ if (error)
+ return error;
+
+ error = xrep_tempexch_contents(sc, &rtb->tempexch);
+ if (error)
+ return error;
+
+ /* Free the old rtbitmap blocks if they're not in use. */
+ return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
}
diff --git a/fs/xfs/scrub/rtrmap.c b/fs/xfs/scrub/rtrmap.c
new file mode 100644
index 000000000000..300a1e85b3d6
--- /dev/null
+++ b/fs/xfs/scrub/rtrmap.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_rtalloc.h"
+#include "xfs_rtgroup.h"
+#include "xfs_metafile.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+
+/* Set us up with the realtime metadata locked. */
+int
+xchk_setup_rtrmapbt(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+
+ if (xchk_could_repair(sc)) {
+ error = xrep_setup_rtrmapbt(sc);
+ if (error)
+ return error;
+ }
+
+ error = xchk_rtgroup_init(sc, sc->sm->sm_agno, &sc->sr);
+ if (error)
+ return error;
+
+ error = xchk_setup_rt(sc);
+ if (error)
+ return error;
+
+ error = xchk_install_live_inode(sc, rtg_rmap(sc->sr.rtg));
+ if (error)
+ return error;
+
+ return xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
+}
+
+/* Realtime reverse mapping. */
+
+struct xchk_rtrmap {
+ /*
+ * The furthest-reaching of the rmapbt records that we've already
+ * processed. This enables us to detect overlapping records for space
+ * allocations that cannot be shared.
+ */
+ struct xfs_rmap_irec overlap_rec;
+
+ /*
+ * The previous rmapbt record, so that we can check for two records
+ * that could be one.
+ */
+ struct xfs_rmap_irec prev_rec;
+};
+
+/* Flag failures for records that overlap but cannot. */
+STATIC void
+xchk_rtrmapbt_check_overlapping(
+ struct xchk_btree *bs,
+ struct xchk_rtrmap *cr,
+ const struct xfs_rmap_irec *irec)
+{
+ xfs_rtblock_t pnext, inext;
+
+ if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ /* No previous record? */
+ if (cr->overlap_rec.rm_blockcount == 0)
+ goto set_prev;
+
+ /* Do overlap_rec and irec overlap? */
+ pnext = cr->overlap_rec.rm_startblock + cr->overlap_rec.rm_blockcount;
+ if (pnext <= irec->rm_startblock)
+ goto set_prev;
+
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+ /* Save whichever rmap record extends furthest. */
+ inext = irec->rm_startblock + irec->rm_blockcount;
+ if (pnext > inext)
+ return;
+
+set_prev:
+ memcpy(&cr->overlap_rec, irec, sizeof(struct xfs_rmap_irec));
+}
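
Because records arrive in key order, overlap detection only needs the furthest-reaching end seen so far. A toy version of that check:

    /* Hedged sketch: flagging overlaps in a sorted record stream. */
    #include <stdio.h>

    struct rec { unsigned start, len; };

    int main(void)
    {
    	struct rec recs[] = { {0, 10}, {8, 4}, {20, 2} }; /* {8,4} overlaps */
    	unsigned furthest = 0; /* end of the furthest-reaching record */

    	for (int i = 0; i < 3; i++) {
    		if (furthest > recs[i].start)
    			printf("overlap at %u\n", recs[i].start);
    		if (recs[i].start + recs[i].len > furthest)
    			furthest = recs[i].start + recs[i].len;
    	}
    	return 0;
    }
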
+
+/* Decide if two reverse-mapping records can be merged. */
+static inline bool
+xchk_rtrmap_mergeable(
+ struct xchk_rtrmap *cr,
+ const struct xfs_rmap_irec *r2)
+{
+ const struct xfs_rmap_irec *r1 = &cr->prev_rec;
+
+ /* Ignore if prev_rec is not yet initialized. */
+ if (cr->prev_rec.rm_blockcount == 0)
+ return false;
+
+ if (r1->rm_owner != r2->rm_owner)
+ return false;
+ if (r1->rm_startblock + r1->rm_blockcount != r2->rm_startblock)
+ return false;
+ if ((unsigned long long)r1->rm_blockcount + r2->rm_blockcount >
+ XFS_RMAP_LEN_MAX)
+ return false;
+ if (r1->rm_flags != r2->rm_flags)
+ return false;
+ return r1->rm_offset + r1->rm_blockcount == r2->rm_offset;
+}
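
Two adjacent records are mergeable when owner and flags match and both the block range and the file offset continue without a gap; a correct btree would have stored them as one record. A cut-down sketch of that test (the XFS_RMAP_LEN_MAX overflow guard is omitted here):

    /* Hedged sketch: two records that a correct btree would have merged. */
    #include <stdbool.h>
    #include <stdio.h>

    struct rmap { unsigned long long owner, start, len, off; unsigned flags; };

    static bool mergeable(const struct rmap *a, const struct rmap *b)
    {
    	return a->owner == b->owner &&
    	       a->flags == b->flags &&
    	       a->start + a->len == b->start &&
    	       a->off + a->len == b->off;
    }

    int main(void)
    {
    	struct rmap r1 = { 133, 100, 8, 0, 0 };
    	struct rmap r2 = { 133, 108, 4, 8, 0 }; /* contiguous with r1 */

    	if (mergeable(&r1, &r2))
    		printf("corrupt: records should have been one\n");
    	return 0;
    }
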
+
+/* Flag failures for records that could be merged. */
+STATIC void
+xchk_rtrmapbt_check_mergeable(
+ struct xchk_btree *bs,
+ struct xchk_rtrmap *cr,
+ const struct xfs_rmap_irec *irec)
+{
+ if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ if (xchk_rtrmap_mergeable(cr, irec))
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+
+ memcpy(&cr->prev_rec, irec, sizeof(struct xfs_rmap_irec));
+}
+
+/* Cross-reference with other metadata. */
+STATIC void
+xchk_rtrmapbt_xref(
+ struct xfs_scrub *sc,
+ struct xfs_rmap_irec *irec)
+{
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xchk_xref_is_used_rt_space(sc,
+ xfs_rgbno_to_rtb(sc->sr.rtg, irec->rm_startblock),
+ irec->rm_blockcount);
+}
+
+/* Scrub a realtime rmapbt record. */
+STATIC int
+xchk_rtrmapbt_rec(
+ struct xchk_btree *bs,
+ const union xfs_btree_rec *rec)
+{
+ struct xchk_rtrmap *cr = bs->private;
+ struct xfs_rmap_irec irec;
+
+ if (xfs_rmap_btrec_to_irec(rec, &irec) != NULL ||
+ xfs_rtrmap_check_irec(to_rtg(bs->cur->bc_group), &irec) != NULL) {
+ xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
+ return 0;
+ }
+
+ if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return 0;
+
+ xchk_rtrmapbt_check_mergeable(bs, cr, &irec);
+ xchk_rtrmapbt_check_overlapping(bs, cr, &irec);
+ xchk_rtrmapbt_xref(bs->sc, &irec);
+ return 0;
+}
+
+/* Scrub the realtime rmap btree. */
+int
+xchk_rtrmapbt(
+ struct xfs_scrub *sc)
+{
+ struct xfs_inode *ip = rtg_rmap(sc->sr.rtg);
+ struct xfs_owner_info oinfo;
+ struct xchk_rtrmap cr = { };
+ int error;
+
+ error = xchk_metadata_inode_forks(sc);
+ if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+ return error;
+
+ xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, XFS_DATA_FORK);
+ return xchk_btree(sc, sc->sr.rmap_cur, xchk_rtrmapbt_rec, &oinfo, &cr);
+}
+
+/* xref check that the extent has no realtime reverse mapping at all */
+void
+xchk_xref_has_no_rt_owner(
+ struct xfs_scrub *sc,
+ xfs_rgblock_t bno,
+ xfs_extlen_t len)
+{
+ enum xbtree_recpacking outcome;
+ int error;
+
+ if (!sc->sr.rmap_cur || xchk_skip_xref(sc->sm))
+ return;
+
+ error = xfs_rmap_has_records(sc->sr.rmap_cur, bno, len, &outcome);
+ if (!xchk_should_check_xref(sc, &error, &sc->sr.rmap_cur))
+ return;
+ if (outcome != XBTREE_RECPACKING_EMPTY)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.rmap_cur, 0);
+}
+
+/* xref check that the extent is completely mapped */
+void
+xchk_xref_has_rt_owner(
+ struct xfs_scrub *sc,
+ xfs_rgblock_t bno,
+ xfs_extlen_t len)
+{
+ enum xbtree_recpacking outcome;
+ int error;
+
+ if (!sc->sr.rmap_cur || xchk_skip_xref(sc->sm))
+ return;
+
+ error = xfs_rmap_has_records(sc->sr.rmap_cur, bno, len, &outcome);
+ if (!xchk_should_check_xref(sc, &error, &sc->sr.rmap_cur))
+ return;
+ if (outcome != XBTREE_RECPACKING_FULL)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.rmap_cur, 0);
+}
+
+/* xref check that the extent is only owned by a given owner */
+void
+xchk_xref_is_only_rt_owned_by(
+ struct xfs_scrub *sc,
+ xfs_agblock_t bno,
+ xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo)
+{
+ struct xfs_rmap_matches res;
+ int error;
+
+ if (!sc->sr.rmap_cur || xchk_skip_xref(sc->sm))
+ return;
+
+ error = xfs_rmap_count_owners(sc->sr.rmap_cur, bno, len, oinfo, &res);
+ if (!xchk_should_check_xref(sc, &error, &sc->sr.rmap_cur))
+ return;
+ if (res.matches != 1)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.rmap_cur, 0);
+ if (res.bad_non_owner_matches)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.rmap_cur, 0);
+ if (res.non_owner_matches)
+ xchk_btree_xref_set_corrupt(sc, sc->sr.rmap_cur, 0);
+}
diff --git a/fs/xfs/scrub/rtrmap_repair.c b/fs/xfs/scrub/rtrmap_repair.c
new file mode 100644
index 000000000000..49de8bc2dd17
--- /dev/null
+++ b/fs/xfs/scrub/rtrmap_repair.c
@@ -0,0 +1,903 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_btree_staging.h"
+#include "xfs_buf_mem.h"
+#include "xfs_btree_mem.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_alloc.h"
+#include "xfs_rmap.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_rtrmap_btree.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_quota.h"
+#include "xfs_rtalloc.h"
+#include "xfs_ag.h"
+#include "xfs_rtgroup.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/btree.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/bitmap.h"
+#include "scrub/fsb_bitmap.h"
+#include "scrub/xfile.h"
+#include "scrub/xfarray.h"
+#include "scrub/iscan.h"
+#include "scrub/newbt.h"
+#include "scrub/reap.h"
+
+/*
+ * Realtime Reverse Mapping Btree Repair
+ * =====================================
+ *
+ * This isn't quite as difficult as repairing the rmap btree on the data
+ * device, since we only store the data fork extents of realtime files on the
+ * realtime device. We still have to freeze the filesystem and stop the
+ * background threads like we do for the rmap repair, but we only have to scan
+ * realtime inodes.
+ *
+ * Collecting entries for the new realtime rmap btree is easy -- all we have
+ * to do is generate rtrmap entries from the data fork mappings of all realtime
+ * files in the filesystem. We then scan the rmap btrees of the data device
+ * looking for extents belonging to the old btree, noting them in a bitmap.
+ *
+ * To rebuild the realtime rmap btree, we bulk-load the collected mappings into
+ * a new btree cursor and atomically swap that into the realtime inode. Then
+ * we can free the blocks from the old btree.
+ *
+ * We use the 'xrep_rtrmap' prefix for all the rmap functions.
+ */
+
+/* Context for collecting rmaps */
+struct xrep_rtrmap {
+ /* new rtrmapbt information */
+ struct xrep_newbt new_btree;
+
+ /* lock for the xfbtree and xfile */
+ struct mutex lock;
+
+ /* rmap records generated from primary metadata */
+ struct xfbtree rtrmap_btree;
+
+ struct xfs_scrub *sc;
+
+ /* bitmap of old rtrmapbt blocks */
+ struct xfsb_bitmap old_rtrmapbt_blocks;
+
+ /* Hooks into rtrmap update code. */
+ struct xfs_rmap_hook rhook;
+
+ /* inode scan cursor */
+ struct xchk_iscan iscan;
+
+ /* in-memory btree cursor for the ->get_blocks walk */
+ struct xfs_btree_cur *mcur;
+
+ /* Number of records we're staging in the new btree. */
+ uint64_t nr_records;
+};
+
+/* Set us up to repair rt reverse mapping btrees. */
+int
+xrep_setup_rtrmapbt(
+ struct xfs_scrub *sc)
+{
+ struct xrep_rtrmap *rr;
+ char *descr;
+ int error;
+
+ xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);
+
+ descr = xchk_xfile_rtgroup_descr(sc, "reverse mapping records");
+ error = xrep_setup_xfbtree(sc, descr);
+ kfree(descr);
+ if (error)
+ return error;
+
+ rr = kzalloc(sizeof(struct xrep_rtrmap), XCHK_GFP_FLAGS);
+ if (!rr)
+ return -ENOMEM;
+
+ rr->sc = sc;
+ sc->buf = rr;
+ return 0;
+}
+
+/* Make sure there's nothing funny about this mapping. */
+STATIC int
+xrep_rtrmap_check_mapping(
+ struct xfs_scrub *sc,
+ const struct xfs_rmap_irec *rec)
+{
+ if (xfs_rtrmap_check_irec(sc->sr.rtg, rec) != NULL)
+ return -EFSCORRUPTED;
+
+ /* Make sure this isn't free space. */
+ return xrep_require_rtext_inuse(sc, rec->rm_startblock,
+ rec->rm_blockcount);
+}
+
+/* Store a reverse-mapping record. */
+static inline int
+xrep_rtrmap_stash(
+ struct xrep_rtrmap *rr,
+ xfs_rgblock_t startblock,
+ xfs_extlen_t blockcount,
+ uint64_t owner,
+ uint64_t offset,
+ unsigned int flags)
+{
+ struct xfs_rmap_irec rmap = {
+ .rm_startblock = startblock,
+ .rm_blockcount = blockcount,
+ .rm_owner = owner,
+ .rm_offset = offset,
+ .rm_flags = flags,
+ };
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_btree_cur *mcur;
+ int error = 0;
+
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ if (xchk_iscan_aborted(&rr->iscan))
+ return -EFSCORRUPTED;
+
+ trace_xrep_rtrmap_found(sc->mp, &rmap);
+
+ /* Add entry to in-memory btree. */
+ mutex_lock(&rr->lock);
+ mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, sc->tp, &rr->rtrmap_btree);
+ error = xfs_rmap_map_raw(mcur, &rmap);
+ xfs_btree_del_cursor(mcur, error);
+ if (error)
+ goto out_cancel;
+
+ error = xfbtree_trans_commit(&rr->rtrmap_btree, sc->tp);
+ if (error)
+ goto out_abort;
+
+ mutex_unlock(&rr->lock);
+ return 0;
+
+out_cancel:
+ xfbtree_trans_cancel(&rr->rtrmap_btree, sc->tp);
+out_abort:
+ xchk_iscan_abort(&rr->iscan);
+ mutex_unlock(&rr->lock);
+ return error;
+}
+
+/* Finding all file and bmbt extents. */
+
+/* Context for accumulating rmaps for an inode fork. */
+struct xrep_rtrmap_ifork {
+ /*
+ * Accumulate rmap data here to turn multiple adjacent bmaps into a
+ * single rmap.
+ */
+ struct xfs_rmap_irec accum;
+
+ struct xrep_rtrmap *rr;
+};
+
+/* Stash an rmap that we accumulated while walking an inode fork. */
+STATIC int
+xrep_rtrmap_stash_accumulated(
+ struct xrep_rtrmap_ifork *rf)
+{
+ if (rf->accum.rm_blockcount == 0)
+ return 0;
+
+ return xrep_rtrmap_stash(rf->rr, rf->accum.rm_startblock,
+ rf->accum.rm_blockcount, rf->accum.rm_owner,
+ rf->accum.rm_offset, rf->accum.rm_flags);
+}
+
+/* Accumulate a bmbt record. */
+STATIC int
+xrep_rtrmap_visit_bmbt(
+ struct xfs_btree_cur *cur,
+ struct xfs_bmbt_irec *rec,
+ void *priv)
+{
+ struct xrep_rtrmap_ifork *rf = priv;
+ struct xfs_rmap_irec *accum = &rf->accum;
+ struct xfs_mount *mp = rf->rr->sc->mp;
+ xfs_rgblock_t rgbno;
+ unsigned int rmap_flags = 0;
+ int error;
+
+ if (xfs_rtb_to_rgno(mp, rec->br_startblock) !=
+ rtg_rgno(rf->rr->sc->sr.rtg))
+ return 0;
+
+ if (rec->br_state == XFS_EXT_UNWRITTEN)
+ rmap_flags |= XFS_RMAP_UNWRITTEN;
+
+ /* If this bmap is adjacent to the previous one, just add it. */
+ rgbno = xfs_rtb_to_rgbno(mp, rec->br_startblock);
+ if (accum->rm_blockcount > 0 &&
+ rec->br_startoff == accum->rm_offset + accum->rm_blockcount &&
+ rgbno == accum->rm_startblock + accum->rm_blockcount &&
+ rmap_flags == accum->rm_flags) {
+ accum->rm_blockcount += rec->br_blockcount;
+ return 0;
+ }
+
+ /* Otherwise stash the old rmap and start accumulating a new one. */
+ error = xrep_rtrmap_stash_accumulated(rf);
+ if (error)
+ return error;
+
+ accum->rm_startblock = rgbno;
+ accum->rm_blockcount = rec->br_blockcount;
+ accum->rm_offset = rec->br_startoff;
+ accum->rm_flags = rmap_flags;
+ return 0;
+}
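
The accumulator turns runs of mappings that are adjacent in both file-offset and block space into a single rmap record, flushing whenever the run breaks. A standalone sketch of that coalescing:

    /* Hedged sketch: coalescing adjacent file mappings into one record. */
    #include <stdio.h>

    struct map { unsigned off, start, len; };

    int main(void)
    {
    	/* file-offset/block mappings; the first two are adjacent */
    	struct map in[] = { {0, 50, 4}, {4, 54, 2}, {10, 70, 3} };
    	struct map acc = { 0, 0, 0 };

    	for (int i = 0; i < 3; i++) {
    		if (acc.len &&
    		    in[i].off == acc.off + acc.len &&
    		    in[i].start == acc.start + acc.len) {
    			acc.len += in[i].len; /* extend the accumulator */
    			continue;
    		}
    		if (acc.len)
    			printf("rmap: start %u len %u off %u\n",
    			       acc.start, acc.len, acc.off);
    		acc = in[i]; /* start a new record */
    	}
    	if (acc.len) /* flush the final accumulated record */
    		printf("rmap: start %u len %u off %u\n",
    		       acc.start, acc.len, acc.off);
    	return 0;
    }
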
+
+/*
+ * Iterate the block mapping btree to collect rmap records for anything in this
+ * fork that maps to the rt volume. Sets @mappings_done to true if we've
+ * scanned the block mappings in this fork.
+ */
+STATIC int
+xrep_rtrmap_scan_bmbt(
+ struct xrep_rtrmap_ifork *rf,
+ struct xfs_inode *ip,
+ bool *mappings_done)
+{
+ struct xrep_rtrmap *rr = rf->rr;
+ struct xfs_btree_cur *cur;
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
+ int error = 0;
+
+ *mappings_done = false;
+
+ /*
+ * If the incore extent cache is already loaded, we'll just use the
+ * incore extent scanner to record mappings. Don't bother walking the
+ * ondisk extent tree.
+ */
+ if (!xfs_need_iread_extents(ifp))
+ return 0;
+
+ /* Accumulate all the mappings in the bmap btree. */
+ cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, XFS_DATA_FORK);
+ error = xfs_bmap_query_all(cur, xrep_rtrmap_visit_bmbt, rf);
+ xfs_btree_del_cursor(cur, error);
+ if (error)
+ return error;
+
+ /* Stash any remaining accumulated rmaps and exit. */
+ *mappings_done = true;
+ return xrep_rtrmap_stash_accumulated(rf);
+}
+
+/*
+ * Iterate the in-core extent cache to collect rmap records for anything in
+ * this fork that maps to this rt group.
+ */
+STATIC int
+xrep_rtrmap_scan_iext(
+ struct xrep_rtrmap_ifork *rf,
+ struct xfs_ifork *ifp)
+{
+ struct xfs_bmbt_irec rec;
+ struct xfs_iext_cursor icur;
+ int error;
+
+ for_each_xfs_iext(ifp, &icur, &rec) {
+ if (isnullstartblock(rec.br_startblock))
+ continue;
+ error = xrep_rtrmap_visit_bmbt(NULL, &rec, rf);
+ if (error)
+ return error;
+ }
+
+ return xrep_rtrmap_stash_accumulated(rf);
+}
+
+/* Find all the extents on the realtime device mapped by an inode fork. */
+STATIC int
+xrep_rtrmap_scan_dfork(
+ struct xrep_rtrmap *rr,
+ struct xfs_inode *ip)
+{
+ struct xrep_rtrmap_ifork rf = {
+ .accum = { .rm_owner = ip->i_ino, },
+ .rr = rr,
+ };
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
+ int error = 0;
+
+ if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
+ bool mappings_done;
+
+ /*
+ * Scan the bmbt for mappings. If the incore extent tree is
+ * loaded, we want to scan the cached mappings since that's
+ * faster when the extent counts are very high.
+ */
+ error = xrep_rtrmap_scan_bmbt(&rf, ip, &mappings_done);
+ if (error || mappings_done)
+ return error;
+ } else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
+ /* realtime data forks should only be extents or btree */
+ return -EFSCORRUPTED;
+ }
+
+ /* Scan incore extent cache. */
+ return xrep_rtrmap_scan_iext(&rf, ifp);
+}
+
+/* Record reverse mappings for a file. */
+STATIC int
+xrep_rtrmap_scan_inode(
+ struct xrep_rtrmap *rr,
+ struct xfs_inode *ip)
+{
+ unsigned int lock_mode;
+ int error = 0;
+
+ /* Skip the rt rmap btree inode. */
+ if (rr->sc->ip == ip)
+ return 0;
+
+ lock_mode = xfs_ilock_data_map_shared(ip);
+
+ /* Check the data fork if it's on the realtime device. */
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ error = xrep_rtrmap_scan_dfork(rr, ip);
+ if (error)
+ goto out_unlock;
+ }
+
+ xchk_iscan_mark_visited(&rr->iscan, ip);
+out_unlock:
+ xfs_iunlock(ip, lock_mode);
+ return error;
+}
+
+/* Record extents that belong to the realtime rmap inode. */
+STATIC int
+xrep_rtrmap_walk_rmap(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_rtrmap *rr = priv;
+ int error = 0;
+
+ if (xchk_should_terminate(rr->sc, &error))
+ return error;
+
+ /* Skip extents which are not owned by this inode and fork. */
+ if (rec->rm_owner != rr->sc->ip->i_ino)
+ return 0;
+
+ error = xrep_check_ino_btree_mapping(rr->sc, rec);
+ if (error)
+ return error;
+
+ return xfsb_bitmap_set(&rr->old_rtrmapbt_blocks,
+ xfs_gbno_to_fsb(cur->bc_group, rec->rm_startblock),
+ rec->rm_blockcount);
+}
+
+/* Scan one AG for reverse mappings for the realtime rmap btree. */
+STATIC int
+xrep_rtrmap_scan_ag(
+ struct xrep_rtrmap *rr,
+ struct xfs_perag *pag)
+{
+ struct xfs_scrub *sc = rr->sc;
+ int error;
+
+ error = xrep_ag_init(sc, pag, &sc->sa);
+ if (error)
+ return error;
+
+ error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrmap_walk_rmap, rr);
+ xchk_ag_free(sc, &sc->sa);
+ return error;
+}
+
+/* Count and check all collected records. */
+STATIC int
+xrep_rtrmap_check_record(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xrep_rtrmap *rr = priv;
+ int error;
+
+ error = xrep_rtrmap_check_mapping(rr->sc, rec);
+ if (error)
+ return error;
+
+ rr->nr_records++;
+ return 0;
+}
+
+/* Generate all the reverse-mappings for the realtime device. */
+STATIC int
+xrep_rtrmap_find_rmaps(
+ struct xrep_rtrmap *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_perag *pag = NULL;
+ struct xfs_inode *ip;
+ struct xfs_btree_cur *mcur;
+ int error;
+
+ /* Generate rmaps for the realtime superblock */
+ if (xfs_has_rtsb(sc->mp) && rtg_rgno(rr->sc->sr.rtg) == 0) {
+ error = xrep_rtrmap_stash(rr, 0, sc->mp->m_sb.sb_rextsize,
+ XFS_RMAP_OWN_FS, 0, 0);
+ if (error)
+ return error;
+ }
+
+ /*
+ * Set up for a potentially lengthy filesystem scan by reducing our
+ * transaction resource usage for the duration. Specifically:
+ *
+ * Unlock the realtime metadata inodes and cancel the transaction to
+ * release the log grant space while we scan the filesystem.
+ *
+ * Create a new empty transaction to eliminate the possibility of the
+ * inode scan deadlocking on cyclical metadata.
+ *
+ * We pass the empty transaction to the file scanning function to avoid
+ * repeatedly cycling empty transactions. This can be done even though
+ * we take the IOLOCK to quiesce the file because empty transactions
+ * do not take sb_internal.
+ */
+ xchk_trans_cancel(sc);
+ xchk_rtgroup_unlock(&sc->sr);
+ error = xchk_trans_alloc_empty(sc);
+ if (error)
+ return error;
+
+ while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
+ error = xrep_rtrmap_scan_inode(rr, ip);
+ xchk_irele(sc, ip);
+ if (error)
+ break;
+
+ if (xchk_should_terminate(sc, &error))
+ break;
+ }
+ xchk_iscan_iter_finish(&rr->iscan);
+ if (error)
+ return error;
+
+ /*
+ * Switch out for a real transaction and lock the RT metadata in
+ * preparation for building a new tree.
+ */
+ xchk_trans_cancel(sc);
+ error = xchk_setup_rt(sc);
+ if (error)
+ return error;
+ error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
+ if (error)
+ return error;
+
+ /*
+ * If a hook failed to update the in-memory btree, we lack the data to
+ * continue the repair.
+ */
+ if (xchk_iscan_aborted(&rr->iscan))
+ return -EFSCORRUPTED;
+
+ /* Scan for old rtrmap blocks. */
+ while ((pag = xfs_perag_next(sc->mp, pag))) {
+ error = xrep_rtrmap_scan_ag(rr, pag);
+ if (error) {
+ xfs_perag_rele(pag);
+ return error;
+ }
+ }
+
+ /*
+ * Now that we have everything locked again, we need to count the
+ * number of rmap records stashed in the btree. This should reflect
+ * all actively-owned rt files in the filesystem. At the same time,
+ * check all our records before we start building a new btree, which
+ * requires the rtbitmap lock.
+ */
+ mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, NULL, &rr->rtrmap_btree);
+ rr->nr_records = 0;
+ error = xfs_rmap_query_all(mcur, xrep_rtrmap_check_record, rr);
+ xfs_btree_del_cursor(mcur, error);
+
+ return error;
+}
+
+/* Building the new rtrmap btree. */
+
+/* Retrieve rtrmapbt data for bulk load. */
+STATIC int
+xrep_rtrmap_get_records(
+ struct xfs_btree_cur *cur,
+ unsigned int idx,
+ struct xfs_btree_block *block,
+ unsigned int nr_wanted,
+ void *priv)
+{
+ struct xrep_rtrmap *rr = priv;
+ union xfs_btree_rec *block_rec;
+ unsigned int loaded;
+ int error;
+
+ for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
+ int stat = 0;
+
+ error = xfs_btree_increment(rr->mcur, 0, &stat);
+ if (error)
+ return error;
+ if (!stat)
+ return -EFSCORRUPTED;
+
+ error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat);
+ if (error)
+ return error;
+ if (!stat)
+ return -EFSCORRUPTED;
+
+ block_rec = xfs_btree_rec_addr(cur, idx, block);
+ cur->bc_ops->init_rec_from_cur(cur, block_rec);
+ }
+
+ return loaded;
+}
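
This is the standard bulk-load callback shape: advance the in-memory cursor once per wanted record, copy the record into the staged block, and return how many were loaded. A toy analogue with an array standing in for the in-memory btree cursor:

    /* Hedged sketch: a get_records-style callback filling a staged block. */
    #include <stdio.h>

    static int src[] = { 1, 2, 3, 4, 5, 6, 7 }; /* stands in for the mem btree */
    static int pos = -1;                         /* cursor, before first record */

    static int get_records(int *block, int nr_wanted)
    {
    	int loaded;

    	for (loaded = 0; loaded < nr_wanted; loaded++) {
    		if (++pos >= 7)
    			return -1; /* ran out: record count was wrong */
    		block[loaded] = src[pos]; /* init_rec_from_cur analogue */
    	}
    	return loaded;
    }

    int main(void)
    {
    	int block[3];
    	int n = get_records(block, 3);

    	printf("loaded %d records, first %d\n", n, block[0]);
    	return 0;
    }
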
+
+/* Feed one of the new btree blocks to the bulk loader. */
+STATIC int
+xrep_rtrmap_claim_block(
+ struct xfs_btree_cur *cur,
+ union xfs_btree_ptr *ptr,
+ void *priv)
+{
+ struct xrep_rtrmap *rr = priv;
+
+ return xrep_newbt_claim_block(cur, &rr->new_btree, ptr);
+}
+
+/* Figure out how much space we need to create the incore btree root block. */
+STATIC size_t
+xrep_rtrmap_iroot_size(
+ struct xfs_btree_cur *cur,
+ unsigned int level,
+ unsigned int nr_this_level,
+ void *priv)
+{
+ return xfs_rtrmap_broot_space_calc(cur->bc_mp, level, nr_this_level);
+}
+
+/*
+ * Use the collected rmap information to stage a new rmap btree. If this is
+ * successful we'll return with the new btree root information logged to the
+ * repair transaction but not yet committed.
+ */
+STATIC int
+xrep_rtrmap_build_new_tree(
+ struct xrep_rtrmap *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+ struct xfs_rtgroup *rtg = sc->sr.rtg;
+ struct xfs_btree_cur *rmap_cur;
+ int error;
+
+ /*
+ * Prepare to construct the new btree by reserving disk space for the
+ * new btree and setting up all the accounting information we'll need
+ * to root the new btree while it's under construction and before we
+ * attach it to the realtime rmapbt inode.
+ */
+ error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc);
+ if (error)
+ return error;
+
+ rr->new_btree.bload.get_records = xrep_rtrmap_get_records;
+ rr->new_btree.bload.claim_block = xrep_rtrmap_claim_block;
+ rr->new_btree.bload.iroot_size = xrep_rtrmap_iroot_size;
+
+ rmap_cur = xfs_rtrmapbt_init_cursor(NULL, rtg);
+ xfs_btree_stage_ifakeroot(rmap_cur, &rr->new_btree.ifake);
+
+ /* Compute how many blocks we'll need for the rmaps collected. */
+ error = xfs_btree_bload_compute_geometry(rmap_cur,
+ &rr->new_btree.bload, rr->nr_records);
+ if (error)
+ goto err_cur;
+
+ /* Last chance to abort before we start committing fixes. */
+ if (xchk_should_terminate(sc, &error))
+ goto err_cur;
+
+ /*
+ * Guess how many blocks we're going to need to rebuild an entire
+ * rtrmapbt from the number of extents we found, and pump up our
+ * transaction to have sufficient block reservation. We're allowed
+ * to exceed quota to repair inconsistent metadata, though this is
+ * unlikely.
+ */
+ error = xfs_trans_reserve_more_inode(sc->tp, rtg_rmap(rtg),
+ rr->new_btree.bload.nr_blocks, 0, true);
+ if (error)
+ goto err_cur;
+
+ /* Reserve the space we'll need for the new btree. */
+ error = xrep_newbt_alloc_blocks(&rr->new_btree,
+ rr->new_btree.bload.nr_blocks);
+ if (error)
+ goto err_cur;
+
+ /*
+ * Create a cursor to the in-memory btree so that we can bulk load the
+ * new btree.
+ */
+ rr->mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, NULL, &rr->rtrmap_btree);
+ error = xfs_btree_goto_left_edge(rr->mcur);
+ if (error)
+ goto err_mcur;
+
+ /* Add all observed rmap records. */
+ rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_META_BTREE;
+ error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr);
+ if (error)
+ goto err_mcur;
+
+ /*
+ * Install the new rtrmap btree in the inode. After this point the old
+ * btree is no longer accessible, the new tree is live, and we can
+ * delete the cursor.
+ */
+ xfs_rtrmapbt_commit_staged_btree(rmap_cur, sc->tp);
+ xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
+ xfs_btree_del_cursor(rmap_cur, 0);
+ xfs_btree_del_cursor(rr->mcur, 0);
+ rr->mcur = NULL;
+
+ /*
+ * Now that we've written the new btree to disk, we don't need to keep
+ * updating the in-memory btree. Abort the scan to stop live updates.
+ */
+ xchk_iscan_abort(&rr->iscan);
+
+ /* Dispose of any unused blocks and the accounting information. */
+ error = xrep_newbt_commit(&rr->new_btree);
+ if (error)
+ return error;
+
+ return xrep_roll_trans(sc);
+
+err_mcur:
+ xfs_btree_del_cursor(rr->mcur, error);
+err_cur:
+ xfs_btree_del_cursor(rmap_cur, error);
+ xrep_newbt_cancel(&rr->new_btree);
+ return error;
+}
+
+/* Reaping the old btree. */
+
+/* Reap the old rtrmapbt blocks. */
+STATIC int
+xrep_rtrmap_remove_old_tree(
+ struct xrep_rtrmap *rr)
+{
+ int error;
+
+ /*
+ * Free all the extents that were allocated to the former rtrmapbt and
+ * aren't cross-linked with something else.
+ */
+ error = xrep_reap_metadir_fsblocks(rr->sc, &rr->old_rtrmapbt_blocks);
+ if (error)
+ return error;
+
+ /*
+ * Ensure the proper reservation for the rtrmap inode so that we don't
+ * fail to expand the new btree.
+ */
+ return xrep_reset_metafile_resv(rr->sc);
+}
+
+static inline bool
+xrep_rtrmapbt_want_live_update(
+ struct xchk_iscan *iscan,
+ const struct xfs_owner_info *oi)
+{
+ if (xchk_iscan_aborted(iscan))
+ return false;
+
+ /*
+ * We scanned the CoW staging extents before we started the iscan, so
+ * we need all the updates.
+ */
+ if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
+ return true;
+
+ /* Ignore updates to files that the scanner hasn't visited yet. */
+ return xchk_iscan_want_live_update(iscan, oi->oi_owner);
+}
+
+/*
+ * Apply a rtrmapbt update from the regular filesystem into our shadow btree.
+ * We're running from the thread that owns the rtrmap ILOCK and is generating
+ * the update, so we must be careful about which parts of struct
+ * xrep_rtrmap we change.
+ */
+static int
+xrep_rtrmapbt_live_update(
+ struct notifier_block *nb,
+ unsigned long action,
+ void *data)
+{
+ struct xfs_rmap_update_params *p = data;
+ struct xrep_rtrmap *rr;
+ struct xfs_mount *mp;
+ struct xfs_btree_cur *mcur;
+ struct xfs_trans *tp;
+ void *txcookie;
+ int error;
+
+ rr = container_of(nb, struct xrep_rtrmap, rhook.rmap_hook.nb);
+ mp = rr->sc->mp;
+
+ if (!xrep_rtrmapbt_want_live_update(&rr->iscan, &p->oinfo))
+ goto out_unlock;
+
+ trace_xrep_rmap_live_update(rtg_group(rr->sc->sr.rtg), action, p);
+
+ error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp);
+ if (error)
+ goto out_abort;
+
+ mutex_lock(&rr->lock);
+ mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, tp, &rr->rtrmap_btree);
+ error = __xfs_rmap_finish_intent(mcur, action, p->startblock,
+ p->blockcount, &p->oinfo, p->unwritten);
+ xfs_btree_del_cursor(mcur, error);
+ if (error)
+ goto out_cancel;
+
+ error = xfbtree_trans_commit(&rr->rtrmap_btree, tp);
+ if (error)
+ goto out_cancel;
+
+ xrep_trans_cancel_hook_dummy(&txcookie, tp);
+ mutex_unlock(&rr->lock);
+ return NOTIFY_DONE;
+
+out_cancel:
+ xfbtree_trans_cancel(&rr->rtrmap_btree, tp);
+ xrep_trans_cancel_hook_dummy(&txcookie, tp);
+out_abort:
+ xchk_iscan_abort(&rr->iscan);
+ mutex_unlock(&rr->lock);
+out_unlock:
+ return NOTIFY_DONE;
+}
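
Because the hook fires from whichever thread generated the rmap update, the callback receives only the embedded notifier node and recovers its owning repair context with container_of. A standalone C sketch of that recovery pattern, using a local container_of macro in place of the kernel's (the struct names are made up):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct nb {
	int priority;
};

struct repair_ctx {
	int updates_applied;
	struct nb hook_nb;	/* embedded, like rhook.rmap_hook.nb */
};

/* The hook passes only the embedded node; recover the outer struct. */
static int live_update(struct nb *nb, unsigned long action)
{
	struct repair_ctx *rc = container_of(nb, struct repair_ctx,
					     hook_nb);

	(void)action;
	rc->updates_applied++;	/* stand-in for the shadow btree update */
	return 0;
}

int main(void)
{
	struct repair_ctx rc = { .updates_applied = 0 };

	live_update(&rc.hook_nb, 1);
	printf("%d\n", rc.updates_applied);	/* prints 1 */
	return 0;
}
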
+
+/* Set up the filesystem scan components. */
+STATIC int
+xrep_rtrmap_setup_scan(
+ struct xrep_rtrmap *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+ int error;
+
+ mutex_init(&rr->lock);
+ xfsb_bitmap_init(&rr->old_rtrmapbt_blocks);
+
+ /* Set up some storage */
+ error = xfs_rtrmapbt_mem_init(sc->mp, &rr->rtrmap_btree, sc->xmbtp,
+ rtg_rgno(sc->sr.rtg));
+ if (error)
+ goto out_bitmap;
+
+ /* Retry iget every tenth of a second for up to 30 seconds. */
+ xchk_iscan_start(sc, 30000, 100, &rr->iscan);
+
+ /*
+ * Hook into live rtrmap operations so that we can update our in-memory
+ * btree to reflect live changes on the filesystem. Since we drop the
+ * rtrmap ILOCK to scan all the inodes, we need this piece to avoid
+ * installing a stale btree.
+ */
+ ASSERT(sc->flags & XCHK_FSGATES_RMAP);
+ xfs_rmap_hook_setup(&rr->rhook, xrep_rtrmapbt_live_update);
+ error = xfs_rmap_hook_add(rtg_group(sc->sr.rtg), &rr->rhook);
+ if (error)
+ goto out_iscan;
+ return 0;
+
+out_iscan:
+ xchk_iscan_teardown(&rr->iscan);
+ xfbtree_destroy(&rr->rtrmap_btree);
+out_bitmap:
+ xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
+ mutex_destroy(&rr->lock);
+ return error;
+}
+
+/* Tear down scan components. */
+STATIC void
+xrep_rtrmap_teardown(
+ struct xrep_rtrmap *rr)
+{
+ struct xfs_scrub *sc = rr->sc;
+
+ xchk_iscan_abort(&rr->iscan);
+ xfs_rmap_hook_del(rtg_group(sc->sr.rtg), &rr->rhook);
+ xchk_iscan_teardown(&rr->iscan);
+ xfbtree_destroy(&rr->rtrmap_btree);
+ xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
+ mutex_destroy(&rr->lock);
+}
+
+/* Repair the realtime rmap btree. */
+int
+xrep_rtrmapbt(
+ struct xfs_scrub *sc)
+{
+ struct xrep_rtrmap *rr = sc->buf;
+ int error;
+
+ /* Make sure any problems with the fork are fixed. */
+ error = xrep_metadata_inode_forks(sc);
+ if (error)
+ return error;
+
+ error = xrep_rtrmap_setup_scan(rr);
+ if (error)
+ return error;
+
+ /* Collect rmaps for realtime files. */
+ error = xrep_rtrmap_find_rmaps(rr);
+ if (error)
+ goto out_records;
+
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+
+ /* Rebuild the rtrmap information. */
+ error = xrep_rtrmap_build_new_tree(rr);
+ if (error)
+ goto out_records;
+
+ /* Kill the old tree. */
+ error = xrep_rtrmap_remove_old_tree(rr);
+ if (error)
+ goto out_records;
+
+out_records:
+ xrep_rtrmap_teardown(rr);
+ return error;
+}
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index 49fc6250bafc..4ac679c1bd29 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -81,8 +81,7 @@ xchk_setup_rtsummary(
if (error)
return error;
- error = xchk_install_live_inode(sc,
- sc->sr.rtg->rtg_inodes[XFS_RTGI_SUMMARY]);
+ error = xchk_install_live_inode(sc, rtg_summary(sc->sr.rtg));
if (error)
return error;
@@ -90,6 +89,10 @@ xchk_setup_rtsummary(
if (error)
return error;
+ error = xchk_rtgroup_lock(sc, &sc->sr, XFS_RTGLOCK_BITMAP);
+ if (error)
+ return error;
+
/*
* Now that we've locked the rtbitmap and rtsummary, we can't race with
* growfsrt trying to expand the summary or change the size of the rt
@@ -100,7 +103,6 @@ xchk_setup_rtsummary(
* exclusively here. If we ever start caring about running concurrent
* fsmap with scrub this could be changed.
*/
- xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP);
if (mp->m_sb.sb_rblocks) {
rts->rextents = xfs_blen_to_rtbxlen(mp, mp->m_sb.sb_rblocks);
rts->rbmblocks = xfs_rtbitmap_blockcount(mp);
@@ -191,8 +193,7 @@ xchk_rtsum_record_free(
rtlen = xfs_rtxlen_to_extlen(mp, rec->ar_extcount);
if (!xfs_verify_rtbext(mp, rtbno, rtlen)) {
- xchk_ino_xref_set_corrupt(sc,
- rtg->rtg_inodes[XFS_RTGI_BITMAP]->i_ino);
+ xchk_ino_xref_set_corrupt(sc, rtg_bitmap(rtg)->i_ino);
return -EFSCORRUPTED;
}
@@ -218,7 +219,7 @@ xchk_rtsum_compute(
/* If the bitmap size doesn't match the computed size, bail. */
if (XFS_FSB_TO_B(mp, xfs_rtbitmap_blockcount(mp)) !=
- rtg->rtg_inodes[XFS_RTGI_BITMAP]->i_disk_size)
+ rtg_bitmap(rtg)->i_disk_size)
return -EFSCORRUPTED;
return xfs_rtalloc_query_all(rtg, sc->tp, xchk_rtsum_record_free, sc);
@@ -310,8 +311,8 @@ xchk_rtsummary(
{
struct xfs_mount *mp = sc->mp;
struct xfs_rtgroup *rtg = sc->sr.rtg;
- struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
- struct xfs_inode *rsumip = rtg->rtg_inodes[XFS_RTGI_SUMMARY];
+ struct xfs_inode *rbmip = rtg_bitmap(rtg);
+ struct xfs_inode *rsumip = rtg_summary(rtg);
struct xchk_rtsummary *rts = sc->buf;
int error;
diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c
index 8198ea84ad70..d593977d70df 100644
--- a/fs/xfs/scrub/rtsummary_repair.c
+++ b/fs/xfs/scrub/rtsummary_repair.c
@@ -165,7 +165,8 @@ xrep_rtsummary(
* Now exchange the contents. Nothing in repair uses the temporary
* buffer, so we can reuse it for the tempfile exchrange information.
*/
- error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, &rts->tempexch);
+ error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0,
+ rts->rsumblocks, &rts->tempexch);
if (error)
return error;
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 950f5a58dcd9..16da054b2eb0 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -164,7 +164,7 @@ xchk_fsgates_disable(
trace_xchk_fsgates_disable(sc, sc->flags & XCHK_FSGATES_ALL);
if (sc->flags & XCHK_FSGATES_DRAIN)
- xfs_drain_wait_disable();
+ xfs_defer_drain_wait_disable();
if (sc->flags & XCHK_FSGATES_QUOTA)
xfs_dqtrx_hook_disable();
@@ -218,6 +218,8 @@ xchk_teardown(
int error)
{
xchk_ag_free(sc, &sc->sa);
+ xchk_rtgroup_btcur_free(&sc->sr);
+
if (sc->tp) {
if (error == 0 && (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
error = xfs_trans_commit(sc->tp);
@@ -458,6 +460,13 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.has = xfs_has_rtsb,
.repair = xrep_rgsuperblock,
},
+ [XFS_SCRUB_TYPE_RTRMAPBT] = { /* realtime group rmapbt */
+ .type = ST_RTGROUP,
+ .setup = xchk_setup_rtrmapbt,
+ .scrub = xchk_rtrmapbt,
+ .has = xfs_has_rtrmapbt,
+ .repair = xrep_rtrmapbt,
+ },
};
static int
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 5dbbe93cb49b..cba4e89a3a62 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -126,6 +126,9 @@ struct xchk_rt {
/* XFS_RTGLOCK_* lock state if locked */
unsigned int rtlock_flags;
+
+ /* rtgroup btrees */
+ struct xfs_btree_cur *rmap_cur;
};
struct xfs_scrub {
@@ -280,10 +283,12 @@ int xchk_metapath(struct xfs_scrub *sc);
int xchk_rtbitmap(struct xfs_scrub *sc);
int xchk_rtsummary(struct xfs_scrub *sc);
int xchk_rgsuperblock(struct xfs_scrub *sc);
+int xchk_rtrmapbt(struct xfs_scrub *sc);
#else
# define xchk_rtbitmap xchk_nothing
# define xchk_rtsummary xchk_nothing
# define xchk_rgsuperblock xchk_nothing
+# define xchk_rtrmapbt xchk_nothing
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_quota(struct xfs_scrub *sc);
@@ -317,8 +322,17 @@ void xchk_xref_is_not_cow_staging(struct xfs_scrub *sc, xfs_agblock_t bno,
#ifdef CONFIG_XFS_RT
void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno,
xfs_extlen_t len);
+void xchk_xref_has_no_rt_owner(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
+ xfs_extlen_t len);
+void xchk_xref_has_rt_owner(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
+ xfs_extlen_t len);
+void xchk_xref_is_only_rt_owned_by(struct xfs_scrub *sc, xfs_rgblock_t rgbno,
+ xfs_extlen_t len, const struct xfs_owner_info *oinfo);
#else
# define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
+# define xchk_xref_has_no_rt_owner(sc, rtbno, len) do { } while (0)
+# define xchk_xref_has_rt_owner(sc, rtbno, len) do { } while (0)
+# define xchk_xref_is_only_rt_owned_by(sc, bno, len, oinfo) do { } while (0)
#endif
#endif /* __XFS_SCRUB_SCRUB_H__ */
diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c
index a476c7b2ab75..eb6bb170c902 100644
--- a/fs/xfs/scrub/stats.c
+++ b/fs/xfs/scrub/stats.c
@@ -82,6 +82,7 @@ static const char *name_map[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_DIRTREE] = "dirtree",
[XFS_SCRUB_TYPE_METAPATH] = "metapath",
[XFS_SCRUB_TYPE_RGSUPER] = "rgsuper",
+ [XFS_SCRUB_TYPE_RTRMAPBT] = "rtrmapbt",
};
/* Format the scrub stats into a text buffer, similar to pcp style. */
diff --git a/fs/xfs/scrub/tempexch.h b/fs/xfs/scrub/tempexch.h
index 995ba187c5aa..eccda720c2ca 100644
--- a/fs/xfs/scrub/tempexch.h
+++ b/fs/xfs/scrub/tempexch.h
@@ -12,7 +12,7 @@ struct xrep_tempexch {
};
int xrep_tempexch_trans_reserve(struct xfs_scrub *sc, int whichfork,
- struct xrep_tempexch *ti);
+ xfs_fileoff_t off, xfs_filblks_t len, struct xrep_tempexch *ti);
int xrep_tempexch_trans_alloc(struct xfs_scrub *sc, int whichfork,
struct xrep_tempexch *ti);
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
index 4ebb5f8459e8..cf99e0ca51b0 100644
--- a/fs/xfs/scrub/tempfile.c
+++ b/fs/xfs/scrub/tempfile.c
@@ -606,6 +606,8 @@ STATIC int
xrep_tempexch_prep_request(
struct xfs_scrub *sc,
int whichfork,
+ xfs_fileoff_t off,
+ xfs_filblks_t len,
struct xrep_tempexch *tx)
{
struct xfs_exchmaps_req *req = &tx->req;
@@ -629,18 +631,19 @@ xrep_tempexch_prep_request(
/* Exchange all mappings in both forks. */
req->ip1 = sc->tempip;
req->ip2 = sc->ip;
- req->startoff1 = 0;
- req->startoff2 = 0;
+ req->startoff1 = off;
+ req->startoff2 = off;
switch (whichfork) {
case XFS_ATTR_FORK:
req->flags |= XFS_EXCHMAPS_ATTR_FORK;
break;
case XFS_DATA_FORK:
- /* Always exchange sizes when exchanging data fork mappings. */
- req->flags |= XFS_EXCHMAPS_SET_SIZES;
+ /* Exchange sizes when exchanging all data fork mappings. */
+ if (off == 0 && len == XFS_MAX_FILEOFF)
+ req->flags |= XFS_EXCHMAPS_SET_SIZES;
break;
}
- req->blockcount = XFS_MAX_FILEOFF;
+ req->blockcount = len;
return 0;
}
@@ -796,6 +799,8 @@ int
xrep_tempexch_trans_reserve(
struct xfs_scrub *sc,
int whichfork,
+ xfs_fileoff_t off,
+ xfs_filblks_t len,
struct xrep_tempexch *tx)
{
int error;
@@ -804,7 +809,7 @@ xrep_tempexch_trans_reserve(
xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL);
xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL);
- error = xrep_tempexch_prep_request(sc, whichfork, tx);
+ error = xrep_tempexch_prep_request(sc, whichfork, off, len, tx);
if (error)
return error;
@@ -842,7 +847,8 @@ xrep_tempexch_trans_alloc(
ASSERT(sc->tp == NULL);
ASSERT(xfs_has_exchange_range(sc->mp));
- error = xrep_tempexch_prep_request(sc, whichfork, tx);
+ error = xrep_tempexch_prep_request(sc, whichfork, 0, XFS_MAX_FILEOFF,
+ tx);
if (error)
return error;
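
With the new off/len arguments, only a whole-fork exchange (off == 0, len == XFS_MAX_FILEOFF) also swaps the ondisk file sizes; a partial exchange such as the rtsummary repair above leaves i_disk_size untouched. A tiny model of that decision; the XFS_MAX_FILEOFF stand-in below (54-bit file offset space) is an assumption for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_FILEOFF	((1ULL << 54) - 1)	/* assumed stand-in */

/* Swap file sizes only when every data fork mapping is exchanged. */
static bool want_set_sizes(uint64_t off, uint64_t len)
{
	return off == 0 && len == MAX_FILEOFF;
}

int main(void)
{
	printf("%d\n", want_set_sizes(0, MAX_FILEOFF));	/* 1: whole fork */
	printf("%d\n", want_set_sizes(0, 512));		/* 0: partial */
	return 0;
}
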
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 98f923ae664d..2450e214103f 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -21,6 +21,7 @@
#include "xfs_rmap.h"
#include "xfs_parent.h"
#include "xfs_metafile.h"
+#include "xfs_rtgroup.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index d2ae7e93acb0..fb86b746bc17 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -17,6 +17,7 @@
#include "xfs_bit.h"
#include "xfs_quota_defs.h"
+struct xfs_rtgroup;
struct xfs_scrub;
struct xfile;
struct xfarray;
@@ -40,6 +41,9 @@ struct xchk_dirtree_outcomes;
TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED);
TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW);
+TRACE_DEFINE_ENUM(XG_TYPE_AG);
+TRACE_DEFINE_ENUM(XG_TYPE_RTG);
+
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_PROBE);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_SB);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_AGF);
@@ -72,6 +76,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIRTREE);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_METAPATH);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RGSUPER);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RTRMAPBT);
#define XFS_SCRUB_TYPE_STRINGS \
{ XFS_SCRUB_TYPE_PROBE, "probe" }, \
@@ -105,7 +110,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RGSUPER);
{ XFS_SCRUB_TYPE_DIRTREE, "dirtree" }, \
{ XFS_SCRUB_TYPE_BARRIER, "barrier" }, \
{ XFS_SCRUB_TYPE_METAPATH, "metapath" }, \
- { XFS_SCRUB_TYPE_RGSUPER, "rgsuper" }
+ { XFS_SCRUB_TYPE_RGSUPER, "rgsuper" }, \
+ { XFS_SCRUB_TYPE_RTRMAPBT, "rtrmapbt" }
#define XFS_SCRUB_FLAG_STRINGS \
{ XFS_SCRUB_IFLAG_REPAIR, "repair" }, \
@@ -2282,6 +2288,32 @@ TRACE_EVENT(xrep_calc_ag_resblks_btsize,
__entry->rmapbt_sz,
__entry->refcbt_sz)
)
+
+#ifdef CONFIG_XFS_RT
+TRACE_EVENT(xrep_calc_rtgroup_resblks_btsize,
+ TP_PROTO(struct xfs_mount *mp, xfs_rgnumber_t rgno,
+ xfs_rgblock_t usedlen, xfs_rgblock_t rmapbt_sz),
+ TP_ARGS(mp, rgno, usedlen, rmapbt_sz),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_rgnumber_t, rgno)
+ __field(xfs_rgblock_t, usedlen)
+ __field(xfs_rgblock_t, rmapbt_sz)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rgno = rgno;
+ __entry->usedlen = usedlen;
+ __entry->rmapbt_sz = rmapbt_sz;
+ ),
+ TP_printk("dev %d:%d rgno 0x%x usedlen %u rmapbt %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rgno,
+ __entry->usedlen,
+ __entry->rmapbt_sz)
+);
+#endif /* CONFIG_XFS_RT */
+
TRACE_EVENT(xrep_reset_counters,
TP_PROTO(struct xfs_mount *mp, struct xchk_fscounters *fsc),
TP_ARGS(mp, fsc),
@@ -2680,11 +2712,12 @@ DEFINE_SCRUB_NLINKS_DIFF_EVENT(xrep_nlinks_update_inode);
DEFINE_SCRUB_NLINKS_DIFF_EVENT(xrep_nlinks_unfixable_inode);
TRACE_EVENT(xrep_rmap_live_update,
- TP_PROTO(const struct xfs_perag *pag, unsigned int op,
+ TP_PROTO(const struct xfs_group *xg, unsigned int op,
const struct xfs_rmap_update_params *p),
- TP_ARGS(pag, op, p),
+ TP_ARGS(xg, op, p),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
__field(unsigned int, op)
__field(xfs_agblock_t, agbno)
@@ -2694,8 +2727,9 @@ TRACE_EVENT(xrep_rmap_live_update,
__field(unsigned int, flags)
),
TP_fast_assign(
- __entry->dev = pag_mount(pag)->m_super->s_dev;
- __entry->agno = pag_agno(pag);
+ __entry->dev = xg->xg_mount->m_super->s_dev;
+ __entry->type = xg->xg_type;
+ __entry->agno = xg->xg_gno;
__entry->op = op;
__entry->agbno = p->startblock;
__entry->len = p->blockcount;
@@ -2704,10 +2738,12 @@ TRACE_EVENT(xrep_rmap_live_update,
if (p->unwritten)
__entry->flags |= XFS_RMAP_UNWRITTEN;
),
- TP_printk("dev %d:%d agno 0x%x op %d agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
+ TP_printk("dev %d:%d %sno 0x%x op %d %sbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
__entry->op,
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agbno,
__entry->len,
__entry->owner,
@@ -3605,6 +3641,186 @@ DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_try_unlink);
DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_unlink);
DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_link);
+#ifdef CONFIG_XFS_RT
+DECLARE_EVENT_CLASS(xrep_rtbitmap_class,
+ TP_PROTO(struct xfs_mount *mp, xfs_rtxnum_t start, xfs_rtxnum_t end),
+ TP_ARGS(mp, start, end),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(xfs_rtxnum_t, start)
+ __field(xfs_rtxnum_t, end)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->start = start;
+ __entry->end = end;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d startrtx 0x%llx endrtx 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->start,
+ __entry->end)
+);
+#define DEFINE_REPAIR_RGBITMAP_EVENT(name) \
+DEFINE_EVENT(xrep_rtbitmap_class, name, \
+ TP_PROTO(struct xfs_mount *mp, xfs_rtxnum_t start, \
+ xfs_rtxnum_t end), \
+ TP_ARGS(mp, start, end))
+DEFINE_REPAIR_RGBITMAP_EVENT(xrep_rtbitmap_record_free);
+DEFINE_REPAIR_RGBITMAP_EVENT(xrep_rtbitmap_record_free_bulk);
+
+TRACE_EVENT(xrep_rtbitmap_or,
+ TP_PROTO(struct xfs_mount *mp, unsigned long long wordoff,
+ xfs_rtword_t mask, xfs_rtword_t word),
+ TP_ARGS(mp, wordoff, mask, word),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(unsigned long long, wordoff)
+ __field(unsigned int, mask)
+ __field(unsigned int, word)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->wordoff = wordoff;
+ __entry->mask = mask;
+ __entry->word = word;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d wordoff 0x%llx mask 0x%x word 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->wordoff,
+ __entry->mask,
+ __entry->word)
+);
+
+TRACE_EVENT(xrep_rtbitmap_load,
+ TP_PROTO(struct xfs_rtgroup *rtg, xfs_fileoff_t rbmoff,
+ xfs_rtxnum_t rtx, xfs_rtxnum_t len),
+ TP_ARGS(rtg, rbmoff, rtx, len),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(xfs_rgnumber_t, rgno)
+ __field(xfs_fileoff_t, rbmoff)
+ __field(xfs_rtxnum_t, rtx)
+ __field(xfs_rtxnum_t, len)
+ ),
+ TP_fast_assign(
+ __entry->dev = rtg_mount(rtg)->m_super->s_dev;
+ __entry->rtdev = rtg_mount(rtg)->m_rtdev_targp->bt_dev;
+ __entry->rgno = rtg_rgno(rtg);
+ __entry->rbmoff = rbmoff;
+ __entry->rtx = rtx;
+ __entry->len = len;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d rgno 0x%x rbmoff 0x%llx rtx 0x%llx rtxcount 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->rgno,
+ __entry->rbmoff,
+ __entry->rtx,
+ __entry->len)
+);
+
+TRACE_EVENT(xrep_rtbitmap_load_words,
+ TP_PROTO(struct xfs_mount *mp, xfs_fileoff_t rbmoff,
+ unsigned long long wordoff, unsigned int wordcnt),
+ TP_ARGS(mp, rbmoff, wordoff, wordcnt),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(xfs_fileoff_t, rbmoff)
+ __field(unsigned long long, wordoff)
+ __field(unsigned int, wordcnt)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->rbmoff = rbmoff;
+ __entry->wordoff = wordoff;
+ __entry->wordcnt = wordcnt;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d rbmoff 0x%llx wordoff 0x%llx wordcnt 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->rbmoff,
+ __entry->wordoff,
+ __entry->wordcnt)
+);
+
+TRACE_EVENT(xrep_rtbitmap_load_word,
+ TP_PROTO(struct xfs_mount *mp, unsigned long long wordoff,
+ unsigned int bit, xfs_rtword_t ondisk_word,
+ xfs_rtword_t xfile_word, xfs_rtword_t word_mask),
+ TP_ARGS(mp, wordoff, bit, ondisk_word, xfile_word, word_mask),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(unsigned long long, wordoff)
+ __field(unsigned int, bit)
+ __field(xfs_rtword_t, ondisk_word)
+ __field(xfs_rtword_t, xfile_word)
+ __field(xfs_rtword_t, word_mask)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->wordoff = wordoff;
+ __entry->bit = bit;
+ __entry->ondisk_word = ondisk_word;
+ __entry->xfile_word = xfile_word;
+ __entry->word_mask = word_mask;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d wordoff 0x%llx bit %u ondisk 0x%x(0x%x) inmem 0x%x(0x%x) result 0x%x mask 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->wordoff,
+ __entry->bit,
+ __entry->ondisk_word,
+ __entry->ondisk_word & __entry->word_mask,
+ __entry->xfile_word,
+ __entry->xfile_word & ~__entry->word_mask,
+ (__entry->xfile_word & ~__entry->word_mask) |
+ (__entry->ondisk_word & __entry->word_mask),
+ __entry->word_mask)
+);
+
+TRACE_EVENT(xrep_rtrmap_found,
+ TP_PROTO(struct xfs_mount *mp, const struct xfs_rmap_irec *rec),
+ TP_ARGS(mp, rec),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, rtdev)
+ __field(xfs_rgblock_t, rgbno)
+ __field(xfs_extlen_t, len)
+ __field(uint64_t, owner)
+ __field(uint64_t, offset)
+ __field(unsigned int, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->rtdev = mp->m_rtdev_targp->bt_dev;
+ __entry->rgbno = rec->rm_startblock;
+ __entry->len = rec->rm_blockcount;
+ __entry->owner = rec->rm_owner;
+ __entry->offset = rec->rm_offset;
+ __entry->flags = rec->rm_flags;
+ ),
+ TP_printk("dev %d:%d rtdev %d:%d rgbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->rtdev), MINOR(__entry->rtdev),
+ __entry->rgbno,
+ __entry->len,
+ __entry->owner,
+ __entry->offset,
+ __entry->flags)
+);
+#endif /* CONFIG_XFS_RT */
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index aa63b8efd782..6f313fbf7669 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -22,6 +22,7 @@
#include "xfs_error.h"
#include "xfs_ag.h"
#include "xfs_buf_mem.h"
+#include "xfs_notify_failure.h"
struct kmem_cache *xfs_buf_cache;
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 3d0c6402cb36..4f2e4ea29e1f 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -262,6 +262,9 @@ xlog_recover_validate_buf_type(
case XFS_BMAP_MAGIC:
bp->b_ops = &xfs_bmbt_buf_ops;
break;
+ case XFS_RTRMAP_CRC_MAGIC:
+ bp->b_ops = &xfs_rtrmapbt_buf_ops;
+ break;
case XFS_RMAP_CRC_MAGIC:
bp->b_ops = &xfs_rmapbt_buf_ops;
break;
@@ -855,6 +858,7 @@ xlog_recover_get_buf_lsn(
uuid = &btb->bb_u.s.bb_uuid;
break;
}
+ case XFS_RTRMAP_CRC_MAGIC:
case XFS_BMAP_CRC_MAGIC:
case XFS_BMAP_MAGIC: {
struct xfs_btree_block *btb = blk;
diff --git a/fs/xfs/xfs_drain.c b/fs/xfs/xfs_drain.c
index 5ede81fadbd8..fa5f31931efd 100644
--- a/fs/xfs/xfs_drain.c
+++ b/fs/xfs/xfs_drain.c
@@ -13,28 +13,28 @@
#include "xfs_trace.h"
/*
- * Use a static key here to reduce the overhead of xfs_drain_rele. If the
- * compiler supports jump labels, the static branch will be replaced by a nop
- * sled when there are no xfs_drain_wait callers. Online fsck is currently
- * the only caller, so this is a reasonable tradeoff.
+ * Use a static key here to reduce the overhead of xfs_defer_drain_rele. If
+ * the compiler supports jump labels, the static branch will be replaced by a
+ * nop sled when there are no xfs_defer_drain_wait callers. Online fsck is
+ * currently the only caller, so this is a reasonable tradeoff.
*
* Note: Patching the kernel code requires taking the cpu hotplug lock. Other
* parts of the kernel allocate memory with that lock held, which means that
* XFS callers cannot hold any locks that might be used by memory reclaim or
* writeback when calling the static_branch_{inc,dec} functions.
*/
-static DEFINE_STATIC_KEY_FALSE(xfs_drain_waiter_gate);
+static DEFINE_STATIC_KEY_FALSE(xfs_defer_drain_waiter_gate);
void
-xfs_drain_wait_disable(void)
+xfs_defer_drain_wait_disable(void)
{
- static_branch_dec(&xfs_drain_waiter_gate);
+ static_branch_dec(&xfs_defer_drain_waiter_gate);
}
void
-xfs_drain_wait_enable(void)
+xfs_defer_drain_wait_enable(void)
{
- static_branch_inc(&xfs_drain_waiter_gate);
+ static_branch_inc(&xfs_defer_drain_waiter_gate);
}
void
@@ -71,7 +71,7 @@ static inline bool has_waiters(struct wait_queue_head *wq_head)
static inline void xfs_defer_drain_rele(struct xfs_defer_drain *dr)
{
if (atomic_dec_and_test(&dr->dr_count) &&
- static_branch_unlikely(&xfs_drain_waiter_gate) &&
+ static_branch_unlikely(&xfs_defer_drain_waiter_gate) &&
has_waiters(&dr->dr_waiters))
wake_up(&dr->dr_waiters);
}
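
The rename leaves the static-key pattern itself intact: the branch in the hot release path compiles to a nop sled until a waiter arms the key. A condensed kernel-style sketch of the idiom with generic names rather than the XFS symbols (a sketch that only builds inside a kernel tree):

#include <linux/jump_label.h>
#include <linux/wait.h>

static DEFINE_STATIC_KEY_FALSE(waiter_gate);

/* Hot path: effectively free until some waiter has armed the key. */
static inline void rele_notify(struct wait_queue_head *wq)
{
	if (static_branch_unlikely(&waiter_gate))
		wake_up(wq);
}

/* Slow path: arm/disarm around the window where waiters exist. */
static void waiter_enable(void)
{
	static_branch_inc(&waiter_gate);
}

static void waiter_disable(void)
{
	static_branch_dec(&waiter_gate);
}
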
diff --git a/fs/xfs/xfs_drain.h b/fs/xfs/xfs_drain.h
index efcf88df9a5e..4d446dbf65e5 100644
--- a/fs/xfs/xfs_drain.h
+++ b/fs/xfs/xfs_drain.h
@@ -26,8 +26,8 @@ struct xfs_defer_drain {
void xfs_defer_drain_init(struct xfs_defer_drain *dr);
void xfs_defer_drain_free(struct xfs_defer_drain *dr);
-void xfs_drain_wait_disable(void);
-void xfs_drain_wait_enable(void);
+void xfs_defer_drain_wait_disable(void);
+void xfs_defer_drain_wait_enable(void);
/*
* Deferred Work Intent Drains
@@ -61,6 +61,9 @@ void xfs_drain_wait_enable(void);
* All functions that create work items must increment the intent counter as
* soon as the item is added to the transaction and cannot drop the counter
* until the item is finished or cancelled.
+ *
+ * The same principles apply to realtime groups because the rt metadata inode
+ * ILOCKs are not held across transaction rolls.
*/
struct xfs_group *xfs_group_intent_get(struct xfs_mount *mp,
xfs_fsblock_t fsbno, enum xfs_group_type type);
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 3290dd8524a6..3e3ef16f65a3 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -26,6 +26,7 @@
#include "xfs_rtbitmap.h"
#include "xfs_ag.h"
#include "xfs_rtgroup.h"
+#include "xfs_rtrmap_btree.h"
/* Convert an xfs_fsmap to an fsmap. */
static void
@@ -832,6 +833,174 @@ xfs_getfsmap_rtdev_rtbitmap(
return error;
}
+
+/* Transform a realtime rmapbt record into a fsmap */
+STATIC int
+xfs_getfsmap_rtdev_rmapbt_helper(
+ struct xfs_btree_cur *cur,
+ const struct xfs_rmap_irec *rec,
+ void *priv)
+{
+ struct xfs_fsmap_irec frec = {
+ .owner = rec->rm_owner,
+ .offset = rec->rm_offset,
+ .rm_flags = rec->rm_flags,
+ .rec_key = rec->rm_startblock,
+ };
+ struct xfs_getfsmap_info *info = priv;
+
+ return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group,
+ rec->rm_startblock, rec->rm_blockcount, &frec);
+}
+
+/* Actually query the rtrmap btree. */
+STATIC int
+xfs_getfsmap_rtdev_rmapbt_query(
+ struct xfs_trans *tp,
+ struct xfs_getfsmap_info *info,
+ struct xfs_btree_cur **curpp)
+{
+ struct xfs_rtgroup *rtg = to_rtg(info->group);
+
+ /* Query the rtrmapbt */
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
+ *curpp = xfs_rtrmapbt_init_cursor(tp, rtg);
+ return xfs_rmap_query_range(*curpp, &info->low, &info->high,
+ xfs_getfsmap_rtdev_rmapbt_helper, info);
+}
+
+/* Execute a getfsmap query against the realtime device rmapbt. */
+STATIC int
+xfs_getfsmap_rtdev_rmapbt(
+ struct xfs_trans *tp,
+ const struct xfs_fsmap *keys,
+ struct xfs_getfsmap_info *info)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_rtgroup *rtg = NULL;
+ struct xfs_btree_cur *bt_cur = NULL;
+ xfs_rtblock_t start_rtb;
+ xfs_rtblock_t end_rtb;
+ xfs_rgnumber_t start_rg, end_rg;
+ uint64_t eofs;
+ int error = 0;
+
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
+ if (keys[0].fmr_physical >= eofs)
+ return 0;
+ start_rtb = xfs_daddr_to_rtb(mp, keys[0].fmr_physical);
+ end_rtb = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fmr_physical));
+
+ info->missing_owner = XFS_FMR_OWN_FREE;
+
+ /*
+ * Convert the fsmap low/high keys to rtgroup based keys. Initialize
+ * low to the fsmap low key and max out the high key to the end
+ * of the rtgroup.
+ */
+ info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
+ error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
+ if (error)
+ return error;
+ info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
+ xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
+
+ /* Adjust the low key if we are continuing from where we left off. */
+ if (info->low.rm_blockcount == 0) {
+ /* No previous record from which to continue */
+ } else if (rmap_not_shareable(mp, &info->low)) {
+ /* Last record seen was an unshareable extent */
+ info->low.rm_owner = 0;
+ info->low.rm_offset = 0;
+
+ start_rtb += info->low.rm_blockcount;
+ if (xfs_rtb_to_daddr(mp, start_rtb) >= eofs)
+ return 0;
+ } else {
+ /* Last record seen was a shareable file data extent */
+ info->low.rm_offset += info->low.rm_blockcount;
+ }
+ info->low.rm_startblock = xfs_rtb_to_rgbno(mp, start_rtb);
+
+ info->high.rm_startblock = -1U;
+ info->high.rm_owner = ULLONG_MAX;
+ info->high.rm_offset = ULLONG_MAX;
+ info->high.rm_blockcount = 0;
+ info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS;
+
+ start_rg = xfs_rtb_to_rgno(mp, start_rtb);
+ end_rg = xfs_rtb_to_rgno(mp, end_rtb);
+
+ while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rg, end_rg))) {
+ /*
+ * Set the rtgroup high key from the fsmap high key if this
+ * is the last rtgroup that we're querying.
+ */
+ info->group = rtg_group(rtg);
+ if (rtg_rgno(rtg) == end_rg) {
+ info->high.rm_startblock =
+ xfs_rtb_to_rgbno(mp, end_rtb);
+ info->high.rm_offset =
+ XFS_BB_TO_FSBT(mp, keys[1].fmr_offset);
+ error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]);
+ if (error)
+ break;
+ xfs_getfsmap_set_irec_flags(&info->high, &keys[1]);
+ }
+
+ if (bt_cur) {
+ xfs_rtgroup_unlock(to_rtg(bt_cur->bc_group),
+ XFS_RTGLOCK_RMAP);
+ xfs_btree_del_cursor(bt_cur, XFS_BTREE_NOERROR);
+ bt_cur = NULL;
+ }
+
+ trace_xfs_fsmap_low_group_key(mp, info->dev, rtg_rgno(rtg),
+ &info->low);
+ trace_xfs_fsmap_high_group_key(mp, info->dev, rtg_rgno(rtg),
+ &info->high);
+
+ error = xfs_getfsmap_rtdev_rmapbt_query(tp, info, &bt_cur);
+ if (error)
+ break;
+
+ /*
+ * Set the rtgroup low key to the start of the rtgroup prior to
+ * moving on to the next rtgroup.
+ */
+ if (rtg_rgno(rtg) == start_rg)
+ memset(&info->low, 0, sizeof(info->low));
+
+ /*
+ * If this is the last rtgroup, report any gap at the end of it
+		 * before we drop the reference to the rtgroup when the loop
+ * terminates.
+ */
+ if (rtg_rgno(rtg) == end_rg) {
+ info->last = true;
+ error = xfs_getfsmap_rtdev_rmapbt_helper(bt_cur,
+ &info->high, info);
+ if (error)
+ break;
+ }
+ info->group = NULL;
+ }
+
+ if (bt_cur) {
+ xfs_rtgroup_unlock(to_rtg(bt_cur->bc_group),
+ XFS_RTGLOCK_RMAP);
+ xfs_btree_del_cursor(bt_cur, error < 0 ? XFS_BTREE_ERROR :
+ XFS_BTREE_NOERROR);
+ }
+
+ /* loop termination case */
+ if (rtg) {
+ info->group = NULL;
+ xfs_rtgroup_rele(rtg);
+ }
+
+ return error;
+}
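
The loop narrows the rmap query window per group: only the first group honours the caller's low key, only the last group is clamped to the high key, and interior groups are scanned in full. A compact runnable model of that windowing, with made-up group numbers and keys:

#include <stdio.h>

int main(void)
{
	unsigned int start_g = 2, end_g = 4;	/* made-up group range */
	unsigned int low_key = 17, high_key = 9; /* group-relative keys */
	unsigned int group_blocks = 100;	/* blocks per group */

	for (unsigned int g = start_g; g <= end_g; g++) {
		unsigned int lo = (g == start_g) ? low_key : 0;
		unsigned int hi = (g == end_g) ? high_key
					       : group_blocks - 1;

		printf("group %u: query rgbno [%u, %u]\n", g, lo, hi);
	}
	return 0;
}
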
#endif /* CONFIG_XFS_RT */
/* Do we recognize the device? */
@@ -971,7 +1140,10 @@ xfs_getfsmap(
if (mp->m_rtdev_targp) {
handlers[2].nr_sectors = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
handlers[2].dev = new_encode_dev(mp->m_rtdev_targp->bt_dev);
- handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
+ if (use_rmap)
+ handlers[2].fn = xfs_getfsmap_rtdev_rmapbt;
+ else
+ handlers[2].fn = xfs_getfsmap_rtdev_rtbitmap;
}
#endif /* CONFIG_XFS_RT */
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index e1145107d8cb..9df5a09c0acd 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -22,6 +22,7 @@
#include "xfs_ag_resv.h"
#include "xfs_trace.h"
#include "xfs_rtalloc.h"
+#include "xfs_rtrmap_btree.h"
/*
* Write new AG headers to disk. Non-transactional, but need to be
@@ -114,6 +115,12 @@ xfs_growfs_data_private(
xfs_buf_relse(bp);
}
+ /* Make sure the new fs size won't cause problems with the log. */
+ error = xfs_growfs_check_rtgeom(mp, nb, mp->m_sb.sb_rblocks,
+ mp->m_sb.sb_rextsize);
+ if (error)
+ return error;
+
nb_div = nb;
nb_mod = do_div(nb_div, mp->m_sb.sb_agblocks);
if (nb_mod && nb_mod >= XFS_MIN_AG_BLOCKS)
@@ -221,7 +228,11 @@ xfs_growfs_data_private(
error = xfs_fs_reserve_ag_blocks(mp);
if (error == -ENOSPC)
error = 0;
+
+ /* Compute new maxlevels for rt btrees. */
+ xfs_rtrmapbt_compute_maxlevels(mp);
}
+
return error;
out_trans_cancel:
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index c7c2e6561998..d438c3c001c8 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -447,6 +447,7 @@ static const struct ioctl_sick_map rtgroup_map[] = {
{ XFS_SICK_RG_SUPER, XFS_RTGROUP_GEOM_SICK_SUPER },
{ XFS_SICK_RG_BITMAP, XFS_RTGROUP_GEOM_SICK_BITMAP },
{ XFS_SICK_RG_SUMMARY, XFS_RTGROUP_GEOM_SICK_SUMMARY },
+ { XFS_SICK_RG_RMAPBT, XFS_RTGROUP_GEOM_SICK_RMAPBT },
};
/* Fill out rtgroup geometry health info. */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index c8ad2606f928..c95fe1b1de4e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2382,7 +2382,16 @@ xfs_iflush(
__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
goto flush_out;
}
- if (S_ISREG(VFS_I(ip)->i_mode)) {
+ if (ip->i_df.if_format == XFS_DINODE_FMT_META_BTREE) {
+ if (!S_ISREG(VFS_I(ip)->i_mode) ||
+ !(ip->i_diflags2 & XFS_DIFLAG2_METADATA)) {
+ xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+ "%s: Bad %s meta btree inode %Lu, ptr "PTR_FMT,
+ __func__, xfs_metafile_type_str(ip->i_metatype),
+ ip->i_ino, ip);
+ goto flush_out;
+ }
+ } else if (S_ISREG(VFS_I(ip)->i_mode)) {
if (XFS_TEST_ERROR(
ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
ip->i_df.if_format != XFS_DINODE_FMT_BTREE,
@@ -2422,6 +2431,14 @@ xfs_iflush(
goto flush_out;
}
+ if (xfs_inode_has_attr_fork(ip) &&
+ ip->i_af.if_format == XFS_DINODE_FMT_META_BTREE) {
+ xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
+ "%s: meta btree in inode %Lu attr fork, ptr "PTR_FMT,
+ __func__, ip->i_ino, ip);
+ goto flush_out;
+ }
+
/*
* Inode item log recovery for v2 inodes are dependent on the flushiter
* count for correct sequencing. We bump the flush iteration count so
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index 912f0b1bc3cb..a174f64b8bb2 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -242,6 +242,7 @@ xfs_inode_item_data_fork_size(
}
break;
case XFS_DINODE_FMT_BTREE:
+ case XFS_DINODE_FMT_META_BTREE:
if ((iip->ili_fields & XFS_ILOG_DBROOT) &&
ip->i_df.if_broot_bytes > 0) {
*nbytes += ip->i_df.if_broot_bytes;
@@ -362,6 +363,7 @@ xfs_inode_item_format_data_fork(
}
break;
case XFS_DINODE_FMT_BTREE:
+ case XFS_DINODE_FMT_META_BTREE:
iip->ili_fields &=
~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | XFS_ILOG_DEV);
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index e70d2611456b..5de1d3563b76 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -22,6 +22,7 @@
#include "xfs_log_recover.h"
#include "xfs_icache.h"
#include "xfs_bmap_btree.h"
+#include "xfs_rtrmap_btree.h"
STATIC void
xlog_recover_inode_ra_pass2(
@@ -266,6 +267,38 @@ xlog_dinode_verify_extent_counts(
return 0;
}
+static inline int
+xlog_recover_inode_dbroot(
+ struct xfs_mount *mp,
+ void *src,
+ unsigned int len,
+ struct xfs_dinode *dip)
+{
+ void *dfork = XFS_DFORK_DPTR(dip);
+ unsigned int dsize = XFS_DFORK_DSIZE(dip, mp);
+
+ switch (dip->di_format) {
+ case XFS_DINODE_FMT_BTREE:
+ xfs_bmbt_to_bmdr(mp, src, len, dfork, dsize);
+ break;
+ case XFS_DINODE_FMT_META_BTREE:
+ switch (be16_to_cpu(dip->di_metatype)) {
+ case XFS_METAFILE_RTRMAP:
+ xfs_rtrmapbt_to_disk(mp, src, len, dfork, dsize);
+ return 0;
+ default:
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+ break;
+ default:
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ return 0;
+}
+
STATIC int
xlog_recover_inode_commit_pass2(
struct xlog *log,
@@ -393,8 +426,9 @@ xlog_recover_inode_commit_pass2(
if (unlikely(S_ISREG(ldip->di_mode))) {
- if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
- (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
+ if (ldip->di_format != XFS_DINODE_FMT_EXTENTS &&
+ ldip->di_format != XFS_DINODE_FMT_BTREE &&
+ ldip->di_format != XFS_DINODE_FMT_META_BTREE) {
XFS_CORRUPTION_ERROR(
"Bad log dinode data fork format for regular file",
XFS_ERRLEVEL_LOW, mp, ldip, sizeof(*ldip));
@@ -475,9 +509,9 @@ xlog_recover_inode_commit_pass2(
break;
case XFS_ILOG_DBROOT:
- xfs_bmbt_to_bmdr(mp, (struct xfs_btree_block *)src, len,
- (struct xfs_bmdr_block *)XFS_DFORK_DPTR(dip),
- XFS_DFORK_DSIZE(dip, mp));
+ error = xlog_recover_inode_dbroot(mp, src, len, dip);
+ if (error)
+ goto out_release;
break;
default:
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 0af3d477197b..5c95c97519c7 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1820,6 +1820,8 @@ static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = {
&xlog_xmd_item_ops,
&xlog_rtefi_item_ops,
&xlog_rtefd_item_ops,
+ &xlog_rtrui_item_ops,
+ &xlog_rtrud_item_ops,
};
static const struct xlog_recover_item_ops *
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 97137126b16f..7b7d21b50d54 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -37,6 +37,7 @@
#include "xfs_rtbitmap.h"
#include "xfs_metafile.h"
#include "xfs_rtgroup.h"
+#include "xfs_rtrmap_btree.h"
#include "scrub/stats.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex);
@@ -655,8 +656,7 @@ static inline void
xfs_rtbtree_compute_maxlevels(
struct xfs_mount *mp)
{
- /* This will be filled in later. */
- mp->m_rtbtree_maxlevels = 0;
+ mp->m_rtbtree_maxlevels = mp->m_rtrmap_maxlevels;
}
/*
@@ -727,6 +727,7 @@ xfs_mountfs(
xfs_bmap_compute_maxlevels(mp, XFS_ATTR_FORK);
xfs_mount_setup_inode_geom(mp);
xfs_rmapbt_compute_maxlevels(mp);
+ xfs_rtrmapbt_compute_maxlevels(mp);
xfs_refcountbt_compute_maxlevels(mp);
xfs_agbtree_compute_maxlevels(mp);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index ddb9d19a3a3d..1bc95fb170db 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -158,11 +158,14 @@ typedef struct xfs_mount {
uint m_bmap_dmnr[2]; /* min bmap btree records */
uint m_rmap_mxr[2]; /* max rmap btree records */
uint m_rmap_mnr[2]; /* min rmap btree records */
+ uint m_rtrmap_mxr[2]; /* max rtrmap btree records */
+ uint m_rtrmap_mnr[2]; /* min rtrmap btree records */
uint m_refc_mxr[2]; /* max refc btree records */
uint m_refc_mnr[2]; /* min refc btree records */
uint m_alloc_maxlevels; /* max alloc btree levels */
uint m_bm_maxlevels[2]; /* max bmap btree levels */
uint m_rmap_maxlevels; /* max rmap btree levels */
+ uint m_rtrmap_maxlevels; /* max rtrmap btree level */
uint m_refc_maxlevels; /* max refcount btree level */
unsigned int m_agbtree_maxlevels; /* max level of all AG btrees */
unsigned int m_rtbtree_maxlevels; /* max level of all rt btrees */
@@ -399,6 +402,12 @@ static inline bool xfs_has_rtsb(struct xfs_mount *mp)
return xfs_has_rtgroups(mp) && xfs_has_realtime(mp);
}
+static inline bool xfs_has_rtrmapbt(struct xfs_mount *mp)
+{
+ return xfs_has_rtgroups(mp) && xfs_has_realtime(mp) &&
+ xfs_has_rmapbt(mp);
+}
+
/*
* Some features are always on for v5 file systems, allow the compiler to
 * eliminate dead code when building without v4 support.
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index fa50e5308292..ed8d8ed42f0a 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -19,6 +19,9 @@
#include "xfs_rtalloc.h"
#include "xfs_trans.h"
#include "xfs_ag.h"
+#include "xfs_notify_failure.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtrmap_btree.h"
#include <linux/mm.h>
#include <linux/dax.h>
@@ -154,23 +157,115 @@ xfs_dax_notify_failure_thaw(
}
static int
-xfs_dax_notify_ddev_failure(
+xfs_dax_translate_range(
+ struct xfs_buftarg *btp,
+ u64 offset,
+ u64 len,
+ xfs_daddr_t *daddr,
+ uint64_t *bblen)
+{
+ u64 dev_start = btp->bt_dax_part_off;
+ u64 dev_len = bdev_nr_bytes(btp->bt_bdev);
+ u64 dev_end = dev_start + dev_len - 1;
+
+ /* Notify failure on the whole device. */
+ if (offset == 0 && len == U64_MAX) {
+ offset = dev_start;
+ len = dev_len;
+ }
+
+	/* Ignore ranges outside the filesystem area */
+ if (offset + len - 1 < dev_start)
+ return -ENXIO;
+ if (offset > dev_end)
+ return -ENXIO;
+
+ /* Calculate the real range when it touches the boundary */
+ if (offset > dev_start)
+ offset -= dev_start;
+ else {
+ len -= dev_start - offset;
+ offset = 0;
+ }
+ if (offset + len - 1 > dev_end)
+ len = dev_end - offset + 1;
+
+ *daddr = BTOBB(offset);
+ *bblen = BTOBB(len);
+ return 0;
+}
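
The helper intersects the caller's byte range with the partition backing the buftarg and rebases the result to a partition-relative offset. A userspace model of the intended arithmetic, written as an explicit interval intersection (it assumes offset + len does not overflow, which the whole-device rewrite above ensures before the clamping runs):

#include <stdint.h>
#include <stdio.h>

/* Intersect [offset, offset + len) with the partition at
 * [dev_start, dev_start + dev_len) and rebase to the partition start.
 * Returns -1 (the -ENXIO case) when the ranges do not overlap. */
static int translate_range(uint64_t dev_start, uint64_t dev_len,
			   uint64_t offset, uint64_t len,
			   uint64_t *out_off, uint64_t *out_len)
{
	uint64_t start = offset > dev_start ? offset : dev_start;
	uint64_t end = offset + len;
	uint64_t dev_end = dev_start + dev_len;

	if (end > dev_end)
		end = dev_end;
	if (start >= end)
		return -1;

	*out_off = start - dev_start;
	*out_len = end - start;
	return 0;
}

int main(void)
{
	uint64_t off, len;

	/* 4 KiB partition offset; the failure spans the boundary. */
	if (!translate_range(4096, 1 << 20, 0, 8192, &off, &len))
		printf("off=%llu len=%llu\n",
		       (unsigned long long)off,
		       (unsigned long long)len);	/* off=0 len=4096 */
	return 0;
}
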
+
+static int
+xfs_dax_notify_logdev_failure(
struct xfs_mount *mp,
- xfs_daddr_t daddr,
- xfs_daddr_t bblen,
+ u64 offset,
+ u64 len,
int mf_flags)
{
+ xfs_daddr_t daddr;
+ uint64_t bblen;
+ int error;
+
+ /*
+ * Return ENXIO instead of shutting down the filesystem if the failed
+ * region is beyond the end of the log.
+ */
+ error = xfs_dax_translate_range(mp->m_logdev_targp,
+ offset, len, &daddr, &bblen);
+ if (error)
+ return error;
+
+ /*
+ * In the pre-remove case the failure notification is attempting to
+ * trigger a force unmount. The expectation is that the device is
+ * still present, but its removal is in progress and can not be
+	 * cancelled, so proceed with accessing the log device.
+ */
+ if (mf_flags & MF_MEM_PRE_REMOVE)
+ return 0;
+
+ xfs_err(mp, "ondisk log corrupt, shutting down fs!");
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
+ return -EFSCORRUPTED;
+}
+
+static int
+xfs_dax_notify_dev_failure(
+ struct xfs_mount *mp,
+ u64 offset,
+ u64 len,
+ int mf_flags,
+ enum xfs_group_type type)
+{
struct xfs_failure_info notify = { .mf_flags = mf_flags };
struct xfs_trans *tp = NULL;
struct xfs_btree_cur *cur = NULL;
- struct xfs_buf *agf_bp = NULL;
int error = 0;
bool kernel_frozen = false;
- xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, daddr);
- xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, fsbno);
- xfs_fsblock_t end_fsbno = XFS_DADDR_TO_FSB(mp,
- daddr + bblen - 1);
- xfs_agnumber_t end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno);
+ uint32_t start_gno, end_gno;
+ xfs_fsblock_t start_bno, end_bno;
+ xfs_daddr_t daddr;
+ uint64_t bblen;
+ struct xfs_group *xg = NULL;
+
+ if (!xfs_has_rmapbt(mp)) {
+ xfs_debug(mp, "notify_failure() needs rmapbt enabled!");
+ return -EOPNOTSUPP;
+ }
+
+ error = xfs_dax_translate_range(type == XG_TYPE_RTG ?
+ mp->m_rtdev_targp : mp->m_ddev_targp,
+ offset, len, &daddr, &bblen);
+ if (error)
+ return error;
+
+ if (type == XG_TYPE_RTG) {
+ start_bno = xfs_daddr_to_rtb(mp, daddr);
+ end_bno = xfs_daddr_to_rtb(mp, daddr + bblen - 1);
+ } else {
+ start_bno = XFS_DADDR_TO_FSB(mp, daddr);
+ end_bno = XFS_DADDR_TO_FSB(mp, daddr + bblen - 1);
+ }
if (mf_flags & MF_MEM_PRE_REMOVE) {
xfs_info(mp, "Device is about to be removed!");
@@ -189,46 +284,58 @@ xfs_dax_notify_ddev_failure(
if (error)
goto out;
- for (; agno <= end_agno; agno++) {
+ start_gno = xfs_fsb_to_gno(mp, start_bno, type);
+ end_gno = xfs_fsb_to_gno(mp, end_bno, type);
+ while ((xg = xfs_group_next_range(mp, xg, start_gno, end_gno, type))) {
+ struct xfs_buf *agf_bp = NULL;
+ struct xfs_rtgroup *rtg = NULL;
struct xfs_rmap_irec ri_low = { };
struct xfs_rmap_irec ri_high;
- struct xfs_agf *agf;
- struct xfs_perag *pag;
- xfs_agblock_t range_agend;
- pag = xfs_perag_get(mp, agno);
- error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
- if (error) {
- xfs_perag_put(pag);
- break;
- }
+ if (type == XG_TYPE_AG) {
+ struct xfs_perag *pag = to_perag(xg);
- cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag);
+ error = xfs_alloc_read_agf(pag, tp, 0, &agf_bp);
+ if (error) {
+ xfs_perag_put(pag);
+ break;
+ }
+
+ cur = xfs_rmapbt_init_cursor(mp, tp, agf_bp, pag);
+ } else {
+ rtg = to_rtg(xg);
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_RMAP);
+ cur = xfs_rtrmapbt_init_cursor(tp, rtg);
+ }
/*
* Set the rmap range from ri_low to ri_high, which represents
	 * a [start, end] range where we are looking for the files or metadata.
*/
memset(&ri_high, 0xFF, sizeof(ri_high));
- ri_low.rm_startblock = XFS_FSB_TO_AGBNO(mp, fsbno);
- if (agno == end_agno)
- ri_high.rm_startblock = XFS_FSB_TO_AGBNO(mp, end_fsbno);
+ if (xg->xg_gno == start_gno)
+ ri_low.rm_startblock =
+ xfs_fsb_to_gbno(mp, start_bno, type);
+ if (xg->xg_gno == end_gno)
+ ri_high.rm_startblock =
+ xfs_fsb_to_gbno(mp, end_bno, type);
- agf = agf_bp->b_addr;
- range_agend = min(be32_to_cpu(agf->agf_length) - 1,
- ri_high.rm_startblock);
notify.startblock = ri_low.rm_startblock;
- notify.blockcount = range_agend + 1 - ri_low.rm_startblock;
+ notify.blockcount = min(xg->xg_block_count,
+ ri_high.rm_startblock + 1) -
+ ri_low.rm_startblock;
error = xfs_rmap_query_range(cur, &ri_low, &ri_high,
xfs_dax_failure_fn, &notify);
xfs_btree_del_cursor(cur, error);
- xfs_trans_brelse(tp, agf_bp);
- xfs_perag_put(pag);
- if (error)
+ if (agf_bp)
+ xfs_trans_brelse(tp, agf_bp);
+ if (rtg)
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_RMAP);
+ if (error) {
+ xfs_group_put(xg);
break;
-
- fsbno = XFS_AGB_TO_FSB(mp, agno + 1, 0);
+ }
}
xfs_trans_cancel(tp);
@@ -263,67 +370,20 @@ xfs_dax_notify_failure(
int mf_flags)
{
struct xfs_mount *mp = dax_holder(dax_dev);
- u64 ddev_start;
- u64 ddev_end;
if (!(mp->m_super->s_flags & SB_BORN)) {
xfs_warn(mp, "filesystem is not ready for notify_failure()!");
return -EIO;
}
- if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_daxdev == dax_dev) {
- xfs_debug(mp,
- "notify_failure() not supported on realtime device!");
- return -EOPNOTSUPP;
- }
-
- if (mp->m_logdev_targp && mp->m_logdev_targp->bt_daxdev == dax_dev &&
- mp->m_logdev_targp != mp->m_ddev_targp) {
- /*
- * In the pre-remove case the failure notification is attempting
- * to trigger a force unmount. The expectation is that the
- * device is still present, but its removal is in progress and
- * can not be cancelled, proceed with accessing the log device.
- */
- if (mf_flags & MF_MEM_PRE_REMOVE)
- return 0;
- xfs_err(mp, "ondisk log corrupt, shutting down fs!");
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
- return -EFSCORRUPTED;
- }
-
- if (!xfs_has_rmapbt(mp)) {
- xfs_debug(mp, "notify_failure() needs rmapbt enabled!");
- return -EOPNOTSUPP;
- }
-
- ddev_start = mp->m_ddev_targp->bt_dax_part_off;
- ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1;
-
- /* Notify failure on the whole device. */
- if (offset == 0 && len == U64_MAX) {
- offset = ddev_start;
- len = bdev_nr_bytes(mp->m_ddev_targp->bt_bdev);
- }
-
- /* Ignore the range out of filesystem area */
- if (offset + len - 1 < ddev_start)
- return -ENXIO;
- if (offset > ddev_end)
- return -ENXIO;
-
- /* Calculate the real range when it touches the boundary */
- if (offset > ddev_start)
- offset -= ddev_start;
- else {
- len -= ddev_start - offset;
- offset = 0;
+ if (mp->m_logdev_targp != mp->m_ddev_targp &&
+ mp->m_logdev_targp->bt_daxdev == dax_dev) {
+ return xfs_dax_notify_logdev_failure(mp, offset, len, mf_flags);
}
- if (offset + len - 1 > ddev_end)
- len = ddev_end - offset + 1;
- return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len),
- mf_flags);
+ return xfs_dax_notify_dev_failure(mp, offset, len, mf_flags,
+ (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_daxdev == dax_dev) ?
+ XG_TYPE_RTG : XG_TYPE_AG);
}
const struct dax_holder_operations xfs_dax_holder_operations = {
diff --git a/fs/xfs/xfs_notify_failure.h b/fs/xfs/xfs_notify_failure.h
new file mode 100644
index 000000000000..8d08ec29dd29
--- /dev/null
+++ b/fs/xfs/xfs_notify_failure.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_NOTIFY_FAILURE_H__
+#define __XFS_NOTIFY_FAILURE_H__
+
+extern const struct dax_holder_operations xfs_dax_holder_operations;
+
+#endif /* __XFS_NOTIFY_FAILURE_H__ */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 3abab5fb593e..e1ba5af6250f 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -230,10 +230,10 @@ xfs_qm_unmount_rt(
if (!rtg)
return;
- if (rtg->rtg_inodes[XFS_RTGI_BITMAP])
- xfs_qm_dqdetach(rtg->rtg_inodes[XFS_RTGI_BITMAP]);
- if (rtg->rtg_inodes[XFS_RTGI_SUMMARY])
- xfs_qm_dqdetach(rtg->rtg_inodes[XFS_RTGI_SUMMARY]);
+ if (rtg_bitmap(rtg))
+ xfs_qm_dqdetach(rtg_bitmap(rtg));
+ if (rtg_summary(rtg))
+ xfs_qm_dqdetach(rtg_summary(rtg));
xfs_rtgroup_rele(rtg);
}
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 76b3c0ed3b4f..89decffe76c8 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -23,6 +23,7 @@
#include "xfs_ag.h"
#include "xfs_btree.h"
#include "xfs_trace.h"
+#include "xfs_rtgroup.h"
struct kmem_cache *xfs_rui_cache;
struct kmem_cache *xfs_rud_cache;
@@ -94,7 +95,9 @@ xfs_rui_item_format(
ASSERT(atomic_read(&ruip->rui_next_extent) ==
ruip->rui_format.rui_nextents);
- ruip->rui_format.rui_type = XFS_LI_RUI;
+ ASSERT(lip->li_type == XFS_LI_RUI || lip->li_type == XFS_LI_RUI_RT);
+
+ ruip->rui_format.rui_type = lip->li_type;
ruip->rui_format.rui_size = 1;
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUI_FORMAT, &ruip->rui_format,
@@ -137,12 +140,15 @@ xfs_rui_item_release(
STATIC struct xfs_rui_log_item *
xfs_rui_init(
struct xfs_mount *mp,
+ unsigned short item_type,
uint nextents)
{
struct xfs_rui_log_item *ruip;
ASSERT(nextents > 0);
+ ASSERT(item_type == XFS_LI_RUI || item_type == XFS_LI_RUI_RT);
+
if (nextents > XFS_RUI_MAX_FAST_EXTENTS)
ruip = kzalloc(xfs_rui_log_item_sizeof(nextents),
GFP_KERNEL | __GFP_NOFAIL);
@@ -150,7 +156,7 @@ xfs_rui_init(
ruip = kmem_cache_zalloc(xfs_rui_cache,
GFP_KERNEL | __GFP_NOFAIL);
- xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops);
+ xfs_log_item_init(mp, &ruip->rui_item, item_type, &xfs_rui_item_ops);
ruip->rui_format.rui_nextents = nextents;
ruip->rui_format.rui_id = (uintptr_t)(void *)ruip;
atomic_set(&ruip->rui_next_extent, 0);
@@ -189,7 +195,9 @@ xfs_rud_item_format(
struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
struct xfs_log_iovec *vecp = NULL;
- rudp->rud_format.rud_type = XFS_LI_RUD;
+ ASSERT(lip->li_type == XFS_LI_RUD || lip->li_type == XFS_LI_RUD_RT);
+
+ rudp->rud_format.rud_type = lip->li_type;
rudp->rud_format.rud_size = 1;
xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_RUD_FORMAT, &rudp->rud_format,
@@ -233,6 +241,14 @@ static inline struct xfs_rmap_intent *ri_entry(const struct list_head *e)
return list_entry(e, struct xfs_rmap_intent, ri_list);
}
+static inline bool
+xfs_rui_item_isrt(const struct xfs_log_item *lip)
+{
+ ASSERT(lip->li_type == XFS_LI_RUI || lip->li_type == XFS_LI_RUI_RT);
+
+ return lip->li_type == XFS_LI_RUI_RT;
+}
+
/* Sort rmap intents by AG. */
static int
xfs_rmap_update_diff_items(
@@ -305,18 +321,20 @@ xfs_rmap_update_log_item(
}
static struct xfs_log_item *
-xfs_rmap_update_create_intent(
+__xfs_rmap_update_create_intent(
struct xfs_trans *tp,
struct list_head *items,
unsigned int count,
- bool sort)
+ bool sort,
+ unsigned short item_type)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_rui_log_item *ruip = xfs_rui_init(mp, count);
+ struct xfs_rui_log_item *ruip;
struct xfs_rmap_intent *ri;
ASSERT(count > 0);
+ ruip = xfs_rui_init(mp, item_type, count);
if (sort)
list_sort(mp, items, xfs_rmap_update_diff_items);
list_for_each_entry(ri, items, ri_list)
@@ -324,6 +342,23 @@ xfs_rmap_update_create_intent(
return &ruip->rui_item;
}
+static struct xfs_log_item *
+xfs_rmap_update_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
+{
+ return __xfs_rmap_update_create_intent(tp, items, count, sort,
+ XFS_LI_RUI);
+}
+
+static inline unsigned short
+xfs_rud_type_from_rui(const struct xfs_rui_log_item *ruip)
+{
+ return xfs_rui_item_isrt(&ruip->rui_item) ? XFS_LI_RUD_RT : XFS_LI_RUD;
+}
+
/* Get an RUD so we can process all the deferred rmap updates. */
static struct xfs_log_item *
xfs_rmap_update_create_done(
@@ -335,8 +370,8 @@ xfs_rmap_update_create_done(
struct xfs_rud_log_item *rudp;
rudp = kmem_cache_zalloc(xfs_rud_cache, GFP_KERNEL | __GFP_NOFAIL);
- xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD,
- &xfs_rud_item_ops);
+ xfs_log_item_init(tp->t_mountp, &rudp->rud_item,
+ xfs_rud_type_from_rui(ruip), &xfs_rud_item_ops);
rudp->rud_ruip = ruip;
rudp->rud_format.rud_rui_id = ruip->rui_format.rui_id;
@@ -351,11 +386,20 @@ xfs_rmap_defer_add(
{
struct xfs_mount *mp = tp->t_mountp;
- trace_xfs_rmap_defer(mp, ri);
-
+ /*
+ * Deferred rmap updates for the realtime and data sections must use
+ * separate transactions to finish deferred work because updates to
+ * realtime metadata files can lock AGFs to allocate btree blocks and
+ * we don't want that mixing with the AGF locks taken to finish data
+ * section updates.
+ */
ri->ri_group = xfs_group_intent_get(mp, ri->ri_bmap.br_startblock,
- XG_TYPE_AG);
- xfs_defer_add(tp, &ri->ri_list, &xfs_rmap_update_defer_type);
+ ri->ri_realtime ? XG_TYPE_RTG : XG_TYPE_AG);
+
+ trace_xfs_rmap_defer(mp, ri);
+ xfs_defer_add(tp, &ri->ri_list, ri->ri_realtime ?
+ &xfs_rtrmap_update_defer_type :
+ &xfs_rmap_update_defer_type);
}
/* Cancel a deferred rmap update. */
@@ -415,6 +459,7 @@ xfs_rmap_update_abort_intent(
static inline bool
xfs_rui_validate_map(
struct xfs_mount *mp,
+ bool isrt,
struct xfs_map_extent *map)
{
if (!xfs_has_rmapbt(mp))
@@ -444,6 +489,9 @@ xfs_rui_validate_map(
if (!xfs_verify_fileext(mp, map->me_startoff, map->me_len))
return false;
+ if (isrt)
+ return xfs_verify_rtbext(mp, map->me_startblock, map->me_len);
+
return xfs_verify_fsbext(mp, map->me_startblock, map->me_len);
}
@@ -451,6 +499,7 @@ static inline void
xfs_rui_recover_work(
struct xfs_mount *mp,
struct xfs_defer_pending *dfp,
+ bool isrt,
const struct xfs_map_extent *map)
{
struct xfs_rmap_intent *ri;
@@ -495,7 +544,9 @@ xfs_rui_recover_work(
ri->ri_bmap.br_blockcount = map->me_len;
ri->ri_bmap.br_state = (map->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ?
XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
- ri->ri_group = xfs_group_intent_get(mp, map->me_startblock, XG_TYPE_AG);
+ ri->ri_group = xfs_group_intent_get(mp, map->me_startblock,
+ isrt ? XG_TYPE_RTG : XG_TYPE_AG);
+ ri->ri_realtime = isrt;
xfs_defer_add_item(dfp, &ri->ri_list);
}
@@ -514,6 +565,7 @@ xfs_rmap_recover_work(
struct xfs_rui_log_item *ruip = RUI_ITEM(lip);
struct xfs_trans *tp;
struct xfs_mount *mp = lip->li_log->l_mp;
+ bool isrt = xfs_rui_item_isrt(lip);
int i;
int error = 0;
@@ -523,7 +575,7 @@ xfs_rmap_recover_work(
* just toss the RUI.
*/
for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
- if (!xfs_rui_validate_map(mp,
+ if (!xfs_rui_validate_map(mp, isrt,
&ruip->rui_format.rui_extents[i])) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
&ruip->rui_format,
@@ -531,7 +583,8 @@ xfs_rmap_recover_work(
return -EFSCORRUPTED;
}
- xfs_rui_recover_work(mp, dfp, &ruip->rui_format.rui_extents[i]);
+ xfs_rui_recover_work(mp, dfp, isrt,
+ &ruip->rui_format.rui_extents[i]);
}
resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
@@ -566,10 +619,13 @@ xfs_rmap_relog_intent(
struct xfs_map_extent *map;
unsigned int count;
+ ASSERT(intent->li_type == XFS_LI_RUI ||
+ intent->li_type == XFS_LI_RUI_RT);
+
count = RUI_ITEM(intent)->rui_format.rui_nextents;
map = RUI_ITEM(intent)->rui_format.rui_extents;
- ruip = xfs_rui_init(tp->t_mountp, count);
+ ruip = xfs_rui_init(tp->t_mountp, intent->li_type, count);
memcpy(ruip->rui_format.rui_extents, map, count * sizeof(*map));
atomic_set(&ruip->rui_next_extent, count);
@@ -589,6 +645,47 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
.relog_intent = xfs_rmap_relog_intent,
};
+#ifdef CONFIG_XFS_RT
+static struct xfs_log_item *
+xfs_rtrmap_update_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
+{
+ return __xfs_rmap_update_create_intent(tp, items, count, sort,
+ XFS_LI_RUI_RT);
+}
+
+/* Clean up after calling xfs_rmap_finish_one. */
+STATIC void
+xfs_rtrmap_finish_one_cleanup(
+ struct xfs_trans *tp,
+ struct xfs_btree_cur *rcur,
+ int error)
+{
+ if (rcur)
+ xfs_btree_del_cursor(rcur, error);
+}
+
+const struct xfs_defer_op_type xfs_rtrmap_update_defer_type = {
+ .name = "rtrmap",
+ .max_items = XFS_RUI_MAX_FAST_EXTENTS,
+ .create_intent = xfs_rtrmap_update_create_intent,
+ .abort_intent = xfs_rmap_update_abort_intent,
+ .create_done = xfs_rmap_update_create_done,
+ .finish_item = xfs_rmap_update_finish_item,
+ .finish_cleanup = xfs_rtrmap_finish_one_cleanup,
+ .cancel_item = xfs_rmap_update_cancel_item,
+ .recover_work = xfs_rmap_recover_work,
+ .relog_intent = xfs_rmap_relog_intent,
+};
+#else
+const struct xfs_defer_op_type xfs_rtrmap_update_defer_type = {
+ .name = "rtrmap",
+};
+#endif
+
STATIC bool
xfs_rui_item_match(
struct xfs_log_item *lip,
@@ -654,7 +751,7 @@ xlog_recover_rui_commit_pass2(
return -EFSCORRUPTED;
}
- ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
+ ruip = xfs_rui_init(mp, ITEM_TYPE(item), rui_formatp->rui_nextents);
xfs_rui_copy_format(&ruip->rui_format, rui_formatp);
atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
@@ -668,6 +765,61 @@ const struct xlog_recover_item_ops xlog_rui_item_ops = {
.commit_pass2 = xlog_recover_rui_commit_pass2,
};
+#ifdef CONFIG_XFS_RT
+STATIC int
+xlog_recover_rtrui_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_rui_log_item *ruip;
+ struct xfs_rui_log_format *rui_formatp;
+ size_t len;
+
+ rui_formatp = item->ri_buf[0].i_addr;
+
+ if (item->ri_buf[0].i_len < xfs_rui_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ len = xfs_rui_log_format_sizeof(rui_formatp->rui_nextents);
+ if (item->ri_buf[0].i_len != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ ruip = xfs_rui_init(mp, ITEM_TYPE(item), rui_formatp->rui_nextents);
+ xfs_rui_copy_format(&ruip->rui_format, rui_formatp);
+ atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
+
+ xlog_recover_intent_item(log, &ruip->rui_item, lsn,
+ &xfs_rtrmap_update_defer_type);
+ return 0;
+}
+#else
+STATIC int
+xlog_recover_rtrui_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+}
+#endif
+
+const struct xlog_recover_item_ops xlog_rtrui_item_ops = {
+ .item_type = XFS_LI_RUI_RT,
+ .commit_pass2 = xlog_recover_rtrui_commit_pass2,
+};
+
/*
* This routine is called when an RUD format structure is found in a committed
* transaction in the log. Its purpose is to cancel the corresponding RUI if it
@@ -699,3 +851,33 @@ const struct xlog_recover_item_ops xlog_rud_item_ops = {
.item_type = XFS_LI_RUD,
.commit_pass2 = xlog_recover_rud_commit_pass2,
};
+
+#ifdef CONFIG_XFS_RT
+STATIC int
+xlog_recover_rtrud_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_rud_log_format *rud_formatp;
+
+ rud_formatp = item->ri_buf[0].i_addr;
+ if (item->ri_buf[0].i_len != sizeof(struct xfs_rud_log_format)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ rud_formatp, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ xlog_recover_release_intent(log, XFS_LI_RUI_RT,
+ rud_formatp->rud_rui_id);
+ return 0;
+}
+#else
+# define xlog_recover_rtrud_commit_pass2 xlog_recover_rtrui_commit_pass2
+#endif
+
+const struct xlog_recover_item_ops xlog_rtrud_item_ops = {
+ .item_type = XFS_LI_RUD_RT,
+ .commit_pass2 = xlog_recover_rtrud_commit_pass2,
+};
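xlog_recover_rtrui_commit_pass2 above validates the recovered buffer in two steps: a minimum-size check so rui_nextents can be read safely, then an exact-length check against the count the header advertises. A self-contained sketch of the same two-step validation; the struct layout is illustrative, not the on-disk xfs_rui_log_format:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* Illustrative recovered payload: fixed header + flex array. */
struct rui_fmt {
	uint32_t nextents;
	uint64_t extents[];
};

#define rui_fmt_sizeof(nr) \
	(sizeof(struct rui_fmt) + (nr) * sizeof(uint64_t))

/* Reject buffers too short to hold the header, then require the total
 * length to match the advertised extent count exactly. */
static int rui_fmt_valid(const void *buf, size_t len)
{
	uint32_t nextents;

	if (len < rui_fmt_sizeof(0))
		return -1;
	memcpy(&nextents, buf, sizeof(nextents));
	if (len != rui_fmt_sizeof(nextents))
		return -1;
	return 0;
}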
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 5128c5ad72f5..a69967f9d88e 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -22,6 +22,7 @@
#include "xfs_rtalloc.h"
#include "xfs_sb.h"
#include "xfs_rtbitmap.h"
+#include "xfs_rtrmap_btree.h"
#include "xfs_quota.h"
#include "xfs_log_priv.h"
#include "xfs_health.h"
@@ -845,6 +846,13 @@ xfs_growfs_rt_init_rtsb(
mp->m_rtsb_bp = rtsb_bp;
error = xfs_bwrite(rtsb_bp);
xfs_buf_unlock(rtsb_bp);
+ if (error)
+ return error;
+
+ /* Initialize the rtrmap to reflect the rtsb. */
+ if (rtg_rmap(args->rtg) != NULL)
+ error = xfs_rtrmapbt_init_rtsb(nargs->mp, args->rtg, args->tp);
+
return error;
}
@@ -856,8 +864,8 @@ xfs_growfs_rt_bmblock(
xfs_fileoff_t bmbno)
{
struct xfs_mount *mp = rtg_mount(rtg);
- struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
- struct xfs_inode *rsumip = rtg->rtg_inodes[XFS_RTGI_SUMMARY];
+ struct xfs_inode *rbmip = rtg_bitmap(rtg);
+ struct xfs_inode *rsumip = rtg_summary(rtg);
struct xfs_rtalloc_args args = {
.mp = mp,
.rtg = rtg,
@@ -893,8 +901,9 @@ xfs_growfs_rt_bmblock(
goto out_free;
nargs.tp = args.tp;
- xfs_rtgroup_lock(args.rtg, XFS_RTGLOCK_BITMAP);
- xfs_rtgroup_trans_join(args.tp, args.rtg, XFS_RTGLOCK_BITMAP);
+ xfs_rtgroup_lock(args.rtg, XFS_RTGLOCK_BITMAP | XFS_RTGLOCK_RMAP);
+ xfs_rtgroup_trans_join(args.tp, args.rtg,
+ XFS_RTGLOCK_BITMAP | XFS_RTGLOCK_RMAP);
/*
* Update the bitmap inode's size ondisk and incore. We need to update
@@ -980,9 +989,11 @@ xfs_growfs_rt_bmblock(
goto out_free;
/*
- * Ensure the mount RT feature flag is now set.
+ * Ensure the mount RT feature flag is now set, and compute new
+ * maxlevels for rt btrees.
*/
mp->m_features |= XFS_FEAT_REALTIME;
+ xfs_rtrmapbt_compute_maxlevels(mp);
kfree(nmp);
return 0;
@@ -1041,8 +1052,8 @@ xfs_growfs_rt_alloc_blocks(
xfs_extlen_t *nrbmblocks)
{
struct xfs_mount *mp = rtg_mount(rtg);
- struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
- struct xfs_inode *rsumip = rtg->rtg_inodes[XFS_RTGI_SUMMARY];
+ struct xfs_inode *rbmip = rtg_bitmap(rtg);
+ struct xfs_inode *rsumip = rtg_summary(rtg);
xfs_extlen_t orbmblocks = 0;
xfs_extlen_t orsumblocks = 0;
struct xfs_mount *nmp;
@@ -1150,29 +1161,37 @@ out_rele:
return error;
}
-static int
+int
xfs_growfs_check_rtgeom(
const struct xfs_mount *mp,
+ xfs_rfsblock_t dblocks,
xfs_rfsblock_t rblocks,
xfs_extlen_t rextsize)
{
+ xfs_extlen_t min_logfsbs;
struct xfs_mount *nmp;
- int error = 0;
nmp = xfs_growfs_rt_alloc_fake_mount(mp, rblocks, rextsize);
if (!nmp)
return -ENOMEM;
+ nmp->m_sb.sb_dblocks = dblocks;
+
+ xfs_rtrmapbt_compute_maxlevels(nmp);
+ xfs_trans_resv_calc(nmp, M_RES(nmp));
/*
* New summary size can't be more than half the size of the log. This
* prevents us from getting a log overflow, since we'll log basically
* the whole summary file at once.
*/
- if (nmp->m_rsumblocks > (mp->m_sb.sb_logblocks >> 1))
- error = -EINVAL;
+ min_logfsbs = min_t(xfs_extlen_t, xfs_log_calc_minimum_size(nmp),
+ nmp->m_rsumblocks * 2);
kfree(nmp);
- return error;
+
+ if (min_logfsbs > mp->m_sb.sb_logblocks)
+ return -EINVAL;
+ return 0;
}
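The rewritten check accepts a grow only if the existing log holds at least min(minimum log size for the grown geometry, twice the new summary size). A hedged standalone sketch, with min_log_size standing in for what xfs_log_calc_minimum_size() computes on the fake grown mount:

#include <stdbool.h>
#include <stdint.h>

/* All values are in fs blocks. */
static bool log_fits_rt_geometry(uint32_t logblocks, uint32_t min_log_size,
				 uint32_t rsumblocks)
{
	/* Logging the whole summary file needs half the log ... */
	uint32_t need = 2 * rsumblocks;

	/* ... but never demand more than the supported minimum log. */
	if (min_log_size < need)
		need = min_log_size;

	return logblocks >= need;
}

For example, with rsumblocks = 0x400 and min_log_size = 0x900, need is 0x800 blocks; a smaller existing log makes xfs_growfs_check_rtgeom return -EINVAL.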
/*
@@ -1263,11 +1282,15 @@ xfs_growfs_rt(
XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE)
goto out_unlock;
- /* Unsupported realtime features. */
+ /* Check for features supported only on rtgroups filesystems. */
error = -EOPNOTSUPP;
- if (xfs_has_quota(mp) && !xfs_has_rtgroups(mp))
- goto out_unlock;
- if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp))
+ if (!xfs_has_rtgroups(mp)) {
+ if (xfs_has_rmapbt(mp))
+ goto out_unlock;
+ if (xfs_has_quota(mp))
+ goto out_unlock;
+ }
+ if (xfs_has_reflink(mp))
goto out_unlock;
error = xfs_sb_validate_fsb_count(&mp->m_sb, in->newblocks);
@@ -1291,7 +1314,8 @@ xfs_growfs_rt(
goto out_unlock;
/* Make sure the new fs size won't cause problems with the log. */
- error = xfs_growfs_check_rtgeom(mp, in->newblocks, in->extsize);
+ error = xfs_growfs_check_rtgeom(mp, mp->m_sb.sb_dblocks, in->newblocks,
+ in->extsize);
if (error)
goto out_unlock;
@@ -1498,6 +1522,13 @@ void
xfs_rt_resv_free(
struct xfs_mount *mp)
{
+ struct xfs_rtgroup *rtg = NULL;
+ unsigned int i;
+
+ while ((rtg = xfs_rtgroup_next(mp, rtg))) {
+ for (i = 0; i < XFS_RTGI_MAX; i++)
+ xfs_metafile_resv_free(rtg->rtg_inodes[i]);
+ }
}
/* Reserve space for rt metadata inodes' space expansion. */
@@ -1505,7 +1536,20 @@ int
xfs_rt_resv_init(
struct xfs_mount *mp)
{
- return 0;
+ struct xfs_rtgroup *rtg = NULL;
+ xfs_filblks_t ask;
+ int error = 0;
+
+ while ((rtg = xfs_rtgroup_next(mp, rtg))) {
+ int err2;
+
+ ask = xfs_rtrmapbt_calc_reserves(mp);
+ err2 = xfs_metafile_resv_init(rtg_rmap(rtg), ask);
+ if (err2 && !error)
+ error = err2;
+ }
+
+ return error;
}
/*
@@ -1622,7 +1666,7 @@ xfs_rtpick_extent(
xfs_rtxlen_t len) /* allocation length (rtextents) */
{
struct xfs_mount *mp = rtg_mount(rtg);
- struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
+ struct xfs_inode *rbmip = rtg_bitmap(rtg);
xfs_rtxnum_t b = 0; /* result rtext */
int log2; /* log of sequence number */
uint64_t resid; /* residual after log removed */
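xfs_rt_resv_init above deliberately keeps walking the remaining rtgroups after a reservation failure and reports only the first error. A small sketch of that first-error-wins loop (names illustrative):

#include <stdio.h>

/* Per-group operation; fails for group 2 in this example. */
static int reserve_one(int group)
{
	return group == 2 ? -28 /* ENOSPC-like */ : 0;
}

/* Visit every group even after a failure, report the first error. */
static int reserve_all(int ngroups)
{
	int error = 0;
	int g;

	for (g = 0; g < ngroups; g++) {
		int err2 = reserve_one(g);

		if (err2 && !error)
			error = err2;
	}
	return error;
}

int main(void)
{
	printf("%d\n", reserve_all(4));	/* prints -28 */
	return 0;
}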
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index d87523e6a550..9044f7226ab6 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -46,6 +46,8 @@ xfs_growfs_rt(
xfs_growfs_rt_t *in); /* user supplied growfs struct */
int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp);
+int xfs_growfs_check_rtgeom(const struct xfs_mount *mp, xfs_rfsblock_t dblocks,
+ xfs_rfsblock_t rblocks, xfs_extlen_t rextsize);
#else
# define xfs_growfs_rt(mp,in) (-ENOSYS)
# define xfs_rtalloc_reinit_frextents(m) (0)
@@ -65,6 +67,14 @@ xfs_rtmount_init(
# define xfs_rtunmount_inodes(m)
# define xfs_rt_resv_free(mp) ((void)0)
# define xfs_rt_resv_init(mp) (0)
+
+static inline int
+xfs_growfs_check_rtgeom(const struct xfs_mount *mp,
+ xfs_rfsblock_t dblocks, xfs_rfsblock_t rblocks,
+ xfs_extlen_t rextsize)
+{
+ return 0;
+}
#endif /* CONFIG_XFS_RT */
#endif /* __XFS_RTALLOC_H__ */
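The header pairs the real declaration with a static-inline stub that returns 0 when CONFIG_XFS_RT is off, so callers compile unchanged either way. The idiom, sketched with a hypothetical CONFIG_FOO:

/* Real implementation behind a config option, no-op stub otherwise. */
#ifdef CONFIG_FOO
int foo_check(int arg);			/* real version lives in foo.c */
#else
static inline int foo_check(int arg)
{
	return 0;			/* feature absent: trivially OK */
}
#endif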
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index ffb52725c2a8..b7f2988bc03b 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -52,7 +52,9 @@ int xfs_stats_format(struct xfsstats __percpu *stats, char *buf)
{ "rmapbt", xfsstats_offset(xs_refcbt_2) },
{ "refcntbt", xfsstats_offset(xs_rmap_mem_2) },
{ "rmapbt_mem", xfsstats_offset(xs_rcbag_2) },
- { "rcbagbt", xfsstats_offset(xs_qm_dqreclaims)},
+ { "rcbagbt", xfsstats_offset(xs_rtrmap_2) },
+ { "rtrmapbt", xfsstats_offset(xs_rtrmap_mem_2)},
+ { "rtrmapbt_mem", xfsstats_offset(xs_qm_dqreclaims)},
/* we print both series of quota information together */
{ "qm", xfsstats_offset(xs_xstrat_bytes)},
};
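The names in this table mark cumulative endpoints: each entry records the uint32_t offset where its counter block ends, i.e. the field that starts the next block, which is why inserting xs_rtrmap_2 and xs_rtrmap_mem_2 rewires the "rcbagbt" endpoint and appends two rows. A toy sketch of slicing one flat counter array with such a table; the offsetof-based macro is an assumption about how the kernel's xfsstats_offset works:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Toy counter layout; stands in for struct __xfsstats. */
struct stats {
	uint32_t a[2];
	uint32_t b[3];
	uint32_t c[2];
};

/* Field offset in uint32_t units. */
#define stats_offset(f)	(offsetof(struct stats, f) / sizeof(uint32_t))

int main(void)
{
	/* Each entry records where its block ENDS: the next field. */
	static const struct {
		const char	*name;
		unsigned int	endpoint;
	} tab[] = {
		{ "a", stats_offset(b) },
		{ "b", stats_offset(c) },
		{ "c", sizeof(struct stats) / sizeof(uint32_t) },
	};
	struct stats s = { .a = { 1, 2 }, .b = { 3, 4, 5 }, .c = { 6, 7 } };
	const uint32_t *v = (const uint32_t *)&s;
	unsigned int i, j = 0;

	for (i = 0; i < 3; i++) {
		printf("%s:", tab[i].name);
		for (; j < tab[i].endpoint; j++)
			printf(" %u", v[j]);
		printf("\n");
	}
	return 0;
}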
diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h
index a61fb56ed2e6..9c47de5dff2d 100644
--- a/fs/xfs/xfs_stats.h
+++ b/fs/xfs/xfs_stats.h
@@ -127,6 +127,8 @@ struct __xfsstats {
uint32_t xs_refcbt_2[__XBTS_MAX];
uint32_t xs_rmap_mem_2[__XBTS_MAX];
uint32_t xs_rcbag_2[__XBTS_MAX];
+ uint32_t xs_rtrmap_2[__XBTS_MAX];
+ uint32_t xs_rtrmap_mem_2[__XBTS_MAX];
uint32_t xs_qm_dqreclaims;
uint32_t xs_qm_dqreclaim_misses;
uint32_t xs_qm_dquot_dups;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 394fdf3bb535..ecd5a9f444d8 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1767,12 +1767,6 @@ xfs_fs_fill_super(
}
}
- if (xfs_has_rmapbt(mp) && mp->m_sb.sb_rblocks) {
- xfs_alert(mp,
- "reverse mapping btree not compatible with realtime device!");
- error = -EINVAL;
- goto out_filestream_unmount;
- }
if (xfs_has_exchange_range(mp))
xfs_warn_experimental(mp, XFS_EXPERIMENTAL_EXCHRANGE);
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 302e6e5d6c7e..c0e85c1e42f2 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -92,7 +92,6 @@ extern xfs_agnumber_t xfs_set_inode_alloc(struct xfs_mount *,
extern const struct export_operations xfs_export_operations;
extern const struct quotactl_ops xfs_quotactl_operations;
-extern const struct dax_holder_operations xfs_dax_holder_operations;
extern void xfs_reinit_percpu_counters(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index cbda663fe6e8..84cdc145e2d9 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -14,11 +14,15 @@
* ino: filesystem inode number
*
* agbno: per-AG block number in fs blocks
+ * rgbno: per-rtgroup block number in fs blocks
* startblock: physical block number for file mappings. This is either a
* segmented fsblock for data device mappings, or a rfsblock
* for realtime device mappings
* fsbcount: number of blocks in an extent, in fs blocks
*
+ * gbno: generic allocation group block number. This is an agbno for
+ * space in a per-AG allocation group or an rgbno in a realtime group.
+ *
* daddr: physical block number in 512b blocks
* bbcount: number of blocks in a physical extent, in 512b blocks
*
@@ -2295,6 +2299,7 @@ TRACE_DEFINE_ENUM(XFS_DINODE_FMT_LOCAL);
TRACE_DEFINE_ENUM(XFS_DINODE_FMT_EXTENTS);
TRACE_DEFINE_ENUM(XFS_DINODE_FMT_BTREE);
TRACE_DEFINE_ENUM(XFS_DINODE_FMT_UUID);
+TRACE_DEFINE_ENUM(XFS_DINODE_FMT_META_BTREE);
DECLARE_EVENT_CLASS(xfs_swap_extent_class,
TP_PROTO(struct xfs_inode *ip, int which),
@@ -2918,13 +2923,14 @@ DEFINE_DEFER_PENDING_ITEM_EVENT(xfs_defer_finish_item);
/* rmap tracepoints */
DECLARE_EVENT_CLASS(xfs_rmap_class,
TP_PROTO(struct xfs_btree_cur *cur,
- xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten,
+ xfs_agblock_t gbno, xfs_extlen_t len, bool unwritten,
const struct xfs_owner_info *oinfo),
- TP_ARGS(cur, agbno, len, unwritten, oinfo),
+ TP_ARGS(cur, gbno, len, unwritten, oinfo),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
- __field(xfs_agblock_t, agbno)
+ __field(xfs_agblock_t, gbno)
__field(xfs_extlen_t, len)
__field(uint64_t, owner)
__field(uint64_t, offset)
@@ -2932,8 +2938,9 @@ DECLARE_EVENT_CLASS(xfs_rmap_class,
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
+ __entry->type = cur->bc_group->xg_type;
__entry->agno = cur->bc_group->xg_gno;
- __entry->agbno = agbno;
+ __entry->gbno = gbno;
__entry->len = len;
__entry->owner = oinfo->oi_owner;
__entry->offset = oinfo->oi_offset;
@@ -2941,10 +2948,11 @@ DECLARE_EVENT_CLASS(xfs_rmap_class,
if (unwritten)
__entry->flags |= XFS_RMAP_UNWRITTEN;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%lx",
+ TP_printk("dev %d:%d %sno 0x%x gbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%lx",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
- __entry->agbno,
+ __entry->gbno,
__entry->len,
__entry->owner,
__entry->offset,
@@ -2953,9 +2961,9 @@ DECLARE_EVENT_CLASS(xfs_rmap_class,
#define DEFINE_RMAP_EVENT(name) \
DEFINE_EVENT(xfs_rmap_class, name, \
TP_PROTO(struct xfs_btree_cur *cur, \
- xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, \
+ xfs_agblock_t gbno, xfs_extlen_t len, bool unwritten, \
const struct xfs_owner_info *oinfo), \
- TP_ARGS(cur, agbno, len, unwritten, oinfo))
+ TP_ARGS(cur, gbno, len, unwritten, oinfo))
/* btree cursor error/%ip tracepoint class */
DECLARE_EVENT_CLASS(xfs_btree_error_class,
@@ -3018,47 +3026,36 @@ TRACE_EVENT(xfs_rmap_convert_state,
TP_ARGS(cur, state, caller_ip),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
- __field(xfs_ino_t, ino)
__field(int, state)
__field(unsigned long, caller_ip)
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
- switch (cur->bc_ops->type) {
- case XFS_BTREE_TYPE_INODE:
- __entry->agno = 0;
- __entry->ino = cur->bc_ino.ip->i_ino;
- break;
- case XFS_BTREE_TYPE_AG:
- __entry->agno = cur->bc_group->xg_gno;
- __entry->ino = 0;
- break;
- case XFS_BTREE_TYPE_MEM:
- __entry->agno = 0;
- __entry->ino = 0;
- break;
- }
+ __entry->type = cur->bc_group->xg_type;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->state = state;
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d agno 0x%x ino 0x%llx state %d caller %pS",
+ TP_printk("dev %d:%d %sno 0x%x state %d caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
- __entry->ino,
__entry->state,
(char *)__entry->caller_ip)
);
DECLARE_EVENT_CLASS(xfs_rmapbt_class,
TP_PROTO(struct xfs_btree_cur *cur,
- xfs_agblock_t agbno, xfs_extlen_t len,
+ xfs_agblock_t gbno, xfs_extlen_t len,
uint64_t owner, uint64_t offset, unsigned int flags),
- TP_ARGS(cur, agbno, len, owner, offset, flags),
+ TP_ARGS(cur, gbno, len, owner, offset, flags),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
- __field(xfs_agblock_t, agbno)
+ __field(xfs_agblock_t, gbno)
__field(xfs_extlen_t, len)
__field(uint64_t, owner)
__field(uint64_t, offset)
@@ -3066,17 +3063,19 @@ DECLARE_EVENT_CLASS(xfs_rmapbt_class,
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
+ __entry->type = cur->bc_group->xg_type;
__entry->agno = cur->bc_group->xg_gno;
- __entry->agbno = agbno;
+ __entry->gbno = gbno;
__entry->len = len;
__entry->owner = owner;
__entry->offset = offset;
__entry->flags = flags;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
+ TP_printk("dev %d:%d %sno 0x%x gbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
- __entry->agbno,
+ __entry->gbno,
__entry->len,
__entry->owner,
__entry->offset,
@@ -3085,9 +3084,9 @@ DECLARE_EVENT_CLASS(xfs_rmapbt_class,
#define DEFINE_RMAPBT_EVENT(name) \
DEFINE_EVENT(xfs_rmapbt_class, name, \
TP_PROTO(struct xfs_btree_cur *cur, \
- xfs_agblock_t agbno, xfs_extlen_t len, \
+ xfs_agblock_t gbno, xfs_extlen_t len, \
uint64_t owner, uint64_t offset, unsigned int flags), \
- TP_ARGS(cur, agbno, len, owner, offset, flags))
+ TP_ARGS(cur, gbno, len, owner, offset, flags))
TRACE_DEFINE_ENUM(XFS_RMAP_MAP);
TRACE_DEFINE_ENUM(XFS_RMAP_MAP_SHARED);
@@ -3104,8 +3103,9 @@ DECLARE_EVENT_CLASS(xfs_rmap_deferred_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned long long, owner)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
- __field(xfs_agblock_t, agbno)
+ __field(xfs_agblock_t, gbno)
__field(int, whichfork)
__field(xfs_fileoff_t, l_loff)
__field(xfs_filblks_t, l_len)
@@ -3114,9 +3114,11 @@ DECLARE_EVENT_CLASS(xfs_rmap_deferred_class,
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
- __entry->agno = XFS_FSB_TO_AGNO(mp, ri->ri_bmap.br_startblock);
- __entry->agbno = XFS_FSB_TO_AGBNO(mp,
- ri->ri_bmap.br_startblock);
+ __entry->type = ri->ri_group->xg_type;
+ __entry->agno = ri->ri_group->xg_gno;
+ __entry->gbno = xfs_fsb_to_gbno(mp,
+ ri->ri_bmap.br_startblock,
+ ri->ri_group->xg_type);
__entry->owner = ri->ri_owner;
__entry->whichfork = ri->ri_whichfork;
__entry->l_loff = ri->ri_bmap.br_startoff;
@@ -3124,11 +3126,12 @@ DECLARE_EVENT_CLASS(xfs_rmap_deferred_class,
__entry->l_state = ri->ri_bmap.br_state;
__entry->op = ri->ri_type;
),
- TP_printk("dev %d:%d op %s agno 0x%x agbno 0x%x owner 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d",
+ TP_printk("dev %d:%d op %s %sno 0x%x gbno 0x%x owner 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__print_symbolic(__entry->op, XFS_RMAP_INTENT_STRINGS),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
- __entry->agbno,
+ __entry->gbno,
__entry->owner,
__print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
__entry->l_loff,
@@ -3993,7 +3996,7 @@ TRACE_EVENT(xfs_fsmap_mapping,
__entry->offset = frec->offset;
__entry->flags = frec->rm_flags;
),
- TP_printk("dev %d:%d keydev %d:%d agno 0x%x rmapbno 0x%x start_daddr 0x%llx len_daddr 0x%llx owner 0x%llx fileoff 0x%llx flags 0x%x",
+ TP_printk("dev %d:%d keydev %d:%d agno 0x%x gbno 0x%x start_daddr 0x%llx len_daddr 0x%llx owner 0x%llx fileoff 0x%llx flags 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
MAJOR(__entry->keydev), MINOR(__entry->keydev),
__entry->agno,
@@ -5619,6 +5622,27 @@ DEFINE_METAFILE_RESV_EVENT(xfs_metafile_resv_free_space);
DEFINE_METAFILE_RESV_EVENT(xfs_metafile_resv_critical);
DEFINE_INODE_ERROR_EVENT(xfs_metafile_resv_init_error);
+#ifdef CONFIG_XFS_RT
+TRACE_EVENT(xfs_growfs_check_rtgeom,
+ TP_PROTO(const struct xfs_mount *mp, unsigned int min_logfsbs),
+ TP_ARGS(mp, min_logfsbs),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned int, logblocks)
+ __field(unsigned int, min_logfsbs)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->logblocks = mp->m_sb.sb_logblocks;
+ __entry->min_logfsbs = min_logfsbs;
+ ),
+ TP_printk("dev %d:%d logblocks %u min_logfsbs %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->logblocks,
+ __entry->min_logfsbs)
+);
+#endif /* CONFIG_XFS_RT */
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
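The converted trace classes store xg_type and render the group-number prefix with __print_symbolic, so one event class covers per-AG and rtgroup btrees alike. A userspace approximation of that decode; the "ag"/"rtg" strings are assumed values of XG_TYPE_STRINGS:

#include <stdio.h>

/* Stand-in for enum xfs_group_type. */
enum group_type { XG_TYPE_AG, XG_TYPE_RTG };

static const char *group_str(enum group_type t)
{
	switch (t) {
	case XG_TYPE_AG:
		return "ag";
	case XG_TYPE_RTG:
		return "rtg";
	}
	return "?";
}

/* One formatter serves both flavors, as in the converted rmap
 * tracepoints: "%sno" renders as "agno" or "rtgno". */
static void print_rmap(enum group_type t, unsigned int gno,
		       unsigned int gbno, unsigned int len)
{
	printf("%sno 0x%x gbno 0x%x fsbcount 0x%x\n",
	       group_str(t), gno, gbno, len);
}

int main(void)
{
	print_rmap(XG_TYPE_AG, 3, 0x80, 8);	/* agno 0x3 gbno 0x80 ... */
	print_rmap(XG_TYPE_RTG, 0, 0x40, 4);	/* rtgno 0x0 gbno 0x40 ... */
	return 0;
}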