From 9eef772f3a194f6841850e45dacdf4207ec7da84 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 15 Apr 2024 14:54:34 -0700 Subject: xfs: add an explicit owner field to xfs_da_args Add an explicit owner field to xfs_da_args, which will make it easier for online fsck to set the owner field of the temporary directory and xattr structures that it builds to repair damaged metadata. Note: I hopefully found all the xfs_da_args definitions by looking for automatic stack variable declarations and xfs_da_args.dp assignments: git grep -E '(args.*dp =|struct xfs_da_args[[:space:]]*[a-z0-9][a-z0-9]*)' Note that callers of xfs_attr_{get,set,change} can set the owner to zero (or leave it unset) to have the default set to args->dp. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 673a4b6d2e8d..74d769461443 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -264,6 +264,8 @@ xfs_attr_get( if (xfs_is_shutdown(args->dp->i_mount)) return -EIO; + if (!args->owner) + args->owner = args->dp->i_ino; args->geo = args->dp->i_mount->m_attr_geo; args->whichfork = XFS_ATTR_FORK; args->hashval = xfs_da_hashname(args->name, args->namelen); @@ -937,6 +939,8 @@ xfs_attr_set( if (error) return error; + if (!args->owner) + args->owner = args->dp->i_ino; args->geo = mp->m_attr_geo; args->whichfork = XFS_ATTR_FORK; args->hashval = xfs_da_hashname(args->name, args->namelen); -- cgit v1.2.3-59-g8ed1b From f4887fbc41dcb1560ec5da982ac7c6ad04b71de5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 15 Apr 2024 14:54:36 -0700 Subject: xfs: validate attr leaf buffer owners Create a leaf block header checking function to validate the owner field of xattr leaf blocks. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 10 ++++---- fs/xfs/libxfs/xfs_attr_leaf.c | 56 ++++++++++++++++++++++++++++++++++++------- fs/xfs/libxfs/xfs_attr_leaf.h | 4 +++- fs/xfs/libxfs/xfs_da_btree.c | 42 ++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_da_btree.h | 1 + fs/xfs/libxfs/xfs_exchmaps.c | 3 ++- fs/xfs/scrub/dabtree.c | 7 ++++++ fs/xfs/xfs_attr_list.c | 24 ++++++++++++++++--- 8 files changed, 128 insertions(+), 19 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 74d769461443..b3c9666cd011 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -649,8 +649,8 @@ xfs_attr_leaf_remove_attr( int forkoff; int error; - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, - &bp); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, + args->blkno, &bp); if (error) return error; @@ -681,7 +681,7 @@ xfs_attr_leaf_shrink( if (!xfs_attr_is_leaf(dp)) return 0; - error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp); if (error) return error; @@ -1158,7 +1158,7 @@ xfs_attr_leaf_try_add( struct xfs_buf *bp; int error; - error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, &bp); if (error) return error; @@ -1206,7 +1206,7 @@ xfs_attr_leaf_hasname( { int error = 0; - error = xfs_attr3_leaf_read(args->trans, args->dp, 0, bp); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, 0, bp); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 8937c034b330..17ec5ff5a4e3 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -388,6 +388,27 @@ xfs_attr3_leaf_verify( return NULL; } +xfs_failaddr_t +xfs_attr3_leaf_header_check( + struct xfs_buf *bp, + xfs_ino_t owner) +{ + struct xfs_mount *mp = bp->b_mount; + + if (xfs_has_crc(mp)) { + struct xfs_attr3_leafblock *hdr3 = bp->b_addr; + + if (hdr3->hdr.info.hdr.magic != + cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) + return __this_address; + + if (be64_to_cpu(hdr3->hdr.info.owner) != owner) + return __this_address; + } + + return NULL; +} + static void xfs_attr3_leaf_write_verify( struct xfs_buf *bp) @@ -448,16 +469,30 @@ int xfs_attr3_leaf_read( struct xfs_trans *tp, struct xfs_inode *dp, + xfs_ino_t owner, xfs_dablk_t bno, struct xfs_buf **bpp) { + xfs_failaddr_t fa; int err; err = xfs_da_read_buf(tp, dp, bno, 0, bpp, XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops); - if (!err && tp && *bpp) + if (err || !(*bpp)) + return err; + + fa = xfs_attr3_leaf_header_check(*bpp, owner); + if (fa) { + __xfs_buf_mark_corrupt(*bpp, fa); + xfs_trans_brelse(tp, *bpp); + *bpp = NULL; + xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); + return -EFSCORRUPTED; + } + + if (tp) xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF); - return err; + return 0; } /*======================================================================== @@ -1160,7 +1195,7 @@ xfs_attr3_leaf_to_node( error = xfs_da_grow_inode(args, &blkno); if (error) goto out; - error = xfs_attr3_leaf_read(args->trans, dp, 0, &bp1); + error = xfs_attr3_leaf_read(args->trans, dp, args->owner, 0, &bp1); if (error) goto out; @@ -1995,7 +2030,7 @@ xfs_attr3_leaf_toosmall( if (blkno == 0) continue; error = xfs_attr3_leaf_read(state->args->trans, state->args->dp, - blkno, &bp); + state->args->owner, blkno, &bp); if (error) return error; @@ -2717,7 +2752,8 @@ xfs_attr3_leaf_clearflag( /* * Set up the operation. */ - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, + args->blkno, &bp); if (error) return error; @@ -2781,7 +2817,8 @@ xfs_attr3_leaf_setflag( /* * Set up the operation. */ - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, + args->blkno, &bp); if (error) return error; @@ -2840,7 +2877,8 @@ xfs_attr3_leaf_flipflags( /* * Read the block containing the "old" attr */ - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp1); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, + args->blkno, &bp1); if (error) return error; @@ -2848,8 +2886,8 @@ xfs_attr3_leaf_flipflags( * Read the block containing the "new" attr, if it is different */ if (args->blkno2 != args->blkno) { - error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno2, - &bp2); + error = xfs_attr3_leaf_read(args->trans, args->dp, args->owner, + args->blkno2, &bp2); if (error) return error; } else { diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 9b9948639c0f..bac219589896 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -98,12 +98,14 @@ int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp, struct xfs_buf *leaf2_bp); int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local); int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp, - xfs_dablk_t bno, struct xfs_buf **bpp); + xfs_ino_t owner, xfs_dablk_t bno, struct xfs_buf **bpp); void xfs_attr3_leaf_hdr_from_disk(struct xfs_da_geometry *geo, struct xfs_attr3_icleaf_hdr *to, struct xfs_attr_leafblock *from); void xfs_attr3_leaf_hdr_to_disk(struct xfs_da_geometry *geo, struct xfs_attr_leafblock *to, struct xfs_attr3_icleaf_hdr *from); +xfs_failaddr_t xfs_attr3_leaf_header_check(struct xfs_buf *bp, + xfs_ino_t owner); #endif /* __XFS_ATTR_LEAF_H__ */ diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 743f6421cc04..65eef8775187 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -252,6 +252,25 @@ xfs_da3_node_verify( return NULL; } +xfs_failaddr_t +xfs_da3_header_check( + struct xfs_buf *bp, + xfs_ino_t owner) +{ + struct xfs_mount *mp = bp->b_mount; + struct xfs_da_blkinfo *hdr = bp->b_addr; + + if (!xfs_has_crc(mp)) + return NULL; + + switch (hdr->magic) { + case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC): + return xfs_attr3_leaf_header_check(bp, owner); + } + + return NULL; +} + static void xfs_da3_node_write_verify( struct xfs_buf *bp) @@ -1591,6 +1610,7 @@ xfs_da3_node_lookup_int( struct xfs_da_node_entry *btree; struct xfs_da3_icnode_hdr nodehdr; struct xfs_da_args *args; + xfs_failaddr_t fa; xfs_dablk_t blkno; xfs_dahash_t hashval; xfs_dahash_t btreehashval; @@ -1629,6 +1649,12 @@ xfs_da3_node_lookup_int( if (magic == XFS_ATTR_LEAF_MAGIC || magic == XFS_ATTR3_LEAF_MAGIC) { + fa = xfs_attr3_leaf_header_check(blk->bp, args->owner); + if (fa) { + __xfs_buf_mark_corrupt(blk->bp, fa); + xfs_da_mark_sick(args); + return -EFSCORRUPTED; + } blk->magic = XFS_ATTR_LEAF_MAGIC; blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL); break; @@ -1996,6 +2022,7 @@ xfs_da3_path_shift( struct xfs_da_node_entry *btree; struct xfs_da3_icnode_hdr nodehdr; struct xfs_buf *bp; + xfs_failaddr_t fa; xfs_dablk_t blkno = 0; int level; int error; @@ -2087,6 +2114,12 @@ xfs_da3_path_shift( break; case XFS_ATTR_LEAF_MAGIC: case XFS_ATTR3_LEAF_MAGIC: + fa = xfs_attr3_leaf_header_check(blk->bp, args->owner); + if (fa) { + __xfs_buf_mark_corrupt(blk->bp, fa); + xfs_da_mark_sick(args); + return -EFSCORRUPTED; + } blk->magic = XFS_ATTR_LEAF_MAGIC; ASSERT(level == path->active-1); blk->index = 0; @@ -2290,6 +2323,7 @@ xfs_da3_swap_lastblock( struct xfs_buf *last_buf; struct xfs_buf *sib_buf; struct xfs_buf *par_buf; + xfs_failaddr_t fa; xfs_dahash_t dead_hash; xfs_fileoff_t lastoff; xfs_dablk_t dead_blkno; @@ -2326,6 +2360,14 @@ xfs_da3_swap_lastblock( error = xfs_da3_node_read(tp, dp, last_blkno, &last_buf, w); if (error) return error; + fa = xfs_da3_header_check(last_buf, args->owner); + if (fa) { + __xfs_buf_mark_corrupt(last_buf, fa); + xfs_trans_brelse(tp, last_buf); + xfs_da_mark_sick(args); + return -EFSCORRUPTED; + } + /* * Copy the last block into the dead buffer and log it. */ diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 7fb13f26edaa..99618e0c8a72 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -236,6 +236,7 @@ void xfs_da3_node_hdr_from_disk(struct xfs_mount *mp, struct xfs_da3_icnode_hdr *to, struct xfs_da_intnode *from); void xfs_da3_node_hdr_to_disk(struct xfs_mount *mp, struct xfs_da_intnode *to, struct xfs_da3_icnode_hdr *from); +xfs_failaddr_t xfs_da3_header_check(struct xfs_buf *bp, xfs_ino_t owner); extern struct kmem_cache *xfs_da_state_cache; diff --git a/fs/xfs/libxfs/xfs_exchmaps.c b/fs/xfs/libxfs/xfs_exchmaps.c index 8d28e8cce5e9..9c9cf2e998b2 100644 --- a/fs/xfs/libxfs/xfs_exchmaps.c +++ b/fs/xfs/libxfs/xfs_exchmaps.c @@ -438,7 +438,8 @@ xfs_exchmaps_attr_to_sf( if (!xfs_attr_is_leaf(xmi->xmi_ip2)) return 0; - error = xfs_attr3_leaf_read(tp, xmi->xmi_ip2, 0, &bp); + error = xfs_attr3_leaf_read(tp, xmi->xmi_ip2, xmi->xmi_ip2->i_ino, 0, + &bp); if (error) return error; diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index fa6385a99ac4..c71254088dff 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -320,6 +320,7 @@ xchk_da_btree_block( struct xfs_da3_blkinfo *hdr3; struct xfs_da_args *dargs = &ds->dargs; struct xfs_inode *ip = ds->dargs.dp; + xfs_failaddr_t fa; xfs_ino_t owner; int *pmaxrecs; struct xfs_da3_icnode_hdr nodehdr; @@ -442,6 +443,12 @@ xchk_da_btree_block( goto out_freebp; } + fa = xfs_da3_header_check(blk->bp, dargs->owner); + if (fa) { + xchk_da_set_corrupt(ds, level); + goto out_freebp; + } + /* * If we've been handed a block that is below the dabtree root, does * its hashval match what the parent block expected to see? diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 42a575db7267..f6496e33ff91 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -214,6 +214,7 @@ xfs_attr_node_list_lookup( struct xfs_mount *mp = dp->i_mount; struct xfs_trans *tp = context->tp; struct xfs_buf *bp; + xfs_failaddr_t fa; int i; int error = 0; unsigned int expected_level = 0; @@ -273,6 +274,12 @@ xfs_attr_node_list_lookup( } } + fa = xfs_attr3_leaf_header_check(bp, dp->i_ino); + if (fa) { + __xfs_buf_mark_corrupt(bp, fa); + goto out_releasebuf; + } + if (expected_level != 0) goto out_corruptbuf; @@ -281,6 +288,7 @@ xfs_attr_node_list_lookup( out_corruptbuf: xfs_buf_mark_corrupt(bp); +out_releasebuf: xfs_trans_brelse(tp, bp); xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); return -EFSCORRUPTED; @@ -297,6 +305,7 @@ xfs_attr_node_list( struct xfs_buf *bp; struct xfs_inode *dp = context->dp; struct xfs_mount *mp = dp->i_mount; + xfs_failaddr_t fa; int error = 0; trace_xfs_attr_node_list(context); @@ -332,6 +341,14 @@ xfs_attr_node_list( case XFS_ATTR_LEAF_MAGIC: case XFS_ATTR3_LEAF_MAGIC: leaf = bp->b_addr; + fa = xfs_attr3_leaf_header_check(bp, dp->i_ino); + if (fa) { + __xfs_buf_mark_corrupt(bp, fa); + xfs_trans_brelse(context->tp, bp); + xfs_dirattr_mark_sick(dp, XFS_ATTR_FORK); + bp = NULL; + break; + } xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); entries = xfs_attr3_leaf_entryp(leaf); @@ -382,8 +399,8 @@ need_lookup: break; cursor->blkno = leafhdr.forw; xfs_trans_brelse(context->tp, bp); - error = xfs_attr3_leaf_read(context->tp, dp, cursor->blkno, - &bp); + error = xfs_attr3_leaf_read(context->tp, dp, dp->i_ino, + cursor->blkno, &bp); if (error) return error; } @@ -503,7 +520,8 @@ xfs_attr_leaf_list( trace_xfs_attr_leaf_list(context); context->cursor.blkno = 0; - error = xfs_attr3_leaf_read(context->tp, context->dp, 0, &bp); + error = xfs_attr3_leaf_read(context->tp, context->dp, + context->dp->i_ino, 0, &bp); if (error) return error; -- cgit v1.2.3-59-g8ed1b From e47dcf113ae348678143cc935a1183059c02c9ad Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 15 Apr 2024 14:54:45 -0700 Subject: xfs: repair extended attributes If the extended attributes look bad, try to sift through the rubble to find whatever keys/values we can, stage a new attribute structure in a temporary file and use the atomic extent swapping mechanism to commit the results in bulk. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_attr.c | 2 +- fs/xfs/libxfs/xfs_attr.h | 2 + fs/xfs/libxfs/xfs_da_format.h | 5 + fs/xfs/scrub/attr.c | 20 +- fs/xfs/scrub/attr.h | 7 + fs/xfs/scrub/attr_repair.c | 1207 +++++++++++++++++++++++++++++++++++++++++ fs/xfs/scrub/attr_repair.h | 11 + fs/xfs/scrub/repair.c | 46 ++ fs/xfs/scrub/repair.h | 6 + fs/xfs/scrub/scrub.c | 2 +- fs/xfs/scrub/trace.h | 83 +++ fs/xfs/scrub/xfarray.c | 17 + fs/xfs/scrub/xfarray.h | 2 + fs/xfs/scrub/xfblob.c | 17 + fs/xfs/scrub/xfblob.h | 2 + fs/xfs/scrub/xfile.h | 5 + fs/xfs/xfs_buf.c | 3 + fs/xfs/xfs_trace.h | 2 + 19 files changed, 1436 insertions(+), 4 deletions(-) create mode 100644 fs/xfs/scrub/attr_repair.c create mode 100644 fs/xfs/scrub/attr_repair.h (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index bc27757702fe..8647629ac7bf 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -194,6 +194,7 @@ ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y) xfs-y += $(addprefix scrub/, \ agheader_repair.o \ alloc_repair.o \ + attr_repair.o \ bmap_repair.o \ cow_repair.o \ fscounters_repair.o \ diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index b3c9666cd011..05d22c5e3885 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1055,7 +1055,7 @@ out_trans_cancel: * External routines when attribute list is inside the inode *========================================================================*/ -static inline int xfs_attr_sf_totsize(struct xfs_inode *dp) +int xfs_attr_sf_totsize(struct xfs_inode *dp) { struct xfs_attr_sf_hdr *sf = dp->i_af.if_data; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 81be9b3e4004..e4f55008552b 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -618,4 +618,6 @@ extern struct kmem_cache *xfs_attr_intent_cache; int __init xfs_attr_intent_init_cache(void); void xfs_attr_intent_destroy_cache(void); +int xfs_attr_sf_totsize(struct xfs_inode *dp); + #endif /* __XFS_ATTR_H__ */ diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h index 060e5c96b70f..aac3fe039614 100644 --- a/fs/xfs/libxfs/xfs_da_format.h +++ b/fs/xfs/libxfs/xfs_da_format.h @@ -721,6 +721,11 @@ struct xfs_attr3_leafblock { #define XFS_ATTR_INCOMPLETE (1u << XFS_ATTR_INCOMPLETE_BIT) #define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE) +#define XFS_ATTR_NAMESPACE_STR \ + { XFS_ATTR_LOCAL, "local" }, \ + { XFS_ATTR_ROOT, "root" }, \ + { XFS_ATTR_SECURE, "secure" } + /* * Alignment for namelist and valuelist entries (since they are mixed * there can be only one alignment value) diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 0c467f4f8e77..7621e548d730 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -10,6 +10,7 @@ #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_log_format.h" +#include "xfs_trans.h" #include "xfs_inode.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" @@ -20,6 +21,7 @@ #include "scrub/common.h" #include "scrub/dabtree.h" #include "scrub/attr.h" +#include "scrub/repair.h" /* Free the buffers linked from the xattr buffer. */ static void @@ -35,6 +37,8 @@ xchk_xattr_buf_cleanup( kvfree(ab->value); ab->value = NULL; ab->value_sz = 0; + kvfree(ab->name); + ab->name = NULL; } /* @@ -65,7 +69,7 @@ xchk_xattr_want_freemap( * reallocating the buffer if necessary. Buffer contents are not preserved * across a reallocation. */ -static int +int xchk_setup_xattr_buf( struct xfs_scrub *sc, size_t value_size) @@ -95,6 +99,12 @@ xchk_setup_xattr_buf( return -ENOMEM; } + if (xchk_could_repair(sc)) { + ab->name = kvmalloc(XATTR_NAME_MAX + 1, XCHK_GFP_FLAGS); + if (!ab->name) + return -ENOMEM; + } + resize_value: if (ab->value_sz >= value_size) return 0; @@ -121,6 +131,12 @@ xchk_setup_xattr( { int error; + if (xchk_could_repair(sc)) { + error = xrep_setup_xattr(sc); + if (error) + return error; + } + /* * We failed to get memory while checking attrs, so this time try to * get all the memory we're ever going to need. Allocate the buffer @@ -247,7 +263,7 @@ fail_xref: * Within a char, the lowest bit of the char represents the byte with * the smallest address */ -STATIC bool +bool xchk_xattr_set_map( struct xfs_scrub *sc, unsigned long *map, diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h index 48fd9402c432..7db58af56646 100644 --- a/fs/xfs/scrub/attr.h +++ b/fs/xfs/scrub/attr.h @@ -16,9 +16,16 @@ struct xchk_xattr_buf { /* Bitmap of free space in xattr leaf blocks. */ unsigned long *freemap; + /* Memory buffer used to hold salvaged xattr names. */ + unsigned char *name; + /* Memory buffer used to extract xattr values. */ void *value; size_t value_sz; }; +bool xchk_xattr_set_map(struct xfs_scrub *sc, unsigned long *map, + unsigned int start, unsigned int len); +int xchk_setup_xattr_buf(struct xfs_scrub *sc, size_t value_size); + #endif /* __XFS_SCRUB_ATTR_H__ */ diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c new file mode 100644 index 000000000000..7b4318764d03 --- /dev/null +++ b/fs/xfs/scrub/attr_repair.c @@ -0,0 +1,1207 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_attr_sf.h" +#include "xfs_attr_remote.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "xfs_exchmaps.h" +#include "xfs_exchrange.h" +#include "xfs_acl.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/tempfile.h" +#include "scrub/tempexch.h" +#include "scrub/xfile.h" +#include "scrub/xfarray.h" +#include "scrub/xfblob.h" +#include "scrub/attr.h" +#include "scrub/reap.h" +#include "scrub/attr_repair.h" + +/* + * Extended Attribute Repair + * ========================= + * + * We repair extended attributes by reading the attr leaf blocks looking for + * attributes entries that look salvageable (name passes verifiers, value can + * be retrieved, etc). Each extended attribute worth salvaging is stashed in + * memory, and the stashed entries are periodically replayed into a temporary + * file to constrain memory use. Batching the construction of the temporary + * extended attribute structure in this fashion reduces lock cycling of the + * file being repaired and the temporary file. + * + * When salvaging completes, the remaining stashed attributes are replayed to + * the temporary file. An atomic file contents exchange is used to commit the + * new xattr blocks to the file being repaired. This will disrupt attrmulti + * cursors. + */ + +struct xrep_xattr_key { + /* Cookie for retrieval of the xattr name. */ + xfblob_cookie name_cookie; + + /* Cookie for retrieval of the xattr value. */ + xfblob_cookie value_cookie; + + /* XFS_ATTR_* flags */ + int flags; + + /* Length of the value and name. */ + uint32_t valuelen; + uint16_t namelen; +}; + +/* + * Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write + * them to the temp file. + */ +#define XREP_XATTR_MAX_STASH_BYTES (PAGE_SIZE * 8) + +struct xrep_xattr { + struct xfs_scrub *sc; + + /* Information for exchanging attr fork mappings at the end. */ + struct xrep_tempexch tx; + + /* xattr keys */ + struct xfarray *xattr_records; + + /* xattr values */ + struct xfblob *xattr_blobs; + + /* Number of attributes that we are salvaging. */ + unsigned long long attrs_found; +}; + +/* Set up to recreate the extended attributes. */ +int +xrep_setup_xattr( + struct xfs_scrub *sc) +{ + return xrep_tempfile_create(sc, S_IFREG); +} + +/* + * Decide if we want to salvage this attribute. We don't bother with + * incomplete or oversized keys or values. The @value parameter can be null + * for remote attrs. + */ +STATIC int +xrep_xattr_want_salvage( + struct xrep_xattr *rx, + unsigned int attr_flags, + const void *name, + int namelen, + const void *value, + int valuelen) +{ + if (attr_flags & XFS_ATTR_INCOMPLETE) + return false; + if (namelen > XATTR_NAME_MAX || namelen <= 0) + return false; + if (!xfs_attr_namecheck(name, namelen)) + return false; + if (valuelen > XATTR_SIZE_MAX || valuelen < 0) + return false; + if (hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) + return false; + return true; +} + +/* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */ +STATIC int +xrep_xattr_salvage_key( + struct xrep_xattr *rx, + int flags, + unsigned char *name, + int namelen, + unsigned char *value, + int valuelen) +{ + struct xrep_xattr_key key = { + .valuelen = valuelen, + .flags = flags & XFS_ATTR_NSP_ONDISK_MASK, + }; + unsigned int i = 0; + int error = 0; + + if (xchk_should_terminate(rx->sc, &error)) + return error; + + /* + * Truncate the name to the first character that would trip namecheck. + * If we no longer have a name after that, ignore this attribute. + */ + while (i < namelen && name[i] != 0) + i++; + if (i == 0) + return 0; + key.namelen = i; + + trace_xrep_xattr_salvage_rec(rx->sc->ip, flags, name, key.namelen, + valuelen); + + error = xfblob_store(rx->xattr_blobs, &key.name_cookie, name, + key.namelen); + if (error) + return error; + + error = xfblob_store(rx->xattr_blobs, &key.value_cookie, value, + key.valuelen); + if (error) + return error; + + error = xfarray_append(rx->xattr_records, &key); + if (error) + return error; + + rx->attrs_found++; + return 0; +} + +/* + * Record a shortform extended attribute key & value for later reinsertion + * into the inode. + */ +STATIC int +xrep_xattr_salvage_sf_attr( + struct xrep_xattr *rx, + struct xfs_attr_sf_hdr *hdr, + struct xfs_attr_sf_entry *sfe) +{ + struct xfs_scrub *sc = rx->sc; + struct xchk_xattr_buf *ab = sc->buf; + unsigned char *name = sfe->nameval; + unsigned char *value = &sfe->nameval[sfe->namelen]; + + if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)name - (char *)hdr, + sfe->namelen)) + return 0; + + if (!xchk_xattr_set_map(sc, ab->usedmap, (char *)value - (char *)hdr, + sfe->valuelen)) + return 0; + + if (!xrep_xattr_want_salvage(rx, sfe->flags, sfe->nameval, + sfe->namelen, value, sfe->valuelen)) + return 0; + + return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval, + sfe->namelen, value, sfe->valuelen); +} + +/* + * Record a local format extended attribute key & value for later reinsertion + * into the inode. + */ +STATIC int +xrep_xattr_salvage_local_attr( + struct xrep_xattr *rx, + struct xfs_attr_leaf_entry *ent, + unsigned int nameidx, + const char *buf_end, + struct xfs_attr_leaf_name_local *lentry) +{ + struct xchk_xattr_buf *ab = rx->sc->buf; + unsigned char *value; + unsigned int valuelen; + unsigned int namesize; + + /* + * Decode the leaf local entry format. If something seems wrong, we + * junk the attribute. + */ + value = &lentry->nameval[lentry->namelen]; + valuelen = be16_to_cpu(lentry->valuelen); + namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen); + if ((char *)lentry + namesize > buf_end) + return 0; + if (!xrep_xattr_want_salvage(rx, ent->flags, lentry->nameval, + lentry->namelen, value, valuelen)) + return 0; + if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize)) + return 0; + + /* Try to save this attribute. */ + return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval, + lentry->namelen, value, valuelen); +} + +/* + * Record a remote format extended attribute key & value for later reinsertion + * into the inode. + */ +STATIC int +xrep_xattr_salvage_remote_attr( + struct xrep_xattr *rx, + struct xfs_attr_leaf_entry *ent, + unsigned int nameidx, + const char *buf_end, + struct xfs_attr_leaf_name_remote *rentry, + unsigned int ent_idx, + struct xfs_buf *leaf_bp) +{ + struct xchk_xattr_buf *ab = rx->sc->buf; + struct xfs_da_args args = { + .trans = rx->sc->tp, + .dp = rx->sc->ip, + .index = ent_idx, + .geo = rx->sc->mp->m_attr_geo, + .owner = rx->sc->ip->i_ino, + .attr_filter = ent->flags & XFS_ATTR_NSP_ONDISK_MASK, + .namelen = rentry->namelen, + .name = rentry->name, + .value = ab->value, + .valuelen = be32_to_cpu(rentry->valuelen), + }; + unsigned int namesize; + int error; + + /* + * Decode the leaf remote entry format. If something seems wrong, we + * junk the attribute. Note that we should never find a zero-length + * remote attribute value. + */ + namesize = xfs_attr_leaf_entsize_remote(rentry->namelen); + if ((char *)rentry + namesize > buf_end) + return 0; + if (args.valuelen == 0 || + !xrep_xattr_want_salvage(rx, ent->flags, rentry->name, + rentry->namelen, NULL, args.valuelen)) + return 0; + if (!xchk_xattr_set_map(rx->sc, ab->usedmap, nameidx, namesize)) + return 0; + + /* + * Enlarge the buffer (if needed) to hold the value that we're trying + * to salvage from the old extended attribute data. + */ + error = xchk_setup_xattr_buf(rx->sc, args.valuelen); + if (error == -ENOMEM) + error = -EDEADLOCK; + if (error) + return error; + + /* Look up the remote value and stash it for reconstruction. */ + error = xfs_attr3_leaf_getvalue(leaf_bp, &args); + if (error || args.rmtblkno == 0) + goto err_free; + + error = xfs_attr_rmtval_get(&args); + if (error) + goto err_free; + + /* Try to save this attribute. */ + error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name, + rentry->namelen, ab->value, args.valuelen); +err_free: + /* remote value was garbage, junk it */ + if (error == -EFSBADCRC || error == -EFSCORRUPTED) + error = 0; + return error; +} + +/* Extract every xattr key that we can from this attr fork block. */ +STATIC int +xrep_xattr_recover_leaf( + struct xrep_xattr *rx, + struct xfs_buf *bp) +{ + struct xfs_attr3_icleaf_hdr leafhdr; + struct xfs_scrub *sc = rx->sc; + struct xfs_mount *mp = sc->mp; + struct xfs_attr_leafblock *leaf; + struct xfs_attr_leaf_name_local *lentry; + struct xfs_attr_leaf_name_remote *rentry; + struct xfs_attr_leaf_entry *ent; + struct xfs_attr_leaf_entry *entries; + struct xchk_xattr_buf *ab = rx->sc->buf; + char *buf_end; + size_t off; + unsigned int nameidx; + unsigned int hdrsize; + int i; + int error = 0; + + bitmap_zero(ab->usedmap, mp->m_attr_geo->blksize); + + /* Check the leaf header */ + leaf = bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); + hdrsize = xfs_attr3_leaf_hdr_size(leaf); + xchk_xattr_set_map(sc, ab->usedmap, 0, hdrsize); + entries = xfs_attr3_leaf_entryp(leaf); + + buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; + for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) { + if (xchk_should_terminate(sc, &error)) + return error; + + /* Skip key if it conflicts with something else? */ + off = (char *)ent - (char *)leaf; + if (!xchk_xattr_set_map(sc, ab->usedmap, off, + sizeof(xfs_attr_leaf_entry_t))) + continue; + + /* Check the name information. */ + nameidx = be16_to_cpu(ent->nameidx); + if (nameidx < leafhdr.firstused || + nameidx >= mp->m_attr_geo->blksize) + continue; + + if (ent->flags & XFS_ATTR_LOCAL) { + lentry = xfs_attr3_leaf_name_local(leaf, i); + error = xrep_xattr_salvage_local_attr(rx, ent, nameidx, + buf_end, lentry); + } else { + rentry = xfs_attr3_leaf_name_remote(leaf, i); + error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx, + buf_end, rentry, i, bp); + } + if (error) + return error; + } + + return 0; +} + +/* Try to recover shortform attrs. */ +STATIC int +xrep_xattr_recover_sf( + struct xrep_xattr *rx) +{ + struct xfs_scrub *sc = rx->sc; + struct xchk_xattr_buf *ab = sc->buf; + struct xfs_attr_sf_hdr *hdr; + struct xfs_attr_sf_entry *sfe; + struct xfs_attr_sf_entry *next; + struct xfs_ifork *ifp; + unsigned char *end; + int i; + int error = 0; + + ifp = xfs_ifork_ptr(rx->sc->ip, XFS_ATTR_FORK); + hdr = ifp->if_data; + + bitmap_zero(ab->usedmap, ifp->if_bytes); + end = (unsigned char *)ifp->if_data + ifp->if_bytes; + xchk_xattr_set_map(sc, ab->usedmap, 0, sizeof(*hdr)); + + sfe = xfs_attr_sf_firstentry(hdr); + if ((unsigned char *)sfe > end) + return 0; + + for (i = 0; i < hdr->count; i++) { + if (xchk_should_terminate(sc, &error)) + return error; + + next = xfs_attr_sf_nextentry(sfe); + if ((unsigned char *)next > end) + break; + + if (xchk_xattr_set_map(sc, ab->usedmap, + (char *)sfe - (char *)hdr, + sizeof(struct xfs_attr_sf_entry))) { + /* + * No conflicts with the sf entry; let's save this + * attribute. + */ + error = xrep_xattr_salvage_sf_attr(rx, hdr, sfe); + if (error) + return error; + } + + sfe = next; + } + + return 0; +} + +/* + * Try to return a buffer of xattr data for a given physical extent. + * + * Because the buffer cache get function complains if it finds a buffer + * matching the block number but not matching the length, we must be careful to + * look for incore buffers (up to the maximum length of a remote value) that + * could be hiding anywhere in the physical range. If we find an incore + * buffer, we can pass that to the caller. Optionally, read a single block and + * pass that back. + * + * Note the subtlety that remote attr value blocks for which there is no incore + * buffer will be passed to the callback one block at a time. These buffers + * will not have any ops attached and must be staled to prevent aliasing with + * multiblock buffers once we drop the ILOCK. + */ +STATIC int +xrep_xattr_find_buf( + struct xfs_mount *mp, + xfs_fsblock_t fsbno, + xfs_extlen_t max_len, + bool can_read, + struct xfs_buf **bpp) +{ + struct xrep_bufscan scan = { + .daddr = XFS_FSB_TO_DADDR(mp, fsbno), + .max_sectors = xrep_bufscan_max_sectors(mp, max_len), + .daddr_step = XFS_FSB_TO_BB(mp, 1), + }; + struct xfs_buf *bp; + + while ((bp = xrep_bufscan_advance(mp, &scan)) != NULL) { + *bpp = bp; + return 0; + } + + if (!can_read) { + *bpp = NULL; + return 0; + } + + return xfs_buf_read(mp->m_ddev_targp, scan.daddr, XFS_FSB_TO_BB(mp, 1), + XBF_TRYLOCK, bpp, NULL); +} + +/* + * Deal with a buffer that we found during our walk of the attr fork. + * + * Attribute leaf and node blocks are simple -- they're a single block, so we + * can walk them one at a time and we never have to worry about discontiguous + * multiblock buffers like we do for directories. + * + * Unfortunately, remote attr blocks add a lot of complexity here. Each disk + * block is totally self contained, in the sense that the v5 header provides no + * indication that there could be more data in the next block. The incore + * buffers can span multiple blocks, though they never cross extent records. + * However, they don't necessarily start or end on an extent record boundary. + * Therefore, we need a special buffer find function to walk the buffer cache + * for us. + * + * The caller must hold the ILOCK on the file being repaired. We use + * XBF_TRYLOCK here to skip any locked buffer on the assumption that we don't + * own the block and don't want to hang the system on a potentially garbage + * buffer. + */ +STATIC int +xrep_xattr_recover_block( + struct xrep_xattr *rx, + xfs_dablk_t dabno, + xfs_fsblock_t fsbno, + xfs_extlen_t max_len, + xfs_extlen_t *actual_len) +{ + struct xfs_da_blkinfo *info; + struct xfs_buf *bp; + int error; + + error = xrep_xattr_find_buf(rx->sc->mp, fsbno, max_len, true, &bp); + if (error) + return error; + info = bp->b_addr; + *actual_len = XFS_BB_TO_FSB(rx->sc->mp, bp->b_length); + + trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno, + be16_to_cpu(info->magic)); + + /* + * If the buffer has the right magic number for an attr leaf block and + * passes a structure check (we don't care about checksums), salvage + * as much as we can from the block. */ + if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) && + xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops) && + xfs_attr3_leaf_header_check(bp, rx->sc->ip->i_ino) == NULL) + error = xrep_xattr_recover_leaf(rx, bp); + + /* + * If the buffer didn't already have buffer ops set, it was read in by + * the _find_buf function and could very well be /part/ of a multiblock + * remote block. Mark it stale so that it doesn't hang around in + * memory to cause problems. + */ + if (bp->b_ops == NULL) + xfs_buf_stale(bp); + + xfs_buf_relse(bp); + return error; +} + +/* Insert one xattr key/value. */ +STATIC int +xrep_xattr_insert_rec( + struct xrep_xattr *rx, + const struct xrep_xattr_key *key) +{ + struct xfs_da_args args = { + .dp = rx->sc->tempip, + .attr_filter = key->flags, + .attr_flags = XATTR_CREATE, + .namelen = key->namelen, + .valuelen = key->valuelen, + .owner = rx->sc->ip->i_ino, + }; + struct xchk_xattr_buf *ab = rx->sc->buf; + int error; + + /* + * Grab pointers to the scrub buffer so that we can use them to insert + * attrs into the temp file. + */ + args.name = ab->name; + args.value = ab->value; + + /* + * The attribute name is stored near the end of the in-core buffer, + * though we reserve one more byte to ensure null termination. + */ + ab->name[XATTR_NAME_MAX] = 0; + + error = xfblob_load(rx->xattr_blobs, key->name_cookie, ab->name, + key->namelen); + if (error) + return error; + + error = xfblob_free(rx->xattr_blobs, key->name_cookie); + if (error) + return error; + + error = xfblob_load(rx->xattr_blobs, key->value_cookie, args.value, + key->valuelen); + if (error) + return error; + + error = xfblob_free(rx->xattr_blobs, key->value_cookie); + if (error) + return error; + + ab->name[key->namelen] = 0; + + trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags, ab->name, + key->namelen, key->valuelen); + + /* + * xfs_attr_set creates and commits its own transaction. If the attr + * already exists, we'll just drop it during the rebuild. + */ + error = xfs_attr_set(&args); + if (error == -EEXIST) + error = 0; + + return error; +} + +/* + * Periodically flush salvaged attributes to the temporary file. This is done + * to reduce the memory requirements of the xattr rebuild because files can + * contain millions of attributes. + */ +STATIC int +xrep_xattr_flush_stashed( + struct xrep_xattr *rx) +{ + xfarray_idx_t array_cur; + int error; + + /* + * Entering this function, the scrub context has a reference to the + * inode being repaired, the temporary file, and a scrub transaction + * that we use during xattr salvaging to avoid livelocking if there + * are cycles in the xattr structures. We hold ILOCK_EXCL on both + * the inode being repaired, though it is not ijoined to the scrub + * transaction. + * + * To constrain kernel memory use, we occasionally flush salvaged + * xattrs from the xfarray and xfblob structures into the temporary + * file in preparation for exchanging the xattr structures at the end. + * Updating the temporary file requires a transaction, so we commit the + * scrub transaction and drop the two ILOCKs so that xfs_attr_set can + * allocate whatever transaction it wants. + * + * We still hold IOLOCK_EXCL on the inode being repaired, which + * prevents anyone from modifying the damaged xattr data while we + * repair it. + */ + error = xrep_trans_commit(rx->sc); + if (error) + return error; + xchk_iunlock(rx->sc, XFS_ILOCK_EXCL); + + /* + * Take the IOLOCK of the temporary file while we modify xattrs. This + * isn't strictly required because the temporary file is never revealed + * to userspace, but we follow the same locking rules. We still hold + * sc->ip's IOLOCK. + */ + error = xrep_tempfile_iolock_polled(rx->sc); + if (error) + return error; + + /* Add all the salvaged attrs to the temporary file. */ + foreach_xfarray_idx(rx->xattr_records, array_cur) { + struct xrep_xattr_key key; + + error = xfarray_load(rx->xattr_records, array_cur, &key); + if (error) + return error; + + error = xrep_xattr_insert_rec(rx, &key); + if (error) + return error; + } + + /* Empty out both arrays now that we've added the entries. */ + xfarray_truncate(rx->xattr_records); + xfblob_truncate(rx->xattr_blobs); + + xrep_tempfile_iounlock(rx->sc); + + /* Recreate the salvage transaction and relock the inode. */ + error = xchk_trans_alloc(rx->sc, 0); + if (error) + return error; + xchk_ilock(rx->sc, XFS_ILOCK_EXCL); + return 0; +} + +/* Decide if we've stashed too much xattr data in memory. */ +static inline bool +xrep_xattr_want_flush_stashed( + struct xrep_xattr *rx) +{ + unsigned long long bytes; + + bytes = xfarray_bytes(rx->xattr_records) + + xfblob_bytes(rx->xattr_blobs); + return bytes > XREP_XATTR_MAX_STASH_BYTES; +} + +/* Extract as many attribute keys and values as we can. */ +STATIC int +xrep_xattr_recover( + struct xrep_xattr *rx) +{ + struct xfs_bmbt_irec got; + struct xfs_scrub *sc = rx->sc; + struct xfs_da_geometry *geo = sc->mp->m_attr_geo; + xfs_fileoff_t offset; + xfs_extlen_t len; + xfs_dablk_t dabno; + int nmap; + int error; + + /* + * Iterate each xattr leaf block in the attr fork to scan them for any + * attributes that we might salvage. + */ + for (offset = 0; + offset < XFS_MAX_FILEOFF; + offset = got.br_startoff + got.br_blockcount) { + nmap = 1; + error = xfs_bmapi_read(sc->ip, offset, XFS_MAX_FILEOFF - offset, + &got, &nmap, XFS_BMAPI_ATTRFORK); + if (error) + return error; + if (nmap != 1) + return -EFSCORRUPTED; + if (!xfs_bmap_is_written_extent(&got)) + continue; + + for (dabno = round_up(got.br_startoff, geo->fsbcount); + dabno < got.br_startoff + got.br_blockcount; + dabno += len) { + xfs_fileoff_t curr_offset = dabno - got.br_startoff; + xfs_extlen_t maxlen; + + if (xchk_should_terminate(rx->sc, &error)) + return error; + + maxlen = min_t(xfs_filblks_t, INT_MAX, + got.br_blockcount - curr_offset); + error = xrep_xattr_recover_block(rx, dabno, + curr_offset + got.br_startblock, + maxlen, &len); + if (error) + return error; + + if (xrep_xattr_want_flush_stashed(rx)) { + error = xrep_xattr_flush_stashed(rx); + if (error) + return error; + } + } + } + + return 0; +} + +/* + * Reset the extended attribute fork to a state where we can start re-adding + * the salvaged attributes. + */ +STATIC int +xrep_xattr_fork_remove( + struct xfs_scrub *sc, + struct xfs_inode *ip) +{ + struct xfs_attr_sf_hdr *hdr; + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_ATTR_FORK); + + /* + * If the data fork is in btree format, we can't change di_forkoff + * because we could run afoul of the rule that the data fork isn't + * supposed to be in btree format if there's enough space in the fork + * that it could have used extents format. Instead, reinitialize the + * attr fork to have a shortform structure with zero attributes. + */ + if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) { + ifp->if_format = XFS_DINODE_FMT_LOCAL; + hdr = xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes, + XFS_ATTR_FORK); + hdr->count = 0; + hdr->totsize = cpu_to_be16(sizeof(*hdr)); + xfs_trans_log_inode(sc->tp, ip, + XFS_ILOG_CORE | XFS_ILOG_ADATA); + return 0; + } + + /* If we still have attr fork extents, something's wrong. */ + if (ifp->if_nextents != 0) { + struct xfs_iext_cursor icur; + struct xfs_bmbt_irec irec; + unsigned int i = 0; + + xfs_emerg(sc->mp, + "inode 0x%llx attr fork still has %llu attr extents, format %d?!", + ip->i_ino, ifp->if_nextents, ifp->if_format); + for_each_xfs_iext(ifp, &icur, &irec) { + xfs_err(sc->mp, + "[%u]: startoff %llu startblock %llu blockcount %llu state %u", + i++, irec.br_startoff, + irec.br_startblock, irec.br_blockcount, + irec.br_state); + } + ASSERT(0); + return -EFSCORRUPTED; + } + + xfs_attr_fork_remove(ip, sc->tp); + return 0; +} + +/* + * Free all the attribute fork blocks of the file being repaired and delete the + * fork. The caller must ILOCK the scrub file and join it to the transaction. + * This function returns with the inode joined to a clean transaction. + */ +int +xrep_xattr_reset_fork( + struct xfs_scrub *sc) +{ + int error; + + trace_xrep_xattr_reset_fork(sc->ip, sc->ip); + + /* Unmap all the attr blocks. */ + if (xfs_ifork_has_extents(&sc->ip->i_af)) { + error = xrep_reap_ifork(sc, sc->ip, XFS_ATTR_FORK); + if (error) + return error; + } + + error = xrep_xattr_fork_remove(sc, sc->ip); + if (error) + return error; + + return xfs_trans_roll_inode(&sc->tp, sc->ip); +} + +/* + * Free all the attribute fork blocks of the temporary file and delete the attr + * fork. The caller must ILOCK the tempfile and join it to the transaction. + * This function returns with the inode joined to a clean scrub transaction. + */ +STATIC int +xrep_xattr_reset_tempfile_fork( + struct xfs_scrub *sc) +{ + int error; + + trace_xrep_xattr_reset_fork(sc->ip, sc->tempip); + + /* + * Wipe out the attr fork of the temp file so that regular inode + * inactivation won't trip over the corrupt attr fork. + */ + if (xfs_ifork_has_extents(&sc->tempip->i_af)) { + error = xrep_reap_ifork(sc, sc->tempip, XFS_ATTR_FORK); + if (error) + return error; + } + + return xrep_xattr_fork_remove(sc, sc->tempip); +} + +/* + * Find all the extended attributes for this inode by scraping them out of the + * attribute key blocks by hand, and flushing them into the temp file. + * When we're done, free the staging memory before exchanging the xattr + * structures to reduce memory usage. + */ +STATIC int +xrep_xattr_salvage_attributes( + struct xrep_xattr *rx) +{ + struct xfs_inode *ip = rx->sc->ip; + int error; + + /* Short format xattrs are easy! */ + if (rx->sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL) { + error = xrep_xattr_recover_sf(rx); + if (error) + return error; + + return xrep_xattr_flush_stashed(rx); + } + + /* + * For non-inline xattr structures, the salvage function scans the + * buffer cache looking for potential attr leaf blocks. The scan + * requires the ability to lock any buffer found and runs independently + * of any transaction <-> buffer item <-> buffer linkage. Therefore, + * roll the transaction to ensure there are no buffers joined. We hold + * the ILOCK independently of the transaction. + */ + error = xfs_trans_roll(&rx->sc->tp); + if (error) + return error; + + error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK); + if (error) + return error; + + error = xrep_xattr_recover(rx); + if (error) + return error; + + return xrep_xattr_flush_stashed(rx); +} + +/* + * Prepare both inodes' attribute forks for an exchange. Promote the tempfile + * from short format to leaf format, and if the file being repaired has a short + * format attr fork, turn it into an empty extent list. + */ +STATIC int +xrep_xattr_swap_prep( + struct xfs_scrub *sc, + bool temp_local, + bool ip_local) +{ + int error; + + /* + * If the tempfile's attributes are in shortform format, convert that + * to a single leaf extent so that we can use the atomic mapping + * exchange. + */ + if (temp_local) { + struct xfs_da_args args = { + .dp = sc->tempip, + .geo = sc->mp->m_attr_geo, + .whichfork = XFS_ATTR_FORK, + .trans = sc->tp, + .total = 1, + .owner = sc->ip->i_ino, + }; + + error = xfs_attr_shortform_to_leaf(&args); + if (error) + return error; + + /* + * Roll the deferred log items to get us back to a clean + * transaction. + */ + error = xfs_defer_finish(&sc->tp); + if (error) + return error; + } + + /* + * If the file being repaired had a shortform attribute fork, convert + * that to an empty extent list in preparation for the atomic mapping + * exchange. + */ + if (ip_local) { + struct xfs_ifork *ifp; + + ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK); + + xfs_idestroy_fork(ifp); + ifp->if_format = XFS_DINODE_FMT_EXTENTS; + ifp->if_nextents = 0; + ifp->if_bytes = 0; + ifp->if_data = NULL; + ifp->if_height = 0; + + xfs_trans_log_inode(sc->tp, sc->ip, + XFS_ILOG_CORE | XFS_ILOG_ADATA); + } + + return 0; +} + +/* Exchange the temporary file's attribute fork with the one being repaired. */ +STATIC int +xrep_xattr_swap( + struct xfs_scrub *sc, + struct xrep_tempexch *tx) +{ + bool ip_local, temp_local; + int error = 0; + + ip_local = sc->ip->i_af.if_format == XFS_DINODE_FMT_LOCAL; + temp_local = sc->tempip->i_af.if_format == XFS_DINODE_FMT_LOCAL; + + /* + * If the both files have a local format attr fork and the rebuilt + * xattr data would fit in the repaired file's attr fork, just copy + * the contents from the tempfile and declare ourselves done. + */ + if (ip_local && temp_local) { + int forkoff; + int newsize; + + newsize = xfs_attr_sf_totsize(sc->tempip); + forkoff = xfs_attr_shortform_bytesfit(sc->ip, newsize); + if (forkoff > 0) { + sc->ip->i_forkoff = forkoff; + xrep_tempfile_copyout_local(sc, XFS_ATTR_FORK); + return 0; + } + } + + /* Otherwise, make sure both attr forks are in block-mapping mode. */ + error = xrep_xattr_swap_prep(sc, temp_local, ip_local); + if (error) + return error; + + return xrep_tempexch_contents(sc, tx); +} + +/* + * Exchange the new extended attribute data (which we created in the tempfile) + * with the file being repaired. + */ +STATIC int +xrep_xattr_rebuild_tree( + struct xrep_xattr *rx) +{ + struct xfs_scrub *sc = rx->sc; + int error; + + /* + * If we didn't find any attributes to salvage, repair the file by + * zapping its attr fork. + */ + if (rx->attrs_found == 0) { + xfs_trans_ijoin(sc->tp, sc->ip, 0); + error = xrep_xattr_reset_fork(sc); + if (error) + return error; + + goto forget_acls; + } + + trace_xrep_xattr_rebuild_tree(sc->ip, sc->tempip); + + /* + * Commit the repair transaction and drop the ILOCKs so that we can use + * the atomic file content exchange helper functions to compute the + * correct resource reservations. + * + * We still hold IOLOCK_EXCL (aka i_rwsem) which will prevent xattr + * modifications, but there's nothing to prevent userspace from reading + * the attributes until we're ready for the exchange operation. Reads + * will return -EIO without shutting down the fs, so we're ok with + * that. + */ + error = xrep_trans_commit(sc); + if (error) + return error; + + xchk_iunlock(sc, XFS_ILOCK_EXCL); + + /* + * Take the IOLOCK on the temporary file so that we can run xattr + * operations with the same locks held as we would for a normal file. + * We still hold sc->ip's IOLOCK. + */ + error = xrep_tempfile_iolock_polled(rx->sc); + if (error) + return error; + + /* Allocate exchange transaction and lock both inodes. */ + error = xrep_tempexch_trans_alloc(rx->sc, XFS_ATTR_FORK, &rx->tx); + if (error) + return error; + + /* + * Exchange the blocks mapped by the tempfile's attr fork with the file + * being repaired. The old attr blocks will then be attached to the + * tempfile, so reap its attr fork. + */ + error = xrep_xattr_swap(sc, &rx->tx); + if (error) + return error; + + error = xrep_xattr_reset_tempfile_fork(sc); + if (error) + return error; + + /* + * Roll to get a transaction without any inodes joined to it. Then we + * can drop the tempfile's ILOCK and IOLOCK before doing more work on + * the scrub target file. + */ + error = xfs_trans_roll(&sc->tp); + if (error) + return error; + + xrep_tempfile_iunlock(sc); + xrep_tempfile_iounlock(sc); + +forget_acls: + /* Invalidate cached ACLs now that we've reloaded all the xattrs. */ + xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_FILE); + xfs_forget_acl(VFS_I(sc->ip), SGI_ACL_DEFAULT); + return 0; +} + +/* Tear down all the incore scan stuff we created. */ +STATIC void +xrep_xattr_teardown( + struct xrep_xattr *rx) +{ + xfblob_destroy(rx->xattr_blobs); + xfarray_destroy(rx->xattr_records); + kfree(rx); +} + +/* Set up the filesystem scan so we can regenerate extended attributes. */ +STATIC int +xrep_xattr_setup_scan( + struct xfs_scrub *sc, + struct xrep_xattr **rxp) +{ + struct xrep_xattr *rx; + char *descr; + int max_len; + int error; + + rx = kzalloc(sizeof(struct xrep_xattr), XCHK_GFP_FLAGS); + if (!rx) + return -ENOMEM; + rx->sc = sc; + + /* + * Allocate enough memory to handle loading local attr values from the + * xfblob data while flushing stashed attrs to the temporary file. + * We only realloc the buffer when salvaging remote attr values. + */ + max_len = xfs_attr_leaf_entsize_local_max(sc->mp->m_attr_geo->blksize); + error = xchk_setup_xattr_buf(rx->sc, max_len); + if (error == -ENOMEM) + error = -EDEADLOCK; + if (error) + goto out_rx; + + /* Set up some staging for salvaged attribute keys and values */ + descr = xchk_xfile_ino_descr(sc, "xattr keys"); + error = xfarray_create(descr, 0, sizeof(struct xrep_xattr_key), + &rx->xattr_records); + kfree(descr); + if (error) + goto out_rx; + + descr = xchk_xfile_ino_descr(sc, "xattr names"); + error = xfblob_create(descr, &rx->xattr_blobs); + kfree(descr); + if (error) + goto out_keys; + + *rxp = rx; + return 0; +out_keys: + xfarray_destroy(rx->xattr_records); +out_rx: + kfree(rx); + return error; +} + +/* + * Repair the extended attribute metadata. + * + * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer. + * The buffer cache in XFS can't handle aliased multiblock buffers, so this + * might misbehave if the attr fork is crosslinked with other filesystem + * metadata. + */ +int +xrep_xattr( + struct xfs_scrub *sc) +{ + struct xrep_xattr *rx = NULL; + int error; + + if (!xfs_inode_hasattr(sc->ip)) + return -ENOENT; + + /* The rmapbt is required to reap the old attr fork. */ + if (!xfs_has_rmapbt(sc->mp)) + return -EOPNOTSUPP; + + error = xrep_xattr_setup_scan(sc, &rx); + if (error) + return error; + + ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL); + + error = xrep_xattr_salvage_attributes(rx); + if (error) + goto out_scan; + + /* Last chance to abort before we start committing fixes. */ + if (xchk_should_terminate(sc, &error)) + goto out_scan; + + error = xrep_xattr_rebuild_tree(rx); + if (error) + goto out_scan; + +out_scan: + xrep_xattr_teardown(rx); + return error; +} diff --git a/fs/xfs/scrub/attr_repair.h b/fs/xfs/scrub/attr_repair.h new file mode 100644 index 000000000000..0a9ffa7cfa90 --- /dev/null +++ b/fs/xfs/scrub/attr_repair.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong + */ +#ifndef __XFS_SCRUB_ATTR_REPAIR_H__ +#define __XFS_SCRUB_ATTR_REPAIR_H__ + +int xrep_xattr_reset_fork(struct xfs_scrub *sc); + +#endif /* __XFS_SCRUB_ATTR_REPAIR_H__ */ diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 443e62f72481..04aec0e9e4c3 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -32,6 +32,9 @@ #include "xfs_reflink.h" #include "xfs_health.h" #include "xfs_buf_mem.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_attr.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -39,6 +42,7 @@ #include "scrub/bitmap.h" #include "scrub/stats.h" #include "scrub/xfile.h" +#include "scrub/attr_repair.h" /* * Attempt to repair some metadata, if the metadata is corrupt and userspace @@ -1136,6 +1140,17 @@ xrep_metadata_inode_forks( return error; } + /* Clear the attr forks since metadata shouldn't have that. */ + if (xfs_inode_hasattr(sc->ip)) { + if (!dirty) { + dirty = true; + xfs_trans_ijoin(sc->tp, sc->ip, 0); + } + error = xrep_xattr_reset_fork(sc); + if (error) + return error; + } + /* * If we modified the inode, roll the transaction but don't rejoin the * inode to the new transaction because xrep_bmap_data can do that. @@ -1201,3 +1216,34 @@ xrep_trans_cancel_hook_dummy( current->journal_info = *cookiep; *cookiep = NULL; } + +/* + * See if this buffer can pass the given ->verify_struct() function. + * + * If the buffer already has ops attached and they're not the ones that were + * passed in, we reject the buffer. Otherwise, we perform the structure test + * (note that we do not check CRCs) and return the outcome of the test. The + * buffer ops and error state are left unchanged. + */ +bool +xrep_buf_verify_struct( + struct xfs_buf *bp, + const struct xfs_buf_ops *ops) +{ + const struct xfs_buf_ops *old_ops = bp->b_ops; + xfs_failaddr_t fa; + int old_error; + + if (old_ops) { + if (old_ops != ops) + return false; + } + + old_error = bp->b_error; + bp->b_ops = ops; + fa = bp->b_ops->verify_struct(bp); + bp->b_ops = old_ops; + bp->b_error = old_error; + + return fa == NULL; +} diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 0e2b695ab8f6..9cbfd8da5620 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -90,6 +90,7 @@ int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten); int xrep_metadata_inode_forks(struct xfs_scrub *sc); int xrep_setup_ag_rmapbt(struct xfs_scrub *sc); int xrep_setup_ag_refcountbt(struct xfs_scrub *sc); +int xrep_setup_xattr(struct xfs_scrub *sc); /* Repair setup functions */ int xrep_setup_ag_allocbt(struct xfs_scrub *sc); @@ -123,6 +124,7 @@ int xrep_bmap_attr(struct xfs_scrub *sc); int xrep_bmap_cow(struct xfs_scrub *sc); int xrep_nlinks(struct xfs_scrub *sc); int xrep_fscounters(struct xfs_scrub *sc); +int xrep_xattr(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xrep_rtbitmap(struct xfs_scrub *sc); @@ -147,6 +149,8 @@ int xrep_trans_alloc_hook_dummy(struct xfs_mount *mp, void **cookiep, struct xfs_trans **tpp); void xrep_trans_cancel_hook_dummy(void **cookiep, struct xfs_trans *tp); +bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops); + #else #define xrep_ino_dqattach(sc) (0) @@ -190,6 +194,7 @@ xrep_setup_nothing( #define xrep_setup_ag_allocbt xrep_setup_nothing #define xrep_setup_ag_rmapbt xrep_setup_nothing #define xrep_setup_ag_refcountbt xrep_setup_nothing +#define xrep_setup_xattr xrep_setup_nothing #define xrep_setup_inode(sc, imap) ((void)0) @@ -215,6 +220,7 @@ xrep_setup_nothing( #define xrep_nlinks xrep_notsupported #define xrep_fscounters xrep_notsupported #define xrep_rtsummary xrep_notsupported +#define xrep_xattr xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 62a064c1a5d3..547189a14b6b 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -331,7 +331,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .type = ST_INODE, .setup = xchk_setup_xattr, .scrub = xchk_xattr, - .repair = xrep_notsupported, + .repair = xrep_xattr, }, [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */ .type = ST_INODE, diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 7d07912d8f75..026813205b47 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -2416,6 +2416,89 @@ TRACE_EVENT(xreap_bmapi_binval_scan, __entry->scan_blocks) ); +TRACE_EVENT(xrep_xattr_recover_leafblock, + TP_PROTO(struct xfs_inode *ip, xfs_dablk_t dabno, uint16_t magic), + TP_ARGS(ip, dabno, magic), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_dablk_t, dabno) + __field(uint16_t, magic) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->dabno = dabno; + __entry->magic = magic; + ), + TP_printk("dev %d:%d ino 0x%llx dablk 0x%x magic 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->dabno, + __entry->magic) +); + +DECLARE_EVENT_CLASS(xrep_xattr_salvage_class, + TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name, + unsigned int namelen, unsigned int valuelen), + TP_ARGS(ip, flags, name, namelen, valuelen), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned int, flags) + __field(unsigned int, namelen) + __dynamic_array(char, name, namelen) + __field(unsigned int, valuelen) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->flags = flags; + __entry->namelen = namelen; + memcpy(__get_str(name), name, namelen); + __entry->valuelen = valuelen; + ), + TP_printk("dev %d:%d ino 0x%llx flags %s name '%.*s' valuelen 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->flags, "|", XFS_ATTR_NAMESPACE_STR), + __entry->namelen, + __get_str(name), + __entry->valuelen) +); +#define DEFINE_XREP_XATTR_SALVAGE_EVENT(name) \ +DEFINE_EVENT(xrep_xattr_salvage_class, name, \ + TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name, \ + unsigned int namelen, unsigned int valuelen), \ + TP_ARGS(ip, flags, name, namelen, valuelen)) +DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_salvage_rec); +DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_insert_rec); + +TRACE_EVENT(xrep_xattr_class, + TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip), + TP_ARGS(ip, arg_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_ino_t, src_ino) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->src_ino = arg_ip->i_ino; + ), + TP_printk("dev %d:%d ino 0x%llx src 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->src_ino) +) +#define DEFINE_XREP_XATTR_EVENT(name) \ +DEFINE_EVENT(xrep_xattr_class, name, \ + TP_PROTO(struct xfs_inode *ip, struct xfs_inode *arg_ip), \ + TP_ARGS(ip, arg_ip)) +DEFINE_XREP_XATTR_EVENT(xrep_xattr_rebuild_tree); +DEFINE_XREP_XATTR_EVENT(xrep_xattr_reset_fork); + #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ #endif /* _TRACE_XFS_SCRUB_TRACE_H */ diff --git a/fs/xfs/scrub/xfarray.c b/fs/xfs/scrub/xfarray.c index 17c982a4821d..b65cd3fc5ac9 100644 --- a/fs/xfs/scrub/xfarray.c +++ b/fs/xfs/scrub/xfarray.c @@ -1051,3 +1051,20 @@ out_free: kvfree(si); return error; } + +/* How many bytes is this array consuming? */ +unsigned long long +xfarray_bytes( + struct xfarray *array) +{ + return xfile_bytes(array->xfile); +} + +/* Empty the entire array. */ +void +xfarray_truncate( + struct xfarray *array) +{ + xfile_discard(array->xfile, 0, MAX_LFS_FILESIZE); + array->nr = 0; +} diff --git a/fs/xfs/scrub/xfarray.h b/fs/xfs/scrub/xfarray.h index acb2f94c56c1..3b10a58e9f14 100644 --- a/fs/xfs/scrub/xfarray.h +++ b/fs/xfs/scrub/xfarray.h @@ -44,6 +44,8 @@ int xfarray_unset(struct xfarray *array, xfarray_idx_t idx); int xfarray_store(struct xfarray *array, xfarray_idx_t idx, const void *ptr); int xfarray_store_anywhere(struct xfarray *array, const void *ptr); bool xfarray_element_is_null(struct xfarray *array, const void *ptr); +void xfarray_truncate(struct xfarray *array); +unsigned long long xfarray_bytes(struct xfarray *array); /* * Load an array element, but zero the buffer if there's no data because we diff --git a/fs/xfs/scrub/xfblob.c b/fs/xfs/scrub/xfblob.c index cec668debce5..6ef2a9637f16 100644 --- a/fs/xfs/scrub/xfblob.c +++ b/fs/xfs/scrub/xfblob.c @@ -149,3 +149,20 @@ xfblob_free( xfile_discard(blob->xfile, cookie, sizeof(key) + key.xb_size); return 0; } + +/* How many bytes is this blob storage object consuming? */ +unsigned long long +xfblob_bytes( + struct xfblob *blob) +{ + return xfile_bytes(blob->xfile); +} + +/* Drop all the blobs. */ +void +xfblob_truncate( + struct xfblob *blob) +{ + xfile_discard(blob->xfile, PAGE_SIZE, MAX_LFS_FILESIZE - PAGE_SIZE); + blob->last_offset = PAGE_SIZE; +} diff --git a/fs/xfs/scrub/xfblob.h b/fs/xfs/scrub/xfblob.h index bd98647407f1..78a67a06408f 100644 --- a/fs/xfs/scrub/xfblob.h +++ b/fs/xfs/scrub/xfblob.h @@ -20,5 +20,7 @@ int xfblob_load(struct xfblob *blob, xfblob_cookie cookie, void *ptr, int xfblob_store(struct xfblob *blob, xfblob_cookie *cookie, const void *ptr, uint32_t size); int xfblob_free(struct xfblob *blob, xfblob_cookie cookie); +unsigned long long xfblob_bytes(struct xfblob *blob); +void xfblob_truncate(struct xfblob *blob); #endif /* __XFS_SCRUB_XFBLOB_H__ */ diff --git a/fs/xfs/scrub/xfile.h b/fs/xfs/scrub/xfile.h index 8dfbae1fe33a..cc2cc1714cd4 100644 --- a/fs/xfs/scrub/xfile.h +++ b/fs/xfs/scrub/xfile.h @@ -27,4 +27,9 @@ struct folio *xfile_get_folio(struct xfile *xf, loff_t offset, size_t len, unsigned int flags); void xfile_put_folio(struct xfile *xf, struct folio *folio); +static inline unsigned long long xfile_bytes(struct xfile *xf) +{ + return file_inode(xf->file)->i_blocks << SECTOR_SHIFT; +} + #endif /* __XFS_SCRUB_XFILE_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index f0fa02264eda..8a0151e23f3d 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -494,6 +494,9 @@ _xfs_buf_obj_cmp( * it stale has not yet committed. i.e. we are * reallocating a busy extent. Skip this buffer and * continue searching for an exact match. + * + * Note: If we're scanning for incore buffers to stale, don't + * complain if we find non-stale buffers. */ if (!(map->bm_flags & XBM_LIVESCAN)) ASSERT(bp->b_flags & XBF_STALE); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 939baf08331b..a80c3063a13f 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -31,6 +31,8 @@ * pos: file offset, in bytes * bytecount: number of bytes * + * dablk: directory or xattr block offset, in filesystem blocks + * * disize: ondisk file size, in bytes * isize: incore file size, in bytes * -- cgit v1.2.3-59-g8ed1b From 779a4b606c7678343e3603398fe07de38b817ef4 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:20 -0700 Subject: xfs: remove XFS_DA_OP_NOTIME The only user of this flag sets it prior to an xfs_attr_get_ilocked call, which doesn't update anything. Get rid of the flag. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 5 ++--- fs/xfs/libxfs/xfs_da_btree.h | 6 ++---- fs/xfs/scrub/attr.c | 1 - 3 files changed, 4 insertions(+), 8 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 05d22c5e3885..30e6084122d8 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -365,7 +365,7 @@ xfs_attr_try_sf_addname( * Commit the shortform mods, and we're done. * NOTE: this is also the error path (EEXIST, etc). */ - if (!error && !(args->op_flags & XFS_DA_OP_NOTIME)) + if (!error) xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG); if (xfs_has_wsync(dp->i_mount)) @@ -1033,8 +1033,7 @@ xfs_attr_set( if (xfs_has_wsync(mp)) xfs_trans_set_sync(args->trans); - if (!(args->op_flags & XFS_DA_OP_NOTIME)) - xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG); + xfs_trans_ichgtime(args->trans, dp, XFS_ICHGTIME_CHG); /* * Commit the last in the sequence of transactions. diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 76e764080d99..b04a3290ffac 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -90,9 +90,8 @@ typedef struct xfs_da_args { #define XFS_DA_OP_ADDNAME (1u << 2) /* this is an add operation */ #define XFS_DA_OP_OKNOENT (1u << 3) /* lookup op, ENOENT ok, else die */ #define XFS_DA_OP_CILOOKUP (1u << 4) /* lookup returns CI name if found */ -#define XFS_DA_OP_NOTIME (1u << 5) /* don't update inode timestamps */ -#define XFS_DA_OP_RECOVERY (1u << 6) /* Log recovery operation */ -#define XFS_DA_OP_LOGGED (1u << 7) /* Use intent items to track op */ +#define XFS_DA_OP_RECOVERY (1u << 5) /* Log recovery operation */ +#define XFS_DA_OP_LOGGED (1u << 6) /* Use intent items to track op */ #define XFS_DA_OP_FLAGS \ { XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \ @@ -100,7 +99,6 @@ typedef struct xfs_da_args { { XFS_DA_OP_ADDNAME, "ADDNAME" }, \ { XFS_DA_OP_OKNOENT, "OKNOENT" }, \ { XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \ - { XFS_DA_OP_NOTIME, "NOTIME" }, \ { XFS_DA_OP_RECOVERY, "RECOVERY" }, \ { XFS_DA_OP_LOGGED, "LOGGED" } diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 8853e4d0eee3..5b855d7c9821 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -173,7 +173,6 @@ xchk_xattr_actor( void *priv) { struct xfs_da_args args = { - .op_flags = XFS_DA_OP_NOTIME, .attr_filter = attr_flags & XFS_ATTR_NSP_ONDISK_MASK, .geo = sc->mp->m_attr_geo, .whichfork = XFS_ATTR_FORK, -- cgit v1.2.3-59-g8ed1b From 54275d8496f3e4764302cebc0e9517d950ba6589 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:21 -0700 Subject: xfs: remove xfs_da_args.attr_flags This field only ever contains XATTR_{CREATE,REPLACE}, and it only goes as deep as xfs_attr_set. Remove the field from the structure and replace it with an enum specifying exactly what kind of change we want to make to the xattr structure. Upsert is the name that we'll give to the flags==0 operation, because we're either updating an existing value or inserting it, and the caller doesn't care. Note: The "UPSERTR" name created here is to make userspace porting easier. It will be removed in the next patch. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 7 ++++--- fs/xfs/libxfs/xfs_attr.h | 9 ++++++++- fs/xfs/libxfs/xfs_da_btree.h | 1 - fs/xfs/scrub/attr_repair.c | 3 +-- fs/xfs/xfs_acl.c | 2 +- fs/xfs/xfs_ioctl.c | 14 ++++++-------- fs/xfs/xfs_iops.c | 2 +- fs/xfs/xfs_trace.h | 7 +------ fs/xfs/xfs_xattr.c | 19 +++++++++++++++---- fs/xfs/xfs_xattr.h | 3 ++- 10 files changed, 39 insertions(+), 28 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 30e6084122d8..b04e09143869 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -922,7 +922,8 @@ xfs_attr_defer_add( */ int xfs_attr_set( - struct xfs_da_args *args) + struct xfs_da_args *args, + enum xfs_attr_update op) { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; @@ -1008,7 +1009,7 @@ xfs_attr_set( } /* Pure create fails if the attr already exists */ - if (args->attr_flags & XATTR_CREATE) + if (op == XFS_ATTRUPDATE_CREATE) goto out_trans_cancel; xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REPLACE); break; @@ -1018,7 +1019,7 @@ xfs_attr_set( goto out_trans_cancel; /* Pure replace fails if no existing attr to replace. */ - if (args->attr_flags & XATTR_REPLACE) + if (op == XFS_ATTRUPDATE_REPLACE) goto out_trans_cancel; xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_SET); break; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 670ab2a613fc..228360f7c85c 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -544,7 +544,14 @@ int xfs_inode_hasattr(struct xfs_inode *ip); bool xfs_attr_is_leaf(struct xfs_inode *ip); int xfs_attr_get_ilocked(struct xfs_da_args *args); int xfs_attr_get(struct xfs_da_args *args); -int xfs_attr_set(struct xfs_da_args *args); + +enum xfs_attr_update { + XFS_ATTRUPDATE_UPSERTR, /* set/remove value, replace any existing attr */ + XFS_ATTRUPDATE_CREATE, /* set value, fail if attr already exists */ + XFS_ATTRUPDATE_REPLACE, /* set value, fail if attr does not exist */ +}; + +int xfs_attr_set(struct xfs_da_args *args, enum xfs_attr_update op); int xfs_attr_set_iter(struct xfs_attr_intent *attr); int xfs_attr_remove_iter(struct xfs_attr_intent *attr); bool xfs_attr_namecheck(const void *name, size_t length); diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index b04a3290ffac..706b529a81fe 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -60,7 +60,6 @@ typedef struct xfs_da_args { void *value; /* set of bytes (maybe contain NULLs) */ int valuelen; /* length of value */ unsigned int attr_filter; /* XFS_ATTR_{ROOT,SECURE,INCOMPLETE} */ - unsigned int attr_flags; /* XATTR_{CREATE,REPLACE} */ xfs_dahash_t hashval; /* hash value of name */ xfs_ino_t inumber; /* input/output inode number */ struct xfs_inode *dp; /* directory inode to manipulate */ diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c index 7b4318764d03..3066d662ea13 100644 --- a/fs/xfs/scrub/attr_repair.c +++ b/fs/xfs/scrub/attr_repair.c @@ -557,7 +557,6 @@ xrep_xattr_insert_rec( struct xfs_da_args args = { .dp = rx->sc->tempip, .attr_filter = key->flags, - .attr_flags = XATTR_CREATE, .namelen = key->namelen, .valuelen = key->valuelen, .owner = rx->sc->ip->i_ino, @@ -605,7 +604,7 @@ xrep_xattr_insert_rec( * xfs_attr_set creates and commits its own transaction. If the attr * already exists, we'll just drop it during the rebuild. */ - error = xfs_attr_set(&args); + error = xfs_attr_set(&args, XFS_ATTRUPDATE_CREATE); if (error == -EEXIST) error = 0; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 4bf69c9c088e..12f98af9e709 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -203,7 +203,7 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) xfs_acl_to_disk(args.value, acl); } - error = xfs_attr_change(&args); + error = xfs_attr_change(&args, XFS_ATTRUPDATE_UPSERTR); kvfree(args.value); /* diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index efa95892655d..0eca7a9096e2 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -361,15 +361,15 @@ xfs_attr_filter( return 0; } -static unsigned int -xfs_attr_flags( +static inline enum xfs_attr_update +xfs_xattr_flags( u32 ioc_flags) { if (ioc_flags & XFS_IOC_ATTR_CREATE) - return XATTR_CREATE; + return XFS_ATTRUPDATE_CREATE; if (ioc_flags & XFS_IOC_ATTR_REPLACE) - return XATTR_REPLACE; - return 0; + return XFS_ATTRUPDATE_REPLACE; + return XFS_ATTRUPDATE_UPSERTR; } int @@ -476,7 +476,6 @@ xfs_attrmulti_attr_get( struct xfs_da_args args = { .dp = XFS_I(inode), .attr_filter = xfs_attr_filter(flags), - .attr_flags = xfs_attr_flags(flags), .name = name, .namelen = strlen(name), .valuelen = *len, @@ -510,7 +509,6 @@ xfs_attrmulti_attr_set( struct xfs_da_args args = { .dp = XFS_I(inode), .attr_filter = xfs_attr_filter(flags), - .attr_flags = xfs_attr_flags(flags), .name = name, .namelen = strlen(name), }; @@ -528,7 +526,7 @@ xfs_attrmulti_attr_set( args.valuelen = len; } - error = xfs_attr_change(&args); + error = xfs_attr_change(&args, xfs_xattr_flags(flags)); if (!error && (flags & XFS_IOC_ATTR_ROOT)) xfs_forget_acl(inode, name); kfree(args.value); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ad76704ab133..d02ca2248bbc 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -63,7 +63,7 @@ xfs_initxattrs( .value = xattr->value, .valuelen = xattr->value_len, }; - error = xfs_attr_change(&args); + error = xfs_attr_change(&args, XFS_ATTRUPDATE_UPSERTR); if (error < 0) break; } diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 08df5bb70a6c..57f225ba7e8a 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1999,7 +1999,6 @@ DECLARE_EVENT_CLASS(xfs_attr_class, __field(int, valuelen) __field(xfs_dahash_t, hashval) __field(unsigned int, attr_filter) - __field(unsigned int, attr_flags) __field(uint32_t, op_flags) ), TP_fast_assign( @@ -2011,11 +2010,10 @@ DECLARE_EVENT_CLASS(xfs_attr_class, __entry->valuelen = args->valuelen; __entry->hashval = args->hashval; __entry->attr_filter = args->attr_filter; - __entry->attr_flags = args->attr_flags; __entry->op_flags = args->op_flags; ), TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d valuelen %d " - "hashval 0x%x filter %s flags %s op_flags %s", + "hashval 0x%x filter %s op_flags %s", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->namelen, @@ -2025,9 +2023,6 @@ DECLARE_EVENT_CLASS(xfs_attr_class, __entry->hashval, __print_flags(__entry->attr_filter, "|", XFS_ATTR_FILTER_FLAGS), - __print_flags(__entry->attr_flags, "|", - { XATTR_CREATE, "CREATE" }, - { XATTR_REPLACE, "REPLACE" }), __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) ) diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 4ebf7052eb67..6ce1e06f650a 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -73,7 +73,8 @@ xfs_attr_want_log_assist( */ int xfs_attr_change( - struct xfs_da_args *args) + struct xfs_da_args *args, + enum xfs_attr_update op) { struct xfs_mount *mp = args->dp->i_mount; int error; @@ -88,7 +89,7 @@ xfs_attr_change( args->op_flags |= XFS_DA_OP_LOGGED; } - return xfs_attr_set(args); + return xfs_attr_set(args, op); } @@ -115,6 +116,17 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, return args.valuelen; } +static inline enum xfs_attr_update +xfs_xattr_flags_to_op( + int flags) +{ + if (flags & XATTR_CREATE) + return XFS_ATTRUPDATE_CREATE; + if (flags & XATTR_REPLACE) + return XFS_ATTRUPDATE_REPLACE; + return XFS_ATTRUPDATE_UPSERTR; +} + static int xfs_xattr_set(const struct xattr_handler *handler, struct mnt_idmap *idmap, struct dentry *unused, @@ -124,7 +136,6 @@ xfs_xattr_set(const struct xattr_handler *handler, struct xfs_da_args args = { .dp = XFS_I(inode), .attr_filter = handler->flags, - .attr_flags = flags, .name = name, .namelen = strlen(name), .value = (void *)value, @@ -132,7 +143,7 @@ xfs_xattr_set(const struct xattr_handler *handler, }; int error; - error = xfs_attr_change(&args); + error = xfs_attr_change(&args, xfs_xattr_flags_to_op(flags)); if (!error && (handler->flags & XFS_ATTR_ROOT)) xfs_forget_acl(inode, name); return error; diff --git a/fs/xfs/xfs_xattr.h b/fs/xfs/xfs_xattr.h index cec766cad26c..c3eb858fb59e 100644 --- a/fs/xfs/xfs_xattr.h +++ b/fs/xfs/xfs_xattr.h @@ -6,7 +6,8 @@ #ifndef __XFS_XATTR_H__ #define __XFS_XATTR_H__ -int xfs_attr_change(struct xfs_da_args *args); +enum xfs_attr_update; +int xfs_attr_change(struct xfs_da_args *args, enum xfs_attr_update op); extern const struct xattr_handler * const xfs_xattr_handlers[]; -- cgit v1.2.3-59-g8ed1b From c27411d4c640037d70e2fa5751616e175e52ca5e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:22 -0700 Subject: xfs: make attr removal an explicit operation Parent pointers match attrs on name+value, unlike everything else which matches on only the name. Therefore, we cannot keep using the heuristic that !value means remove. Make this an explicit operation code. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 19 ++++++++++--------- fs/xfs/libxfs/xfs_attr.h | 3 ++- fs/xfs/xfs_acl.c | 17 +++++++++-------- fs/xfs/xfs_ioctl.c | 9 ++++++--- fs/xfs/xfs_iops.c | 2 +- fs/xfs/xfs_xattr.c | 9 ++++++--- 6 files changed, 34 insertions(+), 25 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index b04e09143869..f8f7445b063c 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -916,10 +916,6 @@ xfs_attr_defer_add( trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp); } -/* - * Note: If args->value is NULL the attribute will be removed, just like the - * Linux ->setattr API. - */ int xfs_attr_set( struct xfs_da_args *args, @@ -955,7 +951,10 @@ xfs_attr_set( args->op_flags = XFS_DA_OP_OKNOENT | (args->op_flags & XFS_DA_OP_LOGGED); - if (args->value) { + switch (op) { + case XFS_ATTRUPDATE_UPSERT: + case XFS_ATTRUPDATE_CREATE: + case XFS_ATTRUPDATE_REPLACE: XFS_STATS_INC(mp, xs_attr_set); args->total = xfs_attr_calc_size(args, &local); @@ -975,9 +974,11 @@ xfs_attr_set( if (!local) rmt_blks = xfs_attr3_rmt_blocks(mp, args->valuelen); - } else { + break; + case XFS_ATTRUPDATE_REMOVE: XFS_STATS_INC(mp, xs_attr_remove); rmt_blks = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX); + break; } /* @@ -989,7 +990,7 @@ xfs_attr_set( if (error) return error; - if (args->value || xfs_inode_hasattr(dp)) { + if (op != XFS_ATTRUPDATE_REMOVE || xfs_inode_hasattr(dp)) { error = xfs_iext_count_may_overflow(dp, XFS_ATTR_FORK, XFS_IEXT_ATTR_MANIP_CNT(rmt_blks)); if (error == -EFBIG) @@ -1002,7 +1003,7 @@ xfs_attr_set( error = xfs_attr_lookup(args); switch (error) { case -EEXIST: - if (!args->value) { + if (op == XFS_ATTRUPDATE_REMOVE) { /* if no value, we are performing a remove operation */ xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REMOVE); break; @@ -1015,7 +1016,7 @@ xfs_attr_set( break; case -ENOATTR: /* Can't remove what isn't there. */ - if (!args->value) + if (op == XFS_ATTRUPDATE_REMOVE) goto out_trans_cancel; /* Pure replace fails if no existing attr to replace. */ diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 228360f7c85c..c8005f52102a 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -546,7 +546,8 @@ int xfs_attr_get_ilocked(struct xfs_da_args *args); int xfs_attr_get(struct xfs_da_args *args); enum xfs_attr_update { - XFS_ATTRUPDATE_UPSERTR, /* set/remove value, replace any existing attr */ + XFS_ATTRUPDATE_REMOVE, /* remove attr */ + XFS_ATTRUPDATE_UPSERT, /* set value, replace any existing attr */ XFS_ATTRUPDATE_CREATE, /* set value, fail if attr already exists */ XFS_ATTRUPDATE_REPLACE, /* set value, fail if attr does not exist */ }; diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 12f98af9e709..c7c3dcfa2718 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -201,16 +201,17 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) if (!args.value) return -ENOMEM; xfs_acl_to_disk(args.value, acl); + error = xfs_attr_change(&args, XFS_ATTRUPDATE_UPSERT); + kvfree(args.value); + } else { + error = xfs_attr_change(&args, XFS_ATTRUPDATE_REMOVE); + /* + * If the attribute didn't exist to start with that's fine. + */ + if (error == -ENOATTR) + error = 0; } - error = xfs_attr_change(&args, XFS_ATTRUPDATE_UPSERTR); - kvfree(args.value); - - /* - * If the attribute didn't exist to start with that's fine. - */ - if (!acl && error == -ENOATTR) - error = 0; if (!error) set_cached_acl(inode, type, acl); return error; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0eca7a9096e2..e30f9f40f086 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -363,13 +363,16 @@ xfs_attr_filter( static inline enum xfs_attr_update xfs_xattr_flags( - u32 ioc_flags) + u32 ioc_flags, + void *value) { + if (!value) + return XFS_ATTRUPDATE_REMOVE; if (ioc_flags & XFS_IOC_ATTR_CREATE) return XFS_ATTRUPDATE_CREATE; if (ioc_flags & XFS_IOC_ATTR_REPLACE) return XFS_ATTRUPDATE_REPLACE; - return XFS_ATTRUPDATE_UPSERTR; + return XFS_ATTRUPDATE_UPSERT; } int @@ -526,7 +529,7 @@ xfs_attrmulti_attr_set( args.valuelen = len; } - error = xfs_attr_change(&args, xfs_xattr_flags(flags)); + error = xfs_attr_change(&args, xfs_xattr_flags(flags, args.value)); if (!error && (flags & XFS_IOC_ATTR_ROOT)) xfs_forget_acl(inode, name); kfree(args.value); diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index d02ca2248bbc..659fd10c0cda 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -63,7 +63,7 @@ xfs_initxattrs( .value = xattr->value, .valuelen = xattr->value_len, }; - error = xfs_attr_change(&args, XFS_ATTRUPDATE_UPSERTR); + error = xfs_attr_change(&args, XFS_ATTRUPDATE_UPSERT); if (error < 0) break; } diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 6ce1e06f650a..0cbb93cf2869 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -118,13 +118,16 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, static inline enum xfs_attr_update xfs_xattr_flags_to_op( - int flags) + int flags, + const void *value) { + if (!value) + return XFS_ATTRUPDATE_REMOVE; if (flags & XATTR_CREATE) return XFS_ATTRUPDATE_CREATE; if (flags & XATTR_REPLACE) return XFS_ATTRUPDATE_REPLACE; - return XFS_ATTRUPDATE_UPSERTR; + return XFS_ATTRUPDATE_UPSERT; } static int @@ -143,7 +146,7 @@ xfs_xattr_set(const struct xattr_handler *handler, }; int error; - error = xfs_attr_change(&args, xfs_xattr_flags_to_op(flags)); + error = xfs_attr_change(&args, xfs_xattr_flags_to_op(flags, value)); if (!error && (handler->flags & XFS_ATTR_ROOT)) xfs_forget_acl(inode, name); return error; -- cgit v1.2.3-59-g8ed1b From ef80de940a6344da1d4f12c948a0ad4d6ff6e841 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:24 -0700 Subject: xfs: attr fork iext must be loaded before calling xfs_attr_is_leaf Christoph noticed that the xfs_attr_is_leaf in xfs_attr_get_ilocked can access the incore extent tree of the attr fork, but nothing in the xfs_attr_get path guarantees that the incore tree is actually loaded. Most of the time it is, but seeing as xfs_attr_is_leaf ignores the return value of xfs_iext_get_extent I guess we've been making choices based on random stack contents and nobody's complained? Reported-by: Christoph Hellwig Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 17 +++++++++++++++++ fs/xfs/xfs_attr_item.c | 42 ++++++++++++++++++++++++++++++++++++------ fs/xfs/xfs_attr_list.c | 7 +++++++ 3 files changed, 60 insertions(+), 6 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index f8f7445b063c..54edc690ac1e 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -87,6 +87,8 @@ xfs_attr_is_leaf( struct xfs_iext_cursor icur; struct xfs_bmbt_irec imap; + ASSERT(!xfs_need_iread_extents(ifp)); + if (ifp->if_nextents != 1 || ifp->if_format != XFS_DINODE_FMT_EXTENTS) return false; @@ -224,11 +226,21 @@ int xfs_attr_get_ilocked( struct xfs_da_args *args) { + int error; + xfs_assert_ilocked(args->dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL); if (!xfs_inode_hasattr(args->dp)) return -ENOATTR; + /* + * The incore attr fork iext tree must be loaded for xfs_attr_is_leaf + * to work correctly. + */ + error = xfs_iread_extents(args->trans, args->dp, XFS_ATTR_FORK); + if (error) + return error; + if (args->dp->i_af.if_format == XFS_DINODE_FMT_LOCAL) return xfs_attr_shortform_getvalue(args); if (xfs_attr_is_leaf(args->dp)) @@ -870,6 +882,11 @@ xfs_attr_lookup( return -ENOATTR; } + /* Prerequisite for xfs_attr_is_leaf */ + error = xfs_iread_extents(args->trans, args->dp, XFS_ATTR_FORK); + if (error) + return error; + if (xfs_attr_is_leaf(dp)) { error = xfs_attr_leaf_hasname(args, &bp); diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index d46034705694..541455731618 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -498,6 +498,25 @@ xfs_attri_validate( return xfs_verify_ino(mp, attrp->alfi_ino); } +static int +xfs_attri_iread_extents( + struct xfs_inode *ip) +{ + struct xfs_trans *tp; + int error; + + error = xfs_trans_alloc_empty(ip->i_mount, &tp); + if (error) + return error; + + xfs_ilock(ip, XFS_ILOCK_EXCL); + error = xfs_iread_extents(tp, ip, XFS_ATTR_FORK); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xfs_trans_cancel(tp); + + return error; +} + static inline struct xfs_attr_intent * xfs_attri_recover_work( struct xfs_mount *mp, @@ -508,13 +527,22 @@ xfs_attri_recover_work( { struct xfs_attr_intent *attr; struct xfs_da_args *args; + struct xfs_inode *ip; int local; int error; - error = xlog_recover_iget(mp, attrp->alfi_ino, ipp); + error = xlog_recover_iget(mp, attrp->alfi_ino, &ip); if (error) return ERR_PTR(error); + if (xfs_inode_has_attr_fork(ip)) { + error = xfs_attri_iread_extents(ip); + if (error) { + xfs_irele(ip); + return ERR_PTR(error); + } + } + attr = kzalloc(sizeof(struct xfs_attr_intent) + sizeof(struct xfs_da_args), GFP_KERNEL | __GFP_NOFAIL); args = (struct xfs_da_args *)(attr + 1); @@ -531,7 +559,7 @@ xfs_attri_recover_work( attr->xattri_nameval = xfs_attri_log_nameval_get(nv); ASSERT(attr->xattri_nameval); - args->dp = *ipp; + args->dp = ip; args->geo = mp->m_attr_geo; args->whichfork = XFS_ATTR_FORK; args->name = nv->name.i_addr; @@ -561,6 +589,7 @@ xfs_attri_recover_work( } xfs_defer_add_item(dfp, &attr->xattri_list); + *ipp = ip; return attr; } @@ -615,16 +644,17 @@ xfs_attr_recover_work( XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, &attrip->attri_format, sizeof(attrip->attri_format)); - if (error) { - xfs_trans_cancel(tp); - goto out_unlock; - } + if (error) + goto out_cancel; error = xfs_defer_ops_capture_and_commit(tp, capture_list); out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); xfs_irele(ip); return error; +out_cancel: + xfs_trans_cancel(tp); + goto out_unlock; } /* Re-log an intent item to push the log tail forward. */ diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 6a621f016f04..97c8f3dcfb89 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -544,6 +544,7 @@ xfs_attr_list_ilocked( struct xfs_attr_list_context *context) { struct xfs_inode *dp = context->dp; + int error; xfs_assert_ilocked(dp, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL); @@ -554,6 +555,12 @@ xfs_attr_list_ilocked( return 0; if (dp->i_af.if_format == XFS_DINODE_FMT_LOCAL) return xfs_attr_shortform_list(context); + + /* Prerequisite for xfs_attr_is_leaf */ + error = xfs_iread_extents(NULL, dp, XFS_ATTR_FORK); + if (error) + return error; + if (xfs_attr_is_leaf(dp)) return xfs_attr_leaf_list(context); return xfs_attr_node_list(context); -- cgit v1.2.3-59-g8ed1b From 992c3b5c3fe6f42778436649ddae2b7a2984b7aa Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:29 -0700 Subject: xfs: restructure xfs_attr_complete_op a bit Eliminate the local variable from this function so that we can streamline things a bit later when we add the PPTR_REPLACE op code. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 54edc690ac1e..ba59dab6c56d 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -432,14 +432,13 @@ xfs_attr_complete_op( enum xfs_delattr_state replace_state) { struct xfs_da_args *args = attr->xattri_da_args; - bool do_replace = args->op_flags & XFS_DA_OP_REPLACE; + + if (!(args->op_flags & XFS_DA_OP_REPLACE)) + replace_state = XFS_DAS_DONE; args->op_flags &= ~XFS_DA_OP_REPLACE; args->attr_filter &= ~XFS_ATTR_INCOMPLETE; - if (do_replace) - return replace_state; - - return XFS_DAS_DONE; + return replace_state; } static int -- cgit v1.2.3-59-g8ed1b From ea0b3e814741fb64e7785b564ea619578058e0b0 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:34 -0700 Subject: xfs: enforce one namespace per attribute Create a standardized helper function to enforce one namespace bit per extended attribute, and refactor all the open-coded hweight logic. This function is not a static inline to avoid porting hassles in userspace. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 11 +++++++++++ fs/xfs/libxfs/xfs_attr.h | 4 +++- fs/xfs/libxfs/xfs_attr_leaf.c | 7 ++++++- fs/xfs/scrub/attr.c | 12 +++++------- fs/xfs/scrub/attr_repair.c | 4 +--- fs/xfs/xfs_attr_item.c | 10 ++++++++-- fs/xfs/xfs_attr_list.c | 11 +++++++---- 7 files changed, 41 insertions(+), 18 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index ba59dab6c56d..629fb25d149c 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1532,12 +1532,23 @@ out_release: return error; } +/* Enforce that there is at most one namespace bit per attr. */ +inline bool xfs_attr_check_namespace(unsigned int attr_flags) +{ + return hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) < 2; +} + /* Returns true if the attribute entry name is valid. */ bool xfs_attr_namecheck( + unsigned int attr_flags, const void *name, size_t length) { + /* Only one namespace bit allowed. */ + if (!xfs_attr_check_namespace(attr_flags)) + return false; + /* * MAXNAMELEN includes the trailing null, but (name/length) leave it * out, so use >= for the length check. diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 79b457adb7bd..cd106b0a424f 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -560,7 +560,9 @@ enum xfs_attr_update { int xfs_attr_set(struct xfs_da_args *args, enum xfs_attr_update op); int xfs_attr_set_iter(struct xfs_attr_intent *attr); int xfs_attr_remove_iter(struct xfs_attr_intent *attr); -bool xfs_attr_namecheck(const void *name, size_t length); +bool xfs_attr_check_namespace(unsigned int attr_flags); +bool xfs_attr_namecheck(unsigned int attr_flags, const void *name, + size_t length); int xfs_attr_calc_size(struct xfs_da_args *args, int *local); void xfs_init_attr_trans(struct xfs_da_args *args, struct xfs_trans_res *tres, unsigned int *total); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 17ec5ff5a4e3..3b024ab892e6 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -950,6 +950,11 @@ xfs_attr_shortform_to_leaf( nargs.hashval = xfs_da_hashname(sfe->nameval, sfe->namelen); nargs.attr_filter = sfe->flags & XFS_ATTR_NSP_ONDISK_MASK; + if (!xfs_attr_check_namespace(sfe->flags)) { + xfs_da_mark_sick(args); + error = -EFSCORRUPTED; + goto out; + } error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */ ASSERT(error == -ENOATTR); error = xfs_attr3_leaf_add(bp, &nargs); @@ -1063,7 +1068,7 @@ xfs_attr_shortform_verify( * one namespace flag per xattr, so we can just count the * bits (i.e. hweight) here. */ - if (hweight8(sfep->flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) + if (!xfs_attr_check_namespace(sfep->flags)) return __this_address; sfep = next_sfep; diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index fd22d652a63a..7789bd2f0950 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -203,14 +203,8 @@ xchk_xattr_actor( return 0; } - /* Only one namespace bit allowed. */ - if (hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) { - xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno); - return -ECANCELED; - } - /* Does this name make sense? */ - if (!xfs_attr_namecheck(name, namelen)) { + if (!xfs_attr_namecheck(attr_flags, name, namelen)) { xchk_fblock_set_corrupt(sc, XFS_ATTR_FORK, args.blkno); return -ECANCELED; } @@ -519,6 +513,10 @@ xchk_xattr_rec( xchk_da_set_corrupt(ds, level); return 0; } + if (!xfs_attr_check_namespace(ent->flags)) { + xchk_da_set_corrupt(ds, level); + return 0; + } if (ent->flags & XFS_ATTR_LOCAL) { lentry = (struct xfs_attr_leaf_name_local *) diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c index 3066d662ea13..8b89c112c492 100644 --- a/fs/xfs/scrub/attr_repair.c +++ b/fs/xfs/scrub/attr_repair.c @@ -123,12 +123,10 @@ xrep_xattr_want_salvage( return false; if (namelen > XATTR_NAME_MAX || namelen <= 0) return false; - if (!xfs_attr_namecheck(name, namelen)) + if (!xfs_attr_namecheck(attr_flags, name, namelen)) return false; if (valuelen > XATTR_SIZE_MAX || valuelen < 0) return false; - if (hweight32(attr_flags & XFS_ATTR_NSP_ONDISK_MASK) > 1) - return false; return true; } diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 39536303a7b6..a65ac7479768 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -492,6 +492,10 @@ xfs_attri_validate( if (attrp->alfi_attr_filter & ~XFS_ATTRI_FILTER_MASK) return false; + if (!xfs_attr_check_namespace(attrp->alfi_attr_filter & + XFS_ATTR_NSP_ONDISK_MASK)) + return false; + switch (op) { case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: @@ -633,7 +637,8 @@ xfs_attr_recover_work( */ attrp = &attrip->attri_format; if (!xfs_attri_validate(mp, attrp) || - !xfs_attr_namecheck(nv->name.i_addr, nv->name.i_len)) + !xfs_attr_namecheck(attrp->alfi_attr_filter, nv->name.i_addr, + nv->name.i_len)) return -EFSCORRUPTED; attr = xfs_attri_recover_work(mp, dfp, attrp, &ip, nv); @@ -747,7 +752,8 @@ xfs_attri_validate_name_iovec( return NULL; } - if (!xfs_attr_namecheck(iovec->i_addr, name_len)) { + if (!xfs_attr_namecheck(attri_formatp->alfi_attr_filter, iovec->i_addr, + name_len)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, sizeof(*attri_formatp)); XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 97c8f3dcfb89..903ed46c6887 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -82,7 +82,8 @@ xfs_attr_shortform_list( (dp->i_af.if_bytes + sf->count * 16) < context->bufsize)) { for (i = 0, sfe = xfs_attr_sf_firstentry(sf); i < sf->count; i++) { if (XFS_IS_CORRUPT(context->dp->i_mount, - !xfs_attr_namecheck(sfe->nameval, + !xfs_attr_namecheck(sfe->flags, + sfe->nameval, sfe->namelen))) { xfs_dirattr_mark_sick(context->dp, XFS_ATTR_FORK); return -EFSCORRUPTED; @@ -122,7 +123,8 @@ xfs_attr_shortform_list( for (i = 0, sfe = xfs_attr_sf_firstentry(sf); i < sf->count; i++) { if (unlikely( ((char *)sfe < (char *)sf) || - ((char *)sfe >= ((char *)sf + dp->i_af.if_bytes)))) { + ((char *)sfe >= ((char *)sf + dp->i_af.if_bytes)) || + !xfs_attr_check_namespace(sfe->flags))) { XFS_CORRUPTION_ERROR("xfs_attr_shortform_list", XFS_ERRLEVEL_LOW, context->dp->i_mount, sfe, @@ -177,7 +179,7 @@ xfs_attr_shortform_list( cursor->offset = 0; } if (XFS_IS_CORRUPT(context->dp->i_mount, - !xfs_attr_namecheck(sbp->name, + !xfs_attr_namecheck(sbp->flags, sbp->name, sbp->namelen))) { xfs_dirattr_mark_sick(context->dp, XFS_ATTR_FORK); error = -EFSCORRUPTED; @@ -502,7 +504,8 @@ xfs_attr3_leaf_list_int( } if (XFS_IS_CORRUPT(context->dp->i_mount, - !xfs_attr_namecheck(name, namelen))) { + !xfs_attr_namecheck(entry->flags, name, + namelen))) { xfs_dirattr_mark_sick(context->dp, XFS_ATTR_FORK); return -EFSCORRUPTED; } -- cgit v1.2.3-59-g8ed1b From 9713dc88773d066413ae23aa474b13241507a89e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:37 -0700 Subject: xfs: move xfs_attr_defer_add to xfs_attr_item.c Move the code that adds the incore xfs_attr_item deferred work data to a transaction live with the ATTRI log item code. This means that the upper level extended attribute code no longer has to know about the inner workings of the ATTRI log items. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 37 +++---------------------------------- fs/xfs/xfs_attr_item.c | 30 ++++++++++++++++++++++++++++++ fs/xfs/xfs_attr_item.h | 8 ++++++++ 3 files changed, 41 insertions(+), 34 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 629fb25d149c..50eab63ff3be 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -901,37 +901,6 @@ xfs_attr_lookup( return error; } -static void -xfs_attr_defer_add( - struct xfs_da_args *args, - unsigned int op_flags) -{ - - struct xfs_attr_intent *new; - - new = kmem_cache_zalloc(xfs_attr_intent_cache, - GFP_KERNEL | __GFP_NOFAIL); - new->xattri_op_flags = op_flags; - new->xattri_da_args = args; - - switch (op_flags) { - case XFS_ATTRI_OP_FLAGS_SET: - new->xattri_dela_state = xfs_attr_init_add_state(args); - break; - case XFS_ATTRI_OP_FLAGS_REPLACE: - new->xattri_dela_state = xfs_attr_init_replace_state(args); - break; - case XFS_ATTRI_OP_FLAGS_REMOVE: - new->xattri_dela_state = xfs_attr_init_remove_state(args); - break; - default: - ASSERT(0); - } - - xfs_defer_add(args->trans, &new->xattri_list, &xfs_attr_defer_type); - trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp); -} - int xfs_attr_set( struct xfs_da_args *args, @@ -1021,14 +990,14 @@ xfs_attr_set( case -EEXIST: if (op == XFS_ATTRUPDATE_REMOVE) { /* if no value, we are performing a remove operation */ - xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REMOVE); + xfs_attr_defer_add(args, XFS_ATTR_DEFER_REMOVE); break; } /* Pure create fails if the attr already exists */ if (op == XFS_ATTRUPDATE_CREATE) goto out_trans_cancel; - xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_REPLACE); + xfs_attr_defer_add(args, XFS_ATTR_DEFER_REPLACE); break; case -ENOATTR: /* Can't remove what isn't there. */ @@ -1038,7 +1007,7 @@ xfs_attr_set( /* Pure replace fails if no existing attr to replace. */ if (op == XFS_ATTRUPDATE_REPLACE) goto out_trans_cancel; - xfs_attr_defer_add(args, XFS_ATTRI_OP_FLAGS_SET); + xfs_attr_defer_add(args, XFS_ATTR_DEFER_SET); break; default: goto out_trans_cancel; diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index a65ac7479768..a7d6c9af47e8 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -727,6 +727,36 @@ xfs_attr_create_done( return &attrdp->attrd_item; } +void +xfs_attr_defer_add( + struct xfs_da_args *args, + enum xfs_attr_defer_op op) +{ + struct xfs_attr_intent *new; + + new = kmem_cache_zalloc(xfs_attr_intent_cache, + GFP_NOFS | __GFP_NOFAIL); + new->xattri_da_args = args; + + switch (op) { + case XFS_ATTR_DEFER_SET: + new->xattri_op_flags = XFS_ATTRI_OP_FLAGS_SET; + new->xattri_dela_state = xfs_attr_init_add_state(args); + break; + case XFS_ATTR_DEFER_REPLACE: + new->xattri_op_flags = XFS_ATTRI_OP_FLAGS_REPLACE; + new->xattri_dela_state = xfs_attr_init_replace_state(args); + break; + case XFS_ATTR_DEFER_REMOVE: + new->xattri_op_flags = XFS_ATTRI_OP_FLAGS_REMOVE; + new->xattri_dela_state = xfs_attr_init_remove_state(args); + break; + } + + xfs_defer_add(args->trans, &new->xattri_list, &xfs_attr_defer_type); + trace_xfs_attr_defer_add(new->xattri_dela_state, args->dp); +} + const struct xfs_defer_op_type xfs_attr_defer_type = { .name = "attr", .max_items = 1, diff --git a/fs/xfs/xfs_attr_item.h b/fs/xfs/xfs_attr_item.h index 3280a7930287..c32b669b0e16 100644 --- a/fs/xfs/xfs_attr_item.h +++ b/fs/xfs/xfs_attr_item.h @@ -51,4 +51,12 @@ struct xfs_attrd_log_item { extern struct kmem_cache *xfs_attri_cache; extern struct kmem_cache *xfs_attrd_cache; +enum xfs_attr_defer_op { + XFS_ATTR_DEFER_SET, + XFS_ATTR_DEFER_REMOVE, + XFS_ATTR_DEFER_REPLACE, +}; + +void xfs_attr_defer_add(struct xfs_da_args *args, enum xfs_attr_defer_op op); + #endif /* __XFS_ATTR_ITEM_H__ */ -- cgit v1.2.3-59-g8ed1b From a64e0134754bf88021e937aa34f1fbb5b524e585 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:38 -0700 Subject: xfs: create a separate hashname function for extended attributes Create a separate function to compute name hashvalues for extended attributes. When we get to parent pointers we'll be altering the rules so that metadump obfuscation doesn't turn heinous. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 28 ++++++++++++++++++++++++++-- fs/xfs/libxfs/xfs_attr.h | 14 ++++++++++++++ fs/xfs/libxfs/xfs_attr_leaf.c | 3 +-- fs/xfs/scrub/attr.c | 11 ++++++++--- fs/xfs/xfs_attr_item.c | 2 +- fs/xfs/xfs_attr_list.c | 5 ++++- 6 files changed, 54 insertions(+), 9 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 50eab63ff3be..8262c263be9d 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -280,7 +280,7 @@ xfs_attr_get( args->owner = args->dp->i_ino; args->geo = args->dp->i_mount->m_attr_geo; args->whichfork = XFS_ATTR_FORK; - args->hashval = xfs_da_hashname(args->name, args->namelen); + xfs_attr_sethash(args); /* Entirely possible to look up a name which doesn't exist */ args->op_flags = XFS_DA_OP_OKNOENT; @@ -415,6 +415,30 @@ out: return error; } +/* Compute the hash value for a user/root/secure extended attribute */ +xfs_dahash_t +xfs_attr_hashname( + const uint8_t *name, + int namelen) +{ + return xfs_da_hashname(name, namelen); +} + +/* Compute the hash value for any extended attribute from any namespace. */ +xfs_dahash_t +xfs_attr_hashval( + struct xfs_mount *mp, + unsigned int attr_flags, + const uint8_t *name, + int namelen, + const void *value, + int valuelen) +{ + ASSERT(xfs_attr_check_namespace(attr_flags)); + + return xfs_attr_hashname(name, namelen); +} + /* * Handle the state change on completion of a multi-state attr operation. * @@ -925,7 +949,7 @@ xfs_attr_set( args->owner = args->dp->i_ino; args->geo = mp->m_attr_geo; args->whichfork = XFS_ATTR_FORK; - args->hashval = xfs_da_hashname(args->name, args->namelen); + xfs_attr_sethash(args); /* * We have no control over the attribute names that userspace passes us diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index cd106b0a424f..c63b1d610e53 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -628,6 +628,20 @@ xfs_attr_init_replace_state(struct xfs_da_args *args) return xfs_attr_init_add_state(args); } +xfs_dahash_t xfs_attr_hashname(const uint8_t *name, int namelen); + +xfs_dahash_t xfs_attr_hashval(struct xfs_mount *mp, unsigned int attr_flags, + const uint8_t *name, int namelen, const void *value, + int valuelen); + +/* Set the hash value for any extended attribute from any namespace. */ +static inline void xfs_attr_sethash(struct xfs_da_args *args) +{ + args->hashval = xfs_attr_hashval(args->dp->i_mount, args->attr_filter, + args->name, args->namelen, + args->value, args->valuelen); +} + extern struct kmem_cache *xfs_attr_intent_cache; int __init xfs_attr_intent_init_cache(void); void xfs_attr_intent_destroy_cache(void); diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index c47fad39744e..e54a8372a30a 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -948,14 +948,13 @@ xfs_attr_shortform_to_leaf( nargs.namelen = sfe->namelen; nargs.value = &sfe->nameval[nargs.namelen]; nargs.valuelen = sfe->valuelen; - nargs.hashval = xfs_da_hashname(sfe->nameval, - sfe->namelen); nargs.attr_filter = sfe->flags & XFS_ATTR_NSP_ONDISK_MASK; if (!xfs_attr_check_namespace(sfe->flags)) { xfs_da_mark_sick(args); error = -EFSCORRUPTED; goto out; } + xfs_attr_sethash(&nargs); error = xfs_attr3_leaf_lookup_int(bp, &nargs); /* set a->index */ ASSERT(error == -ENOATTR); error = xfs_attr3_leaf_add(bp, &nargs); diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 7789bd2f0950..22d7ef4df169 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -179,7 +179,6 @@ xchk_xattr_actor( .dp = ip, .name = name, .namelen = namelen, - .hashval = xfs_da_hashname(name, namelen), .trans = sc->tp, .valuelen = valuelen, .owner = ip->i_ino, @@ -230,6 +229,7 @@ xchk_xattr_actor( args.value = ab->value; + xfs_attr_sethash(&args); error = xfs_attr_get_ilocked(&args); /* ENODATA means the hash lookup failed and the attr is bad */ if (error == -ENODATA) @@ -525,7 +525,10 @@ xchk_xattr_rec( xchk_da_set_corrupt(ds, level); goto out; } - calc_hash = xfs_da_hashname(lentry->nameval, lentry->namelen); + calc_hash = xfs_attr_hashval(mp, ent->flags, lentry->nameval, + lentry->namelen, + lentry->nameval + lentry->namelen, + be16_to_cpu(lentry->valuelen)); } else { rentry = (struct xfs_attr_leaf_name_remote *) (((char *)bp->b_addr) + nameidx); @@ -533,7 +536,9 @@ xchk_xattr_rec( xchk_da_set_corrupt(ds, level); goto out; } - calc_hash = xfs_da_hashname(rentry->name, rentry->namelen); + calc_hash = xfs_attr_hashval(mp, ent->flags, rentry->name, + rentry->namelen, NULL, + be32_to_cpu(rentry->valuelen)); } if (calc_hash != hash) xchk_da_set_corrupt(ds, level); diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index a7d6c9af47e8..4a57bcff49eb 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -582,13 +582,13 @@ xfs_attri_recover_work( args->whichfork = XFS_ATTR_FORK; args->name = nv->name.i_addr; args->namelen = nv->name.i_len; - args->hashval = xfs_da_hashname(args->name, args->namelen); args->value = nv->value.i_addr; args->valuelen = nv->value.i_len; args->attr_filter = attrp->alfi_attr_filter & XFS_ATTRI_FILTER_MASK; args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT | XFS_DA_OP_LOGGED; args->owner = args->dp->i_ino; + xfs_attr_sethash(args); switch (xfs_attr_intent_op(attr)) { case XFS_ATTRI_OP_FLAGS_SET: diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 903ed46c6887..9bc4b5322539 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -135,12 +135,15 @@ xfs_attr_shortform_list( } sbp->entno = i; - sbp->hash = xfs_da_hashname(sfe->nameval, sfe->namelen); sbp->name = sfe->nameval; sbp->namelen = sfe->namelen; /* These are bytes, and both on-disk, don't endian-flip */ sbp->valuelen = sfe->valuelen; sbp->flags = sfe->flags; + sbp->hash = xfs_attr_hashval(dp->i_mount, sfe->flags, + sfe->nameval, sfe->namelen, + sfe->nameval + sfe->namelen, + sfe->valuelen); sfe = xfs_attr_sf_nextentry(sfe); sbp++; nsbuf++; -- cgit v1.2.3-59-g8ed1b From 5773f7f82be5aa98e4883566072d33342814cebe Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:42 -0700 Subject: xfs: create attr log item opcodes and formats for parent pointers Make the necessary alterations to the extended attribute log intent item ondisk format so that we can log parent pointer operations. This requires the creation of new opcodes specific to parent pointers, and a new four-argument replace operation to handle renames. At this point this part of the patchset has changed so much from what Allison original wrote that I no longer think her SoB applies. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 19 +++ fs/xfs/libxfs/xfs_attr.h | 4 +- fs/xfs/libxfs/xfs_da_btree.h | 4 + fs/xfs/libxfs/xfs_log_format.h | 22 +++- fs/xfs/xfs_attr_item.c | 259 +++++++++++++++++++++++++++++++++++++---- fs/xfs/xfs_attr_item.h | 2 + 6 files changed, 284 insertions(+), 26 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 8262c263be9d..78c87c405e33 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -439,6 +439,23 @@ xfs_attr_hashval( return xfs_attr_hashname(name, namelen); } +/* + * PPTR_REPLACE operations require the caller to set the old and new names and + * values explicitly. Update the canonical fields to the new name and value + * here now that the removal phase has finished. + */ +static void +xfs_attr_update_pptr_replace_args( + struct xfs_da_args *args) +{ + ASSERT(args->new_namelen > 0); + args->name = args->new_name; + args->namelen = args->new_namelen; + args->value = args->new_value; + args->valuelen = args->new_valuelen; + xfs_attr_sethash(args); +} + /* * Handle the state change on completion of a multi-state attr operation. * @@ -459,6 +476,8 @@ xfs_attr_complete_op( if (!(args->op_flags & XFS_DA_OP_REPLACE)) replace_state = XFS_DAS_DONE; + else if (xfs_attr_intent_op(attr) == XFS_ATTRI_OP_FLAGS_PPTR_REPLACE) + xfs_attr_update_pptr_replace_args(args); args->op_flags &= ~XFS_DA_OP_REPLACE; args->attr_filter &= ~XFS_ATTR_INCOMPLETE; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index c63b1d610e53..d0ed7ea58ab0 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -510,8 +510,8 @@ struct xfs_attr_intent { struct xfs_da_args *xattri_da_args; /* - * Shared buffer containing the attr name and value so that the logging - * code can share large memory buffers between log items. + * Shared buffer containing the attr name, new name, and value so that + * the logging code can share large memory buffers between log items. */ struct xfs_attri_log_nameval *xattri_nameval; diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h index 17cef594b5bb..354d5d65043e 100644 --- a/fs/xfs/libxfs/xfs_da_btree.h +++ b/fs/xfs/libxfs/xfs_da_btree.h @@ -55,7 +55,9 @@ enum xfs_dacmp { typedef struct xfs_da_args { struct xfs_da_geometry *geo; /* da block geometry */ const uint8_t *name; /* string (maybe not NULL terminated) */ + const uint8_t *new_name; /* new attr name */ void *value; /* set of bytes (maybe contain NULLs) */ + void *new_value; /* new xattr value (may contain NULLs) */ struct xfs_inode *dp; /* directory inode to manipulate */ struct xfs_trans *trans; /* current trans (changes over time) */ @@ -63,10 +65,12 @@ typedef struct xfs_da_args { xfs_ino_t owner; /* inode that owns the dir/attr data */ int valuelen; /* length of value */ + int new_valuelen; /* length of new_value */ uint8_t filetype; /* filetype of inode for directories */ uint8_t op_flags; /* operation flags */ uint8_t attr_filter; /* XFS_ATTR_{ROOT,SECURE,INCOMPLETE} */ short namelen; /* length of string (maybe no NULL) */ + short new_namelen; /* length of new attr name */ xfs_dahash_t hashval; /* hash value of name */ xfs_extlen_t total; /* total blocks needed, for 1st bmap */ int whichfork; /* data or attribute fork */ diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 020aebd10143..632dd9732455 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -115,11 +115,13 @@ struct xfs_unmount_log_format { #define XLOG_REG_TYPE_BUD_FORMAT 26 #define XLOG_REG_TYPE_ATTRI_FORMAT 27 #define XLOG_REG_TYPE_ATTRD_FORMAT 28 -#define XLOG_REG_TYPE_ATTR_NAME 29 +#define XLOG_REG_TYPE_ATTR_NAME 29 #define XLOG_REG_TYPE_ATTR_VALUE 30 #define XLOG_REG_TYPE_XMI_FORMAT 31 #define XLOG_REG_TYPE_XMD_FORMAT 32 -#define XLOG_REG_TYPE_MAX 32 +#define XLOG_REG_TYPE_ATTR_NEWNAME 33 +#define XLOG_REG_TYPE_ATTR_NEWVALUE 34 +#define XLOG_REG_TYPE_MAX 34 /* * Flags to log operation header @@ -1026,6 +1028,9 @@ struct xfs_icreate_log { #define XFS_ATTRI_OP_FLAGS_SET 1 /* Set the attribute */ #define XFS_ATTRI_OP_FLAGS_REMOVE 2 /* Remove the attribute */ #define XFS_ATTRI_OP_FLAGS_REPLACE 3 /* Replace the attribute */ +#define XFS_ATTRI_OP_FLAGS_PPTR_SET 4 /* Set parent pointer */ +#define XFS_ATTRI_OP_FLAGS_PPTR_REMOVE 5 /* Remove parent pointer */ +#define XFS_ATTRI_OP_FLAGS_PPTR_REPLACE 6 /* Replace parent pointer */ #define XFS_ATTRI_OP_FLAGS_TYPE_MASK 0xFF /* Flags type mask */ /* @@ -1048,7 +1053,18 @@ struct xfs_attri_log_format { uint64_t alfi_id; /* attri identifier */ uint64_t alfi_ino; /* the inode for this attr operation */ uint32_t alfi_op_flags; /* marks the op as a set or remove */ - uint32_t alfi_name_len; /* attr name length */ + union { + uint32_t alfi_name_len; /* attr name length */ + struct { + /* + * For PPTR_REPLACE, these are the lengths of the old + * and new attr names. The new and old values must + * have the same length. + */ + uint16_t alfi_old_name_len; + uint16_t alfi_new_name_len; + }; + }; uint32_t alfi_value_len; /* attr value length */ uint32_t alfi_attr_filter;/* attr filter flags */ }; diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 413e3d3959a5..be5064f5a531 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -73,8 +73,12 @@ static inline struct xfs_attri_log_nameval * xfs_attri_log_nameval_alloc( const void *name, unsigned int name_len, + const void *new_name, + unsigned int new_name_len, const void *value, - unsigned int value_len) + unsigned int value_len, + const void *new_value, + unsigned int new_value_len) { struct xfs_attri_log_nameval *nv; @@ -83,15 +87,26 @@ xfs_attri_log_nameval_alloc( * this. But kvmalloc() utterly sucks, so we use our own version. */ nv = xlog_kvmalloc(sizeof(struct xfs_attri_log_nameval) + - name_len + value_len); + name_len + new_name_len + value_len + + new_value_len); nv->name.i_addr = nv + 1; nv->name.i_len = name_len; nv->name.i_type = XLOG_REG_TYPE_ATTR_NAME; memcpy(nv->name.i_addr, name, name_len); + if (new_name_len) { + nv->new_name.i_addr = nv->name.i_addr + name_len; + nv->new_name.i_len = new_name_len; + memcpy(nv->new_name.i_addr, new_name, new_name_len); + } else { + nv->new_name.i_addr = NULL; + nv->new_name.i_len = 0; + } + nv->new_name.i_type = XLOG_REG_TYPE_ATTR_NEWNAME; + if (value_len) { - nv->value.i_addr = nv->name.i_addr + name_len; + nv->value.i_addr = nv->name.i_addr + name_len + new_name_len; nv->value.i_len = value_len; memcpy(nv->value.i_addr, value, value_len); } else { @@ -100,6 +115,17 @@ xfs_attri_log_nameval_alloc( } nv->value.i_type = XLOG_REG_TYPE_ATTR_VALUE; + if (new_value_len) { + nv->new_value.i_addr = nv->name.i_addr + name_len + + new_name_len + value_len; + nv->new_value.i_len = new_value_len; + memcpy(nv->new_value.i_addr, new_value, new_value_len); + } else { + nv->new_value.i_addr = NULL; + nv->new_value.i_len = 0; + } + nv->new_value.i_type = XLOG_REG_TYPE_ATTR_NEWVALUE; + refcount_set(&nv->refcount, 1); return nv; } @@ -145,11 +171,20 @@ xfs_attri_item_size( *nbytes += sizeof(struct xfs_attri_log_format) + xlog_calc_iovec_len(nv->name.i_len); - if (!nv->value.i_len) - return; + if (nv->new_name.i_len) { + *nvecs += 1; + *nbytes += xlog_calc_iovec_len(nv->new_name.i_len); + } - *nvecs += 1; - *nbytes += xlog_calc_iovec_len(nv->value.i_len); + if (nv->value.i_len) { + *nvecs += 1; + *nbytes += xlog_calc_iovec_len(nv->value.i_len); + } + + if (nv->new_value.i_len) { + *nvecs += 1; + *nbytes += xlog_calc_iovec_len(nv->new_value.i_len); + } } /* @@ -179,15 +214,28 @@ xfs_attri_item_format( ASSERT(nv->name.i_len > 0); attrip->attri_format.alfi_size++; + if (nv->new_name.i_len > 0) + attrip->attri_format.alfi_size++; + if (nv->value.i_len > 0) attrip->attri_format.alfi_size++; + if (nv->new_value.i_len > 0) + attrip->attri_format.alfi_size++; + xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ATTRI_FORMAT, &attrip->attri_format, sizeof(struct xfs_attri_log_format)); xlog_copy_from_iovec(lv, &vecp, &nv->name); + + if (nv->new_name.i_len > 0) + xlog_copy_from_iovec(lv, &vecp, &nv->new_name); + if (nv->value.i_len > 0) xlog_copy_from_iovec(lv, &vecp, &nv->value); + + if (nv->new_value.i_len > 0) + xlog_copy_from_iovec(lv, &vecp, &nv->new_value); } /* @@ -322,6 +370,8 @@ xfs_attr_log_item( const struct xfs_attr_intent *attr) { struct xfs_attri_log_format *attrp; + struct xfs_attri_log_nameval *nv = attr->xattri_nameval; + struct xfs_da_args *args = attr->xattri_da_args; /* * At this point the xfs_attr_intent has been constructed, and we've @@ -329,13 +379,25 @@ xfs_attr_log_item( * structure with fields from this xfs_attr_intent */ attrp = &attrip->attri_format; - attrp->alfi_ino = attr->xattri_da_args->dp->i_ino; + attrp->alfi_ino = args->dp->i_ino; ASSERT(!(attr->xattri_op_flags & ~XFS_ATTRI_OP_FLAGS_TYPE_MASK)); attrp->alfi_op_flags = attr->xattri_op_flags; - attrp->alfi_value_len = attr->xattri_nameval->value.i_len; - attrp->alfi_name_len = attr->xattri_nameval->name.i_len; - ASSERT(!(attr->xattri_da_args->attr_filter & ~XFS_ATTRI_FILTER_MASK)); - attrp->alfi_attr_filter = attr->xattri_da_args->attr_filter; + attrp->alfi_value_len = nv->value.i_len; + + switch (xfs_attr_log_item_op(attrp)) { + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: + ASSERT(nv->value.i_len == nv->new_value.i_len); + + attrp->alfi_old_name_len = nv->name.i_len; + attrp->alfi_new_name_len = nv->new_name.i_len; + break; + default: + attrp->alfi_name_len = nv->name.i_len; + break; + } + + ASSERT(!(args->attr_filter & ~XFS_ATTRI_FILTER_MASK)); + attrp->alfi_attr_filter = args->attr_filter; } /* Get an ATTRI. */ @@ -374,8 +436,11 @@ xfs_attr_create_intent( * Transfer our reference to the name/value buffer to the * deferred work state structure. */ - attr->xattri_nameval = xfs_attri_log_nameval_alloc(args->name, - args->namelen, args->value, args->valuelen); + attr->xattri_nameval = xfs_attri_log_nameval_alloc( + args->name, args->namelen, + args->new_name, args->new_namelen, + args->value, args->valuelen, + args->new_value, args->new_valuelen); } attrip = xfs_attri_init(mp, attr->xattri_nameval); @@ -494,6 +559,17 @@ xfs_attri_validate( return false; switch (op) { + case XFS_ATTRI_OP_FLAGS_PPTR_SET: + case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: + if (!xfs_has_parent(mp)) + return false; + if (attrp->alfi_value_len != sizeof(struct xfs_parent_rec)) + return false; + if (!xfs_attri_validate_namelen(attrp->alfi_name_len)) + return false; + if (!(attrp->alfi_attr_filter & XFS_ATTR_PARENT)) + return false; + break; case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: if (!xfs_is_using_logged_xattrs(mp)) @@ -511,6 +587,18 @@ xfs_attri_validate( if (!xfs_attri_validate_namelen(attrp->alfi_name_len)) return false; break; + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: + if (!xfs_has_parent(mp)) + return false; + if (!xfs_attri_validate_namelen(attrp->alfi_old_name_len)) + return false; + if (!xfs_attri_validate_namelen(attrp->alfi_new_name_len)) + return false; + if (attrp->alfi_value_len != sizeof(struct xfs_parent_rec)) + return false; + if (!(attrp->alfi_attr_filter & XFS_ATTR_PARENT)) + return false; + break; default: return false; } @@ -583,8 +671,12 @@ xfs_attri_recover_work( args->whichfork = XFS_ATTR_FORK; args->name = nv->name.i_addr; args->namelen = nv->name.i_len; + args->new_name = nv->new_name.i_addr; + args->new_namelen = nv->new_name.i_len; args->value = nv->value.i_addr; args->valuelen = nv->value.i_len; + args->new_value = nv->new_value.i_addr; + args->new_valuelen = nv->new_value.i_len; args->attr_filter = attrp->alfi_attr_filter & XFS_ATTRI_FILTER_MASK; args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT | XFS_DA_OP_LOGGED; @@ -592,6 +684,8 @@ xfs_attri_recover_work( xfs_attr_sethash(args); switch (xfs_attr_intent_op(attr)) { + case XFS_ATTRI_OP_FLAGS_PPTR_SET: + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: args->total = xfs_attr_calc_size(args, &local); @@ -600,6 +694,7 @@ xfs_attri_recover_work( else attr->xattri_dela_state = xfs_attr_init_add_state(args); break; + case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: case XFS_ATTRI_OP_FLAGS_REMOVE: attr->xattri_dela_state = xfs_attr_init_remove_state(args); break; @@ -700,7 +795,17 @@ xfs_attr_relog_intent( new_attrp->alfi_ino = old_attrp->alfi_ino; new_attrp->alfi_op_flags = old_attrp->alfi_op_flags; new_attrp->alfi_value_len = old_attrp->alfi_value_len; - new_attrp->alfi_name_len = old_attrp->alfi_name_len; + + switch (xfs_attr_log_item_op(old_attrp)) { + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: + new_attrp->alfi_new_name_len = old_attrp->alfi_new_name_len; + new_attrp->alfi_old_name_len = old_attrp->alfi_old_name_len; + break; + default: + new_attrp->alfi_name_len = old_attrp->alfi_name_len; + break; + } + new_attrp->alfi_attr_filter = old_attrp->alfi_attr_filter; return &new_attrip->attri_item; @@ -734,22 +839,61 @@ xfs_attr_defer_add( enum xfs_attr_defer_op op) { struct xfs_attr_intent *new; + unsigned int log_op = 0; + bool is_pptr = args->attr_filter & XFS_ATTR_PARENT; + + if (is_pptr) { + ASSERT(xfs_has_parent(args->dp->i_mount)); + ASSERT((args->attr_filter & ~XFS_ATTR_PARENT) == 0); + ASSERT(args->op_flags & XFS_DA_OP_LOGGED); + ASSERT(args->valuelen == sizeof(struct xfs_parent_rec)); + } new = kmem_cache_zalloc(xfs_attr_intent_cache, GFP_NOFS | __GFP_NOFAIL); new->xattri_da_args = args; + /* Compute log operation from the higher level op and namespace. */ switch (op) { case XFS_ATTR_DEFER_SET: - new->xattri_op_flags = XFS_ATTRI_OP_FLAGS_SET; - new->xattri_dela_state = xfs_attr_init_add_state(args); + if (is_pptr) + log_op = XFS_ATTRI_OP_FLAGS_PPTR_SET; + else + log_op = XFS_ATTRI_OP_FLAGS_SET; break; case XFS_ATTR_DEFER_REPLACE: - new->xattri_op_flags = XFS_ATTRI_OP_FLAGS_REPLACE; - new->xattri_dela_state = xfs_attr_init_replace_state(args); + if (is_pptr) + log_op = XFS_ATTRI_OP_FLAGS_PPTR_REPLACE; + else + log_op = XFS_ATTRI_OP_FLAGS_REPLACE; break; case XFS_ATTR_DEFER_REMOVE: - new->xattri_op_flags = XFS_ATTRI_OP_FLAGS_REMOVE; + if (is_pptr) + log_op = XFS_ATTRI_OP_FLAGS_PPTR_REMOVE; + else + log_op = XFS_ATTRI_OP_FLAGS_REMOVE; + break; + default: + ASSERT(0); + break; + } + new->xattri_op_flags = log_op; + + /* Set up initial attr operation state. */ + switch (log_op) { + case XFS_ATTRI_OP_FLAGS_PPTR_SET: + case XFS_ATTRI_OP_FLAGS_SET: + new->xattri_dela_state = xfs_attr_init_add_state(args); + break; + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: + ASSERT(args->new_valuelen == args->valuelen); + new->xattri_dela_state = xfs_attr_init_replace_state(args); + break; + case XFS_ATTRI_OP_FLAGS_REPLACE: + new->xattri_dela_state = xfs_attr_init_replace_state(args); + break; + case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: + case XFS_ATTRI_OP_FLAGS_REMOVE: new->xattri_dela_state = xfs_attr_init_remove_state(args); break; } @@ -824,9 +968,13 @@ xlog_recover_attri_commit_pass2( struct xfs_attri_log_nameval *nv; const void *attr_name; const void *attr_value = NULL; + const void *attr_new_name = NULL; + const void *attr_new_value = NULL; size_t len; unsigned int name_len = 0; unsigned int value_len = 0; + unsigned int new_name_len = 0; + unsigned int new_value_len = 0; unsigned int op, i = 0; /* Validate xfs_attri_log_format before the large memory allocation */ @@ -847,6 +995,17 @@ xlog_recover_attri_commit_pass2( /* Check the number of log iovecs makes sense for the op code. */ op = xfs_attr_log_item_op(attri_formatp); switch (op) { + case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: + case XFS_ATTRI_OP_FLAGS_PPTR_SET: + /* Log item, attr name, attr value */ + if (item->ri_total != 3) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + name_len = attri_formatp->alfi_name_len; + value_len = attri_formatp->alfi_value_len; + break; case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: /* Log item, attr name, attr value */ @@ -867,6 +1026,20 @@ xlog_recover_attri_commit_pass2( } name_len = attri_formatp->alfi_name_len; break; + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: + /* + * Log item, attr name, new attr name, attr value, new attr + * value + */ + if (item->ri_total != 5) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + name_len = attri_formatp->alfi_old_name_len; + new_name_len = attri_formatp->alfi_new_name_len; + new_value_len = value_len = attri_formatp->alfi_value_len; + break; default: XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, attri_formatp, len); @@ -881,6 +1054,16 @@ xlog_recover_attri_commit_pass2( return -EFSCORRUPTED; i++; + /* Validate the new attr name */ + if (new_name_len > 0) { + attr_new_name = xfs_attri_validate_name_iovec(mp, + attri_formatp, &item->ri_buf[i], + new_name_len); + if (!attr_new_name) + return -EFSCORRUPTED; + i++; + } + /* Validate the attr value, if present */ if (value_len != 0) { attr_value = xfs_attri_validate_value_iovec(mp, attri_formatp, @@ -890,6 +1073,16 @@ xlog_recover_attri_commit_pass2( i++; } + /* Validate the new attr value, if present */ + if (new_value_len != 0) { + attr_new_value = xfs_attri_validate_value_iovec(mp, + attri_formatp, &item->ri_buf[i], + new_value_len); + if (!attr_new_value) + return -EFSCORRUPTED; + i++; + } + /* * Make sure we got the correct number of buffers for the operation * that we just loaded. @@ -909,12 +1102,17 @@ xlog_recover_attri_commit_pass2( return -EFSCORRUPTED; } fallthrough; + case XFS_ATTRI_OP_FLAGS_PPTR_REMOVE: + case XFS_ATTRI_OP_FLAGS_PPTR_SET: case XFS_ATTRI_OP_FLAGS_SET: case XFS_ATTRI_OP_FLAGS_REPLACE: /* * Regular xattr set/remove/replace operations require a name * and do not take a newname. Values are optional for set and * replace. + * + * Name-value set/remove operations must have a name, do not + * take a newname, and can take a value. */ if (attr_name == NULL || name_len == 0) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, @@ -922,6 +1120,23 @@ xlog_recover_attri_commit_pass2( return -EFSCORRUPTED; } break; + case XFS_ATTRI_OP_FLAGS_PPTR_REPLACE: + /* + * Name-value replace operations require the caller to + * specify the old and new names and values explicitly. + * Values are optional. + */ + if (attr_name == NULL || name_len == 0) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + if (attr_new_name == NULL || new_name_len == 0) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, len); + return -EFSCORRUPTED; + } + break; } /* @@ -930,7 +1145,9 @@ xlog_recover_attri_commit_pass2( * reference. */ nv = xfs_attri_log_nameval_alloc(attr_name, name_len, - attr_value, value_len); + attr_new_name, new_name_len, + attr_value, value_len, + attr_new_value, new_value_len); attrip = xfs_attri_init(mp, nv); memcpy(&attrip->attri_format, attri_formatp, len); diff --git a/fs/xfs/xfs_attr_item.h b/fs/xfs/xfs_attr_item.h index c32b669b0e16..e74128cbb722 100644 --- a/fs/xfs/xfs_attr_item.h +++ b/fs/xfs/xfs_attr_item.h @@ -13,7 +13,9 @@ struct kmem_zone; struct xfs_attri_log_nameval { struct xfs_log_iovec name; + struct xfs_log_iovec new_name; /* PPTR_REPLACE only */ struct xfs_log_iovec value; + struct xfs_log_iovec new_value; /* PPTR_REPLACE only */ refcount_t refcount; /* name and value follow the end of this struct */ -- cgit v1.2.3-59-g8ed1b From a08d6729637428b6ef8c6a5a94d8c6db7b805a44 Mon Sep 17 00:00:00 2001 From: Allison Henderson Date: Mon, 22 Apr 2024 09:47:44 -0700 Subject: xfs: add parent pointer validator functions The attr name of a parent pointer is a string, and the attr value of a parent pointer is (more or less) a file handle. So we need to modify attr_namecheck to verify the parent pointer name, and add a xfs_parent_valuecheck function to sanitize the handle. At the same time, we need to validate attr values during log recovery if the xattr is really a parent pointer. Signed-off-by: Allison Henderson Reviewed-by: Darrick J. Wong [djwong: move functions to xfs_parent.c, adjust for new disk format] Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_attr.c | 5 +++ fs/xfs/libxfs/xfs_parent.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_parent.h | 15 ++++++++ fs/xfs/xfs_attr_item.c | 10 +++++ 5 files changed, 123 insertions(+) create mode 100644 fs/xfs/libxfs/xfs_parent.c create mode 100644 fs/xfs/libxfs/xfs_parent.h (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 4e1eb3b6dbc4..4956ea9a307b 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -42,6 +42,7 @@ xfs-y += $(addprefix libxfs/, \ xfs_inode_buf.o \ xfs_log_rlimit.o \ xfs_ag_resv.o \ + xfs_parent.o \ xfs_rmap.o \ xfs_rmap_btree.o \ xfs_refcount.o \ diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 78c87c405e33..93524efa6e56 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -26,6 +26,7 @@ #include "xfs_trace.h" #include "xfs_attr_item.h" #include "xfs_xattr.h" +#include "xfs_parent.h" struct kmem_cache *xfs_attr_intent_cache; @@ -1568,6 +1569,10 @@ xfs_attr_namecheck( if (length >= MAXNAMELEN) return false; + /* Parent pointers have their own validation. */ + if (attr_flags & XFS_ATTR_PARENT) + return xfs_parent_namecheck(attr_flags, name, length); + /* There shouldn't be any nulls here */ return !memchr(name, 0, length); } diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c new file mode 100644 index 000000000000..5961fa8c8561 --- /dev/null +++ b/fs/xfs/libxfs/xfs_parent.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022-2024 Oracle. + * All rights reserved. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_format.h" +#include "xfs_da_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_bmap_btree.h" +#include "xfs_inode.h" +#include "xfs_error.h" +#include "xfs_trace.h" +#include "xfs_trans.h" +#include "xfs_da_btree.h" +#include "xfs_attr.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_attr_sf.h" +#include "xfs_bmap.h" +#include "xfs_defer.h" +#include "xfs_log.h" +#include "xfs_xattr.h" +#include "xfs_parent.h" +#include "xfs_trans_space.h" + +/* + * Parent pointer attribute handling. + * + * Because the attribute name is a filename component, it will never be longer + * than 255 bytes and must not contain nulls or slashes. These are roughly the + * same constraints that apply to attribute names. + * + * The attribute value must always be a struct xfs_parent_rec. This means the + * attribute will never be in remote format because 12 bytes is nowhere near + * xfs_attr_leaf_entsize_local_max() (~75% of block size). + * + * Creating a new parent attribute will always create a new attribute - there + * should never, ever be an existing attribute in the tree for a new inode. + * ENOSPC behavior is problematic - creating the inode without the parent + * pointer is effectively a corruption, so we allow parent attribute creation + * to dip into the reserve block pool to avoid unexpected ENOSPC errors from + * occurring. + */ + +/* Return true if parent pointer attr name is valid. */ +bool +xfs_parent_namecheck( + unsigned int attr_flags, + const void *name, + size_t length) +{ + /* + * Parent pointers always use logged operations, so there should never + * be incomplete xattrs. + */ + if (attr_flags & XFS_ATTR_INCOMPLETE) + return false; + + return xfs_dir2_namecheck(name, length); +} + +/* Return true if parent pointer attr value is valid. */ +bool +xfs_parent_valuecheck( + struct xfs_mount *mp, + const void *value, + size_t valuelen) +{ + const struct xfs_parent_rec *rec = value; + + if (!xfs_has_parent(mp)) + return false; + + /* The xattr value must be a parent record. */ + if (valuelen != sizeof(struct xfs_parent_rec)) + return false; + + /* The parent record must be local. */ + if (value == NULL) + return false; + + /* The parent inumber must be valid. */ + if (!xfs_verify_dir_ino(mp, be64_to_cpu(rec->p_ino))) + return false; + + return true; +} diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h new file mode 100644 index 000000000000..ef8aff860780 --- /dev/null +++ b/fs/xfs/libxfs/xfs_parent.h @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022-2024 Oracle. + * All Rights Reserved. + */ +#ifndef __XFS_PARENT_H__ +#define __XFS_PARENT_H__ + +/* Metadata validators */ +bool xfs_parent_namecheck(unsigned int attr_flags, const void *name, + size_t length); +bool xfs_parent_valuecheck(struct xfs_mount *mp, const void *value, + size_t valuelen); + +#endif /* __XFS_PARENT_H__ */ diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c index 2898eeb16366..2b10ac4c5fce 100644 --- a/fs/xfs/xfs_attr_item.c +++ b/fs/xfs/xfs_attr_item.c @@ -27,6 +27,7 @@ #include "xfs_error.h" #include "xfs_log_priv.h" #include "xfs_log_recover.h" +#include "xfs_parent.h" struct kmem_cache *xfs_attri_cache; struct kmem_cache *xfs_attrd_cache; @@ -973,6 +974,15 @@ xfs_attri_validate_value_iovec( return NULL; } + if ((attri_formatp->alfi_attr_filter & XFS_ATTR_PARENT) && + !xfs_parent_valuecheck(mp, iovec->i_addr, value_len)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + attri_formatp, sizeof(*attri_formatp)); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + iovec->i_addr, iovec->i_len); + return NULL; + } + return iovec->i_addr; } -- cgit v1.2.3-59-g8ed1b From fb102fe7fe02e70f8a49cc7f74bc0769cdab2912 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:47:46 -0700 Subject: xfs: create a hashname function for parent pointers Although directory entry and parent pointer recordsets look very similar (name -> ino), there's one major difference between them: a file can be hardlinked from multiple parent directories with the same filename. This is common in shared container environments where a base directory tree might be hardlink-copied multiple times. IOWs the same 'ls' program might be hardlinked to multiple /srv/*/bin/ls paths. We don't want parent pointer operations to bog down on hash collisions between the same dirent name, so create a special hash function that mixes in the parent directory inode number. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 3 +++ fs/xfs/libxfs/xfs_parent.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_parent.h | 5 +++++ fs/xfs/scrub/attr.c | 4 ++++ 4 files changed, 59 insertions(+) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 93524efa6e56..8c283e5c2470 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -437,6 +437,9 @@ xfs_attr_hashval( { ASSERT(xfs_attr_check_namespace(attr_flags)); + if (attr_flags & XFS_ATTR_PARENT) + return xfs_parent_hashattr(mp, name, namelen, value, valuelen); + return xfs_attr_hashname(name, namelen); } diff --git a/fs/xfs/libxfs/xfs_parent.c b/fs/xfs/libxfs/xfs_parent.c index 5961fa8c8561..d564baf2549c 100644 --- a/fs/xfs/libxfs/xfs_parent.c +++ b/fs/xfs/libxfs/xfs_parent.c @@ -90,3 +90,50 @@ xfs_parent_valuecheck( return true; } + +/* Compute the attribute name hash for a parent pointer. */ +xfs_dahash_t +xfs_parent_hashval( + struct xfs_mount *mp, + const uint8_t *name, + int namelen, + xfs_ino_t parent_ino) +{ + struct xfs_name xname = { + .name = name, + .len = namelen, + }; + + /* + * Use the same dirent name hash as would be used on the directory, but + * mix in the parent inode number to avoid collisions on hardlinked + * files with identical names but different parents. + */ + return xfs_dir2_hashname(mp, &xname) ^ + upper_32_bits(parent_ino) ^ lower_32_bits(parent_ino); +} + +/* Compute the attribute name hash from the xattr components. */ +xfs_dahash_t +xfs_parent_hashattr( + struct xfs_mount *mp, + const uint8_t *name, + int namelen, + const void *value, + int valuelen) +{ + const struct xfs_parent_rec *rec = value; + + /* Requires a local attr value in xfs_parent_rec format */ + if (valuelen != sizeof(struct xfs_parent_rec)) { + ASSERT(valuelen == sizeof(struct xfs_parent_rec)); + return 0; + } + + if (!value) { + ASSERT(value != NULL); + return 0; + } + + return xfs_parent_hashval(mp, name, namelen, be64_to_cpu(rec->p_ino)); +} diff --git a/fs/xfs/libxfs/xfs_parent.h b/fs/xfs/libxfs/xfs_parent.h index ef8aff860780..6a4028871b72 100644 --- a/fs/xfs/libxfs/xfs_parent.h +++ b/fs/xfs/libxfs/xfs_parent.h @@ -12,4 +12,9 @@ bool xfs_parent_namecheck(unsigned int attr_flags, const void *name, bool xfs_parent_valuecheck(struct xfs_mount *mp, const void *value, size_t valuelen); +xfs_dahash_t xfs_parent_hashval(struct xfs_mount *mp, const uint8_t *name, + int namelen, xfs_ino_t parent_ino); +xfs_dahash_t xfs_parent_hashattr(struct xfs_mount *mp, const uint8_t *name, + int namelen, const void *value, int valuelen); + #endif /* __XFS_PARENT_H__ */ diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 22d7ef4df169..c07d050b39b2 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -536,6 +536,10 @@ xchk_xattr_rec( xchk_da_set_corrupt(ds, level); goto out; } + if (ent->flags & XFS_ATTR_PARENT) { + xchk_da_set_corrupt(ds, level); + goto out; + } calc_hash = xfs_attr_hashval(mp, ent->flags, rentry->name, rentry->namelen, NULL, be32_to_cpu(rentry->valuelen)); -- cgit v1.2.3-59-g8ed1b From e7420e75ef04787bc51688fc9bbca7da4d164a1e Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:48:06 -0700 Subject: xfs: remove some boilerplate from xfs_attr_set In preparation for online/offline repair wanting to use xfs_attr_set, move some of the boilerplate out of this function into the callers. Repair can initialize the da_args completely, and the userspace flag handling/twisting goes away once we move it to xfs_attr_change. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 33 ++++++++++++--------------------- fs/xfs/scrub/attr_repair.c | 4 ++++ fs/xfs/xfs_xattr.c | 24 ++++++++++++++++++++++-- 3 files changed, 38 insertions(+), 23 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 8c283e5c2470..df8418671c37 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -948,6 +948,16 @@ xfs_attr_lookup( return error; } +/* + * Make a change to the xattr structure. + * + * The caller must have initialized @args, attached dquots, and must not hold + * any ILOCKs. + * + * Returns -EEXIST for XFS_ATTRUPDATE_CREATE if the name already exists. + * Returns -ENOATTR for XFS_ATTRUPDATE_REMOVE if the name does not exist. + * Returns 0 on success, or a negative errno if something else went wrong. + */ int xfs_attr_set( struct xfs_da_args *args, @@ -961,27 +971,7 @@ xfs_attr_set( int rmt_blks = 0; unsigned int total; - if (xfs_is_shutdown(dp->i_mount)) - return -EIO; - - error = xfs_qm_dqattach(dp); - if (error) - return error; - - if (!args->owner) - args->owner = args->dp->i_ino; - args->geo = mp->m_attr_geo; - args->whichfork = XFS_ATTR_FORK; - xfs_attr_sethash(args); - - /* - * We have no control over the attribute names that userspace passes us - * to remove, so we have to allow the name lookup prior to attribute - * removal to fail as well. Preserve the logged flag, since we need - * to pass that through to the logging code. - */ - args->op_flags = XFS_DA_OP_OKNOENT | - (args->op_flags & XFS_DA_OP_LOGGED); + ASSERT(!args->trans); switch (op) { case XFS_ATTRUPDATE_UPSERT: @@ -1076,6 +1066,7 @@ xfs_attr_set( error = xfs_trans_commit(args->trans); out_unlock: xfs_iunlock(dp, XFS_ILOCK_EXCL); + args->trans = NULL; return error; out_trans_cancel: diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c index 8b89c112c492..67c0ec0d1dbb 100644 --- a/fs/xfs/scrub/attr_repair.c +++ b/fs/xfs/scrub/attr_repair.c @@ -558,6 +558,9 @@ xrep_xattr_insert_rec( .namelen = key->namelen, .valuelen = key->valuelen, .owner = rx->sc->ip->i_ino, + .geo = rx->sc->mp->m_attr_geo, + .whichfork = XFS_ATTR_FORK, + .op_flags = XFS_DA_OP_OKNOENT, }; struct xchk_xattr_buf *ab = rx->sc->buf; int error; @@ -602,6 +605,7 @@ xrep_xattr_insert_rec( * xfs_attr_set creates and commits its own transaction. If the attr * already exists, we'll just drop it during the rebuild. */ + xfs_attr_sethash(&args); error = xfs_attr_set(&args, XFS_ATTRUPDATE_CREATE); if (error == -EEXIST) error = 0; diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index b43f7081b0f4..bbdbe9026658 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -17,6 +17,7 @@ #include "xfs_acl.h" #include "xfs_log.h" #include "xfs_xattr.h" +#include "xfs_quota.h" #include @@ -70,7 +71,9 @@ xfs_attr_want_log_assist( /* * Set or remove an xattr, having grabbed the appropriate logging resources - * prior to calling libxfs. + * prior to calling libxfs. Callers of this function are only required to + * initialize the inode, attr_filter, name, namelen, value, and valuelen fields + * of @args. */ int xfs_attr_change( @@ -80,7 +83,19 @@ xfs_attr_change( struct xfs_mount *mp = args->dp->i_mount; int error; - ASSERT(!(args->op_flags & XFS_DA_OP_LOGGED)); + if (xfs_is_shutdown(mp)) + return -EIO; + + error = xfs_qm_dqattach(args->dp); + if (error) + return error; + + /* + * We have no control over the attribute names that userspace passes us + * to remove, so we have to allow the name lookup prior to attribute + * removal to fail as well. + */ + args->op_flags = XFS_DA_OP_OKNOENT; if (xfs_attr_want_log_assist(mp)) { error = xfs_attr_grab_log_assist(mp); @@ -90,6 +105,11 @@ xfs_attr_change( args->op_flags |= XFS_DA_OP_LOGGED; } + args->owner = args->dp->i_ino; + args->geo = mp->m_attr_geo; + args->whichfork = XFS_ATTR_FORK; + xfs_attr_sethash(args); + return xfs_attr_set(args, op); } -- cgit v1.2.3-59-g8ed1b From bf61c36a45d4c215994699a7a06a00c58d22e8a2 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:48:07 -0700 Subject: xfs: make the reserved block permission flag explicit in xfs_attr_set Make the use of reserved blocks an explicit parameter to xfs_attr_set. Userspace setting XFS_ATTR_ROOT attrs should continue to be able to use it, but for online repairs we can back out and therefore do not care. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 6 +++--- fs/xfs/libxfs/xfs_attr.h | 2 +- fs/xfs/scrub/attr_repair.c | 2 +- fs/xfs/xfs_xattr.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index df8418671c37..c98145596f02 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -952,7 +952,7 @@ xfs_attr_lookup( * Make a change to the xattr structure. * * The caller must have initialized @args, attached dquots, and must not hold - * any ILOCKs. + * any ILOCKs. Reserved data blocks may be used if @rsvd is set. * * Returns -EEXIST for XFS_ATTRUPDATE_CREATE if the name already exists. * Returns -ENOATTR for XFS_ATTRUPDATE_REMOVE if the name does not exist. @@ -961,12 +961,12 @@ xfs_attr_lookup( int xfs_attr_set( struct xfs_da_args *args, - enum xfs_attr_update op) + enum xfs_attr_update op, + bool rsvd) { struct xfs_inode *dp = args->dp; struct xfs_mount *mp = dp->i_mount; struct xfs_trans_res tres; - bool rsvd = (args->attr_filter & XFS_ATTR_ROOT); int error, local; int rmt_blks = 0; unsigned int total; diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index d12583dd7eec..43dee4cbaab2 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -558,7 +558,7 @@ enum xfs_attr_update { XFS_ATTRUPDATE_REPLACE, /* set value, fail if attr does not exist */ }; -int xfs_attr_set(struct xfs_da_args *args, enum xfs_attr_update op); +int xfs_attr_set(struct xfs_da_args *args, enum xfs_attr_update op, bool rsvd); int xfs_attr_set_iter(struct xfs_attr_intent *attr); int xfs_attr_remove_iter(struct xfs_attr_intent *attr); bool xfs_attr_check_namespace(unsigned int attr_flags); diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c index 67c0ec0d1dbb..cbcc446d5119 100644 --- a/fs/xfs/scrub/attr_repair.c +++ b/fs/xfs/scrub/attr_repair.c @@ -606,7 +606,7 @@ xrep_xattr_insert_rec( * already exists, we'll just drop it during the rebuild. */ xfs_attr_sethash(&args); - error = xfs_attr_set(&args, XFS_ATTRUPDATE_CREATE); + error = xfs_attr_set(&args, XFS_ATTRUPDATE_CREATE, false); if (error == -EEXIST) error = 0; diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index bbdbe9026658..ab3d22f662f2 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -110,7 +110,7 @@ xfs_attr_change( args->whichfork = XFS_ATTR_FORK; xfs_attr_sethash(args); - return xfs_attr_set(args, op); + return xfs_attr_set(args, op, args->attr_filter & XFS_ATTR_ROOT); } -- cgit v1.2.3-59-g8ed1b From 55edcd1f86474f973fccf5c5ccc8bc7908893142 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:48:15 -0700 Subject: xfs: split xfs_bmap_add_attrfork into two pieces Split this function into two pieces -- one to make the actual changes to the inode core to add the attr fork, and another one to deal with getting the transaction and locking the inodes. The next couple of patches will need this to be split into two. One patch implements committing new parent pointer recordsets to damaged files. If one file has an attr fork and the other does not, we have to create the missing attr fork before the atomic swap transaction, and can use the behavior encoded in the current xfs_bmap_add_attrfork. The second patch adapts /lost+found adoptions to handle parent pointers correctly. The adoption process will add a parent pointer to a child that is being moved to /lost+found, but this requires that the attr fork already exists. We don't know if we're actually going to commit the adoption until we've already reserved a transaction and taken the ILOCKs, which means that we must have a way to bypass the start of the current xfs_bmap_add_attrfork. Therefore, create xfs_attr_add_fork as the helper that creates a transaction and takes locks; and make xfs_bmap_add_attrfork the function that updates the inode core and allocates the incore attr fork. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 39 ++++++++++++++++++++++++++++++++++++++- fs/xfs/libxfs/xfs_bmap.c | 36 ++++++++++-------------------------- fs/xfs/libxfs/xfs_bmap.h | 3 ++- 3 files changed, 50 insertions(+), 28 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index c98145596f02..ab6ec2f15d76 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -948,6 +948,43 @@ xfs_attr_lookup( return error; } +STATIC int +xfs_attr_add_fork( + struct xfs_inode *ip, /* incore inode pointer */ + int size, /* space new attribute needs */ + int rsvd) /* xact may use reserved blks */ +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; /* transaction pointer */ + unsigned int blks; /* space reservation */ + int error; /* error return value */ + + ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); + + blks = XFS_ADDAFORK_SPACE_RES(mp); + + error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0, + rsvd, &tp); + if (error) + return error; + + if (xfs_inode_has_attr_fork(ip)) + goto trans_cancel; + + error = xfs_bmap_add_attrfork(tp, ip, size, rsvd); + if (error) + goto trans_cancel; + + error = xfs_trans_commit(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; + +trans_cancel: + xfs_trans_cancel(tp); + xfs_iunlock(ip, XFS_ILOCK_EXCL); + return error; +} + /* * Make a change to the xattr structure. * @@ -989,7 +1026,7 @@ xfs_attr_set( xfs_attr_sf_entsize_byname(args->namelen, args->valuelen); - error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); + error = xfs_attr_add_fork(dp, sf_size, rsvd); if (error) return error; } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 1f528cf2d906..6053f5e5c71e 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1025,38 +1025,29 @@ xfs_bmap_set_attrforkoff( } /* - * Convert inode from non-attributed to attributed. - * Must not be in a transaction, ip must not be locked. + * Convert inode from non-attributed to attributed. Caller must hold the + * ILOCK_EXCL and the file cannot have an attr fork. */ int /* error code */ xfs_bmap_add_attrfork( - xfs_inode_t *ip, /* incore inode pointer */ + struct xfs_trans *tp, + struct xfs_inode *ip, /* incore inode pointer */ int size, /* space new attribute needs */ int rsvd) /* xact may use reserved blks */ { - xfs_mount_t *mp; /* mount structure */ - xfs_trans_t *tp; /* transaction pointer */ - int blks; /* space reservation */ + struct xfs_mount *mp = tp->t_mountp; int version = 1; /* superblock attr version */ int logflags; /* logging flags */ int error; /* error return value */ - mp = ip->i_mount; + xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); - - blks = XFS_ADDAFORK_SPACE_RES(mp); - - error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_addafork, blks, 0, - rsvd, &tp); - if (error) - return error; - if (xfs_inode_has_attr_fork(ip)) - goto trans_cancel; + ASSERT(!xfs_inode_has_attr_fork(ip)); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); error = xfs_bmap_set_attrforkoff(ip, size, &version); if (error) - goto trans_cancel; + return error; xfs_ifork_init_attr(ip, XFS_DINODE_FMT_EXTENTS, 0); logflags = 0; @@ -1077,7 +1068,7 @@ xfs_bmap_add_attrfork( if (logflags) xfs_trans_log_inode(tp, ip, logflags); if (error) - goto trans_cancel; + return error; if (!xfs_has_attr(mp) || (!xfs_has_attr2(mp) && version == 2)) { bool log_sb = false; @@ -1096,14 +1087,7 @@ xfs_bmap_add_attrfork( xfs_log_sb(tp); } - error = xfs_trans_commit(tp); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; - -trans_cancel: - xfs_trans_cancel(tp); - xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; + return 0; } /* diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 32fb2a455c29..e98849eb9bba 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -176,7 +176,8 @@ int xfs_bmap_longest_free_extent(struct xfs_perag *pag, void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); unsigned int xfs_bmap_compute_attr_offset(struct xfs_mount *mp); -int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); +int xfs_bmap_add_attrfork(struct xfs_trans *tp, struct xfs_inode *ip, + int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork); int xfs_bmap_local_to_extents(struct xfs_trans *tp, struct xfs_inode *ip, -- cgit v1.2.3-59-g8ed1b From a26dc21309af68623b82b4e366cbbeb5a85ce65b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 22 Apr 2024 09:48:16 -0700 Subject: xfs: actually rebuild the parent pointer xattrs Once we've assembled all the parent pointers for a file, we need to commit the new dataset atomically to that file. Parent pointer records are embedded in the xattr structure, which means that we must write a new extended attribute structure, again, atomically. Therefore, we must copy the non-parent-pointer attributes from the file being repaired into the temporary file's extended attributes and then call the atomic extent swap mechanism to exchange the blocks. Signed-off-by: Darrick J. Wong Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 2 +- fs/xfs/libxfs/xfs_attr.h | 1 + fs/xfs/scrub/attr_repair.c | 4 +- fs/xfs/scrub/attr_repair.h | 4 + fs/xfs/scrub/findparent.c | 2 + fs/xfs/scrub/parent_repair.c | 709 +++++++++++++++++++++++++++++++++++++++++-- fs/xfs/scrub/trace.h | 2 + 7 files changed, 701 insertions(+), 23 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index ab6ec2f15d76..1c2a27fce08a 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -948,7 +948,7 @@ xfs_attr_lookup( return error; } -STATIC int +int xfs_attr_add_fork( struct xfs_inode *ip, /* incore inode pointer */ int size, /* space new attribute needs */ diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h index 43dee4cbaab2..088cb7b30168 100644 --- a/fs/xfs/libxfs/xfs_attr.h +++ b/fs/xfs/libxfs/xfs_attr.h @@ -648,5 +648,6 @@ int __init xfs_attr_intent_init_cache(void); void xfs_attr_intent_destroy_cache(void); int xfs_attr_sf_totsize(struct xfs_inode *dp); +int xfs_attr_add_fork(struct xfs_inode *ip, int size, int rsvd); #endif /* __XFS_ATTR_H__ */ diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c index 87f6c9cb54eb..e059813b92b7 100644 --- a/fs/xfs/scrub/attr_repair.c +++ b/fs/xfs/scrub/attr_repair.c @@ -1030,7 +1030,7 @@ xrep_xattr_reset_fork( * fork. The caller must ILOCK the tempfile and join it to the transaction. * This function returns with the inode joined to a clean scrub transaction. */ -STATIC int +int xrep_xattr_reset_tempfile_fork( struct xfs_scrub *sc) { @@ -1336,7 +1336,7 @@ xrep_xattr_swap_prep( } /* Exchange the temporary file's attribute fork with the one being repaired. */ -STATIC int +int xrep_xattr_swap( struct xfs_scrub *sc, struct xrep_tempexch *tx) diff --git a/fs/xfs/scrub/attr_repair.h b/fs/xfs/scrub/attr_repair.h index 0a9ffa7cfa90..979729bd4a5f 100644 --- a/fs/xfs/scrub/attr_repair.h +++ b/fs/xfs/scrub/attr_repair.h @@ -6,6 +6,10 @@ #ifndef __XFS_SCRUB_ATTR_REPAIR_H__ #define __XFS_SCRUB_ATTR_REPAIR_H__ +struct xrep_tempexch; + +int xrep_xattr_swap(struct xfs_scrub *sc, struct xrep_tempexch *tx); int xrep_xattr_reset_fork(struct xfs_scrub *sc); +int xrep_xattr_reset_tempfile_fork(struct xfs_scrub *sc); #endif /* __XFS_SCRUB_ATTR_REPAIR_H__ */ diff --git a/fs/xfs/scrub/findparent.c b/fs/xfs/scrub/findparent.c index c78422ad757b..01766041ba2c 100644 --- a/fs/xfs/scrub/findparent.c +++ b/fs/xfs/scrub/findparent.c @@ -24,6 +24,7 @@ #include "xfs_trans_space.h" #include "xfs_health.h" #include "xfs_exchmaps.h" +#include "xfs_parent.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -33,6 +34,7 @@ #include "scrub/findparent.h" #include "scrub/readdir.h" #include "scrub/tempfile.h" +#include "scrub/listxattr.h" /* * Finding the Parent of a Directory diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c index 311bc7990d7c..28e9746c0663 100644 --- a/fs/xfs/scrub/parent_repair.c +++ b/fs/xfs/scrub/parent_repair.c @@ -25,6 +25,8 @@ #include "xfs_health.h" #include "xfs_exchmaps.h" #include "xfs_parent.h" +#include "xfs_attr.h" +#include "xfs_bmap.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -34,10 +36,13 @@ #include "scrub/findparent.h" #include "scrub/readdir.h" #include "scrub/tempfile.h" +#include "scrub/tempexch.h" #include "scrub/orphanage.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" #include "scrub/xfblob.h" +#include "scrub/attr_repair.h" +#include "scrub/listxattr.h" /* * Repairing The Directory Parent Pointer @@ -65,9 +70,9 @@ * * When salvaging completes, the remaining stashed entries are replayed to the * temporary file. All non-parent pointer extended attributes are copied to - * the temporary file's extended attributes. An atomic extent swap is used to - * commit the new directory blocks to the directory being repaired. This will - * disrupt attrmulti cursors. + * the temporary file's extended attributes. An atomic file mapping exchange + * is used to commit the new xattr blocks to the file being repaired. This + * will disrupt attrmulti cursors. */ /* Create a parent pointer in the tempfile. */ @@ -106,6 +111,23 @@ struct xrep_parent { /* Blobs containing parent pointer names. */ struct xfblob *pptr_names; + /* xattr keys */ + struct xfarray *xattr_records; + + /* xattr values */ + struct xfblob *xattr_blobs; + + /* Scratch buffers for saving extended attributes */ + unsigned char *xattr_name; + void *xattr_value; + unsigned int xattr_value_sz; + + /* + * Information used to exchange the attr fork mappings, if the fs + * supports parent pointers. + */ + struct xrep_tempexch tx; + /* * Information used to scan the filesystem to find the inumber of the * dotdot entry for this directory. On filesystems without parent @@ -117,6 +139,8 @@ struct xrep_parent { * @pscan.lock coordinates access to pptr_recs, pptr_names, pptr, and * pptr_scratch. This reduces the memory requirements of this * structure. + * + * The lock also controls access to xattr_records and xattr_blobs(?) */ struct xrep_parent_scan_info pscan; @@ -129,14 +153,48 @@ struct xrep_parent { /* Scratch buffer for scanning pptr xattrs */ struct xfs_da_args pptr_args; + + /* Have we seen any live updates of parent pointers recently? */ + bool saw_pptr_updates; }; +struct xrep_parent_xattr { + /* Cookie for retrieval of the xattr name. */ + xfblob_cookie name_cookie; + + /* Cookie for retrieval of the xattr value. */ + xfblob_cookie value_cookie; + + /* XFS_ATTR_* flags */ + int flags; + + /* Length of the value and name. */ + uint32_t valuelen; + uint16_t namelen; +}; + +/* + * Stash up to 8 pages of attrs in xattr_records/xattr_blobs before we write + * them to the temp file. + */ +#define XREP_PARENT_XATTR_MAX_STASH_BYTES (PAGE_SIZE * 8) + /* Tear down all the incore stuff we created. */ static void xrep_parent_teardown( struct xrep_parent *rp) { xrep_findparent_scan_teardown(&rp->pscan); + kvfree(rp->xattr_name); + rp->xattr_name = NULL; + kvfree(rp->xattr_value); + rp->xattr_value = NULL; + if (rp->xattr_blobs) + xfblob_destroy(rp->xattr_blobs); + rp->xattr_blobs = NULL; + if (rp->xattr_records) + xfarray_destroy(rp->xattr_records); + rp->xattr_records = NULL; if (rp->pptr_names) xfblob_destroy(rp->pptr_names); rp->pptr_names = NULL; @@ -556,10 +614,11 @@ xrep_parent_scan_dirtree( } /* - * Cancel the empty transaction so that we can (later) use the atomic - * extent swap helpers to lock files and commit the new directory. + * Retake sc->ip's ILOCK now that we're done flushing stashed parent + * pointers. We end this function with an empty transaction and the + * ILOCK. */ - xchk_trans_cancel(rp->sc); + xchk_ilock(rp->sc, XFS_ILOCK_EXCL); return 0; } @@ -594,6 +653,8 @@ xrep_parent_live_update( else error = xrep_parent_stash_parentremove(rp, p->name, p->dp); + if (!error) + rp->saw_pptr_updates = true; mutex_unlock(&rp->pscan.lock); if (error) goto out_abort; @@ -648,6 +709,55 @@ xrep_parent_reset_dotdot( return xfs_trans_roll(&sc->tp); } +/* Pass back the parent inumber if this a parent pointer */ +STATIC int +xrep_parent_lookup_pptr( + struct xfs_scrub *sc, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + void *priv) +{ + xfs_ino_t *inop = priv; + xfs_ino_t parent_ino; + int error; + + if (!(attr_flags & XFS_ATTR_PARENT)) + return 0; + + error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value, + valuelen, &parent_ino, NULL); + if (error) + return error; + + *inop = parent_ino; + return -ECANCELED; +} + +/* + * Find the first parent of the scrub target by walking parent pointers for + * the purpose of deciding if we're going to move it to the orphanage. + * We don't care if the attr fork is zapped. + */ +STATIC int +xrep_parent_lookup_pptrs( + struct xfs_scrub *sc, + xfs_ino_t *inop) +{ + int error; + + *inop = NULLFSINO; + + error = xchk_xattr_walk(sc, sc->ip, xrep_parent_lookup_pptr, NULL, + inop); + if (error && error != -ECANCELED) + return error; + return 0; +} + /* * Move the current file to the orphanage. * @@ -664,14 +774,26 @@ xrep_parent_move_to_orphanage( xfs_ino_t orig_parent, new_parent; int error; - /* - * We are about to drop the ILOCK on sc->ip to lock the orphanage and - * prepare for the adoption. Therefore, look up the old dotdot entry - * for sc->ip so that we can compare it after we re-lock sc->ip. - */ - error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &orig_parent); - if (error) - return error; + if (S_ISDIR(VFS_I(sc->ip)->i_mode)) { + /* + * We are about to drop the ILOCK on sc->ip to lock the + * orphanage and prepare for the adoption. Therefore, look up + * the old dotdot entry for sc->ip so that we can compare it + * after we re-lock sc->ip. + */ + error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, + &orig_parent); + if (error) + return error; + } else { + /* + * We haven't dropped the ILOCK since we committed the new + * xattr structure (and hence the new parent pointer records), + * which means that the file cannot have been moved in the + * directory tree, and there are no parents. + */ + orig_parent = NULLFSINO; + } /* * Drop the ILOCK on the scrub target and commit the transaction. @@ -704,9 +826,14 @@ xrep_parent_move_to_orphanage( * Now that we've reacquired the ILOCK on sc->ip, look up the dotdot * entry again. If the parent changed or the child was unlinked while * the child directory was unlocked, we don't need to move the child to - * the orphanage after all. + * the orphanage after all. For a non-directory, we have to scan for + * the first parent pointer to see if one has been added. */ - error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, &new_parent); + if (S_ISDIR(VFS_I(sc->ip)->i_mode)) + error = xchk_dir_lookup(sc, sc->ip, &xfs_name_dotdot, + &new_parent); + else + error = xrep_parent_lookup_pptrs(sc, &new_parent); if (error) return error; @@ -733,6 +860,488 @@ xrep_parent_move_to_orphanage( return 0; } +/* Ensure that the xattr value buffer is large enough. */ +STATIC int +xrep_parent_alloc_xattr_value( + struct xrep_parent *rp, + size_t bufsize) +{ + void *new_val; + + if (rp->xattr_value_sz >= bufsize) + return 0; + + if (rp->xattr_value) { + kvfree(rp->xattr_value); + rp->xattr_value = NULL; + rp->xattr_value_sz = 0; + } + + new_val = kvmalloc(bufsize, XCHK_GFP_FLAGS); + if (!new_val) + return -ENOMEM; + + rp->xattr_value = new_val; + rp->xattr_value_sz = bufsize; + return 0; +} + +/* Retrieve the (remote) value of a non-pptr xattr. */ +STATIC int +xrep_parent_fetch_xattr_remote( + struct xrep_parent *rp, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + unsigned int valuelen) +{ + struct xfs_scrub *sc = rp->sc; + struct xfs_da_args args = { + .attr_filter = attr_flags & XFS_ATTR_NSP_ONDISK_MASK, + .geo = sc->mp->m_attr_geo, + .whichfork = XFS_ATTR_FORK, + .dp = ip, + .name = name, + .namelen = namelen, + .trans = sc->tp, + .valuelen = valuelen, + .owner = ip->i_ino, + }; + int error; + + /* + * If we need a larger value buffer, try to allocate one. If that + * fails, return with -EDEADLOCK to try harder. + */ + error = xrep_parent_alloc_xattr_value(rp, valuelen); + if (error == -ENOMEM) + return -EDEADLOCK; + if (error) + return error; + + args.value = rp->xattr_value; + xfs_attr_sethash(&args); + return xfs_attr_get_ilocked(&args); +} + +/* Stash non-pptr attributes for later replay into the temporary file. */ +STATIC int +xrep_parent_stash_xattr( + struct xfs_scrub *sc, + struct xfs_inode *ip, + unsigned int attr_flags, + const unsigned char *name, + unsigned int namelen, + const void *value, + unsigned int valuelen, + void *priv) +{ + struct xrep_parent_xattr key = { + .valuelen = valuelen, + .namelen = namelen, + .flags = attr_flags & XFS_ATTR_NSP_ONDISK_MASK, + }; + struct xrep_parent *rp = priv; + int error; + + if (attr_flags & (XFS_ATTR_INCOMPLETE | XFS_ATTR_PARENT)) + return 0; + + if (!value) { + error = xrep_parent_fetch_xattr_remote(rp, ip, attr_flags, + name, namelen, valuelen); + if (error) + return error; + + value = rp->xattr_value; + } + + trace_xrep_parent_stash_xattr(rp->sc->tempip, key.flags, (void *)name, + key.namelen, key.valuelen); + + error = xfblob_store(rp->xattr_blobs, &key.name_cookie, name, + key.namelen); + if (error) + return error; + + error = xfblob_store(rp->xattr_blobs, &key.value_cookie, value, + key.valuelen); + if (error) + return error; + + return xfarray_append(rp->xattr_records, &key); +} + +/* Insert one xattr key/value. */ +STATIC int +xrep_parent_insert_xattr( + struct xrep_parent *rp, + const struct xrep_parent_xattr *key) +{ + struct xfs_da_args args = { + .dp = rp->sc->tempip, + .attr_filter = key->flags, + .namelen = key->namelen, + .valuelen = key->valuelen, + .owner = rp->sc->ip->i_ino, + .geo = rp->sc->mp->m_attr_geo, + .whichfork = XFS_ATTR_FORK, + .op_flags = XFS_DA_OP_OKNOENT, + }; + int error; + + ASSERT(!(key->flags & XFS_ATTR_PARENT)); + + /* + * Grab pointers to the scrub buffer so that we can use them to insert + * attrs into the temp file. + */ + args.name = rp->xattr_name; + args.value = rp->xattr_value; + + /* + * The attribute name is stored near the end of the in-core buffer, + * though we reserve one more byte to ensure null termination. + */ + rp->xattr_name[XATTR_NAME_MAX] = 0; + + error = xfblob_load(rp->xattr_blobs, key->name_cookie, rp->xattr_name, + key->namelen); + if (error) + return error; + + error = xfblob_free(rp->xattr_blobs, key->name_cookie); + if (error) + return error; + + error = xfblob_load(rp->xattr_blobs, key->value_cookie, args.value, + key->valuelen); + if (error) + return error; + + error = xfblob_free(rp->xattr_blobs, key->value_cookie); + if (error) + return error; + + rp->xattr_name[key->namelen] = 0; + + trace_xrep_parent_insert_xattr(rp->sc->tempip, key->flags, + rp->xattr_name, key->namelen, key->valuelen); + + xfs_attr_sethash(&args); + return xfs_attr_set(&args, XFS_ATTRUPDATE_UPSERT, false); +} + +/* + * Periodically flush salvaged attributes to the temporary file. This is done + * to reduce the memory requirements of the xattr rebuild because files can + * contain millions of attributes. + */ +STATIC int +xrep_parent_flush_xattrs( + struct xrep_parent *rp) +{ + xfarray_idx_t array_cur; + int error; + + /* + * Entering this function, the scrub context has a reference to the + * inode being repaired, the temporary file, and the empty scrub + * transaction that we created for the xattr scan. We hold ILOCK_EXCL + * on the inode being repaired. + * + * To constrain kernel memory use, we occasionally flush salvaged + * xattrs from the xfarray and xfblob structures into the temporary + * file in preparation for exchanging the xattr structures at the end. + * Updating the temporary file requires a transaction, so we commit the + * scrub transaction and drop the ILOCK so that xfs_attr_set can + * allocate whatever transaction it wants. + * + * We still hold IOLOCK_EXCL on the inode being repaired, which + * prevents anyone from adding xattrs (or parent pointers) while we're + * flushing. + */ + xchk_trans_cancel(rp->sc); + xchk_iunlock(rp->sc, XFS_ILOCK_EXCL); + + /* + * Take the IOLOCK of the temporary file while we modify xattrs. This + * isn't strictly required because the temporary file is never revealed + * to userspace, but we follow the same locking rules. We still hold + * sc->ip's IOLOCK. + */ + error = xrep_tempfile_iolock_polled(rp->sc); + if (error) + return error; + + /* Add all the salvaged attrs to the temporary file. */ + foreach_xfarray_idx(rp->xattr_records, array_cur) { + struct xrep_parent_xattr key; + + error = xfarray_load(rp->xattr_records, array_cur, &key); + if (error) + return error; + + error = xrep_parent_insert_xattr(rp, &key); + if (error) + return error; + } + + /* Empty out both arrays now that we've added the entries. */ + xfarray_truncate(rp->xattr_records); + xfblob_truncate(rp->xattr_blobs); + + xrep_tempfile_iounlock(rp->sc); + + /* Recreate the empty transaction and relock the inode. */ + error = xchk_trans_alloc_empty(rp->sc); + if (error) + return error; + xchk_ilock(rp->sc, XFS_ILOCK_EXCL); + return 0; +} + +/* Decide if we've stashed too much xattr data in memory. */ +static inline bool +xrep_parent_want_flush_xattrs( + struct xrep_parent *rp) +{ + unsigned long long bytes; + + bytes = xfarray_bytes(rp->xattr_records) + + xfblob_bytes(rp->xattr_blobs); + return bytes > XREP_PARENT_XATTR_MAX_STASH_BYTES; +} + +/* Flush staged attributes to the temporary file if we're over the limit. */ +STATIC int +xrep_parent_try_flush_xattrs( + struct xfs_scrub *sc, + void *priv) +{ + struct xrep_parent *rp = priv; + int error; + + if (!xrep_parent_want_flush_xattrs(rp)) + return 0; + + error = xrep_parent_flush_xattrs(rp); + if (error) + return error; + + /* + * If there were any parent pointer updates to the xattr structure + * while we dropped the ILOCK, the xattr structure is now stale. + * Signal to the attr copy process that we need to start over, but + * this time without opportunistic attr flushing. + * + * This is unlikely to happen, so we're ok with restarting the copy. + */ + mutex_lock(&rp->pscan.lock); + if (rp->saw_pptr_updates) + error = -ESTALE; + mutex_unlock(&rp->pscan.lock); + return error; +} + +/* Copy all the non-pptr extended attributes into the temporary file. */ +STATIC int +xrep_parent_copy_xattrs( + struct xrep_parent *rp) +{ + struct xfs_scrub *sc = rp->sc; + int error; + + /* + * Clear the pptr updates flag. We hold sc->ip ILOCKed, so there + * can't be any parent pointer updates in progress. + */ + mutex_lock(&rp->pscan.lock); + rp->saw_pptr_updates = false; + mutex_unlock(&rp->pscan.lock); + + /* Copy xattrs, stopping periodically to flush the incore buffers. */ + error = xchk_xattr_walk(sc, sc->ip, xrep_parent_stash_xattr, + xrep_parent_try_flush_xattrs, rp); + if (error && error != -ESTALE) + return error; + + if (error == -ESTALE) { + /* + * The xattr copy collided with a parent pointer update. + * Restart the copy, but this time hold the ILOCK all the way + * to the end to lock out any directory parent pointer updates. + */ + error = xchk_xattr_walk(sc, sc->ip, xrep_parent_stash_xattr, + NULL, rp); + if (error) + return error; + } + + /* Flush any remaining stashed xattrs to the temporary file. */ + if (xfarray_bytes(rp->xattr_records) == 0) + return 0; + + return xrep_parent_flush_xattrs(rp); +} + +/* + * Ensure that @sc->ip and @sc->tempip both have attribute forks before we head + * into the attr fork exchange transaction. All files on a filesystem with + * parent pointers must have an attr fork because the parent pointer code does + * not itself add attribute forks. + * + * Note: Unlinkable unlinked files don't need one, but the overhead of having + * an unnecessary attr fork is not justified by the additional code complexity + * that would be needed to track that state correctly. + */ +STATIC int +xrep_parent_ensure_attr_fork( + struct xrep_parent *rp) +{ + struct xfs_scrub *sc = rp->sc; + int error; + + error = xfs_attr_add_fork(sc->tempip, + sizeof(struct xfs_attr_sf_hdr), 1); + if (error) + return error; + return xfs_attr_add_fork(sc->ip, sizeof(struct xfs_attr_sf_hdr), 1); +} + +/* + * Finish replaying stashed parent pointer updates, allocate a transaction for + * exchanging extent mappings, and take the ILOCKs of both files before we + * commit the new attribute structure. + */ +STATIC int +xrep_parent_finalize_tempfile( + struct xrep_parent *rp) +{ + struct xfs_scrub *sc = rp->sc; + int error; + + /* + * Repair relies on the ILOCK to quiesce all possible xattr updates. + * Replay all queued parent pointer updates into the tempfile before + * exchanging the contents, even if that means dropping the ILOCKs and + * the transaction. + */ + do { + error = xrep_parent_replay_updates(rp); + if (error) + return error; + + error = xrep_parent_ensure_attr_fork(rp); + if (error) + return error; + + error = xrep_tempexch_trans_alloc(sc, XFS_ATTR_FORK, &rp->tx); + if (error) + return error; + + if (xfarray_length(rp->pptr_recs) == 0) + break; + + xchk_trans_cancel(sc); + xrep_tempfile_iunlock_both(sc); + } while (!xchk_should_terminate(sc, &error)); + return error; +} + +/* + * Replay all the stashed parent pointers into the temporary file, copy all + * the non-pptr xattrs from the file being repaired into the temporary file, + * and exchange the attr fork contents atomically. + */ +STATIC int +xrep_parent_rebuild_pptrs( + struct xrep_parent *rp) +{ + struct xfs_scrub *sc = rp->sc; + xfs_ino_t parent_ino = NULLFSINO; + int error; + + /* + * Copy non-ppttr xattrs from the file being repaired into the + * temporary file's xattr structure. We hold sc->ip's IOLOCK, which + * prevents setxattr/removexattr calls from occurring, but renames + * update the parent pointers without holding IOLOCK. If we detect + * stale attr structures, we restart the scan but only flush at the + * end. + */ + error = xrep_parent_copy_xattrs(rp); + if (error) + return error; + + /* + * Cancel the empty transaction that we used to walk and copy attrs, + * and drop the ILOCK so that we can take the IOLOCK on the temporary + * file. We still hold sc->ip's IOLOCK. + */ + xchk_trans_cancel(sc); + xchk_iunlock(sc, XFS_ILOCK_EXCL); + + error = xrep_tempfile_iolock_polled(sc); + if (error) + return error; + + /* + * Allocate transaction, lock inodes, and make sure that we've replayed + * all the stashed pptr updates to the tempdir. After this point, + * we're ready to exchange the attr fork mappings. + */ + error = xrep_parent_finalize_tempfile(rp); + if (error) + return error; + + /* Last chance to abort before we start committing pptr fixes. */ + if (xchk_should_terminate(sc, &error)) + return error; + + if (xchk_iscan_aborted(&rp->pscan.iscan)) + return -ECANCELED; + + /* + * Exchange the attr fork contents and junk the old attr fork contents, + * which are now in the tempfile. + */ + error = xrep_xattr_swap(sc, &rp->tx); + if (error) + return error; + error = xrep_xattr_reset_tempfile_fork(sc); + if (error) + return error; + + /* + * Roll to get a transaction without any inodes joined to it. Then we + * can drop the tempfile's ILOCK and IOLOCK before doing more work on + * the scrub target file. + */ + error = xfs_trans_roll(&sc->tp); + if (error) + return error; + xrep_tempfile_iunlock(sc); + xrep_tempfile_iounlock(sc); + + /* + * We've committed the new parent pointers. Find at least one parent + * so that we can decide if we're moving this file to the orphanage. + * For this purpose, root directories are their own parents. + */ + if (sc->ip == sc->mp->m_rootip) { + xrep_findparent_scan_found(&rp->pscan, sc->ip->i_ino); + } else { + error = xrep_parent_lookup_pptrs(sc, &parent_ino); + if (error) + return error; + if (parent_ino != NULLFSINO) + xrep_findparent_scan_found(&rp->pscan, parent_ino); + } + return 0; +} + /* * Commit the new parent pointer structure (currently only the dotdot entry) to * the file that we're repairing. @@ -741,13 +1350,24 @@ STATIC int xrep_parent_rebuild_tree( struct xrep_parent *rp) { + int error; + + if (xfs_has_parent(rp->sc->mp)) { + error = xrep_parent_rebuild_pptrs(rp); + if (error) + return error; + } + if (rp->pscan.parent_ino == NULLFSINO) { if (xrep_orphanage_can_adopt(rp->sc)) return xrep_parent_move_to_orphanage(rp); return -EFSCORRUPTED; } - return xrep_parent_reset_dotdot(rp); + if (S_ISDIR(VFS_I(rp->sc->ip)->i_mode)) + return xrep_parent_reset_dotdot(rp); + + return 0; } /* Set up the filesystem scan so we can look for parents. */ @@ -757,18 +1377,39 @@ xrep_parent_setup_scan( { struct xfs_scrub *sc = rp->sc; char *descr; + struct xfs_da_geometry *geo = sc->mp->m_attr_geo; + int max_len; int error; if (!xfs_has_parent(sc->mp)) return xrep_findparent_scan_start(sc, &rp->pscan); + /* Buffers for copying non-pptr attrs to the tempfile */ + rp->xattr_name = kvmalloc(XATTR_NAME_MAX + 1, XCHK_GFP_FLAGS); + if (!rp->xattr_name) + return -ENOMEM; + + /* + * Allocate enough memory to handle loading local attr values from the + * xfblob data while flushing stashed attrs to the temporary file. + * We only realloc the buffer when salvaging remote attr values, so + * TRY_HARDER means we allocate the maximal attr value size. + */ + if (sc->flags & XCHK_TRY_HARDER) + max_len = XATTR_SIZE_MAX; + else + max_len = xfs_attr_leaf_entsize_local_max(geo->blksize); + error = xrep_parent_alloc_xattr_value(rp, max_len); + if (error) + goto out_xattr_name; + /* Set up some staging memory for logging parent pointer updates. */ descr = xchk_xfile_ino_descr(sc, "parent pointer entries"); error = xfarray_create(descr, 0, sizeof(struct xrep_pptr), &rp->pptr_recs); kfree(descr); if (error) - return error; + goto out_xattr_value; descr = xchk_xfile_ino_descr(sc, "parent pointer names"); error = xfblob_create(descr, &rp->pptr_names); @@ -776,19 +1417,47 @@ xrep_parent_setup_scan( if (error) goto out_recs; + /* Set up some storage for copying attrs before the mapping exchange */ + descr = xchk_xfile_ino_descr(sc, + "parent pointer retained xattr entries"); + error = xfarray_create(descr, 0, sizeof(struct xrep_parent_xattr), + &rp->xattr_records); + kfree(descr); + if (error) + goto out_names; + + descr = xchk_xfile_ino_descr(sc, + "parent pointer retained xattr values"); + error = xfblob_create(descr, &rp->xattr_blobs); + kfree(descr); + if (error) + goto out_attr_keys; + error = __xrep_findparent_scan_start(sc, &rp->pscan, xrep_parent_live_update); if (error) - goto out_names; + goto out_attr_values; return 0; +out_attr_values: + xfblob_destroy(rp->xattr_blobs); + rp->xattr_blobs = NULL; +out_attr_keys: + xfarray_destroy(rp->xattr_records); + rp->xattr_records = NULL; out_names: xfblob_destroy(rp->pptr_names); rp->pptr_names = NULL; out_recs: xfarray_destroy(rp->pptr_recs); rp->pptr_recs = NULL; +out_xattr_value: + kvfree(rp->xattr_value); + rp->xattr_value = NULL; +out_xattr_name: + kvfree(rp->xattr_name); + rp->xattr_name = NULL; return error; } @@ -818,7 +1487,7 @@ xrep_parent( if (error) goto out_teardown; - /* Last chance to abort before we start committing fixes. */ + /* Last chance to abort before we start committing dotdot fixes. */ if (xchk_should_terminate(sc, &error)) goto out_teardown; diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 3e0cd482379c..ecfaa4b88910 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -2539,6 +2539,8 @@ DEFINE_EVENT(xrep_xattr_salvage_class, name, \ TP_ARGS(ip, flags, name, namelen, valuelen)) DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_salvage_rec); DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_xattr_insert_rec); +DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_parent_stash_xattr); +DEFINE_XREP_XATTR_SALVAGE_EVENT(xrep_parent_insert_xattr); DECLARE_EVENT_CLASS(xrep_pptr_salvage_class, TP_PROTO(struct xfs_inode *ip, unsigned int flags, const void *name, -- cgit v1.2.3-59-g8ed1b From 204a26aa1d5a891154c9275fe4022e28793ca112 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 2 May 2024 07:48:36 -0700 Subject: xfs: create a helper to compute the blockcount of a max sized remote value Create a helper function to compute the number of fsblocks needed to store a maximally-sized extended attribute value. Signed-off-by: Darrick J. Wong Reviewed-by: Andrey Albershteyn Reviewed-by: Christoph Hellwig --- fs/xfs/libxfs/xfs_attr.c | 2 +- fs/xfs/libxfs/xfs_attr_remote.h | 6 ++++++ fs/xfs/scrub/reap.c | 4 ++-- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 1c2a27fce08a..eb274d4d636d 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1036,7 +1036,7 @@ xfs_attr_set( break; case XFS_ATTRUPDATE_REMOVE: XFS_STATS_INC(mp, xs_attr_remove); - rmt_blks = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX); + rmt_blks = xfs_attr3_max_rmt_blocks(mp); break; } diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h index c64b04f91caf..e3c6c7d774bf 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.h +++ b/fs/xfs/libxfs/xfs_attr_remote.h @@ -8,6 +8,12 @@ unsigned int xfs_attr3_rmt_blocks(struct xfs_mount *mp, unsigned int attrlen); +/* Number of rmt blocks needed to store the maximally sized attr value */ +static inline unsigned int xfs_attr3_max_rmt_blocks(struct xfs_mount *mp) +{ + return xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX); +} + int xfs_attr_rmtval_get(struct xfs_da_args *args); int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map, xfs_buf_flags_t incore_flags); diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c index 01ceaa4efa16..be283153c254 100644 --- a/fs/xfs/scrub/reap.c +++ b/fs/xfs/scrub/reap.c @@ -223,7 +223,7 @@ xrep_bufscan_max_sectors( int max_fsbs; /* Remote xattr values are the largest buffers that we support. */ - max_fsbs = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX); + max_fsbs = xfs_attr3_max_rmt_blocks(mp); return XFS_FSB_TO_BB(mp, min_t(xfs_extlen_t, fsblocks, max_fsbs)); } @@ -806,7 +806,7 @@ xreap_bmapi_binval( * of the next hole. */ off = imap->br_startoff + imap->br_blockcount; - max_off = off + xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX); + max_off = off + xfs_attr3_max_rmt_blocks(mp); while (off < max_off) { struct xfs_bmbt_irec hmap; int nhmaps = 1; -- cgit v1.2.3-59-g8ed1b From 25576c5420e61dea4c2b52942460f2221b8e46e8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 May 2024 09:33:55 +0200 Subject: xfs: simplify iext overflow checking and upgrade Currently the calls to xfs_iext_count_may_overflow and xfs_iext_count_upgrade are always paired. Merge them into a single function to simplify the callers and the actual check and upgrade logic itself. Signed-off-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Reviewed-by: Dave Chinner Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_attr.c | 5 +--- fs/xfs/libxfs/xfs_bmap.c | 5 +--- fs/xfs/libxfs/xfs_inode_fork.c | 57 ++++++++++++++++++------------------------ fs/xfs/libxfs/xfs_inode_fork.h | 6 ++--- fs/xfs/xfs_bmap_item.c | 4 +-- fs/xfs/xfs_bmap_util.c | 23 ++++------------- fs/xfs/xfs_dquot.c | 5 +--- fs/xfs/xfs_iomap.c | 9 ++----- fs/xfs/xfs_reflink.c | 9 ++----- fs/xfs/xfs_rtalloc.c | 5 +--- 10 files changed, 41 insertions(+), 87 deletions(-) (limited to 'fs/xfs/libxfs/xfs_attr.c') diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index eb274d4d636d..430cd3244c14 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1050,11 +1050,8 @@ xfs_attr_set( return error; if (op != XFS_ATTRUPDATE_REMOVE || xfs_inode_hasattr(dp)) { - error = xfs_iext_count_may_overflow(dp, XFS_ATTR_FORK, + error = xfs_iext_count_extend(args->trans, dp, XFS_ATTR_FORK, XFS_IEXT_ATTR_MANIP_CNT(rmt_blks)); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(args->trans, dp, - XFS_IEXT_ATTR_MANIP_CNT(rmt_blks)); if (error) goto out_trans_cancel; } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 0633f285875d..3b3206d312d6 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4651,11 +4651,8 @@ xfs_bmapi_convert_one_delalloc( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - error = xfs_iext_count_may_overflow(ip, whichfork, + error = xfs_iext_count_extend(tp, ip, whichfork, XFS_IEXT_ADD_NOSPLIT_CNT); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, ip, - XFS_IEXT_ADD_NOSPLIT_CNT); if (error) goto out_trans_cancel; diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 7d660a973909..9d11ae015909 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -765,53 +765,46 @@ xfs_ifork_verify_local_attr( return 0; } +/* + * Check if the inode fork supports adding nr_to_add more extents. + * + * If it doesn't but we can upgrade it to large extent counters, do the upgrade. + * If we can't upgrade or are already using big counters but still can't fit the + * additional extents, return -EFBIG. + */ int -xfs_iext_count_may_overflow( +xfs_iext_count_extend( + struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, - int nr_to_add) + uint nr_to_add) { + struct xfs_mount *mp = ip->i_mount; + bool has_large = + xfs_inode_has_large_extent_counts(ip); struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); - uint64_t max_exts; uint64_t nr_exts; + ASSERT(nr_to_add <= XFS_MAX_EXTCNT_UPGRADE_NR); + if (whichfork == XFS_COW_FORK) return 0; - max_exts = xfs_iext_max_nextents(xfs_inode_has_large_extent_counts(ip), - whichfork); - - if (XFS_TEST_ERROR(false, ip->i_mount, XFS_ERRTAG_REDUCE_MAX_IEXTENTS)) - max_exts = 10; - + /* no point in upgrading if if_nextents overflows */ nr_exts = ifp->if_nextents + nr_to_add; - if (nr_exts < ifp->if_nextents || nr_exts > max_exts) + if (nr_exts < ifp->if_nextents) return -EFBIG; - return 0; -} - -/* - * Upgrade this inode's extent counter fields to be able to handle a potential - * increase in the extent count by nr_to_add. Normally this is the same - * quantity that caused xfs_iext_count_may_overflow() to return -EFBIG. - */ -int -xfs_iext_count_upgrade( - struct xfs_trans *tp, - struct xfs_inode *ip, - uint nr_to_add) -{ - ASSERT(nr_to_add <= XFS_MAX_EXTCNT_UPGRADE_NR); - - if (!xfs_has_large_extent_counts(ip->i_mount) || - xfs_inode_has_large_extent_counts(ip) || - XFS_TEST_ERROR(false, ip->i_mount, XFS_ERRTAG_REDUCE_MAX_IEXTENTS)) + if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REDUCE_MAX_IEXTENTS) && + nr_exts > 10) return -EFBIG; - ip->i_diflags2 |= XFS_DIFLAG2_NREXT64; - xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - + if (nr_exts > xfs_iext_max_nextents(has_large, whichfork)) { + if (has_large || !xfs_has_large_extent_counts(mp)) + return -EFBIG; + ip->i_diflags2 |= XFS_DIFLAG2_NREXT64; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + } return 0; } diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h index bd53eb951b65..2373d12fd474 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.h +++ b/fs/xfs/libxfs/xfs_inode_fork.h @@ -256,10 +256,8 @@ extern void xfs_ifork_init_cow(struct xfs_inode *ip); int xfs_ifork_verify_local_data(struct xfs_inode *ip); int xfs_ifork_verify_local_attr(struct xfs_inode *ip); -int xfs_iext_count_may_overflow(struct xfs_inode *ip, int whichfork, - int nr_to_add); -int xfs_iext_count_upgrade(struct xfs_trans *tp, struct xfs_inode *ip, - uint nr_to_add); +int xfs_iext_count_extend(struct xfs_trans *tp, struct xfs_inode *ip, + int whichfork, uint nr_to_add); bool xfs_ifork_is_realtime(struct xfs_inode *ip, int whichfork); /* returns true if the fork has extents but they are not read in yet. */ diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index d27859a684aa..a19d62e78aa1 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -524,9 +524,7 @@ xfs_bmap_recover_work( else iext_delta = XFS_IEXT_PUNCH_HOLE_CNT; - error = xfs_iext_count_may_overflow(ip, work->bi_whichfork, iext_delta); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, ip, iext_delta); + error = xfs_iext_count_extend(tp, ip, work->bi_whichfork, iext_delta); if (error) goto err_cancel; diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 14e0eb9b305f..ac2e77ebb54c 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -710,11 +710,8 @@ xfs_alloc_file_space( if (error) break; - error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, + error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, XFS_IEXT_ADD_NOSPLIT_CNT); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, ip, - XFS_IEXT_ADD_NOSPLIT_CNT); if (error) goto error; @@ -771,10 +768,8 @@ xfs_unmap_extent( if (error) return error; - error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, + error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, XFS_IEXT_PUNCH_HOLE_CNT); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, ip, XFS_IEXT_PUNCH_HOLE_CNT); if (error) goto out_trans_cancel; @@ -1050,10 +1045,8 @@ xfs_insert_file_space( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, + error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, XFS_IEXT_PUNCH_HOLE_CNT); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, ip, XFS_IEXT_PUNCH_HOLE_CNT); if (error) goto out_trans_cancel; @@ -1279,23 +1272,17 @@ xfs_swap_extent_rmap( trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec); if (xfs_bmap_is_real_extent(&uirec)) { - error = xfs_iext_count_may_overflow(ip, + error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, XFS_IEXT_SWAP_RMAP_CNT); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, ip, - XFS_IEXT_SWAP_RMAP_CNT); if (error) goto out; } if (xfs_bmap_is_real_extent(&irec)) { - error = xfs_iext_count_may_overflow(tip, + error = xfs_iext_count_extend(tp, tip, XFS_DATA_FORK, XFS_IEXT_SWAP_RMAP_CNT); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, ip, - XFS_IEXT_SWAP_RMAP_CNT); if (error) goto out; } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 43acb4f0d174..c1b211c260a9 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -341,11 +341,8 @@ xfs_dquot_disk_alloc( goto err_cancel; } - error = xfs_iext_count_may_overflow(quotip, XFS_DATA_FORK, + error = xfs_iext_count_extend(tp, quotip, XFS_DATA_FORK, XFS_IEXT_ADD_NOSPLIT_CNT); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, quotip, - XFS_IEXT_ADD_NOSPLIT_CNT); if (error) goto err_cancel; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 3db93b876de3..378342673925 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -299,9 +299,7 @@ xfs_iomap_write_direct( if (error) return error; - error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, nr_exts); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, ip, nr_exts); + error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, nr_exts); if (error) goto out_trans_cancel; @@ -617,11 +615,8 @@ xfs_iomap_write_unwritten( if (error) return error; - error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, + error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, XFS_IEXT_WRITE_UNWRITTEN_CNT); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, ip, - XFS_IEXT_WRITE_UNWRITTEN_CNT); if (error) goto error_on_bmapi_transaction; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 520f5ef3743c..063a2e00d169 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -754,11 +754,8 @@ xfs_reflink_end_cow_extent( del = got; xfs_trim_extent(&del, *offset_fsb, end_fsb - *offset_fsb); - error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, + error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, XFS_IEXT_REFLINK_END_COW_CNT); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, ip, - XFS_IEXT_REFLINK_END_COW_CNT); if (error) goto out_cancel; @@ -1258,9 +1255,7 @@ xfs_reflink_remap_extent( if (dmap_written) ++iext_delta; - error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, iext_delta); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, ip, iext_delta); + error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, iext_delta); if (error) goto out_cancel; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 150f544445ca..5a7ddfed1bb8 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -695,11 +695,8 @@ xfs_growfs_rt_alloc( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); - error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK, + error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK, XFS_IEXT_ADD_NOSPLIT_CNT); - if (error == -EFBIG) - error = xfs_iext_count_upgrade(tp, ip, - XFS_IEXT_ADD_NOSPLIT_CNT); if (error) goto out_trans_cancel; -- cgit v1.2.3-59-g8ed1b