From f2e812c1522dab847912309b00abcc762dd696da Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 19 Mar 2024 09:36:28 +1100 Subject: xfs: don't use current->journal_info syzbot reported an ext4 panic during a page fault where found a journal handle when it didn't expect to find one. The structure it tripped over had a value of 'TRAN' in the first entry in the structure, and that indicates it tripped over a struct xfs_trans instead of a jbd2 handle. The reason for this is that the page fault was taken during a copy-out to a user buffer from an xfs bulkstat operation. XFS uses an "empty" transaction context for bulkstat to do automated metadata buffer cleanup, and so the transaction context is valid across the copyout of the bulkstat info into the user buffer. We are using empty transaction contexts like this in XFS to reduce the risk of failing to release objects we reference during the operation, especially during error handling. Hence we really need to ensure that we can take page faults from these contexts without leaving landmines for the code processing the page fault to trip over. However, this same behaviour could happen from any other filesystem that triggers a page fault or any other exception that is handled on-stack from within a task context that has current->journal_info set. Having a page fault from some other filesystem bounce into XFS where we have to run a transaction isn't a bug at all, but the usage of current->journal_info means that this could result corruption of the outer task's journal_info structure. The problem is purely that we now have two different contexts that now think they own current->journal_info. IOWs, no filesystem can allow page faults or on-stack exceptions while current->journal_info is set by the filesystem because the exception processing might use current->journal_info itself. If we end up with nested XFS transactions whilst holding an empty transaction, then it isn't an issue as the outer transaction does not hold a log reservation. If we ignore the current->journal_info usage, then the only problem that might occur is a deadlock if the exception tries to take the same locks the upper context holds. That, however, is not a problem that setting current->journal_info would solve, so it's largely an irrelevant concern here. IOWs, we really only use current->journal_info for a warning check in xfs_vm_writepages() to ensure we aren't doing writeback from a transaction context. Writeback might need to do allocation, so it can need to run transactions itself. Hence it's a debug check to warn us that we've done something silly, and largely it is not all that useful. So let's just remove all the use of current->journal_info in XFS and get rid of all the potential issues from nested contexts where current->journal_info might get misused by another filesystem context. Reported-by: syzbot+cdee56dbcdf0096ef605@syzkaller.appspotmail.com Signed-off-by: Dave Chinner Reviewed-by: "Darrick J. Wong" Reviewed-by: Mark Tinguely Reviewed-by: Christoph Hellwig Signed-off-by: Chandan Babu R --- fs/xfs/xfs_aops.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs/xfs/xfs_aops.c') diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 1698507d1ac7..3f428620ebf2 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -503,13 +503,6 @@ xfs_vm_writepages( { struct xfs_writepage_ctx wpc = { }; - /* - * Writing back data in a transaction context can result in recursive - * transactions. This is bad, so issue a warning and get out of here. - */ - if (WARN_ON_ONCE(current->journal_info)) - return 0; - xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); } -- cgit v1.2.3-59-g8ed1b From 2e08371a83f1c06fd85eea8cd37c87a224cc4cc4 Mon Sep 17 00:00:00 2001 From: Zhang Yi Date: Thu, 25 Apr 2024 21:13:29 +0800 Subject: xfs: make xfs_bmapi_convert_delalloc() to allocate the target offset Since xfs_bmapi_convert_delalloc() only attempts to allocate the entire delalloc extent and require multiple invocations to allocate the target offset. So xfs_convert_blocks() add a loop to do this job and we call it in the write back path, but xfs_convert_blocks() isn't a common helper. Let's do it in xfs_bmapi_convert_delalloc() and drop xfs_convert_blocks(), preparing for the post EOF delalloc blocks converting in the buffered write begin path. Signed-off-by: Zhang Yi Reviewed-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_bmap.c | 34 ++++++++++++++++++++++++++++-- fs/xfs/xfs_aops.c | 54 +++++++++++++----------------------------------- 2 files changed, 46 insertions(+), 42 deletions(-) (limited to 'fs/xfs/xfs_aops.c') diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index c9b1dfe63944..49e2fcad593d 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4590,8 +4590,8 @@ error0: * invocations to allocate the target offset if a large enough physical extent * is not available. */ -int -xfs_bmapi_convert_delalloc( +static int +xfs_bmapi_convert_one_delalloc( struct xfs_inode *ip, int whichfork, xfs_off_t offset, @@ -4724,6 +4724,36 @@ out_trans_cancel: return error; } +/* + * Pass in a dellalloc extent and convert it to real extents, return the real + * extent that maps offset_fsb in iomap. + */ +int +xfs_bmapi_convert_delalloc( + struct xfs_inode *ip, + int whichfork, + loff_t offset, + struct iomap *iomap, + unsigned int *seq) +{ + int error; + + /* + * Attempt to allocate whatever delalloc extent currently backs offset + * and put the result into iomap. Allocate in a loop because it may + * take several attempts to allocate real blocks for a contiguous + * delalloc extent if free space is sufficiently fragmented. + */ + do { + error = xfs_bmapi_convert_one_delalloc(ip, whichfork, offset, + iomap, seq); + if (error) + return error; + } while (iomap->offset + iomap->length <= offset); + + return 0; +} + int xfs_bmapi_remap( struct xfs_trans *tp, diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 3f428620ebf2..f7520f375cee 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -233,45 +233,6 @@ xfs_imap_valid( return true; } -/* - * Pass in a dellalloc extent and convert it to real extents, return the real - * extent that maps offset_fsb in wpc->iomap. - * - * The current page is held locked so nothing could have removed the block - * backing offset_fsb, although it could have moved from the COW to the data - * fork by another thread. - */ -static int -xfs_convert_blocks( - struct iomap_writepage_ctx *wpc, - struct xfs_inode *ip, - int whichfork, - loff_t offset) -{ - int error; - unsigned *seq; - - if (whichfork == XFS_COW_FORK) - seq = &XFS_WPC(wpc)->cow_seq; - else - seq = &XFS_WPC(wpc)->data_seq; - - /* - * Attempt to allocate whatever delalloc extent currently backs offset - * and put the result into wpc->iomap. Allocate in a loop because it - * may take several attempts to allocate real blocks for a contiguous - * delalloc extent if free space is sufficiently fragmented. - */ - do { - error = xfs_bmapi_convert_delalloc(ip, whichfork, offset, - &wpc->iomap, seq); - if (error) - return error; - } while (wpc->iomap.offset + wpc->iomap.length <= offset); - - return 0; -} - static int xfs_map_blocks( struct iomap_writepage_ctx *wpc, @@ -290,6 +251,7 @@ xfs_map_blocks( struct xfs_iext_cursor icur; int retries = 0; int error = 0; + unsigned int *seq; if (xfs_is_shutdown(mp)) return -EIO; @@ -387,7 +349,19 @@ retry: trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap); return 0; allocate_blocks: - error = xfs_convert_blocks(wpc, ip, whichfork, offset); + /* + * Convert a dellalloc extent to a real one. The current page is held + * locked so nothing could have removed the block backing offset_fsb, + * although it could have moved from the COW to the data fork by another + * thread. + */ + if (whichfork == XFS_COW_FORK) + seq = &XFS_WPC(wpc)->cow_seq; + else + seq = &XFS_WPC(wpc)->data_seq; + + error = xfs_bmapi_convert_delalloc(ip, whichfork, offset, + &wpc->iomap, seq); if (error) { /* * If we failed to find the extent in the COW fork we might have -- cgit v1.2.3-59-g8ed1b From cc3c92e7e79eb5f7f3ec4d5790ade384b7d294f7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 30 Apr 2024 14:58:22 +0200 Subject: xfs: xfs_quota_unreserve_blkres can't fail Unreserving quotas can't fail due to quota limits, and we'll notice a shut down file system a bit later in all the callers anyway. Return void and remove the error checking and propagation in the callers. Signed-off-by: Christoph Hellwig Reviewed-by: "Darrick J. Wong" Signed-off-by: Chandan Babu R --- fs/xfs/libxfs/xfs_bmap.c | 16 +++++----------- fs/xfs/libxfs/xfs_bmap.h | 2 +- fs/xfs/xfs_aops.c | 6 +----- fs/xfs/xfs_bmap_util.c | 9 +++------ fs/xfs/xfs_bmap_util.h | 2 +- fs/xfs/xfs_iomap.c | 4 ++-- fs/xfs/xfs_quota.h | 7 ++++--- fs/xfs/xfs_reflink.c | 11 +++-------- 8 files changed, 20 insertions(+), 37 deletions(-) (limited to 'fs/xfs/xfs_aops.c') diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index be515bac8ea1..0633f285875d 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4928,7 +4928,7 @@ xfs_bmap_split_indlen( *indlen2 = len2; } -int +void xfs_bmap_del_extent_delay( struct xfs_inode *ip, int whichfork, @@ -4944,7 +4944,6 @@ xfs_bmap_del_extent_delay( xfs_filblks_t got_indlen, new_indlen, stolen = 0; uint32_t state = xfs_bmap_fork_to_state(whichfork); uint64_t fdblocks; - int error = 0; bool isrt; XFS_STATS_INC(mp, xs_del_exlist); @@ -4964,9 +4963,7 @@ xfs_bmap_del_extent_delay( * sb counters as we might have to borrow some blocks for the * indirect block accounting. */ - error = xfs_quota_unreserve_blkres(ip, del->br_blockcount); - if (error) - return error; + xfs_quota_unreserve_blkres(ip, del->br_blockcount); ip->i_delayed_blks -= del->br_blockcount; if (got->br_startoff == del->br_startoff) @@ -5064,7 +5061,6 @@ xfs_bmap_del_extent_delay( xfs_add_fdblocks(mp, fdblocks); xfs_mod_delalloc(ip, -(int64_t)del->br_blockcount, -da_diff); - return error; } void @@ -5622,18 +5618,16 @@ __xfs_bunmapi( delete: if (wasdel) { - error = xfs_bmap_del_extent_delay(ip, whichfork, &icur, - &got, &del); + xfs_bmap_del_extent_delay(ip, whichfork, &icur, &got, &del); } else { error = xfs_bmap_del_extent_real(ip, tp, &icur, cur, &del, &tmp_logflags, whichfork, flags); logflags |= tmp_logflags; + if (error) + goto error0; } - if (error) - goto error0; - end = del.br_startoff - 1; nodelete: /* diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index e98849eb9bba..667b0c2b33d1 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -202,7 +202,7 @@ int xfs_bmapi_write(struct xfs_trans *tp, struct xfs_inode *ip, int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, xfs_fileoff_t bno, xfs_filblks_t len, uint32_t flags, xfs_extnum_t nexts, int *done); -int xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork, +void xfs_bmap_del_extent_delay(struct xfs_inode *ip, int whichfork, struct xfs_iext_cursor *cur, struct xfs_bmbt_irec *got, struct xfs_bmbt_irec *del); void xfs_bmap_del_extent_cow(struct xfs_inode *ip, diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index f7520f375cee..6dead20338e2 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -443,7 +443,6 @@ xfs_discard_folio( { struct xfs_inode *ip = XFS_I(folio->mapping->host); struct xfs_mount *mp = ip->i_mount; - int error; if (xfs_is_shutdown(mp)) return; @@ -457,11 +456,8 @@ xfs_discard_folio( * byte of the next folio. Hence the end offset is only dependent on the * folio itself and not the start offset that is passed in. */ - error = xfs_bmap_punch_delalloc_range(ip, pos, + xfs_bmap_punch_delalloc_range(ip, pos, folio_pos(folio) + folio_size(folio)); - - if (error && !xfs_is_shutdown(mp)) - xfs_alert(mp, "page discard unable to remove delalloc mapping."); } static const struct iomap_writeback_ops xfs_writeback_ops = { diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 2e6f08198c07..14e0eb9b305f 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -440,7 +440,7 @@ out_unlock_iolock: * if the ranges only partially overlap them, so it is up to the caller to * ensure that partial blocks are not passed in. */ -int +void xfs_bmap_punch_delalloc_range( struct xfs_inode *ip, xfs_off_t start_byte, @@ -452,7 +452,6 @@ xfs_bmap_punch_delalloc_range( xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, end_byte); struct xfs_bmbt_irec got, del; struct xfs_iext_cursor icur; - int error = 0; ASSERT(!xfs_need_iread_extents(ifp)); @@ -476,15 +475,13 @@ xfs_bmap_punch_delalloc_range( continue; } - error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, - &got, &del); - if (error || !xfs_iext_get_extent(ifp, &icur, &got)) + xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, &got, &del); + if (!xfs_iext_get_extent(ifp, &icur, &got)) break; } out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL); - return error; } /* diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 77ecbb753ef2..51f84d8ff372 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -30,7 +30,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap) } #endif /* CONFIG_XFS_RT */ -int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, +void xfs_bmap_punch_delalloc_range(struct xfs_inode *ip, xfs_off_t start_byte, xfs_off_t end_byte); struct kgetbmap { diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index f5d0ed45721b..3db93b876de3 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1232,8 +1232,8 @@ xfs_buffered_write_delalloc_punch( loff_t offset, loff_t length) { - return xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, - offset + length); + xfs_bmap_punch_delalloc_range(XFS_I(inode), offset, offset + length); + return 0; } static int diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 621ea9d7cf06..23d71a55bbc0 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -215,10 +215,11 @@ xfs_quota_reserve_blkres(struct xfs_inode *ip, int64_t blocks) return xfs_trans_reserve_quota_nblks(NULL, ip, blocks, 0, false); } -static inline int -xfs_quota_unreserve_blkres(struct xfs_inode *ip, int64_t blocks) +static inline void +xfs_quota_unreserve_blkres(struct xfs_inode *ip, uint64_t blocks) { - return xfs_quota_reserve_blkres(ip, -blocks); + /* don't return an error as unreserving quotas can't fail */ + xfs_quota_reserve_blkres(ip, -(int64_t)blocks); } extern int xfs_mount_reset_sbqflags(struct xfs_mount *); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index 5ecb52a234be..24761e89ce9a 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -592,10 +592,8 @@ xfs_reflink_cancel_cow_blocks( trace_xfs_reflink_cancel_cow(ip, &del); if (isnullstartblock(del.br_startblock)) { - error = xfs_bmap_del_extent_delay(ip, XFS_COW_FORK, - &icur, &got, &del); - if (error) - break; + xfs_bmap_del_extent_delay(ip, XFS_COW_FORK, &icur, &got, + &del); } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER); @@ -618,10 +616,7 @@ xfs_reflink_cancel_cow_blocks( xfs_bmap_del_extent_cow(ip, &icur, &got, &del); /* Remove the quota reservation */ - error = xfs_quota_unreserve_blkres(ip, - del.br_blockcount); - if (error) - break; + xfs_quota_unreserve_blkres(ip, del.br_blockcount); } else { /* Didn't do anything, push cursor back. */ xfs_iext_prev(ifp, &icur); -- cgit v1.2.3-59-g8ed1b