From 85c73bf726e41be276bcad3325d9a8aef10be289 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 7 Jul 2022 22:05:18 +1000 Subject: xfs: rework xfs_buf_incore() API Make it consistent with the other buffer APIs: return an error and pass the buffer back via an out parameter. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_attr_remote.c | 15 ++++++++++----- fs/xfs/scrub/repair.c | 15 +++++++++------ fs/xfs/xfs_buf.c | 19 ++----------------- fs/xfs/xfs_buf.h | 20 ++++++++++++++++---- fs/xfs/xfs_qm.c | 9 ++++----- 5 files changed, 41 insertions(+), 37 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index 7298c148f848..d440393b40eb 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -543,6 +543,7 @@ xfs_attr_rmtval_stale( { struct xfs_mount *mp = ip->i_mount; struct xfs_buf *bp; + int error; ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); @@ -550,14 +551,18 @@ xfs_attr_rmtval_stale( XFS_IS_CORRUPT(mp, map->br_startblock == HOLESTARTBLOCK)) return -EFSCORRUPTED; - bp = xfs_buf_incore(mp->m_ddev_targp, + error = xfs_buf_incore(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, map->br_startblock), - XFS_FSB_TO_BB(mp, map->br_blockcount), incore_flags); - if (bp) { - xfs_buf_stale(bp); - xfs_buf_relse(bp); + XFS_FSB_TO_BB(mp, map->br_blockcount), + incore_flags, &bp); + if (error) { + if (error == -ENOENT) + return 0; + return error; } + xfs_buf_stale(bp); + xfs_buf_relse(bp); return 0; } diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 1e7b6b209ee8..5e7428782571 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -457,16 +457,19 @@ xrep_invalidate_blocks( * assume it's owned by someone else. */ for_each_xbitmap_block(fsbno, bmr, n, bitmap) { + int error; + /* Skip AG headers and post-EOFS blocks */ if (!xfs_verify_fsbno(sc->mp, fsbno)) continue; - bp = xfs_buf_incore(sc->mp->m_ddev_targp, + error = xfs_buf_incore(sc->mp->m_ddev_targp, XFS_FSB_TO_DADDR(sc->mp, fsbno), - XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK); - if (bp) { - xfs_trans_bjoin(sc->tp, bp); - xfs_trans_binval(sc->tp, bp); - } + XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK, &bp); + if (error) + continue; + + xfs_trans_bjoin(sc->tp, bp); + xfs_trans_binval(sc->tp, bp); } return 0; diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index bf4e60871068..143e1c70df5d 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -616,23 +616,6 @@ found: return 0; } -struct xfs_buf * -xfs_buf_incore( - struct xfs_buftarg *target, - xfs_daddr_t blkno, - size_t numblks, - xfs_buf_flags_t flags) -{ - struct xfs_buf *bp; - int error; - DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); - - error = xfs_buf_find(target, &map, 1, flags, NULL, &bp); - if (error) - return NULL; - return bp; -} - /* * Assembles a buffer covering the specified range. 
The code is optimised for * cache hits, as metadata intensive workloads will see 3 orders of magnitude @@ -656,6 +639,8 @@ xfs_buf_get_map( goto found; if (error != -ENOENT) return error; + if (flags & XBF_INCORE) + return -ENOENT; error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp); if (error) diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 1ee3056ff9cf..58e9034d51bd 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -42,9 +42,11 @@ struct xfs_buf; #define _XBF_DELWRI_Q (1u << 22)/* buffer on a delwri queue */ /* flags used only as arguments to access routines */ +#define XBF_INCORE (1u << 29)/* lookup only, return if found in cache */ #define XBF_TRYLOCK (1u << 30)/* lock requested, but do not wait */ #define XBF_UNMAPPED (1u << 31)/* do not map the buffer */ + typedef unsigned int xfs_buf_flags_t; #define XFS_BUF_FLAGS \ @@ -63,6 +65,7 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ /* The following interface flags should never be set */ \ + { XBF_INCORE, "INCORE" }, \ { XBF_TRYLOCK, "TRYLOCK" }, \ { XBF_UNMAPPED, "UNMAPPED" } @@ -196,10 +199,6 @@ struct xfs_buf { }; /* Finding and Reading Buffers */ -struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target, - xfs_daddr_t blkno, size_t numblks, - xfs_buf_flags_t flags); - int xfs_buf_get_map(struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp); int xfs_buf_read_map(struct xfs_buftarg *target, struct xfs_buf_map *map, @@ -209,6 +208,19 @@ void xfs_buf_readahead_map(struct xfs_buftarg *target, struct xfs_buf_map *map, int nmaps, const struct xfs_buf_ops *ops); +static inline int +xfs_buf_incore( + struct xfs_buftarg *target, + xfs_daddr_t blkno, + size_t numblks, + xfs_buf_flags_t flags, + struct xfs_buf **bpp) +{ + DEFINE_SINGLE_BUF_MAP(map, blkno, numblks); + + return xfs_buf_get_map(target, &map, 1, XBF_INCORE | flags, bpp); +} + static inline int xfs_buf_get( struct xfs_buftarg *target, diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index abf08bbf34a9..3517a6be8dad 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -1229,12 +1229,11 @@ xfs_qm_flush_one( */ if (!xfs_dqflock_nowait(dqp)) { /* buf is pinned in-core by delwri list */ - bp = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno, - mp->m_quotainfo->qi_dqchunklen, 0); - if (!bp) { - error = -EINVAL; + error = xfs_buf_incore(mp->m_ddev_targp, dqp->q_blkno, + mp->m_quotainfo->qi_dqchunklen, 0, &bp); + if (error) goto out_unlock; - } + xfs_buf_unlock(bp); xfs_buf_delwri_pushbuf(bp, buffer_list); -- cgit v1.2.3-59-g8ed1b From de67dc575434dca8d60b1e181ed5dd296392ffce Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 12:02:46 +1000 Subject: xfs: break up xfs_buf_find() into individual pieces xfs_buf_find() is made up of three main parts: lookup, insert and locking. The interactions with xfs_buf_get_map() require it to be called twice - once for a pure lookup, and again on lookup failure so the insert path can be run. We want to simplify this down a lot, so split it into a fast path lookup, a slow path insert and a "lock the found buffer" helper. This will then let us integrate these operations more effectively into xfs_buf_get_map() in future patches. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. 
Wong --- fs/xfs/xfs_buf.c | 158 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 104 insertions(+), 54 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 143e1c70df5d..9a44048d352b 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -503,6 +503,98 @@ xfs_buf_hash_destroy( rhashtable_destroy(&pag->pag_buf_hash); } +static int +xfs_buf_map_verify( + struct xfs_buftarg *btp, + struct xfs_buf_map *map) +{ + xfs_daddr_t eofs; + + /* Check for IOs smaller than the sector size / not sector aligned */ + ASSERT(!(BBTOB(map->bm_len) < btp->bt_meta_sectorsize)); + ASSERT(!(BBTOB(map->bm_bn) & (xfs_off_t)btp->bt_meta_sectormask)); + + /* + * Corrupted block numbers can get through to here, unfortunately, so we + * have to check that the buffer falls within the filesystem bounds. + */ + eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); + if (map->bm_bn < 0 || map->bm_bn >= eofs) { + xfs_alert(btp->bt_mount, + "%s: daddr 0x%llx out of range, EOFS 0x%llx", + __func__, map->bm_bn, eofs); + WARN_ON(1); + return -EFSCORRUPTED; + } + return 0; +} + +static int +xfs_buf_find_lock( + struct xfs_buf *bp, + xfs_buf_flags_t flags) +{ + if (!xfs_buf_trylock(bp)) { + if (flags & XBF_TRYLOCK) { + XFS_STATS_INC(bp->b_mount, xb_busy_locked); + xfs_buf_rele(bp); + return -EAGAIN; + } + xfs_buf_lock(bp); + XFS_STATS_INC(bp->b_mount, xb_get_locked_waited); + } + + /* + * if the buffer is stale, clear all the external state associated with + * it. We need to keep flags such as how we allocated the buffer memory + * intact here. + */ + if (bp->b_flags & XBF_STALE) { + ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); + bp->b_flags &= _XBF_KMEM | _XBF_PAGES; + bp->b_ops = NULL; + } + return 0; +} + +static inline struct xfs_buf * +xfs_buf_lookup( + struct xfs_perag *pag, + struct xfs_buf_map *map) +{ + struct xfs_buf *bp; + + bp = rhashtable_lookup(&pag->pag_buf_hash, map, xfs_buf_hash_params); + if (!bp) + return NULL; + atomic_inc(&bp->b_hold); + return bp; +} + +/* + * Insert the new_bp into the hash table. This consumes the perag reference + * taken for the lookup. + */ +static int +xfs_buf_find_insert( + struct xfs_buftarg *btp, + struct xfs_perag *pag, + struct xfs_buf *new_bp) +{ + /* No match found */ + if (!new_bp) { + xfs_perag_put(pag); + XFS_STATS_INC(btp->bt_mount, xb_miss_locked); + return -ENOENT; + } + + /* the buffer keeps the perag reference until it is freed */ + new_bp->b_pag = pag; + rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head, + xfs_buf_hash_params); + return 0; +} + /* * Look up a buffer in the buffer cache and return it referenced and locked * in @found_bp. @@ -533,7 +625,7 @@ xfs_buf_find( struct xfs_perag *pag; struct xfs_buf *bp; struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; - xfs_daddr_t eofs; + int error; int i; *found_bp = NULL; @@ -541,47 +633,22 @@ xfs_buf_find( for (i = 0; i < nmaps; i++) cmap.bm_len += map[i].bm_len; - /* Check for IOs smaller than the sector size / not sector aligned */ - ASSERT(!(BBTOB(cmap.bm_len) < btp->bt_meta_sectorsize)); - ASSERT(!(BBTOB(cmap.bm_bn) & (xfs_off_t)btp->bt_meta_sectormask)); - - /* - * Corrupted block numbers can get through to here, unfortunately, so we - * have to check that the buffer falls within the filesystem bounds. 
- */ - eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks); - if (cmap.bm_bn < 0 || cmap.bm_bn >= eofs) { - xfs_alert(btp->bt_mount, - "%s: daddr 0x%llx out of range, EOFS 0x%llx", - __func__, cmap.bm_bn, eofs); - WARN_ON(1); - return -EFSCORRUPTED; - } + error = xfs_buf_map_verify(btp, &cmap); + if (error) + return error; pag = xfs_perag_get(btp->bt_mount, xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn)); spin_lock(&pag->pag_buf_lock); - bp = rhashtable_lookup_fast(&pag->pag_buf_hash, &cmap, - xfs_buf_hash_params); - if (bp) { - atomic_inc(&bp->b_hold); + bp = xfs_buf_lookup(pag, &cmap); + if (bp) goto found; - } - - /* No match found */ - if (!new_bp) { - XFS_STATS_INC(btp->bt_mount, xb_miss_locked); - spin_unlock(&pag->pag_buf_lock); - xfs_perag_put(pag); - return -ENOENT; - } - /* the buffer keeps the perag reference until it is freed */ - new_bp->b_pag = pag; - rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head, - xfs_buf_hash_params); + error = xfs_buf_find_insert(btp, pag, new_bp); spin_unlock(&pag->pag_buf_lock); + if (error) + return error; *found_bp = new_bp; return 0; @@ -589,26 +656,9 @@ found: spin_unlock(&pag->pag_buf_lock); xfs_perag_put(pag); - if (!xfs_buf_trylock(bp)) { - if (flags & XBF_TRYLOCK) { - xfs_buf_rele(bp); - XFS_STATS_INC(btp->bt_mount, xb_busy_locked); - return -EAGAIN; - } - xfs_buf_lock(bp); - XFS_STATS_INC(btp->bt_mount, xb_get_locked_waited); - } - - /* - * if the buffer is stale, clear all the external state associated with - * it. We need to keep flags such as how we allocated the buffer memory - * intact here. - */ - if (bp->b_flags & XBF_STALE) { - ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); - bp->b_flags &= _XBF_KMEM | _XBF_PAGES; - bp->b_ops = NULL; - } + error = xfs_buf_find_lock(bp, flags); + if (error) + return error; trace_xfs_buf_find(bp, flags, _RET_IP_); XFS_STATS_INC(btp->bt_mount, xb_get_locked); -- cgit v1.2.3-59-g8ed1b From 348000804a0f4dea74219a927e081d6e7dee792f Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 12:04:31 +1000 Subject: xfs: merge xfs_buf_find() and xfs_buf_get_map() Now that we factored xfs_buf_find(), we can start separating into distinct fast and slow paths from xfs_buf_get_map(). We start by moving the lookup map and perag setup to _get_map(), and then move all the specifics of the fast path lookup into xfs_buf_lookup() and call it directly from _get_map(). We then move all the slow path code to xfs_buf_find_insert(), which is now also called directly from _get_map(). As such, xfs_buf_find() now goes away. Signed-off-by: Dave Chinner Reviewed-by: Darrick J. 
Wong --- fs/xfs/xfs_buf.c | 202 +++++++++++++++++++++++++------------------------------ 1 file changed, 93 insertions(+), 109 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 9a44048d352b..81ca951b451a 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -537,7 +537,6 @@ xfs_buf_find_lock( if (!xfs_buf_trylock(bp)) { if (flags & XBF_TRYLOCK) { XFS_STATS_INC(bp->b_mount, xb_busy_locked); - xfs_buf_rele(bp); return -EAGAIN; } xfs_buf_lock(bp); @@ -557,113 +556,97 @@ xfs_buf_find_lock( return 0; } -static inline struct xfs_buf * +static inline int xfs_buf_lookup( struct xfs_perag *pag, - struct xfs_buf_map *map) + struct xfs_buf_map *map, + xfs_buf_flags_t flags, + struct xfs_buf **bpp) { struct xfs_buf *bp; + int error; + spin_lock(&pag->pag_buf_lock); bp = rhashtable_lookup(&pag->pag_buf_hash, map, xfs_buf_hash_params); - if (!bp) - return NULL; + if (!bp) { + spin_unlock(&pag->pag_buf_lock); + return -ENOENT; + } atomic_inc(&bp->b_hold); - return bp; -} + spin_unlock(&pag->pag_buf_lock); -/* - * Insert the new_bp into the hash table. This consumes the perag reference - * taken for the lookup. - */ -static int -xfs_buf_find_insert( - struct xfs_buftarg *btp, - struct xfs_perag *pag, - struct xfs_buf *new_bp) -{ - /* No match found */ - if (!new_bp) { - xfs_perag_put(pag); - XFS_STATS_INC(btp->bt_mount, xb_miss_locked); - return -ENOENT; + error = xfs_buf_find_lock(bp, flags); + if (error) { + xfs_buf_rele(bp); + return error; } - /* the buffer keeps the perag reference until it is freed */ - new_bp->b_pag = pag; - rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head, - xfs_buf_hash_params); + trace_xfs_buf_find(bp, flags, _RET_IP_); + *bpp = bp; return 0; } /* - * Look up a buffer in the buffer cache and return it referenced and locked - * in @found_bp. - * - * If @new_bp is supplied and we have a lookup miss, insert @new_bp into the - * cache. - * - * If XBF_TRYLOCK is set in @flags, only try to lock the buffer and return - * -EAGAIN if we fail to lock it. - * - * Return values are: - * -EFSCORRUPTED if have been supplied with an invalid address - * -EAGAIN on trylock failure - * -ENOENT if we fail to find a match and @new_bp was NULL - * 0, with @found_bp: - * - @new_bp if we inserted it into the cache - * - the buffer we found and locked. + * Insert the new_bp into the hash table. This consumes the perag reference + * taken for the lookup regardless of the result of the insert. */ static int -xfs_buf_find( +xfs_buf_find_insert( struct xfs_buftarg *btp, + struct xfs_perag *pag, + struct xfs_buf_map *cmap, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, - struct xfs_buf *new_bp, - struct xfs_buf **found_bp) + struct xfs_buf **bpp) { - struct xfs_perag *pag; + struct xfs_buf *new_bp; struct xfs_buf *bp; - struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; int error; - int i; - - *found_bp = NULL; - - for (i = 0; i < nmaps; i++) - cmap.bm_len += map[i].bm_len; - error = xfs_buf_map_verify(btp, &cmap); + error = _xfs_buf_alloc(btp, map, nmaps, flags, &new_bp); if (error) - return error; + goto out_drop_pag; - pag = xfs_perag_get(btp->bt_mount, - xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn)); + /* + * For buffers that fit entirely within a single page, first attempt to + * allocate the memory from the heap to minimise memory usage. If we + * can't get heap memory for these small buffers, we fall back to using + * the page allocator. 
+ */ + if (BBTOB(new_bp->b_length) >= PAGE_SIZE || + xfs_buf_alloc_kmem(new_bp, flags) < 0) { + error = xfs_buf_alloc_pages(new_bp, flags); + if (error) + goto out_free_buf; + } spin_lock(&pag->pag_buf_lock); - bp = xfs_buf_lookup(pag, &cmap); - if (bp) - goto found; + bp = rhashtable_lookup(&pag->pag_buf_hash, cmap, xfs_buf_hash_params); + if (bp) { + atomic_inc(&bp->b_hold); + spin_unlock(&pag->pag_buf_lock); + error = xfs_buf_find_lock(bp, flags); + if (error) + xfs_buf_rele(bp); + else + *bpp = bp; + goto out_free_buf; + } - error = xfs_buf_find_insert(btp, pag, new_bp); + /* The buffer keeps the perag reference until it is freed. */ + new_bp->b_pag = pag; + rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head, + xfs_buf_hash_params); spin_unlock(&pag->pag_buf_lock); - if (error) - return error; - *found_bp = new_bp; + *bpp = new_bp; return 0; -found: - spin_unlock(&pag->pag_buf_lock); +out_free_buf: + xfs_buf_free(new_bp); +out_drop_pag: xfs_perag_put(pag); - - error = xfs_buf_find_lock(bp, flags); - if (error) - return error; - - trace_xfs_buf_find(bp, flags, _RET_IP_); - XFS_STATS_INC(btp->bt_mount, xb_get_locked); - *found_bp = bp; - return 0; + return error; } /* @@ -673,54 +656,54 @@ found: */ int xfs_buf_get_map( - struct xfs_buftarg *target, + struct xfs_buftarg *btp, struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp) { - struct xfs_buf *bp; - struct xfs_buf *new_bp; + struct xfs_perag *pag; + struct xfs_buf *bp = NULL; + struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; int error; + int i; - *bpp = NULL; - error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp); - if (!error) - goto found; - if (error != -ENOENT) - return error; - if (flags & XBF_INCORE) - return -ENOENT; + for (i = 0; i < nmaps; i++) + cmap.bm_len += map[i].bm_len; - error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp); + error = xfs_buf_map_verify(btp, &cmap); if (error) return error; - /* - * For buffers that fit entirely within a single page, first attempt to - * allocate the memory from the heap to minimise memory usage. If we - * can't get heap memory for these small buffers, we fall back to using - * the page allocator. - */ - if (BBTOB(new_bp->b_length) >= PAGE_SIZE || - xfs_buf_alloc_kmem(new_bp, flags) < 0) { - error = xfs_buf_alloc_pages(new_bp, flags); - if (error) - goto out_free_buf; - } + pag = xfs_perag_get(btp->bt_mount, + xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn)); - error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp); - if (error) - goto out_free_buf; + error = xfs_buf_lookup(pag, &cmap, flags, &bp); + if (error && error != -ENOENT) + goto out_put_perag; - if (bp != new_bp) - xfs_buf_free(new_bp); + /* cache hits always outnumber misses by at least 10:1 */ + if (unlikely(!bp)) { + XFS_STATS_INC(btp->bt_mount, xb_miss_locked); + + if (flags & XBF_INCORE) + goto out_put_perag; + + /* xfs_buf_find_insert() consumes the perag reference. */ + error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps, + flags, &bp); + if (error) + return error; + } else { + XFS_STATS_INC(btp->bt_mount, xb_get_locked); + xfs_perag_put(pag); + } -found: + /* We do not hold a perag reference anymore. 
*/ if (!bp->b_addr) { error = _xfs_buf_map_pages(bp, flags); if (unlikely(error)) { - xfs_warn_ratelimited(target->bt_mount, + xfs_warn_ratelimited(btp->bt_mount, "%s: failed to map %u pages", __func__, bp->b_page_count); xfs_buf_relse(bp); @@ -735,12 +718,13 @@ found: if (!(flags & XBF_READ)) xfs_buf_ioerror(bp, 0); - XFS_STATS_INC(target->bt_mount, xb_get); + XFS_STATS_INC(btp->bt_mount, xb_get); trace_xfs_buf_get(bp, flags, _RET_IP_); *bpp = bp; return 0; -out_free_buf: - xfs_buf_free(new_bp); + +out_put_perag: + xfs_perag_put(pag); return error; } -- cgit v1.2.3-59-g8ed1b From d8d9bbb0ee6c79191b704d88c8ae712b89e0d2bb Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 12:04:38 +1000 Subject: xfs: reduce the number of atomics when locking a buffer after lookup Avoid an extra atomic operation in the non-trylock case by only doing a trylock if the XBF_TRYLOCK flag is set. This follows the pattern in the IO path with NOWAIT semantics where the "trylock-fail-lock" path showed 5-10% reduced throughput compared to just using a single lock call when not under NOWAIT conditions. So make that same change here, too. See commit 942491c9e6d6 ("xfs: fix AIM7 regression") for details. Signed-off-by: Dave Chinner [hch: split from a larger patch] Signed-off-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 81ca951b451a..374c4e508b12 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -534,11 +534,12 @@ xfs_buf_find_lock( struct xfs_buf *bp, xfs_buf_flags_t flags) { - if (!xfs_buf_trylock(bp)) { - if (flags & XBF_TRYLOCK) { + if (flags & XBF_TRYLOCK) { + if (!xfs_buf_trylock(bp)) { XFS_STATS_INC(bp->b_mount, xb_busy_locked); return -EAGAIN; } + } else { xfs_buf_lock(bp); XFS_STATS_INC(bp->b_mount, xb_get_locked_waited); } -- cgit v1.2.3-59-g8ed1b From 32dd4f9c506b1bf147c24cf05423cd893bc06e38 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 12:04:43 +1000 Subject: xfs: remove a superfluous hash lookup when inserting new buffers Currently on the slow path insert we repeat the initial hash table lookup before we attempt the insert, resulting in two traversals of the hash table to ensure the insert is valid. The rhashtable API provides a method for an atomic lookup and insert operation, so we can avoid one of the hash table traversals by using this method. Adapted from a large patch containing this optimisation by Christoph Hellwig. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/xfs/xfs_buf.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 374c4e508b12..1a6542e01bec 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -623,8 +623,15 @@ xfs_buf_find_insert( } spin_lock(&pag->pag_buf_lock); - bp = rhashtable_lookup(&pag->pag_buf_hash, cmap, xfs_buf_hash_params); + bp = rhashtable_lookup_get_insert_fast(&pag->pag_buf_hash, + &new_bp->b_rhash_head, xfs_buf_hash_params); + if (IS_ERR(bp)) { + error = PTR_ERR(bp); + spin_unlock(&pag->pag_buf_lock); + goto out_free_buf; + } if (bp) { + /* found an existing buffer */ atomic_inc(&bp->b_hold); spin_unlock(&pag->pag_buf_lock); error = xfs_buf_find_lock(bp, flags); @@ -635,10 +642,8 @@ xfs_buf_find_insert( goto out_free_buf; } - /* The buffer keeps the perag reference until it is freed. 
*/ + /* The new buffer keeps the perag reference until it is freed. */ new_bp->b_pag = pag; - rhashtable_insert_fast(&pag->pag_buf_hash, &new_bp->b_rhash_head, - xfs_buf_hash_params); spin_unlock(&pag->pag_buf_lock); *bpp = new_bp; return 0; -- cgit v1.2.3-59-g8ed1b From 298f342245066309189d8637ca7339d56840c3e1 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Thu, 14 Jul 2022 12:05:07 +1000 Subject: xfs: lockless buffer lookup Now that we have a standalone fast path for buffer lookup, we can easily convert it to use rcu lookups. When we continually hammer the buffer cache with trylock lookups, we end up with a huge amount of lock contention on the per-ag buffer hash locks: - 92.71% 0.05% [kernel] [k] xfs_inodegc_worker - 92.67% xfs_inodegc_worker - 92.13% xfs_inode_unlink - 91.52% xfs_inactive_ifree - 85.63% xfs_read_agi - 85.61% xfs_trans_read_buf_map - 85.59% xfs_buf_read_map - xfs_buf_get_map - 85.55% xfs_buf_find - 72.87% _raw_spin_lock - do_raw_spin_lock 71.86% __pv_queued_spin_lock_slowpath - 8.74% xfs_buf_rele - 7.88% _raw_spin_lock - 7.88% do_raw_spin_lock 7.63% __pv_queued_spin_lock_slowpath - 1.70% xfs_buf_trylock - 1.68% down_trylock - 1.41% _raw_spin_lock_irqsave - 1.39% do_raw_spin_lock __pv_queued_spin_lock_slowpath - 0.76% _raw_spin_unlock 0.75% do_raw_spin_unlock This is basically hammering the pag->pag_buf_lock from lots of CPUs doing trylocks at the same time. Most of the buffer trylock operations ultimately fail after we've done the lookup, so we're really hammering the buf hash lock whilst making no progress. We can also see significant spinlock traffic on the same lock just under normal operation when lots of tasks are accessing metadata from the same AG, so let's avoid all this by converting the lookup fast path to leverages the rhashtable's ability to do rcu protected lookups. We avoid races with the buffer release path by using atomic_inc_not_zero() on the buffer hold count. Any buffer that is in the LRU will have a non-zero count, thereby allowing the lockless fast path to be taken in most cache hit situations. If the buffer hold count is zero, then it is likely going through the release path so in that case we fall back to the existing lookup miss slow path. The slow path will then do an atomic lookup and insert under the buffer hash lock and hence serialise correctly against buffer release freeing the buffer. The use of rcu protected lookups means that buffer handles now need to be freed by RCU callbacks (same as inodes). We still free the buffer pages before the RCU callback - we won't be trying to access them at all on a buffer that has zero references - but we need the buffer handle itself to be present for the entire rcu protected read side to detect a zero hold count correctly. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. 
Wong --- fs/xfs/xfs_buf.c | 22 +++++++++++++++------- fs/xfs/xfs_buf.h | 1 + 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'fs/xfs') diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 1a6542e01bec..6dac5583977f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -294,6 +294,16 @@ xfs_buf_free_pages( bp->b_flags &= ~_XBF_PAGES; } +static void +xfs_buf_free_callback( + struct callback_head *cb) +{ + struct xfs_buf *bp = container_of(cb, struct xfs_buf, b_rcu); + + xfs_buf_free_maps(bp); + kmem_cache_free(xfs_buf_cache, bp); +} + static void xfs_buf_free( struct xfs_buf *bp) @@ -307,8 +317,7 @@ xfs_buf_free( else if (bp->b_flags & _XBF_KMEM) kmem_free(bp->b_addr); - xfs_buf_free_maps(bp); - kmem_cache_free(xfs_buf_cache, bp); + call_rcu(&bp->b_rcu, xfs_buf_free_callback); } static int @@ -567,14 +576,13 @@ xfs_buf_lookup( struct xfs_buf *bp; int error; - spin_lock(&pag->pag_buf_lock); + rcu_read_lock(); bp = rhashtable_lookup(&pag->pag_buf_hash, map, xfs_buf_hash_params); - if (!bp) { - spin_unlock(&pag->pag_buf_lock); + if (!bp || !atomic_inc_not_zero(&bp->b_hold)) { + rcu_read_unlock(); return -ENOENT; } - atomic_inc(&bp->b_hold); - spin_unlock(&pag->pag_buf_lock); + rcu_read_unlock(); error = xfs_buf_find_lock(bp, flags); if (error) { diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 58e9034d51bd..02b3c1635ec3 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -196,6 +196,7 @@ struct xfs_buf { int b_last_error; const struct xfs_buf_ops *b_ops; + struct rcu_head b_rcu; }; /* Finding and Reading Buffers */ -- cgit v1.2.3-59-g8ed1b
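Taken together, the series leaves the cache-hit path free of the per-AG buffer hash lock. The sketch below is a condensed, annotated restatement of that final fast path and of the matching RCU-deferred freeing of the buffer handle, assembled from the hunks above so the safety argument can be read in one place. It is illustrative only: the helper name xfs_buf_lookup_sketch is invented for this summary, statistics and stale-buffer details are trimmed, and nothing here adds to or changes the patches themselves.

/*
 * Condensed illustration of the lockless cache-hit lookup after this
 * series. See the hunks above for the complete logic.
 */
static inline int
xfs_buf_lookup_sketch(
	struct xfs_perag	*pag,
	struct xfs_buf_map	*map,
	xfs_buf_flags_t		flags,
	struct xfs_buf		**bpp)
{
	struct xfs_buf		*bp;
	int			error;

	/* RCU protects the hash walk instead of pag->pag_buf_lock. */
	rcu_read_lock();
	bp = rhashtable_lookup(&pag->pag_buf_hash, map, xfs_buf_hash_params);

	/*
	 * A hold count of zero means the buffer is already heading through
	 * the release path, so treat it as a cache miss. The slow path
	 * insert then serialises against buffer release under the
	 * pag_buf_lock.
	 */
	if (!bp || !atomic_inc_not_zero(&bp->b_hold)) {
		rcu_read_unlock();
		return -ENOENT;
	}
	rcu_read_unlock();

	error = xfs_buf_find_lock(bp, flags);
	if (error) {
		xfs_buf_rele(bp);
		return error;
	}
	*bpp = bp;
	return 0;
}

/*
 * The RCU read side above may still inspect b_hold after it has dropped
 * to zero, so the buffer handle itself is only returned to the slab
 * cache once a grace period has elapsed - hence call_rcu() in
 * xfs_buf_free() and this callback.
 */
static void
xfs_buf_free_callback(
	struct callback_head	*cb)
{
	struct xfs_buf		*bp = container_of(cb, struct xfs_buf, b_rcu);

	xfs_buf_free_maps(bp);
	kmem_cache_free(xfs_buf_cache, bp);
}

Note that only the struct xfs_buf handle is freed through call_rcu(); as the final commit message explains, the buffer pages are still released immediately, because a buffer with a zero hold count never has its contents touched by the lookup side.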