diff options
Diffstat (limited to 'fs/xfs')
57 files changed, 1479 insertions, 1212 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index b056cfc6398e..1131dd01e4fe 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -106,6 +106,7 @@ xfs-y += xfs_log.o \ xfs_icreate_item.o \ xfs_inode_item.o \ xfs_inode_item_recover.o \ + xfs_iunlink_item.o \ xfs_refcount_item.o \ xfs_rmap_item.o \ xfs_log_recover.o \ diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index 3e920cf1b454..bb0c700afe3c 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -120,18 +120,18 @@ xfs_initialize_perag_data( for (index = 0; index < agcount; index++) { /* - * read the agf, then the agi. This gets us - * all the information we need and populates the - * per-ag structures for us. + * Read the AGF and AGI buffers to populate the per-ag + * structures for us. */ - error = xfs_alloc_pagf_init(mp, NULL, index, 0); - if (error) + pag = xfs_perag_get(mp, index); + error = xfs_alloc_read_agf(pag, NULL, 0, NULL); + if (!error) + error = xfs_ialloc_read_agi(pag, NULL, NULL); + if (error) { + xfs_perag_put(pag); return error; + } - error = xfs_ialloc_pagi_init(mp, NULL, index); - if (error) - return error; - pag = xfs_perag_get(mp, index); ifree += pag->pagi_freecount; ialloc += pag->pagi_count; bfree += pag->pagf_freeblks; @@ -194,17 +194,76 @@ xfs_free_perag( XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0); cancel_delayed_work_sync(&pag->pag_blockgc_work); - xfs_iunlink_destroy(pag); xfs_buf_hash_destroy(pag); call_rcu(&pag->rcu_head, __xfs_free_perag); } } +/* Find the size of the AG, in blocks. */ +static xfs_agblock_t +__xfs_ag_block_count( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_agnumber_t agcount, + xfs_rfsblock_t dblocks) +{ + ASSERT(agno < agcount); + + if (agno < agcount - 1) + return mp->m_sb.sb_agblocks; + return dblocks - (agno * mp->m_sb.sb_agblocks); +} + +xfs_agblock_t +xfs_ag_block_count( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + return __xfs_ag_block_count(mp, agno, mp->m_sb.sb_agcount, + mp->m_sb.sb_dblocks); +} + +/* Calculate the first and last possible inode number in an AG. */ +static void +__xfs_agino_range( + struct xfs_mount *mp, + xfs_agblock_t eoag, + xfs_agino_t *first, + xfs_agino_t *last) +{ + xfs_agblock_t bno; + + /* + * Calculate the first inode, which will be in the first + * cluster-aligned block after the AGFL. + */ + bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align); + *first = XFS_AGB_TO_AGINO(mp, bno); + + /* + * Calculate the last inode, which will be at the end of the + * last (aligned) cluster that can be allocated in the AG. + */ + bno = round_down(eoag, M_IGEO(mp)->cluster_align); + *last = XFS_AGB_TO_AGINO(mp, bno) - 1; +} + +void +xfs_agino_range( + struct xfs_mount *mp, + xfs_agnumber_t agno, + xfs_agino_t *first, + xfs_agino_t *last) +{ + return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last); +} + int xfs_initialize_perag( struct xfs_mount *mp, xfs_agnumber_t agcount, + xfs_rfsblock_t dblocks, xfs_agnumber_t *maxagi) { struct xfs_perag *pag; @@ -263,13 +322,18 @@ xfs_initialize_perag( if (error) goto out_remove_pag; - error = xfs_iunlink_init(pag); - if (error) - goto out_hash_destroy; - /* first new pag is fully initialized */ if (first_initialised == NULLAGNUMBER) first_initialised = index; + + /* + * Pre-calculated geometry + */ + pag->block_count = __xfs_ag_block_count(mp, index, agcount, + dblocks); + pag->min_block = XFS_AGFL_BLOCK(mp); + __xfs_agino_range(mp, pag->block_count, &pag->agino_min, + &pag->agino_max); } index = xfs_set_inode_alloc(mp, agcount); @@ -280,8 +344,6 @@ xfs_initialize_perag( mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); return 0; -out_hash_destroy: - xfs_buf_hash_destroy(pag); out_remove_pag: radix_tree_delete(&mp->m_perag_tree, index); out_free_pag: @@ -293,7 +355,6 @@ out_unwind_new_pags: if (!pag) break; xfs_buf_hash_destroy(pag); - xfs_iunlink_destroy(pag); kmem_free(pag); } return error; @@ -321,12 +382,6 @@ xfs_get_aghdr_buf( return 0; } -static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id) -{ - return mp->m_sb.sb_logstart > 0 && - id->agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); -} - /* * Generic btree root block init function */ @@ -352,7 +407,7 @@ xfs_freesp_init_recs( arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); - if (is_log_ag(mp, id)) { + if (xfs_ag_contains_log(mp, id->agno)) { struct xfs_alloc_rec *nrec; xfs_agblock_t start = XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart); @@ -479,7 +534,7 @@ xfs_rmaproot_init( } /* account for the log space */ - if (is_log_ag(mp, id)) { + if (xfs_ag_contains_log(mp, id->agno)) { rrec = XFS_RMAP_REC_ADDR(block, be16_to_cpu(block->bb_numrecs) + 1); rrec->rm_startblock = cpu_to_be32( @@ -550,7 +605,7 @@ xfs_agfblock_init( agf->agf_refcount_blocks = cpu_to_be32(1); } - if (is_log_ag(mp, id)) { + if (xfs_ag_contains_log(mp, id->agno)) { int64_t logblocks = mp->m_sb.sb_logblocks; be32_add_cpu(&agf->agf_freeblks, -logblocks); @@ -761,11 +816,11 @@ xfs_ag_init_headers( int xfs_ag_shrink_space( - struct xfs_mount *mp, + struct xfs_perag *pag, struct xfs_trans **tpp, - xfs_agnumber_t agno, xfs_extlen_t delta) { + struct xfs_mount *mp = pag->pag_mount; struct xfs_alloc_arg args = { .tp = *tpp, .mp = mp, @@ -782,14 +837,14 @@ xfs_ag_shrink_space( xfs_agblock_t aglen; int error, err2; - ASSERT(agno == mp->m_sb.sb_agcount - 1); - error = xfs_ialloc_read_agi(mp, *tpp, agno, &agibp); + ASSERT(pag->pag_agno == mp->m_sb.sb_agcount - 1); + error = xfs_ialloc_read_agi(pag, *tpp, &agibp); if (error) return error; agi = agibp->b_addr; - error = xfs_alloc_read_agf(mp, *tpp, agno, 0, &agfbp); + error = xfs_alloc_read_agf(pag, *tpp, 0, &agfbp); if (error) return error; @@ -801,13 +856,14 @@ xfs_ag_shrink_space( if (delta >= aglen) return -EINVAL; - args.fsbno = XFS_AGB_TO_FSB(mp, agno, aglen - delta); + args.fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta); /* * Make sure that the last inode cluster cannot overlap with the new * end of the AG, even if it's sparse. */ - error = xfs_ialloc_check_shrink(*tpp, agno, agibp, aglen - delta); + error = xfs_ialloc_check_shrink(*tpp, pag->pag_agno, agibp, + aglen - delta); if (error) return error; @@ -815,7 +871,7 @@ xfs_ag_shrink_space( * Disable perag reservations so it doesn't cause the allocation request * to fail. We'll reestablish reservation before we return. */ - error = xfs_ag_resv_free(agibp->b_pag); + error = xfs_ag_resv_free(pag); if (error) return error; @@ -844,7 +900,7 @@ xfs_ag_shrink_space( be32_add_cpu(&agi->agi_length, -delta); be32_add_cpu(&agf->agf_length, -delta); - err2 = xfs_ag_resv_init(agibp->b_pag, *tpp); + err2 = xfs_ag_resv_init(pag, *tpp); if (err2) { be32_add_cpu(&agi->agi_length, delta); be32_add_cpu(&agf->agf_length, delta); @@ -868,8 +924,9 @@ xfs_ag_shrink_space( xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH); xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH); return 0; + resv_init_out: - err2 = xfs_ag_resv_init(agibp->b_pag, *tpp); + err2 = xfs_ag_resv_init(pag, *tpp); if (!err2) return error; resv_err: @@ -883,9 +940,8 @@ resv_err: */ int xfs_ag_extend_space( - struct xfs_mount *mp, + struct xfs_perag *pag, struct xfs_trans *tp, - struct aghdr_init_data *id, xfs_extlen_t len) { struct xfs_buf *bp; @@ -893,23 +949,20 @@ xfs_ag_extend_space( struct xfs_agf *agf; int error; - /* - * Change the agi length. - */ - error = xfs_ialloc_read_agi(mp, tp, id->agno, &bp); + ASSERT(pag->pag_agno == pag->pag_mount->m_sb.sb_agcount - 1); + + error = xfs_ialloc_read_agi(pag, tp, &bp); if (error) return error; agi = bp->b_addr; be32_add_cpu(&agi->agi_length, len); - ASSERT(id->agno == mp->m_sb.sb_agcount - 1 || - be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks); xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH); /* * Change agf length. */ - error = xfs_alloc_read_agf(mp, tp, id->agno, 0, &bp); + error = xfs_alloc_read_agf(pag, tp, 0, &bp); if (error) return error; @@ -924,49 +977,49 @@ xfs_ag_extend_space( * XFS_RMAP_OINFO_SKIP_UPDATE is used here to tell the rmap btree that * this doesn't actually exist in the rmap btree. */ - error = xfs_rmap_free(tp, bp, bp->b_pag, - be32_to_cpu(agf->agf_length) - len, + error = xfs_rmap_free(tp, bp, pag, be32_to_cpu(agf->agf_length) - len, len, &XFS_RMAP_OINFO_SKIP_UPDATE); if (error) return error; - return xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, id->agno, + error = xfs_free_extent(tp, XFS_AGB_TO_FSB(pag->pag_mount, pag->pag_agno, be32_to_cpu(agf->agf_length) - len), len, &XFS_RMAP_OINFO_SKIP_UPDATE, XFS_AG_RESV_NONE); + if (error) + return error; + + /* Update perag geometry */ + pag->block_count = be32_to_cpu(agf->agf_length); + __xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min, + &pag->agino_max); + return 0; } /* Retrieve AG geometry. */ int xfs_ag_get_geometry( - struct xfs_mount *mp, - xfs_agnumber_t agno, + struct xfs_perag *pag, struct xfs_ag_geometry *ageo) { struct xfs_buf *agi_bp; struct xfs_buf *agf_bp; struct xfs_agi *agi; struct xfs_agf *agf; - struct xfs_perag *pag; unsigned int freeblks; int error; - if (agno >= mp->m_sb.sb_agcount) - return -EINVAL; - /* Lock the AG headers. */ - error = xfs_ialloc_read_agi(mp, NULL, agno, &agi_bp); + error = xfs_ialloc_read_agi(pag, NULL, &agi_bp); if (error) return error; - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agf_bp); + error = xfs_alloc_read_agf(pag, NULL, 0, &agf_bp); if (error) goto out_agi; - pag = agi_bp->b_pag; - /* Fill out form. */ memset(ageo, 0, sizeof(*ageo)); - ageo->ag_number = agno; + ageo->ag_number = pag->pag_agno; agi = agi_bp->b_addr; ageo->ag_icount = be32_to_cpu(agi->agi_count); diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index e411d51c2589..517a138faa66 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -67,6 +67,12 @@ struct xfs_perag { /* for rcu-safe freeing */ struct rcu_head rcu_head; + /* Precalculated geometry info */ + xfs_agblock_t block_count; + xfs_agblock_t min_block; + xfs_agino_t agino_min; + xfs_agino_t agino_max; + #ifdef __KERNEL__ /* -- kernel only structures below this line -- */ @@ -97,17 +103,11 @@ struct xfs_perag { /* background prealloc block trimming */ struct delayed_work pag_blockgc_work; - /* - * Unlinked inode information. This incore information reflects - * data stored in the AGI, so callers must hold the AGI buffer lock - * or have some other means to control concurrency. - */ - struct rhashtable pagi_unlinked_hash; #endif /* __KERNEL__ */ }; int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount, - xfs_agnumber_t *maxagi); + xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi); int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno); void xfs_free_perag(struct xfs_mount *mp); @@ -117,6 +117,56 @@ struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, void xfs_perag_put(struct xfs_perag *pag); /* + * Per-ag geometry infomation and validation + */ +xfs_agblock_t xfs_ag_block_count(struct xfs_mount *mp, xfs_agnumber_t agno); +void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agino_t *first, xfs_agino_t *last); + +static inline bool +xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno) +{ + if (agbno >= pag->block_count) + return false; + if (agbno <= pag->min_block) + return false; + return true; +} + +/* + * Verify that an AG inode number pointer neither points outside the AG + * nor points at static metadata. + */ +static inline bool +xfs_verify_agino(struct xfs_perag *pag, xfs_agino_t agino) +{ + if (agino < pag->agino_min) + return false; + if (agino > pag->agino_max) + return false; + return true; +} + +/* + * Verify that an AG inode number pointer neither points outside the AG + * nor points at static metadata, or is NULLAGINO. + */ +static inline bool +xfs_verify_agino_or_null(struct xfs_perag *pag, xfs_agino_t agino) +{ + if (agino == NULLAGINO) + return true; + return xfs_verify_agino(pag, agino); +} + +static inline bool +xfs_ag_contains_log(struct xfs_mount *mp, xfs_agnumber_t agno) +{ + return mp->m_sb.sb_logstart > 0 && + agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); +} + +/* * Perag iteration APIs */ static inline struct xfs_perag * @@ -168,11 +218,10 @@ struct aghdr_init_data { }; int xfs_ag_init_headers(struct xfs_mount *mp, struct aghdr_init_data *id); -int xfs_ag_shrink_space(struct xfs_mount *mp, struct xfs_trans **tpp, - xfs_agnumber_t agno, xfs_extlen_t delta); -int xfs_ag_extend_space(struct xfs_mount *mp, struct xfs_trans *tp, - struct aghdr_init_data *id, xfs_extlen_t len); -int xfs_ag_get_geometry(struct xfs_mount *mp, xfs_agnumber_t agno, - struct xfs_ag_geometry *ageo); +int xfs_ag_shrink_space(struct xfs_perag *pag, struct xfs_trans **tpp, + xfs_extlen_t delta); +int xfs_ag_extend_space(struct xfs_perag *pag, struct xfs_trans *tp, + xfs_extlen_t len); +int xfs_ag_get_geometry(struct xfs_perag *pag, struct xfs_ag_geometry *ageo); #endif /* __LIBXFS_AG_H */ diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index fe94058d4e9e..5af123d13a63 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -322,7 +322,7 @@ out: * address. */ if (has_resv) { - error2 = xfs_alloc_pagf_init(mp, tp, pag->pag_agno, 0); + error2 = xfs_alloc_read_agf(pag, tp, 0, NULL); if (error2) return error2; diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index d3f2886fdc08..41557c430cb6 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -248,7 +248,7 @@ xfs_alloc_get_rec( int *stat) /* output: success/failure */ { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno; + struct xfs_perag *pag = cur->bc_ag.pag; union xfs_btree_rec *rec; int error; @@ -263,11 +263,11 @@ xfs_alloc_get_rec( goto out_bad_rec; /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(mp, agno, *bno)) + if (!xfs_verify_agbno(pag, *bno)) goto out_bad_rec; if (*bno > *bno + *len) goto out_bad_rec; - if (!xfs_verify_agbno(mp, agno, *bno + *len - 1)) + if (!xfs_verify_agbno(pag, *bno + *len - 1)) goto out_bad_rec; return 0; @@ -275,7 +275,8 @@ xfs_alloc_get_rec( out_bad_rec: xfs_warn(mp, "%s Freespace BTree record corruption in AG %d detected!", - cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno); + cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", + pag->pag_agno); xfs_warn(mp, "start block 0x%x block count 0x%x", *bno, *len); return -EFSCORRUPTED; @@ -703,20 +704,19 @@ const struct xfs_buf_ops xfs_agfl_buf_ops = { /* * Read in the allocation group free block array. */ -int /* error */ +int xfs_alloc_read_agfl( - xfs_mount_t *mp, /* mount point structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - struct xfs_buf **bpp) /* buffer for the ag free block array */ + struct xfs_perag *pag, + struct xfs_trans *tp, + struct xfs_buf **bpp) { - struct xfs_buf *bp; /* return value */ - int error; + struct xfs_mount *mp = pag->pag_mount; + struct xfs_buf *bp; + int error; - ASSERT(agno != NULLAGNUMBER); error = xfs_trans_read_buf( mp, tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), + XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)), XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); if (error) return error; @@ -1075,7 +1075,8 @@ xfs_alloc_ag_vextent_small( be32_to_cpu(agf->agf_flcount) <= args->minleft) goto out; - error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0); + error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp, + &fbno, 0); if (error) goto error; if (fbno == NULLAGBLOCK) @@ -2609,7 +2610,7 @@ xfs_alloc_fix_freelist( ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); if (!pag->pagf_init) { - error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); + error = xfs_alloc_read_agf(pag, tp, flags, &agbp); if (error) { /* Couldn't lock the AGF so skip this AG. */ if (error == -EAGAIN) @@ -2639,7 +2640,7 @@ xfs_alloc_fix_freelist( * Can fail if we're not blocking on locks, and it's held. */ if (!agbp) { - error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp); + error = xfs_alloc_read_agf(pag, tp, flags, &agbp); if (error) { /* Couldn't lock the AGF so skip this AG. */ if (error == -EAGAIN) @@ -2697,7 +2698,7 @@ xfs_alloc_fix_freelist( else targs.oinfo = XFS_RMAP_OINFO_AG; while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) { - error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); + error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0); if (error) goto out_agbp_relse; @@ -2712,7 +2713,7 @@ xfs_alloc_fix_freelist( targs.alignment = targs.minlen = targs.prod = 1; targs.type = XFS_ALLOCTYPE_THIS_AG; targs.pag = pag; - error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp); + error = xfs_alloc_read_agfl(pag, tp, &agflbp); if (error) goto out_agbp_relse; @@ -2741,7 +2742,7 @@ xfs_alloc_fix_freelist( * Put each allocated block on the list. */ for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) { - error = xfs_alloc_put_freelist(tp, agbp, + error = xfs_alloc_put_freelist(pag, tp, agbp, agflbp, bno, 0); if (error) goto out_agflbp_relse; @@ -2767,6 +2768,7 @@ out_no_agbp: */ int xfs_alloc_get_freelist( + struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, xfs_agblock_t *bnop, @@ -2779,7 +2781,6 @@ xfs_alloc_get_freelist( int error; uint32_t logflags; struct xfs_mount *mp = tp->t_mountp; - struct xfs_perag *pag; /* * Freelist is empty, give up. @@ -2791,8 +2792,7 @@ xfs_alloc_get_freelist( /* * Read the array of free blocks. */ - error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno), - &agflbp); + error = xfs_alloc_read_agfl(pag, tp, &agflbp); if (error) return error; @@ -2807,7 +2807,6 @@ xfs_alloc_get_freelist( if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp)) agf->agf_flfirst = 0; - pag = agbp->b_pag; ASSERT(!pag->pagf_agflreset); be32_add_cpu(&agf->agf_flcount, -1); pag->pagf_flcount--; @@ -2868,29 +2867,11 @@ xfs_alloc_log_agf( } /* - * Interface for inode allocation to force the pag data to be initialized. - */ -int /* error */ -xfs_alloc_pagf_init( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - int flags) /* XFS_ALLOC_FLAGS_... */ -{ - struct xfs_buf *bp; - int error; - - error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp); - if (!error) - xfs_trans_brelse(tp, bp); - return error; -} - -/* * Put the block on the freelist for the allocation group. */ int xfs_alloc_put_freelist( + struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, struct xfs_buf *agflbp, @@ -2899,21 +2880,22 @@ xfs_alloc_put_freelist( { struct xfs_mount *mp = tp->t_mountp; struct xfs_agf *agf = agbp->b_addr; - struct xfs_perag *pag; __be32 *blockp; int error; uint32_t logflags; __be32 *agfl_bno; int startoff; - if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, - be32_to_cpu(agf->agf_seqno), &agflbp))) - return error; + if (!agflbp) { + error = xfs_alloc_read_agfl(pag, tp, &agflbp); + if (error) + return error; + } + be32_add_cpu(&agf->agf_fllast, 1); if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp)) agf->agf_fllast = 0; - pag = agbp->b_pag; ASSERT(!pag->pagf_agflreset); be32_add_cpu(&agf->agf_flcount, 1); pag->pagf_flcount++; @@ -3070,61 +3052,57 @@ const struct xfs_buf_ops xfs_agf_buf_ops = { /* * Read in the allocation group header (free/alloc section). */ -int /* error */ +int xfs_read_agf( - struct xfs_mount *mp, /* mount point structure */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - int flags, /* XFS_BUF_ */ - struct xfs_buf **bpp) /* buffer for the ag freelist header */ + struct xfs_perag *pag, + struct xfs_trans *tp, + int flags, + struct xfs_buf **agfbpp) { - int error; + struct xfs_mount *mp = pag->pag_mount; + int error; - trace_xfs_read_agf(mp, agno); + trace_xfs_read_agf(pag->pag_mount, pag->pag_agno); - ASSERT(agno != NULLAGNUMBER); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); + XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops); if (error) return error; - ASSERT(!(*bpp)->b_error); - xfs_buf_set_ref(*bpp, XFS_AGF_REF); + xfs_buf_set_ref(*agfbpp, XFS_AGF_REF); return 0; } /* - * Read in the allocation group header (free/alloc section). + * Read in the allocation group header (free/alloc section) and initialise the + * perag structure if necessary. If the caller provides @agfbpp, then return the + * locked buffer to the caller, otherwise free it. */ -int /* error */ +int xfs_alloc_read_agf( - struct xfs_mount *mp, /* mount point structure */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - int flags, /* XFS_ALLOC_FLAG_... */ - struct xfs_buf **bpp) /* buffer for the ag freelist header */ + struct xfs_perag *pag, + struct xfs_trans *tp, + int flags, + struct xfs_buf **agfbpp) { - struct xfs_agf *agf; /* ag freelist header */ - struct xfs_perag *pag; /* per allocation group data */ + struct xfs_buf *agfbp; + struct xfs_agf *agf; int error; int allocbt_blks; - trace_xfs_alloc_read_agf(mp, agno); + trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno); /* We don't support trylock when freeing. */ ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) != (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)); - ASSERT(agno != NULLAGNUMBER); - error = xfs_read_agf(mp, tp, agno, + error = xfs_read_agf(pag, tp, (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, - bpp); + &agfbp); if (error) return error; - ASSERT(!(*bpp)->b_error); - agf = (*bpp)->b_addr; - pag = (*bpp)->b_pag; + agf = agfbp->b_addr; if (!pag->pagf_init) { pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); @@ -3138,7 +3116,7 @@ xfs_alloc_read_agf( be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); pag->pagf_init = 1; - pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf); + pag->pagf_agflreset = xfs_agfl_needs_reset(pag->pag_mount, agf); /* * Update the in-core allocbt counter. Filter out the rmapbt @@ -3148,13 +3126,14 @@ xfs_alloc_read_agf( * counter only tracks non-root blocks. */ allocbt_blks = pag->pagf_btreeblks; - if (xfs_has_rmapbt(mp)) + if (xfs_has_rmapbt(pag->pag_mount)) allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1; if (allocbt_blks > 0) - atomic64_add(allocbt_blks, &mp->m_allocbt_blks); + atomic64_add(allocbt_blks, + &pag->pag_mount->m_allocbt_blks); } #ifdef DEBUG - else if (!xfs_is_shutdown(mp)) { + else if (!xfs_is_shutdown(pag->pag_mount)) { ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); @@ -3165,6 +3144,10 @@ xfs_alloc_read_agf( be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); } #endif + if (agfbpp) + *agfbpp = agfbp; + else + xfs_trans_brelse(tp, agfbp); return 0; } diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 84ca09b2223f..2c3f762dfb58 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -95,6 +95,11 @@ xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_perag *pag, xfs_extlen_t need, xfs_extlen_t reserved); unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp, struct xfs_perag *pag); +int xfs_alloc_get_freelist(struct xfs_perag *pag, struct xfs_trans *tp, + struct xfs_buf *agfbp, xfs_agblock_t *bnop, int btreeblk); +int xfs_alloc_put_freelist(struct xfs_perag *pag, struct xfs_trans *tp, + struct xfs_buf *agfbp, struct xfs_buf *agflbp, + xfs_agblock_t bno, int btreeblk); /* * Compute and fill in value of m_alloc_maxlevels. @@ -104,17 +109,6 @@ xfs_alloc_compute_maxlevels( struct xfs_mount *mp); /* file system mount structure */ /* - * Get a block from the freelist. - * Returns with the buffer for the block gotten. - */ -int /* error */ -xfs_alloc_get_freelist( - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_buf *agbp, /* buffer containing the agf structure */ - xfs_agblock_t *bnop, /* block address retrieved from freelist */ - int btreeblk); /* destination is a AGF btree */ - -/* * Log the given fields from the agf structure. */ void @@ -124,38 +118,6 @@ xfs_alloc_log_agf( uint32_t fields);/* mask of fields to be logged (XFS_AGF_...) */ /* - * Interface for inode allocation to force the pag data to be initialized. - */ -int /* error */ -xfs_alloc_pagf_init( - struct xfs_mount *mp, /* file system mount structure */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - int flags); /* XFS_ALLOC_FLAGS_... */ - -/* - * Put the block on the freelist for the allocation group. - */ -int /* error */ -xfs_alloc_put_freelist( - struct xfs_trans *tp, /* transaction pointer */ - struct xfs_buf *agbp, /* buffer for a.g. freelist header */ - struct xfs_buf *agflbp,/* buffer for a.g. free block array */ - xfs_agblock_t bno, /* block being freed */ - int btreeblk); /* owner was a AGF btree */ - -/* - * Read in the allocation group header (free/alloc section). - */ -int /* error */ -xfs_alloc_read_agf( - struct xfs_mount *mp, /* mount point structure */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - int flags, /* XFS_ALLOC_FLAG_... */ - struct xfs_buf **bpp); /* buffer for the ag freelist header */ - -/* * Allocate an extent (variable-size). */ int /* error */ @@ -206,10 +168,12 @@ xfs_alloc_get_rec( xfs_extlen_t *len, /* output: length of extent */ int *stat); /* output: success/failure */ -int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); -int xfs_alloc_read_agfl(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, struct xfs_buf **bpp); +int xfs_read_agf(struct xfs_perag *pag, struct xfs_trans *tp, int flags, + struct xfs_buf **agfbpp); +int xfs_alloc_read_agf(struct xfs_perag *pag, struct xfs_trans *tp, int flags, + struct xfs_buf **agfbpp); +int xfs_alloc_read_agfl(struct xfs_perag *pag, struct xfs_trans *tp, + struct xfs_buf **bpp); int xfs_free_agfl_block(struct xfs_trans *, xfs_agnumber_t, xfs_agblock_t, struct xfs_buf *, struct xfs_owner_info *); int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags); diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 8c9f73cc0bee..549a3cba0234 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -60,8 +60,8 @@ xfs_allocbt_alloc_block( xfs_agblock_t bno; /* Allocate the new block from the freelist. If we can't, give up. */ - error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp, - &bno, 1); + error = xfs_alloc_get_freelist(cur->bc_ag.pag, cur->bc_tp, + cur->bc_ag.agbp, &bno, 1); if (error) return error; @@ -71,7 +71,7 @@ xfs_allocbt_alloc_block( } atomic64_inc(&cur->bc_mp->m_allocbt_blks); - xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agbp->b_pag, bno, 1, false); + xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.pag, bno, 1, false); new->s = cpu_to_be32(bno); @@ -89,7 +89,8 @@ xfs_allocbt_free_block( int error; bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp)); - error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); + error = xfs_alloc_put_freelist(cur->bc_ag.pag, cur->bc_tp, agbp, NULL, + bno, 1); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index 224649a76cbb..6b8857e53add 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1558,7 +1558,7 @@ xfs_attr_node_get( * If not in a transaction, we have to release all the buffers. */ out_release: - for (i = 0; state != NULL && i < state->path.active; i++) { + for (i = 0; i < state->path.active; i++) { xfs_trans_brelse(args->trans, state->path.blk[i].bp); state->path.blk[i].bp = NULL; } diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 6833110d1bd4..88828fcf0453 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3185,7 +3185,8 @@ xfs_bmap_longest_free_extent( pag = xfs_perag_get(mp, ag); if (!pag->pagf_init) { - error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK); + error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_TRYLOCK, + NULL); if (error) { /* Couldn't lock the AGF, so skip this AG. */ if (error == -EAGAIN) { diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 2eecc49fc1b2..06ab364d2de3 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -91,10 +91,9 @@ xfs_btree_check_lblock_siblings( static inline xfs_failaddr_t xfs_btree_check_sblock_siblings( - struct xfs_mount *mp, + struct xfs_perag *pag, struct xfs_btree_cur *cur, int level, - xfs_agnumber_t agno, xfs_agblock_t agbno, __be32 dsibling) { @@ -110,7 +109,7 @@ xfs_btree_check_sblock_siblings( if (!xfs_btree_check_sptr(cur, sibling, level + 1)) return __this_address; } else { - if (!xfs_verify_agbno(mp, agno, sibling)) + if (!xfs_verify_agbno(pag, sibling)) return __this_address; } return NULL; @@ -195,11 +194,11 @@ __xfs_btree_check_sblock( struct xfs_buf *bp) { struct xfs_mount *mp = cur->bc_mp; + struct xfs_perag *pag = cur->bc_ag.pag; xfs_btnum_t btnum = cur->bc_btnum; int crc = xfs_has_crc(mp); xfs_failaddr_t fa; xfs_agblock_t agbno = NULLAGBLOCK; - xfs_agnumber_t agno = NULLAGNUMBER; if (crc) { if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid)) @@ -217,16 +216,14 @@ __xfs_btree_check_sblock( cur->bc_ops->get_maxrecs(cur, level)) return __this_address; - if (bp) { + if (bp) agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp)); - agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp)); - } - fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno, agbno, + fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno, block->bb_u.s.bb_leftsib); if (!fa) - fa = xfs_btree_check_sblock_siblings(mp, cur, level, agno, - agbno, block->bb_u.s.bb_rightsib); + fa = xfs_btree_check_sblock_siblings(pag, cur, level, agbno, + block->bb_u.s.bb_rightsib); return fa; } @@ -288,7 +285,7 @@ xfs_btree_check_sptr( { if (level <= 0) return false; - return xfs_verify_agbno(cur->bc_mp, cur->bc_ag.pag->pag_agno, agbno); + return xfs_verify_agbno(cur->bc_ag.pag, agbno); } /* @@ -4595,7 +4592,6 @@ xfs_btree_sblock_verify( { struct xfs_mount *mp = bp->b_mount; struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); - xfs_agnumber_t agno; xfs_agblock_t agbno; xfs_failaddr_t fa; @@ -4604,12 +4600,11 @@ xfs_btree_sblock_verify( return __this_address; /* sibling pointer verification */ - agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp)); agbno = xfs_daddr_to_agbno(mp, xfs_buf_daddr(bp)); - fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno, + fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno, block->bb_u.s.bb_leftsib); if (!fa) - fa = xfs_btree_check_sblock_siblings(mp, NULL, -1, agno, agbno, + fa = xfs_btree_check_sblock_siblings(bp->b_pag, NULL, -1, agbno, block->bb_u.s.bb_rightsib); return fa; } diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index bf2f4bc89193..6cdfd64bc56b 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -105,7 +105,6 @@ xfs_inobt_get_rec( int *stat) { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno; union xfs_btree_rec *rec; int error; uint64_t realfree; @@ -116,7 +115,7 @@ xfs_inobt_get_rec( xfs_inobt_btrec_to_irec(mp, rec, irec); - if (!xfs_verify_agino(mp, agno, irec->ir_startino)) + if (!xfs_verify_agino(cur->bc_ag.pag, irec->ir_startino)) goto out_bad_rec; if (irec->ir_count < XFS_INODES_PER_HOLEMASK_BIT || irec->ir_count > XFS_INODES_PER_CHUNK) @@ -137,7 +136,8 @@ xfs_inobt_get_rec( out_bad_rec: xfs_warn(mp, "%s Inode BTree record corruption in AG %d detected!", - cur->bc_btnum == XFS_BTNUM_INO ? "Used" : "Free", agno); + cur->bc_btnum == XFS_BTNUM_INO ? "Used" : "Free", + cur->bc_ag.pag->pag_agno); xfs_warn(mp, "start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x", irec->ir_startino, irec->ir_count, irec->ir_freecount, @@ -1610,7 +1610,7 @@ xfs_dialloc_good_ag( return false; if (!pag->pagi_init) { - error = xfs_ialloc_pagi_init(mp, tp, pag->pag_agno); + error = xfs_ialloc_read_agi(pag, tp, NULL); if (error) return false; } @@ -1621,7 +1621,7 @@ xfs_dialloc_good_ag( return false; if (!pag->pagf_init) { - error = xfs_alloc_pagf_init(mp, tp, pag->pag_agno, flags); + error = xfs_alloc_read_agf(pag, tp, flags, NULL); if (error) return false; } @@ -1679,7 +1679,7 @@ xfs_dialloc_try_ag( * Then read in the AGI buffer and recheck with the AGI buffer * lock held. */ - error = xfs_ialloc_read_agi(pag->pag_mount, *tpp, pag->pag_agno, &agbp); + error = xfs_ialloc_read_agi(pag, *tpp, &agbp); if (error) return error; @@ -2169,7 +2169,7 @@ xfs_difree( /* * Get the allocation group header. */ - error = xfs_ialloc_read_agi(mp, tp, pag->pag_agno, &agbp); + error = xfs_ialloc_read_agi(pag, tp, &agbp); if (error) { xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", __func__, error); @@ -2215,7 +2215,7 @@ xfs_imap_lookup( int error; int i; - error = xfs_ialloc_read_agi(mp, tp, pag->pag_agno, &agbp); + error = xfs_ialloc_read_agi(pag, tp, &agbp); if (error) { xfs_alert(mp, "%s: xfs_ialloc_read_agi() returned error %d, agno %d", @@ -2571,47 +2571,48 @@ const struct xfs_buf_ops xfs_agi_buf_ops = { */ int xfs_read_agi( - struct xfs_mount *mp, /* file system mount structure */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - struct xfs_buf **bpp) /* allocation group hdr buf */ + struct xfs_perag *pag, + struct xfs_trans *tp, + struct xfs_buf **agibpp) { + struct xfs_mount *mp = pag->pag_mount; int error; - trace_xfs_read_agi(mp, agno); + trace_xfs_read_agi(pag->pag_mount, pag->pag_agno); - ASSERT(agno != NULLAGNUMBER); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops); + XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGI_DADDR(mp)), + XFS_FSS_TO_BB(mp, 1), 0, agibpp, &xfs_agi_buf_ops); if (error) return error; if (tp) - xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_AGI_BUF); + xfs_trans_buf_set_type(tp, *agibpp, XFS_BLFT_AGI_BUF); - xfs_buf_set_ref(*bpp, XFS_AGI_REF); + xfs_buf_set_ref(*agibpp, XFS_AGI_REF); return 0; } +/* + * Read in the agi and initialise the per-ag data. If the caller supplies a + * @agibpp, return the locked AGI buffer to them, otherwise release it. + */ int xfs_ialloc_read_agi( - struct xfs_mount *mp, /* file system mount structure */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - struct xfs_buf **bpp) /* allocation group hdr buf */ + struct xfs_perag *pag, + struct xfs_trans *tp, + struct xfs_buf **agibpp) { - struct xfs_agi *agi; /* allocation group header */ - struct xfs_perag *pag; /* per allocation group data */ + struct xfs_buf *agibp; + struct xfs_agi *agi; int error; - trace_xfs_ialloc_read_agi(mp, agno); + trace_xfs_ialloc_read_agi(pag->pag_mount, pag->pag_agno); - error = xfs_read_agi(mp, tp, agno, bpp); + error = xfs_read_agi(pag, tp, &agibp); if (error) return error; - agi = (*bpp)->b_addr; - pag = (*bpp)->b_pag; + agi = agibp->b_addr; if (!pag->pagi_init) { pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); pag->pagi_count = be32_to_cpu(agi->agi_count); @@ -2623,27 +2624,11 @@ xfs_ialloc_read_agi( * we are in the middle of a forced shutdown. */ ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) || - xfs_is_shutdown(mp)); - return 0; -} - -/* - * Read in the agi to initialise the per-ag data in the mount structure - */ -int -xfs_ialloc_pagi_init( - xfs_mount_t *mp, /* file system mount structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno) /* allocation group number */ -{ - struct xfs_buf *bp = NULL; - int error; - - error = xfs_ialloc_read_agi(mp, tp, agno, &bp); - if (error) - return error; - if (bp) - xfs_trans_brelse(tp, bp); + xfs_is_shutdown(pag->pag_mount)); + if (agibpp) + *agibpp = agibp; + else + xfs_trans_brelse(tp, agibp); return 0; } @@ -2912,8 +2897,7 @@ xfs_ialloc_calc_rootino( * allocation group, or very odd geometries created by old mkfs * versions on very small filesystems. */ - if (mp->m_sb.sb_logstart && - XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0) + if (xfs_ag_contains_log(mp, 0)) first_bno += mp->m_sb.sb_logblocks; /* diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index a7705b6a1fd3..9bbbca6ac4ed 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -62,25 +62,10 @@ xfs_ialloc_log_agi( struct xfs_buf *bp, /* allocation group header buffer */ uint32_t fields); /* bitmask of fields to log */ -/* - * Read in the allocation group header (inode allocation section) - */ -int /* error */ -xfs_ialloc_read_agi( - struct xfs_mount *mp, /* file system mount structure */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - struct xfs_buf **bpp); /* allocation group hdr buf */ - -/* - * Read in the allocation group header to initialise the per-ag data - * in the mount structure - */ -int -xfs_ialloc_pagi_init( - struct xfs_mount *mp, /* file system mount structure */ - struct xfs_trans *tp, /* transaction pointer */ - xfs_agnumber_t agno); /* allocation group number */ +int xfs_read_agi(struct xfs_perag *pag, struct xfs_trans *tp, + struct xfs_buf **agibpp); +int xfs_ialloc_read_agi(struct xfs_perag *pag, struct xfs_trans *tp, + struct xfs_buf **agibpp); /* * Lookup a record by ino in the btree given by cur. @@ -102,8 +87,6 @@ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_agblock_t length, unsigned int gen); -int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, struct xfs_buf **bpp); union xfs_btree_rec; void xfs_inobt_btrec_to_irec(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index b2ad2fdc40f5..8c83e265770c 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -683,10 +683,10 @@ xfs_inobt_rec_check_count( static xfs_extlen_t xfs_inobt_max_size( - struct xfs_mount *mp, - xfs_agnumber_t agno) + struct xfs_perag *pag) { - xfs_agblock_t agblocks = xfs_ag_block_count(mp, agno); + struct xfs_mount *mp = pag->pag_mount; + xfs_agblock_t agblocks = pag->block_count; /* Bail out if we're uninitialized, which can happen in mkfs. */ if (M_IGEO(mp)->inobt_mxr[0] == 0) @@ -697,8 +697,7 @@ xfs_inobt_max_size( * never be available for the kinds of things that would require btree * expansion. We therefore can pretend the space isn't there. */ - if (mp->m_sb.sb_logstart && - XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == agno) + if (xfs_ag_contains_log(mp, pag->pag_agno)) agblocks -= mp->m_sb.sb_logblocks; return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr, @@ -722,7 +721,7 @@ xfs_inobt_cur( ASSERT(*agi_bpp == NULL); ASSERT(*curpp == NULL); - error = xfs_ialloc_read_agi(mp, tp, pag->pag_agno, agi_bpp); + error = xfs_ialloc_read_agi(pag, tp, agi_bpp); if (error) return error; @@ -757,16 +756,15 @@ xfs_inobt_count_blocks( /* Read finobt block count from AGI header. */ static int xfs_finobt_read_blocks( - struct xfs_mount *mp, - struct xfs_trans *tp, struct xfs_perag *pag, + struct xfs_trans *tp, xfs_extlen_t *tree_blocks) { struct xfs_buf *agbp; struct xfs_agi *agi; int error; - error = xfs_ialloc_read_agi(mp, tp, pag->pag_agno, &agbp); + error = xfs_ialloc_read_agi(pag, tp, &agbp); if (error) return error; @@ -794,14 +792,14 @@ xfs_finobt_calc_reserves( return 0; if (xfs_has_inobtcounts(mp)) - error = xfs_finobt_read_blocks(mp, tp, pag, &tree_len); + error = xfs_finobt_read_blocks(pag, tp, &tree_len); else error = xfs_inobt_count_blocks(mp, tp, pag, XFS_BTNUM_FINO, &tree_len); if (error) return error; - *ask += xfs_inobt_max_size(mp, pag->pag_agno); + *ask += xfs_inobt_max_size(pag); *used += tree_len; return 0; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 3b1b63f9d886..806f209defed 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -10,6 +10,7 @@ #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" +#include "xfs_ag.h" #include "xfs_inode.h" #include "xfs_errortag.h" #include "xfs_error.h" @@ -41,14 +42,12 @@ xfs_inode_buf_verify( bool readahead) { struct xfs_mount *mp = bp->b_mount; - xfs_agnumber_t agno; int i; int ni; /* * Validate the magic number and version of every inode in the buffer */ - agno = xfs_daddr_to_agno(mp, xfs_buf_daddr(bp)); ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; for (i = 0; i < ni; i++) { struct xfs_dinode *dip; @@ -59,7 +58,7 @@ xfs_inode_buf_verify( unlinked_ino = be32_to_cpu(dip->di_next_unlinked); di_ok = xfs_verify_magic16(bp, dip->di_magic) && xfs_dinode_good_version(mp, dip->di_version) && - xfs_verify_agino_or_null(mp, agno, unlinked_ino); + xfs_verify_agino_or_null(bp->b_pag, unlinked_ino); if (unlikely(XFS_TEST_ERROR(!di_ok, mp, XFS_ERRTAG_ITOBP_INOTOBP))) { if (readahead) { @@ -230,7 +229,8 @@ xfs_inode_from_disk( ip->i_nblocks = be64_to_cpu(from->di_nblocks); ip->i_extsize = be32_to_cpu(from->di_extsize); ip->i_forkoff = from->di_forkoff; - ip->i_diflags = be16_to_cpu(from->di_flags); + ip->i_diflags = be16_to_cpu(from->di_flags); + ip->i_next_unlinked = be32_to_cpu(from->di_next_unlinked); if (from->di_dmevmask || from->di_dmstate) xfs_iflags_set(ip, XFS_IPRESERVE_DM_FIELDS); diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 97e9e6020596..64b910caafaa 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -111,7 +111,7 @@ xfs_refcount_get_rec( int *stat) { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno; + struct xfs_perag *pag = cur->bc_ag.pag; union xfs_btree_rec *rec; int error; xfs_agblock_t realstart; @@ -121,8 +121,6 @@ xfs_refcount_get_rec( return error; xfs_refcount_btrec_to_irec(rec, irec); - - agno = cur->bc_ag.pag->pag_agno; if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) goto out_bad_rec; @@ -137,22 +135,23 @@ xfs_refcount_get_rec( } /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(mp, agno, realstart)) + if (!xfs_verify_agbno(pag, realstart)) goto out_bad_rec; if (realstart > realstart + irec->rc_blockcount) goto out_bad_rec; - if (!xfs_verify_agbno(mp, agno, realstart + irec->rc_blockcount - 1)) + if (!xfs_verify_agbno(pag, realstart + irec->rc_blockcount - 1)) goto out_bad_rec; if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) goto out_bad_rec; - trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec); + trace_xfs_refcount_get(cur->bc_mp, pag->pag_agno, irec); return 0; out_bad_rec: xfs_warn(mp, - "Refcount BTree record corruption in AG %d detected!", agno); + "Refcount BTree record corruption in AG %d detected!", + pag->pag_agno); xfs_warn(mp, "Start block 0x%x, block count 0x%x, references 0x%x", irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount); @@ -1177,8 +1176,8 @@ xfs_refcount_finish_one( *pcur = NULL; } if (rcur == NULL) { - error = xfs_alloc_read_agf(tp->t_mountp, tp, pag->pag_agno, - XFS_ALLOC_FLAG_FREEING, &agbp); + error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_FREEING, + &agbp); if (error) goto out_drop; @@ -1710,7 +1709,7 @@ xfs_refcount_recover_cow_leftovers( if (error) return error; - error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp); + error = xfs_alloc_read_agf(pag, tp, 0, &agbp); if (error) goto out_trans; cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index d14c1720b0fb..316c1ec0c3c2 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -493,7 +493,7 @@ xfs_refcountbt_calc_reserves( if (!xfs_has_reflink(mp)) return 0; - error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp); + error = xfs_alloc_read_agf(pag, tp, 0, &agbp); if (error) return error; @@ -507,8 +507,7 @@ xfs_refcountbt_calc_reserves( * never be available for the kinds of things that would require btree * expansion. We therefore can pretend the space isn't there. */ - if (mp->m_sb.sb_logstart && - XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == pag->pag_agno) + if (xfs_ag_contains_log(mp, pag->pag_agno)) agblocks -= mp->m_sb.sb_logblocks; *ask += xfs_refcountbt_max_size(mp, agblocks); diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 2845019d31da..094dfc897ebc 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -215,7 +215,7 @@ xfs_rmap_get_rec( int *stat) { struct xfs_mount *mp = cur->bc_mp; - xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno; + struct xfs_perag *pag = cur->bc_ag.pag; union xfs_btree_rec *rec; int error; @@ -235,12 +235,12 @@ xfs_rmap_get_rec( goto out_bad_rec; } else { /* check for valid extent range, including overflow */ - if (!xfs_verify_agbno(mp, agno, irec->rm_startblock)) + if (!xfs_verify_agbno(pag, irec->rm_startblock)) goto out_bad_rec; if (irec->rm_startblock > irec->rm_startblock + irec->rm_blockcount) goto out_bad_rec; - if (!xfs_verify_agbno(mp, agno, + if (!xfs_verify_agbno(pag, irec->rm_startblock + irec->rm_blockcount - 1)) goto out_bad_rec; } @@ -254,7 +254,7 @@ xfs_rmap_get_rec( out_bad_rec: xfs_warn(mp, "Reverse Mapping BTree record corruption in AG %d detected!", - agno); + pag->pag_agno); xfs_warn(mp, "Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x", irec->rm_owner, irec->rm_flags, irec->rm_startblock, diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 69e104d0277f..7f83f62e51e0 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -90,7 +90,7 @@ xfs_rmapbt_alloc_block( xfs_agblock_t bno; /* Allocate the new block from the freelist. If we can't, give up. */ - error = xfs_alloc_get_freelist(cur->bc_tp, cur->bc_ag.agbp, + error = xfs_alloc_get_freelist(pag, cur->bc_tp, cur->bc_ag.agbp, &bno, 1); if (error) return error; @@ -129,7 +129,7 @@ xfs_rmapbt_free_block( bno, 1); be32_add_cpu(&agf->agf_rmap_blocks, -1); xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS); - error = xfs_alloc_put_freelist(cur->bc_tp, agbp, NULL, bno, 1); + error = xfs_alloc_put_freelist(pag, cur->bc_tp, agbp, NULL, bno, 1); if (error) return error; @@ -652,7 +652,7 @@ xfs_rmapbt_calc_reserves( if (!xfs_has_rmapbt(mp)) return 0; - error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, &agbp); + error = xfs_alloc_read_agf(pag, tp, 0, &agbp); if (error) return error; @@ -666,8 +666,7 @@ xfs_rmapbt_calc_reserves( * never be available for the kinds of things that would require btree * expansion. We therefore can pretend the space isn't there. */ - if (mp->m_sb.sb_logstart && - XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == pag->pag_agno) + if (xfs_ag_contains_log(mp, pag->pag_agno)) agblocks -= mp->m_sb.sb_logblocks; /* Reserve 1% of the AG or enough for 1 block per record. */ diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index e810d23f2d97..5c2765934732 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -13,25 +13,13 @@ #include "xfs_mount.h" #include "xfs_ag.h" -/* Find the size of the AG, in blocks. */ -inline xfs_agblock_t -xfs_ag_block_count( - struct xfs_mount *mp, - xfs_agnumber_t agno) -{ - ASSERT(agno < mp->m_sb.sb_agcount); - - if (agno < mp->m_sb.sb_agcount - 1) - return mp->m_sb.sb_agblocks; - return mp->m_sb.sb_dblocks - (agno * mp->m_sb.sb_agblocks); -} /* * Verify that an AG block number pointer neither points outside the AG * nor points at static metadata. */ -inline bool -xfs_verify_agbno( +static inline bool +xfs_verify_agno_agbno( struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno) @@ -59,7 +47,7 @@ xfs_verify_fsbno( if (agno >= mp->m_sb.sb_agcount) return false; - return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); + return xfs_verify_agno_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); } /* @@ -85,40 +73,12 @@ xfs_verify_fsbext( XFS_FSB_TO_AGNO(mp, fsbno + len - 1); } -/* Calculate the first and last possible inode number in an AG. */ -inline void -xfs_agino_range( - struct xfs_mount *mp, - xfs_agnumber_t agno, - xfs_agino_t *first, - xfs_agino_t *last) -{ - xfs_agblock_t bno; - xfs_agblock_t eoag; - - eoag = xfs_ag_block_count(mp, agno); - - /* - * Calculate the first inode, which will be in the first - * cluster-aligned block after the AGFL. - */ - bno = round_up(XFS_AGFL_BLOCK(mp) + 1, M_IGEO(mp)->cluster_align); - *first = XFS_AGB_TO_AGINO(mp, bno); - - /* - * Calculate the last inode, which will be at the end of the - * last (aligned) cluster that can be allocated in the AG. - */ - bno = round_down(eoag, M_IGEO(mp)->cluster_align); - *last = XFS_AGB_TO_AGINO(mp, bno) - 1; -} - /* * Verify that an AG inode number pointer neither points outside the AG * nor points at static metadata. */ -inline bool -xfs_verify_agino( +static inline bool +xfs_verify_agno_agino( struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino) @@ -131,19 +91,6 @@ xfs_verify_agino( } /* - * Verify that an AG inode number pointer neither points outside the AG - * nor points at static metadata, or is NULLAGINO. - */ -bool -xfs_verify_agino_or_null( - struct xfs_mount *mp, - xfs_agnumber_t agno, - xfs_agino_t agino) -{ - return agino == NULLAGINO || xfs_verify_agino(mp, agno, agino); -} - -/* * Verify that an FS inode number pointer neither points outside the * filesystem nor points at static AG metadata. */ @@ -159,7 +106,7 @@ xfs_verify_ino( return false; if (XFS_AGINO_TO_INO(mp, agno, agino) != ino) return false; - return xfs_verify_agino(mp, agno, agino); + return xfs_verify_agno_agino(mp, agno, agino); } /* Is this an internal inode number? */ @@ -229,12 +176,8 @@ xfs_icount_range( /* root, rtbitmap, rtsum all live in the first chunk */ *min = XFS_INODES_PER_CHUNK; - for_each_perag(mp, agno, pag) { - xfs_agino_t first, last; - - xfs_agino_range(mp, agno, &first, &last); - nr_inos += last - first + 1; - } + for_each_perag(mp, agno, pag) + nr_inos += pag->agino_max - pag->agino_min + 1; *max = nr_inos; } diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 373f64a492a4..a6b7d98cf68f 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -179,19 +179,10 @@ enum xfs_ag_resv_type { */ struct xfs_mount; -xfs_agblock_t xfs_ag_block_count(struct xfs_mount *mp, xfs_agnumber_t agno); -bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agblock_t agbno); bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); bool xfs_verify_fsbext(struct xfs_mount *mp, xfs_fsblock_t fsbno, xfs_fsblock_t len); -void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agino_t *first, xfs_agino_t *last); -bool xfs_verify_agino(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agino_t agino); -bool xfs_verify_agino_or_null(struct xfs_mount *mp, xfs_agnumber_t agno, - xfs_agino_t agino); bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino); diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index 90aebfe9dc5f..b7b838bd4ba4 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -541,16 +541,16 @@ xchk_agf( /* Check the AG length */ eoag = be32_to_cpu(agf->agf_length); - if (eoag != xfs_ag_block_count(mp, agno)) + if (eoag != pag->block_count) xchk_block_set_corrupt(sc, sc->sa.agf_bp); /* Check the AGF btree roots and levels */ agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]); - if (!xfs_verify_agbno(mp, agno, agbno)) + if (!xfs_verify_agbno(pag, agbno)) xchk_block_set_corrupt(sc, sc->sa.agf_bp); agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]); - if (!xfs_verify_agbno(mp, agno, agbno)) + if (!xfs_verify_agbno(pag, agbno)) xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]); @@ -563,7 +563,7 @@ xchk_agf( if (xfs_has_rmapbt(mp)) { agbno = be32_to_cpu(agf->agf_roots[XFS_BTNUM_RMAP]); - if (!xfs_verify_agbno(mp, agno, agbno)) + if (!xfs_verify_agbno(pag, agbno)) xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]); @@ -573,7 +573,7 @@ xchk_agf( if (xfs_has_reflink(mp)) { agbno = be32_to_cpu(agf->agf_refcount_root); - if (!xfs_verify_agbno(mp, agno, agbno)) + if (!xfs_verify_agbno(pag, agbno)) xchk_block_set_corrupt(sc, sc->sa.agf_bp); level = be32_to_cpu(agf->agf_refcount_level); @@ -639,9 +639,8 @@ xchk_agfl_block( { struct xchk_agfl_info *sai = priv; struct xfs_scrub *sc = sai->sc; - xfs_agnumber_t agno = sc->sa.pag->pag_agno; - if (xfs_verify_agbno(mp, agno, agbno) && + if (xfs_verify_agbno(sc->sa.pag, agbno) && sai->nr_entries < sai->sz_entries) sai->entries[sai->nr_entries++] = agbno; else @@ -871,12 +870,12 @@ xchk_agi( /* Check the AG length */ eoag = be32_to_cpu(agi->agi_length); - if (eoag != xfs_ag_block_count(mp, agno)) + if (eoag != pag->block_count) xchk_block_set_corrupt(sc, sc->sa.agi_bp); /* Check btree roots and levels */ agbno = be32_to_cpu(agi->agi_root); - if (!xfs_verify_agbno(mp, agno, agbno)) + if (!xfs_verify_agbno(pag, agbno)) xchk_block_set_corrupt(sc, sc->sa.agi_bp); level = be32_to_cpu(agi->agi_level); @@ -885,7 +884,7 @@ xchk_agi( if (xfs_has_finobt(mp)) { agbno = be32_to_cpu(agi->agi_free_root); - if (!xfs_verify_agbno(mp, agno, agbno)) + if (!xfs_verify_agbno(pag, agbno)) xchk_block_set_corrupt(sc, sc->sa.agi_bp); level = be32_to_cpu(agi->agi_free_level); @@ -902,17 +901,17 @@ xchk_agi( /* Check inode pointers */ agino = be32_to_cpu(agi->agi_newino); - if (!xfs_verify_agino_or_null(mp, agno, agino)) + if (!xfs_verify_agino_or_null(pag, agino)) xchk_block_set_corrupt(sc, sc->sa.agi_bp); agino = be32_to_cpu(agi->agi_dirino); - if (!xfs_verify_agino_or_null(mp, agno, agino)) + if (!xfs_verify_agino_or_null(pag, agino)) xchk_block_set_corrupt(sc, sc->sa.agi_bp); /* Check unlinked inode buckets */ for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) { agino = be32_to_cpu(agi->agi_unlinked[i]); - if (!xfs_verify_agino_or_null(mp, agno, agino)) + if (!xfs_verify_agino_or_null(pag, agino)) xchk_block_set_corrupt(sc, sc->sa.agi_bp); } diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 6da7f2ca77de..1b0b4e243f77 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -106,7 +106,7 @@ xrep_agf_check_agfl_block( { struct xfs_scrub *sc = priv; - if (!xfs_verify_agbno(mp, sc->sa.pag->pag_agno, agbno)) + if (!xfs_verify_agbno(sc->sa.pag, agbno)) return -EFSCORRUPTED; return 0; } @@ -130,10 +130,7 @@ xrep_check_btree_root( struct xfs_scrub *sc, struct xrep_find_ag_btree *fab) { - struct xfs_mount *mp = sc->mp; - xfs_agnumber_t agno = sc->sm->sm_agno; - - return xfs_verify_agbno(mp, agno, fab->root) && + return xfs_verify_agbno(sc->sa.pag, fab->root) && fab->height <= fab->maxlevels; } @@ -201,8 +198,7 @@ xrep_agf_init_header( agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); agf->agf_seqno = cpu_to_be32(sc->sa.pag->pag_agno); - agf->agf_length = cpu_to_be32(xfs_ag_block_count(mp, - sc->sa.pag->pag_agno)); + agf->agf_length = cpu_to_be32(sc->sa.pag->block_count); agf->agf_flfirst = old_agf->agf_flfirst; agf->agf_fllast = old_agf->agf_fllast; agf->agf_flcount = old_agf->agf_flcount; @@ -405,7 +401,7 @@ xrep_agf( * btrees rooted in the AGF. If the AGFL contents are obviously bad * then we'll bail out. */ - error = xfs_alloc_read_agfl(mp, sc->tp, sc->sa.pag->pag_agno, &agfl_bp); + error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp); if (error) return error; @@ -666,8 +662,7 @@ xrep_agfl( * nothing wrong with the AGF, but all the AG header repair functions * have this chicken-and-egg problem. */ - error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.pag->pag_agno, 0, - &agf_bp); + error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &agf_bp); if (error) return error; @@ -742,8 +737,7 @@ xrep_agi_find_btrees( int error; /* Read the AGF. */ - error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.pag->pag_agno, 0, - &agf_bp); + error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &agf_bp); if (error) return error; @@ -782,8 +776,7 @@ xrep_agi_init_header( agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); agi->agi_seqno = cpu_to_be32(sc->sa.pag->pag_agno); - agi->agi_length = cpu_to_be32(xfs_ag_block_count(mp, - sc->sa.pag->pag_agno)); + agi->agi_length = cpu_to_be32(sc->sa.pag->block_count); agi->agi_newino = cpu_to_be32(NULLAGINO); agi->agi_dirino = cpu_to_be32(NULLAGINO); if (xfs_has_crc(mp)) diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c index 87518e1292f8..ab427b4d7fe0 100644 --- a/fs/xfs/scrub/alloc.c +++ b/fs/xfs/scrub/alloc.c @@ -93,8 +93,7 @@ xchk_allocbt_rec( struct xchk_btree *bs, const union xfs_btree_rec *rec) { - struct xfs_mount *mp = bs->cur->bc_mp; - xfs_agnumber_t agno = bs->cur->bc_ag.pag->pag_agno; + struct xfs_perag *pag = bs->cur->bc_ag.pag; xfs_agblock_t bno; xfs_extlen_t len; @@ -102,8 +101,8 @@ xchk_allocbt_rec( len = be32_to_cpu(rec->alloc.ar_blockcount); if (bno + len <= bno || - !xfs_verify_agbno(mp, agno, bno) || - !xfs_verify_agbno(mp, agno, bno + len - 1)) + !xfs_verify_agbno(pag, bno) || + !xfs_verify_agbno(pag, bno + len - 1)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); xchk_allocbt_xref(bs->sc, bno, len); diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 285995ba3947..9353fd060525 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -540,7 +540,7 @@ xchk_bmap_check_ag_rmaps( struct xfs_buf *agf; int error; - error = xfs_alloc_read_agf(sc->mp, sc->tp, pag->pag_agno, 0, &agf); + error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf); if (error) return error; diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 97b54ac3075f..9bbbf20f401b 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -416,15 +416,15 @@ xchk_ag_read_headers( if (!sa->pag) return -ENOENT; - error = xfs_ialloc_read_agi(mp, sc->tp, agno, &sa->agi_bp); + error = xfs_ialloc_read_agi(sa->pag, sc->tp, &sa->agi_bp); if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGI)) return error; - error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &sa->agf_bp); + error = xfs_alloc_read_agf(sa->pag, sc->tp, 0, &sa->agf_bp); if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF)) return error; - error = xfs_alloc_read_agfl(mp, sc->tp, agno, &sa->agfl_bp); + error = xfs_alloc_read_agfl(sa->pag, sc->tp, &sa->agfl_bp); if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL)) return error; diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 48a6cbdf95d0..6a6f8fe7f87c 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -78,10 +78,10 @@ xchk_fscount_warmup( continue; /* Lock both AG headers. */ - error = xfs_ialloc_read_agi(mp, sc->tp, agno, &agi_bp); + error = xfs_ialloc_read_agi(pag, sc->tp, &agi_bp); if (error) break; - error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &agf_bp); + error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf_bp); if (error) break; diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index 2e61df3bca83..aa65ec88a0c0 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -8,6 +8,8 @@ #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_btree.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" #include "xfs_ag.h" #include "xfs_health.h" #include "scrub/scrub.h" diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c index 00848ee542fb..e1026e07bf94 100644 --- a/fs/xfs/scrub/ialloc.c +++ b/fs/xfs/scrub/ialloc.c @@ -104,13 +104,13 @@ xchk_iallocbt_chunk( xfs_extlen_t len) { struct xfs_mount *mp = bs->cur->bc_mp; - xfs_agnumber_t agno = bs->cur->bc_ag.pag->pag_agno; + struct xfs_perag *pag = bs->cur->bc_ag.pag; xfs_agblock_t bno; bno = XFS_AGINO_TO_AGBNO(mp, agino); if (bno + len <= bno || - !xfs_verify_agbno(mp, agno, bno) || - !xfs_verify_agbno(mp, agno, bno + len - 1)) + !xfs_verify_agbno(pag, bno) || + !xfs_verify_agbno(pag, bno + len - 1)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); xchk_iallocbt_chunk_xref(bs->sc, irec, agino, bno, len); @@ -421,10 +421,10 @@ xchk_iallocbt_rec( const union xfs_btree_rec *rec) { struct xfs_mount *mp = bs->cur->bc_mp; + struct xfs_perag *pag = bs->cur->bc_ag.pag; struct xchk_iallocbt *iabt = bs->private; struct xfs_inobt_rec_incore irec; uint64_t holes; - xfs_agnumber_t agno = bs->cur->bc_ag.pag->pag_agno; xfs_agino_t agino; xfs_extlen_t len; int holecount; @@ -446,8 +446,8 @@ xchk_iallocbt_rec( agino = irec.ir_startino; /* Record has to be properly aligned within the AG. */ - if (!xfs_verify_agino(mp, agno, agino) || - !xfs_verify_agino(mp, agno, agino + XFS_INODES_PER_CHUNK - 1)) { + if (!xfs_verify_agino(pag, agino) || + !xfs_verify_agino(pag, agino + XFS_INODES_PER_CHUNK - 1)) { xchk_btree_set_corrupt(bs->sc, bs->cur, 0); goto out; } diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c index 2744eecdbaf0..c68b767dc08f 100644 --- a/fs/xfs/scrub/refcount.c +++ b/fs/xfs/scrub/refcount.c @@ -13,6 +13,8 @@ #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/btree.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" #include "xfs_ag.h" /* @@ -332,9 +334,8 @@ xchk_refcountbt_rec( struct xchk_btree *bs, const union xfs_btree_rec *rec) { - struct xfs_mount *mp = bs->cur->bc_mp; xfs_agblock_t *cow_blocks = bs->private; - xfs_agnumber_t agno = bs->cur->bc_ag.pag->pag_agno; + struct xfs_perag *pag = bs->cur->bc_ag.pag; xfs_agblock_t bno; xfs_extlen_t len; xfs_nlink_t refcount; @@ -354,8 +355,8 @@ xchk_refcountbt_rec( /* Check the extent. */ bno &= ~XFS_REFC_COW_START; if (bno + len <= bno || - !xfs_verify_agbno(mp, agno, bno) || - !xfs_verify_agbno(mp, agno, bno + len - 1)) + !xfs_verify_agbno(pag, bno) || + !xfs_verify_agbno(pag, bno + len - 1)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); if (refcount == 0) diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 5e7428782571..5b5273505931 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -199,7 +199,7 @@ xrep_calc_ag_resblks( icount = pag->pagi_count; } else { /* Try to get the actual counters from disk. */ - error = xfs_ialloc_read_agi(mp, NULL, sm->sm_agno, &bp); + error = xfs_ialloc_read_agi(pag, NULL, &bp); if (!error) { icount = pag->pagi_count; xfs_buf_relse(bp); @@ -207,9 +207,9 @@ xrep_calc_ag_resblks( } /* Now grab the block counters from the AGF. */ - error = xfs_alloc_read_agf(mp, NULL, sm->sm_agno, 0, &bp); + error = xfs_alloc_read_agf(pag, NULL, 0, &bp); if (error) { - aglen = xfs_ag_block_count(mp, sm->sm_agno); + aglen = pag->block_count; freelen = aglen; usedlen = aglen; } else { @@ -220,25 +220,22 @@ xrep_calc_ag_resblks( usedlen = aglen - freelen; xfs_buf_relse(bp); } - xfs_perag_put(pag); /* If the icount is impossible, make some worst-case assumptions. */ if (icount == NULLAGINO || - !xfs_verify_agino(mp, sm->sm_agno, icount)) { - xfs_agino_t first, last; - - xfs_agino_range(mp, sm->sm_agno, &first, &last); - icount = last - first + 1; + !xfs_verify_agino(pag, icount)) { + icount = pag->agino_max - pag->agino_min + 1; } /* If the block counts are impossible, make worst-case assumptions. */ if (aglen == NULLAGBLOCK || - aglen != xfs_ag_block_count(mp, sm->sm_agno) || + aglen != pag->block_count || freelen >= aglen) { - aglen = xfs_ag_block_count(mp, sm->sm_agno); + aglen = pag->block_count; freelen = aglen; usedlen = aglen; } + xfs_perag_put(pag); trace_xrep_calc_ag_resblks(mp, sm->sm_agno, icount, aglen, freelen, usedlen); @@ -300,13 +297,13 @@ xrep_alloc_ag_block( switch (resv) { case XFS_AG_RESV_AGFL: case XFS_AG_RESV_RMAPBT: - error = xfs_alloc_get_freelist(sc->tp, sc->sa.agf_bp, &bno, 1); + error = xfs_alloc_get_freelist(sc->sa.pag, sc->tp, + sc->sa.agf_bp, &bno, 1); if (error) return error; if (bno == NULLAGBLOCK) return -ENOSPC; - xfs_extent_busy_reuse(sc->mp, sc->sa.pag, bno, - 1, false); + xfs_extent_busy_reuse(sc->mp, sc->sa.pag, bno, 1, false); *fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, bno); if (resv == XFS_AG_RESV_RMAPBT) xfs_ag_resv_rmapbt_alloc(sc->mp, sc->sa.pag->pag_agno); @@ -519,8 +516,8 @@ xrep_put_freelist( return error; /* Put the block on the AGFL. */ - error = xfs_alloc_put_freelist(sc->tp, sc->sa.agf_bp, sc->sa.agfl_bp, - agbno, 0); + error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp, + sc->sa.agfl_bp, agbno, 0); if (error) return error; xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1, @@ -546,6 +543,7 @@ xrep_reap_block( agno = XFS_FSB_TO_AGNO(sc->mp, fsbno); agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno); + ASSERT(agno == sc->sa.pag->pag_agno); /* * If we are repairing per-inode metadata, we need to read in the AGF @@ -553,7 +551,7 @@ xrep_reap_block( * the AGF buffer that the setup functions already grabbed. */ if (sc->ip) { - error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf_bp); + error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &agf_bp); if (error) return error; } else { diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c index 8dae0345c7df..229826b2e1c0 100644 --- a/fs/xfs/scrub/rmap.c +++ b/fs/xfs/scrub/rmap.c @@ -92,7 +92,7 @@ xchk_rmapbt_rec( { struct xfs_mount *mp = bs->cur->bc_mp; struct xfs_rmap_irec irec; - xfs_agnumber_t agno = bs->cur->bc_ag.pag->pag_agno; + struct xfs_perag *pag = bs->cur->bc_ag.pag; bool non_inode; bool is_unwritten; bool is_bmbt; @@ -121,8 +121,8 @@ xchk_rmapbt_rec( * Otherwise we must point somewhere past the static metadata * but before the end of the FS. Run the regular check. */ - if (!xfs_verify_agbno(mp, agno, irec.rm_startblock) || - !xfs_verify_agbno(mp, agno, irec.rm_startblock + + if (!xfs_verify_agbno(pag, irec.rm_startblock) || + !xfs_verify_agbno(pag, irec.rm_startblock + irec.rm_blockcount - 1)) xchk_btree_set_corrupt(bs->sc, bs->cur, 0); } diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index c6fe3f6ebb6b..bfc829c07f03 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -45,7 +45,7 @@ xfs_trim_extents( */ xfs_log_force(mp, XFS_LOG_SYNC); - error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp); + error = xfs_alloc_read_agf(pag, NULL, 0, &agbp); if (error) goto out_put_perag; agf = agbp->b_addr; diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 765be054dffe..0d0a0b37d8c5 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -11,6 +11,7 @@ #include "xfs_bit.h" #include "xfs_shared.h" #include "xfs_mount.h" +#include "xfs_ag.h" #include "xfs_defer.h" #include "xfs_trans.h" #include "xfs_trans_priv.h" @@ -551,6 +552,7 @@ xfs_agfl_free_finish_item( xfs_agnumber_t agno; xfs_agblock_t agbno; uint next_extent; + struct xfs_perag *pag; free = container_of(item, struct xfs_extent_free_item, xefi_list); ASSERT(free->xefi_blockcount == 1); @@ -560,9 +562,11 @@ xfs_agfl_free_finish_item( trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); - error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); + pag = xfs_perag_get(mp, agno); + error = xfs_alloc_read_agf(pag, tp, 0, &agbp); if (!error) error = xfs_free_agfl_block(tp, agno, agbno, agbp, &oinfo); + xfs_perag_put(pag); /* * Mark the transaction dirty, even on error. This ensures the diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index be9bcf8a1f99..34b21a29c39b 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -126,7 +126,7 @@ xfs_filestream_pick_ag( pag = xfs_perag_get(mp, ag); if (!pag->pagf_init) { - err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); + err = xfs_alloc_read_agf(pag, NULL, trylock, NULL); if (err) { if (err != -EAGAIN) { xfs_perag_put(pag); @@ -181,7 +181,7 @@ next_ag: if (ag != startag) continue; - /* Allow sleeping in xfs_alloc_pagf_init() on the 2nd pass. */ + /* Allow sleeping in xfs_alloc_read_agf() on the 2nd pass. */ if (trylock != 0) { trylock = 0; continue; diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index bb23199f65c3..d8337274c74d 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -642,8 +642,7 @@ __xfs_getfsmap_datadev( info->agf_bp = NULL; } - error = xfs_alloc_read_agf(mp, tp, pag->pag_agno, 0, - &info->agf_bp); + error = xfs_alloc_read_agf(pag, tp, 0, &info->agf_bp); if (error) break; diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index d4a77c53f94b..5fe9af24dfcd 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -41,6 +41,7 @@ xfs_resizefs_init_new_ags( xfs_agnumber_t oagcount, xfs_agnumber_t nagcount, xfs_rfsblock_t delta, + struct xfs_perag *last_pag, bool *lastag_extended) { struct xfs_mount *mp = tp->t_mountp; @@ -73,7 +74,7 @@ xfs_resizefs_init_new_ags( if (delta) { *lastag_extended = true; - error = xfs_ag_extend_space(mp, tp, id, delta); + error = xfs_ag_extend_space(last_pag, tp, delta); } return error; } @@ -96,6 +97,7 @@ xfs_growfs_data_private( xfs_agnumber_t oagcount; struct xfs_trans *tp; struct aghdr_init_data id = {}; + struct xfs_perag *last_pag; nb = in->newblocks; error = xfs_sb_validate_fsb_count(&mp->m_sb, nb); @@ -128,10 +130,9 @@ xfs_growfs_data_private( return -EINVAL; oagcount = mp->m_sb.sb_agcount; - /* allocate the new per-ag structures */ if (nagcount > oagcount) { - error = xfs_initialize_perag(mp, nagcount, &nagimax); + error = xfs_initialize_perag(mp, nagcount, nb, &nagimax); if (error) return error; } else if (nagcount < oagcount) { @@ -145,15 +146,17 @@ xfs_growfs_data_private( if (error) return error; + last_pag = xfs_perag_get(mp, oagcount - 1); if (delta > 0) { error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, - delta, &lastag_extended); + delta, last_pag, &lastag_extended); } else { xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SHRINK, "EXPERIMENTAL online shrink feature in use. Use at your own risk!"); - error = xfs_ag_shrink_space(mp, &tp, nagcount - 1, -delta); + error = xfs_ag_shrink_space(last_pag, &tp, -delta); } + xfs_perag_put(last_pag); if (error) goto out_trans_cancel; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 2609825d53ee..aef4097ffd3e 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -111,6 +111,8 @@ xfs_inode_alloc( INIT_WORK(&ip->i_ioend_work, xfs_end_io); INIT_LIST_HEAD(&ip->i_ioend_list); spin_lock_init(&ip->i_ioend_lock); + ip->i_next_unlinked = NULLAGINO; + ip->i_prev_unlinked = NULLAGINO; return ip; } @@ -912,6 +914,7 @@ reclaim: ip->i_checked = 0; spin_unlock(&ip->i_flags_lock); + ASSERT(!ip->i_itemp || ip->i_itemp->ili_item.li_buf == NULL); xfs_iunlock(ip, XFS_ILOCK_EXCL); XFS_STATS_INC(ip->i_mount, xs_ig_reclaims); diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 3e1c62ffa4f7..622953791ddd 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -20,6 +20,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_inode_item.h" +#include "xfs_iunlink_item.h" #include "xfs_ialloc.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" @@ -1801,195 +1802,69 @@ out: * because we must walk that list to find the inode that points to the inode * being removed from the unlinked hash bucket list. * - * What if we modelled the unlinked list as a collection of records capturing - * "X.next_unlinked = Y" relations? If we indexed those records on Y, we'd - * have a fast way to look up unlinked list predecessors, which avoids the - * slow list walk. That's exactly what we do here (in-core) with a per-AG - * rhashtable. + * Hence we keep an in-memory double linked list to link each inode on an + * unlinked list. Because there are 64 unlinked lists per AGI, keeping pointer + * based lists would require having 64 list heads in the perag, one for each + * list. This is expensive in terms of memory (think millions of AGs) and cache + * misses on lookups. Instead, use the fact that inodes on the unlinked list + * must be referenced at the VFS level to keep them on the list and hence we + * have an existence guarantee for inodes on the unlinked list. * - * Because this is a backref cache, we ignore operational failures since the - * iunlink code can fall back to the slow bucket walk. The only errors that - * should bubble out are for obviously incorrect situations. - * - * All users of the backref cache MUST hold the AGI buffer lock to serialize - * access or have otherwise provided for concurrency control. + * Given we have an existence guarantee, we can use lockless inode cache lookups + * to resolve aginos to xfs inodes. This means we only need 8 bytes per inode + * for the double linked unlinked list, and we don't need any extra locking to + * keep the list safe as all manipulations are done under the AGI buffer lock. + * Keeping the list up to date does not require memory allocation, just finding + * the XFS inode and updating the next/prev unlinked list aginos. */ -/* Capture a "X.next_unlinked = Y" relationship. */ -struct xfs_iunlink { - struct rhash_head iu_rhash_head; - xfs_agino_t iu_agino; /* X */ - xfs_agino_t iu_next_unlinked; /* Y */ -}; - -/* Unlinked list predecessor lookup hashtable construction */ -static int -xfs_iunlink_obj_cmpfn( - struct rhashtable_compare_arg *arg, - const void *obj) -{ - const xfs_agino_t *key = arg->key; - const struct xfs_iunlink *iu = obj; - - if (iu->iu_next_unlinked != *key) - return 1; - return 0; -} - -static const struct rhashtable_params xfs_iunlink_hash_params = { - .min_size = XFS_AGI_UNLINKED_BUCKETS, - .key_len = sizeof(xfs_agino_t), - .key_offset = offsetof(struct xfs_iunlink, - iu_next_unlinked), - .head_offset = offsetof(struct xfs_iunlink, iu_rhash_head), - .automatic_shrinking = true, - .obj_cmpfn = xfs_iunlink_obj_cmpfn, -}; - /* - * Return X, where X.next_unlinked == @agino. Returns NULLAGINO if no such - * relation is found. + * Find an inode on the unlinked list. This does not take references to the + * inode as we have existence guarantees by holding the AGI buffer lock and that + * only unlinked, referenced inodes can be on the unlinked inode list. If we + * don't find the inode in cache, then let the caller handle the situation. */ -static xfs_agino_t -xfs_iunlink_lookup_backref( +static struct xfs_inode * +xfs_iunlink_lookup( struct xfs_perag *pag, xfs_agino_t agino) { - struct xfs_iunlink *iu; - - iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino, - xfs_iunlink_hash_params); - return iu ? iu->iu_agino : NULLAGINO; -} + struct xfs_inode *ip; -/* - * Take ownership of an iunlink cache entry and insert it into the hash table. - * If successful, the entry will be owned by the cache; if not, it is freed. - * Either way, the caller does not own @iu after this call. - */ -static int -xfs_iunlink_insert_backref( - struct xfs_perag *pag, - struct xfs_iunlink *iu) -{ - int error; + rcu_read_lock(); + ip = radix_tree_lookup(&pag->pag_ici_root, agino); - error = rhashtable_insert_fast(&pag->pagi_unlinked_hash, - &iu->iu_rhash_head, xfs_iunlink_hash_params); /* - * Fail loudly if there already was an entry because that's a sign of - * corruption of in-memory data. Also fail loudly if we see an error - * code we didn't anticipate from the rhashtable code. Currently we - * only anticipate ENOMEM. + * Inode not in memory or in RCU freeing limbo should not happen. + * Warn about this and let the caller handle the failure. */ - if (error) { - WARN(error != -ENOMEM, "iunlink cache insert error %d", error); - kmem_free(iu); + if (WARN_ON_ONCE(!ip || !ip->i_ino)) { + rcu_read_unlock(); + return NULL; } - /* - * Absorb any runtime errors that aren't a result of corruption because - * this is a cache and we can always fall back to bucket list scanning. - */ - if (error != 0 && error != -EEXIST) - error = 0; - return error; + ASSERT(!xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM)); + rcu_read_unlock(); + return ip; } -/* Remember that @prev_agino.next_unlinked = @this_agino. */ +/* Update the prev pointer of the next agino. */ static int -xfs_iunlink_add_backref( +xfs_iunlink_update_backref( struct xfs_perag *pag, xfs_agino_t prev_agino, - xfs_agino_t this_agino) -{ - struct xfs_iunlink *iu; - - if (XFS_TEST_ERROR(false, pag->pag_mount, XFS_ERRTAG_IUNLINK_FALLBACK)) - return 0; - - iu = kmem_zalloc(sizeof(*iu), KM_NOFS); - iu->iu_agino = prev_agino; - iu->iu_next_unlinked = this_agino; - - return xfs_iunlink_insert_backref(pag, iu); -} - -/* - * Replace X.next_unlinked = @agino with X.next_unlinked = @next_unlinked. - * If @next_unlinked is NULLAGINO, we drop the backref and exit. If there - * wasn't any such entry then we don't bother. - */ -static int -xfs_iunlink_change_backref( - struct xfs_perag *pag, - xfs_agino_t agino, - xfs_agino_t next_unlinked) + xfs_agino_t next_agino) { - struct xfs_iunlink *iu; - int error; - - /* Look up the old entry; if there wasn't one then exit. */ - iu = rhashtable_lookup_fast(&pag->pagi_unlinked_hash, &agino, - xfs_iunlink_hash_params); - if (!iu) - return 0; - - /* - * Remove the entry. This shouldn't ever return an error, but if we - * couldn't remove the old entry we don't want to add it again to the - * hash table, and if the entry disappeared on us then someone's - * violated the locking rules and we need to fail loudly. Either way - * we cannot remove the inode because internal state is or would have - * been corrupt. - */ - error = rhashtable_remove_fast(&pag->pagi_unlinked_hash, - &iu->iu_rhash_head, xfs_iunlink_hash_params); - if (error) - return error; + struct xfs_inode *ip; - /* If there is no new next entry just free our item and return. */ - if (next_unlinked == NULLAGINO) { - kmem_free(iu); + /* No update necessary if we are at the end of the list. */ + if (next_agino == NULLAGINO) return 0; - } - - /* Update the entry and re-add it to the hash table. */ - iu->iu_next_unlinked = next_unlinked; - return xfs_iunlink_insert_backref(pag, iu); -} - -/* Set up the in-core predecessor structures. */ -int -xfs_iunlink_init( - struct xfs_perag *pag) -{ - return rhashtable_init(&pag->pagi_unlinked_hash, - &xfs_iunlink_hash_params); -} - -/* Free the in-core predecessor structures. */ -static void -xfs_iunlink_free_item( - void *ptr, - void *arg) -{ - struct xfs_iunlink *iu = ptr; - bool *freed_anything = arg; - *freed_anything = true; - kmem_free(iu); -} - -void -xfs_iunlink_destroy( - struct xfs_perag *pag) -{ - bool freed_anything = false; - - rhashtable_free_and_destroy(&pag->pagi_unlinked_hash, - xfs_iunlink_free_item, &freed_anything); - - ASSERT(freed_anything == false || xfs_is_shutdown(pag->pag_mount)); + ip = xfs_iunlink_lookup(pag, next_agino); + if (!ip) + return -EFSCORRUPTED; + ip->i_prev_unlinked = prev_agino; + return 0; } /* @@ -2008,7 +1883,7 @@ xfs_iunlink_update_bucket( xfs_agino_t old_value; int offset; - ASSERT(xfs_verify_agino_or_null(tp->t_mountp, pag->pag_agno, new_agino)); + ASSERT(xfs_verify_agino_or_null(pag, new_agino)); old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]); trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index, @@ -2031,88 +1906,53 @@ xfs_iunlink_update_bucket( return 0; } -/* Set an on-disk inode's next_unlinked pointer. */ -STATIC void -xfs_iunlink_update_dinode( - struct xfs_trans *tp, - struct xfs_perag *pag, - xfs_agino_t agino, - struct xfs_buf *ibp, - struct xfs_dinode *dip, - struct xfs_imap *imap, - xfs_agino_t next_agino) -{ - struct xfs_mount *mp = tp->t_mountp; - int offset; - - ASSERT(xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino)); - - trace_xfs_iunlink_update_dinode(mp, pag->pag_agno, agino, - be32_to_cpu(dip->di_next_unlinked), next_agino); - - dip->di_next_unlinked = cpu_to_be32(next_agino); - offset = imap->im_boffset + - offsetof(struct xfs_dinode, di_next_unlinked); - - /* need to recalc the inode CRC if appropriate */ - xfs_dinode_calc_crc(mp, dip); - xfs_trans_inode_buf(tp, ibp); - xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1); -} - -/* Set an in-core inode's unlinked pointer and return the old value. */ -STATIC int -xfs_iunlink_update_inode( +static int +xfs_iunlink_insert_inode( struct xfs_trans *tp, - struct xfs_inode *ip, struct xfs_perag *pag, - xfs_agino_t next_agino, - xfs_agino_t *old_next_agino) + struct xfs_buf *agibp, + struct xfs_inode *ip) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_dinode *dip; - struct xfs_buf *ibp; - xfs_agino_t old_value; + struct xfs_agi *agi = agibp->b_addr; + xfs_agino_t next_agino; + xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); + short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; int error; - ASSERT(xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino)); - - error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp); - if (error) - return error; - dip = xfs_buf_offset(ibp, ip->i_imap.im_boffset); - - /* Make sure the old pointer isn't garbage. */ - old_value = be32_to_cpu(dip->di_next_unlinked); - if (!xfs_verify_agino_or_null(mp, pag->pag_agno, old_value)) { - xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, - sizeof(*dip), __this_address); - error = -EFSCORRUPTED; - goto out; + /* + * Get the index into the agi hash table for the list this inode will + * go on. Make sure the pointer isn't garbage and that this inode + * isn't already on the list. + */ + next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); + if (next_agino == agino || + !xfs_verify_agino_or_null(pag, next_agino)) { + xfs_buf_mark_corrupt(agibp); + return -EFSCORRUPTED; } /* - * Since we're updating a linked list, we should never find that the - * current pointer is the same as the new value, unless we're - * terminating the list. + * Update the prev pointer in the next inode to point back to this + * inode. */ - *old_next_agino = old_value; - if (old_value == next_agino) { - if (next_agino != NULLAGINO) { - xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, - dip, sizeof(*dip), __this_address); - error = -EFSCORRUPTED; - } - goto out; + error = xfs_iunlink_update_backref(pag, agino, next_agino); + if (error) + return error; + + if (next_agino != NULLAGINO) { + /* + * There is already another inode in the bucket, so point this + * inode to the current head of the list. + */ + error = xfs_iunlink_log_inode(tp, ip, pag, next_agino); + if (error) + return error; + ip->i_next_unlinked = next_agino; } - /* Ok, update the new pointer. */ - xfs_iunlink_update_dinode(tp, pag, XFS_INO_TO_AGINO(mp, ip->i_ino), - ibp, dip, &ip->i_imap, next_agino); - return 0; -out: - xfs_trans_brelse(tp, ibp); - return error; + /* Point the head of the list to point to this inode. */ + return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); } /* @@ -2129,11 +1969,7 @@ xfs_iunlink( { struct xfs_mount *mp = tp->t_mountp; struct xfs_perag *pag; - struct xfs_agi *agi; struct xfs_buf *agibp; - xfs_agino_t next_agino; - xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); - short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; int error; ASSERT(VFS_I(ip)->i_nlink == 0); @@ -2143,202 +1979,38 @@ xfs_iunlink( pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); /* Get the agi buffer first. It ensures lock ordering on the list. */ - error = xfs_read_agi(mp, tp, pag->pag_agno, &agibp); + error = xfs_read_agi(pag, tp, &agibp); if (error) goto out; - agi = agibp->b_addr; - - /* - * Get the index into the agi hash table for the list this inode will - * go on. Make sure the pointer isn't garbage and that this inode - * isn't already on the list. - */ - next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); - if (next_agino == agino || - !xfs_verify_agino_or_null(mp, pag->pag_agno, next_agino)) { - xfs_buf_mark_corrupt(agibp); - error = -EFSCORRUPTED; - goto out; - } - - if (next_agino != NULLAGINO) { - xfs_agino_t old_agino; - - /* - * There is already another inode in the bucket, so point this - * inode to the current head of the list. - */ - error = xfs_iunlink_update_inode(tp, ip, pag, next_agino, - &old_agino); - if (error) - goto out; - ASSERT(old_agino == NULLAGINO); - - /* - * agino has been unlinked, add a backref from the next inode - * back to agino. - */ - error = xfs_iunlink_add_backref(pag, agino, next_agino); - if (error) - goto out; - } - /* Point the head of the list to point to this inode. */ - error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, agino); + error = xfs_iunlink_insert_inode(tp, pag, agibp, ip); out: xfs_perag_put(pag); return error; } -/* Return the imap, dinode pointer, and buffer for an inode. */ -STATIC int -xfs_iunlink_map_ino( - struct xfs_trans *tp, - xfs_agnumber_t agno, - xfs_agino_t agino, - struct xfs_imap *imap, - struct xfs_dinode **dipp, - struct xfs_buf **bpp) -{ - struct xfs_mount *mp = tp->t_mountp; - int error; - - imap->im_blkno = 0; - error = xfs_imap(mp, tp, XFS_AGINO_TO_INO(mp, agno, agino), imap, 0); - if (error) { - xfs_warn(mp, "%s: xfs_imap returned error %d.", - __func__, error); - return error; - } - - error = xfs_imap_to_bp(mp, tp, imap, bpp); - if (error) { - xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.", - __func__, error); - return error; - } - - *dipp = xfs_buf_offset(*bpp, imap->im_boffset); - return 0; -} - -/* - * Walk the unlinked chain from @head_agino until we find the inode that - * points to @target_agino. Return the inode number, map, dinode pointer, - * and inode cluster buffer of that inode as @agino, @imap, @dipp, and @bpp. - * - * @tp, @pag, @head_agino, and @target_agino are input parameters. - * @agino, @imap, @dipp, and @bpp are all output parameters. - * - * Do not call this function if @target_agino is the head of the list. - */ -STATIC int -xfs_iunlink_map_prev( - struct xfs_trans *tp, - struct xfs_perag *pag, - xfs_agino_t head_agino, - xfs_agino_t target_agino, - xfs_agino_t *agino, - struct xfs_imap *imap, - struct xfs_dinode **dipp, - struct xfs_buf **bpp) -{ - struct xfs_mount *mp = tp->t_mountp; - xfs_agino_t next_agino; - int error; - - ASSERT(head_agino != target_agino); - *bpp = NULL; - - /* See if our backref cache can find it faster. */ - *agino = xfs_iunlink_lookup_backref(pag, target_agino); - if (*agino != NULLAGINO) { - error = xfs_iunlink_map_ino(tp, pag->pag_agno, *agino, imap, - dipp, bpp); - if (error) - return error; - - if (be32_to_cpu((*dipp)->di_next_unlinked) == target_agino) - return 0; - - /* - * If we get here the cache contents were corrupt, so drop the - * buffer and fall back to walking the bucket list. - */ - xfs_trans_brelse(tp, *bpp); - *bpp = NULL; - WARN_ON_ONCE(1); - } - - trace_xfs_iunlink_map_prev_fallback(mp, pag->pag_agno); - - /* Otherwise, walk the entire bucket until we find it. */ - next_agino = head_agino; - while (next_agino != target_agino) { - xfs_agino_t unlinked_agino; - - if (*bpp) - xfs_trans_brelse(tp, *bpp); - - *agino = next_agino; - error = xfs_iunlink_map_ino(tp, pag->pag_agno, next_agino, imap, - dipp, bpp); - if (error) - return error; - - unlinked_agino = be32_to_cpu((*dipp)->di_next_unlinked); - /* - * Make sure this pointer is valid and isn't an obvious - * infinite loop. - */ - if (!xfs_verify_agino(mp, pag->pag_agno, unlinked_agino) || - next_agino == unlinked_agino) { - XFS_CORRUPTION_ERROR(__func__, - XFS_ERRLEVEL_LOW, mp, - *dipp, sizeof(**dipp)); - error = -EFSCORRUPTED; - return error; - } - next_agino = unlinked_agino; - } - - return 0; -} - -/* - * Pull the on-disk inode from the AGI unlinked list. - */ -STATIC int -xfs_iunlink_remove( +static int +xfs_iunlink_remove_inode( struct xfs_trans *tp, struct xfs_perag *pag, + struct xfs_buf *agibp, struct xfs_inode *ip) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_agi *agi; - struct xfs_buf *agibp; - struct xfs_buf *last_ibp; - struct xfs_dinode *last_dip = NULL; + struct xfs_agi *agi = agibp->b_addr; xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino); - xfs_agino_t next_agino; xfs_agino_t head_agino; short bucket_index = agino % XFS_AGI_UNLINKED_BUCKETS; int error; trace_xfs_iunlink_remove(ip); - /* Get the agi buffer first. It ensures lock ordering on the list. */ - error = xfs_read_agi(mp, tp, pag->pag_agno, &agibp); - if (error) - return error; - agi = agibp->b_addr; - /* * Get the index into the agi hash table for the list this inode will * go on. Make sure the head pointer isn't garbage. */ head_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]); - if (!xfs_verify_agino(mp, pag->pag_agno, head_agino)) { + if (!xfs_verify_agino(pag, head_agino)) { XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, agi, sizeof(*agi)); return -EFSCORRUPTED; @@ -2349,52 +2021,60 @@ xfs_iunlink_remove( * the old pointer value so that we can update whatever was previous * to us in the list to point to whatever was next in the list. */ - error = xfs_iunlink_update_inode(tp, ip, pag, NULLAGINO, &next_agino); + error = xfs_iunlink_log_inode(tp, ip, pag, NULLAGINO); if (error) return error; /* - * If there was a backref pointing from the next inode back to this - * one, remove it because we've removed this inode from the list. - * - * Later, if this inode was in the middle of the list we'll update - * this inode's backref to point from the next inode. + * Update the prev pointer in the next inode to point back to previous + * inode in the chain. */ - if (next_agino != NULLAGINO) { - error = xfs_iunlink_change_backref(pag, next_agino, NULLAGINO); - if (error) - return error; - } + error = xfs_iunlink_update_backref(pag, ip->i_prev_unlinked, + ip->i_next_unlinked); + if (error) + return error; if (head_agino != agino) { - struct xfs_imap imap; - xfs_agino_t prev_agino; + struct xfs_inode *prev_ip; - /* We need to search the list for the inode being freed. */ - error = xfs_iunlink_map_prev(tp, pag, head_agino, agino, - &prev_agino, &imap, &last_dip, &last_ibp); - if (error) - return error; + prev_ip = xfs_iunlink_lookup(pag, ip->i_prev_unlinked); + if (!prev_ip) + return -EFSCORRUPTED; - /* Point the previous inode on the list to the next inode. */ - xfs_iunlink_update_dinode(tp, pag, prev_agino, last_ibp, - last_dip, &imap, next_agino); - - /* - * Now we deal with the backref for this inode. If this inode - * pointed at a real inode, change the backref that pointed to - * us to point to our old next. If this inode was the end of - * the list, delete the backref that pointed to us. Note that - * change_backref takes care of deleting the backref if - * next_agino is NULLAGINO. - */ - return xfs_iunlink_change_backref(agibp->b_pag, agino, - next_agino); + error = xfs_iunlink_log_inode(tp, prev_ip, pag, + ip->i_next_unlinked); + prev_ip->i_next_unlinked = ip->i_next_unlinked; + } else { + /* Point the head of the list to the next unlinked inode. */ + error = xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, + ip->i_next_unlinked); } - /* Point the head of the list to the next unlinked inode. */ - return xfs_iunlink_update_bucket(tp, pag, agibp, bucket_index, - next_agino); + ip->i_next_unlinked = NULLAGINO; + ip->i_prev_unlinked = NULLAGINO; + return error; +} + +/* + * Pull the on-disk inode from the AGI unlinked list. + */ +STATIC int +xfs_iunlink_remove( + struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_inode *ip) +{ + struct xfs_buf *agibp; + int error; + + trace_xfs_iunlink_remove(ip); + + /* Get the agi buffer first. It ensures lock ordering on the list. */ + error = xfs_read_agi(pag, tp, &agibp); + if (error) + return error; + + return xfs_iunlink_remove_inode(tp, pag, agibp, ip); } /* @@ -3032,10 +2712,12 @@ out_trans_abort: static int xfs_rename_alloc_whiteout( struct user_namespace *mnt_userns, + struct xfs_name *src_name, struct xfs_inode *dp, struct xfs_inode **wip) { struct xfs_inode *tmpfile; + struct qstr name; int error; error = xfs_create_tmpfile(mnt_userns, dp, S_IFCHR | WHITEOUT_MODE, @@ -3043,6 +2725,15 @@ xfs_rename_alloc_whiteout( if (error) return error; + name.name = src_name->name; + name.len = src_name->len; + error = xfs_inode_init_security(VFS_I(tmpfile), VFS_I(dp), &name); + if (error) { + xfs_finish_inode_setup(tmpfile); + xfs_irele(tmpfile); + return error; + } + /* * Prepare the tmpfile inode as if it were created through the VFS. * Complete the inode setup and flag it as linkable. nlink is already @@ -3093,7 +2784,8 @@ xfs_rename( * appropriately. */ if (flags & RENAME_WHITEOUT) { - error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip); + error = xfs_rename_alloc_whiteout(mnt_userns, src_name, + target_dp, &wip); if (error) return error; @@ -3229,11 +2921,13 @@ retry: if (inodes[i] == wip || (inodes[i] == target_ip && (VFS_I(target_ip)->i_nlink == 1 || src_is_directory))) { - struct xfs_buf *bp; - xfs_agnumber_t agno; + struct xfs_perag *pag; + struct xfs_buf *bp; - agno = XFS_INO_TO_AGNO(mp, inodes[i]->i_ino); - error = xfs_read_agi(mp, tp, agno, &bp); + pag = xfs_perag_get(mp, + XFS_INO_TO_AGNO(mp, inodes[i]->i_ino)); + error = xfs_read_agi(pag, tp, &bp); + xfs_perag_put(pag); if (error) goto out_trans_cancel; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 7be6f8e705ab..8d8cce61e5ba 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -68,6 +68,10 @@ typedef struct xfs_inode { uint64_t i_diflags2; /* XFS_DIFLAG2_... */ struct timespec64 i_crtime; /* time created */ + /* unlinked list pointers */ + xfs_agino_t i_next_unlinked; + xfs_agino_t i_prev_unlinked; + /* VFS inode */ struct inode i_vnode; /* embedded VFS inode */ @@ -505,9 +509,6 @@ extern struct kmem_cache *xfs_inode_cache; bool xfs_inode_needs_inactive(struct xfs_inode *ip); -int xfs_iunlink_init(struct xfs_perag *pag); -void xfs_iunlink_destroy(struct xfs_perag *pag); - void xfs_end_io(struct work_struct *work); int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 0d67ff8a8961..f21581cd8a70 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -955,6 +955,7 @@ xfs_ioc_ag_geometry( struct xfs_mount *mp, void __user *arg) { + struct xfs_perag *pag; struct xfs_ag_geometry ageo; int error; @@ -965,7 +966,12 @@ xfs_ioc_ag_geometry( if (memchr_inv(&ageo.ag_reserved, 0, sizeof(ageo.ag_reserved))) return -EINVAL; - error = xfs_ag_get_geometry(mp, ageo.ag_number, &ageo); + pag = xfs_perag_get(mp, ageo.ag_number); + if (!pag) + return -EINVAL; + + error = xfs_ag_get_geometry(pag, &ageo); + xfs_perag_put(pag); if (error) return error; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 29f5b8b8aca6..6720b60f88cf 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -75,9 +75,8 @@ xfs_initxattrs( * these attrs can be journalled at inode creation time (along with the * inode, of course, such that log replay can't cause these to be lost). */ - -STATIC int -xfs_init_security( +int +xfs_inode_init_security( struct inode *inode, struct inode *dir, const struct qstr *qstr) @@ -122,7 +121,7 @@ xfs_cleanup_inode( /* Oh, the horror. * If we can't add the ACL or we fail in - * xfs_init_security we must back out. + * xfs_inode_init_security we must back out. * ENOSPC can hit here, among other things. */ xfs_dentry_to_name(&teardown, dentry); @@ -208,7 +207,7 @@ xfs_generic_create( inode = VFS_I(ip); - error = xfs_init_security(inode, dir, &dentry->d_name); + error = xfs_inode_init_security(inode, dir, &dentry->d_name); if (unlikely(error)) goto out_cleanup_inode; @@ -424,7 +423,7 @@ xfs_vn_symlink( inode = VFS_I(cip); - error = xfs_init_security(inode, dir, &dentry->d_name); + error = xfs_inode_init_security(inode, dir, &dentry->d_name); if (unlikely(error)) goto out_cleanup_inode; diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index 278949056048..cb5fc68c9ea0 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -17,4 +17,7 @@ extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr); int xfs_vn_setattr_size(struct user_namespace *mnt_userns, struct dentry *dentry, struct iattr *vap); +int xfs_inode_init_security(struct inode *inode, struct inode *dir, + const struct qstr *qstr); + #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/xfs_iunlink_item.c b/fs/xfs/xfs_iunlink_item.c new file mode 100644 index 000000000000..43005ce8bd48 --- /dev/null +++ b/fs/xfs/xfs_iunlink_item.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020-2022, Red Hat, Inc. + * All Rights Reserved. + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_ag.h" +#include "xfs_iunlink_item.h" +#include "xfs_trace.h" +#include "xfs_error.h" + +struct kmem_cache *xfs_iunlink_cache; + +static inline struct xfs_iunlink_item *IUL_ITEM(struct xfs_log_item *lip) +{ + return container_of(lip, struct xfs_iunlink_item, item); +} + +static void +xfs_iunlink_item_release( + struct xfs_log_item *lip) +{ + struct xfs_iunlink_item *iup = IUL_ITEM(lip); + + xfs_perag_put(iup->pag); + kmem_cache_free(xfs_iunlink_cache, IUL_ITEM(lip)); +} + + +static uint64_t +xfs_iunlink_item_sort( + struct xfs_log_item *lip) +{ + return IUL_ITEM(lip)->ip->i_ino; +} + +/* + * Look up the inode cluster buffer and log the on-disk unlinked inode change + * we need to make. + */ +static int +xfs_iunlink_log_dinode( + struct xfs_trans *tp, + struct xfs_iunlink_item *iup) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_inode *ip = iup->ip; + struct xfs_dinode *dip; + struct xfs_buf *ibp; + int offset; + int error; + + error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp); + if (error) + return error; + /* + * Don't log the unlinked field on stale buffers as this may be the + * transaction that frees the inode cluster and relogging the buffer + * here will incorrectly remove the stale state. + */ + if (ibp->b_flags & XBF_STALE) + goto out; + + dip = xfs_buf_offset(ibp, ip->i_imap.im_boffset); + + /* Make sure the old pointer isn't garbage. */ + if (be32_to_cpu(dip->di_next_unlinked) != iup->old_agino) { + xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, + sizeof(*dip), __this_address); + error = -EFSCORRUPTED; + goto out; + } + + trace_xfs_iunlink_update_dinode(mp, iup->pag->pag_agno, + XFS_INO_TO_AGINO(mp, ip->i_ino), + be32_to_cpu(dip->di_next_unlinked), iup->next_agino); + + dip->di_next_unlinked = cpu_to_be32(iup->next_agino); + offset = ip->i_imap.im_boffset + + offsetof(struct xfs_dinode, di_next_unlinked); + + xfs_dinode_calc_crc(mp, dip); + xfs_trans_inode_buf(tp, ibp); + xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1); + return 0; +out: + xfs_trans_brelse(tp, ibp); + return error; +} + +/* + * On precommit, we grab the inode cluster buffer for the inode number we were + * passed, then update the next unlinked field for that inode in the buffer and + * log the buffer. This ensures that the inode cluster buffer was logged in the + * correct order w.r.t. other inode cluster buffers. We can then remove the + * iunlink item from the transaction and release it as it is has now served it's + * purpose. + */ +static int +xfs_iunlink_item_precommit( + struct xfs_trans *tp, + struct xfs_log_item *lip) +{ + struct xfs_iunlink_item *iup = IUL_ITEM(lip); + int error; + + error = xfs_iunlink_log_dinode(tp, iup); + list_del(&lip->li_trans); + xfs_iunlink_item_release(lip); + return error; +} + +static const struct xfs_item_ops xfs_iunlink_item_ops = { + .iop_release = xfs_iunlink_item_release, + .iop_sort = xfs_iunlink_item_sort, + .iop_precommit = xfs_iunlink_item_precommit, +}; + + +/* + * Initialize the inode log item for a newly allocated (in-core) inode. + * + * Inode extents can only reside within an AG. Hence specify the starting + * block for the inode chunk by offset within an AG as well as the + * length of the allocated extent. + * + * This joins the item to the transaction and marks it dirty so + * that we don't need a separate call to do this, nor does the + * caller need to know anything about the iunlink item. + */ +int +xfs_iunlink_log_inode( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_perag *pag, + xfs_agino_t next_agino) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_iunlink_item *iup; + + ASSERT(xfs_verify_agino_or_null(pag, next_agino)); + ASSERT(xfs_verify_agino_or_null(pag, ip->i_next_unlinked)); + + /* + * Since we're updating a linked list, we should never find that the + * current pointer is the same as the new value, unless we're + * terminating the list. + */ + if (ip->i_next_unlinked == next_agino) { + if (next_agino != NULLAGINO) + return -EFSCORRUPTED; + return 0; + } + + iup = kmem_cache_zalloc(xfs_iunlink_cache, GFP_KERNEL | __GFP_NOFAIL); + xfs_log_item_init(mp, &iup->item, XFS_LI_IUNLINK, + &xfs_iunlink_item_ops); + + iup->ip = ip; + iup->next_agino = next_agino; + iup->old_agino = ip->i_next_unlinked; + + atomic_inc(&pag->pag_ref); + iup->pag = pag; + + xfs_trans_add_item(tp, &iup->item); + tp->t_flags |= XFS_TRANS_DIRTY; + set_bit(XFS_LI_DIRTY, &iup->item.li_flags); + return 0; +} + diff --git a/fs/xfs/xfs_iunlink_item.h b/fs/xfs/xfs_iunlink_item.h new file mode 100644 index 000000000000..c793cdcaccde --- /dev/null +++ b/fs/xfs/xfs_iunlink_item.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2020-2022, Red Hat, Inc. + * All Rights Reserved. + */ +#ifndef XFS_IUNLINK_ITEM_H +#define XFS_IUNLINK_ITEM_H 1 + +struct xfs_trans; +struct xfs_inode; +struct xfs_perag; + +/* in memory log item structure */ +struct xfs_iunlink_item { + struct xfs_log_item item; + struct xfs_inode *ip; + struct xfs_perag *pag; + xfs_agino_t next_agino; + xfs_agino_t old_agino; +}; + +extern struct kmem_cache *xfs_iunlink_cache; + +int xfs_iunlink_log_inode(struct xfs_trans *tp, struct xfs_inode *ip, + struct xfs_perag *pag, xfs_agino_t next_agino); + +#endif /* XFS_IUNLINK_ITEM_H */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ae904b21e9cc..498cbb49392b 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -57,7 +57,8 @@ xlog_grant_push_ail( STATIC void xlog_sync( struct xlog *log, - struct xlog_in_core *iclog); + struct xlog_in_core *iclog, + struct xlog_ticket *ticket); #if defined(DEBUG) STATIC void xlog_verify_grant_tail( @@ -567,7 +568,8 @@ xlog_state_shutdown_callbacks( int xlog_state_release_iclog( struct xlog *log, - struct xlog_in_core *iclog) + struct xlog_in_core *iclog, + struct xlog_ticket *ticket) { xfs_lsn_t tail_lsn; bool last_ref; @@ -614,7 +616,7 @@ xlog_state_release_iclog( trace_xlog_iclog_syncing(iclog, _RET_IP_); spin_unlock(&log->l_icloglock); - xlog_sync(log, iclog); + xlog_sync(log, iclog, ticket); spin_lock(&log->l_icloglock); return 0; } @@ -881,7 +883,7 @@ xlog_force_iclog( iclog->ic_flags |= XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA; if (iclog->ic_state == XLOG_STATE_ACTIVE) xlog_state_switch_iclogs(iclog->ic_log, iclog, 0); - return xlog_state_release_iclog(iclog->ic_log, iclog); + return xlog_state_release_iclog(iclog->ic_log, iclog, NULL); } /* @@ -944,6 +946,8 @@ xlog_write_unmount_record( .lv_niovecs = 1, .lv_iovecp = ®, }; + LIST_HEAD(lv_chain); + list_add(&vec.lv_list, &lv_chain); BUILD_BUG_ON((sizeof(struct xlog_op_header) + sizeof(struct xfs_unmount_log_format)) != @@ -952,7 +956,7 @@ xlog_write_unmount_record( /* account for space used by record data */ ticket->t_curr_res -= sizeof(unmount_rec); - return xlog_write(log, NULL, &vec, ticket, reg.i_len); + return xlog_write(log, NULL, &lv_chain, ticket, reg.i_len); } /* @@ -2025,7 +2029,8 @@ xlog_calc_iclog_size( STATIC void xlog_sync( struct xlog *log, - struct xlog_in_core *iclog) + struct xlog_in_core *iclog, + struct xlog_ticket *ticket) { unsigned int count; /* byte count of bwrite */ unsigned int roundoff; /* roundoff to BB or stripe */ @@ -2037,12 +2042,20 @@ xlog_sync( count = xlog_calc_iclog_size(log, iclog, &roundoff); - /* move grant heads by roundoff in sync */ - xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff); - xlog_grant_add_space(log, &log->l_write_head.grant, roundoff); + /* + * If we have a ticket, account for the roundoff via the ticket + * reservation to avoid touching the hot grant heads needlessly. + * Otherwise, we have to move grant heads directly. + */ + if (ticket) { + ticket->t_curr_res -= roundoff; + } else { + xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff); + xlog_grant_add_space(log, &log->l_write_head.grant, roundoff); + } /* put cycle number in every block */ - xlog_pack_data(log, iclog, roundoff); + xlog_pack_data(log, iclog, roundoff); /* real byte length */ size = iclog->ic_offset; @@ -2275,7 +2288,7 @@ xlog_write_get_more_iclog_space( spin_lock(&log->l_icloglock); ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC); xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt); - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, ticket); spin_unlock(&log->l_icloglock); if (error) return error; @@ -2471,13 +2484,13 @@ int xlog_write( struct xlog *log, struct xfs_cil_ctx *ctx, - struct xfs_log_vec *log_vector, + struct list_head *lv_chain, struct xlog_ticket *ticket, uint32_t len) { struct xlog_in_core *iclog = NULL; - struct xfs_log_vec *lv = log_vector; + struct xfs_log_vec *lv; uint32_t record_cnt = 0; uint32_t data_cnt = 0; int error = 0; @@ -2505,7 +2518,7 @@ xlog_write( if (ctx) xlog_cil_set_ctx_write_state(ctx, iclog); - while (lv) { + list_for_each_entry(lv, lv_chain, lv_list) { /* * If the entire log vec does not fit in the iclog, punt it to * the partial copy loop which can handle this case. @@ -2526,7 +2539,6 @@ xlog_write( xlog_write_full(lv, ticket, iclog, &log_offset, &len, &record_cnt, &data_cnt); } - lv = lv->lv_next; } ASSERT(len == 0); @@ -2538,7 +2550,7 @@ xlog_write( */ spin_lock(&log->l_icloglock); xlog_state_finish_copy(log, iclog, record_cnt, 0); - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, ticket); spin_unlock(&log->l_icloglock); return error; @@ -2958,7 +2970,7 @@ restart: * reference to the iclog. */ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) - error = xlog_state_release_iclog(log, iclog); + error = xlog_state_release_iclog(log, iclog, ticket); spin_unlock(&log->l_icloglock); if (error) return error; @@ -3406,7 +3418,8 @@ xfs_log_ticket_get( static int xlog_calc_unit_res( struct xlog *log, - int unit_bytes) + int unit_bytes, + int *niclogs) { int iclog_space; uint num_headers; @@ -3486,6 +3499,8 @@ xlog_calc_unit_res( /* roundoff padding for transaction data and one for commit record */ unit_bytes += 2 * log->l_iclog_roundoff; + if (niclogs) + *niclogs = num_headers; return unit_bytes; } @@ -3494,7 +3509,7 @@ xfs_log_calc_unit_res( struct xfs_mount *mp, int unit_bytes) { - return xlog_calc_unit_res(mp->m_log, unit_bytes); + return xlog_calc_unit_res(mp->m_log, unit_bytes, NULL); } /* @@ -3512,7 +3527,7 @@ xlog_ticket_alloc( tic = kmem_cache_zalloc(xfs_log_ticket_cache, GFP_NOFS | __GFP_NOFAIL); - unit_res = xlog_calc_unit_res(log, unit_bytes); + unit_res = xlog_calc_unit_res(log, unit_bytes, &tic->t_iclog_hdrs); atomic_set(&tic->t_ref, 1); tic->t_task = current; diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index f3ce046a7d45..2728886c2963 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -9,7 +9,8 @@ struct xfs_cil_ctx; struct xfs_log_vec { - struct xfs_log_vec *lv_next; /* next lv in build list */ + struct list_head lv_list; /* CIL lv chain ptrs */ + uint32_t lv_order_id; /* chain ordering info */ int lv_niovecs; /* number of iovecs in lv */ struct xfs_log_iovec *lv_iovecp; /* iovec array */ struct xfs_log_item *lv_item; /* owner */ diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index db6cb7800251..eccbfb99e894 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -44,9 +44,20 @@ xlog_cil_ticket_alloc( * transaction overhead reservation from the first transaction commit. */ tic->t_curr_res = 0; + tic->t_iclog_hdrs = 0; return tic; } +static inline void +xlog_cil_set_iclog_hdr_count(struct xfs_cil *cil) +{ + struct xlog *log = cil->xc_log; + + atomic_set(&cil->xc_iclog_hdrs, + (XLOG_CIL_BLOCKING_SPACE_LIMIT(log) / + (log->l_iclog_size - log->l_iclog_hsize))); +} + /* * Check if the current log item was first committed in this sequence. * We can't rely on just the log item being in the CIL, we have to check @@ -61,7 +72,7 @@ xlog_item_in_current_chkpt( struct xfs_cil *cil, struct xfs_log_item *lip) { - if (list_empty(&lip->li_cil)) + if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)) return false; /* @@ -93,15 +104,88 @@ xlog_cil_ctx_alloc(void) ctx = kmem_zalloc(sizeof(*ctx), KM_NOFS); INIT_LIST_HEAD(&ctx->committing); INIT_LIST_HEAD(&ctx->busy_extents); + INIT_LIST_HEAD(&ctx->log_items); + INIT_LIST_HEAD(&ctx->lv_chain); INIT_WORK(&ctx->push_work, xlog_cil_push_work); return ctx; } +/* + * Aggregate the CIL per cpu structures into global counts, lists, etc and + * clear the percpu state ready for the next context to use. This is called + * from the push code with the context lock held exclusively, hence nothing else + * will be accessing or modifying the per-cpu counters. + */ +static void +xlog_cil_push_pcp_aggregate( + struct xfs_cil *cil, + struct xfs_cil_ctx *ctx) +{ + struct xlog_cil_pcp *cilpcp; + int cpu; + + for_each_online_cpu(cpu) { + cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); + + ctx->ticket->t_curr_res += cilpcp->space_reserved; + cilpcp->space_reserved = 0; + + if (!list_empty(&cilpcp->busy_extents)) { + list_splice_init(&cilpcp->busy_extents, + &ctx->busy_extents); + } + if (!list_empty(&cilpcp->log_items)) + list_splice_init(&cilpcp->log_items, &ctx->log_items); + + /* + * We're in the middle of switching cil contexts. Reset the + * counter we use to detect when the current context is nearing + * full. + */ + cilpcp->space_used = 0; + } +} + +/* + * Aggregate the CIL per-cpu space used counters into the global atomic value. + * This is called when the per-cpu counter aggregation will first pass the soft + * limit threshold so we can switch to atomic counter aggregation for accurate + * detection of hard limit traversal. + */ +static void +xlog_cil_insert_pcp_aggregate( + struct xfs_cil *cil, + struct xfs_cil_ctx *ctx) +{ + struct xlog_cil_pcp *cilpcp; + int cpu; + int count = 0; + + /* Trigger atomic updates then aggregate only for the first caller */ + if (!test_and_clear_bit(XLOG_CIL_PCP_SPACE, &cil->xc_flags)) + return; + + for_each_online_cpu(cpu) { + int old, prev; + + cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); + do { + old = cilpcp->space_used; + prev = cmpxchg(&cilpcp->space_used, old, 0); + } while (old != prev); + count += old; + } + atomic_add(count, &ctx->space_used); +} + static void xlog_cil_ctx_switch( struct xfs_cil *cil, struct xfs_cil_ctx *ctx) { + xlog_cil_set_iclog_hdr_count(cil); + set_bit(XLOG_CIL_EMPTY, &cil->xc_flags); + set_bit(XLOG_CIL_PCP_SPACE, &cil->xc_flags); ctx->sequence = ++cil->xc_current_sequence; ctx->cil = cil; cil->xc_ctx = ctx; @@ -123,6 +207,7 @@ xlog_cil_init_post_recovery( { log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log); log->l_cilp->xc_ctx->sequence = 1; + xlog_cil_set_iclog_hdr_count(log->l_cilp); } static inline int @@ -254,6 +339,7 @@ xlog_cil_alloc_shadow_bufs( memset(lv, 0, xlog_cil_iovec_space(niovecs)); + INIT_LIST_HEAD(&lv->lv_list); lv->lv_item = lip; lv->lv_size = buf_size; if (ordered) @@ -269,7 +355,6 @@ xlog_cil_alloc_shadow_bufs( else lv->lv_buf_len = 0; lv->lv_bytes = 0; - lv->lv_next = NULL; } /* Ensure the lv is set up according to ->iop_size */ @@ -396,7 +481,6 @@ xlog_cil_insert_format_items( if (lip->li_lv && shadow->lv_size <= lip->li_lv->lv_size) { /* same or smaller, optimise common overwrite case */ lv = lip->li_lv; - lv->lv_next = NULL; if (ordered) goto insert; @@ -434,6 +518,23 @@ insert: } /* + * The use of lockless waitqueue_active() requires that the caller has + * serialised itself against the wakeup call in xlog_cil_push_work(). That + * can be done by either holding the push lock or the context lock. + */ +static inline bool +xlog_cil_over_hard_limit( + struct xlog *log, + int32_t space_used) +{ + if (waitqueue_active(&log->l_cilp->xc_push_wait)) + return true; + if (space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) + return true; + return false; +} + +/* * Insert the log items into the CIL and calculate the difference in space * consumed by the item. Add the space to the checkpoint ticket and calculate * if the change requires additional log metadata. If it does, take that space @@ -450,8 +551,10 @@ xlog_cil_insert_items( struct xfs_cil_ctx *ctx = cil->xc_ctx; struct xfs_log_item *lip; int len = 0; - int iclog_space; int iovhdr_res = 0, split_res = 0, ctx_res = 0; + int space_used; + int order; + struct xlog_cil_pcp *cilpcp; ASSERT(tp); @@ -461,93 +564,135 @@ xlog_cil_insert_items( */ xlog_cil_insert_format_items(log, tp, &len); - spin_lock(&cil->xc_cil_lock); + /* + * Subtract the space released by intent cancelation from the space we + * consumed so that we remove it from the CIL space and add it back to + * the current transaction reservation context. + */ + len -= released_space; - /* attach the transaction to the CIL if it has any busy extents */ - if (!list_empty(&tp->t_busy)) - list_splice_init(&tp->t_busy, &ctx->busy_extents); + /* + * Grab the per-cpu pointer for the CIL before we start any accounting. + * That ensures that we are running with pre-emption disabled and so we + * can't be scheduled away between split sample/update operations that + * are done without outside locking to serialise them. + */ + cilpcp = get_cpu_ptr(cil->xc_pcp); /* - * Now transfer enough transaction reservation to the context ticket - * for the checkpoint. The context ticket is special - the unit - * reservation has to grow as well as the current reservation as we - * steal from tickets so we can correctly determine the space used - * during the transaction commit. + * We need to take the CIL checkpoint unit reservation on the first + * commit into the CIL. Test the XLOG_CIL_EMPTY bit first so we don't + * unnecessarily do an atomic op in the fast path here. We can clear the + * XLOG_CIL_EMPTY bit as we are under the xc_ctx_lock here and that + * needs to be held exclusively to reset the XLOG_CIL_EMPTY bit. */ - if (ctx->ticket->t_curr_res == 0) { + if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags) && + test_and_clear_bit(XLOG_CIL_EMPTY, &cil->xc_flags)) ctx_res = ctx->ticket->t_unit_res; - ctx->ticket->t_curr_res = ctx_res; - tp->t_ticket->t_curr_res -= ctx_res; - } - /* do we need space for more log record headers? */ - iclog_space = log->l_iclog_size - log->l_iclog_hsize; - if (len > 0 && (ctx->space_used / iclog_space != - (ctx->space_used + len) / iclog_space)) { - split_res = (len + iclog_space - 1) / iclog_space; - /* need to take into account split region headers, too */ - split_res *= log->l_iclog_hsize + sizeof(struct xlog_op_header); - ctx->ticket->t_unit_res += split_res; - ctx->ticket->t_curr_res += split_res; - tp->t_ticket->t_curr_res -= split_res; - ASSERT(tp->t_ticket->t_curr_res >= len); + /* + * Check if we need to steal iclog headers. atomic_read() is not a + * locked atomic operation, so we can check the value before we do any + * real atomic ops in the fast path. If we've already taken the CIL unit + * reservation from this commit, we've already got one iclog header + * space reserved so we have to account for that otherwise we risk + * overrunning the reservation on this ticket. + * + * If the CIL is already at the hard limit, we might need more header + * space that originally reserved. So steal more header space from every + * commit that occurs once we are over the hard limit to ensure the CIL + * push won't run out of reservation space. + * + * This can steal more than we need, but that's OK. + * + * The cil->xc_ctx_lock provides the serialisation necessary for safely + * calling xlog_cil_over_hard_limit() in this context. + */ + space_used = atomic_read(&ctx->space_used) + cilpcp->space_used + len; + if (atomic_read(&cil->xc_iclog_hdrs) > 0 || + xlog_cil_over_hard_limit(log, space_used)) { + split_res = log->l_iclog_hsize + + sizeof(struct xlog_op_header); + if (ctx_res) + ctx_res += split_res * (tp->t_ticket->t_iclog_hdrs - 1); + else + ctx_res = split_res * tp->t_ticket->t_iclog_hdrs; + atomic_sub(tp->t_ticket->t_iclog_hdrs, &cil->xc_iclog_hdrs); } - tp->t_ticket->t_curr_res -= len; - tp->t_ticket->t_curr_res += released_space; - ctx->space_used += len; - ctx->space_used -= released_space; + cilpcp->space_reserved += ctx_res; /* - * If we've overrun the reservation, dump the tx details before we move - * the log items. Shutdown is imminent... + * Accurately account when over the soft limit, otherwise fold the + * percpu count into the global count if over the per-cpu threshold. */ - if (WARN_ON(tp->t_ticket->t_curr_res < 0)) { - xfs_warn(log->l_mp, "Transaction log reservation overrun:"); - xfs_warn(log->l_mp, - " log items: %d bytes (iov hdrs: %d bytes)", - len, iovhdr_res); - xfs_warn(log->l_mp, " split region headers: %d bytes", - split_res); - xfs_warn(log->l_mp, " ctx ticket: %d bytes", ctx_res); - xlog_print_trans(tp); + if (!test_bit(XLOG_CIL_PCP_SPACE, &cil->xc_flags)) { + atomic_add(len, &ctx->space_used); + } else if (cilpcp->space_used + len > + (XLOG_CIL_SPACE_LIMIT(log) / num_online_cpus())) { + space_used = atomic_add_return(cilpcp->space_used + len, + &ctx->space_used); + cilpcp->space_used = 0; + + /* + * If we just transitioned over the soft limit, we need to + * transition to the global atomic counter. + */ + if (space_used >= XLOG_CIL_SPACE_LIMIT(log)) + xlog_cil_insert_pcp_aggregate(cil, ctx); + } else { + cilpcp->space_used += len; } + /* attach the transaction to the CIL if it has any busy extents */ + if (!list_empty(&tp->t_busy)) + list_splice_init(&tp->t_busy, &cilpcp->busy_extents); /* - * Now (re-)position everything modified at the tail of the CIL. + * Now update the order of everything modified in the transaction + * and insert items into the CIL if they aren't already there. * We do this here so we only need to take the CIL lock once during * the transaction commit. */ + order = atomic_inc_return(&ctx->order_id); list_for_each_entry(lip, &tp->t_items, li_trans) { - /* Skip items which aren't dirty in this transaction. */ if (!test_bit(XFS_LI_DIRTY, &lip->li_flags)) continue; - /* - * Only move the item if it isn't already at the tail. This is - * to prevent a transient list_empty() state when reinserting - * an item that is already the only item in the CIL. - */ - if (!list_is_last(&lip->li_cil, &cil->xc_cil)) - list_move_tail(&lip->li_cil, &cil->xc_cil); + lip->li_order_id = order; + if (!list_empty(&lip->li_cil)) + continue; + list_add_tail(&lip->li_cil, &cilpcp->log_items); } + put_cpu_ptr(cilpcp); - spin_unlock(&cil->xc_cil_lock); - - if (tp->t_ticket->t_curr_res < 0) + /* + * If we've overrun the reservation, dump the tx details before we move + * the log items. Shutdown is imminent... + */ + tp->t_ticket->t_curr_res -= ctx_res + len; + if (WARN_ON(tp->t_ticket->t_curr_res < 0)) { + xfs_warn(log->l_mp, "Transaction log reservation overrun:"); + xfs_warn(log->l_mp, + " log items: %d bytes (iov hdrs: %d bytes)", + len, iovhdr_res); + xfs_warn(log->l_mp, " split region headers: %d bytes", + split_res); + xfs_warn(log->l_mp, " ctx ticket: %d bytes", ctx_res); + xlog_print_trans(tp); xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); + } } static void xlog_cil_free_logvec( - struct xfs_log_vec *log_vector) + struct list_head *lv_chain) { struct xfs_log_vec *lv; - for (lv = log_vector; lv; ) { - struct xfs_log_vec *next = lv->lv_next; + while (!list_empty(lv_chain)) { + lv = list_first_entry(lv_chain, struct xfs_log_vec, lv_list); + list_del_init(&lv->lv_list); kmem_free(lv); - lv = next; } } @@ -647,7 +792,7 @@ xlog_cil_committed( spin_unlock(&ctx->cil->xc_push_lock); } - xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, + xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, &ctx->lv_chain, ctx->start_lsn, abort); xfs_extent_busy_sort(&ctx->busy_extents); @@ -658,7 +803,7 @@ xlog_cil_committed( list_del(&ctx->committing); spin_unlock(&ctx->cil->xc_push_lock); - xlog_cil_free_logvec(ctx->lv_chain); + xlog_cil_free_logvec(&ctx->lv_chain); if (!list_empty(&ctx->busy_extents)) xlog_discard_busy_extents(mp, ctx); @@ -817,7 +962,6 @@ restart: static int xlog_cil_write_chain( struct xfs_cil_ctx *ctx, - struct xfs_log_vec *chain, uint32_t chain_len) { struct xlog *log = ctx->cil->xc_log; @@ -826,7 +970,7 @@ xlog_cil_write_chain( error = xlog_cil_order_write(ctx->cil, ctx->sequence, _START_RECORD); if (error) return error; - return xlog_write(log, ctx, chain, ctx->ticket, chain_len); + return xlog_write(log, ctx, &ctx->lv_chain, ctx->ticket, chain_len); } /* @@ -855,6 +999,8 @@ xlog_cil_write_commit_record( .lv_iovecp = ®, }; int error; + LIST_HEAD(lv_chain); + list_add(&vec.lv_list, &lv_chain); if (xlog_is_shutdown(log)) return -EIO; @@ -865,7 +1011,7 @@ xlog_cil_write_commit_record( /* account for space used by record data */ ctx->ticket->t_curr_res -= reg.i_len; - error = xlog_write(log, ctx, &vec, ctx->ticket, reg.i_len); + error = xlog_write(log, ctx, &lv_chain, ctx->ticket, reg.i_len); if (error) xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); return error; @@ -931,12 +1077,31 @@ xlog_cil_build_trans_hdr( lvhdr->lv_niovecs = 2; lvhdr->lv_iovecp = &hdr->lhdr[0]; lvhdr->lv_bytes = hdr->lhdr[0].i_len + hdr->lhdr[1].i_len; - lvhdr->lv_next = ctx->lv_chain; tic->t_curr_res -= lvhdr->lv_bytes; } /* + * CIL item reordering compare function. We want to order in ascending ID order, + * but we want to leave items with the same ID in the order they were added to + * the list. This is important for operations like reflink where we log 4 order + * dependent intents in a single transaction when we overwrite an existing + * shared extent with a new shared extent. i.e. BUI(unmap), CUI(drop), + * CUI (inc), BUI(remap)... + */ +static int +xlog_cil_order_cmp( + void *priv, + const struct list_head *a, + const struct list_head *b) +{ + struct xfs_log_vec *l1 = container_of(a, struct xfs_log_vec, lv_list); + struct xfs_log_vec *l2 = container_of(b, struct xfs_log_vec, lv_list); + + return l1->lv_order_id > l2->lv_order_id; +} + +/* * Pull all the log vectors off the items in the CIL, and remove the items from * the CIL. We don't need the CIL lock here because it's only needed on the * transaction commit side which is currently locked out by the flush lock. @@ -947,18 +1112,16 @@ xlog_cil_build_trans_hdr( */ static void xlog_cil_build_lv_chain( - struct xfs_cil *cil, struct xfs_cil_ctx *ctx, struct list_head *whiteouts, uint32_t *num_iovecs, uint32_t *num_bytes) { - struct xfs_log_vec *lv = NULL; - - while (!list_empty(&cil->xc_cil)) { + while (!list_empty(&ctx->log_items)) { struct xfs_log_item *item; + struct xfs_log_vec *lv; - item = list_first_entry(&cil->xc_cil, + item = list_first_entry(&ctx->log_items, struct xfs_log_item, li_cil); if (test_bit(XFS_LI_WHITEOUT, &item->li_flags)) { @@ -967,18 +1130,18 @@ xlog_cil_build_lv_chain( continue; } - list_del_init(&item->li_cil); - if (!ctx->lv_chain) - ctx->lv_chain = item->li_lv; - else - lv->lv_next = item->li_lv; lv = item->li_lv; - item->li_lv = NULL; - *num_iovecs += lv->lv_niovecs; + lv->lv_order_id = item->li_order_id; /* we don't write ordered log vectors */ if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) *num_bytes += lv->lv_bytes; + *num_iovecs += lv->lv_niovecs; + list_add_tail(&lv->lv_list, &ctx->lv_chain); + + list_del_init(&item->li_cil); + item->li_order_id = 0; + item->li_lv = NULL; } } @@ -1022,10 +1185,11 @@ xlog_cil_push_work( int num_bytes = 0; int error = 0; struct xlog_cil_trans_hdr thdr; - struct xfs_log_vec lvhdr = { NULL }; + struct xfs_log_vec lvhdr = {}; xfs_csn_t push_seq; bool push_commit_stable; LIST_HEAD (whiteouts); + struct xlog_ticket *ticket; new_ctx = xlog_cil_ctx_alloc(); new_ctx->ticket = xlog_cil_ticket_alloc(log); @@ -1049,12 +1213,14 @@ xlog_cil_push_work( if (waitqueue_active(&cil->xc_push_wait)) wake_up_all(&cil->xc_push_wait); + xlog_cil_push_pcp_aggregate(cil, ctx); + /* * Check if we've anything to push. If there is nothing, then we don't * move on to a new sequence number and so we have to be able to push * this sequence again later. */ - if (list_empty(&cil->xc_cil)) { + if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)) { cil->xc_push_seq = 0; spin_unlock(&cil->xc_push_lock); goto out_skip; @@ -1094,7 +1260,7 @@ xlog_cil_push_work( list_add(&ctx->committing, &cil->xc_committing); spin_unlock(&cil->xc_push_lock); - xlog_cil_build_lv_chain(cil, ctx, &whiteouts, &num_iovecs, &num_bytes); + xlog_cil_build_lv_chain(ctx, &whiteouts, &num_iovecs, &num_bytes); /* * Switch the contexts so we can drop the context lock and move out @@ -1127,14 +1293,30 @@ xlog_cil_push_work( up_write(&cil->xc_ctx_lock); /* + * Sort the log vector chain before we add the transaction headers. + * This ensures we always have the transaction headers at the start + * of the chain. + */ + list_sort(NULL, &ctx->lv_chain, xlog_cil_order_cmp); + + /* * Build a checkpoint transaction header and write it to the log to * begin the transaction. We need to account for the space used by the * transaction header here as it is not accounted for in xlog_write(). + * Add the lvhdr to the head of the lv chain we pass to xlog_write() so + * it gets written into the iclog first. */ xlog_cil_build_trans_hdr(ctx, &thdr, &lvhdr, num_iovecs); num_bytes += lvhdr.lv_bytes; + list_add(&lvhdr.lv_list, &ctx->lv_chain); - error = xlog_cil_write_chain(ctx, &lvhdr, num_bytes); + /* + * Take the lvhdr back off the lv_chain immediately after calling + * xlog_cil_write_chain() as it should not be passed to log IO + * completion. + */ + error = xlog_cil_write_chain(ctx, num_bytes); + list_del(&lvhdr.lv_list); if (error) goto out_abort_free_ticket; @@ -1142,7 +1324,14 @@ xlog_cil_push_work( if (error) goto out_abort_free_ticket; - xfs_log_ticket_ungrant(log, ctx->ticket); + /* + * Grab the ticket from the ctx so we can ungrant it after releasing the + * commit_iclog. The ctx may be freed by the time we return from + * releasing the commit_iclog (i.e. checkpoint has been completed and + * callback run) so we can't reference the ctx after the call to + * xlog_state_release_iclog(). + */ + ticket = ctx->ticket; /* * If the checkpoint spans multiple iclogs, wait for all previous iclogs @@ -1192,12 +1381,14 @@ xlog_cil_push_work( if (push_commit_stable && ctx->commit_iclog->ic_state == XLOG_STATE_ACTIVE) xlog_state_switch_iclogs(log, ctx->commit_iclog, 0); - xlog_state_release_iclog(log, ctx->commit_iclog); + ticket = ctx->ticket; + xlog_state_release_iclog(log, ctx->commit_iclog, ticket); /* Not safe to reference ctx now! */ spin_unlock(&log->l_icloglock); xlog_cil_cleanup_whiteouts(&whiteouts); + xfs_log_ticket_ungrant(log, ticket); return; out_skip: @@ -1207,17 +1398,19 @@ out_skip: return; out_abort_free_ticket: - xfs_log_ticket_ungrant(log, ctx->ticket); ASSERT(xlog_is_shutdown(log)); xlog_cil_cleanup_whiteouts(&whiteouts); if (!ctx->commit_iclog) { + xfs_log_ticket_ungrant(log, ctx->ticket); xlog_cil_committed(ctx); return; } spin_lock(&log->l_icloglock); - xlog_state_release_iclog(log, ctx->commit_iclog); + ticket = ctx->ticket; + xlog_state_release_iclog(log, ctx->commit_iclog, ticket); /* Not safe to reference ctx now! */ spin_unlock(&log->l_icloglock); + xfs_log_ticket_ungrant(log, ticket); } /* @@ -1232,18 +1425,27 @@ xlog_cil_push_background( struct xlog *log) __releases(cil->xc_ctx_lock) { struct xfs_cil *cil = log->l_cilp; + int space_used = atomic_read(&cil->xc_ctx->space_used); /* * The cil won't be empty because we are called while holding the - * context lock so whatever we added to the CIL will still be there + * context lock so whatever we added to the CIL will still be there. */ - ASSERT(!list_empty(&cil->xc_cil)); + ASSERT(!test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)); /* - * Don't do a background push if we haven't used up all the - * space available yet. + * We are done if: + * - we haven't used up all the space available yet; or + * - we've already queued up a push; and + * - we're not over the hard limit; and + * - nothing has been over the hard limit. + * + * If so, we don't need to take the push lock as there's nothing to do. */ - if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) { + if (space_used < XLOG_CIL_SPACE_LIMIT(log) || + (cil->xc_push_seq == cil->xc_current_sequence && + space_used < XLOG_CIL_BLOCKING_SPACE_LIMIT(log) && + !waitqueue_active(&cil->xc_push_wait))) { up_read(&cil->xc_ctx_lock); return; } @@ -1270,12 +1472,11 @@ xlog_cil_push_background( * dipping back down under the hard limit. * * The ctx->xc_push_lock provides the serialisation necessary for safely - * using the lockless waitqueue_active() check in this context. + * calling xlog_cil_over_hard_limit() in this context. */ - if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log) || - waitqueue_active(&cil->xc_push_wait)) { + if (xlog_cil_over_hard_limit(log, space_used)) { trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket); - ASSERT(cil->xc_ctx->space_used < log->l_logsize); + ASSERT(space_used < log->l_logsize); xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock); return; } @@ -1334,7 +1535,8 @@ xlog_cil_push_now( * If the CIL is empty or we've already pushed the sequence then * there's no more work that we need to do. */ - if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) { + if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags) || + push_seq <= cil->xc_push_seq) { spin_unlock(&cil->xc_push_lock); return; } @@ -1352,7 +1554,7 @@ xlog_cil_empty( bool empty = false; spin_lock(&cil->xc_push_lock); - if (list_empty(&cil->xc_cil)) + if (test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)) empty = true; spin_unlock(&cil->xc_push_lock); return empty; @@ -1483,7 +1685,7 @@ xlog_cil_flush( * If the CIL is empty, make sure that any previous checkpoint that may * still be in an active iclog is pushed to stable storage. */ - if (list_empty(&log->l_cilp->xc_cil)) + if (test_bit(XLOG_CIL_EMPTY, &log->l_cilp->xc_flags)) xfs_log_force(log->l_mp, 0); } @@ -1568,7 +1770,7 @@ restart: * we would have found the context on the committing list. */ if (sequence == cil->xc_current_sequence && - !list_empty(&cil->xc_cil)) { + !test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)) { spin_unlock(&cil->xc_push_lock); goto restart; } @@ -1589,14 +1791,48 @@ out_shutdown: } /* + * Move dead percpu state to the relevant CIL context structures. + * + * We have to lock the CIL context here to ensure that nothing is modifying + * the percpu state, either addition or removal. Both of these are done under + * the CIL context lock, so grabbing that exclusively here will ensure we can + * safely drain the cilpcp for the CPU that is dying. + */ +void +xlog_cil_pcp_dead( + struct xlog *log, + unsigned int cpu) +{ + struct xfs_cil *cil = log->l_cilp; + struct xlog_cil_pcp *cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); + struct xfs_cil_ctx *ctx; + + down_write(&cil->xc_ctx_lock); + ctx = cil->xc_ctx; + if (ctx->ticket) + ctx->ticket->t_curr_res += cilpcp->space_reserved; + cilpcp->space_reserved = 0; + + if (!list_empty(&cilpcp->log_items)) + list_splice_init(&cilpcp->log_items, &ctx->log_items); + if (!list_empty(&cilpcp->busy_extents)) + list_splice_init(&cilpcp->busy_extents, &ctx->busy_extents); + atomic_add(cilpcp->space_used, &ctx->space_used); + cilpcp->space_used = 0; + up_write(&cil->xc_ctx_lock); +} + +/* * Perform initial CIL structure initialisation. */ int xlog_cil_init( - struct xlog *log) + struct xlog *log) { - struct xfs_cil *cil; - struct xfs_cil_ctx *ctx; + struct xfs_cil *cil; + struct xfs_cil_ctx *ctx; + struct xlog_cil_pcp *cilpcp; + int cpu; cil = kmem_zalloc(sizeof(*cil), KM_MAYFAIL); if (!cil) @@ -1611,22 +1847,31 @@ xlog_cil_init( if (!cil->xc_push_wq) goto out_destroy_cil; - INIT_LIST_HEAD(&cil->xc_cil); + cil->xc_log = log; + cil->xc_pcp = alloc_percpu(struct xlog_cil_pcp); + if (!cil->xc_pcp) + goto out_destroy_wq; + + for_each_possible_cpu(cpu) { + cilpcp = per_cpu_ptr(cil->xc_pcp, cpu); + INIT_LIST_HEAD(&cilpcp->busy_extents); + INIT_LIST_HEAD(&cilpcp->log_items); + } + INIT_LIST_HEAD(&cil->xc_committing); - spin_lock_init(&cil->xc_cil_lock); spin_lock_init(&cil->xc_push_lock); init_waitqueue_head(&cil->xc_push_wait); init_rwsem(&cil->xc_ctx_lock); init_waitqueue_head(&cil->xc_start_wait); init_waitqueue_head(&cil->xc_commit_wait); - cil->xc_log = log; log->l_cilp = cil; ctx = xlog_cil_ctx_alloc(); xlog_cil_ctx_switch(cil, ctx); - return 0; +out_destroy_wq: + destroy_workqueue(cil->xc_push_wq); out_destroy_cil: kmem_free(cil); return -ENOMEM; @@ -1636,14 +1881,17 @@ void xlog_cil_destroy( struct xlog *log) { - if (log->l_cilp->xc_ctx) { - if (log->l_cilp->xc_ctx->ticket) - xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket); - kmem_free(log->l_cilp->xc_ctx); + struct xfs_cil *cil = log->l_cilp; + + if (cil->xc_ctx) { + if (cil->xc_ctx->ticket) + xfs_log_ticket_put(cil->xc_ctx->ticket); + kmem_free(cil->xc_ctx); } - ASSERT(list_empty(&log->l_cilp->xc_cil)); - destroy_workqueue(log->l_cilp->xc_push_wq); - kmem_free(log->l_cilp); + ASSERT(test_bit(XLOG_CIL_EMPTY, &cil->xc_flags)); + free_percpu(cil->xc_pcp); + destroy_workqueue(cil->xc_push_wq); + kmem_free(cil); } diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 686c01eb3661..1bd2963e8fbd 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -143,15 +143,16 @@ enum xlog_iclog_state { #define XLOG_COVER_OPS 5 typedef struct xlog_ticket { - struct list_head t_queue; /* reserve/write queue */ - struct task_struct *t_task; /* task that owns this ticket */ - xlog_tid_t t_tid; /* transaction identifier : 4 */ - atomic_t t_ref; /* ticket reference count : 4 */ - int t_curr_res; /* current reservation in bytes : 4 */ - int t_unit_res; /* unit reservation in bytes : 4 */ - char t_ocnt; /* original count : 1 */ - char t_cnt; /* current count : 1 */ - uint8_t t_flags; /* properties of reservation : 1 */ + struct list_head t_queue; /* reserve/write queue */ + struct task_struct *t_task; /* task that owns this ticket */ + xlog_tid_t t_tid; /* transaction identifier */ + atomic_t t_ref; /* ticket reference count */ + int t_curr_res; /* current reservation */ + int t_unit_res; /* unit reservation */ + char t_ocnt; /* original unit count */ + char t_cnt; /* current unit count */ + uint8_t t_flags; /* properties of reservation */ + int t_iclog_hdrs; /* iclog hdrs in t_curr_res */ } xlog_ticket_t; /* @@ -221,13 +222,25 @@ struct xfs_cil_ctx { xfs_lsn_t commit_lsn; /* chkpt commit record lsn */ struct xlog_in_core *commit_iclog; struct xlog_ticket *ticket; /* chkpt ticket */ - int space_used; /* aggregate size of regions */ + atomic_t space_used; /* aggregate size of regions */ struct list_head busy_extents; /* busy extents in chkpt */ - struct xfs_log_vec *lv_chain; /* logvecs being pushed */ + struct list_head log_items; /* log items in chkpt */ + struct list_head lv_chain; /* logvecs being pushed */ struct list_head iclog_entry; struct list_head committing; /* ctx committing list */ struct work_struct discard_endio_work; struct work_struct push_work; + atomic_t order_id; +}; + +/* + * Per-cpu CIL tracking items + */ +struct xlog_cil_pcp { + int32_t space_used; + uint32_t space_reserved; + struct list_head busy_extents; + struct list_head log_items; }; /* @@ -248,8 +261,8 @@ struct xfs_cil_ctx { */ struct xfs_cil { struct xlog *xc_log; - struct list_head xc_cil; - spinlock_t xc_cil_lock; + unsigned long xc_flags; + atomic_t xc_iclog_hdrs; struct workqueue_struct *xc_push_wq; struct rw_semaphore xc_ctx_lock ____cacheline_aligned_in_smp; @@ -263,8 +276,17 @@ struct xfs_cil { wait_queue_head_t xc_start_wait; xfs_csn_t xc_current_sequence; wait_queue_head_t xc_push_wait; /* background push throttle */ + + void __percpu *xc_pcp; /* percpu CIL structures */ +#ifdef CONFIG_HOTPLUG_CPU + struct list_head xc_pcp_list; +#endif } ____cacheline_aligned_in_smp; +/* xc_flags bit values */ +#define XLOG_CIL_EMPTY 1 +#define XLOG_CIL_PCP_SPACE 2 + /* * The amount of log space we allow the CIL to aggregate is difficult to size. * Whatever we choose, we have to make sure we can get a reservation for the @@ -486,14 +508,15 @@ struct xlog_ticket *xlog_ticket_alloc(struct xlog *log, int unit_bytes, void xlog_print_tic_res(struct xfs_mount *mp, struct xlog_ticket *ticket); void xlog_print_trans(struct xfs_trans *); int xlog_write(struct xlog *log, struct xfs_cil_ctx *ctx, - struct xfs_log_vec *log_vector, struct xlog_ticket *tic, + struct list_head *lv_chain, struct xlog_ticket *tic, uint32_t len); void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket); void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket); void xlog_state_switch_iclogs(struct xlog *log, struct xlog_in_core *iclog, int eventual_size); -int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog); +int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog, + struct xlog_ticket *ticket); /* * When we crack an atomic LSN, we sample it first so that the value will not @@ -682,4 +705,9 @@ xlog_kvmalloc( return p; } +/* + * CIL CPU dead notifier + */ +void xlog_cil_pcp_dead(struct xlog *log, unsigned int cpu); + #endif /* __XFS_LOG_PRIV_H__ */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 5f7e4e6e33ce..9e0e7ff76e02 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2629,21 +2629,21 @@ xlog_recover_cancel_intents( */ STATIC void xlog_recover_clear_agi_bucket( - xfs_mount_t *mp, - xfs_agnumber_t agno, - int bucket) + struct xfs_perag *pag, + int bucket) { - xfs_trans_t *tp; - xfs_agi_t *agi; - struct xfs_buf *agibp; - int offset; - int error; + struct xfs_mount *mp = pag->pag_mount; + struct xfs_trans *tp; + struct xfs_agi *agi; + struct xfs_buf *agibp; + int offset; + int error; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp); if (error) goto out_error; - error = xfs_read_agi(mp, tp, agno, &agibp); + error = xfs_read_agi(pag, tp, &agibp); if (error) goto out_abort; @@ -2662,60 +2662,62 @@ xlog_recover_clear_agi_bucket( out_abort: xfs_trans_cancel(tp); out_error: - xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno); + xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, + pag->pag_agno); return; } -STATIC xfs_agino_t -xlog_recover_process_one_iunlink( - struct xfs_mount *mp, - xfs_agnumber_t agno, - xfs_agino_t agino, - int bucket) +static int +xlog_recover_iunlink_bucket( + struct xfs_perag *pag, + struct xfs_agi *agi, + int bucket) { - struct xfs_buf *ibp; - struct xfs_dinode *dip; - struct xfs_inode *ip; - xfs_ino_t ino; - int error; - - ino = XFS_AGINO_TO_INO(mp, agno, agino); - error = xfs_iget(mp, NULL, ino, 0, 0, &ip); - if (error) - goto fail; + struct xfs_mount *mp = pag->pag_mount; + struct xfs_inode *prev_ip = NULL; + struct xfs_inode *ip; + xfs_agino_t prev_agino, agino; + int error = 0; - /* - * Get the on disk inode to find the next inode in the bucket. - */ - error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &ibp); - if (error) - goto fail_iput; - dip = xfs_buf_offset(ibp, ip->i_imap.im_boffset); + agino = be32_to_cpu(agi->agi_unlinked[bucket]); + while (agino != NULLAGINO) { + error = xfs_iget(mp, NULL, + XFS_AGINO_TO_INO(mp, pag->pag_agno, agino), + 0, 0, &ip); + if (error) + break; - xfs_iflags_clear(ip, XFS_IRECOVERY); - ASSERT(VFS_I(ip)->i_nlink == 0); - ASSERT(VFS_I(ip)->i_mode != 0); + ASSERT(VFS_I(ip)->i_nlink == 0); + ASSERT(VFS_I(ip)->i_mode != 0); + xfs_iflags_clear(ip, XFS_IRECOVERY); + agino = ip->i_next_unlinked; - /* setup for the next pass */ - agino = be32_to_cpu(dip->di_next_unlinked); - xfs_buf_relse(ibp); + if (prev_ip) { + ip->i_prev_unlinked = prev_agino; + xfs_irele(prev_ip); - xfs_irele(ip); - return agino; + /* + * Ensure the inode is removed from the unlinked list + * before we continue so that it won't race with + * building the in-memory list here. This could be + * serialised with the agibp lock, but that just + * serialises via lockstepping and it's much simpler + * just to flush the inodegc queue and wait for it to + * complete. + */ + xfs_inodegc_flush(mp); + } - fail_iput: - xfs_irele(ip); - fail: - /* - * We can't read in the inode this bucket points to, or this inode - * is messed up. Just ditch this bucket of inodes. We will lose - * some inodes and space, but at least we won't hang. - * - * Call xlog_recover_clear_agi_bucket() to perform a transaction to - * clear the inode pointer in the bucket. - */ - xlog_recover_clear_agi_bucket(mp, agno, bucket); - return NULLAGINO; + prev_agino = agino; + prev_ip = ip; + } + + if (prev_ip) { + ip->i_prev_unlinked = prev_agino; + xfs_irele(prev_ip); + } + xfs_inodegc_flush(mp); + return error; } /* @@ -2741,59 +2743,70 @@ xlog_recover_process_one_iunlink( * scheduled on this CPU to ensure other scheduled work can run without undue * latency. */ -STATIC void -xlog_recover_process_iunlinks( - struct xlog *log) +static void +xlog_recover_iunlink_ag( + struct xfs_perag *pag) { - struct xfs_mount *mp = log->l_mp; - struct xfs_perag *pag; - xfs_agnumber_t agno; struct xfs_agi *agi; struct xfs_buf *agibp; - xfs_agino_t agino; int bucket; int error; - for_each_perag(mp, agno, pag) { - error = xfs_read_agi(mp, NULL, pag->pag_agno, &agibp); + error = xfs_read_agi(pag, NULL, &agibp); + if (error) { + /* + * AGI is b0rked. Don't process it. + * + * We should probably mark the filesystem as corrupt after we've + * recovered all the ag's we can.... + */ + return; + } + + /* + * Unlock the buffer so that it can be acquired in the normal course of + * the transaction to truncate and free each inode. Because we are not + * racing with anyone else here for the AGI buffer, we don't even need + * to hold it locked to read the initial unlinked bucket entries out of + * the buffer. We keep buffer reference though, so that it stays pinned + * in memory while we need the buffer. + */ + agi = agibp->b_addr; + xfs_buf_unlock(agibp); + + for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { + error = xlog_recover_iunlink_bucket(pag, agi, bucket); if (error) { /* - * AGI is b0rked. Don't process it. - * - * We should probably mark the filesystem as corrupt - * after we've recovered all the ag's we can.... + * Bucket is unrecoverable, so only a repair scan can + * free the remaining unlinked inodes. Just empty the + * bucket and remaining inodes on it unreferenced and + * unfreeable. */ - continue; + xfs_inodegc_flush(pag->pag_mount); + xlog_recover_clear_agi_bucket(pag, bucket); } - /* - * Unlock the buffer so that it can be acquired in the normal - * course of the transaction to truncate and free each inode. - * Because we are not racing with anyone else here for the AGI - * buffer, we don't even need to hold it locked to read the - * initial unlinked bucket entries out of the buffer. We keep - * buffer reference though, so that it stays pinned in memory - * while we need the buffer. - */ - agi = agibp->b_addr; - xfs_buf_unlock(agibp); - - for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { - agino = be32_to_cpu(agi->agi_unlinked[bucket]); - while (agino != NULLAGINO) { - agino = xlog_recover_process_one_iunlink(mp, - pag->pag_agno, agino, bucket); - cond_resched(); - } - } - xfs_buf_rele(agibp); } + xfs_buf_rele(agibp); +} + +static void +xlog_recover_process_iunlinks( + struct xlog *log) +{ + struct xfs_perag *pag; + xfs_agnumber_t agno; + + for_each_perag(log->l_mp, agno, pag) + xlog_recover_iunlink_ag(pag); + /* * Flush the pending unlinked inodes to ensure that the inactivations * are fully completed on disk and the incore inodes can be reclaimed * before we signal that recovery is complete. */ - xfs_inodegc_flush(mp); + xfs_inodegc_flush(log->l_mp); } STATIC void @@ -3313,7 +3326,8 @@ xlog_do_recover( /* re-initialise in-core superblock and geometry structures */ mp->m_features |= xfs_sb_version_to_features(sbp); xfs_reinit_percpu_counters(mp); - error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); + error = xfs_initialize_perag(mp, sbp->sb_agcount, sbp->sb_dblocks, + &mp->m_maxagi); if (error) { xfs_warn(mp, "Failed post-recovery per-ag init: %d", error); return error; diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index daa8d29c46b4..f10c88cee116 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -778,7 +778,8 @@ xfs_mountfs( /* * Allocate and initialize the per-ag data. */ - error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi); + error = xfs_initialize_perag(mp, sbp->sb_agcount, mp->m_sb.sb_dblocks, + &mp->m_maxagi); if (error) { xfs_warn(mp, "Failed per-ag init: %d", error); goto out_free_dir; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index e7a7c00d93be..81994f4706de 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -125,11 +125,10 @@ * shared blocks. If there are no shared extents, fbno and flen will * be set to NULLAGBLOCK and 0, respectively. */ -int +static int xfs_reflink_find_shared( - struct xfs_mount *mp, + struct xfs_perag *pag, struct xfs_trans *tp, - xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, @@ -140,11 +139,11 @@ xfs_reflink_find_shared( struct xfs_btree_cur *cur; int error; - error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp); + error = xfs_alloc_read_agf(pag, tp, 0, &agbp); if (error) return error; - cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agbp->b_pag); + cur = xfs_refcountbt_init_cursor(pag->pag_mount, tp, agbp, pag); error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen, find_end_of_shared); @@ -171,7 +170,8 @@ xfs_reflink_trim_around_shared( struct xfs_bmbt_irec *irec, bool *shared) { - xfs_agnumber_t agno; + struct xfs_mount *mp = ip->i_mount; + struct xfs_perag *pag; xfs_agblock_t agbno; xfs_extlen_t aglen; xfs_agblock_t fbno; @@ -186,12 +186,13 @@ xfs_reflink_trim_around_shared( trace_xfs_reflink_trim_around_shared(ip, irec); - agno = XFS_FSB_TO_AGNO(ip->i_mount, irec->br_startblock); - agbno = XFS_FSB_TO_AGBNO(ip->i_mount, irec->br_startblock); + pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, irec->br_startblock)); + agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); aglen = irec->br_blockcount; - error = xfs_reflink_find_shared(ip->i_mount, NULL, agno, agbno, - aglen, &fbno, &flen, true); + error = xfs_reflink_find_shared(pag, NULL, agbno, aglen, &fbno, &flen, + true); + xfs_perag_put(pag); if (error) return error; @@ -1420,11 +1421,6 @@ xfs_reflink_inode_has_shared_extents( struct xfs_bmbt_irec got; struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp; - xfs_agnumber_t agno; - xfs_agblock_t agbno; - xfs_extlen_t aglen; - xfs_agblock_t rbno; - xfs_extlen_t rlen; struct xfs_iext_cursor icur; bool found; int error; @@ -1437,17 +1433,25 @@ xfs_reflink_inode_has_shared_extents( *has_shared = false; found = xfs_iext_lookup_extent(ip, ifp, 0, &icur, &got); while (found) { + struct xfs_perag *pag; + xfs_agblock_t agbno; + xfs_extlen_t aglen; + xfs_agblock_t rbno; + xfs_extlen_t rlen; + if (isnullstartblock(got.br_startblock) || got.br_state != XFS_EXT_NORM) goto next; - agno = XFS_FSB_TO_AGNO(mp, got.br_startblock); + + pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, got.br_startblock)); agbno = XFS_FSB_TO_AGBNO(mp, got.br_startblock); aglen = got.br_blockcount; - - error = xfs_reflink_find_shared(mp, tp, agno, agbno, aglen, + error = xfs_reflink_find_shared(pag, tp, agbno, aglen, &rbno, &rlen, false); + xfs_perag_put(pag); if (error) return error; + /* Is there still a shared block here? */ if (rbno != NULLAGBLOCK) { *has_shared = true; diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index bea65f2fe657..65c5dfe17ecf 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -16,9 +16,6 @@ static inline bool xfs_is_cow_inode(struct xfs_inode *ip) return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip); } -extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp, - xfs_agnumber_t agno, xfs_agblock_t agbno, xfs_extlen_t aglen, - xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_maximal); extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip, struct xfs_bmbt_irec *irec, bool *shared); int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap, diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index aa977c7ea370..4edee1d3784a 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -40,6 +40,7 @@ #include "xfs_defer.h" #include "xfs_attr_item.h" #include "xfs_xattr.h" +#include "xfs_iunlink_item.h" #include <linux/magic.h> #include <linux/fs_context.h> @@ -2096,8 +2097,16 @@ xfs_init_caches(void) if (!xfs_attri_cache) goto out_destroy_attrd_cache; + xfs_iunlink_cache = kmem_cache_create("xfs_iul_item", + sizeof(struct xfs_iunlink_item), + 0, 0, NULL); + if (!xfs_iunlink_cache) + goto out_destroy_attri_cache; + return 0; + out_destroy_attri_cache: + kmem_cache_destroy(xfs_attri_cache); out_destroy_attrd_cache: kmem_cache_destroy(xfs_attrd_cache); out_destroy_bui_cache: @@ -2148,6 +2157,7 @@ xfs_destroy_caches(void) * destroy caches. */ rcu_barrier(); + kmem_cache_destroy(xfs_iunlink_cache); kmem_cache_destroy(xfs_attri_cache); kmem_cache_destroy(xfs_attrd_cache); kmem_cache_destroy(xfs_bui_cache); @@ -2213,6 +2223,7 @@ xfs_cpu_dead( list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) { spin_unlock(&xfs_mount_list_lock); xfs_inodegc_cpu_dead(mp, cpu); + xlog_cil_pcp_dead(mp->m_log, cpu); spin_lock(&xfs_mount_list_lock); } spin_unlock(&xfs_mount_list_lock); diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 0fa1b7a2918c..926543f01335 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -3672,7 +3672,6 @@ DEFINE_EVENT(xfs_ag_inode_class, name, \ TP_ARGS(ip)) DEFINE_AGINODE_EVENT(xfs_iunlink); DEFINE_AGINODE_EVENT(xfs_iunlink_remove); -DEFINE_AG_EVENT(xfs_iunlink_map_prev_fallback); DECLARE_EVENT_CLASS(xfs_fs_corrupt_class, TP_PROTO(struct xfs_mount *mp, unsigned int flags), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 82cf0189c0db..7bd16fbff534 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -760,7 +760,7 @@ xfs_log_item_batch_insert( void xfs_trans_committed_bulk( struct xfs_ail *ailp, - struct xfs_log_vec *log_vector, + struct list_head *lv_chain, xfs_lsn_t commit_lsn, bool aborted) { @@ -775,7 +775,7 @@ xfs_trans_committed_bulk( spin_unlock(&ailp->ail_lock); /* unpin all the log items */ - for (lv = log_vector; lv; lv = lv->lv_next ) { + list_for_each_entry(lv, lv_chain, lv_list) { struct xfs_log_item *lip = lv->lv_item; xfs_lsn_t item_lsn; @@ -845,6 +845,90 @@ xfs_trans_committed_bulk( } /* + * Sort transaction items prior to running precommit operations. This will + * attempt to order the items such that they will always be locked in the same + * order. Items that have no sort function are moved to the end of the list + * and so are locked last. + * + * This may need refinement as different types of objects add sort functions. + * + * Function is more complex than it needs to be because we are comparing 64 bit + * values and the function only returns 32 bit values. + */ +static int +xfs_trans_precommit_sort( + void *unused_arg, + const struct list_head *a, + const struct list_head *b) +{ + struct xfs_log_item *lia = container_of(a, + struct xfs_log_item, li_trans); + struct xfs_log_item *lib = container_of(b, + struct xfs_log_item, li_trans); + int64_t diff; + + /* + * If both items are non-sortable, leave them alone. If only one is + * sortable, move the non-sortable item towards the end of the list. + */ + if (!lia->li_ops->iop_sort && !lib->li_ops->iop_sort) + return 0; + if (!lia->li_ops->iop_sort) + return 1; + if (!lib->li_ops->iop_sort) + return -1; + + diff = lia->li_ops->iop_sort(lia) - lib->li_ops->iop_sort(lib); + if (diff < 0) + return -1; + if (diff > 0) + return 1; + return 0; +} + +/* + * Run transaction precommit functions. + * + * If there is an error in any of the callouts, then stop immediately and + * trigger a shutdown to abort the transaction. There is no recovery possible + * from errors at this point as the transaction is dirty.... + */ +static int +xfs_trans_run_precommits( + struct xfs_trans *tp) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_log_item *lip, *n; + int error = 0; + + /* + * Sort the item list to avoid ABBA deadlocks with other transactions + * running precommit operations that lock multiple shared items such as + * inode cluster buffers. + */ + list_sort(NULL, &tp->t_items, xfs_trans_precommit_sort); + + /* + * Precommit operations can remove the log item from the transaction + * if the log item exists purely to delay modifications until they + * can be ordered against other operations. Hence we have to use + * list_for_each_entry_safe() here. + */ + list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) { + if (!test_bit(XFS_LI_DIRTY, &lip->li_flags)) + continue; + if (lip->li_ops->iop_precommit) { + error = lip->li_ops->iop_precommit(tp, lip); + if (error) + break; + } + } + if (error) + xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + return error; +} + +/* * Commit the given transaction to the log. * * XFS disk error handling mechanism is not based on a typical @@ -869,6 +953,13 @@ __xfs_trans_commit( trace_xfs_trans_commit(tp, _RET_IP_); + error = xfs_trans_run_precommits(tp); + if (error) { + if (tp->t_flags & XFS_TRANS_PERM_LOG_RES) + xfs_defer_cancel(tp); + goto out_unreserve; + } + /* * Finish deferred items on final commit. Only permanent transactions * should ever have deferred ops. diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 9561f193e7e1..55819785941c 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -45,6 +45,7 @@ struct xfs_log_item { struct xfs_log_vec *li_lv; /* active log vector */ struct xfs_log_vec *li_lv_shadow; /* standby vector */ xfs_csn_t li_seq; /* CIL commit seq */ + uint32_t li_order_id; /* CIL commit order */ }; /* @@ -71,10 +72,12 @@ struct xfs_item_ops { void (*iop_format)(struct xfs_log_item *, struct xfs_log_vec *); void (*iop_pin)(struct xfs_log_item *); void (*iop_unpin)(struct xfs_log_item *, int remove); - uint (*iop_push)(struct xfs_log_item *, struct list_head *); + uint64_t (*iop_sort)(struct xfs_log_item *lip); + int (*iop_precommit)(struct xfs_trans *tp, struct xfs_log_item *lip); void (*iop_committing)(struct xfs_log_item *lip, xfs_csn_t seq); - void (*iop_release)(struct xfs_log_item *); xfs_lsn_t (*iop_committed)(struct xfs_log_item *, xfs_lsn_t); + uint (*iop_push)(struct xfs_log_item *, struct list_head *); + void (*iop_release)(struct xfs_log_item *); int (*iop_recover)(struct xfs_log_item *lip, struct list_head *capture_list); bool (*iop_match)(struct xfs_log_item *item, uint64_t id); diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index f0d79a9050ba..d5400150358e 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -19,7 +19,8 @@ void xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *); void xfs_trans_del_item(struct xfs_log_item *); void xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp); -void xfs_trans_committed_bulk(struct xfs_ail *ailp, struct xfs_log_vec *lv, +void xfs_trans_committed_bulk(struct xfs_ail *ailp, + struct list_head *lv_chain, xfs_lsn_t commit_lsn, bool aborted); /* * AIL traversal cursor. |