diff options
Diffstat (limited to '')
68 files changed, 2023 insertions, 1751 deletions
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c index bb0c700afe3c..86696a1c6891 100644 --- a/fs/xfs/libxfs/xfs_ag.c +++ b/fs/xfs/libxfs/xfs_ag.c @@ -44,16 +44,15 @@ xfs_perag_get( xfs_agnumber_t agno) { struct xfs_perag *pag; - int ref = 0; rcu_read_lock(); pag = radix_tree_lookup(&mp->m_perag_tree, agno); if (pag) { + trace_xfs_perag_get(pag, _RET_IP_); ASSERT(atomic_read(&pag->pag_ref) >= 0); - ref = atomic_inc_return(&pag->pag_ref); + atomic_inc(&pag->pag_ref); } rcu_read_unlock(); - trace_xfs_perag_get(mp, agno, ref, _RET_IP_); return pag; } @@ -68,7 +67,6 @@ xfs_perag_get_tag( { struct xfs_perag *pag; int found; - int ref; rcu_read_lock(); found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, @@ -77,9 +75,9 @@ xfs_perag_get_tag( rcu_read_unlock(); return NULL; } - ref = atomic_inc_return(&pag->pag_ref); + trace_xfs_perag_get_tag(pag, _RET_IP_); + atomic_inc(&pag->pag_ref); rcu_read_unlock(); - trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_); return pag; } @@ -87,11 +85,68 @@ void xfs_perag_put( struct xfs_perag *pag) { - int ref; - + trace_xfs_perag_put(pag, _RET_IP_); ASSERT(atomic_read(&pag->pag_ref) > 0); - ref = atomic_dec_return(&pag->pag_ref); - trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); + atomic_dec(&pag->pag_ref); +} + +/* + * Active references for perag structures. This is for short term access to the + * per ag structures for walking trees or accessing state. If an AG is being + * shrunk or is offline, then this will fail to find that AG and return NULL + * instead. + */ +struct xfs_perag * +xfs_perag_grab( + struct xfs_mount *mp, + xfs_agnumber_t agno) +{ + struct xfs_perag *pag; + + rcu_read_lock(); + pag = radix_tree_lookup(&mp->m_perag_tree, agno); + if (pag) { + trace_xfs_perag_grab(pag, _RET_IP_); + if (!atomic_inc_not_zero(&pag->pag_active_ref)) + pag = NULL; + } + rcu_read_unlock(); + return pag; +} + +/* + * search from @first to find the next perag with the given tag set. + */ +struct xfs_perag * +xfs_perag_grab_tag( + struct xfs_mount *mp, + xfs_agnumber_t first, + int tag) +{ + struct xfs_perag *pag; + int found; + + rcu_read_lock(); + found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, + (void **)&pag, first, 1, tag); + if (found <= 0) { + rcu_read_unlock(); + return NULL; + } + trace_xfs_perag_grab_tag(pag, _RET_IP_); + if (!atomic_inc_not_zero(&pag->pag_active_ref)) + pag = NULL; + rcu_read_unlock(); + return pag; +} + +void +xfs_perag_rele( + struct xfs_perag *pag) +{ + trace_xfs_perag_rele(pag, _RET_IP_); + if (atomic_dec_and_test(&pag->pag_active_ref)) + wake_up(&pag->pag_active_wq); } /* @@ -196,6 +251,10 @@ xfs_free_perag( cancel_delayed_work_sync(&pag->pag_blockgc_work); xfs_buf_hash_destroy(pag); + /* drop the mount's active reference */ + xfs_perag_rele(pag); + XFS_IS_CORRUPT(pag->pag_mount, + atomic_read(&pag->pag_active_ref) != 0); call_rcu(&pag->rcu_head, __xfs_free_perag); } } @@ -314,6 +373,7 @@ xfs_initialize_perag( INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker); INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); init_waitqueue_head(&pag->pagb_wait); + init_waitqueue_head(&pag->pag_active_wq); pag->pagb_count = 0; pag->pagb_tree = RB_ROOT; #endif /* __KERNEL__ */ @@ -322,6 +382,9 @@ xfs_initialize_perag( if (error) goto out_remove_pag; + /* Active ref owned by mount indicates AG is online. */ + atomic_set(&pag->pag_active_ref, 1); + /* first new pag is fully initialized */ if (first_initialised == NULLAGNUMBER) first_initialised = index; @@ -824,7 +887,7 @@ xfs_ag_shrink_space( struct xfs_alloc_arg args = { .tp = *tpp, .mp = mp, - .type = XFS_ALLOCTYPE_THIS_BNO, + .pag = pag, .minlen = delta, .maxlen = delta, .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE, @@ -856,14 +919,11 @@ xfs_ag_shrink_space( if (delta >= aglen) return -EINVAL; - args.fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta); - /* * Make sure that the last inode cluster cannot overlap with the new * end of the AG, even if it's sparse. */ - error = xfs_ialloc_check_shrink(*tpp, pag->pag_agno, agibp, - aglen - delta); + error = xfs_ialloc_check_shrink(pag, *tpp, agibp, aglen - delta); if (error) return error; @@ -876,7 +936,8 @@ xfs_ag_shrink_space( return error; /* internal log shouldn't also show up in the free space btrees */ - error = xfs_alloc_vextent(&args); + error = xfs_alloc_vextent_exact_bno(&args, + XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta)); if (!error && args.agbno == NULLAGBLOCK) error = -ENOSPC; diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 191b22b9a35b..5e18536dfdce 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -32,14 +32,12 @@ struct xfs_ag_resv { struct xfs_perag { struct xfs_mount *pag_mount; /* owner filesystem */ xfs_agnumber_t pag_agno; /* AG this structure belongs to */ - atomic_t pag_ref; /* perag reference count */ - char pagf_init; /* this agf's entry is initialized */ - char pagi_init; /* this agi's entry is initialized */ - char pagf_metadata; /* the agf is preferred to be metadata */ - char pagi_inodeok; /* The agi is ok for inodes */ + atomic_t pag_ref; /* passive reference count */ + atomic_t pag_active_ref; /* active reference count */ + wait_queue_head_t pag_active_wq;/* woken active_ref falls to zero */ + unsigned long pag_opstate; uint8_t pagf_levels[XFS_BTNUM_AGF]; /* # of levels in bno & cnt btree */ - bool pagf_agflreset; /* agfl requires reset before use */ uint32_t pagf_flcount; /* count of blocks in freelist */ xfs_extlen_t pagf_freeblks; /* total free blocks */ xfs_extlen_t pagf_longest; /* longest free space */ @@ -106,16 +104,44 @@ struct xfs_perag { #endif /* __KERNEL__ */ }; +/* + * Per-AG operational state. These are atomic flag bits. + */ +#define XFS_AGSTATE_AGF_INIT 0 +#define XFS_AGSTATE_AGI_INIT 1 +#define XFS_AGSTATE_PREFERS_METADATA 2 +#define XFS_AGSTATE_ALLOWS_INODES 3 +#define XFS_AGSTATE_AGFL_NEEDS_RESET 4 + +#define __XFS_AG_OPSTATE(name, NAME) \ +static inline bool xfs_perag_ ## name (struct xfs_perag *pag) \ +{ \ + return test_bit(XFS_AGSTATE_ ## NAME, &pag->pag_opstate); \ +} + +__XFS_AG_OPSTATE(initialised_agf, AGF_INIT) +__XFS_AG_OPSTATE(initialised_agi, AGI_INIT) +__XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA) +__XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES) +__XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET) + int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t agcount, xfs_rfsblock_t dcount, xfs_agnumber_t *maxagi); int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno); void xfs_free_perag(struct xfs_mount *mp); +/* Passive AG references */ struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int tag); void xfs_perag_put(struct xfs_perag *pag); +/* Active AG references */ +struct xfs_perag *xfs_perag_grab(struct xfs_mount *, xfs_agnumber_t); +struct xfs_perag *xfs_perag_grab_tag(struct xfs_mount *, xfs_agnumber_t, + int tag); +void xfs_perag_rele(struct xfs_perag *pag); + /* * Per-ag geometry infomation and validation */ @@ -193,31 +219,86 @@ xfs_perag_next( struct xfs_mount *mp = pag->pag_mount; *agno = pag->pag_agno + 1; - xfs_perag_put(pag); - if (*agno > end_agno) - return NULL; - return xfs_perag_get(mp, *agno); + xfs_perag_rele(pag); + while (*agno <= end_agno) { + pag = xfs_perag_grab(mp, *agno); + if (pag) + return pag; + (*agno)++; + } + return NULL; } #define for_each_perag_range(mp, agno, end_agno, pag) \ - for ((pag) = xfs_perag_get((mp), (agno)); \ + for ((pag) = xfs_perag_grab((mp), (agno)); \ (pag) != NULL; \ (pag) = xfs_perag_next((pag), &(agno), (end_agno))) #define for_each_perag_from(mp, agno, pag) \ for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag)) - #define for_each_perag(mp, agno, pag) \ (agno) = 0; \ for_each_perag_from((mp), (agno), (pag)) #define for_each_perag_tag(mp, agno, pag, tag) \ - for ((agno) = 0, (pag) = xfs_perag_get_tag((mp), 0, (tag)); \ + for ((agno) = 0, (pag) = xfs_perag_grab_tag((mp), 0, (tag)); \ (pag) != NULL; \ (agno) = (pag)->pag_agno + 1, \ - xfs_perag_put(pag), \ - (pag) = xfs_perag_get_tag((mp), (agno), (tag))) + xfs_perag_rele(pag), \ + (pag) = xfs_perag_grab_tag((mp), (agno), (tag))) + +static inline struct xfs_perag * +xfs_perag_next_wrap( + struct xfs_perag *pag, + xfs_agnumber_t *agno, + xfs_agnumber_t stop_agno, + xfs_agnumber_t restart_agno, + xfs_agnumber_t wrap_agno) +{ + struct xfs_mount *mp = pag->pag_mount; + + *agno = pag->pag_agno + 1; + xfs_perag_rele(pag); + while (*agno != stop_agno) { + if (*agno >= wrap_agno) { + if (restart_agno >= stop_agno) + break; + *agno = restart_agno; + } + + pag = xfs_perag_grab(mp, *agno); + if (pag) + return pag; + (*agno)++; + } + return NULL; +} + +/* + * Iterate all AGs from start_agno through wrap_agno, then restart_agno through + * (start_agno - 1). + */ +#define for_each_perag_wrap_range(mp, start_agno, restart_agno, wrap_agno, agno, pag) \ + for ((agno) = (start_agno), (pag) = xfs_perag_grab((mp), (agno)); \ + (pag) != NULL; \ + (pag) = xfs_perag_next_wrap((pag), &(agno), (start_agno), \ + (restart_agno), (wrap_agno))) +/* + * Iterate all AGs from start_agno through wrap_agno, then 0 through + * (start_agno - 1). + */ +#define for_each_perag_wrap_at(mp, start_agno, wrap_agno, agno, pag) \ + for_each_perag_wrap_range((mp), (start_agno), 0, (wrap_agno), (agno), (pag)) + +/* + * Iterate all AGs from start_agno through to the end of the filesystem, then 0 + * through (start_agno - 1). + */ +#define for_each_perag_wrap(mp, start_agno, agno, pag) \ + for_each_perag_wrap_at((mp), (start_agno), (mp)->m_sb.sb_agcount, \ + (agno), (pag)) + struct aghdr_init_data { /* per ag data */ diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 5af123d13a63..7fd1fea95552 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -264,7 +264,7 @@ xfs_ag_resv_init( if (error) goto out; - error = xfs_finobt_calc_reserves(mp, tp, pag, &ask, &used); + error = xfs_finobt_calc_reserves(pag, tp, &ask, &used); if (error) goto out; diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 989cf341779b..6a037173d20d 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -36,10 +36,6 @@ struct workqueue_struct *xfs_alloc_wq; #define XFSA_FIXUP_BNO_OK 1 #define XFSA_FIXUP_CNT_OK 2 -STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); -STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); -STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); - /* * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in * the beginning of the block for a proper header with the location information @@ -772,8 +768,6 @@ xfs_alloc_cur_setup( int error; int i; - ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO); - acur->cur_len = args->maxlen; acur->rec_bno = 0; acur->rec_len = 0; @@ -887,7 +881,6 @@ xfs_alloc_cur_check( * We have an aligned record that satisfies minlen and beats or matches * the candidate extent size. Compare locality for near allocation mode. */ - ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO); diff = xfs_alloc_compute_diff(args->agbno, args->len, args->alignment, args->datatype, bnoa, lena, &bnew); @@ -1133,78 +1126,6 @@ error: } /* - * Allocate a variable extent in the allocation group agno. - * Type and bno are used to determine where in the allocation group the - * extent will start. - * Extent's length (returned in *len) will be between minlen and maxlen, - * and of the form k * prod + mod unless there's nothing that large. - * Return the starting a.g. block, or NULLAGBLOCK if we can't do it. - */ -STATIC int /* error */ -xfs_alloc_ag_vextent( - xfs_alloc_arg_t *args) /* argument structure for allocation */ -{ - int error=0; - - ASSERT(args->minlen > 0); - ASSERT(args->maxlen > 0); - ASSERT(args->minlen <= args->maxlen); - ASSERT(args->mod < args->prod); - ASSERT(args->alignment > 0); - - /* - * Branch to correct routine based on the type. - */ - args->wasfromfl = 0; - switch (args->type) { - case XFS_ALLOCTYPE_THIS_AG: - error = xfs_alloc_ag_vextent_size(args); - break; - case XFS_ALLOCTYPE_NEAR_BNO: - error = xfs_alloc_ag_vextent_near(args); - break; - case XFS_ALLOCTYPE_THIS_BNO: - error = xfs_alloc_ag_vextent_exact(args); - break; - default: - ASSERT(0); - /* NOTREACHED */ - } - - if (error || args->agbno == NULLAGBLOCK) - return error; - - ASSERT(args->len >= args->minlen); - ASSERT(args->len <= args->maxlen); - ASSERT(!args->wasfromfl || args->resv != XFS_AG_RESV_AGFL); - ASSERT(args->agbno % args->alignment == 0); - - /* if not file data, insert new block into the reverse map btree */ - if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) { - error = xfs_rmap_alloc(args->tp, args->agbp, args->pag, - args->agbno, args->len, &args->oinfo); - if (error) - return error; - } - - if (!args->wasfromfl) { - error = xfs_alloc_update_counters(args->tp, args->agbp, - -((long)(args->len))); - if (error) - return error; - - ASSERT(!xfs_extent_busy_search(args->mp, args->pag, - args->agbno, args->len)); - } - - xfs_ag_resv_alloc_extent(args->pag, args->resv, args); - - XFS_STATS_INC(args->mp, xs_allocx); - XFS_STATS_ADD(args->mp, xs_allocb, args->len); - return error; -} - -/* * Allocate a variable extent at exactly agno/bno. * Extent's length (returned in *len) will be between minlen and maxlen, * and of the form k * prod + mod unless there's nothing that large. @@ -1389,7 +1310,6 @@ xfs_alloc_ag_vextent_locality( bool fbinc; ASSERT(acur->len == 0); - ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO); *stat = 0; @@ -2435,7 +2355,7 @@ xfs_agfl_reset( struct xfs_mount *mp = tp->t_mountp; struct xfs_agf *agf = agbp->b_addr; - ASSERT(pag->pagf_agflreset); + ASSERT(xfs_perag_agfl_needs_reset(pag)); trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_); xfs_warn(mp, @@ -2450,7 +2370,7 @@ xfs_agfl_reset( XFS_AGF_FLCOUNT); pag->pagf_flcount = 0; - pag->pagf_agflreset = false; + clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate); } /* @@ -2472,20 +2392,20 @@ xfs_defer_agfl_block( struct xfs_owner_info *oinfo) { struct xfs_mount *mp = tp->t_mountp; - struct xfs_extent_free_item *new; /* new element */ + struct xfs_extent_free_item *xefi; ASSERT(xfs_extfree_item_cache != NULL); ASSERT(oinfo != NULL); - new = kmem_cache_zalloc(xfs_extfree_item_cache, + xefi = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); - new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); - new->xefi_blockcount = 1; - new->xefi_owner = oinfo->oi_owner; + xefi->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno); + xefi->xefi_blockcount = 1; + xefi->xefi_owner = oinfo->oi_owner; trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1); - xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &new->xefi_list); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list); } /* @@ -2500,7 +2420,7 @@ __xfs_free_extent_later( const struct xfs_owner_info *oinfo, bool skip_discard) { - struct xfs_extent_free_item *new; /* new element */ + struct xfs_extent_free_item *xefi; #ifdef DEBUG struct xfs_mount *mp = tp->t_mountp; xfs_agnumber_t agno; @@ -2519,27 +2439,27 @@ __xfs_free_extent_later( #endif ASSERT(xfs_extfree_item_cache != NULL); - new = kmem_cache_zalloc(xfs_extfree_item_cache, + xefi = kmem_cache_zalloc(xfs_extfree_item_cache, GFP_KERNEL | __GFP_NOFAIL); - new->xefi_startblock = bno; - new->xefi_blockcount = (xfs_extlen_t)len; + xefi->xefi_startblock = bno; + xefi->xefi_blockcount = (xfs_extlen_t)len; if (skip_discard) - new->xefi_flags |= XFS_EFI_SKIP_DISCARD; + xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD; if (oinfo) { ASSERT(oinfo->oi_offset == 0); if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK) - new->xefi_flags |= XFS_EFI_ATTR_FORK; + xefi->xefi_flags |= XFS_EFI_ATTR_FORK; if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK) - new->xefi_flags |= XFS_EFI_BMBT_BLOCK; - new->xefi_owner = oinfo->oi_owner; + xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK; + xefi->xefi_owner = oinfo->oi_owner; } else { - new->xefi_owner = XFS_RMAP_OWN_NULL; + xefi->xefi_owner = XFS_RMAP_OWN_NULL; } trace_xfs_bmap_free_defer(tp->t_mountp, XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0, XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len); - xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); + xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list); } #ifdef DEBUG @@ -2605,7 +2525,7 @@ xfs_alloc_fix_freelist( /* deferred ops (AGFL block frees) require permanent transactions */ ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); - if (!pag->pagf_init) { + if (!xfs_perag_initialised_agf(pag)) { error = xfs_alloc_read_agf(pag, tp, flags, &agbp); if (error) { /* Couldn't lock the AGF so skip this AG. */ @@ -2620,7 +2540,8 @@ xfs_alloc_fix_freelist( * somewhere else if we are not being asked to try harder at this * point */ - if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) && + if (xfs_perag_prefers_metadata(pag) && + (args->datatype & XFS_ALLOC_USERDATA) && (flags & XFS_ALLOC_FLAG_TRYLOCK)) { ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING)); goto out_agbp_relse; @@ -2646,7 +2567,7 @@ xfs_alloc_fix_freelist( } /* reset a padding mismatched agfl before final free space check */ - if (pag->pagf_agflreset) + if (xfs_perag_agfl_needs_reset(pag)) xfs_agfl_reset(tp, agbp, pag); /* If there isn't enough total space or single-extent, reject it. */ @@ -2707,7 +2628,6 @@ xfs_alloc_fix_freelist( targs.agbp = agbp; targs.agno = args->agno; targs.alignment = targs.minlen = targs.prod = 1; - targs.type = XFS_ALLOCTYPE_THIS_AG; targs.pag = pag; error = xfs_alloc_read_agfl(pag, tp, &agflbp); if (error) @@ -2720,7 +2640,7 @@ xfs_alloc_fix_freelist( targs.resv = XFS_AG_RESV_AGFL; /* Allocate as many blocks as possible at once. */ - error = xfs_alloc_ag_vextent(&targs); + error = xfs_alloc_ag_vextent_size(&targs); if (error) goto out_agflbp_relse; @@ -2734,6 +2654,18 @@ xfs_alloc_fix_freelist( break; goto out_agflbp_relse; } + + if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) { + error = xfs_rmap_alloc(tp, agbp, pag, + targs.agbno, targs.len, &targs.oinfo); + if (error) + goto out_agflbp_relse; + } + error = xfs_alloc_update_counters(tp, agbp, + -((long)(targs.len))); + if (error) + goto out_agflbp_relse; + /* * Put each allocated block on the list. */ @@ -2803,7 +2735,7 @@ xfs_alloc_get_freelist( if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp)) agf->agf_flfirst = 0; - ASSERT(!pag->pagf_agflreset); + ASSERT(!xfs_perag_agfl_needs_reset(pag)); be32_add_cpu(&agf->agf_flcount, -1); pag->pagf_flcount--; @@ -2892,7 +2824,7 @@ xfs_alloc_put_freelist( if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp)) agf->agf_fllast = 0; - ASSERT(!pag->pagf_agflreset); + ASSERT(!xfs_perag_agfl_needs_reset(pag)); be32_add_cpu(&agf->agf_flcount, 1); pag->pagf_flcount++; @@ -3099,7 +3031,7 @@ xfs_alloc_read_agf( return error; agf = agfbp->b_addr; - if (!pag->pagf_init) { + if (!xfs_perag_initialised_agf(pag)) { pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); pag->pagf_flcount = be32_to_cpu(agf->agf_flcount); @@ -3111,8 +3043,8 @@ xfs_alloc_read_agf( pag->pagf_levels[XFS_BTNUM_RMAPi] = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); - pag->pagf_init = 1; - pag->pagf_agflreset = xfs_agfl_needs_reset(pag->pag_mount, agf); + if (xfs_agfl_needs_reset(pag->pag_mount, agf)) + set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate); /* * Update the in-core allocbt counter. Filter out the rmapbt @@ -3127,6 +3059,8 @@ xfs_alloc_read_agf( if (allocbt_blks > 0) atomic64_add(allocbt_blks, &pag->pag_mount->m_allocbt_blks); + + set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); } #ifdef DEBUG else if (!xfs_is_shutdown(pag->pag_mount)) { @@ -3148,26 +3082,25 @@ xfs_alloc_read_agf( } /* - * Allocate an extent (variable-size). - * Depending on the allocation type, we either look in a single allocation - * group or loop over the allocation groups to find the result. + * Pre-proces allocation arguments to set initial state that we don't require + * callers to set up correctly, as well as bounds check the allocation args + * that are set up. */ -int /* error */ -xfs_alloc_vextent( - struct xfs_alloc_arg *args) /* allocation argument structure */ +static int +xfs_alloc_vextent_check_args( + struct xfs_alloc_arg *args, + xfs_fsblock_t target, + xfs_agnumber_t *minimum_agno) { - xfs_agblock_t agsize; /* allocation group size */ - int error; - int flags; /* XFS_ALLOC_FLAG_... locking flags */ - struct xfs_mount *mp; /* mount structure pointer */ - xfs_agnumber_t sagno; /* starting allocation group number */ - xfs_alloctype_t type; /* input allocation type */ - int bump_rotor = 0; - xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */ - - mp = args->mp; - type = args->otype = args->type; - args->agbno = NULLAGBLOCK; + struct xfs_mount *mp = args->mp; + xfs_agblock_t agsize; + + args->fsbno = NULLFSBLOCK; + + *minimum_agno = 0; + if (args->tp->t_highest_agno != NULLAGNUMBER) + *minimum_agno = args->tp->t_highest_agno; + /* * Just fix this up, for the case where the last a.g. is shorter * (or there's only one a.g.) and the caller couldn't easily figure @@ -3178,168 +3111,414 @@ xfs_alloc_vextent( args->maxlen = agsize; if (args->alignment == 0) args->alignment = 1; - ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount); - ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize); + + ASSERT(args->minlen > 0); + ASSERT(args->maxlen > 0); + ASSERT(args->alignment > 0); + ASSERT(args->resv != XFS_AG_RESV_AGFL); + + ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount); + ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize); ASSERT(args->minlen <= args->maxlen); ASSERT(args->minlen <= agsize); ASSERT(args->mod < args->prod); - if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount || - XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize || + + if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount || + XFS_FSB_TO_AGBNO(mp, target) >= agsize || args->minlen > args->maxlen || args->minlen > agsize || args->mod >= args->prod) { - args->fsbno = NULLFSBLOCK; trace_xfs_alloc_vextent_badargs(args); + return -ENOSPC; + } + + if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) { + trace_xfs_alloc_vextent_skip_deadlock(args); + return -ENOSPC; + } + return 0; + +} + +/* + * Prepare an AG for allocation. If the AG is not prepared to accept the + * allocation, return failure. + * + * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are + * modified to hold their own perag references. + */ +static int +xfs_alloc_vextent_prepare_ag( + struct xfs_alloc_arg *args) +{ + bool need_pag = !args->pag; + int error; + + if (need_pag) + args->pag = xfs_perag_get(args->mp, args->agno); + + args->agbp = NULL; + error = xfs_alloc_fix_freelist(args, 0); + if (error) { + trace_xfs_alloc_vextent_nofix(args); + if (need_pag) + xfs_perag_put(args->pag); + args->agbno = NULLAGBLOCK; + return error; + } + if (!args->agbp) { + /* cannot allocate in this AG at all */ + trace_xfs_alloc_vextent_noagbp(args); + args->agbno = NULLAGBLOCK; return 0; } + args->wasfromfl = 0; + return 0; +} - switch (type) { - case XFS_ALLOCTYPE_THIS_AG: - case XFS_ALLOCTYPE_NEAR_BNO: - case XFS_ALLOCTYPE_THIS_BNO: - /* - * These three force us into a single a.g. - */ - args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); - args->pag = xfs_perag_get(mp, args->agno); - error = xfs_alloc_fix_freelist(args, 0); - if (error) { - trace_xfs_alloc_vextent_nofix(args); - goto error0; - } - if (!args->agbp) { - trace_xfs_alloc_vextent_noagbp(args); +/* + * Post-process allocation results to account for the allocation if it succeed + * and set the allocated block number correctly for the caller. + * + * XXX: we should really be returning ENOSPC for ENOSPC, not + * hiding it behind a "successful" NULLFSBLOCK allocation. + */ +static int +xfs_alloc_vextent_finish( + struct xfs_alloc_arg *args, + xfs_agnumber_t minimum_agno, + int alloc_error, + bool drop_perag) +{ + struct xfs_mount *mp = args->mp; + int error = 0; + + /* + * We can end up here with a locked AGF. If we failed, the caller is + * likely going to try to allocate again with different parameters, and + * that can widen the AGs that are searched for free space. If we have + * to do BMBT block allocation, we have to do a new allocation. + * + * Hence leaving this function with the AGF locked opens up potential + * ABBA AGF deadlocks because a future allocation attempt in this + * transaction may attempt to lock a lower number AGF. + * + * We can't release the AGF until the transaction is commited, so at + * this point we must update the "first allocation" tracker to point at + * this AG if the tracker is empty or points to a lower AG. This allows + * the next allocation attempt to be modified appropriately to avoid + * deadlocks. + */ + if (args->agbp && + (args->tp->t_highest_agno == NULLAGNUMBER || + args->agno > minimum_agno)) + args->tp->t_highest_agno = args->agno; + + /* + * If the allocation failed with an error or we had an ENOSPC result, + * preserve the returned error whilst also marking the allocation result + * as "no extent allocated". This ensures that callers that fail to + * capture the error will still treat it as a failed allocation. + */ + if (alloc_error || args->agbno == NULLAGBLOCK) { + args->fsbno = NULLFSBLOCK; + error = alloc_error; + goto out_drop_perag; + } + + args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno); + + ASSERT(args->len >= args->minlen); + ASSERT(args->len <= args->maxlen); + ASSERT(args->agbno % args->alignment == 0); + XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len); + + /* if not file data, insert new block into the reverse map btree */ + if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) { + error = xfs_rmap_alloc(args->tp, args->agbp, args->pag, + args->agbno, args->len, &args->oinfo); + if (error) + goto out_drop_perag; + } + + if (!args->wasfromfl) { + error = xfs_alloc_update_counters(args->tp, args->agbp, + -((long)(args->len))); + if (error) + goto out_drop_perag; + + ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno, + args->len)); + } + + xfs_ag_resv_alloc_extent(args->pag, args->resv, args); + + XFS_STATS_INC(mp, xs_allocx); + XFS_STATS_ADD(mp, xs_allocb, args->len); + +out_drop_perag: + if (drop_perag && args->pag) { + xfs_perag_rele(args->pag); + args->pag = NULL; + } + return error; +} + +/* + * Allocate within a single AG only. This uses a best-fit length algorithm so if + * you need an exact sized allocation without locality constraints, this is the + * fastest way to do it. + * + * Caller is expected to hold a perag reference in args->pag. + */ +int +xfs_alloc_vextent_this_ag( + struct xfs_alloc_arg *args, + xfs_agnumber_t agno) +{ + struct xfs_mount *mp = args->mp; + xfs_agnumber_t minimum_agno; + int error; + + args->agno = agno; + args->agbno = 0; + error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0), + &minimum_agno); + if (error) { + if (error == -ENOSPC) + return 0; + return error; + } + + error = xfs_alloc_vextent_prepare_ag(args); + if (!error && args->agbp) + error = xfs_alloc_ag_vextent_size(args); + + return xfs_alloc_vextent_finish(args, minimum_agno, error, false); +} + +/* + * Iterate all AGs trying to allocate an extent starting from @start_ag. + * + * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the + * allocation attempts in @start_agno have locality information. If we fail to + * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs + * we attempt to allocation in as there is no locality optimisation possible for + * those allocations. + * + * On return, args->pag may be left referenced if we finish before the "all + * failed" return point. The allocation finish still needs the perag, and + * so the caller will release it once they've finished the allocation. + * + * When we wrap the AG iteration at the end of the filesystem, we have to be + * careful not to wrap into AGs below ones we already have locked in the + * transaction if we are doing a blocking iteration. This will result in an + * out-of-order locking of AGFs and hence can cause deadlocks. + */ +static int +xfs_alloc_vextent_iterate_ags( + struct xfs_alloc_arg *args, + xfs_agnumber_t minimum_agno, + xfs_agnumber_t start_agno, + xfs_agblock_t target_agbno, + uint32_t flags) +{ + struct xfs_mount *mp = args->mp; + xfs_agnumber_t agno; + int error = 0; + +restart: + for_each_perag_wrap_range(mp, start_agno, minimum_agno, + mp->m_sb.sb_agcount, agno, args->pag) { + args->agno = agno; + error = xfs_alloc_vextent_prepare_ag(args); + if (error) break; + if (!args->agbp) { + trace_xfs_alloc_vextent_loopfailed(args); + continue; } - args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); - if ((error = xfs_alloc_ag_vextent(args))) - goto error0; - break; - case XFS_ALLOCTYPE_START_BNO: - /* - * Try near allocation first, then anywhere-in-ag after - * the first a.g. fails. - */ - if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) && - xfs_is_inode32(mp)) { - args->fsbno = XFS_AGB_TO_FSB(mp, - ((mp->m_agfrotor / rotorstep) % - mp->m_sb.sb_agcount), 0); - bump_rotor = 1; - } - args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); - args->type = XFS_ALLOCTYPE_NEAR_BNO; - fallthrough; - case XFS_ALLOCTYPE_FIRST_AG: + /* - * Rotate through the allocation groups looking for a winner. + * Allocation is supposed to succeed now, so break out of the + * loop regardless of whether we succeed or not. */ - if (type == XFS_ALLOCTYPE_FIRST_AG) { - /* - * Start with allocation group given by bno. - */ - args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); - args->type = XFS_ALLOCTYPE_THIS_AG; - sagno = 0; - flags = 0; + if (args->agno == start_agno && target_agbno) { + args->agbno = target_agbno; + error = xfs_alloc_ag_vextent_near(args); } else { - /* - * Start with the given allocation group. - */ - args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); - flags = XFS_ALLOC_FLAG_TRYLOCK; - } - /* - * Loop over allocation groups twice; first time with - * trylock set, second time without. - */ - for (;;) { - args->pag = xfs_perag_get(mp, args->agno); - error = xfs_alloc_fix_freelist(args, flags); - if (error) { - trace_xfs_alloc_vextent_nofix(args); - goto error0; - } - /* - * If we get a buffer back then the allocation will fly. - */ - if (args->agbp) { - if ((error = xfs_alloc_ag_vextent(args))) - goto error0; - break; - } - - trace_xfs_alloc_vextent_loopfailed(args); - - /* - * Didn't work, figure out the next iteration. - */ - if (args->agno == sagno && - type == XFS_ALLOCTYPE_START_BNO) - args->type = XFS_ALLOCTYPE_THIS_AG; - /* - * For the first allocation, we can try any AG to get - * space. However, if we already have allocated a - * block, we don't want to try AGs whose number is below - * sagno. Otherwise, we may end up with out-of-order - * locking of AGF, which might cause deadlock. - */ - if (++(args->agno) == mp->m_sb.sb_agcount) { - if (args->tp->t_firstblock != NULLFSBLOCK) - args->agno = sagno; - else - args->agno = 0; - } - /* - * Reached the starting a.g., must either be done - * or switch to non-trylock mode. - */ - if (args->agno == sagno) { - if (flags == 0) { - args->agbno = NULLAGBLOCK; - trace_xfs_alloc_vextent_allfailed(args); - break; - } - - flags = 0; - if (type == XFS_ALLOCTYPE_START_BNO) { - args->agbno = XFS_FSB_TO_AGBNO(mp, - args->fsbno); - args->type = XFS_ALLOCTYPE_NEAR_BNO; - } - } - xfs_perag_put(args->pag); - } - if (bump_rotor) { - if (args->agno == sagno) - mp->m_agfrotor = (mp->m_agfrotor + 1) % - (mp->m_sb.sb_agcount * rotorstep); - else - mp->m_agfrotor = (args->agno * rotorstep + 1) % - (mp->m_sb.sb_agcount * rotorstep); + args->agbno = 0; + error = xfs_alloc_ag_vextent_size(args); } break; - default: - ASSERT(0); - /* NOTREACHED */ } - if (args->agbno == NULLAGBLOCK) - args->fsbno = NULLFSBLOCK; - else { - args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno); -#ifdef DEBUG - ASSERT(args->len >= args->minlen); - ASSERT(args->len <= args->maxlen); - ASSERT(args->agbno % args->alignment == 0); - XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), - args->len); -#endif + if (error) { + xfs_perag_rele(args->pag); + args->pag = NULL; + return error; + } + if (args->agbp) + return 0; + /* + * We didn't find an AG we can alloation from. If we were given + * constraining flags by the caller, drop them and retry the allocation + * without any constraints being set. + */ + if (flags) { + flags = 0; + goto restart; } - xfs_perag_put(args->pag); + + ASSERT(args->pag == NULL); + trace_xfs_alloc_vextent_allfailed(args); return 0; -error0: - xfs_perag_put(args->pag); - return error; +} + +/* + * Iterate from the AGs from the start AG to the end of the filesystem, trying + * to allocate blocks. It starts with a near allocation attempt in the initial + * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap + * back to zero if allowed by previous allocations in this transaction, + * otherwise will wrap back to the start AG and run a second blocking pass to + * the end of the filesystem. + */ +int +xfs_alloc_vextent_start_ag( + struct xfs_alloc_arg *args, + xfs_fsblock_t target) +{ + struct xfs_mount *mp = args->mp; + xfs_agnumber_t minimum_agno; + xfs_agnumber_t start_agno; + xfs_agnumber_t rotorstep = xfs_rotorstep; + bool bump_rotor = false; + int error; + + args->agno = NULLAGNUMBER; + args->agbno = NULLAGBLOCK; + error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); + if (error) { + if (error == -ENOSPC) + return 0; + return error; + } + + if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) && + xfs_is_inode32(mp)) { + target = XFS_AGB_TO_FSB(mp, + ((mp->m_agfrotor / rotorstep) % + mp->m_sb.sb_agcount), 0); + bump_rotor = 1; + } + + start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target)); + error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno, + XFS_FSB_TO_AGBNO(mp, target), XFS_ALLOC_FLAG_TRYLOCK); + + if (bump_rotor) { + if (args->agno == start_agno) + mp->m_agfrotor = (mp->m_agfrotor + 1) % + (mp->m_sb.sb_agcount * rotorstep); + else + mp->m_agfrotor = (args->agno * rotorstep + 1) % + (mp->m_sb.sb_agcount * rotorstep); + } + + return xfs_alloc_vextent_finish(args, minimum_agno, error, true); +} + +/* + * Iterate from the agno indicated via @target through to the end of the + * filesystem attempting blocking allocation. This does not wrap or try a second + * pass, so will not recurse into AGs lower than indicated by the target. + */ +int +xfs_alloc_vextent_first_ag( + struct xfs_alloc_arg *args, + xfs_fsblock_t target) + { + struct xfs_mount *mp = args->mp; + xfs_agnumber_t minimum_agno; + xfs_agnumber_t start_agno; + int error; + + args->agno = NULLAGNUMBER; + args->agbno = NULLAGBLOCK; + error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); + if (error) { + if (error == -ENOSPC) + return 0; + return error; + } + + start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target)); + error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno, + XFS_FSB_TO_AGBNO(mp, target), 0); + return xfs_alloc_vextent_finish(args, minimum_agno, error, true); +} + +/* + * Allocate at the exact block target or fail. Caller is expected to hold a + * perag reference in args->pag. + */ +int +xfs_alloc_vextent_exact_bno( + struct xfs_alloc_arg *args, + xfs_fsblock_t target) +{ + struct xfs_mount *mp = args->mp; + xfs_agnumber_t minimum_agno; + int error; + + args->agno = XFS_FSB_TO_AGNO(mp, target); + args->agbno = XFS_FSB_TO_AGBNO(mp, target); + error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); + if (error) { + if (error == -ENOSPC) + return 0; + return error; + } + + error = xfs_alloc_vextent_prepare_ag(args); + if (!error && args->agbp) + error = xfs_alloc_ag_vextent_exact(args); + + return xfs_alloc_vextent_finish(args, minimum_agno, error, false); +} + +/* + * Allocate an extent as close to the target as possible. If there are not + * viable candidates in the AG, then fail the allocation. + * + * Caller may or may not have a per-ag reference in args->pag. + */ +int +xfs_alloc_vextent_near_bno( + struct xfs_alloc_arg *args, + xfs_fsblock_t target) +{ + struct xfs_mount *mp = args->mp; + xfs_agnumber_t minimum_agno; + bool needs_perag = args->pag == NULL; + int error; + + args->agno = XFS_FSB_TO_AGNO(mp, target); + args->agbno = XFS_FSB_TO_AGBNO(mp, target); + error = xfs_alloc_vextent_check_args(args, target, &minimum_agno); + if (error) { + if (error == -ENOSPC) + return 0; + return error; + } + + if (needs_perag) + args->pag = xfs_perag_grab(mp, args->agno); + + error = xfs_alloc_vextent_prepare_ag(args); + if (!error && args->agbp) + error = xfs_alloc_ag_vextent_near(args); + + return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag); } /* Ensure that the freelist is at full capacity. */ diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 2c3f762dfb58..2b246d74c189 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -17,25 +17,6 @@ extern struct workqueue_struct *xfs_alloc_wq; unsigned int xfs_agfl_size(struct xfs_mount *mp); /* - * Freespace allocation types. Argument to xfs_alloc_[v]extent. - */ -#define XFS_ALLOCTYPE_FIRST_AG 0x02 /* ... start at ag 0 */ -#define XFS_ALLOCTYPE_THIS_AG 0x08 /* anywhere in this a.g. */ -#define XFS_ALLOCTYPE_START_BNO 0x10 /* near this block else anywhere */ -#define XFS_ALLOCTYPE_NEAR_BNO 0x20 /* in this a.g. and near this block */ -#define XFS_ALLOCTYPE_THIS_BNO 0x40 /* at exactly this block */ - -/* this should become an enum again when the tracing code is fixed */ -typedef unsigned int xfs_alloctype_t; - -#define XFS_ALLOC_TYPES \ - { XFS_ALLOCTYPE_FIRST_AG, "FIRST_AG" }, \ - { XFS_ALLOCTYPE_THIS_AG, "THIS_AG" }, \ - { XFS_ALLOCTYPE_START_BNO, "START_BNO" }, \ - { XFS_ALLOCTYPE_NEAR_BNO, "NEAR_BNO" }, \ - { XFS_ALLOCTYPE_THIS_BNO, "THIS_BNO" } - -/* * Flags for xfs_alloc_fix_freelist. */ #define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ @@ -68,8 +49,6 @@ typedef struct xfs_alloc_arg { xfs_agblock_t min_agbno; /* set an agbno range for NEAR allocs */ xfs_agblock_t max_agbno; /* ... */ xfs_extlen_t len; /* output: actual size of extent */ - xfs_alloctype_t type; /* allocation type XFS_ALLOCTYPE_... */ - xfs_alloctype_t otype; /* original allocation type */ int datatype; /* mask defining data type treatment */ char wasdel; /* set if allocation was prev delayed */ char wasfromfl; /* set if allocation is from freelist */ @@ -118,11 +97,43 @@ xfs_alloc_log_agf( uint32_t fields);/* mask of fields to be logged (XFS_AGF_...) */ /* - * Allocate an extent (variable-size). + * Allocate an extent anywhere in the specific AG given. If there is no + * space matching the requirements in that AG, then the allocation will fail. */ -int /* error */ -xfs_alloc_vextent( - xfs_alloc_arg_t *args); /* allocation argument structure */ +int xfs_alloc_vextent_this_ag(struct xfs_alloc_arg *args, xfs_agnumber_t agno); + +/* + * Allocate an extent as close to the target as possible. If there are not + * viable candidates in the AG, then fail the allocation. + */ +int xfs_alloc_vextent_near_bno(struct xfs_alloc_arg *args, + xfs_fsblock_t target); + +/* + * Allocate an extent exactly at the target given. If this is not possible + * then the allocation fails. + */ +int xfs_alloc_vextent_exact_bno(struct xfs_alloc_arg *args, + xfs_fsblock_t target); + +/* + * Best effort full filesystem allocation scan. + * + * Locality aware allocation will be attempted in the initial AG, but on failure + * non-localised attempts will be made. The AGs are constrained by previous + * allocations in the current transaction. Two passes will be made - the first + * non-blocking, the second blocking. + */ +int xfs_alloc_vextent_start_ag(struct xfs_alloc_arg *args, + xfs_fsblock_t target); + +/* + * Iterate from the AG indicated from args->fsbno through to the end of the + * filesystem attempting blocking allocation. This is for use in last + * resort allocation attempts when everything else has failed. + */ +int xfs_alloc_vextent_first_ag(struct xfs_alloc_arg *args, + xfs_fsblock_t target); /* * Free an extent. diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 549a3cba0234..0f29c7b1b39f 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -315,7 +315,7 @@ xfs_allocbt_verify( level = be16_to_cpu(block->bb_level); if (bp->b_ops->magic[0] == cpu_to_be32(XFS_ABTC_MAGIC)) btnum = XFS_BTNUM_CNTi; - if (pag && pag->pagf_init) { + if (pag && xfs_perag_initialised_agf(pag)) { if (level >= pag->pagf_levels[btnum]) return __this_address; } else if (level >= mp->m_alloc_maxlevels) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 0d56a8d862e8..34de6e6898c4 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -645,34 +645,23 @@ xfs_bmap_extents_to_btree( args.tp = tp; args.mp = mp; xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork); - if (tp->t_firstblock == NULLFSBLOCK) { - args.type = XFS_ALLOCTYPE_START_BNO; - args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); - } else if (tp->t_flags & XFS_TRANS_LOWMODE) { - args.type = XFS_ALLOCTYPE_START_BNO; - args.fsbno = tp->t_firstblock; - } else { - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.fsbno = tp->t_firstblock; - } + args.minlen = args.maxlen = args.prod = 1; args.wasdel = wasdel; *logflagsp = 0; - error = xfs_alloc_vextent(&args); + error = xfs_alloc_vextent_start_ag(&args, + XFS_INO_TO_FSB(mp, ip->i_ino)); if (error) goto out_root_realloc; + /* + * Allocation can't fail, the space was reserved. + */ if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { error = -ENOSPC; goto out_root_realloc; } - /* - * Allocation can't fail, the space was reserved. - */ - ASSERT(tp->t_firstblock == NULLFSBLOCK || - args.agno >= XFS_FSB_TO_AGNO(mp, tp->t_firstblock)); - tp->t_firstblock = args.fsbno; cur->bc_ino.allocated++; ip->i_nblocks++; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); @@ -799,28 +788,24 @@ xfs_bmap_local_to_extents( memset(&args, 0, sizeof(args)); args.tp = tp; args.mp = ip->i_mount; + args.total = total; + args.minlen = args.maxlen = args.prod = 1; xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0); + /* * Allocate a block. We know we need only one, since the * file currently fits in an inode. */ - if (tp->t_firstblock == NULLFSBLOCK) { - args.fsbno = XFS_INO_TO_FSB(args.mp, ip->i_ino); - args.type = XFS_ALLOCTYPE_START_BNO; - } else { - args.fsbno = tp->t_firstblock; - args.type = XFS_ALLOCTYPE_NEAR_BNO; - } args.total = total; args.minlen = args.maxlen = args.prod = 1; - error = xfs_alloc_vextent(&args); + error = xfs_alloc_vextent_start_ag(&args, + XFS_INO_TO_FSB(args.mp, ip->i_ino)); if (error) goto done; /* Can't fail, the space was reserved. */ ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(args.len == 1); - tp->t_firstblock = args.fsbno; error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp, XFS_FSB_TO_DADDR(args.mp, args.fsbno), args.mp->m_bsize, 0, &bp); @@ -854,8 +839,7 @@ xfs_bmap_local_to_extents( ifp->if_nextents = 1; ip->i_nblocks = 1; - xfs_trans_mod_dquot_byino(tp, ip, - XFS_TRANS_DQ_BCOUNT, 1L); + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L); flags |= xfs_ilog_fext(whichfork); done: @@ -3025,9 +3009,7 @@ xfs_bmap_adjacent( struct xfs_bmalloca *ap) /* bmap alloc argument struct */ { xfs_fsblock_t adjust; /* adjustment to block numbers */ - xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ xfs_mount_t *mp; /* mount point structure */ - int nullfb; /* true if ap->firstblock isn't set */ int rt; /* true if inode is realtime */ #define ISVALID(x,y) \ @@ -3038,11 +3020,8 @@ xfs_bmap_adjacent( XFS_FSB_TO_AGBNO(mp, x) < mp->m_sb.sb_agblocks) mp = ap->ip->i_mount; - nullfb = ap->tp->t_firstblock == NULLFSBLOCK; rt = XFS_IS_REALTIME_INODE(ap->ip) && (ap->datatype & XFS_ALLOC_USERDATA); - fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, - ap->tp->t_firstblock); /* * If allocating at eof, and there's a previous real block, * try to use its last block as our starting point. @@ -3101,13 +3080,6 @@ xfs_bmap_adjacent( prevbno += adjust; else prevdiff += adjust; - /* - * If the firstblock forbids it, can't use it, - * must use default. - */ - if (!rt && !nullfb && - XFS_FSB_TO_AGNO(mp, prevbno) != fb_agno) - prevbno = NULLFSBLOCK; } /* * No previous block or can't follow it, just default. @@ -3143,13 +3115,6 @@ xfs_bmap_adjacent( gotdiff += adjust - ap->length; } else gotdiff += adjust; - /* - * If the firstblock forbids it, can't use it, - * must use default. - */ - if (!rt && !nullfb && - XFS_FSB_TO_AGNO(mp, gotbno) != fb_agno) - gotbno = NULLFSBLOCK; } /* * No next block, just default. @@ -3170,147 +3135,91 @@ xfs_bmap_adjacent( #undef ISVALID } -static int +int xfs_bmap_longest_free_extent( + struct xfs_perag *pag, struct xfs_trans *tp, - xfs_agnumber_t ag, - xfs_extlen_t *blen, - int *notinit) + xfs_extlen_t *blen) { - struct xfs_mount *mp = tp->t_mountp; - struct xfs_perag *pag; xfs_extlen_t longest; int error = 0; - pag = xfs_perag_get(mp, ag); - if (!pag->pagf_init) { + if (!xfs_perag_initialised_agf(pag)) { error = xfs_alloc_read_agf(pag, tp, XFS_ALLOC_FLAG_TRYLOCK, NULL); - if (error) { - /* Couldn't lock the AGF, so skip this AG. */ - if (error == -EAGAIN) { - *notinit = 1; - error = 0; - } - goto out; - } + if (error) + return error; } longest = xfs_alloc_longest_free_extent(pag, - xfs_alloc_min_freelist(mp, pag), + xfs_alloc_min_freelist(pag->pag_mount, pag), xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE)); if (*blen < longest) *blen = longest; -out: - xfs_perag_put(pag); - return error; + return 0; } -static void +static xfs_extlen_t xfs_bmap_select_minlen( struct xfs_bmalloca *ap, struct xfs_alloc_arg *args, - xfs_extlen_t *blen, - int notinit) + xfs_extlen_t blen) { - if (notinit || *blen < ap->minlen) { - /* - * Since we did a BUF_TRYLOCK above, it is possible that - * there is space for this request. - */ - args->minlen = ap->minlen; - } else if (*blen < args->maxlen) { - /* - * If the best seen length is less than the request length, - * use the best as the minimum. - */ - args->minlen = *blen; - } else { - /* - * Otherwise we've seen an extent as big as maxlen, use that - * as the minimum. - */ - args->minlen = args->maxlen; - } -} - -STATIC int -xfs_bmap_btalloc_nullfb( - struct xfs_bmalloca *ap, - struct xfs_alloc_arg *args, - xfs_extlen_t *blen) -{ - struct xfs_mount *mp = ap->ip->i_mount; - xfs_agnumber_t ag, startag; - int notinit = 0; - int error; - - args->type = XFS_ALLOCTYPE_START_BNO; - args->total = ap->total; - startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno); - if (startag == NULLAGNUMBER) - startag = ag = 0; - - while (*blen < args->maxlen) { - error = xfs_bmap_longest_free_extent(args->tp, ag, blen, - ¬init); - if (error) - return error; - - if (++ag == mp->m_sb.sb_agcount) - ag = 0; - if (ag == startag) - break; - } + /* + * Since we used XFS_ALLOC_FLAG_TRYLOCK in _longest_free_extent(), it is + * possible that there is enough contiguous free space for this request. + */ + if (blen < ap->minlen) + return ap->minlen; - xfs_bmap_select_minlen(ap, args, blen, notinit); - return 0; + /* + * If the best seen length is less than the request length, + * use the best as the minimum, otherwise we've got the maxlen we + * were asked for. + */ + if (blen < args->maxlen) + return blen; + return args->maxlen; } -STATIC int -xfs_bmap_btalloc_filestreams( +static int +xfs_bmap_btalloc_select_lengths( struct xfs_bmalloca *ap, struct xfs_alloc_arg *args, xfs_extlen_t *blen) { - struct xfs_mount *mp = ap->ip->i_mount; - xfs_agnumber_t ag; - int notinit = 0; - int error; - - args->type = XFS_ALLOCTYPE_NEAR_BNO; - args->total = ap->total; - - ag = XFS_FSB_TO_AGNO(mp, args->fsbno); - if (ag == NULLAGNUMBER) - ag = 0; - - error = xfs_bmap_longest_free_extent(args->tp, ag, blen, ¬init); - if (error) - return error; + struct xfs_mount *mp = args->mp; + struct xfs_perag *pag; + xfs_agnumber_t agno, startag; + int error = 0; - if (*blen < args->maxlen) { - error = xfs_filestream_new_ag(ap, &ag); - if (error) - return error; + if (ap->tp->t_flags & XFS_TRANS_LOWMODE) { + args->total = ap->minlen; + args->minlen = ap->minlen; + return 0; + } - error = xfs_bmap_longest_free_extent(args->tp, ag, blen, - ¬init); - if (error) - return error; + args->total = ap->total; + startag = XFS_FSB_TO_AGNO(mp, ap->blkno); + if (startag == NULLAGNUMBER) + startag = 0; + *blen = 0; + for_each_perag_wrap(mp, startag, agno, pag) { + error = xfs_bmap_longest_free_extent(pag, args->tp, blen); + if (error && error != -EAGAIN) + break; + error = 0; + if (*blen >= args->maxlen) + break; } + if (pag) + xfs_perag_rele(pag); - xfs_bmap_select_minlen(ap, args, blen, notinit); - - /* - * Set the failure fallback case to look in the selected AG as stream - * may have moved. - */ - ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); - return 0; + args->minlen = xfs_bmap_select_minlen(ap, args, *blen); + return error; } /* Update all inode and quota accounting for the allocation we just did. */ @@ -3413,21 +3322,7 @@ xfs_bmap_process_allocated_extent( xfs_fileoff_t orig_offset, xfs_extlen_t orig_length) { - int nullfb; - - nullfb = ap->tp->t_firstblock == NULLFSBLOCK; - - /* - * check the allocation happened at the same or higher AG than - * the first block that was allocated. - */ - ASSERT(nullfb || - XFS_FSB_TO_AGNO(args->mp, ap->tp->t_firstblock) <= - XFS_FSB_TO_AGNO(args->mp, args->fsbno)); - ap->blkno = args->fsbno; - if (nullfb) - ap->tp->t_firstblock = args->fsbno; ap->length = args->len; /* * If the extent size hint is active, we tried to round the @@ -3474,23 +3369,17 @@ xfs_bmap_exact_minlen_extent_alloc( xfs_bmap_compute_alignments(ap, &args); - if (ap->tp->t_firstblock == NULLFSBLOCK) { - /* - * Unlike the longest extent available in an AG, we don't track - * the length of an AG's shortest extent. - * XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a debug only knob and - * hence we can afford to start traversing from the 0th AG since - * we need not be concerned about a drop in performance in - * "debug only" code paths. - */ - ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0); - } else { - ap->blkno = ap->tp->t_firstblock; - } + /* + * Unlike the longest extent available in an AG, we don't track + * the length of an AG's shortest extent. + * XFS_ERRTAG_BMAP_ALLOC_MINLEN_EXTENT is a debug only knob and + * hence we can afford to start traversing from the 0th AG since + * we need not be concerned about a drop in performance in + * "debug only" code paths. + */ + ap->blkno = XFS_AGB_TO_FSB(mp, 0, 0); - args.fsbno = ap->blkno; args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; - args.type = XFS_ALLOCTYPE_FIRST_AG; args.minlen = args.maxlen = ap->minlen; args.total = ap->total; @@ -3502,7 +3391,7 @@ xfs_bmap_exact_minlen_extent_alloc( args.resv = XFS_AG_RESV_NONE; args.datatype = ap->datatype; - error = xfs_alloc_vextent(&args); + error = xfs_alloc_vextent_first_ag(&args, ap->blkno); if (error) return error; @@ -3522,193 +3411,270 @@ xfs_bmap_exact_minlen_extent_alloc( #endif -STATIC int -xfs_bmap_btalloc( - struct xfs_bmalloca *ap) +/* + * If we are not low on available data blocks and we are allocating at + * EOF, optimise allocation for contiguous file extension and/or stripe + * alignment of the new extent. + * + * NOTE: ap->aeof is only set if the allocation length is >= the + * stripe unit and the allocation offset is at the end of file. + */ +static int +xfs_bmap_btalloc_at_eof( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, + xfs_extlen_t blen, + int stripe_align, + bool ag_only) { - struct xfs_mount *mp = ap->ip->i_mount; - struct xfs_alloc_arg args = { .tp = ap->tp, .mp = mp }; - xfs_alloctype_t atype = 0; - xfs_agnumber_t fb_agno; /* ag number of ap->firstblock */ - xfs_agnumber_t ag; - xfs_fileoff_t orig_offset; - xfs_extlen_t orig_length; - xfs_extlen_t blen; - xfs_extlen_t nextminlen = 0; - int nullfb; /* true if ap->firstblock isn't set */ - int isaligned; - int tryagain; + struct xfs_mount *mp = args->mp; + struct xfs_perag *caller_pag = args->pag; int error; - int stripe_align; - - ASSERT(ap->length); - orig_offset = ap->offset; - orig_length = ap->length; - - stripe_align = xfs_bmap_compute_alignments(ap, &args); - - nullfb = ap->tp->t_firstblock == NULLFSBLOCK; - fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, - ap->tp->t_firstblock); - if (nullfb) { - if ((ap->datatype & XFS_ALLOC_USERDATA) && - xfs_inode_is_filestream(ap->ip)) { - ag = xfs_filestream_lookup_ag(ap->ip); - ag = (ag != NULLAGNUMBER) ? ag : 0; - ap->blkno = XFS_AGB_TO_FSB(mp, ag, 0); - } else { - ap->blkno = XFS_INO_TO_FSB(mp, ap->ip->i_ino); - } - } else - ap->blkno = ap->tp->t_firstblock; - - xfs_bmap_adjacent(ap); /* - * If allowed, use ap->blkno; otherwise must use firstblock since - * it's in the right allocation group. - */ - if (nullfb || XFS_FSB_TO_AGNO(mp, ap->blkno) == fb_agno) - ; - else - ap->blkno = ap->tp->t_firstblock; - /* - * Normal allocation, done through xfs_alloc_vextent. + * If there are already extents in the file, try an exact EOF block + * allocation to extend the file as a contiguous extent. If that fails, + * or it's the first allocation in a file, just try for a stripe aligned + * allocation. */ - tryagain = isaligned = 0; - args.fsbno = ap->blkno; - args.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE; + if (ap->offset) { + xfs_extlen_t nextminlen = 0; - /* Trim the allocation back to the maximum an AG can fit. */ - args.maxlen = min(ap->length, mp->m_ag_max_usable); - blen = 0; - if (nullfb) { /* - * Search for an allocation group with a single extent large - * enough for the request. If one isn't found, then adjust - * the minimum allocation size to the largest space found. + * Compute the minlen+alignment for the next case. Set slop so + * that the value of minlen+alignment+slop doesn't go up between + * the calls. */ - if ((ap->datatype & XFS_ALLOC_USERDATA) && - xfs_inode_is_filestream(ap->ip)) - error = xfs_bmap_btalloc_filestreams(ap, &args, &blen); + args->alignment = 1; + if (blen > stripe_align && blen <= args->maxlen) + nextminlen = blen - stripe_align; else - error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); + nextminlen = args->minlen; + if (nextminlen + stripe_align > args->minlen + 1) + args->minalignslop = nextminlen + stripe_align - + args->minlen - 1; + else + args->minalignslop = 0; + + if (!caller_pag) + args->pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ap->blkno)); + error = xfs_alloc_vextent_exact_bno(args, ap->blkno); + if (!caller_pag) + xfs_perag_put(args->pag); if (error) return error; - } else if (ap->tp->t_flags & XFS_TRANS_LOWMODE) { - if (xfs_inode_is_filestream(ap->ip)) - args.type = XFS_ALLOCTYPE_FIRST_AG; - else - args.type = XFS_ALLOCTYPE_START_BNO; - args.total = args.minlen = ap->minlen; + + if (args->fsbno != NULLFSBLOCK) + return 0; + /* + * Exact allocation failed. Reset to try an aligned allocation + * according to the original allocation specification. + */ + args->pag = NULL; + args->alignment = stripe_align; + args->minlen = nextminlen; + args->minalignslop = 0; } else { - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.total = ap->total; - args.minlen = ap->minlen; + /* + * Adjust minlen to try and preserve alignment if we + * can't guarantee an aligned maxlen extent. + */ + args->alignment = stripe_align; + if (blen > args->alignment && + blen <= args->maxlen + args->alignment) + args->minlen = blen - args->alignment; + args->minalignslop = 0; } - /* - * If we are not low on available data blocks, and the underlying - * logical volume manager is a stripe, and the file offset is zero then - * try to allocate data blocks on stripe unit boundary. NOTE: ap->aeof - * is only set if the allocation length is >= the stripe unit and the - * allocation offset is at the end of file. - */ - if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) { - if (!ap->offset) { - args.alignment = stripe_align; - atype = args.type; - isaligned = 1; - /* - * Adjust minlen to try and preserve alignment if we - * can't guarantee an aligned maxlen extent. - */ - if (blen > args.alignment && - blen <= args.maxlen + args.alignment) - args.minlen = blen - args.alignment; - args.minalignslop = 0; - } else { - /* - * First try an exact bno allocation. - * If it fails then do a near or start bno - * allocation with alignment turned on. - */ - atype = args.type; - tryagain = 1; - args.type = XFS_ALLOCTYPE_THIS_BNO; - args.alignment = 1; - /* - * Compute the minlen+alignment for the - * next case. Set slop so that the value - * of minlen+alignment+slop doesn't go up - * between the calls. - */ - if (blen > stripe_align && blen <= args.maxlen) - nextminlen = blen - stripe_align; - else - nextminlen = args.minlen; - if (nextminlen + stripe_align > args.minlen + 1) - args.minalignslop = - nextminlen + stripe_align - - args.minlen - 1; - else - args.minalignslop = 0; - } + if (ag_only) { + error = xfs_alloc_vextent_near_bno(args, ap->blkno); } else { - args.alignment = 1; - args.minalignslop = 0; + args->pag = NULL; + error = xfs_alloc_vextent_start_ag(args, ap->blkno); + ASSERT(args->pag == NULL); + args->pag = caller_pag; } - args.minleft = ap->minleft; - args.wasdel = ap->wasdel; - args.resv = XFS_AG_RESV_NONE; - args.datatype = ap->datatype; - - error = xfs_alloc_vextent(&args); if (error) return error; - if (tryagain && args.fsbno == NULLFSBLOCK) { - /* - * Exact allocation failed. Now try with alignment - * turned on. - */ - args.type = atype; - args.fsbno = ap->blkno; - args.alignment = stripe_align; - args.minlen = nextminlen; - args.minalignslop = 0; - isaligned = 1; - if ((error = xfs_alloc_vextent(&args))) - return error; - } - if (isaligned && args.fsbno == NULLFSBLOCK) { - /* - * allocation failed, so turn off alignment and - * try again. - */ - args.type = atype; - args.fsbno = ap->blkno; - args.alignment = 0; - if ((error = xfs_alloc_vextent(&args))) + if (args->fsbno != NULLFSBLOCK) + return 0; + + /* + * Allocation failed, so turn return the allocation args to their + * original non-aligned state so the caller can proceed on allocation + * failure as if this function was never called. + */ + args->fsbno = ap->blkno; + args->alignment = 1; + return 0; +} + +/* + * We have failed multiple allocation attempts so now are in a low space + * allocation situation. Try a locality first full filesystem minimum length + * allocation whilst still maintaining necessary total block reservation + * requirements. + * + * If that fails, we are now critically low on space, so perform a last resort + * allocation attempt: no reserve, no locality, blocking, minimum length, full + * filesystem free space scan. We also indicate to future allocations in this + * transaction that we are critically low on space so they don't waste time on + * allocation modes that are unlikely to succeed. + */ +int +xfs_bmap_btalloc_low_space( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args) +{ + int error; + + if (args->minlen > ap->minlen) { + args->minlen = ap->minlen; + error = xfs_alloc_vextent_start_ag(args, ap->blkno); + if (error || args->fsbno != NULLFSBLOCK) return error; } - if (args.fsbno == NULLFSBLOCK && nullfb && - args.minlen > ap->minlen) { - args.minlen = ap->minlen; - args.type = XFS_ALLOCTYPE_START_BNO; - args.fsbno = ap->blkno; - if ((error = xfs_alloc_vextent(&args))) - return error; + + /* Last ditch attempt before failure is declared. */ + args->total = ap->minlen; + error = xfs_alloc_vextent_first_ag(args, 0); + if (error) + return error; + ap->tp->t_flags |= XFS_TRANS_LOWMODE; + return 0; +} + +static int +xfs_bmap_btalloc_filestreams( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, + int stripe_align) +{ + xfs_extlen_t blen = 0; + int error = 0; + + + error = xfs_filestream_select_ag(ap, args, &blen); + if (error) + return error; + ASSERT(args->pag); + + /* + * If we are in low space mode, then optimal allocation will fail so + * prepare for minimal allocation and jump to the low space algorithm + * immediately. + */ + if (ap->tp->t_flags & XFS_TRANS_LOWMODE) { + args->minlen = ap->minlen; + ASSERT(args->fsbno == NULLFSBLOCK); + goto out_low_space; } - if (args.fsbno == NULLFSBLOCK && nullfb) { - args.fsbno = 0; - args.type = XFS_ALLOCTYPE_FIRST_AG; - args.total = ap->minlen; - if ((error = xfs_alloc_vextent(&args))) + + args->minlen = xfs_bmap_select_minlen(ap, args, blen); + if (ap->aeof) + error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align, + true); + + if (!error && args->fsbno == NULLFSBLOCK) + error = xfs_alloc_vextent_near_bno(args, ap->blkno); + +out_low_space: + /* + * We are now done with the perag reference for the filestreams + * association provided by xfs_filestream_select_ag(). Release it now as + * we've either succeeded, had a fatal error or we are out of space and + * need to do a full filesystem scan for free space which will take it's + * own references. + */ + xfs_perag_rele(args->pag); + args->pag = NULL; + if (error || args->fsbno != NULLFSBLOCK) + return error; + + return xfs_bmap_btalloc_low_space(ap, args); +} + +static int +xfs_bmap_btalloc_best_length( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, + int stripe_align) +{ + xfs_extlen_t blen = 0; + int error; + + ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino); + xfs_bmap_adjacent(ap); + + /* + * Search for an allocation group with a single extent large enough for + * the request. If one isn't found, then adjust the minimum allocation + * size to the largest space found. + */ + error = xfs_bmap_btalloc_select_lengths(ap, args, &blen); + if (error) + return error; + + /* + * Don't attempt optimal EOF allocation if previous allocations barely + * succeeded due to being near ENOSPC. It is highly unlikely we'll get + * optimal or even aligned allocations in this case, so don't waste time + * trying. + */ + if (ap->aeof && !(ap->tp->t_flags & XFS_TRANS_LOWMODE)) { + error = xfs_bmap_btalloc_at_eof(ap, args, blen, stripe_align, + false); + if (error || args->fsbno != NULLFSBLOCK) return error; - ap->tp->t_flags |= XFS_TRANS_LOWMODE; } + error = xfs_alloc_vextent_start_ag(args, ap->blkno); + if (error || args->fsbno != NULLFSBLOCK) + return error; + + return xfs_bmap_btalloc_low_space(ap, args); +} + +static int +xfs_bmap_btalloc( + struct xfs_bmalloca *ap) +{ + struct xfs_mount *mp = ap->ip->i_mount; + struct xfs_alloc_arg args = { + .tp = ap->tp, + .mp = mp, + .fsbno = NULLFSBLOCK, + .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE, + .minleft = ap->minleft, + .wasdel = ap->wasdel, + .resv = XFS_AG_RESV_NONE, + .datatype = ap->datatype, + .alignment = 1, + .minalignslop = 0, + }; + xfs_fileoff_t orig_offset; + xfs_extlen_t orig_length; + int error; + int stripe_align; + + ASSERT(ap->length); + orig_offset = ap->offset; + orig_length = ap->length; + + stripe_align = xfs_bmap_compute_alignments(ap, &args); + + /* Trim the allocation back to the maximum an AG can fit. */ + args.maxlen = min(ap->length, mp->m_ag_max_usable); + + if ((ap->datatype & XFS_ALLOC_USERDATA) && + xfs_inode_is_filestream(ap->ip)) + error = xfs_bmap_btalloc_filestreams(ap, &args, stripe_align); + else + error = xfs_bmap_btalloc_best_length(ap, &args, stripe_align); + if (error) + return error; + if (args.fsbno != NULLFSBLOCK) { xfs_bmap_process_allocated_extent(ap, &args, orig_offset, orig_length); @@ -4256,7 +4222,7 @@ xfs_bmapi_convert_unwritten( return 0; } -static inline xfs_extlen_t +xfs_extlen_t xfs_bmapi_minleft( struct xfs_trans *tp, struct xfs_inode *ip, @@ -4264,7 +4230,7 @@ xfs_bmapi_minleft( { struct xfs_ifork *ifp = xfs_ifork_ptr(ip, fork); - if (tp && tp->t_firstblock != NULLFSBLOCK) + if (tp && tp->t_highest_agno != NULLAGNUMBER) return 0; if (ifp->if_format != XFS_DINODE_FMT_BTREE) return 1; @@ -6146,39 +6112,37 @@ xfs_bmap_unmap_extent( int xfs_bmap_finish_one( struct xfs_trans *tp, - struct xfs_inode *ip, - enum xfs_bmap_intent_type type, - int whichfork, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t *blockcount, - xfs_exntst_t state) + struct xfs_bmap_intent *bi) { + struct xfs_bmbt_irec *bmap = &bi->bi_bmap; int error = 0; - ASSERT(tp->t_firstblock == NULLFSBLOCK); + ASSERT(tp->t_highest_agno == NULLAGNUMBER); trace_xfs_bmap_deferred(tp->t_mountp, - XFS_FSB_TO_AGNO(tp->t_mountp, startblock), type, - XFS_FSB_TO_AGBNO(tp->t_mountp, startblock), - ip->i_ino, whichfork, startoff, *blockcount, state); + XFS_FSB_TO_AGNO(tp->t_mountp, bmap->br_startblock), + bi->bi_type, + XFS_FSB_TO_AGBNO(tp->t_mountp, bmap->br_startblock), + bi->bi_owner->i_ino, bi->bi_whichfork, + bmap->br_startoff, bmap->br_blockcount, + bmap->br_state); - if (WARN_ON_ONCE(whichfork != XFS_DATA_FORK)) + if (WARN_ON_ONCE(bi->bi_whichfork != XFS_DATA_FORK)) return -EFSCORRUPTED; if (XFS_TEST_ERROR(false, tp->t_mountp, XFS_ERRTAG_BMAP_FINISH_ONE)) return -EIO; - switch (type) { + switch (bi->bi_type) { case XFS_BMAP_MAP: - error = xfs_bmapi_remap(tp, ip, startoff, *blockcount, - startblock, 0); - *blockcount = 0; + error = xfs_bmapi_remap(tp, bi->bi_owner, bmap->br_startoff, + bmap->br_blockcount, bmap->br_startblock, 0); + bmap->br_blockcount = 0; break; case XFS_BMAP_UNMAP: - error = __xfs_bunmapi(tp, ip, startoff, blockcount, - XFS_BMAPI_REMAP, 1); + error = __xfs_bunmapi(tp, bi->bi_owner, bmap->br_startoff, + &bmap->br_blockcount, XFS_BMAPI_REMAP, 1); break; default: ASSERT(0); diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index 16db95b11589..dd08361ca5a6 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -12,6 +12,7 @@ struct xfs_ifork; struct xfs_inode; struct xfs_mount; struct xfs_trans; +struct xfs_alloc_arg; /* * Argument structure for xfs_bmap_alloc. @@ -168,6 +169,8 @@ static inline bool xfs_bmap_is_written_extent(struct xfs_bmbt_irec *irec) #define xfs_valid_startblock(ip, startblock) \ ((startblock) != 0 || XFS_IS_REALTIME_INODE(ip)) +int xfs_bmap_longest_free_extent(struct xfs_perag *pag, + struct xfs_trans *tp, xfs_extlen_t *blen); void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno, xfs_filblks_t len); unsigned int xfs_bmap_compute_attr_offset(struct xfs_mount *mp); @@ -220,6 +223,10 @@ int xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp, struct xfs_inode *ip, int whichfork, struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp, struct xfs_bmbt_irec *new, int *logflagsp); +xfs_extlen_t xfs_bmapi_minleft(struct xfs_trans *tp, struct xfs_inode *ip, + int fork); +int xfs_bmap_btalloc_low_space(struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args); enum xfs_bmap_intent_type { XFS_BMAP_MAP = 1, @@ -234,10 +241,7 @@ struct xfs_bmap_intent { struct xfs_bmbt_irec bi_bmap; }; -int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_inode *ip, - enum xfs_bmap_intent_type type, int whichfork, - xfs_fileoff_t startoff, xfs_fsblock_t startblock, - xfs_filblks_t *blockcount, xfs_exntst_t state); +int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_bmap_intent *bi); void xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip, struct xfs_bmbt_irec *imap); void xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip, diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index cfa052d40105..b8ad95050c9b 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -21,6 +21,7 @@ #include "xfs_quota.h" #include "xfs_trace.h" #include "xfs_rmap.h" +#include "xfs_ag.h" static struct kmem_cache *xfs_bmbt_cur_cache; @@ -184,11 +185,11 @@ xfs_bmbt_update_cursor( struct xfs_btree_cur *src, struct xfs_btree_cur *dst) { - ASSERT((dst->bc_tp->t_firstblock != NULLFSBLOCK) || + ASSERT((dst->bc_tp->t_highest_agno != NULLAGNUMBER) || (dst->bc_ino.ip->i_diflags & XFS_DIFLAG_REALTIME)); dst->bc_ino.allocated += src->bc_ino.allocated; - dst->bc_tp->t_firstblock = src->bc_tp->t_firstblock; + dst->bc_tp->t_highest_agno = src->bc_tp->t_highest_agno; src->bc_ino.allocated = 0; } @@ -200,46 +201,32 @@ xfs_bmbt_alloc_block( union xfs_btree_ptr *new, int *stat) { - xfs_alloc_arg_t args; /* block allocation args */ - int error; /* error return value */ + struct xfs_alloc_arg args; + int error; memset(&args, 0, sizeof(args)); args.tp = cur->bc_tp; args.mp = cur->bc_mp; - args.fsbno = cur->bc_tp->t_firstblock; xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_ino.ip->i_ino, cur->bc_ino.whichfork); - - if (args.fsbno == NULLFSBLOCK) { - args.fsbno = be64_to_cpu(start->l); - args.type = XFS_ALLOCTYPE_START_BNO; - /* - * Make sure there is sufficient room left in the AG to - * complete a full tree split for an extent insert. If - * we are converting the middle part of an extent then - * we may need space for two tree splits. - * - * We are relying on the caller to make the correct block - * reservation for this operation to succeed. If the - * reservation amount is insufficient then we may fail a - * block allocation here and corrupt the filesystem. - */ - args.minleft = args.tp->t_blk_res; - } else if (cur->bc_tp->t_flags & XFS_TRANS_LOWMODE) { - args.type = XFS_ALLOCTYPE_START_BNO; - } else { - args.type = XFS_ALLOCTYPE_NEAR_BNO; - } - args.minlen = args.maxlen = args.prod = 1; args.wasdel = cur->bc_ino.flags & XFS_BTCUR_BMBT_WASDEL; - if (!args.wasdel && args.tp->t_blk_res == 0) { - error = -ENOSPC; - goto error0; - } - error = xfs_alloc_vextent(&args); + if (!args.wasdel && args.tp->t_blk_res == 0) + return -ENOSPC; + + /* + * If we are coming here from something like unwritten extent + * conversion, there has been no data extent allocation already done, so + * we have to ensure that we attempt to locate the entire set of bmbt + * allocations in the same AG, as xfs_bmapi_write() would have reserved. + */ + if (cur->bc_tp->t_highest_agno == NULLAGNUMBER) + args.minleft = xfs_bmapi_minleft(cur->bc_tp, cur->bc_ino.ip, + cur->bc_ino.whichfork); + + error = xfs_alloc_vextent_start_ag(&args, be64_to_cpu(start->l)); if (error) - goto error0; + return error; if (args.fsbno == NULLFSBLOCK && args.minleft) { /* @@ -247,11 +234,10 @@ xfs_bmbt_alloc_block( * a full btree split. Try again and if * successful activate the lowspace algorithm. */ - args.fsbno = 0; - args.type = XFS_ALLOCTYPE_FIRST_AG; - error = xfs_alloc_vextent(&args); + args.minleft = 0; + error = xfs_alloc_vextent_start_ag(&args, 0); if (error) - goto error0; + return error; cur->bc_tp->t_flags |= XFS_TRANS_LOWMODE; } if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { @@ -260,7 +246,6 @@ xfs_bmbt_alloc_block( } ASSERT(args.len == 1); - cur->bc_tp->t_firstblock = args.fsbno; cur->bc_ino.allocated++; cur->bc_ino.ip->i_nblocks++; xfs_trans_log_inode(args.tp, cur->bc_ino.ip, XFS_ILOG_CORE); @@ -271,9 +256,6 @@ xfs_bmbt_alloc_block( *stat = 1; return 0; - - error0: - return error; } STATIC int diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 4c16c8c31fcb..c4649cc624e1 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -2913,9 +2913,22 @@ xfs_btree_split_worker( } /* - * BMBT split requests often come in with little stack to work on. Push + * BMBT split requests often come in with little stack to work on so we push * them off to a worker thread so there is lots of stack to use. For the other * btree types, just call directly to avoid the context switch overhead here. + * + * Care must be taken here - the work queue rescuer thread introduces potential + * AGF <> worker queue deadlocks if the BMBT block allocation has to lock new + * AGFs to allocate blocks. A task being run by the rescuer could attempt to + * lock an AGF that is already locked by a task queued to run by the rescuer, + * resulting in an ABBA deadlock as the rescuer cannot run the lock holder to + * release it until the current thread it is running gains the lock. + * + * To avoid this issue, we only ever queue BMBT splits that don't have an AGF + * already locked to allocate from. The only place that doesn't hold an AGF + * locked is unwritten extent conversion at IO completion, but that has already + * been offloaded to a worker thread and hence has no stack consumption issues + * we have to worry about. */ STATIC int /* error */ xfs_btree_split( @@ -2929,7 +2942,8 @@ xfs_btree_split( struct xfs_btree_split_args args; DECLARE_COMPLETION_ONSTACK(done); - if (cur->bc_btnum != XFS_BTNUM_BMAP) + if (cur->bc_btnum != XFS_BTNUM_BMAP || + cur->bc_tp->t_highest_agno == NULLAGNUMBER) return __xfs_btree_split(cur, level, ptrp, key, curp, stat); args.cur = cur; @@ -4666,7 +4680,12 @@ xfs_btree_space_to_height( const unsigned int *limits, unsigned long long leaf_blocks) { - unsigned long long node_blocks = limits[1]; + /* + * The root btree block can have fewer than minrecs pointers in it + * because the tree might not be big enough to require that amount of + * fanout. Hence it has a minimum size of 2 pointers, not limits[1]. + */ + unsigned long long node_blocks = 2; unsigned long long blocks_left = leaf_blocks - 1; unsigned int height = 1; diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 5118dedf9267..7ee292aecbeb 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -169,10 +169,9 @@ xfs_inobt_insert_rec( */ STATIC int xfs_inobt_insert( - struct xfs_mount *mp, + struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, - struct xfs_perag *pag, xfs_agino_t newino, xfs_agino_t newlen, xfs_btnum_t btnum) @@ -182,7 +181,7 @@ xfs_inobt_insert( int i; int error; - cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, btnum); + cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum); for (thisino = newino; thisino < newino + newlen; @@ -514,20 +513,20 @@ __xfs_inobt_rec_merge( */ STATIC int xfs_inobt_insert_sprec( - struct xfs_mount *mp, + struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, - struct xfs_perag *pag, int btnum, struct xfs_inobt_rec_incore *nrec, /* in/out: new/merged rec. */ bool merge) /* merge or replace */ { + struct xfs_mount *mp = pag->pag_mount; struct xfs_btree_cur *cur; int error; int i; struct xfs_inobt_rec_incore rec; - cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, btnum); + cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum); /* the new record is pre-aligned so we know where to look */ error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i); @@ -609,9 +608,9 @@ error: */ STATIC int xfs_ialloc_ag_alloc( + struct xfs_perag *pag, struct xfs_trans *tp, - struct xfs_buf *agbp, - struct xfs_perag *pag) + struct xfs_buf *agbp) { struct xfs_agi *agi; struct xfs_alloc_arg args; @@ -631,6 +630,7 @@ xfs_ialloc_ag_alloc( args.mp = tp->t_mountp; args.fsbno = NULLFSBLOCK; args.oinfo = XFS_RMAP_OINFO_INODES; + args.pag = pag; #ifdef DEBUG /* randomly do sparse inode allocations */ @@ -662,8 +662,6 @@ xfs_ialloc_ag_alloc( goto sparse_alloc; if (likely(newino != NULLAGINO && (args.agbno < be32_to_cpu(agi->agi_length)))) { - args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno); - args.type = XFS_ALLOCTYPE_THIS_BNO; args.prod = 1; /* @@ -684,7 +682,10 @@ xfs_ialloc_ag_alloc( /* Allow space for the inode btree to split. */ args.minleft = igeo->inobt_maxlevels; - if ((error = xfs_alloc_vextent(&args))) + error = xfs_alloc_vextent_exact_bno(&args, + XFS_AGB_TO_FSB(args.mp, pag->pag_agno, + args.agbno)); + if (error) return error; /* @@ -717,22 +718,17 @@ xfs_ialloc_ag_alloc( } else args.alignment = igeo->cluster_align; /* - * Need to figure out where to allocate the inode blocks. - * Ideally they should be spaced out through the a.g. - * For now, just allocate blocks up front. - */ - args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno); - /* * Allocate a fixed-size extent of inodes. */ - args.type = XFS_ALLOCTYPE_NEAR_BNO; args.prod = 1; /* * Allow space for the inode btree to split. */ args.minleft = igeo->inobt_maxlevels; - if ((error = xfs_alloc_vextent(&args))) + error = xfs_alloc_vextent_near_bno(&args, + XFS_AGB_TO_FSB(args.mp, pag->pag_agno, + be32_to_cpu(agi->agi_root))); + if (error) return error; } @@ -741,11 +737,11 @@ xfs_ialloc_ag_alloc( * alignment. */ if (isaligned && args.fsbno == NULLFSBLOCK) { - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno); args.alignment = igeo->cluster_align; - if ((error = xfs_alloc_vextent(&args))) + error = xfs_alloc_vextent_near_bno(&args, + XFS_AGB_TO_FSB(args.mp, pag->pag_agno, + be32_to_cpu(agi->agi_root))); + if (error) return error; } @@ -757,9 +753,6 @@ xfs_ialloc_ag_alloc( igeo->ialloc_min_blks < igeo->ialloc_blks && args.fsbno == NULLFSBLOCK) { sparse_alloc: - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.agbno = be32_to_cpu(agi->agi_root); - args.fsbno = XFS_AGB_TO_FSB(args.mp, pag->pag_agno, args.agbno); args.alignment = args.mp->m_sb.sb_spino_align; args.prod = 1; @@ -781,7 +774,9 @@ sparse_alloc: args.mp->m_sb.sb_inoalignmt) - igeo->ialloc_blks; - error = xfs_alloc_vextent(&args); + error = xfs_alloc_vextent_near_bno(&args, + XFS_AGB_TO_FSB(args.mp, pag->pag_agno, + be32_to_cpu(agi->agi_root))); if (error) return error; @@ -831,7 +826,7 @@ sparse_alloc: * if necessary. If a merge does occur, rec is updated to the * merged record. */ - error = xfs_inobt_insert_sprec(args.mp, tp, agbp, pag, + error = xfs_inobt_insert_sprec(pag, tp, agbp, XFS_BTNUM_INO, &rec, true); if (error == -EFSCORRUPTED) { xfs_alert(args.mp, @@ -856,20 +851,20 @@ sparse_alloc: * existing record with this one. */ if (xfs_has_finobt(args.mp)) { - error = xfs_inobt_insert_sprec(args.mp, tp, agbp, pag, + error = xfs_inobt_insert_sprec(pag, tp, agbp, XFS_BTNUM_FINO, &rec, false); if (error) return error; } } else { /* full chunk - insert new records to both btrees */ - error = xfs_inobt_insert(args.mp, tp, agbp, pag, newino, newlen, + error = xfs_inobt_insert(pag, tp, agbp, newino, newlen, XFS_BTNUM_INO); if (error) return error; if (xfs_has_finobt(args.mp)) { - error = xfs_inobt_insert(args.mp, tp, agbp, pag, newino, + error = xfs_inobt_insert(pag, tp, agbp, newino, newlen, XFS_BTNUM_FINO); if (error) return error; @@ -981,9 +976,9 @@ xfs_inobt_first_free_inode( */ STATIC int xfs_dialloc_ag_inobt( + struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, - struct xfs_perag *pag, xfs_ino_t parent, xfs_ino_t *inop) { @@ -999,12 +994,12 @@ xfs_dialloc_ag_inobt( int i, j; int searchdistance = 10; - ASSERT(pag->pagi_init); - ASSERT(pag->pagi_inodeok); + ASSERT(xfs_perag_initialised_agi(pag)); + ASSERT(xfs_perag_allows_inodes(pag)); ASSERT(pag->pagi_freecount > 0); restart_pagno: - cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO); /* * If pagino is 0 (this is the root inode allocation) use newino. * This must work because we've just allocated some. @@ -1429,9 +1424,9 @@ xfs_dialloc_ag_update_inobt( */ static int xfs_dialloc_ag( + struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, - struct xfs_perag *pag, xfs_ino_t parent, xfs_ino_t *inop) { @@ -1448,7 +1443,7 @@ xfs_dialloc_ag( int i; if (!xfs_has_finobt(mp)) - return xfs_dialloc_ag_inobt(tp, agbp, pag, parent, inop); + return xfs_dialloc_ag_inobt(pag, tp, agbp, parent, inop); /* * If pagino is 0 (this is the root inode allocation) use newino. @@ -1457,7 +1452,7 @@ xfs_dialloc_ag( if (!pagino) pagino = be32_to_cpu(agi->agi_newino); - cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_FINO); + cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO); error = xfs_check_agi_freecount(cur); if (error) @@ -1500,7 +1495,7 @@ xfs_dialloc_ag( * the original freecount. If all is well, make the equivalent update to * the inobt using the finobt record and offset information. */ - icur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO); + icur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO); error = xfs_check_agi_freecount(icur); if (error) @@ -1577,25 +1572,10 @@ xfs_dialloc_roll( return error; } -static xfs_agnumber_t -xfs_ialloc_next_ag( - xfs_mount_t *mp) -{ - xfs_agnumber_t agno; - - spin_lock(&mp->m_agirotor_lock); - agno = mp->m_agirotor; - if (++mp->m_agirotor >= mp->m_maxagi) - mp->m_agirotor = 0; - spin_unlock(&mp->m_agirotor_lock); - - return agno; -} - static bool xfs_dialloc_good_ag( - struct xfs_trans *tp, struct xfs_perag *pag, + struct xfs_trans *tp, umode_t mode, int flags, bool ok_alloc) @@ -1606,10 +1586,12 @@ xfs_dialloc_good_ag( int needspace; int error; - if (!pag->pagi_inodeok) + if (!pag) + return false; + if (!xfs_perag_allows_inodes(pag)) return false; - if (!pag->pagi_init) { + if (!xfs_perag_initialised_agi(pag)) { error = xfs_ialloc_read_agi(pag, tp, NULL); if (error) return false; @@ -1620,7 +1602,7 @@ xfs_dialloc_good_ag( if (!ok_alloc) return false; - if (!pag->pagf_init) { + if (!xfs_perag_initialised_agf(pag)) { error = xfs_alloc_read_agf(pag, tp, flags, NULL); if (error) return false; @@ -1665,8 +1647,8 @@ xfs_dialloc_good_ag( static int xfs_dialloc_try_ag( - struct xfs_trans **tpp, struct xfs_perag *pag, + struct xfs_trans **tpp, xfs_ino_t parent, xfs_ino_t *new_ino, bool ok_alloc) @@ -1689,7 +1671,7 @@ xfs_dialloc_try_ag( goto out_release; } - error = xfs_ialloc_ag_alloc(*tpp, agbp, pag); + error = xfs_ialloc_ag_alloc(pag, *tpp, agbp); if (error < 0) goto out_release; @@ -1705,7 +1687,7 @@ xfs_dialloc_try_ag( } /* Allocate an inode in the found AG */ - error = xfs_dialloc_ag(*tpp, agbp, pag, parent, &ino); + error = xfs_dialloc_ag(pag, *tpp, agbp, parent, &ino); if (!error) *new_ino = ino; return error; @@ -1737,8 +1719,9 @@ xfs_dialloc( struct xfs_perag *pag; struct xfs_ino_geometry *igeo = M_IGEO(mp); bool ok_alloc = true; + bool low_space = false; int flags; - xfs_ino_t ino; + xfs_ino_t ino = NULLFSINO; /* * Directories, symlinks, and regular files frequently allocate at least @@ -1746,7 +1729,8 @@ xfs_dialloc( * an AG has enough space for file creation. */ if (S_ISDIR(mode)) - start_agno = xfs_ialloc_next_ag(mp); + start_agno = (atomic_inc_return(&mp->m_agirotor) - 1) % + mp->m_maxagi; else { start_agno = XFS_INO_TO_AGNO(mp, parent); if (start_agno >= mp->m_maxagi) @@ -1768,41 +1752,55 @@ xfs_dialloc( } /* + * If we are near to ENOSPC, we want to prefer allocation from AGs that + * have free inodes in them rather than use up free space allocating new + * inode chunks. Hence we turn off allocation for the first non-blocking + * pass through the AGs if we are near ENOSPC to consume free inodes + * that we can immediately allocate, but then we allow allocation on the + * second pass if we fail to find an AG with free inodes in it. + */ + if (percpu_counter_read_positive(&mp->m_fdblocks) < + mp->m_low_space[XFS_LOWSP_1_PCNT]) { + ok_alloc = false; + low_space = true; + } + + /* * Loop until we find an allocation group that either has free inodes * or in which we can allocate some inodes. Iterate through the * allocation groups upward, wrapping at the end. */ - agno = start_agno; flags = XFS_ALLOC_FLAG_TRYLOCK; - for (;;) { - pag = xfs_perag_get(mp, agno); - if (xfs_dialloc_good_ag(*tpp, pag, mode, flags, ok_alloc)) { - error = xfs_dialloc_try_ag(tpp, pag, parent, +retry: + for_each_perag_wrap_at(mp, start_agno, mp->m_maxagi, agno, pag) { + if (xfs_dialloc_good_ag(pag, *tpp, mode, flags, ok_alloc)) { + error = xfs_dialloc_try_ag(pag, tpp, parent, &ino, ok_alloc); if (error != -EAGAIN) break; + error = 0; } if (xfs_is_shutdown(mp)) { error = -EFSCORRUPTED; break; } - if (++agno == mp->m_maxagi) - agno = 0; - if (agno == start_agno) { - if (!flags) { - error = -ENOSPC; - break; - } + } + if (pag) + xfs_perag_rele(pag); + if (error) + return error; + if (ino == NULLFSINO) { + if (flags) { flags = 0; + if (low_space) + ok_alloc = true; + goto retry; } - xfs_perag_put(pag); + return -ENOSPC; } - - if (!error) - *new_ino = ino; - xfs_perag_put(pag); - return error; + *new_ino = ino; + return 0; } /* @@ -1885,14 +1883,14 @@ next: STATIC int xfs_difree_inobt( - struct xfs_mount *mp, + struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, - struct xfs_perag *pag, xfs_agino_t agino, struct xfs_icluster *xic, struct xfs_inobt_rec_incore *orec) { + struct xfs_mount *mp = pag->pag_mount; struct xfs_agi *agi = agbp->b_addr; struct xfs_btree_cur *cur; struct xfs_inobt_rec_incore rec; @@ -1907,7 +1905,7 @@ xfs_difree_inobt( /* * Initialize the cursor. */ - cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO); error = xfs_check_agi_freecount(cur); if (error) @@ -2019,20 +2017,20 @@ error0: */ STATIC int xfs_difree_finobt( - struct xfs_mount *mp, + struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, - struct xfs_perag *pag, xfs_agino_t agino, struct xfs_inobt_rec_incore *ibtrec) /* inobt record */ { + struct xfs_mount *mp = pag->pag_mount; struct xfs_btree_cur *cur; struct xfs_inobt_rec_incore rec; int offset = agino - ibtrec->ir_startino; int error; int i; - cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_FINO); + cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO); error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i); if (error) @@ -2179,7 +2177,7 @@ xfs_difree( /* * Fix up the inode allocation btree. */ - error = xfs_difree_inobt(mp, tp, agbp, pag, agino, xic, &rec); + error = xfs_difree_inobt(pag, tp, agbp, agino, xic, &rec); if (error) goto error0; @@ -2187,7 +2185,7 @@ xfs_difree( * Fix up the free inode btree. */ if (xfs_has_finobt(mp)) { - error = xfs_difree_finobt(mp, tp, agbp, pag, agino, &rec); + error = xfs_difree_finobt(pag, tp, agbp, agino, &rec); if (error) goto error0; } @@ -2200,15 +2198,15 @@ error0: STATIC int xfs_imap_lookup( - struct xfs_mount *mp, - struct xfs_trans *tp, struct xfs_perag *pag, + struct xfs_trans *tp, xfs_agino_t agino, xfs_agblock_t agbno, xfs_agblock_t *chunk_agbno, xfs_agblock_t *offset_agbno, int flags) { + struct xfs_mount *mp = pag->pag_mount; struct xfs_inobt_rec_incore rec; struct xfs_btree_cur *cur; struct xfs_buf *agbp; @@ -2229,7 +2227,7 @@ xfs_imap_lookup( * we have a record, we need to ensure it contains the inode number * we are looking up. */ - cur = xfs_inobt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i); if (!error) { if (i) @@ -2263,12 +2261,13 @@ xfs_imap_lookup( */ int xfs_imap( - struct xfs_mount *mp, /* file system mount structure */ - struct xfs_trans *tp, /* transaction pointer */ + struct xfs_perag *pag, + struct xfs_trans *tp, xfs_ino_t ino, /* inode to locate */ struct xfs_imap *imap, /* location map structure */ uint flags) /* flags for inode btree lookup */ { + struct xfs_mount *mp = pag->pag_mount; xfs_agblock_t agbno; /* block number of inode in the alloc group */ xfs_agino_t agino; /* inode number within alloc group */ xfs_agblock_t chunk_agbno; /* first block in inode chunk */ @@ -2276,17 +2275,15 @@ xfs_imap( int error; /* error code */ int offset; /* index of inode in its buffer */ xfs_agblock_t offset_agbno; /* blks from chunk start to inode */ - struct xfs_perag *pag; ASSERT(ino != NULLFSINO); /* * Split up the inode number into its parts. */ - pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ino)); agino = XFS_INO_TO_AGINO(mp, ino); agbno = XFS_AGINO_TO_AGBNO(mp, agino); - if (!pag || agbno >= mp->m_sb.sb_agblocks || + if (agbno >= mp->m_sb.sb_agblocks || ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { error = -EINVAL; #ifdef DEBUG @@ -2295,20 +2292,14 @@ xfs_imap( * as they can be invalid without implying corruption. */ if (flags & XFS_IGET_UNTRUSTED) - goto out_drop; - if (!pag) { - xfs_alert(mp, - "%s: agno (%d) >= mp->m_sb.sb_agcount (%d)", - __func__, XFS_INO_TO_AGNO(mp, ino), - mp->m_sb.sb_agcount); - } + return error; if (agbno >= mp->m_sb.sb_agblocks) { xfs_alert(mp, "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)", __func__, (unsigned long long)agbno, (unsigned long)mp->m_sb.sb_agblocks); } - if (pag && ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { + if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) { xfs_alert(mp, "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)", __func__, ino, @@ -2316,7 +2307,7 @@ xfs_imap( } xfs_stack_trace(); #endif /* DEBUG */ - goto out_drop; + return error; } /* @@ -2327,10 +2318,10 @@ xfs_imap( * in all cases where an untrusted inode number is passed. */ if (flags & XFS_IGET_UNTRUSTED) { - error = xfs_imap_lookup(mp, tp, pag, agino, agbno, + error = xfs_imap_lookup(pag, tp, agino, agbno, &chunk_agbno, &offset_agbno, flags); if (error) - goto out_drop; + return error; goto out_map; } @@ -2346,8 +2337,7 @@ xfs_imap( imap->im_len = XFS_FSB_TO_BB(mp, 1); imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog); - error = 0; - goto out_drop; + return 0; } /* @@ -2359,10 +2349,10 @@ xfs_imap( offset_agbno = agbno & M_IGEO(mp)->inoalign_mask; chunk_agbno = agbno - offset_agbno; } else { - error = xfs_imap_lookup(mp, tp, pag, agino, agbno, + error = xfs_imap_lookup(pag, tp, agino, agbno, &chunk_agbno, &offset_agbno, flags); if (error) - goto out_drop; + return error; } out_map: @@ -2390,14 +2380,9 @@ out_map: __func__, (unsigned long long) imap->im_blkno, (unsigned long long) imap->im_len, XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)); - error = -EINVAL; - goto out_drop; + return -EINVAL; } - error = 0; -out_drop: - if (pag) - xfs_perag_put(pag); - return error; + return 0; } /* @@ -2613,10 +2598,10 @@ xfs_ialloc_read_agi( return error; agi = agibp->b_addr; - if (!pag->pagi_init) { + if (!xfs_perag_initialised_agi(pag)) { pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); pag->pagi_count = be32_to_cpu(agi->agi_count); - pag->pagi_init = 1; + set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate); } /* @@ -2924,26 +2909,24 @@ xfs_ialloc_calc_rootino( */ int xfs_ialloc_check_shrink( + struct xfs_perag *pag, struct xfs_trans *tp, - xfs_agnumber_t agno, struct xfs_buf *agibp, xfs_agblock_t new_length) { struct xfs_inobt_rec_incore rec; struct xfs_btree_cur *cur; - struct xfs_mount *mp = tp->t_mountp; - struct xfs_perag *pag; - xfs_agino_t agino = XFS_AGB_TO_AGINO(mp, new_length); + xfs_agino_t agino; int has; int error; - if (!xfs_has_sparseinodes(mp)) + if (!xfs_has_sparseinodes(pag->pag_mount)) return 0; - pag = xfs_perag_get(mp, agno); - cur = xfs_inobt_init_cursor(mp, tp, agibp, pag, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(pag, tp, agibp, XFS_BTNUM_INO); /* Look up the inobt record that would correspond to the new EOFS. */ + agino = XFS_AGB_TO_AGINO(pag->pag_mount, new_length); error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has); if (error || !has) goto out; @@ -2964,6 +2947,5 @@ xfs_ialloc_check_shrink( } out: xfs_btree_del_cursor(cur, error); - xfs_perag_put(pag); return error; } diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 9bbbca6ac4ed..ab8c30b4ec22 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -12,6 +12,7 @@ struct xfs_imap; struct xfs_mount; struct xfs_trans; struct xfs_btree_cur; +struct xfs_perag; /* Move inodes in clusters of this size */ #define XFS_INODE_BIG_CLUSTER_SIZE 8192 @@ -47,7 +48,7 @@ int xfs_difree(struct xfs_trans *tp, struct xfs_perag *pag, */ int xfs_imap( - struct xfs_mount *mp, /* file system mount structure */ + struct xfs_perag *pag, struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t ino, /* inode to locate */ struct xfs_imap *imap, /* location map structure */ @@ -106,7 +107,7 @@ int xfs_ialloc_cluster_alignment(struct xfs_mount *mp); void xfs_ialloc_setup_geometry(struct xfs_mount *mp); xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit); -int xfs_ialloc_check_shrink(struct xfs_trans *tp, xfs_agnumber_t agno, +int xfs_ialloc_check_shrink(struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agibp, xfs_agblock_t new_length); #endif /* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index 8c83e265770c..9b28211d5a4c 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -36,8 +36,8 @@ STATIC struct xfs_btree_cur * xfs_inobt_dup_cursor( struct xfs_btree_cur *cur) { - return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, - cur->bc_ag.agbp, cur->bc_ag.pag, cur->bc_btnum); + return xfs_inobt_init_cursor(cur->bc_ag.pag, cur->bc_tp, + cur->bc_ag.agbp, cur->bc_btnum); } STATIC void @@ -103,15 +103,15 @@ __xfs_inobt_alloc_block( memset(&args, 0, sizeof(args)); args.tp = cur->bc_tp; args.mp = cur->bc_mp; + args.pag = cur->bc_ag.pag; args.oinfo = XFS_RMAP_OINFO_INOBT; - args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_ag.pag->pag_agno, sbno); args.minlen = 1; args.maxlen = 1; args.prod = 1; - args.type = XFS_ALLOCTYPE_NEAR_BNO; args.resv = resv; - error = xfs_alloc_vextent(&args); + error = xfs_alloc_vextent_near_bno(&args, + XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno, sbno)); if (error) return error; @@ -291,8 +291,8 @@ xfs_inobt_verify( * Similarly, during log recovery we will have a perag structure * attached, but the agi information will not yet have been initialised * from the on disk AGI. We don't currently use any of this information, - * but beware of the landmine (i.e. need to check pag->pagi_init) if we - * ever do. + * but beware of the landmine (i.e. need to check + * xfs_perag_initialised_agi(pag)) if we ever do. */ if (xfs_has_crc(mp)) { fa = xfs_btree_sblock_v5hdr_verify(bp); @@ -427,11 +427,11 @@ static const struct xfs_btree_ops xfs_finobt_ops = { */ static struct xfs_btree_cur * xfs_inobt_init_common( - struct xfs_mount *mp, /* file system mount point */ - struct xfs_trans *tp, /* transaction pointer */ struct xfs_perag *pag, + struct xfs_trans *tp, /* transaction pointer */ xfs_btnum_t btnum) /* ialloc or free ino btree */ { + struct xfs_mount *mp = pag->pag_mount; struct xfs_btree_cur *cur; cur = xfs_btree_alloc_cursor(mp, tp, btnum, @@ -456,16 +456,15 @@ xfs_inobt_init_common( /* Create an inode btree cursor. */ struct xfs_btree_cur * xfs_inobt_init_cursor( - struct xfs_mount *mp, + struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agbp, - struct xfs_perag *pag, xfs_btnum_t btnum) { struct xfs_btree_cur *cur; struct xfs_agi *agi = agbp->b_addr; - cur = xfs_inobt_init_common(mp, tp, pag, btnum); + cur = xfs_inobt_init_common(pag, tp, btnum); if (btnum == XFS_BTNUM_INO) cur->bc_nlevels = be32_to_cpu(agi->agi_level); else @@ -477,14 +476,13 @@ xfs_inobt_init_cursor( /* Create an inode btree cursor with a fake root for staging. */ struct xfs_btree_cur * xfs_inobt_stage_cursor( - struct xfs_mount *mp, - struct xbtree_afakeroot *afake, struct xfs_perag *pag, + struct xbtree_afakeroot *afake, xfs_btnum_t btnum) { struct xfs_btree_cur *cur; - cur = xfs_inobt_init_common(mp, NULL, pag, btnum); + cur = xfs_inobt_init_common(pag, NULL, btnum); xfs_btree_stage_afakeroot(cur, afake); return cur; } @@ -708,9 +706,8 @@ xfs_inobt_max_size( /* Read AGI and create inobt cursor. */ int xfs_inobt_cur( - struct xfs_mount *mp, - struct xfs_trans *tp, struct xfs_perag *pag, + struct xfs_trans *tp, xfs_btnum_t which, struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp) @@ -725,16 +722,15 @@ xfs_inobt_cur( if (error) return error; - cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, pag, which); + cur = xfs_inobt_init_cursor(pag, tp, *agi_bpp, which); *curpp = cur; return 0; } static int xfs_inobt_count_blocks( - struct xfs_mount *mp, - struct xfs_trans *tp, struct xfs_perag *pag, + struct xfs_trans *tp, xfs_btnum_t btnum, xfs_extlen_t *tree_blocks) { @@ -742,7 +738,7 @@ xfs_inobt_count_blocks( struct xfs_btree_cur *cur = NULL; int error; - error = xfs_inobt_cur(mp, tp, pag, btnum, &cur, &agbp); + error = xfs_inobt_cur(pag, tp, btnum, &cur, &agbp); if (error) return error; @@ -779,22 +775,21 @@ xfs_finobt_read_blocks( */ int xfs_finobt_calc_reserves( - struct xfs_mount *mp, - struct xfs_trans *tp, struct xfs_perag *pag, + struct xfs_trans *tp, xfs_extlen_t *ask, xfs_extlen_t *used) { xfs_extlen_t tree_len = 0; int error; - if (!xfs_has_finobt(mp)) + if (!xfs_has_finobt(pag->pag_mount)) return 0; - if (xfs_has_inobtcounts(mp)) + if (xfs_has_inobtcounts(pag->pag_mount)) error = xfs_finobt_read_blocks(pag, tp, &tree_len); else - error = xfs_inobt_count_blocks(mp, tp, pag, XFS_BTNUM_FINO, + error = xfs_inobt_count_blocks(pag, tp, XFS_BTNUM_FINO, &tree_len); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.h b/fs/xfs/libxfs/xfs_ialloc_btree.h index 26451cb76b98..e859a6e05230 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.h +++ b/fs/xfs/libxfs/xfs_ialloc_btree.h @@ -46,12 +46,10 @@ struct xfs_perag; (maxrecs) * sizeof(xfs_inobt_key_t) + \ ((index) - 1) * sizeof(xfs_inobt_ptr_t))) -extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *mp, - struct xfs_trans *tp, struct xfs_buf *agbp, - struct xfs_perag *pag, xfs_btnum_t btnum); -struct xfs_btree_cur *xfs_inobt_stage_cursor(struct xfs_mount *mp, - struct xbtree_afakeroot *afake, struct xfs_perag *pag, - xfs_btnum_t btnum); +extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_perag *pag, + struct xfs_trans *tp, struct xfs_buf *agbp, xfs_btnum_t btnum); +struct xfs_btree_cur *xfs_inobt_stage_cursor(struct xfs_perag *pag, + struct xbtree_afakeroot *afake, xfs_btnum_t btnum); extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); /* ir_holemask to inode allocation bitmap conversion */ @@ -64,13 +62,13 @@ int xfs_inobt_rec_check_count(struct xfs_mount *, #define xfs_inobt_rec_check_count(mp, rec) 0 #endif /* DEBUG */ -int xfs_finobt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, - struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used); +int xfs_finobt_calc_reserves(struct xfs_perag *perag, struct xfs_trans *tp, + xfs_extlen_t *ask, xfs_extlen_t *used); extern xfs_extlen_t xfs_iallocbt_calc_size(struct xfs_mount *mp, unsigned long long len); -int xfs_inobt_cur(struct xfs_mount *mp, struct xfs_trans *tp, - struct xfs_perag *pag, xfs_btnum_t btnum, - struct xfs_btree_cur **curpp, struct xfs_buf **agi_bpp); +int xfs_inobt_cur(struct xfs_perag *pag, struct xfs_trans *tp, + xfs_btnum_t btnum, struct xfs_btree_cur **curpp, + struct xfs_buf **agi_bpp); void xfs_inobt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 6f7ed9288fe4..bcf46aa0d08b 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1213,37 +1213,33 @@ out_error: STATIC int xfs_refcount_adjust( struct xfs_btree_cur *cur, - xfs_agblock_t agbno, - xfs_extlen_t aglen, - xfs_agblock_t *new_agbno, - xfs_extlen_t *new_aglen, + xfs_agblock_t *agbno, + xfs_extlen_t *aglen, enum xfs_refc_adjust_op adj) { bool shape_changed; int shape_changes = 0; int error; - *new_agbno = agbno; - *new_aglen = aglen; if (adj == XFS_REFCOUNT_ADJUST_INCREASE) - trace_xfs_refcount_increase(cur->bc_mp, cur->bc_ag.pag->pag_agno, - agbno, aglen); + trace_xfs_refcount_increase(cur->bc_mp, + cur->bc_ag.pag->pag_agno, *agbno, *aglen); else - trace_xfs_refcount_decrease(cur->bc_mp, cur->bc_ag.pag->pag_agno, - agbno, aglen); + trace_xfs_refcount_decrease(cur->bc_mp, + cur->bc_ag.pag->pag_agno, *agbno, *aglen); /* * Ensure that no rcextents cross the boundary of the adjustment range. */ error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, - agbno, &shape_changed); + *agbno, &shape_changed); if (error) goto out_error; if (shape_changed) shape_changes++; error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED, - agbno + aglen, &shape_changed); + *agbno + *aglen, &shape_changed); if (error) goto out_error; if (shape_changed) @@ -1253,7 +1249,7 @@ xfs_refcount_adjust( * Try to merge with the left or right extents of the range. */ error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED, - new_agbno, new_aglen, adj, &shape_changed); + agbno, aglen, adj, &shape_changed); if (error) goto out_error; if (shape_changed) @@ -1262,7 +1258,7 @@ xfs_refcount_adjust( cur->bc_ag.refc.shape_changes++; /* Now that we've taken care of the ends, adjust the middle extents */ - error = xfs_refcount_adjust_extents(cur, new_agbno, new_aglen, adj); + error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj); if (error) goto out_error; @@ -1298,21 +1294,20 @@ xfs_refcount_finish_one_cleanup( static inline int xfs_refcount_continue_op( struct xfs_btree_cur *cur, - xfs_fsblock_t startblock, - xfs_agblock_t new_agbno, - xfs_extlen_t new_len, - xfs_fsblock_t *new_fsbno) + struct xfs_refcount_intent *ri, + xfs_agblock_t new_agbno) { struct xfs_mount *mp = cur->bc_mp; struct xfs_perag *pag = cur->bc_ag.pag; - if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, new_len))) + if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno, + ri->ri_blockcount))) return -EFSCORRUPTED; - *new_fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); + ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno); - ASSERT(xfs_verify_fsbext(mp, *new_fsbno, new_len)); - ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, *new_fsbno)); + ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount)); + ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock)); return 0; } @@ -1327,11 +1322,7 @@ xfs_refcount_continue_op( int xfs_refcount_finish_one( struct xfs_trans *tp, - enum xfs_refcount_intent_type type, - xfs_fsblock_t startblock, - xfs_extlen_t blockcount, - xfs_fsblock_t *new_fsb, - xfs_extlen_t *new_len, + struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur) { struct xfs_mount *mp = tp->t_mountp; @@ -1339,17 +1330,16 @@ xfs_refcount_finish_one( struct xfs_buf *agbp = NULL; int error = 0; xfs_agblock_t bno; - xfs_agblock_t new_agbno; unsigned long nr_ops = 0; int shape_changes = 0; struct xfs_perag *pag; - pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock)); - bno = XFS_FSB_TO_AGBNO(mp, startblock); + pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock)); + bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock); - trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, startblock), - type, XFS_FSB_TO_AGBNO(mp, startblock), - blockcount); + trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock), + ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock), + ri->ri_blockcount); if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE)) { error = -EIO; @@ -1380,42 +1370,42 @@ xfs_refcount_finish_one( } *pcur = rcur; - switch (type) { + switch (ri->ri_type) { case XFS_REFCOUNT_INCREASE: - error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, XFS_REFCOUNT_ADJUST_INCREASE); + error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, + XFS_REFCOUNT_ADJUST_INCREASE); if (error) goto out_drop; - if (*new_len > 0) - error = xfs_refcount_continue_op(rcur, startblock, - new_agbno, *new_len, new_fsb); + if (ri->ri_blockcount > 0) + error = xfs_refcount_continue_op(rcur, ri, bno); break; case XFS_REFCOUNT_DECREASE: - error = xfs_refcount_adjust(rcur, bno, blockcount, &new_agbno, - new_len, XFS_REFCOUNT_ADJUST_DECREASE); + error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount, + XFS_REFCOUNT_ADJUST_DECREASE); if (error) goto out_drop; - if (*new_len > 0) - error = xfs_refcount_continue_op(rcur, startblock, - new_agbno, *new_len, new_fsb); + if (ri->ri_blockcount > 0) + error = xfs_refcount_continue_op(rcur, ri, bno); break; case XFS_REFCOUNT_ALLOC_COW: - *new_fsb = startblock + blockcount; - *new_len = 0; - error = __xfs_refcount_cow_alloc(rcur, bno, blockcount); + error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount); + if (error) + goto out_drop; + ri->ri_blockcount = 0; break; case XFS_REFCOUNT_FREE_COW: - *new_fsb = startblock + blockcount; - *new_len = 0; - error = __xfs_refcount_cow_free(rcur, bno, blockcount); + error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount); + if (error) + goto out_drop; + ri->ri_blockcount = 0; break; default: ASSERT(0); error = -EFSCORRUPTED; } - if (!error && *new_len > 0) - trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, type, - bno, blockcount, new_agbno, *new_len); + if (!error && ri->ri_blockcount > 0) + trace_xfs_refcount_finish_one_leftover(mp, pag->pag_agno, + ri->ri_type, bno, ri->ri_blockcount); out_drop: xfs_perag_put(pag); return error; diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h index 452f30556f5a..c633477ce3ce 100644 --- a/fs/xfs/libxfs/xfs_refcount.h +++ b/fs/xfs/libxfs/xfs_refcount.h @@ -75,9 +75,7 @@ void xfs_refcount_decrease_extent(struct xfs_trans *tp, extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp, struct xfs_btree_cur *rcur, int error); extern int xfs_refcount_finish_one(struct xfs_trans *tp, - enum xfs_refcount_intent_type type, xfs_fsblock_t startblock, - xfs_extlen_t blockcount, xfs_fsblock_t *new_fsb, - xfs_extlen_t *new_len, struct xfs_btree_cur **pcur); + struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur); extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur, xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno, diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c index e1f789866683..f3b860970b26 100644 --- a/fs/xfs/libxfs/xfs_refcount_btree.c +++ b/fs/xfs/libxfs/xfs_refcount_btree.c @@ -67,14 +67,14 @@ xfs_refcountbt_alloc_block( memset(&args, 0, sizeof(args)); args.tp = cur->bc_tp; args.mp = cur->bc_mp; - args.type = XFS_ALLOCTYPE_NEAR_BNO; - args.fsbno = XFS_AGB_TO_FSB(cur->bc_mp, cur->bc_ag.pag->pag_agno, - xfs_refc_block(args.mp)); + args.pag = cur->bc_ag.pag; args.oinfo = XFS_RMAP_OINFO_REFC; args.minlen = args.maxlen = args.prod = 1; args.resv = XFS_AG_RESV_METADATA; - error = xfs_alloc_vextent(&args); + error = xfs_alloc_vextent_near_bno(&args, + XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno, + xfs_refc_block(args.mp))); if (error) goto out_error; trace_xfs_refcountbt_alloc_block(cur->bc_mp, cur->bc_ag.pag->pag_agno, @@ -227,7 +227,7 @@ xfs_refcountbt_verify( return fa; level = be16_to_cpu(block->bb_level); - if (pag && pag->pagf_init) { + if (pag && xfs_perag_initialised_agf(pag)) { if (level >= pag->pagf_refcount_level) return __this_address; } else if (level >= mp->m_refc_maxlevels) diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index b56aca1e7c66..df720041cd3d 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -2390,13 +2390,7 @@ xfs_rmap_finish_one_cleanup( int xfs_rmap_finish_one( struct xfs_trans *tp, - enum xfs_rmap_intent_type type, - uint64_t owner, - int whichfork, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t blockcount, - xfs_exntst_t state, + struct xfs_rmap_intent *ri, struct xfs_btree_cur **pcur) { struct xfs_mount *mp = tp->t_mountp; @@ -2408,11 +2402,13 @@ xfs_rmap_finish_one( xfs_agblock_t bno; bool unwritten; - pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, startblock)); - bno = XFS_FSB_TO_AGBNO(mp, startblock); + pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, ri->ri_bmap.br_startblock)); + bno = XFS_FSB_TO_AGBNO(mp, ri->ri_bmap.br_startblock); - trace_xfs_rmap_deferred(mp, pag->pag_agno, type, bno, owner, whichfork, - startoff, blockcount, state); + trace_xfs_rmap_deferred(mp, pag->pag_agno, ri->ri_type, bno, + ri->ri_owner, ri->ri_whichfork, + ri->ri_bmap.br_startoff, ri->ri_bmap.br_blockcount, + ri->ri_bmap.br_state); if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_RMAP_FINISH_ONE)) { error = -EIO; @@ -2448,35 +2444,37 @@ xfs_rmap_finish_one( } *pcur = rcur; - xfs_rmap_ino_owner(&oinfo, owner, whichfork, startoff); - unwritten = state == XFS_EXT_UNWRITTEN; - bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, startblock); + xfs_rmap_ino_owner(&oinfo, ri->ri_owner, ri->ri_whichfork, + ri->ri_bmap.br_startoff); + unwritten = ri->ri_bmap.br_state == XFS_EXT_UNWRITTEN; + bno = XFS_FSB_TO_AGBNO(rcur->bc_mp, ri->ri_bmap.br_startblock); - switch (type) { + switch (ri->ri_type) { case XFS_RMAP_ALLOC: case XFS_RMAP_MAP: - error = xfs_rmap_map(rcur, bno, blockcount, unwritten, &oinfo); + error = xfs_rmap_map(rcur, bno, ri->ri_bmap.br_blockcount, + unwritten, &oinfo); break; case XFS_RMAP_MAP_SHARED: - error = xfs_rmap_map_shared(rcur, bno, blockcount, unwritten, - &oinfo); + error = xfs_rmap_map_shared(rcur, bno, + ri->ri_bmap.br_blockcount, unwritten, &oinfo); break; case XFS_RMAP_FREE: case XFS_RMAP_UNMAP: - error = xfs_rmap_unmap(rcur, bno, blockcount, unwritten, - &oinfo); + error = xfs_rmap_unmap(rcur, bno, ri->ri_bmap.br_blockcount, + unwritten, &oinfo); break; case XFS_RMAP_UNMAP_SHARED: - error = xfs_rmap_unmap_shared(rcur, bno, blockcount, unwritten, - &oinfo); + error = xfs_rmap_unmap_shared(rcur, bno, + ri->ri_bmap.br_blockcount, unwritten, &oinfo); break; case XFS_RMAP_CONVERT: - error = xfs_rmap_convert(rcur, bno, blockcount, !unwritten, - &oinfo); + error = xfs_rmap_convert(rcur, bno, ri->ri_bmap.br_blockcount, + !unwritten, &oinfo); break; case XFS_RMAP_CONVERT_SHARED: - error = xfs_rmap_convert_shared(rcur, bno, blockcount, - !unwritten, &oinfo); + error = xfs_rmap_convert_shared(rcur, bno, + ri->ri_bmap.br_blockcount, !unwritten, &oinfo); break; default: ASSERT(0); diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index 54741a591a17..2dac88cea28d 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -179,10 +179,8 @@ void xfs_rmap_free_extent(struct xfs_trans *tp, xfs_agnumber_t agno, void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp, struct xfs_btree_cur *rcur, int error); -int xfs_rmap_finish_one(struct xfs_trans *tp, enum xfs_rmap_intent_type type, - uint64_t owner, int whichfork, xfs_fileoff_t startoff, - xfs_fsblock_t startblock, xfs_filblks_t blockcount, - xfs_exntst_t state, struct xfs_btree_cur **pcur); +int xfs_rmap_finish_one(struct xfs_trans *tp, struct xfs_rmap_intent *ri, + struct xfs_btree_cur **pcur); int xfs_rmap_lookup_le_range(struct xfs_btree_cur *cur, xfs_agblock_t bno, uint64_t owner, uint64_t offset, unsigned int flags, diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 7f83f62e51e0..d3285684bb5e 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -313,7 +313,7 @@ xfs_rmapbt_verify( return fa; level = be16_to_cpu(block->bb_level); - if (pag && pag->pagf_init) { + if (pag && xfs_perag_initialised_agf(pag)) { if (level >= pag->pagf_levels[XFS_BTNUM_RMAPi]) return __this_address; } else if (level >= mp->m_rmap_maxlevels) diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 1eeecf2eb2a7..99cc03a298e2 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -909,7 +909,8 @@ xfs_sb_mount_common( struct xfs_mount *mp, struct xfs_sb *sbp) { - mp->m_agfrotor = mp->m_agirotor = 0; + mp->m_agfrotor = 0; + atomic_set(&mp->m_agirotor, 0); mp->m_maxagi = mp->m_sb.sb_agcount; mp->m_blkbit_log = sbp->sb_blocklog + XFS_NBBYLOG; mp->m_blkbb_log = sbp->sb_blocklog - BBSHIFT; diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index d75d82151eeb..c37e6d72760b 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -191,14 +191,15 @@ xrep_agf_init_header( struct xfs_agf *old_agf) { struct xfs_mount *mp = sc->mp; + struct xfs_perag *pag = sc->sa.pag; struct xfs_agf *agf = agf_bp->b_addr; memcpy(old_agf, agf, sizeof(*old_agf)); memset(agf, 0, BBTOB(agf_bp->b_length)); agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); - agf->agf_seqno = cpu_to_be32(sc->sa.pag->pag_agno); - agf->agf_length = cpu_to_be32(sc->sa.pag->block_count); + agf->agf_seqno = cpu_to_be32(pag->pag_agno); + agf->agf_length = cpu_to_be32(pag->block_count); agf->agf_flfirst = old_agf->agf_flfirst; agf->agf_fllast = old_agf->agf_fllast; agf->agf_flcount = old_agf->agf_flcount; @@ -206,8 +207,8 @@ xrep_agf_init_header( uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); /* Mark the incore AGF data stale until we're done fixing things. */ - ASSERT(sc->sa.pag->pagf_init); - sc->sa.pag->pagf_init = 0; + ASSERT(xfs_perag_initialised_agf(pag)); + clear_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); } /* Set btree root information in an AGF. */ @@ -333,7 +334,7 @@ xrep_agf_commit_new( pag->pagf_levels[XFS_BTNUM_RMAPi] = be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level); - pag->pagf_init = 1; + set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate); return 0; } @@ -434,7 +435,7 @@ xrep_agf( out_revert: /* Mark the incore AGF state stale and revert the AGF. */ - sc->sa.pag->pagf_init = 0; + clear_bit(XFS_AGSTATE_AGF_INIT, &sc->sa.pag->pag_opstate); memcpy(agf, &old_agf, sizeof(old_agf)); return error; } @@ -618,7 +619,7 @@ xrep_agfl_update_agf( xfs_force_summary_recalc(sc->mp); /* Update the AGF counters. */ - if (sc->sa.pag->pagf_init) + if (xfs_perag_initialised_agf(sc->sa.pag)) sc->sa.pag->pagf_flcount = flcount; agf->agf_flfirst = cpu_to_be32(0); agf->agf_flcount = cpu_to_be32(flcount); @@ -822,14 +823,15 @@ xrep_agi_init_header( struct xfs_agi *old_agi) { struct xfs_agi *agi = agi_bp->b_addr; + struct xfs_perag *pag = sc->sa.pag; struct xfs_mount *mp = sc->mp; memcpy(old_agi, agi, sizeof(*old_agi)); memset(agi, 0, BBTOB(agi_bp->b_length)); agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); - agi->agi_seqno = cpu_to_be32(sc->sa.pag->pag_agno); - agi->agi_length = cpu_to_be32(sc->sa.pag->block_count); + agi->agi_seqno = cpu_to_be32(pag->pag_agno); + agi->agi_length = cpu_to_be32(pag->block_count); agi->agi_newino = cpu_to_be32(NULLAGINO); agi->agi_dirino = cpu_to_be32(NULLAGINO); if (xfs_has_crc(mp)) @@ -840,8 +842,8 @@ xrep_agi_init_header( sizeof(agi->agi_unlinked)); /* Mark the incore AGF data stale until we're done fixing things. */ - ASSERT(sc->sa.pag->pagi_init); - sc->sa.pag->pagi_init = 0; + ASSERT(xfs_perag_initialised_agi(pag)); + clear_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate); } /* Set btree root information in an AGI. */ @@ -873,8 +875,7 @@ xrep_agi_calc_from_btrees( xfs_agino_t freecount; int error; - cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, - sc->sa.pag, XFS_BTNUM_INO); + cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp, agi_bp, XFS_BTNUM_INO); error = xfs_ialloc_count_inodes(cur, &count, &freecount); if (error) goto err; @@ -894,8 +895,8 @@ xrep_agi_calc_from_btrees( if (xfs_has_finobt(mp) && xfs_has_inobtcounts(mp)) { xfs_agblock_t blocks; - cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, - sc->sa.pag, XFS_BTNUM_FINO); + cur = xfs_inobt_init_cursor(sc->sa.pag, sc->tp, agi_bp, + XFS_BTNUM_FINO); error = xfs_btree_count_blocks(cur, &blocks); if (error) goto err; @@ -929,7 +930,7 @@ xrep_agi_commit_new( pag = sc->sa.pag; pag->pagi_count = be32_to_cpu(agi->agi_count); pag->pagi_freecount = be32_to_cpu(agi->agi_freecount); - pag->pagi_init = 1; + set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate); return 0; } @@ -994,7 +995,7 @@ xrep_agi( out_revert: /* Mark the incore AGI state stale and revert the AGI. */ - sc->sa.pag->pagi_init = 0; + clear_bit(XFS_AGSTATE_AGI_INIT, &sc->sa.pag->pag_opstate); memcpy(agi, &old_agi, sizeof(old_agi)); return error; } diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index d50d0eab196a..dbbc7037074c 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -662,7 +662,7 @@ xchk_bmap_check_rmaps( error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag); if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) { - xfs_perag_put(pag); + xfs_perag_rele(pag); return error; } } diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 613260b04a3d..848a8e32e56f 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -478,15 +478,15 @@ xchk_ag_btcur_init( /* Set up a inobt cursor for cross-referencing. */ if (sa->agi_bp && xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) { - sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, - sa->pag, XFS_BTNUM_INO); + sa->ino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp, + XFS_BTNUM_INO); } /* Set up a finobt cursor for cross-referencing. */ if (sa->agi_bp && xfs_has_finobt(mp) && xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) { - sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, - sa->pag, XFS_BTNUM_FINO); + sa->fino_cur = xfs_inobt_init_cursor(sa->pag, sc->tp, sa->agi_bp, + XFS_BTNUM_FINO); } /* Set up a rmapbt cursor for cross-referencing. */ @@ -636,6 +636,7 @@ xchk_get_inode( { struct xfs_imap imap; struct xfs_mount *mp = sc->mp; + struct xfs_perag *pag; struct xfs_inode *ip_in = XFS_I(file_inode(sc->file)); struct xfs_inode *ip = NULL; int error; @@ -671,10 +672,14 @@ xchk_get_inode( * Otherwise, we really couldn't find it so tell userspace * that it no longer exists. */ - error = xfs_imap(sc->mp, sc->tp, sc->sm->sm_ino, &imap, - XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE); - if (error) - return -ENOENT; + pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino)); + if (pag) { + error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap, + XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE); + xfs_perag_put(pag); + if (error) + return -ENOENT; + } error = -EFSCORRUPTED; fallthrough; default: diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 4777e7b89fdc..f0c7f41897b9 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -86,7 +86,8 @@ xchk_fscount_warmup( for_each_perag(mp, agno, pag) { if (xchk_should_terminate(sc, &error)) break; - if (pag->pagi_init && pag->pagf_init) + if (xfs_perag_initialised_agi(pag) && + xfs_perag_initialised_agf(pag)) continue; /* Lock both AG headers. */ @@ -101,7 +102,8 @@ xchk_fscount_warmup( * These are supposed to be initialized by the header read * function. */ - if (!pag->pagi_init || !pag->pagf_init) { + if (!xfs_perag_initialised_agi(pag) || + !xfs_perag_initialised_agf(pag)) { error = -EFSCORRUPTED; break; } @@ -117,7 +119,7 @@ xchk_fscount_warmup( if (agi_bp) xfs_buf_relse(agi_bp); if (pag) - xfs_perag_put(pag); + xfs_perag_rele(pag); return error; } @@ -220,7 +222,8 @@ retry: break; /* This somehow got unset since the warmup? */ - if (!pag->pagi_init || !pag->pagf_init) { + if (!xfs_perag_initialised_agi(pag) || + !xfs_perag_initialised_agf(pag)) { error = -EFSCORRUPTED; break; } @@ -249,7 +252,7 @@ retry: } if (pag) - xfs_perag_put(pag); + xfs_perag_rele(pag); if (error) { xchk_set_incomplete(sc); return error; diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 4b92f9253ccd..1b71174ec0d6 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -206,7 +206,7 @@ xrep_calc_ag_resblks( return 0; pag = xfs_perag_get(mp, sm->sm_agno); - if (pag->pagi_init) { + if (xfs_perag_initialised_agi(pag)) { /* Use in-core icount if possible. */ icount = pag->pagi_count; } else { @@ -326,15 +326,14 @@ xrep_alloc_ag_block( args.tp = sc->tp; args.mp = sc->mp; + args.pag = sc->sa.pag; args.oinfo = *oinfo; - args.fsbno = XFS_AGB_TO_FSB(args.mp, sc->sa.pag->pag_agno, 0); args.minlen = 1; args.maxlen = 1; args.prod = 1; - args.type = XFS_ALLOCTYPE_THIS_AG; args.resv = resv; - error = xfs_alloc_vextent(&args); + error = xfs_alloc_vextent_this_ag(&args, sc->sa.pag->pag_agno); if (error) return error; if (args.fsbno == NULLFSBLOCK) diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index a05f44eb8178..791db7d9c849 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -242,7 +242,7 @@ xfs_acl_set_mode( } int -xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +xfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type) { umode_t mode; @@ -258,7 +258,7 @@ xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, return error; if (type == ACL_TYPE_ACCESS) { - error = posix_acl_update_mode(mnt_userns, inode, &mode, &acl); + error = posix_acl_update_mode(idmap, inode, &mode, &acl); if (error) return error; set_mode = true; diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index dcd176149c7a..bf7f960997d3 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -11,7 +11,7 @@ struct posix_acl; #ifdef CONFIG_XFS_POSIX_ACL extern struct posix_acl *xfs_get_acl(struct inode *inode, int type, bool rcu); -extern int xfs_set_acl(struct user_namespace *mnt_userns, struct dentry *dentry, +extern int xfs_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, struct posix_acl *acl, int type); extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); void xfs_forget_acl(struct inode *inode, const char *name); diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 41323da523d1..6e2f0013380a 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -246,18 +246,11 @@ static int xfs_trans_log_finish_bmap_update( struct xfs_trans *tp, struct xfs_bud_log_item *budp, - enum xfs_bmap_intent_type type, - struct xfs_inode *ip, - int whichfork, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t *blockcount, - xfs_exntst_t state) + struct xfs_bmap_intent *bi) { int error; - error = xfs_bmap_finish_one(tp, ip, type, whichfork, startoff, - startblock, blockcount, state); + error = xfs_bmap_finish_one(tp, bi); /* * Mark the transaction dirty, even on error. This ensures the @@ -290,24 +283,24 @@ xfs_bmap_update_diff_items( /* Set the map extent flags for this mapping. */ static void xfs_trans_set_bmap_flags( - struct xfs_map_extent *bmap, + struct xfs_map_extent *map, enum xfs_bmap_intent_type type, int whichfork, xfs_exntst_t state) { - bmap->me_flags = 0; + map->me_flags = 0; switch (type) { case XFS_BMAP_MAP: case XFS_BMAP_UNMAP: - bmap->me_flags = type; + map->me_flags = type; break; default: ASSERT(0); } if (state == XFS_EXT_UNWRITTEN) - bmap->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN; + map->me_flags |= XFS_BMAP_EXTENT_UNWRITTEN; if (whichfork == XFS_ATTR_FORK) - bmap->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK; + map->me_flags |= XFS_BMAP_EXTENT_ATTR_FORK; } /* Log bmap updates in the intent item. */ @@ -315,7 +308,7 @@ STATIC void xfs_bmap_update_log_item( struct xfs_trans *tp, struct xfs_bui_log_item *buip, - struct xfs_bmap_intent *bmap) + struct xfs_bmap_intent *bi) { uint next_extent; struct xfs_map_extent *map; @@ -331,12 +324,12 @@ xfs_bmap_update_log_item( next_extent = atomic_inc_return(&buip->bui_next_extent) - 1; ASSERT(next_extent < buip->bui_format.bui_nextents); map = &buip->bui_format.bui_extents[next_extent]; - map->me_owner = bmap->bi_owner->i_ino; - map->me_startblock = bmap->bi_bmap.br_startblock; - map->me_startoff = bmap->bi_bmap.br_startoff; - map->me_len = bmap->bi_bmap.br_blockcount; - xfs_trans_set_bmap_flags(map, bmap->bi_type, bmap->bi_whichfork, - bmap->bi_bmap.br_state); + map->me_owner = bi->bi_owner->i_ino; + map->me_startblock = bi->bi_bmap.br_startblock; + map->me_startoff = bi->bi_bmap.br_startoff; + map->me_len = bi->bi_bmap.br_blockcount; + xfs_trans_set_bmap_flags(map, bi->bi_type, bi->bi_whichfork, + bi->bi_bmap.br_state); } static struct xfs_log_item * @@ -348,15 +341,15 @@ xfs_bmap_update_create_intent( { struct xfs_mount *mp = tp->t_mountp; struct xfs_bui_log_item *buip = xfs_bui_init(mp); - struct xfs_bmap_intent *bmap; + struct xfs_bmap_intent *bi; ASSERT(count == XFS_BUI_MAX_FAST_EXTENTS); xfs_trans_add_item(tp, &buip->bui_item); if (sort) list_sort(mp, items, xfs_bmap_update_diff_items); - list_for_each_entry(bmap, items, bi_list) - xfs_bmap_update_log_item(tp, buip, bmap); + list_for_each_entry(bi, items, bi_list) + xfs_bmap_update_log_item(tp, buip, bi); return &buip->bui_item; } @@ -378,25 +371,17 @@ xfs_bmap_update_finish_item( struct list_head *item, struct xfs_btree_cur **state) { - struct xfs_bmap_intent *bmap; - xfs_filblks_t count; + struct xfs_bmap_intent *bi; int error; - bmap = container_of(item, struct xfs_bmap_intent, bi_list); - count = bmap->bi_bmap.br_blockcount; - error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done), - bmap->bi_type, - bmap->bi_owner, bmap->bi_whichfork, - bmap->bi_bmap.br_startoff, - bmap->bi_bmap.br_startblock, - &count, - bmap->bi_bmap.br_state); - if (!error && count > 0) { - ASSERT(bmap->bi_type == XFS_BMAP_UNMAP); - bmap->bi_bmap.br_blockcount = count; + bi = container_of(item, struct xfs_bmap_intent, bi_list); + + error = xfs_trans_log_finish_bmap_update(tp, BUD_ITEM(done), bi); + if (!error && bi->bi_bmap.br_blockcount > 0) { + ASSERT(bi->bi_type == XFS_BMAP_UNMAP); return -EAGAIN; } - kmem_cache_free(xfs_bmap_intent_cache, bmap); + kmem_cache_free(xfs_bmap_intent_cache, bi); return error; } @@ -413,10 +398,10 @@ STATIC void xfs_bmap_update_cancel_item( struct list_head *item) { - struct xfs_bmap_intent *bmap; + struct xfs_bmap_intent *bi; - bmap = container_of(item, struct xfs_bmap_intent, bi_list); - kmem_cache_free(xfs_bmap_intent_cache, bmap); + bi = container_of(item, struct xfs_bmap_intent, bi_list); + kmem_cache_free(xfs_bmap_intent_cache, bi); } const struct xfs_defer_op_type xfs_bmap_update_defer_type = { @@ -434,18 +419,18 @@ xfs_bui_validate( struct xfs_mount *mp, struct xfs_bui_log_item *buip) { - struct xfs_map_extent *bmap; + struct xfs_map_extent *map; /* Only one mapping operation per BUI... */ if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) return false; - bmap = &buip->bui_format.bui_extents[0]; + map = &buip->bui_format.bui_extents[0]; - if (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS) + if (map->me_flags & ~XFS_BMAP_EXTENT_FLAGS) return false; - switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) { + switch (map->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) { case XFS_BMAP_MAP: case XFS_BMAP_UNMAP: break; @@ -453,13 +438,13 @@ xfs_bui_validate( return false; } - if (!xfs_verify_ino(mp, bmap->me_owner)) + if (!xfs_verify_ino(mp, map->me_owner)) return false; - if (!xfs_verify_fileext(mp, bmap->me_startoff, bmap->me_len)) + if (!xfs_verify_fileext(mp, map->me_startoff, map->me_len)) return false; - return xfs_verify_fsbext(mp, bmap->me_startblock, bmap->me_len); + return xfs_verify_fsbext(mp, map->me_startblock, map->me_len); } /* @@ -471,17 +456,13 @@ xfs_bui_item_recover( struct xfs_log_item *lip, struct list_head *capture_list) { - struct xfs_bmbt_irec irec; + struct xfs_bmap_intent fake = { }; struct xfs_bui_log_item *buip = BUI_ITEM(lip); struct xfs_trans *tp; struct xfs_inode *ip = NULL; struct xfs_mount *mp = lip->li_log->l_mp; - struct xfs_map_extent *bmap; + struct xfs_map_extent *map; struct xfs_bud_log_item *budp; - xfs_filblks_t count; - xfs_exntst_t state; - unsigned int bui_type; - int whichfork; int iext_delta; int error = 0; @@ -491,14 +472,12 @@ xfs_bui_item_recover( return -EFSCORRUPTED; } - bmap = &buip->bui_format.bui_extents[0]; - state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? - XFS_EXT_UNWRITTEN : XFS_EXT_NORM; - whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ? + map = &buip->bui_format.bui_extents[0]; + fake.bi_whichfork = (map->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ? XFS_ATTR_FORK : XFS_DATA_FORK; - bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK; + fake.bi_type = map->me_flags & XFS_BMAP_EXTENT_TYPE_MASK; - error = xlog_recover_iget(mp, bmap->me_owner, &ip); + error = xlog_recover_iget(mp, map->me_owner, &ip); if (error) return error; @@ -512,34 +491,34 @@ xfs_bui_item_recover( xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, 0); - if (bui_type == XFS_BMAP_MAP) + if (fake.bi_type == XFS_BMAP_MAP) iext_delta = XFS_IEXT_ADD_NOSPLIT_CNT; else iext_delta = XFS_IEXT_PUNCH_HOLE_CNT; - error = xfs_iext_count_may_overflow(ip, whichfork, iext_delta); + error = xfs_iext_count_may_overflow(ip, fake.bi_whichfork, iext_delta); if (error == -EFBIG) error = xfs_iext_count_upgrade(tp, ip, iext_delta); if (error) goto err_cancel; - count = bmap->me_len; - error = xfs_trans_log_finish_bmap_update(tp, budp, bui_type, ip, - whichfork, bmap->me_startoff, bmap->me_startblock, - &count, state); + fake.bi_owner = ip; + fake.bi_bmap.br_startblock = map->me_startblock; + fake.bi_bmap.br_startoff = map->me_startoff; + fake.bi_bmap.br_blockcount = map->me_len; + fake.bi_bmap.br_state = (map->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? + XFS_EXT_UNWRITTEN : XFS_EXT_NORM; + + error = xfs_trans_log_finish_bmap_update(tp, budp, &fake); if (error == -EFSCORRUPTED) - XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bmap, - sizeof(*bmap)); + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, map, + sizeof(*map)); if (error) goto err_cancel; - if (count > 0) { - ASSERT(bui_type == XFS_BMAP_UNMAP); - irec.br_startblock = bmap->me_startblock; - irec.br_blockcount = count; - irec.br_startoff = bmap->me_startoff; - irec.br_state = state; - xfs_bmap_unmap_extent(tp, ip, &irec); + if (fake.bi_bmap.br_blockcount > 0) { + ASSERT(fake.bi_type == XFS_BMAP_UNMAP); + xfs_bmap_unmap_extent(tp, ip, &fake.bi_bmap); } /* @@ -579,18 +558,18 @@ xfs_bui_item_relog( { struct xfs_bud_log_item *budp; struct xfs_bui_log_item *buip; - struct xfs_map_extent *extp; + struct xfs_map_extent *map; unsigned int count; count = BUI_ITEM(intent)->bui_format.bui_nextents; - extp = BUI_ITEM(intent)->bui_format.bui_extents; + map = BUI_ITEM(intent)->bui_format.bui_extents; tp->t_flags |= XFS_TRANS_DIRTY; budp = xfs_trans_get_bud(tp, BUI_ITEM(intent)); set_bit(XFS_LI_DIRTY, &budp->bud_item.li_flags); buip = xfs_bui_init(tp->t_mountp); - memcpy(buip->bui_format.bui_extents, extp, count * sizeof(*extp)); + memcpy(buip->bui_format.bui_extents, map, count * sizeof(*map)); atomic_set(&buip->bui_next_extent, count); xfs_trans_add_item(tp, &buip->bui_item); set_bit(XFS_LI_DIRTY, &buip->bui_item.li_flags); diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 867645b74d88..a09dd2606479 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -1410,7 +1410,7 @@ xfs_swap_extent_rmap( /* Unmap the old blocks in the source file. */ while (tirec.br_blockcount) { - ASSERT(tp->t_firstblock == NULLFSBLOCK); + ASSERT(tp->t_highest_agno == NULLAGNUMBER); trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec); /* Read extent from the source file */ diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index bfc829c07f03..afc4c78b9eed 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -21,23 +21,20 @@ STATIC int xfs_trim_extents( - struct xfs_mount *mp, - xfs_agnumber_t agno, + struct xfs_perag *pag, xfs_daddr_t start, xfs_daddr_t end, xfs_daddr_t minlen, uint64_t *blocks_trimmed) { + struct xfs_mount *mp = pag->pag_mount; struct block_device *bdev = mp->m_ddev_targp->bt_bdev; struct xfs_btree_cur *cur; struct xfs_buf *agbp; struct xfs_agf *agf; - struct xfs_perag *pag; int error; int i; - pag = xfs_perag_get(mp, agno); - /* * Force out the log. This means any transactions that might have freed * space before we take the AGF buffer lock are now on disk, and the @@ -47,7 +44,7 @@ xfs_trim_extents( error = xfs_alloc_read_agf(pag, NULL, 0, &agbp); if (error) - goto out_put_perag; + return error; agf = agbp->b_addr; cur = xfs_allocbt_init_cursor(mp, NULL, agbp, pag, XFS_BTNUM_CNT); @@ -71,10 +68,10 @@ xfs_trim_extents( error = xfs_alloc_get_rec(cur, &fbno, &flen, &i); if (error) - goto out_del_cursor; + break; if (XFS_IS_CORRUPT(mp, i != 1)) { error = -EFSCORRUPTED; - goto out_del_cursor; + break; } ASSERT(flen <= be32_to_cpu(agf->agf_longest)); @@ -83,15 +80,15 @@ xfs_trim_extents( * the format the range/len variables are supplied in by * userspace. */ - dbno = XFS_AGB_TO_DADDR(mp, agno, fbno); + dbno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, fbno); dlen = XFS_FSB_TO_BB(mp, flen); /* * Too small? Give up. */ if (dlen < minlen) { - trace_xfs_discard_toosmall(mp, agno, fbno, flen); - goto out_del_cursor; + trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno, flen); + break; } /* @@ -100,7 +97,7 @@ xfs_trim_extents( * down partially overlapping ranges for now. */ if (dbno + dlen < start || dbno > end) { - trace_xfs_discard_exclude(mp, agno, fbno, flen); + trace_xfs_discard_exclude(mp, pag->pag_agno, fbno, flen); goto next_extent; } @@ -109,32 +106,30 @@ xfs_trim_extents( * discard and try again the next time. */ if (xfs_extent_busy_search(mp, pag, fbno, flen)) { - trace_xfs_discard_busy(mp, agno, fbno, flen); + trace_xfs_discard_busy(mp, pag->pag_agno, fbno, flen); goto next_extent; } - trace_xfs_discard_extent(mp, agno, fbno, flen); + trace_xfs_discard_extent(mp, pag->pag_agno, fbno, flen); error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS); if (error) - goto out_del_cursor; + break; *blocks_trimmed += flen; next_extent: error = xfs_btree_decrement(cur, 0, &i); if (error) - goto out_del_cursor; + break; if (fatal_signal_pending(current)) { error = -ERESTARTSYS; - goto out_del_cursor; + break; } } out_del_cursor: xfs_btree_del_cursor(cur, error); xfs_buf_relse(agbp); -out_put_perag: - xfs_perag_put(pag); return error; } @@ -152,11 +147,12 @@ xfs_ioc_trim( struct xfs_mount *mp, struct fstrim_range __user *urange) { + struct xfs_perag *pag; unsigned int granularity = bdev_discard_granularity(mp->m_ddev_targp->bt_bdev); struct fstrim_range range; xfs_daddr_t start, end, minlen; - xfs_agnumber_t start_agno, end_agno, agno; + xfs_agnumber_t agno; uint64_t blocks_trimmed = 0; int error, last_error = 0; @@ -193,18 +189,18 @@ xfs_ioc_trim( end = start + BTOBBT(range.len) - 1; if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1) - end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)- 1; - - start_agno = xfs_daddr_to_agno(mp, start); - end_agno = xfs_daddr_to_agno(mp, end); + end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1; - for (agno = start_agno; agno <= end_agno; agno++) { - error = xfs_trim_extents(mp, agno, start, end, minlen, + agno = xfs_daddr_to_agno(mp, start); + for_each_perag_range(mp, agno, xfs_daddr_to_agno(mp, end), pag) { + error = xfs_trim_extents(pag, start, end, minlen, &blocks_trimmed); if (error) { last_error = error; - if (error == -ERESTARTSYS) + if (error == -ERESTARTSYS) { + xfs_perag_rele(pag); break; + } } } diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index ae082808cfed..b2cbbba3e15a 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -228,7 +228,7 @@ static struct attribute *xfs_errortag_attrs[] = { }; ATTRIBUTE_GROUPS(xfs_errortag); -static struct kobj_type xfs_errortag_ktype = { +static const struct kobj_type xfs_errortag_ktype = { .release = xfs_sysfs_release, .sysfs_ops = &xfs_errortag_sysfs_ops, .default_groups = xfs_errortag_groups, diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index dbe6c37dc697..0b9c5ba8a598 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -75,7 +75,7 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp); /* * XFS panic tags -- allow a call to xfs_alert_tag() be turned into - * a panic by setting xfs_panic_mask in a sysctl. + * a panic by setting fs.xfs.panic_mask in a sysctl. */ #define XFS_NO_PTAG 0u #define XFS_PTAG_IFLUSH (1u << 0) @@ -88,6 +88,16 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp); #define XFS_PTAG_FSBLOCK_ZERO (1u << 7) #define XFS_PTAG_VERIFIER_ERROR (1u << 8) +#define XFS_PTAG_MASK (XFS_PTAG_IFLUSH | \ + XFS_PTAG_LOGRES | \ + XFS_PTAG_AILDELETE | \ + XFS_PTAG_ERROR_REPORT | \ + XFS_PTAG_SHUTDOWN_CORRUPT | \ + XFS_PTAG_SHUTDOWN_IOERROR | \ + XFS_PTAG_SHUTDOWN_LOGERROR | \ + XFS_PTAG_FSBLOCK_ZERO | \ + XFS_PTAG_VERIFIER_ERROR) + #define XFS_PTAG_STRINGS \ { XFS_NO_PTAG, "none" }, \ { XFS_PTAG_IFLUSH, "iflush" }, \ diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index ad22a003f959..f3d328e4a440 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -236,6 +236,7 @@ xfs_extent_busy_update_extent( * */ busyp->bno = fend; + busyp->length = bend - fend; } else if (bbno < fbno) { /* * Case 8: diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index d5130d1fcfae..011b50469301 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -345,23 +345,30 @@ static int xfs_trans_free_extent( struct xfs_trans *tp, struct xfs_efd_log_item *efdp, - xfs_fsblock_t start_block, - xfs_extlen_t ext_len, - const struct xfs_owner_info *oinfo, - bool skip_discard) + struct xfs_extent_free_item *xefi) { + struct xfs_owner_info oinfo = { }; struct xfs_mount *mp = tp->t_mountp; struct xfs_extent *extp; uint next_extent; - xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, + xefi->xefi_startblock); xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, - start_block); + xefi->xefi_startblock); int error; - trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); + oinfo.oi_owner = xefi->xefi_owner; + if (xefi->xefi_flags & XFS_EFI_ATTR_FORK) + oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK; + if (xefi->xefi_flags & XFS_EFI_BMBT_BLOCK) + oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; + + trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, + xefi->xefi_blockcount); - error = __xfs_free_extent(tp, start_block, ext_len, - oinfo, XFS_AG_RESV_NONE, skip_discard); + error = __xfs_free_extent(tp, xefi->xefi_startblock, + xefi->xefi_blockcount, &oinfo, XFS_AG_RESV_NONE, + xefi->xefi_flags & XFS_EFI_SKIP_DISCARD); /* * Mark the transaction dirty, even on error. This ensures the * transaction is aborted, which: @@ -375,8 +382,8 @@ xfs_trans_free_extent( next_extent = efdp->efd_next_extent; ASSERT(next_extent < efdp->efd_format.efd_nextents); extp = &(efdp->efd_format.efd_extents[next_extent]); - extp->ext_start = start_block; - extp->ext_len = ext_len; + extp->ext_start = xefi->xefi_startblock; + extp->ext_len = xefi->xefi_blockcount; efdp->efd_next_extent++; return error; @@ -404,7 +411,7 @@ STATIC void xfs_extent_free_log_item( struct xfs_trans *tp, struct xfs_efi_log_item *efip, - struct xfs_extent_free_item *free) + struct xfs_extent_free_item *xefi) { uint next_extent; struct xfs_extent *extp; @@ -420,8 +427,8 @@ xfs_extent_free_log_item( next_extent = atomic_inc_return(&efip->efi_next_extent) - 1; ASSERT(next_extent < efip->efi_format.efi_nextents); extp = &efip->efi_format.efi_extents[next_extent]; - extp->ext_start = free->xefi_startblock; - extp->ext_len = free->xefi_blockcount; + extp->ext_start = xefi->xefi_startblock; + extp->ext_len = xefi->xefi_blockcount; } static struct xfs_log_item * @@ -433,15 +440,15 @@ xfs_extent_free_create_intent( { struct xfs_mount *mp = tp->t_mountp; struct xfs_efi_log_item *efip = xfs_efi_init(mp, count); - struct xfs_extent_free_item *free; + struct xfs_extent_free_item *xefi; ASSERT(count > 0); xfs_trans_add_item(tp, &efip->efi_item); if (sort) list_sort(mp, items, xfs_extent_free_diff_items); - list_for_each_entry(free, items, xefi_list) - xfs_extent_free_log_item(tp, efip, free); + list_for_each_entry(xefi, items, xefi_list) + xfs_extent_free_log_item(tp, efip, xefi); return &efip->efi_item; } @@ -463,21 +470,13 @@ xfs_extent_free_finish_item( struct list_head *item, struct xfs_btree_cur **state) { - struct xfs_owner_info oinfo = { }; - struct xfs_extent_free_item *free; + struct xfs_extent_free_item *xefi; int error; - free = container_of(item, struct xfs_extent_free_item, xefi_list); - oinfo.oi_owner = free->xefi_owner; - if (free->xefi_flags & XFS_EFI_ATTR_FORK) - oinfo.oi_flags |= XFS_OWNER_INFO_ATTR_FORK; - if (free->xefi_flags & XFS_EFI_BMBT_BLOCK) - oinfo.oi_flags |= XFS_OWNER_INFO_BMBT_BLOCK; - error = xfs_trans_free_extent(tp, EFD_ITEM(done), - free->xefi_startblock, - free->xefi_blockcount, - &oinfo, free->xefi_flags & XFS_EFI_SKIP_DISCARD); - kmem_cache_free(xfs_extfree_item_cache, free); + xefi = container_of(item, struct xfs_extent_free_item, xefi_list); + + error = xfs_trans_free_extent(tp, EFD_ITEM(done), xefi); + kmem_cache_free(xfs_extfree_item_cache, xefi); return error; } @@ -494,10 +493,10 @@ STATIC void xfs_extent_free_cancel_item( struct list_head *item) { - struct xfs_extent_free_item *free; + struct xfs_extent_free_item *xefi; - free = container_of(item, struct xfs_extent_free_item, xefi_list); - kmem_cache_free(xfs_extfree_item_cache, free); + xefi = container_of(item, struct xfs_extent_free_item, xefi_list); + kmem_cache_free(xfs_extfree_item_cache, xefi); } const struct xfs_defer_op_type xfs_extent_free_defer_type = { @@ -523,7 +522,7 @@ xfs_agfl_free_finish_item( struct xfs_owner_info oinfo = { }; struct xfs_mount *mp = tp->t_mountp; struct xfs_efd_log_item *efdp = EFD_ITEM(done); - struct xfs_extent_free_item *free; + struct xfs_extent_free_item *xefi; struct xfs_extent *extp; struct xfs_buf *agbp; int error; @@ -532,13 +531,13 @@ xfs_agfl_free_finish_item( uint next_extent; struct xfs_perag *pag; - free = container_of(item, struct xfs_extent_free_item, xefi_list); - ASSERT(free->xefi_blockcount == 1); - agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock); - agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock); - oinfo.oi_owner = free->xefi_owner; + xefi = container_of(item, struct xfs_extent_free_item, xefi_list); + ASSERT(xefi->xefi_blockcount == 1); + agno = XFS_FSB_TO_AGNO(mp, xefi->xefi_startblock); + agbno = XFS_FSB_TO_AGBNO(mp, xefi->xefi_startblock); + oinfo.oi_owner = xefi->xefi_owner; - trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, free->xefi_blockcount); + trace_xfs_agfl_free_deferred(mp, agno, 0, agbno, xefi->xefi_blockcount); pag = xfs_perag_get(mp, agno); error = xfs_alloc_read_agf(pag, tp, 0, &agbp); @@ -559,11 +558,11 @@ xfs_agfl_free_finish_item( next_extent = efdp->efd_next_extent; ASSERT(next_extent < efdp->efd_format.efd_nextents); extp = &(efdp->efd_format.efd_extents[next_extent]); - extp->ext_start = free->xefi_startblock; - extp->ext_len = free->xefi_blockcount; + extp->ext_start = xefi->xefi_startblock; + extp->ext_len = xefi->xefi_blockcount; efdp->efd_next_extent++; - kmem_cache_free(xfs_extfree_item_cache, free); + kmem_cache_free(xfs_extfree_item_cache, xefi); return error; } @@ -599,7 +598,6 @@ xfs_efi_item_recover( struct xfs_mount *mp = lip->li_log->l_mp; struct xfs_efd_log_item *efdp; struct xfs_trans *tp; - struct xfs_extent *extp; int i; int error = 0; @@ -624,10 +622,17 @@ xfs_efi_item_recover( efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); for (i = 0; i < efip->efi_format.efi_nextents; i++) { + struct xfs_extent_free_item fake = { + .xefi_owner = XFS_RMAP_OWN_UNKNOWN, + }; + struct xfs_extent *extp; + extp = &efip->efi_format.efi_extents[i]; - error = xfs_trans_free_extent(tp, efdp, extp->ext_start, - extp->ext_len, - &XFS_RMAP_OINFO_ANY_OWNER, false); + + fake.xefi_startblock = extp->ext_start; + fake.xefi_blockcount = extp->ext_len; + + error = xfs_trans_free_extent(tp, efdp, &fake); if (error == -EFSCORRUPTED) XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, extp, sizeof(*extp)); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 595a5bcf46b9..705250f9f90a 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1047,7 +1047,7 @@ xfs_file_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = xfs_vn_setattr_size(file_mnt_user_ns(file), + error = xfs_vn_setattr_size(file_mnt_idmap(file), file_dentry(file), &iattr); if (error) goto out_unlock; @@ -1429,7 +1429,7 @@ xfs_file_mmap( file_accessed(file); vma->vm_ops = &xfs_file_vm_ops; if (IS_DAX(inode)) - vma->vm_flags |= VM_HUGEPAGE; + vm_flags_set(vma, VM_HUGEPAGE); return 0; } diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index 34b21a29c39b..22c13933c8f8 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -12,6 +12,7 @@ #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_bmap.h" +#include "xfs_bmap_util.h" #include "xfs_alloc.h" #include "xfs_mru_cache.h" #include "xfs_trace.h" @@ -22,7 +23,7 @@ struct xfs_fstrm_item { struct xfs_mru_cache_elem mru; - xfs_agnumber_t ag; /* AG in use for this directory */ + struct xfs_perag *pag; /* AG in use for this directory */ }; enum xfs_fstrm_alloc { @@ -30,117 +31,68 @@ enum xfs_fstrm_alloc { XFS_PICK_LOWSPACE = 2, }; -/* - * Allocation group filestream associations are tracked with per-ag atomic - * counters. These counters allow xfs_filestream_pick_ag() to tell whether a - * particular AG already has active filestreams associated with it. - */ -int -xfs_filestream_peek_ag( - xfs_mount_t *mp, - xfs_agnumber_t agno) -{ - struct xfs_perag *pag; - int ret; - - pag = xfs_perag_get(mp, agno); - ret = atomic_read(&pag->pagf_fstrms); - xfs_perag_put(pag); - return ret; -} - -static int -xfs_filestream_get_ag( - xfs_mount_t *mp, - xfs_agnumber_t agno) -{ - struct xfs_perag *pag; - int ret; - - pag = xfs_perag_get(mp, agno); - ret = atomic_inc_return(&pag->pagf_fstrms); - xfs_perag_put(pag); - return ret; -} - -static void -xfs_filestream_put_ag( - xfs_mount_t *mp, - xfs_agnumber_t agno) -{ - struct xfs_perag *pag; - - pag = xfs_perag_get(mp, agno); - atomic_dec(&pag->pagf_fstrms); - xfs_perag_put(pag); -} - static void xfs_fstrm_free_func( void *data, struct xfs_mru_cache_elem *mru) { - struct xfs_mount *mp = data; struct xfs_fstrm_item *item = container_of(mru, struct xfs_fstrm_item, mru); + struct xfs_perag *pag = item->pag; - xfs_filestream_put_ag(mp, item->ag); - trace_xfs_filestream_free(mp, mru->key, item->ag); + trace_xfs_filestream_free(pag, mru->key); + atomic_dec(&pag->pagf_fstrms); + xfs_perag_rele(pag); kmem_free(item); } /* - * Scan the AGs starting at startag looking for an AG that isn't in use and has - * at least minlen blocks free. + * Scan the AGs starting at start_agno looking for an AG that isn't in use and + * has at least minlen blocks free. If no AG is found to match the allocation + * requirements, pick the AG with the most free space in it. */ static int xfs_filestream_pick_ag( - struct xfs_inode *ip, - xfs_agnumber_t startag, - xfs_agnumber_t *agp, + struct xfs_alloc_arg *args, + xfs_ino_t pino, + xfs_agnumber_t start_agno, int flags, - xfs_extlen_t minlen) + xfs_extlen_t *longest) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_fstrm_item *item; + struct xfs_mount *mp = args->mp; struct xfs_perag *pag; - xfs_extlen_t longest, free = 0, minfree, maxfree = 0; - xfs_agnumber_t ag, max_ag = NULLAGNUMBER; - int err, trylock, nscan; - - ASSERT(S_ISDIR(VFS_I(ip)->i_mode)); + struct xfs_perag *max_pag = NULL; + xfs_extlen_t minlen = *longest; + xfs_extlen_t free = 0, minfree, maxfree = 0; + xfs_agnumber_t agno; + bool first_pass = true; + int err; /* 2% of an AG's blocks must be free for it to be chosen. */ minfree = mp->m_sb.sb_agblocks / 50; - ag = startag; - *agp = NULLAGNUMBER; - - /* For the first pass, don't sleep trying to init the per-AG. */ - trylock = XFS_ALLOC_FLAG_TRYLOCK; - - for (nscan = 0; 1; nscan++) { - trace_xfs_filestream_scan(mp, ip->i_ino, ag); - - pag = xfs_perag_get(mp, ag); - - if (!pag->pagf_init) { - err = xfs_alloc_read_agf(pag, NULL, trylock, NULL); - if (err) { - if (err != -EAGAIN) { - xfs_perag_put(pag); - return err; - } - /* Couldn't lock the AGF, skip this AG. */ - goto next_ag; - } +restart: + for_each_perag_wrap(mp, start_agno, agno, pag) { + trace_xfs_filestream_scan(pag, pino); + *longest = 0; + err = xfs_bmap_longest_free_extent(pag, NULL, longest); + if (err) { + xfs_perag_rele(pag); + if (err != -EAGAIN) + break; + /* Couldn't lock the AGF, skip this AG. */ + err = 0; + continue; } /* Keep track of the AG with the most free blocks. */ if (pag->pagf_freeblks > maxfree) { maxfree = pag->pagf_freeblks; - max_ag = ag; + if (max_pag) + xfs_perag_rele(max_pag); + atomic_inc(&pag->pag_active_ref); + max_pag = pag; } /* @@ -149,93 +101,73 @@ xfs_filestream_pick_ag( * loop, and it guards against two filestreams being established * in the same AG as each other. */ - if (xfs_filestream_get_ag(mp, ag) > 1) { - xfs_filestream_put_ag(mp, ag); - goto next_ag; - } - - longest = xfs_alloc_longest_free_extent(pag, - xfs_alloc_min_freelist(mp, pag), - xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE)); - if (((minlen && longest >= minlen) || - (!minlen && pag->pagf_freeblks >= minfree)) && - (!pag->pagf_metadata || !(flags & XFS_PICK_USERDATA) || - (flags & XFS_PICK_LOWSPACE))) { - - /* Break out, retaining the reference on the AG. */ - free = pag->pagf_freeblks; - xfs_perag_put(pag); - *agp = ag; - break; + if (atomic_inc_return(&pag->pagf_fstrms) <= 1) { + if (((minlen && *longest >= minlen) || + (!minlen && pag->pagf_freeblks >= minfree)) && + (!xfs_perag_prefers_metadata(pag) || + !(flags & XFS_PICK_USERDATA) || + (flags & XFS_PICK_LOWSPACE))) { + /* Break out, retaining the reference on the AG. */ + free = pag->pagf_freeblks; + break; + } } /* Drop the reference on this AG, it's not usable. */ - xfs_filestream_put_ag(mp, ag); -next_ag: - xfs_perag_put(pag); - /* Move to the next AG, wrapping to AG 0 if necessary. */ - if (++ag >= mp->m_sb.sb_agcount) - ag = 0; - - /* If a full pass of the AGs hasn't been done yet, continue. */ - if (ag != startag) - continue; + atomic_dec(&pag->pagf_fstrms); + } - /* Allow sleeping in xfs_alloc_read_agf() on the 2nd pass. */ - if (trylock != 0) { - trylock = 0; - continue; + if (err) { + xfs_perag_rele(pag); + if (max_pag) + xfs_perag_rele(max_pag); + return err; + } + + if (!pag) { + /* + * Allow a second pass to give xfs_bmap_longest_free_extent() + * another attempt at locking AGFs that it might have skipped + * over before we fail. + */ + if (first_pass) { + first_pass = false; + goto restart; } - /* Finally, if lowspace wasn't set, set it for the 3rd pass. */ + /* + * We must be low on data space, so run a final lowspace + * optimised selection pass if we haven't already. + */ if (!(flags & XFS_PICK_LOWSPACE)) { flags |= XFS_PICK_LOWSPACE; - continue; + goto restart; } /* - * Take the AG with the most free space, regardless of whether - * it's already in use by another filestream. + * No unassociated AGs are available, so select the AG with the + * most free space, regardless of whether it's already in use by + * another filestream. It none suit, just use whatever AG we can + * grab. */ - if (max_ag != NULLAGNUMBER) { - xfs_filestream_get_ag(mp, max_ag); + if (!max_pag) { + for_each_perag_wrap(args->mp, 0, start_agno, args->pag) + break; + atomic_inc(&args->pag->pagf_fstrms); + *longest = 0; + } else { + pag = max_pag; free = maxfree; - *agp = max_ag; - break; + atomic_inc(&pag->pagf_fstrms); } - - /* take AG 0 if none matched */ - trace_xfs_filestream_pick(ip, *agp, free, nscan); - *agp = 0; - return 0; - } - - trace_xfs_filestream_pick(ip, *agp, free, nscan); - - if (*agp == NULLAGNUMBER) - return 0; - - err = -ENOMEM; - item = kmem_alloc(sizeof(*item), KM_MAYFAIL); - if (!item) - goto out_put_ag; - - item->ag = *agp; - - err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru); - if (err) { - if (err == -EEXIST) - err = 0; - goto out_free_item; + } else if (max_pag) { + xfs_perag_rele(max_pag); } + trace_xfs_filestream_pick(pag, pino, free); + args->pag = pag; return 0; -out_free_item: - kmem_free(item); -out_put_ag: - xfs_filestream_put_ag(mp, *agp); - return err; } static struct xfs_inode * @@ -263,104 +195,187 @@ out: } /* - * Find the right allocation group for a file, either by finding an - * existing file stream or creating a new one. + * Lookup the mru cache for an existing association. If one exists and we can + * use it, return with an active perag reference indicating that the allocation + * will proceed with that association. * - * Returns NULLAGNUMBER in case of an error. + * If we have no association, or we cannot use the current one and have to + * destroy it, return with longest = 0 to tell the caller to create a new + * association. */ -xfs_agnumber_t -xfs_filestream_lookup_ag( - struct xfs_inode *ip) +static int +xfs_filestream_lookup_association( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, + xfs_ino_t pino, + xfs_extlen_t *longest) { - struct xfs_mount *mp = ip->i_mount; - struct xfs_inode *pip = NULL; - xfs_agnumber_t startag, ag = NULLAGNUMBER; + struct xfs_mount *mp = args->mp; + struct xfs_perag *pag; struct xfs_mru_cache_elem *mru; + int error = 0; - ASSERT(S_ISREG(VFS_I(ip)->i_mode)); - - pip = xfs_filestream_get_parent(ip); - if (!pip) - return NULLAGNUMBER; + *longest = 0; + mru = xfs_mru_cache_lookup(mp->m_filestream, pino); + if (!mru) + return 0; + /* + * Grab the pag and take an extra active reference for the caller whilst + * the mru item cannot go away. This means we'll pin the perag with + * the reference we get here even if the filestreams association is torn + * down immediately after we mark the lookup as done. + */ + pag = container_of(mru, struct xfs_fstrm_item, mru)->pag; + atomic_inc(&pag->pag_active_ref); + xfs_mru_cache_done(mp->m_filestream); - mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); - if (mru) { - ag = container_of(mru, struct xfs_fstrm_item, mru)->ag; - xfs_mru_cache_done(mp->m_filestream); + trace_xfs_filestream_lookup(pag, ap->ip->i_ino); - trace_xfs_filestream_lookup(mp, ip->i_ino, ag); - goto out; - } + ap->blkno = XFS_AGB_TO_FSB(args->mp, pag->pag_agno, 0); + xfs_bmap_adjacent(ap); /* - * Set the starting AG using the rotor for inode32, otherwise - * use the directory inode's AG. + * If there is very little free space before we start a filestreams + * allocation, we're almost guaranteed to fail to find a large enough + * free space available so just use the cached AG. */ - if (xfs_is_inode32(mp)) { - xfs_agnumber_t rotorstep = xfs_rotorstep; - startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; - mp->m_agfrotor = (mp->m_agfrotor + 1) % - (mp->m_sb.sb_agcount * rotorstep); - } else - startag = XFS_INO_TO_AGNO(mp, pip->i_ino); + if (ap->tp->t_flags & XFS_TRANS_LOWMODE) { + *longest = 1; + goto out_done; + } - if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0)) - ag = NULLAGNUMBER; -out: - xfs_irele(pip); - return ag; + error = xfs_bmap_longest_free_extent(pag, args->tp, longest); + if (error == -EAGAIN) + error = 0; + if (error || *longest < args->maxlen) { + /* We aren't going to use this perag */ + *longest = 0; + xfs_perag_rele(pag); + return error; + } + +out_done: + args->pag = pag; + return 0; } -/* - * Pick a new allocation group for the current file and its file stream. - * - * This is called when the allocator can't find a suitable extent in the - * current AG, and we have to move the stream into a new AG with more space. - */ -int -xfs_filestream_new_ag( +static int +xfs_filestream_create_association( struct xfs_bmalloca *ap, - xfs_agnumber_t *agp) + struct xfs_alloc_arg *args, + xfs_ino_t pino, + xfs_extlen_t *longest) { - struct xfs_inode *ip = ap->ip, *pip; - struct xfs_mount *mp = ip->i_mount; - xfs_extlen_t minlen = ap->length; - xfs_agnumber_t startag = 0; - int flags = 0; - int err = 0; + struct xfs_mount *mp = args->mp; struct xfs_mru_cache_elem *mru; + struct xfs_fstrm_item *item; + xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, pino); + int flags = 0; + int error; - *agp = NULLAGNUMBER; - - pip = xfs_filestream_get_parent(ip); - if (!pip) - goto exit; - - mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino); + /* Changing parent AG association now, so remove the existing one. */ + mru = xfs_mru_cache_remove(mp->m_filestream, pino); if (mru) { struct xfs_fstrm_item *item = container_of(mru, struct xfs_fstrm_item, mru); - startag = (item->ag + 1) % mp->m_sb.sb_agcount; + + agno = (item->pag->pag_agno + 1) % mp->m_sb.sb_agcount; + xfs_fstrm_free_func(mp, mru); + } else if (xfs_is_inode32(mp)) { + xfs_agnumber_t rotorstep = xfs_rotorstep; + + agno = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount; + mp->m_agfrotor = (mp->m_agfrotor + 1) % + (mp->m_sb.sb_agcount * rotorstep); } + ap->blkno = XFS_AGB_TO_FSB(args->mp, agno, 0); + xfs_bmap_adjacent(ap); + if (ap->datatype & XFS_ALLOC_USERDATA) flags |= XFS_PICK_USERDATA; if (ap->tp->t_flags & XFS_TRANS_LOWMODE) flags |= XFS_PICK_LOWSPACE; - err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen); + *longest = ap->length; + error = xfs_filestream_pick_ag(args, pino, agno, flags, longest); + if (error) + return error; /* - * Only free the item here so we skip over the old AG earlier. + * We are going to use this perag now, so create an assoication for it. + * xfs_filestream_pick_ag() has already bumped the perag fstrms counter + * for us, so all we need to do here is take another active reference to + * the perag for the cached association. + * + * If we fail to store the association, we need to drop the fstrms + * counter as well as drop the perag reference we take here for the + * item. We do not need to return an error for this failure - as long as + * we return a referenced AG, the allocation can still go ahead just + * fine. */ - if (mru) - xfs_fstrm_free_func(mp, mru); + item = kmem_alloc(sizeof(*item), KM_MAYFAIL); + if (!item) + goto out_put_fstrms; + + atomic_inc(&args->pag->pag_active_ref); + item->pag = args->pag; + error = xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru); + if (error) + goto out_free_item; + return 0; + +out_free_item: + xfs_perag_rele(item->pag); + kmem_free(item); +out_put_fstrms: + atomic_dec(&args->pag->pagf_fstrms); + return 0; +} + +/* + * Search for an allocation group with a single extent large enough for + * the request. First we look for an existing association and use that if it + * is found. Otherwise, we create a new association by selecting an AG that fits + * the allocation criteria. + * + * We return with a referenced perag in args->pag to indicate which AG we are + * allocating into or an error with no references held. + */ +int +xfs_filestream_select_ag( + struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, + xfs_extlen_t *longest) +{ + struct xfs_mount *mp = args->mp; + struct xfs_inode *pip; + xfs_ino_t ino = 0; + int error = 0; + + *longest = 0; + args->total = ap->total; + pip = xfs_filestream_get_parent(ap->ip); + if (pip) { + ino = pip->i_ino; + error = xfs_filestream_lookup_association(ap, args, ino, + longest); + xfs_irele(pip); + if (error) + return error; + if (*longest >= args->maxlen) + goto out_select; + if (ap->tp->t_flags & XFS_TRANS_LOWMODE) + goto out_select; + } + + error = xfs_filestream_create_association(ap, args, ino, longest); + if (error) + return error; - xfs_irele(pip); -exit: - if (*agp == NULLAGNUMBER) - *agp = 0; - return err; +out_select: + ap->blkno = XFS_AGB_TO_FSB(mp, args->pag->pag_agno, 0); + return 0; } void diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h index 403226ebb80b..84149ed0e340 100644 --- a/fs/xfs/xfs_filestream.h +++ b/fs/xfs/xfs_filestream.h @@ -9,13 +9,13 @@ struct xfs_mount; struct xfs_inode; struct xfs_bmalloca; +struct xfs_alloc_arg; int xfs_filestream_mount(struct xfs_mount *mp); void xfs_filestream_unmount(struct xfs_mount *mp); void xfs_filestream_deassociate(struct xfs_inode *ip); -xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); -int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp); -int xfs_filestream_peek_ag(struct xfs_mount *mp, xfs_agnumber_t agno); +int xfs_filestream_select_ag(struct xfs_bmalloca *ap, + struct xfs_alloc_arg *args, xfs_extlen_t *blen); static inline int xfs_inode_is_filestream( diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c index 88a88506ffff..59e7d1a14b67 100644 --- a/fs/xfs/xfs_fsmap.c +++ b/fs/xfs/xfs_fsmap.c @@ -688,11 +688,11 @@ __xfs_getfsmap_datadev( info->agf_bp = NULL; } if (info->pag) { - xfs_perag_put(info->pag); + xfs_perag_rele(info->pag); info->pag = NULL; } else if (pag) { /* loop termination case */ - xfs_perag_put(pag); + xfs_perag_rele(pag); } return error; @@ -761,6 +761,7 @@ xfs_getfsmap_datadev_bnobt( { struct xfs_alloc_rec_incore akeys[2]; + memset(akeys, 0, sizeof(akeys)); info->missing_owner = XFS_FMR_OWN_UNKNOWN; return __xfs_getfsmap_datadev(tp, keys, info, xfs_getfsmap_datadev_bnobt_query, &akeys[0]); diff --git a/fs/xfs/xfs_globals.c b/fs/xfs/xfs_globals.c index 4d0a98f920ca..9edc1f2bc939 100644 --- a/fs/xfs/xfs_globals.c +++ b/fs/xfs/xfs_globals.c @@ -4,6 +4,7 @@ * All Rights Reserved. */ #include "xfs.h" +#include "xfs_error.h" /* * Tunable XFS parameters. xfs_params is required even when CONFIG_SYSCTL=n, @@ -15,7 +16,7 @@ xfs_param_t xfs_params = { /* MIN DFLT MAX */ .sgid_inherit = { 0, 0, 1 }, .symlink_mode = { 0, 0, 1 }, - .panic_mask = { 0, 0, 256 }, + .panic_mask = { 0, 0, XFS_PTAG_MASK}, .error_level = { 0, 3, 11 }, .syncd_timer = { 1*100, 30*100, 7200*100}, .stats_clear = { 0, 0, 1 }, diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index f35e2cee5265..c9a7e270a428 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -255,7 +255,7 @@ xfs_perag_set_inode_tag( break; } - trace_xfs_perag_set_inode_tag(mp, pag->pag_agno, tag, _RET_IP_); + trace_xfs_perag_set_inode_tag(pag, _RET_IP_); } /* Clear a tag on both the AG incore inode tree and the AG radix tree. */ @@ -289,7 +289,7 @@ xfs_perag_clear_inode_tag( radix_tree_tag_clear(&mp->m_perag_tree, pag->pag_agno, tag); spin_unlock(&mp->m_perag_lock); - trace_xfs_perag_clear_inode_tag(mp, pag->pag_agno, tag, _RET_IP_); + trace_xfs_perag_clear_inode_tag(pag, _RET_IP_); } /* @@ -586,7 +586,7 @@ xfs_iget_cache_miss( if (!ip) return -ENOMEM; - error = xfs_imap(mp, tp, ip->i_ino, &ip->i_imap, flags); + error = xfs_imap(pag, tp, ip->i_ino, &ip->i_imap, flags); if (error) goto out_destroy; @@ -1767,7 +1767,7 @@ xfs_icwalk( if (error) { last_error = error; if (error == -EFSCORRUPTED) { - xfs_perag_put(pag); + xfs_perag_rele(pag); break; } } @@ -1853,12 +1853,20 @@ xfs_inodegc_worker( struct xfs_inodegc, work); struct llist_node *node = llist_del_all(&gc->list); struct xfs_inode *ip, *n; + unsigned int nofs_flag; WRITE_ONCE(gc->items, 0); if (!node) return; + /* + * We can allocate memory here while doing writeback on behalf of + * memory reclaim. To avoid memory allocation deadlocks set the + * task-wide nofs context for the following operations. + */ + nofs_flag = memalloc_nofs_save(); + ip = llist_entry(node, struct xfs_inode, i_gclist); trace_xfs_inodegc_worker(ip->i_mount, READ_ONCE(gc->shrinker_hits)); @@ -1867,6 +1875,8 @@ xfs_inodegc_worker( xfs_iflags_set(ip, XFS_INACTIVATING); xfs_inodegc_inactivate(ip); } + + memalloc_nofs_restore(nofs_flag); } /* diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d354ea2b74f9..5808abab786c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -777,7 +777,7 @@ xfs_inode_inherit_flags2( */ int xfs_init_new_inode( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct xfs_trans *tp, struct xfs_inode *pip, xfs_ino_t ino, @@ -823,11 +823,11 @@ xfs_init_new_inode( ip->i_projid = prid; if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) { - inode_fsuid_set(inode, mnt_userns); + inode_fsuid_set(inode, idmap); inode->i_gid = dir->i_gid; inode->i_mode = mode; } else { - inode_init_owner(mnt_userns, inode, dir, mode); + inode_init_owner(idmap, inode, dir, mode); } /* @@ -836,7 +836,7 @@ xfs_init_new_inode( * (and only if the irix_sgid_inherit compatibility variable is set). */ if (irix_sgid_inherit && (inode->i_mode & S_ISGID) && - !vfsgid_in_group_p(i_gid_into_vfsgid(mnt_userns, inode))) + !vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode))) inode->i_mode &= ~S_ISGID; ip->i_disk_size = 0; @@ -946,7 +946,7 @@ xfs_bumplink( int xfs_create( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, xfs_inode_t *dp, struct xfs_name *name, umode_t mode, @@ -978,8 +978,8 @@ xfs_create( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), - mapped_fsgid(mnt_userns, &init_user_ns), prid, + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), + mapped_fsgid(idmap, &init_user_ns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) @@ -1020,7 +1020,7 @@ xfs_create( */ error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); if (!error) - error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode, + error = xfs_init_new_inode(idmap, tp, dp, ino, mode, is_dir ? 2 : 1, rdev, prid, init_xattrs, &ip); if (error) goto out_trans_cancel; @@ -1102,7 +1102,7 @@ xfs_create( int xfs_create_tmpfile( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct xfs_inode *dp, umode_t mode, struct xfs_inode **ipp) @@ -1127,8 +1127,8 @@ xfs_create_tmpfile( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), - mapped_fsgid(mnt_userns, &init_user_ns), prid, + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), + mapped_fsgid(idmap, &init_user_ns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) @@ -1144,7 +1144,7 @@ xfs_create_tmpfile( error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); if (!error) - error = xfs_init_new_inode(mnt_userns, tp, dp, ino, mode, + error = xfs_init_new_inode(idmap, tp, dp, ino, mode, 0, 0, prid, false, &ip); if (error) goto out_trans_cancel; @@ -1367,7 +1367,7 @@ xfs_itruncate_extents_flags( unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1; while (unmap_len > 0) { - ASSERT(tp->t_firstblock == NULLFSBLOCK); + ASSERT(tp->t_highest_agno == NULLAGNUMBER); error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len, flags, XFS_ITRUNC_MAX_EXTENTS); if (error) @@ -2709,7 +2709,7 @@ out_trans_abort: */ static int xfs_rename_alloc_whiteout( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct xfs_name *src_name, struct xfs_inode *dp, struct xfs_inode **wip) @@ -2718,7 +2718,7 @@ xfs_rename_alloc_whiteout( struct qstr name; int error; - error = xfs_create_tmpfile(mnt_userns, dp, S_IFCHR | WHITEOUT_MODE, + error = xfs_create_tmpfile(idmap, dp, S_IFCHR | WHITEOUT_MODE, &tmpfile); if (error) return error; @@ -2750,7 +2750,7 @@ xfs_rename_alloc_whiteout( */ int xfs_rename( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, @@ -2782,7 +2782,7 @@ xfs_rename( * appropriately. */ if (flags & RENAME_WHITEOUT) { - error = xfs_rename_alloc_whiteout(mnt_userns, src_name, + error = xfs_rename_alloc_whiteout(idmap, src_name, target_dp, &wip); if (error) return error; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index fa780f08dc89..69d21e42c10a 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -473,18 +473,18 @@ int xfs_release(struct xfs_inode *ip); void xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); -int xfs_create(struct user_namespace *mnt_userns, +int xfs_create(struct mnt_idmap *idmap, struct xfs_inode *dp, struct xfs_name *name, umode_t mode, dev_t rdev, bool need_xattr, struct xfs_inode **ipp); -int xfs_create_tmpfile(struct user_namespace *mnt_userns, +int xfs_create_tmpfile(struct mnt_idmap *idmap, struct xfs_inode *dp, umode_t mode, struct xfs_inode **ipp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode *ip); int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, struct xfs_name *target_name); -int xfs_rename(struct user_namespace *mnt_userns, +int xfs_rename(struct mnt_idmap *idmap, struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, @@ -515,7 +515,7 @@ void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode, xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); -int xfs_init_new_inode(struct user_namespace *mnt_userns, struct xfs_trans *tp, +int xfs_init_new_inode(struct mnt_idmap *idmap, struct xfs_trans *tp, struct xfs_inode *pip, xfs_ino_t ino, umode_t mode, xfs_nlink_t nlink, dev_t rdev, prid_t prid, bool init_xattrs, struct xfs_inode **ipp); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 13f1b2add390..55bb01173cde 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -665,7 +665,7 @@ xfs_ioc_fsbulkstat( struct xfs_fsop_bulkreq bulkreq; struct xfs_ibulk breq = { .mp = mp, - .mnt_userns = file_mnt_user_ns(file), + .idmap = file_mnt_idmap(file), .ocount = 0, }; xfs_ino_t lastino; @@ -754,7 +754,7 @@ xfs_bulkstat_fmt( static int xfs_bulk_ireq_setup( struct xfs_mount *mp, - struct xfs_bulk_ireq *hdr, + const struct xfs_bulk_ireq *hdr, struct xfs_ibulk *breq, void __user *ubuffer) { @@ -780,7 +780,7 @@ xfs_bulk_ireq_setup( switch (hdr->ino) { case XFS_BULK_IREQ_SPECIAL_ROOT: - hdr->ino = mp->m_sb.sb_rootino; + breq->startino = mp->m_sb.sb_rootino; break; default: return -EINVAL; @@ -844,7 +844,7 @@ xfs_ioc_bulkstat( struct xfs_bulk_ireq hdr; struct xfs_ibulk breq = { .mp = mp, - .mnt_userns = file_mnt_user_ns(file), + .idmap = file_mnt_idmap(file), }; int error; @@ -1297,7 +1297,7 @@ xfs_ioctl_setattr_check_projid( int xfs_fileattr_set( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { @@ -1371,7 +1371,7 @@ xfs_fileattr_set( */ if ((VFS_I(ip)->i_mode & (S_ISUID|S_ISGID)) && - !capable_wrt_inode_uidgid(mnt_userns, VFS_I(ip), CAP_FSETID)) + !capable_wrt_inode_uidgid(idmap, VFS_I(ip), CAP_FSETID)) VFS_I(ip)->i_mode &= ~(S_ISUID|S_ISGID); /* Change the ownerships and register project quota modifications */ diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h index d4abba2c13c1..38be600b5e1e 100644 --- a/fs/xfs/xfs_ioctl.h +++ b/fs/xfs/xfs_ioctl.h @@ -49,7 +49,7 @@ xfs_fileattr_get( extern int xfs_fileattr_set( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa); diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index 2f54b701eead..ee35eea1ecce 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -204,7 +204,7 @@ xfs_compat_ioc_fsbulkstat( struct xfs_fsop_bulkreq bulkreq; struct xfs_ibulk breq = { .mp = mp, - .mnt_userns = file_mnt_user_ns(file), + .idmap = file_mnt_idmap(file), .ocount = 0, }; xfs_ino_t lastino; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 669c1bc5c3a7..69dbe7814128 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -83,7 +83,7 @@ xfs_iomap_valid( return true; } -const struct iomap_page_ops xfs_iomap_page_ops = { +static const struct iomap_folio_ops xfs_iomap_folio_ops = { .iomap_valid = xfs_iomap_valid, }; @@ -133,7 +133,7 @@ xfs_bmbt_to_iomap( iomap->flags |= IOMAP_F_DIRTY; iomap->validity_cookie = sequence_cookie; - iomap->page_ops = &xfs_iomap_page_ops; + iomap->folio_ops = &xfs_iomap_folio_ops; return 0; } diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 515318dfbc38..24718adb3c16 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -162,12 +162,12 @@ xfs_create_need_xattr( STATIC int xfs_generic_create( - struct user_namespace *mnt_userns, - struct inode *dir, - struct dentry *dentry, - umode_t mode, - dev_t rdev, - struct file *tmpfile) /* unnamed file */ + struct mnt_idmap *idmap, + struct inode *dir, + struct dentry *dentry, + umode_t mode, + dev_t rdev, + struct file *tmpfile) /* unnamed file */ { struct inode *inode; struct xfs_inode *ip = NULL; @@ -196,11 +196,11 @@ xfs_generic_create( goto out_free_acl; if (!tmpfile) { - error = xfs_create(mnt_userns, XFS_I(dir), &name, mode, rdev, + error = xfs_create(idmap, XFS_I(dir), &name, mode, rdev, xfs_create_need_xattr(dir, default_acl, acl), &ip); } else { - error = xfs_create_tmpfile(mnt_userns, XFS_I(dir), mode, &ip); + error = xfs_create_tmpfile(idmap, XFS_I(dir), mode, &ip); } if (unlikely(error)) goto out_free_acl; @@ -255,35 +255,34 @@ xfs_generic_create( STATIC int xfs_vn_mknod( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev) { - return xfs_generic_create(mnt_userns, dir, dentry, mode, rdev, NULL); + return xfs_generic_create(idmap, dir, dentry, mode, rdev, NULL); } STATIC int xfs_vn_create( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool flags) { - return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, NULL); + return xfs_generic_create(idmap, dir, dentry, mode, 0, NULL); } STATIC int xfs_vn_mkdir( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { - return xfs_generic_create(mnt_userns, dir, dentry, mode | S_IFDIR, 0, - NULL); + return xfs_generic_create(idmap, dir, dentry, mode | S_IFDIR, 0, NULL); } STATIC struct dentry * @@ -400,7 +399,7 @@ xfs_vn_unlink( STATIC int xfs_vn_symlink( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, const char *symname) @@ -417,7 +416,7 @@ xfs_vn_symlink( if (unlikely(error)) goto out; - error = xfs_symlink(mnt_userns, XFS_I(dir), &name, symname, mode, &cip); + error = xfs_symlink(idmap, XFS_I(dir), &name, symname, mode, &cip); if (unlikely(error)) goto out; @@ -443,7 +442,7 @@ xfs_vn_symlink( STATIC int xfs_vn_rename( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *odir, struct dentry *odentry, struct inode *ndir, @@ -472,7 +471,7 @@ xfs_vn_rename( if (unlikely(error)) return error; - return xfs_rename(mnt_userns, XFS_I(odir), &oname, + return xfs_rename(idmap, XFS_I(odir), &oname, XFS_I(d_inode(odentry)), XFS_I(ndir), &nname, new_inode ? XFS_I(new_inode) : NULL, flags); } @@ -549,7 +548,7 @@ xfs_stat_blksize( STATIC int xfs_vn_getattr( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, @@ -558,8 +557,8 @@ xfs_vn_getattr( struct inode *inode = d_inode(path->dentry); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - vfsuid_t vfsuid = i_uid_into_vfsuid(mnt_userns, inode); - vfsgid_t vfsgid = i_gid_into_vfsgid(mnt_userns, inode); + vfsuid_t vfsuid = i_uid_into_vfsuid(idmap, inode); + vfsgid_t vfsgid = i_gid_into_vfsgid(idmap, inode); trace_xfs_getattr(ip); @@ -627,7 +626,7 @@ xfs_vn_getattr( static int xfs_vn_change_ok( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { @@ -639,7 +638,7 @@ xfs_vn_change_ok( if (xfs_is_shutdown(mp)) return -EIO; - return setattr_prepare(mnt_userns, dentry, iattr); + return setattr_prepare(idmap, dentry, iattr); } /* @@ -650,7 +649,7 @@ xfs_vn_change_ok( */ static int xfs_setattr_nonsize( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct xfs_inode *ip, struct iattr *iattr) @@ -679,14 +678,14 @@ xfs_setattr_nonsize( uint qflags = 0; if ((mask & ATTR_UID) && XFS_IS_UQUOTA_ON(mp)) { - uid = from_vfsuid(mnt_userns, i_user_ns(inode), + uid = from_vfsuid(idmap, i_user_ns(inode), iattr->ia_vfsuid); qflags |= XFS_QMOPT_UQUOTA; } else { uid = inode->i_uid; } if ((mask & ATTR_GID) && XFS_IS_GQUOTA_ON(mp)) { - gid = from_vfsgid(mnt_userns, i_user_ns(inode), + gid = from_vfsgid(idmap, i_user_ns(inode), iattr->ia_vfsgid); qflags |= XFS_QMOPT_GQUOTA; } else { @@ -719,18 +718,18 @@ xfs_setattr_nonsize( * also. */ if (XFS_IS_UQUOTA_ON(mp) && - i_uid_needs_update(mnt_userns, iattr, inode)) { + i_uid_needs_update(idmap, iattr, inode)) { ASSERT(udqp); old_udqp = xfs_qm_vop_chown(tp, ip, &ip->i_udquot, udqp); } if (XFS_IS_GQUOTA_ON(mp) && - i_gid_needs_update(mnt_userns, iattr, inode)) { + i_gid_needs_update(idmap, iattr, inode)) { ASSERT(xfs_has_pquotino(mp) || !XFS_IS_PQUOTA_ON(mp)); ASSERT(gdqp); old_gdqp = xfs_qm_vop_chown(tp, ip, &ip->i_gdquot, gdqp); } - setattr_copy(mnt_userns, inode, iattr); + setattr_copy(idmap, inode, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); @@ -758,7 +757,7 @@ xfs_setattr_nonsize( * Posix ACL code seems to care about this issue either. */ if (mask & ATTR_MODE) { - error = posix_acl_chmod(mnt_userns, dentry, inode->i_mode); + error = posix_acl_chmod(idmap, dentry, inode->i_mode); if (error) return error; } @@ -779,7 +778,7 @@ out_dqrele: */ STATIC int xfs_setattr_size( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct xfs_inode *ip, struct iattr *iattr) @@ -812,7 +811,7 @@ xfs_setattr_size( * Use the regular setattr path to update the timestamps. */ iattr->ia_valid &= ~ATTR_SIZE; - return xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr); + return xfs_setattr_nonsize(idmap, dentry, ip, iattr); } /* @@ -956,7 +955,7 @@ xfs_setattr_size( } ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); - setattr_copy(mnt_userns, inode, iattr); + setattr_copy(idmap, inode, iattr); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); @@ -977,7 +976,7 @@ out_trans_cancel: int xfs_vn_setattr_size( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { @@ -986,15 +985,15 @@ xfs_vn_setattr_size( trace_xfs_setattr(ip); - error = xfs_vn_change_ok(mnt_userns, dentry, iattr); + error = xfs_vn_change_ok(idmap, dentry, iattr); if (error) return error; - return xfs_setattr_size(mnt_userns, dentry, ip, iattr); + return xfs_setattr_size(idmap, dentry, ip, iattr); } STATIC int xfs_vn_setattr( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *iattr) { @@ -1014,14 +1013,14 @@ xfs_vn_setattr( return error; } - error = xfs_vn_setattr_size(mnt_userns, dentry, iattr); + error = xfs_vn_setattr_size(idmap, dentry, iattr); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); } else { trace_xfs_setattr(ip); - error = xfs_vn_change_ok(mnt_userns, dentry, iattr); + error = xfs_vn_change_ok(idmap, dentry, iattr); if (!error) - error = xfs_setattr_nonsize(mnt_userns, dentry, ip, iattr); + error = xfs_setattr_nonsize(idmap, dentry, ip, iattr); } return error; @@ -1092,12 +1091,12 @@ xfs_vn_fiemap( STATIC int xfs_vn_tmpfile( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct inode *dir, struct file *file, umode_t mode) { - int err = xfs_generic_create(mnt_userns, dir, file->f_path.dentry, mode, 0, file); + int err = xfs_generic_create(idmap, dir, file->f_path.dentry, mode, 0, file); return finish_open_simple(file, err); } diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index e570dcb5df8d..7f84a0843b24 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -13,7 +13,7 @@ extern const struct file_operations xfs_dir_file_operations; extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); -int xfs_vn_setattr_size(struct user_namespace *mnt_userns, +int xfs_vn_setattr_size(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *vap); int xfs_inode_init_security(struct inode *inode, struct inode *dir, diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index a1c2bcf65d37..f225413a993c 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -55,7 +55,7 @@ struct xfs_bstat_chunk { STATIC int xfs_bulkstat_one_int( struct xfs_mount *mp, - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct xfs_trans *tp, xfs_ino_t ino, struct xfs_bstat_chunk *bc) @@ -83,8 +83,8 @@ xfs_bulkstat_one_int( ASSERT(ip != NULL); ASSERT(ip->i_imap.im_blkno != 0); inode = VFS_I(ip); - vfsuid = i_uid_into_vfsuid(mnt_userns, inode); - vfsgid = i_gid_into_vfsgid(mnt_userns, inode); + vfsuid = i_uid_into_vfsuid(idmap, inode); + vfsgid = i_gid_into_vfsgid(idmap, inode); /* xfs_iget returns the following without needing * further change. @@ -178,7 +178,7 @@ xfs_bulkstat_one( struct xfs_trans *tp; int error; - if (breq->mnt_userns != &init_user_ns) { + if (breq->idmap != &nop_mnt_idmap) { xfs_warn_ratelimited(breq->mp, "bulkstat not supported inside of idmapped mounts."); return -EINVAL; @@ -199,7 +199,7 @@ xfs_bulkstat_one( if (error) goto out; - error = xfs_bulkstat_one_int(breq->mp, breq->mnt_userns, tp, + error = xfs_bulkstat_one_int(breq->mp, breq->idmap, tp, breq->startino, &bc); xfs_trans_cancel(tp); out: @@ -225,7 +225,7 @@ xfs_bulkstat_iwalk( struct xfs_bstat_chunk *bc = data; int error; - error = xfs_bulkstat_one_int(mp, bc->breq->mnt_userns, tp, ino, data); + error = xfs_bulkstat_one_int(mp, bc->breq->idmap, tp, ino, data); /* bulkstat just skips over missing inodes */ if (error == -ENOENT || error == -EINVAL) return 0; @@ -270,7 +270,7 @@ xfs_bulkstat( unsigned int iwalk_flags = 0; int error; - if (breq->mnt_userns != &init_user_ns) { + if (breq->idmap != &nop_mnt_idmap) { xfs_warn_ratelimited(breq->mp, "bulkstat not supported inside of idmapped mounts."); return -EINVAL; diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index e2d0eba43f35..1659f13f17a8 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -8,7 +8,7 @@ /* In-memory representation of a userspace request for batch inode data. */ struct xfs_ibulk { struct xfs_mount *mp; - struct user_namespace *mnt_userns; + struct mnt_idmap *idmap; void __user *ubuffer; /* user output buffer */ xfs_ino_t startino; /* start with this inode */ unsigned int icount; /* number of elements in ubuffer */ diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index 7558486f4937..21be93bf006d 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -275,7 +275,7 @@ xfs_iwalk_ag_start( /* Set up a fresh cursor and empty the inobt cache. */ iwag->nr_recs = 0; - error = xfs_inobt_cur(mp, tp, pag, XFS_BTNUM_INO, curpp, agi_bpp); + error = xfs_inobt_cur(pag, tp, XFS_BTNUM_INO, curpp, agi_bpp); if (error) return error; @@ -390,7 +390,7 @@ xfs_iwalk_run_callbacks( } /* ...and recreate the cursor just past where we left off. */ - error = xfs_inobt_cur(mp, iwag->tp, iwag->pag, XFS_BTNUM_INO, curpp, + error = xfs_inobt_cur(iwag->pag, iwag->tp, XFS_BTNUM_INO, curpp, agi_bpp); if (error) return error; @@ -591,7 +591,7 @@ xfs_iwalk( } if (iwag.pag) - xfs_perag_put(pag); + xfs_perag_rele(pag); xfs_iwalk_free(&iwag); return error; } @@ -683,7 +683,7 @@ xfs_iwalk_threaded( break; } if (pag) - xfs_perag_put(pag); + xfs_perag_rele(pag); if (polled) xfs_pwork_poll(&pctl); return xfs_pwork_destroy(&pctl); @@ -776,7 +776,7 @@ xfs_inobt_walk( } if (iwag.pag) - xfs_perag_put(pag); + xfs_perag_rele(pag); xfs_iwalk_free(&iwag); return error; } diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index f9878021e7d0..e88f18f85e4b 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -34,6 +34,7 @@ typedef __u32 xfs_nlink_t; #include <linux/module.h> #include <linux/mutex.h> #include <linux/file.h> +#include <linux/filelock.h> #include <linux/swap.h> #include <linux/errno.h> #include <linux/sched/signal.h> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 8aca2cc173ac..f3269c0626f0 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -210,8 +210,7 @@ typedef struct xfs_mount { struct xfs_error_cfg m_error_cfg[XFS_ERR_CLASS_MAX][XFS_ERR_ERRNO_MAX]; struct xstats m_stats; /* per-fs stats */ xfs_agnumber_t m_agfrotor; /* last ag where space found */ - xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */ - spinlock_t m_agirotor_lock;/* .. and lock protecting it */ + atomic_t m_agirotor; /* last ag dir inode alloced */ /* Memory shrinker to throttle and reprioritize inodegc */ struct shrinker m_inodegc_shrinker; diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 38d23f0e703a..23d16186e1a3 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -322,7 +322,7 @@ xfs_fs_commit_blocks( xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); - setattr_copy(&init_user_ns, inode, iattr); + setattr_copy(&nop_mnt_idmap, inode, iattr); if (update_isize) { i_size_write(inode, iattr->ia_size); ip->i_disk_size = iattr->ia_size; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index ff53d40a2dae..7dc0db7f5a76 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -68,7 +68,7 @@ restart: while (1) { struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH]; - int error = 0; + int error; int i; mutex_lock(&qi->qi_tree_lock); @@ -787,7 +787,7 @@ xfs_qm_qino_alloc( error = xfs_dialloc(&tp, 0, S_IFREG, &ino); if (!error) - error = xfs_init_new_inode(&init_user_ns, tp, NULL, ino, + error = xfs_init_new_inode(&nop_mnt_idmap, tp, NULL, ino, S_IFREG, 1, 0, 0, false, ipp); if (error) { xfs_trans_cancel(tp); diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 858e3e9eb4a8..48d771a76add 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -252,17 +252,12 @@ static int xfs_trans_log_finish_refcount_update( struct xfs_trans *tp, struct xfs_cud_log_item *cudp, - enum xfs_refcount_intent_type type, - xfs_fsblock_t startblock, - xfs_extlen_t blockcount, - xfs_fsblock_t *new_fsb, - xfs_extlen_t *new_len, + struct xfs_refcount_intent *ri, struct xfs_btree_cur **pcur) { int error; - error = xfs_refcount_finish_one(tp, type, startblock, - blockcount, new_fsb, new_len, pcur); + error = xfs_refcount_finish_one(tp, ri, pcur); /* * Mark the transaction dirty, even on error. This ensures the @@ -297,16 +292,16 @@ xfs_refcount_update_diff_items( /* Set the phys extent flags for this reverse mapping. */ static void xfs_trans_set_refcount_flags( - struct xfs_phys_extent *refc, + struct xfs_phys_extent *pmap, enum xfs_refcount_intent_type type) { - refc->pe_flags = 0; + pmap->pe_flags = 0; switch (type) { case XFS_REFCOUNT_INCREASE: case XFS_REFCOUNT_DECREASE: case XFS_REFCOUNT_ALLOC_COW: case XFS_REFCOUNT_FREE_COW: - refc->pe_flags |= type; + pmap->pe_flags |= type; break; default: ASSERT(0); @@ -318,10 +313,10 @@ STATIC void xfs_refcount_update_log_item( struct xfs_trans *tp, struct xfs_cui_log_item *cuip, - struct xfs_refcount_intent *refc) + struct xfs_refcount_intent *ri) { uint next_extent; - struct xfs_phys_extent *ext; + struct xfs_phys_extent *pmap; tp->t_flags |= XFS_TRANS_DIRTY; set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags); @@ -333,10 +328,10 @@ xfs_refcount_update_log_item( */ next_extent = atomic_inc_return(&cuip->cui_next_extent) - 1; ASSERT(next_extent < cuip->cui_format.cui_nextents); - ext = &cuip->cui_format.cui_extents[next_extent]; - ext->pe_startblock = refc->ri_startblock; - ext->pe_len = refc->ri_blockcount; - xfs_trans_set_refcount_flags(ext, refc->ri_type); + pmap = &cuip->cui_format.cui_extents[next_extent]; + pmap->pe_startblock = ri->ri_startblock; + pmap->pe_len = ri->ri_blockcount; + xfs_trans_set_refcount_flags(pmap, ri->ri_type); } static struct xfs_log_item * @@ -348,15 +343,15 @@ xfs_refcount_update_create_intent( { struct xfs_mount *mp = tp->t_mountp; struct xfs_cui_log_item *cuip = xfs_cui_init(mp, count); - struct xfs_refcount_intent *refc; + struct xfs_refcount_intent *ri; ASSERT(count > 0); xfs_trans_add_item(tp, &cuip->cui_item); if (sort) list_sort(mp, items, xfs_refcount_update_diff_items); - list_for_each_entry(refc, items, ri_list) - xfs_refcount_update_log_item(tp, cuip, refc); + list_for_each_entry(ri, items, ri_list) + xfs_refcount_update_log_item(tp, cuip, ri); return &cuip->cui_item; } @@ -378,25 +373,20 @@ xfs_refcount_update_finish_item( struct list_head *item, struct xfs_btree_cur **state) { - struct xfs_refcount_intent *refc; - xfs_fsblock_t new_fsb; - xfs_extlen_t new_aglen; + struct xfs_refcount_intent *ri; int error; - refc = container_of(item, struct xfs_refcount_intent, ri_list); - error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done), - refc->ri_type, refc->ri_startblock, refc->ri_blockcount, - &new_fsb, &new_aglen, state); + ri = container_of(item, struct xfs_refcount_intent, ri_list); + error = xfs_trans_log_finish_refcount_update(tp, CUD_ITEM(done), ri, + state); /* Did we run out of reservation? Requeue what we didn't finish. */ - if (!error && new_aglen > 0) { - ASSERT(refc->ri_type == XFS_REFCOUNT_INCREASE || - refc->ri_type == XFS_REFCOUNT_DECREASE); - refc->ri_startblock = new_fsb; - refc->ri_blockcount = new_aglen; + if (!error && ri->ri_blockcount > 0) { + ASSERT(ri->ri_type == XFS_REFCOUNT_INCREASE || + ri->ri_type == XFS_REFCOUNT_DECREASE); return -EAGAIN; } - kmem_cache_free(xfs_refcount_intent_cache, refc); + kmem_cache_free(xfs_refcount_intent_cache, ri); return error; } @@ -413,10 +403,10 @@ STATIC void xfs_refcount_update_cancel_item( struct list_head *item) { - struct xfs_refcount_intent *refc; + struct xfs_refcount_intent *ri; - refc = container_of(item, struct xfs_refcount_intent, ri_list); - kmem_cache_free(xfs_refcount_intent_cache, refc); + ri = container_of(item, struct xfs_refcount_intent, ri_list); + kmem_cache_free(xfs_refcount_intent_cache, ri); } const struct xfs_defer_op_type xfs_refcount_update_defer_type = { @@ -433,15 +423,15 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = { static inline bool xfs_cui_validate_phys( struct xfs_mount *mp, - struct xfs_phys_extent *refc) + struct xfs_phys_extent *pmap) { if (!xfs_has_reflink(mp)) return false; - if (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS) + if (pmap->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS) return false; - switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { + switch (pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { case XFS_REFCOUNT_INCREASE: case XFS_REFCOUNT_DECREASE: case XFS_REFCOUNT_ALLOC_COW: @@ -451,7 +441,7 @@ xfs_cui_validate_phys( return false; } - return xfs_verify_fsbext(mp, refc->pe_startblock, refc->pe_len); + return xfs_verify_fsbext(mp, pmap->pe_startblock, pmap->pe_len); } /* @@ -463,18 +453,13 @@ xfs_cui_item_recover( struct xfs_log_item *lip, struct list_head *capture_list) { - struct xfs_bmbt_irec irec; struct xfs_cui_log_item *cuip = CUI_ITEM(lip); - struct xfs_phys_extent *refc; struct xfs_cud_log_item *cudp; struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; struct xfs_mount *mp = lip->li_log->l_mp; - xfs_fsblock_t new_fsb; - xfs_extlen_t new_len; unsigned int refc_type; bool requeue_only = false; - enum xfs_refcount_intent_type type; int i; int error = 0; @@ -513,14 +498,17 @@ xfs_cui_item_recover( cudp = xfs_trans_get_cud(tp, cuip); for (i = 0; i < cuip->cui_format.cui_nextents; i++) { - refc = &cuip->cui_format.cui_extents[i]; - refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; + struct xfs_refcount_intent fake = { }; + struct xfs_phys_extent *pmap; + + pmap = &cuip->cui_format.cui_extents[i]; + refc_type = pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; switch (refc_type) { case XFS_REFCOUNT_INCREASE: case XFS_REFCOUNT_DECREASE: case XFS_REFCOUNT_ALLOC_COW: case XFS_REFCOUNT_FREE_COW: - type = refc_type; + fake.ri_type = refc_type; break; default: XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, @@ -529,13 +517,12 @@ xfs_cui_item_recover( error = -EFSCORRUPTED; goto abort_error; } - if (requeue_only) { - new_fsb = refc->pe_startblock; - new_len = refc->pe_len; - } else + + fake.ri_startblock = pmap->pe_startblock; + fake.ri_blockcount = pmap->pe_len; + if (!requeue_only) error = xfs_trans_log_finish_refcount_update(tp, cudp, - type, refc->pe_startblock, refc->pe_len, - &new_fsb, &new_len, &rcur); + &fake, &rcur); if (error == -EFSCORRUPTED) XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, &cuip->cui_format, @@ -544,10 +531,13 @@ xfs_cui_item_recover( goto abort_error; /* Requeue what we didn't finish. */ - if (new_len > 0) { - irec.br_startblock = new_fsb; - irec.br_blockcount = new_len; - switch (type) { + if (fake.ri_blockcount > 0) { + struct xfs_bmbt_irec irec = { + .br_startblock = fake.ri_startblock, + .br_blockcount = fake.ri_blockcount, + }; + + switch (fake.ri_type) { case XFS_REFCOUNT_INCREASE: xfs_refcount_increase_extent(tp, &irec); break; @@ -596,18 +586,18 @@ xfs_cui_item_relog( { struct xfs_cud_log_item *cudp; struct xfs_cui_log_item *cuip; - struct xfs_phys_extent *extp; + struct xfs_phys_extent *pmap; unsigned int count; count = CUI_ITEM(intent)->cui_format.cui_nextents; - extp = CUI_ITEM(intent)->cui_format.cui_extents; + pmap = CUI_ITEM(intent)->cui_format.cui_extents; tp->t_flags |= XFS_TRANS_DIRTY; cudp = xfs_trans_get_cud(tp, CUI_ITEM(intent)); set_bit(XFS_LI_DIRTY, &cudp->cud_item.li_flags); cuip = xfs_cui_init(tp->t_mountp, count); - memcpy(cuip->cui_format.cui_extents, extp, count * sizeof(*extp)); + memcpy(cuip->cui_format.cui_extents, pmap, count * sizeof(*pmap)); atomic_set(&cuip->cui_next_extent, count); xfs_trans_add_item(tp, &cuip->cui_item); set_bit(XFS_LI_DIRTY, &cuip->cui_item.li_flags); diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index fe46bce8cae6..f5dc46ce9803 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -416,8 +416,6 @@ xfs_reflink_fill_cow_hole( goto convert; } - ASSERT(cmap->br_startoff > imap->br_startoff); - /* Allocate the entire reservation as unwritten blocks. */ nimaps = 1; error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount, @@ -612,7 +610,7 @@ xfs_reflink_cancel_cow_blocks( if (error) break; } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { - ASSERT((*tpp)->t_firstblock == NULLFSBLOCK); + ASSERT((*tpp)->t_highest_agno == NULLAGNUMBER); /* Free the CoW orphan record. */ xfs_refcount_free_cow_extent(*tpp, del.br_startblock, @@ -929,7 +927,7 @@ xfs_reflink_recover_cow( for_each_perag(mp, agno, pag) { error = xfs_refcount_recover_cow_leftovers(mp, pag); if (error) { - xfs_perag_put(pag); + xfs_perag_rele(pag); break; } } diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 534504ede1a3..a1619d67015f 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -244,40 +244,40 @@ xfs_trans_get_rud( /* Set the map extent flags for this reverse mapping. */ static void xfs_trans_set_rmap_flags( - struct xfs_map_extent *rmap, + struct xfs_map_extent *map, enum xfs_rmap_intent_type type, int whichfork, xfs_exntst_t state) { - rmap->me_flags = 0; + map->me_flags = 0; if (state == XFS_EXT_UNWRITTEN) - rmap->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN; + map->me_flags |= XFS_RMAP_EXTENT_UNWRITTEN; if (whichfork == XFS_ATTR_FORK) - rmap->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK; + map->me_flags |= XFS_RMAP_EXTENT_ATTR_FORK; switch (type) { case XFS_RMAP_MAP: - rmap->me_flags |= XFS_RMAP_EXTENT_MAP; + map->me_flags |= XFS_RMAP_EXTENT_MAP; break; case XFS_RMAP_MAP_SHARED: - rmap->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED; + map->me_flags |= XFS_RMAP_EXTENT_MAP_SHARED; break; case XFS_RMAP_UNMAP: - rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP; + map->me_flags |= XFS_RMAP_EXTENT_UNMAP; break; case XFS_RMAP_UNMAP_SHARED: - rmap->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED; + map->me_flags |= XFS_RMAP_EXTENT_UNMAP_SHARED; break; case XFS_RMAP_CONVERT: - rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT; + map->me_flags |= XFS_RMAP_EXTENT_CONVERT; break; case XFS_RMAP_CONVERT_SHARED: - rmap->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED; + map->me_flags |= XFS_RMAP_EXTENT_CONVERT_SHARED; break; case XFS_RMAP_ALLOC: - rmap->me_flags |= XFS_RMAP_EXTENT_ALLOC; + map->me_flags |= XFS_RMAP_EXTENT_ALLOC; break; case XFS_RMAP_FREE: - rmap->me_flags |= XFS_RMAP_EXTENT_FREE; + map->me_flags |= XFS_RMAP_EXTENT_FREE; break; default: ASSERT(0); @@ -293,19 +293,12 @@ static int xfs_trans_log_finish_rmap_update( struct xfs_trans *tp, struct xfs_rud_log_item *rudp, - enum xfs_rmap_intent_type type, - uint64_t owner, - int whichfork, - xfs_fileoff_t startoff, - xfs_fsblock_t startblock, - xfs_filblks_t blockcount, - xfs_exntst_t state, + struct xfs_rmap_intent *ri, struct xfs_btree_cur **pcur) { int error; - error = xfs_rmap_finish_one(tp, type, owner, whichfork, startoff, - startblock, blockcount, state, pcur); + error = xfs_rmap_finish_one(tp, ri, pcur); /* * Mark the transaction dirty, even on error. This ensures the @@ -342,7 +335,7 @@ STATIC void xfs_rmap_update_log_item( struct xfs_trans *tp, struct xfs_rui_log_item *ruip, - struct xfs_rmap_intent *rmap) + struct xfs_rmap_intent *ri) { uint next_extent; struct xfs_map_extent *map; @@ -358,12 +351,12 @@ xfs_rmap_update_log_item( next_extent = atomic_inc_return(&ruip->rui_next_extent) - 1; ASSERT(next_extent < ruip->rui_format.rui_nextents); map = &ruip->rui_format.rui_extents[next_extent]; - map->me_owner = rmap->ri_owner; - map->me_startblock = rmap->ri_bmap.br_startblock; - map->me_startoff = rmap->ri_bmap.br_startoff; - map->me_len = rmap->ri_bmap.br_blockcount; - xfs_trans_set_rmap_flags(map, rmap->ri_type, rmap->ri_whichfork, - rmap->ri_bmap.br_state); + map->me_owner = ri->ri_owner; + map->me_startblock = ri->ri_bmap.br_startblock; + map->me_startoff = ri->ri_bmap.br_startoff; + map->me_len = ri->ri_bmap.br_blockcount; + xfs_trans_set_rmap_flags(map, ri->ri_type, ri->ri_whichfork, + ri->ri_bmap.br_state); } static struct xfs_log_item * @@ -375,15 +368,15 @@ xfs_rmap_update_create_intent( { struct xfs_mount *mp = tp->t_mountp; struct xfs_rui_log_item *ruip = xfs_rui_init(mp, count); - struct xfs_rmap_intent *rmap; + struct xfs_rmap_intent *ri; ASSERT(count > 0); xfs_trans_add_item(tp, &ruip->rui_item); if (sort) list_sort(mp, items, xfs_rmap_update_diff_items); - list_for_each_entry(rmap, items, ri_list) - xfs_rmap_update_log_item(tp, ruip, rmap); + list_for_each_entry(ri, items, ri_list) + xfs_rmap_update_log_item(tp, ruip, ri); return &ruip->rui_item; } @@ -405,16 +398,14 @@ xfs_rmap_update_finish_item( struct list_head *item, struct xfs_btree_cur **state) { - struct xfs_rmap_intent *rmap; + struct xfs_rmap_intent *ri; int error; - rmap = container_of(item, struct xfs_rmap_intent, ri_list); - error = xfs_trans_log_finish_rmap_update(tp, RUD_ITEM(done), - rmap->ri_type, rmap->ri_owner, rmap->ri_whichfork, - rmap->ri_bmap.br_startoff, rmap->ri_bmap.br_startblock, - rmap->ri_bmap.br_blockcount, rmap->ri_bmap.br_state, + ri = container_of(item, struct xfs_rmap_intent, ri_list); + + error = xfs_trans_log_finish_rmap_update(tp, RUD_ITEM(done), ri, state); - kmem_cache_free(xfs_rmap_intent_cache, rmap); + kmem_cache_free(xfs_rmap_intent_cache, ri); return error; } @@ -431,10 +422,10 @@ STATIC void xfs_rmap_update_cancel_item( struct list_head *item) { - struct xfs_rmap_intent *rmap; + struct xfs_rmap_intent *ri; - rmap = container_of(item, struct xfs_rmap_intent, ri_list); - kmem_cache_free(xfs_rmap_intent_cache, rmap); + ri = container_of(item, struct xfs_rmap_intent, ri_list); + kmem_cache_free(xfs_rmap_intent_cache, ri); } const struct xfs_defer_op_type xfs_rmap_update_defer_type = { @@ -451,15 +442,15 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = { static inline bool xfs_rui_validate_map( struct xfs_mount *mp, - struct xfs_map_extent *rmap) + struct xfs_map_extent *map) { if (!xfs_has_rmapbt(mp)) return false; - if (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS) + if (map->me_flags & ~XFS_RMAP_EXTENT_FLAGS) return false; - switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) { + switch (map->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) { case XFS_RMAP_EXTENT_MAP: case XFS_RMAP_EXTENT_MAP_SHARED: case XFS_RMAP_EXTENT_UNMAP: @@ -473,14 +464,14 @@ xfs_rui_validate_map( return false; } - if (!XFS_RMAP_NON_INODE_OWNER(rmap->me_owner) && - !xfs_verify_ino(mp, rmap->me_owner)) + if (!XFS_RMAP_NON_INODE_OWNER(map->me_owner) && + !xfs_verify_ino(mp, map->me_owner)) return false; - if (!xfs_verify_fileext(mp, rmap->me_startoff, rmap->me_len)) + if (!xfs_verify_fileext(mp, map->me_startoff, map->me_len)) return false; - return xfs_verify_fsbext(mp, rmap->me_startblock, rmap->me_len); + return xfs_verify_fsbext(mp, map->me_startblock, map->me_len); } /* @@ -493,15 +484,11 @@ xfs_rui_item_recover( struct list_head *capture_list) { struct xfs_rui_log_item *ruip = RUI_ITEM(lip); - struct xfs_map_extent *rmap; struct xfs_rud_log_item *rudp; struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; struct xfs_mount *mp = lip->li_log->l_mp; - enum xfs_rmap_intent_type type; - xfs_exntst_t state; int i; - int whichfork; int error = 0; /* @@ -526,35 +513,34 @@ xfs_rui_item_recover( rudp = xfs_trans_get_rud(tp, ruip); for (i = 0; i < ruip->rui_format.rui_nextents; i++) { - rmap = &ruip->rui_format.rui_extents[i]; - state = (rmap->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ? - XFS_EXT_UNWRITTEN : XFS_EXT_NORM; - whichfork = (rmap->me_flags & XFS_RMAP_EXTENT_ATTR_FORK) ? - XFS_ATTR_FORK : XFS_DATA_FORK; - switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) { + struct xfs_rmap_intent fake = { }; + struct xfs_map_extent *map; + + map = &ruip->rui_format.rui_extents[i]; + switch (map->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) { case XFS_RMAP_EXTENT_MAP: - type = XFS_RMAP_MAP; + fake.ri_type = XFS_RMAP_MAP; break; case XFS_RMAP_EXTENT_MAP_SHARED: - type = XFS_RMAP_MAP_SHARED; + fake.ri_type = XFS_RMAP_MAP_SHARED; break; case XFS_RMAP_EXTENT_UNMAP: - type = XFS_RMAP_UNMAP; + fake.ri_type = XFS_RMAP_UNMAP; break; case XFS_RMAP_EXTENT_UNMAP_SHARED: - type = XFS_RMAP_UNMAP_SHARED; + fake.ri_type = XFS_RMAP_UNMAP_SHARED; break; case XFS_RMAP_EXTENT_CONVERT: - type = XFS_RMAP_CONVERT; + fake.ri_type = XFS_RMAP_CONVERT; break; case XFS_RMAP_EXTENT_CONVERT_SHARED: - type = XFS_RMAP_CONVERT_SHARED; + fake.ri_type = XFS_RMAP_CONVERT_SHARED; break; case XFS_RMAP_EXTENT_ALLOC: - type = XFS_RMAP_ALLOC; + fake.ri_type = XFS_RMAP_ALLOC; break; case XFS_RMAP_EXTENT_FREE: - type = XFS_RMAP_FREE; + fake.ri_type = XFS_RMAP_FREE; break; default: XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, @@ -563,13 +549,21 @@ xfs_rui_item_recover( error = -EFSCORRUPTED; goto abort_error; } - error = xfs_trans_log_finish_rmap_update(tp, rudp, type, - rmap->me_owner, whichfork, - rmap->me_startoff, rmap->me_startblock, - rmap->me_len, state, &rcur); + + fake.ri_owner = map->me_owner; + fake.ri_whichfork = (map->me_flags & XFS_RMAP_EXTENT_ATTR_FORK) ? + XFS_ATTR_FORK : XFS_DATA_FORK; + fake.ri_bmap.br_startblock = map->me_startblock; + fake.ri_bmap.br_startoff = map->me_startoff; + fake.ri_bmap.br_blockcount = map->me_len; + fake.ri_bmap.br_state = (map->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ? + XFS_EXT_UNWRITTEN : XFS_EXT_NORM; + + error = xfs_trans_log_finish_rmap_update(tp, rudp, &fake, + &rcur); if (error == -EFSCORRUPTED) XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, - rmap, sizeof(*rmap)); + map, sizeof(*map)); if (error) goto abort_error; @@ -600,18 +594,18 @@ xfs_rui_item_relog( { struct xfs_rud_log_item *rudp; struct xfs_rui_log_item *ruip; - struct xfs_map_extent *extp; + struct xfs_map_extent *map; unsigned int count; count = RUI_ITEM(intent)->rui_format.rui_nextents; - extp = RUI_ITEM(intent)->rui_format.rui_extents; + map = RUI_ITEM(intent)->rui_format.rui_extents; tp->t_flags |= XFS_TRANS_DIRTY; rudp = xfs_trans_get_rud(tp, RUI_ITEM(intent)); set_bit(XFS_LI_DIRTY, &rudp->rud_item.li_flags); ruip = xfs_rui_init(tp->t_mountp, count); - memcpy(ruip->rui_format.rui_extents, extp, count * sizeof(*extp)); + memcpy(ruip->rui_format.rui_extents, map, count * sizeof(*map)); atomic_set(&ruip->rui_next_extent, count); xfs_trans_add_item(tp, &ruip->rui_item); set_bit(XFS_LI_DIRTY, &ruip->rui_item.li_flags); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 0c4b73e9b29d..2479b5cbd75e 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -247,6 +247,32 @@ xfs_fs_show_options( return 0; } +static bool +xfs_set_inode_alloc_perag( + struct xfs_perag *pag, + xfs_ino_t ino, + xfs_agnumber_t max_metadata) +{ + if (!xfs_is_inode32(pag->pag_mount)) { + set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); + clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); + return false; + } + + if (ino > XFS_MAXINUMBER_32) { + clear_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); + clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); + return false; + } + + set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate); + if (pag->pag_agno < max_metadata) + set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); + else + clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate); + return true; +} + /* * Set parameters for inode allocation heuristics, taking into account * filesystem size and inode32/inode64 mount options; i.e. specifically @@ -310,24 +336,8 @@ xfs_set_inode_alloc( ino = XFS_AGINO_TO_INO(mp, index, agino); pag = xfs_perag_get(mp, index); - - if (xfs_is_inode32(mp)) { - if (ino > XFS_MAXINUMBER_32) { - pag->pagi_inodeok = 0; - pag->pagf_metadata = 0; - } else { - pag->pagi_inodeok = 1; - maxagi++; - if (index < max_metadata) - pag->pagf_metadata = 1; - else - pag->pagf_metadata = 0; - } - } else { - pag->pagi_inodeok = 1; - pag->pagf_metadata = 0; - } - + if (xfs_set_inode_alloc_perag(pag, ino, max_metadata)) + maxagi++; xfs_perag_put(pag); } @@ -1922,7 +1932,6 @@ static int xfs_init_fs_context( return -ENOMEM; spin_lock_init(&mp->m_sb_lock); - spin_lock_init(&mp->m_agirotor_lock); INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC); spin_lock_init(&mp->m_perag_lock); mutex_init(&mp->m_growlock); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 8389f3ef88ef..85e433df6a3f 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -144,7 +144,7 @@ xfs_readlink( int xfs_symlink( - struct user_namespace *mnt_userns, + struct mnt_idmap *idmap, struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, @@ -193,8 +193,8 @@ xfs_symlink( /* * Make sure that we have allocated dquot(s) on disk. */ - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), - mapped_fsgid(mnt_userns, &init_user_ns), prid, + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), + mapped_fsgid(idmap, &init_user_ns), prid, XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, &udqp, &gdqp, &pdqp); if (error) @@ -231,7 +231,7 @@ xfs_symlink( */ error = xfs_dialloc(&tp, dp->i_ino, S_IFLNK, &ino); if (!error) - error = xfs_init_new_inode(mnt_userns, tp, dp, ino, + error = xfs_init_new_inode(idmap, tp, dp, ino, S_IFLNK | (mode & ~S_IFMT), 1, 0, prid, false, &ip); if (error) diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h index 2586b7e393f3..d1ca1ce62a93 100644 --- a/fs/xfs/xfs_symlink.h +++ b/fs/xfs/xfs_symlink.h @@ -7,7 +7,7 @@ /* Kernel only symlink definitions */ -int xfs_symlink(struct user_namespace *mnt_userns, struct xfs_inode *dp, +int xfs_symlink(struct mnt_idmap *idmap, struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, umode_t mode, struct xfs_inode **ipp); int xfs_readlink_bmap_ilocked(struct xfs_inode *ip, char *link); diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c index f7faf6e70d7f..a3c6b1548723 100644 --- a/fs/xfs/xfs_sysfs.c +++ b/fs/xfs/xfs_sysfs.c @@ -69,7 +69,7 @@ static struct attribute *xfs_mp_attrs[] = { }; ATTRIBUTE_GROUPS(xfs_mp); -struct kobj_type xfs_mp_ktype = { +const struct kobj_type xfs_mp_ktype = { .release = xfs_sysfs_release, .sysfs_ops = &xfs_sysfs_ops, .default_groups = xfs_mp_groups, @@ -266,7 +266,7 @@ static struct attribute *xfs_dbg_attrs[] = { }; ATTRIBUTE_GROUPS(xfs_dbg); -struct kobj_type xfs_dbg_ktype = { +const struct kobj_type xfs_dbg_ktype = { .release = xfs_sysfs_release, .sysfs_ops = &xfs_sysfs_ops, .default_groups = xfs_dbg_groups, @@ -324,7 +324,7 @@ static struct attribute *xfs_stats_attrs[] = { }; ATTRIBUTE_GROUPS(xfs_stats); -struct kobj_type xfs_stats_ktype = { +const struct kobj_type xfs_stats_ktype = { .release = xfs_sysfs_release, .sysfs_ops = &xfs_sysfs_ops, .default_groups = xfs_stats_groups, @@ -410,7 +410,7 @@ static struct attribute *xfs_log_attrs[] = { }; ATTRIBUTE_GROUPS(xfs_log); -struct kobj_type xfs_log_ktype = { +const struct kobj_type xfs_log_ktype = { .release = xfs_sysfs_release, .sysfs_ops = &xfs_sysfs_ops, .default_groups = xfs_log_groups, @@ -564,13 +564,13 @@ static struct attribute *xfs_error_attrs[] = { }; ATTRIBUTE_GROUPS(xfs_error); -static struct kobj_type xfs_error_cfg_ktype = { +static const struct kobj_type xfs_error_cfg_ktype = { .release = xfs_sysfs_release, .sysfs_ops = &xfs_sysfs_ops, .default_groups = xfs_error_groups, }; -static struct kobj_type xfs_error_ktype = { +static const struct kobj_type xfs_error_ktype = { .release = xfs_sysfs_release, .sysfs_ops = &xfs_sysfs_ops, }; diff --git a/fs/xfs/xfs_sysfs.h b/fs/xfs/xfs_sysfs.h index 513095e353a5..148893ebfdef 100644 --- a/fs/xfs/xfs_sysfs.h +++ b/fs/xfs/xfs_sysfs.h @@ -7,10 +7,10 @@ #ifndef __XFS_SYSFS_H__ #define __XFS_SYSFS_H__ -extern struct kobj_type xfs_mp_ktype; /* xfs_mount */ -extern struct kobj_type xfs_dbg_ktype; /* debug */ -extern struct kobj_type xfs_log_ktype; /* xlog */ -extern struct kobj_type xfs_stats_ktype; /* stats */ +extern const struct kobj_type xfs_mp_ktype; /* xfs_mount */ +extern const struct kobj_type xfs_dbg_ktype; /* debug */ +extern const struct kobj_type xfs_log_ktype; /* xlog */ +extern const struct kobj_type xfs_stats_ktype; /* stats */ static inline struct xfs_kobj * to_kobj(struct kobject *kobject) @@ -28,7 +28,7 @@ xfs_sysfs_release(struct kobject *kobject) static inline int xfs_sysfs_init( struct xfs_kobj *kobj, - struct kobj_type *ktype, + const struct kobj_type *ktype, struct xfs_kobj *parent_kobj, const char *name) { diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 421d1e504ac4..7dc0fd6a6504 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -74,6 +74,7 @@ struct xfs_inobt_rec_incore; union xfs_btree_ptr; struct xfs_dqtrx; struct xfs_icwalk; +struct xfs_perag; #define XFS_ATTR_FILTER_FLAGS \ { XFS_ATTR_ROOT, "ROOT" }, \ @@ -159,36 +160,40 @@ TRACE_EVENT(xlog_intent_recovery_failed, ); DECLARE_EVENT_CLASS(xfs_perag_class, - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, - unsigned long caller_ip), - TP_ARGS(mp, agno, refcount, caller_ip), + TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip), + TP_ARGS(pag, caller_ip), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(int, refcount) + __field(int, active_refcount) __field(unsigned long, caller_ip) ), TP_fast_assign( - __entry->dev = mp->m_super->s_dev; - __entry->agno = agno; - __entry->refcount = refcount; + __entry->dev = pag->pag_mount->m_super->s_dev; + __entry->agno = pag->pag_agno; + __entry->refcount = atomic_read(&pag->pag_ref); + __entry->active_refcount = atomic_read(&pag->pag_active_ref); __entry->caller_ip = caller_ip; ), - TP_printk("dev %d:%d agno 0x%x refcount %d caller %pS", + TP_printk("dev %d:%d agno 0x%x passive refs %d active refs %d caller %pS", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->refcount, + __entry->active_refcount, (char *)__entry->caller_ip) ); #define DEFINE_PERAG_REF_EVENT(name) \ DEFINE_EVENT(xfs_perag_class, name, \ - TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, \ - unsigned long caller_ip), \ - TP_ARGS(mp, agno, refcount, caller_ip)) + TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip), \ + TP_ARGS(pag, caller_ip)) DEFINE_PERAG_REF_EVENT(xfs_perag_get); DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_put); +DEFINE_PERAG_REF_EVENT(xfs_perag_grab); +DEFINE_PERAG_REF_EVENT(xfs_perag_grab_tag); +DEFINE_PERAG_REF_EVENT(xfs_perag_rele); DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag); DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag); @@ -634,8 +639,8 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release); DEFINE_BUF_ITEM_EVENT(xfs_trans_binval); DECLARE_EVENT_CLASS(xfs_filestream_class, - TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno), - TP_ARGS(mp, ino, agno), + TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino), + TP_ARGS(pag, ino), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) @@ -643,10 +648,10 @@ DECLARE_EVENT_CLASS(xfs_filestream_class, __field(int, streams) ), TP_fast_assign( - __entry->dev = mp->m_super->s_dev; + __entry->dev = pag->pag_mount->m_super->s_dev; __entry->ino = ino; - __entry->agno = agno; - __entry->streams = xfs_filestream_peek_ag(mp, agno); + __entry->agno = pag->pag_agno; + __entry->streams = atomic_read(&pag->pagf_fstrms); ), TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d", MAJOR(__entry->dev), MINOR(__entry->dev), @@ -656,39 +661,40 @@ DECLARE_EVENT_CLASS(xfs_filestream_class, ) #define DEFINE_FILESTREAM_EVENT(name) \ DEFINE_EVENT(xfs_filestream_class, name, \ - TP_PROTO(struct xfs_mount *mp, xfs_ino_t ino, xfs_agnumber_t agno), \ - TP_ARGS(mp, ino, agno)) + TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino), \ + TP_ARGS(pag, ino)) DEFINE_FILESTREAM_EVENT(xfs_filestream_free); DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup); DEFINE_FILESTREAM_EVENT(xfs_filestream_scan); TRACE_EVENT(xfs_filestream_pick, - TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno, - xfs_extlen_t free, int nscan), - TP_ARGS(ip, agno, free, nscan), + TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino, xfs_extlen_t free), + TP_ARGS(pag, ino, free), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_ino_t, ino) __field(xfs_agnumber_t, agno) __field(int, streams) __field(xfs_extlen_t, free) - __field(int, nscan) ), TP_fast_assign( - __entry->dev = VFS_I(ip)->i_sb->s_dev; - __entry->ino = ip->i_ino; - __entry->agno = agno; - __entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); + __entry->dev = pag->pag_mount->m_super->s_dev; + __entry->ino = ino; + if (pag) { + __entry->agno = pag->pag_agno; + __entry->streams = atomic_read(&pag->pagf_fstrms); + } else { + __entry->agno = NULLAGNUMBER; + __entry->streams = 0; + } __entry->free = free; - __entry->nscan = nscan; ), - TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d nscan %d", + TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino, __entry->agno, __entry->streams, - __entry->free, - __entry->nscan) + __entry->free) ); DECLARE_EVENT_CLASS(xfs_lock_class, @@ -1795,13 +1801,11 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, __field(xfs_extlen_t, alignment) __field(xfs_extlen_t, minalignslop) __field(xfs_extlen_t, len) - __field(short, type) - __field(short, otype) __field(char, wasdel) __field(char, wasfromfl) __field(int, resv) __field(int, datatype) - __field(xfs_fsblock_t, firstblock) + __field(xfs_agnumber_t, highest_agno) ), TP_fast_assign( __entry->dev = args->mp->m_super->s_dev; @@ -1816,18 +1820,16 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, __entry->alignment = args->alignment; __entry->minalignslop = args->minalignslop; __entry->len = args->len; - __entry->type = args->type; - __entry->otype = args->otype; __entry->wasdel = args->wasdel; __entry->wasfromfl = args->wasfromfl; __entry->resv = args->resv; __entry->datatype = args->datatype; - __entry->firstblock = args->tp->t_firstblock; + __entry->highest_agno = args->tp->t_highest_agno; ), TP_printk("dev %d:%d agno 0x%x agbno 0x%x minlen %u maxlen %u mod %u " "prod %u minleft %u total %u alignment %u minalignslop %u " - "len %u type %s otype %s wasdel %d wasfromfl %d resv %d " - "datatype 0x%x firstblock 0x%llx", + "len %u wasdel %d wasfromfl %d resv %d " + "datatype 0x%x highest_agno 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno, __entry->agbno, @@ -1840,13 +1842,11 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, __entry->alignment, __entry->minalignslop, __entry->len, - __print_symbolic(__entry->type, XFS_ALLOC_TYPES), - __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), __entry->wasdel, __entry->wasfromfl, __entry->resv, __entry->datatype, - (unsigned long long)__entry->firstblock) + __entry->highest_agno) ) #define DEFINE_ALLOC_EVENT(name) \ @@ -1877,6 +1877,7 @@ DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); DEFINE_ALLOC_EVENT(xfs_alloc_small_done); DEFINE_ALLOC_EVENT(xfs_alloc_small_error); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_badargs); +DEFINE_ALLOC_EVENT(xfs_alloc_vextent_skip_deadlock); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_nofix); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp); DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed); @@ -3207,17 +3208,14 @@ DEFINE_REFCOUNT_DEFERRED_EVENT(xfs_refcount_deferred); TRACE_EVENT(xfs_refcount_finish_one_leftover, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, - int type, xfs_agblock_t agbno, xfs_extlen_t len, - xfs_agblock_t new_agbno, xfs_extlen_t new_len), - TP_ARGS(mp, agno, type, agbno, len, new_agbno, new_len), + int type, xfs_agblock_t agbno, xfs_extlen_t len), + TP_ARGS(mp, agno, type, agbno, len), TP_STRUCT__entry( __field(dev_t, dev) __field(xfs_agnumber_t, agno) __field(int, type) __field(xfs_agblock_t, agbno) __field(xfs_extlen_t, len) - __field(xfs_agblock_t, new_agbno) - __field(xfs_extlen_t, new_len) ), TP_fast_assign( __entry->dev = mp->m_super->s_dev; @@ -3225,17 +3223,13 @@ TRACE_EVENT(xfs_refcount_finish_one_leftover, __entry->type = type; __entry->agbno = agbno; __entry->len = len; - __entry->new_agbno = new_agbno; - __entry->new_len = new_len; ), - TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x new_agbno 0x%x new_fsbcount 0x%x", + TP_printk("dev %d:%d type %d agno 0x%x agbno 0x%x fsbcount 0x%x", MAJOR(__entry->dev), MINOR(__entry->dev), __entry->type, __entry->agno, __entry->agbno, - __entry->len, - __entry->new_agbno, - __entry->new_len) + __entry->len) ); /* simple inode-based error/%ip tracepoint class */ diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 7bd16fbff534..8afc0c080861 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -102,7 +102,7 @@ xfs_trans_dup( INIT_LIST_HEAD(&ntp->t_items); INIT_LIST_HEAD(&ntp->t_busy); INIT_LIST_HEAD(&ntp->t_dfops); - ntp->t_firstblock = NULLFSBLOCK; + ntp->t_highest_agno = NULLAGNUMBER; ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); ASSERT(tp->t_ticket != NULL); @@ -278,7 +278,7 @@ retry: INIT_LIST_HEAD(&tp->t_items); INIT_LIST_HEAD(&tp->t_busy); INIT_LIST_HEAD(&tp->t_dfops); - tp->t_firstblock = NULLFSBLOCK; + tp->t_highest_agno = NULLAGNUMBER; error = xfs_trans_reserve(tp, resp, blocks, rtextents); if (error == -ENOSPC && want_retry) { @@ -1078,10 +1078,10 @@ xfs_trans_cancel( /* * It's never valid to cancel a transaction with deferred ops attached, * because the transaction is effectively dirty. Complain about this - * loudly before freeing the in-memory defer items. + * loudly before freeing the in-memory defer items and shutting down the + * filesystem. */ if (!list_empty(&tp->t_dfops)) { - ASSERT(xfs_is_shutdown(mp) || list_empty(&tp->t_dfops)); ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); dirty = true; xfs_defer_cancel(tp); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 55819785941c..6e3646d524ce 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -132,7 +132,7 @@ typedef struct xfs_trans { unsigned int t_rtx_res; /* # of rt extents resvd */ unsigned int t_rtx_res_used; /* # of resvd rt extents used */ unsigned int t_flags; /* misc flags */ - xfs_fsblock_t t_firstblock; /* first block allocated */ + xfs_agnumber_t t_highest_agno; /* highest AGF locked */ struct xlog_ticket *t_ticket; /* log mgr ticket */ struct xfs_mount *t_mountp; /* ptr to fs mount struct */ struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index 10aa1fd39d2b..7b9a0ed1b11f 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -133,7 +133,7 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused, static int xfs_xattr_set(const struct xattr_handler *handler, - struct user_namespace *mnt_userns, struct dentry *unused, + struct mnt_idmap *idmap, struct dentry *unused, struct inode *inode, const char *name, const void *value, size_t size, int flags) { |