diff options
Diffstat (limited to 'fs/xfs/scrub/btree.c')
-rw-r--r-- | fs/xfs/scrub/btree.c | 203 |
1 files changed, 123 insertions, 80 deletions
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index f52a7b8256f9..2f4519590dc1 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -9,6 +9,7 @@ #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" +#include "xfs_inode.h" #include "xfs_btree.h" #include "scrub/scrub.h" #include "scrub/common.h" @@ -43,7 +44,7 @@ __xchk_btree_process_error( /* Note the badness but don't abort. */ sc->sm->sm_flags |= errflag; *error = 0; - /* fall through */ + fallthrough; default: if (cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) trace_xchk_ifork_btree_op_error(sc, cur, level, @@ -135,14 +136,14 @@ xchk_btree_rec( struct xfs_buf *bp; block = xfs_btree_get_block(cur, 0, &bp); - rec = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); + rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block); trace_xchk_btree_rec(bs->sc, cur, 0); /* If this isn't the first record, are they in order? */ - if (!bs->firstrec && !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec)) + if (cur->bc_levels[0].ptr > 1 && + !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec)) xchk_btree_set_corrupt(bs->sc, cur, 0); - bs->firstrec = false; memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len); if (cur->bc_nlevels == 1) @@ -151,7 +152,7 @@ xchk_btree_rec( /* Is this at least as large as the parent low key? */ cur->bc_ops->init_key_from_rec(&key, rec); keyblock = xfs_btree_get_block(cur, 1, &bp); - keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[1], keyblock); + keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock); if (cur->bc_ops->diff_two_keys(cur, &key, keyp) < 0) xchk_btree_set_corrupt(bs->sc, cur, 1); @@ -160,7 +161,7 @@ xchk_btree_rec( /* Is this no larger than the parent high key? */ cur->bc_ops->init_high_key_from_rec(&hkey, rec); - keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[1], keyblock); + keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock); if (cur->bc_ops->diff_two_keys(cur, keyp, &hkey) < 0) xchk_btree_set_corrupt(bs->sc, cur, 1); } @@ -182,23 +183,22 @@ xchk_btree_key( struct xfs_buf *bp; block = xfs_btree_get_block(cur, level, &bp); - key = xfs_btree_key_addr(cur, cur->bc_ptrs[level], block); + key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block); trace_xchk_btree_key(bs->sc, cur, level); /* If this isn't the first key, are they in order? */ - if (!bs->firstkey[level] && - !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level], key)) + if (cur->bc_levels[level].ptr > 1 && + !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1], key)) xchk_btree_set_corrupt(bs->sc, cur, level); - bs->firstkey[level] = false; - memcpy(&bs->lastkey[level], key, cur->bc_ops->key_len); + memcpy(&bs->lastkey[level - 1], key, cur->bc_ops->key_len); if (level + 1 >= cur->bc_nlevels) return; /* Is this at least as large as the parent low key? */ keyblock = xfs_btree_get_block(cur, level + 1, &bp); - keyp = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); + keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock); if (cur->bc_ops->diff_two_keys(cur, key, keyp) < 0) xchk_btree_set_corrupt(bs->sc, cur, level); @@ -206,8 +206,9 @@ xchk_btree_key( return; /* Is this no larger than the parent high key? */ - key = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level], block); - keyp = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], keyblock); + key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block); + keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr, + keyblock); if (cur->bc_ops->diff_two_keys(cur, keyp, key) < 0) xchk_btree_set_corrupt(bs->sc, cur, level); } @@ -290,7 +291,7 @@ xchk_btree_block_check_sibling( /* Compare upper level pointer to sibling pointer. */ pblock = xfs_btree_get_block(ncur, level + 1, &pbp); - pp = xfs_btree_ptr_addr(ncur, ncur->bc_ptrs[level + 1], pblock); + pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock); if (!xchk_btree_ptr_ok(bs, level + 1, pp)) goto out; if (pbp) @@ -373,10 +374,10 @@ xchk_btree_check_block_owner( init_sa = bs->cur->bc_flags & XFS_BTREE_LONG_PTRS; if (init_sa) { - error = xchk_ag_init(bs->sc, agno, &bs->sc->sa); + error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa); if (!xchk_btree_xref_process_error(bs->sc, bs->cur, level, &error)) - return error; + goto out_free; } xchk_xref_is_used_space(bs->sc, agbno, 1); @@ -392,6 +393,7 @@ xchk_btree_check_block_owner( if (!bs->sc->sa.rmap_cur && btnum == XFS_BTNUM_RMAP) bs->cur = NULL; +out_free: if (init_sa) xchk_ag_free(bs->sc, &bs->sc->sa); @@ -434,12 +436,36 @@ xchk_btree_check_owner( if (!co) return -ENOMEM; co->level = level; - co->daddr = XFS_BUF_ADDR(bp); + co->daddr = xfs_buf_daddr(bp); list_add_tail(&co->list, &bs->to_check); return 0; } - return xchk_btree_check_block_owner(bs, level, XFS_BUF_ADDR(bp)); + return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp)); +} + +/* Decide if we want to check minrecs of a btree block in the inode root. */ +static inline bool +xchk_btree_check_iroot_minrecs( + struct xchk_btree *bs) +{ + /* + * xfs_bmap_add_attrfork_btree had an implementation bug wherein it + * would miscalculate the space required for the data fork bmbt root + * when adding an attr fork, and promote the iroot contents to an + * external block unnecessarily. This went unnoticed for many years + * until scrub found filesystems in this state. Inode rooted btrees are + * not supposed to have immediate child blocks that are small enough + * that the contents could fit in the inode root, but we can't fail + * existing filesystems, so instead we disable the check for data fork + * bmap btrees when there's an attr fork. + */ + if (bs->cur->bc_btnum == XFS_BTNUM_BMAP && + bs->cur->bc_ino.whichfork == XFS_DATA_FORK && + xfs_inode_has_attr_fork(bs->sc->ip)) + return false; + + return true; } /* @@ -452,32 +478,42 @@ xchk_btree_check_minrecs( int level, struct xfs_btree_block *block) { - unsigned int numrecs; - int ok_level; - - numrecs = be16_to_cpu(block->bb_numrecs); + struct xfs_btree_cur *cur = bs->cur; + unsigned int root_level = cur->bc_nlevels - 1; + unsigned int numrecs = be16_to_cpu(block->bb_numrecs); /* More records than minrecs means the block is ok. */ - if (numrecs >= bs->cur->bc_ops->get_minrecs(bs->cur, level)) + if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) return; /* - * Certain btree blocks /can/ have fewer than minrecs records. Any - * level greater than or equal to the level of the highest dedicated - * btree block are allowed to violate this constraint. - * - * For a btree rooted in a block, the btree root can have fewer than - * minrecs records. If the btree is rooted in an inode and does not - * store records in the root, the direct children of the root and the - * root itself can have fewer than minrecs records. + * For btrees rooted in the inode, it's possible that the root block + * contents spilled into a regular ondisk block because there wasn't + * enough space in the inode root. The number of records in that + * child block might be less than the standard minrecs, but that's ok + * provided that there's only one direct child of the root. */ - ok_level = bs->cur->bc_nlevels - 1; - if (bs->cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) - ok_level--; - if (level >= ok_level) + if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && + level == cur->bc_nlevels - 2) { + struct xfs_btree_block *root_block; + struct xfs_buf *root_bp; + int root_maxrecs; + + root_block = xfs_btree_get_block(cur, root_level, &root_bp); + root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level); + if (xchk_btree_check_iroot_minrecs(bs) && + (be16_to_cpu(root_block->bb_numrecs) != 1 || + numrecs <= root_maxrecs)) + xchk_btree_set_corrupt(bs->sc, cur, level); return; + } - xchk_btree_set_corrupt(bs->sc, bs->cur, level); + /* + * Otherwise, only the root level is allowed to have fewer than minrecs + * records or keyptrs. + */ + if (level < root_level) + xchk_btree_set_corrupt(bs->sc, cur, level); } /* @@ -560,7 +596,7 @@ xchk_btree_block_keys( /* Obtain the parent's copy of the keys for this block. */ parent_block = xfs_btree_get_block(cur, level + 1, &bp); - parent_keys = xfs_btree_key_addr(cur, cur->bc_ptrs[level + 1], + parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, parent_block); if (cur->bc_ops->diff_two_keys(cur, &block_keys, parent_keys) != 0) @@ -571,7 +607,7 @@ xchk_btree_block_keys( /* Get high keys */ high_bk = xfs_btree_high_key_from_key(cur, &block_keys); - high_pk = xfs_btree_high_key_addr(cur, cur->bc_ptrs[level + 1], + high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr, parent_block); if (cur->bc_ops->diff_two_keys(cur, high_bk, high_pk) != 0) @@ -591,35 +627,39 @@ xchk_btree( const struct xfs_owner_info *oinfo, void *private) { - struct xchk_btree bs = { - .cur = cur, - .scrub_rec = scrub_fn, - .oinfo = oinfo, - .firstrec = true, - .private = private, - .sc = sc, - }; union xfs_btree_ptr ptr; + struct xchk_btree *bs; union xfs_btree_ptr *pp; union xfs_btree_rec *recp; struct xfs_btree_block *block; - int level; struct xfs_buf *bp; struct check_owner *co; struct check_owner *n; - int i; + size_t cur_sz; + int level; int error = 0; - /* Initialize scrub state */ - for (i = 0; i < XFS_BTREE_MAXLEVELS; i++) - bs.firstkey[i] = true; - INIT_LIST_HEAD(&bs.to_check); - - /* Don't try to check a tree with a height we can't handle. */ - if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) { + /* + * Allocate the btree scrub context from the heap, because this + * structure can get rather large. Don't let a caller feed us a + * totally absurd size. + */ + cur_sz = xchk_btree_sizeof(cur->bc_nlevels); + if (cur_sz > PAGE_SIZE) { xchk_btree_set_corrupt(sc, cur, 0); - goto out; + return 0; } + bs = kmem_zalloc(cur_sz, KM_NOFS | KM_MAYFAIL); + if (!bs) + return -ENOMEM; + bs->cur = cur; + bs->scrub_rec = scrub_fn; + bs->oinfo = oinfo; + bs->private = private; + bs->sc = sc; + + /* Initialize scrub state */ + INIT_LIST_HEAD(&bs->to_check); /* * Load the root of the btree. The helper function absorbs @@ -627,79 +667,82 @@ xchk_btree( */ level = cur->bc_nlevels - 1; cur->bc_ops->init_ptr_from_cur(cur, &ptr); - if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr)) + if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr)) goto out; - error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp); + error = xchk_btree_get_block(bs, level, &ptr, &block, &bp); if (error || !block) goto out; - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; while (level < cur->bc_nlevels) { block = xfs_btree_get_block(cur, level, &bp); if (level == 0) { /* End of leaf, pop back towards the root. */ - if (cur->bc_ptrs[level] > + if (cur->bc_levels[level].ptr > be16_to_cpu(block->bb_numrecs)) { - xchk_btree_block_keys(&bs, level, block); + xchk_btree_block_keys(bs, level, block); if (level < cur->bc_nlevels - 1) - cur->bc_ptrs[level + 1]++; + cur->bc_levels[level + 1].ptr++; level++; continue; } /* Records in order for scrub? */ - xchk_btree_rec(&bs); + xchk_btree_rec(bs); /* Call out to the record checker. */ - recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block); - error = bs.scrub_rec(&bs, recp); + recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, + block); + error = bs->scrub_rec(bs, recp); if (error) break; if (xchk_should_terminate(sc, &error) || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) break; - cur->bc_ptrs[level]++; + cur->bc_levels[level].ptr++; continue; } /* End of node, pop back towards the root. */ - if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) { - xchk_btree_block_keys(&bs, level, block); + if (cur->bc_levels[level].ptr > + be16_to_cpu(block->bb_numrecs)) { + xchk_btree_block_keys(bs, level, block); if (level < cur->bc_nlevels - 1) - cur->bc_ptrs[level + 1]++; + cur->bc_levels[level + 1].ptr++; level++; continue; } /* Keys in order for scrub? */ - xchk_btree_key(&bs, level); + xchk_btree_key(bs, level); /* Drill another level deeper. */ - pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block); - if (!xchk_btree_ptr_ok(&bs, level, pp)) { - cur->bc_ptrs[level]++; + pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block); + if (!xchk_btree_ptr_ok(bs, level, pp)) { + cur->bc_levels[level].ptr++; continue; } level--; - error = xchk_btree_get_block(&bs, level, pp, &block, &bp); + error = xchk_btree_get_block(bs, level, pp, &block, &bp); if (error || !block) goto out; - cur->bc_ptrs[level] = 1; + cur->bc_levels[level].ptr = 1; } out: /* Process deferred owner checks on btree blocks. */ - list_for_each_entry_safe(co, n, &bs.to_check, list) { - if (!error && bs.cur) - error = xchk_btree_check_block_owner(&bs, - co->level, co->daddr); + list_for_each_entry_safe(co, n, &bs->to_check, list) { + if (!error && bs->cur) + error = xchk_btree_check_block_owner(bs, co->level, + co->daddr); list_del(&co->list); kmem_free(co); } + kmem_free(bs); return error; } |