-rw-r--r--  fs/xfs/Makefile                   |   2
-rw-r--r--  fs/xfs/libxfs/xfs_fs.h            |   4
-rw-r--r--  fs/xfs/libxfs/xfs_health.h        |   4
-rw-r--r--  fs/xfs/scrub/common.c             |  49
-rw-r--r--  fs/xfs/scrub/common.h             |  11
-rw-r--r--  fs/xfs/scrub/fscounters.c         |   2
-rw-r--r--  fs/xfs/scrub/health.c             |   1
-rw-r--r--  fs/xfs/scrub/quotacheck.c         | 867
-rw-r--r--  fs/xfs/scrub/quotacheck.h         |  76
-rw-r--r--  fs/xfs/scrub/quotacheck_repair.c  | 261
-rw-r--r--  fs/xfs/scrub/repair.c             |  46
-rw-r--r--  fs/xfs/scrub/repair.h             |   5
-rw-r--r--  fs/xfs/scrub/scrub.c              |   9
-rw-r--r--  fs/xfs/scrub/scrub.h              |  10
-rw-r--r--  fs/xfs/scrub/stats.c              |   1
-rw-r--r--  fs/xfs/scrub/trace.h              |  30
-rw-r--r--  fs/xfs/scrub/xfarray.h            |  19
-rw-r--r--  fs/xfs/xfs_health.c               |   1
-rw-r--r--  fs/xfs/xfs_inode.c                |  16
-rw-r--r--  fs/xfs/xfs_inode.h                |   2
-rw-r--r--  fs/xfs/xfs_qm.c                   |  23
-rw-r--r--  fs/xfs/xfs_qm.h                   |  16
-rw-r--r--  fs/xfs/xfs_qm_bhv.c               |   1
-rw-r--r--  fs/xfs/xfs_quota.h                |  46
-rw-r--r--  fs/xfs/xfs_trans_dquot.c          | 169
25 files changed, 1645 insertions, 26 deletions
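For context before the hunks: this series wires a new scrub type, XFS_SCRUB_TYPE_QUOTACHECK (25), into the existing XFS_IOC_SCRUB_METADATA ioctl, so userspace can ask the kernel to cross-check the quota counters against a live inode scan. A minimal caller sketch follows (assumptions: the uapi definitions from xfs_fs.h are in scope via the illustrative <xfs/xfs.h> include, and the helper name is made up for this example):

/* Hedged sketch: request the new quota counter check from userspace. */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <xfs/xfs.h>	/* assumed location of the XFS scrub uapi */

/* Returns 0 if clean, 1 if corrupt, -1 on error or incomplete scan. */
static int scrub_quota_counters(const char *mntpt, int repair)
{
	struct xfs_scrub_metadata sm;
	int fd, ret;

	fd = open(mntpt, O_RDONLY);
	if (fd < 0)
		return -1;

	memset(&sm, 0, sizeof(sm));
	sm.sm_type = XFS_SCRUB_TYPE_QUOTACHECK;	/* new scrub type 25 */
	if (repair)
		sm.sm_flags |= XFS_SCRUB_IFLAG_REPAIR;	/* take the repair path */

	ret = ioctl(fd, XFS_IOC_SCRUB_METADATA, &sm);
	close(fd);
	if (ret < 0 || (sm.sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
		return -1;
	return (sm.sm_flags & XFS_SCRUB_OFLAG_CORRUPT) ? 1 : 0;
}

A corrupt result from the check-only call is what the new xrep_quotacheck path (quotacheck_repair.c below) is meant to fix when the repair flag is set.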
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 4597c0f19e8e..68891e6ee08e 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -180,6 +180,7 @@ xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \ xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \ dqiterate.o \ quota.o \ + quotacheck.o \ ) # online repair @@ -203,6 +204,7 @@ xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \ xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \ quota_repair.o \ + quotacheck_repair.o \ ) endif endif diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 6360073865db..07acbed9235c 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -195,6 +195,7 @@ struct xfs_fsop_geom { #define XFS_FSOP_GEOM_SICK_PQUOTA (1 << 3) /* project quota */ #define XFS_FSOP_GEOM_SICK_RT_BITMAP (1 << 4) /* realtime bitmap */ #define XFS_FSOP_GEOM_SICK_RT_SUMMARY (1 << 5) /* realtime summary */ +#define XFS_FSOP_GEOM_SICK_QUOTACHECK (1 << 6) /* quota counts */ /* Output for XFS_FS_COUNTS */ typedef struct xfs_fsop_counts { @@ -709,9 +710,10 @@ struct xfs_scrub_metadata { #define XFS_SCRUB_TYPE_GQUOTA 22 /* group quotas */ #define XFS_SCRUB_TYPE_PQUOTA 23 /* project quotas */ #define XFS_SCRUB_TYPE_FSCOUNTERS 24 /* fs summary counters */ +#define XFS_SCRUB_TYPE_QUOTACHECK 25 /* quota counters */ /* Number of scrub subcommands. */ -#define XFS_SCRUB_TYPE_NR 25 +#define XFS_SCRUB_TYPE_NR 26 /* i: Repair this metadata. */ #define XFS_SCRUB_IFLAG_REPAIR (1u << 0) diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index 6296993ff8f3..5626e53b3f0f 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -41,6 +41,7 @@ struct xfs_fsop_geom; #define XFS_SICK_FS_UQUOTA (1 << 1) /* user quota */ #define XFS_SICK_FS_GQUOTA (1 << 2) /* group quota */ #define XFS_SICK_FS_PQUOTA (1 << 3) /* project quota */ +#define XFS_SICK_FS_QUOTACHECK (1 << 4) /* quota counts */ /* Observable health issues for realtime volume metadata. */ #define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */ @@ -77,7 +78,8 @@ struct xfs_fsop_geom; #define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \ XFS_SICK_FS_UQUOTA | \ XFS_SICK_FS_GQUOTA | \ - XFS_SICK_FS_PQUOTA) + XFS_SICK_FS_PQUOTA | \ + XFS_SICK_FS_QUOTACHECK) #define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \ XFS_SICK_RT_SUMMARY) diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 81f2b96bb5a7..c5a6c47d3df2 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -29,6 +29,8 @@ #include "xfs_attr.h" #include "xfs_reflink.h" #include "xfs_ag.h" +#include "xfs_error.h" +#include "xfs_quota.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" @@ -82,6 +84,15 @@ __xchk_process_error( sc->ip ? sc->ip : XFS_I(file_inode(sc->file)), sc->sm, *error); break; + case -ECANCELED: + /* + * ECANCELED here means that the caller set one of the scrub + * outcome flags (corrupt, xfail, xcorrupt) and wants to exit + * quickly. Set error to zero and do not continue. + */ + trace_xchk_op_error(sc, agno, bno, *error, ret_ip); + *error = 0; + break; case -EFSBADCRC: case -EFSCORRUPTED: /* Note the badness but don't abort. */ @@ -89,8 +100,7 @@ __xchk_process_error( *error = 0; fallthrough; default: - trace_xchk_op_error(sc, agno, bno, *error, - ret_ip); + trace_xchk_op_error(sc, agno, bno, *error, ret_ip); break; } return false; @@ -136,6 +146,16 @@ __xchk_fblock_process_error( /* Used to restart an op with deadlock avoidance. 
*/ trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); break; + case -ECANCELED: + /* + * ECANCELED here means that the caller set one of the scrub + * outcome flags (corrupt, xfail, xcorrupt) and wants to exit + * quickly. Set error to zero and do not continue. + */ + trace_xchk_file_op_error(sc, whichfork, offset, *error, + ret_ip); + *error = 0; + break; case -EFSBADCRC: case -EFSCORRUPTED: /* Note the badness but don't abort. */ @@ -227,6 +247,19 @@ xchk_block_set_corrupt( trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address); } +#ifdef CONFIG_XFS_QUOTA +/* Record a corrupt quota counter. */ +void +xchk_qcheck_set_corrupt( + struct xfs_scrub *sc, + unsigned int dqtype, + xfs_dqid_t id) +{ + sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; + trace_xchk_qcheck_error(sc, dqtype, id, __return_address); +} +#endif + /* Record a corruption while cross-referencing. */ void xchk_block_xref_set_corrupt( @@ -653,6 +686,13 @@ xchk_trans_cancel( sc->tp = NULL; } +int +xchk_trans_alloc_empty( + struct xfs_scrub *sc) +{ + return xfs_trans_alloc_empty(sc->mp, &sc->tp); +} + /* * Grab an empty transaction so that we can re-grab locked buffers if * one of our btrees turns out to be cyclic. @@ -672,7 +712,7 @@ xchk_trans_alloc( return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate, resblks, 0, 0, &sc->tp); - return xfs_trans_alloc_empty(sc->mp, &sc->tp); + return xchk_trans_alloc_empty(sc); } /* Set us up with a transaction and an empty context. */ @@ -1259,6 +1299,9 @@ xchk_fsgates_enable( if (scrub_fsgates & XCHK_FSGATES_DRAIN) xfs_drain_wait_enable(); + if (scrub_fsgates & XCHK_FSGATES_QUOTA) + xfs_dqtrx_hook_enable(); + sc->flags |= scrub_fsgates; } diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index da09580b454a..eb51037cd0d2 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -32,6 +32,7 @@ xchk_should_terminate( } int xchk_trans_alloc(struct xfs_scrub *sc, uint resblks); +int xchk_trans_alloc_empty(struct xfs_scrub *sc); void xchk_trans_cancel(struct xfs_scrub *sc); bool xchk_process_error(struct xfs_scrub *sc, xfs_agnumber_t agno, @@ -54,6 +55,10 @@ void xchk_block_set_corrupt(struct xfs_scrub *sc, void xchk_ino_set_corrupt(struct xfs_scrub *sc, xfs_ino_t ino); void xchk_fblock_set_corrupt(struct xfs_scrub *sc, int whichfork, xfs_fileoff_t offset); +#ifdef CONFIG_XFS_QUOTA +void xchk_qcheck_set_corrupt(struct xfs_scrub *sc, unsigned int dqtype, + xfs_dqid_t id); +#endif void xchk_block_xref_set_corrupt(struct xfs_scrub *sc, struct xfs_buf *bp); @@ -105,6 +110,7 @@ xchk_setup_rtsummary(struct xfs_scrub *sc) #ifdef CONFIG_XFS_QUOTA int xchk_ino_dqattach(struct xfs_scrub *sc); int xchk_setup_quota(struct xfs_scrub *sc); +int xchk_setup_quotacheck(struct xfs_scrub *sc); #else static inline int xchk_ino_dqattach(struct xfs_scrub *sc) @@ -116,6 +122,11 @@ xchk_setup_quota(struct xfs_scrub *sc) { return -ENOENT; } +static inline int +xchk_setup_quotacheck(struct xfs_scrub *sc) +{ + return -ENOENT; +} #endif int xchk_setup_fscounters(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 5799e9a94f1f..893c5a6e3ddb 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -242,7 +242,7 @@ xchk_setup_fscounters( return error; } - return xfs_trans_alloc_empty(sc->mp, &sc->tp); + return xchk_trans_alloc_empty(sc); } /* diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index 531006910ca9..3c9eac070796 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -105,6 +105,7 @@ static const struct 
xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = { [XFS_SCRUB_TYPE_GQUOTA] = { XHG_FS, XFS_SICK_FS_GQUOTA }, [XFS_SCRUB_TYPE_PQUOTA] = { XHG_FS, XFS_SICK_FS_PQUOTA }, [XFS_SCRUB_TYPE_FSCOUNTERS] = { XHG_FS, XFS_SICK_FS_COUNTERS }, + [XFS_SCRUB_TYPE_QUOTACHECK] = { XHG_FS, XFS_SICK_FS_QUOTACHECK }, }; /* Return the health status mask for this scrub type. */ diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c new file mode 100644 index 000000000000..c77eb2de8df7 --- /dev/null +++ b/fs/xfs/scrub/quotacheck.c @@ -0,0 +1,867 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2020-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_inode.h" +#include "xfs_quota.h" +#include "xfs_qm.h" +#include "xfs_icache.h" +#include "xfs_bmap_util.h" +#include "xfs_ialloc.h" +#include "xfs_ag.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/repair.h" +#include "scrub/xfile.h" +#include "scrub/xfarray.h" +#include "scrub/iscan.h" +#include "scrub/quota.h" +#include "scrub/quotacheck.h" +#include "scrub/trace.h" + +/* + * Live Quotacheck + * =============== + * + * Quota counters are "summary" metadata, in the sense that they are computed + * as the summation of the block usage counts for every file on the filesystem. + * Therefore, we compute the correct icount, bcount, and rtbcount values by + * creating a shadow quota counter structure and walking every inode. + */ + +/* Track the quota deltas for a dquot in a transaction. */ +struct xqcheck_dqtrx { + xfs_dqtype_t q_type; + xfs_dqid_t q_id; + + int64_t icount_delta; + + int64_t bcount_delta; + int64_t delbcnt_delta; + + int64_t rtbcount_delta; + int64_t delrtb_delta; +}; + +#define XQCHECK_MAX_NR_DQTRXS (XFS_QM_TRANS_DQTYPES * XFS_QM_TRANS_MAXDQS) + +/* + * Track the quota deltas for all dquots attached to a transaction if the + * quota deltas are being applied to an inode that we already scanned. + */ +struct xqcheck_dqacct { + struct rhash_head hash; + uintptr_t tx_id; + struct xqcheck_dqtrx dqtrx[XQCHECK_MAX_NR_DQTRXS]; + unsigned int refcount; +}; + +/* Free a shadow dquot accounting structure. */ +static void +xqcheck_dqacct_free( + void *ptr, + void *arg) +{ + struct xqcheck_dqacct *dqa = ptr; + + kfree(dqa); +} + +/* Set us up to scrub quota counters. */ +int +xchk_setup_quotacheck( + struct xfs_scrub *sc) +{ + if (!XFS_IS_QUOTA_ON(sc->mp)) + return -ENOENT; + + xchk_fsgates_enable(sc, XCHK_FSGATES_QUOTA); + + sc->buf = kzalloc(sizeof(struct xqcheck), XCHK_GFP_FLAGS); + if (!sc->buf) + return -ENOMEM; + + return xchk_setup_fs(sc); +} + +/* + * Part 1: Collecting dquot resource usage counts. For each xfs_dquot attached + * to each inode, we create a shadow dquot, and compute the inode count and add + * the data/rt block usage from what we see. + * + * To avoid false corruption reports in part 2, any failure in this part must + * set the INCOMPLETE flag even when a negative errno is returned. This care + * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED, + * ECANCELED) that are absorbed into a scrub state flag update by + * xchk_*_process_error. Scrub and repair share the same incore data + * structures, so the INCOMPLETE flag is critical to prevent a repair based on + * insufficient information. 
+ * + * Because we are scanning a live filesystem, it's possible that another thread + * will try to update the quota counters for an inode that we've already + * scanned. This will cause our counts to be incorrect. Therefore, we hook + * the live transaction code in two places: (1) when the callers update the + * per-transaction dqtrx structure to log quota counter updates; and (2) when + * transaction commit actually logs those updates to the incore dquot. By + * shadowing transaction updates in this manner, live quotacheck can ensure + * by locking the dquot and the shadow structure that its own copies are not + * out of date. Because the hook code runs in a different process context from + * the scrub code and the scrub state flags are not accessed atomically, + * failures in the hook code must abort the iscan and the scrubber must notice + * the aborted scan and set the incomplete flag. + * + * Note that we use srcu notifier hooks to minimize the overhead when live + * quotacheck is /not/ running. + */ + +/* Update an incore dquot counter information from a live update. */ +static int +xqcheck_update_incore_counts( + struct xqcheck *xqc, + struct xfarray *counts, + xfs_dqid_t id, + int64_t inodes, + int64_t nblks, + int64_t rtblks) +{ + struct xqcheck_dquot xcdq; + int error; + + error = xfarray_load_sparse(counts, id, &xcdq); + if (error) + return error; + + xcdq.flags |= XQCHECK_DQUOT_WRITTEN; + xcdq.icount += inodes; + xcdq.bcount += nblks; + xcdq.rtbcount += rtblks; + + error = xfarray_store(counts, id, &xcdq); + if (error == -EFBIG) { + /* + * EFBIG means we tried to store data at too high a byte offset + * in the sparse array. IOWs, we cannot complete the check and + * must notify userspace that the check was incomplete. + */ + error = -ECANCELED; + } + return error; +} + +/* Decide if this is the shadow dquot accounting structure for a transaction. */ +static int +xqcheck_dqacct_obj_cmpfn( + struct rhashtable_compare_arg *arg, + const void *obj) +{ + const uintptr_t *tx_idp = arg->key; + const struct xqcheck_dqacct *dqa = obj; + + if (dqa->tx_id != *tx_idp) + return 1; + return 0; +} + +static const struct rhashtable_params xqcheck_dqacct_hash_params = { + .min_size = 32, + .key_len = sizeof(uintptr_t), + .key_offset = offsetof(struct xqcheck_dqacct, tx_id), + .head_offset = offsetof(struct xqcheck_dqacct, hash), + .automatic_shrinking = true, + .obj_cmpfn = xqcheck_dqacct_obj_cmpfn, +}; + +/* Find a shadow dqtrx slot for the given dquot. */ +STATIC struct xqcheck_dqtrx * +xqcheck_get_dqtrx( + struct xqcheck_dqacct *dqa, + xfs_dqtype_t q_type, + xfs_dqid_t q_id) +{ + int i; + + for (i = 0; i < XQCHECK_MAX_NR_DQTRXS; i++) { + if (dqa->dqtrx[i].q_type == 0 || + (dqa->dqtrx[i].q_type == q_type && + dqa->dqtrx[i].q_id == q_id)) + return &dqa->dqtrx[i]; + } + + return NULL; +} + +/* + * Create and fill out a quota delta tracking structure to shadow the updates + * going on in the regular quota code. + */ +static int +xqcheck_mod_live_ino_dqtrx( + struct notifier_block *nb, + unsigned long action, + void *data) +{ + struct xfs_mod_ino_dqtrx_params *p = data; + struct xqcheck *xqc; + struct xqcheck_dqacct *dqa; + struct xqcheck_dqtrx *dqtrx; + int error; + + xqc = container_of(nb, struct xqcheck, qhook.mod_hook.nb); + + /* Skip quota reservation fields. 
*/ + switch (action) { + case XFS_TRANS_DQ_BCOUNT: + case XFS_TRANS_DQ_DELBCOUNT: + case XFS_TRANS_DQ_ICOUNT: + case XFS_TRANS_DQ_RTBCOUNT: + case XFS_TRANS_DQ_DELRTBCOUNT: + break; + default: + return NOTIFY_DONE; + } + + /* Ignore dqtrx updates for quota types we don't care about. */ + switch (p->q_type) { + case XFS_DQTYPE_USER: + if (!xqc->ucounts) + return NOTIFY_DONE; + break; + case XFS_DQTYPE_GROUP: + if (!xqc->gcounts) + return NOTIFY_DONE; + break; + case XFS_DQTYPE_PROJ: + if (!xqc->pcounts) + return NOTIFY_DONE; + break; + default: + return NOTIFY_DONE; + } + + /* Skip inodes that haven't been scanned yet. */ + if (!xchk_iscan_want_live_update(&xqc->iscan, p->ino)) + return NOTIFY_DONE; + + /* Make a shadow quota accounting tracker for this transaction. */ + mutex_lock(&xqc->lock); + dqa = rhashtable_lookup_fast(&xqc->shadow_dquot_acct, &p->tx_id, + xqcheck_dqacct_hash_params); + if (!dqa) { + dqa = kzalloc(sizeof(struct xqcheck_dqacct), XCHK_GFP_FLAGS); + if (!dqa) + goto out_abort; + + dqa->tx_id = p->tx_id; + error = rhashtable_insert_fast(&xqc->shadow_dquot_acct, + &dqa->hash, xqcheck_dqacct_hash_params); + if (error) + goto out_abort; + } + + /* Find the shadow dqtrx (or an empty slot) here. */ + dqtrx = xqcheck_get_dqtrx(dqa, p->q_type, p->q_id); + if (!dqtrx) + goto out_abort; + if (dqtrx->q_type == 0) { + dqtrx->q_type = p->q_type; + dqtrx->q_id = p->q_id; + dqa->refcount++; + } + + /* Update counter */ + switch (action) { + case XFS_TRANS_DQ_BCOUNT: + dqtrx->bcount_delta += p->delta; + break; + case XFS_TRANS_DQ_DELBCOUNT: + dqtrx->delbcnt_delta += p->delta; + break; + case XFS_TRANS_DQ_ICOUNT: + dqtrx->icount_delta += p->delta; + break; + case XFS_TRANS_DQ_RTBCOUNT: + dqtrx->rtbcount_delta += p->delta; + break; + case XFS_TRANS_DQ_DELRTBCOUNT: + dqtrx->delrtb_delta += p->delta; + break; + } + + mutex_unlock(&xqc->lock); + return NOTIFY_DONE; + +out_abort: + xchk_iscan_abort(&xqc->iscan); + mutex_unlock(&xqc->lock); + return NOTIFY_DONE; +} + +/* + * Apply the transaction quota deltas to our shadow quota accounting info when + * the regular quota code are doing the same. + */ +static int +xqcheck_apply_live_dqtrx( + struct notifier_block *nb, + unsigned long action, + void *data) +{ + struct xfs_apply_dqtrx_params *p = data; + struct xqcheck *xqc; + struct xqcheck_dqacct *dqa; + struct xqcheck_dqtrx *dqtrx; + struct xfarray *counts; + int error; + + xqc = container_of(nb, struct xqcheck, qhook.apply_hook.nb); + + /* Map the dquot type to an incore counter object. */ + switch (p->q_type) { + case XFS_DQTYPE_USER: + counts = xqc->ucounts; + break; + case XFS_DQTYPE_GROUP: + counts = xqc->gcounts; + break; + case XFS_DQTYPE_PROJ: + counts = xqc->pcounts; + break; + default: + return NOTIFY_DONE; + } + + if (xchk_iscan_aborted(&xqc->iscan) || counts == NULL) + return NOTIFY_DONE; + + /* + * Find the shadow dqtrx for this transaction and dquot, if any deltas + * need to be applied here. If not, we're finished early. + */ + mutex_lock(&xqc->lock); + dqa = rhashtable_lookup_fast(&xqc->shadow_dquot_acct, &p->tx_id, + xqcheck_dqacct_hash_params); + if (!dqa) + goto out_unlock; + dqtrx = xqcheck_get_dqtrx(dqa, p->q_type, p->q_id); + if (!dqtrx || dqtrx->q_type == 0) + goto out_unlock; + + /* Update our shadow dquot if we're committing. 
*/ + if (action == XFS_APPLY_DQTRX_COMMIT) { + error = xqcheck_update_incore_counts(xqc, counts, p->q_id, + dqtrx->icount_delta, + dqtrx->bcount_delta + dqtrx->delbcnt_delta, + dqtrx->rtbcount_delta + dqtrx->delrtb_delta); + if (error) + goto out_abort; + } + + /* Free the shadow accounting structure if that was the last user. */ + dqa->refcount--; + if (dqa->refcount == 0) { + error = rhashtable_remove_fast(&xqc->shadow_dquot_acct, + &dqa->hash, xqcheck_dqacct_hash_params); + if (error) + goto out_abort; + xqcheck_dqacct_free(dqa, NULL); + } + + mutex_unlock(&xqc->lock); + return NOTIFY_DONE; + +out_abort: + xchk_iscan_abort(&xqc->iscan); +out_unlock: + mutex_unlock(&xqc->lock); + return NOTIFY_DONE; +} + +/* Record this inode's quota usage in our shadow quota counter data. */ +STATIC int +xqcheck_collect_inode( + struct xqcheck *xqc, + struct xfs_inode *ip) +{ + struct xfs_trans *tp = xqc->sc->tp; + xfs_filblks_t nblks, rtblks; + uint ilock_flags = 0; + xfs_dqid_t id; + bool isreg = S_ISREG(VFS_I(ip)->i_mode); + int error = 0; + + if (xfs_is_quota_inode(&tp->t_mountp->m_sb, ip->i_ino)) { + /* + * Quota files are never counted towards quota, so we do not + * need to take the lock. + */ + xchk_iscan_mark_visited(&xqc->iscan, ip); + return 0; + } + + /* Figure out the data / rt device block counts. */ + xfs_ilock(ip, XFS_IOLOCK_SHARED); + if (isreg) + xfs_ilock(ip, XFS_MMAPLOCK_SHARED); + if (XFS_IS_REALTIME_INODE(ip)) { + /* + * Read in the data fork for rt files so that _count_blocks + * can count the number of blocks allocated from the rt volume. + * Inodes do not track that separately. + */ + ilock_flags = xfs_ilock_data_map_shared(ip); + error = xfs_iread_extents(tp, ip, XFS_DATA_FORK); + if (error) + goto out_abort; + } else { + ilock_flags = XFS_ILOCK_SHARED; + xfs_ilock(ip, XFS_ILOCK_SHARED); + } + xfs_inode_count_blocks(tp, ip, &nblks, &rtblks); + + if (xchk_iscan_aborted(&xqc->iscan)) { + error = -ECANCELED; + goto out_incomplete; + } + + /* Update the shadow dquot counters. */ + mutex_lock(&xqc->lock); + if (xqc->ucounts) { + id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_USER); + error = xqcheck_update_incore_counts(xqc, xqc->ucounts, id, 1, + nblks, rtblks); + if (error) + goto out_mutex; + } + + if (xqc->gcounts) { + id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_GROUP); + error = xqcheck_update_incore_counts(xqc, xqc->gcounts, id, 1, + nblks, rtblks); + if (error) + goto out_mutex; + } + + if (xqc->pcounts) { + id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_PROJ); + error = xqcheck_update_incore_counts(xqc, xqc->pcounts, id, 1, + nblks, rtblks); + if (error) + goto out_mutex; + } + mutex_unlock(&xqc->lock); + + xchk_iscan_mark_visited(&xqc->iscan, ip); + goto out_ilock; + +out_mutex: + mutex_unlock(&xqc->lock); +out_abort: + xchk_iscan_abort(&xqc->iscan); +out_incomplete: + xchk_set_incomplete(xqc->sc); +out_ilock: + xfs_iunlock(ip, ilock_flags); + if (isreg) + xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); + return error; +} + +/* Walk all the allocated inodes and run a quota scan on them. */ +STATIC int +xqcheck_collect_counts( + struct xqcheck *xqc) +{ + struct xfs_scrub *sc = xqc->sc; + struct xfs_inode *ip; + int error; + + /* + * Set up for a potentially lengthy filesystem scan by reducing our + * transaction resource usage for the duration. Specifically: + * + * Cancel the transaction to release the log grant space while we scan + * the filesystem. 
+ * + * Create a new empty transaction to eliminate the possibility of the + * inode scan deadlocking on cyclical metadata. + * + * We pass the empty transaction to the file scanning function to avoid + * repeatedly cycling empty transactions. This can be done without + * risk of deadlock between sb_internal and the IOLOCK (we take the + * IOLOCK to quiesce the file before scanning) because empty + * transactions do not take sb_internal. + */ + xchk_trans_cancel(sc); + error = xchk_trans_alloc_empty(sc); + if (error) + return error; + + while ((error = xchk_iscan_iter(&xqc->iscan, &ip)) == 1) { + error = xqcheck_collect_inode(xqc, ip); + xchk_irele(sc, ip); + if (error) + break; + + if (xchk_should_terminate(sc, &error)) + break; + } + xchk_iscan_iter_finish(&xqc->iscan); + if (error) { + xchk_set_incomplete(sc); + /* + * If we couldn't grab an inode that was busy with a state + * change, change the error code so that we exit to userspace + * as quickly as possible. + */ + if (error == -EBUSY) + return -ECANCELED; + return error; + } + + /* + * Switch out for a real transaction in preparation for building a new + * tree. + */ + xchk_trans_cancel(sc); + return xchk_setup_fs(sc); +} + +/* + * Part 2: Comparing dquot resource counters. Walk each xfs_dquot, comparing + * the resource usage counters against our shadow dquots; and then walk each + * shadow dquot (that wasn't covered in the first part), comparing it against + * the xfs_dquot. + */ + +/* + * Check the dquot data against what we observed. Caller must hold the dquot + * lock. + */ +STATIC int +xqcheck_compare_dquot( + struct xqcheck *xqc, + xfs_dqtype_t dqtype, + struct xfs_dquot *dq) +{ + struct xqcheck_dquot xcdq; + struct xfarray *counts = xqcheck_counters_for(xqc, dqtype); + int error; + + if (xchk_iscan_aborted(&xqc->iscan)) { + xchk_set_incomplete(xqc->sc); + return -ECANCELED; + } + + mutex_lock(&xqc->lock); + error = xfarray_load_sparse(counts, dq->q_id, &xcdq); + if (error) + goto out_unlock; + + if (xcdq.icount != dq->q_ino.count) + xchk_qcheck_set_corrupt(xqc->sc, dqtype, dq->q_id); + + if (xcdq.bcount != dq->q_blk.count) + xchk_qcheck_set_corrupt(xqc->sc, dqtype, dq->q_id); + + if (xcdq.rtbcount != dq->q_rtb.count) + xchk_qcheck_set_corrupt(xqc->sc, dqtype, dq->q_id); + + xcdq.flags |= (XQCHECK_DQUOT_COMPARE_SCANNED | XQCHECK_DQUOT_WRITTEN); + error = xfarray_store(counts, dq->q_id, &xcdq); + if (error == -EFBIG) { + /* + * EFBIG means we tried to store data at too high a byte offset + * in the sparse array. IOWs, we cannot complete the check and + * must notify userspace that the check was incomplete. This + * should never happen outside of the collection phase. + */ + xchk_set_incomplete(xqc->sc); + error = -ECANCELED; + } + mutex_unlock(&xqc->lock); + if (error) + return error; + + if (xqc->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return -ECANCELED; + + return 0; + +out_unlock: + mutex_unlock(&xqc->lock); + return error; +} + +/* + * Walk all the observed dquots, and make sure there's a matching incore + * dquot and that its counts match ours. 
+ */ +STATIC int +xqcheck_walk_observations( + struct xqcheck *xqc, + xfs_dqtype_t dqtype) +{ + struct xqcheck_dquot xcdq; + struct xfs_dquot *dq; + struct xfarray *counts = xqcheck_counters_for(xqc, dqtype); + xfarray_idx_t cur = XFARRAY_CURSOR_INIT; + int error; + + mutex_lock(&xqc->lock); + while ((error = xfarray_iter(counts, &cur, &xcdq)) == 1) { + xfs_dqid_t id = cur - 1; + + if (xcdq.flags & XQCHECK_DQUOT_COMPARE_SCANNED) + continue; + + mutex_unlock(&xqc->lock); + + error = xfs_qm_dqget(xqc->sc->mp, id, dqtype, false, &dq); + if (error == -ENOENT) { + xchk_qcheck_set_corrupt(xqc->sc, dqtype, id); + return 0; + } + if (error) + return error; + + error = xqcheck_compare_dquot(xqc, dqtype, dq); + xfs_qm_dqput(dq); + if (error) + return error; + + if (xchk_should_terminate(xqc->sc, &error)) + return error; + + mutex_lock(&xqc->lock); + } + mutex_unlock(&xqc->lock); + + return error; +} + +/* Compare the quota counters we observed against the live dquots. */ +STATIC int +xqcheck_compare_dqtype( + struct xqcheck *xqc, + xfs_dqtype_t dqtype) +{ + struct xchk_dqiter cursor = { }; + struct xfs_scrub *sc = xqc->sc; + struct xfs_dquot *dq; + int error; + + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) + return 0; + + /* If the quota CHKD flag is cleared, we need to repair this quota. */ + if (!(xfs_quota_chkd_flag(dqtype) & sc->mp->m_qflags)) { + xchk_qcheck_set_corrupt(xqc->sc, dqtype, 0); + return 0; + } + + /* Compare what we observed against the actual dquots. */ + xchk_dqiter_init(&cursor, sc, dqtype); + while ((error = xchk_dquot_iter(&cursor, &dq)) == 1) { + error = xqcheck_compare_dquot(xqc, dqtype, dq); + xfs_qm_dqput(dq); + if (error) + break; + } + if (error) + return error; + + /* Walk all the observed dquots and compare to the incore ones. */ + return xqcheck_walk_observations(xqc, dqtype); +} + +/* Tear down everything associated with a quotacheck. */ +static void +xqcheck_teardown_scan( + void *priv) +{ + struct xqcheck *xqc = priv; + struct xfs_quotainfo *qi = xqc->sc->mp->m_quotainfo; + + /* Discourage any hook functions that might be running. */ + xchk_iscan_abort(&xqc->iscan); + + /* + * As noted above, the apply hook is responsible for cleaning up the + * shadow dquot accounting data when a transaction completes. The mod + * hook must be removed before the apply hook so that we don't + * mistakenly leave an active shadow account for the mod hook to get + * its hands on. No hooks should be running after these functions + * return. + */ + xfs_dqtrx_hook_del(qi, &xqc->qhook); + + if (xqc->shadow_dquot_acct.key_len) { + rhashtable_free_and_destroy(&xqc->shadow_dquot_acct, + xqcheck_dqacct_free, NULL); + xqc->shadow_dquot_acct.key_len = 0; + } + + if (xqc->pcounts) { + xfarray_destroy(xqc->pcounts); + xqc->pcounts = NULL; + } + + if (xqc->gcounts) { + xfarray_destroy(xqc->gcounts); + xqc->gcounts = NULL; + } + + if (xqc->ucounts) { + xfarray_destroy(xqc->ucounts); + xqc->ucounts = NULL; + } + + xchk_iscan_teardown(&xqc->iscan); + mutex_destroy(&xqc->lock); + xqc->sc = NULL; +} + +/* + * Scan all inodes in the entire filesystem to generate quota counter data. + * If the scan is successful, the quota data will be left alive for a repair. + * If any error occurs, we'll tear everything down. 
+ */ +STATIC int +xqcheck_setup_scan( + struct xfs_scrub *sc, + struct xqcheck *xqc) +{ + char *descr; + struct xfs_quotainfo *qi = sc->mp->m_quotainfo; + unsigned long long max_dquots = XFS_DQ_ID_MAX + 1ULL; + int error; + + ASSERT(xqc->sc == NULL); + xqc->sc = sc; + + mutex_init(&xqc->lock); + + /* Retry iget every tenth of a second for up to 30 seconds. */ + xchk_iscan_start(sc, 30000, 100, &xqc->iscan); + + error = -ENOMEM; + if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_USER)) { + descr = xchk_xfile_descr(sc, "user dquot records"); + error = xfarray_create(descr, max_dquots, + sizeof(struct xqcheck_dquot), &xqc->ucounts); + kfree(descr); + if (error) + goto out_teardown; + } + + if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_GROUP)) { + descr = xchk_xfile_descr(sc, "group dquot records"); + error = xfarray_create(descr, max_dquots, + sizeof(struct xqcheck_dquot), &xqc->gcounts); + kfree(descr); + if (error) + goto out_teardown; + } + + if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_PROJ)) { + descr = xchk_xfile_descr(sc, "project dquot records"); + error = xfarray_create(descr, max_dquots, + sizeof(struct xqcheck_dquot), &xqc->pcounts); + kfree(descr); + if (error) + goto out_teardown; + } + + /* + * Set up hash table to map transactions to our internal shadow dqtrx + * structures. + */ + error = rhashtable_init(&xqc->shadow_dquot_acct, + &xqcheck_dqacct_hash_params); + if (error) + goto out_teardown; + + /* + * Hook into the quota code. The hook only triggers for inodes that + * were already scanned, and the scanner thread takes each inode's + * ILOCK, which means that any in-progress inode updates will finish + * before we can scan the inode. + * + * The apply hook (which removes the shadow dquot accounting struct) + * must be installed before the mod hook so that we never fail to catch + * the end of a quota update sequence and leave stale shadow data. + */ + ASSERT(sc->flags & XCHK_FSGATES_QUOTA); + xfs_dqtrx_hook_setup(&xqc->qhook, xqcheck_mod_live_ino_dqtrx, + xqcheck_apply_live_dqtrx); + + error = xfs_dqtrx_hook_add(qi, &xqc->qhook); + if (error) + goto out_teardown; + + /* Use deferred cleanup to pass the quota count data to repair. */ + sc->buf_cleanup = xqcheck_teardown_scan; + return 0; + +out_teardown: + xqcheck_teardown_scan(xqc); + return error; +} + +/* Scrub all counters for a given quota type. */ +int +xchk_quotacheck( + struct xfs_scrub *sc) +{ + struct xqcheck *xqc = sc->buf; + int error = 0; + + /* Check quota counters on the live filesystem. */ + error = xqcheck_setup_scan(sc, xqc); + if (error) + return error; + + /* Walk all inodes, picking up quota information. */ + error = xqcheck_collect_counts(xqc); + if (!xchk_xref_process_error(sc, 0, 0, &error)) + return error; + + /* Fail fast if we're not playing with a full dataset. */ + if (xchk_iscan_aborted(&xqc->iscan)) + xchk_set_incomplete(sc); + if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) + return 0; + + /* Compare quota counters. */ + if (xqc->ucounts) { + error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_USER); + if (!xchk_xref_process_error(sc, 0, 0, &error)) + return error; + } + if (xqc->gcounts) { + error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_GROUP); + if (!xchk_xref_process_error(sc, 0, 0, &error)) + return error; + } + if (xqc->pcounts) { + error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_PROJ); + if (!xchk_xref_process_error(sc, 0, 0, &error)) + return error; + } + + /* Check one last time for an incomplete dataset. 
*/ + if (xchk_iscan_aborted(&xqc->iscan)) + xchk_set_incomplete(sc); + + return 0; +} diff --git a/fs/xfs/scrub/quotacheck.h b/fs/xfs/scrub/quotacheck.h new file mode 100644 index 000000000000..4ea5f249c978 --- /dev/null +++ b/fs/xfs/scrub/quotacheck.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2020-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_SCRUB_QUOTACHECK_H__ +#define __XFS_SCRUB_QUOTACHECK_H__ + +/* Quota counters for live quotacheck. */ +struct xqcheck_dquot { + /* block usage count */ + int64_t bcount; + + /* inode usage count */ + int64_t icount; + + /* realtime block usage count */ + int64_t rtbcount; + + /* Record state */ + unsigned int flags; +}; + +/* + * This incore dquot record has been written at least once. We never want to + * store an xqcheck_dquot that looks uninitialized. + */ +#define XQCHECK_DQUOT_WRITTEN (1U << 0) + +/* Already checked this dquot. */ +#define XQCHECK_DQUOT_COMPARE_SCANNED (1U << 1) + +/* Already repaired this dquot. */ +#define XQCHECK_DQUOT_REPAIR_SCANNED (1U << 2) + +/* Live quotacheck control structure. */ +struct xqcheck { + struct xfs_scrub *sc; + + /* Shadow dquot counter data. */ + struct xfarray *ucounts; + struct xfarray *gcounts; + struct xfarray *pcounts; + + /* Lock protecting quotacheck count observations */ + struct mutex lock; + + struct xchk_iscan iscan; + + /* Hooks into the quota code. */ + struct xfs_dqtrx_hook qhook; + + /* Shadow quota delta tracking structure. */ + struct rhashtable shadow_dquot_acct; +}; + +/* Return the incore counter array for a given quota type. */ +static inline struct xfarray * +xqcheck_counters_for( + struct xqcheck *xqc, + xfs_dqtype_t dqtype) +{ + switch (dqtype) { + case XFS_DQTYPE_USER: + return xqc->ucounts; + case XFS_DQTYPE_GROUP: + return xqc->gcounts; + case XFS_DQTYPE_PROJ: + return xqc->pcounts; + } + + ASSERT(0); + return NULL; +} + +#endif /* __XFS_SCRUB_QUOTACHECK_H__ */ diff --git a/fs/xfs/scrub/quotacheck_repair.c b/fs/xfs/scrub/quotacheck_repair.c new file mode 100644 index 000000000000..dd8554c755b5 --- /dev/null +++ b/fs/xfs/scrub/quotacheck_repair.c @@ -0,0 +1,261 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2020-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_inode.h" +#include "xfs_quota.h" +#include "xfs_qm.h" +#include "xfs_icache.h" +#include "xfs_bmap_util.h" +#include "xfs_iwalk.h" +#include "xfs_ialloc.h" +#include "xfs_sb.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/repair.h" +#include "scrub/xfile.h" +#include "scrub/xfarray.h" +#include "scrub/iscan.h" +#include "scrub/quota.h" +#include "scrub/quotacheck.h" +#include "scrub/trace.h" + +/* + * Live Quotacheck Repair + * ====================== + * + * Use the live quota counter information that we collected to replace the + * counter values in the incore dquots. A scrub->repair cycle should have left + * the live data and hooks active, so this is safe so long as we make sure the + * dquot is locked. + */ + +/* Commit new counters to a dquot. 
*/ +static int +xqcheck_commit_dquot( + struct xqcheck *xqc, + xfs_dqtype_t dqtype, + struct xfs_dquot *dq) +{ + struct xqcheck_dquot xcdq; + struct xfarray *counts = xqcheck_counters_for(xqc, dqtype); + int64_t delta; + bool dirty = false; + int error = 0; + + /* Unlock the dquot just long enough to allocate a transaction. */ + xfs_dqunlock(dq); + error = xchk_trans_alloc(xqc->sc, 0); + xfs_dqlock(dq); + if (error) + return error; + + xfs_trans_dqjoin(xqc->sc->tp, dq); + + if (xchk_iscan_aborted(&xqc->iscan)) { + error = -ECANCELED; + goto out_cancel; + } + + mutex_lock(&xqc->lock); + error = xfarray_load_sparse(counts, dq->q_id, &xcdq); + if (error) + goto out_unlock; + + /* Adjust counters as needed. */ + delta = (int64_t)xcdq.icount - dq->q_ino.count; + if (delta) { + dq->q_ino.reserved += delta; + dq->q_ino.count += delta; + dirty = true; + } + + delta = (int64_t)xcdq.bcount - dq->q_blk.count; + if (delta) { + dq->q_blk.reserved += delta; + dq->q_blk.count += delta; + dirty = true; + } + + delta = (int64_t)xcdq.rtbcount - dq->q_rtb.count; + if (delta) { + dq->q_rtb.reserved += delta; + dq->q_rtb.count += delta; + dirty = true; + } + + xcdq.flags |= (XQCHECK_DQUOT_REPAIR_SCANNED | XQCHECK_DQUOT_WRITTEN); + error = xfarray_store(counts, dq->q_id, &xcdq); + if (error == -EFBIG) { + /* + * EFBIG means we tried to store data at too high a byte offset + * in the sparse array. IOWs, we cannot complete the repair + * and must cancel the whole operation. This should never + * happen, but we need to catch it anyway. + */ + error = -ECANCELED; + } + mutex_unlock(&xqc->lock); + if (error || !dirty) + goto out_cancel; + + trace_xrep_quotacheck_dquot(xqc->sc->mp, dq->q_type, dq->q_id); + + /* Commit the dirty dquot to disk. */ + dq->q_flags |= XFS_DQFLAG_DIRTY; + if (dq->q_id) + xfs_qm_adjust_dqtimers(dq); + xfs_trans_log_dquot(xqc->sc->tp, dq); + + /* + * Transaction commit unlocks the dquot, so we must re-lock it so that + * the caller can put the reference (which apparently requires a locked + * dquot). + */ + error = xrep_trans_commit(xqc->sc); + xfs_dqlock(dq); + return error; + +out_unlock: + mutex_unlock(&xqc->lock); +out_cancel: + xchk_trans_cancel(xqc->sc); + + /* Re-lock the dquot so the caller can put the reference. */ + xfs_dqlock(dq); + return error; +} + +/* Commit new quota counters for a particular quota type. */ +STATIC int +xqcheck_commit_dqtype( + struct xqcheck *xqc, + unsigned int dqtype) +{ + struct xchk_dqiter cursor = { }; + struct xqcheck_dquot xcdq; + struct xfs_scrub *sc = xqc->sc; + struct xfs_mount *mp = sc->mp; + struct xfarray *counts = xqcheck_counters_for(xqc, dqtype); + struct xfs_dquot *dq; + xfarray_idx_t cur = XFARRAY_CURSOR_INIT; + int error; + + /* + * Update the counters of every dquot that the quota file knows about. + */ + xchk_dqiter_init(&cursor, sc, dqtype); + while ((error = xchk_dquot_iter(&cursor, &dq)) == 1) { + error = xqcheck_commit_dquot(xqc, dqtype, dq); + xfs_qm_dqput(dq); + if (error) + break; + } + if (error) + return error; + + /* + * Make a second pass to deal with the dquots that we know about but + * the quota file previously did not know about. + */ + mutex_lock(&xqc->lock); + while ((error = xfarray_iter(counts, &cur, &xcdq)) == 1) { + xfs_dqid_t id = cur - 1; + + if (xcdq.flags & XQCHECK_DQUOT_REPAIR_SCANNED) + continue; + + mutex_unlock(&xqc->lock); + + /* + * Grab the dquot, allowing for dquot block allocation in a + * separate transaction. 
We committed the scrub transaction + * in a previous step, so we will not be creating nested + * transactions here. + */ + error = xfs_qm_dqget(mp, id, dqtype, true, &dq); + if (error) + return error; + + error = xqcheck_commit_dquot(xqc, dqtype, dq); + xfs_qm_dqput(dq); + if (error) + return error; + + mutex_lock(&xqc->lock); + } + mutex_unlock(&xqc->lock); + + return error; +} + +/* Figure out quota CHKD flags for the running quota types. */ +static inline unsigned int +xqcheck_chkd_flags( + struct xfs_mount *mp) +{ + unsigned int ret = 0; + + if (XFS_IS_UQUOTA_ON(mp)) + ret |= XFS_UQUOTA_CHKD; + if (XFS_IS_GQUOTA_ON(mp)) + ret |= XFS_GQUOTA_CHKD; + if (XFS_IS_PQUOTA_ON(mp)) + ret |= XFS_PQUOTA_CHKD; + return ret; +} + +/* Commit the new dquot counters. */ +int +xrep_quotacheck( + struct xfs_scrub *sc) +{ + struct xqcheck *xqc = sc->buf; + unsigned int qflags = xqcheck_chkd_flags(sc->mp); + int error; + + /* + * Clear the CHKD flag for the running quota types and commit the scrub + * transaction so that we can allocate new quota block mappings if we + * have to. If we crash after this point, the sb still has the CHKD + * flags cleared, so mount quotacheck will fix all of this up. + */ + xrep_update_qflags(sc, qflags, 0); + error = xrep_trans_commit(sc); + if (error) + return error; + + /* Commit the new counters to the dquots. */ + if (xqc->ucounts) { + error = xqcheck_commit_dqtype(xqc, XFS_DQTYPE_USER); + if (error) + return error; + } + if (xqc->gcounts) { + error = xqcheck_commit_dqtype(xqc, XFS_DQTYPE_GROUP); + if (error) + return error; + } + if (xqc->pcounts) { + error = xqcheck_commit_dqtype(xqc, XFS_DQTYPE_PROJ); + if (error) + return error; + } + + /* Set the CHKD flags now that we've fixed quota counts. */ + error = xchk_trans_alloc(sc, 0); + if (error) + return error; + + xrep_update_qflags(sc, 0, qflags); + return xrep_trans_commit(sc); +} diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 745d5b8f405a..7141b1778902 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -687,6 +687,44 @@ xrep_find_ag_btree_roots( } #ifdef CONFIG_XFS_QUOTA +/* Update some quota flags in the superblock. */ +void +xrep_update_qflags( + struct xfs_scrub *sc, + unsigned int clear_flags, + unsigned int set_flags) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_buf *bp; + + mutex_lock(&mp->m_quotainfo->qi_quotaofflock); + if ((mp->m_qflags & clear_flags) == 0 && + (mp->m_qflags & set_flags) == set_flags) + goto no_update; + + mp->m_qflags &= ~clear_flags; + mp->m_qflags |= set_flags; + + spin_lock(&mp->m_sb_lock); + mp->m_sb.sb_qflags &= ~clear_flags; + mp->m_sb.sb_qflags |= set_flags; + spin_unlock(&mp->m_sb_lock); + + /* + * Update the quota flags in the ondisk superblock without touching + * the summary counters. We have not quiesced inode chunk allocation, + * so we cannot coordinate with updates to the icount and ifree percpu + * counters. + */ + bp = xfs_trans_getsb(sc->tp); + xfs_sb_to_disk(bp->b_addr, &mp->m_sb); + xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF); + xfs_trans_log_buf(sc->tp, bp, 0, sizeof(struct xfs_dsb) - 1); + +no_update: + mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock); +} + /* Force a quotacheck the next time we mount. 
*/ void xrep_force_quotacheck( @@ -699,13 +737,7 @@ xrep_force_quotacheck( if (!(flag & sc->mp->m_qflags)) return; - mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock); - sc->mp->m_qflags &= ~flag; - spin_lock(&sc->mp->m_sb_lock); - sc->mp->m_sb.sb_qflags &= ~flag; - spin_unlock(&sc->mp->m_sb_lock); - xfs_log_sb(sc->tp); - mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock); + xrep_update_qflags(sc, flag, 0); } /* diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 17114327e6fa..fdfa06699921 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -72,6 +72,8 @@ int xrep_find_ag_btree_roots(struct xfs_scrub *sc, struct xfs_buf *agf_bp, struct xrep_find_ag_btree *btree_info, struct xfs_buf *agfl_bp); #ifdef CONFIG_XFS_QUOTA +void xrep_update_qflags(struct xfs_scrub *sc, unsigned int clear_flags, + unsigned int set_flags); void xrep_force_quotacheck(struct xfs_scrub *sc, xfs_dqtype_t type); int xrep_ino_dqattach(struct xfs_scrub *sc); #else @@ -123,8 +125,10 @@ int xrep_rtbitmap(struct xfs_scrub *sc); #ifdef CONFIG_XFS_QUOTA int xrep_quota(struct xfs_scrub *sc); +int xrep_quotacheck(struct xfs_scrub *sc); #else # define xrep_quota xrep_notsupported +# define xrep_quotacheck xrep_notsupported #endif /* CONFIG_XFS_QUOTA */ int xrep_reinit_pagf(struct xfs_scrub *sc); @@ -191,6 +195,7 @@ xrep_setup_nothing( #define xrep_bmap_cow xrep_notsupported #define xrep_rtbitmap xrep_notsupported #define xrep_quota xrep_notsupported +#define xrep_quotacheck xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index caf324c2b991..9112c0985c62 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -157,6 +157,9 @@ xchk_fsgates_disable( if (sc->flags & XCHK_FSGATES_DRAIN) xfs_drain_wait_disable(); + if (sc->flags & XCHK_FSGATES_QUOTA) + xfs_dqtrx_hook_disable(); + sc->flags &= ~XCHK_FSGATES_ALL; } @@ -360,6 +363,12 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .scrub = xchk_fscounters, .repair = xrep_notsupported, }, + [XFS_SCRUB_TYPE_QUOTACHECK] = { /* quota counters */ + .type = ST_FS, + .setup = xchk_setup_quotacheck, + .scrub = xchk_quotacheck, + .repair = xrep_quotacheck, + }, }; static int diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 7fc50654c4fe..5cd4550155f2 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -121,6 +121,7 @@ struct xfs_scrub { #define XCHK_HAVE_FREEZE_PROT (1U << 1) /* do we have freeze protection? */ #define XCHK_FSGATES_DRAIN (1U << 2) /* defer ops draining enabled */ #define XCHK_NEED_DRAIN (1U << 3) /* scrub needs to drain defer ops */ +#define XCHK_FSGATES_QUOTA (1U << 4) /* quota live update enabled */ #define XREP_RESET_PERAG_RESV (1U << 30) /* must reset AG space reservation */ #define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */ @@ -130,7 +131,8 @@ struct xfs_scrub { * features are gated off via dynamic code patching, which is why the state * must be enabled during scrub setup and can only be torn down afterwards. 
*/ -#define XCHK_FSGATES_ALL (XCHK_FSGATES_DRAIN) +#define XCHK_FSGATES_ALL (XCHK_FSGATES_DRAIN | \ + XCHK_FSGATES_QUOTA) /* Metadata scrubbers */ int xchk_tester(struct xfs_scrub *sc); @@ -167,12 +169,18 @@ xchk_rtsummary(struct xfs_scrub *sc) #endif #ifdef CONFIG_XFS_QUOTA int xchk_quota(struct xfs_scrub *sc); +int xchk_quotacheck(struct xfs_scrub *sc); #else static inline int xchk_quota(struct xfs_scrub *sc) { return -ENOENT; } +static inline int +xchk_quotacheck(struct xfs_scrub *sc) +{ + return -ENOENT; +} #endif int xchk_fscounters(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c index cd91db4a5548..d716a432227b 100644 --- a/fs/xfs/scrub/stats.c +++ b/fs/xfs/scrub/stats.c @@ -77,6 +77,7 @@ static const char *name_map[XFS_SCRUB_TYPE_NR] = { [XFS_SCRUB_TYPE_GQUOTA] = "grpquota", [XFS_SCRUB_TYPE_PQUOTA] = "prjquota", [XFS_SCRUB_TYPE_FSCOUNTERS] = "fscounters", + [XFS_SCRUB_TYPE_QUOTACHECK] = "quotacheck", }; /* Format the scrub stats into a text buffer, similar to pcp style. */ diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 9aba60c61880..fedcebf90a42 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -15,6 +15,7 @@ #include <linux/tracepoint.h> #include "xfs_bit.h" +#include "xfs_quota_defs.h" struct xfs_scrub; struct xfile; @@ -65,6 +66,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_UQUOTA); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_GQUOTA); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_PQUOTA); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS); +TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_QUOTACHECK); #define XFS_SCRUB_TYPE_STRINGS \ { XFS_SCRUB_TYPE_PROBE, "probe" }, \ @@ -91,7 +93,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS); { XFS_SCRUB_TYPE_UQUOTA, "usrquota" }, \ { XFS_SCRUB_TYPE_GQUOTA, "grpquota" }, \ { XFS_SCRUB_TYPE_PQUOTA, "prjquota" }, \ - { XFS_SCRUB_TYPE_FSCOUNTERS, "fscounters" } + { XFS_SCRUB_TYPE_FSCOUNTERS, "fscounters" }, \ + { XFS_SCRUB_TYPE_QUOTACHECK, "quotacheck" } #define XFS_SCRUB_FLAG_STRINGS \ { XFS_SCRUB_IFLAG_REPAIR, "repair" }, \ @@ -109,6 +112,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS); { XCHK_HAVE_FREEZE_PROT, "nofreeze" }, \ { XCHK_FSGATES_DRAIN, "fsgates_drain" }, \ { XCHK_NEED_DRAIN, "need_drain" }, \ + { XCHK_FSGATES_QUOTA, "fsgates_quota" }, \ { XREP_RESET_PERAG_RESV, "reset_perag_resv" }, \ { XREP_ALREADY_FIXED, "already_fixed" } @@ -397,6 +401,29 @@ DEFINE_SCRUB_DQITER_EVENT(xchk_dquot_iter_revalidate_bmap); DEFINE_SCRUB_DQITER_EVENT(xchk_dquot_iter_advance_bmap); DEFINE_SCRUB_DQITER_EVENT(xchk_dquot_iter_advance_incore); DEFINE_SCRUB_DQITER_EVENT(xchk_dquot_iter); + +TRACE_EVENT(xchk_qcheck_error, + TP_PROTO(struct xfs_scrub *sc, xfs_dqtype_t dqtype, xfs_dqid_t id, + void *ret_ip), + TP_ARGS(sc, dqtype, id, ret_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_dqtype_t, dqtype) + __field(xfs_dqid_t, id) + __field(void *, ret_ip) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->dqtype = dqtype; + __entry->id = id; + __entry->ret_ip = ret_ip; + ), + TP_printk("dev %d:%d dquot type %s id 0x%x ret_ip %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_symbolic(__entry->dqtype, XFS_DQTYPE_STRINGS), + __entry->id, + __entry->ret_ip) +); #endif /* CONFIG_XFS_QUOTA */ TRACE_EVENT(xchk_incomplete, @@ -1977,6 +2004,7 @@ DEFINE_EVENT(xrep_dquot_class, name, \ DEFINE_XREP_DQUOT_EVENT(xrep_dquot_item); DEFINE_XREP_DQUOT_EVENT(xrep_disk_dquot); DEFINE_XREP_DQUOT_EVENT(xrep_dquot_item_fill_bmap_hole); +DEFINE_XREP_DQUOT_EVENT(xrep_quotacheck_dquot); #endif /* CONFIG_XFS_QUOTA */ #endif /* 
IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ diff --git a/fs/xfs/scrub/xfarray.h b/fs/xfs/scrub/xfarray.h index ec643cc9fc14..acb2f94c56c1 100644 --- a/fs/xfs/scrub/xfarray.h +++ b/fs/xfs/scrub/xfarray.h @@ -45,6 +45,25 @@ int xfarray_store(struct xfarray *array, xfarray_idx_t idx, const void *ptr); int xfarray_store_anywhere(struct xfarray *array, const void *ptr); bool xfarray_element_is_null(struct xfarray *array, const void *ptr); +/* + * Load an array element, but zero the buffer if there's no data because we + * haven't stored to that array element yet. + */ +static inline int +xfarray_load_sparse( + struct xfarray *array, + uint64_t idx, + void *rec) +{ + int error = xfarray_load(array, idx, rec); + + if (error == -ENODATA) { + memset(rec, 0, array->obj_size); + return 0; + } + return error; +} + /* Append an element to the array. */ static inline int xfarray_append(struct xfarray *array, const void *ptr) { diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index 9a57afee9338..ef07af9f753d 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -280,6 +280,7 @@ static const struct ioctl_sick_map fs_map[] = { { XFS_SICK_FS_UQUOTA, XFS_FSOP_GEOM_SICK_UQUOTA }, { XFS_SICK_FS_GQUOTA, XFS_FSOP_GEOM_SICK_GQUOTA }, { XFS_SICK_FS_PQUOTA, XFS_FSOP_GEOM_SICK_PQUOTA }, + { XFS_SICK_FS_QUOTACHECK, XFS_FSOP_GEOM_SICK_QUOTACHECK }, { 0, 0 }, }; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 110077ca3d2a..d6635d219527 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -3755,3 +3755,19 @@ xfs_ifork_zapped( return false; } } + +/* Compute the number of data and realtime blocks used by a file. */ +void +xfs_inode_count_blocks( + struct xfs_trans *tp, + struct xfs_inode *ip, + xfs_filblks_t *dblocks, + xfs_filblks_t *rblocks) +{ + struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); + + *rblocks = 0; + if (XFS_IS_REALTIME_INODE(ip)) + xfs_bmap_count_leaves(ifp, rblocks); + *dblocks = ip->i_nblocks - *rblocks; +} diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 796d11065fe2..7bbdc7009e7d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -623,5 +623,7 @@ int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip) int xfs_inode_reload_unlinked(struct xfs_inode *ip); bool xfs_ifork_zapped(const struct xfs_inode *ip, int whichfork); +void xfs_inode_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip, + xfs_filblks_t *dblocks, xfs_filblks_t *rblocks); #endif /* __XFS_INODE_H__ */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b5b555698ae1..991be1eacb6c 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -26,6 +26,7 @@ #include "xfs_ag.h" #include "xfs_ialloc.h" #include "xfs_log_priv.h" +#include "xfs_health.h" /* * The global quota manager. 
There is only one of these for the entire @@ -692,6 +693,9 @@ xfs_qm_init_quotainfo( shrinker_register(qinf->qi_shrinker); + xfs_hooks_init(&qinf->qi_mod_ino_dqtrx_hooks); + xfs_hooks_init(&qinf->qi_apply_dqtrx_hooks); + return 0; out_free_inos: @@ -1408,8 +1412,12 @@ error_return: xfs_warn(mp, "Quotacheck: Failed to reset quota flags."); } - } else + xfs_fs_mark_sick(mp, XFS_SICK_FS_QUOTACHECK); + } else { xfs_notice(mp, "Quotacheck: Done."); + xfs_fs_mark_healthy(mp, XFS_SICK_FS_QUOTACHECK); + } + return error; error_purge: @@ -1819,12 +1827,12 @@ xfs_qm_vop_chown( ASSERT(prevdq); ASSERT(prevdq != newdq); - xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_nblocks)); - xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1); + xfs_trans_mod_ino_dquot(tp, ip, prevdq, bfield, -(ip->i_nblocks)); + xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_ICOUNT, -1); /* the sparkling new dquot */ - xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_nblocks); - xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1); + xfs_trans_mod_ino_dquot(tp, ip, newdq, bfield, ip->i_nblocks); + xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_ICOUNT, 1); /* * Back when we made quota reservations for the chown, we reserved the @@ -1906,22 +1914,21 @@ xfs_qm_vop_create_dqattach( ASSERT(i_uid_read(VFS_I(ip)) == udqp->q_id); ip->i_udquot = xfs_qm_dqhold(udqp); - xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1); } if (gdqp && XFS_IS_GQUOTA_ON(mp)) { ASSERT(ip->i_gdquot == NULL); ASSERT(i_gid_read(VFS_I(ip)) == gdqp->q_id); ip->i_gdquot = xfs_qm_dqhold(gdqp); - xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); } if (pdqp && XFS_IS_PQUOTA_ON(mp)) { ASSERT(ip->i_pdquot == NULL); ASSERT(ip->i_projid == pdqp->q_id); ip->i_pdquot = xfs_qm_dqhold(pdqp); - xfs_trans_mod_dquot(tp, pdqp, XFS_TRANS_DQ_ICOUNT, 1); } + + xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, 1); } /* Decide if this inode's dquot is near an enforcement boundary. */ diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index d5c9fc4ba591..f5993012bf98 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -68,6 +68,10 @@ struct xfs_quotainfo { /* Minimum and maximum quota expiration timestamp values. */ time64_t qi_expiry_min; time64_t qi_expiry_max; + + /* Hook to feed quota counter updates to an active online repair. */ + struct xfs_hooks qi_mod_ino_dqtrx_hooks; + struct xfs_hooks qi_apply_dqtrx_hooks; }; static inline struct radix_tree_root * @@ -104,6 +108,18 @@ xfs_quota_inode(struct xfs_mount *mp, xfs_dqtype_t type) return NULL; } +/* + * Parameters for tracking dqtrx changes on behalf of an inode. The hook + * function arg parameter is the field being updated. 
+ */ +struct xfs_mod_ino_dqtrx_params { + uintptr_t tx_id; + xfs_ino_t ino; + xfs_dqtype_t q_type; + xfs_dqid_t q_id; + int64_t delta; +}; + extern void xfs_trans_mod_dquot(struct xfs_trans *tp, struct xfs_dquot *dqp, uint field, int64_t delta); extern void xfs_trans_dqjoin(struct xfs_trans *, struct xfs_dquot *); diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index b77673dd0558..271c1021c733 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -9,6 +9,7 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" +#include "xfs_mount.h" #include "xfs_quota.h" #include "xfs_mount.h" #include "xfs_inode.h" diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index e0d56489f3b2..85a4ae1a17f6 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -74,6 +74,22 @@ struct xfs_dqtrx { int64_t qt_icount_delta; /* dquot inode count changes */ }; +enum xfs_apply_dqtrx_type { + XFS_APPLY_DQTRX_COMMIT = 0, + XFS_APPLY_DQTRX_UNRESERVE, +}; + +/* + * Parameters for applying dqtrx changes to a dquot. The hook function arg + * parameter is enum xfs_apply_dqtrx_type. + */ +struct xfs_apply_dqtrx_params { + uintptr_t tx_id; + xfs_ino_t ino; + xfs_dqtype_t q_type; + xfs_dqid_t q_id; +}; + #ifdef CONFIG_XFS_QUOTA extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *); extern void xfs_trans_free_dqinfo(struct xfs_trans *); @@ -114,6 +130,30 @@ xfs_quota_reserve_blkres(struct xfs_inode *ip, int64_t blocks) return xfs_trans_reserve_quota_nblks(NULL, ip, blocks, 0, false); } bool xfs_inode_near_dquot_enforcement(struct xfs_inode *ip, xfs_dqtype_t type); + +# ifdef CONFIG_XFS_LIVE_HOOKS +void xfs_trans_mod_ino_dquot(struct xfs_trans *tp, struct xfs_inode *ip, + struct xfs_dquot *dqp, unsigned int field, int64_t delta); + +struct xfs_quotainfo; + +struct xfs_dqtrx_hook { + struct xfs_hook mod_hook; + struct xfs_hook apply_hook; +}; + +void xfs_dqtrx_hook_disable(void); +void xfs_dqtrx_hook_enable(void); + +int xfs_dqtrx_hook_add(struct xfs_quotainfo *qi, struct xfs_dqtrx_hook *hook); +void xfs_dqtrx_hook_del(struct xfs_quotainfo *qi, struct xfs_dqtrx_hook *hook); +void xfs_dqtrx_hook_setup(struct xfs_dqtrx_hook *hook, notifier_fn_t mod_fn, + notifier_fn_t apply_fn); +# else +# define xfs_trans_mod_ino_dquot(tp, ip, dqp, field, delta) \ + xfs_trans_mod_dquot((tp), (dqp), (field), (delta)) +# endif /* CONFIG_XFS_LIVE_HOOKS */ + #else static inline int xfs_qm_vop_dqalloc(struct xfs_inode *ip, kuid_t kuid, kgid_t kgid, @@ -173,6 +213,12 @@ xfs_trans_reserve_quota_icreate(struct xfs_trans *tp, struct xfs_dquot *udqp, #define xfs_qm_unmount(mp) #define xfs_qm_unmount_quotas(mp) #define xfs_inode_near_dquot_enforcement(ip, type) (false) + +# ifdef CONFIG_XFS_LIVE_HOOKS +# define xfs_dqtrx_hook_enable() ((void)0) +# define xfs_dqtrx_hook_disable() ((void)0) +# endif /* CONFIG_XFS_LIVE_HOOKS */ + #endif /* CONFIG_XFS_QUOTA */ static inline int diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 9c159d016ecf..577b535a595c 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -17,6 +17,7 @@ #include "xfs_qm.h" #include "xfs_trace.h" #include "xfs_error.h" +#include "xfs_health.h" STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *); @@ -120,6 +121,116 @@ xfs_trans_dup_dqinfo( } } +#ifdef CONFIG_XFS_LIVE_HOOKS +/* + * Use a static key here to reduce the overhead of quota live updates. If the + * compiler supports jump labels, the static branch will be replaced by a nop + * sled when there are no hook users. 
Online fsck is currently the only + * caller, so this is a reasonable tradeoff. + * + * Note: Patching the kernel code requires taking the cpu hotplug lock. Other + * parts of the kernel allocate memory with that lock held, which means that + * XFS callers cannot hold any locks that might be used by memory reclaim or + * writeback when calling the static_branch_{inc,dec} functions. + */ +DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dqtrx_hooks_switch); + +void +xfs_dqtrx_hook_disable(void) +{ + xfs_hooks_switch_off(&xfs_dqtrx_hooks_switch); +} + +void +xfs_dqtrx_hook_enable(void) +{ + xfs_hooks_switch_on(&xfs_dqtrx_hooks_switch); +} + +/* Schedule a transactional dquot update on behalf of an inode. */ +void +xfs_trans_mod_ino_dquot( + struct xfs_trans *tp, + struct xfs_inode *ip, + struct xfs_dquot *dqp, + unsigned int field, + int64_t delta) +{ + xfs_trans_mod_dquot(tp, dqp, field, delta); + + if (xfs_hooks_switched_on(&xfs_dqtrx_hooks_switch)) { + struct xfs_mod_ino_dqtrx_params p = { + .tx_id = (uintptr_t)tp, + .ino = ip->i_ino, + .q_type = xfs_dquot_type(dqp), + .q_id = dqp->q_id, + .delta = delta + }; + struct xfs_quotainfo *qi = tp->t_mountp->m_quotainfo; + + xfs_hooks_call(&qi->qi_mod_ino_dqtrx_hooks, field, &p); + } +} + +/* Call the specified functions during a dquot counter update. */ +int +xfs_dqtrx_hook_add( + struct xfs_quotainfo *qi, + struct xfs_dqtrx_hook *hook) +{ + int error; + + /* + * Transactional dquot updates first call the mod hook when changes + * are attached to the transaction and then call the apply hook when + * those changes are committed (or canceled). + * + * The apply hook must be installed before the mod hook so that we + * never fail to catch the end of a quota update sequence. + */ + error = xfs_hooks_add(&qi->qi_apply_dqtrx_hooks, &hook->apply_hook); + if (error) + goto out; + + error = xfs_hooks_add(&qi->qi_mod_ino_dqtrx_hooks, &hook->mod_hook); + if (error) + goto out_apply; + + return 0; + +out_apply: + xfs_hooks_del(&qi->qi_apply_dqtrx_hooks, &hook->apply_hook); +out: + return error; +} + +/* Stop calling the specified function during a dquot counter update. */ +void +xfs_dqtrx_hook_del( + struct xfs_quotainfo *qi, + struct xfs_dqtrx_hook *hook) +{ + /* + * The mod hook must be removed before apply hook to avoid giving the + * hook consumer with an incomplete update. No hooks should be running + * after these functions return. + */ + xfs_hooks_del(&qi->qi_mod_ino_dqtrx_hooks, &hook->mod_hook); + xfs_hooks_del(&qi->qi_apply_dqtrx_hooks, &hook->apply_hook); +} + +/* Configure dquot update hook functions. */ +void +xfs_dqtrx_hook_setup( + struct xfs_dqtrx_hook *hook, + notifier_fn_t mod_fn, + notifier_fn_t apply_fn) +{ + xfs_hook_setup(&hook->mod_hook, mod_fn); + xfs_hook_setup(&hook->apply_hook, apply_fn); +} +#endif /* CONFIG_XFS_LIVE_HOOKS */ + /* * Wrap around mod_dquot to account for both user and group quotas. 
*/ @@ -137,11 +248,11 @@ xfs_trans_mod_dquot_byino( return; if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) - (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); + xfs_trans_mod_ino_dquot(tp, ip, ip->i_udquot, field, delta); if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) - (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta); + xfs_trans_mod_ino_dquot(tp, ip, ip->i_gdquot, field, delta); if (XFS_IS_PQUOTA_ON(mp) && ip->i_pdquot) - (void) xfs_trans_mod_dquot(tp, ip->i_pdquot, field, delta); + xfs_trans_mod_ino_dquot(tp, ip, ip->i_pdquot, field, delta); } STATIC struct xfs_dqtrx * @@ -321,6 +432,29 @@ xfs_apply_quota_reservation_deltas( } } +#ifdef CONFIG_XFS_LIVE_HOOKS +/* Call downstream hooks now that it's time to apply dquot deltas. */ +static inline void +xfs_trans_apply_dquot_deltas_hook( + struct xfs_trans *tp, + struct xfs_dquot *dqp) +{ + if (xfs_hooks_switched_on(&xfs_dqtrx_hooks_switch)) { + struct xfs_apply_dqtrx_params p = { + .tx_id = (uintptr_t)tp, + .q_type = xfs_dquot_type(dqp), + .q_id = dqp->q_id, + }; + struct xfs_quotainfo *qi = tp->t_mountp->m_quotainfo; + + xfs_hooks_call(&qi->qi_apply_dqtrx_hooks, + XFS_APPLY_DQTRX_COMMIT, &p); + } +} +#else +# define xfs_trans_apply_dquot_deltas_hook(tp, dqp) ((void)0) +#endif /* CONFIG_XFS_LIVE_HOOKS */ + /* * Called by xfs_trans_commit() and similar in spirit to * xfs_trans_apply_sb_deltas(). @@ -366,6 +500,8 @@ xfs_trans_apply_dquot_deltas( ASSERT(XFS_DQ_IS_LOCKED(dqp)); + xfs_trans_apply_dquot_deltas_hook(tp, dqp); + /* * adjust the actual number of blocks used */ @@ -465,6 +601,29 @@ xfs_trans_apply_dquot_deltas( } } +#ifdef CONFIG_XFS_LIVE_HOOKS +/* Call downstream hooks now that it's time to cancel dquot deltas. */ +static inline void +xfs_trans_unreserve_and_mod_dquots_hook( + struct xfs_trans *tp, + struct xfs_dquot *dqp) +{ + if (xfs_hooks_switched_on(&xfs_dqtrx_hooks_switch)) { + struct xfs_apply_dqtrx_params p = { + .tx_id = (uintptr_t)tp, + .q_type = xfs_dquot_type(dqp), + .q_id = dqp->q_id, + }; + struct xfs_quotainfo *qi = tp->t_mountp->m_quotainfo; + + xfs_hooks_call(&qi->qi_apply_dqtrx_hooks, + XFS_APPLY_DQTRX_UNRESERVE, &p); + } +} +#else +# define xfs_trans_unreserve_and_mod_dquots_hook(tp, dqp) ((void)0) +#endif /* CONFIG_XFS_LIVE_HOOKS */ + /* * Release the reservations, and adjust the dquots accordingly. * This is called only when the transaction is being aborted. If by @@ -495,6 +654,9 @@ xfs_trans_unreserve_and_mod_dquots( */ if ((dqp = qtrx->qt_dquot) == NULL) break; + + xfs_trans_unreserve_and_mod_dquots_hook(tp, dqp); + /* * Unreserve the original reservation. We don't care * about the number of blocks used field, or deltas. @@ -706,6 +868,7 @@ error_return: error_corrupt: xfs_dqunlock(dqp); xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + xfs_fs_mark_sick(mp, XFS_SICK_FS_QUOTACHECK); return -EFSCORRUPTED; } |
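As a recap of the hook protocol implemented above: the mod hook records per-transaction counter deltas keyed by transaction id, and the apply hook folds those deltas into the shadow counters at commit (or drops them on cancel/unreserve). The ordering invariant — install the apply hook before the mod hook, remove the mod hook before the apply hook — guarantees every recorded delta sees a matching apply. Below is a deliberately simplified, single-slot C model of that lifecycle (the real code keys an rhashtable by tx_id and tracks up to XQCHECK_MAX_NR_DQTRXS dquots per transaction; all names here are illustrative):

#include <stdint.h>
#include <stdio.h>

struct shadow_dqtrx {
	uintptr_t	tx_id;
	int64_t		icount;		/* inode count delta */
	int64_t		bcount;		/* block count delta */
	int		live;
};

static struct shadow_dqtrx slot;		/* one transaction in flight */
static int64_t shadow_icount, shadow_bcount;	/* the shadow dquot */

/* "Mod" hook: stash the delta; nothing touches the shadow dquot yet. */
static void mod_hook(uintptr_t tx_id, int64_t icount_d, int64_t bcount_d)
{
	if (!slot.live)
		slot = (struct shadow_dqtrx){ .tx_id = tx_id, .live = 1 };
	slot.icount += icount_d;
	slot.bcount += bcount_d;
}

/* "Apply" hook: fold deltas into the shadow dquot on commit only. */
static void apply_hook(uintptr_t tx_id, int commit)
{
	if (!slot.live || slot.tx_id != tx_id)
		return;
	if (commit) {
		shadow_icount += slot.icount;
		shadow_bcount += slot.bcount;
	}
	slot.live = 0;		/* cancel/unreserve just drops the deltas */
}

int main(void)
{
	mod_hook(0x1, 1, 8);	/* creating a file: +1 inode, +8 blocks */
	apply_hook(0x1, 1);	/* that transaction commits */
	mod_hook(0x2, 0, 16);	/* a second update... */
	apply_hook(0x2, 0);	/* ...is cancelled: deltas discarded */
	printf("icount=%lld bcount=%lld\n",	/* prints icount=1 bcount=8 */
	       (long long)shadow_icount, (long long)shadow_bcount);
	return 0;
}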