aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/Makefile2
-rw-r--r--fs/xfs/libxfs/xfs_fs.h4
-rw-r--r--fs/xfs/libxfs/xfs_health.h4
-rw-r--r--fs/xfs/scrub/common.c49
-rw-r--r--fs/xfs/scrub/common.h11
-rw-r--r--fs/xfs/scrub/fscounters.c2
-rw-r--r--fs/xfs/scrub/health.c1
-rw-r--r--fs/xfs/scrub/quotacheck.c867
-rw-r--r--fs/xfs/scrub/quotacheck.h76
-rw-r--r--fs/xfs/scrub/quotacheck_repair.c261
-rw-r--r--fs/xfs/scrub/repair.c46
-rw-r--r--fs/xfs/scrub/repair.h5
-rw-r--r--fs/xfs/scrub/scrub.c9
-rw-r--r--fs/xfs/scrub/scrub.h10
-rw-r--r--fs/xfs/scrub/stats.c1
-rw-r--r--fs/xfs/scrub/trace.h30
-rw-r--r--fs/xfs/scrub/xfarray.h19
-rw-r--r--fs/xfs/xfs_health.c1
-rw-r--r--fs/xfs/xfs_inode.c16
-rw-r--r--fs/xfs/xfs_inode.h2
-rw-r--r--fs/xfs/xfs_qm.c23
-rw-r--r--fs/xfs/xfs_qm.h16
-rw-r--r--fs/xfs/xfs_qm_bhv.c1
-rw-r--r--fs/xfs/xfs_quota.h46
-rw-r--r--fs/xfs/xfs_trans_dquot.c169
25 files changed, 1645 insertions, 26 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 4597c0f19e8e..68891e6ee08e 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -180,6 +180,7 @@ xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \
dqiterate.o \
quota.o \
+ quotacheck.o \
)
# online repair
@@ -203,6 +204,7 @@ xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \
quota_repair.o \
+ quotacheck_repair.o \
)
endif
endif
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 6360073865db..07acbed9235c 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -195,6 +195,7 @@ struct xfs_fsop_geom {
#define XFS_FSOP_GEOM_SICK_PQUOTA (1 << 3) /* project quota */
#define XFS_FSOP_GEOM_SICK_RT_BITMAP (1 << 4) /* realtime bitmap */
#define XFS_FSOP_GEOM_SICK_RT_SUMMARY (1 << 5) /* realtime summary */
+#define XFS_FSOP_GEOM_SICK_QUOTACHECK (1 << 6) /* quota counts */
/* Output for XFS_FS_COUNTS */
typedef struct xfs_fsop_counts {
@@ -709,9 +710,10 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_GQUOTA 22 /* group quotas */
#define XFS_SCRUB_TYPE_PQUOTA 23 /* project quotas */
#define XFS_SCRUB_TYPE_FSCOUNTERS 24 /* fs summary counters */
+#define XFS_SCRUB_TYPE_QUOTACHECK 25 /* quota counters */
/* Number of scrub subcommands. */
-#define XFS_SCRUB_TYPE_NR 25
+#define XFS_SCRUB_TYPE_NR 26
/* i: Repair this metadata. */
#define XFS_SCRUB_IFLAG_REPAIR (1u << 0)
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index 6296993ff8f3..5626e53b3f0f 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -41,6 +41,7 @@ struct xfs_fsop_geom;
#define XFS_SICK_FS_UQUOTA (1 << 1) /* user quota */
#define XFS_SICK_FS_GQUOTA (1 << 2) /* group quota */
#define XFS_SICK_FS_PQUOTA (1 << 3) /* project quota */
+#define XFS_SICK_FS_QUOTACHECK (1 << 4) /* quota counts */
/* Observable health issues for realtime volume metadata. */
#define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */
@@ -77,7 +78,8 @@ struct xfs_fsop_geom;
#define XFS_SICK_FS_PRIMARY (XFS_SICK_FS_COUNTERS | \
XFS_SICK_FS_UQUOTA | \
XFS_SICK_FS_GQUOTA | \
- XFS_SICK_FS_PQUOTA)
+ XFS_SICK_FS_PQUOTA | \
+ XFS_SICK_FS_QUOTACHECK)
#define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \
XFS_SICK_RT_SUMMARY)
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 81f2b96bb5a7..c5a6c47d3df2 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -29,6 +29,8 @@
#include "xfs_attr.h"
#include "xfs_reflink.h"
#include "xfs_ag.h"
+#include "xfs_error.h"
+#include "xfs_quota.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -82,6 +84,15 @@ __xchk_process_error(
sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
sc->sm, *error);
break;
+ case -ECANCELED:
+ /*
+ * ECANCELED here means that the caller set one of the scrub
+ * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
+ * quickly. Set error to zero and do not continue.
+ */
+ trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
+ *error = 0;
+ break;
case -EFSBADCRC:
case -EFSCORRUPTED:
/* Note the badness but don't abort. */
@@ -89,8 +100,7 @@ __xchk_process_error(
*error = 0;
fallthrough;
default:
- trace_xchk_op_error(sc, agno, bno, *error,
- ret_ip);
+ trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
break;
}
return false;
@@ -136,6 +146,16 @@ __xchk_fblock_process_error(
/* Used to restart an op with deadlock avoidance. */
trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
break;
+ case -ECANCELED:
+ /*
+ * ECANCELED here means that the caller set one of the scrub
+ * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
+ * quickly. Set error to zero and do not continue.
+ */
+ trace_xchk_file_op_error(sc, whichfork, offset, *error,
+ ret_ip);
+ *error = 0;
+ break;
case -EFSBADCRC:
case -EFSCORRUPTED:
/* Note the badness but don't abort. */
@@ -227,6 +247,19 @@ xchk_block_set_corrupt(
trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
}
+#ifdef CONFIG_XFS_QUOTA
+/* Record a corrupt quota counter. */
+void
+xchk_qcheck_set_corrupt(
+ struct xfs_scrub *sc,
+ unsigned int dqtype,
+ xfs_dqid_t id)
+{
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ trace_xchk_qcheck_error(sc, dqtype, id, __return_address);
+}
+#endif
+
/* Record a corruption while cross-referencing. */
void
xchk_block_xref_set_corrupt(
@@ -653,6 +686,13 @@ xchk_trans_cancel(
sc->tp = NULL;
}
+int
+xchk_trans_alloc_empty(
+ struct xfs_scrub *sc)
+{
+ return xfs_trans_alloc_empty(sc->mp, &sc->tp);
+}
+
/*
* Grab an empty transaction so that we can re-grab locked buffers if
* one of our btrees turns out to be cyclic.
@@ -672,7 +712,7 @@ xchk_trans_alloc(
return xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
resblks, 0, 0, &sc->tp);
- return xfs_trans_alloc_empty(sc->mp, &sc->tp);
+ return xchk_trans_alloc_empty(sc);
}
/* Set us up with a transaction and an empty context. */
@@ -1259,6 +1299,9 @@ xchk_fsgates_enable(
if (scrub_fsgates & XCHK_FSGATES_DRAIN)
xfs_drain_wait_enable();
+ if (scrub_fsgates & XCHK_FSGATES_QUOTA)
+ xfs_dqtrx_hook_enable();
+
sc->flags |= scrub_fsgates;
}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index da09580b454a..eb51037cd0d2 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -32,6 +32,7 @@ xchk_should_terminate(
}
int xchk_trans_alloc(struct xfs_scrub *sc, uint resblks);
+int xchk_trans_alloc_empty(struct xfs_scrub *sc);
void xchk_trans_cancel(struct xfs_scrub *sc);
bool xchk_process_error(struct xfs_scrub *sc, xfs_agnumber_t agno,
@@ -54,6 +55,10 @@ void xchk_block_set_corrupt(struct xfs_scrub *sc,
void xchk_ino_set_corrupt(struct xfs_scrub *sc, xfs_ino_t ino);
void xchk_fblock_set_corrupt(struct xfs_scrub *sc, int whichfork,
xfs_fileoff_t offset);
+#ifdef CONFIG_XFS_QUOTA
+void xchk_qcheck_set_corrupt(struct xfs_scrub *sc, unsigned int dqtype,
+ xfs_dqid_t id);
+#endif
void xchk_block_xref_set_corrupt(struct xfs_scrub *sc,
struct xfs_buf *bp);
@@ -105,6 +110,7 @@ xchk_setup_rtsummary(struct xfs_scrub *sc)
#ifdef CONFIG_XFS_QUOTA
int xchk_ino_dqattach(struct xfs_scrub *sc);
int xchk_setup_quota(struct xfs_scrub *sc);
+int xchk_setup_quotacheck(struct xfs_scrub *sc);
#else
static inline int
xchk_ino_dqattach(struct xfs_scrub *sc)
@@ -116,6 +122,11 @@ xchk_setup_quota(struct xfs_scrub *sc)
{
return -ENOENT;
}
+static inline int
+xchk_setup_quotacheck(struct xfs_scrub *sc)
+{
+ return -ENOENT;
+}
#endif
int xchk_setup_fscounters(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 5799e9a94f1f..893c5a6e3ddb 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -242,7 +242,7 @@ xchk_setup_fscounters(
return error;
}
- return xfs_trans_alloc_empty(sc->mp, &sc->tp);
+ return xchk_trans_alloc_empty(sc);
}
/*
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index 531006910ca9..3c9eac070796 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -105,6 +105,7 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_GQUOTA] = { XHG_FS, XFS_SICK_FS_GQUOTA },
[XFS_SCRUB_TYPE_PQUOTA] = { XHG_FS, XFS_SICK_FS_PQUOTA },
[XFS_SCRUB_TYPE_FSCOUNTERS] = { XHG_FS, XFS_SICK_FS_COUNTERS },
+ [XFS_SCRUB_TYPE_QUOTACHECK] = { XHG_FS, XFS_SICK_FS_QUOTACHECK },
};
/* Return the health status mask for this scrub type. */
diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c
new file mode 100644
index 000000000000..c77eb2de8df7
--- /dev/null
+++ b/fs/xfs/scrub/quotacheck.c
@@ -0,0 +1,867 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_qm.h"
+#include "xfs_icache.h"
+#include "xfs_bmap_util.h"
+#include "xfs_ialloc.h"
+#include "xfs_ag.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/repair.h"
+#include "scrub/xfile.h"
+#include "scrub/xfarray.h"
+#include "scrub/iscan.h"
+#include "scrub/quota.h"
+#include "scrub/quotacheck.h"
+#include "scrub/trace.h"
+
+/*
+ * Live Quotacheck
+ * ===============
+ *
+ * Quota counters are "summary" metadata, in the sense that they are computed
+ * as the summation of the block usage counts for every file on the filesystem.
+ * Therefore, we compute the correct icount, bcount, and rtbcount values by
+ * creating a shadow quota counter structure and walking every inode.
+ */
+
+/* Track the quota deltas for a dquot in a transaction. */
+struct xqcheck_dqtrx {
+ xfs_dqtype_t q_type;
+ xfs_dqid_t q_id;
+
+ int64_t icount_delta;
+
+ int64_t bcount_delta;
+ int64_t delbcnt_delta;
+
+ int64_t rtbcount_delta;
+ int64_t delrtb_delta;
+};
+
+#define XQCHECK_MAX_NR_DQTRXS (XFS_QM_TRANS_DQTYPES * XFS_QM_TRANS_MAXDQS)
+
+/*
+ * Track the quota deltas for all dquots attached to a transaction if the
+ * quota deltas are being applied to an inode that we already scanned.
+ */
+struct xqcheck_dqacct {
+ struct rhash_head hash;
+ uintptr_t tx_id;
+ struct xqcheck_dqtrx dqtrx[XQCHECK_MAX_NR_DQTRXS];
+ unsigned int refcount;
+};
+
+/* Free a shadow dquot accounting structure. */
+static void
+xqcheck_dqacct_free(
+ void *ptr,
+ void *arg)
+{
+ struct xqcheck_dqacct *dqa = ptr;
+
+ kfree(dqa);
+}
+
+/* Set us up to scrub quota counters. */
+int
+xchk_setup_quotacheck(
+ struct xfs_scrub *sc)
+{
+ if (!XFS_IS_QUOTA_ON(sc->mp))
+ return -ENOENT;
+
+ xchk_fsgates_enable(sc, XCHK_FSGATES_QUOTA);
+
+ sc->buf = kzalloc(sizeof(struct xqcheck), XCHK_GFP_FLAGS);
+ if (!sc->buf)
+ return -ENOMEM;
+
+ return xchk_setup_fs(sc);
+}
+
+/*
+ * Part 1: Collecting dquot resource usage counts. For each xfs_dquot attached
+ * to each inode, we create a shadow dquot, and compute the inode count and add
+ * the data/rt block usage from what we see.
+ *
+ * To avoid false corruption reports in part 2, any failure in this part must
+ * set the INCOMPLETE flag even when a negative errno is returned. This care
+ * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
+ * ECANCELED) that are absorbed into a scrub state flag update by
+ * xchk_*_process_error. Scrub and repair share the same incore data
+ * structures, so the INCOMPLETE flag is critical to prevent a repair based on
+ * insufficient information.
+ *
+ * Because we are scanning a live filesystem, it's possible that another thread
+ * will try to update the quota counters for an inode that we've already
+ * scanned. This will cause our counts to be incorrect. Therefore, we hook
+ * the live transaction code in two places: (1) when the callers update the
+ * per-transaction dqtrx structure to log quota counter updates; and (2) when
+ * transaction commit actually logs those updates to the incore dquot. By
+ * shadowing transaction updates in this manner, live quotacheck can ensure
+ * by locking the dquot and the shadow structure that its own copies are not
+ * out of date. Because the hook code runs in a different process context from
+ * the scrub code and the scrub state flags are not accessed atomically,
+ * failures in the hook code must abort the iscan and the scrubber must notice
+ * the aborted scan and set the incomplete flag.
+ *
+ * Note that we use srcu notifier hooks to minimize the overhead when live
+ * quotacheck is /not/ running.
+ */
+
+/* Update an incore dquot counter information from a live update. */
+static int
+xqcheck_update_incore_counts(
+ struct xqcheck *xqc,
+ struct xfarray *counts,
+ xfs_dqid_t id,
+ int64_t inodes,
+ int64_t nblks,
+ int64_t rtblks)
+{
+ struct xqcheck_dquot xcdq;
+ int error;
+
+ error = xfarray_load_sparse(counts, id, &xcdq);
+ if (error)
+ return error;
+
+ xcdq.flags |= XQCHECK_DQUOT_WRITTEN;
+ xcdq.icount += inodes;
+ xcdq.bcount += nblks;
+ xcdq.rtbcount += rtblks;
+
+ error = xfarray_store(counts, id, &xcdq);
+ if (error == -EFBIG) {
+ /*
+ * EFBIG means we tried to store data at too high a byte offset
+ * in the sparse array. IOWs, we cannot complete the check and
+ * must notify userspace that the check was incomplete.
+ */
+ error = -ECANCELED;
+ }
+ return error;
+}
+
+/* Decide if this is the shadow dquot accounting structure for a transaction. */
+static int
+xqcheck_dqacct_obj_cmpfn(
+ struct rhashtable_compare_arg *arg,
+ const void *obj)
+{
+ const uintptr_t *tx_idp = arg->key;
+ const struct xqcheck_dqacct *dqa = obj;
+
+ if (dqa->tx_id != *tx_idp)
+ return 1;
+ return 0;
+}
+
+static const struct rhashtable_params xqcheck_dqacct_hash_params = {
+ .min_size = 32,
+ .key_len = sizeof(uintptr_t),
+ .key_offset = offsetof(struct xqcheck_dqacct, tx_id),
+ .head_offset = offsetof(struct xqcheck_dqacct, hash),
+ .automatic_shrinking = true,
+ .obj_cmpfn = xqcheck_dqacct_obj_cmpfn,
+};
+
+/* Find a shadow dqtrx slot for the given dquot. */
+STATIC struct xqcheck_dqtrx *
+xqcheck_get_dqtrx(
+ struct xqcheck_dqacct *dqa,
+ xfs_dqtype_t q_type,
+ xfs_dqid_t q_id)
+{
+ int i;
+
+ for (i = 0; i < XQCHECK_MAX_NR_DQTRXS; i++) {
+ if (dqa->dqtrx[i].q_type == 0 ||
+ (dqa->dqtrx[i].q_type == q_type &&
+ dqa->dqtrx[i].q_id == q_id))
+ return &dqa->dqtrx[i];
+ }
+
+ return NULL;
+}
+
+/*
+ * Create and fill out a quota delta tracking structure to shadow the updates
+ * going on in the regular quota code.
+ */
+static int
+xqcheck_mod_live_ino_dqtrx(
+ struct notifier_block *nb,
+ unsigned long action,
+ void *data)
+{
+ struct xfs_mod_ino_dqtrx_params *p = data;
+ struct xqcheck *xqc;
+ struct xqcheck_dqacct *dqa;
+ struct xqcheck_dqtrx *dqtrx;
+ int error;
+
+ xqc = container_of(nb, struct xqcheck, qhook.mod_hook.nb);
+
+ /* Skip quota reservation fields. */
+ switch (action) {
+ case XFS_TRANS_DQ_BCOUNT:
+ case XFS_TRANS_DQ_DELBCOUNT:
+ case XFS_TRANS_DQ_ICOUNT:
+ case XFS_TRANS_DQ_RTBCOUNT:
+ case XFS_TRANS_DQ_DELRTBCOUNT:
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ /* Ignore dqtrx updates for quota types we don't care about. */
+ switch (p->q_type) {
+ case XFS_DQTYPE_USER:
+ if (!xqc->ucounts)
+ return NOTIFY_DONE;
+ break;
+ case XFS_DQTYPE_GROUP:
+ if (!xqc->gcounts)
+ return NOTIFY_DONE;
+ break;
+ case XFS_DQTYPE_PROJ:
+ if (!xqc->pcounts)
+ return NOTIFY_DONE;
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ /* Skip inodes that haven't been scanned yet. */
+ if (!xchk_iscan_want_live_update(&xqc->iscan, p->ino))
+ return NOTIFY_DONE;
+
+ /* Make a shadow quota accounting tracker for this transaction. */
+ mutex_lock(&xqc->lock);
+ dqa = rhashtable_lookup_fast(&xqc->shadow_dquot_acct, &p->tx_id,
+ xqcheck_dqacct_hash_params);
+ if (!dqa) {
+ dqa = kzalloc(sizeof(struct xqcheck_dqacct), XCHK_GFP_FLAGS);
+ if (!dqa)
+ goto out_abort;
+
+ dqa->tx_id = p->tx_id;
+ error = rhashtable_insert_fast(&xqc->shadow_dquot_acct,
+ &dqa->hash, xqcheck_dqacct_hash_params);
+ if (error)
+ goto out_abort;
+ }
+
+ /* Find the shadow dqtrx (or an empty slot) here. */
+ dqtrx = xqcheck_get_dqtrx(dqa, p->q_type, p->q_id);
+ if (!dqtrx)
+ goto out_abort;
+ if (dqtrx->q_type == 0) {
+ dqtrx->q_type = p->q_type;
+ dqtrx->q_id = p->q_id;
+ dqa->refcount++;
+ }
+
+ /* Update counter */
+ switch (action) {
+ case XFS_TRANS_DQ_BCOUNT:
+ dqtrx->bcount_delta += p->delta;
+ break;
+ case XFS_TRANS_DQ_DELBCOUNT:
+ dqtrx->delbcnt_delta += p->delta;
+ break;
+ case XFS_TRANS_DQ_ICOUNT:
+ dqtrx->icount_delta += p->delta;
+ break;
+ case XFS_TRANS_DQ_RTBCOUNT:
+ dqtrx->rtbcount_delta += p->delta;
+ break;
+ case XFS_TRANS_DQ_DELRTBCOUNT:
+ dqtrx->delrtb_delta += p->delta;
+ break;
+ }
+
+ mutex_unlock(&xqc->lock);
+ return NOTIFY_DONE;
+
+out_abort:
+ xchk_iscan_abort(&xqc->iscan);
+ mutex_unlock(&xqc->lock);
+ return NOTIFY_DONE;
+}
+
+/*
+ * Apply the transaction quota deltas to our shadow quota accounting info when
+ * the regular quota code are doing the same.
+ */
+static int
+xqcheck_apply_live_dqtrx(
+ struct notifier_block *nb,
+ unsigned long action,
+ void *data)
+{
+ struct xfs_apply_dqtrx_params *p = data;
+ struct xqcheck *xqc;
+ struct xqcheck_dqacct *dqa;
+ struct xqcheck_dqtrx *dqtrx;
+ struct xfarray *counts;
+ int error;
+
+ xqc = container_of(nb, struct xqcheck, qhook.apply_hook.nb);
+
+ /* Map the dquot type to an incore counter object. */
+ switch (p->q_type) {
+ case XFS_DQTYPE_USER:
+ counts = xqc->ucounts;
+ break;
+ case XFS_DQTYPE_GROUP:
+ counts = xqc->gcounts;
+ break;
+ case XFS_DQTYPE_PROJ:
+ counts = xqc->pcounts;
+ break;
+ default:
+ return NOTIFY_DONE;
+ }
+
+ if (xchk_iscan_aborted(&xqc->iscan) || counts == NULL)
+ return NOTIFY_DONE;
+
+ /*
+ * Find the shadow dqtrx for this transaction and dquot, if any deltas
+ * need to be applied here. If not, we're finished early.
+ */
+ mutex_lock(&xqc->lock);
+ dqa = rhashtable_lookup_fast(&xqc->shadow_dquot_acct, &p->tx_id,
+ xqcheck_dqacct_hash_params);
+ if (!dqa)
+ goto out_unlock;
+ dqtrx = xqcheck_get_dqtrx(dqa, p->q_type, p->q_id);
+ if (!dqtrx || dqtrx->q_type == 0)
+ goto out_unlock;
+
+ /* Update our shadow dquot if we're committing. */
+ if (action == XFS_APPLY_DQTRX_COMMIT) {
+ error = xqcheck_update_incore_counts(xqc, counts, p->q_id,
+ dqtrx->icount_delta,
+ dqtrx->bcount_delta + dqtrx->delbcnt_delta,
+ dqtrx->rtbcount_delta + dqtrx->delrtb_delta);
+ if (error)
+ goto out_abort;
+ }
+
+ /* Free the shadow accounting structure if that was the last user. */
+ dqa->refcount--;
+ if (dqa->refcount == 0) {
+ error = rhashtable_remove_fast(&xqc->shadow_dquot_acct,
+ &dqa->hash, xqcheck_dqacct_hash_params);
+ if (error)
+ goto out_abort;
+ xqcheck_dqacct_free(dqa, NULL);
+ }
+
+ mutex_unlock(&xqc->lock);
+ return NOTIFY_DONE;
+
+out_abort:
+ xchk_iscan_abort(&xqc->iscan);
+out_unlock:
+ mutex_unlock(&xqc->lock);
+ return NOTIFY_DONE;
+}
+
+/* Record this inode's quota usage in our shadow quota counter data. */
+STATIC int
+xqcheck_collect_inode(
+ struct xqcheck *xqc,
+ struct xfs_inode *ip)
+{
+ struct xfs_trans *tp = xqc->sc->tp;
+ xfs_filblks_t nblks, rtblks;
+ uint ilock_flags = 0;
+ xfs_dqid_t id;
+ bool isreg = S_ISREG(VFS_I(ip)->i_mode);
+ int error = 0;
+
+ if (xfs_is_quota_inode(&tp->t_mountp->m_sb, ip->i_ino)) {
+ /*
+ * Quota files are never counted towards quota, so we do not
+ * need to take the lock.
+ */
+ xchk_iscan_mark_visited(&xqc->iscan, ip);
+ return 0;
+ }
+
+ /* Figure out the data / rt device block counts. */
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ if (isreg)
+ xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ /*
+ * Read in the data fork for rt files so that _count_blocks
+ * can count the number of blocks allocated from the rt volume.
+ * Inodes do not track that separately.
+ */
+ ilock_flags = xfs_ilock_data_map_shared(ip);
+ error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
+ if (error)
+ goto out_abort;
+ } else {
+ ilock_flags = XFS_ILOCK_SHARED;
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
+ }
+ xfs_inode_count_blocks(tp, ip, &nblks, &rtblks);
+
+ if (xchk_iscan_aborted(&xqc->iscan)) {
+ error = -ECANCELED;
+ goto out_incomplete;
+ }
+
+ /* Update the shadow dquot counters. */
+ mutex_lock(&xqc->lock);
+ if (xqc->ucounts) {
+ id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_USER);
+ error = xqcheck_update_incore_counts(xqc, xqc->ucounts, id, 1,
+ nblks, rtblks);
+ if (error)
+ goto out_mutex;
+ }
+
+ if (xqc->gcounts) {
+ id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_GROUP);
+ error = xqcheck_update_incore_counts(xqc, xqc->gcounts, id, 1,
+ nblks, rtblks);
+ if (error)
+ goto out_mutex;
+ }
+
+ if (xqc->pcounts) {
+ id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_PROJ);
+ error = xqcheck_update_incore_counts(xqc, xqc->pcounts, id, 1,
+ nblks, rtblks);
+ if (error)
+ goto out_mutex;
+ }
+ mutex_unlock(&xqc->lock);
+
+ xchk_iscan_mark_visited(&xqc->iscan, ip);
+ goto out_ilock;
+
+out_mutex:
+ mutex_unlock(&xqc->lock);
+out_abort:
+ xchk_iscan_abort(&xqc->iscan);
+out_incomplete:
+ xchk_set_incomplete(xqc->sc);
+out_ilock:
+ xfs_iunlock(ip, ilock_flags);
+ if (isreg)
+ xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
+ xfs_iunlock(ip, XFS_IOLOCK_SHARED);
+ return error;
+}
+
+/* Walk all the allocated inodes and run a quota scan on them. */
+STATIC int
+xqcheck_collect_counts(
+ struct xqcheck *xqc)
+{
+ struct xfs_scrub *sc = xqc->sc;
+ struct xfs_inode *ip;
+ int error;
+
+ /*
+ * Set up for a potentially lengthy filesystem scan by reducing our
+ * transaction resource usage for the duration. Specifically:
+ *
+ * Cancel the transaction to release the log grant space while we scan
+ * the filesystem.
+ *
+ * Create a new empty transaction to eliminate the possibility of the
+ * inode scan deadlocking on cyclical metadata.
+ *
+ * We pass the empty transaction to the file scanning function to avoid
+ * repeatedly cycling empty transactions. This can be done without
+ * risk of deadlock between sb_internal and the IOLOCK (we take the
+ * IOLOCK to quiesce the file before scanning) because empty
+ * transactions do not take sb_internal.
+ */
+ xchk_trans_cancel(sc);
+ error = xchk_trans_alloc_empty(sc);
+ if (error)
+ return error;
+
+ while ((error = xchk_iscan_iter(&xqc->iscan, &ip)) == 1) {
+ error = xqcheck_collect_inode(xqc, ip);
+ xchk_irele(sc, ip);
+ if (error)
+ break;
+
+ if (xchk_should_terminate(sc, &error))
+ break;
+ }
+ xchk_iscan_iter_finish(&xqc->iscan);
+ if (error) {
+ xchk_set_incomplete(sc);
+ /*
+ * If we couldn't grab an inode that was busy with a state
+ * change, change the error code so that we exit to userspace
+ * as quickly as possible.
+ */
+ if (error == -EBUSY)
+ return -ECANCELED;
+ return error;
+ }
+
+ /*
+ * Switch out for a real transaction in preparation for building a new
+ * tree.
+ */
+ xchk_trans_cancel(sc);
+ return xchk_setup_fs(sc);
+}
+
+/*
+ * Part 2: Comparing dquot resource counters. Walk each xfs_dquot, comparing
+ * the resource usage counters against our shadow dquots; and then walk each
+ * shadow dquot (that wasn't covered in the first part), comparing it against
+ * the xfs_dquot.
+ */
+
+/*
+ * Check the dquot data against what we observed. Caller must hold the dquot
+ * lock.
+ */
+STATIC int
+xqcheck_compare_dquot(
+ struct xqcheck *xqc,
+ xfs_dqtype_t dqtype,
+ struct xfs_dquot *dq)
+{
+ struct xqcheck_dquot xcdq;
+ struct xfarray *counts = xqcheck_counters_for(xqc, dqtype);
+ int error;
+
+ if (xchk_iscan_aborted(&xqc->iscan)) {
+ xchk_set_incomplete(xqc->sc);
+ return -ECANCELED;
+ }
+
+ mutex_lock(&xqc->lock);
+ error = xfarray_load_sparse(counts, dq->q_id, &xcdq);
+ if (error)
+ goto out_unlock;
+
+ if (xcdq.icount != dq->q_ino.count)
+ xchk_qcheck_set_corrupt(xqc->sc, dqtype, dq->q_id);
+
+ if (xcdq.bcount != dq->q_blk.count)
+ xchk_qcheck_set_corrupt(xqc->sc, dqtype, dq->q_id);
+
+ if (xcdq.rtbcount != dq->q_rtb.count)
+ xchk_qcheck_set_corrupt(xqc->sc, dqtype, dq->q_id);
+
+ xcdq.flags |= (XQCHECK_DQUOT_COMPARE_SCANNED | XQCHECK_DQUOT_WRITTEN);
+ error = xfarray_store(counts, dq->q_id, &xcdq);
+ if (error == -EFBIG) {
+ /*
+ * EFBIG means we tried to store data at too high a byte offset
+ * in the sparse array. IOWs, we cannot complete the check and
+ * must notify userspace that the check was incomplete. This
+ * should never happen outside of the collection phase.
+ */
+ xchk_set_incomplete(xqc->sc);
+ error = -ECANCELED;
+ }
+ mutex_unlock(&xqc->lock);
+ if (error)
+ return error;
+
+ if (xqc->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return -ECANCELED;
+
+ return 0;
+
+out_unlock:
+ mutex_unlock(&xqc->lock);
+ return error;
+}
+
+/*
+ * Walk all the observed dquots, and make sure there's a matching incore
+ * dquot and that its counts match ours.
+ */
+STATIC int
+xqcheck_walk_observations(
+ struct xqcheck *xqc,
+ xfs_dqtype_t dqtype)
+{
+ struct xqcheck_dquot xcdq;
+ struct xfs_dquot *dq;
+ struct xfarray *counts = xqcheck_counters_for(xqc, dqtype);
+ xfarray_idx_t cur = XFARRAY_CURSOR_INIT;
+ int error;
+
+ mutex_lock(&xqc->lock);
+ while ((error = xfarray_iter(counts, &cur, &xcdq)) == 1) {
+ xfs_dqid_t id = cur - 1;
+
+ if (xcdq.flags & XQCHECK_DQUOT_COMPARE_SCANNED)
+ continue;
+
+ mutex_unlock(&xqc->lock);
+
+ error = xfs_qm_dqget(xqc->sc->mp, id, dqtype, false, &dq);
+ if (error == -ENOENT) {
+ xchk_qcheck_set_corrupt(xqc->sc, dqtype, id);
+ return 0;
+ }
+ if (error)
+ return error;
+
+ error = xqcheck_compare_dquot(xqc, dqtype, dq);
+ xfs_qm_dqput(dq);
+ if (error)
+ return error;
+
+ if (xchk_should_terminate(xqc->sc, &error))
+ return error;
+
+ mutex_lock(&xqc->lock);
+ }
+ mutex_unlock(&xqc->lock);
+
+ return error;
+}
+
+/* Compare the quota counters we observed against the live dquots. */
+STATIC int
+xqcheck_compare_dqtype(
+ struct xqcheck *xqc,
+ xfs_dqtype_t dqtype)
+{
+ struct xchk_dqiter cursor = { };
+ struct xfs_scrub *sc = xqc->sc;
+ struct xfs_dquot *dq;
+ int error;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return 0;
+
+ /* If the quota CHKD flag is cleared, we need to repair this quota. */
+ if (!(xfs_quota_chkd_flag(dqtype) & sc->mp->m_qflags)) {
+ xchk_qcheck_set_corrupt(xqc->sc, dqtype, 0);
+ return 0;
+ }
+
+ /* Compare what we observed against the actual dquots. */
+ xchk_dqiter_init(&cursor, sc, dqtype);
+ while ((error = xchk_dquot_iter(&cursor, &dq)) == 1) {
+ error = xqcheck_compare_dquot(xqc, dqtype, dq);
+ xfs_qm_dqput(dq);
+ if (error)
+ break;
+ }
+ if (error)
+ return error;
+
+ /* Walk all the observed dquots and compare to the incore ones. */
+ return xqcheck_walk_observations(xqc, dqtype);
+}
+
+/* Tear down everything associated with a quotacheck. */
+static void
+xqcheck_teardown_scan(
+ void *priv)
+{
+ struct xqcheck *xqc = priv;
+ struct xfs_quotainfo *qi = xqc->sc->mp->m_quotainfo;
+
+ /* Discourage any hook functions that might be running. */
+ xchk_iscan_abort(&xqc->iscan);
+
+ /*
+ * As noted above, the apply hook is responsible for cleaning up the
+ * shadow dquot accounting data when a transaction completes. The mod
+ * hook must be removed before the apply hook so that we don't
+ * mistakenly leave an active shadow account for the mod hook to get
+ * its hands on. No hooks should be running after these functions
+ * return.
+ */
+ xfs_dqtrx_hook_del(qi, &xqc->qhook);
+
+ if (xqc->shadow_dquot_acct.key_len) {
+ rhashtable_free_and_destroy(&xqc->shadow_dquot_acct,
+ xqcheck_dqacct_free, NULL);
+ xqc->shadow_dquot_acct.key_len = 0;
+ }
+
+ if (xqc->pcounts) {
+ xfarray_destroy(xqc->pcounts);
+ xqc->pcounts = NULL;
+ }
+
+ if (xqc->gcounts) {
+ xfarray_destroy(xqc->gcounts);
+ xqc->gcounts = NULL;
+ }
+
+ if (xqc->ucounts) {
+ xfarray_destroy(xqc->ucounts);
+ xqc->ucounts = NULL;
+ }
+
+ xchk_iscan_teardown(&xqc->iscan);
+ mutex_destroy(&xqc->lock);
+ xqc->sc = NULL;
+}
+
+/*
+ * Scan all inodes in the entire filesystem to generate quota counter data.
+ * If the scan is successful, the quota data will be left alive for a repair.
+ * If any error occurs, we'll tear everything down.
+ */
+STATIC int
+xqcheck_setup_scan(
+ struct xfs_scrub *sc,
+ struct xqcheck *xqc)
+{
+ char *descr;
+ struct xfs_quotainfo *qi = sc->mp->m_quotainfo;
+ unsigned long long max_dquots = XFS_DQ_ID_MAX + 1ULL;
+ int error;
+
+ ASSERT(xqc->sc == NULL);
+ xqc->sc = sc;
+
+ mutex_init(&xqc->lock);
+
+ /* Retry iget every tenth of a second for up to 30 seconds. */
+ xchk_iscan_start(sc, 30000, 100, &xqc->iscan);
+
+ error = -ENOMEM;
+ if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_USER)) {
+ descr = xchk_xfile_descr(sc, "user dquot records");
+ error = xfarray_create(descr, max_dquots,
+ sizeof(struct xqcheck_dquot), &xqc->ucounts);
+ kfree(descr);
+ if (error)
+ goto out_teardown;
+ }
+
+ if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_GROUP)) {
+ descr = xchk_xfile_descr(sc, "group dquot records");
+ error = xfarray_create(descr, max_dquots,
+ sizeof(struct xqcheck_dquot), &xqc->gcounts);
+ kfree(descr);
+ if (error)
+ goto out_teardown;
+ }
+
+ if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_PROJ)) {
+ descr = xchk_xfile_descr(sc, "project dquot records");
+ error = xfarray_create(descr, max_dquots,
+ sizeof(struct xqcheck_dquot), &xqc->pcounts);
+ kfree(descr);
+ if (error)
+ goto out_teardown;
+ }
+
+ /*
+ * Set up hash table to map transactions to our internal shadow dqtrx
+ * structures.
+ */
+ error = rhashtable_init(&xqc->shadow_dquot_acct,
+ &xqcheck_dqacct_hash_params);
+ if (error)
+ goto out_teardown;
+
+ /*
+ * Hook into the quota code. The hook only triggers for inodes that
+ * were already scanned, and the scanner thread takes each inode's
+ * ILOCK, which means that any in-progress inode updates will finish
+ * before we can scan the inode.
+ *
+ * The apply hook (which removes the shadow dquot accounting struct)
+ * must be installed before the mod hook so that we never fail to catch
+ * the end of a quota update sequence and leave stale shadow data.
+ */
+ ASSERT(sc->flags & XCHK_FSGATES_QUOTA);
+ xfs_dqtrx_hook_setup(&xqc->qhook, xqcheck_mod_live_ino_dqtrx,
+ xqcheck_apply_live_dqtrx);
+
+ error = xfs_dqtrx_hook_add(qi, &xqc->qhook);
+ if (error)
+ goto out_teardown;
+
+ /* Use deferred cleanup to pass the quota count data to repair. */
+ sc->buf_cleanup = xqcheck_teardown_scan;
+ return 0;
+
+out_teardown:
+ xqcheck_teardown_scan(xqc);
+ return error;
+}
+
+/* Scrub all counters for a given quota type. */
+int
+xchk_quotacheck(
+ struct xfs_scrub *sc)
+{
+ struct xqcheck *xqc = sc->buf;
+ int error = 0;
+
+ /* Check quota counters on the live filesystem. */
+ error = xqcheck_setup_scan(sc, xqc);
+ if (error)
+ return error;
+
+ /* Walk all inodes, picking up quota information. */
+ error = xqcheck_collect_counts(xqc);
+ if (!xchk_xref_process_error(sc, 0, 0, &error))
+ return error;
+
+ /* Fail fast if we're not playing with a full dataset. */
+ if (xchk_iscan_aborted(&xqc->iscan))
+ xchk_set_incomplete(sc);
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
+ return 0;
+
+ /* Compare quota counters. */
+ if (xqc->ucounts) {
+ error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_USER);
+ if (!xchk_xref_process_error(sc, 0, 0, &error))
+ return error;
+ }
+ if (xqc->gcounts) {
+ error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_GROUP);
+ if (!xchk_xref_process_error(sc, 0, 0, &error))
+ return error;
+ }
+ if (xqc->pcounts) {
+ error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_PROJ);
+ if (!xchk_xref_process_error(sc, 0, 0, &error))
+ return error;
+ }
+
+ /* Check one last time for an incomplete dataset. */
+ if (xchk_iscan_aborted(&xqc->iscan))
+ xchk_set_incomplete(sc);
+
+ return 0;
+}
diff --git a/fs/xfs/scrub/quotacheck.h b/fs/xfs/scrub/quotacheck.h
new file mode 100644
index 000000000000..4ea5f249c978
--- /dev/null
+++ b/fs/xfs/scrub/quotacheck.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_SCRUB_QUOTACHECK_H__
+#define __XFS_SCRUB_QUOTACHECK_H__
+
+/* Quota counters for live quotacheck. */
+struct xqcheck_dquot {
+ /* block usage count */
+ int64_t bcount;
+
+ /* inode usage count */
+ int64_t icount;
+
+ /* realtime block usage count */
+ int64_t rtbcount;
+
+ /* Record state */
+ unsigned int flags;
+};
+
+/*
+ * This incore dquot record has been written at least once. We never want to
+ * store an xqcheck_dquot that looks uninitialized.
+ */
+#define XQCHECK_DQUOT_WRITTEN (1U << 0)
+
+/* Already checked this dquot. */
+#define XQCHECK_DQUOT_COMPARE_SCANNED (1U << 1)
+
+/* Already repaired this dquot. */
+#define XQCHECK_DQUOT_REPAIR_SCANNED (1U << 2)
+
+/* Live quotacheck control structure. */
+struct xqcheck {
+ struct xfs_scrub *sc;
+
+ /* Shadow dquot counter data. */
+ struct xfarray *ucounts;
+ struct xfarray *gcounts;
+ struct xfarray *pcounts;
+
+ /* Lock protecting quotacheck count observations */
+ struct mutex lock;
+
+ struct xchk_iscan iscan;
+
+ /* Hooks into the quota code. */
+ struct xfs_dqtrx_hook qhook;
+
+ /* Shadow quota delta tracking structure. */
+ struct rhashtable shadow_dquot_acct;
+};
+
+/* Return the incore counter array for a given quota type. */
+static inline struct xfarray *
+xqcheck_counters_for(
+ struct xqcheck *xqc,
+ xfs_dqtype_t dqtype)
+{
+ switch (dqtype) {
+ case XFS_DQTYPE_USER:
+ return xqc->ucounts;
+ case XFS_DQTYPE_GROUP:
+ return xqc->gcounts;
+ case XFS_DQTYPE_PROJ:
+ return xqc->pcounts;
+ }
+
+ ASSERT(0);
+ return NULL;
+}
+
+#endif /* __XFS_SCRUB_QUOTACHECK_H__ */
diff --git a/fs/xfs/scrub/quotacheck_repair.c b/fs/xfs/scrub/quotacheck_repair.c
new file mode 100644
index 000000000000..dd8554c755b5
--- /dev/null
+++ b/fs/xfs/scrub/quotacheck_repair.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_qm.h"
+#include "xfs_icache.h"
+#include "xfs_bmap_util.h"
+#include "xfs_iwalk.h"
+#include "xfs_ialloc.h"
+#include "xfs_sb.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/repair.h"
+#include "scrub/xfile.h"
+#include "scrub/xfarray.h"
+#include "scrub/iscan.h"
+#include "scrub/quota.h"
+#include "scrub/quotacheck.h"
+#include "scrub/trace.h"
+
+/*
+ * Live Quotacheck Repair
+ * ======================
+ *
+ * Use the live quota counter information that we collected to replace the
+ * counter values in the incore dquots. A scrub->repair cycle should have left
+ * the live data and hooks active, so this is safe so long as we make sure the
+ * dquot is locked.
+ */
+
+/* Commit new counters to a dquot. */
+static int
+xqcheck_commit_dquot(
+ struct xqcheck *xqc,
+ xfs_dqtype_t dqtype,
+ struct xfs_dquot *dq)
+{
+ struct xqcheck_dquot xcdq;
+ struct xfarray *counts = xqcheck_counters_for(xqc, dqtype);
+ int64_t delta;
+ bool dirty = false;
+ int error = 0;
+
+ /* Unlock the dquot just long enough to allocate a transaction. */
+ xfs_dqunlock(dq);
+ error = xchk_trans_alloc(xqc->sc, 0);
+ xfs_dqlock(dq);
+ if (error)
+ return error;
+
+ xfs_trans_dqjoin(xqc->sc->tp, dq);
+
+ if (xchk_iscan_aborted(&xqc->iscan)) {
+ error = -ECANCELED;
+ goto out_cancel;
+ }
+
+ mutex_lock(&xqc->lock);
+ error = xfarray_load_sparse(counts, dq->q_id, &xcdq);
+ if (error)
+ goto out_unlock;
+
+ /* Adjust counters as needed. */
+ delta = (int64_t)xcdq.icount - dq->q_ino.count;
+ if (delta) {
+ dq->q_ino.reserved += delta;
+ dq->q_ino.count += delta;
+ dirty = true;
+ }
+
+ delta = (int64_t)xcdq.bcount - dq->q_blk.count;
+ if (delta) {
+ dq->q_blk.reserved += delta;
+ dq->q_blk.count += delta;
+ dirty = true;
+ }
+
+ delta = (int64_t)xcdq.rtbcount - dq->q_rtb.count;
+ if (delta) {
+ dq->q_rtb.reserved += delta;
+ dq->q_rtb.count += delta;
+ dirty = true;
+ }
+
+ xcdq.flags |= (XQCHECK_DQUOT_REPAIR_SCANNED | XQCHECK_DQUOT_WRITTEN);
+ error = xfarray_store(counts, dq->q_id, &xcdq);
+ if (error == -EFBIG) {
+ /*
+ * EFBIG means we tried to store data at too high a byte offset
+ * in the sparse array. IOWs, we cannot complete the repair
+ * and must cancel the whole operation. This should never
+ * happen, but we need to catch it anyway.
+ */
+ error = -ECANCELED;
+ }
+ mutex_unlock(&xqc->lock);
+ if (error || !dirty)
+ goto out_cancel;
+
+ trace_xrep_quotacheck_dquot(xqc->sc->mp, dq->q_type, dq->q_id);
+
+ /* Commit the dirty dquot to disk. */
+ dq->q_flags |= XFS_DQFLAG_DIRTY;
+ if (dq->q_id)
+ xfs_qm_adjust_dqtimers(dq);
+ xfs_trans_log_dquot(xqc->sc->tp, dq);
+
+ /*
+ * Transaction commit unlocks the dquot, so we must re-lock it so that
+ * the caller can put the reference (which apparently requires a locked
+ * dquot).
+ */
+ error = xrep_trans_commit(xqc->sc);
+ xfs_dqlock(dq);
+ return error;
+
+out_unlock:
+ mutex_unlock(&xqc->lock);
+out_cancel:
+ xchk_trans_cancel(xqc->sc);
+
+ /* Re-lock the dquot so the caller can put the reference. */
+ xfs_dqlock(dq);
+ return error;
+}
+
+/* Commit new quota counters for a particular quota type. */
+STATIC int
+xqcheck_commit_dqtype(
+ struct xqcheck *xqc,
+ unsigned int dqtype)
+{
+ struct xchk_dqiter cursor = { };
+ struct xqcheck_dquot xcdq;
+ struct xfs_scrub *sc = xqc->sc;
+ struct xfs_mount *mp = sc->mp;
+ struct xfarray *counts = xqcheck_counters_for(xqc, dqtype);
+ struct xfs_dquot *dq;
+ xfarray_idx_t cur = XFARRAY_CURSOR_INIT;
+ int error;
+
+ /*
+ * Update the counters of every dquot that the quota file knows about.
+ */
+ xchk_dqiter_init(&cursor, sc, dqtype);
+ while ((error = xchk_dquot_iter(&cursor, &dq)) == 1) {
+ error = xqcheck_commit_dquot(xqc, dqtype, dq);
+ xfs_qm_dqput(dq);
+ if (error)
+ break;
+ }
+ if (error)
+ return error;
+
+ /*
+ * Make a second pass to deal with the dquots that we know about but
+ * the quota file previously did not know about.
+ */
+ mutex_lock(&xqc->lock);
+ while ((error = xfarray_iter(counts, &cur, &xcdq)) == 1) {
+ xfs_dqid_t id = cur - 1;
+
+ if (xcdq.flags & XQCHECK_DQUOT_REPAIR_SCANNED)
+ continue;
+
+ mutex_unlock(&xqc->lock);
+
+ /*
+ * Grab the dquot, allowing for dquot block allocation in a
+ * separate transaction. We committed the scrub transaction
+ * in a previous step, so we will not be creating nested
+ * transactions here.
+ */
+ error = xfs_qm_dqget(mp, id, dqtype, true, &dq);
+ if (error)
+ return error;
+
+ error = xqcheck_commit_dquot(xqc, dqtype, dq);
+ xfs_qm_dqput(dq);
+ if (error)
+ return error;
+
+ mutex_lock(&xqc->lock);
+ }
+ mutex_unlock(&xqc->lock);
+
+ return error;
+}
+
+/* Figure out quota CHKD flags for the running quota types. */
+static inline unsigned int
+xqcheck_chkd_flags(
+ struct xfs_mount *mp)
+{
+ unsigned int ret = 0;
+
+ if (XFS_IS_UQUOTA_ON(mp))
+ ret |= XFS_UQUOTA_CHKD;
+ if (XFS_IS_GQUOTA_ON(mp))
+ ret |= XFS_GQUOTA_CHKD;
+ if (XFS_IS_PQUOTA_ON(mp))
+ ret |= XFS_PQUOTA_CHKD;
+ return ret;
+}
+
+/* Commit the new dquot counters. */
+int
+xrep_quotacheck(
+ struct xfs_scrub *sc)
+{
+ struct xqcheck *xqc = sc->buf;
+ unsigned int qflags = xqcheck_chkd_flags(sc->mp);
+ int error;
+
+ /*
+ * Clear the CHKD flag for the running quota types and commit the scrub
+ * transaction so that we can allocate new quota block mappings if we
+ * have to. If we crash after this point, the sb still has the CHKD
+ * flags cleared, so mount quotacheck will fix all of this up.
+ */
+ xrep_update_qflags(sc, qflags, 0);
+ error = xrep_trans_commit(sc);
+ if (error)
+ return error;
+
+ /* Commit the new counters to the dquots. */
+ if (xqc->ucounts) {
+ error = xqcheck_commit_dqtype(xqc, XFS_DQTYPE_USER);
+ if (error)
+ return error;
+ }
+ if (xqc->gcounts) {
+ error = xqcheck_commit_dqtype(xqc, XFS_DQTYPE_GROUP);
+ if (error)
+ return error;
+ }
+ if (xqc->pcounts) {
+ error = xqcheck_commit_dqtype(xqc, XFS_DQTYPE_PROJ);
+ if (error)
+ return error;
+ }
+
+ /* Set the CHKD flags now that we've fixed quota counts. */
+ error = xchk_trans_alloc(sc, 0);
+ if (error)
+ return error;
+
+ xrep_update_qflags(sc, 0, qflags);
+ return xrep_trans_commit(sc);
+}
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 745d5b8f405a..7141b1778902 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -687,6 +687,44 @@ xrep_find_ag_btree_roots(
}
#ifdef CONFIG_XFS_QUOTA
+/* Update some quota flags in the superblock. */
+void
+xrep_update_qflags(
+ struct xfs_scrub *sc,
+ unsigned int clear_flags,
+ unsigned int set_flags)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_buf *bp;
+
+ mutex_lock(&mp->m_quotainfo->qi_quotaofflock);
+ if ((mp->m_qflags & clear_flags) == 0 &&
+ (mp->m_qflags & set_flags) == set_flags)
+ goto no_update;
+
+ mp->m_qflags &= ~clear_flags;
+ mp->m_qflags |= set_flags;
+
+ spin_lock(&mp->m_sb_lock);
+ mp->m_sb.sb_qflags &= ~clear_flags;
+ mp->m_sb.sb_qflags |= set_flags;
+ spin_unlock(&mp->m_sb_lock);
+
+ /*
+ * Update the quota flags in the ondisk superblock without touching
+ * the summary counters. We have not quiesced inode chunk allocation,
+ * so we cannot coordinate with updates to the icount and ifree percpu
+ * counters.
+ */
+ bp = xfs_trans_getsb(sc->tp);
+ xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
+ xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_SB_BUF);
+ xfs_trans_log_buf(sc->tp, bp, 0, sizeof(struct xfs_dsb) - 1);
+
+no_update:
+ mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
+}
+
/* Force a quotacheck the next time we mount. */
void
xrep_force_quotacheck(
@@ -699,13 +737,7 @@ xrep_force_quotacheck(
if (!(flag & sc->mp->m_qflags))
return;
- mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock);
- sc->mp->m_qflags &= ~flag;
- spin_lock(&sc->mp->m_sb_lock);
- sc->mp->m_sb.sb_qflags &= ~flag;
- spin_unlock(&sc->mp->m_sb_lock);
- xfs_log_sb(sc->tp);
- mutex_unlock(&sc->mp->m_quotainfo->qi_quotaofflock);
+ xrep_update_qflags(sc, flag, 0);
}
/*
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 17114327e6fa..fdfa06699921 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -72,6 +72,8 @@ int xrep_find_ag_btree_roots(struct xfs_scrub *sc, struct xfs_buf *agf_bp,
struct xrep_find_ag_btree *btree_info, struct xfs_buf *agfl_bp);
#ifdef CONFIG_XFS_QUOTA
+void xrep_update_qflags(struct xfs_scrub *sc, unsigned int clear_flags,
+ unsigned int set_flags);
void xrep_force_quotacheck(struct xfs_scrub *sc, xfs_dqtype_t type);
int xrep_ino_dqattach(struct xfs_scrub *sc);
#else
@@ -123,8 +125,10 @@ int xrep_rtbitmap(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_QUOTA
int xrep_quota(struct xfs_scrub *sc);
+int xrep_quotacheck(struct xfs_scrub *sc);
#else
# define xrep_quota xrep_notsupported
+# define xrep_quotacheck xrep_notsupported
#endif /* CONFIG_XFS_QUOTA */
int xrep_reinit_pagf(struct xfs_scrub *sc);
@@ -191,6 +195,7 @@ xrep_setup_nothing(
#define xrep_bmap_cow xrep_notsupported
#define xrep_rtbitmap xrep_notsupported
#define xrep_quota xrep_notsupported
+#define xrep_quotacheck xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index caf324c2b991..9112c0985c62 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -157,6 +157,9 @@ xchk_fsgates_disable(
if (sc->flags & XCHK_FSGATES_DRAIN)
xfs_drain_wait_disable();
+ if (sc->flags & XCHK_FSGATES_QUOTA)
+ xfs_dqtrx_hook_disable();
+
sc->flags &= ~XCHK_FSGATES_ALL;
}
@@ -360,6 +363,12 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.scrub = xchk_fscounters,
.repair = xrep_notsupported,
},
+ [XFS_SCRUB_TYPE_QUOTACHECK] = { /* quota counters */
+ .type = ST_FS,
+ .setup = xchk_setup_quotacheck,
+ .scrub = xchk_quotacheck,
+ .repair = xrep_quotacheck,
+ },
};
static int
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 7fc50654c4fe..5cd4550155f2 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -121,6 +121,7 @@ struct xfs_scrub {
#define XCHK_HAVE_FREEZE_PROT (1U << 1) /* do we have freeze protection? */
#define XCHK_FSGATES_DRAIN (1U << 2) /* defer ops draining enabled */
#define XCHK_NEED_DRAIN (1U << 3) /* scrub needs to drain defer ops */
+#define XCHK_FSGATES_QUOTA (1U << 4) /* quota live update enabled */
#define XREP_RESET_PERAG_RESV (1U << 30) /* must reset AG space reservation */
#define XREP_ALREADY_FIXED (1U << 31) /* checking our repair work */
@@ -130,7 +131,8 @@ struct xfs_scrub {
* features are gated off via dynamic code patching, which is why the state
* must be enabled during scrub setup and can only be torn down afterwards.
*/
-#define XCHK_FSGATES_ALL (XCHK_FSGATES_DRAIN)
+#define XCHK_FSGATES_ALL (XCHK_FSGATES_DRAIN | \
+ XCHK_FSGATES_QUOTA)
/* Metadata scrubbers */
int xchk_tester(struct xfs_scrub *sc);
@@ -167,12 +169,18 @@ xchk_rtsummary(struct xfs_scrub *sc)
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_quota(struct xfs_scrub *sc);
+int xchk_quotacheck(struct xfs_scrub *sc);
#else
static inline int
xchk_quota(struct xfs_scrub *sc)
{
return -ENOENT;
}
+static inline int
+xchk_quotacheck(struct xfs_scrub *sc)
+{
+ return -ENOENT;
+}
#endif
int xchk_fscounters(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c
index cd91db4a5548..d716a432227b 100644
--- a/fs/xfs/scrub/stats.c
+++ b/fs/xfs/scrub/stats.c
@@ -77,6 +77,7 @@ static const char *name_map[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_GQUOTA] = "grpquota",
[XFS_SCRUB_TYPE_PQUOTA] = "prjquota",
[XFS_SCRUB_TYPE_FSCOUNTERS] = "fscounters",
+ [XFS_SCRUB_TYPE_QUOTACHECK] = "quotacheck",
};
/* Format the scrub stats into a text buffer, similar to pcp style. */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 9aba60c61880..fedcebf90a42 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -15,6 +15,7 @@
#include <linux/tracepoint.h>
#include "xfs_bit.h"
+#include "xfs_quota_defs.h"
struct xfs_scrub;
struct xfile;
@@ -65,6 +66,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_UQUOTA);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_GQUOTA);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_PQUOTA);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_QUOTACHECK);
#define XFS_SCRUB_TYPE_STRINGS \
{ XFS_SCRUB_TYPE_PROBE, "probe" }, \
@@ -91,7 +93,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
{ XFS_SCRUB_TYPE_UQUOTA, "usrquota" }, \
{ XFS_SCRUB_TYPE_GQUOTA, "grpquota" }, \
{ XFS_SCRUB_TYPE_PQUOTA, "prjquota" }, \
- { XFS_SCRUB_TYPE_FSCOUNTERS, "fscounters" }
+ { XFS_SCRUB_TYPE_FSCOUNTERS, "fscounters" }, \
+ { XFS_SCRUB_TYPE_QUOTACHECK, "quotacheck" }
#define XFS_SCRUB_FLAG_STRINGS \
{ XFS_SCRUB_IFLAG_REPAIR, "repair" }, \
@@ -109,6 +112,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
{ XCHK_HAVE_FREEZE_PROT, "nofreeze" }, \
{ XCHK_FSGATES_DRAIN, "fsgates_drain" }, \
{ XCHK_NEED_DRAIN, "need_drain" }, \
+ { XCHK_FSGATES_QUOTA, "fsgates_quota" }, \
{ XREP_RESET_PERAG_RESV, "reset_perag_resv" }, \
{ XREP_ALREADY_FIXED, "already_fixed" }
@@ -397,6 +401,29 @@ DEFINE_SCRUB_DQITER_EVENT(xchk_dquot_iter_revalidate_bmap);
DEFINE_SCRUB_DQITER_EVENT(xchk_dquot_iter_advance_bmap);
DEFINE_SCRUB_DQITER_EVENT(xchk_dquot_iter_advance_incore);
DEFINE_SCRUB_DQITER_EVENT(xchk_dquot_iter);
+
+TRACE_EVENT(xchk_qcheck_error,
+ TP_PROTO(struct xfs_scrub *sc, xfs_dqtype_t dqtype, xfs_dqid_t id,
+ void *ret_ip),
+ TP_ARGS(sc, dqtype, id, ret_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_dqtype_t, dqtype)
+ __field(xfs_dqid_t, id)
+ __field(void *, ret_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = sc->mp->m_super->s_dev;
+ __entry->dqtype = dqtype;
+ __entry->id = id;
+ __entry->ret_ip = ret_ip;
+ ),
+ TP_printk("dev %d:%d dquot type %s id 0x%x ret_ip %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->dqtype, XFS_DQTYPE_STRINGS),
+ __entry->id,
+ __entry->ret_ip)
+);
#endif /* CONFIG_XFS_QUOTA */
TRACE_EVENT(xchk_incomplete,
@@ -1977,6 +2004,7 @@ DEFINE_EVENT(xrep_dquot_class, name, \
DEFINE_XREP_DQUOT_EVENT(xrep_dquot_item);
DEFINE_XREP_DQUOT_EVENT(xrep_disk_dquot);
DEFINE_XREP_DQUOT_EVENT(xrep_dquot_item_fill_bmap_hole);
+DEFINE_XREP_DQUOT_EVENT(xrep_quotacheck_dquot);
#endif /* CONFIG_XFS_QUOTA */
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
diff --git a/fs/xfs/scrub/xfarray.h b/fs/xfs/scrub/xfarray.h
index ec643cc9fc14..acb2f94c56c1 100644
--- a/fs/xfs/scrub/xfarray.h
+++ b/fs/xfs/scrub/xfarray.h
@@ -45,6 +45,25 @@ int xfarray_store(struct xfarray *array, xfarray_idx_t idx, const void *ptr);
int xfarray_store_anywhere(struct xfarray *array, const void *ptr);
bool xfarray_element_is_null(struct xfarray *array, const void *ptr);
+/*
+ * Load an array element, but zero the buffer if there's no data because we
+ * haven't stored to that array element yet.
+ */
+static inline int
+xfarray_load_sparse(
+ struct xfarray *array,
+ uint64_t idx,
+ void *rec)
+{
+ int error = xfarray_load(array, idx, rec);
+
+ if (error == -ENODATA) {
+ memset(rec, 0, array->obj_size);
+ return 0;
+ }
+ return error;
+}
+
/* Append an element to the array. */
static inline int xfarray_append(struct xfarray *array, const void *ptr)
{
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index 9a57afee9338..ef07af9f753d 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -280,6 +280,7 @@ static const struct ioctl_sick_map fs_map[] = {
{ XFS_SICK_FS_UQUOTA, XFS_FSOP_GEOM_SICK_UQUOTA },
{ XFS_SICK_FS_GQUOTA, XFS_FSOP_GEOM_SICK_GQUOTA },
{ XFS_SICK_FS_PQUOTA, XFS_FSOP_GEOM_SICK_PQUOTA },
+ { XFS_SICK_FS_QUOTACHECK, XFS_FSOP_GEOM_SICK_QUOTACHECK },
{ 0, 0 },
};
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 110077ca3d2a..d6635d219527 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3755,3 +3755,19 @@ xfs_ifork_zapped(
return false;
}
}
+
+/* Compute the number of data and realtime blocks used by a file. */
+void
+xfs_inode_count_blocks(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ xfs_filblks_t *dblocks,
+ xfs_filblks_t *rblocks)
+{
+ struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
+
+ *rblocks = 0;
+ if (XFS_IS_REALTIME_INODE(ip))
+ xfs_bmap_count_leaves(ifp, rblocks);
+ *dblocks = ip->i_nblocks - *rblocks;
+}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 796d11065fe2..7bbdc7009e7d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -623,5 +623,7 @@ int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip)
int xfs_inode_reload_unlinked(struct xfs_inode *ip);
bool xfs_ifork_zapped(const struct xfs_inode *ip, int whichfork);
+void xfs_inode_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
+ xfs_filblks_t *dblocks, xfs_filblks_t *rblocks);
#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index b5b555698ae1..991be1eacb6c 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -26,6 +26,7 @@
#include "xfs_ag.h"
#include "xfs_ialloc.h"
#include "xfs_log_priv.h"
+#include "xfs_health.h"
/*
* The global quota manager. There is only one of these for the entire
@@ -692,6 +693,9 @@ xfs_qm_init_quotainfo(
shrinker_register(qinf->qi_shrinker);
+ xfs_hooks_init(&qinf->qi_mod_ino_dqtrx_hooks);
+ xfs_hooks_init(&qinf->qi_apply_dqtrx_hooks);
+
return 0;
out_free_inos:
@@ -1408,8 +1412,12 @@ error_return:
xfs_warn(mp,
"Quotacheck: Failed to reset quota flags.");
}
- } else
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_QUOTACHECK);
+ } else {
xfs_notice(mp, "Quotacheck: Done.");
+ xfs_fs_mark_healthy(mp, XFS_SICK_FS_QUOTACHECK);
+ }
+
return error;
error_purge:
@@ -1819,12 +1827,12 @@ xfs_qm_vop_chown(
ASSERT(prevdq);
ASSERT(prevdq != newdq);
- xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_nblocks));
- xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
+ xfs_trans_mod_ino_dquot(tp, ip, prevdq, bfield, -(ip->i_nblocks));
+ xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
/* the sparkling new dquot */
- xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_nblocks);
- xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
+ xfs_trans_mod_ino_dquot(tp, ip, newdq, bfield, ip->i_nblocks);
+ xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_ICOUNT, 1);
/*
* Back when we made quota reservations for the chown, we reserved the
@@ -1906,22 +1914,21 @@ xfs_qm_vop_create_dqattach(
ASSERT(i_uid_read(VFS_I(ip)) == udqp->q_id);
ip->i_udquot = xfs_qm_dqhold(udqp);
- xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
}
if (gdqp && XFS_IS_GQUOTA_ON(mp)) {
ASSERT(ip->i_gdquot == NULL);
ASSERT(i_gid_read(VFS_I(ip)) == gdqp->q_id);
ip->i_gdquot = xfs_qm_dqhold(gdqp);
- xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
}
if (pdqp && XFS_IS_PQUOTA_ON(mp)) {
ASSERT(ip->i_pdquot == NULL);
ASSERT(ip->i_projid == pdqp->q_id);
ip->i_pdquot = xfs_qm_dqhold(pdqp);
- xfs_trans_mod_dquot(tp, pdqp, XFS_TRANS_DQ_ICOUNT, 1);
}
+
+ xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, 1);
}
/* Decide if this inode's dquot is near an enforcement boundary. */
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index d5c9fc4ba591..f5993012bf98 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -68,6 +68,10 @@ struct xfs_quotainfo {
/* Minimum and maximum quota expiration timestamp values. */
time64_t qi_expiry_min;
time64_t qi_expiry_max;
+
+ /* Hook to feed quota counter updates to an active online repair. */
+ struct xfs_hooks qi_mod_ino_dqtrx_hooks;
+ struct xfs_hooks qi_apply_dqtrx_hooks;
};
static inline struct radix_tree_root *
@@ -104,6 +108,18 @@ xfs_quota_inode(struct xfs_mount *mp, xfs_dqtype_t type)
return NULL;
}
+/*
+ * Parameters for tracking dqtrx changes on behalf of an inode. The hook
+ * function arg parameter is the field being updated.
+ */
+struct xfs_mod_ino_dqtrx_params {
+ uintptr_t tx_id;
+ xfs_ino_t ino;
+ xfs_dqtype_t q_type;
+ xfs_dqid_t q_id;
+ int64_t delta;
+};
+
extern void xfs_trans_mod_dquot(struct xfs_trans *tp, struct xfs_dquot *dqp,
uint field, int64_t delta);
extern void xfs_trans_dqjoin(struct xfs_trans *, struct xfs_dquot *);
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index b77673dd0558..271c1021c733 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -9,6 +9,7 @@
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index e0d56489f3b2..85a4ae1a17f6 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -74,6 +74,22 @@ struct xfs_dqtrx {
int64_t qt_icount_delta; /* dquot inode count changes */
};
+enum xfs_apply_dqtrx_type {
+ XFS_APPLY_DQTRX_COMMIT = 0,
+ XFS_APPLY_DQTRX_UNRESERVE,
+};
+
+/*
+ * Parameters for applying dqtrx changes to a dquot. The hook function arg
+ * parameter is enum xfs_apply_dqtrx_type.
+ */
+struct xfs_apply_dqtrx_params {
+ uintptr_t tx_id;
+ xfs_ino_t ino;
+ xfs_dqtype_t q_type;
+ xfs_dqid_t q_id;
+};
+
#ifdef CONFIG_XFS_QUOTA
extern void xfs_trans_dup_dqinfo(struct xfs_trans *, struct xfs_trans *);
extern void xfs_trans_free_dqinfo(struct xfs_trans *);
@@ -114,6 +130,30 @@ xfs_quota_reserve_blkres(struct xfs_inode *ip, int64_t blocks)
return xfs_trans_reserve_quota_nblks(NULL, ip, blocks, 0, false);
}
bool xfs_inode_near_dquot_enforcement(struct xfs_inode *ip, xfs_dqtype_t type);
+
+# ifdef CONFIG_XFS_LIVE_HOOKS
+void xfs_trans_mod_ino_dquot(struct xfs_trans *tp, struct xfs_inode *ip,
+ struct xfs_dquot *dqp, unsigned int field, int64_t delta);
+
+struct xfs_quotainfo;
+
+struct xfs_dqtrx_hook {
+ struct xfs_hook mod_hook;
+ struct xfs_hook apply_hook;
+};
+
+void xfs_dqtrx_hook_disable(void);
+void xfs_dqtrx_hook_enable(void);
+
+int xfs_dqtrx_hook_add(struct xfs_quotainfo *qi, struct xfs_dqtrx_hook *hook);
+void xfs_dqtrx_hook_del(struct xfs_quotainfo *qi, struct xfs_dqtrx_hook *hook);
+void xfs_dqtrx_hook_setup(struct xfs_dqtrx_hook *hook, notifier_fn_t mod_fn,
+ notifier_fn_t apply_fn);
+# else
+# define xfs_trans_mod_ino_dquot(tp, ip, dqp, field, delta) \
+ xfs_trans_mod_dquot((tp), (dqp), (field), (delta))
+# endif /* CONFIG_XFS_LIVE_HOOKS */
+
#else
static inline int
xfs_qm_vop_dqalloc(struct xfs_inode *ip, kuid_t kuid, kgid_t kgid,
@@ -173,6 +213,12 @@ xfs_trans_reserve_quota_icreate(struct xfs_trans *tp, struct xfs_dquot *udqp,
#define xfs_qm_unmount(mp)
#define xfs_qm_unmount_quotas(mp)
#define xfs_inode_near_dquot_enforcement(ip, type) (false)
+
+# ifdef CONFIG_XFS_LIVE_HOOKS
+# define xfs_dqtrx_hook_enable() ((void)0)
+# define xfs_dqtrx_hook_disable() ((void)0)
+# endif /* CONFIG_XFS_LIVE_HOOKS */
+
#endif /* CONFIG_XFS_QUOTA */
static inline int
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 9c159d016ecf..577b535a595c 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -17,6 +17,7 @@
#include "xfs_qm.h"
#include "xfs_trace.h"
#include "xfs_error.h"
+#include "xfs_health.h"
STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *);
@@ -120,6 +121,116 @@ xfs_trans_dup_dqinfo(
}
}
+#ifdef CONFIG_XFS_LIVE_HOOKS
+/*
+ * Use a static key here to reduce the overhead of quota live updates. If the
+ * compiler supports jump labels, the static branch will be replaced by a nop
+ * sled when there are no hook users. Online fsck is currently the only
+ * caller, so this is a reasonable tradeoff.
+ *
+ * Note: Patching the kernel code requires taking the cpu hotplug lock. Other
+ * parts of the kernel allocate memory with that lock held, which means that
+ * XFS callers cannot hold any locks that might be used by memory reclaim or
+ * writeback when calling the static_branch_{inc,dec} functions.
+ */
+DEFINE_STATIC_XFS_HOOK_SWITCH(xfs_dqtrx_hooks_switch);
+
+void
+xfs_dqtrx_hook_disable(void)
+{
+ xfs_hooks_switch_off(&xfs_dqtrx_hooks_switch);
+}
+
+void
+xfs_dqtrx_hook_enable(void)
+{
+ xfs_hooks_switch_on(&xfs_dqtrx_hooks_switch);
+}
+
+/* Schedule a transactional dquot update on behalf of an inode. */
+void
+xfs_trans_mod_ino_dquot(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ struct xfs_dquot *dqp,
+ unsigned int field,
+ int64_t delta)
+{
+ xfs_trans_mod_dquot(tp, dqp, field, delta);
+
+ if (xfs_hooks_switched_on(&xfs_dqtrx_hooks_switch)) {
+ struct xfs_mod_ino_dqtrx_params p = {
+ .tx_id = (uintptr_t)tp,
+ .ino = ip->i_ino,
+ .q_type = xfs_dquot_type(dqp),
+ .q_id = dqp->q_id,
+ .delta = delta
+ };
+ struct xfs_quotainfo *qi = tp->t_mountp->m_quotainfo;
+
+ xfs_hooks_call(&qi->qi_mod_ino_dqtrx_hooks, field, &p);
+ }
+}
+
+/* Call the specified functions during a dquot counter update. */
+int
+xfs_dqtrx_hook_add(
+ struct xfs_quotainfo *qi,
+ struct xfs_dqtrx_hook *hook)
+{
+ int error;
+
+ /*
+ * Transactional dquot updates first call the mod hook when changes
+ * are attached to the transaction and then call the apply hook when
+ * those changes are committed (or canceled).
+ *
+ * The apply hook must be installed before the mod hook so that we
+ * never fail to catch the end of a quota update sequence.
+ */
+ error = xfs_hooks_add(&qi->qi_apply_dqtrx_hooks, &hook->apply_hook);
+ if (error)
+ goto out;
+
+ error = xfs_hooks_add(&qi->qi_mod_ino_dqtrx_hooks, &hook->mod_hook);
+ if (error)
+ goto out_apply;
+
+ return 0;
+
+out_apply:
+ xfs_hooks_del(&qi->qi_apply_dqtrx_hooks, &hook->apply_hook);
+out:
+ return error;
+}
+
+/* Stop calling the specified function during a dquot counter update. */
+void
+xfs_dqtrx_hook_del(
+ struct xfs_quotainfo *qi,
+ struct xfs_dqtrx_hook *hook)
+{
+ /*
+ * The mod hook must be removed before apply hook to avoid giving the
+ * hook consumer with an incomplete update. No hooks should be running
+ * after these functions return.
+ */
+ xfs_hooks_del(&qi->qi_mod_ino_dqtrx_hooks, &hook->mod_hook);
+ xfs_hooks_del(&qi->qi_apply_dqtrx_hooks, &hook->apply_hook);
+}
+
+/* Configure dquot update hook functions. */
+void
+xfs_dqtrx_hook_setup(
+ struct xfs_dqtrx_hook *hook,
+ notifier_fn_t mod_fn,
+ notifier_fn_t apply_fn)
+{
+ xfs_hook_setup(&hook->mod_hook, mod_fn);
+ xfs_hook_setup(&hook->apply_hook, apply_fn);
+}
+#endif /* CONFIG_XFS_LIVE_HOOKS */
+
/*
* Wrap around mod_dquot to account for both user and group quotas.
*/
@@ -137,11 +248,11 @@ xfs_trans_mod_dquot_byino(
return;
if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
- (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
+ xfs_trans_mod_ino_dquot(tp, ip, ip->i_udquot, field, delta);
if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot)
- (void) xfs_trans_mod_dquot(tp, ip->i_gdquot, field, delta);
+ xfs_trans_mod_ino_dquot(tp, ip, ip->i_gdquot, field, delta);
if (XFS_IS_PQUOTA_ON(mp) && ip->i_pdquot)
- (void) xfs_trans_mod_dquot(tp, ip->i_pdquot, field, delta);
+ xfs_trans_mod_ino_dquot(tp, ip, ip->i_pdquot, field, delta);
}
STATIC struct xfs_dqtrx *
@@ -321,6 +432,29 @@ xfs_apply_quota_reservation_deltas(
}
}
+#ifdef CONFIG_XFS_LIVE_HOOKS
+/* Call downstream hooks now that it's time to apply dquot deltas. */
+static inline void
+xfs_trans_apply_dquot_deltas_hook(
+ struct xfs_trans *tp,
+ struct xfs_dquot *dqp)
+{
+ if (xfs_hooks_switched_on(&xfs_dqtrx_hooks_switch)) {
+ struct xfs_apply_dqtrx_params p = {
+ .tx_id = (uintptr_t)tp,
+ .q_type = xfs_dquot_type(dqp),
+ .q_id = dqp->q_id,
+ };
+ struct xfs_quotainfo *qi = tp->t_mountp->m_quotainfo;
+
+ xfs_hooks_call(&qi->qi_apply_dqtrx_hooks,
+ XFS_APPLY_DQTRX_COMMIT, &p);
+ }
+}
+#else
+# define xfs_trans_apply_dquot_deltas_hook(tp, dqp) ((void)0)
+#endif /* CONFIG_XFS_LIVE_HOOKS */
+
/*
* Called by xfs_trans_commit() and similar in spirit to
* xfs_trans_apply_sb_deltas().
@@ -366,6 +500,8 @@ xfs_trans_apply_dquot_deltas(
ASSERT(XFS_DQ_IS_LOCKED(dqp));
+ xfs_trans_apply_dquot_deltas_hook(tp, dqp);
+
/*
* adjust the actual number of blocks used
*/
@@ -465,6 +601,29 @@ xfs_trans_apply_dquot_deltas(
}
}
+#ifdef CONFIG_XFS_LIVE_HOOKS
+/* Call downstream hooks now that it's time to cancel dquot deltas. */
+static inline void
+xfs_trans_unreserve_and_mod_dquots_hook(
+ struct xfs_trans *tp,
+ struct xfs_dquot *dqp)
+{
+ if (xfs_hooks_switched_on(&xfs_dqtrx_hooks_switch)) {
+ struct xfs_apply_dqtrx_params p = {
+ .tx_id = (uintptr_t)tp,
+ .q_type = xfs_dquot_type(dqp),
+ .q_id = dqp->q_id,
+ };
+ struct xfs_quotainfo *qi = tp->t_mountp->m_quotainfo;
+
+ xfs_hooks_call(&qi->qi_apply_dqtrx_hooks,
+ XFS_APPLY_DQTRX_UNRESERVE, &p);
+ }
+}
+#else
+# define xfs_trans_unreserve_and_mod_dquots_hook(tp, dqp) ((void)0)
+#endif /* CONFIG_XFS_LIVE_HOOKS */
+
/*
* Release the reservations, and adjust the dquots accordingly.
* This is called only when the transaction is being aborted. If by
@@ -495,6 +654,9 @@ xfs_trans_unreserve_and_mod_dquots(
*/
if ((dqp = qtrx->qt_dquot) == NULL)
break;
+
+ xfs_trans_unreserve_and_mod_dquots_hook(tp, dqp);
+
/*
* Unreserve the original reservation. We don't care
* about the number of blocks used field, or deltas.
@@ -706,6 +868,7 @@ error_return:
error_corrupt:
xfs_dqunlock(dqp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_QUOTACHECK);
return -EFSCORRUPTED;
}