aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/bcache/btree.c
diff options
context:
space:
mode:
authorColy Li <colyli@suse.de>2020-03-22 14:03:01 +0800
committerJens Axboe <axboe@kernel.dk>2020-03-22 10:06:57 -0600
commit8e7102273f597dbb38af43da874f8c123f8e6dbe (patch)
tree8f03c2d6e9998cae3fa1172d93dc556d5cca50f4 /drivers/md/bcache/btree.c
parentbcache: add bcache_ prefix to btree_root() and btree() macros (diff)
downloadlinux-dev-8e7102273f597dbb38af43da874f8c123f8e6dbe.tar.xz
linux-dev-8e7102273f597dbb38af43da874f8c123f8e6dbe.zip
bcache: make bch_btree_check() to be multithreaded
When registering a cache device, bch_btree_check() is called to check all btree nodes, to make sure the btree is consistent and not corrupted. bch_btree_check() is recursively executed in a single thread, when there are a lot of data cached and the btree is huge, it may take very long time to check all the btree nodes. In my testing, I observed it took around 50 minutes to finish bch_btree_check(). When checking the bcache btree nodes, the cache set is not running yet, and indeed the whole tree is in read-only state, it is safe to create multiple threads to check the btree in parallel. This patch tries to create multiple threads, and each thread tries to one-by-one check the sub-tree indexed by a key from the btree root node. The parallel thread number depends on how many keys in the btree root node. At most BCH_BTR_CHKTHREAD_MAX (64) threads can be created, but in practice is should be min(cpu-number/2, root-node-keys-number). Signed-off-by: Coly Li <colyli@suse.de> Cc: Christoph Hellwig <hch@infradead.org> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/md/bcache/btree.c')
-rw-r--r--drivers/md/bcache/btree.c169
1 files changed, 166 insertions, 3 deletions
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index faf152524a16..74d66b641169 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1897,13 +1897,176 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
return ret;
}
+
+static int bch_btree_check_thread(void *arg)
+{
+ int ret;
+ struct btree_check_info *info = arg;
+ struct btree_check_state *check_state = info->state;
+ struct cache_set *c = check_state->c;
+ struct btree_iter iter;
+ struct bkey *k, *p;
+ int cur_idx, prev_idx, skip_nr;
+ int i, n;
+
+ k = p = NULL;
+ i = n = 0;
+ cur_idx = prev_idx = 0;
+ ret = 0;
+
+ /* root node keys are checked before thread created */
+ bch_btree_iter_init(&c->root->keys, &iter, NULL);
+ k = bch_btree_iter_next_filter(&iter, &c->root->keys, bch_ptr_bad);
+ BUG_ON(!k);
+
+ p = k;
+ while (k) {
+ /*
+ * Fetch a root node key index, skip the keys which
+ * should be fetched by other threads, then check the
+ * sub-tree indexed by the fetched key.
+ */
+ spin_lock(&check_state->idx_lock);
+ cur_idx = check_state->key_idx;
+ check_state->key_idx++;
+ spin_unlock(&check_state->idx_lock);
+
+ skip_nr = cur_idx - prev_idx;
+
+ while (skip_nr) {
+ k = bch_btree_iter_next_filter(&iter,
+ &c->root->keys,
+ bch_ptr_bad);
+ if (k)
+ p = k;
+ else {
+ /*
+ * No more keys to check in root node,
+ * current checking threads are enough,
+ * stop creating more.
+ */
+ atomic_set(&check_state->enough, 1);
+ /* Update check_state->enough earlier */
+ smp_mb();
+ goto out;
+ }
+ skip_nr--;
+ cond_resched();
+ }
+
+ if (p) {
+ struct btree_op op;
+
+ btree_node_prefetch(c->root, p);
+ c->gc_stats.nodes++;
+ bch_btree_op_init(&op, 0);
+ ret = bcache_btree(check_recurse, p, c->root, &op);
+ if (ret)
+ goto out;
+ }
+ p = NULL;
+ prev_idx = cur_idx;
+ cond_resched();
+ }
+
+out:
+ info->result = ret;
+ /* update check_state->started among all CPUs */
+ smp_mb();
+ if (atomic_dec_and_test(&check_state->started))
+ wake_up(&check_state->wait);
+
+ return ret;
+}
+
+
+
+static int bch_btree_chkthread_nr(void)
+{
+ int n = num_online_cpus()/2;
+
+ if (n == 0)
+ n = 1;
+ else if (n > BCH_BTR_CHKTHREAD_MAX)
+ n = BCH_BTR_CHKTHREAD_MAX;
+
+ return n;
+}
+
int bch_btree_check(struct cache_set *c)
{
- struct btree_op op;
+ int ret = 0;
+ int i;
+ struct bkey *k = NULL;
+ struct btree_iter iter;
+ struct btree_check_state *check_state;
+ char name[32];
- bch_btree_op_init(&op, SHRT_MAX);
+ /* check and mark root node keys */
+ for_each_key_filter(&c->root->keys, k, &iter, bch_ptr_invalid)
+ bch_initial_mark_key(c, c->root->level, k);
+
+ bch_initial_mark_key(c, c->root->level + 1, &c->root->key);
+
+ if (c->root->level == 0)
+ return 0;
+
+ check_state = kzalloc(sizeof(struct btree_check_state), GFP_KERNEL);
+ if (!check_state)
+ return -ENOMEM;
+
+ check_state->c = c;
+ check_state->total_threads = bch_btree_chkthread_nr();
+ check_state->key_idx = 0;
+ spin_lock_init(&check_state->idx_lock);
+ atomic_set(&check_state->started, 0);
+ atomic_set(&check_state->enough, 0);
+ init_waitqueue_head(&check_state->wait);
- return bcache_btree_root(check_recurse, c, &op);
+ /*
+ * Run multiple threads to check btree nodes in parallel,
+ * if check_state->enough is non-zero, it means current
+ * running check threads are enough, unncessary to create
+ * more.
+ */
+ for (i = 0; i < check_state->total_threads; i++) {
+ /* fetch latest check_state->enough earlier */
+ smp_mb();
+ if (atomic_read(&check_state->enough))
+ break;
+
+ check_state->infos[i].result = 0;
+ check_state->infos[i].state = check_state;
+ snprintf(name, sizeof(name), "bch_btrchk[%u]", i);
+ atomic_inc(&check_state->started);
+
+ check_state->infos[i].thread =
+ kthread_run(bch_btree_check_thread,
+ &check_state->infos[i],
+ name);
+ if (IS_ERR(check_state->infos[i].thread)) {
+ pr_err("fails to run thread bch_btrchk[%d]", i);
+ for (--i; i >= 0; i--)
+ kthread_stop(check_state->infos[i].thread);
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ wait_event_interruptible(check_state->wait,
+ atomic_read(&check_state->started) == 0 ||
+ test_bit(CACHE_SET_IO_DISABLE, &c->flags));
+
+ for (i = 0; i < check_state->total_threads; i++) {
+ if (check_state->infos[i].result) {
+ ret = check_state->infos[i].result;
+ goto out;
+ }
+ }
+
+out:
+ kfree(check_state);
+ return ret;
}
void bch_initial_gc_finish(struct cache_set *c)