Diffstat:
 drivers/md/bcache/btree.c | 135 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 96 insertions(+), 39 deletions(-)
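
Before the hunks, a brief orientation: this patch makes bcache's garbage collection incremental. GC does a bounded amount of work per pass (at least MIN_GC_NODES nodes, scaled so the whole tree is covered in roughly MAX_GC_TIMES passes), returns -EAGAIN to yield when front-side I/O is in flight, and the caller sleeps GC_SLEEP_MS before retrying. The sketch below is only an illustrative user-space model of that throttling policy, not part of the patch; the macro names mirror the diff, while struct gc_progress, gc_min_nodes() and gc_should_yield() are invented stand-ins for the cache_set/gc_stat fields used in btree_gc_min_nodes() and btree_gc_recurse().

/*
 * Illustrative sketch only (assumed names, not from the patch):
 * a user-space model of the incremental-GC throttle added below.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define MAX_GC_TIMES	100	/* split one GC run into ~100 passes       */
#define MIN_GC_NODES	100	/* lower bound on work done per pass       */
#define GC_SLEEP_MS	100	/* back-off when front-side I/O is waiting */

/* stand-in for the c->gc_stats / gc_stat fields used in the diff */
struct gc_progress {
	size_t total_nodes;	/* c->gc_stats.nodes */
	size_t nodes_done;	/* gc->nodes         */
	size_t nodes_pre;	/* gc->nodes_pre     */
};

/* mirrors btree_gc_min_nodes(): per-pass work scales with tree size */
static size_t gc_min_nodes(const struct gc_progress *p)
{
	size_t min_nodes = p->total_nodes / MAX_GC_TIMES;

	return min_nodes < MIN_GC_NODES ? MIN_GC_NODES : min_nodes;
}

/*
 * mirrors the new check in btree_gc_recurse(): once enough nodes have
 * been processed since the last pause and front-side I/O is in flight,
 * report -EAGAIN so the caller can sleep GC_SLEEP_MS and then resume.
 */
static bool gc_should_yield(struct gc_progress *p, bool io_inflight)
{
	if (io_inflight &&
	    p->nodes_done >= p->nodes_pre + gc_min_nodes(p)) {
		p->nodes_pre = p->nodes_done;
		return true;
	}
	return false;
}

int main(void)
{
	struct gc_progress p = { .total_nodes = 80000 };

	p.nodes_done = 800;	/* 80000 / MAX_GC_TIMES nodes processed */
	printf("min nodes per pass = %zu, yield now = %d\n",
	       gc_min_nodes(&p), gc_should_yield(&p, true));
	return 0;
}
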
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 547c9eedc2f4..e7d4817681f2 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -90,6 +90,9 @@
#define MAX_NEED_GC 64
#define MAX_SAVE_PRIO 72
+#define MAX_GC_TIMES 100
+#define MIN_GC_NODES 100
+#define GC_SLEEP_MS 100
#define PTR_DIRTY_BIT (((uint64_t) 1 << 36))
@@ -180,7 +183,7 @@ static void bch_btree_init_next(struct btree *b)
void bkey_put(struct cache_set *c, struct bkey *k)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i))
@@ -284,6 +287,7 @@ err:
static void btree_node_read_endio(struct bio *bio)
{
struct closure *cl = bio->bi_private;
+
closure_put(cl);
}
@@ -432,7 +436,10 @@ static void do_btree_node_write(struct btree *b)
continue_at(cl, btree_node_write_done, NULL);
} else {
- /* No problem for multipage bvec since the bio is just allocated */
+ /*
+ * No problem for multipage bvec since the bio is
+ * just allocated
+ */
b->bio->bi_vcnt = 0;
bch_bio_map(b->bio, i);
@@ -476,7 +483,7 @@ void __bch_btree_node_write(struct btree *b, struct closure *parent)
void bch_btree_node_write(struct btree *b, struct closure *parent)
{
- unsigned nsets = b->keys.nsets;
+ unsigned int nsets = b->keys.nsets;
lockdep_assert_held(&b->lock);
@@ -578,7 +585,7 @@ static void mca_bucket_free(struct btree *b)
list_move(&b->list, &b->c->btree_cache_freeable);
}
-static unsigned btree_order(struct bkey *k)
+static unsigned int btree_order(struct bkey *k)
{
return ilog2(KEY_SIZE(k) / PAGE_SECTORS ?: 1);
}
@@ -586,7 +593,7 @@ static unsigned btree_order(struct bkey *k)
static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
{
if (!bch_btree_keys_alloc(&b->keys,
- max_t(unsigned,
+ max_t(unsigned int,
ilog2(b->c->btree_pages),
btree_order(k)),
gfp)) {
@@ -601,6 +608,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
struct bkey *k, gfp_t gfp)
{
struct btree *b = kzalloc(sizeof(struct btree), gfp);
+
if (!b)
return NULL;
@@ -617,7 +625,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
return b;
}
-static int mca_reap(struct btree *b, unsigned min_order, bool flush)
+static int mca_reap(struct btree *b, unsigned int min_order, bool flush)
{
struct closure cl;
@@ -743,6 +751,7 @@ void bch_btree_cache_free(struct cache_set *c)
{
struct btree *b;
struct closure cl;
+
closure_init_stack(&cl);
if (c->shrink.list.next)
@@ -783,7 +792,7 @@ void bch_btree_cache_free(struct cache_set *c)
int bch_btree_cache_alloc(struct cache_set *c)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < mca_reserve(c); i++)
if (!mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL))
@@ -1008,6 +1017,13 @@ retry:
BUG_ON(b->level != level);
}
+ if (btree_node_io_error(b)) {
+ rw_unlock(write, b);
+ return ERR_PTR(-EIO);
+ }
+
+ BUG_ON(!b->written);
+
b->parent = parent;
b->accessed = 1;
@@ -1019,13 +1035,6 @@ retry:
for (; i <= b->keys.nsets; i++)
prefetch(b->keys.set[i].data);
- if (btree_node_io_error(b)) {
- rw_unlock(write, b);
- return ERR_PTR(-EIO);
- }
-
- BUG_ON(!b->written);
-
return b;
}
@@ -1121,6 +1130,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b,
struct btree_op *op)
{
struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
+
if (!IS_ERR_OR_NULL(n)) {
mutex_lock(&n->write_lock);
bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
@@ -1133,7 +1143,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b,
static void make_btree_freeing_key(struct btree *b, struct bkey *k)
{
- unsigned i;
+ unsigned int i;
mutex_lock(&b->c->bucket_lock);
@@ -1154,7 +1164,7 @@ static int btree_check_reserve(struct btree *b, struct btree_op *op)
{
struct cache_set *c = b->c;
struct cache *ca;
- unsigned i, reserve = (c->root->level - b->level) * 2 + 1;
+ unsigned int i, reserve = (c->root->level - b->level) * 2 + 1;
mutex_lock(&c->bucket_lock);
@@ -1178,7 +1188,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
struct bkey *k)
{
uint8_t stale = 0;
- unsigned i;
+ unsigned int i;
struct bucket *g;
/*
@@ -1216,7 +1226,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
SET_GC_MARK(g, GC_MARK_RECLAIMABLE);
/* guard against overflow */
- SET_GC_SECTORS_USED(g, min_t(unsigned,
+ SET_GC_SECTORS_USED(g, min_t(unsigned int,
GC_SECTORS_USED(g) + KEY_SIZE(k),
MAX_GC_SECTORS_USED));
@@ -1230,7 +1240,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k)
{
- unsigned i;
+ unsigned int i;
for (i = 0; i < KEY_PTRS(k); i++)
if (ptr_available(c, k, i) &&
@@ -1256,7 +1266,7 @@ void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats)
static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
{
uint8_t stale = 0;
- unsigned keys = 0, good_keys = 0;
+ unsigned int keys = 0, good_keys = 0;
struct bkey *k;
struct btree_iter iter;
struct bset_tree *t;
@@ -1299,16 +1309,18 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
struct gc_merge_info {
struct btree *b;
- unsigned keys;
+ unsigned int keys;
};
-static int bch_btree_insert_node(struct btree *, struct btree_op *,
- struct keylist *, atomic_t *, struct bkey *);
+static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
+ struct keylist *insert_keys,
+ atomic_t *journal_ref,
+ struct bkey *replace_key);
static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
struct gc_stat *gc, struct gc_merge_info *r)
{
- unsigned i, nodes = 0, keys = 0, blocks;
+ unsigned int i, nodes = 0, keys = 0, blocks;
struct btree *new_nodes[GC_MERGE_NODES];
struct keylist keylist;
struct closure cl;
@@ -1508,11 +1520,11 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
return -EINTR;
}
-static unsigned btree_gc_count_keys(struct btree *b)
+static unsigned int btree_gc_count_keys(struct btree *b)
{
struct bkey *k;
struct btree_iter iter;
- unsigned ret = 0;
+ unsigned int ret = 0;
for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
ret += bkey_u64s(k);
@@ -1520,6 +1532,32 @@ static unsigned btree_gc_count_keys(struct btree *b)
return ret;
}
+static size_t btree_gc_min_nodes(struct cache_set *c)
+{
+ size_t min_nodes;
+
+ /*
+ * Incremental GC pauses for 100ms whenever front-side
+ * I/O arrives, so if it processed only a constant number
+ * (100) of nodes per pass, a large btree would make GC
+ * take a very long time, and front-side I/O could run
+ * out of buckets (no new bucket can be allocated while
+ * GC runs) and be blocked again. So instead of a fixed
+ * node count, scale the work per pass with the number
+ * of btree nodes: divide GC into a constant number (100)
+ * of passes, so that a large btree gets more nodes
+ * processed per pass and a small one fewer, but never
+ * fewer than MIN_GC_NODES.
+ */
+ min_nodes = c->gc_stats.nodes / MAX_GC_TIMES;
+ if (min_nodes < MIN_GC_NODES)
+ min_nodes = MIN_GC_NODES;
+
+ return min_nodes;
+}
+
static int btree_gc_recurse(struct btree *b, struct btree_op *op,
struct closure *writes, struct gc_stat *gc)
{
@@ -1585,6 +1623,13 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
memmove(r + 1, r, sizeof(r[0]) * (GC_MERGE_NODES - 1));
r->b = NULL;
+ if (atomic_read(&b->c->search_inflight) &&
+ gc->nodes >= gc->nodes_pre + btree_gc_min_nodes(b->c)) {
+ gc->nodes_pre = gc->nodes;
+ ret = -EAGAIN;
+ break;
+ }
+
if (need_resched()) {
ret = -EAGAIN;
break;
@@ -1642,7 +1687,7 @@ static void btree_gc_start(struct cache_set *c)
{
struct cache *ca;
struct bucket *b;
- unsigned i;
+ unsigned int i;
if (!c->gc_mark_valid)
return;
@@ -1668,7 +1713,7 @@ static void bch_btree_gc_finish(struct cache_set *c)
{
struct bucket *b;
struct cache *ca;
- unsigned i;
+ unsigned int i;
mutex_lock(&c->bucket_lock);
@@ -1686,7 +1731,7 @@ static void bch_btree_gc_finish(struct cache_set *c)
struct bcache_device *d = c->devices[i];
struct cached_dev *dc;
struct keybuf_key *w, *n;
- unsigned j;
+ unsigned int j;
if (!d || UUID_FLASH_ONLY(&c->uuids[i]))
continue;
@@ -1753,7 +1798,10 @@ static void bch_btree_gc(struct cache_set *c)
closure_sync(&writes);
cond_resched();
- if (ret && ret != -EAGAIN)
+ if (ret == -EAGAIN)
+ schedule_timeout_interruptible(msecs_to_jiffies(GC_SLEEP_MS));
+ else if (ret)
pr_warn("gc failed!");
} while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
@@ -1775,7 +1823,7 @@ static void bch_btree_gc(struct cache_set *c)
static bool gc_should_run(struct cache_set *c)
{
struct cache *ca;
- unsigned i;
+ unsigned int i;
for_each_cache(ca, c, i)
if (ca->invalidate_needs_gc)
@@ -1834,8 +1882,14 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
do {
k = bch_btree_iter_next_filter(&iter, &b->keys,
bch_ptr_bad);
- if (k)
+ if (k) {
btree_node_prefetch(b, k);
+ /*
+ * initialize c->gc_stats.nodes
+ * for incremental GC
+ */
+ b->c->gc_stats.nodes++;
+ }
if (p)
ret = btree(check_recurse, p, b, op);
@@ -1860,7 +1914,7 @@ void bch_initial_gc_finish(struct cache_set *c)
{
struct cache *ca;
struct bucket *b;
- unsigned i;
+ unsigned int i;
bch_btree_gc_finish(c);
@@ -1900,7 +1954,7 @@ void bch_initial_gc_finish(struct cache_set *c)
static bool btree_insert_key(struct btree *b, struct bkey *k,
struct bkey *replace_key)
{
- unsigned status;
+ unsigned int status;
BUG_ON(bkey_cmp(k, &b->key) > 0);
@@ -1999,7 +2053,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5;
if (split) {
- unsigned keys = 0;
+ unsigned int keys = 0;
trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
@@ -2177,10 +2231,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
rw_lock(true, b, b->level);
if (b->key.ptr[0] != btree_ptr ||
- b->seq != seq + 1) {
+ b->seq != seq + 1) {
op->lock = b->level;
goto out;
- }
+ }
}
SET_KEY_PTRS(check_key, 1);
@@ -2255,7 +2309,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys,
void bch_btree_set_root(struct btree *b)
{
- unsigned i;
+ unsigned int i;
struct closure cl;
closure_init_stack(&cl);
@@ -2367,7 +2421,7 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l,
struct refill {
struct btree_op op;
- unsigned nr_found;
+ unsigned int nr_found;
struct keybuf *buf;
struct bkey *end;
keybuf_pred_fn *pred;
@@ -2443,6 +2497,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
if (!RB_EMPTY_ROOT(&buf->keys)) {
struct keybuf_key *w;
+
w = RB_FIRST(&buf->keys, struct keybuf_key, node);
buf->start = START_KEY(&w->key);
@@ -2474,6 +2529,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
{
bool ret = false;
struct keybuf_key *p, *w, s;
+
s.key = *start;
if (bkey_cmp(end, &buf->start) <= 0 ||
@@ -2500,6 +2556,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
struct keybuf_key *bch_keybuf_next(struct keybuf *buf)
{
struct keybuf_key *w;
+
spin_lock(&buf->lock);
w = RB_FIRST(&buf->keys, struct keybuf_key, node);