diff options
Diffstat (limited to '')
-rw-r--r-- | drivers/md/bcache/btree.c | 135 |
1 files changed, 96 insertions, 39 deletions
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 547c9eedc2f4..e7d4817681f2 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -90,6 +90,9 @@ #define MAX_NEED_GC 64 #define MAX_SAVE_PRIO 72 +#define MAX_GC_TIMES 100 +#define MIN_GC_NODES 100 +#define GC_SLEEP_MS 100 #define PTR_DIRTY_BIT (((uint64_t) 1 << 36)) @@ -180,7 +183,7 @@ static void bch_btree_init_next(struct btree *b) void bkey_put(struct cache_set *c, struct bkey *k) { - unsigned i; + unsigned int i; for (i = 0; i < KEY_PTRS(k); i++) if (ptr_available(c, k, i)) @@ -284,6 +287,7 @@ err: static void btree_node_read_endio(struct bio *bio) { struct closure *cl = bio->bi_private; + closure_put(cl); } @@ -432,7 +436,10 @@ static void do_btree_node_write(struct btree *b) continue_at(cl, btree_node_write_done, NULL); } else { - /* No problem for multipage bvec since the bio is just allocated */ + /* + * No problem for multipage bvec since the bio is + * just allocated + */ b->bio->bi_vcnt = 0; bch_bio_map(b->bio, i); @@ -476,7 +483,7 @@ void __bch_btree_node_write(struct btree *b, struct closure *parent) void bch_btree_node_write(struct btree *b, struct closure *parent) { - unsigned nsets = b->keys.nsets; + unsigned int nsets = b->keys.nsets; lockdep_assert_held(&b->lock); @@ -578,7 +585,7 @@ static void mca_bucket_free(struct btree *b) list_move(&b->list, &b->c->btree_cache_freeable); } -static unsigned btree_order(struct bkey *k) +static unsigned int btree_order(struct bkey *k) { return ilog2(KEY_SIZE(k) / PAGE_SECTORS ?: 1); } @@ -586,7 +593,7 @@ static unsigned btree_order(struct bkey *k) static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) { if (!bch_btree_keys_alloc(&b->keys, - max_t(unsigned, + max_t(unsigned int, ilog2(b->c->btree_pages), btree_order(k)), gfp)) { @@ -601,6 +608,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c, struct bkey *k, gfp_t gfp) { struct btree *b = kzalloc(sizeof(struct btree), gfp); + if (!b) return NULL; @@ -617,7 +625,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c, return b; } -static int mca_reap(struct btree *b, unsigned min_order, bool flush) +static int mca_reap(struct btree *b, unsigned int min_order, bool flush) { struct closure cl; @@ -743,6 +751,7 @@ void bch_btree_cache_free(struct cache_set *c) { struct btree *b; struct closure cl; + closure_init_stack(&cl); if (c->shrink.list.next) @@ -783,7 +792,7 @@ void bch_btree_cache_free(struct cache_set *c) int bch_btree_cache_alloc(struct cache_set *c) { - unsigned i; + unsigned int i; for (i = 0; i < mca_reserve(c); i++) if (!mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL)) @@ -1008,6 +1017,13 @@ retry: BUG_ON(b->level != level); } + if (btree_node_io_error(b)) { + rw_unlock(write, b); + return ERR_PTR(-EIO); + } + + BUG_ON(!b->written); + b->parent = parent; b->accessed = 1; @@ -1019,13 +1035,6 @@ retry: for (; i <= b->keys.nsets; i++) prefetch(b->keys.set[i].data); - if (btree_node_io_error(b)) { - rw_unlock(write, b); - return ERR_PTR(-EIO); - } - - BUG_ON(!b->written); - return b; } @@ -1121,6 +1130,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b, struct btree_op *op) { struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent); + if (!IS_ERR_OR_NULL(n)) { mutex_lock(&n->write_lock); bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort); @@ -1133,7 +1143,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b, static void make_btree_freeing_key(struct btree *b, struct bkey *k) { - unsigned i; + unsigned int i; mutex_lock(&b->c->bucket_lock); @@ -1154,7 +1164,7 @@ static int btree_check_reserve(struct btree *b, struct btree_op *op) { struct cache_set *c = b->c; struct cache *ca; - unsigned i, reserve = (c->root->level - b->level) * 2 + 1; + unsigned int i, reserve = (c->root->level - b->level) * 2 + 1; mutex_lock(&c->bucket_lock); @@ -1178,7 +1188,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level, struct bkey *k) { uint8_t stale = 0; - unsigned i; + unsigned int i; struct bucket *g; /* @@ -1216,7 +1226,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level, SET_GC_MARK(g, GC_MARK_RECLAIMABLE); /* guard against overflow */ - SET_GC_SECTORS_USED(g, min_t(unsigned, + SET_GC_SECTORS_USED(g, min_t(unsigned int, GC_SECTORS_USED(g) + KEY_SIZE(k), MAX_GC_SECTORS_USED)); @@ -1230,7 +1240,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level, void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k) { - unsigned i; + unsigned int i; for (i = 0; i < KEY_PTRS(k); i++) if (ptr_available(c, k, i) && @@ -1256,7 +1266,7 @@ void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats) static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) { uint8_t stale = 0; - unsigned keys = 0, good_keys = 0; + unsigned int keys = 0, good_keys = 0; struct bkey *k; struct btree_iter iter; struct bset_tree *t; @@ -1299,16 +1309,18 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc) struct gc_merge_info { struct btree *b; - unsigned keys; + unsigned int keys; }; -static int bch_btree_insert_node(struct btree *, struct btree_op *, - struct keylist *, atomic_t *, struct bkey *); +static int bch_btree_insert_node(struct btree *b, struct btree_op *op, + struct keylist *insert_keys, + atomic_t *journal_ref, + struct bkey *replace_key); static int btree_gc_coalesce(struct btree *b, struct btree_op *op, struct gc_stat *gc, struct gc_merge_info *r) { - unsigned i, nodes = 0, keys = 0, blocks; + unsigned int i, nodes = 0, keys = 0, blocks; struct btree *new_nodes[GC_MERGE_NODES]; struct keylist keylist; struct closure cl; @@ -1508,11 +1520,11 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op, return -EINTR; } -static unsigned btree_gc_count_keys(struct btree *b) +static unsigned int btree_gc_count_keys(struct btree *b) { struct bkey *k; struct btree_iter iter; - unsigned ret = 0; + unsigned int ret = 0; for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad) ret += bkey_u64s(k); @@ -1520,6 +1532,32 @@ static unsigned btree_gc_count_keys(struct btree *b) return ret; } +static size_t btree_gc_min_nodes(struct cache_set *c) +{ + size_t min_nodes; + + /* + * Since incremental GC would stop 100ms when front + * side I/O comes, so when there are many btree nodes, + * if GC only processes constant (100) nodes each time, + * GC would last a long time, and the front side I/Os + * would run out of the buckets (since no new bucket + * can be allocated during GC), and be blocked again. + * So GC should not process constant nodes, but varied + * nodes according to the number of btree nodes, which + * realized by dividing GC into constant(100) times, + * so when there are many btree nodes, GC can process + * more nodes each time, otherwise, GC will process less + * nodes each time (but no less than MIN_GC_NODES) + */ + min_nodes = c->gc_stats.nodes / MAX_GC_TIMES; + if (min_nodes < MIN_GC_NODES) + min_nodes = MIN_GC_NODES; + + return min_nodes; +} + + static int btree_gc_recurse(struct btree *b, struct btree_op *op, struct closure *writes, struct gc_stat *gc) { @@ -1585,6 +1623,13 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op, memmove(r + 1, r, sizeof(r[0]) * (GC_MERGE_NODES - 1)); r->b = NULL; + if (atomic_read(&b->c->search_inflight) && + gc->nodes >= gc->nodes_pre + btree_gc_min_nodes(b->c)) { + gc->nodes_pre = gc->nodes; + ret = -EAGAIN; + break; + } + if (need_resched()) { ret = -EAGAIN; break; @@ -1642,7 +1687,7 @@ static void btree_gc_start(struct cache_set *c) { struct cache *ca; struct bucket *b; - unsigned i; + unsigned int i; if (!c->gc_mark_valid) return; @@ -1668,7 +1713,7 @@ static void bch_btree_gc_finish(struct cache_set *c) { struct bucket *b; struct cache *ca; - unsigned i; + unsigned int i; mutex_lock(&c->bucket_lock); @@ -1686,7 +1731,7 @@ static void bch_btree_gc_finish(struct cache_set *c) struct bcache_device *d = c->devices[i]; struct cached_dev *dc; struct keybuf_key *w, *n; - unsigned j; + unsigned int j; if (!d || UUID_FLASH_ONLY(&c->uuids[i])) continue; @@ -1753,7 +1798,10 @@ static void bch_btree_gc(struct cache_set *c) closure_sync(&writes); cond_resched(); - if (ret && ret != -EAGAIN) + if (ret == -EAGAIN) + schedule_timeout_interruptible(msecs_to_jiffies + (GC_SLEEP_MS)); + else if (ret) pr_warn("gc failed!"); } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags)); @@ -1775,7 +1823,7 @@ static void bch_btree_gc(struct cache_set *c) static bool gc_should_run(struct cache_set *c) { struct cache *ca; - unsigned i; + unsigned int i; for_each_cache(ca, c, i) if (ca->invalidate_needs_gc) @@ -1834,8 +1882,14 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op) do { k = bch_btree_iter_next_filter(&iter, &b->keys, bch_ptr_bad); - if (k) + if (k) { btree_node_prefetch(b, k); + /* + * initiallize c->gc_stats.nodes + * for incremental GC + */ + b->c->gc_stats.nodes++; + } if (p) ret = btree(check_recurse, p, b, op); @@ -1860,7 +1914,7 @@ void bch_initial_gc_finish(struct cache_set *c) { struct cache *ca; struct bucket *b; - unsigned i; + unsigned int i; bch_btree_gc_finish(c); @@ -1900,7 +1954,7 @@ void bch_initial_gc_finish(struct cache_set *c) static bool btree_insert_key(struct btree *b, struct bkey *k, struct bkey *replace_key) { - unsigned status; + unsigned int status; BUG_ON(bkey_cmp(k, &b->key) > 0); @@ -1999,7 +2053,7 @@ static int btree_split(struct btree *b, struct btree_op *op, block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5; if (split) { - unsigned keys = 0; + unsigned int keys = 0; trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys); @@ -2177,10 +2231,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op, rw_lock(true, b, b->level); if (b->key.ptr[0] != btree_ptr || - b->seq != seq + 1) { + b->seq != seq + 1) { op->lock = b->level; goto out; - } + } } SET_KEY_PTRS(check_key, 1); @@ -2255,7 +2309,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys, void bch_btree_set_root(struct btree *b) { - unsigned i; + unsigned int i; struct closure cl; closure_init_stack(&cl); @@ -2367,7 +2421,7 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l, struct refill { struct btree_op op; - unsigned nr_found; + unsigned int nr_found; struct keybuf *buf; struct bkey *end; keybuf_pred_fn *pred; @@ -2443,6 +2497,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf, if (!RB_EMPTY_ROOT(&buf->keys)) { struct keybuf_key *w; + w = RB_FIRST(&buf->keys, struct keybuf_key, node); buf->start = START_KEY(&w->key); @@ -2474,6 +2529,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start, { bool ret = false; struct keybuf_key *p, *w, s; + s.key = *start; if (bkey_cmp(end, &buf->start) <= 0 || @@ -2500,6 +2556,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start, struct keybuf_key *bch_keybuf_next(struct keybuf *buf) { struct keybuf_key *w; + spin_lock(&buf->lock); w = RB_FIRST(&buf->keys, struct keybuf_key, node); |