Diffstat:
 drivers/md/bcache/btree.c | 135 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 96 insertions(+), 39 deletions(-)
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 547c9eedc2f4..e7d4817681f2 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -90,6 +90,9 @@
 
 #define MAX_NEED_GC		64
 #define MAX_SAVE_PRIO		72
+#define MAX_GC_TIMES		100
+#define MIN_GC_NODES		100
+#define GC_SLEEP_MS		100
 
 #define PTR_DIRTY_BIT		(((uint64_t) 1 << 36))
 
@@ -180,7 +183,7 @@ static void bch_btree_init_next(struct btree *b)
 
 void bkey_put(struct cache_set *c, struct bkey *k)
 {
-	unsigned i;
+	unsigned int i;
 
 	for (i = 0; i < KEY_PTRS(k); i++)
 		if (ptr_available(c, k, i))
@@ -284,6 +287,7 @@ err:
 static void btree_node_read_endio(struct bio *bio)
 {
 	struct closure *cl = bio->bi_private;
+
 	closure_put(cl);
 }
 
@@ -432,7 +436,10 @@ static void do_btree_node_write(struct btree *b)
 
 		continue_at(cl, btree_node_write_done, NULL);
 	} else {
-		/* No problem for multipage bvec since the bio is just allocated */
+		/*
+		 * No problem for multipage bvec since the bio is
+		 * just allocated
+		 */
 		b->bio->bi_vcnt = 0;
 		bch_bio_map(b->bio, i);
 
@@ -476,7 +483,7 @@ void __bch_btree_node_write(struct btree *b, struct closure *parent)
 
 void bch_btree_node_write(struct btree *b, struct closure *parent)
 {
-	unsigned nsets = b->keys.nsets;
+	unsigned int nsets = b->keys.nsets;
 
 	lockdep_assert_held(&b->lock);
 
@@ -578,7 +585,7 @@ static void mca_bucket_free(struct btree *b)
 	list_move(&b->list, &b->c->btree_cache_freeable);
 }
 
-static unsigned btree_order(struct bkey *k)
+static unsigned int btree_order(struct bkey *k)
 {
 	return ilog2(KEY_SIZE(k) / PAGE_SECTORS ?: 1);
 }
@@ -586,7 +593,7 @@ static unsigned btree_order(struct bkey *k)
 static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
 {
 	if (!bch_btree_keys_alloc(&b->keys,
-				  max_t(unsigned,
+				  max_t(unsigned int,
 					ilog2(b->c->btree_pages),
 					btree_order(k)),
 				  gfp)) {
@@ -601,6 +608,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
 				      struct bkey *k, gfp_t gfp)
 {
 	struct btree *b = kzalloc(sizeof(struct btree), gfp);
+
 	if (!b)
 		return NULL;
 
@@ -617,7 +625,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c,
 	return b;
 }
 
-static int mca_reap(struct btree *b, unsigned min_order, bool flush)
+static int mca_reap(struct btree *b, unsigned int min_order, bool flush)
 {
 	struct closure cl;
 
@@ -743,6 +751,7 @@ void bch_btree_cache_free(struct cache_set *c)
 {
 	struct btree *b;
 	struct closure cl;
+
 	closure_init_stack(&cl);
 
 	if (c->shrink.list.next)
@@ -783,7 +792,7 @@ void bch_btree_cache_free(struct cache_set *c)
 
 int bch_btree_cache_alloc(struct cache_set *c)
 {
-	unsigned i;
+	unsigned int i;
 
 	for (i = 0; i < mca_reserve(c); i++)
 		if (!mca_bucket_alloc(c, &ZERO_KEY, GFP_KERNEL))
@@ -1008,6 +1017,13 @@ retry:
 		BUG_ON(b->level != level);
 	}
 
+	if (btree_node_io_error(b)) {
+		rw_unlock(write, b);
+		return ERR_PTR(-EIO);
+	}
+
+	BUG_ON(!b->written);
+
 	b->parent = parent;
 	b->accessed = 1;
 
@@ -1019,13 +1035,6 @@ retry:
 	for (; i <= b->keys.nsets; i++)
 		prefetch(b->keys.set[i].data);
 
-	if (btree_node_io_error(b)) {
-		rw_unlock(write, b);
-		return ERR_PTR(-EIO);
-	}
-
-	BUG_ON(!b->written);
-
 	return b;
 }
 
@@ -1121,6 +1130,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b,
 						  struct btree_op *op)
 {
 	struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent);
+
 	if (!IS_ERR_OR_NULL(n)) {
 		mutex_lock(&n->write_lock);
 		bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort);
@@ -1133,7 +1143,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b,
 
 static void make_btree_freeing_key(struct btree *b, struct bkey *k)
 {
-	unsigned i;
+	unsigned int i;
 
 	mutex_lock(&b->c->bucket_lock);
 
@@ -1154,7 +1164,7 @@ static int btree_check_reserve(struct btree *b, struct btree_op *op)
 {
 	struct cache_set *c = b->c;
 	struct cache *ca;
-	unsigned i, reserve = (c->root->level - b->level) * 2 + 1;
+	unsigned int i, reserve = (c->root->level - b->level) * 2 + 1;
 
 	mutex_lock(&c->bucket_lock);
 
@@ -1178,7 +1188,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
 				    struct bkey *k)
 {
 	uint8_t stale = 0;
-	unsigned i;
+	unsigned int i;
 	struct bucket *g;
 
 	/*
@@ -1216,7 +1226,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
 			SET_GC_MARK(g, GC_MARK_RECLAIMABLE);
 
 		/* guard against overflow */
-		SET_GC_SECTORS_USED(g, min_t(unsigned,
+		SET_GC_SECTORS_USED(g, min_t(unsigned int,
 					     GC_SECTORS_USED(g) + KEY_SIZE(k),
 					     MAX_GC_SECTORS_USED));
 
@@ -1230,7 +1240,7 @@ static uint8_t __bch_btree_mark_key(struct cache_set *c, int level,
 
 void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k)
 {
-	unsigned i;
+	unsigned int i;
 
 	for (i = 0; i < KEY_PTRS(k); i++)
 		if (ptr_available(c, k, i) &&
@@ -1256,7 +1266,7 @@ void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats)
 static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
 {
 	uint8_t stale = 0;
-	unsigned keys = 0, good_keys = 0;
+	unsigned int keys = 0, good_keys = 0;
 	struct bkey *k;
 	struct btree_iter iter;
 	struct bset_tree *t;
@@ -1299,16 +1309,18 @@ static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
 
 struct gc_merge_info {
 	struct btree	*b;
-	unsigned	keys;
+	unsigned int	keys;
 };
 
-static int bch_btree_insert_node(struct btree *, struct btree_op *,
-				 struct keylist *, atomic_t *, struct bkey *);
+static int bch_btree_insert_node(struct btree *b, struct btree_op *op,
+				 struct keylist *insert_keys,
+				 atomic_t *journal_ref,
+				 struct bkey *replace_key);
 
 static int btree_gc_coalesce(struct btree *b, struct btree_op *op,
 			     struct gc_stat *gc, struct gc_merge_info *r)
 {
-	unsigned i, nodes = 0, keys = 0, blocks;
+	unsigned int i, nodes = 0, keys = 0, blocks;
 	struct btree *new_nodes[GC_MERGE_NODES];
 	struct keylist keylist;
 	struct closure cl;
@@ -1508,11 +1520,11 @@ static int btree_gc_rewrite_node(struct btree *b, struct btree_op *op,
 	return -EINTR;
 }
 
-static unsigned btree_gc_count_keys(struct btree *b)
+static unsigned int btree_gc_count_keys(struct btree *b)
 {
 	struct bkey *k;
 	struct btree_iter iter;
-	unsigned ret = 0;
+	unsigned int ret = 0;
 
 	for_each_key_filter(&b->keys, k, &iter, bch_ptr_bad)
 		ret += bkey_u64s(k);
@@ -1520,6 +1532,32 @@ static unsigned btree_gc_count_keys(struct btree *b)
 	return ret;
 }
 
+static size_t btree_gc_min_nodes(struct cache_set *c)
+{
+	size_t min_nodes;
+
+	/*
+	 * Since incremental GC would stop 100ms when front
+	 * side I/O comes, so when there are many btree nodes,
+	 * if GC only processes constant (100) nodes each time,
+	 * GC would last a long time, and the front side I/Os
+	 * would run out of the buckets (since no new bucket
+	 * can be allocated during GC), and be blocked again.
+	 * So GC should not process constant nodes, but varied
+	 * nodes according to the number of btree nodes, which
+	 * realized by dividing GC into constant(100) times,
+	 * so when there are many btree nodes, GC can process
+	 * more nodes each time, otherwise, GC will process less
+	 * nodes each time (but no less than MIN_GC_NODES)
+	 */
+	min_nodes = c->gc_stats.nodes / MAX_GC_TIMES;
+	if (min_nodes < MIN_GC_NODES)
+		min_nodes = MIN_GC_NODES;
+
+	return min_nodes;
+}
+
+
 static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 			    struct closure *writes, struct gc_stat *gc)
 {
@@ -1585,6 +1623,13 @@ static int btree_gc_recurse(struct btree *b, struct btree_op *op,
 		memmove(r + 1, r, sizeof(r[0]) * (GC_MERGE_NODES - 1));
 		r->b = NULL;
 
+		if (atomic_read(&b->c->search_inflight) &&
+		    gc->nodes >= gc->nodes_pre + btree_gc_min_nodes(b->c)) {
+			gc->nodes_pre =  gc->nodes;
+			ret = -EAGAIN;
+			break;
+		}
+
 		if (need_resched()) {
 			ret = -EAGAIN;
 			break;
@@ -1642,7 +1687,7 @@ static void btree_gc_start(struct cache_set *c)
 {
 	struct cache *ca;
 	struct bucket *b;
-	unsigned i;
+	unsigned int i;
 
 	if (!c->gc_mark_valid)
 		return;
@@ -1668,7 +1713,7 @@ static void bch_btree_gc_finish(struct cache_set *c)
 {
 	struct bucket *b;
 	struct cache *ca;
-	unsigned i;
+	unsigned int i;
 
 	mutex_lock(&c->bucket_lock);
 
@@ -1686,7 +1731,7 @@ static void bch_btree_gc_finish(struct cache_set *c)
 		struct bcache_device *d = c->devices[i];
 		struct cached_dev *dc;
 		struct keybuf_key *w, *n;
-		unsigned j;
+		unsigned int j;
 
 		if (!d || UUID_FLASH_ONLY(&c->uuids[i]))
 			continue;
@@ -1753,7 +1798,10 @@ static void bch_btree_gc(struct cache_set *c)
 		closure_sync(&writes);
 		cond_resched();
 
-		if (ret && ret != -EAGAIN)
+		if (ret == -EAGAIN)
+			schedule_timeout_interruptible(msecs_to_jiffies
+						       (GC_SLEEP_MS));
+		else if (ret)
 			pr_warn("gc failed!");
 	} while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
 
@@ -1775,7 +1823,7 @@ static void bch_btree_gc(struct cache_set *c)
 static bool gc_should_run(struct cache_set *c)
 {
 	struct cache *ca;
-	unsigned i;
+	unsigned int i;
 
 	for_each_cache(ca, c, i)
 		if (ca->invalidate_needs_gc)
@@ -1834,8 +1882,14 @@ static int bch_btree_check_recurse(struct btree *b, struct btree_op *op)
 		do {
 			k = bch_btree_iter_next_filter(&iter, &b->keys,
 						       bch_ptr_bad);
-			if (k)
+			if (k) {
 				btree_node_prefetch(b, k);
+				/*
+				 * initiallize c->gc_stats.nodes
+				 * for incremental GC
+				 */
+				b->c->gc_stats.nodes++;
+			}
 
 			if (p)
 				ret = btree(check_recurse, p, b, op);
@@ -1860,7 +1914,7 @@ void bch_initial_gc_finish(struct cache_set *c)
 {
 	struct cache *ca;
 	struct bucket *b;
-	unsigned i;
+	unsigned int i;
 
 	bch_btree_gc_finish(c);
 
@@ -1900,7 +1954,7 @@ void bch_initial_gc_finish(struct cache_set *c)
 static bool btree_insert_key(struct btree *b, struct bkey *k,
 			     struct bkey *replace_key)
 {
-	unsigned status;
+	unsigned int status;
 
 	BUG_ON(bkey_cmp(k, &b->key) > 0);
 
@@ -1999,7 +2053,7 @@ static int btree_split(struct btree *b, struct btree_op *op,
 			   block_bytes(n1->c)) > (btree_blocks(b) * 4) / 5;
 
 	if (split) {
-		unsigned keys = 0;
+		unsigned int keys = 0;
 
 		trace_bcache_btree_node_split(b, btree_bset_first(n1)->keys);
 
@@ -2177,10 +2231,10 @@ int bch_btree_insert_check_key(struct btree *b, struct btree_op *op,
 		rw_lock(true, b, b->level);
 
 		if (b->key.ptr[0] != btree_ptr ||
-                   b->seq != seq + 1) {
+		    b->seq != seq + 1) {
 			op->lock = b->level;
 			goto out;
-               }
+		}
 	}
 
 	SET_KEY_PTRS(check_key, 1);
@@ -2255,7 +2309,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys,
 
 void bch_btree_set_root(struct btree *b)
 {
-	unsigned i;
+	unsigned int i;
 	struct closure cl;
 
 	closure_init_stack(&cl);
@@ -2367,7 +2421,7 @@ static inline int keybuf_nonoverlapping_cmp(struct keybuf_key *l,
 
 struct refill {
 	struct btree_op	op;
-	unsigned	nr_found;
+	unsigned int	nr_found;
 	struct keybuf	*buf;
 	struct bkey	*end;
 	keybuf_pred_fn	*pred;
@@ -2443,6 +2497,7 @@ void bch_refill_keybuf(struct cache_set *c, struct keybuf *buf,
 
 	if (!RB_EMPTY_ROOT(&buf->keys)) {
 		struct keybuf_key *w;
+
 		w = RB_FIRST(&buf->keys, struct keybuf_key, node);
 
 		buf->start	= START_KEY(&w->key);
@@ -2474,6 +2529,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
 {
 	bool ret = false;
 	struct keybuf_key *p, *w, s;
+
 	s.key = *start;
 
 	if (bkey_cmp(end, &buf->start) <= 0 ||
@@ -2500,6 +2556,7 @@ bool bch_keybuf_check_overlapping(struct keybuf *buf, struct bkey *start,
 struct keybuf_key *bch_keybuf_next(struct keybuf *buf)
 {
 	struct keybuf_key *w;
+
 	spin_lock(&buf->lock);
 
 	w = RB_FIRST(&buf->keys, struct keybuf_key, node);
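Note on the incremental GC sizing above: btree_gc_min_nodes() makes each GC slice proportional to the total number of btree nodes (which bch_btree_check_recurse() now accumulates into c->gc_stats.nodes), with MIN_GC_NODES as a floor, so a full GC is split into at most roughly MAX_GC_TIMES slices. The user-space sketch below only mirrors that arithmetic; gc_min_nodes(), the demo values and main() are illustrative and not part of the patch.

	/*
	 * Standalone sketch (not kernel code) of the batch-size rule added
	 * by btree_gc_min_nodes(): one GC slice covers about 1/MAX_GC_TIMES
	 * of all btree nodes, but never fewer than MIN_GC_NODES.
	 */
	#include <stdio.h>
	#include <stddef.h>

	#define MAX_GC_TIMES	100	/* split a full GC into ~100 slices */
	#define MIN_GC_NODES	100	/* lower bound for one slice */

	static size_t gc_min_nodes(size_t total_nodes)
	{
		size_t min_nodes = total_nodes / MAX_GC_TIMES;

		return min_nodes < MIN_GC_NODES ? MIN_GC_NODES : min_nodes;
	}

	int main(void)
	{
		/* hypothetical btree sizes: small, medium, large cache */
		size_t sizes[] = { 500, 20000, 1000000 };

		for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
			printf("%zu nodes -> at least %zu nodes per GC slice\n",
			       sizes[i], gc_min_nodes(sizes[i]));
		return 0;
	}

With 1,000,000 nodes a slice covers 10,000 nodes, while a small tree still gets the MIN_GC_NODES floor of 100, so neither extreme degenerates into an over-long or over-fragmented GC.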

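Note on the pause/resume path: btree_gc_recurse() now bails out with -EAGAIN once a slice worth of nodes has been processed while front-end I/O is pending (c->search_inflight), and bch_btree_gc() sleeps GC_SLEEP_MS before retrying instead of treating -EAGAIN as a failure. A minimal user-space sketch of that loop shape, assuming a hypothetical gc_one_slice() work model (only the pause-and-resume structure follows the diff):

	#include <stdio.h>
	#include <unistd.h>

	#define GC_SLEEP_MS	100

	/* process up to one slice of work; return -1 if more work remains */
	static int gc_one_slice(int *done, int total, int slice)
	{
		int end = *done + slice < total ? *done + slice : total;

		*done = end;
		return end < total ? -1 /* analogous to -EAGAIN */ : 0;
	}

	int main(void)
	{
		int done = 0, total = 1050, slice = 100, ret;

		do {
			ret = gc_one_slice(&done, total, slice);
			printf("processed %d/%d nodes\n", done, total);
			if (ret)
				usleep(GC_SLEEP_MS * 1000);	/* let front-end I/O run */
		} while (ret);
		return 0;
	}

The design point is that GC never holds off the front end for longer than one slice: it trades a longer total GC run for bounded front-end I/O latency.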