diff options
Diffstat (limited to 'drivers/md/bcache/journal.c')
-rw-r--r-- | drivers/md/bcache/journal.c | 326 |
1 files changed, 159 insertions, 167 deletions
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 0e3ff9745ac7..e5da469a4235 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -47,20 +47,18 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list, closure_init_stack(&cl); - pr_debug("reading %u", bucket_index); + pr_debug("reading %u\n", bucket_index); while (offset < ca->sb.bucket_size) { reread: left = ca->sb.bucket_size - offset; len = min_t(unsigned int, left, PAGE_SECTORS << JSET_BITS); - bio_reset(bio); + bio_reset(bio, ca->bdev, REQ_OP_READ); bio->bi_iter.bi_sector = bucket + offset; - bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = len << 9; bio->bi_end_io = journal_read_endio; bio->bi_private = &cl; - bio_set_op_attrs(bio, REQ_OP_READ, 0); bch_bio_map(bio, data); closure_bio_submit(ca->set, bio, &cl); @@ -78,13 +76,13 @@ reread: left = ca->sb.bucket_size - offset; size_t blocks, bytes = set_bytes(j); if (j->magic != jset_magic(&ca->sb)) { - pr_debug("%u: bad magic", bucket_index); + pr_debug("%u: bad magic\n", bucket_index); return ret; } if (bytes > left << 9 || bytes > PAGE_SIZE << JSET_BITS) { - pr_info("%u: too big, %zu bytes, offset %u", + pr_info("%u: too big, %zu bytes, offset %u\n", bucket_index, bytes, offset); return ret; } @@ -93,12 +91,12 @@ reread: left = ca->sb.bucket_size - offset; goto reread; if (j->csum != csum_set(j)) { - pr_info("%u: bad csum, %zu bytes, offset %u", + pr_info("%u: bad csum, %zu bytes, offset %u\n", bucket_index, bytes, offset); return ret; } - blocks = set_blocks(j, block_bytes(ca->set)); + blocks = set_blocks(j, block_bytes(ca)); /* * Nodes in 'list' are in linear increasing order of @@ -111,7 +109,7 @@ reread: left = ca->sb.bucket_size - offset; * Check from the oldest jset for last_seq. If * i->j.seq < j->last_seq, it means the oldest jset * in list is expired and useless, remove it from - * this list. Otherwise, j is a condidate jset for + * this list. Otherwise, j is a candidate jset for * further following checks. */ while (!list_empty(list)) { @@ -179,115 +177,109 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) ret; \ }) - struct cache *ca; - unsigned int iter; + struct cache *ca = c->cache; int ret = 0; + struct journal_device *ja = &ca->journal; + DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS); + unsigned int i, l, r, m; + uint64_t seq; - for_each_cache(ca, c, iter) { - struct journal_device *ja = &ca->journal; - DECLARE_BITMAP(bitmap, SB_JOURNAL_BUCKETS); - unsigned int i, l, r, m; - uint64_t seq; - - bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); - pr_debug("%u journal buckets", ca->sb.njournal_buckets); + bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); + pr_debug("%u journal buckets\n", ca->sb.njournal_buckets); + /* + * Read journal buckets ordered by golden ratio hash to quickly + * find a sequence of buckets with valid journal entries + */ + for (i = 0; i < ca->sb.njournal_buckets; i++) { /* - * Read journal buckets ordered by golden ratio hash to quickly - * find a sequence of buckets with valid journal entries + * We must try the index l with ZERO first for + * correctness due to the scenario that the journal + * bucket is circular buffer which might have wrapped */ - for (i = 0; i < ca->sb.njournal_buckets; i++) { - /* - * We must try the index l with ZERO first for - * correctness due to the scenario that the journal - * bucket is circular buffer which might have wrapped - */ - l = (i * 2654435769U) % ca->sb.njournal_buckets; + l = (i * 2654435769U) % ca->sb.njournal_buckets; - if (test_bit(l, bitmap)) - break; + if (test_bit(l, bitmap)) + break; - if (read_bucket(l)) - goto bsearch; - } + if (read_bucket(l)) + goto bsearch; + } - /* - * If that fails, check all the buckets we haven't checked - * already - */ - pr_debug("falling back to linear search"); + /* + * If that fails, check all the buckets we haven't checked + * already + */ + pr_debug("falling back to linear search\n"); - for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets); - l < ca->sb.njournal_buckets; - l = find_next_zero_bit(bitmap, ca->sb.njournal_buckets, - l + 1)) - if (read_bucket(l)) - goto bsearch; + for_each_clear_bit(l, bitmap, ca->sb.njournal_buckets) + if (read_bucket(l)) + goto bsearch; - /* no journal entries on this device? */ - if (l == ca->sb.njournal_buckets) - continue; + /* no journal entries on this device? */ + if (l == ca->sb.njournal_buckets) + goto out; bsearch: - BUG_ON(list_empty(list)); + BUG_ON(list_empty(list)); - /* Binary search */ - m = l; - r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); - pr_debug("starting binary search, l %u r %u", l, r); + /* Binary search */ + m = l; + r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); + pr_debug("starting binary search, l %u r %u\n", l, r); - while (l + 1 < r) { - seq = list_entry(list->prev, struct journal_replay, - list)->j.seq; + while (l + 1 < r) { + seq = list_entry(list->prev, struct journal_replay, + list)->j.seq; - m = (l + r) >> 1; - read_bucket(m); + m = (l + r) >> 1; + read_bucket(m); - if (seq != list_entry(list->prev, struct journal_replay, - list)->j.seq) - l = m; - else - r = m; - } + if (seq != list_entry(list->prev, struct journal_replay, + list)->j.seq) + l = m; + else + r = m; + } - /* - * Read buckets in reverse order until we stop finding more - * journal entries - */ - pr_debug("finishing up: m %u njournal_buckets %u", - m, ca->sb.njournal_buckets); - l = m; + /* + * Read buckets in reverse order until we stop finding more + * journal entries + */ + pr_debug("finishing up: m %u njournal_buckets %u\n", + m, ca->sb.njournal_buckets); + l = m; - while (1) { - if (!l--) - l = ca->sb.njournal_buckets - 1; + while (1) { + if (!l--) + l = ca->sb.njournal_buckets - 1; - if (l == m) - break; + if (l == m) + break; - if (test_bit(l, bitmap)) - continue; + if (test_bit(l, bitmap)) + continue; - if (!read_bucket(l)) - break; - } + if (!read_bucket(l)) + break; + } - seq = 0; + seq = 0; - for (i = 0; i < ca->sb.njournal_buckets; i++) - if (ja->seq[i] > seq) { - seq = ja->seq[i]; - /* - * When journal_reclaim() goes to allocate for - * the first time, it'll use the bucket after - * ja->cur_idx - */ - ja->cur_idx = i; - ja->last_idx = ja->discard_idx = (i + 1) % - ca->sb.njournal_buckets; + for (i = 0; i < ca->sb.njournal_buckets; i++) + if (ja->seq[i] > seq) { + seq = ja->seq[i]; + /* + * When journal_reclaim() goes to allocate for + * the first time, it'll use the bucket after + * ja->cur_idx + */ + ja->cur_idx = i; + ja->last_idx = ja->discard_idx = (i + 1) % + ca->sb.njournal_buckets; - } - } + } +out: if (!list_empty(list)) c->journal.seq = list_entry(list->prev, struct journal_replay, @@ -345,12 +337,10 @@ void bch_journal_mark(struct cache_set *c, struct list_head *list) static bool is_discard_enabled(struct cache_set *s) { - struct cache *ca; - unsigned int i; + struct cache *ca = s->cache; - for_each_cache(ca, s, i) - if (ca->discard) - return true; + if (ca->discard) + return true; return false; } @@ -370,10 +360,10 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) if (n != i->j.seq) { if (n == start && is_discard_enabled(s)) - pr_info("bcache: journal entries %llu-%llu may be discarded! (replaying %llu-%llu)", + pr_info("journal entries %llu-%llu may be discarded! (replaying %llu-%llu)\n", n, i->j.seq - 1, start, end); else { - pr_err("bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)", + pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n", n, i->j.seq - 1, start, end); ret = -EIO; goto err; @@ -403,7 +393,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) entries++; } - pr_info("journal replay done, %i keys in %i entries, seq %llu", + pr_info("journal replay done, %i keys in %i entries, seq %llu\n", keys, entries, end); err: while (!list_empty(list)) { @@ -415,6 +405,11 @@ err: return ret; } +void bch_journal_space_reserve(struct journal *j) +{ + j->do_reserve = true; +} + /* Journalling */ static void btree_flush_write(struct cache_set *c) @@ -481,7 +476,7 @@ static void btree_flush_write(struct cache_set *c) break; if (btree_node_journal_flush(b)) - pr_err("BUG: flush_write bit should not be set here!"); + pr_err("BUG: flush_write bit should not be set here!\n"); mutex_lock(&b->write_lock); @@ -506,7 +501,7 @@ static void btree_flush_write(struct cache_set *c) * - If there are matched nodes recorded in btree_nodes[], * they are clean now (this is why and how the oldest * journal entry can be reclaimed). These selected nodes - * will be ignored and skipped in the folowing for-loop. + * will be ignored and skipped in the following for-loop. */ if (((btree_current_write(b)->journal - fifo_front_p) & mask) != 0) { @@ -534,13 +529,13 @@ static void btree_flush_write(struct cache_set *c) for (i = 0; i < nr; i++) { b = btree_nodes[i]; if (!b) { - pr_err("BUG: btree_nodes[%d] is NULL", i); + pr_err("BUG: btree_nodes[%d] is NULL\n", i); continue; } /* safe to check without holding b->write_lock */ if (!btree_node_journal_flush(b)) { - pr_err("BUG: bnode %p: journal_flush bit cleaned", b); + pr_err("BUG: bnode %p: journal_flush bit cleaned\n", b); continue; } @@ -548,14 +543,14 @@ static void btree_flush_write(struct cache_set *c) if (!btree_current_write(b)->journal) { clear_bit(BTREE_NODE_journal_flush, &b->flags); mutex_unlock(&b->write_lock); - pr_debug("bnode %p: written by others", b); + pr_debug("bnode %p: written by others\n", b); continue; } if (!btree_node_dirty(b)) { clear_bit(BTREE_NODE_journal_flush, &b->flags); mutex_unlock(&b->write_lock); - pr_debug("bnode %p: dirty bit cleaned by others", b); + pr_debug("bnode %p: dirty bit cleaned by others\n", b); continue; } @@ -611,7 +606,7 @@ static void do_journal_discard(struct cache *ca) ca->sb.njournal_buckets; atomic_set(&ja->discard_in_flight, DISCARD_READY); - /* fallthrough */ + fallthrough; case DISCARD_READY: if (ja->discard_idx == ja->last_idx) @@ -619,11 +614,9 @@ static void do_journal_discard(struct cache *ca) atomic_set(&ja->discard_in_flight, DISCARD_IN_FLIGHT); - bio_init(bio, bio->bi_inline_vecs, 1); - bio_set_op_attrs(bio, REQ_OP_DISCARD, 0); + bio_init(bio, ca->bdev, bio->bi_inline_vecs, 1, REQ_OP_DISCARD); bio->bi_iter.bi_sector = bucket_to_sector(ca->set, ca->sb.d[ja->discard_idx]); - bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = bucket_bytes(ca); bio->bi_end_io = journal_discard_endio; @@ -633,12 +626,31 @@ static void do_journal_discard(struct cache *ca) } } +static unsigned int free_journal_buckets(struct cache_set *c) +{ + struct journal *j = &c->journal; + struct cache *ca = c->cache; + struct journal_device *ja = &c->cache->journal; + unsigned int n; + + /* In case njournal_buckets is not power of 2 */ + if (ja->cur_idx >= ja->discard_idx) + n = ca->sb.njournal_buckets + ja->discard_idx - ja->cur_idx; + else + n = ja->discard_idx - ja->cur_idx; + + if (n > (1 + j->do_reserve)) + return n - (1 + j->do_reserve); + + return 0; +} + static void journal_reclaim(struct cache_set *c) { struct bkey *k = &c->journal.key; - struct cache *ca; + struct cache *ca = c->cache; uint64_t last_seq; - unsigned int iter, n = 0; + struct journal_device *ja = &ca->journal; atomic_t p __maybe_unused; atomic_long_inc(&c->reclaim); @@ -650,46 +662,29 @@ static void journal_reclaim(struct cache_set *c) /* Update last_idx */ - for_each_cache(ca, c, iter) { - struct journal_device *ja = &ca->journal; - - while (ja->last_idx != ja->cur_idx && - ja->seq[ja->last_idx] < last_seq) - ja->last_idx = (ja->last_idx + 1) % - ca->sb.njournal_buckets; - } + while (ja->last_idx != ja->cur_idx && + ja->seq[ja->last_idx] < last_seq) + ja->last_idx = (ja->last_idx + 1) % + ca->sb.njournal_buckets; - for_each_cache(ca, c, iter) - do_journal_discard(ca); + do_journal_discard(ca); if (c->journal.blocks_free) goto out; - /* - * Allocate: - * XXX: Sort by free journal space - */ + if (!free_journal_buckets(c)) + goto out; - for_each_cache(ca, c, iter) { - struct journal_device *ja = &ca->journal; - unsigned int next = (ja->cur_idx + 1) % ca->sb.njournal_buckets; + ja->cur_idx = (ja->cur_idx + 1) % ca->sb.njournal_buckets; + k->ptr[0] = MAKE_PTR(0, + bucket_to_sector(c, ca->sb.d[ja->cur_idx]), + ca->sb.nr_this_dev); + atomic_long_inc(&c->reclaimed_journal_buckets); - /* No space available on this device */ - if (next == ja->discard_idx) - continue; + bkey_init(k); + SET_KEY_PTRS(k, 1); + c->journal.blocks_free = ca->sb.bucket_size >> c->block_bits; - ja->cur_idx = next; - k->ptr[n++] = MAKE_PTR(0, - bucket_to_sector(c, ca->sb.d[ja->cur_idx]), - ca->sb.nr_this_dev); - atomic_long_inc(&c->reclaimed_journal_buckets); - } - - if (n) { - bkey_init(k); - SET_KEY_PTRS(k, n); - c->journal.blocks_free = c->sb.bucket_size >> c->block_bits; - } out: if (!journal_full(&c->journal)) __closure_wake_up(&c->journal.wait); @@ -716,7 +711,7 @@ void bch_journal_next(struct journal *j) j->cur->data->keys = 0; if (fifo_full(&j->pin)) - pr_debug("journal_pin full (%zu)", fifo_used(&j->pin)); + pr_debug("journal_pin full (%zu)\n", fifo_used(&j->pin)); } static void journal_write_endio(struct bio *bio) @@ -753,11 +748,11 @@ static void journal_write_unlocked(struct closure *cl) __releases(c->journal.lock) { struct cache_set *c = container_of(cl, struct cache_set, journal.io); - struct cache *ca; + struct cache *ca = c->cache; struct journal_write *w = c->journal.cur; struct bkey *k = &c->journal.key; - unsigned int i, sectors = set_blocks(w->data, block_bytes(c)) * - c->sb.block_size; + unsigned int i, sectors = set_blocks(w->data, block_bytes(ca)) * + ca->sb.block_size; struct bio *bio; struct bio_list list; @@ -776,36 +771,32 @@ static void journal_write_unlocked(struct closure *cl) return; } - c->journal.blocks_free -= set_blocks(w->data, block_bytes(c)); + c->journal.blocks_free -= set_blocks(w->data, block_bytes(ca)); w->data->btree_level = c->root->level; bkey_copy(&w->data->btree_root, &c->root->key); bkey_copy(&w->data->uuid_bucket, &c->uuid_bucket); - for_each_cache(ca, c, i) - w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0]; - - w->data->magic = jset_magic(&c->sb); + w->data->prio_bucket[ca->sb.nr_this_dev] = ca->prio_buckets[0]; + w->data->magic = jset_magic(&ca->sb); w->data->version = BCACHE_JSET_VERSION; w->data->last_seq = last_seq(&c->journal); w->data->csum = csum_set(w->data); for (i = 0; i < KEY_PTRS(k); i++) { - ca = PTR_CACHE(c, k, i); + ca = c->cache; bio = &ca->journal.bio; atomic_long_add(sectors, &ca->meta_sectors_written); - bio_reset(bio); + bio_reset(bio, ca->bdev, REQ_OP_WRITE | + REQ_SYNC | REQ_META | REQ_PREFLUSH | REQ_FUA); bio->bi_iter.bi_sector = PTR_OFFSET(k, i); - bio_set_dev(bio, ca->bdev); bio->bi_iter.bi_size = sectors << 9; bio->bi_end_io = journal_write_endio; bio->bi_private = w; - bio_set_op_attrs(bio, REQ_OP_WRITE, - REQ_SYNC|REQ_META|REQ_PREFLUSH|REQ_FUA); bch_bio_map(bio, w->data); trace_bcache_journal_write(bio, w->data->keys); @@ -862,6 +853,7 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c, size_t sectors; struct closure cl; bool wait = false; + struct cache *ca = c->cache; closure_init_stack(&cl); @@ -871,10 +863,10 @@ static struct journal_write *journal_wait_for_write(struct cache_set *c, struct journal_write *w = c->journal.cur; sectors = __set_blocks(w->data, w->data->keys + nkeys, - block_bytes(c)) * c->sb.block_size; + block_bytes(ca)) * ca->sb.block_size; if (sectors <= min_t(size_t, - c->journal.blocks_free * c->sb.block_size, + c->journal.blocks_free * ca->sb.block_size, PAGE_SECTORS << JSET_BITS)) return w; @@ -939,7 +931,7 @@ atomic_t *bch_journal(struct cache_set *c, if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) return NULL; - if (!CACHE_SYNC(&c->sb)) + if (!CACHE_SYNC(&c->cache->sb)) return NULL; w = journal_wait_for_write(c, bch_keylist_nkeys(keys)); @@ -955,8 +947,8 @@ atomic_t *bch_journal(struct cache_set *c, journal_try_write(c); } else if (!w->dirty) { w->dirty = true; - schedule_delayed_work(&c->journal.work, - msecs_to_jiffies(c->journal_delay_ms)); + queue_delayed_work(bch_flush_wq, &c->journal.work, + msecs_to_jiffies(c->journal_delay_ms)); spin_unlock(&c->journal.lock); } else { spin_unlock(&c->journal.lock); @@ -999,8 +991,8 @@ int bch_journal_alloc(struct cache_set *c) j->w[1].c = c; if (!(init_fifo(&j->pin, JOURNAL_PIN, GFP_KERNEL)) || - !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS)) || - !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL, JSET_BITS))) + !(j->w[0].data = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP, JSET_BITS)) || + !(j->w[1].data = (void *) __get_free_pages(GFP_KERNEL|__GFP_COMP, JSET_BITS))) return -ENOMEM; return 0; |