Diffstat (limited to 'drivers/md/bcache')
 drivers/md/bcache/alloc.c   |  18
 drivers/md/bcache/bcache.h  |   5
 drivers/md/bcache/bset.c    |   5
 drivers/md/bcache/bset.h    |   3
 drivers/md/bcache/btree.c   |  37
 drivers/md/bcache/btree.h   |   2
 drivers/md/bcache/journal.c |  78
 drivers/md/bcache/request.c |  17
 drivers/md/bcache/stats.c   |  10
 drivers/md/bcache/super.c   | 174
 drivers/md/bcache/sysfs.c   |  22
 11 files changed, 252 insertions(+), 119 deletions(-)
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index a1df0d95151c..8bc1faf71ff2 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -67,6 +67,7 @@
#include <linux/blkdev.h>
#include <linux/kthread.h>
#include <linux/random.h>
+#include <linux/sched/signal.h>
#include <trace/events/bcache.h>
#define MAX_OPEN_BUCKETS 128
@@ -733,8 +734,21 @@ int bch_open_buckets_alloc(struct cache_set *c)
int bch_cache_allocator_start(struct cache *ca)
{
- struct task_struct *k = kthread_run(bch_allocator_thread,
- ca, "bcache_allocator");
+ struct task_struct *k;
+
+ /*
+ * A previous btree check operation may occupy so much system
+ * memory for the bcache btree node cache that the registering
+ * process gets selected by the OOM killer. Ignore the SIGKILL
+ * sent by the OOM killer, if there is one, so that pending
+ * signals do not make kthread_run() fail. The bcache
+ * registering process will exit after the registration is done.
+ */
+ if (signal_pending(current))
+ flush_signals(current);
+
+ k = kthread_run(bch_allocator_thread, ca, "bcache_allocator");
if (IS_ERR(k))
return PTR_ERR(k);
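
The same flush-before-kthread_run pattern is applied to the GC thread in btree.c below. As a minimal, hedged sketch (kernel-style C; the helper name is illustrative, not part of the patch), the idea reduces to:

        /* Clear a pending (OOM-killer) SIGKILL so kthread_run() does not
         * bail out with -EINTR before the worker thread is spawned. */
        #include <linux/err.h>
        #include <linux/kthread.h>
        #include <linux/sched/signal.h>

        static int start_worker(int (*fn)(void *), void *arg, const char *name)
        {
                struct task_struct *k;

                if (signal_pending(current))
                        flush_signals(current);

                k = kthread_run(fn, arg, "%s", name);
                return IS_ERR(k) ? PTR_ERR(k) : 0;
        }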
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 9198c1b480d9..74a9849ea164 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -301,6 +301,7 @@ struct cached_dev {
struct block_device *bdev;
struct cache_sb sb;
+ struct cache_sb_disk *sb_disk;
struct bio sb_bio;
struct bio_vec sb_bv[1];
struct closure sb_write;
@@ -329,6 +330,9 @@ struct cached_dev {
*/
atomic_t has_dirty;
+#define BCH_CACHE_READA_ALL 0
+#define BCH_CACHE_READA_META_ONLY 1
+ unsigned int cache_readahead_policy;
struct bch_ratelimit writeback_rate;
struct delayed_work writeback_rate_update;
@@ -403,6 +407,7 @@ enum alloc_reserve {
struct cache {
struct cache_set *set;
struct cache_sb sb;
+ struct cache_sb_disk *sb_disk;
struct bio sb_bio;
struct bio_vec sb_bv[1];
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index cffcdc9feefb..4385303836d8 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -1257,6 +1257,11 @@ static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
* Our temporary buffer is the same size as the btree node's
* buffer, we can just swap buffers instead of doing a big
* memcpy()
+ *
+ * Don't worry even if 'out' is allocated from a mempool, it
+ * can still be swapped here, because state->pool is a page
+ * mempool created by mempool_init_page_pool(), which indeed
+ * allocates whole pages with alloc_pages().
*/
out->magic = b->set->data->magic;
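
For context, a minimal sketch of the page-backed mempool setup the comment above relies on, assuming kernel context (names are illustrative, not bcache's own):

        #include <linux/mempool.h>

        static mempool_t sort_pool;

        static int sort_pool_init(unsigned int page_order)
        {
                /*
                 * mempool_init_page_pool() backs the pool with alloc_pages(),
                 * so every element handed out is an ordinary page allocation
                 * and its buffer may safely be swapped with a btree node's.
                 */
                return mempool_init_page_pool(&sort_pool, 1, page_order);
        }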
diff --git a/drivers/md/bcache/bset.h b/drivers/md/bcache/bset.h
index c71365e7c1fa..a50dcfda656f 100644
--- a/drivers/md/bcache/bset.h
+++ b/drivers/md/bcache/bset.h
@@ -397,7 +397,8 @@ void bch_btree_keys_stats(struct btree_keys *b, struct bset_stats *state);
/* Bkey utility code */
-#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, (i)->keys)
+#define bset_bkey_last(i) bkey_idx((struct bkey *) (i)->d, \
+ (unsigned int)(i)->keys)
static inline struct bkey *bset_bkey_idx(struct bset *i, unsigned int idx)
{
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 14d6c33b0957..b12186c87f52 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -34,6 +34,7 @@
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/sched/clock.h>
+#include <linux/sched/signal.h>
#include <linux/rculist.h>
#include <linux/delay.h>
#include <trace/events/bcache.h>
@@ -734,34 +735,32 @@ static unsigned long bch_mca_scan(struct shrinker *shrink,
i = 0;
btree_cache_used = c->btree_cache_used;
- list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
+ list_for_each_entry_safe_reverse(b, t, &c->btree_cache_freeable, list) {
if (nr <= 0)
goto out;
- if (++i > 3 &&
- !mca_reap(b, 0, false)) {
+ if (!mca_reap(b, 0, false)) {
mca_data_free(b);
rw_unlock(true, b);
freed++;
}
nr--;
+ i++;
}
- for (; (nr--) && i < btree_cache_used; i++) {
- if (list_empty(&c->btree_cache))
+ list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) {
+ if (nr <= 0 || i >= btree_cache_used)
goto out;
- b = list_first_entry(&c->btree_cache, struct btree, list);
- list_rotate_left(&c->btree_cache);
-
- if (!b->accessed &&
- !mca_reap(b, 0, false)) {
+ if (!mca_reap(b, 0, false)) {
mca_bucket_free(b);
mca_data_free(b);
rw_unlock(true, b);
freed++;
- } else
- b->accessed = 0;
+ }
+
+ nr--;
+ i++;
}
out:
mutex_unlock(&c->bucket_lock);
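
The rewritten scan walks both lists from the tail because c->btree_cache is kept most-recently-used first, so a reverse traversal reaps the coldest nodes first and the per-node accessed bit becomes redundant. A trivial userspace sketch of the idea (plain C, illustrative data only):

        #include <stdio.h>

        int main(void)
        {
                /* cache[0] is most recently used, cache[4] least */
                const char *cache[] = { "node-E", "node-D", "node-C",
                                        "node-B", "node-A" };
                int nr_to_free = 2;

                /* reverse traversal frees the least recently used first */
                for (int i = 4; i >= 0 && nr_to_free > 0; i--, nr_to_free--)
                        printf("freeing %s\n", cache[i]);
                return 0;
        }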
@@ -1069,7 +1068,6 @@ retry:
BUG_ON(!b->written);
b->parent = parent;
- b->accessed = 1;
for (; i <= b->keys.nsets && b->keys.set[i].size; i++) {
prefetch(b->keys.set[i].tree);
@@ -1160,7 +1158,6 @@ retry:
goto retry;
}
- b->accessed = 1;
b->parent = parent;
bch_bset_init_next(&b->keys, b->keys.set->data, bset_magic(&b->c->sb));
@@ -1917,6 +1914,18 @@ static int bch_gc_thread(void *arg)
int bch_gc_thread_start(struct cache_set *c)
{
+ /*
+ * A previous btree check operation may occupy so much system
+ * memory for the bcache btree node cache that the registering
+ * process gets selected by the OOM killer. Ignore the SIGKILL
+ * sent by the OOM killer, if there is one, so that pending
+ * signals do not make kthread_run() fail. The bcache
+ * registering process will exit after the registration is done.
+ */
+ if (signal_pending(current))
+ flush_signals(current);
+
c->gc_thread = kthread_run(bch_gc_thread, c, "bcache_gc");
return PTR_ERR_OR_ZERO(c->gc_thread);
}
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 76cfd121a486..f4dcca449391 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -121,8 +121,6 @@ struct btree {
/* Key/pointer for this btree node */
BKEY_PADDED(key);
- /* Single bit - set when accessed, cleared by shrinker */
- unsigned long accessed;
unsigned long seq;
struct rw_semaphore lock;
struct cache_set *c;
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index be2a2a201603..0e3ff9745ac7 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -420,7 +420,10 @@ err:
static void btree_flush_write(struct cache_set *c)
{
struct btree *b, *t, *btree_nodes[BTREE_FLUSH_NR];
- unsigned int i, n;
+ unsigned int i, nr;
+ int ref_nr;
+ atomic_t *fifo_front_p, *now_fifo_front_p;
+ size_t mask;
if (c->journal.btree_flushing)
return;
@@ -433,12 +436,50 @@ static void btree_flush_write(struct cache_set *c)
c->journal.btree_flushing = true;
spin_unlock(&c->journal.flush_write_lock);
+ /* get the oldest journal entry and check its refcount */
+ spin_lock(&c->journal.lock);
+ fifo_front_p = &fifo_front(&c->journal.pin);
+ ref_nr = atomic_read(fifo_front_p);
+ if (ref_nr <= 0) {
+ /*
+ * do nothing if no btree node references
+ * the oldest journal entry
+ */
+ spin_unlock(&c->journal.lock);
+ goto out;
+ }
+ spin_unlock(&c->journal.lock);
+
+ mask = c->journal.pin.mask;
+ nr = 0;
atomic_long_inc(&c->flush_write);
memset(btree_nodes, 0, sizeof(btree_nodes));
- n = 0;
mutex_lock(&c->bucket_lock);
list_for_each_entry_safe_reverse(b, t, &c->btree_cache, list) {
+ /*
+ * It is safe to read now_fifo_front_p without holding
+ * c->journal.lock here, because we don't need an exact
+ * value, only to check whether the front pointer of
+ * c->journal.pin has changed.
+ */
+ now_fifo_front_p = &fifo_front(&c->journal.pin);
+ /*
+ * If the oldest journal entry is reclaimed and the front
+ * pointer of c->journal.pin changes, there is no need to
+ * scan c->btree_cache any further; just quit the loop and
+ * flush out what we already have.
+ */
+ if (now_fifo_front_p != fifo_front_p)
+ break;
+ /*
+ * Quit this loop if all matching btree nodes are
+ * already scanned and recorded in btree_nodes[].
+ */
+ ref_nr = atomic_read(fifo_front_p);
+ if (nr >= ref_nr)
+ break;
+
if (btree_node_journal_flush(b))
pr_err("BUG: flush_write bit should not be set here!");
@@ -454,17 +495,43 @@ static void btree_flush_write(struct cache_set *c)
continue;
}
+ /*
+ * Only select the btree node which exactly references
+ * the oldest journal entry.
+ *
+ * If the journal entry pointed to by fifo_front_p is
+ * reclaimed in parallel, don't worry:
+ * - the list_for_each_xxx loop will quit when checking
+ * the next now_fifo_front_p.
+ * - if there are matched nodes recorded in btree_nodes[],
+ * they are clean now (this is why and how the oldest
+ * journal entry can be reclaimed). These selected nodes
+ * will be ignored and skipped in the following for-loop.
+ */
+ if (((btree_current_write(b)->journal - fifo_front_p) &
+ mask) != 0) {
+ mutex_unlock(&b->write_lock);
+ continue;
+ }
+
set_btree_node_journal_flush(b);
mutex_unlock(&b->write_lock);
- btree_nodes[n++] = b;
- if (n == BTREE_FLUSH_NR)
+ btree_nodes[nr++] = b;
+ /*
+ * To avoid holding c->bucket_lock for too long, scan
+ * for at most BTREE_FLUSH_NR matched btree nodes. If
+ * more btree nodes reference the oldest journal entry,
+ * try to flush them next time btree_flush_write() is
+ * called.
+ */
+ if (nr == BTREE_FLUSH_NR)
break;
}
mutex_unlock(&c->bucket_lock);
- for (i = 0; i < n; i++) {
+ for (i = 0; i < nr; i++) {
b = btree_nodes[i];
if (!b) {
pr_err("BUG: btree_nodes[%d] is NULL", i);
@@ -497,6 +564,7 @@ static void btree_flush_write(struct cache_set *c)
mutex_unlock(&b->write_lock);
}
+out:
spin_lock(&c->journal.flush_write_lock);
c->journal.btree_flushing = false;
spin_unlock(&c->journal.flush_write_lock);
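
The selection test above relies on ring arithmetic: c->journal.pin is a power-of-two FIFO, so for any element pointer p, (p - fifo_front_p) & mask is p's distance from the oldest entry, and a result of 0 means p is the oldest entry itself. A small userspace sketch (illustrative values):

        #include <stdio.h>

        #define FIFO_SIZE 8                     /* must be a power of two */

        int main(void)
        {
                int data[FIFO_SIZE];
                size_t mask = FIFO_SIZE - 1;
                int *front_p = &data[6];        /* oldest entry, after wraparound */

                for (size_t i = 0; i < FIFO_SIZE; i++) {
                        int *entry = &data[(6 + i) & mask];
                        size_t dist = (entry - front_p) & mask;

                        /* distance 0 selects exactly the front (oldest) entry */
                        printf("slot %zu: distance %zu%s\n",
                               (6 + i) & mask, dist, dist ? "" : "  <- oldest");
                }
                return 0;
        }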
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 73478a91a342..820d8402a1dc 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -379,13 +379,20 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
goto skip;
/*
- * Flag for bypass if the IO is for read-ahead or background,
- * unless the read-ahead request is for metadata
+ * If the bio is for read-ahead or background IO, whether to
+ * bypass it depends on the following situations:
+ * - If the IO is for metadata, always cache it; no bypass.
+ * - Otherwise, check dc->cache_readahead_policy:
+ * BCH_CACHE_READA_ALL: cache it, do not bypass
+ * BCH_CACHE_READA_META_ONLY: do not cache it, bypass
+ * That is, read-ahead requests for metadata always get cached
* (eg, for gfs2 or xfs).
*/
- if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) &&
- !(bio->bi_opf & (REQ_META|REQ_PRIO)))
- goto skip;
+ if ((bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND))) {
+ if (!(bio->bi_opf & (REQ_META|REQ_PRIO)) &&
+ (dc->cache_readahead_policy != BCH_CACHE_READA_ALL))
+ goto skip;
+ }
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
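
Distilled to a pure function, the new bypass decision looks roughly like this (a hedged sketch; the flag values stand in for the kernel's REQ_* bits):

        #include <stdbool.h>

        #define REQ_RAHEAD      (1u << 0)       /* stand-ins for kernel bits */
        #define REQ_BACKGROUND  (1u << 1)
        #define REQ_META        (1u << 2)
        #define REQ_PRIO        (1u << 3)

        enum { BCH_CACHE_READA_ALL, BCH_CACHE_READA_META_ONLY };

        static bool bypass_readahead(unsigned int opf, unsigned int policy)
        {
                if (!(opf & (REQ_RAHEAD | REQ_BACKGROUND)))
                        return false;   /* not read-ahead: decided elsewhere */
                if (opf & (REQ_META | REQ_PRIO))
                        return false;   /* metadata is always cached */
                return policy != BCH_CACHE_READA_ALL;
        }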
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index ba1c93791d8d..503aafe188dc 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -109,9 +109,13 @@ int bch_cache_accounting_add_kobjs(struct cache_accounting *acc,
void bch_cache_accounting_clear(struct cache_accounting *acc)
{
- memset(&acc->total.cache_hits,
- 0,
- sizeof(struct cache_stats));
+ acc->total.cache_hits = 0;
+ acc->total.cache_misses = 0;
+ acc->total.cache_bypass_hits = 0;
+ acc->total.cache_bypass_misses = 0;
+ acc->total.cache_readaheads = 0;
+ acc->total.cache_miss_collisions = 0;
+ acc->total.sectors_bypassed = 0;
}
void bch_cache_accounting_destroy(struct cache_accounting *acc)
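
The memset() being removed started at &acc->total.cache_hits yet cleared sizeof(struct cache_stats) bytes, so it ran past the end of the structure by the offset of cache_hits; the field-by-field stores clear exactly the counters. A userspace illustration of that bug class (the struct layout here is illustrative only):

        #include <stddef.h>
        #include <stdio.h>

        struct stats_demo {
                unsigned long kobj_state;       /* must survive the clear */
                unsigned long cache_hits;       /* first counter to clear */
                unsigned long cache_misses;
        };

        int main(void)
        {
                size_t off = offsetof(struct stats_demo, cache_hits);

                /* clearing sizeof(struct) bytes from a non-first member
                 * overruns the object by exactly that member's offset */
                printf("overrun: %zu bytes\n", off);
                return 0;
        }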
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 77e9869345e7..0c3c5419c52b 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -15,7 +15,6 @@
#include "writeback.h"
#include <linux/blkdev.h>
-#include <linux/buffer_head.h>
#include <linux/debugfs.h>
#include <linux/genhd.h>
#include <linux/idr.h>
@@ -60,17 +59,18 @@ struct workqueue_struct *bch_journal_wq;
/* Superblock */
static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
- struct page **res)
+ struct cache_sb_disk **res)
{
const char *err;
- struct cache_sb *s;
- struct buffer_head *bh = __bread(bdev, 1, SB_SIZE);
+ struct cache_sb_disk *s;
+ struct page *page;
unsigned int i;
- if (!bh)
+ page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
+ SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
+ if (IS_ERR(page))
return "IO error";
-
- s = (struct cache_sb *) bh->b_data;
+ s = page_address(page) + offset_in_page(SB_OFFSET);
sb->offset = le64_to_cpu(s->offset);
sb->version = le64_to_cpu(s->version);
@@ -188,12 +188,10 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
}
sb->last_mount = (u32)ktime_get_real_seconds();
- err = NULL;
-
- get_page(bh->b_page);
- *res = bh->b_page;
+ *res = s;
+ return NULL;
err:
- put_bh(bh);
+ put_page(page);
return err;
}
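
The new read path maps the superblock straight out of the block device's page cache and keeps a pointer into that page. A hedged sketch of the pattern (error handling trimmed; the helper name is illustrative):

        #include <linux/err.h>
        #include <linux/pagemap.h>

        static struct cache_sb_disk *map_sb(struct block_device *bdev)
        {
                struct page *page;

                page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
                                           SB_OFFSET >> PAGE_SHIFT, GFP_KERNEL);
                if (IS_ERR(page))
                        return ERR_CAST(page);

                /* the page stays referenced until put_page() at teardown */
                return page_address(page) + offset_in_page(SB_OFFSET);
        }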
@@ -207,15 +205,15 @@ static void write_bdev_super_endio(struct bio *bio)
closure_put(&dc->sb_write);
}
-static void __write_super(struct cache_sb *sb, struct bio *bio)
+static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
+ struct bio *bio)
{
- struct cache_sb *out = page_address(bio_first_page_all(bio));
unsigned int i;
+ bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_META;
bio->bi_iter.bi_sector = SB_SECTOR;
- bio->bi_iter.bi_size = SB_SIZE;
- bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC|REQ_META);
- bch_bio_map(bio, NULL);
+ __bio_add_page(bio, virt_to_page(out), SB_SIZE,
+ offset_in_page(out));
out->offset = cpu_to_le64(sb->offset);
out->version = cpu_to_le64(sb->version);
@@ -257,14 +255,14 @@ void bch_write_bdev_super(struct cached_dev *dc, struct closure *parent)
down(&dc->sb_write_mutex);
closure_init(cl, parent);
- bio_reset(bio);
+ bio_init(bio, dc->sb_bv, 1);
bio_set_dev(bio, dc->bdev);
bio->bi_end_io = write_bdev_super_endio;
bio->bi_private = dc;
closure_get(cl);
/* I/O request sent to backing device */
- __write_super(&dc->sb, bio);
+ __write_super(&dc->sb, dc->sb_disk, bio);
closure_return_with_destructor(cl, bch_write_bdev_super_unlock);
}
@@ -306,13 +304,13 @@ void bcache_write_super(struct cache_set *c)
SET_CACHE_SYNC(&ca->sb, CACHE_SYNC(&c->sb));
- bio_reset(bio);
+ bio_init(bio, ca->sb_bv, 1);
bio_set_dev(bio, ca->bdev);
bio->bi_end_io = write_super_endio;
bio->bi_private = ca;
closure_get(cl);
- __write_super(&ca->sb, bio);
+ __write_super(&ca->sb, ca->sb_disk, bio);
}
closure_return_with_destructor(cl, bcache_write_super_unlock);
@@ -611,12 +609,13 @@ int bch_prio_write(struct cache *ca, bool wait)
return 0;
}
-static void prio_read(struct cache *ca, uint64_t bucket)
+static int prio_read(struct cache *ca, uint64_t bucket)
{
struct prio_set *p = ca->disk_buckets;
struct bucket_disk *d = p->data + prios_per_bucket(ca), *end = d;
struct bucket *b;
unsigned int bucket_nr = 0;
+ int ret = -EIO;
for (b = ca->buckets;
b < ca->buckets + ca->sb.nbuckets;
@@ -629,11 +628,15 @@ static void prio_read(struct cache *ca, uint64_t bucket)
prio_io(ca, bucket, REQ_OP_READ, 0);
if (p->csum !=
- bch_crc64(&p->magic, bucket_bytes(ca) - 8))
+ bch_crc64(&p->magic, bucket_bytes(ca) - 8)) {
pr_warn("bad csum reading priorities");
+ goto out;
+ }
- if (p->magic != pset_magic(&ca->sb))
+ if (p->magic != pset_magic(&ca->sb)) {
pr_warn("bad magic reading priorities");
+ goto out;
+ }
bucket = p->next_bucket;
d = p->data;
@@ -642,6 +645,10 @@ static void prio_read(struct cache *ca, uint64_t bucket)
b->prio = le16_to_cpu(d->prio);
b->gen = b->last_gc = d->gen;
}
+
+ ret = 0;
+out:
+ return ret;
}
/* Bcache device */
@@ -1275,6 +1282,9 @@ static void cached_dev_free(struct closure *cl)
mutex_unlock(&bch_register_lock);
+ if (dc->sb_disk)
+ put_page(virt_to_page(dc->sb_disk));
+
if (!IS_ERR_OR_NULL(dc->bdev))
blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
@@ -1350,7 +1360,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
/* Cached device - bcache superblock */
-static int register_bdev(struct cache_sb *sb, struct page *sb_page,
+static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
struct block_device *bdev,
struct cached_dev *dc)
{
@@ -1362,11 +1372,7 @@ static int register_bdev(struct cache_sb *sb, struct page *sb_page,
memcpy(&dc->sb, sb, sizeof(struct cache_sb));
dc->bdev = bdev;
dc->bdev->bd_holder = dc;
-
- bio_init(&dc->sb_bio, dc->sb_bio.bi_inline_vecs, 1);
- bio_first_bvec_all(&dc->sb_bio)->bv_page = sb_page;
- get_page(sb_page);
-
+ dc->sb_disk = sb_disk;
if (cached_dev_init(dc, sb->block_size << 9))
goto err;
@@ -1876,8 +1882,10 @@ static int run_cache_set(struct cache_set *c)
j = &list_entry(journal.prev, struct journal_replay, list)->j;
err = "IO error reading priorities";
- for_each_cache(ca, c, i)
- prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]);
+ for_each_cache(ca, c, i) {
+ if (prio_read(ca, j->prio_bucket[ca->sb.nr_this_dev]))
+ goto err;
+ }
/*
* If prio_read() fails it'll call cache_set_error and we'll
@@ -1909,23 +1917,6 @@ static int run_cache_set(struct cache_set *c)
if (bch_btree_check(c))
goto err;
- /*
- * bch_btree_check() may occupy too much system memory which
- * has negative effects to user space application (e.g. data
- * base) performance. Shrink the mca cache memory proactively
- * here to avoid competing memory with user space workloads..
- */
- if (!c->shrinker_disabled) {
- struct shrink_control sc;
-
- sc.gfp_mask = GFP_KERNEL;
- sc.nr_to_scan = c->btree_cache_used * c->btree_pages;
- /* first run to clear b->accessed tag */
- c->shrink.scan_objects(&c->shrink, &sc);
- /* second run to reap non-accessed nodes */
- c->shrink.scan_objects(&c->shrink, &sc);
- }
-
bch_journal_mark(c, &journal);
bch_initial_gc_finish(c);
pr_debug("btree_check() done");
@@ -2136,8 +2127,8 @@ void bch_cache_release(struct kobject *kobj)
for (i = 0; i < RESERVE_NR; i++)
free_fifo(&ca->free[i]);
- if (ca->sb_bio.bi_inline_vecs[0].bv_page)
- put_page(bio_first_page_all(&ca->sb_bio));
+ if (ca->sb_disk)
+ put_page(virt_to_page(ca->sb_disk));
if (!IS_ERR_OR_NULL(ca->bdev))
blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
@@ -2259,7 +2250,7 @@ err_free:
return ret;
}
-static int register_cache(struct cache_sb *sb, struct page *sb_page,
+static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
struct block_device *bdev, struct cache *ca)
{
const char *err = NULL; /* must be set for any error case */
@@ -2269,10 +2260,7 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page,
memcpy(&ca->sb, sb, sizeof(struct cache_sb));
ca->bdev = bdev;
ca->bdev->bd_holder = ca;
-
- bio_init(&ca->sb_bio, ca->sb_bio.bi_inline_vecs, 1);
- bio_first_bvec_all(&ca->sb_bio)->bv_page = sb_page;
- get_page(sb_page);
+ ca->sb_disk = sb_disk;
if (blk_queue_discard(bdev_get_queue(bdev)))
ca->discard = CACHE_DISCARD(&ca->sb);
@@ -2372,29 +2360,35 @@ static bool bch_is_open(struct block_device *bdev)
static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
const char *buffer, size_t size)
{
- ssize_t ret = -EINVAL;
- const char *err = "cannot allocate memory";
+ const char *err;
char *path = NULL;
- struct cache_sb *sb = NULL;
- struct block_device *bdev = NULL;
- struct page *sb_page = NULL;
+ struct cache_sb *sb;
+ struct cache_sb_disk *sb_disk;
+ struct block_device *bdev;
+ ssize_t ret;
+ ret = -EBUSY;
+ err = "failed to reference bcache module";
if (!try_module_get(THIS_MODULE))
- return -EBUSY;
+ goto out;
/* For latest state of bcache_is_reboot */
smp_mb();
+ err = "bcache is in reboot";
if (bcache_is_reboot)
- return -EBUSY;
+ goto out_module_put;
+ ret = -ENOMEM;
+ err = "cannot allocate memory";
path = kstrndup(buffer, size, GFP_KERNEL);
if (!path)
- goto err;
+ goto out_module_put;
sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL);
if (!sb)
- goto err;
+ goto out_free_path;
+ ret = -EINVAL;
err = "failed to open device";
bdev = blkdev_get_by_path(strim(path),
FMODE_READ|FMODE_WRITE|FMODE_EXCL,
@@ -2411,57 +2405,63 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (!IS_ERR(bdev))
bdput(bdev);
if (attr == &ksysfs_register_quiet)
- goto quiet_out;
+ goto done;
}
- goto err;
+ goto out_free_sb;
}
err = "failed to set blocksize";
if (set_blocksize(bdev, 4096))
- goto err_close;
+ goto out_blkdev_put;
- err = read_super(sb, bdev, &sb_page);
+ err = read_super(sb, bdev, &sb_disk);
if (err)
- goto err_close;
+ goto out_blkdev_put;
err = "failed to register device";
if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
if (!dc)
- goto err_close;
+ goto out_put_sb_page;
mutex_lock(&bch_register_lock);
- ret = register_bdev(sb, sb_page, bdev, dc);
+ ret = register_bdev(sb, sb_disk, bdev, dc);
mutex_unlock(&bch_register_lock);
/* blkdev_put() will be called in cached_dev_free() */
if (ret < 0)
- goto err;
+ goto out_free_sb;
} else {
struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
if (!ca)
- goto err_close;
+ goto out_put_sb_page;
/* blkdev_put() will be called in bch_cache_release() */
- if (register_cache(sb, sb_page, bdev, ca) != 0)
- goto err;
+ if (register_cache(sb, sb_disk, bdev, ca) != 0)
+ goto out_free_sb;
}
-quiet_out:
- ret = size;
-out:
- if (sb_page)
- put_page(sb_page);
+
+done:
kfree(sb);
kfree(path);
module_put(THIS_MODULE);
- return ret;
+ return size;
-err_close:
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-err:
- pr_info("error %s: %s", path, err);
- goto out;
+out_put_sb_page:
+ put_page(virt_to_page(sb_disk));
+out_blkdev_put:
+ blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+out_free_sb:
+ kfree(sb);
+out_free_path:
+ kfree(path);
+ path = NULL;
+out_module_put:
+ module_put(THIS_MODULE);
+out:
+ pr_info("error %s: %s", path ? path : "", err);
+ return ret;
}
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 733e2ddf3c78..3470fae4eabc 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -27,6 +27,12 @@ static const char * const bch_cache_modes[] = {
NULL
};
+static const char * const bch_reada_cache_policies[] = {
+ "all",
+ "meta-only",
+ NULL
+};
+
/* Default is 0 ("auto") */
static const char * const bch_stop_on_failure_modes[] = {
"auto",
@@ -100,6 +106,7 @@ rw_attribute(congested_write_threshold_us);
rw_attribute(sequential_cutoff);
rw_attribute(data_csum);
rw_attribute(cache_mode);
+rw_attribute(readahead_cache_policy);
rw_attribute(stop_when_cache_set_failed);
rw_attribute(writeback_metadata);
rw_attribute(writeback_running);
@@ -168,6 +175,11 @@ SHOW(__bch_cached_dev)
bch_cache_modes,
BDEV_CACHE_MODE(&dc->sb));
+ if (attr == &sysfs_readahead_cache_policy)
+ return bch_snprint_string_list(buf, PAGE_SIZE,
+ bch_reada_cache_policies,
+ dc->cache_readahead_policy);
+
if (attr == &sysfs_stop_when_cache_set_failed)
return bch_snprint_string_list(buf, PAGE_SIZE,
bch_stop_on_failure_modes,
@@ -353,6 +365,15 @@ STORE(__cached_dev)
}
}
+ if (attr == &sysfs_readahead_cache_policy) {
+ v = __sysfs_match_string(bch_reada_cache_policies, -1, buf);
+ if (v < 0)
+ return v;
+
+ if ((unsigned int) v != dc->cache_readahead_policy)
+ dc->cache_readahead_policy = v;
+ }
+
if (attr == &sysfs_stop_when_cache_set_failed) {
v = __sysfs_match_string(bch_stop_on_failure_modes, -1, buf);
if (v < 0)
@@ -467,6 +488,7 @@ static struct attribute *bch_cached_dev_files[] = {
&sysfs_data_csum,
#endif
&sysfs_cache_mode,
+ &sysfs_readahead_cache_policy,
&sysfs_stop_when_cache_set_failed,
&sysfs_writeback_metadata,
&sysfs_writeback_running,
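
Once a backing device is registered, the new policy can be flipped at runtime through the sysfs attribute added above. A hypothetical userspace sketch (the bcache0 device path is an assumption for illustration):

        #include <fcntl.h>
        #include <stdio.h>
        #include <string.h>
        #include <unistd.h>

        int main(void)
        {
                const char *attr =
                        "/sys/block/bcache0/bcache/readahead_cache_policy";
                int fd = open(attr, O_WRONLY);

                if (fd < 0) {
                        perror("open");
                        return 1;
                }
                /* "all" caches every read-ahead; "meta-only" bypasses data */
                if (write(fd, "meta-only", strlen("meta-only")) < 0)
                        perror("write");
                close(fd);
                return 0;
        }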