diff options
Diffstat (limited to 'drivers/md/bcache')
-rw-r--r-- | drivers/md/bcache/bcache.h | 12 | ||||
-rw-r--r-- | drivers/md/bcache/btree.c | 48 | ||||
-rw-r--r-- | drivers/md/bcache/btree.h | 5 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 4 | ||||
-rw-r--r-- | drivers/md/bcache/stats.h | 1 | ||||
-rw-r--r-- | drivers/md/bcache/super.c | 136 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.c | 31 | ||||
-rw-r--r-- | drivers/md/bcache/sysfs.h | 2 | ||||
-rw-r--r-- | drivers/md/bcache/writeback.c | 10 |
9 files changed, 142 insertions, 107 deletions
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index aebb7ef10e63..5a79bb3c272f 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -275,7 +275,7 @@ struct bcache_device { int (*cache_miss)(struct btree *b, struct search *s, struct bio *bio, unsigned int sectors); - int (*ioctl)(struct bcache_device *d, fmode_t mode, + int (*ioctl)(struct bcache_device *d, blk_mode_t mode, unsigned int cmd, unsigned long arg); }; @@ -1004,11 +1004,11 @@ extern struct workqueue_struct *bch_flush_wq; extern struct mutex bch_register_lock; extern struct list_head bch_cache_sets; -extern struct kobj_type bch_cached_dev_ktype; -extern struct kobj_type bch_flash_dev_ktype; -extern struct kobj_type bch_cache_set_ktype; -extern struct kobj_type bch_cache_set_internal_ktype; -extern struct kobj_type bch_cache_ktype; +extern const struct kobj_type bch_cached_dev_ktype; +extern const struct kobj_type bch_flash_dev_ktype; +extern const struct kobj_type bch_cache_set_ktype; +extern const struct kobj_type bch_cache_set_internal_ktype; +extern const struct kobj_type bch_cache_ktype; void bch_cached_dev_release(struct kobject *kobj); void bch_flash_dev_release(struct kobject *kobj); diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 147c493a989a..fd121a61f17c 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -559,6 +559,27 @@ static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp) } } +#define cmp_int(l, r) ((l > r) - (l < r)) + +#ifdef CONFIG_PROVE_LOCKING +static int btree_lock_cmp_fn(const struct lockdep_map *_a, + const struct lockdep_map *_b) +{ + const struct btree *a = container_of(_a, struct btree, lock.dep_map); + const struct btree *b = container_of(_b, struct btree, lock.dep_map); + + return -cmp_int(a->level, b->level) ?: bkey_cmp(&a->key, &b->key); +} + +static void btree_lock_print_fn(const struct lockdep_map *map) +{ + const struct btree *b = container_of(map, struct btree, lock.dep_map); + + printk(KERN_CONT " l=%u %llu:%llu", b->level, + KEY_INODE(&b->key), KEY_OFFSET(&b->key)); +} +#endif + static struct btree *mca_bucket_alloc(struct cache_set *c, struct bkey *k, gfp_t gfp) { @@ -572,7 +593,7 @@ static struct btree *mca_bucket_alloc(struct cache_set *c, return NULL; init_rwsem(&b->lock); - lockdep_set_novalidate_class(&b->lock); + lock_set_cmp_fn(&b->lock, btree_lock_cmp_fn, btree_lock_print_fn); mutex_init(&b->write_lock); lockdep_set_novalidate_class(&b->write_lock); INIT_LIST_HEAD(&b->list); @@ -885,7 +906,7 @@ static struct btree *mca_cannibalize(struct cache_set *c, struct btree_op *op, * cannibalize_bucket() will take. This means every time we unlock the root of * the btree, we need to release this lock if we have it held. */ -static void bch_cannibalize_unlock(struct cache_set *c) +void bch_cannibalize_unlock(struct cache_set *c) { spin_lock(&c->btree_cannibalize_lock); if (c->btree_cache_alloc_lock == current) { @@ -1090,10 +1111,12 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, struct btree *parent) { BKEY_PADDED(key) k; - struct btree *b = ERR_PTR(-EAGAIN); + struct btree *b; mutex_lock(&c->bucket_lock); retry: + /* return ERR_PTR(-EAGAIN) when it fails */ + b = ERR_PTR(-EAGAIN); if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait)) goto err; @@ -1138,7 +1161,7 @@ static struct btree *btree_node_alloc_replacement(struct btree *b, { struct btree *n = bch_btree_node_alloc(b->c, op, b->level, b->parent); - if (!IS_ERR_OR_NULL(n)) { + if (!IS_ERR(n)) { mutex_lock(&n->write_lock); bch_btree_sort_into(&b->keys, &n->keys, &b->c->sort); bkey_copy_key(&n->key, &b->key); @@ -1340,7 +1363,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, memset(new_nodes, 0, sizeof(new_nodes)); closure_init_stack(&cl); - while (nodes < GC_MERGE_NODES && !IS_ERR_OR_NULL(r[nodes].b)) + while (nodes < GC_MERGE_NODES && !IS_ERR(r[nodes].b)) keys += r[nodes++].keys; blocks = btree_default_blocks(b->c) * 2 / 3; @@ -1352,7 +1375,7 @@ static int btree_gc_coalesce(struct btree *b, struct btree_op *op, for (i = 0; i < nodes; i++) { new_nodes[i] = btree_node_alloc_replacement(r[i].b, NULL); - if (IS_ERR_OR_NULL(new_nodes[i])) + if (IS_ERR(new_nodes[i])) goto out_nocoalesce; } @@ -1487,7 +1510,7 @@ out_nocoalesce: bch_keylist_free(&keylist); for (i = 0; i < nodes; i++) - if (!IS_ERR_OR_NULL(new_nodes[i])) { + if (!IS_ERR(new_nodes[i])) { btree_node_free(new_nodes[i]); rw_unlock(true, new_nodes[i]); } @@ -1669,7 +1692,7 @@ static int bch_btree_gc_root(struct btree *b, struct btree_op *op, if (should_rewrite) { n = btree_node_alloc_replacement(b, NULL); - if (!IS_ERR_OR_NULL(n)) { + if (!IS_ERR(n)) { bch_btree_node_write_sync(n); bch_btree_set_root(n); @@ -1968,6 +1991,15 @@ static int bch_btree_check_thread(void *arg) c->gc_stats.nodes++; bch_btree_op_init(&op, 0); ret = bcache_btree(check_recurse, p, c->root, &op); + /* + * The op may be added to cache_set's btree_cache_wait + * in mca_cannibalize(), must ensure it is removed from + * the list and release btree_cache_alloc_lock before + * free op memory. + * Otherwise, the btree_cache_wait will be damaged. + */ + bch_cannibalize_unlock(c); + finish_wait(&c->btree_cache_wait, &(&op)->wait); if (ret) goto out; } diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h index 1b5fdbc0d83e..45d64b54115a 100644 --- a/drivers/md/bcache/btree.h +++ b/drivers/md/bcache/btree.h @@ -247,8 +247,8 @@ static inline void bch_btree_op_init(struct btree_op *op, int write_lock_level) static inline void rw_lock(bool w, struct btree *b, int level) { - w ? down_write_nested(&b->lock, level + 1) - : down_read_nested(&b->lock, level + 1); + w ? down_write(&b->lock) + : down_read(&b->lock); if (w) b->seq++; } @@ -282,6 +282,7 @@ void bch_initial_gc_finish(struct cache_set *c); void bch_moving_gc(struct cache_set *c); int bch_btree_check(struct cache_set *c); void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k); +void bch_cannibalize_unlock(struct cache_set *c); static inline void wake_up_gc(struct cache_set *c) { diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 67a2e29e0b40..a9b1f3896249 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -1228,7 +1228,7 @@ void cached_dev_submit_bio(struct bio *bio) detached_dev_do_request(d, bio, orig_bdev, start_time); } -static int cached_dev_ioctl(struct bcache_device *d, fmode_t mode, +static int cached_dev_ioctl(struct bcache_device *d, blk_mode_t mode, unsigned int cmd, unsigned long arg) { struct cached_dev *dc = container_of(d, struct cached_dev, disk); @@ -1318,7 +1318,7 @@ void flash_dev_submit_bio(struct bio *bio) continue_at(cl, search_free, NULL); } -static int flash_dev_ioctl(struct bcache_device *d, fmode_t mode, +static int flash_dev_ioctl(struct bcache_device *d, blk_mode_t mode, unsigned int cmd, unsigned long arg) { return -ENOTTY; diff --git a/drivers/md/bcache/stats.h b/drivers/md/bcache/stats.h index bd3afc856d53..21b445f8af15 100644 --- a/drivers/md/bcache/stats.h +++ b/drivers/md/bcache/stats.h @@ -18,7 +18,6 @@ struct cache_stats { unsigned long cache_misses; unsigned long cache_bypass_hits; unsigned long cache_bypass_misses; - unsigned long cache_readaheads; unsigned long cache_miss_collisions; unsigned long sectors_bypassed; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index 7e9d19fd21dd..0ae2b3676293 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -732,9 +732,9 @@ out: /* Bcache device */ -static int open_dev(struct block_device *b, fmode_t mode) +static int open_dev(struct gendisk *disk, blk_mode_t mode) { - struct bcache_device *d = b->bd_disk->private_data; + struct bcache_device *d = disk->private_data; if (test_bit(BCACHE_DEV_CLOSING, &d->flags)) return -ENXIO; @@ -743,14 +743,14 @@ static int open_dev(struct block_device *b, fmode_t mode) return 0; } -static void release_dev(struct gendisk *b, fmode_t mode) +static void release_dev(struct gendisk *b) { struct bcache_device *d = b->private_data; closure_put(&d->cl); } -static int ioctl_dev(struct block_device *b, fmode_t mode, +static int ioctl_dev(struct block_device *b, blk_mode_t mode, unsigned int cmd, unsigned long arg) { struct bcache_device *d = b->bd_disk->private_data; @@ -1369,7 +1369,7 @@ static void cached_dev_free(struct closure *cl) put_page(virt_to_page(dc->sb_disk)); if (!IS_ERR_OR_NULL(dc->bdev)) - blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(dc->bdev, dc); wake_up(&unregister_wait); @@ -1453,7 +1453,6 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, memcpy(&dc->sb, sb, sizeof(struct cache_sb)); dc->bdev = bdev; - dc->bdev->bd_holder = dc; dc->sb_disk = sb_disk; if (cached_dev_init(dc, sb->block_size << 9)) @@ -1723,7 +1722,7 @@ static void cache_set_flush(struct closure *cl) if (!IS_ERR_OR_NULL(c->gc_thread)) kthread_stop(c->gc_thread); - if (!IS_ERR_OR_NULL(c->root)) + if (!IS_ERR(c->root)) list_add(&c->root->list, &c->btree_cache); /* @@ -2087,7 +2086,7 @@ static int run_cache_set(struct cache_set *c) err = "cannot allocate new btree root"; c->root = __bch_btree_node_alloc(c, NULL, 0, true, NULL); - if (IS_ERR_OR_NULL(c->root)) + if (IS_ERR(c->root)) goto err; mutex_lock(&c->root->write_lock); @@ -2218,7 +2217,7 @@ void bch_cache_release(struct kobject *kobj) put_page(virt_to_page(ca->sb_disk)); if (!IS_ERR_OR_NULL(ca->bdev)) - blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(ca->bdev, ca); kfree(ca); module_put(THIS_MODULE); @@ -2345,7 +2344,6 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, memcpy(&ca->sb, sb, sizeof(struct cache_sb)); ca->bdev = bdev; - ca->bdev->bd_holder = ca; ca->sb_disk = sb_disk; if (bdev_max_discard_sectors((bdev))) @@ -2359,7 +2357,7 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, * call blkdev_put() to bdev in bch_cache_release(). So we * explicitly call blkdev_put() here. */ - blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + blkdev_put(bdev, ca); if (ret == -ENOMEM) err = "cache_alloc(): -ENOMEM"; else if (ret == -EPERM) @@ -2448,6 +2446,7 @@ struct async_reg_args { struct cache_sb *sb; struct cache_sb_disk *sb_disk; struct block_device *bdev; + void *holder; }; static void register_bdev_worker(struct work_struct *work) @@ -2455,22 +2454,13 @@ static void register_bdev_worker(struct work_struct *work) int fail = false; struct async_reg_args *args = container_of(work, struct async_reg_args, reg_work.work); - struct cached_dev *dc; - - dc = kzalloc(sizeof(*dc), GFP_KERNEL); - if (!dc) { - fail = true; - put_page(virt_to_page(args->sb_disk)); - blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); - goto out; - } mutex_lock(&bch_register_lock); - if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0) + if (register_bdev(args->sb, args->sb_disk, args->bdev, args->holder) + < 0) fail = true; mutex_unlock(&bch_register_lock); -out: if (fail) pr_info("error %s: fail to register backing device\n", args->path); @@ -2485,21 +2475,11 @@ static void register_cache_worker(struct work_struct *work) int fail = false; struct async_reg_args *args = container_of(work, struct async_reg_args, reg_work.work); - struct cache *ca; - - ca = kzalloc(sizeof(*ca), GFP_KERNEL); - if (!ca) { - fail = true; - put_page(virt_to_page(args->sb_disk)); - blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); - goto out; - } /* blkdev_put() will be called in bch_cache_release() */ - if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0) + if (register_cache(args->sb, args->sb_disk, args->bdev, args->holder)) fail = true; -out: if (fail) pr_info("error %s: fail to register cache device\n", args->path); @@ -2520,6 +2500,13 @@ static void register_device_async(struct async_reg_args *args) queue_delayed_work(system_wq, &args->reg_work, 10); } +static void *alloc_holder_object(struct cache_sb *sb) +{ + if (SB_IS_BDEV(sb)) + return kzalloc(sizeof(struct cached_dev), GFP_KERNEL); + return kzalloc(sizeof(struct cache), GFP_KERNEL); +} + static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, const char *buffer, size_t size) { @@ -2527,9 +2514,11 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, char *path = NULL; struct cache_sb *sb; struct cache_sb_disk *sb_disk; - struct block_device *bdev; + struct block_device *bdev, *bdev2; + void *holder = NULL; ssize_t ret; bool async_registration = false; + bool quiet = false; #ifdef CONFIG_BCACHE_ASYNC_REGISTRATION async_registration = true; @@ -2558,11 +2547,34 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ret = -EINVAL; err = "failed to open device"; - bdev = blkdev_get_by_path(strim(path), - FMODE_READ|FMODE_WRITE|FMODE_EXCL, - sb); + bdev = blkdev_get_by_path(strim(path), BLK_OPEN_READ, NULL, NULL); + if (IS_ERR(bdev)) + goto out_free_sb; + + err = "failed to set blocksize"; + if (set_blocksize(bdev, 4096)) + goto out_blkdev_put; + + err = read_super(sb, bdev, &sb_disk); + if (err) + goto out_blkdev_put; + + holder = alloc_holder_object(sb); + if (!holder) { + ret = -ENOMEM; + err = "cannot allocate memory"; + goto out_put_sb_page; + } + + /* Now reopen in exclusive mode with proper holder */ + bdev2 = blkdev_get_by_dev(bdev->bd_dev, BLK_OPEN_READ | BLK_OPEN_WRITE, + holder, NULL); + blkdev_put(bdev, NULL); + bdev = bdev2; if (IS_ERR(bdev)) { - if (bdev == ERR_PTR(-EBUSY)) { + ret = PTR_ERR(bdev); + bdev = NULL; + if (ret == -EBUSY) { dev_t dev; mutex_lock(&bch_register_lock); @@ -2572,20 +2584,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, else err = "device busy"; mutex_unlock(&bch_register_lock); - if (attr == &ksysfs_register_quiet) - goto done; + if (attr == &ksysfs_register_quiet) { + quiet = true; + ret = size; + } } - goto out_free_sb; + goto out_free_holder; } - err = "failed to set blocksize"; - if (set_blocksize(bdev, 4096)) - goto out_blkdev_put; - - err = read_super(sb, bdev, &sb_disk); - if (err) - goto out_blkdev_put; - err = "failed to register device"; if (async_registration) { @@ -2596,59 +2602,46 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (!args) { ret = -ENOMEM; err = "cannot allocate memory"; - goto out_put_sb_page; + goto out_free_holder; } args->path = path; args->sb = sb; args->sb_disk = sb_disk; args->bdev = bdev; + args->holder = holder; register_device_async(args); /* No wait and returns to user space */ goto async_done; } if (SB_IS_BDEV(sb)) { - struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); - - if (!dc) { - ret = -ENOMEM; - err = "cannot allocate memory"; - goto out_put_sb_page; - } - mutex_lock(&bch_register_lock); - ret = register_bdev(sb, sb_disk, bdev, dc); + ret = register_bdev(sb, sb_disk, bdev, holder); mutex_unlock(&bch_register_lock); /* blkdev_put() will be called in cached_dev_free() */ if (ret < 0) goto out_free_sb; } else { - struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); - - if (!ca) { - ret = -ENOMEM; - err = "cannot allocate memory"; - goto out_put_sb_page; - } - /* blkdev_put() will be called in bch_cache_release() */ - ret = register_cache(sb, sb_disk, bdev, ca); + ret = register_cache(sb, sb_disk, bdev, holder); if (ret) goto out_free_sb; } -done: kfree(sb); kfree(path); module_put(THIS_MODULE); async_done: return size; +out_free_holder: + kfree(holder); out_put_sb_page: put_page(virt_to_page(sb_disk)); out_blkdev_put: - blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + if (bdev) + blkdev_put(bdev, holder); out_free_sb: kfree(sb); out_free_path: @@ -2657,7 +2650,8 @@ out_free_path: out_module_put: module_put(THIS_MODULE); out: - pr_info("error %s: %s\n", path?path:"", err); + if (!quiet) + pr_info("error %s: %s\n", path?path:"", err); return ret; } diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index c6f677059214..0e2c1880f60b 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -1111,26 +1111,25 @@ SHOW(__bch_cache) vfree(p); - ret = scnprintf(buf, PAGE_SIZE, - "Unused: %zu%%\n" - "Clean: %zu%%\n" - "Dirty: %zu%%\n" - "Metadata: %zu%%\n" - "Average: %llu\n" - "Sectors per Q: %zu\n" - "Quantiles: [", - unused * 100 / (size_t) ca->sb.nbuckets, - available * 100 / (size_t) ca->sb.nbuckets, - dirty * 100 / (size_t) ca->sb.nbuckets, - meta * 100 / (size_t) ca->sb.nbuckets, sum, - n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1)); + ret = sysfs_emit(buf, + "Unused: %zu%%\n" + "Clean: %zu%%\n" + "Dirty: %zu%%\n" + "Metadata: %zu%%\n" + "Average: %llu\n" + "Sectors per Q: %zu\n" + "Quantiles: [", + unused * 100 / (size_t) ca->sb.nbuckets, + available * 100 / (size_t) ca->sb.nbuckets, + dirty * 100 / (size_t) ca->sb.nbuckets, + meta * 100 / (size_t) ca->sb.nbuckets, sum, + n * ca->sb.bucket_size / (ARRAY_SIZE(q) + 1)); for (i = 0; i < ARRAY_SIZE(q); i++) - ret += scnprintf(buf + ret, PAGE_SIZE - ret, - "%u ", q[i]); + ret += sysfs_emit_at(buf, ret, "%u ", q[i]); ret--; - ret += scnprintf(buf + ret, PAGE_SIZE - ret, "]\n"); + ret += sysfs_emit_at(buf, ret, "]\n"); return ret; } diff --git a/drivers/md/bcache/sysfs.h b/drivers/md/bcache/sysfs.h index a2ff6447b699..65b8bd975ab1 100644 --- a/drivers/md/bcache/sysfs.h +++ b/drivers/md/bcache/sysfs.h @@ -3,7 +3,7 @@ #define _BCACHE_SYSFS_H_ #define KTYPE(type) \ -struct kobj_type type ## _ktype = { \ +const struct kobj_type type ## _ktype = { \ .release = type ## _release, \ .sysfs_ops = &((const struct sysfs_ops) { \ .show = type ## _show, \ diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index d4a5fc0650bb..24c049067f61 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -890,6 +890,16 @@ static int bch_root_node_dirty_init(struct cache_set *c, if (ret < 0) pr_warn("sectors dirty init failed, ret=%d!\n", ret); + /* + * The op may be added to cache_set's btree_cache_wait + * in mca_cannibalize(), must ensure it is removed from + * the list and release btree_cache_alloc_lock before + * free op memory. + * Otherwise, the btree_cache_wait will be damaged. + */ + bch_cannibalize_unlock(c); + finish_wait(&c->btree_cache_wait, &(&op.op)->wait); + return ret; } |