aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/dm-bufio.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm-bufio.c')
-rw-r--r--drivers/md/dm-bufio.c269
1 files changed, 207 insertions, 62 deletions
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 2d519c223562..9c5ef818ca36 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -18,6 +18,7 @@
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/stacktrace.h>
+#include <linux/jump_label.h>
#define DM_MSG_PREFIX "bufio"
@@ -81,6 +82,8 @@
*/
struct dm_bufio_client {
struct mutex lock;
+ spinlock_t spinlock;
+ bool no_sleep;
struct list_head lru[LIST_SIZE];
unsigned long n_buffers[LIST_SIZE];
@@ -90,7 +93,6 @@ struct dm_bufio_client {
s8 sectors_per_block_bits;
void (*alloc_callback)(struct dm_buffer *);
void (*write_callback)(struct dm_buffer *);
-
struct kmem_cache *slab_buffer;
struct kmem_cache *slab_cache;
struct dm_io_client *dm_io;
@@ -108,7 +110,10 @@ struct dm_bufio_client {
int async_write_error;
struct list_head client_list;
+
struct shrinker shrinker;
+ struct work_struct shrink_work;
+ atomic_long_t need_shrink;
};
/*
@@ -158,23 +163,34 @@ struct dm_buffer {
#endif
};
+static DEFINE_STATIC_KEY_FALSE(no_sleep_enabled);
+
/*----------------------------------------------------------------*/
#define dm_bufio_in_request() (!!current->bio_list)
static void dm_bufio_lock(struct dm_bufio_client *c)
{
- mutex_lock_nested(&c->lock, dm_bufio_in_request());
+ if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep)
+ spin_lock_bh(&c->spinlock);
+ else
+ mutex_lock_nested(&c->lock, dm_bufio_in_request());
}
static int dm_bufio_trylock(struct dm_bufio_client *c)
{
- return mutex_trylock(&c->lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep)
+ return spin_trylock_bh(&c->spinlock);
+ else
+ return mutex_trylock(&c->lock);
}
static void dm_bufio_unlock(struct dm_bufio_client *c)
{
- mutex_unlock(&c->lock);
+ if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep)
+ spin_unlock_bh(&c->spinlock);
+ else
+ mutex_unlock(&c->lock);
}
/*----------------------------------------------------------------*/
@@ -256,12 +272,35 @@ static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
if (b->block == block)
return b;
- n = (b->block < block) ? n->rb_left : n->rb_right;
+ n = block < b->block ? n->rb_left : n->rb_right;
}
return NULL;
}
+/*
+ * Look up the buffer for "block" in the client's rb-tree, falling back to
+ * its nearest successor.
+ *
+ * Returns the buffer whose block number equals "block" if there is one,
+ * otherwise the buffer with the smallest block number above "block", or
+ * NULL when the tree holds no buffer at or after "block".
+ */
+static struct dm_buffer *__find_next(struct dm_bufio_client *c, sector_t block)
+{
+ struct rb_node *n = c->buffer_tree.rb_node;
+ struct dm_buffer *b;
+ struct dm_buffer *best = NULL;
+
+ while (n) {
+ b = container_of(n, struct dm_buffer, node);
+
+ if (b->block == block)
+ return b;
+
+ /* Equality was handled above, so "<=" acts as "<" here. */
+ if (block <= b->block) {
+ /* b lies above the target: remember it, then look for a closer one. */
+ n = n->rb_left;
+ best = b;
+ } else {
+ n = n->rb_right;
+ }
+ }
+
+ return best;
+}
+
static void __insert(struct dm_bufio_client *c, struct dm_buffer *b)
{
struct rb_node **new = &c->buffer_tree.rb_node, *parent = NULL;
@@ -276,8 +315,8 @@ static void __insert(struct dm_bufio_client *c, struct dm_buffer *b)
}
parent = *new;
- new = (found->block < b->block) ?
- &((*new)->rb_left) : &((*new)->rb_right);
+ new = b->block < found->block ?
+ &found->node.rb_left : &found->node.rb_right;
}
rb_link_node(&b->node, parent, new);
@@ -400,13 +439,13 @@ static void *alloc_buffer_data(struct dm_bufio_client *c, gfp_t gfp_mask,
*/
if (gfp_mask & __GFP_NORETRY) {
unsigned noio_flag = memalloc_noio_save();
- void *ptr = __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
+ void *ptr = __vmalloc(c->block_size, gfp_mask);
memalloc_noio_restore(noio_flag);
return ptr;
}
- return __vmalloc(c->block_size, gfp_mask, PAGE_KERNEL);
+ return __vmalloc(c->block_size, gfp_mask);
}
/*
@@ -551,13 +590,12 @@ static void dmio_complete(unsigned long error, void *context)
b->end_io(b, unlikely(error != 0) ? BLK_STS_IOERR : 0);
}
-static void use_dmio(struct dm_buffer *b, int rw, sector_t sector,
+static void use_dmio(struct dm_buffer *b, enum req_op op, sector_t sector,
unsigned n_sectors, unsigned offset)
{
int r;
struct dm_io_request io_req = {
- .bi_op = rw,
- .bi_op_flags = 0,
+ .bi_opf = op,
.notify.fn = dmio_complete,
.notify.context = b,
.client = b->c->dm_io,
@@ -585,11 +623,12 @@ static void bio_complete(struct bio *bio)
{
struct dm_buffer *b = bio->bi_private;
blk_status_t status = bio->bi_status;
- bio_put(bio);
+ bio_uninit(bio);
+ kfree(bio);
b->end_io(b, status);
}
-static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
+static void use_bio(struct dm_buffer *b, enum req_op op, sector_t sector,
unsigned n_sectors, unsigned offset)
{
struct bio *bio;
@@ -600,16 +639,14 @@ static void use_bio(struct dm_buffer *b, int rw, sector_t sector,
if (unlikely(b->c->sectors_per_block_bits < PAGE_SHIFT - SECTOR_SHIFT))
vec_size += 2;
- bio = bio_kmalloc(GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN, vec_size);
+ bio = bio_kmalloc(vec_size, GFP_NOWAIT | __GFP_NORETRY | __GFP_NOWARN);
if (!bio) {
dmio:
- use_dmio(b, rw, sector, n_sectors, offset);
+ use_dmio(b, op, sector, n_sectors, offset);
return;
}
-
+ bio_init(bio, b->c->bdev, bio->bi_inline_vecs, vec_size, op);
bio->bi_iter.bi_sector = sector;
- bio_set_dev(bio, b->c->bdev);
- bio_set_op_attrs(bio, rw, 0);
bio->bi_end_io = bio_complete;
bio->bi_private = b;
@@ -631,7 +668,21 @@ dmio:
submit_bio(bio);
}
-static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buffer *, blk_status_t))
+/*
+ * Convert a block number on client "c" into a sector number.
+ *
+ * The block is scaled to sectors (by a shift when sectors_per_block_bits is
+ * non-negative, by a multiplication otherwise) and c->start is then added,
+ * so the result is a position, not a length.
+ */
+static inline sector_t block_to_sector(struct dm_bufio_client *c, sector_t block)
+{
+ sector_t sector;
+
+ /* NOTE(review): a negative value presumably marks a block size that is not a power of two - confirm */
+ if (likely(c->sectors_per_block_bits >= 0))
+ sector = block << c->sectors_per_block_bits;
+ else
+ sector = block * (c->block_size >> SECTOR_SHIFT);
+ /* Shift by the client's start offset. */
+ sector += c->start;
+
+ return sector;
+}
+
+static void submit_io(struct dm_buffer *b, enum req_op op,
+ void (*end_io)(struct dm_buffer *, blk_status_t))
{
unsigned n_sectors;
sector_t sector;
@@ -639,13 +690,9 @@ static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buff
b->end_io = end_io;
- if (likely(b->c->sectors_per_block_bits >= 0))
- sector = b->block << b->c->sectors_per_block_bits;
- else
- sector = b->block * (b->c->block_size >> SECTOR_SHIFT);
- sector += b->c->start;
+ sector = block_to_sector(b->c, b->block);
- if (rw != REQ_OP_WRITE) {
+ if (op != REQ_OP_WRITE) {
n_sectors = b->c->block_size >> SECTOR_SHIFT;
offset = 0;
} else {
@@ -664,9 +711,9 @@ static void submit_io(struct dm_buffer *b, int rw, void (*end_io)(struct dm_buff
}
if (b->data_mode != DATA_MODE_VMALLOC)
- use_bio(b, rw, sector, n_sectors, offset);
+ use_bio(b, op, sector, n_sectors, offset);
else
- use_dmio(b, rw, sector, n_sectors, offset);
+ use_dmio(b, op, sector, n_sectors, offset);
}
/*----------------------------------------------------------------
@@ -748,7 +795,8 @@ static void __make_buffer_clean(struct dm_buffer *b)
{
BUG_ON(b->hold_count);
- if (!b->state) /* fast case */
+ /* smp_load_acquire() pairs with read_endio()'s smp_mb__before_atomic() */
+ if (!smp_load_acquire(&b->state)) /* fast case */
return;
wait_on_bit_io(&b->state, B_READING, TASK_UNINTERRUPTIBLE);
@@ -768,6 +816,10 @@ static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
BUG_ON(test_bit(B_WRITING, &b->state));
BUG_ON(test_bit(B_DIRTY, &b->state));
+ if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep &&
+ unlikely(test_bit_acquire(B_READING, &b->state)))
+ continue;
+
if (!b->hold_count) {
__make_buffer_clean(b);
__unlink_buffer(b);
@@ -776,6 +828,9 @@ static struct dm_buffer *__get_unclaimed_buffer(struct dm_bufio_client *c)
cond_resched();
}
+ if (static_branch_unlikely(&no_sleep_enabled) && c->no_sleep)
+ return NULL;
+
list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
BUG_ON(test_bit(B_READING, &b->state));
@@ -1004,7 +1059,7 @@ found_buffer:
* If the user called both dm_bufio_prefetch and dm_bufio_get on
* the same buffer, it would deadlock if we waited.
*/
- if (nf == NF_GET && unlikely(test_bit(B_READING, &b->state)))
+ if (nf == NF_GET && unlikely(test_bit_acquire(B_READING, &b->state)))
return NULL;
b->hold_count++;
@@ -1164,7 +1219,7 @@ void dm_bufio_release(struct dm_buffer *b)
* invalid buffer.
*/
if ((b->read_error || b->write_error) &&
- !test_bit(B_READING, &b->state) &&
+ !test_bit_acquire(B_READING, &b->state) &&
!test_bit(B_WRITING, &b->state) &&
!test_bit(B_DIRTY, &b->state)) {
__unlink_buffer(b);
@@ -1307,8 +1362,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
int dm_bufio_issue_flush(struct dm_bufio_client *c)
{
struct dm_io_request io_req = {
- .bi_op = REQ_OP_WRITE,
- .bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
+ .bi_opf = REQ_OP_WRITE | REQ_PREFLUSH | REQ_SYNC,
.mem.type = DM_IO_KMEM,
.mem.ptr.addr = NULL,
.client = c->dm_io,
@@ -1326,6 +1380,29 @@ int dm_bufio_issue_flush(struct dm_bufio_client *c)
EXPORT_SYMBOL_GPL(dm_bufio_issue_flush);
/*
+ * Use dm-io to send a discard request to the device.
+ */
+int dm_bufio_issue_discard(struct dm_bufio_client *c, sector_t block, sector_t count)
+{
+	/*
+	 * c:     client whose block device receives the discard
+	 * block: first block to discard
+	 * count: number of blocks to discard
+	 *
+	 * Returns whatever dm_io() returns.
+	 */
+	struct dm_io_request io_req = {
+		.bi_opf = REQ_OP_DISCARD | REQ_SYNC,
+		.mem.type = DM_IO_KMEM,
+		.mem.ptr.addr = NULL,
+		.client = c->dm_io,
+	};
+	struct dm_io_region io_reg = {
+		.bdev = c->bdev,
+		.sector = block_to_sector(c, block),
+		/*
+		 * block_to_sector() adds c->start, which is right for a
+		 * position but not for a length; take it back out so the
+		 * region is exactly "count" blocks long.
+		 */
+		.count = block_to_sector(c, count) - c->start,
+	};
+
+	/* Must not be called while current->bio_list is set. */
+	BUG_ON(dm_bufio_in_request());
+
+	return dm_io(&io_req, 1, &io_reg, NULL);
+}
+EXPORT_SYMBOL_GPL(dm_bufio_issue_discard);
+
+/*
* We first delete any other buffer that may be at that new location.
*
* Then, we write the buffer to the original location if it was dirty.
@@ -1401,6 +1478,14 @@ retry:
}
EXPORT_SYMBOL_GPL(dm_bufio_release_move);
+/*
+ * Drop buffer "b" from the cache if nobody is using it.
+ *
+ * The buffer is unlinked and freed only when its hold count is zero and
+ * its state word is zero; otherwise it is left alone. Both callers in this
+ * file invoke it between dm_bufio_lock() and dm_bufio_unlock().
+ */
+static void forget_buffer_locked(struct dm_buffer *b)
+{
+ /* The state word is read with acquire semantics before the buffer is torn down. */
+ if (likely(!b->hold_count) && likely(!smp_load_acquire(&b->state))) {
+ __unlink_buffer(b);
+ __free_buffer_wake(b);
+ }
+}
+
/*
* Free the given buffer.
*
@@ -1414,15 +1499,36 @@ void dm_bufio_forget(struct dm_bufio_client *c, sector_t block)
dm_bufio_lock(c);
b = __find(c, block);
- if (b && likely(!b->hold_count) && likely(!b->state)) {
- __unlink_buffer(b);
- __free_buffer_wake(b);
- }
+ if (b)
+ forget_buffer_locked(b);
dm_bufio_unlock(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_forget);
+/*
+ * Forget the cached buffers for blocks in [block, block + n_blocks).
+ *
+ * Buffers that are held or have any state bit set are skipped (see
+ * forget_buffer_locked()). The client lock is taken and released once per
+ * buffer rather than held across the whole range.
+ */
+void dm_bufio_forget_buffers(struct dm_bufio_client *c, sector_t block, sector_t n_blocks)
+{
+	struct dm_buffer *b;
+	sector_t end_block = block + n_blocks;
+
+	while (block < end_block) {
+		dm_bufio_lock(c);
+
+		b = __find_next(c, block);
+		/*
+		 * __find_next() returns the nearest buffer at or after
+		 * "block", which may lie past the requested range; such a
+		 * buffer must be kept, and nothing further can be in range.
+		 */
+		if (b && b->block >= end_block)
+			b = NULL;
+		if (b) {
+			/* Read the block number before the buffer may be freed. */
+			block = b->block + 1;
+			forget_buffer_locked(b);
+		}
+
+		dm_bufio_unlock(c);
+
+		/* b is only tested here, never dereferenced after the unlock. */
+		if (!b)
+			break;
+	}
+}
+EXPORT_SYMBOL_GPL(dm_bufio_forget_buffers);
+
void dm_bufio_set_minimum_buffers(struct dm_bufio_client *c, unsigned n)
{
c->minimum_buffers = n;
@@ -1437,7 +1543,11 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);
sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
{
- sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT;
+ sector_t s = bdev_nr_sectors(c->bdev);
+ if (s >= c->start)
+ s -= c->start;
+ else
+ s = 0;
if (likely(c->sectors_per_block_bits >= 0))
s >>= c->sectors_per_block_bits;
else
@@ -1446,6 +1556,12 @@ sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
}
EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
+/*
+ * Return the dm_io client stored in bufio client "c" (its dm_io field).
+ */
+struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c)
+{
+ return c->dm_io;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client);
+
sector_t dm_bufio_get_block_number(struct dm_buffer *b)
{
return b->block;
@@ -1522,8 +1638,9 @@ static void drop_buffers(struct dm_bufio_client *c)
*/
static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
{
- if (!(gfp & __GFP_FS)) {
- if (test_bit(B_READING, &b->state) ||
+ if (!(gfp & __GFP_FS) ||
+ (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep)) {
+ if (test_bit_acquire(B_READING, &b->state) ||
test_bit(B_WRITING, &b->state) ||
test_bit(B_DIRTY, &b->state))
return false;
@@ -1549,8 +1666,7 @@ static unsigned long get_retain_buffers(struct dm_bufio_client *c)
return retain_bytes;
}
-static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
- gfp_t gfp_mask)
+static void __scan(struct dm_bufio_client *c)
{
int l;
struct dm_buffer *b, *tmp;
@@ -1561,42 +1677,58 @@ static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
for (l = 0; l < LIST_SIZE; l++) {
list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
- if (__try_evict_buffer(b, gfp_mask))
+ if (count - freed <= retain_target)
+ atomic_long_set(&c->need_shrink, 0);
+ if (!atomic_long_read(&c->need_shrink))
+ return;
+ if (__try_evict_buffer(b, GFP_KERNEL)) {
+ atomic_long_dec(&c->need_shrink);
freed++;
- if (!--nr_to_scan || ((count - freed) <= retain_target))
- return freed;
+ }
cond_resched();
}
}
- return freed;
}
-static unsigned long
-dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+/*
+ * Work handler for a client's shrink_work item.
+ *
+ * Recovers the owning client from the embedded work_struct and runs
+ * __scan() on it with the client lock held. The work item is queued on
+ * dm_bufio_wq by dm_bufio_shrink_scan().
+ */
+static void shrink_work(struct work_struct *w)
+{
+ struct dm_bufio_client *c = container_of(w, struct dm_bufio_client, shrink_work);
+
+ dm_bufio_lock(c);
+ __scan(c);
+ dm_bufio_unlock(c);
+}
+
+static unsigned long dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
{
struct dm_bufio_client *c;
- unsigned long freed;
c = container_of(shrink, struct dm_bufio_client, shrinker);
- if (sc->gfp_mask & __GFP_FS)
- dm_bufio_lock(c);
- else if (!dm_bufio_trylock(c))
- return SHRINK_STOP;
+ atomic_long_add(sc->nr_to_scan, &c->need_shrink);
+ queue_work(dm_bufio_wq, &c->shrink_work);
- freed = __scan(c, sc->nr_to_scan, sc->gfp_mask);
- dm_bufio_unlock(c);
- return freed;
+ return sc->nr_to_scan;
}
-static unsigned long
-dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
struct dm_bufio_client *c = container_of(shrink, struct dm_bufio_client, shrinker);
unsigned long count = READ_ONCE(c->n_buffers[LIST_CLEAN]) +
READ_ONCE(c->n_buffers[LIST_DIRTY]);
unsigned long retain_target = get_retain_buffers(c);
+ unsigned long queued_for_cleanup = atomic_long_read(&c->need_shrink);
+
+ if (unlikely(count < retain_target))
+ count = 0;
+ else
+ count -= retain_target;
+
+ if (unlikely(count < queued_for_cleanup))
+ count = 0;
+ else
+ count -= queued_for_cleanup;
- return (count < retain_target) ? 0 : (count - retain_target);
+ return count;
}
/*
@@ -1605,7 +1737,8 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsigned block_size,
unsigned reserved_buffers, unsigned aux_size,
void (*alloc_callback)(struct dm_buffer *),
- void (*write_callback)(struct dm_buffer *))
+ void (*write_callback)(struct dm_buffer *),
+ unsigned int flags)
{
int r;
struct dm_bufio_client *c;
@@ -1635,12 +1768,18 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
c->alloc_callback = alloc_callback;
c->write_callback = write_callback;
+ if (flags & DM_BUFIO_CLIENT_NO_SLEEP) {
+ c->no_sleep = true;
+ static_branch_inc(&no_sleep_enabled);
+ }
+
for (i = 0; i < LIST_SIZE; i++) {
INIT_LIST_HEAD(&c->lru[i]);
c->n_buffers[i] = 0;
}
mutex_init(&c->lock);
+ spin_lock_init(&c->spinlock);
INIT_LIST_HEAD(&c->reserved_buffers);
c->need_reserved_buffers = reserved_buffers;
@@ -1687,11 +1826,15 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
__free_buffer_wake(b);
}
+ INIT_WORK(&c->shrink_work, shrink_work);
+ atomic_long_set(&c->need_shrink, 0);
+
c->shrinker.count_objects = dm_bufio_shrink_count;
c->shrinker.scan_objects = dm_bufio_shrink_scan;
c->shrinker.seeks = 1;
c->shrinker.batch = 0;
- r = register_shrinker(&c->shrinker);
+ r = register_shrinker(&c->shrinker, "md-%s:(%u:%u)", slab_name,
+ MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
if (r)
goto bad;
@@ -1732,6 +1875,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
drop_buffers(c);
unregister_shrinker(&c->shrinker);
+ flush_work(&c->shrink_work);
mutex_lock(&dm_bufio_clients_lock);
@@ -1762,6 +1906,8 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
kmem_cache_destroy(c->slab_buffer);
dm_io_client_destroy(c->dm_io);
mutex_destroy(&c->lock);
+ if (c->no_sleep)
+ static_branch_dec(&no_sleep_enabled);
kfree(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);
@@ -1965,7 +2111,6 @@ static void __exit dm_bufio_exit(void)
int bug = 0;
cancel_delayed_work_sync(&dm_bufio_cleanup_old_work);
- flush_workqueue(dm_bufio_wq);
destroy_workqueue(dm_bufio_wq);
if (dm_bufio_client_count) {