Diffstat (limited to 'drivers/md/dm-bufio.c')
-rw-r--r--  drivers/md/dm-bufio.c  238
1 file changed, 154 insertions(+), 84 deletions(-)
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 825ca1f87639..c33b49792b87 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -14,6 +14,7 @@
#include <linux/vmalloc.h>
#include <linux/shrinker.h>
#include <linux/module.h>
+#include <linux/rbtree.h>
#define DM_MSG_PREFIX "bufio"
@@ -34,26 +35,23 @@
/*
* Check buffer ages in this interval (seconds)
*/
-#define DM_BUFIO_WORK_TIMER_SECS 10
+#define DM_BUFIO_WORK_TIMER_SECS 30
/*
* Free buffers when they are older than this (seconds)
*/
-#define DM_BUFIO_DEFAULT_AGE_SECS 60
+#define DM_BUFIO_DEFAULT_AGE_SECS 300
/*
- * The number of bvec entries that are embedded directly in the buffer.
- * If the chunk size is larger, dm-io is used to do the io.
+ * The number of bytes of cached data to keep around.
*/
-#define DM_BUFIO_INLINE_VECS 16
+#define DM_BUFIO_DEFAULT_RETAIN_BYTES (256 * 1024)
/*
- * Buffer hash
+ * The number of bvec entries that are embedded directly in the buffer.
+ * If the chunk size is larger, dm-io is used to do the io.
*/
-#define DM_BUFIO_HASH_BITS 20
-#define DM_BUFIO_HASH(block) \
-	((((block) >> DM_BUFIO_HASH_BITS) ^ (block)) & \
-	 ((1 << DM_BUFIO_HASH_BITS) - 1))
+#define DM_BUFIO_INLINE_VECS 16
/*
* Don't try to use kmem_cache_alloc for blocks larger than this.
@@ -106,7 +104,7 @@ struct dm_bufio_client {
unsigned minimum_buffers;
- struct hlist_head *cache_hash;
+ struct rb_root buffer_tree;
wait_queue_head_t free_buffer_wait;
int async_write_error;
@@ -135,7 +133,7 @@ enum data_mode {
};
struct dm_buffer {
- struct hlist_node hash_list;
+ struct rb_node node;
struct list_head lru_list;
sector_t block;
void *data;
@@ -223,6 +221,7 @@ static DEFINE_SPINLOCK(param_spinlock);
* Buffers are freed after this timeout
*/
static unsigned dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
+static unsigned dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
static unsigned long dm_bufio_peak_allocated;
static unsigned long dm_bufio_allocated_kmem_cache;
@@ -253,6 +252,53 @@ static LIST_HEAD(dm_bufio_all_clients);
*/
static DEFINE_MUTEX(dm_bufio_clients_lock);
+/*----------------------------------------------------------------
+ * A red/black tree acts as an index for all the buffers.
+ *--------------------------------------------------------------*/
+static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
+{
+	struct rb_node *n = c->buffer_tree.rb_node;
+	struct dm_buffer *b;
+
+	while (n) {
+		b = container_of(n, struct dm_buffer, node);
+
+		if (b->block == block)
+			return b;
+
+		n = (b->block < block) ? n->rb_left : n->rb_right;
+	}
+
+	return NULL;
+}
+
+static void __insert(struct dm_bufio_client *c, struct dm_buffer *b)
+{
+	struct rb_node **new = &c->buffer_tree.rb_node, *parent = NULL;
+	struct dm_buffer *found;
+
+	while (*new) {
+		found = container_of(*new, struct dm_buffer, node);
+
+		if (found->block == b->block) {
+			BUG_ON(found != b);
+			return;
+		}
+
+		parent = *new;
+		new = (found->block < b->block) ?
+			&((*new)->rb_left) : &((*new)->rb_right);
+	}
+
+	rb_link_node(&b->node, parent, new);
+	rb_insert_color(&b->node, &c->buffer_tree);
+}
+
+static void __remove(struct dm_bufio_client *c, struct dm_buffer *b)
+{
+	rb_erase(&b->node, &c->buffer_tree);
+}
+
/*----------------------------------------------------------------*/
static void adjust_total_allocated(enum data_mode data_mode, long diff)
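
One subtlety worth flagging here: both __find() and __insert() descend to the *left* child when the node's block is smaller than the key, the mirror image of the textbook layout. That is harmless, because the two functions share the comparison and the tree is simply kept in descending block order; all that matters is that lookup and insert agree. The same invariant in a minimal, self-contained userspace sketch (an unbalanced binary search tree with hypothetical names, standing in for the kernel's rb-tree API):

	/*
	 * Minimal sketch, not kernel code: an unbalanced BST using the same
	 * "descend left when node key < search key" convention as the
	 * __find()/__insert() pair above.  All names here are hypothetical.
	 */
	#include <stdio.h>
	#include <stdlib.h>

	struct buf {
		unsigned long block;		/* key, like dm_buffer.block */
		struct buf *left, *right;
	};

	static struct buf *find(struct buf *n, unsigned long block)
	{
		while (n) {
			if (n->block == block)
				return n;
			/* same reversed comparison as __find() */
			n = (n->block < block) ? n->left : n->right;
		}
		return NULL;
	}

	static void insert(struct buf **new, struct buf *b)
	{
		while (*new) {
			if ((*new)->block == b->block)
				return;		/* already present */
			/* must match find(), or lookups walk the wrong side */
			new = ((*new)->block < b->block) ?
				&(*new)->left : &(*new)->right;
		}
		*new = b;
	}

	int main(void)
	{
		struct buf *root = NULL;
		unsigned long keys[] = { 5, 1, 9, 3 };
		int i;

		for (i = 0; i < 4; i++) {
			struct buf *b = calloc(1, sizeof(*b));
			b->block = keys[i];
			insert(&root, b);
		}
		printf("found 3: %s\n", find(root, 3) ? "yes" : "no");
		return 0;
	}

The kernel version gets rebalancing for free from rb_insert_color(), and the O(log n) walk replaces the old fixed-size hash with its potentially long chains.
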
@@ -434,7 +480,7 @@ static void __link_buffer(struct dm_buffer *b, sector_t block, int dirty)
b->block = block;
b->list_mode = dirty;
list_add(&b->lru_list, &c->lru[dirty]);
- hlist_add_head(&b->hash_list, &c->cache_hash[DM_BUFIO_HASH(block)]);
+ __insert(b->c, b);
b->last_accessed = jiffies;
}
@@ -448,7 +494,7 @@ static void __unlink_buffer(struct dm_buffer *b)
BUG_ON(!c->n_buffers[b->list_mode]);
c->n_buffers[b->list_mode]--;
- hlist_del(&b->hash_list);
+ __remove(b->c, b);
list_del(&b->lru_list);
}
@@ -532,6 +578,19 @@ static void use_dmio(struct dm_buffer *b, int rw, sector_t block,
end_io(&b->bio, r);
}
+static void inline_endio(struct bio *bio, int error)
+{
+	bio_end_io_t *end_fn = bio->bi_private;
+
+	/*
+	 * Reset the bio to free any attached resources
+	 * (e.g. bio integrity profiles).
+	 */
+	bio_reset(bio);
+
+	end_fn(bio, error);
+}
+
static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
bio_end_io_t *end_io)
{
@@ -543,7 +602,12 @@ static void use_inline_bio(struct dm_buffer *b, int rw, sector_t block,
b->bio.bi_max_vecs = DM_BUFIO_INLINE_VECS;
b->bio.bi_iter.bi_sector = block << b->c->sectors_per_block_bits;
b->bio.bi_bdev = b->c->bdev;
- b->bio.bi_end_io = end_io;
+ b->bio.bi_end_io = inline_endio;
+ /*
+ * Use of .bi_private isn't a problem here because
+ * the dm_buffer's inline bio is local to bufio.
+ */
+ b->bio.bi_private = end_io;
/*
* We assume that if len >= PAGE_SIZE ptr is page-aligned.
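
Routing the inline bio's completion through inline_endio() is a small trampoline: the caller's completion is parked in ->bi_private, bio_reset() releases anything still attached to the bio (e.g. integrity profiles), and only then is the saved function chained to. A hedged userspace sketch of that pattern (hypothetical struct and names; the void * casts mirror dm-bufio stashing a function pointer in bi_private):

	/* Minimal sketch, hypothetical names: a completion trampoline that
	 * cleans up first and then chains to a callback saved in a private
	 * field, like inline_endio() chaining to bio->bi_private. */
	#include <stdio.h>

	struct req {
		void (*end_io)(struct req *, int);	/* like bio->bi_end_io */
		void *private;				/* like bio->bi_private */
	};

	static void client_end_io(struct req *rq, int error)
	{
		printf("request done, error=%d\n", error);
	}

	static void trampoline_end_io(struct req *rq, int error)
	{
		void (*saved)(struct req *, int) =
			(void (*)(struct req *, int))rq->private;

		rq->private = NULL;	/* cleanup first, like bio_reset() */
		saved(rq, error);	/* then run the real completion */
	}

	int main(void)
	{
		struct req rq = {
			.end_io  = trampoline_end_io,
			.private = (void *)client_end_io,
		};

		rq.end_io(&rq, 0);
		return 0;
	}

The comment in the patch carries the key justification: reusing bi_private this way is only safe because the inline bio never leaves bufio, so no other layer can own that field.
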
@@ -887,23 +951,6 @@ static void __check_watermark(struct dm_bufio_client *c,
__write_dirty_buffers_async(c, 1, write_list);
}
-/*
- * Find a buffer in the hash.
- */
-static struct dm_buffer *__find(struct dm_bufio_client *c, sector_t block)
-{
-	struct dm_buffer *b;
-
-	hlist_for_each_entry(b, &c->cache_hash[DM_BUFIO_HASH(block)],
-			     hash_list) {
-		dm_bufio_cond_resched();
-		if (b->block == block)
-			return b;
-	}
-
-	return NULL;
-}
-
/*----------------------------------------------------------------
* Getting a buffer
*--------------------------------------------------------------*/
@@ -1433,45 +1480,52 @@ static void drop_buffers(struct dm_bufio_client *c)
}
/*
- * Test if the buffer is unused and too old, and commit it.
- * At if noio is set, we must not do any I/O because we hold
- * dm_bufio_clients_lock and we would risk deadlock if the I/O gets rerouted to
- * different bufio client.
+ * We may not be able to evict this buffer if IO is pending or the client
+ * is still using it.  The caller is expected to know the buffer is too old.
+ *
+ * If GFP_NOFS is used, we must not do any I/O because we hold
+ * dm_bufio_clients_lock and we would risk deadlock if the I/O gets
+ * rerouted to a different bufio client.
*/
-static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp,
-				unsigned long max_jiffies)
+static bool __try_evict_buffer(struct dm_buffer *b, gfp_t gfp)
{
-	if (jiffies - b->last_accessed < max_jiffies)
-		return 0;
-
-	if (!(gfp & __GFP_IO)) {
+	if (!(gfp & __GFP_FS)) {
		if (test_bit(B_READING, &b->state) ||
		    test_bit(B_WRITING, &b->state) ||
		    test_bit(B_DIRTY, &b->state))
-			return 0;
+			return false;
	}

	if (b->hold_count)
-		return 0;
+		return false;

	__make_buffer_clean(b);
	__unlink_buffer(b);
	__free_buffer_wake(b);

-	return 1;
+	return true;
}
-static long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
-		   gfp_t gfp_mask)
+static unsigned get_retain_buffers(struct dm_bufio_client *c)
+{
+	unsigned retain_bytes = ACCESS_ONCE(dm_bufio_retain_bytes);
+
+	return retain_bytes / c->block_size;
+}
+
+static unsigned long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
+			    gfp_t gfp_mask)
{
	int l;
	struct dm_buffer *b, *tmp;
-	long freed = 0;
+	unsigned long freed = 0;
+	unsigned long count = nr_to_scan;
+	unsigned retain_target = get_retain_buffers(c);

	for (l = 0; l < LIST_SIZE; l++) {
		list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
-			freed += __cleanup_old_buffer(b, gfp_mask, 0);
-			if (!--nr_to_scan)
+			if (__try_evict_buffer(b, gfp_mask))
+				freed++;
+			if (!--nr_to_scan || ((count - freed) <= retain_target))
				return freed;
			dm_bufio_cond_resched();
		}
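
The effect of retain_target is easiest to see with concrete numbers. get_retain_buffers() turns the byte-valued knob into a per-client buffer count, and __scan() stops as soon as freeing one more buffer would drop the cache below it. A small sketch of that arithmetic with hypothetical values (4 KiB blocks, 100 cached buffers, every buffer assumed evictable):

	#include <stdio.h>

	int main(void)
	{
		/* Default from the patch; the module parameter can override it. */
		unsigned retain_bytes = 256 * 1024;
		unsigned block_size = 4096;		/* hypothetical client */
		unsigned retain_target = retain_bytes / block_size;	/* 64 */

		unsigned long count = 100;		/* buffers currently cached */
		unsigned long nr_to_scan = 100;
		unsigned long freed = 0;

		/* Mirror the exit condition in __scan(), assuming every
		 * buffer is evictable: stop once the buffers remaining
		 * would fall to the retain target. */
		while (nr_to_scan-- && (count - freed) > retain_target)
			freed++;

		printf("freed=%lu remaining=%lu\n", freed, count - freed);
		return 0;	/* prints: freed=36 remaining=64 */
	}

So with the defaults, a client with 4 KiB blocks always keeps 64 buffers (256 KiB) resident no matter how hard the shrinker pushes.
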
@@ -1486,7 +1540,7 @@ dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
	unsigned long freed;

	c = container_of(shrink, struct dm_bufio_client, shrinker);
-	if (sc->gfp_mask & __GFP_IO)
+	if (sc->gfp_mask & __GFP_FS)
		dm_bufio_lock(c);
	else if (!dm_bufio_trylock(c))
		return SHRINK_STOP;
@@ -1503,7 +1557,7 @@ dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
	unsigned long count;

	c = container_of(shrink, struct dm_bufio_client, shrinker);
-	if (sc->gfp_mask & __GFP_IO)
+	if (sc->gfp_mask & __GFP_FS)
		dm_bufio_lock(c);
	else if (!dm_bufio_trylock(c))
		return 0;
@@ -1533,11 +1587,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
		r = -ENOMEM;
		goto bad_client;
	}
-	c->cache_hash = vmalloc(sizeof(struct hlist_head) << DM_BUFIO_HASH_BITS);
-	if (!c->cache_hash) {
-		r = -ENOMEM;
-		goto bad_hash;
-	}
+	c->buffer_tree = RB_ROOT;
c->bdev = bdev;
c->block_size = block_size;
@@ -1556,9 +1606,6 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
		c->n_buffers[i] = 0;
	}

-	for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++)
-		INIT_HLIST_HEAD(&c->cache_hash[i]);
-
mutex_init(&c->lock);
INIT_LIST_HEAD(&c->reserved_buffers);
c->need_reserved_buffers = reserved_buffers;
@@ -1632,8 +1679,6 @@ bad_cache:
}
dm_io_client_destroy(c->dm_io);
bad_dm_io:
- vfree(c->cache_hash);
-bad_hash:
kfree(c);
bad_client:
return ERR_PTR(r);
@@ -1660,9 +1705,7 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
mutex_unlock(&dm_bufio_clients_lock);
-	for (i = 0; i < 1 << DM_BUFIO_HASH_BITS; i++)
-		BUG_ON(!hlist_empty(&c->cache_hash[i]));
-
+	BUG_ON(!RB_EMPTY_ROOT(&c->buffer_tree));
BUG_ON(c->need_reserved_buffers);
while (!list_empty(&c->reserved_buffers)) {
@@ -1680,36 +1723,60 @@ void dm_bufio_client_destroy(struct dm_bufio_client *c)
		BUG_ON(c->n_buffers[i]);

	dm_io_client_destroy(c->dm_io);
-	vfree(c->cache_hash);
	kfree(c);
}
EXPORT_SYMBOL_GPL(dm_bufio_client_destroy);
-static void cleanup_old_buffers(void)
+static unsigned get_max_age_hz(void)
{
-	unsigned long max_age = ACCESS_ONCE(dm_bufio_max_age);
-	struct dm_bufio_client *c;
+	unsigned max_age = ACCESS_ONCE(dm_bufio_max_age);

-	if (max_age > ULONG_MAX / HZ)
-		max_age = ULONG_MAX / HZ;
+	if (max_age > UINT_MAX / HZ)
+		max_age = UINT_MAX / HZ;

-	mutex_lock(&dm_bufio_clients_lock);
-	list_for_each_entry(c, &dm_bufio_all_clients, client_list) {
-		if (!dm_bufio_trylock(c))
-			continue;
+	return max_age * HZ;
+}

-		while (!list_empty(&c->lru[LIST_CLEAN])) {
-			struct dm_buffer *b;
-			b = list_entry(c->lru[LIST_CLEAN].prev,
-				       struct dm_buffer, lru_list);
-			if (!__cleanup_old_buffer(b, 0, max_age * HZ))
-				break;
-			dm_bufio_cond_resched();
-		}
+static bool older_than(struct dm_buffer *b, unsigned long age_hz)
+{
+	return (jiffies - b->last_accessed) >= age_hz;
+}
+
+static void __evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
+{
+	struct dm_buffer *b, *tmp;
+	unsigned retain_target = get_retain_buffers(c);
+	unsigned count;
+
+	dm_bufio_lock(c);
+
+	count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
+	list_for_each_entry_safe_reverse(b, tmp, &c->lru[LIST_CLEAN], lru_list) {
+		if (count <= retain_target)
+			break;
+
+		if (!older_than(b, age_hz))
+			break;
+
+		if (__try_evict_buffer(b, 0))
+			count--;

-		dm_bufio_unlock(c);
		dm_bufio_cond_resched();
	}
+
+	dm_bufio_unlock(c);
+}
+
+static void cleanup_old_buffers(void)
+{
+	unsigned long max_age_hz = get_max_age_hz();
+	struct dm_bufio_client *c;
+
+	mutex_lock(&dm_bufio_clients_lock);
+
+	list_for_each_entry(c, &dm_bufio_all_clients, client_list)
+		__evict_old_buffers(c, max_age_hz);
+
	mutex_unlock(&dm_bufio_clients_lock);
}
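
older_than() uses a bare unsigned subtraction rather than the time_after() helpers, which still behaves correctly across a jiffies wraparound: modular arithmetic on unsigned long yields the true number of elapsed ticks. A minimal sketch with made-up values:

	#include <stdio.h>

	int main(void)
	{
		/* Pretend the tick counter wrapped: the buffer was last
		 * touched 150 ticks ago, just before the rollover. */
		unsigned long jiffies = 50;
		unsigned long last_accessed = -100UL;	/* ULONG_MAX - 99 */
		unsigned long age_hz = 100;

		/* Unsigned subtraction is modular, so it still yields the
		 * true elapsed ticks (150) across the wrap. */
		if (jiffies - last_accessed >= age_hz)
			printf("evictable, age=%lu ticks\n",
			       jiffies - last_accessed);

		return 0;
	}

Note also the locking change relative to the old loop: __evict_old_buffers() takes dm_bufio_lock(c) unconditionally and blocks, where the old cleanup used dm_bufio_trylock() and simply skipped busy clients.
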
@@ -1834,6 +1901,9 @@ MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
module_param_named(max_age_seconds, dm_bufio_max_age, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
+module_param_named(retain_bytes, dm_bufio_retain_bytes, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
+
module_param_named(peak_allocated_bytes, dm_bufio_peak_allocated, ulong, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(peak_allocated_bytes, "Tracks the maximum allocated memory");
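
Since the new retain_bytes knob is declared with module_param_named(..., S_IRUGO | S_IWUSR), it should surface alongside the existing parameters as /sys/module/dm_bufio/parameters/retain_bytes, readable by everyone and writable by root, so the retain threshold can be tuned at runtime without reloading the module.
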