aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/md/dm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/dm.c')
-rw-r--r--drivers/md/dm.c603
1 files changed, 458 insertions, 145 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 73f28802dc7a..f8c7ca3e8947 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -21,6 +21,9 @@
#include <linux/delay.h>
#include <linux/wait.h>
#include <linux/kthread.h>
+#include <linux/ktime.h>
+#include <linux/elevator.h> /* for rq_end_sector() */
+#include <linux/blk-mq.h>
#include <trace/events/block.h>
@@ -216,8 +219,29 @@ struct mapped_device {
struct kthread_worker kworker;
struct task_struct *kworker_task;
+
+ /* for request-based merge heuristic in dm_request_fn() */
+ unsigned seq_rq_merge_deadline_usecs;
+ int last_rq_rw;
+ sector_t last_rq_pos;
+ ktime_t last_rq_start_time;
+
+ /* for blk-mq request-based DM support */
+ struct blk_mq_tag_set tag_set;
+ bool use_blk_mq;
};
+#ifdef CONFIG_DM_MQ_DEFAULT
+static bool use_blk_mq = true;
+#else
+static bool use_blk_mq = false;
+#endif
+
+bool dm_use_blk_mq(struct mapped_device *md)
+{
+ return md->use_blk_mq;
+}
+
/*
* For mempools pre-allocation at the table loading time.
*/
@@ -250,35 +274,35 @@ static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
*/
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
-static unsigned __dm_get_reserved_ios(unsigned *reserved_ios,
+static unsigned __dm_get_module_param(unsigned *module_param,
unsigned def, unsigned max)
{
- unsigned ios = ACCESS_ONCE(*reserved_ios);
- unsigned modified_ios = 0;
+ unsigned param = ACCESS_ONCE(*module_param);
+ unsigned modified_param = 0;
- if (!ios)
- modified_ios = def;
- else if (ios > max)
- modified_ios = max;
+ if (!param)
+ modified_param = def;
+ else if (param > max)
+ modified_param = max;
- if (modified_ios) {
- (void)cmpxchg(reserved_ios, ios, modified_ios);
- ios = modified_ios;
+ if (modified_param) {
+ (void)cmpxchg(module_param, param, modified_param);
+ param = modified_param;
}
- return ios;
+ return param;
}
unsigned dm_get_reserved_bio_based_ios(void)
{
- return __dm_get_reserved_ios(&reserved_bio_based_ios,
+ return __dm_get_module_param(&reserved_bio_based_ios,
RESERVED_BIO_BASED_IOS, RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_bio_based_ios);
unsigned dm_get_reserved_rq_based_ios(void)
{
- return __dm_get_reserved_ios(&reserved_rq_based_ios,
+ return __dm_get_module_param(&reserved_rq_based_ios,
RESERVED_REQUEST_BASED_IOS, RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
@@ -433,7 +457,6 @@ static int dm_blk_open(struct block_device *bdev, fmode_t mode)
dm_get(md);
atomic_inc(&md->open_count);
-
out:
spin_unlock(&_minor_lock);
@@ -442,16 +465,20 @@ out:
static void dm_blk_close(struct gendisk *disk, fmode_t mode)
{
- struct mapped_device *md = disk->private_data;
+ struct mapped_device *md;
spin_lock(&_minor_lock);
+ md = disk->private_data;
+ if (WARN_ON(!md))
+ goto out;
+
if (atomic_dec_and_test(&md->open_count) &&
(test_bit(DMF_DEFERRED_REMOVE, &md->flags)))
queue_work(deferred_remove_workqueue, &deferred_remove_work);
dm_put(md);
-
+out:
spin_unlock(&_minor_lock);
}
@@ -1014,6 +1041,11 @@ static void end_clone_bio(struct bio *clone, int error)
blk_update_request(tio->orig, 0, nr_bytes);
}
+static struct dm_rq_target_io *tio_from_request(struct request *rq)
+{
+ return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
+}
+
/*
* Don't touch any member of the md after calling this function because
* the md may be freed in dm_put() at the end of this function.
@@ -1021,10 +1053,13 @@ static void end_clone_bio(struct bio *clone, int error)
*/
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
{
+ int nr_requests_pending;
+
atomic_dec(&md->pending[rw]);
/* nudge anyone waiting on suspend queue */
- if (!md_in_flight(md))
+ nr_requests_pending = md_in_flight(md);
+ if (!nr_requests_pending)
wake_up(&md->wait);
/*
@@ -1033,8 +1068,13 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
* back into ->request_fn() could deadlock attempting to grab the
* queue lock again.
*/
- if (run_queue)
- blk_run_queue_async(md->queue);
+ if (run_queue) {
+ if (md->queue->mq_ops)
+ blk_mq_run_hw_queues(md->queue, true);
+ else if (!nr_requests_pending ||
+ (nr_requests_pending >= md->queue->nr_congestion_on))
+ blk_run_queue_async(md->queue);
+ }
/*
* dm_put() must be at the end of this function. See the comment above
@@ -1045,13 +1085,18 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
static void free_rq_clone(struct request *clone)
{
struct dm_rq_target_io *tio = clone->end_io_data;
+ struct mapped_device *md = tio->md;
blk_rq_unprep_clone(clone);
- if (clone->q && clone->q->mq_ops)
+
+ if (clone->q->mq_ops)
tio->ti->type->release_clone_rq(clone);
- else
- free_clone_request(tio->md, clone);
- free_rq_tio(tio);
+ else if (!md->queue->mq_ops)
+ /* request_fn queue stacked on request_fn queue(s) */
+ free_clone_request(md, clone);
+
+ if (!md->queue->mq_ops)
+ free_rq_tio(tio);
}
/*
@@ -1080,17 +1125,22 @@ static void dm_end_request(struct request *clone, int error)
}
free_rq_clone(clone);
- blk_end_request_all(rq, error);
+ if (!rq->q->mq_ops)
+ blk_end_request_all(rq, error);
+ else
+ blk_mq_end_request(rq, error);
rq_completed(md, rw, true);
}
static void dm_unprep_request(struct request *rq)
{
- struct dm_rq_target_io *tio = rq->special;
+ struct dm_rq_target_io *tio = tio_from_request(rq);
struct request *clone = tio->clone;
- rq->special = NULL;
- rq->cmd_flags &= ~REQ_DONTPREP;
+ if (!rq->q->mq_ops) {
+ rq->special = NULL;
+ rq->cmd_flags &= ~REQ_DONTPREP;
+ }
if (clone)
free_rq_clone(clone);
@@ -1099,18 +1149,29 @@ static void dm_unprep_request(struct request *rq)
/*
* Requeue the original request of a clone.
*/
-static void dm_requeue_unmapped_original_request(struct mapped_device *md,
- struct request *rq)
+static void old_requeue_request(struct request *rq)
{
- int rw = rq_data_dir(rq);
struct request_queue *q = rq->q;
unsigned long flags;
- dm_unprep_request(rq);
-
spin_lock_irqsave(q->queue_lock, flags);
blk_requeue_request(q, rq);
spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void dm_requeue_unmapped_original_request(struct mapped_device *md,
+ struct request *rq)
+{
+ int rw = rq_data_dir(rq);
+
+ dm_unprep_request(rq);
+
+ if (!rq->q->mq_ops)
+ old_requeue_request(rq);
+ else {
+ blk_mq_requeue_request(rq);
+ blk_mq_kick_requeue_list(rq->q);
+ }
rq_completed(md, rw, false);
}
@@ -1122,35 +1183,44 @@ static void dm_requeue_unmapped_request(struct request *clone)
dm_requeue_unmapped_original_request(tio->md, tio->orig);
}
-static void __stop_queue(struct request_queue *q)
-{
- blk_stop_queue(q);
-}
-
-static void stop_queue(struct request_queue *q)
+static void old_stop_queue(struct request_queue *q)
{
unsigned long flags;
+ if (blk_queue_stopped(q))
+ return;
+
spin_lock_irqsave(q->queue_lock, flags);
- __stop_queue(q);
+ blk_stop_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
-static void __start_queue(struct request_queue *q)
+static void stop_queue(struct request_queue *q)
{
- if (blk_queue_stopped(q))
- blk_start_queue(q);
+ if (!q->mq_ops)
+ old_stop_queue(q);
+ else
+ blk_mq_stop_hw_queues(q);
}
-static void start_queue(struct request_queue *q)
+static void old_start_queue(struct request_queue *q)
{
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
- __start_queue(q);
+ if (blk_queue_stopped(q))
+ blk_start_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
+static void start_queue(struct request_queue *q)
+{
+ if (!q->mq_ops)
+ old_start_queue(q);
+ else
+ blk_mq_start_stopped_hw_queues(q, true);
+}
+
static void dm_done(struct request *clone, int error, bool mapped)
{
int r = error;
@@ -1189,13 +1259,20 @@ static void dm_done(struct request *clone, int error, bool mapped)
static void dm_softirq_done(struct request *rq)
{
bool mapped = true;
- struct dm_rq_target_io *tio = rq->special;
+ struct dm_rq_target_io *tio = tio_from_request(rq);
struct request *clone = tio->clone;
+ int rw;
if (!clone) {
- blk_end_request_all(rq, tio->error);
- rq_completed(tio->md, rq_data_dir(rq), false);
- free_rq_tio(tio);
+ rw = rq_data_dir(rq);
+ if (!rq->q->mq_ops) {
+ blk_end_request_all(rq, tio->error);
+ rq_completed(tio->md, rw, false);
+ free_rq_tio(tio);
+ } else {
+ blk_mq_end_request(rq, tio->error);
+ rq_completed(tio->md, rw, false);
+ }
return;
}
@@ -1211,7 +1288,7 @@ static void dm_softirq_done(struct request *rq)
*/
static void dm_complete_request(struct request *rq, int error)
{
- struct dm_rq_target_io *tio = rq->special;
+ struct dm_rq_target_io *tio = tio_from_request(rq);
tio->error = error;
blk_complete_request(rq);
@@ -1230,7 +1307,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
}
/*
- * Called with the clone's queue lock held
+ * Called with the clone's queue lock held (for non-blk-mq)
*/
static void end_clone_request(struct request *clone, int error)
{
@@ -1690,7 +1767,7 @@ out:
* The request function that just remaps the bio built up by
* dm_merge_bvec.
*/
-static void _dm_request(struct request_queue *q, struct bio *bio)
+static void dm_make_request(struct request_queue *q, struct bio *bio)
{
int rw = bio_data_dir(bio);
struct mapped_device *md = q->queuedata;
@@ -1722,16 +1799,6 @@ int dm_request_based(struct mapped_device *md)
return blk_queue_stackable(md->queue);
}
-static void dm_request(struct request_queue *q, struct bio *bio)
-{
- struct mapped_device *md = q->queuedata;
-
- if (dm_request_based(md))
- blk_queue_bio(q, bio);
- else
- _dm_request(q, bio);
-}
-
static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
int r;
@@ -1784,15 +1851,25 @@ static int setup_clone(struct request *clone, struct request *rq,
static struct request *clone_rq(struct request *rq, struct mapped_device *md,
struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
- struct request *clone = alloc_clone_request(md, gfp_mask);
+ /*
+ * Do not allocate a clone if tio->clone was already set
+ * (see: dm_mq_queue_rq).
+ */
+ bool alloc_clone = !tio->clone;
+ struct request *clone;
- if (!clone)
- return NULL;
+ if (alloc_clone) {
+ clone = alloc_clone_request(md, gfp_mask);
+ if (!clone)
+ return NULL;
+ } else
+ clone = tio->clone;
blk_rq_init(NULL, clone);
if (setup_clone(clone, rq, tio, gfp_mask)) {
/* -ENOMEM */
- free_clone_request(md, clone);
+ if (alloc_clone)
+ free_clone_request(md, clone);
return NULL;
}
@@ -1801,6 +1878,19 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md,
static void map_tio_request(struct kthread_work *work);
+static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
+ struct mapped_device *md)
+{
+ tio->md = md;
+ tio->ti = NULL;
+ tio->clone = NULL;
+ tio->orig = rq;
+ tio->error = 0;
+ memset(&tio->info, 0, sizeof(tio->info));
+ if (md->kworker_task)
+ init_kthread_work(&tio->work, map_tio_request);
+}
+
static struct dm_rq_target_io *prep_tio(struct request *rq,
struct mapped_device *md, gfp_t gfp_mask)
{
@@ -1812,13 +1902,7 @@ static struct dm_rq_target_io *prep_tio(struct request *rq,
if (!tio)
return NULL;
- tio->md = md;
- tio->ti = NULL;
- tio->clone = NULL;
- tio->orig = rq;
- tio->error = 0;
- memset(&tio->info, 0, sizeof(tio->info));
- init_kthread_work(&tio->work, map_tio_request);
+ init_tio(tio, rq, md);
table = dm_get_live_table(md, &srcu_idx);
if (!dm_table_mq_request_based(table)) {
@@ -1862,11 +1946,11 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
* DM_MAPIO_REQUEUE : the original request needs to be requeued
* < 0 : the request was completed due to failure
*/
-static int map_request(struct dm_target *ti, struct request *rq,
+static int map_request(struct dm_rq_target_io *tio, struct request *rq,
struct mapped_device *md)
{
int r;
- struct dm_rq_target_io *tio = rq->special;
+ struct dm_target *ti = tio->ti;
struct request *clone = NULL;
if (tio->clone) {
@@ -1881,7 +1965,7 @@ static int map_request(struct dm_target *ti, struct request *rq,
}
if (IS_ERR(clone))
return DM_MAPIO_REQUEUE;
- if (setup_clone(clone, rq, tio, GFP_KERNEL)) {
+ if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
/* -ENOMEM */
ti->type->release_clone_rq(clone);
return DM_MAPIO_REQUEUE;
@@ -1922,15 +2006,24 @@ static void map_tio_request(struct kthread_work *work)
struct request *rq = tio->orig;
struct mapped_device *md = tio->md;
- if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE)
+ if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
dm_requeue_unmapped_original_request(md, rq);
}
static void dm_start_request(struct mapped_device *md, struct request *orig)
{
- blk_start_request(orig);
+ if (!orig->q->mq_ops)
+ blk_start_request(orig);
+ else
+ blk_mq_start_request(orig);
atomic_inc(&md->pending[rq_data_dir(orig)]);
+ if (md->seq_rq_merge_deadline_usecs) {
+ md->last_rq_pos = rq_end_sector(orig);
+ md->last_rq_rw = rq_data_dir(orig);
+ md->last_rq_start_time = ktime_get();
+ }
+
/*
* Hold the md reference here for the in-flight I/O.
* We can't rely on the reference count by device opener,
@@ -1941,6 +2034,45 @@ static void dm_start_request(struct mapped_device *md, struct request *orig)
dm_get(md);
}
+#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000
+
+ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
+{
+ return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
+}
+
+ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
+ const char *buf, size_t count)
+{
+ unsigned deadline;
+
+ if (!dm_request_based(md) || md->use_blk_mq)
+ return count;
+
+ if (kstrtouint(buf, 10, &deadline))
+ return -EINVAL;
+
+ if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
+ deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;
+
+ md->seq_rq_merge_deadline_usecs = deadline;
+
+ return count;
+}
+
+static bool dm_request_peeked_before_merge_deadline(struct mapped_device *md)
+{
+ ktime_t kt_deadline;
+
+ if (!md->seq_rq_merge_deadline_usecs)
+ return false;
+
+ kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
+ kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);
+
+ return !ktime_after(ktime_get(), kt_deadline);
+}
+
/*
* q->request_fn for request-based dm.
* Called with the queue lock held.
@@ -1964,7 +2096,7 @@ static void dm_request_fn(struct request_queue *q)
while (!blk_queue_stopped(q)) {
rq = blk_peek_request(q);
if (!rq)
- goto delay_and_out;
+ goto out;
/* always use block 0 to find the target for flushes for now */
pos = 0;
@@ -1983,12 +2115,17 @@ static void dm_request_fn(struct request_queue *q)
continue;
}
+ if (dm_request_peeked_before_merge_deadline(md) &&
+ md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
+ md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq))
+ goto delay_and_out;
+
if (ti->type->busy && ti->type->busy(ti))
goto delay_and_out;
dm_start_request(md, rq);
- tio = rq->special;
+ tio = tio_from_request(rq);
/* Establish tio->ti before queuing work (map_tio_request) */
tio->ti = ti;
queue_kthread_work(&md->kworker, &tio->work);
@@ -1998,33 +2135,11 @@ static void dm_request_fn(struct request_queue *q)
goto out;
delay_and_out:
- blk_delay_queue(q, HZ / 10);
+ blk_delay_queue(q, HZ / 100);
out:
dm_put_live_table(md, srcu_idx);
}
-int dm_underlying_device_busy(struct request_queue *q)
-{
- return blk_lld_busy(q);
-}
-EXPORT_SYMBOL_GPL(dm_underlying_device_busy);
-
-static int dm_lld_busy(struct request_queue *q)
-{
- int r;
- struct mapped_device *md = q->queuedata;
- struct dm_table *map = dm_get_live_table_fast(md);
-
- if (!map || test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags))
- r = 1;
- else
- r = dm_table_any_busy_target(map);
-
- dm_put_live_table_fast(md);
-
- return r;
-}
-
static int dm_any_congested(void *congested_data, int bdi_bits)
{
int r = bdi_bits;
@@ -2107,7 +2222,7 @@ static void dm_init_md_queue(struct mapped_device *md)
{
/*
* Request-based dm devices cannot be stacked on top of bio-based dm
- * devices. The type of this dm device has not been decided yet.
+ * devices. The type of this dm device may not have been decided yet.
* The type is decided at the first table loading time.
* To prevent problematic device stacking, clear the queue flag
* for request stacking support until then.
@@ -2115,13 +2230,21 @@ static void dm_init_md_queue(struct mapped_device *md)
* This queue is new, so no concurrency on the queue_flags.
*/
queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
+}
+static void dm_init_old_md_queue(struct mapped_device *md)
+{
+ md->use_blk_mq = false;
+ dm_init_md_queue(md);
+
+ /*
+ * Initialize aspects of queue that aren't relevant for blk-mq
+ */
md->queue->queuedata = md;
md->queue->backing_dev_info.congested_fn = dm_any_congested;
md->queue->backing_dev_info.congested_data = md;
- blk_queue_make_request(md->queue, dm_request);
+
blk_queue_bounce_limit(md->queue, BLK_BOUNCE_ANY);
- blk_queue_merge_bvec(md->queue, dm_merge_bvec);
}
/*
@@ -2153,6 +2276,7 @@ static struct mapped_device *alloc_dev(int minor)
if (r < 0)
goto bad_io_barrier;
+ md->use_blk_mq = use_blk_mq;
md->type = DM_TYPE_NONE;
mutex_init(&md->suspend_lock);
mutex_init(&md->type_lock);
@@ -2241,7 +2365,6 @@ static void free_dev(struct mapped_device *md)
int minor = MINOR(disk_devt(md->disk));
unlock_fs(md);
- bdput(md->bdev);
destroy_workqueue(md->wq);
if (md->kworker_task)
@@ -2252,19 +2375,24 @@ static void free_dev(struct mapped_device *md)
mempool_destroy(md->rq_pool);
if (md->bs)
bioset_free(md->bs);
- blk_integrity_unregister(md->disk);
- del_gendisk(md->disk);
+
cleanup_srcu_struct(&md->io_barrier);
free_table_devices(&md->table_devices);
- free_minor(minor);
+ dm_stats_cleanup(&md->stats);
spin_lock(&_minor_lock);
md->disk->private_data = NULL;
spin_unlock(&_minor_lock);
-
+ if (blk_get_integrity(md->disk))
+ blk_integrity_unregister(md->disk);
+ del_gendisk(md->disk);
put_disk(md->disk);
blk_cleanup_queue(md->queue);
- dm_stats_cleanup(&md->stats);
+ if (md->use_blk_mq)
+ blk_mq_free_tag_set(&md->tag_set);
+ bdput(md->bdev);
+ free_minor(minor);
+
module_put(THIS_MODULE);
kfree(md);
}
@@ -2273,7 +2401,7 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
{
struct dm_md_mempools *p = dm_table_get_md_mempools(t);
- if (md->io_pool && md->bs) {
+ if (md->bs) {
/* The md already has necessary mempools. */
if (dm_table_get_type(t) == DM_TYPE_BIO_BASED) {
/*
@@ -2305,7 +2433,7 @@ static void __bind_mempools(struct mapped_device *md, struct dm_table *t)
p->bs = NULL;
out:
- /* mempool bind completed, now no need any mempools in the table */
+ /* mempool bind completed, no longer need any mempools in the table */
dm_table_free_md_mempools(t);
}
@@ -2352,7 +2480,7 @@ int dm_queue_merge_is_compulsory(struct request_queue *q)
if (!q->merge_bvec_fn)
return 0;
- if (q->make_request_fn == dm_request) {
+ if (q->make_request_fn == dm_make_request) {
dev_md = q->queuedata;
if (test_bit(DMF_MERGE_IS_OPTIONAL, &dev_md->flags))
return 0;
@@ -2421,7 +2549,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
* This must be done before setting the queue restrictions,
* because request-based dm may be run just after the setting.
*/
- if (dm_table_request_based(t) && !blk_queue_stopped(q))
+ if (dm_table_request_based(t))
stop_queue(q);
__bind_mempools(md, t);
@@ -2503,14 +2631,6 @@ unsigned dm_get_md_type(struct mapped_device *md)
return md->type;
}
-static bool dm_md_type_request_based(struct mapped_device *md)
-{
- unsigned table_type = dm_get_md_type(md);
-
- return (table_type == DM_TYPE_REQUEST_BASED ||
- table_type == DM_TYPE_MQ_REQUEST_BASED);
-}
-
struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
{
return md->immutable_target_type;
@@ -2527,6 +2647,14 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
}
EXPORT_SYMBOL_GPL(dm_get_queue_limits);
+static void init_rq_based_worker_thread(struct mapped_device *md)
+{
+ /* Initialize the request-based DM worker thread */
+ init_kthread_worker(&md->kworker);
+ md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
+ "kdmwork-%s", dm_device_name(md));
+}
+
/*
* Fully initialize a request-based queue (->elevator, ->request_fn, etc).
*/
@@ -2535,27 +2663,160 @@ static int dm_init_request_based_queue(struct mapped_device *md)
struct request_queue *q = NULL;
if (md->queue->elevator)
- return 1;
+ return 0;
/* Fully initialize the queue */
q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
if (!q)
- return 0;
+ return -EINVAL;
+
+ /* disable dm_request_fn's merge heuristic by default */
+ md->seq_rq_merge_deadline_usecs = 0;
md->queue = q;
- dm_init_md_queue(md);
+ dm_init_old_md_queue(md);
blk_queue_softirq_done(md->queue, dm_softirq_done);
blk_queue_prep_rq(md->queue, dm_prep_fn);
- blk_queue_lld_busy(md->queue, dm_lld_busy);
- /* Also initialize the request-based DM worker thread */
- init_kthread_worker(&md->kworker);
- md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
- "kdmwork-%s", dm_device_name(md));
+ init_rq_based_worker_thread(md);
elv_register_queue(md->queue);
- return 1;
+ return 0;
+}
+
+static int dm_mq_init_request(void *data, struct request *rq,
+ unsigned int hctx_idx, unsigned int request_idx,
+ unsigned int numa_node)
+{
+ struct mapped_device *md = data;
+ struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
+
+ /*
+ * Must initialize md member of tio, otherwise it won't
+ * be available in dm_mq_queue_rq.
+ */
+ tio->md = md;
+
+ return 0;
+}
+
+static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+ const struct blk_mq_queue_data *bd)
+{
+ struct request *rq = bd->rq;
+ struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
+ struct mapped_device *md = tio->md;
+ int srcu_idx;
+ struct dm_table *map = dm_get_live_table(md, &srcu_idx);
+ struct dm_target *ti;
+ sector_t pos;
+
+ /* always use block 0 to find the target for flushes for now */
+ pos = 0;
+ if (!(rq->cmd_flags & REQ_FLUSH))
+ pos = blk_rq_pos(rq);
+
+ ti = dm_table_find_target(map, pos);
+ if (!dm_target_is_valid(ti)) {
+ dm_put_live_table(md, srcu_idx);
+ DMERR_LIMIT("request attempted access beyond the end of device");
+ /*
+ * Must perform setup, that rq_completed() requires,
+ * before returning BLK_MQ_RQ_QUEUE_ERROR
+ */
+ dm_start_request(md, rq);
+ return BLK_MQ_RQ_QUEUE_ERROR;
+ }
+ dm_put_live_table(md, srcu_idx);
+
+ if (ti->type->busy && ti->type->busy(ti))
+ return BLK_MQ_RQ_QUEUE_BUSY;
+
+ dm_start_request(md, rq);
+
+ /* Init tio using md established in .init_request */
+ init_tio(tio, rq, md);
+
+ /*
+ * Establish tio->ti before queuing work (map_tio_request)
+ * or making direct call to map_request().
+ */
+ tio->ti = ti;
+
+ /* Clone the request if underlying devices aren't blk-mq */
+ if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
+ /* clone request is allocated at the end of the pdu */
+ tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
+ if (!clone_rq(rq, md, tio, GFP_ATOMIC))
+ return BLK_MQ_RQ_QUEUE_BUSY;
+ queue_kthread_work(&md->kworker, &tio->work);
+ } else {
+ /* Direct call is fine since .queue_rq allows allocations */
+ if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
+ dm_requeue_unmapped_original_request(md, rq);
+ }
+
+ return BLK_MQ_RQ_QUEUE_OK;
+}
+
+static struct blk_mq_ops dm_mq_ops = {
+ .queue_rq = dm_mq_queue_rq,
+ .map_queue = blk_mq_map_queue,
+ .complete = dm_softirq_done,
+ .init_request = dm_mq_init_request,
+};
+
+static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
+{
+ unsigned md_type = dm_get_md_type(md);
+ struct request_queue *q;
+ int err;
+
+ memset(&md->tag_set, 0, sizeof(md->tag_set));
+ md->tag_set.ops = &dm_mq_ops;
+ md->tag_set.queue_depth = BLKDEV_MAX_RQ;
+ md->tag_set.numa_node = NUMA_NO_NODE;
+ md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+ md->tag_set.nr_hw_queues = 1;
+ if (md_type == DM_TYPE_REQUEST_BASED) {
+ /* make the memory for non-blk-mq clone part of the pdu */
+ md->tag_set.cmd_size = sizeof(struct dm_rq_target_io) + sizeof(struct request);
+ } else
+ md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
+ md->tag_set.driver_data = md;
+
+ err = blk_mq_alloc_tag_set(&md->tag_set);
+ if (err)
+ return err;
+
+ q = blk_mq_init_allocated_queue(&md->tag_set, md->queue);
+ if (IS_ERR(q)) {
+ err = PTR_ERR(q);
+ goto out_tag_set;
+ }
+ md->queue = q;
+ dm_init_md_queue(md);
+
+ /* backfill 'mq' sysfs registration normally done in blk_register_queue */
+ blk_mq_register_disk(md->disk);
+
+ if (md_type == DM_TYPE_REQUEST_BASED)
+ init_rq_based_worker_thread(md);
+
+ return 0;
+
+out_tag_set:
+ blk_mq_free_tag_set(&md->tag_set);
+ return err;
+}
+
+static unsigned filter_md_type(unsigned type, struct mapped_device *md)
+{
+ if (type == DM_TYPE_BIO_BASED)
+ return type;
+
+ return !md->use_blk_mq ? DM_TYPE_REQUEST_BASED : DM_TYPE_MQ_REQUEST_BASED;
}
/*
@@ -2563,9 +2824,29 @@ static int dm_init_request_based_queue(struct mapped_device *md)
*/
int dm_setup_md_queue(struct mapped_device *md)
{
- if (dm_md_type_request_based(md) && !dm_init_request_based_queue(md)) {
- DMWARN("Cannot initialize queue for request-based mapped device");
- return -EINVAL;
+ int r;
+ unsigned md_type = filter_md_type(dm_get_md_type(md), md);
+
+ switch (md_type) {
+ case DM_TYPE_REQUEST_BASED:
+ r = dm_init_request_based_queue(md);
+ if (r) {
+ DMWARN("Cannot initialize queue for request-based mapped device");
+ return r;
+ }
+ break;
+ case DM_TYPE_MQ_REQUEST_BASED:
+ r = dm_init_request_based_blk_mq_queue(md);
+ if (r) {
+ DMWARN("Cannot initialize queue for request-based blk-mq mapped device");
+ return r;
+ }
+ break;
+ case DM_TYPE_BIO_BASED:
+ dm_init_old_md_queue(md);
+ blk_queue_make_request(md->queue, dm_make_request);
+ blk_queue_merge_bvec(md->queue, dm_merge_bvec);
+ break;
}
return 0;
@@ -2616,6 +2897,19 @@ void dm_get(struct mapped_device *md)
BUG_ON(test_bit(DMF_FREEING, &md->flags));
}
+int dm_hold(struct mapped_device *md)
+{
+ spin_lock(&_minor_lock);
+ if (test_bit(DMF_FREEING, &md->flags)) {
+ spin_unlock(&_minor_lock);
+ return -EBUSY;
+ }
+ dm_get(md);
+ spin_unlock(&_minor_lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dm_hold);
+
const char *dm_device_name(struct mapped_device *md)
{
return md->name;
@@ -2629,19 +2923,26 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
might_sleep();
- spin_lock(&_minor_lock);
map = dm_get_live_table(md, &srcu_idx);
+
+ spin_lock(&_minor_lock);
idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
set_bit(DMF_FREEING, &md->flags);
spin_unlock(&_minor_lock);
- if (dm_request_based(md))
+ if (dm_request_based(md) && md->kworker_task)
flush_kthread_worker(&md->kworker);
+ /*
+ * Take suspend_lock so that presuspend and postsuspend methods
+ * do not race with internal suspend.
+ */
+ mutex_lock(&md->suspend_lock);
if (!dm_suspended_md(md)) {
dm_table_presuspend_targets(map);
dm_table_postsuspend_targets(map);
}
+ mutex_unlock(&md->suspend_lock);
/* dm_put_live_table must be before msleep, otherwise deadlock is possible */
dm_put_live_table(md, srcu_idx);
@@ -2883,7 +3184,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
*/
if (dm_request_based(md)) {
stop_queue(md->queue);
- flush_kthread_worker(&md->kworker);
+ if (md->kworker_task)
+ flush_kthread_worker(&md->kworker);
}
flush_workqueue(md->wq);
@@ -3115,6 +3417,7 @@ void dm_internal_suspend_fast(struct mapped_device *md)
flush_workqueue(md->wq);
dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE);
}
+EXPORT_SYMBOL_GPL(dm_internal_suspend_fast);
void dm_internal_resume_fast(struct mapped_device *md)
{
@@ -3126,6 +3429,7 @@ void dm_internal_resume_fast(struct mapped_device *md)
done:
mutex_unlock(&md->suspend_lock);
}
+EXPORT_SYMBOL_GPL(dm_internal_resume_fast);
/*-----------------------------------------------------------------
* Event notification.
@@ -3179,6 +3483,7 @@ struct gendisk *dm_disk(struct mapped_device *md)
{
return md->disk;
}
+EXPORT_SYMBOL_GPL(dm_disk);
struct kobject *dm_kobject(struct mapped_device *md)
{
@@ -3226,16 +3531,19 @@ int dm_noflush_suspending(struct dm_target *ti)
}
EXPORT_SYMBOL_GPL(dm_noflush_suspending);
-struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size)
+struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
+ unsigned integrity, unsigned per_bio_data_size)
{
struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
- struct kmem_cache *cachep;
+ struct kmem_cache *cachep = NULL;
unsigned int pool_size = 0;
unsigned int front_pad;
if (!pools)
return NULL;
+ type = filter_md_type(type, md);
+
switch (type) {
case DM_TYPE_BIO_BASED:
cachep = _io_cache;
@@ -3243,13 +3551,13 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
break;
case DM_TYPE_REQUEST_BASED:
+ cachep = _rq_tio_cache;
pool_size = dm_get_reserved_rq_based_ios();
pools->rq_pool = mempool_create_slab_pool(pool_size, _rq_cache);
if (!pools->rq_pool)
goto out;
/* fall through to setup remaining rq-based pools */
case DM_TYPE_MQ_REQUEST_BASED:
- cachep = _rq_tio_cache;
if (!pool_size)
pool_size = dm_get_reserved_rq_based_ios();
front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
@@ -3257,12 +3565,14 @@ struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, u
WARN_ON(per_bio_data_size != 0);
break;
default:
- goto out;
+ BUG();
}
- pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
- if (!pools->io_pool)
- goto out;
+ if (cachep) {
+ pools->io_pool = mempool_create_slab_pool(pool_size, cachep);
+ if (!pools->io_pool)
+ goto out;
+ }
pools->bs = bioset_create_nobvec(pool_size, front_pad);
if (!pools->bs)
@@ -3319,6 +3629,9 @@ MODULE_PARM_DESC(reserved_bio_based_ios, "Reserved IOs in bio-based mempools");
module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
+module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");
+
MODULE_DESCRIPTION(DM_NAME " driver");
MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");