1 files changed, 327 insertions, 273 deletions
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5df40480228b..be4905769a45 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -106,14 +106,6 @@ struct dm_rq_clone_bio_info {
 	struct bio clone;
 };
 
-union map_info *dm_get_rq_mapinfo(struct request *rq)
-{
-	if (rq && rq->end_io_data)
-		return &((struct dm_rq_target_io *)rq->end_io_data)->info;
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
-
 #define MINOR_ALLOCED ((void *)-1)
 
 /*
@@ -129,28 +121,18 @@ EXPORT_SYMBOL_GPL(dm_get_rq_mapinfo);
 #define DMF_SUSPENDED_INTERNALLY 7
 
 /*
- * A dummy definition to make RCU happy.
- * struct dm_table should never be dereferenced in this file.
- */
-struct dm_table {
-	int undefined__;
-};
-
-/*
  * Work processed by per-device workqueue.
  */
 struct mapped_device {
 	struct srcu_struct io_barrier;
 	struct mutex suspend_lock;
-	atomic_t holders;
-	atomic_t open_count;
 
 	/*
-	 * The current mapping.
+	 * The current mapping (struct dm_table *).
 	 * Use dm_get_live_table{_fast} or take suspend_lock for
 	 * dereference.
 	 */
-	struct dm_table __rcu *map;
+	void __rcu *map;
 
 	struct list_head table_devices;
 	struct mutex table_devices_lock;
@@ -158,10 +140,16 @@ struct mapped_device {
 	unsigned long flags;
 
 	struct request_queue *queue;
+	int numa_node_id;
+
 	unsigned type;
 	/* Protect queue and type against concurrent access. */
 	struct mutex type_lock;
 
+	atomic_t holders;
+	atomic_t open_count;
+
+	struct dm_target *immutable_target;
 	struct target_type *immutable_target_type;
 
 	struct gendisk *disk;
@@ -175,8 +163,20 @@ struct mapped_device {
 	atomic_t pending[2];
 	wait_queue_head_t wait;
 	struct work_struct work;
-	struct bio_list deferred;
 	spinlock_t deferred_lock;
+	struct bio_list deferred;
+
+	/*
+	 * Event handling.
+	 */
+	wait_queue_head_t eventq;
+	atomic_t event_nr;
+	atomic_t uevent_seq;
+	struct list_head uevent_list;
+	spinlock_t uevent_lock; /* Protect access to uevent_list */
+
+	/* the number of internal suspends */
+	unsigned internal_suspend_count;
 
 	/*
 	 * Processing queue (flush)
@@ -192,32 +192,21 @@ struct mapped_device {
 	struct bio_set *bs;
 
 	/*
-	 * Event handling.
-	 */
-	atomic_t event_nr;
-	wait_queue_head_t eventq;
-	atomic_t uevent_seq;
-	struct list_head uevent_list;
-	spinlock_t uevent_lock; /* Protect access to uevent_list */
-
-	/*
 	 * freeze/thaw support require holding onto a super block
 	 */
 	struct super_block *frozen_sb;
-	struct block_device *bdev;
 
 	/* forced geometry settings */
 	struct hd_geometry geometry;
 
+	struct block_device *bdev;
+
 	/* kobject and completion */
 	struct dm_kobject_holder kobj_holder;
 
 	/* zero-length flush that will be cloned and submitted to targets */
 	struct bio flush_bio;
 
-	/* the number of internal suspends */
-	unsigned internal_suspend_count;
-
 	struct dm_stats stats;
 
 	struct kthread_worker kworker;
@@ -230,8 +219,9 @@ struct mapped_device {
 	ktime_t last_rq_start_time;
 
 	/* for blk-mq request-based DM support */
-	struct blk_mq_tag_set tag_set;
-	bool use_blk_mq;
+	struct blk_mq_tag_set *tag_set;
+	bool use_blk_mq:1;
+	bool init_tio_pdu:1;
 };
 
 #ifdef CONFIG_DM_MQ_DEFAULT
@@ -240,10 +230,19 @@ static bool use_blk_mq = true;
 static bool use_blk_mq = false;
 #endif
 
+#define DM_MQ_NR_HW_QUEUES 1
+#define DM_MQ_QUEUE_DEPTH 2048
+#define DM_NUMA_NODE NUMA_NO_NODE
+
+static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
+static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;
+static int dm_numa_node = DM_NUMA_NODE;
+
 bool dm_use_blk_mq(struct mapped_device *md)
 {
 	return md->use_blk_mq;
 }
+EXPORT_SYMBOL_GPL(dm_use_blk_mq);
 
 /*
  * For mempools pre-allocation at the table loading time.
@@ -277,6 +276,27 @@ static unsigned reserved_bio_based_ios = RESERVED_BIO_BASED_IOS;
  */
 static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
 
+static int __dm_get_module_param_int(int *module_param, int min, int max)
+{
+	int param = ACCESS_ONCE(*module_param);
+	int modified_param = 0;
+	bool modified = true;
+
+	if (param < min)
+		modified_param = min;
+	else if (param > max)
+		modified_param = max;
+	else
+		modified = false;
+
+	if (modified) {
+		(void)cmpxchg(module_param, param, modified_param);
+		param = modified_param;
+	}
+
+	return param;
+}
+
 static unsigned __dm_get_module_param(unsigned *module_param,
 				      unsigned def, unsigned max)
 {
@@ -310,6 +330,23 @@ unsigned dm_get_reserved_rq_based_ios(void)
 }
 EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
 
+static unsigned dm_get_blk_mq_nr_hw_queues(void)
+{
+	return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
+}
+
+static unsigned dm_get_blk_mq_queue_depth(void)
+{
+	return __dm_get_module_param(&dm_mq_queue_depth,
+				     DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
+}
+
+static unsigned dm_get_numa_node(void)
+{
+	return __dm_get_module_param_int(&dm_numa_node,
+					 DM_NUMA_NODE, num_online_nodes() - 1);
+}
+
 static int __init local_init(void)
 {
 	int r = -ENOMEM;
@@ -323,7 +360,7 @@ static int __init local_init(void)
 	if (!_rq_tio_cache)
 		goto out_free_io_cache;
 
-	_rq_cache = kmem_cache_create("dm_clone_request", sizeof(struct request),
+	_rq_cache = kmem_cache_create("dm_old_clone_request", sizeof(struct request),
 				      __alignof__(struct request), 0, NULL);
 	if (!_rq_cache)
 		goto out_free_rq_tio_cache;
@@ -556,16 +593,17 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 	return dm_get_geometry(md, geo);
 }
 
-static int dm_get_live_table_for_ioctl(struct mapped_device *md,
-		struct dm_target **tgt, struct block_device **bdev,
-		fmode_t *mode, int *srcu_idx)
+static int dm_grab_bdev_for_ioctl(struct mapped_device *md,
+				  struct block_device **bdev,
+				  fmode_t *mode)
 {
+	struct dm_target *tgt;
 	struct dm_table *map;
-	int r;
+	int srcu_idx, r;
 
 retry:
 	r = -ENOTTY;
-	map = dm_get_live_table(md, srcu_idx);
+	map = dm_get_live_table(md, &srcu_idx);
 	if (!map || !dm_table_get_size(map))
 		goto out;
 
@@ -573,9 +611,8 @@ retry:
 	if (dm_table_get_num_targets(map) != 1)
 		goto out;
 
-	*tgt = dm_table_get_target(map, 0);
-
-	if (!(*tgt)->type->prepare_ioctl)
+	tgt = dm_table_get_target(map, 0);
+	if (!tgt->type->prepare_ioctl)
 		goto out;
 
 	if (dm_suspended_md(md)) {
@@ -583,14 +620,16 @@ retry:
 		goto out;
 	}
 
-	r = (*tgt)->type->prepare_ioctl(*tgt, bdev, mode);
+	r = tgt->type->prepare_ioctl(tgt, bdev, mode);
 	if (r < 0)
 		goto out;
 
+	bdgrab(*bdev);
+	dm_put_live_table(md, srcu_idx);
 	return r;
 
 out:
-	dm_put_live_table(md, *srcu_idx);
+	dm_put_live_table(md, srcu_idx);
 	if (r == -ENOTCONN && !fatal_signal_pending(current)) {
 		msleep(10);
 		goto retry;
@@ -602,11 +641,9 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 			unsigned int cmd, unsigned long arg)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
-	struct dm_target *tgt;
-	struct block_device *tgt_bdev = NULL;
-	int srcu_idx, r;
+	int r;
 
-	r = dm_get_live_table_for_ioctl(md, &tgt, &tgt_bdev, &mode, &srcu_idx);
+	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -621,9 +658,9 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
 			goto out;
 	}
 
-	r =  __blkdev_driver_ioctl(tgt_bdev, mode, cmd, arg);
+	r =  __blkdev_driver_ioctl(bdev, mode, cmd, arg);
 out:
-	dm_put_live_table(md, srcu_idx);
+	bdput(bdev);
 	return r;
 }
 
@@ -642,24 +679,24 @@ static void free_tio(struct mapped_device *md, struct dm_target_io *tio)
 	bio_put(&tio->clone);
 }
 
-static struct dm_rq_target_io *alloc_rq_tio(struct mapped_device *md,
-					    gfp_t gfp_mask)
+static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
+						gfp_t gfp_mask)
 {
 	return mempool_alloc(md->io_pool, gfp_mask);
 }
 
-static void free_rq_tio(struct dm_rq_target_io *tio)
+static void free_old_rq_tio(struct dm_rq_target_io *tio)
 {
 	mempool_free(tio, tio->md->io_pool);
 }
 
-static struct request *alloc_clone_request(struct mapped_device *md,
-					   gfp_t gfp_mask)
+static struct request *alloc_old_clone_request(struct mapped_device *md,
+					       gfp_t gfp_mask)
 {
 	return mempool_alloc(md->rq_pool, gfp_mask);
 }
 
-static void free_clone_request(struct mapped_device *md, struct request *rq)
+static void free_old_clone_request(struct mapped_device *md, struct request *rq)
 {
 	mempool_free(rq, md->rq_pool);
 }
@@ -827,7 +864,7 @@ int dm_get_table_device(struct mapped_device *md, dev_t dev, fmode_t mode,
 	mutex_lock(&md->table_devices_lock);
 	td = find_table_device(&md->table_devices, dev, mode);
 	if (!td) {
-		td = kmalloc(sizeof(*td), GFP_KERNEL);
+		td = kmalloc_node(sizeof(*td), GFP_KERNEL, md->numa_node_id);
 		if (!td) {
 			mutex_unlock(&md->table_devices_lock);
 			return -ENOMEM;
@@ -1109,12 +1146,8 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 	 * back into ->request_fn() could deadlock attempting to grab the
 	 * queue lock again.
 	 */
-	if (run_queue) {
-		if (md->queue->mq_ops)
-			blk_mq_run_hw_queues(md->queue, true);
-		else
-			blk_run_queue_async(md->queue);
-	}
+	if (!md->queue->mq_ops && run_queue)
+		blk_run_queue_async(md->queue);
 
 	/*
 	 * dm_put() must be at the end of this function. See the comment above
@@ -1134,15 +1167,10 @@ static void free_rq_clone(struct request *clone)
 		tio->ti->type->release_clone_rq(clone);
 	else if (!md->queue->mq_ops)
 		/* request_fn queue stacked on request_fn queue(s) */
-		free_clone_request(md, clone);
-	/*
-	 * NOTE: for the blk-mq queue stacked on request_fn queue(s) case:
-	 * no need to call free_clone_request() because we leverage blk-mq by
-	 * allocating the clone at the end of the blk-mq pdu (see: clone_rq)
-	 */
+		free_old_clone_request(md, clone);
 
 	if (!md->queue->mq_ops)
-		free_rq_tio(tio);
+		free_old_rq_tio(tio);
 }
 
 /*
@@ -1191,12 +1219,14 @@ static void dm_unprep_request(struct request *rq)
 
 	if (clone)
 		free_rq_clone(clone);
+	else if (!tio->md->queue->mq_ops)
+		free_old_rq_tio(tio);
 }
 
 /*
  * Requeue the original request of a clone.
  */
-static void old_requeue_request(struct request *rq)
+static void dm_old_requeue_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 	unsigned long flags;
@@ -1207,45 +1237,57 @@ static void old_requeue_request(struct request *rq)
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
+static void dm_mq_requeue_request(struct request *rq)
+{
+	struct request_queue *q = rq->q;
+	unsigned long flags;
+
+	blk_mq_requeue_request(rq);
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (!blk_queue_stopped(q))
+		blk_mq_kick_requeue_list(q);
+	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
 static void dm_requeue_original_request(struct mapped_device *md,
 					struct request *rq)
 {
 	int rw = rq_data_dir(rq);
 
+	rq_end_stats(md, rq);
 	dm_unprep_request(rq);
 
-	rq_end_stats(md, rq);
 	if (!rq->q->mq_ops)
-		old_requeue_request(rq);
-	else {
-		blk_mq_requeue_request(rq);
-		blk_mq_kick_requeue_list(rq->q);
-	}
+		dm_old_requeue_request(rq);
+	else
+		dm_mq_requeue_request(rq);
 
 	rq_completed(md, rw, false);
 }
 
-static void old_stop_queue(struct request_queue *q)
+static void dm_old_stop_queue(struct request_queue *q)
 {
 	unsigned long flags;
 
-	if (blk_queue_stopped(q))
+	spin_lock_irqsave(q->queue_lock, flags);
+	if (blk_queue_stopped(q)) {
+		spin_unlock_irqrestore(q->queue_lock, flags);
 		return;
+	}
 
-	spin_lock_irqsave(q->queue_lock, flags);
 	blk_stop_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void stop_queue(struct request_queue *q)
+static void dm_stop_queue(struct request_queue *q)
 {
 	if (!q->mq_ops)
-		old_stop_queue(q);
+		dm_old_stop_queue(q);
 	else
 		blk_mq_stop_hw_queues(q);
 }
 
-static void old_start_queue(struct request_queue *q)
+static void dm_old_start_queue(struct request_queue *q)
 {
 	unsigned long flags;
 
@@ -1255,12 +1297,14 @@ static void old_start_queue(struct request_queue *q)
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void start_queue(struct request_queue *q)
+static void dm_start_queue(struct request_queue *q)
 {
 	if (!q->mq_ops)
-		old_start_queue(q);
-	else
+		dm_old_start_queue(q);
+	else {
 		blk_mq_start_stopped_hw_queues(q, true);
+		blk_mq_kick_requeue_list(q);
+	}
 }
 
 static void dm_done(struct request *clone, int error, bool mapped)
@@ -1311,7 +1355,7 @@ static void dm_softirq_done(struct request *rq)
 		if (!rq->q->mq_ops) {
 			blk_end_request_all(rq, tio->error);
 			rq_completed(tio->md, rw, false);
-			free_rq_tio(tio);
+			free_old_rq_tio(tio);
 		} else {
 			blk_mq_end_request(rq, tio->error);
 			rq_completed(tio->md, rw, false);
@@ -1334,7 +1378,10 @@ static void dm_complete_request(struct request *rq, int error)
 	struct dm_rq_target_io *tio = tio_from_request(rq);
 
 	tio->error = error;
-	blk_complete_request(rq);
+	if (!rq->q->mq_ops)
+		blk_complete_request(rq);
+	else
+		blk_mq_complete_request(rq, error);
 }
 
 /*
@@ -1350,7 +1397,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
 }
 
 /*
- * Called with the clone's queue lock held (for non-blk-mq)
+ * Called with the clone's queue lock held (in the case of .request_fn)
  */
 static void end_clone_request(struct request *clone, int error)
 {
@@ -1520,21 +1567,26 @@ static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len)
 /*
  * Creates a bio that consists of range of complete bvecs.
  */
-static void clone_bio(struct dm_target_io *tio, struct bio *bio,
-		      sector_t sector, unsigned len)
+static int clone_bio(struct dm_target_io *tio, struct bio *bio,
+		     sector_t sector, unsigned len)
 {
 	struct bio *clone = &tio->clone;
 
 	__bio_clone_fast(clone, bio);
 
-	if (bio_integrity(bio))
-		bio_integrity_clone(clone, bio, GFP_NOIO);
+	if (bio_integrity(bio)) {
+		int r = bio_integrity_clone(clone, bio, GFP_NOIO);
+		if (r < 0)
+			return r;
+	}
 
 	bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
 	clone->bi_iter.bi_size = to_bytes(len);
 
 	if (bio_integrity(bio))
 		bio_integrity_trim(clone, 0, len);
+
+	return 0;
 }
 
 static struct dm_target_io *alloc_tio(struct clone_info *ci,
@@ -1591,13 +1643,14 @@ static int __send_empty_flush(struct clone_info *ci)
 	return 0;
 }
 
-static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
+static int __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti,
 				     sector_t sector, unsigned *len)
 {
 	struct bio *bio = ci->bio;
 	struct dm_target_io *tio;
 	unsigned target_bio_nr;
 	unsigned num_target_bios = 1;
+	int r = 0;
 
 	/*
 	 * Does the target want to receive duplicate copies of the bio?
@@ -1608,9 +1661,13 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti
 	for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) {
 		tio = alloc_tio(ci, ti, target_bio_nr);
 		tio->len_ptr = len;
-		clone_bio(tio, bio, sector, *len);
+		r = clone_bio(tio, bio, sector, *len);
+		if (r < 0)
+			break;
 		__map_bio(tio);
 	}
+
+	return r;
 }
 
 typedef unsigned (*get_num_bios_fn)(struct dm_target *ti);
@@ -1687,6 +1744,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
 	struct bio *bio = ci->bio;
 	struct dm_target *ti;
 	unsigned len;
+	int r;
 
 	if (unlikely(bio->bi_rw & REQ_DISCARD))
 		return __send_discard(ci);
@@ -1699,7 +1757,9 @@ static int __split_and_process_non_flush(struct clone_info *ci)
 
 	len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
 
-	__clone_and_map_data_bio(ci, ti, ci->sector, &len);
+	r = __clone_and_map_data_bio(ci, ti, ci->sector, &len);
+	if (r < 0)
+		return r;
 
 	ci->sector += len;
 	ci->sector_count -= len;
@@ -1837,28 +1897,22 @@ static int setup_clone(struct request *clone, struct request *rq,
 	return 0;
 }
 
-static struct request *clone_rq(struct request *rq, struct mapped_device *md,
-				struct dm_rq_target_io *tio, gfp_t gfp_mask)
+static struct request *clone_old_rq(struct request *rq, struct mapped_device *md,
+				    struct dm_rq_target_io *tio, gfp_t gfp_mask)
 {
 	/*
-	 * Do not allocate a clone if tio->clone was already set
-	 * (see: dm_mq_queue_rq).
+	 * Create clone for use with .request_fn request_queue
 	 */
-	bool alloc_clone = !tio->clone;
 	struct request *clone;
 
-	if (alloc_clone) {
-		clone = alloc_clone_request(md, gfp_mask);
-		if (!clone)
-			return NULL;
-	} else
-		clone = tio->clone;
+	clone = alloc_old_clone_request(md, gfp_mask);
+	if (!clone)
+		return NULL;
 
 	blk_rq_init(NULL, clone);
 	if (setup_clone(clone, rq, tio, gfp_mask)) {
 		/* -ENOMEM */
-		if (alloc_clone)
-			free_clone_request(md, clone);
+		free_old_clone_request(md, clone);
 		return NULL;
 	}
 
@@ -1875,29 +1929,40 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
 	tio->clone = NULL;
 	tio->orig = rq;
 	tio->error = 0;
-	memset(&tio->info, 0, sizeof(tio->info));
+	/*
+	 * Avoid initializing info for blk-mq; it passes
+	 * target-specific data through info.ptr
+	 * (see: dm_mq_init_request)
+	 */
+	if (!md->init_tio_pdu)
+		memset(&tio->info, 0, sizeof(tio->info));
 	if (md->kworker_task)
 		init_kthread_work(&tio->work, map_tio_request);
 }
 
-static struct dm_rq_target_io *prep_tio(struct request *rq,
-					struct mapped_device *md, gfp_t gfp_mask)
+static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq,
+					       struct mapped_device *md,
+					       gfp_t gfp_mask)
 {
 	struct dm_rq_target_io *tio;
 	int srcu_idx;
 	struct dm_table *table;
 
-	tio = alloc_rq_tio(md, gfp_mask);
+	tio = alloc_old_rq_tio(md, gfp_mask);
 	if (!tio)
 		return NULL;
 
 	init_tio(tio, rq, md);
 
 	table = dm_get_live_table(md, &srcu_idx);
+	/*
+	 * Must clone a request if this .request_fn DM device
+	 * is stacked on .request_fn device(s).
+	 */
 	if (!dm_table_mq_request_based(table)) {
-		if (!clone_rq(rq, md, tio, gfp_mask)) {
+		if (!clone_old_rq(rq, md, tio, gfp_mask)) {
 			dm_put_live_table(md, srcu_idx);
-			free_rq_tio(tio);
+			free_old_rq_tio(tio);
 			return NULL;
 		}
 	}
@@ -1909,7 +1974,7 @@ static struct dm_rq_target_io *prep_tio(struct request *rq,
 /*
  * Called with the queue lock held.
  */
-static int dm_prep_fn(struct request_queue *q, struct request *rq)
+static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
 {
 	struct mapped_device *md = q->queuedata;
 	struct dm_rq_target_io *tio;
@@ -1919,7 +1984,7 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
 		return BLKPREP_KILL;
 	}
 
-	tio = prep_tio(rq, md, GFP_ATOMIC);
+	tio = dm_old_prep_tio(rq, md, GFP_ATOMIC);
 	if (!tio)
 		return BLKPREP_DEFER;
 
@@ -2077,12 +2142,18 @@ static bool dm_request_peeked_before_merge_deadline(struct mapped_device *md)
 static void dm_request_fn(struct request_queue *q)
 {
 	struct mapped_device *md = q->queuedata;
-	int srcu_idx;
-	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
-	struct dm_target *ti;
+	struct dm_target *ti = md->immutable_target;
 	struct request *rq;
 	struct dm_rq_target_io *tio;
-	sector_t pos;
+	sector_t pos = 0;
+
+	if (unlikely(!ti)) {
+		int srcu_idx;
+		struct dm_table *map = dm_get_live_table(md, &srcu_idx);
+
+		ti = dm_table_find_target(map, pos);
+		dm_put_live_table(md, srcu_idx);
+	}
 
 	/*
 	 * For suspend, check blk_queue_stopped() and increment
@@ -2093,33 +2164,21 @@ static void dm_request_fn(struct request_queue *q)
 	while (!blk_queue_stopped(q)) {
 		rq = blk_peek_request(q);
 		if (!rq)
-			goto out;
+			return;
 
 		/* always use block 0 to find the target for flushes for now */
 		pos = 0;
 		if (!(rq->cmd_flags & REQ_FLUSH))
 			pos = blk_rq_pos(rq);
 
-		ti = dm_table_find_target(map, pos);
-		if (!dm_target_is_valid(ti)) {
-			/*
-			 * Must perform setup, that rq_completed() requires,
-			 * before calling dm_kill_unmapped_request
-			 */
-			DMERR_LIMIT("request attempted access beyond the end of device");
-			dm_start_request(md, rq);
-			dm_kill_unmapped_request(rq, -EIO);
-			continue;
+		if ((dm_request_peeked_before_merge_deadline(md) &&
+		     md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
+		     md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
+		    (ti->type->busy && ti->type->busy(ti))) {
+			blk_delay_queue(q, HZ / 100);
+			return;
 		}
 
-		if (dm_request_peeked_before_merge_deadline(md) &&
-		    md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
-		    md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq))
-			goto delay_and_out;
-
-		if (ti->type->busy && ti->type->busy(ti))
-			goto delay_and_out;
-
 		dm_start_request(md, rq);
 
 		tio = tio_from_request(rq);
@@ -2128,13 +2187,6 @@ static void dm_request_fn(struct request_queue *q)
 		queue_kthread_work(&md->kworker, &tio->work);
 		BUG_ON(!irqs_disabled());
 	}
-
-	goto out;
-
-delay_and_out:
-	blk_delay_queue(q, HZ / 100);
-out:
-	dm_put_live_table(md, srcu_idx);
 }
 
 static int dm_any_congested(void *congested_data, int bdi_bits)
@@ -2144,19 +2196,18 @@ static int dm_any_congested(void *congested_data, int bdi_bits)
 	struct dm_table *map;
 
 	if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) {
-		map = dm_get_live_table_fast(md);
-		if (map) {
+		if (dm_request_based(md)) {
 			/*
-			 * Request-based dm cares about only own queue for
-			 * the query about congestion status of request_queue
+			 * With request-based DM we only need to check the
+			 * top-level queue for congestion.
 			 */
-			if (dm_request_based(md))
-				r = md->queue->backing_dev_info.wb.state &
-				    bdi_bits;
-			else
+			r = md->queue->backing_dev_info.wb.state & bdi_bits;
+		} else {
+			map = dm_get_live_table_fast(md);
+			if (map)
 				r = dm_table_any_congested(map, bdi_bits);
+			dm_put_live_table_fast(md);
 		}
-		dm_put_live_table_fast(md);
 	}
 
 	return r;
@@ -2236,7 +2287,7 @@ static void dm_init_md_queue(struct mapped_device *md)
 	md->queue->backing_dev_info.congested_data = md;
 }
 
-static void dm_init_old_md_queue(struct mapped_device *md)
+static void dm_init_normal_md_queue(struct mapped_device *md)
 {
 	md->use_blk_mq = false;
 	dm_init_md_queue(md);
@@ -2283,10 +2334,11 @@ static void cleanup_mapped_device(struct mapped_device *md)
  */
 static struct mapped_device *alloc_dev(int minor)
 {
-	int r;
-	struct mapped_device *md = kzalloc(sizeof(*md), GFP_KERNEL);
+	int r, numa_node_id = dm_get_numa_node();
+	struct mapped_device *md;
 	void *old_md;
 
+	md = kzalloc_node(sizeof(*md), GFP_KERNEL, numa_node_id);
 	if (!md) {
 		DMWARN("unable to allocate device, out of memory.");
 		return NULL;
@@ -2307,7 +2359,9 @@ static struct mapped_device *alloc_dev(int minor)
 	if (r < 0)
 		goto bad_io_barrier;
 
+	md->numa_node_id = numa_node_id;
 	md->use_blk_mq = use_blk_mq;
+	md->init_tio_pdu = false;
 	md->type = DM_TYPE_NONE;
 	mutex_init(&md->suspend_lock);
 	mutex_init(&md->type_lock);
@@ -2321,13 +2375,13 @@ static struct mapped_device *alloc_dev(int minor)
 	INIT_LIST_HEAD(&md->table_devices);
 	spin_lock_init(&md->uevent_lock);
 
-	md->queue = blk_alloc_queue(GFP_KERNEL);
+	md->queue = blk_alloc_queue_node(GFP_KERNEL, numa_node_id);
 	if (!md->queue)
 		goto bad;
 
 	dm_init_md_queue(md);
 
-	md->disk = alloc_disk(1);
+	md->disk = alloc_disk_node(1, numa_node_id);
 	if (!md->disk)
 		goto bad;
 
@@ -2391,8 +2445,10 @@ static void free_dev(struct mapped_device *md)
 	unlock_fs(md);
 
 	cleanup_mapped_device(md);
-	if (md->use_blk_mq)
-		blk_mq_free_tag_set(&md->tag_set);
+	if (md->tag_set) {
+		blk_mq_free_tag_set(md->tag_set);
+		kfree(md->tag_set);
+	}
 
 	free_table_devices(&md->table_devices);
 	dm_stats_cleanup(&md->stats);
@@ -2500,13 +2556,20 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 	 * This must be done before setting the queue restrictions,
 	 * because request-based dm may be run just after the setting.
 	 */
-	if (dm_table_request_based(t))
-		stop_queue(q);
+	if (dm_table_request_based(t)) {
+		dm_stop_queue(q);
+		/*
+		 * Leverage the fact that request-based DM targets are
+		 * immutable singletons and establish md->immutable_target
+		 * - used to optimize both dm_request_fn and dm_mq_queue_rq
+		 */
+		md->immutable_target = dm_table_get_immutable_target(t);
+	}
 
 	__bind_mempools(md, t);
 
 	old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
-	rcu_assign_pointer(md->map, t);
+	rcu_assign_pointer(md->map, (void *)t);
 	md->immutable_target_type = dm_table_get_immutable_target_type(t);
 
 	dm_table_set_restrictions(t, q, limits);
@@ -2572,7 +2635,6 @@ void dm_set_md_type(struct mapped_device *md, unsigned type)
 
 unsigned dm_get_md_type(struct mapped_device *md)
 {
-	BUG_ON(!mutex_is_locked(&md->type_lock));
 	return md->type;
 }
 
@@ -2592,7 +2654,7 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_get_queue_limits);
 
-static void init_rq_based_worker_thread(struct mapped_device *md)
+static void dm_old_init_rq_based_worker_thread(struct mapped_device *md)
 {
 	/* Initialize the request-based DM worker thread */
 	init_kthread_worker(&md->kworker);
@@ -2601,26 +2663,22 @@ static void init_rq_based_worker_thread(struct mapped_device *md)
 }
 
 /*
- * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
+ * Fully initialize a .request_fn request-based queue.
  */
-static int dm_init_request_based_queue(struct mapped_device *md)
+static int dm_old_init_request_queue(struct mapped_device *md)
 {
-	struct request_queue *q = NULL;
-
 	/* Fully initialize the queue */
-	q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
-	if (!q)
+	if (!blk_init_allocated_queue(md->queue, dm_request_fn, NULL))
 		return -EINVAL;
 
 	/* disable dm_request_fn's merge heuristic by default */
 	md->seq_rq_merge_deadline_usecs = 0;
 
-	md->queue = q;
-	dm_init_old_md_queue(md);
+	dm_init_normal_md_queue(md);
 	blk_queue_softirq_done(md->queue, dm_softirq_done);
-	blk_queue_prep_rq(md->queue, dm_prep_fn);
+	blk_queue_prep_rq(md->queue, dm_old_prep_fn);
 
-	init_rq_based_worker_thread(md);
+	dm_old_init_rq_based_worker_thread(md);
 
 	elv_register_queue(md->queue);
 
@@ -2640,6 +2698,11 @@ static int dm_mq_init_request(void *data, struct request *rq,
 	 */
 	tio->md = md;
 
+	if (md->init_tio_pdu) {
+		/* target-specific per-io data is immediately after the tio */
+		tio->info.ptr = tio + 1;
+	}
+
 	return 0;
 }
 
@@ -2649,28 +2712,15 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct request *rq = bd->rq;
 	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
 	struct mapped_device *md = tio->md;
-	int srcu_idx;
-	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
-	struct dm_target *ti;
-	sector_t pos;
+	struct dm_target *ti = md->immutable_target;
 
-	/* always use block 0 to find the target for flushes for now */
-	pos = 0;
-	if (!(rq->cmd_flags & REQ_FLUSH))
-		pos = blk_rq_pos(rq);
+	if (unlikely(!ti)) {
+		int srcu_idx;
+		struct dm_table *map = dm_get_live_table(md, &srcu_idx);
 
-	ti = dm_table_find_target(map, pos);
-	if (!dm_target_is_valid(ti)) {
+		ti = dm_table_find_target(map, 0);
 		dm_put_live_table(md, srcu_idx);
-		DMERR_LIMIT("request attempted access beyond the end of device");
-		/*
-		 * Must perform setup, that rq_completed() requires,
-		 * before returning BLK_MQ_RQ_QUEUE_ERROR
-		 */
-		dm_start_request(md, rq);
-		return BLK_MQ_RQ_QUEUE_ERROR;
 	}
-	dm_put_live_table(md, srcu_idx);
 
 	if (ti->type->busy && ti->type->busy(ti))
 		return BLK_MQ_RQ_QUEUE_BUSY;
@@ -2686,20 +2736,12 @@ static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 	 */
 	tio->ti = ti;
 
-	/* Clone the request if underlying devices aren't blk-mq */
-	if (dm_table_get_type(map) == DM_TYPE_REQUEST_BASED) {
-		/* clone request is allocated at the end of the pdu */
-		tio->clone = (void *)blk_mq_rq_to_pdu(rq) + sizeof(struct dm_rq_target_io);
-		(void) clone_rq(rq, md, tio, GFP_ATOMIC);
-		queue_kthread_work(&md->kworker, &tio->work);
-	} else {
-		/* Direct call is fine since .queue_rq allows allocations */
-		if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
-			/* Undo dm_start_request() before requeuing */
-			rq_end_stats(md, rq);
-			rq_completed(md, rq_data_dir(rq), false);
-			return BLK_MQ_RQ_QUEUE_BUSY;
-		}
+	/* Direct call is fine since .queue_rq allows allocations */
+	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
+		/* Undo dm_start_request() before requeuing */
+		rq_end_stats(md, rq);
+		rq_completed(md, rq_data_dir(rq), false);
+		return BLK_MQ_RQ_QUEUE_BUSY;
 	}
 
 	return BLK_MQ_RQ_QUEUE_OK;
@@ -2712,47 +2754,56 @@ static struct blk_mq_ops dm_mq_ops = {
 	.init_request = dm_mq_init_request,
 };
 
-static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
+static int dm_mq_init_request_queue(struct mapped_device *md,
+				    struct dm_target *immutable_tgt)
 {
-	unsigned md_type = dm_get_md_type(md);
 	struct request_queue *q;
 	int err;
 
-	memset(&md->tag_set, 0, sizeof(md->tag_set));
-	md->tag_set.ops = &dm_mq_ops;
-	md->tag_set.queue_depth = BLKDEV_MAX_RQ;
-	md->tag_set.numa_node = NUMA_NO_NODE;
-	md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
-	md->tag_set.nr_hw_queues = 1;
-	if (md_type == DM_TYPE_REQUEST_BASED) {
-		/* make the memory for non-blk-mq clone part of the pdu */
-		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io) + sizeof(struct request);
-	} else
-		md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
-	md->tag_set.driver_data = md;
-
-	err = blk_mq_alloc_tag_set(&md->tag_set);
+	if (dm_get_md_type(md) == DM_TYPE_REQUEST_BASED) {
+		DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
+		return -EINVAL;
+	}
+
+	md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
+	if (!md->tag_set)
+		return -ENOMEM;
+
+	md->tag_set->ops = &dm_mq_ops;
+	md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
+	md->tag_set->numa_node = md->numa_node_id;
+	md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
+	md->tag_set->driver_data = md;
+
+	md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
+	if (immutable_tgt && immutable_tgt->per_io_data_size) {
+		/* any target-specific per-io data is immediately after the tio */
+		md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
+		md->init_tio_pdu = true;
+	}
+
+	err = blk_mq_alloc_tag_set(md->tag_set);
 	if (err)
-		return err;
+		goto out_kfree_tag_set;
 
-	q = blk_mq_init_allocated_queue(&md->tag_set, md->queue);
+	q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
 	if (IS_ERR(q)) {
 		err = PTR_ERR(q);
 		goto out_tag_set;
 	}
-	md->queue = q;
 	dm_init_md_queue(md);
 
 	/* backfill 'mq' sysfs registration normally done in blk_register_queue */
 	blk_mq_register_disk(md->disk);
 
-	if (md_type == DM_TYPE_REQUEST_BASED)
-		init_rq_based_worker_thread(md);
-
 	return 0;
 
 out_tag_set:
-	blk_mq_free_tag_set(&md->tag_set);
+	blk_mq_free_tag_set(md->tag_set);
+out_kfree_tag_set:
+	kfree(md->tag_set);
+
 	return err;
 }
 
@@ -2767,28 +2818,28 @@ static unsigned filter_md_type(unsigned type, struct mapped_device *md)
 /*
  * Setup the DM device's queue based on md's type
  */
-int dm_setup_md_queue(struct mapped_device *md)
+int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t)
 {
 	int r;
 	unsigned md_type = filter_md_type(dm_get_md_type(md), md);
 
 	switch (md_type) {
 	case DM_TYPE_REQUEST_BASED:
-		r = dm_init_request_based_queue(md);
+		r = dm_old_init_request_queue(md);
 		if (r) {
-			DMWARN("Cannot initialize queue for request-based mapped device");
+			DMERR("Cannot initialize queue for request-based mapped device");
 			return r;
 		}
 		break;
 	case DM_TYPE_MQ_REQUEST_BASED:
-		r = dm_init_request_based_blk_mq_queue(md);
+		r = dm_mq_init_request_queue(md, dm_table_get_immutable_target(t));
 		if (r) {
-			DMWARN("Cannot initialize queue for request-based blk-mq mapped device");
+			DMERR("Cannot initialize queue for request-based dm-mq mapped device");
 			return r;
 		}
 		break;
 	case DM_TYPE_BIO_BASED:
-		dm_init_old_md_queue(md);
+		dm_init_normal_md_queue(md);
 		blk_queue_make_request(md->queue, dm_make_request);
 		/*
 		 * DM handles splitting bios as needed.  Free the bio_split bioset
@@ -3131,7 +3182,7 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 	 * dm defers requests to md->wq from md->queue.
 	 */
 	if (dm_request_based(md)) {
-		stop_queue(md->queue);
+		dm_stop_queue(md->queue);
 		if (md->kworker_task)
 			flush_kthread_worker(&md->kworker);
 	}
@@ -3155,7 +3206,7 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map,
 		dm_queue_flush(md);
 
 		if (dm_request_based(md))
-			start_queue(md->queue);
+			dm_start_queue(md->queue);
 
 		unlock_fs(md);
 		dm_table_presuspend_undo_targets(map);
@@ -3234,7 +3285,7 @@ static int __dm_resume(struct mapped_device *md, struct dm_table *map)
 	 * Request-based dm is queueing the deferred I/Os in its request_queue.
 	 */
 	if (dm_request_based(md))
-		start_queue(md->queue);
+		dm_start_queue(md->queue);
 
 	unlock_fs(md);
 
@@ -3480,9 +3531,9 @@ int dm_noflush_suspending(struct dm_target *ti)
 EXPORT_SYMBOL_GPL(dm_noflush_suspending);
 
 struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned type,
-					    unsigned integrity, unsigned per_bio_data_size)
+					    unsigned integrity, unsigned per_io_data_size)
 {
-	struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL);
+	struct dm_md_mempools *pools = kzalloc_node(sizeof(*pools), GFP_KERNEL, md->numa_node_id);
 	struct kmem_cache *cachep = NULL;
 	unsigned int pool_size = 0;
 	unsigned int front_pad;
@@ -3496,7 +3547,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
 	case DM_TYPE_BIO_BASED:
 		cachep = _io_cache;
 		pool_size = dm_get_reserved_bio_based_ios();
-		front_pad = roundup(per_bio_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
+		front_pad = roundup(per_io_data_size, __alignof__(struct dm_target_io)) + offsetof(struct dm_target_io, clone);
 		break;
 	case DM_TYPE_REQUEST_BASED:
 		cachep = _rq_tio_cache;
@@ -3509,8 +3560,7 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, unsigned t
 		if (!pool_size)
 			pool_size = dm_get_reserved_rq_based_ios();
 		front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
-		/* per_bio_data_size is not used. See __bind_mempools(). */
-		WARN_ON(per_bio_data_size != 0);
+		/* per_io_data_size is used for blk-mq pdu at queue allocation */
 		break;
 	default:
 		BUG();
@@ -3552,15 +3602,14 @@ void dm_free_md_mempools(struct dm_md_mempools *pools)
 }
 
 static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
-		u32 flags)
+			  u32 flags)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	const struct pr_ops *ops;
-	struct dm_target *tgt;
 	fmode_t mode;
-	int srcu_idx, r;
+	int r;
 
-	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3570,20 +3619,19 @@ static int dm_pr_register(struct block_device *bdev, u64 old_key, u64 new_key,
 	else
 		r = -EOPNOTSUPP;
 
-	dm_put_live_table(md, srcu_idx);
+	bdput(bdev);
 	return r;
 }
 
 static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
-		u32 flags)
+			 u32 flags)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	const struct pr_ops *ops;
-	struct dm_target *tgt;
 	fmode_t mode;
-	int srcu_idx, r;
+	int r;
 
-	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3593,7 +3641,7 @@ static int dm_pr_reserve(struct block_device *bdev, u64 key, enum pr_type type,
 	else
 		r = -EOPNOTSUPP;
 
-	dm_put_live_table(md, srcu_idx);
+	bdput(bdev);
 	return r;
 }
 
@@ -3601,11 +3649,10 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	const struct pr_ops *ops;
-	struct dm_target *tgt;
 	fmode_t mode;
-	int srcu_idx, r;
+	int r;
 
-	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3615,20 +3662,19 @@ static int dm_pr_release(struct block_device *bdev, u64 key, enum pr_type type)
 	else
 		r = -EOPNOTSUPP;
 
-	dm_put_live_table(md, srcu_idx);
+	bdput(bdev);
 	return r;
 }
 
 static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
-		enum pr_type type, bool abort)
+			 enum pr_type type, bool abort)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	const struct pr_ops *ops;
-	struct dm_target *tgt;
 	fmode_t mode;
-	int srcu_idx, r;
+	int r;
 
-	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3638,7 +3684,7 @@ static int dm_pr_preempt(struct block_device *bdev, u64 old_key, u64 new_key,
 	else
 		r = -EOPNOTSUPP;
 
-	dm_put_live_table(md, srcu_idx);
+	bdput(bdev);
 	return r;
 }
 
@@ -3646,11 +3692,10 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
 {
 	struct mapped_device *md = bdev->bd_disk->private_data;
 	const struct pr_ops *ops;
-	struct dm_target *tgt;
 	fmode_t mode;
-	int srcu_idx, r;
+	int r;
 
-	r = dm_get_live_table_for_ioctl(md, &tgt, &bdev, &mode, &srcu_idx);
+	r = dm_grab_bdev_for_ioctl(md, &bdev, &mode);
 	if (r < 0)
 		return r;
 
@@ -3660,7 +3705,7 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
 	else
 		r = -EOPNOTSUPP;
 
-	dm_put_live_table(md, srcu_idx);
+	bdput(bdev);
 	return r;
 }
 
@@ -3699,6 +3744,15 @@ MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools"
 module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");
 
+module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");
+
+module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");
+
+module_param(dm_numa_node, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(dm_numa_node, "NUMA node for DM device memory allocations");
+
 MODULE_DESCRIPTION(DM_NAME " driver");
 MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>");
 MODULE_LICENSE("GPL");