From 3aef3cae4342c1d8137a1c0782cbb66f1be3943c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 09:14:01 -0700 Subject: block: add a req_bvec helper Return the currently active bvec segment, potentially spanning multiple pages. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- include/linux/blkdev.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5c58a3b2bf00..84ce76f92d83 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -932,6 +932,17 @@ static inline unsigned int blk_rq_payload_bytes(struct request *rq) return blk_rq_bytes(rq); } +/* + * Return the first full biovec in the request. The caller needs to check that + * there are any bvecs before calling this helper. + */ +static inline struct bio_vec req_bvec(struct request *rq) +{ + if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) + return rq->special_vec; + return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter); +} + static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, int op) { -- cgit v1.2.3-59-g8ed1b From 2a876f5e25e8ec9fa5777d36e5695ee33dd63f6f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 08:38:29 -0700 Subject: block: add a rq_integrity_vec helper This provides a nice little shortcut to get the integrity data for drivers like NVMe that only support a single integrity segment. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- include/linux/blkdev.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 84ce76f92d83..3a13fbe13e08 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1559,6 +1559,17 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, return bio_integrity_intervals(bi, sectors) * bi->tuple_size; } +/* + * Return the first bvec that contains integrity data. Only drivers that are + * limited to a single integrity segment should use this helper. + */ +static inline struct bio_vec *rq_integrity_vec(struct request *rq) +{ + if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1)) + return NULL; + return rq->bio->bi_integrity->bip_vec; +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ struct bio; @@ -1633,6 +1644,11 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, return 0; } +static inline struct bio_vec *rq_integrity_vec(struct request *rq) +{ + return NULL; +} + #endif /* CONFIG_BLK_DEV_INTEGRITY */ struct block_device_operations { -- cgit v1.2.3-59-g8ed1b From 9d9de535f385a8b3ba0e88ca0abf386c5704bbfc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 08:18:30 -0700 Subject: block: add a rq_dma_dir helper In a lot of places we want to know the DMA direction for a given struct request. Add a little helper to make it a littler easier. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3a13fbe13e08..74469a4dc0a1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -641,6 +641,9 @@ static inline bool blk_account_rq(struct request *rq) #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) +#define rq_dma_dir(rq) \ + (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) + static inline bool queue_is_mq(struct request_queue *q) { return q->mq_ops; -- cgit v1.2.3-59-g8ed1b From 3ab3a0313cb8c50391d74e40fd46a3408d8e4de9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 08:40:36 -0700 Subject: block: add dma_map_bvec helper Provide a nice little shortcut for mapping a single bvec. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 74469a4dc0a1..4b85dc066264 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -644,6 +644,10 @@ static inline bool blk_account_rq(struct request *rq) #define rq_dma_dir(rq) \ (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) +#define dma_map_bvec(dev, bv, dir, attrs) \ + dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \ + (dir), (attrs)) + static inline bool queue_is_mq(struct request_queue *q) { return q->mq_ops; -- cgit v1.2.3-59-g8ed1b From 12adb7a013e318de553ccee4a006a718667972b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 30 Apr 2019 13:56:16 -0400 Subject: block: remove the unused blk_queue_dma_pad function Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-settings.c | 16 ---------------- include/linux/blkdev.h | 1 - 2 files changed, 17 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-settings.c b/block/blk-settings.c index ec150f88db09..3facc41476be 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -663,22 +663,6 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, } EXPORT_SYMBOL(disk_stack_limits); -/** - * blk_queue_dma_pad - set pad mask - * @q: the request queue for the device - * @mask: pad mask - * - * Set dma pad mask. - * - * Appending pad buffer to a request modifies the last entry of a - * scatter list such that it includes the pad buffer. - **/ -void blk_queue_dma_pad(struct request_queue *q, unsigned int mask) -{ - q->dma_pad_mask = mask; -} -EXPORT_SYMBOL(blk_queue_dma_pad); - /** * blk_queue_update_dma_pad - update pad mask * @q: the request queue for the device diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 99aa98f60b9e..bd3e3f09bfa0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1069,7 +1069,6 @@ extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); -extern void blk_queue_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, -- cgit v1.2.3-59-g8ed1b From 2f8f1336a48bd5186de3476da0a3e2ec06d0533a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 30 Apr 2019 09:52:27 +0800 Subject: blk-mq: always free hctx after request queue is freed In normal queue cleanup path, hctx is released after request queue is freed, see blk_mq_release(). However, in __blk_mq_update_nr_hw_queues(), hctx may be freed because of hw queues shrinking. This way is easy to cause use-after-free, because: one implicit rule is that it is safe to call almost all block layer APIs if the request queue is alive; and one hctx may be retrieved by one API, then the hctx can be freed by blk_mq_update_nr_hw_queues(); finally use-after-free is triggered. Fixes this issue by always freeing hctx after releasing request queue. If some hctxs are removed in blk_mq_update_nr_hw_queues(), introduce a per-queue list to hold them, then try to resuse these hctxs if numa node is matched. Cc: Dongli Zhang Cc: James Smart Cc: Bart Van Assche Cc: linux-scsi@vger.kernel.org, Cc: Martin K . Petersen , Cc: Christoph Hellwig , Cc: James E . J . Bottomley , Reviewed-by: Hannes Reinecke Tested-by: James Smart Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq.c | 46 +++++++++++++++++++++++++++++++++------------- include/linux/blk-mq.h | 2 ++ include/linux/blkdev.h | 7 +++++++ 3 files changed, 42 insertions(+), 13 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/block/blk-mq.c b/block/blk-mq.c index 17e63d80b6d6..08a6248d8536 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2269,6 +2269,10 @@ static void blk_mq_exit_hctx(struct request_queue *q, set->ops->exit_hctx(hctx, hctx_idx); blk_mq_remove_cpuhp(hctx); + + spin_lock(&q->unused_hctx_lock); + list_add(&hctx->hctx_list, &q->unused_hctx_list); + spin_unlock(&q->unused_hctx_lock); } static void blk_mq_exit_hw_queues(struct request_queue *q, @@ -2351,6 +2355,8 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, hctx->queue = q; hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; + INIT_LIST_HEAD(&hctx->hctx_list); + /* * Allocate space for all possible cpus to avoid allocation at * runtime @@ -2664,15 +2670,17 @@ static int blk_mq_alloc_ctxs(struct request_queue *q) */ void blk_mq_release(struct request_queue *q) { - struct blk_mq_hw_ctx *hctx; - unsigned int i; + struct blk_mq_hw_ctx *hctx, *next; + int i; cancel_delayed_work_sync(&q->requeue_work); - /* hctx kobj stays in hctx */ - queue_for_each_hw_ctx(q, hctx, i) { - if (!hctx) - continue; + queue_for_each_hw_ctx(q, hctx, i) + WARN_ON_ONCE(hctx && list_empty(&hctx->hctx_list)); + + /* all hctx are in .unused_hctx_list now */ + list_for_each_entry_safe(hctx, next, &q->unused_hctx_list, hctx_list) { + list_del_init(&hctx->hctx_list); kobject_put(&hctx->kobj); } @@ -2739,9 +2747,22 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx( struct blk_mq_tag_set *set, struct request_queue *q, int hctx_idx, int node) { - struct blk_mq_hw_ctx *hctx; + struct blk_mq_hw_ctx *hctx = NULL, *tmp; - hctx = blk_mq_alloc_hctx(q, set, node); + /* reuse dead hctx first */ + spin_lock(&q->unused_hctx_lock); + list_for_each_entry(tmp, &q->unused_hctx_list, hctx_list) { + if (tmp->numa_node == node) { + hctx = tmp; + break; + } + } + if (hctx) + list_del_init(&hctx->hctx_list); + spin_unlock(&q->unused_hctx_lock); + + if (!hctx) + hctx = blk_mq_alloc_hctx(q, set, node); if (!hctx) goto fail; @@ -2779,10 +2800,8 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, hctx = blk_mq_alloc_and_init_hctx(set, q, i, node); if (hctx) { - if (hctxs[i]) { + if (hctxs[i]) blk_mq_exit_hctx(q, set, hctxs[i], i); - kobject_put(&hctxs[i]->kobj); - } hctxs[i] = hctx; } else { if (hctxs[i]) @@ -2813,9 +2832,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, if (hctx->tags) blk_mq_free_map_and_requests(set, j); blk_mq_exit_hctx(q, set, hctx, j); - kobject_put(&hctx->kobj); hctxs[j] = NULL; - } } mutex_unlock(&q->sysfs_lock); @@ -2858,6 +2875,9 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!q->queue_hw_ctx) goto err_sys_init; + INIT_LIST_HEAD(&q->unused_hctx_list); + spin_lock_init(&q->unused_hctx_lock); + blk_mq_realloc_hw_ctxs(set, q); if (!q->nr_hw_queues) goto err_hctxs; diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index db29928de467..15d1aa53d96c 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -70,6 +70,8 @@ struct blk_mq_hw_ctx { struct dentry *sched_debugfs_dir; #endif + struct list_head hctx_list; + /* Must be the last member - see also blk_mq_hw_ctx_size(). */ struct srcu_struct srcu[0]; }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bd3e3f09bfa0..1aafeb923e7b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -535,6 +535,13 @@ struct request_queue { struct mutex sysfs_lock; + /* + * for reusing dead hctx instance in case of updating + * nr_hw_queues + */ + struct list_head unused_hctx_list; + spinlock_t unused_hctx_lock; + atomic_t mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) -- cgit v1.2.3-59-g8ed1b