Diffstat (limited to 'block')
-rw-r--r--   block/bio-integrity.c    13
-rw-r--r--   block/bio.c              82
-rw-r--r--   block/blk-core.c         13
-rw-r--r--   block/blk-merge.c        19
-rw-r--r--   block/blk-mq-cpumap.c     4
-rw-r--r--   block/blk-mq-sysfs.c      9
-rw-r--r--   block/blk-mq-tag.c       84
-rw-r--r--   block/blk-mq.c          169
-rw-r--r--   block/blk-mq.h            5
-rw-r--r--   block/blk-settings.c      4
-rw-r--r--   block/blk-sysfs.c        12
-rw-r--r--   block/elevator.c          2
-rw-r--r--   block/genhd.c            11
-rw-r--r--   block/ioprio.c           14
-rw-r--r--   block/scsi_ioctl.c       10
15 files changed, 297 insertions, 154 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 0984232e429f..5cbd5d9ea61d 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -216,9 +216,10 @@ static int bio_integrity_process(struct bio *bio,
{
struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev);
struct blk_integrity_iter iter;
- struct bio_vec *bv;
+ struct bvec_iter bviter;
+ struct bio_vec bv;
struct bio_integrity_payload *bip = bio_integrity(bio);
- unsigned int i, ret = 0;
+ unsigned int ret = 0;
void *prot_buf = page_address(bip->bip_vec->bv_page) +
bip->bip_vec->bv_offset;
@@ -227,11 +228,11 @@ static int bio_integrity_process(struct bio *bio,
iter.seed = bip_get_seed(bip);
iter.prot_buf = prot_buf;
- bio_for_each_segment_all(bv, bio, i) {
- void *kaddr = kmap_atomic(bv->bv_page);
+ bio_for_each_segment(bv, bio, bviter) {
+ void *kaddr = kmap_atomic(bv.bv_page);
- iter.data_buf = kaddr + bv->bv_offset;
- iter.data_size = bv->bv_len;
+ iter.data_buf = kaddr + bv.bv_offset;
+ iter.data_size = bv.bv_len;
ret = proc_fn(&iter);
if (ret) {
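
The hunk above moves bio_integrity_process() off bio_for_each_segment_all(), which walks the bio's raw vector table by pointer, onto bio_for_each_segment(), which walks through bi_iter and hands back each bio_vec by value -- the form that stays correct for cloned or split bios. A minimal standalone sketch of the by-value pattern, assuming an already-populated bio:

    struct bvec_iter bviter;
    struct bio_vec bv;              /* by value, not a pointer */

    bio_for_each_segment(bv, bio, bviter) {
            void *kaddr = kmap_atomic(bv.bv_page);

            /* process bv.bv_len bytes at kaddr + bv.bv_offset */
            kunmap_atomic(kaddr);
    }
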
diff --git a/block/bio.c b/block/bio.c
index 3e6e1986a5b2..471d7382c7d1 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -748,6 +748,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
}
}
+ bio->bi_iter.bi_size += len;
goto done;
}
@@ -764,29 +765,32 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
return 0;
/*
- * we might lose a segment or two here, but rather that than
- * make this too complex.
+ * setup the new entry, we might clear it again later if we
+ * cannot add the page
+ */
+ bvec = &bio->bi_io_vec[bio->bi_vcnt];
+ bvec->bv_page = page;
+ bvec->bv_len = len;
+ bvec->bv_offset = offset;
+ bio->bi_vcnt++;
+ bio->bi_phys_segments++;
+ bio->bi_iter.bi_size += len;
+
+ /*
+ * Perform a recount if the number of segments is greater
+ * than queue_max_segments(q).
*/
- while (bio->bi_phys_segments >= queue_max_segments(q)) {
+ while (bio->bi_phys_segments > queue_max_segments(q)) {
if (retried_segments)
- return 0;
+ goto failed;
retried_segments = 1;
blk_recount_segments(q, bio);
}
/*
- * setup the new entry, we might clear it again later if we
- * cannot add the page
- */
- bvec = &bio->bi_io_vec[bio->bi_vcnt];
- bvec->bv_page = page;
- bvec->bv_len = len;
- bvec->bv_offset = offset;
-
- /*
* if queue has other restrictions (eg varying max sector size
* depending on offset), it can specify a merge_bvec_fn in the
* queue to get further control
@@ -795,7 +799,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
struct bvec_merge_data bvm = {
.bi_bdev = bio->bi_bdev,
.bi_sector = bio->bi_iter.bi_sector,
- .bi_size = bio->bi_iter.bi_size,
+ .bi_size = bio->bi_iter.bi_size - len,
.bi_rw = bio->bi_rw,
};
@@ -803,23 +807,25 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
* merge_bvec_fn() returns number of bytes it can accept
* at this offset
*/
- if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len) {
- bvec->bv_page = NULL;
- bvec->bv_len = 0;
- bvec->bv_offset = 0;
- return 0;
- }
+ if (q->merge_bvec_fn(q, &bvm, bvec) < bvec->bv_len)
+ goto failed;
}
/* If we may be able to merge these biovecs, force a recount */
- if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
+ if (bio->bi_vcnt > 1 && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec)))
bio->bi_flags &= ~(1 << BIO_SEG_VALID);
- bio->bi_vcnt++;
- bio->bi_phys_segments++;
done:
- bio->bi_iter.bi_size += len;
return len;
+
+ failed:
+ bvec->bv_page = NULL;
+ bvec->bv_len = 0;
+ bvec->bv_offset = 0;
+ bio->bi_vcnt--;
+ bio->bi_iter.bi_size -= len;
+ blk_recount_segments(q, bio);
+ return 0;
}
/**
@@ -1739,6 +1745,34 @@ void bio_check_pages_dirty(struct bio *bio)
}
}
+void generic_start_io_acct(int rw, unsigned long sectors,
+ struct hd_struct *part)
+{
+ int cpu = part_stat_lock();
+
+ part_round_stats(cpu, part);
+ part_stat_inc(cpu, part, ios[rw]);
+ part_stat_add(cpu, part, sectors[rw], sectors);
+ part_inc_in_flight(part, rw);
+
+ part_stat_unlock();
+}
+EXPORT_SYMBOL(generic_start_io_acct);
+
+void generic_end_io_acct(int rw, struct hd_struct *part,
+ unsigned long start_time)
+{
+ unsigned long duration = jiffies - start_time;
+ int cpu = part_stat_lock();
+
+ part_stat_add(cpu, part, ticks[rw], duration);
+ part_round_stats(cpu, part);
+ part_dec_in_flight(part, rw);
+
+ part_stat_unlock();
+}
+EXPORT_SYMBOL(generic_end_io_acct);
+
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
void bio_flush_dcache_pages(struct bio *bi)
{
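
The two helpers added above, generic_start_io_acct() and generic_end_io_acct(), export the per-partition I/O accounting that bio-based drivers previously open-coded against the part_stat_* macros. A hedged usage sketch, with completion shown synchronously for brevity -- my_dev, dev->disk and my_handle_bio() are illustrative names, not part of this patch:

    static void my_make_request(struct request_queue *q, struct bio *bio)
    {
            struct my_dev *dev = q->queuedata;
            int rw = bio_data_dir(bio);
            unsigned long start = jiffies;

            generic_start_io_acct(rw, bio_sectors(bio), &dev->disk->part0);
            my_handle_bio(dev, bio);        /* do the actual I/O */
            generic_end_io_acct(rw, &dev->disk->part0, start);
    }
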
diff --git a/block/blk-core.c b/block/blk-core.c
index 0421b53e6431..30f6153a40c2 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -525,6 +525,9 @@ void blk_cleanup_queue(struct request_queue *q)
del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
blk_sync_queue(q);
+ if (q->mq_ops)
+ blk_mq_free_queue(q);
+
spin_lock_irq(lock);
if (q->queue_lock != &q->__queue_lock)
q->queue_lock = &q->__queue_lock;
@@ -1266,7 +1269,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
blk_clear_rq_complete(rq);
trace_block_rq_requeue(q, rq);
- if (blk_rq_tagged(rq))
+ if (rq->cmd_flags & REQ_QUEUED)
blk_queue_end_tag(q, rq);
BUG_ON(blk_queued_rq(rq));
@@ -1325,7 +1328,7 @@ void part_round_stats(int cpu, struct hd_struct *part)
}
EXPORT_SYMBOL_GPL(part_round_stats);
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
static void blk_pm_put_request(struct request *rq)
{
if (rq->q->dev && !(rq->cmd_flags & REQ_PM) && !--rq->q->nr_pending)
@@ -2134,7 +2137,7 @@ void blk_account_io_done(struct request *req)
}
}
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
/*
* Don't process normal requests when queue is suspended
* or in the process of suspending/resuming
@@ -2554,7 +2557,7 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
*/
void blk_finish_request(struct request *req, int error)
{
- if (blk_rq_tagged(req))
+ if (req->cmd_flags & REQ_QUEUED)
blk_queue_end_tag(req->q, req);
BUG_ON(blk_queued_rq(req));
@@ -3159,7 +3162,7 @@ void blk_finish_plug(struct blk_plug *plug)
}
EXPORT_SYMBOL(blk_finish_plug);
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
/**
* blk_pm_runtime_init - Block layer runtime PM initialization routine
* @q: the queue of the device
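
Two mechanical substitutions run through this file: CONFIG_PM_RUNTIME guards become CONFIG_PM (runtime PM has been folded into the general PM option), and blk_rq_tagged() is open-coded ahead of the macro's removal. The latter is behavior-preserving -- the macro was a one-line wrapper:

    /* formerly in include/linux/blkdev.h */
    #define blk_rq_tagged(rq)       ((rq)->cmd_flags & REQ_QUEUED)
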
diff --git a/block/blk-merge.c b/block/blk-merge.c
index b3ac40aef46b..89b97b5e0881 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -97,19 +97,22 @@ void blk_recalc_rq_segments(struct request *rq)
void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
- bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
- &q->queue_flags);
- bool merge_not_need = bio->bi_vcnt < queue_max_segments(q);
+ unsigned short seg_cnt;
+
+ /* estimate segment number by bi_vcnt for non-cloned bio */
+ if (bio_flagged(bio, BIO_CLONED))
+ seg_cnt = bio_segments(bio);
+ else
+ seg_cnt = bio->bi_vcnt;
- if (no_sg_merge && !bio_flagged(bio, BIO_CLONED) &&
- merge_not_need)
- bio->bi_phys_segments = bio->bi_vcnt;
+ if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) &&
+ (seg_cnt < queue_max_segments(q)))
+ bio->bi_phys_segments = seg_cnt;
else {
struct bio *nxt = bio->bi_next;
bio->bi_next = NULL;
- bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio,
- no_sg_merge && merge_not_need);
+ bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false);
bio->bi_next = nxt;
}
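
For reference, the distinction the rewrite above leans on:

    /*
     * bi_vcnt counts entries in the bio's own vector table, which a
     * cloned bio shares with its parent and windows through bi_iter,
     * so it can overstate what is actually live.  bio_segments()
     * walks bi_iter and gives the right count in both cases, at the
     * cost of iterating.  Hence: cheap bi_vcnt estimate for
     * non-cloned bios, bio_segments() otherwise.
     */
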
diff --git a/block/blk-mq-cpumap.c b/block/blk-mq-cpumap.c
index 1065d7c65fa1..5f13f4d0bcce 100644
--- a/block/blk-mq-cpumap.c
+++ b/block/blk-mq-cpumap.c
@@ -17,7 +17,7 @@
static int cpu_to_queue_index(unsigned int nr_cpus, unsigned int nr_queues,
const int cpu)
{
- return cpu / ((nr_cpus + nr_queues - 1) / nr_queues);
+ return cpu * nr_queues / nr_cpus;
}
static int get_first_sibling(unsigned int cpu)
@@ -90,7 +90,7 @@ unsigned int *blk_mq_make_queue_map(struct blk_mq_tag_set *set)
unsigned int *map;
/* If cpus are offline, map them to first hctx */
- map = kzalloc_node(sizeof(*map) * num_possible_cpus(), GFP_KERNEL,
+ map = kzalloc_node(sizeof(*map) * nr_cpu_ids, GFP_KERNEL,
set->numa_node);
if (!map)
return NULL;
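
The new cpu_to_queue_index() spreads CPUs evenly instead of rounding the group size up, which could leave the last queue(s) idle. A userspace illustration (plain C, not kernel code) comparing the two formulas:

    #include <stdio.h>

    static int old_map(int cpu, int nr_cpus, int nr_queues)
    {
            return cpu / ((nr_cpus + nr_queues - 1) / nr_queues);
    }

    static int new_map(int cpu, int nr_cpus, int nr_queues)
    {
            return cpu * nr_queues / nr_cpus;
    }

    int main(void)
    {
            /* 4 CPUs, 3 queues: old maps to 0 0 1 1 (queue 2 idle),
             * new maps to 0 0 1 2 (every queue used). */
            for (int cpu = 0; cpu < 4; cpu++)
                    printf("cpu %d: old q%d, new q%d\n", cpu,
                           old_map(cpu, 4, 3), new_map(cpu, 4, 3));
            return 0;
    }
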
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 371d8800b48a..1630a20d5dcf 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -390,16 +390,15 @@ static void blk_mq_sysfs_init(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
struct blk_mq_ctx *ctx;
- int i, j;
+ int i;
kobject_init(&q->mq_kobj, &blk_mq_ktype);
- queue_for_each_hw_ctx(q, hctx, i) {
+ queue_for_each_hw_ctx(q, hctx, i)
kobject_init(&hctx->kobj, &blk_mq_hw_ktype);
- hctx_for_each_ctx(hctx, ctx, j)
- kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
- }
+ queue_for_each_ctx(q, ctx, i)
+ kobject_init(&ctx->kobj, &blk_mq_ctx_ktype);
}
/* see blk_register_queue() */
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 8317175a3009..32e8dbb9ad1c 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -137,6 +137,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
static int __bt_get_word(struct blk_align_bitmap *bm, unsigned int last_tag)
{
int tag, org_last_tag, end;
+ bool wrap = last_tag != 0;
org_last_tag = last_tag;
end = bm->depth;
@@ -148,15 +149,16 @@ restart:
* We started with an offset, start from 0 to
* exhaust the map.
*/
- if (org_last_tag && last_tag) {
- end = last_tag;
+ if (wrap) {
+ wrap = false;
+ end = org_last_tag;
last_tag = 0;
goto restart;
}
return -1;
}
last_tag = tag + 1;
- } while (test_and_set_bit_lock(tag, &bm->word));
+ } while (test_and_set_bit(tag, &bm->word));
return tag;
}
@@ -254,6 +256,21 @@ static int bt_get(struct blk_mq_alloc_data *data,
if (tag != -1)
break;
+ /*
+ * We're out of tags on this hardware queue, kick any
+ * pending IO submits before going to sleep waiting for
+ * some to complete.
+ */
+ blk_mq_run_hw_queue(hctx, false);
+
+ /*
+ * Retry tag allocation after running the hardware queue,
+ * as running the queue may also have found completions.
+ */
+ tag = __bt_get(hctx, bt, last_tag);
+ if (tag != -1)
+ break;
+
blk_mq_put_ctx(data->ctx);
io_schedule();
@@ -340,11 +357,10 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
struct bt_wait_state *bs;
int wait_cnt;
- /*
- * The unlock memory barrier need to order access to req in free
- * path and clearing tag bit
- */
- clear_bit_unlock(TAG_TO_BIT(bt, tag), &bt->map[index].word);
+ clear_bit(TAG_TO_BIT(bt, tag), &bt->map[index].word);
+
+ /* Ensure that the wait list checks occur after clear_bit(). */
+ smp_mb();
bs = bt_wake_ptr(bt);
if (!bs)
@@ -360,21 +376,6 @@ static void bt_clear_tag(struct blk_mq_bitmap_tags *bt, unsigned int tag)
}
}
-static void __blk_mq_put_tag(struct blk_mq_tags *tags, unsigned int tag)
-{
- BUG_ON(tag >= tags->nr_tags);
-
- bt_clear_tag(&tags->bitmap_tags, tag);
-}
-
-static void __blk_mq_put_reserved_tag(struct blk_mq_tags *tags,
- unsigned int tag)
-{
- BUG_ON(tag >= tags->nr_reserved_tags);
-
- bt_clear_tag(&tags->breserved_tags, tag);
-}
-
void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
unsigned int *last_tag)
{
@@ -383,10 +384,13 @@ void blk_mq_put_tag(struct blk_mq_hw_ctx *hctx, unsigned int tag,
if (tag >= tags->nr_reserved_tags) {
const int real_tag = tag - tags->nr_reserved_tags;
- __blk_mq_put_tag(tags, real_tag);
+ BUG_ON(real_tag >= tags->nr_tags);
+ bt_clear_tag(&tags->bitmap_tags, real_tag);
*last_tag = real_tag;
- } else
- __blk_mq_put_reserved_tag(tags, tag);
+ } else {
+ BUG_ON(tag >= tags->nr_reserved_tags);
+ bt_clear_tag(&tags->breserved_tags, tag);
+ }
}
static void bt_for_each(struct blk_mq_hw_ctx *hctx,
@@ -584,6 +588,34 @@ int blk_mq_tag_update_depth(struct blk_mq_tags *tags, unsigned int tdepth)
return 0;
}
+/**
+ * blk_mq_unique_tag() - return a tag that is unique queue-wide
+ * @rq: request for which to compute a unique tag
+ *
+ * The tag field in struct request is unique per hardware queue but not over
+ * all hardware queues. Hence this function that returns a tag with the
+ * hardware context index in the upper bits and the per hardware queue tag in
+ * the lower bits.
+ *
+ * Note: When called for a request that is queued on a non-multiqueue request
+ * queue, the hardware context index is set to zero.
+ */
+u32 blk_mq_unique_tag(struct request *rq)
+{
+ struct request_queue *q = rq->q;
+ struct blk_mq_hw_ctx *hctx;
+ int hwq = 0;
+
+ if (q->mq_ops) {
+ hctx = q->mq_ops->map_queue(q, rq->mq_ctx->cpu);
+ hwq = hctx->queue_num;
+ }
+
+ return (hwq << BLK_MQ_UNIQUE_TAG_BITS) |
+ (rq->tag & BLK_MQ_UNIQUE_TAG_MASK);
+}
+EXPORT_SYMBOL(blk_mq_unique_tag);
+
ssize_t blk_mq_tag_sysfs_show(struct blk_mq_tags *tags, char *page)
{
char *orig_page = page;
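
blk_mq_unique_tag() gives drivers (scsi-mq being the intended consumer) a queue-wide command identifier. A hedged decoding sketch, assuming the companion inline helpers blk_mq_unique_tag_to_hwq() and blk_mq_unique_tag_to_tag() from the same change; my_complete_cmd() is an illustrative name:

    static void my_complete_cmd(struct request *rq)
    {
            u32 unique = blk_mq_unique_tag(rq);
            u16 hwq = blk_mq_unique_tag_to_hwq(unique);    /* upper bits */
            u16 tag = blk_mq_unique_tag_to_tag(unique);    /* lower bits */

            pr_debug("completing tag %u on hardware queue %u\n", tag, hwq);
    }
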
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 68929bad9a6a..da1ab5641227 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -107,11 +107,7 @@ static void blk_mq_usage_counter_release(struct percpu_ref *ref)
wake_up_all(&q->mq_freeze_wq);
}
-/*
- * Guarantee no request is in use, so we can change any data structure of
- * the queue afterward.
- */
-void blk_mq_freeze_queue(struct request_queue *q)
+static void blk_mq_freeze_queue_start(struct request_queue *q)
{
bool freeze;
@@ -123,9 +119,23 @@ void blk_mq_freeze_queue(struct request_queue *q)
percpu_ref_kill(&q->mq_usage_counter);
blk_mq_run_queues(q, false);
}
+}
+
+static void blk_mq_freeze_queue_wait(struct request_queue *q)
+{
wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
}
+/*
+ * Guarantee no request is in use, so we can change any data structure of
+ * the queue afterward.
+ */
+void blk_mq_freeze_queue(struct request_queue *q)
+{
+ blk_mq_freeze_queue_start(q);
+ blk_mq_freeze_queue_wait(q);
+}
+
static void blk_mq_unfreeze_queue(struct request_queue *q)
{
bool wake;
@@ -269,17 +279,25 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx,
blk_mq_queue_exit(q);
}
-void blk_mq_free_request(struct request *rq)
+void blk_mq_free_hctx_request(struct blk_mq_hw_ctx *hctx, struct request *rq)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
- struct blk_mq_hw_ctx *hctx;
- struct request_queue *q = rq->q;
ctx->rq_completed[rq_is_sync(rq)]++;
-
- hctx = q->mq_ops->map_queue(q, ctx->cpu);
__blk_mq_free_request(hctx, ctx, rq);
+
+}
+EXPORT_SYMBOL_GPL(blk_mq_free_hctx_request);
+
+void blk_mq_free_request(struct request *rq)
+{
+ struct blk_mq_hw_ctx *hctx;
+ struct request_queue *q = rq->q;
+
+ hctx = q->mq_ops->map_queue(q, rq->mq_ctx->cpu);
+ blk_mq_free_hctx_request(hctx, rq);
}
+EXPORT_SYMBOL_GPL(blk_mq_free_request);
inline void __blk_mq_end_request(struct request *rq, int error)
{
@@ -581,7 +599,7 @@ static void blk_mq_rq_timer(unsigned long priv)
* If not software queues are currently mapped to this
* hardware queue, there's nothing to check
*/
- if (!hctx->nr_ctx || !hctx->tags)
+ if (!blk_mq_hw_queue_mapped(hctx))
continue;
blk_mq_tag_busy_iter(hctx, blk_mq_check_expired, &data);
@@ -680,6 +698,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
struct request_queue *q = hctx->queue;
struct request *rq;
LIST_HEAD(rq_list);
+ LIST_HEAD(driver_list);
+ struct list_head *dptr;
int queued;
WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
@@ -706,16 +726,27 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
}
/*
+ * Start off with dptr being NULL, so we start the first request
+ * immediately, even if we have more pending.
+ */
+ dptr = NULL;
+
+ /*
* Now process all the entries, sending them to the driver.
*/
queued = 0;
while (!list_empty(&rq_list)) {
+ struct blk_mq_queue_data bd;
int ret;
rq = list_first_entry(&rq_list, struct request, queuelist);
list_del_init(&rq->queuelist);
- ret = q->mq_ops->queue_rq(hctx, rq, list_empty(&rq_list));
+ bd.rq = rq;
+ bd.list = dptr;
+ bd.last = list_empty(&rq_list);
+
+ ret = q->mq_ops->queue_rq(hctx, &bd);
switch (ret) {
case BLK_MQ_RQ_QUEUE_OK:
queued++;
@@ -734,6 +765,13 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
if (ret == BLK_MQ_RQ_QUEUE_BUSY)
break;
+
+ /*
+ * We've done the first request. If we have more than 1
+ * left in the list, set dptr to defer issue.
+ */
+ if (!dptr && rq_list.next != rq_list.prev)
+ dptr = &driver_list;
}
if (!queued)
@@ -760,10 +798,11 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
*/
static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
{
- int cpu = hctx->next_cpu;
+ if (hctx->queue->nr_hw_queues == 1)
+ return WORK_CPU_UNBOUND;
if (--hctx->next_cpu_batch <= 0) {
- int next_cpu;
+ int cpu = hctx->next_cpu, next_cpu;
next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
if (next_cpu >= nr_cpu_ids)
@@ -771,26 +810,32 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
hctx->next_cpu = next_cpu;
hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+
+ return cpu;
}
- return cpu;
+ return hctx->next_cpu;
}
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
{
- if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
+ if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state) ||
+ !blk_mq_hw_queue_mapped(hctx)))
return;
- if (!async && cpumask_test_cpu(smp_processor_id(), hctx->cpumask))
- __blk_mq_run_hw_queue(hctx);
- else if (hctx->queue->nr_hw_queues == 1)
- kblockd_schedule_delayed_work(&hctx->run_work, 0);
- else {
- unsigned int cpu;
+ if (!async) {
+ int cpu = get_cpu();
+ if (cpumask_test_cpu(cpu, hctx->cpumask)) {
+ __blk_mq_run_hw_queue(hctx);
+ put_cpu();
+ return;
+ }
- cpu = blk_mq_hctx_next_cpu(hctx);
- kblockd_schedule_delayed_work_on(cpu, &hctx->run_work, 0);
+ put_cpu();
}
+
+ kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
+ &hctx->run_work, 0);
}
void blk_mq_run_queues(struct request_queue *q, bool async)
@@ -804,9 +849,7 @@ void blk_mq_run_queues(struct request_queue *q, bool async)
test_bit(BLK_MQ_S_STOPPED, &hctx->state))
continue;
- preempt_disable();
blk_mq_run_hw_queue(hctx, async);
- preempt_enable();
}
}
EXPORT_SYMBOL(blk_mq_run_queues);
@@ -833,9 +876,7 @@ void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx)
{
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
- preempt_disable();
blk_mq_run_hw_queue(hctx, false);
- preempt_enable();
}
EXPORT_SYMBOL(blk_mq_start_hw_queue);
@@ -860,9 +901,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async)
continue;
clear_bit(BLK_MQ_S_STOPPED, &hctx->state);
- preempt_disable();
blk_mq_run_hw_queue(hctx, async);
- preempt_enable();
}
}
EXPORT_SYMBOL(blk_mq_start_stopped_hw_queues);
@@ -888,16 +927,11 @@ static void blk_mq_delay_work_fn(struct work_struct *work)
void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
{
- unsigned long tmo = msecs_to_jiffies(msecs);
-
- if (hctx->queue->nr_hw_queues == 1)
- kblockd_schedule_delayed_work(&hctx->delay_work, tmo);
- else {
- unsigned int cpu;
+ if (unlikely(!blk_mq_hw_queue_mapped(hctx)))
+ return;
- cpu = blk_mq_hctx_next_cpu(hctx);
- kblockd_schedule_delayed_work_on(cpu, &hctx->delay_work, tmo);
- }
+ kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
+ &hctx->delay_work, msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_mq_delay_queue);
@@ -1152,7 +1186,17 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
goto run_queue;
}
- if (is_sync) {
+ /*
+ * If the driver supports defer issued based on 'last', then
+ * queue it up like normal since we can potentially save some
+ * CPU this way.
+ */
+ if (is_sync && !(data.hctx->flags & BLK_MQ_F_DEFER_ISSUE)) {
+ struct blk_mq_queue_data bd = {
+ .rq = rq,
+ .list = NULL,
+ .last = 1
+ };
int ret;
blk_mq_bio_to_request(rq, bio);
@@ -1162,7 +1206,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
* error (busy), just add it to our list as we previously
* would have done
*/
- ret = q->mq_ops->queue_rq(data.hctx, rq, true);
+ ret = q->mq_ops->queue_rq(data.hctx, &bd);
if (ret == BLK_MQ_RQ_QUEUE_OK)
goto done;
else {
@@ -1774,16 +1818,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
if (!ctx)
return ERR_PTR(-ENOMEM);
- /*
- * If a crashdump is active, then we are potentially in a very
- * memory constrained environment. Limit us to 1 queue and
- * 64 tags to prevent using too much memory.
- */
- if (is_kdump_kernel()) {
- set->nr_hw_queues = 1;
- set->queue_depth = min(64U, set->queue_depth);
- }
-
hctxs = kmalloc_node(set->nr_hw_queues * sizeof(*hctxs), GFP_KERNEL,
set->numa_node);
@@ -1921,7 +1955,7 @@ void blk_mq_free_queue(struct request_queue *q)
/* Basically redo blk_mq_init_queue with queue frozen */
static void blk_mq_queue_reinit(struct request_queue *q)
{
- blk_mq_freeze_queue(q);
+ WARN_ON_ONCE(!q->mq_freeze_depth);
blk_mq_sysfs_unregister(q);
@@ -1936,8 +1970,6 @@ static void blk_mq_queue_reinit(struct request_queue *q)
blk_mq_map_swqueue(q);
blk_mq_sysfs_register(q);
-
- blk_mq_unfreeze_queue(q);
}
static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
@@ -1956,8 +1988,25 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
return NOTIFY_OK;
mutex_lock(&all_q_mutex);
+
+ /*
+ * We need to freeze and reinit all existing queues. Freezing
+ * involves synchronous wait for an RCU grace period and doing it
+ * one by one may take a long time. Start freezing all queues in
+ * one swoop and then wait for the completions so that freezing can
+ * take place in parallel.
+ */
+ list_for_each_entry(q, &all_q_list, all_q_node)
+ blk_mq_freeze_queue_start(q);
+ list_for_each_entry(q, &all_q_list, all_q_node)
+ blk_mq_freeze_queue_wait(q);
+
list_for_each_entry(q, &all_q_list, all_q_node)
blk_mq_queue_reinit(q);
+
+ list_for_each_entry(q, &all_q_list, all_q_node)
+ blk_mq_unfreeze_queue(q);
+
mutex_unlock(&all_q_mutex);
return NOTIFY_OK;
}
@@ -2024,6 +2073,8 @@ static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
*/
int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
{
+ BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS);
+
if (!set->nr_hw_queues)
return -EINVAL;
if (!set->queue_depth)
@@ -2040,6 +2091,16 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
set->queue_depth = BLK_MQ_MAX_DEPTH;
}
+ /*
+ * If a crashdump is active, then we are potentially in a very
+ * memory constrained environment. Limit us to 1 queue and
+ * 64 tags to prevent using too much memory.
+ */
+ if (is_kdump_kernel()) {
+ set->nr_hw_queues = 1;
+ set->queue_depth = min(64U, set->queue_depth);
+ }
+
set->tags = kmalloc_node(set->nr_hw_queues *
sizeof(struct blk_mq_tags *),
GFP_KERNEL, set->numa_node);
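
The most visible interface change in this file is that ->queue_rq() now takes a struct blk_mq_queue_data rather than a bare request plus 'last' flag, with bd->list carrying the deferred-issue list for drivers that set BLK_MQ_F_DEFER_ISSUE. A hedged sketch of a driver hook against the new prototype -- my_queue_cmd() stands in for real issue logic:

    static int my_queue_rq(struct blk_mq_hw_ctx *hctx,
                           const struct blk_mq_queue_data *bd)
    {
            struct request *rq = bd->rq;

            blk_mq_start_request(rq);

            /* bd->last hints that no further submission follows at once */
            if (my_queue_cmd(hctx->driver_data, rq, bd->last))
                    return BLK_MQ_RQ_QUEUE_BUSY;

            return BLK_MQ_RQ_QUEUE_OK;
    }
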
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d567d5283ffa..206230e64f79 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -115,4 +115,9 @@ static inline void blk_mq_set_alloc_data(struct blk_mq_alloc_data *data,
data->hctx = hctx;
}
+static inline bool blk_mq_hw_queue_mapped(struct blk_mq_hw_ctx *hctx)
+{
+ return hctx->nr_ctx && hctx->tags;
+}
+
#endif
diff --git a/block/blk-settings.c b/block/blk-settings.c
index aa02247d227e..6ed2cbe5e8c9 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -257,9 +257,7 @@ void blk_limits_max_hw_sectors(struct queue_limits *limits, unsigned int max_hw_
__func__, max_hw_sectors);
}
- limits->max_hw_sectors = max_hw_sectors;
- limits->max_sectors = min_t(unsigned int, max_hw_sectors,
- BLK_DEF_MAX_SECTORS);
+ limits->max_sectors = limits->max_hw_sectors = max_hw_sectors;
}
EXPORT_SYMBOL(blk_limits_max_hw_sectors);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 1fac43408911..935ea2aa0730 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -492,17 +492,15 @@ static void blk_free_queue_rcu(struct rcu_head *rcu_head)
* Currently, its primary task it to free all the &struct request
* structures that were allocated to the queue and the queue itself.
*
- * Caveat:
- * Hopefully the low level driver will have finished any
- * outstanding requests first...
+ * Note:
+ * The low level driver must have finished any outstanding requests first
+ * via blk_cleanup_queue().
**/
static void blk_release_queue(struct kobject *kobj)
{
struct request_queue *q =
container_of(kobj, struct request_queue, kobj);
- blk_sync_queue(q);
-
blkcg_exit_queue(q);
if (q->elevator) {
@@ -517,9 +515,7 @@ static void blk_release_queue(struct kobject *kobj)
if (q->queue_tags)
__blk_queue_free_tags(q);
- if (q->mq_ops)
- blk_mq_free_queue(q);
- else
+ if (!q->mq_ops)
blk_free_flush_queue(q->fq);
blk_trace_shutdown(q);
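
Together with the blk-core.c hunk that calls blk_mq_free_queue() from blk_cleanup_queue(), this moves blk-mq teardown out of the (possibly long-deferred) kobject release path. The teardown order a blk-mq driver is expected to follow, sketched with illustrative driver state:

    static void my_remove(struct my_dev *dev)
    {
            del_gendisk(dev->disk);
            blk_cleanup_queue(dev->queue);  /* now frees blk-mq resources */
            blk_mq_free_tag_set(&dev->tag_set);
            put_disk(dev->disk);
    }
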
diff --git a/block/elevator.c b/block/elevator.c
index afa3b037a17c..59794d0d38e3 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -539,7 +539,7 @@ void elv_bio_merged(struct request_queue *q, struct request *rq,
e->type->ops.elevator_bio_merged_fn(q, rq, bio);
}
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
static void blk_pm_requeue_request(struct request *rq)
{
if (rq->q->dev && !(rq->cmd_flags & REQ_PM))
diff --git a/block/genhd.c b/block/genhd.c
index bd3060684ab2..0a536dc05f3b 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1070,9 +1070,16 @@ int disk_expand_part_tbl(struct gendisk *disk, int partno)
struct disk_part_tbl *old_ptbl = disk->part_tbl;
struct disk_part_tbl *new_ptbl;
int len = old_ptbl ? old_ptbl->len : 0;
- int target = partno + 1;
+ int i, target;
size_t size;
- int i;
+
+ /*
+ * check for int overflow, since we can get here from blkpg_ioctl()
+ * with a user passed 'partno'.
+ */
+ target = partno + 1;
+ if (target < 0)
+ return -EINVAL;
/* disk_max_parts() is zero during initialization, ignore if so */
if (disk_max_parts(disk) && target > disk_max_parts(disk))
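
Why the explicit check is needed:

    /*
     * Illustration of the hole being closed: blkpg_ioctl() accepts a
     * user-supplied partno up to INT_MAX, and with partno == INT_MAX
     * the unchecked 'target = partno + 1' wraps negative (the kernel
     * builds with -fno-strict-overflow), slipping past the
     * 'target > disk_max_parts(disk)' test and corrupting the size
     * computed for the new partition table.  'target < 0' rejects it
     * up front.
     */
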
diff --git a/block/ioprio.c b/block/ioprio.c
index e50170ca7c33..31666c92b46a 100644
--- a/block/ioprio.c
+++ b/block/ioprio.c
@@ -157,14 +157,16 @@ out:
int ioprio_best(unsigned short aprio, unsigned short bprio)
{
- unsigned short aclass = IOPRIO_PRIO_CLASS(aprio);
- unsigned short bclass = IOPRIO_PRIO_CLASS(bprio);
+ unsigned short aclass;
+ unsigned short bclass;
- if (aclass == IOPRIO_CLASS_NONE)
- aclass = IOPRIO_CLASS_BE;
- if (bclass == IOPRIO_CLASS_NONE)
- bclass = IOPRIO_CLASS_BE;
+ if (!ioprio_valid(aprio))
+ aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+ if (!ioprio_valid(bprio))
+ bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+ aclass = IOPRIO_PRIO_CLASS(aprio);
+ bclass = IOPRIO_PRIO_CLASS(bprio);
if (aclass == bclass)
return min(aprio, bprio);
if (aclass > bclass)
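
A worked example of the bug this fixes, using the packing from include/linux/ioprio.h (an ioprio is (class << 13) | data; IOPRIO_CLASS_BE is 2, IOPRIO_NORM is 4):

    /*
     *   aprio = 0                        class NONE, data 0 (unset)
     *   bprio = (2 << 13) | 7 = 0x4007   class BE, data 7
     *
     * The old code normalized only the local 'aclass' variable, then
     * returned min(aprio, bprio) = 0 -- leaking a CLASS_NONE value to
     * the caller.  The new code first rewrites aprio itself to
     * IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM) = 0x4004, so
     * min() now yields a valid best-effort priority.
     */
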
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 1e053d911240..28163fad3c5d 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -142,7 +142,7 @@ static void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter)
__set_bit(GPCMD_VERIFY_10, filter->read_ok);
__set_bit(VERIFY_16, filter->read_ok);
__set_bit(REPORT_LUNS, filter->read_ok);
- __set_bit(SERVICE_ACTION_IN, filter->read_ok);
+ __set_bit(SERVICE_ACTION_IN_16, filter->read_ok);
__set_bit(RECEIVE_DIAGNOSTIC, filter->read_ok);
__set_bit(MAINTENANCE_IN, filter->read_ok);
__set_bit(GPCMD_READ_BUFFER_CAPACITY, filter->read_ok);
@@ -458,7 +458,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
- goto error;
+ goto error_free_buffer;
}
blk_rq_set_block_pc(rq);
@@ -531,9 +531,11 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode,
}
error:
+ blk_put_request(rq);
+
+error_free_buffer:
kfree(buffer);
- if (rq)
- blk_put_request(rq);
+
return err;
}
EXPORT_SYMBOL_GPL(sg_scsi_ioctl);
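
The label reordering restores the usual kernel unwind idiom -- jump to the label matching how much has been acquired -- so a failed blk_get_request() can no longer reach blk_put_request() on an ERR_PTR value (the old 'if (rq)' guard did not catch that case). The shape of the pattern, reduced:

    rq = blk_get_request(q, write, __GFP_WAIT);
    if (IS_ERR(rq)) {
            err = PTR_ERR(rq);
            goto error_free_buffer;         /* rq was never obtained */
    }
    /* ... */
    error:
            blk_put_request(rq);            /* undo blk_get_request() */
    error_free_buffer:
            kfree(buffer);                  /* undo the earlier allocation */
            return err;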