aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
context:
space:
mode:
Diffstat (limited to 'block')
-rw-r--r--block/Kconfig5
-rw-r--r--block/Makefile1
-rw-r--r--block/bfq-iosched.h22
-rw-r--r--block/bfq-wf2q.c146
-rw-r--r--block/bio-integrity.c5
-rw-r--r--block/blk-mq-debugfs.c3
-rw-r--r--block/blk-mq-pci.c8
-rw-r--r--block/blk-mq-rdma.c52
-rw-r--r--block/blk-mq.c16
-rw-r--r--block/blk-softirq.c2
-rw-r--r--block/blk-throttle.c18
-rw-r--r--block/bsg-lib.c74
-rw-r--r--block/genhd.c18
13 files changed, 250 insertions, 120 deletions
diff --git a/block/Kconfig b/block/Kconfig
index 89cd28f8d051..3ab42bbb06d5 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -206,4 +206,9 @@ config BLK_MQ_VIRTIO
depends on BLOCK && VIRTIO
default y
+config BLK_MQ_RDMA
+ bool
+ depends on BLOCK && INFINIBAND
+ default y
+
source block/Kconfig.iosched
diff --git a/block/Makefile b/block/Makefile
index 2b281cf258a0..9396ebc85d24 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -29,6 +29,7 @@ obj-$(CONFIG_BLK_CMDLINE_PARSER) += cmdline-parser.o
obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o
obj-$(CONFIG_BLK_MQ_PCI) += blk-mq-pci.o
obj-$(CONFIG_BLK_MQ_VIRTIO) += blk-mq-virtio.o
+obj-$(CONFIG_BLK_MQ_RDMA) += blk-mq-rdma.o
obj-$(CONFIG_BLK_DEV_ZONED) += blk-zoned.o
obj-$(CONFIG_BLK_WBT) += blk-wbt.o
obj-$(CONFIG_BLK_DEBUG_FS) += blk-mq-debugfs.o
diff --git a/block/bfq-iosched.h b/block/bfq-iosched.h
index fb28c255bcab..cc4ea8574483 100644
--- a/block/bfq-iosched.h
+++ b/block/bfq-iosched.h
@@ -71,17 +71,29 @@ struct bfq_service_tree {
*
* bfq_sched_data is the basic scheduler queue. It supports three
* ioprio_classes, and can be used either as a toplevel queue or as an
- * intermediate queue on a hierarchical setup. @next_in_service
- * points to the active entity of the sched_data service trees that
- * will be scheduled next. It is used to reduce the number of steps
- * needed for each hierarchical-schedule update.
+ * intermediate queue in a hierarchical setup.
*
* The supported ioprio_classes are the same as in CFQ, in descending
* priority order, IOPRIO_CLASS_RT, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE.
* Requests from higher priority queues are served before all the
* requests from lower priority queues; among requests of the same
* queue requests are served according to B-WF2Q+.
- * All the fields are protected by the queue lock of the containing bfqd.
+ *
+ * The schedule is implemented by the service trees, plus the field
+ * @next_in_service, which points to the entity on the active trees
+ * that will be served next, if 1) no change in the schedule occurs
+ * before the current in-service entity is expired, 2) the in-service
+ * queue becomes idle when it expires, and 3) if the entity pointed to
+ * by in_service_entity is not a queue, then the in-service child
+ * entity of the entity pointed to by in_service_entity becomes idle on
+ * expiration. This peculiar definition allows for the following
+ * optimization, not yet exploited: while a given entity is still in
+ * service, we already know which is the best candidate for next
+ * service among the other active entities in the same parent
+ * entity. We can then quickly compare the timestamps of the
+ * in-service entity with those of such best candidate.
+ *
+ * All fields are protected by the lock of the containing bfqd.
*/
struct bfq_sched_data {
/* entity in service */
diff --git a/block/bfq-wf2q.c b/block/bfq-wf2q.c
index 979f8f21b7e2..911aa7431dbe 100644
--- a/block/bfq-wf2q.c
+++ b/block/bfq-wf2q.c
@@ -188,21 +188,23 @@ static bool bfq_update_parent_budget(struct bfq_entity *next_in_service)
/*
* This function tells whether entity stops being a candidate for next
- * service, according to the following logic.
+ * service, according to the restrictive definition of the field
+ * next_in_service. In particular, this function is invoked for an
+ * entity that is about to be set in service.
*
- * This function is invoked for an entity that is about to be set in
- * service. If such an entity is a queue, then the entity is no longer
- * a candidate for next service (i.e, a candidate entity to serve
- * after the in-service entity is expired). The function then returns
- * true.
+ * If entity is a queue, then the entity is no longer a candidate for
+ * next service according to that definition, because entity is
+ * about to become the in-service queue. This function then returns
+ * true if entity is a queue.
*
- * In contrast, the entity could stil be a candidate for next service
- * if it is not a queue, and has more than one child. In fact, even if
- * one of its children is about to be set in service, other children
- * may still be the next to serve. As a consequence, a non-queue
- * entity is not a candidate for next-service only if it has only one
- * child. And only if this condition holds, then the function returns
- * true for a non-queue entity.
+ * In contrast, entity could still be a candidate for next service if
+ * it is not a queue, and has more than one active child. In fact,
+ * even if one of its children is about to be set in service, other
+ * active children may still be the next to serve, for the parent
+ * entity, even according to the above definition. As a consequence, a
+ * non-queue entity is not a candidate for next-service only if it has
+ * only one active child. And only if this condition holds, then this
+ * function returns true for a non-queue entity.
*/
static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
{
@@ -213,6 +215,18 @@ static bool bfq_no_longer_next_in_service(struct bfq_entity *entity)
bfqg = container_of(entity, struct bfq_group, entity);
+ /*
+ * The field active_entities does not always contain the
+ * actual number of active children entities: it happens to
+ * not account for the in-service entity in case the latter is
+ * removed from its active tree (which may get done after
+ * invoking the function bfq_no_longer_next_in_service in
+ * bfq_get_next_queue). Fortunately, here, i.e., while
+ * bfq_no_longer_next_in_service is not yet completed in
+ * bfq_get_next_queue, bfq_active_extract has not yet been
+ * invoked, and thus active_entities still coincides with the
+ * actual number of active entities.
+ */
if (bfqg->active_entities == 1)
return true;
@@ -954,7 +968,7 @@ static void bfq_update_fin_time_enqueue(struct bfq_entity *entity,
* one of its children receives a new request.
*
* Basically, this function updates the timestamps of entity and
- * inserts entity into its active tree, ater possible extracting it
+ * inserts entity into its active tree, after possibly extracting it
* from its idle tree.
*/
static void __bfq_activate_entity(struct bfq_entity *entity,
@@ -1048,7 +1062,7 @@ static void __bfq_requeue_entity(struct bfq_entity *entity)
entity->start = entity->finish;
/*
* In addition, if the entity had more than one child
- * when set in service, then was not extracted from
+ * when set in service, then it was not extracted from
* the active tree. This implies that the position of
* the entity in the active tree may need to be
* changed now, because we have just updated the start
@@ -1056,9 +1070,8 @@ static void __bfq_requeue_entity(struct bfq_entity *entity)
* time in a moment (the requeueing is then, more
* precisely, a repositioning in this case). To
* implement this repositioning, we: 1) dequeue the
- * entity here, 2) update the finish time and
- * requeue the entity according to the new
- * timestamps below.
+ * entity here, 2) update the finish time and requeue
+ * the entity according to the new timestamps below.
*/
if (entity->tree)
bfq_active_extract(st, entity);
@@ -1105,9 +1118,10 @@ static void __bfq_activate_requeue_entity(struct bfq_entity *entity,
/**
- * bfq_activate_entity - activate or requeue an entity representing a bfq_queue,
- * and activate, requeue or reposition all ancestors
- * for which such an update becomes necessary.
+ * bfq_activate_requeue_entity - activate or requeue an entity representing a
+ * bfq_queue, and activate, requeue or reposition
+ * all ancestors for which such an update becomes
+ * necessary.
* @entity: the entity to activate.
* @non_blocking_wait_rq: true if this entity was waiting for a request
* @requeue: true if this is a requeue, which implies that bfqq is
@@ -1135,9 +1149,9 @@ static void bfq_activate_requeue_entity(struct bfq_entity *entity,
* @ins_into_idle_tree: if false, the entity will not be put into the
* idle tree.
*
- * Deactivates an entity, independently from its previous state. Must
+ * Deactivates an entity, independently of its previous state. Must
* be invoked only if entity is on a service tree. Extracts the entity
- * from that tree, and if necessary and allowed, puts it on the idle
+ * from that tree, and if necessary and allowed, puts it into the idle
* tree.
*/
bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
@@ -1158,8 +1172,10 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
st = bfq_entity_service_tree(entity);
is_in_service = entity == sd->in_service_entity;
- if (is_in_service)
+ if (is_in_service) {
bfq_calc_finish(entity, entity->service);
+ sd->in_service_entity = NULL;
+ }
if (entity->tree == &st->active)
bfq_active_extract(st, entity);
@@ -1177,7 +1193,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity, bool ins_into_idle_tree)
/**
* bfq_deactivate_entity - deactivate an entity representing a bfq_queue.
* @entity: the entity to deactivate.
- * @ins_into_idle_tree: true if the entity can be put on the idle tree
+ * @ins_into_idle_tree: true if the entity can be put into the idle tree
*/
static void bfq_deactivate_entity(struct bfq_entity *entity,
bool ins_into_idle_tree,
@@ -1208,16 +1224,29 @@ static void bfq_deactivate_entity(struct bfq_entity *entity,
*/
bfq_update_next_in_service(sd, NULL);
- if (sd->next_in_service)
+ if (sd->next_in_service || sd->in_service_entity) {
/*
- * The parent entity is still backlogged,
- * because next_in_service is not NULL. So, no
- * further upwards deactivation must be
- * performed. Yet, next_in_service has
- * changed. Then the schedule does need to be
- * updated upwards.
+ * The parent entity is still active, because
+ * either next_in_service or in_service_entity
+ * is not NULL. So, no further upwards
+ * deactivation must be performed. Yet,
+ * next_in_service has changed. Then the
+ * schedule does need to be updated upwards.
+ *
+ * NOTE: If in_service_entity is not NULL, then
+ * next_in_service may happen to be NULL,
+ * although the parent entity is evidently
+ * active. This happens if 1) the entity
+ * pointed to by in_service_entity is the only
+ * active entity in the parent entity, and 2)
+ * according to the definition of
+ * next_in_service, the in_service_entity
+ * cannot be considered as
+ * next_in_service. See the comments on the
+ * definition of next_in_service for details.
*/
break;
+ }
/*
* If we get here, then the parent is no more
@@ -1494,47 +1523,34 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
/*
* If entity is no longer a candidate for next
- * service, then we extract it from its active tree,
- * for the following reason. To further boost the
- * throughput in some special case, BFQ needs to know
- * which is the next candidate entity to serve, while
- * there is already an entity in service. In this
- * respect, to make it easy to compute/update the next
- * candidate entity to serve after the current
- * candidate has been set in service, there is a case
- * where it is necessary to extract the current
- * candidate from its service tree. Such a case is
- * when the entity just set in service cannot be also
- * a candidate for next service. Details about when
- * this conditions holds are reported in the comments
- * on the function bfq_no_longer_next_in_service()
- * invoked below.
+ * service, then it must be extracted from its active
+ * tree, so as to make sure that it won't be
+ * considered when computing next_in_service. See the
+ * comments on the function
+ * bfq_no_longer_next_in_service() for details.
*/
if (bfq_no_longer_next_in_service(entity))
bfq_active_extract(bfq_entity_service_tree(entity),
entity);
/*
- * For the same reason why we may have just extracted
- * entity from its active tree, we may need to update
- * next_in_service for the sched_data of entity too,
- * regardless of whether entity has been extracted.
- * In fact, even if entity has not been extracted, a
- * descendant entity may get extracted. Such an event
- * would cause a change in next_in_service for the
- * level of the descendant entity, and thus possibly
- * back to upper levels.
+ * Even if entity is not to be extracted according to
+ * the above check, a descendant entity may get
+ * extracted in one of the next iterations of this
+ * loop. Such an event could cause a change in
+ * next_in_service for the level of the descendant
+ * entity, and thus possibly back to this level.
*
- * We cannot perform the resulting needed update
- * before the end of this loop, because, to know which
- * is the correct next-to-serve candidate entity for
- * each level, we need first to find the leaf entity
- * to set in service. In fact, only after we know
- * which is the next-to-serve leaf entity, we can
- * discover whether the parent entity of the leaf
- * entity becomes the next-to-serve, and so on.
+ * However, we cannot perform the resulting needed
+ * update of next_in_service for this level before the
+ * end of the whole loop, because, to know which is
+ * the correct next-to-serve candidate entity for each
+ * level, we need first to find the leaf entity to set
+ * in service. In fact, only after we know which is
+ * the next-to-serve leaf entity, we can discover
+ * whether the parent entity of the leaf entity
+ * becomes the next-to-serve, and so on.
*/
-
}
bfqq = bfq_entity_to_bfqq(entity);
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 553d75e357b4..5df32907ff3b 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -386,11 +386,10 @@ static void bio_integrity_verify_fn(struct work_struct *work)
bool __bio_integrity_endio(struct bio *bio)
{
struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
+ struct bio_integrity_payload *bip = bio_integrity(bio);
if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
- bi->profile->verify_fn) {
- struct bio_integrity_payload *bip = bio_integrity(bio);
-
+ (bip->bip_flags & BIP_BLOCK_INTEGRITY) && bi->profile->verify_fn) {
INIT_WORK(&bip->bip_work, bio_integrity_verify_fn);
queue_work(kintegrityd_wq, &bip->bip_work);
return false;
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index e53b6129ca5a..980e73095643 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -73,6 +73,8 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(STATS),
QUEUE_FLAG_NAME(POLL_STATS),
QUEUE_FLAG_NAME(REGISTERED),
+ QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
+ QUEUE_FLAG_NAME(QUIESCED),
};
#undef QUEUE_FLAG_NAME
@@ -263,6 +265,7 @@ static const char *const cmd_flag_name[] = {
CMD_FLAG_NAME(RAHEAD),
CMD_FLAG_NAME(BACKGROUND),
CMD_FLAG_NAME(NOUNMAP),
+ CMD_FLAG_NAME(NOWAIT),
};
#undef CMD_FLAG_NAME
diff --git a/block/blk-mq-pci.c b/block/blk-mq-pci.c
index 0c3354cf3552..76944e3271bf 100644
--- a/block/blk-mq-pci.c
+++ b/block/blk-mq-pci.c
@@ -36,12 +36,18 @@ int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev)
for (queue = 0; queue < set->nr_hw_queues; queue++) {
mask = pci_irq_get_affinity(pdev, queue);
if (!mask)
- return -EINVAL;
+ goto fallback;
for_each_cpu(cpu, mask)
set->mq_map[cpu] = queue;
}
return 0;
+
+fallback:
+ WARN_ON_ONCE(set->nr_hw_queues > 1);
+ for_each_possible_cpu(cpu)
+ set->mq_map[cpu] = 0;
+ return 0;
}
EXPORT_SYMBOL_GPL(blk_mq_pci_map_queues);
diff --git a/block/blk-mq-rdma.c b/block/blk-mq-rdma.c
new file mode 100644
index 000000000000..996167f1de18
--- /dev/null
+++ b/block/blk-mq-rdma.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2017 Sagi Grimberg.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+#include <linux/blk-mq.h>
+#include <linux/blk-mq-rdma.h>
+#include <rdma/ib_verbs.h>
+
+/**
+ * blk_mq_rdma_map_queues - provide a default queue mapping for rdma device
+ * @set: tagset to provide the mapping for
+ * @dev: rdma device associated with @set.
+ * @first_vec: first interrupt vector to use for queues (usually 0)
+ *
+ * This function assumes the rdma device @dev has at least as many available
+ * interrupt vectors as @set has queues. It will then query its affinity mask
+ * and build a queue mapping that maps a queue to the CPUs that have irq affinity
+ * for the corresponding vector.
+ *
+ * In case either the driver passed a @dev with fewer vectors than
+ * @set->nr_hw_queues, or @dev does not provide an affinity mask for a
+ * vector, we fall back to the naive mapping.
+ */
+int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set,
+ struct ib_device *dev, int first_vec)
+{
+ const struct cpumask *mask;
+ unsigned int queue, cpu;
+
+ for (queue = 0; queue < set->nr_hw_queues; queue++) {
+ mask = ib_get_vector_affinity(dev, first_vec + queue);
+ if (!mask)
+ goto fallback;
+
+ for_each_cpu(cpu, mask)
+ set->mq_map[cpu] = queue;
+ }
+
+ return 0;
+
+fallback:
+ return blk_mq_map_queues(set);
+}
+EXPORT_SYMBOL_GPL(blk_mq_rdma_map_queues);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f84d145490bf..3f18cff80050 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -336,11 +336,12 @@ static struct request *blk_mq_get_request(struct request_queue *q,
struct elevator_queue *e = q->elevator;
struct request *rq;
unsigned int tag;
+ struct blk_mq_ctx *local_ctx = NULL;
blk_queue_enter_live(q);
data->q = q;
if (likely(!data->ctx))
- data->ctx = blk_mq_get_ctx(q);
+ data->ctx = local_ctx = blk_mq_get_ctx(q);
if (likely(!data->hctx))
data->hctx = blk_mq_map_queue(q, data->ctx->cpu);
if (op & REQ_NOWAIT)
@@ -359,6 +360,10 @@ static struct request *blk_mq_get_request(struct request_queue *q,
tag = blk_mq_get_tag(data);
if (tag == BLK_MQ_TAG_FAIL) {
+ if (local_ctx) {
+ blk_mq_put_ctx(local_ctx);
+ data->ctx = NULL;
+ }
blk_queue_exit(q);
return NULL;
}
@@ -390,13 +395,13 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
return ERR_PTR(ret);
rq = blk_mq_get_request(q, NULL, op, &alloc_data);
-
- blk_mq_put_ctx(alloc_data.ctx);
blk_queue_exit(q);
if (!rq)
return ERR_PTR(-EWOULDBLOCK);
+ blk_mq_put_ctx(alloc_data.ctx);
+
rq->__data_len = 0;
rq->__sector = (sector_t) -1;
rq->bio = rq->biotail = NULL;
@@ -441,7 +446,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
alloc_data.ctx = __blk_mq_get_ctx(q, cpu);
rq = blk_mq_get_request(q, NULL, op, &alloc_data);
-
blk_queue_exit(q);
if (!rq)
@@ -713,8 +717,8 @@ EXPORT_SYMBOL(blk_mq_kick_requeue_list);
void blk_mq_delay_kick_requeue_list(struct request_queue *q,
unsigned long msecs)
{
- kblockd_schedule_delayed_work(&q->requeue_work,
- msecs_to_jiffies(msecs));
+ kblockd_mod_delayed_work_on(WORK_CPU_UNBOUND, &q->requeue_work,
+ msecs_to_jiffies(msecs));
}
EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 87b7df4851bf..07125e7941f4 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -60,7 +60,7 @@ static void trigger_softirq(void *data)
static int raise_blk_irq(int cpu, struct request *rq)
{
if (cpu_online(cpu)) {
- struct call_single_data *data = &rq->csd;
+ call_single_data_t *data = &rq->csd;
data->func = trigger_softirq;
data->info = rq;
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 6a4c4c493dd5..0fea76aa0f3f 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -380,6 +380,14 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw)
} \
} while (0)
+static inline unsigned int throtl_bio_data_size(struct bio *bio)
+{
+ /* assume it's one sector */
+ if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
+ return 512;
+ return bio->bi_iter.bi_size;
+}
+
static void throtl_qnode_init(struct throtl_qnode *qn, struct throtl_grp *tg)
{
INIT_LIST_HEAD(&qn->node);
@@ -932,6 +940,7 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
bool rw = bio_data_dir(bio);
u64 bytes_allowed, extra_bytes, tmp;
unsigned long jiffy_elapsed, jiffy_wait, jiffy_elapsed_rnd;
+ unsigned int bio_size = throtl_bio_data_size(bio);
jiffy_elapsed = jiffy_elapsed_rnd = jiffies - tg->slice_start[rw];
@@ -945,14 +954,14 @@ static bool tg_with_in_bps_limit(struct throtl_grp *tg, struct bio *bio,
do_div(tmp, HZ);
bytes_allowed = tmp;
- if (tg->bytes_disp[rw] + bio->bi_iter.bi_size <= bytes_allowed) {
+ if (tg->bytes_disp[rw] + bio_size <= bytes_allowed) {
if (wait)
*wait = 0;
return true;
}
/* Calc approx time to dispatch */
- extra_bytes = tg->bytes_disp[rw] + bio->bi_iter.bi_size - bytes_allowed;
+ extra_bytes = tg->bytes_disp[rw] + bio_size - bytes_allowed;
jiffy_wait = div64_u64(extra_bytes * HZ, tg_bps_limit(tg, rw));
if (!jiffy_wait)
@@ -1032,11 +1041,12 @@ static bool tg_may_dispatch(struct throtl_grp *tg, struct bio *bio,
static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio)
{
bool rw = bio_data_dir(bio);
+ unsigned int bio_size = throtl_bio_data_size(bio);
/* Charge the bio to the group */
- tg->bytes_disp[rw] += bio->bi_iter.bi_size;
+ tg->bytes_disp[rw] += bio_size;
tg->io_disp[rw]++;
- tg->last_bytes_disp[rw] += bio->bi_iter.bi_size;
+ tg->last_bytes_disp[rw] += bio_size;
tg->last_io_disp[rw]++;
/*
diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index c4513b23f57a..dd56d7460cb9 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -29,26 +29,25 @@
#include <scsi/scsi_cmnd.h>
/**
- * bsg_destroy_job - routine to teardown/delete a bsg job
+ * bsg_teardown_job - routine to teardown a bsg job
* @job: bsg_job that is to be torn down
*/
-static void bsg_destroy_job(struct kref *kref)
+static void bsg_teardown_job(struct kref *kref)
{
struct bsg_job *job = container_of(kref, struct bsg_job, kref);
struct request *rq = job->req;
- blk_end_request_all(rq, BLK_STS_OK);
-
put_device(job->dev); /* release reference for the request */
kfree(job->request_payload.sg_list);
kfree(job->reply_payload.sg_list);
- kfree(job);
+
+ blk_end_request_all(rq, BLK_STS_OK);
}
void bsg_job_put(struct bsg_job *job)
{
- kref_put(&job->kref, bsg_destroy_job);
+ kref_put(&job->kref, bsg_teardown_job);
}
EXPORT_SYMBOL_GPL(bsg_job_put);
@@ -100,7 +99,7 @@ EXPORT_SYMBOL_GPL(bsg_job_done);
*/
static void bsg_softirq_done(struct request *rq)
{
- struct bsg_job *job = rq->special;
+ struct bsg_job *job = blk_mq_rq_to_pdu(rq);
bsg_job_put(job);
}
@@ -122,33 +121,20 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req)
}
/**
- * bsg_create_job - create the bsg_job structure for the bsg request
+ * bsg_prepare_job - prepare the bsg_job structure for the bsg request
* @dev: device that is being sent the bsg request
* @req: BSG request that needs a job structure
*/
-static int bsg_create_job(struct device *dev, struct request *req)
+static int bsg_prepare_job(struct device *dev, struct request *req)
{
struct request *rsp = req->next_rq;
- struct request_queue *q = req->q;
struct scsi_request *rq = scsi_req(req);
- struct bsg_job *job;
+ struct bsg_job *job = blk_mq_rq_to_pdu(req);
int ret;
- BUG_ON(req->special);
-
- job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
- if (!job)
- return -ENOMEM;
-
- req->special = job;
- job->req = req;
- if (q->bsg_job_size)
- job->dd_data = (void *)&job[1];
job->request = rq->cmd;
job->request_len = rq->cmd_len;
- job->reply = rq->sense;
- job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
- * allocated */
+
if (req->bio) {
ret = bsg_map_buffer(&job->request_payload, req);
if (ret)
@@ -187,7 +173,6 @@ static void bsg_request_fn(struct request_queue *q)
{
struct device *dev = q->queuedata;
struct request *req;
- struct bsg_job *job;
int ret;
if (!get_device(dev))
@@ -199,7 +184,7 @@ static void bsg_request_fn(struct request_queue *q)
break;
spin_unlock_irq(q->queue_lock);
- ret = bsg_create_job(dev, req);
+ ret = bsg_prepare_job(dev, req);
if (ret) {
scsi_req(req)->result = ret;
blk_end_request_all(req, BLK_STS_OK);
@@ -207,8 +192,7 @@ static void bsg_request_fn(struct request_queue *q)
continue;
}
- job = req->special;
- ret = q->bsg_job_fn(job);
+ ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req));
spin_lock_irq(q->queue_lock);
if (ret)
break;
@@ -219,6 +203,35 @@ static void bsg_request_fn(struct request_queue *q)
spin_lock_irq(q->queue_lock);
}
+static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp)
+{
+ struct bsg_job *job = blk_mq_rq_to_pdu(req);
+ struct scsi_request *sreq = &job->sreq;
+
+ memset(job, 0, sizeof(*job));
+
+ scsi_req_init(sreq);
+ sreq->sense_len = SCSI_SENSE_BUFFERSIZE;
+ sreq->sense = kzalloc(sreq->sense_len, gfp);
+ if (!sreq->sense)
+ return -ENOMEM;
+
+ job->req = req;
+ job->reply = sreq->sense;
+ job->reply_len = sreq->sense_len;
+ job->dd_data = job + 1;
+
+ return 0;
+}
+
+static void bsg_exit_rq(struct request_queue *q, struct request *req)
+{
+ struct bsg_job *job = blk_mq_rq_to_pdu(req);
+ struct scsi_request *sreq = &job->sreq;
+
+ kfree(sreq->sense);
+}
+
/**
* bsg_setup_queue - Create and add the bsg hooks so we can receive requests
* @dev: device to attach bsg device to
@@ -235,7 +248,9 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return ERR_PTR(-ENOMEM);
- q->cmd_size = sizeof(struct scsi_request);
+ q->cmd_size = sizeof(struct bsg_job) + dd_job_size;
+ q->init_rq_fn = bsg_init_rq;
+ q->exit_rq_fn = bsg_exit_rq;
q->request_fn = bsg_request_fn;
ret = blk_init_allocated_queue(q);
@@ -243,7 +258,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, char *name,
goto out_cleanup_queue;
q->queuedata = dev;
- q->bsg_job_size = dd_job_size;
q->bsg_job_fn = job_fn;
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
diff --git a/block/genhd.c b/block/genhd.c
index 713b7d4fe7a1..dd305c65ffb0 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -279,6 +279,7 @@ EXPORT_SYMBOL_GPL(disk_map_sector_rcu);
* Can be deleted altogether. Later.
*
*/
+#define BLKDEV_MAJOR_HASH_SIZE 255
static struct blk_major_name {
struct blk_major_name *next;
int major;
@@ -296,12 +297,11 @@ void blkdev_show(struct seq_file *seqf, off_t offset)
{
struct blk_major_name *dp;
- if (offset < BLKDEV_MAJOR_HASH_SIZE) {
- mutex_lock(&block_class_lock);
- for (dp = major_names[offset]; dp; dp = dp->next)
+ mutex_lock(&block_class_lock);
+ for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
+ if (dp->major == offset)
seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
- mutex_unlock(&block_class_lock);
- }
+ mutex_unlock(&block_class_lock);
}
#endif /* CONFIG_PROC_FS */
@@ -346,6 +346,14 @@ int register_blkdev(unsigned int major, const char *name)
ret = major;
}
+ if (major >= BLKDEV_MAJOR_MAX) {
+ pr_err("register_blkdev: major requested (%d) is greater than the maximum (%d) for %s\n",
+ major, BLKDEV_MAJOR_MAX, name);
+
+ ret = -EINVAL;
+ goto out;
+ }
+
p = kmalloc(sizeof(struct blk_major_name), GFP_KERNEL);
if (p == NULL) {
ret = -ENOMEM;