From 4d92a9beb39d80a7d8ff7c04ae12a10290105ae5 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 29 May 2014 08:09:00 -0600
Subject: block: remove 'magic' from struct blk_plug

I don't think we've ever caught any bugs with this, and there's the
list poisoning for the plug lists to catch uninitialized cases.
So remove the magic member and save 8 bytes in the struct.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-core.c       | 5 -----
 include/linux/blkdev.h | 1 -
 2 files changed, 6 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index d87be5b4e554..40d654861c33 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2957,8 +2957,6 @@ int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork,
 }
 EXPORT_SYMBOL(kblockd_schedule_delayed_work_on);
 
-#define PLUG_MAGIC	0x91827364
-
 /**
  * blk_start_plug - initialize blk_plug and track it inside the task_struct
  * @plug:	The &struct blk_plug that needs to be initialized
@@ -2977,7 +2975,6 @@ void blk_start_plug(struct blk_plug *plug)
 {
 	struct task_struct *tsk = current;
 
-	plug->magic = PLUG_MAGIC;
 	INIT_LIST_HEAD(&plug->list);
 	INIT_LIST_HEAD(&plug->mq_list);
 	INIT_LIST_HEAD(&plug->cb_list);
@@ -3074,8 +3071,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 	LIST_HEAD(list);
 	unsigned int depth;
 
-	BUG_ON(plug->magic != PLUG_MAGIC);
-
 	flush_plug_callbacks(plug, from_schedule);
 
 	if (!list_empty(&plug->mq_list))
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 913f1c2d3be0..098304576d51 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1060,7 +1060,6 @@ static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
  * schedule() where blk_schedule_flush_plug() is called.
  */
 struct blk_plug {
-	unsigned long magic; /* detect uninitialized use-cases */
 	struct list_head list; /* requests */
 	struct list_head mq_list; /* blk-mq requests */
 	struct list_head cb_list; /* md requires an unplug callback */
-- 
cgit v1.2.3-59-g8ed1b


From 05f1dd5315217398fc8d122bdee80f96a9f21274 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 29 May 2014 09:53:32 -0600
Subject: block: add queue flag for disabling SG merging

If devices are not SG starved, we waste a lot of time potentially
collapsing SG segments. Enough that 1.5% of the CPU time goes
to this, at only 400K IOPS. Add a queue flag, QUEUE_FLAG_NO_SG_MERGE,
which just returns the number of vectors in a bio instead of looping
over all segments and checking for collapsible ones.

Add a BLK_MQ_F_SG_MERGE flag so that drivers can opt-in on the sg
merging, if they so desire.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-merge.c      | 28 +++++++++++++++++++++-------
 block/blk-mq.c         |  3 +++
 include/linux/blk-mq.h |  1 +
 include/linux/blkdev.h |  1 +
 4 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/block/blk-merge.c b/block/blk-merge.c
index 6c583f9c5b65..b3bf0df0f4c2 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -13,7 +13,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 					     struct bio *bio)
 {
 	struct bio_vec bv, bvprv = { NULL };
-	int cluster, high, highprv = 1;
+	int cluster, high, highprv = 1, no_sg_merge;
 	unsigned int seg_size, nr_phys_segs;
 	struct bio *fbio, *bbio;
 	struct bvec_iter iter;
@@ -35,12 +35,21 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
 	cluster = blk_queue_cluster(q);
 	seg_size = 0;
 	nr_phys_segs = 0;
+	no_sg_merge = test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags);
+	high = 0;
 	for_each_bio(bio) {
 		bio_for_each_segment(bv, bio, iter) {
+			/*
+			 * If SG merging is disabled, each bio vector is
+			 * a segment
+			 */
+			if (no_sg_merge)
+				goto new_segment;
+
 			/*
 			 * the trick here is making sure that a high page is
-			 * never considered part of another segment, since that
-			 * might change with the bounce page.
+			 * never considered part of another segment, since
+			 * that might change with the bounce page.
 			 */
 			high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q);
 			if (!high && !highprv && cluster) {
@@ -84,11 +93,16 @@ void blk_recalc_rq_segments(struct request *rq)
 
 void blk_recount_segments(struct request_queue *q, struct bio *bio)
 {
-	struct bio *nxt = bio->bi_next;
+	if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags))
+		bio->bi_phys_segments = bio->bi_vcnt;
+	else {
+		struct bio *nxt = bio->bi_next;
+
+		bio->bi_next = NULL;
+		bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
+		bio->bi_next = nxt;
+	}
 
-	bio->bi_next = NULL;
-	bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio);
-	bio->bi_next = nxt;
 	bio->bi_flags |= (1 << BIO_SEG_VALID);
 }
 EXPORT_SYMBOL(blk_recount_segments);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f27fe44230c2..f98d977fd150 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1829,6 +1829,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	q->mq_ops = set->ops;
 	q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
 
+	if (!(set->flags & BLK_MQ_F_SG_MERGE))
+		q->queue_flags |= 1 << QUEUE_FLAG_NO_SG_MERGE;
+
 	q->sg_reserved_size = INT_MAX;
 
 	INIT_WORK(&q->requeue_work, blk_mq_requeue_work);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 91dfb75ce39f..95de239444d2 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -129,6 +129,7 @@ enum {
 	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
 	BLK_MQ_F_SHOULD_SORT	= 1 << 1,
 	BLK_MQ_F_TAG_SHARED	= 1 << 2,
+	BLK_MQ_F_SG_MERGE	= 1 << 3,
 
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 098304576d51..695b9fd41efe 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -510,6 +510,7 @@ struct request_queue {
 #define QUEUE_FLAG_SAME_FORCE  18	/* force complete on same CPU */
 #define QUEUE_FLAG_DEAD        19	/* queue tear-down finished */
 #define QUEUE_FLAG_INIT_DONE   20	/* queue is initialized */
+#define QUEUE_FLAG_NO_SG_MERGE 21	/* don't attempt to merge SG segments*/
 
 #define QUEUE_FLAG_DEFAULT	((1 << QUEUE_FLAG_IO_STAT) |		\
 				 (1 << QUEUE_FLAG_STACKABLE)	|	\
-- 
cgit v1.2.3-59-g8ed1b


From 4b570521be54666e6ad7e5f47af92fd609fbd8b5 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@fb.com>
Date: Thu, 29 May 2014 11:00:11 -0600
Subject: blk-mq: request initialization optimizations

We currently clear a lot more than we need to, so make that a bit
more clever. Make some of the init dependent on features, like
only setting start_time if we are going to use it.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-mq.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index f98d977fd150..6160128085fc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -199,19 +199,12 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 	rq->q = q;
 	rq->mq_ctx = ctx;
 	rq->cmd_flags |= rw_flags;
-	rq->cmd_type = 0;
 	/* do not touch atomic flags, it needs atomic ops against the timer */
 	rq->cpu = -1;
-	rq->__data_len = 0;
-	rq->__sector = (sector_t) -1;
-	rq->bio = NULL;
-	rq->biotail = NULL;
 	INIT_HLIST_NODE(&rq->hash);
 	RB_CLEAR_NODE(&rq->rb_node);
-	memset(&rq->flush, 0, max(sizeof(rq->flush), sizeof(rq->elv)));
 	rq->rq_disk = NULL;
 	rq->part = NULL;
-	rq->start_time = jiffies;
 #ifdef CONFIG_BLK_CGROUP
 	rq->rl = NULL;
 	set_start_time_ns(rq);
@@ -221,23 +214,16 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
 	rq->nr_integrity_segments = 0;
 #endif
-	rq->ioprio = 0;
 	rq->special = NULL;
 	/* tag was already set */
 	rq->errors = 0;
-	memset(rq->__cmd, 0, sizeof(rq->__cmd));
-	rq->cmd = rq->__cmd;
-	rq->cmd_len = BLK_MAX_CDB;
 
 	rq->extra_len = 0;
 	rq->sense_len = 0;
 	rq->resid_len = 0;
 	rq->sense = NULL;
 
-	rq->deadline = 0;
 	INIT_LIST_HEAD(&rq->timeout_list);
-	rq->timeout = 0;
-	rq->retries = 0;
 	rq->end_io = NULL;
 	rq->end_io_data = NULL;
 	rq->next_rq = NULL;
@@ -449,8 +435,10 @@ static void blk_mq_start_request(struct request *rq, bool last)
 	 * complete. So be sure to clear complete again when we start
 	 * the request, otherwise we'll ignore the completion event.
 	 */
-	set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
-	clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
+	if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
+		set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+	if (test_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags))
+		clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
 
 	if (q->dma_drain_size && blk_rq_bytes(rq)) {
 		/*
@@ -1112,7 +1100,11 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
 static void blk_mq_bio_to_request(struct request *rq, struct bio *bio)
 {
 	init_request_from_bio(rq, bio);
-	blk_account_io_start(rq, 1);
+
+	if (blk_do_io_stat(rq)) {
+		rq->start_time = jiffies;
+		blk_account_io_start(rq, 1);
+	}
 }
 
 static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
-- 
cgit v1.2.3-59-g8ed1b


From da52f22fa924b4a21d8e11fbfd3eeebd7a90a366 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Thu, 29 May 2014 15:11:30 -0400
Subject: block: remove dead code in scsi_ioctl:blk_verify_command

filter gets assigned the address of blk_default_cmd_filter on
entry to this function, so the !filter condition can never be true.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/scsi_ioctl.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 26487972ac54..9c28a5b38042 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -205,10 +205,6 @@ int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm)
 	if (capable(CAP_SYS_RAWIO))
 		return 0;
 
-	/* if there's no filter set, assume we're filtering everything out */
-	if (!filter)
-		return -EPERM;
-
 	/* Anybody who can open the device can do a read-safe command */
 	if (test_bit(cmd[0], filter->read_ok))
 		return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 2230237500821aedfcf2bba2a79d9cbca389233c Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Fri, 30 May 2014 08:06:42 -0600
Subject: blk-mq: blk_mq_tag_to_rq should handle flush request

flush request is special, which borrows the tag from the parent
request. Hence blk_mq_tag_to_rq needs special handling to return
the flush request from the tag.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 block/blk-flush.c      |  4 +++-
 block/blk-mq.c         | 12 +++++++++---
 include/linux/blk-mq.h |  2 +-
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/block/blk-flush.c b/block/blk-flush.c
index ef608b35d9be..ff87c664b7df 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -223,8 +223,10 @@ static void flush_end_io(struct request *flush_rq, int error)
 	struct request *rq, *n;
 	unsigned long flags = 0;
 
-	if (q->mq_ops)
+	if (q->mq_ops) {
 		spin_lock_irqsave(&q->mq_flush_lock, flags);
+		q->flush_rq->cmd_flags = 0;
+	}
 
 	running = &q->flush_queue[q->flush_running_idx];
 	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6160128085fc..21f952ab3581 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -541,9 +541,15 @@ void blk_mq_kick_requeue_list(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_mq_kick_requeue_list);
 
-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
+struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag)
 {
-	return tags->rqs[tag];
+	struct request_queue *q = hctx->queue;
+
+	if ((q->flush_rq->cmd_flags & REQ_FLUSH_SEQ) &&
+	    q->flush_rq->tag == tag)
+		return q->flush_rq;
+
+	return hctx->tags->rqs[tag];
 }
 EXPORT_SYMBOL(blk_mq_tag_to_rq);
 
@@ -572,7 +578,7 @@ static void blk_mq_timeout_check(void *__data, unsigned long *free_tags)
 		if (tag >= hctx->tags->nr_tags)
 			break;
 
-		rq = blk_mq_tag_to_rq(hctx->tags, tag++);
+		rq = blk_mq_tag_to_rq(hctx, tag++);
 		if (rq->q != hctx->queue)
 			continue;
 		if (!test_bit(REQ_ATOM_STARTED, &rq->atomic_flags))
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 95de239444d2..ad3adb73cc70 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -154,7 +154,7 @@ void blk_mq_free_request(struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
 		gfp_t gfp, bool reserved);
-struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
+struct request *blk_mq_tag_to_rq(struct blk_mq_hw_ctx *hctx, unsigned int tag);
 
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
-- 
cgit v1.2.3-59-g8ed1b