Merge branch 'for-4.4/core' of git://git.kernel.dk/linux-block

Pull core block updates from Jens Axboe: "This is the core block pull request for 4.4. I've got a few more topic branches this time around, some of them will layer on top of the core+drivers changes and will come in a separate round. So not a huge chunk of changes in this round. This pull request contains: - Enable blk-mq page allocation tracking with kmemleak, from Catalin. - Unused prototype removal in blk-mq from Christoph. - Cleanup of the q->blk_trace exchange, using cmpxchg instead of two xchg()'s, from Davidlohr. - A plug flush fix from Jeff. - Also from Jeff, a fix that means we don't have to update shared tag sets at init time unless we do a state change. This cuts down boot times on thousands of devices a lot with scsi/blk-mq. - blk-mq waitqueue barrier fix from Kosuke. - Various fixes from Ming: - Fixes for segment merging and splitting, and checks, for the old core and blk-mq. - Potential blk-mq speedup by marking ctx pending at the end of a plug insertion batch in blk-mq. - direct-io no page dirty on kernel direct reads. - A WRITE_SYNC fix for mpage from Roman" * 'for-4.4/core' of git://git.kernel.dk/linux-block: blk-mq: avoid excessive boot delays with large lun counts blktrace: re-write setting q->blk_trace blk-mq: mark ctx as pending at batch in flush plug path blk-mq: fix for trace_block_plug() block: check bio_mergeable() early before merging blk-mq: check bio_mergeable() early before merging block: avoid to merge splitted bio block: setup bi_phys_segments after splitting block: fix plug list flushing for nomerge queues blk-mq: remove unused blk_mq_clone_flush_request prototype blk-mq: fix waitqueue_active without memory barrier in block/blk-mq-tag.c fs: direct-io: don't dirtying pages for ITER_BVEC/ITER_KVEC direct read fs/mpage.c: forgotten WRITE_SYNC in case of data integrity write block: kmemleak: Track the page allocations for struct request
author: Linus Torvalds <torvalds@linux-foundation.org> 2015-11-04 20:28:10 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2015-11-04 20:28:10 -0800
commit: d9734e0d1ccf87e828ad172c58a96dff97cfc0ba (patch)
tree: fcce824c45f17dbcc954bc55b8b642a967b962e0 /block
parent: Merge tag 'pm+acpi-4.4-rc1-1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm (diff)
parent: blk-mq: avoid excessive boot delays with large lun counts (diff)
download: linux-dev-d9734e0d1ccf87e828ad172c58a96dff97cfc0ba.tar.xz
linux-dev-d9734e0d1ccf87e828ad172c58a96dff97cfc0ba.zip
7 files changed, 122 insertions, 40 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 18e92a6645e2..f888f23d796f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1594,6 +1594,30 @@ out:
 	return ret;
 }
 
+unsigned int blk_plug_queued_count(struct request_queue *q)
+{
+	struct blk_plug *plug;
+	struct request *rq;
+	struct list_head *plug_list;
+	unsigned int ret = 0;
+
+	plug = current->plug;
+	if (!plug)
+		goto out;
+
+	if (q->mq_ops)
+		plug_list = &plug->mq_list;
+	else
+		plug_list = &plug->list;
+
+	list_for_each_entry(rq, plug_list, queuelist) {
+		if (rq->q == q)
+			ret++;
+	}
+out:
+	return ret;
+}
+
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
 	req->cmd_type = REQ_TYPE_FS;
@@ -1641,9 +1665,11 @@ static void blk_queue_bio(struct request_queue *q, struct bio *bio)
 	 * Check if we can merge with the plugged list before grabbing
 	 * any locks.
 	 */
-	if (!blk_queue_nomerges(q) &&
-	    blk_attempt_plug_merge(q, bio, &request_count, NULL))
-		return;
+	if (!blk_queue_nomerges(q)) {
+		if (blk_attempt_plug_merge(q, bio, &request_count, NULL))
+			return;
+	} else
+		request_count = blk_plug_queued_count(q);
 
 	spin_lock_irq(q->queue_lock);
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index c4e9c37f3e38..de5716d8e525 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -11,13 +11,16 @@
 
 static struct bio *blk_bio_discard_split(struct request_queue *q,
 					 struct bio *bio,
-					 struct bio_set *bs)
+					 struct bio_set *bs,
+					 unsigned *nsegs)
 {
 	unsigned int max_discard_sectors, granularity;
 	int alignment;
 	sector_t tmp;
 	unsigned split_sectors;
 
+	*nsegs = 1;
+
 	/* Zero-sector (unknown) and one-sector granularities are the same.  */
 	granularity = max(q->limits.discard_granularity >> 9, 1U);
 
@@ -51,8 +54,11 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
 
 static struct bio *blk_bio_write_same_split(struct request_queue *q,
 					    struct bio *bio,
-					    struct bio_set *bs)
+					    struct bio_set *bs,
+					    unsigned *nsegs)
 {
+	*nsegs = 1;
+
 	if (!q->limits.max_write_same_sectors)
 		return NULL;
 
@@ -64,7 +70,8 @@ static struct bio *blk_bio_write_same_split(struct request_queue *q,
 
 static struct bio *blk_bio_segment_split(struct request_queue *q,
 					 struct bio *bio,
-					 struct bio_set *bs)
+					 struct bio_set *bs,
+					 unsigned *segs)
 {
 	struct bio_vec bv, bvprv, *bvprvp = NULL;
 	struct bvec_iter iter;
@@ -106,24 +113,35 @@ new_segment:
 		sectors += bv.bv_len >> 9;
 	}
 
+	*segs = nsegs;
 	return NULL;
 split:
+	*segs = nsegs;
 	return bio_split(bio, sectors, GFP_NOIO, bs);
 }
 
 void blk_queue_split(struct request_queue *q, struct bio **bio,
 		     struct bio_set *bs)
 {
-	struct bio *split;
+	struct bio *split, *res;
+	unsigned nsegs;
 
 	if ((*bio)->bi_rw & REQ_DISCARD)
-		split = blk_bio_discard_split(q, *bio, bs);
+		split = blk_bio_discard_split(q, *bio, bs, &nsegs);
 	else if ((*bio)->bi_rw & REQ_WRITE_SAME)
-		split = blk_bio_write_same_split(q, *bio, bs);
+		split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
 	else
-		split = blk_bio_segment_split(q, *bio, q->bio_split);
+		split = blk_bio_segment_split(q, *bio, q->bio_split, &nsegs);
+
+	/* physical segments can be figured out during splitting */
+	res = split ? split : *bio;
+	res->bi_phys_segments = nsegs;
+	bio_set_flag(res, BIO_SEG_VALID);
 
 	if (split) {
+		/* there isn't chance to merge the splitted bio */
+		split->bi_rw |= REQ_NOMERGE;
+
 		bio_chain(split, *bio);
 		generic_make_request(*bio);
 		*bio = split;
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index ec2d11915142..60ac684c8b8c 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -75,6 +75,10 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 	struct blk_mq_bitmap_tags *bt;
 	int i, wake_index;
 
+	/*
+	 * Make sure all changes prior to this are visible from other CPUs.
+	 */
+	smp_mb();
 	bt = &tags->bitmap_tags;
 	wake_index = atomic_read(&bt->wake_index);
 	for (i = 0; i < BT_WAIT_QUEUES; i++) {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 85f014327342..309e41087e6b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -9,6 +9,7 @@
 #include <linux/backing-dev.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/kmemleak.h>
 #include <linux/mm.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -989,18 +990,25 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 }
 EXPORT_SYMBOL(blk_mq_delay_queue);
 
-static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
-				    struct request *rq, bool at_head)
+static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
+					    struct blk_mq_ctx *ctx,
+					    struct request *rq,
+					    bool at_head)
 {
-	struct blk_mq_ctx *ctx = rq->mq_ctx;
-
 	trace_block_rq_insert(hctx->queue, rq);
 
 	if (at_head)
 		list_add(&rq->queuelist, &ctx->rq_list);
 	else
 		list_add_tail(&rq->queuelist, &ctx->rq_list);
+}
 
+static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
+				    struct request *rq, bool at_head)
+{
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+
+	__blk_mq_insert_req_list(hctx, ctx, rq, at_head);
 	blk_mq_hctx_mark_pending(hctx, ctx);
 }
 
@@ -1056,8 +1064,9 @@ static void blk_mq_insert_requests(struct request_queue *q,
 		rq = list_first_entry(list, struct request, queuelist);
 		list_del_init(&rq->queuelist);
 		rq->mq_ctx = ctx;
-		__blk_mq_insert_request(hctx, rq, false);
+		__blk_mq_insert_req_list(hctx, ctx, rq, false);
 	}
+	blk_mq_hctx_mark_pending(hctx, ctx);
 	spin_unlock(&ctx->lock);
 
 	blk_mq_run_hw_queue(hctx, from_schedule);
@@ -1139,7 +1148,7 @@ static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx,
 					 struct blk_mq_ctx *ctx,
 					 struct request *rq, struct bio *bio)
 {
-	if (!hctx_allow_merges(hctx)) {
+	if (!hctx_allow_merges(hctx) || !bio_mergeable(bio)) {
 		blk_mq_bio_to_request(rq, bio);
 		spin_lock(&ctx->lock);
 insert_rq:
@@ -1267,9 +1276,12 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_split(q, &bio, q->bio_split);
 
-	if (!is_flush_fua && !blk_queue_nomerges(q) &&
-	    blk_attempt_plug_merge(q, bio, &request_count, &same_queue_rq))
-		return;
+	if (!is_flush_fua && !blk_queue_nomerges(q)) {
+		if (blk_attempt_plug_merge(q, bio, &request_count,
+					   &same_queue_rq))
+			return;
+	} else
+		request_count = blk_plug_queued_count(q);
 
 	rq = blk_mq_map_request(q, bio, &data);
 	if (unlikely(!rq))
@@ -1376,7 +1388,7 @@ static void blk_sq_make_request(struct request_queue *q, struct bio *bio)
 	plug = current->plug;
 	if (plug) {
 		blk_mq_bio_to_request(rq, bio);
-		if (list_empty(&plug->mq_list))
+		if (!request_count)
 			trace_block_plug(q);
 		else if (request_count >= BLK_MAX_REQUEST_COUNT) {
 			blk_flush_plug_list(plug, false);
@@ -1430,6 +1442,11 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
 	while (!list_empty(&tags->page_list)) {
 		page = list_first_entry(&tags->page_list, struct page, lru);
 		list_del_init(&page->lru);
+		/*
+		 * Remove kmemleak object previously allocated in
+		 * blk_mq_init_rq_map().
+		 */
+		kmemleak_free(page_address(page));
 		__free_pages(page, page->private);
 	}
 
@@ -1502,6 +1519,11 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
 		list_add_tail(&page->lru, &tags->page_list);
 
 		p = page_address(page);
+		/*
+		 * Allow kmemleak to scan these pages as they contain pointers
+		 * to additional allocations like via ops->init_request().
+		 */
+		kmemleak_alloc(p, order_to_size(this_order), 1, GFP_KERNEL);
 		entries_per_page = order_to_size(this_order) / rq_size;
 		to_do = min(entries_per_page, set->queue_depth - i);
 		left -= to_do * rq_size;
@@ -1673,7 +1695,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	INIT_LIST_HEAD(&hctx->dispatch);
 	hctx->queue = q;
 	hctx->queue_num = hctx_idx;
-	hctx->flags = set->flags;
+	hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
 
 	blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
 					blk_mq_hctx_notify, hctx);
@@ -1860,27 +1882,26 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 	}
 }
 
-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set)
+static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 {
 	struct blk_mq_hw_ctx *hctx;
-	struct request_queue *q;
-	bool shared;
 	int i;
 
-	if (set->tag_list.next == set->tag_list.prev)
-		shared = false;
-	else
-		shared = true;
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (shared)
+			hctx->flags |= BLK_MQ_F_TAG_SHARED;
+		else
+			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+	}
+}
+
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
+{
+	struct request_queue *q;
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_freeze_queue(q);
-
-		queue_for_each_hw_ctx(q, hctx, i) {
-			if (shared)
-				hctx->flags |= BLK_MQ_F_TAG_SHARED;
-			else
-				hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
-		}
+		queue_set_hctx_shared(q, shared);
 		blk_mq_unfreeze_queue(q);
 	}
 }
@@ -1891,7 +1912,12 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
 
 	mutex_lock(&set->tag_list_lock);
 	list_del_init(&q->tag_set_list);
-	blk_mq_update_tag_set_depth(set);
+	if (list_is_singular(&set->tag_list)) {
+		/* just transitioned to unshared */
+		set->flags &= ~BLK_MQ_F_TAG_SHARED;
+		/* update existing queue */
+		blk_mq_update_tag_set_depth(set, false);
+	}
 	mutex_unlock(&set->tag_list_lock);
 }
 
@@ -1901,8 +1927,17 @@ static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
 	q->tag_set = set;
 
 	mutex_lock(&set->tag_list_lock);
+
+	/* Check to see if we're transitioning to shared (from 1 to 2 queues). */
+	if (!list_empty(&set->tag_list) && !(set->flags & BLK_MQ_F_TAG_SHARED)) {
+		set->flags |= BLK_MQ_F_TAG_SHARED;
+		/* update existing queue */
+		blk_mq_update_tag_set_depth(set, true);
+	}
+	if (set->flags & BLK_MQ_F_TAG_SHARED)
+		queue_set_hctx_shared(q, true);
 	list_add_tail(&q->tag_set_list, &set->tag_list);
-	blk_mq_update_tag_set_depth(set);
+
 	mutex_unlock(&set->tag_list_lock);
 }
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index f4fea7964910..b44dce165761 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -29,8 +29,6 @@ void __blk_mq_complete_request(struct request *rq);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
-void blk_mq_clone_flush_request(struct request *flush_rq,
-		struct request *orig_rq);
 int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr);
 void blk_mq_wake_waiters(struct request_queue *q);
 
diff --git a/block/blk.h b/block/blk.h
index 98614ad37c81..aa27d0292af1 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -86,6 +86,7 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req,
 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
 			    unsigned int *request_count,
 			    struct request **same_queue_rq);
+unsigned int blk_plug_queued_count(struct request_queue *q);
 
 void blk_account_io_start(struct request *req, bool new_io);
 void blk_account_io_completion(struct request *req, unsigned int bytes);
diff --git a/block/elevator.c b/block/elevator.c
index 84d63943f2de..c3555c9c672f 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -420,7 +420,7 @@ int elv_merge(struct request_queue *q, struct request **req, struct bio *bio)
 	 * 	noxmerges: Only simple one-hit cache try
 	 * 	merges:	   All merge tries attempted
 	 */
-	if (blk_queue_nomerges(q))
+	if (blk_queue_nomerges(q) || !bio_mergeable(bio))
 		return ELEVATOR_NO_MERGE;
 
 	/*
author	Linus Torvalds <torvalds@linux-foundation.org>	2015-11-04 20:28:10 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2015-11-04 20:28:10 -0800
commit	d9734e0d1ccf87e828ad172c58a96dff97cfc0ba (patch)
tree	fcce824c45f17dbcc954bc55b8b642a967b962e0 /block
parent	Merge tag 'pm+acpi-4.4-rc1-1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm (diff)
parent	blk-mq: avoid excessive boot delays with large lun counts (diff)
download	linux-dev-d9734e0d1ccf87e828ad172c58a96dff97cfc0ba.tar.xz linux-dev-d9734e0d1ccf87e828ad172c58a96dff97cfc0ba.zip