From 9d4df77fab7347a74a9938521ffad8d8fab2671d Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 5 Dec 2014 16:32:13 +0100
Subject: s390/scm_block: use mempool to manage aidaw requests

We currently use one preallocated page per HW request to store
aidaws. With this patch we use a mempool to allocate an aidaw page
whenever we need it.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/scm_blk.c | 45 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 7 deletions(-)

(limited to 'drivers/s390/block/scm_blk.c')

diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 56046ab39629..5b2abadea094 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -10,6 +10,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/spinlock.h>
+#include <linux/mempool.h>
 #include <linux/module.h>
 #include <linux/blkdev.h>
 #include <linux/genhd.h>
@@ -20,6 +21,7 @@
 
 debug_info_t *scm_debug;
 static int scm_major;
+static mempool_t *aidaw_pool;
 static DEFINE_SPINLOCK(list_lock);
 static LIST_HEAD(inactive_requests);
 static unsigned int nr_requests = 64;
@@ -36,7 +38,6 @@ static void __scm_free_rq(struct scm_request *scmrq)
 	struct aob_rq_header *aobrq = to_aobrq(scmrq);
 
 	free_page((unsigned long) scmrq->aob);
-	free_page((unsigned long) scmrq->aidaw);
 	__scm_free_rq_cluster(scmrq);
 	kfree(aobrq);
 }
@@ -53,6 +54,8 @@ static void scm_free_rqs(void)
 		__scm_free_rq(scmrq);
 	}
 	spin_unlock_irq(&list_lock);
+
+	mempool_destroy(aidaw_pool);
 }
 
 static int __scm_alloc_rq(void)
@@ -65,9 +68,8 @@ static int __scm_alloc_rq(void)
 		return -ENOMEM;
 
 	scmrq = (void *) aobrq->data;
-	scmrq->aidaw = (void *) get_zeroed_page(GFP_DMA);
 	scmrq->aob = (void *) get_zeroed_page(GFP_DMA);
-	if (!scmrq->aob || !scmrq->aidaw) {
+	if (!scmrq->aob) {
 		__scm_free_rq(scmrq);
 		return -ENOMEM;
 	}
@@ -89,6 +91,10 @@ static int scm_alloc_rqs(unsigned int nrqs)
 {
 	int ret = 0;
 
+	aidaw_pool = mempool_create_page_pool(max(nrqs/8, 1U), 0);
+	if (!aidaw_pool)
+		return -ENOMEM;
+
 	while (nrqs-- && !ret)
 		ret = __scm_alloc_rq();
 
@@ -111,8 +117,13 @@ out:
 
 static void scm_request_done(struct scm_request *scmrq)
 {
+	struct msb *msb = &scmrq->aob->msb[0];
+	u64 aidaw = msb->data_addr;
 	unsigned long flags;
 
+	if ((msb->flags & MSB_FLAG_IDA) && aidaw)
+		mempool_free(virt_to_page(aidaw), aidaw_pool);
+
 	spin_lock_irqsave(&list_lock, flags);
 	list_add(&scmrq->list, &inactive_requests);
 	spin_unlock_irqrestore(&list_lock, flags);
@@ -123,15 +134,26 @@ static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req)
 	return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT;
 }
 
-static void scm_request_prepare(struct scm_request *scmrq)
+struct aidaw *scm_aidaw_alloc(void)
+{
+	struct page *page = mempool_alloc(aidaw_pool, GFP_ATOMIC);
+
+	return page ? page_address(page) : NULL;
+}
+
+static int scm_request_prepare(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	struct scm_device *scmdev = bdev->gendisk->private_data;
-	struct aidaw *aidaw = scmrq->aidaw;
+	struct aidaw *aidaw = scm_aidaw_alloc();
 	struct msb *msb = &scmrq->aob->msb[0];
 	struct req_iterator iter;
 	struct bio_vec bv;
 
+	if (!aidaw)
+		return -ENOMEM;
+
+	memset(aidaw, 0, PAGE_SIZE);
 	msb->bs = MSB_BS_4K;
 	scmrq->aob->request.msb_count = 1;
 	msb->scm_addr = scmdev->address +
@@ -147,6 +169,8 @@ static void scm_request_prepare(struct scm_request *scmrq)
 		aidaw->data_addr = (u64) page_address(bv.bv_page);
 		aidaw++;
 	}
+
+	return 0;
 }
 
 static inline void scm_request_init(struct scm_blk_dev *bdev,
@@ -157,7 +181,6 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
 	struct aob *aob = scmrq->aob;
 
 	memset(aob, 0, sizeof(*aob));
-	memset(scmrq->aidaw, 0, PAGE_SIZE);
 	aobrq->scmdev = bdev->scmdev;
 	aob->request.cmd_code = ARQB_CMD_MOVE;
 	aob->request.data = (u64) aobrq;
@@ -236,7 +259,15 @@ static void scm_blk_request(struct request_queue *rq)
 			scm_initiate_cluster_request(scmrq);
 			return;
 		}
-		scm_request_prepare(scmrq);
+
+		if (scm_request_prepare(scmrq)) {
+			SCM_LOG(5, "no aidaw");
+			scm_release_cluster(scmrq);
+			scm_request_done(scmrq);
+			scm_ensure_queue_restart(bdev);
+			return;
+		}
+
 		atomic_inc(&bdev->queued_reqs);
 		blk_start_request(req);
 
-- 
cgit v1.2.3-59-g8ed1b


From de88d0d28fe932637eb5b7ebf9e638256cf07979 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 5 Dec 2014 16:41:47 +0100
Subject: s390/scm_block: allocate aidaw pages only when necessary

AOBs (the structure describing the HW request) need to be 4K
aligned but very little of that page is actually used. With
this patch we place aidaws at the end of the AOB page and only
allocate a separate page for aidaws when we have to (lists of
aidaws must not cross page boundaries).

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/scm_blk.c         | 32 ++++++++++++++++++++++++++++----
 drivers/s390/block/scm_blk.h         |  3 ++-
 drivers/s390/block/scm_blk_cluster.c | 13 ++++++-------
 3 files changed, 36 insertions(+), 12 deletions(-)

(limited to 'drivers/s390/block/scm_blk.c')

diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index 5b2abadea094..f5c369ce7e73 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -121,7 +121,8 @@ static void scm_request_done(struct scm_request *scmrq)
 	u64 aidaw = msb->data_addr;
 	unsigned long flags;
 
-	if ((msb->flags & MSB_FLAG_IDA) && aidaw)
+	if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
+	    IS_ALIGNED(aidaw, PAGE_SIZE))
 		mempool_free(virt_to_page(aidaw), aidaw_pool);
 
 	spin_lock_irqsave(&list_lock, flags);
@@ -134,26 +135,47 @@ static bool scm_permit_request(struct scm_blk_dev *bdev, struct request *req)
 	return rq_data_dir(req) != WRITE || bdev->state != SCM_WR_PROHIBIT;
 }
 
-struct aidaw *scm_aidaw_alloc(void)
+static inline struct aidaw *scm_aidaw_alloc(void)
 {
 	struct page *page = mempool_alloc(aidaw_pool, GFP_ATOMIC);
 
 	return page ? page_address(page) : NULL;
 }
 
+static inline unsigned long scm_aidaw_bytes(struct aidaw *aidaw)
+{
+	unsigned long _aidaw = (unsigned long) aidaw;
+	unsigned long bytes = ALIGN(_aidaw, PAGE_SIZE) - _aidaw;
+
+	return (bytes / sizeof(*aidaw)) * PAGE_SIZE;
+}
+
+struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes)
+{
+	struct aidaw *aidaw;
+
+	if (scm_aidaw_bytes(scmrq->next_aidaw) >= bytes)
+		return scmrq->next_aidaw;
+
+	aidaw = scm_aidaw_alloc();
+	if (aidaw)
+		memset(aidaw, 0, PAGE_SIZE);
+	return aidaw;
+}
+
 static int scm_request_prepare(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	struct scm_device *scmdev = bdev->gendisk->private_data;
-	struct aidaw *aidaw = scm_aidaw_alloc();
 	struct msb *msb = &scmrq->aob->msb[0];
 	struct req_iterator iter;
+	struct aidaw *aidaw;
 	struct bio_vec bv;
 
+	aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(scmrq->request));
 	if (!aidaw)
 		return -ENOMEM;
 
-	memset(aidaw, 0, PAGE_SIZE);
 	msb->bs = MSB_BS_4K;
 	scmrq->aob->request.msb_count = 1;
 	msb->scm_addr = scmdev->address +
@@ -188,6 +210,8 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
 	scmrq->bdev = bdev;
 	scmrq->retries = 4;
 	scmrq->error = 0;
+	/* We don't use all msbs - place aidaws at the end of the aob page. */
+	scmrq->next_aidaw = (void *) &aob->msb[1];
 	scm_request_cluster_init(scmrq);
 }
 
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index a315ef0e96f5..6334e1609208 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -30,6 +30,7 @@ struct scm_blk_dev {
 
 struct scm_request {
 	struct scm_blk_dev *bdev;
+	struct aidaw *next_aidaw;
 	struct request *request;
 	struct aob *aob;
 	struct list_head list;
@@ -54,7 +55,7 @@ void scm_blk_irq(struct scm_device *, void *, int);
 void scm_request_finish(struct scm_request *);
 void scm_request_requeue(struct scm_request *);
 
-struct aidaw *scm_aidaw_alloc(void);
+struct aidaw *scm_aidaw_fetch(struct scm_request *scmrq, unsigned int bytes);
 
 int scm_drv_init(void);
 void scm_drv_cleanup(void);
diff --git a/drivers/s390/block/scm_blk_cluster.c b/drivers/s390/block/scm_blk_cluster.c
index 4787f80e5537..2fd01320b978 100644
--- a/drivers/s390/block/scm_blk_cluster.c
+++ b/drivers/s390/block/scm_blk_cluster.c
@@ -131,16 +131,9 @@ static int scm_prepare_cluster_request(struct scm_request *scmrq)
 		scmrq->cluster.state = CLUSTER_READ;
 		/* fall through */
 	case CLUSTER_READ:
-		aidaw = scm_aidaw_alloc();
-		if (!aidaw)
-			return -ENOMEM;
-
-		memset(aidaw, 0, PAGE_SIZE);
-		scmrq->aob->request.msb_count = 1;
 		msb->bs = MSB_BS_4K;
 		msb->oc = MSB_OC_READ;
 		msb->flags = MSB_FLAG_IDA;
-		msb->data_addr = (u64) aidaw;
 		msb->blk_count = write_cluster_size;
 
 		addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
@@ -151,6 +144,12 @@ static int scm_prepare_cluster_request(struct scm_request *scmrq)
 			       CLUSTER_SIZE))
 			msb->blk_count = 2 * write_cluster_size;
 
+		aidaw = scm_aidaw_fetch(scmrq, msb->blk_count * PAGE_SIZE);
+		if (!aidaw)
+			return -ENOMEM;
+
+		scmrq->aob->request.msb_count = 1;
+		msb->data_addr = (u64) aidaw;
 		for (i = 0; i < msb->blk_count; i++) {
 			aidaw->data_addr = (u64) scmrq->cluster.buf[i];
 			aidaw++;
-- 
cgit v1.2.3-59-g8ed1b


From bbc610a96524fbfa4ed38c4b1fc6348a1169f358 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 5 Dec 2014 16:43:58 +0100
Subject: s390/scm_block: handle multiple requests in one HW request

Handle up to 8 block layer requests per HW request. These requests
can be processed in parallel on the device, leading to better
throughput (and fewer interrupts). The overhead for additional
requests is small since we don't blindly allocate new aidaws but
try to use what's left of the previous one.

Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/scm_blk.c         | 145 ++++++++++++++++++++++++-----------
 drivers/s390/block/scm_blk.h         |   3 +-
 drivers/s390/block/scm_blk_cluster.c |  47 ++++++++----
 3 files changed, 132 insertions(+), 63 deletions(-)

(limited to 'drivers/s390/block/scm_blk.c')

diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index f5c369ce7e73..cd27cb92ac6d 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -117,13 +117,19 @@ out:
 
 static void scm_request_done(struct scm_request *scmrq)
 {
-	struct msb *msb = &scmrq->aob->msb[0];
-	u64 aidaw = msb->data_addr;
 	unsigned long flags;
+	struct msb *msb;
+	u64 aidaw;
+	int i;
 
-	if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
-	    IS_ALIGNED(aidaw, PAGE_SIZE))
-		mempool_free(virt_to_page(aidaw), aidaw_pool);
+	for (i = 0; i < SCM_RQ_PER_IO && scmrq->request[i]; i++) {
+		msb = &scmrq->aob->msb[i];
+		aidaw = msb->data_addr;
+
+		if ((msb->flags & MSB_FLAG_IDA) && aidaw &&
+		    IS_ALIGNED(aidaw, PAGE_SIZE))
+			mempool_free(virt_to_page(aidaw), aidaw_pool);
+	}
 
 	spin_lock_irqsave(&list_lock, flags);
 	list_add(&scmrq->list, &inactive_requests);
@@ -167,51 +173,57 @@ static int scm_request_prepare(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	struct scm_device *scmdev = bdev->gendisk->private_data;
-	struct msb *msb = &scmrq->aob->msb[0];
+	int pos = scmrq->aob->request.msb_count;
+	struct msb *msb = &scmrq->aob->msb[pos];
+	struct request *req = scmrq->request[pos];
 	struct req_iterator iter;
 	struct aidaw *aidaw;
 	struct bio_vec bv;
 
-	aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(scmrq->request));
+	aidaw = scm_aidaw_fetch(scmrq, blk_rq_bytes(req));
 	if (!aidaw)
 		return -ENOMEM;
 
 	msb->bs = MSB_BS_4K;
-	scmrq->aob->request.msb_count = 1;
-	msb->scm_addr = scmdev->address +
-		((u64) blk_rq_pos(scmrq->request) << 9);
-	msb->oc = (rq_data_dir(scmrq->request) == READ) ?
-		MSB_OC_READ : MSB_OC_WRITE;
+	scmrq->aob->request.msb_count++;
+	msb->scm_addr = scmdev->address + ((u64) blk_rq_pos(req) << 9);
+	msb->oc = (rq_data_dir(req) == READ) ? MSB_OC_READ : MSB_OC_WRITE;
 	msb->flags |= MSB_FLAG_IDA;
 	msb->data_addr = (u64) aidaw;
 
-	rq_for_each_segment(bv, scmrq->request, iter) {
+	rq_for_each_segment(bv, req, iter) {
 		WARN_ON(bv.bv_offset);
 		msb->blk_count += bv.bv_len >> 12;
 		aidaw->data_addr = (u64) page_address(bv.bv_page);
 		aidaw++;
 	}
 
+	scmrq->next_aidaw = aidaw;
 	return 0;
 }
 
+static inline void scm_request_set(struct scm_request *scmrq,
+				   struct request *req)
+{
+	scmrq->request[scmrq->aob->request.msb_count] = req;
+}
+
 static inline void scm_request_init(struct scm_blk_dev *bdev,
-				    struct scm_request *scmrq,
-				    struct request *req)
+				    struct scm_request *scmrq)
 {
 	struct aob_rq_header *aobrq = to_aobrq(scmrq);
 	struct aob *aob = scmrq->aob;
 
+	memset(scmrq->request, 0, sizeof(scmrq->request));
 	memset(aob, 0, sizeof(*aob));
 	aobrq->scmdev = bdev->scmdev;
 	aob->request.cmd_code = ARQB_CMD_MOVE;
 	aob->request.data = (u64) aobrq;
-	scmrq->request = req;
 	scmrq->bdev = bdev;
 	scmrq->retries = 4;
 	scmrq->error = 0;
 	/* We don't use all msbs - place aidaws at the end of the aob page. */
-	scmrq->next_aidaw = (void *) &aob->msb[1];
+	scmrq->next_aidaw = (void *) &aob->msb[SCM_RQ_PER_IO];
 	scm_request_cluster_init(scmrq);
 }
 
@@ -227,9 +239,12 @@ static void scm_ensure_queue_restart(struct scm_blk_dev *bdev)
 void scm_request_requeue(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
+	int i;
 
 	scm_release_cluster(scmrq);
-	blk_requeue_request(bdev->rq, scmrq->request);
+	for (i = 0; i < SCM_RQ_PER_IO && scmrq->request[i]; i++)
+		blk_requeue_request(bdev->rq, scmrq->request[i]);
+
 	atomic_dec(&bdev->queued_reqs);
 	scm_request_done(scmrq);
 	scm_ensure_queue_restart(bdev);
@@ -238,20 +253,41 @@ void scm_request_requeue(struct scm_request *scmrq)
 void scm_request_finish(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
+	int i;
 
 	scm_release_cluster(scmrq);
-	blk_end_request_all(scmrq->request, scmrq->error);
+	for (i = 0; i < SCM_RQ_PER_IO && scmrq->request[i]; i++)
+		blk_end_request_all(scmrq->request[i], scmrq->error);
+
 	atomic_dec(&bdev->queued_reqs);
 	scm_request_done(scmrq);
 }
 
+static int scm_request_start(struct scm_request *scmrq)
+{
+	struct scm_blk_dev *bdev = scmrq->bdev;
+	int ret;
+
+	atomic_inc(&bdev->queued_reqs);
+	if (!scmrq->aob->request.msb_count) {
+		scm_request_requeue(scmrq);
+		return -EINVAL;
+	}
+
+	ret = eadm_start_aob(scmrq->aob);
+	if (ret) {
+		SCM_LOG(5, "no subchannel");
+		scm_request_requeue(scmrq);
+	}
+	return ret;
+}
+
 static void scm_blk_request(struct request_queue *rq)
 {
 	struct scm_device *scmdev = rq->queuedata;
 	struct scm_blk_dev *bdev = dev_get_drvdata(&scmdev->dev);
-	struct scm_request *scmrq;
+	struct scm_request *scmrq = NULL;
 	struct request *req;
-	int ret;
 
 	while ((req = blk_peek_request(rq))) {
 		if (req->cmd_type != REQ_TYPE_FS) {
@@ -261,47 +297,64 @@ static void scm_blk_request(struct request_queue *rq)
 			continue;
 		}
 
-		if (!scm_permit_request(bdev, req)) {
-			scm_ensure_queue_restart(bdev);
-			return;
-		}
-		scmrq = scm_request_fetch();
+		if (!scm_permit_request(bdev, req))
+			goto out;
+
 		if (!scmrq) {
-			SCM_LOG(5, "no request");
-			scm_ensure_queue_restart(bdev);
-			return;
+			scmrq = scm_request_fetch();
+			if (!scmrq) {
+				SCM_LOG(5, "no request");
+				goto out;
+			}
+			scm_request_init(bdev, scmrq);
 		}
-		scm_request_init(bdev, scmrq, req);
+		scm_request_set(scmrq, req);
+
 		if (!scm_reserve_cluster(scmrq)) {
 			SCM_LOG(5, "cluster busy");
+			scm_request_set(scmrq, NULL);
+			if (scmrq->aob->request.msb_count)
+				goto out;
+
 			scm_request_done(scmrq);
 			return;
 		}
+
 		if (scm_need_cluster_request(scmrq)) {
-			atomic_inc(&bdev->queued_reqs);
-			blk_start_request(req);
-			scm_initiate_cluster_request(scmrq);
-			return;
+			if (scmrq->aob->request.msb_count) {
+				/* Start cluster requests separately. */
+				scm_request_set(scmrq, NULL);
+				if (scm_request_start(scmrq))
+					return;
+			} else {
+				atomic_inc(&bdev->queued_reqs);
+				blk_start_request(req);
+				scm_initiate_cluster_request(scmrq);
+			}
+			scmrq = NULL;
+			continue;
 		}
 
 		if (scm_request_prepare(scmrq)) {
-			SCM_LOG(5, "no aidaw");
-			scm_release_cluster(scmrq);
-			scm_request_done(scmrq);
-			scm_ensure_queue_restart(bdev);
-			return;
+			SCM_LOG(5, "aidaw alloc failed");
+			scm_request_set(scmrq, NULL);
+			goto out;
 		}
-
-		atomic_inc(&bdev->queued_reqs);
 		blk_start_request(req);
 
-		ret = eadm_start_aob(scmrq->aob);
-		if (ret) {
-			SCM_LOG(5, "no subchannel");
-			scm_request_requeue(scmrq);
+		if (scmrq->aob->request.msb_count < SCM_RQ_PER_IO)
+			continue;
+
+		if (scm_request_start(scmrq))
 			return;
-		}
+
+		scmrq = NULL;
 	}
+out:
+	if (scmrq)
+		scm_request_start(scmrq);
+	else
+		scm_ensure_queue_restart(bdev);
 }
 
 static void __scmrq_log_error(struct scm_request *scmrq)
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index 6334e1609208..3dae0a3570ce 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -11,6 +11,7 @@
 #include <asm/eadm.h>
 
 #define SCM_NR_PARTS 8
+#define SCM_RQ_PER_IO 8
 #define SCM_QUEUE_DELAY 5
 
 struct scm_blk_dev {
@@ -31,7 +32,7 @@ struct scm_blk_dev {
 struct scm_request {
 	struct scm_blk_dev *bdev;
 	struct aidaw *next_aidaw;
-	struct request *request;
+	struct request *request[SCM_RQ_PER_IO];
 	struct aob *aob;
 	struct list_head list;
 	u8 retries;
diff --git a/drivers/s390/block/scm_blk_cluster.c b/drivers/s390/block/scm_blk_cluster.c
index 2fd01320b978..09db45296eed 100644
--- a/drivers/s390/block/scm_blk_cluster.c
+++ b/drivers/s390/block/scm_blk_cluster.c
@@ -57,39 +57,52 @@ void scm_request_cluster_init(struct scm_request *scmrq)
 	scmrq->cluster.state = CLUSTER_NONE;
 }
 
-static bool clusters_intersect(struct scm_request *A, struct scm_request *B)
+static bool clusters_intersect(struct request *A, struct request *B)
 {
 	unsigned long firstA, lastA, firstB, lastB;
 
-	firstA = ((u64) blk_rq_pos(A->request) << 9) / CLUSTER_SIZE;
-	lastA = (((u64) blk_rq_pos(A->request) << 9) +
-		    blk_rq_bytes(A->request) - 1) / CLUSTER_SIZE;
+	firstA = ((u64) blk_rq_pos(A) << 9) / CLUSTER_SIZE;
+	lastA = (((u64) blk_rq_pos(A) << 9) +
+		    blk_rq_bytes(A) - 1) / CLUSTER_SIZE;
 
-	firstB = ((u64) blk_rq_pos(B->request) << 9) / CLUSTER_SIZE;
-	lastB = (((u64) blk_rq_pos(B->request) << 9) +
-		    blk_rq_bytes(B->request) - 1) / CLUSTER_SIZE;
+	firstB = ((u64) blk_rq_pos(B) << 9) / CLUSTER_SIZE;
+	lastB = (((u64) blk_rq_pos(B) << 9) +
+		    blk_rq_bytes(B) - 1) / CLUSTER_SIZE;
 
 	return (firstB <= lastA && firstA <= lastB);
 }
 
 bool scm_reserve_cluster(struct scm_request *scmrq)
 {
+	struct request *req = scmrq->request[scmrq->aob->request.msb_count];
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	struct scm_request *iter;
+	int pos, add = 1;
 
 	if (write_cluster_size == 0)
 		return true;
 
 	spin_lock(&bdev->lock);
 	list_for_each_entry(iter, &bdev->cluster_list, cluster.list) {
-		if (clusters_intersect(scmrq, iter) &&
-		    (rq_data_dir(scmrq->request) == WRITE ||
-		     rq_data_dir(iter->request) == WRITE)) {
-			spin_unlock(&bdev->lock);
-			return false;
+		if (iter == scmrq) {
+			/*
+			 * We don't have to use clusters_intersect here, since
+			 * cluster requests are always started separately.
+			 */
+			add = 0;
+			continue;
+		}
+		for (pos = 0; pos <= iter->aob->request.msb_count; pos++) {
+			if (clusters_intersect(req, iter->request[pos]) &&
+			    (rq_data_dir(req) == WRITE ||
+			     rq_data_dir(iter->request[pos]) == WRITE)) {
+				spin_unlock(&bdev->lock);
+				return false;
+			}
 		}
 	}
-	list_add(&scmrq->cluster.list, &bdev->cluster_list);
+	if (add)
+		list_add(&scmrq->cluster.list, &bdev->cluster_list);
 	spin_unlock(&bdev->lock);
 
 	return true;
@@ -118,7 +131,7 @@ static int scm_prepare_cluster_request(struct scm_request *scmrq)
 {
 	struct scm_blk_dev *bdev = scmrq->bdev;
 	struct scm_device *scmdev = bdev->gendisk->private_data;
-	struct request *req = scmrq->request;
+	struct request *req = scmrq->request[0];
 	struct msb *msb = &scmrq->aob->msb[0];
 	struct req_iterator iter;
 	struct aidaw *aidaw;
@@ -183,10 +196,12 @@ static int scm_prepare_cluster_request(struct scm_request *scmrq)
 
 bool scm_need_cluster_request(struct scm_request *scmrq)
 {
-	if (rq_data_dir(scmrq->request) == READ)
+	int pos = scmrq->aob->request.msb_count;
+
+	if (rq_data_dir(scmrq->request[pos]) == READ)
 		return false;
 
-	return blk_rq_bytes(scmrq->request) < CLUSTER_SIZE;
+	return blk_rq_bytes(scmrq->request[pos]) < CLUSTER_SIZE;
 }
 
 /* Called with queue lock held. */
-- 
cgit v1.2.3-59-g8ed1b


From 8622384f138b786b9ae639e79ccfb84c7db82cbc Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.vnet.ibm.com>
Date: Fri, 5 Dec 2014 16:47:17 +0100
Subject: s390/scm_block: make the number of reqs per HW req configurable

Introduce a module parameter to specify the number of requests
we try to handle with one HW request.

Suggested-by: Peter Oberparleiter <peter.oberparleiter@de.ibm.com>
Signed-off-by: Sebastian Ott <sebott@linux.vnet.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 drivers/s390/block/scm_blk.c | 48 ++++++++++++++++++++++++++++++--------------
 drivers/s390/block/scm_blk.h |  3 +--
 2 files changed, 34 insertions(+), 17 deletions(-)

(limited to 'drivers/s390/block/scm_blk.c')

diff --git a/drivers/s390/block/scm_blk.c b/drivers/s390/block/scm_blk.c
index cd27cb92ac6d..75d9896deccb 100644
--- a/drivers/s390/block/scm_blk.c
+++ b/drivers/s390/block/scm_blk.c
@@ -25,10 +25,14 @@ static mempool_t *aidaw_pool;
 static DEFINE_SPINLOCK(list_lock);
 static LIST_HEAD(inactive_requests);
 static unsigned int nr_requests = 64;
+static unsigned int nr_requests_per_io = 8;
 static atomic_t nr_devices = ATOMIC_INIT(0);
 module_param(nr_requests, uint, S_IRUGO);
 MODULE_PARM_DESC(nr_requests, "Number of parallel requests.");
 
+module_param(nr_requests_per_io, uint, S_IRUGO);
+MODULE_PARM_DESC(nr_requests_per_io, "Number of requests per IO.");
+
 MODULE_DESCRIPTION("Block driver for s390 storage class memory.");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("scm:scmdev*");
@@ -39,6 +43,7 @@ static void __scm_free_rq(struct scm_request *scmrq)
 
 	free_page((unsigned long) scmrq->aob);
 	__scm_free_rq_cluster(scmrq);
+	kfree(scmrq->request);
 	kfree(aobrq);
 }
 
@@ -69,15 +74,16 @@ static int __scm_alloc_rq(void)
 
 	scmrq = (void *) aobrq->data;
 	scmrq->aob = (void *) get_zeroed_page(GFP_DMA);
-	if (!scmrq->aob) {
-		__scm_free_rq(scmrq);
-		return -ENOMEM;
-	}
+	if (!scmrq->aob)
+		goto free;
 
-	if (__scm_alloc_rq_cluster(scmrq)) {
-		__scm_free_rq(scmrq);
-		return -ENOMEM;
-	}
+	scmrq->request = kcalloc(nr_requests_per_io, sizeof(scmrq->request[0]),
+				 GFP_KERNEL);
+	if (!scmrq->request)
+		goto free;
+
+	if (__scm_alloc_rq_cluster(scmrq))
+		goto free;
 
 	INIT_LIST_HEAD(&scmrq->list);
 	spin_lock_irq(&list_lock);
@@ -85,6 +91,9 @@ static int __scm_alloc_rq(void)
 	spin_unlock_irq(&list_lock);
 
 	return 0;
+free:
+	__scm_free_rq(scmrq);
+	return -ENOMEM;
 }
 
 static int scm_alloc_rqs(unsigned int nrqs)
@@ -122,7 +131,7 @@ static void scm_request_done(struct scm_request *scmrq)
 	u64 aidaw;
 	int i;
 
-	for (i = 0; i < SCM_RQ_PER_IO && scmrq->request[i]; i++) {
+	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++) {
 		msb = &scmrq->aob->msb[i];
 		aidaw = msb->data_addr;
 
@@ -214,7 +223,8 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
 	struct aob_rq_header *aobrq = to_aobrq(scmrq);
 	struct aob *aob = scmrq->aob;
 
-	memset(scmrq->request, 0, sizeof(scmrq->request));
+	memset(scmrq->request, 0,
+	       nr_requests_per_io * sizeof(scmrq->request[0]));
 	memset(aob, 0, sizeof(*aob));
 	aobrq->scmdev = bdev->scmdev;
 	aob->request.cmd_code = ARQB_CMD_MOVE;
@@ -223,7 +233,7 @@ static inline void scm_request_init(struct scm_blk_dev *bdev,
 	scmrq->retries = 4;
 	scmrq->error = 0;
 	/* We don't use all msbs - place aidaws at the end of the aob page. */
-	scmrq->next_aidaw = (void *) &aob->msb[SCM_RQ_PER_IO];
+	scmrq->next_aidaw = (void *) &aob->msb[nr_requests_per_io];
 	scm_request_cluster_init(scmrq);
 }
 
@@ -242,7 +252,7 @@ void scm_request_requeue(struct scm_request *scmrq)
 	int i;
 
 	scm_release_cluster(scmrq);
-	for (i = 0; i < SCM_RQ_PER_IO && scmrq->request[i]; i++)
+	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++)
 		blk_requeue_request(bdev->rq, scmrq->request[i]);
 
 	atomic_dec(&bdev->queued_reqs);
@@ -256,7 +266,7 @@ void scm_request_finish(struct scm_request *scmrq)
 	int i;
 
 	scm_release_cluster(scmrq);
-	for (i = 0; i < SCM_RQ_PER_IO && scmrq->request[i]; i++)
+	for (i = 0; i < nr_requests_per_io && scmrq->request[i]; i++)
 		blk_end_request_all(scmrq->request[i], scmrq->error);
 
 	atomic_dec(&bdev->queued_reqs);
@@ -342,7 +352,7 @@ static void scm_blk_request(struct request_queue *rq)
 		}
 		blk_start_request(req);
 
-		if (scmrq->aob->request.msb_count < SCM_RQ_PER_IO)
+		if (scmrq->aob->request.msb_count < nr_requests_per_io)
 			continue;
 
 		if (scm_request_start(scmrq))
@@ -551,11 +561,19 @@ void scm_blk_set_available(struct scm_blk_dev *bdev)
 	spin_unlock_irqrestore(&bdev->lock, flags);
 }
 
+static bool __init scm_blk_params_valid(void)
+{
+	if (!nr_requests_per_io || nr_requests_per_io > 64)
+		return false;
+
+	return scm_cluster_size_valid();
+}
+
 static int __init scm_blk_init(void)
 {
 	int ret = -EINVAL;
 
-	if (!scm_cluster_size_valid())
+	if (!scm_blk_params_valid())
 		goto out;
 
 	ret = register_blkdev(0, "scm");
diff --git a/drivers/s390/block/scm_blk.h b/drivers/s390/block/scm_blk.h
index 3dae0a3570ce..09218cdc5129 100644
--- a/drivers/s390/block/scm_blk.h
+++ b/drivers/s390/block/scm_blk.h
@@ -11,7 +11,6 @@
 #include <asm/eadm.h>
 
 #define SCM_NR_PARTS 8
-#define SCM_RQ_PER_IO 8
 #define SCM_QUEUE_DELAY 5
 
 struct scm_blk_dev {
@@ -32,7 +31,7 @@ struct scm_blk_dev {
 struct scm_request {
 	struct scm_blk_dev *bdev;
 	struct aidaw *next_aidaw;
-	struct request *request[SCM_RQ_PER_IO];
+	struct request **request;
 	struct aob *aob;
 	struct list_head list;
 	u8 retries;
-- 
cgit v1.2.3-59-g8ed1b