NVMe: Mismatched host/device page size support

Adds support for devices with max page size smaller than the host's. In the case we encounter such a host/device combination, the driver will split a page into as many PRP entries as necessary for the device's page size capabilities. If the device's reported minimum page size is greater than the host's, the driver will not attempt to enable the device and return an error instead. Signed-off-by: Keith Busch <keith.busch@intel.com> Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com> Signed-off-by: Jens Axboe <axboe@fb.com>
author: Keith Busch <keith.busch@intel.com> 2014-06-23 11:34:01 -0600
committer: Jens Axboe <axboe@fb.com> 2014-11-04 13:17:07 -0700
commit: 1d0906246095184d1624c643c2088152d330c40a (patch)
tree: 3d7ea112d661961bd94c293edcee8bb2e6dcdce0 /drivers/block/nvme-core.c
parent: NVMe: Update list of status codes (diff)
download: linux-dev-1d0906246095184d1624c643c2088152d330c40a.tar.xz
linux-dev-1d0906246095184d1624c643c2088152d330c40a.zip
1 files changed, 40 insertions, 19 deletions
diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 42a62bbf4a11..e60bb0fec7e3 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -373,17 +373,17 @@ static __le64 **iod_list(struct nvme_iod *iod)
  * as it only leads to a small amount of wasted memory for the lifetime of
  * the I/O.
  */
-static int nvme_npages(unsigned size)
+static int nvme_npages(unsigned size, struct nvme_dev *dev)
 {
-	unsigned nprps = DIV_ROUND_UP(size + PAGE_SIZE, PAGE_SIZE);
-	return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
+	unsigned nprps = DIV_ROUND_UP(size + dev->page_size, dev->page_size);
+	return DIV_ROUND_UP(8 * nprps, dev->page_size - 8);
 }
 
 static struct nvme_iod *
-nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
+nvme_alloc_iod(unsigned nseg, unsigned nbytes, struct nvme_dev *dev, gfp_t gfp)
 {
 	struct nvme_iod *iod = kmalloc(sizeof(struct nvme_iod) +
-				sizeof(__le64 *) * nvme_npages(nbytes) +
+				sizeof(__le64 *) * nvme_npages(nbytes, dev) +
 				sizeof(struct scatterlist) * nseg, gfp);
 
 	if (iod) {
@@ -400,7 +400,7 @@ nvme_alloc_iod(unsigned nseg, unsigned nbytes, gfp_t gfp)
 
 void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod)
 {
-	const int last_prp = PAGE_SIZE / 8 - 1;
+	const int last_prp = dev->page_size / 8 - 1;
 	int i;
 	__le64 **list = iod_list(iod);
 	dma_addr_t prp_dma = iod->first_dma;
@@ -491,26 +491,27 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
 	__le64 **list = iod_list(iod);
 	dma_addr_t prp_dma;
 	int nprps, i;
+	u32 page_size = dev->page_size;
 
-	length -= (PAGE_SIZE - offset);
+	length -= (page_size - offset);
 	if (length <= 0)
 		return total_len;
 
-	dma_len -= (PAGE_SIZE - offset);
+	dma_len -= (page_size - offset);
 	if (dma_len) {
-		dma_addr += (PAGE_SIZE - offset);
+		dma_addr += (page_size - offset);
 	} else {
 		sg = sg_next(sg);
 		dma_addr = sg_dma_address(sg);
 		dma_len = sg_dma_len(sg);
 	}
 
-	if (length <= PAGE_SIZE) {
+	if (length <= page_size) {
 		iod->first_dma = dma_addr;
 		return total_len;
 	}
 
-	nprps = DIV_ROUND_UP(length, PAGE_SIZE);
+	nprps = DIV_ROUND_UP(length, page_size);
 	if (nprps <= (256 / 8)) {
 		pool = dev->prp_small_pool;
 		iod->npages = 0;
@@ -523,13 +524,13 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
 	if (!prp_list) {
 		iod->first_dma = dma_addr;
 		iod->npages = -1;
-		return (total_len - length) + PAGE_SIZE;
+		return (total_len - length) + page_size;
 	}
 	list[0] = prp_list;
 	iod->first_dma = prp_dma;
 	i = 0;
 	for (;;) {
-		if (i == PAGE_SIZE / 8) {
+		if (i == page_size >> 3) {
 			__le64 *old_prp_list = prp_list;
 			prp_list = dma_pool_alloc(pool, gfp, &prp_dma);
 			if (!prp_list)
@@ -540,9 +541,9 @@ int nvme_setup_prps(struct nvme_dev *dev, struct nvme_iod *iod, int total_len,
 			i = 1;
 		}
 		prp_list[i++] = cpu_to_le64(dma_addr);
-		dma_len -= PAGE_SIZE;
-		dma_addr += PAGE_SIZE;
-		length -= PAGE_SIZE;
+		dma_len -= page_size;
+		dma_addr += page_size;
+		length -= page_size;
 		if (length <= 0)
 			break;
 		if (dma_len > 0)
@@ -749,7 +750,7 @@ static int nvme_submit_bio_queue(struct nvme_queue *nvmeq, struct nvme_ns *ns,
 	if ((bio->bi_rw & REQ_FLUSH) && psegs)
 		return nvme_split_flush_data(nvmeq, bio);
 
-	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, GFP_ATOMIC);
+	iod = nvme_alloc_iod(psegs, bio->bi_iter.bi_size, ns->dev, GFP_ATOMIC);
 	if (!iod)
 		return -ENOMEM;
 
@@ -1463,6 +1464,24 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	u32 aqa;
 	u64 cap = readq(&dev->bar->cap);
 	struct nvme_queue *nvmeq;
+	unsigned page_shift = PAGE_SHIFT;
+	unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12;
+	unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12;
+
+	if (page_shift < dev_page_min) {
+		dev_err(&dev->pci_dev->dev,
+				"Minimum device page size (%u) too large for "
+				"host (%u)\n", 1 << dev_page_min,
+				1 << page_shift);
+		return -ENODEV;
+	}
+	if (page_shift > dev_page_max) {
+		dev_info(&dev->pci_dev->dev,
+				"Device maximum page size (%u) smaller than "
+				"host (%u); enabling work-around\n",
+				1 << dev_page_max, 1 << page_shift);
+		page_shift = dev_page_max;
+	}
 
 	result = nvme_disable_ctrl(dev, cap);
 	if (result < 0)
@@ -1478,8 +1497,10 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
 	aqa = nvmeq->q_depth - 1;
 	aqa |= aqa << 16;
 
+	dev->page_size = 1 << page_shift;
+
 	dev->ctrl_config = NVME_CC_ENABLE | NVME_CC_CSS_NVM;
-	dev->ctrl_config |= (PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
+	dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
 	dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE;
 	dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
 
@@ -1529,7 +1550,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write,
 	}
 
 	err = -ENOMEM;
-	iod = nvme_alloc_iod(count, length, GFP_KERNEL);
+	iod = nvme_alloc_iod(count, length, dev, GFP_KERNEL);
 	if (!iod)
 		goto put_pages;
author	Keith Busch <keith.busch@intel.com>	2014-06-23 11:34:01 -0600
committer	Jens Axboe <axboe@fb.com>	2014-11-04 13:17:07 -0700
commit	1d0906246095184d1624c643c2088152d330c40a (patch)
tree	3d7ea112d661961bd94c293edcee8bb2e6dcdce0 /drivers/block/nvme-core.c
parent	NVMe: Update list of status codes (diff)
download	linux-dev-1d0906246095184d1624c643c2088152d330c40a.tar.xz linux-dev-1d0906246095184d1624c643c2088152d330c40a.zip