Diffstat (limited to 'drivers/nvdimm')
-rw-r--r--  drivers/nvdimm/Kconfig             2
-rw-r--r--  drivers/nvdimm/Makefile            1
-rw-r--r--  drivers/nvdimm/claim.c             6
-rw-r--r--  drivers/nvdimm/dax_devs.c          2
-rw-r--r--  drivers/nvdimm/namespace_devs.c    8
-rw-r--r--  drivers/nvdimm/nd.h                1
-rw-r--r--  drivers/nvdimm/nd_virtio.c       125
-rw-r--r--  drivers/nvdimm/pfn.h              15
-rw-r--r--  drivers/nvdimm/pfn_devs.c         95
-rw-r--r--  drivers/nvdimm/pmem.c             18
-rw-r--r--  drivers/nvdimm/region_devs.c      33
-rw-r--r--  drivers/nvdimm/virtio_pmem.c     122
-rw-r--r--  drivers/nvdimm/virtio_pmem.h      55
13 files changed, 386 insertions, 97 deletions
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 54500798f23a..a5fde15e91d3 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -33,7 +33,7 @@ config BLK_DEV_PMEM
Documentation/admin-guide/kernel-parameters.rst). This driver converts
these persistent memory ranges into block devices that are
capable of DAX (direct-access) file system mappings. See
- Documentation/nvdimm/nvdimm.txt for more details.
+ Documentation/driver-api/nvdimm/nvdimm.rst for more details.
Say Y if you want to use an NVDIMM
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 6f2a088afad6..cefe233e0b52 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_ND_BTT) += nd_btt.o
obj-$(CONFIG_ND_BLK) += nd_blk.o
obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
obj-$(CONFIG_OF_PMEM) += of_pmem.o
+obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o
nd_pmem-y := pmem.o
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 26c1c7618891..2985ca949912 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -255,7 +255,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);
sector_t sector = offset >> 9;
- int rc = 0;
+ int rc = 0, ret = 0;
if (unlikely(!size))
return 0;
@@ -293,7 +293,9 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
}
memcpy_flushcache(nsio->addr + offset, buf, size);
- nvdimm_flush(to_nd_region(ndns->dev.parent));
+ ret = nvdimm_flush(to_nd_region(ndns->dev.parent), NULL);
+ if (ret)
+ rc = ret;
return rc;
}
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c
index 49fc18ee0565..6d22b0f83b3b 100644
--- a/drivers/nvdimm/dax_devs.c
+++ b/drivers/nvdimm/dax_devs.c
@@ -118,7 +118,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
nvdimm_bus_unlock(&ndns->dev);
if (!dax_dev)
return -ENOMEM;
- pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
+ pfn_sb = devm_kmalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
nd_pfn->pfn_sb = pfn_sb;
rc = nd_pfn_validate(nd_pfn, DAX_SIG);
dev_dbg(dev, "dax: %s\n", rc == 0 ? dev_name(dax_dev) : "<none>");
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index a434a5964cb9..2d8d7e554877 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1822,8 +1822,8 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
&& !guid_equal(&nd_set->type_guid,
&nd_label->type_guid)) {
dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
- nd_set->type_guid.b,
- nd_label->type_guid.b);
+ &nd_set->type_guid,
+ &nd_label->type_guid);
continue;
}
@@ -2227,8 +2227,8 @@ static struct device *create_namespace_blk(struct nd_region *nd_region,
if (namespace_label_has(ndd, type_guid)) {
if (!guid_equal(&nd_set->type_guid, &nd_label->type_guid)) {
dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
- nd_set->type_guid.b,
- nd_label->type_guid.b);
+ &nd_set->type_guid,
+ &nd_label->type_guid);
return ERR_PTR(-EAGAIN);
}
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index d24304c0e6d7..1b9955651379 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -155,6 +155,7 @@ struct nd_region {
struct badblocks bb;
struct nd_interleave_set *nd_set;
struct nd_percpu_lane __percpu *lane;
+ int (*flush)(struct nd_region *nd_region, struct bio *bio);
struct nd_mapping mapping[0];
};
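
The new member is the per-region flush hook consumed by nvdimm_flush() later in this patch. As a rough sketch of the contract (the function name here is hypothetical; the real implementation added by this series is async_pmem_flush() in nd_virtio.c below), a provider callback receives the region plus the bio being flushed (or NULL for a synchronous flush) and returns 0 on success; any non-zero return is surfaced to callers as -EIO:

	/* Hypothetical provider callback following the new ->flush hook */
	static int example_region_flush(struct nd_region *nd_region,
			struct bio *bio)
	{
		/* issue or chain the flush here; 0 means success */
		return 0;
	}
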
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
new file mode 100644
index 000000000000..10351d5b49fa
--- /dev/null
+++ b/drivers/nvdimm/nd_virtio.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * nd_virtio.c: Virtio pmem Driver
+ *
+ * Discovers persistent memory range information
+ * from host and provides a virtio based flushing
+ * interface.
+ */
+#include "virtio_pmem.h"
+#include "nd.h"
+
+ /* The interrupt handler */
+void virtio_pmem_host_ack(struct virtqueue *vq)
+{
+ struct virtio_pmem *vpmem = vq->vdev->priv;
+ struct virtio_pmem_request *req_data, *req_buf;
+ unsigned long flags;
+ unsigned int len;
+
+ spin_lock_irqsave(&vpmem->pmem_lock, flags);
+ while ((req_data = virtqueue_get_buf(vq, &len)) != NULL) {
+ req_data->done = true;
+ wake_up(&req_data->host_acked);
+
+ if (!list_empty(&vpmem->req_list)) {
+ req_buf = list_first_entry(&vpmem->req_list,
+ struct virtio_pmem_request, list);
+ req_buf->wq_buf_avail = true;
+ wake_up(&req_buf->wq_buf);
+ list_del(&req_buf->list);
+ }
+ }
+ spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
+}
+EXPORT_SYMBOL_GPL(virtio_pmem_host_ack);
+
+ /* The request submission function */
+static int virtio_pmem_flush(struct nd_region *nd_region)
+{
+ struct virtio_device *vdev = nd_region->provider_data;
+ struct virtio_pmem *vpmem = vdev->priv;
+ struct virtio_pmem_request *req_data;
+ struct scatterlist *sgs[2], sg, ret;
+ unsigned long flags;
+ int err, err1;
+
+ might_sleep();
+ req_data = kmalloc(sizeof(*req_data), GFP_KERNEL);
+ if (!req_data)
+ return -ENOMEM;
+
+ req_data->done = false;
+ init_waitqueue_head(&req_data->host_acked);
+ init_waitqueue_head(&req_data->wq_buf);
+ INIT_LIST_HEAD(&req_data->list);
+ req_data->req.type = cpu_to_le32(VIRTIO_PMEM_REQ_TYPE_FLUSH);
+ sg_init_one(&sg, &req_data->req, sizeof(req_data->req));
+ sgs[0] = &sg;
+ sg_init_one(&ret, &req_data->resp.ret, sizeof(req_data->resp));
+ sgs[1] = &ret;
+
+ spin_lock_irqsave(&vpmem->pmem_lock, flags);
+ /*
+ * If virtqueue_add_sgs returns -ENOSPC then the req_vq virtual
+ * queue does not have a free descriptor. We add the request
+ * to req_list and wait for host_ack to wake us up when free
+ * slots are available.
+ */
+ while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req_data,
+ GFP_ATOMIC)) == -ENOSPC) {
+
+ dev_info(&vdev->dev, "failed to send command to virtio pmem device, no free slots in the virtqueue\n");
+ req_data->wq_buf_avail = false;
+ list_add_tail(&req_data->list, &vpmem->req_list);
+ spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
+
+ /* A host response results in "host_ack" getting called */
+ wait_event(req_data->wq_buf, req_data->wq_buf_avail);
+ spin_lock_irqsave(&vpmem->pmem_lock, flags);
+ }
+ err1 = virtqueue_kick(vpmem->req_vq);
+ spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
+ /*
+ * virtqueue_add_sgs failed with error different than -ENOSPC, we can't
+ * do anything about that.
+ */
+ if (err || !err1) {
+ dev_info(&vdev->dev, "failed to send command to virtio pmem device\n");
+ err = -EIO;
+ } else {
+ /* A host response results in "host_ack" getting called */
+ wait_event(req_data->host_acked, req_data->done);
+ err = le32_to_cpu(req_data->resp.ret);
+ }
+
+ kfree(req_data);
+ return err;
+}
+
+/* The asynchronous flush callback function */
+int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
+{
+ /*
+ * Create child bio for asynchronous flush and chain with
+ * parent bio. Otherwise directly call nd_region flush.
+ */
+ if (bio && bio->bi_iter.bi_sector != -1) {
+ struct bio *child = bio_alloc(GFP_ATOMIC, 0);
+
+ if (!child)
+ return -ENOMEM;
+ bio_copy_dev(child, bio);
+ child->bi_opf = REQ_PREFLUSH;
+ child->bi_iter.bi_sector = -1;
+ bio_chain(child, bio);
+ submit_bio(child);
+ return 0;
+ }
+ if (virtio_pmem_flush(nd_region))
+ return -EIO;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(async_pmem_flush);
+MODULE_LICENSE("GPL");
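
The req.type and resp.ret fields marshalled above come from the virtio pmem uapi header added elsewhere in this series and included as <uapi/linux/virtio_pmem.h>. Roughly, the wire format it defines looks like this (a sketch; see the uapi header for the authoritative definitions):

	struct virtio_pmem_req {
		__le32 type;	/* VIRTIO_PMEM_REQ_TYPE_FLUSH == 0 */
	};

	struct virtio_pmem_resp {
		__le32 ret;	/* host status for the flush request */
	};
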
diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
index f58b849e455b..7381673b7b70 100644
--- a/drivers/nvdimm/pfn.h
+++ b/drivers/nvdimm/pfn.h
@@ -28,22 +28,9 @@ struct nd_pfn_sb {
__le32 end_trunc;
/* minor-version-2 record the base alignment of the mapping */
__le32 align;
+ /* minor-version-3 guarantees the padding and flags are zero */
u8 padding[4000];
__le64 checksum;
};
-#ifdef CONFIG_SPARSEMEM
-#define PFN_SECTION_ALIGN_DOWN(x) SECTION_ALIGN_DOWN(x)
-#define PFN_SECTION_ALIGN_UP(x) SECTION_ALIGN_UP(x)
-#else
-/*
- * In this case ZONE_DEVICE=n and we will disable 'pfn' device support,
- * but we still want pmem to compile.
- */
-#define PFN_SECTION_ALIGN_DOWN(x) (x)
-#define PFN_SECTION_ALIGN_UP(x) (x)
-#endif
-
-#define PHYS_SECTION_ALIGN_DOWN(x) PFN_PHYS(PFN_SECTION_ALIGN_DOWN(PHYS_PFN(x)))
-#define PHYS_SECTION_ALIGN_UP(x) PFN_PHYS(PFN_SECTION_ALIGN_UP(PHYS_PFN(x)))
#endif /* __NVDIMM_PFN_H */
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 55fb6b7433ed..df2bdbd22450 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -412,6 +412,15 @@ static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
return 0;
}
+/**
+ * nd_pfn_validate - read and validate info-block
+ * @nd_pfn: fsdax namespace runtime state / properties
+ * @sig: 'devdax' or 'fsdax' signature
+ *
+ * Upon return the info-block buffer contents (->pfn_sb) are
+ * indeterminate when validation fails, and a coherent info-block
+ * otherwise.
+ */
int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
{
u64 checksum, offset;
@@ -557,7 +566,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
nvdimm_bus_unlock(&ndns->dev);
if (!pfn_dev)
return -ENOMEM;
- pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
+ pfn_sb = devm_kmalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
nd_pfn = to_nd_pfn(pfn_dev);
nd_pfn->pfn_sb = pfn_sb;
rc = nd_pfn_validate(nd_pfn, PFN_SIG);
@@ -578,14 +587,14 @@ static u32 info_block_reserve(void)
}
/*
- * We hotplug memory at section granularity, pad the reserved area from
- * the previous section base to the namespace base address.
+ * We hotplug memory at sub-section granularity, pad the reserved area
+ * from the previous section base to the namespace base address.
*/
static unsigned long init_altmap_base(resource_size_t base)
{
unsigned long base_pfn = PHYS_PFN(base);
- return PFN_SECTION_ALIGN_DOWN(base_pfn);
+ return SUBSECTION_ALIGN_DOWN(base_pfn);
}
static unsigned long init_altmap_reserve(resource_size_t base)
@@ -593,7 +602,7 @@ static unsigned long init_altmap_reserve(resource_size_t base)
unsigned long reserve = info_block_reserve() >> PAGE_SHIFT;
unsigned long base_pfn = PHYS_PFN(base);
- reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
+ reserve += base_pfn - SUBSECTION_ALIGN_DOWN(base_pfn);
return reserve;
}
@@ -623,8 +632,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
return -EINVAL;
nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
} else if (nd_pfn->mode == PFN_MODE_PMEM) {
- nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res)
- - offset) / PAGE_SIZE);
+ nd_pfn->npfns = PHYS_PFN((resource_size(res) - offset));
if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
dev_info(&nd_pfn->dev,
"number of pfns truncated from %lld to %ld\n",
@@ -640,60 +648,20 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
return 0;
}
-static u64 phys_pmem_align_down(struct nd_pfn *nd_pfn, u64 phys)
-{
- return min_t(u64, PHYS_SECTION_ALIGN_DOWN(phys),
- ALIGN_DOWN(phys, nd_pfn->align));
-}
-
-/*
- * Check if pmem collides with 'System RAM', or other regions when
- * section aligned. Trim it accordingly.
- */
-static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trunc)
-{
- struct nd_namespace_common *ndns = nd_pfn->ndns;
- struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
- struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent);
- const resource_size_t start = nsio->res.start;
- const resource_size_t end = start + resource_size(&nsio->res);
- resource_size_t adjust, size;
-
- *start_pad = 0;
- *end_trunc = 0;
-
- adjust = start - PHYS_SECTION_ALIGN_DOWN(start);
- size = resource_size(&nsio->res) + adjust;
- if (region_intersects(start - adjust, size, IORESOURCE_SYSTEM_RAM,
- IORES_DESC_NONE) == REGION_MIXED
- || nd_region_conflict(nd_region, start - adjust, size))
- *start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
-
- /* Now check that end of the range does not collide. */
- adjust = PHYS_SECTION_ALIGN_UP(end) - end;
- size = resource_size(&nsio->res) + adjust;
- if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
- IORES_DESC_NONE) == REGION_MIXED
- || !IS_ALIGNED(end, nd_pfn->align)
- || nd_region_conflict(nd_region, start, size))
- *end_trunc = end - phys_pmem_align_down(nd_pfn, end);
-}
-
static int nd_pfn_init(struct nd_pfn *nd_pfn)
{
struct nd_namespace_common *ndns = nd_pfn->ndns;
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
- u32 start_pad, end_trunc, reserve = info_block_reserve();
resource_size_t start, size;
struct nd_region *nd_region;
+ unsigned long npfns, align;
struct nd_pfn_sb *pfn_sb;
- unsigned long npfns;
phys_addr_t offset;
const char *sig;
u64 checksum;
int rc;
- pfn_sb = devm_kzalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL);
+ pfn_sb = devm_kmalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL);
if (!pfn_sb)
return -ENOMEM;
@@ -702,11 +670,14 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
sig = DAX_SIG;
else
sig = PFN_SIG;
+
rc = nd_pfn_validate(nd_pfn, sig);
if (rc != -ENODEV)
return rc;
/* no info block, do init */;
+ memset(pfn_sb, 0, sizeof(*pfn_sb));
+
nd_region = to_nd_region(nd_pfn->dev.parent);
if (nd_region->ro) {
dev_info(&nd_pfn->dev,
@@ -715,43 +686,35 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
return -ENXIO;
}
- memset(pfn_sb, 0, sizeof(*pfn_sb));
-
- trim_pfn_device(nd_pfn, &start_pad, &end_trunc);
- if (start_pad + end_trunc)
- dev_info(&nd_pfn->dev, "%s alignment collision, truncate %d bytes\n",
- dev_name(&ndns->dev), start_pad + end_trunc);
-
/*
* Note, we use 64 here for the standard size of struct page,
* debugging options may cause it to be larger in which case the
* implementation will limit the pfns advertised through
* ->direct_access() to those that are included in the memmap.
*/
- start = nsio->res.start + start_pad;
+ start = nsio->res.start;
size = resource_size(&nsio->res);
- npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - reserve)
- / PAGE_SIZE);
+ npfns = PHYS_PFN(size - SZ_8K);
+ align = max(nd_pfn->align, (1UL << SUBSECTION_SHIFT));
if (nd_pfn->mode == PFN_MODE_PMEM) {
/*
* The altmap should be padded out to the block size used
* when populating the vmemmap. This *should* be equal to
* PMD_SIZE for most architectures.
*/
- offset = ALIGN(start + reserve + 64 * npfns,
- max(nd_pfn->align, PMD_SIZE)) - start;
+ offset = ALIGN(start + SZ_8K + 64 * npfns, align) - start;
} else if (nd_pfn->mode == PFN_MODE_RAM)
- offset = ALIGN(start + reserve, nd_pfn->align) - start;
+ offset = ALIGN(start + SZ_8K, align) - start;
else
return -ENXIO;
- if (offset + start_pad + end_trunc >= size) {
+ if (offset >= size) {
dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
dev_name(&ndns->dev));
return -ENXIO;
}
- npfns = (size - offset - start_pad - end_trunc) / SZ_4K;
+ npfns = PHYS_PFN(size - offset);
pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
pfn_sb->dataoff = cpu_to_le64(offset);
pfn_sb->npfns = cpu_to_le64(npfns);
@@ -759,9 +722,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
pfn_sb->version_major = cpu_to_le16(1);
- pfn_sb->version_minor = cpu_to_le16(2);
- pfn_sb->start_pad = cpu_to_le32(start_pad);
- pfn_sb->end_trunc = cpu_to_le32(end_trunc);
+ pfn_sb->version_minor = cpu_to_le16(3);
pfn_sb->align = cpu_to_le32(nd_pfn->align);
checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
pfn_sb->checksum = cpu_to_le64(checksum);
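
To make the new offset math concrete: assuming 4K pages, a 64-byte struct page and a 2M alignment in PFN_MODE_PMEM, the reservation computed above works out to

	npfns  = (size - 8K) / 4K
	offset = ALIGN(start + 8K + 64 * npfns, 2M) - start

so, for example, a 128G namespace reserves roughly 2G (about 1/64th of its capacity) for the page memmap before user data begins; the -ENXIO "unable to satisfy requested alignment" path fires only when this reservation would consume the entire namespace.
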
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index e7d8cc9f41e8..2bf3acd69613 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -184,6 +184,7 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
{
+ int ret = 0;
blk_status_t rc = 0;
bool do_acct;
unsigned long start;
@@ -193,7 +194,7 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
struct nd_region *nd_region = to_region(pmem);
if (bio->bi_opf & REQ_PREFLUSH)
- nvdimm_flush(nd_region);
+ ret = nvdimm_flush(nd_region, bio);
do_acct = nd_iostat_start(bio, &start);
bio_for_each_segment(bvec, bio, iter) {
@@ -208,7 +209,10 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
nd_iostat_end(bio, start);
if (bio->bi_opf & REQ_FUA)
- nvdimm_flush(nd_region);
+ ret = nvdimm_flush(nd_region, bio);
+
+ if (ret)
+ bio->bi_status = errno_to_blk_status(ret);
bio_endio(bio);
return BLK_QC_T_NONE;
@@ -362,6 +366,7 @@ static int pmem_attach_disk(struct device *dev,
struct gendisk *disk;
void *addr;
int rc;
+ unsigned long flags = 0UL;
pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
if (!pmem)
@@ -457,14 +462,15 @@ static int pmem_attach_disk(struct device *dev,
nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
disk->bb = &pmem->bb;
- dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops);
+ if (is_nvdimm_sync(nd_region))
+ flags = DAXDEV_F_SYNC;
+ dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
if (!dax_dev) {
put_disk(disk);
return -ENOMEM;
}
dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
pmem->dax_dev = dax_dev;
-
gendev = disk_to_dev(disk);
gendev->groups = pmem_attribute_groups;
@@ -522,14 +528,14 @@ static int nd_pmem_remove(struct device *dev)
sysfs_put(pmem->bb_state);
pmem->bb_state = NULL;
}
- nvdimm_flush(to_nd_region(dev->parent));
+ nvdimm_flush(to_nd_region(dev->parent), NULL);
return 0;
}
static void nd_pmem_shutdown(struct device *dev)
{
- nvdimm_flush(to_nd_region(dev->parent));
+ nvdimm_flush(to_nd_region(dev->parent), NULL);
}
static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 4fed9ce9c2fe..56f2227f192a 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -287,7 +287,9 @@ static ssize_t deep_flush_store(struct device *dev, struct device_attribute *att
return rc;
if (!flush)
return -EINVAL;
- nvdimm_flush(nd_region);
+ rc = nvdimm_flush(nd_region, NULL);
+ if (rc)
+ return rc;
return len;
}
@@ -1077,6 +1079,11 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
dev->of_node = ndr_desc->of_node;
nd_region->ndr_size = resource_size(ndr_desc->res);
nd_region->ndr_start = ndr_desc->res->start;
+ if (ndr_desc->flush)
+ nd_region->flush = ndr_desc->flush;
+ else
+ nd_region->flush = NULL;
+
nd_device_register(dev);
return nd_region;
@@ -1117,11 +1124,24 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
}
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
+int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
+{
+ int rc = 0;
+
+ if (!nd_region->flush)
+ rc = generic_nvdimm_flush(nd_region);
+ else {
+ if (nd_region->flush(nd_region, bio))
+ rc = -EIO;
+ }
+
+ return rc;
+}
/**
* nvdimm_flush - flush any posted write queues between the cpu and pmem media
* @nd_region: blk or interleaved pmem region
*/
-void nvdimm_flush(struct nd_region *nd_region)
+int generic_nvdimm_flush(struct nd_region *nd_region)
{
struct nd_region_data *ndrd = dev_get_drvdata(&nd_region->dev);
int i, idx;
@@ -1145,6 +1165,8 @@ void nvdimm_flush(struct nd_region *nd_region)
if (ndrd_get_flush_wpq(ndrd, i, 0))
writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
wmb();
+
+ return 0;
}
EXPORT_SYMBOL_GPL(nvdimm_flush);
@@ -1189,6 +1211,13 @@ int nvdimm_has_cache(struct nd_region *nd_region)
}
EXPORT_SYMBOL_GPL(nvdimm_has_cache);
+bool is_nvdimm_sync(struct nd_region *nd_region)
+{
+ return is_nd_pmem(&nd_region->dev) &&
+ !test_bit(ND_REGION_ASYNC, &nd_region->flags);
+}
+EXPORT_SYMBOL_GPL(is_nvdimm_sync);
+
struct conflict_context {
struct nd_region *nd_region;
resource_size_t start, size;
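
With this change nvdimm_flush() returns an error code and dispatches to the region's ->flush callback when one is set. A provider opts a region into the asynchronous path by populating nd_region_desc before creating the region; is_nvdimm_sync() then reports such regions as not synchronous, so pmem.c skips DAXDEV_F_SYNC for them. In outline (the concrete use is virtio_pmem_probe() in the next file):

	struct nd_region_desc ndr_desc = { };

	ndr_desc.res = &res;			/* region's physical range */
	ndr_desc.flush = async_pmem_flush;	/* per-region flush callback */
	set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
	nd_region = nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc);
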
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
new file mode 100644
index 000000000000..5e3d07b47e0c
--- /dev/null
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * virtio_pmem.c: Virtio pmem Driver
+ *
+ * Discovers persistent memory range information
+ * from host and registers the virtual pmem device
+ * with libnvdimm core.
+ */
+#include "virtio_pmem.h"
+#include "nd.h"
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_PMEM, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+ /* Initialize virt queue */
+static int init_vq(struct virtio_pmem *vpmem)
+{
+ /* single vq */
+ vpmem->req_vq = virtio_find_single_vq(vpmem->vdev,
+ virtio_pmem_host_ack, "flush_queue");
+ if (IS_ERR(vpmem->req_vq))
+ return PTR_ERR(vpmem->req_vq);
+
+ spin_lock_init(&vpmem->pmem_lock);
+ INIT_LIST_HEAD(&vpmem->req_list);
+
+ return 0;
+}
+
+static int virtio_pmem_probe(struct virtio_device *vdev)
+{
+ struct nd_region_desc ndr_desc = {};
+ int nid = dev_to_node(&vdev->dev);
+ struct nd_region *nd_region;
+ struct virtio_pmem *vpmem;
+ struct resource res;
+ int err = 0;
+
+ if (!vdev->config->get) {
+ dev_err(&vdev->dev, "%s failure: config access disabled\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ vpmem = devm_kzalloc(&vdev->dev, sizeof(*vpmem), GFP_KERNEL);
+ if (!vpmem) {
+ err = -ENOMEM;
+ goto out_err;
+ }
+
+ vpmem->vdev = vdev;
+ vdev->priv = vpmem;
+ err = init_vq(vpmem);
+ if (err) {
+ dev_err(&vdev->dev, "failed to initialize virtio pmem vq's\n");
+ goto out_err;
+ }
+
+ virtio_cread(vpmem->vdev, struct virtio_pmem_config,
+ start, &vpmem->start);
+ virtio_cread(vpmem->vdev, struct virtio_pmem_config,
+ size, &vpmem->size);
+
+ res.start = vpmem->start;
+ res.end = vpmem->start + vpmem->size - 1;
+ vpmem->nd_desc.provider_name = "virtio-pmem";
+ vpmem->nd_desc.module = THIS_MODULE;
+
+ vpmem->nvdimm_bus = nvdimm_bus_register(&vdev->dev,
+ &vpmem->nd_desc);
+ if (!vpmem->nvdimm_bus) {
+ dev_err(&vdev->dev, "failed to register device with nvdimm_bus\n");
+ err = -ENXIO;
+ goto out_vq;
+ }
+
+ dev_set_drvdata(&vdev->dev, vpmem->nvdimm_bus);
+
+ ndr_desc.res = &res;
+ ndr_desc.numa_node = nid;
+ ndr_desc.flush = async_pmem_flush;
+ set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+ set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+ nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc);
+ if (!nd_region) {
+ dev_err(&vdev->dev, "failed to create nvdimm region\n");
+ err = -ENXIO;
+ goto out_nd;
+ }
+ nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent);
+ return 0;
+out_nd:
+ nvdimm_bus_unregister(vpmem->nvdimm_bus);
+out_vq:
+ vdev->config->del_vqs(vdev);
+out_err:
+ return err;
+}
+
+static void virtio_pmem_remove(struct virtio_device *vdev)
+{
+ struct nvdimm_bus *nvdimm_bus = dev_get_drvdata(&vdev->dev);
+
+ nvdimm_bus_unregister(nvdimm_bus);
+ vdev->config->del_vqs(vdev);
+ vdev->config->reset(vdev);
+}
+
+static struct virtio_driver virtio_pmem_driver = {
+ .driver.name = KBUILD_MODNAME,
+ .driver.owner = THIS_MODULE,
+ .id_table = id_table,
+ .probe = virtio_pmem_probe,
+ .remove = virtio_pmem_remove,
+};
+
+module_virtio_driver(virtio_pmem_driver);
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio pmem driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/nvdimm/virtio_pmem.h b/drivers/nvdimm/virtio_pmem.h
new file mode 100644
index 000000000000..0dddefe594c4
--- /dev/null
+++ b/drivers/nvdimm/virtio_pmem.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * virtio_pmem.h: virtio pmem Driver
+ *
+ * Discovers persistent memory range information
+ * from host and provides a virtio based flushing
+ * interface.
+ **/
+
+#ifndef _LINUX_VIRTIO_PMEM_H
+#define _LINUX_VIRTIO_PMEM_H
+
+#include <linux/module.h>
+#include <uapi/linux/virtio_pmem.h>
+#include <linux/libnvdimm.h>
+#include <linux/spinlock.h>
+
+struct virtio_pmem_request {
+ struct virtio_pmem_req req;
+ struct virtio_pmem_resp resp;
+
+ /* Wait queue to process deferred work after ack from host */
+ wait_queue_head_t host_acked;
+ bool done;
+
+ /* Wait queue to process deferred work after virt queue buffer avail */
+ wait_queue_head_t wq_buf;
+ bool wq_buf_avail;
+ struct list_head list;
+};
+
+struct virtio_pmem {
+ struct virtio_device *vdev;
+
+ /* Virtio pmem request queue */
+ struct virtqueue *req_vq;
+
+ /* nvdimm bus registers virtio pmem device */
+ struct nvdimm_bus *nvdimm_bus;
+ struct nvdimm_bus_descriptor nd_desc;
+
+ /* List to store deferred work if virtqueue is full */
+ struct list_head req_list;
+
+ /* Synchronize virtqueue data */
+ spinlock_t pmem_lock;
+
+ /* Memory region information */
+ __u64 start;
+ __u64 size;
+};
+
+void virtio_pmem_host_ack(struct virtqueue *vq);
+int async_pmem_flush(struct nd_region *nd_region, struct bio *bio);
+#endif