x86, uaccess: introduce copy_from_iter_flushcache for pmem / cache-bypass operations

The pmem driver has a need to transfer data with a persistent memory destination and be able to rely on the fact that the destination writes are not cached. It is sufficient for the writes to be flushed to a cpu-store-buffer (non-temporal / "movnt" in x86 terms), as we expect userspace to call fsync() to ensure data-writes have reached a power-fail-safe zone in the platform. The fsync() triggers a REQ_FUA or REQ_FLUSH to the pmem driver which will turn around and fence previous writes with an "sfence". Implement a __copy_from_user_inatomic_flushcache, memcpy_page_flushcache, and memcpy_flushcache, that guarantee that the destination buffer is not dirty in the cpu cache on completion. The new copy_from_iter_flushcache and sub-routines will be used to replace the "pmem api" (include/linux/pmem.h + arch/x86/include/asm/pmem.h). The availability of copy_from_iter_flushcache() and memcpy_flushcache() are gated by the CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE config symbol, and fallback to copy_from_iter_nocache() and plain memcpy() otherwise. This is meant to satisfy the concern from Linus that if a driver wants to do something beyond the normal nocache semantics it should be something private to that driver [1], and Al's concern that anything uaccess related belongs with the rest of the uaccess code [2]. The first consumer of this interface is a new 'copy_from_iter' dax operation so that pmem can inject cache maintenance operations without imposing this overhead on other dax-capable drivers. [1]: https://lists.01.org/pipermail/linux-nvdimm/2017-January/008364.html [2]: https://lists.01.org/pipermail/linux-nvdimm/2017-April/009942.html Cc: <x86@kernel.org> Cc: Jan Kara <jack@suse.cz> Cc: Jeff Moyer <jmoyer@redhat.com> Cc: Ingo Molnar <mingo@redhat.com> Cc: Christoph Hellwig <hch@lst.de> Cc: Toshi Kani <toshi.kani@hpe.com> Cc: "H. Peter Anvin" <hpa@zytor.com> Cc: Al Viro <viro@zeniv.linux.org.uk> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Matthew Wilcox <mawilcox@microsoft.com> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
author: Dan Williams <dan.j.williams@intel.com> 2017-05-29 12:22:50 -0700
committer: Dan Williams <dan.j.williams@intel.com> 2017-06-09 09:09:56 -0700
commit: 0aed55af88345b5d673240f90e671d79662fb01e (patch)
tree: b4ebcb7d95e47a7fef48558daafc0e527c1e60ca /drivers/nvdimm
parent: Linux 4.12-rc4 (diff)
download: wireguard-linux-0aed55af88345b5d673240f90e671d79662fb01e.tar.xz
wireguard-linux-0aed55af88345b5d673240f90e671d79662fb01e.zip
3 files changed, 14 insertions, 5 deletions
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 7ceb5fa4f2a1..b8b9c8ca7862 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -277,7 +277,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 			rc = -EIO;
 	}
 
-	memcpy_to_pmem(nsio->addr + offset, buf, size);
+	memcpy_flushcache(nsio->addr + offset, buf, size);
 	nvdimm_flush(to_nd_region(ndns->dev.parent));
 
 	return rc;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index c544d466ea51..2f3aefe565c6 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -29,6 +29,7 @@
 #include <linux/pfn_t.h>
 #include <linux/slab.h>
 #include <linux/pmem.h>
+#include <linux/uio.h>
 #include <linux/dax.h>
 #include <linux/nd.h>
 #include "pmem.h"
@@ -80,7 +81,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
 {
 	void *mem = kmap_atomic(page);
 
-	memcpy_to_pmem(pmem_addr, mem + off, len);
+	memcpy_flushcache(pmem_addr, mem + off, len);
 	kunmap_atomic(mem);
 }
 
@@ -235,8 +236,15 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
 	return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
 }
 
+static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+		void *addr, size_t bytes, struct iov_iter *i)
+{
+	return copy_from_iter_flushcache(addr, bytes, i);
+}
+
 static const struct dax_operations pmem_dax_ops = {
 	.direct_access = pmem_dax_direct_access,
+	.copy_from_iter = pmem_copy_from_iter,
 };
 
 static void pmem_release_queue(void *q)
@@ -294,7 +302,8 @@ static int pmem_attach_disk(struct device *dev,
 	dev_set_drvdata(dev, pmem);
 	pmem->phys_addr = res->start;
 	pmem->size = resource_size(res);
-	if (nvdimm_has_flush(nd_region) < 0)
+	if (!IS_ENABLED(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE)
+			|| nvdimm_has_flush(nd_region) < 0)
 		dev_warn(dev, "unable to guarantee persistence of writes\n");
 
 	if (!devm_request_mem_region(dev, res->start, resource_size(res),
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index b550edf2571f..985b0e11bd73 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1015,8 +1015,8 @@ void nvdimm_flush(struct nd_region *nd_region)
 	 * The first wmb() is needed to 'sfence' all previous writes
 	 * such that they are architecturally visible for the platform
 	 * buffer flush.  Note that we've already arranged for pmem
-	 * writes to avoid the cache via arch_memcpy_to_pmem().  The
-	 * final wmb() ensures ordering for the NVDIMM flush write.
+	 * writes to avoid the cache via memcpy_flushcache().  The final
+	 * wmb() ensures ordering for the NVDIMM flush write.
 	 */
 	wmb();
 	for (i = 0; i < nd_region->ndr_mappings; i++)
author	Dan Williams <dan.j.williams@intel.com>	2017-05-29 12:22:50 -0700
committer	Dan Williams <dan.j.williams@intel.com>	2017-06-09 09:09:56 -0700
commit	0aed55af88345b5d673240f90e671d79662fb01e (patch)
tree	b4ebcb7d95e47a7fef48558daafc0e527c1e60ca /drivers/nvdimm
parent	Linux 4.12-rc4 (diff)
download	wireguard-linux-0aed55af88345b5d673240f90e671d79662fb01e.tar.xz wireguard-linux-0aed55af88345b5d673240f90e671d79662fb01e.zip