From e7638488434415aa478e78435cac8f0365737638 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 16 May 2018 11:46:08 -0700 Subject: mm: introduce MEMORY_DEVICE_FS_DAX and CONFIG_DEV_PAGEMAP_OPS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation for fixing dax-dma-vs-unmap issues, filesystems need to be able to rely on the fact that they will get wakeups on dev_pagemap page-idle events. Introduce MEMORY_DEVICE_FS_DAX and generic_dax_page_free() as common indicator / infrastructure for dax filesytems to require. With this change there are no users of the MEMORY_DEVICE_HOST designation, so remove it. The HMM sub-system extended dev_pagemap to arrange a callback when a dev_pagemap managed page is freed. Since a dev_pagemap page is free / idle when its reference count is 1 it requires an additional branch to check the page-type at put_page() time. Given put_page() is a hot-path we do not want to incur that check if HMM is not in use, so a static branch is used to avoid that overhead when not necessary. Now, the FS_DAX implementation wants to reuse this mechanism for receiving dev_pagemap ->page_free() callbacks. Rework the HMM-specific static-key into a generic mechanism that either HMM or FS_DAX code paths can enable. For ARCH=um builds, and any other arch that lacks ZONE_DEVICE support, care must be taken to compile out the DEV_PAGEMAP_OPS infrastructure. However, we still need to support FS_DAX in the FS_DAX_LIMITED case implemented by the s390/dcssblk driver. Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Michal Hocko Reported-by: kbuild test robot Reported-by: Thomas Meyer Reported-by: Dave Jiang Cc: "Jérôme Glisse" Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/dax/super.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers/dax') diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 2b2332b605e4..bf8bfaf5596f 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -86,6 +86,7 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) { struct block_device *bdev = sb->s_bdev; struct dax_device *dax_dev; + bool dax_enabled = false; pgoff_t pgoff; int err, id; void *kaddr; @@ -134,14 +135,21 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize) * on being able to do (page_address(pfn_to_page())). */ WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)); + dax_enabled = true; } else if (pfn_t_devmap(pfn)) { - /* pass */; - } else { + struct dev_pagemap *pgmap; + + pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL); + if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX) + dax_enabled = true; + put_dev_pagemap(pgmap); + } + + if (!dax_enabled) { pr_debug("VFS (%s): error: dax support not enabled\n", sb->s_id); return -EOPNOTSUPP; } - return 0; } EXPORT_SYMBOL_GPL(__bdev_dax_supported); -- cgit v1.2.3-59-g8ed1b From b3a9a0c36e1f7b9e2e6cf965c2bb973624f2b3b9 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 2 May 2018 06:46:33 -0700 Subject: dax: Introduce a ->copy_to_iter dax operation Similar to the ->copy_from_iter() operation, a platform may want to deploy an architecture or device specific routine for handling reads from a dax_device like /dev/pmemX. On x86 this routine will point to a machine check safe version of copy_to_iter(). For now, add the plumbing to device-mapper and the dax core. Cc: Ross Zwisler Cc: Mike Snitzer Cc: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/dax/super.c | 10 ++++++++++ drivers/md/dm-linear.c | 16 ++++++++++++++++ drivers/md/dm-log-writes.c | 15 +++++++++++++++ drivers/md/dm-stripe.c | 21 +++++++++++++++++++++ drivers/md/dm.c | 25 +++++++++++++++++++++++++ drivers/nvdimm/pmem.c | 7 +++++++ drivers/s390/block/dcssblk.c | 7 +++++++ fs/dax.c | 3 ++- include/linux/dax.h | 5 +++++ include/linux/device-mapper.h | 5 +++-- 10 files changed, 111 insertions(+), 3 deletions(-) (limited to 'drivers/dax') diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 2b2332b605e4..31b839113399 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -282,6 +282,16 @@ size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, } EXPORT_SYMBOL_GPL(dax_copy_from_iter); +size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i) +{ + if (!dax_alive(dax_dev)) + return 0; + + return dax_dev->ops->copy_to_iter(dax_dev, pgoff, addr, bytes, i); +} +EXPORT_SYMBOL_GPL(dax_copy_to_iter); + #ifdef CONFIG_ARCH_HAS_PMEM_API void arch_wb_cache_pmem(void *addr, size_t size); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 775c06d953b7..d10964d41fd7 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -185,9 +185,24 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + struct linear_c *lc = ti->private; + struct block_device *bdev = lc->dev->bdev; + struct dax_device *dax_dev = lc->dev->dax_dev; + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + + dev_sector = linear_map_sector(ti, sector); + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) + return 0; + return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); +} + #else #define linear_dax_direct_access NULL #define linear_dax_copy_from_iter NULL +#define linear_dax_copy_to_iter NULL #endif static struct target_type linear_target = { @@ -204,6 +219,7 @@ static struct target_type linear_target = { .iterate_devices = linear_iterate_devices, .direct_access = linear_dax_direct_access, .dax_copy_from_iter = linear_dax_copy_from_iter, + .dax_copy_to_iter = linear_dax_copy_to_iter, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index c90c7c08a77f..9ea2b0291f20 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -962,9 +962,23 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, dax_copy: return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); } + +static size_t log_writes_dax_copy_to_iter(struct dm_target *ti, + pgoff_t pgoff, void *addr, size_t bytes, + struct iov_iter *i) +{ + struct log_writes_c *lc = ti->private; + sector_t sector = pgoff * PAGE_SECTORS; + + if (bdev_dax_pgoff(lc->dev->bdev, sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) + return 0; + return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); +} + #else #define log_writes_dax_direct_access NULL #define log_writes_dax_copy_from_iter NULL +#define log_writes_dax_copy_to_iter NULL #endif static struct target_type log_writes_target = { @@ -982,6 +996,7 @@ static struct target_type log_writes_target = { .io_hints = log_writes_io_hints, .direct_access = log_writes_dax_direct_access, .dax_copy_from_iter = log_writes_dax_copy_from_iter, + .dax_copy_to_iter = log_writes_dax_copy_to_iter, }; static int __init dm_log_writes_init(void) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index fe7fb9b1aec3..8547d7594338 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -354,9 +354,29 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i); } +static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + struct stripe_c *sc = ti->private; + struct dax_device *dax_dev; + struct block_device *bdev; + uint32_t stripe; + + stripe_map_sector(sc, sector, &stripe, &dev_sector); + dev_sector += sc->stripe[stripe].physical_start; + dax_dev = sc->stripe[stripe].dev->dax_dev; + bdev = sc->stripe[stripe].dev->bdev; + + if (bdev_dax_pgoff(bdev, dev_sector, ALIGN(bytes, PAGE_SIZE), &pgoff)) + return 0; + return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); +} + #else #define stripe_dax_direct_access NULL #define stripe_dax_copy_from_iter NULL +#define stripe_dax_copy_to_iter NULL #endif /* @@ -478,6 +498,7 @@ static struct target_type stripe_target = { .io_hints = stripe_io_hints, .direct_access = stripe_dax_direct_access, .dax_copy_from_iter = stripe_dax_copy_from_iter, + .dax_copy_to_iter = stripe_dax_copy_to_iter, }; int __init dm_stripe_init(void) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0a7b0107ca78..6752f1c25258 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1089,6 +1089,30 @@ static size_t dm_dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, return ret; } +static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + struct mapped_device *md = dax_get_private(dax_dev); + sector_t sector = pgoff * PAGE_SECTORS; + struct dm_target *ti; + long ret = 0; + int srcu_idx; + + ti = dm_dax_get_live_target(md, sector, &srcu_idx); + + if (!ti) + goto out; + if (!ti->type->dax_copy_to_iter) { + ret = copy_to_iter(addr, bytes, i); + goto out; + } + ret = ti->type->dax_copy_to_iter(ti, pgoff, addr, bytes, i); + out: + dm_put_live_table(md, srcu_idx); + + return ret; +} + /* * A target may call dm_accept_partial_bio only from the map routine. It is * allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET. @@ -3134,6 +3158,7 @@ static const struct block_device_operations dm_blk_dops = { static const struct dax_operations dm_dax_ops = { .direct_access = dm_dax_direct_access, .copy_from_iter = dm_dax_copy_from_iter, + .copy_to_iter = dm_dax_copy_to_iter, }; /* diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index e023d6aa22b5..1b8ab48365de 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -264,9 +264,16 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, return copy_from_iter_flushcache(addr, bytes, i); } +static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_to_iter(addr, bytes, i); +} + static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access, .copy_from_iter = pmem_copy_from_iter, + .copy_to_iter = pmem_copy_to_iter, }; static const struct attribute_group *pmem_attribute_groups[] = { diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 0a312e450207..29024492b8ed 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -51,9 +51,16 @@ static size_t dcssblk_dax_copy_from_iter(struct dax_device *dax_dev, return copy_from_iter(addr, bytes, i); } +static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev, + pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) +{ + return copy_to_iter(addr, bytes, i); +} + static const struct dax_operations dcssblk_dax_ops = { .direct_access = dcssblk_dax_direct_access, .copy_from_iter = dcssblk_dax_copy_from_iter, + .copy_to_iter = dcssblk_dax_copy_to_iter, }; struct dcssblk_dev_info { diff --git a/fs/dax.c b/fs/dax.c index aaec72ded1b6..a64afdf7ec0d 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1057,7 +1057,8 @@ dax_iomap_actor(struct inode *inode, loff_t pos, loff_t length, void *data, map_len = dax_copy_from_iter(dax_dev, pgoff, kaddr, map_len, iter); else - map_len = copy_to_iter(kaddr, map_len, iter); + map_len = dax_copy_to_iter(dax_dev, pgoff, kaddr, + map_len, iter); if (map_len <= 0) { ret = map_len ? map_len : -EFAULT; break; diff --git a/include/linux/dax.h b/include/linux/dax.h index f9eb22ad341e..a43b396fb336 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -20,6 +20,9 @@ struct dax_operations { /* copy_from_iter: required operation for fs-dax direct-i/o */ size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, struct iov_iter *); + /* copy_to_iter: required operation for fs-dax direct-i/o */ + size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, + struct iov_iter *); }; extern struct attribute_group dax_attribute_group; @@ -118,6 +121,8 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); +size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 31fef7c34185..6fb0808e87c8 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -133,7 +133,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn); -typedef size_t (*dm_dax_copy_from_iter_fn)(struct dm_target *ti, pgoff_t pgoff, +typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); #define PAGE_SECTORS (PAGE_SIZE / 512) @@ -184,7 +184,8 @@ struct target_type { dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; - dm_dax_copy_from_iter_fn dax_copy_from_iter; + dm_dax_copy_iter_fn dax_copy_from_iter; + dm_dax_copy_iter_fn dax_copy_to_iter; /* For internal device-mapper use. */ struct list_head list; -- cgit v1.2.3-59-g8ed1b From 808c340be17dc77131fcdf9ad1eb34452d650da1 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 6 Jun 2018 10:45:14 -0600 Subject: dax: Use dax_write_cache* helpers Use dax_write_cache() and dax_write_cache_enabled() instead of open coding the bit operations. Signed-off-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/dax/super.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'drivers/dax') diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 2b2332b605e4..c2c46f96b18c 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -182,8 +182,7 @@ static ssize_t write_cache_show(struct device *dev, if (!dax_dev) return -ENXIO; - rc = sprintf(buf, "%d\n", !!test_bit(DAXDEV_WRITE_CACHE, - &dax_dev->flags)); + rc = sprintf(buf, "%d\n", !!dax_write_cache_enabled(dax_dev)); put_dax(dax_dev); return rc; } @@ -201,10 +200,8 @@ static ssize_t write_cache_store(struct device *dev, if (rc) len = rc; - else if (write_cache) - set_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags); else - clear_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags); + dax_write_cache(dax_dev, write_cache); put_dax(dax_dev); return len; @@ -286,7 +283,7 @@ EXPORT_SYMBOL_GPL(dax_copy_from_iter); void arch_wb_cache_pmem(void *addr, size_t size); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) { - if (unlikely(!test_bit(DAXDEV_WRITE_CACHE, &dax_dev->flags))) + if (unlikely(!dax_write_cache_enabled(dax_dev))) return; arch_wb_cache_pmem(addr, size); -- cgit v1.2.3-59-g8ed1b