From baa51277cf5dc844089ea2f6e0f78b1c5ca665d8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 5 Apr 2016 17:40:52 -0700 Subject: libnvdimm, test: add mock SMART data payload Provide simulated SMART data to enable the ndctl implementation of SMART data retrieval and parsing. The payload is defined here, "Section 4.1 SMART and Health Info (Function Index 1)": http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf Signed-off-by: Dan Williams --- include/uapi/linux/ndctl.h | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 7cc28ab05b87..59c61e018a86 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015, Intel Corporation. + * Copyright (c) 2014-2016, Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU Lesser General Public License, @@ -20,11 +20,45 @@ struct nd_cmd_smart { __u8 data[128]; } __packed; +#define ND_SMART_HEALTH_VALID (1 << 0) +#define ND_SMART_TEMP_VALID (1 << 1) +#define ND_SMART_SPARES_VALID (1 << 2) +#define ND_SMART_ALARM_VALID (1 << 3) +#define ND_SMART_USED_VALID (1 << 4) +#define ND_SMART_SHUTDOWN_VALID (1 << 5) +#define ND_SMART_VENDOR_VALID (1 << 6) +#define ND_SMART_TEMP_TRIP (1 << 0) +#define ND_SMART_SPARE_TRIP (1 << 1) +#define ND_SMART_NON_CRITICAL_HEALTH (1 << 0) +#define ND_SMART_CRITICAL_HEALTH (1 << 1) +#define ND_SMART_FATAL_HEALTH (1 << 2) + +struct nd_smart_payload { + __u32 flags; + __u8 reserved0[4]; + __u8 health; + __u16 temperature; + __u8 spares; + __u8 alarm_flags; + __u8 life_used; + __u8 shutdown_state; + __u8 reserved1; + __u32 vendor_size; + __u8 vendor_data[108]; +} __packed; + struct nd_cmd_smart_threshold { __u32 status; __u8 data[8]; } __packed; +struct nd_smart_threshold_payload { + __u16 alarm_control; + __u16 temperature; 
+ __u8 spares; + __u8 reserved[3]; +} __packed; + struct nd_cmd_dimm_flags { __u32 status; __u32 flags; -- cgit v1.2.3-59-g8ed1b From c7e16e5257ec46530e3e874af38191746c137c83 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Mon, 11 Apr 2016 15:02:26 -0700 Subject: acpi: widen acpi_evaluate_dsm() revision and function-index arguments The ACPI specification states that arguments "Revision ID" and "Function Index" to a _DSM are type "Integer." Type Integers are 64-bit quantities. The function acpi_evaluate_dsm specifies these types as simple "int", which is 32 bits. Widen the types passed to acpi_evaluate_dsm and its callers and derived callers to pass the correct type. acpi_check_dsm and acpi_evaluate_dsm_typed had a similar issue and were corrected as well. This is in preparation for libnvdimm implementing a generic _DSM passthrough facility to have the capacity to pass 64-bit values as the ACPI specification allows. [djbw: clarify the changelog, add rationale] Signed-off-by: Jerry Hoemann Acked-by: Rafael J. Wysocki Signed-off-by: Dan Williams --- drivers/acpi/utils.c | 4 ++-- include/acpi/acpi_bus.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 050673f0c0b3..e854dea7d5fe 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -625,7 +625,7 @@ acpi_status acpi_evaluate_lck(acpi_handle handle, int lock) * some old BIOSes do expect a buffer or an integer etc. */ union acpi_object * -acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, int rev, int func, +acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4) { acpi_status ret; @@ -674,7 +674,7 @@ EXPORT_SYMBOL(acpi_evaluate_dsm); * functions. Currently only support 64 functions at maximum, should be * enough for now. 
*/ -bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, int rev, u64 funcs) +bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs) { int i; u64 mask = 0; diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 14362a84c78e..f092cc6eb1fb 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -61,12 +61,12 @@ bool acpi_ata_match(acpi_handle handle); bool acpi_bay_match(acpi_handle handle); bool acpi_dock_match(acpi_handle handle); -bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, int rev, u64 funcs); +bool acpi_check_dsm(acpi_handle handle, const u8 *uuid, u64 rev, u64 funcs); union acpi_object *acpi_evaluate_dsm(acpi_handle handle, const u8 *uuid, - int rev, int func, union acpi_object *argv4); + u64 rev, u64 func, union acpi_object *argv4); static inline union acpi_object * -acpi_evaluate_dsm_typed(acpi_handle handle, const u8 *uuid, int rev, int func, +acpi_evaluate_dsm_typed(acpi_handle handle, const u8 *uuid, u64 rev, u64 func, union acpi_object *argv4, acpi_object_type type) { union acpi_object *obj; -- cgit v1.2.3-59-g8ed1b From 9d90725ddca347450c4ab177ad680ed76063afd4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 18 Mar 2016 11:27:36 -0700 Subject: libnvdimm, blk: move i/o infrastructure to nd_namespace_blk Consolidate the information for issuing i/o to a blk-namespace, and eliminate some pointer chasing. 
Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- drivers/nvdimm/blk.c | 137 ++++++++++++++++++++++++++------------------------- include/linux/nd.h | 2 + 2 files changed, 71 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 26d039879ba2..4c14ecdc792b 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -21,17 +21,19 @@ #include #include "nd.h" -struct nd_blk_device { - struct nd_namespace_blk *nsblk; - struct nd_blk_region *ndbr; - size_t disk_size; - u32 sector_size; - u32 internal_lbasize; -}; +static u32 nsblk_meta_size(struct nd_namespace_blk *nsblk) +{ + return nsblk->lbasize - ((nsblk->lbasize >= 4096) ? 4096 : 512); +} -static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) +static u32 nsblk_internal_lbasize(struct nd_namespace_blk *nsblk) { - return blk_dev->nsblk->lbasize - blk_dev->sector_size; + return roundup(nsblk->lbasize, INT_LBASIZE_ALIGNMENT); +} + +static u32 nsblk_sector_size(struct nd_namespace_blk *nsblk) +{ + return nsblk->lbasize - nsblk_meta_size(nsblk); } static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk, @@ -55,20 +57,29 @@ static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk, return SIZE_MAX; } +static struct nd_blk_region *to_ndbr(struct nd_namespace_blk *nsblk) +{ + struct nd_region *nd_region; + struct device *parent; + + parent = nsblk->common.dev.parent; + nd_region = container_of(parent, struct nd_region, dev); + return container_of(nd_region, struct nd_blk_region, nd_region); +} + #ifdef CONFIG_BLK_DEV_INTEGRITY -static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, - struct bio_integrity_payload *bip, u64 lba, - int rw) +static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk, + struct bio_integrity_payload *bip, u64 lba, int rw) { - unsigned int len = nd_blk_meta_size(blk_dev); + struct nd_blk_region *ndbr = to_ndbr(nsblk); + unsigned int len = nsblk_meta_size(nsblk); resource_size_t 
dev_offset, ns_offset; - struct nd_namespace_blk *nsblk; - struct nd_blk_region *ndbr; + u32 internal_lbasize, sector_size; int err = 0; - nsblk = blk_dev->nsblk; - ndbr = blk_dev->ndbr; - ns_offset = lba * blk_dev->internal_lbasize + blk_dev->sector_size; + internal_lbasize = nsblk_internal_lbasize(nsblk); + sector_size = nsblk_sector_size(nsblk); + ns_offset = lba * internal_lbasize + sector_size; dev_offset = to_dev_offset(nsblk, ns_offset, len); if (dev_offset == SIZE_MAX) return -EIO; @@ -102,25 +113,26 @@ static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, } #else /* CONFIG_BLK_DEV_INTEGRITY */ -static int nd_blk_rw_integrity(struct nd_blk_device *blk_dev, - struct bio_integrity_payload *bip, u64 lba, - int rw) +static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk, + struct bio_integrity_payload *bip, u64 lba, int rw) { return 0; } #endif -static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, - struct bio_integrity_payload *bip, struct page *page, - unsigned int len, unsigned int off, int rw, - sector_t sector) +static int nsblk_do_bvec(struct nd_namespace_blk *nsblk, + struct bio_integrity_payload *bip, struct page *page, + unsigned int len, unsigned int off, int rw, sector_t sector) { - struct nd_blk_region *ndbr = blk_dev->ndbr; + struct nd_blk_region *ndbr = to_ndbr(nsblk); resource_size_t dev_offset, ns_offset; + u32 internal_lbasize, sector_size; int err = 0; void *iobuf; u64 lba; + internal_lbasize = nsblk_internal_lbasize(nsblk); + sector_size = nsblk_sector_size(nsblk); while (len) { unsigned int cur_len; @@ -130,11 +142,11 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, * Block Window setup/move steps. the do_io routine is capable * of handling len <= PAGE_SIZE. */ - cur_len = bip ? min(len, blk_dev->sector_size) : len; + cur_len = bip ? 
min(len, sector_size) : len; - lba = div_u64(sector << SECTOR_SHIFT, blk_dev->sector_size); - ns_offset = lba * blk_dev->internal_lbasize; - dev_offset = to_dev_offset(blk_dev->nsblk, ns_offset, cur_len); + lba = div_u64(sector << SECTOR_SHIFT, sector_size); + ns_offset = lba * internal_lbasize; + dev_offset = to_dev_offset(nsblk, ns_offset, cur_len); if (dev_offset == SIZE_MAX) return -EIO; @@ -145,13 +157,13 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, return err; if (bip) { - err = nd_blk_rw_integrity(blk_dev, bip, lba, rw); + err = nd_blk_rw_integrity(nsblk, bip, lba, rw); if (err) return err; } len -= cur_len; off += cur_len; - sector += blk_dev->sector_size >> SECTOR_SHIFT; + sector += sector_size >> SECTOR_SHIFT; } return err; @@ -160,7 +172,7 @@ static int nd_blk_do_bvec(struct nd_blk_device *blk_dev, static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) { struct bio_integrity_payload *bip; - struct nd_blk_device *blk_dev; + struct nd_namespace_blk *nsblk; struct bvec_iter iter; unsigned long start; struct bio_vec bvec; @@ -179,17 +191,17 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) } bip = bio_integrity(bio); - blk_dev = q->queuedata; + nsblk = q->queuedata; rw = bio_data_dir(bio); do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { unsigned int len = bvec.bv_len; BUG_ON(len > PAGE_SIZE); - err = nd_blk_do_bvec(blk_dev, bip, bvec.bv_page, len, - bvec.bv_offset, rw, iter.bi_sector); + err = nsblk_do_bvec(nsblk, bip, bvec.bv_page, len, + bvec.bv_offset, rw, iter.bi_sector); if (err) { - dev_dbg(&blk_dev->nsblk->common.dev, + dev_dbg(&nsblk->common.dev, "io error in %s sector %lld, len %d,\n", (rw == READ) ? 
"READ" : "WRITE", (unsigned long long) iter.bi_sector, len); @@ -205,17 +217,16 @@ static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio) return BLK_QC_T_NONE; } -static int nd_blk_rw_bytes(struct nd_namespace_common *ndns, +static int nsblk_rw_bytes(struct nd_namespace_common *ndns, resource_size_t offset, void *iobuf, size_t n, int rw) { - struct nd_blk_device *blk_dev = dev_get_drvdata(ndns->claim); - struct nd_namespace_blk *nsblk = blk_dev->nsblk; - struct nd_blk_region *ndbr = blk_dev->ndbr; + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(&ndns->dev); + struct nd_blk_region *ndbr = to_ndbr(nsblk); resource_size_t dev_offset; dev_offset = to_dev_offset(nsblk, offset, n); - if (unlikely(offset + n > blk_dev->disk_size)) { + if (unlikely(offset + n > nsblk->size)) { dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); return -EFAULT; } @@ -242,16 +253,16 @@ static void nd_blk_release_disk(void *disk) put_disk(disk); } -static int nd_blk_attach_disk(struct device *dev, - struct nd_namespace_common *ndns, struct nd_blk_device *blk_dev) +static int nsblk_attach_disk(struct nd_namespace_blk *nsblk) { + struct device *dev = &nsblk->common.dev; resource_size_t available_disk_size; struct request_queue *q; struct gendisk *disk; u64 internal_nlba; - internal_nlba = div_u64(blk_dev->disk_size, blk_dev->internal_lbasize); - available_disk_size = internal_nlba * blk_dev->sector_size; + internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk)); + available_disk_size = internal_nlba * nsblk_sector_size(nsblk); q = blk_alloc_queue(GFP_KERNEL); if (!q) @@ -264,9 +275,9 @@ static int nd_blk_attach_disk(struct device *dev, blk_queue_make_request(q, nd_blk_make_request); blk_queue_max_hw_sectors(q, UINT_MAX); blk_queue_bounce_limit(q, BLK_BOUNCE_ANY); - blk_queue_logical_block_size(q, blk_dev->sector_size); + blk_queue_logical_block_size(q, nsblk_sector_size(nsblk)); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); - q->queuedata = 
blk_dev; + q->queuedata = nsblk; disk = alloc_disk(0); if (!disk) @@ -276,17 +287,17 @@ static int nd_blk_attach_disk(struct device *dev, return -ENOMEM; } - disk->driverfs_dev = &ndns->dev; + disk->driverfs_dev = dev; disk->first_minor = 0; disk->fops = &nd_blk_fops; disk->queue = q; disk->flags = GENHD_FL_EXT_DEVT; - nvdimm_namespace_disk_name(ndns, disk->disk_name); + nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name); set_capacity(disk, 0); add_disk(disk); - if (nd_blk_meta_size(blk_dev)) { - int rc = nd_integrity_init(disk, nd_blk_meta_size(blk_dev)); + if (nsblk_meta_size(nsblk)) { + int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk)); if (rc) return rc; @@ -301,33 +312,23 @@ static int nd_blk_probe(struct device *dev) { struct nd_namespace_common *ndns; struct nd_namespace_blk *nsblk; - struct nd_blk_device *blk_dev; ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) return PTR_ERR(ndns); - blk_dev = devm_kzalloc(dev, sizeof(*blk_dev), GFP_KERNEL); - if (!blk_dev) - return -ENOMEM; - nsblk = to_nd_namespace_blk(&ndns->dev); - blk_dev->disk_size = nvdimm_namespace_capacity(ndns); - blk_dev->ndbr = to_nd_blk_region(dev->parent); - blk_dev->nsblk = to_nd_namespace_blk(&ndns->dev); - blk_dev->internal_lbasize = roundup(nsblk->lbasize, - INT_LBASIZE_ALIGNMENT); - blk_dev->sector_size = ((nsblk->lbasize >= 4096) ? 
4096 : 512); - dev_set_drvdata(dev, blk_dev); - - ndns->rw_bytes = nd_blk_rw_bytes; + nsblk->size = nvdimm_namespace_capacity(ndns); + dev_set_drvdata(dev, nsblk); + + ndns->rw_bytes = nsblk_rw_bytes; if (is_nd_btt(dev)) return nvdimm_namespace_attach_btt(ndns); - else if (nd_btt_probe(dev, ndns, blk_dev) == 0) { + else if (nd_btt_probe(dev, ndns, nsblk) == 0) { /* we'll come back as btt-blk */ return -ENXIO; } else - return nd_blk_attach_disk(dev, ndns, blk_dev); + return nsblk_attach_disk(nsblk); } static int nd_blk_remove(struct device *dev) diff --git a/include/linux/nd.h b/include/linux/nd.h index 5489ab756d1a..5ea4aec7fd63 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -82,6 +82,7 @@ struct nd_namespace_pmem { * @uuid: namespace name supplied in the dimm label * @id: ida allocated id * @lbasize: blk namespaces have a native sector size when btt not present + * @size: sum of all the resource ranges allocated to this namespace * @num_resources: number of dpa extents to claim * @res: discontiguous dpa extents for given dimm */ @@ -91,6 +92,7 @@ struct nd_namespace_blk { u8 *uuid; int id; unsigned long lbasize; + resource_size_t size; int num_resources; struct resource **res; }; -- cgit v1.2.3-59-g8ed1b From 200c79da824c978fcf6eec1dc9c0a1e521133267 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 22 Mar 2016 00:22:16 -0700 Subject: libnvdimm, pmem, pfn: make pmem_rw_bytes generic and refactor pfn setup In preparation for providing an alternative (to block device) access mechanism to persistent memory, convert pmem_rw_bytes() to nsio_rw_bytes(). This allows ->rw_bytes() functionality without requiring a 'struct pmem_device' to be instantiated. In other words, when ->rw_bytes() is in use i/o is driven through 'struct nd_namespace_io', otherwise it is driven through 'struct pmem_device' and the block layer. 
This consolidates the disjoint calls to devm_exit_badblocks() and devm_memunmap() into a common devm_nsio_disable() and cleans up the init path to use a unified pmem_attach_disk() implementation. Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- drivers/nvdimm/blk.c | 2 +- drivers/nvdimm/btt_devs.c | 4 +- drivers/nvdimm/claim.c | 61 ++++++++++ drivers/nvdimm/nd.h | 40 +++++-- drivers/nvdimm/pfn_devs.c | 4 +- drivers/nvdimm/pmem.c | 236 ++++++++++++++------------------------ include/linux/nd.h | 9 +- tools/testing/nvdimm/Kbuild | 1 + tools/testing/nvdimm/test/iomap.c | 27 +++-- 9 files changed, 211 insertions(+), 173 deletions(-) (limited to 'include') diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 4c14ecdc792b..495e06d9f7e7 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -324,7 +324,7 @@ static int nd_blk_probe(struct device *dev) ndns->rw_bytes = nsblk_rw_bytes; if (is_nd_btt(dev)) return nvdimm_namespace_attach_btt(ndns); - else if (nd_btt_probe(dev, ndns, nsblk) == 0) { + else if (nd_btt_probe(dev, ndns) == 0) { /* we'll come back as btt-blk */ return -ENXIO; } else diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 1886171af80e..816d0dae6398 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -273,8 +273,7 @@ static int __nd_btt_probe(struct nd_btt *nd_btt, return 0; } -int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata) +int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns) { int rc; struct device *btt_dev; @@ -289,7 +288,6 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns, nvdimm_bus_unlock(&ndns->dev); if (!btt_dev) return -ENOMEM; - dev_set_drvdata(btt_dev, drvdata); btt_sb = devm_kzalloc(dev, sizeof(*btt_sb), GFP_KERNEL); rc = __nd_btt_probe(to_nd_btt(btt_dev), ndns, btt_sb); dev_dbg(dev, "%s: btt: %s\n", __func__, diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 
e8f03b0e95e4..6bbd0a36994a 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -12,6 +12,7 @@ */ #include #include +#include #include "nd-core.h" #include "pfn.h" #include "btt.h" @@ -199,3 +200,63 @@ u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb) return sum; } EXPORT_SYMBOL(nd_sb_checksum); + +static int nsio_rw_bytes(struct nd_namespace_common *ndns, + resource_size_t offset, void *buf, size_t size, int rw) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + + if (unlikely(offset + size > nsio->size)) { + dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); + return -EFAULT; + } + + if (rw == READ) { + unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512); + + if (unlikely(is_bad_pmem(&nsio->bb, offset / 512, sz_align))) + return -EIO; + return memcpy_from_pmem(buf, nsio->addr + offset, size); + } else { + memcpy_to_pmem(nsio->addr + offset, buf, size); + wmb_pmem(); + } + + return 0; +} + +int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio) +{ + struct resource *res = &nsio->res; + struct nd_namespace_common *ndns = &nsio->common; + + nsio->size = resource_size(res); + if (!devm_request_mem_region(dev, res->start, resource_size(res), + dev_name(dev))) { + dev_warn(dev, "could not reserve region %pR\n", res); + return -EBUSY; + } + + ndns->rw_bytes = nsio_rw_bytes; + if (devm_init_badblocks(dev, &nsio->bb)) + return -ENOMEM; + nvdimm_badblocks_populate(to_nd_region(ndns->dev.parent), &nsio->bb, + &nsio->res); + + nsio->addr = devm_memremap(dev, res->start, resource_size(res), + ARCH_MEMREMAP_PMEM); + if (IS_ERR(nsio->addr)) + return PTR_ERR(nsio->addr); + return 0; +} +EXPORT_SYMBOL_GPL(devm_nsio_enable); + +void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio) +{ + struct resource *res = &nsio->res; + + devm_memunmap(dev, nsio->addr); + devm_exit_badblocks(dev, &nsio->bb); + devm_release_mem_region(dev, res->start, resource_size(res)); +} +EXPORT_SYMBOL_GPL(devm_nsio_disable); 
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 0fb14890ba26..10e23fe49012 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -13,6 +13,7 @@ #ifndef __ND_H__ #define __ND_H__ #include +#include #include #include #include @@ -197,13 +198,12 @@ struct nd_gen_sb { u64 nd_sb_checksum(struct nd_gen_sb *sb); #if IS_ENABLED(CONFIG_BTT) -int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata); +int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_btt(struct device *dev); struct device *nd_btt_create(struct nd_region *nd_region); #else static inline int nd_btt_probe(struct device *dev, - struct nd_namespace_common *ndns, void *drvdata) + struct nd_namespace_common *ndns) { return -ENODEV; } @@ -221,14 +221,13 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region) struct nd_pfn *to_nd_pfn(struct device *dev); #if IS_ENABLED(CONFIG_NVDIMM_PFN) -int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata); +int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_pfn(struct device *dev); struct device *nd_pfn_create(struct nd_region *nd_region); int nd_pfn_validate(struct nd_pfn *nd_pfn); #else -static inline int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata) +static inline int nd_pfn_probe(struct device *dev, + struct nd_namespace_common *ndns) { return -ENODEV; } @@ -272,6 +271,20 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, char *name); void nvdimm_badblocks_populate(struct nd_region *nd_region, struct badblocks *bb, const struct resource *res); +#if IS_ENABLED(CONFIG_ND_CLAIM) +int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio); +void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio); +#else +static inline int devm_nsio_enable(struct device *dev, + struct nd_namespace_io *nsio) +{ + return -ENXIO; +} +static 
inline void devm_nsio_disable(struct device *dev, + struct nd_namespace_io *nsio) +{ +} +#endif int nd_blk_region_init(struct nd_region *nd_region); void __nd_iostat_start(struct bio *bio, unsigned long *start); static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) @@ -285,6 +298,19 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) return true; } void nd_iostat_end(struct bio *bio, unsigned long start); +static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector, + unsigned int len) +{ + if (bb->count) { + sector_t first_bad; + int num_bad; + + return !!badblocks_check(bb, sector, len / 512, &first_bad, + &num_bad); + } + + return false; +} resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk); const u8 *nd_dev_to_uuid(struct device *dev); bool pmem_should_map_pages(struct device *dev); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 96aa5490c279..9df081ae96e3 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -410,8 +410,7 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) } EXPORT_SYMBOL(nd_pfn_validate); -int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, - void *drvdata) +int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) { int rc; struct nd_pfn *nd_pfn; @@ -427,7 +426,6 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns, nvdimm_bus_unlock(&ndns->dev); if (!pfn_dev) return -ENOMEM; - dev_set_drvdata(pfn_dev, drvdata); pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); nd_pfn = to_nd_pfn(pfn_dev); nd_pfn->pfn_sb = pfn_sb; diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 67d48e2e8ca2..b5f81b02205c 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -49,19 +49,6 @@ struct pmem_device { struct badblocks bb; }; -static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) -{ - if (bb->count) { - sector_t first_bad; - int num_bad; - 
- return !!badblocks_check(bb, sector, len / 512, &first_bad, - &num_bad); - } - - return false; -} - static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, unsigned int len) { @@ -209,16 +196,40 @@ void pmem_release_disk(void *disk) put_disk(disk); } -static struct pmem_device *pmem_alloc(struct device *dev, - struct resource *res, int id) +static struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct resource *res, struct vmem_altmap *altmap); + +static int pmem_attach_disk(struct device *dev, + struct nd_namespace_common *ndns) { + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct vmem_altmap __altmap, *altmap = NULL; + struct resource *res = &nsio->res; + struct nd_pfn *nd_pfn = NULL; + int nid = dev_to_node(dev); + struct nd_pfn_sb *pfn_sb; struct pmem_device *pmem; + struct resource pfn_res; struct request_queue *q; + struct gendisk *disk; + void *addr; + + /* while nsio_rw_bytes is active, parse a pfn info block if present */ + if (is_nd_pfn(dev)) { + nd_pfn = to_nd_pfn(dev); + altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap); + if (IS_ERR(altmap)) + return PTR_ERR(altmap); + } + + /* we're attaching a block device, disable raw namespace access */ + devm_nsio_disable(dev, nsio); pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); if (!pmem) - return ERR_PTR(-ENOMEM); + return -ENOMEM; + dev_set_drvdata(dev, pmem); pmem->phys_addr = res->start; pmem->size = resource_size(res); if (!arch_has_wmb_pmem()) @@ -227,22 +238,31 @@ static struct pmem_device *pmem_alloc(struct device *dev, if (!devm_request_mem_region(dev, res->start, resource_size(res), dev_name(dev))) { dev_warn(dev, "could not reserve region %pR\n", res); - return ERR_PTR(-EBUSY); + return -EBUSY; } q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev)); if (!q) - return ERR_PTR(-ENOMEM); + return -ENOMEM; + pmem->pmem_queue = q; pmem->pfn_flags = PFN_DEV; - if (pmem_should_map_pages(dev)) { - pmem->virt_addr = (void __pmem *) 
devm_memremap_pages(dev, res, + if (is_nd_pfn(dev)) { + addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter, + altmap); + pfn_sb = nd_pfn->pfn_sb; + pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); + pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res); + pmem->pfn_flags |= PFN_MAP; + res = &pfn_res; /* for badblocks populate */ + res->start += pmem->data_offset; + } else if (pmem_should_map_pages(dev)) { + addr = devm_memremap_pages(dev, &nsio->res, &q->q_usage_counter, NULL); pmem->pfn_flags |= PFN_MAP; } else - pmem->virt_addr = (void __pmem *) devm_memremap(dev, - pmem->phys_addr, pmem->size, - ARCH_MEMREMAP_PMEM); + addr = devm_memremap(dev, pmem->phys_addr, + pmem->size, ARCH_MEMREMAP_PMEM); /* * At release time the queue must be dead before @@ -250,23 +270,12 @@ static struct pmem_device *pmem_alloc(struct device *dev, */ if (devm_add_action(dev, pmem_release_queue, q)) { blk_cleanup_queue(q); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } - if (IS_ERR(pmem->virt_addr)) - return (void __force *) pmem->virt_addr; - - pmem->pmem_queue = q; - return pmem; -} - -static int pmem_attach_disk(struct device *dev, - struct nd_namespace_common *ndns, struct pmem_device *pmem) -{ - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - int nid = dev_to_node(dev); - struct resource bb_res; - struct gendisk *disk; + if (IS_ERR(addr)) + return PTR_ERR(addr); + pmem->virt_addr = (void __pmem *) addr; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE); @@ -291,20 +300,9 @@ static int pmem_attach_disk(struct device *dev, set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) / 512); pmem->pmem_disk = disk; - devm_exit_badblocks(dev, &pmem->bb); if (devm_init_badblocks(dev, &pmem->bb)) return -ENOMEM; - bb_res.start = nsio->res.start + pmem->data_offset; - bb_res.end = nsio->res.end; - if (is_nd_pfn(dev)) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); - struct nd_pfn_sb 
*pfn_sb = nd_pfn->pfn_sb; - - bb_res.start += __le32_to_cpu(pfn_sb->start_pad); - bb_res.end -= __le32_to_cpu(pfn_sb->end_trunc); - } - nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, - &bb_res); + nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb, res); disk->bb = &pmem->bb; add_disk(disk); revalidate_disk(disk); @@ -312,33 +310,8 @@ static int pmem_attach_disk(struct device *dev, return 0; } -static int pmem_rw_bytes(struct nd_namespace_common *ndns, - resource_size_t offset, void *buf, size_t size, int rw) -{ - struct pmem_device *pmem = dev_get_drvdata(ndns->claim); - - if (unlikely(offset + size > pmem->size)) { - dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n"); - return -EFAULT; - } - - if (rw == READ) { - unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512); - - if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align))) - return -EIO; - return memcpy_from_pmem(buf, pmem->virt_addr + offset, size); - } else { - memcpy_to_pmem(pmem->virt_addr + offset, buf, size); - wmb_pmem(); - } - - return 0; -} - static int nd_pfn_init(struct nd_pfn *nd_pfn) { - struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev); struct nd_namespace_common *ndns = nd_pfn->ndns; u32 start_pad = 0, end_trunc = 0; resource_size_t start, size; @@ -404,7 +377,8 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * ->direct_access() to those that are included in the memmap. 
*/ start += start_pad; - npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K; + size = resource_size(&nsio->res); + npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K; if (nd_pfn->mode == PFN_MODE_PMEM) offset = ALIGN(start + SZ_8K + 64 * npfns, nd_pfn->align) - start; @@ -413,13 +387,13 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) else return -ENXIO; - if (offset + start_pad + end_trunc >= pmem->size) { + if (offset + start_pad + end_trunc >= size) { dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", dev_name(&ndns->dev)); return -ENXIO; } - npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K; + npfns = (size - offset - start_pad - end_trunc) / SZ_4K; pfn_sb->mode = cpu_to_le32(nd_pfn->mode); pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->npfns = cpu_to_le64(npfns); @@ -456,17 +430,14 @@ static unsigned long init_altmap_reserve(resource_size_t base) return reserve; } -static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) +static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct resource *res, struct vmem_altmap *altmap) { - struct resource res; - struct request_queue *q; - struct pmem_device *pmem; - struct vmem_altmap *altmap; - struct device *dev = &nd_pfn->dev; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; - struct nd_namespace_common *ndns = nd_pfn->ndns; + u64 offset = le64_to_cpu(pfn_sb->dataoff); u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t base = nsio->res.start + start_pad; struct vmem_altmap __altmap = { @@ -474,112 +445,75 @@ static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) .reserve = init_altmap_reserve(base), }; - pmem = dev_get_drvdata(dev); - pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); - pmem->pfn_pad = start_pad + end_trunc; + memcpy(res, &nsio->res, sizeof(*res)); + 
res->start += start_pad; + res->end -= end_trunc; + nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); if (nd_pfn->mode == PFN_MODE_RAM) { - if (pmem->data_offset < SZ_8K) - return -EINVAL; + if (offset < SZ_8K) + return ERR_PTR(-EINVAL); nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); altmap = NULL; } else if (nd_pfn->mode == PFN_MODE_PMEM) { - nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset) - / PAGE_SIZE; + nd_pfn->npfns = (resource_size(res) - offset) / PAGE_SIZE; if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) dev_info(&nd_pfn->dev, "number of pfns truncated from %lld to %ld\n", le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); - altmap = & __altmap; - altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K); + memcpy(altmap, &__altmap, sizeof(*altmap)); + altmap->free = PHYS_PFN(offset - SZ_8K); altmap->alloc = 0; } else - return -ENXIO; + return ERR_PTR(-ENXIO); - /* establish pfn range for lookup, and switch to direct map */ - q = pmem->pmem_queue; - memcpy(&res, &nsio->res, sizeof(res)); - res.start += start_pad; - res.end -= end_trunc; - devm_remove_action(dev, pmem_release_queue, q); - devm_memunmap(dev, (void __force *) pmem->virt_addr); - pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res, - &q->q_usage_counter, altmap); - pmem->pfn_flags |= PFN_MAP; - - /* - * At release time the queue must be dead before - * devm_memremap_pages is unwound - */ - if (devm_add_action(dev, pmem_release_queue, q)) { - blk_cleanup_queue(q); - return -ENOMEM; - } - if (IS_ERR(pmem->virt_addr)) - return PTR_ERR(pmem->virt_addr); - - /* attach pmem disk in "pfn-mode" */ - return pmem_attach_disk(dev, ndns, pmem); + return altmap; } -static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +/* + * Determine the effective resource range and vmem_altmap from an nd_pfn + * instance. 
+ */ +static struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn, + struct resource *res, struct vmem_altmap *altmap) { - struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); int rc; if (!nd_pfn->uuid || !nd_pfn->ndns) - return -ENODEV; + return ERR_PTR(-ENODEV); rc = nd_pfn_init(nd_pfn); if (rc) - return rc; + return ERR_PTR(rc); + /* we need a valid pfn_sb before we can init a vmem_altmap */ - return __nvdimm_namespace_attach_pfn(nd_pfn); + return __nvdimm_setup_pfn(nd_pfn, res, altmap); } static int nd_pmem_probe(struct device *dev) { - struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_namespace_common *ndns; - struct nd_namespace_io *nsio; - struct pmem_device *pmem; ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) return PTR_ERR(ndns); - nsio = to_nd_namespace_io(&ndns->dev); - pmem = pmem_alloc(dev, &nsio->res, nd_region->id); - if (IS_ERR(pmem)) - return PTR_ERR(pmem); - - dev_set_drvdata(dev, pmem); - ndns->rw_bytes = pmem_rw_bytes; - if (devm_init_badblocks(dev, &pmem->bb)) - return -ENOMEM; - nvdimm_badblocks_populate(nd_region, &pmem->bb, &nsio->res); + if (devm_nsio_enable(dev, to_nd_namespace_io(&ndns->dev))) + return -ENXIO; - if (is_nd_btt(dev)) { - /* btt allocates its own request_queue */ - devm_remove_action(dev, pmem_release_queue, pmem->pmem_queue); - blk_cleanup_queue(pmem->pmem_queue); + if (is_nd_btt(dev)) return nvdimm_namespace_attach_btt(ndns); - } if (is_nd_pfn(dev)) - return nvdimm_namespace_attach_pfn(ndns); + return pmem_attach_disk(dev, ndns); - if (nd_btt_probe(dev, ndns, pmem) == 0 - || nd_pfn_probe(dev, ndns, pmem) == 0) { - /* - * We'll come back as either btt-pmem, or pfn-pmem, so - * drop the queue allocation for now. 
- */ + /* if we find a valid info-block we'll come back as that personality */ + if (nd_btt_probe(dev, ndns) == 0 || nd_pfn_probe(dev, ndns) == 0) return -ENXIO; - } - return pmem_attach_disk(dev, ndns, pmem); + /* ...otherwise we're just a raw pmem device */ + return pmem_attach_disk(dev, ndns); } static int nd_pmem_remove(struct device *dev) diff --git a/include/linux/nd.h b/include/linux/nd.h index 5ea4aec7fd63..aee2761d294c 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -15,6 +15,7 @@ #include #include #include +#include enum nvdimm_event { NVDIMM_REVALIDATE_POISON, @@ -55,13 +56,19 @@ static inline struct nd_namespace_common *to_ndns(struct device *dev) } /** - * struct nd_namespace_io - infrastructure for loading an nd_pmem instance + * struct nd_namespace_io - device representation of a persistent memory range * @dev: namespace device created by the nd region driver * @res: struct resource conversion of a NFIT SPA table + * @size: cached resource_size(@res) for fast path size checks + * @addr: virtual address to access the namespace range + * @bb: badblocks list for the namespace range */ struct nd_namespace_io { struct nd_namespace_common common; struct resource res; + resource_size_t size; + void __pmem *addr; + struct badblocks bb; }; /** diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index a34bfd0c8928..d5bc8c080b44 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -7,6 +7,7 @@ ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap ldflags-y += --wrap=memunmap ldflags-y += --wrap=__devm_request_region +ldflags-y += --wrap=__devm_release_region ldflags-y += --wrap=__request_region ldflags-y += --wrap=__release_region ldflags-y += --wrap=devm_memremap_pages diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 0c1a7e65bb81..c842095f2801 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -239,13 +239,11 @@ 
struct resource *__wrap___devm_request_region(struct device *dev, } EXPORT_SYMBOL(__wrap___devm_request_region); -void __wrap___release_region(struct resource *parent, resource_size_t start, - resource_size_t n) +static bool nfit_test_release_region(struct resource *parent, + resource_size_t start, resource_size_t n) { - struct nfit_test_resource *nfit_res; - if (parent == &iomem_resource) { - nfit_res = get_nfit_res(start); + struct nfit_test_resource *nfit_res = get_nfit_res(start); if (nfit_res) { struct resource *res = nfit_res->res + 1; @@ -254,11 +252,26 @@ void __wrap___release_region(struct resource *parent, resource_size_t start, __func__, start, n, res); else memset(res, 0, sizeof(*res)); - return; + return true; } } - __release_region(parent, start, n); + return false; +} + +void __wrap___release_region(struct resource *parent, resource_size_t start, + resource_size_t n) +{ + if (!nfit_test_release_region(parent, start, n)) + __release_region(parent, start, n); } EXPORT_SYMBOL(__wrap___release_region); +void __wrap___devm_release_region(struct device *dev, struct resource *parent, + resource_size_t start, resource_size_t n) +{ + if (!nfit_test_release_region(parent, start, n)) + __devm_release_region(dev, parent, start, n); +} +EXPORT_SYMBOL(__wrap___devm_release_region); + MODULE_LICENSE("GPL v2"); -- cgit v1.2.3-59-g8ed1b From 40abf9be8f52d440e442206182916e3dcc68f722 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Mon, 11 Apr 2016 15:02:28 -0700 Subject: libnvdimm: increase max envelope size for ioctl nd_ioctl() must first read in the fixed sized portion of an ioctl so that it can then determine the size of the variable part. Prepare for ND_CMD_CALL calls which have larger fixed portion envelope. 
Signed-off-by: Jerry Hoemann Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 833867b9ddc2..af31d1c6fdd7 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -27,7 +27,7 @@ enum { /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, ND_CMD_MAX_ELEM = 5, - ND_CMD_MAX_ENVELOPE = 16, + ND_CMD_MAX_ENVELOPE = 256, ND_MAX_MAPPINGS = 32, /* region flag indicating to direct-map persistent memory by default */ -- cgit v1.2.3-59-g8ed1b From e3654eca70d63704c94a60a2aafc0b3c7b46a00b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 28 Apr 2016 16:17:07 -0700 Subject: nfit, libnvdimm: clarify "commands" vs "_DSMs" Clarify the distinction between "commands", the ioctls userspace calls to request the kernel take some action on a given dimm device, and "_DSMs", the actual function numbers used in the firmware interface to the DIMM. _DSMs are ACPI specific whereas commands are Linux kernel generic. This is in preparation for breaking the 1:1 implicit relationship between the kernel ioctl number space and the firmware specific function numbers. 
Cc: Jerry Hoemann Cc: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 21 +++++++++++++-------- drivers/acpi/nfit.h | 4 ++-- drivers/nvdimm/bus.c | 8 ++++---- drivers/nvdimm/core.c | 2 +- drivers/nvdimm/dimm_devs.c | 18 ++++++++++++------ drivers/nvdimm/nd-core.h | 2 +- include/linux/libnvdimm.h | 5 +++-- tools/testing/nvdimm/test/nfit.c | 27 ++++++++++++++------------- 8 files changed, 50 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index d0f35e63640b..1b98e9dc6138 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -175,7 +175,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, union acpi_object in_obj, in_buf, *out_obj; struct device *dev = acpi_desc->dev; const char *cmd_name, *dimm_name; - unsigned long dsm_mask; + unsigned long cmd_mask; acpi_handle handle; const u8 *uuid; u32 offset; @@ -189,7 +189,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, return -ENOTTY; dimm_name = nvdimm_name(nvdimm); cmd_name = nvdimm_cmd_name(cmd); - dsm_mask = nfit_mem->dsm_mask; + cmd_mask = nvdimm_cmd_mask(nvdimm); desc = nd_cmd_dimm_desc(cmd); uuid = to_nfit_uuid(NFIT_DEV_DIMM); handle = adev->handle; @@ -197,7 +197,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct acpi_device *adev = to_acpi_dev(acpi_desc); cmd_name = nvdimm_bus_cmd_name(cmd); - dsm_mask = nd_desc->dsm_mask; + cmd_mask = nd_desc->cmd_mask; desc = nd_cmd_bus_desc(cmd); uuid = to_nfit_uuid(NFIT_DEV_BUS); handle = adev->handle; @@ -207,7 +207,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, if (!desc || (cmd && (desc->out_num + desc->in_num == 0))) return -ENOTTY; - if (!test_bit(cmd, &dsm_mask)) + if (!test_bit(cmd, &cmd_mask)) return -ENOTTY; in_obj.type = ACPI_TYPE_PACKAGE; @@ -926,7 +926,8 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM); int i; - nfit_mem->dsm_mask = 
acpi_desc->dimm_dsm_force_en; + /* nfit test assumes 1:1 relationship between commands and dsms */ + nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en; adev = to_acpi_dev(acpi_desc); if (!adev) return 0; @@ -976,9 +977,13 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) if (rc) continue; + /* + * For now there is 1:1 relationship between cmd_mask and + * dsm_mask. + */ nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem, acpi_nfit_dimm_attribute_groups, - flags, &nfit_mem->dsm_mask); + flags, nfit_mem->dsm_mask); if (!nvdimm) return -ENOMEM; @@ -1007,14 +1012,14 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) struct acpi_device *adev; int i; - nd_desc->dsm_mask = acpi_desc->bus_dsm_force_en; + nd_desc->cmd_mask = acpi_desc->bus_cmd_force_en; adev = to_acpi_dev(acpi_desc); if (!adev) return; for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i)) - set_bit(i, &nd_desc->dsm_mask); + set_bit(i, &nd_desc->cmd_mask); } static ssize_t range_index_show(struct device *dev, diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index c75576b2d50e..332ee6f01662 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -132,8 +132,8 @@ struct acpi_nfit_desc { size_t ars_status_size; struct work_struct work; unsigned int cancel:1; - unsigned long dimm_dsm_force_en; - unsigned long bus_dsm_force_en; + unsigned long dimm_cmd_force_en; + unsigned long bus_cmd_force_en; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, void *iobuf, u64 len, int rw); }; diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 19f822d7f652..cb2042a12b76 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -589,24 +589,24 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, void __user *p = (void __user *) arg; struct device *dev = &nvdimm_bus->dev; const char *cmd_name, *dimm_name; - unsigned long dsm_mask; + unsigned long cmd_mask; void 
*buf; int rc, i; if (nvdimm) { desc = nd_cmd_dimm_desc(cmd); cmd_name = nvdimm_cmd_name(cmd); - dsm_mask = nvdimm->dsm_mask ? *(nvdimm->dsm_mask) : 0; + cmd_mask = nvdimm->cmd_mask; dimm_name = dev_name(&nvdimm->dev); } else { desc = nd_cmd_bus_desc(cmd); cmd_name = nvdimm_bus_cmd_name(cmd); - dsm_mask = nd_desc->dsm_mask; + cmd_mask = nd_desc->cmd_mask; dimm_name = "bus"; } if (!desc || (desc->out_num + desc->in_num == 0) || - !test_bit(cmd, &dsm_mask)) + !test_bit(cmd, &cmd_mask)) return -ENOTTY; /* fail write commands (when read-only) */ diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 182a93fe3712..e8688a13cf4f 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -251,7 +251,7 @@ static ssize_t commands_show(struct device *dev, struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; - for_each_set_bit(cmd, &nd_desc->dsm_mask, BITS_PER_LONG) + for_each_set_bit(cmd, &nd_desc->cmd_mask, BITS_PER_LONG) len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd)); len += sprintf(buf + len, "\n"); return len; diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index c56f88217924..79a35a02053c 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -37,9 +37,9 @@ static int __validate_dimm(struct nvdimm_drvdata *ndd) nvdimm = to_nvdimm(ndd->dev); - if (!nvdimm->dsm_mask) + if (!nvdimm->cmd_mask) return -ENXIO; - if (!test_bit(ND_CMD_GET_CONFIG_DATA, nvdimm->dsm_mask)) + if (!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) return -ENXIO; return 0; @@ -263,6 +263,12 @@ const char *nvdimm_name(struct nvdimm *nvdimm) } EXPORT_SYMBOL_GPL(nvdimm_name); +unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm) +{ + return nvdimm->cmd_mask; +} +EXPORT_SYMBOL_GPL(nvdimm_cmd_mask); + void *nvdimm_provider_data(struct nvdimm *nvdimm) { if (nvdimm) @@ -277,10 +283,10 @@ static ssize_t commands_show(struct device *dev, struct nvdimm *nvdimm = to_nvdimm(dev); 
int cmd, len = 0; - if (!nvdimm->dsm_mask) + if (!nvdimm->cmd_mask) return sprintf(buf, "\n"); - for_each_set_bit(cmd, nvdimm->dsm_mask, BITS_PER_LONG) + for_each_set_bit(cmd, &nvdimm->cmd_mask, BITS_PER_LONG) len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd)); len += sprintf(buf + len, "\n"); return len; @@ -340,7 +346,7 @@ EXPORT_SYMBOL_GPL(nvdimm_attribute_group); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, - unsigned long *dsm_mask) + unsigned long cmd_mask) { struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL); struct device *dev; @@ -355,7 +361,7 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, } nvdimm->provider_data = provider_data; nvdimm->flags = flags; - nvdimm->dsm_mask = dsm_mask; + nvdimm->cmd_mask = cmd_mask; atomic_set(&nvdimm->busy, 0); dev = &nvdimm->dev; dev_set_name(dev, "nmem%d", nvdimm->id); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 1d1500f3d8b5..da0d322ed7cb 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -37,7 +37,7 @@ struct nvdimm_bus { struct nvdimm { unsigned long flags; void *provider_data; - unsigned long *dsm_mask; + unsigned long cmd_mask; struct device dev; atomic_t busy; int id; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index af31d1c6fdd7..0c3c30cbbea5 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -68,7 +68,7 @@ struct nd_mapping { struct nvdimm_bus_descriptor { const struct attribute_group **attr_groups; - unsigned long dsm_mask; + unsigned long cmd_mask; char *provider_name; ndctl_fn ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); @@ -130,10 +130,11 @@ struct nd_region *to_nd_region(struct device *dev); struct nd_blk_region *to_nd_blk_region(struct device *dev); struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus); const char *nvdimm_name(struct 
nvdimm *nvdimm); +unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm); void *nvdimm_provider_data(struct nvdimm *nvdimm); struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data, const struct attribute_group **groups, unsigned long flags, - unsigned long *dsm_mask); + unsigned long cmd_mask); const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd); const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd); u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 3187322eeed7..ed899a411c22 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -344,8 +344,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, if (nvdimm) { struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm); - if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) + if (!nfit_mem || !test_bit(cmd, &cmd_mask)) return -ENOTTY; /* lookup label space for the given dimm */ @@ -374,7 +375,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, } else { struct ars_state *ars_state = &t->ars_state; - if (!nd_desc || !test_bit(cmd, &nd_desc->dsm_mask)) + if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask)) return -ENOTTY; switch (cmd) { @@ -1251,13 +1252,13 @@ static void nfit_test0_setup(struct nfit_test *t) post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE); acpi_desc = &t->acpi_desc; - set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); - set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); - set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); - set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_cmd_force_en); + 
set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_cmd_force_en); + set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); } static void nfit_test1_setup(struct nfit_test *t) @@ -1315,10 +1316,10 @@ static void nfit_test1_setup(struct nfit_test *t) post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE); acpi_desc = &t->acpi_desc; - set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); - set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en); + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, -- cgit v1.2.3-59-g8ed1b From 31eca76ba2fc988bf88f16fcf763a0ec4068cd30 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 28 Apr 2016 16:23:43 -0700 Subject: nfit, libnvdimm: limited/whitelisted dimm command marshaling mechanism There are currently 4 known similar but incompatible definitions of the command sets that can be sent to an NVDIMM through ACPI. It is also clear that future platform generations (ACPI or not) will continue to revise and extend the DIMM command set as new devices and use cases arrive. It is obviously untenable to continue to proliferate divergence of these command definitions, and to that end a standardization process has begun to provide for a unified specification. However, that leaves a problem about what to do with this first generation where vendors are already shipping divergence. 
The Linux kernel can support these initial diverged platforms without giving platform-firmware free rein to continue to diverge and compound kernel maintenance overhead. The kernel implementation can encourage standardization in two ways: 1/ Require that any function code that userspace wants to send be explicitly white-listed in the implementation. For ACPI this means function codes marked as supported by acpi_check_dsm() may only be invoked if they appear in the white-list. A function must be publicly documented before it is added to the white-list. 2/ The above restrictions can be trivially bypassed by using the "vendor-specific" payload command. However, since vendor-specific commands are by definition not publicly documented and have the potential to corrupt the kernel's view of the dimm state, we provide a toggle to disable vendor-specific operations. Enabling undefined behavior is a policy decision that can be made by the platform owner and encourages firmware implementations to choose public over private command implementations.
Based on an initial patch from Jerry Hoemann Cc: Jerry Hoemann Cc: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 101 ++++++++++++++++++++++++++++++++++++++------- drivers/acpi/nfit.h | 14 ++++++- drivers/nvdimm/bus.c | 39 +++++++++++++++++ include/uapi/linux/ndctl.h | 42 +++++++++++++++++++ 4 files changed, 179 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 1b98e9dc6138..b85a46873228 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -171,33 +171,46 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, unsigned int buf_len, int *cmd_rc) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); - const struct nd_cmd_desc *desc = NULL; union acpi_object in_obj, in_buf, *out_obj; + const struct nd_cmd_desc *desc = NULL; struct device *dev = acpi_desc->dev; + struct nd_cmd_pkg *call_pkg = NULL; const char *cmd_name, *dimm_name; - unsigned long cmd_mask; + unsigned long cmd_mask, dsm_mask; acpi_handle handle; + unsigned int func; const u8 *uuid; u32 offset; int rc, i; + func = cmd; + if (cmd == ND_CMD_CALL) { + call_pkg = buf; + func = call_pkg->nd_command; + } + if (nvdimm) { struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); struct acpi_device *adev = nfit_mem->adev; if (!adev) return -ENOTTY; + if (call_pkg && nfit_mem->family != call_pkg->nd_family) + return -ENOTTY; + dimm_name = nvdimm_name(nvdimm); cmd_name = nvdimm_cmd_name(cmd); cmd_mask = nvdimm_cmd_mask(nvdimm); + dsm_mask = nfit_mem->dsm_mask; desc = nd_cmd_dimm_desc(cmd); - uuid = to_nfit_uuid(NFIT_DEV_DIMM); + uuid = to_nfit_uuid(nfit_mem->family); handle = adev->handle; } else { struct acpi_device *adev = to_acpi_dev(acpi_desc); cmd_name = nvdimm_bus_cmd_name(cmd); cmd_mask = nd_desc->cmd_mask; + dsm_mask = cmd_mask; desc = nd_cmd_bus_desc(cmd); uuid = to_nfit_uuid(NFIT_DEV_BUS); handle = adev->handle; @@ -207,7 +220,7 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor 
*nd_desc, if (!desc || (cmd && (desc->out_num + desc->in_num == 0))) return -ENOTTY; - if (!test_bit(cmd, &cmd_mask)) + if (!test_bit(cmd, &cmd_mask) || !test_bit(func, &dsm_mask)) return -ENOTTY; in_obj.type = ACPI_TYPE_PACKAGE; @@ -222,21 +235,44 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc, i, buf); + if (call_pkg) { + /* skip over package wrapper */ + in_buf.buffer.pointer = (void *) &call_pkg->nd_payload; + in_buf.buffer.length = call_pkg->nd_size_in; + } + if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) { - dev_dbg(dev, "%s:%s cmd: %s input length: %d\n", __func__, - dimm_name, cmd_name, in_buf.buffer.length); - print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, - 4, in_buf.buffer.pointer, min_t(u32, 128, - in_buf.buffer.length), true); + dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n", + __func__, dimm_name, cmd, func, + in_buf.buffer.length); + print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 4, 4, + in_buf.buffer.pointer, + min_t(u32, 256, in_buf.buffer.length), true); } - out_obj = acpi_evaluate_dsm(handle, uuid, 1, cmd, &in_obj); + out_obj = acpi_evaluate_dsm(handle, uuid, 1, func, &in_obj); if (!out_obj) { dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name, cmd_name); return -EINVAL; } + if (call_pkg) { + call_pkg->nd_fw_size = out_obj->buffer.length; + memcpy(call_pkg->nd_payload + call_pkg->nd_size_in, + out_obj->buffer.pointer, + min(call_pkg->nd_fw_size, call_pkg->nd_size_out)); + + ACPI_FREE(out_obj); + /* + * Need to support FW function w/o known size in advance. + * Caller can determine required size based upon nd_fw_size. + * If we return an error (like elsewhere) then caller wouldn't + * be able to rely upon data returned to make calculation. 
+ */ + return 0; + } + if (out_obj->package.type != ACPI_TYPE_BUFFER) { dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n", __func__, dimm_name, cmd_name, out_obj->type); @@ -923,11 +959,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, { struct acpi_device *adev, *adev_dimm; struct device *dev = acpi_desc->dev; - const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM); + unsigned long dsm_mask; + const u8 *uuid; int i; /* nfit test assumes 1:1 relationship between commands and dsms */ nfit_mem->dsm_mask = acpi_desc->dimm_cmd_force_en; + nfit_mem->family = NVDIMM_FAMILY_INTEL; adev = to_acpi_dev(acpi_desc); if (!adev) return 0; @@ -940,7 +978,31 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc, return force_enable_dimms ? 0 : -ENODEV; } - for (i = ND_CMD_SMART; i <= ND_CMD_VENDOR; i++) + /* + * Until standardization materializes we need to consider up to 3 + * different command sets. Note, that checking for function0 (bit0) + * tells us if any commands are reachable through this uuid. + */ + for (i = NVDIMM_FAMILY_INTEL; i <= NVDIMM_FAMILY_HPE2; i++) + if (acpi_check_dsm(adev_dimm->handle, to_nfit_uuid(i), 1, 1)) + break; + + /* limit the supported commands to those that are publicly documented */ + nfit_mem->family = i; + if (nfit_mem->family == NVDIMM_FAMILY_INTEL) + dsm_mask = 0x3fe; + else if (nfit_mem->family == NVDIMM_FAMILY_HPE1) + dsm_mask = 0x1c3c76; + else if (nfit_mem->family == NVDIMM_FAMILY_HPE2) + dsm_mask = 0x1fe; + else { + dev_err(dev, "unknown dimm command family\n"); + nfit_mem->family = -1; + return force_enable_dimms ? 
0 : -ENODEV; + } + + uuid = to_nfit_uuid(nfit_mem->family); + for_each_set_bit(i, &dsm_mask, BITS_PER_LONG) if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i)) set_bit(i, &nfit_mem->dsm_mask); @@ -953,8 +1015,8 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) int dimm_count = 0; list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) { + unsigned long flags = 0, cmd_mask; struct nvdimm *nvdimm; - unsigned long flags = 0; u32 device_handle; u16 mem_flags; int rc; @@ -978,12 +1040,17 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) continue; /* - * For now there is 1:1 relationship between cmd_mask and - * dsm_mask. + * TODO: provide translation for non-NVDIMM_FAMILY_INTEL + * devices (i.e. from nd_cmd to acpi_dsm) to standardize the + * userspace interface. */ + cmd_mask = 1UL << ND_CMD_CALL; + if (nfit_mem->family == NVDIMM_FAMILY_INTEL) + cmd_mask |= nfit_mem->dsm_mask; + nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem, acpi_nfit_dimm_attribute_groups, - flags, nfit_mem->dsm_mask); + flags, cmd_mask); if (!nvdimm) return -ENOMEM; @@ -2468,6 +2535,8 @@ static __init int nfit_init(void) acpi_str_to_uuid(UUID_PERSISTENT_VIRTUAL_CD, nfit_uuid[NFIT_SPA_PCD]); acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]); acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]); + acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE1, nfit_uuid[NFIT_DEV_DIMM_N_HPE1]); + acpi_str_to_uuid(UUID_NFIT_DIMM_N_HPE2, nfit_uuid[NFIT_DEV_DIMM_N_HPE2]); nfit_wq = create_singlethread_workqueue("nfit"); if (!nfit_wq) diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 332ee6f01662..f82fda55b6de 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -21,13 +21,25 @@ #include #include +/* ACPI 6.1 */ #define UUID_NFIT_BUS "2f10e7a4-9e91-11e4-89d3-123b93f75cba" + +/* http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf */ #define UUID_NFIT_DIMM "4309ac30-0d11-11e4-9191-0800200c9a66" + +/* 
https://github.com/HewlettPackard/hpe-nvm/blob/master/Documentation/ */ +#define UUID_NFIT_DIMM_N_HPE1 "9002c334-acf3-4c0e-9642-a235f0d53bc6" +#define UUID_NFIT_DIMM_N_HPE2 "5008664b-b758-41a0-a03c-27c2f2d04f7e" + #define ACPI_NFIT_MEM_FAILED_MASK (ACPI_NFIT_MEM_SAVE_FAILED \ | ACPI_NFIT_MEM_RESTORE_FAILED | ACPI_NFIT_MEM_FLUSH_FAILED \ | ACPI_NFIT_MEM_NOT_ARMED) enum nfit_uuids { + /* for simplicity alias the uuid index with the family id */ + NFIT_DEV_DIMM = NVDIMM_FAMILY_INTEL, + NFIT_DEV_DIMM_N_HPE1 = NVDIMM_FAMILY_HPE1, + NFIT_DEV_DIMM_N_HPE2 = NVDIMM_FAMILY_HPE2, NFIT_SPA_VOLATILE, NFIT_SPA_PM, NFIT_SPA_DCR, @@ -37,7 +49,6 @@ enum nfit_uuids { NFIT_SPA_PDISK, NFIT_SPA_PCD, NFIT_DEV_BUS, - NFIT_DEV_DIMM, NFIT_UUID_MAX, }; @@ -110,6 +121,7 @@ struct nfit_mem { struct list_head list; struct acpi_device *adev; unsigned long dsm_mask; + int family; }; struct acpi_nfit_desc { diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index cb2042a12b76..395a9fbbc69d 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -439,6 +439,12 @@ static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = { .out_num = 3, .out_sizes = { 4, 4, UINT_MAX, }, }, + [ND_CMD_CALL] = { + .in_num = 2, + .in_sizes = { sizeof(struct nd_cmd_pkg), UINT_MAX, }, + .out_num = 1, + .out_sizes = { UINT_MAX, }, + }, }; const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd) @@ -473,6 +479,12 @@ static const struct nd_cmd_desc __nd_cmd_bus_descs[] = { .out_num = 3, .out_sizes = { 4, 4, 8, }, }, + [ND_CMD_CALL] = { + .in_num = 2, + .in_sizes = { sizeof(struct nd_cmd_pkg), UINT_MAX, }, + .out_num = 1, + .out_sizes = { UINT_MAX, }, + }, }; const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd) @@ -500,6 +512,10 @@ u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd, struct nd_cmd_vendor_hdr *hdr = buf; return hdr->in_length; + } else if (cmd == ND_CMD_CALL) { + struct nd_cmd_pkg *pkg = buf; + + return pkg->nd_size_in; } return UINT_MAX; @@ -522,6 +538,12 @@ u32 nd_cmd_out_size(struct nvdimm *nvdimm, int 
cmd, return out_field[1]; else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 2) return out_field[1] - 8; + else if (cmd == ND_CMD_CALL) { + struct nd_cmd_pkg *pkg = (struct nd_cmd_pkg *) in_field; + + return pkg->nd_size_out; + } + return UINT_MAX; } @@ -588,6 +610,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, unsigned int cmd = _IOC_NR(ioctl_cmd); void __user *p = (void __user *) arg; struct device *dev = &nvdimm_bus->dev; + struct nd_cmd_pkg pkg; const char *cmd_name, *dimm_name; unsigned long cmd_mask; void *buf; @@ -605,6 +628,11 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, dimm_name = "bus"; } + if (cmd == ND_CMD_CALL) { + if (copy_from_user(&pkg, p, sizeof(pkg))) + return -EFAULT; + } + if (!desc || (desc->out_num + desc->in_num == 0) || !test_bit(cmd, &cmd_mask)) return -ENOTTY; @@ -616,6 +644,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, case ND_CMD_SET_CONFIG_DATA: case ND_CMD_ARS_START: case ND_CMD_CLEAR_ERROR: + case ND_CMD_CALL: dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n", nvdimm ? 
nvdimm_cmd_name(cmd) : nvdimm_bus_cmd_name(cmd)); @@ -643,6 +672,16 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, in_len += in_size; } + if (cmd == ND_CMD_CALL) { + dev_dbg(dev, "%s:%s, idx: %llu, in: %zu, out: %zu, len %zu\n", + __func__, dimm_name, pkg.nd_command, + in_len, out_len, buf_len); + + for (i = 0; i < ARRAY_SIZE(pkg.nd_reserved2); i++) + if (pkg.nd_reserved2[i]) + return -EINVAL; + } + /* process an output envelope */ for (i = 0; i < desc->out_num; i++) { u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 7cc28ab05b87..45daa0be5ff9 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -125,6 +125,7 @@ enum { ND_CMD_VENDOR_EFFECT_LOG_SIZE = 7, ND_CMD_VENDOR_EFFECT_LOG = 8, ND_CMD_VENDOR = 9, + ND_CMD_CALL = 10, }; enum { @@ -158,6 +159,7 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) [ND_CMD_VENDOR_EFFECT_LOG_SIZE] = "effect_size", [ND_CMD_VENDOR_EFFECT_LOG] = "effect_log", [ND_CMD_VENDOR] = "vendor", + [ND_CMD_CALL] = "cmd_call", }; if (cmd < ARRAY_SIZE(names) && names[cmd]) @@ -224,4 +226,44 @@ enum ars_masks { ARS_STATUS_MASK = 0x0000FFFF, ARS_EXT_STATUS_SHIFT = 16, }; + +/* + * struct nd_cmd_pkg + * + * is a wrapper to a quasi pass thru interface for invoking firmware + * associated with nvdimms. + * + * INPUT PARAMETERS + * + * nd_family corresponds to the firmware (e.g. DSM) interface. + * + * nd_command are the function index advertised by the firmware. + * + * nd_size_in is the size of the input parameters being passed to firmware + * + * OUTPUT PARAMETERS + * + * nd_fw_size is the size of the data firmware wants to return for + * the call. If nd_fw_size is greater than size of nd_size_out, only + * the first nd_size_out bytes are returned. 
+ */ + +struct nd_cmd_pkg { + __u64 nd_family; /* family of commands */ + __u64 nd_command; + __u32 nd_size_in; /* INPUT: size of input args */ + __u32 nd_size_out; /* INPUT: size of payload */ + __u32 nd_reserved2[9]; /* reserved must be zero */ + __u32 nd_fw_size; /* OUTPUT: size fw wants to return */ + unsigned char nd_payload[]; /* Contents of call */ +}; + +/* These NVDIMM families represent pre-standardization command sets */ +#define NVDIMM_FAMILY_INTEL 0 +#define NVDIMM_FAMILY_HPE1 1 +#define NVDIMM_FAMILY_HPE2 2 + +#define ND_IOCTL_CALL _IOWR(ND_IOCTL, ND_CMD_CALL,\ + struct nd_cmd_pkg) + #endif /* __NDCTL_H__ */ -- cgit v1.2.3-59-g8ed1b From cd03412a51ac4cb3001a8cdfae4560c9602f3387 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 11 Mar 2016 10:15:36 -0800 Subject: libnvdimm, dax: introduce device-dax infrastructure Device DAX is the device-centric analogue of Filesystem DAX (CONFIG_FS_DAX). It allows persistent memory ranges to be allocated and mapped without need of an intervening file system. This initial infrastructure arranges for a libnvdimm pfn-device to be represented as a different device-type so that it can be attached to a driver other than the pmem driver. 
Signed-off-by: Dan Williams --- drivers/nvdimm/Kconfig | 13 ++++++ drivers/nvdimm/Makefile | 1 + drivers/nvdimm/bus.c | 4 ++ drivers/nvdimm/claim.c | 2 + drivers/nvdimm/dax_devs.c | 99 +++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/namespace_devs.c | 19 +++++++- drivers/nvdimm/nd-core.h | 1 + drivers/nvdimm/nd.h | 25 ++++++++++ drivers/nvdimm/pfn_devs.c | 100 +++++++++++++++++++++++++++------------- drivers/nvdimm/region.c | 2 + drivers/nvdimm/region_devs.c | 29 ++++++++++++ include/uapi/linux/ndctl.h | 2 + tools/testing/nvdimm/Kbuild | 1 + 13 files changed, 264 insertions(+), 34 deletions(-) create mode 100644 drivers/nvdimm/dax_devs.c (limited to 'include') diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 53c11621d5b1..7c8a3bf07884 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -88,4 +88,17 @@ config NVDIMM_PFN Select Y if unsure +config NVDIMM_DAX + bool "NVDIMM DAX: Raw access to persistent memory" + default LIBNVDIMM + depends on NVDIMM_PFN + help + Support raw device dax access to a persistent memory + namespace. For environments that want to hard partition + persistent memory, this capability provides a mechanism to + sub-divide a namespace into character devices that can only be + accessed via DAX (mmap(2)).
+ + Select Y if unsure + endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index ea84d3c4e8e5..909554c3f955 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -23,3 +23,4 @@ libnvdimm-y += label.o libnvdimm-$(CONFIG_ND_CLAIM) += claim.o libnvdimm-$(CONFIG_BTT) += btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o +libnvdimm-$(CONFIG_NVDIMM_DAX) += dax_devs.o diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 19f822d7f652..97589e3cb852 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -40,6 +40,8 @@ static int to_nd_device_type(struct device *dev) return ND_DEVICE_REGION_PMEM; else if (is_nd_blk(dev)) return ND_DEVICE_REGION_BLK; + else if (is_nd_dax(dev)) + return ND_DEVICE_DAX_PMEM; else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent)) return nd_region_to_nstype(to_nd_region(dev->parent)); @@ -246,6 +248,8 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie) void __nd_device_register(struct device *dev) { + if (!dev) + return; dev->bus = &nvdimm_bus_type; get_device(dev); async_schedule_domain(nd_async_device_register, dev, diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c index 6bbd0a36994a..5f53db59a058 100644 --- a/drivers/nvdimm/claim.c +++ b/drivers/nvdimm/claim.c @@ -85,6 +85,8 @@ static bool is_idle(struct device *dev, struct nd_namespace_common *ndns) seed = nd_region->btt_seed; else if (is_nd_pfn(dev)) seed = nd_region->pfn_seed; + else if (is_nd_dax(dev)) + seed = nd_region->dax_seed; if (seed == dev || ndns || dev->driver) return false; diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c new file mode 100644 index 000000000000..f90f7549e7f4 --- /dev/null +++ b/drivers/nvdimm/dax_devs.c @@ -0,0 +1,99 @@ +/* + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include "nd-core.h" +#include "nd.h" + +static void nd_dax_release(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_dax *nd_dax = to_nd_dax(dev); + struct nd_pfn *nd_pfn = &nd_dax->nd_pfn; + + dev_dbg(dev, "%s\n", __func__); + nd_detach_ndns(dev, &nd_pfn->ndns); + ida_simple_remove(&nd_region->dax_ida, nd_pfn->id); + kfree(nd_pfn->uuid); + kfree(nd_dax); +} + +static struct device_type nd_dax_device_type = { + .name = "nd_dax", + .release = nd_dax_release, +}; + +bool is_nd_dax(struct device *dev) +{ + return dev ? 
dev->type == &nd_dax_device_type : false; +} +EXPORT_SYMBOL(is_nd_dax); + +struct nd_dax *to_nd_dax(struct device *dev) +{ + struct nd_dax *nd_dax = container_of(dev, struct nd_dax, nd_pfn.dev); + + WARN_ON(!is_nd_dax(dev)); + return nd_dax; +} +EXPORT_SYMBOL(to_nd_dax); + +static const struct attribute_group *nd_dax_attribute_groups[] = { + &nd_pfn_attribute_group, + &nd_device_attribute_group, + &nd_numa_attribute_group, + NULL, +}; + +static struct nd_dax *nd_dax_alloc(struct nd_region *nd_region) +{ + struct nd_pfn *nd_pfn; + struct nd_dax *nd_dax; + struct device *dev; + + nd_dax = kzalloc(sizeof(*nd_dax), GFP_KERNEL); + if (!nd_dax) + return NULL; + + nd_pfn = &nd_dax->nd_pfn; + nd_pfn->id = ida_simple_get(&nd_region->dax_ida, 0, 0, GFP_KERNEL); + if (nd_pfn->id < 0) { + kfree(nd_dax); + return NULL; + } + + dev = &nd_pfn->dev; + dev_set_name(dev, "dax%d.%d", nd_region->id, nd_pfn->id); + dev->groups = nd_dax_attribute_groups; + dev->type = &nd_dax_device_type; + dev->parent = &nd_region->dev; + + return nd_dax; +} + +struct device *nd_dax_create(struct nd_region *nd_region) +{ + struct device *dev = NULL; + struct nd_dax *nd_dax; + + if (!is_nd_pmem(&nd_region->dev)) + return NULL; + + nd_dax = nd_dax_alloc(nd_region); + if (nd_dax) + dev = nd_pfn_devinit(&nd_dax->nd_pfn, NULL); + __nd_device_register(dev); + return dev; +} diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index e5ad5162bf34..c5e3196c45b0 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1288,6 +1288,8 @@ static ssize_t mode_show(struct device *dev, mode = "safe"; else if (claim && is_nd_pfn(claim)) mode = "memory"; + else if (claim && is_nd_dax(claim)) + mode = "dax"; else if (!claim && pmem_should_map_pages(dev)) mode = "memory"; else @@ -1379,14 +1381,17 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) { struct nd_btt *nd_btt = is_nd_btt(dev) ? 
to_nd_btt(dev) : NULL; struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL; + struct nd_dax *nd_dax = is_nd_dax(dev) ? to_nd_dax(dev) : NULL; struct nd_namespace_common *ndns = NULL; resource_size_t size; - if (nd_btt || nd_pfn) { + if (nd_btt || nd_pfn || nd_dax) { if (nd_btt) ndns = nd_btt->ndns; else if (nd_pfn) ndns = nd_pfn->ndns; + else if (nd_dax) + ndns = nd_dax->nd_pfn.ndns; if (!ndns) return ERR_PTR(-ENODEV); @@ -1779,6 +1784,18 @@ void nd_region_create_blk_seed(struct nd_region *nd_region) nd_device_register(nd_region->ns_seed); } +void nd_region_create_dax_seed(struct nd_region *nd_region) +{ + WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); + nd_region->dax_seed = nd_dax_create(nd_region); + /* + * Seed creation failures are not fatal, provisioning is simply + * disabled until memory becomes available + */ + if (!nd_region->dax_seed) + dev_err(&nd_region->dev, "failed to create dax namespace\n"); +} + void nd_region_create_pfn_seed(struct nd_region *nd_region) { WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev)); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index 1d1500f3d8b5..cb65308c0329 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -54,6 +54,7 @@ struct nd_region; void nd_region_create_blk_seed(struct nd_region *nd_region); void nd_region_create_btt_seed(struct nd_region *nd_region); void nd_region_create_pfn_seed(struct nd_region *nd_region); +void nd_region_create_dax_seed(struct nd_region *nd_region); void nd_region_disable(struct nvdimm_bus *nvdimm_bus, struct device *dev); int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus); void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus); diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 6c36509662e4..46910b8f32b1 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -101,10 +101,12 @@ struct nd_region { struct ida ns_ida; struct ida btt_ida; struct ida pfn_ida; + struct ida dax_ida; unsigned long flags; struct device 
*ns_seed; struct device *btt_seed; struct device *pfn_seed; + struct device *dax_seed; u16 ndr_mappings; u64 ndr_size; u64 ndr_start; @@ -161,6 +163,10 @@ struct nd_pfn { struct nd_namespace_common *ndns; }; +struct nd_dax { + struct nd_pfn nd_pfn; +}; + enum nd_async_mode { ND_SYNC, ND_ASYNC, @@ -224,7 +230,10 @@ struct nd_pfn *to_nd_pfn(struct device *dev); int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns); bool is_nd_pfn(struct device *dev); struct device *nd_pfn_create(struct nd_region *nd_region); +struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, + struct nd_namespace_common *ndns); int nd_pfn_validate(struct nd_pfn *nd_pfn); +extern struct attribute_group nd_pfn_attribute_group; #else static inline int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) @@ -248,6 +257,22 @@ static inline int nd_pfn_validate(struct nd_pfn *nd_pfn) } #endif +struct nd_dax *to_nd_dax(struct device *dev); +#if IS_ENABLED(CONFIG_NVDIMM_DAX) +bool is_nd_dax(struct device *dev); +struct device *nd_dax_create(struct nd_region *nd_region); +#else +static inline bool is_nd_dax(struct device *dev) +{ + return false; +} + +static inline struct device *nd_dax_create(struct nd_region *nd_region) +{ + return NULL; +} +#endif + struct nd_region *to_nd_region(struct device *dev); int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index e8693fe65e49..6ade2eb7615d 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -1,5 +1,5 @@ /* - * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. 
* * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -54,10 +54,29 @@ struct nd_pfn *to_nd_pfn(struct device *dev) } EXPORT_SYMBOL(to_nd_pfn); +static struct nd_pfn *to_nd_pfn_safe(struct device *dev) +{ + /* + * pfn device attributes are re-used by dax device instances, so we + * need to be careful to correct device-to-nd_pfn conversion. + */ + if (is_nd_pfn(dev)) + return to_nd_pfn(dev); + + if (is_nd_dax(dev)) { + struct nd_dax *nd_dax = to_nd_dax(dev); + + return &nd_dax->nd_pfn; + } + + WARN_ON(1); + return NULL; +} + static ssize_t mode_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); switch (nd_pfn->mode) { case PFN_MODE_RAM: @@ -72,7 +91,7 @@ static ssize_t mode_show(struct device *dev, static ssize_t mode_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc = 0; device_lock(dev); @@ -106,7 +125,7 @@ static DEVICE_ATTR_RW(mode); static ssize_t align_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); return sprintf(buf, "%lx\n", nd_pfn->align); } @@ -134,7 +153,7 @@ static ssize_t __align_store(struct nd_pfn *nd_pfn, const char *buf) static ssize_t align_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -152,7 +171,7 @@ static DEVICE_ATTR_RW(align); static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); if (nd_pfn->uuid) return sprintf(buf, 
"%pUb\n", nd_pfn->uuid); @@ -162,7 +181,7 @@ static ssize_t uuid_show(struct device *dev, static ssize_t uuid_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -178,7 +197,7 @@ static DEVICE_ATTR_RW(uuid); static ssize_t namespace_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; nvdimm_bus_lock(dev); @@ -191,7 +210,7 @@ static ssize_t namespace_show(struct device *dev, static ssize_t namespace_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -209,7 +228,7 @@ static DEVICE_ATTR_RW(namespace); static ssize_t resource_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -235,7 +254,7 @@ static DEVICE_ATTR_RO(resource); static ssize_t size_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct nd_pfn *nd_pfn = to_nd_pfn(dev); + struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); ssize_t rc; device_lock(dev); @@ -270,7 +289,7 @@ static struct attribute *nd_pfn_attributes[] = { NULL, }; -static struct attribute_group nd_pfn_attribute_group = { +struct attribute_group nd_pfn_attribute_group = { .attrs = nd_pfn_attributes, }; @@ -281,15 +300,31 @@ static const struct attribute_group *nd_pfn_attribute_groups[] = { NULL, }; -static struct device *__nd_pfn_create(struct nd_region *nd_region, +struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, struct nd_namespace_common *ndns) { - struct nd_pfn *nd_pfn; - struct device *dev; + struct device *dev = &nd_pfn->dev; - /* we can only create pages 
for contiguous ranged of pmem */ - if (!is_nd_pmem(&nd_region->dev)) + if (!nd_pfn) + return NULL; + + nd_pfn->mode = PFN_MODE_NONE; + nd_pfn->align = HPAGE_SIZE; + dev = &nd_pfn->dev; + device_initialize(&nd_pfn->dev); + if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { + dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", + __func__, dev_name(ndns->claim)); + put_device(dev); return NULL; + } + return dev; +} + +static struct nd_pfn *nd_pfn_alloc(struct nd_region *nd_region) +{ + struct nd_pfn *nd_pfn; + struct device *dev; nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); if (!nd_pfn) @@ -301,29 +336,27 @@ static struct device *__nd_pfn_create(struct nd_region *nd_region, return NULL; } - nd_pfn->mode = PFN_MODE_NONE; - nd_pfn->align = HPAGE_SIZE; dev = &nd_pfn->dev; dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); - dev->parent = &nd_region->dev; - dev->type = &nd_pfn_device_type; dev->groups = nd_pfn_attribute_groups; - device_initialize(&nd_pfn->dev); - if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { - dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", - __func__, dev_name(ndns->claim)); - put_device(dev); - return NULL; - } - return dev; + dev->type = &nd_pfn_device_type; + dev->parent = &nd_region->dev; + + return nd_pfn; } struct device *nd_pfn_create(struct nd_region *nd_region) { - struct device *dev = __nd_pfn_create(nd_region, NULL); + struct nd_pfn *nd_pfn; + struct device *dev; + + if (!is_nd_pmem(&nd_region->dev)) + return NULL; + + nd_pfn = nd_pfn_alloc(nd_region); + dev = nd_pfn_devinit(nd_pfn, NULL); - if (dev) - __nd_device_register(dev); + __nd_device_register(dev); return dev; } @@ -423,7 +456,8 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) return -ENODEV; nvdimm_bus_lock(&ndns->dev); - pfn_dev = __nd_pfn_create(nd_region, ndns); + nd_pfn = nd_pfn_alloc(nd_region); + pfn_dev = nd_pfn_devinit(nd_pfn, ndns); nvdimm_bus_unlock(&ndns->dev); if (!pfn_dev) return 
-ENOMEM; diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 4b7715e29cff..05a912359939 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -54,6 +54,7 @@ static int nd_region_probe(struct device *dev) nd_region->btt_seed = nd_btt_create(nd_region); nd_region->pfn_seed = nd_pfn_create(nd_region); + nd_region->dax_seed = nd_dax_create(nd_region); if (err == 0) return 0; @@ -86,6 +87,7 @@ static int nd_region_remove(struct device *dev) nd_region->ns_seed = NULL; nd_region->btt_seed = NULL; nd_region->pfn_seed = NULL; + nd_region->dax_seed = NULL; dev_set_drvdata(dev, NULL); nvdimm_bus_unlock(dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 139bf71ca549..9e1b054e0e61 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -306,6 +306,23 @@ static ssize_t pfn_seed_show(struct device *dev, } static DEVICE_ATTR_RO(pfn_seed); +static ssize_t dax_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->dax_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->dax_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(dax_seed); + static ssize_t read_only_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -335,6 +352,7 @@ static struct attribute *nd_region_attributes[] = { &dev_attr_mappings.attr, &dev_attr_btt_seed.attr, &dev_attr_pfn_seed.attr, + &dev_attr_dax_seed.attr, &dev_attr_read_only.attr, &dev_attr_set_cookie.attr, &dev_attr_available_size.attr, @@ -353,6 +371,9 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n) if (!is_nd_pmem(dev) && a == &dev_attr_pfn_seed.attr) return 0; + if (!is_nd_pmem(dev) && a == &dev_attr_dax_seed.attr) + return 0; + if (a != &dev_attr_set_cookie.attr && a != &dev_attr_available_size.attr) return a->mode; @@ 
-441,6 +462,13 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus, nd_region_create_pfn_seed(nd_region); nvdimm_bus_unlock(dev); } + if (is_nd_dax(dev) && probe) { + nd_region = to_nd_region(dev->parent); + nvdimm_bus_lock(dev); + if (nd_region->dax_seed == dev) + nd_region_create_dax_seed(nd_region); + nvdimm_bus_unlock(dev); + } } void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev) @@ -718,6 +746,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, ida_init(&nd_region->ns_ida); ida_init(&nd_region->btt_ida); ida_init(&nd_region->pfn_ida); + ida_init(&nd_region->dax_ida); dev = &nd_region->dev; dev_set_name(dev, "region%d", nd_region->id); dev->parent = &nvdimm_bus->dev; diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index 7cc28ab05b87..4f29d247f709 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -206,6 +206,7 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_DEVICE_NAMESPACE_IO 4 /* legacy persistent memory */ #define ND_DEVICE_NAMESPACE_PMEM 5 /* PMEM namespace (may alias with BLK) */ #define ND_DEVICE_NAMESPACE_BLK 6 /* BLK namespace (may alias with PMEM) */ +#define ND_DEVICE_DAX_PMEM 7 /* Device DAX interface to pmem */ enum nd_driver_flags { ND_DRIVER_DIMM = 1 << ND_DEVICE_DIMM, @@ -214,6 +215,7 @@ enum nd_driver_flags { ND_DRIVER_NAMESPACE_IO = 1 << ND_DEVICE_NAMESPACE_IO, ND_DRIVER_NAMESPACE_PMEM = 1 << ND_DEVICE_NAMESPACE_PMEM, ND_DRIVER_NAMESPACE_BLK = 1 << ND_DEVICE_NAMESPACE_BLK, + ND_DRIVER_DAX_PMEM = 1 << ND_DEVICE_DAX_PMEM, }; enum { diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index d5bc8c080b44..5ff6d3c126a9 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -50,6 +50,7 @@ libnvdimm-y += $(NVDIMM_SRC)/label.o libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o libnvdimm-$(CONFIG_NVDIMM_PFN) += 
$(NVDIMM_SRC)/pfn_devs.o +libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o libnvdimm-y += config_check.o obj-m += test/ -- cgit v1.2.3-59-g8ed1b From acc93d30d7d43f428272c20a047389c4cbca82ba Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 7 May 2016 11:40:28 -0700 Subject: Revert "block: enable dax for raw block devices" This reverts commit 5a023cdba50c5f5f2bc351783b3131699deb3937. The functionality is superseded by the new "Device DAX" facility. Cc: Jeff Moyer Cc: Christoph Hellwig Cc: Dave Chinner Cc: Andrew Morton Cc: Ross Zwisler Cc: Jan Kara Signed-off-by: Dan Williams --- block/ioctl.c | 32 ----------------- fs/block_dev.c | 96 +++++++++++++++---------------------------------- include/linux/fs.h | 8 ----- include/uapi/linux/fs.h | 1 - 4 files changed, 29 insertions(+), 108 deletions(-) (limited to 'include') diff --git a/block/ioctl.c b/block/ioctl.c index 4ff1f92f89ca..698c7933d582 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -407,35 +407,6 @@ static inline int is_unrecognized_ioctl(int ret) ret == -ENOIOCTLCMD; } -#ifdef CONFIG_FS_DAX -bool blkdev_dax_capable(struct block_device *bdev) -{ - struct gendisk *disk = bdev->bd_disk; - - if (!disk->fops->direct_access) - return false; - - /* - * If the partition is not aligned on a page boundary, we can't - * do dax I/O to it. - */ - if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) - || (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) - return false; - - /* - * If the device has known bad blocks, force all I/O through the - * driver / page cache. 
- * - * TODO: support finer grained dax error handling - */ - if (disk->bb && disk->bb->count) - return false; - - return true; -} -#endif - static int blkdev_flushbuf(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { @@ -598,9 +569,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKTRACESETUP: case BLKTRACETEARDOWN: return blk_trace_ioctl(bdev, cmd, argp); - case BLKDAXGET: - return put_int(arg, !!(bdev->bd_inode->i_flags & S_DAX)); - break; case IOC_PR_REGISTER: return blkdev_pr_register(bdev, argp); case IOC_PR_RESERVE: diff --git a/fs/block_dev.c b/fs/block_dev.c index 20a2c02b77c4..36ee10ca503e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include "internal.h" @@ -1159,6 +1160,33 @@ void bd_set_size(struct block_device *bdev, loff_t size) } EXPORT_SYMBOL(bd_set_size); +static bool blkdev_dax_capable(struct block_device *bdev) +{ + struct gendisk *disk = bdev->bd_disk; + + if (!disk->fops->direct_access || !IS_ENABLED(CONFIG_FS_DAX)) + return false; + + /* + * If the partition is not aligned on a page boundary, we can't + * do dax I/O to it. + */ + if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) + || (bdev->bd_part->nr_sects % (PAGE_SIZE / 512))) + return false; + + /* + * If the device has known bad blocks, force all I/O through the + * driver / page cache. + * + * TODO: support finer grained dax error handling + */ + if (disk->bb && disk->bb->count) + return false; + + return true; +} + static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); /* @@ -1724,79 +1752,13 @@ static const struct address_space_operations def_blk_aops = { .is_dirty_writeback = buffer_check_dirty_writeback, }; -#ifdef CONFIG_FS_DAX -/* - * In the raw block case we do not need to contend with truncation nor - * unwritten file extents. 
Without those concerns there is no need for - * additional locking beyond the mmap_sem context that these routines - * are already executing under. - * - * Note, there is no protection if the block device is dynamically - * resized (partition grow/shrink) during a fault. A stable block device - * size is already not enforced in the blkdev_direct_IO path. - * - * For DAX, it is the responsibility of the block device driver to - * ensure the whole-disk device size is stable while requests are in - * flight. - * - * Finally, unlike the filemap_page_mkwrite() case there is no - * filesystem superblock to sync against freezing. We still include a - * pfn_mkwrite callback for dax drivers to receive write fault - * notifications. - */ -static int blkdev_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - return __dax_fault(vma, vmf, blkdev_get_block, NULL); -} - -static int blkdev_dax_pfn_mkwrite(struct vm_area_struct *vma, - struct vm_fault *vmf) -{ - return dax_pfn_mkwrite(vma, vmf); -} - -static int blkdev_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr, - pmd_t *pmd, unsigned int flags) -{ - return __dax_pmd_fault(vma, addr, pmd, flags, blkdev_get_block, NULL); -} - -static const struct vm_operations_struct blkdev_dax_vm_ops = { - .fault = blkdev_dax_fault, - .pmd_fault = blkdev_dax_pmd_fault, - .pfn_mkwrite = blkdev_dax_pfn_mkwrite, -}; - -static const struct vm_operations_struct blkdev_default_vm_ops = { - .fault = filemap_fault, - .map_pages = filemap_map_pages, -}; - -static int blkdev_mmap(struct file *file, struct vm_area_struct *vma) -{ - struct inode *bd_inode = bdev_file_inode(file); - - file_accessed(file); - if (IS_DAX(bd_inode)) { - vma->vm_ops = &blkdev_dax_vm_ops; - vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE; - } else { - vma->vm_ops = &blkdev_default_vm_ops; - } - - return 0; -} -#else -#define blkdev_mmap generic_file_mmap -#endif - const struct file_operations def_blk_fops = { .open = blkdev_open, .release = blkdev_close, 
.llseek = block_llseek, .read_iter = blkdev_read_iter, .write_iter = blkdev_write_iter, - .mmap = blkdev_mmap, + .mmap = generic_file_mmap, .fsync = blkdev_fsync, .unlocked_ioctl = block_ioctl, #ifdef CONFIG_COMPAT diff --git a/include/linux/fs.h b/include/linux/fs.h index 70e61b58baaf..8363a10660f6 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2320,14 +2320,6 @@ extern struct super_block *freeze_bdev(struct block_device *); extern void emergency_thaw_all(void); extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); extern int fsync_bdev(struct block_device *); -#ifdef CONFIG_FS_DAX -extern bool blkdev_dax_capable(struct block_device *bdev); -#else -static inline bool blkdev_dax_capable(struct block_device *bdev) -{ - return false; -} -#endif extern struct super_block *blockdev_superblock; diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index a079d50376e1..fbff8b28aa35 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -222,7 +222,6 @@ struct fsxattr { #define BLKSECDISCARD _IO(0x12,125) #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) -#define BLKDAXGET _IO(0x12,129) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ -- cgit v1.2.3-59-g8ed1b