From be26f9ae022ad09967be7a83c58ce605014e939a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 1 Feb 2016 17:48:42 -0800 Subject: nfit, tools/testing/nvdimm: add format interface code definitions ACPI 6.1 and JEDEC Annex L Release 3 formalize the format interface code. Add definitions and update their usage in the unit test. Signed-off-by: Dan Williams --- drivers/acpi/nfit.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers') diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 3d549a383659..6689b0aaf194 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -40,6 +40,12 @@ enum nfit_uuids { NFIT_UUID_MAX, }; +enum nfit_fic { + NFIT_FIC_BYTE = 0x101, /* byte-addressable energy backed */ + NFIT_FIC_BLK = 0x201, /* block-addressable non-energy backed */ + NFIT_FIC_BYTEN = 0x301, /* byte-addressable non-energy backed */ +}; + enum { ND_BLK_READ_FLUSH = 1, ND_BLK_DCR_LATCH = 2, -- cgit v1.2.3-59-g8ed1b From aef25338226660cdd4df908c2eff1abdcfca65e5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 12 Feb 2016 17:01:11 -0800 Subject: libnvdimm, nfit: centralize command status translation The return value from an 'ndctl_fn' reports the command execution status, i.e. was the command properly formatted and was it successfully submitted to the bus provider. The new 'cmd_rc' parameter allows the bus provider to communicate command specific results, translated into common error codes. Convert the ARS commands to this scheme to: 1/ Consolidate status reporting 2/ Prepare for for expanding ars unit test cases 3/ Make the implementation more generic Cc: Vishal Verma Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 179 ++++++++++++++++++++++++++------------- drivers/acpi/nfit.h | 11 ++- drivers/nvdimm/bus.c | 2 +- drivers/nvdimm/dimm_devs.c | 6 +- include/linux/libnvdimm.h | 2 +- tools/testing/nvdimm/test/nfit.c | 5 +- 6 files changed, 138 insertions(+), 67 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 35947ac87644..4dd2b6808df5 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -72,9 +72,80 @@ static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) return to_acpi_device(acpi_desc->dev); } +static int xlat_status(void *buf, unsigned int cmd) +{ + struct nd_cmd_ars_status *ars_status; + struct nd_cmd_ars_start *ars_start; + struct nd_cmd_ars_cap *ars_cap; + u16 flags; + + switch (cmd) { + case ND_CMD_ARS_CAP: + ars_cap = buf; + if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE) + return -ENOTTY; + + /* Command failed */ + if (ars_cap->status & 0xffff) + return -EIO; + + /* No supported scan types for this range */ + flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE; + if ((ars_cap->status >> 16 & flags) == 0) + return -ENOTTY; + break; + case ND_CMD_ARS_START: + ars_start = buf; + /* ARS is in progress */ + if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY) + return -EBUSY; + + /* Command failed */ + if (ars_start->status & 0xffff) + return -EIO; + break; + case ND_CMD_ARS_STATUS: + ars_status = buf; + /* Command failed */ + if (ars_status->status & 0xffff) + return -EIO; + /* Check extended status (Upper two bytes) */ + if (ars_status->status == NFIT_ARS_STATUS_DONE) + return 0; + + /* ARS is in progress */ + if (ars_status->status == NFIT_ARS_STATUS_BUSY) + return -EBUSY; + + /* No ARS performed for the current boot */ + if (ars_status->status == NFIT_ARS_STATUS_NONE) + return -EAGAIN; + + /* + * ARS interrupted, either we overflowed or some other + * agent wants the scan to stop. If we didn't overflow + * then just continue with the returned results. + */ + if (ars_status->status == NFIT_ARS_STATUS_INTR) { + if (ars_status->flags & NFIT_ARS_F_OVERFLOW) + return -ENOSPC; + return 0; + } + + /* Unknown status */ + if (ars_status->status >> 16) + return -EIO; + break; + default: + break; + } + + return 0; +} + static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, - unsigned int buf_len) + unsigned int buf_len, int *cmd_rc) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); const struct nd_cmd_desc *desc = NULL; @@ -185,6 +256,8 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, * unfilled in the output buffer */ rc = buf_len - offset - in_buf.buffer.length; + if (cmd_rc) + *cmd_rc = xlat_status(buf, cmd); } else { dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n", __func__, dimm_name, cmd_name, buf_len, @@ -1105,7 +1178,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, writeq(cmd, mmio->addr.base + offset); wmb_blk(nfit_blk); - if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH) + if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH) readq(mmio->addr.base + offset); } @@ -1141,7 +1214,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, memcpy_to_pmem(mmio->addr.aperture + offset, iobuf + copied, c); else { - if (nfit_blk->dimm_flags & ND_BLK_READ_FLUSH) + if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH) mmio_flush_range((void __force *) mmio->addr.aperture + offset, c); @@ -1328,13 +1401,13 @@ static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc, memset(&flags, 0, sizeof(flags)); rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags, - sizeof(flags)); + sizeof(flags), NULL); if (rc >= 0 && flags.status == 0) nfit_blk->dimm_flags = flags.flags; else if (rc == -ENOTTY) { /* fall back to a conservative default */ - nfit_blk->dimm_flags = ND_BLK_DCR_LATCH | ND_BLK_READ_FLUSH; + nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH; rc = 0; } else rc = -ENXIO; @@ -1473,19 +1546,27 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus, /* devm will free nfit_blk */ } -static int ars_get_cap(struct nvdimm_bus_descriptor *nd_desc, +static int ars_get_cap(struct acpi_nfit_desc *acpi_desc, struct nd_cmd_ars_cap *cmd, u64 addr, u64 length) { + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + int cmd_rc, rc; + cmd->address = addr; cmd->length = length; - - return nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd, - sizeof(*cmd)); + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd, + sizeof(*cmd), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + return 0; } static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc, struct nd_cmd_ars_start *cmd, u64 addr, u64 length) { + int cmd_rc; int rc; cmd->address = addr; @@ -1494,52 +1575,49 @@ static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc, while (1) { rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, cmd, - sizeof(*cmd)); - if (rc) + sizeof(*cmd), &cmd_rc); + + if (rc < 0) return rc; - switch (cmd->status) { - case 0: - return 0; - case 1: - /* ARS unsupported, but we should never get here */ - return 0; - case 6: + + if (cmd_rc == -EBUSY) { /* ARS is in progress */ msleep(1000); - break; - default: - return -ENXIO; + continue; } + + if (cmd_rc < 0) + return cmd_rc; + + return 0; } } static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc, struct nd_cmd_ars_status *cmd, u32 size) { - int rc; + int rc, cmd_rc; while (1) { rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, cmd, - size); - if (rc || cmd->status & 0xffff) - return -ENXIO; + size, &cmd_rc); + if (rc < 0) + return rc; - /* Check extended status (Upper two bytes) */ - switch (cmd->status >> 16) { - case 0: - return 0; - case 1: - /* ARS is in progress */ + /* FIXME make async and have a timeout */ + if (cmd_rc == -EBUSY) { msleep(1000); - break; - case 2: - /* No ARS performed for the current boot */ + continue; + } + + if (cmd_rc == -EAGAIN || cmd_rc == 0) return 0; - case 3: - /* TODO: error list overflow support */ - default: + + /* TODO: error list overflow support */ + if (cmd_rc == -ENOSPC) return -ENXIO; - } + + return cmd_rc; } } @@ -1590,28 +1668,11 @@ static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc, start = ndr_desc->res->start; len = ndr_desc->res->end - ndr_desc->res->start + 1; - /* - * If ARS is unimplemented, unsupported, or if the 'Persistent Memory - * Scrub' flag in extended status is not set, skip this but continue - * initialization - */ - rc = ars_get_cap(nd_desc, ars_cap, start, len); + rc = ars_get_cap(acpi_desc, ars_cap, start, len); if (rc == -ENOTTY) { - dev_dbg(acpi_desc->dev, - "Address Range Scrub is not implemented, won't create an error list\n"); rc = 0; goto out; } - if (rc) - goto out; - - if ((ars_cap->status & 0xffff) || - !(ars_cap->status >> 16 & ND_ARS_PERSISTENT)) { - dev_warn(acpi_desc->dev, - "ARS unsupported (status: 0x%x), won't create an error list\n", - ars_cap->status); - goto out; - } /* * Check if a full-range ARS has been run. If so, use those results @@ -1651,15 +1712,15 @@ static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc, u64 done, end; rc = ars_do_start(nd_desc, ars_start, cur, remaining); - if (rc) + if (rc < 0) goto out; rc = ars_get_status(nd_desc, ars_status, ars_status_size); - if (rc) + if (rc < 0) goto out; rc = ars_status_process_records(nvdimm_bus, ars_status, cur); - if (rc) + if (rc < 0) goto out; end = min(cur + remaining, diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 6689b0aaf194..f45b7d9b1d9e 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -47,8 +47,15 @@ enum nfit_fic { }; enum { - ND_BLK_READ_FLUSH = 1, - ND_BLK_DCR_LATCH = 2, + NFIT_BLK_READ_FLUSH = 1, + NFIT_BLK_DCR_LATCH = 2, + NFIT_ARS_STATUS_DONE = 0, + NFIT_ARS_STATUS_BUSY = 1 << 16, + NFIT_ARS_STATUS_NONE = 2 << 16, + NFIT_ARS_STATUS_INTR = 3 << 16, + NFIT_ARS_START_BUSY = 6, + NFIT_ARS_CAP_NONE = 1, + NFIT_ARS_F_OVERFLOW = 1, }; struct nfit_spa { diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 5d28e9405f32..c3ba888e3e3a 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -587,7 +587,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, if (rc) goto out_unlock; - rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len); + rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL); if (rc < 0) goto out_unlock; if (copy_to_user(p, buf, buf_len)) diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 651b8d19d324..c56f88217924 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -75,7 +75,7 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) memset(cmd, 0, sizeof(*cmd)); nd_desc = nvdimm_bus->nd_desc; return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd)); + ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd), NULL); } int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) @@ -120,7 +120,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) cmd->in_offset = offset; rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), ND_CMD_GET_CONFIG_DATA, cmd, - cmd->in_length + sizeof(*cmd)); + cmd->in_length + sizeof(*cmd), NULL); if (rc || cmd->status) { rc = -ENXIO; break; @@ -171,7 +171,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, status = ((void *) cmd) + cmd_size - sizeof(u32); rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_SET_CONFIG_DATA, cmd, cmd_size); + ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, NULL); if (rc || *status) { rc = rc ? rc : -ENXIO; break; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 141ffdd59960..f398953270d1 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -48,7 +48,7 @@ struct nvdimm; struct nvdimm_bus_descriptor; typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, - unsigned int buf_len); + unsigned int buf_len, int *cmd_rc); struct nd_namespace_label; struct nvdimm_drvdata; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 27b808e0489a..9ead77970546 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -261,7 +261,7 @@ static int nfit_test_cmd_ars_status(struct nd_cmd_ars_status *nd_cmd, static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, - unsigned int buf_len) + unsigned int buf_len, int *cmd_rc) { struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); @@ -315,6 +315,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, } } + /* TODO: error status tests */ + if (cmd_rc) + *cmd_rc = 0; return rc; } -- cgit v1.2.3-59-g8ed1b From 5faecf4eb0d7d67e809a4bc9059c764c27670832 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 17 Feb 2016 15:25:36 -0800 Subject: libnvdimm: protect nvdimm_{bus|namespace}_add_poison() with nvdimm_bus_lock() In preparation for making poison list retrieval asynchronus to region registration, add protection for walking and mutating the bus-level poison list. Cc: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/core.c | 101 +++++++++++++++++++++++++++++++------------------- 1 file changed, 63 insertions(+), 38 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 2e2832b83c93..f309e6bf6833 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -408,33 +408,11 @@ static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len) set_badblock(bb, start_sector, num_sectors); } -/** - * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks - * @ndns: the namespace containing poison ranges - * @bb: badblocks instance to populate - * @offset: offset at the start of the namespace before 'sector 0' - * - * The poison list generated during NFIT initialization may contain multiple, - * possibly overlapping ranges in the SPA (System Physical Address) space. - * Compare each of these ranges to the namespace currently being initialized, - * and add badblocks to the gendisk for all matching sub-ranges - */ -void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, - struct badblocks *bb, resource_size_t offset) +static void namespace_add_poison(struct list_head *poison_list, + struct badblocks *bb, struct resource *res) { - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - struct nd_region *nd_region = to_nd_region(ndns->dev.parent); - struct nvdimm_bus *nvdimm_bus; - struct list_head *poison_list; - u64 ns_start, ns_end, ns_size; struct nd_poison *pl; - ns_size = nvdimm_namespace_capacity(ndns) - offset; - ns_start = nsio->res.start + offset; - ns_end = nsio->res.end; - - nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent); - poison_list = &nvdimm_bus->poison_list; if (list_empty(poison_list)) return; @@ -442,37 +420,69 @@ void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, u64 pl_end = pl->start + pl->length - 1; /* Discard intervals with no intersection */ - if (pl_end < ns_start) + if (pl_end < res->start) continue; - if (pl->start > ns_end) + if (pl->start > res->end) continue; /* Deal with any overlap after start of the namespace */ - if (pl->start >= ns_start) { + if (pl->start >= res->start) { u64 start = pl->start; u64 len; - if (pl_end <= ns_end) + if (pl_end <= res->end) len = pl->length; else - len = ns_start + ns_size - pl->start; - __add_badblock_range(bb, start - ns_start, len); + len = res->start + resource_size(res) + - pl->start; + __add_badblock_range(bb, start - res->start, len); continue; } /* Deal with overlap for poison starting before the namespace */ - if (pl->start < ns_start) { + if (pl->start < res->start) { u64 len; - if (pl_end < ns_end) - len = pl->start + pl->length - ns_start; + if (pl_end < res->end) + len = pl->start + pl->length - res->start; else - len = ns_size; + len = resource_size(res); __add_badblock_range(bb, 0, len); } } } + +/** + * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks + * @ndns: the namespace containing poison ranges + * @bb: badblocks instance to populate + * @offset: offset at the start of the namespace before 'sector 0' + * + * The poison list generated during NFIT initialization may contain multiple, + * possibly overlapping ranges in the SPA (System Physical Address) space. + * Compare each of these ranges to the namespace currently being initialized, + * and add badblocks to the gendisk for all matching sub-ranges + */ +void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, + struct badblocks *bb, resource_size_t offset) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + struct nvdimm_bus *nvdimm_bus; + struct list_head *poison_list; + struct resource res = { + .start = nsio->res.start + offset, + .end = nsio->res.end, + }; + + nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent); + poison_list = &nvdimm_bus->poison_list; + + nvdimm_bus_lock(&nvdimm_bus->dev); + namespace_add_poison(poison_list, bb, &res); + nvdimm_bus_unlock(&nvdimm_bus->dev); +} EXPORT_SYMBOL_GPL(nvdimm_namespace_add_poison); -static int __add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) +static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) { struct nd_poison *pl; @@ -487,12 +497,12 @@ static int __add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) return 0; } -int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) +static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) { struct nd_poison *pl; if (list_empty(&nvdimm_bus->poison_list)) - return __add_poison(nvdimm_bus, addr, length); + return add_poison(nvdimm_bus, addr, length); /* * There is a chance this is a duplicate, check for those first. @@ -512,7 +522,18 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) * as any overlapping ranges will get resolved when the list is consumed * and converted to badblocks */ - return __add_poison(nvdimm_bus, addr, length); + return add_poison(nvdimm_bus, addr, length); +} + +int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) +{ + int rc; + + nvdimm_bus_lock(&nvdimm_bus->dev); + rc = bus_add_poison(nvdimm_bus, addr, length); + nvdimm_bus_unlock(&nvdimm_bus->dev); + + return rc; } EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison); @@ -553,7 +574,11 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) nd_synchronize(); device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); + + nvdimm_bus_lock(&nvdimm_bus->dev); free_poison_list(&nvdimm_bus->poison_list); + nvdimm_bus_unlock(&nvdimm_bus->dev); + nvdimm_bus_destroy_ndctl(nvdimm_bus); device_unregister(&nvdimm_bus->dev); -- cgit v1.2.3-59-g8ed1b From 719994660c249a086a7493205c7f1562e30c38cb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 18 Feb 2016 10:29:49 -0800 Subject: libnvdimm: async notification support In preparation for asynchronous address range scrub support add an ability for the pmem driver to dynamically consume address range scrub results. Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 26 ++++++++++++++++++++++++++ drivers/nvdimm/nd.h | 2 ++ drivers/nvdimm/pmem.c | 15 +++++++++++++++ drivers/nvdimm/region.c | 12 ++++++++++++ include/linux/nd.h | 7 +++++++ 5 files changed, 62 insertions(+) (limited to 'drivers') diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index c3ba888e3e3a..2508251439e7 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -133,6 +133,32 @@ static int nvdimm_bus_remove(struct device *dev) return rc; } +void nd_device_notify(struct device *dev, enum nvdimm_event event) +{ + device_lock(dev); + if (dev->driver) { + struct nd_device_driver *nd_drv; + + nd_drv = to_nd_device_driver(dev->driver); + if (nd_drv->notify) + nd_drv->notify(dev, event); + } + device_unlock(dev); +} +EXPORT_SYMBOL(nd_device_notify); + +void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + + if (!nvdimm_bus) + return; + + /* caller is responsible for holding a reference on the device */ + nd_device_notify(&nd_region->dev, event); +} +EXPORT_SYMBOL_GPL(nvdimm_region_notify); + static struct bus_type nvdimm_bus_type = { .name = "nd", .uevent = nvdimm_bus_uevent, diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index ba1633b9da31..78b82f6dd191 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -18,6 +18,7 @@ #include #include #include +#include #include "label.h" enum { @@ -168,6 +169,7 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size); void wait_nvdimm_bus_probe_idle(struct device *dev); void nd_device_register(struct device *dev); void nd_device_unregister(struct device *dev, enum nd_async_mode mode); +void nd_device_notify(struct device *dev, enum nvdimm_event event); int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, size_t len); ssize_t nd_sector_size_show(unsigned long current_lbasize, diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 8d0b54670184..efc2a5e671c6 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -488,12 +488,27 @@ static int nd_pmem_remove(struct device *dev) return 0; } +static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) +{ + struct pmem_device *pmem = dev_get_drvdata(dev); + struct nd_namespace_common *ndns = pmem->ndns; + + if (event != NVDIMM_REVALIDATE_POISON) + return; + + if (is_nd_btt(dev)) + nvdimm_namespace_add_poison(ndns, &pmem->bb, 0); + else + nvdimm_namespace_add_poison(ndns, &pmem->bb, pmem->data_offset); +} + MODULE_ALIAS("pmem"); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); static struct nd_device_driver nd_pmem_driver = { .probe = nd_pmem_probe, .remove = nd_pmem_remove, + .notify = nd_pmem_notify, .drv = { .name = "nd_pmem", }, diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 7da63eac78ee..4b7715e29cff 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -93,9 +93,21 @@ static int nd_region_remove(struct device *dev) return 0; } +static int child_notify(struct device *dev, void *data) +{ + nd_device_notify(dev, *(enum nvdimm_event *) data); + return 0; +} + +static void nd_region_notify(struct device *dev, enum nvdimm_event event) +{ + device_for_each_child(dev, &event, child_notify); +} + static struct nd_device_driver nd_region_driver = { .probe = nd_region_probe, .remove = nd_region_remove, + .notify = nd_region_notify, .drv = { .name = "nd_region", }, diff --git a/include/linux/nd.h b/include/linux/nd.h index 507e47c86737..5489ab756d1a 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -16,11 +16,16 @@ #include #include +enum nvdimm_event { + NVDIMM_REVALIDATE_POISON, +}; + struct nd_device_driver { struct device_driver drv; unsigned long type; int (*probe)(struct device *dev); int (*remove)(struct device *dev); + void (*notify)(struct device *dev, enum nvdimm_event event); }; static inline struct nd_device_driver *to_nd_device_driver( @@ -144,6 +149,8 @@ static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns, MODULE_ALIAS("nd:t" __stringify(type) "*") #define ND_DEVICE_MODALIAS_FMT "nd:t%d" +struct nd_region; +void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event); int __must_check __nd_driver_register(struct nd_device_driver *nd_drv, struct module *module, const char *mod_name); #define nd_driver_register(driver) \ -- cgit v1.2.3-59-g8ed1b From a61fe6f7902ecaa89d5e6c709490fc4324927134 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 19 Feb 2016 12:29:32 -0800 Subject: nfit, tools/testing/nvdimm: unify common init for acpi_nfit_desc The nvdimm unit test infrastructure performs its own initialization of an acpi_nfit_desc to specify test overrides over the native implementation. Make it clear which attributes and operations it is overriding by re-using acpi_nfit_init_desc() as a common starting point. Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 44 +++++++++++++++------------------------- drivers/acpi/nfit.h | 2 +- tools/testing/nvdimm/test/nfit.c | 22 +++----------------- 3 files changed, 20 insertions(+), 48 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 4dd2b6808df5..76c9444c9c60 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -748,12 +748,11 @@ static struct attribute_group acpi_nfit_attribute_group = { .attrs = acpi_nfit_attributes, }; -const struct attribute_group *acpi_nfit_attribute_groups[] = { +static const struct attribute_group *acpi_nfit_attribute_groups[] = { &nvdimm_bus_attribute_group, &acpi_nfit_attribute_group, NULL, }; -EXPORT_SYMBOL_GPL(acpi_nfit_attribute_groups); static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev) { @@ -1962,15 +1961,9 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) } EXPORT_SYMBOL_GPL(acpi_nfit_init); -static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev) +void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) { struct nvdimm_bus_descriptor *nd_desc; - struct acpi_nfit_desc *acpi_desc; - struct device *dev = &adev->dev; - - acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); - if (!acpi_desc) - return ERR_PTR(-ENOMEM); dev_set_drvdata(dev, acpi_desc); acpi_desc->dev = dev; @@ -1980,12 +1973,6 @@ static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev) nd_desc->ndctl = acpi_nfit_ctl; nd_desc->attr_groups = acpi_nfit_attribute_groups; - acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, nd_desc); - if (!acpi_desc->nvdimm_bus) { - devm_kfree(dev, acpi_desc); - return ERR_PTR(-ENXIO); - } - INIT_LIST_HEAD(&acpi_desc->spa_maps); INIT_LIST_HEAD(&acpi_desc->spas); INIT_LIST_HEAD(&acpi_desc->dcrs); @@ -1996,9 +1983,8 @@ static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev) INIT_LIST_HEAD(&acpi_desc->dimms); mutex_init(&acpi_desc->spa_map_mutex); mutex_init(&acpi_desc->init_mutex); - - return acpi_desc; } +EXPORT_SYMBOL_GPL(acpi_nfit_desc_init); static int acpi_nfit_add(struct acpi_device *adev) { @@ -2017,12 +2003,13 @@ static int acpi_nfit_add(struct acpi_device *adev) return 0; } - acpi_desc = acpi_nfit_desc_init(adev); - if (IS_ERR(acpi_desc)) { - dev_err(dev, "%s: error initializing acpi_desc: %ld\n", - __func__, PTR_ERR(acpi_desc)); - return PTR_ERR(acpi_desc); - } + acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); + if (!acpi_desc) + return -ENOMEM; + acpi_nfit_desc_init(acpi_desc, &adev->dev); + acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); + if (!acpi_desc->nvdimm_bus) + return -ENOMEM; /* * Save the acpi header for later and then skip it, @@ -2085,12 +2072,13 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event) } if (!acpi_desc) { - acpi_desc = acpi_nfit_desc_init(adev); - if (IS_ERR(acpi_desc)) { - dev_err(dev, "%s: error initializing acpi_desc: %ld\n", - __func__, PTR_ERR(acpi_desc)); + acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); + if (!acpi_desc) + goto out_unlock; + acpi_nfit_desc_init(acpi_desc, &adev->dev); + acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); + if (!acpi_desc->nvdimm_bus) goto out_unlock; - } } /* Evaluate _FIT */ diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index f45b7d9b1d9e..524dec0a3adb 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -195,5 +195,5 @@ static inline struct acpi_nfit_desc *to_acpi_desc( const u8 *to_nfit_uuid(enum nfit_uuids id); int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz); -extern const struct attribute_group *acpi_nfit_attribute_groups[]; +void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev); #endif /* __NFIT_H__ */ diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 9ead77970546..a66842e61bbc 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -499,7 +499,6 @@ static int nfit_test1_alloc(struct nfit_test *t) static void nfit_test0_setup(struct nfit_test *t) { - struct nvdimm_bus_descriptor *nd_desc; struct acpi_nfit_desc *acpi_desc; struct acpi_nfit_memory_map *memdev; void *nfit_buf = t->nfit_buf; @@ -1165,8 +1164,6 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); - nd_desc = &acpi_desc->nd_desc; - nd_desc->ndctl = nfit_test_ctl; } static void nfit_test1_setup(struct nfit_test *t) @@ -1176,7 +1173,6 @@ static void nfit_test1_setup(struct nfit_test *t) struct acpi_nfit_memory_map *memdev; struct acpi_nfit_control_region *dcr; struct acpi_nfit_system_address *spa; - struct nvdimm_bus_descriptor *nd_desc; struct acpi_nfit_desc *acpi_desc; offset = 0; @@ -1226,8 +1222,6 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); - nd_desc = &acpi_desc->nd_desc; - nd_desc->ndctl = nfit_test_ctl; } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, @@ -1310,26 +1304,16 @@ static int nfit_test_probe(struct platform_device *pdev) nfit_test->setup(nfit_test); acpi_desc = &nfit_test->acpi_desc; - acpi_desc->dev = &pdev->dev; + acpi_nfit_desc_init(acpi_desc, &pdev->dev); acpi_desc->nfit = nfit_test->nfit_buf; acpi_desc->blk_do_io = nfit_test_blk_do_io; nd_desc = &acpi_desc->nd_desc; - nd_desc->attr_groups = acpi_nfit_attribute_groups; + nd_desc->provider_name = NULL; + nd_desc->ndctl = nfit_test_ctl; acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc); if (!acpi_desc->nvdimm_bus) return -ENXIO; - INIT_LIST_HEAD(&acpi_desc->spa_maps); - INIT_LIST_HEAD(&acpi_desc->spas); - INIT_LIST_HEAD(&acpi_desc->dcrs); - INIT_LIST_HEAD(&acpi_desc->bdws); - INIT_LIST_HEAD(&acpi_desc->idts); - INIT_LIST_HEAD(&acpi_desc->flushes); - INIT_LIST_HEAD(&acpi_desc->memdevs); - INIT_LIST_HEAD(&acpi_desc->dimms); - mutex_init(&acpi_desc->spa_map_mutex); - mutex_init(&acpi_desc->init_mutex); - rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); if (rc) { nvdimm_bus_unregister(acpi_desc->nvdimm_bus); -- cgit v1.2.3-59-g8ed1b From 7ae0fa439faff000744b234d04cb470bfd83593b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 19 Feb 2016 12:16:34 -0800 Subject: nfit, libnvdimm: async region scrub workqueue Introduce a workqueue that will be used to run address range scrub asynchronously with the rest of nvdimm device probing. Userspace still wants notification when probing operations complete, so introduce a new callback to flush this workqueue when userspace is awaiting probe completion. Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++ drivers/acpi/nfit.h | 3 +++ drivers/nvdimm/core.c | 9 +++++++++ include/linux/libnvdimm.h | 1 + 4 files changed, 62 insertions(+) (limited to 'drivers') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 76c9444c9c60..4aa2bd54fc1d 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -34,6 +34,8 @@ static bool force_enable_dimms; module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); +static struct workqueue_struct *nfit_wq; + struct nfit_table_prev { struct list_head spas; struct list_head memdevs; @@ -1961,6 +1963,39 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) } EXPORT_SYMBOL_GPL(acpi_nfit_init); +struct acpi_nfit_flush_work { + struct work_struct work; + struct completion cmp; +}; + +static void flush_probe(struct work_struct *work) +{ + struct acpi_nfit_flush_work *flush; + + flush = container_of(work, typeof(*flush), work); + complete(&flush->cmp); +} + +static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + struct device *dev = acpi_desc->dev; + struct acpi_nfit_flush_work flush; + + /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ + device_lock(dev); + device_unlock(dev); + + /* + * Scrub work could take 10s of seconds, userspace may give up so we + * need to be interruptible while waiting. + */ + INIT_WORK_ONSTACK(&flush.work, flush_probe); + COMPLETION_INITIALIZER_ONSTACK(flush.cmp); + queue_work(nfit_wq, &flush.work); + return wait_for_completion_interruptible(&flush.cmp); +} + void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) { struct nvdimm_bus_descriptor *nd_desc; @@ -1971,6 +2006,7 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) nd_desc = &acpi_desc->nd_desc; nd_desc->provider_name = "ACPI.NFIT"; nd_desc->ndctl = acpi_nfit_ctl; + nd_desc->flush_probe = acpi_nfit_flush_probe; nd_desc->attr_groups = acpi_nfit_attribute_groups; INIT_LIST_HEAD(&acpi_desc->spa_maps); @@ -2048,6 +2084,8 @@ static int acpi_nfit_remove(struct acpi_device *adev) { struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); + acpi_desc->cancel = 1; + flush_workqueue(nfit_wq); nvdimm_bus_unregister(acpi_desc->nvdimm_bus); return 0; } @@ -2079,6 +2117,12 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event) acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); if (!acpi_desc->nvdimm_bus) goto out_unlock; + } else { + /* + * Finish previous registration before considering new + * regions. + */ + flush_workqueue(nfit_wq); } /* Evaluate _FIT */ @@ -2146,12 +2190,17 @@ static __init int nfit_init(void) acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]); acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]); + nfit_wq = create_singlethread_workqueue("nfit"); + if (!nfit_wq) + return -ENOMEM; + return acpi_bus_register_driver(&acpi_nfit_driver); } static __exit void nfit_exit(void) { acpi_bus_unregister_driver(&acpi_nfit_driver); + destroy_workqueue(nfit_wq); } module_init(nfit_init); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 524dec0a3adb..e8388fee4cc6 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -14,6 +14,7 @@ */ #ifndef __NFIT_H__ #define __NFIT_H__ +#include #include #include #include @@ -123,6 +124,8 @@ struct acpi_nfit_desc { struct list_head idts; struct nvdimm_bus *nvdimm_bus; struct device *dev; + struct work_struct work; + unsigned int cancel:1; unsigned long dimm_dsm_force_en; unsigned long bus_dsm_force_en; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index f309e6bf6833..79646d0c3277 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -298,6 +298,15 @@ static int flush_regions_dimms(struct device *dev, void *data) static ssize_t wait_probe_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + int rc; + + if (nd_desc->flush_probe) { + rc = nd_desc->flush_probe(nd_desc); + if (rc) + return rc; + } nd_synchronize(); device_for_each_child(dev, NULL, flush_regions_dimms); return sprintf(buf, "1\n"); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index f398953270d1..2299f8742f7f 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -71,6 +71,7 @@ struct nvdimm_bus_descriptor { unsigned long dsm_mask; char *provider_name; ndctl_fn ndctl; + int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); }; struct nd_cmd_desc { -- cgit v1.2.3-59-g8ed1b From 1cf03c00e7c17d3cf13a267dac83b3162a16ba8c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 17 Feb 2016 13:01:23 -0800 Subject: nfit: scrub and register regions in a workqueue Address range scrub is a potentially long running process that we want to complete before any pmem regions are registered. Perform this operation asynchronously to allow other drivers to load in the meantime. Platform firmware may have initiated a partial scrub prior to the driver loading, so we must be careful to consume those results before kicking off kernel initiated scrubs on other regions. This rework also makes the registration path more tolerant of scrub errors in that it splits scrubbing into 2 phases. The first phase synchronously waits for a platform-firmware initiated scrub to complete. The second phase scans the remaining address ranges asynchronously and notifies the related driver(s) when the scrub completes. Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 537 +++++++++++++++++++++++++++++++++++----------------- drivers/acpi/nfit.h | 8 +- 2 files changed, 371 insertions(+), 174 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 4aa2bd54fc1d..3646501b01d7 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "nfit.h" @@ -34,6 +35,16 @@ static bool force_enable_dimms; module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); +static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT; +module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds"); + +/* after three payloads of overflow, it's dead jim */ +static unsigned int scrub_overflow_abort = 3; +module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(scrub_overflow_abort, + "Number of times we overflow ARS results before abort"); + static struct workqueue_struct *nfit_wq; struct nfit_table_prev { @@ -1548,97 +1559,84 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus, } static int ars_get_cap(struct acpi_nfit_desc *acpi_desc, - struct nd_cmd_ars_cap *cmd, u64 addr, u64 length) + struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa) { struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + struct acpi_nfit_system_address *spa = nfit_spa->spa; int cmd_rc, rc; - cmd->address = addr; - cmd->length = length; + cmd->address = spa->address; + cmd->length = spa->length; rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd, sizeof(*cmd), &cmd_rc); if (rc < 0) return rc; - if (cmd_rc < 0) - return cmd_rc; - return 0; + return cmd_rc; } -static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc, - struct nd_cmd_ars_start *cmd, u64 addr, u64 length) +static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa) { - int cmd_rc; int rc; + int cmd_rc; + struct nd_cmd_ars_start ars_start; + struct acpi_nfit_system_address *spa = nfit_spa->spa; + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; - cmd->address = addr; - cmd->length = length; - cmd->type = ND_ARS_PERSISTENT; - - while (1) { - rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, cmd, - sizeof(*cmd), &cmd_rc); - - if (rc < 0) - return rc; - - if (cmd_rc == -EBUSY) { - /* ARS is in progress */ - msleep(1000); - continue; - } + memset(&ars_start, 0, sizeof(ars_start)); + ars_start.address = spa->address; + ars_start.length = spa->length; + if (nfit_spa_type(spa) == NFIT_SPA_PM) + ars_start.type = ND_ARS_PERSISTENT; + else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) + ars_start.type = ND_ARS_VOLATILE; + else + return -ENOTTY; - if (cmd_rc < 0) - return cmd_rc; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start, + sizeof(ars_start), &cmd_rc); - return 0; - } + if (rc < 0) + return rc; + return cmd_rc; } -static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc, - struct nd_cmd_ars_status *cmd, u32 size) +static int ars_continue(struct acpi_nfit_desc *acpi_desc) { int rc, cmd_rc; + struct nd_cmd_ars_start ars_start; + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status; + + memset(&ars_start, 0, sizeof(ars_start)); + ars_start.address = ars_status->restart_address; + ars_start.length = ars_status->restart_length; + ars_start.type = ars_status->type; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start, + sizeof(ars_start), &cmd_rc); + if (rc < 0) + return rc; + return cmd_rc; +} - while (1) { - rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, cmd, - size, &cmd_rc); - if (rc < 0) - return rc; - - /* FIXME make async and have a timeout */ - if (cmd_rc == -EBUSY) { - msleep(1000); - continue; - } - - if (cmd_rc == -EAGAIN || cmd_rc == 0) - return 0; - - /* TODO: error list overflow support */ - if (cmd_rc == -ENOSPC) - return -ENXIO; +static int ars_get_status(struct acpi_nfit_desc *acpi_desc) +{ + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status; + int rc, cmd_rc; - return cmd_rc; - } + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status, + acpi_desc->ars_status_size, &cmd_rc); + if (rc < 0) + return rc; + return cmd_rc; } static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus, - struct nd_cmd_ars_status *ars_status, u64 start) + struct nd_cmd_ars_status *ars_status) { int rc; u32 i; - /* - * The address field returned by ars_status should be either - * less than or equal to the address we last started ARS for. - * The (start, length) returned by ars_status should also have - * non-zero overlap with the range we started ARS for. - * If this is not the case, bail. - */ - if (ars_status->address > start || - (ars_status->address + ars_status->length < start)) - return -ENXIO; - for (i = 0; i < ars_status->num_records; i++) { rc = nvdimm_bus_add_poison(nvdimm_bus, ars_status->records[i].err_address, @@ -1650,101 +1648,14 @@ static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus, return 0; } -static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc, - struct nd_region_desc *ndr_desc) -{ - struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; - struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus; - struct nd_cmd_ars_status *ars_status = NULL; - struct nd_cmd_ars_start *ars_start = NULL; - struct nd_cmd_ars_cap *ars_cap = NULL; - u64 start, len, cur, remaining; - u32 ars_status_size; - int rc; - - ars_cap = kzalloc(sizeof(*ars_cap), GFP_KERNEL); - if (!ars_cap) - return -ENOMEM; - - start = ndr_desc->res->start; - len = ndr_desc->res->end - ndr_desc->res->start + 1; - - rc = ars_get_cap(acpi_desc, ars_cap, start, len); - if (rc == -ENOTTY) { - rc = 0; - goto out; - } - - /* - * Check if a full-range ARS has been run. If so, use those results - * without having to start a new ARS. - */ - ars_status_size = ars_cap->max_ars_out; - ars_status = kzalloc(ars_status_size, GFP_KERNEL); - if (!ars_status) { - rc = -ENOMEM; - goto out; - } - - rc = ars_get_status(nd_desc, ars_status, ars_status_size); - if (rc) - goto out; - - if (ars_status->address <= start && - (ars_status->address + ars_status->length >= start + len)) { - rc = ars_status_process_records(nvdimm_bus, ars_status, start); - goto out; - } - - /* - * ARS_STATUS can overflow if the number of poison entries found is - * greater than the maximum buffer size (ars_cap->max_ars_out) - * To detect overflow, check if the length field of ars_status - * is less than the length we supplied. If so, process the - * error entries we got, adjust the start point, and start again - */ - ars_start = kzalloc(sizeof(*ars_start), GFP_KERNEL); - if (!ars_start) - return -ENOMEM; - - cur = start; - remaining = len; - do { - u64 done, end; - - rc = ars_do_start(nd_desc, ars_start, cur, remaining); - if (rc < 0) - goto out; - - rc = ars_get_status(nd_desc, ars_status, ars_status_size); - if (rc < 0) - goto out; - - rc = ars_status_process_records(nvdimm_bus, ars_status, cur); - if (rc < 0) - goto out; - - end = min(cur + remaining, - ars_status->address + ars_status->length); - done = end - cur; - cur += done; - remaining -= done; - } while (remaining); - - out: - kfree(ars_cap); - kfree(ars_start); - kfree(ars_status); - return rc; -} - static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc, struct acpi_nfit_memory_map *memdev, - struct acpi_nfit_system_address *spa) + struct nfit_spa *nfit_spa) { struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, memdev->device_handle); + struct acpi_nfit_system_address *spa = nfit_spa->spa; struct nd_blk_region_desc *ndbr_desc; struct nfit_mem *nfit_mem; int blk_valid = 0; @@ -1780,7 +1691,9 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, ndbr_desc->enable = acpi_nfit_blk_region_enable; ndbr_desc->disable = acpi_nfit_blk_region_disable; ndbr_desc->do_io = acpi_desc->blk_do_io; - if (!nvdimm_blk_region_create(acpi_desc->nvdimm_bus, ndr_desc)) + nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus, + ndr_desc); + if (!nfit_spa->nd_region) return -ENOMEM; break; } @@ -1800,7 +1713,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, struct resource res; int count = 0, rc; - if (nfit_spa->is_registered) + if (nfit_spa->nd_region) return 0; if (spa->range_index == 0) { @@ -1837,47 +1750,324 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, } nd_mapping = &nd_mappings[count++]; rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc, - memdev, spa); + memdev, nfit_spa); if (rc) - return rc; + goto out; } ndr_desc->nd_mapping = nd_mappings; ndr_desc->num_mappings = count; rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa); if (rc) - return rc; + goto out; nvdimm_bus = acpi_desc->nvdimm_bus; if (nfit_spa_type(spa) == NFIT_SPA_PM) { - rc = acpi_nfit_find_poison(acpi_desc, ndr_desc); - if (rc) { - dev_err(acpi_desc->dev, - "error while performing ARS to find poison: %d\n", - rc); - return rc; - } - if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc)) - return -ENOMEM; + nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus, + ndr_desc); + if (!nfit_spa->nd_region) + rc = -ENOMEM; } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) { - if (!nvdimm_volatile_region_create(nvdimm_bus, ndr_desc)) - return -ENOMEM; + nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus, + ndr_desc); + if (!nfit_spa->nd_region) + rc = -ENOMEM; } - nfit_spa->is_registered = 1; + out: + if (rc) + dev_err(acpi_desc->dev, "failed to register spa range %d\n", + nfit_spa->spa->range_index); + return rc; +} + +static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc, + u32 max_ars) +{ + struct device *dev = acpi_desc->dev; + struct nd_cmd_ars_status *ars_status; + + if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) { + memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size); + return 0; + } + + if (acpi_desc->ars_status) + devm_kfree(dev, acpi_desc->ars_status); + acpi_desc->ars_status = NULL; + ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL); + if (!ars_status) + return -ENOMEM; + acpi_desc->ars_status = ars_status; + acpi_desc->ars_status_size = max_ars; return 0; } -static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) +static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) +{ + struct acpi_nfit_system_address *spa = nfit_spa->spa; + int rc; + + if (!nfit_spa->max_ars) { + struct nd_cmd_ars_cap ars_cap; + + memset(&ars_cap, 0, sizeof(ars_cap)); + rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa); + if (rc < 0) + return rc; + nfit_spa->max_ars = ars_cap.max_ars_out; + nfit_spa->clear_err_unit = ars_cap.clear_err_unit; + /* check that the supported scrub types match the spa type */ + if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE && + ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0) + return -ENOTTY; + else if (nfit_spa_type(spa) == NFIT_SPA_PM && + ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0) + return -ENOTTY; + } + + if (ars_status_alloc(acpi_desc, nfit_spa->max_ars)) + return -ENOMEM; + + rc = ars_get_status(acpi_desc); + if (rc < 0 && rc != -ENOSPC) + return rc; + + if (ars_status_process_records(acpi_desc->nvdimm_bus, + acpi_desc->ars_status)) + return -ENOMEM; + + return 0; +} + +static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) +{ + struct acpi_nfit_system_address *spa = nfit_spa->spa; + unsigned int overflow_retry = scrub_overflow_abort; + u64 init_ars_start = 0, init_ars_len = 0; + struct device *dev = acpi_desc->dev; + unsigned int tmo = scrub_timeout; + int rc; + + if (nfit_spa->ars_done || !nfit_spa->nd_region) + return; + + rc = ars_start(acpi_desc, nfit_spa); + /* + * If we timed out the initial scan we'll still be busy here, + * and will wait another timeout before giving up permanently. + */ + if (rc < 0 && rc != -EBUSY) + return; + + do { + u64 ars_start, ars_len; + + if (acpi_desc->cancel) + break; + rc = acpi_nfit_query_poison(acpi_desc, nfit_spa); + if (rc == -ENOTTY) + break; + if (rc == -EBUSY && !tmo) { + dev_warn(dev, "range %d ars timeout, aborting\n", + spa->range_index); + break; + } + + if (rc == -EBUSY) { + /* + * Note, entries may be appended to the list + * while the lock is dropped, but the workqueue + * being active prevents entries being deleted / + * freed. + */ + mutex_unlock(&acpi_desc->init_mutex); + ssleep(1); + tmo--; + mutex_lock(&acpi_desc->init_mutex); + continue; + } + + /* we got some results, but there are more pending... */ + if (rc == -ENOSPC && overflow_retry--) { + if (!init_ars_len) { + init_ars_len = acpi_desc->ars_status->length; + init_ars_start = acpi_desc->ars_status->address; + } + rc = ars_continue(acpi_desc); + } + + if (rc < 0) { + dev_warn(dev, "range %d ars continuation failed\n", + spa->range_index); + break; + } + + if (init_ars_len) { + ars_start = init_ars_start; + ars_len = init_ars_len; + } else { + ars_start = acpi_desc->ars_status->address; + ars_len = acpi_desc->ars_status->length; + } + dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n", + spa->range_index, ars_start, ars_len); + /* notify the region about new poison entries */ + nvdimm_region_notify(nfit_spa->nd_region, + NVDIMM_REVALIDATE_POISON); + break; + } while (1); +} + +static void acpi_nfit_scrub(struct work_struct *work) { + struct device *dev; + u64 init_scrub_length = 0; struct nfit_spa *nfit_spa; + u64 init_scrub_address = 0; + bool init_ars_done = false; + struct acpi_nfit_desc *acpi_desc; + unsigned int tmo = scrub_timeout; + unsigned int overflow_retry = scrub_overflow_abort; + + acpi_desc = container_of(work, typeof(*acpi_desc), work); + dev = acpi_desc->dev; + /* + * We scrub in 2 phases. The first phase waits for any platform + * firmware initiated scrubs to complete and then we go search for the + * affected spa regions to mark them scanned. In the second phase we + * initiate a directed scrub for every range that was not scrubbed in + * phase 1. + */ + + /* process platform firmware initiated scrubs */ + retry: + mutex_lock(&acpi_desc->init_mutex); list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { - int rc = acpi_nfit_register_region(acpi_desc, nfit_spa); + struct nd_cmd_ars_status *ars_status; + struct acpi_nfit_system_address *spa; + u64 ars_start, ars_len; + int rc; - if (rc) - return rc; + if (acpi_desc->cancel) + break; + + if (nfit_spa->nd_region) + continue; + + if (init_ars_done) { + /* + * No need to re-query, we're now just + * reconciling all the ranges covered by the + * initial scrub + */ + rc = 0; + } else + rc = acpi_nfit_query_poison(acpi_desc, nfit_spa); + + if (rc == -ENOTTY) { + /* no ars capability, just register spa and move on */ + acpi_nfit_register_region(acpi_desc, nfit_spa); + continue; + } + + if (rc == -EBUSY && !tmo) { + /* fallthrough to directed scrub in phase 2 */ + dev_warn(dev, "timeout awaiting ars results, continuing...\n"); + break; + } else if (rc == -EBUSY) { + mutex_unlock(&acpi_desc->init_mutex); + ssleep(1); + tmo--; + goto retry; + } + + /* we got some results, but there are more pending... */ + if (rc == -ENOSPC && overflow_retry--) { + ars_status = acpi_desc->ars_status; + /* + * Record the original scrub range, so that we + * can recall all the ranges impacted by the + * initial scrub. + */ + if (!init_scrub_length) { + init_scrub_length = ars_status->length; + init_scrub_address = ars_status->address; + } + rc = ars_continue(acpi_desc); + if (rc == 0) { + mutex_unlock(&acpi_desc->init_mutex); + goto retry; + } + } + + if (rc < 0) { + /* + * Initial scrub failed, we'll give it one more + * try below... + */ + break; + } + + /* We got some final results, record completed ranges */ + ars_status = acpi_desc->ars_status; + if (init_scrub_length) { + ars_start = init_scrub_address; + ars_len = ars_start + init_scrub_length; + } else { + ars_start = ars_status->address; + ars_len = ars_status->length; + } + spa = nfit_spa->spa; + + if (!init_ars_done) { + init_ars_done = true; + dev_dbg(dev, "init scrub %#llx + %#llx complete\n", + ars_start, ars_len); + } + if (ars_start <= spa->address && ars_start + ars_len + >= spa->address + spa->length) + acpi_nfit_register_region(acpi_desc, nfit_spa); + } + + /* + * For all the ranges not covered by an initial scrub we still + * want to see if there are errors, but it's ok to discover them + * asynchronously. + */ + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + /* + * Flag all the ranges that still need scrubbing, but + * register them now to make data available. + */ + if (nfit_spa->nd_region) + nfit_spa->ars_done = 1; + else + acpi_nfit_register_region(acpi_desc, nfit_spa); } + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) + acpi_nfit_async_scrub(acpi_desc, nfit_spa); + mutex_unlock(&acpi_desc->init_mutex); +} + +static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) +{ + struct nfit_spa *nfit_spa; + int rc; + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) + if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) { + /* BLK regions don't need to wait for ars results */ + rc = acpi_nfit_register_region(acpi_desc, nfit_spa); + if (rc) + return rc; + } + + queue_work(nfit_wq, &acpi_desc->work); return 0; } @@ -2019,6 +2209,7 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) INIT_LIST_HEAD(&acpi_desc->dimms); mutex_init(&acpi_desc->spa_map_mutex); mutex_init(&acpi_desc->init_mutex); + INIT_WORK(&acpi_desc->work, acpi_nfit_scrub); } EXPORT_SYMBOL_GPL(acpi_nfit_desc_init); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index e8388fee4cc6..c75576b2d50e 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -57,12 +57,16 @@ enum { NFIT_ARS_START_BUSY = 6, NFIT_ARS_CAP_NONE = 1, NFIT_ARS_F_OVERFLOW = 1, + NFIT_ARS_TIMEOUT = 90, }; struct nfit_spa { struct acpi_nfit_system_address *spa; struct list_head list; - int is_registered; + struct nd_region *nd_region; + unsigned int ars_done:1; + u32 clear_err_unit; + u32 max_ars; }; struct nfit_dcr { @@ -124,6 +128,8 @@ struct acpi_nfit_desc { struct list_head idts; struct nvdimm_bus *nvdimm_bus; struct device *dev; + struct nd_cmd_ars_status *ars_status; + size_t ars_status_size; struct work_struct work; unsigned int cancel:1; unsigned long dimm_dsm_force_en; -- cgit v1.2.3-59-g8ed1b From 87bf572e19a092cc9cc77d5a00d543a2b628c142 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 22 Feb 2016 21:50:31 -0800 Subject: nfit: disable userspace initiated ars during scrub While the nfit driver is issuing address range scrub commands and reaping the results do not permit an ars_start command issued from userspace. The scrub thread assumes that all ars completions are for scrubs initiated by platform firmware at boot, or by the nfit driver. Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 23 +++++++++++++++++++++++ drivers/nvdimm/bus.c | 18 +++++++++++++----- include/linux/libnvdimm.h | 2 ++ 3 files changed, 38 insertions(+), 5 deletions(-) (limited to 'drivers') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 3646501b01d7..0def4ebf5d43 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -2186,6 +2186,28 @@ static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) return wait_for_completion_interruptible(&flush.cmp); } +static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + + if (nvdimm) + return 0; + if (cmd != ND_CMD_ARS_START) + return 0; + + /* + * The kernel and userspace may race to initiate a scrub, but + * the scrub thread is prepared to lose that initial race. It + * just needs guarantees that any ars it initiates are not + * interrupted by any intervening start reqeusts from userspace. + */ + if (work_busy(&acpi_desc->work)) + return -EBUSY; + + return 0; +} + void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) { struct nvdimm_bus_descriptor *nd_desc; @@ -2197,6 +2219,7 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) nd_desc->provider_name = "ACPI.NFIT"; nd_desc->ndctl = acpi_nfit_ctl; nd_desc->flush_probe = acpi_nfit_flush_probe; + nd_desc->clear_to_send = acpi_nfit_clear_to_send; nd_desc->attr_groups = acpi_nfit_attribute_groups; INIT_LIST_HEAD(&acpi_desc->spa_maps); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 2508251439e7..228c0e9f430e 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -490,16 +490,24 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) } /* set_config requires an idle interleave set */ -static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd) +static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, + struct nvdimm *nvdimm, unsigned int cmd) { - struct nvdimm_bus *nvdimm_bus; + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + + /* ask the bus provider if it would like to block this request */ + if (nd_desc->clear_to_send) { + int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd); + + if (rc) + return rc; + } if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA) return 0; - nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev); + /* prevent label manipulation while the kernel owns label updates */ wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev); - if (atomic_read(&nvdimm->busy)) return -EBUSY; return 0; @@ -609,7 +617,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, } nvdimm_bus_lock(&nvdimm_bus->dev); - rc = nd_cmd_clear_to_send(nvdimm, cmd); + rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd); if (rc) goto out_unlock; diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 2299f8742f7f..833867b9ddc2 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -72,6 +72,8 @@ struct nvdimm_bus_descriptor { char *provider_name; ndctl_fn ndctl; int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc); + int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd); }; struct nd_cmd_desc { -- cgit v1.2.3-59-g8ed1b From 4dc0e7be884e0f99211107dd75e6e1884b7b3754 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Wed, 6 Jan 2016 16:03:39 -0700 Subject: libnvdimm: Clean-up access mode check. Change nd_ioctl and nvdimm_ioctl access mode check to use O_RDONLY. Signed-off-by: Jerry Hoemann Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 228c0e9f430e..f85227a5ee28 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -636,14 +636,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { long id = (long) file->private_data; - int rc = -ENXIO, read_only; + int rc = -ENXIO, ro; struct nvdimm_bus *nvdimm_bus; - read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + ro = ((file->f_flags & O_ACCMODE) == O_RDONLY); mutex_lock(&nvdimm_bus_list_mutex); list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { if (nvdimm_bus->id == id) { - rc = __nd_ioctl(nvdimm_bus, NULL, read_only, cmd, arg); + rc = __nd_ioctl(nvdimm_bus, NULL, ro, cmd, arg); break; } } @@ -667,10 +667,10 @@ static int match_dimm(struct device *dev, void *data) static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - int rc = -ENXIO, read_only; + int rc = -ENXIO, ro; struct nvdimm_bus *nvdimm_bus; - read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + ro = ((file->f_flags & O_ACCMODE) == O_RDONLY); mutex_lock(&nvdimm_bus_list_mutex); list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { struct device *dev = device_find_child(&nvdimm_bus->dev, @@ -681,7 +681,7 @@ static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) continue; nvdimm = to_nvdimm(dev); - rc = __nd_ioctl(nvdimm_bus, nvdimm, read_only, cmd, arg); + rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg); put_device(dev); break; } -- cgit v1.2.3-59-g8ed1b From 07accfa9d1a8bac8262f6d24a94a54d2d1f35149 Mon Sep 17 00:00:00 2001 From: Jerry Hoemann Date: Wed, 6 Jan 2016 16:03:41 -0700 Subject: libnvdimm: Fix security issue with DSM IOCTL. Code attempts to prevent certain IOCTL DSM from being called when device is opened read only. This security feature can be trivially overcome by changing the size portion of the ioctl_command which isn't used. Check only the _IOC_NR (i.e. the command). Cc: Signed-off-by: Jerry Hoemann Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index f85227a5ee28..2e9ac22595ec 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -547,10 +547,10 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, /* fail write commands (when read-only) */ if (read_only) - switch (ioctl_cmd) { - case ND_IOCTL_VENDOR: - case ND_IOCTL_SET_CONFIG_DATA: - case ND_IOCTL_ARS_START: + switch (cmd) { + case ND_CMD_VENDOR: + case ND_CMD_SET_CONFIG_DATA: + case ND_CMD_ARS_START: dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n", nvdimm ? nvdimm_cmd_name(cmd) : nvdimm_bus_cmd_name(cmd)); -- cgit v1.2.3-59-g8ed1b From d9cbe09d39aa13f6924dc5fb88325de7ef01a72e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Mar 2016 09:14:36 -0800 Subject: libnvdimm, pmem: fix 'pfn' support for section-misaligned namespaces The altmap for a section-misaligned namespace needs to arrange for the base_pfn to be section-aligned. As a result the 'reserve' region (pfns from base that do not have a struct page) must be increased. Otherwise we trip the altmap validation check in __add_pages: if (altmap->base_pfn != phys_start_pfn || vmem_altmap_offset(altmap) > nr_pages) { pr_warn_once("memory add fail, invalid altmap\n"); return -EINVAL; } Signed-off-by: Dan Williams --- drivers/nvdimm/pfn.h | 13 +++++++++++++ drivers/nvdimm/pmem.c | 24 ++++++++++++++++++++++-- 2 files changed, 35 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h index cc243754acef..6ee707e5b279 100644 --- a/drivers/nvdimm/pfn.h +++ b/drivers/nvdimm/pfn.h @@ -15,6 +15,7 @@ #define __NVDIMM_PFN_H #include +#include #define PFN_SIG_LEN 16 #define PFN_SIG "NVDIMM_PFN_INFO\0" @@ -32,4 +33,16 @@ struct nd_pfn_sb { u8 padding[4012]; __le64 checksum; }; + +#ifdef CONFIG_SPARSEMEM +#define PFN_SECTION_ALIGN_DOWN(x) SECTION_ALIGN_DOWN(x) +#define PFN_SECTION_ALIGN_UP(x) SECTION_ALIGN_UP(x) +#else +/* + * In this case ZONE_DEVICE=n and we will disable 'pfn' device support, + * but we still want pmem to compile. + */ +#define PFN_SECTION_ALIGN_DOWN(x) (x) +#define PFN_SECTION_ALIGN_UP(x) (x) +#endif #endif /* __NVDIMM_PFN_H */ diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 8d0b54670184..59d568ab7556 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -356,6 +356,26 @@ static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns) return 0; } +/* + * We hotplug memory at section granularity, pad the reserved area from + * the previous section base to the namespace base address. + */ +static unsigned long init_altmap_base(resource_size_t base) +{ + unsigned long base_pfn = __phys_to_pfn(base); + + return PFN_SECTION_ALIGN_DOWN(base_pfn); +} + +static unsigned long init_altmap_reserve(resource_size_t base) +{ + unsigned long reserve = __phys_to_pfn(SZ_8K); + unsigned long base_pfn = __phys_to_pfn(base); + + reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); + return reserve; +} + static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) { struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); @@ -369,8 +389,8 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) phys_addr_t offset; int rc; struct vmem_altmap __altmap = { - .base_pfn = __phys_to_pfn(nsio->res.start), - .reserve = __phys_to_pfn(SZ_8K), + .base_pfn = init_altmap_base(nsio->res.start), + .reserve = init_altmap_reserve(nsio->res.start), }; if (!nd_pfn->uuid || !nd_pfn->ndns) -- cgit v1.2.3-59-g8ed1b From cfe30b872058f211630eda7f65fb19d83beaaa3c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Mar 2016 09:38:00 -0800 Subject: libnvdimm, pmem: adjust for section collisions with 'System RAM' On a platform where 'Persistent Memory' and 'System RAM' are mixed within a given sparsemem section, trim the namespace and notify about the sub-optimal alignment. Cc: Toshi Kani Cc: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 7 +++ drivers/nvdimm/pfn.h | 10 +++- drivers/nvdimm/pfn_devs.c | 5 ++ drivers/nvdimm/pmem.c | 125 +++++++++++++++++++++++++++++----------- 4 files changed, 111 insertions(+), 36 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 9edf7eb7d17c..f5cb88601359 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -133,6 +133,7 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid) bool pmem_should_map_pages(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_io *nsio; if (!IS_ENABLED(CONFIG_ZONE_DEVICE)) return false; @@ -143,6 +144,12 @@ bool pmem_should_map_pages(struct device *dev) if (is_nd_pfn(dev) || is_nd_btt(dev)) return false; + nsio = to_nd_namespace_io(dev); + if (region_intersects(nsio->res.start, resource_size(&nsio->res), + IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED) + return false; + #ifdef ARCH_MEMREMAP_PMEM return ARCH_MEMREMAP_PMEM == MEMREMAP_WB; #else diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h index 6ee707e5b279..8e343a3ca873 100644 --- a/drivers/nvdimm/pfn.h +++ b/drivers/nvdimm/pfn.h @@ -27,10 +27,13 @@ struct nd_pfn_sb { __le32 flags; __le16 version_major; __le16 version_minor; - __le64 dataoff; + __le64 dataoff; /* relative to namespace_base + start_pad */ __le64 npfns; __le32 mode; - u8 padding[4012]; + /* minor-version-1 additions for section alignment */ + __le32 start_pad; + __le32 end_trunc; + u8 padding[4004]; __le64 checksum; }; @@ -45,4 +48,7 @@ struct nd_pfn_sb { #define PFN_SECTION_ALIGN_DOWN(x) (x) #define PFN_SECTION_ALIGN_UP(x) (x) #endif + +#define PHYS_SECTION_ALIGN_DOWN(x) PFN_PHYS(PFN_SECTION_ALIGN_DOWN(PHYS_PFN(x))) +#define PHYS_SECTION_ALIGN_UP(x) PFN_PHYS(PFN_SECTION_ALIGN_UP(PHYS_PFN(x))) #endif /* __NVDIMM_PFN_H */ diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index ae81a2f1da50..75a31a7359fb 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -299,6 +299,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) if (memcmp(pfn_sb->parent_uuid, parent_uuid, 16) != 0) return -ENODEV; + if (__le16_to_cpu(pfn_sb->version_minor) < 1) { + pfn_sb->start_pad = 0; + pfn_sb->end_trunc = 0; + } + switch (le32_to_cpu(pfn_sb->mode)) { case PFN_MODE_RAM: case PFN_MODE_PMEM: diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 59d568ab7556..0cb450e1b400 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -43,7 +43,10 @@ struct pmem_device { phys_addr_t data_offset; u64 pfn_flags; void __pmem *virt_addr; + /* immutable base size of the namespace */ size_t size; + /* trim size when namespace capacity has been section aligned */ + u32 pfn_pad; struct badblocks bb; }; @@ -145,7 +148,7 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector, *kaddr = pmem->virt_addr + offset; *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); - return pmem->size - offset; + return pmem->size - pmem->pfn_pad - offset; } static const struct block_device_operations pmem_fops = { @@ -236,7 +239,8 @@ static int pmem_attach_disk(struct device *dev, disk->flags = GENHD_FL_EXT_DEVT; nvdimm_namespace_disk_name(ndns, disk->disk_name); disk->driverfs_dev = dev; - set_capacity(disk, (pmem->size - pmem->data_offset) / 512); + set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) + / 512); pmem->pmem_disk = disk; devm_exit_badblocks(dev, &pmem->bb); if (devm_init_badblocks(dev, &pmem->bb)) @@ -279,6 +283,9 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL); struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev); struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = 0, end_trunc = 0; + resource_size_t start, size; + struct nd_namespace_io *nsio; struct nd_region *nd_region; unsigned long npfns; phys_addr_t offset; @@ -304,21 +311,56 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) } memset(pfn_sb, 0, sizeof(*pfn_sb)); - npfns = (pmem->size - SZ_8K) / SZ_4K; + + /* + * Check if pmem collides with 'System RAM' when section aligned and + * trim it accordingly + */ + nsio = to_nd_namespace_io(&ndns->dev); + start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start); + size = resource_size(&nsio->res); + if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED) { + + start = nsio->res.start; + start_pad = PHYS_SECTION_ALIGN_UP(start) - start; + } + + start = nsio->res.start; + size = PHYS_SECTION_ALIGN_UP(start + size) - start; + if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED) { + size = resource_size(&nsio->res); + end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size); + } + + if (start_pad + end_trunc) + dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n", + dev_name(&ndns->dev), start_pad + end_trunc); + /* * Note, we use 64 here for the standard size of struct page, * debugging options may cause it to be larger in which case the * implementation will limit the pfns advertised through * ->direct_access() to those that are included in the memmap. */ + start += start_pad; + npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K; if (nd_pfn->mode == PFN_MODE_PMEM) - offset = ALIGN(SZ_8K + 64 * npfns, nd_pfn->align); + offset = ALIGN(start + SZ_8K + 64 * npfns, nd_pfn->align) + - start; else if (nd_pfn->mode == PFN_MODE_RAM) - offset = ALIGN(SZ_8K, nd_pfn->align); + offset = ALIGN(start + SZ_8K, nd_pfn->align) - start; else goto err; - npfns = (pmem->size - offset) / SZ_4K; + if (offset + start_pad + end_trunc >= pmem->size) { + dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", + dev_name(&ndns->dev)); + goto err; + } + + npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K; pfn_sb->mode = cpu_to_le32(nd_pfn->mode); pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->npfns = cpu_to_le64(npfns); @@ -326,6 +368,9 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); pfn_sb->version_major = cpu_to_le16(1); + pfn_sb->version_minor = cpu_to_le16(1); + pfn_sb->start_pad = cpu_to_le32(start_pad); + pfn_sb->end_trunc = cpu_to_le32(end_trunc); checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); pfn_sb->checksum = cpu_to_le64(checksum); @@ -376,41 +421,36 @@ static unsigned long init_altmap_reserve(resource_size_t base) return reserve; } -static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) { - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); - struct device *dev = &nd_pfn->dev; - struct nd_region *nd_region; - struct vmem_altmap *altmap; - struct nd_pfn_sb *pfn_sb; - struct pmem_device *pmem; - struct request_queue *q; - phys_addr_t offset; int rc; + struct resource res; + struct request_queue *q; + struct pmem_device *pmem; + struct vmem_altmap *altmap; + struct device *dev = &nd_pfn->dev; + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); + u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + resource_size_t base = nsio->res.start + start_pad; struct vmem_altmap __altmap = { - .base_pfn = init_altmap_base(nsio->res.start), - .reserve = init_altmap_reserve(nsio->res.start), + .base_pfn = init_altmap_base(base), + .reserve = init_altmap_reserve(base), }; - if (!nd_pfn->uuid || !nd_pfn->ndns) - return -ENODEV; - - nd_region = to_nd_region(dev->parent); - rc = nd_pfn_init(nd_pfn); - if (rc) - return rc; - - pfn_sb = nd_pfn->pfn_sb; - offset = le64_to_cpu(pfn_sb->dataoff); + pmem = dev_get_drvdata(dev); + pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); + pmem->pfn_pad = start_pad + end_trunc; nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); if (nd_pfn->mode == PFN_MODE_RAM) { - if (offset < SZ_8K) + if (pmem->data_offset < SZ_8K) return -EINVAL; nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); altmap = NULL; } else if (nd_pfn->mode == PFN_MODE_PMEM) { - nd_pfn->npfns = (resource_size(&nsio->res) - offset) + nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset) / PAGE_SIZE; if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) dev_info(&nd_pfn->dev, @@ -418,7 +458,7 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); altmap = & __altmap; - altmap->free = __phys_to_pfn(offset - SZ_8K); + altmap->free = __phys_to_pfn(pmem->data_offset - SZ_8K); altmap->alloc = 0; } else { rc = -ENXIO; @@ -426,10 +466,12 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) } /* establish pfn range for lookup, and switch to direct map */ - pmem = dev_get_drvdata(dev); q = pmem->pmem_queue; + memcpy(&res, &nsio->res, sizeof(res)); + res.start += start_pad; + res.end -= end_trunc; devm_memunmap(dev, (void __force *) pmem->virt_addr); - pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res, + pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res, &q->q_usage_counter, altmap); pmem->pfn_flags |= PFN_MAP; if (IS_ERR(pmem->virt_addr)) { @@ -438,7 +480,6 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) } /* attach pmem disk in "pfn-mode" */ - pmem->data_offset = offset; rc = pmem_attach_disk(dev, ndns, pmem); if (rc) goto err; @@ -447,6 +488,22 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) err: nvdimm_namespace_detach_pfn(ndns); return rc; + +} + +static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); + int rc; + + if (!nd_pfn->uuid || !nd_pfn->ndns) + return -ENODEV; + + rc = nd_pfn_init(nd_pfn); + if (rc) + return rc; + /* we need a valid pfn_sb before we can init a vmem_altmap */ + return __nvdimm_namespace_attach_pfn(nd_pfn); } static int nd_pmem_probe(struct device *dev) -- cgit v1.2.3-59-g8ed1b From f6ed58c70d14572d0272ee129579dbfc97b97f50 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Mar 2016 09:46:04 -0800 Subject: libnvdimm, pfn: 'resource'-address and 'size' attributes for pfn devices Currenty with a raw mode pmem namespace the physical memory address range for the device can be obtained via /sys/block/pmemX/device/{resource|size}. Add similar attributes for pfn instances that takes the struct page memmap and section padding into account. Reported-by: Haozhong Zhang Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'drivers') diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 75a31a7359fb..254d3bc13f70 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -205,11 +205,67 @@ static ssize_t namespace_store(struct device *dev, } static DEVICE_ATTR_RW(namespace); +static ssize_t resource_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + device_lock(dev); + if (dev->driver) { + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + u64 offset = __le64_to_cpu(pfn_sb->dataoff); + struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + + rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start + + start_pad + offset); + } else { + /* no address to convey if the pfn instance is disabled */ + rc = -ENXIO; + } + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(resource); + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + device_lock(dev); + if (dev->driver) { + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + u64 offset = __le64_to_cpu(pfn_sb->dataoff); + struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); + u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + + rc = sprintf(buf, "%llu\n", (unsigned long long) + resource_size(&nsio->res) - start_pad + - end_trunc - offset); + } else { + /* no size to convey if the pfn instance is disabled */ + rc = -ENXIO; + } + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(size); + static struct attribute *nd_pfn_attributes[] = { &dev_attr_mode.attr, &dev_attr_namespace.attr, &dev_attr_uuid.attr, &dev_attr_align.attr, + &dev_attr_resource.attr, + &dev_attr_size.attr, NULL, }; -- cgit v1.2.3-59-g8ed1b From d4f323672aa63713b7ca26da418f66cc30d3a41a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 3 Mar 2016 16:08:54 -0800 Subject: nfit, libnvdimm: clear poison command support Add the boiler-plate for a 'clear error' command based on section 9.20.7.6 "Function Index 4 - Clear Uncorrectable Error" from the ACPI 6.1 specification, and add a reference implementation in nfit_test. Reviewed-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 12 +++++++++++- drivers/nvdimm/bus.c | 19 +++++++++++++++++++ include/uapi/linux/ndctl.h | 13 +++++++++++++ tools/testing/nvdimm/test/nfit.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 0def4ebf5d43..c067d7414007 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -87,6 +87,7 @@ static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) static int xlat_status(void *buf, unsigned int cmd) { + struct nd_cmd_clear_error *clear_err; struct nd_cmd_ars_status *ars_status; struct nd_cmd_ars_start *ars_start; struct nd_cmd_ars_cap *ars_cap; @@ -149,6 +150,15 @@ static int xlat_status(void *buf, unsigned int cmd) if (ars_status->status >> 16) return -EIO; break; + case ND_CMD_CLEAR_ERROR: + clear_err = buf; + if (clear_err->status & 0xffff) + return -EIO; + if (!clear_err->cleared) + return -EIO; + if (clear_err->length > clear_err->cleared) + return clear_err->cleared; + break; default: break; } @@ -1002,7 +1012,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) if (!adev) return; - for (i = ND_CMD_ARS_CAP; i <= ND_CMD_ARS_STATUS; i++) + for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i)) set_bit(i, &nd_desc->dsm_mask); } diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 2e9ac22595ec..cb6fd64b13e3 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -421,6 +421,12 @@ static const struct nd_cmd_desc __nd_cmd_bus_descs[] = { .out_num = 3, .out_sizes = { 4, 4, UINT_MAX, }, }, + [ND_CMD_CLEAR_ERROR] = { + .in_num = 2, + .in_sizes = { 8, 8, }, + .out_num = 3, + .out_sizes = { 4, 4, 8, }, + }, }; const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd) @@ -489,6 +495,13 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) } while (true); } +static int pmem_active(struct device *dev, void *data) +{ + if (is_nd_pmem(dev) && dev->driver) + return -EBUSY; + return 0; +} + /* set_config requires an idle interleave set */ static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, unsigned int cmd) @@ -503,6 +516,11 @@ static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, return rc; } + /* require clear error to go through the pmem driver */ + if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR) + return device_for_each_child(&nvdimm_bus->dev, NULL, + pmem_active); + if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA) return 0; @@ -551,6 +569,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, case ND_CMD_VENDOR: case ND_CMD_SET_CONFIG_DATA: case ND_CMD_ARS_START: + case ND_CMD_CLEAR_ERROR: dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n", nvdimm ? nvdimm_cmd_name(cmd) : nvdimm_bus_cmd_name(cmd)); diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h index cc68b92124d4..7cc28ab05b87 100644 --- a/include/uapi/linux/ndctl.h +++ b/include/uapi/linux/ndctl.h @@ -98,6 +98,14 @@ struct nd_cmd_ars_status { } __packed records[0]; } __packed; +struct nd_cmd_clear_error { + __u64 address; + __u64 length; + __u32 status; + __u8 reserved[4]; + __u64 cleared; +} __packed; + enum { ND_CMD_IMPLEMENTED = 0, @@ -105,6 +113,7 @@ enum { ND_CMD_ARS_CAP = 1, ND_CMD_ARS_START = 2, ND_CMD_ARS_STATUS = 3, + ND_CMD_CLEAR_ERROR = 4, /* per-dimm commands */ ND_CMD_SMART = 1, @@ -129,6 +138,7 @@ static inline const char *nvdimm_bus_cmd_name(unsigned cmd) [ND_CMD_ARS_CAP] = "ars_cap", [ND_CMD_ARS_START] = "ars_start", [ND_CMD_ARS_STATUS] = "ars_status", + [ND_CMD_CLEAR_ERROR] = "clear_error", }; if (cmd < ARRAY_SIZE(names) && names[cmd]) @@ -187,6 +197,9 @@ static inline const char *nvdimm_cmd_name(unsigned cmd) #define ND_IOCTL_ARS_STATUS _IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\ struct nd_cmd_ars_status) +#define ND_IOCTL_CLEAR_ERROR _IOWR(ND_IOCTL, ND_CMD_CLEAR_ERROR,\ + struct nd_cmd_clear_error) + #define ND_DEVICE_DIMM 1 /* nd_dimm: container for "config data" */ #define ND_DEVICE_REGION_PMEM 2 /* nd_region: (parent of PMEM namespaces) */ #define ND_DEVICE_REGION_BLK 3 /* nd_region: (parent of BLK namespaces) */ diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 1555c09efba1..3187322eeed7 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -223,6 +223,7 @@ static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd, } #define NFIT_TEST_ARS_RECORDS 4 +#define NFIT_TEST_CLEAR_ERR_UNIT 256 static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd, unsigned int buf_len) @@ -233,6 +234,7 @@ static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd, nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status) + NFIT_TEST_ARS_RECORDS * sizeof(struct nd_ars_record); nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16; + nd_cmd->clear_err_unit = NFIT_TEST_CLEAR_ERR_UNIT; return 0; } @@ -306,6 +308,28 @@ static int nfit_test_cmd_ars_status(struct ars_state *ars_state, return 0; } +static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err, + unsigned int buf_len, int *cmd_rc) +{ + const u64 mask = NFIT_TEST_CLEAR_ERR_UNIT - 1; + if (buf_len < sizeof(*clear_err)) + return -EINVAL; + + if ((clear_err->address & mask) || (clear_err->length & mask)) + return -EINVAL; + + /* + * Report 'all clear' success for all commands even though a new + * scrub will find errors again. This is enough to have the + * error removed from the 'badblocks' tracking in the pmem + * driver. + */ + clear_err->status = 0; + clear_err->cleared = clear_err->length; + *cmd_rc = 0; + return 0; +} + static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc) @@ -365,6 +389,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, rc = nfit_test_cmd_ars_status(ars_state, buf, buf_len, cmd_rc); break; + case ND_CMD_CLEAR_ERROR: + rc = nfit_test_cmd_clear_error(buf, buf_len, cmd_rc); + break; default: return -ENOTTY; } @@ -1230,6 +1257,7 @@ static void nfit_test0_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); } static void nfit_test1_setup(struct nfit_test *t) @@ -1290,6 +1318,7 @@ static void nfit_test1_setup(struct nfit_test *t) set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en); set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en); + set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en); } static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, -- cgit v1.2.3-59-g8ed1b From 45f68802f2542a6ad1550dab9c07004de6e0df40 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 6 Mar 2016 08:04:12 -0800 Subject: libnvdimm, pmem: fix ia64 build, use PHYS_PFN drivers/nvdimm/pmem.c: In function 'nvdimm_namespace_attach_pfn': drivers/nvdimm/pmem.c:367:3: error: implicit declaration of function '__phys_to_pfn' [-Werror=implicit-function-declaration] .base_pfn = __phys_to_pfn(nsio->res.start), ia64 does not provide __phys_to_pfn(), just use the PHYS_PFN() alias. Cc: Guenter Roeck Reported-by: kbuild test robot Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 0cb450e1b400..74e2569910d8 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -407,15 +407,15 @@ static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns) */ static unsigned long init_altmap_base(resource_size_t base) { - unsigned long base_pfn = __phys_to_pfn(base); + unsigned long base_pfn = PHYS_PFN(base); return PFN_SECTION_ALIGN_DOWN(base_pfn); } static unsigned long init_altmap_reserve(resource_size_t base) { - unsigned long reserve = __phys_to_pfn(SZ_8K); - unsigned long base_pfn = __phys_to_pfn(base); + unsigned long reserve = PHYS_PFN(SZ_8K); + unsigned long base_pfn = PHYS_PFN(base); reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); return reserve; @@ -458,7 +458,7 @@ static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); altmap = & __altmap; - altmap->free = __phys_to_pfn(pmem->data_offset - SZ_8K); + altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K); altmap->alloc = 0; } else { rc = -ENXIO; -- cgit v1.2.3-59-g8ed1b From af1996ef59dbcb36fe4878df7c717a02eb89d07a Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Wed, 9 Mar 2016 12:47:06 -0700 Subject: ACPI: Change NFIT driver to insert new resource ACPI 6 defines persistent memory (PMEM) ranges in multiple firmware interfaces, e820, EFI, and ACPI NFIT table. This EFI change, however, leads to hit a bug in the grub bootloader, which treats EFI_PERSISTENT_MEMORY type as regular memory and corrupts stored user data [1]. Therefore, BIOS may set generic reserved type in e820 and EFI to cover PMEM ranges. The kernel can initialize PMEM ranges from ACPI NFIT table alone. This scheme causes a problem in the iomem table, though. On x86, for instance, e820_reserve_resources() initializes top-level entries (iomem_resource.child) from the e820 table at early boot-time. This creates "reserved" entry for a PMEM range, which does not allow region_intersects() to check with PMEM type. Change acpi_nfit_register_region() to call acpi_nfit_insert_resource(), which calls insert_resource() to insert a PMEM entry from NFIT when the iomem table does not have a PMEM entry already. That is, when a PMEM range is marked as reserved type in e820, it inserts "Persistent Memory" entry, which results as follows. + "Persistent Memory" + "reserved" This allows the EINJ driver, which calls region_intersects() to check PMEM ranges, to work continuously even if BIOS sets reserved type (or sets nothing) to PMEM ranges in e820 and EFI. [1]: https://lists.gnu.org/archive/html/grub-devel/2015-11/msg00209.html Signed-off-by: Toshi Kani Cc: Rafael J. Wysocki Cc: Dan Williams Cc: Ingo Molnar Cc: Borislav Petkov Cc: Andrew Morton Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'drivers') diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index fb53db187854..269de5f75d98 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -1571,6 +1571,48 @@ static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus, return 0; } +static void acpi_nfit_remove_resource(void *data) +{ + struct resource *res = data; + + remove_resource(res); +} + +static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc, + struct nd_region_desc *ndr_desc) +{ + struct resource *res, *nd_res = ndr_desc->res; + int is_pmem, ret; + + /* No operation if the region is already registered as PMEM */ + is_pmem = region_intersects(nd_res->start, resource_size(nd_res), + IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY); + if (is_pmem == REGION_INTERSECTS) + return 0; + + res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL); + if (!res) + return -ENOMEM; + + res->name = "Persistent Memory"; + res->start = nd_res->start; + res->end = nd_res->end; + res->flags = IORESOURCE_MEM; + res->desc = IORES_DESC_PERSISTENT_MEMORY; + + ret = insert_resource(&iomem_resource, res); + if (ret) + return ret; + + ret = devm_add_action(acpi_desc->dev, acpi_nfit_remove_resource, res); + if (ret) { + remove_resource(res); + return ret; + } + + return 0; +} + static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc, struct nd_region_desc *ndr_desc) { @@ -1781,6 +1823,12 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, nvdimm_bus = acpi_desc->nvdimm_bus; if (nfit_spa_type(spa) == NFIT_SPA_PM) { + rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc); + if (rc) + dev_warn(acpi_desc->dev, + "failed to insert pmem resource to iomem: %d\n", + rc); + rc = acpi_nfit_find_poison(acpi_desc, ndr_desc); if (rc) { dev_err(acpi_desc->dev, -- cgit v1.2.3-59-g8ed1b From 55155291b32d24371256adbcc67f9f53cf3f314f Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 9 Mar 2016 09:21:54 +1100 Subject: pmem: don't allocate unused major device number When alloc_disk(0) or alloc_disk-node(0, XX) is used, the ->major number is completely ignored: all devices are allocated with a major of BLOCK_EXT_MAJOR. So there is no point allocating pmem_major. Signed-off-by: NeilBrown Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 74e2569910d8..ba8d5b6bfad0 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -50,8 +50,6 @@ struct pmem_device { struct badblocks bb; }; -static int pmem_major; - static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) { if (bb->count) { @@ -231,8 +229,6 @@ static int pmem_attach_disk(struct device *dev, return -ENOMEM; } - disk->major = pmem_major; - disk->first_minor = 0; disk->fops = &pmem_fops; disk->private_data = pmem; disk->queue = pmem->pmem_queue; @@ -579,26 +575,13 @@ static struct nd_device_driver nd_pmem_driver = { static int __init pmem_init(void) { - int error; - - pmem_major = register_blkdev(0, "pmem"); - if (pmem_major < 0) - return pmem_major; - - error = nd_driver_register(&nd_pmem_driver); - if (error) { - unregister_blkdev(pmem_major, "pmem"); - return error; - } - - return 0; + return nd_driver_register(&nd_pmem_driver); } module_init(pmem_init); static void pmem_exit(void) { driver_unregister(&nd_pmem_driver.drv); - unregister_blkdev(pmem_major, "pmem"); } module_exit(pmem_exit); -- cgit v1.2.3-59-g8ed1b From ec56151d382c2140851b4f25203af9016ba84fea Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 10 Mar 2016 08:59:28 +1100 Subject: nvdimm/blk: don't allocate unused major device number When alloc_disk(0) is used ->major is completely ignored, all devices are allocated with a "major" of BLOCK_EXT_MAJOR. So don't allocate nd_blk_major Signed-off-by: NeilBrown Signed-off-by: Dan Williams --- drivers/nvdimm/blk.c | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 91a336ea8c4f..e9ff9229d942 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -31,8 +31,6 @@ struct nd_blk_device { u32 internal_lbasize; }; -static int nd_blk_major; - static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) { return blk_dev->nsblk->lbasize - blk_dev->sector_size; @@ -264,7 +262,6 @@ static int nd_blk_attach_disk(struct nd_namespace_common *ndns, } disk->driverfs_dev = &ndns->dev; - disk->major = nd_blk_major; disk->first_minor = 0; disk->fops = &nd_blk_fops; disk->private_data = blk_dev; @@ -358,25 +355,12 @@ static struct nd_device_driver nd_blk_driver = { static int __init nd_blk_init(void) { - int rc; - - rc = register_blkdev(0, "nd_blk"); - if (rc < 0) - return rc; - - nd_blk_major = rc; - rc = nd_driver_register(&nd_blk_driver); - - if (rc < 0) - unregister_blkdev(nd_blk_major, "nd_blk"); - - return rc; + return nd_driver_register(&nd_blk_driver); } static void __exit nd_blk_exit(void) { driver_unregister(&nd_blk_driver.drv); - unregister_blkdev(nd_blk_major, "nd_blk"); } MODULE_AUTHOR("Ross Zwisler "); -- cgit v1.2.3-59-g8ed1b From ff8e92d5d94b99aab39f439d532cba435947dfc0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 10 Mar 2016 08:59:28 +1100 Subject: nvdimm/btt: don't allocate unused major device number alloc_disk(0) does not require or use a ->major number, all devices are allocated with a major of BLOCK_EXT_MAJOR. So don't allocate btt_major. Signed-off-by: NeilBrown Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 19 ++----------------- 1 file changed, 2 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index efb2c1ceef98..c32cbb593600 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -31,8 +31,6 @@ enum log_ent_request { LOG_OLD_ENT }; -static int btt_major; - static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, void *buf, size_t n) { @@ -1246,7 +1244,6 @@ static int btt_blk_init(struct btt *btt) nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name); btt->btt_disk->driverfs_dev = &btt->nd_btt->dev; - btt->btt_disk->major = btt_major; btt->btt_disk->first_minor = 0; btt->btt_disk->fops = &btt_fops; btt->btt_disk->private_data = btt; @@ -1423,22 +1420,11 @@ EXPORT_SYMBOL(nvdimm_namespace_detach_btt); static int __init nd_btt_init(void) { - int rc; - - btt_major = register_blkdev(0, "btt"); - if (btt_major < 0) - return btt_major; + int rc = 0; debugfs_root = debugfs_create_dir("btt", NULL); - if (IS_ERR_OR_NULL(debugfs_root)) { + if (IS_ERR_OR_NULL(debugfs_root)) rc = -ENXIO; - goto err_debugfs; - } - - return 0; - - err_debugfs: - unregister_blkdev(btt_major, "btt"); return rc; } @@ -1446,7 +1432,6 @@ static int __init nd_btt_init(void) static void __exit nd_btt_exit(void) { debugfs_remove_recursive(debugfs_root); - unregister_blkdev(btt_major, "btt"); } MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT); -- cgit v1.2.3-59-g8ed1b From b5ebc8ec693281c3c1efff7459a069cbd8b9a149 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sun, 6 Mar 2016 15:20:51 -0800 Subject: libnvdimm, pmem: fix kmap_atomic() leak in error path When we enounter a bad block we need to kunmap_atomic() before returning. Cc: Cc: Ross Zwisler Reviewed-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index efc2a5e671c6..e7b86a7fca0a 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -66,22 +66,25 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, int rw, sector_t sector) { + int rc = 0; void *mem = kmap_atomic(page); phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void __pmem *pmem_addr = pmem->virt_addr + pmem_off; if (rw == READ) { if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) - return -EIO; - memcpy_from_pmem(mem + off, pmem_addr, len); - flush_dcache_page(page); + rc = -EIO; + else { + memcpy_from_pmem(mem + off, pmem_addr, len); + flush_dcache_page(page); + } } else { flush_dcache_page(page); memcpy_to_pmem(pmem_addr, mem + off, len); } kunmap_atomic(mem); - return 0; + return rc; } static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) -- cgit v1.2.3-59-g8ed1b From 59e6473980f321c16299e12db69d1fabc2644a6f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Mar 2016 07:16:07 -0800 Subject: libnvdimm, pmem: clear poison on write If a write is directed at a known bad block perform the following: 1/ write the data 2/ send a clear poison command 3/ invalidate the poison out of the cache hierarchy Cc: Cc: Ross Zwisler Reviewed-by: Vishal Verma Signed-off-by: Dan Williams --- arch/x86/include/asm/pmem.h | 5 +++++ drivers/nvdimm/bus.c | 46 +++++++++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/nd.h | 2 ++ drivers/nvdimm/pmem.c | 29 +++++++++++++++++++++++++++- include/linux/pmem.h | 19 +++++++++++++++++++ 5 files changed, 100 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index c57fd1ea9689..bf8b35d2035a 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -137,6 +137,11 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) arch_wb_cache_pmem(addr, size); } +static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) +{ + clflush_cache_range((void __force *) addr, size); +} + static inline bool __arch_has_wmb_pmem(void) { /* diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index cb6fd64b13e3..33557481d452 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -159,6 +159,52 @@ void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event) } EXPORT_SYMBOL_GPL(nvdimm_region_notify); +long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, + unsigned int len) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc; + struct nd_cmd_clear_error clear_err; + struct nd_cmd_ars_cap ars_cap; + u32 clear_err_unit, mask; + int cmd_rc, rc; + + if (!nvdimm_bus) + return -ENXIO; + + nd_desc = nvdimm_bus->nd_desc; + if (!nd_desc->ndctl) + return -ENXIO; + + memset(&ars_cap, 0, sizeof(ars_cap)); + ars_cap.address = phys; + ars_cap.length = len; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap, + sizeof(ars_cap), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + clear_err_unit = ars_cap.clear_err_unit; + if (!clear_err_unit || !is_power_of_2(clear_err_unit)) + return -ENXIO; + + mask = clear_err_unit - 1; + if ((phys | len) & mask) + return -ENXIO; + memset(&clear_err, 0, sizeof(clear_err)); + clear_err.address = phys; + clear_err.length = len; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err, + sizeof(clear_err), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + return clear_err.cleared; +} +EXPORT_SYMBOL_GPL(nvdimm_clear_poison); + static struct bus_type nvdimm_bus_type = { .name = "nd", .uevent = nvdimm_bus_uevent, diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 78b82f6dd191..1799bd97a9ce 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -186,6 +186,8 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len); +long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, + unsigned int len); struct nd_btt *to_nd_btt(struct device *dev); struct nd_gen_sb { diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index e7b86a7fca0a..adc387236fe7 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -62,17 +62,40 @@ static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) return false; } +static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, + unsigned int len) +{ + struct device *dev = disk_to_dev(pmem->pmem_disk); + sector_t sector; + long cleared; + + sector = (offset - pmem->data_offset) / 512; + cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); + + if (cleared > 0 && cleared / 512) { + dev_dbg(dev, "%s: %llx clear %ld sector%s\n", + __func__, (unsigned long long) sector, + cleared / 512, cleared / 512 > 1 ? "s" : ""); + badblocks_clear(&pmem->bb, sector, cleared / 512); + } + invalidate_pmem(pmem->virt_addr + offset, len); +} + static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, int rw, sector_t sector) { int rc = 0; + bool bad_pmem = false; void *mem = kmap_atomic(page); phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void __pmem *pmem_addr = pmem->virt_addr + pmem_off; + if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) + bad_pmem = true; + if (rw == READ) { - if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) + if (unlikely(bad_pmem)) rc = -EIO; else { memcpy_from_pmem(mem + off, pmem_addr, len); @@ -81,6 +104,10 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, } else { flush_dcache_page(page); memcpy_to_pmem(pmem_addr, mem + off, len); + if (unlikely(bad_pmem)) { + pmem_clear_poison(pmem, pmem_off, len); + memcpy_to_pmem(pmem_addr, mem + off, len); + } } kunmap_atomic(mem); diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 7c3d11a6b4ad..3ec5309e29f3 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -58,6 +58,11 @@ static inline void arch_wb_cache_pmem(void __pmem *addr, size_t size) { BUG(); } + +static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) +{ + BUG(); +} #endif /* @@ -185,6 +190,20 @@ static inline void clear_pmem(void __pmem *addr, size_t size) default_clear_pmem(addr, size); } +/** + * invalidate_pmem - flush a pmem range from the cache hierarchy + * @addr: virtual start address + * @size: bytes to invalidate (internally aligned to cache line size) + * + * For platforms that support clearing poison this flushes any poisoned + * ranges out of the cache + */ +static inline void invalidate_pmem(void __pmem *addr, size_t size) +{ + if (arch_has_pmem_api()) + arch_invalidate_pmem(addr, size); +} + /** * wb_cache_pmem - write back processor cache for PMEM memory range * @addr: virtual start address -- cgit v1.2.3-59-g8ed1b