From 966d23a006ca7b44ac8cf4d0c96b19785e0c3da0 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 15 Jan 2019 10:47:00 -0800 Subject: libnvdimm/label: Clear 'updating' flag after label-set update The UEFI 2.7 specification sets expectations that the 'updating' flag is eventually cleared. To date, the libnvdimm core has never adhered to that protocol. The policy of the core matches the policy of other multi-device info-block formats like MD-Software-RAID that expect administrator intervention on inconsistent info-blocks, not automatic invalidation. However, some pre-boot environments may unfortunately attempt to "clean up" the labels and invalidate a set when it fails to find at least one "non-updating" label in the set. Clear the updating flag after set updates to minimize the window of vulnerability to aggressive pre-boot environments. Ideally implementations would not write to the label area outside of creating namespaces. Note that this only minimizes the window, it does not close it as the system can still crash while clearing the flag and the set can be subsequently deleted / invalidated by the pre-boot environment. Fixes: f524bf271a5c ("libnvdimm: write pmem label set") Cc: Cc: Kelly Couch Signed-off-by: Dan Williams --- drivers/nvdimm/label.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index a11bf4e6b451..6d6e9a12150b 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -755,7 +755,7 @@ static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class, static int __pmem_label_update(struct nd_region *nd_region, struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm, - int pos) + int pos, unsigned long flags) { struct nd_namespace_common *ndns = &nspm->nsio.common; struct nd_interleave_set *nd_set = nd_region->nd_set; @@ -796,7 +796,7 @@ static int __pmem_label_update(struct nd_region *nd_region, memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN); if (nspm->alt_name) memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN); - nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_UPDATING); + nd_label->flags = __cpu_to_le32(flags); nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings); nd_label->position = __cpu_to_le16(pos); nd_label->isetcookie = __cpu_to_le64(cookie); @@ -1249,13 +1249,13 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid) int nd_pmem_namespace_label_update(struct nd_region *nd_region, struct nd_namespace_pmem *nspm, resource_size_t size) { - int i; + int i, rc; for (i = 0; i < nd_region->ndr_mappings; i++) { struct nd_mapping *nd_mapping = &nd_region->mapping[i]; struct nvdimm_drvdata *ndd = to_ndd(nd_mapping); struct resource *res; - int rc, count = 0; + int count = 0; if (size == 0) { rc = del_labels(nd_mapping, nspm->uuid); @@ -1273,7 +1273,20 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region, if (rc < 0) return rc; - rc = __pmem_label_update(nd_region, nd_mapping, nspm, i); + rc = __pmem_label_update(nd_region, nd_mapping, nspm, i, + NSLABEL_FLAG_UPDATING); + if (rc) + return rc; + } + + if (size == 0) + return 0; + + /* Clear the UPDATING flag per UEFI 2.7 expectations */ + for (i = 0; i < nd_region->ndr_mappings; i++) { + struct nd_mapping *nd_mapping = &nd_region->mapping[i]; + + rc = __pmem_label_update(nd_region, nd_mapping, nspm, i, 0); if (rc) return rc; } -- cgit v1.2.3-59-g8ed1b From d5d30d5a5c60628de5e77e3f292a8f9012d51350 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 2 Feb 2019 16:35:26 -0800 Subject: libnvdimm/dimm: Add a no-BLK quirk based on NVDIMM family As Dexuan reports the NVDIMM_FAMILY_HYPERV platform is incompatible with the existing Linux namespace implementation because it uses NSLABEL_FLAG_LOCAL for x1-width PMEM interleave sets. Quirk it as an platform / DIMM that does not provide BLK-aperture access. Allow the libnvdimm core to assume no potential for aliasing. In case other implementations make the same mistake, provide a "noblk" module parameter to force-enable the quirk. Link: https://lkml.kernel.org/r/PU1P153MB0169977604493B82B662A01CBF920@PU1P153MB0169.APCP153.PROD.OUTLOOK.COM Reported-by: Dexuan Cui Tested-by: Dexuan Cui Signed-off-by: Dan Williams --- drivers/acpi/nfit/core.c | 4 ++++ drivers/nvdimm/dimm_devs.c | 7 +++++++ drivers/nvdimm/label.c | 3 +++ drivers/nvdimm/namespace_devs.c | 6 ++++++ drivers/nvdimm/region_devs.c | 7 +++++++ include/linux/libnvdimm.h | 2 ++ 6 files changed, 29 insertions(+) (limited to 'drivers/nvdimm') diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c index 4a7e8b1fa43b..811c399a3a76 100644 --- a/drivers/acpi/nfit/core.c +++ b/drivers/acpi/nfit/core.c @@ -2016,6 +2016,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc) cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK; } + /* Quirk to ignore LOCAL for labels on HYPERV DIMMs */ + if (nfit_mem->family == NVDIMM_FAMILY_HYPERV) + set_bit(NDD_NOBLK, &flags); + if (test_bit(NFIT_MEM_LSR, &nfit_mem->flags)) { set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask); set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 4890310df874..553aa78abeee 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -11,6 +11,7 @@ * General Public License for more details. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -25,6 +26,10 @@ static DEFINE_IDA(dimm_ida); +static bool noblk; +module_param(noblk, bool, 0444); +MODULE_PARM_DESC(noblk, "force disable BLK / local alias support"); + /* * Retrieve bus and dimm handle and return if this bus supports * get_config_data commands @@ -551,6 +556,8 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus, nvdimm->dimm_id = dimm_id; nvdimm->provider_data = provider_data; + if (noblk) + flags |= 1 << NDD_NOBLK; nvdimm->flags = flags; nvdimm->cmd_mask = cmd_mask; nvdimm->num_flush = num_flush; diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c index 6d6e9a12150b..f3d753d3169c 100644 --- a/drivers/nvdimm/label.c +++ b/drivers/nvdimm/label.c @@ -392,6 +392,7 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) return 0; /* no label, nothing to reserve */ for_each_clear_bit_le(slot, free, nslot) { + struct nvdimm *nvdimm = to_nvdimm(ndd->dev); struct nd_namespace_label *nd_label; struct nd_region *nd_region = NULL; u8 label_uuid[NSLABEL_UUID_LEN]; @@ -406,6 +407,8 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd) memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN); flags = __le32_to_cpu(nd_label->flags); + if (test_bit(NDD_NOBLK, &nvdimm->flags)) + flags &= ~NSLABEL_FLAG_LOCAL; nd_label_gen_id(&label_id, label_uuid, flags); res = nvdimm_allocate_dpa(ndd, &label_id, __le64_to_cpu(nd_label->dpa), diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 4b077555ac70..3677b0c4a33d 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -2492,6 +2492,12 @@ static int init_active_labels(struct nd_region *nd_region) if (!label_ent) break; label = nd_label_active(ndd, j); + if (test_bit(NDD_NOBLK, &nvdimm->flags)) { + u32 flags = __le32_to_cpu(label->flags); + + flags &= ~NSLABEL_FLAG_LOCAL; + label->flags = __cpu_to_le32(flags); + } label_ent->label = label; mutex_lock(&nd_mapping->lock); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index e2818f94f292..3b58baa44b5c 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -1003,6 +1003,13 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, if (test_bit(NDD_UNARMED, &nvdimm->flags)) ro = 1; + + if (test_bit(NDD_NOBLK, &nvdimm->flags) + && dev_type == &nd_blk_device_type) { + dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not BLK capable\n", + caller, dev_name(&nvdimm->dev), i); + return NULL; + } } if (dev_type == &nd_blk_device_type) { diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 5440f11b0907..7da406ae3a2b 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -42,6 +42,8 @@ enum { NDD_SECURITY_OVERWRITE = 3, /* tracking whether or not there is a pending device reference */ NDD_WORK_PENDING = 4, + /* ignore / filter NSLABEL_FLAG_LOCAL for this DIMM, i.e. no aliasing */ + NDD_NOBLK = 5, /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, -- cgit v1.2.3-59-g8ed1b From f101ada7da6551127d192c2f1742c1e9e0f62799 Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Tue, 22 Jan 2019 10:48:09 +0800 Subject: libnvdimm, pfn: Fix over-trim in trim_pfn_device() When trying to see whether current nd_region intersects with others, trim_pfn_device() has already calculated the *size* to be expanded to SECTION size. Do not double append 'adjust' to 'size' when calculating whether the end of a region collides with the next pmem region. Fixes: ae86cbfef381 "libnvdimm, pfn: Pad pfn namespaces relative to other regions" Cc: Signed-off-by: Wei Yang Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 6f22272e8d80..040bbd9c367e 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -678,7 +678,7 @@ static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trun if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE) == REGION_MIXED || !IS_ALIGNED(end, nd_pfn->align) - || nd_region_conflict(nd_region, start, size + adjust)) + || nd_region_conflict(nd_region, start, size)) *end_trunc = end - phys_pmem_align_down(nd_pfn, end); } -- cgit v1.2.3-59-g8ed1b From 07464e88365e9236febaca9ed1a2e2006d8bc952 Mon Sep 17 00:00:00 2001 From: Oliver O'Halloran Date: Wed, 6 Feb 2019 13:04:53 +1100 Subject: libnvdimm: Fix altmap reservation size calculation Libnvdimm reserves the first 8K of pfn and devicedax namespaces to store a superblock describing the namespace. This 8K reservation is contained within the altmap area which the kernel uses for the vmemmap backing for the pages within the namespace. The altmap allows for some pages at the start of the altmap area to be reserved and that mechanism is used to protect the superblock from being re-used as vmemmap backing. The number of PFNs to reserve is calculated using: PHYS_PFN(SZ_8K) Which is implemented as: #define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) So on systems where PAGE_SIZE is greater than 8K the reservation size is truncated to zero and the superblock area is re-used as vmemmap backing. As a result all the namespace information stored in the superblock (i.e. if it's a PFN or DAX namespace) is lost and the namespace needs to be re-created to get access to the contents. This patch fixes this by using PFN_UP() rather than PHYS_PFN() to ensure that at least one page is reserved. On systems with a 4K pages size this patch should have no effect. Cc: stable@vger.kernel.org Cc: Dan Williams Fixes: ac515c084be9 ("libnvdimm, pmem, pfn: move pfn setup to the core") Signed-off-by: Oliver O'Halloran Reviewed-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 040bbd9c367e..7760c1b91853 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -593,7 +593,7 @@ static unsigned long init_altmap_base(resource_size_t base) static unsigned long init_altmap_reserve(resource_size_t base) { - unsigned long reserve = PHYS_PFN(SZ_8K); + unsigned long reserve = PFN_UP(SZ_8K); unsigned long base_pfn = PHYS_PFN(base); reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); -- cgit v1.2.3-59-g8ed1b From 11a358109e0c48fa74f27e750dde4d25de4d5fc3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 6 Feb 2019 13:04:53 +1100 Subject: libnvdimm/pfn: Account for PAGE_SIZE > info-block-size in nd_pfn_init() Similar to "libnvdimm: Fix altmap reservation size calculation" provide for a reservation of a full page worth of info block space at info-block establishment time. Typically there is already slack in the padding from honoring the default 2MB alignment, but provide for a reservation for corner case configurations that would otherwise fit. Cc: Oliver O'Halloran Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index 7760c1b91853..ba74a341da5d 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -580,6 +580,11 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) } EXPORT_SYMBOL(nd_pfn_probe); +static u32 info_block_reserve(void) +{ + return ALIGN(SZ_8K, PAGE_SIZE); +} + /* * We hotplug memory at section granularity, pad the reserved area from * the previous section base to the namespace base address. @@ -593,7 +598,7 @@ static unsigned long init_altmap_base(resource_size_t base) static unsigned long init_altmap_reserve(resource_size_t base) { - unsigned long reserve = PFN_UP(SZ_8K); + unsigned long reserve = info_block_reserve() >> PAGE_SHIFT; unsigned long base_pfn = PHYS_PFN(base); reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); @@ -608,6 +613,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) u64 offset = le64_to_cpu(pfn_sb->dataoff); u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + u32 reserve = info_block_reserve(); struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); resource_size_t base = nsio->res.start + start_pad; @@ -621,7 +627,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) res->end -= end_trunc; if (nd_pfn->mode == PFN_MODE_RAM) { - if (offset < SZ_8K) + if (offset < reserve) return -EINVAL; nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); pgmap->altmap_valid = false; @@ -634,7 +640,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); memcpy(altmap, &__altmap, sizeof(*altmap)); - altmap->free = PHYS_PFN(offset - SZ_8K); + altmap->free = PHYS_PFN(offset - reserve); altmap->alloc = 0; pgmap->altmap_valid = true; } else @@ -687,9 +693,9 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + u32 start_pad, end_trunc, reserve = info_block_reserve(); resource_size_t start, size; struct nd_region *nd_region; - u32 start_pad, end_trunc; struct nd_pfn_sb *pfn_sb; unsigned long npfns; phys_addr_t offset; @@ -734,7 +740,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) */ start = nsio->res.start + start_pad; size = resource_size(&nsio->res); - npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - SZ_8K) + npfns = PFN_SECTION_ALIGN_UP((size - start_pad - end_trunc - reserve) / PAGE_SIZE); if (nd_pfn->mode == PFN_MODE_PMEM) { /* @@ -742,10 +748,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * when populating the vmemmap. This *should* be equal to * PMD_SIZE for most architectures. */ - offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve, + offset = ALIGN(start + reserve + 64 * npfns + dax_label_reserve, max(nd_pfn->align, PMD_SIZE)) - start; } else if (nd_pfn->mode == PFN_MODE_RAM) - offset = ALIGN(start + SZ_8K + dax_label_reserve, + offset = ALIGN(start + reserve + dax_label_reserve, nd_pfn->align) - start; else return -ENXIO; -- cgit v1.2.3-59-g8ed1b From fa7d2e639cd90442d868dfc6ca1d4cc9d8bf206e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 24 Jan 2019 17:33:06 -0800 Subject: libnvdimm/pmem: Honor force_raw for legacy pmem regions For recovery, where non-dax access is needed to a given physical address range, and testing, allow the 'force_raw' attribute to override the default establishment of a dev_pagemap. Otherwise without this capability it is possible to end up with a namespace that can not be activated due to corrupted info-block, and one that can not be repaired due to a section collision. Cc: Fixes: 004f1afbe199 ("libnvdimm, pmem: direct map legacy pmem by default") Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 4b077555ac70..33a3b23b3db7 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -138,6 +138,7 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid) bool pmem_should_map_pages(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_common *ndns = to_ndns(dev); struct nd_namespace_io *nsio; if (!IS_ENABLED(CONFIG_ZONE_DEVICE)) @@ -149,6 +150,9 @@ bool pmem_should_map_pages(struct device *dev) if (is_nd_pfn(dev) || is_nd_btt(dev)) return false; + if (ndns->force_raw) + return false; + nsio = to_nd_namespace_io(dev); if (region_intersects(nsio->res.start, resource_size(&nsio->res), IORESOURCE_SYSTEM_RAM, -- cgit v1.2.3-59-g8ed1b From 4960461f5d22bb6d2965592c01805a97d55b132f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 26 Jan 2019 10:27:57 -0800 Subject: libnvdimm/pfn: Remove dax_label_reserve The reserve was for an abandoned effort to add label (partitioning support) to device-dax instances. Remove it. Signed-off-by: Dan Williams --- drivers/nvdimm/pfn_devs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index ba74a341da5d..d271bd731af7 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -690,7 +690,6 @@ static void trim_pfn_device(struct nd_pfn *nd_pfn, u32 *start_pad, u32 *end_trun static int nd_pfn_init(struct nd_pfn *nd_pfn) { - u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0; struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); u32 start_pad, end_trunc, reserve = info_block_reserve(); @@ -748,11 +747,10 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) * when populating the vmemmap. This *should* be equal to * PMD_SIZE for most architectures. */ - offset = ALIGN(start + reserve + 64 * npfns + dax_label_reserve, + offset = ALIGN(start + reserve + 64 * npfns, max(nd_pfn->align, PMD_SIZE)) - start; } else if (nd_pfn->mode == PFN_MODE_RAM) - offset = ALIGN(start + reserve + dax_label_reserve, - nd_pfn->align) - start; + offset = ALIGN(start + reserve, nd_pfn->align) - start; else return -ENXIO; -- cgit v1.2.3-59-g8ed1b From 2f8c9011151337d0bc106693f272f9bddbccfab2 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 27 Feb 2019 17:06:26 -0700 Subject: libnvdimm/btt: Remove unnecessary code in btt_freelist_init We call btt_log_read() twice, once to get the 'old' log entry, and again to get the 'new' entry. However, we have no use for the 'old' entry, so remove it. Cc: Dan Williams Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index b123b0dcf274..cd4fa87ea48c 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -541,9 +541,9 @@ static int arena_clear_freelist_error(struct arena_info *arena, u32 lane) static int btt_freelist_init(struct arena_info *arena) { - int old, new, ret; + int new, ret; u32 i, map_entry; - struct log_entry log_new, log_old; + struct log_entry log_new; arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry), GFP_KERNEL); @@ -551,10 +551,6 @@ static int btt_freelist_init(struct arena_info *arena) return -ENOMEM; for (i = 0; i < arena->nfree; i++) { - old = btt_log_read(arena, i, &log_old, LOG_OLD_ENT); - if (old < 0) - return old; - new = btt_log_read(arena, i, &log_new, LOG_NEW_ENT); if (new < 0) return new; -- cgit v1.2.3-59-g8ed1b From 9dedc73a4658ebcc0c9b58c3cb84e9ac80122213 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 27 Feb 2019 17:06:27 -0700 Subject: libnvdimm/btt: Fix LBA masking during 'free list' population The Linux BTT implementation assumes that log entries will never have the 'zero' flag set, and indeed it never sets that flag for log entries itself. However, the UEFI spec is ambiguous on the exact format of the LBA field of a log entry, specifically as to whether it should include the additional flag bits or not. While a zero bit doesn't make sense in the context of a log entry, other BTT implementations might still have it set. If an implementation does happen to have it set, we would happily read it in as the next block to write to for writes. Since a high bit is set, it pushes the block number out of the range of an 'arena', and we fail such a write with an EIO. Follow the robustness principle, and tolerate such implementations by stripping out the zero flag when populating the free list during initialization. Additionally, use the same stripped out entries for detection of incomplete writes and map restoration that happens at this stage. Add a sysfs file 'log_zero_flags' that indicates the ability to accept such a layout to userspace applications. This enables 'ndctl check-namespace' to recognize whether the kernel is able to handle zero flags, or whether it should attempt a fix-up under the --repair option. Cc: Dan Williams Reported-by: Dexuan Cui Reported-by: Pedro d'Aquino Filocre F S Barbuda Tested-by: Dexuan Cui Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 25 +++++++++++++++++++------ drivers/nvdimm/btt.h | 2 ++ drivers/nvdimm/btt_devs.c | 8 ++++++++ 3 files changed, 29 insertions(+), 6 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index cd4fa87ea48c..4671776f5623 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -542,8 +542,8 @@ static int arena_clear_freelist_error(struct arena_info *arena, u32 lane) static int btt_freelist_init(struct arena_info *arena) { int new, ret; - u32 i, map_entry; struct log_entry log_new; + u32 i, map_entry, log_oldmap, log_newmap; arena->freelist = kcalloc(arena->nfree, sizeof(struct free_entry), GFP_KERNEL); @@ -555,16 +555,22 @@ static int btt_freelist_init(struct arena_info *arena) if (new < 0) return new; + /* old and new map entries with any flags stripped out */ + log_oldmap = ent_lba(le32_to_cpu(log_new.old_map)); + log_newmap = ent_lba(le32_to_cpu(log_new.new_map)); + /* sub points to the next one to be overwritten */ arena->freelist[i].sub = 1 - new; arena->freelist[i].seq = nd_inc_seq(le32_to_cpu(log_new.seq)); - arena->freelist[i].block = le32_to_cpu(log_new.old_map); + arena->freelist[i].block = log_oldmap; /* * FIXME: if error clearing fails during init, we want to make * the BTT read-only */ - if (ent_e_flag(log_new.old_map)) { + if (ent_e_flag(log_new.old_map) && + !ent_normal(log_new.old_map)) { + arena->freelist[i].has_err = 1; ret = arena_clear_freelist_error(arena, i); if (ret) dev_err_ratelimited(to_dev(arena), @@ -572,7 +578,7 @@ static int btt_freelist_init(struct arena_info *arena) } /* This implies a newly created or untouched flog entry */ - if (log_new.old_map == log_new.new_map) + if (log_oldmap == log_newmap) continue; /* Check if map recovery is needed */ @@ -580,8 +586,15 @@ static int btt_freelist_init(struct arena_info *arena) NULL, NULL, 0); if (ret) return ret; - if ((le32_to_cpu(log_new.new_map) != map_entry) && - (le32_to_cpu(log_new.old_map) == map_entry)) { + + /* + * The map_entry from btt_read_map is stripped of any flag bits, + * so use the stripped out versions from the log as well for + * testing whether recovery is needed. For restoration, use the + * 'raw' version of the log entries as that captured what we + * were going to write originally. + */ + if ((log_newmap != map_entry) && (log_oldmap == map_entry)) { /* * Last transaction wrote the flog, but wasn't able * to complete the map write. So fix up the map. diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index db3cb6d4d0d4..ddff49c707b0 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -44,6 +44,8 @@ #define ent_e_flag(ent) (!!(ent & MAP_ERR_MASK)) #define ent_z_flag(ent) (!!(ent & MAP_TRIM_MASK)) #define set_e_flag(ent) (ent |= MAP_ERR_MASK) +/* 'normal' is both e and z flags set */ +#define ent_normal(ent) (ent_e_flag(ent) && ent_z_flag(ent)) enum btt_init_state { INIT_UNCHECKED = 0, diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 795ad4ff35ca..b72a303176c7 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -159,11 +159,19 @@ static ssize_t size_show(struct device *dev, } static DEVICE_ATTR_RO(size); +static ssize_t log_zero_flags_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Y\n"); +} +static DEVICE_ATTR_RO(log_zero_flags); + static struct attribute *nd_btt_attributes[] = { &dev_attr_sector_size.attr, &dev_attr_namespace.attr, &dev_attr_uuid.attr, &dev_attr_size.attr, + &dev_attr_log_zero_flags.attr, NULL, }; -- cgit v1.2.3-59-g8ed1b From 316720b9c2341307b9a17103cdafa1ca9b2fb872 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 26 Feb 2019 01:42:53 +0000 Subject: libnvdimm/of_pmem: Fix platform_no_drv_owner.cocci warnings Remove .owner field if calls are used which set it automatically Generated by: scripts/coccinelle/api/platform_no_drv_owner.cocci Signed-off-by: YueHaibing Signed-off-by: Dan Williams --- drivers/nvdimm/of_pmem.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index 0a701837dfc0..11b9821eba85 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -108,7 +108,6 @@ static struct platform_driver of_pmem_region_driver = { .remove = of_pmem_region_remove, .driver = { .name = "of_pmem", - .owner = THIS_MODULE, .of_match_table = of_pmem_region_match, }, }; -- cgit v1.2.3-59-g8ed1b From 075c3fdd56ac2e077f928353daf786341bbb6a52 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 4 Mar 2019 12:14:04 -0800 Subject: libnvdimm/namespace: Clean up holder_class_store() Use sysfs_streq() in place of open-coded strcmp()'s that check for an optional "\n" at the end of the input. Reviewed-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/namespace_devs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 3677b0c4a33d..17fb7f931f0c 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -1506,13 +1506,13 @@ static ssize_t __holder_class_store(struct device *dev, const char *buf) if (dev->driver || ndns->claim) return -EBUSY; - if (strcmp(buf, "btt") == 0 || strcmp(buf, "btt\n") == 0) + if (sysfs_streq(buf, "btt")) ndns->claim_class = btt_claim_class(dev); - else if (strcmp(buf, "pfn") == 0 || strcmp(buf, "pfn\n") == 0) + else if (sysfs_streq(buf, "pfn")) ndns->claim_class = NVDIMM_CCLASS_PFN; - else if (strcmp(buf, "dax") == 0 || strcmp(buf, "dax\n") == 0) + else if (sysfs_streq(buf, "dax")) ndns->claim_class = NVDIMM_CCLASS_DAX; - else if (strcmp(buf, "") == 0 || strcmp(buf, "\n") == 0) + else if (sysfs_streq(buf, "")) ndns->claim_class = NVDIMM_CCLASS_NONE; else return -EINVAL; -- cgit v1.2.3-59-g8ed1b