aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/nvdimm
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvdimm')
-rw-r--r--drivers/nvdimm/Kconfig27
-rw-r--r--drivers/nvdimm/Makefile4
-rw-r--r--drivers/nvdimm/badrange.c26
-rw-r--r--drivers/nvdimm/blk.c343
-rw-r--r--drivers/nvdimm/btt.c94
-rw-r--r--drivers/nvdimm/btt.h5
-rw-r--r--drivers/nvdimm/btt_devs.c38
-rw-r--r--drivers/nvdimm/bus.c168
-rw-r--r--drivers/nvdimm/claim.c16
-rw-r--r--drivers/nvdimm/core.c192
-rw-r--r--drivers/nvdimm/dax_devs.c4
-rw-r--r--drivers/nvdimm/dimm.c9
-rw-r--r--drivers/nvdimm/dimm_devs.c391
-rw-r--r--drivers/nvdimm/e820.c18
-rw-r--r--drivers/nvdimm/label.c548
-rw-r--r--drivers/nvdimm/label.h102
-rw-r--r--drivers/nvdimm/namespace_devs.c787
-rw-r--r--drivers/nvdimm/nd-core.h101
-rw-r--r--drivers/nvdimm/nd.h327
-rw-r--r--drivers/nvdimm/nd_perf.c329
-rw-r--r--drivers/nvdimm/nd_virtio.c6
-rw-r--r--drivers/nvdimm/of_pmem.c5
-rw-r--r--drivers/nvdimm/pfn.h12
-rw-r--r--drivers/nvdimm/pfn_devs.c91
-rw-r--r--drivers/nvdimm/pmem.c496
-rw-r--r--drivers/nvdimm/pmem.h6
-rw-r--r--drivers/nvdimm/region.c48
-rw-r--r--drivers/nvdimm/region_devs.c323
-rw-r--r--drivers/nvdimm/security.c22
-rw-r--r--drivers/nvdimm/virtio_pmem.c15
30 files changed, 2238 insertions, 2315 deletions
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index b7d1eb38b27d..5a29046e3319 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -10,19 +10,16 @@ menuconfig LIBNVDIMM
ACPI-6-NFIT defined resources. On platforms that define an
NFIT, or otherwise can discover NVDIMM resources, a libnvdimm
bus is registered to advertise PMEM (persistent memory)
- namespaces (/dev/pmemX) and BLK (sliding mmio window(s))
- namespaces (/dev/ndblkX.Y). A PMEM namespace refers to a
+ namespaces (/dev/pmemX). A PMEM namespace refers to a
memory resource that may span multiple DIMMs and support DAX
- (see CONFIG_DAX). A BLK namespace refers to an NVDIMM control
- region which exposes an mmio register set for windowed access
- mode to non-volatile memory.
+ (see CONFIG_DAX).
if LIBNVDIMM
config BLK_DEV_PMEM
tristate "PMEM: Persistent memory block device support"
default LIBNVDIMM
- select DAX_DRIVER
+ select DAX
select ND_BTT if BTT
select ND_PFN if NVDIMM_PFN
help
@@ -38,19 +35,6 @@ config BLK_DEV_PMEM
Say Y if you want to use an NVDIMM
-config ND_BLK
- tristate "BLK: Block data window (aperture) device support"
- default LIBNVDIMM
- select ND_BTT if BTT
- help
- Support NVDIMMs, or other devices, that implement a BLK-mode
- access capability. BLK-mode access uses memory-mapped-i/o
- apertures to access persistent media.
-
- Say Y if your platform firmware emits an ACPI.NFIT table
- (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode
- capabilities.
-
config ND_CLAIM
bool
@@ -67,9 +51,8 @@ config BTT
applications that rely on sector writes not being torn (a
guarantee that typical disks provide) can continue to do so.
The BTT manifests itself as an alternate personality for an
- NVDIMM namespace, i.e. a namespace can be in raw mode (pmemX,
- ndblkX.Y, etc...), or 'sectored' mode, (pmemXs, ndblkX.Ys,
- etc...).
+ NVDIMM namespace, i.e. a namespace can be in raw mode pmemX,
+ or 'sectored' mode.
Select Y if unsure
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 29203f3d3069..ba0296dca9db 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -2,7 +2,6 @@
obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
obj-$(CONFIG_ND_BTT) += nd_btt.o
-obj-$(CONFIG_ND_BLK) += nd_blk.o
obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
obj-$(CONFIG_OF_PMEM) += of_pmem.o
obj-$(CONFIG_VIRTIO_PMEM) += virtio_pmem.o nd_virtio.o
@@ -11,13 +10,12 @@ nd_pmem-y := pmem.o
nd_btt-y := btt.o
-nd_blk-y := blk.o
-
nd_e820-y := e820.o
libnvdimm-y := core.o
libnvdimm-y += bus.o
libnvdimm-y += dimm_devs.o
+libnvdimm-$(CONFIG_PERF_EVENTS) += nd_perf.o
libnvdimm-y += dimm.o
libnvdimm-y += region_devs.o
libnvdimm-y += region.o
diff --git a/drivers/nvdimm/badrange.c b/drivers/nvdimm/badrange.c
index b9eeefa27e3a..aaf6e215a8c6 100644
--- a/drivers/nvdimm/badrange.c
+++ b/drivers/nvdimm/badrange.c
@@ -211,7 +211,7 @@ static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
}
static void badblocks_populate(struct badrange *badrange,
- struct badblocks *bb, const struct resource *res)
+ struct badblocks *bb, const struct range *range)
{
struct badrange_entry *bre;
@@ -222,34 +222,34 @@ static void badblocks_populate(struct badrange *badrange,
u64 bre_end = bre->start + bre->length - 1;
/* Discard intervals with no intersection */
- if (bre_end < res->start)
+ if (bre_end < range->start)
continue;
- if (bre->start > res->end)
+ if (bre->start > range->end)
continue;
/* Deal with any overlap after start of the namespace */
- if (bre->start >= res->start) {
+ if (bre->start >= range->start) {
u64 start = bre->start;
u64 len;
- if (bre_end <= res->end)
+ if (bre_end <= range->end)
len = bre->length;
else
- len = res->start + resource_size(res)
+ len = range->start + range_len(range)
- bre->start;
- __add_badblock_range(bb, start - res->start, len);
+ __add_badblock_range(bb, start - range->start, len);
continue;
}
/*
* Deal with overlap for badrange starting before
* the namespace.
*/
- if (bre->start < res->start) {
+ if (bre->start < range->start) {
u64 len;
- if (bre_end < res->end)
- len = bre->start + bre->length - res->start;
+ if (bre_end < range->end)
+ len = bre->start + bre->length - range->start;
else
- len = resource_size(res);
+ len = range_len(range);
__add_badblock_range(bb, 0, len);
}
}
@@ -267,7 +267,7 @@ static void badblocks_populate(struct badrange *badrange,
* and add badblocks entries for all matching sub-ranges
*/
void nvdimm_badblocks_populate(struct nd_region *nd_region,
- struct badblocks *bb, const struct resource *res)
+ struct badblocks *bb, const struct range *range)
{
struct nvdimm_bus *nvdimm_bus;
@@ -279,7 +279,7 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
nvdimm_bus_lock(&nvdimm_bus->dev);
- badblocks_populate(&nvdimm_bus->badrange, bb, res);
+ badblocks_populate(&nvdimm_bus->badrange, bb, range);
nvdimm_bus_unlock(&nvdimm_bus->dev);
}
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
deleted file mode 100644
index 677d6f45b5c4..000000000000
--- a/drivers/nvdimm/blk.c
+++ /dev/null
@@ -1,343 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * NVDIMM Block Window Driver
- * Copyright (c) 2014, Intel Corporation.
- */
-
-#include <linux/blkdev.h>
-#include <linux/fs.h>
-#include <linux/genhd.h>
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/nd.h>
-#include <linux/sizes.h>
-#include "nd.h"
-
-static u32 nsblk_meta_size(struct nd_namespace_blk *nsblk)
-{
- return nsblk->lbasize - ((nsblk->lbasize >= 4096) ? 4096 : 512);
-}
-
-static u32 nsblk_internal_lbasize(struct nd_namespace_blk *nsblk)
-{
- return roundup(nsblk->lbasize, INT_LBASIZE_ALIGNMENT);
-}
-
-static u32 nsblk_sector_size(struct nd_namespace_blk *nsblk)
-{
- return nsblk->lbasize - nsblk_meta_size(nsblk);
-}
-
-static resource_size_t to_dev_offset(struct nd_namespace_blk *nsblk,
- resource_size_t ns_offset, unsigned int len)
-{
- int i;
-
- for (i = 0; i < nsblk->num_resources; i++) {
- if (ns_offset < resource_size(nsblk->res[i])) {
- if (ns_offset + len > resource_size(nsblk->res[i])) {
- dev_WARN_ONCE(&nsblk->common.dev, 1,
- "illegal request\n");
- return SIZE_MAX;
- }
- return nsblk->res[i]->start + ns_offset;
- }
- ns_offset -= resource_size(nsblk->res[i]);
- }
-
- dev_WARN_ONCE(&nsblk->common.dev, 1, "request out of range\n");
- return SIZE_MAX;
-}
-
-static struct nd_blk_region *to_ndbr(struct nd_namespace_blk *nsblk)
-{
- struct nd_region *nd_region;
- struct device *parent;
-
- parent = nsblk->common.dev.parent;
- nd_region = container_of(parent, struct nd_region, dev);
- return container_of(nd_region, struct nd_blk_region, nd_region);
-}
-
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
- struct bio_integrity_payload *bip, u64 lba, int rw)
-{
- struct nd_blk_region *ndbr = to_ndbr(nsblk);
- unsigned int len = nsblk_meta_size(nsblk);
- resource_size_t dev_offset, ns_offset;
- u32 internal_lbasize, sector_size;
- int err = 0;
-
- internal_lbasize = nsblk_internal_lbasize(nsblk);
- sector_size = nsblk_sector_size(nsblk);
- ns_offset = lba * internal_lbasize + sector_size;
- dev_offset = to_dev_offset(nsblk, ns_offset, len);
- if (dev_offset == SIZE_MAX)
- return -EIO;
-
- while (len) {
- unsigned int cur_len;
- struct bio_vec bv;
- void *iobuf;
-
- bv = bvec_iter_bvec(bip->bip_vec, bip->bip_iter);
- /*
- * The 'bv' obtained from bvec_iter_bvec has its .bv_len and
- * .bv_offset already adjusted for iter->bi_bvec_done, and we
- * can use those directly
- */
-
- cur_len = min(len, bv.bv_len);
- iobuf = kmap_atomic(bv.bv_page);
- err = ndbr->do_io(ndbr, dev_offset, iobuf + bv.bv_offset,
- cur_len, rw);
- kunmap_atomic(iobuf);
- if (err)
- return err;
-
- len -= cur_len;
- dev_offset += cur_len;
- if (!bvec_iter_advance(bip->bip_vec, &bip->bip_iter, cur_len))
- return -EIO;
- }
-
- return err;
-}
-
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-static int nd_blk_rw_integrity(struct nd_namespace_blk *nsblk,
- struct bio_integrity_payload *bip, u64 lba, int rw)
-{
- return 0;
-}
-#endif
-
-static int nsblk_do_bvec(struct nd_namespace_blk *nsblk,
- struct bio_integrity_payload *bip, struct page *page,
- unsigned int len, unsigned int off, int rw, sector_t sector)
-{
- struct nd_blk_region *ndbr = to_ndbr(nsblk);
- resource_size_t dev_offset, ns_offset;
- u32 internal_lbasize, sector_size;
- int err = 0;
- void *iobuf;
- u64 lba;
-
- internal_lbasize = nsblk_internal_lbasize(nsblk);
- sector_size = nsblk_sector_size(nsblk);
- while (len) {
- unsigned int cur_len;
-
- /*
- * If we don't have an integrity payload, we don't have to
- * split the bvec into sectors, as this would cause unnecessary
- * Block Window setup/move steps. the do_io routine is capable
- * of handling len <= PAGE_SIZE.
- */
- cur_len = bip ? min(len, sector_size) : len;
-
- lba = div_u64(sector << SECTOR_SHIFT, sector_size);
- ns_offset = lba * internal_lbasize;
- dev_offset = to_dev_offset(nsblk, ns_offset, cur_len);
- if (dev_offset == SIZE_MAX)
- return -EIO;
-
- iobuf = kmap_atomic(page);
- err = ndbr->do_io(ndbr, dev_offset, iobuf + off, cur_len, rw);
- kunmap_atomic(iobuf);
- if (err)
- return err;
-
- if (bip) {
- err = nd_blk_rw_integrity(nsblk, bip, lba, rw);
- if (err)
- return err;
- }
- len -= cur_len;
- off += cur_len;
- sector += sector_size >> SECTOR_SHIFT;
- }
-
- return err;
-}
-
-static blk_qc_t nd_blk_make_request(struct request_queue *q, struct bio *bio)
-{
- struct bio_integrity_payload *bip;
- struct nd_namespace_blk *nsblk;
- struct bvec_iter iter;
- unsigned long start;
- struct bio_vec bvec;
- int err = 0, rw;
- bool do_acct;
-
- if (!bio_integrity_prep(bio))
- return BLK_QC_T_NONE;
-
- bip = bio_integrity(bio);
- nsblk = q->queuedata;
- rw = bio_data_dir(bio);
- do_acct = nd_iostat_start(bio, &start);
- bio_for_each_segment(bvec, bio, iter) {
- unsigned int len = bvec.bv_len;
-
- BUG_ON(len > PAGE_SIZE);
- err = nsblk_do_bvec(nsblk, bip, bvec.bv_page, len,
- bvec.bv_offset, rw, iter.bi_sector);
- if (err) {
- dev_dbg(&nsblk->common.dev,
- "io error in %s sector %lld, len %d,\n",
- (rw == READ) ? "READ" : "WRITE",
- (unsigned long long) iter.bi_sector, len);
- bio->bi_status = errno_to_blk_status(err);
- break;
- }
- }
- if (do_acct)
- nd_iostat_end(bio, start);
-
- bio_endio(bio);
- return BLK_QC_T_NONE;
-}
-
-static int nsblk_rw_bytes(struct nd_namespace_common *ndns,
- resource_size_t offset, void *iobuf, size_t n, int rw,
- unsigned long flags)
-{
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(&ndns->dev);
- struct nd_blk_region *ndbr = to_ndbr(nsblk);
- resource_size_t dev_offset;
-
- dev_offset = to_dev_offset(nsblk, offset, n);
-
- if (unlikely(offset + n > nsblk->size)) {
- dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
- return -EFAULT;
- }
-
- if (dev_offset == SIZE_MAX)
- return -EIO;
-
- return ndbr->do_io(ndbr, dev_offset, iobuf, n, rw);
-}
-
-static const struct block_device_operations nd_blk_fops = {
- .owner = THIS_MODULE,
- .revalidate_disk = nvdimm_revalidate_disk,
-};
-
-static void nd_blk_release_queue(void *q)
-{
- blk_cleanup_queue(q);
-}
-
-static void nd_blk_release_disk(void *disk)
-{
- del_gendisk(disk);
- put_disk(disk);
-}
-
-static int nsblk_attach_disk(struct nd_namespace_blk *nsblk)
-{
- struct device *dev = &nsblk->common.dev;
- resource_size_t available_disk_size;
- struct request_queue *q;
- struct gendisk *disk;
- u64 internal_nlba;
-
- internal_nlba = div_u64(nsblk->size, nsblk_internal_lbasize(nsblk));
- available_disk_size = internal_nlba * nsblk_sector_size(nsblk);
-
- q = blk_alloc_queue(GFP_KERNEL);
- if (!q)
- return -ENOMEM;
- if (devm_add_action_or_reset(dev, nd_blk_release_queue, q))
- return -ENOMEM;
-
- blk_queue_make_request(q, nd_blk_make_request);
- blk_queue_max_hw_sectors(q, UINT_MAX);
- blk_queue_logical_block_size(q, nsblk_sector_size(nsblk));
- blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
- q->queuedata = nsblk;
-
- disk = alloc_disk(0);
- if (!disk)
- return -ENOMEM;
-
- disk->first_minor = 0;
- disk->fops = &nd_blk_fops;
- disk->queue = q;
- disk->flags = GENHD_FL_EXT_DEVT;
- nvdimm_namespace_disk_name(&nsblk->common, disk->disk_name);
-
- if (devm_add_action_or_reset(dev, nd_blk_release_disk, disk))
- return -ENOMEM;
-
- if (nsblk_meta_size(nsblk)) {
- int rc = nd_integrity_init(disk, nsblk_meta_size(nsblk));
-
- if (rc)
- return rc;
- }
-
- set_capacity(disk, available_disk_size >> SECTOR_SHIFT);
- device_add_disk(dev, disk, NULL);
- revalidate_disk(disk);
- return 0;
-}
-
-static int nd_blk_probe(struct device *dev)
-{
- struct nd_namespace_common *ndns;
- struct nd_namespace_blk *nsblk;
-
- ndns = nvdimm_namespace_common_probe(dev);
- if (IS_ERR(ndns))
- return PTR_ERR(ndns);
-
- nsblk = to_nd_namespace_blk(&ndns->dev);
- nsblk->size = nvdimm_namespace_capacity(ndns);
- dev_set_drvdata(dev, nsblk);
-
- ndns->rw_bytes = nsblk_rw_bytes;
- if (is_nd_btt(dev))
- return nvdimm_namespace_attach_btt(ndns);
- else if (nd_btt_probe(dev, ndns) == 0) {
- /* we'll come back as btt-blk */
- return -ENXIO;
- } else
- return nsblk_attach_disk(nsblk);
-}
-
-static int nd_blk_remove(struct device *dev)
-{
- if (is_nd_btt(dev))
- nvdimm_namespace_detach_btt(to_nd_btt(dev));
- return 0;
-}
-
-static struct nd_device_driver nd_blk_driver = {
- .probe = nd_blk_probe,
- .remove = nd_blk_remove,
- .drv = {
- .name = "nd_blk",
- },
- .type = ND_DRIVER_NAMESPACE_BLK,
-};
-
-static int __init nd_blk_init(void)
-{
- return nd_driver_register(&nd_blk_driver);
-}
-
-static void __exit nd_blk_exit(void)
-{
- driver_unregister(&nd_blk_driver.drv);
-}
-
-MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_BLK);
-module_init(nd_blk_init);
-module_exit(nd_blk_exit);
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index 0d04ea3d9fd7..0297b7882e33 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -6,11 +6,11 @@
#include <linux/highmem.h>
#include <linux/debugfs.h>
#include <linux/blkdev.h>
+#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/hdreg.h>
-#include <linux/genhd.h>
#include <linux/sizes.h>
#include <linux/ndctl.h>
#include <linux/fs.h>
@@ -972,7 +972,7 @@ static int btt_arena_write_layout(struct arena_info *arena)
u64 sum;
struct btt_sb *super;
struct nd_btt *nd_btt = arena->nd_btt;
- const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
+ const uuid_t *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
ret = btt_map_init(arena);
if (ret)
@@ -987,8 +987,8 @@ static int btt_arena_write_layout(struct arena_info *arena)
return -ENOMEM;
strncpy(super->signature, BTT_SIG, BTT_SIG_LEN);
- memcpy(super->uuid, nd_btt->uuid, 16);
- memcpy(super->parent_uuid, parent_uuid, 16);
+ export_uuid(super->uuid, nd_btt->uuid);
+ export_uuid(super->parent_uuid, parent_uuid);
super->flags = cpu_to_le32(arena->flags);
super->version_major = cpu_to_le16(arena->version_major);
super->version_minor = cpu_to_le16(arena->version_minor);
@@ -1163,17 +1163,15 @@ static int btt_rw_integrity(struct btt *btt, struct bio_integrity_payload *bip,
*/
cur_len = min(len, bv.bv_len);
- mem = kmap_atomic(bv.bv_page);
+ mem = bvec_kmap_local(&bv);
if (rw)
- ret = arena_write_bytes(arena, meta_nsoff,
- mem + bv.bv_offset, cur_len,
+ ret = arena_write_bytes(arena, meta_nsoff, mem, cur_len,
NVDIMM_IO_ATOMIC);
else
- ret = arena_read_bytes(arena, meta_nsoff,
- mem + bv.bv_offset, cur_len,
+ ret = arena_read_bytes(arena, meta_nsoff, mem, cur_len,
NVDIMM_IO_ATOMIC);
- kunmap_atomic(mem);
+ kunmap_local(mem);
if (ret)
return ret;
@@ -1424,7 +1422,7 @@ static int btt_write_pg(struct btt *btt, struct bio_integrity_payload *bip,
static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
struct page *page, unsigned int len, unsigned int off,
- unsigned int op, sector_t sector)
+ enum req_op op, sector_t sector)
{
int ret;
@@ -1439,10 +1437,10 @@ static int btt_do_bvec(struct btt *btt, struct bio_integrity_payload *bip,
return ret;
}
-static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
+static void btt_submit_bio(struct bio *bio)
{
struct bio_integrity_payload *bip = bio_integrity(bio);
- struct btt *btt = q->queuedata;
+ struct btt *btt = bio->bi_bdev->bd_disk->private_data;
struct bvec_iter iter;
unsigned long start;
struct bio_vec bvec;
@@ -1450,9 +1448,11 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
bool do_acct;
if (!bio_integrity_prep(bio))
- return BLK_QC_T_NONE;
+ return;
- do_acct = nd_iostat_start(bio, &start);
+ do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
+ if (do_acct)
+ start = bio_start_io_acct(bio);
bio_for_each_segment(bvec, bio, iter) {
unsigned int len = bvec.bv_len;
@@ -1477,21 +1477,18 @@ static blk_qc_t btt_make_request(struct request_queue *q, struct bio *bio)
}
}
if (do_acct)
- nd_iostat_end(bio, start);
+ bio_end_io_acct(bio, start);
bio_endio(bio);
- return BLK_QC_T_NONE;
}
static int btt_rw_page(struct block_device *bdev, sector_t sector,
- struct page *page, unsigned int op)
+ struct page *page, enum req_op op)
{
struct btt *btt = bdev->bd_disk->private_data;
int rc;
- unsigned int len;
- len = hpage_nr_pages(page) * PAGE_SIZE;
- rc = btt_do_bvec(btt, NULL, page, len, 0, op, sector);
+ rc = btt_do_bvec(btt, NULL, page, thp_size(page), 0, op, sector);
if (rc == 0)
page_endio(page, op_is_write(op), 0);
@@ -1510,65 +1507,55 @@ static int btt_getgeo(struct block_device *bd, struct hd_geometry *geo)
static const struct block_device_operations btt_fops = {
.owner = THIS_MODULE,
+ .submit_bio = btt_submit_bio,
.rw_page = btt_rw_page,
.getgeo = btt_getgeo,
- .revalidate_disk = nvdimm_revalidate_disk,
};
static int btt_blk_init(struct btt *btt)
{
struct nd_btt *nd_btt = btt->nd_btt;
struct nd_namespace_common *ndns = nd_btt->ndns;
+ int rc = -ENOMEM;
- /* create a new disk and request queue for btt */
- btt->btt_queue = blk_alloc_queue(GFP_KERNEL);
- if (!btt->btt_queue)
+ btt->btt_disk = blk_alloc_disk(NUMA_NO_NODE);
+ if (!btt->btt_disk)
return -ENOMEM;
- btt->btt_disk = alloc_disk(0);
- if (!btt->btt_disk) {
- blk_cleanup_queue(btt->btt_queue);
- return -ENOMEM;
- }
-
nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name);
btt->btt_disk->first_minor = 0;
btt->btt_disk->fops = &btt_fops;
btt->btt_disk->private_data = btt;
- btt->btt_disk->queue = btt->btt_queue;
- btt->btt_disk->flags = GENHD_FL_EXT_DEVT;
- btt->btt_disk->queue->backing_dev_info->capabilities |=
- BDI_CAP_SYNCHRONOUS_IO;
- blk_queue_make_request(btt->btt_queue, btt_make_request);
- blk_queue_logical_block_size(btt->btt_queue, btt->sector_size);
- blk_queue_max_hw_sectors(btt->btt_queue, UINT_MAX);
- blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_queue);
- btt->btt_queue->queuedata = btt;
+ blk_queue_logical_block_size(btt->btt_disk->queue, btt->sector_size);
+ blk_queue_max_hw_sectors(btt->btt_disk->queue, UINT_MAX);
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, btt->btt_disk->queue);
if (btt_meta_size(btt)) {
- int rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));
-
- if (rc) {
- del_gendisk(btt->btt_disk);
- put_disk(btt->btt_disk);
- blk_cleanup_queue(btt->btt_queue);
- return rc;
- }
+ rc = nd_integrity_init(btt->btt_disk, btt_meta_size(btt));
+ if (rc)
+ goto out_cleanup_disk;
}
+
set_capacity(btt->btt_disk, btt->nlba * btt->sector_size >> 9);
- device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL);
+ rc = device_add_disk(&btt->nd_btt->dev, btt->btt_disk, NULL);
+ if (rc)
+ goto out_cleanup_disk;
+
btt->nd_btt->size = btt->nlba * (u64)btt->sector_size;
- revalidate_disk(btt->btt_disk);
+ nvdimm_check_and_set_ro(btt->btt_disk);
return 0;
+
+out_cleanup_disk:
+ put_disk(btt->btt_disk);
+ return rc;
}
static void btt_blk_cleanup(struct btt *btt)
{
del_gendisk(btt->btt_disk);
put_disk(btt->btt_disk);
- blk_cleanup_queue(btt->btt_queue);
}
/**
@@ -1589,7 +1576,8 @@ static void btt_blk_cleanup(struct btt *btt)
* Pointer to a new struct btt on success, NULL on failure.
*/
static struct btt *btt_init(struct nd_btt *nd_btt, unsigned long long rawsize,
- u32 lbasize, u8 *uuid, struct nd_region *nd_region)
+ u32 lbasize, uuid_t *uuid,
+ struct nd_region *nd_region)
{
int ret;
struct btt *btt;
@@ -1708,7 +1696,7 @@ int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
}
nd_region = to_nd_region(nd_btt->dev.parent);
btt = btt_init(nd_btt, rawsize, nd_btt->lbasize, nd_btt->uuid,
- nd_region);
+ nd_region);
if (!btt)
return -ENOMEM;
nd_btt->btt = btt;
diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h
index 2e258bee7db2..0c76c0333f6e 100644
--- a/drivers/nvdimm/btt.h
+++ b/drivers/nvdimm/btt.h
@@ -7,7 +7,6 @@
#ifndef _LINUX_BTT_H
#define _LINUX_BTT_H
-#include <linux/badblocks.h>
#include <linux/types.h>
#define BTT_SIG_LEN 16
@@ -197,10 +196,11 @@ struct arena_info {
int log_index[2];
};
+struct badblocks;
+
/**
* struct btt - handle for a BTT instance
* @btt_disk: Pointer to the gendisk for BTT device
- * @btt_queue: Pointer to the request queue for the BTT device
* @arena_list: Head of the list of arenas
* @debugfs_dir: Debugfs dentry
* @nd_btt: Parent nd_btt struct
@@ -218,7 +218,6 @@ struct arena_info {
*/
struct btt {
struct gendisk *btt_disk;
- struct request_queue *btt_queue;
struct list_head arena_list;
struct dentry *debugfs_dir;
struct nd_btt *nd_btt;
diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index 05feb97e11ce..fabbb31f2c35 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -4,7 +4,6 @@
*/
#include <linux/blkdev.h>
#include <linux/device.h>
-#include <linux/genhd.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/fs.h>
@@ -51,14 +50,14 @@ static ssize_t sector_size_store(struct device *dev,
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
btt_lbasize_supported);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc ? rc : len;
}
@@ -80,11 +79,11 @@ static ssize_t uuid_store(struct device *dev,
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc ? rc : len;
}
@@ -109,13 +108,13 @@ static ssize_t namespace_store(struct device *dev,
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc;
}
@@ -127,14 +126,14 @@ static ssize_t size_show(struct device *dev,
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
if (dev->driver)
rc = sprintf(buf, "%llu\n", nd_btt->size);
else {
/* no size to convey if the btt instance is disabled */
rc = -ENXIO;
}
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc;
}
@@ -179,9 +178,11 @@ bool is_nd_btt(struct device *dev)
}
EXPORT_SYMBOL(is_nd_btt);
+static struct lock_class_key nvdimm_btt_key;
+
static struct device *__nd_btt_create(struct nd_region *nd_region,
- unsigned long lbasize, u8 *uuid,
- struct nd_namespace_common *ndns)
+ unsigned long lbasize, uuid_t *uuid,
+ struct nd_namespace_common *ndns)
{
struct nd_btt *nd_btt;
struct device *dev;
@@ -206,6 +207,7 @@ static struct device *__nd_btt_create(struct nd_region *nd_region,
dev->parent = &nd_region->dev;
dev->type = &nd_btt_device_type;
device_initialize(&nd_btt->dev);
+ lockdep_set_class(&nd_btt->dev.mutex, &nvdimm_btt_key);
if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) {
dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
dev_name(ndns->claim));
@@ -226,7 +228,7 @@ struct device *nd_btt_create(struct nd_region *nd_region)
{
struct device *dev = __nd_btt_create(nd_region, 0, NULL, NULL);
- __nd_device_register(dev);
+ nd_device_register(dev);
return dev;
}
@@ -244,14 +246,16 @@ struct device *nd_btt_create(struct nd_region *nd_region)
*/
bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super)
{
- const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
+ const uuid_t *ns_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev);
+ uuid_t parent_uuid;
u64 checksum;
if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0)
return false;
- if (!guid_is_null((guid_t *)&super->parent_uuid))
- if (memcmp(super->parent_uuid, parent_uuid, 16) != 0)
+ import_uuid(&parent_uuid, super->parent_uuid);
+ if (!uuid_is_null(&parent_uuid))
+ if (!uuid_equal(&parent_uuid, ns_uuid))
return false;
checksum = le64_to_cpu(super->checksum);
@@ -319,11 +323,11 @@ static int __nd_btt_probe(struct nd_btt *nd_btt,
return rc;
nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize);
- nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL);
+ nd_btt->uuid = kmemdup(&btt_sb->uuid, sizeof(uuid_t), GFP_KERNEL);
if (!nd_btt->uuid)
return -ENOMEM;
- __nd_device_register(&nd_btt->dev);
+ nd_device_register(&nd_btt->dev);
return 0;
}
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index a8b515968569..b38d0355b0ac 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -11,7 +11,6 @@
#include <linux/blkdev.h>
#include <linux/fcntl.h>
#include <linux/async.h>
-#include <linux/genhd.h>
#include <linux/ndctl.h>
#include <linux/sched.h>
#include <linux/slab.h>
@@ -35,8 +34,6 @@ static int to_nd_device_type(struct device *dev)
return ND_DEVICE_DIMM;
else if (is_memory(dev))
return ND_DEVICE_REGION_PMEM;
- else if (is_nd_blk(dev))
- return ND_DEVICE_REGION_BLK;
else if (is_nd_dax(dev))
return ND_DEVICE_DAX_PMEM;
else if (is_nd_region(dev->parent))
@@ -91,10 +88,7 @@ static int nvdimm_bus_probe(struct device *dev)
dev->driver->name, dev_name(dev));
nvdimm_bus_probe_start(nvdimm_bus);
- debug_nvdimm_lock(dev);
rc = nd_drv->probe(dev);
- debug_nvdimm_unlock(dev);
-
if ((rc == 0 || rc == -EOPNOTSUPP) &&
dev->parent && is_nd_region(dev->parent))
nd_region_advance_seeds(to_nd_region(dev->parent), dev);
@@ -108,23 +102,18 @@ static int nvdimm_bus_probe(struct device *dev)
return rc;
}
-static int nvdimm_bus_remove(struct device *dev)
+static void nvdimm_bus_remove(struct device *dev)
{
struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
struct module *provider = to_bus_provider(dev);
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
- int rc = 0;
- if (nd_drv->remove) {
- debug_nvdimm_lock(dev);
- rc = nd_drv->remove(dev);
- debug_nvdimm_unlock(dev);
- }
+ if (nd_drv->remove)
+ nd_drv->remove(dev);
- dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
- dev_name(dev), rc);
+ dev_dbg(&nvdimm_bus->dev, "%s.remove(%s)\n", dev->driver->name,
+ dev_name(dev));
module_put(provider);
- return rc;
}
static void nvdimm_bus_shutdown(struct device *dev)
@@ -144,7 +133,7 @@ static void nvdimm_bus_shutdown(struct device *dev)
void nd_device_notify(struct device *dev, enum nvdimm_event event)
{
- nd_device_lock(dev);
+ device_lock(dev);
if (dev->driver) {
struct nd_device_driver *nd_drv;
@@ -152,7 +141,7 @@ void nd_device_notify(struct device *dev, enum nvdimm_event event)
if (nd_drv->notify)
nd_drv->notify(dev, event);
}
- nd_device_unlock(dev);
+ device_unlock(dev);
}
EXPORT_SYMBOL(nd_device_notify);
@@ -187,8 +176,8 @@ static int nvdimm_clear_badblocks_region(struct device *dev, void *data)
ndr_end = nd_region->ndr_start + nd_region->ndr_size - 1;
/* make sure we are in the region */
- if (ctx->phys < nd_region->ndr_start
- || (ctx->phys + ctx->cleared) > ndr_end)
+ if (ctx->phys < nd_region->ndr_start ||
+ (ctx->phys + ctx->cleared - 1) > ndr_end)
return 0;
sector = (ctx->phys - nd_region->ndr_start) / 512;
@@ -339,6 +328,8 @@ struct nvdimm_bus *nvdimm_to_bus(struct nvdimm *nvdimm)
}
EXPORT_SYMBOL_GPL(nvdimm_to_bus);
+static struct lock_class_key nvdimm_bus_key;
+
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nd_desc)
{
@@ -364,8 +355,14 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
nvdimm_bus->dev.groups = nd_desc->attr_groups;
nvdimm_bus->dev.bus = &nvdimm_bus_type;
nvdimm_bus->dev.of_node = nd_desc->of_node;
- dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
- rc = device_register(&nvdimm_bus->dev);
+ device_initialize(&nvdimm_bus->dev);
+ lockdep_set_class(&nvdimm_bus->dev.mutex, &nvdimm_bus_key);
+ device_set_pm_not_required(&nvdimm_bus->dev);
+ rc = dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
+ if (rc)
+ goto err;
+
+ rc = device_add(&nvdimm_bus->dev);
if (rc) {
dev_dbg(&nvdimm_bus->dev, "registration failed: %d\n", rc);
goto err;
@@ -397,21 +394,10 @@ static int child_unregister(struct device *dev, void *data)
if (dev->class)
return 0;
- if (is_nvdimm(dev)) {
- struct nvdimm *nvdimm = to_nvdimm(dev);
- bool dev_put = false;
-
- /* We are shutting down. Make state frozen artificially. */
- nvdimm_bus_lock(dev);
- set_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags);
- if (test_and_clear_bit(NDD_WORK_PENDING, &nvdimm->flags))
- dev_put = true;
- nvdimm_bus_unlock(dev);
- cancel_delayed_work_sync(&nvdimm->dwork);
- if (dev_put)
- put_device(dev);
- }
- nd_device_unregister(dev, ND_SYNC);
+ if (is_nvdimm(dev))
+ nvdimm_delete(to_nvdimm(dev));
+ else
+ nd_device_unregister(dev, ND_SYNC);
return 0;
}
@@ -427,7 +413,7 @@ static void free_badrange_list(struct list_head *badrange_list)
list_del_init(badrange_list);
}
-static int nd_bus_remove(struct device *dev)
+static void nd_bus_remove(struct device *dev)
{
struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
@@ -446,8 +432,6 @@ static int nd_bus_remove(struct device *dev)
spin_unlock(&nvdimm_bus->badrange.lock);
nvdimm_bus_destroy_ndctl(nvdimm_bus);
-
- return 0;
}
static int nd_bus_probe(struct device *dev)
@@ -524,7 +508,7 @@ static void nd_async_device_unregister(void *d, async_cookie_t cookie)
put_device(dev);
}
-void __nd_device_register(struct device *dev)
+void nd_device_register(struct device *dev)
{
if (!dev)
return;
@@ -539,6 +523,7 @@ void __nd_device_register(struct device *dev)
set_dev_node(dev, to_nd_region(dev)->numa_node);
dev->bus = &nvdimm_bus_type;
+ device_set_pm_not_required(dev);
if (dev->parent) {
get_device(dev->parent);
if (dev_to_node(dev) == NUMA_NO_NODE)
@@ -549,12 +534,6 @@ void __nd_device_register(struct device *dev)
async_schedule_dev_domain(nd_async_device_register, dev,
&nd_async_domain);
}
-
-void nd_device_register(struct device *dev)
-{
- device_initialize(dev);
- __nd_device_register(dev);
-}
EXPORT_SYMBOL(nd_device_register);
void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
@@ -584,9 +563,9 @@ void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
* or otherwise let the async path handle it if the
* unregistration was already queued.
*/
- nd_device_lock(dev);
+ device_lock(dev);
killed = kill_device(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
if (!killed)
return;
@@ -628,27 +607,22 @@ int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
}
EXPORT_SYMBOL(__nd_driver_register);
-int nvdimm_revalidate_disk(struct gendisk *disk)
+void nvdimm_check_and_set_ro(struct gendisk *disk)
{
struct device *dev = disk_to_dev(disk)->parent;
struct nd_region *nd_region = to_nd_region(dev->parent);
int disk_ro = get_disk_ro(disk);
- /*
- * Upgrade to read-only if the region is read-only preserve as
- * read-only if the disk is already read-only.
- */
- if (disk_ro || nd_region->ro == disk_ro)
- return 0;
-
- dev_info(dev, "%s read-only, marking %s read-only\n",
- dev_name(&nd_region->dev), disk->disk_name);
- set_disk_ro(disk, 1);
-
- return 0;
+ /* catch the disk up with the region ro state */
+ if (disk_ro == nd_region->ro)
+ return;
+ dev_info(dev, "%s read-%s, marking %s read-%s\n",
+ dev_name(&nd_region->dev), nd_region->ro ? "only" : "write",
+ disk->disk_name, nd_region->ro ? "only" : "write");
+ set_disk_ro(disk, nd_region->ro);
}
-EXPORT_SYMBOL(nvdimm_revalidate_disk);
+EXPORT_SYMBOL(nvdimm_check_and_set_ro);
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
char *buf)
@@ -736,18 +710,44 @@ const struct attribute_group nd_numa_attribute_group = {
.is_visible = nd_numa_attr_visible,
};
+static void ndctl_release(struct device *dev)
+{
+ kfree(dev);
+}
+
+static struct lock_class_key nvdimm_ndctl_key;
+
int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
{
dev_t devt = MKDEV(nvdimm_bus_major, nvdimm_bus->id);
struct device *dev;
+ int rc;
- dev = device_create(nd_class, &nvdimm_bus->dev, devt, nvdimm_bus,
- "ndctl%d", nvdimm_bus->id);
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+ device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &nvdimm_ndctl_key);
+ device_set_pm_not_required(dev);
+ dev->class = nd_class;
+ dev->parent = &nvdimm_bus->dev;
+ dev->devt = devt;
+ dev->release = ndctl_release;
+ rc = dev_set_name(dev, "ndctl%d", nvdimm_bus->id);
+ if (rc)
+ goto err;
+
+ rc = device_add(dev);
+ if (rc) {
+ dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %d\n",
+ nvdimm_bus->id, rc);
+ goto err;
+ }
+ return 0;
- if (IS_ERR(dev))
- dev_dbg(&nvdimm_bus->dev, "failed to register ndctl%d: %ld\n",
- nvdimm_bus->id, PTR_ERR(dev));
- return PTR_ERR_OR_ZERO(dev);
+err:
+ put_device(dev);
+ return rc;
}
void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
@@ -924,10 +924,10 @@ void wait_nvdimm_bus_probe_idle(struct device *dev)
if (nvdimm_bus->probe_active == 0)
break;
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
wait_event(nvdimm_bus->wait,
nvdimm_bus->probe_active == 0);
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
} while (true);
}
@@ -1037,13 +1037,31 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
dimm_name = "bus";
}
+ /* Validate command family support against bus declared support */
if (cmd == ND_CMD_CALL) {
+ unsigned long *mask;
+
if (copy_from_user(&pkg, p, sizeof(pkg)))
return -EFAULT;
+
+ if (nvdimm) {
+ if (pkg.nd_family > NVDIMM_FAMILY_MAX)
+ return -EINVAL;
+ mask = &nd_desc->dimm_family_mask;
+ } else {
+ if (pkg.nd_family > NVDIMM_BUS_FAMILY_MAX)
+ return -EINVAL;
+ mask = &nd_desc->bus_family_mask;
+ }
+
+ if (!test_bit(pkg.nd_family, mask))
+ return -EINVAL;
}
- if (!desc || (desc->out_num + desc->in_num == 0) ||
- !test_bit(cmd, &cmd_mask))
+ if (!desc ||
+ (desc->out_num + desc->in_num == 0) ||
+ cmd > ND_CMD_CALL ||
+ !test_bit(cmd, &cmd_mask))
return -ENOTTY;
/* fail write commands (when read-only) */
@@ -1143,7 +1161,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
goto out;
}
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
if (rc)
@@ -1165,7 +1183,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
out_unlock:
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
out:
kfree(in_env);
kfree(out_env);
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 45964acba944..030dbde6b088 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -4,6 +4,7 @@
*/
#include <linux/device.h>
#include <linux/sizes.h>
+#include <linux/badblocks.h>
#include "nd-core.h"
#include "pmem.h"
#include "pfn.h"
@@ -268,7 +269,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
if (rw == READ) {
if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
return -EIO;
- if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
+ if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0)
return -EIO;
return 0;
}
@@ -303,13 +304,16 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio,
resource_size_t size)
{
- struct resource *res = &nsio->res;
struct nd_namespace_common *ndns = &nsio->common;
+ struct range range = {
+ .start = nsio->res.start,
+ .end = nsio->res.end,
+ };
nsio->size = size;
- if (!devm_request_mem_region(dev, res->start, size,
+ if (!devm_request_mem_region(dev, range.start, size,
dev_name(&ndns->dev))) {
- dev_warn(dev, "could not reserve region %pR\n", res);
+ dev_warn(dev, "could not reserve region %pR\n", &nsio->res);
return -EBUSY;
}
@@ -317,9 +321,9 @@ int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio,
if (devm_init_badblocks(dev, &nsio->bb))
return -ENOMEM;
nvdimm_badblocks_populate(to_nd_region(ndns->dev.parent), &nsio->bb,
- &nsio->res);
+ &range);
- nsio->addr = devm_memremap(dev, res->start, size, ARCH_MEMREMAP_PMEM);
+ nsio->addr = devm_memremap(dev, range.start, size, ARCH_MEMREMAP_PMEM);
return PTR_ERR_OR_ZERO(nsio->addr);
}
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index fe9bd6febdd2..d91799b71d23 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -3,10 +3,11 @@
* Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
*/
#include <linux/libnvdimm.h>
-#include <linux/badblocks.h>
+#include <linux/suspend.h>
#include <linux/export.h>
#include <linux/module.h>
#include <linux/blkdev.h>
+#include <linux/blk-integrity.h>
#include <linux/device.h>
#include <linux/ctype.h>
#include <linux/ndctl.h>
@@ -206,38 +207,6 @@ struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus)
}
EXPORT_SYMBOL_GPL(to_nvdimm_bus_dev);
-static bool is_uuid_sep(char sep)
-{
- if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0')
- return true;
- return false;
-}
-
-static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
- size_t len)
-{
- const char *str = buf;
- u8 uuid[16];
- int i;
-
- for (i = 0; i < 16; i++) {
- if (!isxdigit(str[0]) || !isxdigit(str[1])) {
- dev_dbg(dev, "pos: %d buf[%zd]: %c buf[%zd]: %c\n",
- i, str - buf, str[0],
- str + 1 - buf, str[1]);
- return -EINVAL;
- }
-
- uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]);
- str += 2;
- if (is_uuid_sep(*str))
- str++;
- }
-
- memcpy(uuid_out, uuid, sizeof(uuid));
- return 0;
-}
-
/**
* nd_uuid_store: common implementation for writing 'uuid' sysfs attributes
* @dev: container device for the uuid property
@@ -246,23 +215,23 @@ static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
*
* Enforce that uuids can only be changed while the device is disabled
* (driver detached)
- * LOCKING: expects nd_device_lock() is held on entry
+ * LOCKING: expects device_lock() is held on entry
*/
-int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
+int nd_uuid_store(struct device *dev, uuid_t **uuid_out, const char *buf,
size_t len)
{
- u8 uuid[16];
+ uuid_t uuid;
int rc;
if (dev->driver)
return -EBUSY;
- rc = nd_uuid_parse(dev, uuid, buf, len);
+ rc = uuid_parse(buf, &uuid);
if (rc)
return rc;
kfree(*uuid_out);
- *uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL);
+ *uuid_out = kmemdup(&uuid, sizeof(uuid), GFP_KERNEL);
if (!(*uuid_out))
return -ENOMEM;
@@ -347,15 +316,15 @@ static DEVICE_ATTR_RO(provider);
static int flush_namespaces(struct device *dev, void *data)
{
- nd_device_lock(dev);
- nd_device_unlock(dev);
+ device_lock(dev);
+ device_unlock(dev);
return 0;
}
static int flush_regions_dimms(struct device *dev, void *data)
{
- nd_device_lock(dev);
- nd_device_unlock(dev);
+ device_lock(dev);
+ device_unlock(dev);
device_for_each_child(dev, NULL, flush_namespaces);
return 0;
}
@@ -389,8 +358,147 @@ static const struct attribute_group nvdimm_bus_attribute_group = {
.attrs = nvdimm_bus_attributes,
};
+static ssize_t capability_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+ enum nvdimm_fwa_capability cap;
+
+ if (!nd_desc->fw_ops)
+ return -EOPNOTSUPP;
+
+ cap = nd_desc->fw_ops->capability(nd_desc);
+
+ switch (cap) {
+ case NVDIMM_FWA_CAP_QUIESCE:
+ return sprintf(buf, "quiesce\n");
+ case NVDIMM_FWA_CAP_LIVE:
+ return sprintf(buf, "live\n");
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static DEVICE_ATTR_RO(capability);
+
+static ssize_t activate_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+ enum nvdimm_fwa_capability cap;
+ enum nvdimm_fwa_state state;
+
+ if (!nd_desc->fw_ops)
+ return -EOPNOTSUPP;
+
+ cap = nd_desc->fw_ops->capability(nd_desc);
+ state = nd_desc->fw_ops->activate_state(nd_desc);
+
+ if (cap < NVDIMM_FWA_CAP_QUIESCE)
+ return -EOPNOTSUPP;
+
+ switch (state) {
+ case NVDIMM_FWA_IDLE:
+ return sprintf(buf, "idle\n");
+ case NVDIMM_FWA_BUSY:
+ return sprintf(buf, "busy\n");
+ case NVDIMM_FWA_ARMED:
+ return sprintf(buf, "armed\n");
+ case NVDIMM_FWA_ARM_OVERFLOW:
+ return sprintf(buf, "overflow\n");
+ default:
+ return -ENXIO;
+ }
+}
+
+static int exec_firmware_activate(void *data)
+{
+ struct nvdimm_bus_descriptor *nd_desc = data;
+
+ return nd_desc->fw_ops->activate(nd_desc);
+}
+
+static ssize_t activate_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+ enum nvdimm_fwa_state state;
+ bool quiesce;
+ ssize_t rc;
+
+ if (!nd_desc->fw_ops)
+ return -EOPNOTSUPP;
+
+ if (sysfs_streq(buf, "live"))
+ quiesce = false;
+ else if (sysfs_streq(buf, "quiesce"))
+ quiesce = true;
+ else
+ return -EINVAL;
+
+ state = nd_desc->fw_ops->activate_state(nd_desc);
+
+ switch (state) {
+ case NVDIMM_FWA_BUSY:
+ rc = -EBUSY;
+ break;
+ case NVDIMM_FWA_ARMED:
+ case NVDIMM_FWA_ARM_OVERFLOW:
+ if (quiesce)
+ rc = hibernate_quiet_exec(exec_firmware_activate, nd_desc);
+ else
+ rc = nd_desc->fw_ops->activate(nd_desc);
+ break;
+ case NVDIMM_FWA_IDLE:
+ default:
+ rc = -ENXIO;
+ }
+
+ if (rc == 0)
+ rc = len;
+ return rc;
+}
+
+static DEVICE_ATTR_ADMIN_RW(activate);
+
+static umode_t nvdimm_bus_firmware_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, typeof(*dev), kobj);
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+ enum nvdimm_fwa_capability cap;
+
+ /*
+ * Both 'activate' and 'capability' disappear when no ops
+ * detected, or a negative capability is indicated.
+ */
+ if (!nd_desc->fw_ops)
+ return 0;
+
+ cap = nd_desc->fw_ops->capability(nd_desc);
+ if (cap < NVDIMM_FWA_CAP_QUIESCE)
+ return 0;
+
+ return a->mode;
+}
+static struct attribute *nvdimm_bus_firmware_attributes[] = {
+ &dev_attr_activate.attr,
+ &dev_attr_capability.attr,
+ NULL,
+};
+
+static const struct attribute_group nvdimm_bus_firmware_attribute_group = {
+ .name = "firmware",
+ .attrs = nvdimm_bus_firmware_attributes,
+ .is_visible = nvdimm_bus_firmware_visible,
+};
+
const struct attribute_group *nvdimm_bus_attribute_groups[] = {
&nvdimm_bus_attribute_group,
+ &nvdimm_bus_firmware_attribute_group,
NULL,
};
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c
index 99965077bac4..7f4a9d28b670 100644
--- a/drivers/nvdimm/dax_devs.c
+++ b/drivers/nvdimm/dax_devs.c
@@ -80,7 +80,7 @@ struct device *nd_dax_create(struct nd_region *nd_region)
nd_dax = nd_dax_alloc(nd_region);
if (nd_dax)
dev = nd_pfn_devinit(&nd_dax->nd_pfn, NULL);
- __nd_device_register(dev);
+ nd_device_register(dev);
return dev;
}
@@ -119,7 +119,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
nd_detach_ndns(dax_dev, &nd_pfn->ndns);
put_device(dax_dev);
} else
- __nd_device_register(dax_dev);
+ nd_device_register(dax_dev);
return rc;
}
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index 64776ed15bb3..91d9163ee303 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -99,7 +99,7 @@ static int nvdimm_probe(struct device *dev)
if (ndd->ns_current >= 0) {
rc = nd_label_reserve_dpa(ndd);
if (rc == 0)
- nvdimm_set_aliasing(dev);
+ nvdimm_set_labeling(dev);
}
nvdimm_bus_unlock(dev);
@@ -113,19 +113,14 @@ static int nvdimm_probe(struct device *dev)
return rc;
}
-static int nvdimm_remove(struct device *dev)
+static void nvdimm_remove(struct device *dev)
{
struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
- if (!ndd)
- return 0;
-
nvdimm_bus_lock(dev);
dev_set_drvdata(dev, NULL);
nvdimm_bus_unlock(dev);
put_ndd(ndd);
-
- return 0;
}
static struct nd_device_driver nvdimm_driver = {
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 94ea6dba6b4f..c7c980577491 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -18,10 +18,6 @@
static DEFINE_IDA(dimm_ida);
-static bool noblk;
-module_param(noblk, bool, 0444);
-MODULE_PARM_DESC(noblk, "force disable BLK / local alias support");
-
/*
* Retrieve bus and dimm handle and return if this bus supports
* get_config_data commands
@@ -32,7 +28,7 @@ int nvdimm_check_config_data(struct device *dev)
if (!nvdimm->cmd_mask ||
!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) {
- if (test_bit(NDD_ALIASING, &nvdimm->flags))
+ if (test_bit(NDD_LABELING, &nvdimm->flags))
return -ENXIO;
else
return -ENOTTY;
@@ -173,11 +169,11 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
return rc;
}
-void nvdimm_set_aliasing(struct device *dev)
+void nvdimm_set_labeling(struct device *dev)
{
struct nvdimm *nvdimm = to_nvdimm(dev);
- set_bit(NDD_ALIASING, &nvdimm->flags);
+ set_bit(NDD_LABELING, &nvdimm->flags);
}
void nvdimm_set_locked(struct device *dev)
@@ -211,22 +207,6 @@ struct nvdimm *to_nvdimm(struct device *dev)
}
EXPORT_SYMBOL_GPL(to_nvdimm);
-struct nvdimm *nd_blk_region_to_dimm(struct nd_blk_region *ndbr)
-{
- struct nd_region *nd_region = &ndbr->nd_region;
- struct nd_mapping *nd_mapping = &nd_region->mapping[0];
-
- return nd_mapping->nvdimm;
-}
-EXPORT_SYMBOL_GPL(nd_blk_region_to_dimm);
-
-unsigned long nd_blk_memremap_flags(struct nd_blk_region *ndbr)
-{
- /* pmem mapping properties are private to libnvdimm */
- return ARCH_MEMREMAP_PMEM;
-}
-EXPORT_SYMBOL_GPL(nd_blk_memremap_flags);
-
struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping)
{
struct nvdimm *nvdimm = nd_mapping->nvdimm;
@@ -313,7 +293,7 @@ static ssize_t flags_show(struct device *dev,
struct nvdimm *nvdimm = to_nvdimm(dev);
return sprintf(buf, "%s%s\n",
- test_bit(NDD_ALIASING, &nvdimm->flags) ? "alias " : "",
+ test_bit(NDD_LABELING, &nvdimm->flags) ? "label " : "",
test_bit(NDD_LOCKED, &nvdimm->flags) ? "lock " : "");
}
static DEVICE_ATTR_RO(flags);
@@ -334,16 +314,16 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr,
}
static DEVICE_ATTR_RO(state);
-static ssize_t available_slots_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t __available_slots_show(struct nvdimm_drvdata *ndd, char *buf)
{
- struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
+ struct device *dev;
ssize_t rc;
u32 nfree;
if (!ndd)
return -ENXIO;
+ dev = ndd->dev;
nvdimm_bus_lock(dev);
nfree = nd_label_nfree(ndd);
if (nfree - 1 > nfree) {
@@ -355,6 +335,18 @@ static ssize_t available_slots_show(struct device *dev,
nvdimm_bus_unlock(dev);
return rc;
}
+
+static ssize_t available_slots_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ ssize_t rc;
+
+ device_lock(dev);
+ rc = __available_slots_show(dev_get_drvdata(dev), buf);
+ device_unlock(dev);
+
+ return rc;
+}
static DEVICE_ATTR_RO(available_slots);
__weak ssize_t security_show(struct device *dev,
@@ -362,14 +354,14 @@ __weak ssize_t security_show(struct device *dev,
{
struct nvdimm *nvdimm = to_nvdimm(dev);
+ if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags))
+ return sprintf(buf, "overwrite\n");
if (test_bit(NVDIMM_SECURITY_DISABLED, &nvdimm->sec.flags))
return sprintf(buf, "disabled\n");
if (test_bit(NVDIMM_SECURITY_UNLOCKED, &nvdimm->sec.flags))
return sprintf(buf, "unlocked\n");
if (test_bit(NVDIMM_SECURITY_LOCKED, &nvdimm->sec.flags))
return sprintf(buf, "locked\n");
- if (test_bit(NVDIMM_SECURITY_OVERWRITE, &nvdimm->sec.flags))
- return sprintf(buf, "overwrite\n");
return -ENOTTY;
}
@@ -394,12 +386,12 @@ static ssize_t security_store(struct device *dev,
* done while probing is idle and the DIMM is not in active use
* in any region.
*/
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
rc = nvdimm_security_store(dev, buf, len);
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc;
}
@@ -445,9 +437,125 @@ static const struct attribute_group nvdimm_attribute_group = {
.is_visible = nvdimm_visible,
};
+static ssize_t result_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ enum nvdimm_fwa_result result;
+
+ if (!nvdimm->fw_ops)
+ return -EOPNOTSUPP;
+
+ nvdimm_bus_lock(dev);
+ result = nvdimm->fw_ops->activate_result(nvdimm);
+ nvdimm_bus_unlock(dev);
+
+ switch (result) {
+ case NVDIMM_FWA_RESULT_NONE:
+ return sprintf(buf, "none\n");
+ case NVDIMM_FWA_RESULT_SUCCESS:
+ return sprintf(buf, "success\n");
+ case NVDIMM_FWA_RESULT_FAIL:
+ return sprintf(buf, "fail\n");
+ case NVDIMM_FWA_RESULT_NOTSTAGED:
+ return sprintf(buf, "not_staged\n");
+ case NVDIMM_FWA_RESULT_NEEDRESET:
+ return sprintf(buf, "need_reset\n");
+ default:
+ return -ENXIO;
+ }
+}
+static DEVICE_ATTR_ADMIN_RO(result);
+
+static ssize_t activate_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ enum nvdimm_fwa_state state;
+
+ if (!nvdimm->fw_ops)
+ return -EOPNOTSUPP;
+
+ nvdimm_bus_lock(dev);
+ state = nvdimm->fw_ops->activate_state(nvdimm);
+ nvdimm_bus_unlock(dev);
+
+ switch (state) {
+ case NVDIMM_FWA_IDLE:
+ return sprintf(buf, "idle\n");
+ case NVDIMM_FWA_BUSY:
+ return sprintf(buf, "busy\n");
+ case NVDIMM_FWA_ARMED:
+ return sprintf(buf, "armed\n");
+ default:
+ return -ENXIO;
+ }
+}
+
+static ssize_t activate_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ enum nvdimm_fwa_trigger arg;
+ int rc;
+
+ if (!nvdimm->fw_ops)
+ return -EOPNOTSUPP;
+
+ if (sysfs_streq(buf, "arm"))
+ arg = NVDIMM_FWA_ARM;
+ else if (sysfs_streq(buf, "disarm"))
+ arg = NVDIMM_FWA_DISARM;
+ else
+ return -EINVAL;
+
+ nvdimm_bus_lock(dev);
+ rc = nvdimm->fw_ops->arm(nvdimm, arg);
+ nvdimm_bus_unlock(dev);
+
+ if (rc < 0)
+ return rc;
+ return len;
+}
+static DEVICE_ATTR_ADMIN_RW(activate);
+
+static struct attribute *nvdimm_firmware_attributes[] = {
+ &dev_attr_activate.attr,
+ &dev_attr_result.attr,
+ NULL,
+};
+
+static umode_t nvdimm_firmware_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, typeof(*dev), kobj);
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+ struct nvdimm *nvdimm = to_nvdimm(dev);
+ enum nvdimm_fwa_capability cap;
+
+ if (!nd_desc->fw_ops)
+ return 0;
+ if (!nvdimm->fw_ops)
+ return 0;
+
+ nvdimm_bus_lock(dev);
+ cap = nd_desc->fw_ops->capability(nd_desc);
+ nvdimm_bus_unlock(dev);
+
+ if (cap < NVDIMM_FWA_CAP_QUIESCE)
+ return 0;
+
+ return a->mode;
+}
+
+static const struct attribute_group nvdimm_firmware_attribute_group = {
+ .name = "firmware",
+ .attrs = nvdimm_firmware_attributes,
+ .is_visible = nvdimm_firmware_visible,
+};
+
static const struct attribute_group *nvdimm_attribute_groups[] = {
&nd_device_attribute_group,
&nvdimm_attribute_group,
+ &nvdimm_firmware_attribute_group,
NULL,
};
@@ -462,11 +570,14 @@ bool is_nvdimm(struct device *dev)
return dev->type == &nvdimm_device_type;
}
+static struct lock_class_key nvdimm_key;
+
struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
void *provider_data, const struct attribute_group **groups,
unsigned long flags, unsigned long cmd_mask, int num_flush,
struct resource *flush_wpq, const char *dimm_id,
- const struct nvdimm_security_ops *sec_ops)
+ const struct nvdimm_security_ops *sec_ops,
+ const struct nvdimm_fw_ops *fw_ops)
{
struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
struct device *dev;
@@ -482,8 +593,6 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
nvdimm->dimm_id = dimm_id;
nvdimm->provider_data = provider_data;
- if (noblk)
- flags |= 1 << NDD_NOBLK;
nvdimm->flags = flags;
nvdimm->cmd_mask = cmd_mask;
nvdimm->num_flush = num_flush;
@@ -496,6 +605,7 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
dev->devt = MKDEV(nvdimm_major, nvdimm->id);
dev->groups = groups;
nvdimm->sec.ops = sec_ops;
+ nvdimm->fw_ops = fw_ops;
nvdimm->sec.overwrite_tmo = 0;
INIT_DELAYED_WORK(&nvdimm->dwork, nvdimm_security_overwrite_query);
/*
@@ -505,12 +615,32 @@ struct nvdimm *__nvdimm_create(struct nvdimm_bus *nvdimm_bus,
/* get security state and extended (master) state */
nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER);
+ device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &nvdimm_key);
nd_device_register(dev);
return nvdimm;
}
EXPORT_SYMBOL_GPL(__nvdimm_create);
+void nvdimm_delete(struct nvdimm *nvdimm)
+{
+ struct device *dev = &nvdimm->dev;
+ bool dev_put = false;
+
+ /* We are shutting down. Make state frozen artificially. */
+ nvdimm_bus_lock(dev);
+ set_bit(NVDIMM_SECURITY_FROZEN, &nvdimm->sec.flags);
+ if (test_and_clear_bit(NDD_WORK_PENDING, &nvdimm->flags))
+ dev_put = true;
+ nvdimm_bus_unlock(dev);
+ cancel_delayed_work_sync(&nvdimm->dwork);
+ if (dev_put)
+ put_device(dev);
+ nd_device_unregister(dev, ND_SYNC);
+}
+EXPORT_SYMBOL_GPL(nvdimm_delete);
+
static void shutdown_security_notify(void *data)
{
struct nvdimm *nvdimm = data;
@@ -562,111 +692,19 @@ int nvdimm_security_freeze(struct nvdimm *nvdimm)
return rc;
}
-int alias_dpa_busy(struct device *dev, void *data)
+static unsigned long dpa_align(struct nd_region *nd_region)
{
- resource_size_t map_end, blk_start, new;
- struct blk_alloc_info *info = data;
- struct nd_mapping *nd_mapping;
- struct nd_region *nd_region;
- struct nvdimm_drvdata *ndd;
- struct resource *res;
- int i;
+ struct device *dev = &nd_region->dev;
- if (!is_memory(dev))
+ if (dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev),
+ "bus lock required for capacity provision\n"))
return 0;
-
- nd_region = to_nd_region(dev);
- for (i = 0; i < nd_region->ndr_mappings; i++) {
- nd_mapping = &nd_region->mapping[i];
- if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
- break;
- }
-
- if (i >= nd_region->ndr_mappings)
+ if (dev_WARN_ONCE(dev, !nd_region->ndr_mappings || nd_region->align
+ % nd_region->ndr_mappings,
+ "invalid region align %#lx mappings: %d\n",
+ nd_region->align, nd_region->ndr_mappings))
return 0;
-
- ndd = to_ndd(nd_mapping);
- map_end = nd_mapping->start + nd_mapping->size - 1;
- blk_start = nd_mapping->start;
-
- /*
- * In the allocation case ->res is set to free space that we are
- * looking to validate against PMEM aliasing collision rules
- * (i.e. BLK is allocated after all aliased PMEM).
- */
- if (info->res) {
- if (info->res->start >= nd_mapping->start
- && info->res->start < map_end)
- /* pass */;
- else
- return 0;
- }
-
- retry:
- /*
- * Find the free dpa from the end of the last pmem allocation to
- * the end of the interleave-set mapping.
- */
- for_each_dpa_resource(ndd, res) {
- if (strncmp(res->name, "pmem", 4) != 0)
- continue;
- if ((res->start >= blk_start && res->start < map_end)
- || (res->end >= blk_start
- && res->end <= map_end)) {
- new = max(blk_start, min(map_end + 1, res->end + 1));
- if (new != blk_start) {
- blk_start = new;
- goto retry;
- }
- }
- }
-
- /* update the free space range with the probed blk_start */
- if (info->res && blk_start > info->res->start) {
- info->res->start = max(info->res->start, blk_start);
- if (info->res->start > info->res->end)
- info->res->end = info->res->start - 1;
- return 1;
- }
-
- info->available -= blk_start - nd_mapping->start;
-
- return 0;
-}
-
-/**
- * nd_blk_available_dpa - account the unused dpa of BLK region
- * @nd_mapping: container of dpa-resource-root + labels
- *
- * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges, but
- * we arrange for them to never start at an lower dpa than the last
- * PMEM allocation in an aliased region.
- */
-resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
-{
- struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
- struct nd_mapping *nd_mapping = &nd_region->mapping[0];
- struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- struct blk_alloc_info info = {
- .nd_mapping = nd_mapping,
- .available = nd_mapping->size,
- .res = NULL,
- };
- struct resource *res;
-
- if (!ndd)
- return 0;
-
- device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
-
- /* now account for busy blk allocations in unaliased dpa */
- for_each_dpa_resource(ndd, res) {
- if (strncmp(res->name, "blk", 3) != 0)
- continue;
- info.available -= resource_size(res);
- }
-
- return info.available;
+ return nd_region->align / nd_region->ndr_mappings;
}
/**
@@ -682,19 +720,31 @@ resource_size_t nd_pmem_max_contiguous_dpa(struct nd_region *nd_region,
struct nvdimm_bus *nvdimm_bus;
resource_size_t max = 0;
struct resource *res;
+ unsigned long align;
/* if a dimm is disabled the available capacity is zero */
if (!ndd)
return 0;
+ align = dpa_align(nd_region);
+ if (!align)
+ return 0;
+
nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
if (__reserve_free_pmem(&nd_region->dev, nd_mapping->nvdimm))
return 0;
for_each_dpa_resource(ndd, res) {
+ resource_size_t start, end;
+
if (strcmp(res->name, "pmem-reserve") != 0)
continue;
- if (resource_size(res) > max)
- max = resource_size(res);
+ /* trim free space relative to current alignment setting */
+ start = ALIGN(res->start, align);
+ end = ALIGN_DOWN(res->end + 1, align) - 1;
+ if (end < start)
+ continue;
+ if (end - start + 1 > max)
+ max = end - start + 1;
}
release_free_pmem(nvdimm_bus, nd_mapping);
return max;
@@ -704,66 +754,49 @@ resource_size_t nd_pmem_max_contiguous_dpa(struct nd_region *nd_region,
* nd_pmem_available_dpa - for the given dimm+region account unallocated dpa
* @nd_mapping: container of dpa-resource-root + labels
* @nd_region: constrain available space check to this reference region
- * @overlap: calculate available space assuming this level of overlap
*
* Validate that a PMEM label, if present, aligns with the start of an
- * interleave set and truncate the available size at the lowest BLK
- * overlap point.
- *
- * The expectation is that this routine is called multiple times as it
- * probes for the largest BLK encroachment for any single member DIMM of
- * the interleave set. Once that value is determined the PMEM-limit for
- * the set can be established.
+ * interleave set.
*/
resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
- struct nd_mapping *nd_mapping, resource_size_t *overlap)
+ struct nd_mapping *nd_mapping)
{
- resource_size_t map_start, map_end, busy = 0, available, blk_start;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+ resource_size_t map_start, map_end, busy = 0;
struct resource *res;
- const char *reason;
+ unsigned long align;
if (!ndd)
return 0;
+ align = dpa_align(nd_region);
+ if (!align)
+ return 0;
+
map_start = nd_mapping->start;
map_end = map_start + nd_mapping->size - 1;
- blk_start = max(map_start, map_end + 1 - *overlap);
for_each_dpa_resource(ndd, res) {
- if (res->start >= map_start && res->start < map_end) {
- if (strncmp(res->name, "blk", 3) == 0)
- blk_start = min(blk_start,
- max(map_start, res->start));
- else if (res->end > map_end) {
- reason = "misaligned to iset";
- goto err;
- } else
- busy += resource_size(res);
- } else if (res->end >= map_start && res->end <= map_end) {
- if (strncmp(res->name, "blk", 3) == 0) {
- /*
- * If a BLK allocation overlaps the start of
- * PMEM the entire interleave set may now only
- * be used for BLK.
- */
- blk_start = map_start;
- } else
- busy += resource_size(res);
- } else if (map_start > res->start && map_start < res->end) {
+ resource_size_t start, end;
+
+ start = ALIGN_DOWN(res->start, align);
+ end = ALIGN(res->end + 1, align) - 1;
+ if (start >= map_start && start < map_end) {
+ if (end > map_end) {
+ nd_dbg_dpa(nd_region, ndd, res,
+ "misaligned to iset\n");
+ return 0;
+ }
+ busy += end - start + 1;
+ } else if (end >= map_start && end <= map_end) {
+ busy += end - start + 1;
+ } else if (map_start > start && map_start < end) {
/* total eclipse of the mapping */
busy += nd_mapping->size;
- blk_start = map_start;
}
}
- *overlap = map_end + 1 - blk_start;
- available = blk_start - map_start;
- if (busy < available)
- return available - busy;
- return 0;
-
- err:
- nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason);
+ if (busy < nd_mapping->size)
+ return ALIGN_DOWN(nd_mapping->size - busy, align);
return 0;
}
@@ -794,7 +827,7 @@ struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
/**
* nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id
* @nvdimm: container of dpa-resource-root + labels
- * @label_id: dpa resource name of the form {pmem|blk}-<human readable uuid>
+ * @label_id: dpa resource name of the form pmem-<human readable uuid>
*/
resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
struct nd_label_id *label_id)
diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c
index e02f60ad6c99..4cd18be9d0e9 100644
--- a/drivers/nvdimm/e820.c
+++ b/drivers/nvdimm/e820.c
@@ -7,6 +7,7 @@
#include <linux/memory_hotplug.h>
#include <linux/libnvdimm.h>
#include <linux/module.h>
+#include <linux/numa.h>
static int e820_pmem_remove(struct platform_device *pdev)
{
@@ -16,27 +17,16 @@ static int e820_pmem_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_MEMORY_HOTPLUG
-static int e820_range_to_nid(resource_size_t addr)
-{
- return memory_add_physaddr_to_nid(addr);
-}
-#else
-static int e820_range_to_nid(resource_size_t addr)
-{
- return NUMA_NO_NODE;
-}
-#endif
-
static int e820_register_one(struct resource *res, void *data)
{
struct nd_region_desc ndr_desc;
struct nvdimm_bus *nvdimm_bus = data;
+ int nid = phys_to_target_node(res->start);
memset(&ndr_desc, 0, sizeof(ndr_desc));
ndr_desc.res = res;
- ndr_desc.numa_node = e820_range_to_nid(res->start);
- ndr_desc.target_node = ndr_desc.numa_node;
+ ndr_desc.numa_node = numa_map_to_online_node(nid);
+ ndr_desc.target_node = nid;
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc))
return -ENXIO;
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 47a4828b8b31..082253a3a956 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -17,6 +17,14 @@ static guid_t nvdimm_btt2_guid;
static guid_t nvdimm_pfn_guid;
static guid_t nvdimm_dax_guid;
+static uuid_t nvdimm_btt_uuid;
+static uuid_t nvdimm_btt2_uuid;
+static uuid_t nvdimm_pfn_uuid;
+static uuid_t nvdimm_dax_uuid;
+
+static uuid_t cxl_region_uuid;
+static uuid_t cxl_namespace_uuid;
+
static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
static u32 best_seq(u32 a, u32 b)
@@ -321,12 +329,12 @@ static bool preamble_index(struct nvdimm_drvdata *ndd, int idx,
return true;
}
-char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags)
+char *nd_label_gen_id(struct nd_label_id *label_id, const uuid_t *uuid,
+ u32 flags)
{
if (!label_id || !uuid)
return NULL;
- snprintf(label_id->id, ND_LABEL_ID_SIZE, "%s-%pUb",
- flags & NSLABEL_FLAG_LOCAL ? "blk" : "pmem", uuid);
+ snprintf(label_id->id, ND_LABEL_ID_SIZE, "pmem-%pUb", uuid);
return label_id->id;
}
@@ -346,29 +354,45 @@ static bool preamble_next(struct nvdimm_drvdata *ndd,
free, nslot);
}
+static bool nsl_validate_checksum(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ u64 sum, sum_save;
+
+ if (!ndd->cxl && !efi_namespace_label_has(ndd, checksum))
+ return true;
+
+ sum_save = nsl_get_checksum(ndd, nd_label);
+ nsl_set_checksum(ndd, nd_label, 0);
+ sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
+ nsl_set_checksum(ndd, nd_label, sum_save);
+ return sum == sum_save;
+}
+
+static void nsl_calculate_checksum(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ u64 sum;
+
+ if (!ndd->cxl && !efi_namespace_label_has(ndd, checksum))
+ return;
+ nsl_set_checksum(ndd, nd_label, 0);
+ sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
+ nsl_set_checksum(ndd, nd_label, sum);
+}
+
static bool slot_valid(struct nvdimm_drvdata *ndd,
struct nd_namespace_label *nd_label, u32 slot)
{
+ bool valid;
+
/* check that we are written where we expect to be written */
- if (slot != __le32_to_cpu(nd_label->slot))
+ if (slot != nsl_get_slot(ndd, nd_label))
return false;
-
- /* check checksum */
- if (namespace_label_has(ndd, checksum)) {
- u64 sum, sum_save;
-
- sum_save = __le64_to_cpu(nd_label->checksum);
- nd_label->checksum = __cpu_to_le64(0);
- sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
- nd_label->checksum = __cpu_to_le64(sum_save);
- if (sum != sum_save) {
- dev_dbg(ndd->dev, "fail checksum. slot: %d expect: %#llx\n",
- slot, sum);
- return false;
- }
- }
-
- return true;
+ valid = nsl_validate_checksum(ndd, nd_label);
+ if (!valid)
+ dev_dbg(ndd->dev, "fail checksum. slot: %d\n", slot);
+ return valid;
}
int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
@@ -381,12 +405,11 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
return 0; /* no label, nothing to reserve */
for_each_clear_bit_le(slot, free, nslot) {
- struct nvdimm *nvdimm = to_nvdimm(ndd->dev);
struct nd_namespace_label *nd_label;
struct nd_region *nd_region = NULL;
- u8 label_uuid[NSLABEL_UUID_LEN];
struct nd_label_id label_id;
struct resource *res;
+ uuid_t label_uuid;
u32 flags;
nd_label = to_label(ndd, slot);
@@ -394,14 +417,12 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
if (!slot_valid(ndd, nd_label, slot))
continue;
- memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
- flags = __le32_to_cpu(nd_label->flags);
- if (test_bit(NDD_NOBLK, &nvdimm->flags))
- flags &= ~NSLABEL_FLAG_LOCAL;
- nd_label_gen_id(&label_id, label_uuid, flags);
+ nsl_get_uuid(ndd, nd_label, &label_uuid);
+ flags = nsl_get_flags(ndd, nd_label);
+ nd_label_gen_id(&label_id, &label_uuid, flags);
res = nvdimm_allocate_dpa(ndd, &label_id,
- __le64_to_cpu(nd_label->dpa),
- __le64_to_cpu(nd_label->rawsize));
+ nsl_get_dpa(ndd, nd_label),
+ nsl_get_rawsize(ndd, nd_label));
nd_dbg_dpa(nd_region, ndd, res, "reserve\n");
if (!res)
return -EBUSY;
@@ -548,9 +569,9 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd)
nd_label = to_label(ndd, slot);
if (!slot_valid(ndd, nd_label, slot)) {
- u32 label_slot = __le32_to_cpu(nd_label->slot);
- u64 size = __le64_to_cpu(nd_label->rawsize);
- u64 dpa = __le64_to_cpu(nd_label->dpa);
+ u32 label_slot = nsl_get_slot(ndd, nd_label);
+ u64 size = nsl_get_rawsize(ndd, nd_label);
+ u64 dpa = nsl_get_dpa(ndd, nd_label);
dev_dbg(ndd->dev,
"slot%d invalid slot: %d dpa: %llx size: %llx\n",
@@ -708,7 +729,7 @@ static unsigned long nd_label_offset(struct nvdimm_drvdata *ndd,
- (unsigned long) to_namespace_index(ndd, 0);
}
-enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid)
+static enum nvdimm_claim_class guid_to_nvdimm_cclass(guid_t *guid)
{
if (guid_equal(guid, &nvdimm_btt_guid))
return NVDIMM_CCLASS_BTT;
@@ -724,6 +745,23 @@ enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid)
return NVDIMM_CCLASS_UNKNOWN;
}
+/* CXL labels store UUIDs instead of GUIDs for the same data */
+static enum nvdimm_claim_class uuid_to_nvdimm_cclass(uuid_t *uuid)
+{
+ if (uuid_equal(uuid, &nvdimm_btt_uuid))
+ return NVDIMM_CCLASS_BTT;
+ else if (uuid_equal(uuid, &nvdimm_btt2_uuid))
+ return NVDIMM_CCLASS_BTT2;
+ else if (uuid_equal(uuid, &nvdimm_pfn_uuid))
+ return NVDIMM_CCLASS_PFN;
+ else if (uuid_equal(uuid, &nvdimm_dax_uuid))
+ return NVDIMM_CCLASS_DAX;
+ else if (uuid_equal(uuid, &uuid_null))
+ return NVDIMM_CCLASS_NONE;
+
+ return NVDIMM_CCLASS_UNKNOWN;
+}
+
static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class,
guid_t *target)
{
@@ -745,6 +783,28 @@ static const guid_t *to_abstraction_guid(enum nvdimm_claim_class claim_class,
return &guid_null;
}
+/* CXL labels store UUIDs instead of GUIDs for the same data */
+static const uuid_t *to_abstraction_uuid(enum nvdimm_claim_class claim_class,
+ uuid_t *target)
+{
+ if (claim_class == NVDIMM_CCLASS_BTT)
+ return &nvdimm_btt_uuid;
+ else if (claim_class == NVDIMM_CCLASS_BTT2)
+ return &nvdimm_btt2_uuid;
+ else if (claim_class == NVDIMM_CCLASS_PFN)
+ return &nvdimm_pfn_uuid;
+ else if (claim_class == NVDIMM_CCLASS_DAX)
+ return &nvdimm_dax_uuid;
+ else if (claim_class == NVDIMM_CCLASS_UNKNOWN) {
+ /*
+ * If we're modifying a namespace for which we don't
+ * know the claim_class, don't touch the existing uuid.
+ */
+ return target;
+ } else
+ return &uuid_null;
+}
+
static void reap_victim(struct nd_mapping *nd_mapping,
struct nd_label_ent *victim)
{
@@ -756,6 +816,60 @@ static void reap_victim(struct nd_mapping *nd_mapping,
victim->label = NULL;
}
+static void nsl_set_type_guid(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label, guid_t *guid)
+{
+ if (efi_namespace_label_has(ndd, type_guid))
+ guid_copy(&nd_label->efi.type_guid, guid);
+}
+
+bool nsl_validate_type_guid(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label, guid_t *guid)
+{
+ if (ndd->cxl || !efi_namespace_label_has(ndd, type_guid))
+ return true;
+ if (!guid_equal(&nd_label->efi.type_guid, guid)) {
+ dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n", guid,
+ &nd_label->efi.type_guid);
+ return false;
+ }
+ return true;
+}
+
+static void nsl_set_claim_class(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label,
+ enum nvdimm_claim_class claim_class)
+{
+ if (ndd->cxl) {
+ uuid_t uuid;
+
+ import_uuid(&uuid, nd_label->cxl.abstraction_uuid);
+ export_uuid(nd_label->cxl.abstraction_uuid,
+ to_abstraction_uuid(claim_class, &uuid));
+ return;
+ }
+
+ if (!efi_namespace_label_has(ndd, abstraction_guid))
+ return;
+ guid_copy(&nd_label->efi.abstraction_guid,
+ to_abstraction_guid(claim_class,
+ &nd_label->efi.abstraction_guid));
+}
+
+enum nvdimm_claim_class nsl_get_claim_class(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ if (ndd->cxl) {
+ uuid_t uuid;
+
+ import_uuid(&uuid, nd_label->cxl.abstraction_uuid);
+ return uuid_to_nvdimm_cclass(&uuid);
+ }
+ if (!efi_namespace_label_has(ndd, abstraction_guid))
+ return NVDIMM_CCLASS_NONE;
+ return guid_to_nvdimm_cclass(&nd_label->efi.abstraction_guid);
+}
+
static int __pmem_label_update(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
int pos, unsigned long flags)
@@ -796,30 +910,20 @@ static int __pmem_label_update(struct nd_region *nd_region,
nd_label = to_label(ndd, slot);
memset(nd_label, 0, sizeof_namespace_label(ndd));
- memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN);
- if (nspm->alt_name)
- memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN);
- nd_label->flags = __cpu_to_le32(flags);
- nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings);
- nd_label->position = __cpu_to_le16(pos);
- nd_label->isetcookie = __cpu_to_le64(cookie);
- nd_label->rawsize = __cpu_to_le64(resource_size(res));
- nd_label->lbasize = __cpu_to_le64(nspm->lbasize);
- nd_label->dpa = __cpu_to_le64(res->start);
- nd_label->slot = __cpu_to_le32(slot);
- if (namespace_label_has(ndd, type_guid))
- guid_copy(&nd_label->type_guid, &nd_set->type_guid);
- if (namespace_label_has(ndd, abstraction_guid))
- guid_copy(&nd_label->abstraction_guid,
- to_abstraction_guid(ndns->claim_class,
- &nd_label->abstraction_guid));
- if (namespace_label_has(ndd, checksum)) {
- u64 sum;
-
- nd_label->checksum = __cpu_to_le64(0);
- sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
- nd_label->checksum = __cpu_to_le64(sum);
- }
+ nsl_set_uuid(ndd, nd_label, nspm->uuid);
+ nsl_set_name(ndd, nd_label, nspm->alt_name);
+ nsl_set_flags(ndd, nd_label, flags);
+ nsl_set_nlabel(ndd, nd_label, nd_region->ndr_mappings);
+ nsl_set_nrange(ndd, nd_label, 1);
+ nsl_set_position(ndd, nd_label, pos);
+ nsl_set_isetcookie(ndd, nd_label, cookie);
+ nsl_set_rawsize(ndd, nd_label, resource_size(res));
+ nsl_set_lbasize(ndd, nd_label, nspm->lbasize);
+ nsl_set_dpa(ndd, nd_label, res->start);
+ nsl_set_slot(ndd, nd_label, slot);
+ nsl_set_type_guid(ndd, nd_label, &nd_set->type_guid);
+ nsl_set_claim_class(ndd, nd_label, ndns->claim_class);
+ nsl_calculate_checksum(ndd, nd_label);
nd_dbg_dpa(nd_region, ndd, res, "\n");
/* update label */
@@ -834,9 +938,8 @@ static int __pmem_label_update(struct nd_region *nd_region,
list_for_each_entry(label_ent, &nd_mapping->labels, list) {
if (!label_ent->label)
continue;
- if (test_and_clear_bit(ND_LABEL_REAP, &label_ent->flags)
- || memcmp(nspm->uuid, label_ent->label->uuid,
- NSLABEL_UUID_LEN) == 0)
+ if (test_and_clear_bit(ND_LABEL_REAP, &label_ent->flags) ||
+ nsl_uuid_equal(ndd, label_ent->label, nspm->uuid))
reap_victim(nd_mapping, label_ent);
}
@@ -861,295 +964,6 @@ static int __pmem_label_update(struct nd_region *nd_region,
return rc;
}
-static bool is_old_resource(struct resource *res, struct resource **list, int n)
-{
- int i;
-
- if (res->flags & DPA_RESOURCE_ADJUSTED)
- return false;
- for (i = 0; i < n; i++)
- if (res == list[i])
- return true;
- return false;
-}
-
-static struct resource *to_resource(struct nvdimm_drvdata *ndd,
- struct nd_namespace_label *nd_label)
-{
- struct resource *res;
-
- for_each_dpa_resource(ndd, res) {
- if (res->start != __le64_to_cpu(nd_label->dpa))
- continue;
- if (resource_size(res) != __le64_to_cpu(nd_label->rawsize))
- continue;
- return res;
- }
-
- return NULL;
-}
-
-/*
- * 1/ Account all the labels that can be freed after this update
- * 2/ Allocate and write the label to the staging (next) index
- * 3/ Record the resources in the namespace device
- */
-static int __blk_label_update(struct nd_region *nd_region,
- struct nd_mapping *nd_mapping, struct nd_namespace_blk *nsblk,
- int num_labels)
-{
- int i, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO;
- struct nd_interleave_set *nd_set = nd_region->nd_set;
- struct nd_namespace_common *ndns = &nsblk->common;
- struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- struct nd_namespace_label *nd_label;
- struct nd_label_ent *label_ent, *e;
- struct nd_namespace_index *nsindex;
- unsigned long *free, *victim_map = NULL;
- struct resource *res, **old_res_list;
- struct nd_label_id label_id;
- u8 uuid[NSLABEL_UUID_LEN];
- int min_dpa_idx = 0;
- LIST_HEAD(list);
- u32 nslot, slot;
-
- if (!preamble_next(ndd, &nsindex, &free, &nslot))
- return -ENXIO;
-
- old_res_list = nsblk->res;
- nfree = nd_label_nfree(ndd);
- old_num_resources = nsblk->num_resources;
- nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
-
- /*
- * We need to loop over the old resources a few times, which seems a
- * bit inefficient, but we need to know that we have the label
- * space before we start mutating the tracking structures.
- * Otherwise the recovery method of last resort for userspace is
- * disable and re-enable the parent region.
- */
- alloc = 0;
- for_each_dpa_resource(ndd, res) {
- if (strcmp(res->name, label_id.id) != 0)
- continue;
- if (!is_old_resource(res, old_res_list, old_num_resources))
- alloc++;
- }
-
- victims = 0;
- if (old_num_resources) {
- /* convert old local-label-map to dimm-slot victim-map */
- victim_map = bitmap_zalloc(nslot, GFP_KERNEL);
- if (!victim_map)
- return -ENOMEM;
-
- /* mark unused labels for garbage collection */
- for_each_clear_bit_le(slot, free, nslot) {
- nd_label = to_label(ndd, slot);
- memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
- if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
- continue;
- res = to_resource(ndd, nd_label);
- if (res && is_old_resource(res, old_res_list,
- old_num_resources))
- continue;
- slot = to_slot(ndd, nd_label);
- set_bit(slot, victim_map);
- victims++;
- }
- }
-
- /* don't allow updates that consume the last label */
- if (nfree - alloc < 0 || nfree - alloc + victims < 1) {
- dev_info(&nsblk->common.dev, "insufficient label space\n");
- bitmap_free(victim_map);
- return -ENOSPC;
- }
- /* from here on we need to abort on error */
-
-
- /* assign all resources to the namespace before writing the labels */
- nsblk->res = NULL;
- nsblk->num_resources = 0;
- for_each_dpa_resource(ndd, res) {
- if (strcmp(res->name, label_id.id) != 0)
- continue;
- if (!nsblk_add_resource(nd_region, ndd, nsblk, res->start)) {
- rc = -ENOMEM;
- goto abort;
- }
- }
-
- /*
- * Find the resource associated with the first label in the set
- * per the v1.2 namespace specification.
- */
- for (i = 0; i < nsblk->num_resources; i++) {
- struct resource *min = nsblk->res[min_dpa_idx];
-
- res = nsblk->res[i];
- if (res->start < min->start)
- min_dpa_idx = i;
- }
-
- for (i = 0; i < nsblk->num_resources; i++) {
- size_t offset;
-
- res = nsblk->res[i];
- if (is_old_resource(res, old_res_list, old_num_resources))
- continue; /* carry-over */
- slot = nd_label_alloc_slot(ndd);
- if (slot == UINT_MAX)
- goto abort;
- dev_dbg(ndd->dev, "allocated: %d\n", slot);
-
- nd_label = to_label(ndd, slot);
- memset(nd_label, 0, sizeof_namespace_label(ndd));
- memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN);
- if (nsblk->alt_name)
- memcpy(nd_label->name, nsblk->alt_name,
- NSLABEL_NAME_LEN);
- nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL);
-
- /*
- * Use the presence of the type_guid as a flag to
- * determine isetcookie usage and nlabel + position
- * policy for blk-aperture namespaces.
- */
- if (namespace_label_has(ndd, type_guid)) {
- if (i == min_dpa_idx) {
- nd_label->nlabel = __cpu_to_le16(nsblk->num_resources);
- nd_label->position = __cpu_to_le16(0);
- } else {
- nd_label->nlabel = __cpu_to_le16(0xffff);
- nd_label->position = __cpu_to_le16(0xffff);
- }
- nd_label->isetcookie = __cpu_to_le64(nd_set->cookie2);
- } else {
- nd_label->nlabel = __cpu_to_le16(0); /* N/A */
- nd_label->position = __cpu_to_le16(0); /* N/A */
- nd_label->isetcookie = __cpu_to_le64(0); /* N/A */
- }
-
- nd_label->dpa = __cpu_to_le64(res->start);
- nd_label->rawsize = __cpu_to_le64(resource_size(res));
- nd_label->lbasize = __cpu_to_le64(nsblk->lbasize);
- nd_label->slot = __cpu_to_le32(slot);
- if (namespace_label_has(ndd, type_guid))
- guid_copy(&nd_label->type_guid, &nd_set->type_guid);
- if (namespace_label_has(ndd, abstraction_guid))
- guid_copy(&nd_label->abstraction_guid,
- to_abstraction_guid(ndns->claim_class,
- &nd_label->abstraction_guid));
-
- if (namespace_label_has(ndd, checksum)) {
- u64 sum;
-
- nd_label->checksum = __cpu_to_le64(0);
- sum = nd_fletcher64(nd_label,
- sizeof_namespace_label(ndd), 1);
- nd_label->checksum = __cpu_to_le64(sum);
- }
-
- /* update label */
- offset = nd_label_offset(ndd, nd_label);
- rc = nvdimm_set_config_data(ndd, offset, nd_label,
- sizeof_namespace_label(ndd));
- if (rc < 0)
- goto abort;
- }
-
- /* free up now unused slots in the new index */
- for_each_set_bit(slot, victim_map, victim_map ? nslot : 0) {
- dev_dbg(ndd->dev, "free: %d\n", slot);
- nd_label_free_slot(ndd, slot);
- }
-
- /* update index */
- rc = nd_label_write_index(ndd, ndd->ns_next,
- nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0);
- if (rc)
- goto abort;
-
- /*
- * Now that the on-dimm labels are up to date, fix up the tracking
- * entries in nd_mapping->labels
- */
- nlabel = 0;
- mutex_lock(&nd_mapping->lock);
- list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
- nd_label = label_ent->label;
- if (!nd_label)
- continue;
- nlabel++;
- memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
- if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
- continue;
- nlabel--;
- list_move(&label_ent->list, &list);
- label_ent->label = NULL;
- }
- list_splice_tail_init(&list, &nd_mapping->labels);
- mutex_unlock(&nd_mapping->lock);
-
- if (nlabel + nsblk->num_resources > num_labels) {
- /*
- * Bug, we can't end up with more resources than
- * available labels
- */
- WARN_ON_ONCE(1);
- rc = -ENXIO;
- goto out;
- }
-
- mutex_lock(&nd_mapping->lock);
- label_ent = list_first_entry_or_null(&nd_mapping->labels,
- typeof(*label_ent), list);
- if (!label_ent) {
- WARN_ON(1);
- mutex_unlock(&nd_mapping->lock);
- rc = -ENXIO;
- goto out;
- }
- for_each_clear_bit_le(slot, free, nslot) {
- nd_label = to_label(ndd, slot);
- memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
- if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
- continue;
- res = to_resource(ndd, nd_label);
- res->flags &= ~DPA_RESOURCE_ADJUSTED;
- dev_vdbg(&nsblk->common.dev, "assign label slot: %d\n", slot);
- list_for_each_entry_from(label_ent, &nd_mapping->labels, list) {
- if (label_ent->label)
- continue;
- label_ent->label = nd_label;
- nd_label = NULL;
- break;
- }
- if (nd_label)
- dev_WARN(&nsblk->common.dev,
- "failed to track label slot%d\n", slot);
- }
- mutex_unlock(&nd_mapping->lock);
-
- out:
- kfree(old_res_list);
- bitmap_free(victim_map);
- return rc;
-
- abort:
- /*
- * 1/ repair the allocated label bitmap in the index
- * 2/ restore the resource list
- */
- nd_label_copy(ndd, nsindex, to_current_namespace_index(ndd));
- kfree(nsblk->res);
- nsblk->res = old_res_list;
- nsblk->num_resources = old_num_resources;
- old_res_list = NULL;
- goto out;
-}
-
static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
{
int i, old_num_labels = 0;
@@ -1194,12 +1008,11 @@ static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
return max(num_labels, old_num_labels);
}
-static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
+static int del_labels(struct nd_mapping *nd_mapping, uuid_t *uuid)
{
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_label_ent *label_ent, *e;
struct nd_namespace_index *nsindex;
- u8 label_uuid[NSLABEL_UUID_LEN];
unsigned long *free;
LIST_HEAD(list);
u32 nslot, slot;
@@ -1219,8 +1032,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
if (!nd_label)
continue;
active++;
- memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
- if (memcmp(label_uuid, uuid, NSLABEL_UUID_LEN) != 0)
+ if (!nsl_uuid_equal(ndd, nd_label, uuid))
continue;
active--;
slot = to_slot(ndd, nd_label);
@@ -1289,26 +1101,6 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region,
return 0;
}
-int nd_blk_namespace_label_update(struct nd_region *nd_region,
- struct nd_namespace_blk *nsblk, resource_size_t size)
-{
- struct nd_mapping *nd_mapping = &nd_region->mapping[0];
- struct resource *res;
- int count = 0;
-
- if (size == 0)
- return del_labels(nd_mapping, nsblk->uuid);
-
- for_each_dpa_resource(to_ndd(nd_mapping), res)
- count++;
-
- count = init_labels(nd_mapping, count);
- if (count < 0)
- return count;
-
- return __blk_label_update(nd_region, nd_mapping, nsblk, count);
-}
-
int __init nd_label_init(void)
{
WARN_ON(guid_parse(NVDIMM_BTT_GUID, &nvdimm_btt_guid));
@@ -1316,5 +1108,13 @@ int __init nd_label_init(void)
WARN_ON(guid_parse(NVDIMM_PFN_GUID, &nvdimm_pfn_guid));
WARN_ON(guid_parse(NVDIMM_DAX_GUID, &nvdimm_dax_guid));
+ WARN_ON(uuid_parse(NVDIMM_BTT_GUID, &nvdimm_btt_uuid));
+ WARN_ON(uuid_parse(NVDIMM_BTT2_GUID, &nvdimm_btt2_uuid));
+ WARN_ON(uuid_parse(NVDIMM_PFN_GUID, &nvdimm_pfn_uuid));
+ WARN_ON(uuid_parse(NVDIMM_DAX_GUID, &nvdimm_dax_uuid));
+
+ WARN_ON(uuid_parse(CXL_REGION_UUID, &cxl_region_uuid));
+ WARN_ON(uuid_parse(CXL_NAMESPACE_UUID, &cxl_namespace_uuid));
+
return 0;
}
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index 4c7b775c2811..0650fb4b9821 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -34,6 +34,7 @@ enum {
* struct nd_namespace_index - label set superblock
* @sig: NAMESPACE_INDEX\0
* @flags: placeholder
+ * @labelsize: log2 size (v1 labels 128 bytes v2 labels 256 bytes)
* @seq: sequence number for this index
* @myoff: offset of this index in label area
* @mysize: size of this index struct
@@ -43,7 +44,7 @@ enum {
* @major: label area major version
* @minor: label area minor version
* @checksum: fletcher64 of all fields
- * @free[0]: bitmap, nlabel bits
+ * @free: bitmap, nlabel bits
*
* The size of free[] is rounded up so the total struct size is a
* multiple of NSINDEX_ALIGN bytes. Any bits this allocates beyond
@@ -62,11 +63,43 @@ struct nd_namespace_index {
__le16 major;
__le16 minor;
__le64 checksum;
- u8 free[0];
+ u8 free[];
};
/**
- * struct nd_namespace_label - namespace superblock
+ * struct cxl_region_label - CXL 2.0 Table 211
+ * @type: uuid identifying this label format (region)
+ * @uuid: uuid for the region this label describes
+ * @flags: NSLABEL_FLAG_UPDATING (all other flags reserved)
+ * @nlabel: 1 per interleave-way in the region
+ * @position: this label's position in the set
+ * @dpa: start address in device-local capacity for this label
+ * @rawsize: size of this label's contribution to region
+ * @hpa: mandatory system physical address to map this region
+ * @slot: slot id of this label in label area
+ * @ig: interleave granularity (1 << @ig) * 256 bytes
+ * @align: alignment in SZ_256M blocks
+ * @reserved: reserved
+ * @checksum: fletcher64 sum of this label
+ */
+struct cxl_region_label {
+ u8 type[NSLABEL_UUID_LEN];
+ u8 uuid[NSLABEL_UUID_LEN];
+ __le32 flags;
+ __le16 nlabel;
+ __le16 position;
+ __le64 dpa;
+ __le64 rawsize;
+ __le64 hpa;
+ __le32 slot;
+ __le32 ig;
+ __le32 align;
+ u8 reserved[0xac];
+ __le64 checksum;
+};
+
+/**
+ * struct nvdimm_efi_label - namespace superblock
* @uuid: UUID per RFC 4122
* @name: optional name (NULL-terminated)
* @flags: see NSLABEL_FLAG_*
@@ -77,9 +110,14 @@ struct nd_namespace_index {
* @dpa: DPA of NVM range on this DIMM
* @rawsize: size of namespace
* @slot: slot of this label in label area
- * @unused: must be zero
+ * @align: physical address alignment of the namespace
+ * @reserved: reserved
+ * @type_guid: copy of struct acpi_nfit_system_address.range_guid
+ * @abstraction_guid: personality id (btt, btt2, fsdax, devdax....)
+ * @reserved2: reserved
+ * @checksum: fletcher64 sum of this object
*/
-struct nd_namespace_label {
+struct nvdimm_efi_label {
u8 uuid[NSLABEL_UUID_LEN];
u8 name[NSLABEL_NAME_LEN];
__le32 flags;
@@ -92,7 +130,7 @@ struct nd_namespace_label {
__le32 slot;
/*
* Accessing fields past this point should be gated by a
- * namespace_label_has() check.
+ * efi_namespace_label_has() check.
*/
u8 align;
u8 reserved[3];
@@ -102,14 +140,60 @@ struct nd_namespace_label {
__le64 checksum;
};
+/**
+ * struct nvdimm_cxl_label - CXL 2.0 Table 212
+ * @type: uuid identifying this label format (namespace)
+ * @uuid: uuid for the namespace this label describes
+ * @name: friendly name for the namespace
+ * @flags: NSLABEL_FLAG_UPDATING (all other flags reserved)
+ * @nrange: discontiguous namespace support
+ * @position: this label's position in the set
+ * @dpa: start address in device-local capacity for this label
+ * @rawsize: size of this label's contribution to namespace
+ * @slot: slot id of this label in label area
+ * @align: alignment in SZ_256M blocks
+ * @region_uuid: host interleave set identifier
+ * @abstraction_uuid: personality driver for this namespace
+ * @lbasize: address geometry for disk-like personalities
+ * @reserved: reserved
+ * @checksum: fletcher64 sum of this label
+ */
+struct nvdimm_cxl_label {
+ u8 type[NSLABEL_UUID_LEN];
+ u8 uuid[NSLABEL_UUID_LEN];
+ u8 name[NSLABEL_NAME_LEN];
+ __le32 flags;
+ __le16 nrange;
+ __le16 position;
+ __le64 dpa;
+ __le64 rawsize;
+ __le32 slot;
+ __le32 align;
+ u8 region_uuid[16];
+ u8 abstraction_uuid[16];
+ __le16 lbasize;
+ u8 reserved[0x56];
+ __le64 checksum;
+};
+
+struct nd_namespace_label {
+ union {
+ struct nvdimm_cxl_label cxl;
+ struct nvdimm_efi_label efi;
+ };
+};
+
#define NVDIMM_BTT_GUID "8aed63a2-29a2-4c66-8b12-f05d15d3922a"
#define NVDIMM_BTT2_GUID "18633bfc-1735-4217-8ac9-17239282d3f8"
#define NVDIMM_PFN_GUID "266400ba-fb9f-4677-bcb0-968f11d0d225"
#define NVDIMM_DAX_GUID "97a86d9c-3cdd-4eda-986f-5068b4f80088"
+#define CXL_REGION_UUID "529d7c61-da07-47c4-a93f-ecdf2c06f444"
+#define CXL_NAMESPACE_UUID "68bb2c0a-5a77-4937-9f85-3caf41a0f93c"
+
/**
* struct nd_label_id - identifier string for dpa allocation
- * @id: "{blk|pmem}-<namespace uuid>"
+ * @id: "pmem-<namespace uuid>"
*/
struct nd_label_id {
char id[ND_LABEL_ID_SIZE];
@@ -135,12 +219,8 @@ struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n);
u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd);
bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot);
u32 nd_label_nfree(struct nvdimm_drvdata *ndd);
-enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid);
struct nd_region;
struct nd_namespace_pmem;
-struct nd_namespace_blk;
int nd_pmem_namespace_label_update(struct nd_region *nd_region,
struct nd_namespace_pmem *nspm, resource_size_t size);
-int nd_blk_namespace_label_update(struct nd_region *nd_region,
- struct nd_namespace_blk *nsblk, resource_size_t size);
#endif /* __LABEL_H__ */
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 032dc61725ff..c60ec0b373c5 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -10,6 +10,7 @@
#include <linux/nd.h>
#include "nd-core.h"
#include "pmem.h"
+#include "pfn.h"
#include "nd.h"
static void namespace_io_release(struct device *dev)
@@ -31,35 +32,17 @@ static void namespace_pmem_release(struct device *dev)
kfree(nspm);
}
-static void namespace_blk_release(struct device *dev)
-{
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
- struct nd_region *nd_region = to_nd_region(dev->parent);
-
- if (nsblk->id >= 0)
- ida_simple_remove(&nd_region->ns_ida, nsblk->id);
- kfree(nsblk->alt_name);
- kfree(nsblk->uuid);
- kfree(nsblk->res);
- kfree(nsblk);
-}
-
static bool is_namespace_pmem(const struct device *dev);
-static bool is_namespace_blk(const struct device *dev);
static bool is_namespace_io(const struct device *dev);
static int is_uuid_busy(struct device *dev, void *data)
{
- u8 *uuid1 = data, *uuid2 = NULL;
+ uuid_t *uuid1 = data, *uuid2 = NULL;
if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
uuid2 = nspm->uuid;
- } else if (is_namespace_blk(dev)) {
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
-
- uuid2 = nsblk->uuid;
} else if (is_nd_btt(dev)) {
struct nd_btt *nd_btt = to_nd_btt(dev);
@@ -70,7 +53,7 @@ static int is_uuid_busy(struct device *dev, void *data)
uuid2 = nd_pfn->uuid;
}
- if (uuid2 && memcmp(uuid1, uuid2, NSLABEL_UUID_LEN) == 0)
+ if (uuid2 && uuid_equal(uuid1, uuid2))
return -EBUSY;
return 0;
@@ -88,7 +71,7 @@ static int is_namespace_uuid_busy(struct device *dev, void *data)
* @dev: any device on a nvdimm_bus
* @uuid: uuid to check
*/
-bool nd_is_uuid_unique(struct device *dev, u8 *uuid)
+bool nd_is_uuid_unique(struct device *dev, uuid_t *uuid)
{
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
@@ -177,12 +160,6 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
else
sprintf(name, "pmem%d%s", nd_region->id,
suffix ? suffix : "");
- } else if (is_namespace_blk(&ndns->dev)) {
- struct nd_namespace_blk *nsblk;
-
- nsblk = to_nd_namespace_blk(&ndns->dev);
- sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id,
- suffix ? suffix : "");
} else {
return NULL;
}
@@ -191,23 +168,14 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
}
EXPORT_SYMBOL(nvdimm_namespace_disk_name);
-const u8 *nd_dev_to_uuid(struct device *dev)
+const uuid_t *nd_dev_to_uuid(struct device *dev)
{
- static const u8 null_uuid[16];
-
- if (!dev)
- return null_uuid;
-
- if (is_namespace_pmem(dev)) {
+ if (dev && is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
return nspm->uuid;
- } else if (is_namespace_blk(dev)) {
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
-
- return nsblk->uuid;
- } else
- return null_uuid;
+ }
+ return &uuid_null;
}
EXPORT_SYMBOL(nd_dev_to_uuid);
@@ -230,10 +198,6 @@ static ssize_t __alt_name_store(struct device *dev, const char *buf,
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
ns_altname = &nspm->alt_name;
- } else if (is_namespace_blk(dev)) {
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
-
- ns_altname = &nsblk->alt_name;
} else
return -ENXIO;
@@ -265,83 +229,6 @@ out:
return rc;
}
-static resource_size_t nd_namespace_blk_size(struct nd_namespace_blk *nsblk)
-{
- struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent);
- struct nd_mapping *nd_mapping = &nd_region->mapping[0];
- struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- struct nd_label_id label_id;
- resource_size_t size = 0;
- struct resource *res;
-
- if (!nsblk->uuid)
- return 0;
- nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
- for_each_dpa_resource(ndd, res)
- if (strcmp(res->name, label_id.id) == 0)
- size += resource_size(res);
- return size;
-}
-
-static bool __nd_namespace_blk_validate(struct nd_namespace_blk *nsblk)
-{
- struct nd_region *nd_region = to_nd_region(nsblk->common.dev.parent);
- struct nd_mapping *nd_mapping = &nd_region->mapping[0];
- struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- struct nd_label_id label_id;
- struct resource *res;
- int count, i;
-
- if (!nsblk->uuid || !nsblk->lbasize || !ndd)
- return false;
-
- count = 0;
- nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
- for_each_dpa_resource(ndd, res) {
- if (strcmp(res->name, label_id.id) != 0)
- continue;
- /*
- * Resources with unacknowledged adjustments indicate a
- * failure to update labels
- */
- if (res->flags & DPA_RESOURCE_ADJUSTED)
- return false;
- count++;
- }
-
- /* These values match after a successful label update */
- if (count != nsblk->num_resources)
- return false;
-
- for (i = 0; i < nsblk->num_resources; i++) {
- struct resource *found = NULL;
-
- for_each_dpa_resource(ndd, res)
- if (res == nsblk->res[i]) {
- found = res;
- break;
- }
- /* stale resource */
- if (!found)
- return false;
- }
-
- return true;
-}
-
-resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk)
-{
- resource_size_t size;
-
- nvdimm_bus_lock(&nsblk->common.dev);
- size = __nd_namespace_blk_validate(nsblk);
- nvdimm_bus_unlock(&nsblk->common.dev);
-
- return size;
-}
-EXPORT_SYMBOL(nd_namespace_blk_validate);
-
-
static int nd_namespace_label_update(struct nd_region *nd_region,
struct device *dev)
{
@@ -364,16 +251,6 @@ static int nd_namespace_label_update(struct nd_region *nd_region,
return 0;
return nd_pmem_namespace_label_update(nd_region, nspm, size);
- } else if (is_namespace_blk(dev)) {
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
- resource_size_t size = nd_namespace_blk_size(nsblk);
-
- if (size == 0 && nsblk->uuid)
- /* delete allocation */;
- else if (!nsblk->uuid || !nsblk->lbasize)
- return 0;
-
- return nd_blk_namespace_label_update(nd_region, nsblk, size);
} else
return -ENXIO;
}
@@ -384,7 +261,7 @@ static ssize_t alt_name_store(struct device *dev,
struct nd_region *nd_region = to_nd_region(dev->parent);
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
rc = __alt_name_store(dev, buf, len);
@@ -392,7 +269,7 @@ static ssize_t alt_name_store(struct device *dev,
rc = nd_namespace_label_update(nd_region, dev);
dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc < 0 ? rc : len;
}
@@ -406,10 +283,6 @@ static ssize_t alt_name_show(struct device *dev,
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
ns_altname = nspm->alt_name;
- } else if (is_namespace_blk(dev)) {
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
-
- ns_altname = nsblk->alt_name;
} else
return -ENXIO;
@@ -421,13 +294,11 @@ static int scan_free(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, struct nd_label_id *label_id,
resource_size_t n)
{
- bool is_blk = strncmp(label_id->id, "blk", 3) == 0;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
int rc = 0;
while (n) {
struct resource *res, *last;
- resource_size_t new_start;
last = NULL;
for_each_dpa_resource(ndd, res)
@@ -445,16 +316,7 @@ static int scan_free(struct nd_region *nd_region,
continue;
}
- /*
- * Keep BLK allocations relegated to high DPA as much as
- * possible
- */
- if (is_blk)
- new_start = res->start + n;
- else
- new_start = res->start;
-
- rc = adjust_resource(res, new_start, resource_size(res) - n);
+ rc = adjust_resource(res, res->start, resource_size(res) - n);
if (rc == 0)
res->flags |= DPA_RESOURCE_ADJUSTED;
nd_dbg_dpa(nd_region, ndd, res, "shrink %d\n", rc);
@@ -496,20 +358,12 @@ static resource_size_t init_dpa_allocation(struct nd_label_id *label_id,
struct nd_region *nd_region, struct nd_mapping *nd_mapping,
resource_size_t n)
{
- bool is_blk = strncmp(label_id->id, "blk", 3) == 0;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- resource_size_t first_dpa;
struct resource *res;
int rc = 0;
- /* allocate blk from highest dpa first */
- if (is_blk)
- first_dpa = nd_mapping->start + nd_mapping->size - n;
- else
- first_dpa = nd_mapping->start;
-
/* first resource allocation for this label-id or dimm */
- res = nvdimm_allocate_dpa(ndd, label_id, first_dpa, n);
+ res = nvdimm_allocate_dpa(ndd, label_id, nd_mapping->start, n);
if (!res)
rc = -EBUSY;
@@ -531,7 +385,7 @@ static resource_size_t init_dpa_allocation(struct nd_label_id *label_id,
*
* BLK-space is valid as long as it does not precede a PMEM
* allocation in a given region. PMEM-space must be contiguous
- * and adjacent to an existing existing allocation (if one
+ * and adjacent to an existing allocation (if one
* exists). If reserving PMEM any space is valid.
*/
static void space_valid(struct nd_region *nd_region, struct nvdimm_drvdata *ndd,
@@ -540,7 +394,11 @@ static void space_valid(struct nd_region *nd_region, struct nvdimm_drvdata *ndd,
resource_size_t n, struct resource *valid)
{
bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
- bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
+ unsigned long align;
+
+ align = nd_region->align / nd_region->ndr_mappings;
+ valid->start = ALIGN(valid->start, align);
+ valid->end = ALIGN_DOWN(valid->end + 1, align) - 1;
if (valid->start >= valid->end)
goto invalid;
@@ -548,21 +406,6 @@ static void space_valid(struct nd_region *nd_region, struct nvdimm_drvdata *ndd,
if (is_reserve)
return;
- if (!is_pmem) {
- struct nd_mapping *nd_mapping = &nd_region->mapping[0];
- struct nvdimm_bus *nvdimm_bus;
- struct blk_alloc_info info = {
- .nd_mapping = nd_mapping,
- .available = nd_mapping->size,
- .res = valid,
- };
-
- WARN_ON(!is_nd_blk(&nd_region->dev));
- nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
- device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
- return;
- }
-
/* allocation needs to be contiguous, so this is all or nothing */
if (resource_size(valid) < n)
goto invalid;
@@ -590,7 +433,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
resource_size_t n)
{
resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1;
- bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct resource *res, *exist = NULL, valid;
const resource_size_t to_allocate = n;
@@ -688,10 +530,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
}
if (strcmp(action, "allocate") == 0) {
- /* BLK allocate bottom up */
- if (!is_pmem)
- valid.start += available - allocate;
-
new_res = nvdimm_allocate_dpa(ndd, label_id,
valid.start, allocate);
if (!new_res)
@@ -727,12 +565,7 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
return 0;
}
- /*
- * If we allocated nothing in the BLK case it may be because we are in
- * an initial "pmem-reserve pass". Only do an initial BLK allocation
- * when none of the DPA space is reserved.
- */
- if ((is_pmem || !ndd->dpa.child) && n == to_allocate)
+ if (n == to_allocate)
return init_dpa_allocation(label_id, nd_region, nd_mapping, n);
return n;
}
@@ -791,7 +624,7 @@ int __reserve_free_pmem(struct device *dev, void *data)
if (nd_mapping->nvdimm != nvdimm)
continue;
- n = nd_pmem_available_dpa(nd_region, nd_mapping, &rem);
+ n = nd_pmem_available_dpa(nd_region, nd_mapping);
if (n == 0)
return 0;
rem = scan_allocate(nd_region, nd_mapping, &label_id, n);
@@ -816,19 +649,6 @@ void release_free_pmem(struct nvdimm_bus *nvdimm_bus,
nvdimm_free_dpa(ndd, res);
}
-static int reserve_free_pmem(struct nvdimm_bus *nvdimm_bus,
- struct nd_mapping *nd_mapping)
-{
- struct nvdimm *nvdimm = nd_mapping->nvdimm;
- int rc;
-
- rc = device_for_each_child(&nvdimm_bus->dev, nvdimm,
- __reserve_free_pmem);
- if (rc)
- release_free_pmem(nvdimm_bus, nd_mapping);
- return rc;
-}
-
/**
* grow_dpa_allocation - for each dimm allocate n bytes for @label_id
* @nd_region: the set of dimms to allocate @n more bytes from
@@ -845,37 +665,14 @@ static int reserve_free_pmem(struct nvdimm_bus *nvdimm_bus,
static int grow_dpa_allocation(struct nd_region *nd_region,
struct nd_label_id *label_id, resource_size_t n)
{
- struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
- bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
int i;
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
resource_size_t rem = n;
- int rc, j;
-
- /*
- * In the BLK case try once with all unallocated PMEM
- * reserved, and once without
- */
- for (j = is_pmem; j < 2; j++) {
- bool blk_only = j == 0;
-
- if (blk_only) {
- rc = reserve_free_pmem(nvdimm_bus, nd_mapping);
- if (rc)
- return rc;
- }
- rem = scan_allocate(nd_region, nd_mapping,
- label_id, rem);
- if (blk_only)
- release_free_pmem(nvdimm_bus, nd_mapping);
-
- /* try again and allow encroachments into PMEM */
- if (rem == 0)
- break;
- }
+ int rc;
+ rem = scan_allocate(nd_region, nd_mapping, label_id, rem);
dev_WARN_ONCE(&nd_region->dev, rem,
"allocation underrun: %#llx of %#llx bytes\n",
(unsigned long long) n - rem,
@@ -932,7 +729,8 @@ static void nd_namespace_pmem_set_resource(struct nd_region *nd_region,
res->end = res->start + size - 1;
}
-static bool uuid_not_set(const u8 *uuid, struct device *dev, const char *where)
+static bool uuid_not_set(const uuid_t *uuid, struct device *dev,
+ const char *where)
{
if (!uuid) {
dev_dbg(dev, "%s: uuid not set\n", where);
@@ -951,7 +749,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
struct nd_label_id label_id;
u32 flags = 0, remainder;
int rc, i, id = -1;
- u8 *uuid = NULL;
+ uuid_t *uuid = NULL;
if (dev->driver || ndns->claim)
return -EBUSY;
@@ -961,12 +759,6 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
uuid = nspm->uuid;
id = nspm->id;
- } else if (is_namespace_blk(dev)) {
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
-
- uuid = nsblk->uuid;
- flags = NSLABEL_FLAG_LOCAL;
- id = nsblk->id;
}
/*
@@ -980,10 +772,10 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
return -ENXIO;
}
- div_u64_rem(val, PAGE_SIZE * nd_region->ndr_mappings, &remainder);
+ div_u64_rem(val, nd_region->align, &remainder);
if (remainder) {
dev_dbg(dev, "%llu is not %ldK aligned\n", val,
- (PAGE_SIZE * nd_region->ndr_mappings) / SZ_1K);
+ nd_region->align / SZ_1K);
return -EINVAL;
}
@@ -993,8 +785,8 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
ndd = to_ndd(nd_mapping);
/*
- * All dimms in an interleave set, or the base dimm for a blk
- * region, need to be enabled for the size to be changed.
+ * All dimms in an interleave set, need to be enabled
+ * for the size to be changed.
*/
if (!ndd)
return -ENXIO;
@@ -1044,40 +836,31 @@ static ssize_t size_store(struct device *dev,
{
struct nd_region *nd_region = to_nd_region(dev->parent);
unsigned long long val;
- u8 **uuid = NULL;
int rc;
rc = kstrtoull(buf, 0, &val);
if (rc)
return rc;
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
rc = __size_store(dev, val);
if (rc >= 0)
rc = nd_namespace_label_update(nd_region, dev);
- if (is_namespace_pmem(dev)) {
+ /* setting size zero == 'delete namespace' */
+ if (rc == 0 && val == 0 && is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
- uuid = &nspm->uuid;
- } else if (is_namespace_blk(dev)) {
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
-
- uuid = &nsblk->uuid;
- }
-
- if (rc == 0 && val == 0 && uuid) {
- /* setting size zero == 'delete namespace' */
- kfree(*uuid);
- *uuid = NULL;
+ kfree(nspm->uuid);
+ nspm->uuid = NULL;
}
dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc);
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc < 0 ? rc : len;
}
@@ -1090,8 +873,6 @@ resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns)
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
return resource_size(&nspm->nsio.res);
- } else if (is_namespace_blk(dev)) {
- return nd_namespace_blk_size(to_nd_namespace_blk(dev));
} else if (is_namespace_io(dev)) {
struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
@@ -1141,24 +922,20 @@ static ssize_t size_show(struct device *dev,
}
static DEVICE_ATTR(size, 0444, size_show, size_store);
-static u8 *namespace_to_uuid(struct device *dev)
+static uuid_t *namespace_to_uuid(struct device *dev)
{
if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
return nspm->uuid;
- } else if (is_namespace_blk(dev)) {
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
-
- return nsblk->uuid;
- } else
- return ERR_PTR(-ENXIO);
+ }
+ return ERR_PTR(-ENXIO);
}
-static ssize_t uuid_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
- u8 *uuid = namespace_to_uuid(dev);
+ uuid_t *uuid = namespace_to_uuid(dev);
if (IS_ERR(uuid))
return PTR_ERR(uuid);
@@ -1175,9 +952,9 @@ static ssize_t uuid_show(struct device *dev,
* @old_uuid: reference to the uuid storage location in the namespace object
*/
static int namespace_update_uuid(struct nd_region *nd_region,
- struct device *dev, u8 *new_uuid, u8 **old_uuid)
+ struct device *dev, uuid_t *new_uuid,
+ uuid_t **old_uuid)
{
- u32 flags = is_namespace_blk(dev) ? NSLABEL_FLAG_LOCAL : 0;
struct nd_label_id old_label_id;
struct nd_label_id new_label_id;
int i;
@@ -1208,8 +985,8 @@ static int namespace_update_uuid(struct nd_region *nd_region,
return -EBUSY;
}
- nd_label_gen_id(&old_label_id, *old_uuid, flags);
- nd_label_gen_id(&new_label_id, new_uuid, flags);
+ nd_label_gen_id(&old_label_id, *old_uuid, 0);
+ nd_label_gen_id(&new_label_id, new_uuid, 0);
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
@@ -1225,11 +1002,13 @@ static int namespace_update_uuid(struct nd_region *nd_region,
list_for_each_entry(label_ent, &nd_mapping->labels, list) {
struct nd_namespace_label *nd_label = label_ent->label;
struct nd_label_id label_id;
+ uuid_t uuid;
if (!nd_label)
continue;
- nd_label_gen_id(&label_id, nd_label->uuid,
- __le32_to_cpu(nd_label->flags));
+ nsl_get_uuid(ndd, nd_label, &uuid);
+ nd_label_gen_id(&label_id, &uuid,
+ nsl_get_flags(ndd, nd_label));
if (strcmp(old_label_id.id, label_id.id) == 0)
set_bit(ND_LABEL_REAP, &label_ent->flags);
}
@@ -1245,22 +1024,18 @@ static ssize_t uuid_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct nd_region *nd_region = to_nd_region(dev->parent);
- u8 *uuid = NULL;
+ uuid_t *uuid = NULL;
+ uuid_t **ns_uuid;
ssize_t rc = 0;
- u8 **ns_uuid;
if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
ns_uuid = &nspm->uuid;
- } else if (is_namespace_blk(dev)) {
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
-
- ns_uuid = &nsblk->uuid;
} else
return -ENXIO;
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
if (to_ndns(dev)->claim)
@@ -1276,7 +1051,7 @@ static ssize_t uuid_store(struct device *dev,
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc < 0 ? rc : len;
}
@@ -1303,23 +1078,13 @@ static ssize_t resource_show(struct device *dev,
return -ENXIO;
return sprintf(buf, "%#llx\n", (unsigned long long) res->start);
}
-static DEVICE_ATTR(resource, 0400, resource_show, NULL);
-
-static const unsigned long blk_lbasize_supported[] = { 512, 520, 528,
- 4096, 4104, 4160, 4224, 0 };
+static DEVICE_ATTR_ADMIN_RO(resource);
static const unsigned long pmem_lbasize_supported[] = { 512, 4096, 0 };
static ssize_t sector_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- if (is_namespace_blk(dev)) {
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
-
- return nd_size_select_show(nsblk->lbasize,
- blk_lbasize_supported, buf);
- }
-
if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
@@ -1337,12 +1102,7 @@ static ssize_t sector_size_store(struct device *dev,
unsigned long *lbasize;
ssize_t rc = 0;
- if (is_namespace_blk(dev)) {
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
-
- lbasize = &nsblk->lbasize;
- supported = blk_lbasize_supported;
- } else if (is_namespace_pmem(dev)) {
+ if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
lbasize = &nspm->lbasize;
@@ -1350,7 +1110,7 @@ static ssize_t sector_size_store(struct device *dev,
} else
return -ENXIO;
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
if (to_ndns(dev)->claim)
rc = -EBUSY;
@@ -1361,7 +1121,7 @@ static ssize_t sector_size_store(struct device *dev,
dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote",
buf, buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc ? rc : len;
}
@@ -1372,8 +1132,8 @@ static ssize_t dpa_extents_show(struct device *dev,
{
struct nd_region *nd_region = to_nd_region(dev->parent);
struct nd_label_id label_id;
+ uuid_t *uuid = NULL;
int count = 0, i;
- u8 *uuid = NULL;
u32 flags = 0;
nvdimm_bus_lock(dev);
@@ -1382,11 +1142,6 @@ static ssize_t dpa_extents_show(struct device *dev,
uuid = nspm->uuid;
flags = 0;
- } else if (is_namespace_blk(dev)) {
- struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
-
- uuid = nsblk->uuid;
- flags = NSLABEL_FLAG_LOCAL;
}
if (!uuid)
@@ -1476,9 +1231,9 @@ static ssize_t holder_show(struct device *dev,
struct nd_namespace_common *ndns = to_ndns(dev);
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : "");
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc;
}
@@ -1515,7 +1270,7 @@ static ssize_t holder_class_store(struct device *dev,
struct nd_region *nd_region = to_nd_region(dev->parent);
int rc;
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
rc = __holder_class_store(dev, buf);
@@ -1523,7 +1278,7 @@ static ssize_t holder_class_store(struct device *dev,
rc = nd_namespace_label_update(nd_region, dev);
dev_dbg(dev, "%s(%d)\n", rc < 0 ? "fail " : "", rc);
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc < 0 ? rc : len;
}
@@ -1534,7 +1289,7 @@ static ssize_t holder_class_show(struct device *dev,
struct nd_namespace_common *ndns = to_ndns(dev);
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
if (ndns->claim_class == NVDIMM_CCLASS_NONE)
rc = sprintf(buf, "\n");
else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) ||
@@ -1546,7 +1301,7 @@ static ssize_t holder_class_show(struct device *dev,
rc = sprintf(buf, "dax\n");
else
rc = sprintf(buf, "<unknown>\n");
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc;
}
@@ -1560,7 +1315,7 @@ static ssize_t mode_show(struct device *dev,
char *mode;
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
claim = ndns->claim;
if (claim && is_nd_btt(claim))
mode = "safe";
@@ -1573,7 +1328,7 @@ static ssize_t mode_show(struct device *dev,
else
mode = "raw";
rc = sprintf(buf, "%s\n", mode);
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc;
}
@@ -1619,21 +1374,18 @@ static umode_t namespace_visible(struct kobject *kobj,
{
struct device *dev = container_of(kobj, struct device, kobj);
- if (a == &dev_attr_resource.attr && is_namespace_blk(dev))
- return 0;
-
- if (is_namespace_pmem(dev) || is_namespace_blk(dev)) {
+ if (is_namespace_pmem(dev)) {
if (a == &dev_attr_size.attr)
return 0644;
return a->mode;
}
- if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr
- || a == &dev_attr_holder.attr
- || a == &dev_attr_holder_class.attr
- || a == &dev_attr_force_raw.attr
- || a == &dev_attr_mode.attr)
+ /* base is_namespace_io() attributes */
+ if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr ||
+ a == &dev_attr_holder.attr || a == &dev_attr_holder_class.attr ||
+ a == &dev_attr_force_raw.attr || a == &dev_attr_mode.attr ||
+ a == &dev_attr_resource.attr)
return a->mode;
return 0;
@@ -1663,22 +1415,11 @@ static const struct device_type namespace_pmem_device_type = {
.groups = nd_namespace_attribute_groups,
};
-static const struct device_type namespace_blk_device_type = {
- .name = "nd_namespace_blk",
- .release = namespace_blk_release,
- .groups = nd_namespace_attribute_groups,
-};
-
static bool is_namespace_pmem(const struct device *dev)
{
return dev ? dev->type == &namespace_pmem_device_type : false;
}
-static bool is_namespace_blk(const struct device *dev)
-{
- return dev ? dev->type == &namespace_blk_device_type : false;
-}
-
static bool is_namespace_io(const struct device *dev)
{
return dev ? dev->type == &namespace_io_device_type : false;
@@ -1707,8 +1448,8 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
* Flush any in-progess probes / removals in the driver
* for the raw personality of this namespace.
*/
- nd_device_lock(&ndns->dev);
- nd_device_unlock(&ndns->dev);
+ device_lock(&ndns->dev);
+ device_unlock(&ndns->dev);
if (ndns->dev.driver) {
dev_dbg(&ndns->dev, "is active, can't bind %s\n",
dev_name(dev));
@@ -1739,24 +1480,28 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
return ERR_PTR(-ENODEV);
}
+ /*
+ * Note, alignment validation for fsdax and devdax mode
+ * namespaces happens in nd_pfn_validate() where infoblock
+ * padding parameters can be applied.
+ */
+ if (pmem_should_map_pages(dev)) {
+ struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+ struct resource *res = &nsio->res;
+
+ if (!IS_ALIGNED(res->start | (res->end + 1),
+ memremap_compat_align())) {
+ dev_err(&ndns->dev, "%pr misaligned, unable to map\n", res);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ }
+
if (is_namespace_pmem(&ndns->dev)) {
struct nd_namespace_pmem *nspm;
nspm = to_nd_namespace_pmem(&ndns->dev);
if (uuid_not_set(nspm->uuid, &ndns->dev, __func__))
return ERR_PTR(-ENODEV);
- } else if (is_namespace_blk(&ndns->dev)) {
- struct nd_namespace_blk *nsblk;
-
- nsblk = to_nd_namespace_blk(&ndns->dev);
- if (uuid_not_set(nsblk->uuid, &ndns->dev, __func__))
- return ERR_PTR(-ENODEV);
- if (!nsblk->lbasize) {
- dev_dbg(&ndns->dev, "sector size not set\n");
- return ERR_PTR(-ENODEV);
- }
- if (!nd_namespace_blk_validate(nsblk))
- return ERR_PTR(-ENODEV);
}
return ndns;
@@ -1766,16 +1511,12 @@ EXPORT_SYMBOL(nvdimm_namespace_common_probe);
int devm_namespace_enable(struct device *dev, struct nd_namespace_common *ndns,
resource_size_t size)
{
- if (is_namespace_blk(&ndns->dev))
- return 0;
return devm_nsio_enable(dev, to_nd_namespace_io(&ndns->dev), size);
}
EXPORT_SYMBOL_GPL(devm_namespace_enable);
void devm_namespace_disable(struct device *dev, struct nd_namespace_common *ndns)
{
- if (is_namespace_blk(&ndns->dev))
- return;
devm_nsio_disable(dev, to_nd_namespace_io(&ndns->dev));
}
EXPORT_SYMBOL_GPL(devm_namespace_disable);
@@ -1809,8 +1550,8 @@ static struct device **create_namespace_io(struct nd_region *nd_region)
return devs;
}
-static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
- u64 cookie, u16 pos)
+static bool has_uuid_at_pos(struct nd_region *nd_region, const uuid_t *uuid,
+ u64 cookie, u16 pos)
{
struct nd_namespace_label *found = NULL;
int i;
@@ -1824,36 +1565,28 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
list_for_each_entry(label_ent, &nd_mapping->labels, list) {
struct nd_namespace_label *nd_label = label_ent->label;
- u16 position, nlabel;
- u64 isetcookie;
+ u16 position;
if (!nd_label)
continue;
- isetcookie = __le64_to_cpu(nd_label->isetcookie);
- position = __le16_to_cpu(nd_label->position);
- nlabel = __le16_to_cpu(nd_label->nlabel);
+ position = nsl_get_position(ndd, nd_label);
- if (isetcookie != cookie)
+ if (!nsl_validate_isetcookie(ndd, nd_label, cookie))
continue;
- if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0)
+ if (!nsl_uuid_equal(ndd, nd_label, uuid))
continue;
- if (namespace_label_has(ndd, type_guid)
- && !guid_equal(&nd_set->type_guid,
- &nd_label->type_guid)) {
- dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
- &nd_set->type_guid,
- &nd_label->type_guid);
+ if (!nsl_validate_type_guid(ndd, nd_label,
+ &nd_set->type_guid))
continue;
- }
if (found_uuid) {
dev_dbg(ndd->dev, "duplicate entry for uuid\n");
return false;
}
found_uuid = true;
- if (nlabel != nd_region->ndr_mappings)
+ if (!nsl_validate_nlabel(nd_region, ndd, nd_label))
continue;
if (position != pos)
continue;
@@ -1866,7 +1599,7 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
return found != NULL;
}
-static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
+static int select_pmem_id(struct nd_region *nd_region, const uuid_t *pmem_id)
{
int i;
@@ -1885,7 +1618,7 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
nd_label = label_ent->label;
if (!nd_label)
continue;
- if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0)
+ if (nsl_uuid_equal(ndd, nd_label, pmem_id))
break;
nd_label = NULL;
}
@@ -1901,14 +1634,15 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
*/
hw_start = nd_mapping->start;
hw_end = hw_start + nd_mapping->size;
- pmem_start = __le64_to_cpu(nd_label->dpa);
- pmem_end = pmem_start + __le64_to_cpu(nd_label->rawsize);
+ pmem_start = nsl_get_dpa(ndd, nd_label);
+ pmem_end = pmem_start + nsl_get_rawsize(ndd, nd_label);
if (pmem_start >= hw_start && pmem_start < hw_end
&& pmem_end <= hw_end && pmem_end > hw_start)
/* pass */;
else {
dev_dbg(&nd_region->dev, "%s invalid label for %pUb\n",
- dev_name(ndd->dev), nd_label->uuid);
+ dev_name(ndd->dev),
+ nsl_uuid_raw(ndd, nd_label));
return -EINVAL;
}
@@ -1925,17 +1659,20 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
* @nd_label: target pmem namespace label to evaluate
*/
static struct device *create_namespace_pmem(struct nd_region *nd_region,
- struct nd_namespace_index *nsindex,
- struct nd_namespace_label *nd_label)
+ struct nd_mapping *nd_mapping,
+ struct nd_namespace_label *nd_label)
{
+ struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+ struct nd_namespace_index *nsindex =
+ to_namespace_index(ndd, ndd->ns_current);
u64 cookie = nd_region_interleave_set_cookie(nd_region, nsindex);
u64 altcookie = nd_region_interleave_set_altcookie(nd_region);
struct nd_label_ent *label_ent;
struct nd_namespace_pmem *nspm;
- struct nd_mapping *nd_mapping;
resource_size_t size = 0;
struct resource *res;
struct device *dev;
+ uuid_t uuid;
int rc = 0;
u16 i;
@@ -1944,14 +1681,14 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
return ERR_PTR(-ENXIO);
}
- if (__le64_to_cpu(nd_label->isetcookie) != cookie) {
+ if (!nsl_validate_isetcookie(ndd, nd_label, cookie)) {
dev_dbg(&nd_region->dev, "invalid cookie in label: %pUb\n",
- nd_label->uuid);
- if (__le64_to_cpu(nd_label->isetcookie) != altcookie)
+ nsl_uuid_raw(ndd, nd_label));
+ if (!nsl_validate_isetcookie(ndd, nd_label, altcookie))
return ERR_PTR(-EAGAIN);
dev_dbg(&nd_region->dev, "valid altcookie in label: %pUb\n",
- nd_label->uuid);
+ nsl_uuid_raw(ndd, nd_label));
}
nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
@@ -1967,9 +1704,10 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
res->flags = IORESOURCE_MEM;
for (i = 0; i < nd_region->ndr_mappings; i++) {
- if (has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i))
+ nsl_get_uuid(ndd, nd_label, &uuid);
+ if (has_uuid_at_pos(nd_region, &uuid, cookie, i))
continue;
- if (has_uuid_at_pos(nd_region, nd_label->uuid, altcookie, i))
+ if (has_uuid_at_pos(nd_region, &uuid, altcookie, i))
continue;
break;
}
@@ -1983,7 +1721,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
* find a dimm with two instances of the same uuid.
*/
dev_err(&nd_region->dev, "%s missing label for %pUb\n",
- nvdimm_name(nvdimm), nd_label->uuid);
+ nvdimm_name(nvdimm), nsl_uuid_raw(ndd, nd_label));
rc = -EINVAL;
goto err;
}
@@ -1991,12 +1729,10 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
/*
* Fix up each mapping's 'labels' to have the validated pmem label for
* that position at labels[0], and NULL at labels[1]. In the process,
- * check that the namespace aligns with interleave-set. We know
- * that it does not overlap with any blk namespaces by virtue of
- * the dimm being enabled (i.e. nd_label_reserve_dpa()
- * succeeded).
+ * check that the namespace aligns with interleave-set.
*/
- rc = select_pmem_id(nd_region, nd_label->uuid);
+ nsl_get_uuid(ndd, nd_label, &uuid);
+ rc = select_pmem_id(nd_region, &uuid);
if (rc)
goto err;
@@ -2015,20 +1751,18 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
continue;
}
- size += __le64_to_cpu(label0->rawsize);
- if (__le16_to_cpu(label0->position) != 0)
+ ndd = to_ndd(nd_mapping);
+ size += nsl_get_rawsize(ndd, label0);
+ if (nsl_get_position(ndd, label0) != 0)
continue;
WARN_ON(nspm->alt_name || nspm->uuid);
- nspm->alt_name = kmemdup((void __force *) label0->name,
- NSLABEL_NAME_LEN, GFP_KERNEL);
- nspm->uuid = kmemdup((void __force *) label0->uuid,
- NSLABEL_UUID_LEN, GFP_KERNEL);
- nspm->lbasize = __le64_to_cpu(label0->lbasize);
- ndd = to_ndd(nd_mapping);
- if (namespace_label_has(ndd, abstraction_guid))
- nspm->nsio.common.claim_class
- = to_nvdimm_cclass(&label0->abstraction_guid);
-
+ nspm->alt_name = kmemdup(nsl_ref_name(ndd, label0),
+ NSLABEL_NAME_LEN, GFP_KERNEL);
+ nsl_get_uuid(ndd, label0, &uuid);
+ nspm->uuid = kmemdup(&uuid, sizeof(uuid_t), GFP_KERNEL);
+ nspm->lbasize = nsl_get_lbasize(ndd, label0);
+ nspm->nsio.common.claim_class =
+ nsl_get_claim_class(ndd, label0);
}
if (!nspm->alt_name || !nspm->uuid) {
@@ -2055,54 +1789,6 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
return ERR_PTR(rc);
}
-struct resource *nsblk_add_resource(struct nd_region *nd_region,
- struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk,
- resource_size_t start)
-{
- struct nd_label_id label_id;
- struct resource *res;
-
- nd_label_gen_id(&label_id, nsblk->uuid, NSLABEL_FLAG_LOCAL);
- res = krealloc(nsblk->res,
- sizeof(void *) * (nsblk->num_resources + 1),
- GFP_KERNEL);
- if (!res)
- return NULL;
- nsblk->res = (struct resource **) res;
- for_each_dpa_resource(ndd, res)
- if (strcmp(res->name, label_id.id) == 0
- && res->start == start) {
- nsblk->res[nsblk->num_resources++] = res;
- return res;
- }
- return NULL;
-}
-
-static struct device *nd_namespace_blk_create(struct nd_region *nd_region)
-{
- struct nd_namespace_blk *nsblk;
- struct device *dev;
-
- if (!is_nd_blk(&nd_region->dev))
- return NULL;
-
- nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
- if (!nsblk)
- return NULL;
-
- dev = &nsblk->common.dev;
- dev->type = &namespace_blk_device_type;
- nsblk->id = ida_simple_get(&nd_region->ns_ida, 0, 0, GFP_KERNEL);
- if (nsblk->id < 0) {
- kfree(nsblk);
- return NULL;
- }
- dev_set_name(dev, "namespace%d.%d", nd_region->id, nsblk->id);
- dev->parent = &nd_region->dev;
-
- return &nsblk->common.dev;
-}
-
static struct device *nd_namespace_pmem_create(struct nd_region *nd_region)
{
struct nd_namespace_pmem *nspm;
@@ -2134,6 +1820,8 @@ static struct device *nd_namespace_pmem_create(struct nd_region *nd_region)
return dev;
}
+static struct lock_class_key nvdimm_namespace_key;
+
void nd_region_create_ns_seed(struct nd_region *nd_region)
{
WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
@@ -2141,20 +1829,20 @@ void nd_region_create_ns_seed(struct nd_region *nd_region)
if (nd_region_to_nstype(nd_region) == ND_DEVICE_NAMESPACE_IO)
return;
- if (is_nd_blk(&nd_region->dev))
- nd_region->ns_seed = nd_namespace_blk_create(nd_region);
- else
- nd_region->ns_seed = nd_namespace_pmem_create(nd_region);
+ nd_region->ns_seed = nd_namespace_pmem_create(nd_region);
/*
* Seed creation failures are not fatal, provisioning is simply
* disabled until memory becomes available
*/
if (!nd_region->ns_seed)
- dev_err(&nd_region->dev, "failed to create %s namespace\n",
- is_nd_blk(&nd_region->dev) ? "blk" : "pmem");
- else
+ dev_err(&nd_region->dev, "failed to create namespace\n");
+ else {
+ device_initialize(nd_region->ns_seed);
+ lockdep_set_class(&nd_region->ns_seed->mutex,
+ &nvdimm_namespace_key);
nd_device_register(nd_region->ns_seed);
+ }
}
void nd_region_create_dax_seed(struct nd_region *nd_region)
@@ -2202,114 +1890,32 @@ static int add_namespace_resource(struct nd_region *nd_region,
int i;
for (i = 0; i < count; i++) {
- u8 *uuid = namespace_to_uuid(devs[i]);
- struct resource *res;
+ uuid_t *uuid = namespace_to_uuid(devs[i]);
- if (IS_ERR_OR_NULL(uuid)) {
+ if (IS_ERR(uuid)) {
WARN_ON(1);
continue;
}
- if (memcmp(uuid, nd_label->uuid, NSLABEL_UUID_LEN) != 0)
+ if (!nsl_uuid_equal(ndd, nd_label, uuid))
continue;
- if (is_namespace_blk(devs[i])) {
- res = nsblk_add_resource(nd_region, ndd,
- to_nd_namespace_blk(devs[i]),
- __le64_to_cpu(nd_label->dpa));
- if (!res)
- return -ENXIO;
- nd_dbg_dpa(nd_region, ndd, res, "%d assign\n", count);
- } else {
- dev_err(&nd_region->dev,
- "error: conflicting extents for uuid: %pUb\n",
- nd_label->uuid);
- return -ENXIO;
- }
- break;
+ dev_err(&nd_region->dev,
+ "error: conflicting extents for uuid: %pUb\n", uuid);
+ return -ENXIO;
}
return i;
}
-static struct device *create_namespace_blk(struct nd_region *nd_region,
- struct nd_namespace_label *nd_label, int count)
-{
-
- struct nd_mapping *nd_mapping = &nd_region->mapping[0];
- struct nd_interleave_set *nd_set = nd_region->nd_set;
- struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- struct nd_namespace_blk *nsblk;
- char name[NSLABEL_NAME_LEN];
- struct device *dev = NULL;
- struct resource *res;
-
- if (namespace_label_has(ndd, type_guid)) {
- if (!guid_equal(&nd_set->type_guid, &nd_label->type_guid)) {
- dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
- &nd_set->type_guid,
- &nd_label->type_guid);
- return ERR_PTR(-EAGAIN);
- }
-
- if (nd_label->isetcookie != __cpu_to_le64(nd_set->cookie2)) {
- dev_dbg(ndd->dev, "expect cookie %#llx got %#llx\n",
- nd_set->cookie2,
- __le64_to_cpu(nd_label->isetcookie));
- return ERR_PTR(-EAGAIN);
- }
- }
-
- nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
- if (!nsblk)
- return ERR_PTR(-ENOMEM);
- dev = &nsblk->common.dev;
- dev->type = &namespace_blk_device_type;
- dev->parent = &nd_region->dev;
- nsblk->id = -1;
- nsblk->lbasize = __le64_to_cpu(nd_label->lbasize);
- nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN,
- GFP_KERNEL);
- if (namespace_label_has(ndd, abstraction_guid))
- nsblk->common.claim_class
- = to_nvdimm_cclass(&nd_label->abstraction_guid);
- if (!nsblk->uuid)
- goto blk_err;
- memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
- if (name[0]) {
- nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
- GFP_KERNEL);
- if (!nsblk->alt_name)
- goto blk_err;
- }
- res = nsblk_add_resource(nd_region, ndd, nsblk,
- __le64_to_cpu(nd_label->dpa));
- if (!res)
- goto blk_err;
- nd_dbg_dpa(nd_region, ndd, res, "%d: assign\n", count);
- return dev;
- blk_err:
- namespace_blk_release(dev);
- return ERR_PTR(-ENXIO);
-}
-
static int cmp_dpa(const void *a, const void *b)
{
const struct device *dev_a = *(const struct device **) a;
const struct device *dev_b = *(const struct device **) b;
- struct nd_namespace_blk *nsblk_a, *nsblk_b;
struct nd_namespace_pmem *nspm_a, *nspm_b;
if (is_namespace_io(dev_a))
return 0;
- if (is_namespace_blk(dev_a)) {
- nsblk_a = to_nd_namespace_blk(dev_a);
- nsblk_b = to_nd_namespace_blk(dev_b);
-
- return memcmp(&nsblk_a->res[0]->start, &nsblk_b->res[0]->start,
- sizeof(resource_size_t));
- }
-
nspm_a = to_nd_namespace_pmem(dev_a);
nspm_b = to_nd_namespace_pmem(dev_b);
@@ -2323,27 +1929,21 @@ static struct device **scan_labels(struct nd_region *nd_region)
struct device *dev, **devs = NULL;
struct nd_label_ent *label_ent, *e;
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+ struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
resource_size_t map_end = nd_mapping->start + nd_mapping->size - 1;
/* "safe" because create_namespace_pmem() might list_move() label_ent */
list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
struct nd_namespace_label *nd_label = label_ent->label;
struct device **__devs;
- u32 flags;
if (!nd_label)
continue;
- flags = __le32_to_cpu(nd_label->flags);
- if (is_nd_blk(&nd_region->dev)
- == !!(flags & NSLABEL_FLAG_LOCAL))
- /* pass, region matches label type */;
- else
- continue;
/* skip labels that describe extents outside of the region */
- if (__le64_to_cpu(nd_label->dpa) < nd_mapping->start ||
- __le64_to_cpu(nd_label->dpa) > map_end)
- continue;
+ if (nsl_get_dpa(ndd, nd_label) < nd_mapping->start ||
+ nsl_get_dpa(ndd, nd_label) > map_end)
+ continue;
i = add_namespace_resource(nd_region, nd_label, devs, count);
if (i < 0)
@@ -2357,16 +1957,7 @@ static struct device **scan_labels(struct nd_region *nd_region)
kfree(devs);
devs = __devs;
- if (is_nd_blk(&nd_region->dev))
- dev = create_namespace_blk(nd_region, nd_label, count);
- else {
- struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- struct nd_namespace_index *nsindex;
-
- nsindex = to_namespace_index(ndd, ndd->ns_current);
- dev = create_namespace_pmem(nd_region, nsindex, nd_label);
- }
-
+ dev = create_namespace_pmem(nd_region, nd_mapping, nd_label);
if (IS_ERR(dev)) {
switch (PTR_ERR(dev)) {
case -EAGAIN:
@@ -2383,35 +1974,25 @@ static struct device **scan_labels(struct nd_region *nd_region)
}
- dev_dbg(&nd_region->dev, "discovered %d %s namespace%s\n",
- count, is_nd_blk(&nd_region->dev)
- ? "blk" : "pmem", count == 1 ? "" : "s");
+ dev_dbg(&nd_region->dev, "discovered %d namespace%s\n", count,
+ count == 1 ? "" : "s");
if (count == 0) {
+ struct nd_namespace_pmem *nspm;
+
/* Publish a zero-sized namespace for userspace to configure. */
nd_mapping_free_labels(nd_mapping);
devs = kcalloc(2, sizeof(dev), GFP_KERNEL);
if (!devs)
goto err;
- if (is_nd_blk(&nd_region->dev)) {
- struct nd_namespace_blk *nsblk;
-
- nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
- if (!nsblk)
- goto err;
- dev = &nsblk->common.dev;
- dev->type = &namespace_blk_device_type;
- } else {
- struct nd_namespace_pmem *nspm;
- nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
- if (!nspm)
- goto err;
- dev = &nspm->nsio.common.dev;
- dev->type = &namespace_pmem_device_type;
- nd_namespace_pmem_set_resource(nd_region, nspm, 0);
- }
+ nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
+ if (!nspm)
+ goto err;
+ dev = &nspm->nsio.common.dev;
+ dev->type = &namespace_pmem_device_type;
+ nd_namespace_pmem_set_resource(nd_region, nspm, 0);
dev->parent = &nd_region->dev;
devs[count++] = dev;
} else if (is_memory(&nd_region->dev)) {
@@ -2446,10 +2027,7 @@ static struct device **scan_labels(struct nd_region *nd_region)
err:
if (devs) {
for (i = 0; devs[i]; i++)
- if (is_nd_blk(&nd_region->dev))
- namespace_blk_release(devs[i]);
- else
- namespace_pmem_release(devs[i]);
+ namespace_pmem_release(devs[i]);
kfree(devs);
}
return NULL;
@@ -2505,7 +2083,7 @@ static void deactivate_labels(void *region)
static int init_active_labels(struct nd_region *nd_region)
{
- int i;
+ int i, rc = 0;
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
@@ -2521,16 +2099,17 @@ static int init_active_labels(struct nd_region *nd_region)
if (!ndd) {
if (test_bit(NDD_LOCKED, &nvdimm->flags))
/* fail, label data may be unreadable */;
- else if (test_bit(NDD_ALIASING, &nvdimm->flags))
+ else if (test_bit(NDD_LABELING, &nvdimm->flags))
/* fail, labels needed to disambiguate dpa */;
else
- return 0;
+ continue;
dev_err(&nd_region->dev, "%s: is %s, failing probe\n",
dev_name(&nd_mapping->nvdimm->dev),
test_bit(NDD_LOCKED, &nvdimm->flags)
? "locked" : "disabled");
- return -ENXIO;
+ rc = -ENXIO;
+ goto out;
}
nd_mapping->ndd = ndd;
atomic_inc(&nvdimm->busy);
@@ -2547,12 +2126,6 @@ static int init_active_labels(struct nd_region *nd_region)
if (!label_ent)
break;
label = nd_label_active(ndd, j);
- if (test_bit(NDD_NOBLK, &nvdimm->flags)) {
- u32 flags = __le32_to_cpu(label->flags);
-
- flags &= ~NSLABEL_FLAG_LOCAL;
- label->flags = __cpu_to_le32(flags);
- }
label_ent->label = label;
mutex_lock(&nd_mapping->lock);
@@ -2564,13 +2137,17 @@ static int init_active_labels(struct nd_region *nd_region)
break;
}
- if (i < nd_region->ndr_mappings) {
+ if (i < nd_region->ndr_mappings)
+ rc = -ENOMEM;
+
+out:
+ if (rc) {
deactivate_labels(nd_region);
- return -ENOMEM;
+ return rc;
}
return devm_add_action_or_reset(&nd_region->dev, deactivate_labels,
- nd_region);
+ nd_region);
}
int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
@@ -2592,7 +2169,6 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
devs = create_namespace_io(nd_region);
break;
case ND_DEVICE_NAMESPACE_PMEM:
- case ND_DEVICE_NAMESPACE_BLK:
devs = create_namespaces(nd_region);
break;
default:
@@ -2607,19 +2183,12 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
struct device *dev = devs[i];
int id;
- if (type == ND_DEVICE_NAMESPACE_BLK) {
- struct nd_namespace_blk *nsblk;
-
- nsblk = to_nd_namespace_blk(dev);
- id = ida_simple_get(&nd_region->ns_ida, 0, 0,
- GFP_KERNEL);
- nsblk->id = id;
- } else if (type == ND_DEVICE_NAMESPACE_PMEM) {
+ if (type == ND_DEVICE_NAMESPACE_PMEM) {
struct nd_namespace_pmem *nspm;
nspm = to_nd_namespace_pmem(dev);
id = ida_simple_get(&nd_region->ns_ida, 0, 0,
- GFP_KERNEL);
+ GFP_KERNEL);
nspm->id = id;
} else
id = i;
@@ -2627,6 +2196,8 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
if (id < 0)
break;
dev_set_name(dev, "namespace%d.%d", nd_region->id, id);
+ device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &nvdimm_namespace_key);
nd_device_register(dev);
}
if (i)
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index ddb9d97d9129..cc86ee09d7c0 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -45,6 +45,7 @@ struct nvdimm {
struct kernfs_node *overwrite_state;
} sec;
struct delayed_work dwork;
+ const struct nvdimm_fw_ops *fw_ops;
};
static inline unsigned long nvdimm_security_flags(
@@ -81,30 +82,12 @@ static inline void nvdimm_security_overwrite_query(struct work_struct *work)
}
#endif
-/**
- * struct blk_alloc_info - tracking info for BLK dpa scanning
- * @nd_mapping: blk region mapping boundaries
- * @available: decremented in alias_dpa_busy as aliased PMEM is scanned
- * @busy: decremented in blk_dpa_busy to account for ranges already
- * handled by alias_dpa_busy
- * @res: alias_dpa_busy interprets this a free space range that needs to
- * be truncated to the valid BLK allocation starting DPA, blk_dpa_busy
- * treats it as a busy range that needs the aliased PMEM ranges
- * truncated.
- */
-struct blk_alloc_info {
- struct nd_mapping *nd_mapping;
- resource_size_t available, busy;
- struct resource *res;
-};
-
bool is_nvdimm(struct device *dev);
bool is_nd_pmem(struct device *dev);
bool is_nd_volatile(struct device *dev);
-bool is_nd_blk(struct device *dev);
static inline bool is_nd_region(struct device *dev)
{
- return is_nd_pmem(dev) || is_nd_blk(dev) || is_nd_volatile(dev);
+ return is_nd_pmem(dev) || is_nd_volatile(dev);
}
static inline bool is_memory(struct device *dev)
{
@@ -123,10 +106,11 @@ void nd_region_create_dax_seed(struct nd_region *nd_region);
int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus);
void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
void nd_synchronize(void);
-void __nd_device_register(struct device *dev);
+void nd_device_register(struct device *dev);
struct nd_label_id;
-char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags);
-bool nd_is_uuid_unique(struct device *dev, u8 *uuid);
+char *nd_label_gen_id(struct nd_label_id *label_id, const uuid_t *uuid,
+ u32 flags);
+bool nd_is_uuid_unique(struct device *dev, uuid_t *uuid);
struct nd_region;
struct nvdimm_drvdata;
struct nd_mapping;
@@ -140,17 +124,12 @@ resource_size_t nd_pmem_max_contiguous_dpa(struct nd_region *nd_region,
struct nd_mapping *nd_mapping);
resource_size_t nd_region_allocatable_dpa(struct nd_region *nd_region);
resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
- struct nd_mapping *nd_mapping, resource_size_t *overlap);
-resource_size_t nd_blk_available_dpa(struct nd_region *nd_region);
+ struct nd_mapping *nd_mapping);
resource_size_t nd_region_available_dpa(struct nd_region *nd_region);
int nd_region_conflict(struct nd_region *nd_region, resource_size_t start,
resource_size_t size);
resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
struct nd_label_id *label_id);
-int alias_dpa_busy(struct device *dev, void *data);
-struct resource *nsblk_add_resource(struct nd_region *nd_region,
- struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk,
- resource_size_t start);
int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd);
void get_ndd(struct nvdimm_drvdata *ndd);
resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns);
@@ -182,70 +161,4 @@ static inline void devm_nsio_disable(struct device *dev,
{
}
#endif
-
-#ifdef CONFIG_PROVE_LOCKING
-extern struct class *nd_class;
-
-enum {
- LOCK_BUS,
- LOCK_NDCTL,
- LOCK_REGION,
- LOCK_DIMM = LOCK_REGION,
- LOCK_NAMESPACE,
- LOCK_CLAIM,
-};
-
-static inline void debug_nvdimm_lock(struct device *dev)
-{
- if (is_nd_region(dev))
- mutex_lock_nested(&dev->lockdep_mutex, LOCK_REGION);
- else if (is_nvdimm(dev))
- mutex_lock_nested(&dev->lockdep_mutex, LOCK_DIMM);
- else if (is_nd_btt(dev) || is_nd_pfn(dev) || is_nd_dax(dev))
- mutex_lock_nested(&dev->lockdep_mutex, LOCK_CLAIM);
- else if (dev->parent && (is_nd_region(dev->parent)))
- mutex_lock_nested(&dev->lockdep_mutex, LOCK_NAMESPACE);
- else if (is_nvdimm_bus(dev))
- mutex_lock_nested(&dev->lockdep_mutex, LOCK_BUS);
- else if (dev->class && dev->class == nd_class)
- mutex_lock_nested(&dev->lockdep_mutex, LOCK_NDCTL);
- else
- dev_WARN(dev, "unknown lock level\n");
-}
-
-static inline void debug_nvdimm_unlock(struct device *dev)
-{
- mutex_unlock(&dev->lockdep_mutex);
-}
-
-static inline void nd_device_lock(struct device *dev)
-{
- device_lock(dev);
- debug_nvdimm_lock(dev);
-}
-
-static inline void nd_device_unlock(struct device *dev)
-{
- debug_nvdimm_unlock(dev);
- device_unlock(dev);
-}
-#else
-static inline void nd_device_lock(struct device *dev)
-{
- device_lock(dev);
-}
-
-static inline void nd_device_unlock(struct device *dev)
-{
- device_unlock(dev);
-}
-
-static inline void debug_nvdimm_lock(struct device *dev)
-{
-}
-
-static inline void debug_nvdimm_unlock(struct device *dev)
-{
-}
-#endif
#endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index c9f6a5b5253a..85ca5b4da3cf 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -30,16 +30,281 @@ struct nvdimm_drvdata {
int nslabel_size;
struct nd_cmd_get_config_size nsarea;
void *data;
+ bool cxl;
int ns_current, ns_next;
struct resource dpa;
struct kref kref;
};
+static inline const u8 *nsl_ref_name(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ if (ndd->cxl)
+ return nd_label->cxl.name;
+ return nd_label->efi.name;
+}
+
+static inline u8 *nsl_get_name(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label, u8 *name)
+{
+ if (ndd->cxl)
+ return memcpy(name, nd_label->cxl.name, NSLABEL_NAME_LEN);
+ return memcpy(name, nd_label->efi.name, NSLABEL_NAME_LEN);
+}
+
+static inline u8 *nsl_set_name(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label, u8 *name)
+{
+ if (!name)
+ return NULL;
+ if (ndd->cxl)
+ return memcpy(nd_label->cxl.name, name, NSLABEL_NAME_LEN);
+ return memcpy(nd_label->efi.name, name, NSLABEL_NAME_LEN);
+}
+
+static inline u32 nsl_get_slot(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ if (ndd->cxl)
+ return __le32_to_cpu(nd_label->cxl.slot);
+ return __le32_to_cpu(nd_label->efi.slot);
+}
+
+static inline void nsl_set_slot(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label, u32 slot)
+{
+ if (ndd->cxl)
+ nd_label->cxl.slot = __cpu_to_le32(slot);
+ else
+ nd_label->efi.slot = __cpu_to_le32(slot);
+}
+
+static inline u64 nsl_get_checksum(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ if (ndd->cxl)
+ return __le64_to_cpu(nd_label->cxl.checksum);
+ return __le64_to_cpu(nd_label->efi.checksum);
+}
+
+static inline void nsl_set_checksum(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label,
+ u64 checksum)
+{
+ if (ndd->cxl)
+ nd_label->cxl.checksum = __cpu_to_le64(checksum);
+ else
+ nd_label->efi.checksum = __cpu_to_le64(checksum);
+}
+
+static inline u32 nsl_get_flags(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ if (ndd->cxl)
+ return __le32_to_cpu(nd_label->cxl.flags);
+ return __le32_to_cpu(nd_label->efi.flags);
+}
+
+static inline void nsl_set_flags(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label, u32 flags)
+{
+ if (ndd->cxl)
+ nd_label->cxl.flags = __cpu_to_le32(flags);
+ else
+ nd_label->efi.flags = __cpu_to_le32(flags);
+}
+
+static inline u64 nsl_get_dpa(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ if (ndd->cxl)
+ return __le64_to_cpu(nd_label->cxl.dpa);
+ return __le64_to_cpu(nd_label->efi.dpa);
+}
+
+static inline void nsl_set_dpa(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label, u64 dpa)
+{
+ if (ndd->cxl)
+ nd_label->cxl.dpa = __cpu_to_le64(dpa);
+ else
+ nd_label->efi.dpa = __cpu_to_le64(dpa);
+}
+
+static inline u64 nsl_get_rawsize(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ if (ndd->cxl)
+ return __le64_to_cpu(nd_label->cxl.rawsize);
+ return __le64_to_cpu(nd_label->efi.rawsize);
+}
+
+static inline void nsl_set_rawsize(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label,
+ u64 rawsize)
+{
+ if (ndd->cxl)
+ nd_label->cxl.rawsize = __cpu_to_le64(rawsize);
+ else
+ nd_label->efi.rawsize = __cpu_to_le64(rawsize);
+}
+
+static inline u64 nsl_get_isetcookie(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ /* WARN future refactor attempts that break this assumption */
+ if (dev_WARN_ONCE(ndd->dev, ndd->cxl,
+ "CXL labels do not use the isetcookie concept\n"))
+ return 0;
+ return __le64_to_cpu(nd_label->efi.isetcookie);
+}
+
+static inline void nsl_set_isetcookie(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label,
+ u64 isetcookie)
+{
+ if (!ndd->cxl)
+ nd_label->efi.isetcookie = __cpu_to_le64(isetcookie);
+}
+
+static inline bool nsl_validate_isetcookie(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label,
+ u64 cookie)
+{
+ /*
+ * Let the EFI and CXL validation comingle, where fields that
+ * don't matter to CXL always validate.
+ */
+ if (ndd->cxl)
+ return true;
+ return cookie == __le64_to_cpu(nd_label->efi.isetcookie);
+}
+
+static inline u16 nsl_get_position(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ if (ndd->cxl)
+ return __le16_to_cpu(nd_label->cxl.position);
+ return __le16_to_cpu(nd_label->efi.position);
+}
+
+static inline void nsl_set_position(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label,
+ u16 position)
+{
+ if (ndd->cxl)
+ nd_label->cxl.position = __cpu_to_le16(position);
+ else
+ nd_label->efi.position = __cpu_to_le16(position);
+}
+
+static inline u16 nsl_get_nlabel(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ if (ndd->cxl)
+ return 0;
+ return __le16_to_cpu(nd_label->efi.nlabel);
+}
+
+static inline void nsl_set_nlabel(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label,
+ u16 nlabel)
+{
+ if (!ndd->cxl)
+ nd_label->efi.nlabel = __cpu_to_le16(nlabel);
+}
+
+static inline u16 nsl_get_nrange(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ if (ndd->cxl)
+ return __le16_to_cpu(nd_label->cxl.nrange);
+ return 1;
+}
+
+static inline void nsl_set_nrange(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label,
+ u16 nrange)
+{
+ if (ndd->cxl)
+ nd_label->cxl.nrange = __cpu_to_le16(nrange);
+}
+
+static inline u64 nsl_get_lbasize(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ /*
+ * Yes, for some reason the EFI labels convey a massive 64-bit
+ * lbasize, that got fixed for CXL.
+ */
+ if (ndd->cxl)
+ return __le16_to_cpu(nd_label->cxl.lbasize);
+ return __le64_to_cpu(nd_label->efi.lbasize);
+}
+
+static inline void nsl_set_lbasize(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label,
+ u64 lbasize)
+{
+ if (ndd->cxl)
+ nd_label->cxl.lbasize = __cpu_to_le16(lbasize);
+ else
+ nd_label->efi.lbasize = __cpu_to_le64(lbasize);
+}
+
+static inline const uuid_t *nsl_get_uuid(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label,
+ uuid_t *uuid)
+{
+ if (ndd->cxl)
+ import_uuid(uuid, nd_label->cxl.uuid);
+ else
+ import_uuid(uuid, nd_label->efi.uuid);
+ return uuid;
+}
+
+static inline const uuid_t *nsl_set_uuid(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label,
+ const uuid_t *uuid)
+{
+ if (ndd->cxl)
+ export_uuid(nd_label->cxl.uuid, uuid);
+ else
+ export_uuid(nd_label->efi.uuid, uuid);
+ return uuid;
+}
+
+static inline bool nsl_uuid_equal(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label,
+ const uuid_t *uuid)
+{
+ uuid_t tmp;
+
+ if (ndd->cxl)
+ import_uuid(&tmp, nd_label->cxl.uuid);
+ else
+ import_uuid(&tmp, nd_label->efi.uuid);
+ return uuid_equal(&tmp, uuid);
+}
+
+static inline const u8 *nsl_uuid_raw(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ if (ndd->cxl)
+ return nd_label->cxl.uuid;
+ return nd_label->efi.uuid;
+}
+
+bool nsl_validate_type_guid(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label, guid_t *guid);
+enum nvdimm_claim_class nsl_get_claim_class(struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label);
+
struct nd_region_data {
int ns_count;
int ns_active;
unsigned int hints_shift;
- void __iomem *flush_wpq[0];
+ void __iomem *flush_wpq[];
};
static inline void __iomem *ndrd_get_flush_wpq(struct nd_region_data *ndrd,
@@ -83,8 +348,8 @@ static inline struct nd_namespace_index *to_next_namespace_index(
unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd);
-#define namespace_label_has(ndd, field) \
- (offsetof(struct nd_namespace_label, field) \
+#define efi_namespace_label_has(ndd, field) \
+ (!ndd->cxl && offsetof(struct nvdimm_efi_label, field) \
< sizeof_namespace_label(ndd))
#define nd_dbg_dpa(r, d, res, fmt, arg...) \
@@ -146,6 +411,7 @@ struct nd_region {
struct device *btt_seed;
struct device *pfn_seed;
struct device *dax_seed;
+ unsigned long align;
u16 ndr_mappings;
u64 ndr_size;
u64 ndr_start;
@@ -156,16 +422,17 @@ struct nd_region {
struct nd_interleave_set *nd_set;
struct nd_percpu_lane __percpu *lane;
int (*flush)(struct nd_region *nd_region, struct bio *bio);
- struct nd_mapping mapping[0];
+ struct nd_mapping mapping[];
};
-struct nd_blk_region {
- int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
- int (*do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
- void *iobuf, u64 len, int rw);
- void *blk_provider_data;
- struct nd_region nd_region;
-};
+static inline bool nsl_validate_nlabel(struct nd_region *nd_region,
+ struct nvdimm_drvdata *ndd,
+ struct nd_namespace_label *nd_label)
+{
+ if (ndd->cxl)
+ return true;
+ return nsl_get_nlabel(ndd, nd_label) == nd_region->ndr_mappings;
+}
/*
* Lookup next in the repeating sequence of 01, 10, and 11.
@@ -184,7 +451,7 @@ struct nd_btt {
struct btt *btt;
unsigned long lbasize;
u64 size;
- u8 *uuid;
+ uuid_t *uuid;
int id;
int initial_offset;
u16 version_major;
@@ -199,7 +466,7 @@ enum nd_pfn_mode {
struct nd_pfn {
int id;
- u8 *uuid;
+ uuid_t *uuid;
struct device dev;
unsigned long align;
unsigned long npfns;
@@ -227,7 +494,7 @@ void wait_nvdimm_bus_probe_idle(struct device *dev);
void nd_device_register(struct device *dev);
void nd_device_unregister(struct device *dev, enum nd_async_mode mode);
void nd_device_notify(struct device *dev, enum nvdimm_event event);
-int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
+int nd_uuid_store(struct device *dev, uuid_t **uuid_out, const char *buf,
size_t len);
ssize_t nd_size_select_show(unsigned long current_size,
const unsigned long *supported, char *buf);
@@ -252,7 +519,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
void *buf, size_t len);
long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
unsigned int len);
-void nvdimm_set_aliasing(struct device *dev);
+void nvdimm_set_labeling(struct device *dev);
void nvdimm_set_locked(struct device *dev);
void nvdimm_clear_locked(struct device *dev);
int nvdimm_security_setup_events(struct device *dev);
@@ -360,7 +627,7 @@ u64 nd_region_interleave_set_altcookie(struct nd_region *nd_region);
void nvdimm_bus_lock(struct device *dev);
void nvdimm_bus_unlock(struct device *dev);
bool is_nvdimm_bus_locked(struct device *dev);
-int nvdimm_revalidate_disk(struct gendisk *disk);
+void nvdimm_check_and_set_ro(struct gendisk *disk);
void nvdimm_drvdata_release(struct kref *kref);
void put_ndd(struct nvdimm_drvdata *ndd);
int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd);
@@ -376,15 +643,16 @@ int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt);
const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
char *name);
unsigned int pmem_sector_size(struct nd_namespace_common *ndns);
+struct range;
void nvdimm_badblocks_populate(struct nd_region *nd_region,
- struct badblocks *bb, const struct resource *res);
+ struct badblocks *bb, const struct range *range);
int devm_namespace_enable(struct device *dev, struct nd_namespace_common *ndns,
resource_size_t size);
void devm_namespace_disable(struct device *dev,
struct nd_namespace_common *ndns);
#if IS_ENABLED(CONFIG_ND_CLAIM)
/* max struct page size independent of kernel config */
-#define MAX_STRUCT_PAGE_SIZE 64
+#define MAX_STRUCT_PAGE_SIZE 128
int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap);
#else
static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
@@ -393,27 +661,7 @@ static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
return -ENXIO;
}
#endif
-int nd_blk_region_init(struct nd_region *nd_region);
int nd_region_activate(struct nd_region *nd_region);
-void __nd_iostat_start(struct bio *bio, unsigned long *start);
-static inline bool nd_iostat_start(struct bio *bio, unsigned long *start)
-{
- struct gendisk *disk = bio->bi_disk;
-
- if (!blk_queue_io_stat(disk->queue))
- return false;
-
- *start = jiffies;
- generic_start_io_acct(disk->queue, bio_op(bio), bio_sectors(bio),
- &disk->part0);
- return true;
-}
-static inline void nd_iostat_end(struct bio *bio, unsigned long start)
-{
- struct gendisk *disk = bio->bi_disk;
-
- generic_end_io_acct(disk->queue, bio_op(bio), &disk->part0, start);
-}
static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
unsigned int len)
{
@@ -427,7 +675,6 @@ static inline bool is_bad_pmem(struct badblocks *bb, sector_t sector,
return false;
}
-resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk);
-const u8 *nd_dev_to_uuid(struct device *dev);
+const uuid_t *nd_dev_to_uuid(struct device *dev);
bool pmem_should_map_pages(struct device *dev);
#endif /* __ND_H__ */
diff --git a/drivers/nvdimm/nd_perf.c b/drivers/nvdimm/nd_perf.c
new file mode 100644
index 000000000000..433bbb68ae64
--- /dev/null
+++ b/drivers/nvdimm/nd_perf.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * nd_perf.c: NVDIMM Device Performance Monitoring Unit support
+ *
+ * Perf interface to expose nvdimm performance stats.
+ *
+ * Copyright (C) 2021 IBM Corporation
+ */
+
+#define pr_fmt(fmt) "nvdimm_pmu: " fmt
+
+#include <linux/nd.h>
+#include <linux/platform_device.h>
+
+#define EVENT(_name, _code) enum{_name = _code}
+
+/*
+ * NVDIMM Events codes.
+ */
+
+/* Controller Reset Count */
+EVENT(CTL_RES_CNT, 0x1);
+/* Controller Reset Elapsed Time */
+EVENT(CTL_RES_TM, 0x2);
+/* Power-on Seconds */
+EVENT(POWERON_SECS, 0x3);
+/* Life Remaining */
+EVENT(MEM_LIFE, 0x4);
+/* Critical Resource Utilization */
+EVENT(CRI_RES_UTIL, 0x5);
+/* Host Load Count */
+EVENT(HOST_L_CNT, 0x6);
+/* Host Store Count */
+EVENT(HOST_S_CNT, 0x7);
+/* Host Store Duration */
+EVENT(HOST_S_DUR, 0x8);
+/* Host Load Duration */
+EVENT(HOST_L_DUR, 0x9);
+/* Media Read Count */
+EVENT(MED_R_CNT, 0xa);
+/* Media Write Count */
+EVENT(MED_W_CNT, 0xb);
+/* Media Read Duration */
+EVENT(MED_R_DUR, 0xc);
+/* Media Write Duration */
+EVENT(MED_W_DUR, 0xd);
+/* Cache Read Hit Count */
+EVENT(CACHE_RH_CNT, 0xe);
+/* Cache Write Hit Count */
+EVENT(CACHE_WH_CNT, 0xf);
+/* Fast Write Count */
+EVENT(FAST_W_CNT, 0x10);
+
+NVDIMM_EVENT_ATTR(ctl_res_cnt, CTL_RES_CNT);
+NVDIMM_EVENT_ATTR(ctl_res_tm, CTL_RES_TM);
+NVDIMM_EVENT_ATTR(poweron_secs, POWERON_SECS);
+NVDIMM_EVENT_ATTR(mem_life, MEM_LIFE);
+NVDIMM_EVENT_ATTR(cri_res_util, CRI_RES_UTIL);
+NVDIMM_EVENT_ATTR(host_l_cnt, HOST_L_CNT);
+NVDIMM_EVENT_ATTR(host_s_cnt, HOST_S_CNT);
+NVDIMM_EVENT_ATTR(host_s_dur, HOST_S_DUR);
+NVDIMM_EVENT_ATTR(host_l_dur, HOST_L_DUR);
+NVDIMM_EVENT_ATTR(med_r_cnt, MED_R_CNT);
+NVDIMM_EVENT_ATTR(med_w_cnt, MED_W_CNT);
+NVDIMM_EVENT_ATTR(med_r_dur, MED_R_DUR);
+NVDIMM_EVENT_ATTR(med_w_dur, MED_W_DUR);
+NVDIMM_EVENT_ATTR(cache_rh_cnt, CACHE_RH_CNT);
+NVDIMM_EVENT_ATTR(cache_wh_cnt, CACHE_WH_CNT);
+NVDIMM_EVENT_ATTR(fast_w_cnt, FAST_W_CNT);
+
+static struct attribute *nvdimm_events_attr[] = {
+ NVDIMM_EVENT_PTR(CTL_RES_CNT),
+ NVDIMM_EVENT_PTR(CTL_RES_TM),
+ NVDIMM_EVENT_PTR(POWERON_SECS),
+ NVDIMM_EVENT_PTR(MEM_LIFE),
+ NVDIMM_EVENT_PTR(CRI_RES_UTIL),
+ NVDIMM_EVENT_PTR(HOST_L_CNT),
+ NVDIMM_EVENT_PTR(HOST_S_CNT),
+ NVDIMM_EVENT_PTR(HOST_S_DUR),
+ NVDIMM_EVENT_PTR(HOST_L_DUR),
+ NVDIMM_EVENT_PTR(MED_R_CNT),
+ NVDIMM_EVENT_PTR(MED_W_CNT),
+ NVDIMM_EVENT_PTR(MED_R_DUR),
+ NVDIMM_EVENT_PTR(MED_W_DUR),
+ NVDIMM_EVENT_PTR(CACHE_RH_CNT),
+ NVDIMM_EVENT_PTR(CACHE_WH_CNT),
+ NVDIMM_EVENT_PTR(FAST_W_CNT),
+ NULL
+};
+
+static struct attribute_group nvdimm_pmu_events_group = {
+ .name = "events",
+ .attrs = nvdimm_events_attr,
+};
+
+PMU_FORMAT_ATTR(event, "config:0-4");
+
+static struct attribute *nvdimm_pmu_format_attr[] = {
+ &format_attr_event.attr,
+ NULL,
+};
+
+static struct attribute_group nvdimm_pmu_format_group = {
+ .name = "format",
+ .attrs = nvdimm_pmu_format_attr,
+};
+
+ssize_t nvdimm_events_sysfs_show(struct device *dev,
+ struct device_attribute *attr, char *page)
+{
+ struct perf_pmu_events_attr *pmu_attr;
+
+ pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
+
+ return sprintf(page, "event=0x%02llx\n", pmu_attr->id);
+}
+
+static ssize_t nvdimm_pmu_cpumask_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct pmu *pmu = dev_get_drvdata(dev);
+ struct nvdimm_pmu *nd_pmu;
+
+ nd_pmu = container_of(pmu, struct nvdimm_pmu, pmu);
+
+ return cpumap_print_to_pagebuf(true, buf, cpumask_of(nd_pmu->cpu));
+}
+
+static int nvdimm_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+ struct nvdimm_pmu *nd_pmu;
+ u32 target;
+ int nodeid;
+ const struct cpumask *cpumask;
+
+ nd_pmu = hlist_entry_safe(node, struct nvdimm_pmu, node);
+
+ /* Clear it, incase given cpu is set in nd_pmu->arch_cpumask */
+ cpumask_test_and_clear_cpu(cpu, &nd_pmu->arch_cpumask);
+
+ /*
+ * If given cpu is not same as current designated cpu for
+ * counter access, just return.
+ */
+ if (cpu != nd_pmu->cpu)
+ return 0;
+
+ /* Check for any active cpu in nd_pmu->arch_cpumask */
+ target = cpumask_any(&nd_pmu->arch_cpumask);
+
+ /*
+ * Incase we don't have any active cpu in nd_pmu->arch_cpumask,
+ * check in given cpu's numa node list.
+ */
+ if (target >= nr_cpu_ids) {
+ nodeid = cpu_to_node(cpu);
+ cpumask = cpumask_of_node(nodeid);
+ target = cpumask_any_but(cpumask, cpu);
+ }
+ nd_pmu->cpu = target;
+
+ /* Migrate nvdimm pmu events to the new target cpu if valid */
+ if (target >= 0 && target < nr_cpu_ids)
+ perf_pmu_migrate_context(&nd_pmu->pmu, cpu, target);
+
+ return 0;
+}
+
+static int nvdimm_pmu_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct nvdimm_pmu *nd_pmu;
+
+ nd_pmu = hlist_entry_safe(node, struct nvdimm_pmu, node);
+
+ if (nd_pmu->cpu >= nr_cpu_ids)
+ nd_pmu->cpu = cpu;
+
+ return 0;
+}
+
+static int create_cpumask_attr_group(struct nvdimm_pmu *nd_pmu)
+{
+ struct perf_pmu_events_attr *pmu_events_attr;
+ struct attribute **attrs_group;
+ struct attribute_group *nvdimm_pmu_cpumask_group;
+
+ pmu_events_attr = kzalloc(sizeof(*pmu_events_attr), GFP_KERNEL);
+ if (!pmu_events_attr)
+ return -ENOMEM;
+
+ attrs_group = kzalloc(2 * sizeof(struct attribute *), GFP_KERNEL);
+ if (!attrs_group) {
+ kfree(pmu_events_attr);
+ return -ENOMEM;
+ }
+
+ /* Allocate memory for cpumask attribute group */
+ nvdimm_pmu_cpumask_group = kzalloc(sizeof(*nvdimm_pmu_cpumask_group), GFP_KERNEL);
+ if (!nvdimm_pmu_cpumask_group) {
+ kfree(pmu_events_attr);
+ kfree(attrs_group);
+ return -ENOMEM;
+ }
+
+ sysfs_attr_init(&pmu_events_attr->attr.attr);
+ pmu_events_attr->attr.attr.name = "cpumask";
+ pmu_events_attr->attr.attr.mode = 0444;
+ pmu_events_attr->attr.show = nvdimm_pmu_cpumask_show;
+ attrs_group[0] = &pmu_events_attr->attr.attr;
+ attrs_group[1] = NULL;
+
+ nvdimm_pmu_cpumask_group->attrs = attrs_group;
+ nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR] = nvdimm_pmu_cpumask_group;
+ return 0;
+}
+
+static int nvdimm_pmu_cpu_hotplug_init(struct nvdimm_pmu *nd_pmu)
+{
+ int nodeid, rc;
+ const struct cpumask *cpumask;
+
+ /*
+ * Incase of cpu hotplug feature, arch specific code
+ * can provide required cpumask which can be used
+ * to get designatd cpu for counter access.
+ * Check for any active cpu in nd_pmu->arch_cpumask.
+ */
+ if (!cpumask_empty(&nd_pmu->arch_cpumask)) {
+ nd_pmu->cpu = cpumask_any(&nd_pmu->arch_cpumask);
+ } else {
+ /* pick active cpu from the cpumask of device numa node. */
+ nodeid = dev_to_node(nd_pmu->dev);
+ cpumask = cpumask_of_node(nodeid);
+ nd_pmu->cpu = cpumask_any(cpumask);
+ }
+
+ rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "perf/nvdimm:online",
+ nvdimm_pmu_cpu_online, nvdimm_pmu_cpu_offline);
+
+ if (rc < 0)
+ return rc;
+
+ nd_pmu->cpuhp_state = rc;
+
+ /* Register the pmu instance for cpu hotplug */
+ rc = cpuhp_state_add_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
+ if (rc) {
+ cpuhp_remove_multi_state(nd_pmu->cpuhp_state);
+ return rc;
+ }
+
+ /* Create cpumask attribute group */
+ rc = create_cpumask_attr_group(nd_pmu);
+ if (rc) {
+ cpuhp_state_remove_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
+ cpuhp_remove_multi_state(nd_pmu->cpuhp_state);
+ return rc;
+ }
+
+ return 0;
+}
+
+static void nvdimm_pmu_free_hotplug_memory(struct nvdimm_pmu *nd_pmu)
+{
+ cpuhp_state_remove_instance_nocalls(nd_pmu->cpuhp_state, &nd_pmu->node);
+ cpuhp_remove_multi_state(nd_pmu->cpuhp_state);
+
+ if (nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR])
+ kfree(nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR]->attrs);
+ kfree(nd_pmu->pmu.attr_groups[NVDIMM_PMU_CPUMASK_ATTR]);
+}
+
+int register_nvdimm_pmu(struct nvdimm_pmu *nd_pmu, struct platform_device *pdev)
+{
+ int rc;
+
+ if (!nd_pmu || !pdev)
+ return -EINVAL;
+
+ /* event functions like add/del/read/event_init and pmu name should not be NULL */
+ if (WARN_ON_ONCE(!(nd_pmu->pmu.event_init && nd_pmu->pmu.add &&
+ nd_pmu->pmu.del && nd_pmu->pmu.read && nd_pmu->pmu.name)))
+ return -EINVAL;
+
+ nd_pmu->pmu.attr_groups = kzalloc((NVDIMM_PMU_NULL_ATTR + 1) *
+ sizeof(struct attribute_group *), GFP_KERNEL);
+ if (!nd_pmu->pmu.attr_groups)
+ return -ENOMEM;
+
+ /*
+ * Add platform_device->dev pointer to nvdimm_pmu to access
+ * device data in events functions.
+ */
+ nd_pmu->dev = &pdev->dev;
+
+ /* Fill attribute groups for the nvdimm pmu device */
+ nd_pmu->pmu.attr_groups[NVDIMM_PMU_FORMAT_ATTR] = &nvdimm_pmu_format_group;
+ nd_pmu->pmu.attr_groups[NVDIMM_PMU_EVENT_ATTR] = &nvdimm_pmu_events_group;
+ nd_pmu->pmu.attr_groups[NVDIMM_PMU_NULL_ATTR] = NULL;
+
+ /* Fill attribute group for cpumask */
+ rc = nvdimm_pmu_cpu_hotplug_init(nd_pmu);
+ if (rc) {
+ pr_info("cpu hotplug feature failed for device: %s\n", nd_pmu->pmu.name);
+ kfree(nd_pmu->pmu.attr_groups);
+ return rc;
+ }
+
+ rc = perf_pmu_register(&nd_pmu->pmu, nd_pmu->pmu.name, -1);
+ if (rc) {
+ kfree(nd_pmu->pmu.attr_groups);
+ nvdimm_pmu_free_hotplug_memory(nd_pmu);
+ return rc;
+ }
+
+ pr_info("%s NVDIMM performance monitor support registered\n",
+ nd_pmu->pmu.name);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(register_nvdimm_pmu);
+
+void unregister_nvdimm_pmu(struct nvdimm_pmu *nd_pmu)
+{
+ perf_pmu_unregister(&nd_pmu->pmu);
+ nvdimm_pmu_free_hotplug_memory(nd_pmu);
+ kfree(nd_pmu);
+}
+EXPORT_SYMBOL_GPL(unregister_nvdimm_pmu);
diff --git a/drivers/nvdimm/nd_virtio.c b/drivers/nvdimm/nd_virtio.c
index 10351d5b49fa..c6a648fd8744 100644
--- a/drivers/nvdimm/nd_virtio.c
+++ b/drivers/nvdimm/nd_virtio.c
@@ -105,12 +105,12 @@ int async_pmem_flush(struct nd_region *nd_region, struct bio *bio)
* parent bio. Otherwise directly call nd_region flush.
*/
if (bio && bio->bi_iter.bi_sector != -1) {
- struct bio *child = bio_alloc(GFP_ATOMIC, 0);
+ struct bio *child = bio_alloc(bio->bi_bdev, 0, REQ_PREFLUSH,
+ GFP_ATOMIC);
if (!child)
return -ENOMEM;
- bio_copy_dev(child, bio);
- child->bi_opf = REQ_PREFLUSH;
+ bio_clone_blkg_association(child, bio);
child->bi_iter.bi_sector = -1;
bio_chain(child, bio);
submit_bio(child);
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index 8224d1431ea9..10dbdcdfb9ce 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -62,8 +62,10 @@ static int of_pmem_region_probe(struct platform_device *pdev)
if (is_volatile)
region = nvdimm_volatile_region_create(bus, &ndr_desc);
- else
+ else {
+ set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
region = nvdimm_pmem_region_create(bus, &ndr_desc);
+ }
if (!region)
dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n",
@@ -88,6 +90,7 @@ static int of_pmem_region_remove(struct platform_device *pdev)
static const struct of_device_id of_pmem_region_match[] = {
{ .compatible = "pmem-region" },
+ { .compatible = "pmem-region-v2" },
{ },
};
diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
index acb19517f678..37cb1b8a2a39 100644
--- a/drivers/nvdimm/pfn.h
+++ b/drivers/nvdimm/pfn.h
@@ -24,6 +24,18 @@ struct nd_pfn_sb {
__le64 npfns;
__le32 mode;
/* minor-version-1 additions for section alignment */
+ /**
+ * @start_pad: Deprecated attribute to pad start-misaligned namespaces
+ *
+ * start_pad is deprecated because the original definition did
+ * not comprehend that dataoff is relative to the base address
+ * of the namespace not the start_pad adjusted base. The result
+ * is that the dax path is broken, but the block-I/O path is
+ * not. The kernel will no longer create namespaces using start
+ * padding, but it still supports block-I/O for legacy
+ * configurations mainly to allow a backup, reconfigure the
+ * namespace, and restore flow to repair dax operation.
+ */
__le32 start_pad;
__le32 end_trunc;
/* minor-version-2 record the base alignment of the mapping */
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index b94f7a7e94b8..61af072ac98f 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -5,7 +5,6 @@
#include <linux/memremap.h>
#include <linux/blkdev.h>
#include <linux/device.h>
-#include <linux/genhd.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/fs.h>
@@ -56,7 +55,7 @@ static ssize_t mode_store(struct device *dev,
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc = 0;
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
if (dev->driver)
rc = -EBUSY;
@@ -78,7 +77,7 @@ static ssize_t mode_store(struct device *dev,
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc ? rc : len;
}
@@ -124,14 +123,14 @@ static ssize_t align_store(struct device *dev,
unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, };
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_size_select_store(dev, buf, &nd_pfn->align,
nd_pfn_supported_alignments(aligns));
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc ? rc : len;
}
@@ -153,11 +152,11 @@ static ssize_t uuid_store(struct device *dev,
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc ? rc : len;
}
@@ -182,13 +181,13 @@ static ssize_t namespace_store(struct device *dev,
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc;
}
@@ -200,7 +199,7 @@ static ssize_t resource_show(struct device *dev,
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
if (dev->driver) {
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
u64 offset = __le64_to_cpu(pfn_sb->dataoff);
@@ -214,11 +213,11 @@ static ssize_t resource_show(struct device *dev,
/* no address to convey if the pfn instance is disabled */
rc = -ENXIO;
}
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc;
}
-static DEVICE_ATTR(resource, 0400, resource_show, NULL);
+static DEVICE_ATTR_ADMIN_RO(resource);
static ssize_t size_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -226,7 +225,7 @@ static ssize_t size_show(struct device *dev,
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
if (dev->driver) {
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
u64 offset = __le64_to_cpu(pfn_sb->dataoff);
@@ -242,7 +241,7 @@ static ssize_t size_show(struct device *dev,
/* no size to convey if the pfn instance is disabled */
rc = -ENXIO;
}
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc;
}
@@ -292,6 +291,8 @@ bool is_nd_pfn(struct device *dev)
}
EXPORT_SYMBOL(is_nd_pfn);
+static struct lock_class_key nvdimm_pfn_key;
+
struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
struct nd_namespace_common *ndns)
{
@@ -304,6 +305,7 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
nd_pfn->align = nd_pfn_default_alignment();
dev = &nd_pfn->dev;
device_initialize(&nd_pfn->dev);
+ lockdep_set_class(&nd_pfn->dev.mutex, &nvdimm_pfn_key);
if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
dev_name(ndns->claim));
@@ -347,7 +349,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region)
nd_pfn = nd_pfn_alloc(nd_region);
dev = nd_pfn_devinit(nd_pfn, NULL);
- __nd_device_register(dev);
+ nd_device_register(dev);
return dev;
}
@@ -446,12 +448,13 @@ static bool nd_supported_alignment(unsigned long align)
int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
{
u64 checksum, offset;
+ struct resource *res;
enum nd_pfn_mode mode;
struct nd_namespace_io *nsio;
unsigned long align, start_pad;
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
struct nd_namespace_common *ndns = nd_pfn->ndns;
- const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev);
+ const uuid_t *parent_uuid = nd_dev_to_uuid(&ndns->dev);
if (!pfn_sb || !ndns)
return -ENODEV;
@@ -561,14 +564,14 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n",
nd_pfn->align, align, nd_pfn->mode,
mode);
- return -EINVAL;
+ return -EOPNOTSUPP;
}
}
if (align > nvdimm_namespace_capacity(ndns)) {
dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
align, nvdimm_namespace_capacity(ndns));
- return -EINVAL;
+ return -EOPNOTSUPP;
}
/*
@@ -578,18 +581,31 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
* established.
*/
nsio = to_nd_namespace_io(&ndns->dev);
- if (offset >= resource_size(&nsio->res)) {
+ res = &nsio->res;
+ if (offset >= resource_size(res)) {
dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
dev_name(&ndns->dev));
- return -EBUSY;
+ return -EOPNOTSUPP;
}
- if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
+ if ((align && !IS_ALIGNED(res->start + offset + start_pad, align))
|| !IS_ALIGNED(offset, PAGE_SIZE)) {
dev_err(&nd_pfn->dev,
"bad offset: %#llx dax disabled align: %#lx\n",
offset, align);
- return -ENXIO;
+ return -EOPNOTSUPP;
+ }
+
+ if (!IS_ALIGNED(res->start + le32_to_cpu(pfn_sb->start_pad),
+ memremap_compat_align())) {
+ dev_err(&nd_pfn->dev, "resource start misaligned\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (!IS_ALIGNED(res->end + 1 - le32_to_cpu(pfn_sb->end_trunc),
+ memremap_compat_align())) {
+ dev_err(&nd_pfn->dev, "resource end misaligned\n");
+ return -EOPNOTSUPP;
}
return 0;
@@ -630,7 +646,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
nd_detach_ndns(pfn_dev, &nd_pfn->ndns);
put_device(pfn_dev);
} else
- __nd_device_register(pfn_dev);
+ nd_device_register(pfn_dev);
return rc;
}
@@ -658,7 +674,7 @@ static unsigned long init_altmap_reserve(resource_size_t base)
static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
{
- struct resource *res = &pgmap->res;
+ struct range *range = &pgmap->range;
struct vmem_altmap *altmap = &pgmap->altmap;
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
u64 offset = le64_to_cpu(pfn_sb->dataoff);
@@ -675,16 +691,17 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
.end_pfn = PHYS_PFN(end),
};
- memcpy(res, &nsio->res, sizeof(*res));
- res->start += start_pad;
- res->end -= end_trunc;
-
+ *range = (struct range) {
+ .start = nsio->res.start + start_pad,
+ .end = nsio->res.end - end_trunc,
+ };
+ pgmap->nr_range = 1;
if (nd_pfn->mode == PFN_MODE_RAM) {
if (offset < reserve)
return -EINVAL;
nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
} else if (nd_pfn->mode == PFN_MODE_PMEM) {
- nd_pfn->npfns = PHYS_PFN((resource_size(res) - offset));
+ nd_pfn->npfns = PHYS_PFN((range_len(range) - offset));
if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
dev_info(&nd_pfn->dev,
"number of pfns truncated from %lld to %ld\n",
@@ -750,7 +767,19 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
start = nsio->res.start;
size = resource_size(&nsio->res);
npfns = PHYS_PFN(size - SZ_8K);
- align = max(nd_pfn->align, (1UL << SUBSECTION_SHIFT));
+ align = max(nd_pfn->align, memremap_compat_align());
+
+ /*
+ * When @start is misaligned fail namespace creation. See
+ * the 'struct nd_pfn_sb' commentary on why ->start_pad is not
+ * an option.
+ */
+ if (!IS_ALIGNED(start, memremap_compat_align())) {
+ dev_err(&nd_pfn->dev, "%s: start %pa misaligned to %#lx\n",
+ dev_name(&ndns->dev), &start,
+ memremap_compat_align());
+ return -EINVAL;
+ }
end_trunc = start + size - ALIGN_DOWN(start + size, align);
if (nd_pfn->mode == PFN_MODE_PMEM) {
/*
@@ -758,7 +787,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
* when populating the vmemmap. This *should* be equal to
* PMD_SIZE for most architectures.
*
- * Also make sure size of struct page is less than 64. We
+ * Also make sure size of struct page is less than 128. We
* want to make sure we use large enough size here so that
* we don't have a dynamic reserve space depending on
* struct page size. But we also want to make sure we notice
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 4eae441f86c9..96e6e9a5f235 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -7,8 +7,8 @@
* Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
*/
-#include <asm/cacheflush.h>
#include <linux/blkdev.h>
+#include <linux/pagemap.h>
#include <linux/hdreg.h>
#include <linux/init.h>
#include <linux/platform_device.h>
@@ -24,8 +24,10 @@
#include <linux/uio.h>
#include <linux/dax.h>
#include <linux/nd.h>
-#include <linux/backing-dev.h>
+#include <linux/mm.h>
+#include <asm/cacheflush.h>
#include "pmem.h"
+#include "btt.h"
#include "pfn.h"
#include "nd.h"
@@ -43,9 +45,25 @@ static struct nd_region *to_region(struct pmem_device *pmem)
return to_nd_region(to_dev(pmem)->parent);
}
-static void hwpoison_clear(struct pmem_device *pmem,
- phys_addr_t phys, unsigned int len)
+static phys_addr_t pmem_to_phys(struct pmem_device *pmem, phys_addr_t offset)
+{
+ return pmem->phys_addr + offset;
+}
+
+static sector_t to_sect(struct pmem_device *pmem, phys_addr_t offset)
+{
+ return (offset - pmem->data_offset) >> SECTOR_SHIFT;
+}
+
+static phys_addr_t to_offset(struct pmem_device *pmem, sector_t sector)
+{
+ return (sector << SECTOR_SHIFT) + pmem->data_offset;
+}
+
+static void pmem_mkpage_present(struct pmem_device *pmem, phys_addr_t offset,
+ unsigned int len)
{
+ phys_addr_t phys = pmem_to_phys(pmem, offset);
unsigned long pfn_start, pfn_end, pfn;
/* only pmem in the linear map supports HWPoison */
@@ -67,33 +85,40 @@ static void hwpoison_clear(struct pmem_device *pmem,
}
}
-static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
- phys_addr_t offset, unsigned int len)
+static void pmem_clear_bb(struct pmem_device *pmem, sector_t sector, long blks)
{
- struct device *dev = to_dev(pmem);
- sector_t sector;
- long cleared;
- blk_status_t rc = BLK_STS_OK;
+ if (blks == 0)
+ return;
+ badblocks_clear(&pmem->bb, sector, blks);
+ if (pmem->bb_state)
+ sysfs_notify_dirent(pmem->bb_state);
+}
- sector = (offset - pmem->data_offset) / 512;
+static long __pmem_clear_poison(struct pmem_device *pmem,
+ phys_addr_t offset, unsigned int len)
+{
+ phys_addr_t phys = pmem_to_phys(pmem, offset);
+ long cleared = nvdimm_clear_poison(to_dev(pmem), phys, len);
- cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
- if (cleared < len)
- rc = BLK_STS_IOERR;
- if (cleared > 0 && cleared / 512) {
- hwpoison_clear(pmem, pmem->phys_addr + offset, cleared);
- cleared /= 512;
- dev_dbg(dev, "%#llx clear %ld sector%s\n",
- (unsigned long long) sector, cleared,
- cleared > 1 ? "s" : "");
- badblocks_clear(&pmem->bb, sector, cleared);
- if (pmem->bb_state)
- sysfs_notify_dirent(pmem->bb_state);
+ if (cleared > 0) {
+ pmem_mkpage_present(pmem, offset, cleared);
+ arch_invalidate_pmem(pmem->virt_addr + offset, len);
}
+ return cleared;
+}
- arch_invalidate_pmem(pmem->virt_addr + offset, len);
+static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
+ phys_addr_t offset, unsigned int len)
+{
+ long cleared = __pmem_clear_poison(pmem, offset, len);
- return rc;
+ if (cleared < 0)
+ return BLK_STS_IOERR;
+
+ pmem_clear_bb(pmem, to_sect(pmem, offset), cleared >> SECTOR_SHIFT);
+ if (cleared < len)
+ return BLK_STS_IOERR;
+ return BLK_STS_OK;
}
static void write_pmem(void *pmem_addr, struct page *page,
@@ -124,7 +149,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
while (len) {
mem = kmap_atomic(page);
chunk = min_t(unsigned int, len, PAGE_SIZE - off);
- rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
+ rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
kunmap_atomic(mem);
if (rem)
return BLK_STS_IOERR;
@@ -136,52 +161,43 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
return BLK_STS_OK;
}
-static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
- unsigned int len, unsigned int off, unsigned int op,
- sector_t sector)
+static blk_status_t pmem_do_read(struct pmem_device *pmem,
+ struct page *page, unsigned int page_off,
+ sector_t sector, unsigned int len)
{
- blk_status_t rc = BLK_STS_OK;
- bool bad_pmem = false;
- phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
+ blk_status_t rc;
+ phys_addr_t pmem_off = to_offset(pmem, sector);
void *pmem_addr = pmem->virt_addr + pmem_off;
if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
- bad_pmem = true;
-
- if (!op_is_write(op)) {
- if (unlikely(bad_pmem))
- rc = BLK_STS_IOERR;
- else {
- rc = read_pmem(page, off, pmem_addr, len);
- flush_dcache_page(page);
- }
- } else {
- /*
- * Note that we write the data both before and after
- * clearing poison. The write before clear poison
- * handles situations where the latest written data is
- * preserved and the clear poison operation simply marks
- * the address range as valid without changing the data.
- * In this case application software can assume that an
- * interrupted write will either return the new good
- * data or an error.
- *
- * However, if pmem_clear_poison() leaves the data in an
- * indeterminate state we need to perform the write
- * after clear poison.
- */
- flush_dcache_page(page);
- write_pmem(pmem_addr, page, off, len);
- if (unlikely(bad_pmem)) {
- rc = pmem_clear_poison(pmem, pmem_off, len);
- write_pmem(pmem_addr, page, off, len);
- }
- }
+ return BLK_STS_IOERR;
+ rc = read_pmem(page, page_off, pmem_addr, len);
+ flush_dcache_page(page);
return rc;
}
-static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
+static blk_status_t pmem_do_write(struct pmem_device *pmem,
+ struct page *page, unsigned int page_off,
+ sector_t sector, unsigned int len)
+{
+ phys_addr_t pmem_off = to_offset(pmem, sector);
+ void *pmem_addr = pmem->virt_addr + pmem_off;
+
+ if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) {
+ blk_status_t rc = pmem_clear_poison(pmem, pmem_off, len);
+
+ if (rc != BLK_STS_OK)
+ return rc;
+ }
+
+ flush_dcache_page(page);
+ write_pmem(pmem_addr, page, page_off, len);
+
+ return BLK_STS_OK;
+}
+
+static void pmem_submit_bio(struct bio *bio)
{
int ret = 0;
blk_status_t rc = 0;
@@ -189,23 +205,29 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
unsigned long start;
struct bio_vec bvec;
struct bvec_iter iter;
- struct pmem_device *pmem = q->queuedata;
+ struct pmem_device *pmem = bio->bi_bdev->bd_disk->private_data;
struct nd_region *nd_region = to_region(pmem);
if (bio->bi_opf & REQ_PREFLUSH)
ret = nvdimm_flush(nd_region, bio);
- do_acct = nd_iostat_start(bio, &start);
+ do_acct = blk_queue_io_stat(bio->bi_bdev->bd_disk->queue);
+ if (do_acct)
+ start = bio_start_io_acct(bio);
bio_for_each_segment(bvec, bio, iter) {
- rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
- bvec.bv_offset, bio_op(bio), iter.bi_sector);
+ if (op_is_write(bio_op(bio)))
+ rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
+ iter.bi_sector, bvec.bv_len);
+ else
+ rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
+ iter.bi_sector, bvec.bv_len);
if (rc) {
bio->bi_status = rc;
break;
}
}
if (do_acct)
- nd_iostat_end(bio, start);
+ bio_end_io_acct(bio, start);
if (bio->bi_opf & REQ_FUA)
ret = nvdimm_flush(nd_region, bio);
@@ -214,18 +236,18 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
bio->bi_status = errno_to_blk_status(ret);
bio_endio(bio);
- return BLK_QC_T_NONE;
}
static int pmem_rw_page(struct block_device *bdev, sector_t sector,
- struct page *page, unsigned int op)
+ struct page *page, enum req_op op)
{
- struct pmem_device *pmem = bdev->bd_queue->queuedata;
+ struct pmem_device *pmem = bdev->bd_disk->private_data;
blk_status_t rc;
- rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
- 0, op, sector);
-
+ if (op_is_write(op))
+ rc = pmem_do_write(pmem, page, 0, sector, thp_size(page));
+ else
+ rc = pmem_do_read(pmem, page, 0, sector, thp_size(page));
/*
* The ->rw_page interface is subtle and tricky. The core
* retries on any error, so we can only invoke page_endio() in
@@ -240,106 +262,210 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
/* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */
__weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
- long nr_pages, void **kaddr, pfn_t *pfn)
+ long nr_pages, enum dax_access_mode mode, void **kaddr,
+ pfn_t *pfn)
{
resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset;
-
- if (unlikely(is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) / 512,
- PFN_PHYS(nr_pages))))
- return -EIO;
+ sector_t sector = PFN_PHYS(pgoff) >> SECTOR_SHIFT;
+ unsigned int num = PFN_PHYS(nr_pages) >> SECTOR_SHIFT;
+ struct badblocks *bb = &pmem->bb;
+ sector_t first_bad;
+ int num_bad;
if (kaddr)
*kaddr = pmem->virt_addr + offset;
if (pfn)
*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+ if (bb->count &&
+ badblocks_check(bb, sector, num, &first_bad, &num_bad)) {
+ long actual_nr;
+
+ if (mode != DAX_RECOVERY_WRITE)
+ return -EIO;
+
+ /*
+ * Set the recovery stride is set to kernel page size because
+ * the underlying driver and firmware clear poison functions
+ * don't appear to handle large chunk(such as 2MiB) reliably.
+ */
+ actual_nr = PHYS_PFN(
+ PAGE_ALIGN((first_bad - sector) << SECTOR_SHIFT));
+ dev_dbg(pmem->bb.dev, "start sector(%llu), nr_pages(%ld), first_bad(%llu), actual_nr(%ld)\n",
+ sector, nr_pages, first_bad, actual_nr);
+ if (actual_nr)
+ return actual_nr;
+ return 1;
+ }
+
/*
- * If badblocks are present, limit known good range to the
- * requested range.
+ * If badblocks are present but not in the range, limit known good range
+ * to the requested range.
*/
- if (unlikely(pmem->bb.count))
+ if (bb->count)
return nr_pages;
return PHYS_PFN(pmem->size - pmem->pfn_pad - offset);
}
static const struct block_device_operations pmem_fops = {
.owner = THIS_MODULE,
+ .submit_bio = pmem_submit_bio,
.rw_page = pmem_rw_page,
- .revalidate_disk = nvdimm_revalidate_disk,
};
+static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+ size_t nr_pages)
+{
+ struct pmem_device *pmem = dax_get_private(dax_dev);
+
+ return blk_status_to_errno(pmem_do_write(pmem, ZERO_PAGE(0), 0,
+ PFN_PHYS(pgoff) >> SECTOR_SHIFT,
+ PAGE_SIZE));
+}
+
static long pmem_dax_direct_access(struct dax_device *dax_dev,
- pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
+ pgoff_t pgoff, long nr_pages, enum dax_access_mode mode,
+ void **kaddr, pfn_t *pfn)
{
struct pmem_device *pmem = dax_get_private(dax_dev);
- return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn);
+ return __pmem_direct_access(pmem, pgoff, nr_pages, mode, kaddr, pfn);
}
/*
- * Use the 'no check' versions of copy_from_iter_flushcache() and
- * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
- * checking, both file offset and device offset, is handled by
- * dax_iomap_actor()
+ * The recovery write thread started out as a normal pwrite thread and
+ * when the filesystem was told about potential media error in the
+ * range, filesystem turns the normal pwrite to a dax_recovery_write.
+ *
+ * The recovery write consists of clearing media poison, clearing page
+ * HWPoison bit, reenable page-wide read-write permission, flush the
+ * caches and finally write. A competing pread thread will be held
+ * off during the recovery process since data read back might not be
+ * valid, and this is achieved by clearing the badblock records after
+ * the recovery write is complete. Competing recovery write threads
+ * are already serialized by writer lock held by dax_iomap_rw().
*/
-static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
+static size_t pmem_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff,
void *addr, size_t bytes, struct iov_iter *i)
{
- return _copy_from_iter_flushcache(addr, bytes, i);
-}
+ struct pmem_device *pmem = dax_get_private(dax_dev);
+ size_t olen, len, off;
+ phys_addr_t pmem_off;
+ struct device *dev = pmem->bb.dev;
+ long cleared;
-static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
- void *addr, size_t bytes, struct iov_iter *i)
-{
- return _copy_to_iter_mcsafe(addr, bytes, i);
+ off = offset_in_page(addr);
+ len = PFN_PHYS(PFN_UP(off + bytes));
+ if (!is_bad_pmem(&pmem->bb, PFN_PHYS(pgoff) >> SECTOR_SHIFT, len))
+ return _copy_from_iter_flushcache(addr, bytes, i);
+
+ /*
+ * Not page-aligned range cannot be recovered. This should not
+ * happen unless something else went wrong.
+ */
+ if (off || !PAGE_ALIGNED(bytes)) {
+ dev_dbg(dev, "Found poison, but addr(%p) or bytes(%#zx) not page aligned\n",
+ addr, bytes);
+ return 0;
+ }
+
+ pmem_off = PFN_PHYS(pgoff) + pmem->data_offset;
+ cleared = __pmem_clear_poison(pmem, pmem_off, len);
+ if (cleared > 0 && cleared < len) {
+ dev_dbg(dev, "poison cleared only %ld out of %zu bytes\n",
+ cleared, len);
+ return 0;
+ }
+ if (cleared < 0) {
+ dev_dbg(dev, "poison clear failed: %ld\n", cleared);
+ return 0;
+ }
+
+ olen = _copy_from_iter_flushcache(addr, bytes, i);
+ pmem_clear_bb(pmem, to_sect(pmem, pmem_off), cleared >> SECTOR_SHIFT);
+
+ return olen;
}
static const struct dax_operations pmem_dax_ops = {
.direct_access = pmem_dax_direct_access,
- .dax_supported = generic_fsdax_supported,
- .copy_from_iter = pmem_copy_from_iter,
- .copy_to_iter = pmem_copy_to_iter,
-};
-
-static const struct attribute_group *pmem_attribute_groups[] = {
- &dax_attribute_group,
- NULL,
+ .zero_page_range = pmem_dax_zero_page_range,
+ .recovery_write = pmem_recovery_write,
};
-static void pmem_pagemap_cleanup(struct dev_pagemap *pgmap)
+static ssize_t write_cache_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- struct request_queue *q =
- container_of(pgmap->ref, struct request_queue, q_usage_counter);
+ struct pmem_device *pmem = dev_to_disk(dev)->private_data;
- blk_cleanup_queue(q);
+ return sprintf(buf, "%d\n", !!dax_write_cache_enabled(pmem->dax_dev));
}
-static void pmem_release_queue(void *pgmap)
+static ssize_t write_cache_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
{
- pmem_pagemap_cleanup(pgmap);
+ struct pmem_device *pmem = dev_to_disk(dev)->private_data;
+ bool write_cache;
+ int rc;
+
+ rc = strtobool(buf, &write_cache);
+ if (rc)
+ return rc;
+ dax_write_cache(pmem->dax_dev, write_cache);
+ return len;
}
+static DEVICE_ATTR_RW(write_cache);
-static void pmem_pagemap_kill(struct dev_pagemap *pgmap)
+static umode_t dax_visible(struct kobject *kobj, struct attribute *a, int n)
{
- struct request_queue *q =
- container_of(pgmap->ref, struct request_queue, q_usage_counter);
-
- blk_freeze_queue_start(q);
+#ifndef CONFIG_ARCH_HAS_PMEM_API
+ if (a == &dev_attr_write_cache.attr)
+ return 0;
+#endif
+ return a->mode;
}
+static struct attribute *dax_attributes[] = {
+ &dev_attr_write_cache.attr,
+ NULL,
+};
+
+static const struct attribute_group dax_attribute_group = {
+ .name = "dax",
+ .attrs = dax_attributes,
+ .is_visible = dax_visible,
+};
+
+static const struct attribute_group *pmem_attribute_groups[] = {
+ &dax_attribute_group,
+ NULL,
+};
+
static void pmem_release_disk(void *__pmem)
{
struct pmem_device *pmem = __pmem;
+ dax_remove_host(pmem->disk);
kill_dax(pmem->dax_dev);
put_dax(pmem->dax_dev);
del_gendisk(pmem->disk);
+
put_disk(pmem->disk);
}
+static int pmem_pagemap_memory_failure(struct dev_pagemap *pgmap,
+ unsigned long pfn, unsigned long nr_pages, int mf_flags)
+{
+ struct pmem_device *pmem =
+ container_of(pgmap, struct pmem_device, pgmap);
+ u64 offset = PFN_PHYS(pfn) - pmem->phys_addr - pmem->data_offset;
+ u64 len = nr_pages << PAGE_SHIFT;
+
+ return dax_holder_notify_failure(pmem->dax_dev, offset, len, mf_flags);
+}
+
static const struct dev_pagemap_ops fsdax_pagemap_ops = {
- .kill = pmem_pagemap_kill,
- .cleanup = pmem_pagemap_cleanup,
+ .memory_failure = pmem_pagemap_memory_failure,
};
static int pmem_attach_disk(struct device *dev,
@@ -349,17 +475,15 @@ static int pmem_attach_disk(struct device *dev,
struct nd_region *nd_region = to_nd_region(dev->parent);
int nid = dev_to_node(dev), fua;
struct resource *res = &nsio->res;
- struct resource bb_res;
+ struct range bb_range;
struct nd_pfn *nd_pfn = NULL;
struct dax_device *dax_dev;
struct nd_pfn_sb *pfn_sb;
struct pmem_device *pmem;
struct request_queue *q;
- struct device *gendev;
struct gendisk *disk;
void *addr;
int rc;
- unsigned long flags = 0UL;
pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
if (!pmem)
@@ -395,12 +519,14 @@ static int pmem_attach_disk(struct device *dev,
return -EBUSY;
}
- q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
- if (!q)
+ disk = blk_alloc_disk(nid);
+ if (!disk)
return -ENOMEM;
+ q = disk->queue;
+ pmem->disk = disk;
+ pmem->pgmap.owner = pmem;
pmem->pfn_flags = PFN_DEV;
- pmem->pgmap.ref = &q->q_usage_counter;
if (is_nd_pfn(dev)) {
pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
pmem->pgmap.ops = &fsdax_pagemap_ops;
@@ -408,81 +534,87 @@ static int pmem_attach_disk(struct device *dev,
pfn_sb = nd_pfn->pfn_sb;
pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
pmem->pfn_pad = resource_size(res) -
- resource_size(&pmem->pgmap.res);
+ range_len(&pmem->pgmap.range);
pmem->pfn_flags |= PFN_MAP;
- memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
- bb_res.start += pmem->data_offset;
+ bb_range = pmem->pgmap.range;
+ bb_range.start += pmem->data_offset;
} else if (pmem_should_map_pages(dev)) {
- memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
+ pmem->pgmap.range.start = res->start;
+ pmem->pgmap.range.end = res->end;
+ pmem->pgmap.nr_range = 1;
pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
pmem->pgmap.ops = &fsdax_pagemap_ops;
addr = devm_memremap_pages(dev, &pmem->pgmap);
pmem->pfn_flags |= PFN_MAP;
- memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
+ bb_range = pmem->pgmap.range;
} else {
- if (devm_add_action_or_reset(dev, pmem_release_queue,
- &pmem->pgmap))
- return -ENOMEM;
addr = devm_memremap(dev, pmem->phys_addr,
pmem->size, ARCH_MEMREMAP_PMEM);
- memcpy(&bb_res, &nsio->res, sizeof(bb_res));
+ bb_range.start = res->start;
+ bb_range.end = res->end;
}
- if (IS_ERR(addr))
- return PTR_ERR(addr);
+ if (IS_ERR(addr)) {
+ rc = PTR_ERR(addr);
+ goto out;
+ }
pmem->virt_addr = addr;
blk_queue_write_cache(q, true, fua);
- blk_queue_make_request(q, pmem_make_request);
blk_queue_physical_block_size(q, PAGE_SIZE);
blk_queue_logical_block_size(q, pmem_sector_size(ndns));
blk_queue_max_hw_sectors(q, UINT_MAX);
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
if (pmem->pfn_flags & PFN_MAP)
blk_queue_flag_set(QUEUE_FLAG_DAX, q);
- q->queuedata = pmem;
-
- disk = alloc_disk_node(0, nid);
- if (!disk)
- return -ENOMEM;
- pmem->disk = disk;
disk->fops = &pmem_fops;
- disk->queue = q;
- disk->flags = GENHD_FL_EXT_DEVT;
- disk->queue->backing_dev_info->capabilities |= BDI_CAP_SYNCHRONOUS_IO;
+ disk->private_data = pmem;
nvdimm_namespace_disk_name(ndns, disk->disk_name);
set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
/ 512);
if (devm_init_badblocks(dev, &pmem->bb))
return -ENOMEM;
- nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
+ nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range);
disk->bb = &pmem->bb;
- if (is_nvdimm_sync(nd_region))
- flags = DAXDEV_F_SYNC;
- dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
- if (!dax_dev) {
- put_disk(disk);
- return -ENOMEM;
+ dax_dev = alloc_dax(pmem, &pmem_dax_ops);
+ if (IS_ERR(dax_dev)) {
+ rc = PTR_ERR(dax_dev);
+ goto out;
}
+ set_dax_nocache(dax_dev);
+ set_dax_nomc(dax_dev);
+ if (is_nvdimm_sync(nd_region))
+ set_dax_synchronous(dax_dev);
+ rc = dax_add_host(dax_dev, disk);
+ if (rc)
+ goto out_cleanup_dax;
dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
pmem->dax_dev = dax_dev;
- gendev = disk_to_dev(disk);
- gendev->groups = pmem_attribute_groups;
- device_add_disk(dev, disk, NULL);
+ rc = device_add_disk(dev, disk, pmem_attribute_groups);
+ if (rc)
+ goto out_remove_host;
if (devm_add_action_or_reset(dev, pmem_release_disk, pmem))
return -ENOMEM;
- revalidate_disk(disk);
+ nvdimm_check_and_set_ro(disk);
pmem->bb_state = sysfs_get_dirent(disk_to_dev(disk)->kobj.sd,
"badblocks");
if (!pmem->bb_state)
dev_warn(dev, "'badblocks' notification disabled\n");
-
return 0;
+
+out_remove_host:
+ dax_remove_host(pmem->disk);
+out_cleanup_dax:
+ kill_dax(pmem->dax_dev);
+ put_dax(pmem->dax_dev);
+out:
+ put_disk(pmem->disk);
+ return rc;
}
static int nd_pmem_probe(struct device *dev)
@@ -537,7 +669,7 @@ static int nd_pmem_probe(struct device *dev)
return pmem_attach_disk(dev, ndns);
}
-static int nd_pmem_remove(struct device *dev)
+static void nd_pmem_remove(struct device *dev)
{
struct pmem_device *pmem = dev_get_drvdata(dev);
@@ -545,15 +677,13 @@ static int nd_pmem_remove(struct device *dev)
nvdimm_namespace_detach_btt(to_nd_btt(dev));
else {
/*
- * Note, this assumes nd_device_lock() context to not
+ * Note, this assumes device_lock() context to not
* race nd_pmem_notify()
*/
sysfs_put(pmem->bb_state);
pmem->bb_state = NULL;
}
nvdimm_flush(to_nd_region(dev->parent), NULL);
-
- return 0;
}
static void nd_pmem_shutdown(struct device *dev)
@@ -561,19 +691,16 @@ static void nd_pmem_shutdown(struct device *dev)
nvdimm_flush(to_nd_region(dev->parent), NULL);
}
-static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
+static void pmem_revalidate_poison(struct device *dev)
{
struct nd_region *nd_region;
resource_size_t offset = 0, end_trunc = 0;
struct nd_namespace_common *ndns;
struct nd_namespace_io *nsio;
- struct resource res;
struct badblocks *bb;
+ struct range range;
struct kernfs_node *bb_state;
- if (event != NVDIMM_REVALIDATE_POISON)
- return;
-
if (is_nd_btt(dev)) {
struct nd_btt *nd_btt = to_nd_btt(dev);
@@ -604,13 +731,44 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
nsio = to_nd_namespace_io(&ndns->dev);
}
- res.start = nsio->res.start + offset;
- res.end = nsio->res.end - end_trunc;
- nvdimm_badblocks_populate(nd_region, bb, &res);
+ range.start = nsio->res.start + offset;
+ range.end = nsio->res.end - end_trunc;
+ nvdimm_badblocks_populate(nd_region, bb, &range);
if (bb_state)
sysfs_notify_dirent(bb_state);
}
+static void pmem_revalidate_region(struct device *dev)
+{
+ struct pmem_device *pmem;
+
+ if (is_nd_btt(dev)) {
+ struct nd_btt *nd_btt = to_nd_btt(dev);
+ struct btt *btt = nd_btt->btt;
+
+ nvdimm_check_and_set_ro(btt->btt_disk);
+ return;
+ }
+
+ pmem = dev_get_drvdata(dev);
+ nvdimm_check_and_set_ro(pmem->disk);
+}
+
+static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
+{
+ switch (event) {
+ case NVDIMM_REVALIDATE_POISON:
+ pmem_revalidate_poison(dev);
+ break;
+ case NVDIMM_REVALIDATE_REGION:
+ pmem_revalidate_region(dev);
+ break;
+ default:
+ dev_WARN_ONCE(dev, 1, "notify: unknown event: %d\n", event);
+ break;
+ }
+}
+
MODULE_ALIAS("pmem");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 59cfe13ea8a8..392b0b38acb9 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -3,10 +3,13 @@
#define __NVDIMM_PMEM_H__
#include <linux/page-flags.h>
#include <linux/badblocks.h>
+#include <linux/memremap.h>
#include <linux/types.h>
#include <linux/pfn_t.h>
#include <linux/fs.h>
+enum dax_access_mode;
+
/* this definition is in it's own header for tools/testing/nvdimm to consume */
struct pmem_device {
/* One contiguous memory region per device */
@@ -27,7 +30,8 @@ struct pmem_device {
};
long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
- long nr_pages, void **kaddr, pfn_t *pfn);
+ long nr_pages, enum dax_access_mode mode, void **kaddr,
+ pfn_t *pfn);
#ifdef CONFIG_MEMORY_FAILURE
static inline bool test_and_clear_pmem_poison(struct page *page)
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 0f6978e72e7c..390123d293ea 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -15,6 +15,10 @@ static int nd_region_probe(struct device *dev)
static unsigned long once;
struct nd_region_data *ndrd;
struct nd_region *nd_region = to_nd_region(dev);
+ struct range range = {
+ .start = nd_region->ndr_start,
+ .end = nd_region->ndr_start + nd_region->ndr_size - 1,
+ };
if (nd_region->num_lanes > num_online_cpus()
&& nd_region->num_lanes < num_possible_cpus()
@@ -30,24 +34,13 @@ static int nd_region_probe(struct device *dev)
if (rc)
return rc;
- rc = nd_blk_region_init(nd_region);
- if (rc)
- return rc;
-
- if (is_memory(&nd_region->dev)) {
- struct resource ndr_res;
-
- if (devm_init_badblocks(dev, &nd_region->bb))
- return -ENODEV;
- nd_region->bb_state = sysfs_get_dirent(nd_region->dev.kobj.sd,
- "badblocks");
- if (!nd_region->bb_state)
- dev_warn(&nd_region->dev,
- "'badblocks' notification disabled\n");
- ndr_res.start = nd_region->ndr_start;
- ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1;
- nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res);
- }
+ if (devm_init_badblocks(dev, &nd_region->bb))
+ return -ENODEV;
+ nd_region->bb_state =
+ sysfs_get_dirent(nd_region->dev.kobj.sd, "badblocks");
+ if (!nd_region->bb_state)
+ dev_warn(dev, "'badblocks' notification disabled\n");
+ nvdimm_badblocks_populate(nd_region, &nd_region->bb, &range);
rc = nd_region_register_namespaces(nd_region, &err);
if (rc < 0)
@@ -86,7 +79,7 @@ static int child_unregister(struct device *dev, void *data)
return 0;
}
-static int nd_region_remove(struct device *dev)
+static void nd_region_remove(struct device *dev)
{
struct nd_region *nd_region = to_nd_region(dev);
@@ -102,13 +95,11 @@ static int nd_region_remove(struct device *dev)
nvdimm_bus_unlock(dev);
/*
- * Note, this assumes nd_device_lock() context to not race
+ * Note, this assumes device_lock() context to not race
* nd_region_notify()
*/
sysfs_put(nd_region->bb_state);
nd_region->bb_state = NULL;
-
- return 0;
}
static int child_notify(struct device *dev, void *data)
@@ -121,14 +112,16 @@ static void nd_region_notify(struct device *dev, enum nvdimm_event event)
{
if (event == NVDIMM_REVALIDATE_POISON) {
struct nd_region *nd_region = to_nd_region(dev);
- struct resource res;
if (is_memory(&nd_region->dev)) {
- res.start = nd_region->ndr_start;
- res.end = nd_region->ndr_start +
- nd_region->ndr_size - 1;
+ struct range range = {
+ .start = nd_region->ndr_start,
+ .end = nd_region->ndr_start +
+ nd_region->ndr_size - 1,
+ };
+
nvdimm_badblocks_populate(nd_region,
- &nd_region->bb, &res);
+ &nd_region->bb, &range);
if (nd_region->bb_state)
sysfs_notify_dirent(nd_region->bb_state);
}
@@ -157,4 +150,3 @@ void nd_region_exit(void)
}
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_PMEM);
-MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_BLK);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index a19e535830d9..e0875d369762 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -133,11 +133,9 @@ static void nd_region_release(struct device *dev)
put_device(&nvdimm->dev);
}
free_percpu(nd_region->lane);
- memregion_free(nd_region->id);
- if (is_nd_blk(dev))
- kfree(to_nd_blk_region(dev));
- else
- kfree(nd_region);
+ if (!test_bit(ND_REGION_CXL, &nd_region->flags))
+ memregion_free(nd_region->id);
+ kfree(nd_region);
}
struct nd_region *to_nd_region(struct device *dev)
@@ -157,33 +155,12 @@ struct device *nd_region_dev(struct nd_region *nd_region)
}
EXPORT_SYMBOL_GPL(nd_region_dev);
-struct nd_blk_region *to_nd_blk_region(struct device *dev)
-{
- struct nd_region *nd_region = to_nd_region(dev);
-
- WARN_ON(!is_nd_blk(dev));
- return container_of(nd_region, struct nd_blk_region, nd_region);
-}
-EXPORT_SYMBOL_GPL(to_nd_blk_region);
-
void *nd_region_provider_data(struct nd_region *nd_region)
{
return nd_region->provider_data;
}
EXPORT_SYMBOL_GPL(nd_region_provider_data);
-void *nd_blk_region_provider_data(struct nd_blk_region *ndbr)
-{
- return ndbr->blk_provider_data;
-}
-EXPORT_SYMBOL_GPL(nd_blk_region_provider_data);
-
-void nd_blk_region_set_provider_data(struct nd_blk_region *ndbr, void *data)
-{
- ndbr->blk_provider_data = data;
-}
-EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data);
-
/**
* nd_region_to_nstype() - region to an integer namespace type
* @nd_region: region-device to interrogate
@@ -195,42 +172,44 @@ EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data);
int nd_region_to_nstype(struct nd_region *nd_region)
{
if (is_memory(&nd_region->dev)) {
- u16 i, alias;
+ u16 i, label;
- for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) {
+ for (i = 0, label = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm *nvdimm = nd_mapping->nvdimm;
- if (test_bit(NDD_ALIASING, &nvdimm->flags))
- alias++;
+ if (test_bit(NDD_LABELING, &nvdimm->flags))
+ label++;
}
- if (alias)
+ if (label)
return ND_DEVICE_NAMESPACE_PMEM;
else
return ND_DEVICE_NAMESPACE_IO;
- } else if (is_nd_blk(&nd_region->dev)) {
- return ND_DEVICE_NAMESPACE_BLK;
}
return 0;
}
EXPORT_SYMBOL(nd_region_to_nstype);
-static ssize_t size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static unsigned long long region_size(struct nd_region *nd_region)
{
- struct nd_region *nd_region = to_nd_region(dev);
- unsigned long long size = 0;
-
- if (is_memory(dev)) {
- size = nd_region->ndr_size;
+ if (is_memory(&nd_region->dev)) {
+ return nd_region->ndr_size;
} else if (nd_region->ndr_mappings == 1) {
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
- size = nd_mapping->size;
+ return nd_mapping->size;
}
- return sprintf(buf, "%llu\n", size);
+ return 0;
+}
+
+static ssize_t size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nd_region *nd_region = to_nd_region(dev);
+
+ return sprintf(buf, "%llu\n", region_size(nd_region));
}
static DEVICE_ATTR_RO(size);
@@ -301,7 +280,7 @@ static ssize_t set_cookie_show(struct device *dev,
* the v1.1 namespace label cookie definition. To read all this
* data we need to wait for probing to settle.
*/
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
if (nd_region->ndr_mappings) {
@@ -318,7 +297,7 @@ static ssize_t set_cookie_show(struct device *dev,
}
}
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
if (rc)
return rc;
@@ -328,14 +307,12 @@ static DEVICE_ATTR_RO(set_cookie);
resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
{
- resource_size_t blk_max_overlap = 0, available, overlap;
+ resource_size_t available;
int i;
WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
- retry:
available = 0;
- overlap = blk_max_overlap;
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
@@ -344,15 +321,7 @@ resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
if (!ndd)
return 0;
- if (is_memory(&nd_region->dev)) {
- available += nd_pmem_available_dpa(nd_region,
- nd_mapping, &overlap);
- if (overlap > blk_max_overlap) {
- blk_max_overlap = overlap;
- goto retry;
- }
- } else if (is_nd_blk(&nd_region->dev))
- available += nd_blk_available_dpa(nd_region);
+ available += nd_pmem_available_dpa(nd_region, nd_mapping);
}
return available;
@@ -360,26 +329,17 @@ resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
resource_size_t nd_region_allocatable_dpa(struct nd_region *nd_region)
{
- resource_size_t available = 0;
+ resource_size_t avail = 0;
int i;
- if (is_memory(&nd_region->dev))
- available = PHYS_ADDR_MAX;
-
WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
- if (is_memory(&nd_region->dev))
- available = min(available,
- nd_pmem_max_contiguous_dpa(nd_region,
- nd_mapping));
- else if (is_nd_blk(&nd_region->dev))
- available += nd_blk_available_dpa(nd_region);
+ avail = min_not_zero(avail, nd_pmem_max_contiguous_dpa(
+ nd_region, nd_mapping));
}
- if (is_memory(&nd_region->dev))
- return available * nd_region->ndr_mappings;
- return available;
+ return avail * nd_region->ndr_mappings;
}
static ssize_t available_size_show(struct device *dev,
@@ -394,12 +354,12 @@ static ssize_t available_size_show(struct device *dev,
* memory nvdimm_bus_lock() is dropped, but that's userspace's
* problem to not race itself.
*/
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
available = nd_region_available_dpa(nd_region);
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
return sprintf(buf, "%llu\n", available);
}
@@ -411,12 +371,12 @@ static ssize_t max_available_extent_show(struct device *dev,
struct nd_region *nd_region = to_nd_region(dev);
unsigned long long available = 0;
- nd_device_lock(dev);
+ device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
available = nd_region_allocatable_dpa(nd_region);
nvdimm_bus_unlock(dev);
- nd_device_unlock(dev);
+ device_unlock(dev);
return sprintf(buf, "%llu\n", available);
}
@@ -514,6 +474,12 @@ static ssize_t read_only_show(struct device *dev,
return sprintf(buf, "%d\n", nd_region->ro);
}
+static int revalidate_read_only(struct device *dev, void *data)
+{
+ nd_device_notify(dev, NVDIMM_REVALIDATE_REGION);
+ return 0;
+}
+
static ssize_t read_only_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
@@ -525,22 +491,69 @@ static ssize_t read_only_store(struct device *dev,
return rc;
nd_region->ro = ro;
+ device_for_each_child(dev, NULL, revalidate_read_only);
return len;
}
static DEVICE_ATTR_RW(read_only);
+static ssize_t align_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nd_region *nd_region = to_nd_region(dev);
+
+ return sprintf(buf, "%#lx\n", nd_region->align);
+}
+
+static ssize_t align_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct nd_region *nd_region = to_nd_region(dev);
+ unsigned long val, dpa;
+ u32 mappings, remainder;
+ int rc;
+
+ rc = kstrtoul(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ /*
+ * Ensure space-align is evenly divisible by the region
+ * interleave-width because the kernel typically has no facility
+ * to determine which DIMM(s), dimm-physical-addresses, would
+ * contribute to the tail capacity in system-physical-address
+ * space for the namespace.
+ */
+ mappings = max_t(u32, 1, nd_region->ndr_mappings);
+ dpa = div_u64_rem(val, mappings, &remainder);
+ if (!is_power_of_2(dpa) || dpa < PAGE_SIZE
+ || val > region_size(nd_region) || remainder)
+ return -EINVAL;
+
+ /*
+ * Given that space allocation consults this value multiple
+ * times ensure it does not change for the duration of the
+ * allocation.
+ */
+ nvdimm_bus_lock(dev);
+ nd_region->align = val;
+ nvdimm_bus_unlock(dev);
+
+ return len;
+}
+static DEVICE_ATTR_RW(align);
+
static ssize_t region_badblocks_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct nd_region *nd_region = to_nd_region(dev);
ssize_t rc;
- nd_device_lock(dev);
+ device_lock(dev);
if (dev->driver)
rc = badblocks_show(&nd_region->bb, buf, 0);
else
rc = -ENXIO;
- nd_device_unlock(dev);
+ device_unlock(dev);
return rc;
}
@@ -553,7 +566,7 @@ static ssize_t resource_show(struct device *dev,
return sprintf(buf, "%#llx\n", nd_region->ndr_start);
}
-static DEVICE_ATTR(resource, 0400, resource_show, NULL);
+static DEVICE_ATTR_ADMIN_RO(resource);
static ssize_t persistence_domain_show(struct device *dev,
struct device_attribute *attr, char *buf)
@@ -571,6 +584,7 @@ static DEVICE_ATTR_RO(persistence_domain);
static struct attribute *nd_region_attributes[] = {
&dev_attr_size.attr,
+ &dev_attr_align.attr,
&dev_attr_nstype.attr,
&dev_attr_mappings.attr,
&dev_attr_btt_seed.attr,
@@ -626,13 +640,15 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
return a->mode;
}
+ if (a == &dev_attr_align.attr)
+ return a->mode;
+
if (a != &dev_attr_set_cookie.attr
&& a != &dev_attr_available_size.attr)
return a->mode;
- if ((type == ND_DEVICE_NAMESPACE_PMEM
- || type == ND_DEVICE_NAMESPACE_BLK)
- && a == &dev_attr_available_size.attr)
+ if (type == ND_DEVICE_NAMESPACE_PMEM &&
+ a == &dev_attr_available_size.attr)
return a->mode;
else if (is_memory(dev) && nd_set)
return a->mode;
@@ -765,12 +781,6 @@ static const struct attribute_group *nd_region_attribute_groups[] = {
NULL,
};
-static const struct device_type nd_blk_device_type = {
- .name = "nd_blk",
- .release = nd_region_release,
- .groups = nd_region_attribute_groups,
-};
-
static const struct device_type nd_pmem_device_type = {
.name = "nd_pmem",
.release = nd_region_release,
@@ -788,11 +798,6 @@ bool is_nd_pmem(struct device *dev)
return dev ? dev->type == &nd_pmem_device_type : false;
}
-bool is_nd_blk(struct device *dev)
-{
- return dev ? dev->type == &nd_blk_device_type : false;
-}
-
bool is_nd_volatile(struct device *dev)
{
return dev ? dev->type == &nd_volatile_device_type : false;
@@ -866,22 +871,6 @@ void nd_region_advance_seeds(struct nd_region *nd_region, struct device *dev)
nvdimm_bus_unlock(dev);
}
-int nd_blk_region_init(struct nd_region *nd_region)
-{
- struct device *dev = &nd_region->dev;
- struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
-
- if (!is_nd_blk(dev))
- return 0;
-
- if (nd_region->ndr_mappings < 1) {
- dev_dbg(dev, "invalid BLK region\n");
- return -ENXIO;
- }
-
- return to_nd_blk_region(dev)->enable(nvdimm_bus, dev);
-}
-
/**
* nd_region_acquire_lane - allocate and lock a lane
* @nd_region: region id and number of lanes possible
@@ -935,13 +924,38 @@ void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
}
EXPORT_SYMBOL(nd_region_release_lane);
+/*
+ * PowerPC requires this alignment for memremap_pages(). All other archs
+ * should be ok with SUBSECTION_SIZE (see memremap_compat_align()).
+ */
+#define MEMREMAP_COMPAT_ALIGN_MAX SZ_16M
+
+static unsigned long default_align(struct nd_region *nd_region)
+{
+ unsigned long align;
+ u32 remainder;
+ int mappings;
+
+ align = MEMREMAP_COMPAT_ALIGN_MAX;
+ if (nd_region->ndr_size < MEMREMAP_COMPAT_ALIGN_MAX)
+ align = PAGE_SIZE;
+
+ mappings = max_t(u16, 1, nd_region->ndr_mappings);
+ div_u64_rem(align, mappings, &remainder);
+ if (remainder)
+ align *= mappings;
+
+ return align;
+}
+
+static struct lock_class_key nvdimm_region_key;
+
static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
struct nd_region_desc *ndr_desc,
const struct device_type *dev_type, const char *caller)
{
struct nd_region *nd_region;
struct device *dev;
- void *region_buf;
unsigned int i;
int ro = 0;
@@ -959,40 +973,22 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
if (test_bit(NDD_UNARMED, &nvdimm->flags))
ro = 1;
- if (test_bit(NDD_NOBLK, &nvdimm->flags)
- && dev_type == &nd_blk_device_type) {
- dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not BLK capable\n",
- caller, dev_name(&nvdimm->dev), i);
- return NULL;
- }
}
- if (dev_type == &nd_blk_device_type) {
- struct nd_blk_region_desc *ndbr_desc;
- struct nd_blk_region *ndbr;
-
- ndbr_desc = to_blk_region_desc(ndr_desc);
- ndbr = kzalloc(sizeof(*ndbr) + sizeof(struct nd_mapping)
- * ndr_desc->num_mappings,
- GFP_KERNEL);
- if (ndbr) {
- nd_region = &ndbr->nd_region;
- ndbr->enable = ndbr_desc->enable;
- ndbr->do_io = ndbr_desc->do_io;
- }
- region_buf = ndbr;
- } else {
- nd_region = kzalloc(struct_size(nd_region, mapping,
- ndr_desc->num_mappings),
- GFP_KERNEL);
- region_buf = nd_region;
- }
+ nd_region =
+ kzalloc(struct_size(nd_region, mapping, ndr_desc->num_mappings),
+ GFP_KERNEL);
- if (!region_buf)
+ if (!nd_region)
return NULL;
- nd_region->id = memregion_alloc(GFP_KERNEL);
- if (nd_region->id < 0)
- goto err_id;
+ /* CXL pre-assigns memregion ids before creating nvdimm regions */
+ if (test_bit(ND_REGION_CXL, &ndr_desc->flags)) {
+ nd_region->id = ndr_desc->memregion;
+ } else {
+ nd_region->id = memregion_alloc(GFP_KERNEL);
+ if (nd_region->id < 0)
+ goto err_id;
+ }
nd_region->lane = alloc_percpu(struct nd_percpu_lane);
if (!nd_region->lane)
@@ -1039,19 +1035,23 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
dev->of_node = ndr_desc->of_node;
nd_region->ndr_size = resource_size(ndr_desc->res);
nd_region->ndr_start = ndr_desc->res->start;
+ nd_region->align = default_align(nd_region);
if (ndr_desc->flush)
nd_region->flush = ndr_desc->flush;
else
nd_region->flush = NULL;
+ device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &nvdimm_region_key);
nd_device_register(dev);
return nd_region;
- err_percpu:
- memregion_free(nd_region->id);
- err_id:
- kfree(region_buf);
+err_percpu:
+ if (!test_bit(ND_REGION_CXL, &ndr_desc->flags))
+ memregion_free(nd_region->id);
+err_id:
+ kfree(nd_region);
return NULL;
}
@@ -1064,17 +1064,6 @@ struct nd_region *nvdimm_pmem_region_create(struct nvdimm_bus *nvdimm_bus,
}
EXPORT_SYMBOL_GPL(nvdimm_pmem_region_create);
-struct nd_region *nvdimm_blk_region_create(struct nvdimm_bus *nvdimm_bus,
- struct nd_region_desc *ndr_desc)
-{
- if (ndr_desc->num_mappings > 1)
- return NULL;
- ndr_desc->num_lanes = min(ndr_desc->num_lanes, ND_MAX_LANES);
- return nd_region_create(nvdimm_bus, ndr_desc, &nd_blk_device_type,
- __func__);
-}
-EXPORT_SYMBOL_GPL(nvdimm_blk_region_create);
-
struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
struct nd_region_desc *ndr_desc)
{
@@ -1084,6 +1073,13 @@ struct nd_region *nvdimm_volatile_region_create(struct nvdimm_bus *nvdimm_bus,
}
EXPORT_SYMBOL_GPL(nvdimm_volatile_region_create);
+void nvdimm_region_delete(struct nd_region *nd_region)
+{
+ if (nd_region)
+ nd_device_unregister(&nd_region->dev, ND_SYNC);
+}
+EXPORT_SYMBOL_GPL(nvdimm_region_delete);
+
int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
{
int rc = 0;
@@ -1098,8 +1094,8 @@ int nvdimm_flush(struct nd_region *nd_region, struct bio *bio)
return rc;
}
/**
- * nvdimm_flush - flush any posted write queues between the cpu and pmem media
- * @nd_region: blk or interleaved pmem region
+ * generic_nvdimm_flush() - flush any posted write queues between the cpu and pmem media
+ * @nd_region: interleaved pmem region
*/
int generic_nvdimm_flush(struct nd_region *nd_region)
{
@@ -1114,13 +1110,13 @@ int generic_nvdimm_flush(struct nd_region *nd_region)
idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
/*
- * The first wmb() is needed to 'sfence' all previous writes
- * such that they are architecturally visible for the platform
- * buffer flush. Note that we've already arranged for pmem
+ * The pmem_wmb() is needed to 'sfence' all
+ * previous writes such that they are architecturally visible for
+ * the platform buffer flush. Note that we've already arranged for pmem
* writes to avoid the cache via memcpy_flushcache(). The final
* wmb() ensures ordering for the NVDIMM flush write.
*/
- wmb();
+ pmem_wmb();
for (i = 0; i < nd_region->ndr_mappings; i++)
if (ndrd_get_flush_wpq(ndrd, i, 0))
writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
@@ -1132,7 +1128,7 @@ EXPORT_SYMBOL_GPL(nvdimm_flush);
/**
* nvdimm_has_flush - determine write flushing requirements
- * @nd_region: blk or interleaved pmem region
+ * @nd_region: interleaved pmem region
*
* Returns 1 if writes require flushing
* Returns 0 if writes do not require flushing
@@ -1147,6 +1143,11 @@ int nvdimm_has_flush(struct nd_region *nd_region)
|| !IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API))
return -ENXIO;
+ /* Test if an explicit flush function is defined */
+ if (test_bit(ND_REGION_ASYNC, &nd_region->flags) && nd_region->flush)
+ return 1;
+
+ /* Test if any flush hints for the region are available */
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm *nvdimm = nd_mapping->nvdimm;
@@ -1157,8 +1158,8 @@ int nvdimm_has_flush(struct nd_region *nd_region)
}
/*
- * The platform defines dimm devices without hints, assume
- * platform persistence mechanism like ADR
+ * The platform defines dimm devices without hints nor explicit flush,
+ * assume platform persistence mechanism like ADR
*/
return 0;
}
diff --git a/drivers/nvdimm/security.c b/drivers/nvdimm/security.c
index 89b85970912d..8aefb60c42ff 100644
--- a/drivers/nvdimm/security.c
+++ b/drivers/nvdimm/security.c
@@ -95,7 +95,7 @@ static struct key *nvdimm_lookup_user_key(struct nvdimm *nvdimm,
struct encrypted_key_payload *epayload;
struct device *dev = &nvdimm->dev;
- keyref = lookup_user_key(id, 0, 0);
+ keyref = lookup_user_key(id, 0, KEY_NEED_SEARCH);
if (IS_ERR(keyref))
return NULL;
@@ -379,11 +379,6 @@ static int security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
|| !nvdimm->sec.flags)
return -EOPNOTSUPP;
- if (dev->driver == NULL) {
- dev_dbg(dev, "Unable to overwrite while DIMM active.\n");
- return -EINVAL;
- }
-
rc = check_security_state(nvdimm);
if (rc)
return rc;
@@ -413,7 +408,7 @@ static int security_overwrite(struct nvdimm *nvdimm, unsigned int keyid)
return rc;
}
-void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm)
+static void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm)
{
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev);
int rc;
@@ -450,14 +445,19 @@ void __nvdimm_security_overwrite_query(struct nvdimm *nvdimm)
else
dev_dbg(&nvdimm->dev, "overwrite completed\n");
- if (nvdimm->sec.overwrite_state)
- sysfs_notify_dirent(nvdimm->sec.overwrite_state);
+ /*
+ * Mark the overwrite work done and update dimm security flags,
+ * then send a sysfs event notification to wake up userspace
+ * poll threads to picked up the changed state.
+ */
nvdimm->sec.overwrite_tmo = 0;
clear_bit(NDD_SECURITY_OVERWRITE, &nvdimm->flags);
clear_bit(NDD_WORK_PENDING, &nvdimm->flags);
- put_device(&nvdimm->dev);
nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_USER);
- nvdimm->sec.flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER);
+ nvdimm->sec.ext_flags = nvdimm_security_flags(nvdimm, NVDIMM_MASTER);
+ if (nvdimm->sec.overwrite_state)
+ sysfs_notify_dirent(nvdimm->sec.overwrite_state);
+ put_device(&nvdimm->dev);
}
void nvdimm_security_overwrite_query(struct work_struct *work)
diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
index 5e3d07b47e0c..20da455d2ef6 100644
--- a/drivers/nvdimm/virtio_pmem.c
+++ b/drivers/nvdimm/virtio_pmem.c
@@ -58,9 +58,9 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
goto out_err;
}
- virtio_cread(vpmem->vdev, struct virtio_pmem_config,
+ virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
start, &vpmem->start);
- virtio_cread(vpmem->vdev, struct virtio_pmem_config,
+ virtio_cread_le(vpmem->vdev, struct virtio_pmem_config,
size, &vpmem->size);
res.start = vpmem->start;
@@ -81,17 +81,24 @@ static int virtio_pmem_probe(struct virtio_device *vdev)
ndr_desc.res = &res;
ndr_desc.numa_node = nid;
ndr_desc.flush = async_pmem_flush;
+ ndr_desc.provider_data = vdev;
set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
set_bit(ND_REGION_ASYNC, &ndr_desc.flags);
+ /*
+ * The NVDIMM region could be available before the
+ * virtio_device_ready() that is called by
+ * virtio_dev_probe(), so we set device ready here.
+ */
+ virtio_device_ready(vdev);
nd_region = nvdimm_pmem_region_create(vpmem->nvdimm_bus, &ndr_desc);
if (!nd_region) {
dev_err(&vdev->dev, "failed to create nvdimm region\n");
err = -ENXIO;
goto out_nd;
}
- nd_region->provider_data = dev_to_virtio(nd_region->dev.parent->parent);
return 0;
out_nd:
+ virtio_reset_device(vdev);
nvdimm_bus_unregister(vpmem->nvdimm_bus);
out_vq:
vdev->config->del_vqs(vdev);
@@ -105,7 +112,7 @@ static void virtio_pmem_remove(struct virtio_device *vdev)
nvdimm_bus_unregister(nvdimm_bus);
vdev->config->del_vqs(vdev);
- vdev->config->reset(vdev);
+ virtio_reset_device(vdev);
}
static struct virtio_driver virtio_pmem_driver = {