aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/acpi/nfit/core.c210
-rw-r--r--drivers/acpi/nfit/mce.c24
-rw-r--r--drivers/acpi/nfit/nfit.h17
-rw-r--r--drivers/dax/Kconfig5
-rw-r--r--drivers/dax/dax.c577
-rw-r--r--drivers/dax/dax.h5
-rw-r--r--drivers/dax/pmem.c7
-rw-r--r--drivers/nvdimm/Kconfig2
-rw-r--r--drivers/nvdimm/bus.c2
-rw-r--r--drivers/nvdimm/core.c73
-rw-r--r--drivers/nvdimm/dimm.c11
-rw-r--r--drivers/nvdimm/dimm_devs.c226
-rw-r--r--drivers/nvdimm/label.c192
-rw-r--r--drivers/nvdimm/namespace_devs.c792
-rw-r--r--drivers/nvdimm/nd-core.h24
-rw-r--r--drivers/nvdimm/nd.h29
-rw-r--r--drivers/nvdimm/pmem.c28
-rw-r--r--drivers/nvdimm/region_devs.c75
-rw-r--r--fs/char_dev.c1
-rw-r--r--include/linux/libnvdimm.h28
-rw-r--r--include/linux/nd.h8
-rw-r--r--include/uapi/linux/magic.h1
-rw-r--r--include/uapi/linux/ndctl.h30
-rw-r--r--tools/testing/nvdimm/Kbuild1
-rw-r--r--tools/testing/nvdimm/test/iomap.c151
-rw-r--r--tools/testing/nvdimm/test/nfit.c160
-rw-r--r--tools/testing/nvdimm/test/nfit_test.h12
27 files changed, 1940 insertions, 751 deletions
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index e1d5ea6d5e40..71a7d07c28c9 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -886,6 +886,58 @@ static ssize_t revision_show(struct device *dev,
}
static DEVICE_ATTR_RO(revision);
+static ssize_t hw_error_scrub_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus);
+ struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+ return sprintf(buf, "%d\n", acpi_desc->scrub_mode);
+}
+
+/*
+ * The 'hw_error_scrub' attribute can have the following values written to it:
+ * '0': Switch to the default mode where an exception will only insert
+ * the address of the memory error into the poison and badblocks lists.
+ * '1': Enable a full scrub to happen if an exception for a memory error is
+ * received.
+ */
+static ssize_t hw_error_scrub_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t size)
+{
+ struct nvdimm_bus_descriptor *nd_desc;
+ ssize_t rc;
+ long val;
+
+ rc = kstrtol(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ device_lock(dev);
+ nd_desc = dev_get_drvdata(dev);
+ if (nd_desc) {
+ struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
+
+ switch (val) {
+ case HW_ERROR_SCRUB_ON:
+ acpi_desc->scrub_mode = HW_ERROR_SCRUB_ON;
+ break;
+ case HW_ERROR_SCRUB_OFF:
+ acpi_desc->scrub_mode = HW_ERROR_SCRUB_OFF;
+ break;
+ default:
+ rc = -EINVAL;
+ break;
+ }
+ }
+ device_unlock(dev);
+ if (rc)
+ return rc;
+ return size;
+}
+static DEVICE_ATTR_RW(hw_error_scrub);
+
/*
* This shows the number of full Address Range Scrubs that have been
* completed since driver load time. Userspace can wait on this using
@@ -958,6 +1010,7 @@ static umode_t nfit_visible(struct kobject *kobj, struct attribute *a, int n)
static struct attribute *acpi_nfit_attributes[] = {
&dev_attr_revision.attr,
&dev_attr_scrub.attr,
+ &dev_attr_hw_error_scrub.attr,
NULL,
};
@@ -1256,6 +1309,44 @@ static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
return NULL;
}
+void __acpi_nvdimm_notify(struct device *dev, u32 event)
+{
+ struct nfit_mem *nfit_mem;
+ struct acpi_nfit_desc *acpi_desc;
+
+ dev_dbg(dev->parent, "%s: %s: event: %d\n", dev_name(dev), __func__,
+ event);
+
+ if (event != NFIT_NOTIFY_DIMM_HEALTH) {
+ dev_dbg(dev->parent, "%s: unknown event: %d\n", dev_name(dev),
+ event);
+ return;
+ }
+
+ acpi_desc = dev_get_drvdata(dev->parent);
+ if (!acpi_desc)
+ return;
+
+ /*
+ * If we successfully retrieved acpi_desc, then we know nfit_mem data
+ * is still valid.
+ */
+ nfit_mem = dev_get_drvdata(dev);
+ if (nfit_mem && nfit_mem->flags_attr)
+ sysfs_notify_dirent(nfit_mem->flags_attr);
+}
+EXPORT_SYMBOL_GPL(__acpi_nvdimm_notify);
+
+static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
+{
+ struct acpi_device *adev = data;
+ struct device *dev = &adev->dev;
+
+ device_lock(dev->parent);
+ __acpi_nvdimm_notify(dev, event);
+ device_unlock(dev->parent);
+}
+
static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
struct nfit_mem *nfit_mem, u32 device_handle)
{
@@ -1280,6 +1371,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
return force_enable_dimms ? 0 : -ENODEV;
}
+ if (ACPI_FAILURE(acpi_install_notify_handler(adev_dimm->handle,
+ ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify, adev_dimm))) {
+ dev_err(dev, "%s: notification registration failed\n",
+ dev_name(&adev_dimm->dev));
+ return -ENXIO;
+ }
+
/*
* Until standardization materializes we need to consider 4
* different command sets. Note, that checking for function0 (bit0)
@@ -1318,18 +1416,41 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
return 0;
}
+static void shutdown_dimm_notify(void *data)
+{
+ struct acpi_nfit_desc *acpi_desc = data;
+ struct nfit_mem *nfit_mem;
+
+ mutex_lock(&acpi_desc->init_mutex);
+ /*
+ * Clear out the nfit_mem->flags_attr and shut down dimm event
+ * notifications.
+ */
+ list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+ struct acpi_device *adev_dimm = nfit_mem->adev;
+
+ if (nfit_mem->flags_attr) {
+ sysfs_put(nfit_mem->flags_attr);
+ nfit_mem->flags_attr = NULL;
+ }
+ if (adev_dimm)
+ acpi_remove_notify_handler(adev_dimm->handle,
+ ACPI_DEVICE_NOTIFY, acpi_nvdimm_notify);
+ }
+ mutex_unlock(&acpi_desc->init_mutex);
+}
+
static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
{
struct nfit_mem *nfit_mem;
- int dimm_count = 0;
+ int dimm_count = 0, rc;
+ struct nvdimm *nvdimm;
list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
struct acpi_nfit_flush_address *flush;
unsigned long flags = 0, cmd_mask;
- struct nvdimm *nvdimm;
u32 device_handle;
u16 mem_flags;
- int rc;
device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
@@ -1382,7 +1503,30 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
}
- return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
+ rc = nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
+ if (rc)
+ return rc;
+
+ /*
+ * Now that dimms are successfully registered, and async registration
+ * is flushed, attempt to enable event notification.
+ */
+ list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+ struct kernfs_node *nfit_kernfs;
+
+ nvdimm = nfit_mem->nvdimm;
+ nfit_kernfs = sysfs_get_dirent(nvdimm_kobj(nvdimm)->sd, "nfit");
+ if (nfit_kernfs)
+ nfit_mem->flags_attr = sysfs_get_dirent(nfit_kernfs,
+ "flags");
+ sysfs_put(nfit_kernfs);
+ if (!nfit_mem->flags_attr)
+ dev_warn(acpi_desc->dev, "%s: notifications disabled\n",
+ nvdimm_name(nvdimm));
+ }
+
+ return devm_add_action_or_reset(acpi_desc->dev, shutdown_dimm_notify,
+ acpi_desc);
}
static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
@@ -1491,9 +1635,9 @@ static int acpi_nfit_init_interleave_set(struct acpi_nfit_desc *acpi_desc,
if (!info)
return -ENOMEM;
for (i = 0; i < nr; i++) {
- struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
+ struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
struct nfit_set_info_map *map = &info->mapping[i];
- struct nvdimm *nvdimm = nd_mapping->nvdimm;
+ struct nvdimm *nvdimm = mapping->nvdimm;
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
struct acpi_nfit_memory_map *memdev = memdev_from_spa(acpi_desc,
spa->range_index, i);
@@ -1917,7 +2061,7 @@ static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
}
static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
- struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
+ struct nd_mapping_desc *mapping, struct nd_region_desc *ndr_desc,
struct acpi_nfit_memory_map *memdev,
struct nfit_spa *nfit_spa)
{
@@ -1934,12 +2078,12 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
return -ENODEV;
}
- nd_mapping->nvdimm = nvdimm;
+ mapping->nvdimm = nvdimm;
switch (nfit_spa_type(spa)) {
case NFIT_SPA_PM:
case NFIT_SPA_VOLATILE:
- nd_mapping->start = memdev->address;
- nd_mapping->size = memdev->region_size;
+ mapping->start = memdev->address;
+ mapping->size = memdev->region_size;
break;
case NFIT_SPA_DCR:
nfit_mem = nvdimm_provider_data(nvdimm);
@@ -1947,13 +2091,13 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
spa->range_index, nvdimm_name(nvdimm));
} else {
- nd_mapping->size = nfit_mem->bdw->capacity;
- nd_mapping->start = nfit_mem->bdw->start_address;
+ mapping->size = nfit_mem->bdw->capacity;
+ mapping->start = nfit_mem->bdw->start_address;
ndr_desc->num_lanes = nfit_mem->bdw->windows;
blk_valid = 1;
}
- ndr_desc->nd_mapping = nd_mapping;
+ ndr_desc->mapping = mapping;
ndr_desc->num_mappings = blk_valid;
ndbr_desc = to_blk_region_desc(ndr_desc);
ndbr_desc->enable = acpi_nfit_blk_region_enable;
@@ -1979,7 +2123,7 @@ static bool nfit_spa_is_virtual(struct acpi_nfit_system_address *spa)
static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
struct nfit_spa *nfit_spa)
{
- static struct nd_mapping nd_mappings[ND_MAX_MAPPINGS];
+ static struct nd_mapping_desc mappings[ND_MAX_MAPPINGS];
struct acpi_nfit_system_address *spa = nfit_spa->spa;
struct nd_blk_region_desc ndbr_desc;
struct nd_region_desc *ndr_desc;
@@ -1998,7 +2142,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
}
memset(&res, 0, sizeof(res));
- memset(&nd_mappings, 0, sizeof(nd_mappings));
+ memset(&mappings, 0, sizeof(mappings));
memset(&ndbr_desc, 0, sizeof(ndbr_desc));
res.start = spa->address;
res.end = res.start + spa->length - 1;
@@ -2014,7 +2158,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
struct acpi_nfit_memory_map *memdev = nfit_memdev->memdev;
- struct nd_mapping *nd_mapping;
+ struct nd_mapping_desc *mapping;
if (memdev->range_index != spa->range_index)
continue;
@@ -2023,14 +2167,14 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
spa->range_index, ND_MAX_MAPPINGS);
return -ENXIO;
}
- nd_mapping = &nd_mappings[count++];
- rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
+ mapping = &mappings[count++];
+ rc = acpi_nfit_init_mapping(acpi_desc, mapping, ndr_desc,
memdev, nfit_spa);
if (rc)
goto out;
}
- ndr_desc->nd_mapping = nd_mappings;
+ ndr_desc->mapping = mappings;
ndr_desc->num_mappings = count;
rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
if (rc)
@@ -2678,29 +2822,30 @@ static int acpi_nfit_remove(struct acpi_device *adev)
return 0;
}
-static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
+void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
{
- struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
+ struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
- struct device *dev = &adev->dev;
union acpi_object *obj;
acpi_status status;
int ret;
dev_dbg(dev, "%s: event: %d\n", __func__, event);
- device_lock(dev);
+ if (event != NFIT_NOTIFY_UPDATE)
+ return;
+
if (!dev->driver) {
/* dev->driver may be null if we're being removed */
dev_dbg(dev, "%s: no driver found for dev\n", __func__);
- goto out_unlock;
+ return;
}
if (!acpi_desc) {
acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
if (!acpi_desc)
- goto out_unlock;
- acpi_nfit_desc_init(acpi_desc, &adev->dev);
+ return;
+ acpi_nfit_desc_init(acpi_desc, dev);
} else {
/*
* Finish previous registration before considering new
@@ -2710,10 +2855,10 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
}
/* Evaluate _FIT */
- status = acpi_evaluate_object(adev->handle, "_FIT", NULL, &buf);
+ status = acpi_evaluate_object(handle, "_FIT", NULL, &buf);
if (ACPI_FAILURE(status)) {
dev_err(dev, "failed to evaluate _FIT\n");
- goto out_unlock;
+ return;
}
obj = buf.pointer;
@@ -2725,9 +2870,14 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
} else
dev_err(dev, "Invalid _FIT\n");
kfree(buf.pointer);
+}
+EXPORT_SYMBOL_GPL(__acpi_nfit_notify);
- out_unlock:
- device_unlock(dev);
+static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
+{
+ device_lock(&adev->dev);
+ __acpi_nfit_notify(&adev->dev, adev->handle, event);
+ device_unlock(&adev->dev);
}
static const struct acpi_device_id acpi_nfit_ids[] = {
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index 161f91539ae6..e5ce81c38eed 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -14,6 +14,7 @@
*/
#include <linux/notifier.h>
#include <linux/acpi.h>
+#include <linux/nd.h>
#include <asm/mce.h>
#include "nfit.h"
@@ -62,12 +63,25 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
}
mutex_unlock(&acpi_desc->init_mutex);
- /*
- * We can ignore an -EBUSY here because if an ARS is already
- * in progress, just let that be the last authoritative one
- */
- if (found_match)
+ if (!found_match)
+ continue;
+
+ /* If this fails due to an -ENOMEM, there is little we can do */
+ nvdimm_bus_add_poison(acpi_desc->nvdimm_bus,
+ ALIGN(mce->addr, L1_CACHE_BYTES),
+ L1_CACHE_BYTES);
+ nvdimm_region_notify(nfit_spa->nd_region,
+ NVDIMM_REVALIDATE_POISON);
+
+ if (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) {
+ /*
+ * We can ignore an -EBUSY here because if an ARS is
+ * already in progress, just let that be the last
+ * authoritative one
+ */
acpi_nfit_ars_rescan(acpi_desc);
+ }
+ break;
}
mutex_unlock(&acpi_desc_lock);
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index e894ded24d99..14296f5267c8 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -78,6 +78,14 @@ enum {
NFIT_ARS_TIMEOUT = 90,
};
+enum nfit_root_notifiers {
+ NFIT_NOTIFY_UPDATE = 0x80,
+};
+
+enum nfit_dimm_notifiers {
+ NFIT_NOTIFY_DIMM_HEALTH = 0x81,
+};
+
struct nfit_spa {
struct list_head list;
struct nd_region *nd_region;
@@ -124,6 +132,7 @@ struct nfit_mem {
struct acpi_nfit_system_address *spa_bdw;
struct acpi_nfit_interleave *idt_dcr;
struct acpi_nfit_interleave *idt_bdw;
+ struct kernfs_node *flags_attr;
struct nfit_flush *nfit_flush;
struct list_head list;
struct acpi_device *adev;
@@ -152,6 +161,7 @@ struct acpi_nfit_desc {
struct list_head list;
struct kernfs_node *scrub_count_state;
unsigned int scrub_count;
+ unsigned int scrub_mode;
unsigned int cancel:1;
unsigned long dimm_cmd_force_en;
unsigned long bus_cmd_force_en;
@@ -159,6 +169,11 @@ struct acpi_nfit_desc {
void *iobuf, u64 len, int rw);
};
+enum scrub_mode {
+ HW_ERROR_SCRUB_OFF,
+ HW_ERROR_SCRUB_ON,
+};
+
enum nd_blk_mmio_selector {
BDW,
DCR,
@@ -223,5 +238,7 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
const u8 *to_nfit_uuid(enum nfit_uuids id);
int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
+void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event);
+void __acpi_nvdimm_notify(struct device *dev, u32 event);
void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
#endif /* __NFIT_H__ */
diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index cedab7572de3..daadd20aa936 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -23,4 +23,9 @@ config DEV_DAX_PMEM
Say Y if unsure
+config NR_DEV_DAX
+ int "Maximum number of Device-DAX instances"
+ default 32768
+ range 256 2147483647
+
endif
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 29f600f2c447..0e499bfca41c 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -13,15 +13,25 @@
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/device.h>
+#include <linux/mount.h>
#include <linux/pfn_t.h>
+#include <linux/hash.h>
+#include <linux/cdev.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/mm.h>
+#include "dax.h"
-static int dax_major;
+static dev_t dax_devt;
static struct class *dax_class;
static DEFINE_IDA(dax_minor_ida);
+static int nr_dax = CONFIG_NR_DEV_DAX;
+module_param(nr_dax, int, S_IRUGO);
+static struct vfsmount *dax_mnt;
+static struct kmem_cache *dax_cache __read_mostly;
+static struct super_block *dax_superblock __read_mostly;
+MODULE_PARM_DESC(nr_dax, "max number of device-dax instances");
/**
* struct dax_region - mapping infrastructure for dax devices
@@ -48,7 +58,7 @@ struct dax_region {
* struct dax_dev - subdivision of a dax region
* @region - parent region
* @dev - device backing the character device
- * @kref - enable this data to be tracked in filp->private_data
+ * @cdev - core chardev data
* @alive - !alive + rcu grace period == no new mappings can be established
* @id - child id in the region
* @num_resources - number of physical address extents in this device
@@ -56,41 +66,139 @@ struct dax_region {
*/
struct dax_dev {
struct dax_region *region;
- struct device *dev;
- struct kref kref;
+ struct inode *inode;
+ struct device dev;
+ struct cdev cdev;
bool alive;
int id;
int num_resources;
struct resource res[0];
};
-static void dax_region_free(struct kref *kref)
+static struct inode *dax_alloc_inode(struct super_block *sb)
{
- struct dax_region *dax_region;
+ return kmem_cache_alloc(dax_cache, GFP_KERNEL);
+}
- dax_region = container_of(kref, struct dax_region, kref);
- kfree(dax_region);
+static void dax_i_callback(struct rcu_head *head)
+{
+ struct inode *inode = container_of(head, struct inode, i_rcu);
+
+ kmem_cache_free(dax_cache, inode);
}
-void dax_region_put(struct dax_region *dax_region)
+static void dax_destroy_inode(struct inode *inode)
{
- kref_put(&dax_region->kref, dax_region_free);
+ call_rcu(&inode->i_rcu, dax_i_callback);
}
-EXPORT_SYMBOL_GPL(dax_region_put);
-static void dax_dev_free(struct kref *kref)
+static const struct super_operations dax_sops = {
+ .statfs = simple_statfs,
+ .alloc_inode = dax_alloc_inode,
+ .destroy_inode = dax_destroy_inode,
+ .drop_inode = generic_delete_inode,
+};
+
+static struct dentry *dax_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data)
{
- struct dax_dev *dax_dev;
+ return mount_pseudo(fs_type, "dax:", &dax_sops, NULL, DAXFS_MAGIC);
+}
- dax_dev = container_of(kref, struct dax_dev, kref);
- dax_region_put(dax_dev->region);
- kfree(dax_dev);
+static struct file_system_type dax_type = {
+ .name = "dax",
+ .mount = dax_mount,
+ .kill_sb = kill_anon_super,
+};
+
+static int dax_test(struct inode *inode, void *data)
+{
+ return inode->i_cdev == data;
+}
+
+static int dax_set(struct inode *inode, void *data)
+{
+ inode->i_cdev = data;
+ return 0;
+}
+
+static struct inode *dax_inode_get(struct cdev *cdev, dev_t devt)
+{
+ struct inode *inode;
+
+ inode = iget5_locked(dax_superblock, hash_32(devt + DAXFS_MAGIC, 31),
+ dax_test, dax_set, cdev);
+
+ if (!inode)
+ return NULL;
+
+ if (inode->i_state & I_NEW) {
+ inode->i_mode = S_IFCHR;
+ inode->i_flags = S_DAX;
+ inode->i_rdev = devt;
+ mapping_set_gfp_mask(&inode->i_data, GFP_USER);
+ unlock_new_inode(inode);
+ }
+ return inode;
+}
+
+static void init_once(void *inode)
+{
+ inode_init_once(inode);
+}
+
+static int dax_inode_init(void)
+{
+ int rc;
+
+ dax_cache = kmem_cache_create("dax_cache", sizeof(struct inode), 0,
+ (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
+ SLAB_MEM_SPREAD|SLAB_ACCOUNT),
+ init_once);
+ if (!dax_cache)
+ return -ENOMEM;
+
+ rc = register_filesystem(&dax_type);
+ if (rc)
+ goto err_register_fs;
+
+ dax_mnt = kern_mount(&dax_type);
+ if (IS_ERR(dax_mnt)) {
+ rc = PTR_ERR(dax_mnt);
+ goto err_mount;
+ }
+ dax_superblock = dax_mnt->mnt_sb;
+
+ return 0;
+
+ err_mount:
+ unregister_filesystem(&dax_type);
+ err_register_fs:
+ kmem_cache_destroy(dax_cache);
+
+ return rc;
}
-static void dax_dev_put(struct dax_dev *dax_dev)
+static void dax_inode_exit(void)
+{
+ kern_unmount(dax_mnt);
+ unregister_filesystem(&dax_type);
+ kmem_cache_destroy(dax_cache);
+}
+
+static void dax_region_free(struct kref *kref)
+{
+ struct dax_region *dax_region;
+
+ dax_region = container_of(kref, struct dax_region, kref);
+ kfree(dax_region);
+}
+
+void dax_region_put(struct dax_region *dax_region)
{
- kref_put(&dax_dev->kref, dax_dev_free);
+ kref_put(&dax_region->kref, dax_region_free);
}
+EXPORT_SYMBOL_GPL(dax_region_put);
struct dax_region *alloc_dax_region(struct device *parent, int region_id,
struct resource *res, unsigned int align, void *addr,
@@ -98,8 +206,11 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
{
struct dax_region *dax_region;
- dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
+ if (!IS_ALIGNED(res->start, align)
+ || !IS_ALIGNED(resource_size(res), align))
+ return NULL;
+ dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
if (!dax_region)
return NULL;
@@ -116,10 +227,15 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
}
EXPORT_SYMBOL_GPL(alloc_dax_region);
+static struct dax_dev *to_dax_dev(struct device *dev)
+{
+ return container_of(dev, struct dax_dev, dev);
+}
+
static ssize_t size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
- struct dax_dev *dax_dev = dev_get_drvdata(dev);
+ struct dax_dev *dax_dev = to_dax_dev(dev);
unsigned long long size = 0;
int i;
@@ -144,180 +260,11 @@ static const struct attribute_group *dax_attribute_groups[] = {
NULL,
};
-static void unregister_dax_dev(void *_dev)
-{
- struct device *dev = _dev;
- struct dax_dev *dax_dev = dev_get_drvdata(dev);
- struct dax_region *dax_region = dax_dev->region;
-
- dev_dbg(dev, "%s\n", __func__);
-
- /*
- * Note, rcu is not protecting the liveness of dax_dev, rcu is
- * ensuring that any fault handlers that might have seen
- * dax_dev->alive == true, have completed. Any fault handlers
- * that start after synchronize_rcu() has started will abort
- * upon seeing dax_dev->alive == false.
- */
- dax_dev->alive = false;
- synchronize_rcu();
-
- get_device(dev);
- device_unregister(dev);
- ida_simple_remove(&dax_region->ida, dax_dev->id);
- ida_simple_remove(&dax_minor_ida, MINOR(dev->devt));
- put_device(dev);
- dax_dev_put(dax_dev);
-}
-
-int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
- int count)
-{
- struct device *parent = dax_region->dev;
- struct dax_dev *dax_dev;
- struct device *dev;
- int rc, minor;
- dev_t dev_t;
-
- dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL);
- if (!dax_dev)
- return -ENOMEM;
- memcpy(dax_dev->res, res, sizeof(*res) * count);
- dax_dev->num_resources = count;
- kref_init(&dax_dev->kref);
- dax_dev->alive = true;
- dax_dev->region = dax_region;
- kref_get(&dax_region->kref);
-
- dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
- if (dax_dev->id < 0) {
- rc = dax_dev->id;
- goto err_id;
- }
-
- minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL);
- if (minor < 0) {
- rc = minor;
- goto err_minor;
- }
-
- dev_t = MKDEV(dax_major, minor);
- dev = device_create_with_groups(dax_class, parent, dev_t, dax_dev,
- dax_attribute_groups, "dax%d.%d", dax_region->id,
- dax_dev->id);
- if (IS_ERR(dev)) {
- rc = PTR_ERR(dev);
- goto err_create;
- }
- dax_dev->dev = dev;
-
- rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev);
- if (rc)
- return rc;
-
- return 0;
-
- err_create:
- ida_simple_remove(&dax_minor_ida, minor);
- err_minor:
- ida_simple_remove(&dax_region->ida, dax_dev->id);
- err_id:
- dax_dev_put(dax_dev);
-
- return rc;
-}
-EXPORT_SYMBOL_GPL(devm_create_dax_dev);
-
-/* return an unmapped area aligned to the dax region specified alignment */
-static unsigned long dax_dev_get_unmapped_area(struct file *filp,
- unsigned long addr, unsigned long len, unsigned long pgoff,
- unsigned long flags)
-{
- unsigned long off, off_end, off_align, len_align, addr_align, align;
- struct dax_dev *dax_dev = filp ? filp->private_data : NULL;
- struct dax_region *dax_region;
-
- if (!dax_dev || addr)
- goto out;
-
- dax_region = dax_dev->region;
- align = dax_region->align;
- off = pgoff << PAGE_SHIFT;
- off_end = off + len;
- off_align = round_up(off, align);
-
- if ((off_end <= off_align) || ((off_end - off_align) < align))
- goto out;
-
- len_align = len + align;
- if ((off + len_align) < off)
- goto out;
-
- addr_align = current->mm->get_unmapped_area(filp, addr, len_align,
- pgoff, flags);
- if (!IS_ERR_VALUE(addr_align)) {
- addr_align += (off - addr_align) & (align - 1);
- return addr_align;
- }
- out:
- return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
-}
-
-static int __match_devt(struct device *dev, const void *data)
-{
- const dev_t *devt = data;
-
- return dev->devt == *devt;
-}
-
-static struct device *dax_dev_find(dev_t dev_t)
-{
- return class_find_device(dax_class, NULL, &dev_t, __match_devt);
-}
-
-static int dax_dev_open(struct inode *inode, struct file *filp)
-{
- struct dax_dev *dax_dev = NULL;
- struct device *dev;
-
- dev = dax_dev_find(inode->i_rdev);
- if (!dev)
- return -ENXIO;
-
- device_lock(dev);
- dax_dev = dev_get_drvdata(dev);
- if (dax_dev) {
- dev_dbg(dev, "%s\n", __func__);
- filp->private_data = dax_dev;
- kref_get(&dax_dev->kref);
- inode->i_flags = S_DAX;
- }
- device_unlock(dev);
-
- if (!dax_dev) {
- put_device(dev);
- return -ENXIO;
- }
- return 0;
-}
-
-static int dax_dev_release(struct inode *inode, struct file *filp)
-{
- struct dax_dev *dax_dev = filp->private_data;
- struct device *dev = dax_dev->dev;
-
- dev_dbg(dax_dev->dev, "%s\n", __func__);
- dax_dev_put(dax_dev);
- put_device(dev);
-
- return 0;
-}
-
static int check_vma(struct dax_dev *dax_dev, struct vm_area_struct *vma,
const char *func)
{
struct dax_region *dax_region = dax_dev->region;
- struct device *dev = dax_dev->dev;
+ struct device *dev = &dax_dev->dev;
unsigned long mask;
if (!dax_dev->alive)
@@ -382,7 +329,7 @@ static int __dax_dev_fault(struct dax_dev *dax_dev, struct vm_area_struct *vma,
struct vm_fault *vmf)
{
unsigned long vaddr = (unsigned long) vmf->virtual_address;
- struct device *dev = dax_dev->dev;
+ struct device *dev = &dax_dev->dev;
struct dax_region *dax_region;
int rc = VM_FAULT_SIGBUS;
phys_addr_t phys;
@@ -422,7 +369,7 @@ static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct file *filp = vma->vm_file;
struct dax_dev *dax_dev = filp->private_data;
- dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
+ dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
current->comm, (vmf->flags & FAULT_FLAG_WRITE)
? "write" : "read", vma->vm_start, vma->vm_end);
rcu_read_lock();
@@ -437,7 +384,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
unsigned int flags)
{
unsigned long pmd_addr = addr & PMD_MASK;
- struct device *dev = dax_dev->dev;
+ struct device *dev = &dax_dev->dev;
struct dax_region *dax_region;
phys_addr_t phys;
pgoff_t pgoff;
@@ -479,7 +426,7 @@ static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
struct file *filp = vma->vm_file;
struct dax_dev *dax_dev = filp->private_data;
- dev_dbg(dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
+ dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
current->comm, (flags & FAULT_FLAG_WRITE)
? "write" : "read", vma->vm_start, vma->vm_end);
@@ -490,81 +437,257 @@ static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
return rc;
}
-static void dax_dev_vm_open(struct vm_area_struct *vma)
-{
- struct file *filp = vma->vm_file;
- struct dax_dev *dax_dev = filp->private_data;
-
- dev_dbg(dax_dev->dev, "%s\n", __func__);
- kref_get(&dax_dev->kref);
-}
-
-static void dax_dev_vm_close(struct vm_area_struct *vma)
-{
- struct file *filp = vma->vm_file;
- struct dax_dev *dax_dev = filp->private_data;
-
- dev_dbg(dax_dev->dev, "%s\n", __func__);
- dax_dev_put(dax_dev);
-}
-
static const struct vm_operations_struct dax_dev_vm_ops = {
.fault = dax_dev_fault,
.pmd_fault = dax_dev_pmd_fault,
- .open = dax_dev_vm_open,
- .close = dax_dev_vm_close,
};
-static int dax_dev_mmap(struct file *filp, struct vm_area_struct *vma)
+static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
{
struct dax_dev *dax_dev = filp->private_data;
int rc;
- dev_dbg(dax_dev->dev, "%s\n", __func__);
+ dev_dbg(&dax_dev->dev, "%s\n", __func__);
rc = check_vma(dax_dev, vma, __func__);
if (rc)
return rc;
- kref_get(&dax_dev->kref);
vma->vm_ops = &dax_dev_vm_ops;
vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
return 0;
+}
+
+/* return an unmapped area aligned to the dax region specified alignment */
+static unsigned long dax_get_unmapped_area(struct file *filp,
+ unsigned long addr, unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ unsigned long off, off_end, off_align, len_align, addr_align, align;
+ struct dax_dev *dax_dev = filp ? filp->private_data : NULL;
+ struct dax_region *dax_region;
+
+ if (!dax_dev || addr)
+ goto out;
+
+ dax_region = dax_dev->region;
+ align = dax_region->align;
+ off = pgoff << PAGE_SHIFT;
+ off_end = off + len;
+ off_align = round_up(off, align);
+
+ if ((off_end <= off_align) || ((off_end - off_align) < align))
+ goto out;
+
+ len_align = len + align;
+ if ((off + len_align) < off)
+ goto out;
+ addr_align = current->mm->get_unmapped_area(filp, addr, len_align,
+ pgoff, flags);
+ if (!IS_ERR_VALUE(addr_align)) {
+ addr_align += (off - addr_align) & (align - 1);
+ return addr_align;
+ }
+ out:
+ return current->mm->get_unmapped_area(filp, addr, len, pgoff, flags);
+}
+
+static int dax_open(struct inode *inode, struct file *filp)
+{
+ struct dax_dev *dax_dev;
+
+ dax_dev = container_of(inode->i_cdev, struct dax_dev, cdev);
+ dev_dbg(&dax_dev->dev, "%s\n", __func__);
+ inode->i_mapping = dax_dev->inode->i_mapping;
+ inode->i_mapping->host = dax_dev->inode;
+ filp->f_mapping = inode->i_mapping;
+ filp->private_data = dax_dev;
+ inode->i_flags = S_DAX;
+
+ return 0;
+}
+
+static int dax_release(struct inode *inode, struct file *filp)
+{
+ struct dax_dev *dax_dev = filp->private_data;
+
+ dev_dbg(&dax_dev->dev, "%s\n", __func__);
+ return 0;
}
static const struct file_operations dax_fops = {
.llseek = noop_llseek,
.owner = THIS_MODULE,
- .open = dax_dev_open,
- .release = dax_dev_release,
- .get_unmapped_area = dax_dev_get_unmapped_area,
- .mmap = dax_dev_mmap,
+ .open = dax_open,
+ .release = dax_release,
+ .get_unmapped_area = dax_get_unmapped_area,
+ .mmap = dax_mmap,
};
+static void dax_dev_release(struct device *dev)
+{
+ struct dax_dev *dax_dev = to_dax_dev(dev);
+ struct dax_region *dax_region = dax_dev->region;
+
+ ida_simple_remove(&dax_region->ida, dax_dev->id);
+ ida_simple_remove(&dax_minor_ida, MINOR(dev->devt));
+ dax_region_put(dax_region);
+ iput(dax_dev->inode);
+ kfree(dax_dev);
+}
+
+static void unregister_dax_dev(void *dev)
+{
+ struct dax_dev *dax_dev = to_dax_dev(dev);
+ struct cdev *cdev = &dax_dev->cdev;
+
+ dev_dbg(dev, "%s\n", __func__);
+
+ /*
+ * Note, rcu is not protecting the liveness of dax_dev, rcu is
+ * ensuring that any fault handlers that might have seen
+ * dax_dev->alive == true, have completed. Any fault handlers
+ * that start after synchronize_rcu() has started will abort
+ * upon seeing dax_dev->alive == false.
+ */
+ dax_dev->alive = false;
+ synchronize_rcu();
+ unmap_mapping_range(dax_dev->inode->i_mapping, 0, 0, 1);
+ cdev_del(cdev);
+ device_unregister(dev);
+}
+
+struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region,
+ struct resource *res, int count)
+{
+ struct device *parent = dax_region->dev;
+ struct dax_dev *dax_dev;
+ int rc = 0, minor, i;
+ struct device *dev;
+ struct cdev *cdev;
+ dev_t dev_t;
+
+ dax_dev = kzalloc(sizeof(*dax_dev) + sizeof(*res) * count, GFP_KERNEL);
+ if (!dax_dev)
+ return ERR_PTR(-ENOMEM);
+
+ for (i = 0; i < count; i++) {
+ if (!IS_ALIGNED(res[i].start, dax_region->align)
+ || !IS_ALIGNED(resource_size(&res[i]),
+ dax_region->align)) {
+ rc = -EINVAL;
+ break;
+ }
+ dax_dev->res[i].start = res[i].start;
+ dax_dev->res[i].end = res[i].end;
+ }
+
+ if (i < count)
+ goto err_id;
+
+ dax_dev->id = ida_simple_get(&dax_region->ida, 0, 0, GFP_KERNEL);
+ if (dax_dev->id < 0) {
+ rc = dax_dev->id;
+ goto err_id;
+ }
+
+ minor = ida_simple_get(&dax_minor_ida, 0, 0, GFP_KERNEL);
+ if (minor < 0) {
+ rc = minor;
+ goto err_minor;
+ }
+
+ dev_t = MKDEV(MAJOR(dax_devt), minor);
+ dev = &dax_dev->dev;
+ dax_dev->inode = dax_inode_get(&dax_dev->cdev, dev_t);
+ if (!dax_dev->inode) {
+ rc = -ENOMEM;
+ goto err_inode;
+ }
+
+ /* device_initialize() so cdev can reference kobj parent */
+ device_initialize(dev);
+
+ cdev = &dax_dev->cdev;
+ cdev_init(cdev, &dax_fops);
+ cdev->owner = parent->driver->owner;
+ cdev->kobj.parent = &dev->kobj;
+ rc = cdev_add(&dax_dev->cdev, dev_t, 1);
+ if (rc)
+ goto err_cdev;
+
+ /* from here on we're committed to teardown via dax_dev_release() */
+ dax_dev->num_resources = count;
+ dax_dev->alive = true;
+ dax_dev->region = dax_region;
+ kref_get(&dax_region->kref);
+
+ dev->devt = dev_t;
+ dev->class = dax_class;
+ dev->parent = parent;
+ dev->groups = dax_attribute_groups;
+ dev->release = dax_dev_release;
+ dev_set_name(dev, "dax%d.%d", dax_region->id, dax_dev->id);
+ rc = device_add(dev);
+ if (rc) {
+ put_device(dev);
+ return ERR_PTR(rc);
+ }
+
+ rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_dev, dev);
+ if (rc)
+ return ERR_PTR(rc);
+
+ return dax_dev;
+
+ err_cdev:
+ iput(dax_dev->inode);
+ err_inode:
+ ida_simple_remove(&dax_minor_ida, minor);
+ err_minor:
+ ida_simple_remove(&dax_region->ida, dax_dev->id);
+ err_id:
+ kfree(dax_dev);
+
+ return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(devm_create_dax_dev);
+
static int __init dax_init(void)
{
int rc;
- rc = register_chrdev(0, "dax", &dax_fops);
- if (rc < 0)
+ rc = dax_inode_init();
+ if (rc)
return rc;
- dax_major = rc;
+
+ nr_dax = max(nr_dax, 256);
+ rc = alloc_chrdev_region(&dax_devt, 0, nr_dax, "dax");
+ if (rc)
+ goto err_chrdev;
dax_class = class_create(THIS_MODULE, "dax");
if (IS_ERR(dax_class)) {
- unregister_chrdev(dax_major, "dax");
- return PTR_ERR(dax_class);
+ rc = PTR_ERR(dax_class);
+ goto err_class;
}
return 0;
+
+ err_class:
+ unregister_chrdev_region(dax_devt, nr_dax);
+ err_chrdev:
+ dax_inode_exit();
+ return rc;
}
static void __exit dax_exit(void)
{
class_destroy(dax_class);
- unregister_chrdev(dax_major, "dax");
+ unregister_chrdev_region(dax_devt, nr_dax);
ida_destroy(&dax_minor_ida);
+ dax_inode_exit();
}
MODULE_AUTHOR("Intel Corporation");
diff --git a/drivers/dax/dax.h b/drivers/dax/dax.h
index d8b8f1f25054..ddd829ab58c0 100644
--- a/drivers/dax/dax.h
+++ b/drivers/dax/dax.h
@@ -13,12 +13,13 @@
#ifndef __DAX_H__
#define __DAX_H__
struct device;
+struct dax_dev;
struct resource;
struct dax_region;
void dax_region_put(struct dax_region *dax_region);
struct dax_region *alloc_dax_region(struct device *parent,
int region_id, struct resource *res, unsigned int align,
void *addr, unsigned long flags);
-int devm_create_dax_dev(struct dax_region *dax_region, struct resource *res,
- int count);
+struct dax_dev *devm_create_dax_dev(struct dax_region *dax_region,
+ struct resource *res, int count);
#endif /* __DAX_H__ */
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index 1f01e98c83c7..9630d8837ba9 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -24,7 +24,7 @@ struct dax_pmem {
struct completion cmp;
};
-struct dax_pmem *to_dax_pmem(struct percpu_ref *ref)
+static struct dax_pmem *to_dax_pmem(struct percpu_ref *ref)
{
return container_of(ref, struct dax_pmem, ref);
}
@@ -61,6 +61,7 @@ static int dax_pmem_probe(struct device *dev)
int rc;
void *addr;
struct resource res;
+ struct dax_dev *dax_dev;
struct nd_pfn_sb *pfn_sb;
struct dax_pmem *dax_pmem;
struct nd_region *nd_region;
@@ -126,12 +127,12 @@ static int dax_pmem_probe(struct device *dev)
return -ENOMEM;
/* TODO: support for subdividing a dax region... */
- rc = devm_create_dax_dev(dax_region, &res, 1);
+ dax_dev = devm_create_dax_dev(dax_region, &res, 1);
/* child dax_dev instances now own the lifetime of the dax_region */
dax_region_put(dax_region);
- return rc;
+ return PTR_ERR_OR_ZERO(dax_dev);
}
static struct nd_device_driver dax_pmem_driver = {
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 124c2432ac9c..8b2b740d6679 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -89,7 +89,7 @@ config NVDIMM_PFN
Select Y if unsure
config NVDIMM_DAX
- bool "NVDIMM DAX: Raw access to persistent memory"
+ tristate "NVDIMM DAX: Raw access to persistent memory"
default LIBNVDIMM
depends on NVDIMM_PFN
help
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 935866fe5ec2..a8b6949a8778 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -217,6 +217,8 @@ long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
return rc;
if (cmd_rc < 0)
return cmd_rc;
+
+ nvdimm_clear_from_poison_list(nvdimm_bus, phys, len);
return clear_err.cleared;
}
EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 4d7bbd2df5c0..7ceba08774b6 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -547,11 +547,12 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
}
EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
-static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
+static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length,
+ gfp_t flags)
{
struct nd_poison *pl;
- pl = kzalloc(sizeof(*pl), GFP_KERNEL);
+ pl = kzalloc(sizeof(*pl), flags);
if (!pl)
return -ENOMEM;
@@ -567,7 +568,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
struct nd_poison *pl;
if (list_empty(&nvdimm_bus->poison_list))
- return add_poison(nvdimm_bus, addr, length);
+ return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
/*
* There is a chance this is a duplicate, check for those first.
@@ -587,7 +588,7 @@ static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
* as any overlapping ranges will get resolved when the list is consumed
* and converted to badblocks
*/
- return add_poison(nvdimm_bus, addr, length);
+ return add_poison(nvdimm_bus, addr, length, GFP_KERNEL);
}
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
@@ -602,6 +603,70 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
}
EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
+void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus,
+ phys_addr_t start, unsigned int len)
+{
+ struct list_head *poison_list = &nvdimm_bus->poison_list;
+ u64 clr_end = start + len - 1;
+ struct nd_poison *pl, *next;
+
+ nvdimm_bus_lock(&nvdimm_bus->dev);
+ WARN_ON_ONCE(list_empty(poison_list));
+
+ /*
+ * [start, clr_end] is the poison interval being cleared.
+ * [pl->start, pl_end] is the poison_list entry we're comparing
+ * the above interval against. The poison list entry may need
+ * to be modified (update either start or length), deleted, or
+ * split into two based on the overlap characteristics
+ */
+
+ list_for_each_entry_safe(pl, next, poison_list, list) {
+ u64 pl_end = pl->start + pl->length - 1;
+
+ /* Skip intervals with no intersection */
+ if (pl_end < start)
+ continue;
+ if (pl->start > clr_end)
+ continue;
+ /* Delete completely overlapped poison entries */
+ if ((pl->start >= start) && (pl_end <= clr_end)) {
+ list_del(&pl->list);
+ kfree(pl);
+ continue;
+ }
+ /* Adjust start point of partially cleared entries */
+ if ((start <= pl->start) && (clr_end > pl->start)) {
+ pl->length -= clr_end - pl->start + 1;
+ pl->start = clr_end + 1;
+ continue;
+ }
+ /* Adjust pl->length for partial clearing at the tail end */
+ if ((pl->start < start) && (pl_end <= clr_end)) {
+ /* pl->start remains the same */
+ pl->length = start - pl->start;
+ continue;
+ }
+ /*
+ * If clearing in the middle of an entry, we split it into
+ * two by modifying the current entry to represent one half of
+ * the split, and adding a new entry for the second half.
+ */
+ if ((pl->start < start) && (pl_end > clr_end)) {
+ u64 new_start = clr_end + 1;
+ u64 new_len = pl_end - new_start + 1;
+
+ /* Add new entry covering the right half */
+ add_poison(nvdimm_bus, new_start, new_len, GFP_NOIO);
+ /* Adjust this entry to cover the left half */
+ pl->length = start - pl->start;
+ continue;
+ }
+ }
+ nvdimm_bus_unlock(&nvdimm_bus->dev);
+}
+EXPORT_SYMBOL_GPL(nvdimm_clear_from_poison_list);
+
#ifdef CONFIG_BLK_DEV_INTEGRITY
int nd_integrity_init(struct gendisk *disk, unsigned long meta_size)
{
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index 71d12bb67339..619834e144d1 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -26,6 +26,14 @@ static int nvdimm_probe(struct device *dev)
struct nvdimm_drvdata *ndd;
int rc;
+ rc = nvdimm_check_config_data(dev);
+ if (rc) {
+ /* not required for non-aliased nvdimm, ex. NVDIMM-N */
+ if (rc == -ENOTTY)
+ rc = 0;
+ return rc;
+ }
+
ndd = kzalloc(sizeof(*ndd), GFP_KERNEL);
if (!ndd)
return -ENOMEM;
@@ -72,6 +80,9 @@ static int nvdimm_remove(struct device *dev)
{
struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
+ if (!ndd)
+ return 0;
+
nvdimm_bus_lock(dev);
dev_set_drvdata(dev, NULL);
nvdimm_bus_unlock(dev);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index d9bba5edd8dc..d614493ad5ac 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -28,28 +28,30 @@ static DEFINE_IDA(dimm_ida);
* Retrieve bus and dimm handle and return if this bus supports
* get_config_data commands
*/
-static int __validate_dimm(struct nvdimm_drvdata *ndd)
+int nvdimm_check_config_data(struct device *dev)
{
- struct nvdimm *nvdimm;
-
- if (!ndd)
- return -EINVAL;
-
- nvdimm = to_nvdimm(ndd->dev);
+ struct nvdimm *nvdimm = to_nvdimm(dev);
- if (!nvdimm->cmd_mask)
- return -ENXIO;
- if (!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask))
- return -ENXIO;
+ if (!nvdimm->cmd_mask ||
+ !test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) {
+ if (nvdimm->flags & NDD_ALIASING)
+ return -ENXIO;
+ else
+ return -ENOTTY;
+ }
return 0;
}
static int validate_dimm(struct nvdimm_drvdata *ndd)
{
- int rc = __validate_dimm(ndd);
+ int rc;
- if (rc && ndd)
+ if (!ndd)
+ return -EINVAL;
+
+ rc = nvdimm_check_config_data(ndd->dev);
+ if (rc)
dev_dbg(ndd->dev, "%pf: %s error: %d\n",
__builtin_return_address(0), __func__, rc);
return rc;
@@ -263,6 +265,12 @@ const char *nvdimm_name(struct nvdimm *nvdimm)
}
EXPORT_SYMBOL_GPL(nvdimm_name);
+struct kobject *nvdimm_kobj(struct nvdimm *nvdimm)
+{
+ return &nvdimm->dev.kobj;
+}
+EXPORT_SYMBOL_GPL(nvdimm_kobj);
+
unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm)
{
return nvdimm->cmd_mask;
@@ -378,40 +386,166 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
}
EXPORT_SYMBOL_GPL(nvdimm_create);
+int alias_dpa_busy(struct device *dev, void *data)
+{
+ resource_size_t map_end, blk_start, new, busy;
+ struct blk_alloc_info *info = data;
+ struct nd_mapping *nd_mapping;
+ struct nd_region *nd_region;
+ struct nvdimm_drvdata *ndd;
+ struct resource *res;
+ int i;
+
+ if (!is_nd_pmem(dev))
+ return 0;
+
+ nd_region = to_nd_region(dev);
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ nd_mapping = &nd_region->mapping[i];
+ if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
+ break;
+ }
+
+ if (i >= nd_region->ndr_mappings)
+ return 0;
+
+ ndd = to_ndd(nd_mapping);
+ map_end = nd_mapping->start + nd_mapping->size - 1;
+ blk_start = nd_mapping->start;
+
+ /*
+ * In the allocation case ->res is set to free space that we are
+ * looking to validate against PMEM aliasing collision rules
+ * (i.e. BLK is allocated after all aliased PMEM).
+ */
+ if (info->res) {
+ if (info->res->start >= nd_mapping->start
+ && info->res->start < map_end)
+ /* pass */;
+ else
+ return 0;
+ }
+
+ retry:
+ /*
+ * Find the free dpa from the end of the last pmem allocation to
+ * the end of the interleave-set mapping that is not already
+ * covered by a blk allocation.
+ */
+ busy = 0;
+ for_each_dpa_resource(ndd, res) {
+ if ((res->start >= blk_start && res->start < map_end)
+ || (res->end >= blk_start
+ && res->end <= map_end)) {
+ if (strncmp(res->name, "pmem", 4) == 0) {
+ new = max(blk_start, min(map_end + 1,
+ res->end + 1));
+ if (new != blk_start) {
+ blk_start = new;
+ goto retry;
+ }
+ } else
+ busy += min(map_end, res->end)
+ - max(nd_mapping->start, res->start) + 1;
+ } else if (nd_mapping->start > res->start
+ && map_end < res->end) {
+ /* total eclipse of the PMEM region mapping */
+ busy += nd_mapping->size;
+ break;
+ }
+ }
+
+ /* update the free space range with the probed blk_start */
+ if (info->res && blk_start > info->res->start) {
+ info->res->start = max(info->res->start, blk_start);
+ if (info->res->start > info->res->end)
+ info->res->end = info->res->start - 1;
+ return 1;
+ }
+
+ info->available -= blk_start - nd_mapping->start + busy;
+
+ return 0;
+}
+
+static int blk_dpa_busy(struct device *dev, void *data)
+{
+ struct blk_alloc_info *info = data;
+ struct nd_mapping *nd_mapping;
+ struct nd_region *nd_region;
+ resource_size_t map_end;
+ int i;
+
+ if (!is_nd_pmem(dev))
+ return 0;
+
+ nd_region = to_nd_region(dev);
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ nd_mapping = &nd_region->mapping[i];
+ if (nd_mapping->nvdimm == info->nd_mapping->nvdimm)
+ break;
+ }
+
+ if (i >= nd_region->ndr_mappings)
+ return 0;
+
+ map_end = nd_mapping->start + nd_mapping->size - 1;
+ if (info->res->start >= nd_mapping->start
+ && info->res->start < map_end) {
+ if (info->res->end <= map_end) {
+ info->busy = 0;
+ return 1;
+ } else {
+ info->busy -= info->res->end - map_end;
+ return 0;
+ }
+ } else if (info->res->end >= nd_mapping->start
+ && info->res->end <= map_end) {
+ info->busy -= nd_mapping->start - info->res->start;
+ return 0;
+ } else {
+ info->busy -= nd_mapping->size;
+ return 0;
+ }
+}
+
/**
* nd_blk_available_dpa - account the unused dpa of BLK region
* @nd_mapping: container of dpa-resource-root + labels
*
- * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges.
+ * Unlike PMEM, BLK namespaces can occupy discontiguous DPA ranges, but
+ * we arrange for them to never start at an lower dpa than the last
+ * PMEM allocation in an aliased region.
*/
-resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping)
+resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
{
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
+ struct nd_mapping *nd_mapping = &nd_region->mapping[0];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- resource_size_t map_end, busy = 0, available;
+ struct blk_alloc_info info = {
+ .nd_mapping = nd_mapping,
+ .available = nd_mapping->size,
+ .res = NULL,
+ };
struct resource *res;
if (!ndd)
return 0;
- map_end = nd_mapping->start + nd_mapping->size - 1;
- for_each_dpa_resource(ndd, res)
- if (res->start >= nd_mapping->start && res->start < map_end) {
- resource_size_t end = min(map_end, res->end);
+ device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
- busy += end - res->start + 1;
- } else if (res->end >= nd_mapping->start
- && res->end <= map_end) {
- busy += res->end - nd_mapping->start;
- } else if (nd_mapping->start > res->start
- && nd_mapping->start < res->end) {
- /* total eclipse of the BLK region mapping */
- busy += nd_mapping->size;
- }
+ /* now account for busy blk allocations in unaliased dpa */
+ for_each_dpa_resource(ndd, res) {
+ if (strncmp(res->name, "blk", 3) != 0)
+ continue;
- available = map_end - nd_mapping->start + 1;
- if (busy < available)
- return available - busy;
- return 0;
+ info.res = res;
+ info.busy = resource_size(res);
+ device_for_each_child(&nvdimm_bus->dev, &info, blk_dpa_busy);
+ info.available -= info.busy;
+ }
+
+ return info.available;
}
/**
@@ -443,21 +577,16 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
map_start = nd_mapping->start;
map_end = map_start + nd_mapping->size - 1;
blk_start = max(map_start, map_end + 1 - *overlap);
- for_each_dpa_resource(ndd, res)
+ for_each_dpa_resource(ndd, res) {
if (res->start >= map_start && res->start < map_end) {
if (strncmp(res->name, "blk", 3) == 0)
- blk_start = min(blk_start, res->start);
- else if (res->start != map_start) {
+ blk_start = min(blk_start,
+ max(map_start, res->start));
+ else if (res->end > map_end) {
reason = "misaligned to iset";
goto err;
- } else {
- if (busy) {
- reason = "duplicate overlapping PMEM reservations?";
- goto err;
- }
+ } else
busy += resource_size(res);
- continue;
- }
} else if (res->end >= map_start && res->end <= map_end) {
if (strncmp(res->name, "blk", 3) == 0) {
/*
@@ -466,15 +595,14 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
* be used for BLK.
*/
blk_start = map_start;
- } else {
- reason = "misaligned to iset";
- goto err;
- }
+ } else
+ busy += resource_size(res);
} else if (map_start > res->start && map_start < res->end) {
/* total eclipse of the mapping */
busy += nd_mapping->size;
blk_start = map_start;
}
+ }
*overlap = map_end + 1 - blk_start;
available = blk_start - map_start;
@@ -483,10 +611,6 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
return 0;
err:
- /*
- * Something is wrong, PMEM must align with the start of the
- * interleave set, and there can only be one allocation per set.
- */
nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason);
return 0;
}
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index 96526dcfdd37..fac7cabe8f56 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -494,11 +494,13 @@ static int __pmem_label_update(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
int pos)
{
- u64 cookie = nd_region_interleave_set_cookie(nd_region), rawsize;
+ u64 cookie = nd_region_interleave_set_cookie(nd_region);
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- struct nd_namespace_label *victim_label;
+ struct nd_label_ent *label_ent, *victim = NULL;
struct nd_namespace_label *nd_label;
struct nd_namespace_index *nsindex;
+ struct nd_label_id label_id;
+ struct resource *res;
unsigned long *free;
u32 nslot, slot;
size_t offset;
@@ -507,6 +509,16 @@ static int __pmem_label_update(struct nd_region *nd_region,
if (!preamble_next(ndd, &nsindex, &free, &nslot))
return -ENXIO;
+ nd_label_gen_id(&label_id, nspm->uuid, 0);
+ for_each_dpa_resource(ndd, res)
+ if (strcmp(res->name, label_id.id) == 0)
+ break;
+
+ if (!res) {
+ WARN_ON_ONCE(1);
+ return -ENXIO;
+ }
+
/* allocate and write the label to the staging (next) index */
slot = nd_label_alloc_slot(ndd);
if (slot == UINT_MAX)
@@ -522,11 +534,10 @@ static int __pmem_label_update(struct nd_region *nd_region,
nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings);
nd_label->position = __cpu_to_le16(pos);
nd_label->isetcookie = __cpu_to_le64(cookie);
- rawsize = div_u64(resource_size(&nspm->nsio.res),
- nd_region->ndr_mappings);
- nd_label->rawsize = __cpu_to_le64(rawsize);
- nd_label->dpa = __cpu_to_le64(nd_mapping->start);
+ nd_label->rawsize = __cpu_to_le64(resource_size(res));
+ nd_label->dpa = __cpu_to_le64(res->start);
nd_label->slot = __cpu_to_le32(slot);
+ nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__);
/* update label */
offset = nd_label_offset(ndd, nd_label);
@@ -536,38 +547,43 @@ static int __pmem_label_update(struct nd_region *nd_region,
return rc;
/* Garbage collect the previous label */
- victim_label = nd_mapping->labels[0];
- if (victim_label) {
- slot = to_slot(ndd, victim_label);
- nd_label_free_slot(ndd, slot);
+ mutex_lock(&nd_mapping->lock);
+ list_for_each_entry(label_ent, &nd_mapping->labels, list) {
+ if (!label_ent->label)
+ continue;
+ if (memcmp(nspm->uuid, label_ent->label->uuid,
+ NSLABEL_UUID_LEN) != 0)
+ continue;
+ victim = label_ent;
+ list_move_tail(&victim->list, &nd_mapping->labels);
+ break;
+ }
+ if (victim) {
dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+ slot = to_slot(ndd, victim->label);
+ nd_label_free_slot(ndd, slot);
+ victim->label = NULL;
}
/* update index */
rc = nd_label_write_index(ndd, ndd->ns_next,
nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0);
- if (rc < 0)
- return rc;
-
- nd_mapping->labels[0] = nd_label;
-
- return 0;
-}
-
-static void del_label(struct nd_mapping *nd_mapping, int l)
-{
- struct nd_namespace_label *next_label, *nd_label;
- struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- unsigned int slot;
- int j;
-
- nd_label = nd_mapping->labels[l];
- slot = to_slot(ndd, nd_label);
- dev_vdbg(ndd->dev, "%s: clear: %d\n", __func__, slot);
+ if (rc == 0) {
+ list_for_each_entry(label_ent, &nd_mapping->labels, list)
+ if (!label_ent->label) {
+ label_ent->label = nd_label;
+ nd_label = NULL;
+ break;
+ }
+ dev_WARN_ONCE(&nspm->nsio.common.dev, nd_label,
+ "failed to track label: %d\n",
+ to_slot(ndd, nd_label));
+ if (nd_label)
+ rc = -ENXIO;
+ }
+ mutex_unlock(&nd_mapping->lock);
- for (j = l; (next_label = nd_mapping->labels[j + 1]); j++)
- nd_mapping->labels[j] = next_label;
- nd_mapping->labels[j] = NULL;
+ return rc;
}
static bool is_old_resource(struct resource *res, struct resource **list, int n)
@@ -607,14 +623,16 @@ static int __blk_label_update(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, struct nd_namespace_blk *nsblk,
int num_labels)
{
- int i, l, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO;
+ int i, alloc, victims, nfree, old_num_resources, nlabel, rc = -ENXIO;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_label *nd_label;
+ struct nd_label_ent *label_ent, *e;
struct nd_namespace_index *nsindex;
unsigned long *free, *victim_map = NULL;
struct resource *res, **old_res_list;
struct nd_label_id label_id;
u8 uuid[NSLABEL_UUID_LEN];
+ LIST_HEAD(list);
u32 nslot, slot;
if (!preamble_next(ndd, &nsindex, &free, &nslot))
@@ -736,15 +754,22 @@ static int __blk_label_update(struct nd_region *nd_region,
* entries in nd_mapping->labels
*/
nlabel = 0;
- for_each_label(l, nd_label, nd_mapping->labels) {
+ mutex_lock(&nd_mapping->lock);
+ list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
+ nd_label = label_ent->label;
+ if (!nd_label)
+ continue;
nlabel++;
memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
if (memcmp(uuid, nsblk->uuid, NSLABEL_UUID_LEN) != 0)
continue;
nlabel--;
- del_label(nd_mapping, l);
- l--; /* retry with the new label at this index */
+ list_move(&label_ent->list, &list);
+ label_ent->label = NULL;
}
+ list_splice_tail_init(&list, &nd_mapping->labels);
+ mutex_unlock(&nd_mapping->lock);
+
if (nlabel + nsblk->num_resources > num_labels) {
/*
* Bug, we can't end up with more resources than
@@ -755,6 +780,15 @@ static int __blk_label_update(struct nd_region *nd_region,
goto out;
}
+ mutex_lock(&nd_mapping->lock);
+ label_ent = list_first_entry_or_null(&nd_mapping->labels,
+ typeof(*label_ent), list);
+ if (!label_ent) {
+ WARN_ON(1);
+ mutex_unlock(&nd_mapping->lock);
+ rc = -ENXIO;
+ goto out;
+ }
for_each_clear_bit_le(slot, free, nslot) {
nd_label = nd_label_base(ndd) + slot;
memcpy(uuid, nd_label->uuid, NSLABEL_UUID_LEN);
@@ -762,11 +796,19 @@ static int __blk_label_update(struct nd_region *nd_region,
continue;
res = to_resource(ndd, nd_label);
res->flags &= ~DPA_RESOURCE_ADJUSTED;
- dev_vdbg(&nsblk->common.dev, "assign label[%d] slot: %d\n",
- l, slot);
- nd_mapping->labels[l++] = nd_label;
+ dev_vdbg(&nsblk->common.dev, "assign label slot: %d\n", slot);
+ list_for_each_entry_from(label_ent, &nd_mapping->labels, list) {
+ if (label_ent->label)
+ continue;
+ label_ent->label = nd_label;
+ nd_label = NULL;
+ break;
+ }
+ if (nd_label)
+ dev_WARN(&nsblk->common.dev,
+ "failed to track label slot%d\n", slot);
}
- nd_mapping->labels[l] = NULL;
+ mutex_unlock(&nd_mapping->lock);
out:
kfree(old_res_list);
@@ -788,32 +830,28 @@ static int __blk_label_update(struct nd_region *nd_region,
static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
{
- int i, l, old_num_labels = 0;
+ int i, old_num_labels = 0;
+ struct nd_label_ent *label_ent;
struct nd_namespace_index *nsindex;
- struct nd_namespace_label *nd_label;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- size_t size = (num_labels + 1) * sizeof(struct nd_namespace_label *);
- for_each_label(l, nd_label, nd_mapping->labels)
+ mutex_lock(&nd_mapping->lock);
+ list_for_each_entry(label_ent, &nd_mapping->labels, list)
old_num_labels++;
+ mutex_unlock(&nd_mapping->lock);
/*
* We need to preserve all the old labels for the mapping so
* they can be garbage collected after writing the new labels.
*/
- if (num_labels > old_num_labels) {
- struct nd_namespace_label **labels;
-
- labels = krealloc(nd_mapping->labels, size, GFP_KERNEL);
- if (!labels)
+ for (i = old_num_labels; i < num_labels; i++) {
+ label_ent = kzalloc(sizeof(*label_ent), GFP_KERNEL);
+ if (!label_ent)
return -ENOMEM;
- nd_mapping->labels = labels;
+ mutex_lock(&nd_mapping->lock);
+ list_add_tail(&label_ent->list, &nd_mapping->labels);
+ mutex_unlock(&nd_mapping->lock);
}
- if (!nd_mapping->labels)
- return -ENOMEM;
-
- for (i = old_num_labels; i <= num_labels; i++)
- nd_mapping->labels[i] = NULL;
if (ndd->ns_current == -1 || ndd->ns_next == -1)
/* pass */;
@@ -837,42 +875,45 @@ static int init_labels(struct nd_mapping *nd_mapping, int num_labels)
static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
{
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
- struct nd_namespace_label *nd_label;
+ struct nd_label_ent *label_ent, *e;
struct nd_namespace_index *nsindex;
u8 label_uuid[NSLABEL_UUID_LEN];
- int l, num_freed = 0;
unsigned long *free;
+ LIST_HEAD(list);
u32 nslot, slot;
+ int active = 0;
if (!uuid)
return 0;
/* no index || no labels == nothing to delete */
- if (!preamble_next(ndd, &nsindex, &free, &nslot)
- || !nd_mapping->labels)
+ if (!preamble_next(ndd, &nsindex, &free, &nslot))
return 0;
- for_each_label(l, nd_label, nd_mapping->labels) {
+ mutex_lock(&nd_mapping->lock);
+ list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
+ struct nd_namespace_label *nd_label = label_ent->label;
+
+ if (!nd_label)
+ continue;
+ active++;
memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
if (memcmp(label_uuid, uuid, NSLABEL_UUID_LEN) != 0)
continue;
+ active--;
slot = to_slot(ndd, nd_label);
nd_label_free_slot(ndd, slot);
dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
- del_label(nd_mapping, l);
- num_freed++;
- l--; /* retry with new label at this index */
+ list_move_tail(&label_ent->list, &list);
+ label_ent->label = NULL;
}
+ list_splice_tail_init(&list, &nd_mapping->labels);
- if (num_freed > l) {
- /*
- * num_freed will only ever be > l when we delete the last
- * label
- */
- kfree(nd_mapping->labels);
- nd_mapping->labels = NULL;
- dev_dbg(ndd->dev, "%s: no more labels\n", __func__);
+ if (active == 0) {
+ nd_mapping_free_labels(nd_mapping);
+ dev_dbg(ndd->dev, "%s: no more active labels\n", __func__);
}
+ mutex_unlock(&nd_mapping->lock);
return nd_label_write_index(ndd, ndd->ns_next,
nd_inc_seq(__le32_to_cpu(nsindex->seq)), 0);
@@ -885,7 +926,9 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region,
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
- int rc;
+ struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+ struct resource *res;
+ int rc, count = 0;
if (size == 0) {
rc = del_labels(nd_mapping, nspm->uuid);
@@ -894,7 +937,12 @@ int nd_pmem_namespace_label_update(struct nd_region *nd_region,
continue;
}
- rc = init_labels(nd_mapping, 1);
+ for_each_dpa_resource(ndd, res)
+ if (strncmp(res->name, "pmem", 3) == 0)
+ count++;
+ WARN_ON_ONCE(!count);
+
+ rc = init_labels(nd_mapping, count);
if (rc < 0)
return rc;
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index c5e3196c45b0..3509cff68ef9 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -12,8 +12,10 @@
*/
#include <linux/module.h>
#include <linux/device.h>
+#include <linux/sort.h>
#include <linux/slab.h>
#include <linux/pmem.h>
+#include <linux/list.h>
#include <linux/nd.h>
#include "nd-core.h"
#include "nd.h"
@@ -28,7 +30,10 @@ static void namespace_io_release(struct device *dev)
static void namespace_pmem_release(struct device *dev)
{
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+ struct nd_region *nd_region = to_nd_region(dev->parent);
+ if (nspm->id >= 0)
+ ida_simple_remove(&nd_region->ns_ida, nspm->id);
kfree(nspm->alt_name);
kfree(nspm->uuid);
kfree(nspm);
@@ -62,17 +67,17 @@ static struct device_type namespace_blk_device_type = {
.release = namespace_blk_release,
};
-static bool is_namespace_pmem(struct device *dev)
+static bool is_namespace_pmem(const struct device *dev)
{
return dev ? dev->type == &namespace_pmem_device_type : false;
}
-static bool is_namespace_blk(struct device *dev)
+static bool is_namespace_blk(const struct device *dev)
{
return dev ? dev->type == &namespace_blk_device_type : false;
}
-static bool is_namespace_io(struct device *dev)
+static bool is_namespace_io(const struct device *dev)
{
return dev ? dev->type == &namespace_io_device_type : false;
}
@@ -168,7 +173,21 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
suffix = "s";
if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) {
- sprintf(name, "pmem%d%s", nd_region->id, suffix ? suffix : "");
+ int nsidx = 0;
+
+ if (is_namespace_pmem(&ndns->dev)) {
+ struct nd_namespace_pmem *nspm;
+
+ nspm = to_nd_namespace_pmem(&ndns->dev);
+ nsidx = nspm->id;
+ }
+
+ if (nsidx)
+ sprintf(name, "pmem%d.%d%s", nd_region->id, nsidx,
+ suffix ? suffix : "");
+ else
+ sprintf(name, "pmem%d%s", nd_region->id,
+ suffix ? suffix : "");
} else if (is_namespace_blk(&ndns->dev)) {
struct nd_namespace_blk *nsblk;
@@ -294,7 +313,7 @@ static bool __nd_namespace_blk_validate(struct nd_namespace_blk *nsblk)
if (strcmp(res->name, label_id.id) != 0)
continue;
/*
- * Resources with unacknoweldged adjustments indicate a
+ * Resources with unacknowledged adjustments indicate a
* failure to update labels
*/
if (res->flags & DPA_RESOURCE_ADJUSTED)
@@ -510,19 +529,68 @@ static resource_size_t init_dpa_allocation(struct nd_label_id *label_id,
return rc ? n : 0;
}
-static bool space_valid(bool is_pmem, bool is_reserve,
- struct nd_label_id *label_id, struct resource *res)
+
+/**
+ * space_valid() - validate free dpa space against constraints
+ * @nd_region: hosting region of the free space
+ * @ndd: dimm device data for debug
+ * @label_id: namespace id to allocate space
+ * @prev: potential allocation that precedes free space
+ * @next: allocation that follows the given free space range
+ * @exist: first allocation with same id in the mapping
+ * @n: range that must satisfied for pmem allocations
+ * @valid: free space range to validate
+ *
+ * BLK-space is valid as long as it does not precede a PMEM
+ * allocation in a given region. PMEM-space must be contiguous
+ * and adjacent to an existing existing allocation (if one
+ * exists). If reserving PMEM any space is valid.
+ */
+static void space_valid(struct nd_region *nd_region, struct nvdimm_drvdata *ndd,
+ struct nd_label_id *label_id, struct resource *prev,
+ struct resource *next, struct resource *exist,
+ resource_size_t n, struct resource *valid)
{
- /*
- * For BLK-space any space is valid, for PMEM-space, it must be
- * contiguous with an existing allocation unless we are
- * reserving pmem.
- */
- if (is_reserve || !is_pmem)
- return true;
- if (!res || strcmp(res->name, label_id->id) == 0)
- return true;
- return false;
+ bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
+ bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
+
+ if (valid->start >= valid->end)
+ goto invalid;
+
+ if (is_reserve)
+ return;
+
+ if (!is_pmem) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+ struct nvdimm_bus *nvdimm_bus;
+ struct blk_alloc_info info = {
+ .nd_mapping = nd_mapping,
+ .available = nd_mapping->size,
+ .res = valid,
+ };
+
+ WARN_ON(!is_nd_blk(&nd_region->dev));
+ nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
+ device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
+ return;
+ }
+
+ /* allocation needs to be contiguous, so this is all or nothing */
+ if (resource_size(valid) < n)
+ goto invalid;
+
+ /* we've got all the space we need and no existing allocation */
+ if (!exist)
+ return;
+
+ /* allocation needs to be contiguous with the existing namespace */
+ if (valid->start == exist->end + 1
+ || valid->end == exist->start - 1)
+ return;
+
+ invalid:
+ /* truncate @valid size to 0 */
+ valid->end = valid->start - 1;
}
enum alloc_loc {
@@ -534,18 +602,24 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
resource_size_t n)
{
resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1;
- bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+ struct resource *res, *exist = NULL, valid;
const resource_size_t to_allocate = n;
- struct resource *res;
int first;
+ for_each_dpa_resource(ndd, res)
+ if (strcmp(label_id->id, res->name) == 0)
+ exist = res;
+
+ valid.start = nd_mapping->start;
+ valid.end = mapping_end;
+ valid.name = "free space";
retry:
first = 0;
for_each_dpa_resource(ndd, res) {
- resource_size_t allocate, available = 0, free_start, free_end;
struct resource *next = res->sibling, *new_res = NULL;
+ resource_size_t allocate, available = 0;
enum alloc_loc loc = ALLOC_ERR;
const char *action;
int rc = 0;
@@ -558,32 +632,35 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
/* space at the beginning of the mapping */
if (!first++ && res->start > nd_mapping->start) {
- free_start = nd_mapping->start;
- available = res->start - free_start;
- if (space_valid(is_pmem, is_reserve, label_id, NULL))
+ valid.start = nd_mapping->start;
+ valid.end = res->start - 1;
+ space_valid(nd_region, ndd, label_id, NULL, next, exist,
+ to_allocate, &valid);
+ available = resource_size(&valid);
+ if (available)
loc = ALLOC_BEFORE;
}
/* space between allocations */
if (!loc && next) {
- free_start = res->start + resource_size(res);
- free_end = min(mapping_end, next->start - 1);
- if (space_valid(is_pmem, is_reserve, label_id, res)
- && free_start < free_end) {
- available = free_end + 1 - free_start;
+ valid.start = res->start + resource_size(res);
+ valid.end = min(mapping_end, next->start - 1);
+ space_valid(nd_region, ndd, label_id, res, next, exist,
+ to_allocate, &valid);
+ available = resource_size(&valid);
+ if (available)
loc = ALLOC_MID;
- }
}
/* space at the end of the mapping */
if (!loc && !next) {
- free_start = res->start + resource_size(res);
- free_end = mapping_end;
- if (space_valid(is_pmem, is_reserve, label_id, res)
- && free_start < free_end) {
- available = free_end + 1 - free_start;
+ valid.start = res->start + resource_size(res);
+ valid.end = mapping_end;
+ space_valid(nd_region, ndd, label_id, res, next, exist,
+ to_allocate, &valid);
+ available = resource_size(&valid);
+ if (available)
loc = ALLOC_AFTER;
- }
}
if (!loc || !available)
@@ -593,8 +670,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
case ALLOC_BEFORE:
if (strcmp(res->name, label_id->id) == 0) {
/* adjust current resource up */
- if (is_pmem && !is_reserve)
- return n;
rc = adjust_resource(res, res->start - allocate,
resource_size(res) + allocate);
action = "cur grow up";
@@ -604,8 +679,6 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
case ALLOC_MID:
if (strcmp(next->name, label_id->id) == 0) {
/* adjust next resource up */
- if (is_pmem && !is_reserve)
- return n;
rc = adjust_resource(next, next->start
- allocate, resource_size(next)
+ allocate);
@@ -629,12 +702,10 @@ static resource_size_t scan_allocate(struct nd_region *nd_region,
if (strcmp(action, "allocate") == 0) {
/* BLK allocate bottom up */
if (!is_pmem)
- free_start += available - allocate;
- else if (!is_reserve && free_start != nd_mapping->start)
- return n;
+ valid.start += available - allocate;
new_res = nvdimm_allocate_dpa(ndd, label_id,
- free_start, allocate);
+ valid.start, allocate);
if (!new_res)
rc = -EBUSY;
} else if (strcmp(action, "grow down") == 0) {
@@ -832,13 +903,45 @@ static int grow_dpa_allocation(struct nd_region *nd_region,
return 0;
}
-static void nd_namespace_pmem_set_size(struct nd_region *nd_region,
+static void nd_namespace_pmem_set_resource(struct nd_region *nd_region,
struct nd_namespace_pmem *nspm, resource_size_t size)
{
struct resource *res = &nspm->nsio.res;
+ resource_size_t offset = 0;
- res->start = nd_region->ndr_start;
- res->end = nd_region->ndr_start + size - 1;
+ if (size && !nspm->uuid) {
+ WARN_ON_ONCE(1);
+ size = 0;
+ }
+
+ if (size && nspm->uuid) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+ struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+ struct nd_label_id label_id;
+ struct resource *res;
+
+ if (!ndd) {
+ size = 0;
+ goto out;
+ }
+
+ nd_label_gen_id(&label_id, nspm->uuid, 0);
+
+ /* calculate a spa offset from the dpa allocation offset */
+ for_each_dpa_resource(ndd, res)
+ if (strcmp(res->name, label_id.id) == 0) {
+ offset = (res->start - nd_mapping->start)
+ * nd_region->ndr_mappings;
+ goto out;
+ }
+
+ WARN_ON_ONCE(1);
+ size = 0;
+ }
+
+ out:
+ res->start = nd_region->ndr_start + offset;
+ res->end = res->start + size - 1;
}
static bool uuid_not_set(const u8 *uuid, struct device *dev, const char *where)
@@ -929,7 +1032,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
- nd_namespace_pmem_set_size(nd_region, nspm,
+ nd_namespace_pmem_set_resource(nd_region, nspm,
val * nd_region->ndr_mappings);
} else if (is_namespace_blk(dev)) {
struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
@@ -1031,22 +1134,27 @@ static ssize_t size_show(struct device *dev,
}
static DEVICE_ATTR(size, S_IRUGO, size_show, size_store);
-static ssize_t uuid_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static u8 *namespace_to_uuid(struct device *dev)
{
- u8 *uuid;
-
if (is_namespace_pmem(dev)) {
struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
- uuid = nspm->uuid;
+ return nspm->uuid;
} else if (is_namespace_blk(dev)) {
struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev);
- uuid = nsblk->uuid;
+ return nsblk->uuid;
} else
- return -ENXIO;
+ return ERR_PTR(-ENXIO);
+}
+static ssize_t uuid_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ u8 *uuid = namespace_to_uuid(dev);
+
+ if (IS_ERR(uuid))
+ return PTR_ERR(uuid);
if (uuid)
return sprintf(buf, "%pUb\n", uuid);
return sprintf(buf, "\n");
@@ -1089,7 +1197,7 @@ static int namespace_update_uuid(struct nd_region *nd_region,
*
* FIXME: can we delete uuid with zero dpa allocated?
*/
- if (nd_mapping->labels)
+ if (list_empty(&nd_mapping->labels))
return -EBUSY;
}
@@ -1491,14 +1599,19 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
- struct nd_namespace_label *nd_label;
+ struct nd_label_ent *label_ent;
bool found_uuid = false;
- int l;
- for_each_label(l, nd_label, nd_mapping->labels) {
- u64 isetcookie = __le64_to_cpu(nd_label->isetcookie);
- u16 position = __le16_to_cpu(nd_label->position);
- u16 nlabel = __le16_to_cpu(nd_label->nlabel);
+ list_for_each_entry(label_ent, &nd_mapping->labels, list) {
+ struct nd_namespace_label *nd_label = label_ent->label;
+ u16 position, nlabel;
+ u64 isetcookie;
+
+ if (!nd_label)
+ continue;
+ isetcookie = __le64_to_cpu(nd_label->isetcookie);
+ position = __le16_to_cpu(nd_label->position);
+ nlabel = __le16_to_cpu(nd_label->nlabel);
if (isetcookie != cookie)
continue;
@@ -1528,7 +1641,6 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
{
- struct nd_namespace_label *select = NULL;
int i;
if (!pmem_id)
@@ -1536,90 +1648,106 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
for (i = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
- struct nd_namespace_label *nd_label;
+ struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+ struct nd_namespace_label *nd_label = NULL;
u64 hw_start, hw_end, pmem_start, pmem_end;
- int l;
+ struct nd_label_ent *label_ent;
- for_each_label(l, nd_label, nd_mapping->labels)
+ WARN_ON(!mutex_is_locked(&nd_mapping->lock));
+ list_for_each_entry(label_ent, &nd_mapping->labels, list) {
+ nd_label = label_ent->label;
+ if (!nd_label)
+ continue;
if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0)
break;
+ nd_label = NULL;
+ }
if (!nd_label) {
WARN_ON(1);
return -EINVAL;
}
- select = nd_label;
/*
* Check that this label is compliant with the dpa
* range published in NFIT
*/
hw_start = nd_mapping->start;
hw_end = hw_start + nd_mapping->size;
- pmem_start = __le64_to_cpu(select->dpa);
- pmem_end = pmem_start + __le64_to_cpu(select->rawsize);
- if (pmem_start == hw_start && pmem_end <= hw_end)
+ pmem_start = __le64_to_cpu(nd_label->dpa);
+ pmem_end = pmem_start + __le64_to_cpu(nd_label->rawsize);
+ if (pmem_start >= hw_start && pmem_start < hw_end
+ && pmem_end <= hw_end && pmem_end > hw_start)
/* pass */;
- else
+ else {
+ dev_dbg(&nd_region->dev, "%s invalid label for %pUb\n",
+ dev_name(ndd->dev), nd_label->uuid);
return -EINVAL;
+ }
- nd_mapping->labels[0] = select;
- nd_mapping->labels[1] = NULL;
+ /* move recently validated label to the front of the list */
+ list_move(&label_ent->list, &nd_mapping->labels);
}
return 0;
}
/**
- * find_pmem_label_set - validate interleave set labelling, retrieve label0
+ * create_namespace_pmem - validate interleave set labelling, retrieve label0
* @nd_region: region with mappings to validate
+ * @nspm: target namespace to create
+ * @nd_label: target pmem namespace label to evaluate
*/
-static int find_pmem_label_set(struct nd_region *nd_region,
- struct nd_namespace_pmem *nspm)
+struct device *create_namespace_pmem(struct nd_region *nd_region,
+ struct nd_namespace_label *nd_label)
{
u64 cookie = nd_region_interleave_set_cookie(nd_region);
- struct nd_namespace_label *nd_label;
- u8 select_id[NSLABEL_UUID_LEN];
+ struct nd_label_ent *label_ent;
+ struct nd_namespace_pmem *nspm;
+ struct nd_mapping *nd_mapping;
resource_size_t size = 0;
- u8 *pmem_id = NULL;
- int rc = -ENODEV, l;
+ struct resource *res;
+ struct device *dev;
+ int rc = 0;
u16 i;
- if (cookie == 0)
- return -ENXIO;
+ if (cookie == 0) {
+ dev_dbg(&nd_region->dev, "invalid interleave-set-cookie\n");
+ return ERR_PTR(-ENXIO);
+ }
- /*
- * Find a complete set of labels by uuid. By definition we can start
- * with any mapping as the reference label
- */
- for_each_label(l, nd_label, nd_region->mapping[0].labels) {
- u64 isetcookie = __le64_to_cpu(nd_label->isetcookie);
+ if (__le64_to_cpu(nd_label->isetcookie) != cookie) {
+ dev_dbg(&nd_region->dev, "invalid cookie in label: %pUb\n",
+ nd_label->uuid);
+ return ERR_PTR(-EAGAIN);
+ }
- if (isetcookie != cookie)
- continue;
+ nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
+ if (!nspm)
+ return ERR_PTR(-ENOMEM);
- for (i = 0; nd_region->ndr_mappings; i++)
- if (!has_uuid_at_pos(nd_region, nd_label->uuid,
- cookie, i))
- break;
- if (i < nd_region->ndr_mappings) {
- /*
- * Give up if we don't find an instance of a
- * uuid at each position (from 0 to
- * nd_region->ndr_mappings - 1), or if we find a
- * dimm with two instances of the same uuid.
- */
- rc = -EINVAL;
- goto err;
- } else if (pmem_id) {
- /*
- * If there is more than one valid uuid set, we
- * need userspace to clean this up.
- */
- rc = -EBUSY;
- goto err;
- }
- memcpy(select_id, nd_label->uuid, NSLABEL_UUID_LEN);
- pmem_id = select_id;
+ nspm->id = -1;
+ dev = &nspm->nsio.common.dev;
+ dev->type = &namespace_pmem_device_type;
+ dev->parent = &nd_region->dev;
+ res = &nspm->nsio.res;
+ res->name = dev_name(&nd_region->dev);
+ res->flags = IORESOURCE_MEM;
+
+ for (i = 0; i < nd_region->ndr_mappings; i++)
+ if (!has_uuid_at_pos(nd_region, nd_label->uuid, cookie, i))
+ break;
+ if (i < nd_region->ndr_mappings) {
+ struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]);
+
+ /*
+ * Give up if we don't find an instance of a uuid at each
+ * position (from 0 to nd_region->ndr_mappings - 1), or if we
+ * find a dimm with two instances of the same uuid.
+ */
+ dev_err(&nd_region->dev, "%s missing label for %pUb\n",
+ dev_name(ndd->dev), nd_label->uuid);
+ rc = -EINVAL;
+ goto err;
}
/*
@@ -1630,14 +1758,23 @@ static int find_pmem_label_set(struct nd_region *nd_region,
* the dimm being enabled (i.e. nd_label_reserve_dpa()
* succeeded).
*/
- rc = select_pmem_id(nd_region, pmem_id);
+ rc = select_pmem_id(nd_region, nd_label->uuid);
if (rc)
goto err;
/* Calculate total size and populate namespace properties from label0 */
for (i = 0; i < nd_region->ndr_mappings; i++) {
- struct nd_mapping *nd_mapping = &nd_region->mapping[i];
- struct nd_namespace_label *label0 = nd_mapping->labels[0];
+ struct nd_namespace_label *label0;
+
+ nd_mapping = &nd_region->mapping[i];
+ label_ent = list_first_entry_or_null(&nd_mapping->labels,
+ typeof(*label_ent), list);
+ label0 = label_ent ? label_ent->label : 0;
+
+ if (!label0) {
+ WARN_ON(1);
+ continue;
+ }
size += __le64_to_cpu(label0->rawsize);
if (__le16_to_cpu(label0->position) != 0)
@@ -1654,10 +1791,11 @@ static int find_pmem_label_set(struct nd_region *nd_region,
goto err;
}
- nd_namespace_pmem_set_size(nd_region, nspm, size);
+ nd_namespace_pmem_set_resource(nd_region, nspm, size);
- return 0;
+ return dev;
err:
+ namespace_pmem_release(dev);
switch (rc) {
case -EINVAL:
dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__);
@@ -1670,55 +1808,7 @@ static int find_pmem_label_set(struct nd_region *nd_region,
__func__, rc);
break;
}
- return rc;
-}
-
-static struct device **create_namespace_pmem(struct nd_region *nd_region)
-{
- struct nd_namespace_pmem *nspm;
- struct device *dev, **devs;
- struct resource *res;
- int rc;
-
- nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
- if (!nspm)
- return NULL;
-
- dev = &nspm->nsio.common.dev;
- dev->type = &namespace_pmem_device_type;
- dev->parent = &nd_region->dev;
- res = &nspm->nsio.res;
- res->name = dev_name(&nd_region->dev);
- res->flags = IORESOURCE_MEM;
- rc = find_pmem_label_set(nd_region, nspm);
- if (rc == -ENODEV) {
- int i;
-
- /* Pass, try to permit namespace creation... */
- for (i = 0; i < nd_region->ndr_mappings; i++) {
- struct nd_mapping *nd_mapping = &nd_region->mapping[i];
-
- kfree(nd_mapping->labels);
- nd_mapping->labels = NULL;
- }
-
- /* Publish a zero-sized namespace for userspace to configure. */
- nd_namespace_pmem_set_size(nd_region, nspm, 0);
-
- rc = 0;
- } else if (rc)
- goto err;
-
- devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL);
- if (!devs)
- goto err;
-
- devs[0] = dev;
- return devs;
-
- err:
- namespace_pmem_release(&nspm->nsio.common.dev);
- return NULL;
+ return ERR_PTR(rc);
}
struct resource *nsblk_add_resource(struct nd_region *nd_region,
@@ -1770,16 +1860,58 @@ static struct device *nd_namespace_blk_create(struct nd_region *nd_region)
return &nsblk->common.dev;
}
-void nd_region_create_blk_seed(struct nd_region *nd_region)
+static struct device *nd_namespace_pmem_create(struct nd_region *nd_region)
+{
+ struct nd_namespace_pmem *nspm;
+ struct resource *res;
+ struct device *dev;
+
+ if (!is_nd_pmem(&nd_region->dev))
+ return NULL;
+
+ nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
+ if (!nspm)
+ return NULL;
+
+ dev = &nspm->nsio.common.dev;
+ dev->type = &namespace_pmem_device_type;
+ dev->parent = &nd_region->dev;
+ res = &nspm->nsio.res;
+ res->name = dev_name(&nd_region->dev);
+ res->flags = IORESOURCE_MEM;
+
+ nspm->id = ida_simple_get(&nd_region->ns_ida, 0, 0, GFP_KERNEL);
+ if (nspm->id < 0) {
+ kfree(nspm);
+ return NULL;
+ }
+ dev_set_name(dev, "namespace%d.%d", nd_region->id, nspm->id);
+ dev->parent = &nd_region->dev;
+ dev->groups = nd_namespace_attribute_groups;
+ nd_namespace_pmem_set_resource(nd_region, nspm, 0);
+
+ return dev;
+}
+
+void nd_region_create_ns_seed(struct nd_region *nd_region)
{
WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
- nd_region->ns_seed = nd_namespace_blk_create(nd_region);
+
+ if (nd_region_to_nstype(nd_region) == ND_DEVICE_NAMESPACE_IO)
+ return;
+
+ if (is_nd_blk(&nd_region->dev))
+ nd_region->ns_seed = nd_namespace_blk_create(nd_region);
+ else
+ nd_region->ns_seed = nd_namespace_pmem_create(nd_region);
+
/*
* Seed creation failures are not fatal, provisioning is simply
* disabled until memory becomes available
*/
if (!nd_region->ns_seed)
- dev_err(&nd_region->dev, "failed to create blk namespace\n");
+ dev_err(&nd_region->dev, "failed to create %s namespace\n",
+ is_nd_blk(&nd_region->dev) ? "blk" : "pmem");
else
nd_device_register(nd_region->ns_seed);
}
@@ -1820,43 +1952,137 @@ void nd_region_create_btt_seed(struct nd_region *nd_region)
dev_err(&nd_region->dev, "failed to create btt namespace\n");
}
-static struct device **create_namespace_blk(struct nd_region *nd_region)
+static int add_namespace_resource(struct nd_region *nd_region,
+ struct nd_namespace_label *nd_label, struct device **devs,
+ int count)
{
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
- struct nd_namespace_label *nd_label;
- struct device *dev, **devs = NULL;
+ struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+ int i;
+
+ for (i = 0; i < count; i++) {
+ u8 *uuid = namespace_to_uuid(devs[i]);
+ struct resource *res;
+
+ if (IS_ERR_OR_NULL(uuid)) {
+ WARN_ON(1);
+ continue;
+ }
+
+ if (memcmp(uuid, nd_label->uuid, NSLABEL_UUID_LEN) != 0)
+ continue;
+ if (is_namespace_blk(devs[i])) {
+ res = nsblk_add_resource(nd_region, ndd,
+ to_nd_namespace_blk(devs[i]),
+ __le64_to_cpu(nd_label->dpa));
+ if (!res)
+ return -ENXIO;
+ nd_dbg_dpa(nd_region, ndd, res, "%d assign\n", count);
+ } else {
+ dev_err(&nd_region->dev,
+ "error: conflicting extents for uuid: %pUb\n",
+ nd_label->uuid);
+ return -ENXIO;
+ }
+ break;
+ }
+
+ return i;
+}
+
+struct device *create_namespace_blk(struct nd_region *nd_region,
+ struct nd_namespace_label *nd_label, int count)
+{
+
+ struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+ struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nd_namespace_blk *nsblk;
- struct nvdimm_drvdata *ndd;
- int i, l, count = 0;
+ char *name[NSLABEL_NAME_LEN];
+ struct device *dev = NULL;
struct resource *res;
- if (nd_region->ndr_mappings == 0)
- return NULL;
+ nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
+ if (!nsblk)
+ return ERR_PTR(-ENOMEM);
+ dev = &nsblk->common.dev;
+ dev->type = &namespace_blk_device_type;
+ dev->parent = &nd_region->dev;
+ nsblk->id = -1;
+ nsblk->lbasize = __le64_to_cpu(nd_label->lbasize);
+ nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN,
+ GFP_KERNEL);
+ if (!nsblk->uuid)
+ goto blk_err;
+ memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
+ if (name[0])
+ nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
+ GFP_KERNEL);
+ res = nsblk_add_resource(nd_region, ndd, nsblk,
+ __le64_to_cpu(nd_label->dpa));
+ if (!res)
+ goto blk_err;
+ nd_dbg_dpa(nd_region, ndd, res, "%d: assign\n", count);
+ return dev;
+ blk_err:
+ namespace_blk_release(dev);
+ return ERR_PTR(-ENXIO);
+}
+
+static int cmp_dpa(const void *a, const void *b)
+{
+ const struct device *dev_a = *(const struct device **) a;
+ const struct device *dev_b = *(const struct device **) b;
+ struct nd_namespace_blk *nsblk_a, *nsblk_b;
+ struct nd_namespace_pmem *nspm_a, *nspm_b;
+
+ if (is_namespace_io(dev_a))
+ return 0;
+
+ if (is_namespace_blk(dev_a)) {
+ nsblk_a = to_nd_namespace_blk(dev_a);
+ nsblk_b = to_nd_namespace_blk(dev_b);
+
+ return memcmp(&nsblk_a->res[0]->start, &nsblk_b->res[0]->start,
+ sizeof(resource_size_t));
+ }
+
+ nspm_a = to_nd_namespace_pmem(dev_a);
+ nspm_b = to_nd_namespace_pmem(dev_b);
+
+ return memcmp(&nspm_a->nsio.res.start, &nspm_b->nsio.res.start,
+ sizeof(resource_size_t));
+}
- ndd = to_ndd(nd_mapping);
- for_each_label(l, nd_label, nd_mapping->labels) {
- u32 flags = __le32_to_cpu(nd_label->flags);
- char *name[NSLABEL_NAME_LEN];
+static struct device **scan_labels(struct nd_region *nd_region)
+{
+ int i, count = 0;
+ struct device *dev, **devs = NULL;
+ struct nd_label_ent *label_ent, *e;
+ struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+ resource_size_t map_end = nd_mapping->start + nd_mapping->size - 1;
+
+ /* "safe" because create_namespace_pmem() might list_move() label_ent */
+ list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
+ struct nd_namespace_label *nd_label = label_ent->label;
struct device **__devs;
+ u32 flags;
- if (flags & NSLABEL_FLAG_LOCAL)
- /* pass */;
+ if (!nd_label)
+ continue;
+ flags = __le32_to_cpu(nd_label->flags);
+ if (is_nd_blk(&nd_region->dev)
+ == !!(flags & NSLABEL_FLAG_LOCAL))
+ /* pass, region matches label type */;
else
continue;
- for (i = 0; i < count; i++) {
- nsblk = to_nd_namespace_blk(devs[i]);
- if (memcmp(nsblk->uuid, nd_label->uuid,
- NSLABEL_UUID_LEN) == 0) {
- res = nsblk_add_resource(nd_region, ndd, nsblk,
- __le64_to_cpu(nd_label->dpa));
- if (!res)
- goto err;
- nd_dbg_dpa(nd_region, ndd, res, "%s assign\n",
- dev_name(&nsblk->common.dev));
- break;
- }
- }
+ /* skip labels that describe extents outside of the region */
+ if (nd_label->dpa < nd_mapping->start || nd_label->dpa > map_end)
+ continue;
+
+ i = add_namespace_resource(nd_region, nd_label, devs, count);
+ if (i < 0)
+ goto err;
if (i < count)
continue;
__devs = kcalloc(count + 2, sizeof(dev), GFP_KERNEL);
@@ -1866,67 +2092,126 @@ static struct device **create_namespace_blk(struct nd_region *nd_region)
kfree(devs);
devs = __devs;
- nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
- if (!nsblk)
- goto err;
- dev = &nsblk->common.dev;
- dev->type = &namespace_blk_device_type;
- dev->parent = &nd_region->dev;
- dev_set_name(dev, "namespace%d.%d", nd_region->id, count);
- devs[count++] = dev;
- nsblk->id = -1;
- nsblk->lbasize = __le64_to_cpu(nd_label->lbasize);
- nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN,
- GFP_KERNEL);
- if (!nsblk->uuid)
- goto err;
- memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
- if (name[0])
- nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
- GFP_KERNEL);
- res = nsblk_add_resource(nd_region, ndd, nsblk,
- __le64_to_cpu(nd_label->dpa));
- if (!res)
- goto err;
- nd_dbg_dpa(nd_region, ndd, res, "%s assign\n",
- dev_name(&nsblk->common.dev));
+ if (is_nd_blk(&nd_region->dev)) {
+ dev = create_namespace_blk(nd_region, nd_label, count);
+ if (IS_ERR(dev))
+ goto err;
+ devs[count++] = dev;
+ } else {
+ dev = create_namespace_pmem(nd_region, nd_label);
+ if (IS_ERR(dev)) {
+ switch (PTR_ERR(dev)) {
+ case -EAGAIN:
+ /* skip invalid labels */
+ continue;
+ case -ENODEV:
+ /* fallthrough to seed creation */
+ break;
+ default:
+ goto err;
+ }
+ } else
+ devs[count++] = dev;
+ }
}
- dev_dbg(&nd_region->dev, "%s: discovered %d blk namespace%s\n",
- __func__, count, count == 1 ? "" : "s");
+ dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n",
+ __func__, count, is_nd_blk(&nd_region->dev)
+ ? "blk" : "pmem", count == 1 ? "" : "s");
if (count == 0) {
/* Publish a zero-sized namespace for userspace to configure. */
- for (i = 0; i < nd_region->ndr_mappings; i++) {
- struct nd_mapping *nd_mapping = &nd_region->mapping[i];
-
- kfree(nd_mapping->labels);
- nd_mapping->labels = NULL;
- }
+ nd_mapping_free_labels(nd_mapping);
devs = kcalloc(2, sizeof(dev), GFP_KERNEL);
if (!devs)
goto err;
- nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
- if (!nsblk)
- goto err;
- dev = &nsblk->common.dev;
- dev->type = &namespace_blk_device_type;
+ if (is_nd_blk(&nd_region->dev)) {
+ struct nd_namespace_blk *nsblk;
+
+ nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
+ if (!nsblk)
+ goto err;
+ dev = &nsblk->common.dev;
+ dev->type = &namespace_blk_device_type;
+ } else {
+ struct nd_namespace_pmem *nspm;
+
+ nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
+ if (!nspm)
+ goto err;
+ dev = &nspm->nsio.common.dev;
+ dev->type = &namespace_pmem_device_type;
+ nd_namespace_pmem_set_resource(nd_region, nspm, 0);
+ }
dev->parent = &nd_region->dev;
devs[count++] = dev;
+ } else if (is_nd_pmem(&nd_region->dev)) {
+ /* clean unselected labels */
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct list_head *l, *e;
+ LIST_HEAD(list);
+ int j;
+
+ nd_mapping = &nd_region->mapping[i];
+ if (list_empty(&nd_mapping->labels)) {
+ WARN_ON(1);
+ continue;
+ }
+
+ j = count;
+ list_for_each_safe(l, e, &nd_mapping->labels) {
+ if (!j--)
+ break;
+ list_move_tail(l, &list);
+ }
+ nd_mapping_free_labels(nd_mapping);
+ list_splice_init(&list, &nd_mapping->labels);
+ }
}
+ if (count > 1)
+ sort(devs, count, sizeof(struct device *), cmp_dpa, NULL);
+
return devs;
-err:
- for (i = 0; i < count; i++) {
- nsblk = to_nd_namespace_blk(devs[i]);
- namespace_blk_release(&nsblk->common.dev);
- }
+ err:
+ for (i = 0; devs[i]; i++)
+ if (is_nd_blk(&nd_region->dev))
+ namespace_blk_release(devs[i]);
+ else
+ namespace_pmem_release(devs[i]);
kfree(devs);
return NULL;
}
+static struct device **create_namespaces(struct nd_region *nd_region)
+{
+ struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+ struct device **devs;
+ int i;
+
+ if (nd_region->ndr_mappings == 0)
+ return NULL;
+
+ /* lock down all mappings while we scan labels */
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ nd_mapping = &nd_region->mapping[i];
+ mutex_lock_nested(&nd_mapping->lock, i);
+ }
+
+ devs = scan_labels(nd_region);
+
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ int reverse = nd_region->ndr_mappings - 1 - i;
+
+ nd_mapping = &nd_region->mapping[reverse];
+ mutex_unlock(&nd_mapping->lock);
+ }
+
+ return devs;
+}
+
static int init_active_labels(struct nd_region *nd_region)
{
int i;
@@ -1935,6 +2220,7 @@ static int init_active_labels(struct nd_region *nd_region)
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct nvdimm *nvdimm = nd_mapping->nvdimm;
+ struct nd_label_ent *label_ent;
int count, j;
/*
@@ -1956,16 +2242,27 @@ static int init_active_labels(struct nd_region *nd_region)
dev_dbg(ndd->dev, "%s: %d\n", __func__, count);
if (!count)
continue;
- nd_mapping->labels = kcalloc(count + 1, sizeof(void *),
- GFP_KERNEL);
- if (!nd_mapping->labels)
- return -ENOMEM;
for (j = 0; j < count; j++) {
struct nd_namespace_label *label;
+ label_ent = kzalloc(sizeof(*label_ent), GFP_KERNEL);
+ if (!label_ent)
+ break;
label = nd_label_active(ndd, j);
- nd_mapping->labels[j] = label;
+ label_ent->label = label;
+
+ mutex_lock(&nd_mapping->lock);
+ list_add_tail(&label_ent->list, &nd_mapping->labels);
+ mutex_unlock(&nd_mapping->lock);
}
+
+ if (j >= count)
+ continue;
+
+ mutex_lock(&nd_mapping->lock);
+ nd_mapping_free_labels(nd_mapping);
+ mutex_unlock(&nd_mapping->lock);
+ return -ENOMEM;
}
return 0;
@@ -1990,10 +2287,8 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
devs = create_namespace_io(nd_region);
break;
case ND_DEVICE_NAMESPACE_PMEM:
- devs = create_namespace_pmem(nd_region);
- break;
case ND_DEVICE_NAMESPACE_BLK:
- devs = create_namespace_blk(nd_region);
+ devs = create_namespaces(nd_region);
break;
default:
break;
@@ -2014,6 +2309,13 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
id = ida_simple_get(&nd_region->ns_ida, 0, 0,
GFP_KERNEL);
nsblk->id = id;
+ } else if (type == ND_DEVICE_NAMESPACE_PMEM) {
+ struct nd_namespace_pmem *nspm;
+
+ nspm = to_nd_namespace_pmem(dev);
+ id = ida_simple_get(&nd_region->ns_ida, 0, 0,
+ GFP_KERNEL);
+ nspm->id = id;
} else
id = i;
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 38ce6bbbc170..8623e57c2ce3 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -44,6 +44,23 @@ struct nvdimm {
struct resource *flush_wpq;
};
+/**
+ * struct blk_alloc_info - tracking info for BLK dpa scanning
+ * @nd_mapping: blk region mapping boundaries
+ * @available: decremented in alias_dpa_busy as aliased PMEM is scanned
+ * @busy: decremented in blk_dpa_busy to account for ranges already
+ * handled by alias_dpa_busy
+ * @res: alias_dpa_busy interprets this a free space range that needs to
+ * be truncated to the valid BLK allocation starting DPA, blk_dpa_busy
+ * treats it as a busy range that needs the aliased PMEM ranges
+ * truncated.
+ */
+struct blk_alloc_info {
+ struct nd_mapping *nd_mapping;
+ resource_size_t available, busy;
+ struct resource *res;
+};
+
bool is_nvdimm(struct device *dev);
bool is_nd_pmem(struct device *dev);
bool is_nd_blk(struct device *dev);
@@ -54,7 +71,7 @@ void nvdimm_devs_exit(void);
void nd_region_devs_exit(void);
void nd_region_probe_success(struct nvdimm_bus *nvdimm_bus, struct device *dev);
struct nd_region;
-void nd_region_create_blk_seed(struct nd_region *nd_region);
+void nd_region_create_ns_seed(struct nd_region *nd_region);
void nd_region_create_btt_seed(struct nd_region *nd_region);
void nd_region_create_pfn_seed(struct nd_region *nd_region);
void nd_region_create_dax_seed(struct nd_region *nd_region);
@@ -73,13 +90,14 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid);
struct nd_region;
struct nvdimm_drvdata;
struct nd_mapping;
+void nd_mapping_free_labels(struct nd_mapping *nd_mapping);
resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
struct nd_mapping *nd_mapping, resource_size_t *overlap);
-resource_size_t nd_blk_available_dpa(struct nd_mapping *nd_mapping);
+resource_size_t nd_blk_available_dpa(struct nd_region *nd_region);
resource_size_t nd_region_available_dpa(struct nd_region *nd_region);
resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
struct nd_label_id *label_id);
-struct nd_mapping;
+int alias_dpa_busy(struct device *dev, void *data);
struct resource *nsblk_add_resource(struct nd_region *nd_region,
struct nvdimm_drvdata *ndd, struct nd_namespace_blk *nsblk,
resource_size_t start);
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 0b78a8211f4a..d3b2fca8deec 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -101,9 +101,6 @@ static inline struct nd_namespace_index *to_next_namespace_index(
(unsigned long long) (res ? resource_size(res) : 0), \
(unsigned long long) (res ? res->start : 0), ##arg)
-#define for_each_label(l, label, labels) \
- for (l = 0; (label = labels ? labels[l] : NULL); l++)
-
#define for_each_dpa_resource(ndd, res) \
for (res = (ndd)->dpa.child; res; res = res->sibling)
@@ -116,6 +113,31 @@ struct nd_percpu_lane {
spinlock_t lock;
};
+struct nd_label_ent {
+ struct list_head list;
+ struct nd_namespace_label *label;
+};
+
+enum nd_mapping_lock_class {
+ ND_MAPPING_CLASS0,
+ ND_MAPPING_UUID_SCAN,
+};
+
+struct nd_mapping {
+ struct nvdimm *nvdimm;
+ u64 start;
+ u64 size;
+ struct list_head labels;
+ struct mutex lock;
+ /*
+ * @ndd is for private use at region enable / disable time for
+ * get_ndd() + put_ndd(), all other nd_mapping to ndd
+ * conversions use to_ndd() which respects enabled state of the
+ * nvdimm.
+ */
+ struct nvdimm_drvdata *ndd;
+};
+
struct nd_region {
struct device dev;
struct ida ns_ida;
@@ -209,6 +231,7 @@ void nvdimm_exit(void);
void nd_region_exit(void);
struct nvdimm;
struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping);
+int nvdimm_check_config_data(struct device *dev);
int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 571a6c7ee2fc..42b3a8217073 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -66,13 +66,32 @@ static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
invalidate_pmem(pmem->virt_addr + offset, len);
}
+static void write_pmem(void *pmem_addr, struct page *page,
+ unsigned int off, unsigned int len)
+{
+ void *mem = kmap_atomic(page);
+
+ memcpy_to_pmem(pmem_addr, mem + off, len);
+ kunmap_atomic(mem);
+}
+
+static int read_pmem(struct page *page, unsigned int off,
+ void *pmem_addr, unsigned int len)
+{
+ int rc;
+ void *mem = kmap_atomic(page);
+
+ rc = memcpy_from_pmem(mem + off, pmem_addr, len);
+ kunmap_atomic(mem);
+ return rc;
+}
+
static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
unsigned int len, unsigned int off, bool is_write,
sector_t sector)
{
int rc = 0;
bool bad_pmem = false;
- void *mem = kmap_atomic(page);
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
void *pmem_addr = pmem->virt_addr + pmem_off;
@@ -83,7 +102,7 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
if (unlikely(bad_pmem))
rc = -EIO;
else {
- rc = memcpy_from_pmem(mem + off, pmem_addr, len);
+ rc = read_pmem(page, off, pmem_addr, len);
flush_dcache_page(page);
}
} else {
@@ -102,14 +121,13 @@ static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
* after clear poison.
*/
flush_dcache_page(page);
- memcpy_to_pmem(pmem_addr, mem + off, len);
+ write_pmem(pmem_addr, page, off, len);
if (unlikely(bad_pmem)) {
pmem_clear_poison(pmem, pmem_off, len);
- memcpy_to_pmem(pmem_addr, mem + off, len);
+ write_pmem(pmem_addr, page, off, len);
}
}
- kunmap_atomic(mem);
return rc;
}
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 4c0ac4abb629..6af5e629140c 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -70,7 +70,7 @@ static int nvdimm_map_flush(struct device *dev, struct nvdimm *nvdimm, int dimm,
int nd_region_activate(struct nd_region *nd_region)
{
- int i, num_flush = 0;
+ int i, j, num_flush = 0;
struct nd_region_data *ndrd;
struct device *dev = &nd_region->dev;
size_t flush_data_size = sizeof(void *);
@@ -107,6 +107,21 @@ int nd_region_activate(struct nd_region *nd_region)
return rc;
}
+ /*
+ * Clear out entries that are duplicates. This should prevent the
+ * extra flushings.
+ */
+ for (i = 0; i < nd_region->ndr_mappings - 1; i++) {
+ /* ignore if NULL already */
+ if (!ndrd_get_flush_wpq(ndrd, i, 0))
+ continue;
+
+ for (j = i + 1; j < nd_region->ndr_mappings; j++)
+ if (ndrd_get_flush_wpq(ndrd, i, 0) ==
+ ndrd_get_flush_wpq(ndrd, j, 0))
+ ndrd_set_flush_wpq(ndrd, j, 0, NULL);
+ }
+
return 0;
}
@@ -298,9 +313,8 @@ resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
blk_max_overlap = overlap;
goto retry;
}
- } else if (is_nd_blk(&nd_region->dev)) {
- available += nd_blk_available_dpa(nd_mapping);
- }
+ } else if (is_nd_blk(&nd_region->dev))
+ available += nd_blk_available_dpa(nd_region);
}
return available;
@@ -491,6 +505,17 @@ u64 nd_region_interleave_set_cookie(struct nd_region *nd_region)
return 0;
}
+void nd_mapping_free_labels(struct nd_mapping *nd_mapping)
+{
+ struct nd_label_ent *label_ent, *e;
+
+ WARN_ON(!mutex_is_locked(&nd_mapping->lock));
+ list_for_each_entry_safe(label_ent, e, &nd_mapping->labels, list) {
+ list_del(&label_ent->list);
+ kfree(label_ent);
+ }
+}
+
/*
* Upon successful probe/remove, take/release a reference on the
* associated interleave set (if present), and plant new btt + namespace
@@ -511,8 +536,10 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
struct nvdimm_drvdata *ndd = nd_mapping->ndd;
struct nvdimm *nvdimm = nd_mapping->nvdimm;
- kfree(nd_mapping->labels);
- nd_mapping->labels = NULL;
+ mutex_lock(&nd_mapping->lock);
+ nd_mapping_free_labels(nd_mapping);
+ mutex_unlock(&nd_mapping->lock);
+
put_ndd(ndd);
nd_mapping->ndd = NULL;
if (ndd)
@@ -522,11 +549,12 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
if (is_nd_pmem(dev))
return;
}
- if (dev->parent && is_nd_blk(dev->parent) && probe) {
+ if (dev->parent && (is_nd_blk(dev->parent) || is_nd_pmem(dev->parent))
+ && probe) {
nd_region = to_nd_region(dev->parent);
nvdimm_bus_lock(dev);
if (nd_region->ns_seed == dev)
- nd_region_create_blk_seed(nd_region);
+ nd_region_create_ns_seed(nd_region);
nvdimm_bus_unlock(dev);
}
if (is_nd_btt(dev) && probe) {
@@ -536,23 +564,30 @@ static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
nvdimm_bus_lock(dev);
if (nd_region->btt_seed == dev)
nd_region_create_btt_seed(nd_region);
- if (nd_region->ns_seed == &nd_btt->ndns->dev &&
- is_nd_blk(dev->parent))
- nd_region_create_blk_seed(nd_region);
+ if (nd_region->ns_seed == &nd_btt->ndns->dev)
+ nd_region_create_ns_seed(nd_region);
nvdimm_bus_unlock(dev);
}
if (is_nd_pfn(dev) && probe) {
+ struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+
nd_region = to_nd_region(dev->parent);
nvdimm_bus_lock(dev);
if (nd_region->pfn_seed == dev)
nd_region_create_pfn_seed(nd_region);
+ if (nd_region->ns_seed == &nd_pfn->ndns->dev)
+ nd_region_create_ns_seed(nd_region);
nvdimm_bus_unlock(dev);
}
if (is_nd_dax(dev) && probe) {
+ struct nd_dax *nd_dax = to_nd_dax(dev);
+
nd_region = to_nd_region(dev->parent);
nvdimm_bus_lock(dev);
if (nd_region->dax_seed == dev)
nd_region_create_dax_seed(nd_region);
+ if (nd_region->ns_seed == &nd_dax->nd_pfn.ndns->dev)
+ nd_region_create_ns_seed(nd_region);
nvdimm_bus_unlock(dev);
}
}
@@ -759,10 +794,10 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
int ro = 0;
for (i = 0; i < ndr_desc->num_mappings; i++) {
- struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
- struct nvdimm *nvdimm = nd_mapping->nvdimm;
+ struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
+ struct nvdimm *nvdimm = mapping->nvdimm;
- if ((nd_mapping->start | nd_mapping->size) % SZ_4K) {
+ if ((mapping->start | mapping->size) % SZ_4K) {
dev_err(&nvdimm_bus->dev, "%s: %s mapping%d is not 4K aligned\n",
caller, dev_name(&nvdimm->dev), i);
@@ -813,11 +848,15 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
ndl->count = 0;
}
- memcpy(nd_region->mapping, ndr_desc->nd_mapping,
- sizeof(struct nd_mapping) * ndr_desc->num_mappings);
for (i = 0; i < ndr_desc->num_mappings; i++) {
- struct nd_mapping *nd_mapping = &ndr_desc->nd_mapping[i];
- struct nvdimm *nvdimm = nd_mapping->nvdimm;
+ struct nd_mapping_desc *mapping = &ndr_desc->mapping[i];
+ struct nvdimm *nvdimm = mapping->nvdimm;
+
+ nd_region->mapping[i].nvdimm = nvdimm;
+ nd_region->mapping[i].start = mapping->start;
+ nd_region->mapping[i].size = mapping->size;
+ INIT_LIST_HEAD(&nd_region->mapping[i].labels);
+ mutex_init(&nd_region->mapping[i].lock);
get_device(&nvdimm->dev);
}
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 6edd825231c5..44a240c4bb65 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -406,6 +406,7 @@ void cd_forget(struct inode *inode)
spin_lock(&cdev_lock);
list_del_init(&inode->i_devices);
inode->i_cdev = NULL;
+ inode->i_mapping = &inode->i_data;
spin_unlock(&cdev_lock);
}
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index b519e137b9b7..f4947fda11e7 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -50,23 +50,6 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc);
-struct nd_namespace_label;
-struct nvdimm_drvdata;
-
-struct nd_mapping {
- struct nvdimm *nvdimm;
- struct nd_namespace_label **labels;
- u64 start;
- u64 size;
- /*
- * @ndd is for private use at region enable / disable time for
- * get_ndd() + put_ndd(), all other nd_mapping to ndd
- * conversions use to_ndd() which respects enabled state of the
- * nvdimm.
- */
- struct nvdimm_drvdata *ndd;
-};
-
struct nvdimm_bus_descriptor {
const struct attribute_group **attr_groups;
unsigned long cmd_mask;
@@ -89,9 +72,15 @@ struct nd_interleave_set {
u64 cookie;
};
+struct nd_mapping_desc {
+ struct nvdimm *nvdimm;
+ u64 start;
+ u64 size;
+};
+
struct nd_region_desc {
struct resource *res;
- struct nd_mapping *nd_mapping;
+ struct nd_mapping_desc *mapping;
u16 num_mappings;
const struct attribute_group **attr_groups;
struct nd_interleave_set *nd_set;
@@ -129,6 +118,8 @@ static inline struct nd_blk_region_desc *to_blk_region_desc(
}
int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length);
+void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus,
+ phys_addr_t start, unsigned int len);
struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
struct nvdimm_bus_descriptor *nfit_desc);
void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
@@ -139,6 +130,7 @@ struct nd_blk_region *to_nd_blk_region(struct device *dev);
struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
const char *nvdimm_name(struct nvdimm *nvdimm);
+struct kobject *nvdimm_kobj(struct nvdimm *nvdimm);
unsigned long nvdimm_cmd_mask(struct nvdimm *nvdimm);
void *nvdimm_provider_data(struct nvdimm *nvdimm);
struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
diff --git a/include/linux/nd.h b/include/linux/nd.h
index f1ea426d6a5e..fa66aeed441a 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -77,11 +77,13 @@ struct nd_namespace_io {
* @nsio: device and system physical address range to drive
* @alt_name: namespace name supplied in the dimm label
* @uuid: namespace name supplied in the dimm label
+ * @id: ida allocated id
*/
struct nd_namespace_pmem {
struct nd_namespace_io nsio;
char *alt_name;
u8 *uuid;
+ int id;
};
/**
@@ -105,19 +107,19 @@ struct nd_namespace_blk {
struct resource **res;
};
-static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev)
+static inline struct nd_namespace_io *to_nd_namespace_io(const struct device *dev)
{
return container_of(dev, struct nd_namespace_io, common.dev);
}
-static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev)
+static inline struct nd_namespace_pmem *to_nd_namespace_pmem(const struct device *dev)
{
struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
return container_of(nsio, struct nd_namespace_pmem, nsio);
}
-static inline struct nd_namespace_blk *to_nd_namespace_blk(struct device *dev)
+static inline struct nd_namespace_blk *to_nd_namespace_blk(const struct device *dev)
{
return container_of(dev, struct nd_namespace_blk, common.dev);
}
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index e398beac67b8..9bd559472c92 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -65,6 +65,7 @@
#define V9FS_MAGIC 0x01021997
#define BDEVFS_MAGIC 0x62646576
+#define DAXFS_MAGIC 0x64646178
#define BINFMTFS_MAGIC 0x42494e4d
#define DEVPTS_SUPER_MAGIC 0x1cd1
#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index ba5a8c79652a..ede5c6a62164 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -21,14 +21,16 @@ struct nd_cmd_smart {
} __packed;
#define ND_SMART_HEALTH_VALID (1 << 0)
-#define ND_SMART_TEMP_VALID (1 << 1)
-#define ND_SMART_SPARES_VALID (1 << 2)
-#define ND_SMART_ALARM_VALID (1 << 3)
-#define ND_SMART_USED_VALID (1 << 4)
-#define ND_SMART_SHUTDOWN_VALID (1 << 5)
-#define ND_SMART_VENDOR_VALID (1 << 6)
-#define ND_SMART_TEMP_TRIP (1 << 0)
-#define ND_SMART_SPARE_TRIP (1 << 1)
+#define ND_SMART_SPARES_VALID (1 << 1)
+#define ND_SMART_USED_VALID (1 << 2)
+#define ND_SMART_TEMP_VALID (1 << 3)
+#define ND_SMART_CTEMP_VALID (1 << 4)
+#define ND_SMART_ALARM_VALID (1 << 9)
+#define ND_SMART_SHUTDOWN_VALID (1 << 10)
+#define ND_SMART_VENDOR_VALID (1 << 11)
+#define ND_SMART_SPARE_TRIP (1 << 0)
+#define ND_SMART_TEMP_TRIP (1 << 1)
+#define ND_SMART_CTEMP_TRIP (1 << 2)
#define ND_SMART_NON_CRITICAL_HEALTH (1 << 0)
#define ND_SMART_CRITICAL_HEALTH (1 << 1)
#define ND_SMART_FATAL_HEALTH (1 << 2)
@@ -37,14 +39,15 @@ struct nd_smart_payload {
__u32 flags;
__u8 reserved0[4];
__u8 health;
- __u16 temperature;
__u8 spares;
- __u8 alarm_flags;
__u8 life_used;
+ __u8 alarm_flags;
+ __u16 temperature;
+ __u16 ctrl_temperature;
+ __u8 reserved1[15];
__u8 shutdown_state;
- __u8 reserved1;
__u32 vendor_size;
- __u8 vendor_data[108];
+ __u8 vendor_data[92];
} __packed;
struct nd_cmd_smart_threshold {
@@ -53,7 +56,8 @@ struct nd_cmd_smart_threshold {
} __packed;
struct nd_smart_threshold_payload {
- __u16 alarm_control;
+ __u8 alarm_control;
+ __u8 reserved0;
__u16 temperature;
__u8 spares;
__u8 reserved[3];
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index ad6dd0543019..582db95127ed 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -13,6 +13,7 @@ ldflags-y += --wrap=__release_region
ldflags-y += --wrap=devm_memremap_pages
ldflags-y += --wrap=insert_resource
ldflags-y += --wrap=remove_resource
+ldflags-y += --wrap=acpi_evaluate_object
DRIVERS := ../../../drivers
NVDIMM_SRC := $(DRIVERS)/nvdimm
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index c29f8dca9e67..3ccef732fce9 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/pfn_t.h>
+#include <linux/acpi.h>
#include <linux/io.h>
#include <linux/mm.h>
#include "nfit_test.h"
@@ -73,7 +74,7 @@ void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
if (nfit_res)
return (void __iomem *) nfit_res->buf + offset
- - nfit_res->res->start;
+ - nfit_res->res.start;
return fallback_fn(offset, size);
}
@@ -84,7 +85,7 @@ void __iomem *__wrap_devm_ioremap_nocache(struct device *dev,
if (nfit_res)
return (void __iomem *) nfit_res->buf + offset
- - nfit_res->res->start;
+ - nfit_res->res.start;
return devm_ioremap_nocache(dev, offset, size);
}
EXPORT_SYMBOL(__wrap_devm_ioremap_nocache);
@@ -95,7 +96,7 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res)
- return nfit_res->buf + offset - nfit_res->res->start;
+ return nfit_res->buf + offset - nfit_res->res.start;
return devm_memremap(dev, offset, size, flags);
}
EXPORT_SYMBOL(__wrap_devm_memremap);
@@ -107,7 +108,7 @@ void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res)
- return nfit_res->buf + offset - nfit_res->res->start;
+ return nfit_res->buf + offset - nfit_res->res.start;
return devm_memremap_pages(dev, res, ref, altmap);
}
EXPORT_SYMBOL(__wrap_devm_memremap_pages);
@@ -128,7 +129,7 @@ void *__wrap_memremap(resource_size_t offset, size_t size,
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res)
- return nfit_res->buf + offset - nfit_res->res->start;
+ return nfit_res->buf + offset - nfit_res->res.start;
return memremap(offset, size, flags);
}
EXPORT_SYMBOL(__wrap_memremap);
@@ -174,6 +175,63 @@ void __wrap_memunmap(void *addr)
}
EXPORT_SYMBOL(__wrap_memunmap);
+static bool nfit_test_release_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n);
+
+static void nfit_devres_release(struct device *dev, void *data)
+{
+ struct resource *res = *((struct resource **) data);
+
+ WARN_ON(!nfit_test_release_region(NULL, &iomem_resource, res->start,
+ resource_size(res)));
+}
+
+static int match(struct device *dev, void *__res, void *match_data)
+{
+ struct resource *res = *((struct resource **) __res);
+ resource_size_t start = *((resource_size_t *) match_data);
+
+ return res->start == start;
+}
+
+static bool nfit_test_release_region(struct device *dev,
+ struct resource *parent, resource_size_t start,
+ resource_size_t n)
+{
+ if (parent == &iomem_resource) {
+ struct nfit_test_resource *nfit_res = get_nfit_res(start);
+
+ if (nfit_res) {
+ struct nfit_test_request *req;
+ struct resource *res = NULL;
+
+ if (dev) {
+ devres_release(dev, nfit_devres_release, match,
+ &start);
+ return true;
+ }
+
+ spin_lock(&nfit_res->lock);
+ list_for_each_entry(req, &nfit_res->requests, list)
+ if (req->res.start == start) {
+ res = &req->res;
+ list_del(&req->list);
+ break;
+ }
+ spin_unlock(&nfit_res->lock);
+
+ WARN(!res || resource_size(res) != n,
+ "%s: start: %llx n: %llx mismatch: %pr\n",
+ __func__, start, n, res);
+ if (res)
+ kfree(req);
+ return true;
+ }
+ }
+ return false;
+}
+
static struct resource *nfit_test_request_region(struct device *dev,
struct resource *parent, resource_size_t start,
resource_size_t n, const char *name, int flags)
@@ -183,21 +241,57 @@ static struct resource *nfit_test_request_region(struct device *dev,
if (parent == &iomem_resource) {
nfit_res = get_nfit_res(start);
if (nfit_res) {
- struct resource *res = nfit_res->res + 1;
+ struct nfit_test_request *req;
+ struct resource *res = NULL;
- if (start + n > nfit_res->res->start
- + resource_size(nfit_res->res)) {
+ if (start + n > nfit_res->res.start
+ + resource_size(&nfit_res->res)) {
pr_debug("%s: start: %llx n: %llx overflow: %pr\n",
__func__, start, n,
- nfit_res->res);
+ &nfit_res->res);
return NULL;
}
+ spin_lock(&nfit_res->lock);
+ list_for_each_entry(req, &nfit_res->requests, list)
+ if (start == req->res.start) {
+ res = &req->res;
+ break;
+ }
+ spin_unlock(&nfit_res->lock);
+
+ if (res) {
+ WARN(1, "%pr already busy\n", res);
+ return NULL;
+ }
+
+ req = kzalloc(sizeof(*req), GFP_KERNEL);
+ if (!req)
+ return NULL;
+ INIT_LIST_HEAD(&req->list);
+ res = &req->res;
+
res->start = start;
res->end = start + n - 1;
res->name = name;
res->flags = resource_type(parent);
res->flags |= IORESOURCE_BUSY | flags;
+ spin_lock(&nfit_res->lock);
+ list_add(&req->list, &nfit_res->requests);
+ spin_unlock(&nfit_res->lock);
+
+ if (dev) {
+ struct resource **d;
+
+ d = devres_alloc(nfit_devres_release,
+ sizeof(struct resource *),
+ GFP_KERNEL);
+ if (!d)
+ return NULL;
+ *d = res;
+ devres_add(dev, d);
+ }
+
pr_debug("%s: %pr\n", __func__, res);
return res;
}
@@ -241,29 +335,10 @@ struct resource *__wrap___devm_request_region(struct device *dev,
}
EXPORT_SYMBOL(__wrap___devm_request_region);
-static bool nfit_test_release_region(struct resource *parent,
- resource_size_t start, resource_size_t n)
-{
- if (parent == &iomem_resource) {
- struct nfit_test_resource *nfit_res = get_nfit_res(start);
- if (nfit_res) {
- struct resource *res = nfit_res->res + 1;
-
- if (start != res->start || resource_size(res) != n)
- pr_info("%s: start: %llx n: %llx mismatch: %pr\n",
- __func__, start, n, res);
- else
- memset(res, 0, sizeof(*res));
- return true;
- }
- }
- return false;
-}
-
void __wrap___release_region(struct resource *parent, resource_size_t start,
resource_size_t n)
{
- if (!nfit_test_release_region(parent, start, n))
+ if (!nfit_test_release_region(NULL, parent, start, n))
__release_region(parent, start, n);
}
EXPORT_SYMBOL(__wrap___release_region);
@@ -271,9 +346,25 @@ EXPORT_SYMBOL(__wrap___release_region);
void __wrap___devm_release_region(struct device *dev, struct resource *parent,
resource_size_t start, resource_size_t n)
{
- if (!nfit_test_release_region(parent, start, n))
+ if (!nfit_test_release_region(dev, parent, start, n))
__devm_release_region(dev, parent, start, n);
}
EXPORT_SYMBOL(__wrap___devm_release_region);
+acpi_status __wrap_acpi_evaluate_object(acpi_handle handle, acpi_string path,
+ struct acpi_object_list *p, struct acpi_buffer *buf)
+{
+ struct nfit_test_resource *nfit_res = get_nfit_res((long) handle);
+ union acpi_object **obj;
+
+ if (!nfit_res || strcmp(path, "_FIT") || !buf)
+ return acpi_evaluate_object(handle, path, p, buf);
+
+ obj = nfit_res->buf;
+ buf->length = sizeof(union acpi_object);
+ buf->pointer = *obj;
+ return AE_OK;
+}
+EXPORT_SYMBOL(__wrap_acpi_evaluate_object);
+
MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index f64c57bf1d4b..c9a6458cb63e 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -132,6 +132,8 @@ static u32 handle[NUM_DCR] = {
[4] = NFIT_DIMM_HANDLE(0, 1, 0, 0, 0),
};
+static unsigned long dimm_fail_cmd_flags[NUM_DCR];
+
struct nfit_test {
struct acpi_nfit_desc acpi_desc;
struct platform_device pdev;
@@ -154,11 +156,14 @@ struct nfit_test {
int (*alloc)(struct nfit_test *t);
void (*setup)(struct nfit_test *t);
int setup_hotplug;
+ union acpi_object **_fit;
+ dma_addr_t _fit_dma;
struct ars_state {
struct nd_cmd_ars_status *ars_status;
unsigned long deadline;
spinlock_t lock;
} ars_state;
+ struct device *dimm_dev[NUM_DCR];
};
static struct nfit_test *to_nfit_test(struct device *dev)
@@ -411,6 +416,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
if (i >= ARRAY_SIZE(handle))
return -ENXIO;
+ if ((1 << func) & dimm_fail_cmd_flags[i])
+ return -EIO;
+
switch (func) {
case ND_CMD_GET_CONFIG_SIZE:
rc = nfit_test_cmd_get_config_size(buf, buf_len);
@@ -428,6 +436,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
break;
case ND_CMD_SMART_THRESHOLD:
rc = nfit_test_cmd_smart_threshold(buf, buf_len);
+ device_lock(&t->pdev.dev);
+ __acpi_nvdimm_notify(t->dimm_dev[i], 0x81);
+ device_unlock(&t->pdev.dev);
break;
default:
return -ENOTTY;
@@ -467,14 +478,12 @@ static struct nfit_test *instances[NUM_NFITS];
static void release_nfit_res(void *data)
{
struct nfit_test_resource *nfit_res = data;
- struct resource *res = nfit_res->res;
spin_lock(&nfit_test_lock);
list_del(&nfit_res->list);
spin_unlock(&nfit_test_lock);
vfree(nfit_res->buf);
- kfree(res);
kfree(nfit_res);
}
@@ -482,12 +491,11 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
void *buf)
{
struct device *dev = &t->pdev.dev;
- struct resource *res = kzalloc(sizeof(*res) * 2, GFP_KERNEL);
struct nfit_test_resource *nfit_res = kzalloc(sizeof(*nfit_res),
GFP_KERNEL);
int rc;
- if (!res || !buf || !nfit_res)
+ if (!buf || !nfit_res)
goto err;
rc = devm_add_action(dev, release_nfit_res, nfit_res);
if (rc)
@@ -496,10 +504,11 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
memset(buf, 0, size);
nfit_res->dev = dev;
nfit_res->buf = buf;
- nfit_res->res = res;
- res->start = *dma;
- res->end = *dma + size - 1;
- res->name = "NFIT";
+ nfit_res->res.start = *dma;
+ nfit_res->res.end = *dma + size - 1;
+ nfit_res->res.name = "NFIT";
+ spin_lock_init(&nfit_res->lock);
+ INIT_LIST_HEAD(&nfit_res->requests);
spin_lock(&nfit_test_lock);
list_add(&nfit_res->list, &t->resources);
spin_unlock(&nfit_test_lock);
@@ -508,7 +517,6 @@ static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma,
err:
if (buf)
vfree(buf);
- kfree(res);
kfree(nfit_res);
return NULL;
}
@@ -533,13 +541,13 @@ static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
continue;
spin_lock(&nfit_test_lock);
list_for_each_entry(n, &t->resources, list) {
- if (addr >= n->res->start && (addr < n->res->start
- + resource_size(n->res))) {
+ if (addr >= n->res.start && (addr < n->res.start
+ + resource_size(&n->res))) {
nfit_res = n;
break;
} else if (addr >= (unsigned long) n->buf
&& (addr < (unsigned long) n->buf
- + resource_size(n->res))) {
+ + resource_size(&n->res))) {
nfit_res = n;
break;
}
@@ -564,6 +572,86 @@ static int ars_state_init(struct device *dev, struct ars_state *ars_state)
return 0;
}
+static void put_dimms(void *data)
+{
+ struct device **dimm_dev = data;
+ int i;
+
+ for (i = 0; i < NUM_DCR; i++)
+ if (dimm_dev[i])
+ device_unregister(dimm_dev[i]);
+}
+
+static struct class *nfit_test_dimm;
+
+static int dimm_name_to_id(struct device *dev)
+{
+ int dimm;
+
+ if (sscanf(dev_name(dev), "test_dimm%d", &dimm) != 1
+ || dimm >= NUM_DCR || dimm < 0)
+ return -ENXIO;
+ return dimm;
+}
+
+
+static ssize_t handle_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int dimm = dimm_name_to_id(dev);
+
+ if (dimm < 0)
+ return dimm;
+
+ return sprintf(buf, "%#x", handle[dimm]);
+}
+DEVICE_ATTR_RO(handle);
+
+static ssize_t fail_cmd_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ int dimm = dimm_name_to_id(dev);
+
+ if (dimm < 0)
+ return dimm;
+
+ return sprintf(buf, "%#lx\n", dimm_fail_cmd_flags[dimm]);
+}
+
+static ssize_t fail_cmd_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t size)
+{
+ int dimm = dimm_name_to_id(dev);
+ unsigned long val;
+ ssize_t rc;
+
+ if (dimm < 0)
+ return dimm;
+
+ rc = kstrtol(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ dimm_fail_cmd_flags[dimm] = val;
+ return size;
+}
+static DEVICE_ATTR_RW(fail_cmd);
+
+static struct attribute *nfit_test_dimm_attributes[] = {
+ &dev_attr_fail_cmd.attr,
+ &dev_attr_handle.attr,
+ NULL,
+};
+
+static struct attribute_group nfit_test_dimm_attribute_group = {
+ .attrs = nfit_test_dimm_attributes,
+};
+
+static const struct attribute_group *nfit_test_dimm_attribute_groups[] = {
+ &nfit_test_dimm_attribute_group,
+ NULL,
+};
+
static int nfit_test0_alloc(struct nfit_test *t)
{
size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
@@ -616,6 +704,21 @@ static int nfit_test0_alloc(struct nfit_test *t)
return -ENOMEM;
}
+ t->_fit = test_alloc(t, sizeof(union acpi_object **), &t->_fit_dma);
+ if (!t->_fit)
+ return -ENOMEM;
+
+ if (devm_add_action_or_reset(&t->pdev.dev, put_dimms, t->dimm_dev))
+ return -ENOMEM;
+ for (i = 0; i < NUM_DCR; i++) {
+ t->dimm_dev[i] = device_create_with_groups(nfit_test_dimm,
+ &t->pdev.dev, 0, NULL,
+ nfit_test_dimm_attribute_groups,
+ "test_dimm%d", i);
+ if (!t->dimm_dev[i])
+ return -ENOMEM;
+ }
+
return ars_state_init(&t->pdev.dev, &t->ars_state);
}
@@ -1409,6 +1512,8 @@ static int nfit_test_probe(struct platform_device *pdev)
struct acpi_nfit_desc *acpi_desc;
struct device *dev = &pdev->dev;
struct nfit_test *nfit_test;
+ struct nfit_mem *nfit_mem;
+ union acpi_object *obj;
int rc;
nfit_test = to_nfit_test(&pdev->dev);
@@ -1476,14 +1581,30 @@ static int nfit_test_probe(struct platform_device *pdev)
if (nfit_test->setup != nfit_test0_setup)
return 0;
- flush_work(&acpi_desc->work);
nfit_test->setup_hotplug = 1;
nfit_test->setup(nfit_test);
- rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
- nfit_test->nfit_size);
- if (rc)
- return rc;
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return -ENOMEM;
+ obj->type = ACPI_TYPE_BUFFER;
+ obj->buffer.length = nfit_test->nfit_size;
+ obj->buffer.pointer = nfit_test->nfit_buf;
+ *(nfit_test->_fit) = obj;
+ __acpi_nfit_notify(&pdev->dev, nfit_test, 0x80);
+
+ /* associate dimm devices with nfit_mem data for notification testing */
+ mutex_lock(&acpi_desc->init_mutex);
+ list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
+ u32 nfit_handle = __to_nfit_memdev(nfit_mem)->device_handle;
+ int i;
+
+ for (i = 0; i < NUM_DCR; i++)
+ if (nfit_handle == handle[i])
+ dev_set_drvdata(nfit_test->dimm_dev[i],
+ nfit_mem);
+ }
+ mutex_unlock(&acpi_desc->init_mutex);
return 0;
}
@@ -1518,6 +1639,10 @@ static __init int nfit_test_init(void)
{
int rc, i;
+ nfit_test_dimm = class_create(THIS_MODULE, "nfit_test_dimm");
+ if (IS_ERR(nfit_test_dimm))
+ return PTR_ERR(nfit_test_dimm);
+
nfit_test_setup(nfit_test_lookup);
for (i = 0; i < NUM_NFITS; i++) {
@@ -1584,6 +1709,7 @@ static __exit void nfit_test_exit(void)
for (i = 0; i < NUM_NFITS; i++)
platform_device_unregister(&instances[i]->pdev);
nfit_test_teardown();
+ class_destroy(nfit_test_dimm);
}
module_init(nfit_test_init);
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 9f18e2a4a862..c281dd2e5e2d 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -13,11 +13,21 @@
#ifndef __NFIT_TEST_H__
#define __NFIT_TEST_H__
#include <linux/list.h>
+#include <linux/ioport.h>
+#include <linux/spinlock_types.h>
+
+struct nfit_test_request {
+ struct list_head list;
+ struct resource res;
+};
struct nfit_test_resource {
+ struct list_head requests;
struct list_head list;
- struct resource *res;
+ struct resource res;
struct device *dev;
+ spinlock_t lock;
+ int req_count;
void *buf;
};