diff options
Diffstat (limited to 'drivers/cxl/pmem.c')
-rw-r--r-- | drivers/cxl/pmem.c | 382 |
1 files changed, 346 insertions, 36 deletions
diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index ceb2115981e5..4c627d67281a 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -7,6 +7,7 @@ #include <linux/ndctl.h> #include <linux/async.h> #include <linux/slab.h> +#include <linux/nd.h> #include "cxlmem.h" #include "cxl.h" @@ -19,14 +20,33 @@ static struct workqueue_struct *cxl_pmem_wq; static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX); -static void clear_exclusive(void *cxlm) +static void clear_exclusive(void *cxlds) { - clear_exclusive_cxl_commands(cxlm, exclusive_cmds); + clear_exclusive_cxl_commands(cxlds, exclusive_cmds); } static void unregister_nvdimm(void *nvdimm) { + struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); + struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge; + struct cxl_pmem_region *cxlr_pmem; + unsigned long index; + + device_lock(&cxl_nvb->dev); + dev_set_drvdata(&cxl_nvd->dev, NULL); + xa_for_each(&cxl_nvd->pmem_regions, index, cxlr_pmem) { + get_device(&cxlr_pmem->dev); + device_unlock(&cxl_nvb->dev); + + device_release_driver(&cxlr_pmem->dev); + put_device(&cxlr_pmem->dev); + + device_lock(&cxl_nvb->dev); + } + device_unlock(&cxl_nvb->dev); + nvdimm_delete(nvdimm); + cxl_nvd->bridge = NULL; } static int cxl_nvdimm_probe(struct device *dev) @@ -34,12 +54,12 @@ static int cxl_nvdimm_probe(struct device *dev) struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; unsigned long flags = 0, cmd_mask = 0; - struct cxl_mem *cxlm = cxlmd->cxlm; + struct cxl_dev_state *cxlds = cxlmd->cxlds; struct cxl_nvdimm_bridge *cxl_nvb; struct nvdimm *nvdimm; int rc; - cxl_nvb = cxl_find_nvdimm_bridge(cxl_nvd); + cxl_nvb = cxl_find_nvdimm_bridge(dev); if (!cxl_nvb) return -ENXIO; @@ -49,8 +69,8 @@ static int cxl_nvdimm_probe(struct device *dev) goto out; } - set_exclusive_cxl_commands(cxlm, exclusive_cmds); - rc = devm_add_action_or_reset(dev, clear_exclusive, cxlm); + set_exclusive_cxl_commands(cxlds, exclusive_cmds); + rc = devm_add_action_or_reset(dev, clear_exclusive, cxlds); if (rc) goto out; @@ -66,6 +86,7 @@ static int cxl_nvdimm_probe(struct device *dev) } dev_set_drvdata(dev, nvdimm); + cxl_nvd->bridge = cxl_nvb; rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); out: device_unlock(&cxl_nvb->dev); @@ -80,7 +101,7 @@ static struct cxl_driver cxl_nvdimm_driver = { .id = CXL_DEVICE_NVDIMM, }; -static int cxl_pmem_get_config_size(struct cxl_mem *cxlm, +static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds, struct nd_cmd_get_config_size *cmd, unsigned int buf_len) { @@ -88,14 +109,14 @@ static int cxl_pmem_get_config_size(struct cxl_mem *cxlm, return -EINVAL; *cmd = (struct nd_cmd_get_config_size) { - .config_size = cxlm->lsa_size, - .max_xfer = cxlm->payload_size, + .config_size = cxlds->lsa_size, + .max_xfer = cxlds->payload_size - sizeof(struct cxl_mbox_set_lsa), }; return 0; } -static int cxl_pmem_get_config_data(struct cxl_mem *cxlm, +static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds, struct nd_cmd_get_config_data_hdr *cmd, unsigned int buf_len) { @@ -108,19 +129,18 @@ static int cxl_pmem_get_config_data(struct cxl_mem *cxlm, return -EINVAL; get_lsa = (struct cxl_mbox_get_lsa) { - .offset = cmd->in_offset, - .length = cmd->in_length, + .offset = cpu_to_le32(cmd->in_offset), + .length = cpu_to_le32(cmd->in_length), }; - rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_LSA, &get_lsa, - sizeof(get_lsa), cmd->out_buf, - cmd->in_length); + rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LSA, &get_lsa, + sizeof(get_lsa), cmd->out_buf, cmd->in_length); cmd->status = 0; return rc; } -static int cxl_pmem_set_config_data(struct cxl_mem *cxlm, +static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds, struct nd_cmd_set_config_hdr *cmd, unsigned int buf_len) { @@ -131,7 +151,7 @@ static int cxl_pmem_set_config_data(struct cxl_mem *cxlm, return -EINVAL; /* 4-byte status follows the input data in the payload */ - if (struct_size(cmd, in_buf, cmd->in_length) + 4 > buf_len) + if (size_add(struct_size(cmd, in_buf, cmd->in_length), 4) > buf_len) return -EINVAL; set_lsa = @@ -140,13 +160,13 @@ static int cxl_pmem_set_config_data(struct cxl_mem *cxlm, return -ENOMEM; *set_lsa = (struct cxl_mbox_set_lsa) { - .offset = cmd->in_offset, + .offset = cpu_to_le32(cmd->in_offset), }; memcpy(set_lsa->data, cmd->in_buf, cmd->in_length); - rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_SET_LSA, set_lsa, - struct_size(set_lsa, data, cmd->in_length), - NULL, 0); + rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_LSA, set_lsa, + struct_size(set_lsa, data, cmd->in_length), + NULL, 0); /* * Set "firmware" status (4-packed bytes at the end of the input @@ -164,18 +184,18 @@ static int cxl_pmem_nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd, struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm); unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm); struct cxl_memdev *cxlmd = cxl_nvd->cxlmd; - struct cxl_mem *cxlm = cxlmd->cxlm; + struct cxl_dev_state *cxlds = cxlmd->cxlds; if (!test_bit(cmd, &cmd_mask)) return -ENOTTY; switch (cmd) { case ND_CMD_GET_CONFIG_SIZE: - return cxl_pmem_get_config_size(cxlm, buf, buf_len); + return cxl_pmem_get_config_size(cxlds, buf, buf_len); case ND_CMD_GET_CONFIG_DATA: - return cxl_pmem_get_config_data(cxlm, buf, buf_len); + return cxl_pmem_get_config_data(cxlds, buf, buf_len); case ND_CMD_SET_CONFIG_DATA: - return cxl_pmem_set_config_data(cxlm, buf, buf_len); + return cxl_pmem_set_config_data(cxlds, buf, buf_len); default: return -ENOTTY; } @@ -205,15 +225,38 @@ static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb) return cxl_nvb->nvdimm_bus != NULL; } -static int cxl_nvdimm_release_driver(struct device *dev, void *data) +static int cxl_nvdimm_release_driver(struct device *dev, void *cxl_nvb) { + struct cxl_nvdimm *cxl_nvd; + if (!is_cxl_nvdimm(dev)) return 0; + + cxl_nvd = to_cxl_nvdimm(dev); + if (cxl_nvd->bridge != cxl_nvb) + return 0; + + device_release_driver(dev); + return 0; +} + +static int cxl_pmem_region_release_driver(struct device *dev, void *cxl_nvb) +{ + struct cxl_pmem_region *cxlr_pmem; + + if (!is_cxl_pmem_region(dev)) + return 0; + + cxlr_pmem = to_cxl_pmem_region(dev); + if (cxlr_pmem->bridge != cxl_nvb) + return 0; + device_release_driver(dev); return 0; } -static void offline_nvdimm_bus(struct nvdimm_bus *nvdimm_bus) +static void offline_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb, + struct nvdimm_bus *nvdimm_bus) { if (!nvdimm_bus) return; @@ -223,7 +266,10 @@ static void offline_nvdimm_bus(struct nvdimm_bus *nvdimm_bus) * nvdimm_bus_unregister() rips the nvdimm objects out from * underneath them. */ - bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_release_driver); + bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb, + cxl_pmem_region_release_driver); + bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb, + cxl_nvdimm_release_driver); nvdimm_bus_unregister(nvdimm_bus); } @@ -261,19 +307,29 @@ static void cxl_nvb_update_state(struct work_struct *work) dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc); } - offline_nvdimm_bus(victim_bus); + offline_nvdimm_bus(cxl_nvb, victim_bus); put_device(&cxl_nvb->dev); } +static void cxl_nvdimm_bridge_state_work(struct cxl_nvdimm_bridge *cxl_nvb) +{ + /* + * Take a reference that the workqueue will drop if new work + * gets queued. + */ + get_device(&cxl_nvb->dev); + if (!queue_work(cxl_pmem_wq, &cxl_nvb->state_work)) + put_device(&cxl_nvb->dev); +} + static void cxl_nvdimm_bridge_remove(struct device *dev) { struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev); if (cxl_nvb->state == CXL_NVB_ONLINE) cxl_nvb->state = CXL_NVB_OFFLINE; - if (queue_work(cxl_pmem_wq, &cxl_nvb->state_work)) - get_device(&cxl_nvb->dev); + cxl_nvdimm_bridge_state_work(cxl_nvb); } static int cxl_nvdimm_bridge_probe(struct device *dev) @@ -294,8 +350,7 @@ static int cxl_nvdimm_bridge_probe(struct device *dev) } cxl_nvb->state = CXL_NVB_ONLINE; - if (queue_work(cxl_pmem_wq, &cxl_nvb->state_work)) - get_device(&cxl_nvb->dev); + cxl_nvdimm_bridge_state_work(cxl_nvb); return 0; } @@ -307,11 +362,258 @@ static struct cxl_driver cxl_nvdimm_bridge_driver = { .id = CXL_DEVICE_NVDIMM_BRIDGE, }; +static int match_cxl_nvdimm(struct device *dev, void *data) +{ + return is_cxl_nvdimm(dev); +} + +static void unregister_nvdimm_region(void *nd_region) +{ + nvdimm_region_delete(nd_region); +} + +static int cxl_nvdimm_add_region(struct cxl_nvdimm *cxl_nvd, + struct cxl_pmem_region *cxlr_pmem) +{ + int rc; + + rc = xa_insert(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem, + cxlr_pmem, GFP_KERNEL); + if (rc) + return rc; + + get_device(&cxlr_pmem->dev); + return 0; +} + +static void cxl_nvdimm_del_region(struct cxl_nvdimm *cxl_nvd, + struct cxl_pmem_region *cxlr_pmem) +{ + /* + * It is possible this is called without a corresponding + * cxl_nvdimm_add_region for @cxlr_pmem + */ + cxlr_pmem = xa_erase(&cxl_nvd->pmem_regions, (unsigned long)cxlr_pmem); + if (cxlr_pmem) + put_device(&cxlr_pmem->dev); +} + +static void release_mappings(void *data) +{ + int i; + struct cxl_pmem_region *cxlr_pmem = data; + struct cxl_nvdimm_bridge *cxl_nvb = cxlr_pmem->bridge; + + device_lock(&cxl_nvb->dev); + for (i = 0; i < cxlr_pmem->nr_mappings; i++) { + struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; + struct cxl_nvdimm *cxl_nvd = m->cxl_nvd; + + cxl_nvdimm_del_region(cxl_nvd, cxlr_pmem); + } + device_unlock(&cxl_nvb->dev); +} + +static void cxlr_pmem_remove_resource(void *res) +{ + remove_resource(res); +} + +struct cxl_pmem_region_info { + u64 offset; + u64 serial; +}; + +static int cxl_pmem_region_probe(struct device *dev) +{ + struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE]; + struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev); + struct cxl_region *cxlr = cxlr_pmem->cxlr; + struct cxl_pmem_region_info *info = NULL; + struct cxl_nvdimm_bridge *cxl_nvb; + struct nd_interleave_set *nd_set; + struct nd_region_desc ndr_desc; + struct cxl_nvdimm *cxl_nvd; + struct nvdimm *nvdimm; + struct resource *res; + int rc, i = 0; + + cxl_nvb = cxl_find_nvdimm_bridge(&cxlr_pmem->mapping[0].cxlmd->dev); + if (!cxl_nvb) { + dev_dbg(dev, "bridge not found\n"); + return -ENXIO; + } + cxlr_pmem->bridge = cxl_nvb; + + device_lock(&cxl_nvb->dev); + if (!cxl_nvb->nvdimm_bus) { + dev_dbg(dev, "nvdimm bus not found\n"); + rc = -ENXIO; + goto out_nvb; + } + + memset(&mappings, 0, sizeof(mappings)); + memset(&ndr_desc, 0, sizeof(ndr_desc)); + + res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL); + if (!res) { + rc = -ENOMEM; + goto out_nvb; + } + + res->name = "Persistent Memory"; + res->start = cxlr_pmem->hpa_range.start; + res->end = cxlr_pmem->hpa_range.end; + res->flags = IORESOURCE_MEM; + res->desc = IORES_DESC_PERSISTENT_MEMORY; + + rc = insert_resource(&iomem_resource, res); + if (rc) + goto out_nvb; + + rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res); + if (rc) + goto out_nvb; + + ndr_desc.res = res; + ndr_desc.provider_data = cxlr_pmem; + + ndr_desc.numa_node = memory_add_physaddr_to_nid(res->start); + ndr_desc.target_node = phys_to_target_node(res->start); + if (ndr_desc.target_node == NUMA_NO_NODE) { + ndr_desc.target_node = ndr_desc.numa_node; + dev_dbg(&cxlr->dev, "changing target node from %d to %d", + NUMA_NO_NODE, ndr_desc.target_node); + } + + nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL); + if (!nd_set) { + rc = -ENOMEM; + goto out_nvb; + } + + ndr_desc.memregion = cxlr->id; + set_bit(ND_REGION_CXL, &ndr_desc.flags); + set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags); + + info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL); + if (!info) { + rc = -ENOMEM; + goto out_nvb; + } + + rc = devm_add_action_or_reset(dev, release_mappings, cxlr_pmem); + if (rc) + goto out_nvd; + + for (i = 0; i < cxlr_pmem->nr_mappings; i++) { + struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i]; + struct cxl_memdev *cxlmd = m->cxlmd; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct device *d; + + d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm); + if (!d) { + dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i, + dev_name(&cxlmd->dev)); + rc = -ENODEV; + goto out_nvd; + } + + /* safe to drop ref now with bridge lock held */ + put_device(d); + + cxl_nvd = to_cxl_nvdimm(d); + nvdimm = dev_get_drvdata(&cxl_nvd->dev); + if (!nvdimm) { + dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i, + dev_name(&cxlmd->dev)); + rc = -ENODEV; + goto out_nvd; + } + + /* + * Pin the region per nvdimm device as those may be released + * out-of-order with respect to the region, and a single nvdimm + * maybe associated with multiple regions + */ + rc = cxl_nvdimm_add_region(cxl_nvd, cxlr_pmem); + if (rc) + goto out_nvd; + m->cxl_nvd = cxl_nvd; + mappings[i] = (struct nd_mapping_desc) { + .nvdimm = nvdimm, + .start = m->start, + .size = m->size, + .position = i, + }; + info[i].offset = m->start; + info[i].serial = cxlds->serial; + } + ndr_desc.num_mappings = cxlr_pmem->nr_mappings; + ndr_desc.mapping = mappings; + + /* + * TODO enable CXL labels which skip the need for 'interleave-set cookie' + */ + nd_set->cookie1 = + nd_fletcher64(info, sizeof(*info) * cxlr_pmem->nr_mappings, 0); + nd_set->cookie2 = nd_set->cookie1; + ndr_desc.nd_set = nd_set; + + cxlr_pmem->nd_region = + nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc); + if (!cxlr_pmem->nd_region) { + rc = -ENOMEM; + goto out_nvd; + } + + rc = devm_add_action_or_reset(dev, unregister_nvdimm_region, + cxlr_pmem->nd_region); +out_nvd: + kfree(info); +out_nvb: + device_unlock(&cxl_nvb->dev); + put_device(&cxl_nvb->dev); + + return rc; +} + +static struct cxl_driver cxl_pmem_region_driver = { + .name = "cxl_pmem_region", + .probe = cxl_pmem_region_probe, + .id = CXL_DEVICE_PMEM_REGION, +}; + +/* + * Return all bridges to the CXL_NVB_NEW state to invalidate any + * ->state_work referring to the now destroyed cxl_pmem_wq. + */ +static int cxl_nvdimm_bridge_reset(struct device *dev, void *data) +{ + struct cxl_nvdimm_bridge *cxl_nvb; + + if (!is_cxl_nvdimm_bridge(dev)) + return 0; + + cxl_nvb = to_cxl_nvdimm_bridge(dev); + device_lock(dev); + cxl_nvb->state = CXL_NVB_NEW; + device_unlock(dev); + + return 0; +} + +static void destroy_cxl_pmem_wq(void) +{ + destroy_workqueue(cxl_pmem_wq); + bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_bridge_reset); +} + static __init int cxl_pmem_init(void) { int rc; - set_bit(CXL_MEM_COMMAND_ID_SET_PARTITION_INFO, exclusive_cmds); set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds); set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds); @@ -327,20 +629,27 @@ static __init int cxl_pmem_init(void) if (rc) goto err_nvdimm; + rc = cxl_driver_register(&cxl_pmem_region_driver); + if (rc) + goto err_region; + return 0; +err_region: + cxl_driver_unregister(&cxl_nvdimm_driver); err_nvdimm: cxl_driver_unregister(&cxl_nvdimm_bridge_driver); err_bridge: - destroy_workqueue(cxl_pmem_wq); + destroy_cxl_pmem_wq(); return rc; } static __exit void cxl_pmem_exit(void) { + cxl_driver_unregister(&cxl_pmem_region_driver); cxl_driver_unregister(&cxl_nvdimm_driver); cxl_driver_unregister(&cxl_nvdimm_bridge_driver); - destroy_workqueue(cxl_pmem_wq); + destroy_cxl_pmem_wq(); } MODULE_LICENSE("GPL v2"); @@ -349,3 +658,4 @@ module_exit(cxl_pmem_exit); MODULE_IMPORT_NS(CXL); MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE); MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM); +MODULE_ALIAS_CXL(CXL_DEVICE_PMEM_REGION); |