Diffstat (limited to 'drivers/cxl/core')
-rw-r--r--  drivers/cxl/core/Makefile  |    8
-rw-r--r--  drivers/cxl/core/bus.c     |  677
-rw-r--r--  drivers/cxl/core/core.h    |   53
-rw-r--r--  drivers/cxl/core/hdm.c     |  855
-rw-r--r--  drivers/cxl/core/mbox.c    |  561
-rw-r--r--  drivers/cxl/core/memdev.c  |  102
-rw-r--r--  drivers/cxl/core/pci.c     |  627
-rw-r--r--  drivers/cxl/core/pmem.c    |   42
-rw-r--r--  drivers/cxl/core/port.c    | 1905
-rw-r--r--  drivers/cxl/core/region.c  | 1953
-rw-r--r--  drivers/cxl/core/regs.c    |   75
-rw-r--r--  drivers/cxl/core/suspend.c |   24
12 files changed, 5910 insertions(+), 972 deletions(-)
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 07eb8e1fb8a6..79c7257f4107 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -1,9 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_CXL_BUS) += cxl_core.o
+obj-$(CONFIG_CXL_SUSPEND) += suspend.o
-ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=CXL -I$(srctree)/drivers/cxl
-cxl_core-y := bus.o
+ccflags-y += -I$(srctree)/drivers/cxl
+cxl_core-y := port.o
cxl_core-y += pmem.o
cxl_core-y += regs.o
cxl_core-y += memdev.o
cxl_core-y += mbox.o
+cxl_core-y += pci.o
+cxl_core-y += hdm.o
+cxl_core-$(CONFIG_CXL_REGION) += region.o
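
With -DDEFAULT_SYMBOL_NAMESPACE=CXL dropped from ccflags, the symbol namespace moves to the export sites; the new files below use EXPORT_SYMBOL_NS_GPL(..., CXL) directly. A minimal sketch of that pattern, with a hypothetical symbol and consumer (not part of this patch):

/* Sketch: per-export namespace tagging; cxl_example_setup() is hypothetical. */
#include <linux/module.h>

int cxl_example_setup(void)
{
	return 0;
}
/* The CXL namespace is now named at each export instead of via ccflags. */
EXPORT_SYMBOL_NS_GPL(cxl_example_setup, CXL);

/* A consumer module must import the namespace, or modpost complains: */
MODULE_IMPORT_NS(CXL);
MODULE_LICENSE("GPL");
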
diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
deleted file mode 100644
index ebd061d03950..000000000000
--- a/drivers/cxl/core/bus.c
+++ /dev/null
@@ -1,677 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
-#include <linux/io-64-nonatomic-lo-hi.h>
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/idr.h>
-#include <cxlmem.h>
-#include <cxl.h>
-#include "core.h"
-
-/**
- * DOC: cxl core
- *
- * The CXL core provides a set of interfaces that can be consumed by CXL aware
- * drivers. The interfaces allow for creation, modification, and destruction of
- * regions, memory devices, ports, and decoders. CXL aware drivers must register
- * with the CXL core via these interfaces in order to be able to participate in
- * cross-device interleave coordination. The CXL core also establishes and
- * maintains the bridge to the nvdimm subsystem.
- *
- * CXL core introduces sysfs hierarchy to control the devices that are
- * instantiated by the core.
- */
-
-static DEFINE_IDA(cxl_port_ida);
-
-static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- return sysfs_emit(buf, "%s\n", dev->type->name);
-}
-static DEVICE_ATTR_RO(devtype);
-
-static struct attribute *cxl_base_attributes[] = {
- &dev_attr_devtype.attr,
- NULL,
-};
-
-struct attribute_group cxl_base_attribute_group = {
- .attrs = cxl_base_attributes,
-};
-
-static ssize_t start_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct cxl_decoder *cxld = to_cxl_decoder(dev);
-
- return sysfs_emit(buf, "%#llx\n", cxld->range.start);
-}
-static DEVICE_ATTR_RO(start);
-
-static ssize_t size_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- struct cxl_decoder *cxld = to_cxl_decoder(dev);
-
- return sysfs_emit(buf, "%#llx\n", range_len(&cxld->range));
-}
-static DEVICE_ATTR_RO(size);
-
-#define CXL_DECODER_FLAG_ATTR(name, flag) \
-static ssize_t name##_show(struct device *dev, \
- struct device_attribute *attr, char *buf) \
-{ \
- struct cxl_decoder *cxld = to_cxl_decoder(dev); \
- \
- return sysfs_emit(buf, "%s\n", \
- (cxld->flags & (flag)) ? "1" : "0"); \
-} \
-static DEVICE_ATTR_RO(name)
-
-CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM);
-CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM);
-CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2);
-CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3);
-CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK);
-
-static ssize_t target_type_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct cxl_decoder *cxld = to_cxl_decoder(dev);
-
- switch (cxld->target_type) {
- case CXL_DECODER_ACCELERATOR:
- return sysfs_emit(buf, "accelerator\n");
- case CXL_DECODER_EXPANDER:
- return sysfs_emit(buf, "expander\n");
- }
- return -ENXIO;
-}
-static DEVICE_ATTR_RO(target_type);
-
-static ssize_t target_list_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct cxl_decoder *cxld = to_cxl_decoder(dev);
- ssize_t offset = 0;
- int i, rc = 0;
-
- device_lock(dev);
- for (i = 0; i < cxld->interleave_ways; i++) {
- struct cxl_dport *dport = cxld->target[i];
- struct cxl_dport *next = NULL;
-
- if (!dport)
- break;
-
- if (i + 1 < cxld->interleave_ways)
- next = cxld->target[i + 1];
- rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id,
- next ? "," : "");
- if (rc < 0)
- break;
- offset += rc;
- }
- device_unlock(dev);
-
- if (rc < 0)
- return rc;
-
- rc = sysfs_emit_at(buf, offset, "\n");
- if (rc < 0)
- return rc;
-
- return offset + rc;
-}
-static DEVICE_ATTR_RO(target_list);
-
-static struct attribute *cxl_decoder_base_attrs[] = {
- &dev_attr_start.attr,
- &dev_attr_size.attr,
- &dev_attr_locked.attr,
- &dev_attr_target_list.attr,
- NULL,
-};
-
-static struct attribute_group cxl_decoder_base_attribute_group = {
- .attrs = cxl_decoder_base_attrs,
-};
-
-static struct attribute *cxl_decoder_root_attrs[] = {
- &dev_attr_cap_pmem.attr,
- &dev_attr_cap_ram.attr,
- &dev_attr_cap_type2.attr,
- &dev_attr_cap_type3.attr,
- NULL,
-};
-
-static struct attribute_group cxl_decoder_root_attribute_group = {
- .attrs = cxl_decoder_root_attrs,
-};
-
-static const struct attribute_group *cxl_decoder_root_attribute_groups[] = {
- &cxl_decoder_root_attribute_group,
- &cxl_decoder_base_attribute_group,
- &cxl_base_attribute_group,
- NULL,
-};
-
-static struct attribute *cxl_decoder_switch_attrs[] = {
- &dev_attr_target_type.attr,
- NULL,
-};
-
-static struct attribute_group cxl_decoder_switch_attribute_group = {
- .attrs = cxl_decoder_switch_attrs,
-};
-
-static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = {
- &cxl_decoder_switch_attribute_group,
- &cxl_decoder_base_attribute_group,
- &cxl_base_attribute_group,
- NULL,
-};
-
-static void cxl_decoder_release(struct device *dev)
-{
- struct cxl_decoder *cxld = to_cxl_decoder(dev);
- struct cxl_port *port = to_cxl_port(dev->parent);
-
- ida_free(&port->decoder_ida, cxld->id);
- kfree(cxld);
-}
-
-static const struct device_type cxl_decoder_switch_type = {
- .name = "cxl_decoder_switch",
- .release = cxl_decoder_release,
- .groups = cxl_decoder_switch_attribute_groups,
-};
-
-static const struct device_type cxl_decoder_root_type = {
- .name = "cxl_decoder_root",
- .release = cxl_decoder_release,
- .groups = cxl_decoder_root_attribute_groups,
-};
-
-bool is_root_decoder(struct device *dev)
-{
- return dev->type == &cxl_decoder_root_type;
-}
-EXPORT_SYMBOL_GPL(is_root_decoder);
-
-struct cxl_decoder *to_cxl_decoder(struct device *dev)
-{
- if (dev_WARN_ONCE(dev, dev->type->release != cxl_decoder_release,
- "not a cxl_decoder device\n"))
- return NULL;
- return container_of(dev, struct cxl_decoder, dev);
-}
-EXPORT_SYMBOL_GPL(to_cxl_decoder);
-
-static void cxl_dport_release(struct cxl_dport *dport)
-{
- list_del(&dport->list);
- put_device(dport->dport);
- kfree(dport);
-}
-
-static void cxl_port_release(struct device *dev)
-{
- struct cxl_port *port = to_cxl_port(dev);
- struct cxl_dport *dport, *_d;
-
- device_lock(dev);
- list_for_each_entry_safe(dport, _d, &port->dports, list)
- cxl_dport_release(dport);
- device_unlock(dev);
- ida_free(&cxl_port_ida, port->id);
- kfree(port);
-}
-
-static const struct attribute_group *cxl_port_attribute_groups[] = {
- &cxl_base_attribute_group,
- NULL,
-};
-
-static const struct device_type cxl_port_type = {
- .name = "cxl_port",
- .release = cxl_port_release,
- .groups = cxl_port_attribute_groups,
-};
-
-struct cxl_port *to_cxl_port(struct device *dev)
-{
- if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
- "not a cxl_port device\n"))
- return NULL;
- return container_of(dev, struct cxl_port, dev);
-}
-
-static void unregister_port(void *_port)
-{
- struct cxl_port *port = _port;
- struct cxl_dport *dport;
-
- device_lock(&port->dev);
- list_for_each_entry(dport, &port->dports, list) {
- char link_name[CXL_TARGET_STRLEN];
-
- if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d",
- dport->port_id) >= CXL_TARGET_STRLEN)
- continue;
- sysfs_remove_link(&port->dev.kobj, link_name);
- }
- device_unlock(&port->dev);
- device_unregister(&port->dev);
-}
-
-static void cxl_unlink_uport(void *_port)
-{
- struct cxl_port *port = _port;
-
- sysfs_remove_link(&port->dev.kobj, "uport");
-}
-
-static int devm_cxl_link_uport(struct device *host, struct cxl_port *port)
-{
- int rc;
-
- rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport");
- if (rc)
- return rc;
- return devm_add_action_or_reset(host, cxl_unlink_uport, port);
-}
-
-static struct cxl_port *cxl_port_alloc(struct device *uport,
- resource_size_t component_reg_phys,
- struct cxl_port *parent_port)
-{
- struct cxl_port *port;
- struct device *dev;
- int rc;
-
- port = kzalloc(sizeof(*port), GFP_KERNEL);
- if (!port)
- return ERR_PTR(-ENOMEM);
-
- rc = ida_alloc(&cxl_port_ida, GFP_KERNEL);
- if (rc < 0)
- goto err;
- port->id = rc;
-
- /*
- * The top-level cxl_port "cxl_root" does not have a cxl_port as
- * its parent and it does not have any corresponding component
- * registers as its decode is described by a fixed platform
- * description.
- */
- dev = &port->dev;
- if (parent_port)
- dev->parent = &parent_port->dev;
- else
- dev->parent = uport;
-
- port->uport = uport;
- port->component_reg_phys = component_reg_phys;
- ida_init(&port->decoder_ida);
- INIT_LIST_HEAD(&port->dports);
-
- device_initialize(dev);
- device_set_pm_not_required(dev);
- dev->bus = &cxl_bus_type;
- dev->type = &cxl_port_type;
-
- return port;
-
-err:
- kfree(port);
- return ERR_PTR(rc);
-}
-
-/**
- * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy
- * @host: host device for devm operations
- * @uport: "physical" device implementing this upstream port
- * @component_reg_phys: (optional) for configurable cxl_port instances
- * @parent_port: next hop up in the CXL memory decode hierarchy
- */
-struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
- resource_size_t component_reg_phys,
- struct cxl_port *parent_port)
-{
- struct cxl_port *port;
- struct device *dev;
- int rc;
-
- port = cxl_port_alloc(uport, component_reg_phys, parent_port);
- if (IS_ERR(port))
- return port;
-
- dev = &port->dev;
- if (parent_port)
- rc = dev_set_name(dev, "port%d", port->id);
- else
- rc = dev_set_name(dev, "root%d", port->id);
- if (rc)
- goto err;
-
- rc = device_add(dev);
- if (rc)
- goto err;
-
- rc = devm_add_action_or_reset(host, unregister_port, port);
- if (rc)
- return ERR_PTR(rc);
-
- rc = devm_cxl_link_uport(host, port);
- if (rc)
- return ERR_PTR(rc);
-
- return port;
-
-err:
- put_device(dev);
- return ERR_PTR(rc);
-}
-EXPORT_SYMBOL_GPL(devm_cxl_add_port);
-
-static struct cxl_dport *find_dport(struct cxl_port *port, int id)
-{
- struct cxl_dport *dport;
-
- device_lock_assert(&port->dev);
- list_for_each_entry (dport, &port->dports, list)
- if (dport->port_id == id)
- return dport;
- return NULL;
-}
-
-static int add_dport(struct cxl_port *port, struct cxl_dport *new)
-{
- struct cxl_dport *dup;
-
- device_lock(&port->dev);
- dup = find_dport(port, new->port_id);
- if (dup)
- dev_err(&port->dev,
- "unable to add dport%d-%s non-unique port id (%s)\n",
- new->port_id, dev_name(new->dport),
- dev_name(dup->dport));
- else
- list_add_tail(&new->list, &port->dports);
- device_unlock(&port->dev);
-
- return dup ? -EEXIST : 0;
-}
-
-/**
- * cxl_add_dport - append downstream port data to a cxl_port
- * @port: the cxl_port that references this dport
- * @dport_dev: firmware or PCI device representing the dport
- * @port_id: identifier for this dport in a decoder's target list
- * @component_reg_phys: optional location of CXL component registers
- *
- * Note that all allocations and links are undone by cxl_port deletion
- * and release.
- */
-int cxl_add_dport(struct cxl_port *port, struct device *dport_dev, int port_id,
- resource_size_t component_reg_phys)
-{
- char link_name[CXL_TARGET_STRLEN];
- struct cxl_dport *dport;
- int rc;
-
- if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >=
- CXL_TARGET_STRLEN)
- return -EINVAL;
-
- dport = kzalloc(sizeof(*dport), GFP_KERNEL);
- if (!dport)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&dport->list);
- dport->dport = get_device(dport_dev);
- dport->port_id = port_id;
- dport->component_reg_phys = component_reg_phys;
- dport->port = port;
-
- rc = add_dport(port, dport);
- if (rc)
- goto err;
-
- rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name);
- if (rc)
- goto err;
-
- return 0;
-err:
- cxl_dport_release(dport);
- return rc;
-}
-EXPORT_SYMBOL_GPL(cxl_add_dport);
-
-static int decoder_populate_targets(struct cxl_decoder *cxld,
- struct cxl_port *port, int *target_map)
-{
- int rc = 0, i;
-
- if (!target_map)
- return 0;
-
- device_lock(&port->dev);
- if (list_empty(&port->dports)) {
- rc = -EINVAL;
- goto out_unlock;
- }
-
- for (i = 0; i < cxld->nr_targets; i++) {
- struct cxl_dport *dport = find_dport(port, target_map[i]);
-
- if (!dport) {
- rc = -ENXIO;
- goto out_unlock;
- }
- cxld->target[i] = dport;
- }
-
-out_unlock:
- device_unlock(&port->dev);
-
- return rc;
-}
-
-struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets)
-{
- struct cxl_decoder *cxld, cxld_const_init = {
- .nr_targets = nr_targets,
- };
- struct device *dev;
- int rc = 0;
-
- if (nr_targets > CXL_DECODER_MAX_INTERLEAVE || nr_targets < 1)
- return ERR_PTR(-EINVAL);
-
- cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL);
- if (!cxld)
- return ERR_PTR(-ENOMEM);
- memcpy(cxld, &cxld_const_init, sizeof(cxld_const_init));
-
- rc = ida_alloc(&port->decoder_ida, GFP_KERNEL);
- if (rc < 0)
- goto err;
-
- cxld->id = rc;
- dev = &cxld->dev;
- device_initialize(dev);
- device_set_pm_not_required(dev);
- dev->parent = &port->dev;
- dev->bus = &cxl_bus_type;
-
- /* root ports do not have a cxl_port_type parent */
- if (port->dev.parent->type == &cxl_port_type)
- dev->type = &cxl_decoder_switch_type;
- else
- dev->type = &cxl_decoder_root_type;
-
- return cxld;
-err:
- kfree(cxld);
- return ERR_PTR(rc);
-}
-EXPORT_SYMBOL_GPL(cxl_decoder_alloc);
-
-int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
-{
- struct cxl_port *port;
- struct device *dev;
- int rc;
-
- if (WARN_ON_ONCE(!cxld))
- return -EINVAL;
-
- if (WARN_ON_ONCE(IS_ERR(cxld)))
- return PTR_ERR(cxld);
-
- if (cxld->interleave_ways < 1)
- return -EINVAL;
-
- port = to_cxl_port(cxld->dev.parent);
- rc = decoder_populate_targets(cxld, port, target_map);
- if (rc)
- return rc;
-
- dev = &cxld->dev;
- rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id);
- if (rc)
- return rc;
-
- return device_add(dev);
-}
-EXPORT_SYMBOL_GPL(cxl_decoder_add);
-
-static void cxld_unregister(void *dev)
-{
- device_unregister(dev);
-}
-
-int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
-{
- return devm_add_action_or_reset(host, cxld_unregister, &cxld->dev);
-}
-EXPORT_SYMBOL_GPL(cxl_decoder_autoremove);
-
-/**
- * __cxl_driver_register - register a driver for the cxl bus
- * @cxl_drv: cxl driver structure to attach
- * @owner: owning module/driver
- * @modname: KBUILD_MODNAME for parent driver
- */
-int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
- const char *modname)
-{
- if (!cxl_drv->probe) {
- pr_debug("%s ->probe() must be specified\n", modname);
- return -EINVAL;
- }
-
- if (!cxl_drv->name) {
- pr_debug("%s ->name must be specified\n", modname);
- return -EINVAL;
- }
-
- if (!cxl_drv->id) {
- pr_debug("%s ->id must be specified\n", modname);
- return -EINVAL;
- }
-
- cxl_drv->drv.bus = &cxl_bus_type;
- cxl_drv->drv.owner = owner;
- cxl_drv->drv.mod_name = modname;
- cxl_drv->drv.name = cxl_drv->name;
-
- return driver_register(&cxl_drv->drv);
-}
-EXPORT_SYMBOL_GPL(__cxl_driver_register);
-
-void cxl_driver_unregister(struct cxl_driver *cxl_drv)
-{
- driver_unregister(&cxl_drv->drv);
-}
-EXPORT_SYMBOL_GPL(cxl_driver_unregister);
-
-static int cxl_device_id(struct device *dev)
-{
- if (dev->type == &cxl_nvdimm_bridge_type)
- return CXL_DEVICE_NVDIMM_BRIDGE;
- if (dev->type == &cxl_nvdimm_type)
- return CXL_DEVICE_NVDIMM;
- return 0;
-}
-
-static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
- return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT,
- cxl_device_id(dev));
-}
-
-static int cxl_bus_match(struct device *dev, struct device_driver *drv)
-{
- return cxl_device_id(dev) == to_cxl_drv(drv)->id;
-}
-
-static int cxl_bus_probe(struct device *dev)
-{
- return to_cxl_drv(dev->driver)->probe(dev);
-}
-
-static void cxl_bus_remove(struct device *dev)
-{
- struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver);
-
- if (cxl_drv->remove)
- cxl_drv->remove(dev);
-}
-
-struct bus_type cxl_bus_type = {
- .name = "cxl",
- .uevent = cxl_bus_uevent,
- .match = cxl_bus_match,
- .probe = cxl_bus_probe,
- .remove = cxl_bus_remove,
-};
-EXPORT_SYMBOL_GPL(cxl_bus_type);
-
-static __init int cxl_core_init(void)
-{
- int rc;
-
- cxl_mbox_init();
-
- rc = cxl_memdev_init();
- if (rc)
- return rc;
-
- rc = bus_register(&cxl_bus_type);
- if (rc)
- goto err;
- return 0;
-
-err:
- cxl_memdev_exit();
- cxl_mbox_exit();
- return rc;
-}
-
-static void cxl_core_exit(void)
-{
- bus_unregister(&cxl_bus_type);
- cxl_memdev_exit();
- cxl_mbox_exit();
-}
-
-module_init(cxl_core_init);
-module_exit(cxl_core_exit);
-MODULE_LICENSE("GPL v2");
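
bus.c goes away with its contents folded into port.c (see the diffstat above); the cxl_driver contract it defined (->name, ->probe, and the device ->id matched by cxl_bus_match()) survives the move. A sketch of a client of that contract, with hypothetical my_* names, assuming the cxl_driver_register()/module_cxl_driver() wrappers in cxl.h:

/* Sketch of a cxl_bus_type client driver; the my_* names are hypothetical. */
#include <linux/module.h>
#include "cxl.h"

static int my_probe(struct device *dev)
{
	/* cxl_bus_probe() routes here once cxl_bus_match() matches ->id */
	return 0;
}

static struct cxl_driver my_driver = {
	.name = "my_cxl_driver",
	.probe = my_probe,
	/* matched against cxl_device_id(dev), e.g. CXL_DEVICE_NVDIMM */
	.id = CXL_DEVICE_NVDIMM,
};
module_cxl_driver(my_driver);
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(CXL);
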
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index e0c9aacc4e9c..1d8f87be283f 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -9,15 +9,66 @@ extern const struct device_type cxl_nvdimm_type;
extern struct attribute_group cxl_base_attribute_group;
+#ifdef CONFIG_CXL_REGION
+extern struct device_attribute dev_attr_create_pmem_region;
+extern struct device_attribute dev_attr_delete_region;
+extern struct device_attribute dev_attr_region;
+extern const struct device_type cxl_pmem_region_type;
+extern const struct device_type cxl_region_type;
+void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled);
+#define CXL_REGION_ATTR(x) (&dev_attr_##x.attr)
+#define CXL_REGION_TYPE(x) (&cxl_region_type)
+#define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr),
+#define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type)
+int cxl_region_init(void);
+void cxl_region_exit(void);
+#else
+static inline void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
+{
+}
+static inline int cxl_region_init(void)
+{
+ return 0;
+}
+static inline void cxl_region_exit(void)
+{
+}
+#define CXL_REGION_ATTR(x) NULL
+#define CXL_REGION_TYPE(x) NULL
+#define SET_CXL_REGION_ATTR(x)
+#define CXL_PMEM_REGION_TYPE(x) NULL
+#endif
+
struct cxl_send_command;
struct cxl_mem_query_commands;
int cxl_query_cmd(struct cxl_memdev *cxlmd,
struct cxl_mem_query_commands __user *q);
int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s);
+void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
+ resource_size_t length);
+
+struct dentry *cxl_debugfs_create_dir(const char *dir);
+int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
+ enum cxl_decoder_mode mode);
+int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size);
+int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
+resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled);
+resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled);
+extern struct rw_semaphore cxl_dpa_rwsem;
+
+bool is_switch_decoder(struct device *dev);
+struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev);
+static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
+ struct cxl_memdev *cxlmd)
+{
+ if (!port)
+ return NULL;
+
+ return xa_load(&port->endpoints, (unsigned long)&cxlmd->dev);
+}
int cxl_memdev_init(void);
void cxl_memdev_exit(void);
void cxl_mbox_init(void);
-void cxl_mbox_exit(void);
#endif /* __CXL_CORE_H__ */
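
The CONFIG_CXL_REGION stubs above let shared sysfs attribute tables reference region attributes without an #ifdef at each use site: SET_CXL_REGION_ATTR() expands to an initializer plus trailing comma when region support is built in, and to nothing otherwise. A sketch of the intended use, borrowing the attribute names declared above (the surrounding table is illustrative):

/* Sketch: one attribute table that compiles with or without CONFIG_CXL_REGION. */
static struct attribute *cxl_decoder_root_attrs[] = {
	&dev_attr_cap_pmem.attr,
	&dev_attr_cap_ram.attr,
	SET_CXL_REGION_ATTR(create_pmem_region) /* empty if !CONFIG_CXL_REGION */
	SET_CXL_REGION_ATTR(delete_region)
	NULL,
};
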
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
new file mode 100644
index 000000000000..d1d2caea5c62
--- /dev/null
+++ b/drivers/cxl/core/hdm.c
@@ -0,0 +1,855 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#include <linux/io-64-nonatomic-hi-lo.h>
+#include <linux/seq_file.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+
+#include "cxlmem.h"
+#include "core.h"
+
+/**
+ * DOC: cxl core hdm
+ *
+ * Compute Express Link Host Managed Device Memory, starting with the
+ * CXL 2.0 specification, is managed by an array of HDM Decoder register
+ * instances per CXL port and per CXL endpoint. Define common helpers
+ * for enumerating these registers and capabilities.
+ */
+
+DECLARE_RWSEM(cxl_dpa_rwsem);
+
+static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
+ int *target_map)
+{
+ int rc;
+
+ rc = cxl_decoder_add_locked(cxld, target_map);
+ if (rc) {
+ put_device(&cxld->dev);
+ dev_err(&port->dev, "Failed to add decoder\n");
+ return rc;
+ }
+
+ rc = cxl_decoder_autoremove(&port->dev, cxld);
+ if (rc)
+ return rc;
+
+ dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev));
+
+ return 0;
+}
+
+/*
+ * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability Structure)
+ * single ported host-bridges need not publish a decoder capability when a
+ * passthrough decode can be assumed, i.e. all transactions that the uport sees
+ * are claimed and passed to the single dport. Disable the range until the first
+ * CXL region is enumerated / activated.
+ */
+int devm_cxl_add_passthrough_decoder(struct cxl_port *port)
+{
+ struct cxl_switch_decoder *cxlsd;
+ struct cxl_dport *dport = NULL;
+ int single_port_map[1];
+ unsigned long index;
+
+ cxlsd = cxl_switch_decoder_alloc(port, 1);
+ if (IS_ERR(cxlsd))
+ return PTR_ERR(cxlsd);
+
+ device_lock_assert(&port->dev);
+
+ xa_for_each(&port->dports, index, dport)
+ break;
+ single_port_map[0] = dport->port_id;
+
+ return add_hdm_decoder(port, &cxlsd->cxld, single_port_map);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, CXL);
+
+static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
+{
+ u32 hdm_cap;
+
+ hdm_cap = readl(cxlhdm->regs.hdm_decoder + CXL_HDM_DECODER_CAP_OFFSET);
+ cxlhdm->decoder_count = cxl_hdm_decoder_count(hdm_cap);
+ cxlhdm->target_count =
+ FIELD_GET(CXL_HDM_DECODER_TARGET_COUNT_MASK, hdm_cap);
+ if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_11_8, hdm_cap))
+ cxlhdm->interleave_mask |= GENMASK(11, 8);
+ if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap))
+ cxlhdm->interleave_mask |= GENMASK(14, 12);
+}
+
+static void __iomem *map_hdm_decoder_regs(struct cxl_port *port,
+ void __iomem *crb)
+{
+ struct cxl_component_reg_map map;
+
+ cxl_probe_component_regs(&port->dev, crb, &map);
+ if (!map.hdm_decoder.valid) {
+ dev_err(&port->dev, "HDM decoder registers invalid\n");
+ return IOMEM_ERR_PTR(-ENXIO);
+ }
+
+ return crb + map.hdm_decoder.offset;
+}
+
+/**
+ * devm_cxl_setup_hdm - map HDM decoder component registers
+ * @port: cxl_port to map
+ */
+struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port)
+{
+ struct device *dev = &port->dev;
+ void __iomem *crb, *hdm;
+ struct cxl_hdm *cxlhdm;
+
+ cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
+ if (!cxlhdm)
+ return ERR_PTR(-ENOMEM);
+
+ cxlhdm->port = port;
+ crb = devm_cxl_iomap_block(dev, port->component_reg_phys,
+ CXL_COMPONENT_REG_BLOCK_SIZE);
+ if (!crb) {
+ dev_err(dev, "No component registers mapped\n");
+ return ERR_PTR(-ENXIO);
+ }
+
+ hdm = map_hdm_decoder_regs(port, crb);
+ if (IS_ERR(hdm))
+ return ERR_CAST(hdm);
+ cxlhdm->regs.hdm_decoder = hdm;
+
+ parse_hdm_decoder_caps(cxlhdm);
+ if (cxlhdm->decoder_count == 0) {
+ dev_err(dev, "Spec violation. Caps invalid\n");
+ return ERR_PTR(-ENXIO);
+ }
+
+ dev_set_drvdata(dev, cxlhdm);
+
+ return cxlhdm;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL);
+
+static void __cxl_dpa_debug(struct seq_file *file, struct resource *r, int depth)
+{
+ unsigned long long start = r->start, end = r->end;
+
+ seq_printf(file, "%*s%08llx-%08llx : %s\n", depth * 2, "", start, end,
+ r->name);
+}
+
+void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds)
+{
+ struct resource *p1, *p2;
+
+ down_read(&cxl_dpa_rwsem);
+ for (p1 = cxlds->dpa_res.child; p1; p1 = p1->sibling) {
+ __cxl_dpa_debug(file, p1, 0);
+ for (p2 = p1->child; p2; p2 = p2->sibling)
+ __cxl_dpa_debug(file, p2, 1);
+ }
+ up_read(&cxl_dpa_rwsem);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_dpa_debug, CXL);
+
+/*
+ * Must be called in a context that synchronizes against this decoder's
+ * port ->remove() callback (like an endpoint decoder sysfs attribute)
+ */
+static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *port = cxled_to_port(cxled);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct resource *res = cxled->dpa_res;
+ resource_size_t skip_start;
+
+ lockdep_assert_held_write(&cxl_dpa_rwsem);
+
+ /* save @skip_start, before @res is released */
+ skip_start = res->start - cxled->skip;
+ __release_region(&cxlds->dpa_res, res->start, resource_size(res));
+ if (cxled->skip)
+ __release_region(&cxlds->dpa_res, skip_start, cxled->skip);
+ cxled->skip = 0;
+ cxled->dpa_res = NULL;
+ put_device(&cxled->cxld.dev);
+ port->hdm_end--;
+}
+
+static void cxl_dpa_release(void *cxled)
+{
+ down_write(&cxl_dpa_rwsem);
+ __cxl_dpa_release(cxled);
+ up_write(&cxl_dpa_rwsem);
+}
+
+/*
+ * Must be called from context that will not race port device
+ * unregistration, like decoder sysfs attribute methods
+ */
+static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_port *port = cxled_to_port(cxled);
+
+ lockdep_assert_held_write(&cxl_dpa_rwsem);
+ devm_remove_action(&port->dev, cxl_dpa_release, cxled);
+ __cxl_dpa_release(cxled);
+}
+
+static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
+ resource_size_t base, resource_size_t len,
+ resource_size_t skipped)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *port = cxled_to_port(cxled);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct device *dev = &port->dev;
+ struct resource *res;
+
+ lockdep_assert_held_write(&cxl_dpa_rwsem);
+
+ if (!len)
+ goto success;
+
+ if (cxled->dpa_res) {
+ dev_dbg(dev, "decoder%d.%d: existing allocation %pr assigned\n",
+ port->id, cxled->cxld.id, cxled->dpa_res);
+ return -EBUSY;
+ }
+
+ if (port->hdm_end + 1 != cxled->cxld.id) {
+ /*
+ * Assumes alloc and commit order is always in hardware instance
+ * order per expectations from 8.2.5.12.20 Committing Decoder
+ * Programming that enforce decoder[m] committed before
+ * decoder[m+1] commit start.
+ */
+ dev_dbg(dev, "decoder%d.%d: expected decoder%d.%d\n", port->id,
+ cxled->cxld.id, port->id, port->hdm_end + 1);
+ return -EBUSY;
+ }
+
+ if (skipped) {
+ res = __request_region(&cxlds->dpa_res, base - skipped, skipped,
+ dev_name(&cxled->cxld.dev), 0);
+ if (!res) {
+ dev_dbg(dev,
+ "decoder%d.%d: failed to reserve skipped space\n",
+ port->id, cxled->cxld.id);
+ return -EBUSY;
+ }
+ }
+ res = __request_region(&cxlds->dpa_res, base, len,
+ dev_name(&cxled->cxld.dev), 0);
+ if (!res) {
+ dev_dbg(dev, "decoder%d.%d: failed to reserve allocation\n",
+ port->id, cxled->cxld.id);
+ if (skipped)
+ __release_region(&cxlds->dpa_res, base - skipped,
+ skipped);
+ return -EBUSY;
+ }
+ cxled->dpa_res = res;
+ cxled->skip = skipped;
+
+ if (resource_contains(&cxlds->pmem_res, res))
+ cxled->mode = CXL_DECODER_PMEM;
+ else if (resource_contains(&cxlds->ram_res, res))
+ cxled->mode = CXL_DECODER_RAM;
+ else {
+ dev_dbg(dev, "decoder%d.%d: %pr mixed\n", port->id,
+ cxled->cxld.id, cxled->dpa_res);
+ cxled->mode = CXL_DECODER_MIXED;
+ }
+
+success:
+ port->hdm_end++;
+ get_device(&cxled->cxld.dev);
+ return 0;
+}
+
+static int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
+ resource_size_t base, resource_size_t len,
+ resource_size_t skipped)
+{
+ struct cxl_port *port = cxled_to_port(cxled);
+ int rc;
+
+ down_write(&cxl_dpa_rwsem);
+ rc = __cxl_dpa_reserve(cxled, base, len, skipped);
+ up_write(&cxl_dpa_rwsem);
+
+ if (rc)
+ return rc;
+
+ return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled);
+}
+
+resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled)
+{
+ resource_size_t size = 0;
+
+ down_read(&cxl_dpa_rwsem);
+ if (cxled->dpa_res)
+ size = resource_size(cxled->dpa_res);
+ up_read(&cxl_dpa_rwsem);
+
+ return size;
+}
+
+resource_size_t cxl_dpa_resource_start(struct cxl_endpoint_decoder *cxled)
+{
+ resource_size_t base = -1;
+
+ down_read(&cxl_dpa_rwsem);
+ if (cxled->dpa_res)
+ base = cxled->dpa_res->start;
+ up_read(&cxl_dpa_rwsem);
+
+ return base;
+}
+
+int cxl_dpa_free(struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_port *port = cxled_to_port(cxled);
+ struct device *dev = &cxled->cxld.dev;
+ int rc;
+
+ down_write(&cxl_dpa_rwsem);
+ if (!cxled->dpa_res) {
+ rc = 0;
+ goto out;
+ }
+ if (cxled->cxld.region) {
+ dev_dbg(dev, "decoder assigned to: %s\n",
+ dev_name(&cxled->cxld.region->dev));
+ rc = -EBUSY;
+ goto out;
+ }
+ if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) {
+ dev_dbg(dev, "decoder enabled\n");
+ rc = -EBUSY;
+ goto out;
+ }
+ if (cxled->cxld.id != port->hdm_end) {
+ dev_dbg(dev, "expected decoder%d.%d\n", port->id,
+ port->hdm_end);
+ rc = -EBUSY;
+ goto out;
+ }
+ devm_cxl_dpa_release(cxled);
+ rc = 0;
+out:
+ up_write(&cxl_dpa_rwsem);
+ return rc;
+}
+
+int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
+ enum cxl_decoder_mode mode)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct device *dev = &cxled->cxld.dev;
+ int rc;
+
+ switch (mode) {
+ case CXL_DECODER_RAM:
+ case CXL_DECODER_PMEM:
+ break;
+ default:
+ dev_dbg(dev, "unsupported mode: %d\n", mode);
+ return -EINVAL;
+ }
+
+ down_write(&cxl_dpa_rwsem);
+ if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) {
+ rc = -EBUSY;
+ goto out;
+ }
+
+ /*
+ * Only allow modes that are supported by the current partition
+ * configuration
+ */
+ if (mode == CXL_DECODER_PMEM && !resource_size(&cxlds->pmem_res)) {
+ dev_dbg(dev, "no available pmem capacity\n");
+ rc = -ENXIO;
+ goto out;
+ }
+ if (mode == CXL_DECODER_RAM && !resource_size(&cxlds->ram_res)) {
+ dev_dbg(dev, "no available ram capacity\n");
+ rc = -ENXIO;
+ goto out;
+ }
+
+ cxled->mode = mode;
+ rc = 0;
+out:
+ up_write(&cxl_dpa_rwsem);
+
+ return rc;
+}
+
+int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ resource_size_t free_ram_start, free_pmem_start;
+ struct cxl_port *port = cxled_to_port(cxled);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct device *dev = &cxled->cxld.dev;
+ resource_size_t start, avail, skip;
+ struct resource *p, *last;
+ int rc;
+
+ down_write(&cxl_dpa_rwsem);
+ if (cxled->cxld.region) {
+ dev_dbg(dev, "decoder attached to %s\n",
+ dev_name(&cxled->cxld.region->dev));
+ rc = -EBUSY;
+ goto out;
+ }
+
+ if (cxled->cxld.flags & CXL_DECODER_F_ENABLE) {
+ dev_dbg(dev, "decoder enabled\n");
+ rc = -EBUSY;
+ goto out;
+ }
+
+ for (p = cxlds->ram_res.child, last = NULL; p; p = p->sibling)
+ last = p;
+ if (last)
+ free_ram_start = last->end + 1;
+ else
+ free_ram_start = cxlds->ram_res.start;
+
+ for (p = cxlds->pmem_res.child, last = NULL; p; p = p->sibling)
+ last = p;
+ if (last)
+ free_pmem_start = last->end + 1;
+ else
+ free_pmem_start = cxlds->pmem_res.start;
+
+ if (cxled->mode == CXL_DECODER_RAM) {
+ start = free_ram_start;
+ avail = cxlds->ram_res.end - start + 1;
+ skip = 0;
+ } else if (cxled->mode == CXL_DECODER_PMEM) {
+ resource_size_t skip_start, skip_end;
+
+ start = free_pmem_start;
+ avail = cxlds->pmem_res.end - start + 1;
+ skip_start = free_ram_start;
+
+ /*
+ * If some pmem is already allocated, then that allocation
+ * already handled the skip.
+ */
+ if (cxlds->pmem_res.child &&
+ skip_start == cxlds->pmem_res.child->start)
+ skip_end = skip_start - 1;
+ else
+ skip_end = start - 1;
+ skip = skip_end - skip_start + 1;
+ } else {
+ dev_dbg(dev, "mode not set\n");
+ rc = -EINVAL;
+ goto out;
+ }
+
+ if (size > avail) {
+ dev_dbg(dev, "%pa exceeds available %s capacity: %pa\n", &size,
+ cxled->mode == CXL_DECODER_RAM ? "ram" : "pmem",
+ &avail);
+ rc = -ENOSPC;
+ goto out;
+ }
+
+ rc = __cxl_dpa_reserve(cxled, start, size, skip);
+out:
+ up_write(&cxl_dpa_rwsem);
+
+ if (rc)
+ return rc;
+
+ return devm_add_action_or_reset(&port->dev, cxl_dpa_release, cxled);
+}
+
+static void cxld_set_interleave(struct cxl_decoder *cxld, u32 *ctrl)
+{
+ u16 eig;
+ u8 eiw;
+
+ /*
+ * Input validation ensures these warns never fire, but otherwise
+ * suppress uninitialized variable usage warnings.
+ */
+ if (WARN_ONCE(ways_to_cxl(cxld->interleave_ways, &eiw),
+ "invalid interleave_ways: %d\n", cxld->interleave_ways))
+ return;
+ if (WARN_ONCE(granularity_to_cxl(cxld->interleave_granularity, &eig),
+ "invalid interleave_granularity: %d\n",
+ cxld->interleave_granularity))
+ return;
+
+ u32p_replace_bits(ctrl, eig, CXL_HDM_DECODER0_CTRL_IG_MASK);
+ u32p_replace_bits(ctrl, eiw, CXL_HDM_DECODER0_CTRL_IW_MASK);
+ *ctrl |= CXL_HDM_DECODER0_CTRL_COMMIT;
+}
+
+static void cxld_set_type(struct cxl_decoder *cxld, u32 *ctrl)
+{
+ u32p_replace_bits(ctrl, !!(cxld->target_type == 3),
+ CXL_HDM_DECODER0_CTRL_TYPE);
+}
+
+static int cxlsd_set_targets(struct cxl_switch_decoder *cxlsd, u64 *tgt)
+{
+ struct cxl_dport **t = &cxlsd->target[0];
+ int ways = cxlsd->cxld.interleave_ways;
+
+ if (dev_WARN_ONCE(&cxlsd->cxld.dev,
+ ways > 8 || ways > cxlsd->nr_targets,
+ "ways: %d overflows targets: %d\n", ways,
+ cxlsd->nr_targets))
+ return -ENXIO;
+
+ *tgt = FIELD_PREP(GENMASK(7, 0), t[0]->port_id);
+ if (ways > 1)
+ *tgt |= FIELD_PREP(GENMASK(15, 8), t[1]->port_id);
+ if (ways > 2)
+ *tgt |= FIELD_PREP(GENMASK(23, 16), t[2]->port_id);
+ if (ways > 3)
+ *tgt |= FIELD_PREP(GENMASK(31, 24), t[3]->port_id);
+ if (ways > 4)
+ *tgt |= FIELD_PREP(GENMASK_ULL(39, 32), t[4]->port_id);
+ if (ways > 5)
+ *tgt |= FIELD_PREP(GENMASK_ULL(47, 40), t[5]->port_id);
+ if (ways > 6)
+ *tgt |= FIELD_PREP(GENMASK_ULL(55, 48), t[6]->port_id);
+ if (ways > 7)
+ *tgt |= FIELD_PREP(GENMASK_ULL(63, 56), t[7]->port_id);
+
+ return 0;
+}
+
+/*
+ * Per CXL 2.0 8.2.5.12.20 Committing Decoder Programming, hardware must set
+ * committed or error within 10ms, but just be generous with 20ms to account for
+ * clock skew and other marginal behavior
+ */
+#define COMMIT_TIMEOUT_MS 20
+static int cxld_await_commit(void __iomem *hdm, int id)
+{
+ u32 ctrl;
+ int i;
+
+ for (i = 0; i < COMMIT_TIMEOUT_MS; i++) {
+ ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
+ if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMIT_ERROR, ctrl)) {
+ ctrl &= ~CXL_HDM_DECODER0_CTRL_COMMIT;
+ writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
+ return -EIO;
+ }
+ if (FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl))
+ return 0;
+ fsleep(1000);
+ }
+
+ return -ETIMEDOUT;
+}
+
+static int cxl_decoder_commit(struct cxl_decoder *cxld)
+{
+ struct cxl_port *port = to_cxl_port(cxld->dev.parent);
+ struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
+ void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+ int id = cxld->id, rc;
+ u64 base, size;
+ u32 ctrl;
+
+ if (cxld->flags & CXL_DECODER_F_ENABLE)
+ return 0;
+
+ if (port->commit_end + 1 != id) {
+ dev_dbg(&port->dev,
+ "%s: out of order commit, expected decoder%d.%d\n",
+ dev_name(&cxld->dev), port->id, port->commit_end + 1);
+ return -EBUSY;
+ }
+
+ down_read(&cxl_dpa_rwsem);
+ /* common decoder settings */
+ ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id));
+ cxld_set_interleave(cxld, &ctrl);
+ cxld_set_type(cxld, &ctrl);
+ base = cxld->hpa_range.start;
+ size = range_len(&cxld->hpa_range);
+
+ writel(upper_32_bits(base), hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(id));
+ writel(lower_32_bits(base), hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(id));
+ writel(upper_32_bits(size), hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(id));
+ writel(lower_32_bits(size), hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(id));
+
+ if (is_switch_decoder(&cxld->dev)) {
+ struct cxl_switch_decoder *cxlsd =
+ to_cxl_switch_decoder(&cxld->dev);
+ void __iomem *tl_hi = hdm + CXL_HDM_DECODER0_TL_HIGH(id);
+ void __iomem *tl_lo = hdm + CXL_HDM_DECODER0_TL_LOW(id);
+ u64 targets;
+
+ rc = cxlsd_set_targets(cxlsd, &targets);
+ if (rc) {
+ dev_dbg(&port->dev, "%s: target configuration error\n",
+ dev_name(&cxld->dev));
+ goto err;
+ }
+
+ writel(upper_32_bits(targets), tl_hi);
+ writel(lower_32_bits(targets), tl_lo);
+ } else {
+ struct cxl_endpoint_decoder *cxled =
+ to_cxl_endpoint_decoder(&cxld->dev);
+ void __iomem *sk_hi = hdm + CXL_HDM_DECODER0_SKIP_HIGH(id);
+ void __iomem *sk_lo = hdm + CXL_HDM_DECODER0_SKIP_LOW(id);
+
+ writel(upper_32_bits(cxled->skip), sk_hi);
+ writel(lower_32_bits(cxled->skip), sk_lo);
+ }
+
+ writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
+ up_read(&cxl_dpa_rwsem);
+
+ port->commit_end++;
+ rc = cxld_await_commit(hdm, cxld->id);
+err:
+ if (rc) {
+ dev_dbg(&port->dev, "%s: error %d committing decoder\n",
+ dev_name(&cxld->dev), rc);
+ cxld->reset(cxld);
+ return rc;
+ }
+ cxld->flags |= CXL_DECODER_F_ENABLE;
+
+ return 0;
+}
+
+static int cxl_decoder_reset(struct cxl_decoder *cxld)
+{
+ struct cxl_port *port = to_cxl_port(cxld->dev.parent);
+ struct cxl_hdm *cxlhdm = dev_get_drvdata(&port->dev);
+ void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+ int id = cxld->id;
+ u32 ctrl;
+
+ if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
+ return 0;
+
+ if (port->commit_end != id) {
+ dev_dbg(&port->dev,
+ "%s: out of order reset, expected decoder%d.%d\n",
+ dev_name(&cxld->dev), port->id, port->commit_end);
+ return -EBUSY;
+ }
+
+ down_read(&cxl_dpa_rwsem);
+ ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
+ ctrl &= ~CXL_HDM_DECODER0_CTRL_COMMIT;
+ writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(id));
+
+ writel(0, hdm + CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(id));
+ writel(0, hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(id));
+ writel(0, hdm + CXL_HDM_DECODER0_BASE_HIGH_OFFSET(id));
+ writel(0, hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(id));
+ up_read(&cxl_dpa_rwsem);
+
+ port->commit_end--;
+ cxld->flags &= ~CXL_DECODER_F_ENABLE;
+
+ return 0;
+}
+
+static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld,
+ int *target_map, void __iomem *hdm, int which,
+ u64 *dpa_base)
+{
+ struct cxl_endpoint_decoder *cxled = NULL;
+ u64 size, base, skip, dpa_size;
+ bool committed;
+ u32 remainder;
+ int i, rc;
+ u32 ctrl;
+ union {
+ u64 value;
+ unsigned char target_id[8];
+ } target_list;
+
+ if (is_endpoint_decoder(&cxld->dev))
+ cxled = to_cxl_endpoint_decoder(&cxld->dev);
+
+ ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
+ base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which));
+ size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which));
+ committed = !!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED);
+ cxld->commit = cxl_decoder_commit;
+ cxld->reset = cxl_decoder_reset;
+
+ if (!committed)
+ size = 0;
+ if (base == U64_MAX || size == U64_MAX) {
+ dev_warn(&port->dev, "decoder%d.%d: Invalid resource range\n",
+ port->id, cxld->id);
+ return -ENXIO;
+ }
+
+ cxld->hpa_range = (struct range) {
+ .start = base,
+ .end = base + size - 1,
+ };
+
+ /* decoders are enabled if committed */
+ if (committed) {
+ cxld->flags |= CXL_DECODER_F_ENABLE;
+ if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK)
+ cxld->flags |= CXL_DECODER_F_LOCK;
+ if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl))
+ cxld->target_type = CXL_DECODER_EXPANDER;
+ else
+ cxld->target_type = CXL_DECODER_ACCELERATOR;
+ if (cxld->id != port->commit_end + 1) {
+ dev_warn(&port->dev,
+ "decoder%d.%d: Committed out of order\n",
+ port->id, cxld->id);
+ return -ENXIO;
+ }
+ port->commit_end = cxld->id;
+ } else {
+ /* unless / until type-2 drivers arrive, assume type-3 */
+ if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl) == 0) {
+ ctrl |= CXL_HDM_DECODER0_CTRL_TYPE;
+ writel(ctrl, hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which));
+ }
+ cxld->target_type = CXL_DECODER_EXPANDER;
+ }
+ rc = cxl_to_ways(FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl),
+ &cxld->interleave_ways);
+ if (rc) {
+ dev_warn(&port->dev,
+ "decoder%d.%d: Invalid interleave ways (ctrl: %#x)\n",
+ port->id, cxld->id, ctrl);
+ return rc;
+ }
+ rc = cxl_to_granularity(FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl),
+ &cxld->interleave_granularity);
+ if (rc)
+ return rc;
+
+ if (!cxled) {
+ target_list.value =
+ ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which));
+ for (i = 0; i < cxld->interleave_ways; i++)
+ target_map[i] = target_list.target_id[i];
+
+ return 0;
+ }
+
+ if (!committed)
+ return 0;
+
+ dpa_size = div_u64_rem(size, cxld->interleave_ways, &remainder);
+ if (remainder) {
+ dev_err(&port->dev,
+ "decoder%d.%d: invalid committed configuration size: %#llx ways: %d\n",
+ port->id, cxld->id, size, cxld->interleave_ways);
+ return -ENXIO;
+ }
+ skip = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SKIP_LOW(which));
+ rc = devm_cxl_dpa_reserve(cxled, *dpa_base + skip, dpa_size, skip);
+ if (rc) {
+ dev_err(&port->dev,
+ "decoder%d.%d: Failed to reserve DPA range %#llx - %#llx\n (%d)",
+ port->id, cxld->id, *dpa_base,
+ *dpa_base + dpa_size + skip - 1, rc);
+ return rc;
+ }
+ *dpa_base += dpa_size + skip;
+ return 0;
+}
+
+/**
+ * devm_cxl_enumerate_decoders - add decoder objects per HDM register set
+ * @cxlhdm: Structure to populate with HDM capabilities
+ */
+int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm)
+{
+ void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+ struct cxl_port *port = cxlhdm->port;
+ int i, committed;
+ u64 dpa_base = 0;
+ u32 ctrl;
+
+ /*
+ * Since the register resource was recently claimed via request_region()
+ * be careful about trusting the "not-committed" status until the commit
+ * timeout has elapsed. The commit timeout is 10ms (CXL 2.0
+ * 8.2.5.12.20), but double it to be tolerant of any clock skew between
+ * host and target.
+ */
+ for (i = 0, committed = 0; i < cxlhdm->decoder_count; i++) {
+ ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i));
+ if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED)
+ committed++;
+ }
+
+ /* ensure that future checks of committed can be trusted */
+ if (committed != cxlhdm->decoder_count)
+ msleep(20);
+
+ for (i = 0; i < cxlhdm->decoder_count; i++) {
+ int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 };
+ int rc, target_count = cxlhdm->target_count;
+ struct cxl_decoder *cxld;
+
+ if (is_cxl_endpoint(port)) {
+ struct cxl_endpoint_decoder *cxled;
+
+ cxled = cxl_endpoint_decoder_alloc(port);
+ if (IS_ERR(cxled)) {
+ dev_warn(&port->dev,
+ "Failed to allocate the decoder\n");
+ return PTR_ERR(cxled);
+ }
+ cxld = &cxled->cxld;
+ } else {
+ struct cxl_switch_decoder *cxlsd;
+
+ cxlsd = cxl_switch_decoder_alloc(port, target_count);
+ if (IS_ERR(cxlsd)) {
+ dev_warn(&port->dev,
+ "Failed to allocate the decoder\n");
+ return PTR_ERR(cxlsd);
+ }
+ cxld = &cxlsd->cxld;
+ }
+
+ rc = init_hdm_decoder(port, cxld, target_map, hdm, i, &dpa_base);
+ if (rc) {
+ put_device(&cxld->dev);
+ return rc;
+ }
+ rc = add_hdm_decoder(port, cxld, target_map);
+ if (rc) {
+ dev_warn(&port->dev,
+ "Failed to add decoder to port\n");
+ return rc;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_decoders, CXL);
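
devm_cxl_setup_hdm() and devm_cxl_enumerate_decoders() are meant to be called back to back from a port driver's probe path. A minimal sketch of that consumer flow, modeled on what the cxl_port driver does (error handling condensed):

/* Sketch: the expected consumer of the two HDM entry points above. */
static int example_port_probe(struct device *dev)
{
	struct cxl_port *port = to_cxl_port(dev);
	struct cxl_hdm *cxlhdm;

	/* map the component registers and cache the decoder capabilities */
	cxlhdm = devm_cxl_setup_hdm(port);
	if (IS_ERR(cxlhdm))
		return PTR_ERR(cxlhdm);

	/* instantiate one cxl_decoder per HDM decoder register instance */
	return devm_cxl_enumerate_decoders(cxlhdm);
}
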
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 576796a5d9f3..0c90f13870a4 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -35,6 +35,7 @@ static bool cxl_raw_allow_all;
.flags = _flags, \
}
+#define CXL_VARIABLE_PAYLOAD ~0U
/*
* This table defines the supported mailbox commands for the driver. This table
* is made up of a UAPI structure. Non-negative values as parameters in the
@@ -44,26 +45,26 @@ static bool cxl_raw_allow_all;
static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE),
#ifdef CONFIG_CXL_MEM_RAW_COMMANDS
- CXL_CMD(RAW, ~0, ~0, 0),
+ CXL_CMD(RAW, CXL_VARIABLE_PAYLOAD, CXL_VARIABLE_PAYLOAD, 0),
#endif
- CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
+ CXL_CMD(GET_SUPPORTED_LOGS, 0, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE),
CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
- CXL_CMD(GET_LSA, 0x8, ~0, 0),
+ CXL_CMD(GET_LSA, 0x8, CXL_VARIABLE_PAYLOAD, 0),
CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0),
- CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
+ CXL_CMD(GET_LOG, 0x18, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE),
CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0),
- CXL_CMD(SET_LSA, ~0, 0, 0),
+ CXL_CMD(SET_LSA, CXL_VARIABLE_PAYLOAD, 0, 0),
CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0),
CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),
CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),
CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0),
- CXL_CMD(GET_POISON, 0x10, ~0, 0),
+ CXL_CMD(GET_POISON, 0x10, CXL_VARIABLE_PAYLOAD, 0),
CXL_CMD(INJECT_POISON, 0x8, 0, 0),
CXL_CMD(CLEAR_POISON, 0x48, 0, 0),
CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0),
CXL_CMD(SCAN_MEDIA, 0x11, 0, 0),
- CXL_CMD(GET_SCAN_MEDIA, 0, ~0, 0),
+ CXL_CMD(GET_SCAN_MEDIA, 0, CXL_VARIABLE_PAYLOAD, 0),
};
/*
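
The command table swaps the bare ~0 markers for a named sentinel. The UAPI size fields are unsigned in this scheme, so "variable size" is encoded as all-ones and tested by equality rather than by the old signed size >= 0 check. A sketch of the comparison pattern the later hunks adopt (the helper is hypothetical):

/* Sketch: fixed- vs variable-size handling with the new sentinel.
 * expected_size_ok() is a hypothetical helper, not part of this patch.
 */
static bool expected_size_ok(u32 expected, size_t actual)
{
	/* variable-sized payloads defer validation to the caller */
	if (expected == CXL_VARIABLE_PAYLOAD)
		return true;
	return expected == actual;
}
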
@@ -127,16 +128,27 @@ static struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
return NULL;
}
+static const char *cxl_mem_opcode_to_name(u16 opcode)
+{
+ struct cxl_mem_command *c;
+
+ c = cxl_mem_find_command(opcode);
+ if (!c)
+ return NULL;
+
+ return cxl_command_names[c->info.id].name;
+}
+
/**
- * cxl_mem_mbox_send_cmd() - Send a mailbox command to a memory device.
- * @cxlm: The CXL memory device to communicate with.
+ * cxl_mbox_send_cmd() - Send a mailbox command to a device.
+ * @cxlds: The device data for the operation
* @opcode: Opcode for the mailbox command.
* @in: The input payload for the mailbox command.
* @in_size: The length of the input payload
* @out: Caller allocated buffer for the output.
* @out_size: Expected size of output.
*
- * Context: Any context. Will acquire and release mbox_mutex.
+ * Context: Any context.
* Return:
* * %>=0 - Number of bytes returned in @out.
* * %-E2BIG - Payload is too large for hardware.
@@ -148,11 +160,9 @@ static struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
* Mailbox commands may execute successfully yet the device itself reported an
* error. While this distinction can be useful for commands from userspace, the
* kernel will only be able to use results when both are successful.
- *
- * See __cxl_mem_mbox_send_cmd()
*/
-int cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, u16 opcode, void *in,
- size_t in_size, void *out, size_t out_size)
+int cxl_mbox_send_cmd(struct cxl_dev_state *cxlds, u16 opcode, void *in,
+ size_t in_size, void *out, size_t out_size)
{
const struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
struct cxl_mbox_cmd mbox_cmd = {
@@ -164,27 +174,27 @@ int cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, u16 opcode, void *in,
};
int rc;
- if (out_size > cxlm->payload_size)
+ if (in_size > cxlds->payload_size || out_size > cxlds->payload_size)
return -E2BIG;
- rc = cxlm->mbox_send(cxlm, &mbox_cmd);
+ rc = cxlds->mbox_send(cxlds, &mbox_cmd);
if (rc)
return rc;
- /* TODO: Map return code to proper kernel style errno */
- if (mbox_cmd.return_code != CXL_MBOX_SUCCESS)
- return -ENXIO;
+ if (mbox_cmd.return_code != CXL_MBOX_CMD_RC_SUCCESS)
+ return cxl_mbox_cmd_rc2errno(&mbox_cmd);
/*
* Variable sized commands can't be validated and so it's up to the
* caller to do that if they wish.
*/
- if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size)
- return -EIO;
-
+ if (cmd->info.size_out != CXL_VARIABLE_PAYLOAD) {
+ if (mbox_cmd.size_out != out_size)
+ return -EIO;
+ }
return 0;
}
-EXPORT_SYMBOL_GPL(cxl_mem_mbox_send_cmd);
+EXPORT_SYMBOL_NS_GPL(cxl_mbox_send_cmd, CXL);
static bool cxl_mem_raw_command_allowed(u16 opcode)
{
@@ -210,75 +220,122 @@ static bool cxl_mem_raw_command_allowed(u16 opcode)
}
/**
- * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
- * @cxlm: &struct cxl_mem device whose mailbox will be used.
- * @send_cmd: &struct cxl_send_command copied in from userspace.
- * @out_cmd: Sanitized and populated &struct cxl_mem_command.
+ * cxl_payload_from_user_allowed() - Check contents of in_payload.
+ * @opcode: The mailbox command opcode.
+ * @payload_in: Pointer to the input payload passed in from user space.
*
* Return:
- * * %0 - @out_cmd is ready to send.
- * * %-ENOTTY - Invalid command specified.
- * * %-EINVAL - Reserved fields or invalid values were used.
- * * %-ENOMEM - Input or output buffer wasn't sized properly.
- * * %-EPERM - Attempted to use a protected command.
- * * %-EBUSY - Kernel has claimed exclusive access to this opcode
+ * * true - payload_in passes check for @opcode.
+ * * false - payload_in contains invalid or unsupported values.
*
- * The result of this command is a fully validated command in @out_cmd that is
- * safe to send to the hardware.
+ * The driver may inspect payload contents before sending a mailbox
+ * command from user space to the device. The intent is to reject
+ * commands with input payloads that are known to be unsafe. This
+ * check is not intended to replace the user's careful selection of
+ * mailbox command parameters and makes no guarantee that the user
+ * command will succeed, nor that it is appropriate.
*
- * See handle_mailbox_cmd_from_user()
+ * The specific checks are determined by the opcode.
*/
-static int cxl_validate_cmd_from_user(struct cxl_mem *cxlm,
- const struct cxl_send_command *send_cmd,
- struct cxl_mem_command *out_cmd)
+static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in)
{
- const struct cxl_command_info *info;
- struct cxl_mem_command *c;
+ switch (opcode) {
+ case CXL_MBOX_OP_SET_PARTITION_INFO: {
+ struct cxl_mbox_set_partition_info *pi = payload_in;
- if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX)
- return -ENOTTY;
+ if (pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG)
+ return false;
+ break;
+ }
+ default:
+ break;
+ }
+ return true;
+}
- /*
- * The user can never specify an input payload larger than what hardware
- * supports, but output can be arbitrarily large (simply write out as
- * much data as the hardware provides).
- */
- if (send_cmd->in.size > cxlm->payload_size)
+static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox,
+ struct cxl_dev_state *cxlds, u16 opcode,
+ size_t in_size, size_t out_size, u64 in_payload)
+{
+ *mbox = (struct cxl_mbox_cmd) {
+ .opcode = opcode,
+ .size_in = in_size,
+ };
+
+ if (in_size) {
+ mbox->payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
+ in_size);
+ if (IS_ERR(mbox->payload_in))
+ return PTR_ERR(mbox->payload_in);
+
+ if (!cxl_payload_from_user_allowed(opcode, mbox->payload_in)) {
+ dev_dbg(cxlds->dev, "%s: input payload not allowed\n",
+ cxl_mem_opcode_to_name(opcode));
+ kvfree(mbox->payload_in);
+ return -EBUSY;
+ }
+ }
+
+ /* Prepare to handle a full payload for variable sized output */
+ if (out_size == CXL_VARIABLE_PAYLOAD)
+ mbox->size_out = cxlds->payload_size;
+ else
+ mbox->size_out = out_size;
+
+ if (mbox->size_out) {
+ mbox->payload_out = kvzalloc(mbox->size_out, GFP_KERNEL);
+ if (!mbox->payload_out) {
+ kvfree(mbox->payload_in);
+ return -ENOMEM;
+ }
+ }
+ return 0;
+}
+
+static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox)
+{
+ kvfree(mbox->payload_in);
+ kvfree(mbox->payload_out);
+}
+
+static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
+ const struct cxl_send_command *send_cmd,
+ struct cxl_dev_state *cxlds)
+{
+ if (send_cmd->raw.rsvd)
return -EINVAL;
/*
- * Checks are bypassed for raw commands but a WARN/taint will occur
- * later in the callchain
+ * Unlike supported commands, the output size of RAW commands
+ * gets passed along without further checking, so it must be
+ * validated here.
*/
- if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) {
- const struct cxl_mem_command temp = {
- .info = {
- .id = CXL_MEM_COMMAND_ID_RAW,
- .flags = 0,
- .size_in = send_cmd->in.size,
- .size_out = send_cmd->out.size,
- },
- .opcode = send_cmd->raw.opcode
- };
+ if (send_cmd->out.size > cxlds->payload_size)
+ return -EINVAL;
- if (send_cmd->raw.rsvd)
- return -EINVAL;
+ if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
+ return -EPERM;
- /*
- * Unlike supported commands, the output size of RAW commands
- * gets passed along without further checking, so it must be
- * validated here.
- */
- if (send_cmd->out.size > cxlm->payload_size)
- return -EINVAL;
+ dev_WARN_ONCE(cxlds->dev, true, "raw command path used\n");
- if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
- return -EPERM;
+ *mem_cmd = (struct cxl_mem_command) {
+ .info = {
+ .id = CXL_MEM_COMMAND_ID_RAW,
+ .size_in = send_cmd->in.size,
+ .size_out = send_cmd->out.size,
+ },
+ .opcode = send_cmd->raw.opcode
+ };
- memcpy(out_cmd, &temp, sizeof(temp));
+ return 0;
+}
- return 0;
- }
+static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
+ const struct cxl_send_command *send_cmd,
+ struct cxl_dev_state *cxlds)
+{
+ struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id];
+ const struct cxl_command_info *info = &c->info;
if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK)
return -EINVAL;
@@ -289,35 +346,85 @@ static int cxl_validate_cmd_from_user(struct cxl_mem *cxlm,
if (send_cmd->in.rsvd || send_cmd->out.rsvd)
return -EINVAL;
- /* Convert user's command into the internal representation */
- c = &cxl_mem_commands[send_cmd->id];
- info = &c->info;
-
/* Check that the command is enabled for hardware */
- if (!test_bit(info->id, cxlm->enabled_cmds))
+ if (!test_bit(info->id, cxlds->enabled_cmds))
return -ENOTTY;
/* Check that the command is not claimed for exclusive kernel use */
- if (test_bit(info->id, cxlm->exclusive_cmds))
+ if (test_bit(info->id, cxlds->exclusive_cmds))
return -EBUSY;
/* Check the input buffer is the expected size */
- if (info->size_in >= 0 && info->size_in != send_cmd->in.size)
+ if ((info->size_in != CXL_VARIABLE_PAYLOAD) &&
+ (info->size_in != send_cmd->in.size))
return -ENOMEM;
/* Check the output buffer is at least large enough */
- if (info->size_out >= 0 && send_cmd->out.size < info->size_out)
+ if ((info->size_out != CXL_VARIABLE_PAYLOAD) &&
+ (send_cmd->out.size < info->size_out))
return -ENOMEM;
- memcpy(out_cmd, c, sizeof(*c));
- out_cmd->info.size_in = send_cmd->in.size;
+ *mem_cmd = (struct cxl_mem_command) {
+ .info = {
+ .id = info->id,
+ .flags = info->flags,
+ .size_in = send_cmd->in.size,
+ .size_out = send_cmd->out.size,
+ },
+ .opcode = c->opcode
+ };
+
+ return 0;
+}
+
+/**
+ * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
+ * @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd.
+ * @cxlds: The device data for the operation
+ * @send_cmd: &struct cxl_send_command copied in from userspace.
+ *
+ * Return:
+ * * %0 - @mbox_cmd is ready to send.
+ * * %-ENOTTY - Invalid command specified.
+ * * %-EINVAL - Reserved fields or invalid values were used.
+ * * %-ENOMEM - Input or output buffer wasn't sized properly.
+ * * %-EPERM - Attempted to use a protected command.
+ * * %-EBUSY - Kernel has claimed exclusive access to this opcode
+ *
+ * The result of this command is a fully validated command in @mbox_cmd that is
+ * safe to send to the hardware.
+ */
+static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
+ struct cxl_dev_state *cxlds,
+ const struct cxl_send_command *send_cmd)
+{
+ struct cxl_mem_command mem_cmd;
+ int rc;
+
+ if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX)
+ return -ENOTTY;
+
/*
- * XXX: out_cmd->info.size_out will be controlled by the driver, and the
- * specified number of bytes @send_cmd->out.size will be copied back out
- * to userspace.
+ * The user can never specify an input payload larger than what hardware
+ * supports, but output can be arbitrarily large (simply write out as
+ * much data as the hardware provides).
*/
+ if (send_cmd->in.size > cxlds->payload_size)
+ return -EINVAL;
- return 0;
+ /* Sanitize and construct a cxl_mem_command */
+ if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW)
+ rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxlds);
+ else
+ rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxlds);
+
+ if (rc)
+ return rc;
+
+ /* Sanitize and construct a cxl_mbox_cmd */
+ return cxl_mbox_cmd_ctor(mbox_cmd, cxlds, mem_cmd.opcode,
+ mem_cmd.info.size_in, mem_cmd.info.size_out,
+ send_cmd->in.payload);
}
int cxl_query_cmd(struct cxl_memdev *cxlmd,
@@ -356,9 +463,8 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
/**
* handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
- * @cxlm: The CXL memory device to communicate with.
- * @cmd: The validated command.
- * @in_payload: Pointer to userspace's input payload.
+ * @cxlds: The device data for the operation
+ * @mbox_cmd: The validated mailbox command.
* @out_payload: Pointer to userspace's output payload.
* @size_out: (Input) Max payload size to copy out.
* (Output) Payload size hardware generated.
@@ -373,51 +479,27 @@ int cxl_query_cmd(struct cxl_memdev *cxlmd,
* * %-EINTR - Mailbox acquisition interrupted.
* * %-EXXX - Transaction level failures.
*
- * Creates the appropriate mailbox command and dispatches it on behalf of a
- * userspace request. The input and output payloads are copied between
- * userspace.
+ * Dispatches a mailbox command on behalf of a userspace request.
+ * The output payload is copied to userspace.
*
* See cxl_send_cmd().
*/
-static int handle_mailbox_cmd_from_user(struct cxl_mem *cxlm,
- const struct cxl_mem_command *cmd,
- u64 in_payload, u64 out_payload,
- s32 *size_out, u32 *retval)
+static int handle_mailbox_cmd_from_user(struct cxl_dev_state *cxlds,
+ struct cxl_mbox_cmd *mbox_cmd,
+ u64 out_payload, s32 *size_out,
+ u32 *retval)
{
- struct device *dev = cxlm->dev;
- struct cxl_mbox_cmd mbox_cmd = {
- .opcode = cmd->opcode,
- .size_in = cmd->info.size_in,
- .size_out = cmd->info.size_out,
- };
+ struct device *dev = cxlds->dev;
int rc;
- if (cmd->info.size_out) {
- mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL);
- if (!mbox_cmd.payload_out)
- return -ENOMEM;
- }
-
- if (cmd->info.size_in) {
- mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
- cmd->info.size_in);
- if (IS_ERR(mbox_cmd.payload_in)) {
- kvfree(mbox_cmd.payload_out);
- return PTR_ERR(mbox_cmd.payload_in);
- }
- }
-
dev_dbg(dev,
"Submitting %s command for user\n"
"\topcode: %x\n"
- "\tsize: %ub\n",
- cxl_command_names[cmd->info.id].name, mbox_cmd.opcode,
- cmd->info.size_in);
-
- dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW,
- "raw command path used\n");
+ "\tsize: %zx\n",
+ cxl_mem_opcode_to_name(mbox_cmd->opcode),
+ mbox_cmd->opcode, mbox_cmd->size_in);
- rc = cxlm->mbox_send(cxlm, &mbox_cmd);
+ rc = cxlds->mbox_send(cxlds, mbox_cmd);
if (rc)
goto out;
@@ -426,31 +508,30 @@ static int handle_mailbox_cmd_from_user(struct cxl_mem *cxlm,
* to userspace. While the hardware may have produced more output than
* this, the excess is ignored.
*/
- if (mbox_cmd.size_out) {
- dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out,
+ if (mbox_cmd->size_out) {
+ dev_WARN_ONCE(dev, mbox_cmd->size_out > *size_out,
"Invalid return size\n");
if (copy_to_user(u64_to_user_ptr(out_payload),
- mbox_cmd.payload_out, mbox_cmd.size_out)) {
+ mbox_cmd->payload_out, mbox_cmd->size_out)) {
rc = -EFAULT;
goto out;
}
}
- *size_out = mbox_cmd.size_out;
- *retval = mbox_cmd.return_code;
+ *size_out = mbox_cmd->size_out;
+ *retval = mbox_cmd->return_code;
out:
- kvfree(mbox_cmd.payload_in);
- kvfree(mbox_cmd.payload_out);
+ cxl_mbox_cmd_dtor(mbox_cmd);
return rc;
}
int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
{
- struct cxl_mem *cxlm = cxlmd->cxlm;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct device *dev = &cxlmd->dev;
struct cxl_send_command send;
- struct cxl_mem_command c;
+ struct cxl_mbox_cmd mbox_cmd;
int rc;
dev_dbg(dev, "Send IOCTL\n");
@@ -458,17 +539,12 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
if (copy_from_user(&send, s, sizeof(send)))
return -EFAULT;
- rc = cxl_validate_cmd_from_user(cxlmd->cxlm, &send, &c);
+ rc = cxl_validate_cmd_from_user(&mbox_cmd, cxlmd->cxlds, &send);
if (rc)
return rc;
- /* Prepare to handle a full payload for variable sized output */
- if (c.info.size_out < 0)
- c.info.size_out = cxlm->payload_size;
-
- rc = handle_mailbox_cmd_from_user(cxlm, &c, send.in.payload,
- send.out.payload, &send.out.size,
- &send.retval);
+ rc = handle_mailbox_cmd_from_user(cxlds, &mbox_cmd, send.out.payload,
+ &send.out.size, &send.retval);
if (rc)
return rc;
@@ -478,13 +554,13 @@ int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s)
return 0;
}
-static int cxl_xfer_log(struct cxl_mem *cxlm, uuid_t *uuid, u32 size, u8 *out)
+static int cxl_xfer_log(struct cxl_dev_state *cxlds, uuid_t *uuid, u32 size, u8 *out)
{
u32 remaining = size;
u32 offset = 0;
while (remaining) {
- u32 xfer_size = min_t(u32, remaining, cxlm->payload_size);
+ u32 xfer_size = min_t(u32, remaining, cxlds->payload_size);
struct cxl_mbox_get_log log = {
.uuid = *uuid,
.offset = cpu_to_le32(offset),
@@ -492,8 +568,8 @@ static int cxl_xfer_log(struct cxl_mem *cxlm, uuid_t *uuid, u32 size, u8 *out)
};
int rc;
- rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_LOG, &log,
- sizeof(log), out, xfer_size);
+ rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LOG, &log, sizeof(log),
+ out, xfer_size);
if (rc < 0)
return rc;
@@ -507,14 +583,14 @@ static int cxl_xfer_log(struct cxl_mem *cxlm, uuid_t *uuid, u32 size, u8 *out)
/**
* cxl_walk_cel() - Walk through the Command Effects Log.
- * @cxlm: Device.
+ * @cxlds: The device data for the operation
* @size: Length of the Command Effects Log.
* @cel: CEL
*
* Iterate over each entry in the CEL and determine if the driver supports the
* command. If so, the command is enabled for the device and can be used later.
*/
-static void cxl_walk_cel(struct cxl_mem *cxlm, size_t size, u8 *cel)
+static void cxl_walk_cel(struct cxl_dev_state *cxlds, size_t size, u8 *cel)
{
struct cxl_cel_entry *cel_entry;
const int cel_entries = size / sizeof(*cel_entry);
@@ -527,26 +603,26 @@ static void cxl_walk_cel(struct cxl_mem *cxlm, size_t size, u8 *cel)
struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
if (!cmd) {
- dev_dbg(cxlm->dev,
+ dev_dbg(cxlds->dev,
"Opcode 0x%04x unsupported by driver", opcode);
continue;
}
- set_bit(cmd->info.id, cxlm->enabled_cmds);
+ set_bit(cmd->info.id, cxlds->enabled_cmds);
}
}
-static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_mem *cxlm)
+static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_dev_state *cxlds)
{
struct cxl_mbox_get_supported_logs *ret;
int rc;
- ret = kvmalloc(cxlm->payload_size, GFP_KERNEL);
+ ret = kvmalloc(cxlds->payload_size, GFP_KERNEL);
if (!ret)
return ERR_PTR(-ENOMEM);
- rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_SUPPORTED_LOGS, NULL,
- 0, ret, cxlm->payload_size);
+ rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_SUPPORTED_LOGS, NULL, 0, ret,
+ cxlds->payload_size);
if (rc < 0) {
kvfree(ret);
return ERR_PTR(rc);
@@ -567,23 +643,23 @@ static const uuid_t log_uuid[] = {
};
/**
- * cxl_mem_enumerate_cmds() - Enumerate commands for a device.
- * @cxlm: The device.
+ * cxl_enumerate_cmds() - Enumerate commands for a device.
+ * @cxlds: The device data for the operation
*
* Returns 0 if enumerate completed successfully.
*
* CXL devices have optional support for certain commands. This function will
* determine the set of supported commands for the hardware and update the
- * enabled_cmds bitmap in the @cxlm.
+ * enabled_cmds bitmap in the @cxlds.
*/
-int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm)
+int cxl_enumerate_cmds(struct cxl_dev_state *cxlds)
{
struct cxl_mbox_get_supported_logs *gsl;
- struct device *dev = cxlm->dev;
+ struct device *dev = cxlds->dev;
struct cxl_mem_command *cmd;
int i, rc;
- gsl = cxl_get_gsl(cxlm);
+ gsl = cxl_get_gsl(cxlds);
if (IS_ERR(gsl))
return PTR_ERR(gsl);
@@ -604,19 +680,19 @@ int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm)
goto out;
}
- rc = cxl_xfer_log(cxlm, &uuid, size, log);
+ rc = cxl_xfer_log(cxlds, &uuid, size, log);
if (rc) {
kvfree(log);
goto out;
}
- cxl_walk_cel(cxlm, size, log);
+ cxl_walk_cel(cxlds, size, log);
kvfree(log);
/* In case CEL was bogus, enable some default commands. */
cxl_for_each_cmd(cmd)
if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
- set_bit(cmd->info.id, cxlm->enabled_cmds);
+ set_bit(cmd->info.id, cxlds->enabled_cmds);
/* Found the required CEL */
rc = 0;
@@ -626,11 +702,11 @@ out:
kvfree(gsl);
return rc;
}
-EXPORT_SYMBOL_GPL(cxl_mem_enumerate_cmds);
+EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, CXL);
/**
* cxl_mem_get_partition_info() - Get partition info
- * @cxlm: cxl_mem instance to update partition info
+ * @cxlds: The device data for the operation
*
* Retrieve the current partition info for the device specified. The active
* values are the current capacity in bytes. If not 0, the 'next' values are
@@ -640,148 +716,147 @@ EXPORT_SYMBOL_GPL(cxl_mem_enumerate_cmds);
*
* See CXL 2.0 8.2.9.5.2.1 Get Partition Info
*/
-static int cxl_mem_get_partition_info(struct cxl_mem *cxlm)
+static int cxl_mem_get_partition_info(struct cxl_dev_state *cxlds)
{
- struct cxl_mbox_get_partition_info {
- __le64 active_volatile_cap;
- __le64 active_persistent_cap;
- __le64 next_volatile_cap;
- __le64 next_persistent_cap;
- } __packed pi;
+ struct cxl_mbox_get_partition_info pi;
int rc;
- rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_PARTITION_INFO,
- NULL, 0, &pi, sizeof(pi));
+ rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_PARTITION_INFO, NULL, 0,
+ &pi, sizeof(pi));
if (rc)
return rc;
- cxlm->active_volatile_bytes =
+ cxlds->active_volatile_bytes =
le64_to_cpu(pi.active_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
- cxlm->active_persistent_bytes =
+ cxlds->active_persistent_bytes =
le64_to_cpu(pi.active_persistent_cap) * CXL_CAPACITY_MULTIPLIER;
- cxlm->next_volatile_bytes =
+ cxlds->next_volatile_bytes =
le64_to_cpu(pi.next_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
- cxlm->next_persistent_bytes =
+ cxlds->next_persistent_bytes =
le64_to_cpu(pi.next_persistent_cap) * CXL_CAPACITY_MULTIPLIER;
return 0;
}
/**
- * cxl_mem_identify() - Send the IDENTIFY command to the device.
- * @cxlm: The device to identify.
+ * cxl_dev_state_identify() - Send the IDENTIFY command to the device.
+ * @cxlds: The device data for the operation
*
* Return: 0 if identify was executed successfully.
*
* This will dispatch the identify command to the device and on success populate
* structures to be exported to sysfs.
*/
-int cxl_mem_identify(struct cxl_mem *cxlm)
+int cxl_dev_state_identify(struct cxl_dev_state *cxlds)
{
/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
struct cxl_mbox_identify id;
int rc;
- rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id,
- sizeof(id));
+ rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id,
+ sizeof(id));
if (rc < 0)
return rc;
- cxlm->total_bytes =
+ cxlds->total_bytes =
le64_to_cpu(id.total_capacity) * CXL_CAPACITY_MULTIPLIER;
- cxlm->volatile_only_bytes =
+ cxlds->volatile_only_bytes =
le64_to_cpu(id.volatile_capacity) * CXL_CAPACITY_MULTIPLIER;
- cxlm->persistent_only_bytes =
+ cxlds->persistent_only_bytes =
le64_to_cpu(id.persistent_capacity) * CXL_CAPACITY_MULTIPLIER;
- cxlm->partition_align_bytes =
+ cxlds->partition_align_bytes =
le64_to_cpu(id.partition_align) * CXL_CAPACITY_MULTIPLIER;
- dev_dbg(cxlm->dev,
- "Identify Memory Device\n"
- " total_bytes = %#llx\n"
- " volatile_only_bytes = %#llx\n"
- " persistent_only_bytes = %#llx\n"
- " partition_align_bytes = %#llx\n",
- cxlm->total_bytes, cxlm->volatile_only_bytes,
- cxlm->persistent_only_bytes, cxlm->partition_align_bytes);
-
- cxlm->lsa_size = le32_to_cpu(id.lsa_size);
- memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision));
+ cxlds->lsa_size = le32_to_cpu(id.lsa_size);
+ memcpy(cxlds->firmware_version, id.fw_revision, sizeof(id.fw_revision));
return 0;
}
-EXPORT_SYMBOL_GPL(cxl_mem_identify);
+EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, CXL);
-int cxl_mem_create_range_info(struct cxl_mem *cxlm)
+static int add_dpa_res(struct device *dev, struct resource *parent,
+ struct resource *res, resource_size_t start,
+ resource_size_t size, const char *type)
{
int rc;
- if (cxlm->partition_align_bytes == 0) {
- cxlm->ram_range.start = 0;
- cxlm->ram_range.end = cxlm->volatile_only_bytes - 1;
- cxlm->pmem_range.start = cxlm->volatile_only_bytes;
- cxlm->pmem_range.end = cxlm->volatile_only_bytes +
- cxlm->persistent_only_bytes - 1;
+ res->name = type;
+ res->start = start;
+ res->end = start + size - 1;
+ res->flags = IORESOURCE_MEM;
+ if (resource_size(res) == 0) {
+ dev_dbg(dev, "DPA(%s): no capacity\n", res->name);
return 0;
}
-
- rc = cxl_mem_get_partition_info(cxlm);
+ rc = request_resource(parent, res);
if (rc) {
- dev_err(cxlm->dev, "Failed to query partition information\n");
+ dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name,
+ res, rc);
return rc;
}
- dev_dbg(cxlm->dev,
- "Get Partition Info\n"
- " active_volatile_bytes = %#llx\n"
- " active_persistent_bytes = %#llx\n"
- " next_volatile_bytes = %#llx\n"
- " next_persistent_bytes = %#llx\n",
- cxlm->active_volatile_bytes, cxlm->active_persistent_bytes,
- cxlm->next_volatile_bytes, cxlm->next_persistent_bytes);
+ dev_dbg(dev, "DPA(%s): %pr\n", res->name, res);
- cxlm->ram_range.start = 0;
- cxlm->ram_range.end = cxlm->active_volatile_bytes - 1;
+ return 0;
+}
- cxlm->pmem_range.start = cxlm->active_volatile_bytes;
- cxlm->pmem_range.end =
- cxlm->active_volatile_bytes + cxlm->active_persistent_bytes - 1;
+int cxl_mem_create_range_info(struct cxl_dev_state *cxlds)
+{
+ struct device *dev = cxlds->dev;
+ int rc;
- return 0;
+ cxlds->dpa_res =
+ (struct resource)DEFINE_RES_MEM(0, cxlds->total_bytes);
+
+ if (cxlds->partition_align_bytes == 0) {
+ rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
+ cxlds->volatile_only_bytes, "ram");
+ if (rc)
+ return rc;
+ return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
+ cxlds->volatile_only_bytes,
+ cxlds->persistent_only_bytes, "pmem");
+ }
+
+ rc = cxl_mem_get_partition_info(cxlds);
+ if (rc) {
+ dev_err(dev, "Failed to query partition information\n");
+ return rc;
+ }
+
+ rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
+ cxlds->active_volatile_bytes, "ram");
+ if (rc)
+ return rc;
+ return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
+ cxlds->active_volatile_bytes,
+ cxlds->active_persistent_bytes, "pmem");
}
-EXPORT_SYMBOL_GPL(cxl_mem_create_range_info);
+EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, CXL);
-struct cxl_mem *cxl_mem_create(struct device *dev)
+struct cxl_dev_state *cxl_dev_state_create(struct device *dev)
{
- struct cxl_mem *cxlm;
+ struct cxl_dev_state *cxlds;
- cxlm = devm_kzalloc(dev, sizeof(*cxlm), GFP_KERNEL);
- if (!cxlm) {
+ cxlds = devm_kzalloc(dev, sizeof(*cxlds), GFP_KERNEL);
+ if (!cxlds) {
dev_err(dev, "No memory available\n");
return ERR_PTR(-ENOMEM);
}
- mutex_init(&cxlm->mbox_mutex);
- cxlm->dev = dev;
+ mutex_init(&cxlds->mbox_mutex);
+ cxlds->dev = dev;
- return cxlm;
+ return cxlds;
}
-EXPORT_SYMBOL_GPL(cxl_mem_create);
-
-static struct dentry *cxl_debugfs;
+EXPORT_SYMBOL_NS_GPL(cxl_dev_state_create, CXL);
void __init cxl_mbox_init(void)
{
struct dentry *mbox_debugfs;
- cxl_debugfs = debugfs_create_dir("cxl", NULL);
- mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
+ mbox_debugfs = cxl_debugfs_create_dir("mbox");
debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
&cxl_raw_allow_all);
}
-
-void cxl_mbox_exit(void)
-{
- debugfs_remove_recursive(cxl_debugfs);
-}
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index bf1b04d00ff4..20ce488a7754 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -37,9 +37,9 @@ static ssize_t firmware_version_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
- struct cxl_mem *cxlm = cxlmd->cxlm;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
- return sysfs_emit(buf, "%.16s\n", cxlm->firmware_version);
+ return sysfs_emit(buf, "%.16s\n", cxlds->firmware_version);
}
static DEVICE_ATTR_RO(firmware_version);
@@ -47,9 +47,9 @@ static ssize_t payload_max_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
- struct cxl_mem *cxlm = cxlmd->cxlm;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
- return sysfs_emit(buf, "%zu\n", cxlm->payload_size);
+ return sysfs_emit(buf, "%zu\n", cxlds->payload_size);
}
static DEVICE_ATTR_RO(payload_max);
@@ -57,9 +57,9 @@ static ssize_t label_storage_size_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
- struct cxl_mem *cxlm = cxlmd->cxlm;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
- return sysfs_emit(buf, "%zu\n", cxlm->lsa_size);
+ return sysfs_emit(buf, "%zu\n", cxlds->lsa_size);
}
static DEVICE_ATTR_RO(label_storage_size);
@@ -67,8 +67,8 @@ static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
- struct cxl_mem *cxlm = cxlmd->cxlm;
- unsigned long long len = range_len(&cxlm->ram_range);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ unsigned long long len = resource_size(&cxlds->ram_res);
return sysfs_emit(buf, "%#llx\n", len);
}
@@ -80,8 +80,8 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
- struct cxl_mem *cxlm = cxlmd->cxlm;
- unsigned long long len = range_len(&cxlm->pmem_range);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ unsigned long long len = resource_size(&cxlds->pmem_res);
return sysfs_emit(buf, "%#llx\n", len);
}
@@ -89,10 +89,29 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
static struct device_attribute dev_attr_pmem_size =
__ATTR(size, 0444, pmem_size_show, NULL);
+static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+
+ return sysfs_emit(buf, "%#llx\n", cxlds->serial);
+}
+static DEVICE_ATTR_RO(serial);
+
+static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%d\n", dev_to_node(dev));
+}
+static DEVICE_ATTR_RO(numa_node);
+
static struct attribute *cxl_memdev_attributes[] = {
+ &dev_attr_serial.attr,
&dev_attr_firmware_version.attr,
&dev_attr_payload_max.attr,
&dev_attr_label_storage_size.attr,
+ &dev_attr_numa_node.attr,
NULL,
};
@@ -106,8 +125,17 @@ static struct attribute *cxl_memdev_ram_attributes[] = {
NULL,
};
+static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
+ int n)
+{
+ if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr)
+ return 0;
+ return a->mode;
+}
+
static struct attribute_group cxl_memdev_attribute_group = {
.attrs = cxl_memdev_attributes,
+ .is_visible = cxl_memdev_visible,
};
static struct attribute_group cxl_memdev_ram_attribute_group = {
@@ -134,44 +162,50 @@ static const struct device_type cxl_memdev_type = {
.groups = cxl_memdev_attribute_groups,
};
+bool is_cxl_memdev(struct device *dev)
+{
+ return dev->type == &cxl_memdev_type;
+}
+EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);
+
/**
* set_exclusive_cxl_commands() - atomically disable user cxl commands
- * @cxlm: cxl_mem instance to modify
+ * @cxlds: The device state to operate on
* @cmds: bitmap of commands to mark exclusive
*
* Grab the cxl_memdev_rwsem in write mode to flush in-flight
* invocations of the ioctl path and then disable future execution of
* commands with the command ids set in @cmds.
*/
-void set_exclusive_cxl_commands(struct cxl_mem *cxlm, unsigned long *cmds)
+void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
{
down_write(&cxl_memdev_rwsem);
- bitmap_or(cxlm->exclusive_cmds, cxlm->exclusive_cmds, cmds,
+ bitmap_or(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
CXL_MEM_COMMAND_ID_MAX);
up_write(&cxl_memdev_rwsem);
}
-EXPORT_SYMBOL_GPL(set_exclusive_cxl_commands);
+EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL);
/**
* clear_exclusive_cxl_commands() - atomically enable user cxl commands
- * @cxlm: cxl_mem instance to modify
+ * @cxlds: The device state to modify
* @cmds: bitmap of commands to mark available for userspace
*/
-void clear_exclusive_cxl_commands(struct cxl_mem *cxlm, unsigned long *cmds)
+void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
{
down_write(&cxl_memdev_rwsem);
- bitmap_andnot(cxlm->exclusive_cmds, cxlm->exclusive_cmds, cmds,
+ bitmap_andnot(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
CXL_MEM_COMMAND_ID_MAX);
up_write(&cxl_memdev_rwsem);
}
-EXPORT_SYMBOL_GPL(clear_exclusive_cxl_commands);
+EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);
static void cxl_memdev_shutdown(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
down_write(&cxl_memdev_rwsem);
- cxlmd->cxlm = NULL;
+ cxlmd->cxlds = NULL;
up_write(&cxl_memdev_rwsem);
}
@@ -185,7 +219,18 @@ static void cxl_memdev_unregister(void *_cxlmd)
put_device(dev);
}
-static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm,
+static void detach_memdev(struct work_struct *work)
+{
+ struct cxl_memdev *cxlmd;
+
+ cxlmd = container_of(work, typeof(*cxlmd), detach_work);
+ device_release_driver(&cxlmd->dev);
+ put_device(&cxlmd->dev);
+}
+
+static struct lock_class_key cxl_memdev_key;
+
+static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
const struct file_operations *fops)
{
struct cxl_memdev *cxlmd;
@@ -204,11 +249,13 @@ static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm,
dev = &cxlmd->dev;
device_initialize(dev);
- dev->parent = cxlm->dev;
+ lockdep_set_class(&dev->mutex, &cxl_memdev_key);
+ dev->parent = cxlds->dev;
dev->bus = &cxl_bus_type;
dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
dev->type = &cxl_memdev_type;
device_set_pm_not_required(dev);
+ INIT_WORK(&cxlmd->detach_work, detach_memdev);
cdev = &cxlmd->cdev;
cdev_init(cdev, fops);
@@ -239,7 +286,7 @@ static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
int rc = -ENXIO;
down_read(&cxl_memdev_rwsem);
- if (cxlmd->cxlm)
+ if (cxlmd->cxlds)
rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
up_read(&cxl_memdev_rwsem);
@@ -276,15 +323,14 @@ static const struct file_operations cxl_memdev_fops = {
.llseek = noop_llseek,
};
-struct cxl_memdev *
-devm_cxl_add_memdev(struct cxl_mem *cxlm)
+struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
{
struct cxl_memdev *cxlmd;
struct device *dev;
struct cdev *cdev;
int rc;
- cxlmd = cxl_memdev_alloc(cxlm, &cxl_memdev_fops);
+ cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
if (IS_ERR(cxlmd))
return cxlmd;
@@ -297,14 +343,14 @@ devm_cxl_add_memdev(struct cxl_mem *cxlm)
* Activate ioctl operations, no cxl_memdev_rwsem manipulation
* needed as this is ordered with cdev_add() publishing the device.
*/
- cxlmd->cxlm = cxlm;
+ cxlmd->cxlds = cxlds;
cdev = &cxlmd->cdev;
rc = cdev_device_add(cdev, dev);
if (rc)
goto err;
- rc = devm_add_action_or_reset(cxlm->dev, cxl_memdev_unregister, cxlmd);
+ rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
if (rc)
return ERR_PTR(rc);
return cxlmd;
@@ -318,7 +364,7 @@ err:
put_device(dev);
return ERR_PTR(rc);
}
-EXPORT_SYMBOL_GPL(devm_cxl_add_memdev);
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);
__init int cxl_memdev_init(void)
{
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
new file mode 100644
index 000000000000..9240df53ed87
--- /dev/null
+++ b/drivers/cxl/core/pci.c
@@ -0,0 +1,627 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/pci-doe.h>
+#include <cxlpci.h>
+#include <cxlmem.h>
+#include <cxl.h>
+#include "core.h"
+
+/**
+ * DOC: cxl core pci
+ *
+ * Compute Express Link protocols are layered on top of PCIe. CXL core provides
+ * a set of helpers for CXL interactions which occur via PCIe.
+ */
+
+static unsigned short media_ready_timeout = 60;
+module_param(media_ready_timeout, ushort, 0644);
+MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready");
+
+struct cxl_walk_context {
+ struct pci_bus *bus;
+ struct cxl_port *port;
+ int type;
+ int error;
+ int count;
+};
+
+static int match_add_dports(struct pci_dev *pdev, void *data)
+{
+ struct cxl_walk_context *ctx = data;
+ struct cxl_port *port = ctx->port;
+ int type = pci_pcie_type(pdev);
+ struct cxl_register_map map;
+ struct cxl_dport *dport;
+ u32 lnkcap, port_num;
+ int rc;
+
+ if (pdev->bus != ctx->bus)
+ return 0;
+ if (!pci_is_pcie(pdev))
+ return 0;
+ if (type != ctx->type)
+ return 0;
+ if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
+ &lnkcap))
+ return 0;
+
+ rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
+ if (rc)
+ dev_dbg(&port->dev, "failed to find component registers\n");
+
+ port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
+ dport = devm_cxl_add_dport(port, &pdev->dev, port_num,
+ cxl_regmap_to_base(pdev, &map));
+ if (IS_ERR(dport)) {
+ ctx->error = PTR_ERR(dport);
+ return PTR_ERR(dport);
+ }
+ ctx->count++;
+
+ dev_dbg(&port->dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev));
+
+ return 0;
+}
+
+/**
+ * devm_cxl_port_enumerate_dports() - enumerate downstream ports of the upstream port
+ * @port: cxl_port whose ->uport is the upstream of dports to be enumerated
+ *
+ * Returns a positive number of dports enumerated or a negative error
+ * code.
+ */
+int devm_cxl_port_enumerate_dports(struct cxl_port *port)
+{
+ struct pci_bus *bus = cxl_port_to_pci_bus(port);
+ struct cxl_walk_context ctx;
+ int type;
+
+ if (!bus)
+ return -ENXIO;
+
+ if (pci_is_root_bus(bus))
+ type = PCI_EXP_TYPE_ROOT_PORT;
+ else
+ type = PCI_EXP_TYPE_DOWNSTREAM;
+
+ ctx = (struct cxl_walk_context) {
+ .port = port,
+ .bus = bus,
+ .type = type,
+ };
+ pci_walk_bus(bus, match_add_dports, &ctx);
+
+ if (ctx.count == 0)
+ return -ENODEV;
+ if (ctx.error)
+ return ctx.error;
+ return ctx.count;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL);
+
+/*
+ * Wait up to @media_ready_timeout for the device to report memory
+ * active.
+ */
+int cxl_await_media_ready(struct cxl_dev_state *cxlds)
+{
+ struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+ int d = cxlds->cxl_dvsec;
+ bool active = false;
+ u64 md_status;
+ int rc, i;
+
+ for (i = media_ready_timeout; i; i--) {
+ u32 temp;
+
+ rc = pci_read_config_dword(
+ pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
+ if (rc)
+ return rc;
+
+ active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
+ if (active)
+ break;
+ msleep(1000);
+ }
+
+ if (!active) {
+ dev_err(&pdev->dev,
+ "timeout awaiting memory active after %d seconds\n",
+ media_ready_timeout);
+ return -ETIMEDOUT;
+ }
+
+ md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+ if (!CXLMDEV_READY(md_status))
+ return -EIO;
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, CXL);
+
+static int wait_for_valid(struct cxl_dev_state *cxlds)
+{
+ struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+ int d = cxlds->cxl_dvsec, rc;
+ u32 val;
+
+ /*
+ * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high
+ * and Size Low registers are valid. Must be set within 1 second of
+ * deassertion of reset to the CXL device. It is likely already set by
+ * the time this runs, but otherwise allow up to a 1.5 second timeout to
+ * account for clock skew.
+ */
+ rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
+ if (rc)
+ return rc;
+
+ if (val & CXL_DVSEC_MEM_INFO_VALID)
+ return 0;
+
+ msleep(1500);
+
+ rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
+ if (rc)
+ return rc;
+
+ if (val & CXL_DVSEC_MEM_INFO_VALID)
+ return 0;
+
+ return -ETIMEDOUT;
+}
+
+static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val)
+{
+ struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+ int d = cxlds->cxl_dvsec;
+ u16 ctrl;
+ int rc;
+
+ rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
+ if (rc < 0)
+ return rc;
+
+ if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val)
+ return 1;
+ ctrl &= ~CXL_DVSEC_MEM_ENABLE;
+ ctrl |= val;
+
+ rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl);
+ if (rc < 0)
+ return rc;
+
+ return 0;
+}
+
+static void clear_mem_enable(void *cxlds)
+{
+ cxl_set_mem_enable(cxlds, 0);
+}
+
+static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
+{
+ int rc;
+
+ rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE);
+ if (rc < 0)
+ return rc;
+ if (rc > 0)
+ return 0;
+ return devm_add_action_or_reset(host, clear_mem_enable, cxlds);
+}
+
+static bool range_contains(struct range *r1, struct range *r2)
+{
+ return r1->start <= r2->start && r1->end >= r2->end;
+}
+
+/* require dvsec ranges to be covered by a locked platform window */
+static int dvsec_range_allowed(struct device *dev, void *arg)
+{
+ struct range *dev_range = arg;
+ struct cxl_decoder *cxld;
+
+ if (!is_root_decoder(dev))
+ return 0;
+
+ cxld = to_cxl_decoder(dev);
+
+ if (!(cxld->flags & CXL_DECODER_F_LOCK))
+ return 0;
+ if (!(cxld->flags & CXL_DECODER_F_RAM))
+ return 0;
+
+ return range_contains(&cxld->hpa_range, dev_range);
+}
+
+static void disable_hdm(void *_cxlhdm)
+{
+ u32 global_ctrl;
+ struct cxl_hdm *cxlhdm = _cxlhdm;
+ void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+
+ global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
+ writel(global_ctrl & ~CXL_HDM_DECODER_ENABLE,
+ hdm + CXL_HDM_DECODER_CTRL_OFFSET);
+}
+
+static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
+{
+ void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+ u32 global_ctrl;
+
+ global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
+ writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
+ hdm + CXL_HDM_DECODER_CTRL_OFFSET);
+
+ return devm_add_action_or_reset(host, disable_hdm, cxlhdm);
+}
+
+static bool __cxl_hdm_decode_init(struct cxl_dev_state *cxlds,
+ struct cxl_hdm *cxlhdm,
+ struct cxl_endpoint_dvsec_info *info)
+{
+ void __iomem *hdm = cxlhdm->regs.hdm_decoder;
+ struct cxl_port *port = cxlhdm->port;
+ struct device *dev = cxlds->dev;
+ struct cxl_port *root;
+ int i, rc, allowed;
+ u32 global_ctrl;
+
+ global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
+
+ /*
+ * If the HDM Decoder Capability is already enabled then assume
+ * that some other agent like platform firmware set it up.
+ */
+ if (global_ctrl & CXL_HDM_DECODER_ENABLE) {
+ rc = devm_cxl_enable_mem(&port->dev, cxlds);
+ if (rc)
+ return false;
+ return true;
+ }
+
+ root = to_cxl_port(port->dev.parent);
+ while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
+ root = to_cxl_port(root->dev.parent);
+ if (!is_cxl_root(root)) {
+ dev_err(dev, "Failed to acquire root port for HDM enable\n");
+ return false;
+ }
+
+ for (i = 0, allowed = 0; info->mem_enabled && i < info->ranges; i++) {
+ struct device *cxld_dev;
+
+ cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i],
+ dvsec_range_allowed);
+ if (!cxld_dev) {
+ dev_dbg(dev, "DVSEC Range%d denied by platform\n", i);
+ continue;
+ }
+ dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i);
+ put_device(cxld_dev);
+ allowed++;
+ }
+
+ if (!allowed) {
+ cxl_set_mem_enable(cxlds, 0);
+ info->mem_enabled = 0;
+ }
+
+ /*
+ * Per CXL 2.0 Sections 8.1.3.8.3 and 8.1.3.8.4 (DVSEC CXL Range 1 Base
+ * [High,Low]), the range register values are ignored by the device when
+ * HDM operation is enabled, but the spec also recommends matching the
+ * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
+ * are expected even though Linux does not require or maintain that
+ * match. If at least one DVSEC range is enabled and allowed, skip HDM
+ * Decoder Capability Enable.
+ */
+ if (info->mem_enabled)
+ return false;
+
+ rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
+ if (rc)
+ return false;
+
+ rc = devm_cxl_enable_mem(&port->dev, cxlds);
+ if (rc)
+ return false;
+
+ return true;
+}
+
+/**
+ * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
+ * @cxlds: Device state
+ * @cxlhdm: Mapped HDM decoder Capability
+ *
+ * Try to enable the endpoint's HDM Decoder Capability
+ */
+int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm)
+{
+ struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+ struct cxl_endpoint_dvsec_info info = { 0 };
+ int hdm_count, rc, i, ranges = 0;
+ struct device *dev = &pdev->dev;
+ int d = cxlds->cxl_dvsec;
+ u16 cap, ctrl;
+
+ if (!d) {
+ dev_dbg(dev, "No DVSEC Capability\n");
+ return -ENXIO;
+ }
+
+ rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
+ if (rc)
+ return rc;
+
+ rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
+ if (rc)
+ return rc;
+
+ if (!(cap & CXL_DVSEC_MEM_CAPABLE)) {
+ dev_dbg(dev, "Not MEM Capable\n");
+ return -ENXIO;
+ }
+
+ /*
+ * The spec does not allow MEM.capable to be set with 0 legacy HDM
+ * decoders (values > 2 are also undefined as of CXL 2.0). As this
+ * driver is for a spec-defined class code which must be CXL.mem
+ * capable, there is no point in continuing CXL.mem enablement.
+ */
+ hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
+ if (!hdm_count || hdm_count > 2)
+ return -EINVAL;
+
+ rc = wait_for_valid(cxlds);
+ if (rc) {
+ dev_dbg(dev, "Failure awaiting MEM_INFO_VALID (%d)\n", rc);
+ return rc;
+ }
+
+ /*
+ * The current DVSEC values are moot if the memory capability is
+ * disabled, and they will remain moot after the HDM Decoder
+ * capability is enabled.
+ */
+ info.mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
+ if (!info.mem_enabled)
+ goto hdm_init;
+
+ for (i = 0; i < hdm_count; i++) {
+ u64 base, size;
+ u32 temp;
+
+ rc = pci_read_config_dword(
+ pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
+ if (rc)
+ return rc;
+
+ size = (u64)temp << 32;
+
+ rc = pci_read_config_dword(
+ pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
+ if (rc)
+ return rc;
+
+ size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
+
+ rc = pci_read_config_dword(
+ pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
+ if (rc)
+ return rc;
+
+ base = (u64)temp << 32;
+
+ rc = pci_read_config_dword(
+ pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
+ if (rc)
+ return rc;
+
+ base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;
+
+ info.dvsec_range[i] = (struct range) {
+ .start = base,
+ .end = base + size - 1
+ };
+
+ if (size)
+ ranges++;
+ }
+
+ info.ranges = ranges;
+
+ /*
+ * If DVSEC ranges are being used instead of HDM decoder registers,
+ * there is nothing further for the driver to manage here.
+ */
+hdm_init:
+ if (!__cxl_hdm_decode_init(cxlds, cxlhdm, &info)) {
+ dev_err(dev,
+ "Legacy range registers configuration prevents HDM operation.\n");
+ return -EBUSY;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, CXL);
+
+#define CXL_DOE_TABLE_ACCESS_REQ_CODE 0x000000ff
+#define CXL_DOE_TABLE_ACCESS_REQ_CODE_READ 0
+#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE 0x0000ff00
+#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA 0
+#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE 0xffff0000
+#define CXL_DOE_TABLE_ACCESS_LAST_ENTRY 0xffff
+#define CXL_DOE_PROTOCOL_TABLE_ACCESS 2
+
+static struct pci_doe_mb *find_cdat_doe(struct device *uport)
+{
+ struct cxl_memdev *cxlmd;
+ struct cxl_dev_state *cxlds;
+ unsigned long index;
+ void *entry;
+
+ cxlmd = to_cxl_memdev(uport);
+ cxlds = cxlmd->cxlds;
+
+ xa_for_each(&cxlds->doe_mbs, index, entry) {
+ struct pci_doe_mb *cur = entry;
+
+ if (pci_doe_supports_prot(cur, PCI_DVSEC_VENDOR_ID_CXL,
+ CXL_DOE_PROTOCOL_TABLE_ACCESS))
+ return cur;
+ }
+
+ return NULL;
+}
+
+#define CDAT_DOE_REQ(entry_handle) \
+ (FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE, \
+ CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) | \
+ FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE, \
+ CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) | \
+ FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))
+
+static void cxl_doe_task_complete(struct pci_doe_task *task)
+{
+ complete(task->private);
+}
+
+struct cdat_doe_task {
+ u32 request_pl;
+ u32 response_pl[32];
+ struct completion c;
+ struct pci_doe_task task;
+};
+
+#define DECLARE_CDAT_DOE_TASK(req, cdt) \
+struct cdat_doe_task cdt = { \
+ .c = COMPLETION_INITIALIZER_ONSTACK(cdt.c), \
+ .request_pl = req, \
+ .task = { \
+ .prot.vid = PCI_DVSEC_VENDOR_ID_CXL, \
+ .prot.type = CXL_DOE_PROTOCOL_TABLE_ACCESS, \
+ .request_pl = &cdt.request_pl, \
+ .request_pl_sz = sizeof(cdt.request_pl), \
+ .response_pl = cdt.response_pl, \
+ .response_pl_sz = sizeof(cdt.response_pl), \
+ .complete = cxl_doe_task_complete, \
+ .private = &cdt.c, \
+ } \
+}
+
+static int cxl_cdat_get_length(struct device *dev,
+ struct pci_doe_mb *cdat_doe,
+ size_t *length)
+{
+ DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(0), t);
+ int rc;
+
+ rc = pci_doe_submit_task(cdat_doe, &t.task);
+ if (rc < 0) {
+ dev_err(dev, "DOE submit failed: %d", rc);
+ return rc;
+ }
+ wait_for_completion(&t.c);
+ if (t.task.rv < sizeof(u32))
+ return -EIO;
+
+ *length = t.response_pl[1];
+ dev_dbg(dev, "CDAT length %zu\n", *length);
+
+ return 0;
+}
+
+static int cxl_cdat_read_table(struct device *dev,
+ struct pci_doe_mb *cdat_doe,
+ struct cxl_cdat *cdat)
+{
+ size_t length = cdat->length;
+ u32 *data = cdat->table;
+ int entry_handle = 0;
+
+ do {
+ DECLARE_CDAT_DOE_TASK(CDAT_DOE_REQ(entry_handle), t);
+ size_t entry_dw;
+ u32 *entry;
+ int rc;
+
+ rc = pci_doe_submit_task(cdat_doe, &t.task);
+ if (rc < 0) {
+ dev_err(dev, "DOE submit failed: %d", rc);
+ return rc;
+ }
+ wait_for_completion(&t.c);
+ /* 1 DW header + 1 DW data min */
+ if (t.task.rv < (2 * sizeof(u32)))
+ return -EIO;
+
+ /* Get the CXL table access header entry handle */
+ entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
+ t.response_pl[0]);
+ entry = t.response_pl + 1;
+ entry_dw = t.task.rv / sizeof(u32);
+ /* Skip Header */
+ entry_dw -= 1;
+ entry_dw = min(length / sizeof(u32), entry_dw);
+ /* Prevent length < 1 DW from causing a buffer overflow */
+ if (entry_dw) {
+ memcpy(data, entry, entry_dw * sizeof(u32));
+ length -= entry_dw * sizeof(u32);
+ data += entry_dw;
+ }
+ } while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);
+
+ return 0;
+}
+
+/**
+ * read_cdat_data() - Read the CDAT data on this port
+ * @port: Port to read data from
+ *
+ * This call will sleep waiting for responses from the DOE mailbox.
+ */
+void read_cdat_data(struct cxl_port *port)
+{
+ struct pci_doe_mb *cdat_doe;
+ struct device *dev = &port->dev;
+ struct device *uport = port->uport;
+ size_t cdat_length;
+ int rc;
+
+ cdat_doe = find_cdat_doe(uport);
+ if (!cdat_doe) {
+ dev_dbg(dev, "No CDAT mailbox\n");
+ return;
+ }
+
+ port->cdat_available = true;
+
+ if (cxl_cdat_get_length(dev, cdat_doe, &cdat_length)) {
+ dev_dbg(dev, "No CDAT length\n");
+ return;
+ }
+
+ port->cdat.table = devm_kzalloc(dev, cdat_length, GFP_KERNEL);
+ if (!port->cdat.table)
+ return;
+
+ port->cdat.length = cdat_length;
+ rc = cxl_cdat_read_table(dev, cdat_doe, &port->cdat);
+ if (rc) {
+ /* Don't leave table data allocated on error */
+ devm_kfree(dev, port->cdat.table);
+ port->cdat.table = NULL;
+ port->cdat.length = 0;
+ dev_err(dev, "CDAT data read error\n");
+ }
+}
+EXPORT_SYMBOL_NS_GPL(read_cdat_data, CXL);
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
index 5032f4c1c69d..36aa5070d902 100644
--- a/drivers/cxl/core/pmem.c
+++ b/drivers/cxl/core/pmem.c
@@ -49,26 +49,40 @@ struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev)
return NULL;
return container_of(dev, struct cxl_nvdimm_bridge, dev);
}
-EXPORT_SYMBOL_GPL(to_cxl_nvdimm_bridge);
+EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm_bridge, CXL);
-__mock int match_nvdimm_bridge(struct device *dev, const void *data)
+bool is_cxl_nvdimm_bridge(struct device *dev)
{
return dev->type == &cxl_nvdimm_bridge_type;
}
+EXPORT_SYMBOL_NS_GPL(is_cxl_nvdimm_bridge, CXL);
-struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd)
+static int match_nvdimm_bridge(struct device *dev, void *data)
{
+ return is_cxl_nvdimm_bridge(dev);
+}
+
+struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct device *start)
+{
+ struct cxl_port *port = find_cxl_root(start);
struct device *dev;
- dev = bus_find_device(&cxl_bus_type, NULL, cxl_nvd, match_nvdimm_bridge);
+ if (!port)
+ return NULL;
+
+ dev = device_find_child(&port->dev, NULL, match_nvdimm_bridge);
+ put_device(&port->dev);
+
if (!dev)
return NULL;
+
return to_cxl_nvdimm_bridge(dev);
}
-EXPORT_SYMBOL_GPL(cxl_find_nvdimm_bridge);
+EXPORT_SYMBOL_NS_GPL(cxl_find_nvdimm_bridge, CXL);
-static struct cxl_nvdimm_bridge *
-cxl_nvdimm_bridge_alloc(struct cxl_port *port)
+static struct lock_class_key cxl_nvdimm_bridge_key;
+
+static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port)
{
struct cxl_nvdimm_bridge *cxl_nvb;
struct device *dev;
@@ -87,6 +101,7 @@ cxl_nvdimm_bridge_alloc(struct cxl_port *port)
cxl_nvb->port = port;
cxl_nvb->state = CXL_NVB_NEW;
device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &cxl_nvdimm_bridge_key);
device_set_pm_not_required(dev);
dev->parent = &port->dev;
dev->bus = &cxl_bus_type;
@@ -167,12 +182,13 @@ err:
put_device(dev);
return ERR_PTR(rc);
}
-EXPORT_SYMBOL_GPL(devm_cxl_add_nvdimm_bridge);
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm_bridge, CXL);
static void cxl_nvdimm_release(struct device *dev)
{
struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
+ xa_destroy(&cxl_nvd->pmem_regions);
kfree(cxl_nvd);
}
@@ -191,7 +207,7 @@ bool is_cxl_nvdimm(struct device *dev)
{
return dev->type == &cxl_nvdimm_type;
}
-EXPORT_SYMBOL_GPL(is_cxl_nvdimm);
+EXPORT_SYMBOL_NS_GPL(is_cxl_nvdimm, CXL);
struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev)
{
@@ -200,7 +216,9 @@ struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev)
return NULL;
return container_of(dev, struct cxl_nvdimm, dev);
}
-EXPORT_SYMBOL_GPL(to_cxl_nvdimm);
+EXPORT_SYMBOL_NS_GPL(to_cxl_nvdimm, CXL);
+
+static struct lock_class_key cxl_nvdimm_key;
static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
{
@@ -213,7 +231,9 @@ static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
dev = &cxl_nvd->dev;
cxl_nvd->cxlmd = cxlmd;
+ xa_init(&cxl_nvd->pmem_regions);
device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &cxl_nvdimm_key);
device_set_pm_not_required(dev);
dev->parent = &cxlmd->dev;
dev->bus = &cxl_bus_type;
@@ -262,4 +282,4 @@ err:
put_device(dev);
return rc;
}
-EXPORT_SYMBOL_GPL(devm_cxl_add_nvdimm);
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_nvdimm, CXL);
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
new file mode 100644
index 000000000000..e7556864ea80
--- /dev/null
+++ b/drivers/cxl/core/port.c
@@ -0,0 +1,1905 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/memregion.h>
+#include <linux/workqueue.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <cxlmem.h>
+#include <cxlpci.h>
+#include <cxl.h>
+#include "core.h"
+
+/**
+ * DOC: cxl core
+ *
+ * The CXL core provides a set of interfaces that can be consumed by CXL aware
+ * drivers. The interfaces allow for creation, modification, and destruction of
+ * regions, memory devices, ports, and decoders. CXL aware drivers must register
+ * with the CXL core via these interfaces in order to be able to participate in
+ * cross-device interleave coordination. The CXL core also establishes and
+ * maintains the bridge to the nvdimm subsystem.
+ *
+ * CXL core introduces sysfs hierarchy to control the devices that are
+ * instantiated by the core.
+ */
+
+static DEFINE_IDA(cxl_port_ida);
+static DEFINE_XARRAY(cxl_root_buses);
+
+static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%s\n", dev->type->name);
+}
+static DEVICE_ATTR_RO(devtype);
+
+static int cxl_device_id(struct device *dev)
+{
+ if (dev->type == &cxl_nvdimm_bridge_type)
+ return CXL_DEVICE_NVDIMM_BRIDGE;
+ if (dev->type == &cxl_nvdimm_type)
+ return CXL_DEVICE_NVDIMM;
+ if (dev->type == CXL_PMEM_REGION_TYPE())
+ return CXL_DEVICE_PMEM_REGION;
+ if (is_cxl_port(dev)) {
+ if (is_cxl_root(to_cxl_port(dev)))
+ return CXL_DEVICE_ROOT;
+ return CXL_DEVICE_PORT;
+ }
+ if (is_cxl_memdev(dev))
+ return CXL_DEVICE_MEMORY_EXPANDER;
+ if (dev->type == CXL_REGION_TYPE())
+ return CXL_DEVICE_REGION;
+ return 0;
+}
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, CXL_MODALIAS_FMT "\n", cxl_device_id(dev));
+}
+static DEVICE_ATTR_RO(modalias);
+
+static struct attribute *cxl_base_attributes[] = {
+ &dev_attr_devtype.attr,
+ &dev_attr_modalias.attr,
+ NULL,
+};
+
+struct attribute_group cxl_base_attribute_group = {
+ .attrs = cxl_base_attributes,
+};
+
+static ssize_t start_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+ return sysfs_emit(buf, "%#llx\n", cxld->hpa_range.start);
+}
+static DEVICE_ATTR_ADMIN_RO(start);
+
+static ssize_t size_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+ return sysfs_emit(buf, "%#llx\n", range_len(&cxld->hpa_range));
+}
+static DEVICE_ATTR_RO(size);
+
+#define CXL_DECODER_FLAG_ATTR(name, flag) \
+static ssize_t name##_show(struct device *dev, \
+ struct device_attribute *attr, char *buf) \
+{ \
+ struct cxl_decoder *cxld = to_cxl_decoder(dev); \
+ \
+ return sysfs_emit(buf, "%s\n", \
+ (cxld->flags & (flag)) ? "1" : "0"); \
+} \
+static DEVICE_ATTR_RO(name)
+
+CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM);
+CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM);
+CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2);
+CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3);
+CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK);
+
+static ssize_t target_type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+ switch (cxld->target_type) {
+ case CXL_DECODER_ACCELERATOR:
+ return sysfs_emit(buf, "accelerator\n");
+ case CXL_DECODER_EXPANDER:
+ return sysfs_emit(buf, "expander\n");
+ }
+ return -ENXIO;
+}
+static DEVICE_ATTR_RO(target_type);
+
+static ssize_t emit_target_list(struct cxl_switch_decoder *cxlsd, char *buf)
+{
+ struct cxl_decoder *cxld = &cxlsd->cxld;
+ ssize_t offset = 0;
+ int i, rc = 0;
+
+ for (i = 0; i < cxld->interleave_ways; i++) {
+ struct cxl_dport *dport = cxlsd->target[i];
+ struct cxl_dport *next = NULL;
+
+ if (!dport)
+ break;
+
+ if (i + 1 < cxld->interleave_ways)
+ next = cxlsd->target[i + 1];
+ rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id,
+ next ? "," : "");
+ if (rc < 0)
+ return rc;
+ offset += rc;
+ }
+
+ return offset;
+}
+
+static ssize_t target_list_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(dev);
+ ssize_t offset;
+ unsigned int seq;
+ int rc;
+
+ do {
+ seq = read_seqbegin(&cxlsd->target_lock);
+ rc = emit_target_list(cxlsd, buf);
+ } while (read_seqretry(&cxlsd->target_lock, seq));
+
+ if (rc < 0)
+ return rc;
+ offset = rc;
+
+ rc = sysfs_emit_at(buf, offset, "\n");
+ if (rc < 0)
+ return rc;
+
+ return offset + rc;
+}
+static DEVICE_ATTR_RO(target_list);
+
+static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
+
+ switch (cxled->mode) {
+ case CXL_DECODER_RAM:
+ return sysfs_emit(buf, "ram\n");
+ case CXL_DECODER_PMEM:
+ return sysfs_emit(buf, "pmem\n");
+ case CXL_DECODER_NONE:
+ return sysfs_emit(buf, "none\n");
+ case CXL_DECODER_MIXED:
+ default:
+ return sysfs_emit(buf, "mixed\n");
+ }
+}
+
+static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
+ enum cxl_decoder_mode mode;
+ ssize_t rc;
+
+ if (sysfs_streq(buf, "pmem"))
+ mode = CXL_DECODER_PMEM;
+ else if (sysfs_streq(buf, "ram"))
+ mode = CXL_DECODER_RAM;
+ else
+ return -EINVAL;
+
+ rc = cxl_dpa_set_mode(cxled, mode);
+ if (rc)
+ return rc;
+
+ return len;
+}
+static DEVICE_ATTR_RW(mode);
+
+static ssize_t dpa_resource_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
+ u64 base = cxl_dpa_resource_start(cxled);
+
+ return sysfs_emit(buf, "%#llx\n", base);
+}
+static DEVICE_ATTR_RO(dpa_resource);
+
+static ssize_t dpa_size_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
+ resource_size_t size = cxl_dpa_size(cxled);
+
+ return sysfs_emit(buf, "%pa\n", &size);
+}
+
+static ssize_t dpa_size_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
+ unsigned long long size;
+ ssize_t rc;
+
+ rc = kstrtoull(buf, 0, &size);
+ if (rc)
+ return rc;
+
+ if (!IS_ALIGNED(size, SZ_256M))
+ return -EINVAL;
+
+ rc = cxl_dpa_free(cxled);
+ if (rc)
+ return rc;
+
+ if (size == 0)
+ return len;
+
+ rc = cxl_dpa_alloc(cxled, size);
+ if (rc)
+ return rc;
+
+ return len;
+}
+static DEVICE_ATTR_RW(dpa_size);
+
+static ssize_t interleave_granularity_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+ return sysfs_emit(buf, "%d\n", cxld->interleave_granularity);
+}
+
+static DEVICE_ATTR_RO(interleave_granularity);
+
+static ssize_t interleave_ways_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+ return sysfs_emit(buf, "%d\n", cxld->interleave_ways);
+}
+
+static DEVICE_ATTR_RO(interleave_ways);
+
+static struct attribute *cxl_decoder_base_attrs[] = {
+ &dev_attr_start.attr,
+ &dev_attr_size.attr,
+ &dev_attr_locked.attr,
+ &dev_attr_interleave_granularity.attr,
+ &dev_attr_interleave_ways.attr,
+ NULL,
+};
+
+static struct attribute_group cxl_decoder_base_attribute_group = {
+ .attrs = cxl_decoder_base_attrs,
+};
+
+static struct attribute *cxl_decoder_root_attrs[] = {
+ &dev_attr_cap_pmem.attr,
+ &dev_attr_cap_ram.attr,
+ &dev_attr_cap_type2.attr,
+ &dev_attr_cap_type3.attr,
+ &dev_attr_target_list.attr,
+ SET_CXL_REGION_ATTR(create_pmem_region)
+ SET_CXL_REGION_ATTR(delete_region)
+ NULL,
+};
+
+static bool can_create_pmem(struct cxl_root_decoder *cxlrd)
+{
+ unsigned long flags = CXL_DECODER_F_TYPE3 | CXL_DECODER_F_PMEM;
+
+ return (cxlrd->cxlsd.cxld.flags & flags) == flags;
+}
+
+static umode_t cxl_root_decoder_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
+
+ if (a == CXL_REGION_ATTR(create_pmem_region) && !can_create_pmem(cxlrd))
+ return 0;
+
+ if (a == CXL_REGION_ATTR(delete_region) && !can_create_pmem(cxlrd))
+ return 0;
+
+ return a->mode;
+}
+
+static struct attribute_group cxl_decoder_root_attribute_group = {
+ .attrs = cxl_decoder_root_attrs,
+ .is_visible = cxl_root_decoder_visible,
+};
+
+static const struct attribute_group *cxl_decoder_root_attribute_groups[] = {
+ &cxl_decoder_root_attribute_group,
+ &cxl_decoder_base_attribute_group,
+ &cxl_base_attribute_group,
+ NULL,
+};
+
+static struct attribute *cxl_decoder_switch_attrs[] = {
+ &dev_attr_target_type.attr,
+ &dev_attr_target_list.attr,
+ SET_CXL_REGION_ATTR(region)
+ NULL,
+};
+
+static struct attribute_group cxl_decoder_switch_attribute_group = {
+ .attrs = cxl_decoder_switch_attrs,
+};
+
+static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = {
+ &cxl_decoder_switch_attribute_group,
+ &cxl_decoder_base_attribute_group,
+ &cxl_base_attribute_group,
+ NULL,
+};
+
+static struct attribute *cxl_decoder_endpoint_attrs[] = {
+ &dev_attr_target_type.attr,
+ &dev_attr_mode.attr,
+ &dev_attr_dpa_size.attr,
+ &dev_attr_dpa_resource.attr,
+ SET_CXL_REGION_ATTR(region)
+ NULL,
+};
+
+static struct attribute_group cxl_decoder_endpoint_attribute_group = {
+ .attrs = cxl_decoder_endpoint_attrs,
+};
+
+static const struct attribute_group *cxl_decoder_endpoint_attribute_groups[] = {
+ &cxl_decoder_base_attribute_group,
+ &cxl_decoder_endpoint_attribute_group,
+ &cxl_base_attribute_group,
+ NULL,
+};
+
+static void __cxl_decoder_release(struct cxl_decoder *cxld)
+{
+ struct cxl_port *port = to_cxl_port(cxld->dev.parent);
+
+ ida_free(&port->decoder_ida, cxld->id);
+ put_device(&port->dev);
+}
+
+static void cxl_endpoint_decoder_release(struct device *dev)
+{
+ struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
+
+ __cxl_decoder_release(&cxled->cxld);
+ kfree(cxled);
+}
+
+static void cxl_switch_decoder_release(struct device *dev)
+{
+ struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(dev);
+
+ __cxl_decoder_release(&cxlsd->cxld);
+ kfree(cxlsd);
+}
+
+struct cxl_root_decoder *to_cxl_root_decoder(struct device *dev)
+{
+ if (dev_WARN_ONCE(dev, !is_root_decoder(dev),
+ "not a cxl_root_decoder device\n"))
+ return NULL;
+ return container_of(dev, struct cxl_root_decoder, cxlsd.cxld.dev);
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_root_decoder, CXL);
+
+static void cxl_root_decoder_release(struct device *dev)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
+
+ if (atomic_read(&cxlrd->region_id) >= 0)
+ memregion_free(atomic_read(&cxlrd->region_id));
+ __cxl_decoder_release(&cxlrd->cxlsd.cxld);
+ kfree(cxlrd);
+}
+
+static const struct device_type cxl_decoder_endpoint_type = {
+ .name = "cxl_decoder_endpoint",
+ .release = cxl_endpoint_decoder_release,
+ .groups = cxl_decoder_endpoint_attribute_groups,
+};
+
+static const struct device_type cxl_decoder_switch_type = {
+ .name = "cxl_decoder_switch",
+ .release = cxl_switch_decoder_release,
+ .groups = cxl_decoder_switch_attribute_groups,
+};
+
+static const struct device_type cxl_decoder_root_type = {
+ .name = "cxl_decoder_root",
+ .release = cxl_root_decoder_release,
+ .groups = cxl_decoder_root_attribute_groups,
+};
+
+bool is_endpoint_decoder(struct device *dev)
+{
+ return dev->type == &cxl_decoder_endpoint_type;
+}
+
+bool is_root_decoder(struct device *dev)
+{
+ return dev->type == &cxl_decoder_root_type;
+}
+EXPORT_SYMBOL_NS_GPL(is_root_decoder, CXL);
+
+bool is_switch_decoder(struct device *dev)
+{
+ return is_root_decoder(dev) || dev->type == &cxl_decoder_switch_type;
+}
+
+struct cxl_decoder *to_cxl_decoder(struct device *dev)
+{
+ if (dev_WARN_ONCE(dev,
+ !is_switch_decoder(dev) && !is_endpoint_decoder(dev),
+ "not a cxl_decoder device\n"))
+ return NULL;
+ return container_of(dev, struct cxl_decoder, dev);
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_decoder, CXL);
+
+struct cxl_endpoint_decoder *to_cxl_endpoint_decoder(struct device *dev)
+{
+ if (dev_WARN_ONCE(dev, !is_endpoint_decoder(dev),
+ "not a cxl_endpoint_decoder device\n"))
+ return NULL;
+ return container_of(dev, struct cxl_endpoint_decoder, cxld.dev);
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_endpoint_decoder, CXL);
+
+struct cxl_switch_decoder *to_cxl_switch_decoder(struct device *dev)
+{
+ if (dev_WARN_ONCE(dev, !is_switch_decoder(dev),
+ "not a cxl_switch_decoder device\n"))
+ return NULL;
+ return container_of(dev, struct cxl_switch_decoder, cxld.dev);
+}
+
+static void cxl_ep_release(struct cxl_ep *ep)
+{
+ put_device(ep->ep);
+ kfree(ep);
+}
+
+static void cxl_ep_remove(struct cxl_port *port, struct cxl_ep *ep)
+{
+ if (!ep)
+ return;
+ xa_erase(&port->endpoints, (unsigned long) ep->ep);
+ cxl_ep_release(ep);
+}
+
+static void cxl_port_release(struct device *dev)
+{
+ struct cxl_port *port = to_cxl_port(dev);
+ unsigned long index;
+ struct cxl_ep *ep;
+
+ xa_for_each(&port->endpoints, index, ep)
+ cxl_ep_remove(port, ep);
+ xa_destroy(&port->endpoints);
+ xa_destroy(&port->dports);
+ xa_destroy(&port->regions);
+ ida_free(&cxl_port_ida, port->id);
+ kfree(port);
+}
+
+static const struct attribute_group *cxl_port_attribute_groups[] = {
+ &cxl_base_attribute_group,
+ NULL,
+};
+
+static const struct device_type cxl_port_type = {
+ .name = "cxl_port",
+ .release = cxl_port_release,
+ .groups = cxl_port_attribute_groups,
+};
+
+bool is_cxl_port(struct device *dev)
+{
+ return dev->type == &cxl_port_type;
+}
+EXPORT_SYMBOL_NS_GPL(is_cxl_port, CXL);
+
+struct cxl_port *to_cxl_port(struct device *dev)
+{
+ if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
+ "not a cxl_port device\n"))
+ return NULL;
+ return container_of(dev, struct cxl_port, dev);
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_port, CXL);
+
+static void unregister_port(void *_port)
+{
+ struct cxl_port *port = _port;
+ struct cxl_port *parent;
+ struct device *lock_dev;
+
+ if (is_cxl_root(port))
+ parent = NULL;
+ else
+ parent = to_cxl_port(port->dev.parent);
+
+ /*
+ * The CXL root port and the first level of ports are unregistered
+ * under the platform firmware device lock; all other ports are
+ * unregistered while holding their parent port lock.
+ */
+ if (!parent)
+ lock_dev = port->uport;
+ else if (is_cxl_root(parent))
+ lock_dev = parent->uport;
+ else
+ lock_dev = &parent->dev;
+
+ device_lock_assert(lock_dev);
+ port->dead = true;
+ device_unregister(&port->dev);
+}
+
+static void cxl_unlink_uport(void *_port)
+{
+ struct cxl_port *port = _port;
+
+ sysfs_remove_link(&port->dev.kobj, "uport");
+}
+
+static int devm_cxl_link_uport(struct device *host, struct cxl_port *port)
+{
+ int rc;
+
+ rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport");
+ if (rc)
+ return rc;
+ return devm_add_action_or_reset(host, cxl_unlink_uport, port);
+}
+
+static struct lock_class_key cxl_port_key;
+
+static struct cxl_port *cxl_port_alloc(struct device *uport,
+ resource_size_t component_reg_phys,
+ struct cxl_dport *parent_dport)
+{
+ struct cxl_port *port;
+ struct device *dev;
+ int rc;
+
+ port = kzalloc(sizeof(*port), GFP_KERNEL);
+ if (!port)
+ return ERR_PTR(-ENOMEM);
+
+ rc = ida_alloc(&cxl_port_ida, GFP_KERNEL);
+ if (rc < 0)
+ goto err;
+ port->id = rc;
+ port->uport = uport;
+
+ /*
+ * The top-level cxl_port "cxl_root" does not have a cxl_port as
+ * its parent and it does not have any corresponding component
+ * registers as its decode is described by a fixed platform
+ * description.
+ */
+ dev = &port->dev;
+ if (parent_dport) {
+ struct cxl_port *parent_port = parent_dport->port;
+ struct cxl_port *iter;
+
+ dev->parent = &parent_port->dev;
+ port->depth = parent_port->depth + 1;
+ port->parent_dport = parent_dport;
+
+ /*
+ * walk to the host bridge, or the first ancestor that knows
+ * the host bridge
+ */
+ iter = port;
+ while (!iter->host_bridge &&
+ !is_cxl_root(to_cxl_port(iter->dev.parent)))
+ iter = to_cxl_port(iter->dev.parent);
+ if (iter->host_bridge)
+ port->host_bridge = iter->host_bridge;
+ else
+ port->host_bridge = iter->uport;
+ dev_dbg(uport, "host-bridge: %s\n", dev_name(port->host_bridge));
+ } else
+ dev->parent = uport;
+
+ port->component_reg_phys = component_reg_phys;
+ ida_init(&port->decoder_ida);
+ port->hdm_end = -1;
+ port->commit_end = -1;
+ xa_init(&port->dports);
+ xa_init(&port->endpoints);
+ xa_init(&port->regions);
+
+ device_initialize(dev);
+ lockdep_set_class_and_subclass(&dev->mutex, &cxl_port_key, port->depth);
+ device_set_pm_not_required(dev);
+ dev->bus = &cxl_bus_type;
+ dev->type = &cxl_port_type;
+
+ return port;
+
+err:
+ kfree(port);
+ return ERR_PTR(rc);
+}
+
+/**
+ * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy
+ * @host: host device for devm operations
+ * @uport: "physical" device implementing this upstream port
+ * @component_reg_phys: (optional) for configurable cxl_port instances
+ * @parent_dport: next hop up in the CXL memory decode hierarchy
+ */
+struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
+ resource_size_t component_reg_phys,
+ struct cxl_dport *parent_dport)
+{
+ struct cxl_port *port;
+ struct device *dev;
+ int rc;
+
+ port = cxl_port_alloc(uport, component_reg_phys, parent_dport);
+ if (IS_ERR(port))
+ return port;
+
+ dev = &port->dev;
+ if (is_cxl_memdev(uport))
+ rc = dev_set_name(dev, "endpoint%d", port->id);
+ else if (parent_dport)
+ rc = dev_set_name(dev, "port%d", port->id);
+ else
+ rc = dev_set_name(dev, "root%d", port->id);
+ if (rc)
+ goto err;
+
+ rc = device_add(dev);
+ if (rc)
+ goto err;
+
+ rc = devm_add_action_or_reset(host, unregister_port, port);
+ if (rc)
+ return ERR_PTR(rc);
+
+ rc = devm_cxl_link_uport(host, port);
+ if (rc)
+ return ERR_PTR(rc);
+
+ return port;
+
+err:
+ put_device(dev);
+ return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL);
+
+struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port)
+{
+ /* There is no pci_bus associated with a CXL platform-root port */
+ if (is_cxl_root(port))
+ return NULL;
+
+ if (dev_is_pci(port->uport)) {
+ struct pci_dev *pdev = to_pci_dev(port->uport);
+
+ return pdev->subordinate;
+ }
+
+ return xa_load(&cxl_root_buses, (unsigned long)port->uport);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_port_to_pci_bus, CXL);
+
+static void unregister_pci_bus(void *uport)
+{
+ xa_erase(&cxl_root_buses, (unsigned long)uport);
+}
+
+int devm_cxl_register_pci_bus(struct device *host, struct device *uport,
+ struct pci_bus *bus)
+{
+ int rc;
+
+ if (dev_is_pci(uport))
+ return -EINVAL;
+
+ rc = xa_insert(&cxl_root_buses, (unsigned long)uport, bus, GFP_KERNEL);
+ if (rc)
+ return rc;
+ return devm_add_action_or_reset(host, unregister_pci_bus, uport);
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_register_pci_bus, CXL);
+
+static bool dev_is_cxl_root_child(struct device *dev)
+{
+ struct cxl_port *port, *parent;
+
+ if (!is_cxl_port(dev))
+ return false;
+
+ port = to_cxl_port(dev);
+ if (is_cxl_root(port))
+ return false;
+
+ parent = to_cxl_port(port->dev.parent);
+ if (is_cxl_root(parent))
+ return true;
+
+ return false;
+}
+
+/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */
+static int match_root_child(struct device *dev, const void *match)
+{
+ const struct device *iter = NULL;
+ struct cxl_dport *dport;
+ struct cxl_port *port;
+
+ if (!dev_is_cxl_root_child(dev))
+ return 0;
+
+ port = to_cxl_port(dev);
+ iter = match;
+ while (iter) {
+ dport = cxl_find_dport_by_dev(port, iter);
+ if (dport)
+ break;
+ iter = iter->parent;
+ }
+
+ return !!iter;
+}
+
+struct cxl_port *find_cxl_root(struct device *dev)
+{
+ struct device *port_dev;
+ struct cxl_port *root;
+
+ port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child);
+ if (!port_dev)
+ return NULL;
+
+ root = to_cxl_port(port_dev->parent);
+ get_device(&root->dev);
+ put_device(port_dev);
+ return root;
+}
+EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL);
+
+static struct cxl_dport *find_dport(struct cxl_port *port, int id)
+{
+ struct cxl_dport *dport;
+ unsigned long index;
+
+ device_lock_assert(&port->dev);
+ xa_for_each(&port->dports, index, dport)
+ if (dport->port_id == id)
+ return dport;
+ return NULL;
+}
+
+static int add_dport(struct cxl_port *port, struct cxl_dport *new)
+{
+ struct cxl_dport *dup;
+ int rc;
+
+ device_lock_assert(&port->dev);
+ dup = find_dport(port, new->port_id);
+ if (dup) {
+ dev_err(&port->dev,
+ "unable to add dport%d-%s non-unique port id (%s)\n",
+ new->port_id, dev_name(new->dport),
+ dev_name(dup->dport));
+ return -EBUSY;
+ }
+
+ rc = xa_insert(&port->dports, (unsigned long)new->dport, new,
+ GFP_KERNEL);
+ if (rc)
+ return rc;
+
+ port->nr_dports++;
+ return 0;
+}
+
+/*
+ * Since root-level CXL dports cannot be enumerated by PCI, they are not
+ * enumerated by the common port driver that acquires the port lock over
+ * dport add/remove. Instead, root dports are manually added by a
+ * platform driver and cond_cxl_root_lock() is used to take the missing
+ * port lock in that case.
+ */
+static void cond_cxl_root_lock(struct cxl_port *port)
+{
+ if (is_cxl_root(port))
+ device_lock(&port->dev);
+}
+
+static void cond_cxl_root_unlock(struct cxl_port *port)
+{
+ if (is_cxl_root(port))
+ device_unlock(&port->dev);
+}
+
+static void cxl_dport_remove(void *data)
+{
+ struct cxl_dport *dport = data;
+ struct cxl_port *port = dport->port;
+
+ xa_erase(&port->dports, (unsigned long) dport->dport);
+ put_device(dport->dport);
+}
+
+static void cxl_dport_unlink(void *data)
+{
+ struct cxl_dport *dport = data;
+ struct cxl_port *port = dport->port;
+ char link_name[CXL_TARGET_STRLEN];
+
+ sprintf(link_name, "dport%d", dport->port_id);
+ sysfs_remove_link(&port->dev.kobj, link_name);
+}
+
+/**
+ * devm_cxl_add_dport - append downstream port data to a cxl_port
+ * @port: the cxl_port that references this dport
+ * @dport_dev: firmware or PCI device representing the dport
+ * @port_id: identifier for this dport in a decoder's target list
+ * @component_reg_phys: optional location of CXL component registers
+ *
+ * Note that dports are appended to the devm release actions of either
+ * the port's host (for root ports) or the port itself (for switch
+ * ports).
+ */
+struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port,
+ struct device *dport_dev, int port_id,
+ resource_size_t component_reg_phys)
+{
+ char link_name[CXL_TARGET_STRLEN];
+ struct cxl_dport *dport;
+ struct device *host;
+ int rc;
+
+ if (is_cxl_root(port))
+ host = port->uport;
+ else
+ host = &port->dev;
+
+ if (!host->driver) {
+ dev_WARN_ONCE(&port->dev, 1, "dport:%s bad devm context\n",
+ dev_name(dport_dev));
+ return ERR_PTR(-ENXIO);
+ }
+
+ if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >=
+ CXL_TARGET_STRLEN)
+ return ERR_PTR(-EINVAL);
+
+ dport = devm_kzalloc(host, sizeof(*dport), GFP_KERNEL);
+ if (!dport)
+ return ERR_PTR(-ENOMEM);
+
+ dport->dport = dport_dev;
+ dport->port_id = port_id;
+ dport->component_reg_phys = component_reg_phys;
+ dport->port = port;
+
+ cond_cxl_root_lock(port);
+ rc = add_dport(port, dport);
+ cond_cxl_root_unlock(port);
+ if (rc)
+ return ERR_PTR(rc);
+
+ get_device(dport_dev);
+ rc = devm_add_action_or_reset(host, cxl_dport_remove, dport);
+ if (rc)
+ return ERR_PTR(rc);
+
+ rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name);
+ if (rc)
+ return ERR_PTR(rc);
+
+ rc = devm_add_action_or_reset(host, cxl_dport_unlink, dport);
+ if (rc)
+ return ERR_PTR(rc);
+
+ return dport;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport, CXL);
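+
+/*
+ * An illustrative call (hypothetical platform driver context, where
+ * @root_port, @bridge, and @port_id were already resolved by the
+ * caller; root dports often have no component registers):
+ *
+ *	struct cxl_dport *dport;
+ *
+ *	dport = devm_cxl_add_dport(root_port, bridge, port_id,
+ *				   CXL_RESOURCE_NONE);
+ *	if (IS_ERR(dport))
+ *		return PTR_ERR(dport);
+ */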
+
+static int add_ep(struct cxl_ep *new)
+{
+ struct cxl_port *port = new->dport->port;
+ int rc;
+
+ device_lock(&port->dev);
+ if (port->dead) {
+ device_unlock(&port->dev);
+ return -ENXIO;
+ }
+ rc = xa_insert(&port->endpoints, (unsigned long)new->ep, new,
+ GFP_KERNEL);
+ device_unlock(&port->dev);
+
+ return rc;
+}
+
+/**
+ * cxl_add_ep - register an endpoint's interest in a port
+ * @dport: the dport that routes to @ep_dev
+ * @ep_dev: device representing the endpoint
+ *
+ * Intermediate CXL ports are scanned based on the arrival of endpoints.
+ * When those endpoints depart, the port can be destroyed once all
+ * endpoints that care about that port have been removed.
+ */
+static int cxl_add_ep(struct cxl_dport *dport, struct device *ep_dev)
+{
+ struct cxl_ep *ep;
+ int rc;
+
+ ep = kzalloc(sizeof(*ep), GFP_KERNEL);
+ if (!ep)
+ return -ENOMEM;
+
+ ep->ep = get_device(ep_dev);
+ ep->dport = dport;
+
+ rc = add_ep(ep);
+ if (rc)
+ cxl_ep_release(ep);
+ return rc;
+}
+
+struct cxl_find_port_ctx {
+ const struct device *dport_dev;
+ const struct cxl_port *parent_port;
+ struct cxl_dport **dport;
+};
+
+static int match_port_by_dport(struct device *dev, const void *data)
+{
+ const struct cxl_find_port_ctx *ctx = data;
+ struct cxl_dport *dport;
+ struct cxl_port *port;
+
+ if (!is_cxl_port(dev))
+ return 0;
+ if (ctx->parent_port && dev->parent != &ctx->parent_port->dev)
+ return 0;
+
+ port = to_cxl_port(dev);
+ dport = cxl_find_dport_by_dev(port, ctx->dport_dev);
+ if (ctx->dport)
+ *ctx->dport = dport;
+ return dport != NULL;
+}
+
+static struct cxl_port *__find_cxl_port(struct cxl_find_port_ctx *ctx)
+{
+ struct device *dev;
+
+ if (!ctx->dport_dev)
+ return NULL;
+
+ dev = bus_find_device(&cxl_bus_type, NULL, ctx, match_port_by_dport);
+ if (dev)
+ return to_cxl_port(dev);
+ return NULL;
+}
+
+static struct cxl_port *find_cxl_port(struct device *dport_dev,
+ struct cxl_dport **dport)
+{
+ struct cxl_find_port_ctx ctx = {
+ .dport_dev = dport_dev,
+ .dport = dport,
+ };
+ struct cxl_port *port;
+
+ port = __find_cxl_port(&ctx);
+ return port;
+}
+
+static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port,
+ struct device *dport_dev,
+ struct cxl_dport **dport)
+{
+ struct cxl_find_port_ctx ctx = {
+ .dport_dev = dport_dev,
+ .parent_port = parent_port,
+ .dport = dport,
+ };
+ struct cxl_port *port;
+
+ port = __find_cxl_port(&ctx);
+ return port;
+}
+
+/*
+ * All users of grandparent() are using it to walk PCIe-like switch port
+ * hierarchy. A PCIe switch comprises a bridge device representing the
+ * upstream switch port and N bridges representing downstream switch ports. When
+ * bridges stack, the grandparent of a downstream switch port is another
+ * downstream switch port in the immediate ancestor switch.
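+ *
+ * An illustrative (hypothetical) parentage chain:
+ *
+ *	Root Port <- Upstream Port <- Downstream Port <- Endpoint
+ *
+ * Here grandparent(Downstream Port) == Root Port, and
+ * grandparent(Endpoint) == Upstream Port.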
+ */
+static struct device *grandparent(struct device *dev)
+{
+ if (dev && dev->parent)
+ return dev->parent->parent;
+ return NULL;
+}
+
+static void delete_endpoint(void *data)
+{
+ struct cxl_memdev *cxlmd = data;
+ struct cxl_port *endpoint = dev_get_drvdata(&cxlmd->dev);
+ struct cxl_port *parent_port;
+ struct device *parent;
+
+ parent_port = cxl_mem_find_port(cxlmd, NULL);
+ if (!parent_port)
+ goto out;
+ parent = &parent_port->dev;
+
+ device_lock(parent);
+ if (parent->driver && !endpoint->dead) {
+ devm_release_action(parent, cxl_unlink_uport, endpoint);
+ devm_release_action(parent, unregister_port, endpoint);
+ }
+ device_unlock(parent);
+ put_device(parent);
+out:
+ put_device(&endpoint->dev);
+}
+
+int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint)
+{
+ struct device *dev = &cxlmd->dev;
+
+ get_device(&endpoint->dev);
+ dev_set_drvdata(dev, endpoint);
+ return devm_add_action_or_reset(dev, delete_endpoint, cxlmd);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL);
+
+/*
+ * The natural end of life of a non-root 'cxl_port' is when its parent port goes
+ * through a ->remove() event ("top-down" unregistration). The unnatural trigger
+ * for a port to be unregistered is when all memdevs beneath that port have gone
+ * through ->remove(). This "bottom-up" removal selectively removes individual
+ * child ports manually. This depends on devm_cxl_add_port() not changing its
+ * devm action registration order, and on dports having already been
+ * destroyed by reap_dports().
+ */
+static void delete_switch_port(struct cxl_port *port)
+{
+ devm_release_action(port->dev.parent, cxl_unlink_uport, port);
+ devm_release_action(port->dev.parent, unregister_port, port);
+}
+
+static void reap_dports(struct cxl_port *port)
+{
+ struct cxl_dport *dport;
+ unsigned long index;
+
+ device_lock_assert(&port->dev);
+
+ xa_for_each(&port->dports, index, dport) {
+ devm_release_action(&port->dev, cxl_dport_unlink, dport);
+ devm_release_action(&port->dev, cxl_dport_remove, dport);
+ devm_kfree(&port->dev, dport);
+ }
+}
+
+int devm_cxl_add_endpoint(struct cxl_memdev *cxlmd,
+ struct cxl_dport *parent_dport)
+{
+ struct cxl_port *parent_port = parent_dport->port;
+ struct cxl_dev_state *cxlds = cxlmd->cxlds;
+ struct cxl_port *endpoint, *iter, *down;
+ int rc;
+
+ /*
+ * Now that the path to the root is established record all the
+ * intervening ports in the chain.
+ */
+ for (iter = parent_port, down = NULL; !is_cxl_root(iter);
+ down = iter, iter = to_cxl_port(iter->dev.parent)) {
+ struct cxl_ep *ep;
+
+ ep = cxl_ep_load(iter, cxlmd);
+ ep->next = down;
+ }
+
+ endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev,
+ cxlds->component_reg_phys, parent_dport);
+ if (IS_ERR(endpoint))
+ return PTR_ERR(endpoint);
+
+ dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev));
+
+ rc = cxl_endpoint_autoremove(cxlmd, endpoint);
+ if (rc)
+ return rc;
+
+ if (!endpoint->dev.driver) {
+ dev_err(&cxlmd->dev, "%s failed probe\n",
+ dev_name(&endpoint->dev));
+ return -ENXIO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_add_endpoint, CXL);
+
+static void cxl_detach_ep(void *data)
+{
+ struct cxl_memdev *cxlmd = data;
+ struct device *iter;
+
+ for (iter = &cxlmd->dev; iter; iter = grandparent(iter)) {
+ struct device *dport_dev = grandparent(iter);
+ struct cxl_port *port, *parent_port;
+ struct cxl_ep *ep;
+ bool died = false;
+
+ if (!dport_dev)
+ break;
+
+ port = find_cxl_port(dport_dev, NULL);
+ if (!port)
+ continue;
+
+ if (is_cxl_root(port)) {
+ put_device(&port->dev);
+ continue;
+ }
+
+ parent_port = to_cxl_port(port->dev.parent);
+ device_lock(&parent_port->dev);
+ if (!parent_port->dev.driver) {
+ /*
+ * The bottom-up race to delete the port lost to a
+			 * top-down port disable; give up here, because the
+ * parent_port ->remove() will have cleaned up all
+ * descendants.
+ */
+ device_unlock(&parent_port->dev);
+ put_device(&port->dev);
+ continue;
+ }
+
+ device_lock(&port->dev);
+ ep = cxl_ep_load(port, cxlmd);
+ dev_dbg(&cxlmd->dev, "disconnect %s from %s\n",
+ ep ? dev_name(ep->ep) : "", dev_name(&port->dev));
+ cxl_ep_remove(port, ep);
+ if (ep && !port->dead && xa_empty(&port->endpoints) &&
+ !is_cxl_root(parent_port)) {
+ /*
+ * This was the last ep attached to a dynamically
+ * enumerated port. Block new cxl_add_ep() and garbage
+ * collect the port.
+ */
+ died = true;
+ port->dead = true;
+ reap_dports(port);
+ }
+ device_unlock(&port->dev);
+
+ if (died) {
+ dev_dbg(&cxlmd->dev, "delete %s\n",
+ dev_name(&port->dev));
+ delete_switch_port(port);
+ }
+ put_device(&port->dev);
+ device_unlock(&parent_port->dev);
+ }
+}
+
+static resource_size_t find_component_registers(struct device *dev)
+{
+ struct cxl_register_map map;
+ struct pci_dev *pdev;
+
+ /*
+ * Theoretically, CXL component registers can be hosted on a
+	 * non-PCI device; in practice, only cxl_test hits this case.
+ */
+ if (!dev_is_pci(dev))
+ return CXL_RESOURCE_NONE;
+
+ pdev = to_pci_dev(dev);
+
+ cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
+ return cxl_regmap_to_base(pdev, &map);
+}
+
+static int add_port_attach_ep(struct cxl_memdev *cxlmd,
+ struct device *uport_dev,
+ struct device *dport_dev)
+{
+ struct device *dparent = grandparent(dport_dev);
+ struct cxl_port *port, *parent_port = NULL;
+ struct cxl_dport *dport, *parent_dport;
+ resource_size_t component_reg_phys;
+ int rc;
+
+ if (!dparent) {
+ /*
+ * The iteration reached the topology root without finding the
+		 * CXL-root 'cxl_port' on a previous iteration; fail for now to
+		 * be re-probed after the platform driver attaches.
+ */
+ dev_dbg(&cxlmd->dev, "%s is a root dport\n",
+ dev_name(dport_dev));
+ return -ENXIO;
+ }
+
+ parent_port = find_cxl_port(dparent, &parent_dport);
+ if (!parent_port) {
+ /* iterate to create this parent_port */
+ return -EAGAIN;
+ }
+
+ device_lock(&parent_port->dev);
+ if (!parent_port->dev.driver) {
+ dev_warn(&cxlmd->dev,
+ "port %s:%s disabled, failed to enumerate CXL.mem\n",
+ dev_name(&parent_port->dev), dev_name(uport_dev));
+ port = ERR_PTR(-ENXIO);
+ goto out;
+ }
+
+ port = find_cxl_port_at(parent_port, dport_dev, &dport);
+ if (!port) {
+ component_reg_phys = find_component_registers(uport_dev);
+ port = devm_cxl_add_port(&parent_port->dev, uport_dev,
+ component_reg_phys, parent_dport);
+ /* retry find to pick up the new dport information */
+ if (!IS_ERR(port))
+ port = find_cxl_port_at(parent_port, dport_dev, &dport);
+ }
+out:
+ device_unlock(&parent_port->dev);
+
+ if (IS_ERR(port))
+ rc = PTR_ERR(port);
+ else {
+ dev_dbg(&cxlmd->dev, "add to new port %s:%s\n",
+ dev_name(&port->dev), dev_name(port->uport));
+ rc = cxl_add_ep(dport, &cxlmd->dev);
+ if (rc == -EBUSY) {
+ /*
+ * "can't" happen, but this error code means
+ * something to the caller, so translate it.
+ */
+ rc = -ENXIO;
+ }
+ put_device(&port->dev);
+ }
+
+ put_device(&parent_port->dev);
+ return rc;
+}
+
+int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd)
+{
+ struct device *dev = &cxlmd->dev;
+ struct device *iter;
+ int rc;
+
+ rc = devm_add_action_or_reset(&cxlmd->dev, cxl_detach_ep, cxlmd);
+ if (rc)
+ return rc;
+
+ /*
+ * Scan for and add all cxl_ports in this device's ancestry.
+ * Repeat until no more ports are added. Abort if a port add
+ * attempt fails.
+ */
+retry:
+ for (iter = dev; iter; iter = grandparent(iter)) {
+ struct device *dport_dev = grandparent(iter);
+ struct device *uport_dev;
+ struct cxl_dport *dport;
+ struct cxl_port *port;
+
+ if (!dport_dev)
+ return 0;
+
+ uport_dev = dport_dev->parent;
+ if (!uport_dev) {
+ dev_warn(dev, "at %s no parent for dport: %s\n",
+ dev_name(iter), dev_name(dport_dev));
+ return -ENXIO;
+ }
+
+ dev_dbg(dev, "scan: iter: %s dport_dev: %s parent: %s\n",
+ dev_name(iter), dev_name(dport_dev),
+ dev_name(uport_dev));
+ port = find_cxl_port(dport_dev, &dport);
+ if (port) {
+ dev_dbg(&cxlmd->dev,
+ "found already registered port %s:%s\n",
+ dev_name(&port->dev), dev_name(port->uport));
+ rc = cxl_add_ep(dport, &cxlmd->dev);
+
+ /*
+ * If the endpoint already exists in the port's list,
+			 * that's ok; it was added on a previous pass.
+			 * Otherwise, retry in add_port_attach_ep() after taking
+			 * the parent_port lock, as the current port may be in
+			 * the process of being reaped.
+ */
+ if (rc && rc != -EBUSY) {
+ put_device(&port->dev);
+ return rc;
+ }
+
+ /* Any more ports to add between this one and the root? */
+ if (!dev_is_cxl_root_child(&port->dev)) {
+ put_device(&port->dev);
+ continue;
+ }
+
+ put_device(&port->dev);
+ return 0;
+ }
+
+ rc = add_port_attach_ep(cxlmd, uport_dev, dport_dev);
+ /* port missing, try to add parent */
+ if (rc == -EAGAIN)
+ continue;
+ /* failed to add ep or port */
+ if (rc)
+ return rc;
+ /* port added, new descendants possible, start over */
+ goto retry;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_ports, CXL);
+
+struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd,
+ struct cxl_dport **dport)
+{
+ return find_cxl_port(grandparent(&cxlmd->dev), dport);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, CXL);
+
+static int decoder_populate_targets(struct cxl_switch_decoder *cxlsd,
+ struct cxl_port *port, int *target_map)
+{
+ int i, rc = 0;
+
+ if (!target_map)
+ return 0;
+
+ device_lock_assert(&port->dev);
+
+ if (xa_empty(&port->dports))
+ return -EINVAL;
+
+ write_seqlock(&cxlsd->target_lock);
+ for (i = 0; i < cxlsd->nr_targets; i++) {
+ struct cxl_dport *dport = find_dport(port, target_map[i]);
+
+ if (!dport) {
+ rc = -ENXIO;
+ break;
+ }
+ cxlsd->target[i] = dport;
+ }
+ write_sequnlock(&cxlsd->target_lock);
+
+ return rc;
+}
+
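+/*
+ * Select a host bridge for a region interleave position by simple
+ * modulo. E.g. (illustrative) a x2 root decoder routes even positions
+ * to target[0] and odd positions to target[1].
+ */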
+static struct cxl_dport *cxl_hb_modulo(struct cxl_root_decoder *cxlrd, int pos)
+{
+ struct cxl_switch_decoder *cxlsd = &cxlrd->cxlsd;
+ struct cxl_decoder *cxld = &cxlsd->cxld;
+ int iw;
+
+ iw = cxld->interleave_ways;
+ if (dev_WARN_ONCE(&cxld->dev, iw != cxlsd->nr_targets,
+ "misconfigured root decoder\n"))
+ return NULL;
+
+ return cxlrd->cxlsd.target[pos % iw];
+}
+
+static struct lock_class_key cxl_decoder_key;
+
+/**
+ * cxl_decoder_init - Common decoder setup / initialization
+ * @port: owning port of this decoder
+ * @cxld: common decoder properties to initialize
+ *
+ * A port may contain one or more decoders. Each of those decoders
+ * enables some address space for CXL.mem utilization. A decoder is
+ * expected to be configured by the caller before registering via
+ * cxl_decoder_add()
+ */
+static int cxl_decoder_init(struct cxl_port *port, struct cxl_decoder *cxld)
+{
+ struct device *dev;
+ int rc;
+
+ rc = ida_alloc(&port->decoder_ida, GFP_KERNEL);
+ if (rc < 0)
+ return rc;
+
+ /* need parent to stick around to release the id */
+ get_device(&port->dev);
+ cxld->id = rc;
+
+ dev = &cxld->dev;
+ device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &cxl_decoder_key);
+ device_set_pm_not_required(dev);
+ dev->parent = &port->dev;
+ dev->bus = &cxl_bus_type;
+
+	/* Pre-initialize an "empty" decoder */
+ cxld->interleave_ways = 1;
+ cxld->interleave_granularity = PAGE_SIZE;
+ cxld->target_type = CXL_DECODER_EXPANDER;
+ cxld->hpa_range = (struct range) {
+ .start = 0,
+ .end = -1,
+ };
+
+ return 0;
+}
+
+static int cxl_switch_decoder_init(struct cxl_port *port,
+ struct cxl_switch_decoder *cxlsd,
+ int nr_targets)
+{
+ if (nr_targets > CXL_DECODER_MAX_INTERLEAVE)
+ return -EINVAL;
+
+ cxlsd->nr_targets = nr_targets;
+ seqlock_init(&cxlsd->target_lock);
+ return cxl_decoder_init(port, &cxlsd->cxld);
+}
+
+/**
+ * cxl_root_decoder_alloc - Allocate a root level decoder
+ * @port: owning CXL root of this decoder
+ * @nr_targets: static number of downstream targets
+ *
+ * Return: A new cxl decoder to be registered by cxl_decoder_add(). A
+ * 'CXL root' decoder is one that decodes from a top-level / static platform
+ * firmware description of CXL resources into a CXL standard decode
+ * topology.
+ */
+struct cxl_root_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
+ unsigned int nr_targets)
+{
+ struct cxl_root_decoder *cxlrd;
+ struct cxl_switch_decoder *cxlsd;
+ struct cxl_decoder *cxld;
+ int rc;
+
+ if (!is_cxl_root(port))
+ return ERR_PTR(-EINVAL);
+
+ cxlrd = kzalloc(struct_size(cxlrd, cxlsd.target, nr_targets),
+ GFP_KERNEL);
+ if (!cxlrd)
+ return ERR_PTR(-ENOMEM);
+
+ cxlsd = &cxlrd->cxlsd;
+ rc = cxl_switch_decoder_init(port, cxlsd, nr_targets);
+ if (rc) {
+ kfree(cxlrd);
+ return ERR_PTR(rc);
+ }
+
+ cxlrd->calc_hb = cxl_hb_modulo;
+
+ cxld = &cxlsd->cxld;
+ cxld->dev.type = &cxl_decoder_root_type;
+ /*
+ * cxl_root_decoder_release() special cases negative ids to
+ * detect memregion_alloc() failures.
+ */
+ atomic_set(&cxlrd->region_id, -1);
+ rc = memregion_alloc(GFP_KERNEL);
+ if (rc < 0) {
+ put_device(&cxld->dev);
+ return ERR_PTR(rc);
+ }
+
+ atomic_set(&cxlrd->region_id, rc);
+ return cxlrd;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_root_decoder_alloc, CXL);
+
+/**
+ * cxl_switch_decoder_alloc - Allocate a switch level decoder
+ * @port: owning CXL switch port of this decoder
+ * @nr_targets: max number of dynamically addressable downstream targets
+ *
+ * Return: A new cxl decoder to be registered by cxl_decoder_add(). A
+ * 'switch' decoder is any decoder that can be enumerated by PCIe
+ * topology and the HDM Decoder Capability. This includes the decoders
+ * that sit between Switch Upstream Ports / Switch Downstream Ports and
+ * Host Bridges / Root Ports.
+ */
+struct cxl_switch_decoder *cxl_switch_decoder_alloc(struct cxl_port *port,
+ unsigned int nr_targets)
+{
+ struct cxl_switch_decoder *cxlsd;
+ struct cxl_decoder *cxld;
+ int rc;
+
+ if (is_cxl_root(port) || is_cxl_endpoint(port))
+ return ERR_PTR(-EINVAL);
+
+ cxlsd = kzalloc(struct_size(cxlsd, target, nr_targets), GFP_KERNEL);
+ if (!cxlsd)
+ return ERR_PTR(-ENOMEM);
+
+ rc = cxl_switch_decoder_init(port, cxlsd, nr_targets);
+ if (rc) {
+ kfree(cxlsd);
+ return ERR_PTR(rc);
+ }
+
+ cxld = &cxlsd->cxld;
+ cxld->dev.type = &cxl_decoder_switch_type;
+ return cxlsd;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_switch_decoder_alloc, CXL);
+
+/**
+ * cxl_endpoint_decoder_alloc - Allocate an endpoint decoder
+ * @port: owning port of this decoder
+ *
+ * Return: A new cxl decoder to be registered by cxl_decoder_add()
+ */
+struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port)
+{
+ struct cxl_endpoint_decoder *cxled;
+ struct cxl_decoder *cxld;
+ int rc;
+
+ if (!is_cxl_endpoint(port))
+ return ERR_PTR(-EINVAL);
+
+ cxled = kzalloc(sizeof(*cxled), GFP_KERNEL);
+ if (!cxled)
+ return ERR_PTR(-ENOMEM);
+
+ cxled->pos = -1;
+ cxld = &cxled->cxld;
+ rc = cxl_decoder_init(port, cxld);
+ if (rc) {
+ kfree(cxled);
+ return ERR_PTR(rc);
+ }
+
+ cxld->dev.type = &cxl_decoder_endpoint_type;
+ return cxled;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, CXL);
+
+/**
+ * cxl_decoder_add_locked - Add a decoder with targets
+ * @cxld: The cxl decoder allocated by cxl_<type>_decoder_alloc()
+ * @target_map: A list of downstream ports that this decoder can direct memory
+ * traffic to. These numbers should correspond with the port number
+ * in the PCIe Link Capabilities structure.
+ *
+ * Certain types of decoders may not have any targets. The main example of this
+ * is an endpoint device. A more awkward example is a host bridge whose root
+ * ports get hot-added (technically possible, though unlikely).
+ *
+ * This is the locked variant of cxl_decoder_add().
+ *
+ * Context: Process context. Expects the device lock of the port that owns the
+ * @cxld to be held.
+ *
+ * Return: Negative error code if the decoder wasn't properly configured; else
+ * returns 0.
+ */
+int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map)
+{
+ struct cxl_port *port;
+ struct device *dev;
+ int rc;
+
+ if (WARN_ON_ONCE(!cxld))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(IS_ERR(cxld)))
+ return PTR_ERR(cxld);
+
+ if (cxld->interleave_ways < 1)
+ return -EINVAL;
+
+ dev = &cxld->dev;
+
+ port = to_cxl_port(cxld->dev.parent);
+ if (!is_endpoint_decoder(dev)) {
+ struct cxl_switch_decoder *cxlsd = to_cxl_switch_decoder(dev);
+
+ rc = decoder_populate_targets(cxlsd, port, target_map);
+ if (rc && (cxld->flags & CXL_DECODER_F_ENABLE)) {
+ dev_err(&port->dev,
+ "Failed to populate active decoder targets\n");
+ return rc;
+ }
+ }
+
+ rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id);
+ if (rc)
+ return rc;
+
+ return device_add(dev);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, CXL);
+
+/**
+ * cxl_decoder_add - Add a decoder with targets
+ * @cxld: The cxl decoder allocated by cxl_<type>_decoder_alloc()
+ * @target_map: A list of downstream ports that this decoder can direct memory
+ * traffic to. These numbers should correspond with the port number
+ * in the PCIe Link Capabilities structure.
+ *
+ * This is the unlocked variant of cxl_decoder_add_locked().
+ * See cxl_decoder_add_locked().
+ *
+ * Context: Process context. Takes and releases the device lock of the port that
+ * owns the @cxld.
+ */
+int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map)
+{
+ struct cxl_port *port;
+ int rc;
+
+ if (WARN_ON_ONCE(!cxld))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(IS_ERR(cxld)))
+ return PTR_ERR(cxld);
+
+ port = to_cxl_port(cxld->dev.parent);
+
+ device_lock(&port->dev);
+ rc = cxl_decoder_add_locked(cxld, target_map);
+ device_unlock(&port->dev);
+
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL);
+
+static void cxld_unregister(void *dev)
+{
+ struct cxl_endpoint_decoder *cxled;
+
+ if (is_endpoint_decoder(dev)) {
+ cxled = to_cxl_endpoint_decoder(dev);
+ cxl_decoder_kill_region(cxled);
+ }
+
+ device_unregister(dev);
+}
+
+int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld)
+{
+ return devm_add_action_or_reset(host, cxld_unregister, &cxld->dev);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_decoder_autoremove, CXL);
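+
+/*
+ * A minimal usage sketch (illustrative, not part of this patch),
+ * assuming a hypothetical switch-port driver where @host, @port, and a
+ * two-entry target list were already discovered from hardware:
+ *
+ *	struct cxl_switch_decoder *cxlsd;
+ *	int target_map[2] = { 0, 1 };
+ *	int rc;
+ *
+ *	cxlsd = cxl_switch_decoder_alloc(port, 2);
+ *	if (IS_ERR(cxlsd))
+ *		return PTR_ERR(cxlsd);
+ *	cxlsd->cxld.interleave_ways = 2;
+ *	rc = cxl_decoder_add(&cxlsd->cxld, target_map);
+ *	if (rc) {
+ *		put_device(&cxlsd->cxld.dev);
+ *		return rc;
+ *	}
+ *	return cxl_decoder_autoremove(host, &cxlsd->cxld);
+ */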
+
+/**
+ * __cxl_driver_register - register a driver for the cxl bus
+ * @cxl_drv: cxl driver structure to attach
+ * @owner: owning module/driver
+ * @modname: KBUILD_MODNAME for parent driver
+ */
+int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
+ const char *modname)
+{
+ if (!cxl_drv->probe) {
+ pr_debug("%s ->probe() must be specified\n", modname);
+ return -EINVAL;
+ }
+
+ if (!cxl_drv->name) {
+ pr_debug("%s ->name must be specified\n", modname);
+ return -EINVAL;
+ }
+
+ if (!cxl_drv->id) {
+ pr_debug("%s ->id must be specified\n", modname);
+ return -EINVAL;
+ }
+
+ cxl_drv->drv.bus = &cxl_bus_type;
+ cxl_drv->drv.owner = owner;
+ cxl_drv->drv.mod_name = modname;
+ cxl_drv->drv.name = cxl_drv->name;
+
+ return driver_register(&cxl_drv->drv);
+}
+EXPORT_SYMBOL_NS_GPL(__cxl_driver_register, CXL);
+
+void cxl_driver_unregister(struct cxl_driver *cxl_drv)
+{
+ driver_unregister(&cxl_drv->drv);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_driver_unregister, CXL);
+
+static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+ return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT,
+ cxl_device_id(dev));
+}
+
+static int cxl_bus_match(struct device *dev, struct device_driver *drv)
+{
+ return cxl_device_id(dev) == to_cxl_drv(drv)->id;
+}
+
+static int cxl_bus_probe(struct device *dev)
+{
+ int rc;
+
+ rc = to_cxl_drv(dev->driver)->probe(dev);
+ dev_dbg(dev, "probe: %d\n", rc);
+ return rc;
+}
+
+static void cxl_bus_remove(struct device *dev)
+{
+ struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver);
+
+ if (cxl_drv->remove)
+ cxl_drv->remove(dev);
+}
+
+static struct workqueue_struct *cxl_bus_wq;
+
+int cxl_bus_rescan(void)
+{
+ return bus_rescan_devices(&cxl_bus_type);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_bus_rescan, CXL);
+
+bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd)
+{
+ return queue_work(cxl_bus_wq, &cxlmd->detach_work);
+}
+EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL);
+
+/* for user tooling to ensure port disable work has completed */
+static ssize_t flush_store(struct bus_type *bus, const char *buf, size_t count)
+{
+ if (sysfs_streq(buf, "1")) {
+ flush_workqueue(cxl_bus_wq);
+ return count;
+ }
+
+ return -EINVAL;
+}
+
+static BUS_ATTR_WO(flush);
+
+static struct attribute *cxl_bus_attributes[] = {
+ &bus_attr_flush.attr,
+ NULL,
+};
+
+static struct attribute_group cxl_bus_attribute_group = {
+ .attrs = cxl_bus_attributes,
+};
+
+static const struct attribute_group *cxl_bus_attribute_groups[] = {
+ &cxl_bus_attribute_group,
+ NULL,
+};
+
+struct bus_type cxl_bus_type = {
+ .name = "cxl",
+ .uevent = cxl_bus_uevent,
+ .match = cxl_bus_match,
+ .probe = cxl_bus_probe,
+ .remove = cxl_bus_remove,
+ .bus_groups = cxl_bus_attribute_groups,
+};
+EXPORT_SYMBOL_NS_GPL(cxl_bus_type, CXL);
+
+static struct dentry *cxl_debugfs;
+
+struct dentry *cxl_debugfs_create_dir(const char *dir)
+{
+ return debugfs_create_dir(dir, cxl_debugfs);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_debugfs_create_dir, CXL);
+
+static __init int cxl_core_init(void)
+{
+ int rc;
+
+ cxl_debugfs = debugfs_create_dir("cxl", NULL);
+
+ cxl_mbox_init();
+
+ rc = cxl_memdev_init();
+ if (rc)
+ return rc;
+
+ cxl_bus_wq = alloc_ordered_workqueue("cxl_port", 0);
+ if (!cxl_bus_wq) {
+ rc = -ENOMEM;
+ goto err_wq;
+ }
+
+ rc = bus_register(&cxl_bus_type);
+ if (rc)
+ goto err_bus;
+
+ rc = cxl_region_init();
+ if (rc)
+ goto err_region;
+
+ return 0;
+
+err_region:
+ bus_unregister(&cxl_bus_type);
+err_bus:
+ destroy_workqueue(cxl_bus_wq);
+err_wq:
+ cxl_memdev_exit();
+ return rc;
+}
+
+static void cxl_core_exit(void)
+{
+ cxl_region_exit();
+ bus_unregister(&cxl_bus_type);
+ destroy_workqueue(cxl_bus_wq);
+ cxl_memdev_exit();
+ debugfs_remove_recursive(cxl_debugfs);
+}
+
+module_init(cxl_core_init);
+module_exit(cxl_core_exit);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
new file mode 100644
index 000000000000..f9ae5ad284ff
--- /dev/null
+++ b/drivers/cxl/core/region.c
@@ -0,0 +1,1953 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#include <linux/memregion.h>
+#include <linux/genalloc.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/uuid.h>
+#include <linux/idr.h>
+#include <cxlmem.h>
+#include <cxl.h>
+#include "core.h"
+
+/**
+ * DOC: cxl core region
+ *
+ * CXL Regions represent mapped memory capacity in system physical address
+ * space. Whereas the CXL Root Decoders identify the bounds of potential CXL
+ * Memory ranges, Regions represent the active mapped capacity by the HDM
+ * Decoder Capability structures throughout the Host Bridges, Switches, and
+ * Endpoints in the topology.
+ *
+ * Region configuration has ordering constraints. UUID may be set at any time
+ * but is only visible for persistent regions.
+ * 1. Interleave granularity
+ * 2. Interleave size
+ * 3. Decoder targets
+ */
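+
+/*
+ * An illustrative user space sketch of that ordering (hypothetical
+ * names: region0 is assumed to already exist under a root decoder, and
+ * the endpoint decoder names are examples):
+ *
+ *	echo $region_uuid > /sys/bus/cxl/devices/region0/uuid
+ *	echo 256 > /sys/bus/cxl/devices/region0/interleave_granularity
+ *	echo 2 > /sys/bus/cxl/devices/region0/interleave_ways
+ *	echo $((512 << 20)) > /sys/bus/cxl/devices/region0/size
+ *	echo decoder3.0 > /sys/bus/cxl/devices/region0/target0
+ *	echo decoder4.0 > /sys/bus/cxl/devices/region0/target1
+ *	echo 1 > /sys/bus/cxl/devices/region0/commit
+ */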
+
+/*
+ * All changes to the interleave configuration occur with this lock held
+ * for write.
+ */
+static DECLARE_RWSEM(cxl_region_rwsem);
+
+static struct cxl_region *to_cxl_region(struct device *dev);
+
+static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ ssize_t rc;
+
+ rc = down_read_interruptible(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+ rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
+ up_read(&cxl_region_rwsem);
+
+ return rc;
+}
+
+static int is_dup(struct device *match, void *data)
+{
+ struct cxl_region_params *p;
+ struct cxl_region *cxlr;
+ uuid_t *uuid = data;
+
+ if (!is_cxl_region(match))
+ return 0;
+
+ lockdep_assert_held(&cxl_region_rwsem);
+ cxlr = to_cxl_region(match);
+ p = &cxlr->params;
+
+ if (uuid_equal(&p->uuid, uuid)) {
+ dev_dbg(match, "already has uuid: %pUb\n", uuid);
+ return -EBUSY;
+ }
+
+ return 0;
+}
+
+static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ uuid_t temp;
+ ssize_t rc;
+
+ if (len != UUID_STRING_LEN + 1)
+ return -EINVAL;
+
+ rc = uuid_parse(buf, &temp);
+ if (rc)
+ return rc;
+
+ if (uuid_is_null(&temp))
+ return -EINVAL;
+
+ rc = down_write_killable(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+
+ if (uuid_equal(&p->uuid, &temp))
+ goto out;
+
+ rc = -EBUSY;
+ if (p->state >= CXL_CONFIG_ACTIVE)
+ goto out;
+
+ rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
+ if (rc < 0)
+ goto out;
+
+ uuid_copy(&p->uuid, &temp);
+out:
+ up_write(&cxl_region_rwsem);
+
+ if (rc)
+ return rc;
+ return len;
+}
+static DEVICE_ATTR_RW(uuid);
+
+static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
+ struct cxl_region *cxlr)
+{
+ return xa_load(&port->regions, (unsigned long)cxlr);
+}
+
+static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ int i;
+
+ for (i = count - 1; i >= 0; i--) {
+ struct cxl_endpoint_decoder *cxled = p->targets[i];
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *iter = cxled_to_port(cxled);
+ struct cxl_ep *ep;
+ int rc;
+
+ while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
+ iter = to_cxl_port(iter->dev.parent);
+
+ for (ep = cxl_ep_load(iter, cxlmd); iter;
+ iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
+ struct cxl_region_ref *cxl_rr;
+ struct cxl_decoder *cxld;
+
+ cxl_rr = cxl_rr_load(iter, cxlr);
+ cxld = cxl_rr->decoder;
+ rc = cxld->reset(cxld);
+ if (rc)
+ return rc;
+ }
+
+ rc = cxled->cxld.reset(&cxled->cxld);
+ if (rc)
+ return rc;
+ }
+
+ return 0;
+}
+
+static int cxl_region_decode_commit(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ int i, rc = 0;
+
+ for (i = 0; i < p->nr_targets; i++) {
+ struct cxl_endpoint_decoder *cxled = p->targets[i];
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_region_ref *cxl_rr;
+ struct cxl_decoder *cxld;
+ struct cxl_port *iter;
+ struct cxl_ep *ep;
+
+ /* commit bottom up */
+ for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
+ iter = to_cxl_port(iter->dev.parent)) {
+ cxl_rr = cxl_rr_load(iter, cxlr);
+ cxld = cxl_rr->decoder;
+ if (cxld->commit)
+ rc = cxld->commit(cxld);
+ if (rc)
+ break;
+ }
+
+ if (rc) {
+ /* programming @iter failed, teardown */
+ for (ep = cxl_ep_load(iter, cxlmd); ep && iter;
+ iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
+ cxl_rr = cxl_rr_load(iter, cxlr);
+ cxld = cxl_rr->decoder;
+ cxld->reset(cxld);
+ }
+
+ cxled->cxld.reset(&cxled->cxld);
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ /* undo the targets that were successfully committed */
+ cxl_region_decode_reset(cxlr, i);
+ return rc;
+}
+
+static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ bool commit;
+ ssize_t rc;
+
+ rc = kstrtobool(buf, &commit);
+ if (rc)
+ return rc;
+
+ rc = down_write_killable(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+
+ /* Already in the requested state? */
+ if (commit && p->state >= CXL_CONFIG_COMMIT)
+ goto out;
+ if (!commit && p->state < CXL_CONFIG_COMMIT)
+ goto out;
+
+ /* Not ready to commit? */
+ if (commit && p->state < CXL_CONFIG_ACTIVE) {
+ rc = -ENXIO;
+ goto out;
+ }
+
+ if (commit)
+ rc = cxl_region_decode_commit(cxlr);
+ else {
+ p->state = CXL_CONFIG_RESET_PENDING;
+ up_write(&cxl_region_rwsem);
+ device_release_driver(&cxlr->dev);
+ down_write(&cxl_region_rwsem);
+
+ /*
+		 * The lock was dropped, so we need to revalidate that the reset is
+ * still pending.
+ */
+ if (p->state == CXL_CONFIG_RESET_PENDING)
+ rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
+ }
+
+ if (rc)
+ goto out;
+
+ if (commit)
+ p->state = CXL_CONFIG_COMMIT;
+ else if (p->state == CXL_CONFIG_RESET_PENDING)
+ p->state = CXL_CONFIG_ACTIVE;
+
+out:
+ up_write(&cxl_region_rwsem);
+
+ if (rc)
+ return rc;
+ return len;
+}
+
+static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ ssize_t rc;
+
+ rc = down_read_interruptible(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+ rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
+ up_read(&cxl_region_rwsem);
+
+ return rc;
+}
+static DEVICE_ATTR_RW(commit);
+
+static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
+ int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct cxl_region *cxlr = to_cxl_region(dev);
+
+ if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
+ return 0;
+ return a->mode;
+}
+
+static ssize_t interleave_ways_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ ssize_t rc;
+
+ rc = down_read_interruptible(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+ rc = sysfs_emit(buf, "%d\n", p->interleave_ways);
+ up_read(&cxl_region_rwsem);
+
+ return rc;
+}
+
+static const struct attribute_group *get_cxl_region_target_group(void);
+
+static ssize_t interleave_ways_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
+ struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ unsigned int val, save;
+ int rc;
+ u8 iw;
+
+ rc = kstrtouint(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ rc = ways_to_cxl(val, &iw);
+ if (rc)
+ return rc;
+
+ /*
+	 * Even for x3, x6, and x12 interleaves, the region interleave must be a
+	 * power-of-2 multiple of the host bridge interleave.
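+	 * E.g. (illustrative): a x3 host bridge admits x3, x6, and x12
+	 * regions (1x, 2x, and 4x the root ways), while x9 is rejected
+	 * because 9 / 3 == 3 is not a power of 2.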
+ */
+ if (!is_power_of_2(val / cxld->interleave_ways) ||
+ (val % cxld->interleave_ways)) {
+ dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val);
+ return -EINVAL;
+ }
+
+ rc = down_write_killable(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+ if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ rc = -EBUSY;
+ goto out;
+ }
+
+ save = p->interleave_ways;
+ p->interleave_ways = val;
+ rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
+ if (rc)
+ p->interleave_ways = save;
+out:
+ up_write(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+ return len;
+}
+static DEVICE_ATTR_RW(interleave_ways);
+
+static ssize_t interleave_granularity_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ ssize_t rc;
+
+ rc = down_read_interruptible(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+ rc = sysfs_emit(buf, "%d\n", p->interleave_granularity);
+ up_read(&cxl_region_rwsem);
+
+ return rc;
+}
+
+static ssize_t interleave_granularity_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
+ struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ int rc, val;
+ u16 ig;
+
+ rc = kstrtoint(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ rc = granularity_to_cxl(val, &ig);
+ if (rc)
+ return rc;
+
+ /*
+ * When the host-bridge is interleaved, disallow region granularity !=
+ * root granularity. Regions with a granularity less than the root
+ * interleave result in needing multiple endpoints to support a single
+	 * slot in the interleave (possible to support in the future). Regions
+ * with a granularity greater than the root interleave result in invalid
+ * DPA translations (invalid to support).
+ */
+ if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
+ return -EINVAL;
+
+ rc = down_write_killable(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+ if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ rc = -EBUSY;
+ goto out;
+ }
+
+ p->interleave_granularity = val;
+out:
+ up_write(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+ return len;
+}
+static DEVICE_ATTR_RW(interleave_granularity);
+
+static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ u64 resource = -1ULL;
+ ssize_t rc;
+
+ rc = down_read_interruptible(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+ if (p->res)
+ resource = p->res->start;
+ rc = sysfs_emit(buf, "%#llx\n", resource);
+ up_read(&cxl_region_rwsem);
+
+ return rc;
+}
+static DEVICE_ATTR_RO(resource);
+
+static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+ struct cxl_region_params *p = &cxlr->params;
+ struct resource *res;
+ u32 remainder = 0;
+
+ lockdep_assert_held_write(&cxl_region_rwsem);
+
+ /* Nothing to do... */
+ if (p->res && resource_size(p->res) == size)
+ return 0;
+
+	/* To change the size, the old size must be freed first */
+ if (p->res)
+ return -EBUSY;
+
+ if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
+ return -EBUSY;
+
+ /* ways, granularity and uuid (if PMEM) need to be set before HPA */
+ if (!p->interleave_ways || !p->interleave_granularity ||
+ (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
+ return -ENXIO;
+
+ div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder);
+ if (remainder)
+ return -EINVAL;
+
+ res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
+ dev_name(&cxlr->dev));
+ if (IS_ERR(res)) {
+ dev_dbg(&cxlr->dev, "failed to allocate HPA: %ld\n",
+ PTR_ERR(res));
+ return PTR_ERR(res);
+ }
+
+ p->res = res;
+ p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
+
+ return 0;
+}
+
+static void cxl_region_iomem_release(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+
+ if (device_is_registered(&cxlr->dev))
+ lockdep_assert_held_write(&cxl_region_rwsem);
+ if (p->res) {
+ remove_resource(p->res);
+ kfree(p->res);
+ p->res = NULL;
+ }
+}
+
+static int free_hpa(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+
+ lockdep_assert_held_write(&cxl_region_rwsem);
+
+ if (!p->res)
+ return 0;
+
+ if (p->state >= CXL_CONFIG_ACTIVE)
+ return -EBUSY;
+
+ cxl_region_iomem_release(cxlr);
+ p->state = CXL_CONFIG_IDLE;
+ return 0;
+}
+
+static ssize_t size_store(struct device *dev, struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ u64 val;
+ int rc;
+
+ rc = kstrtou64(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ rc = down_write_killable(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+
+ if (val)
+ rc = alloc_hpa(cxlr, val);
+ else
+ rc = free_hpa(cxlr);
+ up_write(&cxl_region_rwsem);
+
+ if (rc)
+ return rc;
+
+ return len;
+}
+
+static ssize_t size_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ u64 size = 0;
+ ssize_t rc;
+
+ rc = down_read_interruptible(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+ if (p->res)
+ size = resource_size(p->res);
+ rc = sysfs_emit(buf, "%#llx\n", size);
+ up_read(&cxl_region_rwsem);
+
+ return rc;
+}
+static DEVICE_ATTR_RW(size);
+
+static struct attribute *cxl_region_attrs[] = {
+ &dev_attr_uuid.attr,
+ &dev_attr_commit.attr,
+ &dev_attr_interleave_ways.attr,
+ &dev_attr_interleave_granularity.attr,
+ &dev_attr_resource.attr,
+ &dev_attr_size.attr,
+ NULL,
+};
+
+static const struct attribute_group cxl_region_group = {
+ .attrs = cxl_region_attrs,
+ .is_visible = cxl_region_visible,
+};
+
+static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_endpoint_decoder *cxled;
+ int rc;
+
+ rc = down_read_interruptible(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+
+ if (pos >= p->interleave_ways) {
+ dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
+ p->interleave_ways);
+ rc = -ENXIO;
+ goto out;
+ }
+
+ cxled = p->targets[pos];
+ if (!cxled)
+ rc = sysfs_emit(buf, "\n");
+ else
+ rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
+out:
+ up_read(&cxl_region_rwsem);
+
+ return rc;
+}
+
+static int match_free_decoder(struct device *dev, void *data)
+{
+ struct cxl_decoder *cxld;
+ int *id = data;
+
+ if (!is_switch_decoder(dev))
+ return 0;
+
+ cxld = to_cxl_decoder(dev);
+
+ /* enforce ordered allocation */
+ if (cxld->id != *id)
+ return 0;
+
+ if (!cxld->region)
+ return 1;
+
+ (*id)++;
+
+ return 0;
+}
+
+static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port,
+ struct cxl_region *cxlr)
+{
+ struct device *dev;
+ int id = 0;
+
+ dev = device_find_child(&port->dev, &id, match_free_decoder);
+ if (!dev)
+ return NULL;
+ /*
+	 * This decoder is guaranteed to remain registered as long as the
+	 * endpoint decoder is registered, and endpoint decoder unregistration
+	 * holds the cxl_region_rwsem over unregister events, so there is no
+	 * need to hold on to this extra reference.
+ */
+ put_device(dev);
+ return to_cxl_decoder(dev);
+}
+
+static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
+ struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_region_ref *cxl_rr, *iter;
+ unsigned long index;
+ int rc;
+
+ xa_for_each(&port->regions, index, iter) {
+ struct cxl_region_params *ip = &iter->region->params;
+
+ if (!ip->res)
+ continue;
+
+ if (ip->res->start > p->res->start) {
+ dev_dbg(&cxlr->dev,
+ "%s: HPA order violation %s:%pr vs %pr\n",
+ dev_name(&port->dev),
+ dev_name(&iter->region->dev), ip->res, p->res);
+ return ERR_PTR(-EBUSY);
+ }
+ }
+
+ cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
+ if (!cxl_rr)
+ return ERR_PTR(-ENOMEM);
+ cxl_rr->port = port;
+ cxl_rr->region = cxlr;
+ cxl_rr->nr_targets = 1;
+ xa_init(&cxl_rr->endpoints);
+
+ rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL);
+ if (rc) {
+ dev_dbg(&cxlr->dev,
+ "%s: failed to track region reference: %d\n",
+ dev_name(&port->dev), rc);
+ kfree(cxl_rr);
+ return ERR_PTR(rc);
+ }
+
+ return cxl_rr;
+}
+
+static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
+{
+ struct cxl_region *cxlr = cxl_rr->region;
+ struct cxl_decoder *cxld = cxl_rr->decoder;
+
+ if (!cxld)
+ return;
+
+ dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
+ if (cxld->region == cxlr) {
+ cxld->region = NULL;
+ put_device(&cxlr->dev);
+ }
+}
+
+static void free_region_ref(struct cxl_region_ref *cxl_rr)
+{
+ struct cxl_port *port = cxl_rr->port;
+ struct cxl_region *cxlr = cxl_rr->region;
+
+ cxl_rr_free_decoder(cxl_rr);
+ xa_erase(&port->regions, (unsigned long)cxlr);
+ xa_destroy(&cxl_rr->endpoints);
+ kfree(cxl_rr);
+}
+
+static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
+ struct cxl_endpoint_decoder *cxled)
+{
+ int rc;
+ struct cxl_port *port = cxl_rr->port;
+ struct cxl_region *cxlr = cxl_rr->region;
+ struct cxl_decoder *cxld = cxl_rr->decoder;
+ struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled));
+
+ if (ep) {
+ rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep,
+ GFP_KERNEL);
+ if (rc)
+ return rc;
+ }
+ cxl_rr->nr_eps++;
+
+ if (!cxld->region) {
+ cxld->region = cxlr;
+ get_device(&cxlr->dev);
+ }
+
+ return 0;
+}
+
+static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled,
+ struct cxl_region_ref *cxl_rr)
+{
+ struct cxl_decoder *cxld;
+
+ if (port == cxled_to_port(cxled))
+ cxld = &cxled->cxld;
+ else
+ cxld = cxl_region_find_decoder(port, cxlr);
+ if (!cxld) {
+ dev_dbg(&cxlr->dev, "%s: no decoder available\n",
+ dev_name(&port->dev));
+ return -EBUSY;
+ }
+
+ if (cxld->region) {
+ dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
+ dev_name(&port->dev), dev_name(&cxld->dev),
+ dev_name(&cxld->region->dev));
+ return -EBUSY;
+ }
+
+ cxl_rr->decoder = cxld;
+ return 0;
+}
+
+/**
+ * cxl_port_attach_region() - track a region's interest in a port by endpoint
+ * @port: port to add a new region reference 'struct cxl_region_ref'
+ * @cxlr: region to attach to @port
+ * @cxled: endpoint decoder used to create or further pin a region reference
+ * @pos: interleave position of @cxled in @cxlr
+ *
+ * The attach event is an opportunity to validate CXL decode setup
+ * constraints and record metadata needed for programming HDM decoders,
+ * in particular decoder target lists.
+ *
+ * The steps are:
+ *
+ * - validate that there are no other regions with a higher HPA already
+ * associated with @port
+ * - establish a region reference if one is not already present
+ *
+ * - additionally allocate a decoder instance that will host @cxlr on
+ * @port
+ *
+ * - pin the region reference by the endpoint
+ * - account for how many entries in @port's target list are needed to
+ * cover all of the added endpoints.
+ */
+static int cxl_port_attach_region(struct cxl_port *port,
+ struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
+ struct cxl_region_ref *cxl_rr;
+ bool nr_targets_inc = false;
+ struct cxl_decoder *cxld;
+ unsigned long index;
+ int rc = -EBUSY;
+
+ lockdep_assert_held_write(&cxl_region_rwsem);
+
+ cxl_rr = cxl_rr_load(port, cxlr);
+ if (cxl_rr) {
+ struct cxl_ep *ep_iter;
+ int found = 0;
+
+ /*
+ * Walk the existing endpoints that have been attached to
+ * @cxlr at @port and see if they share the same 'next' port
+		 * in the downstream direction, i.e. endpoints that share a
+		 * common upstream switch.
+ */
+ xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
+ if (ep_iter == ep)
+ continue;
+ if (ep_iter->next == ep->next) {
+ found++;
+ break;
+ }
+ }
+
+ /*
+ * New target port, or @port is an endpoint port that always
+ * accounts its own local decode as a target.
+ */
+ if (!found || !ep->next) {
+ cxl_rr->nr_targets++;
+ nr_targets_inc = true;
+ }
+ } else {
+ cxl_rr = alloc_region_ref(port, cxlr);
+ if (IS_ERR(cxl_rr)) {
+ dev_dbg(&cxlr->dev,
+ "%s: failed to allocate region reference\n",
+ dev_name(&port->dev));
+ return PTR_ERR(cxl_rr);
+ }
+ nr_targets_inc = true;
+
+ rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
+ if (rc)
+ goto out_erase;
+ }
+ cxld = cxl_rr->decoder;
+
+ rc = cxl_rr_ep_add(cxl_rr, cxled);
+ if (rc) {
+ dev_dbg(&cxlr->dev,
+ "%s: failed to track endpoint %s:%s reference\n",
+ dev_name(&port->dev), dev_name(&cxlmd->dev),
+ dev_name(&cxld->dev));
+ goto out_erase;
+ }
+
+ dev_dbg(&cxlr->dev,
+ "%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ dev_name(&cxld->dev), dev_name(&cxlmd->dev),
+ dev_name(&cxled->cxld.dev), pos,
+ ep ? ep->next ? dev_name(ep->next->uport) :
+ dev_name(&cxlmd->dev) :
+ "none",
+ cxl_rr->nr_eps, cxl_rr->nr_targets);
+
+ return 0;
+out_erase:
+ if (nr_targets_inc)
+ cxl_rr->nr_targets--;
+ if (cxl_rr->nr_eps == 0)
+ free_region_ref(cxl_rr);
+ return rc;
+}
+
+static void cxl_port_detach_region(struct cxl_port *port,
+ struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_region_ref *cxl_rr;
+ struct cxl_ep *ep = NULL;
+
+ lockdep_assert_held_write(&cxl_region_rwsem);
+
+ cxl_rr = cxl_rr_load(port, cxlr);
+ if (!cxl_rr)
+ return;
+
+ /*
+ * Endpoint ports do not carry cxl_ep references, and they
+ * never target more than one endpoint by definition
+ */
+ if (cxl_rr->decoder == &cxled->cxld)
+ cxl_rr->nr_eps--;
+ else
+ ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled);
+ if (ep) {
+ struct cxl_ep *ep_iter;
+ unsigned long index;
+ int found = 0;
+
+ cxl_rr->nr_eps--;
+ xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
+ if (ep_iter->next == ep->next) {
+ found++;
+ break;
+ }
+ }
+ if (!found)
+ cxl_rr->nr_targets--;
+ }
+
+ if (cxl_rr->nr_eps == 0)
+ free_region_ref(cxl_rr);
+}
+
+static int check_last_peer(struct cxl_endpoint_decoder *cxled,
+ struct cxl_ep *ep, struct cxl_region_ref *cxl_rr,
+ int distance)
+{
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_region *cxlr = cxl_rr->region;
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_endpoint_decoder *cxled_peer;
+ struct cxl_port *port = cxl_rr->port;
+ struct cxl_memdev *cxlmd_peer;
+ struct cxl_ep *ep_peer;
+ int pos = cxled->pos;
+
+ /*
+	 * If this position wants to share a dport with the last endpoint
+	 * mapped, then that endpoint, at index 'position - distance', must
+	 * also be mapped by this dport.
+ */
+ if (pos < distance) {
+ dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
+ return -ENXIO;
+ }
+ cxled_peer = p->targets[pos - distance];
+ cxlmd_peer = cxled_to_memdev(cxled_peer);
+ ep_peer = cxl_ep_load(port, cxlmd_peer);
+ if (ep->dport != ep_peer->dport) {
+ dev_dbg(&cxlr->dev,
+ "%s:%s: %s:%s pos %d mismatched peer %s:%s\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos,
+ dev_name(&cxlmd_peer->dev),
+ dev_name(&cxled_peer->cxld.dev));
+ return -ENXIO;
+ }
+
+ return 0;
+}
+
+static int cxl_port_setup_targets(struct cxl_port *port,
+ struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+ int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos;
+ struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
+ struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_decoder *cxld = cxl_rr->decoder;
+ struct cxl_switch_decoder *cxlsd;
+ u16 eig, peig;
+ u8 eiw, peiw;
+
+ /*
+ * While root level decoders support x3, x6, x12, switch level
+ * decoders only support powers of 2 up to x16.
+ */
+ if (!is_power_of_2(cxl_rr->nr_targets)) {
+ dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ cxl_rr->nr_targets);
+ return -EINVAL;
+ }
+
+ cxlsd = to_cxl_switch_decoder(&cxld->dev);
+ if (cxl_rr->nr_targets_set) {
+ int i, distance;
+
+		/*
+		 * Passthrough ports impose no distance requirements between
+		 * peers.
+		 */
+ if (port->nr_dports == 1)
+ distance = 0;
+ else
+ distance = p->nr_targets / cxl_rr->nr_targets;
+ for (i = 0; i < cxl_rr->nr_targets_set; i++)
+ if (ep->dport == cxlsd->target[i]) {
+ rc = check_last_peer(cxled, ep, cxl_rr,
+ distance);
+ if (rc)
+ return rc;
+ goto out_target_set;
+ }
+ goto add_target;
+ }
+
+ if (is_cxl_root(parent_port)) {
+ parent_ig = cxlrd->cxlsd.cxld.interleave_granularity;
+ parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
+ /*
+ * For purposes of address bit routing, use power-of-2 math for
+ * switch ports.
+ */
+ if (!is_power_of_2(parent_iw))
+ parent_iw /= 3;
+ } else {
+ struct cxl_region_ref *parent_rr;
+ struct cxl_decoder *parent_cxld;
+
+ parent_rr = cxl_rr_load(parent_port, cxlr);
+ parent_cxld = parent_rr->decoder;
+ parent_ig = parent_cxld->interleave_granularity;
+ parent_iw = parent_cxld->interleave_ways;
+ }
+
+ rc = granularity_to_cxl(parent_ig, &peig);
+ if (rc) {
+ dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
+ dev_name(parent_port->uport),
+ dev_name(&parent_port->dev), parent_ig);
+ return rc;
+ }
+
+ rc = ways_to_cxl(parent_iw, &peiw);
+ if (rc) {
+ dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
+ dev_name(parent_port->uport),
+ dev_name(&parent_port->dev), parent_iw);
+ return rc;
+ }
+
+ iw = cxl_rr->nr_targets;
+ rc = ways_to_cxl(iw, &eiw);
+ if (rc) {
+ dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
+ dev_name(port->uport), dev_name(&port->dev), iw);
+ return rc;
+ }
+
+ /*
+ * If @parent_port is masking address bits, pick the next unused address
+ * bit to route @port's targets.
+ */
+ if (parent_iw > 1 && cxl_rr->nr_targets > 1) {
+ u32 address_bit = max(peig + peiw, eiw + peig);
+
+ eig = address_bit - eiw + 1;
+ } else {
+ eiw = peiw;
+ eig = peig;
+ }
+
+ rc = cxl_to_granularity(eig, &ig);
+ if (rc) {
+ dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ 256 << eig);
+ return rc;
+ }
+
+ cxld->interleave_ways = iw;
+ cxld->interleave_granularity = ig;
+ cxld->hpa_range = (struct range) {
+ .start = p->res->start,
+ .end = p->res->end,
+ };
+ dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport),
+ dev_name(&port->dev), iw, ig);
+add_target:
+ if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) {
+ dev_dbg(&cxlr->dev,
+ "%s:%s: targets full trying to add %s:%s at %d\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
+ return -ENXIO;
+ }
+ cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
+ inc = 1;
+out_target_set:
+ cxl_rr->nr_targets_set += inc;
+ dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
+ dev_name(port->uport), dev_name(&port->dev),
+ cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport),
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
+
+ return 0;
+}
+
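The eiw/eig values programmed above are the CXL-encoded interleave ways and granularity. The conversion helpers are defined elsewhere in this series; as a rough sketch of the math they implement (granularity encodes as log2(g) - 8, hence the "256 << eig" in the error message above; power-of-2 ways encode as log2(ways), and the root-only x3/x6/x12 modes land at encodings 8-10):

/* Sketch of the assumed encoding helpers */
static int example_granularity_to_cxl(int g, u16 *eig)
{
	if (g < 256 || !is_power_of_2(g))
		return -EINVAL;
	*eig = ilog2(g) - 8;		/* 256B -> 0, 512B -> 1, ... */
	return 0;
}

static int example_ways_to_cxl(unsigned int ways, u8 *eiw)
{
	if (ways == 0 || ways > 16)
		return -EINVAL;
	if (is_power_of_2(ways)) {
		*eiw = ilog2(ways);	/* x1 -> 0 ... x16 -> 4 */
		return 0;
	}
	if (ways % 3 || !is_power_of_2(ways / 3))
		return -EINVAL;
	*eiw = ilog2(ways / 3) + 8;	/* x3 -> 8, x6 -> 9, x12 -> 10 */
	return 0;
}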
+static void cxl_port_reset_targets(struct cxl_port *port,
+ struct cxl_region *cxlr)
+{
+ struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
+ struct cxl_decoder *cxld;
+
+	/*
+	 * After the last endpoint has been detached, the entire cxl_rr may
+	 * be gone.
+	 */
+ if (!cxl_rr)
+ return;
+ cxl_rr->nr_targets_set = 0;
+
+ cxld = cxl_rr->decoder;
+ cxld->hpa_range = (struct range) {
+ .start = 0,
+ .end = -1,
+ };
+}
+
+static void cxl_region_teardown_targets(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_endpoint_decoder *cxled;
+ struct cxl_memdev *cxlmd;
+ struct cxl_port *iter;
+ struct cxl_ep *ep;
+ int i;
+
+ for (i = 0; i < p->nr_targets; i++) {
+ cxled = p->targets[i];
+ cxlmd = cxled_to_memdev(cxled);
+
+ iter = cxled_to_port(cxled);
+ while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
+ iter = to_cxl_port(iter->dev.parent);
+
+ for (ep = cxl_ep_load(iter, cxlmd); iter;
+ iter = ep->next, ep = cxl_ep_load(iter, cxlmd))
+ cxl_port_reset_targets(iter, cxlr);
+ }
+}
+
+static int cxl_region_setup_targets(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_endpoint_decoder *cxled;
+ struct cxl_memdev *cxlmd;
+ struct cxl_port *iter;
+ struct cxl_ep *ep;
+ int i, rc;
+
+ for (i = 0; i < p->nr_targets; i++) {
+ cxled = p->targets[i];
+ cxlmd = cxled_to_memdev(cxled);
+
+ iter = cxled_to_port(cxled);
+ while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
+ iter = to_cxl_port(iter->dev.parent);
+
+ /*
+ * Descend the topology tree programming targets while
+ * looking for conflicts.
+ */
+ for (ep = cxl_ep_load(iter, cxlmd); iter;
+ iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
+ rc = cxl_port_setup_targets(iter, cxlr, cxled);
+ if (rc) {
+ cxl_region_teardown_targets(cxlr);
+ return rc;
+ }
+ }
+ }
+
+ return 0;
+}
+
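Setup and teardown both use the same walk: climb from the endpoint's port to the port just below the CXL root, then follow the per-memdev ep->next links back down toward the endpoint, visiting each port once. A hypothetical helper capturing that pattern:

static int cxl_region_walk_ports(struct cxl_endpoint_decoder *cxled,
				 int (*fn)(struct cxl_port *port,
					   struct cxl_endpoint_decoder *cxled))
{
	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
	struct cxl_port *iter = cxled_to_port(cxled);
	struct cxl_ep *ep;
	int rc;

	/* ascend to the topmost port below the platform root... */
	while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
		iter = to_cxl_port(iter->dev.parent);

	/* ...then descend via the ep->next links */
	for (ep = cxl_ep_load(iter, cxlmd); iter;
	     iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
		rc = fn(iter, cxled);
		if (rc)
			return rc;
	}

	return 0;
}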
+static int cxl_region_attach(struct cxl_region *cxlr,
+ struct cxl_endpoint_decoder *cxled, int pos)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_port *ep_port, *root_port, *iter;
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_dport *dport;
+ int i, rc = -ENXIO;
+
+ if (cxled->mode == CXL_DECODER_DEAD) {
+ dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
+ return -ENODEV;
+ }
+
+	/* region already full of members, or interleave config not established? */
+ if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ dev_dbg(&cxlr->dev, "region already active\n");
+ return -EBUSY;
+ } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
+ dev_dbg(&cxlr->dev, "interleave config missing\n");
+ return -ENXIO;
+ }
+
+ if (pos < 0 || pos >= p->interleave_ways) {
+ dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
+ p->interleave_ways);
+ return -ENXIO;
+ }
+
+ if (p->targets[pos] == cxled)
+ return 0;
+
+ if (p->targets[pos]) {
+ struct cxl_endpoint_decoder *cxled_target = p->targets[pos];
+ struct cxl_memdev *cxlmd_target = cxled_to_memdev(cxled_target);
+
+ dev_dbg(&cxlr->dev, "position %d already assigned to %s:%s\n",
+ pos, dev_name(&cxlmd_target->dev),
+ dev_name(&cxled_target->cxld.dev));
+ return -EBUSY;
+ }
+
+ for (i = 0; i < p->interleave_ways; i++) {
+ struct cxl_endpoint_decoder *cxled_target;
+ struct cxl_memdev *cxlmd_target;
+
+		cxled_target = p->targets[i];
+ if (!cxled_target)
+ continue;
+
+ cxlmd_target = cxled_to_memdev(cxled_target);
+ if (cxlmd_target == cxlmd) {
+ dev_dbg(&cxlr->dev,
+ "%s already specified at position %d via: %s\n",
+ dev_name(&cxlmd->dev), pos,
+ dev_name(&cxled_target->cxld.dev));
+ return -EBUSY;
+ }
+ }
+
+ ep_port = cxled_to_port(cxled);
+ root_port = cxlrd_to_port(cxlrd);
+ dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
+ if (!dport) {
+ dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ dev_name(cxlr->dev.parent));
+ return -ENXIO;
+ }
+
+ if (cxlrd->calc_hb(cxlrd, pos) != dport) {
+ dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ dev_name(&cxlrd->cxlsd.cxld.dev));
+ return -ENXIO;
+ }
+
+ if (cxled->cxld.target_type != cxlr->type) {
+ dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ cxled->cxld.target_type, cxlr->type);
+ return -ENXIO;
+ }
+
+ if (!cxled->dpa_res) {
+ dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev));
+ return -ENXIO;
+ }
+
+ if (resource_size(cxled->dpa_res) * p->interleave_ways !=
+ resource_size(p->res)) {
+ dev_dbg(&cxlr->dev,
+ "%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ (u64)resource_size(cxled->dpa_res), p->interleave_ways,
+ (u64)resource_size(p->res));
+ return -EINVAL;
+ }
+
+ for (iter = ep_port; !is_cxl_root(iter);
+ iter = to_cxl_port(iter->dev.parent)) {
+ rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
+ if (rc)
+ goto err;
+ }
+
+ p->targets[pos] = cxled;
+ cxled->pos = pos;
+ p->nr_targets++;
+
+ if (p->nr_targets == p->interleave_ways) {
+ rc = cxl_region_setup_targets(cxlr);
+ if (rc)
+ goto err_decrement;
+ p->state = CXL_CONFIG_ACTIVE;
+ }
+
+ cxled->cxld.interleave_ways = p->interleave_ways;
+ cxled->cxld.interleave_granularity = p->interleave_granularity;
+ cxled->cxld.hpa_range = (struct range) {
+ .start = p->res->start,
+ .end = p->res->end,
+ };
+
+ return 0;
+
+err_decrement:
+ p->nr_targets--;
+err:
+ for (iter = ep_port; !is_cxl_root(iter);
+ iter = to_cxl_port(iter->dev.parent))
+ cxl_port_detach_region(iter, cxlr, cxled);
+ return rc;
+}
+
+static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
+{
+ struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
+ struct cxl_region *cxlr = cxled->cxld.region;
+ struct cxl_region_params *p;
+ int rc = 0;
+
+ lockdep_assert_held_write(&cxl_region_rwsem);
+
+ if (!cxlr)
+ return 0;
+
+ p = &cxlr->params;
+ get_device(&cxlr->dev);
+
+ if (p->state > CXL_CONFIG_ACTIVE) {
+ /*
+ * TODO: tear down all impacted regions if a device is
+ * removed out of order
+ */
+ rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
+ if (rc)
+ goto out;
+ p->state = CXL_CONFIG_ACTIVE;
+ }
+
+ for (iter = ep_port; !is_cxl_root(iter);
+ iter = to_cxl_port(iter->dev.parent))
+ cxl_port_detach_region(iter, cxlr, cxled);
+
+ if (cxled->pos < 0 || cxled->pos >= p->interleave_ways ||
+ p->targets[cxled->pos] != cxled) {
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+
+ dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
+ dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
+ cxled->pos);
+ goto out;
+ }
+
+ if (p->state == CXL_CONFIG_ACTIVE) {
+ p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
+ cxl_region_teardown_targets(cxlr);
+ }
+ p->targets[cxled->pos] = NULL;
+ p->nr_targets--;
+ cxled->cxld.hpa_range = (struct range) {
+ .start = 0,
+ .end = -1,
+ };
+
+ /* notify the region driver that one of its targets has departed */
+ up_write(&cxl_region_rwsem);
+ device_release_driver(&cxlr->dev);
+ down_write(&cxl_region_rwsem);
+out:
+ put_device(&cxlr->dev);
+ return rc;
+}
+
+void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
+{
+ down_write(&cxl_region_rwsem);
+ cxled->mode = CXL_DECODER_DEAD;
+ cxl_region_detach(cxled);
+ up_write(&cxl_region_rwsem);
+}
+
+static int attach_target(struct cxl_region *cxlr, const char *decoder, int pos)
+{
+ struct device *dev;
+ int rc;
+
+ dev = bus_find_device_by_name(&cxl_bus_type, NULL, decoder);
+ if (!dev)
+ return -ENODEV;
+
+ if (!is_endpoint_decoder(dev)) {
+ put_device(dev);
+ return -EINVAL;
+ }
+
+ rc = down_write_killable(&cxl_region_rwsem);
+ if (rc)
+ goto out;
+ down_read(&cxl_dpa_rwsem);
+ rc = cxl_region_attach(cxlr, to_cxl_endpoint_decoder(dev), pos);
+ up_read(&cxl_dpa_rwsem);
+ up_write(&cxl_region_rwsem);
+out:
+ put_device(dev);
+ return rc;
+}
+
+static int detach_target(struct cxl_region *cxlr, int pos)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ int rc;
+
+ rc = down_write_killable(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+
+ if (pos >= p->interleave_ways) {
+ dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
+ p->interleave_ways);
+ rc = -ENXIO;
+ goto out;
+ }
+
+ if (!p->targets[pos]) {
+ rc = 0;
+ goto out;
+ }
+
+ rc = cxl_region_detach(p->targets[pos]);
+out:
+ up_write(&cxl_region_rwsem);
+ return rc;
+}
+
+static ssize_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
+ size_t len)
+{
+ int rc;
+
+ if (sysfs_streq(buf, "\n"))
+ rc = detach_target(cxlr, pos);
+ else
+ rc = attach_target(cxlr, buf, pos);
+
+ if (rc < 0)
+ return rc;
+ return len;
+}
+
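Per store_targetN(), writing an endpoint decoder name attaches it at that position and writing a bare newline detaches it. A hypothetical userspace sketch (paths invented for illustration):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* write e.g. "decoder2.0" to attach at @pos, or pass NULL to detach */
static int set_region_target(const char *region_sysfs, int pos,
			     const char *decoder)
{
	const char *val = decoder ? decoder : "\n";
	char path[256];
	int fd, rc = 0;

	snprintf(path, sizeof(path), "%s/target%d", region_sysfs, pos);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) < 0)
		rc = -1;
	close(fd);
	return rc;
}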
+#define TARGET_ATTR_RW(n) \
+static ssize_t target##n##_show( \
+ struct device *dev, struct device_attribute *attr, char *buf) \
+{ \
+ return show_targetN(to_cxl_region(dev), buf, (n)); \
+} \
+static ssize_t target##n##_store(struct device *dev, \
+ struct device_attribute *attr, \
+ const char *buf, size_t len) \
+{ \
+ return store_targetN(to_cxl_region(dev), buf, (n), len); \
+} \
+static DEVICE_ATTR_RW(target##n)
+
+TARGET_ATTR_RW(0);
+TARGET_ATTR_RW(1);
+TARGET_ATTR_RW(2);
+TARGET_ATTR_RW(3);
+TARGET_ATTR_RW(4);
+TARGET_ATTR_RW(5);
+TARGET_ATTR_RW(6);
+TARGET_ATTR_RW(7);
+TARGET_ATTR_RW(8);
+TARGET_ATTR_RW(9);
+TARGET_ATTR_RW(10);
+TARGET_ATTR_RW(11);
+TARGET_ATTR_RW(12);
+TARGET_ATTR_RW(13);
+TARGET_ATTR_RW(14);
+TARGET_ATTR_RW(15);
+
+static struct attribute *target_attrs[] = {
+ &dev_attr_target0.attr,
+ &dev_attr_target1.attr,
+ &dev_attr_target2.attr,
+ &dev_attr_target3.attr,
+ &dev_attr_target4.attr,
+ &dev_attr_target5.attr,
+ &dev_attr_target6.attr,
+ &dev_attr_target7.attr,
+ &dev_attr_target8.attr,
+ &dev_attr_target9.attr,
+ &dev_attr_target10.attr,
+ &dev_attr_target11.attr,
+ &dev_attr_target12.attr,
+ &dev_attr_target13.attr,
+ &dev_attr_target14.attr,
+ &dev_attr_target15.attr,
+ NULL,
+};
+
+static umode_t cxl_region_target_visible(struct kobject *kobj,
+ struct attribute *a, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+
+ if (n < p->interleave_ways)
+ return a->mode;
+ return 0;
+}
+
+static const struct attribute_group cxl_region_target_group = {
+ .attrs = target_attrs,
+ .is_visible = cxl_region_target_visible,
+};
+
+static const struct attribute_group *get_cxl_region_target_group(void)
+{
+ return &cxl_region_target_group;
+}
+
+static const struct attribute_group *region_groups[] = {
+ &cxl_base_attribute_group,
+ &cxl_region_group,
+ &cxl_region_target_group,
+ NULL,
+};
+
+static void cxl_region_release(struct device *dev)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ int id = atomic_read(&cxlrd->region_id);
+
+ /*
+ * Try to reuse the recently idled id rather than the cached
+ * next id to prevent the region id space from increasing
+ * unnecessarily.
+ */
+ if (cxlr->id < id)
+ if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) {
+ memregion_free(id);
+ goto out;
+ }
+
+ memregion_free(cxlr->id);
+out:
+ put_device(dev->parent);
+ kfree(cxlr);
+}
+
+const struct device_type cxl_region_type = {
+ .name = "cxl_region",
+ .release = cxl_region_release,
+ .groups = region_groups
+};
+
+bool is_cxl_region(struct device *dev)
+{
+ return dev->type == &cxl_region_type;
+}
+EXPORT_SYMBOL_NS_GPL(is_cxl_region, CXL);
+
+static struct cxl_region *to_cxl_region(struct device *dev)
+{
+ if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type,
+ "not a cxl_region device\n"))
+ return NULL;
+
+ return container_of(dev, struct cxl_region, dev);
+}
+
+static void unregister_region(void *dev)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ int i;
+
+ device_del(dev);
+
+	/*
+	 * Now that region sysfs is shut down, the parameter block is
+	 * read-only, so there is no need to hold the region rwsem to access
+	 * the region parameters.
+	 */
+ for (i = 0; i < p->interleave_ways; i++)
+ detach_target(cxlr, i);
+
+ cxl_region_iomem_release(cxlr);
+ put_device(dev);
+}
+
+static struct lock_class_key cxl_region_key;
+
+static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id)
+{
+ struct cxl_region *cxlr;
+ struct device *dev;
+
+ cxlr = kzalloc(sizeof(*cxlr), GFP_KERNEL);
+ if (!cxlr) {
+ memregion_free(id);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ dev = &cxlr->dev;
+ device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &cxl_region_key);
+ dev->parent = &cxlrd->cxlsd.cxld.dev;
+ /*
+ * Keep root decoder pinned through cxl_region_release to fixup
+ * region id allocations
+ */
+ get_device(dev->parent);
+ device_set_pm_not_required(dev);
+ dev->bus = &cxl_bus_type;
+ dev->type = &cxl_region_type;
+ cxlr->id = id;
+
+ return cxlr;
+}
+
+/**
+ * devm_cxl_add_region - Adds a region to a decoder
+ * @cxlrd: root decoder
+ * @id: memregion id to create; freed via memregion_free() on failure
+ * @mode: mode for the endpoint decoders of this region
+ * @type: select whether this is an expander or accelerator (type-2 or type-3)
+ *
+ * This is the second step of region initialization. Regions exist within an
+ * address space which is mapped by @cxlrd.
+ *
+ * Return: a new region on success, or an ERR_PTR() encoded negative error
+ * code. The region will be named "regionZ" where Z is the unique region number.
+ */
+static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
+ int id,
+ enum cxl_decoder_mode mode,
+ enum cxl_decoder_type type)
+{
+ struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
+ struct cxl_region *cxlr;
+ struct device *dev;
+ int rc;
+
+ cxlr = cxl_region_alloc(cxlrd, id);
+ if (IS_ERR(cxlr))
+ return cxlr;
+ cxlr->mode = mode;
+ cxlr->type = type;
+
+ dev = &cxlr->dev;
+ rc = dev_set_name(dev, "region%d", id);
+ if (rc)
+ goto err;
+
+ rc = device_add(dev);
+ if (rc)
+ goto err;
+
+ rc = devm_add_action_or_reset(port->uport, unregister_region, cxlr);
+ if (rc)
+ return ERR_PTR(rc);
+
+ dev_dbg(port->uport, "%s: created %s\n",
+ dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev));
+ return cxlr;
+
+err:
+ put_device(dev);
+ return ERR_PTR(rc);
+}
+
+static ssize_t create_pmem_region_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
+
+ return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
+}
+
+static ssize_t create_pmem_region_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
+ struct cxl_region *cxlr;
+ int id, rc;
+
+ rc = sscanf(buf, "region%d\n", &id);
+ if (rc != 1)
+ return -EINVAL;
+
+ rc = memregion_alloc(GFP_KERNEL);
+ if (rc < 0)
+ return rc;
+
+ if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
+ memregion_free(rc);
+ return -EBUSY;
+ }
+
+ cxlr = devm_cxl_add_region(cxlrd, id, CXL_DECODER_PMEM,
+ CXL_DECODER_EXPANDER);
+ if (IS_ERR(cxlr))
+ return PTR_ERR(cxlr);
+
+ return len;
+}
+DEVICE_ATTR_RW(create_pmem_region);
+
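The show/store pair forms a handshake: reading create_pmem_region advertises the next region id, and writing that same name back atomically swaps in a freshly allocated id, so a racing writer holding a stale name sees -EBUSY instead of creating a duplicate. A hypothetical userspace sketch of the protocol:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int create_pmem_region(const char *decoder_sysfs, char *name, size_t len)
{
	char path[256];
	ssize_t n;
	int fd;

	snprintf(path, sizeof(path), "%s/create_pmem_region", decoder_sysfs);
	fd = open(path, O_RDWR);
	if (fd < 0)
		return -1;
	n = read(fd, name, len - 1);	/* e.g. "region0\n" */
	if (n <= 0)
		goto err;
	name[n] = '\0';
	if (write(fd, name, n) < 0)	/* stale name => -EBUSY */
		goto err;
	close(fd);
	return 0;
err:
	close(fd);
	return -1;
}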
+static ssize_t region_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct cxl_decoder *cxld = to_cxl_decoder(dev);
+ ssize_t rc;
+
+ rc = down_read_interruptible(&cxl_region_rwsem);
+ if (rc)
+ return rc;
+
+ if (cxld->region)
+ rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
+ else
+ rc = sysfs_emit(buf, "\n");
+ up_read(&cxl_region_rwsem);
+
+ return rc;
+}
+DEVICE_ATTR_RO(region);
+
+static struct cxl_region *
+cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
+{
+ struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
+ struct device *region_dev;
+
+ region_dev = device_find_child_by_name(&cxld->dev, name);
+ if (!region_dev)
+ return ERR_PTR(-ENODEV);
+
+ return to_cxl_region(region_dev);
+}
+
+static ssize_t delete_region_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
+{
+ struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
+ struct cxl_port *port = to_cxl_port(dev->parent);
+ struct cxl_region *cxlr;
+
+ cxlr = cxl_find_region_by_name(cxlrd, buf);
+ if (IS_ERR(cxlr))
+ return PTR_ERR(cxlr);
+
+ devm_release_action(port->uport, unregister_region, cxlr);
+ put_device(&cxlr->dev);
+
+ return len;
+}
+DEVICE_ATTR_WO(delete_region);
+
+static void cxl_pmem_region_release(struct device *dev)
+{
+ struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
+ int i;
+
+ for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
+ struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd;
+
+ put_device(&cxlmd->dev);
+ }
+
+ kfree(cxlr_pmem);
+}
+
+static const struct attribute_group *cxl_pmem_region_attribute_groups[] = {
+ &cxl_base_attribute_group,
+ NULL,
+};
+
+const struct device_type cxl_pmem_region_type = {
+ .name = "cxl_pmem_region",
+ .release = cxl_pmem_region_release,
+ .groups = cxl_pmem_region_attribute_groups,
+};
+
+bool is_cxl_pmem_region(struct device *dev)
+{
+ return dev->type == &cxl_pmem_region_type;
+}
+EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, CXL);
+
+struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
+{
+ if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev),
+ "not a cxl_pmem_region device\n"))
+ return NULL;
+ return container_of(dev, struct cxl_pmem_region, dev);
+}
+EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL);
+
+static struct lock_class_key cxl_pmem_region_key;
+
+static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
+{
+ struct cxl_region_params *p = &cxlr->params;
+ struct cxl_pmem_region *cxlr_pmem;
+ struct device *dev;
+ int i;
+
+ down_read(&cxl_region_rwsem);
+ if (p->state != CXL_CONFIG_COMMIT) {
+ cxlr_pmem = ERR_PTR(-ENXIO);
+ goto out;
+ }
+
+ cxlr_pmem = kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets),
+ GFP_KERNEL);
+ if (!cxlr_pmem) {
+ cxlr_pmem = ERR_PTR(-ENOMEM);
+ goto out;
+ }
+
+ cxlr_pmem->hpa_range.start = p->res->start;
+ cxlr_pmem->hpa_range.end = p->res->end;
+
+ /* Snapshot the region configuration underneath the cxl_region_rwsem */
+ cxlr_pmem->nr_mappings = p->nr_targets;
+ for (i = 0; i < p->nr_targets; i++) {
+ struct cxl_endpoint_decoder *cxled = p->targets[i];
+ struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+ struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
+
+ m->cxlmd = cxlmd;
+ get_device(&cxlmd->dev);
+ m->start = cxled->dpa_res->start;
+ m->size = resource_size(cxled->dpa_res);
+ m->position = i;
+ }
+
+ dev = &cxlr_pmem->dev;
+ cxlr_pmem->cxlr = cxlr;
+ device_initialize(dev);
+ lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
+ device_set_pm_not_required(dev);
+ dev->parent = &cxlr->dev;
+ dev->bus = &cxl_bus_type;
+ dev->type = &cxl_pmem_region_type;
+out:
+ up_read(&cxl_region_rwsem);
+
+ return cxlr_pmem;
+}
+
+static void cxlr_pmem_unregister(void *dev)
+{
+ device_unregister(dev);
+}
+
+/**
+ * devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge
+ * @cxlr: parent CXL region for this pmem region bridge device
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
+{
+ struct cxl_pmem_region *cxlr_pmem;
+ struct device *dev;
+ int rc;
+
+ cxlr_pmem = cxl_pmem_region_alloc(cxlr);
+ if (IS_ERR(cxlr_pmem))
+ return PTR_ERR(cxlr_pmem);
+
+ dev = &cxlr_pmem->dev;
+ rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
+ if (rc)
+ goto err;
+
+ rc = device_add(dev);
+ if (rc)
+ goto err;
+
+ dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
+ dev_name(dev));
+
+ return devm_add_action_or_reset(&cxlr->dev, cxlr_pmem_unregister, dev);
+
+err:
+ put_device(dev);
+ return rc;
+}
+
+static int cxl_region_probe(struct device *dev)
+{
+ struct cxl_region *cxlr = to_cxl_region(dev);
+ struct cxl_region_params *p = &cxlr->params;
+ int rc;
+
+ rc = down_read_interruptible(&cxl_region_rwsem);
+ if (rc) {
+ dev_dbg(&cxlr->dev, "probe interrupted\n");
+ return rc;
+ }
+
+ if (p->state < CXL_CONFIG_COMMIT) {
+ dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
+ rc = -ENXIO;
+ }
+
+ /*
+ * From this point on any path that changes the region's state away from
+ * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
+ */
+	up_read(&cxl_region_rwsem);
+
+	if (rc)
+		return rc;
+
+ switch (cxlr->mode) {
+ case CXL_DECODER_PMEM:
+ return devm_cxl_add_pmem_region(cxlr);
+ default:
+ dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
+ cxlr->mode);
+ return -ENXIO;
+ }
+}
+
+static struct cxl_driver cxl_region_driver = {
+ .name = "cxl_region",
+ .probe = cxl_region_probe,
+ .id = CXL_DEVICE_REGION,
+};
+
+int cxl_region_init(void)
+{
+ return cxl_driver_register(&cxl_region_driver);
+}
+
+void cxl_region_exit(void)
+{
+ cxl_driver_unregister(&cxl_region_driver);
+}
+
+MODULE_IMPORT_NS(CXL);
+MODULE_ALIAS_CXL(CXL_DEVICE_REGION);
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
index 41de4a136ecd..39a129c57d40 100644
--- a/drivers/cxl/core/regs.c
+++ b/drivers/cxl/core/regs.c
@@ -5,6 +5,7 @@
#include <linux/slab.h>
#include <linux/pci.h>
#include <cxlmem.h>
+#include <cxlpci.h>
/**
* DOC: cxl registers
@@ -35,7 +36,7 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base,
struct cxl_component_reg_map *map)
{
int cap, cap_count;
- u64 cap_array;
+ u32 cap_array;
*map = (struct cxl_component_reg_map) { 0 };
@@ -45,11 +46,11 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base,
*/
base += CXL_CM_OFFSET;
- cap_array = readq(base + CXL_CM_CAP_HDR_OFFSET);
+ cap_array = readl(base + CXL_CM_CAP_HDR_OFFSET);
if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) {
dev_err(dev,
- "Couldn't locate the CXL.cache and CXL.mem capability array header./n");
+ "Couldn't locate the CXL.cache and CXL.mem capability array header.\n");
return;
}
@@ -90,7 +91,7 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base,
}
}
}
-EXPORT_SYMBOL_GPL(cxl_probe_component_regs);
+EXPORT_SYMBOL_NS_GPL(cxl_probe_component_regs, CXL);
/**
* cxl_probe_device_regs() - Detect CXL Device register blocks
@@ -156,11 +157,10 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base,
}
}
}
-EXPORT_SYMBOL_GPL(cxl_probe_device_regs);
+EXPORT_SYMBOL_NS_GPL(cxl_probe_device_regs, CXL);
-static void __iomem *devm_cxl_iomap_block(struct device *dev,
- resource_size_t addr,
- resource_size_t length)
+void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
+ resource_size_t length)
{
void __iomem *ret_val;
struct resource *res;
@@ -199,7 +199,7 @@ int cxl_map_component_regs(struct pci_dev *pdev,
return 0;
}
-EXPORT_SYMBOL_GPL(cxl_map_component_regs);
+EXPORT_SYMBOL_NS_GPL(cxl_map_component_regs, CXL);
int cxl_map_device_regs(struct pci_dev *pdev,
struct cxl_device_regs *regs,
@@ -246,4 +246,59 @@ int cxl_map_device_regs(struct pci_dev *pdev,
return 0;
}
-EXPORT_SYMBOL_GPL(cxl_map_device_regs);
+EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL);
+
+static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi,
+ struct cxl_register_map *map)
+{
+ map->block_offset = ((u64)reg_hi << 32) |
+ (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK);
+ map->barno = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo);
+ map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo);
+}
+
+/**
+ * cxl_find_regblock() - Locate register blocks by type
+ * @pdev: The CXL PCI device to enumerate.
+ * @type: Register Block Indicator id
+ * @map: Enumeration output, clobbered on error
+ *
+ * Return: 0 if register block enumerated, negative error code otherwise
+ *
+ * A CXL DVSEC may point to one or more register blocks, search for them
+ * by @type.
+ */
+int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
+ struct cxl_register_map *map)
+{
+ u32 regloc_size, regblocks;
+ int regloc, i;
+
+ map->block_offset = U64_MAX;
+ regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL,
+ CXL_DVSEC_REG_LOCATOR);
+ if (!regloc)
+ return -ENXIO;
+
+ pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
+ regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);
+
+ regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET;
+ regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8;
+
+ for (i = 0; i < regblocks; i++, regloc += 8) {
+ u32 reg_lo, reg_hi;
+
+ pci_read_config_dword(pdev, regloc, &reg_lo);
+ pci_read_config_dword(pdev, regloc + 4, &reg_hi);
+
+ cxl_decode_regblock(reg_lo, reg_hi, map);
+
+ if (map->reg_type == type)
+ return 0;
+ }
+
+ map->block_offset = U64_MAX;
+ return -ENODEV;
+}
+EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL);
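For reference, cxl_decode_regblock() splits each 8-byte Register Locator entry per the CXL 2.0 layout: BIR in reg_lo[2:0], Register Block Identifier in reg_lo[15:8], and a 64K-aligned offset assembled from reg_lo[31:16] and reg_hi. A worked example with invented values:

/* Worked example (invented values) of the decode above */
static void example_decode(void)
{
	struct cxl_register_map map;
	u32 reg_lo = 0x00010302;	/* offset_low 0x10000, id 3, BIR 2 */
	u32 reg_hi = 0x1;

	cxl_decode_regblock(reg_lo, reg_hi, &map);
	/* map.barno == 2, map.reg_type == 3, map.block_offset == 0x100010000 */
}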
diff --git a/drivers/cxl/core/suspend.c b/drivers/cxl/core/suspend.c
new file mode 100644
index 000000000000..a5984d96ea1d
--- /dev/null
+++ b/drivers/cxl/core/suspend.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#include <linux/atomic.h>
+#include <linux/export.h>
+#include "cxlmem.h"
+
+static atomic_t mem_active;
+
+bool cxl_mem_active(void)
+{
+ return atomic_read(&mem_active) != 0;
+}
+
+void cxl_mem_active_inc(void)
+{
+ atomic_inc(&mem_active);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_mem_active_inc, CXL);
+
+void cxl_mem_active_dec(void)
+{
+ atomic_dec(&mem_active);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_mem_active_dec, CXL);
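The counter gives the power-management core a way to ask whether any CXL memory device is active before entering suspend, on the assumption that CXL.mem traffic cannot yet be quiesced and resumed across a suspend cycle. A hypothetical caller pairing, e.g. in a memdev driver's probe/remove path:

/* Hypothetical pairing: pin "mem active" for the device's bound lifetime */
static int example_memdev_probe(struct device *dev)
{
	cxl_mem_active_inc();
	return 0;
}

static void example_memdev_remove(struct device *dev)
{
	cxl_mem_active_dec();
}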