aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/cxl/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cxl/pci.c')
-rw-r--r--drivers/cxl/pci.c367
1 files changed, 172 insertions, 195 deletions
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index c734e21fb4e0..faeb5d9d7a7a 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -1,14 +1,17 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/moduleparam.h>
#include <linux/module.h>
+#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/pci.h>
+#include <linux/pci-doe.h>
#include <linux/io.h>
#include "cxlmem.h"
-#include "pci.h"
+#include "cxlpci.h"
#include "cxl.h"
/**
@@ -28,47 +31,62 @@
* - Registers a CXL mailbox with cxl_core.
*/
-#define cxl_doorbell_busy(cxlm) \
- (readl((cxlm)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) & \
+#define cxl_doorbell_busy(cxlds) \
+ (readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) & \
CXLDEV_MBOX_CTRL_DOORBELL)
/* CXL 2.0 - 8.2.8.4 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
-static int cxl_pci_mbox_wait_for_doorbell(struct cxl_mem *cxlm)
+/*
+ * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
+ * dictate how long to wait for the mailbox to become ready. The new
+ * field allows the device to tell software the amount of time to wait
+ * before mailbox ready. This field per the spec theoretically allows
+ * for up to 255 seconds. 255 seconds is unreasonably long, its longer
+ * than the maximum SATA port link recovery wait. Default to 60 seconds
+ * until someone builds a CXL device that needs more time in practice.
+ */
+static unsigned short mbox_ready_timeout = 60;
+module_param(mbox_ready_timeout, ushort, 0644);
+MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");
+
+static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
{
const unsigned long start = jiffies;
unsigned long end = start;
- while (cxl_doorbell_busy(cxlm)) {
+ while (cxl_doorbell_busy(cxlds)) {
end = jiffies;
if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
/* Check again in case preempted before timeout test */
- if (!cxl_doorbell_busy(cxlm))
+ if (!cxl_doorbell_busy(cxlds))
break;
return -ETIMEDOUT;
}
cpu_relax();
}
- dev_dbg(cxlm->dev, "Doorbell wait took %dms",
+ dev_dbg(cxlds->dev, "Doorbell wait took %dms",
jiffies_to_msecs(end) - jiffies_to_msecs(start));
return 0;
}
-static void cxl_pci_mbox_timeout(struct cxl_mem *cxlm,
- struct cxl_mbox_cmd *mbox_cmd)
-{
- struct device *dev = cxlm->dev;
+#define cxl_err(dev, status, msg) \
+ dev_err_ratelimited(dev, msg ", device state %s%s\n", \
+ status & CXLMDEV_DEV_FATAL ? " fatal" : "", \
+ status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
- dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
- mbox_cmd->opcode, mbox_cmd->size_in);
-}
+#define cxl_cmd_err(dev, cmd, status, msg) \
+ dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \
+ (cmd)->opcode, \
+ status & CXLMDEV_DEV_FATAL ? " fatal" : "", \
+ status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
/**
* __cxl_pci_mbox_send_cmd() - Execute a mailbox command
- * @cxlm: The CXL memory device to communicate with.
+ * @cxlds: The device state to communicate with.
* @mbox_cmd: Command to send to the memory device.
*
* Context: Any context. Expects mbox_mutex to be held.
@@ -88,16 +106,16 @@ static void cxl_pci_mbox_timeout(struct cxl_mem *cxlm,
* not need to coordinate with each other. The driver only uses the primary
* mailbox.
*/
-static int __cxl_pci_mbox_send_cmd(struct cxl_mem *cxlm,
+static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
struct cxl_mbox_cmd *mbox_cmd)
{
- void __iomem *payload = cxlm->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
- struct device *dev = cxlm->dev;
+ void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
+ struct device *dev = cxlds->dev;
u64 cmd_reg, status_reg;
size_t out_len;
int rc;
- lockdep_assert_held(&cxlm->mbox_mutex);
+ lockdep_assert_held(&cxlds->mbox_mutex);
/*
* Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
@@ -117,8 +135,12 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_mem *cxlm,
*/
/* #1 */
- if (cxl_doorbell_busy(cxlm)) {
- dev_err_ratelimited(dev, "Mailbox re-busy after acquiring\n");
+ if (cxl_doorbell_busy(cxlds)) {
+ u64 md_status =
+ readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+
+ cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
+ "mailbox queue busy");
return -EBUSY;
}
@@ -134,32 +156,35 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_mem *cxlm,
}
/* #2, #3 */
- writeq(cmd_reg, cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
+ writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
/* #4 */
dev_dbg(dev, "Sending command\n");
writel(CXLDEV_MBOX_CTRL_DOORBELL,
- cxlm->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
+ cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
/* #5 */
- rc = cxl_pci_mbox_wait_for_doorbell(cxlm);
+ rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
if (rc == -ETIMEDOUT) {
- cxl_pci_mbox_timeout(cxlm, mbox_cmd);
+ u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+
+ cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout");
return rc;
}
/* #6 */
- status_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
+ status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
mbox_cmd->return_code =
FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
- if (mbox_cmd->return_code != 0) {
- dev_dbg(dev, "Mailbox operation had an error\n");
- return 0;
+ if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
+ dev_dbg(dev, "Mailbox operation had an error: %s\n",
+ cxl_mbox_cmd_rc2str(mbox_cmd));
+ return 0; /* completed but caller must check return_code */
}
/* #7 */
- cmd_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
+ cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
/* #8 */
@@ -171,7 +196,7 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_mem *cxlm,
* have requested less data than the hardware supplied even
* within spec.
*/
- size_t n = min3(mbox_cmd->size_out, cxlm->payload_size, out_len);
+ size_t n = min3(mbox_cmd->size_out, cxlds->payload_size, out_len);
memcpy_fromio(mbox_cmd->payload_out, payload, n);
mbox_cmd->size_out = n;
@@ -182,108 +207,51 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_mem *cxlm,
return 0;
}
-/**
- * cxl_pci_mbox_get() - Acquire exclusive access to the mailbox.
- * @cxlm: The memory device to gain access to.
- *
- * Context: Any context. Takes the mbox_mutex.
- * Return: 0 if exclusive access was acquired.
- */
-static int cxl_pci_mbox_get(struct cxl_mem *cxlm)
+static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
- struct device *dev = cxlm->dev;
- u64 md_status;
int rc;
- mutex_lock_io(&cxlm->mbox_mutex);
+ mutex_lock_io(&cxlds->mbox_mutex);
+ rc = __cxl_pci_mbox_send_cmd(cxlds, cmd);
+ mutex_unlock(&cxlds->mbox_mutex);
- /*
- * XXX: There is some amount of ambiguity in the 2.0 version of the spec
- * around the mailbox interface ready (8.2.8.5.1.1). The purpose of the
- * bit is to allow firmware running on the device to notify the driver
- * that it's ready to receive commands. It is unclear if the bit needs
- * to be read for each transaction mailbox, ie. the firmware can switch
- * it on and off as needed. Second, there is no defined timeout for
- * mailbox ready, like there is for the doorbell interface.
- *
- * Assumptions:
- * 1. The firmware might toggle the Mailbox Interface Ready bit, check
- * it for every command.
- *
- * 2. If the doorbell is clear, the firmware should have first set the
- * Mailbox Interface Ready bit. Therefore, waiting for the doorbell
- * to be ready is sufficient.
- */
- rc = cxl_pci_mbox_wait_for_doorbell(cxlm);
- if (rc) {
- dev_warn(dev, "Mailbox interface not ready\n");
- goto out;
- }
-
- md_status = readq(cxlm->regs.memdev + CXLMDEV_STATUS_OFFSET);
- if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) {
- dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n");
- rc = -EBUSY;
- goto out;
- }
-
- /*
- * Hardware shouldn't allow a ready status but also have failure bits
- * set. Spit out an error, this should be a bug report
- */
- rc = -EFAULT;
- if (md_status & CXLMDEV_DEV_FATAL) {
- dev_err(dev, "mbox: reported ready, but fatal\n");
- goto out;
- }
- if (md_status & CXLMDEV_FW_HALT) {
- dev_err(dev, "mbox: reported ready, but halted\n");
- goto out;
- }
- if (CXLMDEV_RESET_NEEDED(md_status)) {
- dev_err(dev, "mbox: reported ready, but reset needed\n");
- goto out;
- }
-
- /* with lock held */
- return 0;
-
-out:
- mutex_unlock(&cxlm->mbox_mutex);
return rc;
}
-/**
- * cxl_pci_mbox_put() - Release exclusive access to the mailbox.
- * @cxlm: The CXL memory device to communicate with.
- *
- * Context: Any context. Expects mbox_mutex to be held.
- */
-static void cxl_pci_mbox_put(struct cxl_mem *cxlm)
+static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
{
- mutex_unlock(&cxlm->mbox_mutex);
-}
-
-static int cxl_pci_mbox_send(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
-{
- int rc;
-
- rc = cxl_pci_mbox_get(cxlm);
- if (rc)
- return rc;
-
- rc = __cxl_pci_mbox_send_cmd(cxlm, cmd);
- cxl_pci_mbox_put(cxlm);
+ const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
+ unsigned long timeout;
+ u64 md_status;
- return rc;
-}
+ timeout = jiffies + mbox_ready_timeout * HZ;
+ do {
+ md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+ if (md_status & CXLMDEV_MBOX_IF_READY)
+ break;
+ if (msleep_interruptible(100))
+ break;
+ } while (!time_after(jiffies, timeout));
+
+ if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
+ cxl_err(cxlds->dev, md_status,
+ "timeout awaiting mailbox ready");
+ return -ETIMEDOUT;
+ }
-static int cxl_pci_setup_mailbox(struct cxl_mem *cxlm)
-{
- const int cap = readl(cxlm->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
+ /*
+ * A command may be in flight from a previous driver instance,
+ * think kexec, do one doorbell wait so that
+ * __cxl_pci_mbox_send_cmd() can assume that it is the only
+ * source for future doorbell busy events.
+ */
+ if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) {
+ cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle");
+ return -ETIMEDOUT;
+ }
- cxlm->mbox_send = cxl_pci_mbox_send;
- cxlm->payload_size =
+ cxlds->mbox_send = cxl_pci_mbox_send;
+ cxlds->payload_size =
1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
/*
@@ -293,15 +261,15 @@ static int cxl_pci_setup_mailbox(struct cxl_mem *cxlm)
* there's no point in going forward. If the size is too large, there's
* no harm is soft limiting it.
*/
- cxlm->payload_size = min_t(size_t, cxlm->payload_size, SZ_1M);
- if (cxlm->payload_size < 256) {
- dev_err(cxlm->dev, "Mailbox is too small (%zub)",
- cxlm->payload_size);
+ cxlds->payload_size = min_t(size_t, cxlds->payload_size, SZ_1M);
+ if (cxlds->payload_size < 256) {
+ dev_err(cxlds->dev, "Mailbox is too small (%zub)",
+ cxlds->payload_size);
return -ENXIO;
}
- dev_dbg(cxlm->dev, "Mailbox payload sized %zu",
- cxlm->payload_size);
+ dev_dbg(cxlds->dev, "Mailbox payload sized %zu",
+ cxlds->payload_size);
return 0;
}
@@ -379,18 +347,18 @@ static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
return 0;
}
-static int cxl_map_regs(struct cxl_mem *cxlm, struct cxl_register_map *map)
+static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *map)
{
- struct device *dev = cxlm->dev;
+ struct device *dev = cxlds->dev;
struct pci_dev *pdev = to_pci_dev(dev);
switch (map->reg_type) {
case CXL_REGLOC_RBI_COMPONENT:
- cxl_map_component_regs(pdev, &cxlm->regs.component, map);
+ cxl_map_component_regs(pdev, &cxlds->regs.component, map);
dev_dbg(dev, "Mapping component registers...\n");
break;
case CXL_REGLOC_RBI_MEMDEV:
- cxl_map_device_regs(pdev, &cxlm->regs.device_regs, map);
+ cxl_map_device_regs(pdev, &cxlds->regs.device_regs, map);
dev_dbg(dev, "Probing device registers...\n");
break;
default:
@@ -400,58 +368,6 @@ static int cxl_map_regs(struct cxl_mem *cxlm, struct cxl_register_map *map)
return 0;
}
-static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi,
- struct cxl_register_map *map)
-{
- map->block_offset =
- ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
- map->barno = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
- map->reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
-}
-
-/**
- * cxl_find_regblock() - Locate register blocks by type
- * @pdev: The CXL PCI device to enumerate.
- * @type: Register Block Indicator id
- * @map: Enumeration output, clobbered on error
- *
- * Return: 0 if register block enumerated, negative error code otherwise
- *
- * A CXL DVSEC may point to one or more register blocks, search for them
- * by @type.
- */
-static int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
- struct cxl_register_map *map)
-{
- u32 regloc_size, regblocks;
- int regloc, i;
-
- regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL,
- PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
- if (!regloc)
- return -ENXIO;
-
- pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
- regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);
-
- regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
- regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;
-
- for (i = 0; i < regblocks; i++, regloc += 8) {
- u32 reg_lo, reg_hi;
-
- pci_read_config_dword(pdev, regloc, &reg_lo);
- pci_read_config_dword(pdev, regloc + 4, &reg_hi);
-
- cxl_decode_regblock(reg_lo, reg_hi, map);
-
- if (map->reg_type == type)
- return 0;
- }
-
- return -ENODEV;
-}
-
static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
struct cxl_register_map *map)
{
@@ -471,11 +387,52 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
return rc;
}
+static void cxl_pci_destroy_doe(void *mbs)
+{
+ xa_destroy(mbs);
+}
+
+static void devm_cxl_pci_create_doe(struct cxl_dev_state *cxlds)
+{
+ struct device *dev = cxlds->dev;
+ struct pci_dev *pdev = to_pci_dev(dev);
+ u16 off = 0;
+
+ xa_init(&cxlds->doe_mbs);
+ if (devm_add_action(&pdev->dev, cxl_pci_destroy_doe, &cxlds->doe_mbs)) {
+ dev_err(dev, "Failed to create XArray for DOE's\n");
+ return;
+ }
+
+ /*
+ * Mailbox creation is best effort. Higher layers must determine if
+ * the lack of a mailbox for their protocol is a device failure or not.
+ */
+ pci_doe_for_each_off(pdev, off) {
+ struct pci_doe_mb *doe_mb;
+
+ doe_mb = pcim_doe_create_mb(pdev, off);
+ if (IS_ERR(doe_mb)) {
+ dev_err(dev, "Failed to create MB object for MB @ %x\n",
+ off);
+ continue;
+ }
+
+ if (xa_insert(&cxlds->doe_mbs, off, doe_mb, GFP_KERNEL)) {
+ dev_err(dev, "xa_insert failed to insert MB @ %x\n",
+ off);
+ continue;
+ }
+
+ dev_dbg(dev, "Created DOE mailbox @%x\n", off);
+ }
+}
+
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct cxl_register_map map;
struct cxl_memdev *cxlmd;
- struct cxl_mem *cxlm;
+ struct cxl_dev_state *cxlds;
int rc;
/*
@@ -489,39 +446,59 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
- cxlm = cxl_mem_create(&pdev->dev);
- if (IS_ERR(cxlm))
- return PTR_ERR(cxlm);
+ cxlds = cxl_dev_state_create(&pdev->dev);
+ if (IS_ERR(cxlds))
+ return PTR_ERR(cxlds);
+
+ cxlds->serial = pci_get_dsn(pdev);
+ cxlds->cxl_dvsec = pci_find_dvsec_capability(
+ pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
+ if (!cxlds->cxl_dvsec)
+ dev_warn(&pdev->dev,
+ "Device DVSEC not present, skip CXL.mem init\n");
rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
if (rc)
return rc;
- rc = cxl_map_regs(cxlm, &map);
+ rc = cxl_map_regs(cxlds, &map);
if (rc)
return rc;
- rc = cxl_pci_setup_mailbox(cxlm);
+ /*
+ * If the component registers can't be found, the cxl_pci driver may
+ * still be useful for management functions so don't return an error.
+ */
+ cxlds->component_reg_phys = CXL_RESOURCE_NONE;
+ rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
+ if (rc)
+ dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
+
+ cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map);
+
+ devm_cxl_pci_create_doe(cxlds);
+
+ rc = cxl_pci_setup_mailbox(cxlds);
if (rc)
return rc;
- rc = cxl_mem_enumerate_cmds(cxlm);
+ rc = cxl_enumerate_cmds(cxlds);
if (rc)
return rc;
- rc = cxl_mem_identify(cxlm);
+ rc = cxl_dev_state_identify(cxlds);
if (rc)
return rc;
- rc = cxl_mem_create_range_info(cxlm);
+ rc = cxl_mem_create_range_info(cxlds);
if (rc)
return rc;
- cxlmd = devm_cxl_add_memdev(cxlm);
+ cxlmd = devm_cxl_add_memdev(cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);
- if (range_len(&cxlm->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
+ if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM))
rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);
return rc;