aboutsummaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-07-21 09:46:59 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2019-07-21 09:46:59 -0700
commitbec5545edef658f81cd9721dbe8fbebeb3c7534d (patch)
tree42aec8714bbbf85ecf66609c7ea9ff0c6a89dcf3 /drivers
parentMerge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dtor/input (diff)
parentNTB: Describe the ntb_msi_test client in the documentation. (diff)
downloadlinux-dev-bec5545edef658f81cd9721dbe8fbebeb3c7534d.tar.xz
linux-dev-bec5545edef658f81cd9721dbe8fbebeb3c7534d.zip
Merge tag 'ntb-5.3' of git://github.com/jonmason/ntb
Pull NTB updates from Jon Mason: "New feature to add support for NTB virtual MSI interrupts, the ability to test and use this feature in the NTB transport layer. Also, bug fixes for the AMD and Switchtec drivers, as well as some general patches" * tag 'ntb-5.3' of git://github.com/jonmason/ntb: (22 commits) NTB: Describe the ntb_msi_test client in the documentation. NTB: Add MSI interrupt support to ntb_transport NTB: Add ntb_msi_test support to ntb_test NTB: Introduce NTB MSI Test Client NTB: Introduce MSI library NTB: Rename ntb.c to support multiple source files in the module NTB: Introduce functions to calculate multi-port resource index NTB: Introduce helper functions to calculate logical port number PCI/switchtec: Add module parameter to request more interrupts PCI/MSI: Support allocating virtual MSI interrupts ntb_hw_switchtec: Fix setup MW with failure bug ntb_hw_switchtec: Skip unnecessary re-setup of shared memory window for crosslink case ntb_hw_switchtec: Remove redundant steps of switchtec_ntb_reinit_peer() function NTB: correct ntb_dev_ops and ntb_dev comment typos NTB: amd: Silence shift wrapping warning in amd_ntb_db_vector_mask() ntb_hw_switchtec: potential shift wrapping bug in switchtec_ntb_init_sndev() NTB: ntb_transport: Ensure qp->tx_mw_dma_addr is initaliazed NTB: ntb_hw_amd: set peer limit register NTB: ntb_perf: Clear stale values in doorbell and command SPAD register NTB: ntb_perf: Disable NTB link after clearing peer XLAT registers ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/ntb/Kconfig11
-rw-r--r--drivers/ntb/Makefile3
-rw-r--r--drivers/ntb/core.c (renamed from drivers/ntb/ntb.c)0
-rw-r--r--drivers/ntb/hw/amd/ntb_hw_amd.c10
-rw-r--r--drivers/ntb/hw/intel/ntb_hw_gen3.c6
-rw-r--r--drivers/ntb/hw/mscc/ntb_hw_switchtec.c82
-rw-r--r--drivers/ntb/msi.c415
-rw-r--r--drivers/ntb/ntb_transport.c170
-rw-r--r--drivers/ntb/test/Kconfig9
-rw-r--r--drivers/ntb/test/Makefile1
-rw-r--r--drivers/ntb/test/ntb_msi_test.c433
-rw-r--r--drivers/ntb/test/ntb_perf.c14
-rw-r--r--drivers/pci/msi.c54
-rw-r--r--drivers/pci/switch/switchtec.c12
14 files changed, 1165 insertions, 55 deletions
diff --git a/drivers/ntb/Kconfig b/drivers/ntb/Kconfig
index c99eed87382a..df16c755b4da 100644
--- a/drivers/ntb/Kconfig
+++ b/drivers/ntb/Kconfig
@@ -13,6 +13,17 @@ menuconfig NTB
if NTB
+config NTB_MSI
+ bool "MSI Interrupt Support"
+ depends on PCI_MSI
+ help
+ Support using MSI interrupt forwarding instead of (or in addition to)
+ hardware doorbells. MSI interrupts typically offer lower latency
+ than doorbells and more MSI interrupts can be made available to
+ clients. However this requires an extra memory window and support
+ in the hardware driver for creating the MSI interrupts.
+
+ If unsure, say N.
source "drivers/ntb/hw/Kconfig"
source "drivers/ntb/test/Kconfig"
diff --git a/drivers/ntb/Makefile b/drivers/ntb/Makefile
index 5c64438d5b3f..3a6fa181ff99 100644
--- a/drivers/ntb/Makefile
+++ b/drivers/ntb/Makefile
@@ -1,3 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_NTB) += ntb.o hw/ test/
obj-$(CONFIG_NTB_TRANSPORT) += ntb_transport.o
+
+ntb-y := core.o
+ntb-$(CONFIG_NTB_MSI) += msi.o
diff --git a/drivers/ntb/ntb.c b/drivers/ntb/core.c
index 2581ab724c34..2581ab724c34 100644
--- a/drivers/ntb/ntb.c
+++ b/drivers/ntb/core.c
diff --git a/drivers/ntb/hw/amd/ntb_hw_amd.c b/drivers/ntb/hw/amd/ntb_hw_amd.c
index efb214fc545a..2859cc99b73e 100644
--- a/drivers/ntb/hw/amd/ntb_hw_amd.c
+++ b/drivers/ntb/hw/amd/ntb_hw_amd.c
@@ -160,8 +160,8 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
}
/* set and verify setting the limit */
- write64(limit, mmio + limit_reg);
- reg_val = read64(mmio + limit_reg);
+ write64(limit, peer_mmio + limit_reg);
+ reg_val = read64(peer_mmio + limit_reg);
if (reg_val != limit) {
write64(base_addr, mmio + limit_reg);
write64(0, peer_mmio + xlat_reg);
@@ -183,8 +183,8 @@ static int amd_ntb_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
}
/* set and verify setting the limit */
- writel(limit, mmio + limit_reg);
- reg_val = readl(mmio + limit_reg);
+ writel(limit, peer_mmio + limit_reg);
+ reg_val = readl(peer_mmio + limit_reg);
if (reg_val != limit) {
writel(base_addr, mmio + limit_reg);
writel(0, peer_mmio + xlat_reg);
@@ -333,7 +333,7 @@ static u64 amd_ntb_db_vector_mask(struct ntb_dev *ntb, int db_vector)
if (db_vector < 0 || db_vector > ndev->db_count)
return 0;
- return ntb_ndev(ntb)->db_valid_mask & (1 << db_vector);
+ return ntb_ndev(ntb)->db_valid_mask & (1ULL << db_vector);
}
static u64 amd_ntb_db_read(struct ntb_dev *ntb)
diff --git a/drivers/ntb/hw/intel/ntb_hw_gen3.c b/drivers/ntb/hw/intel/ntb_hw_gen3.c
index f475b56a3f49..c3397160db7f 100644
--- a/drivers/ntb/hw/intel/ntb_hw_gen3.c
+++ b/drivers/ntb/hw/intel/ntb_hw_gen3.c
@@ -532,9 +532,9 @@ static int intel_ntb3_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
return 0;
}
-int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
- resource_size_t *db_size,
- u64 *db_data, int db_bit)
+static int intel_ntb3_peer_db_addr(struct ntb_dev *ntb, phys_addr_t *db_addr,
+ resource_size_t *db_size,
+ u64 *db_data, int db_bit)
{
phys_addr_t db_addr_base;
struct intel_ntb_dev *ndev = ntb_ndev(ntb);
diff --git a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
index db4967748e4d..f4959458d909 100644
--- a/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
+++ b/drivers/ntb/hw/mscc/ntb_hw_switchtec.c
@@ -86,7 +86,8 @@ struct switchtec_ntb {
bool link_is_up;
enum ntb_speed link_speed;
enum ntb_width link_width;
- struct work_struct link_reinit_work;
+ struct work_struct check_link_status_work;
+ bool link_force_down;
};
static struct switchtec_ntb *ntb_sndev(struct ntb_dev *ntb)
@@ -485,33 +486,11 @@ enum switchtec_msg {
static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev);
-static void link_reinit_work(struct work_struct *work)
-{
- struct switchtec_ntb *sndev;
-
- sndev = container_of(work, struct switchtec_ntb, link_reinit_work);
-
- switchtec_ntb_reinit_peer(sndev);
-}
-
-static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
- enum switchtec_msg msg)
+static void switchtec_ntb_link_status_update(struct switchtec_ntb *sndev)
{
int link_sta;
int old = sndev->link_is_up;
- if (msg == MSG_LINK_FORCE_DOWN) {
- schedule_work(&sndev->link_reinit_work);
-
- if (sndev->link_is_up) {
- sndev->link_is_up = 0;
- ntb_link_event(&sndev->ntb);
- dev_info(&sndev->stdev->dev, "ntb link forced down\n");
- }
-
- return;
- }
-
link_sta = sndev->self_shared->link_sta;
if (link_sta) {
u64 peer = ioread64(&sndev->peer_shared->magic);
@@ -536,6 +515,38 @@ static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
}
}
+static void check_link_status_work(struct work_struct *work)
+{
+ struct switchtec_ntb *sndev;
+
+ sndev = container_of(work, struct switchtec_ntb,
+ check_link_status_work);
+
+ if (sndev->link_force_down) {
+ sndev->link_force_down = false;
+ switchtec_ntb_reinit_peer(sndev);
+
+ if (sndev->link_is_up) {
+ sndev->link_is_up = 0;
+ ntb_link_event(&sndev->ntb);
+ dev_info(&sndev->stdev->dev, "ntb link forced down\n");
+ }
+
+ return;
+ }
+
+ switchtec_ntb_link_status_update(sndev);
+}
+
+static void switchtec_ntb_check_link(struct switchtec_ntb *sndev,
+ enum switchtec_msg msg)
+{
+ if (msg == MSG_LINK_FORCE_DOWN)
+ sndev->link_force_down = true;
+
+ schedule_work(&sndev->check_link_status_work);
+}
+
static void switchtec_ntb_link_notification(struct switchtec_dev *stdev)
{
struct switchtec_ntb *sndev = stdev->sndev;
@@ -568,7 +579,7 @@ static int switchtec_ntb_link_enable(struct ntb_dev *ntb,
sndev->self_shared->link_sta = 1;
switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_UP);
- switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
+ switchtec_ntb_link_status_update(sndev);
return 0;
}
@@ -582,7 +593,7 @@ static int switchtec_ntb_link_disable(struct ntb_dev *ntb)
sndev->self_shared->link_sta = 0;
switchtec_ntb_send_msg(sndev, LINK_MESSAGE, MSG_LINK_DOWN);
- switchtec_ntb_check_link(sndev, MSG_CHECK_LINK);
+ switchtec_ntb_link_status_update(sndev);
return 0;
}
@@ -835,7 +846,8 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
sndev->ntb.topo = NTB_TOPO_SWITCH;
sndev->ntb.ops = &switchtec_ntb_ops;
- INIT_WORK(&sndev->link_reinit_work, link_reinit_work);
+ INIT_WORK(&sndev->check_link_status_work, check_link_status_work);
+ sndev->link_force_down = false;
sndev->self_partition = sndev->stdev->partition;
@@ -872,7 +884,7 @@ static int switchtec_ntb_init_sndev(struct switchtec_ntb *sndev)
}
sndev->peer_partition = ffs(tpart_vec) - 1;
- if (!(part_map & (1 << sndev->peer_partition))) {
+ if (!(part_map & (1ULL << sndev->peer_partition))) {
dev_err(&sndev->stdev->dev,
"ntb target partition is not NT partition\n");
return -ENODEV;
@@ -1448,10 +1460,16 @@ static void switchtec_ntb_deinit_db_msg_irq(struct switchtec_ntb *sndev)
static int switchtec_ntb_reinit_peer(struct switchtec_ntb *sndev)
{
- dev_info(&sndev->stdev->dev, "peer reinitialized\n");
- switchtec_ntb_deinit_shared_mw(sndev);
- switchtec_ntb_init_mw(sndev);
- return switchtec_ntb_init_shared_mw(sndev);
+ int rc;
+
+ if (crosslink_is_enabled(sndev))
+ return 0;
+
+ dev_info(&sndev->stdev->dev, "reinitialize shared memory window\n");
+ rc = config_rsvd_lut_win(sndev, sndev->mmio_peer_ctrl, 0,
+ sndev->self_partition,
+ sndev->self_shared_dma);
+ return rc;
}
static int switchtec_ntb_add(struct device *dev,
diff --git a/drivers/ntb/msi.c b/drivers/ntb/msi.c
new file mode 100644
index 000000000000..9dddf133658f
--- /dev/null
+++ b/drivers/ntb/msi.c
@@ -0,0 +1,415 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/ntb.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("0.1");
+MODULE_AUTHOR("Logan Gunthorpe <logang@deltatee.com>");
+MODULE_DESCRIPTION("NTB MSI Interrupt Library");
+
+struct ntb_msi {
+ u64 base_addr;
+ u64 end_addr;
+
+ void (*desc_changed)(void *ctx);
+
+ u32 __iomem *peer_mws[];
+};
+
+/**
+ * ntb_msi_init() - Initialize the MSI context
+ * @ntb: NTB device context
+ *
+ * This function must be called before any other ntb_msi function.
+ * It initializes the context for MSI operations and maps
+ * the peer memory windows.
+ *
+ * This function reserves the last N outbound memory windows (where N
+ * is the number of peers).
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_init(struct ntb_dev *ntb,
+ void (*desc_changed)(void *ctx))
+{
+ phys_addr_t mw_phys_addr;
+ resource_size_t mw_size;
+ size_t struct_size;
+ int peer_widx;
+ int peers;
+ int ret;
+ int i;
+
+ peers = ntb_peer_port_count(ntb);
+ if (peers <= 0)
+ return -EINVAL;
+
+ struct_size = sizeof(*ntb->msi) + sizeof(*ntb->msi->peer_mws) * peers;
+
+ ntb->msi = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL);
+ if (!ntb->msi)
+ return -ENOMEM;
+
+ ntb->msi->desc_changed = desc_changed;
+
+ for (i = 0; i < peers; i++) {
+ peer_widx = ntb_peer_mw_count(ntb) - 1 - i;
+
+ ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr,
+ &mw_size);
+ if (ret)
+ goto unroll;
+
+ ntb->msi->peer_mws[i] = devm_ioremap(&ntb->dev, mw_phys_addr,
+ mw_size);
+ if (!ntb->msi->peer_mws[i]) {
+ ret = -EFAULT;
+ goto unroll;
+ }
+ }
+
+ return 0;
+
+unroll:
+ for (i = 0; i < peers; i++)
+ if (ntb->msi->peer_mws[i])
+ devm_iounmap(&ntb->dev, ntb->msi->peer_mws[i]);
+
+ devm_kfree(&ntb->dev, ntb->msi);
+ ntb->msi = NULL;
+ return ret;
+}
+EXPORT_SYMBOL(ntb_msi_init);
+
+/**
+ * ntb_msi_setup_mws() - Initialize the MSI inbound memory windows
+ * @ntb: NTB device context
+ *
+ * This function sets up the required inbound memory windows. It should be
+ * called from a work function after a link up event.
+ *
+ * Over the entire network, this function will reserves the last N
+ * inbound memory windows for each peer (where N is the number of peers).
+ *
+ * ntb_msi_init() must be called before this function.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_setup_mws(struct ntb_dev *ntb)
+{
+ struct msi_desc *desc;
+ u64 addr;
+ int peer, peer_widx;
+ resource_size_t addr_align, size_align, size_max;
+ resource_size_t mw_size = SZ_32K;
+ resource_size_t mw_min_size = mw_size;
+ int i;
+ int ret;
+
+ if (!ntb->msi)
+ return -EINVAL;
+
+ desc = first_msi_entry(&ntb->pdev->dev);
+ addr = desc->msg.address_lo + ((uint64_t)desc->msg.address_hi << 32);
+
+ for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
+ peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+ if (peer_widx < 0)
+ return peer_widx;
+
+ ret = ntb_mw_get_align(ntb, peer, peer_widx, &addr_align,
+ NULL, NULL);
+ if (ret)
+ return ret;
+
+ addr &= ~(addr_align - 1);
+ }
+
+ for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
+ peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+ if (peer_widx < 0) {
+ ret = peer_widx;
+ goto error_out;
+ }
+
+ ret = ntb_mw_get_align(ntb, peer, peer_widx, NULL,
+ &size_align, &size_max);
+ if (ret)
+ goto error_out;
+
+ mw_size = round_up(mw_size, size_align);
+ mw_size = max(mw_size, size_max);
+ if (mw_size < mw_min_size)
+ mw_min_size = mw_size;
+
+ ret = ntb_mw_set_trans(ntb, peer, peer_widx,
+ addr, mw_size);
+ if (ret)
+ goto error_out;
+ }
+
+ ntb->msi->base_addr = addr;
+ ntb->msi->end_addr = addr + mw_min_size;
+
+ return 0;
+
+error_out:
+ for (i = 0; i < peer; i++) {
+ peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+ if (peer_widx < 0)
+ continue;
+
+ ntb_mw_clear_trans(ntb, i, peer_widx);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL(ntb_msi_setup_mws);
+
+/**
+ * ntb_msi_clear_mws() - Clear all inbound memory windows
+ * @ntb: NTB device context
+ *
+ * This function tears down the resources used by ntb_msi_setup_mws().
+ */
+void ntb_msi_clear_mws(struct ntb_dev *ntb)
+{
+ int peer;
+ int peer_widx;
+
+ for (peer = 0; peer < ntb_peer_port_count(ntb); peer++) {
+ peer_widx = ntb_peer_highest_mw_idx(ntb, peer);
+ if (peer_widx < 0)
+ continue;
+
+ ntb_mw_clear_trans(ntb, peer, peer_widx);
+ }
+}
+EXPORT_SYMBOL(ntb_msi_clear_mws);
+
+struct ntb_msi_devres {
+ struct ntb_dev *ntb;
+ struct msi_desc *entry;
+ struct ntb_msi_desc *msi_desc;
+};
+
+static int ntb_msi_set_desc(struct ntb_dev *ntb, struct msi_desc *entry,
+ struct ntb_msi_desc *msi_desc)
+{
+ u64 addr;
+
+ addr = entry->msg.address_lo +
+ ((uint64_t)entry->msg.address_hi << 32);
+
+ if (addr < ntb->msi->base_addr || addr >= ntb->msi->end_addr) {
+ dev_warn_once(&ntb->dev,
+ "IRQ %d: MSI Address not within the memory window (%llx, [%llx %llx])\n",
+ entry->irq, addr, ntb->msi->base_addr,
+ ntb->msi->end_addr);
+ return -EFAULT;
+ }
+
+ msi_desc->addr_offset = addr - ntb->msi->base_addr;
+ msi_desc->data = entry->msg.data;
+
+ return 0;
+}
+
+static void ntb_msi_write_msg(struct msi_desc *entry, void *data)
+{
+ struct ntb_msi_devres *dr = data;
+
+ WARN_ON(ntb_msi_set_desc(dr->ntb, entry, dr->msi_desc));
+
+ if (dr->ntb->msi->desc_changed)
+ dr->ntb->msi->desc_changed(dr->ntb->ctx);
+}
+
+static void ntbm_msi_callback_release(struct device *dev, void *res)
+{
+ struct ntb_msi_devres *dr = res;
+
+ dr->entry->write_msi_msg = NULL;
+ dr->entry->write_msi_msg_data = NULL;
+}
+
+static int ntbm_msi_setup_callback(struct ntb_dev *ntb, struct msi_desc *entry,
+ struct ntb_msi_desc *msi_desc)
+{
+ struct ntb_msi_devres *dr;
+
+ dr = devres_alloc(ntbm_msi_callback_release,
+ sizeof(struct ntb_msi_devres), GFP_KERNEL);
+ if (!dr)
+ return -ENOMEM;
+
+ dr->ntb = ntb;
+ dr->entry = entry;
+ dr->msi_desc = msi_desc;
+
+ devres_add(&ntb->dev, dr);
+
+ dr->entry->write_msi_msg = ntb_msi_write_msg;
+ dr->entry->write_msi_msg_data = dr;
+
+ return 0;
+}
+
+/**
+ * ntbm_msi_request_threaded_irq() - allocate an MSI interrupt
+ * @ntb: NTB device context
+ * @handler: Function to be called when the IRQ occurs
+ * @thread_fn: Function to be called in a threaded interrupt context. NULL
+ * for clients which handle everything in @handler
+ * @devname: An ascii name for the claiming device, dev_name(dev) if NULL
+ * @dev_id: A cookie passed back to the handler function
+ *
+ * This function assigns an interrupt handler to an unused
+ * MSI interrupt and returns the descriptor used to trigger
+ * it. The descriptor can then be sent to a peer to trigger
+ * the interrupt.
+ *
+ * The interrupt resource is managed with devres so it will
+ * be automatically freed when the NTB device is torn down.
+ *
+ * If an IRQ allocated with this function needs to be freed
+ * separately, ntbm_free_irq() must be used.
+ *
+ * Return: IRQ number assigned on success, otherwise a negative error number.
+ */
+int ntbm_msi_request_threaded_irq(struct ntb_dev *ntb, irq_handler_t handler,
+ irq_handler_t thread_fn,
+ const char *name, void *dev_id,
+ struct ntb_msi_desc *msi_desc)
+{
+ struct msi_desc *entry;
+ struct irq_desc *desc;
+ int ret;
+
+ if (!ntb->msi)
+ return -EINVAL;
+
+ for_each_pci_msi_entry(entry, ntb->pdev) {
+ desc = irq_to_desc(entry->irq);
+ if (desc->action)
+ continue;
+
+ ret = devm_request_threaded_irq(&ntb->dev, entry->irq, handler,
+ thread_fn, 0, name, dev_id);
+ if (ret)
+ continue;
+
+ if (ntb_msi_set_desc(ntb, entry, msi_desc)) {
+ devm_free_irq(&ntb->dev, entry->irq, dev_id);
+ continue;
+ }
+
+ ret = ntbm_msi_setup_callback(ntb, entry, msi_desc);
+ if (ret) {
+ devm_free_irq(&ntb->dev, entry->irq, dev_id);
+ return ret;
+ }
+
+
+ return entry->irq;
+ }
+
+ return -ENODEV;
+}
+EXPORT_SYMBOL(ntbm_msi_request_threaded_irq);
+
+static int ntbm_msi_callback_match(struct device *dev, void *res, void *data)
+{
+ struct ntb_dev *ntb = dev_ntb(dev);
+ struct ntb_msi_devres *dr = res;
+
+ return dr->ntb == ntb && dr->entry == data;
+}
+
+/**
+ * ntbm_msi_free_irq() - free an interrupt
+ * @ntb: NTB device context
+ * @irq: Interrupt line to free
+ * @dev_id: Device identity to free
+ *
+ * This function should be used to manually free IRQs allocated with
+ * ntbm_request_[threaded_]irq().
+ */
+void ntbm_msi_free_irq(struct ntb_dev *ntb, unsigned int irq, void *dev_id)
+{
+ struct msi_desc *entry = irq_get_msi_desc(irq);
+
+ entry->write_msi_msg = NULL;
+ entry->write_msi_msg_data = NULL;
+
+ WARN_ON(devres_destroy(&ntb->dev, ntbm_msi_callback_release,
+ ntbm_msi_callback_match, entry));
+
+ devm_free_irq(&ntb->dev, irq, dev_id);
+}
+EXPORT_SYMBOL(ntbm_msi_free_irq);
+
+/**
+ * ntb_msi_peer_trigger() - Trigger an interrupt handler on a peer
+ * @ntb: NTB device context
+ * @peer: Peer index
+ * @desc: MSI descriptor data which triggers the interrupt
+ *
+ * This function triggers an interrupt on a peer. It requires
+ * the descriptor structure to have been passed from that peer
+ * by some other means.
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_peer_trigger(struct ntb_dev *ntb, int peer,
+ struct ntb_msi_desc *desc)
+{
+ int idx;
+
+ if (!ntb->msi)
+ return -EINVAL;
+
+ idx = desc->addr_offset / sizeof(*ntb->msi->peer_mws[peer]);
+
+ iowrite32(desc->data, &ntb->msi->peer_mws[peer][idx]);
+
+ return 0;
+}
+EXPORT_SYMBOL(ntb_msi_peer_trigger);
+
+/**
+ * ntb_msi_peer_addr() - Get the DMA address to trigger a peer's MSI interrupt
+ * @ntb: NTB device context
+ * @peer: Peer index
+ * @desc: MSI descriptor data which triggers the interrupt
+ * @msi_addr: Physical address to trigger the interrupt
+ *
+ * This function allows using DMA engines to trigger an interrupt
+ * (for example, trigger an interrupt to process the data after
+ * sending it). To trigger the interrupt, write @desc.data to the address
+ * returned in @msi_addr
+ *
+ * Return: Zero on success, otherwise a negative error number.
+ */
+int ntb_msi_peer_addr(struct ntb_dev *ntb, int peer,
+ struct ntb_msi_desc *desc,
+ phys_addr_t *msi_addr)
+{
+ int peer_widx = ntb_peer_mw_count(ntb) - 1 - peer;
+ phys_addr_t mw_phys_addr;
+ int ret;
+
+ ret = ntb_peer_mw_get_addr(ntb, peer_widx, &mw_phys_addr, NULL);
+ if (ret)
+ return ret;
+
+ if (msi_addr)
+ *msi_addr = mw_phys_addr + desc->addr_offset;
+
+ return 0;
+}
+EXPORT_SYMBOL(ntb_msi_peer_addr);
diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c
index d4f39ba1d976..40c90ca10729 100644
--- a/drivers/ntb/ntb_transport.c
+++ b/drivers/ntb/ntb_transport.c
@@ -93,6 +93,12 @@ static bool use_dma;
module_param(use_dma, bool, 0644);
MODULE_PARM_DESC(use_dma, "Use DMA engine to perform large data copy");
+static bool use_msi;
+#ifdef CONFIG_NTB_MSI
+module_param(use_msi, bool, 0644);
+MODULE_PARM_DESC(use_msi, "Use MSI interrupts instead of doorbells");
+#endif
+
static struct dentry *nt_debugfs_dir;
/* Only two-ports NTB devices are supported */
@@ -188,6 +194,11 @@ struct ntb_transport_qp {
u64 tx_err_no_buf;
u64 tx_memcpy;
u64 tx_async;
+
+ bool use_msi;
+ int msi_irq;
+ struct ntb_msi_desc msi_desc;
+ struct ntb_msi_desc peer_msi_desc;
};
struct ntb_transport_mw {
@@ -221,6 +232,10 @@ struct ntb_transport_ctx {
u64 qp_bitmap;
u64 qp_bitmap_free;
+ bool use_msi;
+ unsigned int msi_spad_offset;
+ u64 msi_db_mask;
+
bool link_is_up;
struct delayed_work link_work;
struct work_struct link_cleanup;
@@ -667,6 +682,114 @@ static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
return 0;
}
+static irqreturn_t ntb_transport_isr(int irq, void *dev)
+{
+ struct ntb_transport_qp *qp = dev;
+
+ tasklet_schedule(&qp->rxc_db_work);
+
+ return IRQ_HANDLED;
+}
+
+static void ntb_transport_setup_qp_peer_msi(struct ntb_transport_ctx *nt,
+ unsigned int qp_num)
+{
+ struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+ int spad = qp_num * 2 + nt->msi_spad_offset;
+
+ if (!nt->use_msi)
+ return;
+
+ if (spad >= ntb_spad_count(nt->ndev))
+ return;
+
+ qp->peer_msi_desc.addr_offset =
+ ntb_peer_spad_read(qp->ndev, PIDX, spad);
+ qp->peer_msi_desc.data =
+ ntb_peer_spad_read(qp->ndev, PIDX, spad + 1);
+
+ dev_dbg(&qp->ndev->pdev->dev, "QP%d Peer MSI addr=%x data=%x\n",
+ qp_num, qp->peer_msi_desc.addr_offset, qp->peer_msi_desc.data);
+
+ if (qp->peer_msi_desc.addr_offset) {
+ qp->use_msi = true;
+ dev_info(&qp->ndev->pdev->dev,
+ "Using MSI interrupts for QP%d\n", qp_num);
+ }
+}
+
+static void ntb_transport_setup_qp_msi(struct ntb_transport_ctx *nt,
+ unsigned int qp_num)
+{
+ struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
+ int spad = qp_num * 2 + nt->msi_spad_offset;
+ int rc;
+
+ if (!nt->use_msi)
+ return;
+
+ if (spad >= ntb_spad_count(nt->ndev)) {
+ dev_warn_once(&qp->ndev->pdev->dev,
+ "Not enough SPADS to use MSI interrupts\n");
+ return;
+ }
+
+ ntb_spad_write(qp->ndev, spad, 0);
+ ntb_spad_write(qp->ndev, spad + 1, 0);
+
+ if (!qp->msi_irq) {
+ qp->msi_irq = ntbm_msi_request_irq(qp->ndev, ntb_transport_isr,
+ KBUILD_MODNAME, qp,
+ &qp->msi_desc);
+ if (qp->msi_irq < 0) {
+ dev_warn(&qp->ndev->pdev->dev,
+ "Unable to allocate MSI interrupt for qp%d\n",
+ qp_num);
+ return;
+ }
+ }
+
+ rc = ntb_spad_write(qp->ndev, spad, qp->msi_desc.addr_offset);
+ if (rc)
+ goto err_free_interrupt;
+
+ rc = ntb_spad_write(qp->ndev, spad + 1, qp->msi_desc.data);
+ if (rc)
+ goto err_free_interrupt;
+
+ dev_dbg(&qp->ndev->pdev->dev, "QP%d MSI %d addr=%x data=%x\n",
+ qp_num, qp->msi_irq, qp->msi_desc.addr_offset,
+ qp->msi_desc.data);
+
+ return;
+
+err_free_interrupt:
+ devm_free_irq(&nt->ndev->dev, qp->msi_irq, qp);
+}
+
+static void ntb_transport_msi_peer_desc_changed(struct ntb_transport_ctx *nt)
+{
+ int i;
+
+ dev_dbg(&nt->ndev->pdev->dev, "Peer MSI descriptors changed");
+
+ for (i = 0; i < nt->qp_count; i++)
+ ntb_transport_setup_qp_peer_msi(nt, i);
+}
+
+static void ntb_transport_msi_desc_changed(void *data)
+{
+ struct ntb_transport_ctx *nt = data;
+ int i;
+
+ dev_dbg(&nt->ndev->pdev->dev, "MSI descriptors changed");
+
+ for (i = 0; i < nt->qp_count; i++)
+ ntb_transport_setup_qp_msi(nt, i);
+
+ ntb_peer_db_set(nt->ndev, nt->msi_db_mask);
+}
+
static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
{
struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
@@ -905,6 +1028,20 @@ static void ntb_transport_link_work(struct work_struct *work)
int rc = 0, i, spad;
/* send the local info, in the opposite order of the way we read it */
+
+ if (nt->use_msi) {
+ rc = ntb_msi_setup_mws(ndev);
+ if (rc) {
+ dev_warn(&pdev->dev,
+ "Failed to register MSI memory window: %d\n",
+ rc);
+ nt->use_msi = false;
+ }
+ }
+
+ for (i = 0; i < nt->qp_count; i++)
+ ntb_transport_setup_qp_msi(nt, i);
+
for (i = 0; i < nt->mw_count; i++) {
size = nt->mw_vec[i].phys_size;
@@ -962,6 +1099,7 @@ static void ntb_transport_link_work(struct work_struct *work)
struct ntb_transport_qp *qp = &nt->qp_vec[i];
ntb_transport_setup_qp_mw(nt, i);
+ ntb_transport_setup_qp_peer_msi(nt, i);
if (qp->client_ready)
schedule_delayed_work(&qp->link_work, 0);
@@ -1135,6 +1273,19 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
return -ENOMEM;
nt->ndev = ndev;
+
+ /*
+ * If we are using MSI, and have at least one extra memory window,
+ * we will reserve the last MW for the MSI window.
+ */
+ if (use_msi && mw_count > 1) {
+ rc = ntb_msi_init(ndev, ntb_transport_msi_desc_changed);
+ if (!rc) {
+ mw_count -= 1;
+ nt->use_msi = true;
+ }
+ }
+
spad_count = ntb_spad_count(ndev);
/* Limit the MW's based on the availability of scratchpads */
@@ -1148,6 +1299,8 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
max_mw_count_for_spads = (spad_count - MW0_SZ_HIGH) / 2;
nt->mw_count = min(mw_count, max_mw_count_for_spads);
+ nt->msi_spad_offset = nt->mw_count * 2 + MW0_SZ_HIGH;
+
nt->mw_vec = kcalloc_node(mw_count, sizeof(*nt->mw_vec),
GFP_KERNEL, node);
if (!nt->mw_vec) {
@@ -1178,6 +1331,12 @@ static int ntb_transport_probe(struct ntb_client *self, struct ntb_dev *ndev)
qp_bitmap = ntb_db_valid_mask(ndev);
qp_count = ilog2(qp_bitmap);
+ if (nt->use_msi) {
+ qp_count -= 1;
+ nt->msi_db_mask = 1 << qp_count;
+ ntb_db_clear_mask(ndev, nt->msi_db_mask);
+ }
+
if (max_num_clients && max_num_clients < qp_count)
qp_count = max_num_clients;
else if (nt->mw_count < qp_count)
@@ -1601,7 +1760,10 @@ static void ntb_tx_copy_callback(void *data,
iowrite32(entry->flags | DESC_DONE_FLAG, &hdr->flags);
- ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
+ if (qp->use_msi)
+ ntb_msi_peer_trigger(qp->ndev, PIDX, &qp->peer_msi_desc);
+ else
+ ntb_peer_db_set(qp->ndev, BIT_ULL(qp->qp_num));
/* The entry length can only be zero if the packet is intended to be a
* "link down" or similar. Since no payload is being sent in these
@@ -1869,6 +2031,7 @@ ntb_transport_create_queue(void *data, struct device *client_dev,
qp->rx_dma_chan = NULL;
}
+ qp->tx_mw_dma_addr = 0;
if (qp->tx_dma_chan) {
qp->tx_mw_dma_addr =
dma_map_resource(qp->tx_dma_chan->device->dev,
@@ -2268,6 +2431,11 @@ static void ntb_transport_doorbell_callback(void *data, int vector)
u64 db_bits;
unsigned int qp_num;
+ if (ntb_db_read(nt->ndev) & nt->msi_db_mask) {
+ ntb_transport_msi_peer_desc_changed(nt);
+ ntb_db_clear(nt->ndev, nt->msi_db_mask);
+ }
+
db_bits = (nt->qp_bitmap & ~nt->qp_bitmap_free &
ntb_db_vector_mask(nt->ndev, vector));
diff --git a/drivers/ntb/test/Kconfig b/drivers/ntb/test/Kconfig
index a8db00a7e087..516b991f33b9 100644
--- a/drivers/ntb/test/Kconfig
+++ b/drivers/ntb/test/Kconfig
@@ -26,3 +26,12 @@ config NTB_PERF
to and from the window without additional software interaction.
If unsure, say N.
+
+config NTB_MSI_TEST
+ tristate "NTB MSI Test Client"
+ depends on NTB_MSI
+ help
+ This tool demonstrates the use of the NTB MSI library to
+ send MSI interrupts between peers.
+
+ If unsure, say N.
diff --git a/drivers/ntb/test/Makefile b/drivers/ntb/test/Makefile
index cbfd67622ef7..19ed91d8a3b1 100644
--- a/drivers/ntb/test/Makefile
+++ b/drivers/ntb/test/Makefile
@@ -2,3 +2,4 @@
obj-$(CONFIG_NTB_PINGPONG) += ntb_pingpong.o
obj-$(CONFIG_NTB_TOOL) += ntb_tool.o
obj-$(CONFIG_NTB_PERF) += ntb_perf.o
+obj-$(CONFIG_NTB_MSI_TEST) += ntb_msi_test.o
diff --git a/drivers/ntb/test/ntb_msi_test.c b/drivers/ntb/test/ntb_msi_test.c
new file mode 100644
index 000000000000..99d826ed9c34
--- /dev/null
+++ b/drivers/ntb/test/ntb_msi_test.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/ntb.h>
+#include <linux/pci.h>
+#include <linux/radix-tree.h>
+#include <linux/workqueue.h>
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("0.1");
+MODULE_AUTHOR("Logan Gunthorpe <logang@deltatee.com>");
+MODULE_DESCRIPTION("Test for sending MSI interrupts over an NTB memory window");
+
+static int num_irqs = 4;
+module_param(num_irqs, int, 0644);
+MODULE_PARM_DESC(num_irqs, "number of irqs to use");
+
+struct ntb_msit_ctx {
+ struct ntb_dev *ntb;
+ struct dentry *dbgfs_dir;
+ struct work_struct setup_work;
+
+ struct ntb_msit_isr_ctx {
+ int irq_idx;
+ int irq_num;
+ int occurrences;
+ struct ntb_msit_ctx *nm;
+ struct ntb_msi_desc desc;
+ } *isr_ctx;
+
+ struct ntb_msit_peer {
+ struct ntb_msit_ctx *nm;
+ int pidx;
+ int num_irqs;
+ struct completion init_comp;
+ struct ntb_msi_desc *msi_desc;
+ } peers[];
+};
+
+static struct dentry *ntb_msit_dbgfs_topdir;
+
+static irqreturn_t ntb_msit_isr(int irq, void *dev)
+{
+ struct ntb_msit_isr_ctx *isr_ctx = dev;
+ struct ntb_msit_ctx *nm = isr_ctx->nm;
+
+ dev_dbg(&nm->ntb->dev, "Interrupt Occurred: %d",
+ isr_ctx->irq_idx);
+
+ isr_ctx->occurrences++;
+
+ return IRQ_HANDLED;
+}
+
+static void ntb_msit_setup_work(struct work_struct *work)
+{
+ struct ntb_msit_ctx *nm = container_of(work, struct ntb_msit_ctx,
+ setup_work);
+ int irq_count = 0;
+ int irq;
+ int ret;
+ uintptr_t i;
+
+ ret = ntb_msi_setup_mws(nm->ntb);
+ if (ret) {
+ dev_err(&nm->ntb->dev, "Unable to setup MSI windows: %d\n",
+ ret);
+ return;
+ }
+
+ for (i = 0; i < num_irqs; i++) {
+ nm->isr_ctx[i].irq_idx = i;
+ nm->isr_ctx[i].nm = nm;
+
+ if (!nm->isr_ctx[i].irq_num) {
+ irq = ntbm_msi_request_irq(nm->ntb, ntb_msit_isr,
+ KBUILD_MODNAME,
+ &nm->isr_ctx[i],
+ &nm->isr_ctx[i].desc);
+ if (irq < 0)
+ break;
+
+ nm->isr_ctx[i].irq_num = irq;
+ }
+
+ ret = ntb_spad_write(nm->ntb, 2 * i + 1,
+ nm->isr_ctx[i].desc.addr_offset);
+ if (ret)
+ break;
+
+ ret = ntb_spad_write(nm->ntb, 2 * i + 2,
+ nm->isr_ctx[i].desc.data);
+ if (ret)
+ break;
+
+ irq_count++;
+ }
+
+ ntb_spad_write(nm->ntb, 0, irq_count);
+ ntb_peer_db_set(nm->ntb, BIT(ntb_port_number(nm->ntb)));
+}
+
+static void ntb_msit_desc_changed(void *ctx)
+{
+ struct ntb_msit_ctx *nm = ctx;
+ int i;
+
+ dev_dbg(&nm->ntb->dev, "MSI Descriptors Changed\n");
+
+ for (i = 0; i < num_irqs; i++) {
+ ntb_spad_write(nm->ntb, 2 * i + 1,
+ nm->isr_ctx[i].desc.addr_offset);
+ ntb_spad_write(nm->ntb, 2 * i + 2,
+ nm->isr_ctx[i].desc.data);
+ }
+
+ ntb_peer_db_set(nm->ntb, BIT(ntb_port_number(nm->ntb)));
+}
+
+static void ntb_msit_link_event(void *ctx)
+{
+ struct ntb_msit_ctx *nm = ctx;
+
+ if (!ntb_link_is_up(nm->ntb, NULL, NULL))
+ return;
+
+ schedule_work(&nm->setup_work);
+}
+
+static void ntb_msit_copy_peer_desc(struct ntb_msit_ctx *nm, int peer)
+{
+ int i;
+ struct ntb_msi_desc *desc = nm->peers[peer].msi_desc;
+ int irq_count = nm->peers[peer].num_irqs;
+
+ for (i = 0; i < irq_count; i++) {
+ desc[i].addr_offset = ntb_peer_spad_read(nm->ntb, peer,
+ 2 * i + 1);
+ desc[i].data = ntb_peer_spad_read(nm->ntb, peer, 2 * i + 2);
+ }
+
+ dev_info(&nm->ntb->dev, "Found %d interrupts on peer %d\n",
+ irq_count, peer);
+
+ complete_all(&nm->peers[peer].init_comp);
+}
+
+static void ntb_msit_db_event(void *ctx, int vec)
+{
+ struct ntb_msit_ctx *nm = ctx;
+ struct ntb_msi_desc *desc;
+ u64 peer_mask = ntb_db_read(nm->ntb);
+ u32 irq_count;
+ int peer;
+
+ ntb_db_clear(nm->ntb, peer_mask);
+
+ for (peer = 0; peer < sizeof(peer_mask) * 8; peer++) {
+ if (!(peer_mask & BIT(peer)))
+ continue;
+
+ irq_count = ntb_peer_spad_read(nm->ntb, peer, 0);
+ if (irq_count == -1)
+ continue;
+
+ desc = kcalloc(irq_count, sizeof(*desc), GFP_ATOMIC);
+ if (!desc)
+ continue;
+
+ kfree(nm->peers[peer].msi_desc);
+ nm->peers[peer].msi_desc = desc;
+ nm->peers[peer].num_irqs = irq_count;
+
+ ntb_msit_copy_peer_desc(nm, peer);
+ }
+}
+
+static const struct ntb_ctx_ops ntb_msit_ops = {
+ .link_event = ntb_msit_link_event,
+ .db_event = ntb_msit_db_event,
+};
+
+static int ntb_msit_dbgfs_trigger(void *data, u64 idx)
+{
+ struct ntb_msit_peer *peer = data;
+
+ if (idx >= peer->num_irqs)
+ return -EINVAL;
+
+ dev_dbg(&peer->nm->ntb->dev, "trigger irq %llu on peer %u\n",
+ idx, peer->pidx);
+
+ return ntb_msi_peer_trigger(peer->nm->ntb, peer->pidx,
+ &peer->msi_desc[idx]);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_trigger_fops, NULL,
+ ntb_msit_dbgfs_trigger, "%llu\n");
+
+static int ntb_msit_dbgfs_port_get(void *data, u64 *port)
+{
+ struct ntb_msit_peer *peer = data;
+
+ *port = ntb_peer_port_number(peer->nm->ntb, peer->pidx);
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_port_fops, ntb_msit_dbgfs_port_get,
+ NULL, "%llu\n");
+
+static int ntb_msit_dbgfs_count_get(void *data, u64 *count)
+{
+ struct ntb_msit_peer *peer = data;
+
+ *count = peer->num_irqs;
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_count_fops, ntb_msit_dbgfs_count_get,
+ NULL, "%llu\n");
+
+static int ntb_msit_dbgfs_ready_get(void *data, u64 *ready)
+{
+ struct ntb_msit_peer *peer = data;
+
+ *ready = try_wait_for_completion(&peer->init_comp);
+
+ return 0;
+}
+
+static int ntb_msit_dbgfs_ready_set(void *data, u64 ready)
+{
+ struct ntb_msit_peer *peer = data;
+
+ return wait_for_completion_interruptible(&peer->init_comp);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_ready_fops, ntb_msit_dbgfs_ready_get,
+ ntb_msit_dbgfs_ready_set, "%llu\n");
+
+static int ntb_msit_dbgfs_occurrences_get(void *data, u64 *occurrences)
+{
+ struct ntb_msit_isr_ctx *isr_ctx = data;
+
+ *occurrences = isr_ctx->occurrences;
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_occurrences_fops,
+ ntb_msit_dbgfs_occurrences_get,
+ NULL, "%llu\n");
+
+static int ntb_msit_dbgfs_local_port_get(void *data, u64 *port)
+{
+ struct ntb_msit_ctx *nm = data;
+
+ *port = ntb_port_number(nm->ntb);
+
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(ntb_msit_local_port_fops,
+ ntb_msit_dbgfs_local_port_get,
+ NULL, "%llu\n");
+
+static void ntb_msit_create_dbgfs(struct ntb_msit_ctx *nm)
+{
+ struct pci_dev *pdev = nm->ntb->pdev;
+ char buf[32];
+ int i;
+ struct dentry *peer_dir;
+
+ nm->dbgfs_dir = debugfs_create_dir(pci_name(pdev),
+ ntb_msit_dbgfs_topdir);
+ debugfs_create_file("port", 0400, nm->dbgfs_dir, nm,
+ &ntb_msit_local_port_fops);
+
+ for (i = 0; i < ntb_peer_port_count(nm->ntb); i++) {
+ nm->peers[i].pidx = i;
+ nm->peers[i].nm = nm;
+ init_completion(&nm->peers[i].init_comp);
+
+ snprintf(buf, sizeof(buf), "peer%d", i);
+ peer_dir = debugfs_create_dir(buf, nm->dbgfs_dir);
+
+ debugfs_create_file_unsafe("trigger", 0200, peer_dir,
+ &nm->peers[i],
+ &ntb_msit_trigger_fops);
+
+ debugfs_create_file_unsafe("port", 0400, peer_dir,
+ &nm->peers[i], &ntb_msit_port_fops);
+
+ debugfs_create_file_unsafe("count", 0400, peer_dir,
+ &nm->peers[i],
+ &ntb_msit_count_fops);
+
+ debugfs_create_file_unsafe("ready", 0600, peer_dir,
+ &nm->peers[i],
+ &ntb_msit_ready_fops);
+ }
+
+ for (i = 0; i < num_irqs; i++) {
+ snprintf(buf, sizeof(buf), "irq%d_occurrences", i);
+ debugfs_create_file_unsafe(buf, 0400, nm->dbgfs_dir,
+ &nm->isr_ctx[i],
+ &ntb_msit_occurrences_fops);
+ }
+}
+
+static void ntb_msit_remove_dbgfs(struct ntb_msit_ctx *nm)
+{
+ debugfs_remove_recursive(nm->dbgfs_dir);
+}
+
+static int ntb_msit_probe(struct ntb_client *client, struct ntb_dev *ntb)
+{
+ struct ntb_msit_ctx *nm;
+ size_t struct_size;
+ int peers;
+ int ret;
+
+ peers = ntb_peer_port_count(ntb);
+ if (peers <= 0)
+ return -EINVAL;
+
+ if (ntb_spad_is_unsafe(ntb) || ntb_spad_count(ntb) < 2 * num_irqs + 1) {
+ dev_err(&ntb->dev, "NTB MSI test requires at least %d spads for %d irqs\n",
+ 2 * num_irqs + 1, num_irqs);
+ return -EFAULT;
+ }
+
+ ret = ntb_spad_write(ntb, 0, -1);
+ if (ret) {
+ dev_err(&ntb->dev, "Unable to write spads: %d\n", ret);
+ return ret;
+ }
+
+ ret = ntb_db_clear_mask(ntb, GENMASK(peers - 1, 0));
+ if (ret) {
+ dev_err(&ntb->dev, "Unable to clear doorbell mask: %d\n", ret);
+ return ret;
+ }
+
+ ret = ntb_msi_init(ntb, ntb_msit_desc_changed);
+ if (ret) {
+ dev_err(&ntb->dev, "Unable to initialize MSI library: %d\n",
+ ret);
+ return ret;
+ }
+
+ struct_size = sizeof(*nm) + sizeof(*nm->peers) * peers;
+
+ nm = devm_kzalloc(&ntb->dev, struct_size, GFP_KERNEL);
+ if (!nm)
+ return -ENOMEM;
+
+ nm->isr_ctx = devm_kcalloc(&ntb->dev, num_irqs, sizeof(*nm->isr_ctx),
+ GFP_KERNEL);
+ if (!nm->isr_ctx)
+ return -ENOMEM;
+
+ INIT_WORK(&nm->setup_work, ntb_msit_setup_work);
+ nm->ntb = ntb;
+
+ ntb_msit_create_dbgfs(nm);
+
+ ret = ntb_set_ctx(ntb, nm, &ntb_msit_ops);
+ if (ret)
+ goto remove_dbgfs;
+
+ if (!nm->isr_ctx)
+ goto remove_dbgfs;
+
+ ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
+
+ return 0;
+
+remove_dbgfs:
+ ntb_msit_remove_dbgfs(nm);
+ devm_kfree(&ntb->dev, nm->isr_ctx);
+ devm_kfree(&ntb->dev, nm);
+ return ret;
+}
+
+static void ntb_msit_remove(struct ntb_client *client, struct ntb_dev *ntb)
+{
+ struct ntb_msit_ctx *nm = ntb->ctx;
+ int i;
+
+ ntb_link_disable(ntb);
+ ntb_db_set_mask(ntb, ntb_db_valid_mask(ntb));
+ ntb_msi_clear_mws(ntb);
+
+ for (i = 0; i < ntb_peer_port_count(ntb); i++)
+ kfree(nm->peers[i].msi_desc);
+
+ ntb_clear_ctx(ntb);
+ ntb_msit_remove_dbgfs(nm);
+}
+
+static struct ntb_client ntb_msit_client = {
+ .ops = {
+ .probe = ntb_msit_probe,
+ .remove = ntb_msit_remove
+ }
+};
+
+static int __init ntb_msit_init(void)
+{
+ int ret;
+
+ if (debugfs_initialized())
+ ntb_msit_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME,
+ NULL);
+
+ ret = ntb_register_client(&ntb_msit_client);
+ if (ret)
+ debugfs_remove_recursive(ntb_msit_dbgfs_topdir);
+
+ return ret;
+}
+module_init(ntb_msit_init);
+
+static void __exit ntb_msit_exit(void)
+{
+ ntb_unregister_client(&ntb_msit_client);
+ debugfs_remove_recursive(ntb_msit_dbgfs_topdir);
+}
+module_exit(ntb_msit_exit);
diff --git a/drivers/ntb/test/ntb_perf.c b/drivers/ntb/test/ntb_perf.c
index 11a6cd374004..d028331558ea 100644
--- a/drivers/ntb/test/ntb_perf.c
+++ b/drivers/ntb/test/ntb_perf.c
@@ -100,7 +100,7 @@ MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool");
#define DMA_TRIES 100
#define DMA_MDELAY 10
-#define MSG_TRIES 500
+#define MSG_TRIES 1000
#define MSG_UDELAY_LOW 1000
#define MSG_UDELAY_HIGH 2000
@@ -734,8 +734,6 @@ static void perf_disable_service(struct perf_ctx *perf)
{
int pidx;
- ntb_link_disable(perf->ntb);
-
if (perf->cmd_send == perf_msg_cmd_send) {
u64 inbits;
@@ -752,6 +750,16 @@ static void perf_disable_service(struct perf_ctx *perf)
for (pidx = 0; pidx < perf->pcnt; pidx++)
flush_work(&perf->peers[pidx].service);
+
+ for (pidx = 0; pidx < perf->pcnt; pidx++) {
+ struct perf_peer *peer = &perf->peers[pidx];
+
+ ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx), 0);
+ }
+
+ ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
+
+ ntb_link_disable(perf->ntb);
}
/*==============================================================================
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 59a6d232f77a..0884bedcfc7a 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -192,6 +192,9 @@ static void msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag)
static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
{
+ if (desc->msi_attrib.is_virtual)
+ return NULL;
+
return desc->mask_base +
desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
}
@@ -206,14 +209,19 @@ static void __iomem *pci_msix_desc_addr(struct msi_desc *desc)
u32 __pci_msix_desc_mask_irq(struct msi_desc *desc, u32 flag)
{
u32 mask_bits = desc->masked;
+ void __iomem *desc_addr;
if (pci_msi_ignore_mask)
return 0;
+ desc_addr = pci_msix_desc_addr(desc);
+ if (!desc_addr)
+ return 0;
mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
if (flag)
mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
- writel(mask_bits, pci_msix_desc_addr(desc) + PCI_MSIX_ENTRY_VECTOR_CTRL);
+
+ writel(mask_bits, desc_addr + PCI_MSIX_ENTRY_VECTOR_CTRL);
return mask_bits;
}
@@ -273,6 +281,11 @@ void __pci_read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
if (entry->msi_attrib.is_msix) {
void __iomem *base = pci_msix_desc_addr(entry);
+ if (!base) {
+ WARN_ON(1);
+ return;
+ }
+
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR);
msg->data = readl(base + PCI_MSIX_ENTRY_DATA);
@@ -303,6 +316,9 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
} else if (entry->msi_attrib.is_msix) {
void __iomem *base = pci_msix_desc_addr(entry);
+ if (!base)
+ goto skip;
+
writel(msg->address_lo, base + PCI_MSIX_ENTRY_LOWER_ADDR);
writel(msg->address_hi, base + PCI_MSIX_ENTRY_UPPER_ADDR);
writel(msg->data, base + PCI_MSIX_ENTRY_DATA);
@@ -327,7 +343,13 @@ void __pci_write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
msg->data);
}
}
+
+skip:
entry->msg = *msg;
+
+ if (entry->write_msi_msg)
+ entry->write_msi_msg(entry, entry->write_msi_msg_data);
+
}
void pci_write_msi_msg(unsigned int irq, struct msi_msg *msg)
@@ -550,6 +572,7 @@ msi_setup_entry(struct pci_dev *dev, int nvec, struct irq_affinity *affd)
entry->msi_attrib.is_msix = 0;
entry->msi_attrib.is_64 = !!(control & PCI_MSI_FLAGS_64BIT);
+ entry->msi_attrib.is_virtual = 0;
entry->msi_attrib.entry_nr = 0;
entry->msi_attrib.maskbit = !!(control & PCI_MSI_FLAGS_MASKBIT);
entry->msi_attrib.default_irq = dev->irq; /* Save IOAPIC IRQ */
@@ -674,6 +697,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
struct irq_affinity_desc *curmsk, *masks = NULL;
struct msi_desc *entry;
int ret, i;
+ int vec_count = pci_msix_vec_count(dev);
if (affd)
masks = irq_create_affinity_masks(nvec, affd);
@@ -696,6 +720,10 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
entry->msi_attrib.entry_nr = entries[i].entry;
else
entry->msi_attrib.entry_nr = i;
+
+ entry->msi_attrib.is_virtual =
+ entry->msi_attrib.entry_nr >= vec_count;
+
entry->msi_attrib.default_irq = dev->irq;
entry->mask_base = base;
@@ -714,12 +742,19 @@ static void msix_program_entries(struct pci_dev *dev,
{
struct msi_desc *entry;
int i = 0;
+ void __iomem *desc_addr;
for_each_pci_msi_entry(entry, dev) {
if (entries)
entries[i++].vector = entry->irq;
- entry->masked = readl(pci_msix_desc_addr(entry) +
- PCI_MSIX_ENTRY_VECTOR_CTRL);
+
+ desc_addr = pci_msix_desc_addr(entry);
+ if (desc_addr)
+ entry->masked = readl(desc_addr +
+ PCI_MSIX_ENTRY_VECTOR_CTRL);
+ else
+ entry->masked = 0;
+
msix_mask_irq(entry, 1);
}
}
@@ -932,7 +967,7 @@ int pci_msix_vec_count(struct pci_dev *dev)
EXPORT_SYMBOL(pci_msix_vec_count);
static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
- int nvec, struct irq_affinity *affd)
+ int nvec, struct irq_affinity *affd, int flags)
{
int nr_entries;
int i, j;
@@ -943,7 +978,7 @@ static int __pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries,
nr_entries = pci_msix_vec_count(dev);
if (nr_entries < 0)
return nr_entries;
- if (nvec > nr_entries)
+ if (nvec > nr_entries && !(flags & PCI_IRQ_VIRTUAL))
return nr_entries;
if (entries) {
@@ -1079,7 +1114,8 @@ EXPORT_SYMBOL(pci_enable_msi);
static int __pci_enable_msix_range(struct pci_dev *dev,
struct msix_entry *entries, int minvec,
- int maxvec, struct irq_affinity *affd)
+ int maxvec, struct irq_affinity *affd,
+ int flags)
{
int rc, nvec = maxvec;
@@ -1096,7 +1132,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
return -ENOSPC;
}
- rc = __pci_enable_msix(dev, entries, nvec, affd);
+ rc = __pci_enable_msix(dev, entries, nvec, affd, flags);
if (rc == 0)
return nvec;
@@ -1127,7 +1163,7 @@ static int __pci_enable_msix_range(struct pci_dev *dev,
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
int minvec, int maxvec)
{
- return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL);
+ return __pci_enable_msix_range(dev, entries, minvec, maxvec, NULL, 0);
}
EXPORT_SYMBOL(pci_enable_msix_range);
@@ -1167,7 +1203,7 @@ int pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs,
if (flags & PCI_IRQ_MSIX) {
msix_vecs = __pci_enable_msix_range(dev, NULL, min_vecs,
- max_vecs, affd);
+ max_vecs, affd, flags);
if (msix_vecs > 0)
return msix_vecs;
}
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index bebbde4ebec0..8c94cd3fd1f2 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -30,6 +30,10 @@ module_param(use_dma_mrpc, bool, 0644);
MODULE_PARM_DESC(use_dma_mrpc,
"Enable the use of the DMA MRPC feature");
+static int nirqs = 32;
+module_param(nirqs, int, 0644);
+MODULE_PARM_DESC(nirqs, "number of interrupts to allocate (more may be useful for NTB applications)");
+
static dev_t switchtec_devt;
static DEFINE_IDA(switchtec_minor_ida);
@@ -1263,8 +1267,12 @@ static int switchtec_init_isr(struct switchtec_dev *stdev)
int dma_mrpc_irq;
int rc;
- nvecs = pci_alloc_irq_vectors(stdev->pdev, 1, 4,
- PCI_IRQ_MSIX | PCI_IRQ_MSI);
+ if (nirqs < 4)
+ nirqs = 4;
+
+ nvecs = pci_alloc_irq_vectors(stdev->pdev, 1, nirqs,
+ PCI_IRQ_MSIX | PCI_IRQ_MSI |
+ PCI_IRQ_VIRTUAL);
if (nvecs < 0)
return nvecs;