aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/drivers/pci
diff options
context:
space:
mode:
author: Andrea Parri (Microsoft) <parri.andrea@gmail.com> 2020-04-06 02:15:10 +0200
committer: Wei Liu <wei.liu@kernel.org> 2020-04-23 13:17:12 +0000
commit: 240ad77cb50d9f0a961fcb0f21e67939cf7a9c04 (patch)
tree: 43892c546d7475d93726ae96b239f782ccb8495b /drivers/pci
parent: Drivers: hv: vmbus: Use a spin lock for synchronizing channel scheduling vs. channel removal (diff)
download: wireguard-linux-240ad77cb50d9f0a961fcb0f21e67939cf7a9c04.tar.xz
download: wireguard-linux-240ad77cb50d9f0a961fcb0f21e67939cf7a9c04.zip
PCI: hv: Prepare hv_compose_msi_msg() for the VMBus-channel-interrupt-to-vCPU reassignment functionality
The current implementation of hv_compose_msi_msg() is incompatible with the new functionality that allows changing the vCPU a VMBus channel will interrupt: if this function always calls hv_pci_onchannelcallback() in the polling loop, the interrupt going to a different CPU could cause hv_pci_onchannelcallback() to be running simultaneously in a tasklet, which will break. The current code also has a problem in that it is not synchronized with vmbus_reset_channel_cb(): hv_compose_msi_msg() could be accessing the ring buffer via the call of hv_pci_onchannelcallback() well after the time that vmbus_reset_channel_cb() has finished.

Fix these issues as follows. Disable the channel tasklet before entering the polling loop in hv_compose_msi_msg() and re-enable it when done. This will prevent hv_pci_onchannelcallback() from running in a tasklet on a different CPU. Moreover, poll by always calling hv_pci_onchannelcallback(), but check the channel callback function for NULL and invoke the callback within a sched_lock critical section. This will prevent hv_compose_msi_msg() from accessing the ring buffer after vmbus_reset_channel_cb() has acquired the sched_lock spinlock.

Suggested-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Andrea Parri (Microsoft) <parri.andrea@gmail.com>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Andrew Murray <amurray@thegoodpenguin.co.uk>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: <linux-pci@vger.kernel.org>
Link: https://lore.kernel.org/r/20200406001514.19876-8-parri.andrea@gmail.com
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Diffstat (limited to 'drivers/pci')
-rw-r--r-- drivers/pci/controller/pci-hyperv.c 44
1 files changed, 28 insertions, 16 deletions
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index e15022ff63e3..222ff5639ebe 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -1356,11 +1356,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
struct irq_cfg *cfg = irqd_cfg(data);
struct hv_pcibus_device *hbus;
+ struct vmbus_channel *channel;
struct hv_pci_dev *hpdev;
struct pci_bus *pbus;
struct pci_dev *pdev;
struct cpumask *dest;
- unsigned long flags;
struct compose_comp_ctxt comp;
struct tran_int_desc *int_desc;
struct {
@@ -1378,6 +1378,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
dest = irq_data_get_effective_affinity_mask(data);
pbus = pdev->bus;
hbus = container_of(pbus->sysdata, struct hv_pcibus_device, sysdata);
+ channel = hbus->hdev->channel;
hpdev = get_pcichild_wslot(hbus, devfn_to_wslot(pdev->devfn));
if (!hpdev)
goto return_null_message;
@@ -1436,42 +1437,51 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
}
/*
+ * Prevents hv_pci_onchannelcallback() from running concurrently
+ * in the tasklet.
+ */
+ tasklet_disable(&channel->callback_event);
+
+ /*
* Since this function is called with IRQ locks held, can't
* do normal wait for completion; instead poll.
*/
while (!try_wait_for_completion(&comp.comp_pkt.host_event)) {
+ unsigned long flags;
+
/* 0xFFFF means an invalid PCI VENDOR ID. */
if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) {
dev_err_once(&hbus->hdev->device,
"the device has gone\n");
- goto free_int_desc;
+ goto enable_tasklet;
}
/*
- * When the higher level interrupt code calls us with
- * interrupt disabled, we must poll the channel by calling
- * the channel callback directly when channel->target_cpu is
- * the current CPU. When the higher level interrupt code
- * calls us with interrupt enabled, let's add the
- * local_irq_save()/restore() to avoid race:
- * hv_pci_onchannelcallback() can also run in tasklet.
+ * Make sure that the ring buffer data structure doesn't get
+ * freed while we dereference the ring buffer pointer. Test
+ * for the channel's onchannel_callback being NULL within a
+ * sched_lock critical section. See also the inline comments
+ * in vmbus_reset_channel_cb().
*/
- local_irq_save(flags);
-
- if (hbus->hdev->channel->target_cpu == smp_processor_id())
- hv_pci_onchannelcallback(hbus);
-
- local_irq_restore(flags);
+ spin_lock_irqsave(&channel->sched_lock, flags);
+ if (unlikely(channel->onchannel_callback == NULL)) {
+ spin_unlock_irqrestore(&channel->sched_lock, flags);
+ goto enable_tasklet;
+ }
+ hv_pci_onchannelcallback(hbus);
+ spin_unlock_irqrestore(&channel->sched_lock, flags);
if (hpdev->state == hv_pcichild_ejecting) {
dev_err_once(&hbus->hdev->device,
"the device is being ejected\n");
- goto free_int_desc;
+ goto enable_tasklet;
}
udelay(100);
}
+ tasklet_enable(&channel->callback_event);
+
if (comp.comp_pkt.completion_status < 0) {
dev_err(&hbus->hdev->device,
"Request for interrupt failed: 0x%x",
@@ -1495,6 +1505,8 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
put_pcichild(hpdev);
return;
+enable_tasklet:
+ tasklet_enable(&channel->callback_event);
free_int_desc:
kfree(int_desc);
drop_reference: