aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/pci/msi.c2
-rw-r--r--drivers/vfio/pci/vfio_pci.c136
-rw-r--r--drivers/vfio/pci/vfio_pci_intrs.c15
-rw-r--r--drivers/vfio/vfio_iommu_type1.c30
-rw-r--r--drivers/vfio/vfio_spapr_eeh.c2
-rw-r--r--include/linux/iommu.h1
-rw-r--r--include/uapi/linux/vfio.h3
7 files changed, 104 insertions, 85 deletions
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 2f7c92c4757a..9fab30af0e75 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -302,6 +302,7 @@ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
__get_cached_msi_msg(entry, msg);
}
+EXPORT_SYMBOL_GPL(get_cached_msi_msg);
void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{
@@ -346,6 +347,7 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg)
__write_msi_msg(entry, msg);
}
+EXPORT_SYMBOL_GPL(write_msi_msg);
static void free_msi_irqs(struct pci_dev *dev)
{
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index f7825332a325..9558da3f06a0 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -876,15 +876,11 @@ static void vfio_pci_remove(struct pci_dev *pdev)
{
struct vfio_pci_device *vdev;
- mutex_lock(&driver_lock);
-
vdev = vfio_del_group_dev(&pdev->dev);
if (vdev) {
iommu_group_put(pdev->dev.iommu_group);
kfree(vdev);
}
-
- mutex_unlock(&driver_lock);
}
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
@@ -927,108 +923,90 @@ static struct pci_driver vfio_pci_driver = {
.err_handler = &vfio_err_handlers,
};
-/*
- * Test whether a reset is necessary and possible. We mark devices as
- * needs_reset when they are released, but don't have a function-local reset
- * available. If any of these exist in the affected devices, we want to do
- * a bus/slot reset. We also need all of the affected devices to be unused,
- * so we abort if any device has a non-zero refcnt. driver_lock prevents a
- * device from being opened during the scan or unbound from vfio-pci.
- */
-static int vfio_pci_test_bus_reset(struct pci_dev *pdev, void *data)
-{
- bool *needs_reset = data;
- struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
- int ret = -EBUSY;
-
- if (pci_drv == &vfio_pci_driver) {
- struct vfio_device *device;
- struct vfio_pci_device *vdev;
-
- device = vfio_device_get_from_dev(&pdev->dev);
- if (!device)
- return ret;
-
- vdev = vfio_device_data(device);
- if (vdev) {
- if (vdev->needs_reset)
- *needs_reset = true;
-
- if (!vdev->refcnt)
- ret = 0;
- }
-
- vfio_device_put(device);
- }
-
- /*
- * TODO: vfio-core considers groups to be viable even if some devices
- * are attached to known drivers, like pci-stub or pcieport. We can't
- * freeze devices from being unbound to those drivers like we can
- * here though, so it would be racy to test for them. We also can't
- * use device_lock() to prevent changes as that would interfere with
- * PCI-core taking device_lock during bus reset. For now, we require
- * devices to be bound to vfio-pci to get a bus/slot reset on release.
- */
-
- return ret;
-}
+struct vfio_devices {
+ struct vfio_device **devices;
+ int cur_index;
+ int max_index;
+};
-/* Clear needs_reset on all affected devices after successful bus/slot reset */
-static int vfio_pci_clear_needs_reset(struct pci_dev *pdev, void *data)
+static int vfio_pci_get_devs(struct pci_dev *pdev, void *data)
{
+ struct vfio_devices *devs = data;
struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
- if (pci_drv == &vfio_pci_driver) {
- struct vfio_device *device;
- struct vfio_pci_device *vdev;
+ if (pci_drv != &vfio_pci_driver)
+ return -EBUSY;
- device = vfio_device_get_from_dev(&pdev->dev);
- if (!device)
- return 0;
+ if (devs->cur_index == devs->max_index)
+ return -ENOSPC;
- vdev = vfio_device_data(device);
- if (vdev)
- vdev->needs_reset = false;
-
- vfio_device_put(device);
- }
+ devs->devices[devs->cur_index] = vfio_device_get_from_dev(&pdev->dev);
+ if (!devs->devices[devs->cur_index])
+ return -EINVAL;
+ devs->cur_index++;
return 0;
}
/*
* Attempt to do a bus/slot reset if there are devices affected by a reset for
* this device that are needs_reset and all of the affected devices are unused
- * (!refcnt). Callers of this function are required to hold driver_lock such
- * that devices can not be unbound from vfio-pci or opened by a user while we
- * test for and perform a bus/slot reset.
+ * (!refcnt). Callers are required to hold driver_lock when calling this to
+ * prevent device opens and concurrent bus reset attempts. We prevent device
+ * unbinds by acquiring and holding a reference to the vfio_device.
+ *
+ * NB: vfio-core considers a group to be viable even if some devices are
+ * bound to drivers like pci-stub or pcieport. Here we require all devices
+ * to be bound to vfio_pci since that's the only way we can be sure they
+ * stay put.
*/
static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
{
+ struct vfio_devices devs = { .cur_index = 0 };
+ int i = 0, ret = -EINVAL;
bool needs_reset = false, slot = false;
- int ret;
+ struct vfio_pci_device *tmp;
if (!pci_probe_reset_slot(vdev->pdev->slot))
slot = true;
else if (pci_probe_reset_bus(vdev->pdev->bus))
return;
- if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
- vfio_pci_test_bus_reset,
- &needs_reset, slot) || !needs_reset)
+ if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
+ &i, slot) || !i)
return;
- if (slot)
- ret = pci_try_reset_slot(vdev->pdev->slot);
- else
- ret = pci_try_reset_bus(vdev->pdev->bus);
-
- if (ret)
+ devs.max_index = i;
+ devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL);
+ if (!devs.devices)
return;
- vfio_pci_for_each_slot_or_bus(vdev->pdev,
- vfio_pci_clear_needs_reset, NULL, slot);
+ if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
+ vfio_pci_get_devs, &devs, slot))
+ goto put_devs;
+
+ for (i = 0; i < devs.cur_index; i++) {
+ tmp = vfio_device_data(devs.devices[i]);
+ if (tmp->needs_reset)
+ needs_reset = true;
+ if (tmp->refcnt)
+ goto put_devs;
+ }
+
+ if (needs_reset)
+ ret = slot ? pci_try_reset_slot(vdev->pdev->slot) :
+ pci_try_reset_bus(vdev->pdev->bus);
+
+put_devs:
+ for (i = 0; i < devs.cur_index; i++) {
+ if (!ret) {
+ tmp = vfio_device_data(devs.devices[i]);
+ tmp->needs_reset = false;
+ }
+ vfio_device_put(devs.devices[i]);
+ }
+
+ kfree(devs.devices);
}
static void __exit vfio_pci_cleanup(void)
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 9dd49c9839ac..553212f037c3 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -16,6 +16,7 @@
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/eventfd.h>
+#include <linux/msi.h>
#include <linux/pci.h>
#include <linux/file.h>
#include <linux/poll.h>
@@ -548,6 +549,20 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
return PTR_ERR(trigger);
}
+ /*
+ * The MSIx vector table resides in device memory which may be cleared
+ * via backdoor resets. We don't allow direct access to the vector
+ * table so even if a userspace driver attempts to save/restore around
+ * such a reset it would be unsuccessful. To avoid this, restore the
+ * cached value of the message prior to enabling.
+ */
+ if (msix) {
+ struct msi_msg msg;
+
+ get_cached_msi_msg(irq, &msg);
+ write_msi_msg(irq, &msg);
+ }
+
ret = request_irq(irq, vfio_msihandler, 0,
vdev->ctx[vector].name, trigger);
if (ret) {
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 0734fbe5b651..583ccdb2c58f 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -57,7 +57,8 @@ struct vfio_iommu {
struct list_head domain_list;
struct mutex lock;
struct rb_root dma_list;
- bool v2;
+ bool v2;
+ bool nesting;
};
struct vfio_domain {
@@ -705,6 +706,15 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
goto out_free;
}
+ if (iommu->nesting) {
+ int attr = 1;
+
+ ret = iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING,
+ &attr);
+ if (ret)
+ goto out_domain;
+ }
+
ret = iommu_attach_group(domain->domain, iommu_group);
if (ret)
goto out_domain;
@@ -819,17 +829,26 @@ static void *vfio_iommu_type1_open(unsigned long arg)
{
struct vfio_iommu *iommu;
- if (arg != VFIO_TYPE1_IOMMU && arg != VFIO_TYPE1v2_IOMMU)
- return ERR_PTR(-EINVAL);
-
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
if (!iommu)
return ERR_PTR(-ENOMEM);
+ switch (arg) {
+ case VFIO_TYPE1_IOMMU:
+ break;
+ case VFIO_TYPE1_NESTING_IOMMU:
+ iommu->nesting = true;
+ case VFIO_TYPE1v2_IOMMU:
+ iommu->v2 = true;
+ break;
+ default:
+ kfree(iommu);
+ return ERR_PTR(-EINVAL);
+ }
+
INIT_LIST_HEAD(&iommu->domain_list);
iommu->dma_list = RB_ROOT;
mutex_init(&iommu->lock);
- iommu->v2 = (arg == VFIO_TYPE1v2_IOMMU);
return iommu;
}
@@ -885,6 +904,7 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
switch (arg) {
case VFIO_TYPE1_IOMMU:
case VFIO_TYPE1v2_IOMMU:
+ case VFIO_TYPE1_NESTING_IOMMU:
return 1;
case VFIO_DMA_CC_IOMMU:
if (!iommu)
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 86dfceb9201f..5fa42db769ee 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -92,7 +92,7 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
return ret;
}
-EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl);
+EXPORT_SYMBOL_GPL(vfio_spapr_iommu_eeh_ioctl);
MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 20f9a527922a..7b02bcc85b9e 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -80,6 +80,7 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMU_STASH,
DOMAIN_ATTR_FSL_PAMU_ENABLE,
DOMAIN_ATTR_FSL_PAMUV1,
+ DOMAIN_ATTR_NESTING, /* two stages of translation */
DOMAIN_ATTR_MAX,
};
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 6612974c64bf..29715d27548f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -33,6 +33,9 @@
/* Check if EEH is supported */
#define VFIO_EEH 5
+/* Two-stage IOMMU */
+#define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */
+
/*
* The IOCTL interface is designed for extensibility by embedding the
* structure length (argsz) and flags into structures passed between