aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/vfio/pci
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vfio/pci')
-rw-r--r--drivers/vfio/pci/Kconfig1
-rw-r--r--drivers/vfio/pci/vfio_pci.c188
-rw-r--r--drivers/vfio/pci/vfio_pci_intrs.c238
-rw-r--r--drivers/vfio/pci/vfio_pci_private.h3
4 files changed, 178 insertions, 252 deletions
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index c6bb5da2d2a7..579d83bf5358 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -1,6 +1,7 @@
config VFIO_PCI
tristate "VFIO support for PCI devices"
depends on VFIO && PCI && EVENTFD
+ select VFIO_VIRQFD
help
Support for the PCI VFIO bus driver. This is required to make
use of PCI drivers using the VFIO framework.
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index f8a186381ae8..69fab0fd15ae 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -11,6 +11,8 @@
* Author: Tom Lyon, pugs@cisco.com
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/device.h>
#include <linux/eventfd.h>
#include <linux/file.h>
@@ -25,6 +27,7 @@
#include <linux/types.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
+#include <linux/vgaarb.h>
#include "vfio_pci_private.h"
@@ -32,13 +35,81 @@
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC "VFIO PCI - User Level meta-driver"
+static char ids[1024] __initdata;
+module_param_string(ids, ids, sizeof(ids), 0);
+MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified");
+
static bool nointxmask;
module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(nointxmask,
"Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag.");
+#ifdef CONFIG_VFIO_PCI_VGA
+static bool disable_vga;
+module_param(disable_vga, bool, S_IRUGO);
+MODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-pci");
+#endif
+
+static bool disable_idle_d3;
+module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(disable_idle_d3,
+ "Disable using the PCI D3 low power state for idle, unused devices");
+
static DEFINE_MUTEX(driver_lock);
+static inline bool vfio_vga_disabled(void)
+{
+#ifdef CONFIG_VFIO_PCI_VGA
+ return disable_vga;
+#else
+ return true;
+#endif
+}
+
+/*
+ * Our VGA arbiter participation is limited since we don't know anything
+ * about the device itself. However, if the device is the only VGA device
+ * downstream of a bridge and VFIO VGA support is disabled, then we can
+ * safely return legacy VGA IO and memory as not decoded since the user
+ * has no way to get to it and routing can be disabled externally at the
+ * bridge.
+ */
+static unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga)
+{
+ struct vfio_pci_device *vdev = opaque;
+ struct pci_dev *tmp = NULL, *pdev = vdev->pdev;
+ unsigned char max_busnr;
+ unsigned int decodes;
+
+ if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus))
+ return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
+ VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
+
+ max_busnr = pci_bus_max_busnr(pdev->bus);
+ decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM;
+
+ while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) {
+ if (tmp == pdev ||
+ pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) ||
+ pci_is_root_bus(tmp->bus))
+ continue;
+
+ if (tmp->bus->number >= pdev->bus->number &&
+ tmp->bus->number <= max_busnr) {
+ pci_dev_put(tmp);
+ decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM;
+ break;
+ }
+ }
+
+ return decodes;
+}
+
+static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
+{
+ return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
+}
+
static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
static int vfio_pci_enable(struct vfio_pci_device *vdev)
@@ -48,6 +119,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
u16 cmd;
u8 msix_pos;
+ pci_set_power_state(pdev, PCI_D0);
+
/* Don't allow our initial saved state to include busmaster */
pci_clear_master(pdev);
@@ -93,10 +166,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
} else
vdev->msix_bar = 0xFF;
-#ifdef CONFIG_VFIO_PCI_VGA
- if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA)
+ if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
vdev->has_vga = true;
-#endif
return 0;
}
@@ -153,20 +224,17 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
* Try to reset the device. The success of this is dependent on
* being able to lock the device, which is not always possible.
*/
- if (vdev->reset_works) {
- int ret = pci_try_reset_function(pdev);
- if (ret)
- pr_warn("%s: Failed to reset device %s (%d)\n",
- __func__, dev_name(&pdev->dev), ret);
- else
- vdev->needs_reset = false;
- }
+ if (vdev->reset_works && !pci_try_reset_function(pdev))
+ vdev->needs_reset = false;
pci_restore_state(pdev);
out:
pci_disable_device(pdev);
vfio_pci_try_bus_reset(vdev);
+
+ if (!disable_idle_d3)
+ pci_set_power_state(pdev, PCI_D3hot);
}
static void vfio_pci_release(void *device_data)
@@ -885,6 +953,27 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (ret) {
iommu_group_put(group);
kfree(vdev);
+ return ret;
+ }
+
+ if (vfio_pci_is_vga(pdev)) {
+ vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode);
+ vga_set_legacy_decoding(pdev,
+ vfio_pci_set_vga_decode(vdev, false));
+ }
+
+ if (!disable_idle_d3) {
+ /*
+ * pci-core sets the device power state to an unknown value at
+ * bootup and after being removed from a driver. The only
+ * transition it allows from this unknown state is to D0, which
+ * typically happens when a driver calls pci_enable_device().
+ * We're not ready to enable the device yet, but we do want to
+ * be able to get to D3. Therefore first do a D0 transition
+ * before going to D3.
+ */
+ pci_set_power_state(pdev, PCI_D0);
+ pci_set_power_state(pdev, PCI_D3hot);
}
return ret;
@@ -895,10 +984,21 @@ static void vfio_pci_remove(struct pci_dev *pdev)
struct vfio_pci_device *vdev;
vdev = vfio_del_group_dev(&pdev->dev);
- if (vdev) {
- iommu_group_put(pdev->dev.iommu_group);
- kfree(vdev);
+ if (!vdev)
+ return;
+
+ iommu_group_put(pdev->dev.iommu_group);
+ kfree(vdev);
+
+ if (vfio_pci_is_vga(pdev)) {
+ vga_client_register(pdev, NULL, NULL, NULL);
+ vga_set_legacy_decoding(pdev,
+ VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM |
+ VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM);
}
+
+ if (!disable_idle_d3)
+ pci_set_power_state(pdev, PCI_D0);
}
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
@@ -1017,10 +1117,13 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
put_devs:
for (i = 0; i < devs.cur_index; i++) {
- if (!ret) {
- tmp = vfio_device_data(devs.devices[i]);
+ tmp = vfio_device_data(devs.devices[i]);
+ if (!ret)
tmp->needs_reset = false;
- }
+
+ if (!tmp->refcnt && !disable_idle_d3)
+ pci_set_power_state(tmp->pdev, PCI_D3hot);
+
vfio_device_put(devs.devices[i]);
}
@@ -1030,10 +1133,50 @@ put_devs:
static void __exit vfio_pci_cleanup(void)
{
pci_unregister_driver(&vfio_pci_driver);
- vfio_pci_virqfd_exit();
vfio_pci_uninit_perm_bits();
}
+static void __init vfio_pci_fill_ids(void)
+{
+ char *p, *id;
+ int rc;
+
+ /* no ids passed actually */
+ if (ids[0] == '\0')
+ return;
+
+ /* add ids specified in the module parameter */
+ p = ids;
+ while ((id = strsep(&p, ","))) {
+ unsigned int vendor, device, subvendor = PCI_ANY_ID,
+ subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
+ int fields;
+
+ if (!strlen(id))
+ continue;
+
+ fields = sscanf(id, "%x:%x:%x:%x:%x:%x",
+ &vendor, &device, &subvendor, &subdevice,
+ &class, &class_mask);
+
+ if (fields < 2) {
+ pr_warn("invalid id string \"%s\"\n", id);
+ continue;
+ }
+
+ rc = pci_add_dynid(&vfio_pci_driver, vendor, device,
+ subvendor, subdevice, class, class_mask, 0);
+ if (rc)
+ pr_warn("failed to add dynamic id [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x (%d)\n",
+ vendor, device, subvendor, subdevice,
+ class, class_mask, rc);
+ else
+ pr_info("add [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x\n",
+ vendor, device, subvendor, subdevice,
+ class, class_mask);
+ }
+}
+
static int __init vfio_pci_init(void)
{
int ret;
@@ -1043,21 +1186,16 @@ static int __init vfio_pci_init(void)
if (ret)
return ret;
- /* Start the virqfd cleanup handler */
- ret = vfio_pci_virqfd_init();
- if (ret)
- goto out_virqfd;
-
/* Register and scan for devices */
ret = pci_register_driver(&vfio_pci_driver);
if (ret)
goto out_driver;
+ vfio_pci_fill_ids();
+
return 0;
out_driver:
- vfio_pci_virqfd_exit();
-out_virqfd:
vfio_pci_uninit_perm_bits();
return ret;
}
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 2027a27546ef..1f577b4ac126 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -19,230 +19,19 @@
#include <linux/msi.h>
#include <linux/pci.h>
#include <linux/file.h>
-#include <linux/poll.h>
#include <linux/vfio.h>
#include <linux/wait.h>
-#include <linux/workqueue.h>
#include <linux/slab.h>
#include "vfio_pci_private.h"
/*
- * IRQfd - generic
- */
-struct virqfd {
- struct vfio_pci_device *vdev;
- struct eventfd_ctx *eventfd;
- int (*handler)(struct vfio_pci_device *, void *);
- void (*thread)(struct vfio_pci_device *, void *);
- void *data;
- struct work_struct inject;
- wait_queue_t wait;
- poll_table pt;
- struct work_struct shutdown;
- struct virqfd **pvirqfd;
-};
-
-static struct workqueue_struct *vfio_irqfd_cleanup_wq;
-
-int __init vfio_pci_virqfd_init(void)
-{
- vfio_irqfd_cleanup_wq =
- create_singlethread_workqueue("vfio-irqfd-cleanup");
- if (!vfio_irqfd_cleanup_wq)
- return -ENOMEM;
-
- return 0;
-}
-
-void vfio_pci_virqfd_exit(void)
-{
- destroy_workqueue(vfio_irqfd_cleanup_wq);
-}
-
-static void virqfd_deactivate(struct virqfd *virqfd)
-{
- queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown);
-}
-
-static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
-{
- struct virqfd *virqfd = container_of(wait, struct virqfd, wait);
- unsigned long flags = (unsigned long)key;
-
- if (flags & POLLIN) {
- /* An event has been signaled, call function */
- if ((!virqfd->handler ||
- virqfd->handler(virqfd->vdev, virqfd->data)) &&
- virqfd->thread)
- schedule_work(&virqfd->inject);
- }
-
- if (flags & POLLHUP) {
- unsigned long flags;
- spin_lock_irqsave(&virqfd->vdev->irqlock, flags);
-
- /*
- * The eventfd is closing, if the virqfd has not yet been
- * queued for release, as determined by testing whether the
- * vdev pointer to it is still valid, queue it now. As
- * with kvm irqfds, we know we won't race against the virqfd
- * going away because we hold wqh->lock to get here.
- */
- if (*(virqfd->pvirqfd) == virqfd) {
- *(virqfd->pvirqfd) = NULL;
- virqfd_deactivate(virqfd);
- }
-
- spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags);
- }
-
- return 0;
-}
-
-static void virqfd_ptable_queue_proc(struct file *file,
- wait_queue_head_t *wqh, poll_table *pt)
-{
- struct virqfd *virqfd = container_of(pt, struct virqfd, pt);
- add_wait_queue(wqh, &virqfd->wait);
-}
-
-static void virqfd_shutdown(struct work_struct *work)
-{
- struct virqfd *virqfd = container_of(work, struct virqfd, shutdown);
- u64 cnt;
-
- eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt);
- flush_work(&virqfd->inject);
- eventfd_ctx_put(virqfd->eventfd);
-
- kfree(virqfd);
-}
-
-static void virqfd_inject(struct work_struct *work)
-{
- struct virqfd *virqfd = container_of(work, struct virqfd, inject);
- if (virqfd->thread)
- virqfd->thread(virqfd->vdev, virqfd->data);
-}
-
-static int virqfd_enable(struct vfio_pci_device *vdev,
- int (*handler)(struct vfio_pci_device *, void *),
- void (*thread)(struct vfio_pci_device *, void *),
- void *data, struct virqfd **pvirqfd, int fd)
-{
- struct fd irqfd;
- struct eventfd_ctx *ctx;
- struct virqfd *virqfd;
- int ret = 0;
- unsigned int events;
-
- virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL);
- if (!virqfd)
- return -ENOMEM;
-
- virqfd->pvirqfd = pvirqfd;
- virqfd->vdev = vdev;
- virqfd->handler = handler;
- virqfd->thread = thread;
- virqfd->data = data;
-
- INIT_WORK(&virqfd->shutdown, virqfd_shutdown);
- INIT_WORK(&virqfd->inject, virqfd_inject);
-
- irqfd = fdget(fd);
- if (!irqfd.file) {
- ret = -EBADF;
- goto err_fd;
- }
-
- ctx = eventfd_ctx_fileget(irqfd.file);
- if (IS_ERR(ctx)) {
- ret = PTR_ERR(ctx);
- goto err_ctx;
- }
-
- virqfd->eventfd = ctx;
-
- /*
- * virqfds can be released by closing the eventfd or directly
- * through ioctl. These are both done through a workqueue, so
- * we update the pointer to the virqfd under lock to avoid
- * pushing multiple jobs to release the same virqfd.
- */
- spin_lock_irq(&vdev->irqlock);
-
- if (*pvirqfd) {
- spin_unlock_irq(&vdev->irqlock);
- ret = -EBUSY;
- goto err_busy;
- }
- *pvirqfd = virqfd;
-
- spin_unlock_irq(&vdev->irqlock);
-
- /*
- * Install our own custom wake-up handling so we are notified via
- * a callback whenever someone signals the underlying eventfd.
- */
- init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
- init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
-
- events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt);
-
- /*
- * Check if there was an event already pending on the eventfd
- * before we registered and trigger it as if we didn't miss it.
- */
- if (events & POLLIN) {
- if ((!handler || handler(vdev, data)) && thread)
- schedule_work(&virqfd->inject);
- }
-
- /*
- * Do not drop the file until the irqfd is fully initialized,
- * otherwise we might race against the POLLHUP.
- */
- fdput(irqfd);
-
- return 0;
-err_busy:
- eventfd_ctx_put(ctx);
-err_ctx:
- fdput(irqfd);
-err_fd:
- kfree(virqfd);
-
- return ret;
-}
-
-static void virqfd_disable(struct vfio_pci_device *vdev,
- struct virqfd **pvirqfd)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&vdev->irqlock, flags);
-
- if (*pvirqfd) {
- virqfd_deactivate(*pvirqfd);
- *pvirqfd = NULL;
- }
-
- spin_unlock_irqrestore(&vdev->irqlock, flags);
-
- /*
- * Block until we know all outstanding shutdown jobs have completed.
- * Even if we don't queue the job, flush the wq to be sure it's
- * been released.
- */
- flush_workqueue(vfio_irqfd_cleanup_wq);
-}
-
-/*
* INTx
*/
-static void vfio_send_intx_eventfd(struct vfio_pci_device *vdev, void *unused)
+static void vfio_send_intx_eventfd(void *opaque, void *unused)
{
+ struct vfio_pci_device *vdev = opaque;
+
if (likely(is_intx(vdev) && !vdev->virq_disabled))
eventfd_signal(vdev->ctx[0].trigger, 1);
}
@@ -285,9 +74,9 @@ void vfio_pci_intx_mask(struct vfio_pci_device *vdev)
* a signal is necessary, which can then be handled via a work queue
* or directly depending on the caller.
*/
-static int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev,
- void *unused)
+static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
{
+ struct vfio_pci_device *vdev = opaque;
struct pci_dev *pdev = vdev->pdev;
unsigned long flags;
int ret = 0;
@@ -440,8 +229,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd)
static void vfio_intx_disable(struct vfio_pci_device *vdev)
{
vfio_intx_set_signal(vdev, -1);
- virqfd_disable(vdev, &vdev->ctx[0].unmask);
- virqfd_disable(vdev, &vdev->ctx[0].mask);
+ vfio_virqfd_disable(&vdev->ctx[0].unmask);
+ vfio_virqfd_disable(&vdev->ctx[0].mask);
vdev->irq_type = VFIO_PCI_NUM_IRQS;
vdev->num_ctx = 0;
kfree(vdev->ctx);
@@ -605,8 +394,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix)
vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
for (i = 0; i < vdev->num_ctx; i++) {
- virqfd_disable(vdev, &vdev->ctx[i].unmask);
- virqfd_disable(vdev, &vdev->ctx[i].mask);
+ vfio_virqfd_disable(&vdev->ctx[i].unmask);
+ vfio_virqfd_disable(&vdev->ctx[i].mask);
}
if (msix) {
@@ -639,11 +428,12 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev,
} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
int32_t fd = *(int32_t *)data;
if (fd >= 0)
- return virqfd_enable(vdev, vfio_pci_intx_unmask_handler,
- vfio_send_intx_eventfd, NULL,
- &vdev->ctx[0].unmask, fd);
+ return vfio_virqfd_enable((void *) vdev,
+ vfio_pci_intx_unmask_handler,
+ vfio_send_intx_eventfd, NULL,
+ &vdev->ctx[0].unmask, fd);
- virqfd_disable(vdev, &vdev->ctx[0].unmask);
+ vfio_virqfd_disable(&vdev->ctx[0].unmask);
}
return 0;
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index c9f9b323f152..ae0e1b4c1711 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -87,9 +87,6 @@ extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf,
extern int vfio_pci_init_perm_bits(void);
extern void vfio_pci_uninit_perm_bits(void);
-extern int vfio_pci_virqfd_init(void);
-extern void vfio_pci_virqfd_exit(void);
-
extern int vfio_config_init(struct vfio_pci_device *vdev);
extern void vfio_config_free(struct vfio_pci_device *vdev);
#endif /* VFIO_PCI_PRIVATE_H */