diff options
Diffstat (limited to 'drivers/vfio/pci')
-rw-r--r-- | drivers/vfio/pci/Kconfig | 1 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci.c | 188 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_intrs.c | 240 | ||||
-rw-r--r-- | drivers/vfio/pci/vfio_pci_private.h | 3 |
4 files changed, 180 insertions, 252 deletions
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index c6bb5da2d2a7..579d83bf5358 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -1,6 +1,7 @@ config VFIO_PCI tristate "VFIO support for PCI devices" depends on VFIO && PCI && EVENTFD + select VFIO_VIRQFD help Support for the PCI VFIO bus driver. This is required to make use of PCI drivers using the VFIO framework. diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index f8a186381ae8..69fab0fd15ae 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -11,6 +11,8 @@ * Author: Tom Lyon, pugs@cisco.com */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/device.h> #include <linux/eventfd.h> #include <linux/file.h> @@ -25,6 +27,7 @@ #include <linux/types.h> #include <linux/uaccess.h> #include <linux/vfio.h> +#include <linux/vgaarb.h> #include "vfio_pci_private.h" @@ -32,13 +35,81 @@ #define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>" #define DRIVER_DESC "VFIO PCI - User Level meta-driver" +static char ids[1024] __initdata; +module_param_string(ids, ids, sizeof(ids), 0); +MODULE_PARM_DESC(ids, "Initial PCI IDs to add to the vfio driver, format is \"vendor:device[:subvendor[:subdevice[:class[:class_mask]]]]\" and multiple comma separated entries can be specified"); + static bool nointxmask; module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nointxmask, "Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag."); +#ifdef CONFIG_VFIO_PCI_VGA +static bool disable_vga; +module_param(disable_vga, bool, S_IRUGO); +MODULE_PARM_DESC(disable_vga, "Disable VGA resource access through vfio-pci"); +#endif + +static bool disable_idle_d3; +module_param(disable_idle_d3, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(disable_idle_d3, + "Disable using the PCI D3 low power state for idle, unused devices"); + static DEFINE_MUTEX(driver_lock); +static inline bool vfio_vga_disabled(void) +{ +#ifdef CONFIG_VFIO_PCI_VGA + return disable_vga; +#else + return true; +#endif +} + +/* + * Our VGA arbiter participation is limited since we don't know anything + * about the device itself. However, if the device is the only VGA device + * downstream of a bridge and VFIO VGA support is disabled, then we can + * safely return legacy VGA IO and memory as not decoded since the user + * has no way to get to it and routing can be disabled externally at the + * bridge. + */ +static unsigned int vfio_pci_set_vga_decode(void *opaque, bool single_vga) +{ + struct vfio_pci_device *vdev = opaque; + struct pci_dev *tmp = NULL, *pdev = vdev->pdev; + unsigned char max_busnr; + unsigned int decodes; + + if (single_vga || !vfio_vga_disabled() || pci_is_root_bus(pdev->bus)) + return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM | + VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM; + + max_busnr = pci_bus_max_busnr(pdev->bus); + decodes = VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; + + while ((tmp = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, tmp)) != NULL) { + if (tmp == pdev || + pci_domain_nr(tmp->bus) != pci_domain_nr(pdev->bus) || + pci_is_root_bus(tmp->bus)) + continue; + + if (tmp->bus->number >= pdev->bus->number && + tmp->bus->number <= max_busnr) { + pci_dev_put(tmp); + decodes |= VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM; + break; + } + } + + return decodes; +} + +static inline bool vfio_pci_is_vga(struct pci_dev *pdev) +{ + return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA; +} + static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); static int vfio_pci_enable(struct vfio_pci_device *vdev) @@ -48,6 +119,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) u16 cmd; u8 msix_pos; + pci_set_power_state(pdev, PCI_D0); + /* Don't allow our initial saved state to include busmaster */ pci_clear_master(pdev); @@ -93,10 +166,8 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) } else vdev->msix_bar = 0xFF; -#ifdef CONFIG_VFIO_PCI_VGA - if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) + if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) vdev->has_vga = true; -#endif return 0; } @@ -153,20 +224,17 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) * Try to reset the device. The success of this is dependent on * being able to lock the device, which is not always possible. */ - if (vdev->reset_works) { - int ret = pci_try_reset_function(pdev); - if (ret) - pr_warn("%s: Failed to reset device %s (%d)\n", - __func__, dev_name(&pdev->dev), ret); - else - vdev->needs_reset = false; - } + if (vdev->reset_works && !pci_try_reset_function(pdev)) + vdev->needs_reset = false; pci_restore_state(pdev); out: pci_disable_device(pdev); vfio_pci_try_bus_reset(vdev); + + if (!disable_idle_d3) + pci_set_power_state(pdev, PCI_D3hot); } static void vfio_pci_release(void *device_data) @@ -885,6 +953,27 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (ret) { iommu_group_put(group); kfree(vdev); + return ret; + } + + if (vfio_pci_is_vga(pdev)) { + vga_client_register(pdev, vdev, NULL, vfio_pci_set_vga_decode); + vga_set_legacy_decoding(pdev, + vfio_pci_set_vga_decode(vdev, false)); + } + + if (!disable_idle_d3) { + /* + * pci-core sets the device power state to an unknown value at + * bootup and after being removed from a driver. The only + * transition it allows from this unknown state is to D0, which + * typically happens when a driver calls pci_enable_device(). + * We're not ready to enable the device yet, but we do want to + * be able to get to D3. Therefore first do a D0 transition + * before going to D3. + */ + pci_set_power_state(pdev, PCI_D0); + pci_set_power_state(pdev, PCI_D3hot); } return ret; @@ -895,10 +984,21 @@ static void vfio_pci_remove(struct pci_dev *pdev) struct vfio_pci_device *vdev; vdev = vfio_del_group_dev(&pdev->dev); - if (vdev) { - iommu_group_put(pdev->dev.iommu_group); - kfree(vdev); + if (!vdev) + return; + + iommu_group_put(pdev->dev.iommu_group); + kfree(vdev); + + if (vfio_pci_is_vga(pdev)) { + vga_client_register(pdev, NULL, NULL, NULL); + vga_set_legacy_decoding(pdev, + VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM | + VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM); } + + if (!disable_idle_d3) + pci_set_power_state(pdev, PCI_D0); } static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, @@ -1017,10 +1117,13 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) put_devs: for (i = 0; i < devs.cur_index; i++) { - if (!ret) { - tmp = vfio_device_data(devs.devices[i]); + tmp = vfio_device_data(devs.devices[i]); + if (!ret) tmp->needs_reset = false; - } + + if (!tmp->refcnt && !disable_idle_d3) + pci_set_power_state(tmp->pdev, PCI_D3hot); + vfio_device_put(devs.devices[i]); } @@ -1030,10 +1133,50 @@ put_devs: static void __exit vfio_pci_cleanup(void) { pci_unregister_driver(&vfio_pci_driver); - vfio_pci_virqfd_exit(); vfio_pci_uninit_perm_bits(); } +static void __init vfio_pci_fill_ids(void) +{ + char *p, *id; + int rc; + + /* no ids passed actually */ + if (ids[0] == '\0') + return; + + /* add ids specified in the module parameter */ + p = ids; + while ((id = strsep(&p, ","))) { + unsigned int vendor, device, subvendor = PCI_ANY_ID, + subdevice = PCI_ANY_ID, class = 0, class_mask = 0; + int fields; + + if (!strlen(id)) + continue; + + fields = sscanf(id, "%x:%x:%x:%x:%x:%x", + &vendor, &device, &subvendor, &subdevice, + &class, &class_mask); + + if (fields < 2) { + pr_warn("invalid id string \"%s\"\n", id); + continue; + } + + rc = pci_add_dynid(&vfio_pci_driver, vendor, device, + subvendor, subdevice, class, class_mask, 0); + if (rc) + pr_warn("failed to add dynamic id [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x (%d)\n", + vendor, device, subvendor, subdevice, + class, class_mask, rc); + else + pr_info("add [%04hx:%04hx[%04hx:%04hx]] class %#08x/%08x\n", + vendor, device, subvendor, subdevice, + class, class_mask); + } +} + static int __init vfio_pci_init(void) { int ret; @@ -1043,21 +1186,16 @@ static int __init vfio_pci_init(void) if (ret) return ret; - /* Start the virqfd cleanup handler */ - ret = vfio_pci_virqfd_init(); - if (ret) - goto out_virqfd; - /* Register and scan for devices */ ret = pci_register_driver(&vfio_pci_driver); if (ret) goto out_driver; + vfio_pci_fill_ids(); + return 0; out_driver: - vfio_pci_virqfd_exit(); -out_virqfd: vfio_pci_uninit_perm_bits(); return ret; } diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index f88bfdf5b6a0..1f577b4ac126 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -19,230 +19,19 @@ #include <linux/msi.h> #include <linux/pci.h> #include <linux/file.h> -#include <linux/poll.h> #include <linux/vfio.h> #include <linux/wait.h> -#include <linux/workqueue.h> #include <linux/slab.h> #include "vfio_pci_private.h" /* - * IRQfd - generic - */ -struct virqfd { - struct vfio_pci_device *vdev; - struct eventfd_ctx *eventfd; - int (*handler)(struct vfio_pci_device *, void *); - void (*thread)(struct vfio_pci_device *, void *); - void *data; - struct work_struct inject; - wait_queue_t wait; - poll_table pt; - struct work_struct shutdown; - struct virqfd **pvirqfd; -}; - -static struct workqueue_struct *vfio_irqfd_cleanup_wq; - -int __init vfio_pci_virqfd_init(void) -{ - vfio_irqfd_cleanup_wq = - create_singlethread_workqueue("vfio-irqfd-cleanup"); - if (!vfio_irqfd_cleanup_wq) - return -ENOMEM; - - return 0; -} - -void vfio_pci_virqfd_exit(void) -{ - destroy_workqueue(vfio_irqfd_cleanup_wq); -} - -static void virqfd_deactivate(struct virqfd *virqfd) -{ - queue_work(vfio_irqfd_cleanup_wq, &virqfd->shutdown); -} - -static int virqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) -{ - struct virqfd *virqfd = container_of(wait, struct virqfd, wait); - unsigned long flags = (unsigned long)key; - - if (flags & POLLIN) { - /* An event has been signaled, call function */ - if ((!virqfd->handler || - virqfd->handler(virqfd->vdev, virqfd->data)) && - virqfd->thread) - schedule_work(&virqfd->inject); - } - - if (flags & POLLHUP) { - unsigned long flags; - spin_lock_irqsave(&virqfd->vdev->irqlock, flags); - - /* - * The eventfd is closing, if the virqfd has not yet been - * queued for release, as determined by testing whether the - * vdev pointer to it is still valid, queue it now. As - * with kvm irqfds, we know we won't race against the virqfd - * going away because we hold wqh->lock to get here. - */ - if (*(virqfd->pvirqfd) == virqfd) { - *(virqfd->pvirqfd) = NULL; - virqfd_deactivate(virqfd); - } - - spin_unlock_irqrestore(&virqfd->vdev->irqlock, flags); - } - - return 0; -} - -static void virqfd_ptable_queue_proc(struct file *file, - wait_queue_head_t *wqh, poll_table *pt) -{ - struct virqfd *virqfd = container_of(pt, struct virqfd, pt); - add_wait_queue(wqh, &virqfd->wait); -} - -static void virqfd_shutdown(struct work_struct *work) -{ - struct virqfd *virqfd = container_of(work, struct virqfd, shutdown); - u64 cnt; - - eventfd_ctx_remove_wait_queue(virqfd->eventfd, &virqfd->wait, &cnt); - flush_work(&virqfd->inject); - eventfd_ctx_put(virqfd->eventfd); - - kfree(virqfd); -} - -static void virqfd_inject(struct work_struct *work) -{ - struct virqfd *virqfd = container_of(work, struct virqfd, inject); - if (virqfd->thread) - virqfd->thread(virqfd->vdev, virqfd->data); -} - -static int virqfd_enable(struct vfio_pci_device *vdev, - int (*handler)(struct vfio_pci_device *, void *), - void (*thread)(struct vfio_pci_device *, void *), - void *data, struct virqfd **pvirqfd, int fd) -{ - struct fd irqfd; - struct eventfd_ctx *ctx; - struct virqfd *virqfd; - int ret = 0; - unsigned int events; - - virqfd = kzalloc(sizeof(*virqfd), GFP_KERNEL); - if (!virqfd) - return -ENOMEM; - - virqfd->pvirqfd = pvirqfd; - virqfd->vdev = vdev; - virqfd->handler = handler; - virqfd->thread = thread; - virqfd->data = data; - - INIT_WORK(&virqfd->shutdown, virqfd_shutdown); - INIT_WORK(&virqfd->inject, virqfd_inject); - - irqfd = fdget(fd); - if (!irqfd.file) { - ret = -EBADF; - goto err_fd; - } - - ctx = eventfd_ctx_fileget(irqfd.file); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - goto err_ctx; - } - - virqfd->eventfd = ctx; - - /* - * virqfds can be released by closing the eventfd or directly - * through ioctl. These are both done through a workqueue, so - * we update the pointer to the virqfd under lock to avoid - * pushing multiple jobs to release the same virqfd. - */ - spin_lock_irq(&vdev->irqlock); - - if (*pvirqfd) { - spin_unlock_irq(&vdev->irqlock); - ret = -EBUSY; - goto err_busy; - } - *pvirqfd = virqfd; - - spin_unlock_irq(&vdev->irqlock); - - /* - * Install our own custom wake-up handling so we are notified via - * a callback whenever someone signals the underlying eventfd. - */ - init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup); - init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc); - - events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt); - - /* - * Check if there was an event already pending on the eventfd - * before we registered and trigger it as if we didn't miss it. - */ - if (events & POLLIN) { - if ((!handler || handler(vdev, data)) && thread) - schedule_work(&virqfd->inject); - } - - /* - * Do not drop the file until the irqfd is fully initialized, - * otherwise we might race against the POLLHUP. - */ - fdput(irqfd); - - return 0; -err_busy: - eventfd_ctx_put(ctx); -err_ctx: - fdput(irqfd); -err_fd: - kfree(virqfd); - - return ret; -} - -static void virqfd_disable(struct vfio_pci_device *vdev, - struct virqfd **pvirqfd) -{ - unsigned long flags; - - spin_lock_irqsave(&vdev->irqlock, flags); - - if (*pvirqfd) { - virqfd_deactivate(*pvirqfd); - *pvirqfd = NULL; - } - - spin_unlock_irqrestore(&vdev->irqlock, flags); - - /* - * Block until we know all outstanding shutdown jobs have completed. - * Even if we don't queue the job, flush the wq to be sure it's - * been released. - */ - flush_workqueue(vfio_irqfd_cleanup_wq); -} - -/* * INTx */ -static void vfio_send_intx_eventfd(struct vfio_pci_device *vdev, void *unused) +static void vfio_send_intx_eventfd(void *opaque, void *unused) { + struct vfio_pci_device *vdev = opaque; + if (likely(is_intx(vdev) && !vdev->virq_disabled)) eventfd_signal(vdev->ctx[0].trigger, 1); } @@ -285,9 +74,9 @@ void vfio_pci_intx_mask(struct vfio_pci_device *vdev) * a signal is necessary, which can then be handled via a work queue * or directly depending on the caller. */ -static int vfio_pci_intx_unmask_handler(struct vfio_pci_device *vdev, - void *unused) +static int vfio_pci_intx_unmask_handler(void *opaque, void *unused) { + struct vfio_pci_device *vdev = opaque; struct pci_dev *pdev = vdev->pdev; unsigned long flags; int ret = 0; @@ -440,8 +229,8 @@ static int vfio_intx_set_signal(struct vfio_pci_device *vdev, int fd) static void vfio_intx_disable(struct vfio_pci_device *vdev) { vfio_intx_set_signal(vdev, -1); - virqfd_disable(vdev, &vdev->ctx[0].unmask); - virqfd_disable(vdev, &vdev->ctx[0].mask); + vfio_virqfd_disable(&vdev->ctx[0].unmask); + vfio_virqfd_disable(&vdev->ctx[0].mask); vdev->irq_type = VFIO_PCI_NUM_IRQS; vdev->num_ctx = 0; kfree(vdev->ctx); @@ -605,8 +394,8 @@ static void vfio_msi_disable(struct vfio_pci_device *vdev, bool msix) vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix); for (i = 0; i < vdev->num_ctx; i++) { - virqfd_disable(vdev, &vdev->ctx[i].unmask); - virqfd_disable(vdev, &vdev->ctx[i].mask); + vfio_virqfd_disable(&vdev->ctx[i].unmask); + vfio_virqfd_disable(&vdev->ctx[i].mask); } if (msix) { @@ -639,11 +428,12 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_device *vdev, } else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) { int32_t fd = *(int32_t *)data; if (fd >= 0) - return virqfd_enable(vdev, vfio_pci_intx_unmask_handler, - vfio_send_intx_eventfd, NULL, - &vdev->ctx[0].unmask, fd); + return vfio_virqfd_enable((void *) vdev, + vfio_pci_intx_unmask_handler, + vfio_send_intx_eventfd, NULL, + &vdev->ctx[0].unmask, fd); - virqfd_disable(vdev, &vdev->ctx[0].unmask); + vfio_virqfd_disable(&vdev->ctx[0].unmask); } return 0; @@ -868,12 +658,14 @@ int vfio_pci_set_irqs_ioctl(struct vfio_pci_device *vdev, uint32_t flags, func = vfio_pci_set_err_trigger; break; } + break; case VFIO_PCI_REQ_IRQ_INDEX: switch (flags & VFIO_IRQ_SET_ACTION_TYPE_MASK) { case VFIO_IRQ_SET_ACTION_TRIGGER: func = vfio_pci_set_req_trigger; break; } + break; } if (!func) diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index c9f9b323f152..ae0e1b4c1711 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -87,9 +87,6 @@ extern ssize_t vfio_pci_vga_rw(struct vfio_pci_device *vdev, char __user *buf, extern int vfio_pci_init_perm_bits(void); extern void vfio_pci_uninit_perm_bits(void); -extern int vfio_pci_virqfd_init(void); -extern void vfio_pci_virqfd_exit(void); - extern int vfio_config_init(struct vfio_pci_device *vdev); extern void vfio_config_free(struct vfio_pci_device *vdev); #endif /* VFIO_PCI_PRIVATE_H */ |