From b95d9305e8cb8d432ca02da1b759fef59bc50ace Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Feb 2016 16:02:29 -0700 Subject: vfio/pci: Fix unsigned comparison overflow Signed versus unsigned comparisons are implicitly cast to unsigned, which result in a couple possible overflows. For instance (start + count) might overflow and wrap, getting through our validation test. Also when unwinding setup, -1 being compared as unsigned doesn't produce the intended stop condition. Fix both of these and also fix vfio_msi_set_vector_signal() to validate parameters before using the vector index, though none of the callers should pass bad indexes anymore. Reported-by: Eric Auger Reviewed-by: Eric Auger Tested-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_intrs.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c index 3b3ba15558b7..e9ea3fef144a 100644 --- a/drivers/vfio/pci/vfio_pci_intrs.c +++ b/drivers/vfio/pci/vfio_pci_intrs.c @@ -309,14 +309,14 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, int vector, int fd, bool msix) { struct pci_dev *pdev = vdev->pdev; - int irq = msix ? vdev->msix[vector].vector : pdev->irq + vector; - char *name = msix ? "vfio-msix" : "vfio-msi"; struct eventfd_ctx *trigger; - int ret; + int irq, ret; - if (vector >= vdev->num_ctx) + if (vector < 0 || vector >= vdev->num_ctx) return -EINVAL; + irq = msix ? vdev->msix[vector].vector : pdev->irq + vector; + if (vdev->ctx[vector].trigger) { free_irq(irq, vdev->ctx[vector].trigger); irq_bypass_unregister_producer(&vdev->ctx[vector].producer); @@ -328,8 +328,9 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, if (fd < 0) return 0; - vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "%s[%d](%s)", - name, vector, pci_name(pdev)); + vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)", + msix ? "x" : "", vector, + pci_name(pdev)); if (!vdev->ctx[vector].name) return -ENOMEM; @@ -379,7 +380,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start, { int i, j, ret = 0; - if (start + count > vdev->num_ctx) + if (start >= vdev->num_ctx || start + count > vdev->num_ctx) return -EINVAL; for (i = 0, j = start; i < count && !ret; i++, j++) { @@ -388,7 +389,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start, } if (ret) { - for (--j; j >= start; j--) + for (--j; j >= (int)start; j--) vfio_msi_set_vector_signal(vdev, j, -1, msix); } -- cgit v1.2.3-59-g8ed1b From 7c435b46c29a0240616cd1ad15d23341f9a341f4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Feb 2016 16:02:30 -0700 Subject: vfio: If an IOMMU backend fails, keep looking Consider an IOMMU to be an API rather than an implementation, we might have multiple implementations supporting the same API, so try another if one fails. The expectation here is that we'll really only have one implementation per device type. For instance the existing type1 driver works with any PCI device where the IOMMU API is available. A vGPU vendor may have a virtual PCI device which provides DMA isolation and mapping through other mechanisms, but can re-use userspaces that make use of the type1 VFIO IOMMU API. This allows that to work. Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index ecca316386f5..4cc961b894af 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1080,30 +1080,26 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container, continue; } - /* module reference holds the driver we're working on */ - mutex_unlock(&vfio.iommu_drivers_lock); - data = driver->ops->open(arg); if (IS_ERR(data)) { ret = PTR_ERR(data); module_put(driver->ops->owner); - goto skip_drivers_unlock; + continue; } ret = __vfio_container_attach_groups(container, driver, data); - if (!ret) { - container->iommu_driver = driver; - container->iommu_data = data; - } else { + if (ret) { driver->ops->release(data); module_put(driver->ops->owner); + continue; } - goto skip_drivers_unlock; + container->iommu_driver = driver; + container->iommu_data = data; + break; } mutex_unlock(&vfio.iommu_drivers_lock); -skip_drivers_unlock: up_write(&container->group_lock); return ret; -- cgit v1.2.3-59-g8ed1b From c84982adb23bcf3b99b79ca33527cd2625fbe279 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Feb 2016 16:02:32 -0700 Subject: vfio: Define capability chains We have a few cases where we need to extend the data returned from the INFO ioctls in VFIO. For instance we already have devices exposed through vfio-pci where VFIO_DEVICE_GET_REGION_INFO reports the region as mmap-capable, but really only supports sparse mmaps, avoiding the MSI-X table. If we wanted to provide in-kernel emulation or extended functionality for devices, we'd also want the ability to tell the user not to mmap various regions, rather than forcing them to figure it out on their own. Another example is VFIO_IOMMU_GET_INFO. We'd really like to expose the actual IOVA capabilities of the IOMMU rather than letting the user assume the address space they have available to them. We could add IOVA base and size fields to struct vfio_iommu_type1_info, but what if we have multiple IOVA ranges. For instance x86 uses a range of addresses at 0xfee00000 for MSI vectors. These typically are not available for standard DMA IOVA mappings and splits our available IOVA space into two ranges. POWER systems have both an IOVA window below 4G as well as dynamic data window which they can use to remap all of guest memory. Representing variable sized arrays within a fixed structure makes it very difficult to parse, we'd therefore like to put this data beyond fixed fields within the data structures. One way to do this is to emulate capabilities in PCI configuration space. A new flag indciates whether capabilties are supported and a new fixed field reports the offset of the first entry. Users can then walk the chain to find capabilities, adding capabilities does not require additional fields in the fixed structure, and parsing variable sized data becomes trivial. This patch outlines the theory and base header structure, which should be shared by all future users. Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 7d7a4c6f2090..d508adf17610 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -59,6 +59,33 @@ #define VFIO_TYPE (';') #define VFIO_BASE 100 +/* + * For extension of INFO ioctls, VFIO makes use of a capability chain + * designed after PCI/e capabilities. A flag bit indicates whether + * this capability chain is supported and a field defined in the fixed + * structure defines the offset of the first capability in the chain. + * This field is only valid when the corresponding bit in the flags + * bitmap is set. This offset field is relative to the start of the + * INFO buffer, as is the next field within each capability header. + * The id within the header is a shared address space per INFO ioctl, + * while the version field is specific to the capability id. The + * contents following the header are specific to the capability id. + */ +struct vfio_info_cap_header { + __u16 id; /* Identifies capability */ + __u16 version; /* Version specific to the capability ID */ + __u32 next; /* Offset of next capability */ +}; + +/* + * Callers of INFO ioctls passing insufficiently sized buffers will see + * the capability chain flag bit set, a zero value for the first capability + * offset (if available within the provided argsz), and argsz will be + * updated to report the necessary buffer size. For compatibility, the + * INFO ioctl will not report error in this case, but the capability chain + * will not be available. + */ + /* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */ /** -- cgit v1.2.3-59-g8ed1b From d7a8d5ed876970ac7f9bafbb6708500a7838c1d7 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Feb 2016 16:02:33 -0700 Subject: vfio: Add capability chain helpers Allow sub-modules to easily reallocate a buffer for managing capability chains for info ioctls. Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/vfio.h | 11 +++++++++++ 2 files changed, 65 insertions(+) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 4cc961b894af..6fd6fa5469de 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1728,6 +1728,60 @@ long vfio_external_check_extension(struct vfio_group *group, unsigned long arg) } EXPORT_SYMBOL_GPL(vfio_external_check_extension); +/** + * Sub-module support + */ +/* + * Helper for managing a buffer of info chain capabilities, allocate or + * reallocate a buffer with additional @size, filling in @id and @version + * of the capability. A pointer to the new capability is returned. + * + * NB. The chain is based at the head of the buffer, so new entries are + * added to the tail, vfio_info_cap_shift() should be called to fixup the + * next offsets prior to copying to the user buffer. + */ +struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps, + size_t size, u16 id, u16 version) +{ + void *buf; + struct vfio_info_cap_header *header, *tmp; + + buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL); + if (!buf) { + kfree(caps->buf); + caps->size = 0; + return ERR_PTR(-ENOMEM); + } + + caps->buf = buf; + header = buf + caps->size; + + /* Eventually copied to user buffer, zero */ + memset(header, 0, size); + + header->id = id; + header->version = version; + + /* Add to the end of the capability chain */ + for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next) + ; /* nothing */ + + tmp->next = caps->size; + caps->size += size; + + return header; +} +EXPORT_SYMBOL_GPL(vfio_info_cap_add); + +void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset) +{ + struct vfio_info_cap_header *tmp; + + for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next - offset) + tmp->next += offset; +} +EXPORT_SYMBOL_GPL(vfio_info_cap_shift); + /** * Module/class support */ diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 610a86a892b8..0ecae0b1cd34 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -92,6 +92,17 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); +/* + * Sub-module helpers + */ +struct vfio_info_cap { + struct vfio_info_cap_header *buf; + size_t size; +}; +extern struct vfio_info_cap_header *vfio_info_cap_add( + struct vfio_info_cap *caps, size_t size, u16 id, u16 version); +extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset); + struct pci_dev; #ifdef CONFIG_EEH extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); -- cgit v1.2.3-59-g8ed1b From ff63eb638d63b95e489f976428f1df01391e15e4 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Feb 2016 16:02:35 -0700 Subject: vfio: Define sparse mmap capability for regions We can't always support mmap across an entire device region, for example we deny mmaps covering the MSI-X table of PCI devices, but we don't really have a way to report it. We expect the user to implicitly know this restriction. We also can't split the region because vfio-pci defines an API with fixed region index to BAR number mapping. We therefore define a new capability which lists areas within the region that may be mmap'd. In addition to the MSI-X case, this potentially enables in-kernel emulation and extensions to devices. Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index d508adf17610..fde7b1e60948 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -221,13 +221,37 @@ struct vfio_region_info { #define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */ #define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */ #define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */ +#define VFIO_REGION_INFO_FLAG_CAPS (1 << 3) /* Info supports caps */ __u32 index; /* Region index */ - __u32 resv; /* Reserved for alignment */ + __u32 cap_offset; /* Offset within info struct of first cap */ __u64 size; /* Region size (bytes) */ __u64 offset; /* Region offset from start of device fd */ }; #define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8) +/* + * The sparse mmap capability allows finer granularity of specifying areas + * within a region with mmap support. When specified, the user should only + * mmap the offset ranges specified by the areas array. mmaps outside of the + * areas specified may fail (such as the range covering a PCI MSI-X table) or + * may result in improper device behavior. + * + * The structures below define version 1 of this capability. + */ +#define VFIO_REGION_INFO_CAP_SPARSE_MMAP 1 + +struct vfio_region_sparse_mmap_area { + __u64 offset; /* Offset of mmap'able area within region */ + __u64 size; /* Size of mmap'able area */ +}; + +struct vfio_region_info_cap_sparse_mmap { + struct vfio_info_cap_header header; + __u32 nr_areas; + __u32 reserved; + struct vfio_region_sparse_mmap_area areas[]; +}; + /** * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, * struct vfio_irq_info) -- cgit v1.2.3-59-g8ed1b From 188ad9d6cbbce4a1d322ac208914a1dea34b30b6 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Feb 2016 16:02:36 -0700 Subject: vfio/pci: Include sparse mmap capability for MSI-X table regions vfio-pci has never allowed the user to directly mmap the MSI-X vector table, but we've always relied on implicit knowledge of the user that they cannot do this. Now that we have capability chains that we can expose in the region info ioctl and a sparse mmap capability that represents the sub-areas within the region that can be mmap'd, we can make the mmap constraints more explicit. Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 73 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 2760a7ba3f30..4682207b1ac8 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -421,6 +421,48 @@ static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev, return walk.ret; } +static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev, + struct vfio_info_cap *caps) +{ + struct vfio_info_cap_header *header; + struct vfio_region_info_cap_sparse_mmap *sparse; + size_t end, size; + int nr_areas = 2, i = 0; + + end = pci_resource_len(vdev->pdev, vdev->msix_bar); + + /* If MSI-X table is aligned to the start or end, only one area */ + if (((vdev->msix_offset & PAGE_MASK) == 0) || + (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end)) + nr_areas = 1; + + size = sizeof(*sparse) + (nr_areas * sizeof(*sparse->areas)); + + header = vfio_info_cap_add(caps, size, + VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1); + if (IS_ERR(header)) + return PTR_ERR(header); + + sparse = container_of(header, + struct vfio_region_info_cap_sparse_mmap, header); + sparse->nr_areas = nr_areas; + + if (vdev->msix_offset & PAGE_MASK) { + sparse->areas[i].offset = 0; + sparse->areas[i].size = vdev->msix_offset & PAGE_MASK; + i++; + } + + if (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) < end) { + sparse->areas[i].offset = PAGE_ALIGN(vdev->msix_offset + + vdev->msix_size); + sparse->areas[i].size = end - sparse->areas[i].offset; + i++; + } + + return 0; +} + static long vfio_pci_ioctl(void *device_data, unsigned int cmd, unsigned long arg) { @@ -451,6 +493,8 @@ static long vfio_pci_ioctl(void *device_data, } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) { struct pci_dev *pdev = vdev->pdev; struct vfio_region_info info; + struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; + int ret; minsz = offsetofend(struct vfio_region_info, offset); @@ -479,8 +523,15 @@ static long vfio_pci_ioctl(void *device_data, VFIO_REGION_INFO_FLAG_WRITE; if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) && pci_resource_flags(pdev, info.index) & - IORESOURCE_MEM && info.size >= PAGE_SIZE) + IORESOURCE_MEM && info.size >= PAGE_SIZE) { info.flags |= VFIO_REGION_INFO_FLAG_MMAP; + if (info.index == vdev->msix_bar) { + ret = msix_sparse_mmap_cap(vdev, &caps); + if (ret) + return ret; + } + } + break; case VFIO_PCI_ROM_REGION_INDEX: { @@ -520,6 +571,26 @@ static long vfio_pci_ioctl(void *device_data, return -EINVAL; } + if (caps.size) { + info.flags |= VFIO_REGION_INFO_FLAG_CAPS; + if (info.argsz < sizeof(info) + caps.size) { + info.argsz = sizeof(info) + caps.size; + info.cap_offset = 0; + } else { + vfio_info_cap_shift(&caps, sizeof(info)); + ret = copy_to_user((void __user *)arg + + sizeof(info), caps.buf, + caps.size); + if (ret) { + kfree(caps.buf); + return ret; + } + info.cap_offset = sizeof(info); + } + + kfree(caps.buf); + } + return copy_to_user((void __user *)arg, &info, minsz); } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) { -- cgit v1.2.3-59-g8ed1b From c7bb4cb40f89224dc55755178343728e30dd583a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Feb 2016 16:02:38 -0700 Subject: vfio: Define device specific region type capability To this point vfio has only provided an interface to the user that allows them to determine the number of regions and specifics about each region. What the region represents is left to the vfio bus driver. vfio-pci chooses to use fixed indexes for fixed resources, index 0 is BAR0, 1 is BAR1,... 7 is config space, etc. This works pretty well since all PCI devices have these regions, even if they don't necessarily populate all of them. Then we start to add things like VGA, which only certain device even support. We added this the same way, but now we've wasted a region index, and due to our offset implementation the corresponding address space, for all devices. Rather than continuing that process, let's try to make regions self describing by including a capability that defines their type. For vfio-pci we'll make the current VFIO_PCI_NUM_REGIONS fixed, defining the end of the static indexes and the beginning of self describing regions. Signed-off-by: Alex Williamson --- include/uapi/linux/vfio.h | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index fde7b1e60948..1c37a0e500c6 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -252,6 +252,34 @@ struct vfio_region_info_cap_sparse_mmap { struct vfio_region_sparse_mmap_area areas[]; }; +/* + * The device specific type capability allows regions unique to a specific + * device or class of devices to be exposed. This helps solve the problem for + * vfio bus drivers of defining which region indexes correspond to which region + * on the device, without needing to resort to static indexes, as done by + * vfio-pci. For instance, if we were to go back in time, we might remove + * VFIO_PCI_VGA_REGION_INDEX and let vfio-pci simply define that all indexes + * greater than or equal to VFIO_PCI_NUM_REGIONS are device specific and we'd + * make a "VGA" device specific type to describe the VGA access space. This + * means that non-VGA devices wouldn't need to waste this index, and thus the + * address space associated with it due to implementation of device file + * descriptor offsets in vfio-pci. + * + * The current implementation is now part of the user ABI, so we can't use this + * for VGA, but there are other upcoming use cases, such as opregions for Intel + * IGD devices and framebuffers for vGPU devices. We missed VGA, but we'll + * use this for future additions. + * + * The structure below defines version 1 of this capability. + */ +#define VFIO_REGION_INFO_CAP_TYPE 2 + +struct vfio_region_info_cap_type { + struct vfio_info_cap_header header; + __u32 type; /* global per bus driver */ + __u32 subtype; /* type specific */ +}; + /** * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, * struct vfio_irq_info) @@ -387,7 +415,8 @@ enum { * between described ranges are unimplemented. */ VFIO_PCI_VGA_REGION_INDEX, - VFIO_PCI_NUM_REGIONS + VFIO_PCI_NUM_REGIONS = 9 /* Fixed user ABI, region indexes >=9 use */ + /* device specific cap to define content. */ }; enum { -- cgit v1.2.3-59-g8ed1b From 28541d41c9e04cb2ddbf93facd1e376dd5613360 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Feb 2016 16:02:39 -0700 Subject: vfio/pci: Add infrastructure for additional device specific regions Add support for additional regions with indexes started after the already defined fixed regions. Device specific code can register these regions with the new vfio_pci_register_dev_region() function. The ops structure per region currently only includes read/write access and a release function, allowing automatic cleanup when the device is closed. mmap support is only missing here because it's not needed by the first user queued for this support. Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 81 ++++++++++++++++++++++++++++++++++--- drivers/vfio/pci/vfio_pci_private.h | 27 +++++++++++++ 2 files changed, 103 insertions(+), 5 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 4682207b1ac8..813a2e67aa0c 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -175,7 +175,7 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) static void vfio_pci_disable(struct vfio_pci_device *vdev) { struct pci_dev *pdev = vdev->pdev; - int bar; + int i, bar; /* Stop the device from further DMA */ pci_clear_master(pdev); @@ -186,6 +186,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) vdev->virq_disabled = false; + for (i = 0; i < vdev->num_regions; i++) + vdev->region[i].ops->release(vdev, &vdev->region[i]); + + vdev->num_regions = 0; + kfree(vdev->region); + vdev->region = NULL; /* don't krealloc a freed pointer */ + vfio_config_free(vdev); for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) { @@ -463,6 +470,51 @@ static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev, return 0; } +static int region_type_cap(struct vfio_pci_device *vdev, + struct vfio_info_cap *caps, + unsigned int type, unsigned int subtype) +{ + struct vfio_info_cap_header *header; + struct vfio_region_info_cap_type *cap; + + header = vfio_info_cap_add(caps, sizeof(*cap), + VFIO_REGION_INFO_CAP_TYPE, 1); + if (IS_ERR(header)) + return PTR_ERR(header); + + cap = container_of(header, struct vfio_region_info_cap_type, header); + cap->type = type; + cap->subtype = subtype; + + return 0; +} + +int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, + unsigned int type, unsigned int subtype, + const struct vfio_pci_regops *ops, + size_t size, u32 flags, void *data) +{ + struct vfio_pci_region *region; + + region = krealloc(vdev->region, + (vdev->num_regions + 1) * sizeof(*region), + GFP_KERNEL); + if (!region) + return -ENOMEM; + + vdev->region = region; + vdev->region[vdev->num_regions].type = type; + vdev->region[vdev->num_regions].subtype = subtype; + vdev->region[vdev->num_regions].ops = ops; + vdev->region[vdev->num_regions].size = size; + vdev->region[vdev->num_regions].flags = flags; + vdev->region[vdev->num_regions].data = data; + + vdev->num_regions++; + + return 0; +} + static long vfio_pci_ioctl(void *device_data, unsigned int cmd, unsigned long arg) { @@ -485,7 +537,7 @@ static long vfio_pci_ioctl(void *device_data, if (vdev->reset_works) info.flags |= VFIO_DEVICE_FLAGS_RESET; - info.num_regions = VFIO_PCI_NUM_REGIONS; + info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions; info.num_irqs = VFIO_PCI_NUM_IRQS; return copy_to_user((void __user *)arg, &info, minsz); @@ -494,7 +546,7 @@ static long vfio_pci_ioctl(void *device_data, struct pci_dev *pdev = vdev->pdev; struct vfio_region_info info; struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; - int ret; + int i, ret; minsz = offsetofend(struct vfio_region_info, offset); @@ -568,7 +620,21 @@ static long vfio_pci_ioctl(void *device_data, break; default: - return -EINVAL; + if (info.index >= + VFIO_PCI_NUM_REGIONS + vdev->num_regions) + return -EINVAL; + + i = info.index - VFIO_PCI_NUM_REGIONS; + + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + info.size = vdev->region[i].size; + info.flags = vdev->region[i].flags; + + ret = region_type_cap(vdev, &caps, + vdev->region[i].type, + vdev->region[i].subtype); + if (ret) + return ret; } if (caps.size) { @@ -866,7 +932,7 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf, unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos); struct vfio_pci_device *vdev = device_data; - if (index >= VFIO_PCI_NUM_REGIONS) + if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions) return -EINVAL; switch (index) { @@ -883,6 +949,10 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf, case VFIO_PCI_VGA_REGION_INDEX: return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite); + default: + index -= VFIO_PCI_NUM_REGIONS; + return vdev->region[index].ops->rw(vdev, buf, + count, ppos, iswrite); } return -EINVAL; @@ -1065,6 +1135,7 @@ static void vfio_pci_remove(struct pci_dev *pdev) return; vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev); + kfree(vdev->region); kfree(vdev); if (vfio_pci_is_vga(pdev)) { diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 0e7394f8f69b..0710bda5ae2c 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -14,6 +14,7 @@ #include #include #include +#include #ifndef VFIO_PCI_PRIVATE_H #define VFIO_PCI_PRIVATE_H @@ -33,6 +34,25 @@ struct vfio_pci_irq_ctx { struct irq_bypass_producer producer; }; +struct vfio_pci_device; +struct vfio_pci_region; + +struct vfio_pci_regops { + size_t (*rw)(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite); + void (*release)(struct vfio_pci_device *vdev, + struct vfio_pci_region *region); +}; + +struct vfio_pci_region { + u32 type; + u32 subtype; + const struct vfio_pci_regops *ops; + void *data; + size_t size; + u32 flags; +}; + struct vfio_pci_device { struct pci_dev *pdev; void __iomem *barmap[PCI_STD_RESOURCE_END + 1]; @@ -45,6 +65,8 @@ struct vfio_pci_device { struct vfio_pci_irq_ctx *ctx; int num_ctx; int irq_type; + int num_regions; + struct vfio_pci_region *region; u8 msi_qmax; u8 msix_bar; u16 msix_size; @@ -91,4 +113,9 @@ extern void vfio_pci_uninit_perm_bits(void); extern int vfio_config_init(struct vfio_pci_device *vdev); extern void vfio_config_free(struct vfio_pci_device *vdev); + +extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, + unsigned int type, unsigned int subtype, + const struct vfio_pci_regops *ops, + size_t size, u32 flags, void *data); #endif /* VFIO_PCI_PRIVATE_H */ -- cgit v1.2.3-59-g8ed1b From 345d710491e2d2c4a9406c1d530adb37cc0429c1 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Feb 2016 16:02:41 -0700 Subject: vfio/pci: Enable virtual register in PCI config space Typically config space for a device is mapped out into capability specific handlers and unassigned space. The latter allows direct read/write access to config space. Sometimes we know about registers living in this void space and would like an easy way to virtualize them, similar to how BAR registers are managed. To do this, create one more pseudo (fake) PCI capability to be handled as purely virtual space. Reads and writes are serviced entirely from virtual config space. Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_config.c | 34 ++++++++++++++++++++++++++++++---- drivers/vfio/pci/vfio_pci_private.h | 4 ++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index fe2b470d7ec6..88dc646c0144 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -33,9 +33,8 @@ #define PCI_CFG_SPACE_SIZE 256 -/* Useful "pseudo" capabilities */ +/* Fake capability ID for standard config space */ #define PCI_CAP_ID_BASIC 0 -#define PCI_CAP_ID_INVALID 0xFF #define is_bar(offset) \ ((offset >= PCI_BASE_ADDRESS_0 && offset < PCI_BASE_ADDRESS_5 + 4) || \ @@ -301,6 +300,23 @@ static int vfio_raw_config_read(struct vfio_pci_device *vdev, int pos, return count; } +/* Virt access uses only virtualization */ +static int vfio_virt_config_write(struct vfio_pci_device *vdev, int pos, + int count, struct perm_bits *perm, + int offset, __le32 val) +{ + memcpy(vdev->vconfig + pos, &val, count); + return count; +} + +static int vfio_virt_config_read(struct vfio_pci_device *vdev, int pos, + int count, struct perm_bits *perm, + int offset, __le32 *val) +{ + memcpy(val, vdev->vconfig + pos, count); + return count; +} + /* Default capability regions to read-only, no-virtualization */ static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = { [0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read } @@ -319,6 +335,11 @@ static struct perm_bits unassigned_perms = { .writefn = vfio_raw_config_write }; +static struct perm_bits virt_perms = { + .readfn = vfio_virt_config_read, + .writefn = vfio_virt_config_write +}; + static void free_perm_bits(struct perm_bits *perm) { kfree(perm->virt); @@ -1332,6 +1353,8 @@ static int vfio_cap_init(struct vfio_pci_device *vdev) pos + i, map[pos + i], cap); } + BUILD_BUG_ON(PCI_CAP_ID_MAX >= PCI_CAP_ID_INVALID_VIRT); + memset(map + pos, cap, len); ret = vfio_fill_vconfig_bytes(vdev, pos, len); if (ret) @@ -1419,9 +1442,9 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev) /* * Even though ecap is 2 bytes, we're currently a long way * from exceeding 1 byte capabilities. If we ever make it - * up to 0xFF we'll need to up this to a two-byte, byte map. + * up to 0xFE we'll need to up this to a two-byte, byte map. */ - BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID); + BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID_VIRT); memset(map + epos, ecap, len); ret = vfio_fill_vconfig_bytes(vdev, epos, len); @@ -1597,6 +1620,9 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf, if (cap_id == PCI_CAP_ID_INVALID) { perm = &unassigned_perms; cap_start = *ppos; + } else if (cap_id == PCI_CAP_ID_INVALID_VIRT) { + perm = &virt_perms; + cap_start = *ppos; } else { if (*ppos >= PCI_CFG_SPACE_SIZE) { WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX); diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 0710bda5ae2c..b1e403235feb 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -25,6 +25,10 @@ #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) +/* Special capability IDs predefined access */ +#define PCI_CAP_ID_INVALID 0xFF /* default raw access */ +#define PCI_CAP_ID_INVALID_VIRT 0xFE /* default virt access */ + struct vfio_pci_irq_ctx { struct eventfd_ctx *trigger; struct virqfd *unmask; -- cgit v1.2.3-59-g8ed1b From 5846ff54e87d8bab4f1e330af0b5407747a0a57e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Feb 2016 16:02:43 -0700 Subject: vfio/pci: Intel IGD OpRegion support This is the first consumer of vfio device specific resource support, providing read-only access to the OpRegion for Intel graphics devices. Signed-off-by: Alex Williamson --- drivers/vfio/pci/Kconfig | 4 ++ drivers/vfio/pci/Makefile | 1 + drivers/vfio/pci/vfio_pci.c | 7 +++ drivers/vfio/pci/vfio_pci_igd.c | 111 ++++++++++++++++++++++++++++++++++++ drivers/vfio/pci/vfio_pci_private.h | 8 +++ include/uapi/linux/vfio.h | 5 ++ 6 files changed, 136 insertions(+) create mode 100644 drivers/vfio/pci/vfio_pci_igd.c diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 02912f180c6d..24ee2605b9f0 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -26,3 +26,7 @@ config VFIO_PCI_MMAP config VFIO_PCI_INTX depends on VFIO_PCI def_bool y if !S390 + +config VFIO_PCI_IGD + depends on VFIO_PCI + def_bool y if X86 diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 131079255fd9..76d8ec058edd 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -1,4 +1,5 @@ vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o +vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 813a2e67aa0c..cb2624db37d8 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -169,6 +169,13 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev)) vdev->has_vga = true; + + if (vfio_pci_is_vga(pdev) && pdev->vendor == PCI_VENDOR_ID_INTEL) { + if (vfio_pci_igd_opregion_init(vdev) == 0) + dev_info(&pdev->dev, + "Intel IGD OpRegion support enabled\n"); + } + return 0; } diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c new file mode 100644 index 000000000000..3b6a6f7b367b --- /dev/null +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -0,0 +1,111 @@ +/* + * VFIO PCI Intel Graphics support + * + * Copyright (C) 2016 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Register a device specific region through which to provide read-only + * access to the Intel IGD opregion. The register defining the opregion + * address is also virtualized to prevent user modification. + */ + +#include +#include +#include +#include + +#include "vfio_pci_private.h" + +#define OPREGION_SIGNATURE "IntelGraphicsMem" +#define OPREGION_SIZE (8 * 1024) +#define OPREGION_PCI_ADDR 0xfc + +static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; + void *base = vdev->region[i].data; + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + + if (pos >= vdev->region[i].size || iswrite) + return -EINVAL; + + count = min(count, (size_t)(vdev->region[i].size - pos)); + + if (copy_to_user(buf, base + pos, count)) + return -EFAULT; + + *ppos += count; + + return count; +} + +static void vfio_pci_igd_release(struct vfio_pci_device *vdev, + struct vfio_pci_region *region) +{ + memunmap(region->data); +} + +static const struct vfio_pci_regops vfio_pci_igd_regops = { + .rw = vfio_pci_igd_rw, + .release = vfio_pci_igd_release, +}; + +int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev) +{ + __le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR); + u32 addr, size; + void *base; + int ret; + + ret = pci_read_config_dword(vdev->pdev, OPREGION_PCI_ADDR, &addr); + if (ret) + return ret; + + if (!addr || !(~addr)) + return -ENODEV; + + base = memremap(addr, OPREGION_SIZE, MEMREMAP_WB); + if (!base) + return -ENOMEM; + + if (memcmp(base, OPREGION_SIGNATURE, 16)) { + memunmap(base); + return -EINVAL; + } + + size = le32_to_cpu(*(__le32 *)(base + 16)); + if (!size) { + memunmap(base); + return -EINVAL; + } + + size *= 1024; /* In KB */ + + if (size != OPREGION_SIZE) { + memunmap(base); + base = memremap(addr, size, MEMREMAP_WB); + if (!base) + return -ENOMEM; + } + + ret = vfio_pci_register_dev_region(vdev, + PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, + &vfio_pci_igd_regops, size, VFIO_REGION_INFO_FLAG_READ, base); + if (ret) { + memunmap(base); + return ret; + } + + /* Fill vconfig with the hw value and virtualize register */ + *dwordp = cpu_to_le32(addr); + memset(vdev->pci_config_map + OPREGION_PCI_ADDR, + PCI_CAP_ID_INVALID_VIRT, 4); + + return ret; +} diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index b1e403235feb..19f7699ac699 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -122,4 +122,12 @@ extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, unsigned int type, unsigned int subtype, const struct vfio_pci_regops *ops, size_t size, u32 flags, void *data); +#ifdef CONFIG_VFIO_PCI_IGD +extern int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev); +#else +static inline int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev) +{ + return -ENODEV; +} +#endif #endif /* VFIO_PCI_PRIVATE_H */ diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 1c37a0e500c6..e622982dbc53 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -280,6 +280,11 @@ struct vfio_region_info_cap_type { __u32 subtype; /* type specific */ }; +#define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31) +#define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) + +#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1) + /** * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, * struct vfio_irq_info) -- cgit v1.2.3-59-g8ed1b From f572a960a15e8bb56599f6d2358a9c18f0808e91 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Feb 2016 16:02:45 -0700 Subject: vfio/pci: Intel IGD host and LCP bridge config space access Provide read-only access to PCI config space of the PCI host bridge and LPC bridge through device specific regions. This may be used to configure a VM with matching register contents to satisfy driver requirements. Providing this through the vfio file descriptor removes an additional userspace requirement for access through pci-sysfs and removes the CAP_SYS_ADMIN requirement that doesn't appear to apply to the specific devices we're accessing. Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 15 +++- drivers/vfio/pci/vfio_pci_igd.c | 171 +++++++++++++++++++++++++++++++++++- drivers/vfio/pci/vfio_pci_private.h | 4 +- include/uapi/linux/vfio.h | 3 + 4 files changed, 186 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index cb2624db37d8..74a375297494 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -111,6 +111,7 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev) } static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); +static void vfio_pci_disable(struct vfio_pci_device *vdev); static int vfio_pci_enable(struct vfio_pci_device *vdev) { @@ -170,10 +171,16 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) vdev->has_vga = true; - if (vfio_pci_is_vga(pdev) && pdev->vendor == PCI_VENDOR_ID_INTEL) { - if (vfio_pci_igd_opregion_init(vdev) == 0) - dev_info(&pdev->dev, - "Intel IGD OpRegion support enabled\n"); + if (vfio_pci_is_vga(pdev) && + pdev->vendor == PCI_VENDOR_ID_INTEL && + IS_ENABLED(CONFIG_VFIO_PCI_IGD)) { + ret = vfio_pci_igd_init(vdev); + if (ret) { + dev_warn(&vdev->pdev->dev, + "Failed to setup Intel IGD regions\n"); + vfio_pci_disable(vdev); + return ret; + } } return 0; diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index 3b6a6f7b367b..6394b168ef29 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -55,7 +55,7 @@ static const struct vfio_pci_regops vfio_pci_igd_regops = { .release = vfio_pci_igd_release, }; -int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev) +static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev) { __le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR); u32 addr, size; @@ -109,3 +109,172 @@ int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev) return ret; } + +static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, + char __user *buf, size_t count, loff_t *ppos, + bool iswrite) +{ + unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS; + struct pci_dev *pdev = vdev->region[i].data; + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + size_t size; + int ret; + + if (pos >= vdev->region[i].size || iswrite) + return -EINVAL; + + size = count = min(count, (size_t)(vdev->region[i].size - pos)); + + if ((pos & 1) && size) { + u8 val; + + ret = pci_user_read_config_byte(pdev, pos, &val); + if (ret) + return pcibios_err_to_errno(ret); + + if (copy_to_user(buf + count - size, &val, 1)) + return -EFAULT; + + pos++; + size--; + } + + if ((pos & 3) && size > 2) { + u16 val; + + ret = pci_user_read_config_word(pdev, pos, &val); + if (ret) + return pcibios_err_to_errno(ret); + + val = cpu_to_le16(val); + if (copy_to_user(buf + count - size, &val, 2)) + return -EFAULT; + + pos += 2; + size -= 2; + } + + while (size > 3) { + u32 val; + + ret = pci_user_read_config_dword(pdev, pos, &val); + if (ret) + return pcibios_err_to_errno(ret); + + val = cpu_to_le32(val); + if (copy_to_user(buf + count - size, &val, 4)) + return -EFAULT; + + pos += 4; + size -= 4; + } + + while (size >= 2) { + u16 val; + + ret = pci_user_read_config_word(pdev, pos, &val); + if (ret) + return pcibios_err_to_errno(ret); + + val = cpu_to_le16(val); + if (copy_to_user(buf + count - size, &val, 2)) + return -EFAULT; + + pos += 2; + size -= 2; + } + + while (size) { + u8 val; + + ret = pci_user_read_config_byte(pdev, pos, &val); + if (ret) + return pcibios_err_to_errno(ret); + + if (copy_to_user(buf + count - size, &val, 1)) + return -EFAULT; + + pos++; + size--; + } + + *ppos += count; + + return count; +} + +static void vfio_pci_igd_cfg_release(struct vfio_pci_device *vdev, + struct vfio_pci_region *region) +{ + struct pci_dev *pdev = region->data; + + pci_dev_put(pdev); +} + +static const struct vfio_pci_regops vfio_pci_igd_cfg_regops = { + .rw = vfio_pci_igd_cfg_rw, + .release = vfio_pci_igd_cfg_release, +}; + +static int vfio_pci_igd_cfg_init(struct vfio_pci_device *vdev) +{ + struct pci_dev *host_bridge, *lpc_bridge; + int ret; + + host_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0)); + if (!host_bridge) + return -ENODEV; + + if (host_bridge->vendor != PCI_VENDOR_ID_INTEL || + host_bridge->class != (PCI_CLASS_BRIDGE_HOST << 8)) { + pci_dev_put(host_bridge); + return -EINVAL; + } + + ret = vfio_pci_register_dev_region(vdev, + PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, + VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, + &vfio_pci_igd_cfg_regops, host_bridge->cfg_size, + VFIO_REGION_INFO_FLAG_READ, host_bridge); + if (ret) { + pci_dev_put(host_bridge); + return ret; + } + + lpc_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x1f, 0)); + if (!lpc_bridge) + return -ENODEV; + + if (lpc_bridge->vendor != PCI_VENDOR_ID_INTEL || + lpc_bridge->class != (PCI_CLASS_BRIDGE_ISA << 8)) { + pci_dev_put(lpc_bridge); + return -EINVAL; + } + + ret = vfio_pci_register_dev_region(vdev, + PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, + VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG, + &vfio_pci_igd_cfg_regops, lpc_bridge->cfg_size, + VFIO_REGION_INFO_FLAG_READ, lpc_bridge); + if (ret) { + pci_dev_put(lpc_bridge); + return ret; + } + + return 0; +} + +int vfio_pci_igd_init(struct vfio_pci_device *vdev) +{ + int ret; + + ret = vfio_pci_igd_opregion_init(vdev); + if (ret) + return ret; + + ret = vfio_pci_igd_cfg_init(vdev); + if (ret) + return ret; + + return 0; +} diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 19f7699ac699..8a7d546d18a0 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -123,9 +123,9 @@ extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev, const struct vfio_pci_regops *ops, size_t size, u32 flags, void *data); #ifdef CONFIG_VFIO_PCI_IGD -extern int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev); +extern int vfio_pci_igd_init(struct vfio_pci_device *vdev); #else -static inline int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev) +static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev) { return -ENODEV; } diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index e622982dbc53..255a2113f53c 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -283,7 +283,10 @@ struct vfio_region_info_cap_type { #define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31) #define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff) +/* 8086 Vendor sub-types */ #define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1) +#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2) +#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3) /** * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9, -- cgit v1.2.3-59-g8ed1b From a13b64591747e8a4ab2df24540a17fca34de2d34 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 22 Feb 2016 16:02:46 -0700 Subject: vfio/pci: Expose shadow ROM as PCI option ROM Integrated graphics may have their ROM shadowed at 0xc0000 rather than implement a PCI option ROM. Make this ROM appear to the user using the ROM BAR. Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 10 ++++++++-- drivers/vfio/pci/vfio_pci_config.c | 11 ++++++++--- drivers/vfio/pci/vfio_pci_rdwr.c | 9 ++++++--- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 74a375297494..1ce1d364308c 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -609,8 +609,14 @@ static long vfio_pci_ioctl(void *device_data, /* Report the BAR size, not the ROM size */ info.size = pci_resource_len(pdev, info.index); - if (!info.size) - break; + if (!info.size) { + /* Shadow ROMs appear as PCI option ROMs */ + if (pdev->resource[PCI_ROM_RESOURCE].flags & + IORESOURCE_ROM_SHADOW) + info.size = 0x20000; + else + break; + } /* Is it really there? */ io = pci_map_rom(pdev, &size); diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c index 88dc646c0144..142c533efec7 100644 --- a/drivers/vfio/pci/vfio_pci_config.c +++ b/drivers/vfio/pci/vfio_pci_config.c @@ -475,14 +475,19 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev) bar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS]; /* - * NB. we expose the actual BAR size here, regardless of whether - * we can read it. When we report the REGION_INFO for the ROM - * we report what PCI tells us is the actual ROM size. + * NB. REGION_INFO will have reported zero size if we weren't able + * to read the ROM, but we still return the actual BAR size here if + * it exists (or the shadow ROM space). */ if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) { mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1); mask |= PCI_ROM_ADDRESS_ENABLE; *bar &= cpu_to_le32((u32)mask); + } else if (pdev->resource[PCI_ROM_RESOURCE].flags & + IORESOURCE_ROM_SHADOW) { + mask = ~(0x20000 - 1); + mask |= PCI_ROM_ADDRESS_ENABLE; + *bar &= cpu_to_le32((u32)mask); } else *bar = 0; diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c index 210db24d2204..5ffd1d9ad4bd 100644 --- a/drivers/vfio/pci/vfio_pci_rdwr.c +++ b/drivers/vfio/pci/vfio_pci_rdwr.c @@ -124,11 +124,14 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf, void __iomem *io; ssize_t done; - if (!pci_resource_start(pdev, bar)) + if (pci_resource_start(pdev, bar)) + end = pci_resource_len(pdev, bar); + else if (bar == PCI_ROM_RESOURCE && + pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW) + end = 0x20000; + else return -EINVAL; - end = pci_resource_len(pdev, bar); - if (pos >= end) return -EINVAL; -- cgit v1.2.3-59-g8ed1b From c4aec3101319f84363a57e09086c2aff6c60a3c3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 25 Feb 2016 10:52:12 +0300 Subject: vfio/pci: return -EFAULT if copy_to_user fails The copy_to_user() function returns the number of bytes that were not copied but we want to return -EFAULT on error here. Fixes: 188ad9d6cbbc ('vfio/pci: Include sparse mmap capability for MSI-X table regions') Signed-off-by: Dan Carpenter Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 1ce1d364308c..98059df9cff6 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -664,12 +664,11 @@ static long vfio_pci_ioctl(void *device_data, info.cap_offset = 0; } else { vfio_info_cap_shift(&caps, sizeof(info)); - ret = copy_to_user((void __user *)arg + - sizeof(info), caps.buf, - caps.size); - if (ret) { + if (copy_to_user((void __user *)arg + + sizeof(info), caps.buf, + caps.size)) { kfree(caps.buf); - return ret; + return -EFAULT; } info.cap_offset = sizeof(info); } -- cgit v1.2.3-59-g8ed1b