aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/vfio/pci/Kconfig4
-rw-r--r--drivers/vfio/pci/Makefile1
-rw-r--r--drivers/vfio/pci/vfio_pci.c175
-rw-r--r--drivers/vfio/pci/vfio_pci_config.c45
-rw-r--r--drivers/vfio/pci/vfio_pci_igd.c280
-rw-r--r--drivers/vfio/pci/vfio_pci_intrs.c17
-rw-r--r--drivers/vfio/pci/vfio_pci_private.h39
-rw-r--r--drivers/vfio/pci/vfio_pci_rdwr.c9
-rw-r--r--drivers/vfio/vfio.c70
-rw-r--r--include/linux/vfio.h11
-rw-r--r--include/uapi/linux/vfio.h92
11 files changed, 706 insertions, 37 deletions
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index 02912f180c6d..24ee2605b9f0 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -26,3 +26,7 @@ config VFIO_PCI_MMAP
config VFIO_PCI_INTX
depends on VFIO_PCI
def_bool y if !S390
+
+config VFIO_PCI_IGD
+ depends on VFIO_PCI
+ def_bool y if X86
diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile
index 131079255fd9..76d8ec058edd 100644
--- a/drivers/vfio/pci/Makefile
+++ b/drivers/vfio/pci/Makefile
@@ -1,4 +1,5 @@
vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
+vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 8c80a48e3233..712a84978e97 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -111,6 +111,7 @@ static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
}
static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev);
+static void vfio_pci_disable(struct vfio_pci_device *vdev);
static int vfio_pci_enable(struct vfio_pci_device *vdev)
{
@@ -169,13 +170,26 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev)
if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
vdev->has_vga = true;
+
+ if (vfio_pci_is_vga(pdev) &&
+ pdev->vendor == PCI_VENDOR_ID_INTEL &&
+ IS_ENABLED(CONFIG_VFIO_PCI_IGD)) {
+ ret = vfio_pci_igd_init(vdev);
+ if (ret) {
+ dev_warn(&vdev->pdev->dev,
+ "Failed to setup Intel IGD regions\n");
+ vfio_pci_disable(vdev);
+ return ret;
+ }
+ }
+
return 0;
}
static void vfio_pci_disable(struct vfio_pci_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
- int bar;
+ int i, bar;
/* Stop the device from further DMA */
pci_clear_master(pdev);
@@ -186,6 +200,13 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev)
vdev->virq_disabled = false;
+ for (i = 0; i < vdev->num_regions; i++)
+ vdev->region[i].ops->release(vdev, &vdev->region[i]);
+
+ vdev->num_regions = 0;
+ kfree(vdev->region);
+ vdev->region = NULL; /* don't krealloc a freed pointer */
+
vfio_config_free(vdev);
for (bar = PCI_STD_RESOURCES; bar <= PCI_STD_RESOURCE_END; bar++) {
@@ -421,6 +442,93 @@ static int vfio_pci_for_each_slot_or_bus(struct pci_dev *pdev,
return walk.ret;
}
+static int msix_sparse_mmap_cap(struct vfio_pci_device *vdev,
+ struct vfio_info_cap *caps)
+{
+ struct vfio_info_cap_header *header;
+ struct vfio_region_info_cap_sparse_mmap *sparse;
+ size_t end, size;
+ int nr_areas = 2, i = 0;
+
+ end = pci_resource_len(vdev->pdev, vdev->msix_bar);
+
+ /* If MSI-X table is aligned to the start or end, only one area */
+ if (((vdev->msix_offset & PAGE_MASK) == 0) ||
+ (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) >= end))
+ nr_areas = 1;
+
+ size = sizeof(*sparse) + (nr_areas * sizeof(*sparse->areas));
+
+ header = vfio_info_cap_add(caps, size,
+ VFIO_REGION_INFO_CAP_SPARSE_MMAP, 1);
+ if (IS_ERR(header))
+ return PTR_ERR(header);
+
+ sparse = container_of(header,
+ struct vfio_region_info_cap_sparse_mmap, header);
+ sparse->nr_areas = nr_areas;
+
+ if (vdev->msix_offset & PAGE_MASK) {
+ sparse->areas[i].offset = 0;
+ sparse->areas[i].size = vdev->msix_offset & PAGE_MASK;
+ i++;
+ }
+
+ if (PAGE_ALIGN(vdev->msix_offset + vdev->msix_size) < end) {
+ sparse->areas[i].offset = PAGE_ALIGN(vdev->msix_offset +
+ vdev->msix_size);
+ sparse->areas[i].size = end - sparse->areas[i].offset;
+ i++;
+ }
+
+ return 0;
+}
+
+static int region_type_cap(struct vfio_pci_device *vdev,
+ struct vfio_info_cap *caps,
+ unsigned int type, unsigned int subtype)
+{
+ struct vfio_info_cap_header *header;
+ struct vfio_region_info_cap_type *cap;
+
+ header = vfio_info_cap_add(caps, sizeof(*cap),
+ VFIO_REGION_INFO_CAP_TYPE, 1);
+ if (IS_ERR(header))
+ return PTR_ERR(header);
+
+ cap = container_of(header, struct vfio_region_info_cap_type, header);
+ cap->type = type;
+ cap->subtype = subtype;
+
+ return 0;
+}
+
+int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
+ unsigned int type, unsigned int subtype,
+ const struct vfio_pci_regops *ops,
+ size_t size, u32 flags, void *data)
+{
+ struct vfio_pci_region *region;
+
+ region = krealloc(vdev->region,
+ (vdev->num_regions + 1) * sizeof(*region),
+ GFP_KERNEL);
+ if (!region)
+ return -ENOMEM;
+
+ vdev->region = region;
+ vdev->region[vdev->num_regions].type = type;
+ vdev->region[vdev->num_regions].subtype = subtype;
+ vdev->region[vdev->num_regions].ops = ops;
+ vdev->region[vdev->num_regions].size = size;
+ vdev->region[vdev->num_regions].flags = flags;
+ vdev->region[vdev->num_regions].data = data;
+
+ vdev->num_regions++;
+
+ return 0;
+}
+
static long vfio_pci_ioctl(void *device_data,
unsigned int cmd, unsigned long arg)
{
@@ -443,7 +551,7 @@ static long vfio_pci_ioctl(void *device_data,
if (vdev->reset_works)
info.flags |= VFIO_DEVICE_FLAGS_RESET;
- info.num_regions = VFIO_PCI_NUM_REGIONS;
+ info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
info.num_irqs = VFIO_PCI_NUM_IRQS;
return copy_to_user((void __user *)arg, &info, minsz) ?
@@ -452,6 +560,8 @@ static long vfio_pci_ioctl(void *device_data,
} else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
struct pci_dev *pdev = vdev->pdev;
struct vfio_region_info info;
+ struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+ int i, ret;
minsz = offsetofend(struct vfio_region_info, offset);
@@ -480,8 +590,15 @@ static long vfio_pci_ioctl(void *device_data,
VFIO_REGION_INFO_FLAG_WRITE;
if (IS_ENABLED(CONFIG_VFIO_PCI_MMAP) &&
pci_resource_flags(pdev, info.index) &
- IORESOURCE_MEM && info.size >= PAGE_SIZE)
+ IORESOURCE_MEM && info.size >= PAGE_SIZE) {
info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
+ if (info.index == vdev->msix_bar) {
+ ret = msix_sparse_mmap_cap(vdev, &caps);
+ if (ret)
+ return ret;
+ }
+ }
+
break;
case VFIO_PCI_ROM_REGION_INDEX:
{
@@ -493,8 +610,14 @@ static long vfio_pci_ioctl(void *device_data,
/* Report the BAR size, not the ROM size */
info.size = pci_resource_len(pdev, info.index);
- if (!info.size)
- break;
+ if (!info.size) {
+ /* Shadow ROMs appear as PCI option ROMs */
+ if (pdev->resource[PCI_ROM_RESOURCE].flags &
+ IORESOURCE_ROM_SHADOW)
+ info.size = 0x20000;
+ else
+ break;
+ }
/* Is it really there? */
io = pci_map_rom(pdev, &size);
@@ -518,7 +641,40 @@ static long vfio_pci_ioctl(void *device_data,
break;
default:
- return -EINVAL;
+ if (info.index >=
+ VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+ return -EINVAL;
+
+ i = info.index - VFIO_PCI_NUM_REGIONS;
+
+ info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+ info.size = vdev->region[i].size;
+ info.flags = vdev->region[i].flags;
+
+ ret = region_type_cap(vdev, &caps,
+ vdev->region[i].type,
+ vdev->region[i].subtype);
+ if (ret)
+ return ret;
+ }
+
+ if (caps.size) {
+ info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
+ if (info.argsz < sizeof(info) + caps.size) {
+ info.argsz = sizeof(info) + caps.size;
+ info.cap_offset = 0;
+ } else {
+ vfio_info_cap_shift(&caps, sizeof(info));
+ if (copy_to_user((void __user *)arg +
+ sizeof(info), caps.buf,
+ caps.size)) {
+ kfree(caps.buf);
+ return -EFAULT;
+ }
+ info.cap_offset = sizeof(info);
+ }
+
+ kfree(caps.buf);
}
return copy_to_user((void __user *)arg, &info, minsz) ?
@@ -798,7 +954,7 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
struct vfio_pci_device *vdev = device_data;
- if (index >= VFIO_PCI_NUM_REGIONS)
+ if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
return -EINVAL;
switch (index) {
@@ -815,6 +971,10 @@ static ssize_t vfio_pci_rw(void *device_data, char __user *buf,
case VFIO_PCI_VGA_REGION_INDEX:
return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
+ default:
+ index -= VFIO_PCI_NUM_REGIONS;
+ return vdev->region[index].ops->rw(vdev, buf,
+ count, ppos, iswrite);
}
return -EINVAL;
@@ -997,6 +1157,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
return;
vfio_iommu_group_put(pdev->dev.iommu_group, &pdev->dev);
+ kfree(vdev->region);
kfree(vdev);
if (vfio_pci_is_vga(pdev)) {
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index fe2b470d7ec6..142c533efec7 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -33,9 +33,8 @@
#define PCI_CFG_SPACE_SIZE 256
-/* Useful "pseudo" capabilities */
+/* Fake capability ID for standard config space */
#define PCI_CAP_ID_BASIC 0
-#define PCI_CAP_ID_INVALID 0xFF
#define is_bar(offset) \
((offset >= PCI_BASE_ADDRESS_0 && offset < PCI_BASE_ADDRESS_5 + 4) || \
@@ -301,6 +300,23 @@ static int vfio_raw_config_read(struct vfio_pci_device *vdev, int pos,
return count;
}
+/* Virt access uses only virtualization */
+static int vfio_virt_config_write(struct vfio_pci_device *vdev, int pos,
+ int count, struct perm_bits *perm,
+ int offset, __le32 val)
+{
+ memcpy(vdev->vconfig + pos, &val, count);
+ return count;
+}
+
+static int vfio_virt_config_read(struct vfio_pci_device *vdev, int pos,
+ int count, struct perm_bits *perm,
+ int offset, __le32 *val)
+{
+ memcpy(val, vdev->vconfig + pos, count);
+ return count;
+}
+
/* Default capability regions to read-only, no-virtualization */
static struct perm_bits cap_perms[PCI_CAP_ID_MAX + 1] = {
[0 ... PCI_CAP_ID_MAX] = { .readfn = vfio_direct_config_read }
@@ -319,6 +335,11 @@ static struct perm_bits unassigned_perms = {
.writefn = vfio_raw_config_write
};
+static struct perm_bits virt_perms = {
+ .readfn = vfio_virt_config_read,
+ .writefn = vfio_virt_config_write
+};
+
static void free_perm_bits(struct perm_bits *perm)
{
kfree(perm->virt);
@@ -454,14 +475,19 @@ static void vfio_bar_fixup(struct vfio_pci_device *vdev)
bar = (__le32 *)&vdev->vconfig[PCI_ROM_ADDRESS];
/*
- * NB. we expose the actual BAR size here, regardless of whether
- * we can read it. When we report the REGION_INFO for the ROM
- * we report what PCI tells us is the actual ROM size.
+ * NB. REGION_INFO will have reported zero size if we weren't able
+ * to read the ROM, but we still return the actual BAR size here if
+ * it exists (or the shadow ROM space).
*/
if (pci_resource_start(pdev, PCI_ROM_RESOURCE)) {
mask = ~(pci_resource_len(pdev, PCI_ROM_RESOURCE) - 1);
mask |= PCI_ROM_ADDRESS_ENABLE;
*bar &= cpu_to_le32((u32)mask);
+ } else if (pdev->resource[PCI_ROM_RESOURCE].flags &
+ IORESOURCE_ROM_SHADOW) {
+ mask = ~(0x20000 - 1);
+ mask |= PCI_ROM_ADDRESS_ENABLE;
+ *bar &= cpu_to_le32((u32)mask);
} else
*bar = 0;
@@ -1332,6 +1358,8 @@ static int vfio_cap_init(struct vfio_pci_device *vdev)
pos + i, map[pos + i], cap);
}
+ BUILD_BUG_ON(PCI_CAP_ID_MAX >= PCI_CAP_ID_INVALID_VIRT);
+
memset(map + pos, cap, len);
ret = vfio_fill_vconfig_bytes(vdev, pos, len);
if (ret)
@@ -1419,9 +1447,9 @@ static int vfio_ecap_init(struct vfio_pci_device *vdev)
/*
* Even though ecap is 2 bytes, we're currently a long way
* from exceeding 1 byte capabilities. If we ever make it
- * up to 0xFF we'll need to up this to a two-byte, byte map.
+ * up to 0xFE we'll need to up this to a two-byte, byte map.
*/
- BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID);
+ BUILD_BUG_ON(PCI_EXT_CAP_ID_MAX >= PCI_CAP_ID_INVALID_VIRT);
memset(map + epos, ecap, len);
ret = vfio_fill_vconfig_bytes(vdev, epos, len);
@@ -1597,6 +1625,9 @@ static ssize_t vfio_config_do_rw(struct vfio_pci_device *vdev, char __user *buf,
if (cap_id == PCI_CAP_ID_INVALID) {
perm = &unassigned_perms;
cap_start = *ppos;
+ } else if (cap_id == PCI_CAP_ID_INVALID_VIRT) {
+ perm = &virt_perms;
+ cap_start = *ppos;
} else {
if (*ppos >= PCI_CFG_SPACE_SIZE) {
WARN_ON(cap_id > PCI_EXT_CAP_ID_MAX);
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
new file mode 100644
index 000000000000..6394b168ef29
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -0,0 +1,280 @@
+/*
+ * VFIO PCI Intel Graphics support
+ *
+ * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
+ * Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Register a device specific region through which to provide read-only
+ * access to the Intel IGD opregion. The register defining the opregion
+ * address is also virtualized to prevent user modification.
+ */
+
+#include <linux/io.h>
+#include <linux/pci.h>
+#include <linux/uaccess.h>
+#include <linux/vfio.h>
+
+#include "vfio_pci_private.h"
+
+#define OPREGION_SIGNATURE "IntelGraphicsMem"
+#define OPREGION_SIZE (8 * 1024)
+#define OPREGION_PCI_ADDR 0xfc
+
+static size_t vfio_pci_igd_rw(struct vfio_pci_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite)
+{
+ unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+ void *base = vdev->region[i].data;
+ loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+
+ if (pos >= vdev->region[i].size || iswrite)
+ return -EINVAL;
+
+ count = min(count, (size_t)(vdev->region[i].size - pos));
+
+ if (copy_to_user(buf, base + pos, count))
+ return -EFAULT;
+
+ *ppos += count;
+
+ return count;
+}
+
+static void vfio_pci_igd_release(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region)
+{
+ memunmap(region->data);
+}
+
+static const struct vfio_pci_regops vfio_pci_igd_regops = {
+ .rw = vfio_pci_igd_rw,
+ .release = vfio_pci_igd_release,
+};
+
+static int vfio_pci_igd_opregion_init(struct vfio_pci_device *vdev)
+{
+ __le32 *dwordp = (__le32 *)(vdev->vconfig + OPREGION_PCI_ADDR);
+ u32 addr, size;
+ void *base;
+ int ret;
+
+ ret = pci_read_config_dword(vdev->pdev, OPREGION_PCI_ADDR, &addr);
+ if (ret)
+ return ret;
+
+ if (!addr || !(~addr))
+ return -ENODEV;
+
+ base = memremap(addr, OPREGION_SIZE, MEMREMAP_WB);
+ if (!base)
+ return -ENOMEM;
+
+ if (memcmp(base, OPREGION_SIGNATURE, 16)) {
+ memunmap(base);
+ return -EINVAL;
+ }
+
+ size = le32_to_cpu(*(__le32 *)(base + 16));
+ if (!size) {
+ memunmap(base);
+ return -EINVAL;
+ }
+
+ size *= 1024; /* In KB */
+
+ if (size != OPREGION_SIZE) {
+ memunmap(base);
+ base = memremap(addr, size, MEMREMAP_WB);
+ if (!base)
+ return -ENOMEM;
+ }
+
+ ret = vfio_pci_register_dev_region(vdev,
+ PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+ VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION,
+ &vfio_pci_igd_regops, size, VFIO_REGION_INFO_FLAG_READ, base);
+ if (ret) {
+ memunmap(base);
+ return ret;
+ }
+
+ /* Fill vconfig with the hw value and virtualize register */
+ *dwordp = cpu_to_le32(addr);
+ memset(vdev->pci_config_map + OPREGION_PCI_ADDR,
+ PCI_CAP_ID_INVALID_VIRT, 4);
+
+ return ret;
+}
+
+static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
+ char __user *buf, size_t count, loff_t *ppos,
+ bool iswrite)
+{
+ unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - VFIO_PCI_NUM_REGIONS;
+ struct pci_dev *pdev = vdev->region[i].data;
+ loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK;
+ size_t size;
+ int ret;
+
+ if (pos >= vdev->region[i].size || iswrite)
+ return -EINVAL;
+
+ size = count = min(count, (size_t)(vdev->region[i].size - pos));
+
+ if ((pos & 1) && size) {
+ u8 val;
+
+ ret = pci_user_read_config_byte(pdev, pos, &val);
+ if (ret)
+ return pcibios_err_to_errno(ret);
+
+ if (copy_to_user(buf + count - size, &val, 1))
+ return -EFAULT;
+
+ pos++;
+ size--;
+ }
+
+ if ((pos & 3) && size > 2) {
+ u16 val;
+
+ ret = pci_user_read_config_word(pdev, pos, &val);
+ if (ret)
+ return pcibios_err_to_errno(ret);
+
+ val = cpu_to_le16(val);
+ if (copy_to_user(buf + count - size, &val, 2))
+ return -EFAULT;
+
+ pos += 2;
+ size -= 2;
+ }
+
+ while (size > 3) {
+ u32 val;
+
+ ret = pci_user_read_config_dword(pdev, pos, &val);
+ if (ret)
+ return pcibios_err_to_errno(ret);
+
+ val = cpu_to_le32(val);
+ if (copy_to_user(buf + count - size, &val, 4))
+ return -EFAULT;
+
+ pos += 4;
+ size -= 4;
+ }
+
+ while (size >= 2) {
+ u16 val;
+
+ ret = pci_user_read_config_word(pdev, pos, &val);
+ if (ret)
+ return pcibios_err_to_errno(ret);
+
+ val = cpu_to_le16(val);
+ if (copy_to_user(buf + count - size, &val, 2))
+ return -EFAULT;
+
+ pos += 2;
+ size -= 2;
+ }
+
+ while (size) {
+ u8 val;
+
+ ret = pci_user_read_config_byte(pdev, pos, &val);
+ if (ret)
+ return pcibios_err_to_errno(ret);
+
+ if (copy_to_user(buf + count - size, &val, 1))
+ return -EFAULT;
+
+ pos++;
+ size--;
+ }
+
+ *ppos += count;
+
+ return count;
+}
+
+static void vfio_pci_igd_cfg_release(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region)
+{
+ struct pci_dev *pdev = region->data;
+
+ pci_dev_put(pdev);
+}
+
+static const struct vfio_pci_regops vfio_pci_igd_cfg_regops = {
+ .rw = vfio_pci_igd_cfg_rw,
+ .release = vfio_pci_igd_cfg_release,
+};
+
+static int vfio_pci_igd_cfg_init(struct vfio_pci_device *vdev)
+{
+ struct pci_dev *host_bridge, *lpc_bridge;
+ int ret;
+
+ host_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0, 0));
+ if (!host_bridge)
+ return -ENODEV;
+
+ if (host_bridge->vendor != PCI_VENDOR_ID_INTEL ||
+ host_bridge->class != (PCI_CLASS_BRIDGE_HOST << 8)) {
+ pci_dev_put(host_bridge);
+ return -EINVAL;
+ }
+
+ ret = vfio_pci_register_dev_region(vdev,
+ PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+ VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG,
+ &vfio_pci_igd_cfg_regops, host_bridge->cfg_size,
+ VFIO_REGION_INFO_FLAG_READ, host_bridge);
+ if (ret) {
+ pci_dev_put(host_bridge);
+ return ret;
+ }
+
+ lpc_bridge = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(0x1f, 0));
+ if (!lpc_bridge)
+ return -ENODEV;
+
+ if (lpc_bridge->vendor != PCI_VENDOR_ID_INTEL ||
+ lpc_bridge->class != (PCI_CLASS_BRIDGE_ISA << 8)) {
+ pci_dev_put(lpc_bridge);
+ return -EINVAL;
+ }
+
+ ret = vfio_pci_register_dev_region(vdev,
+ PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
+ VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG,
+ &vfio_pci_igd_cfg_regops, lpc_bridge->cfg_size,
+ VFIO_REGION_INFO_FLAG_READ, lpc_bridge);
+ if (ret) {
+ pci_dev_put(lpc_bridge);
+ return ret;
+ }
+
+ return 0;
+}
+
+int vfio_pci_igd_init(struct vfio_pci_device *vdev)
+{
+ int ret;
+
+ ret = vfio_pci_igd_opregion_init(vdev);
+ if (ret)
+ return ret;
+
+ ret = vfio_pci_igd_cfg_init(vdev);
+ if (ret)
+ return ret;
+
+ return 0;
+}
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 3b3ba15558b7..e9ea3fef144a 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -309,14 +309,14 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
int vector, int fd, bool msix)
{
struct pci_dev *pdev = vdev->pdev;
- int irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;
- char *name = msix ? "vfio-msix" : "vfio-msi";
struct eventfd_ctx *trigger;
- int ret;
+ int irq, ret;
- if (vector >= vdev->num_ctx)
+ if (vector < 0 || vector >= vdev->num_ctx)
return -EINVAL;
+ irq = msix ? vdev->msix[vector].vector : pdev->irq + vector;
+
if (vdev->ctx[vector].trigger) {
free_irq(irq, vdev->ctx[vector].trigger);
irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
@@ -328,8 +328,9 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
if (fd < 0)
return 0;
- vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "%s[%d](%s)",
- name, vector, pci_name(pdev));
+ vdev->ctx[vector].name = kasprintf(GFP_KERNEL, "vfio-msi%s[%d](%s)",
+ msix ? "x" : "", vector,
+ pci_name(pdev));
if (!vdev->ctx[vector].name)
return -ENOMEM;
@@ -379,7 +380,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
{
int i, j, ret = 0;
- if (start + count > vdev->num_ctx)
+ if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
return -EINVAL;
for (i = 0, j = start; i < count && !ret; i++, j++) {
@@ -388,7 +389,7 @@ static int vfio_msi_set_block(struct vfio_pci_device *vdev, unsigned start,
}
if (ret) {
- for (--j; j >= start; j--)
+ for (--j; j >= (int)start; j--)
vfio_msi_set_vector_signal(vdev, j, -1, msix);
}
diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h
index 0e7394f8f69b..8a7d546d18a0 100644
--- a/drivers/vfio/pci/vfio_pci_private.h
+++ b/drivers/vfio/pci/vfio_pci_private.h
@@ -14,6 +14,7 @@
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/irqbypass.h>
+#include <linux/types.h>
#ifndef VFIO_PCI_PRIVATE_H
#define VFIO_PCI_PRIVATE_H
@@ -24,6 +25,10 @@
#define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT)
#define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1)
+/* Special capability IDs predefined access */
+#define PCI_CAP_ID_INVALID 0xFF /* default raw access */
+#define PCI_CAP_ID_INVALID_VIRT 0xFE /* default virt access */
+
struct vfio_pci_irq_ctx {
struct eventfd_ctx *trigger;
struct virqfd *unmask;
@@ -33,6 +38,25 @@ struct vfio_pci_irq_ctx {
struct irq_bypass_producer producer;
};
+struct vfio_pci_device;
+struct vfio_pci_region;
+
+struct vfio_pci_regops {
+ size_t (*rw)(struct vfio_pci_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite);
+ void (*release)(struct vfio_pci_device *vdev,
+ struct vfio_pci_region *region);
+};
+
+struct vfio_pci_region {
+ u32 type;
+ u32 subtype;
+ const struct vfio_pci_regops *ops;
+ void *data;
+ size_t size;
+ u32 flags;
+};
+
struct vfio_pci_device {
struct pci_dev *pdev;
void __iomem *barmap[PCI_STD_RESOURCE_END + 1];
@@ -45,6 +69,8 @@ struct vfio_pci_device {
struct vfio_pci_irq_ctx *ctx;
int num_ctx;
int irq_type;
+ int num_regions;
+ struct vfio_pci_region *region;
u8 msi_qmax;
u8 msix_bar;
u16 msix_size;
@@ -91,4 +117,17 @@ extern void vfio_pci_uninit_perm_bits(void);
extern int vfio_config_init(struct vfio_pci_device *vdev);
extern void vfio_config_free(struct vfio_pci_device *vdev);
+
+extern int vfio_pci_register_dev_region(struct vfio_pci_device *vdev,
+ unsigned int type, unsigned int subtype,
+ const struct vfio_pci_regops *ops,
+ size_t size, u32 flags, void *data);
+#ifdef CONFIG_VFIO_PCI_IGD
+extern int vfio_pci_igd_init(struct vfio_pci_device *vdev);
+#else
+static inline int vfio_pci_igd_init(struct vfio_pci_device *vdev)
+{
+ return -ENODEV;
+}
+#endif
#endif /* VFIO_PCI_PRIVATE_H */
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 210db24d2204..5ffd1d9ad4bd 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -124,11 +124,14 @@ ssize_t vfio_pci_bar_rw(struct vfio_pci_device *vdev, char __user *buf,
void __iomem *io;
ssize_t done;
- if (!pci_resource_start(pdev, bar))
+ if (pci_resource_start(pdev, bar))
+ end = pci_resource_len(pdev, bar);
+ else if (bar == PCI_ROM_RESOURCE &&
+ pdev->resource[bar].flags & IORESOURCE_ROM_SHADOW)
+ end = 0x20000;
+ else
return -EINVAL;
- end = pci_resource_len(pdev, bar);
-
if (pos >= end)
return -EINVAL;
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index ecca316386f5..6fd6fa5469de 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1080,30 +1080,26 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
continue;
}
- /* module reference holds the driver we're working on */
- mutex_unlock(&vfio.iommu_drivers_lock);
-
data = driver->ops->open(arg);
if (IS_ERR(data)) {
ret = PTR_ERR(data);
module_put(driver->ops->owner);
- goto skip_drivers_unlock;
+ continue;
}
ret = __vfio_container_attach_groups(container, driver, data);
- if (!ret) {
- container->iommu_driver = driver;
- container->iommu_data = data;
- } else {
+ if (ret) {
driver->ops->release(data);
module_put(driver->ops->owner);
+ continue;
}
- goto skip_drivers_unlock;
+ container->iommu_driver = driver;
+ container->iommu_data = data;
+ break;
}
mutex_unlock(&vfio.iommu_drivers_lock);
-skip_drivers_unlock:
up_write(&container->group_lock);
return ret;
@@ -1733,6 +1729,60 @@ long vfio_external_check_extension(struct vfio_group *group, unsigned long arg)
EXPORT_SYMBOL_GPL(vfio_external_check_extension);
/**
+ * Sub-module support
+ */
+/*
+ * Helper for managing a buffer of info chain capabilities, allocate or
+ * reallocate a buffer with additional @size, filling in @id and @version
+ * of the capability. A pointer to the new capability is returned.
+ *
+ * NB. The chain is based at the head of the buffer, so new entries are
+ * added to the tail, vfio_info_cap_shift() should be called to fixup the
+ * next offsets prior to copying to the user buffer.
+ */
+struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
+ size_t size, u16 id, u16 version)
+{
+ void *buf;
+ struct vfio_info_cap_header *header, *tmp;
+
+ buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
+ if (!buf) {
+ kfree(caps->buf);
+ caps->size = 0;
+ return ERR_PTR(-ENOMEM);
+ }
+
+ caps->buf = buf;
+ header = buf + caps->size;
+
+ /* Eventually copied to user buffer, zero */
+ memset(header, 0, size);
+
+ header->id = id;
+ header->version = version;
+
+ /* Add to the end of the capability chain */
+ for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next)
+ ; /* nothing */
+
+ tmp->next = caps->size;
+ caps->size += size;
+
+ return header;
+}
+EXPORT_SYMBOL_GPL(vfio_info_cap_add);
+
+void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
+{
+ struct vfio_info_cap_header *tmp;
+
+ for (tmp = caps->buf; tmp->next; tmp = (void *)tmp + tmp->next - offset)
+ tmp->next += offset;
+}
+EXPORT_SYMBOL_GPL(vfio_info_cap_shift);
+
+/**
* Module/class support
*/
static char *vfio_devnode(struct device *dev, umode_t *mode)
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 610a86a892b8..0ecae0b1cd34 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -92,6 +92,17 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group);
extern long vfio_external_check_extension(struct vfio_group *group,
unsigned long arg);
+/*
+ * Sub-module helpers
+ */
+struct vfio_info_cap {
+ struct vfio_info_cap_header *buf;
+ size_t size;
+};
+extern struct vfio_info_cap_header *vfio_info_cap_add(
+ struct vfio_info_cap *caps, size_t size, u16 id, u16 version);
+extern void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset);
+
struct pci_dev;
#ifdef CONFIG_EEH
extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 7d7a4c6f2090..255a2113f53c 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -59,6 +59,33 @@
#define VFIO_TYPE (';')
#define VFIO_BASE 100
+/*
+ * For extension of INFO ioctls, VFIO makes use of a capability chain
+ * designed after PCI/e capabilities. A flag bit indicates whether
+ * this capability chain is supported and a field defined in the fixed
+ * structure defines the offset of the first capability in the chain.
+ * This field is only valid when the corresponding bit in the flags
+ * bitmap is set. This offset field is relative to the start of the
+ * INFO buffer, as is the next field within each capability header.
+ * The id within the header is a shared address space per INFO ioctl,
+ * while the version field is specific to the capability id. The
+ * contents following the header are specific to the capability id.
+ */
+struct vfio_info_cap_header {
+ __u16 id; /* Identifies capability */
+ __u16 version; /* Version specific to the capability ID */
+ __u32 next; /* Offset of next capability */
+};
+
+/*
+ * Callers of INFO ioctls passing insufficiently sized buffers will see
+ * the capability chain flag bit set, a zero value for the first capability
+ * offset (if available within the provided argsz), and argsz will be
+ * updated to report the necessary buffer size. For compatibility, the
+ * INFO ioctl will not report error in this case, but the capability chain
+ * will not be available.
+ */
+
/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
/**
@@ -194,13 +221,73 @@ struct vfio_region_info {
#define VFIO_REGION_INFO_FLAG_READ (1 << 0) /* Region supports read */
#define VFIO_REGION_INFO_FLAG_WRITE (1 << 1) /* Region supports write */
#define VFIO_REGION_INFO_FLAG_MMAP (1 << 2) /* Region supports mmap */
+#define VFIO_REGION_INFO_FLAG_CAPS (1 << 3) /* Info supports caps */
__u32 index; /* Region index */
- __u32 resv; /* Reserved for alignment */
+ __u32 cap_offset; /* Offset within info struct of first cap */
__u64 size; /* Region size (bytes) */
__u64 offset; /* Region offset from start of device fd */
};
#define VFIO_DEVICE_GET_REGION_INFO _IO(VFIO_TYPE, VFIO_BASE + 8)
+/*
+ * The sparse mmap capability allows finer granularity of specifying areas
+ * within a region with mmap support. When specified, the user should only
+ * mmap the offset ranges specified by the areas array. mmaps outside of the
+ * areas specified may fail (such as the range covering a PCI MSI-X table) or
+ * may result in improper device behavior.
+ *
+ * The structures below define version 1 of this capability.
+ */
+#define VFIO_REGION_INFO_CAP_SPARSE_MMAP 1
+
+struct vfio_region_sparse_mmap_area {
+ __u64 offset; /* Offset of mmap'able area within region */
+ __u64 size; /* Size of mmap'able area */
+};
+
+struct vfio_region_info_cap_sparse_mmap {
+ struct vfio_info_cap_header header;
+ __u32 nr_areas;
+ __u32 reserved;
+ struct vfio_region_sparse_mmap_area areas[];
+};
+
+/*
+ * The device specific type capability allows regions unique to a specific
+ * device or class of devices to be exposed. This helps solve the problem for
+ * vfio bus drivers of defining which region indexes correspond to which region
+ * on the device, without needing to resort to static indexes, as done by
+ * vfio-pci. For instance, if we were to go back in time, we might remove
+ * VFIO_PCI_VGA_REGION_INDEX and let vfio-pci simply define that all indexes
+ * greater than or equal to VFIO_PCI_NUM_REGIONS are device specific and we'd
+ * make a "VGA" device specific type to describe the VGA access space. This
+ * means that non-VGA devices wouldn't need to waste this index, and thus the
+ * address space associated with it due to implementation of device file
+ * descriptor offsets in vfio-pci.
+ *
+ * The current implementation is now part of the user ABI, so we can't use this
+ * for VGA, but there are other upcoming use cases, such as opregions for Intel
+ * IGD devices and framebuffers for vGPU devices. We missed VGA, but we'll
+ * use this for future additions.
+ *
+ * The structure below defines version 1 of this capability.
+ */
+#define VFIO_REGION_INFO_CAP_TYPE 2
+
+struct vfio_region_info_cap_type {
+ struct vfio_info_cap_header header;
+ __u32 type; /* global per bus driver */
+ __u32 subtype; /* type specific */
+};
+
+#define VFIO_REGION_TYPE_PCI_VENDOR_TYPE (1 << 31)
+#define VFIO_REGION_TYPE_PCI_VENDOR_MASK (0xffff)
+
+/* 8086 Vendor sub-types */
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION (1)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG (2)
+#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
+
/**
* VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
* struct vfio_irq_info)
@@ -336,7 +423,8 @@ enum {
* between described ranges are unimplemented.
*/
VFIO_PCI_VGA_REGION_INDEX,
- VFIO_PCI_NUM_REGIONS
+ VFIO_PCI_NUM_REGIONS = 9 /* Fixed user ABI, region indexes >=9 use */
+ /* device specific cap to define content. */
};
enum {