aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/vfio/vfio_main.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--drivers/vfio/vfio_main.c1392
1 files changed, 566 insertions, 826 deletions
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 7cb56c382c97..2d168793d4e1 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -32,6 +32,9 @@
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
+#include <linux/pm_runtime.h>
+#include <linux/interval_tree.h>
+#include <linux/iova_bitmap.h>
#include "vfio.h"
#define DRIVER_VERSION "0.3"
@@ -40,54 +43,14 @@
static struct vfio {
struct class *class;
- struct list_head iommu_drivers_list;
- struct mutex iommu_drivers_lock;
struct list_head group_list;
struct mutex group_lock; /* locks group_list */
struct ida group_ida;
dev_t group_devt;
+ struct class *device_class;
+ struct ida device_ida;
} vfio;
-struct vfio_iommu_driver {
- const struct vfio_iommu_driver_ops *ops;
- struct list_head vfio_next;
-};
-
-struct vfio_container {
- struct kref kref;
- struct list_head group_list;
- struct rw_semaphore group_lock;
- struct vfio_iommu_driver *iommu_driver;
- void *iommu_data;
- bool noiommu;
-};
-
-struct vfio_group {
- struct device dev;
- struct cdev cdev;
- refcount_t users;
- unsigned int container_users;
- struct iommu_group *iommu_group;
- struct vfio_container *container;
- struct list_head device_list;
- struct mutex device_lock;
- struct list_head vfio_next;
- struct list_head container_next;
- enum vfio_group_type type;
- unsigned int dev_counter;
- struct rw_semaphore group_rwsem;
- struct kvm *kvm;
- struct file *opened_file;
- struct blocking_notifier_head notifier;
-};
-
-#ifdef CONFIG_VFIO_NOIOMMU
-static bool noiommu __read_mostly;
-module_param_named(enable_unsafe_noiommu_mode,
- noiommu, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
-#endif
-
static DEFINE_XARRAY(vfio_device_set_xa);
static const struct file_operations vfio_group_fops;
@@ -162,146 +125,6 @@ static void vfio_release_device_set(struct vfio_device *device)
xa_unlock(&vfio_device_set_xa);
}
-#ifdef CONFIG_VFIO_NOIOMMU
-static void *vfio_noiommu_open(unsigned long arg)
-{
- if (arg != VFIO_NOIOMMU_IOMMU)
- return ERR_PTR(-EINVAL);
- if (!capable(CAP_SYS_RAWIO))
- return ERR_PTR(-EPERM);
-
- return NULL;
-}
-
-static void vfio_noiommu_release(void *iommu_data)
-{
-}
-
-static long vfio_noiommu_ioctl(void *iommu_data,
- unsigned int cmd, unsigned long arg)
-{
- if (cmd == VFIO_CHECK_EXTENSION)
- return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
-
- return -ENOTTY;
-}
-
-static int vfio_noiommu_attach_group(void *iommu_data,
- struct iommu_group *iommu_group, enum vfio_group_type type)
-{
- return 0;
-}
-
-static void vfio_noiommu_detach_group(void *iommu_data,
- struct iommu_group *iommu_group)
-{
-}
-
-static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
- .name = "vfio-noiommu",
- .owner = THIS_MODULE,
- .open = vfio_noiommu_open,
- .release = vfio_noiommu_release,
- .ioctl = vfio_noiommu_ioctl,
- .attach_group = vfio_noiommu_attach_group,
- .detach_group = vfio_noiommu_detach_group,
-};
-
-/*
- * Only noiommu containers can use vfio-noiommu and noiommu containers can only
- * use vfio-noiommu.
- */
-static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
- const struct vfio_iommu_driver *driver)
-{
- return container->noiommu == (driver->ops == &vfio_noiommu_ops);
-}
-#else
-static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
- const struct vfio_iommu_driver *driver)
-{
- return true;
-}
-#endif /* CONFIG_VFIO_NOIOMMU */
-
-/*
- * IOMMU driver registration
- */
-int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
-{
- struct vfio_iommu_driver *driver, *tmp;
-
- if (WARN_ON(!ops->register_device != !ops->unregister_device))
- return -EINVAL;
-
- driver = kzalloc(sizeof(*driver), GFP_KERNEL);
- if (!driver)
- return -ENOMEM;
-
- driver->ops = ops;
-
- mutex_lock(&vfio.iommu_drivers_lock);
-
- /* Check for duplicates */
- list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
- if (tmp->ops == ops) {
- mutex_unlock(&vfio.iommu_drivers_lock);
- kfree(driver);
- return -EINVAL;
- }
- }
-
- list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
-
- mutex_unlock(&vfio.iommu_drivers_lock);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
-
-void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
-{
- struct vfio_iommu_driver *driver;
-
- mutex_lock(&vfio.iommu_drivers_lock);
- list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
- if (driver->ops == ops) {
- list_del(&driver->vfio_next);
- mutex_unlock(&vfio.iommu_drivers_lock);
- kfree(driver);
- return;
- }
- }
- mutex_unlock(&vfio.iommu_drivers_lock);
-}
-EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
-
-static void vfio_group_get(struct vfio_group *group);
-
-/*
- * Container objects - containers are created when /dev/vfio/vfio is
- * opened, but their lifecycle extends until the last user is done, so
- * it's freed via kref. Must support container/group/device being
- * closed in any order.
- */
-static void vfio_container_get(struct vfio_container *container)
-{
- kref_get(&container->kref);
-}
-
-static void vfio_container_release(struct kref *kref)
-{
- struct vfio_container *container;
- container = container_of(kref, struct vfio_container, kref);
-
- kfree(container);
-}
-
-static void vfio_container_put(struct vfio_container *container)
-{
- kref_put(&container->kref, vfio_container_release);
-}
-
/*
* Group objects - create, release, get, put, search
*/
@@ -310,9 +133,13 @@ __vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
struct vfio_group *group;
+ /*
+ * group->iommu_group from the vfio.group_list cannot be NULL
+ * under the vfio.group_lock.
+ */
list_for_each_entry(group, &vfio.group_list, vfio_next) {
if (group->iommu_group == iommu_group) {
- vfio_group_get(group);
+ refcount_inc(&group->drivers);
return group;
}
}
@@ -335,7 +162,8 @@ static void vfio_group_release(struct device *dev)
struct vfio_group *group = container_of(dev, struct vfio_group, dev);
mutex_destroy(&group->device_lock);
- iommu_group_put(group->iommu_group);
+ mutex_destroy(&group->group_lock);
+ WARN_ON(group->iommu_group);
ida_free(&vfio.group_ida, MINOR(group->dev.devt));
kfree(group);
}
@@ -363,8 +191,8 @@ static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
cdev_init(&group->cdev, &vfio_group_fops);
group->cdev.owner = THIS_MODULE;
- refcount_set(&group->users, 1);
- init_rwsem(&group->group_rwsem);
+ refcount_set(&group->drivers, 1);
+ mutex_init(&group->group_lock);
INIT_LIST_HEAD(&group->device_list);
mutex_init(&group->device_lock);
group->iommu_group = iommu_group;
@@ -420,44 +248,64 @@ err_put:
return ret;
}
-static void vfio_group_put(struct vfio_group *group)
+static void vfio_device_remove_group(struct vfio_device *device)
{
- if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
+ struct vfio_group *group = device->group;
+ struct iommu_group *iommu_group;
+
+ if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
+ iommu_group_remove_device(device->dev);
+
+ /* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */
+ if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
return;
+ list_del(&group->vfio_next);
/*
+ * We could concurrently probe another driver in the group that might
+ * race vfio_device_remove_group() with vfio_get_group(), so we have to
+ * ensure that the sysfs is all cleaned up under lock otherwise the
+ * cdev_device_add() will fail due to the name aready existing.
+ */
+ cdev_device_del(&group->cdev, &group->dev);
+
+ mutex_lock(&group->group_lock);
+ /*
* These data structures all have paired operations that can only be
- * undone when the caller holds a live reference on the group. Since all
- * pairs must be undone these WARN_ON's indicate some caller did not
+ * undone when the caller holds a live reference on the device. Since
+ * all pairs must be undone these WARN_ON's indicate some caller did not
* properly hold the group reference.
*/
WARN_ON(!list_empty(&group->device_list));
- WARN_ON(group->container || group->container_users);
WARN_ON(group->notifier.head);
- list_del(&group->vfio_next);
- cdev_device_del(&group->cdev, &group->dev);
+ /*
+ * Revoke all users of group->iommu_group. At this point we know there
+ * are no devices active because we are unplugging the last one. Setting
+ * iommu_group to NULL blocks all new users.
+ */
+ if (group->container)
+ vfio_group_detach_container(group);
+ iommu_group = group->iommu_group;
+ group->iommu_group = NULL;
+ mutex_unlock(&group->group_lock);
mutex_unlock(&vfio.group_lock);
+ iommu_group_put(iommu_group);
put_device(&group->dev);
}
-static void vfio_group_get(struct vfio_group *group)
-{
- refcount_inc(&group->users);
-}
-
/*
* Device objects - create, release, get, put, search
*/
/* Device reference always implies a group reference */
-static void vfio_device_put(struct vfio_device *device)
+static void vfio_device_put_registration(struct vfio_device *device)
{
if (refcount_dec_and_test(&device->refcount))
complete(&device->comp);
}
-static bool vfio_device_try_get(struct vfio_device *device)
+static bool vfio_device_try_get_registration(struct vfio_device *device)
{
return refcount_inc_not_zero(&device->refcount);
}
@@ -469,7 +317,8 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
mutex_lock(&group->device_lock);
list_for_each_entry(device, &group->device_list, group_next) {
- if (device->dev == dev && vfio_device_try_get(device)) {
+ if (device->dev == dev &&
+ vfio_device_try_get_registration(device)) {
mutex_unlock(&group->device_lock);
return device;
}
@@ -481,20 +330,110 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
/*
* VFIO driver API
*/
-void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
- const struct vfio_device_ops *ops)
+/* Release helper called by vfio_put_device() */
+static void vfio_device_release(struct device *dev)
+{
+ struct vfio_device *device =
+ container_of(dev, struct vfio_device, device);
+
+ vfio_release_device_set(device);
+ ida_free(&vfio.device_ida, device->index);
+
+ /*
+ * kvfree() cannot be done here due to a life cycle mess in
+ * vfio-ccw. Before the ccw part is fixed all drivers are
+ * required to support @release and call vfio_free_device()
+ * from there.
+ */
+ device->ops->release(device);
+}
+
+/*
+ * Allocate and initialize vfio_device so it can be registered to vfio
+ * core.
+ *
+ * Drivers should use the wrapper vfio_alloc_device() for allocation.
+ * @size is the size of the structure to be allocated, including any
+ * private data used by the driver.
+ *
+ * Driver may provide an @init callback to cover device private data.
+ *
+ * Use vfio_put_device() to release the structure after success return.
+ */
+struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
+ const struct vfio_device_ops *ops)
{
+ struct vfio_device *device;
+ int ret;
+
+ if (WARN_ON(size < sizeof(struct vfio_device)))
+ return ERR_PTR(-EINVAL);
+
+ device = kvzalloc(size, GFP_KERNEL);
+ if (!device)
+ return ERR_PTR(-ENOMEM);
+
+ ret = vfio_init_device(device, dev, ops);
+ if (ret)
+ goto out_free;
+ return device;
+
+out_free:
+ kvfree(device);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(_vfio_alloc_device);
+
+/*
+ * Initialize a vfio_device so it can be registered to vfio core.
+ *
+ * Only vfio-ccw driver should call this interface.
+ */
+int vfio_init_device(struct vfio_device *device, struct device *dev,
+ const struct vfio_device_ops *ops)
+{
+ int ret;
+
+ ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
+ if (ret < 0) {
+ dev_dbg(dev, "Error to alloc index\n");
+ return ret;
+ }
+
+ device->index = ret;
init_completion(&device->comp);
device->dev = dev;
device->ops = ops;
+
+ if (ops->init) {
+ ret = ops->init(device);
+ if (ret)
+ goto out_uninit;
+ }
+
+ device_initialize(&device->device);
+ device->device.release = vfio_device_release;
+ device->device.class = vfio.device_class;
+ device->device.parent = device->dev;
+ return 0;
+
+out_uninit:
+ vfio_release_device_set(device);
+ ida_free(&vfio.device_ida, device->index);
+ return ret;
}
-EXPORT_SYMBOL_GPL(vfio_init_group_dev);
+EXPORT_SYMBOL_GPL(vfio_init_device);
-void vfio_uninit_group_dev(struct vfio_device *device)
+/*
+ * The helper called by driver @release callback to free the device
+ * structure. Drivers which don't have private data to clean can
+ * simply use this helper as its @release.
+ */
+void vfio_free_device(struct vfio_device *device)
{
- vfio_release_device_set(device);
+ kvfree(device);
}
-EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
+EXPORT_SYMBOL_GPL(vfio_free_device);
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
enum vfio_group_type type)
@@ -535,8 +474,7 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
struct vfio_group *group;
iommu_group = iommu_group_get(dev);
-#ifdef CONFIG_VFIO_NOIOMMU
- if (!iommu_group && noiommu) {
+ if (!iommu_group && vfio_noiommu) {
/*
* With noiommu enabled, create an IOMMU group for devices that
* don't already have one, implying no IOMMU hardware/driver
@@ -550,7 +488,7 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
}
return group;
}
-#endif
+
if (!iommu_group)
return ERR_PTR(-EINVAL);
@@ -577,7 +515,12 @@ static int __vfio_register_dev(struct vfio_device *device,
struct vfio_group *group)
{
struct vfio_device *existing_device;
+ int ret;
+ /*
+ * In all cases group is the output of one of the group allocation
+ * functions and we have group->drivers incremented for us.
+ */
if (IS_ERR(group))
return PTR_ERR(group);
@@ -590,28 +533,39 @@ static int __vfio_register_dev(struct vfio_device *device,
existing_device = vfio_group_get_device(group, device->dev);
if (existing_device) {
+ /*
+ * group->iommu_group is non-NULL because we hold the drivers
+ * refcount.
+ */
dev_WARN(device->dev, "Device already exists on group %d\n",
iommu_group_id(group->iommu_group));
- vfio_device_put(existing_device);
- if (group->type == VFIO_NO_IOMMU ||
- group->type == VFIO_EMULATED_IOMMU)
- iommu_group_remove_device(device->dev);
- vfio_group_put(group);
- return -EBUSY;
+ vfio_device_put_registration(existing_device);
+ ret = -EBUSY;
+ goto err_out;
}
/* Our reference on group is moved to the device */
device->group = group;
+ ret = dev_set_name(&device->device, "vfio%d", device->index);
+ if (ret)
+ goto err_out;
+
+ ret = device_add(&device->device);
+ if (ret)
+ goto err_out;
+
/* Refcounting can't start until the driver calls register */
refcount_set(&device->refcount, 1);
mutex_lock(&group->device_lock);
list_add(&device->group_next, &group->device_list);
- group->dev_counter++;
mutex_unlock(&group->device_lock);
return 0;
+err_out:
+ vfio_device_remove_group(device);
+ return ret;
}
int vfio_register_group_dev(struct vfio_device *device)
@@ -651,7 +605,7 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
ret = !strcmp(dev_name(it->dev), buf);
}
- if (ret && vfio_device_try_get(it)) {
+ if (ret && vfio_device_try_get_registration(it)) {
device = it;
break;
}
@@ -671,7 +625,7 @@ void vfio_unregister_group_dev(struct vfio_device *device)
bool interrupted = false;
long rc;
- vfio_device_put(device);
+ vfio_device_put_registration(device);
rc = try_wait_for_completion(&device->comp);
while (rc <= 0) {
if (device->ops->request)
@@ -696,356 +650,78 @@ void vfio_unregister_group_dev(struct vfio_device *device)
mutex_lock(&group->device_lock);
list_del(&device->group_next);
- group->dev_counter--;
mutex_unlock(&group->device_lock);
- if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
- iommu_group_remove_device(device->dev);
+ /* Balances device_add in register path */
+ device_del(&device->device);
- /* Matches the get in vfio_register_group_dev() */
- vfio_group_put(group);
+ vfio_device_remove_group(device);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
/*
- * VFIO base fd, /dev/vfio/vfio
- */
-static long vfio_ioctl_check_extension(struct vfio_container *container,
- unsigned long arg)
-{
- struct vfio_iommu_driver *driver;
- long ret = 0;
-
- down_read(&container->group_lock);
-
- driver = container->iommu_driver;
-
- switch (arg) {
- /* No base extensions yet */
- default:
- /*
- * If no driver is set, poll all registered drivers for
- * extensions and return the first positive result. If
- * a driver is already set, further queries will be passed
- * only to that driver.
- */
- if (!driver) {
- mutex_lock(&vfio.iommu_drivers_lock);
- list_for_each_entry(driver, &vfio.iommu_drivers_list,
- vfio_next) {
-
- if (!list_empty(&container->group_list) &&
- !vfio_iommu_driver_allowed(container,
- driver))
- continue;
- if (!try_module_get(driver->ops->owner))
- continue;
-
- ret = driver->ops->ioctl(NULL,
- VFIO_CHECK_EXTENSION,
- arg);
- module_put(driver->ops->owner);
- if (ret > 0)
- break;
- }
- mutex_unlock(&vfio.iommu_drivers_lock);
- } else
- ret = driver->ops->ioctl(container->iommu_data,
- VFIO_CHECK_EXTENSION, arg);
- }
-
- up_read(&container->group_lock);
-
- return ret;
-}
-
-/* hold write lock on container->group_lock */
-static int __vfio_container_attach_groups(struct vfio_container *container,
- struct vfio_iommu_driver *driver,
- void *data)
-{
- struct vfio_group *group;
- int ret = -ENODEV;
-
- list_for_each_entry(group, &container->group_list, container_next) {
- ret = driver->ops->attach_group(data, group->iommu_group,
- group->type);
- if (ret)
- goto unwind;
- }
-
- return ret;
-
-unwind:
- list_for_each_entry_continue_reverse(group, &container->group_list,
- container_next) {
- driver->ops->detach_group(data, group->iommu_group);
- }
-
- return ret;
-}
-
-static long vfio_ioctl_set_iommu(struct vfio_container *container,
- unsigned long arg)
-{
- struct vfio_iommu_driver *driver;
- long ret = -ENODEV;
-
- down_write(&container->group_lock);
-
- /*
- * The container is designed to be an unprivileged interface while
- * the group can be assigned to specific users. Therefore, only by
- * adding a group to a container does the user get the privilege of
- * enabling the iommu, which may allocate finite resources. There
- * is no unset_iommu, but by removing all the groups from a container,
- * the container is deprivileged and returns to an unset state.
- */
- if (list_empty(&container->group_list) || container->iommu_driver) {
- up_write(&container->group_lock);
- return -EINVAL;
- }
-
- mutex_lock(&vfio.iommu_drivers_lock);
- list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
- void *data;
-
- if (!vfio_iommu_driver_allowed(container, driver))
- continue;
- if (!try_module_get(driver->ops->owner))
- continue;
-
- /*
- * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
- * so test which iommu driver reported support for this
- * extension and call open on them. We also pass them the
- * magic, allowing a single driver to support multiple
- * interfaces if they'd like.
- */
- if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
- module_put(driver->ops->owner);
- continue;
- }
-
- data = driver->ops->open(arg);
- if (IS_ERR(data)) {
- ret = PTR_ERR(data);
- module_put(driver->ops->owner);
- continue;
- }
-
- ret = __vfio_container_attach_groups(container, driver, data);
- if (ret) {
- driver->ops->release(data);
- module_put(driver->ops->owner);
- continue;
- }
-
- container->iommu_driver = driver;
- container->iommu_data = data;
- break;
- }
-
- mutex_unlock(&vfio.iommu_drivers_lock);
- up_write(&container->group_lock);
-
- return ret;
-}
-
-static long vfio_fops_unl_ioctl(struct file *filep,
- unsigned int cmd, unsigned long arg)
-{
- struct vfio_container *container = filep->private_data;
- struct vfio_iommu_driver *driver;
- void *data;
- long ret = -EINVAL;
-
- if (!container)
- return ret;
-
- switch (cmd) {
- case VFIO_GET_API_VERSION:
- ret = VFIO_API_VERSION;
- break;
- case VFIO_CHECK_EXTENSION:
- ret = vfio_ioctl_check_extension(container, arg);
- break;
- case VFIO_SET_IOMMU:
- ret = vfio_ioctl_set_iommu(container, arg);
- break;
- default:
- driver = container->iommu_driver;
- data = container->iommu_data;
-
- if (driver) /* passthrough all unrecognized ioctls */
- ret = driver->ops->ioctl(data, cmd, arg);
- }
-
- return ret;
-}
-
-static int vfio_fops_open(struct inode *inode, struct file *filep)
-{
- struct vfio_container *container;
-
- container = kzalloc(sizeof(*container), GFP_KERNEL);
- if (!container)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&container->group_list);
- init_rwsem(&container->group_lock);
- kref_init(&container->kref);
-
- filep->private_data = container;
-
- return 0;
-}
-
-static int vfio_fops_release(struct inode *inode, struct file *filep)
-{
- struct vfio_container *container = filep->private_data;
- struct vfio_iommu_driver *driver = container->iommu_driver;
-
- if (driver && driver->ops->notify)
- driver->ops->notify(container->iommu_data,
- VFIO_IOMMU_CONTAINER_CLOSE);
-
- filep->private_data = NULL;
-
- vfio_container_put(container);
-
- return 0;
-}
-
-static const struct file_operations vfio_fops = {
- .owner = THIS_MODULE,
- .open = vfio_fops_open,
- .release = vfio_fops_release,
- .unlocked_ioctl = vfio_fops_unl_ioctl,
- .compat_ioctl = compat_ptr_ioctl,
-};
-
-/*
* VFIO Group fd, /dev/vfio/$GROUP
*/
-static void __vfio_group_unset_container(struct vfio_group *group)
-{
- struct vfio_container *container = group->container;
- struct vfio_iommu_driver *driver;
-
- lockdep_assert_held_write(&group->group_rwsem);
-
- down_write(&container->group_lock);
-
- driver = container->iommu_driver;
- if (driver)
- driver->ops->detach_group(container->iommu_data,
- group->iommu_group);
-
- if (group->type == VFIO_IOMMU)
- iommu_group_release_dma_owner(group->iommu_group);
-
- group->container = NULL;
- group->container_users = 0;
- list_del(&group->container_next);
-
- /* Detaching the last group deprivileges a container, remove iommu */
- if (driver && list_empty(&container->group_list)) {
- driver->ops->release(container->iommu_data);
- module_put(driver->ops->owner);
- container->iommu_driver = NULL;
- container->iommu_data = NULL;
- }
-
- up_write(&container->group_lock);
-
- vfio_container_put(container);
-}
-
/*
* VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
* if there was no container to unset. Since the ioctl is called on
* the group, we know that still exists, therefore the only valid
* transition here is 1->0.
*/
-static int vfio_group_unset_container(struct vfio_group *group)
+static int vfio_group_ioctl_unset_container(struct vfio_group *group)
{
- lockdep_assert_held_write(&group->group_rwsem);
+ int ret = 0;
- if (!group->container)
- return -EINVAL;
- if (group->container_users != 1)
- return -EBUSY;
- __vfio_group_unset_container(group);
- return 0;
+ mutex_lock(&group->group_lock);
+ if (!group->container) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ if (group->container_users != 1) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ vfio_group_detach_container(group);
+
+out_unlock:
+ mutex_unlock(&group->group_lock);
+ return ret;
}
-static int vfio_group_set_container(struct vfio_group *group, int container_fd)
+static int vfio_group_ioctl_set_container(struct vfio_group *group,
+ int __user *arg)
{
- struct fd f;
struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret = 0;
-
- lockdep_assert_held_write(&group->group_rwsem);
-
- if (group->container || WARN_ON(group->container_users))
- return -EINVAL;
+ struct fd f;
+ int ret;
+ int fd;
- if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
- return -EPERM;
+ if (get_user(fd, arg))
+ return -EFAULT;
- f = fdget(container_fd);
+ f = fdget(fd);
if (!f.file)
return -EBADF;
- /* Sanity check, is this really our fd? */
- if (f.file->f_op != &vfio_fops) {
- fdput(f);
- return -EINVAL;
+ mutex_lock(&group->group_lock);
+ if (group->container || WARN_ON(group->container_users)) {
+ ret = -EINVAL;
+ goto out_unlock;
}
-
- container = f.file->private_data;
- WARN_ON(!container); /* fget ensures we don't race vfio_release */
-
- down_write(&container->group_lock);
-
- /* Real groups and fake groups cannot mix */
- if (!list_empty(&container->group_list) &&
- container->noiommu != (group->type == VFIO_NO_IOMMU)) {
- ret = -EPERM;
- goto unlock_out;
- }
-
- if (group->type == VFIO_IOMMU) {
- ret = iommu_group_claim_dma_owner(group->iommu_group, f.file);
- if (ret)
- goto unlock_out;
+ if (!group->iommu_group) {
+ ret = -ENODEV;
+ goto out_unlock;
}
- driver = container->iommu_driver;
- if (driver) {
- ret = driver->ops->attach_group(container->iommu_data,
- group->iommu_group,
- group->type);
- if (ret) {
- if (group->type == VFIO_IOMMU)
- iommu_group_release_dma_owner(
- group->iommu_group);
- goto unlock_out;
- }
+ container = vfio_container_from_file(f.file);
+ ret = -EINVAL;
+ if (container) {
+ ret = vfio_container_attach_group(container, group);
+ goto out_unlock;
}
- group->container = container;
- group->container_users = 1;
- container->noiommu = (group->type == VFIO_NO_IOMMU);
- list_add(&group->container_next, &container->group_list);
-
- /* Get a reference on the container and mark a user within the group */
- vfio_container_get(container);
-
-unlock_out:
- up_write(&container->group_lock);
+out_unlock:
+ mutex_unlock(&group->group_lock);
fdput(f);
return ret;
}
@@ -1053,47 +729,19 @@ unlock_out:
static const struct file_operations vfio_device_fops;
/* true if the vfio_device has open_device() called but not close_device() */
-static bool vfio_assert_device_open(struct vfio_device *device)
+bool vfio_assert_device_open(struct vfio_device *device)
{
return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
-static int vfio_device_assign_container(struct vfio_device *device)
-{
- struct vfio_group *group = device->group;
-
- lockdep_assert_held_write(&group->group_rwsem);
-
- if (!group->container || !group->container->iommu_driver ||
- WARN_ON(!group->container_users))
- return -EINVAL;
-
- if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
- return -EPERM;
-
- get_file(group->opened_file);
- group->container_users++;
- return 0;
-}
-
-static void vfio_device_unassign_container(struct vfio_device *device)
-{
- down_write(&device->group->group_rwsem);
- WARN_ON(device->group->container_users <= 1);
- device->group->container_users--;
- fput(device->group->opened_file);
- up_write(&device->group->group_rwsem);
-}
-
static struct file *vfio_device_open(struct vfio_device *device)
{
- struct vfio_iommu_driver *iommu_driver;
struct file *filep;
int ret;
- down_write(&device->group->group_rwsem);
+ mutex_lock(&device->group->group_lock);
ret = vfio_device_assign_container(device);
- up_write(&device->group->group_rwsem);
+ mutex_unlock(&device->group->group_lock);
if (ret)
return ERR_PTR(ret);
@@ -1110,7 +758,7 @@ static struct file *vfio_device_open(struct vfio_device *device)
* lock. If the device driver will use it, it must obtain a
* reference and release it during close_device.
*/
- down_read(&device->group->group_rwsem);
+ mutex_lock(&device->group->group_lock);
device->kvm = device->group->kvm;
if (device->ops->open_device) {
@@ -1118,13 +766,8 @@ static struct file *vfio_device_open(struct vfio_device *device)
if (ret)
goto err_undo_count;
}
-
- iommu_driver = device->group->container->iommu_driver;
- if (iommu_driver && iommu_driver->ops->register_device)
- iommu_driver->ops->register_device(
- device->group->container->iommu_data, device);
-
- up_read(&device->group->group_rwsem);
+ vfio_device_container_register(device);
+ mutex_unlock(&device->group->group_lock);
}
mutex_unlock(&device->dev_set->lock);
@@ -1157,17 +800,14 @@ static struct file *vfio_device_open(struct vfio_device *device)
err_close_device:
mutex_lock(&device->dev_set->lock);
- down_read(&device->group->group_rwsem);
+ mutex_lock(&device->group->group_lock);
if (device->open_count == 1 && device->ops->close_device) {
device->ops->close_device(device);
- iommu_driver = device->group->container->iommu_driver;
- if (iommu_driver && iommu_driver->ops->unregister_device)
- iommu_driver->ops->unregister_device(
- device->group->container->iommu_data, device);
+ vfio_device_container_unregister(device);
}
err_undo_count:
- up_read(&device->group->group_rwsem);
+ mutex_unlock(&device->group->group_lock);
device->open_count--;
if (device->open_count == 0 && device->kvm)
device->kvm = NULL;
@@ -1178,14 +818,21 @@ err_unassign_container:
return ERR_PTR(ret);
}
-static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
+static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
+ char __user *arg)
{
struct vfio_device *device;
struct file *filep;
+ char *buf;
int fdno;
int ret;
+ buf = strndup_user(arg, PAGE_SIZE);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+
device = vfio_device_get_from_name(group, buf);
+ kfree(buf);
if (IS_ERR(device))
return PTR_ERR(device);
@@ -1207,81 +854,60 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
err_put_fdno:
put_unused_fd(fdno);
err_put_device:
- vfio_device_put(device);
+ vfio_device_put_registration(device);
return ret;
}
-static long vfio_group_fops_unl_ioctl(struct file *filep,
- unsigned int cmd, unsigned long arg)
+static int vfio_group_ioctl_get_status(struct vfio_group *group,
+ struct vfio_group_status __user *arg)
{
- struct vfio_group *group = filep->private_data;
- long ret = -ENOTTY;
+ unsigned long minsz = offsetofend(struct vfio_group_status, flags);
+ struct vfio_group_status status;
- switch (cmd) {
- case VFIO_GROUP_GET_STATUS:
- {
- struct vfio_group_status status;
- unsigned long minsz;
+ if (copy_from_user(&status, arg, minsz))
+ return -EFAULT;
- minsz = offsetofend(struct vfio_group_status, flags);
+ if (status.argsz < minsz)
+ return -EINVAL;
- if (copy_from_user(&status, (void __user *)arg, minsz))
- return -EFAULT;
+ status.flags = 0;
- if (status.argsz < minsz)
- return -EINVAL;
+ mutex_lock(&group->group_lock);
+ if (!group->iommu_group) {
+ mutex_unlock(&group->group_lock);
+ return -ENODEV;
+ }
- status.flags = 0;
+ if (group->container)
+ status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
+ VFIO_GROUP_FLAGS_VIABLE;
+ else if (!iommu_group_dma_owner_claimed(group->iommu_group))
+ status.flags |= VFIO_GROUP_FLAGS_VIABLE;
+ mutex_unlock(&group->group_lock);
- down_read(&group->group_rwsem);
- if (group->container)
- status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
- VFIO_GROUP_FLAGS_VIABLE;
- else if (!iommu_group_dma_owner_claimed(group->iommu_group))
- status.flags |= VFIO_GROUP_FLAGS_VIABLE;
- up_read(&group->group_rwsem);
+ if (copy_to_user(arg, &status, minsz))
+ return -EFAULT;
+ return 0;
+}
- if (copy_to_user((void __user *)arg, &status, minsz))
- return -EFAULT;
+static long vfio_group_fops_unl_ioctl(struct file *filep,
+ unsigned int cmd, unsigned long arg)
+{
+ struct vfio_group *group = filep->private_data;
+ void __user *uarg = (void __user *)arg;
- ret = 0;
- break;
- }
+ switch (cmd) {
+ case VFIO_GROUP_GET_DEVICE_FD:
+ return vfio_group_ioctl_get_device_fd(group, uarg);
+ case VFIO_GROUP_GET_STATUS:
+ return vfio_group_ioctl_get_status(group, uarg);
case VFIO_GROUP_SET_CONTAINER:
- {
- int fd;
-
- if (get_user(fd, (int __user *)arg))
- return -EFAULT;
-
- if (fd < 0)
- return -EINVAL;
-
- down_write(&group->group_rwsem);
- ret = vfio_group_set_container(group, fd);
- up_write(&group->group_rwsem);
- break;
- }
+ return vfio_group_ioctl_set_container(group, uarg);
case VFIO_GROUP_UNSET_CONTAINER:
- down_write(&group->group_rwsem);
- ret = vfio_group_unset_container(group);
- up_write(&group->group_rwsem);
- break;
- case VFIO_GROUP_GET_DEVICE_FD:
- {
- char *buf;
-
- buf = strndup_user((const char __user *)arg, PAGE_SIZE);
- if (IS_ERR(buf))
- return PTR_ERR(buf);
-
- ret = vfio_group_get_device_fd(group, buf);
- kfree(buf);
- break;
- }
+ return vfio_group_ioctl_unset_container(group);
+ default:
+ return -ENOTTY;
}
-
- return ret;
}
static int vfio_group_fops_open(struct inode *inode, struct file *filep)
@@ -1290,17 +916,20 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
container_of(inode->i_cdev, struct vfio_group, cdev);
int ret;
- down_write(&group->group_rwsem);
+ mutex_lock(&group->group_lock);
- /* users can be zero if this races with vfio_group_put() */
- if (!refcount_inc_not_zero(&group->users)) {
+ /*
+ * drivers can be zero if this races with vfio_device_remove_group(), it
+ * will be stable at 0 under the group rwsem
+ */
+ if (refcount_read(&group->drivers) == 0) {
ret = -ENODEV;
- goto err_unlock;
+ goto out_unlock;
}
if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
ret = -EPERM;
- goto err_put;
+ goto out_unlock;
}
/*
@@ -1308,17 +937,13 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
*/
if (group->opened_file) {
ret = -EBUSY;
- goto err_put;
+ goto out_unlock;
}
group->opened_file = filep;
filep->private_data = group;
-
- up_write(&group->group_rwsem);
- return 0;
-err_put:
- vfio_group_put(group);
-err_unlock:
- up_write(&group->group_rwsem);
+ ret = 0;
+out_unlock:
+ mutex_unlock(&group->group_lock);
return ret;
}
@@ -1328,21 +953,16 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep)
filep->private_data = NULL;
- down_write(&group->group_rwsem);
+ mutex_lock(&group->group_lock);
/*
* Device FDs hold a group file reference, therefore the group release
* is only called when there are no open devices.
*/
WARN_ON(group->notifier.head);
- if (group->container) {
- WARN_ON(group->container_users != 1);
- __vfio_group_unset_container(group);
- }
+ if (group->container)
+ vfio_group_detach_container(group);
group->opened_file = NULL;
- up_write(&group->group_rwsem);
-
- vfio_group_put(group);
-
+ mutex_unlock(&group->group_lock);
return 0;
}
@@ -1355,24 +975,53 @@ static const struct file_operations vfio_group_fops = {
};
/*
+ * Wrapper around pm_runtime_resume_and_get().
+ * Return error code on failure or 0 on success.
+ */
+static inline int vfio_device_pm_runtime_get(struct vfio_device *device)
+{
+ struct device *dev = device->dev;
+
+ if (dev->driver && dev->driver->pm) {
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret) {
+ dev_info_ratelimited(dev,
+ "vfio: runtime resume failed %d\n", ret);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Wrapper around pm_runtime_put().
+ */
+static inline void vfio_device_pm_runtime_put(struct vfio_device *device)
+{
+ struct device *dev = device->dev;
+
+ if (dev->driver && dev->driver->pm)
+ pm_runtime_put(dev);
+}
+
+/*
* VFIO Device fd
*/
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_device *device = filep->private_data;
- struct vfio_iommu_driver *iommu_driver;
mutex_lock(&device->dev_set->lock);
vfio_assert_device_open(device);
- down_read(&device->group->group_rwsem);
+ mutex_lock(&device->group->group_lock);
if (device->open_count == 1 && device->ops->close_device)
device->ops->close_device(device);
- iommu_driver = device->group->container->iommu_driver;
- if (iommu_driver && iommu_driver->ops->unregister_device)
- iommu_driver->ops->unregister_device(
- device->group->container->iommu_data, device);
- up_read(&device->group->group_rwsem);
+ vfio_device_container_unregister(device);
+ mutex_unlock(&device->group->group_lock);
device->open_count--;
if (device->open_count == 0)
device->kvm = NULL;
@@ -1382,7 +1031,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
vfio_device_unassign_container(device);
- vfio_device_put(device);
+ vfio_device_put_registration(device);
return 0;
}
@@ -1628,6 +1277,167 @@ static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
return 0;
}
+/* Ranges should fit into a single kernel page */
+#define LOG_MAX_RANGES \
+ (PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))
+
+static int
+vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
+ u32 flags, void __user *arg,
+ size_t argsz)
+{
+ size_t minsz =
+ offsetofend(struct vfio_device_feature_dma_logging_control,
+ ranges);
+ struct vfio_device_feature_dma_logging_range __user *ranges;
+ struct vfio_device_feature_dma_logging_control control;
+ struct vfio_device_feature_dma_logging_range range;
+ struct rb_root_cached root = RB_ROOT_CACHED;
+ struct interval_tree_node *nodes;
+ u64 iova_end;
+ u32 nnodes;
+ int i, ret;
+
+ if (!device->log_ops)
+ return -ENOTTY;
+
+ ret = vfio_check_feature(flags, argsz,
+ VFIO_DEVICE_FEATURE_SET,
+ sizeof(control));
+ if (ret != 1)
+ return ret;
+
+ if (copy_from_user(&control, arg, minsz))
+ return -EFAULT;
+
+ nnodes = control.num_ranges;
+ if (!nnodes)
+ return -EINVAL;
+
+ if (nnodes > LOG_MAX_RANGES)
+ return -E2BIG;
+
+ ranges = u64_to_user_ptr(control.ranges);
+ nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
+ GFP_KERNEL);
+ if (!nodes)
+ return -ENOMEM;
+
+ for (i = 0; i < nnodes; i++) {
+ if (copy_from_user(&range, &ranges[i], sizeof(range))) {
+ ret = -EFAULT;
+ goto end;
+ }
+ if (!IS_ALIGNED(range.iova, control.page_size) ||
+ !IS_ALIGNED(range.length, control.page_size)) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ if (check_add_overflow(range.iova, range.length, &iova_end) ||
+ iova_end > ULONG_MAX) {
+ ret = -EOVERFLOW;
+ goto end;
+ }
+
+ nodes[i].start = range.iova;
+ nodes[i].last = range.iova + range.length - 1;
+ if (interval_tree_iter_first(&root, nodes[i].start,
+ nodes[i].last)) {
+ /* Range overlapping */
+ ret = -EINVAL;
+ goto end;
+ }
+ interval_tree_insert(nodes + i, &root);
+ }
+
+ ret = device->log_ops->log_start(device, &root, nnodes,
+ &control.page_size);
+ if (ret)
+ goto end;
+
+ if (copy_to_user(arg, &control, sizeof(control))) {
+ ret = -EFAULT;
+ device->log_ops->log_stop(device);
+ }
+
+end:
+ kfree(nodes);
+ return ret;
+}
+
+static int
+vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
+ u32 flags, void __user *arg,
+ size_t argsz)
+{
+ int ret;
+
+ if (!device->log_ops)
+ return -ENOTTY;
+
+ ret = vfio_check_feature(flags, argsz,
+ VFIO_DEVICE_FEATURE_SET, 0);
+ if (ret != 1)
+ return ret;
+
+ return device->log_ops->log_stop(device);
+}
+
+static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
+ unsigned long iova, size_t length,
+ void *opaque)
+{
+ struct vfio_device *device = opaque;
+
+ return device->log_ops->log_read_and_clear(device, iova, length, iter);
+}
+
+static int
+vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
+ u32 flags, void __user *arg,
+ size_t argsz)
+{
+ size_t minsz =
+ offsetofend(struct vfio_device_feature_dma_logging_report,
+ bitmap);
+ struct vfio_device_feature_dma_logging_report report;
+ struct iova_bitmap *iter;
+ u64 iova_end;
+ int ret;
+
+ if (!device->log_ops)
+ return -ENOTTY;
+
+ ret = vfio_check_feature(flags, argsz,
+ VFIO_DEVICE_FEATURE_GET,
+ sizeof(report));
+ if (ret != 1)
+ return ret;
+
+ if (copy_from_user(&report, arg, minsz))
+ return -EFAULT;
+
+ if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
+ return -EINVAL;
+
+ if (check_add_overflow(report.iova, report.length, &iova_end) ||
+ iova_end > ULONG_MAX)
+ return -EOVERFLOW;
+
+ iter = iova_bitmap_alloc(report.iova, report.length,
+ report.page_size,
+ u64_to_user_ptr(report.bitmap));
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+
+ ret = iova_bitmap_for_each(iter, device,
+ vfio_device_log_read_and_clear);
+
+ iova_bitmap_free(iter);
+ return ret;
+}
+
static int vfio_ioctl_device_feature(struct vfio_device *device,
struct vfio_device_feature __user *arg)
{
@@ -1661,6 +1471,18 @@ static int vfio_ioctl_device_feature(struct vfio_device *device,
return vfio_ioctl_device_feature_mig_device_state(
device, feature.flags, arg->data,
feature.argsz - minsz);
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
+ return vfio_ioctl_device_feature_logging_start(
+ device, feature.flags, arg->data,
+ feature.argsz - minsz);
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
+ return vfio_ioctl_device_feature_logging_stop(
+ device, feature.flags, arg->data,
+ feature.argsz - minsz);
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
+ return vfio_ioctl_device_feature_logging_report(
+ device, feature.flags, arg->data,
+ feature.argsz - minsz);
default:
if (unlikely(!device->ops->device_feature))
return -EINVAL;
@@ -1674,15 +1496,27 @@ static long vfio_device_fops_unl_ioctl(struct file *filep,
unsigned int cmd, unsigned long arg)
{
struct vfio_device *device = filep->private_data;
+ int ret;
+
+ ret = vfio_device_pm_runtime_get(device);
+ if (ret)
+ return ret;
switch (cmd) {
case VFIO_DEVICE_FEATURE:
- return vfio_ioctl_device_feature(device, (void __user *)arg);
+ ret = vfio_ioctl_device_feature(device, (void __user *)arg);
+ break;
+
default:
if (unlikely(!device->ops->ioctl))
- return -EINVAL;
- return device->ops->ioctl(device, cmd, arg);
+ ret = -EINVAL;
+ else
+ ret = device->ops->ioctl(device, cmd, arg);
+ break;
}
+
+ vfio_device_pm_runtime_put(device);
+ return ret;
}
static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
@@ -1732,19 +1566,42 @@ static const struct file_operations vfio_device_fops = {
* vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
* @file: VFIO group file
*
- * The returned iommu_group is valid as long as a ref is held on the file.
+ * The returned iommu_group is valid as long as a ref is held on the file. This
+ * returns a reference on the group. This function is deprecated, only the SPAPR
+ * path in kvm should call it.
*/
struct iommu_group *vfio_file_iommu_group(struct file *file)
{
struct vfio_group *group = file->private_data;
+ struct iommu_group *iommu_group = NULL;
- if (file->f_op != &vfio_group_fops)
+ if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
return NULL;
- return group->iommu_group;
+
+ if (!vfio_file_is_group(file))
+ return NULL;
+
+ mutex_lock(&group->group_lock);
+ if (group->iommu_group) {
+ iommu_group = group->iommu_group;
+ iommu_group_ref_get(iommu_group);
+ }
+ mutex_unlock(&group->group_lock);
+ return iommu_group;
}
EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
/**
+ * vfio_file_is_group - True if the file is usable with VFIO aPIS
+ * @file: VFIO group file
+ */
+bool vfio_file_is_group(struct file *file)
+{
+ return file->f_op == &vfio_group_fops;
+}
+EXPORT_SYMBOL_GPL(vfio_file_is_group);
+
+/**
* vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
* is always CPU cache coherent
* @file: VFIO group file
@@ -1758,13 +1615,13 @@ bool vfio_file_enforced_coherent(struct file *file)
struct vfio_group *group = file->private_data;
bool ret;
- if (file->f_op != &vfio_group_fops)
+ if (!vfio_file_is_group(file))
return true;
- down_read(&group->group_rwsem);
+ mutex_lock(&group->group_lock);
if (group->container) {
- ret = vfio_ioctl_check_extension(group->container,
- VFIO_DMA_CC_IOMMU);
+ ret = vfio_container_ioctl_check_extension(group->container,
+ VFIO_DMA_CC_IOMMU);
} else {
/*
* Since the coherency state is determined only once a container
@@ -1773,7 +1630,7 @@ bool vfio_file_enforced_coherent(struct file *file)
*/
ret = true;
}
- up_read(&group->group_rwsem);
+ mutex_unlock(&group->group_lock);
return ret;
}
EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
@@ -1790,12 +1647,12 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
struct vfio_group *group = file->private_data;
- if (file->f_op != &vfio_group_fops)
+ if (!vfio_file_is_group(file))
return;
- down_write(&group->group_rwsem);
+ mutex_lock(&group->group_lock);
group->kvm = kvm;
- up_write(&group->group_rwsem);
+ mutex_unlock(&group->group_lock);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
@@ -1810,7 +1667,7 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
{
struct vfio_group *group = file->private_data;
- if (file->f_op != &vfio_group_fops)
+ if (!vfio_file_is_group(file))
return false;
return group == device->group;
@@ -1937,114 +1794,6 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
/*
- * Pin contiguous user pages and return their associated host pages for local
- * domain only.
- * @device [in] : device
- * @iova [in] : starting IOVA of user pages to be pinned.
- * @npage [in] : count of pages to be pinned. This count should not
- * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- * @prot [in] : protection flags
- * @pages[out] : array of host pages
- * Return error or number of pages pinned.
- */
-int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
- int npage, int prot, struct page **pages)
-{
- struct vfio_container *container;
- struct vfio_group *group = device->group;
- struct vfio_iommu_driver *driver;
- int ret;
-
- if (!pages || !npage || !vfio_assert_device_open(device))
- return -EINVAL;
-
- if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
- return -E2BIG;
-
- if (group->dev_counter > 1)
- return -EINVAL;
-
- /* group->container cannot change while a vfio device is open */
- container = group->container;
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->pin_pages))
- ret = driver->ops->pin_pages(container->iommu_data,
- group->iommu_group, iova,
- npage, prot, pages);
- else
- ret = -ENOTTY;
-
- return ret;
-}
-EXPORT_SYMBOL(vfio_pin_pages);
-
-/*
- * Unpin contiguous host pages for local domain only.
- * @device [in] : device
- * @iova [in] : starting address of user pages to be unpinned.
- * @npage [in] : count of pages to be unpinned. This count should not
- * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- */
-void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
-{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
-
- if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
- return;
-
- if (WARN_ON(!vfio_assert_device_open(device)))
- return;
-
- /* group->container cannot change while a vfio device is open */
- container = device->group->container;
- driver = container->iommu_driver;
-
- driver->ops->unpin_pages(container->iommu_data, iova, npage);
-}
-EXPORT_SYMBOL(vfio_unpin_pages);
-
-/*
- * This interface allows the CPUs to perform some sort of virtual DMA on
- * behalf of the device.
- *
- * CPUs read/write from/into a range of IOVAs pointing to user space memory
- * into/from a kernel buffer.
- *
- * As the read/write of user space memory is conducted via the CPUs and is
- * not a real device DMA, it is not necessary to pin the user space memory.
- *
- * @device [in] : VFIO device
- * @iova [in] : base IOVA of a user space buffer
- * @data [in] : pointer to kernel buffer
- * @len [in] : kernel buffer length
- * @write : indicate read or write
- * Return error code on failure or 0 on success.
- */
-int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
- size_t len, bool write)
-{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret = 0;
-
- if (!data || len <= 0 || !vfio_assert_device_open(device))
- return -EINVAL;
-
- /* group->container cannot change while a vfio device is open */
- container = device->group->container;
- driver = container->iommu_driver;
-
- if (likely(driver && driver->ops->dma_rw))
- ret = driver->ops->dma_rw(container->iommu_data,
- iova, data, len, write);
- else
- ret = -ENOTTY;
- return ret;
-}
-EXPORT_SYMBOL(vfio_dma_rw);
-
-/*
* Module/class support
*/
static char *vfio_devnode(struct device *dev, umode_t *mode)
@@ -2052,59 +1801,50 @@ static char *vfio_devnode(struct device *dev, umode_t *mode)
return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}
-static struct miscdevice vfio_dev = {
- .minor = VFIO_MINOR,
- .name = "vfio",
- .fops = &vfio_fops,
- .nodename = "vfio/vfio",
- .mode = S_IRUGO | S_IWUGO,
-};
-
static int __init vfio_init(void)
{
int ret;
ida_init(&vfio.group_ida);
+ ida_init(&vfio.device_ida);
mutex_init(&vfio.group_lock);
- mutex_init(&vfio.iommu_drivers_lock);
INIT_LIST_HEAD(&vfio.group_list);
- INIT_LIST_HEAD(&vfio.iommu_drivers_list);
- ret = misc_register(&vfio_dev);
- if (ret) {
- pr_err("vfio: misc device register failed\n");
+ ret = vfio_container_init();
+ if (ret)
return ret;
- }
/* /dev/vfio/$GROUP */
vfio.class = class_create(THIS_MODULE, "vfio");
if (IS_ERR(vfio.class)) {
ret = PTR_ERR(vfio.class);
- goto err_class;
+ goto err_group_class;
}
vfio.class->devnode = vfio_devnode;
+ /* /sys/class/vfio-dev/vfioX */
+ vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
+ if (IS_ERR(vfio.device_class)) {
+ ret = PTR_ERR(vfio.device_class);
+ goto err_dev_class;
+ }
+
ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
if (ret)
goto err_alloc_chrdev;
-#ifdef CONFIG_VFIO_NOIOMMU
- ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
-#endif
- if (ret)
- goto err_driver_register;
-
pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
return 0;
-err_driver_register:
- unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
+ class_destroy(vfio.device_class);
+ vfio.device_class = NULL;
+err_dev_class:
class_destroy(vfio.class);
vfio.class = NULL;
-err_class:
- misc_deregister(&vfio_dev);
+err_group_class:
+ vfio_container_cleanup();
return ret;
}
@@ -2112,14 +1852,14 @@ static void __exit vfio_cleanup(void)
{
WARN_ON(!list_empty(&vfio.group_list));
-#ifdef CONFIG_VFIO_NOIOMMU
- vfio_unregister_iommu_driver(&vfio_noiommu_ops);
-#endif
+ ida_destroy(&vfio.device_ida);
ida_destroy(&vfio.group_ida);
unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
+ class_destroy(vfio.device_class);
+ vfio.device_class = NULL;
class_destroy(vfio.class);
+ vfio_container_cleanup();
vfio.class = NULL;
- misc_deregister(&vfio_dev);
xa_destroy(&vfio_device_set_xa);
}