aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/vfio/vfio.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vfio/vfio.c')
-rw-r--r--drivers/vfio/vfio.c622
1 files changed, 255 insertions, 367 deletions
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 3c034fe14ccb..82fb75464f92 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -32,6 +32,7 @@
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
+#include "vfio.h"
#define DRIVER_VERSION "0.3"
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
@@ -42,9 +43,8 @@ static struct vfio {
struct list_head iommu_drivers_list;
struct mutex iommu_drivers_lock;
struct list_head group_list;
- struct idr group_idr;
- struct mutex group_lock;
- struct cdev group_cdev;
+ struct mutex group_lock; /* locks group_list */
+ struct ida group_ida;
dev_t group_devt;
} vfio;
@@ -68,14 +68,14 @@ struct vfio_unbound_dev {
};
struct vfio_group {
- struct kref kref;
- int minor;
+ struct device dev;
+ struct cdev cdev;
+ refcount_t users;
atomic_t container_users;
struct iommu_group *iommu_group;
struct vfio_container *container;
struct list_head device_list;
struct mutex device_lock;
- struct device *dev;
struct notifier_block nb;
struct list_head vfio_next;
struct list_head container_next;
@@ -83,7 +83,7 @@ struct vfio_group {
struct mutex unbound_lock;
atomic_t opened;
wait_queue_head_t container_q;
- bool noiommu;
+ enum vfio_group_type type;
unsigned int dev_counter;
struct kvm *kvm;
struct blocking_notifier_head notifier;
@@ -97,6 +97,7 @@ MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. Thi
#endif
static DEFINE_XARRAY(vfio_device_set_xa);
+static const struct file_operations vfio_group_fops;
int vfio_assign_device_set(struct vfio_device *device, void *set_id)
{
@@ -169,70 +170,6 @@ static void vfio_release_device_set(struct vfio_device *device)
xa_unlock(&vfio_device_set_xa);
}
-/*
- * vfio_iommu_group_{get,put} are only intended for VFIO bus driver probe
- * and remove functions, any use cases other than acquiring the first
- * reference for the purpose of calling vfio_register_group_dev() or removing
- * that symmetric reference after vfio_unregister_group_dev() should use the raw
- * iommu_group_{get,put} functions. In particular, vfio_iommu_group_put()
- * removes the device from the dummy group and cannot be nested.
- */
-struct iommu_group *vfio_iommu_group_get(struct device *dev)
-{
- struct iommu_group *group;
- int __maybe_unused ret;
-
- group = iommu_group_get(dev);
-
-#ifdef CONFIG_VFIO_NOIOMMU
- /*
- * With noiommu enabled, an IOMMU group will be created for a device
- * that doesn't already have one and doesn't have an iommu_ops on their
- * bus. We set iommudata simply to be able to identify these groups
- * as special use and for reclamation later.
- */
- if (group || !noiommu || iommu_present(dev->bus))
- return group;
-
- group = iommu_group_alloc();
- if (IS_ERR(group))
- return NULL;
-
- iommu_group_set_name(group, "vfio-noiommu");
- iommu_group_set_iommudata(group, &noiommu, NULL);
- ret = iommu_group_add_device(group, dev);
- if (ret) {
- iommu_group_put(group);
- return NULL;
- }
-
- /*
- * Where to taint? At this point we've added an IOMMU group for a
- * device that is not backed by iommu_ops, therefore any iommu_
- * callback using iommu_ops can legitimately Oops. So, while we may
- * be about to give a DMA capable device to a user without IOMMU
- * protection, which is clearly taint-worthy, let's go ahead and do
- * it here.
- */
- add_taint(TAINT_USER, LOCKDEP_STILL_OK);
- dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
-#endif
-
- return group;
-}
-EXPORT_SYMBOL_GPL(vfio_iommu_group_get);
-
-void vfio_iommu_group_put(struct iommu_group *group, struct device *dev)
-{
-#ifdef CONFIG_VFIO_NOIOMMU
- if (iommu_group_get_iommudata(group) == &noiommu)
- iommu_group_remove_device(dev);
-#endif
-
- iommu_group_put(group);
-}
-EXPORT_SYMBOL_GPL(vfio_iommu_group_put);
-
#ifdef CONFIG_VFIO_NOIOMMU
static void *vfio_noiommu_open(unsigned long arg)
{
@@ -258,9 +195,9 @@ static long vfio_noiommu_ioctl(void *iommu_data,
}
static int vfio_noiommu_attach_group(void *iommu_data,
- struct iommu_group *iommu_group)
+ struct iommu_group *iommu_group, enum vfio_group_type type)
{
- return iommu_group_get_iommudata(iommu_group) == &noiommu ? 0 : -EINVAL;
+ return 0;
}
static void vfio_noiommu_detach_group(void *iommu_data,
@@ -277,8 +214,23 @@ static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
.attach_group = vfio_noiommu_attach_group,
.detach_group = vfio_noiommu_detach_group,
};
-#endif
+/*
+ * Only noiommu containers can use vfio-noiommu and noiommu containers can only
+ * use vfio-noiommu.
+ */
+static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
+ const struct vfio_iommu_driver *driver)
+{
+ return container->noiommu == (driver->ops == &vfio_noiommu_ops);
+}
+#else
+static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
+ const struct vfio_iommu_driver *driver)
+{
+ return true;
+}
+#endif /* CONFIG_VFIO_NOIOMMU */
/**
* IOMMU driver registration
@@ -329,19 +281,6 @@ void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
}
EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
-/**
- * Group minor allocation/free - both called with vfio.group_lock held
- */
-static int vfio_alloc_group_minor(struct vfio_group *group)
-{
- return idr_alloc(&vfio.group_idr, group, 0, MINORMASK + 1, GFP_KERNEL);
-}
-
-static void vfio_free_group_minor(int minor)
-{
- idr_remove(&vfio.group_idr, minor);
-}
-
static int vfio_iommu_group_notifier(struct notifier_block *nb,
unsigned long action, void *data);
static void vfio_group_get(struct vfio_group *group);
@@ -370,105 +309,38 @@ static void vfio_container_put(struct vfio_container *container)
kref_put(&container->kref, vfio_container_release);
}
-static void vfio_group_unlock_and_free(struct vfio_group *group)
-{
- mutex_unlock(&vfio.group_lock);
- /*
- * Unregister outside of lock. A spurious callback is harmless now
- * that the group is no longer in vfio.group_list.
- */
- iommu_group_unregister_notifier(group->iommu_group, &group->nb);
- kfree(group);
-}
-
/**
* Group objects - create, release, get, put, search
*/
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group)
+static struct vfio_group *
+__vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
- struct vfio_group *group, *tmp;
- struct device *dev;
- int ret, minor;
-
- group = kzalloc(sizeof(*group), GFP_KERNEL);
- if (!group)
- return ERR_PTR(-ENOMEM);
-
- kref_init(&group->kref);
- INIT_LIST_HEAD(&group->device_list);
- mutex_init(&group->device_lock);
- INIT_LIST_HEAD(&group->unbound_list);
- mutex_init(&group->unbound_lock);
- atomic_set(&group->container_users, 0);
- atomic_set(&group->opened, 0);
- init_waitqueue_head(&group->container_q);
- group->iommu_group = iommu_group;
-#ifdef CONFIG_VFIO_NOIOMMU
- group->noiommu = (iommu_group_get_iommudata(iommu_group) == &noiommu);
-#endif
- BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
-
- group->nb.notifier_call = vfio_iommu_group_notifier;
-
- /*
- * blocking notifiers acquire a rwsem around registering and hold
- * it around callback. Therefore, need to register outside of
- * vfio.group_lock to avoid A-B/B-A contention. Our callback won't
- * do anything unless it can find the group in vfio.group_list, so
- * no harm in registering early.
- */
- ret = iommu_group_register_notifier(iommu_group, &group->nb);
- if (ret) {
- kfree(group);
- return ERR_PTR(ret);
- }
-
- mutex_lock(&vfio.group_lock);
+ struct vfio_group *group;
- /* Did we race creating this group? */
- list_for_each_entry(tmp, &vfio.group_list, vfio_next) {
- if (tmp->iommu_group == iommu_group) {
- vfio_group_get(tmp);
- vfio_group_unlock_and_free(group);
- return tmp;
+ list_for_each_entry(group, &vfio.group_list, vfio_next) {
+ if (group->iommu_group == iommu_group) {
+ vfio_group_get(group);
+ return group;
}
}
+ return NULL;
+}
- minor = vfio_alloc_group_minor(group);
- if (minor < 0) {
- vfio_group_unlock_and_free(group);
- return ERR_PTR(minor);
- }
-
- dev = device_create(vfio.class, NULL,
- MKDEV(MAJOR(vfio.group_devt), minor),
- group, "%s%d", group->noiommu ? "noiommu-" : "",
- iommu_group_id(iommu_group));
- if (IS_ERR(dev)) {
- vfio_free_group_minor(minor);
- vfio_group_unlock_and_free(group);
- return ERR_CAST(dev);
- }
-
- group->minor = minor;
- group->dev = dev;
-
- list_add(&group->vfio_next, &vfio.group_list);
+static struct vfio_group *
+vfio_group_get_from_iommu(struct iommu_group *iommu_group)
+{
+ struct vfio_group *group;
+ mutex_lock(&vfio.group_lock);
+ group = __vfio_group_get_from_iommu(iommu_group);
mutex_unlock(&vfio.group_lock);
-
return group;
}
-/* called with vfio.group_lock held */
-static void vfio_group_release(struct kref *kref)
+static void vfio_group_release(struct device *dev)
{
- struct vfio_group *group = container_of(kref, struct vfio_group, kref);
+ struct vfio_group *group = container_of(dev, struct vfio_group, dev);
struct vfio_unbound_dev *unbound, *tmp;
- struct iommu_group *iommu_group = group->iommu_group;
-
- WARN_ON(!list_empty(&group->device_list));
- WARN_ON(group->notifier.head);
list_for_each_entry_safe(unbound, tmp,
&group->unbound_list, unbound_next) {
@@ -476,105 +348,129 @@ static void vfio_group_release(struct kref *kref)
kfree(unbound);
}
- device_destroy(vfio.class, MKDEV(MAJOR(vfio.group_devt), group->minor));
- list_del(&group->vfio_next);
- vfio_free_group_minor(group->minor);
- vfio_group_unlock_and_free(group);
- iommu_group_put(iommu_group);
+ mutex_destroy(&group->device_lock);
+ mutex_destroy(&group->unbound_lock);
+ iommu_group_put(group->iommu_group);
+ ida_free(&vfio.group_ida, MINOR(group->dev.devt));
+ kfree(group);
}
-static void vfio_group_put(struct vfio_group *group)
+static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
+ enum vfio_group_type type)
{
- kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock);
-}
-
-struct vfio_group_put_work {
- struct work_struct work;
struct vfio_group *group;
-};
+ int minor;
-static void vfio_group_put_bg(struct work_struct *work)
-{
- struct vfio_group_put_work *do_work;
-
- do_work = container_of(work, struct vfio_group_put_work, work);
+ group = kzalloc(sizeof(*group), GFP_KERNEL);
+ if (!group)
+ return ERR_PTR(-ENOMEM);
- vfio_group_put(do_work->group);
- kfree(do_work);
-}
+ minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
+ if (minor < 0) {
+ kfree(group);
+ return ERR_PTR(minor);
+ }
-static void vfio_group_schedule_put(struct vfio_group *group)
-{
- struct vfio_group_put_work *do_work;
+ device_initialize(&group->dev);
+ group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
+ group->dev.class = vfio.class;
+ group->dev.release = vfio_group_release;
+ cdev_init(&group->cdev, &vfio_group_fops);
+ group->cdev.owner = THIS_MODULE;
- do_work = kmalloc(sizeof(*do_work), GFP_KERNEL);
- if (WARN_ON(!do_work))
- return;
+ refcount_set(&group->users, 1);
+ INIT_LIST_HEAD(&group->device_list);
+ mutex_init(&group->device_lock);
+ INIT_LIST_HEAD(&group->unbound_list);
+ mutex_init(&group->unbound_lock);
+ init_waitqueue_head(&group->container_q);
+ group->iommu_group = iommu_group;
+ /* put in vfio_group_release() */
+ iommu_group_ref_get(iommu_group);
+ group->type = type;
+ BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
- INIT_WORK(&do_work->work, vfio_group_put_bg);
- do_work->group = group;
- schedule_work(&do_work->work);
+ return group;
}
-/* Assume group_lock or group reference is held */
-static void vfio_group_get(struct vfio_group *group)
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
+ enum vfio_group_type type)
{
- kref_get(&group->kref);
-}
+ struct vfio_group *group;
+ struct vfio_group *ret;
+ int err;
-/*
- * Not really a try as we will sleep for mutex, but we need to make
- * sure the group pointer is valid under lock and get a reference.
- */
-static struct vfio_group *vfio_group_try_get(struct vfio_group *group)
-{
- struct vfio_group *target = group;
+ group = vfio_group_alloc(iommu_group, type);
+ if (IS_ERR(group))
+ return group;
- mutex_lock(&vfio.group_lock);
- list_for_each_entry(group, &vfio.group_list, vfio_next) {
- if (group == target) {
- vfio_group_get(group);
- mutex_unlock(&vfio.group_lock);
- return group;
- }
+ err = dev_set_name(&group->dev, "%s%d",
+ group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
+ iommu_group_id(iommu_group));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto err_put;
}
- mutex_unlock(&vfio.group_lock);
- return NULL;
-}
-
-static
-struct vfio_group *vfio_group_get_from_iommu(struct iommu_group *iommu_group)
-{
- struct vfio_group *group;
+ group->nb.notifier_call = vfio_iommu_group_notifier;
+ err = iommu_group_register_notifier(iommu_group, &group->nb);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto err_put;
+ }
mutex_lock(&vfio.group_lock);
- list_for_each_entry(group, &vfio.group_list, vfio_next) {
- if (group->iommu_group == iommu_group) {
- vfio_group_get(group);
- mutex_unlock(&vfio.group_lock);
- return group;
- }
+
+ /* Did we race creating this group? */
+ ret = __vfio_group_get_from_iommu(iommu_group);
+ if (ret)
+ goto err_unlock;
+
+ err = cdev_device_add(&group->cdev, &group->dev);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto err_unlock;
}
+
+ list_add(&group->vfio_next, &vfio.group_list);
+
mutex_unlock(&vfio.group_lock);
+ return group;
- return NULL;
+err_unlock:
+ mutex_unlock(&vfio.group_lock);
+ iommu_group_unregister_notifier(group->iommu_group, &group->nb);
+err_put:
+ put_device(&group->dev);
+ return ret;
}
-static struct vfio_group *vfio_group_get_from_minor(int minor)
+static void vfio_group_put(struct vfio_group *group)
{
- struct vfio_group *group;
+ if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
+ return;
- mutex_lock(&vfio.group_lock);
- group = idr_find(&vfio.group_idr, minor);
- if (!group) {
- mutex_unlock(&vfio.group_lock);
- return NULL;
- }
- vfio_group_get(group);
+ /*
+ * These data structures all have paired operations that can only be
+ * undone when the caller holds a live reference on the group. Since all
+ * pairs must be undone these WARN_ON's indicate some caller did not
+ * properly hold the group reference.
+ */
+ WARN_ON(!list_empty(&group->device_list));
+ WARN_ON(atomic_read(&group->container_users));
+ WARN_ON(group->notifier.head);
+
+ list_del(&group->vfio_next);
+ cdev_device_del(&group->cdev, &group->dev);
mutex_unlock(&vfio.group_lock);
- return group;
+ iommu_group_unregister_notifier(group->iommu_group, &group->nb);
+ put_device(&group->dev);
+}
+
+static void vfio_group_get(struct vfio_group *group)
+{
+ refcount_inc(&group->users);
}
static struct vfio_group *vfio_group_get_from_dev(struct device *dev)
@@ -740,14 +636,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
struct device *dev = data;
struct vfio_unbound_dev *unbound;
- /*
- * Need to go through a group_lock lookup to get a reference or we
- * risk racing a group being removed. Ignore spurious notifies.
- */
- group = vfio_group_try_get(group);
- if (!group)
- return NOTIFY_OK;
-
switch (action) {
case IOMMU_GROUP_NOTIFY_ADD_DEVICE:
vfio_group_nb_add_dev(group, dev);
@@ -798,15 +686,6 @@ static int vfio_iommu_group_notifier(struct notifier_block *nb,
mutex_unlock(&group->unbound_lock);
break;
}
-
- /*
- * If we're the last reference to the group, the group will be
- * released, which includes unregistering the iommu group notifier.
- * We hold a read-lock on that notifier list, unregistering needs
- * a write-lock... deadlock. Release our reference asynchronously
- * to avoid that situation.
- */
- vfio_group_schedule_put(group);
return NOTIFY_OK;
}
@@ -828,11 +707,78 @@ void vfio_uninit_group_dev(struct vfio_device *device)
}
EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
-int vfio_register_group_dev(struct vfio_device *device)
+static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
+ enum vfio_group_type type)
{
- struct vfio_device *existing_device;
struct iommu_group *iommu_group;
struct vfio_group *group;
+ int ret;
+
+ iommu_group = iommu_group_alloc();
+ if (IS_ERR(iommu_group))
+ return ERR_CAST(iommu_group);
+
+ iommu_group_set_name(iommu_group, "vfio-noiommu");
+ ret = iommu_group_add_device(iommu_group, dev);
+ if (ret)
+ goto out_put_group;
+
+ group = vfio_create_group(iommu_group, type);
+ if (IS_ERR(group)) {
+ ret = PTR_ERR(group);
+ goto out_remove_device;
+ }
+ iommu_group_put(iommu_group);
+ return group;
+
+out_remove_device:
+ iommu_group_remove_device(dev);
+out_put_group:
+ iommu_group_put(iommu_group);
+ return ERR_PTR(ret);
+}
+
+static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
+{
+ struct iommu_group *iommu_group;
+ struct vfio_group *group;
+
+ iommu_group = iommu_group_get(dev);
+#ifdef CONFIG_VFIO_NOIOMMU
+ if (!iommu_group && noiommu && !iommu_present(dev->bus)) {
+ /*
+ * With noiommu enabled, create an IOMMU group for devices that
+ * don't already have one and don't have an iommu_ops on their
+ * bus. Taint the kernel because we're about to give a DMA
+ * capable device to a user without IOMMU protection.
+ */
+ group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
+ if (!IS_ERR(group)) {
+ add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+ dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
+ }
+ return group;
+ }
+#endif
+ if (!iommu_group)
+ return ERR_PTR(-EINVAL);
+
+ group = vfio_group_get_from_iommu(iommu_group);
+ if (!group)
+ group = vfio_create_group(iommu_group, VFIO_IOMMU);
+
+ /* The vfio_group holds a reference to the iommu_group */
+ iommu_group_put(iommu_group);
+ return group;
+}
+
+static int __vfio_register_dev(struct vfio_device *device,
+ struct vfio_group *group)
+{
+ struct vfio_device *existing_device;
+
+ if (IS_ERR(group))
+ return PTR_ERR(group);
/*
* If the driver doesn't specify a set then the device is added to a
@@ -841,30 +787,14 @@ int vfio_register_group_dev(struct vfio_device *device)
if (!device->dev_set)
vfio_assign_device_set(device, device);
- iommu_group = iommu_group_get(device->dev);
- if (!iommu_group)
- return -EINVAL;
-
- group = vfio_group_get_from_iommu(iommu_group);
- if (!group) {
- group = vfio_create_group(iommu_group);
- if (IS_ERR(group)) {
- iommu_group_put(iommu_group);
- return PTR_ERR(group);
- }
- } else {
- /*
- * A found vfio_group already holds a reference to the
- * iommu_group. A created vfio_group keeps the reference.
- */
- iommu_group_put(iommu_group);
- }
-
existing_device = vfio_group_get_device(group, device->dev);
if (existing_device) {
dev_WARN(device->dev, "Device already exists on group %d\n",
- iommu_group_id(iommu_group));
+ iommu_group_id(group->iommu_group));
vfio_device_put(existing_device);
+ if (group->type == VFIO_NO_IOMMU ||
+ group->type == VFIO_EMULATED_IOMMU)
+ iommu_group_remove_device(device->dev);
vfio_group_put(group);
return -EBUSY;
}
@@ -882,8 +812,25 @@ int vfio_register_group_dev(struct vfio_device *device)
return 0;
}
+
+int vfio_register_group_dev(struct vfio_device *device)
+{
+ return __vfio_register_dev(device,
+ vfio_group_find_or_alloc(device->dev));
+}
EXPORT_SYMBOL_GPL(vfio_register_group_dev);
+/*
+ * Register a virtual device without IOMMU backing. The user of this
+ * device must not be able to directly trigger unmediated DMA.
+ */
+int vfio_register_emulated_iommu_dev(struct vfio_device *device)
+{
+ return __vfio_register_dev(device,
+ vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
+}
+EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
+
/**
* Get a reference to the vfio_device for a device. Even if the
* caller thinks they own the device, they could be racing with a
@@ -1010,6 +957,9 @@ void vfio_unregister_group_dev(struct vfio_device *device)
if (list_empty(&group->device_list))
wait_event(group->container_q, !group->container);
+ if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
+ iommu_group_remove_device(device->dev);
+
/* Matches the get in vfio_register_group_dev() */
vfio_group_put(group);
}
@@ -1042,13 +992,10 @@ static long vfio_ioctl_check_extension(struct vfio_container *container,
list_for_each_entry(driver, &vfio.iommu_drivers_list,
vfio_next) {
-#ifdef CONFIG_VFIO_NOIOMMU
if (!list_empty(&container->group_list) &&
- (container->noiommu !=
- (driver->ops == &vfio_noiommu_ops)))
+ !vfio_iommu_driver_allowed(container,
+ driver))
continue;
-#endif
-
if (!try_module_get(driver->ops->owner))
continue;
@@ -1079,7 +1026,8 @@ static int __vfio_container_attach_groups(struct vfio_container *container,
int ret = -ENODEV;
list_for_each_entry(group, &container->group_list, container_next) {
- ret = driver->ops->attach_group(data, group->iommu_group);
+ ret = driver->ops->attach_group(data, group->iommu_group,
+ group->type);
if (ret)
goto unwind;
}
@@ -1120,15 +1068,8 @@ static long vfio_ioctl_set_iommu(struct vfio_container *container,
list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
void *data;
-#ifdef CONFIG_VFIO_NOIOMMU
- /*
- * Only noiommu containers can use vfio-noiommu and noiommu
- * containers can only use vfio-noiommu.
- */
- if (container->noiommu != (driver->ops == &vfio_noiommu_ops))
+ if (!vfio_iommu_driver_allowed(container, driver))
continue;
-#endif
-
if (!try_module_get(driver->ops->owner))
continue;
@@ -1234,62 +1175,12 @@ static int vfio_fops_release(struct inode *inode, struct file *filep)
return 0;
}
-/*
- * Once an iommu driver is set, we optionally pass read/write/mmap
- * on to the driver, allowing management interfaces beyond ioctl.
- */
-static ssize_t vfio_fops_read(struct file *filep, char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct vfio_container *container = filep->private_data;
- struct vfio_iommu_driver *driver;
- ssize_t ret = -EINVAL;
-
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->read))
- ret = driver->ops->read(container->iommu_data,
- buf, count, ppos);
-
- return ret;
-}
-
-static ssize_t vfio_fops_write(struct file *filep, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- struct vfio_container *container = filep->private_data;
- struct vfio_iommu_driver *driver;
- ssize_t ret = -EINVAL;
-
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->write))
- ret = driver->ops->write(container->iommu_data,
- buf, count, ppos);
-
- return ret;
-}
-
-static int vfio_fops_mmap(struct file *filep, struct vm_area_struct *vma)
-{
- struct vfio_container *container = filep->private_data;
- struct vfio_iommu_driver *driver;
- int ret = -EINVAL;
-
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->mmap))
- ret = driver->ops->mmap(container->iommu_data, vma);
-
- return ret;
-}
-
static const struct file_operations vfio_fops = {
.owner = THIS_MODULE,
.open = vfio_fops_open,
.release = vfio_fops_release,
- .read = vfio_fops_read,
- .write = vfio_fops_write,
.unlocked_ioctl = vfio_fops_unl_ioctl,
.compat_ioctl = compat_ptr_ioctl,
- .mmap = vfio_fops_mmap,
};
/**
@@ -1366,7 +1257,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
if (atomic_read(&group->container_users))
return -EINVAL;
- if (group->noiommu && !capable(CAP_SYS_RAWIO))
+ if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
return -EPERM;
f = fdget(container_fd);
@@ -1386,7 +1277,7 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
/* Real groups and fake groups cannot mix */
if (!list_empty(&container->group_list) &&
- container->noiommu != group->noiommu) {
+ container->noiommu != (group->type == VFIO_NO_IOMMU)) {
ret = -EPERM;
goto unlock_out;
}
@@ -1394,13 +1285,14 @@ static int vfio_group_set_container(struct vfio_group *group, int container_fd)
driver = container->iommu_driver;
if (driver) {
ret = driver->ops->attach_group(container->iommu_data,
- group->iommu_group);
+ group->iommu_group,
+ group->type);
if (ret)
goto unlock_out;
}
group->container = container;
- container->noiommu = group->noiommu;
+ container->noiommu = (group->type == VFIO_NO_IOMMU);
list_add(&group->container_next, &container->group_list);
/* Get a reference on the container and mark a user within the group */
@@ -1424,7 +1316,7 @@ static int vfio_group_add_container_user(struct vfio_group *group)
if (!atomic_inc_not_zero(&group->container_users))
return -EINVAL;
- if (group->noiommu) {
+ if (group->type == VFIO_NO_IOMMU) {
atomic_dec(&group->container_users);
return -EPERM;
}
@@ -1449,7 +1341,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
!group->container->iommu_driver || !vfio_group_viable(group))
return -EINVAL;
- if (group->noiommu && !capable(CAP_SYS_RAWIO))
+ if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
return -EPERM;
device = vfio_device_get_from_name(group, buf);
@@ -1496,7 +1388,7 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
fd_install(fdno, filep);
- if (group->noiommu)
+ if (group->type == VFIO_NO_IOMMU)
dev_warn(device->dev, "vfio-noiommu device opened by user "
"(%s:%d)\n", current->comm, task_pid_nr(current));
return fdno;
@@ -1585,14 +1477,15 @@ static long vfio_group_fops_unl_ioctl(struct file *filep,
static int vfio_group_fops_open(struct inode *inode, struct file *filep)
{
- struct vfio_group *group;
+ struct vfio_group *group =
+ container_of(inode->i_cdev, struct vfio_group, cdev);
int opened;
- group = vfio_group_get_from_minor(iminor(inode));
- if (!group)
+ /* users can be zero if this races with vfio_group_put() */
+ if (!refcount_inc_not_zero(&group->users))
return -ENODEV;
- if (group->noiommu && !capable(CAP_SYS_RAWIO)) {
+ if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
vfio_group_put(group);
return -EPERM;
}
@@ -1757,6 +1650,9 @@ struct vfio_group *vfio_group_get_external_user(struct file *filep)
if (ret)
return ERR_PTR(ret);
+ /*
+ * Since the caller holds the fget on the file group->users must be >= 1
+ */
vfio_group_get(group);
return group;
@@ -2396,7 +2292,7 @@ static int __init vfio_init(void)
{
int ret;
- idr_init(&vfio.group_idr);
+ ida_init(&vfio.group_ida);
mutex_init(&vfio.group_lock);
mutex_init(&vfio.iommu_drivers_lock);
INIT_LIST_HEAD(&vfio.group_list);
@@ -2421,11 +2317,6 @@ static int __init vfio_init(void)
if (ret)
goto err_alloc_chrdev;
- cdev_init(&vfio.group_cdev, &vfio_group_fops);
- ret = cdev_add(&vfio.group_cdev, vfio.group_devt, MINORMASK + 1);
- if (ret)
- goto err_cdev_add;
-
pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
#ifdef CONFIG_VFIO_NOIOMMU
@@ -2433,8 +2324,6 @@ static int __init vfio_init(void)
#endif
return 0;
-err_cdev_add:
- unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
class_destroy(vfio.class);
vfio.class = NULL;
@@ -2450,8 +2339,7 @@ static void __exit vfio_cleanup(void)
#ifdef CONFIG_VFIO_NOIOMMU
vfio_unregister_iommu_driver(&vfio_noiommu_ops);
#endif
- idr_destroy(&vfio.group_idr);
- cdev_del(&vfio.group_cdev);
+ ida_destroy(&vfio.group_ida);
unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
class_destroy(vfio.class);
vfio.class = NULL;