aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/vhost/vhost.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vhost/vhost.c')
-rw-r--r--drivers/vhost/vhost.c315
1 files changed, 252 insertions, 63 deletions
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 7aa4eea930f1..c14c42b95ab8 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -37,6 +37,11 @@ enum {
VHOST_MEMORY_F_LOG = 0x1,
};
+static unsigned vhost_zcopy_mask __read_mostly;
+
+#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
+#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
+
static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
poll_table *pt)
{
@@ -161,6 +166,8 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->last_avail_idx = 0;
vq->avail_idx = 0;
vq->last_used_idx = 0;
+ vq->signalled_used = 0;
+ vq->signalled_used_valid = false;
vq->used_flags = 0;
vq->log_used = false;
vq->log_addr = -1ull;
@@ -174,6 +181,9 @@ static void vhost_vq_reset(struct vhost_dev *dev,
vq->call_ctx = NULL;
vq->call = NULL;
vq->log_ctx = NULL;
+ vq->upend_idx = 0;
+ vq->done_idx = 0;
+ vq->ubufs = NULL;
}
static int vhost_worker(void *data)
@@ -220,10 +230,28 @@ static int vhost_worker(void *data)
return 0;
}
+static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
+{
+ kfree(vq->indirect);
+ vq->indirect = NULL;
+ kfree(vq->log);
+ vq->log = NULL;
+ kfree(vq->heads);
+ vq->heads = NULL;
+ kfree(vq->ubuf_info);
+ vq->ubuf_info = NULL;
+}
+
+void vhost_enable_zcopy(int vq)
+{
+ vhost_zcopy_mask |= 0x1 << vq;
+}
+
/* Helper to allocate iovec buffers for all vqs. */
static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
{
int i;
+ bool zcopy;
for (i = 0; i < dev->nvqs; ++i) {
dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect *
@@ -232,19 +260,21 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
GFP_KERNEL);
dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads *
UIO_MAXIOV, GFP_KERNEL);
-
+ zcopy = vhost_zcopy_mask & (0x1 << i);
+ if (zcopy)
+ dev->vqs[i].ubuf_info =
+ kmalloc(sizeof *dev->vqs[i].ubuf_info *
+ UIO_MAXIOV, GFP_KERNEL);
if (!dev->vqs[i].indirect || !dev->vqs[i].log ||
- !dev->vqs[i].heads)
+ !dev->vqs[i].heads ||
+ (zcopy && !dev->vqs[i].ubuf_info))
goto err_nomem;
}
return 0;
err_nomem:
- for (; i >= 0; --i) {
- kfree(dev->vqs[i].indirect);
- kfree(dev->vqs[i].log);
- kfree(dev->vqs[i].heads);
- }
+ for (; i >= 0; --i)
+ vhost_vq_free_iovecs(&dev->vqs[i]);
return -ENOMEM;
}
@@ -252,14 +282,8 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
{
int i;
- for (i = 0; i < dev->nvqs; ++i) {
- kfree(dev->vqs[i].indirect);
- dev->vqs[i].indirect = NULL;
- kfree(dev->vqs[i].log);
- dev->vqs[i].log = NULL;
- kfree(dev->vqs[i].heads);
- dev->vqs[i].heads = NULL;
- }
+ for (i = 0; i < dev->nvqs; ++i)
+ vhost_vq_free_iovecs(&dev->vqs[i]);
}
long vhost_dev_init(struct vhost_dev *dev,
@@ -282,6 +306,7 @@ long vhost_dev_init(struct vhost_dev *dev,
dev->vqs[i].log = NULL;
dev->vqs[i].indirect = NULL;
dev->vqs[i].heads = NULL;
+ dev->vqs[i].ubuf_info = NULL;
dev->vqs[i].dev = dev;
mutex_init(&dev->vqs[i].mutex);
vhost_vq_reset(dev, dev->vqs + i);
@@ -385,6 +410,30 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
return 0;
}
+/* In case of DMA done not in order in lower device driver for some reason.
+ * upend_idx is used to track end of used idx, done_idx is used to track head
+ * of used idx. Once lower device DMA done contiguously, we will signal KVM
+ * guest used idx.
+ */
+int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq)
+{
+ int i;
+ int j = 0;
+
+ for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
+ if ((vq->heads[i].len == VHOST_DMA_DONE_LEN)) {
+ vq->heads[i].len = VHOST_DMA_CLEAR_LEN;
+ vhost_add_used_and_signal(vq->dev, vq,
+ vq->heads[i].id, 0);
+ ++j;
+ } else
+ break;
+ }
+ if (j)
+ vq->done_idx = i;
+ return j;
+}
+
/* Caller should have device mutex */
void vhost_dev_cleanup(struct vhost_dev *dev)
{
@@ -395,6 +444,13 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
vhost_poll_stop(&dev->vqs[i].poll);
vhost_poll_flush(&dev->vqs[i].poll);
}
+ /* Wait for all lower device DMAs done. */
+ if (dev->vqs[i].ubufs)
+ vhost_ubuf_put_and_wait(dev->vqs[i].ubufs);
+
+ /* Signal guest as appropriate. */
+ vhost_zerocopy_signal_used(&dev->vqs[i]);
+
if (dev->vqs[i].error_ctx)
eventfd_ctx_put(dev->vqs[i].error_ctx);
if (dev->vqs[i].error)
@@ -489,16 +545,17 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
return 1;
}
-static int vq_access_ok(unsigned int num,
+static int vq_access_ok(struct vhost_dev *d, unsigned int num,
struct vring_desc __user *desc,
struct vring_avail __user *avail,
struct vring_used __user *used)
{
+ size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
return access_ok(VERIFY_READ, desc, num * sizeof *desc) &&
access_ok(VERIFY_READ, avail,
- sizeof *avail + num * sizeof *avail->ring) &&
+ sizeof *avail + num * sizeof *avail->ring + s) &&
access_ok(VERIFY_WRITE, used,
- sizeof *used + num * sizeof *used->ring);
+ sizeof *used + num * sizeof *used->ring + s);
}
/* Can we log writes? */
@@ -514,9 +571,11 @@ int vhost_log_access_ok(struct vhost_dev *dev)
/* Verify access for write logging. */
/* Caller should have vq mutex and device mutex */
-static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
+static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq,
+ void __user *log_base)
{
struct vhost_memory *mp;
+ size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;
mp = rcu_dereference_protected(vq->dev->memory,
lockdep_is_held(&vq->mutex));
@@ -524,15 +583,15 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base)
vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) &&
(!vq->log_used || log_access_ok(log_base, vq->log_addr,
sizeof *vq->used +
- vq->num * sizeof *vq->used->ring));
+ vq->num * sizeof *vq->used->ring + s));
}
/* Can we start vq? */
/* Caller should have vq mutex and device mutex */
int vhost_vq_access_ok(struct vhost_virtqueue *vq)
{
- return vq_access_ok(vq->num, vq->desc, vq->avail, vq->used) &&
- vq_log_access_ok(vq, vq->log_base);
+ return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) &&
+ vq_log_access_ok(vq->dev, vq, vq->log_base);
}
static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
@@ -570,16 +629,6 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
return 0;
}
-static int init_used(struct vhost_virtqueue *vq,
- struct vring_used __user *used)
-{
- int r = put_user(vq->used_flags, &used->flags);
-
- if (r)
- return r;
- return get_user(vq->last_used_idx, &used->idx);
-}
-
static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
{
struct file *eventfp, *filep = NULL,
@@ -674,7 +723,7 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
* If it is not, we don't as size might not have been setup.
* We will verify when backend is configured. */
if (vq->private_data) {
- if (!vq_access_ok(vq->num,
+ if (!vq_access_ok(d, vq->num,
(void __user *)(unsigned long)a.desc_user_addr,
(void __user *)(unsigned long)a.avail_user_addr,
(void __user *)(unsigned long)a.used_user_addr)) {
@@ -692,10 +741,6 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
}
}
- r = init_used(vq, (struct vring_used __user *)(unsigned long)
- a.used_user_addr);
- if (r)
- break;
vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));
vq->desc = (void __user *)(unsigned long)a.desc_user_addr;
vq->avail = (void __user *)(unsigned long)a.avail_user_addr;
@@ -818,7 +863,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg)
vq = d->vqs + i;
mutex_lock(&vq->mutex);
/* If ring is inactive, will check when it's enabled. */
- if (vq->private_data && !vq_log_access_ok(vq, base))
+ if (vq->private_data && !vq_log_access_ok(d, vq, base))
r = -EFAULT;
else
vq->log_base = base;
@@ -950,6 +995,57 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
return 0;
}
+static int vhost_update_used_flags(struct vhost_virtqueue *vq)
+{
+ void __user *used;
+ if (__put_user(vq->used_flags, &vq->used->flags) < 0)
+ return -EFAULT;
+ if (unlikely(vq->log_used)) {
+ /* Make sure the flag is seen before log. */
+ smp_wmb();
+ /* Log used flag write. */
+ used = &vq->used->flags;
+ log_write(vq->log_base, vq->log_addr +
+ (used - (void __user *)vq->used),
+ sizeof vq->used->flags);
+ if (vq->log_ctx)
+ eventfd_signal(vq->log_ctx, 1);
+ }
+ return 0;
+}
+
+static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event)
+{
+ if (__put_user(vq->avail_idx, vhost_avail_event(vq)))
+ return -EFAULT;
+ if (unlikely(vq->log_used)) {
+ void __user *used;
+ /* Make sure the event is seen before log. */
+ smp_wmb();
+ /* Log avail event write */
+ used = vhost_avail_event(vq);
+ log_write(vq->log_base, vq->log_addr +
+ (used - (void __user *)vq->used),
+ sizeof *vhost_avail_event(vq));
+ if (vq->log_ctx)
+ eventfd_signal(vq->log_ctx, 1);
+ }
+ return 0;
+}
+
+int vhost_init_used(struct vhost_virtqueue *vq)
+{
+ int r;
+ if (!vq->private_data)
+ return 0;
+
+ r = vhost_update_used_flags(vq);
+ if (r)
+ return r;
+ vq->signalled_used_valid = false;
+ return get_user(vq->last_used_idx, &vq->used->idx);
+}
+
static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
struct iovec iov[], int iov_size)
{
@@ -1219,6 +1315,10 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq,
/* On success, increment avail index. */
vq->last_avail_idx++;
+
+ /* Assume notifications from guest are disabled at this point,
+ * if they aren't we would need to update avail_event index. */
+ BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY));
return head;
}
@@ -1267,6 +1367,12 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len)
eventfd_signal(vq->log_ctx, 1);
}
vq->last_used_idx++;
+ /* If the driver never bothers to signal in a very long while,
+ * used index might wrap around. If that happens, invalidate
+ * signalled_used index we stored. TODO: make sure driver
+ * signals at least once in 2^16 and remove this. */
+ if (unlikely(vq->last_used_idx == vq->signalled_used))
+ vq->signalled_used_valid = false;
return 0;
}
@@ -1275,6 +1381,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
unsigned count)
{
struct vring_used_elem __user *used;
+ u16 old, new;
int start;
start = vq->last_used_idx % vq->num;
@@ -1292,7 +1399,14 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq,
((void __user *)used - (void __user *)vq->used),
count * sizeof *used);
}
- vq->last_used_idx += count;
+ old = vq->last_used_idx;
+ new = (vq->last_used_idx += count);
+ /* If the driver never bothers to signal in a very long while,
+ * used index might wrap around. If that happens, invalidate
+ * signalled_used index we stored. TODO: make sure driver
+ * signals at least once in 2^16 and remove this. */
+ if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old)))
+ vq->signalled_used_valid = false;
return 0;
}
@@ -1331,29 +1445,47 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads,
return r;
}
-/* This actually signals the guest, using eventfd. */
-void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
- __u16 flags;
-
+ __u16 old, new, event;
+ bool v;
/* Flush out used index updates. This is paired
* with the barrier that the Guest executes when enabling
* interrupts. */
smp_mb();
- if (__get_user(flags, &vq->avail->flags)) {
- vq_err(vq, "Failed to get flags");
- return;
+ if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
+ unlikely(vq->avail_idx == vq->last_avail_idx))
+ return true;
+
+ if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+ __u16 flags;
+ if (__get_user(flags, &vq->avail->flags)) {
+ vq_err(vq, "Failed to get flags");
+ return true;
+ }
+ return !(flags & VRING_AVAIL_F_NO_INTERRUPT);
}
+ old = vq->signalled_used;
+ v = vq->signalled_used_valid;
+ new = vq->signalled_used = vq->last_used_idx;
+ vq->signalled_used_valid = true;
- /* If they don't want an interrupt, don't signal, unless empty. */
- if ((flags & VRING_AVAIL_F_NO_INTERRUPT) &&
- (vq->avail_idx != vq->last_avail_idx ||
- !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY)))
- return;
+ if (unlikely(!v))
+ return true;
+
+ if (get_user(event, vhost_used_event(vq))) {
+ vq_err(vq, "Failed to get used event idx");
+ return true;
+ }
+ return vring_need_event(event, new, old);
+}
+/* This actually signals the guest, using eventfd. */
+void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq)
+{
/* Signal the Guest tell them we used something up. */
- if (vq->call_ctx)
+ if (vq->call_ctx && vhost_notify(dev, vq))
eventfd_signal(vq->call_ctx, 1);
}
@@ -1376,7 +1508,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev,
}
/* OK, now we need to know about added descriptors. */
-bool vhost_enable_notify(struct vhost_virtqueue *vq)
+bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
u16 avail_idx;
int r;
@@ -1384,11 +1516,20 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY))
return false;
vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;
- r = put_user(vq->used_flags, &vq->used->flags);
- if (r) {
- vq_err(vq, "Failed to enable notification at %p: %d\n",
- &vq->used->flags, r);
- return false;
+ if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+ r = vhost_update_used_flags(vq);
+ if (r) {
+ vq_err(vq, "Failed to enable notification at %p: %d\n",
+ &vq->used->flags, r);
+ return false;
+ }
+ } else {
+ r = vhost_update_avail_event(vq, vq->avail_idx);
+ if (r) {
+ vq_err(vq, "Failed to update avail event index at %p: %d\n",
+ vhost_avail_event(vq), r);
+ return false;
+ }
}
/* They could have slipped one in as we were doing that: make
* sure it's written, then check again. */
@@ -1404,15 +1545,63 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq)
}
/* We don't need to be notified again. */
-void vhost_disable_notify(struct vhost_virtqueue *vq)
+void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
{
int r;
if (vq->used_flags & VRING_USED_F_NO_NOTIFY)
return;
vq->used_flags |= VRING_USED_F_NO_NOTIFY;
- r = put_user(vq->used_flags, &vq->used->flags);
- if (r)
- vq_err(vq, "Failed to enable notification at %p: %d\n",
- &vq->used->flags, r);
+ if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
+ r = vhost_update_used_flags(vq);
+ if (r)
+ vq_err(vq, "Failed to enable notification at %p: %d\n",
+ &vq->used->flags, r);
+ }
+}
+
+static void vhost_zerocopy_done_signal(struct kref *kref)
+{
+ struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
+ kref);
+ wake_up(&ubufs->wait);
+}
+
+struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
+ bool zcopy)
+{
+ struct vhost_ubuf_ref *ubufs;
+ /* No zero copy backend? Nothing to count. */
+ if (!zcopy)
+ return NULL;
+ ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL);
+ if (!ubufs)
+ return ERR_PTR(-ENOMEM);
+ kref_init(&ubufs->kref);
+ init_waitqueue_head(&ubufs->wait);
+ ubufs->vq = vq;
+ return ubufs;
+}
+
+void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
+{
+ kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+}
+
+void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
+{
+ kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+ wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
+ kfree(ubufs);
+}
+
+void vhost_zerocopy_callback(void *arg)
+{
+ struct ubuf_info *ubuf = arg;
+ struct vhost_ubuf_ref *ubufs = ubuf->arg;
+ struct vhost_virtqueue *vq = ubufs->vq;
+
+ /* set len = 1 to mark this desc buffers done DMA */
+ vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
+ kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
}