From 93d2c4de8d8129b97ee1e1a222aedb0719d2fcd9 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 28 Aug 2019 13:36:59 +0800 Subject: vhost/test: fix build for vhost test Since below commit, callers need to specify the iov_limit in vhost_dev_init() explicitly. Fixes: b46a0bf78ad7 ("vhost: fix OOB in get_rx_bufs()") Cc: stable@vger.kernel.org Signed-off-by: Tiwei Bie Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 9e90e969af55..ac4f762c4f65 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -115,7 +115,7 @@ static int vhost_test_open(struct inode *inode, struct file *f) dev = &n->dev; vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ]; n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX); + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV); f->private_data = n; -- cgit v1.2.3-59-g8ed1b From 264b563b8675771834419057cbe076c1a41fb666 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Wed, 28 Aug 2019 13:37:00 +0800 Subject: vhost/test: fix build for vhost test Since vhost_exceeds_weight() was introduced, callers need to specify the packet weight and byte weight in vhost_dev_init(). Note that, the packet weight isn't counted in this patch to keep the original behavior unchanged. Fixes: e82b9b0727ff ("vhost: introduce vhost_exceeds_weight()") Cc: stable@vger.kernel.org Signed-off-by: Tiwei Bie Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/test.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index ac4f762c4f65..7804869c6a31 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -22,6 +22,12 @@ * Using this limit prevents one virtqueue from starving others. */ #define VHOST_TEST_WEIGHT 0x80000 +/* Max number of packets transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others with + * pkts. + */ +#define VHOST_TEST_PKT_WEIGHT 256 + enum { VHOST_TEST_VQ = 0, VHOST_TEST_VQ_MAX = 1, @@ -80,10 +86,8 @@ static void handle_vq(struct vhost_test *n) } vhost_add_used_and_signal(&n->dev, vq, head, 0); total_len += len; - if (unlikely(total_len >= VHOST_TEST_WEIGHT)) { - vhost_poll_queue(&vq->poll); + if (unlikely(vhost_exceeds_weight(vq, 0, total_len))) break; - } } mutex_unlock(&vq->mutex); @@ -115,7 +119,8 @@ static int vhost_test_open(struct inode *inode, struct file *f) dev = &n->dev; vqs[VHOST_TEST_VQ] = &n->vqs[VHOST_TEST_VQ]; n->vqs[VHOST_TEST_VQ].handle_kick = handle_vq_kick; - vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV); + vhost_dev_init(dev, vqs, VHOST_TEST_VQ_MAX, UIO_MAXIOV, + VHOST_TEST_PKT_WEIGHT, VHOST_TEST_WEIGHT); f->private_data = n; -- cgit v1.2.3-59-g8ed1b From 896fc242bc1d261c1178838487a0a54b260625cc Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 20 Aug 2019 20:36:32 +0800 Subject: vhost: Remove unnecessary variable It is unnecessary to use ret variable to return the error code, just return the error code directly. Signed-off-by: Yunsheng Lin Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 0536f8526359..1ac9de250319 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -203,7 +203,6 @@ EXPORT_SYMBOL_GPL(vhost_poll_init); int vhost_poll_start(struct vhost_poll *poll, struct file *file) { __poll_t mask; - int ret = 0; if (poll->wqh) return 0; @@ -213,10 +212,10 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file) vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask)); if (mask & EPOLLERR) { vhost_poll_stop(poll); - ret = -EINVAL; + return -EINVAL; } - return ret; + return 0; } EXPORT_SYMBOL_GPL(vhost_poll_start); -- cgit v1.2.3-59-g8ed1b From 3d2c7d37047557175fb41de044091050b5f0d73b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sat, 10 Aug 2019 13:53:21 -0400 Subject: Revert "vhost: access vq metadata through kernel virtual address" This reverts commit 7f466032dc ("vhost: access vq metadata through kernel virtual address"). The commit caused a bunch of issues, and while commit 73f628ec9e ("vhost: disable metadata prefetch optimization") disabled the optimization it's not nice to keep lots of dead code around. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 515 +------------------------------------------------- drivers/vhost/vhost.h | 41 ---- 2 files changed, 3 insertions(+), 553 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 1ac9de250319..5dc174ac8cac 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -297,160 +297,6 @@ static void vhost_vq_meta_reset(struct vhost_dev *d) __vhost_vq_meta_reset(d->vqs[i]); } -#if VHOST_ARCH_CAN_ACCEL_UACCESS -static void vhost_map_unprefetch(struct vhost_map *map) -{ - kfree(map->pages); - map->pages = NULL; - map->npages = 0; - map->addr = NULL; -} - -static void vhost_uninit_vq_maps(struct vhost_virtqueue *vq) -{ - struct vhost_map *map[VHOST_NUM_ADDRS]; - int i; - - spin_lock(&vq->mmu_lock); - for (i = 0; i < VHOST_NUM_ADDRS; i++) { - map[i] = rcu_dereference_protected(vq->maps[i], - lockdep_is_held(&vq->mmu_lock)); - if (map[i]) - rcu_assign_pointer(vq->maps[i], NULL); - } - spin_unlock(&vq->mmu_lock); - - synchronize_rcu(); - - for (i = 0; i < VHOST_NUM_ADDRS; i++) - if (map[i]) - vhost_map_unprefetch(map[i]); - -} - -static void vhost_reset_vq_maps(struct vhost_virtqueue *vq) -{ - int i; - - vhost_uninit_vq_maps(vq); - for (i = 0; i < VHOST_NUM_ADDRS; i++) - vq->uaddrs[i].size = 0; -} - -static bool vhost_map_range_overlap(struct vhost_uaddr *uaddr, - unsigned long start, - unsigned long end) -{ - if (unlikely(!uaddr->size)) - return false; - - return !(end < uaddr->uaddr || start > uaddr->uaddr - 1 + uaddr->size); -} - -static void vhost_invalidate_vq_start(struct vhost_virtqueue *vq, - int index, - unsigned long start, - unsigned long end) -{ - struct vhost_uaddr *uaddr = &vq->uaddrs[index]; - struct vhost_map *map; - int i; - - if (!vhost_map_range_overlap(uaddr, start, end)) - return; - - spin_lock(&vq->mmu_lock); - ++vq->invalidate_count; - - map = rcu_dereference_protected(vq->maps[index], - lockdep_is_held(&vq->mmu_lock)); - if (map) { - if (uaddr->write) { - for (i = 0; i < map->npages; i++) - set_page_dirty(map->pages[i]); - } - rcu_assign_pointer(vq->maps[index], NULL); - } - spin_unlock(&vq->mmu_lock); - - if (map) { - synchronize_rcu(); - vhost_map_unprefetch(map); - } -} - -static void vhost_invalidate_vq_end(struct vhost_virtqueue *vq, - int index, - unsigned long start, - unsigned long end) -{ - if (!vhost_map_range_overlap(&vq->uaddrs[index], start, end)) - return; - - spin_lock(&vq->mmu_lock); - --vq->invalidate_count; - spin_unlock(&vq->mmu_lock); -} - -static int vhost_invalidate_range_start(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct vhost_dev *dev = container_of(mn, struct vhost_dev, - mmu_notifier); - int i, j; - - if (!mmu_notifier_range_blockable(range)) - return -EAGAIN; - - for (i = 0; i < dev->nvqs; i++) { - struct vhost_virtqueue *vq = dev->vqs[i]; - - for (j = 0; j < VHOST_NUM_ADDRS; j++) - vhost_invalidate_vq_start(vq, j, - range->start, - range->end); - } - - return 0; -} - -static void vhost_invalidate_range_end(struct mmu_notifier *mn, - const struct mmu_notifier_range *range) -{ - struct vhost_dev *dev = container_of(mn, struct vhost_dev, - mmu_notifier); - int i, j; - - for (i = 0; i < dev->nvqs; i++) { - struct vhost_virtqueue *vq = dev->vqs[i]; - - for (j = 0; j < VHOST_NUM_ADDRS; j++) - vhost_invalidate_vq_end(vq, j, - range->start, - range->end); - } -} - -static const struct mmu_notifier_ops vhost_mmu_notifier_ops = { - .invalidate_range_start = vhost_invalidate_range_start, - .invalidate_range_end = vhost_invalidate_range_end, -}; - -static void vhost_init_maps(struct vhost_dev *dev) -{ - struct vhost_virtqueue *vq; - int i, j; - - dev->mmu_notifier.ops = &vhost_mmu_notifier_ops; - - for (i = 0; i < dev->nvqs; ++i) { - vq = dev->vqs[i]; - for (j = 0; j < VHOST_NUM_ADDRS; j++) - RCU_INIT_POINTER(vq->maps[j], NULL); - } -} -#endif - static void vhost_vq_reset(struct vhost_dev *dev, struct vhost_virtqueue *vq) { @@ -479,11 +325,7 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->busyloop_timeout = 0; vq->umem = NULL; vq->iotlb = NULL; - vq->invalidate_count = 0; __vhost_vq_meta_reset(vq); -#if VHOST_ARCH_CAN_ACCEL_UACCESS - vhost_reset_vq_maps(vq); -#endif } static int vhost_worker(void *data) @@ -633,9 +475,7 @@ void vhost_dev_init(struct vhost_dev *dev, INIT_LIST_HEAD(&dev->read_list); INIT_LIST_HEAD(&dev->pending_list); spin_lock_init(&dev->iotlb_lock); -#if VHOST_ARCH_CAN_ACCEL_UACCESS - vhost_init_maps(dev); -#endif + for (i = 0; i < dev->nvqs; ++i) { vq = dev->vqs[i]; @@ -644,7 +484,6 @@ void vhost_dev_init(struct vhost_dev *dev, vq->heads = NULL; vq->dev = dev; mutex_init(&vq->mutex); - spin_lock_init(&vq->mmu_lock); vhost_vq_reset(dev, vq); if (vq->handle_kick) vhost_poll_init(&vq->poll, vq->handle_kick, @@ -724,18 +563,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev) if (err) goto err_cgroup; -#if VHOST_ARCH_CAN_ACCEL_UACCESS - err = mmu_notifier_register(&dev->mmu_notifier, dev->mm); - if (err) - goto err_mmu_notifier; -#endif - return 0; - -#if VHOST_ARCH_CAN_ACCEL_UACCESS -err_mmu_notifier: - vhost_dev_free_iovecs(dev); -#endif err_cgroup: kthread_stop(worker); dev->worker = NULL; @@ -826,107 +654,6 @@ static void vhost_clear_msg(struct vhost_dev *dev) spin_unlock(&dev->iotlb_lock); } -#if VHOST_ARCH_CAN_ACCEL_UACCESS -static void vhost_setup_uaddr(struct vhost_virtqueue *vq, - int index, unsigned long uaddr, - size_t size, bool write) -{ - struct vhost_uaddr *addr = &vq->uaddrs[index]; - - addr->uaddr = uaddr; - addr->size = size; - addr->write = write; -} - -static void vhost_setup_vq_uaddr(struct vhost_virtqueue *vq) -{ - vhost_setup_uaddr(vq, VHOST_ADDR_DESC, - (unsigned long)vq->desc, - vhost_get_desc_size(vq, vq->num), - false); - vhost_setup_uaddr(vq, VHOST_ADDR_AVAIL, - (unsigned long)vq->avail, - vhost_get_avail_size(vq, vq->num), - false); - vhost_setup_uaddr(vq, VHOST_ADDR_USED, - (unsigned long)vq->used, - vhost_get_used_size(vq, vq->num), - true); -} - -static int vhost_map_prefetch(struct vhost_virtqueue *vq, - int index) -{ - struct vhost_map *map; - struct vhost_uaddr *uaddr = &vq->uaddrs[index]; - struct page **pages; - int npages = DIV_ROUND_UP(uaddr->size, PAGE_SIZE); - int npinned; - void *vaddr, *v; - int err; - int i; - - spin_lock(&vq->mmu_lock); - - err = -EFAULT; - if (vq->invalidate_count) - goto err; - - err = -ENOMEM; - map = kmalloc(sizeof(*map), GFP_ATOMIC); - if (!map) - goto err; - - pages = kmalloc_array(npages, sizeof(struct page *), GFP_ATOMIC); - if (!pages) - goto err_pages; - - err = EFAULT; - npinned = __get_user_pages_fast(uaddr->uaddr, npages, - uaddr->write, pages); - if (npinned > 0) - release_pages(pages, npinned); - if (npinned != npages) - goto err_gup; - - for (i = 0; i < npinned; i++) - if (PageHighMem(pages[i])) - goto err_gup; - - vaddr = v = page_address(pages[0]); - - /* For simplicity, fallback to userspace address if VA is not - * contigious. - */ - for (i = 1; i < npinned; i++) { - v += PAGE_SIZE; - if (v != page_address(pages[i])) - goto err_gup; - } - - map->addr = vaddr + (uaddr->uaddr & (PAGE_SIZE - 1)); - map->npages = npages; - map->pages = pages; - - rcu_assign_pointer(vq->maps[index], map); - /* No need for a synchronize_rcu(). This function should be - * called by dev->worker so we are serialized with all - * readers. - */ - spin_unlock(&vq->mmu_lock); - - return 0; - -err_gup: - kfree(pages); -err_pages: - kfree(map); -err: - spin_unlock(&vq->mmu_lock); - return err; -} -#endif - void vhost_dev_cleanup(struct vhost_dev *dev) { int i; @@ -956,16 +683,8 @@ void vhost_dev_cleanup(struct vhost_dev *dev) kthread_stop(dev->worker); dev->worker = NULL; } - if (dev->mm) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - mmu_notifier_unregister(&dev->mmu_notifier, dev->mm); -#endif + if (dev->mm) mmput(dev->mm); - } -#if VHOST_ARCH_CAN_ACCEL_UACCESS - for (i = 0; i < dev->nvqs; i++) - vhost_uninit_vq_maps(dev->vqs[i]); -#endif dev->mm = NULL; } EXPORT_SYMBOL_GPL(vhost_dev_cleanup); @@ -1194,26 +913,6 @@ static inline void __user *__vhost_get_user(struct vhost_virtqueue *vq, static inline int vhost_put_avail_event(struct vhost_virtqueue *vq) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - *((__virtio16 *)&used->ring[vq->num]) = - cpu_to_vhost16(vq, vq->avail_idx); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_put_user(vq, cpu_to_vhost16(vq, vq->avail_idx), vhost_avail_event(vq)); } @@ -1222,27 +921,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq, struct vring_used_elem *head, int idx, int count) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - size_t size; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - size = count * sizeof(*head); - memcpy(used->ring + idx, head, size); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_copy_to_user(vq, vq->used->ring + idx, head, count * sizeof(*head)); } @@ -1250,25 +928,6 @@ static inline int vhost_put_used(struct vhost_virtqueue *vq, static inline int vhost_put_used_flags(struct vhost_virtqueue *vq) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - used->flags = cpu_to_vhost16(vq, vq->used_flags); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_put_user(vq, cpu_to_vhost16(vq, vq->used_flags), &vq->used->flags); } @@ -1276,25 +935,6 @@ static inline int vhost_put_used_flags(struct vhost_virtqueue *vq) static inline int vhost_put_used_idx(struct vhost_virtqueue *vq) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - used->idx = cpu_to_vhost16(vq, vq->last_used_idx); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_put_user(vq, cpu_to_vhost16(vq, vq->last_used_idx), &vq->used->idx); } @@ -1340,50 +980,12 @@ static void vhost_dev_unlock_vqs(struct vhost_dev *d) static inline int vhost_get_avail_idx(struct vhost_virtqueue *vq, __virtio16 *idx) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_avail *avail; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); - if (likely(map)) { - avail = map->addr; - *idx = avail->idx; - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_get_avail(vq, *idx, &vq->avail->idx); } static inline int vhost_get_avail_head(struct vhost_virtqueue *vq, __virtio16 *head, int idx) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_avail *avail; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); - if (likely(map)) { - avail = map->addr; - *head = avail->ring[idx & (vq->num - 1)]; - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_get_avail(vq, *head, &vq->avail->ring[idx & (vq->num - 1)]); } @@ -1391,98 +993,24 @@ static inline int vhost_get_avail_head(struct vhost_virtqueue *vq, static inline int vhost_get_avail_flags(struct vhost_virtqueue *vq, __virtio16 *flags) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_avail *avail; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); - if (likely(map)) { - avail = map->addr; - *flags = avail->flags; - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_get_avail(vq, *flags, &vq->avail->flags); } static inline int vhost_get_used_event(struct vhost_virtqueue *vq, __virtio16 *event) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_avail *avail; - - if (!vq->iotlb) { - rcu_read_lock(); - map = rcu_dereference(vq->maps[VHOST_ADDR_AVAIL]); - if (likely(map)) { - avail = map->addr; - *event = (__virtio16)avail->ring[vq->num]; - rcu_read_unlock(); - return 0; - } - rcu_read_unlock(); - } -#endif - return vhost_get_avail(vq, *event, vhost_used_event(vq)); } static inline int vhost_get_used_idx(struct vhost_virtqueue *vq, __virtio16 *idx) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_used *used; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_USED]); - if (likely(map)) { - used = map->addr; - *idx = used->idx; - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_get_used(vq, *idx, &vq->used->idx); } static inline int vhost_get_desc(struct vhost_virtqueue *vq, struct vring_desc *desc, int idx) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - struct vhost_map *map; - struct vring_desc *d; - - if (!vq->iotlb) { - rcu_read_lock(); - - map = rcu_dereference(vq->maps[VHOST_ADDR_DESC]); - if (likely(map)) { - d = map->addr; - *desc = *(d + idx); - rcu_read_unlock(); - return 0; - } - - rcu_read_unlock(); - } -#endif - return vhost_copy_from_user(vq, desc, vq->desc + idx, sizeof(*desc)); } @@ -1823,32 +1351,12 @@ static bool iotlb_access_ok(struct vhost_virtqueue *vq, return true; } -#if VHOST_ARCH_CAN_ACCEL_UACCESS -static void vhost_vq_map_prefetch(struct vhost_virtqueue *vq) -{ - struct vhost_map __rcu *map; - int i; - - for (i = 0; i < VHOST_NUM_ADDRS; i++) { - rcu_read_lock(); - map = rcu_dereference(vq->maps[i]); - rcu_read_unlock(); - if (unlikely(!map)) - vhost_map_prefetch(vq, i); - } -} -#endif - int vq_meta_prefetch(struct vhost_virtqueue *vq) { unsigned int num = vq->num; - if (!vq->iotlb) { -#if VHOST_ARCH_CAN_ACCEL_UACCESS - vhost_vq_map_prefetch(vq); -#endif + if (!vq->iotlb) return 1; - } return iotlb_access_ok(vq, VHOST_ACCESS_RO, (u64)(uintptr_t)vq->desc, vhost_get_desc_size(vq, num), VHOST_ADDR_DESC) && @@ -2059,16 +1567,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d, mutex_lock(&vq->mutex); -#if VHOST_ARCH_CAN_ACCEL_UACCESS - /* Unregister MMU notifer to allow invalidation callback - * can access vq->uaddrs[] without holding a lock. - */ - if (d->mm) - mmu_notifier_unregister(&d->mmu_notifier, d->mm); - - vhost_uninit_vq_maps(vq); -#endif - switch (ioctl) { case VHOST_SET_VRING_NUM: r = vhost_vring_set_num(d, vq, argp); @@ -2080,13 +1578,6 @@ static long vhost_vring_set_num_addr(struct vhost_dev *d, BUG(); } -#if VHOST_ARCH_CAN_ACCEL_UACCESS - vhost_setup_vq_uaddr(vq); - - if (d->mm) - mmu_notifier_register(&d->mmu_notifier, d->mm); -#endif - mutex_unlock(&vq->mutex); return r; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 42a8c2a13ab1..e9ed2722b633 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -12,9 +12,6 @@ #include #include #include -#include -#include -#include struct vhost_work; typedef void (*vhost_work_fn_t)(struct vhost_work *work); @@ -83,24 +80,6 @@ enum vhost_uaddr_type { VHOST_NUM_ADDRS = 3, }; -struct vhost_map { - int npages; - void *addr; - struct page **pages; -}; - -struct vhost_uaddr { - unsigned long uaddr; - size_t size; - bool write; -}; - -#if defined(CONFIG_MMU_NOTIFIER) && ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0 -#define VHOST_ARCH_CAN_ACCEL_UACCESS 0 -#else -#define VHOST_ARCH_CAN_ACCEL_UACCESS 0 -#endif - /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -111,22 +90,7 @@ struct vhost_virtqueue { struct vring_desc __user *desc; struct vring_avail __user *avail; struct vring_used __user *used; - -#if VHOST_ARCH_CAN_ACCEL_UACCESS - /* Read by memory accessors, modified by meta data - * prefetching, MMU notifier and vring ioctl(). - * Synchonrized through mmu_lock (writers) and RCU (writers - * and readers). - */ - struct vhost_map __rcu *maps[VHOST_NUM_ADDRS]; - /* Read by MMU notifier, modified by vring ioctl(), - * synchronized through MMU notifier - * registering/unregistering. - */ - struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS]; -#endif const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS]; - struct file *kick; struct eventfd_ctx *call_ctx; struct eventfd_ctx *error_ctx; @@ -181,8 +145,6 @@ struct vhost_virtqueue { bool user_be; #endif u32 busyloop_timeout; - spinlock_t mmu_lock; - int invalidate_count; }; struct vhost_msg_node { @@ -196,9 +158,6 @@ struct vhost_msg_node { struct vhost_dev { struct mm_struct *mm; -#ifdef CONFIG_MMU_NOTIFIER - struct mmu_notifier mmu_notifier; -#endif struct mutex mutex; struct vhost_virtqueue **vqs; int nvqs; -- cgit v1.2.3-59-g8ed1b From a89db445fbd7f1f8457b03759aa7343fa530ef6b Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 8 Sep 2019 07:04:08 -0400 Subject: vhost: block speculation of translated descriptors iovec addresses coming from vhost are assumed to be pre-validated, but in fact can be speculated to a value out of range. Userspace address are later validated with array_index_nospec so we can be sure kernel info does not leak through these addresses, but vhost must also not leak userspace info outside the allowed memory table to guests. Following the defence in depth principle, make sure the address is not validated out of node range. Signed-off-by: Michael S. Tsirkin Cc: stable@vger.kernel.org Acked-by: Jason Wang Tested-by: Jason Wang --- drivers/vhost/vhost.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 5dc174ac8cac..34ea219936e3 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2071,8 +2071,10 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, _iov = iov + ret; size = node->size - addr + node->start; _iov->iov_len = min((u64)len - s, size); - _iov->iov_base = (void __user *)(unsigned long) - (node->userspace_addr + addr - node->start); + _iov->iov_base = (void __user *) + ((unsigned long)node->userspace_addr + + array_index_nospec((unsigned long)(addr - node->start), + node->size)); s += size; addr += size; ++ret; -- cgit v1.2.3-59-g8ed1b From 060423bfdee3f8bc6e2c1bac97de24d5415e2bc4 Mon Sep 17 00:00:00 2001 From: yongduan Date: Wed, 11 Sep 2019 17:44:24 +0800 Subject: vhost: make sure log_num < in_num The code assumes log_num < in_num everywhere, and that is true as long as in_num is incremented by descriptor iov count, and log_num by 1. However this breaks if there's a zero sized descriptor. As a result, if a malicious guest creates a vring desc with desc.len = 0, it may cause the host kernel to crash by overflowing the log array. This bug can be triggered during the VM migration. There's no need to log when desc.len = 0, so just don't increment log_num in this case. Fixes: 3a4d5c94e959 ("vhost_net: a kernel-level virtio server") Cc: stable@vger.kernel.org Reviewed-by: Lidong Chen Signed-off-by: ruippan Signed-off-by: yongduan Acked-by: Michael S. Tsirkin Reviewed-by: Tyler Hicks Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 34ea219936e3..acabf20b069e 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2180,7 +2180,7 @@ static int get_indirect(struct vhost_virtqueue *vq, /* If this is an input descriptor, increment that count. */ if (access == VHOST_ACCESS_WO) { *in_num += ret; - if (unlikely(log)) { + if (unlikely(log && ret)) { log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); log[*log_num].len = vhost32_to_cpu(vq, desc.len); ++*log_num; @@ -2321,7 +2321,7 @@ int vhost_get_vq_desc(struct vhost_virtqueue *vq, /* If this is an input descriptor, * increment that count. */ *in_num += ret; - if (unlikely(log)) { + if (unlikely(log && ret)) { log[*log_num].addr = vhost64_to_cpu(vq, desc.addr); log[*log_num].len = vhost32_to_cpu(vq, desc.len); ++*log_num; -- cgit v1.2.3-59-g8ed1b From 0d4a3f2abbef73b9e5bb5f12213c275565473588 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sat, 14 Sep 2019 15:21:51 -0400 Subject: Revert "vhost: block speculation of translated descriptors" This reverts commit a89db445fbd7f1f8457b03759aa7343fa530ef6b. I was hasty to include this patch, and it breaks the build on 32 bit. Defence in depth is good but let's do it properly. Cc: stable@vger.kernel.org Signed-off-by: Michael S. Tsirkin --- drivers/vhost/vhost.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/vhost') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index acabf20b069e..36ca2cf419bf 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -2071,10 +2071,8 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, _iov = iov + ret; size = node->size - addr + node->start; _iov->iov_len = min((u64)len - s, size); - _iov->iov_base = (void __user *) - ((unsigned long)node->userspace_addr + - array_index_nospec((unsigned long)(addr - node->start), - node->size)); + _iov->iov_base = (void __user *)(unsigned long) + (node->userspace_addr + addr - node->start); s += size; addr += size; ++ret; -- cgit v1.2.3-59-g8ed1b