aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/vhost/vhost.h
diff options
context:
space:
mode:
authorJason Wang <jasowang@redhat.com>2019-05-24 04:12:18 -0400
committerMichael S. Tsirkin <mst@redhat.com>2019-06-05 21:09:18 -0400
commit7f466032dc9e5a61217f22ea34b2df932786bbfc (patch)
tree9c573d71ddb93b2b287865d8d6c4b346a8d36e46 /drivers/vhost/vhost.h
parentvhost: factor out setting vring addr and num (diff)
downloadlinux-dev-7f466032dc9e5a61217f22ea34b2df932786bbfc.tar.xz
linux-dev-7f466032dc9e5a61217f22ea34b2df932786bbfc.zip
vhost: access vq metadata through kernel virtual address
It was noticed that the copy_to/from_user() friends that was used to access virtqueue metdata tends to be very expensive for dataplane implementation like vhost since it involves lots of software checks, speculation barriers, hardware feature toggling (e.g SMAP). The extra cost will be more obvious when transferring small packets since the time spent on metadata accessing become more significant. This patch tries to eliminate those overheads by accessing them through direct mapping of those pages. Invalidation callbacks is implemented for co-operation with general VM management (swap, KSM, THP or NUMA balancing). We will try to get the direct mapping of vq metadata before each round of packet processing if it doesn't exist. If we fail, we will simplely fallback to copy_to/from_user() friends. This invalidation and direct mapping access are synchronized through spinlock and RCU. All matedata accessing through direct map is protected by RCU, and the setup or invalidation are done under spinlock. This method might does not work for high mem page which requires temporary mapping so we just fallback to normal copy_to/from_user() and may not for arch that has virtual tagged cache since extra cache flushing is needed to eliminate the alias. This will result complex logic and bad performance. For those archs, this patch simply go for copy_to/from_user() friends. This is done by ruling out kernel mapping codes through ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE. Note that this is only done when device IOTLB is not enabled. We could use similar method to optimize IOTLB in the future. Tests shows at most about 23% improvement on TX PPS when using virtio-user + vhost_net + xdp1 + TAP on 2.6GHz Broadwell: SMAP on | SMAP off Before: 5.2Mpps | 7.1Mpps After: 6.4Mpps | 8.2Mpps Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: James Bottomley <James.Bottomley@hansenpartnership.com> Cc: Christoph Hellwig <hch@infradead.org> Cc: David Miller <davem@davemloft.net> Cc: Jerome Glisse <jglisse@redhat.com> Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-parisc@vger.kernel.org Signed-off-by: Jason Wang <jasowang@redhat.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Diffstat (limited to 'drivers/vhost/vhost.h')
-rw-r--r--drivers/vhost/vhost.h38
1 files changed, 38 insertions, 0 deletions
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index e9ed2722b633..c5d950cf7627 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -12,6 +12,9 @@
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/atomic.h>
+#include <linux/pagemap.h>
+#include <linux/mmu_notifier.h>
+#include <asm/cacheflush.h>
struct vhost_work;
typedef void (*vhost_work_fn_t)(struct vhost_work *work);
@@ -80,6 +83,21 @@ enum vhost_uaddr_type {
VHOST_NUM_ADDRS = 3,
};
+struct vhost_map {
+ int npages;
+ void *addr;
+ struct page **pages;
+};
+
+struct vhost_uaddr {
+ unsigned long uaddr;
+ size_t size;
+ bool write;
+};
+
+#define VHOST_ARCH_CAN_ACCEL_UACCESS defined(CONFIG_MMU_NOTIFIER) && \
+ ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0
+
/* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue {
struct vhost_dev *dev;
@@ -90,7 +108,22 @@ struct vhost_virtqueue {
struct vring_desc __user *desc;
struct vring_avail __user *avail;
struct vring_used __user *used;
+
+#if VHOST_ARCH_CAN_ACCEL_UACCESS
+ /* Read by memory accessors, modified by meta data
+ * prefetching, MMU notifier and vring ioctl().
+ * Synchonrized through mmu_lock (writers) and RCU (writers
+ * and readers).
+ */
+ struct vhost_map __rcu *maps[VHOST_NUM_ADDRS];
+ /* Read by MMU notifier, modified by vring ioctl(),
+ * synchronized through MMU notifier
+ * registering/unregistering.
+ */
+ struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS];
+#endif
const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
+
struct file *kick;
struct eventfd_ctx *call_ctx;
struct eventfd_ctx *error_ctx;
@@ -145,6 +178,8 @@ struct vhost_virtqueue {
bool user_be;
#endif
u32 busyloop_timeout;
+ spinlock_t mmu_lock;
+ int invalidate_count;
};
struct vhost_msg_node {
@@ -158,6 +193,9 @@ struct vhost_msg_node {
struct vhost_dev {
struct mm_struct *mm;
+#ifdef CONFIG_MMU_NOTIFIER
+ struct mmu_notifier mmu_notifier;
+#endif
struct mutex mutex;
struct vhost_virtqueue **vqs;
int nvqs;