aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason Wang <jasowang@redhat.com>2017-08-11 19:41:16 +0800
committerDavid S. Miller <davem@davemloft.net>2017-08-13 19:56:07 -0700
commit66ccbc9c87c2ae96585f9aa5f733609e6f4acf25 (patch)
treef4b65889e63df18f839a8dac57eca5769c141b1c
parentrtnelink: Move link dump consistency check out of the loop (diff)
downloadlinux-dev-66ccbc9c87c2ae96585f9aa5f733609e6f4acf25.tar.xz
linux-dev-66ccbc9c87c2ae96585f9aa5f733609e6f4acf25.zip
tap: use build_skb() for small packet
We use tun_alloc_skb() which calls sock_alloc_send_pskb() to allocate skb in the past. This socket based method is not suitable for high speed userspace like virtualization which usually: - ignore sk_sndbuf (INT_MAX) and expect to receive the packet as fast as possible - don't want to be block at sendmsg() To eliminate the above overheads, this patch tries to use build_skb() for small packet. We will do this only when the following conditions are all met: - TAP instead of TUN - sk_sndbuf is INT_MAX - caller don't want to be blocked - zerocopy is not used - packet size is smaller enough to use build_skb() Pktgen from guest to host shows ~11% improvement for rx pps of tap: Before: ~1.70Mpps After : ~1.88Mpps What's more important, this makes it possible to implement XDP for tap before creating skbs. Signed-off-by: Jason Wang <jasowang@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/tun.c112
1 files changed, 91 insertions, 21 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d21510d47aa2..9736df40d2bf 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -105,6 +105,8 @@ do { \
} while (0)
#endif
+#define TUN_RX_PAD (NET_IP_ALIGN + NET_SKB_PAD)
+
/* TUN device flags */
/* IFF_ATTACH_QUEUE is never stored in device flags,
@@ -170,6 +172,7 @@ struct tun_file {
struct list_head next;
struct tun_struct *detached;
struct skb_array tx_array;
+ struct page_frag alloc_frag;
};
struct tun_flow_entry {
@@ -571,6 +574,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
}
if (tun)
skb_array_cleanup(&tfile->tx_array);
+ if (tfile->alloc_frag.page)
+ put_page(tfile->alloc_frag.page);
sock_put(&tfile->sk);
}
}
@@ -1190,6 +1195,61 @@ static void tun_rx_batched(struct tun_struct *tun, struct tun_file *tfile,
}
}
+static bool tun_can_build_skb(struct tun_struct *tun, struct tun_file *tfile,
+ int len, int noblock, bool zerocopy)
+{
+ if ((tun->flags & TUN_TYPE_MASK) != IFF_TAP)
+ return false;
+
+ if (tfile->socket.sk->sk_sndbuf != INT_MAX)
+ return false;
+
+ if (!noblock)
+ return false;
+
+ if (zerocopy)
+ return false;
+
+ if (SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) > PAGE_SIZE)
+ return false;
+
+ return true;
+}
+
+static struct sk_buff *tun_build_skb(struct tun_file *tfile,
+ struct iov_iter *from,
+ int len)
+{
+ struct page_frag *alloc_frag = &tfile->alloc_frag;
+ struct sk_buff *skb;
+ int buflen = SKB_DATA_ALIGN(len + TUN_RX_PAD) +
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ char *buf;
+ size_t copied;
+
+ if (unlikely(!skb_page_frag_refill(buflen, alloc_frag, GFP_KERNEL)))
+ return ERR_PTR(-ENOMEM);
+
+ buf = (char *)page_address(alloc_frag->page) + alloc_frag->offset;
+ copied = copy_page_from_iter(alloc_frag->page,
+ alloc_frag->offset + TUN_RX_PAD,
+ len, from);
+ if (copied != len)
+ return ERR_PTR(-EFAULT);
+
+ skb = build_skb(buf, buflen);
+ if (!skb)
+ return ERR_PTR(-ENOMEM);
+
+ skb_reserve(skb, TUN_RX_PAD);
+ skb_put(skb, len);
+ get_page(alloc_frag->page);
+ alloc_frag->offset += buflen;
+
+ return skb;
+}
+
/* Get packet from user space buffer */
static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
void *msg_control, struct iov_iter *from,
@@ -1263,30 +1323,38 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
zerocopy = true;
}
- if (!zerocopy) {
- copylen = len;
- if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
- linear = good_linear;
- else
- linear = tun16_to_cpu(tun, gso.hdr_len);
- }
-
- skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
- if (IS_ERR(skb)) {
- if (PTR_ERR(skb) != -EAGAIN)
+ if (tun_can_build_skb(tun, tfile, len, noblock, zerocopy)) {
+ skb = tun_build_skb(tfile, from, len);
+ if (IS_ERR(skb)) {
this_cpu_inc(tun->pcpu_stats->rx_dropped);
- return PTR_ERR(skb);
- }
+ return PTR_ERR(skb);
+ }
+ } else {
+ if (!zerocopy) {
+ copylen = len;
+ if (tun16_to_cpu(tun, gso.hdr_len) > good_linear)
+ linear = good_linear;
+ else
+ linear = tun16_to_cpu(tun, gso.hdr_len);
+ }
- if (zerocopy)
- err = zerocopy_sg_from_iter(skb, from);
- else
- err = skb_copy_datagram_from_iter(skb, 0, from, len);
+ skb = tun_alloc_skb(tfile, align, copylen, linear, noblock);
+ if (IS_ERR(skb)) {
+ if (PTR_ERR(skb) != -EAGAIN)
+ this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ return PTR_ERR(skb);
+ }
- if (err) {
- this_cpu_inc(tun->pcpu_stats->rx_dropped);
- kfree_skb(skb);
- return -EFAULT;
+ if (zerocopy)
+ err = zerocopy_sg_from_iter(skb, from);
+ else
+ err = skb_copy_datagram_from_iter(skb, 0, from, len);
+
+ if (err) {
+ this_cpu_inc(tun->pcpu_stats->rx_dropped);
+ kfree_skb(skb);
+ return -EFAULT;
+ }
}
if (virtio_net_hdr_to_skb(skb, &gso, tun_is_little_endian(tun))) {
@@ -2377,6 +2445,8 @@ static int tun_chr_open(struct inode *inode, struct file * file)
tfile->sk.sk_write_space = tun_sock_write_space;
tfile->sk.sk_sndbuf = INT_MAX;
+ tfile->alloc_frag.page = NULL;
+
file->private_data = tfile;
INIT_LIST_HEAD(&tfile->next);