aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-10-01 21:35:58 -0400
committerDavid S. Miller <davem@davemloft.net>2014-10-01 21:35:58 -0400
commit25e379c475121c658a344cfd5eeed9affe272d31 (patch)
treeaacc22b440766d6bdbda7b035962a7dcc590263c
parentMerge branch 'bpf-next' (diff)
parentvxlan: Set inner protocol before transmit (diff)
downloadlinux-dev-25e379c475121c658a344cfd5eeed9affe272d31.tar.xz
linux-dev-25e379c475121c658a344cfd5eeed9affe272d31.zip
Merge branch 'udp_gso'
Tom Herbert says: ==================== udp: Generalize GSO for UDP tunnels This patch set generalizes the UDP tunnel segmentation functions so that they can work with various protocol encapsulations. The primary change is to set the inner_protocol field in the skbuff when creating the encapsulated packet, and then in skb_udp_tunnel_segment this data is used to determine the function for segmenting the encapsulated packet. The inner_protocol field is overloaded to take either an Ethertype or IP protocol. The inner_protocol is set on transmit using skb_set_inner_ipproto or skb_set_inner_protocol functions. VXLAN and IP tunnels (for fou GSO) were modified to call these. Notes: - GSO for GRE/UDP where GRE checksum is enabled does not work. Handling this will require some special case code. - Software GSO now supports many varieties of encapsulation with SKB_GSO_UDP_TUNNEL{_CSUM}. We still need a mechanism to query for device support of particular combinations (I intend to add ndo_gso_check for that). - MPLS seems to be the only previous user of inner_protocol. I don't believe these patches can affect that. For supporting GSO with MPLS over UDP, the inner_protocol should be set using the helper functions in this patch. - GSO for L2TP/UDP should also be straightforward now. v2: - Respin for Eric's restructuring of skbuff. Tested GRE, IPIP, and SIT over fou as well as VLXAN. This was done using 200 TCP_STREAMs in netperf. GRE IPv4, FOU, UDP checksum enabled TCP_STREAM TSO enabled on tun interface 14.04% TX CPU utilization 13.17% RX CPU utilization 9211 Mbps TCP_STREAM TSO disabled on tun interface 27.82% TX CPU utilization 25.41% RX CPU utilization 9336 Mbps IPv4, FOU, UDP checksum disabled TCP_STREAM TSO enabled on tun interface 13.14% TX CPU utilization 23.18% RX CPU utilization 9277 Mbps TCP_STREAM TSO disabled on tun interface 30.00% TX CPU utilization 31.28% RX CPU utilization 9327 Mbps IPIP FOU, UDP checksum enabled TCP_STREAM TSO enabled on tun interface 15.28% TX CPU utilization 13.92% RX CPU utilization 9342 Mbps TCP_STREAM TSO disabled on tun interface 27.82% TX CPU utilization 25.41% RX CPU utilization 9336 Mbps FOU, UDP checksum disabled TCP_STREAM TSO enabled on tun interface 15.08% TX CPU utilization 24.64% RX CPU utilization 9226 Mbps TCP_STREAM TSO disabled on tun interface 30.00% TX CPU utilization 31.28% RX CPU utilization 9327 Mbps SIT FOU, UDP checksum enabled TCP_STREAM TSO enabled on tun interface 14.47% TX CPU utilization 14.58% RX CPU utilization 9106 Mbps TCP_STREAM TSO disabled on tun interface 31.82% TX CPU utilization 30.82% RX CPU utilization 9204 Mbps FOU, UDP checksum disabled TCP_STREAM TSO enabled on tun interface 15.70% TX CPU utilization 27.93% RX CPU utilization 9097 Mbps TCP_STREAM TSO disabled on tun interface 33.48% TX CPU utilization 37.36% RX CPU utilization 9197 Mbps VXLAN TCP_STREAM TSO enabled on tun interface 16.42% TX CPU utilization 23.66% RX CPU utilization 9081 Mbps TCP_STREAM TSO disabled on tun interface 30.32% TX CPU utilization 30.55% RX CPU utilization 9185 Mbps Baseline (no encp, TSO and LRO enabled) TCP_STREAM 11.85% TX CPU utilization 15.13% RX CPU utilization 9452 Mbps ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--drivers/net/vxlan.c4
-rw-r--r--include/linux/skbuff.h26
-rw-r--r--include/net/udp.h3
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/udp_offload.c51
-rw-r--r--net/ipv6/ip6_gre.c8
-rw-r--r--net/ipv6/sit.c4
-rw-r--r--net/ipv6/udp_offload.c2
9 files changed, 91 insertions, 11 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 34e102ec95c2..2af795d6ba05 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1610,6 +1610,8 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs,
vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = vni;
+ skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+
udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio,
ttl, src_port, dst_port);
return 0;
@@ -1652,6 +1654,8 @@ int vxlan_xmit_skb(struct vxlan_sock *vs,
vxh->vx_flags = htonl(VXLAN_FLAGS);
vxh->vx_vni = vni;
+ skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+
return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos,
ttl, df, src_port, dst_port, xnet);
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d8f7d74d5a4d..7c5036d11feb 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -596,7 +596,8 @@ struct sk_buff {
__u8 ndisc_nodetype:2;
#endif
__u8 ipvs_property:1;
- /* 5 or 7 bit hole */
+ __u8 inner_protocol_type:1;
+ /* 4 or 6 bit hole */
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
@@ -632,7 +633,11 @@ struct sk_buff {
__u32 reserved_tailroom;
};
- __be16 inner_protocol;
+ union {
+ __be16 inner_protocol;
+ __u8 inner_ipproto;
+ };
+
__u16 inner_transport_header;
__u16 inner_network_header;
__u16 inner_mac_header;
@@ -1762,6 +1767,23 @@ static inline void skb_reserve(struct sk_buff *skb, int len)
skb->tail += len;
}
+#define ENCAP_TYPE_ETHER 0
+#define ENCAP_TYPE_IPPROTO 1
+
+static inline void skb_set_inner_protocol(struct sk_buff *skb,
+ __be16 protocol)
+{
+ skb->inner_protocol = protocol;
+ skb->inner_protocol_type = ENCAP_TYPE_ETHER;
+}
+
+static inline void skb_set_inner_ipproto(struct sk_buff *skb,
+ __u8 ipproto)
+{
+ skb->inner_ipproto = ipproto;
+ skb->inner_protocol_type = ENCAP_TYPE_IPPROTO;
+}
+
static inline void skb_reset_inner_headers(struct sk_buff *skb)
{
skb->inner_mac_header = skb->mac_header;
diff --git a/include/net/udp.h b/include/net/udp.h
index 16f4e80f0519..07f9b70962f6 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -239,7 +239,8 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
int udp_disconnect(struct sock *sk, int flags);
unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait);
struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
- netdev_features_t features);
+ netdev_features_t features,
+ bool is_ipv6);
int udp_lib_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
int udp_lib_setsockopt(struct sock *sk, int level, int optname,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 829aff8bf723..0485ef18d254 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -241,6 +241,8 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
/* Push GRE header. */
gre_build_header(skb, &tpi, tunnel->tun_hlen);
+ skb_set_inner_protocol(skb, tpi.proto);
+
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index bfec31df8b21..ea88ab3102a8 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -224,6 +224,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (IS_ERR(skb))
goto out;
+ skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+
ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);
return NETDEV_TX_OK;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 19ebe6a39ddc..8c35f2c939ee 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -25,8 +25,11 @@ struct udp_offload_priv {
struct udp_offload_priv __rcu *next;
};
-struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
- netdev_features_t features)
+static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features,
+ struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+ netdev_features_t features),
+ __be16 new_protocol)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
@@ -48,7 +51,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
skb_reset_mac_header(skb);
skb_set_network_header(skb, skb_inner_network_offset(skb));
skb->mac_len = skb_inner_network_offset(skb);
- skb->protocol = htons(ETH_P_TEB);
+ skb->protocol = new_protocol;
need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
if (need_csum)
@@ -56,7 +59,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & netif_skb_features(skb);
- segs = skb_mac_gso_segment(skb, enc_features);
+ segs = gso_inner_segment(skb, enc_features);
if (IS_ERR_OR_NULL(segs)) {
skb_gso_error_unwind(skb, protocol, tnl_hlen, mac_offset,
mac_len);
@@ -101,6 +104,44 @@ out:
return segs;
}
+struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
+ netdev_features_t features,
+ bool is_ipv6)
+{
+ __be16 protocol = skb->protocol;
+ const struct net_offload **offloads;
+ const struct net_offload *ops;
+ struct sk_buff *segs = ERR_PTR(-EINVAL);
+ struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
+ netdev_features_t features);
+
+ rcu_read_lock();
+
+ switch (skb->inner_protocol_type) {
+ case ENCAP_TYPE_ETHER:
+ protocol = skb->inner_protocol;
+ gso_inner_segment = skb_mac_gso_segment;
+ break;
+ case ENCAP_TYPE_IPPROTO:
+ offloads = is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[skb->inner_ipproto]);
+ if (!ops || !ops->callbacks.gso_segment)
+ goto out_unlock;
+ gso_inner_segment = ops->callbacks.gso_segment;
+ break;
+ default:
+ goto out_unlock;
+ }
+
+ segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
+ protocol);
+
+out_unlock:
+ rcu_read_unlock();
+
+ return segs;
+}
+
static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
netdev_features_t features)
{
@@ -113,7 +154,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (skb->encapsulation &&
(skb_shinfo(skb)->gso_type &
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) {
- segs = skb_udp_tunnel_segment(skb, features);
+ segs = skb_udp_tunnel_segment(skb, features, false);
goto out;
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 5f19dfbc4c6a..9a0a1aafe727 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -616,6 +616,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
int err = -1;
u8 proto;
struct sk_buff *new_skb;
+ __be16 protocol;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -732,8 +733,9 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
ipv6h->daddr = fl6->daddr;
((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags;
- ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ?
- htons(ETH_P_TEB) : skb->protocol;
+ protocol = (dev->type == ARPHRD_ETHER) ?
+ htons(ETH_P_TEB) : skb->protocol;
+ ((__be16 *)(ipv6h + 1))[1] = protocol;
if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
__be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4);
@@ -754,6 +756,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
}
}
+ skb_set_inner_protocol(skb, protocol);
+
ip6tunnel_xmit(skb, dev);
if (ndst)
ip6_tnl_dst_store(tunnel, ndst);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index db75809ab843..0d4e27466f82 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -982,6 +982,8 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
goto tx_error;
}
+ skb_set_inner_ipproto(skb, IPPROTO_IPV6);
+
err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr,
protocol, tos, ttl, df,
!net_eq(tunnel->net, dev_net(dev)));
@@ -1006,6 +1008,8 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (IS_ERR(skb))
goto out;
+ skb_set_inner_ipproto(skb, IPPROTO_IPIP);
+
ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);
return NETDEV_TX_OK;
out:
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 212ebfc7973f..8f96988c1db2 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -58,7 +58,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (skb->encapsulation && skb_shinfo(skb)->gso_type &
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
- segs = skb_udp_tunnel_segment(skb, features);
+ segs = skb_udp_tunnel_segment(skb, features, true);
else {
const struct ipv6hdr *ipv6h;
struct udphdr *uh;