diff options
Diffstat (limited to 'net/ipv4/udp_offload.c')
-rw-r--r-- | net/ipv4/udp_offload.c | 164 |
1 files changed, 140 insertions, 24 deletions
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 1a98583a79f4..6d1a4bec2614 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -7,6 +7,7 @@ */ #include <linux/skbuff.h> +#include <net/gro.h> #include <net/udp.h> #include <net/protocol.h> #include <net/inet_common.h> @@ -49,6 +50,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, __skb_pull(skb, tnl_hlen); skb_reset_mac_header(skb); skb_set_network_header(skb, skb_inner_network_offset(skb)); + skb_set_transport_header(skb, skb_inner_transport_offset(skb)); skb->mac_len = skb_inner_network_offset(skb); skb->protocol = new_protocol; @@ -67,6 +69,8 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, (NETIF_F_HW_CSUM | NETIF_F_IP_CSUM)))); features &= skb->dev->hw_enc_features; + if (need_csum) + features &= ~NETIF_F_SCTP_CRC; /* The only checksum offload we care about from here on out is the * outer one so strip the existing checksum feature flags and @@ -149,8 +153,8 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb, netdev_features_t features, bool is_ipv6) { + const struct net_offload __rcu **offloads; __be16 protocol = skb->protocol; - const struct net_offload **offloads; const struct net_offload *ops; struct sk_buff *segs = ERR_PTR(-EINVAL); struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb, @@ -184,8 +188,67 @@ out_unlock: } EXPORT_SYMBOL(skb_udp_tunnel_segment); +static void __udpv4_gso_segment_csum(struct sk_buff *seg, + __be32 *oldip, __be32 *newip, + __be16 *oldport, __be16 *newport) +{ + struct udphdr *uh; + struct iphdr *iph; + + if (*oldip == *newip && *oldport == *newport) + return; + + uh = udp_hdr(seg); + iph = ip_hdr(seg); + + if (uh->check) { + inet_proto_csum_replace4(&uh->check, seg, *oldip, *newip, + true); + inet_proto_csum_replace2(&uh->check, seg, *oldport, *newport, + false); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } + *oldport = *newport; + + csum_replace4(&iph->check, *oldip, *newip); + *oldip = *newip; +} + +static struct sk_buff *__udpv4_gso_segment_list_csum(struct sk_buff *segs) +{ + struct sk_buff *seg; + struct udphdr *uh, *uh2; + struct iphdr *iph, *iph2; + + seg = segs; + uh = udp_hdr(seg); + iph = ip_hdr(seg); + + if ((udp_hdr(seg)->dest == udp_hdr(seg->next)->dest) && + (udp_hdr(seg)->source == udp_hdr(seg->next)->source) && + (ip_hdr(seg)->daddr == ip_hdr(seg->next)->daddr) && + (ip_hdr(seg)->saddr == ip_hdr(seg->next)->saddr)) + return segs; + + while ((seg = seg->next)) { + uh2 = udp_hdr(seg); + iph2 = ip_hdr(seg); + + __udpv4_gso_segment_csum(seg, + &iph2->saddr, &iph->saddr, + &uh2->source, &uh->source); + __udpv4_gso_segment_csum(seg, + &iph2->daddr, &iph->daddr, + &uh2->dest, &uh->dest); + } + + return segs; +} + static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, - netdev_features_t features) + netdev_features_t features, + bool is_ipv6) { unsigned int mss = skb_shinfo(skb)->gso_size; @@ -195,11 +258,11 @@ static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb, udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss); - return skb; + return is_ipv6 ? skb : __udpv4_gso_segment_list_csum(skb); } struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, - netdev_features_t features) + netdev_features_t features, bool is_ipv6) { struct sock *sk = gso_skb->sk; unsigned int sum_truesize = 0; @@ -211,7 +274,7 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, __be16 newlen; if (skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST) - return __udp_gso_segment_list(gso_skb, features); + return __udp_gso_segment_list(gso_skb, features, is_ipv6); mss = skb_shinfo(gso_skb)->gso_size; if (gso_skb->len <= sizeof(*uh) + mss) @@ -325,7 +388,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, goto out; if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) - return __udp_gso_segment(skb, features); + return __udp_gso_segment(skb, features, false); mss = skb_shinfo(skb)->gso_size; if (unlikely(skb->len <= mss)) @@ -362,11 +425,38 @@ out: return segs; } +static int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb) +{ + if (unlikely(p->len + skb->len >= 65536)) + return -E2BIG; + + if (NAPI_GRO_CB(p)->last == p) + skb_shinfo(p)->frag_list = skb; + else + NAPI_GRO_CB(p)->last->next = skb; + + skb_pull(skb, skb_gro_offset(skb)); + + NAPI_GRO_CB(p)->last = skb; + NAPI_GRO_CB(p)->count++; + p->data_len += skb->len; + + /* sk owenrship - if any - completely transferred to the aggregated packet */ + skb->destructor = NULL; + p->truesize += skb->truesize; + p->len += skb->len; + + NAPI_GRO_CB(skb)->same_flow = 1; + + return 0; +} + + #define UDP_GRO_CNT_MAX 64 static struct sk_buff *udp_gro_receive_segment(struct list_head *head, struct sk_buff *skb) { - struct udphdr *uh = udp_hdr(skb); + struct udphdr *uh = udp_gro_udphdr(skb); struct sk_buff *pp = NULL; struct udphdr *uh2; struct sk_buff *p; @@ -453,19 +543,26 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb, unsigned int off = skb_gro_offset(skb); int flush = 1; - if (skb->dev->features & NETIF_F_GRO_FRAGLIST) - NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled: 1; + /* we can do L4 aggregation only if the packet can't land in a tunnel + * otherwise we could corrupt the inner stream + */ + NAPI_GRO_CB(skb)->is_flist = 0; + if (!sk || !udp_sk(sk)->gro_receive) { + if (skb->dev->features & NETIF_F_GRO_FRAGLIST) + NAPI_GRO_CB(skb)->is_flist = sk ? !udp_sk(sk)->gro_enabled : 1; - if ((sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) { - pp = call_gro_receive(udp_gro_receive_segment, head, skb); - return pp; + if ((!sk && (skb->dev->features & NETIF_F_GRO_UDP_FWD)) || + (sk && udp_sk(sk)->gro_enabled) || NAPI_GRO_CB(skb)->is_flist) + return call_gro_receive(udp_gro_receive_segment, head, skb); + + /* no GRO, be sure flush the current packet */ + goto out; } - if (!sk || NAPI_GRO_CB(skb)->encap_mark || - (skb->ip_summed != CHECKSUM_PARTIAL && + if (NAPI_GRO_CB(skb)->encap_mark || + (uh->check && skb->ip_summed != CHECKSUM_PARTIAL && NAPI_GRO_CB(skb)->csum_cnt == 0 && - !NAPI_GRO_CB(skb)->csum_valid) || - !udp_sk(sk)->gro_receive) + !NAPI_GRO_CB(skb)->csum_valid)) goto out; /* mark that this skb passed once through the tunnel gro layer */ @@ -499,12 +596,22 @@ out: } EXPORT_SYMBOL(udp_gro_receive); +static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, + __be16 dport) +{ + const struct iphdr *iph = skb_gro_network_header(skb); + + return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + inet_sdif(skb), &udp_table, NULL); +} + INDIRECT_CALLABLE_SCOPE struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb) { struct udphdr *uh = udp_gro_udphdr(skb); + struct sock *sk = NULL; struct sk_buff *pp; - struct sock *sk; if (unlikely(!uh)) goto flush; @@ -521,10 +628,11 @@ struct sk_buff *udp4_gro_receive(struct list_head *head, struct sk_buff *skb) inet_gro_compute_pseudo); skip: NAPI_GRO_CB(skb)->is_ipv6 = 0; - rcu_read_lock(); - sk = static_branch_unlikely(&udp_encap_needed_key) ? udp4_lib_lookup_skb(skb, uh->source, uh->dest) : NULL; + + if (static_branch_unlikely(&udp_encap_needed_key)) + sk = udp4_gro_lookup_skb(skb, uh->source, uh->dest); + pp = udp_gro_receive(head, skb, uh, sk); - rcu_read_unlock(); return pp; flush: @@ -542,6 +650,10 @@ static int udp_gro_complete_segment(struct sk_buff *skb) skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count; skb_shinfo(skb)->gso_type |= SKB_GSO_UDP_L4; + + if (skb->encapsulation) + skb->inner_transport_header = skb->transport_header; + return 0; } @@ -550,18 +662,22 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff, { __be16 newlen = htons(skb->len - nhoff); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - int err = -ENOSYS; struct sock *sk; + int err; uh->len = newlen; - rcu_read_lock(); sk = INDIRECT_CALL_INET(lookup, udp6_lib_lookup_skb, udp4_lib_lookup_skb, skb, uh->source, uh->dest); if (sk && udp_sk(sk)->gro_complete) { skb_shinfo(skb)->gso_type = uh->check ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + /* clear the encap mark, so that inner frag_list gro_complete + * can take place + */ + NAPI_GRO_CB(skb)->encap_mark = 0; + /* Set encapsulation before calling into inner gro_complete() * functions to make them set up the inner offsets. */ @@ -571,7 +687,6 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff, } else { err = udp_gro_complete_segment(skb); } - rcu_read_unlock(); if (skb->remcsum_offload) skb_shinfo(skb)->gso_type |= SKB_GSO_TUNNEL_REMCSUM; @@ -585,7 +700,8 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) const struct iphdr *iph = ip_hdr(skb); struct udphdr *uh = (struct udphdr *)(skb->data + nhoff); - if (NAPI_GRO_CB(skb)->is_flist) { + /* do fraglist only if there is no outer UDP encap (or we already processed it) */ + if (NAPI_GRO_CB(skb)->is_flist && !NAPI_GRO_CB(skb)->encap_mark) { uh->len = htons(skb->len - nhoff); skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4); |