diff options
Diffstat (limited to 'net/xfrm/xfrm_output.c')
-rw-r--r-- | net/xfrm/xfrm_output.c | 307 |
1 files changed, 281 insertions, 26 deletions
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index fafc7aba705f..9a5e79a38c67 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -13,9 +13,15 @@ #include <linux/slab.h> #include <linux/spinlock.h> #include <net/dst.h> +#include <net/icmp.h> #include <net/inet_ecn.h> #include <net/xfrm.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ip6_route.h> +#include <net/ipv6_stubs.h> +#endif + #include "xfrm_inout.h" static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); @@ -71,6 +77,83 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } +#if IS_ENABLED(CONFIG_IPV6_MIP6) +static int mip6_rthdr_offset(struct sk_buff *skb, u8 **nexthdr, int type) +{ + const unsigned char *nh = skb_network_header(skb); + unsigned int offset = sizeof(struct ipv6hdr); + unsigned int packet_len; + int found_rhdr = 0; + + packet_len = skb_tail_pointer(skb) - nh; + *nexthdr = &ipv6_hdr(skb)->nexthdr; + + while (offset <= packet_len) { + struct ipv6_opt_hdr *exthdr; + + switch (**nexthdr) { + case NEXTHDR_HOP: + break; + case NEXTHDR_ROUTING: + if (type == IPPROTO_ROUTING && offset + 3 <= packet_len) { + struct ipv6_rt_hdr *rt; + + rt = (struct ipv6_rt_hdr *)(nh + offset); + if (rt->type != 0) + return offset; + } + found_rhdr = 1; + break; + case NEXTHDR_DEST: + /* HAO MUST NOT appear more than once. + * XXX: It is better to try to find by the end of + * XXX: packet if HAO exists. + */ + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { + net_dbg_ratelimited("mip6: hao exists already, override\n"); + return offset; + } + + if (found_rhdr) + return offset; + + break; + default: + return offset; + } + + if (offset + sizeof(struct ipv6_opt_hdr) > packet_len) + return -EINVAL; + + exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + + offset); + offset += ipv6_optlen(exthdr); + if (offset > IPV6_MAXPLEN) + return -EINVAL; + *nexthdr = &exthdr->nexthdr; + } + + return -EINVAL; +} +#endif + +#if IS_ENABLED(CONFIG_IPV6) +static int xfrm6_hdr_offset(struct xfrm_state *x, struct sk_buff *skb, u8 **prevhdr) +{ + switch (x->type->proto) { +#if IS_ENABLED(CONFIG_IPV6_MIP6) + case IPPROTO_DSTOPTS: + case IPPROTO_ROUTING: + return mip6_rthdr_offset(skb, prevhdr, x->type->proto); +#endif + default: + break; + } + + return ip6_find_1stfragopt(skb, prevhdr); +} +#endif + /* Add encapsulation header. * * The IP header and mutable extension headers will be moved forward to make @@ -86,7 +169,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) iph = ipv6_hdr(skb); skb_set_inner_transport_header(skb, skb_transport_offset(skb)); - hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr); if (hdr_len < 0) return hdr_len; skb_set_mac_header(skb, @@ -116,7 +199,7 @@ static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) iph = ipv6_hdr(skb); - hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + hdr_len = xfrm6_hdr_offset(x, skb, &prevhdr); if (hdr_len < 0) return hdr_len; skb_set_mac_header(skb, @@ -190,6 +273,7 @@ static int xfrm4_beet_encap_add(struct xfrm_state *x, struct sk_buff *skb) */ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) { + bool small_ipv6 = (skb->protocol == htons(ETH_P_IPV6)) && (skb->len <= IPV6_MIN_MTU); struct dst_entry *dst = skb_dst(skb); struct iphdr *top_iph; int flags; @@ -220,7 +304,7 @@ static int xfrm4_tunnel_encap_add(struct xfrm_state *x, struct sk_buff *skb) if (flags & XFRM_STATE_NOECN) IP_ECN_clear(top_iph); - top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? + top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) || small_ipv6 ? 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); top_iph->ttl = ip4_dst_hoplimit(xfrm_dst_child(dst)); @@ -442,7 +526,7 @@ static int xfrm_output_one(struct sk_buff *skb, int err) goto error; } - err = x->repl->overflow(x, skb); + err = xfrm_replay_overflow(x, skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); goto error; @@ -497,22 +581,22 @@ out: return err; } -int xfrm_output_resume(struct sk_buff *skb, int err) +int xfrm_output_resume(struct sock *sk, struct sk_buff *skb, int err) { struct net *net = xs_net(skb_dst(skb)->xfrm); while (likely((err = xfrm_output_one(skb, err)) == 0)) { nf_reset_ct(skb); - err = skb_dst(skb)->ops->local_out(net, skb->sk, skb); + err = skb_dst(skb)->ops->local_out(net, sk, skb); if (unlikely(err != 1)) goto out; if (!skb_dst(skb)->xfrm) - return dst_output(net, skb->sk, skb); + return dst_output(net, sk, skb); err = nf_hook(skb_dst(skb)->ops->family, - NF_INET_POST_ROUTING, net, skb->sk, skb, + NF_INET_POST_ROUTING, net, sk, skb, NULL, skb_dst(skb)->dev, xfrm_output2); if (unlikely(err != 1)) goto out; @@ -528,15 +612,15 @@ EXPORT_SYMBOL_GPL(xfrm_output_resume); static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { - return xfrm_output_resume(skb, 1); + return xfrm_output_resume(sk, skb, 1); } static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb) { struct sk_buff *segs, *nskb; - BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); - BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET); + BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET); + BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_GSO_CB_OFFSET); segs = skb_gso_segment(skb, 0); kfree_skb(skb); if (IS_ERR(segs)) @@ -559,12 +643,81 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb return 0; } +/* For partial checksum offload, the outer header checksum is calculated + * by software and the inner header checksum is calculated by hardware. + * This requires hardware to know the inner packet type to calculate + * the inner header checksum. Save inner ip protocol here to avoid + * traversing the packet in the vendor's xmit code. + * For IPsec tunnel mode save the ip protocol from the IP header of the + * plain text packet. Otherwise If the encap type is IPIP, just save + * skb->inner_ipproto in any other case get the ip protocol from the IP + * header. + */ +static void xfrm_get_inner_ipproto(struct sk_buff *skb, struct xfrm_state *x) +{ + struct xfrm_offload *xo = xfrm_offload(skb); + const struct ethhdr *eth; + + if (!xo) + return; + + if (x->outer_mode.encap == XFRM_MODE_TUNNEL) { + switch (x->outer_mode.family) { + case AF_INET: + xo->inner_ipproto = ip_hdr(skb)->protocol; + break; + case AF_INET6: + xo->inner_ipproto = ipv6_hdr(skb)->nexthdr; + break; + default: + break; + } + + return; + } + + /* non-Tunnel Mode */ + if (!skb->encapsulation) + return; + + if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) { + xo->inner_ipproto = skb->inner_ipproto; + return; + } + + if (skb->inner_protocol_type != ENCAP_TYPE_ETHER) + return; + + eth = (struct ethhdr *)skb_inner_mac_header(skb); + + switch (ntohs(eth->h_proto)) { + case ETH_P_IPV6: + xo->inner_ipproto = inner_ipv6_hdr(skb)->nexthdr; + break; + case ETH_P_IP: + xo->inner_ipproto = inner_ip_hdr(skb)->protocol; + break; + } +} + int xfrm_output(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); struct xfrm_state *x = skb_dst(skb)->xfrm; int err; + switch (x->outer_mode.family) { + case AF_INET: + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; + break; + case AF_INET6: + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + + IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; + break; + } + secpath_reset(skb); if (xfrm_dev_offload_ok(skb, x)) { @@ -576,25 +729,29 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) kfree_skb(skb); return -ENOMEM; } - skb->encapsulation = 1; sp->olen++; sp->xvec[sp->len++] = x; xfrm_state_hold(x); + xfrm_get_inner_ipproto(skb, x); + skb->encapsulation = 1; + if (skb_is_gso(skb)) { - skb_shinfo(skb)->gso_type |= SKB_GSO_ESP; + if (skb->inner_protocol) + return xfrm_output_gso(net, sk, skb); - return xfrm_output2(net, sk, skb); + skb_shinfo(skb)->gso_type |= SKB_GSO_ESP; + goto out; } if (x->xso.dev && x->xso.dev->features & NETIF_F_HW_ESP_TX_CSUM) goto out; + } else { + if (skb_is_gso(skb)) + return xfrm_output_gso(net, sk, skb); } - if (skb_is_gso(skb)) - return xfrm_output_gso(net, sk, skb); - if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); if (err) { @@ -609,11 +766,107 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output); +static int xfrm4_tunnel_check_size(struct sk_buff *skb) +{ + int mtu, ret = 0; + + if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE) + goto out; + + if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df) + goto out; + + mtu = dst_mtu(skb_dst(skb)); + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && + !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { + skb->protocol = htons(ETH_P_IP); + + if (skb->sk) + xfrm_local_error(skb, mtu); + else + icmp_send(skb, ICMP_DEST_UNREACH, + ICMP_FRAG_NEEDED, htonl(mtu)); + ret = -EMSGSIZE; + } +out: + return ret; +} + +static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ + int err; + + if (x->outer_mode.encap == XFRM_MODE_BEET && + ip_is_fragment(ip_hdr(skb))) { + net_warn_ratelimited("BEET mode doesn't support inner IPv4 fragments\n"); + return -EAFNOSUPPORT; + } + + err = xfrm4_tunnel_check_size(skb); + if (err) + return err; + + XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol; + + xfrm4_extract_header(skb); + return 0; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int xfrm6_tunnel_check_size(struct sk_buff *skb) +{ + int mtu, ret = 0; + struct dst_entry *dst = skb_dst(skb); + + if (skb->ignore_df) + goto out; + + mtu = dst_mtu(dst); + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + + if ((!skb_is_gso(skb) && skb->len > mtu) || + (skb_is_gso(skb) && + !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) { + skb->dev = dst->dev; + skb->protocol = htons(ETH_P_IPV6); + + if (xfrm6_local_dontfrag(skb->sk)) + ipv6_stub->xfrm6_local_rxpmtu(skb, mtu); + else if (skb->sk) + xfrm_local_error(skb, mtu); + else + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + ret = -EMSGSIZE; + } +out: + return ret; +} +#endif + +static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb) +{ +#if IS_ENABLED(CONFIG_IPV6) + int err; + + err = xfrm6_tunnel_check_size(skb); + if (err) + return err; + + XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr; + + xfrm6_extract_header(skb); + return 0; +#else + WARN_ON_ONCE(1); + return -EAFNOSUPPORT; +#endif +} + static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { - const struct xfrm_state_afinfo *afinfo; const struct xfrm_mode *inner_mode; - int err = -EAFNOSUPPORT; if (x->sel.family == AF_UNSPEC) inner_mode = xfrm_ip2inner_mode(x, @@ -624,13 +877,14 @@ static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) if (inner_mode == NULL) return -EAFNOSUPPORT; - rcu_read_lock(); - afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); - if (likely(afinfo)) - err = afinfo->extract_output(x, skb); - rcu_read_unlock(); + switch (inner_mode->family) { + case AF_INET: + return xfrm4_extract_output(x, skb); + case AF_INET6: + return xfrm6_extract_output(x, skb); + } - return err; + return -EAFNOSUPPORT; } void xfrm_local_error(struct sk_buff *skb, int mtu) @@ -640,7 +894,8 @@ void xfrm_local_error(struct sk_buff *skb, int mtu) if (skb->protocol == htons(ETH_P_IP)) proto = AF_INET; - else if (skb->protocol == htons(ETH_P_IPV6)) + else if (skb->protocol == htons(ETH_P_IPV6) && + skb->sk->sk_family == AF_INET6) proto = AF_INET6; else return; |