aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
-rw-r--r--include/net/gro.h2
-rw-r--r--include/net/ipv6_stubs.h3
-rw-r--r--include/net/xfrm.h18
-rw-r--r--include/uapi/linux/xfrm.h3
-rw-r--r--net/ipv4/esp4_offload.c6
-rw-r--r--net/ipv4/icmp.c2
-rw-r--r--net/ipv4/ip_vti.c4
-rw-r--r--net/ipv4/netfilter.c2
-rw-r--r--net/ipv4/udp.c16
-rw-r--r--net/ipv4/xfrm4_input.c95
-rw-r--r--net/ipv6/af_inet6.c1
-rw-r--r--net/ipv6/esp6_offload.c10
-rw-r--r--net/ipv6/icmp.c2
-rw-r--r--net/ipv6/ip6_vti.c4
-rw-r--r--net/ipv6/netfilter.c2
-rw-r--r--net/ipv6/xfrm6_input.c103
-rw-r--r--net/netfilter/nf_nat_proto.c2
-rw-r--r--net/xfrm/xfrm_input.c6
-rw-r--r--net/xfrm/xfrm_interface_core.c4
-rw-r--r--net/xfrm/xfrm_policy.c299
20 files changed, 343 insertions, 241 deletions
diff --git a/include/net/gro.h b/include/net/gro.h
index 88644b3ca660..b435f0ddbf64 100644
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -41,7 +41,7 @@ struct napi_gro_cb {
/* Number of segments aggregated. */
u16 count;
- /* Used in ipv6_gro_receive() and foo-over-udp */
+ /* Used in ipv6_gro_receive() and foo-over-udp and esp-in-udp */
u16 proto;
/* Used in napi_gro_cb::free */
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 21da31e1dff5..485c39a89866 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -60,6 +60,9 @@ struct ipv6_stub {
#if IS_ENABLED(CONFIG_XFRM)
void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
+ struct sk_buff *(*xfrm6_gro_udp_encap_rcv)(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb);
int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
#endif
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 98d7aa78adda..c9bb0f892f55 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1207,20 +1207,20 @@ static inline int xfrm6_policy_check_reverse(struct sock *sk, int dir,
return __xfrm_policy_check2(sk, dir, skb, AF_INET6, 1);
}
-int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family, int reverse);
-static inline int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+static inline int xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family)
{
- return __xfrm_decode_session(skb, fl, family, 0);
+ return __xfrm_decode_session(net, skb, fl, family, 0);
}
-static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
+static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
- return __xfrm_decode_session(skb, fl, family, 1);
+ return __xfrm_decode_session(net, skb, fl, family, 1);
}
int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
@@ -1296,7 +1296,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk
{
return 1;
}
-static inline int xfrm_decode_session_reverse(struct sk_buff *skb,
+static inline int xfrm_decode_session_reverse(struct net *net, struct sk_buff *skb,
struct flowi *fl,
unsigned int family)
{
@@ -1669,7 +1669,6 @@ int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
#endif
void xfrm_local_error(struct sk_buff *skb, int mtu);
-int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
int encap_type);
int xfrm4_transport_finish(struct sk_buff *skb, int async);
@@ -1689,7 +1688,6 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char prot
int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
-int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
struct ip6_tnl *t);
int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
@@ -1712,6 +1710,10 @@ int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+ struct sk_buff *skb);
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+ struct sk_buff *skb);
int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval,
int optlen);
#else
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 23543c33fee8..6a77328be114 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -4,6 +4,7 @@
#include <linux/in6.h>
#include <linux/types.h>
+#include <linux/stddef.h>
/* All of the structures in this file may not change size as they are
* passed into the kernel from userspace via netlink sockets.
@@ -33,7 +34,7 @@ struct xfrm_sec_ctx {
__u8 ctx_alg;
__u16 ctx_len;
__u32 ctx_sid;
- char ctx_str[];
+ char ctx_str[] __counted_by(ctx_len);
};
/* Security Context Domains of Interpretation */
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 10e96ed6c9e3..b3271957ad9a 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -33,6 +33,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
int offset = skb_gro_offset(skb);
struct xfrm_offload *xo;
struct xfrm_state *x;
+ int encap_type = 0;
__be32 seq;
__be32 spi;
@@ -70,6 +71,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
xo->flags |= XFRM_GRO;
+ if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+ encap_type = UDP_ENCAP_ESPINUDP;
+
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
XFRM_SPI_SKB_CB(skb)->family = AF_INET;
XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
@@ -77,7 +81,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
/* We don't need to handle errors from xfrm_input, it does all
* the error handling and frees the resources on error. */
- xfrm_input(skb, IPPROTO_ESP, spi, -2);
+ xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
return ERR_PTR(-EINPROGRESS);
out_reset:
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index b8607763d113..e63a3bf99617 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -517,7 +517,7 @@ static struct rtable *icmp_route_lookup(struct net *net,
} else
return rt;
- err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
+ err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET);
if (err)
goto relookup_failed;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index d1e7d0ceb7ed..9ab9b3ebe0cd 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -288,11 +288,11 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IP):
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
break;
case htons(ETH_P_IPV6):
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET6);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
break;
default:
goto tx_err;
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index bd135165482a..591a2737808e 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -62,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un
#ifdef CONFIG_XFRM
if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
- xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
+ xfrm_decode_session(net, skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
struct dst_entry *dst = skb_dst(skb);
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1734fd6a1ce0..89e5a806b82e 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2630,6 +2630,19 @@ void udp_destroy_sock(struct sock *sk)
}
}
+static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family,
+ struct sock *sk)
+{
+#ifdef CONFIG_XFRM
+ if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) {
+ if (family == AF_INET)
+ WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv);
+ else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6)
+ WRITE_ONCE(udp_sk(sk)->gro_receive, ipv6_stub->xfrm6_gro_udp_encap_rcv);
+ }
+#endif
+}
+
/*
* Socket option code for UDP
*/
@@ -2679,6 +2692,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
case 0:
#ifdef CONFIG_XFRM
case UDP_ENCAP_ESPINUDP:
+ set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk);
+ fallthrough;
case UDP_ENCAP_ESPINUDP_NON_IKE:
#if IS_ENABLED(CONFIG_IPV6)
if (sk->sk_family == AF_INET6)
@@ -2721,6 +2736,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
udp_tunnel_encap_enable(sk);
udp_assign_bit(GRO_ENABLED, sk, valbool);
udp_assign_bit(ACCEPT_L4, sk, valbool);
+ set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk);
break;
/*
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 183f6dc37242..c54676998eb6 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -17,6 +17,8 @@
#include <linux/netfilter_ipv4.h>
#include <net/ip.h>
#include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>
static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
struct sk_buff *skb)
@@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
return 0;
}
-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
{
struct udp_sock *up = udp_sk(sk);
struct udphdr *uh;
@@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
- goto drop;
+ return -EINVAL;
} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
/* ESP Packet without Non-ESP header */
len = sizeof(struct udphdr);
@@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP_NON_IKE:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
- goto drop;
+ return -EINVAL;
} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
udpdata32[0] == 0 && udpdata32[1] == 0) {
@@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
* protocol to ESP, and then call into the transform receiver.
*/
if (skb_unclone(skb, GFP_ATOMIC))
- goto drop;
+ return -EINVAL;
/* Now we can update and verify the packet length... */
iph = ip_hdr(skb);
@@ -147,25 +142,89 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
iph->tot_len = htons(ntohs(iph->tot_len) - len);
if (skb->len < iphlen + len) {
/* packet is too small!?! */
- goto drop;
+ return -EINVAL;
}
/* pull the data buffer up to the ESP header and set the
* transport header to point to ESP. Keep UDP on the stack
* for later.
*/
- __skb_pull(skb, len);
- skb_reset_transport_header(skb);
+ if (pull) {
+ __skb_pull(skb, len);
+ skb_reset_transport_header(skb);
+ } else {
+ skb_set_transport_header(skb, len);
+ }
/* process ESP */
- return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
- kfree_skb(skb);
return 0;
}
+
+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ int ret;
+
+ ret = __xfrm4_udp_encap_rcv(sk, skb, true);
+ if (!ret)
+ return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0,
+ udp_sk(sk)->encap_type);
+
+ if (ret < 0) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ return ret;
+}
EXPORT_SYMBOL(xfrm4_udp_encap_rcv);
+struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+ struct sk_buff *skb)
+{
+ int offset = skb_gro_offset(skb);
+ const struct net_offload *ops;
+ struct sk_buff *pp = NULL;
+ int ret;
+
+ offset = offset - sizeof(struct udphdr);
+
+ if (!pskb_pull(skb, offset))
+ return NULL;
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet_offloads[IPPROTO_ESP]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out;
+
+ ret = __xfrm4_udp_encap_rcv(sk, skb, false);
+ if (ret)
+ goto out;
+
+ skb_push(skb, offset);
+ NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+ pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+ rcu_read_unlock();
+
+ return pp;
+
+out:
+ rcu_read_unlock();
+ skb_push(skb, offset);
+ NAPI_GRO_CB(skb)->same_flow = 0;
+ NAPI_GRO_CB(skb)->flush = 1;
+
+ return NULL;
+}
+EXPORT_SYMBOL(xfrm4_gro_udp_encap_rcv);
+
int xfrm4_rcv(struct sk_buff *skb)
{
return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c35d302a3da9..13a1833a4df5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -1050,6 +1050,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
#if IS_ENABLED(CONFIG_XFRM)
.xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
+ .xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv,
.xfrm6_rcv_encap = xfrm6_rcv_encap,
#endif
.nd_tbl = &nd_tbl,
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index a189e08370a5..527b7caddbc6 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -34,7 +34,9 @@ static __u16 esp6_nexthdr_esp_offset(struct ipv6hdr *ipv6_hdr, int nhlen)
int off = sizeof(struct ipv6hdr);
struct ipv6_opt_hdr *exthdr;
- if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP))
+ /* ESP or ESPINUDP */
+ if (likely(ipv6_hdr->nexthdr == NEXTHDR_ESP ||
+ ipv6_hdr->nexthdr == NEXTHDR_UDP))
return offsetof(struct ipv6hdr, nexthdr);
while (off < nhlen) {
@@ -54,10 +56,14 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
int offset = skb_gro_offset(skb);
struct xfrm_offload *xo;
struct xfrm_state *x;
+ int encap_type = 0;
__be32 seq;
__be32 spi;
int nhoff;
+ if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP)
+ encap_type = UDP_ENCAP_ESPINUDP;
+
if (!pskb_pull(skb, offset))
return NULL;
@@ -104,7 +110,7 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
/* We don't need to handle errors from xfrm_input, it does all
* the error handling and frees the resources on error. */
- xfrm_input(skb, IPPROTO_ESP, spi, -2);
+ xfrm_input(skb, IPPROTO_ESP, spi, encap_type);
return ERR_PTR(-EINPROGRESS);
out_reset:
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 8fb4a791881a..f62427097126 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -385,7 +385,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net,
return dst;
}
- err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
+ err = xfrm_decode_session_reverse(net, skb, flowi6_to_flowi(&fl2), AF_INET6);
if (err)
goto relookup_failed;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 73c85d4e0e9c..e550240c85e1 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -569,11 +569,11 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_err;
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET6);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
break;
case htons(ETH_P_IP):
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
break;
default:
goto tx_err;
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 857713d7a38a..53d255838e6a 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -61,7 +61,7 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff
#ifdef CONFIG_XFRM
if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- xfrm_decode_session(skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
+ xfrm_decode_session(net, skb, flowi6_to_flowi(&fl6), AF_INET6) == 0) {
skb_dst_set(skb, NULL);
dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
if (IS_ERR(dst))
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 4156387248e4..6e36e5047fba 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -16,6 +16,8 @@
#include <linux/netfilter_ipv6.h>
#include <net/ipv6.h>
#include <net/xfrm.h>
+#include <net/protocol.h>
+#include <net/gro.h>
int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
struct ip6_tnl *t)
@@ -67,14 +69,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
return 0;
}
-/* If it's a keepalive packet, then just eat it.
- * If it's an encapsulated packet, then pass it to the
- * IPsec xfrm input.
- * Returns 0 if skb passed to xfrm or was dropped.
- * Returns >0 if skb should be passed to UDP.
- * Returns <0 if skb should be resubmitted (-ret is protocol)
- */
-int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+static int __xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull)
{
struct udp_sock *up = udp_sk(sk);
struct udphdr *uh;
@@ -85,9 +80,6 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
__be32 *udpdata32;
u16 encap_type;
- if (skb->protocol == htons(ETH_P_IP))
- return xfrm4_udp_encap_rcv(sk, skb);
-
encap_type = READ_ONCE(up->encap_type);
/* if this is not encapsulated socket, then just return now */
if (!encap_type)
@@ -109,7 +101,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
- goto drop;
+ return -EINVAL;
} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
/* ESP Packet without Non-ESP header */
len = sizeof(struct udphdr);
@@ -120,7 +112,7 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
case UDP_ENCAP_ESPINUDP_NON_IKE:
/* Check if this is a keepalive packet. If so, eat it. */
if (len == 1 && udpdata[0] == 0xff) {
- goto drop;
+ return -EINVAL;
} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
udpdata32[0] == 0 && udpdata32[1] == 0) {
@@ -138,31 +130,100 @@ int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
* protocol to ESP, and then call into the transform receiver.
*/
if (skb_unclone(skb, GFP_ATOMIC))
- goto drop;
+ return -EINVAL;
/* Now we can update and verify the packet length... */
ip6h = ipv6_hdr(skb);
ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len);
if (skb->len < ip6hlen + len) {
/* packet is too small!?! */
- goto drop;
+ return -EINVAL;
}
/* pull the data buffer up to the ESP header and set the
* transport header to point to ESP. Keep UDP on the stack
* for later.
*/
- __skb_pull(skb, len);
- skb_reset_transport_header(skb);
+ if (pull) {
+ __skb_pull(skb, len);
+ skb_reset_transport_header(skb);
+ } else {
+ skb_set_transport_header(skb, len);
+ }
/* process ESP */
- return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
-
-drop:
- kfree_skb(skb);
return 0;
}
+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+ int ret;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ return xfrm4_udp_encap_rcv(sk, skb);
+
+ ret = __xfrm6_udp_encap_rcv(sk, skb, true);
+ if (!ret)
+ return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0,
+ udp_sk(sk)->encap_type);
+
+ if (ret < 0) {
+ kfree_skb(skb);
+ return 0;
+ }
+
+ return ret;
+}
+
+struct sk_buff *xfrm6_gro_udp_encap_rcv(struct sock *sk, struct list_head *head,
+ struct sk_buff *skb)
+{
+ int offset = skb_gro_offset(skb);
+ const struct net_offload *ops;
+ struct sk_buff *pp = NULL;
+ int ret;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ return xfrm4_gro_udp_encap_rcv(sk, head, skb);
+
+ offset = offset - sizeof(struct udphdr);
+
+ if (!pskb_pull(skb, offset))
+ return NULL;
+
+ rcu_read_lock();
+ ops = rcu_dereference(inet6_offloads[IPPROTO_ESP]);
+ if (!ops || !ops->callbacks.gro_receive)
+ goto out;
+
+ ret = __xfrm6_udp_encap_rcv(sk, skb, false);
+ if (ret)
+ goto out;
+
+ skb_push(skb, offset);
+ NAPI_GRO_CB(skb)->proto = IPPROTO_UDP;
+
+ pp = call_gro_receive(ops->callbacks.gro_receive, head, skb);
+ rcu_read_unlock();
+
+ return pp;
+
+out:
+ rcu_read_unlock();
+ skb_push(skb, offset);
+ NAPI_GRO_CB(skb)->same_flow = 0;
+ NAPI_GRO_CB(skb)->flush = 1;
+
+ return NULL;
+}
+
int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
{
return xfrm6_rcv_spi(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index 6d969468c779..dc450cc81222 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -668,7 +668,7 @@ static int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int
struct flowi fl;
int err;
- err = xfrm_decode_session(skb, &fl, family);
+ err = xfrm_decode_session(net, skb, &fl, family);
if (err < 0)
return err;
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index d5ee96789d4b..bd4ce21d76d7 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -462,7 +462,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
struct xfrm_offload *xo = xfrm_offload(skb);
struct sec_path *sp;
- if (encap_type < 0) {
+ if (encap_type < 0 || (xo && xo->flags & XFRM_GRO)) {
x = xfrm_input_state(skb);
if (unlikely(x->km.state != XFRM_STATE_VALID)) {
@@ -485,9 +485,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
seq = XFRM_SKB_CB(skb)->seq.input.low;
goto resume;
}
-
- /* encap_type < -1 indicates a GRO call. */
- encap_type = 0;
+ /* GRO call */
seq = XFRM_SPI_SKB_CB(skb)->seq;
if (xo && (xo->flags & CRYPTO_DONE)) {
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index e21cc71095bb..21d50d75c260 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -536,7 +536,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
switch (skb->protocol) {
case htons(ETH_P_IPV6):
memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET6);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6);
if (!dst) {
fl.u.ip6.flowi6_oif = dev->ifindex;
fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC;
@@ -551,7 +551,7 @@ static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev)
break;
case htons(ETH_P_IP):
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- xfrm_decode_session(skb, &fl, AF_INET);
+ xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET);
if (!dst) {
struct rtable *rt;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 5cdd3bca3637..c13dc3ef7910 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -149,6 +149,21 @@ struct xfrm_pol_inexact_candidates {
struct hlist_head *res[XFRM_POL_CAND_MAX];
};
+struct xfrm_flow_keys {
+ struct flow_dissector_key_basic basic;
+ struct flow_dissector_key_control control;
+ union {
+ struct flow_dissector_key_ipv4_addrs ipv4;
+ struct flow_dissector_key_ipv6_addrs ipv6;
+ } addrs;
+ struct flow_dissector_key_ip ip;
+ struct flow_dissector_key_icmp icmp;
+ struct flow_dissector_key_ports ports;
+ struct flow_dissector_key_keyid gre;
+};
+
+static struct flow_dissector xfrm_session_dissector __ro_after_init;
+
static DEFINE_SPINLOCK(xfrm_if_cb_lock);
static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly;
@@ -2858,7 +2873,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
/* Fixup the mark to support VTI. */
skb_mark = skb->mark;
skb->mark = pol->mark.v;
- xfrm_decode_session(skb, &fl, dst->ops->family);
+ xfrm_decode_session(net, skb, &fl, dst->ops->family);
skb->mark = skb_mark;
spin_unlock(&pq->hold_queue.lock);
@@ -2894,7 +2909,7 @@ static void xfrm_policy_queue_process(struct timer_list *t)
/* Fixup the mark to support VTI. */
skb_mark = skb->mark;
skb->mark = pol->mark.v;
- xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
+ xfrm_decode_session(net, skb, &fl, skb_dst(skb)->ops->family);
skb->mark = skb_mark;
dst_hold(xfrm_dst_path(skb_dst(skb)));
@@ -3372,209 +3387,106 @@ xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int star
}
static void
-decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse)
+decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
{
- const struct iphdr *iph = ip_hdr(skb);
- int ihl = iph->ihl;
- u8 *xprth = skb_network_header(skb) + ihl * 4;
struct flowi4 *fl4 = &fl->u.ip4;
- int oif = 0;
-
- if (skb_dst(skb) && skb_dst(skb)->dev)
- oif = skb_dst(skb)->dev->ifindex;
memset(fl4, 0, sizeof(struct flowi4));
- fl4->flowi4_mark = skb->mark;
- fl4->flowi4_oif = reverse ? skb->skb_iif : oif;
-
- fl4->flowi4_proto = iph->protocol;
- fl4->daddr = reverse ? iph->saddr : iph->daddr;
- fl4->saddr = reverse ? iph->daddr : iph->saddr;
- fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK;
-
- if (!ip_is_fragment(iph)) {
- switch (iph->protocol) {
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- case IPPROTO_TCP:
- case IPPROTO_SCTP:
- case IPPROTO_DCCP:
- if (xprth + 4 < skb->data ||
- pskb_may_pull(skb, xprth + 4 - skb->data)) {
- __be16 *ports;
-
- xprth = skb_network_header(skb) + ihl * 4;
- ports = (__be16 *)xprth;
-
- fl4->fl4_sport = ports[!!reverse];
- fl4->fl4_dport = ports[!reverse];
- }
- break;
- case IPPROTO_ICMP:
- if (xprth + 2 < skb->data ||
- pskb_may_pull(skb, xprth + 2 - skb->data)) {
- u8 *icmp;
- xprth = skb_network_header(skb) + ihl * 4;
- icmp = xprth;
+ if (reverse) {
+ fl4->saddr = flkeys->addrs.ipv4.dst;
+ fl4->daddr = flkeys->addrs.ipv4.src;
+ fl4->fl4_sport = flkeys->ports.dst;
+ fl4->fl4_dport = flkeys->ports.src;
+ } else {
+ fl4->saddr = flkeys->addrs.ipv4.src;
+ fl4->daddr = flkeys->addrs.ipv4.dst;
+ fl4->fl4_sport = flkeys->ports.src;
+ fl4->fl4_dport = flkeys->ports.dst;
+ }
- fl4->fl4_icmp_type = icmp[0];
- fl4->fl4_icmp_code = icmp[1];
- }
- break;
- case IPPROTO_GRE:
- if (xprth + 12 < skb->data ||
- pskb_may_pull(skb, xprth + 12 - skb->data)) {
- __be16 *greflags;
- __be32 *gre_hdr;
-
- xprth = skb_network_header(skb) + ihl * 4;
- greflags = (__be16 *)xprth;
- gre_hdr = (__be32 *)xprth;
-
- if (greflags[0] & GRE_KEY) {
- if (greflags[0] & GRE_CSUM)
- gre_hdr++;
- fl4->fl4_gre_key = gre_hdr[1];
- }
- }
- break;
- default:
- break;
- }
+ switch (flkeys->basic.ip_proto) {
+ case IPPROTO_GRE:
+ fl4->fl4_gre_key = flkeys->gre.keyid;
+ break;
+ case IPPROTO_ICMP:
+ fl4->fl4_icmp_type = flkeys->icmp.type;
+ fl4->fl4_icmp_code = flkeys->icmp.code;
+ break;
}
+
+ fl4->flowi4_proto = flkeys->basic.ip_proto;
+ fl4->flowi4_tos = flkeys->ip.tos;
}
#if IS_ENABLED(CONFIG_IPV6)
static void
-decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse)
+decode_session6(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse)
{
struct flowi6 *fl6 = &fl->u.ip6;
- int onlyproto = 0;
- const struct ipv6hdr *hdr = ipv6_hdr(skb);
- u32 offset = sizeof(*hdr);
- struct ipv6_opt_hdr *exthdr;
- const unsigned char *nh = skb_network_header(skb);
- u16 nhoff = IP6CB(skb)->nhoff;
- int oif = 0;
- u8 nexthdr;
-
- if (!nhoff)
- nhoff = offsetof(struct ipv6hdr, nexthdr);
-
- nexthdr = nh[nhoff];
-
- if (skb_dst(skb) && skb_dst(skb)->dev)
- oif = skb_dst(skb)->dev->ifindex;
memset(fl6, 0, sizeof(struct flowi6));
- fl6->flowi6_mark = skb->mark;
- fl6->flowi6_oif = reverse ? skb->skb_iif : oif;
-
- fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
- fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
-
- while (nh + offset + sizeof(*exthdr) < skb->data ||
- pskb_may_pull(skb, nh + offset + sizeof(*exthdr) - skb->data)) {
- nh = skb_network_header(skb);
- exthdr = (struct ipv6_opt_hdr *)(nh + offset);
-
- switch (nexthdr) {
- case NEXTHDR_FRAGMENT:
- onlyproto = 1;
- fallthrough;
- case NEXTHDR_ROUTING:
- case NEXTHDR_HOP:
- case NEXTHDR_DEST:
- offset += ipv6_optlen(exthdr);
- nexthdr = exthdr->nexthdr;
- break;
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- case IPPROTO_TCP:
- case IPPROTO_SCTP:
- case IPPROTO_DCCP:
- if (!onlyproto && (nh + offset + 4 < skb->data ||
- pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
- __be16 *ports;
-
- nh = skb_network_header(skb);
- ports = (__be16 *)(nh + offset);
- fl6->fl6_sport = ports[!!reverse];
- fl6->fl6_dport = ports[!reverse];
- }
- fl6->flowi6_proto = nexthdr;
- return;
- case IPPROTO_ICMPV6:
- if (!onlyproto && (nh + offset + 2 < skb->data ||
- pskb_may_pull(skb, nh + offset + 2 - skb->data))) {
- u8 *icmp;
-
- nh = skb_network_header(skb);
- icmp = (u8 *)(nh + offset);
- fl6->fl6_icmp_type = icmp[0];
- fl6->fl6_icmp_code = icmp[1];
- }
- fl6->flowi6_proto = nexthdr;
- return;
- case IPPROTO_GRE:
- if (!onlyproto &&
- (nh + offset + 12 < skb->data ||
- pskb_may_pull(skb, nh + offset + 12 - skb->data))) {
- struct gre_base_hdr *gre_hdr;
- __be32 *gre_key;
-
- nh = skb_network_header(skb);
- gre_hdr = (struct gre_base_hdr *)(nh + offset);
- gre_key = (__be32 *)(gre_hdr + 1);
-
- if (gre_hdr->flags & GRE_KEY) {
- if (gre_hdr->flags & GRE_CSUM)
- gre_key++;
- fl6->fl6_gre_key = *gre_key;
- }
- }
- fl6->flowi6_proto = nexthdr;
- return;
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
- case IPPROTO_MH:
- offset += ipv6_optlen(exthdr);
- if (!onlyproto && (nh + offset + 3 < skb->data ||
- pskb_may_pull(skb, nh + offset + 3 - skb->data))) {
- struct ip6_mh *mh;
-
- nh = skb_network_header(skb);
- mh = (struct ip6_mh *)(nh + offset);
- fl6->fl6_mh_type = mh->ip6mh_type;
- }
- fl6->flowi6_proto = nexthdr;
- return;
-#endif
- default:
- fl6->flowi6_proto = nexthdr;
- return;
- }
+ if (reverse) {
+ fl6->saddr = flkeys->addrs.ipv6.dst;
+ fl6->daddr = flkeys->addrs.ipv6.src;
+ fl6->fl6_sport = flkeys->ports.dst;
+ fl6->fl6_dport = flkeys->ports.src;
+ } else {
+ fl6->saddr = flkeys->addrs.ipv6.src;
+ fl6->daddr = flkeys->addrs.ipv6.dst;
+ fl6->fl6_sport = flkeys->ports.src;
+ fl6->fl6_dport = flkeys->ports.dst;
+ }
+
+ switch (flkeys->basic.ip_proto) {
+ case IPPROTO_GRE:
+ fl6->fl6_gre_key = flkeys->gre.keyid;
+ break;
+ case IPPROTO_ICMPV6:
+ fl6->fl6_icmp_type = flkeys->icmp.type;
+ fl6->fl6_icmp_code = flkeys->icmp.code;
+ break;
}
+
+ fl6->flowi6_proto = flkeys->basic.ip_proto;
}
#endif
-int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
+int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl,
unsigned int family, int reverse)
{
+ struct xfrm_flow_keys flkeys;
+
+ memset(&flkeys, 0, sizeof(flkeys));
+ __skb_flow_dissect(net, skb, &xfrm_session_dissector, &flkeys,
+ NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_ENCAP);
+
switch (family) {
case AF_INET:
- decode_session4(skb, fl, reverse);
+ decode_session4(&flkeys, fl, reverse);
break;
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6:
- decode_session6(skb, fl, reverse);
+ decode_session6(&flkeys, fl, reverse);
break;
#endif
default:
return -EAFNOSUPPORT;
}
+ fl->flowi_mark = skb->mark;
+ if (reverse) {
+ fl->flowi_oif = skb->skb_iif;
+ } else {
+ int oif = 0;
+
+ if (skb_dst(skb) && skb_dst(skb)->dev)
+ oif = skb_dst(skb)->dev->ifindex;
+
+ fl->flowi_oif = oif;
+ }
+
return security_xfrm_decode_session(skb, &fl->flowi_secid);
}
EXPORT_SYMBOL(__xfrm_decode_session);
@@ -3623,7 +3535,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
reverse = dir & ~XFRM_POLICY_MASK;
dir &= XFRM_POLICY_MASK;
- if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
+ if (__xfrm_decode_session(net, skb, &fl, family, reverse) < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
return 0;
}
@@ -3779,7 +3691,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
struct dst_entry *dst;
int res = 1;
- if (xfrm_decode_session(skb, &fl, family) < 0) {
+ if (xfrm_decode_session(net, skb, &fl, family) < 0) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
return 0;
}
@@ -4258,8 +4170,47 @@ static struct pernet_operations __net_initdata xfrm_net_ops = {
.exit = xfrm_net_exit,
};
+static const struct flow_dissector_key xfrm_flow_dissector_keys[] = {
+ {
+ .key_id = FLOW_DISSECTOR_KEY_CONTROL,
+ .offset = offsetof(struct xfrm_flow_keys, control),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_BASIC,
+ .offset = offsetof(struct xfrm_flow_keys, basic),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS,
+ .offset = offsetof(struct xfrm_flow_keys, addrs.ipv4),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS,
+ .offset = offsetof(struct xfrm_flow_keys, addrs.ipv6),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_PORTS,
+ .offset = offsetof(struct xfrm_flow_keys, ports),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID,
+ .offset = offsetof(struct xfrm_flow_keys, gre),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_IP,
+ .offset = offsetof(struct xfrm_flow_keys, ip),
+ },
+ {
+ .key_id = FLOW_DISSECTOR_KEY_ICMP,
+ .offset = offsetof(struct xfrm_flow_keys, icmp),
+ },
+};
+
void __init xfrm_init(void)
{
+ skb_flow_dissector_init(&xfrm_session_dissector,
+ xfrm_flow_dissector_keys,
+ ARRAY_SIZE(xfrm_flow_dissector_keys));
+
register_pernet_subsys(&xfrm_net_ops);
xfrm_dev_init();
xfrm_input_init();