aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorRoopa Prabhu <roopa@cumulusnetworks.com>2016-08-24 20:10:43 -0700
committerDavid S. Miller <davem@davemloft.net>2016-08-30 22:27:18 -0700
commit14972cbd34ff668c390cbd2e6497323484c9e812 (patch)
tree2931fb8d4a03a3ecf457f9789abd802cecdc4fbb /net
parentnet: batch calls to flush_all_backlogs() (diff)
downloadlinux-dev-14972cbd34ff668c390cbd2e6497323484c9e812.tar.xz
linux-dev-14972cbd34ff668c390cbd2e6497323484c9e812.zip
net: lwtunnel: Handle fragmentation
Today mpls iptunnel lwtunnel_output redirect expects the tunnel output function to handle fragmentation. This is ok but can be avoided if we did not do the mpls output redirect too early. ie we could wait until ip fragmentation is done and then call mpls output for each ip fragment. To make this work we will need, 1) the lwtunnel state to carry encap headroom 2) and do the redirect to the encap output handler on the ip fragment (essentially do the output redirect after fragmentation) This patch adds tunnel headroom in lwtstate to make sure we account for tunnel data in mtu calculations during fragmentation and adds new xmit redirect handler to redirect to lwtunnel xmit func after ip fragmentation. This includes IPV6 and some mtu fixes and testing from David Ahern. Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com> Signed-off-by: David Ahern <dsa@cumulusnetworks.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/core/lwtunnel.c35
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/route.c4
-rw-r--r--net/ipv6/ip6_output.c8
-rw-r--r--net/ipv6/route.c4
-rw-r--r--net/mpls/mpls_iptunnel.c9
6 files changed, 62 insertions, 6 deletions
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 669ecc9f884e..e5f84c26ba1a 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -251,6 +251,41 @@ drop:
}
EXPORT_SYMBOL(lwtunnel_output);
+int lwtunnel_xmit(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ const struct lwtunnel_encap_ops *ops;
+ struct lwtunnel_state *lwtstate;
+ int ret = -EINVAL;
+
+ if (!dst)
+ goto drop;
+
+ lwtstate = dst->lwtstate;
+
+ if (lwtstate->type == LWTUNNEL_ENCAP_NONE ||
+ lwtstate->type > LWTUNNEL_ENCAP_MAX)
+ return 0;
+
+ ret = -EOPNOTSUPP;
+ rcu_read_lock();
+ ops = rcu_dereference(lwtun_encaps[lwtstate->type]);
+ if (likely(ops && ops->xmit))
+ ret = ops->xmit(skb);
+ rcu_read_unlock();
+
+ if (ret == -EOPNOTSUPP)
+ goto drop;
+
+ return ret;
+
+drop:
+ kfree_skb(skb);
+
+ return ret;
+}
+EXPORT_SYMBOL(lwtunnel_xmit);
+
int lwtunnel_input(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index dde37fb340bf..65569274efb8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -73,6 +73,7 @@
#include <net/icmp.h>
#include <net/checksum.h>
#include <net/inetpeer.h>
+#include <net/lwtunnel.h>
#include <linux/igmp.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
@@ -197,6 +198,13 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
skb = skb2;
}
+ if (lwtunnel_xmit_redirect(dst->lwtstate)) {
+ int res = lwtunnel_xmit(skb);
+
+ if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+ return res;
+ }
+
rcu_read_lock_bh();
nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr);
neigh = __ipv4_neigh_lookup_noref(dev, nexthop);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a1f2830d8110..3e992783c1d0 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1246,7 +1246,9 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
mtu = 576;
}
- return min_t(unsigned int, mtu, IP_MAX_MTU);
+ mtu = min_t(unsigned int, mtu, IP_MAX_MTU);
+
+ return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1dfc402d9ad1..993fd9666f1b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -56,6 +56,7 @@
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
+#include <net/lwtunnel.h>
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
@@ -104,6 +105,13 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
}
+ if (lwtunnel_xmit_redirect(dst->lwtstate)) {
+ int res = lwtunnel_xmit(skb);
+
+ if (res < 0 || res == LWTUNNEL_XMIT_DONE)
+ return res;
+ }
+
rcu_read_lock_bh();
nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 49817555449e..09d43ff11a8d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1604,7 +1604,9 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)
rcu_read_unlock();
out:
- return min_t(unsigned int, mtu, IP6_MAX_MTU);
+ mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
+
+ return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
}
static struct dst_entry *icmp6_dst_gc_list;
diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c
index 644a8da6d4bd..aed872cc05a6 100644
--- a/net/mpls/mpls_iptunnel.c
+++ b/net/mpls/mpls_iptunnel.c
@@ -37,7 +37,7 @@ static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en)
return en->labels * sizeof(struct mpls_shim_hdr);
}
-static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int mpls_xmit(struct sk_buff *skb)
{
struct mpls_iptunnel_encap *tun_encap_info;
struct mpls_shim_hdr *hdr;
@@ -115,7 +115,7 @@ static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb)
net_dbg_ratelimited("%s: packet transmission failed: %d\n",
__func__, err);
- return 0;
+ return LWTUNNEL_XMIT_DONE;
drop:
kfree_skb(skb);
@@ -153,7 +153,8 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla,
if (ret)
goto errout;
newts->type = LWTUNNEL_ENCAP_MPLS;
- newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+ newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT;
+ newts->headroom = mpls_encap_size(tun_encap_info);
*ts = newts;
@@ -209,7 +210,7 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
static const struct lwtunnel_encap_ops mpls_iptun_ops = {
.build_state = mpls_build_state,
- .output = mpls_output,
+ .xmit = mpls_xmit,
.fill_encap = mpls_fill_encap_info,
.get_encap_size = mpls_encap_nlsize,
.cmp_encap = mpls_encap_cmp,