aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-01-31 14:31:10 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-01-31 14:31:10 -0800
commitb2fe5fa68642860e7de76167c3111623aa0d5de1 (patch)
treeb7f9b89b7039ecefbc35fe3c8e73a6ff972641dd /net/ipv6
parentMerge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6 (diff)
parenttls: Add support for encryption using async offload accelerator (diff)
downloadlinux-dev-b2fe5fa68642860e7de76167c3111623aa0d5de1.tar.xz
linux-dev-b2fe5fa68642860e7de76167c3111623aa0d5de1.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Significantly shrink the core networking routing structures. Result of http://vger.kernel.org/~davem/seoul2017_netdev_keynote.pdf 2) Add netdevsim driver for testing various offloads, from Jakub Kicinski. 3) Support cross-chip FDB operations in DSA, from Vivien Didelot. 4) Add a 2nd listener hash table for TCP, similar to what was done for UDP. From Martin KaFai Lau. 5) Add eBPF based queue selection to tun, from Jason Wang. 6) Lockless qdisc support, from John Fastabend. 7) SCTP stream interleave support, from Xin Long. 8) Smoother TCP receive autotuning, from Eric Dumazet. 9) Lots of erspan tunneling enhancements, from William Tu. 10) Add true function call support to BPF, from Alexei Starovoitov. 11) Add explicit support for GRO HW offloading, from Michael Chan. 12) Support extack generation in more netlink subsystems. From Alexander Aring, Quentin Monnet, and Jakub Kicinski. 13) Add 1000BaseX, flow control, and EEE support to mvneta driver. From Russell King. 14) Add flow table abstraction to netfilter, from Pablo Neira Ayuso. 15) Many improvements and simplifications to the NFP driver bpf JIT, from Jakub Kicinski. 16) Support for ipv6 non-equal cost multipath routing, from Ido Schimmel. 17) Add resource abstration to devlink, from Arkadi Sharshevsky. 18) Packet scheduler classifier shared filter block support, from Jiri Pirko. 19) Avoid locking in act_csum, from Davide Caratti. 20) devinet_ioctl() simplifications from Al viro. 21) More TCP bpf improvements from Lawrence Brakmo. 22) Add support for onlink ipv6 route flag, similar to ipv4, from David Ahern. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1925 commits) tls: Add support for encryption using async offload accelerator ip6mr: fix stale iterator net/sched: kconfig: Remove blank help texts openvswitch: meter: Use 64-bit arithmetic instead of 32-bit tcp_nv: fix potential integer overflow in tcpnv_acked r8169: fix RTL8168EP take too long to complete driver initialization. qmi_wwan: Add support for Quectel EP06 rtnetlink: enable IFLA_IF_NETNSID for RTM_NEWLINK ipmr: Fix ptrdiff_t print formatting ibmvnic: Wait for device response when changing MAC qlcnic: fix deadlock bug tcp: release sk_frag.page in tcp_disconnect ipv4: Get the address of interface correctly. net_sched: gen_estimator: fix lockdep splat net: macb: Handle HRESP error net/mlx5e: IPoIB, Fix copy-paste bug in flow steering refactoring ipv6: addrconf: break critical section in addrconf_verify_rtnl() ipv6: change route cache aging logic i40e/i40evf: Update DESC_NEEDED value to reflect larger value bnxt_en: cleanup DIM work on device shutdown ...
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c90
-rw-r--r--net/ipv6/addrlabel.c25
-rw-r--r--net/ipv6/af_inet6.c11
-rw-r--r--net/ipv6/anycast.c1
-rw-r--r--net/ipv6/datagram.c3
-rw-r--r--net/ipv6/esp6.c36
-rw-r--r--net/ipv6/esp6_offload.c82
-rw-r--r--net/ipv6/ila/ila_xlat.c4
-rw-r--r--net/ipv6/inet6_hashtables.c77
-rw-r--r--net/ipv6/ip6_fib.c66
-rw-r--r--net/ipv6/ip6_flowlabel.c1
-rw-r--r--net/ipv6/ip6_gre.c638
-rw-r--r--net/ipv6/ip6_output.c15
-rw-r--r--net/ipv6/ip6_tunnel.c5
-rw-r--r--net/ipv6/ip6_vti.c20
-rw-r--r--net/ipv6/ip6mr.c12
-rw-r--r--net/ipv6/mcast.c4
-rw-r--r--net/ipv6/ndisc.c5
-rw-r--r--net/ipv6/netfilter.c44
-rw-r--r--net/ipv6/netfilter/Kconfig18
-rw-r--r--net/ipv6/netfilter/Makefile4
-rw-r--r--net/ipv6/netfilter/ip6_tables.c27
-rw-r--r--net/ipv6/netfilter/ip6t_srh.c161
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c8
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c4
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c31
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c7
-rw-r--r--net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c4
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c15
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c3
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c277
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c8
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c82
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c3
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c12
-rw-r--r--net/ipv6/proc.c3
-rw-r--r--net/ipv6/raw.c1
-rw-r--r--net/ipv6/route.c567
-rw-r--r--net/ipv6/seg6.c4
-rw-r--r--net/ipv6/seg6_local.c2
-rw-r--r--net/ipv6/tcp_ipv6.c6
-rw-r--r--net/ipv6/udp.c55
-rw-r--r--net/ipv6/udplite.c1
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c7
-rw-r--r--net/ipv6/xfrm6_policy.c2
46 files changed, 1869 insertions, 585 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f49bd7897e95..e1846b97ee69 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -186,7 +186,8 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
static void addrconf_dad_start(struct inet6_ifaddr *ifp);
static void addrconf_dad_work(struct work_struct *w);
-static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id);
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
+ bool send_na);
static void addrconf_dad_run(struct inet6_dev *idev);
static void addrconf_rs_timer(struct timer_list *t);
static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
@@ -3438,6 +3439,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
} else if (event == NETDEV_CHANGE) {
if (!addrconf_link_ready(dev)) {
/* device is still not ready. */
+ rt6_sync_down_dev(dev, event);
break;
}
@@ -3449,6 +3451,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
* multicast snooping switches
*/
ipv6_mc_up(idev);
+ rt6_sync_up(dev, RTNH_F_LINKDOWN);
break;
}
idev->if_flags |= IF_READY;
@@ -3484,6 +3487,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (run_pending)
addrconf_dad_run(idev);
+ /* Device has an address by now */
+ rt6_sync_up(dev, RTNH_F_DEAD);
+
/*
* If the MTU changed during the interface down,
* when the interface up, the changed MTU must be
@@ -3577,6 +3583,7 @@ static bool addr_is_local(const struct in6_addr *addr)
static int addrconf_ifdown(struct net_device *dev, int how)
{
+ unsigned long event = how ? NETDEV_UNREGISTER : NETDEV_DOWN;
struct net *net = dev_net(dev);
struct inet6_dev *idev;
struct inet6_ifaddr *ifa, *tmp;
@@ -3586,8 +3593,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
ASSERT_RTNL();
- rt6_ifdown(net, dev);
- neigh_ifdown(&nd_tbl, dev);
+ rt6_disable_ip(dev, event);
idev = __in6_dev_get(dev);
if (!idev)
@@ -3833,12 +3839,17 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
idev->cnf.accept_dad < 1) ||
!(ifp->flags&IFA_F_TENTATIVE) ||
ifp->flags & IFA_F_NODAD) {
+ bool send_na = false;
+
+ if (ifp->flags & IFA_F_TENTATIVE &&
+ !(ifp->flags & IFA_F_OPTIMISTIC))
+ send_na = true;
bump_id = ifp->flags & IFA_F_TENTATIVE;
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
read_unlock_bh(&idev->lock);
- addrconf_dad_completed(ifp, bump_id);
+ addrconf_dad_completed(ifp, bump_id, send_na);
return;
}
@@ -3967,16 +3978,21 @@ static void addrconf_dad_work(struct work_struct *w)
}
if (ifp->dad_probes == 0) {
+ bool send_na = false;
+
/*
* DAD was successful
*/
+ if (ifp->flags & IFA_F_TENTATIVE &&
+ !(ifp->flags & IFA_F_OPTIMISTIC))
+ send_na = true;
bump_id = ifp->flags & IFA_F_TENTATIVE;
ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
write_unlock_bh(&idev->lock);
- addrconf_dad_completed(ifp, bump_id);
+ addrconf_dad_completed(ifp, bump_id, send_na);
goto out;
}
@@ -4014,7 +4030,8 @@ static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp)
return true;
}
-static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id)
+static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id,
+ bool send_na)
{
struct net_device *dev = ifp->idev->dev;
struct in6_addr lladdr;
@@ -4046,6 +4063,16 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp, bool bump_id)
if (send_mld)
ipv6_mc_dad_complete(ifp->idev);
+ /* send unsolicited NA if enabled */
+ if (send_na &&
+ (ifp->idev->cnf.ndisc_notify ||
+ dev_net(dev)->ipv6.devconf_all->ndisc_notify)) {
+ ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifp->addr,
+ /*router=*/ !!ifp->idev->cnf.forwarding,
+ /*solicited=*/ false, /*override=*/ true,
+ /*inc_opt=*/ true);
+ }
+
if (send_rs) {
/*
* If a host as already performed a random delay
@@ -4209,7 +4236,6 @@ static int if6_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations if6_fops = {
- .owner = THIS_MODULE,
.open = if6_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -4352,9 +4378,11 @@ restart:
spin_lock(&ifpub->lock);
ifpub->regen_count = 0;
spin_unlock(&ifpub->lock);
+ rcu_read_unlock_bh();
ipv6_create_tempaddr(ifpub, ifp, true);
in6_ifa_put(ifpub);
in6_ifa_put(ifp);
+ rcu_read_lock_bh();
goto restart;
}
} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
@@ -6595,27 +6623,45 @@ int __init addrconf_init(void)
rtnl_af_register(&inet6_ops);
- err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,
- 0);
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETLINK,
+ NULL, inet6_dump_ifinfo, 0);
if (err < 0)
goto errout;
- /* Only the first call to __rtnl_register can fail */
- __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0);
- __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0);
- __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr,
- inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED);
- __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL,
- inet6_dump_ifmcaddr, 0);
- __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL,
- inet6_dump_ifacaddr, 0);
- __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf,
- inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED);
-
- ipv6_addr_label_rtnl_register();
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDR,
+ inet6_rtm_newaddr, NULL, 0);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDR,
+ inet6_rtm_deladdr, NULL, 0);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDR,
+ inet6_rtm_getaddr, inet6_dump_ifaddr,
+ RTNL_FLAG_DOIT_UNLOCKED);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETMULTICAST,
+ NULL, inet6_dump_ifmcaddr, 0);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETANYCAST,
+ NULL, inet6_dump_ifacaddr, 0);
+ if (err < 0)
+ goto errout;
+ err = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETNETCONF,
+ inet6_netconf_get_devconf,
+ inet6_netconf_dump_devconf,
+ RTNL_FLAG_DOIT_UNLOCKED);
+ if (err < 0)
+ goto errout;
+ err = ipv6_addr_label_rtnl_register();
+ if (err < 0)
+ goto errout;
return 0;
errout:
+ rtnl_unregister_all(PF_INET6);
rtnl_af_unregister(&inet6_ops);
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 00e1f8ee08f8..1d6ced37ad71 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -547,13 +547,22 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
return err;
}
-void __init ipv6_addr_label_rtnl_register(void)
+int __init ipv6_addr_label_rtnl_register(void)
{
- __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel,
- NULL, RTNL_FLAG_DOIT_UNLOCKED);
- __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get,
- ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
-}
+ int ret;
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWADDRLABEL,
+ ip6addrlbl_newdel,
+ NULL, RTNL_FLAG_DOIT_UNLOCKED);
+ if (ret < 0)
+ return ret;
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELADDRLABEL,
+ ip6addrlbl_newdel,
+ NULL, RTNL_FLAG_DOIT_UNLOCKED);
+ if (ret < 0)
+ return ret;
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETADDRLABEL,
+ ip6addrlbl_get,
+ ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED);
+ return ret;
+}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index c9441ca45399..416917719a6f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -284,6 +284,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct net *net = sock_net(sk);
__be32 v4addr = 0;
unsigned short snum;
+ bool saved_ipv6only;
int addr_type = 0;
int err = 0;
@@ -389,19 +390,21 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (!(addr_type & IPV6_ADDR_MULTICAST))
np->saddr = addr->sin6_addr;
+ saved_ipv6only = sk->sk_ipv6only;
+ if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED)
+ sk->sk_ipv6only = 1;
+
/* Make sure we are allowed to bind here. */
if ((snum || !inet->bind_address_no_port) &&
sk->sk_prot->get_port(sk, snum)) {
+ sk->sk_ipv6only = saved_ipv6only;
inet_reset_saddr(sk);
err = -EADDRINUSE;
goto out;
}
- if (addr_type != IPV6_ADDR_ANY) {
+ if (addr_type != IPV6_ADDR_ANY)
sk->sk_userlocks |= SOCK_BINDADDR_LOCK;
- if (addr_type != IPV6_ADDR_MAPPED)
- sk->sk_ipv6only = 1;
- }
if (snum)
sk->sk_userlocks |= SOCK_BINDPORT_LOCK;
inet->inet_sport = htons(inet->inet_num);
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 0bbab8a4b5d8..8e085cc05aeb 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -533,7 +533,6 @@ static int ac6_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations ac6_seq_fops = {
- .owner = THIS_MODULE,
.open = ac6_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a1f918713006..fbf08ce3f5ab 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -221,8 +221,7 @@ ipv4_connected:
if (__ipv6_addr_needs_scope_id(addr_type)) {
if (addr_len >= sizeof(struct sockaddr_in6) &&
usin->sin6_scope_id) {
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != usin->sin6_scope_id) {
+ if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) {
err = -EINVAL;
goto out;
}
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 1a7f00cd4803..97513f35bcc5 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -141,14 +141,32 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
static void esp_output_done(struct crypto_async_request *base, int err)
{
struct sk_buff *skb = base->data;
+ struct xfrm_offload *xo = xfrm_offload(skb);
void *tmp;
- struct dst_entry *dst = skb_dst(skb);
- struct xfrm_state *x = dst->xfrm;
+ struct xfrm_state *x;
+
+ if (xo && (xo->flags & XFRM_DEV_RESUME))
+ x = skb->sp->xvec[skb->sp->len - 1];
+ else
+ x = skb_dst(skb)->xfrm;
tmp = ESP_SKB_CB(skb)->tmp;
esp_ssg_unref(x, tmp);
kfree(tmp);
- xfrm_output_resume(skb, err);
+
+ if (xo && (xo->flags & XFRM_DEV_RESUME)) {
+ if (err) {
+ XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
+ kfree_skb(skb);
+ return;
+ }
+
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ secpath_reset(skb);
+ xfrm_dev_resume(skb);
+ } else {
+ xfrm_output_resume(skb, err);
+ }
}
/* Move ESP header back into place. */
@@ -734,17 +752,13 @@ static int esp_init_aead(struct xfrm_state *x)
char aead_name[CRYPTO_MAX_ALG_NAME];
struct crypto_aead *aead;
int err;
- u32 mask = 0;
err = -ENAMETOOLONG;
if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)",
x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME)
goto error;
- if (x->xso.offload_handle)
- mask |= CRYPTO_ALG_ASYNC;
-
- aead = crypto_alloc_aead(aead_name, 0, mask);
+ aead = crypto_alloc_aead(aead_name, 0, 0);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
@@ -774,7 +788,6 @@ static int esp_init_authenc(struct xfrm_state *x)
char authenc_name[CRYPTO_MAX_ALG_NAME];
unsigned int keylen;
int err;
- u32 mask = 0;
err = -EINVAL;
if (!x->ealg)
@@ -800,10 +813,7 @@ static int esp_init_authenc(struct xfrm_state *x)
goto error;
}
- if (x->xso.offload_handle)
- mask |= CRYPTO_ALG_ASYNC;
-
- aead = crypto_alloc_aead(authenc_name, 0, mask);
+ aead = crypto_alloc_aead(authenc_name, 0, 0);
err = PTR_ERR(aead);
if (IS_ERR(aead))
goto error;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index f52c314d4c97..3fd1ec775dc2 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -136,78 +136,39 @@ static void esp6_gso_encap(struct xfrm_state *x, struct sk_buff *skb)
static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
netdev_features_t features)
{
- __u32 seq;
- int err = 0;
- struct sk_buff *skb2;
struct xfrm_state *x;
struct ip_esp_hdr *esph;
struct crypto_aead *aead;
- struct sk_buff *segs = ERR_PTR(-EINVAL);
netdev_features_t esp_features = features;
struct xfrm_offload *xo = xfrm_offload(skb);
if (!xo)
- goto out;
+ return ERR_PTR(-EINVAL);
if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP))
- goto out;
-
- seq = xo->seq.low;
+ return ERR_PTR(-EINVAL);
x = skb->sp->xvec[skb->sp->len - 1];
aead = x->data;
esph = ip_esp_hdr(skb);
if (esph->spi != x->id.spi)
- goto out;
+ return ERR_PTR(-EINVAL);
if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)))
- goto out;
+ return ERR_PTR(-EINVAL);
__skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead));
skb->encap_hdr_csum = 1;
- if (!(features & NETIF_F_HW_ESP))
+ if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
+ (x->xso.dev != skb->dev))
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
- segs = x->outer_mode->gso_segment(x, skb, esp_features);
- if (IS_ERR_OR_NULL(segs))
- goto out;
-
- __skb_pull(skb, skb->data - skb_mac_header(skb));
-
- skb2 = segs;
- do {
- struct sk_buff *nskb = skb2->next;
-
- xo = xfrm_offload(skb2);
- xo->flags |= XFRM_GSO_SEGMENT;
- xo->seq.low = seq;
- xo->seq.hi = xfrm_replay_seqhi(x, seq);
-
- if(!(features & NETIF_F_HW_ESP))
- xo->flags |= CRYPTO_FALLBACK;
-
- x->outer_mode->xmit(x, skb2);
-
- err = x->type_offload->xmit(x, skb2, esp_features);
- if (err) {
- kfree_skb_list(segs);
- return ERR_PTR(err);
- }
+ xo->flags |= XFRM_GSO_SEGMENT;
- if (!skb_is_gso(skb2))
- seq++;
- else
- seq += skb_shinfo(skb2)->gso_segs;
-
- skb_push(skb2, skb2->mac_len);
- skb2 = nskb;
- } while (skb2);
-
-out:
- return segs;
+ return x->outer_mode->gso_segment(x, skb, esp_features);
}
static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
@@ -226,6 +187,7 @@ static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb)
static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features)
{
+ int len;
int err;
int alen;
int blksize;
@@ -234,6 +196,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
struct crypto_aead *aead;
struct esp_info esp;
bool hw_offload = true;
+ __u32 seq;
esp.inplace = true;
@@ -269,28 +232,33 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
return esp.nfrags;
}
+ seq = xo->seq.low;
+
esph = ip_esp_hdr(skb);
esph->spi = x->id.spi;
skb_push(skb, -skb_network_offset(skb));
if (xo->flags & XFRM_GSO_SEGMENT) {
- esph->seq_no = htonl(xo->seq.low);
- } else {
- int len;
-
- len = skb->len - sizeof(struct ipv6hdr);
- if (len > IPV6_MAXPLEN)
- len = 0;
+ esph->seq_no = htonl(seq);
- ipv6_hdr(skb)->payload_len = htons(len);
+ if (!skb_is_gso(skb))
+ xo->seq.low++;
+ else
+ xo->seq.low += skb_shinfo(skb)->gso_segs;
}
+ esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
+
+ len = skb->len - sizeof(struct ipv6hdr);
+ if (len > IPV6_MAXPLEN)
+ len = 0;
+
+ ipv6_hdr(skb)->payload_len = htons(len);
+
if (hw_offload)
return 0;
- esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
-
err = esp6_output_tail(x, skb, &esp);
if (err)
return err;
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 6eb5e68f112a..44c39c5f0638 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -512,9 +512,7 @@ static int ila_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
struct ila_map *ila;
int ret;
- ret = rhashtable_walk_start(rhiter);
- if (ret && ret != -EAGAIN)
- goto done;
+ rhashtable_walk_start(rhiter);
for (;;) {
ila = rhashtable_walk_next(rhiter);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index b01858f5deb1..2febe26de6a1 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -125,6 +125,40 @@ static inline int compute_score(struct sock *sk, struct net *net,
}
/* called with rcu_read_lock() */
+static struct sock *inet6_lhash2_lookup(struct net *net,
+ struct inet_listen_hashbucket *ilb2,
+ struct sk_buff *skb, int doff,
+ const struct in6_addr *saddr,
+ const __be16 sport, const struct in6_addr *daddr,
+ const unsigned short hnum, const int dif, const int sdif)
+{
+ bool exact_dif = inet6_exact_dif_match(net, skb);
+ struct inet_connection_sock *icsk;
+ struct sock *sk, *result = NULL;
+ int score, hiscore = 0;
+ u32 phash = 0;
+
+ inet_lhash2_for_each_icsk_rcu(icsk, &ilb2->head) {
+ sk = (struct sock *)icsk;
+ score = compute_score(sk, net, hnum, daddr, dif, sdif,
+ exact_dif);
+ if (score > hiscore) {
+ if (sk->sk_reuseport) {
+ phash = inet6_ehashfn(net, daddr, hnum,
+ saddr, sport);
+ result = reuseport_select_sock(sk, phash,
+ skb, doff);
+ if (result)
+ return result;
+ }
+ result = sk;
+ hiscore = score;
+ }
+ }
+
+ return result;
+}
+
struct sock *inet6_lookup_listener(struct net *net,
struct inet_hashinfo *hashinfo,
struct sk_buff *skb, int doff,
@@ -134,31 +168,56 @@ struct sock *inet6_lookup_listener(struct net *net,
{
unsigned int hash = inet_lhashfn(net, hnum);
struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash];
- int score, hiscore = 0, matches = 0, reuseport = 0;
bool exact_dif = inet6_exact_dif_match(net, skb);
+ struct inet_listen_hashbucket *ilb2;
struct sock *sk, *result = NULL;
+ int score, hiscore = 0;
+ unsigned int hash2;
u32 phash = 0;
+ if (ilb->count <= 10 || !hashinfo->lhash2)
+ goto port_lookup;
+
+ /* Too many sk in the ilb bucket (which is hashed by port alone).
+ * Try lhash2 (which is hashed by port and addr) instead.
+ */
+
+ hash2 = ipv6_portaddr_hash(net, daddr, hnum);
+ ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+ if (ilb2->count > ilb->count)
+ goto port_lookup;
+
+ result = inet6_lhash2_lookup(net, ilb2, skb, doff,
+ saddr, sport, daddr, hnum,
+ dif, sdif);
+ if (result)
+ return result;
+
+ /* Lookup lhash2 with in6addr_any */
+
+ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
+ ilb2 = inet_lhash2_bucket(hashinfo, hash2);
+ if (ilb2->count > ilb->count)
+ goto port_lookup;
+
+ return inet6_lhash2_lookup(net, ilb2, skb, doff,
+ saddr, sport, daddr, hnum,
+ dif, sdif);
+
+port_lookup:
sk_for_each(sk, &ilb->head) {
score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif);
if (score > hiscore) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
phash = inet6_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, phash,
skb, doff);
if (result)
return result;
- matches = 1;
}
result = sk;
hiscore = score;
- } else if (score == hiscore && reuseport) {
- matches++;
- if (reciprocal_scale(phash, matches) == 0)
- result = sk;
- phash = next_pseudo_random32(phash);
}
}
return result;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 217683d40f12..92b8d8c75eed 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -107,16 +107,13 @@ enum {
void fib6_update_sernum(struct rt6_info *rt)
{
- struct fib6_table *table = rt->rt6i_table;
struct net *net = dev_net(rt->dst.dev);
struct fib6_node *fn;
- spin_lock_bh(&table->tb6_lock);
fn = rcu_dereference_protected(rt->rt6i_node,
- lockdep_is_held(&table->tb6_lock));
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
if (fn)
fn->fn_sernum = fib6_new_sernum(net);
- spin_unlock_bh(&table->tb6_lock);
}
/*
@@ -804,12 +801,6 @@ insert_above:
return ln;
}
-static bool rt6_qualify_for_ecmp(struct rt6_info *rt)
-{
- return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
- RTF_GATEWAY;
-}
-
static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc)
{
int i;
@@ -898,7 +889,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
ins = &fn->leaf;
for (iter = leaf; iter;
- iter = rcu_dereference_protected(iter->dst.rt6_next,
+ iter = rcu_dereference_protected(iter->rt6_next,
lockdep_is_held(&rt->rt6i_table->tb6_lock))) {
/*
* Search for duplicates
@@ -955,7 +946,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
break;
next_iter:
- ins = &iter->dst.rt6_next;
+ ins = &iter->rt6_next;
}
if (fallback_ins && !found) {
@@ -984,7 +975,7 @@ next_iter:
&sibling->rt6i_siblings);
break;
}
- sibling = rcu_dereference_protected(sibling->dst.rt6_next,
+ sibling = rcu_dereference_protected(sibling->rt6_next,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
}
/* For each sibling in the list, increment the counter of
@@ -999,6 +990,7 @@ next_iter:
rt6i_nsiblings++;
}
BUG_ON(rt6i_nsiblings != rt->rt6i_nsiblings);
+ rt6_multipath_rebalance(temp_sibling);
}
/*
@@ -1014,7 +1006,7 @@ add:
if (err)
return err;
- rcu_assign_pointer(rt->dst.rt6_next, iter);
+ rcu_assign_pointer(rt->rt6_next, iter);
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rcu_assign_pointer(*ins, rt);
@@ -1045,7 +1037,7 @@ add:
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
- rt->dst.rt6_next = iter->dst.rt6_next;
+ rt->rt6_next = iter->rt6_next;
rcu_assign_pointer(*ins, rt);
call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
rt, extack);
@@ -1064,14 +1056,14 @@ add:
if (nsiblings) {
/* Replacing an ECMP route, remove all siblings */
- ins = &rt->dst.rt6_next;
+ ins = &rt->rt6_next;
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
while (iter) {
if (iter->rt6i_metric > rt->rt6i_metric)
break;
if (rt6_qualify_for_ecmp(iter)) {
- *ins = iter->dst.rt6_next;
+ *ins = iter->rt6_next;
iter->rt6i_node = NULL;
fib6_purge_rt(iter, fn, info->nl_net);
if (rcu_access_pointer(fn->rr_ptr) == iter)
@@ -1080,7 +1072,7 @@ add:
nsiblings--;
info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
} else {
- ins = &iter->dst.rt6_next;
+ ins = &iter->rt6_next;
}
iter = rcu_dereference_protected(*ins,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
@@ -1107,8 +1099,8 @@ void fib6_force_start_gc(struct net *net)
jiffies + net->ipv6.sysctl.ip6_rt_gc_interval);
}
-static void fib6_update_sernum_upto_root(struct rt6_info *rt,
- int sernum)
+static void __fib6_update_sernum_upto_root(struct rt6_info *rt,
+ int sernum)
{
struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
lockdep_is_held(&rt->rt6i_table->tb6_lock));
@@ -1122,6 +1114,11 @@ static void fib6_update_sernum_upto_root(struct rt6_info *rt,
}
}
+void fib6_update_sernum_upto_root(struct net *net, struct rt6_info *rt)
+{
+ __fib6_update_sernum_upto_root(rt, fib6_new_sernum(net));
+}
+
/*
* Add routing information to the routing tree.
* <destination addr>/<source addr>
@@ -1241,7 +1238,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
err = fib6_add_rt2node(fn, rt, info, mxc, extack);
if (!err) {
- fib6_update_sernum_upto_root(rt, sernum);
+ __fib6_update_sernum_upto_root(rt, sernum);
fib6_start_gc(info->nl_net, rt);
}
@@ -1670,7 +1667,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE);
/* Unlink it */
- *rtp = rt->dst.rt6_next;
+ *rtp = rt->rt6_next;
rt->rt6i_node = NULL;
net->ipv6.rt6_stats->fib_rt_entries--;
net->ipv6.rt6_stats->fib_discarded_routes++;
@@ -1691,6 +1688,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
sibling->rt6i_nsiblings--;
rt->rt6i_nsiblings = 0;
list_del_init(&rt->rt6i_siblings);
+ rt6_multipath_rebalance(next_sibling);
}
/* Adjust walkers */
@@ -1698,7 +1696,7 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
FOR_WALKERS(net, w) {
if (w->state == FWS_C && w->leaf == rt) {
RT6_TRACE("walker %p adjusted by delroute\n", w);
- w->leaf = rcu_dereference_protected(rt->dst.rt6_next,
+ w->leaf = rcu_dereference_protected(rt->rt6_next,
lockdep_is_held(&table->tb6_lock));
if (!w->leaf)
w->state = FWS_U;
@@ -1762,7 +1760,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)
fib6_del_route(table, fn, rtp, info);
return 0;
}
- rtp_next = &cur->dst.rt6_next;
+ rtp_next = &cur->rt6_next;
}
return -ENOENT;
}
@@ -1918,7 +1916,7 @@ static int fib6_clean_node(struct fib6_walker *w)
for_each_fib6_walker_rt(w) {
res = c->func(rt, c->arg);
- if (res < 0) {
+ if (res == -1) {
w->leaf = rt;
res = fib6_del(rt, &info);
if (res) {
@@ -1931,6 +1929,12 @@ static int fib6_clean_node(struct fib6_walker *w)
continue;
}
return 0;
+ } else if (res == -2) {
+ if (WARN_ON(!rt->rt6i_nsiblings))
+ continue;
+ rt = list_last_entry(&rt->rt6i_siblings,
+ struct rt6_info, rt6i_siblings);
+ continue;
}
WARN_ON(res != 0);
}
@@ -1942,7 +1946,8 @@ static int fib6_clean_node(struct fib6_walker *w)
* Convenient frontend to tree walker.
*
* func is called on each route.
- * It may return -1 -> delete this route.
+ * It may return -2 -> skip multipath route.
+ * -1 -> delete this route.
* 0 -> continue walking
*/
@@ -2134,7 +2139,6 @@ static void fib6_net_exit(struct net *net)
{
unsigned int i;
- rt6_ifdown(net, NULL);
del_timer_sync(&net->ipv6.ip6_fib_timer);
for (i = 0; i < FIB6_TABLE_HASHSZ; i++) {
@@ -2173,8 +2177,8 @@ int __init fib6_init(void)
if (ret)
goto out_kmem_cache_create;
- ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib,
- 0);
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE, NULL,
+ inet6_dump_fib, 0);
if (ret)
goto out_unregister_subsys;
@@ -2239,7 +2243,7 @@ static int ipv6_route_yield(struct fib6_walker *w)
do {
iter->w.leaf = rcu_dereference_protected(
- iter->w.leaf->dst.rt6_next,
+ iter->w.leaf->rt6_next,
lockdep_is_held(&iter->tbl->tb6_lock));
iter->skip--;
if (!iter->skip && iter->w.leaf)
@@ -2305,7 +2309,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
if (!v)
goto iter_table;
- n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next);
+ n = rcu_dereference_bh(((struct rt6_info *)v)->rt6_next);
if (n) {
++*pos;
return n;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 7f59c8fabeeb..3dab664ff503 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -836,7 +836,6 @@ static int ip6fl_seq_release(struct inode *inode, struct file *file)
}
static const struct file_operations ip6fl_seq_fops = {
- .owner = THIS_MODULE,
.open = ip6fl_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 873549228ccb..05f070e123e4 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -55,6 +55,8 @@
#include <net/ip6_route.h>
#include <net/ip6_tunnel.h>
#include <net/gre.h>
+#include <net/erspan.h>
+#include <net/dst_metadata.h>
static bool log_ecn_error = true;
@@ -68,11 +70,13 @@ static unsigned int ip6gre_net_id __read_mostly;
struct ip6gre_net {
struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE];
+ struct ip6_tnl __rcu *collect_md_tun;
struct net_device *fb_tunnel_dev;
};
static struct rtnl_link_ops ip6gre_link_ops __read_mostly;
static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;
+static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly;
static int ip6gre_tunnel_init(struct net_device *dev);
static void ip6gre_tunnel_setup(struct net_device *dev);
static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
@@ -121,7 +125,8 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
unsigned int h1 = HASH_KEY(key);
struct ip6_tnl *t, *cand = NULL;
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
- int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
+ int dev_type = (gre_proto == htons(ETH_P_TEB) ||
+ gre_proto == htons(ETH_P_ERSPAN)) ?
ARPHRD_ETHER : ARPHRD_IP6GRE;
int score, cand_score = 4;
@@ -226,6 +231,10 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
if (cand)
return cand;
+ t = rcu_dereference(ign->collect_md_tun);
+ if (t && t->dev->flags & IFF_UP)
+ return t;
+
dev = ign->fb_tunnel_dev;
if (dev->flags & IFF_UP)
return netdev_priv(dev);
@@ -261,6 +270,9 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t)
{
struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t);
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun, t);
+
rcu_assign_pointer(t->next, rtnl_dereference(*tp));
rcu_assign_pointer(*tp, t);
}
@@ -270,6 +282,9 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t)
struct ip6_tnl __rcu **tp;
struct ip6_tnl *iter;
+ if (t->parms.collect_md)
+ rcu_assign_pointer(ign->collect_md_tun, NULL);
+
for (tp = ip6gre_bucket(ign, t);
(iter = rtnl_dereference(*tp)) != NULL;
tp = &iter->next) {
@@ -461,7 +476,101 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
&ipv6h->saddr, &ipv6h->daddr, tpi->key,
tpi->proto);
if (tunnel) {
- ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ if (tunnel->parms.collect_md) {
+ struct metadata_dst *tun_dst;
+ __be64 tun_id;
+ __be16 flags;
+
+ flags = tpi->flags;
+ tun_id = key32_to_tunnel_id(tpi->key);
+
+ tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id, 0);
+ if (!tun_dst)
+ return PACKET_REJECT;
+
+ ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+ } else {
+ ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ }
+
+ return PACKET_RCVD;
+ }
+
+ return PACKET_REJECT;
+}
+
+static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
+ struct tnl_ptk_info *tpi)
+{
+ struct erspan_base_hdr *ershdr;
+ struct erspan_metadata *pkt_md;
+ const struct ipv6hdr *ipv6h;
+ struct ip6_tnl *tunnel;
+ u8 ver;
+
+ if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
+ return PACKET_REJECT;
+
+ ipv6h = ipv6_hdr(skb);
+ ershdr = (struct erspan_base_hdr *)skb->data;
+ ver = ershdr->ver;
+ tpi->key = cpu_to_be32(get_session_id(ershdr));
+
+ tunnel = ip6gre_tunnel_lookup(skb->dev,
+ &ipv6h->saddr, &ipv6h->daddr, tpi->key,
+ tpi->proto);
+ if (tunnel) {
+ int len = erspan_hdr_len(ver);
+
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return PACKET_REJECT;
+
+ ershdr = (struct erspan_base_hdr *)skb->data;
+ pkt_md = (struct erspan_metadata *)(ershdr + 1);
+
+ if (__iptunnel_pull_header(skb, len,
+ htons(ETH_P_TEB),
+ false, false) < 0)
+ return PACKET_REJECT;
+
+ if (tunnel->parms.collect_md) {
+ struct metadata_dst *tun_dst;
+ struct ip_tunnel_info *info;
+ struct erspan_metadata *md;
+ __be64 tun_id;
+ __be16 flags;
+
+ tpi->flags |= TUNNEL_KEY;
+ flags = tpi->flags;
+ tun_id = key32_to_tunnel_id(tpi->key);
+
+ tun_dst = ipv6_tun_rx_dst(skb, flags, tun_id,
+ sizeof(*md));
+ if (!tun_dst)
+ return PACKET_REJECT;
+
+ info = &tun_dst->u.tun_info;
+ md = ip_tunnel_info_opts(info);
+
+ memcpy(md, pkt_md, sizeof(*md));
+ md->version = ver;
+ info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ info->options_len = sizeof(*md);
+
+ ip6_tnl_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+
+ } else {
+ tunnel->parms.erspan_ver = ver;
+
+ if (ver == 1) {
+ tunnel->parms.index = ntohl(pkt_md->u.index);
+ } else {
+ tunnel->parms.dir = pkt_md->u.md2.dir;
+ tunnel->parms.hwid = get_hwid(&pkt_md->u.md2);
+ }
+
+ ip6_tnl_rcv(tunnel, skb, tpi, NULL, log_ecn_error);
+ }
return PACKET_RCVD;
}
@@ -482,9 +591,17 @@ static int gre_rcv(struct sk_buff *skb)
if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false))
goto drop;
+ if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
+ tpi.proto == htons(ETH_P_ERSPAN2))) {
+ if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD)
+ return 0;
+ goto out;
+ }
+
if (ip6gre_rcv(skb, &tpi) == PACKET_RCVD)
return 0;
+out:
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
drop:
kfree_skb(skb);
@@ -497,6 +614,78 @@ static int gre_handle_offloads(struct sk_buff *skb, bool csum)
csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
+static void prepare_ip6gre_xmit_ipv4(struct sk_buff *skb,
+ struct net_device *dev,
+ struct flowi6 *fl6, __u8 *dsfield,
+ int *encap_limit)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct ip6_tnl *t = netdev_priv(dev);
+
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ *encap_limit = t->parms.encap_limit;
+
+ memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ *dsfield = ipv4_get_dsfield(iph);
+ else
+ *dsfield = ip6_tclass(t->parms.flowinfo);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6->flowi6_mark = skb->mark;
+ else
+ fl6->flowi6_mark = t->parms.fwmark;
+
+ fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+}
+
+static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
+ struct net_device *dev,
+ struct flowi6 *fl6, __u8 *dsfield,
+ int *encap_limit)
+{
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct ip6_tnl *t = netdev_priv(dev);
+ __u16 offset;
+
+ offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
+ /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
+
+ if (offset > 0) {
+ struct ipv6_tlv_tnl_enc_lim *tel;
+
+ tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
+ if (tel->encap_limit == 0) {
+ icmpv6_send(skb, ICMPV6_PARAMPROB,
+ ICMPV6_HDR_FIELD, offset + 2);
+ return -1;
+ }
+ *encap_limit = tel->encap_limit - 1;
+ } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+ *encap_limit = t->parms.encap_limit;
+ }
+
+ memcpy(fl6, &t->fl.u.ip6, sizeof(*fl6));
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+ *dsfield = ipv6_get_dsfield(ipv6h);
+ else
+ *dsfield = ip6_tclass(t->parms.flowinfo);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+ fl6->flowlabel |= ip6_flowlabel(ipv6h);
+
+ if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
+ fl6->flowi6_mark = skb->mark;
+ else
+ fl6->flowi6_mark = t->parms.fwmark;
+
+ fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+ return 0;
+}
+
static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
struct net_device *dev, __u8 dsfield,
struct flowi6 *fl6, int encap_limit,
@@ -518,8 +707,38 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
- gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
- protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+
+ if (tunnel->parms.collect_md) {
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ __be16 flags;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info ||
+ !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET6))
+ return -EINVAL;
+
+ key = &tun_info->key;
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->flowi6_proto = IPPROTO_GRE;
+ fl6->daddr = key->u.ipv6.dst;
+ fl6->flowlabel = key->label;
+ fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+ dsfield = key->tos;
+ flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ tunnel->tun_hlen = gre_calc_hlen(flags);
+
+ gre_build_header(skb, tunnel->tun_hlen,
+ flags, protocol,
+ tunnel_id_to_key32(tun_info->key.tun_id), 0);
+
+ } else {
+ gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+ protocol, tunnel->parms.o_key,
+ htonl(tunnel->o_seqno));
+ }
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
NEXTHDR_GRE);
@@ -528,30 +747,17 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- const struct iphdr *iph = ip_hdr(skb);
int encap_limit = -1;
struct flowi6 fl6;
- __u8 dsfield;
+ __u8 dsfield = 0;
__u32 mtu;
int err;
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
-
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- dsfield = ipv4_get_dsfield(iph);
- else
- dsfield = ip6_tclass(t->parms.flowinfo);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- fl6.flowi6_mark = skb->mark;
- else
- fl6.flowi6_mark = t->parms.fwmark;
-
- fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ if (!t->parms.collect_md)
+ prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
+ &dsfield, &encap_limit);
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err)
@@ -575,46 +781,17 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
struct ip6_tnl *t = netdev_priv(dev);
struct ipv6hdr *ipv6h = ipv6_hdr(skb);
int encap_limit = -1;
- __u16 offset;
struct flowi6 fl6;
- __u8 dsfield;
+ __u8 dsfield = 0;
__u32 mtu;
int err;
if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
return -1;
- offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
- /* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
- ipv6h = ipv6_hdr(skb);
-
- if (offset > 0) {
- struct ipv6_tlv_tnl_enc_lim *tel;
- tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
- if (tel->encap_limit == 0) {
- icmpv6_send(skb, ICMPV6_PARAMPROB,
- ICMPV6_HDR_FIELD, offset + 2);
- return -1;
- }
- encap_limit = tel->encap_limit - 1;
- } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
- encap_limit = t->parms.encap_limit;
-
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
- dsfield = ipv6_get_dsfield(ipv6h);
- else
- dsfield = ip6_tclass(t->parms.flowinfo);
-
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
- fl6.flowlabel |= ip6_flowlabel(ipv6h);
- if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
- fl6.flowi6_mark = skb->mark;
- else
- fl6.flowi6_mark = t->parms.fwmark;
-
- fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+ if (!t->parms.collect_md &&
+ prepare_ip6gre_xmit_ipv6(skb, dev, &fl6, &dsfield, &encap_limit))
+ return -1;
if (gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM)))
return -1;
@@ -661,7 +838,8 @@ static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev)
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
encap_limit = t->parms.encap_limit;
- memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ if (!t->parms.collect_md)
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
err = gre_handle_offloads(skb, !!(t->parms.o_flags & TUNNEL_CSUM));
if (err)
@@ -706,6 +884,137 @@ tx_err:
return NETDEV_TX_OK;
}
+static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct dst_entry *dst = skb_dst(skb);
+ struct net_device_stats *stats;
+ bool truncate = false;
+ int encap_limit = -1;
+ __u8 dsfield = false;
+ struct flowi6 fl6;
+ int err = -EINVAL;
+ __u32 mtu;
+
+ if (!ip6_tnl_xmit_ctl(t, &t->parms.laddr, &t->parms.raddr))
+ goto tx_err;
+
+ if (gre_handle_offloads(skb, false))
+ goto tx_err;
+
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+ pskb_trim(skb, dev->mtu + dev->hard_header_len);
+ truncate = true;
+ }
+
+ t->parms.o_flags &= ~TUNNEL_KEY;
+ IPCB(skb)->flags = 0;
+
+ /* For collect_md mode, derive fl6 from the tunnel key,
+ * for native mode, call prepare_ip6gre_xmit_{ipv4,ipv6}.
+ */
+ if (t->parms.collect_md) {
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ struct erspan_metadata *md;
+ __be32 tun_id;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info ||
+ !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET6))
+ return -EINVAL;
+
+ key = &tun_info->key;
+ memset(&fl6, 0, sizeof(fl6));
+ fl6.flowi6_proto = IPPROTO_GRE;
+ fl6.daddr = key->u.ipv6.dst;
+ fl6.flowlabel = key->label;
+ fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
+
+ dsfield = key->tos;
+ md = ip_tunnel_info_opts(tun_info);
+ if (!md)
+ goto tx_err;
+
+ tun_id = tunnel_id_to_key32(key->tun_id);
+ if (md->version == 1) {
+ erspan_build_header(skb,
+ ntohl(tun_id),
+ ntohl(md->u.index), truncate,
+ false);
+ } else if (md->version == 2) {
+ erspan_build_header_v2(skb,
+ ntohl(tun_id),
+ md->u.md2.dir,
+ get_hwid(&md->u.md2),
+ truncate, false);
+ }
+ } else {
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+ prepare_ip6gre_xmit_ipv4(skb, dev, &fl6,
+ &dsfield, &encap_limit);
+ break;
+ case htons(ETH_P_IPV6):
+ if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr))
+ goto tx_err;
+ if (prepare_ip6gre_xmit_ipv6(skb, dev, &fl6,
+ &dsfield, &encap_limit))
+ goto tx_err;
+ break;
+ default:
+ memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
+ break;
+ }
+
+ if (t->parms.erspan_ver == 1)
+ erspan_build_header(skb, ntohl(t->parms.o_key),
+ t->parms.index,
+ truncate, false);
+ else
+ erspan_build_header_v2(skb, ntohl(t->parms.o_key),
+ t->parms.dir,
+ t->parms.hwid,
+ truncate, false);
+ fl6.daddr = t->parms.raddr;
+ }
+
+ /* Push GRE header. */
+ gre_build_header(skb, 8, TUNNEL_SEQ,
+ htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++));
+
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+
+ err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
+ NEXTHDR_GRE);
+ if (err != 0) {
+ /* XXX: send ICMP error even if DF is not set. */
+ if (err == -EMSGSIZE) {
+ if (skb->protocol == htons(ETH_P_IP))
+ icmp_send(skb, ICMP_DEST_UNREACH,
+ ICMP_FRAG_NEEDED, htonl(mtu));
+ else
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ }
+
+ goto tx_err;
+ }
+ return NETDEV_TX_OK;
+
+tx_err:
+ stats = &t->dev->stats;
+ stats->tx_errors++;
+ stats->tx_dropped++;
+ kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
{
struct net_device *dev = t->dev;
@@ -1079,6 +1388,10 @@ static int ip6gre_tunnel_init_common(struct net_device *dev)
if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
+ if (tunnel->parms.collect_md) {
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ netif_keep_dst(dev);
+ }
ip6gre_tnl_init_features(dev);
return 0;
@@ -1095,6 +1408,9 @@ static int ip6gre_tunnel_init(struct net_device *dev)
tunnel = netdev_priv(dev);
+ if (tunnel->parms.collect_md)
+ return 0;
+
memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr));
@@ -1117,7 +1433,6 @@ static void ip6gre_fb_tunnel_init(struct net_device *dev)
dev_hold(dev);
}
-
static struct inet6_protocol ip6gre_protocol __read_mostly = {
.handler = gre_rcv,
.err_handler = ip6gre_err,
@@ -1132,7 +1447,8 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)
for_each_netdev_safe(net, dev, aux)
if (dev->rtnl_link_ops == &ip6gre_link_ops ||
- dev->rtnl_link_ops == &ip6gre_tap_ops)
+ dev->rtnl_link_ops == &ip6gre_tap_ops ||
+ dev->rtnl_link_ops == &ip6erspan_tap_ops)
unregister_netdevice_queue(dev, head);
for (prio = 0; prio < 4; prio++) {
@@ -1254,6 +1570,70 @@ out:
return ip6gre_tunnel_validate(tb, data, extack);
}
+static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ __be16 flags = 0;
+ int ret, ver = 0;
+
+ if (!data)
+ return 0;
+
+ ret = ip6gre_tap_validate(tb, data, extack);
+ if (ret)
+ return ret;
+
+ /* ERSPAN should only have GRE sequence and key flag */
+ if (data[IFLA_GRE_OFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ if (data[IFLA_GRE_IFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ if (!data[IFLA_GRE_COLLECT_METADATA] &&
+ flags != (GRE_SEQ | GRE_KEY))
+ return -EINVAL;
+
+ /* ERSPAN Session ID only has 10-bit. Since we reuse
+ * 32-bit key field as ID, check it's range.
+ */
+ if (data[IFLA_GRE_IKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ if (data[IFLA_GRE_OKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ if (data[IFLA_GRE_ERSPAN_VER]) {
+ ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+ if (ver != 1 && ver != 2)
+ return -EINVAL;
+ }
+
+ if (ver == 1) {
+ if (data[IFLA_GRE_ERSPAN_INDEX]) {
+ u32 index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+
+ if (index & ~INDEX_MASK)
+ return -EINVAL;
+ }
+ } else if (ver == 2) {
+ if (data[IFLA_GRE_ERSPAN_DIR]) {
+ u16 dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+
+ if (dir & ~(DIR_MASK >> DIR_OFFSET))
+ return -EINVAL;
+ }
+
+ if (data[IFLA_GRE_ERSPAN_HWID]) {
+ u16 hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+
+ if (hwid & ~(HWID_MASK >> HWID_OFFSET))
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
static void ip6gre_netlink_parms(struct nlattr *data[],
struct __ip6_tnl_parm *parms)
@@ -1300,6 +1680,22 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
if (data[IFLA_GRE_FWMARK])
parms->fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
+
+ if (data[IFLA_GRE_COLLECT_METADATA])
+ parms->collect_md = true;
+
+ if (data[IFLA_GRE_ERSPAN_VER])
+ parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+
+ if (parms->erspan_ver == 1) {
+ if (data[IFLA_GRE_ERSPAN_INDEX])
+ parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ } else if (parms->erspan_ver == 2) {
+ if (data[IFLA_GRE_ERSPAN_DIR])
+ parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+ if (data[IFLA_GRE_ERSPAN_HWID])
+ parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+ }
}
static int ip6gre_tap_init(struct net_device *dev)
@@ -1326,6 +1722,59 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
.ndo_get_iflink = ip6_tnl_get_iflink,
};
+static int ip6erspan_tap_init(struct net_device *dev)
+{
+ struct ip6_tnl *tunnel;
+ int t_hlen;
+ int ret;
+
+ tunnel = netdev_priv(dev);
+
+ tunnel->dev = dev;
+ tunnel->net = dev_net(dev);
+ strcpy(tunnel->parms.name, dev->name);
+
+ dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+ if (!dev->tstats)
+ return -ENOMEM;
+
+ ret = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
+ if (ret) {
+ free_percpu(dev->tstats);
+ dev->tstats = NULL;
+ return ret;
+ }
+
+ tunnel->tun_hlen = 8;
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
+ erspan_hdr_len(tunnel->parms.erspan_ver);
+ t_hlen = tunnel->hlen + sizeof(struct ipv6hdr);
+
+ dev->hard_header_len = LL_MAX_HEADER + t_hlen;
+ dev->mtu = ETH_DATA_LEN - t_hlen;
+ if (dev->type == ARPHRD_ETHER)
+ dev->mtu -= ETH_HLEN;
+ if (!(tunnel->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
+ dev->mtu -= 8;
+
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ tunnel = netdev_priv(dev);
+ ip6gre_tnl_link_config(tunnel, 1);
+
+ return 0;
+}
+
+static const struct net_device_ops ip6erspan_netdev_ops = {
+ .ndo_init = ip6erspan_tap_init,
+ .ndo_uninit = ip6gre_tunnel_uninit,
+ .ndo_start_xmit = ip6erspan_tunnel_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = ip6_tnl_change_mtu,
+ .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_get_iflink = ip6_tnl_get_iflink,
+};
+
static void ip6gre_tap_setup(struct net_device *dev)
{
@@ -1396,8 +1845,13 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
ip6gre_netlink_parms(data, &nt->parms);
- if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
- return -EEXIST;
+ if (nt->parms.collect_md) {
+ if (rtnl_dereference(ign->collect_md_tun))
+ return -EEXIST;
+ } else {
+ if (ip6gre_tunnel_find(net, &nt->parms, dev->type))
+ return -EEXIST;
+ }
if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
eth_hw_addr_random(dev);
@@ -1500,8 +1954,12 @@ static size_t ip6gre_get_size(const struct net_device *dev)
nla_total_size(2) +
/* IFLA_GRE_ENCAP_DPORT */
nla_total_size(2) +
+ /* IFLA_GRE_COLLECT_METADATA */
+ nla_total_size(0) +
/* IFLA_GRE_FWMARK */
nla_total_size(4) +
+ /* IFLA_GRE_ERSPAN_INDEX */
+ nla_total_size(4) +
0;
}
@@ -1523,7 +1981,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
- nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark))
+ nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) ||
+ nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@ -1536,6 +1995,24 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
t->encap.flags))
goto nla_put_failure;
+ if (p->collect_md) {
+ if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
+ goto nla_put_failure;
+ }
+
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
+ goto nla_put_failure;
+
+ if (p->erspan_ver == 1) {
+ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
+ goto nla_put_failure;
+ } else if (p->erspan_ver == 2) {
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
+ goto nla_put_failure;
+ }
+
return 0;
nla_put_failure:
@@ -1558,9 +2035,28 @@ static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
[IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
[IFLA_GRE_FWMARK] = { .type = NLA_U32 },
+ [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
+ [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
+ [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
+ [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
};
+static void ip6erspan_tap_setup(struct net_device *dev)
+{
+ ether_setup(dev);
+
+ dev->netdev_ops = &ip6erspan_netdev_ops;
+ dev->needs_free_netdev = true;
+ dev->priv_destructor = ip6gre_dev_free;
+
+ dev->features |= NETIF_F_NETNS_LOCAL;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netif_keep_dst(dev);
+}
+
static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {
.kind = "ip6gre",
.maxtype = IFLA_GRE_MAX,
@@ -1590,6 +2086,20 @@ static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = {
.get_link_net = ip6_tnl_get_link_net,
};
+static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = {
+ .kind = "ip6erspan",
+ .maxtype = IFLA_GRE_MAX,
+ .policy = ip6gre_policy,
+ .priv_size = sizeof(struct ip6_tnl),
+ .setup = ip6erspan_tap_setup,
+ .validate = ip6erspan_tap_validate,
+ .newlink = ip6gre_newlink,
+ .changelink = ip6gre_changelink,
+ .get_size = ip6gre_get_size,
+ .fill_info = ip6gre_fill_info,
+ .get_link_net = ip6_tnl_get_link_net,
+};
+
/*
* And now the modules code and kernel interface.
*/
@@ -1618,9 +2128,15 @@ static int __init ip6gre_init(void)
if (err < 0)
goto tap_ops_failed;
+ err = rtnl_link_register(&ip6erspan_tap_ops);
+ if (err < 0)
+ goto erspan_link_failed;
+
out:
return err;
+erspan_link_failed:
+ rtnl_link_unregister(&ip6gre_tap_ops);
tap_ops_failed:
rtnl_link_unregister(&ip6gre_link_ops);
rtnl_link_failed:
@@ -1634,6 +2150,7 @@ static void __exit ip6gre_fini(void)
{
rtnl_link_unregister(&ip6gre_tap_ops);
rtnl_link_unregister(&ip6gre_link_ops);
+ rtnl_link_unregister(&ip6erspan_tap_ops);
inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE);
unregister_pernet_device(&ip6gre_net_ops);
}
@@ -1645,4 +2162,5 @@ MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)");
MODULE_DESCRIPTION("GRE over IPv6 tunneling device");
MODULE_ALIAS_RTNL_LINK("ip6gre");
MODULE_ALIAS_RTNL_LINK("ip6gretap");
+MODULE_ALIAS_RTNL_LINK("ip6erspan");
MODULE_ALIAS_NETDEV("ip6gre0");
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 3763dc01e374..997c7f19ad62 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -138,6 +138,14 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
return ret;
}
+#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
+ /* Policy lookup after SNAT yielded a new policy */
+ if (skb_dst(skb)->xfrm) {
+ IPCB(skb)->flags |= IPSKB_REROUTED;
+ return dst_output(net, sk, skb);
+ }
+#endif
+
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
dst_allfrag(skb_dst(skb)) ||
(IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
@@ -370,7 +378,7 @@ static inline int ip6_forward_finish(struct net *net, struct sock *sk,
return dst_output(net, sk, skb);
}
-static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
+unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
unsigned int mtu;
struct inet6_dev *idev;
@@ -390,6 +398,7 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
return mtu;
}
+EXPORT_SYMBOL_GPL(ip6_dst_mtu_forward);
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
@@ -1209,7 +1218,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
else
mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
- READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
+ READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
if (np->frag_size < mtu) {
if (np->frag_size)
mtu = np->frag_size;
@@ -1217,7 +1226,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
if (mtu < IPV6_MIN_MTU)
return -EINVAL;
cork->base.fragsize = mtu;
- if (dst_allfrag(rt->dst.path))
+ if (dst_allfrag(xfrm_dst_path(&rt->dst)))
cork->base.flags |= IPCORK_ALLFRAG;
cork->base.length = 0;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 1ee5584c3555..4b15fe928278 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -860,7 +860,7 @@ int ip6_tnl_rcv(struct ip6_tnl *t, struct sk_buff *skb,
struct metadata_dst *tun_dst,
bool log_ecn_err)
{
- return __ip6_tnl_rcv(t, skb, tpi, NULL, ip6ip6_dscp_ecn_decapsulate,
+ return __ip6_tnl_rcv(t, skb, tpi, tun_dst, ip6ip6_dscp_ecn_decapsulate,
log_ecn_err);
}
EXPORT_SYMBOL(ip6_tnl_rcv);
@@ -978,6 +978,9 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
int ret = 0;
struct net *net = t->net;
+ if (t->parms.collect_md)
+ return 1;
+
if ((p->flags & IP6_TNL_F_CAP_XMIT) ||
((p->flags & IP6_TNL_F_CAP_PER_PACKET) &&
(ip6_tnl_get_cap(t, laddr, raddr) & IP6_TNL_F_CAP_XMIT))) {
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 8c184f84f353..fa3ae1cb50d3 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -626,6 +626,7 @@ static void vti6_link_config(struct ip6_tnl *t)
{
struct net_device *dev = t->dev;
struct __ip6_tnl_parm *p = &t->parms;
+ struct net_device *tdev = NULL;
memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr));
memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
@@ -638,6 +639,25 @@ static void vti6_link_config(struct ip6_tnl *t)
dev->flags |= IFF_POINTOPOINT;
else
dev->flags &= ~IFF_POINTOPOINT;
+
+ if (p->flags & IP6_TNL_F_CAP_XMIT) {
+ int strict = (ipv6_addr_type(&p->raddr) &
+ (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
+ struct rt6_info *rt = rt6_lookup(t->net,
+ &p->raddr, &p->laddr,
+ p->link, strict);
+
+ if (rt)
+ tdev = rt->dst.dev;
+ ip6_rt_put(rt);
+ }
+
+ if (!tdev && p->link)
+ tdev = __dev_get_by_index(t->net, p->link);
+
+ if (tdev)
+ dev->mtu = max_t(int, tdev->mtu - dev->hard_header_len,
+ IPV6_MIN_MTU);
}
/**
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index a2e1a864eb46..9f6cace9c817 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -477,7 +477,6 @@ static int ip6mr_vif_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip6mr_vif_fops = {
- .owner = THIS_MODULE,
.open = ip6mr_vif_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -495,6 +494,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
return ERR_PTR(-ENOENT);
it->mrt = mrt;
+ it->cache = NULL;
return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
: SEQ_START_TOKEN;
}
@@ -609,7 +609,6 @@ static int ipmr_mfc_open(struct inode *inode, struct file *file)
}
static const struct file_operations ip6mr_mfc_fops = {
- .owner = THIS_MODULE,
.open = ipmr_mfc_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -1425,10 +1424,13 @@ int __init ip6_mr_init(void)
goto add_proto_fail;
}
#endif
- rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
- ip6mr_rtm_dumproute, 0);
- return 0;
+ err = rtnl_register_module(THIS_MODULE, RTNL_FAMILY_IP6MR, RTM_GETROUTE,
+ NULL, ip6mr_rtm_dumproute, 0);
+ if (err == 0)
+ return 0;
+
#ifdef CONFIG_IPV6_PIMSM_V2
+ inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
add_proto_fail:
unregister_netdevice_notifier(&ip6_mr_notifier);
#endif
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 844642682b83..6a5d0e39bb87 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1655,8 +1655,6 @@ static void mld_sendpack(struct sk_buff *skb)
if (err)
goto err_out;
- payload_len = skb->len;
-
err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
net, net->ipv6.igmp_sk, skb, NULL, skb->dev,
dst_output);
@@ -2758,7 +2756,6 @@ static int igmp6_mc_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations igmp6_mc_seq_fops = {
- .owner = THIS_MODULE,
.open = igmp6_mc_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -2913,7 +2910,6 @@ static int igmp6_mcf_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations igmp6_mcf_seq_fops = {
- .owner = THIS_MODULE,
.open = igmp6_mcf_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b3cea200c85e..f61a5b613b52 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -566,6 +566,11 @@ static void ndisc_send_unsol_na(struct net_device *dev)
read_lock_bh(&idev->lock);
list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ /* skip tentative addresses until dad completes */
+ if (ifa->flags & IFA_F_TENTATIVE &&
+ !(ifa->flags & IFA_F_OPTIMISTIC))
+ continue;
+
ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr,
/*router=*/ !!idev->cnf.forwarding,
/*solicited=*/ false, /*override=*/ true,
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 39970e212ad5..d95ceca7ff8f 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -68,32 +68,7 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
}
EXPORT_SYMBOL(ip6_route_me_harder);
-/*
- * Extra routing may needed on local out, as the QUEUE target never
- * returns control to the table.
- */
-
-struct ip6_rt_info {
- struct in6_addr daddr;
- struct in6_addr saddr;
- u_int32_t mark;
-};
-
-static void nf_ip6_saveroute(const struct sk_buff *skb,
- struct nf_queue_entry *entry)
-{
- struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
-
- if (entry->state.hook == NF_INET_LOCAL_OUT) {
- const struct ipv6hdr *iph = ipv6_hdr(skb);
-
- rt_info->daddr = iph->daddr;
- rt_info->saddr = iph->saddr;
- rt_info->mark = skb->mark;
- }
-}
-
-static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
+static int nf_ip6_reroute(struct sk_buff *skb,
const struct nf_queue_entry *entry)
{
struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry);
@@ -103,7 +78,7 @@ static int nf_ip6_reroute(struct net *net, struct sk_buff *skb,
if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) ||
!ipv6_addr_equal(&iph->saddr, &rt_info->saddr) ||
skb->mark != rt_info->mark)
- return ip6_route_me_harder(net, skb);
+ return ip6_route_me_harder(entry->state.net, skb);
}
return 0;
}
@@ -190,25 +165,19 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook,
};
static const struct nf_ipv6_ops ipv6ops = {
- .chk_addr = ipv6_chk_addr,
- .route_input = ip6_route_input,
- .fragment = ip6_fragment
-};
-
-static const struct nf_afinfo nf_ip6_afinfo = {
- .family = AF_INET6,
+ .chk_addr = ipv6_chk_addr,
+ .route_input = ip6_route_input,
+ .fragment = ip6_fragment,
.checksum = nf_ip6_checksum,
.checksum_partial = nf_ip6_checksum_partial,
.route = nf_ip6_route,
- .saveroute = nf_ip6_saveroute,
.reroute = nf_ip6_reroute,
- .route_key_size = sizeof(struct ip6_rt_info),
};
int __init ipv6_netfilter_init(void)
{
RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops);
- return nf_register_afinfo(&nf_ip6_afinfo);
+ return 0;
}
/* This can be called from inet6_init() on errors, so it cannot
@@ -217,5 +186,4 @@ int __init ipv6_netfilter_init(void)
void ipv6_netfilter_fini(void)
{
RCU_INIT_POINTER(nf_ipv6_ops, NULL);
- nf_unregister_afinfo(&nf_ip6_afinfo);
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 6acb2eecd986..4a634b7a2c80 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -71,6 +71,15 @@ config NFT_FIB_IPV6
endif # NF_TABLES_IPV6
endif # NF_TABLES
+config NF_FLOW_TABLE_IPV6
+ tristate "Netfilter flow table IPv6 module"
+ depends on NF_CONNTRACK && NF_TABLES
+ select NF_FLOW_TABLE
+ help
+ This option adds the flow table IPv6 support.
+
+ To compile it as a module, choose M here.
+
config NF_DUP_IPV6
tristate "Netfilter IPv6 packet duplication to alternate destination"
depends on !NF_CONNTRACK || NF_CONNTRACK
@@ -232,6 +241,15 @@ config IP6_NF_MATCH_RT
To compile it as a module, choose M here. If unsure, say N.
+config IP6_NF_MATCH_SRH
+ tristate '"srh" Segment Routing header match support'
+ depends on NETFILTER_ADVANCED
+ help
+ srh matching allows you to match packets based on the segment
+ routing header of the packet.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
# The targets
config IP6_NF_TARGET_HL
tristate '"HL" hoplimit target support'
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index c6ee0cdd0ba9..d984057b8395 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -45,6 +45,9 @@ obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
+# flow table support
+obj-$(CONFIG_NF_FLOW_TABLE_IPV6) += nf_flow_table_ipv6.o
+
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o
@@ -54,6 +57,7 @@ obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o
obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o
obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o
obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o
+obj-$(CONFIG_IP6_NF_MATCH_SRH) += ip6t_srh.o
# targets
obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 66a8c69a3db4..af4c917e0836 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -986,9 +986,8 @@ static int get_info(struct net *net, void __user *user,
if (compat)
xt_compat_lock(AF_INET6);
#endif
- t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
- "ip6table_%s", name);
- if (t) {
+ t = xt_request_find_table_lock(net, AF_INET6, name);
+ if (!IS_ERR(t)) {
struct ip6t_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -1018,7 +1017,7 @@ static int get_info(struct net *net, void __user *user,
xt_table_unlock(t);
module_put(t->me);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_unlock(AF_INET6);
@@ -1044,7 +1043,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
get.name[sizeof(get.name) - 1] = '\0';
t = xt_find_table_lock(net, AF_INET6, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
struct xt_table_info *private = t->private;
if (get.size == private->size)
ret = copy_entries_to_user(private->size,
@@ -1055,7 +1054,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
return ret;
}
@@ -1078,10 +1077,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
goto out;
}
- t = try_then_request_module(xt_find_table_lock(net, AF_INET6, name),
- "ip6table_%s", name);
- if (!t) {
- ret = -ENOENT;
+ t = xt_request_find_table_lock(net, AF_INET6, name);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free_newinfo_counters_untrans;
}
@@ -1194,8 +1192,8 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
if (IS_ERR(paddc))
return PTR_ERR(paddc);
t = xt_find_table_lock(net, AF_INET6, tmp.name);
- if (!t) {
- ret = -ENOENT;
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
goto free;
}
@@ -1631,7 +1629,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
xt_compat_lock(AF_INET6);
t = xt_find_table_lock(net, AF_INET6, get.name);
- if (t) {
+ if (!IS_ERR(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
ret = compat_table_info(private, &info);
@@ -1645,7 +1643,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
module_put(t->me);
xt_table_unlock(t);
} else
- ret = -ENOENT;
+ ret = PTR_ERR(t);
xt_compat_unlock(AF_INET6);
return ret;
@@ -1949,7 +1947,6 @@ static int __init ip6_tables_init(void)
if (ret < 0)
goto err5;
- pr_info("(C) 2000-2006 Netfilter Core Team\n");
return 0;
err5:
diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c
new file mode 100644
index 000000000000..9642164107ce
--- /dev/null
+++ b/net/ipv6/netfilter/ip6t_srh.c
@@ -0,0 +1,161 @@
+/* Kernel module to match Segment Routing Header (SRH) parameters. */
+
+/* Author:
+ * Ahmed Abdelsalam <amsalam20@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ipv6.h>
+#include <linux/types.h>
+#include <net/ipv6.h>
+#include <net/seg6.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv6/ip6t_srh.h>
+#include <linux/netfilter_ipv6/ip6_tables.h>
+
+/* Test a struct->mt_invflags and a boolean for inequality */
+#define NF_SRH_INVF(ptr, flag, boolean) \
+ ((boolean) ^ !!((ptr)->mt_invflags & (flag)))
+
+static bool srh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct ip6t_srh *srhinfo = par->matchinfo;
+ struct ipv6_sr_hdr *srh;
+ struct ipv6_sr_hdr _srh;
+ int hdrlen, srhoff = 0;
+
+ if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0)
+ return false;
+ srh = skb_header_pointer(skb, srhoff, sizeof(_srh), &_srh);
+ if (!srh)
+ return false;
+
+ hdrlen = ipv6_optlen(srh);
+ if (skb->len - srhoff < hdrlen)
+ return false;
+
+ if (srh->type != IPV6_SRCRT_TYPE_4)
+ return false;
+
+ if (srh->segments_left > srh->first_segment)
+ return false;
+
+ /* Next Header matching */
+ if (srhinfo->mt_flags & IP6T_SRH_NEXTHDR)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NEXTHDR,
+ !(srh->nexthdr == srhinfo->next_hdr)))
+ return false;
+
+ /* Header Extension Length matching */
+ if (srhinfo->mt_flags & IP6T_SRH_LEN_EQ)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_EQ,
+ !(srh->hdrlen == srhinfo->hdr_len)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_LEN_GT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_GT,
+ !(srh->hdrlen > srhinfo->hdr_len)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_LEN_LT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LEN_LT,
+ !(srh->hdrlen < srhinfo->hdr_len)))
+ return false;
+
+ /* Segments Left matching */
+ if (srhinfo->mt_flags & IP6T_SRH_SEGS_EQ)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_EQ,
+ !(srh->segments_left == srhinfo->segs_left)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_SEGS_GT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_GT,
+ !(srh->segments_left > srhinfo->segs_left)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_SEGS_LT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_SEGS_LT,
+ !(srh->segments_left < srhinfo->segs_left)))
+ return false;
+
+ /**
+ * Last Entry matching
+ * Last_Entry field was introduced in revision 6 of the SRH draft.
+ * It was called First_Segment in the previous revision
+ */
+ if (srhinfo->mt_flags & IP6T_SRH_LAST_EQ)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_EQ,
+ !(srh->first_segment == srhinfo->last_entry)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_LAST_GT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_GT,
+ !(srh->first_segment > srhinfo->last_entry)))
+ return false;
+
+ if (srhinfo->mt_flags & IP6T_SRH_LAST_LT)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LAST_LT,
+ !(srh->first_segment < srhinfo->last_entry)))
+ return false;
+
+ /**
+ * Tag matchig
+ * Tag field was introduced in revision 6 of the SRH draft.
+ */
+ if (srhinfo->mt_flags & IP6T_SRH_TAG)
+ if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_TAG,
+ !(srh->tag == srhinfo->tag)))
+ return false;
+ return true;
+}
+
+static int srh_mt6_check(const struct xt_mtchk_param *par)
+{
+ const struct ip6t_srh *srhinfo = par->matchinfo;
+
+ if (srhinfo->mt_flags & ~IP6T_SRH_MASK) {
+ pr_err("unknown srh match flags %X\n", srhinfo->mt_flags);
+ return -EINVAL;
+ }
+
+ if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) {
+ pr_err("unknown srh invflags %X\n", srhinfo->mt_invflags);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match srh_mt6_reg __read_mostly = {
+ .name = "srh",
+ .family = NFPROTO_IPV6,
+ .match = srh_mt6,
+ .matchsize = sizeof(struct ip6t_srh),
+ .checkentry = srh_mt6_check,
+ .me = THIS_MODULE,
+};
+
+static int __init srh_mt6_init(void)
+{
+ return xt_register_match(&srh_mt6_reg);
+}
+
+static void __exit srh_mt6_exit(void)
+{
+ xt_unregister_match(&srh_mt6_reg);
+}
+
+module_init(srh_mt6_init);
+module_exit(srh_mt6_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Xtables: IPv6 Segment Routing Header match");
+MODULE_AUTHOR("Ahmed Abdelsalam <amsalam20@gmail.com>");
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 2b1a9dcdbcb3..b0524b18c4fb 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -42,14 +42,6 @@ ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state)
u_int8_t hop_limit;
u_int32_t flowlabel, mark;
int err;
-#if 0
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr)) {
- net_warn_ratelimited("ip6t_hook: happy cracking\n");
- return NF_ACCEPT;
- }
-#endif
/* save source/dest address, mark, hoplimit, flowlabel, priority, */
memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 991512576c8c..47306e45a80a 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -74,6 +74,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
{
.hook = ip6table_nat_in,
.pf = NFPROTO_IPV6,
+ .nat_hook = true,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP6_PRI_NAT_DST,
},
@@ -81,6 +82,7 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
{
.hook = ip6table_nat_out,
.pf = NFPROTO_IPV6,
+ .nat_hook = true,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_NAT_SRC,
},
@@ -88,12 +90,14 @@ static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
{
.hook = ip6table_nat_local_fn,
.pf = NFPROTO_IPV6,
+ .nat_hook = true,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP6_PRI_NAT_DST,
},
/* After packet filtering, change source */
{
.hook = ip6table_nat_fn,
+ .nat_hook = true,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP6_PRI_NAT_SRC,
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index d4bc56443dc1..710fa0806c37 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <linux/slab.h>
@@ -11,6 +12,10 @@
static int __net_init ip6table_raw_table_init(struct net *net);
+static bool raw_before_defrag __read_mostly;
+MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
+module_param(raw_before_defrag, bool, 0000);
+
static const struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
@@ -20,6 +25,15 @@ static const struct xt_table packet_raw = {
.table_init = ip6table_raw_table_init,
};
+static const struct xt_table packet_raw_before_defrag = {
+ .name = "raw",
+ .valid_hooks = RAW_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV6,
+ .priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
+ .table_init = ip6table_raw_table_init,
+};
+
/* The work comes in here from netfilter.c. */
static unsigned int
ip6table_raw_hook(void *priv, struct sk_buff *skb,
@@ -33,15 +47,19 @@ static struct nf_hook_ops *rawtable_ops __read_mostly;
static int __net_init ip6table_raw_table_init(struct net *net)
{
struct ip6t_replace *repl;
+ const struct xt_table *table = &packet_raw;
int ret;
+ if (raw_before_defrag)
+ table = &packet_raw_before_defrag;
+
if (net->ipv6.ip6table_raw)
return 0;
- repl = ip6t_alloc_initial_table(&packet_raw);
+ repl = ip6t_alloc_initial_table(table);
if (repl == NULL)
return -ENOMEM;
- ret = ip6t_register_table(net, &packet_raw, repl, rawtable_ops,
+ ret = ip6t_register_table(net, table, repl, rawtable_ops,
&net->ipv6.ip6table_raw);
kfree(repl);
return ret;
@@ -62,9 +80,16 @@ static struct pernet_operations ip6table_raw_net_ops = {
static int __init ip6table_raw_init(void)
{
int ret;
+ const struct xt_table *table = &packet_raw;
+
+ if (raw_before_defrag) {
+ table = &packet_raw_before_defrag;
+
+ pr_info("Enabling raw table before defrag\n");
+ }
/* Register hooks */
- rawtable_ops = xt_hook_ops_alloc(&packet_raw, ip6table_raw_hook);
+ rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
if (IS_ERR(rawtable_ops))
return PTR_ERR(rawtable_ops);
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 3b80a38f62b8..11a313fd9273 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -176,11 +176,6 @@ static unsigned int ipv6_conntrack_local(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr)) {
- net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
- return NF_ACCEPT;
- }
return nf_conntrack_in(state->net, PF_INET6, state->hook, skb);
}
@@ -368,7 +363,7 @@ static struct nf_sockopt_ops so_getorigdst6 = {
.owner = THIS_MODULE,
};
-static struct nf_conntrack_l4proto *builtin_l4proto6[] = {
+static const struct nf_conntrack_l4proto * const builtin_l4proto6[] = {
&nf_conntrack_l4proto_tcp6,
&nf_conntrack_l4proto_udp6,
&nf_conntrack_l4proto_icmpv6,
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 3ac0d826afc4..2548e2c8aedd 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -27,7 +27,7 @@
#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h>
#include <net/netfilter/nf_log.h>
-static unsigned int nf_ct_icmpv6_timeout __read_mostly = 30*HZ;
+static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
static inline struct nf_icmp_net *icmpv6_pernet(struct net *net)
{
@@ -352,7 +352,7 @@ static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
return &net->ct.nf_ct_proto.icmpv6.pn;
}
-struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly =
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
{
.l3proto = PF_INET6,
.l4proto = IPPROTO_ICMPV6,
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 977d8900cfd1..ce53dcfda88a 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -231,7 +231,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
if ((unsigned int)end > IPV6_MAXPLEN) {
pr_debug("offset is too large.\n");
- return -1;
+ return -EINVAL;
}
ecn = ip6_frag_ecn(ipv6_hdr(skb));
@@ -264,7 +264,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
- return -1;
+ return -EPROTO;
}
if (end > fq->q.len) {
/* Some bits beyond end -> corruption. */
@@ -358,7 +358,7 @@ found:
discard_fq:
inet_frag_kill(&fq->q, &nf_frags);
err:
- return -1;
+ return -EINVAL;
}
/*
@@ -567,6 +567,7 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
+ u16 savethdr = skb->transport_header;
struct net_device *dev = skb->dev;
int fhoff, nhoff, ret;
struct frag_hdr *fhdr;
@@ -600,8 +601,12 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
spin_lock_bh(&fq->q.lock);
- if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) {
- ret = -EINVAL;
+ ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
+ if (ret < 0) {
+ if (ret == -EPROTO) {
+ skb->transport_header = savethdr;
+ ret = 0;
+ }
goto out_unlock;
}
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index b326da59257f..c87b48359e8f 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -63,6 +63,9 @@ static unsigned int ipv6_defrag(void *priv,
/* Previously seen (loopback)? */
if (skb_nfct(skb) && !nf_ct_is_template((struct nf_conn *)skb_nfct(skb)))
return NF_ACCEPT;
+
+ if (skb->_nfct == IP_CT_UNTRACKED)
+ return NF_ACCEPT;
#endif
err = nf_ct_frag6_gather(state->net, skb,
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
new file mode 100644
index 000000000000..fff21602875a
--- /dev/null
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -0,0 +1,277 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/rhashtable.h>
+#include <linux/ipv6.h>
+#include <linux/netdevice.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/neighbour.h>
+#include <net/netfilter/nf_flow_table.h>
+#include <net/netfilter/nf_tables.h>
+/* For layer 4 checksum field offset. */
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ struct tcphdr *tcph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*tcph)))
+ return -1;
+
+ tcph = (void *)(skb_network_header(skb) + thoff);
+ inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+
+ return 0;
+}
+
+static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
+ struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ struct udphdr *udph;
+
+ if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
+ skb_try_make_writable(skb, thoff + sizeof(*udph)))
+ return -1;
+
+ udph = (void *)(skb_network_header(skb) + thoff);
+ if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
+ new_addr->s6_addr32, true);
+ if (!udph->check)
+ udph->check = CSUM_MANGLED_0;
+ }
+
+ return 0;
+}
+
+static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff, struct in6_addr *addr,
+ struct in6_addr *new_addr)
+{
+ switch (ip6h->nexthdr) {
+ case IPPROTO_TCP:
+ if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ case IPPROTO_UDP:
+ if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
+ return NF_DROP;
+ break;
+ }
+
+ return 0;
+}
+
+static int nf_flow_snat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb, struct ipv6hdr *ip6h,
+ unsigned int thoff,
+ enum flow_offload_tuple_dir dir)
+{
+ struct in6_addr addr, new_addr;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = ip6h->daddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6;
+ ip6h->daddr = new_addr;
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = ip6h->saddr;
+ new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
+ ip6h->saddr = new_addr;
+ break;
+ default:
+ return -1;
+ }
+
+ return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
+}
+
+static int nf_flow_nat_ipv6(const struct flow_offload *flow,
+ struct sk_buff *skb,
+ enum flow_offload_tuple_dir dir)
+{
+ struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ unsigned int thoff = sizeof(*ip6h);
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT &&
+ (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ return -1;
+ if (flow->flags & FLOW_OFFLOAD_DNAT &&
+ (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
+ return -1;
+
+ return 0;
+}
+
+static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
+ struct flow_offload_tuple *tuple)
+{
+ struct flow_ports *ports;
+ struct ipv6hdr *ip6h;
+ unsigned int thoff;
+
+ if (!pskb_may_pull(skb, sizeof(*ip6h)))
+ return -1;
+
+ ip6h = ipv6_hdr(skb);
+
+ if (ip6h->nexthdr != IPPROTO_TCP &&
+ ip6h->nexthdr != IPPROTO_UDP)
+ return -1;
+
+ thoff = sizeof(*ip6h);
+ if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
+ return -1;
+
+ ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
+
+ tuple->src_v6 = ip6h->saddr;
+ tuple->dst_v6 = ip6h->daddr;
+ tuple->src_port = ports->source;
+ tuple->dst_port = ports->dest;
+ tuple->l3proto = AF_INET6;
+ tuple->l4proto = ip6h->nexthdr;
+ tuple->iifidx = dev->ifindex;
+
+ return 0;
+}
+
+/* Based on ip_exceeds_mtu(). */
+static bool __nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
+ return false;
+
+ return true;
+}
+
+static bool nf_flow_exceeds_mtu(struct sk_buff *skb, const struct rt6_info *rt)
+{
+ u32 mtu;
+
+ mtu = ip6_dst_mtu_forward(&rt->dst);
+ if (__nf_flow_exceeds_mtu(skb, mtu))
+ return true;
+
+ return false;
+}
+
+unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ struct flow_offload_tuple tuple = {};
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ struct in6_addr *nexthop;
+ struct ipv6hdr *ip6h;
+ struct rt6_info *rt;
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ return NF_ACCEPT;
+
+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
+ return NF_ACCEPT;
+
+ tuplehash = flow_offload_lookup(flow_table, &tuple);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ outdev = dev_get_by_index_rcu(state->net, tuplehash->tuple.oifidx);
+ if (!outdev)
+ return NF_ACCEPT;
+
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
+ rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
+ if (unlikely(nf_flow_exceeds_mtu(skb, rt)))
+ return NF_ACCEPT;
+
+ if (skb_try_make_writable(skb, sizeof(*ip6h)))
+ return NF_DROP;
+
+ if (flow->flags & (FLOW_OFFLOAD_SNAT | FLOW_OFFLOAD_DNAT) &&
+ nf_flow_nat_ipv6(flow, skb, dir) < 0)
+ return NF_DROP;
+
+ flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
+ ip6h = ipv6_hdr(skb);
+ ip6h->hop_limit--;
+
+ skb->dev = outdev;
+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
+
+ return NF_STOLEN;
+}
+EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
+
+static struct nf_flowtable_type flowtable_ipv6 = {
+ .family = NFPROTO_IPV6,
+ .params = &nf_flow_offload_rhash_params,
+ .gc = nf_flow_offload_work_gc,
+ .hook = nf_flow_offload_ipv6_hook,
+ .owner = THIS_MODULE,
+};
+
+static int __init nf_flow_ipv6_module_init(void)
+{
+ nft_register_flowtable_type(&flowtable_ipv6);
+
+ return 0;
+}
+
+static void __exit nf_flow_ipv6_module_exit(void)
+{
+ nft_unregister_flowtable_type(&flowtable_ipv6);
+}
+
+module_init(nf_flow_ipv6_module_init);
+module_exit(nf_flow_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_ALIAS_NF_FLOWTABLE(AF_INET6);
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 1d2fb9267d6f..bed57ee65f7b 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -369,10 +369,6 @@ nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
#endif
unsigned int ret;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
@@ -408,10 +404,6 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
unsigned int ret;
int err;
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct ipv6hdr))
- return NF_ACCEPT;
-
ret = nf_nat_ipv6_fn(priv, skb, state, do_chain);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index d6e4ba5de916..17e03589331c 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -22,68 +22,12 @@ static unsigned int nft_do_chain_ipv6(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
-static unsigned int nft_ipv6_output(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- if (unlikely(skb->len < sizeof(struct ipv6hdr))) {
- if (net_ratelimit())
- pr_info("nf_tables_ipv6: ignoring short SOCK_RAW "
- "packet\n");
- return NF_ACCEPT;
- }
-
- return nft_do_chain_ipv6(priv, skb, state);
-}
-
-struct nft_af_info nft_af_ipv6 __read_mostly = {
- .family = NFPROTO_IPV6,
- .nhooks = NF_INET_NUMHOOKS,
- .owner = THIS_MODULE,
- .nops = 1,
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
- [NF_INET_LOCAL_OUT] = nft_ipv6_output,
- [NF_INET_FORWARD] = nft_do_chain_ipv6,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
- },
-};
-EXPORT_SYMBOL_GPL(nft_af_ipv6);
-
-static int nf_tables_ipv6_init_net(struct net *net)
-{
- net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
- if (net->nft.ipv6 == NULL)
- return -ENOMEM;
-
- memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6));
-
- if (nft_register_afinfo(net, net->nft.ipv6) < 0)
- goto err;
-
- return 0;
-err:
- kfree(net->nft.ipv6);
- return -ENOMEM;
-}
-
-static void nf_tables_ipv6_exit_net(struct net *net)
-{
- nft_unregister_afinfo(net, net->nft.ipv6);
- kfree(net->nft.ipv6);
-}
-
-static struct pernet_operations nf_tables_ipv6_net_ops = {
- .init = nf_tables_ipv6_init_net,
- .exit = nf_tables_ipv6_exit_net,
-};
-
static const struct nf_chain_type filter_ipv6 = {
.name = "filter",
.type = NFT_CHAIN_T_DEFAULT,
@@ -94,26 +38,22 @@ static const struct nf_chain_type filter_ipv6 = {
(1 << NF_INET_FORWARD) |
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_POST_ROUTING),
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
+ [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6,
+ [NF_INET_FORWARD] = nft_do_chain_ipv6,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
+ },
};
static int __init nf_tables_ipv6_init(void)
{
- int ret;
-
- ret = nft_register_chain_type(&filter_ipv6);
- if (ret < 0)
- return ret;
-
- ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
- if (ret < 0)
- nft_unregister_chain_type(&filter_ipv6);
-
- return ret;
+ return nft_register_chain_type(&filter_ipv6);
}
static void __exit nf_tables_ipv6_exit(void)
{
- unregister_pernet_subsys(&nf_tables_ipv6_net_ops);
nft_unregister_chain_type(&filter_ipv6);
}
@@ -122,4 +62,4 @@ module_exit(nf_tables_ipv6_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_FAMILY(AF_INET6);
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter");
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 443cd306c0b0..73fe2bd13fcf 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -31,7 +31,8 @@ static unsigned int nft_nat_do_chain(void *priv,
{
struct nft_pktinfo pkt;
- nft_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb);
return nft_do_chain(&pkt, priv);
}
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index f2727475895e..11d3c3b9aa18 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -33,7 +33,8 @@ static unsigned int nf_route_table_hook(void *priv,
u32 mark, flowlabel;
int err;
- nft_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_ipv6(&pkt, skb);
/* save source/dest address, mark, hoplimit, flowlabel, priority */
memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 54b5899543ef..cc5174c7254c 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -60,7 +60,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
{
const struct net_device *dev = NULL;
const struct nf_ipv6_ops *v6ops;
- const struct nf_afinfo *afinfo;
int route_err, addrtype;
struct rt6_info *rt;
struct flowi6 fl6 = {
@@ -69,8 +68,8 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
};
u32 ret = 0;
- afinfo = nf_get_afinfo(NFPROTO_IPV6);
- if (!afinfo)
+ v6ops = nf_get_ipv6_ops();
+ if (!v6ops)
return RTN_UNREACHABLE;
if (priv->flags & NFTA_FIB_F_IIF)
@@ -80,12 +79,11 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
- v6ops = nf_get_ipv6_ops();
- if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+ if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
ret = RTN_LOCAL;
- route_err = afinfo->route(nft_net(pkt), (struct dst_entry **)&rt,
- flowi6_to_flowi(&fl6), false);
+ route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt,
+ flowi6_to_flowi(&fl6), false);
if (route_err)
goto err;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index e88bcb8ff0fd..b67814242f78 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -58,7 +58,6 @@ static int sockstat6_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations sockstat6_seq_fops = {
- .owner = THIS_MODULE,
.open = sockstat6_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -248,7 +247,6 @@ static int snmp6_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations snmp6_seq_fops = {
- .owner = THIS_MODULE,
.open = snmp6_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -274,7 +272,6 @@ static int snmp6_dev_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations snmp6_dev_seq_fops = {
- .owner = THIS_MODULE,
.open = snmp6_dev_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 761a473a07c5..ddda7eb3c623 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1308,7 +1308,6 @@ static int raw6_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations raw6_seq_fops = {
- .owner = THIS_MODULE,
.open = raw6_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 0458b761f3c5..fb2d251c0500 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -186,7 +186,7 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
{
- return dst_metrics_write_ptr(rt->dst.from);
+ return dst_metrics_write_ptr(&rt->from->dst);
}
static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
@@ -391,7 +391,7 @@ static void ip6_dst_destroy(struct dst_entry *dst)
{
struct rt6_info *rt = (struct rt6_info *)dst;
struct rt6_exception_bucket *bucket;
- struct dst_entry *from = dst->from;
+ struct rt6_info *from = rt->from;
struct inet6_dev *idev;
dst_destroy_metrics_generic(dst);
@@ -409,8 +409,8 @@ static void ip6_dst_destroy(struct dst_entry *dst)
kfree(bucket);
}
- dst->from = NULL;
- dst_release(from);
+ rt->from = NULL;
+ dst_release(&from->dst);
}
static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -443,9 +443,9 @@ static bool rt6_check_expired(const struct rt6_info *rt)
if (rt->rt6i_flags & RTF_EXPIRES) {
if (time_after(jiffies, rt->dst.expires))
return true;
- } else if (rt->dst.from) {
+ } else if (rt->from) {
return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
- rt6_check_expired((struct rt6_info *)rt->dst.from);
+ rt6_check_expired(rt->from);
}
return false;
}
@@ -455,7 +455,6 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
int strict)
{
struct rt6_info *sibling, *next_sibling;
- int route_choosen;
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
@@ -463,26 +462,19 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
if (!fl6->mp_hash)
fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
- route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1);
- /* Don't change the route, if route_choosen == 0
- * (siblings does not include ourself)
- */
- if (route_choosen)
- list_for_each_entry_safe(sibling, next_sibling,
- &match->rt6i_siblings, rt6i_siblings) {
- route_choosen--;
- if (route_choosen == 0) {
- struct inet6_dev *idev = sibling->rt6i_idev;
-
- if (!netif_carrier_ok(sibling->dst.dev) &&
- idev->cnf.ignore_routes_with_linkdown)
- break;
- if (rt6_score_route(sibling, oif, strict) < 0)
- break;
- match = sibling;
- break;
- }
- }
+ if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
+ return match;
+
+ list_for_each_entry_safe(sibling, next_sibling, &match->rt6i_siblings,
+ rt6i_siblings) {
+ if (fl6->mp_hash > atomic_read(&sibling->rt6i_nh_upper_bound))
+ continue;
+ if (rt6_score_route(sibling, oif, strict) < 0)
+ break;
+ match = sibling;
+ break;
+ }
+
return match;
}
@@ -499,12 +491,15 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
struct rt6_info *local = NULL;
struct rt6_info *sprt;
- if (!oif && ipv6_addr_any(saddr))
- goto out;
+ if (!oif && ipv6_addr_any(saddr) && !(rt->rt6i_nh_flags & RTNH_F_DEAD))
+ return rt;
- for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) {
+ for (sprt = rt; sprt; sprt = rcu_dereference(sprt->rt6_next)) {
struct net_device *dev = sprt->dst.dev;
+ if (sprt->rt6i_nh_flags & RTNH_F_DEAD)
+ continue;
+
if (oif) {
if (dev->ifindex == oif)
return sprt;
@@ -533,8 +528,8 @@ static inline struct rt6_info *rt6_device_match(struct net *net,
if (flags & RT6_LOOKUP_F_IFACE)
return net->ipv6.ip6_null_entry;
}
-out:
- return rt;
+
+ return rt->rt6i_nh_flags & RTNH_F_DEAD ? net->ipv6.ip6_null_entry : rt;
}
#ifdef CONFIG_IPV6_ROUTER_PREF
@@ -679,10 +674,12 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
int m;
bool match_do_rr = false;
struct inet6_dev *idev = rt->rt6i_idev;
- struct net_device *dev = rt->dst.dev;
- if (dev && !netif_carrier_ok(dev) &&
- idev->cnf.ignore_routes_with_linkdown &&
+ if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+ goto out;
+
+ if (idev->cnf.ignore_routes_with_linkdown &&
+ rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
!(strict & RT6_LOOKUP_F_IGNORE_LINKSTATE))
goto out;
@@ -721,7 +718,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = NULL;
cont = NULL;
- for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) {
+ for (rt = rr_head; rt; rt = rcu_dereference(rt->rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -731,7 +728,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
}
for (rt = leaf; rt && rt != rr_head;
- rt = rcu_dereference(rt->dst.rt6_next)) {
+ rt = rcu_dereference(rt->rt6_next)) {
if (rt->rt6i_metric != metric) {
cont = rt;
break;
@@ -743,7 +740,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
if (match || !cont)
return match;
- for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next))
+ for (rt = cont; rt; rt = rcu_dereference(rt->rt6_next))
match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
@@ -781,7 +778,7 @@ static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn,
&do_rr);
if (do_rr) {
- struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next);
+ struct rt6_info *next = rcu_dereference(rt0->rt6_next);
/* no entries matched; do round-robin */
if (!next || next->rt6i_metric != rt0->rt6i_metric)
@@ -1054,7 +1051,7 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
*/
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
- ort = (struct rt6_info *)ort->dst.from;
+ ort = ort->from;
rcu_read_lock();
dev = ip6_rt_get_dev_rcu(ort);
@@ -1274,7 +1271,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
/* ort can't be a cache or pcpu route */
if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
- ort = (struct rt6_info *)ort->dst.from;
+ ort = ort->from;
WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU));
spin_lock_bh(&rt6_exception_lock);
@@ -1346,7 +1343,9 @@ out:
/* Update fn->fn_sernum to invalidate all cached dst */
if (!err) {
+ spin_lock_bh(&ort->rt6i_table->tb6_lock);
fib6_update_sernum(ort);
+ spin_unlock_bh(&ort->rt6i_table->tb6_lock);
fib6_force_start_gc(net);
}
@@ -1415,8 +1414,8 @@ static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt,
/* Remove the passed in cached rt from the hash table that contains it */
int rt6_remove_exception_rt(struct rt6_info *rt)
{
- struct rt6_info *from = (struct rt6_info *)rt->dst.from;
struct rt6_exception_bucket *bucket;
+ struct rt6_info *from = rt->from;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
int err;
@@ -1460,8 +1459,8 @@ int rt6_remove_exception_rt(struct rt6_info *rt)
*/
static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
- struct rt6_info *from = (struct rt6_info *)rt->dst.from;
struct rt6_exception_bucket *bucket;
+ struct rt6_info *from = rt->from;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
@@ -1586,12 +1585,19 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
* EXPIRES exceptions - e.g. pmtu-generated ones are pruned when
* expired, independently from their aging, as per RFC 8201 section 4
*/
- if (!(rt->rt6i_flags & RTF_EXPIRES) &&
- time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
- RT6_TRACE("aging clone %p\n", rt);
+ if (!(rt->rt6i_flags & RTF_EXPIRES)) {
+ if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
+ RT6_TRACE("aging clone %p\n", rt);
+ rt6_remove_exception(bucket, rt6_ex);
+ return;
+ }
+ } else if (time_after(jiffies, rt->dst.expires)) {
+ RT6_TRACE("purging expired route %p\n", rt);
rt6_remove_exception(bucket, rt6_ex);
return;
- } else if (rt->rt6i_flags & RTF_GATEWAY) {
+ }
+
+ if (rt->rt6i_flags & RTF_GATEWAY) {
struct neighbour *neigh;
__u8 neigh_flags = 0;
@@ -1606,11 +1612,8 @@ static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket,
rt6_remove_exception(bucket, rt6_ex);
return;
}
- } else if (__rt6_check_expired(rt)) {
- RT6_TRACE("purging expired route %p\n", rt);
- rt6_remove_exception(bucket, rt6_ex);
- return;
}
+
gc_args->more++;
}
@@ -1824,10 +1827,10 @@ u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
if (skb) {
ip6_multipath_l3_keys(skb, &hash_keys);
- return flow_hash_from_keys(&hash_keys);
+ return flow_hash_from_keys(&hash_keys) >> 1;
}
- return get_hash_from_flowi6(fl6);
+ return get_hash_from_flowi6(fl6) >> 1;
}
void ip6_route_input(struct sk_buff *skb)
@@ -1929,9 +1932,9 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
static void rt6_dst_from_metrics_check(struct rt6_info *rt)
{
- if (rt->dst.from &&
- dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from))
- dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true);
+ if (rt->from &&
+ dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(&rt->from->dst))
+ dst_init_metrics(&rt->dst, dst_metrics_ptr(&rt->from->dst), true);
}
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
@@ -1951,7 +1954,7 @@ static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie)
{
if (!__rt6_check_expired(rt) &&
rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
- rt6_check((struct rt6_info *)(rt->dst.from), cookie))
+ rt6_check(rt->from, cookie))
return &rt->dst;
else
return NULL;
@@ -1971,7 +1974,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
rt6_dst_from_metrics_check(rt);
if (rt->rt6i_flags & RTF_PCPU ||
- (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->dst.from))
+ (unlikely(!list_empty(&rt->rt6i_uncached)) && rt->from))
return rt6_dst_from_check(rt, cookie);
else
return rt6_check(rt, cookie);
@@ -2154,6 +2157,8 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
restart:
for_each_fib6_node_rt_rcu(fn) {
+ if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+ continue;
if (rt6_check_expired(rt))
continue;
if (rt->dst.error)
@@ -2344,7 +2349,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->rt6i_idev = idev;
dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 0);
- /* Add this dst into uncached_list so that rt6_ifdown() can
+ /* Add this dst into uncached_list so that rt6_disable_ip() can
* do proper release of the net_device
*/
rt6_uncached_list_add(rt);
@@ -2439,7 +2444,8 @@ static int ip6_convert_metrics(struct mx6_config *mxc,
static struct rt6_info *ip6_nh_lookup_table(struct net *net,
struct fib6_config *cfg,
- const struct in6_addr *gw_addr)
+ const struct in6_addr *gw_addr,
+ u32 tbid, int flags)
{
struct flowi6 fl6 = {
.flowi6_oif = cfg->fc_ifindex,
@@ -2448,15 +2454,15 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
};
struct fib6_table *table;
struct rt6_info *rt;
- int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_IGNORE_LINKSTATE;
- table = fib6_get_table(net, cfg->fc_table);
+ table = fib6_get_table(net, tbid);
if (!table)
return NULL;
if (!ipv6_addr_any(&cfg->fc_prefsrc))
flags |= RT6_LOOKUP_F_HAS_SADDR;
+ flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
/* if table lookup failed, fall back to full lookup */
@@ -2468,6 +2474,82 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
return rt;
}
+static int ip6_route_check_nh_onlink(struct net *net,
+ struct fib6_config *cfg,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_LOCAL;
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
+ struct rt6_info *grt;
+ int err;
+
+ err = 0;
+ grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
+ if (grt) {
+ if (grt->rt6i_flags & flags || dev != grt->dst.dev) {
+ NL_SET_ERR_MSG(extack, "Nexthop has invalid gateway");
+ err = -EINVAL;
+ }
+
+ ip6_rt_put(grt);
+ }
+
+ return err;
+}
+
+static int ip6_route_check_nh(struct net *net,
+ struct fib6_config *cfg,
+ struct net_device **_dev,
+ struct inet6_dev **idev)
+{
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ struct net_device *dev = _dev ? *_dev : NULL;
+ struct rt6_info *grt = NULL;
+ int err = -EHOSTUNREACH;
+
+ if (cfg->fc_table) {
+ int flags = RT6_LOOKUP_F_IFACE;
+
+ grt = ip6_nh_lookup_table(net, cfg, gw_addr,
+ cfg->fc_table, flags);
+ if (grt) {
+ if (grt->rt6i_flags & RTF_GATEWAY ||
+ (dev && dev != grt->dst.dev)) {
+ ip6_rt_put(grt);
+ grt = NULL;
+ }
+ }
+ }
+
+ if (!grt)
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+
+ if (!grt)
+ goto out;
+
+ if (dev) {
+ if (dev != grt->dst.dev) {
+ ip6_rt_put(grt);
+ goto out;
+ }
+ } else {
+ *_dev = dev = grt->dst.dev;
+ *idev = grt->rt6i_idev;
+ dev_hold(dev);
+ in6_dev_hold(grt->rt6i_idev);
+ }
+
+ if (!(grt->rt6i_flags & RTF_GATEWAY))
+ err = 0;
+
+ ip6_rt_put(grt);
+
+out:
+ return err;
+}
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -2519,6 +2601,21 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (cfg->fc_metric == 0)
cfg->fc_metric = IP6_RT_PRIO_USER;
+ if (cfg->fc_flags & RTNH_F_ONLINK) {
+ if (!dev) {
+ NL_SET_ERR_MSG(extack,
+ "Nexthop device required for onlink");
+ err = -ENODEV;
+ goto out;
+ }
+
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+ }
+
err = -ENOBUFS;
if (cfg->fc_nlinfo.nlh &&
!(cfg->fc_nlinfo.nlh->nlmsg_flags & NLM_F_CREATE)) {
@@ -2593,6 +2690,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
#endif
rt->rt6i_metric = cfg->fc_metric;
+ rt->rt6i_nh_weight = 1;
/* We cannot add true routes via loopback here,
they would result in kernel looping; promote them to reject routes
@@ -2662,8 +2760,6 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->rt6i_gateway = *gw_addr;
if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- struct rt6_info *grt = NULL;
-
/* IPv6 strictly inhibits using not link-local
addresses as nexthop address.
Otherwise, router will not able to send redirects.
@@ -2680,40 +2776,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
goto out;
}
- if (cfg->fc_table) {
- grt = ip6_nh_lookup_table(net, cfg, gw_addr);
-
- if (grt) {
- if (grt->rt6i_flags & RTF_GATEWAY ||
- (dev && dev != grt->dst.dev)) {
- ip6_rt_put(grt);
- grt = NULL;
- }
- }
- }
-
- if (!grt)
- grt = rt6_lookup(net, gw_addr, NULL,
- cfg->fc_ifindex, 1);
-
- err = -EHOSTUNREACH;
- if (!grt)
- goto out;
- if (dev) {
- if (dev != grt->dst.dev) {
- ip6_rt_put(grt);
- goto out;
- }
+ if (cfg->fc_flags & RTNH_F_ONLINK) {
+ err = ip6_route_check_nh_onlink(net, cfg, dev,
+ extack);
} else {
- dev = grt->dst.dev;
- idev = grt->rt6i_idev;
- dev_hold(dev);
- in6_dev_hold(grt->rt6i_idev);
+ err = ip6_route_check_nh(net, cfg, &dev, &idev);
}
- if (!(grt->rt6i_flags & RTF_GATEWAY))
- err = 0;
- ip6_rt_put(grt);
-
if (err)
goto out;
}
@@ -2732,6 +2800,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (!dev)
goto out;
+ if (!(dev->flags & IFF_UP)) {
+ NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+ err = -ENETDOWN;
+ goto out;
+ }
+
if (!ipv6_addr_any(&cfg->fc_prefsrc)) {
if (!ipv6_chk_addr(net, &cfg->fc_prefsrc, dev, 0)) {
NL_SET_ERR_MSG(extack, "Invalid source address");
@@ -2746,6 +2820,10 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
rt->rt6i_flags = cfg->fc_flags;
install_route:
+ if (!(rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST)) &&
+ !netif_carrier_ok(dev))
+ rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+ rt->rt6i_nh_flags |= (cfg->fc_flags & RTNH_F_ONLINK);
rt->dst.dev = dev;
rt->rt6i_idev = idev;
rt->rt6i_table = table;
@@ -3056,11 +3134,11 @@ out:
static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from)
{
- BUG_ON(from->dst.from);
+ BUG_ON(from->from);
rt->rt6i_flags &= ~RTF_EXPIRES;
dst_hold(&from->dst);
- rt->dst.from = &from->dst;
+ rt->from = from;
dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true);
}
@@ -3459,37 +3537,249 @@ void rt6_clean_tohost(struct net *net, struct in6_addr *gateway)
fib6_clean_all(net, fib6_clean_tohost, gateway);
}
-struct arg_dev_net {
- struct net_device *dev;
- struct net *net;
+struct arg_netdev_event {
+ const struct net_device *dev;
+ union {
+ unsigned int nh_flags;
+ unsigned long event;
+ };
};
+static struct rt6_info *rt6_multipath_first_sibling(const struct rt6_info *rt)
+{
+ struct rt6_info *iter;
+ struct fib6_node *fn;
+
+ fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ iter = rcu_dereference_protected(fn->leaf,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ while (iter) {
+ if (iter->rt6i_metric == rt->rt6i_metric &&
+ rt6_qualify_for_ecmp(iter))
+ return iter;
+ iter = rcu_dereference_protected(iter->rt6_next,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
+ }
+
+ return NULL;
+}
+
+static bool rt6_is_dead(const struct rt6_info *rt)
+{
+ if (rt->rt6i_nh_flags & RTNH_F_DEAD ||
+ (rt->rt6i_nh_flags & RTNH_F_LINKDOWN &&
+ rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
+ return true;
+
+ return false;
+}
+
+static int rt6_multipath_total_weight(const struct rt6_info *rt)
+{
+ struct rt6_info *iter;
+ int total = 0;
+
+ if (!rt6_is_dead(rt))
+ total += rt->rt6i_nh_weight;
+
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings) {
+ if (!rt6_is_dead(iter))
+ total += iter->rt6i_nh_weight;
+ }
+
+ return total;
+}
+
+static void rt6_upper_bound_set(struct rt6_info *rt, int *weight, int total)
+{
+ int upper_bound = -1;
+
+ if (!rt6_is_dead(rt)) {
+ *weight += rt->rt6i_nh_weight;
+ upper_bound = DIV_ROUND_CLOSEST_ULL((u64) (*weight) << 31,
+ total) - 1;
+ }
+ atomic_set(&rt->rt6i_nh_upper_bound, upper_bound);
+}
+
+static void rt6_multipath_upper_bound_set(struct rt6_info *rt, int total)
+{
+ struct rt6_info *iter;
+ int weight = 0;
+
+ rt6_upper_bound_set(rt, &weight, total);
+
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ rt6_upper_bound_set(iter, &weight, total);
+}
+
+void rt6_multipath_rebalance(struct rt6_info *rt)
+{
+ struct rt6_info *first;
+ int total;
+
+ /* In case the entire multipath route was marked for flushing,
+ * then there is no need to rebalance upon the removal of every
+ * sibling route.
+ */
+ if (!rt->rt6i_nsiblings || rt->should_flush)
+ return;
+
+ /* During lookup routes are evaluated in order, so we need to
+ * make sure upper bounds are assigned from the first sibling
+ * onwards.
+ */
+ first = rt6_multipath_first_sibling(rt);
+ if (WARN_ON_ONCE(!first))
+ return;
+
+ total = rt6_multipath_total_weight(first);
+ rt6_multipath_upper_bound_set(first, total);
+}
+
+static int fib6_ifup(struct rt6_info *rt, void *p_arg)
+{
+ const struct arg_netdev_event *arg = p_arg;
+ const struct net *net = dev_net(arg->dev);
+
+ if (rt != net->ipv6.ip6_null_entry && rt->dst.dev == arg->dev) {
+ rt->rt6i_nh_flags &= ~arg->nh_flags;
+ fib6_update_sernum_upto_root(dev_net(rt->dst.dev), rt);
+ rt6_multipath_rebalance(rt);
+ }
+
+ return 0;
+}
+
+void rt6_sync_up(struct net_device *dev, unsigned int nh_flags)
+{
+ struct arg_netdev_event arg = {
+ .dev = dev,
+ {
+ .nh_flags = nh_flags,
+ },
+ };
+
+ if (nh_flags & RTNH_F_DEAD && netif_carrier_ok(dev))
+ arg.nh_flags |= RTNH_F_LINKDOWN;
+
+ fib6_clean_all(dev_net(dev), fib6_ifup, &arg);
+}
+
+static bool rt6_multipath_uses_dev(const struct rt6_info *rt,
+ const struct net_device *dev)
+{
+ struct rt6_info *iter;
+
+ if (rt->dst.dev == dev)
+ return true;
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ if (iter->dst.dev == dev)
+ return true;
+
+ return false;
+}
+
+static void rt6_multipath_flush(struct rt6_info *rt)
+{
+ struct rt6_info *iter;
+
+ rt->should_flush = 1;
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ iter->should_flush = 1;
+}
+
+static unsigned int rt6_multipath_dead_count(const struct rt6_info *rt,
+ const struct net_device *down_dev)
+{
+ struct rt6_info *iter;
+ unsigned int dead = 0;
+
+ if (rt->dst.dev == down_dev || rt->rt6i_nh_flags & RTNH_F_DEAD)
+ dead++;
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ if (iter->dst.dev == down_dev ||
+ iter->rt6i_nh_flags & RTNH_F_DEAD)
+ dead++;
+
+ return dead;
+}
+
+static void rt6_multipath_nh_flags_set(struct rt6_info *rt,
+ const struct net_device *dev,
+ unsigned int nh_flags)
+{
+ struct rt6_info *iter;
+
+ if (rt->dst.dev == dev)
+ rt->rt6i_nh_flags |= nh_flags;
+ list_for_each_entry(iter, &rt->rt6i_siblings, rt6i_siblings)
+ if (iter->dst.dev == dev)
+ iter->rt6i_nh_flags |= nh_flags;
+}
+
/* called with write lock held for table with rt */
-static int fib6_ifdown(struct rt6_info *rt, void *arg)
+static int fib6_ifdown(struct rt6_info *rt, void *p_arg)
{
- const struct arg_dev_net *adn = arg;
- const struct net_device *dev = adn->dev;
+ const struct arg_netdev_event *arg = p_arg;
+ const struct net_device *dev = arg->dev;
+ const struct net *net = dev_net(dev);
- if ((rt->dst.dev == dev || !dev) &&
- rt != adn->net->ipv6.ip6_null_entry &&
- (rt->rt6i_nsiblings == 0 ||
- (dev && netdev_unregistering(dev)) ||
- !rt->rt6i_idev->cnf.ignore_routes_with_linkdown))
- return -1;
+ if (rt == net->ipv6.ip6_null_entry)
+ return 0;
+
+ switch (arg->event) {
+ case NETDEV_UNREGISTER:
+ return rt->dst.dev == dev ? -1 : 0;
+ case NETDEV_DOWN:
+ if (rt->should_flush)
+ return -1;
+ if (!rt->rt6i_nsiblings)
+ return rt->dst.dev == dev ? -1 : 0;
+ if (rt6_multipath_uses_dev(rt, dev)) {
+ unsigned int count;
+
+ count = rt6_multipath_dead_count(rt, dev);
+ if (rt->rt6i_nsiblings + 1 == count) {
+ rt6_multipath_flush(rt);
+ return -1;
+ }
+ rt6_multipath_nh_flags_set(rt, dev, RTNH_F_DEAD |
+ RTNH_F_LINKDOWN);
+ fib6_update_sernum(rt);
+ rt6_multipath_rebalance(rt);
+ }
+ return -2;
+ case NETDEV_CHANGE:
+ if (rt->dst.dev != dev ||
+ rt->rt6i_flags & (RTF_LOCAL | RTF_ANYCAST))
+ break;
+ rt->rt6i_nh_flags |= RTNH_F_LINKDOWN;
+ rt6_multipath_rebalance(rt);
+ break;
+ }
return 0;
}
-void rt6_ifdown(struct net *net, struct net_device *dev)
+void rt6_sync_down_dev(struct net_device *dev, unsigned long event)
{
- struct arg_dev_net adn = {
+ struct arg_netdev_event arg = {
.dev = dev,
- .net = net,
+ {
+ .event = event,
+ },
};
- fib6_clean_all(net, fib6_ifdown, &adn);
- if (dev)
- rt6_uncached_list_flush_dev(net, dev);
+ fib6_clean_all(dev_net(dev), fib6_ifdown, &arg);
+}
+
+void rt6_disable_ip(struct net_device *dev, unsigned long event)
+{
+ rt6_sync_down_dev(dev, event);
+ rt6_uncached_list_flush_dev(dev_net(dev), dev);
+ neigh_ifdown(&nd_tbl, dev);
}
struct rt6_mtu_change_arg {
@@ -3603,6 +3893,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rtm->rtm_flags & RTM_F_CLONED)
cfg->fc_flags |= RTF_CACHE;
+ cfg->fc_flags |= (rtm->rtm_flags & RTNH_F_ONLINK);
+
cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid;
cfg->fc_nlinfo.nlh = nlh;
cfg->fc_nlinfo.nl_net = sock_net(skb->sk);
@@ -3812,6 +4104,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
goto cleanup;
}
+ rt->rt6i_nh_weight = rtnh->rtnh_hops + 1;
+
err = ip6_route_info_append(&rt6_nh_list, rt, &r_cfg);
if (err) {
dst_release_immediate(&rt->dst);
@@ -3992,7 +4286,10 @@ static size_t rt6_nlmsg_size(struct rt6_info *rt)
static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
unsigned int *flags, bool skip_oif)
{
- if (!netif_running(rt->dst.dev) || !netif_carrier_ok(rt->dst.dev)) {
+ if (rt->rt6i_nh_flags & RTNH_F_DEAD)
+ *flags |= RTNH_F_DEAD;
+
+ if (rt->rt6i_nh_flags & RTNH_F_LINKDOWN) {
*flags |= RTNH_F_LINKDOWN;
if (rt->rt6i_idev->cnf.ignore_routes_with_linkdown)
*flags |= RTNH_F_DEAD;
@@ -4003,6 +4300,7 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt,
goto nla_put_failure;
}
+ *flags |= (rt->rt6i_nh_flags & RTNH_F_ONLINK);
if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD)
*flags |= RTNH_F_OFFLOAD;
@@ -4031,7 +4329,7 @@ static int rt6_add_nexthop(struct sk_buff *skb, struct rt6_info *rt)
if (!rtnh)
goto nla_put_failure;
- rtnh->rtnh_hops = 0;
+ rtnh->rtnh_hops = rt->rt6i_nh_weight - 1;
rtnh->rtnh_ifindex = rt->dst.dev ? rt->dst.dev->ifindex : 0;
if (rt6_nexthop_info(skb, rt, &flags, true) < 0)
@@ -4321,9 +4619,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
goto errout;
}
- if (fibmatch && rt->dst.from) {
- struct rt6_info *ort = container_of(rt->dst.from,
- struct rt6_info, dst);
+ if (fibmatch && rt->from) {
+ struct rt6_info *ort = rt->from;
dst_hold(&ort->dst);
ip6_rt_put(rt);
@@ -4427,7 +4724,6 @@ static int ip6_route_dev_notify(struct notifier_block *this,
#ifdef CONFIG_PROC_FS
static const struct file_operations ipv6_route_proc_fops = {
- .owner = THIS_MODULE,
.open = ipv6_route_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -4455,7 +4751,6 @@ static int rt6_stats_seq_open(struct inode *inode, struct file *file)
}
static const struct file_operations rt6_stats_seq_fops = {
- .owner = THIS_MODULE,
.open = rt6_stats_seq_open,
.read = seq_read,
.llseek = seq_lseek,
@@ -4600,8 +4895,6 @@ static int __net_init ip6_route_net_init(struct net *net)
GFP_KERNEL);
if (!net->ipv6.ip6_null_entry)
goto out_ip6_dst_entries;
- net->ipv6.ip6_null_entry->dst.path =
- (struct dst_entry *)net->ipv6.ip6_null_entry;
net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
ip6_template_metrics, true);
@@ -4613,8 +4906,6 @@ static int __net_init ip6_route_net_init(struct net *net)
GFP_KERNEL);
if (!net->ipv6.ip6_prohibit_entry)
goto out_ip6_null_entry;
- net->ipv6.ip6_prohibit_entry->dst.path =
- (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
ip6_template_metrics, true);
@@ -4624,8 +4915,6 @@ static int __net_init ip6_route_net_init(struct net *net)
GFP_KERNEL);
if (!net->ipv6.ip6_blk_hole_entry)
goto out_ip6_prohibit_entry;
- net->ipv6.ip6_blk_hole_entry->dst.path =
- (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
ip6_template_metrics, true);
@@ -4782,11 +5071,20 @@ int __init ip6_route_init(void)
if (ret)
goto fib6_rules_init;
- ret = -ENOBUFS;
- if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) ||
- __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) ||
- __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL,
- RTNL_FLAG_DOIT_UNLOCKED))
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_NEWROUTE,
+ inet6_rtm_newroute, NULL, 0);
+ if (ret < 0)
+ goto out_register_late_subsys;
+
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_DELROUTE,
+ inet6_rtm_delroute, NULL, 0);
+ if (ret < 0)
+ goto out_register_late_subsys;
+
+ ret = rtnl_register_module(THIS_MODULE, PF_INET6, RTM_GETROUTE,
+ inet6_rtm_getroute, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED);
+ if (ret < 0)
goto out_register_late_subsys;
ret = register_netdevice_notifier(&ip6_route_dev_notifier);
@@ -4804,6 +5102,7 @@ out:
return ret;
out_register_late_subsys:
+ rtnl_unregister_all(PF_INET6);
unregister_pernet_subsys(&ip6_route_net_late_ops);
fib6_rules_init:
fib6_rules_cleanup();
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index c81407770956..7f5621d09571 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -306,9 +306,7 @@ static int seg6_genl_dumphmac(struct sk_buff *skb, struct netlink_callback *cb)
struct seg6_hmac_info *hinfo;
int ret;
- ret = rhashtable_walk_start(iter);
- if (ret && ret != -EAGAIN)
- goto done;
+ rhashtable_walk_start(iter);
for (;;) {
hinfo = rhashtable_walk_next(iter);
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 825b8e01f947..ba3767ef5e93 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -501,7 +501,7 @@ static struct seg6_action_desc *__get_action_desc(int action)
struct seg6_action_desc *desc;
int i, count;
- count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc);
+ count = ARRAY_SIZE(seg6_action_table);
for (i = 0; i < count; i++) {
desc = &seg6_action_table[i];
if (desc->action == action)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7178476b3d2f..a1ab29e2ab3b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -176,8 +176,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
/* If interface is set while binding, indices
* must coincide.
*/
- if (sk->sk_bound_dev_if &&
- sk->sk_bound_dev_if != usin->sin6_scope_id)
+ if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
return -EINVAL;
sk->sk_bound_dev_if = usin->sin6_scope_id;
@@ -1795,7 +1794,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
timer_expires = jiffies;
}
- state = sk_state_load(sp);
+ state = inet_sk_state_load(sp);
if (state == TCP_LISTEN)
rx_queue = sp->sk_ack_backlog;
else
@@ -1884,7 +1883,6 @@ out:
}
static const struct file_operations tcp6_afinfo_seq_fops = {
- .owner = THIS_MODULE,
.open = tcp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3f30fa313bf2..52e3ea0e6f50 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -89,28 +89,12 @@ static u32 udp6_ehashfn(const struct net *net,
udp_ipv6_hash_secret + net_hash_mix(net));
}
-static u32 udp6_portaddr_hash(const struct net *net,
- const struct in6_addr *addr6,
- unsigned int port)
-{
- unsigned int hash, mix = net_hash_mix(net);
-
- if (ipv6_addr_any(addr6))
- hash = jhash_1word(0, mix);
- else if (ipv6_addr_v4mapped(addr6))
- hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
- else
- hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
-
- return hash ^ port;
-}
-
int udp_v6_get_port(struct sock *sk, unsigned short snum)
{
unsigned int hash2_nulladdr =
- udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
+ ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum);
unsigned int hash2_partial =
- udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
+ ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);
/* precompute partial secondary hash */
udp_sk(sk)->udp_portaddr_hash = hash2_partial;
@@ -119,7 +103,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
static void udp_v6_rehash(struct sock *sk)
{
- u16 new_hash = udp6_portaddr_hash(sock_net(sk),
+ u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
&sk->sk_v6_rcv_saddr,
inet_sk(sk)->inet_num);
@@ -184,7 +168,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
struct udp_hslot *hslot2, struct sk_buff *skb)
{
struct sock *sk, *result;
- int score, badness, matches = 0, reuseport = 0;
+ int score, badness;
u32 hash = 0;
result = NULL;
@@ -193,8 +177,7 @@ static struct sock *udp6_lib_lookup2(struct net *net,
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
@@ -202,15 +185,9 @@ static struct sock *udp6_lib_lookup2(struct net *net,
sizeof(struct udphdr));
if (result)
return result;
- matches = 1;
}
result = sk;
badness = score;
- } else if (score == badness && reuseport) {
- matches++;
- if (reciprocal_scale(hash, matches) == 0)
- result = sk;
- hash = next_pseudo_random32(hash);
}
}
return result;
@@ -228,11 +205,11 @@ struct sock *__udp6_lib_lookup(struct net *net,
unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
bool exact_dif = udp6_lib_exact_dif_match(net, skb);
- int score, badness, matches = 0, reuseport = 0;
+ int score, badness;
u32 hash = 0;
if (hslot->count > 10) {
- hash2 = udp6_portaddr_hash(net, daddr, hnum);
+ hash2 = ipv6_portaddr_hash(net, daddr, hnum);
slot2 = hash2 & udptable->mask;
hslot2 = &udptable->hash2[slot2];
if (hslot->count < hslot2->count)
@@ -243,7 +220,7 @@ struct sock *__udp6_lib_lookup(struct net *net,
hslot2, skb);
if (!result) {
unsigned int old_slot2 = slot2;
- hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum);
+ hash2 = ipv6_portaddr_hash(net, &in6addr_any, hnum);
slot2 = hash2 & udptable->mask;
/* avoid searching the same slot again. */
if (unlikely(slot2 == old_slot2))
@@ -267,23 +244,16 @@ begin:
score = compute_score(sk, net, saddr, sport, daddr, hnum, dif,
sdif, exact_dif);
if (score > badness) {
- reuseport = sk->sk_reuseport;
- if (reuseport) {
+ if (sk->sk_reuseport) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);
result = reuseport_select_sock(sk, hash, skb,
sizeof(struct udphdr));
if (result)
return result;
- matches = 1;
}
result = sk;
badness = score;
- } else if (score == badness && reuseport) {
- matches++;
- if (reciprocal_scale(hash, matches) == 0)
- result = sk;
- hash = next_pseudo_random32(hash);
}
}
return result;
@@ -719,9 +689,9 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
struct sk_buff *nskb;
if (use_hash2) {
- hash2_any = udp6_portaddr_hash(net, &in6addr_any, hnum) &
+ hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) &
udptable->mask;
- hash2 = udp6_portaddr_hash(net, daddr, hnum) & udptable->mask;
+ hash2 = ipv6_portaddr_hash(net, daddr, hnum) & udptable->mask;
start_lookup:
hslot = &udptable->hash2[hash2];
offset = offsetof(typeof(*sk), __sk_common.skc_portaddr_node);
@@ -909,7 +879,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net,
int dif, int sdif)
{
unsigned short hnum = ntohs(loc_port);
- unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum);
+ unsigned int hash2 = ipv6_portaddr_hash(net, loc_addr, hnum);
unsigned int slot2 = hash2 & udp_table.mask;
struct udp_hslot *hslot2 = &udp_table.hash2[slot2];
const __portpair ports = INET_COMBINED_PORTS(rmt_port, hnum);
@@ -1509,7 +1479,6 @@ int udp6_seq_show(struct seq_file *seq, void *v)
}
static const struct file_operations udp6_afinfo_seq_fops = {
- .owner = THIS_MODULE,
.open = udp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index 2784cc363f2b..14ae32bb1f3d 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -94,7 +94,6 @@ void udplitev6_exit(void)
#ifdef CONFIG_PROC_FS
static const struct file_operations udplite6_afinfo_seq_fops = {
- .owner = THIS_MODULE,
.open = udp_seq_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index dc93002ff9d1..bb935a3b7fea 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -59,7 +59,7 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
if (x->props.flags & XFRM_STATE_NOECN)
dsfield &= ~INET_ECN_MASK;
ipv6_change_dsfield(top_iph, 0, dsfield);
- top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
+ top_iph->hop_limit = ip6_dst_hoplimit(xfrm_dst_child(dst));
top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
return 0;
@@ -106,17 +106,14 @@ static struct sk_buff *xfrm6_mode_tunnel_gso_segment(struct xfrm_state *x,
{
__skb_push(skb, skb->mac_len);
return skb_mac_gso_segment(skb, features);
-
}
static void xfrm6_mode_tunnel_xmit(struct xfrm_state *x, struct sk_buff *skb)
{
struct xfrm_offload *xo = xfrm_offload(skb);
- if (xo->flags & XFRM_GSO_SEGMENT) {
- skb->network_header = skb->network_header - x->props.header_len;
+ if (xo->flags & XFRM_GSO_SEGMENT)
skb->transport_header = skb->network_header + sizeof(struct ipv6hdr);
- }
skb_reset_mac_len(skb);
pskb_pull(skb, skb->mac_len + x->props.header_len);
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 885ade234a49..09fb44ee3b45 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -265,7 +265,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
in6_dev_put(xdst->u.rt6.rt6i_idev);
xdst->u.rt6.rt6i_idev = loopback_idev;
in6_dev_hold(loopback_idev);
- xdst = (struct xfrm_dst *)xdst->u.dst.child;
+ xdst = (struct xfrm_dst *)xfrm_dst_child(&xdst->u.dst);
} while (xdst->u.dst.xfrm);
__in6_dev_put(loopback_idev);