about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r-- net/ipv6/addrconf.c 107
-rw-r--r-- net/ipv6/addrconf_core.c 6
-rw-r--r-- net/ipv6/addrlabel.c 47
-rw-r--r-- net/ipv6/af_inet6.c 21
-rw-r--r-- net/ipv6/datagram.c 11
-rw-r--r-- net/ipv6/esp6.c 2
-rw-r--r-- net/ipv6/fou6.c 19
-rw-r--r-- net/ipv6/icmp.c 40
-rw-r--r-- net/ipv6/ila/ila_xlat.c 17
-rw-r--r-- net/ipv6/ip6_gre.c 96
-rw-r--r-- net/ipv6/ip6_offload.c 33
-rw-r--r-- net/ipv6/ip6_output.c 6
-rw-r--r-- net/ipv6/ip6_udp_tunnel.c 15
-rw-r--r-- net/ipv6/ip6mr.c 87
-rw-r--r-- net/ipv6/ipv6_sockglue.c 10
-rw-r--r-- net/ipv6/mcast.c 2
-rw-r--r-- net/ipv6/mcast_snoop.c 84
-rw-r--r-- net/ipv6/netfilter.c 17
-rw-r--r-- net/ipv6/netfilter/Kconfig 48
-rw-r--r-- net/ipv6/netfilter/Makefile 7
-rw-r--r-- net/ipv6/netfilter/ip6table_nat.c 8
-rw-r--r-- net/ipv6/netfilter/nf_conntrack_reasm.c 261
-rw-r--r-- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c 411
-rw-r--r-- net/ipv6/netfilter/nf_nat_masquerade_ipv6.c 223
-rw-r--r-- net/ipv6/netfilter/nf_reject_ipv6.c 3
-rw-r--r-- net/ipv6/netfilter/nft_chain_nat_ipv6.c 85
-rw-r--r-- net/ipv6/netfilter/nft_fib_ipv6.c 9
-rw-r--r-- net/ipv6/netfilter/nft_masq_ipv6.c 91
-rw-r--r-- net/ipv6/netfilter/nft_redir_ipv6.c 83
-rw-r--r-- net/ipv6/reassembly.c 234
-rw-r--r-- net/ipv6/route.c 131
-rw-r--r-- net/ipv6/seg6.c 4
-rw-r--r-- net/ipv6/seg6_iptunnel.c 2
-rw-r--r-- net/ipv6/sit.c 4
-rw-r--r-- net/ipv6/tcp_ipv6.c 4
-rw-r--r-- net/ipv6/udp.c 38
-rw-r--r-- net/ipv6/udp_impl.h 1
-rw-r--r-- net/ipv6/udplite.c 1
-rw-r--r-- net/ipv6/xfrm6_tunnel.c 2
39 files changed, 700 insertions, 1570 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8eeec6eb2bd3..4ae17a966ae3 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -597,6 +597,43 @@ static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {
[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN] = { .len = sizeof(int) },
};
+/*
+ * Validate a netconf get request and parse its attributes into @tb.
+ *
+ * The message has to be at least as long as a struct netconfmsg header.
+ * If netlink_strict_get_check() returns false the attributes are parsed
+ * with nlmsg_parse() and its result is returned as-is.  Otherwise they
+ * are parsed with nlmsg_parse_strict() and any attribute other than
+ * NETCONFA_IFINDEX is refused.
+ *
+ * Returns 0 on success, -EINVAL for a short header or an unsupported
+ * attribute, or whatever error the parser reported.
+ */
+static int inet6_netconf_valid_get_req(struct sk_buff *skb,
+				       const struct nlmsghdr *nlh,
+				       struct nlattr **tb,
+				       struct netlink_ext_ack *extack)
+{
+	int attr, rc;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(struct netconfmsg))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for netconf get request");
+		return -EINVAL;
+	}
+
+	/* Non-strict requesters get the plain parser and nothing more. */
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse(nlh, sizeof(struct netconfmsg), tb,
+				   NETCONFA_MAX, devconf_ipv6_policy, extack);
+
+	rc = nlmsg_parse_strict(nlh, sizeof(struct netconfmsg), tb,
+				NETCONFA_MAX, devconf_ipv6_policy, extack);
+	if (rc)
+		return rc;
+
+	/* NETCONFA_IFINDEX is the only attribute a strict request may carry. */
+	for (attr = 0; attr <= NETCONFA_MAX; attr++) {
+		if (!tb[attr] || attr == NETCONFA_IFINDEX)
+			continue;
+
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in netconf get request");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
@@ -605,14 +642,12 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
struct nlattr *tb[NETCONFA_MAX+1];
struct inet6_dev *in6_dev = NULL;
struct net_device *dev = NULL;
- struct netconfmsg *ncm;
struct sk_buff *skb;
struct ipv6_devconf *devconf;
int ifindex;
int err;
- err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX,
- devconf_ipv6_policy, extack);
+ err = inet6_netconf_valid_get_req(in_skb, nlh, tb, extack);
if (err < 0)
return err;
@@ -1165,7 +1200,8 @@ check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires)
list_for_each_entry(ifa, &idev->addr_list, if_list) {
if (ifa == ifp)
continue;
- if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr,
+ if (ifa->prefix_len != ifp->prefix_len ||
+ !ipv6_prefix_equal(&ifa->addr, &ifp->addr,
ifp->prefix_len))
continue;
if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE))
@@ -3495,8 +3531,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
if (!addrconf_link_ready(dev)) {
/* device is not ready yet. */
- pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
- dev->name);
+ pr_debug("ADDRCONF(NETDEV_UP): %s: link is not ready\n",
+ dev->name);
break;
}
@@ -5120,6 +5156,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
if (idev) {
err = in6_dump_addrs(idev, skb, cb, s_ip_idx,
&fillargs);
+ if (err > 0)
+ err = 0;
}
goto put_tgt_net;
}
@@ -5154,7 +5192,7 @@ put_tgt_net:
if (fillargs.netnsid >= 0)
put_net(tgt_net);
- return err < 0 ? err : skb->len;
+ return skb->len ? : err;
}
static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
@@ -5179,6 +5217,52 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb)
return inet6_dump_addr(skb, cb, type);
}
+/*
+ * Validate a get-address request and parse its attributes into @tb.
+ *
+ * The message has to be at least as long as a struct ifaddrmsg header.
+ *
+ * If netlink_strict_get_check() returns false, the attributes are parsed
+ * with nlmsg_parse() and nothing else is enforced.  That matches what the
+ * caller did before this helper existed (a bare nlmsg_parse()), so
+ * requests that used to be accepted are not rejected because of header
+ * contents.
+ *
+ * Otherwise ifa_prefixlen, ifa_flags and ifa_scope must all be zero, the
+ * attributes are parsed with nlmsg_parse_strict(), and only
+ * IFA_TARGET_NETNSID, IFA_ADDRESS and IFA_LOCAL are accepted.
+ *
+ * Returns 0 on success, -EINVAL for a short header, non-zero header
+ * fields (strict mode) or an unsupported attribute (strict mode), or
+ * whatever error the parser reported.
+ */
+static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
+				       const struct nlmsghdr *nlh,
+				       struct nlattr **tb,
+				       struct netlink_ext_ack *extack)
+{
+	struct ifaddrmsg *ifm;
+	int i, err;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ifm))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for get address request");
+		return -EINVAL;
+	}
+
+	/* Header values are only policed for strict requesters; the gate
+	 * must come first or non-strict requests would start failing.
+	 */
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX,
+				   ifa_ipv6_policy, extack);
+
+	ifm = nlmsg_data(nlh);
+	if (ifm->ifa_prefixlen || ifm->ifa_flags || ifm->ifa_scope) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get address request");
+		return -EINVAL;
+	}
+
+	err = nlmsg_parse_strict(nlh, sizeof(*ifm), tb, IFA_MAX,
+				 ifa_ipv6_policy, extack);
+	if (err)
+		return err;
+
+	for (i = 0; i <= IFA_MAX; i++) {
+		if (!tb[i])
+			continue;
+
+		switch (i) {
+		case IFA_TARGET_NETNSID:
+		case IFA_ADDRESS:
+		case IFA_LOCAL:
+			break;
+		default:
+			NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get address request");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -5199,8 +5283,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct sk_buff *skb;
int err;
- err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
- extack);
+ err = inet6_rtm_valid_getaddr_req(in_skb, nlh, tb, extack);
if (err < 0)
return err;
@@ -6822,6 +6905,12 @@ static int __net_init addrconf_init_net(struct net *net)
if (!dflt)
goto err_alloc_dflt;
+ if (IS_ENABLED(CONFIG_SYSCTL) &&
+ sysctl_devconf_inherit_init_net == 1 && !net_eq(net, &init_net)) {
+ memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf));
+ memcpy(dflt, init_net.ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt));
+ }
+
/* these will be inherited by all namespaces */
dflt->autoconf = ipv6_defaults.autoconf;
dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 5cd0029d930e..6c79af056d9b 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -134,6 +134,11 @@ static int eafnosupport_ipv6_dst_lookup(struct net *net, struct sock *u1,
return -EAFNOSUPPORT;
}
+/*
+ * Placeholder route-input hook for the default ipv6_stub initializer in
+ * this file: it does not touch @skb and always returns -EAFNOSUPPORT.
+ */
+static int eafnosupport_ipv6_route_input(struct sk_buff *skb)
+{
+ return -EAFNOSUPPORT;
+}
+
static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id)
{
return NULL;
@@ -170,6 +175,7 @@ eafnosupport_ip6_mtu_from_fib6(struct fib6_info *f6i, struct in6_addr *daddr,
const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
.ipv6_dst_lookup = eafnosupport_ipv6_dst_lookup,
+ .ipv6_route_input = eafnosupport_ipv6_route_input,
.fib6_get_table = eafnosupport_fib6_get_table,
.fib6_table_lookup = eafnosupport_fib6_table_lookup,
.fib6_lookup = eafnosupport_fib6_lookup,
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index 0d1ee82ee55b..d43d076c98f5 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -523,6 +523,50 @@ static inline int ip6addrlbl_msgsize(void)
+ nla_total_size(4); /* IFAL_LABEL */
}
+/*
+ * Validate an addrlabel get request and parse its attributes into @tb.
+ *
+ * The message has to be at least as long as a struct ifaddrlblmsg header.
+ * If netlink_strict_get_check() returns false the attributes are parsed
+ * with nlmsg_parse() and its result is returned as-is.  Otherwise the
+ * __ifal_reserved, ifal_flags and ifal_seq header fields must be zero,
+ * the attributes are parsed with nlmsg_parse_strict(), and IFAL_ADDRESS
+ * is the only attribute accepted.
+ *
+ * Returns 0 on success, -EINVAL for a bad header or an unsupported
+ * attribute, or whatever error the parser reported.
+ */
+static int ip6addrlbl_valid_get_req(struct sk_buff *skb,
+				    const struct nlmsghdr *nlh,
+				    struct nlattr **tb,
+				    struct netlink_ext_ack *extack)
+{
+	struct ifaddrlblmsg *hdr;
+	int attr, rc;
+
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*hdr))) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid header for addrlabel get request");
+		return -EINVAL;
+	}
+
+	/* Non-strict requesters get the plain parser and nothing more. */
+	if (!netlink_strict_get_check(skb))
+		return nlmsg_parse(nlh, sizeof(*hdr), tb, IFAL_MAX,
+				   ifal_policy, extack);
+
+	hdr = nlmsg_data(nlh);
+	if (hdr->__ifal_reserved || hdr->ifal_flags || hdr->ifal_seq) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for addrlabel get request");
+		return -EINVAL;
+	}
+
+	rc = nlmsg_parse_strict(nlh, sizeof(*hdr), tb, IFAL_MAX,
+				ifal_policy, extack);
+	if (rc)
+		return rc;
+
+	/* IFAL_ADDRESS is the only attribute a strict request may carry. */
+	for (attr = 0; attr <= IFAL_MAX; attr++) {
+		if (!tb[attr] || attr == IFAL_ADDRESS)
+			continue;
+
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in addrlabel get request");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -535,8 +579,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct ip6addrlbl_entry *p;
struct sk_buff *skb;
- err = nlmsg_parse(nlh, sizeof(*ifal), tb, IFAL_MAX, ifal_policy,
- extack);
+ err = ip6addrlbl_valid_get_req(in_skb, nlh, tb, extack);
if (err < 0)
return err;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 0bfb6cc0a30a..2f45d2a3e3a3 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -310,6 +310,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
/* Check if the address belongs to the host. */
if (addr_type == IPV6_ADDR_MAPPED) {
+ struct net_device *dev = NULL;
int chk_addr_ret;
/* Binding to v4-mapped address on a v6-only socket
@@ -320,9 +321,20 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
goto out;
}
+ rcu_read_lock();
+ if (sk->sk_bound_dev_if) {
+ dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ if (!dev) {
+ err = -ENODEV;
+ goto out_unlock;
+ }
+ }
+
/* Reproduce AF_INET checks to make the bindings consistent */
v4addr = addr->sin6_addr.s6_addr32[3];
- chk_addr_ret = inet_addr_type(net, v4addr);
+ chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr);
+ rcu_read_unlock();
+
if (!inet_can_nonlocal_bind(net, inet) &&
v4addr != htonl(INADDR_ANY) &&
chk_addr_ret != RTN_LOCAL &&
@@ -888,10 +900,17 @@ static struct pernet_operations inet6_net_ops = {
.exit = inet6_net_exit,
};
+/*
+ * int-returning wrapper around ip6_route_input() for the ipv6_stub
+ * table: run the lookup, then return the error field of the dst now
+ * attached to @skb.
+ */
+static int ipv6_route_input(struct sk_buff *skb)
+{
+	struct dst_entry *dst;
+
+	ip6_route_input(skb);
+	dst = skb_dst(skb);
+
+	return dst->error;
+}
+
static const struct ipv6_stub ipv6_stub_impl = {
.ipv6_sock_mc_join = ipv6_sock_mc_join,
.ipv6_sock_mc_drop = ipv6_sock_mc_drop,
.ipv6_dst_lookup = ip6_dst_lookup,
+ .ipv6_route_input = ipv6_route_input,
.fib6_get_table = fib6_get_table,
.fib6_table_lookup = fib6_table_lookup,
.fib6_lookup = fib6_lookup,
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index bde08aa549f3..ee4a4e54d016 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -341,6 +341,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
skb_reset_network_header(skb);
iph = ipv6_hdr(skb);
iph->daddr = fl6->daddr;
+ ip6_flow_hdr(iph, 0, 0);
serr = SKB_EXT_ERR(skb);
serr->ee.ee_errno = err;
@@ -700,17 +701,15 @@ void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg,
}
if (np->rxopt.bits.rxorigdstaddr) {
struct sockaddr_in6 sin6;
- __be16 *ports;
- int end;
+ __be16 _ports[2], *ports;
- end = skb_transport_offset(skb) + 4;
- if (end <= 0 || pskb_may_pull(skb, end)) {
+ ports = skb_header_pointer(skb, skb_transport_offset(skb),
+ sizeof(_ports), &_ports);
+ if (ports) {
/* All current transport protocols have the port numbers in the
* first four bytes of the transport header and this function is
* written with this assumption in mind.
*/
- ports = (__be16 *)skb_transport_header(skb);
-
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = ipv6_hdr(skb)->daddr;
sin6.sin6_port = ports[1];
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 5afe9f83374d..239d4a65ad6e 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -296,7 +296,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
skb->len += tailen;
skb->data_len += tailen;
skb->truesize += tailen;
- if (sk)
+ if (sk && sk_fullsock(sk))
refcount_add(tailen, &sk->sk_wmem_alloc);
goto out;
diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c
index bd675c61deb1..867474abe269 100644
--- a/net/ipv6/fou6.c
+++ b/net/ipv6/fou6.c
@@ -72,7 +72,7 @@ static int gue6_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
static int gue6_err_proto_handler(int proto, struct sk_buff *skb,
struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, u32 info)
+ u8 type, u8 code, int offset, __be32 info)
{
const struct inet6_protocol *ipprot;
@@ -90,10 +90,11 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
{
int transport_offset = skb_transport_offset(skb);
struct guehdr *guehdr;
- size_t optlen;
+ size_t len, optlen;
int ret;
- if (skb->len < sizeof(struct udphdr) + sizeof(struct guehdr))
+ len = sizeof(struct udphdr) + sizeof(struct guehdr);
+ if (!pskb_may_pull(skb, len))
return -EINVAL;
guehdr = (struct guehdr *)&udp_hdr(skb)[1];
@@ -128,9 +129,21 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
optlen = guehdr->hlen << 2;
+ if (!pskb_may_pull(skb, len + optlen))
+ return -EINVAL;
+
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
if (validate_gue_flags(guehdr, optlen))
return -EINVAL;
+ /* Handling exceptions for direct UDP encapsulation in GUE would lead to
+ * recursion. Besides, this kind of encapsulation can't even be
+ * configured currently. Discard this.
+ */
+ if (guehdr->proto_ctype == IPPROTO_UDP ||
+ guehdr->proto_ctype == IPPROTO_UDPLITE)
+ return -EOPNOTSUPP;
+
skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr));
ret = gue6_err_proto_handler(guehdr->proto_ctype, skb,
opt, type, code, offset, info);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 5d7aa2c2770c..802faa2fcc0e 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -81,7 +81,7 @@
*/
static inline struct sock *icmpv6_sk(struct net *net)
{
- return net->ipv6.icmp_sk[smp_processor_id()];
+ return *this_cpu_ptr(net->ipv6.icmp_sk);
}
static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
@@ -423,10 +423,10 @@ static int icmp6_iif(const struct sk_buff *skb)
static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
const struct in6_addr *force_saddr)
{
- struct net *net = dev_net(skb->dev);
struct inet6_dev *idev = NULL;
struct ipv6hdr *hdr = ipv6_hdr(skb);
struct sock *sk;
+ struct net *net;
struct ipv6_pinfo *np;
const struct in6_addr *saddr = NULL;
struct dst_entry *dst;
@@ -437,12 +437,16 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
int iif = 0;
int addr_type = 0;
int len;
- u32 mark = IP6_REPLY_MARK(net, skb->mark);
+ u32 mark;
if ((u8 *)hdr < skb->head ||
(skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
return;
+ if (!skb->dev)
+ return;
+ net = dev_net(skb->dev);
+ mark = IP6_REPLY_MARK(net, skb->mark);
/*
* Make sure we respect the rules
* i.e. RFC 1885 2.4(e)
@@ -949,13 +953,21 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
}
+/*
+ * Destroy the ICMPv6 control socket stored for every possible CPU and
+ * release the per-cpu pointer array itself.
+ *
+ * NOTE(review): icmpv6_sk_init() also calls this on its failure path,
+ * where some slots were presumably never filled; this relies on
+ * inet_ctl_sock_destroy() tolerating such entries -- confirm.
+ */
+static void __net_exit icmpv6_sk_exit(struct net *net)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct sock **slot = per_cpu_ptr(net->ipv6.icmp_sk, cpu);
+
+		inet_ctl_sock_destroy(*slot);
+	}
+
+	free_percpu(net->ipv6.icmp_sk);
+}
+
static int __net_init icmpv6_sk_init(struct net *net)
{
struct sock *sk;
- int err, i, j;
+ int err, i;
- net->ipv6.icmp_sk =
- kcalloc(nr_cpu_ids, sizeof(struct sock *), GFP_KERNEL);
+ net->ipv6.icmp_sk = alloc_percpu(struct sock *);
if (!net->ipv6.icmp_sk)
return -ENOMEM;
@@ -968,7 +980,7 @@ static int __net_init icmpv6_sk_init(struct net *net)
goto fail;
}
- net->ipv6.icmp_sk[i] = sk;
+ *per_cpu_ptr(net->ipv6.icmp_sk, i) = sk;
/* Enough space for 2 64K ICMP packets, including
* sk_buff struct overhead.
@@ -978,22 +990,10 @@ static int __net_init icmpv6_sk_init(struct net *net)
return 0;
fail:
- for (j = 0; j < i; j++)
- inet_ctl_sock_destroy(net->ipv6.icmp_sk[j]);
- kfree(net->ipv6.icmp_sk);
+ icmpv6_sk_exit(net);
return err;
}
-static void __net_exit icmpv6_sk_exit(struct net *net)
-{
- int i;
-
- for_each_possible_cpu(i) {
- inet_ctl_sock_destroy(net->ipv6.icmp_sk[i]);
- }
- kfree(net->ipv6.icmp_sk);
-}
-
static struct pernet_operations icmpv6_sk_ops = {
.init = icmpv6_sk_init,
.exit = icmpv6_sk_exit,
diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 17c455ff69ff..79d2e43c05c5 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -383,12 +383,9 @@ int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info)
struct rhashtable_iter iter;
struct ila_map *ila;
spinlock_t *lock;
- int ret;
-
- ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter, GFP_KERNEL);
- if (ret)
- goto done;
+ int ret = 0;
+ rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter);
rhashtable_walk_start(&iter);
for (;;) {
@@ -509,23 +506,17 @@ int ila_xlat_nl_dump_start(struct netlink_callback *cb)
struct net *net = sock_net(cb->skb->sk);
struct ila_net *ilan = net_generic(net, ila_net_id);
struct ila_dump_iter *iter;
- int ret;
iter = kmalloc(sizeof(*iter), GFP_KERNEL);
if (!iter)
return -ENOMEM;
- ret = rhashtable_walk_init(&ilan->xlat.rhash_table, &iter->rhiter,
- GFP_KERNEL);
- if (ret) {
- kfree(iter);
- return ret;
- }
+ rhashtable_walk_enter(&ilan->xlat.rhash_table, &iter->rhiter);
iter->skip = 0;
cb->args[0] = (long)iter;
- return ret;
+ return 0;
}
int ila_xlat_nl_dump_done(struct netlink_callback *cb)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 09d0826742f8..b32c95f02128 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -524,7 +524,7 @@ static int ip6gre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
return PACKET_REJECT;
}
-static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
+static int ip6erspan_rcv(struct sk_buff *skb,
struct tnl_ptk_info *tpi)
{
struct erspan_base_hdr *ershdr;
@@ -534,13 +534,9 @@ static int ip6erspan_rcv(struct sk_buff *skb, int gre_hdr_len,
struct ip6_tnl *tunnel;
u8 ver;
- if (unlikely(!pskb_may_pull(skb, sizeof(*ershdr))))
- return PACKET_REJECT;
-
ipv6h = ipv6_hdr(skb);
ershdr = (struct erspan_base_hdr *)skb->data;
ver = ershdr->ver;
- tpi->key = cpu_to_be32(get_session_id(ershdr));
tunnel = ip6gre_tunnel_lookup(skb->dev,
&ipv6h->saddr, &ipv6h->daddr, tpi->key,
@@ -611,7 +607,7 @@ static int gre_rcv(struct sk_buff *skb)
if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
tpi.proto == htons(ETH_P_ERSPAN2))) {
- if (ip6erspan_rcv(skb, hdr_len, &tpi) == PACKET_RCVD)
+ if (ip6erspan_rcv(skb, &tpi) == PACKET_RCVD)
return 0;
goto out;
}
@@ -922,6 +918,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
__u8 dsfield = false;
struct flowi6 fl6;
int err = -EINVAL;
+ __be16 proto;
__u32 mtu;
int nhoff;
int thoff;
@@ -1035,8 +1032,9 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
}
/* Push GRE header. */
- gre_build_header(skb, 8, TUNNEL_SEQ,
- htons(ETH_P_ERSPAN), 0, htonl(t->o_seqno++));
+ proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
+ : htons(ETH_P_ERSPAN2);
+ gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
@@ -1169,6 +1167,10 @@ static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t,
t->parms.i_flags = p->i_flags;
t->parms.o_flags = p->o_flags;
t->parms.fwmark = p->fwmark;
+ t->parms.erspan_ver = p->erspan_ver;
+ t->parms.index = p->index;
+ t->parms.dir = p->dir;
+ t->parms.hwid = p->hwid;
dst_cache_reset(&t->dst_cache);
}
@@ -1717,6 +1719,27 @@ static int ip6erspan_tap_validate(struct nlattr *tb[], struct nlattr *data[],
return 0;
}
+/*
+ * Fill the ERSPAN-specific fields of @parms from netlink attributes.
+ *
+ * Does nothing when @data is NULL.  The version is 1 unless
+ * IFLA_GRE_ERSPAN_VER says otherwise.  For version 1 the index is taken
+ * from IFLA_GRE_ERSPAN_INDEX; for version 2 the direction and hardware
+ * id are taken from IFLA_GRE_ERSPAN_DIR and IFLA_GRE_ERSPAN_HWID.  A
+ * field whose attribute is absent is left untouched.
+ */
+static void ip6erspan_set_version(struct nlattr *data[],
+				  struct __ip6_tnl_parm *parms)
+{
+	if (!data)
+		return;
+
+	parms->erspan_ver = data[IFLA_GRE_ERSPAN_VER] ?
+			    nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) : 1;
+
+	switch (parms->erspan_ver) {
+	case 1:
+		if (data[IFLA_GRE_ERSPAN_INDEX])
+			parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+		break;
+	case 2:
+		if (data[IFLA_GRE_ERSPAN_DIR])
+			parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+		if (data[IFLA_GRE_ERSPAN_HWID])
+			parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+		break;
+	default:
+		/* Any other version: no version-specific fields are read. */
+		break;
+	}
+}
+
static void ip6gre_netlink_parms(struct nlattr *data[],
struct __ip6_tnl_parm *parms)
{
@@ -1765,20 +1788,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[],
if (data[IFLA_GRE_COLLECT_METADATA])
parms->collect_md = true;
-
- parms->erspan_ver = 1;
- if (data[IFLA_GRE_ERSPAN_VER])
- parms->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
-
- if (parms->erspan_ver == 1) {
- if (data[IFLA_GRE_ERSPAN_INDEX])
- parms->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
- } else if (parms->erspan_ver == 2) {
- if (data[IFLA_GRE_ERSPAN_DIR])
- parms->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
- if (data[IFLA_GRE_ERSPAN_HWID])
- parms->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
- }
}
static int ip6gre_tap_init(struct net_device *dev)
@@ -2025,9 +2034,9 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
struct nlattr *data[],
struct netlink_ext_ack *extack)
{
- struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id);
+ struct ip6_tnl *t = netdev_priv(dev);
+ struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
struct __ip6_tnl_parm p;
- struct ip6_tnl *t;
t = ip6gre_changelink_common(dev, tb, data, &p, extack);
if (IS_ERR(t))
@@ -2096,12 +2105,31 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
struct __ip6_tnl_parm *p = &t->parms;
+ __be16 o_flags = p->o_flags;
+
+ if (p->erspan_ver == 1 || p->erspan_ver == 2) {
+ if (!p->collect_md)
+ o_flags |= TUNNEL_KEY;
+
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
+ goto nla_put_failure;
+
+ if (p->erspan_ver == 1) {
+ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
+ goto nla_put_failure;
+ } else {
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
+ goto nla_put_failure;
+ }
+ }
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
nla_put_be16(skb, IFLA_GRE_IFLAGS,
gre_tnl_flags_to_gre_flags(p->i_flags)) ||
nla_put_be16(skb, IFLA_GRE_OFLAGS,
- gre_tnl_flags_to_gre_flags(p->o_flags)) ||
+ gre_tnl_flags_to_gre_flags(o_flags)) ||
nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) ||
@@ -2110,8 +2138,7 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) ||
nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) ||
nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags) ||
- nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark) ||
- nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
+ nla_put_u32(skb, IFLA_GRE_FWMARK, p->fwmark))
goto nla_put_failure;
if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
@@ -2129,19 +2156,6 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev)
goto nla_put_failure;
}
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, p->erspan_ver))
- goto nla_put_failure;
-
- if (p->erspan_ver == 1) {
- if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, p->index))
- goto nla_put_failure;
- } else if (p->erspan_ver == 2) {
- if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, p->dir))
- goto nla_put_failure;
- if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, p->hwid))
- goto nla_put_failure;
- }
-
return 0;
nla_put_failure:
@@ -2196,6 +2210,7 @@ static int ip6erspan_newlink(struct net *src_net, struct net_device *dev,
int err;
ip6gre_netlink_parms(data, &nt->parms);
+ ip6erspan_set_version(data, &nt->parms);
ign = net_generic(net, ip6gre_net_id);
if (nt->parms.collect_md) {
@@ -2241,6 +2256,7 @@ static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[],
if (IS_ERR(t))
return PTR_ERR(t);
+ ip6erspan_set_version(data, &p);
ip6gre_tunnel_unlink_md(ign, t);
ip6gre_tunnel_unlink(ign, t);
ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]);
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 5c045691c302..345882d9c061 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -383,9 +383,36 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {
},
};
+/*
+ * GSO segment callback for sit_offload: returns ERR_PTR(-EINVAL) unless
+ * the skb's gso_type has SKB_GSO_IPXIP4 set, otherwise defers to
+ * ipv6_gso_segment().
+ */
+static struct sk_buff *sit_gso_segment(struct sk_buff *skb,
+				       netdev_features_t features)
+{
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP4)
+		return ipv6_gso_segment(skb, features);
+
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * GSO segment callback for ip4ip6_offload: returns ERR_PTR(-EINVAL)
+ * unless the skb's gso_type has SKB_GSO_IPXIP6 set, otherwise defers to
+ * inet_gso_segment().
+ */
+static struct sk_buff *ip4ip6_gso_segment(struct sk_buff *skb,
+					  netdev_features_t features)
+{
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6)
+		return inet_gso_segment(skb, features);
+
+	return ERR_PTR(-EINVAL);
+}
+
+/*
+ * GSO segment callback for ip6ip6_offload: returns ERR_PTR(-EINVAL)
+ * unless the skb's gso_type has SKB_GSO_IPXIP6 set, otherwise defers to
+ * ipv6_gso_segment().
+ */
+static struct sk_buff *ip6ip6_gso_segment(struct sk_buff *skb,
+					  netdev_features_t features)
+{
+	if (skb_shinfo(skb)->gso_type & SKB_GSO_IPXIP6)
+		return ipv6_gso_segment(skb, features);
+
+	return ERR_PTR(-EINVAL);
+}
+
static const struct net_offload sit_offload = {
.callbacks = {
- .gso_segment = ipv6_gso_segment,
+ .gso_segment = sit_gso_segment,
.gro_receive = sit_ip6ip6_gro_receive,
.gro_complete = sit_gro_complete,
},
@@ -393,7 +420,7 @@ static const struct net_offload sit_offload = {
static const struct net_offload ip4ip6_offload = {
.callbacks = {
- .gso_segment = inet_gso_segment,
+ .gso_segment = ip4ip6_gso_segment,
.gro_receive = ip4ip6_gro_receive,
.gro_complete = ip4ip6_gro_complete,
},
@@ -401,7 +428,7 @@ static const struct net_offload ip4ip6_offload = {
static const struct net_offload ip6ip6_offload = {
.callbacks = {
- .gso_segment = ipv6_gso_segment,
+ .gso_segment = ip6ip6_gso_segment,
.gro_receive = sit_ip6ip6_gro_receive,
.gro_complete = ip6ip6_gro_complete,
},
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5f9fa0302b5a..edbd12067170 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -300,6 +300,12 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
if (sk && ra->sel == sel &&
(!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == skb->dev->ifindex)) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (np && np->rtalert_isolate &&
+ !net_eq(sock_net(sk), dev_net(skb->dev))) {
+ continue;
+ }
if (last) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index ad1a9ccd4b44..25430c991cea 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -32,18 +32,9 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
goto error;
}
if (cfg->bind_ifindex) {
- struct net_device *dev;
-
- dev = dev_get_by_index(net, cfg->bind_ifindex);
- if (!dev) {
- err = -ENODEV;
- goto error;
- }
-
- err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE,
- dev->name, strlen(dev->name) + 1);
- dev_put(dev);
-
+ err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
+ (void *)&cfg->bind_ifindex,
+ sizeof(cfg->bind_ifindex));
if (err < 0)
goto error;
}
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 30337b38274b..e4dd57976737 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -97,7 +97,7 @@ static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, int flags);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
@@ -393,7 +393,8 @@ static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
static void ip6mr_free_table(struct mr_table *mrt)
{
del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt, true);
+ mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC |
+ MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC);
rhltable_destroy(&mrt->mfc_hash);
kfree(mrt);
}
@@ -1496,43 +1497,51 @@ static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, int flags)
{
struct mr_mfc *c, *tmp;
LIST_HEAD(list);
int i;
/* Shut down all active vif entries */
- for (i = 0; i < mrt->maxvif; i++) {
- if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
- continue;
- mif6_delete(mrt, i, 0, &list);
+ if (flags & (MRT6_FLUSH_MIFS | MRT6_FLUSH_MIFS_STATIC)) {
+ for (i = 0; i < mrt->maxvif; i++) {
+ if (((mrt->vif_table[i].flags & VIFF_STATIC) &&
+ !(flags & MRT6_FLUSH_MIFS_STATIC)) ||
+ (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT6_FLUSH_MIFS)))
+ continue;
+ mif6_delete(mrt, i, 0, &list);
+ }
+ unregister_netdevice_many(&list);
}
- unregister_netdevice_many(&list);
/* Wipe the cache */
- list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
- if (!all && (c->mfc_flags & MFC_STATIC))
- continue;
- rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
- list_del_rcu(&c->list);
- mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
- mr_cache_put(c);
- }
-
- if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
- list_del(&c->list);
+ if (flags & (MRT6_FLUSH_MFC | MRT6_FLUSH_MFC_STATIC)) {
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+ if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC_STATIC)) ||
+ (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT6_FLUSH_MFC)))
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+ list_del_rcu(&c->list);
call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
FIB_EVENT_ENTRY_DEL,
- (struct mfc6_cache *)c,
- mrt->id);
- mr6_netlink_event(mrt, (struct mfc6_cache *)c,
- RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
+ (struct mfc6_cache *)c, mrt->id);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ mr_cache_put(c);
+ }
+ }
+
+ if (flags & MRT6_FLUSH_MFC) {
+ if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
+ spin_lock_bh(&mfc_unres_lock);
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
+ list_del(&c->list);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+ RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
+ }
+ spin_unlock_bh(&mfc_unres_lock);
}
- spin_unlock_bh(&mfc_unres_lock);
}
}
@@ -1588,7 +1597,7 @@ int ip6mr_sk_done(struct sock *sk)
NETCONFA_IFINDEX_ALL,
net->ipv6.devconf_all);
- mroute_clean_tables(mrt, false);
+ mroute_clean_tables(mrt, MRT6_FLUSH_MIFS | MRT6_FLUSH_MFC);
err = 0;
break;
}
@@ -1704,6 +1713,20 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
rtnl_unlock();
return ret;
+ case MRT6_FLUSH:
+ {
+ int flags;
+
+ if (optlen != sizeof(flags))
+ return -EINVAL;
+ if (get_user(flags, (int __user *)optval))
+ return -EFAULT;
+ rtnl_lock();
+ mroute_clean_tables(mrt, flags);
+ rtnl_unlock();
+ return 0;
+ }
+
/*
* Control PIM assert (to activate pim will activate assert)
*/
@@ -1965,10 +1988,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTFORWDATAGRAMS);
- __IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTOCTETS, skb->len);
+ IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP6_ADD_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_OUTOCTETS, skb->len);
return dst_output(net, sk, skb);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 973e215c3114..40f21fef25ff 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -787,6 +787,12 @@ done:
goto e_inval;
retv = ip6_ra_control(sk, val);
break;
+ case IPV6_ROUTER_ALERT_ISOLATE:
+ if (optlen < sizeof(int))
+ goto e_inval;
+ np->rtalert_isolate = valbool;
+ retv = 0;
+ break;
case IPV6_MTU_DISCOVER:
if (optlen < sizeof(int))
goto e_inval;
@@ -1358,6 +1364,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
val = np->rxopt.bits.recvfragsize;
break;
+ case IPV6_ROUTER_ALERT_ISOLATE:
+ val = np->rtalert_isolate;
+ break;
+
default:
return -ENOPROTOOPT;
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 21f6deb2aec9..42f3f5cd349f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -940,6 +940,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
{
return __ipv6_dev_mc_inc(dev, addr, MCAST_EXCLUDE);
}
+EXPORT_SYMBOL(ipv6_dev_mc_inc);
/*
* device multicast group del
@@ -987,6 +988,7 @@ int ipv6_dev_mc_dec(struct net_device *dev, const struct in6_addr *addr)
return err;
}
+EXPORT_SYMBOL(ipv6_dev_mc_dec);
/*
* check if the interface/address pair is valid
diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c
index 9405b04eecc6..dddd75d1be0e 100644
--- a/net/ipv6/mcast_snoop.c
+++ b/net/ipv6/mcast_snoop.c
@@ -41,6 +41,8 @@ static int ipv6_mc_check_ip6hdr(struct sk_buff *skb)
if (skb->len < len || len <= offset)
return -EINVAL;
+ skb_set_transport_header(skb, offset);
+
return 0;
}
@@ -77,27 +79,27 @@ static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb)
len += sizeof(struct mld2_report);
- return pskb_may_pull(skb, len) ? 0 : -EINVAL;
+ return ipv6_mc_may_pull(skb, len) ? 0 : -EINVAL;
}
static int ipv6_mc_check_mld_query(struct sk_buff *skb)
{
+ unsigned int transport_len = ipv6_transport_len(skb);
struct mld_msg *mld;
- unsigned int len = skb_transport_offset(skb);
+ unsigned int len;
/* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL))
return -EINVAL;
- len += sizeof(struct mld_msg);
- if (skb->len < len)
- return -EINVAL;
-
/* MLDv1? */
- if (skb->len != len) {
+ if (transport_len != sizeof(struct mld_msg)) {
/* or MLDv2? */
- len += sizeof(struct mld2_query) - sizeof(struct mld_msg);
- if (skb->len < len || !pskb_may_pull(skb, len))
+ if (transport_len < sizeof(struct mld2_query))
+ return -EINVAL;
+
+ len = skb_transport_offset(skb) + sizeof(struct mld2_query);
+ if (!ipv6_mc_may_pull(skb, len))
return -EINVAL;
}
@@ -115,12 +117,17 @@ static int ipv6_mc_check_mld_query(struct sk_buff *skb)
static int ipv6_mc_check_mld_msg(struct sk_buff *skb)
{
- struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb);
+ unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg);
+ struct mld_msg *mld;
+
+ if (!ipv6_mc_may_pull(skb, len))
+ return -EINVAL;
+
+ mld = (struct mld_msg *)skb_transport_header(skb);
switch (mld->mld_type) {
case ICMPV6_MGM_REDUCTION:
case ICMPV6_MGM_REPORT:
- /* fall through */
return 0;
case ICMPV6_MLD2_REPORT:
return ipv6_mc_check_mld_reportv2(skb);
@@ -136,49 +143,30 @@ static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb)
return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo);
}
-static int __ipv6_mc_check_mld(struct sk_buff *skb,
- struct sk_buff **skb_trimmed)
-
+int ipv6_mc_check_icmpv6(struct sk_buff *skb)
{
- struct sk_buff *skb_chk = NULL;
- unsigned int transport_len;
- unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg);
- int ret = -EINVAL;
+ unsigned int len = skb_transport_offset(skb) + sizeof(struct icmp6hdr);
+ unsigned int transport_len = ipv6_transport_len(skb);
+ struct sk_buff *skb_chk;
- transport_len = ntohs(ipv6_hdr(skb)->payload_len);
- transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr);
+ if (!ipv6_mc_may_pull(skb, len))
+ return -EINVAL;
skb_chk = skb_checksum_trimmed(skb, transport_len,
ipv6_mc_validate_checksum);
if (!skb_chk)
- goto err;
-
- if (!pskb_may_pull(skb_chk, len))
- goto err;
-
- ret = ipv6_mc_check_mld_msg(skb_chk);
- if (ret)
- goto err;
-
- if (skb_trimmed)
- *skb_trimmed = skb_chk;
- /* free now unneeded clone */
- else if (skb_chk != skb)
- kfree_skb(skb_chk);
-
- ret = 0;
+ return -EINVAL;
-err:
- if (ret && skb_chk && skb_chk != skb)
+ if (skb_chk != skb)
kfree_skb(skb_chk);
- return ret;
+ return 0;
}
+EXPORT_SYMBOL(ipv6_mc_check_icmpv6);
/**
* ipv6_mc_check_mld - checks whether this is a sane MLD packet
* @skb: the skb to validate
- * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional)
*
* Checks whether an IPv6 packet is a valid MLD packet. If so sets
* skb transport header accordingly and returns zero.
@@ -188,18 +176,10 @@ err:
* -ENOMSG: IP header validation succeeded but it is not an MLD packet.
* -ENOMEM: A memory allocation failure happened.
*
- * Optionally, an skb pointer might be provided via skb_trimmed (or set it
- * to NULL): After parsing an MLD packet successfully it will point to
- * an skb which has its tail aligned to the IP packet end. This might
- * either be the originally provided skb or a trimmed, cloned version if
- * the skb frame had data beyond the IP packet. A cloned skb allows us
- * to leave the original skb and its full frame unchanged (which might be
- * desirable for layer 2 frame jugglers).
- *
* Caller needs to set the skb network header and free any returned skb if it
* differs from the provided skb.
*/
-int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed)
+int ipv6_mc_check_mld(struct sk_buff *skb)
{
int ret;
@@ -211,6 +191,10 @@ int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed)
if (ret < 0)
return ret;
- return __ipv6_mc_check_mld(skb, skb_trimmed);
+ ret = ipv6_mc_check_icmpv6(skb);
+ if (ret < 0)
+ return ret;
+
+ return ipv6_mc_check_mld_msg(skb);
}
EXPORT_SYMBOL(ipv6_mc_check_mld);
diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index 8b075f0bc351..1240ccd57f39 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -23,9 +23,11 @@ int ip6_route_me_harder(struct net *net, struct sk_buff *skb)
struct sock *sk = sk_to_full_sk(skb->sk);
unsigned int hh_len;
struct dst_entry *dst;
+ int strict = (ipv6_addr_type(&iph->daddr) &
+ (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct flowi6 fl6 = {
.flowi6_oif = sk && sk->sk_bound_dev_if ? sk->sk_bound_dev_if :
- rt6_need_strict(&iph->daddr) ? skb_dst(skb)->dev->ifindex : 0,
+ strict ? skb_dst(skb)->dev->ifindex : 0,
.flowi6_mark = skb->mark,
.flowi6_uid = sock_net_uid(net, sk),
.daddr = iph->daddr,
@@ -84,8 +86,8 @@ static int nf_ip6_reroute(struct sk_buff *skb,
return 0;
}
-static int nf_ip6_route(struct net *net, struct dst_entry **dst,
- struct flowi *fl, bool strict)
+int __nf_ip6_route(struct net *net, struct dst_entry **dst,
+ struct flowi *fl, bool strict)
{
static const struct ipv6_pinfo fake_pinfo;
static const struct inet_sock fake_sk = {
@@ -105,12 +107,17 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst,
*dst = result;
return err;
}
+EXPORT_SYMBOL_GPL(__nf_ip6_route);
static const struct nf_ipv6_ops ipv6ops = {
+#if IS_MODULE(CONFIG_IPV6)
.chk_addr = ipv6_chk_addr,
- .route_input = ip6_route_input,
+ .route_me_harder = ip6_route_me_harder,
+ .dev_get_saddr = ipv6_dev_get_saddr,
+ .route = __nf_ip6_route,
+#endif
+ .route_input = ip6_route_input,
.fragment = ip6_fragment,
- .route = nf_ip6_route,
.reroute = nf_ip6_reroute,
};
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 339d0762b027..ddc99a1653aa 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -31,34 +31,6 @@ config NFT_CHAIN_ROUTE_IPV6
fields such as the source, destination, flowlabel, hop-limit and
the packet mark.
-if NF_NAT_IPV6
-
-config NFT_CHAIN_NAT_IPV6
- tristate "IPv6 nf_tables nat chain support"
- help
- This option enables the "nat" chain for IPv6 in nf_tables. This
- chain type is used to perform Network Address Translation (NAT)
- packet transformations such as the source, destination address and
- source and destination ports.
-
-config NFT_MASQ_IPV6
- tristate "IPv6 masquerade support for nf_tables"
- depends on NFT_MASQ
- select NF_NAT_MASQUERADE_IPV6
- help
- This is the expression that provides IPv4 masquerading support for
- nf_tables.
-
-config NFT_REDIR_IPV6
- tristate "IPv6 redirect support for nf_tables"
- depends on NFT_REDIR
- select NF_NAT_REDIRECT
- help
- This is the expression that provides IPv4 redirect support for
- nf_tables.
-
-endif # NF_NAT_IPV6
-
config NFT_REJECT_IPV6
select NF_REJECT_IPV6
default NFT_REJECT
@@ -106,23 +78,6 @@ config NF_LOG_IPV6
default m if NETFILTER_ADVANCED=n
select NF_LOG_COMMON
-config NF_NAT_IPV6
- tristate "IPv6 NAT"
- depends on NF_CONNTRACK
- depends on NETFILTER_ADVANCED
- select NF_NAT
- help
- The IPv6 NAT option allows masquerading, port forwarding and other
- forms of full Network Address Port Translation. This can be
- controlled by iptables or nft.
-
-if NF_NAT_IPV6
-
-config NF_NAT_MASQUERADE_IPV6
- bool
-
-endif # NF_NAT_IPV6
-
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
@@ -311,7 +266,6 @@ config IP6_NF_NAT
depends on NF_CONNTRACK
depends on NETFILTER_ADVANCED
select NF_NAT
- select NF_NAT_IPV6
select NETFILTER_XT_NAT
help
This enables the `nat' table in ip6tables. This allows masquerading,
@@ -324,7 +278,7 @@ if IP6_NF_NAT
config IP6_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
- select NF_NAT_MASQUERADE_IPV6
+ select NF_NAT_MASQUERADE
help
Masquerading is a special case of NAT: all outgoing connections are
changed to seem to come from a particular interface's address, and
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 9ea43d5256e0..3853c648ebaa 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -11,10 +11,6 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
-nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o
-nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
-obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
-
# defrag
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
@@ -32,10 +28,7 @@ obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
# nf_tables
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
-obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
-obj-$(CONFIG_NFT_MASQ_IPV6) += nft_masq_ipv6.o
-obj-$(CONFIG_NFT_REDIR_IPV6) += nft_redir_ipv6.o
obj-$(CONFIG_NFT_DUP_IPV6) += nft_dup_ipv6.o
obj-$(CONFIG_NFT_FIB_IPV6) += nft_fib_ipv6.o
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index 67ba70ab9f5c..3e1fab9d7503 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -17,8 +17,6 @@
#include <net/ipv6.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
static int __net_init ip6table_nat_table_init(struct net *net);
@@ -72,10 +70,10 @@ static int ip6t_nat_register_lookups(struct net *net)
int i, ret;
for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++) {
- ret = nf_nat_l3proto_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]);
+ ret = nf_nat_ipv6_register_fn(net, &nf_nat_ipv6_ops[i]);
if (ret) {
while (i)
- nf_nat_l3proto_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]);
+ nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[--i]);
return ret;
}
@@ -89,7 +87,7 @@ static void ip6t_nat_unregister_lookups(struct net *net)
int i;
for (i = 0; i < ARRAY_SIZE(nf_nat_ipv6_ops); i++)
- nf_nat_l3proto_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]);
+ nf_nat_ipv6_unregister_fn(net, &nf_nat_ipv6_ops[i]);
}
static int __net_init ip6table_nat_table_init(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 181da2c40f9a..3de0e9b0a482 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -136,6 +136,9 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net)
}
#endif
+static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev);
+
static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
@@ -177,9 +180,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
const struct frag_hdr *fhdr, int nhoff)
{
- struct sk_buff *prev, *next;
unsigned int payload_len;
- int offset, end;
+ struct net_device *dev;
+ struct sk_buff *prev;
+ int offset, end, err;
u8 ecn;
if (fq->q.flags & INET_FRAG_COMPLETE) {
@@ -254,55 +258,18 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
goto err;
}
- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
- prev = fq->q.fragments_tail;
- if (!prev || prev->ip_defrag_offset < offset) {
- next = NULL;
- goto found;
- }
- prev = NULL;
- for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (next->ip_defrag_offset >= offset)
- break; /* bingo! */
- prev = next;
- }
-
-found:
- /* RFC5722, Section 4:
- * When reassembling an IPv6 datagram, if
- * one or more its constituent fragments is determined to be an
- * overlapping fragment, the entire datagram (and any constituent
- * fragments, including those not yet received) MUST be silently
- * discarded.
- */
-
- /* Check for overlap with preceding fragment. */
- if (prev &&
- (prev->ip_defrag_offset + prev->len) > offset)
- goto discard_fq;
-
- /* Look for overlap with succeeding segment. */
- if (next && next->ip_defrag_offset < end)
- goto discard_fq;
-
- /* Note : skb->ip_defrag_offset and skb->dev share the same location */
- if (skb->dev)
- fq->iif = skb->dev->ifindex;
+ /* Note : skb->rbnode and skb->dev share the same location. */
+ dev = skb->dev;
/* Makes sure compiler wont do silly aliasing games */
barrier();
- skb->ip_defrag_offset = offset;
- /* Insert this fragment in the chain of fragments. */
- skb->next = next;
- if (!next)
- fq->q.fragments_tail = skb;
- if (prev)
- prev->next = skb;
- else
- fq->q.fragments = skb;
+ prev = fq->q.fragments_tail;
+ err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+ if (err)
+ goto insert_error;
+
+ if (dev)
+ fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
@@ -319,11 +286,25 @@ found:
fq->q.flags |= INET_FRAG_FIRST_IN;
}
- return 0;
+ if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
+ fq->q.meat == fq->q.len) {
+ unsigned long orefdst = skb->_skb_refdst;
+
+ skb->_skb_refdst = 0UL;
+ err = nf_ct_frag6_reasm(fq, skb, prev, dev);
+ skb->_skb_refdst = orefdst;
+ return err;
+ }
+
+ skb_dst_drop(skb);
+ return -EINPROGRESS;
-discard_fq:
+insert_error:
+ if (err == IPFRAG_DUP)
+ goto err;
inet_frag_kill(&fq->q);
err:
+ skb_dst_drop(skb);
return -EINVAL;
}
@@ -333,147 +314,66 @@ err:
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
- *
- * returns true if *prev skb has been transformed into the reassembled
- * skb, false otherwise.
*/
-static bool
-nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
+static int nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev)
{
- struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len, delta;
+ void *reasm_data;
+ int payload_len;
u8 ecn;
inet_frag_kill(&fq->q);
- WARN_ON(head == NULL);
- WARN_ON(head->ip_defrag_offset != 0);
-
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
- return false;
+ goto err;
+
+ reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
+ if (!reasm_data)
+ goto err;
- /* Unfragmented part is taken from the first segment. */
- payload_len = ((head->data - skb_network_header(head)) -
+ payload_len = ((skb->data - skb_network_header(skb)) -
sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN) {
net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
payload_len);
- return false;
- }
-
- delta = - head->truesize;
-
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
- return false;
-
- delta += head->truesize;
- if (delta)
- add_frag_mem_limit(fq->q.net, delta);
-
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments. */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (clone == NULL)
- return false;
-
- clone->next = head->next;
- head->next = clone;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = clone->data_len = head->data_len - plen;
- head->data_len -= clone->len;
- head->len -= clone->len;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
-
- add_frag_mem_limit(fq->q.net, clone->truesize);
- }
-
- /* morph head into last received skb: prev.
- *
- * This allows callers of ipv6 conntrack defrag to continue
- * to use the last skb(frag) passed into the reasm engine.
- * The last skb frag 'silently' turns into the full reassembled skb.
- *
- * Since prev is also part of q->fragments we have to clone it first.
- */
- if (head != prev) {
- struct sk_buff *iter;
-
- fp = skb_clone(prev, GFP_ATOMIC);
- if (!fp)
- return false;
-
- fp->next = prev->next;
-
- iter = head;
- while (iter) {
- if (iter->next == prev) {
- iter->next = fp;
- break;
- }
- iter = iter->next;
- }
-
- skb_morph(prev, head);
- prev->next = head->next;
- consume_skb(head);
- head = prev;
+ goto err;
}
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
- skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
- memmove(head->head + sizeof(struct frag_hdr), head->head,
- (head->data - head->head) - sizeof(struct frag_hdr));
- head->mac_header += sizeof(struct frag_hdr);
- head->network_header += sizeof(struct frag_hdr);
-
- skb_shinfo(head)->frag_list = head->next;
- skb_reset_transport_header(head);
- skb_push(head, head->data - skb_network_header(head));
-
- for (fp = head->next; fp; fp = fp->next) {
- head->data_len += fp->len;
- head->len += fp->len;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
- head->truesize += fp->truesize;
- fp->sk = NULL;
- }
- sub_frag_mem_limit(fq->q.net, head->truesize);
+ skb_network_header(skb)[fq->nhoffset] = skb_transport_header(skb)[0];
+ memmove(skb->head + sizeof(struct frag_hdr), skb->head,
+ (skb->data - skb->head) - sizeof(struct frag_hdr));
+ skb->mac_header += sizeof(struct frag_hdr);
+ skb->network_header += sizeof(struct frag_hdr);
+
+ skb_reset_transport_header(skb);
+
+ inet_frag_reasm_finish(&fq->q, skb, reasm_data);
- head->ignore_df = 1;
- skb_mark_not_on_list(head);
- head->dev = dev;
- head->tstamp = fq->q.stamp;
- ipv6_hdr(head)->payload_len = htons(payload_len);
- ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
- IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
+ skb->ignore_df = 1;
+ skb->dev = dev;
+ ipv6_hdr(skb)->payload_len = htons(payload_len);
+ ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
+ IP6CB(skb)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
- if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_partial(skb_network_header(head),
- skb_network_header_len(head),
- head->csum);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->csum = csum_partial(skb_network_header(skb),
+ skb_network_header_len(skb),
+ skb->csum);
- fq->q.fragments = NULL;
fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;
+ fq->q.last_run_head = NULL;
- return true;
+ return 0;
+
+err:
+ inet_frag_kill(&fq->q);
+ return -EINVAL;
}
/*
@@ -542,7 +442,6 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
u16 savethdr = skb->transport_header;
- struct net_device *dev = skb->dev;
int fhoff, nhoff, ret;
struct frag_hdr *fhdr;
struct frag_queue *fq;
@@ -565,10 +464,6 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
- if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
- fhdr->frag_off & htons(IP6_MF))
- return -EINVAL;
-
skb_orphan(skb);
fq = fq_find(net, fhdr->identification, user, hdr,
skb->dev ? skb->dev->ifindex : 0);
@@ -580,31 +475,17 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
spin_lock_bh(&fq->q.lock);
ret = nf_ct_frag6_queue(fq, skb, fhdr, nhoff);
- if (ret < 0) {
- if (ret == -EPROTO) {
- skb->transport_header = savethdr;
- ret = 0;
- }
- goto out_unlock;
+ if (ret == -EPROTO) {
+ skb->transport_header = savethdr;
+ ret = 0;
}
/* after queue has assumed skb ownership, only 0 or -EINPROGRESS
* must be returned.
*/
- ret = -EINPROGRESS;
- if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
- fq->q.meat == fq->q.len) {
- unsigned long orefdst = skb->_skb_refdst;
-
- skb->_skb_refdst = 0UL;
- if (nf_ct_frag6_reasm(fq, skb, dev))
- ret = 0;
- skb->_skb_refdst = orefdst;
- } else {
- skb_dst_drop(skb);
- }
+ if (ret)
+ ret = -EINPROGRESS;
-out_unlock:
spin_unlock_bh(&fq->q.lock);
inet_frag_put(&fq->q);
return ret;
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
deleted file mode 100644
index 23022447eb49..000000000000
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ /dev/null
@@ -1,411 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of IPv6 NAT funded by Astaro.
- */
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/secure_seq.h>
-#include <net/checksum.h>
-#include <net/ip6_checksum.h>
-#include <net/ip6_route.h>
-#include <net/ipv6.h>
-
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv6;
-
-#ifdef CONFIG_XFRM
-static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
- const struct nf_conn *ct,
- enum ip_conntrack_dir dir,
- unsigned long statusbit,
- struct flowi *fl)
-{
- const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
- struct flowi6 *fl6 = &fl->u.ip6;
-
- if (ct->status & statusbit) {
- fl6->daddr = t->dst.u3.in6;
- if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP ||
- t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
- t->dst.protonum == IPPROTO_SCTP)
- fl6->fl6_dport = t->dst.u.all;
- }
-
- statusbit ^= IPS_NAT_MASK;
-
- if (ct->status & statusbit) {
- fl6->saddr = t->src.u3.in6;
- if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP ||
- t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
- t->dst.protonum == IPPROTO_SCTP)
- fl6->fl6_sport = t->src.u.all;
- }
-}
-#endif
-
-static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
- const struct nf_conntrack_tuple *target,
- enum nf_nat_manip_type maniptype)
-{
- struct ipv6hdr *ipv6h;
- __be16 frag_off;
- int hdroff;
- u8 nexthdr;
-
- if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
- return false;
-
- ipv6h = (void *)skb->data + iphdroff;
- nexthdr = ipv6h->nexthdr;
- hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
- &nexthdr, &frag_off);
- if (hdroff < 0)
- goto manip_addr;
-
- if ((frag_off & htons(~0x7)) == 0 &&
- !nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
- target, maniptype))
- return false;
-
- /* must reload, offset might have changed */
- ipv6h = (void *)skb->data + iphdroff;
-
-manip_addr:
- if (maniptype == NF_NAT_MANIP_SRC)
- ipv6h->saddr = target->src.u3.in6;
- else
- ipv6h->daddr = target->dst.u3.in6;
-
- return true;
-}
-
-static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
- unsigned int iphdroff, __sum16 *check,
- const struct nf_conntrack_tuple *t,
- enum nf_nat_manip_type maniptype)
-{
- const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
- const struct in6_addr *oldip, *newip;
-
- if (maniptype == NF_NAT_MANIP_SRC) {
- oldip = &ipv6h->saddr;
- newip = &t->src.u3.in6;
- } else {
- oldip = &ipv6h->daddr;
- newip = &t->dst.u3.in6;
- }
- inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
- newip->s6_addr32, true);
-}
-
-static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
- u8 proto, void *data, __sum16 *check,
- int datalen, int oldlen)
-{
- if (skb->ip_summed != CHECKSUM_PARTIAL) {
- const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
-
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
- (data - (void *)skb->data);
- skb->csum_offset = (void *)check - data;
- *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
- datalen, proto, 0);
- } else
- inet_proto_csum_replace2(check, skb,
- htons(oldlen), htons(datalen), true);
-}
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
- struct nf_nat_range2 *range)
-{
- if (tb[CTA_NAT_V6_MINIP]) {
- nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
- sizeof(struct in6_addr));
- range->flags |= NF_NAT_RANGE_MAP_IPS;
- }
-
- if (tb[CTA_NAT_V6_MAXIP])
- nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
- sizeof(struct in6_addr));
- else
- range->max_addr = range->min_addr;
-
- return 0;
-}
-#endif
-
-static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
- .l3proto = NFPROTO_IPV6,
- .manip_pkt = nf_nat_ipv6_manip_pkt,
- .csum_update = nf_nat_ipv6_csum_update,
- .csum_recalc = nf_nat_ipv6_csum_recalc,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_to_range = nf_nat_ipv6_nlattr_to_range,
-#endif
-#ifdef CONFIG_XFRM
- .decode_session = nf_nat_ipv6_decode_session,
-#endif
-};
-
-int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- unsigned int hdrlen)
-{
- struct {
- struct icmp6hdr icmp6;
- struct ipv6hdr ip6;
- } *inside;
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
- struct nf_conntrack_tuple target;
- unsigned long statusbit;
-
- WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
-
- if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
- return 0;
- if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
- return 0;
-
- inside = (void *)skb->data + hdrlen;
- if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
- if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
- return 0;
- if (ct->status & IPS_NAT_MASK)
- return 0;
- }
-
- if (manip == NF_NAT_MANIP_SRC)
- statusbit = IPS_SRC_NAT;
- else
- statusbit = IPS_DST_NAT;
-
- /* Invert if this is reply direction */
- if (dir == IP_CT_DIR_REPLY)
- statusbit ^= IPS_NAT_MASK;
-
- if (!(ct->status & statusbit))
- return 1;
-
- if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
- &ct->tuplehash[!dir].tuple, !manip))
- return 0;
-
- if (skb->ip_summed != CHECKSUM_PARTIAL) {
- struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- inside = (void *)skb->data + hdrlen;
- inside->icmp6.icmp6_cksum = 0;
- inside->icmp6.icmp6_cksum =
- csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
- skb->len - hdrlen, IPPROTO_ICMPV6,
- skb_checksum(skb, hdrlen,
- skb->len - hdrlen, 0));
- }
-
- nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
- return 0;
-
- return 1;
-}
-EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
-
-static unsigned int
-nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- __be16 frag_off;
- int hdrlen;
- u8 nexthdr;
-
- ct = nf_ct_get(skb, &ctinfo);
- /* Can't track? It's not due to stress, or conntrack would
- * have dropped it. Hence it's the user's responsibilty to
- * packet filter it out, or implement conntrack/NAT for that
- * protocol. 8) --RR
- */
- if (!ct)
- return NF_ACCEPT;
-
- if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
- nexthdr = ipv6_hdr(skb)->nexthdr;
- hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
- &nexthdr, &frag_off);
-
- if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
- if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
- state->hook,
- hdrlen))
- return NF_DROP;
- else
- return NF_ACCEPT;
- }
- }
-
- return nf_nat_inet_fn(priv, skb, state);
-}
-
-static unsigned int
-nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- unsigned int ret;
- struct in6_addr daddr = ipv6_hdr(skb)->daddr;
-
- ret = nf_nat_ipv6_fn(priv, skb, state);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
- skb_dst_drop(skb);
-
- return ret;
-}
-
-static unsigned int
-nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
-#ifdef CONFIG_XFRM
- const struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- int err;
-#endif
- unsigned int ret;
-
- ret = nf_nat_ipv6_fn(priv, skb, state);
-#ifdef CONFIG_XFRM
- if (ret != NF_DROP && ret != NF_STOLEN &&
- !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
- &ct->tuplehash[!dir].tuple.dst.u3) ||
- (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
- ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all)) {
- err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
- }
-#endif
- return ret;
-}
-
-static unsigned int
-nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- const struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int ret;
- int err;
-
- ret = nf_nat_ipv6_fn(priv, skb, state);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
- &ct->tuplehash[!dir].tuple.src.u3)) {
- err = ip6_route_me_harder(state->net, skb);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
-#ifdef CONFIG_XFRM
- else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
- ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
- ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all) {
- err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
- if (err < 0)
- ret = NF_DROP_ERR(err);
- }
-#endif
- }
- return ret;
-}
-
-static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
- /* Before packet filtering, change destination */
- {
- .hook = nf_nat_ipv6_in,
- .pf = NFPROTO_IPV6,
- .hooknum = NF_INET_PRE_ROUTING,
- .priority = NF_IP6_PRI_NAT_DST,
- },
- /* After packet filtering, change source */
- {
- .hook = nf_nat_ipv6_out,
- .pf = NFPROTO_IPV6,
- .hooknum = NF_INET_POST_ROUTING,
- .priority = NF_IP6_PRI_NAT_SRC,
- },
- /* Before packet filtering, change destination */
- {
- .hook = nf_nat_ipv6_local_fn,
- .pf = NFPROTO_IPV6,
- .hooknum = NF_INET_LOCAL_OUT,
- .priority = NF_IP6_PRI_NAT_DST,
- },
- /* After packet filtering, change source */
- {
- .hook = nf_nat_ipv6_fn,
- .pf = NFPROTO_IPV6,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = NF_IP6_PRI_NAT_SRC,
- },
-};
-
-int nf_nat_l3proto_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
-{
- return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops));
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_register_fn);
-
-void nf_nat_l3proto_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
-{
- nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn);
-
-static int __init nf_nat_l3proto_ipv6_init(void)
-{
- return nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
-}
-
-static void __exit nf_nat_l3proto_ipv6_exit(void)
-{
- nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
-}
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("nf-nat-" __stringify(AF_INET6));
-
-module_init(nf_nat_l3proto_ipv6_init);
-module_exit(nf_nat_l3proto_ipv6_exit);
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
deleted file mode 100644
index 0ad0da5a2600..000000000000
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ /dev/null
@@ -1,223 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6
- * NAT funded by Astaro.
- */
-
-#include <linux/kernel.h>
-#include <linux/atomic.h>
-#include <linux/netdevice.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/addrconf.h>
-#include <net/ipv6.h>
-#include <net/netfilter/ipv6/nf_nat_masquerade.h>
-
-#define MAX_WORK_COUNT 16
-
-static atomic_t v6_worker_count;
-
-unsigned int
-nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
- const struct net_device *out)
-{
- enum ip_conntrack_info ctinfo;
- struct nf_conn_nat *nat;
- struct in6_addr src;
- struct nf_conn *ct;
- struct nf_nat_range2 newrange;
-
- ct = nf_ct_get(skb, &ctinfo);
- WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY)));
-
- if (ipv6_dev_get_saddr(nf_ct_net(ct), out,
- &ipv6_hdr(skb)->daddr, 0, &src) < 0)
- return NF_DROP;
-
- nat = nf_ct_nat_ext_add(ct);
- if (nat)
- nat->masq_index = out->ifindex;
-
- newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
- newrange.min_addr.in6 = src;
- newrange.max_addr.in6 = src;
- newrange.min_proto = range->min_proto;
- newrange.max_proto = range->max_proto;
-
- return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
-
-static int device_cmp(struct nf_conn *ct, void *ifindex)
-{
- const struct nf_conn_nat *nat = nfct_nat(ct);
-
- if (!nat)
- return 0;
- if (nf_ct_l3num(ct) != NFPROTO_IPV6)
- return 0;
- return nat->masq_index == (int)(long)ifindex;
-}
-
-static int masq_device_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct net *net = dev_net(dev);
-
- if (event == NETDEV_DOWN)
- nf_ct_iterate_cleanup_net(net, device_cmp,
- (void *)(long)dev->ifindex, 0, 0);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block masq_dev_notifier = {
- .notifier_call = masq_device_event,
-};
-
-struct masq_dev_work {
- struct work_struct work;
- struct net *net;
- struct in6_addr addr;
- int ifindex;
-};
-
-static int inet_cmp(struct nf_conn *ct, void *work)
-{
- struct masq_dev_work *w = (struct masq_dev_work *)work;
- struct nf_conntrack_tuple *tuple;
-
- if (!device_cmp(ct, (void *)(long)w->ifindex))
- return 0;
-
- tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
- return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
-}
-
-static void iterate_cleanup_work(struct work_struct *work)
-{
- struct masq_dev_work *w;
-
- w = container_of(work, struct masq_dev_work, work);
-
- nf_ct_iterate_cleanup_net(w->net, inet_cmp, (void *)w, 0, 0);
-
- put_net(w->net);
- kfree(w);
- atomic_dec(&v6_worker_count);
- module_put(THIS_MODULE);
-}
-
-/* ipv6 inet notifier is an atomic notifier, i.e. we cannot
- * schedule.
- *
- * Unfortunately, nf_ct_iterate_cleanup_net can run for a long
- * time if there are lots of conntracks and the system
- * handles high softirq load, so it frequently calls cond_resched
- * while iterating the conntrack table.
- *
- * So we defer nf_ct_iterate_cleanup_net walk to the system workqueue.
- *
- * As we can have 'a lot' of inet_events (depending on amount
- * of ipv6 addresses being deleted), we also need to add an upper
- * limit to the number of queued work items.
- */
-static int masq_inet6_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct inet6_ifaddr *ifa = ptr;
- const struct net_device *dev;
- struct masq_dev_work *w;
- struct net *net;
-
- if (event != NETDEV_DOWN ||
- atomic_read(&v6_worker_count) >= MAX_WORK_COUNT)
- return NOTIFY_DONE;
-
- dev = ifa->idev->dev;
- net = maybe_get_net(dev_net(dev));
- if (!net)
- return NOTIFY_DONE;
-
- if (!try_module_get(THIS_MODULE))
- goto err_module;
-
- w = kmalloc(sizeof(*w), GFP_ATOMIC);
- if (w) {
- atomic_inc(&v6_worker_count);
-
- INIT_WORK(&w->work, iterate_cleanup_work);
- w->ifindex = dev->ifindex;
- w->net = net;
- w->addr = ifa->addr;
- schedule_work(&w->work);
-
- return NOTIFY_DONE;
- }
-
- module_put(THIS_MODULE);
- err_module:
- put_net(net);
- return NOTIFY_DONE;
-}
-
-static struct notifier_block masq_inet6_notifier = {
- .notifier_call = masq_inet6_event,
-};
-
-static int masq_refcnt;
-static DEFINE_MUTEX(masq_mutex);
-
-int nf_nat_masquerade_ipv6_register_notifier(void)
-{
- int ret = 0;
-
- mutex_lock(&masq_mutex);
- /* check if the notifier is already set */
- if (++masq_refcnt > 1)
- goto out_unlock;
-
- ret = register_netdevice_notifier(&masq_dev_notifier);
- if (ret)
- goto err_dec;
-
- ret = register_inet6addr_notifier(&masq_inet6_notifier);
- if (ret)
- goto err_unregister;
-
- mutex_unlock(&masq_mutex);
- return ret;
-
-err_unregister:
- unregister_netdevice_notifier(&masq_dev_notifier);
-err_dec:
- masq_refcnt--;
-out_unlock:
- mutex_unlock(&masq_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
-
-void nf_nat_masquerade_ipv6_unregister_notifier(void)
-{
- mutex_lock(&masq_mutex);
- /* check if the notifier still has clients */
- if (--masq_refcnt > 0)
- goto out_unlock;
-
- unregister_inet6addr_notifier(&masq_inet6_notifier);
- unregister_netdevice_notifier(&masq_dev_notifier);
-out_unlock:
- mutex_unlock(&masq_mutex);
-}
-EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index b9c8a763c863..02e9228641e0 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -233,6 +233,9 @@ static bool reject6_csum_ok(struct sk_buff *skb, int hook)
if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
return false;
+ if (!nf_reject_verify_csum(proto))
+ return true;
+
return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
}
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
deleted file mode 100644
index 8a081ad7d5db..000000000000
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/list.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/ipv6.h>
-
-static unsigned int nft_nat_do_chain(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv6(&pkt, skb);
-
- return nft_do_chain(&pkt, priv);
-}
-
-static int nft_nat_ipv6_reg(struct net *net, const struct nf_hook_ops *ops)
-{
- return nf_nat_l3proto_ipv6_register_fn(net, ops);
-}
-
-static void nft_nat_ipv6_unreg(struct net *net, const struct nf_hook_ops *ops)
-{
- nf_nat_l3proto_ipv6_unregister_fn(net, ops);
-}
-
-static const struct nft_chain_type nft_chain_nat_ipv6 = {
- .name = "nat",
- .type = NFT_CHAIN_T_NAT,
- .family = NFPROTO_IPV6,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_POST_ROUTING) |
- (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_LOCAL_IN),
- .hooks = {
- [NF_INET_PRE_ROUTING] = nft_nat_do_chain,
- [NF_INET_POST_ROUTING] = nft_nat_do_chain,
- [NF_INET_LOCAL_OUT] = nft_nat_do_chain,
- [NF_INET_LOCAL_IN] = nft_nat_do_chain,
- },
- .ops_register = nft_nat_ipv6_reg,
- .ops_unregister = nft_nat_ipv6_unreg,
-};
-
-static int __init nft_chain_nat_ipv6_init(void)
-{
- nft_register_chain_type(&nft_chain_nat_ipv6);
-
- return 0;
-}
-
-static void __exit nft_chain_nat_ipv6_exit(void)
-{
- nft_unregister_chain_type(&nft_chain_nat_ipv6);
-}
-
-module_init(nft_chain_nat_ipv6_init);
-module_exit(nft_chain_nat_ipv6_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 36be3cf0adef..73cdc0bc63f7 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -59,7 +59,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
struct ipv6hdr *iph)
{
const struct net_device *dev = NULL;
- const struct nf_ipv6_ops *v6ops;
int route_err, addrtype;
struct rt6_info *rt;
struct flowi6 fl6 = {
@@ -68,10 +67,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
};
u32 ret = 0;
- v6ops = nf_get_ipv6_ops();
- if (!v6ops)
- return RTN_UNREACHABLE;
-
if (priv->flags & NFTA_FIB_F_IIF)
dev = nft_in(pkt);
else if (priv->flags & NFTA_FIB_F_OIF)
@@ -79,10 +74,10 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
- if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
+ if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
ret = RTN_LOCAL;
- route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt,
+ route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt,
flowi6_to_flowi(&fl6), false);
if (route_err)
goto err;
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
deleted file mode 100644
index e06c82e9dfcd..000000000000
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nft_masq.h>
-#include <net/netfilter/ipv6/nf_nat_masquerade.h>
-
-static void nft_masq_ipv6_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
-{
- struct nft_masq *priv = nft_expr_priv(expr);
- struct nf_nat_range2 range;
-
- memset(&range, 0, sizeof(range));
- range.flags = priv->flags;
- if (priv->sreg_proto_min) {
- range.min_proto.all = (__force __be16)nft_reg_load16(
- &regs->data[priv->sreg_proto_min]);
- range.max_proto.all = (__force __be16)nft_reg_load16(
- &regs->data[priv->sreg_proto_max]);
- }
- regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
- nft_out(pkt));
-}
-
-static void
-nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
-{
- nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
-}
-
-static struct nft_expr_type nft_masq_ipv6_type;
-static const struct nft_expr_ops nft_masq_ipv6_ops = {
- .type = &nft_masq_ipv6_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
- .eval = nft_masq_ipv6_eval,
- .init = nft_masq_init,
- .destroy = nft_masq_ipv6_destroy,
- .dump = nft_masq_dump,
- .validate = nft_masq_validate,
-};
-
-static struct nft_expr_type nft_masq_ipv6_type __read_mostly = {
- .family = NFPROTO_IPV6,
- .name = "masq",
- .ops = &nft_masq_ipv6_ops,
- .policy = nft_masq_policy,
- .maxattr = NFTA_MASQ_MAX,
- .owner = THIS_MODULE,
-};
-
-static int __init nft_masq_ipv6_module_init(void)
-{
- int ret;
-
- ret = nft_register_expr(&nft_masq_ipv6_type);
- if (ret < 0)
- return ret;
-
- ret = nf_nat_masquerade_ipv6_register_notifier();
- if (ret)
- nft_unregister_expr(&nft_masq_ipv6_type);
-
- return ret;
-}
-
-static void __exit nft_masq_ipv6_module_exit(void)
-{
- nft_unregister_expr(&nft_masq_ipv6_type);
- nf_nat_masquerade_ipv6_unregister_notifier();
-}
-
-module_init(nft_masq_ipv6_module_init);
-module_exit(nft_masq_ipv6_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
deleted file mode 100644
index 74269865acc8..000000000000
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2014 Arturo Borrero Gonzalez <arturo@debian.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/netlink.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nft_redir.h>
-#include <net/netfilter/nf_nat_redirect.h>
-
-static void nft_redir_ipv6_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
-{
- struct nft_redir *priv = nft_expr_priv(expr);
- struct nf_nat_range2 range;
-
- memset(&range, 0, sizeof(range));
- if (priv->sreg_proto_min) {
- range.min_proto.all = (__force __be16)nft_reg_load16(
- &regs->data[priv->sreg_proto_min]);
- range.max_proto.all = (__force __be16)nft_reg_load16(
- &regs->data[priv->sreg_proto_max]);
- range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
- }
-
- range.flags |= priv->flags;
-
- regs->verdict.code =
- nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt));
-}
-
-static void
-nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
-{
- nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
-}
-
-static struct nft_expr_type nft_redir_ipv6_type;
-static const struct nft_expr_ops nft_redir_ipv6_ops = {
- .type = &nft_redir_ipv6_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)),
- .eval = nft_redir_ipv6_eval,
- .init = nft_redir_init,
- .destroy = nft_redir_ipv6_destroy,
- .dump = nft_redir_dump,
- .validate = nft_redir_validate,
-};
-
-static struct nft_expr_type nft_redir_ipv6_type __read_mostly = {
- .family = NFPROTO_IPV6,
- .name = "redir",
- .ops = &nft_redir_ipv6_ops,
- .policy = nft_redir_policy,
- .maxattr = NFTA_REDIR_MAX,
- .owner = THIS_MODULE,
-};
-
-static int __init nft_redir_ipv6_module_init(void)
-{
- return nft_register_expr(&nft_redir_ipv6_type);
-}
-
-static void __exit nft_redir_ipv6_module_exit(void)
-{
- nft_unregister_expr(&nft_redir_ipv6_type);
-}
-
-module_init(nft_redir_ipv6_module_init);
-module_exit(nft_redir_ipv6_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 36a3d8dc61f5..1a832f5e190b 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -69,8 +69,8 @@ static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
static struct inet_frags ip6_frags;
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
- struct net_device *dev);
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev);
static void ip6_frag_expire(struct timer_list *t)
{
@@ -111,21 +111,26 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
struct frag_hdr *fhdr, int nhoff,
u32 *prob_offset)
{
- struct sk_buff *prev, *next;
- struct net_device *dev;
- int offset, end, fragsize;
struct net *net = dev_net(skb_dst(skb)->dev);
+ int offset, end, fragsize;
+ struct sk_buff *prev_tail;
+ struct net_device *dev;
+ int err = -ENOENT;
u8 ecn;
if (fq->q.flags & INET_FRAG_COMPLETE)
goto err;
+ err = -EINVAL;
offset = ntohs(fhdr->frag_off) & ~0x7;
end = offset + (ntohs(ipv6_hdr(skb)->payload_len) -
((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1)));
if ((unsigned int)end > IPV6_MAXPLEN) {
*prob_offset = (u8 *)&fhdr->frag_off - skb_network_header(skb);
+ /* note that if prob_offset is set, the skb is freed elsewhere,
+ * we do not free it here.
+ */
return -1;
}
@@ -170,62 +175,27 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
if (end == offset)
goto discard_fq;
+ err = -ENOMEM;
/* Point into the IP datagram 'data' part. */
if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data))
goto discard_fq;
- if (pskb_trim_rcsum(skb, end - offset))
+ err = pskb_trim_rcsum(skb, end - offset);
+ if (err)
goto discard_fq;
- /* Find out which fragments are in front and at the back of us
- * in the chain of fragments so far. We must know where to put
- * this fragment, right?
- */
- prev = fq->q.fragments_tail;
- if (!prev || prev->ip_defrag_offset < offset) {
- next = NULL;
- goto found;
- }
- prev = NULL;
- for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (next->ip_defrag_offset >= offset)
- break; /* bingo! */
- prev = next;
- }
-
-found:
- /* RFC5722, Section 4, amended by Errata ID : 3089
- * When reassembling an IPv6 datagram, if
- * one or more its constituent fragments is determined to be an
- * overlapping fragment, the entire datagram (and any constituent
- * fragments) MUST be silently discarded.
- */
-
- /* Check for overlap with preceding fragment. */
- if (prev &&
- (prev->ip_defrag_offset + prev->len) > offset)
- goto discard_fq;
-
- /* Look for overlap with succeeding segment. */
- if (next && next->ip_defrag_offset < end)
- goto discard_fq;
-
- /* Note : skb->ip_defrag_offset and skb->sk share the same location */
+ /* Note : skb->rbnode and skb->dev share the same location. */
dev = skb->dev;
- if (dev)
- fq->iif = dev->ifindex;
/* Makes sure compiler wont do silly aliasing games */
barrier();
- skb->ip_defrag_offset = offset;
- /* Insert this fragment in the chain of fragments. */
- skb->next = next;
- if (!next)
- fq->q.fragments_tail = skb;
- if (prev)
- prev->next = skb;
- else
- fq->q.fragments = skb;
+ prev_tail = fq->q.fragments_tail;
+ err = inet_frag_queue_insert(&fq->q, skb, offset, end);
+ if (err)
+ goto insert_error;
+
+ if (dev)
+ fq->iif = dev->ifindex;
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
@@ -246,44 +216,48 @@ found:
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
- int res;
unsigned long orefdst = skb->_skb_refdst;
skb->_skb_refdst = 0UL;
- res = ip6_frag_reasm(fq, prev, dev);
+ err = ip6_frag_reasm(fq, skb, prev_tail, dev);
skb->_skb_refdst = orefdst;
- return res;
+ return err;
}
skb_dst_drop(skb);
- return -1;
+ return -EINPROGRESS;
+insert_error:
+ if (err == IPFRAG_DUP) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+ err = -EINVAL;
+ __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
+ IPSTATS_MIB_REASM_OVERLAPS);
discard_fq:
inet_frag_kill(&fq->q);
-err:
__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS);
+err:
kfree_skb(skb);
- return -1;
+ return err;
}
/*
* Check if this packet is complete.
- * Returns NULL on failure by any reason, and pointer
- * to current nexthdr field in reassembled frame.
*
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
*/
-static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
- struct net_device *dev)
+static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
+ struct sk_buff *prev_tail, struct net_device *dev)
{
struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
- struct sk_buff *fp, *head = fq->q.fragments;
- int payload_len, delta;
unsigned int nhoff;
- int sum_truesize;
+ void *reasm_data;
+ int payload_len;
u8 ecn;
inet_frag_kill(&fq->q);
@@ -292,128 +266,47 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
if (unlikely(ecn == 0xff))
goto out_fail;
- /* Make the one we just received the head. */
- if (prev) {
- head = prev->next;
- fp = skb_clone(head, GFP_ATOMIC);
-
- if (!fp)
- goto out_oom;
-
- fp->next = head->next;
- if (!fp->next)
- fq->q.fragments_tail = fp;
- prev->next = fp;
-
- skb_morph(head, fq->q.fragments);
- head->next = fq->q.fragments->next;
-
- consume_skb(fq->q.fragments);
- fq->q.fragments = head;
- }
-
- WARN_ON(head == NULL);
- WARN_ON(head->ip_defrag_offset != 0);
+ reasm_data = inet_frag_reasm_prepare(&fq->q, skb, prev_tail);
+ if (!reasm_data)
+ goto out_oom;
- /* Unfragmented part is taken from the first segment. */
- payload_len = ((head->data - skb_network_header(head)) -
+ payload_len = ((skb->data - skb_network_header(skb)) -
sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN)
goto out_oversize;
- delta = - head->truesize;
-
- /* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC))
- goto out_oom;
-
- delta += head->truesize;
- if (delta)
- add_frag_mem_limit(fq->q.net, delta);
-
- /* If the first fragment is fragmented itself, we split
- * it to two chunks: the first with data and paged part
- * and the second, holding only fragments. */
- if (skb_has_frag_list(head)) {
- struct sk_buff *clone;
- int i, plen = 0;
-
- clone = alloc_skb(0, GFP_ATOMIC);
- if (!clone)
- goto out_oom;
- clone->next = head->next;
- head->next = clone;
- skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
- skb_frag_list_init(head);
- for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
- plen += skb_frag_size(&skb_shinfo(head)->frags[i]);
- clone->len = clone->data_len = head->data_len - plen;
- head->data_len -= clone->len;
- head->len -= clone->len;
- clone->csum = 0;
- clone->ip_summed = head->ip_summed;
- add_frag_mem_limit(fq->q.net, clone->truesize);
- }
-
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
nhoff = fq->nhoffset;
- skb_network_header(head)[nhoff] = skb_transport_header(head)[0];
- memmove(head->head + sizeof(struct frag_hdr), head->head,
- (head->data - head->head) - sizeof(struct frag_hdr));
- if (skb_mac_header_was_set(head))
- head->mac_header += sizeof(struct frag_hdr);
- head->network_header += sizeof(struct frag_hdr);
-
- skb_reset_transport_header(head);
- skb_push(head, head->data - skb_network_header(head));
-
- sum_truesize = head->truesize;
- for (fp = head->next; fp;) {
- bool headstolen;
- int delta;
- struct sk_buff *next = fp->next;
-
- sum_truesize += fp->truesize;
- if (head->ip_summed != fp->ip_summed)
- head->ip_summed = CHECKSUM_NONE;
- else if (head->ip_summed == CHECKSUM_COMPLETE)
- head->csum = csum_add(head->csum, fp->csum);
-
- if (skb_try_coalesce(head, fp, &headstolen, &delta)) {
- kfree_skb_partial(fp, headstolen);
- } else {
- fp->sk = NULL;
- if (!skb_shinfo(head)->frag_list)
- skb_shinfo(head)->frag_list = fp;
- head->data_len += fp->len;
- head->len += fp->len;
- head->truesize += fp->truesize;
- }
- fp = next;
- }
- sub_frag_mem_limit(fq->q.net, sum_truesize);
+ skb_network_header(skb)[nhoff] = skb_transport_header(skb)[0];
+ memmove(skb->head + sizeof(struct frag_hdr), skb->head,
+ (skb->data - skb->head) - sizeof(struct frag_hdr));
+ if (skb_mac_header_was_set(skb))
+ skb->mac_header += sizeof(struct frag_hdr);
+ skb->network_header += sizeof(struct frag_hdr);
+
+ skb_reset_transport_header(skb);
+
+ inet_frag_reasm_finish(&fq->q, skb, reasm_data);
- skb_mark_not_on_list(head);
- head->dev = dev;
- head->tstamp = fq->q.stamp;
- ipv6_hdr(head)->payload_len = htons(payload_len);
- ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
- IP6CB(head)->nhoff = nhoff;
- IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
- IP6CB(head)->frag_max_size = fq->q.max_size;
+ skb->dev = dev;
+ ipv6_hdr(skb)->payload_len = htons(payload_len);
+ ipv6_change_dsfield(ipv6_hdr(skb), 0xff, ecn);
+ IP6CB(skb)->nhoff = nhoff;
+ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
+ IP6CB(skb)->frag_max_size = fq->q.max_size;
/* Yes, and fold redundant checksum back. 8) */
- skb_postpush_rcsum(head, skb_network_header(head),
- skb_network_header_len(head));
+ skb_postpush_rcsum(skb, skb_network_header(skb),
+ skb_network_header_len(skb));
rcu_read_lock();
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS);
rcu_read_unlock();
- fq->q.fragments = NULL;
fq->q.rb_fragments = RB_ROOT;
fq->q.fragments_tail = NULL;
+ fq->q.last_run_head = NULL;
return 1;
out_oversize:
@@ -464,10 +357,6 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- if (skb->len - skb_network_offset(skb) < IPV6_MIN_MTU &&
- fhdr->frag_off & htons(IP6_MF))
- goto fail_hdr;
-
iif = skb->dev ? skb->dev->ifindex : 0;
fq = fq_find(net, fhdr->identification, hdr, iif);
if (fq) {
@@ -485,6 +374,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
if (prob_offset) {
__IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
IPSTATS_MIB_INHDRERRORS);
+ /* icmpv6_param_prob() calls kfree_skb(skb) */
icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, prob_offset);
}
return ret;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 40b225f87d5e..4ef4bbdb49d4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1274,18 +1274,29 @@ static DEFINE_SPINLOCK(rt6_exception_lock);
static void rt6_remove_exception(struct rt6_exception_bucket *bucket,
struct rt6_exception *rt6_ex)
{
+ struct fib6_info *from;
struct net *net;
if (!bucket || !rt6_ex)
return;
net = dev_net(rt6_ex->rt6i->dst.dev);
+ net->ipv6.rt6_stats->fib_rt_cache--;
+
+ /* purge completely the exception to allow releasing the held resources:
+ * some [sk] cache may keep the dst around for unlimited time
+ */
+ from = rcu_dereference_protected(rt6_ex->rt6i->from,
+ lockdep_is_held(&rt6_exception_lock));
+ rcu_assign_pointer(rt6_ex->rt6i->from, NULL);
+ fib6_info_release(from);
+ dst_dev_put(&rt6_ex->rt6i->dst);
+
hlist_del_rcu(&rt6_ex->hlist);
dst_release(&rt6_ex->rt6i->dst);
kfree_rcu(rt6_ex, rcu);
WARN_ON_ONCE(!bucket->depth);
bucket->depth--;
- net->ipv6.rt6_stats->fib_rt_cache--;
}
/* Remove oldest rt6_ex in bucket and free the memory
@@ -1599,15 +1610,15 @@ static int rt6_remove_exception_rt(struct rt6_info *rt)
static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
{
struct rt6_exception_bucket *bucket;
- struct fib6_info *from = rt->from;
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
-
- if (!from ||
- !(rt->rt6i_flags & RTF_CACHE))
- return;
+ struct fib6_info *from;
rcu_read_lock();
+ from = rcu_dereference(rt->from);
+ if (!from || !(rt->rt6i_flags & RTF_CACHE))
+ goto unlock;
+
bucket = rcu_dereference(from->rt6i_exception_bucket);
#ifdef CONFIG_IPV6_SUBTREES
@@ -1626,6 +1637,7 @@ static void rt6_update_exception_stamp_rt(struct rt6_info *rt)
if (rt6_ex)
rt6_ex->stamp = jiffies;
+unlock:
rcu_read_unlock();
}
@@ -2277,14 +2289,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
- bool from_set;
-
- rcu_read_lock();
- from_set = !!rcu_dereference(rt->from);
- rcu_read_unlock();
-
return !(rt->rt6i_flags & RTF_CACHE) &&
- (rt->rt6i_flags & RTF_PCPU || from_set);
+ (rt->rt6i_flags & RTF_PCPU || rcu_access_pointer(rt->from));
}
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
@@ -2742,20 +2748,24 @@ static int ip6_route_check_nh_onlink(struct net *net,
u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
const struct in6_addr *gw_addr = &cfg->fc_gateway;
u32 flags = RTF_LOCAL | RTF_ANYCAST | RTF_REJECT;
+ struct fib6_info *from;
struct rt6_info *grt;
int err;
err = 0;
grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
if (grt) {
+ rcu_read_lock();
+ from = rcu_dereference(grt->from);
if (!grt->dst.error &&
/* ignore match if it is the default route */
- grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) &&
+ from && !ipv6_addr_any(&from->fib6_dst.addr) &&
(grt->rt6i_flags & flags || dev != grt->dst.dev)) {
NL_SET_ERR_MSG(extack,
"Nexthop has invalid gateway or device mismatch");
err = -EINVAL;
}
+ rcu_read_unlock();
ip6_rt_put(grt);
}
@@ -4166,6 +4176,10 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]);
cfg->fc_flags |= RTF_GATEWAY;
}
+ if (tb[RTA_VIA]) {
+ NL_SET_ERR_MSG(extack, "IPv6 does not support RTA_VIA attribute");
+ goto errout;
+ }
if (tb[RTA_DST]) {
int plen = (rtm->rtm_dst_len + 7) >> 3;
@@ -4251,17 +4265,6 @@ struct rt6_nh {
struct list_head next;
};
-static void ip6_print_replace_route_err(struct list_head *rt6_nh_list)
-{
- struct rt6_nh *nh;
-
- list_for_each_entry(nh, rt6_nh_list, next) {
- pr_warn("IPV6: multipath route replace failed (check consistency of installed routes): %pI6c nexthop %pI6c ifi %d\n",
- &nh->r_cfg.fc_dst, &nh->r_cfg.fc_gateway,
- nh->r_cfg.fc_ifindex);
- }
-}
-
static int ip6_route_info_append(struct net *net,
struct list_head *rt6_nh_list,
struct fib6_info *rt,
@@ -4407,7 +4410,8 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
nh->fib6_info = NULL;
if (err) {
if (replace && nhn)
- ip6_print_replace_route_err(&rt6_nh_list);
+ NL_SET_ERR_MSG_MOD(extack,
+ "multipath route replace failed (check consistency of installed routes)");
err_nh = nh;
goto add_errout;
}
@@ -4659,7 +4663,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
table = rt->fib6_table->tb6_id;
else
table = RT6_TABLE_UNSPEC;
- rtm->rtm_table = table;
+ rtm->rtm_table = table < 256 ? table : RT_TABLE_COMPAT;
if (nla_put_u32(skb, RTA_TABLE, table))
goto nla_put_failure;
@@ -4822,6 +4826,73 @@ int rt6_dump_route(struct fib6_info *rt, void *p_arg)
arg->cb->nlh->nlmsg_seq, flags);
}
+static int inet6_rtm_valid_getroute_req(struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ struct rtmsg *rtm;
+ int i, err;
+
+ if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Invalid header for get route request");
+ return -EINVAL;
+ }
+
+ if (!netlink_strict_get_check(skb))
+ return nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX,
+ rtm_ipv6_policy, extack);
+
+ rtm = nlmsg_data(nlh);
+ if ((rtm->rtm_src_len && rtm->rtm_src_len != 128) ||
+ (rtm->rtm_dst_len && rtm->rtm_dst_len != 128) ||
+ rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope ||
+ rtm->rtm_type) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid values in header for get route request");
+ return -EINVAL;
+ }
+ if (rtm->rtm_flags & ~RTM_F_FIB_MATCH) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Invalid flags for get route request");
+ return -EINVAL;
+ }
+
+ err = nlmsg_parse_strict(nlh, sizeof(*rtm), tb, RTA_MAX,
+ rtm_ipv6_policy, extack);
+ if (err)
+ return err;
+
+ if ((tb[RTA_SRC] && !rtm->rtm_src_len) ||
+ (tb[RTA_DST] && !rtm->rtm_dst_len)) {
+ NL_SET_ERR_MSG_MOD(extack, "rtm_src_len and rtm_dst_len must be 128 for IPv6");
+ return -EINVAL;
+ }
+
+ for (i = 0; i <= RTA_MAX; i++) {
+ if (!tb[i])
+ continue;
+
+ switch (i) {
+ case RTA_SRC:
+ case RTA_DST:
+ case RTA_IIF:
+ case RTA_OIF:
+ case RTA_MARK:
+ case RTA_UID:
+ case RTA_SPORT:
+ case RTA_DPORT:
+ case RTA_IP_PROTO:
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported attribute in get route request");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
@@ -4836,8 +4907,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct flowi6 fl6 = {};
bool fibmatch;
- err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy,
- extack);
+ err = inet6_rtm_valid_getroute_req(in_skb, nlh, tb, extack);
if (err < 0)
goto errout;
@@ -4883,7 +4953,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (tb[RTA_IP_PROTO]) {
err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
- &fl6.flowi6_proto, extack);
+ &fl6.flowi6_proto, AF_INET6,
+ extack);
if (err)
goto errout;
}
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 8d0ba757a46c..9b2f272ca164 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -221,9 +221,7 @@ static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info)
rcu_read_unlock();
genlmsg_end(msg, hdr);
- genlmsg_reply(msg, info);
-
- return 0;
+ return genlmsg_reply(msg, info);
nla_put_failure:
rcu_read_unlock();
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 8181ee7e1e27..ee5403cbe655 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -146,6 +146,8 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
} else {
ip6_flow_hdr(hdr, 0, flowlabel);
hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
}
hdr->nexthdr = NEXTHDR_ROUTING;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 1e03305c0549..09e440e8dfae 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -546,7 +546,8 @@ static int ipip6_err(struct sk_buff *skb, u32 info)
}
err = 0;
- if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
+ if (__in6_dev_get(skb->dev) &&
+ !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4, type, data_len))
goto out;
if (t->parms.iph.daddr == 0)
@@ -1872,6 +1873,7 @@ static int __net_init sit_init_net(struct net *net)
err_reg_dev:
ipip6_dev_free(sitn->fb_tunnel_dev);
+ free_netdev(sitn->fb_tunnel_dev);
err_alloc_dev:
return err;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b81eb7cb815e..57ef69a10889 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -220,8 +220,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
u32 exthdrlen = icsk->icsk_ext_hdr_len;
struct sockaddr_in sin;
- SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
-
if (__ipv6_only_sock(sk))
return -ENETUNREACH;
@@ -1864,7 +1862,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
refcount_read(&sp->sk_refcnt), sp,
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
- (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
tp->snd_cwnd,
state == TCP_LISTEN ?
fastopenq->max_qlen :
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9cbf363172bd..b444483cdb2b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -102,7 +102,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)
return udp_lib_get_port(sk, snum, hash2_nulladdr);
}
-static void udp_v6_rehash(struct sock *sk)
+void udp_v6_rehash(struct sock *sk)
{
u16 new_hash = ipv6_portaddr_hash(sock_net(sk),
&sk->sk_v6_rcv_saddr,
@@ -288,8 +288,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
int peeked, peeking, off;
int err;
int is_udplite = IS_UDPLITE(sk);
+ struct udp_mib __percpu *mib;
bool checksum_valid = false;
- struct udp_mib *mib;
int is_udp4;
if (flags & MSG_ERRQUEUE)
@@ -420,17 +420,19 @@ EXPORT_SYMBOL(udpv6_encap_enable);
*/
static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb,
struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, u32 info)
+ u8 type, u8 code, int offset, __be32 info)
{
int i;
for (i = 0; i < MAX_IPTUN_ENCAP_OPS; i++) {
int (*handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
- u8 type, u8 code, int offset, u32 info);
+ u8 type, u8 code, int offset, __be32 info);
+ const struct ip6_tnl_encap_ops *encap;
- if (!ip6tun_encaps[i])
+ encap = rcu_dereference(ip6tun_encaps[i]);
+ if (!encap)
continue;
- handler = rcu_dereference(ip6tun_encaps[i]->err_handler);
+ handler = encap->err_handler;
if (handler && !handler(skb, opt, type, code, offset, info))
return 0;
}
@@ -1132,15 +1134,23 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6,
const int hlen = skb_network_header_len(skb) +
sizeof(struct udphdr);
- if (hlen + cork->gso_size > cork->fragsize)
+ if (hlen + cork->gso_size > cork->fragsize) {
+ kfree_skb(skb);
return -EINVAL;
- if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
+ }
+ if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) {
+ kfree_skb(skb);
return -EINVAL;
- if (udp_sk(sk)->no_check6_tx)
+ }
+ if (udp_sk(sk)->no_check6_tx) {
+ kfree_skb(skb);
return -EINVAL;
+ }
if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite ||
- dst_xfrm(skb_dst(skb)))
+ dst_xfrm(skb_dst(skb))) {
+ kfree_skb(skb);
return -EIO;
+ }
skb_shinfo(skb)->gso_size = cork->gso_size;
skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
@@ -1390,10 +1400,7 @@ do_udp_sendmsg:
ipc6.opt = opt;
fl6.flowi6_proto = sk->sk_protocol;
- if (!ipv6_addr_any(daddr))
- fl6.daddr = *daddr;
- else
- fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+ fl6.daddr = *daddr;
if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
fl6.saddr = np->saddr;
fl6.fl6_sport = inet->inet_sport;
@@ -1421,6 +1428,9 @@ do_udp_sendmsg:
}
}
+ if (ipv6_addr_any(&fl6.daddr))
+ fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
+
final_p = fl6_update_dst(&fl6, opt, &final);
if (final_p)
connected = false;
diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h
index 5730e6503cb4..20e324b6f358 100644
--- a/net/ipv6/udp_impl.h
+++ b/net/ipv6/udp_impl.h
@@ -13,6 +13,7 @@ int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int,
__be32, struct udp_table *);
int udp_v6_get_port(struct sock *sk, unsigned short snum);
+void udp_v6_rehash(struct sock *sk);
int udpv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c
index a125aebc29e5..f35907836444 100644
--- a/net/ipv6/udplite.c
+++ b/net/ipv6/udplite.c
@@ -49,6 +49,7 @@ struct proto udplitev6_prot = {
.recvmsg = udpv6_recvmsg,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
+ .rehash = udp_v6_rehash,
.get_port = udp_v6_get_port,
.memory_allocated = &udp_memory_allocated,
.sysctl_mem = sysctl_udp_mem,
diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
index f5b4febeaa25..bc65db782bfb 100644
--- a/net/ipv6/xfrm6_tunnel.c
+++ b/net/ipv6/xfrm6_tunnel.c
@@ -344,8 +344,8 @@ static void __net_exit xfrm6_tunnel_net_exit(struct net *net)
struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net);
unsigned int i;
- xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
xfrm_flush_gc();
+ xfrm_state_flush(net, IPSEC_PROTO_ANY, false, true);
for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i]));