aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-12-15 13:22:29 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-12-15 13:22:29 -0800
commitd635a69dd4981cc51f90293f5f64268620ed1565 (patch)
tree5e0a758b402ea7d624c25c3a343545dd29e80f31 /net/ipv6
parentMerge branch 'akpm' (patches from Andrew) (diff)
parentnet: hns3: fix expression that is currently always true (diff)
downloadlinux-dev-d635a69dd4981cc51f90293f5f64268620ed1565.tar.xz
linux-dev-d635a69dd4981cc51f90293f5f64268620ed1565.zip
Merge tag 'net-next-5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - support "prefer busy polling" NAPI operation mode, where we defer softirq for some time expecting applications to periodically busy poll - AF_XDP: improve efficiency by more batching and hindering the adjacency cache prefetcher - af_packet: make packet_fanout.arr size configurable up to 64K - tcp: optimize TCP zero copy receive in presence of partial or unaligned reads making zero copy a performance win for much smaller messages - XDP: add bulk APIs for returning / freeing frames - sched: support fragmenting IP packets as they come out of conntrack - net: allow virtual netdevs to forward UDP L4 and fraglist GSO skbs BPF: - BPF switch from crude rlimit-based to memcg-based memory accounting - BPF type format information for kernel modules and related tracing enhancements - BPF implement task local storage for BPF LSM - allow the FENTRY/FEXIT/RAW_TP tracing programs to use bpf_sk_storage Protocols: - mptcp: improve multiple xmit streams support, memory accounting and many smaller improvements - TLS: support CHACHA20-POLY1305 cipher - seg6: add support for SRv6 End.DT4/DT6 behavior - sctp: Implement RFC 6951: UDP Encapsulation of SCTP - ppp_generic: add ability to bridge channels directly - bridge: Connectivity Fault Management (CFM) support as is defined in IEEE 802.1Q section 12.14. Drivers: - mlx5: make use of the new auxiliary bus to organize the driver internals - mlx5: more accurate port TX timestamping support - mlxsw: - improve the efficiency of offloaded next hop updates by using the new nexthop object API - support blackhole nexthops - support IEEE 802.1ad (Q-in-Q) bridging - rtw88: major bluetooth co-existance improvements - iwlwifi: support new 6 GHz frequency band - ath11k: Fast Initial Link Setup (FILS) - mt7915: dual band concurrent (DBDC) support - net: ipa: add basic support for IPA v4.5 Refactor: - a few pieces of in_interrupt() cleanup work from Sebastian Andrzej Siewior - phy: add support for shared interrupts; get rid of multiple driver APIs and have the drivers write a full IRQ handler, slight growth of driver code should be compensated by the simpler API which also allows shared IRQs - add common code for handling netdev per-cpu counters - move TX packet re-allocation from Ethernet switch tag drivers to a central place - improve efficiency and rename nla_strlcpy - number of W=1 warning cleanups as we now catch those in a patchwork build bot Old code removal: - wan: delete the DLCI / SDLA drivers - wimax: move to staging - wifi: remove old WDS wifi bridging support" * tag 'net-next-5.11' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1922 commits) net: hns3: fix expression that is currently always true net: fix proc_fs init handling in af_packet and tls nfc: pn533: convert comma to semicolon af_vsock: Assign the vsock transport considering the vsock address flags af_vsock: Set VMADDR_FLAG_TO_HOST flag on the receive path vsock_addr: Check for supported flag values vm_sockets: Add VMADDR_FLAG_TO_HOST vsock flag vm_sockets: Add flags field in the vsock address data structure net: Disable NETIF_F_HW_TLS_TX when HW_CSUM is disabled tcp: Add logic to check for SYN w/ data in tcp_simple_retransmit net: mscc: ocelot: install MAC addresses in .ndo_set_rx_mode from process context nfc: s3fwrn5: Release the nfc firmware net: vxget: clean up sparse warnings mlxsw: spectrum_router: Use eXtended mezzanine to offload IPv4 router mlxsw: spectrum: Set KVH XLT cache mode for Spectrum2/3 mlxsw: spectrum_router_xm: Introduce basic XM cache flushing mlxsw: reg: Add Router LPM Cache Enable Register mlxsw: reg: Add Router LPM Cache ML Delete Register mlxsw: spectrum_router_xm: Implement L-value tracking for M-index mlxsw: reg: Add XM Router M Table Register ...
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/addrconf.c1
-rw-r--r--net/ipv6/af_inet6.c2
-rw-r--r--net/ipv6/calipso.c4
-rw-r--r--net/ipv6/exthdrs.c5
-rw-r--r--net/ipv6/ip6_gre.c6
-rw-r--r--net/ipv6/ip6_tunnel.c47
-rw-r--r--net/ipv6/ip6_vti.c3
-rw-r--r--net/ipv6/ipv6_sockglue.c2
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/netfilter/ip6t_REJECT.c2
-rw-r--r--net/ipv6/netfilter/nf_reject_ipv6.c144
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c3
-rw-r--r--net/ipv6/proc.c2
-rw-r--r--net/ipv6/route.c9
-rw-r--r--net/ipv6/rpl.c2
-rw-r--r--net/ipv6/rpl_iptunnel.c9
-rw-r--r--net/ipv6/seg6_local.c590
-rw-r--r--net/ipv6/sit.c2
-rw-r--r--net/ipv6/tcp_ipv6.c9
-rw-r--r--net/ipv6/udp.c8
-rw-r--r--net/ipv6/udp_offload.c8
21 files changed, 765 insertions, 95 deletions
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8b6eb384bac7..eff2cacd5209 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1997,6 +1997,7 @@ EXPORT_SYMBOL(ipv6_chk_prefix);
* ipv6_dev_find - find the first device with a given source address.
* @net: the net namespace
* @addr: the source address
+ * @dev: used to find the L3 domain of interest
*
* The caller should be protected by RCU, or RTNL.
*/
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e648fbebb167..a7e3d170af51 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -451,7 +451,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
/* BPF prog is run before any checks are done so that if the prog
* changes context in a wrong way it will be caught.
*/
- err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
+ err = BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr);
if (err)
return err;
diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c
index 78f766019b7e..51184a70ac7e 100644
--- a/net/ipv6/calipso.c
+++ b/net/ipv6/calipso.c
@@ -423,7 +423,7 @@ static void calipso_doi_free_rcu(struct rcu_head *entry)
/**
* calipso_doi_remove - Remove an existing DOI from the CALIPSO protocol engine
* @doi: the DOI value
- * @audit_secid: the LSM secid to use in the audit message
+ * @audit_info: NetLabel audit information
*
* Description:
* Removes a DOI definition from the CALIPSO engine. The NetLabel routines will
@@ -1226,7 +1226,7 @@ static int calipso_req_setattr(struct request_sock *req,
/**
* calipso_req_delattr - Delete the CALIPSO option from a request socket
- * @reg: the request socket
+ * @req: the request socket
*
* Description:
* Removes the CALIPSO option from a request socket, if present.
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 374105e4394f..6126f8bf94b3 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -906,11 +906,6 @@ void ipv6_exthdrs_exit(void)
/*
* Note: we cannot rely on skb_dst(skb) before we assign it in ip6_route_input().
*/
-static inline struct inet6_dev *ipv6_skb_idev(struct sk_buff *skb)
-{
- return skb_dst(skb) ? ip6_dst_idev(skb_dst(skb)) : __in6_dev_get(skb->dev);
-}
-
static inline struct net *ipv6_skb_net(struct sk_buff *skb)
{
return skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index cf6e1380b527..c3bc89b6b1a1 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1401,7 +1401,7 @@ static const struct net_device_ops ip6gre_netdev_ops = {
.ndo_start_xmit = ip6gre_tunnel_xmit,
.ndo_do_ioctl = ip6gre_tunnel_ioctl,
.ndo_change_mtu = ip6_tnl_change_mtu,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1838,7 +1838,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ip6_tnl_change_mtu,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
@@ -1906,7 +1906,7 @@ static const struct net_device_ops ip6erspan_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ip6_tnl_change_mtu,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 648db3fe508f..a7950baa05e5 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -94,36 +94,6 @@ static inline int ip6_tnl_mpls_supported(void)
return IS_ENABLED(CONFIG_MPLS);
}
-static struct net_device_stats *ip6_get_stats(struct net_device *dev)
-{
- struct pcpu_sw_netstats tmp, sum = { 0 };
- int i;
-
- for_each_possible_cpu(i) {
- unsigned int start;
- const struct pcpu_sw_netstats *tstats =
- per_cpu_ptr(dev->tstats, i);
-
- do {
- start = u64_stats_fetch_begin_irq(&tstats->syncp);
- tmp.rx_packets = tstats->rx_packets;
- tmp.rx_bytes = tstats->rx_bytes;
- tmp.tx_packets = tstats->tx_packets;
- tmp.tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_irq(&tstats->syncp, start));
-
- sum.rx_packets += tmp.rx_packets;
- sum.rx_bytes += tmp.rx_bytes;
- sum.tx_packets += tmp.tx_packets;
- sum.tx_bytes += tmp.tx_bytes;
- }
- dev->stats.rx_packets = sum.rx_packets;
- dev->stats.rx_bytes = sum.rx_bytes;
- dev->stats.tx_packets = sum.tx_packets;
- dev->stats.tx_bytes = sum.tx_bytes;
- return &dev->stats;
-}
-
#define for_each_ip6_tunnel_rcu(start) \
for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
@@ -204,6 +174,7 @@ ip6_tnl_lookup(struct net *net, int link,
/**
* ip6_tnl_bucket - get head of list matching given tunnel parameters
+ * @ip6n: the private data for ip6_vti in the netns
* @p: parameters containing tunnel end-points
*
* Description:
@@ -230,6 +201,7 @@ ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p)
/**
* ip6_tnl_link - add tunnel to hash table
+ * @ip6n: the private data for ip6_vti in the netns
* @t: tunnel to be added
**/
@@ -246,6 +218,7 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t)
/**
* ip6_tnl_unlink - remove tunnel from hash table
+ * @ip6n: the private data for ip6_vti in the netns
* @t: tunnel to be removed
**/
@@ -417,6 +390,7 @@ ip6_tnl_dev_uninit(struct net_device *dev)
/**
* parse_tvl_tnl_enc_lim - handle encapsulation limit option
* @skb: received socket buffer
+ * @raw: the ICMPv6 error message data
*
* Return:
* 0 if none was found,
@@ -485,14 +459,9 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
}
EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim);
-/**
- * ip6_tnl_err - tunnel error handler
- *
- * Description:
- * ip6_tnl_err() should handle errors in the tunnel according
- * to the specifications in RFC 2473.
- **/
-
+/* ip6_tnl_err() should handle errors in the tunnel according to the
+ * specifications in RFC 2473.
+ */
static int
ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt,
u8 *type, u8 *code, int *msg, __u32 *info, int offset)
@@ -1835,7 +1804,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
.ndo_start_xmit = ip6_tnl_start_xmit,
.ndo_do_ioctl = ip6_tnl_ioctl,
.ndo_change_mtu = ip6_tnl_change_mtu,
- .ndo_get_stats = ip6_get_stats,
+ .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 5f9c4fdc120d..0225fd694192 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -125,6 +125,7 @@ vti6_tnl_lookup(struct net *net, const struct in6_addr *remote,
/**
* vti6_tnl_bucket - get head of list matching given tunnel parameters
+ * @ip6n: the private data for ip6_vti in the netns
* @p: parameters containing tunnel end-points
*
* Description:
@@ -889,7 +890,7 @@ static const struct net_device_ops vti6_netdev_ops = {
.ndo_uninit = vti6_dev_uninit,
.ndo_start_xmit = vti6_tnl_xmit,
.ndo_do_ioctl = vti6_ioctl,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 43a894bf9a1b..a6804a7e34c1 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1148,7 +1148,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
if (sk->sk_type != SOCK_STREAM)
return -ENOPROTOOPT;
- msg.msg_control = optval;
+ msg.msg_control_user = optval;
msg.msg_controllen = len;
msg.msg_flags = flags;
msg.msg_control_is_user = true;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 8cd2782a31e4..6c8604390266 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -548,7 +548,7 @@ done:
}
int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
- struct sockaddr_storage *p)
+ struct sockaddr_storage __user *p)
{
int err, i, count, copycount;
const struct in6_addr *group;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 3ac5485049f0..a35019d2e480 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -61,7 +61,7 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
/* Do nothing */
break;
case IP6T_TCP_RESET:
- nf_send_reset6(net, skb, xt_hooknum(par));
+ nf_send_reset6(net, par->state->sk, skb, xt_hooknum(par));
break;
case IP6T_ICMP6_POLICY_FAIL:
nf_send_unreach6(net, skb, ICMPV6_POLICY_FAIL, xt_hooknum(par));
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 4aef6baaa55e..570d1d76c44d 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -12,6 +12,140 @@
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
+static bool nf_reject_v6_csum_ok(struct sk_buff *skb, int hook)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ int thoff;
+ __be16 fo;
+ u8 proto = ip6h->nexthdr;
+
+ if (skb_csum_unnecessary(skb))
+ return true;
+
+ if (ip6h->payload_len &&
+ pskb_trim_rcsum(skb, ntohs(ip6h->payload_len) + sizeof(*ip6h)))
+ return false;
+
+ ip6h = ipv6_hdr(skb);
+ thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo);
+ if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0)
+ return false;
+
+ if (!nf_reject_verify_csum(proto))
+ return true;
+
+ return nf_ip6_checksum(skb, hook, thoff, proto) == 0;
+}
+
+static int nf_reject_ip6hdr_validate(struct sk_buff *skb)
+{
+ struct ipv6hdr *hdr;
+ u32 pkt_len;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ return 0;
+
+ hdr = ipv6_hdr(skb);
+ if (hdr->version != 6)
+ return 0;
+
+ pkt_len = ntohs(hdr->payload_len);
+ if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
+ return 0;
+
+ return 1;
+}
+
+struct sk_buff *nf_reject_skb_v6_tcp_reset(struct net *net,
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook)
+{
+ struct sk_buff *nskb;
+ const struct tcphdr *oth;
+ struct tcphdr _oth;
+ unsigned int otcplen;
+ struct ipv6hdr *nip6h;
+
+ if (!nf_reject_ip6hdr_validate(oldskb))
+ return NULL;
+
+ oth = nf_reject_ip6_tcphdr_get(oldskb, &_oth, &otcplen, hook);
+ if (!oth)
+ return NULL;
+
+ nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct tcphdr) +
+ LL_MAX_HEADER, GFP_ATOMIC);
+ if (!nskb)
+ return NULL;
+
+ nskb->dev = (struct net_device *)dev;
+
+ skb_reserve(nskb, LL_MAX_HEADER);
+ nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_TCP,
+ net->ipv6.devconf_all->hop_limit);
+ nf_reject_ip6_tcphdr_put(nskb, oldskb, oth, otcplen);
+ nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr));
+
+ return nskb;
+}
+EXPORT_SYMBOL_GPL(nf_reject_skb_v6_tcp_reset);
+
+struct sk_buff *nf_reject_skb_v6_unreach(struct net *net,
+ struct sk_buff *oldskb,
+ const struct net_device *dev,
+ int hook, u8 code)
+{
+ struct sk_buff *nskb;
+ struct ipv6hdr *nip6h;
+ struct icmp6hdr *icmp6h;
+ unsigned int len;
+
+ if (!nf_reject_ip6hdr_validate(oldskb))
+ return NULL;
+
+ /* Include "As much of invoking packet as possible without the ICMPv6
+ * packet exceeding the minimum IPv6 MTU" in the ICMP payload.
+ */
+ len = min_t(unsigned int, 1220, oldskb->len);
+
+ if (!pskb_may_pull(oldskb, len))
+ return NULL;
+
+ if (!nf_reject_v6_csum_ok(oldskb, hook))
+ return NULL;
+
+ nskb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr) +
+ LL_MAX_HEADER + len, GFP_ATOMIC);
+ if (!nskb)
+ return NULL;
+
+ nskb->dev = (struct net_device *)dev;
+
+ skb_reserve(nskb, LL_MAX_HEADER);
+ nip6h = nf_reject_ip6hdr_put(nskb, oldskb, IPPROTO_ICMPV6,
+ net->ipv6.devconf_all->hop_limit);
+
+ skb_reset_transport_header(nskb);
+ icmp6h = skb_put_zero(nskb, sizeof(struct icmp6hdr));
+ icmp6h->icmp6_type = ICMPV6_DEST_UNREACH;
+ icmp6h->icmp6_code = code;
+
+ skb_put_data(nskb, skb_network_header(oldskb), len);
+ nip6h->payload_len = htons(nskb->len - sizeof(struct ipv6hdr));
+
+ icmp6h->icmp6_cksum =
+ csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr,
+ nskb->len - sizeof(struct ipv6hdr),
+ IPPROTO_ICMPV6,
+ csum_partial(icmp6h,
+ nskb->len - sizeof(struct ipv6hdr),
+ 0));
+
+ return nskb;
+}
+EXPORT_SYMBOL_GPL(nf_reject_skb_v6_unreach);
+
const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
struct tcphdr *otcph,
unsigned int *otcplen, int hook)
@@ -141,7 +275,8 @@ static int nf_reject6_fill_skb_dst(struct sk_buff *skb_in)
return 0;
}
-void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
+void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
+ int hook)
{
struct net_device *br_indev __maybe_unused;
struct sk_buff *nskb;
@@ -170,7 +305,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
fl6.fl6_sport = otcph->dest;
fl6.fl6_dport = otcph->source;
- if (hook == NF_INET_PRE_ROUTING) {
+ if (hook == NF_INET_PRE_ROUTING || hook == NF_INET_INGRESS) {
nf_ip6_route(net, &dst, flowi6_to_flowi(&fl6), false);
if (!dst)
return;
@@ -233,7 +368,7 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
dev_queue_xmit(nskb);
} else
#endif
- ip6_local_out(net, nskb->sk, nskb);
+ ip6_local_out(net, sk, nskb);
}
EXPORT_SYMBOL_GPL(nf_send_reset6);
@@ -268,7 +403,8 @@ void nf_send_unreach6(struct net *net, struct sk_buff *skb_in,
if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL)
skb_in->dev = net->loopback_dev;
- if (hooknum == NF_INET_PRE_ROUTING && nf_reject6_fill_skb_dst(skb_in))
+ if ((hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_INGRESS) &&
+ nf_reject6_fill_skb_dst(skb_in) < 0)
return;
icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0);
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
index c1098a1968e1..7969d1f3018d 100644
--- a/net/ipv6/netfilter/nft_reject_ipv6.c
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -28,7 +28,8 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr,
nft_hook(pkt));
break;
case NFT_REJECT_TCP_RST:
- nf_send_reset6(nft_net(pkt), pkt->skb, nft_hook(pkt));
+ nf_send_reset6(nft_net(pkt), pkt->xt.state->sk, pkt->skb,
+ nft_hook(pkt));
break;
default:
break;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index bbff3e02e302..d6306aa46bb1 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -126,6 +126,7 @@ static const struct snmp_mib snmp6_udp6_list[] = {
SNMP_MIB_ITEM("Udp6SndbufErrors", UDP_MIB_SNDBUFERRORS),
SNMP_MIB_ITEM("Udp6InCsumErrors", UDP_MIB_CSUMERRORS),
SNMP_MIB_ITEM("Udp6IgnoredMulti", UDP_MIB_IGNOREDMULTI),
+ SNMP_MIB_ITEM("Udp6MemErrors", UDP_MIB_MEMERRORS),
SNMP_MIB_SENTINEL
};
@@ -137,6 +138,7 @@ static const struct snmp_mib snmp6_udplite6_list[] = {
SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS),
SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS),
SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS),
+ SNMP_MIB_ITEM("UdpLite6MemErrors", UDP_MIB_MEMERRORS),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7e0ce7af8234..188e114b29b4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5558,6 +5558,10 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
if (dst->dev && nla_put_u32(skb, RTA_OIF, dst->dev->ifindex))
goto nla_put_failure;
+
+ if (dst->lwtstate &&
+ lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
+ goto nla_put_failure;
} else if (rt->fib6_nsiblings) {
struct fib6_info *sibling, *next_sibling;
struct nlattr *mp;
@@ -6039,11 +6043,6 @@ void fib6_rt_update(struct net *net, struct fib6_info *rt,
struct sk_buff *skb;
int err = -ENOBUFS;
- /* call_fib6_entry_notifiers will be removed when in-kernel notifier
- * is implemented and supported for nexthop objects
- */
- call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, rt, NULL);
-
skb = nlmsg_new(rt6_nlmsg_size(rt), gfp_any());
if (!skb)
goto errout;
diff --git a/net/ipv6/rpl.c b/net/ipv6/rpl.c
index 307f336b5353..488aec9e1a74 100644
--- a/net/ipv6/rpl.c
+++ b/net/ipv6/rpl.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/**
+/*
* Authors:
* (C) 2020 Alexander Aring <alex.aring@gmail.com>
*/
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index 5fdf3ebb953f..ff691d9f4a04 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/**
+/*
* Authors:
* (C) 2020 Alexander Aring <alex.aring@gmail.com>
*/
@@ -190,18 +190,13 @@ static int rpl_do_srh(struct sk_buff *skb, const struct rpl_lwt *rlwt)
{
struct dst_entry *dst = skb_dst(skb);
struct rpl_iptunnel_encap *tinfo;
- int err = 0;
if (skb->protocol != htons(ETH_P_IPV6))
return -EINVAL;
tinfo = rpl_encap_lwtunnel(dst->lwtstate);
- err = rpl_do_srh_inline(skb, rlwt, tinfo->srh);
- if (err)
- return err;
-
- return 0;
+ return rpl_do_srh_inline(skb, rlwt, tinfo->srh);
}
static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index eba23279912d..b07f7c1c82a4 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -33,11 +33,35 @@
struct seg6_local_lwt;
+/* callbacks used for customizing the creation and destruction of a behavior */
+struct seg6_local_lwtunnel_ops {
+ int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack);
+ void (*destroy_state)(struct seg6_local_lwt *slwt);
+};
+
struct seg6_action_desc {
int action;
unsigned long attrs;
+
+ /* The optattrs field is used for specifying all the optional
+ * attributes supported by a specific behavior.
+ * It means that if one of these attributes is not provided in the
+ * netlink message during the behavior creation, no errors will be
+ * returned to the userspace.
+ *
+ * Each attribute can be only of two types (mutually exclusive):
+ * 1) required or 2) optional.
+ * Every user MUST obey to this rule! If you set an attribute as
+ * required the same attribute CANNOT be set as optional and vice
+ * versa.
+ */
+ unsigned long optattrs;
+
int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
int static_headroom;
+
+ struct seg6_local_lwtunnel_ops slwt_ops;
};
struct bpf_lwt_prog {
@@ -45,6 +69,28 @@ struct bpf_lwt_prog {
char *name;
};
+enum seg6_end_dt_mode {
+ DT_INVALID_MODE = -EINVAL,
+ DT_LEGACY_MODE = 0,
+ DT_VRF_MODE = 1,
+};
+
+struct seg6_end_dt_info {
+ enum seg6_end_dt_mode mode;
+
+ struct net *net;
+ /* VRF device associated to the routing table used by the SRv6
+ * End.DT4/DT6 behavior for routing IPv4/IPv6 packets.
+ */
+ int vrf_ifindex;
+ int vrf_table;
+
+ /* tunneled packet proto and family (IPv4 or IPv6) */
+ __be16 proto;
+ u16 family;
+ int hdrlen;
+};
+
struct seg6_local_lwt {
int action;
struct ipv6_sr_hdr *srh;
@@ -54,9 +100,16 @@ struct seg6_local_lwt {
int iif;
int oif;
struct bpf_lwt_prog bpf;
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ struct seg6_end_dt_info dt_info;
+#endif
int headroom;
struct seg6_action_desc *desc;
+ /* unlike the required attrs, we have to track the optional attributes
+ * that have been effectively parsed.
+ */
+ unsigned long parsed_optattrs;
};
static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt)
@@ -401,6 +454,248 @@ drop:
return -EINVAL;
}
+#ifdef CONFIG_NET_L3_MASTER_DEV
+static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg)
+{
+ const struct nl_info *nli = &fib6_cfg->fc_nlinfo;
+
+ return nli->nl_net;
+}
+
+static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg,
+ u16 family, struct netlink_ext_ack *extack)
+{
+ struct seg6_end_dt_info *info = &slwt->dt_info;
+ int vrf_ifindex;
+ struct net *net;
+
+ net = fib6_config_get_net(cfg);
+
+ /* note that vrf_table was already set by parse_nla_vrftable() */
+ vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net,
+ info->vrf_table);
+ if (vrf_ifindex < 0) {
+ if (vrf_ifindex == -EPERM) {
+ NL_SET_ERR_MSG(extack,
+ "Strict mode for VRF is disabled");
+ } else if (vrf_ifindex == -ENODEV) {
+ NL_SET_ERR_MSG(extack,
+ "Table has no associated VRF device");
+ } else {
+ pr_debug("seg6local: SRv6 End.DT* creation error=%d\n",
+ vrf_ifindex);
+ }
+
+ return vrf_ifindex;
+ }
+
+ info->net = net;
+ info->vrf_ifindex = vrf_ifindex;
+
+ switch (family) {
+ case AF_INET:
+ info->proto = htons(ETH_P_IP);
+ info->hdrlen = sizeof(struct iphdr);
+ break;
+ case AF_INET6:
+ info->proto = htons(ETH_P_IPV6);
+ info->hdrlen = sizeof(struct ipv6hdr);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ info->family = family;
+ info->mode = DT_VRF_MODE;
+
+ return 0;
+}
+
+/* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and
+ * routes the IPv4/IPv6 packet by looking at the configured routing table.
+ *
+ * In the SRv6 End.DT4/DT6 use case, we can receive traffic (IPv6+Segment
+ * Routing Header packets) from several interfaces and the outer IPv6
+ * destination address (DA) is used for retrieving the specific instance of the
+ * End.DT4/DT6 behavior that should process the packets.
+ *
+ * However, the inner IPv4/IPv6 packet is not really bound to any receiving
+ * interface and thus the End.DT4/DT6 sets the VRF (associated with the
+ * corresponding routing table) as the *receiving* interface.
+ * In other words, the End.DT4/DT6 processes a packet as if it has been received
+ * directly by the VRF (and not by one of its slave devices, if any).
+ * In this way, the VRF interface is used for routing the IPv4/IPv6 packet in
+ * according to the routing table configured by the End.DT4/DT6 instance.
+ *
+ * This design allows you to get some interesting features like:
+ * 1) the statistics on rx packets;
+ * 2) the possibility to install a packet sniffer on the receiving interface
+ * (the VRF one) for looking at the incoming packets;
+ * 3) the possibility to leverage the netfilter prerouting hook for the inner
+ * IPv4 packet.
+ *
+ * This function returns:
+ * - the sk_buff* when the VRF rcv handler has processed the packet correctly;
+ * - NULL when the skb is consumed by the VRF rcv handler;
+ * - a pointer which encodes a negative error number in case of error.
+ * Note that in this case, the function takes care of freeing the skb.
+ */
+static struct sk_buff *end_dt_vrf_rcv(struct sk_buff *skb, u16 family,
+ struct net_device *dev)
+{
+ /* based on l3mdev_ip_rcv; we are only interested in the master */
+ if (unlikely(!netif_is_l3_master(dev) && !netif_has_l3_rx_handler(dev)))
+ goto drop;
+
+ if (unlikely(!dev->l3mdev_ops->l3mdev_l3_rcv))
+ goto drop;
+
+ /* the decap packet IPv4/IPv6 does not come with any mac header info.
+ * We must unset the mac header to allow the VRF device to rebuild it,
+ * just in case there is a sniffer attached on the device.
+ */
+ skb_unset_mac_header(skb);
+
+ skb = dev->l3mdev_ops->l3mdev_l3_rcv(dev, skb, family);
+ if (!skb)
+ /* the skb buffer was consumed by the handler */
+ return NULL;
+
+ /* when a packet is received by a VRF or by one of its slaves, the
+ * master device reference is set into the skb.
+ */
+ if (unlikely(skb->dev != dev || skb->skb_iif != dev->ifindex))
+ goto drop;
+
+ return skb;
+
+drop:
+ kfree_skb(skb);
+ return ERR_PTR(-EINVAL);
+}
+
+static struct net_device *end_dt_get_vrf_rcu(struct sk_buff *skb,
+ struct seg6_end_dt_info *info)
+{
+ int vrf_ifindex = info->vrf_ifindex;
+ struct net *net = info->net;
+
+ if (unlikely(vrf_ifindex < 0))
+ goto error;
+
+ if (unlikely(!net_eq(dev_net(skb->dev), net)))
+ goto error;
+
+ return dev_get_by_index_rcu(net, vrf_ifindex);
+
+error:
+ return NULL;
+}
+
+static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct seg6_end_dt_info *info = &slwt->dt_info;
+ struct net_device *vrf;
+
+ vrf = end_dt_get_vrf_rcu(skb, info);
+ if (unlikely(!vrf))
+ goto drop;
+
+ skb->protocol = info->proto;
+
+ skb_dst_drop(skb);
+
+ skb_set_transport_header(skb, info->hdrlen);
+
+ return end_dt_vrf_rcv(skb, info->family, vrf);
+
+drop:
+ kfree_skb(skb);
+ return ERR_PTR(-EINVAL);
+}
+
+static int input_action_end_dt4(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct iphdr *iph;
+ int err;
+
+ if (!decap_and_validate(skb, IPPROTO_IPIP))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto drop;
+
+ skb = end_dt_vrf_core(skb, slwt);
+ if (!skb)
+ /* packet has been processed and consumed by the VRF */
+ return 0;
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ iph = ip_hdr(skb);
+
+ err = ip_route_input(skb, iph->daddr, iph->saddr, 0, skb->dev);
+ if (unlikely(err))
+ goto drop;
+
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int seg6_end_dt4_build(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack)
+{
+ return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET, extack);
+}
+
+static enum
+seg6_end_dt_mode seg6_end_dt6_parse_mode(struct seg6_local_lwt *slwt)
+{
+ unsigned long parsed_optattrs = slwt->parsed_optattrs;
+ bool legacy, vrfmode;
+
+ legacy = !!(parsed_optattrs & (1 << SEG6_LOCAL_TABLE));
+ vrfmode = !!(parsed_optattrs & (1 << SEG6_LOCAL_VRFTABLE));
+
+ if (!(legacy ^ vrfmode))
+ /* both are absent or present: invalid DT6 mode */
+ return DT_INVALID_MODE;
+
+ return legacy ? DT_LEGACY_MODE : DT_VRF_MODE;
+}
+
+static enum seg6_end_dt_mode seg6_end_dt6_get_mode(struct seg6_local_lwt *slwt)
+{
+ struct seg6_end_dt_info *info = &slwt->dt_info;
+
+ return info->mode;
+}
+
+static int seg6_end_dt6_build(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack)
+{
+ enum seg6_end_dt_mode mode = seg6_end_dt6_parse_mode(slwt);
+ struct seg6_end_dt_info *info = &slwt->dt_info;
+
+ switch (mode) {
+ case DT_LEGACY_MODE:
+ info->mode = DT_LEGACY_MODE;
+ return 0;
+ case DT_VRF_MODE:
+ return __seg6_end_dt_vrf_build(slwt, cfg, AF_INET6, extack);
+ default:
+ NL_SET_ERR_MSG(extack, "table or vrftable must be specified");
+ return -EINVAL;
+ }
+}
+#endif
+
static int input_action_end_dt6(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
@@ -410,6 +705,28 @@ static int input_action_end_dt6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ if (seg6_end_dt6_get_mode(slwt) == DT_LEGACY_MODE)
+ goto legacy_mode;
+
+ /* DT6_VRF_MODE */
+ skb = end_dt_vrf_core(skb, slwt);
+ if (!skb)
+ /* packet has been processed and consumed by the VRF */
+ return 0;
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ /* note: this time we do not need to specify the table because the VRF
+ * takes care of selecting the correct table.
+ */
+ seg6_lookup_any_nexthop(skb, NULL, 0, true);
+
+ return dst_input(skb);
+
+legacy_mode:
+#endif
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
seg6_lookup_any_nexthop(skb, NULL, slwt->table, true);
@@ -590,8 +907,27 @@ static struct seg6_action_desc seg6_action_table[] = {
.input = input_action_end_dx4,
},
{
+ .action = SEG6_LOCAL_ACTION_END_DT4,
+ .attrs = (1 << SEG6_LOCAL_VRFTABLE),
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ .input = input_action_end_dt4,
+ .slwt_ops = {
+ .build_state = seg6_end_dt4_build,
+ },
+#endif
+ },
+ {
.action = SEG6_LOCAL_ACTION_END_DT6,
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ .attrs = 0,
+ .optattrs = (1 << SEG6_LOCAL_TABLE) |
+ (1 << SEG6_LOCAL_VRFTABLE),
+ .slwt_ops = {
+ .build_state = seg6_end_dt6_build,
+ },
+#else
.attrs = (1 << SEG6_LOCAL_TABLE),
+#endif
.input = input_action_end_dt6,
},
{
@@ -649,6 +985,7 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
[SEG6_LOCAL_TABLE] = { .type = NLA_U32 },
+ [SEG6_LOCAL_VRFTABLE] = { .type = NLA_U32 },
[SEG6_LOCAL_NH4] = { .type = NLA_BINARY,
.len = sizeof(struct in_addr) },
[SEG6_LOCAL_NH6] = { .type = NLA_BINARY,
@@ -710,6 +1047,11 @@ static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return memcmp(a->srh, b->srh, len);
}
+static void destroy_attr_srh(struct seg6_local_lwt *slwt)
+{
+ kfree(slwt->srh);
+}
+
static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]);
@@ -733,6 +1075,53 @@ static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return 0;
}
+static struct
+seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt)
+{
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ return &slwt->dt_info;
+#else
+ return ERR_PTR(-EOPNOTSUPP);
+#endif
+}
+
+static int parse_nla_vrftable(struct nlattr **attrs,
+ struct seg6_local_lwt *slwt)
+{
+ struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
+
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+
+ info->vrf_table = nla_get_u32(attrs[SEG6_LOCAL_VRFTABLE]);
+
+ return 0;
+}
+
+static int put_nla_vrftable(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt);
+
+ if (IS_ERR(info))
+ return PTR_ERR(info);
+
+ if (nla_put_u32(skb, SEG6_LOCAL_VRFTABLE, info->vrf_table))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ struct seg6_end_dt_info *info_a = seg6_possible_end_dt_info(a);
+ struct seg6_end_dt_info *info_b = seg6_possible_end_dt_info(b);
+
+ if (info_a->vrf_table != info_b->vrf_table)
+ return 1;
+
+ return 0;
+}
+
static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]),
@@ -901,16 +1290,30 @@ static int cmp_nla_bpf(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
return strcmp(a->bpf.name, b->bpf.name);
}
+static void destroy_attr_bpf(struct seg6_local_lwt *slwt)
+{
+ kfree(slwt->bpf.name);
+ if (slwt->bpf.prog)
+ bpf_prog_put(slwt->bpf.prog);
+}
+
struct seg6_action_param {
int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b);
+
+ /* optional destroy() callback useful for releasing resources which
+ * have been previously acquired in the corresponding parse()
+ * function.
+ */
+ void (*destroy)(struct seg6_local_lwt *slwt);
};
static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_SRH] = { .parse = parse_nla_srh,
.put = put_nla_srh,
- .cmp = cmp_nla_srh },
+ .cmp = cmp_nla_srh,
+ .destroy = destroy_attr_srh },
[SEG6_LOCAL_TABLE] = { .parse = parse_nla_table,
.put = put_nla_table,
@@ -934,14 +1337,130 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_BPF] = { .parse = parse_nla_bpf,
.put = put_nla_bpf,
- .cmp = cmp_nla_bpf },
+ .cmp = cmp_nla_bpf,
+ .destroy = destroy_attr_bpf },
+
+ [SEG6_LOCAL_VRFTABLE] = { .parse = parse_nla_vrftable,
+ .put = put_nla_vrftable,
+ .cmp = cmp_nla_vrftable },
};
+/* call the destroy() callback (if available) for each set attribute in
+ * @parsed_attrs, starting from the first attribute up to the @max_parsed
+ * (excluded) attribute.
+ */
+static void __destroy_attrs(unsigned long parsed_attrs, int max_parsed,
+ struct seg6_local_lwt *slwt)
+{
+ struct seg6_action_param *param;
+ int i;
+
+ /* Every required seg6local attribute is identified by an ID which is
+ * encoded as a flag (i.e: 1 << ID) in the 'attrs' bitmask;
+ *
+ * We scan the 'parsed_attrs' bitmask, starting from the first attribute
+ * up to the @max_parsed (excluded) attribute.
+ * For each set attribute, we retrieve the corresponding destroy()
+ * callback. If the callback is not available, then we skip to the next
+ * attribute; otherwise, we call the destroy() callback.
+ */
+ for (i = 0; i < max_parsed; ++i) {
+ if (!(parsed_attrs & (1 << i)))
+ continue;
+
+ param = &seg6_action_params[i];
+
+ if (param->destroy)
+ param->destroy(slwt);
+ }
+}
+
+/* release all the resources that may have been acquired during parsing
+ * operations.
+ */
+static void destroy_attrs(struct seg6_local_lwt *slwt)
+{
+ unsigned long attrs = slwt->desc->attrs | slwt->parsed_optattrs;
+
+ __destroy_attrs(attrs, SEG6_LOCAL_MAX + 1, slwt);
+}
+
+static int parse_nla_optional_attrs(struct nlattr **attrs,
+ struct seg6_local_lwt *slwt)
+{
+ struct seg6_action_desc *desc = slwt->desc;
+ unsigned long parsed_optattrs = 0;
+ struct seg6_action_param *param;
+ int err, i;
+
+ for (i = 0; i < SEG6_LOCAL_MAX + 1; ++i) {
+ if (!(desc->optattrs & (1 << i)) || !attrs[i])
+ continue;
+
+ /* once here, the i-th attribute is provided by the
+ * userspace AND it is identified optional as well.
+ */
+ param = &seg6_action_params[i];
+
+ err = param->parse(attrs, slwt);
+ if (err < 0)
+ goto parse_optattrs_err;
+
+ /* current attribute has been correctly parsed */
+ parsed_optattrs |= (1 << i);
+ }
+
+ /* store in the tunnel state all the optional attributed successfully
+ * parsed.
+ */
+ slwt->parsed_optattrs = parsed_optattrs;
+
+ return 0;
+
+parse_optattrs_err:
+ __destroy_attrs(parsed_optattrs, i, slwt);
+
+ return err;
+}
+
+/* call the custom constructor of the behavior during its initialization phase
+ * and after that all its attributes have been parsed successfully.
+ */
+static int
+seg6_local_lwtunnel_build_state(struct seg6_local_lwt *slwt, const void *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct seg6_action_desc *desc = slwt->desc;
+ struct seg6_local_lwtunnel_ops *ops;
+
+ ops = &desc->slwt_ops;
+ if (!ops->build_state)
+ return 0;
+
+ return ops->build_state(slwt, cfg, extack);
+}
+
+/* call the custom destructor of the behavior which is invoked before the
+ * tunnel is going to be destroyed.
+ */
+static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt)
+{
+ struct seg6_action_desc *desc = slwt->desc;
+ struct seg6_local_lwtunnel_ops *ops;
+
+ ops = &desc->slwt_ops;
+ if (!ops->destroy_state)
+ return;
+
+ ops->destroy_state(slwt);
+}
+
static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
{
struct seg6_action_param *param;
struct seg6_action_desc *desc;
+ unsigned long invalid_attrs;
int i, err;
desc = __get_action_desc(slwt->action);
@@ -954,6 +1473,26 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
slwt->desc = desc;
slwt->headroom += desc->static_headroom;
+ /* Forcing the desc->optattrs *set* and the desc->attrs *set* to be
+ * disjoined, this allow us to release acquired resources by optional
+ * attributes and by required attributes independently from each other
+ * without any interfarence.
+ * In other terms, we are sure that we do not release some the acquired
+ * resources twice.
+ *
+ * Note that if an attribute is configured both as required and as
+ * optional, it means that the user has messed something up in the
+ * seg6_action_table. Therefore, this check is required for SRv6
+ * behaviors to work properly.
+ */
+ invalid_attrs = desc->attrs & desc->optattrs;
+ if (invalid_attrs) {
+ WARN_ONCE(1,
+ "An attribute cannot be both required AND optional");
+ return -EINVAL;
+ }
+
+ /* parse the required attributes */
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
if (desc->attrs & (1 << i)) {
if (!attrs[i])
@@ -963,11 +1502,24 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt)
err = param->parse(attrs, slwt);
if (err < 0)
- return err;
+ goto parse_attrs_err;
}
}
+ /* parse the optional attributes, if any */
+ err = parse_nla_optional_attrs(attrs, slwt);
+ if (err < 0)
+ goto parse_attrs_err;
+
return 0;
+
+parse_attrs_err:
+ /* release any resource that may have been acquired during the i-1
+ * parse() operations.
+ */
+ __destroy_attrs(desc->attrs, i, slwt);
+
+ return err;
}
static int seg6_local_build_state(struct net *net, struct nlattr *nla,
@@ -1003,6 +1555,10 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla,
if (err < 0)
goto out_free;
+ err = seg6_local_lwtunnel_build_state(slwt, cfg, extack);
+ if (err < 0)
+ goto out_destroy_attrs;
+
newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL;
newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT;
newts->headroom = slwt->headroom;
@@ -1011,8 +1567,9 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla,
return 0;
+out_destroy_attrs:
+ destroy_attrs(slwt);
out_free:
- kfree(slwt->srh);
kfree(newts);
return err;
}
@@ -1021,12 +1578,9 @@ static void seg6_local_destroy_state(struct lwtunnel_state *lwt)
{
struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
- kfree(slwt->srh);
+ seg6_local_lwtunnel_destroy_state(slwt);
- if (slwt->desc->attrs & (1 << SEG6_LOCAL_BPF)) {
- kfree(slwt->bpf.name);
- bpf_prog_put(slwt->bpf.prog);
- }
+ destroy_attrs(slwt);
return;
}
@@ -1036,13 +1590,16 @@ static int seg6_local_fill_encap(struct sk_buff *skb,
{
struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt);
struct seg6_action_param *param;
+ unsigned long attrs;
int i, err;
if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action))
return -EMSGSIZE;
+ attrs = slwt->desc->attrs | slwt->parsed_optattrs;
+
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
- if (slwt->desc->attrs & (1 << i)) {
+ if (attrs & (1 << i)) {
param = &seg6_action_params[i];
err = param->put(skb, slwt);
if (err < 0)
@@ -1061,7 +1618,7 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
nlsize = nla_total_size(4); /* action */
- attrs = slwt->desc->attrs;
+ attrs = slwt->desc->attrs | slwt->parsed_optattrs;
if (attrs & (1 << SEG6_LOCAL_SRH))
nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3);
@@ -1086,6 +1643,9 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt)
nla_total_size(MAX_PROG_NAME) +
nla_total_size(4);
+ if (attrs & (1 << SEG6_LOCAL_VRFTABLE))
+ nlsize += nla_total_size(4);
+
return nlsize;
}
@@ -1094,6 +1654,7 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a,
{
struct seg6_local_lwt *slwt_a, *slwt_b;
struct seg6_action_param *param;
+ unsigned long attrs_a, attrs_b;
int i;
slwt_a = seg6_local_lwtunnel(a);
@@ -1102,11 +1663,14 @@ static int seg6_local_cmp_encap(struct lwtunnel_state *a,
if (slwt_a->action != slwt_b->action)
return 1;
- if (slwt_a->desc->attrs != slwt_b->desc->attrs)
+ attrs_a = slwt_a->desc->attrs | slwt_a->parsed_optattrs;
+ attrs_b = slwt_b->desc->attrs | slwt_b->parsed_optattrs;
+
+ if (attrs_a != attrs_b)
return 1;
for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) {
- if (slwt_a->desc->attrs & (1 << i)) {
+ if (attrs_a & (1 << i)) {
param = &seg6_action_params[i];
if (param->cmp(slwt_a, slwt_b))
return 1;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 5e7983cb6154..2da0ee703779 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1395,7 +1395,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
.ndo_uninit = ipip6_tunnel_uninit,
.ndo_start_xmit = sit_tunnel_xmit,
.ndo_do_ioctl = ipip6_tunnel_ioctl,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
.ndo_tunnel_ctl = ipip6_tunnel_ctl,
};
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 991dc36f95ff..e254569a3005 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -829,9 +829,15 @@ static void tcp_v6_init_req(struct request_sock *req,
}
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
+ struct sk_buff *skb,
struct flowi *fl,
- const struct request_sock *req)
+ struct request_sock *req)
{
+ tcp_v6_init_req(req, sk, skb);
+
+ if (security_inet_conn_request(sk, skb, req))
+ return NULL;
+
return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}
@@ -852,7 +858,6 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.req_md5_lookup = tcp_v6_md5_lookup,
.calc_md5_hash = tcp_v6_md5_hash_skb,
#endif
- .init_req = tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
.cookie_init_seq = cookie_v6_init_sequence,
#endif
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 29d9691359b9..9008f5796ad4 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -276,7 +276,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
inet6_sdif(skb), udptable, skb);
}
-struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
+struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
@@ -285,7 +285,6 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb,
&iph->daddr, dport, inet6_iif(skb),
inet6_sdif(skb), &udp_table, NULL);
}
-EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
/* Must be called under rcu_read_lock().
* Does increment socket refcount.
@@ -560,7 +559,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
inet6_iif(skb), inet6_sdif(skb), udptable, NULL);
- if (!sk) {
+ if (!sk || udp_sk(sk)->encap_type) {
/* No socket for error: try tunnels before discarding */
sk = ERR_PTR(-ENOENT);
if (static_branch_unlikely(&udpv6_encap_needed_key)) {
@@ -637,6 +636,9 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
if (rc == -ENOMEM)
UDP6_INC_STATS(sock_net(sk),
UDP_MIB_RCVBUFERRORS, is_udplite);
+ else
+ UDP6_INC_STATS(sock_net(sk),
+ UDP_MIB_MEMERRORS, is_udplite);
UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
return -1;
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index f9e888d1b9af..c7bd7b1a04c1 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -28,10 +28,6 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
int tnl_hlen;
int err;
- mss = skb_shinfo(skb)->gso_size;
- if (unlikely(skb->len <= mss))
- goto out;
-
if (skb->encapsulation && skb_shinfo(skb)->gso_type &
(SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))
segs = skb_udp_tunnel_segment(skb, features, true);
@@ -48,6 +44,10 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
return __udp_gso_segment(skb, features);
+ mss = skb_shinfo(skb)->gso_size;
+ if (unlikely(skb->len <= mss))
+ goto out;
+
/* Do software UFO. Complete and fill in the UDP checksum as HW cannot
* do checksum of UDP packets sent as multiple IP fragments.
*/