Diffstat (limited to 'net/ipv6/tcp_ipv6.c')
 net/ipv6/tcp_ipv6.c | 440
 1 file changed, 265 insertions(+), 175 deletions(-)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index eaf09e6b7844..2a3f9296df1e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -72,7 +72,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
struct request_sock *req);
-static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
+INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
@@ -107,9 +107,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
if (dst && dst_hold_safe(dst)) {
const struct rt6_info *rt = (const struct rt6_info *)dst;
- sk->sk_rx_dst = dst;
- inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
- tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+ rcu_assign_pointer(sk->sk_rx_dst, dst);
+ sk->sk_rx_dst_ifindex = skb->skb_iif;
+ sk->sk_rx_dst_cookie = rt6_get_cookie(rt);
}
}
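The hunk above turns a plain pointer store into an RCU publish: rcu_assign_pointer() orders the dst's initialization before the pointer becomes visible to lockless readers such as early demux. A minimal userspace sketch of that publish/subscribe ordering, using C11 atomics as local stand-ins for the kernel macros:

#include <stdatomic.h>
#include <stdio.h>

struct dst { int ifindex; };

static _Atomic(struct dst *) rx_dst; /* plays the role of sk->sk_rx_dst */

/* rcu_assign_pointer(): release store, so the dst's fields are visible
 * before any reader can observe the pointer itself. */
static void publish_dst(struct dst *d)
{
	atomic_store_explicit(&rx_dst, d, memory_order_release);
}

/* rcu_dereference(): paired acquire load on the reader side. */
static struct dst *read_dst(void)
{
	return atomic_load_explicit(&rx_dst, memory_order_acquire);
}

int main(void)
{
	static struct dst d = { .ifindex = 2 };

	publish_dst(&d);
	struct dst *p = read_dst();
	if (p)
		printf("rx dst ifindex=%d\n", p->ifindex);
	return 0;
}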
@@ -146,17 +146,18 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
- struct inet_sock *inet = inet_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct in6_addr *saddr = NULL, *final_p, final;
+ struct inet_timewait_death_row *tcp_death_row;
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- struct in6_addr *saddr = NULL, *final_p, final;
+ struct net *net = sock_net(sk);
struct ipv6_txoptions *opt;
- struct flowi6 fl6;
struct dst_entry *dst;
+ struct flowi6 fl6;
int addr_type;
int err;
- struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
@@ -230,14 +231,15 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
u32 exthdrlen = icsk->icsk_ext_hdr_len;
struct sockaddr_in sin;
- if (__ipv6_only_sock(sk))
+ if (ipv6_only_sock(sk))
return -ENETUNREACH;
sin.sin_family = AF_INET;
sin.sin_port = usin->sin6_port;
sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
- icsk->icsk_af_ops = &ipv6_mapped;
+ /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
+ WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped);
if (sk_is_mptcp(sk))
mptcpv6_handle_mapped(sk, true);
sk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -249,7 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
if (err) {
icsk->icsk_ext_hdr_len = exthdrlen;
- icsk->icsk_af_ops = &ipv6_specific;
+ /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */
+ WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific);
if (sk_is_mptcp(sk))
mptcpv6_handle_mapped(sk, false);
sk->sk_backlog_rcv = tcp_v6_do_rcv;
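Both hunks annotate the same lockless handoff: icsk_af_ops is rewritten here while other paths may read it without the socket lock, so the store becomes a WRITE_ONCE() paired with READ_ONCE() in tcp_(get|set)sockopt(). A hedged sketch of the pairing, with simplified local definitions standing in for the kernel macros:

#include <stdio.h>

#define WRITE_ONCE(x, val) (*(volatile __typeof__(x) *)&(x) = (val))
#define READ_ONCE(x)       (*(volatile __typeof__(x) *)&(x))

struct af_ops { const char *name; };

static const struct af_ops ipv6_mapped_ops   = { "ipv6_mapped" };
static const struct af_ops ipv6_specific_ops = { "ipv6_specific" };

/* Plays the role of icsk->icsk_af_ops: written by connect(), read
 * locklessly by the sockopt paths. */
static const struct af_ops *af_ops = &ipv6_specific_ops;

int main(void)
{
	/* Writer side (v4-mapped connect): one annotated store. */
	WRITE_ONCE(af_ops, &ipv6_mapped_ops);

	/* Reader side (tcp_(get|set)sockopt): one annotated load. */
	const struct af_ops *ops = READ_ONCE(af_ops);

	printf("%s\n", ops->name);
	return 0;
}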
@@ -278,17 +281,35 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
final_p = fl6_update_dst(&fl6, opt, &final);
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
+ security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6));
- dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
+ dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto failure;
}
+ tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
+
if (!saddr) {
+ struct inet_bind_hashbucket *prev_addr_hashbucket = NULL;
+ struct in6_addr prev_v6_rcv_saddr;
+
+ if (icsk->icsk_bind2_hash) {
+ prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo,
+ sk, net, inet->inet_num);
+ prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+ }
saddr = &fl6.saddr;
sk->sk_v6_rcv_saddr = *saddr;
+
+ if (prev_addr_hashbucket) {
+ err = inet_bhash2_update_saddr(prev_addr_hashbucket, sk);
+ if (err) {
+ sk->sk_v6_rcv_saddr = prev_v6_rcv_saddr;
+ goto failure;
+ }
+ }
}
/* set the source address */
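The block above saves the previous bound address, performs the bhash2 rehash, and restores the old address if inet_bhash2_update_saddr() fails, so a failed connect() leaves the socket self-consistent. A small sketch of that save/try/roll-back shape, with an invented rehash_stub() in place of the real rehash:

#include <stdio.h>

struct addr { unsigned int v; };

static int rehash_stub(struct addr a)
{
	return a.v != 0 ? 0 : -1; /* pretend 0 is an invalid bucket */
}

static int update_saddr(struct addr *cur, struct addr next)
{
	struct addr prev = *cur; /* prev_v6_rcv_saddr in the patch */
	int err;

	*cur = next;
	err = rehash_stub(next);
	if (err)
		*cur = prev; /* roll back on failure */
	return err;
}

int main(void)
{
	struct addr a = { 1 };

	printf("%d v=%u\n", update_saddr(&a, (struct addr){ 0 }), a.v);
	printf("%d v=%u\n", update_saddr(&a, (struct addr){ 7 }), a.v);
	return 0;
}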
@@ -321,8 +342,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
sk->sk_v6_daddr.s6_addr32,
inet->inet_sport,
inet->inet_dport));
- tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
- np->saddr.s6_addr32,
+ tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32,
sk->sk_v6_daddr.s6_addr32);
}
@@ -348,11 +368,20 @@ failure:
static void tcp_v6_mtu_reduced(struct sock *sk)
{
struct dst_entry *dst;
+ u32 mtu;
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
return;
- dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
+ mtu = READ_ONCE(tcp_sk(sk)->mtu_info);
+
+ /* Drop requests trying to increase our current mss.
+ * Check done in __ip6_rt_update_pmtu() is too late.
+ */
+ if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache)
+ return;
+
+ dst = inet6_csk_update_pmtu(sk, mtu);
if (!dst)
return;
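The rewritten helper reads mtu_info once (it can be updated from softirq context) and bails out early when the advertised MTU would not actually shrink the current MSS, closing a window where the check in __ip6_rt_update_pmtu() came too late. A sketch of the guard, using a rough "MTU minus fixed v6/TCP headers" stand-in for tcp_mtu_to_mss():

#include <stdbool.h>
#include <stdio.h>

#define IPV6_HDRLEN 40
#define TCP_HDRLEN  20

/* Rough stand-in for tcp_mtu_to_mss() on an IPv6 socket. */
static unsigned int mtu_to_mss(unsigned int mtu)
{
	return mtu - IPV6_HDRLEN - TCP_HDRLEN;
}

/* Drop requests trying to increase our current mss. */
static bool pmtu_update_wanted(unsigned int mtu, unsigned int mss_cache)
{
	return mtu_to_mss(mtu) < mss_cache;
}

int main(void)
{
	printf("%d\n", pmtu_update_wanted(1280, 1440)); /* 1: genuine shrink */
	printf("%d\n", pmtu_update_wanted(9000, 1440)); /* 0: ignored */
	return 0;
}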
@@ -376,7 +405,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
bool fatal;
int err;
- sk = __inet6_lookup_established(net, &tcp_hashinfo,
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
&hdr->daddr, th->dest,
&hdr->saddr, ntohs(th->source),
skb->dev->ifindex, inet6_sdif(skb));
@@ -405,9 +434,12 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (sk->sk_state == TCP_CLOSE)
goto out;
- if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
- __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
- goto out;
+ if (static_branch_unlikely(&ip6_min_hopcount)) {
+ /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
+ if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
+ __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+ goto out;
+ }
}
tp = tcp_sk(sk);
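The min_hopcount test is now gated behind a static branch, so sockets that never set IPV6_MINHOPCOUNT pay nothing on this path, and the field itself is read with READ_ONCE() because do_ipv6_setsockopt() can change it concurrently. A userspace stand-in for the shape of that check (a plain flag here, a patched jump label in the kernel):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the ip6_min_hopcount static key. */
static bool min_hopcount_key;

/* Writable from setsockopt(IPV6_MINHOPCOUNT), hence volatile here and
 * READ_ONCE() in the kernel. */
static volatile unsigned char min_hopcount;

static bool hop_limit_too_low(unsigned char hop_limit)
{
	if (min_hopcount_key) /* static_branch_unlikely() */
		return hop_limit < min_hopcount;
	return false;
}

int main(void)
{
	min_hopcount_key = true;
	min_hopcount = 64;
	printf("%d\n", hop_limit_too_low(3));   /* 1: dropped */
	printf("%d\n", hop_limit_too_low(255)); /* 0: accepted */
	return 0;
}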
@@ -433,6 +465,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
if (type == ICMPV6_PKT_TOOBIG) {
+ u32 mtu = ntohl(info);
+
/* We are not interested in TCP_LISTEN and open_requests
* (SYN-ACKs send out by Linux are always <576bytes so
* they should go through unfragmented).
@@ -443,7 +477,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (!ip6_sk_accept_pmtu(sk))
goto out;
- tp->mtu_info = ntohl(info);
+ if (mtu < IPV6_MIN_MTU)
+ goto out;
+
+ WRITE_ONCE(tp->mtu_info, mtu);
+
if (!sock_owned_by_user(sk))
tcp_v6_mtu_reduced(sk);
else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
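For ICMPV6_PKT_TOOBIG the patch rejects advertised MTUs below IPV6_MIN_MTU (1280, per RFC 8200) and stores the value with WRITE_ONCE() before either applying it directly or deferring it via a socket flag. A hedged sketch with illustrative names:

#include <stdbool.h>
#include <stdio.h>

#define IPV6_MIN_MTU 1280

struct fake_sk {
	bool owned_by_user;        /* sock_owned_by_user() */
	bool mtu_reduced_deferred; /* TCP_MTU_REDUCED_DEFERRED bit */
	unsigned int mtu_info;
};

static void handle_pkt_toobig(struct fake_sk *sk, unsigned int mtu)
{
	if (mtu < IPV6_MIN_MTU)
		return;                    /* new: reject bogus values */

	sk->mtu_info = mtu;                /* WRITE_ONCE() in the kernel */

	if (!sk->owned_by_user)
		printf("shrinking pmtu to %u now\n", mtu);
	else
		sk->mtu_reduced_deferred = true; /* handled on release_sock() */
}

int main(void)
{
	struct fake_sk sk = { .owned_by_user = true };

	handle_pkt_toobig(&sk, 900);  /* ignored: below IPV6_MIN_MTU */
	handle_pkt_toobig(&sk, 1400); /* deferred to user context */
	printf("deferred=%d mtu=%u\n", sk.mtu_reduced_deferred, sk.mtu_info);
	return 0;
}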
@@ -458,24 +496,35 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
case TCP_SYN_SENT:
case TCP_SYN_RECV:
/* Only in fast or simultaneous open. If a fast open socket is
- * is already accepted it is treated as a connected one below.
+ * already accepted it is treated as a connected one below.
*/
if (fastopen && !fastopen->sk)
break;
+ ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
+
if (!sock_owned_by_user(sk)) {
sk->sk_err = err;
- sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
+ sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
tcp_done(sk);
} else
sk->sk_err_soft = err;
goto out;
+ case TCP_LISTEN:
+ break;
+ default:
+ /* check if this ICMP message allows revert of backoff.
+ * (see RFC 6069)
+ */
+ if (!fastopen && type == ICMPV6_DEST_UNREACH &&
+ code == ICMPV6_NOROUTE)
+ tcp_ld_RTO_revert(sk, seq);
}
if (!sock_owned_by_user(sk) && np->recverr) {
sk->sk_err = err;
- sk->sk_error_report(sk);
+ sk_error_report(sk);
} else
sk->sk_err_soft = err;
@@ -490,7 +539,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
- enum tcp_synack_type synack_type)
+ enum tcp_synack_type synack_type,
+ struct sk_buff *syn_skb)
{
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
@@ -498,13 +548,14 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi6 *fl6 = &fl->u.ip6;
struct sk_buff *skb;
int err = -ENOMEM;
+ u8 tclass;
/* First, grab a route. */
if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
IPPROTO_TCP)) == NULL)
goto done;
- skb = tcp_make_synack(sk, dst, req, foc, synack_type);
+ skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);
if (skb) {
__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
@@ -514,12 +565,21 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
+ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
+ (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
+ (np->tclass & INET_ECN_MASK) :
+ np->tclass;
+
+ if (!INET_ECN_is_capable(tclass) &&
+ tcp_bpf_ca_needs_ecn((struct sock *)req))
+ tclass |= INET_ECN_ECT_0;
+
rcu_read_lock();
opt = ireq->ipv6_opt;
if (!opt)
opt = rcu_dereference(np->opt);
- err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
- sk->sk_priority);
+ err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
+ tclass, sk->sk_priority);
rcu_read_unlock();
err = net_xmit_eval(err);
}
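When sysctl_tcp_reflect_tos is set, the SYN-ACK's traffic class takes its DSCP bits from the incoming SYN while keeping the ECN bits under local control, exactly the masking in the hunk above. A worked sketch of that bit selection:

#include <stdio.h>

#define INET_ECN_MASK 3u /* low two bits of the traffic class */

static unsigned char synack_tclass(int reflect_tos, unsigned char syn_tos,
				   unsigned char np_tclass)
{
	if (reflect_tos)
		return (syn_tos & ~INET_ECN_MASK) |
		       (np_tclass & INET_ECN_MASK);
	return np_tclass;
}

int main(void)
{
	/* SYN carried DSCP CS5 with ECT(0); local tclass has ECN clear. */
	printf("0x%02x\n", synack_tclass(1, 0xa2, 0x00)); /* -> 0xa0 */
	printf("0x%02x\n", synack_tclass(0, 0xa2, 0x04)); /* -> 0x04 */
	return 0;
}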
@@ -532,7 +592,7 @@ done:
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
kfree(inet_rsk(req)->ipv6_opt);
- kfree_skb(inet_rsk(req)->pktopts);
+ consume_skb(inet_rsk(req)->pktopts);
}
#ifdef CONFIG_TCP_MD5SIG
@@ -556,22 +616,25 @@ static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
}
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
- char __user *optval, int optlen)
+ sockptr_t optval, int optlen)
{
struct tcp_md5sig cmd;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
int l3index = 0;
u8 prefixlen;
+ u8 flags;
if (optlen < sizeof(cmd))
return -EINVAL;
- if (copy_from_user(&cmd, optval, sizeof(cmd)))
+ if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
return -EFAULT;
if (sin6->sin6_family != AF_INET6)
return -EINVAL;
+ flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX;
+
if (optname == TCP_MD5SIG_EXT &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
prefixlen = cmd.tcpm_prefixlen;
@@ -582,7 +645,7 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
}
- if (optname == TCP_MD5SIG_EXT &&
+ if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex &&
cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
struct net_device *dev;
@@ -603,9 +666,9 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
AF_INET, prefixlen,
- l3index);
+ l3index, flags);
return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, l3index);
+ AF_INET6, prefixlen, l3index, flags);
}
if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
@@ -613,12 +676,12 @@ static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
if (ipv6_addr_v4mapped(&sin6->sin6_addr))
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
- AF_INET, prefixlen, l3index,
+ AF_INET, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen,
GFP_KERNEL);
return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
- AF_INET6, prefixlen, l3index,
+ AF_INET6, prefixlen, l3index, flags,
cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}
@@ -729,57 +792,6 @@ clear_hash_noput:
#endif
-static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
- const struct sk_buff *skb,
- int dif, int sdif)
-{
-#ifdef CONFIG_TCP_MD5SIG
- const __u8 *hash_location = NULL;
- struct tcp_md5sig_key *hash_expected;
- const struct ipv6hdr *ip6h = ipv6_hdr(skb);
- const struct tcphdr *th = tcp_hdr(skb);
- int genhash, l3index;
- u8 newhash[16];
-
- /* sdif set, means packet ingressed via a device
- * in an L3 domain and dif is set to the l3mdev
- */
- l3index = sdif ? dif : 0;
-
- hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
- hash_location = tcp_parse_md5sig_option(th);
-
- /* We've parsed the options - do we have a hash? */
- if (!hash_expected && !hash_location)
- return false;
-
- if (hash_expected && !hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
- return true;
- }
-
- if (!hash_expected && hash_location) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
- return true;
- }
-
- /* check the signature */
- genhash = tcp_v6_md5_hash_skb(newhash,
- hash_expected,
- NULL, skb);
-
- if (genhash || memcmp(hash_location, newhash, 16) != 0) {
- NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
- net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
- genhash ? "failed" : "mismatch",
- &ip6h->saddr, ntohs(th->source),
- &ip6h->daddr, ntohs(th->dest), l3index);
- return true;
- }
-#endif
- return false;
-}
-
static void tcp_v6_init_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb)
@@ -807,9 +819,15 @@ static void tcp_v6_init_req(struct request_sock *req,
}
static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
+ struct sk_buff *skb,
struct flowi *fl,
- const struct request_sock *req)
+ struct request_sock *req)
{
+ tcp_v6_init_req(req, sk, skb);
+
+ if (security_inet_conn_request(sk, skb, req))
+ return NULL;
+
return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}
@@ -830,7 +848,6 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
.req_md5_lookup = tcp_v6_md5_lookup,
.calc_md5_hash = tcp_v6_md5_hash_skb,
#endif
- .init_req = tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
.cookie_init_seq = cookie_v6_init_sequence,
#endif
@@ -843,7 +860,7 @@ const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr,
int oif, struct tcp_md5sig_key *key, int rst,
- u8 tclass, __be32 label, u32 priority)
+ u8 tclass, __be32 label, u32 priority, u32 txhash)
{
const struct tcphdr *th = tcp_hdr(skb);
struct tcphdr *t1;
@@ -852,8 +869,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
struct sock *ctl_sk = net->ipv6.tcp_sk;
unsigned int tot_len = sizeof(struct tcphdr);
+ __be32 mrst = 0, *topt;
struct dst_entry *dst;
- __be32 *topt;
__u32 mark = 0;
if (tsecr)
@@ -863,12 +880,20 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif
- buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
- GFP_ATOMIC);
+#ifdef CONFIG_MPTCP
+ if (rst && !key) {
+ mrst = mptcp_reset_option(skb);
+
+ if (mrst)
+ tot_len += sizeof(__be32);
+ }
+#endif
+
+ buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
if (!buff)
return;
- skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
+ skb_reserve(buff, MAX_TCP_HEADER);
t1 = skb_push(buff, tot_len);
skb_reset_transport_header(buff);
@@ -893,6 +918,9 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
*topt++ = htonl(tsecr);
}
+ if (mrst)
+ *topt++ = mrst;
+
#ifdef CONFIG_TCP_MD5SIG
if (key) {
*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
@@ -909,7 +937,6 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
fl6.flowlabel = label;
buff->ip_summed = CHECKSUM_PARTIAL;
- buff->csum = 0;
__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
@@ -924,31 +951,34 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
}
if (sk) {
- if (sk->sk_state == TCP_TIME_WAIT) {
+ if (sk->sk_state == TCP_TIME_WAIT)
mark = inet_twsk(sk)->tw_mark;
- /* autoflowlabel relies on buff->hash */
- skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
- PKT_HASH_TYPE_L4);
- } else {
+ else
mark = sk->sk_mark;
- }
- buff->tstamp = tcp_transmit_time(sk);
+ skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
+ }
+ if (txhash) {
+ /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */
+ skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4);
}
fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
fl6.fl6_dport = t1->dest;
fl6.fl6_sport = t1->source;
fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
- security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
+ security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
/* Pass a socket to ip6_dst_lookup either it is for RST
* Underlying function will use this to retrieve the network
* namespace
*/
- dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
+ if (sk && sk->sk_state != TCP_TIME_WAIT)
+ dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/
+ else
+ dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL);
if (!IS_ERR(dst)) {
skb_dst_set(buff, dst);
- ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
- priority);
+ ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
+ tclass & ~INET_ECN_MASK, priority);
TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
if (rst)
TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
@@ -973,6 +1003,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
__be32 label = 0;
u32 priority = 0;
struct net *net;
+ u32 txhash = 0;
int oif = 0;
if (th->rst)
@@ -1008,11 +1039,10 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
* Incoming packet is checked with md5 hash with finding key,
* no RST generated if md5 hash doesn't match.
*/
- sk1 = inet6_lookup_listener(net,
- &tcp_hashinfo, NULL, 0,
- &ipv6h->saddr,
- th->source, &ipv6h->daddr,
- ntohs(th->source), dif, sdif);
+ sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
+ NULL, 0, &ipv6h->saddr, th->source,
+ &ipv6h->daddr, ntohs(th->source),
+ dif, sdif);
if (!sk1)
goto out;
@@ -1046,18 +1076,20 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
if (np->repflow)
label = ip6_flowlabel(ipv6h);
priority = sk->sk_priority;
+ txhash = sk->sk_hash;
}
if (sk->sk_state == TCP_TIME_WAIT) {
label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
priority = inet_twsk(sk)->tw_priority;
+ txhash = inet_twsk(sk)->tw_txhash;
}
} else {
if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
label = ip6_flowlabel(ipv6h);
}
- tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
- label, priority);
+ tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
+ ipv6_get_dsfield(ipv6h), label, priority, txhash);
#ifdef CONFIG_TCP_MD5SIG
out:
@@ -1068,10 +1100,10 @@ out:
static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
struct tcp_md5sig_key *key, u8 tclass,
- __be32 label, u32 priority)
+ __be32 label, u32 priority, u32 txhash)
{
tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
- tclass, label, priority);
+ tclass, label, priority, txhash);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
@@ -1083,7 +1115,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp_raw() + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
- tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);
+ tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority,
+ tw->tw_txhash);
inet_twsk_put(tw);
}
@@ -1110,7 +1143,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
req->ts_recent, sk->sk_bound_dev_if,
tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
- 0, 0, sk->sk_priority);
+ ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
+ tcp_rsk(req)->txhash);
}
@@ -1148,6 +1182,11 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
if (!ipv6_unicast_destination(skb))
goto drop;
+ if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
+ __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
+ return 0;
+ }
+
return tcp_conn_request(&tcp6_request_sock_ops,
&tcp_request_sock_ipv6_ops, sk, skb);
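The added check drops SYNs whose IPv6 source is actually an IPv4-mapped address (::ffff:a.b.c.d), which this pure-IPv6 path could not answer correctly. A standalone sketch using a local helper in place of ipv6_addr_v4mapped():

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Local stand-in for ipv6_addr_v4mapped(): matches ::ffff:0:0/96. */
static bool addr_v4mapped(const struct in6_addr *a)
{
	static const unsigned char prefix[12] = {
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff,
	};

	return memcmp(a, prefix, sizeof(prefix)) == 0;
}

int main(void)
{
	struct in6_addr a;

	inet_pton(AF_INET6, "::ffff:192.0.2.1", &a);
	printf("%d\n", addr_v4mapped(&a)); /* 1: the SYN would be dropped */
	return 0;
}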
@@ -1177,6 +1216,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct ipv6_txoptions *opt;
struct inet_sock *newinet;
+ bool found_dup_sk = false;
struct tcp_sock *newtp;
struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
@@ -1198,7 +1238,6 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
- newinet = inet_sk(newsk);
newnp = tcp_inet6_sk(newsk);
newtp = tcp_sk(newsk);
@@ -1298,6 +1337,12 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
if (np->repflow)
newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
+ /* Set ToS of the new socket based upon the value of incoming SYN.
+ * ECT bits are set later in tcp_init_transfer().
+ */
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
+ newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
+
/* Clone native IPv6 options from listening socket (if any)
Yes, keeping reference count would be much more clever,
@@ -1338,7 +1383,7 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
* across. Shucks.
*/
tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
- AF_INET6, 128, l3index, key->key, key->keylen,
+ AF_INET6, 128, l3index, key->flags, key->key, key->keylen,
sk_gfp_mask(sk, GFP_ATOMIC));
}
#endif
@@ -1348,7 +1393,8 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
tcp_done(newsk);
goto out;
}
- *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+ *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
+ &found_dup_sk);
if (*own_req) {
tcp_move_syn(newtp, req);
@@ -1363,6 +1409,15 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
skb_set_owner_r(newnp->pktoptions, newsk);
}
}
+ } else {
+ if (!req_unhash && found_dup_sk) {
+ /* This code path should only be executed in the
+ * syncookie case only
+ */
+ bh_unlock_sock(newsk);
+ sock_put(newsk);
+ newsk = NULL;
+ }
}
return newsk;
@@ -1376,6 +1431,8 @@ out:
return NULL;
}
+INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *,
+ u32));
/* The socket must have it's spinlock held when we get
* here, unless it is a TCP_LISTEN socket.
*
@@ -1384,10 +1441,12 @@ out:
* This is because we cannot sleep with the original spinlock
* held.
*/
-static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+INDIRECT_CALLABLE_SCOPE
+int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
struct ipv6_pinfo *np = tcp_inet6_sk(sk);
struct sk_buff *opt_skb = NULL;
+ enum skb_drop_reason reason;
struct tcp_sock *tp;
/* Imagine: socket is IPv6. IPv4 packet arrives,
@@ -1422,16 +1481,21 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (np->rxopt.all)
opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
- struct dst_entry *dst = sk->sk_rx_dst;
+ struct dst_entry *dst;
+
+ dst = rcu_dereference_protected(sk->sk_rx_dst,
+ lockdep_sock_is_held(sk));
sock_rps_save_rxhash(sk, skb);
sk_mark_napi_id(sk, skb);
if (dst) {
- if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
- dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
+ if (sk->sk_rx_dst_ifindex != skb->skb_iif ||
+ INDIRECT_CALL_1(dst->ops->check, ip6_dst_check,
+ dst, sk->sk_rx_dst_cookie) == NULL) {
+ RCU_INIT_POINTER(sk->sk_rx_dst, NULL);
dst_release(dst);
- sk->sk_rx_dst = NULL;
}
}
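The fast path now wraps dst->ops->check in INDIRECT_CALL_1(), which compares the function pointer against the expected target (ip6_dst_check) and calls it directly, avoiding a retpoline-penalized indirect call in the common case. A simplified userspace stand-in for the macro:

#include <stdio.h>

/* Devirtualize the call when the pointer matches the likely target. */
#define INDIRECT_CALL_1(fptr, f1, ...) \
	((fptr) == (f1) ? f1(__VA_ARGS__) : (fptr)(__VA_ARGS__))

static int ip6_dst_check_stub(unsigned int cookie)
{
	return cookie == 42; /* pretend the cached route is still valid */
}

int main(void)
{
	int (*check)(unsigned int) = ip6_dst_check_stub;

	/* Compiles to a compare plus a direct call on the match path. */
	printf("%d\n", INDIRECT_CALL_1(check, ip6_dst_check_stub, 42));
	return 0;
}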
@@ -1471,9 +1535,11 @@ reset:
discard:
if (opt_skb)
__kfree_skb(opt_skb);
- kfree_skb(skb);
+ kfree_skb_reason(skb, reason);
return 0;
csum_err:
+ reason = SKB_DROP_REASON_TCP_CSUM;
+ trace_tcp_bad_csum(skb);
TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
goto discard;
@@ -1508,7 +1574,7 @@ ipv6_pktoptions:
}
}
- kfree_skb(opt_skb);
+ consume_skb(opt_skb);
return 0;
}
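tcp_v6_do_rcv() now records why a packet is discarded and frees it with kfree_skb_reason(), so the kfree_skb tracepoint can report the cause. A sketch of that single-exit shape, with a stub in place of the real free and a few reason codes mirroring enum skb_drop_reason:

#include <stdio.h>

enum drop_reason {
	DROP_NOT_SPECIFIED,
	DROP_TCP_CSUM,
	DROP_SOCKET_FILTER,
};

/* Stand-in for kfree_skb_reason(); in the kernel the reason feeds the
 * kfree_skb tracepoint. */
static void free_with_reason(enum drop_reason reason)
{
	printf("dropped, reason=%d\n", reason);
}

static enum drop_reason validate(int csum_ok, int filter_ok)
{
	if (!csum_ok)
		return DROP_TCP_CSUM;
	if (!filter_ok)
		return DROP_SOCKET_FILTER;
	return DROP_NOT_SPECIFIED;
}

int main(void)
{
	free_with_reason(validate(0, 1)); /* reason=1: bad checksum */
	return 0;
}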
@@ -1538,7 +1604,7 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
- struct sk_buff *skb_to_free;
+ enum skb_drop_reason drop_reason;
int sdif = inet6_sdif(skb);
int dif = inet6_iif(skb);
const struct tcphdr *th;
@@ -1548,6 +1614,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
int ret;
struct net *net = dev_net(skb->dev);
+ drop_reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (skb->pkt_type != PACKET_HOST)
goto discard_it;
@@ -1561,8 +1628,10 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
th = (const struct tcphdr *)skb->data;
- if (unlikely(th->doff < sizeof(struct tcphdr)/4))
+ if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) {
+ drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
goto bad_packet;
+ }
if (!pskb_may_pull(skb, th->doff*4))
goto discard_it;
@@ -1573,7 +1642,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
hdr = ipv6_hdr(skb);
lookup:
- sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
+ sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th),
th->source, th->dest, inet6_iif(skb), sdif,
&refcounted);
if (!sk)
@@ -1589,7 +1658,10 @@ process:
struct sock *nsk;
sk = req->rsk_listener;
- if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
+ drop_reason = tcp_inbound_md5_hash(sk, skb,
+ &hdr->saddr, &hdr->daddr,
+ AF_INET6, dif, sdif);
+ if (drop_reason) {
sk_drops_add(sk, skb);
reqsk_put(req);
goto discard_it;
@@ -1599,10 +1671,18 @@ process:
goto csum_error;
}
if (unlikely(sk->sk_state != TCP_LISTEN)) {
- inet_csk_reqsk_queue_drop_and_put(sk, req);
- goto lookup;
+ nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb);
+ if (!nsk) {
+ inet_csk_reqsk_queue_drop_and_put(sk, req);
+ goto lookup;
+ }
+ sk = nsk;
+ /* reuseport_migrate_sock() has already held one sk_refcnt
+ * before returning.
+ */
+ } else {
+ sock_hold(sk);
}
- sock_hold(sk);
refcounted = true;
nsk = NULL;
if (!tcp_filter(sk, skb)) {
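Instead of unconditionally dropping the request when its listener has left TCP_LISTEN, the receive path now asks reuseport_migrate_sock() for another listener in the same reuseport group; the migrated socket already carries a reference, so only the fallback path takes sock_hold(). A toy model of that fallback, with all names invented:

#include <stddef.h>
#include <stdio.h>

struct sock { int listening; int refcnt; };

/* Toy reuseport_migrate_sock(): scan the group for a live listener and
 * take a reference on it, or return NULL when none remains. */
static struct sock *migrate(struct sock *group[], size_t n)
{
	for (size_t i = 0; i < n; i++) {
		if (group[i] && group[i]->listening) {
			group[i]->refcnt++;
			return group[i];
		}
	}
	return NULL;
}

int main(void)
{
	struct sock closing = { .listening = 0 };
	struct sock other   = { .listening = 1 };
	struct sock *group[] = { &closing, &other };
	struct sock *sk = &closing;

	if (!sk->listening) {
		struct sock *nsk = migrate(group, 2);

		if (nsk)
			sk = nsk; /* handshake continues on the new listener */
		/* else: drop the request, as the old code always did */
	}
	printf("listening=%d refcnt=%d\n", sk->listening, sk->refcnt);
	return 0;
}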
@@ -1610,6 +1690,8 @@ process:
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
+ } else {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
}
if (!nsk) {
reqsk_put(req);
@@ -1636,19 +1718,29 @@ process:
return 0;
}
}
- if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
- __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
- goto discard_and_relse;
+
+ if (static_branch_unlikely(&ip6_min_hopcount)) {
+ /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
+ if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
+ __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+ goto discard_and_relse;
+ }
}
- if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
+ if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
goto discard_and_relse;
+ }
- if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
+ drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr,
+ AF_INET6, dif, sdif);
+ if (drop_reason)
goto discard_and_relse;
- if (tcp_filter(sk, skb))
+ if (tcp_filter(sk, skb)) {
+ drop_reason = SKB_DROP_REASON_SOCKET_FILTER;
goto discard_and_relse;
+ }
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
@@ -1666,23 +1758,19 @@ process:
tcp_segs_in(tcp_sk(sk), skb);
ret = 0;
if (!sock_owned_by_user(sk)) {
- skb_to_free = sk->sk_rx_skb_cache;
- sk->sk_rx_skb_cache = NULL;
ret = tcp_v6_do_rcv(sk, skb);
} else {
- if (tcp_add_backlog(sk, skb))
+ if (tcp_add_backlog(sk, skb, &drop_reason))
goto discard_and_relse;
- skb_to_free = NULL;
}
bh_unlock_sock(sk);
- if (skb_to_free)
- __kfree_skb(skb_to_free);
put_and_return:
if (refcounted)
sock_put(sk);
return ret ? -1 : 0;
no_tcp_socket:
+ drop_reason = SKB_DROP_REASON_NO_SOCKET;
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
goto discard_it;
@@ -1690,6 +1778,8 @@ no_tcp_socket:
if (tcp_checksum_complete(skb)) {
csum_error:
+ drop_reason = SKB_DROP_REASON_TCP_CSUM;
+ trace_tcp_bad_csum(skb);
__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
__TCP_INC_STATS(net, TCP_MIB_INERRS);
@@ -1698,7 +1788,8 @@ bad_packet:
}
discard_it:
- kfree_skb(skb);
+ SKB_DR_OR(drop_reason, NOT_SPECIFIED);
+ kfree_skb_reason(skb, drop_reason);
return 0;
discard_and_relse:
@@ -1709,6 +1800,7 @@ discard_and_relse:
do_time_wait:
if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
inet_twsk_put(inet_twsk(sk));
goto discard_it;
}
@@ -1725,7 +1817,7 @@ do_time_wait:
{
struct sock *sk2;
- sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
+ sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo,
skb, __tcp_hdrlen(th),
&ipv6_hdr(skb)->saddr, th->source,
&ipv6_hdr(skb)->daddr,
@@ -1742,7 +1834,7 @@ do_time_wait:
}
}
/* to ACK */
- /* fall through */
+ fallthrough;
case TCP_TW_ACK:
tcp_v6_timewait_ack(sk, skb);
break;
@@ -1756,8 +1848,9 @@ do_time_wait:
goto discard_it;
}
-INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
+void tcp_v6_early_demux(struct sk_buff *skb)
{
+ struct net *net = dev_net(skb->dev);
const struct ipv6hdr *hdr;
const struct tcphdr *th;
struct sock *sk;
@@ -1775,7 +1868,7 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
return;
/* Note : We use inet6_iif() here, not tcp_v6_iif() */
- sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
+ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo,
&hdr->saddr, th->source,
&hdr->daddr, ntohs(th->dest),
inet6_iif(skb), inet6_sdif(skb));
@@ -1783,12 +1876,12 @@ INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
skb->sk = sk;
skb->destructor = sock_edemux;
if (sk_fullsock(sk)) {
- struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
+ struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);
if (dst)
- dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
+ dst = dst_check(dst, sk->sk_rx_dst_cookie);
if (dst &&
- inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
+ sk->sk_rx_dst_ifindex == skb->skb_iif)
skb_dst_set_noref(skb, dst);
}
}
@@ -1800,6 +1893,11 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
.twsk_destructor = tcp_twsk_destructor,
};
+INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
+{
+ __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
+}
+
const struct inet_connection_sock_af_ops ipv6_specific = {
.queue_xmit = inet6_csk_xmit,
.send_check = tcp_v6_send_check,
@@ -1813,10 +1911,6 @@ const struct inet_connection_sock_af_ops ipv6_specific = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_ipv6_setsockopt,
- .compat_getsockopt = compat_ipv6_getsockopt,
-#endif
.mtu_reduced = tcp_v6_mtu_reduced,
};
@@ -1843,10 +1937,6 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
.getsockopt = ipv6_getsockopt,
.addr2sockaddr = inet6_csk_addr2sockaddr,
.sockaddr_len = sizeof(struct sockaddr_in6),
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_ipv6_setsockopt,
- .compat_getsockopt = compat_ipv6_getsockopt,
-#endif
.mtu_reduced = tcp_v4_mtu_reduced,
};
@@ -1981,7 +2071,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
jiffies_to_clock_t(icsk->icsk_rto),
jiffies_to_clock_t(icsk->icsk_ack.ato),
(icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
- tp->snd_cwnd,
+ tcp_snd_cwnd(tp),
state == TCP_LISTEN ?
fastopenq->max_qlen :
(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
@@ -2078,6 +2168,7 @@ struct proto tcpv6_prot = {
.shutdown = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
+ .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt,
.keepalive = tcp_set_keepalive,
.recvmsg = tcp_recvmsg,
.sendmsg = tcp_sendmsg,
@@ -2087,11 +2178,18 @@ struct proto tcpv6_prot = {
.hash = inet6_hash,
.unhash = inet_unhash,
.get_port = inet_csk_get_port,
+ .put_port = inet_put_port,
+#ifdef CONFIG_BPF_SYSCALL
+ .psock_update_sk_prot = tcp_bpf_update_proto,
+#endif
.enter_memory_pressure = tcp_enter_memory_pressure,
.leave_memory_pressure = tcp_leave_memory_pressure,
.stream_memory_free = tcp_stream_memory_free,
.sockets_allocated = &tcp_sockets_allocated,
+
.memory_allocated = &tcp_memory_allocated,
+ .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc,
+
.memory_pressure = &tcp_memory_pressure,
.orphan_count = &tcp_orphan_count,
.sysctl_mem = sysctl_tcp_mem,
@@ -2102,21 +2200,13 @@ struct proto tcpv6_prot = {
.slab_flags = SLAB_TYPESAFE_BY_RCU,
.twsk_prot = &tcp6_timewait_sock_ops,
.rsk_prot = &tcp6_request_sock_ops,
- .h.hashinfo = &tcp_hashinfo,
+ .h.hashinfo = NULL,
.no_autobind = true,
-#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_tcp_setsockopt,
- .compat_getsockopt = compat_tcp_getsockopt,
-#endif
.diag_destroy = tcp_abort,
};
+EXPORT_SYMBOL_GPL(tcpv6_prot);
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol tcpv6_protocol = {
- .early_demux = tcp_v6_early_demux,
- .early_demux_handler = tcp_v6_early_demux,
+static const struct inet6_protocol tcpv6_protocol = {
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
@@ -2144,7 +2234,7 @@ static void __net_exit tcpv6_net_exit(struct net *net)
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
- inet_twsk_purge(&tcp_hashinfo, AF_INET6);
+ tcp_twsk_purge(net_exit_list, AF_INET6);
}
static struct pernet_operations tcpv6_net_ops = {