From 540207ae69777b85d167df28f469e77f0fcbb8f9 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 15 Apr 2015 11:48:49 -0700 Subject: fou: avoid missing unlock in failure path Fixes: 7a6c8c34e5b7 ("fou: implement FOU_CMD_GET") Reported-by: Dan Carpenter Cc: Dan Carpenter Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv4/fou.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index af150b43b214..34968cd5c146 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -711,11 +711,10 @@ static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, FOU_CMD_GET); if (ret) - goto done; + break; } mutex_unlock(&fn->fou_lock); -done: cb->args[0] = idx; return skb->len; } -- cgit v1.2.3-59-g8ed1b From fad9dfefea6405039491e7e4fc21fb6e59e7d26c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2015 16:12:28 -0700 Subject: tcp: tcp_get_info() should fetch socket fields once tcp_get_info() can be called without holding socket lock, so any socket fields can change under us. Use READ_ONCE() to fetch sk_pacing_rate and sk_max_pacing_rate Fixes: 977cb0ecf82e ("tcp: add pacing_rate information into tcp_info") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 18e3a12eb1b2..59c8a027721b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2595,6 +2595,7 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; + u32 rate; memset(info, 0, sizeof(*info)); @@ -2655,10 +2656,11 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) info->tcpi_total_retrans = tp->total_retrans; - info->tcpi_pacing_rate = sk->sk_pacing_rate != ~0U ? - sk->sk_pacing_rate : ~0ULL; - info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ? - sk->sk_max_pacing_rate : ~0ULL; + rate = READ_ONCE(sk->sk_pacing_rate); + info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL; + + rate = READ_ONCE(sk->sk_max_pacing_rate); + info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL; } EXPORT_SYMBOL_GPL(tcp_get_info); -- cgit v1.2.3-59-g8ed1b From 521f1cf1dbb9d5ad858dca5dc75d1b45f64b6589 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 16 Apr 2015 18:10:35 -0700 Subject: inet_diag: fix access to tcp cc information Two different problems are fixed here : 1) inet_sk_diag_fill() might be called without socket lock held. icsk->icsk_ca_ops can change under us and module be unloaded. -> Access to freed memory. Fix this using rcu_read_lock() to prevent module unload. 2) Some TCP Congestion Control modules provide information but again this is not safe against icsk->icsk_ca_ops change and nla_put() errors were ignored. Some sockets could not get the additional info if skb was almost full. Fix this by returning a status from get_info() handlers and using rcu protection as well. Signed-off-by: Eric Dumazet Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/tcp.h | 2 +- net/ipv4/inet_diag.c | 28 ++++++++++++++++++++++------ net/ipv4/tcp_dctcp.c | 5 +++-- net/ipv4/tcp_illinois.c | 6 +++--- net/ipv4/tcp_vegas.c | 5 +++-- net/ipv4/tcp_vegas.h | 2 +- net/ipv4/tcp_westwood.c | 6 +++--- 7 files changed, 36 insertions(+), 18 deletions(-) (limited to 'net/ipv4') diff --git a/include/net/tcp.h b/include/net/tcp.h index 9598871485ce..051dc5c2802d 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -829,7 +829,7 @@ struct tcp_congestion_ops { /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us); /* get info for inet_diag (optional) */ - void (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); + int (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb); char name[TCP_CA_NAME_MAX]; struct module *owner; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 70e8b3c308ec..bb77ebdae3b3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -111,6 +111,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); + const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; int ext = req->idiag_ext; struct inet_diag_msg *r; @@ -208,16 +209,31 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, info = nla_data(attr); } - if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) - if (nla_put_string(skb, INET_DIAG_CONG, - icsk->icsk_ca_ops->name) < 0) + if (ext & (1 << (INET_DIAG_CONG - 1))) { + int err = 0; + + rcu_read_lock(); + ca_ops = READ_ONCE(icsk->icsk_ca_ops); + if (ca_ops) + err = nla_put_string(skb, INET_DIAG_CONG, ca_ops->name); + rcu_read_unlock(); + if (err < 0) goto errout; + } handler->idiag_get_info(sk, r, info); - if (sk->sk_state < TCP_TIME_WAIT && - icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info) - icsk->icsk_ca_ops->get_info(sk, ext, skb); + if (sk->sk_state < TCP_TIME_WAIT) { + int err = 0; + + rcu_read_lock(); + ca_ops = READ_ONCE(icsk->icsk_ca_ops); + if (ca_ops && ca_ops->get_info) + err = ca_ops->get_info(sk, ext, skb); + rcu_read_unlock(); + if (err < 0) + goto errout; + } out: nlmsg_end(skb, nlh); diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index b504371af742..4376016f7fa5 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -277,7 +277,7 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) } } -static void dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) +static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct dctcp *ca = inet_csk_ca(sk); @@ -297,8 +297,9 @@ static void dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) info.dctcp_ab_tot = ca->acked_bytes_total; } - nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info); + return nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info); } + return 0; } static struct tcp_congestion_ops dctcp __read_mostly = { diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 1d5a30a90adf..67476f085e48 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -300,8 +300,7 @@ static u32 tcp_illinois_ssthresh(struct sock *sk) } /* Extract info for Tcp socket info provided via netlink. */ -static void tcp_illinois_info(struct sock *sk, u32 ext, - struct sk_buff *skb) +static int tcp_illinois_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct illinois *ca = inet_csk_ca(sk); @@ -318,8 +317,9 @@ static void tcp_illinois_info(struct sock *sk, u32 ext, do_div(t, info.tcpv_rttcnt); info.tcpv_rtt = t; } - nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); + return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); } + return 0; } static struct tcp_congestion_ops tcp_illinois __read_mostly = { diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index a6afde666ab1..c71a1b8f7bde 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -286,7 +286,7 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) } /* Extract info for Tcp socket info provided via netlink. */ -void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) +int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct vegas *ca = inet_csk_ca(sk); if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) { @@ -297,8 +297,9 @@ void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb) .tcpv_minrtt = ca->minRTT, }; - nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); + return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); } + return 0; } EXPORT_SYMBOL_GPL(tcp_vegas_get_info); diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h index 0531b99d8637..e8a6b33cc61d 100644 --- a/net/ipv4/tcp_vegas.h +++ b/net/ipv4/tcp_vegas.h @@ -19,6 +19,6 @@ void tcp_vegas_init(struct sock *sk); void tcp_vegas_state(struct sock *sk, u8 ca_state); void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us); void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); -void tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb); +int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb); #endif /* __TCP_VEGAS_H */ diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index bb63fba47d47..b3c57cceb990 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -256,8 +256,7 @@ static void tcp_westwood_event(struct sock *sk, enum tcp_ca_event event) } /* Extract info for Tcp socket info provided via netlink. */ -static void tcp_westwood_info(struct sock *sk, u32 ext, - struct sk_buff *skb) +static int tcp_westwood_info(struct sock *sk, u32 ext, struct sk_buff *skb) { const struct westwood *ca = inet_csk_ca(sk); @@ -268,8 +267,9 @@ static void tcp_westwood_info(struct sock *sk, u32 ext, .tcpv_minrtt = jiffies_to_usecs(ca->rtt_min), }; - nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); + return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info); } + return 0; } static struct tcp_congestion_ops tcp_westwood __read_mostly = { -- cgit v1.2.3-59-g8ed1b From 2ab957492d13bb819400ac29ae55911d50a82a13 Mon Sep 17 00:00:00 2001 From: Sebastian Pöhn Date: Mon, 20 Apr 2015 09:19:20 +0200 Subject: ip_forward: Drop frames with attached skb->sk Initial discussion was: [FYI] xfrm: Don't lookup sk_policy for timewait sockets Forwarded frames should not have a socket attached. Especially tw sockets will lead to panics later-on in the stack. This was observed with TPROXY assigning a tw socket and broken policy routing (misconfigured). As a result frame enters forwarding path instead of input. We cannot solve this in TPROXY as it cannot know that policy routing is broken. v2: Remove useless comment Signed-off-by: Sebastian Poehn Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 939992c456f3..3674484946a5 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -82,6 +82,9 @@ int ip_forward(struct sk_buff *skb) if (skb->pkt_type != PACKET_HOST) goto drop; + if (unlikely(skb->sk)) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; -- cgit v1.2.3-59-g8ed1b From 3c7151275c0c9a80c3375f9874b1c7129a105eea Mon Sep 17 00:00:00 2001 From: "jbaron@akamai.com" Date: Mon, 20 Apr 2015 20:05:07 +0000 Subject: tcp: add memory barriers to write space paths Ensure that we either see that the buffer has write space in tcp_poll() or that we perform a wakeup from the input side. Did not run into any actual problem here, but thought that we should make things explicit. Signed-off-by: Jason Baron Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 +++- net/ipv4/tcp_input.c | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 59c8a027721b..8c5cd9efebbc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -520,8 +520,10 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Race breaker. If space is freed after * wspace test but before the flags are set, - * IO signal will be lost. + * IO signal will be lost. Memory barrier + * pairs with the input side. */ + smp_mb__after_atomic(); if (sk_stream_is_writeable(sk)) mask |= POLLOUT | POLLWRNORM; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a7ef679dd3ea..3a4d9b34bed4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4845,6 +4845,8 @@ static void tcp_check_space(struct sock *sk) { if (sock_flag(sk, SOCK_QUEUE_SHRUNK)) { sock_reset_flag(sk, SOCK_QUEUE_SHRUNK); + /* pairs with tcp_poll() */ + smp_mb__after_atomic(); if (sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) tcp_new_space(sk); -- cgit v1.2.3-59-g8ed1b