aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/net/rds/tcp.c
diff options
context:
space:
mode:
authorTetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>2022-05-05 10:53:53 +0900
committerJakub Kicinski <kuba@kernel.org>2022-05-05 16:44:49 -0700
commit6997fbd7a3dafa754f81d541498ace35b43246d8 (patch)
tree9903b4be70449cb3b8759040ed1b57bdf4d89ed4 /net/rds/tcp.c
parentMerge tag 'net-5.18-rc6' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net (diff)
downloadwireguard-linux-6997fbd7a3dafa754f81d541498ace35b43246d8.tar.xz
wireguard-linux-6997fbd7a3dafa754f81d541498ace35b43246d8.zip
net: rds: use maybe_get_net() when acquiring refcount on TCP sockets
Eric Dumazet is reporting addition on 0 problem at rds_tcp_tune(), for delayed works queued in rds_wq might be invoked after a net namespace's refcount already reached 0. Since rds_tcp_exit_net() from cleanup_net() calls flush_workqueue(rds_wq), it is guaranteed that we can instead use maybe_get_net() from delayed work functions until rds_tcp_exit_net() returns. Note that I'm not convinced that all works which might access a net namespace are already queued in rds_wq by the moment rds_tcp_exit_net() calls flush_workqueue(rds_wq). If some race is there, rds_tcp_exit_net() will fail to wait for work functions, and kmem_cache_free() could be called from net_free() before maybe_get_net() is called from rds_tcp_tune(). Reported-by: Eric Dumazet <edumazet@google.com> Fixes: 3a58f13a881ed351 ("net: rds: acquire refcount on TCP sockets") Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Reviewed-by: Eric Dumazet <edumazet@google.com> Link: https://lore.kernel.org/r/41d09faf-bc78-1a87-dfd1-c6d1b5984b61@I-love.SAKURA.ne.jp Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/rds/tcp.c')
-rw-r--r--net/rds/tcp.c12
1 files changed, 9 insertions, 3 deletions
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 2f638f8b7b1e..73ee2771093d 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -487,11 +487,11 @@ struct rds_tcp_net {
/* All module specific customizations to the RDS-TCP socket should be done in
* rds_tcp_tune() and applied after socket creation.
*/
-void rds_tcp_tune(struct socket *sock)
+bool rds_tcp_tune(struct socket *sock)
{
struct sock *sk = sock->sk;
struct net *net = sock_net(sk);
- struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
+ struct rds_tcp_net *rtn;
tcp_sock_set_nodelay(sock->sk);
lock_sock(sk);
@@ -499,10 +499,15 @@ void rds_tcp_tune(struct socket *sock)
* a process which created this net namespace terminated.
*/
if (!sk->sk_net_refcnt) {
+ if (!maybe_get_net(net)) {
+ release_sock(sk);
+ return false;
+ }
sk->sk_net_refcnt = 1;
- get_net_track(net, &sk->ns_tracker, GFP_KERNEL);
+ netns_tracker_alloc(net, &sk->ns_tracker, GFP_KERNEL);
sock_inuse_add(net, 1);
}
+ rtn = net_generic(net, rds_tcp_netid);
if (rtn->sndbuf_size > 0) {
sk->sk_sndbuf = rtn->sndbuf_size;
sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
@@ -512,6 +517,7 @@ void rds_tcp_tune(struct socket *sock)
sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
}
release_sock(sk);
+ return true;
}
static void rds_tcp_accept_worker(struct work_struct *work)