From 7a1b3490f47e88ec4cbde65f1a77a0f4bc972282 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Fri, 29 Mar 2024 13:08:52 +0100 Subject: mptcp: don't account accept() of non-MPC client as fallback to TCP Current MPTCP servers increment MPTcpExtMPCapableFallbackACK when they accept non-MPC connections. As reported by Christoph, this is "surprising" because the counter might become greater than MPTcpExtMPCapableSYNRX. MPTcpExtMPCapableFallbackACK counter's name suggests it should only be incremented when a connection was seen using MPTCP options, then a fallback to TCP has been done. Let's do that by incrementing it when the subflow context of an inbound MPC connection attempt is dropped. Also, update mptcp_connect.sh kselftest, to ensure that the above MIB does not increment in case a pure TCP client connects to a MPTCP server. Fixes: fc518953bc9c ("mptcp: add and use MIB counter infrastructure") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/449 Signed-off-by: Davide Caratti Reviewed-by: Mat Martineau Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240329-upstream-net-20240329-fallback-mib-v1-1-324a8981da48@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net/mptcp/protocol.c') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3a1967bc7bad..7e74b812e366 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3937,8 +3937,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, mptcp_set_state(newsk, TCP_CLOSE); } } else { - MPTCP_INC_STATS(sock_net(ssk), - MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK); tcpfallback: newsk->sk_kern_sock = kern; lock_sock(newsk); -- cgit v1.2.3-59-g8ed1b From f410cbea9f3d2675b4c8e52af1d1985b11b387d1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 4 Apr 2024 11:42:31 +0000 Subject: tcp: 
annotate data-races around tp->window_clamp tp->window_clamp can be read locklessly, add READ_ONCE() and WRITE_ONCE() annotations. Signed-off-by: Eric Dumazet Reviewed-by: Jason Xing Link: https://lore.kernel.org/r/20240404114231.2195171-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/syncookies.c | 3 ++- net/ipv4/tcp.c | 8 ++++---- net/ipv4/tcp_input.c | 17 ++++++++++------- net/ipv4/tcp_output.c | 18 ++++++++++-------- net/ipv6/syncookies.c | 2 +- net/mptcp/protocol.c | 2 +- net/mptcp/sockopt.c | 2 +- 7 files changed, 29 insertions(+), 23 deletions(-) (limited to 'net/mptcp/protocol.c') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 500f665f98cb..b61d36810fe3 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -462,7 +462,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) } /* Try to redo what tcp_v4_send_synack did. */ - req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); + req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? : + dst_metric(&rt->dst, RTAX_WINDOW); /* limit the window selection if the user enforce a smaller rx buffer */ full_space = tcp_full_space(sk); if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e767721b3a58..92ee60492314 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1721,7 +1721,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val) space = tcp_space_from_win(sk, val); if (space > sk->sk_rcvbuf) { WRITE_ONCE(sk->sk_rcvbuf, space); - tcp_sk(sk)->window_clamp = val; + WRITE_ONCE(tcp_sk(sk)->window_clamp, val); } return 0; } @@ -3379,7 +3379,7 @@ int tcp_set_window_clamp(struct sock *sk, int val) if (!val) { if (sk->sk_state != TCP_CLOSE) return -EINVAL; - tp->window_clamp = 0; + WRITE_ONCE(tp->window_clamp, 0); } else { u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp; u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ? 
@@ -3388,7 +3388,7 @@ int tcp_set_window_clamp(struct sock *sk, int val) if (new_window_clamp == old_window_clamp) return 0; - tp->window_clamp = new_window_clamp; + WRITE_ONCE(tp->window_clamp, new_window_clamp); if (new_window_clamp < old_window_clamp) { /* need to apply the reserved mem provisioning only * when shrinking the window clamp @@ -4057,7 +4057,7 @@ int do_tcp_getsockopt(struct sock *sk, int level, TCP_RTO_MAX / HZ); break; case TCP_WINDOW_CLAMP: - val = tp->window_clamp; + val = READ_ONCE(tp->window_clamp); break; case TCP_INFO: { struct tcp_info info; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1b6cd3840012..8d44ab5671ea 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -563,19 +563,20 @@ static void tcp_init_buffer_space(struct sock *sk) maxwin = tcp_full_space(sk); if (tp->window_clamp >= maxwin) { - tp->window_clamp = maxwin; + WRITE_ONCE(tp->window_clamp, maxwin); if (tcp_app_win && maxwin > 4 * tp->advmss) - tp->window_clamp = max(maxwin - - (maxwin >> tcp_app_win), - 4 * tp->advmss); + WRITE_ONCE(tp->window_clamp, + max(maxwin - (maxwin >> tcp_app_win), + 4 * tp->advmss)); } /* Force reservation of one segment. */ if (tcp_app_win && tp->window_clamp > 2 * tp->advmss && tp->window_clamp + tp->advmss > maxwin) - tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss); + WRITE_ONCE(tp->window_clamp, + max(2 * tp->advmss, maxwin - tp->advmss)); tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); tp->snd_cwnd_stamp = tcp_jiffies32; @@ -773,7 +774,8 @@ void tcp_rcv_space_adjust(struct sock *sk) WRITE_ONCE(sk->sk_rcvbuf, rcvbuf); /* Make the window clamp follow along. 
*/ - tp->window_clamp = tcp_win_from_space(sk, rcvbuf); + WRITE_ONCE(tp->window_clamp, + tcp_win_from_space(sk, rcvbuf)); } } tp->rcvq_space.space = copied; @@ -6426,7 +6428,8 @@ consume: if (!tp->rx_opt.wscale_ok) { tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; - tp->window_clamp = min(tp->window_clamp, 65535U); + WRITE_ONCE(tp->window_clamp, + min(tp->window_clamp, 65535U)); } if (tp->rx_opt.saw_tstamp) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e3167ad96567..9282fafc0e61 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -203,16 +203,17 @@ static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt) * This MUST be enforced by all callers. */ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, - __u32 *rcv_wnd, __u32 *window_clamp, + __u32 *rcv_wnd, __u32 *__window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd) { unsigned int space = (__space < 0 ? 0 : __space); + u32 window_clamp = READ_ONCE(*__window_clamp); /* If no clamp set the clamp to the max possible scaled window */ - if (*window_clamp == 0) - (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE); - space = min(*window_clamp, space); + if (window_clamp == 0) + window_clamp = (U16_MAX << TCP_MAX_WSCALE); + space = min(window_clamp, space); /* Quantize space offering to a multiple of mss if possible. 
*/ if (space > mss) @@ -239,12 +240,13 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, /* Set window scaling on max possible window */ space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); space = max_t(u32, space, READ_ONCE(sysctl_rmem_max)); - space = min_t(u32, space, *window_clamp); + space = min_t(u32, space, window_clamp); *rcv_wscale = clamp_t(int, ilog2(space) - 15, 0, TCP_MAX_WSCALE); } /* Set the clamp no higher than max representable value */ - (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp); + WRITE_ONCE(*__window_clamp, + min_t(__u32, U16_MAX << (*rcv_wscale), window_clamp)); } EXPORT_SYMBOL(tcp_select_initial_window); @@ -3855,7 +3857,7 @@ static void tcp_connect_init(struct sock *sk) tcp_ca_dst_init(sk, dst); if (!tp->window_clamp) - tp->window_clamp = dst_metric(dst, RTAX_WINDOW); + WRITE_ONCE(tp->window_clamp, dst_metric(dst, RTAX_WINDOW)); tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); tcp_initialize_rcv_mss(sk); @@ -3863,7 +3865,7 @@ static void tcp_connect_init(struct sock *sk) /* limit the window selection if the user enforce a smaller rx buffer */ if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0)) - tp->window_clamp = tcp_full_space(sk); + WRITE_ONCE(tp->window_clamp, tcp_full_space(sk)); rcv_wnd = tcp_rwnd_init_bpf(sk); if (rcv_wnd == 0) diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 6d8286c299c9..bfad1e89b6a6 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -246,7 +246,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) } } - req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); + req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? 
:dst_metric(dst, RTAX_WINDOW); /* limit the window selection if the user enforce a smaller rx buffer */ full_space = tcp_full_space(sk); if (sk->sk_userlocks & SOCK_RCVBUF_LOCK && diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7e74b812e366..995b53cd021c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2056,7 +2056,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) ssk = mptcp_subflow_tcp_sock(subflow); slow = lock_sock_fast(ssk); WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf); - tcp_sk(ssk)->window_clamp = window_clamp; + WRITE_ONCE(tcp_sk(ssk)->window_clamp, window_clamp); tcp_cleanup_rbuf(ssk, 1); unlock_sock_fast(ssk, slow); } diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 73fdf423de44..9d5d42a77bcc 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1523,7 +1523,7 @@ int mptcp_set_rcvlowat(struct sock *sk, int val) slow = lock_sock_fast(ssk); WRITE_ONCE(ssk->sk_rcvbuf, space); - tcp_sk(ssk)->window_clamp = val; + WRITE_ONCE(tcp_sk(ssk)->window_clamp, val); unlock_sock_fast(ssk, slow); } return 0; -- cgit v1.2.3-59-g8ed1b From 18d82cde743237def9f80171e32be1153f5cadac Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Wed, 10 Apr 2024 11:48:24 +0200 Subject: mptcp: add last time fields in mptcp_info This patch adds "last time" fields last_data_sent, last_data_recv and last_ack_recv in struct mptcp_sock to record the last time data_sent, data_recv and ack_recv happened. They all are initialized as tcp_jiffies32 in __mptcp_init_sock(), and updated as tcp_jiffies32 too when data is sent in __subflow_push_pending(), data is received in __mptcp_move_skbs_from_subflow(), and ack is received in ack_update_msk(). 
Similar to tcpi_last_data_sent, tcpi_last_data_recv and tcpi_last_ack_recv exposed with TCP, this patch exposes the last time "an action happened" for MPTCP in mptcp_info, named mptcpi_last_data_sent, mptcpi_last_data_recv and mptcpi_last_ack_recv, calculated in mptcp_diag_fill_info() as the time deltas between now and the newly added last time fields in mptcp_sock. Since msk->last_ack_recv needs to be protected by mptcp_data_lock/unlock, and lock_sock_fast can sleep and be quite slow, move the entire mptcp_data_lock/unlock block after the lock/unlock_sock_fast block. Then mptcpi_last_data_sent and mptcpi_last_data_recv are set in lock/unlock_sock_fast block, while mptcpi_last_ack_recv is set in mptcp_data_lock/unlock block, which is protected by a spinlock and should not block for too long. Also add three reserved bytes in struct mptcp_info not to have holes in this structure exposed to userspace. Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/446 Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240410-upstream-net-next-20240405-mptcp-last-time-info-v2-1-f95bd6b33e51@kernel.org Signed-off-by: Jakub Kicinski --- include/uapi/linux/mptcp.h | 4 ++++ net/mptcp/options.c | 1 + net/mptcp/protocol.c | 7 +++++++ net/mptcp/protocol.h | 3 +++ net/mptcp/sockopt.c | 16 +++++++++++----- 5 files changed, 26 insertions(+), 5 deletions(-) (limited to 'net/mptcp/protocol.c') diff --git a/include/uapi/linux/mptcp.h b/include/uapi/linux/mptcp.h index 74cfe496891e..67d015df8893 100644 --- a/include/uapi/linux/mptcp.h +++ b/include/uapi/linux/mptcp.h @@ -58,6 +58,10 @@ struct mptcp_info { __u64 mptcpi_bytes_received; __u64 mptcpi_bytes_acked; __u8 mptcpi_subflows_total; + __u8 reserved[3]; + __u32 mptcpi_last_data_sent; + __u32 mptcpi_last_data_recv; + __u32 mptcpi_last_ack_recv; }; /* MPTCP Reset reason codes, rfc8684 */ diff --git a/net/mptcp/options.c 
b/net/mptcp/options.c index 27ca42c77b02..8e8dcfbc2993 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1068,6 +1068,7 @@ static void ack_update_msk(struct mptcp_sock *msk, __mptcp_snd_una_update(msk, new_snd_una); __mptcp_data_acked(sk); } + msk->last_ack_recv = tcp_jiffies32; mptcp_data_unlock(sk); trace_ack_update_msk(mp_opt->data_ack, diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 995b53cd021c..f8bc34f0d973 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -706,6 +706,8 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, } } while (more_data_avail); + if (moved > 0) + msk->last_data_recv = tcp_jiffies32; *bytes += moved; return done; } @@ -1556,6 +1558,8 @@ static int __subflow_push_pending(struct sock *sk, struct sock *ssk, err = copied; out: + if (err > 0) + msk->last_data_sent = tcp_jiffies32; return err; } @@ -2793,6 +2797,9 @@ static void __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->allow_infinite_fallback, true); msk->recovery = false; msk->subflow_id = 1; + msk->last_data_sent = tcp_jiffies32; + msk->last_data_recv = tcp_jiffies32; + msk->last_ack_recv = tcp_jiffies32; mptcp_pm_data_init(msk); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 46f4655b7123..fdfa843e2d88 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -282,6 +282,9 @@ struct mptcp_sock { u64 bytes_acked; u64 snd_una; u64 wnd_end; + u32 last_data_sent; + u32 last_data_recv; + u32 last_ack_recv; unsigned long timer_ival; u32 token; int rmem_released; diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 9d5d42a77bcc..1fea43f5b6f3 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -898,6 +898,7 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) struct sock *sk = (struct sock *)msk; u32 flags = 0; bool slow; + u32 now; memset(info, 0, sizeof(*info)); @@ -926,11 +927,6 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) if 
(READ_ONCE(msk->can_ack)) flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED; info->mptcpi_flags = flags; - mptcp_data_lock(sk); - info->mptcpi_snd_una = msk->snd_una; - info->mptcpi_rcv_nxt = msk->ack_seq; - info->mptcpi_bytes_acked = msk->bytes_acked; - mptcp_data_unlock(sk); slow = lock_sock_fast(sk); info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled); @@ -942,7 +938,17 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info) info->mptcpi_bytes_retrans = msk->bytes_retrans; info->mptcpi_subflows_total = info->mptcpi_subflows + __mptcp_has_initial_subflow(msk); + now = tcp_jiffies32; + info->mptcpi_last_data_sent = jiffies_to_msecs(now - msk->last_data_sent); + info->mptcpi_last_data_recv = jiffies_to_msecs(now - msk->last_data_recv); unlock_sock_fast(sk, slow); + + mptcp_data_lock(sk); + info->mptcpi_last_ack_recv = jiffies_to_msecs(now - msk->last_ack_recv); + info->mptcpi_snd_una = msk->snd_una; + info->mptcpi_rcv_nxt = msk->ack_seq; + info->mptcpi_bytes_acked = msk->bytes_acked; + mptcp_data_unlock(sk); } EXPORT_SYMBOL_GPL(mptcp_diag_fill_info); -- cgit v1.2.3-59-g8ed1b From 5691276b39daf90294c6a81fb6d62d667f634c92 Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 25 Apr 2024 11:13:36 +0800 Subject: rstreason: prepare for active reset Like what we did to passive reset: only passing possible reset reason in each active reset path. No functional changes. 
Signed-off-by: Jason Xing Acked-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- include/net/tcp.h | 3 ++- net/ipv4/tcp.c | 15 ++++++++++----- net/ipv4/tcp_output.c | 3 ++- net/ipv4/tcp_timer.c | 9 ++++++--- net/mptcp/protocol.c | 4 +++- net/mptcp/subflow.c | 5 +++-- 6 files changed, 26 insertions(+), 13 deletions(-) (limited to 'net/mptcp/protocol.c') diff --git a/include/net/tcp.h b/include/net/tcp.h index ffc9371fe9de..a9eb21251195 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -670,7 +670,8 @@ int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, void tcp_send_probe0(struct sock *); int tcp_write_wakeup(struct sock *, int mib); void tcp_send_fin(struct sock *sk); -void tcp_send_active_reset(struct sock *sk, gfp_t priority); +void tcp_send_active_reset(struct sock *sk, gfp_t priority, + enum sk_rst_reason reason); int tcp_send_synack(struct sock *); void tcp_push_one(struct sock *, unsigned int mss_now); void __tcp_send_ack(struct sock *sk, u32 rcv_nxt); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f23b97777ea5..4ec0f4feee00 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -275,6 +275,7 @@ #include #include #include +#include #include #include @@ -2811,7 +2812,8 @@ void __tcp_close(struct sock *sk, long timeout) /* Unread data was tossed, zap the connection. */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, sk->sk_allocation); + tcp_send_active_reset(sk, sk->sk_allocation, + SK_RST_REASON_NOT_SPECIFIED); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. 
*/ sk->sk_prot->disconnect(sk, 0); @@ -2885,7 +2887,8 @@ adjudge_to_death: struct tcp_sock *tp = tcp_sk(sk); if (READ_ONCE(tp->linger2) < 0) { tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONLINGER); } else { @@ -2903,7 +2906,8 @@ adjudge_to_death: if (sk->sk_state != TCP_CLOSE) { if (tcp_check_oom(sk, 0)) { tcp_set_state(sk, TCP_CLOSE); - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); } else if (!check_net(sock_net(sk))) { @@ -3007,7 +3011,7 @@ int tcp_disconnect(struct sock *sk, int flags) /* The last check adjusts for discrepancy of Linux wrt. RFC * states */ - tcp_send_active_reset(sk, gfp_any()); + tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_NOT_SPECIFIED); WRITE_ONCE(sk->sk_err, ECONNRESET); } else if (old_state == TCP_SYN_SENT) WRITE_ONCE(sk->sk_err, ECONNRESET); @@ -4564,7 +4568,8 @@ int tcp_abort(struct sock *sk, int err) smp_wmb(); sk_error_report(sk); if (tcp_need_reset(sk->sk_state)) - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); tcp_done(sk); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ce59e4499b66..41c352bf3394 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3615,7 +3615,8 @@ void tcp_send_fin(struct sock *sk) * was unread data in the receive queue. This behavior is recommended * by RFC 2525, section 2.17. 
-DaveM */ -void tcp_send_active_reset(struct sock *sk, gfp_t priority) +void tcp_send_active_reset(struct sock *sk, gfp_t priority, + enum sk_rst_reason reason) { struct sk_buff *skb; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 976db57b95d4..83fe7f62f7f1 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,6 +22,7 @@ #include #include #include +#include static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) { @@ -127,7 +128,8 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) (!tp->snd_wnd && !tp->packets_out)) do_reset = true; if (do_reset) - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); tcp_done(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; @@ -768,7 +770,7 @@ static void tcp_keepalive_timer (struct timer_list *t) goto out; } } - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED); goto death; } @@ -795,7 +797,8 @@ static void tcp_keepalive_timer (struct timer_list *t) icsk->icsk_probes_out > 0) || (user_timeout == 0 && icsk->icsk_probes_out >= keepalive_probes(tp))) { - tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_send_active_reset(sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); tcp_write_err(sk); goto out; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index f8bc34f0d973..065967086492 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -21,6 +21,7 @@ #endif #include #include +#include #include #include "protocol.h" #include "mib.h" @@ -2569,7 +2570,8 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) slow = lock_sock_fast(tcp_sk); if (tcp_sk->sk_state != TCP_CLOSE) { - tcp_send_active_reset(tcp_sk, GFP_ATOMIC); + tcp_send_active_reset(tcp_sk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); tcp_set_state(tcp_sk, TCP_CLOSE); } unlock_sock_fast(tcp_sk, slow); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 
32fe2ef36d56..ac867d277860 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -414,7 +414,7 @@ void mptcp_subflow_reset(struct sock *ssk) /* must hold: tcp_done() could drop last reference on parent */ sock_hold(sk); - tcp_send_active_reset(ssk, GFP_ATOMIC); + tcp_send_active_reset(ssk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED); tcp_done(ssk); if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags)) mptcp_schedule_work(sk); @@ -1350,7 +1350,8 @@ reset: tcp_set_state(ssk, TCP_CLOSE); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); - tcp_send_active_reset(ssk, GFP_ATOMIC); + tcp_send_active_reset(ssk, GFP_ATOMIC, + SK_RST_REASON_NOT_SPECIFIED); WRITE_ONCE(subflow->data_avail, false); return false; } -- cgit v1.2.3-59-g8ed1b From 215d40248bde5562a21e4c6cdeaeca0495c9365a Mon Sep 17 00:00:00 2001 From: Jason Xing Date: Thu, 25 Apr 2024 11:13:39 +0800 Subject: mptcp: introducing a helper into active reset logic Since we have mapped every mptcp reset reason definition in enum sk_rst_reason, introducing a new helper can cover some missing places where we have already set the subflow->reset_reason. Note: using SK_RST_REASON_NOT_SPECIFIED is the same as SK_RST_REASON_MPTCP_RST_EUNSPEC. They are both unknown. So we can convert it directly. 
Suggested-by: Paolo Abeni Signed-off-by: Jason Xing Reviewed-by: Matthieu Baerts (NGI0) Reviewed-by: Eric Dumazet Signed-off-by: Paolo Abeni --- net/mptcp/protocol.c | 4 +--- net/mptcp/protocol.h | 11 +++++++++++ net/mptcp/subflow.c | 6 ++---- 3 files changed, 14 insertions(+), 7 deletions(-) (limited to 'net/mptcp/protocol.c') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 065967086492..4b13ca362efa 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -21,7 +21,6 @@ #endif #include #include -#include #include #include "protocol.h" #include "mib.h" @@ -2570,8 +2569,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) slow = lock_sock_fast(tcp_sk); if (tcp_sk->sk_state != TCP_CLOSE) { - tcp_send_active_reset(tcp_sk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + mptcp_send_active_reset_reason(tcp_sk); tcp_set_state(tcp_sk, TCP_CLOSE); } unlock_sock_fast(tcp_sk, slow); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 252618859ee8..cfc5f9c3f113 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -12,6 +12,7 @@ #include #include #include +#include #include "mptcp_pm_gen.h" @@ -608,6 +609,16 @@ sk_rst_convert_mptcp_reason(u32 reason) } } +static inline void +mptcp_send_active_reset_reason(struct sock *sk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + enum sk_rst_reason reason; + + reason = sk_rst_convert_mptcp_reason(subflow->reset_reason); + tcp_send_active_reset(sk, GFP_ATOMIC, reason); +} + static inline u64 mptcp_subflow_get_map_offset(const struct mptcp_subflow_context *subflow) { diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index fb7abf2d01ca..97ec44d1df30 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -20,7 +20,6 @@ #include #endif #include -#include #include "protocol.h" #include "mib.h" @@ -424,7 +423,7 @@ void mptcp_subflow_reset(struct sock *ssk) /* must hold: tcp_done() could drop last reference on parent */ sock_hold(sk); - 
tcp_send_active_reset(ssk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED); + mptcp_send_active_reset_reason(ssk); tcp_done(ssk); if (!test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &mptcp_sk(sk)->flags)) mptcp_schedule_work(sk); @@ -1362,8 +1361,7 @@ reset: tcp_set_state(ssk, TCP_CLOSE); while ((skb = skb_peek(&ssk->sk_receive_queue))) sk_eat_skb(ssk, skb); - tcp_send_active_reset(ssk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + mptcp_send_active_reset_reason(ssk); WRITE_ONCE(subflow->data_avail, false); return false; } -- cgit v1.2.3-59-g8ed1b From a86a0661b86f310c0b73a30c829648864f0b2619 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 29 Apr 2024 13:40:21 +0000 Subject: net: move sysctl_max_skb_frags to net_hotdata sysctl_max_skb_frags is used in TCP and MPTCP fast paths, move it to net_hotdata for better cache locality. Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Link: https://lore.kernel.org/r/20240429134025.1233626-2-edumazet@google.com Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 2 -- include/net/hotdata.h | 1 + net/core/hotdata.c | 1 + net/core/skbuff.c | 5 +---- net/core/sysctl_net_core.c | 2 +- net/ipv4/tcp.c | 3 ++- net/mptcp/protocol.c | 3 ++- 7 files changed, 8 insertions(+), 9 deletions(-) (limited to 'net/mptcp/protocol.c') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index adf75d69770c..36b133f04d30 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -353,8 +353,6 @@ struct sk_buff; #define MAX_SKB_FRAGS CONFIG_MAX_SKB_FRAGS -extern int sysctl_max_skb_frags; - /* Set skb_shinfo(skb)->gso_size to this in case you want skb_segment to * segment using its current segmentation instead. 
*/ diff --git a/include/net/hotdata.h b/include/net/hotdata.h index 003667a1efd6..a6cff6590426 100644 --- a/include/net/hotdata.h +++ b/include/net/hotdata.h @@ -38,6 +38,7 @@ struct net_hotdata { int max_backlog; int dev_tx_weight; int dev_rx_weight; + int sysctl_max_skb_frags; }; #define inet_ehash_secret net_hotdata.tcp_protocol.secret diff --git a/net/core/hotdata.c b/net/core/hotdata.c index c8a7a451c18a..f17cbb4807b9 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -18,5 +18,6 @@ struct net_hotdata net_hotdata __cacheline_aligned = { .max_backlog = 1000, .dev_tx_weight = 64, .dev_rx_weight = 64, + .sysctl_max_skb_frags = MAX_SKB_FRAGS, }; EXPORT_SYMBOL(net_hotdata); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0c8b82750000..65779b8f0b12 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -109,9 +109,6 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; #define SKB_SMALL_HEAD_HEADROOM \ SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) -int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; -EXPORT_SYMBOL(sysctl_max_skb_frags); - /* kcm_write_msgs() relies on casting paged frags to bio_vec to use * iov_iter_bvec(). These static asserts ensure the cast is valid is long as the * netmem is a page. 
@@ -7040,7 +7037,7 @@ static void skb_splice_csum_page(struct sk_buff *skb, struct page *page, ssize_t skb_splice_from_iter(struct sk_buff *skb, struct iov_iter *iter, ssize_t maxsize, gfp_t gfp) { - size_t frag_limit = READ_ONCE(sysctl_max_skb_frags); + size_t frag_limit = READ_ONCE(net_hotdata.sysctl_max_skb_frags); struct page *pages[8], **ppages = pages; ssize_t spliced = 0, ret = 0; unsigned int i; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 903ab4a51c17..e75375d54b9e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -595,7 +595,7 @@ static struct ctl_table net_core_table[] = { }, { .procname = "max_skb_frags", - .data = &sysctl_max_skb_frags, + .data = &net_hotdata.sysctl_max_skb_frags, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4ec0f4feee00..388f6e115bf1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -280,6 +280,7 @@ #include #include #include +#include #include /* Track pending CMSGs. 
*/ @@ -1188,7 +1189,7 @@ new_segment: if (!skb_can_coalesce(skb, i, pfrag->page, pfrag->offset)) { - if (i >= READ_ONCE(sysctl_max_skb_frags)) { + if (i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) { tcp_mark_push(tp, skb); goto new_segment; } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 4b13ca362efa..aff17597e6a7 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -20,6 +20,7 @@ #include #endif #include +#include #include #include #include "protocol.h" @@ -1272,7 +1273,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, i = skb_shinfo(skb)->nr_frags; can_coalesce = skb_can_coalesce(skb, i, dfrag->page, offset); - if (!can_coalesce && i >= READ_ONCE(sysctl_max_skb_frags)) { + if (!can_coalesce && i >= READ_ONCE(net_hotdata.sysctl_max_skb_frags)) { tcp_mark_push(tcp_sk(ssk), skb); goto alloc_skb; } -- cgit v1.2.3-59-g8ed1b From fb7a0d334894206ae35f023a82cad5a290fd7386 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 29 Apr 2024 20:00:31 +0200 Subject: mptcp: ensure snd_nxt is properly initialized on connect Christoph reported a splat hinting at a corrupted snd_una: WARNING: CPU: 1 PID: 38 at net/mptcp/protocol.c:1005 __mptcp_clean_una+0x4b3/0x620 net/mptcp/protocol.c:1005 Modules linked in: CPU: 1 PID: 38 Comm: kworker/1:1 Not tainted 6.9.0-rc1-gbbeac67456c9 #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014 Workqueue: events mptcp_worker RIP: 0010:__mptcp_clean_una+0x4b3/0x620 net/mptcp/protocol.c:1005 Code: be 06 01 00 00 bf 06 01 00 00 e8 a8 12 e7 fe e9 00 fe ff ff e8 8e 1a e7 fe 0f b7 ab 3e 02 00 00 e9 d3 fd ff ff e8 7d 1a e7 fe <0f> 0b 4c 8b bb e0 05 00 00 e9 74 fc ff ff e8 6a 1a e7 fe 0f 0b e9 RSP: 0018:ffffc9000013fd48 EFLAGS: 00010293 RAX: 0000000000000000 RBX: ffff8881029bd280 RCX: ffffffff82382fe4 RDX: ffff8881003cbd00 RSI: ffffffff823833c3 RDI: 0000000000000001 RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000000000000 R11: 
fefefefefefefeff R12: ffff888138ba8000 R13: 0000000000000106 R14: ffff8881029bd908 R15: ffff888126560000 FS: 0000000000000000(0000) GS:ffff88813bd00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f604a5dae38 CR3: 0000000101dac002 CR4: 0000000000170ef0 Call Trace: __mptcp_clean_una_wakeup net/mptcp/protocol.c:1055 [inline] mptcp_clean_una_wakeup net/mptcp/protocol.c:1062 [inline] __mptcp_retrans+0x7f/0x7e0 net/mptcp/protocol.c:2615 mptcp_worker+0x434/0x740 net/mptcp/protocol.c:2767 process_one_work+0x1e0/0x560 kernel/workqueue.c:3254 process_scheduled_works kernel/workqueue.c:3335 [inline] worker_thread+0x3c7/0x640 kernel/workqueue.c:3416 kthread+0x121/0x170 kernel/kthread.c:388 ret_from_fork+0x44/0x50 arch/x86/kernel/process.c:147 ret_from_fork_asm+0x1a/0x30 arch/x86/entry/entry_64.S:243 When fallback to TCP happens early on a client socket, snd_nxt is not yet initialized and any incoming ack will copy such value into snd_una. If the mptcp worker (dumbly) tries mptcp-level re-injection after such ack, that would unconditionally trigger a send buffer cleanup using 'bad' snd_una values. We could easily disable re-injection for fallback sockets, but such dumb behavior already helped catching a few subtle issues and has a very low to zero impact in practice. Instead, address the issue by always initializing snd_nxt (and write_seq, for consistency) at connect time. 
Fixes: 8fd738049ac3 ("mptcp: fallback in case of simultaneous connect") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/485 Tested-by: Christoph Paasch Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://lore.kernel.org/r/20240429-upstream-net-20240429-mptcp-snd_nxt-init-connect-v1-1-59ceac0a7dcb@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/mptcp/protocol.c') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7e74b812e366..965eb69dc5de 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3723,6 +3723,9 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT); mptcp_subflow_early_fallback(msk, subflow); } + + WRITE_ONCE(msk->write_seq, subflow->idsn); + WRITE_ONCE(msk->snd_nxt, subflow->idsn); if (likely(!__mptcp_check_fallback(msk))) MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVE); -- cgit v1.2.3-59-g8ed1b From 92ef0fd55ac80dfc2e4654edfe5d1ddfa6e070fe Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 9 May 2024 09:20:08 -0600 Subject: net: change proto and proto_ops accept type Rather than pass in flags, error pointer, and whether this is a kernel invocation or not, add a struct proto_accept_arg struct as the argument. This then holds all of these arguments, and prepares accept for being able to pass back more information. No functional changes in this patch. 
Acked-by: Jakub Kicinski Signed-off-by: Jens Axboe --- crypto/af_alg.c | 11 ++++++----- crypto/algif_hash.c | 10 +++++----- drivers/xen/pvcalls-back.c | 6 +++++- fs/ocfs2/cluster/tcp.c | 5 ++++- include/crypto/if_alg.h | 3 ++- include/linux/net.h | 4 +++- include/net/inet_common.h | 4 ++-- include/net/inet_connection_sock.h | 2 +- include/net/sock.h | 12 +++++++++--- net/atm/svc.c | 8 ++++---- net/ax25/af_ax25.c | 6 +++--- net/bluetooth/iso.c | 4 ++-- net/bluetooth/l2cap_sock.c | 4 ++-- net/bluetooth/rfcomm/sock.c | 6 +++--- net/bluetooth/sco.c | 4 ++-- net/core/sock.c | 4 ++-- net/ipv4/af_inet.c | 10 +++++----- net/ipv4/inet_connection_sock.c | 6 +++--- net/iucv/af_iucv.c | 4 ++-- net/llc/af_llc.c | 7 +++---- net/mptcp/protocol.c | 11 +++++------ net/netrom/af_netrom.c | 6 +++--- net/nfc/llcp_sock.c | 4 ++-- net/phonet/pep.c | 12 ++++++------ net/phonet/socket.c | 7 +++---- net/rds/tcp_listen.c | 6 +++++- net/rose/af_rose.c | 6 +++--- net/sctp/socket.c | 8 ++++---- net/smc/af_smc.c | 6 +++--- net/socket.c | 13 ++++++++++--- net/tipc/socket.c | 13 +++++-------- net/unix/af_unix.c | 21 ++++++++++----------- net/vmw_vsock/af_vsock.c | 6 +++--- net/x25/af_x25.c | 4 ++-- 34 files changed, 132 insertions(+), 111 deletions(-) (limited to 'net/mptcp/protocol.c') diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 5bc6d0fa7498..18cfead0081d 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -407,7 +407,8 @@ unlock: return err; } -int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern) +int af_alg_accept(struct sock *sk, struct socket *newsock, + struct proto_accept_arg *arg) { struct alg_sock *ask = alg_sk(sk); const struct af_alg_type *type; @@ -422,7 +423,7 @@ int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern) if (!type) goto unlock; - sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, kern); + sk2 = sk_alloc(sock_net(sk), PF_ALG, GFP_KERNEL, &alg_proto, arg->kern); err = -ENOMEM; if (!sk2) goto unlock; @@ -468,10 +469,10 @@ 
unlock: } EXPORT_SYMBOL_GPL(af_alg_accept); -static int alg_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int alg_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { - return af_alg_accept(sock->sk, newsock, kern); + return af_alg_accept(sock->sk, newsock, arg); } static const struct proto_ops alg_proto_ops = { diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index e24c829d7a01..7c7394d46a23 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -223,8 +223,8 @@ unlock: return err ?: len; } -static int hash_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int hash_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); @@ -252,7 +252,7 @@ static int hash_accept(struct socket *sock, struct socket *newsock, int flags, if (err) goto out_free_state; - err = af_alg_accept(ask->parent, newsock, kern); + err = af_alg_accept(ask->parent, newsock, arg); if (err) goto out_free_state; @@ -355,7 +355,7 @@ static int hash_recvmsg_nokey(struct socket *sock, struct msghdr *msg, } static int hash_accept_nokey(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { int err; @@ -363,7 +363,7 @@ static int hash_accept_nokey(struct socket *sock, struct socket *newsock, if (err) return err; - return hash_accept(sock, newsock, flags, kern); + return hash_accept(sock, newsock, arg); } static struct proto_ops algif_hash_ops_nokey = { diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c index d52593466a79..fd7ed65e0197 100644 --- a/drivers/xen/pvcalls-back.c +++ b/drivers/xen/pvcalls-back.c @@ -517,6 +517,10 @@ static void __pvcalls_back_accept(struct work_struct *work) { struct sockpass_mapping *mappass = container_of( work, struct sockpass_mapping, register_work); + struct proto_accept_arg arg = { + .flags = 
O_NONBLOCK, + .kern = true, + }; struct sock_mapping *map; struct pvcalls_ioworker *iow; struct pvcalls_fedata *fedata; @@ -548,7 +552,7 @@ static void __pvcalls_back_accept(struct work_struct *work) sock->type = mappass->sock->type; sock->ops = mappass->sock->ops; - ret = inet_accept(mappass->sock, sock, O_NONBLOCK, true); + ret = inet_accept(mappass->sock, sock, &arg); if (ret == -EAGAIN) { sock_release(sock); return; diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 960080753d3b..2b8fa3e782fb 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1784,6 +1784,9 @@ static int o2net_accept_one(struct socket *sock, int *more) struct o2nm_node *node = NULL; struct o2nm_node *local_node = NULL; struct o2net_sock_container *sc = NULL; + struct proto_accept_arg arg = { + .flags = O_NONBLOCK, + }; struct o2net_node *nn; unsigned int nofs_flag; @@ -1802,7 +1805,7 @@ static int o2net_accept_one(struct socket *sock, int *more) new_sock->type = sock->type; new_sock->ops = sock->ops; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false); + ret = sock->ops->accept(sock, new_sock, &arg); if (ret < 0) goto out; diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index 78ecaf5db04c..f7b3b93f3a49 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -166,7 +166,8 @@ int af_alg_unregister_type(const struct af_alg_type *type); int af_alg_release(struct socket *sock); void af_alg_release_parent(struct sock *sk); -int af_alg_accept(struct sock *sk, struct socket *newsock, bool kern); +int af_alg_accept(struct sock *sk, struct socket *newsock, + struct proto_accept_arg *arg); void af_alg_free_sg(struct af_alg_sgl *sgl); diff --git a/include/linux/net.h b/include/linux/net.h index 15df6d5f27a7..688320b79fcc 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -153,6 +153,7 @@ struct sockaddr; struct msghdr; struct module; struct sk_buff; +struct proto_accept_arg; typedef int (*sk_read_actor_t)(read_descriptor_t 
*, struct sk_buff *, unsigned int, size_t); typedef int (*skb_read_actor_t)(struct sock *, struct sk_buff *); @@ -171,7 +172,8 @@ struct proto_ops { int (*socketpair)(struct socket *sock1, struct socket *sock2); int (*accept) (struct socket *sock, - struct socket *newsock, int flags, bool kern); + struct socket *newsock, + struct proto_accept_arg *arg); int (*getname) (struct socket *sock, struct sockaddr *addr, int peer); diff --git a/include/net/inet_common.h b/include/net/inet_common.h index f50a644d87a9..c17a6585d0b0 100644 --- a/include/net/inet_common.h +++ b/include/net/inet_common.h @@ -29,8 +29,8 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags, int is_sendmsg); int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags); -int inet_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern); +int inet_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg); void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *newsk); int inet_send_prepare(struct sock *sk); diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 20e7b0c0b3d1..7d6b1254c92d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -250,7 +250,7 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk, return (unsigned long)min_t(u64, when, max_when); } -struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern); +struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg); int inet_csk_get_port(struct sock *sk, unsigned short snum); diff --git a/include/net/sock.h b/include/net/sock.h index 0450494a1766..217079b3e3e8 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1194,6 +1194,12 @@ static inline void sk_prot_clear_nulls(struct sock *sk, int size) size - offsetof(struct sock, sk_node.pprev)); } +struct proto_accept_arg { + 
int flags; + int err; + bool kern; +}; + /* Networking protocol blocks we attach to sockets. * socket layer -> transport layer interface */ @@ -1208,8 +1214,8 @@ struct proto { int addr_len); int (*disconnect)(struct sock *sk, int flags); - struct sock * (*accept)(struct sock *sk, int flags, int *err, - bool kern); + struct sock * (*accept)(struct sock *sk, + struct proto_accept_arg *arg); int (*ioctl)(struct sock *sk, int cmd, int *karg); @@ -1804,7 +1810,7 @@ int sock_cmsg_send(struct sock *sk, struct msghdr *msg, int sock_no_bind(struct socket *, struct sockaddr *, int); int sock_no_connect(struct socket *, struct sockaddr *, int, int); int sock_no_socketpair(struct socket *, struct socket *); -int sock_no_accept(struct socket *, struct socket *, int, bool); +int sock_no_accept(struct socket *, struct socket *, struct proto_accept_arg *); int sock_no_getname(struct socket *, struct sockaddr *, int); int sock_no_ioctl(struct socket *, unsigned int, unsigned long); int sock_no_listen(struct socket *, int); diff --git a/net/atm/svc.c b/net/atm/svc.c index 36a814f1fbd1..f8137ae693b0 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -324,8 +324,8 @@ out: return error; } -static int svc_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int svc_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *sk = sock->sk; struct sk_buff *skb; @@ -336,7 +336,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags, lock_sock(sk); - error = svc_create(sock_net(sk), newsock, 0, kern); + error = svc_create(sock_net(sk), newsock, 0, arg->kern); if (error) goto out; @@ -355,7 +355,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags, error = -sk->sk_err; break; } - if (flags & O_NONBLOCK) { + if (arg->flags & O_NONBLOCK) { error = -EAGAIN; break; } diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 9169efb2f43a..8077cf2ee448 100644 --- 
a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1373,8 +1373,8 @@ out_release: return err; } -static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int ax25_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sk_buff *skb; struct sock *newsk; @@ -1409,7 +1409,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags, if (skb) break; - if (flags & O_NONBLOCK) { + if (arg->flags & O_NONBLOCK) { err = -EWOULDBLOCK; break; } diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c index ef0cc80b4c0c..2a075119d65d 100644 --- a/net/bluetooth/iso.c +++ b/net/bluetooth/iso.c @@ -1186,7 +1186,7 @@ done: } static int iso_sock_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *ch; @@ -1195,7 +1195,7 @@ static int iso_sock_accept(struct socket *sock, struct socket *newsock, lock_sock(sk); - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index 5cc83f906c12..125dddc77452 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -327,7 +327,7 @@ done: } static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; @@ -336,7 +336,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, lock_sock_nested(sk, L2CAP_NESTING_PARENT); - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 29aa07e9db9d..37d63d768afb 100644 --- 
a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -468,8 +468,8 @@ done: return err; } -static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; @@ -483,7 +483,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f goto done; } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index e0ad30862ee4..94c6f2b46279 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -647,7 +647,7 @@ done: } static int sco_sock_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *ch; @@ -656,7 +656,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, lock_sock(sk); - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); diff --git a/net/core/sock.c b/net/core/sock.c index 8d6e638b5426..8629f9aecf91 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3241,8 +3241,8 @@ int sock_no_socketpair(struct socket *sock1, struct socket *sock2) } EXPORT_SYMBOL(sock_no_socketpair); -int sock_no_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +int sock_no_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { return -EOPNOTSUPP; } diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a7bad18bc8b5..de3449e16b89 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -771,16 +771,16 @@ void __inet_accept(struct socket *sock, struct socket *newsock, struct sock *new * 
Accept a pending connection. The TCP layer now gives BSD semantics. */ -int inet_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +int inet_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *sk1 = sock->sk, *sk2; - int err = -EINVAL; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ - sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern); + arg->err = -EINVAL; + sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, arg); if (!sk2) - return err; + return arg->err; lock_sock(sk2); __inet_accept(sock, newsock, sk2); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3b38610958ee..7734d189c66b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -661,7 +661,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) /* * This will accept the next outstanding connection. */ -struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) +struct sock *inet_csk_accept(struct sock *sk, struct proto_accept_arg *arg) { struct inet_connection_sock *icsk = inet_csk(sk); struct request_sock_queue *queue = &icsk->icsk_accept_queue; @@ -680,7 +680,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) /* Find already established connection */ if (reqsk_queue_empty(queue)) { - long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + long timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); /* If this is a non blocking socket don't sleep */ error = -EAGAIN; @@ -745,7 +745,7 @@ out: out_err: newsk = NULL; req = NULL; - *err = error; + arg->err = error; goto out; } EXPORT_SYMBOL(inet_csk_accept); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c951bb9cc2e0..c3b0b610b0aa 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -795,7 +795,7 @@ done: /* Accept a pending connection */ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + 
struct proto_accept_arg *arg) { DECLARE_WAITQUEUE(wait, current); struct sock *sk = sock->sk, *nsk; @@ -809,7 +809,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock, goto done; } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); /* Wait for an incoming connection */ add_wait_queue_exclusive(sk_sleep(sk), &wait); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index fde1140d899e..4eb52add7103 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -688,14 +688,13 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb) * llc_ui_accept - accept a new incoming connection. * @sock: Socket which connections arrive on. * @newsock: Socket to move incoming connection to. - * @flags: User specified operational flags. - * @kern: If the socket is kernel internal + * @arg: User specified arguments * * Accept a new incoming connection. * Returns 0 upon success, negative otherwise. */ -static int llc_ui_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int llc_ui_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *sk = sock->sk, *newsk; struct llc_sock *llc, *newllc; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index bb8f96f2b86f..815ce439183c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3881,11 +3881,10 @@ unlock: } static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct sock *ssk, *newsk; - int err; pr_debug("msk=%p", msk); @@ -3897,9 +3896,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, return -EINVAL; pr_debug("ssk=%p, listener=%p", ssk, mptcp_subflow_ctx(ssk)); - newsk = inet_csk_accept(ssk, flags, &err, kern); + newsk = inet_csk_accept(ssk, arg); if (!newsk) - return err; + return arg->err; pr_debug("newsk=%p, 
subflow is mptcp=%d", newsk, sk_is_mptcp(newsk)); if (sk_is_mptcp(newsk)) { @@ -3920,7 +3919,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, newsk = new_mptcp_sock; MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEPASSIVEACK); - newsk->sk_kern_sock = kern; + newsk->sk_kern_sock = arg->kern; lock_sock(newsk); __inet_accept(sock, newsock, newsk); @@ -3949,7 +3948,7 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, } } else { tcpfallback: - newsk->sk_kern_sock = kern; + newsk->sk_kern_sock = arg->kern; lock_sock(newsk); __inet_accept(sock, newsock, newsk); /* we are being invoked after accepting a non-mp-capable diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 104a80b75477..6ee148f0e6d0 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -772,8 +772,8 @@ out_release: return err; } -static int nr_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int nr_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sk_buff *skb; struct sock *newsk; @@ -805,7 +805,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags, if (skb) break; - if (flags & O_NONBLOCK) { + if (arg->flags & O_NONBLOCK) { err = -EWOULDBLOCK; break; } diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index d5344563e525..57a2f97004e1 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -447,7 +447,7 @@ struct sock *nfc_llcp_accept_dequeue(struct sock *parent, } static int llcp_sock_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { DECLARE_WAITQUEUE(wait, current); struct sock *sk = sock->sk, *new_sk; @@ -463,7 +463,7 @@ static int llcp_sock_accept(struct socket *sock, struct socket *newsock, goto error; } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); /* Wait for an incoming connection. 
*/ add_wait_queue_exclusive(sk_sleep(sk), &wait); diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 3dd5f52bc1b5..53a858478e22 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -759,8 +759,8 @@ static void pep_sock_close(struct sock *sk, long timeout) sock_put(sk); } -static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, - bool kern) +static struct sock *pep_sock_accept(struct sock *sk, + struct proto_accept_arg *arg) { struct pep_sock *pn = pep_sk(sk), *newpn; struct sock *newsk = NULL; @@ -772,8 +772,8 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, u8 pipe_handle, enabled, n_sb; u8 aligned = 0; - skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, - errp); + skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, + &arg->err); if (!skb) return NULL; @@ -836,7 +836,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, /* Create a new to-be-accepted sock */ newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_KERNEL, sk->sk_prot, - kern); + arg->kern); if (!newsk) { pep_reject_conn(sk, skb, PN_PIPE_ERR_OVERLOAD, GFP_KERNEL); err = -ENOBUFS; @@ -878,7 +878,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp, drop: release_sock(sk); kfree_skb(skb); - *errp = err; + arg->err = err; return newsk; } diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 1018340d89a7..5ce0b3ee5def 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -292,18 +292,17 @@ out: } static int pn_socket_accept(struct socket *sock, struct socket *newsock, - int flags, bool kern) + struct proto_accept_arg *arg) { struct sock *sk = sock->sk; struct sock *newsk; - int err; if (unlikely(sk->sk_state != TCP_LISTEN)) return -EINVAL; - newsk = sk->sk_prot->accept(sk, flags, &err, kern); + newsk = sk->sk_prot->accept(sk, arg); if (!newsk) - return err; + return arg->err; lock_sock(newsk); sock_graft(newsk, newsock); diff --git 
a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 05008ce5c421..d89bd8d0c354 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -105,6 +105,10 @@ int rds_tcp_accept_one(struct socket *sock) int conn_state; struct rds_conn_path *cp; struct in6_addr *my_addr, *peer_addr; + struct proto_accept_arg arg = { + .flags = O_NONBLOCK, + .kern = true, + }; #if !IS_ENABLED(CONFIG_IPV6) struct in6_addr saddr, daddr; #endif @@ -119,7 +123,7 @@ int rds_tcp_accept_one(struct socket *sock) if (ret) goto out; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, true); + ret = sock->ops->accept(sock, new_sock, &arg); if (ret < 0) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index ef81d019b20f..59050caab65c 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -919,8 +919,8 @@ out_release: return err; } -static int rose_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int rose_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sk_buff *skb; struct sock *newsk; @@ -953,7 +953,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags, if (skb) break; - if (flags & O_NONBLOCK) { + if (arg->flags & O_NONBLOCK) { err = -EWOULDBLOCK; break; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 64196b1dce1d..c009383369b2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4847,7 +4847,7 @@ static int sctp_disconnect(struct sock *sk, int flags) * descriptor will be returned from accept() to represent the newly * formed association. 
*/ -static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) +static struct sock *sctp_accept(struct sock *sk, struct proto_accept_arg *arg) { struct sctp_sock *sp; struct sctp_endpoint *ep; @@ -4871,7 +4871,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) goto out; } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); error = sctp_wait_for_accept(sk, timeo); if (error) @@ -4882,7 +4882,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) */ asoc = list_entry(ep->asocs.next, struct sctp_association, asocs); - newsk = sp->pf->create_accept_sk(sk, asoc, kern); + newsk = sp->pf->create_accept_sk(sk, asoc, arg->kern); if (!newsk) { error = -ENOMEM; goto out; @@ -4899,7 +4899,7 @@ static struct sock *sctp_accept(struct sock *sk, int flags, int *err, bool kern) out: release_sock(sk); - *err = error; + arg->err = error; return newsk; } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 9389f0cfa374..e50a286fd0fb 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2689,7 +2689,7 @@ out: } static int smc_accept(struct socket *sock, struct socket *new_sock, - int flags, bool kern) + struct proto_accept_arg *arg) { struct sock *sk = sock->sk, *nsk; DECLARE_WAITQUEUE(wait, current); @@ -2708,7 +2708,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, } /* Wait for an incoming connection */ - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); add_wait_queue_exclusive(sk_sleep(sk), &wait); while (!(nsk = smc_accept_dequeue(sk, new_sock))) { set_current_state(TASK_INTERRUPTIBLE); @@ -2735,7 +2735,7 @@ static int smc_accept(struct socket *sock, struct socket *new_sock, if (rc) goto out; - if (lsmc->sockopt_defer_accept && !(flags & O_NONBLOCK)) { + if (lsmc->sockopt_defer_accept && !(arg->flags & O_NONBLOCK)) { /* wait till data arrives on the socket */ timeo 
= msecs_to_jiffies(lsmc->sockopt_defer_accept * MSEC_PER_SEC); diff --git a/net/socket.c b/net/socket.c index 01a71ae10c35..6ff5f21d9633 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1898,6 +1898,9 @@ struct file *do_accept(struct file *file, unsigned file_flags, struct file *newfile; int err, len; struct sockaddr_storage address; + struct proto_accept_arg arg = { + .flags = file_flags, + }; const struct proto_ops *ops; sock = sock_from_file(file); @@ -1926,8 +1929,8 @@ struct file *do_accept(struct file *file, unsigned file_flags, if (err) goto out_fd; - err = ops->accept(sock, newsock, sock->file->f_flags | file_flags, - false); + arg.flags |= sock->file->f_flags; + err = ops->accept(sock, newsock, &arg); if (err < 0) goto out_fd; @@ -3580,6 +3583,10 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { struct sock *sk = sock->sk; const struct proto_ops *ops = READ_ONCE(sock->ops); + struct proto_accept_arg arg = { + .flags = flags, + .kern = true, + }; int err; err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, @@ -3587,7 +3594,7 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) if (err < 0) goto done; - err = ops->accept(sock, *newsock, flags, true); + err = ops->accept(sock, *newsock, &arg); if (err < 0) { sock_release(*newsock); *newsock = NULL; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 798397b6811e..2d58ecae4e21 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -146,8 +146,6 @@ static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); static void tipc_sock_destruct(struct sock *sk); static int tipc_release(struct socket *sock); -static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, - bool kern); static void tipc_sk_timeout(struct timer_list *t); static int tipc_sk_publish(struct tipc_sock *tsk, struct tipc_uaddr *ua); static int tipc_sk_withdraw(struct tipc_sock *tsk, struct tipc_uaddr *ua); @@ -2711,13 
+2709,12 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) * tipc_accept - wait for connection request * @sock: listening socket * @new_sock: new socket that is to be connected - * @flags: file-related flags associated with socket - * @kern: caused by kernel or by userspace? + * @arg: arguments for accept * * Return: 0 on success, errno otherwise */ -static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, - bool kern) +static int tipc_accept(struct socket *sock, struct socket *new_sock, + struct proto_accept_arg *arg) { struct sock *new_sk, *sk = sock->sk; struct tipc_sock *new_tsock; @@ -2733,14 +2730,14 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, res = -EINVAL; goto exit; } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); res = tipc_wait_for_accept(sock, timeo); if (res) goto exit; buf = skb_peek(&sk->sk_receive_queue); - res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, kern); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, arg->kern); if (res) goto exit; security_sk_clone(sock->sk, new_sock->sk); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index dc1651541723..26e3b5f1ee46 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -755,7 +755,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int); static int unix_stream_connect(struct socket *, struct sockaddr *, int addr_len, int flags); static int unix_socketpair(struct socket *, struct socket *); -static int unix_accept(struct socket *, struct socket *, int, bool); +static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg); static int unix_getname(struct socket *, struct sockaddr *, int); static __poll_t unix_poll(struct file *, struct socket *, poll_table *); static __poll_t unix_dgram_poll(struct file *, struct socket *, @@ -1689,19 +1689,18 @@ static void unix_sock_inherit_flags(const struct socket *old, 
set_bit(SOCK_PASSSEC, &new->flags); } -static int unix_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int unix_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *sk = sock->sk; struct sk_buff *skb; struct sock *tsk; - int err; - err = -EOPNOTSUPP; + arg->err = -EOPNOTSUPP; if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) goto out; - err = -EINVAL; + arg->err = -EINVAL; if (sk->sk_state != TCP_LISTEN) goto out; @@ -1709,12 +1708,12 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, * so that no locks are necessary. */ - skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, - &err); + skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, + &arg->err); if (!skb) { /* This means receive shutdown. */ - if (err == 0) - err = -EINVAL; + if (arg->err == 0) + arg->err = -EINVAL; goto out; } @@ -1732,7 +1731,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, return 0; out: - return err; + return arg->err; } diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 54ba7316f808..4b040285aa78 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1500,8 +1500,8 @@ out: return err; } -static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int vsock_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *listener; int err; @@ -1528,7 +1528,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, /* Wait for children sockets to appear; these are the new sockets * created upon connection establishment. 
*/ - timeout = sock_rcvtimeo(listener, flags & O_NONBLOCK); + timeout = sock_rcvtimeo(listener, arg->flags & O_NONBLOCK); prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); while ((connected = vsock_dequeue_accept(listener)) == NULL && diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index d18d51412cc0..8dda4178497c 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -871,8 +871,8 @@ static int x25_wait_for_data(struct sock *sk, long timeout) return rc; } -static int x25_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int x25_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *sk = sock->sk; struct sock *newsk; -- cgit v1.2.3-59-g8ed1b From 5eae7a8202f368a77ff0148729a9fa73cc24033a Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 13 May 2024 18:13:29 -0700 Subject: mptcp: prefer strscpy over strcpy strcpy() performs no bounds checking on the destination buffer. This could result in linear overflows beyond the end of the buffer, leading to all kinds of misbehaviors. The safe replacement is strscpy() [1]. This is in preparation of a possible future step where all strcpy() uses will be removed in favour of strscpy() [2]. 
This fixes CheckPatch warnings: WARNING: Prefer strscpy over strcpy Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strcpy [1] Link: https://github.com/KSPP/linux/issues/88 [2] Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Signed-off-by: Mat Martineau Link: https://lore.kernel.org/r/20240514011335.176158-6-martineau@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/ctrl.c | 2 +- net/mptcp/protocol.c | 5 +++-- net/mptcp/sockopt.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'net/mptcp/protocol.c') diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 542555ba474c..98b1dd498ff6 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -92,7 +92,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) pernet->allow_join_initial_addr_port = 1; pernet->stale_loss_cnt = 4; pernet->pm_type = MPTCP_PM_TYPE_KERNEL; - strcpy(pernet->scheduler, "default"); + strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler)); } #ifdef CONFIG_SYSCTL diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index bb8f96f2b86f..a42494d3a71b 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2814,7 +2814,8 @@ static void mptcp_ca_reset(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); tcp_assign_congestion_control(sk); - strcpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name); + strscpy(mptcp_sk(sk)->ca_name, icsk->icsk_ca_ops->name, + sizeof(mptcp_sk(sk)->ca_name)); /* no need to keep a reference to the ops, the name will suffice */ tcp_cleanup_congestion_control(sk); @@ -4169,7 +4170,7 @@ int __init mptcp_proto_v6_init(void) int err; mptcp_v6_prot = mptcp_prot; - strcpy(mptcp_v6_prot.name, "MPTCPv6"); + strscpy(mptcp_v6_prot.name, "MPTCPv6", sizeof(mptcp_v6_prot.name)); mptcp_v6_prot.slab = NULL; mptcp_v6_prot.obj_size = sizeof(struct mptcp6_sock); mptcp_v6_prot.ipv6_pinfo_offset = offsetof(struct mptcp6_sock, np); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c 
index a77b33488176..f9a4fb17b5b7 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -616,7 +616,7 @@ static int mptcp_setsockopt_sol_tcp_congestion(struct mptcp_sock *msk, sockptr_t } if (ret == 0) - strcpy(msk->ca_name, name); + strscpy(msk->ca_name, name, sizeof(msk->ca_name)); release_sock(sk); return ret; -- cgit v1.2.3-59-g8ed1b