aboutsummaryrefslogtreecommitdiffstats
path: root/net/mptcp/subflow.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp/subflow.c')
-rw-r--r--net/mptcp/subflow.c363
1 files changed, 250 insertions, 113 deletions
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 6172f380dfb7..02a54d59697b 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -62,7 +62,9 @@ static void subflow_generate_hmac(u64 key1, u64 key2, u32 nonce1, u32 nonce2,
static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
{
return mptcp_is_fully_established((void *)msk) &&
- READ_ONCE(msk->pm.accept_subflow);
+ ((mptcp_pm_is_userspace(msk) &&
+ mptcp_userspace_pm_active(msk)) ||
+ READ_ONCE(msk->pm.accept_subflow));
}
/* validate received token and create truncated hmac and nonce for SYN-ACK */
@@ -153,7 +155,7 @@ static int subflow_check_req(struct request_sock *req,
return -EINVAL;
#endif
- mptcp_get_options(sk_listener, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
@@ -250,7 +252,7 @@ int mptcp_subflow_init_cookie_req(struct request_sock *req,
int err;
subflow_init_req(req, sk_listener);
- mptcp_get_options(sk_listener, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
opt_mp_capable = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPC);
opt_mp_join = !!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ);
@@ -344,9 +346,7 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow)
thmac = get_unaligned_be64(hmac);
pr_debug("subflow=%p, token=%u, thmac=%llu, subflow->thmac=%llu\n",
- subflow, subflow->token,
- (unsigned long long)thmac,
- (unsigned long long)subflow->thmac);
+ subflow, subflow->token, thmac, subflow->thmac);
return thmac == subflow->thmac;
}
@@ -388,7 +388,7 @@ static void mptcp_set_connected(struct sock *sk)
if (!sock_owned_by_user(sk))
__mptcp_set_connected(sk);
else
- set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_CONNECTED, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
}
@@ -410,7 +410,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset);
- mptcp_get_options(sk, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
if (subflow->request_mptcp) {
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
MPTCP_INC_STATS(sock_net(sk),
@@ -443,6 +443,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->backup = mp_opt.backup;
subflow->thmac = mp_opt.thmac;
subflow->remote_nonce = mp_opt.nonce;
+ subflow->remote_id = mp_opt.join_id;
pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u backup=%d",
subflow, subflow->thmac, subflow->remote_nonce,
subflow->backup);
@@ -483,9 +484,53 @@ do_reset:
mptcp_subflow_reset(sk);
}
+static void subflow_set_local_id(struct mptcp_subflow_context *subflow, int local_id)
+{
+ subflow->local_id = local_id;
+ subflow->local_id_valid = 1;
+}
+
+static int subflow_chk_local_id(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ int err;
+
+ if (likely(subflow->local_id_valid))
+ return 0;
+
+ err = mptcp_pm_get_local_id(msk, (struct sock_common *)sk);
+ if (err < 0)
+ return err;
+
+ subflow_set_local_id(subflow, err);
+ return 0;
+}
+
+static int subflow_rebuild_header(struct sock *sk)
+{
+ int err = subflow_chk_local_id(sk);
+
+ if (unlikely(err < 0))
+ return err;
+
+ return inet_sk_rebuild_header(sk);
+}
+
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+static int subflow_v6_rebuild_header(struct sock *sk)
+{
+ int err = subflow_chk_local_id(sk);
+
+ if (unlikely(err < 0))
+ return err;
+
+ return inet6_sk_rebuild_header(sk);
+}
+#endif
+
struct request_sock_ops mptcp_subflow_request_sock_ops;
-EXPORT_SYMBOL_GPL(mptcp_subflow_request_sock_ops);
-static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops;
+static struct tcp_request_sock_ops subflow_request_sock_ipv4_ops __ro_after_init;
static int subflow_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
@@ -506,9 +551,9 @@ drop:
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
-static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
-static struct inet_connection_sock_af_ops subflow_v6_specific;
-static struct inet_connection_sock_af_ops subflow_v6m_specific;
+static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops __ro_after_init;
+static struct inet_connection_sock_af_ops subflow_v6_specific __ro_after_init;
+static struct inet_connection_sock_af_ops subflow_v6m_specific __ro_after_init;
static struct proto tcpv6_prot_override;
static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
@@ -557,29 +602,6 @@ static bool subflow_hmac_valid(const struct request_sock *req,
return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN);
}
-static void mptcp_sock_destruct(struct sock *sk)
-{
- /* if new mptcp socket isn't accepted, it is free'd
- * from the tcp listener sockets request queue, linked
- * from req->sk. The tcp socket is released.
- * This calls the ULP release function which will
- * also remove the mptcp socket, via
- * sock_put(ctx->conn).
- *
- * Problem is that the mptcp socket will be in
- * ESTABLISHED state and will not have the SOCK_DEAD flag.
- * Both result in warnings from inet_sock_destruct.
- */
- if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
- sk->sk_state = TCP_CLOSE;
- WARN_ON_ONCE(sk->sk_socket);
- sock_orphan(sk);
- }
-
- mptcp_destroy_common(mptcp_sk(sk));
- inet_sock_destruct(sk);
-}
-
static void mptcp_force_close(struct sock *sk)
{
/* the msk is not yet exposed to user-space */
@@ -663,7 +685,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
* reordered MPC will cause fallback, but we don't have other
* options.
*/
- mptcp_get_options(sk, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPC)) {
fallback = true;
goto create_child;
@@ -673,7 +695,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk,
if (!new_msk)
fallback = true;
} else if (subflow_req->mp_join) {
- mptcp_get_options(sk, skb, &mp_opt);
+ mptcp_get_options(skb, &mp_opt);
if (!(mp_opt.suboptions & OPTIONS_MPTCP_MPJ) ||
!subflow_hmac_valid(req, &mp_opt) ||
!mptcp_can_accept_new_subflow(subflow_req->msk)) {
@@ -701,6 +723,8 @@ create_child:
goto dispose_child;
}
+ if (new_msk)
+ mptcp_copy_inaddrs(new_msk, child);
subflow_drop_ctx(child);
goto out;
}
@@ -722,13 +746,17 @@ create_child:
/* new mpc subflow takes ownership of the newly
* created mptcp socket
*/
- new_msk->sk_destruct = mptcp_sock_destruct;
mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq;
mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
ctx->conn = new_msk;
new_msk = NULL;
+ /* set msk addresses early to ensure mptcp_pm_get_local_id()
+ * uses the correct data
+ */
+ mptcp_copy_inaddrs(ctx->conn, child);
+
/* with OoO packets we can reach here without ingress
* mpc option
*/
@@ -790,7 +818,7 @@ dispose_child:
return child;
}
-static struct inet_connection_sock_af_ops subflow_specific;
+static struct inet_connection_sock_af_ops subflow_specific __ro_after_init;
static struct proto tcp_prot_override;
enum mapping_status {
@@ -798,7 +826,8 @@ enum mapping_status {
MAPPING_INVALID,
MAPPING_EMPTY,
MAPPING_DATA_FIN,
- MAPPING_DUMMY
+ MAPPING_DUMMY,
+ MAPPING_BAD_CSUM
};
static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
@@ -845,9 +874,8 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
bool csum_reqd)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
- struct csum_pseudo_header header;
u32 offset, seq, delta;
- __wsum csum;
+ __sum16 csum;
int len;
if (!csum_reqd)
@@ -908,19 +936,16 @@ static enum mapping_status validate_data_csum(struct sock *ssk, struct sk_buff *
* while the pseudo header requires the original DSS data len,
* including that
*/
- header.data_seq = cpu_to_be64(subflow->map_seq);
- header.subflow_seq = htonl(subflow->map_subflow_seq);
- header.data_len = htons(subflow->map_data_len + subflow->map_data_fin);
- header.csum = 0;
-
- csum = csum_partial(&header, sizeof(header), subflow->map_data_csum);
- if (unlikely(csum_fold(csum))) {
+ csum = __mptcp_make_csum(subflow->map_seq,
+ subflow->map_subflow_seq,
+ subflow->map_data_len + subflow->map_data_fin,
+ subflow->map_data_csum);
+ if (unlikely(csum)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
- subflow->send_mp_fail = 1;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
- return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
+ return MAPPING_BAD_CSUM;
}
+ subflow->valid_csum_seen = 1;
return MAPPING_OK;
}
@@ -967,7 +992,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk,
data_len = mpext->data_len;
if (data_len == 0) {
+ pr_debug("infinite mapping received");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
+ subflow->map_data_len = 0;
return MAPPING_INVALID;
}
@@ -1102,6 +1129,45 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
}
}
+static bool subflow_can_fallback(struct mptcp_subflow_context *subflow)
+{
+ struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+
+ if (subflow->mp_join)
+ return false;
+ else if (READ_ONCE(msk->csum_enabled))
+ return !subflow->valid_csum_seen;
+ else
+ return !subflow->fully_established;
+}
+
+static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ unsigned long fail_tout;
+
+ /* greceful failure can happen only on the MPC subflow */
+ if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
+ return;
+
+ /* since the close timeout take precedence on the fail one,
+ * no need to start the latter when the first is already set
+ */
+ if (sock_flag((struct sock *)msk, SOCK_DEAD))
+ return;
+
+ /* we don't need extreme accuracy here, use a zero fail_tout as special
+ * value meaning no fail timeout at all;
+ */
+ fail_tout = jiffies + TCP_RTO_MAX;
+ if (!fail_tout)
+ fail_tout = 1;
+ WRITE_ONCE(subflow->fail_tout, fail_tout);
+ tcp_send_ack(ssk);
+
+ mptcp_reset_timeout(msk, subflow->fail_tout);
+}
+
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -1110,7 +1176,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
struct sk_buff *skb;
if (!skb_peek(&ssk->sk_receive_queue))
- WRITE_ONCE(subflow->data_avail, 0);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
if (subflow->data_avail)
return true;
@@ -1121,10 +1187,8 @@ static bool subflow_check_data_avail(struct sock *ssk)
status = get_mapping_status(ssk, msk);
trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
- if (unlikely(status == MAPPING_INVALID))
- goto fallback;
-
- if (unlikely(status == MAPPING_DUMMY))
+ if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY ||
+ status == MAPPING_BAD_CSUM))
goto fallback;
if (status != MAPPING_OK)
@@ -1164,35 +1228,42 @@ no_data:
return false;
fallback:
- /* RFC 8684 section 3.7. */
- if (subflow->send_mp_fail) {
- if (mptcp_has_another_subflow(ssk)) {
+ if (!__mptcp_check_fallback(msk)) {
+ /* RFC 8684 section 3.7. */
+ if (status == MAPPING_BAD_CSUM &&
+ (subflow->mp_join || subflow->valid_csum_seen)) {
+ subflow->send_mp_fail = 1;
+
+ if (!READ_ONCE(msk->allow_infinite_fallback)) {
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
+ goto reset;
+ }
+ mptcp_subflow_fail(msk, ssk);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
+ return true;
+ }
+
+ if (!subflow_can_fallback(subflow) && subflow->map_data_len) {
+ /* fatal protocol error, close the socket.
+ * subflow_error_report() will introduce the appropriate barriers
+ */
+ subflow->reset_transient = 0;
+ subflow->reset_reason = MPTCP_RST_EMPTCP;
+
+reset:
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
while ((skb = skb_peek(&ssk->sk_receive_queue)))
sk_eat_skb(ssk, skb);
+ tcp_send_active_reset(ssk, GFP_ATOMIC);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ return false;
}
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, 0);
- return true;
- }
- if (subflow->mp_join || subflow->fully_established) {
- /* fatal protocol error, close the socket.
- * subflow_error_report() will introduce the appropriate barriers
- */
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
- subflow->reset_transient = 0;
- subflow->reset_reason = MPTCP_RST_EMPTCP;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- WRITE_ONCE(subflow->data_avail, 0);
- return false;
+ mptcp_do_fallback(ssk);
}
- __mptcp_do_fallback(msk);
skb = skb_peek(&ssk->sk_receive_queue);
subflow->map_valid = 1;
subflow->map_seq = READ_ONCE(msk->ack_seq);
@@ -1210,7 +1281,7 @@ bool mptcp_subflow_data_available(struct sock *sk)
if (subflow->map_valid &&
mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) {
subflow->map_valid = 0;
- WRITE_ONCE(subflow->data_avail, 0);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
pr_debug("Done with mapping: seq=%u data_len=%u",
subflow->map_subflow_seq,
@@ -1274,7 +1345,7 @@ static void subflow_error_report(struct sock *ssk)
if (!sock_owned_by_user(sk))
__mptcp_error_report(sk);
else
- set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags);
+ __set_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
}
@@ -1293,7 +1364,6 @@ static void subflow_data_ready(struct sock *sk)
if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
return;
- set_bit(MPTCP_DATA_READY, &msk->flags);
parent->sk_data_ready(parent);
return;
}
@@ -1315,7 +1385,7 @@ static void subflow_write_space(struct sock *ssk)
mptcp_write_space(sk);
}
-static struct inet_connection_sock_af_ops *
+static const struct inet_connection_sock_af_ops *
subflow_default_af_ops(struct sock *sk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -1330,7 +1400,7 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
- struct inet_connection_sock_af_ops *target;
+ const struct inet_connection_sock_af_ops *target;
target = mapped ? &subflow_v6m_specific : subflow_default_af_ops(sk);
@@ -1384,20 +1454,20 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
struct sockaddr_storage addr;
int remote_id = remote->id;
int local_id = loc->id;
+ int err = -ENOTCONN;
struct socket *sf;
struct sock *ssk;
u32 remote_token;
int addrlen;
int ifindex;
u8 flags;
- int err;
if (!mptcp_is_fully_established(sk))
- return -ENOTCONN;
+ goto err_out;
err = mptcp_subflow_create_socket(sk, &sf);
if (err)
- return err;
+ goto err_out;
ssk = sf->sk;
subflow = mptcp_subflow_ctx(ssk);
@@ -1405,15 +1475,10 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
get_random_bytes(&subflow->local_nonce, sizeof(u32));
} while (!subflow->local_nonce);
- if (!local_id) {
- err = mptcp_pm_get_local_id(msk, (struct sock_common *)ssk);
- if (err < 0)
- goto failed;
+ if (local_id)
+ subflow_set_local_id(subflow, local_id);
- local_id = err;
- }
-
- mptcp_pm_get_flags_and_ifindex_by_id(sock_net(sk), local_id,
+ mptcp_pm_get_flags_and_ifindex_by_id(msk, local_id,
&flags, &ifindex);
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
@@ -1425,6 +1490,8 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
+ mptcp_sockopt_sync(msk, ssk);
+
ssk->sk_bound_dev_if = ifindex;
err = kernel_bind(sf, (struct sockaddr *)&addr, addrlen);
if (err)
@@ -1434,14 +1501,13 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
pr_debug("msk=%p remote_token=%u local_id=%d remote_id=%d", msk,
remote_token, local_id, remote_id);
subflow->remote_token = remote_token;
- subflow->local_id = local_id;
subflow->remote_id = remote_id;
subflow->request_join = 1;
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
- mptcp_add_pending_subflow(msk, subflow);
- mptcp_sockopt_sync(msk, ssk);
+ sock_hold(ssk);
+ list_add_tail(&subflow->node, &msk->conn_list);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
if (err && err != -EINPROGRESS)
goto failed_unlink;
@@ -1449,17 +1515,22 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
/* discard the subflow socket */
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
- return err;
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
+ return 0;
failed_unlink:
- spin_lock_bh(&msk->join_list_lock);
list_del(&subflow->node);
- spin_unlock_bh(&msk->join_list_lock);
sock_put(mptcp_subflow_tcp_sock(subflow));
failed:
subflow->disposable = 1;
sock_release(sf);
+
+err_out:
+ /* we account subflows before the creation, and this failures will not
+ * be caught by sk_state_change()
+ */
+ mptcp_pm_close_subflow(msk);
return err;
}
@@ -1533,10 +1604,8 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
* needs it.
*/
sf->sk->sk_net_refcnt = 1;
- get_net(net);
-#ifdef CONFIG_PROC_FS
- this_cpu_add(*net->core.sock_inuse, 1);
-#endif
+ get_net_track(net, &sf->sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
err = tcp_set_ulp(sf->sk, "mptcp");
release_sock(sf->sk);
@@ -1548,7 +1617,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
/* the newly created socket really belongs to the owning MPTCP master
* socket, even if for additional subflows the allocation is performed
* by a kernel workqueue. Adjust inode references, so that the
- * procfs/diag interaces really show this one belonging to the correct
+ * procfs/diag interfaces really show this one belonging to the correct
* user.
*/
SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino;
@@ -1637,6 +1706,64 @@ static void subflow_state_change(struct sock *sk)
}
}
+void mptcp_subflow_queue_clean(struct sock *listener_ssk)
+{
+ struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
+ struct mptcp_sock *msk, *next, *head = NULL;
+ struct request_sock *req;
+
+ /* build a list of all unaccepted mptcp sockets */
+ spin_lock_bh(&queue->rskq_lock);
+ for (req = queue->rskq_accept_head; req; req = req->dl_next) {
+ struct mptcp_subflow_context *subflow;
+ struct sock *ssk = req->sk;
+ struct mptcp_sock *msk;
+
+ if (!sk_is_mptcp(ssk))
+ continue;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ if (!subflow || !subflow->conn)
+ continue;
+
+ /* skip if already in list */
+ msk = mptcp_sk(subflow->conn);
+ if (msk->dl_next || msk == head)
+ continue;
+
+ msk->dl_next = head;
+ head = msk;
+ }
+ spin_unlock_bh(&queue->rskq_lock);
+ if (!head)
+ return;
+
+ /* can't acquire the msk socket lock under the subflow one,
+ * or will cause ABBA deadlock
+ */
+ release_sock(listener_ssk);
+
+ for (msk = head; msk; msk = next) {
+ struct sock *sk = (struct sock *)msk;
+ bool slow, do_cancel_work;
+
+ sock_hold(sk);
+ slow = lock_sock_fast_nested(sk);
+ next = msk->dl_next;
+ msk->first = NULL;
+ msk->dl_next = NULL;
+
+ do_cancel_work = __mptcp_close(sk, 0);
+ unlock_sock_fast(sk, slow);
+ if (do_cancel_work)
+ mptcp_cancel_work(sk);
+ sock_put(sk);
+ }
+
+ /* we are still under the listener msk socket lock */
+ lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+}
+
static int subflow_ulp_init(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -1663,10 +1790,12 @@ static int subflow_ulp_init(struct sock *sk)
tp->is_mptcp = 1;
ctx->icsk_af_ops = icsk->icsk_af_ops;
icsk->icsk_af_ops = subflow_default_af_ops(sk);
- ctx->tcp_data_ready = sk->sk_data_ready;
ctx->tcp_state_change = sk->sk_state_change;
- ctx->tcp_write_space = sk->sk_write_space;
ctx->tcp_error_report = sk->sk_error_report;
+
+ WARN_ON_ONCE(sk->sk_data_ready != sock_def_readable);
+ WARN_ON_ONCE(sk->sk_write_space != sk_stream_write_space);
+
sk->sk_data_ready = subflow_data_ready;
sk->sk_write_space = subflow_write_space;
sk->sk_state_change = subflow_state_change;
@@ -1721,9 +1850,7 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->conn_finished = 1;
new_ctx->icsk_af_ops = old_ctx->icsk_af_ops;
- new_ctx->tcp_data_ready = old_ctx->tcp_data_ready;
new_ctx->tcp_state_change = old_ctx->tcp_state_change;
- new_ctx->tcp_write_space = old_ctx->tcp_write_space;
new_ctx->tcp_error_report = old_ctx->tcp_error_report;
new_ctx->rel_write_seq = 1;
new_ctx->tcp_sock = newsk;
@@ -1737,15 +1864,22 @@ static void subflow_ulp_clone(const struct request_sock *req,
new_ctx->token = subflow_req->token;
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->idsn = subflow_req->idsn;
+
+ /* this is the first subflow, id is always 0 */
+ new_ctx->local_id_valid = 1;
} else if (subflow_req->mp_join) {
new_ctx->ssn_offset = subflow_req->ssn_offset;
new_ctx->mp_join = 1;
new_ctx->fully_established = 1;
new_ctx->backup = subflow_req->backup;
- new_ctx->local_id = subflow_req->local_id;
new_ctx->remote_id = subflow_req->remote_id;
new_ctx->token = subflow_req->token;
new_ctx->thmac = subflow_req->thmac;
+
+ /* the subflow req id is valid, fetched via subflow_check_req()
+ * and subflow_token_join_request()
+ */
+ subflow_set_local_id(new_ctx, subflow_req->local_id);
}
}
@@ -1798,6 +1932,7 @@ void __init mptcp_subflow_init(void)
subflow_specific.conn_request = subflow_v4_conn_request;
subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
subflow_specific.sk_rx_dst_set = subflow_finish_connect;
+ subflow_specific.rebuild_header = subflow_rebuild_header;
tcp_prot_override = tcp_prot;
tcp_prot_override.release_cb = tcp_release_cb_override;
@@ -1810,6 +1945,7 @@ void __init mptcp_subflow_init(void)
subflow_v6_specific.conn_request = subflow_v6_conn_request;
subflow_v6_specific.syn_recv_sock = subflow_syn_recv_sock;
subflow_v6_specific.sk_rx_dst_set = subflow_finish_connect;
+ subflow_v6_specific.rebuild_header = subflow_v6_rebuild_header;
subflow_v6m_specific = subflow_v6_specific;
subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit;
@@ -1817,6 +1953,7 @@ void __init mptcp_subflow_init(void)
subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
subflow_v6m_specific.net_frag_header_len = 0;
+ subflow_v6m_specific.rebuild_header = subflow_rebuild_header;
tcpv6_prot_override = tcpv6_prot;
tcpv6_prot_override.release_cb = tcp_release_cb_override;