Diffstat:
 net/mptcp/subflow.c | 187
 1 file changed, 164 insertions(+), 23 deletions(-)
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 8b2338dfdc80..e1fbcab257e6 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -18,12 +18,15 @@
#include <net/tcp.h>
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
#include <net/ip6_route.h>
+#include <net/transp_v6.h>
#endif
#include <net/mptcp.h>
#include <uapi/linux/mptcp.h>
#include "protocol.h"
#include "mib.h"
+static void mptcp_subflow_ops_undo_override(struct sock *ssk);
+
static void SUBFLOW_REQ_INC_STATS(struct request_sock *req,
enum linux_mptcp_mib_field field)
{
@@ -61,11 +64,23 @@ static bool mptcp_can_accept_new_subflow(const struct mptcp_sock *msk)
}
/* validate received token and create truncated hmac and nonce for SYN-ACK */
-static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
- const struct sk_buff *skb)
+static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_req)
{
- struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
+ struct mptcp_sock *msk = subflow_req->msk;
u8 hmac[SHA256_DIGEST_SIZE];
+
+ get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
+
+ subflow_generate_hmac(msk->local_key, msk->remote_key,
+ subflow_req->local_nonce,
+ subflow_req->remote_nonce, hmac);
+
+ subflow_req->thmac = get_unaligned_be64(hmac);
+}
+
+static struct mptcp_sock *subflow_token_join_request(struct request_sock *req)
+{
+ struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct mptcp_sock *msk;
int local_id;
@@ -82,13 +97,6 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req,
}
subflow_req->local_id = local_id;
- get_random_bytes(&subflow_req->local_nonce, sizeof(u32));
-
- subflow_generate_hmac(msk->local_key, msk->remote_key,
- subflow_req->local_nonce,
- subflow_req->remote_nonce, hmac);
-
- subflow_req->thmac = get_unaligned_be64(hmac);
return msk;
}
@@ -102,6 +110,11 @@ static void subflow_init_req(struct request_sock *req, const struct sock *sk_lis
mptcp_token_init_request(req);
}
+static bool subflow_use_different_sport(struct mptcp_sock *msk, const struct sock *sk)
+{
+ return inet_sk(sk)->inet_sport != inet_sk((struct sock *)msk)->inet_sport;
+}
+
/* Init mptcp request socket.
*
* Returns an error code if a JOIN has failed and a TCP reset
@@ -171,12 +184,30 @@ again:
subflow_req->remote_id = mp_opt.join_id;
subflow_req->token = mp_opt.token;
subflow_req->remote_nonce = mp_opt.nonce;
- subflow_req->msk = subflow_token_join_request(req, skb);
+ subflow_req->msk = subflow_token_join_request(req);
/* Can't fall back to TCP in this case. */
if (!subflow_req->msk)
return -EPERM;
+ if (subflow_use_different_sport(subflow_req->msk, sk_listener)) {
+ pr_debug("syn inet_sport=%d %d",
+ ntohs(inet_sk(sk_listener)->inet_sport),
+ ntohs(inet_sk((struct sock *)subflow_req->msk)->inet_sport));
+ if (!mptcp_pm_sport_in_anno_list(subflow_req->msk, sk_listener)) {
+ sock_put((struct sock *)subflow_req->msk);
+ mptcp_token_destroy_request(req);
+ tcp_request_sock_ops.destructor(req);
+ subflow_req->msk = NULL;
+ subflow_req->mp_join = 0;
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTSYNRX);
+ return -EPERM;
+ }
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTSYNRX);
+ }
+
+ subflow_req_create_thmac(subflow_req);
+
if (unlikely(req->syncookie)) {
if (mptcp_can_accept_new_subflow(subflow_req->msk))
subflow_init_req_cookie_join_save(subflow_req, skb);
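
The hunks above defer the truncated-HMAC (thmac) generation until the source-port checks have passed, moving it out of subflow_token_join_request() into the new subflow_req_create_thmac() helper called here. For reference, the computation follows RFC 8684's join handshake: HMAC-SHA256 keyed with the concatenated big-endian keys over the concatenated big-endian nonces, truncated to the leftmost 64 bits. Below is a minimal user-space sketch of that construction using OpenSSL; it is not the kernel's mptcp_crypto_hmac(), and the key/nonce values are made up for illustration.

/*
 * Illustrative sketch of the thmac construction (RFC 8684 join handshake):
 * HMAC-SHA256 keyed with be64(local_key) || be64(remote_key) over
 * be32(local_nonce) || be32(remote_nonce), truncated to the leftmost 64 bits.
 * Not kernel code; build with: cc thmac.c -lcrypto
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>
#include <openssl/evp.h>
#include <openssl/hmac.h>

static void put_be64(uint8_t *buf, uint64_t v)
{
	for (int i = 7; i >= 0; i--) {
		buf[i] = v & 0xff;
		v >>= 8;
	}
}

int main(void)
{
	/* made-up example values; real ones come from the MPTCP handshake */
	uint64_t local_key = 0x0102030405060708ULL;
	uint64_t remote_key = 0x1112131415161718ULL;
	uint32_t local_nonce = 0xdeadbeef, remote_nonce = 0xcafebabe;
	uint8_t key[16], msg[8], digest[32];
	unsigned int dlen = sizeof(digest);
	uint32_t n1 = htonl(local_nonce), n2 = htonl(remote_nonce);
	uint64_t thmac = 0;

	/* key = be64(local_key) || be64(remote_key), msg = be32 nonces */
	put_be64(key, local_key);
	put_be64(key + 8, remote_key);
	memcpy(msg, &n1, 4);
	memcpy(msg + 4, &n2, 4);

	HMAC(EVP_sha256(), key, sizeof(key), msg, sizeof(msg), digest, &dlen);

	/* thmac = get_unaligned_be64(hmac): leftmost 8 bytes, big-endian */
	for (int i = 0; i < 8; i++)
		thmac = (thmac << 8) | digest[i];
	printf("thmac: %016llx\n", (unsigned long long)thmac);
	return 0;
}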
@@ -318,6 +349,11 @@ void mptcp_subflow_reset(struct sock *ssk)
sock_put(sk);
}
+static bool subflow_use_different_dport(struct mptcp_sock *msk, const struct sock *sk)
+{
+ return inet_sk(sk)->inet_dport != inet_sk((struct sock *)msk)->inet_dport;
+}
+
static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
@@ -335,6 +371,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
if (subflow->conn_finished)
return;
+ mptcp_propagate_sndbuf(parent, sk);
subflow->rel_write_seq = 1;
subflow->conn_finished = 1;
subflow->ssn_offset = TCP_SKB_CB(skb)->seq;
@@ -383,6 +420,13 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
subflow->mp_join = 1;
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX);
+
+ if (subflow_use_different_dport(mptcp_sk(parent), sk)) {
+ pr_debug("synack inet_dport=%d %d",
+ ntohs(inet_sk(sk)->inet_dport),
+ ntohs(inet_sk(parent)->inet_dport));
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX);
+ }
} else if (mptcp_check_fallback(sk)) {
fallback:
mptcp_rcv_space_init(mptcp_sk(parent), sk);
@@ -419,6 +463,7 @@ drop:
static struct tcp_request_sock_ops subflow_request_sock_ipv6_ops;
static struct inet_connection_sock_af_ops subflow_v6_specific;
static struct inet_connection_sock_af_ops subflow_v6m_specific;
+static struct proto tcpv6_prot_override;
static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
@@ -500,6 +545,8 @@ static void subflow_ulp_fallback(struct sock *sk,
icsk->icsk_ulp_ops = NULL;
rcu_assign_pointer(icsk->icsk_ulp_data, NULL);
tcp_sk(sk)->is_mptcp = 0;
+
+ mptcp_subflow_ops_undo_override(sk);
}
static void subflow_drop_ctx(struct sock *ssk)
@@ -620,7 +667,7 @@ create_child:
* created mptcp socket
*/
new_msk->sk_destruct = mptcp_sock_destruct;
- mptcp_pm_new_connection(mptcp_sk(new_msk), 1);
+ mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1);
mptcp_token_accept(subflow_req, mptcp_sk(new_msk));
ctx->conn = new_msk;
new_msk = NULL;
@@ -645,6 +692,17 @@ create_child:
SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
tcp_rsk(req)->drop_req = true;
+
+ if (subflow_use_different_sport(owner, sk)) {
+ pr_debug("ack inet_sport=%d %d",
+ ntohs(inet_sk(sk)->inet_sport),
+ ntohs(inet_sk((struct sock *)owner)->inet_sport));
+ if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
+ goto out;
+ }
+ SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX);
+ }
}
}
@@ -673,6 +731,7 @@ dispose_child:
}
static struct inet_connection_sock_af_ops subflow_specific;
+static struct proto tcp_prot_override;
enum mapping_status {
MAPPING_OK,
@@ -886,6 +945,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb,
subflow->map_valid = 0;
}
+/* sched mptcp worker to remove the subflow if no more data is pending */
+static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ssk)
+{
+ struct sock *sk = (struct sock *)msk;
+
+ if (likely(ssk->sk_state != TCP_CLOSE))
+ return;
+
+ if (skb_queue_empty(&ssk->sk_receive_queue) &&
+ !test_and_set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) {
+ sock_hold(sk);
+ if (!schedule_work(&msk->work))
+ sock_put(sk);
+ }
+}
+
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
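
subflow_sched_work_if_closed() above uses the usual hold-then-maybe-put pattern around schedule_work(): a reference on the msk is taken before queuing the worker and dropped again if the work was already pending. A small stand-alone sketch of that idiom, with a fake refcount and a boolean standing in for sock_hold()/sock_put() and schedule_work(); this is illustrative only, not kernel code.

#include <stdbool.h>
#include <stdio.h>

struct fake_sock { int refcnt; };

static bool work_queued;

static void hold(struct fake_sock *sk) { sk->refcnt++; }
static void put(struct fake_sock *sk)  { sk->refcnt--; }

/* stand-in for schedule_work(): returns false if already queued */
static bool queue_worker(void)
{
	if (work_queued)
		return false;
	work_queued = true;
	return true;
}

int main(void)
{
	struct fake_sock msk = { .refcnt = 1 };

	/* first caller: queues the worker, its reference travels with the work */
	hold(&msk);
	if (!queue_worker())
		put(&msk);

	/* second caller: worker already queued, so the extra reference is dropped */
	hold(&msk);
	if (!queue_worker())
		put(&msk);

	printf("refcnt=%d (base ref + one held by the queued worker)\n", msk.refcnt);
	return 0;
}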
@@ -924,11 +999,11 @@ static bool subflow_check_data_avail(struct sock *ssk)
}
if (status != MAPPING_OK)
- return false;
+ goto no_data;
skb = skb_peek(&ssk->sk_receive_queue);
if (WARN_ON_ONCE(!skb))
- return false;
+ goto no_data;
/* if msk lacks the remote key, this subflow must provide an
* MP_CAPABLE-based mapping
@@ -962,6 +1037,9 @@ static bool subflow_check_data_avail(struct sock *ssk)
}
return true;
+no_data:
+ subflow_sched_work_if_closed(msk, ssk);
+ return false;
fatal:
/* fatal protocol error, close the socket */
/* This barrier is coupled with smp_rmb() in tcp_poll() */
@@ -1018,6 +1096,12 @@ static void subflow_data_ready(struct sock *sk)
msk = mptcp_sk(parent);
if (state & TCPF_LISTEN) {
+ /* MPJ subflow are removed from accept queue before reaching here,
+ * avoid stray wakeups
+ */
+ if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+ return;
+
set_bit(MPTCP_DATA_READY, &msk->flags);
parent->sk_data_ready(parent);
return;
@@ -1032,7 +1116,10 @@ static void subflow_data_ready(struct sock *sk)
static void subflow_write_space(struct sock *ssk)
{
- /* we take action in __mptcp_clean_una() */
+ struct sock *sk = mptcp_subflow_ctx(ssk)->conn;
+
+ mptcp_propagate_sndbuf(sk, ssk);
+ mptcp_write_space(sk);
}
void __mptcp_error_report(struct sock *sk)
@@ -1105,22 +1192,32 @@ void mptcpv6_handle_mapped(struct sock *sk, bool mapped)
}
#endif
-static void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
- struct sockaddr_storage *addr)
+void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
+ struct sockaddr_storage *addr,
+ unsigned short family)
{
memset(addr, 0, sizeof(*addr));
- addr->ss_family = info->family;
+ addr->ss_family = family;
if (addr->ss_family == AF_INET) {
struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;
- in_addr->sin_addr = info->addr;
+ if (info->family == AF_INET)
+ in_addr->sin_addr = info->addr;
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ else if (ipv6_addr_v4mapped(&info->addr6))
+ in_addr->sin_addr.s_addr = info->addr6.s6_addr32[3];
+#endif
in_addr->sin_port = info->port;
}
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
else if (addr->ss_family == AF_INET6) {
struct sockaddr_in6 *in6_addr = (struct sockaddr_in6 *)addr;
- in6_addr->sin6_addr = info->addr6;
+ if (info->family == AF_INET)
+ ipv6_addr_set_v4mapped(info->addr.s_addr,
+ &in6_addr->sin6_addr);
+ else
+ in6_addr->sin6_addr = info->addr6;
in6_addr->sin6_port = info->port;
}
#endif
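
With this hunk mptcp_info2sockaddr() builds the sockaddr for the subflow socket's family rather than the address's own family, converting through IPv4-mapped IPv6 addresses when the two differ. As a point of reference, the user-space counterparts of the helpers used here (ipv6_addr_v4mapped() / ipv6_addr_set_v4mapped()) are the standard IN6_IS_ADDR_V4MAPPED() test and the ::ffff:a.b.c.d encoding. A short illustrative sketch, not code from the patch:

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
#include <netinet/in.h>

int main(void)
{
	struct in_addr v4;
	struct in6_addr v6 = IN6ADDR_ANY_INIT;
	char buf[INET6_ADDRSTRLEN];

	/* v4 -> v4-mapped v6, like ipv6_addr_set_v4mapped() in the hunk */
	inet_pton(AF_INET, "192.0.2.1", &v4);
	v6.s6_addr[10] = 0xff;
	v6.s6_addr[11] = 0xff;
	memcpy(&v6.s6_addr[12], &v4, 4);
	printf("mapped: %s\n", inet_ntop(AF_INET6, &v6, buf, sizeof(buf)));

	/* v4-mapped v6 -> v4, like the s6_addr32[3] extraction above */
	if (IN6_IS_ADDR_V4MAPPED(&v6)) {
		struct in_addr back;

		memcpy(&back, &v6.s6_addr[12], 4);
		printf("back to v4: %s\n",
		       inet_ntop(AF_INET, &back, buf, sizeof(buf)));
	}
	return 0;
}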
@@ -1164,11 +1261,11 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->remote_key = msk->remote_key;
subflow->local_key = msk->local_key;
subflow->token = msk->token;
- mptcp_info2sockaddr(loc, &addr);
+ mptcp_info2sockaddr(loc, &addr, ssk->sk_family);
addrlen = sizeof(struct sockaddr_in);
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if (loc->family == AF_INET6)
+ if (addr.ss_family == AF_INET6)
addrlen = sizeof(struct sockaddr_in6);
#endif
ssk->sk_bound_dev_if = loc->ifindex;
@@ -1184,13 +1281,16 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
subflow->remote_id = remote_id;
subflow->request_join = 1;
subflow->request_bkup = !!(loc->flags & MPTCP_PM_ADDR_FLAG_BACKUP);
- mptcp_info2sockaddr(remote, &addr);
+ mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
mptcp_add_pending_subflow(msk, subflow);
err = kernel_connect(sf, (struct sockaddr *)&addr, addrlen, O_NONBLOCK);
if (err && err != -EINPROGRESS)
goto failed_unlink;
+ /* discard the subflow socket */
+ mptcp_sock_graft(ssk, sk->sk_socket);
+ iput(SOCK_INODE(sf));
return err;
failed_unlink:
@@ -1228,6 +1328,25 @@ static void mptcp_attach_cgroup(struct sock *parent, struct sock *child)
#endif /* CONFIG_SOCK_CGROUP_DATA */
}
+static void mptcp_subflow_ops_override(struct sock *ssk)
+{
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (ssk->sk_prot == &tcpv6_prot)
+ ssk->sk_prot = &tcpv6_prot_override;
+ else
+#endif
+ ssk->sk_prot = &tcp_prot_override;
+}
+
+static void mptcp_subflow_ops_undo_override(struct sock *ssk)
+{
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if (ssk->sk_prot == &tcpv6_prot_override)
+ ssk->sk_prot = &tcpv6_prot;
+ else
+#endif
+ ssk->sk_prot = &tcp_prot;
+}
int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
{
struct mptcp_subflow_context *subflow;
@@ -1283,6 +1402,7 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
*new_sock = sf;
sock_hold(sk);
subflow->conn = sk;
+ mptcp_subflow_ops_override(sf->sk);
return 0;
}
@@ -1299,6 +1419,7 @@ static struct mptcp_subflow_context *subflow_create_ctx(struct sock *sk,
rcu_assign_pointer(icsk->icsk_ulp_data, ctx);
INIT_LIST_HEAD(&ctx->node);
+ INIT_LIST_HEAD(&ctx->delegated_node);
pr_debug("subflow=%p", ctx);
@@ -1331,6 +1452,7 @@ static void subflow_state_change(struct sock *sk)
__subflow_state_change(sk);
if (subflow_simultaneous_connect(sk)) {
+ mptcp_propagate_sndbuf(parent, sk);
mptcp_do_fallback(sk);
mptcp_rcv_space_init(mptcp_sk(parent), sk);
pr_fallback(mptcp_sk(parent));
@@ -1348,6 +1470,8 @@ static void subflow_state_change(struct sock *sk)
if (mptcp_subflow_data_available(sk))
mptcp_data_ready(parent, sk);
+ subflow_sched_work_if_closed(mptcp_sk(parent), sk);
+
if (__mptcp_check_fallback(mptcp_sk(parent)) &&
!subflow->rx_eof && subflow_is_done(sk)) {
subflow->rx_eof = 1;
@@ -1412,6 +1536,7 @@ static void subflow_ulp_release(struct sock *ssk)
sock_put(sk);
}
+ mptcp_subflow_ops_undo_override(ssk);
if (release)
kfree_rcu(ctx, rcu);
}
@@ -1466,6 +1591,16 @@ static void subflow_ulp_clone(const struct request_sock *req,
}
}
+static void tcp_release_cb_override(struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+
+ if (mptcp_subflow_has_delegated_action(subflow))
+ mptcp_subflow_process_delegated(ssk);
+
+ tcp_release_cb(ssk);
+}
+
static struct tcp_ulp_ops subflow_ulp_ops __read_mostly = {
.name = "mptcp",
.owner = THIS_MODULE,
@@ -1506,6 +1641,9 @@ void __init mptcp_subflow_init(void)
subflow_specific.syn_recv_sock = subflow_syn_recv_sock;
subflow_specific.sk_rx_dst_set = subflow_finish_connect;
+ tcp_prot_override = tcp_prot;
+ tcp_prot_override.release_cb = tcp_release_cb_override;
+
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
subflow_request_sock_ipv6_ops = tcp_request_sock_ipv6_ops;
subflow_request_sock_ipv6_ops.route_req = subflow_v6_route_req;
@@ -1521,6 +1659,9 @@ void __init mptcp_subflow_init(void)
subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len;
subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced;
subflow_v6m_specific.net_frag_header_len = 0;
+
+ tcpv6_prot_override = tcpv6_prot;
+ tcpv6_prot_override.release_cb = tcp_release_cb_override;
#endif
mptcp_diag_subflow_init(&subflow_ulp_ops);
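
The mptcp_subflow_init() hunks above clone tcp_prot (and tcpv6_prot when IPv6 is enabled) into the *_override variants and swap in tcp_release_cb_override(), which processes any delegated MPTCP actions before falling through to the regular tcp_release_cb(); mptcp_subflow_ops_override()/mptcp_subflow_ops_undo_override() then switch a subflow's sk_prot between the two tables. A minimal user-space sketch of that copy-and-override pattern follows; the struct and function names are invented for the example and are not kernel symbols.

#include <stdio.h>

/* tiny stand-in for struct proto; only the hook we care about */
struct fake_proto {
	void (*release_cb)(void *sk);
};

static void plain_release_cb(void *sk)
{
	printf("plain release_cb for %p\n", sk);
}

/* the "stock" table, analogous to tcp_prot */
static struct fake_proto fake_tcp_prot = {
	.release_cb = plain_release_cb,
};

/* analogous to tcp_prot_override: a copy with one hook swapped */
static struct fake_proto fake_tcp_prot_override;

static void release_cb_override(void *sk)
{
	/* extra work first (the delegated MPTCP actions in the patch)... */
	printf("override: process delegated work for %p\n", sk);
	/* ...then chain to the original callback */
	plain_release_cb(sk);
}

int main(void)
{
	int dummy_sk;

	/* mirrors what mptcp_subflow_init() does for tcp_prot_override */
	fake_tcp_prot_override = fake_tcp_prot;
	fake_tcp_prot_override.release_cb = release_cb_override;

	/* a socket whose ops were overridden now goes through the new hook */
	fake_tcp_prot_override.release_cb((void *)&dummy_sk);
	return 0;
}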