aboutsummaryrefslogtreecommitdiffstats
path: root/net/mptcp/pm_netlink.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp/pm_netlink.c')
-rw-r--r--net/mptcp/pm_netlink.c215
1 files changed, 185 insertions, 30 deletions
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 7b3794459783..c4f9a5ce3815 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -46,6 +46,7 @@ struct pm_nl_pernet {
spinlock_t lock;
struct list_head local_addr_list;
unsigned int addrs;
+ unsigned int stale_loss_cnt;
unsigned int add_addr_signal_max;
unsigned int add_addr_accept_max;
unsigned int local_addr_max;
@@ -316,14 +317,14 @@ static void mptcp_pm_add_timer(struct timer_list *timer)
if (!entry->addr.id)
return;
- if (mptcp_pm_should_add_signal(msk)) {
+ if (mptcp_pm_should_add_signal_addr(msk)) {
sk_reset_timer(sk, timer, jiffies + TCP_RTO_MAX / 8);
goto out;
}
spin_lock_bh(&msk->pm.lock);
- if (!mptcp_pm_should_add_signal(msk)) {
+ if (!mptcp_pm_should_add_signal_addr(msk)) {
pr_debug("retransmit ADD_ADDR id=%d", entry->addr.id);
mptcp_pm_announce_addr(msk, &entry->addr, false);
mptcp_pm_add_addr_send_ack(msk);
@@ -409,6 +410,55 @@ void mptcp_pm_free_anno_list(struct mptcp_sock *msk)
}
}
+static bool lookup_address_in_vec(struct mptcp_addr_info *addrs, unsigned int nr,
+ struct mptcp_addr_info *addr)
+{
+ int i;
+
+ for (i = 0; i < nr; i++) {
+ if (addresses_equal(&addrs[i], addr, addr->port))
+ return true;
+ }
+
+ return false;
+}
+
+/* Fill all the remote addresses into the array addrs[],
+ * and return the array size.
+ */
+static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
+ struct mptcp_addr_info *addrs)
+{
+ struct sock *sk = (struct sock *)msk, *ssk;
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_addr_info remote = { 0 };
+ unsigned int subflows_max;
+ int i = 0;
+
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
+ /* Non-fullmesh endpoint, fill in the single entry
+ * corresponding to the primary MPC subflow remote address
+ */
+ if (!fullmesh) {
+ remote_address((struct sock_common *)sk, &remote);
+ msk->pm.subflows++;
+ addrs[i++] = remote;
+ } else {
+ mptcp_for_each_subflow(msk, subflow) {
+ ssk = mptcp_subflow_tcp_sock(subflow);
+ remote_address((struct sock_common *)ssk, &remote);
+ if (!lookup_address_in_vec(addrs, i, &remote) &&
+ msk->pm.subflows < subflows_max) {
+ msk->pm.subflows++;
+ addrs[i++] = remote;
+ }
+ }
+ }
+
+ return i;
+}
+
static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
@@ -454,15 +504,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
!READ_ONCE(msk->pm.remote_deny_join_id0)) {
local = select_local_address(pernet, msk);
if (local) {
- struct mptcp_addr_info remote = { 0 };
+ bool fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
+ struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
+ int i, nr;
msk->pm.local_addr_used++;
- msk->pm.subflows++;
check_work_pending(msk);
- remote_address((struct sock_common *)sk, &remote);
+ nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
spin_unlock_bh(&msk->pm.lock);
- __mptcp_subflow_connect(sk, &local->addr, &remote,
- local->flags, local->ifindex);
+ for (i = 0; i < nr; i++)
+ __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
spin_lock_bh(&msk->pm.lock);
return;
}
@@ -483,13 +534,67 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
mptcp_pm_create_subflow_or_signal_addr(msk);
}
+/* Fill all the local addresses into the array addrs[],
+ * and return the array size.
+ */
+static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addrs)
+{
+ struct sock *sk = (struct sock *)msk;
+ struct mptcp_pm_addr_entry *entry;
+ struct mptcp_addr_info local;
+ struct pm_nl_pernet *pernet;
+ unsigned int subflows_max;
+ int i = 0;
+
+ pernet = net_generic(sock_net(sk), pm_nl_pernet_id);
+ subflows_max = mptcp_pm_get_subflows_max(msk);
+
+ rcu_read_lock();
+ __mptcp_flush_join_list(msk);
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
+ if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
+ continue;
+
+ if (entry->addr.family != sk->sk_family) {
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ if ((entry->addr.family == AF_INET &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
+ (sk->sk_family == AF_INET &&
+ !ipv6_addr_v4mapped(&entry->addr.addr6)))
+#endif
+ continue;
+ }
+
+ if (msk->pm.subflows < subflows_max) {
+ msk->pm.subflows++;
+ addrs[i++] = entry->addr;
+ }
+ }
+ rcu_read_unlock();
+
+ /* If the array is empty, fill in the single
+ * 'IPADDRANY' local address
+ */
+ if (!i) {
+ memset(&local, 0, sizeof(local));
+ local.family = msk->pm.remote.family;
+
+ msk->pm.subflows++;
+ addrs[i++] = local;
+ }
+
+ return i;
+}
+
static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
{
+ struct mptcp_addr_info addrs[MPTCP_PM_ADDR_MAX];
struct sock *sk = (struct sock *)msk;
unsigned int add_addr_accept_max;
struct mptcp_addr_info remote;
- struct mptcp_addr_info local;
unsigned int subflows_max;
+ int i, nr;
add_addr_accept_max = mptcp_pm_get_add_addr_accept_max(msk);
subflows_max = mptcp_pm_get_subflows_max(msk);
@@ -501,23 +606,22 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
if (lookup_subflow_by_daddr(&msk->conn_list, &msk->pm.remote))
goto add_addr_echo;
- msk->pm.add_addr_accepted++;
- msk->pm.subflows++;
- if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
- msk->pm.subflows >= subflows_max)
- WRITE_ONCE(msk->pm.accept_addr, false);
-
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
remote = msk->pm.remote;
if (!remote.port)
remote.port = sk->sk_dport;
- memset(&local, 0, sizeof(local));
- local.family = remote.family;
+ nr = fill_local_addresses_vec(msk, addrs);
+
+ msk->pm.add_addr_accepted++;
+ if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
+ msk->pm.subflows >= subflows_max)
+ WRITE_ONCE(msk->pm.accept_addr, false);
spin_unlock_bh(&msk->pm.lock);
- __mptcp_subflow_connect(sk, &local, &remote, 0, 0);
+ for (i = 0; i < nr; i++)
+ __mptcp_subflow_connect(sk, &addrs[i], &remote);
spin_lock_bh(&msk->pm.lock);
add_addr_echo:
@@ -540,17 +644,12 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
if (subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- bool slow;
spin_unlock_bh(&msk->pm.lock);
- pr_debug("send ack for %s%s%s",
- mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr",
- mptcp_pm_should_add_signal_ipv6(msk) ? " [ipv6]" : "",
- mptcp_pm_should_add_signal_port(msk) ? " [port]" : "");
-
- slow = lock_sock_fast(ssk);
- tcp_send_ack(ssk);
- unlock_sock_fast(ssk, slow);
+ pr_debug("send ack for %s",
+ mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr");
+
+ mptcp_subflow_send_ack(ssk);
spin_lock_bh(&msk->pm.lock);
}
}
@@ -567,7 +666,6 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct sock *sk = (struct sock *)msk;
struct mptcp_addr_info local;
- bool slow;
local_address((struct sock_common *)ssk, &local);
if (!addresses_equal(&local, addr, addr->port))
@@ -580,9 +678,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
spin_unlock_bh(&msk->pm.lock);
pr_debug("send ack for mp_prio");
- slow = lock_sock_fast(ssk);
- tcp_send_ack(ssk);
- unlock_sock_fast(ssk, slow);
+ mptcp_subflow_send_ack(ssk);
spin_lock_bh(&msk->pm.lock);
return 0;
@@ -899,6 +995,43 @@ static const struct nla_policy mptcp_pm_policy[MPTCP_PM_ATTR_MAX + 1] = {
[MPTCP_PM_ATTR_SUBFLOWS] = { .type = NLA_U32, },
};
+void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
+{
+ struct mptcp_subflow_context *iter, *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = (struct sock *)msk;
+ unsigned int active_max_loss_cnt;
+ struct net *net = sock_net(sk);
+ unsigned int stale_loss_cnt;
+ bool slow;
+
+ stale_loss_cnt = mptcp_stale_loss_cnt(net);
+ if (subflow->stale || !stale_loss_cnt || subflow->stale_count <= stale_loss_cnt)
+ return;
+
+ /* look for another available subflow not in loss state */
+ active_max_loss_cnt = max_t(int, stale_loss_cnt - 1, 1);
+ mptcp_for_each_subflow(msk, iter) {
+ if (iter != subflow && mptcp_subflow_active(iter) &&
+ iter->stale_count < active_max_loss_cnt) {
+ /* we have some alternatives, try to mark this subflow as idle ...*/
+ slow = lock_sock_fast(ssk);
+ if (!tcp_rtx_and_write_queues_empty(ssk)) {
+ subflow->stale = 1;
+ __mptcp_retransmit_pending_data(sk);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE);
+ }
+ unlock_sock_fast(ssk, slow);
+
+ /* always try to push the pending data regarless of re-injections:
+ * we can possibly use backup subflows now, and subflow selection
+ * is cheap under the msk socket lock
+ */
+ __mptcp_push_pending(sk, 0);
+ return;
+ }
+ }
+}
+
static int mptcp_pm_family_to_addr(int family)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -1067,6 +1200,27 @@ __lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
return NULL;
}
+int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
+ u8 *flags, int *ifindex)
+{
+ struct mptcp_pm_addr_entry *entry;
+
+ *flags = 0;
+ *ifindex = 0;
+
+ if (id) {
+ rcu_read_lock();
+ entry = __lookup_addr_by_id(net_generic(net, pm_nl_pernet_id), id);
+ if (entry) {
+ *flags = entry->flags;
+ *ifindex = entry->ifindex;
+ }
+ rcu_read_unlock();
+ }
+
+ return 0;
+}
+
static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
struct mptcp_addr_info *addr)
{
@@ -1901,6 +2055,7 @@ static int __net_init pm_nl_init_net(struct net *net)
INIT_LIST_HEAD_RCU(&pernet->local_addr_list);
pernet->next_id = 1;
+ pernet->stale_loss_cnt = 4;
spin_lock_init(&pernet->lock);
/* No need to initialize other pernet fields, the struct is zeroed at