 include/net/sock.h   |  20
 net/ipv4/af_inet.c   |   2
 net/ipv4/inet_diag.c |   2
 net/mptcp/protocol.c | 234
 net/mptcp/protocol.h |  15
 net/sched/em_meta.c  |   2
 6 files changed, 120 insertions(+), 155 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index ff4e62aa62e5..4f0280ad6c23 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1210,6 +1210,8 @@ struct proto {
unsigned int inuse_idx;
#endif
+ int (*forward_alloc_get)(const struct sock *sk);
+
bool (*stream_memory_free)(const struct sock *sk, int wake);
bool (*stream_memory_read)(const struct sock *sk);
/* Memory pressure */
@@ -1217,6 +1219,7 @@ struct proto {
void (*leave_memory_pressure)(struct sock *sk);
atomic_long_t *memory_allocated; /* Current allocated memory. */
struct percpu_counter *sockets_allocated; /* Current number of sockets. */
+
/*
* Pressure flag: try to collapse.
* Technical note: it is used by multiple contexts non atomically.
@@ -1294,6 +1297,14 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
INDIRECT_CALLABLE_DECLARE(bool tcp_stream_memory_free(const struct sock *sk, int wake));
+static inline int sk_forward_alloc_get(const struct sock *sk)
+{
+ if (!sk->sk_prot->forward_alloc_get)
+ return sk->sk_forward_alloc;
+
+ return sk->sk_prot->forward_alloc_get(sk);
+}
+
static inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
if (READ_ONCE(sk->sk_wmem_queued) >= READ_ONCE(sk->sk_sndbuf))
@@ -1573,6 +1584,11 @@ static inline void sk_mem_charge(struct sock *sk, int size)
sk->sk_forward_alloc -= size;
}
+/* the following macros control memory reclaiming in sk_mem_uncharge()
+ */
+#define SK_RECLAIM_THRESHOLD (1 << 21)
+#define SK_RECLAIM_CHUNK (1 << 20)
+
static inline void sk_mem_uncharge(struct sock *sk, int size)
{
int reclaimable;
@@ -1589,8 +1605,8 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
* If we reach 2 MBytes, reclaim 1 MBytes right now, there is
* no need to hold that much forward allocation anyway.
*/
- if (unlikely(reclaimable >= 1 << 21))
- __sk_mem_reclaim(sk, 1 << 20);
+ if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
+ __sk_mem_reclaim(sk, SK_RECLAIM_CHUNK);
}
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
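
The sock.h hunks above add an optional forward_alloc_get() hook to struct proto: sk_forward_alloc_get() dispatches through it when present and falls back to the plain sk_forward_alloc field otherwise. Later in this patch MPTCP registers mptcp_forward_alloc_get(), which reports the sum of the tx-side sk_forward_alloc and the new rx-side rmem_fwd_alloc. The reclaim constants keep their existing values (SK_RECLAIM_THRESHOLD = 1 << 21 = 2 MiB, SK_RECLAIM_CHUNK = 1 << 20 = 1 MiB); naming them lets the MPTCP rx path reuse the same policy. A minimal sketch of how a protocol wires the hook follows; my_sock, my_sk() and extra_fwd_alloc are made-up illustration names, not part of this patch:

struct my_sock {
        struct sock     sk;                /* must be first */
        int             extra_fwd_alloc;   /* fwd memory kept outside sk_forward_alloc */
};

static inline struct my_sock *my_sk(const struct sock *sk)
{
        return (struct my_sock *)sk;
}

static int my_forward_alloc_get(const struct sock *sk)
{
        /* report the generic budget plus the protocol-private one */
        return sk->sk_forward_alloc + my_sk(sk)->extra_fwd_alloc;
}

static struct proto my_proto = {
        .name                   = "MYPROTO",
        .owner                  = THIS_MODULE,
        .forward_alloc_get      = my_forward_alloc_get,
        /* ... */
};
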
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 31d5cefa9979..0189e3cd4a7d 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -150,7 +150,7 @@ void inet_sock_destruct(struct sock *sk)
WARN_ON(atomic_read(&sk->sk_rmem_alloc));
WARN_ON(refcount_read(&sk->sk_wmem_alloc));
WARN_ON(sk->sk_wmem_queued);
- WARN_ON(sk->sk_forward_alloc);
+ WARN_ON(sk_forward_alloc_get(sk));
kfree(rcu_dereference_protected(inet->inet_opt, 1));
dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ef7897226f08..c8fa6e7f7d12 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -271,7 +271,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct inet_diag_meminfo minfo = {
.idiag_rmem = sk_rmem_alloc_get(sk),
.idiag_wmem = READ_ONCE(sk->sk_wmem_queued),
- .idiag_fmem = sk->sk_forward_alloc,
+ .idiag_fmem = sk_forward_alloc_get(sk),
.idiag_tmem = sk_wmem_alloc_get(sk),
};
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index cd6b11c9b54d..eb316bd578bb 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -126,6 +126,11 @@ static void mptcp_drop(struct sock *sk, struct sk_buff *skb)
__kfree_skb(skb);
}
+static void mptcp_rmem_charge(struct sock *sk, int size)
+{
+ mptcp_sk(sk)->rmem_fwd_alloc -= size;
+}
+
static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
struct sk_buff *from)
{
@@ -142,7 +147,7 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to,
MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq;
kfree_skb_partial(from, fragstolen);
atomic_add(delta, &sk->sk_rmem_alloc);
- sk_mem_charge(sk, delta);
+ mptcp_rmem_charge(sk, delta);
return true;
}
@@ -155,6 +160,44 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to,
return mptcp_try_coalesce((struct sock *)msk, to, from);
}
+static void __mptcp_rmem_reclaim(struct sock *sk, int amount)
+{
+ amount >>= SK_MEM_QUANTUM_SHIFT;
+ mptcp_sk(sk)->rmem_fwd_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
+ __sk_mem_reduce_allocated(sk, amount);
+}
+
+static void mptcp_rmem_uncharge(struct sock *sk, int size)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ int reclaimable;
+
+ msk->rmem_fwd_alloc += size;
+ reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
+
+ /* see sk_mem_uncharge() for the rationale behind the following schema */
+ if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD))
+ __mptcp_rmem_reclaim(sk, SK_RECLAIM_CHUNK);
+}
+
+static void mptcp_rfree(struct sk_buff *skb)
+{
+ unsigned int len = skb->truesize;
+ struct sock *sk = skb->sk;
+
+ atomic_sub(len, &sk->sk_rmem_alloc);
+ mptcp_rmem_uncharge(sk, len);
+}
+
+static void mptcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
+{
+ skb_orphan(skb);
+ skb->sk = sk;
+ skb->destructor = mptcp_rfree;
+ atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+ mptcp_rmem_charge(sk, skb->truesize);
+}
+
/* "inspired" by tcp_data_queue_ofo(), main differences:
* - use mptcp seqs
* - don't cope with sacks
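
mptcp_set_owner_r() and mptcp_rfree() above mirror the stock skb_set_owner_r()/sock_rfree() pair, with the single difference that the charge and the uncharge hit msk->rmem_fwd_alloc instead of sk->sk_forward_alloc, keeping receive-side forward memory separate from the field the tx path uses. For comparison, the generic helpers in this kernel look roughly like this (shown for context, not part of the patch):

static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
        skb_orphan(skb);
        skb->sk = sk;
        skb->destructor = sock_rfree;
        atomic_add(skb->truesize, &sk->sk_rmem_alloc);
        sk_mem_charge(sk, skb->truesize);       /* consumes sk_forward_alloc */
}

void sock_rfree(struct sk_buff *skb)
{
        struct sock *sk = skb->sk;
        unsigned int len = skb->truesize;

        atomic_sub(len, &sk->sk_rmem_alloc);
        sk_mem_uncharge(sk, len);               /* refills sk_forward_alloc */
}

mptcp_rmem_uncharge() then applies the same 2 MiB / 1 MiB reclaim policy as sk_mem_uncharge(), but against the msk-level counter.
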
@@ -267,7 +310,29 @@ merge_right:
end:
skb_condense(skb);
- skb_set_owner_r(skb, sk);
+ mptcp_set_owner_r(skb, sk);
+}
+
+static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ int amt, amount;
+
+ if (size < msk->rmem_fwd_alloc)
+ return true;
+
+ amt = sk_mem_pages(size);
+ amount = amt << SK_MEM_QUANTUM_SHIFT;
+ msk->rmem_fwd_alloc += amount;
+ if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) {
+ if (ssk->sk_forward_alloc < amount) {
+ msk->rmem_fwd_alloc -= amount;
+ return false;
+ }
+
+ ssk->sk_forward_alloc -= amount;
+ }
+ return true;
}
static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
@@ -285,15 +350,8 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
skb_orphan(skb);
/* try to fetch required memory from subflow */
- if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
- int amount = sk_mem_pages(skb->truesize) << SK_MEM_QUANTUM_SHIFT;
-
- if (ssk->sk_forward_alloc < amount)
- goto drop;
-
- ssk->sk_forward_alloc -= amount;
- sk->sk_forward_alloc += amount;
- }
+ if (!mptcp_rmem_schedule(sk, ssk, skb->truesize))
+ goto drop;
has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
@@ -313,7 +371,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
if (tail && mptcp_try_coalesce(sk, tail, skb))
return true;
- skb_set_owner_r(skb, sk);
+ mptcp_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb);
return true;
} else if (after64(MPTCP_SKB_CB(skb)->map_seq, msk->ack_seq)) {
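
__mptcp_move_skb() now delegates the accounting to mptcp_rmem_schedule(): the skb is accepted if it fits within the msk rx budget; otherwise the budget is raised via __sk_mem_raise_allocated(), and when that fails the rounded-up amount is borrowed from the subflow's own sk_forward_alloc, or the skb is dropped if even that cannot cover it. A worked example of the arithmetic:

/* Worked example (illustrative; assumes SK_MEM_QUANTUM == PAGE_SIZE == 4096):
 * an skb with truesize 2304 arrives while msk->rmem_fwd_alloc is 0.
 *
 *   amt    = sk_mem_pages(2304)          = 1 quantum
 *   amount = amt << SK_MEM_QUANTUM_SHIFT = 4096 bytes
 *
 * rmem_fwd_alloc is bumped by 4096 up front and one quantum is accounted
 * against the TCP memory pool via __sk_mem_raise_allocated().  Only if
 * that fails is the same 4096 bytes taken from ssk->sk_forward_alloc
 * instead, and if the subflow cannot cover it the charge is reverted and
 * the skb is dropped.  After mptcp_set_owner_r() charges the skb,
 * rmem_fwd_alloc is left at 4096 - 2304 = 1792.
 */
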
@@ -908,122 +966,20 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk,
df->data_seq + df->data_len == msk->write_seq;
}
-static int mptcp_wmem_with_overhead(int size)
-{
- return size + ((sizeof(struct mptcp_data_frag) * size) >> PAGE_SHIFT);
-}
-
-static void __mptcp_wmem_reserve(struct sock *sk, int size)
-{
- int amount = mptcp_wmem_with_overhead(size);
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- WARN_ON_ONCE(msk->wmem_reserved);
- if (WARN_ON_ONCE(amount < 0))
- amount = 0;
-
- if (amount <= sk->sk_forward_alloc)
- goto reserve;
-
- /* under memory pressure try to reserve at most a single page
- * otherwise try to reserve the full estimate and fallback
- * to a single page before entering the error path
- */
- if ((tcp_under_memory_pressure(sk) && amount > PAGE_SIZE) ||
- !sk_wmem_schedule(sk, amount)) {
- if (amount <= PAGE_SIZE)
- goto nomem;
-
- amount = PAGE_SIZE;
- if (!sk_wmem_schedule(sk, amount))
- goto nomem;
- }
-
-reserve:
- msk->wmem_reserved = amount;
- sk->sk_forward_alloc -= amount;
- return;
-
-nomem:
- /* we will wait for memory on next allocation */
- msk->wmem_reserved = -1;
-}
-
-static void __mptcp_update_wmem(struct sock *sk)
+static void __mptcp_mem_reclaim_partial(struct sock *sk)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
+ int reclaimable = mptcp_sk(sk)->rmem_fwd_alloc - sk_unused_reserved_mem(sk);
lockdep_assert_held_once(&sk->sk_lock.slock);
- if (!msk->wmem_reserved)
- return;
-
- if (msk->wmem_reserved < 0)
- msk->wmem_reserved = 0;
- if (msk->wmem_reserved > 0) {
- sk->sk_forward_alloc += msk->wmem_reserved;
- msk->wmem_reserved = 0;
- }
-}
-
-static bool mptcp_wmem_alloc(struct sock *sk, int size)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- /* check for pre-existing error condition */
- if (msk->wmem_reserved < 0)
- return false;
-
- if (msk->wmem_reserved >= size)
- goto account;
-
- mptcp_data_lock(sk);
- if (!sk_wmem_schedule(sk, size)) {
- mptcp_data_unlock(sk);
- return false;
- }
-
- sk->sk_forward_alloc -= size;
- msk->wmem_reserved += size;
- mptcp_data_unlock(sk);
-
-account:
- msk->wmem_reserved -= size;
- return true;
-}
-
-static void mptcp_wmem_uncharge(struct sock *sk, int size)
-{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- if (msk->wmem_reserved < 0)
- msk->wmem_reserved = 0;
- msk->wmem_reserved += size;
-}
-
-static void __mptcp_mem_reclaim_partial(struct sock *sk)
-{
- lockdep_assert_held_once(&sk->sk_lock.slock);
- __mptcp_update_wmem(sk);
+ __mptcp_rmem_reclaim(sk, reclaimable - 1);
sk_mem_reclaim_partial(sk);
}
static void mptcp_mem_reclaim_partial(struct sock *sk)
{
- struct mptcp_sock *msk = mptcp_sk(sk);
-
- /* if we are experiencing a transint allocation error,
- * the forward allocation memory has been already
- * released
- */
- if (msk->wmem_reserved < 0)
- return;
-
mptcp_data_lock(sk);
- sk->sk_forward_alloc += msk->wmem_reserved;
- sk_mem_reclaim_partial(sk);
- msk->wmem_reserved = sk->sk_forward_alloc;
- sk->sk_forward_alloc = 0;
+ __mptcp_mem_reclaim_partial(sk);
mptcp_data_unlock(sk);
}
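
With the wmem_reserved machinery removed, partial reclaim treats the two directions symmetrically: sk_mem_reclaim_partial() keeps trimming the tx-side sk_forward_alloc, while __mptcp_rmem_reclaim(sk, reclaimable - 1) trims the rx-side budget down to whole quanta. A worked example of the rx trim, again under the SK_MEM_QUANTUM == 4096 assumption:

/* Illustrative arithmetic, assuming SK_MEM_QUANTUM == 4096:
 *
 *   msk->rmem_fwd_alloc      = 10000
 *   sk_unused_reserved_mem() = 0
 *   reclaimable              = 10000
 *
 * __mptcp_rmem_reclaim(sk, 9999):
 *   amount = 9999 >> SK_MEM_QUANTUM_SHIFT       -> 2 quanta
 *   rmem_fwd_alloc -= 2 << SK_MEM_QUANTUM_SHIFT (8192 bytes)
 *   __sk_mem_reduce_allocated(sk, 2)            returns the quanta to the pool
 *
 * leaving rmem_fwd_alloc at 1808.  The "- 1" means an exact multiple of
 * the quantum keeps one quantum cached rather than draining to zero.
 */
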
@@ -1513,8 +1469,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
return NULL;
}
-static void mptcp_push_release(struct sock *sk, struct sock *ssk,
- struct mptcp_sendmsg_info *info)
+static void mptcp_push_release(struct sock *ssk, struct mptcp_sendmsg_info *info)
{
tcp_push(ssk, 0, info->mss_now, tcp_sk(ssk)->nonagle, info->size_goal);
release_sock(ssk);
@@ -1577,7 +1532,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
* the last round, release prev_ssk
*/
if (ssk != prev_ssk && prev_ssk)
- mptcp_push_release(sk, prev_ssk, &info);
+ mptcp_push_release(prev_ssk, &info);
if (!ssk)
goto out;
@@ -1590,7 +1545,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0) {
- mptcp_push_release(sk, ssk, &info);
+ mptcp_push_release(ssk, &info);
goto out;
}
@@ -1605,7 +1560,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
/* at this point we held the socket lock for the last subflow we used */
if (ssk)
- mptcp_push_release(sk, ssk, &info);
+ mptcp_push_release(ssk, &info);
out:
/* ensure the rtx timer is running */
@@ -1664,7 +1619,6 @@ out:
/* __mptcp_alloc_tx_skb could have released some wmem and we are
* not going to flush it via release_sock()
*/
- __mptcp_update_wmem(sk);
if (copied) {
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
@@ -1701,7 +1655,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
/* silently ignore everything else */
msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL;
- mptcp_lock_sock(sk, __mptcp_wmem_reserve(sk, min_t(size_t, 1 << 20, len)));
+ lock_sock(sk);
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
@@ -1749,17 +1703,17 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
psize = min_t(size_t, psize, msg_data_left(msg));
total_ts = psize + frag_truesize;
- if (!mptcp_wmem_alloc(sk, total_ts))
+ if (!sk_wmem_schedule(sk, total_ts))
goto wait_for_memory;
if (copy_page_from_iter(dfrag->page, offset, psize,
&msg->msg_iter) != psize) {
- mptcp_wmem_uncharge(sk, psize + frag_truesize);
ret = -EFAULT;
goto out;
}
/* data successfully copied into the write queue */
+ sk->sk_forward_alloc -= total_ts;
copied += psize;
dfrag->data_len += psize;
frag_truesize += psize;
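
On the tx side the msk no longer pre-reserves write memory at lock time: mptcp_sendmsg() takes the plain socket lock, asks sk_wmem_schedule() for psize + frag_truesize, and debits sk->sk_forward_alloc only after the user data has been copied, so the -EFAULT path no longer needs an explicit uncharge. Condensed, the per-fragment accounting after this patch reads roughly as follows (a sketch, not a verbatim copy of the function):

/* condensed sketch of the post-patch mptcp_sendmsg() accounting, per fragment */
total_ts = psize + frag_truesize;
if (!sk_wmem_schedule(sk, total_ts))            /* raise fwd alloc if needed */
        goto wait_for_memory;

if (copy_page_from_iter(dfrag->page, offset, psize,
                        &msg->msg_iter) != psize) {
        ret = -EFAULT;                          /* nothing charged yet */
        goto out;
}

sk->sk_forward_alloc -= total_ts;               /* charge only on success */
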
@@ -1956,7 +1910,7 @@ static void __mptcp_update_rmem(struct sock *sk)
return;
atomic_sub(msk->rmem_released, &sk->sk_rmem_alloc);
- sk_mem_uncharge(sk, msk->rmem_released);
+ mptcp_rmem_uncharge(sk, msk->rmem_released);
WRITE_ONCE(msk->rmem_released, 0);
}
@@ -2024,7 +1978,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
if (unlikely(flags & MSG_ERRQUEUE))
return inet_recv_error(sk, msg, len, addr_len);
- mptcp_lock_sock(sk, __mptcp_splice_receive_queue(sk));
+ lock_sock(sk);
if (unlikely(sk->sk_state == TCP_LISTEN)) {
copied = -ENOTCONN;
goto out_err;
@@ -2504,7 +2458,7 @@ static int __mptcp_init_sock(struct sock *sk)
__skb_queue_head_init(&msk->receive_queue);
msk->out_of_order_queue = RB_ROOT;
msk->first_pending = NULL;
- msk->wmem_reserved = 0;
+ msk->rmem_fwd_alloc = 0;
WRITE_ONCE(msk->rmem_released, 0);
msk->timer_ival = TCP_RTO_MIN;
@@ -2715,7 +2669,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk->sk_prot->destroy(sk);
- WARN_ON_ONCE(msk->wmem_reserved);
+ WARN_ON_ONCE(msk->rmem_fwd_alloc);
WARN_ON_ONCE(msk->rmem_released);
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
@@ -2948,8 +2902,14 @@ void mptcp_destroy_common(struct mptcp_sock *msk)
/* move to sk_receive_queue, sk_stream_kill_queues will purge it */
skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue);
-
+ __skb_queue_purge(&sk->sk_receive_queue);
skb_rbtree_purge(&msk->out_of_order_queue);
+
+ /* move all the rx fwd alloc into the main sk, so that the
+  * sk_mem_reclaim_final in inet_sock_destruct() will dispose of it
+  */
+ sk->sk_forward_alloc += msk->rmem_fwd_alloc;
+ msk->rmem_fwd_alloc = 0;
mptcp_token_destroy(msk);
mptcp_pm_free_anno_list(msk);
}
@@ -3031,10 +2991,6 @@ static void mptcp_release_cb(struct sock *sk)
if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
__mptcp_error_report(sk);
- /* push_pending may touch wmem_reserved, ensure we do the cleanup
- * later
- */
- __mptcp_update_wmem(sk);
__mptcp_update_rmem(sk);
}
@@ -3184,6 +3140,11 @@ static void mptcp_shutdown(struct sock *sk, int how)
__mptcp_wr_shutdown(sk);
}
+static int mptcp_forward_alloc_get(const struct sock *sk)
+{
+ return sk->sk_forward_alloc + mptcp_sk(sk)->rmem_fwd_alloc;
+}
+
static struct proto mptcp_prot = {
.name = "MPTCP",
.owner = THIS_MODULE,
@@ -3201,6 +3162,7 @@ static struct proto mptcp_prot = {
.hash = mptcp_hash,
.unhash = mptcp_unhash,
.get_port = mptcp_get_port,
+ .forward_alloc_get = mptcp_forward_alloc_get,
.sockets_allocated = &mptcp_sockets_allocated,
.memory_allocated = &tcp_memory_allocated,
.memory_pressure = &tcp_memory_pressure,
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 284fdcec067e..67a61ac48b20 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -227,7 +227,7 @@ struct mptcp_sock {
u64 ack_seq;
u64 rcv_wnd_sent;
u64 rcv_data_fin_seq;
- int wmem_reserved;
+ int rmem_fwd_alloc;
struct sock *last_snd;
int snd_burst;
int old_wspace;
@@ -272,19 +272,6 @@ struct mptcp_sock {
char ca_name[TCP_CA_NAME_MAX];
};
-#define mptcp_lock_sock(___sk, cb) do { \
- struct sock *__sk = (___sk); /* silence macro reuse warning */ \
- might_sleep(); \
- spin_lock_bh(&__sk->sk_lock.slock); \
- if (__sk->sk_lock.owned) \
- __lock_sock(__sk); \
- cb; \
- __sk->sk_lock.owned = 1; \
- spin_unlock(&__sk->sk_lock.slock); \
- mutex_acquire(&__sk->sk_lock.dep_map, 0, 0, _RET_IP_); \
- local_bh_enable(); \
-} while (0)
-
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
#define mptcp_data_unlock(sk) spin_unlock_bh(&(sk)->sk_lock.slock)
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 46254968d390..0a04468b7314 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -457,7 +457,7 @@ META_COLLECTOR(int_sk_fwd_alloc)
*err = -1;
return;
}
- dst->value = sk->sk_forward_alloc;
+ dst->value = sk_forward_alloc_get(sk);
}
META_COLLECTOR(int_sk_sndbuf)