aboutsummaryrefslogtreecommitdiffstats
path: root/include/net/sock.h
diff options
context:
space:
mode:
authorWei Wang <weiwan@google.com>2021-09-29 10:25:11 -0700
committerDavid S. Miller <davem@davemloft.net>2021-09-30 13:36:46 +0100
commit2bb2f5fb21b0486ff69b7b4a1fe03a760527d133 (patch)
tree1e1b3e094fd135390997b383ccdd38a85eecbfe0 /include/net/sock.h
parentnet: phy: marvell10g: add downshift tunable support (diff)
downloadlinux-dev-2bb2f5fb21b0486ff69b7b4a1fe03a760527d133.tar.xz
linux-dev-2bb2f5fb21b0486ff69b7b4a1fe03a760527d133.zip
net: add new socket option SO_RESERVE_MEM
This socket option provides a mechanism for users to reserve a certain amount of memory for the socket to use. When this option is set, kernel charges the user specified amount of memory to memcg, as well as sk_forward_alloc. This amount of memory is not reclaimable and is available in sk_forward_alloc for this socket. With this socket option set, the networking stack spends less cycles doing forward alloc and reclaim, which should lead to better system performance, with the cost of an amount of pre-allocated and unreclaimable memory, even under memory pressure. Note: This socket option is only available when memory cgroup is enabled and we require this reserved memory to be charged to the user's memcg. We hope this could avoid mis-behaving users to abused this feature to reserve a large amount on certain sockets and cause unfairness for others. Signed-off-by: Wei Wang <weiwan@google.com> Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net/sock.h')
-rw-r--r--include/net/sock.h44
1 files changed, 39 insertions, 5 deletions
diff --git a/include/net/sock.h b/include/net/sock.h
index 879980de3dcd..447fddb384a4 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -269,6 +269,7 @@ struct bpf_local_storage;
* @sk_omem_alloc: "o" is "option" or "other"
* @sk_wmem_queued: persistent queue size
* @sk_forward_alloc: space allocated forward
+ * @sk_reserved_mem: space reserved and non-reclaimable for the socket
* @sk_napi_id: id of the last napi context to receive data for sk
* @sk_ll_usec: usecs to busypoll when there is no data
* @sk_allocation: allocation mode
@@ -409,6 +410,7 @@ struct sock {
#define sk_rmem_alloc sk_backlog.rmem_alloc
int sk_forward_alloc;
+ u32 sk_reserved_mem;
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sk_ll_usec;
/* ===== mostly read cache line ===== */
@@ -1511,20 +1513,49 @@ sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size)
skb_pfmemalloc(skb);
}
+static inline int sk_unused_reserved_mem(const struct sock *sk)
+{
+ int unused_mem;
+
+ if (likely(!sk->sk_reserved_mem))
+ return 0;
+
+ unused_mem = sk->sk_reserved_mem - sk->sk_wmem_queued -
+ atomic_read(&sk->sk_rmem_alloc);
+
+ return unused_mem > 0 ? unused_mem : 0;
+}
+
static inline void sk_mem_reclaim(struct sock *sk)
{
+ int reclaimable;
+
if (!sk_has_account(sk))
return;
- if (sk->sk_forward_alloc >= SK_MEM_QUANTUM)
- __sk_mem_reclaim(sk, sk->sk_forward_alloc);
+
+ reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
+
+ if (reclaimable >= SK_MEM_QUANTUM)
+ __sk_mem_reclaim(sk, reclaimable);
+}
+
+static inline void sk_mem_reclaim_final(struct sock *sk)
+{
+ sk->sk_reserved_mem = 0;
+ sk_mem_reclaim(sk);
}
static inline void sk_mem_reclaim_partial(struct sock *sk)
{
+ int reclaimable;
+
if (!sk_has_account(sk))
return;
- if (sk->sk_forward_alloc > SK_MEM_QUANTUM)
- __sk_mem_reclaim(sk, sk->sk_forward_alloc - 1);
+
+ reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
+
+ if (reclaimable > SK_MEM_QUANTUM)
+ __sk_mem_reclaim(sk, reclaimable - 1);
}
static inline void sk_mem_charge(struct sock *sk, int size)
@@ -1536,9 +1567,12 @@ static inline void sk_mem_charge(struct sock *sk, int size)
static inline void sk_mem_uncharge(struct sock *sk, int size)
{
+ int reclaimable;
+
if (!sk_has_account(sk))
return;
sk->sk_forward_alloc += size;
+ reclaimable = sk->sk_forward_alloc - sk_unused_reserved_mem(sk);
/* Avoid a possible overflow.
* TCP send queues can make this happen, if sk_mem_reclaim()
@@ -1547,7 +1581,7 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
* If we reach 2 MBytes, reclaim 1 MBytes right now, there is
* no need to hold that much forward allocation anyway.
*/
- if (unlikely(sk->sk_forward_alloc >= 1 << 21))
+ if (unlikely(reclaimable >= 1 << 21))
__sk_mem_reclaim(sk, 1 << 20);
}