aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2019-06-14 20:18:28 -0700
committerDavid S. Miller <davem@davemloft.net>2019-06-14 20:18:28 -0700
commit35fc07aee8f6d55aeacdbfdccc425e684737f741 (patch)
tree220daaf7fb72acc2cfe6fab06c1ac0e0f4384157
parenthv_netvsc: Set probe mode to sync (diff)
parentnet: add high_order_alloc_disable sysctl/static key (diff)
downloadlinux-dev-35fc07aee8f6d55aeacdbfdccc425e684737f741.tar.xz
linux-dev-35fc07aee8f6d55aeacdbfdccc425e684737f741.zip
Merge branch 'tcp-add-three-static-keys'
Eric Dumazet says: ==================== tcp: add three static keys Recent addition of per TCP socket rx/tx cache brought regressions for some workloads, as reported by Feng Tang. It seems better to make them opt-in, before we adopt better heuristics. The last patch adds high_order_alloc_disable sysctl to ask TCP sendmsg() to exclusively use order-0 allocations, as mm layer has specific optimizations. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/ip-sysctl.txt8
-rw-r--r--include/linux/bpf.h1
-rw-r--r--include/linux/sysctl.h3
-rw-r--r--include/net/sock.h12
-rw-r--r--kernel/bpf/core.c1
-rw-r--r--kernel/sysctl.c44
-rw-r--r--net/core/sock.c4
-rw-r--r--net/core/sysctl_net_core.c7
-rw-r--r--net/ipv4/sysctl_net_ipv4.c17
9 files changed, 68 insertions, 29 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 14fe93049d28..288aa264ac26 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -772,6 +772,14 @@ tcp_challenge_ack_limit - INTEGER
in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks)
Default: 100
+tcp_rx_skb_cache - BOOLEAN
+ Controls a per TCP socket cache of one skb, that might help
+ performance of some workloads. This might be dangerous
+ on systems with a lot of TCP sockets, since it increases
+ memory usage.
+
+ Default: 0 (disabled)
+
UDP variables:
udp_l3mdev_accept - BOOLEAN
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5df8e9e2a393..b92ef9f73e42 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -600,7 +600,6 @@ void bpf_map_area_free(void *base);
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
extern int sysctl_unprivileged_bpf_disabled;
-extern int sysctl_bpf_stats_enabled;
int bpf_map_new_fd(struct bpf_map *map, int flags);
int bpf_prog_new_fd(struct bpf_prog *prog);
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index b769ecfcc3bd..aadd310769d0 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -63,6 +63,9 @@ extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
void __user *, size_t *, loff_t *);
extern int proc_do_large_bitmap(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
+extern int proc_do_static_key(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos);
/*
* Register a set of sysctl names by calling register_sysctl_table
diff --git a/include/net/sock.h b/include/net/sock.h
index e9d769c04637..6cbc16136357 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1463,12 +1463,14 @@ static inline void sk_mem_uncharge(struct sock *sk, int size)
__sk_mem_reclaim(sk, 1 << 20);
}
+DECLARE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb)
{
sock_set_flag(sk, SOCK_QUEUE_SHRUNK);
sk->sk_wmem_queued -= skb->truesize;
sk_mem_uncharge(sk, skb->truesize);
- if (!sk->sk_tx_skb_cache && !skb_cloned(skb)) {
+ if (static_branch_unlikely(&tcp_tx_skb_cache_key) &&
+ !sk->sk_tx_skb_cache && !skb_cloned(skb)) {
skb_zcopy_clear(skb, true);
sk->sk_tx_skb_cache = skb;
return;
@@ -2433,13 +2435,11 @@ static inline void skb_setup_tx_timestamp(struct sk_buff *skb, __u16 tsflags)
* This routine must be called with interrupts disabled or with the socket
* locked so that the sk_buff queue operation is ok.
*/
+DECLARE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
static inline void sk_eat_skb(struct sock *sk, struct sk_buff *skb)
{
__skb_unlink(skb, &sk->sk_receive_queue);
- if (
-#ifdef CONFIG_RPS
- !static_branch_unlikely(&rps_needed) &&
-#endif
+ if (static_branch_unlikely(&tcp_rx_skb_cache_key) &&
!sk->sk_rx_skb_cache) {
sk->sk_rx_skb_cache = skb;
skb_orphan(skb);
@@ -2534,6 +2534,8 @@ extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
+DECLARE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
+
static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_wmem ? */
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7c473f208a10..080e2bb644cc 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2097,7 +2097,6 @@ int __weak skb_copy_bits(const struct sk_buff *skb, int offset, void *to,
DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
EXPORT_SYMBOL(bpf_stats_enabled_key);
-int sysctl_bpf_stats_enabled __read_mostly;
/* All definitions of tracepoints related to BPF. */
#define CREATE_TRACE_POINTS
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7d1008be6173..1beca96fb625 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -230,11 +230,6 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
#endif
static int proc_dopipe_max_size(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
-#ifdef CONFIG_BPF_SYSCALL
-static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos);
-#endif
#ifdef CONFIG_MAGIC_SYSRQ
/* Note: sysrq code uses its own private copy */
@@ -1253,12 +1248,10 @@ static struct ctl_table kern_table[] = {
},
{
.procname = "bpf_stats_enabled",
- .data = &sysctl_bpf_stats_enabled,
- .maxlen = sizeof(sysctl_bpf_stats_enabled),
+ .data = &bpf_stats_enabled_key.key,
+ .maxlen = sizeof(bpf_stats_enabled_key),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax_bpf_stats,
- .extra1 = &zero,
- .extra2 = &one,
+ .proc_handler = proc_do_static_key,
},
#endif
#if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU)
@@ -3374,26 +3367,35 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
#endif /* CONFIG_PROC_SYSCTL */
-#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
-static int proc_dointvec_minmax_bpf_stats(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp,
- loff_t *ppos)
+#if defined(CONFIG_SYSCTL)
+int proc_do_static_key(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
{
- int ret, bpf_stats = *(int *)table->data;
- struct ctl_table tmp = *table;
+ struct static_key *key = (struct static_key *)table->data;
+ static DEFINE_MUTEX(static_key_mutex);
+ int val, ret;
+ struct ctl_table tmp = {
+ .data = &val,
+ .maxlen = sizeof(val),
+ .mode = table->mode,
+ .extra1 = &zero,
+ .extra2 = &one,
+ };
if (write && !capable(CAP_SYS_ADMIN))
return -EPERM;
- tmp.data = &bpf_stats;
+ mutex_lock(&static_key_mutex);
+ val = static_key_enabled(key);
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (write && !ret) {
- *(int *)table->data = bpf_stats;
- if (bpf_stats)
- static_branch_enable(&bpf_stats_enabled_key);
+ if (val)
+ static_key_enable(key);
else
- static_branch_disable(&bpf_stats_enabled_key);
+ static_key_disable(key);
}
+ mutex_unlock(&static_key_mutex);
return ret;
}
#endif
diff --git a/net/core/sock.c b/net/core/sock.c
index 2b3701958486..7f49392579a5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2320,6 +2320,7 @@ static void sk_leave_memory_pressure(struct sock *sk)
/* On 32bit arches, an skb frag is limited to 2^15 */
#define SKB_FRAG_PAGE_ORDER get_order(32768)
+DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
/**
* skb_page_frag_refill - check that a page_frag contains enough room
@@ -2344,7 +2345,8 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
}
pfrag->offset = 0;
- if (SKB_FRAG_PAGE_ORDER) {
+ if (SKB_FRAG_PAGE_ORDER &&
+ !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
/* Avoid direct reclaim but allow kswapd to wake */
pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
__GFP_COMP | __GFP_NOWARN |
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 1a2685694abd..f9204719aeee 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -562,6 +562,13 @@ static struct ctl_table net_core_table[] = {
.extra1 = &zero,
.extra2 = &two,
},
+ {
+ .procname = "high_order_alloc_disable",
+ .data = &net_high_order_alloc_disable_key.key,
+ .maxlen = sizeof(net_high_order_alloc_disable_key),
+ .mode = 0644,
+ .proc_handler = proc_do_static_key,
+ },
{ }
};
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 875867b64d6a..08a428a7b274 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -51,6 +51,11 @@ static int comp_sack_nr_max = 255;
static u32 u32_max_div_HZ = UINT_MAX / HZ;
static int one_day_secs = 24 * 3600;
+DEFINE_STATIC_KEY_FALSE(tcp_rx_skb_cache_key);
+EXPORT_SYMBOL(tcp_rx_skb_cache_key);
+
+DEFINE_STATIC_KEY_FALSE(tcp_tx_skb_cache_key);
+
/* obsolete */
static int sysctl_tcp_low_latency __read_mostly;
@@ -559,6 +564,18 @@ static struct ctl_table ipv4_table[] = {
.extra1 = &sysctl_fib_sync_mem_min,
.extra2 = &sysctl_fib_sync_mem_max,
},
+ {
+ .procname = "tcp_rx_skb_cache",
+ .data = &tcp_rx_skb_cache_key.key,
+ .mode = 0644,
+ .proc_handler = proc_do_static_key,
+ },
+ {
+ .procname = "tcp_tx_skb_cache",
+ .data = &tcp_tx_skb_cache_key.key,
+ .mode = 0644,
+ .proc_handler = proc_do_static_key,
+ },
{ }
};