aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-09-03 11:01:05 -0700
committerDavid S. Miller <davem@davemloft.net>2017-09-03 11:01:05 -0700
commit4113f36bc7084ea0d47c0f5f1137776b603c91a7 (patch)
treedf2e5f6fbb0792262701818650a9d6f35f6b5151
parentipv4: Don't override return code from ip_route_input_noref() (diff)
parentRevert "net: fix percpu memory leaks" (diff)
downloadlinux-dev-4113f36bc7084ea0d47c0f5f1137776b603c91a7.tar.xz
linux-dev-4113f36bc7084ea0d47c0f5f1137776b603c91a7.zip
Merge branch 'net-revert-lib-percpu_counter-API-for-fragmentation-mem-accounting'
Jesper Dangaard Brouer says: ==================== net: revert lib/percpu_counter API for fragmentation mem accounting There is a bug in the fragmentation code's use of the percpu_counter API that can cause issues on systems with many CPUs (above 24 CPUs). After much consideration and several attempts at fixing the API usage, the conclusion is to revert to the simple atomic_t API instead. The ratio between batch size and threshold size makes this a bad use case for the lib/percpu_counter API, as using the correct API calls would unfortunately cause systems with many CPUs to always execute an expensive sum across all CPUs. Plus, the added complexity is not worth it. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/inet_frag.h35
-rw-r--r--net/ieee802154/6lowpan/reassembly.c11
-rw-r--r--net/ipv4/inet_fragment.c4
-rw-r--r--net/ipv4/ip_fragment.c12
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c12
-rw-r--r--net/ipv6/reassembly.c12
6 files changed, 22 insertions, 64 deletions
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 6fdcd2427776..fc59e0775e00 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -1,14 +1,9 @@
#ifndef __NET_FRAG_H__
#define __NET_FRAG_H__
-#include <linux/percpu_counter.h>
-
struct netns_frags {
- /* The percpu_counter "mem" need to be cacheline aligned.
- * mem.count must not share cacheline with other writers
- */
- struct percpu_counter mem ____cacheline_aligned_in_smp;
-
+ /* Keep atomic mem on separate cachelines in structs that include it */
+ atomic_t mem ____cacheline_aligned_in_smp;
/* sysctls */
int timeout;
int high_thresh;
@@ -108,15 +103,10 @@ struct inet_frags {
int inet_frags_init(struct inet_frags *);
void inet_frags_fini(struct inet_frags *);
-static inline int inet_frags_init_net(struct netns_frags *nf)
-{
- return percpu_counter_init(&nf->mem, 0, GFP_KERNEL);
-}
-static inline void inet_frags_uninit_net(struct netns_frags *nf)
+static inline void inet_frags_init_net(struct netns_frags *nf)
{
- percpu_counter_destroy(&nf->mem);
+ atomic_set(&nf->mem, 0);
}
-
void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f);
void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f);
@@ -140,31 +130,24 @@ static inline bool inet_frag_evicting(struct inet_frag_queue *q)
/* Memory Tracking Functions. */
-/* The default percpu_counter batch size is not big enough to scale to
- * fragmentation mem acct sizes.
- * The mem size of a 64K fragment is approx:
- * (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes
- */
-static unsigned int frag_percpu_counter_batch = 130000;
-
static inline int frag_mem_limit(struct netns_frags *nf)
{
- return percpu_counter_read(&nf->mem);
+ return atomic_read(&nf->mem);
}
static inline void sub_frag_mem_limit(struct netns_frags *nf, int i)
{
- percpu_counter_add_batch(&nf->mem, -i, frag_percpu_counter_batch);
+ atomic_sub(i, &nf->mem);
}
static inline void add_frag_mem_limit(struct netns_frags *nf, int i)
{
- percpu_counter_add_batch(&nf->mem, i, frag_percpu_counter_batch);
+ atomic_add(i, &nf->mem);
}
-static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf)
+static inline int sum_frag_mem_limit(struct netns_frags *nf)
{
- return percpu_counter_sum_positive(&nf->mem);
+ return atomic_read(&nf->mem);
}
/* RFC 3168 support :
diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
index 30d875dff6b5..f85b08baff16 100644
--- a/net/ieee802154/6lowpan/reassembly.c
+++ b/net/ieee802154/6lowpan/reassembly.c
@@ -580,19 +580,14 @@ static int __net_init lowpan_frags_init_net(struct net *net)
{
struct netns_ieee802154_lowpan *ieee802154_lowpan =
net_ieee802154_lowpan(net);
- int res;
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
- res = inet_frags_init_net(&ieee802154_lowpan->frags);
- if (res)
- return res;
- res = lowpan_frags_ns_sysctl_register(net);
- if (res)
- inet_frags_uninit_net(&ieee802154_lowpan->frags);
- return res;
+ inet_frags_init_net(&ieee802154_lowpan->frags);
+
+ return lowpan_frags_ns_sysctl_register(net);
}
static void __net_exit lowpan_frags_exit_net(struct net *net)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 96e95e83cc61..af74d0433453 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -234,10 +234,8 @@ evict_again:
cond_resched();
if (read_seqretry(&f->rnd_seqlock, seq) ||
- percpu_counter_sum(&nf->mem))
+ sum_frag_mem_limit(nf))
goto evict_again;
-
- percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9a8cfac503dc..46408c220d9d 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -844,8 +844,6 @@ static void __init ip4_frags_ctl_register(void)
static int __net_init ipv4_frags_init_net(struct net *net)
{
- int res;
-
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
@@ -871,13 +869,9 @@ static int __net_init ipv4_frags_init_net(struct net *net)
net->ipv4.frags.max_dist = 64;
- res = inet_frags_init_net(&net->ipv4.frags);
- if (res)
- return res;
- res = ip4_frags_ns_ctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->ipv4.frags);
- return res;
+ inet_frags_init_net(&net->ipv4.frags);
+
+ return ip4_frags_ns_ctl_register(net);
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 986d4ca38832..b263bf3a19f7 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -622,18 +622,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
static int nf_ct_net_init(struct net *net)
{
- int res;
-
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- res = inet_frags_init_net(&net->nf_frag.frags);
- if (res)
- return res;
- res = nf_ct_frag6_sysctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->nf_frag.frags);
- return res;
+ inet_frags_init_net(&net->nf_frag.frags);
+
+ return nf_ct_frag6_sysctl_register(net);
}
static void nf_ct_net_exit(struct net *net)
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e1da5b888cc4..846012eae526 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -714,19 +714,13 @@ static void ip6_frags_sysctl_unregister(void)
static int __net_init ipv6_frags_init_net(struct net *net)
{
- int res;
-
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
- res = inet_frags_init_net(&net->ipv6.frags);
- if (res)
- return res;
- res = ip6_frags_ns_sysctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->ipv6.frags);
- return res;
+ inet_frags_init_net(&net->ipv6.frags);
+
+ return ip6_frags_ns_sysctl_register(net);
}
static void __net_exit ipv6_frags_exit_net(struct net *net)