diff options
author | David S. Miller <davem@davemloft.net> | 2019-05-26 14:08:05 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-05-26 14:08:05 -0700 |
commit | 8fb91c3550c4666d4c37b5494b1c68aa9c3284a5 (patch) | |
tree | 117978cd906b0d9088608e0c98700ca4c80246ff | |
parent | net: phy: tja11xx: Add TJA11xx PHY driver (diff) | |
parent | inet: frags: rework rhashtable dismantle (diff) | |
download | linux-dev-8fb91c3550c4666d4c37b5494b1c68aa9c3284a5.tar.xz linux-dev-8fb91c3550c4666d4c37b5494b1c68aa9c3284a5.zip |
Merge branch 'inet-frags-avoid-possible-races-at-netns-dismantle'
Eric Dumazet says:
====================
inet: frags: avoid possible races at netns dismantle
This patch series fixes a race happening on netns dismantle with
frag queues. While rhashtable_free_and_destroy() is running,
concurrent timers might run inet_frag_kill() and attempt
rhashtable_remove_fast() calls. This is not allowed by
rhashtable logic.
Since I do not want to add expensive synchronize_rcu() calls
in the netns dismantle path, I had to no longer inline
netns_frags structures, but dynamically allocate them.
The ten first patches make this preparation, so that
the last patch clearly shows the fix.
As this patch series is not exactly trivial, I chose to
target 5.3. We will backport it once soaked a bit.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/inet_frag.h | 48 | ||||
-rw-r--r-- | include/net/netns/ieee802154_6lowpan.h | 2 | ||||
-rw-r--r-- | include/net/netns/ipv4.h | 2 | ||||
-rw-r--r-- | include/net/netns/ipv6.h | 4 | ||||
-rw-r--r-- | net/ieee802154/6lowpan/reassembly.c | 36 | ||||
-rw-r--r-- | net/ipv4/inet_fragment.c | 98 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 67 | ||||
-rw-r--r-- | net/ipv4/proc.c | 4 | ||||
-rw-r--r-- | net/ipv6/netfilter/nf_conntrack_reasm.c | 43 | ||||
-rw-r--r-- | net/ipv6/proc.c | 4 | ||||
-rw-r--r-- | net/ipv6/reassembly.c | 40 |
11 files changed, 181 insertions, 167 deletions
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 378904ee9129..002f23c1a1a7 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -4,18 +4,22 @@ #include <linux/rhashtable-types.h> -struct netns_frags { +/* Per netns frag queues directory */ +struct fqdir { /* sysctls */ long high_thresh; long low_thresh; int timeout; int max_dist; struct inet_frags *f; + struct net *net; + bool dead; struct rhashtable rhashtable ____cacheline_aligned_in_smp; /* Keep atomic mem on separate cachelines in structs that include it */ atomic_long_t mem ____cacheline_aligned_in_smp; + struct rcu_work destroy_rwork; }; /** @@ -24,11 +28,13 @@ struct netns_frags { * @INET_FRAG_FIRST_IN: first fragment has arrived * @INET_FRAG_LAST_IN: final fragment has arrived * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction + * @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable */ enum { INET_FRAG_FIRST_IN = BIT(0), INET_FRAG_LAST_IN = BIT(1), INET_FRAG_COMPLETE = BIT(2), + INET_FRAG_HASH_DEAD = BIT(3), }; struct frag_v4_compare_key { @@ -64,7 +70,7 @@ struct frag_v6_compare_key { * @meat: length of received fragments so far * @flags: fragment queue flags * @max_size: maximum received fragment size - * @net: namespace that this frag belongs to + * @fqdir: pointer to struct fqdir * @rcu: rcu head for freeing deferall */ struct inet_frag_queue { @@ -84,7 +90,7 @@ struct inet_frag_queue { int meat; __u8 flags; u16 max_size; - struct netns_frags *net; + struct fqdir *fqdir; struct rcu_head rcu; }; @@ -103,16 +109,30 @@ struct inet_frags { int inet_frags_init(struct inet_frags *); void inet_frags_fini(struct inet_frags *); -static inline int inet_frags_init_net(struct netns_frags *nf) +static inline int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, + struct net *net) { - atomic_long_set(&nf->mem, 0); - return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params); + struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL); + int res; + + if (!fqdir) + return -ENOMEM; + fqdir->f = f; + fqdir->net = net; + res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params); + if (res < 0) { + kfree(fqdir); + return res; + } + *fqdirp = fqdir; + return 0; } -void inet_frags_exit_net(struct netns_frags *nf); + +void fqdir_exit(struct fqdir *fqdir); void inet_frag_kill(struct inet_frag_queue *q); void inet_frag_destroy(struct inet_frag_queue *q); -struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key); +struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); /* Free all skbs in the queue; return the sum of their truesizes. */ unsigned int inet_frag_rbtree_purge(struct rb_root *root); @@ -125,19 +145,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q) /* Memory Tracking Functions. */ -static inline long frag_mem_limit(const struct netns_frags *nf) +static inline long frag_mem_limit(const struct fqdir *fqdir) { - return atomic_long_read(&nf->mem); + return atomic_long_read(&fqdir->mem); } -static inline void sub_frag_mem_limit(struct netns_frags *nf, long val) +static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val) { - atomic_long_sub(val, &nf->mem); + atomic_long_sub(val, &fqdir->mem); } -static inline void add_frag_mem_limit(struct netns_frags *nf, long val) +static inline void add_frag_mem_limit(struct fqdir *fqdir, long val) { - atomic_long_add(val, &nf->mem); + atomic_long_add(val, &fqdir->mem); } /* RFC 3168 support : diff --git a/include/net/netns/ieee802154_6lowpan.h b/include/net/netns/ieee802154_6lowpan.h index 736aeac52f56..95406e1342cb 100644 --- a/include/net/netns/ieee802154_6lowpan.h +++ b/include/net/netns/ieee802154_6lowpan.h @@ -16,7 +16,7 @@ struct netns_sysctl_lowpan { struct netns_ieee802154_lowpan { struct netns_sysctl_lowpan sysctl; - struct netns_frags frags; + struct fqdir *fqdir; }; #endif diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 7698460a3dd1..c07cee1e0c9e 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -72,7 +72,7 @@ struct netns_ipv4 { struct inet_peer_base *peers; struct sock * __percpu *tcp_sk; - struct netns_frags frags; + struct fqdir *fqdir; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; struct xt_table *iptable_mangle; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 5e61b5a8635d..022a0fd1a5a4 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -58,7 +58,7 @@ struct netns_ipv6 { struct ipv6_devconf *devconf_all; struct ipv6_devconf *devconf_dflt; struct inet_peer_base *peers; - struct netns_frags frags; + struct fqdir *fqdir; #ifdef CONFIG_NETFILTER struct xt_table *ip6table_filter; struct xt_table *ip6table_mangle; @@ -116,7 +116,7 @@ struct netns_ipv6 { #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) struct netns_nf_frag { - struct netns_frags frags; + struct fqdir *fqdir; }; #endif diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 4196bcd4105a..e59c3b708969 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -79,7 +79,7 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb, key.src = *src; key.dst = *dst; - q = inet_frag_find(&ieee802154_lowpan->frags, &key); + q = inet_frag_find(ieee802154_lowpan->fqdir, &key); if (!q) return NULL; @@ -139,7 +139,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq, fq->q.flags |= INET_FRAG_FIRST_IN; fq->q.meat += skb->len; - add_frag_mem_limit(fq->q.net, skb->truesize); + add_frag_mem_limit(fq->q.fqdir, skb->truesize); if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { @@ -326,23 +326,18 @@ err: static struct ctl_table lowpan_frags_ns_ctl_table[] = { { .procname = "6lowpanfrag_high_thresh", - .data = &init_net.ieee802154_lowpan.frags.high_thresh, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &init_net.ieee802154_lowpan.frags.low_thresh }, { .procname = "6lowpanfrag_low_thresh", - .data = &init_net.ieee802154_lowpan.frags.low_thresh, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra2 = &init_net.ieee802154_lowpan.frags.high_thresh }, { .procname = "6lowpanfrag_time", - .data = &init_net.ieee802154_lowpan.frags.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -377,17 +372,17 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net) if (table == NULL) goto err_alloc; - table[0].data = &ieee802154_lowpan->frags.high_thresh; - table[0].extra1 = &ieee802154_lowpan->frags.low_thresh; - table[1].data = &ieee802154_lowpan->frags.low_thresh; - table[1].extra2 = &ieee802154_lowpan->frags.high_thresh; - table[2].data = &ieee802154_lowpan->frags.timeout; - /* Don't export sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) table[0].procname = NULL; } + table[0].data = &ieee802154_lowpan->fqdir->high_thresh; + table[0].extra1 = &ieee802154_lowpan->fqdir->low_thresh; + table[1].data = &ieee802154_lowpan->fqdir->low_thresh; + table[1].extra2 = &ieee802154_lowpan->fqdir->high_thresh; + table[2].data = &ieee802154_lowpan->fqdir->timeout; + hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table); if (hdr == NULL) goto err_reg; @@ -454,17 +449,18 @@ static int __net_init lowpan_frags_init_net(struct net *net) net_ieee802154_lowpan(net); int res; - ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; - ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; - ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; - ieee802154_lowpan->frags.f = &lowpan_frags; - res = inet_frags_init_net(&ieee802154_lowpan->frags); + res = fqdir_init(&ieee802154_lowpan->fqdir, &lowpan_frags, net); if (res < 0) return res; + + ieee802154_lowpan->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH; + ieee802154_lowpan->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH; + ieee802154_lowpan->fqdir->timeout = IPV6_FRAG_TIMEOUT; + res = lowpan_frags_ns_sysctl_register(net); if (res < 0) - inet_frags_exit_net(&ieee802154_lowpan->frags); + fqdir_exit(ieee802154_lowpan->fqdir); return res; } @@ -474,7 +470,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net) net_ieee802154_lowpan(net); lowpan_frags_ns_sysctl_unregister(net); - inet_frags_exit_net(&ieee802154_lowpan->frags); + fqdir_exit(ieee802154_lowpan->fqdir); } static struct pernet_operations lowpan_frags_ops = { diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 737808e27f8b..6ca9523374da 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -124,34 +124,50 @@ void inet_frags_fini(struct inet_frags *f) } EXPORT_SYMBOL(inet_frags_fini); +/* called from rhashtable_free_and_destroy() at netns_frags dismantle */ static void inet_frags_free_cb(void *ptr, void *arg) { struct inet_frag_queue *fq = ptr; + int count; - /* If we can not cancel the timer, it means this frag_queue - * is already disappearing, we have nothing to do. - * Otherwise, we own a refcount until the end of this function. - */ - if (!del_timer(&fq->timer)) - return; + count = del_timer_sync(&fq->timer) ? 1 : 0; spin_lock_bh(&fq->lock); if (!(fq->flags & INET_FRAG_COMPLETE)) { fq->flags |= INET_FRAG_COMPLETE; - refcount_dec(&fq->refcnt); + count++; + } else if (fq->flags & INET_FRAG_HASH_DEAD) { + count++; } spin_unlock_bh(&fq->lock); - inet_frag_put(fq); + if (refcount_sub_and_test(count, &fq->refcnt)) + inet_frag_destroy(fq); +} + +static void fqdir_rwork_fn(struct work_struct *work) +{ + struct fqdir *fqdir = container_of(to_rcu_work(work), + struct fqdir, destroy_rwork); + + rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL); + kfree(fqdir); } -void inet_frags_exit_net(struct netns_frags *nf) +void fqdir_exit(struct fqdir *fqdir) { - nf->high_thresh = 0; /* prevent creation of new frags */ + fqdir->high_thresh = 0; /* prevent creation of new frags */ + + /* paired with READ_ONCE() in inet_frag_kill() : + * We want to prevent rhashtable_remove_fast() calls + */ + smp_store_release(&fqdir->dead, true); + + INIT_RCU_WORK(&fqdir->destroy_rwork, fqdir_rwork_fn); + queue_rcu_work(system_wq, &fqdir->destroy_rwork); - rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL); } -EXPORT_SYMBOL(inet_frags_exit_net); +EXPORT_SYMBOL(fqdir_exit); void inet_frag_kill(struct inet_frag_queue *fq) { @@ -159,11 +175,21 @@ void inet_frag_kill(struct inet_frag_queue *fq) refcount_dec(&fq->refcnt); if (!(fq->flags & INET_FRAG_COMPLETE)) { - struct netns_frags *nf = fq->net; + struct fqdir *fqdir = fq->fqdir; fq->flags |= INET_FRAG_COMPLETE; - rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params); - refcount_dec(&fq->refcnt); + rcu_read_lock(); + /* This READ_ONCE() is paired with smp_store_release() + * in inet_frags_exit_net(). + */ + if (!READ_ONCE(fqdir->dead)) { + rhashtable_remove_fast(&fqdir->rhashtable, &fq->node, + fqdir->f->rhash_params); + refcount_dec(&fq->refcnt); + } else { + fq->flags |= INET_FRAG_HASH_DEAD; + } + rcu_read_unlock(); } } EXPORT_SYMBOL(inet_frag_kill); @@ -172,7 +198,7 @@ static void inet_frag_destroy_rcu(struct rcu_head *head) { struct inet_frag_queue *q = container_of(head, struct inet_frag_queue, rcu); - struct inet_frags *f = q->net->f; + struct inet_frags *f = q->fqdir->f; if (f->destructor) f->destructor(q); @@ -203,7 +229,7 @@ EXPORT_SYMBOL(inet_frag_rbtree_purge); void inet_frag_destroy(struct inet_frag_queue *q) { - struct netns_frags *nf; + struct fqdir *fqdir; unsigned int sum, sum_truesize = 0; struct inet_frags *f; @@ -211,18 +237,18 @@ void inet_frag_destroy(struct inet_frag_queue *q) WARN_ON(del_timer(&q->timer) != 0); /* Release all fragment data. */ - nf = q->net; - f = nf->f; + fqdir = q->fqdir; + f = fqdir->f; sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments); sum = sum_truesize + f->qsize; call_rcu(&q->rcu, inet_frag_destroy_rcu); - sub_frag_mem_limit(nf, sum); + sub_frag_mem_limit(fqdir, sum); } EXPORT_SYMBOL(inet_frag_destroy); -static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, +static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir, struct inet_frags *f, void *arg) { @@ -232,9 +258,9 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, if (!q) return NULL; - q->net = nf; + q->fqdir = fqdir; f->constructor(q, arg); - add_frag_mem_limit(nf, f->qsize); + add_frag_mem_limit(fqdir, f->qsize); timer_setup(&q->timer, f->frag_expire, 0); spin_lock_init(&q->lock); @@ -243,21 +269,21 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, return q; } -static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, +static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, void *arg, struct inet_frag_queue **prev) { - struct inet_frags *f = nf->f; + struct inet_frags *f = fqdir->f; struct inet_frag_queue *q; - q = inet_frag_alloc(nf, f, arg); + q = inet_frag_alloc(fqdir, f, arg); if (!q) { *prev = ERR_PTR(-ENOMEM); return NULL; } - mod_timer(&q->timer, jiffies + nf->timeout); + mod_timer(&q->timer, jiffies + fqdir->timeout); - *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key, + *prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key, &q->node, f->rhash_params); if (*prev) { q->flags |= INET_FRAG_COMPLETE; @@ -269,18 +295,18 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, } /* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ -struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key) +struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) { struct inet_frag_queue *fq = NULL, *prev; - if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh) + if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh) return NULL; rcu_read_lock(); - prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params); + prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params); if (!prev) - fq = inet_frag_create(nf, key, &prev); + fq = inet_frag_create(fqdir, key, &prev); if (prev && !IS_ERR(prev)) { fq = prev; if (!refcount_inc_not_zero(&fq->refcnt)) @@ -391,7 +417,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, delta += head->truesize; if (delta) - add_frag_mem_limit(q->net, delta); + add_frag_mem_limit(q->fqdir, delta); /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part @@ -413,7 +439,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, head->truesize += clone->truesize; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(q->net, clone->truesize); + add_frag_mem_limit(q->fqdir, clone->truesize); skb_shinfo(head)->frag_list = clone; nextp = &clone->next; } else { @@ -466,7 +492,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, rbn = rbnext; } } - sub_frag_mem_limit(q->net, head->truesize); + sub_frag_mem_limit(q->fqdir, head->truesize); *nextp = NULL; skb_mark_not_on_list(head); @@ -494,7 +520,7 @@ struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q) if (head == q->fragments_tail) q->fragments_tail = NULL; - sub_frag_mem_limit(q->net, head->truesize); + sub_frag_mem_limit(q->fqdir, head->truesize); return head; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index cf2b0a6a3337..1ffaec056821 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -82,15 +82,13 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, static void ip4_frag_init(struct inet_frag_queue *q, const void *a) { struct ipq *qp = container_of(q, struct ipq, q); - struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4, - frags); - struct net *net = container_of(ipv4, struct net, ipv4); + struct net *net = q->fqdir->net; const struct frag_v4_compare_key *key = a; q->key.v4 = *key; qp->ecn = 0; - qp->peer = q->net->max_dist ? + qp->peer = q->fqdir->max_dist ? inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) : NULL; } @@ -142,7 +140,7 @@ static void ip_expire(struct timer_list *t) int err; qp = container_of(frag, struct ipq, q); - net = container_of(qp->q.net, struct net, ipv4.frags); + net = qp->q.fqdir->net; rcu_read_lock(); spin_lock(&qp->q.lock); @@ -211,7 +209,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph, }; struct inet_frag_queue *q; - q = inet_frag_find(&net->ipv4.frags, &key); + q = inet_frag_find(net->ipv4.fqdir, &key); if (!q) return NULL; @@ -222,7 +220,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph, static int ip_frag_too_far(struct ipq *qp) { struct inet_peer *peer = qp->peer; - unsigned int max = qp->q.net->max_dist; + unsigned int max = qp->q.fqdir->max_dist; unsigned int start, end; int rc; @@ -236,12 +234,8 @@ static int ip_frag_too_far(struct ipq *qp) rc = qp->q.fragments_tail && (end - start) > max; - if (rc) { - struct net *net; - - net = container_of(qp->q.net, struct net, ipv4.frags); - __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS); - } + if (rc) + __IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS); return rc; } @@ -250,13 +244,13 @@ static int ip_frag_reinit(struct ipq *qp) { unsigned int sum_truesize = 0; - if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) { + if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) { refcount_inc(&qp->q.refcnt); return -ETIMEDOUT; } sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments); - sub_frag_mem_limit(qp->q.net, sum_truesize); + sub_frag_mem_limit(qp->q.fqdir, sum_truesize); qp->q.flags = 0; qp->q.len = 0; @@ -273,7 +267,7 @@ static int ip_frag_reinit(struct ipq *qp) /* Add new segment to existing queue. */ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) { - struct net *net = container_of(qp->q.net, struct net, ipv4.frags); + struct net *net = qp->q.fqdir->net; int ihl, end, flags, offset; struct sk_buff *prev_tail; struct net_device *dev; @@ -352,7 +346,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; - add_frag_mem_limit(qp->q.net, skb->truesize); + add_frag_mem_limit(qp->q.fqdir, skb->truesize); if (offset == 0) qp->q.flags |= INET_FRAG_FIRST_IN; @@ -399,7 +393,7 @@ err: static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, struct sk_buff *prev_tail, struct net_device *dev) { - struct net *net = container_of(qp->q.net, struct net, ipv4.frags); + struct net *net = qp->q.fqdir->net; struct iphdr *iph; void *reasm_data; int len, err; @@ -544,30 +538,24 @@ static int dist_min; static struct ctl_table ip4_frags_ns_ctl_table[] = { { .procname = "ipfrag_high_thresh", - .data = &init_net.ipv4.frags.high_thresh, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &init_net.ipv4.frags.low_thresh }, { .procname = "ipfrag_low_thresh", - .data = &init_net.ipv4.frags.low_thresh, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra2 = &init_net.ipv4.frags.high_thresh }, { .procname = "ipfrag_time", - .data = &init_net.ipv4.frags.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "ipfrag_max_dist", - .data = &init_net.ipv4.frags.max_dist, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, @@ -600,13 +588,13 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) if (!table) goto err_alloc; - table[0].data = &net->ipv4.frags.high_thresh; - table[0].extra1 = &net->ipv4.frags.low_thresh; - table[1].data = &net->ipv4.frags.low_thresh; - table[1].extra2 = &net->ipv4.frags.high_thresh; - table[2].data = &net->ipv4.frags.timeout; - table[3].data = &net->ipv4.frags.max_dist; } + table[0].data = &net->ipv4.fqdir->high_thresh; + table[0].extra1 = &net->ipv4.fqdir->low_thresh; + table[1].data = &net->ipv4.fqdir->low_thresh; + table[1].extra2 = &net->ipv4.fqdir->high_thresh; + table[2].data = &net->ipv4.fqdir->timeout; + table[3].data = &net->ipv4.fqdir->max_dist; hdr = register_net_sysctl(net, "net/ipv4", table); if (!hdr) @@ -654,6 +642,9 @@ static int __net_init ipv4_frags_init_net(struct net *net) { int res; + res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net); + if (res < 0) + return res; /* Fragment cache limits. * * The fragment memory accounting code, (tries to) account for @@ -668,31 +659,27 @@ static int __net_init ipv4_frags_init_net(struct net *net) * we will prune down to 3MB, making room for approx 8 big 64K * fragments 8x128k. */ - net->ipv4.frags.high_thresh = 4 * 1024 * 1024; - net->ipv4.frags.low_thresh = 3 * 1024 * 1024; + net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024; + net->ipv4.fqdir->low_thresh = 3 * 1024 * 1024; /* * Important NOTE! Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival * by TTL. */ - net->ipv4.frags.timeout = IP_FRAG_TIME; + net->ipv4.fqdir->timeout = IP_FRAG_TIME; - net->ipv4.frags.max_dist = 64; - net->ipv4.frags.f = &ip4_frags; + net->ipv4.fqdir->max_dist = 64; - res = inet_frags_init_net(&net->ipv4.frags); - if (res < 0) - return res; res = ip4_frags_ns_ctl_register(net); if (res < 0) - inet_frags_exit_net(&net->ipv4.frags); + fqdir_exit(net->ipv4.fqdir); return res; } static void __net_exit ipv4_frags_exit_net(struct net *net) { ip4_frags_ns_ctl_unregister(net); - inet_frags_exit_net(&net->ipv4.frags); + fqdir_exit(net->ipv4.fqdir); } static struct pernet_operations ip4_frags_ops = { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index c3610b37bb4c..b613572c6616 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -72,8 +72,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(net, &raw_prot)); seq_printf(seq, "FRAG: inuse %u memory %lu\n", - atomic_read(&net->ipv4.frags.rhashtable.nelems), - frag_mem_limit(&net->ipv4.frags)); + atomic_read(&net->ipv4.fqdir->rhashtable.nelems), + frag_mem_limit(net->ipv4.fqdir)); return 0; } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 3de0e9b0a482..c5d59fa568d6 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -58,26 +58,21 @@ static struct inet_frags nf_frags; static struct ctl_table nf_ct_frag6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_timeout", - .data = &init_net.nf_frag.frags.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_frag6_low_thresh", - .data = &init_net.nf_frag.frags.low_thresh, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra2 = &init_net.nf_frag.frags.high_thresh }, { .procname = "nf_conntrack_frag6_high_thresh", - .data = &init_net.nf_frag.frags.high_thresh, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &init_net.nf_frag.frags.low_thresh }, { } }; @@ -93,15 +88,15 @@ static int nf_ct_frag6_sysctl_register(struct net *net) GFP_KERNEL); if (table == NULL) goto err_alloc; - - table[0].data = &net->nf_frag.frags.timeout; - table[1].data = &net->nf_frag.frags.low_thresh; - table[1].extra2 = &net->nf_frag.frags.high_thresh; - table[2].data = &net->nf_frag.frags.high_thresh; - table[2].extra1 = &net->nf_frag.frags.low_thresh; - table[2].extra2 = &init_net.nf_frag.frags.high_thresh; } + table[0].data = &net->nf_frag.fqdir->timeout; + table[1].data = &net->nf_frag.fqdir->low_thresh; + table[1].extra2 = &net->nf_frag.fqdir->high_thresh; + table[2].data = &net->nf_frag.fqdir->high_thresh; + table[2].extra1 = &net->nf_frag.fqdir->low_thresh; + table[2].extra2 = &init_net.nf_frag.fqdir->high_thresh; + hdr = register_net_sysctl(net, "net/netfilter", table); if (hdr == NULL) goto err_reg; @@ -148,12 +143,10 @@ static void nf_ct_frag6_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; - struct net *net; fq = container_of(frag, struct frag_queue, q); - net = container_of(fq->q.net, struct net, nf_frag.frags); - ip6frag_expire_frag_queue(net, fq); + ip6frag_expire_frag_queue(fq->q.fqdir->net, fq); } /* Creation primitives. */ @@ -169,7 +162,7 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, }; struct inet_frag_queue *q; - q = inet_frag_find(&net->nf_frag.frags, &key); + q = inet_frag_find(net->nf_frag.fqdir, &key); if (!q) return NULL; @@ -276,7 +269,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, fq->ecn |= ecn; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - add_frag_mem_limit(fq->q.net, skb->truesize); + add_frag_mem_limit(fq->q.fqdir, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -496,24 +489,24 @@ static int nf_ct_net_init(struct net *net) { int res; - net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; - net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; - net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; - net->nf_frag.frags.f = &nf_frags; - - res = inet_frags_init_net(&net->nf_frag.frags); + res = fqdir_init(&net->nf_frag.fqdir, &nf_frags, net); if (res < 0) return res; + + net->nf_frag.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH; + net->nf_frag.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH; + net->nf_frag.fqdir->timeout = IPV6_FRAG_TIMEOUT; + res = nf_ct_frag6_sysctl_register(net); if (res < 0) - inet_frags_exit_net(&net->nf_frag.frags); + fqdir_exit(net->nf_frag.fqdir); return res; } static void nf_ct_net_exit(struct net *net) { nf_ct_frags6_sysctl_unregister(net); - inet_frags_exit_net(&net->nf_frag.frags); + fqdir_exit(net->nf_frag.fqdir); } static struct pernet_operations nf_ct_net_ops = { diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 2356b4af7309..0bbefc440bcd 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -48,8 +48,8 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(net, &rawv6_prot)); seq_printf(seq, "FRAG6: inuse %u memory %lu\n", - atomic_read(&net->ipv6.frags.rhashtable.nelems), - frag_mem_limit(&net->ipv6.frags)); + atomic_read(&net->ipv6.fqdir->rhashtable.nelems), + frag_mem_limit(net->ipv6.fqdir)); return 0; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 1a832f5e190b..836ea964cf14 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -76,12 +76,10 @@ static void ip6_frag_expire(struct timer_list *t) { struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; - struct net *net; fq = container_of(frag, struct frag_queue, q); - net = container_of(fq->q.net, struct net, ipv6.frags); - ip6frag_expire_frag_queue(net, fq); + ip6frag_expire_frag_queue(fq->q.fqdir->net, fq); } static struct frag_queue * @@ -100,7 +98,7 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif) IPV6_ADDR_LINKLOCAL))) key.iif = 0; - q = inet_frag_find(&net->ipv6.frags, &key); + q = inet_frag_find(net->ipv6.fqdir, &key); if (!q) return NULL; @@ -200,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; - add_frag_mem_limit(fq->q.net, skb->truesize); + add_frag_mem_limit(fq->q.fqdir, skb->truesize); fragsize = -skb_network_offset(skb) + skb->len; if (fragsize > fq->q.max_size) @@ -254,7 +252,7 @@ err: static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev_tail, struct net_device *dev) { - struct net *net = container_of(fq->q.net, struct net, ipv6.frags); + struct net *net = fq->q.fqdir->net; unsigned int nhoff; void *reasm_data; int payload_len; @@ -401,23 +399,18 @@ static const struct inet6_protocol frag_protocol = { static struct ctl_table ip6_frags_ns_ctl_table[] = { { .procname = "ip6frag_high_thresh", - .data = &init_net.ipv6.frags.high_thresh, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra1 = &init_net.ipv6.frags.low_thresh }, { .procname = "ip6frag_low_thresh", - .data = &init_net.ipv6.frags.low_thresh, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_doulongvec_minmax, - .extra2 = &init_net.ipv6.frags.high_thresh }, { .procname = "ip6frag_time", - .data = &init_net.ipv6.frags.timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -449,12 +442,12 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) if (!table) goto err_alloc; - table[0].data = &net->ipv6.frags.high_thresh; - table[0].extra1 = &net->ipv6.frags.low_thresh; - table[1].data = &net->ipv6.frags.low_thresh; - table[1].extra2 = &net->ipv6.frags.high_thresh; - table[2].data = &net->ipv6.frags.timeout; } + table[0].data = &net->ipv6.fqdir->high_thresh; + table[0].extra1 = &net->ipv6.fqdir->low_thresh; + table[1].data = &net->ipv6.fqdir->low_thresh; + table[1].extra2 = &net->ipv6.fqdir->high_thresh; + table[2].data = &net->ipv6.fqdir->timeout; hdr = register_net_sysctl(net, "net/ipv6", table); if (!hdr) @@ -517,25 +510,24 @@ static int __net_init ipv6_frags_init_net(struct net *net) { int res; - net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; - net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; - net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; - net->ipv6.frags.f = &ip6_frags; - - res = inet_frags_init_net(&net->ipv6.frags); + res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net); if (res < 0) return res; + net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH; + net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH; + net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT; + res = ip6_frags_ns_sysctl_register(net); if (res < 0) - inet_frags_exit_net(&net->ipv6.frags); + fqdir_exit(net->ipv6.fqdir); return res; } static void __net_exit ipv6_frags_exit_net(struct net *net) { ip6_frags_ns_sysctl_unregister(net); - inet_frags_exit_net(&net->ipv6.frags); + fqdir_exit(net->ipv6.fqdir); } static struct pernet_operations ip6_frags_ops = { |