aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter/nf_conntrack_core.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter/nf_conntrack_core.c')
-rw-r--r--net/netfilter/nf_conntrack_core.c46
1 files changed, 38 insertions, 8 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 5f72b94b4918..b5869b9574b0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -335,7 +335,8 @@ begin:
h = __nf_conntrack_find(net, tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
- if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+ if (unlikely(nf_ct_is_dying(ct) ||
+ !atomic_inc_not_zero(&ct->ct_general.use)))
h = NULL;
else {
if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) {
@@ -425,7 +426,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
/* Remove from unconfirmed list */
hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
- __nf_conntrack_hash_insert(ct, hash, repl_hash);
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
weird delay cases. */
@@ -433,8 +433,16 @@ __nf_conntrack_confirm(struct sk_buff *skb)
add_timer(&ct->timeout);
atomic_inc(&ct->ct_general.use);
set_bit(IPS_CONFIRMED_BIT, &ct->status);
+
+ /* Since the lookup is lockless, hash insertion must be done after
+ * starting the timer and setting the CONFIRMED bit. The RCU barriers
+ * guarantee that no other CPU can find the conntrack before the above
+ * stores are visible.
+ */
+ __nf_conntrack_hash_insert(ct, hash, repl_hash);
NF_CT_STAT_INC(net, insert);
spin_unlock_bh(&nf_conntrack_lock);
+
help = nfct_help(ct);
if (help && help->helper)
nf_conntrack_event_cache(IPCT_HELPER, ct);
@@ -503,7 +511,8 @@ static noinline int early_drop(struct net *net, unsigned int hash)
cnt++;
}
- if (ct && unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+ if (ct && unlikely(nf_ct_is_dying(ct) ||
+ !atomic_inc_not_zero(&ct->ct_general.use)))
ct = NULL;
if (ct || cnt >= NF_CT_EVICTION_RANGE)
break;
@@ -552,23 +561,38 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
}
}
- ct = kmem_cache_zalloc(nf_conntrack_cachep, gfp);
+ /*
+ * Do not use kmem_cache_zalloc(), as this cache uses
+ * SLAB_DESTROY_BY_RCU.
+ */
+ ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
if (ct == NULL) {
pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
atomic_dec(&net->ct.count);
return ERR_PTR(-ENOMEM);
}
-
+ /*
+ * Let ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.next
+ * and ct->tuplehash[IP_CT_DIR_REPLY].hnnode.next unchanged.
+ */
+ memset(&ct->tuplehash[IP_CT_DIR_MAX], 0,
+ sizeof(*ct) - offsetof(struct nf_conn, tuplehash[IP_CT_DIR_MAX]));
spin_lock_init(&ct->lock);
- atomic_set(&ct->ct_general.use, 1);
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode.pprev = NULL;
ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
+ ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev = NULL;
/* Don't set timer yet: wait for confirmation */
setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct);
#ifdef CONFIG_NET_NS
ct->ct_net = net;
#endif
+ /*
+ * changes to lookup keys must be done before setting refcnt to 1
+ */
+ smp_wmb();
+ atomic_set(&ct->ct_general.use, 1);
return ct;
}
EXPORT_SYMBOL_GPL(nf_conntrack_alloc);
@@ -1267,13 +1291,19 @@ err_cache:
return ret;
}
+/*
+ * We need to use special "null" values, not used in hash table
+ */
+#define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
+#define DYING_NULLS_VAL ((1<<30)+1)
+
static int nf_conntrack_init_net(struct net *net)
{
int ret;
atomic_set(&net->ct.count, 0);
- INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0);
- INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0);
+ INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL);
+ INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL);
net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
if (!net->ct.stat) {
ret = -ENOMEM;