aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter/nf_conntrack_core.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter/nf_conntrack_core.c')
-rw-r--r--net/netfilter/nf_conntrack_core.c154
1 files changed, 100 insertions, 54 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 94e18fb9690d..770a63103c7a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -74,10 +74,14 @@ static __read_mostly struct kmem_cache *nf_conntrack_cachep;
static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
static __read_mostly bool nf_conntrack_locks_all;
+/* serialize hash resizes and nf_ct_iterate_cleanup */
+static DEFINE_MUTEX(nf_conntrack_mutex);
+
#define GC_SCAN_INTERVAL (120u * HZ)
#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
-#define MAX_CHAINLEN 64u
+#define MIN_CHAINLEN 8u
+#define MAX_CHAINLEN (32u - MIN_CHAINLEN)
static struct conntrack_gc_work conntrack_gc_work;
@@ -188,11 +192,13 @@ seqcount_spinlock_t nf_conntrack_generation __read_mostly;
static siphash_key_t nf_conntrack_hash_rnd __read_mostly;
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid,
const struct net *net)
{
struct {
struct nf_conntrack_man src;
union nf_inet_addr dst_addr;
+ unsigned int zone;
u32 net_mix;
u16 dport;
u16 proto;
@@ -205,6 +211,7 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
/* The direction must be ignored, so handle usable members manually. */
combined.src = tuple->src;
combined.dst_addr = tuple->dst.u3;
+ combined.zone = zoneid;
combined.net_mix = net_hash_mix(net);
combined.dport = (__force __u16)tuple->dst.u.all;
combined.proto = tuple->dst.protonum;
@@ -219,15 +226,17 @@ static u32 scale_hash(u32 hash)
static u32 __hash_conntrack(const struct net *net,
const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid,
unsigned int size)
{
- return reciprocal_scale(hash_conntrack_raw(tuple, net), size);
+ return reciprocal_scale(hash_conntrack_raw(tuple, zoneid, net), size);
}
static u32 hash_conntrack(const struct net *net,
- const struct nf_conntrack_tuple *tuple)
+ const struct nf_conntrack_tuple *tuple,
+ unsigned int zoneid)
{
- return scale_hash(hash_conntrack_raw(tuple, net));
+ return scale_hash(hash_conntrack_raw(tuple, zoneid, net));
}
static bool nf_ct_get_tuple_ports(const struct sk_buff *skb,
@@ -650,9 +659,11 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct)
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
reply_hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
clean_from_lists(ct);
@@ -819,8 +830,20 @@ struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
const struct nf_conntrack_tuple *tuple)
{
- return __nf_conntrack_find_get(net, zone, tuple,
- hash_conntrack_raw(tuple, net));
+ unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+ struct nf_conntrack_tuple_hash *thash;
+
+ thash = __nf_conntrack_find_get(net, zone, tuple,
+ hash_conntrack_raw(tuple, zone_id, net));
+
+ if (thash)
+ return thash;
+
+ rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+ if (rid != zone_id)
+ return __nf_conntrack_find_get(net, zone, tuple,
+ hash_conntrack_raw(tuple, rid, net));
+ return thash;
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
@@ -842,6 +865,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
+ unsigned int max_chainlen;
unsigned int chainlen = 0;
unsigned int sequence;
int err = -EEXIST;
@@ -852,18 +876,22 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
do {
sequence = read_seqcount_begin(&nf_conntrack_generation);
hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_ORIGINAL));
reply_hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
+ max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
+
/* See if there's one in the list already, including reverse */
hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN)
+ if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@@ -873,7 +901,7 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN)
+ if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@@ -1103,8 +1131,8 @@ drop:
int
__nf_conntrack_confirm(struct sk_buff *skb)
{
+ unsigned int chainlen = 0, sequence, max_chainlen;
const struct nf_conntrack_zone *zone;
- unsigned int chainlen = 0, sequence;
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@@ -1133,8 +1161,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
hash = *(unsigned long *)&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev;
hash = scale_hash(hash);
reply_hash = hash_conntrack(net,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ nf_ct_zone_id(nf_ct_zone(ct), IP_CT_DIR_REPLY));
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* We're not in hash table, and we refuse to set up related
@@ -1168,6 +1196,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
goto dying;
}
+ max_chainlen = MIN_CHAINLEN + prandom_u32_max(MAX_CHAINLEN);
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */
@@ -1175,7 +1204,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN)
+ if (chainlen++ > max_chainlen)
goto chaintoolong;
}
@@ -1184,7 +1213,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
- if (chainlen++ > MAX_CHAINLEN) {
+ if (chainlen++ > max_chainlen) {
chaintoolong:
nf_ct_add_to_dying_list(ct);
NF_CT_STAT_INC(net, chaintoolong);
@@ -1246,7 +1275,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
rcu_read_lock();
begin:
nf_conntrack_get_ht(&ct_hash, &hsize);
- hash = __hash_conntrack(net, tuple, hsize);
+ hash = __hash_conntrack(net, tuple, nf_ct_zone_id(zone, IP_CT_DIR_REPLY), hsize);
hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
@@ -1687,8 +1716,8 @@ resolve_normal_ct(struct nf_conn *tmpl,
struct nf_conntrack_tuple_hash *h;
enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
+ u32 hash, zone_id, rid;
struct nf_conn *ct;
- u32 hash;
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, state->pf, protonum, state->net,
@@ -1699,8 +1728,20 @@ resolve_normal_ct(struct nf_conn *tmpl,
/* look for tuple match */
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
- hash = hash_conntrack_raw(&tuple, state->net);
+
+ zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
+ hash = hash_conntrack_raw(&tuple, zone_id, state->net);
h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
+
+ if (!h) {
+ rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
+ if (zone_id != rid) {
+ u32 tmp = hash_conntrack_raw(&tuple, rid, state->net);
+
+ h = __nf_conntrack_find_get(state->net, zone, &tuple, tmp);
+ }
+ }
+
if (!h) {
h = init_conntrack(state->net, tmpl, &tuple,
skb, dataoff, hash);
@@ -2225,28 +2266,31 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data),
spinlock_t *lockp;
for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+ struct hlist_nulls_head *hslot = &nf_conntrack_hash[*bucket];
+
+ if (hlist_nulls_empty(hslot))
+ continue;
+
lockp = &nf_conntrack_locks[*bucket % CONNTRACK_LOCKS];
local_bh_disable();
nf_conntrack_lock(lockp);
- if (*bucket < nf_conntrack_htable_size) {
- hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) {
- if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
- continue;
- /* All nf_conn objects are added to hash table twice, one
- * for original direction tuple, once for the reply tuple.
- *
- * Exception: In the IPS_NAT_CLASH case, only the reply
- * tuple is added (the original tuple already existed for
- * a different object).
- *
- * We only need to call the iterator once for each
- * conntrack, so we just use the 'reply' direction
- * tuple while iterating.
- */
- ct = nf_ct_tuplehash_to_ctrack(h);
- if (iter(ct, data))
- goto found;
- }
+ hlist_nulls_for_each_entry(h, n, hslot, hnnode) {
+ if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY)
+ continue;
+ /* All nf_conn objects are added to hash table twice, one
+ * for original direction tuple, once for the reply tuple.
+ *
+ * Exception: In the IPS_NAT_CLASH case, only the reply
+ * tuple is added (the original tuple already existed for
+ * a different object).
+ *
+ * We only need to call the iterator once for each
+ * conntrack, so we just use the 'reply' direction
+ * tuple while iterating.
+ */
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ if (iter(ct, data))
+ goto found;
}
spin_unlock(lockp);
local_bh_enable();
@@ -2264,26 +2308,20 @@ found:
static void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report)
{
- unsigned int bucket = 0, sequence;
+ unsigned int bucket = 0;
struct nf_conn *ct;
might_sleep();
- for (;;) {
- sequence = read_seqcount_begin(&nf_conntrack_generation);
-
- while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
- /* Time to push up daises... */
+ mutex_lock(&nf_conntrack_mutex);
+ while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
+ /* Time to push up daises... */
- nf_ct_delete(ct, portid, report);
- nf_ct_put(ct);
- cond_resched();
- }
-
- if (!read_seqcount_retry(&nf_conntrack_generation, sequence))
- break;
- bucket = 0;
+ nf_ct_delete(ct, portid, report);
+ nf_ct_put(ct);
+ cond_resched();
}
+ mutex_unlock(&nf_conntrack_mutex);
}
struct iter_data {
@@ -2519,8 +2557,10 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
if (!hash)
return -ENOMEM;
+ mutex_lock(&nf_conntrack_mutex);
old_size = nf_conntrack_htable_size;
if (old_size == hashsize) {
+ mutex_unlock(&nf_conntrack_mutex);
kvfree(hash);
return 0;
}
@@ -2537,12 +2577,16 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
for (i = 0; i < nf_conntrack_htable_size; i++) {
while (!hlist_nulls_empty(&nf_conntrack_hash[i])) {
+ unsigned int zone_id;
+
h = hlist_nulls_entry(nf_conntrack_hash[i].first,
struct nf_conntrack_tuple_hash, hnnode);
ct = nf_ct_tuplehash_to_ctrack(h);
hlist_nulls_del_rcu(&h->hnnode);
+
+ zone_id = nf_ct_zone_id(nf_ct_zone(ct), NF_CT_DIRECTION(h));
bucket = __hash_conntrack(nf_ct_net(ct),
- &h->tuple, hashsize);
+ &h->tuple, zone_id, hashsize);
hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
}
}
@@ -2556,6 +2600,8 @@ int nf_conntrack_hash_resize(unsigned int hashsize)
nf_conntrack_all_unlock();
local_bh_enable();
+ mutex_unlock(&nf_conntrack_mutex);
+
synchronize_net();
kvfree(old_hash);
return 0;