aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter/nf_conntrack_core.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter/nf_conntrack_core.c')
-rw-r--r--net/netfilter/nf_conntrack_core.c109
1 files changed, 61 insertions, 48 deletions
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9979f46c81dc..01130392b7c0 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -56,6 +56,8 @@
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
+#include "nf_internals.h"
+
#define NF_CONNTRACK_VERSION "0.5.0"
int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct,
@@ -96,19 +98,26 @@ static struct conntrack_gc_work conntrack_gc_work;
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
+ /* 1) Acquire the lock */
spin_lock(lock);
- while (unlikely(nf_conntrack_locks_all)) {
- spin_unlock(lock);
- /*
- * Order the 'nf_conntrack_locks_all' load vs. the
- * spin_unlock_wait() loads below, to ensure
- * that 'nf_conntrack_locks_all_lock' is indeed held:
- */
- smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
- spin_unlock_wait(&nf_conntrack_locks_all_lock);
- spin_lock(lock);
- }
+ /* 2) read nf_conntrack_locks_all, with ACQUIRE semantics
+ * It pairs with the smp_store_release() in nf_conntrack_all_unlock()
+ */
+ if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false))
+ return;
+
+ /* fast path failed, unlock */
+ spin_unlock(lock);
+
+ /* Slow path 1) get global lock */
+ spin_lock(&nf_conntrack_locks_all_lock);
+
+ /* Slow path 2) get the lock we want */
+ spin_lock(lock);
+
+ /* Slow path 3) release the global lock */
+ spin_unlock(&nf_conntrack_locks_all_lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_lock);
@@ -149,28 +158,27 @@ static void nf_conntrack_all_lock(void)
int i;
spin_lock(&nf_conntrack_locks_all_lock);
- nf_conntrack_locks_all = true;
- /*
- * Order the above store of 'nf_conntrack_locks_all' against
- * the spin_unlock_wait() loads below, such that if
- * nf_conntrack_lock() observes 'nf_conntrack_locks_all'
- * we must observe nf_conntrack_locks[] held:
- */
- smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
+ nf_conntrack_locks_all = true;
for (i = 0; i < CONNTRACK_LOCKS; i++) {
- spin_unlock_wait(&nf_conntrack_locks[i]);
+ spin_lock(&nf_conntrack_locks[i]);
+
+ /* This spin_unlock provides the "release" to ensure that
+ * nf_conntrack_locks_all==true is visible to everyone that
+ * acquired spin_lock(&nf_conntrack_locks[]).
+ */
+ spin_unlock(&nf_conntrack_locks[i]);
}
}
static void nf_conntrack_all_unlock(void)
{
- /*
- * All prior stores must be complete before we clear
+ /* All prior stores must be complete before we clear
* 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
* might observe the false value but not the entire
- * critical section:
+ * critical section.
+ * It pairs with the smp_load_acquire() in nf_conntrack_lock()
*/
smp_store_release(&nf_conntrack_locks_all, false);
spin_unlock(&nf_conntrack_locks_all_lock);
@@ -248,8 +256,8 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
u_int16_t l3num,
struct net *net, struct nf_conntrack_tuple *tuple)
{
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
unsigned int protoff;
u_int8_t protonum;
int ret;
@@ -398,22 +406,19 @@ static void
destroy_conntrack(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
pr_debug("destroy_conntrack(%p)\n", ct);
- NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
+ WARN_ON(atomic_read(&nfct->use) != 0);
if (unlikely(nf_ct_is_template(ct))) {
nf_ct_tmpl_free(ct);
return;
}
- rcu_read_lock();
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->destroy)
l4proto->destroy(ct);
- rcu_read_unlock();
-
local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
@@ -695,7 +700,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
{
/* This is the conntrack entry already in hashes that won race. */
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
- struct nf_conntrack_l4proto *l4proto;
+ const struct nf_conntrack_l4proto *l4proto;
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
@@ -757,12 +762,11 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* connections for unconfirmed conns. But packet copies and
* REJECT will give spurious warnings here.
*/
- /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
/* No external references means no one else could have
* confirmed us.
*/
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
* a race against nf_ct_get_next_corpse() possibly called from
@@ -1084,7 +1088,7 @@ static void gc_worker(struct work_struct *work)
static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
{
- INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+ INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker);
gc_work->next_gc_run = HZ;
gc_work->exiting = false;
}
@@ -1161,7 +1165,7 @@ void nf_conntrack_free(struct nf_conn *ct)
/* A freed object has refcnt == 0, that's
* the golden rule for SLAB_TYPESAFE_BY_RCU
*/
- NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
+ WARN_ON(atomic_read(&ct->ct_general.use) != 0);
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
@@ -1177,8 +1181,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto,
+ const struct nf_conntrack_l3proto *l3proto,
+ const struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff, u32 hash)
{
@@ -1289,8 +1293,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl,
unsigned int dataoff,
u_int16_t l3num,
u_int8_t protonum,
- struct nf_conntrack_l3proto *l3proto,
- struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_l3proto *l3proto,
+ const struct nf_conntrack_l4proto *l4proto)
{
const struct nf_conntrack_zone *zone;
struct nf_conntrack_tuple tuple;
@@ -1345,10 +1349,10 @@ unsigned int
nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum,
struct sk_buff *skb)
{
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
struct nf_conn *ct, *tmpl;
enum ip_conntrack_info ctinfo;
- struct nf_conntrack_l3proto *l3proto;
- struct nf_conntrack_l4proto *l4proto;
unsigned int *timeouts;
unsigned int dataoff;
u_int8_t protonum;
@@ -1415,7 +1419,7 @@ repeat:
/* Decide what timeout policy we want to apply to this flow. */
timeouts = nf_ct_timeout_lookup(net, ct, l4proto);
- ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts);
+ ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, timeouts);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
@@ -1469,7 +1473,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
struct nf_conn_help *help = nfct_help(ct);
/* Should be unconfirmed, so not in hash table yet */
- NF_CT_ASSERT(!nf_ct_is_confirmed(ct));
+ WARN_ON(nf_ct_is_confirmed(ct));
pr_debug("Altering reply tuple of %p to ", ct);
nf_ct_dump_tuple(newreply);
@@ -1491,7 +1495,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
unsigned long extra_jiffies,
int do_acct)
{
- NF_CT_ASSERT(skb);
+ WARN_ON(!skb);
/* Only update if this is not a fixed timeout */
if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
@@ -1689,6 +1693,18 @@ __nf_ct_unconfirmed_destroy(struct net *net)
}
}
+void nf_ct_unconfirmed_destroy(struct net *net)
+{
+ might_sleep();
+
+ if (atomic_read(&net->ct.count) > 0) {
+ __nf_ct_unconfirmed_destroy(net);
+ nf_queue_nf_hook_drop(net);
+ synchronize_net();
+ }
+}
+EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy);
+
void nf_ct_iterate_cleanup_net(struct net *net,
int (*iter)(struct nf_conn *i, void *data),
void *data, u32 portid, int report)
@@ -1700,14 +1716,10 @@ void nf_ct_iterate_cleanup_net(struct net *net,
if (atomic_read(&net->ct.count) == 0)
return;
- __nf_ct_unconfirmed_destroy(net);
-
d.iter = iter;
d.data = data;
d.net = net;
- synchronize_net();
-
nf_ct_iterate_cleanup(iter_net_only, &d, portid, report);
}
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net);
@@ -1733,6 +1745,7 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data)
if (atomic_read(&net->ct.count) == 0)
continue;
__nf_ct_unconfirmed_destroy(net);
+ nf_queue_nf_hook_drop(net);
}
rtnl_unlock();