about summary refs log tree commit diff stats
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r-- net/netfilter/Kconfig 13
-rw-r--r-- net/netfilter/Makefile 1
-rw-r--r-- net/netfilter/ipvs/ip_vs_ctl.c 18
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c 48
-rw-r--r-- net/netfilter/nf_conntrack_core.c 162
-rw-r--r-- net/netfilter/nf_conntrack_ecache.c 264
-rw-r--r-- net/netfilter/nf_conntrack_ftp.c 2
-rw-r--r-- net/netfilter/nf_conntrack_helper.c 14
-rw-r--r-- net/netfilter/nf_conntrack_netbios_ns.c 2
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c 329
-rw-r--r-- net/netfilter/nf_conntrack_proto_dccp.c 31
-rw-r--r-- net/netfilter/nf_conntrack_proto_gre.c 5
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c 27
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c 140
-rw-r--r-- net/netfilter/nf_log.c 6
-rw-r--r-- net/netfilter/nf_queue.c 4
-rw-r--r-- net/netfilter/nfnetlink.c 28
-rw-r--r-- net/netfilter/nfnetlink_queue.c 4
-rw-r--r-- net/netfilter/x_tables.c 54
-rw-r--r-- net/netfilter/xt_NFQUEUE.c 93
-rw-r--r-- net/netfilter/xt_TCPMSS.c 6
-rw-r--r-- net/netfilter/xt_osf.c 428
-rw-r--r-- net/netfilter/xt_policy.c 2
-rw-r--r-- net/netfilter/xt_realm.c 2
-rw-r--r-- net/netfilter/xt_socket.c 63
25 files changed, 1256 insertions, 490 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c26a20c58dde..634d14affc8d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -917,6 +917,19 @@ config NETFILTER_XT_MATCH_U32
Details and examples are in the kernel module source.
+config NETFILTER_XT_MATCH_OSF
+ tristate '"osf" Passive OS fingerprint match'
+ depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
+ help
+ This option selects the Passive OS Fingerprinting match module
+ that allows to passively match the remote operating system by
+ analyzing incoming TCP SYN packets.
+
+ Rules and loading software can be downloaded from
+ http://www.ioremap.net/projects/osf
+
+ To compile it as a module, choose M here. If unsure, say N.
+
endif # NETFILTER_XTABLES
endmenu
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 6282060fbda9..49f62ee4e9ff 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o
obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index e01061f49cdc..7c1333c67ff3 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3345,22 +3345,8 @@ static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
static int __init ip_vs_genl_register(void)
{
- int ret, i;
-
- ret = genl_register_family(&ip_vs_genl_family);
- if (ret)
- return ret;
-
- for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
- ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
- if (ret)
- goto err_out;
- }
- return 0;
-
-err_out:
- genl_unregister_family(&ip_vs_genl_family);
- return ret;
+ return genl_register_family_with_ops(&ip_vs_genl_family,
+ ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
}
static void ip_vs_genl_unregister(void)
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 425ab144f15d..5874657af7f2 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -260,8 +260,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_send_check(ip_hdr(skb));
/* drop old route */
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
@@ -324,8 +324,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* drop old route */
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
@@ -388,8 +388,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error_put;
/* drop old route */
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
/* mangle the packet */
if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -465,8 +465,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error_put;
/* drop old route */
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
/* mangle the packet */
if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
@@ -553,8 +553,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
goto tx_error;
}
- if (skb->dst)
- skb->dst->ops->update_pmtu(skb->dst, mtu);
+ if (skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
df |= (old_iph->frag_off & htons(IP_DF));
@@ -596,8 +596,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
/* drop old route */
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
/*
* Push down and install the IPIP header.
@@ -665,8 +665,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n");
goto tx_error;
}
- if (skb->dst)
- skb->dst->ops->update_pmtu(skb->dst, mtu);
+ if (skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) {
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
@@ -702,8 +702,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
/* drop old route */
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
/*
* Push down and install the IPIP header.
@@ -775,8 +775,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
ip_send_check(ip_hdr(skb));
/* drop old route */
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
@@ -828,8 +828,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
}
/* drop old route */
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
@@ -900,8 +900,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error_put;
/* drop the old route when skb is not shared */
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
ip_vs_nat_icmp(skb, pp, cp, 0);
@@ -975,8 +975,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
goto tx_error_put;
/* drop the old route when skb is not shared */
- dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ skb_dst_drop(skb);
+ skb_dst_set(skb, &rt->u.dst);
ip_vs_nat_icmp_v6(skb, pp, cp, 0);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8020db6274b8..5f72b94b4918 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -39,6 +39,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
@@ -182,10 +183,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
NF_CT_ASSERT(!timer_pending(&ct->timeout));
- if (!test_bit(IPS_DYING_BIT, &ct->status))
- nf_conntrack_event(IPCT_DESTROY, ct);
- set_bit(IPS_DYING_BIT, &ct->status);
-
/* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock
* to nf_conntrack_lock!!! -HW */
@@ -219,27 +216,70 @@ destroy_conntrack(struct nf_conntrack *nfct)
nf_conntrack_free(ct);
}
-static void death_by_timeout(unsigned long ul_conntrack)
+void nf_ct_delete_from_lists(struct nf_conn *ct)
{
- struct nf_conn *ct = (void *)ul_conntrack;
struct net *net = nf_ct_net(ct);
- struct nf_conn_help *help = nfct_help(ct);
- struct nf_conntrack_helper *helper;
-
- if (help) {
- rcu_read_lock();
- helper = rcu_dereference(help->helper);
- if (helper && helper->destroy)
- helper->destroy(ct);
- rcu_read_unlock();
- }
+ nf_ct_helper_destroy(ct);
spin_lock_bh(&nf_conntrack_lock);
/* Inside lock so preempt is disabled on module removal path.
* Otherwise we can get spurious warnings. */
NF_CT_STAT_INC(net, delete_list);
clean_from_lists(ct);
spin_unlock_bh(&nf_conntrack_lock);
+}
+EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
+
+static void death_by_event(unsigned long ul_conntrack)
+{
+ struct nf_conn *ct = (void *)ul_conntrack;
+ struct net *net = nf_ct_net(ct);
+
+ if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
+ /* bad luck, let's retry again */
+ ct->timeout.expires = jiffies +
+ (random32() % net->ct.sysctl_events_retry_timeout);
+ add_timer(&ct->timeout);
+ return;
+ }
+ /* we've got the event delivered, now it's dying */
+ set_bit(IPS_DYING_BIT, &ct->status);
+ spin_lock(&nf_conntrack_lock);
+ hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
+ spin_unlock(&nf_conntrack_lock);
+ nf_ct_put(ct);
+}
+
+void nf_ct_insert_dying_list(struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+
+ /* add this conntrack to the dying list */
+ spin_lock_bh(&nf_conntrack_lock);
+ hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+ &net->ct.dying);
+ spin_unlock_bh(&nf_conntrack_lock);
+ /* set a new timer to retry event delivery */
+ setup_timer(&ct->timeout, death_by_event, (unsigned long)ct);
+ ct->timeout.expires = jiffies +
+ (random32() % net->ct.sysctl_events_retry_timeout);
+ add_timer(&ct->timeout);
+}
+EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
+
+static void death_by_timeout(unsigned long ul_conntrack)
+{
+ struct nf_conn *ct = (void *)ul_conntrack;
+
+ if (!test_bit(IPS_DYING_BIT, &ct->status) &&
+ unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) {
+ /* destroy event was not delivered */
+ nf_ct_delete_from_lists(ct);
+ nf_ct_insert_dying_list(ct);
+ return;
+ }
+ set_bit(IPS_DYING_BIT, &ct->status);
+ nf_ct_delete_from_lists(ct);
nf_ct_put(ct);
}
@@ -398,11 +438,7 @@ __nf_conntrack_confirm(struct sk_buff *skb)
help = nfct_help(ct);
if (help && help->helper)
nf_conntrack_event_cache(IPCT_HELPER, ct);
-#ifdef CONFIG_NF_NAT_NEEDED
- if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
- test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
- nf_conntrack_event_cache(IPCT_NATINFO, ct);
-#endif
+
nf_conntrack_event_cache(master_ct(ct) ?
IPCT_RELATED : IPCT_NEW, ct);
return NF_ACCEPT;
@@ -523,6 +559,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
return ERR_PTR(-ENOMEM);
}
+ spin_lock_init(&ct->lock);
atomic_set(&ct->ct_general.use, 1);
ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
@@ -580,6 +617,7 @@ init_conntrack(struct net *net,
}
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+ nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
spin_lock_bh(&nf_conntrack_lock);
exp = nf_ct_find_expectation(net, tuple);
@@ -807,13 +845,9 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
unsigned long extra_jiffies,
int do_acct)
{
- int event = 0;
-
NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
NF_CT_ASSERT(skb);
- spin_lock_bh(&nf_conntrack_lock);
-
/* Only update if this is not a fixed timeout */
if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
goto acct;
@@ -821,19 +855,14 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
/* If not in hash table, timer will not be active yet */
if (!nf_ct_is_confirmed(ct)) {
ct->timeout.expires = extra_jiffies;
- event = IPCT_REFRESH;
} else {
unsigned long newtime = jiffies + extra_jiffies;
/* Only update the timeout if the new timeout is at least
HZ jiffies from the old timeout. Need del_timer for race
avoidance (may already be dying). */
- if (newtime - ct->timeout.expires >= HZ
- && del_timer(&ct->timeout)) {
- ct->timeout.expires = newtime;
- add_timer(&ct->timeout);
- event = IPCT_REFRESH;
- }
+ if (newtime - ct->timeout.expires >= HZ)
+ mod_timer_pending(&ct->timeout, newtime);
}
acct:
@@ -842,17 +871,13 @@ acct:
acct = nf_conn_acct_find(ct);
if (acct) {
+ spin_lock_bh(&ct->lock);
acct[CTINFO2DIR(ctinfo)].packets++;
acct[CTINFO2DIR(ctinfo)].bytes +=
skb->len - skb_network_offset(skb);
+ spin_unlock_bh(&ct->lock);
}
}
-
- spin_unlock_bh(&nf_conntrack_lock);
-
- /* must be unlocked when calling event cache */
- if (event)
- nf_conntrack_event_cache(event, ct);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
@@ -864,14 +889,14 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
if (do_acct) {
struct nf_conn_counter *acct;
- spin_lock_bh(&nf_conntrack_lock);
acct = nf_conn_acct_find(ct);
if (acct) {
+ spin_lock_bh(&ct->lock);
acct[CTINFO2DIR(ctinfo)].packets++;
acct[CTINFO2DIR(ctinfo)].bytes +=
skb->len - skb_network_offset(skb);
+ spin_unlock_bh(&ct->lock);
}
- spin_unlock_bh(&nf_conntrack_lock);
}
if (del_timer(&ct->timeout)) {
@@ -1001,15 +1026,22 @@ struct __nf_ct_flush_report {
int report;
};
-static int kill_all(struct nf_conn *i, void *data)
+static int kill_report(struct nf_conn *i, void *data)
{
struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
- /* get_next_corpse sets the dying bit for us */
- nf_conntrack_event_report(IPCT_DESTROY,
- i,
- fr->pid,
- fr->report);
+ /* If we fail to deliver the event, death_by_timeout() will retry */
+ if (nf_conntrack_event_report(IPCT_DESTROY, i,
+ fr->pid, fr->report) < 0)
+ return 1;
+
+ /* Avoid the delivery of the destroy event in death_by_timeout(). */
+ set_bit(IPS_DYING_BIT, &i->status);
+ return 1;
+}
+
+static int kill_all(struct nf_conn *i, void *data)
+{
return 1;
}
@@ -1023,15 +1055,30 @@ void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
}
EXPORT_SYMBOL_GPL(nf_ct_free_hashtable);
-void nf_conntrack_flush(struct net *net, u32 pid, int report)
+void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
{
struct __nf_ct_flush_report fr = {
.pid = pid,
.report = report,
};
- nf_ct_iterate_cleanup(net, kill_all, &fr);
+ nf_ct_iterate_cleanup(net, kill_report, &fr);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
+
+static void nf_ct_release_dying_list(void)
+{
+ struct nf_conntrack_tuple_hash *h;
+ struct nf_conn *ct;
+ struct hlist_nulls_node *n;
+
+ spin_lock_bh(&nf_conntrack_lock);
+ hlist_nulls_for_each_entry(h, n, &init_net.ct.dying, hnnode) {
+ ct = nf_ct_tuplehash_to_ctrack(h);
+ /* never fails to remove them, no listeners at this point */
+ nf_ct_kill(ct);
+ }
+ spin_unlock_bh(&nf_conntrack_lock);
}
-EXPORT_SYMBOL_GPL(nf_conntrack_flush);
static void nf_conntrack_cleanup_init_net(void)
{
@@ -1042,10 +1089,9 @@ static void nf_conntrack_cleanup_init_net(void)
static void nf_conntrack_cleanup_net(struct net *net)
{
- nf_ct_event_cache_flush(net);
- nf_conntrack_ecache_fini(net);
i_see_dead_people:
- nf_conntrack_flush(net, 0, 0);
+ nf_ct_iterate_cleanup(net, kill_all, NULL);
+ nf_ct_release_dying_list();
if (atomic_read(&net->ct.count) != 0) {
schedule();
goto i_see_dead_people;
@@ -1056,6 +1102,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
+ nf_conntrack_ecache_fini(net);
nf_conntrack_acct_fini(net);
nf_conntrack_expect_fini(net);
free_percpu(net->ct.stat);
@@ -1226,14 +1273,12 @@ static int nf_conntrack_init_net(struct net *net)
atomic_set(&net->ct.count, 0);
INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0);
+ INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0);
net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
if (!net->ct.stat) {
ret = -ENOMEM;
goto err_stat;
}
- ret = nf_conntrack_ecache_init(net);
- if (ret < 0)
- goto err_ecache;
net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
&net->ct.hash_vmalloc, 1);
if (!net->ct.hash) {
@@ -1247,6 +1292,9 @@ static int nf_conntrack_init_net(struct net *net)
ret = nf_conntrack_acct_init(net);
if (ret < 0)
goto err_acct;
+ ret = nf_conntrack_ecache_init(net);
+ if (ret < 0)
+ goto err_ecache;
/* Set up fake conntrack:
- to never be deleted, not in any hashes */
@@ -1259,14 +1307,14 @@ static int nf_conntrack_init_net(struct net *net)
return 0;
+err_ecache:
+ nf_conntrack_acct_fini(net);
err_acct:
nf_conntrack_expect_fini(net);
err_expect:
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
err_hash:
- nf_conntrack_ecache_fini(net);
-err_ecache:
free_percpu(net->ct.stat);
err_stat:
return ret;
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index dee4190209cc..aee560b4768d 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -16,121 +16,245 @@
#include <linux/stddef.h>
#include <linux/err.h>
#include <linux/percpu.h>
-#include <linux/notifier.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_extend.h>
-ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain);
-EXPORT_SYMBOL_GPL(nf_conntrack_chain);
+static DEFINE_MUTEX(nf_ct_ecache_mutex);
-ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain);
-EXPORT_SYMBOL_GPL(nf_ct_expect_chain);
+struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly;
+EXPORT_SYMBOL_GPL(nf_conntrack_event_cb);
+
+struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly;
+EXPORT_SYMBOL_GPL(nf_expect_event_cb);
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
-static inline void
-__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
+void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
- if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
- && ecache->events) {
+ unsigned long events;
+ struct nf_ct_event_notifier *notify;
+ struct nf_conntrack_ecache *e;
+
+ rcu_read_lock();
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ if (notify == NULL)
+ goto out_unlock;
+
+ e = nf_ct_ecache_find(ct);
+ if (e == NULL)
+ goto out_unlock;
+
+ events = xchg(&e->cache, 0);
+
+ if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct) && events) {
struct nf_ct_event item = {
- .ct = ecache->ct,
+ .ct = ct,
.pid = 0,
.report = 0
};
+ int ret;
+ /* We make a copy of the missed event cache without taking
+ * the lock, thus we may send missed events twice. However,
+ * this does not harm and it happens very rarely. */
+ unsigned long missed = e->missed;
- atomic_notifier_call_chain(&nf_conntrack_chain,
- ecache->events,
- &item);
+ ret = notify->fcn(events | missed, &item);
+ if (unlikely(ret < 0 || missed)) {
+ spin_lock_bh(&ct->lock);
+ if (ret < 0)
+ e->missed |= events;
+ else
+ e->missed &= ~missed;
+ spin_unlock_bh(&ct->lock);
+ }
}
- ecache->events = 0;
- nf_ct_put(ecache->ct);
- ecache->ct = NULL;
+out_unlock:
+ rcu_read_unlock();
}
+EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
-/* Deliver all cached events for a particular conntrack. This is called
- * by code prior to async packet handling for freeing the skb */
-void nf_ct_deliver_cached_events(const struct nf_conn *ct)
+int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
{
- struct net *net = nf_ct_net(ct);
- struct nf_conntrack_ecache *ecache;
-
- local_bh_disable();
- ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
- if (ecache->ct == ct)
- __nf_ct_deliver_cached_events(ecache);
- local_bh_enable();
+ int ret = 0;
+ struct nf_ct_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ if (notify != NULL) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ rcu_assign_pointer(nf_conntrack_event_cb, new);
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
+
+out_unlock:
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
}
-EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
+EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
-/* Deliver cached events for old pending events, if current conntrack != old */
-void __nf_ct_event_cache_init(struct nf_conn *ct)
+void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
{
- struct net *net = nf_ct_net(ct);
- struct nf_conntrack_ecache *ecache;
-
- /* take care of delivering potentially old events */
- ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
- BUG_ON(ecache->ct == ct);
- if (ecache->ct)
- __nf_ct_deliver_cached_events(ecache);
- /* initialize for this conntrack/packet */
- ecache->ct = ct;
- nf_conntrack_get(&ct->ct_general);
+ struct nf_ct_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_conntrack_event_cb);
+ BUG_ON(notify != new);
+ rcu_assign_pointer(nf_conntrack_event_cb, NULL);
+ mutex_unlock(&nf_ct_ecache_mutex);
}
-EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
+EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
-/* flush the event cache - touches other CPU's data and must not be called
- * while packets are still passing through the code */
-void nf_ct_event_cache_flush(struct net *net)
+int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
{
- struct nf_conntrack_ecache *ecache;
- int cpu;
+ int ret = 0;
+ struct nf_exp_event_notifier *notify;
- for_each_possible_cpu(cpu) {
- ecache = per_cpu_ptr(net->ct.ecache, cpu);
- if (ecache->ct)
- nf_ct_put(ecache->ct);
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_expect_event_cb);
+ if (notify != NULL) {
+ ret = -EBUSY;
+ goto out_unlock;
}
+ rcu_assign_pointer(nf_expect_event_cb, new);
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
+
+out_unlock:
+ mutex_unlock(&nf_ct_ecache_mutex);
+ return ret;
}
+EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
-int nf_conntrack_ecache_init(struct net *net)
+void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
{
- net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache);
- if (!net->ct.ecache)
- return -ENOMEM;
- return 0;
+ struct nf_exp_event_notifier *notify;
+
+ mutex_lock(&nf_ct_ecache_mutex);
+ notify = rcu_dereference(nf_expect_event_cb);
+ BUG_ON(notify != new);
+ rcu_assign_pointer(nf_expect_event_cb, NULL);
+ mutex_unlock(&nf_ct_ecache_mutex);
}
+EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
-void nf_conntrack_ecache_fini(struct net *net)
+#define NF_CT_EVENTS_DEFAULT 1
+static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
+static int nf_ct_events_retry_timeout __read_mostly = 15*HZ;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table event_sysctl_table[] = {
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nf_conntrack_events",
+ .data = &init_net.ct.sysctl_events,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .ctl_name = CTL_UNNUMBERED,
+ .procname = "nf_conntrack_events_retry_timeout",
+ .data = &init_net.ct.sysctl_events_retry_timeout,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {}
+};
+#endif /* CONFIG_SYSCTL */
+
+static struct nf_ct_ext_type event_extend __read_mostly = {
+ .len = sizeof(struct nf_conntrack_ecache),
+ .align = __alignof__(struct nf_conntrack_ecache),
+ .id = NF_CT_EXT_ECACHE,
+};
+
+#ifdef CONFIG_SYSCTL
+static int nf_conntrack_event_init_sysctl(struct net *net)
{
- free_percpu(net->ct.ecache);
+ struct ctl_table *table;
+
+ table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table),
+ GFP_KERNEL);
+ if (!table)
+ goto out;
+
+ table[0].data = &net->ct.sysctl_events;
+ table[1].data = &net->ct.sysctl_events_retry_timeout;
+
+ net->ct.event_sysctl_header =
+ register_net_sysctl_table(net,
+ nf_net_netfilter_sysctl_path, table);
+ if (!net->ct.event_sysctl_header) {
+ printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n");
+ goto out_register;
+ }
+ return 0;
+
+out_register:
+ kfree(table);
+out:
+ return -ENOMEM;
}
-int nf_conntrack_register_notifier(struct notifier_block *nb)
+static void nf_conntrack_event_fini_sysctl(struct net *net)
{
- return atomic_notifier_chain_register(&nf_conntrack_chain, nb);
+ struct ctl_table *table;
+
+ table = net->ct.event_sysctl_header->ctl_table_arg;
+ unregister_net_sysctl_table(net->ct.event_sysctl_header);
+ kfree(table);
+}
+#else
+static int nf_conntrack_event_init_sysctl(struct net *net)
+{
+ return 0;
}
-EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
-int nf_conntrack_unregister_notifier(struct notifier_block *nb)
+static void nf_conntrack_event_fini_sysctl(struct net *net)
{
- return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb);
}
-EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
+#endif /* CONFIG_SYSCTL */
-int nf_ct_expect_register_notifier(struct notifier_block *nb)
+int nf_conntrack_ecache_init(struct net *net)
{
- return atomic_notifier_chain_register(&nf_ct_expect_chain, nb);
+ int ret;
+
+ net->ct.sysctl_events = nf_ct_events;
+ net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout;
+
+ if (net_eq(net, &init_net)) {
+ ret = nf_ct_extend_register(&event_extend);
+ if (ret < 0) {
+ printk(KERN_ERR "nf_ct_event: Unable to register "
+ "event extension.\n");
+ goto out_extend_register;
+ }
+ }
+
+ ret = nf_conntrack_event_init_sysctl(net);
+ if (ret < 0)
+ goto out_sysctl;
+
+ return 0;
+
+out_sysctl:
+ if (net_eq(net, &init_net))
+ nf_ct_extend_unregister(&event_extend);
+out_extend_register:
+ return ret;
}
-EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
-int nf_ct_expect_unregister_notifier(struct notifier_block *nb)
+void nf_conntrack_ecache_fini(struct net *net)
{
- return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb);
+ nf_conntrack_event_fini_sysctl(net);
+ if (net_eq(net, &init_net))
+ nf_ct_extend_unregister(&event_extend);
}
-EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 00fecc385f9b..5509dd1f14cf 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -338,11 +338,9 @@ static void update_nl_seq(struct nf_conn *ct, u32 nl_seq,
if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
- nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
} else if (oldest != NUM_SEQ_TO_REMEMBER &&
after(nl_seq, info->seq_aft_nl[dir][oldest])) {
info->seq_aft_nl[dir][oldest] = nl_seq;
- nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct);
}
}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 0fa5a422959f..65c2a7bc3afc 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -136,6 +136,20 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
return 0;
}
+void nf_ct_helper_destroy(struct nf_conn *ct)
+{
+ struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conntrack_helper *helper;
+
+ if (help) {
+ rcu_read_lock();
+ helper = rcu_dereference(help->helper);
+ if (helper && helper->destroy)
+ helper->destroy(ct);
+ rcu_read_unlock();
+ }
+}
+
int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
{
unsigned int h = helper_hash(&me->tuple);
diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c
index 8a3875e36ec2..497b2224536f 100644
--- a/net/netfilter/nf_conntrack_netbios_ns.c
+++ b/net/netfilter/nf_conntrack_netbios_ns.c
@@ -48,7 +48,7 @@ static int help(struct sk_buff *skb, unsigned int protoff,
{
struct nf_conntrack_expect *exp;
struct iphdr *iph = ip_hdr(skb);
- struct rtable *rt = skb->rtable;
+ struct rtable *rt = skb_rtable(skb);
struct in_device *in_dev;
__be32 mask = 0;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c523f0b8cee5..49479d194570 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -27,7 +27,6 @@
#include <linux/netlink.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
-#include <linux/notifier.h>
#include <linux/netfilter.h>
#include <net/netlink.h>
@@ -144,7 +143,7 @@ nla_put_failure:
}
static inline int
-ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct)
+ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
{
struct nf_conntrack_l4proto *l4proto;
struct nlattr *nest_proto;
@@ -346,23 +345,21 @@ nla_put_failure:
return -1;
}
-#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
-
static int
ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
- int event, int nowait,
- const struct nf_conn *ct)
+ int event, struct nf_conn *ct)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
- unsigned char *b = skb_tail_pointer(skb);
+ unsigned int flags = pid ? NLM_F_MULTI : 0;
event |= NFNL_SUBSYS_CTNETLINK << 8;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
- nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
+ nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = nf_ct_l3num(ct);
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
@@ -370,14 +367,14 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
@@ -395,132 +392,109 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
goto nla_put_failure;
- nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ nlmsg_end(skb, nlh);
return skb->len;
nlmsg_failure:
nla_put_failure:
- nlmsg_trim(skb, b);
+ nlmsg_cancel(skb, nlh);
return -1;
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
-/*
- * The general structure of a ctnetlink event is
- *
- * CTA_TUPLE_ORIG
- * <l3/l4-proto-attributes>
- * CTA_TUPLE_REPLY
- * <l3/l4-proto-attributes>
- * CTA_ID
- * ...
- * CTA_PROTOINFO
- * <l4-proto-attributes>
- * CTA_TUPLE_MASTER
- * <l3/l4-proto-attributes>
- *
- * Therefore the formular is
- *
- * size = sizeof(headers) + sizeof(generic_nlas) + 3 * sizeof(tuple_nlas)
- * + sizeof(protoinfo_nlas)
- */
-static struct sk_buff *
-ctnetlink_alloc_skb(const struct nf_conntrack_tuple *tuple, gfp_t gfp)
+static inline size_t
+ctnetlink_proto_size(const struct nf_conn *ct)
{
struct nf_conntrack_l3proto *l3proto;
struct nf_conntrack_l4proto *l4proto;
- int len;
-
-#define NLA_TYPE_SIZE(type) nla_total_size(sizeof(type))
-
- /* proto independant part */
- len = NLMSG_SPACE(sizeof(struct nfgenmsg))
- + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
- + 3 * nla_total_size(0) /* CTA_TUPLE_IP */
- + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */
- + 3 * NLA_TYPE_SIZE(u_int8_t) /* CTA_PROTO_NUM */
- + NLA_TYPE_SIZE(u_int32_t) /* CTA_ID */
- + NLA_TYPE_SIZE(u_int32_t) /* CTA_STATUS */
+ size_t len = 0;
+
+ rcu_read_lock();
+ l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
+ len += l3proto->nla_size;
+
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ len += l4proto->nla_size;
+ rcu_read_unlock();
+
+ return len;
+}
+
+static inline size_t
+ctnetlink_nlmsg_size(const struct nf_conn *ct)
+{
+ return NLMSG_ALIGN(sizeof(struct nfgenmsg))
+ + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */
+ + 3 * nla_total_size(0) /* CTA_TUPLE_IP */
+ + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */
+ + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
#ifdef CONFIG_NF_CT_ACCT
- + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
- + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_PACKETS */
- + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_BYTES */
+ + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
+ + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */
+ + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */
#endif
- + NLA_TYPE_SIZE(u_int32_t) /* CTA_TIMEOUT */
- + nla_total_size(0) /* CTA_PROTOINFO */
- + nla_total_size(0) /* CTA_HELP */
- + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
+ + nla_total_size(0) /* CTA_PROTOINFO */
+ + nla_total_size(0) /* CTA_HELP */
+ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */
#ifdef CONFIG_NF_CONNTRACK_SECMARK
- + NLA_TYPE_SIZE(u_int32_t) /* CTA_SECMARK */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_SECMARK */
#endif
#ifdef CONFIG_NF_NAT_NEEDED
- + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
- + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_POS */
- + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_BEFORE */
- + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_AFTER */
+ + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */
+ + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */
#endif
#ifdef CONFIG_NF_CONNTRACK_MARK
- + NLA_TYPE_SIZE(u_int32_t) /* CTA_MARK */
+ + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */
#endif
- ;
-
-#undef NLA_TYPE_SIZE
-
- rcu_read_lock();
- l3proto = __nf_ct_l3proto_find(tuple->src.l3num);
- len += l3proto->nla_size;
-
- l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum);
- len += l4proto->nla_size;
- rcu_read_unlock();
-
- return alloc_skb(len, gfp);
+ + ctnetlink_proto_size(ct)
+ ;
}
-static int ctnetlink_conntrack_event(struct notifier_block *this,
- unsigned long events, void *ptr)
+static int
+ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
struct nlattr *nest_parms;
- struct nf_ct_event *item = (struct nf_ct_event *)ptr;
struct nf_conn *ct = item->ct;
struct sk_buff *skb;
unsigned int type;
- sk_buff_data_t b;
unsigned int flags = 0, group;
+ int err;
/* ignore our fake conntrack entry */
if (ct == &nf_conntrack_untracked)
- return NOTIFY_DONE;
+ return 0;
- if (events & IPCT_DESTROY) {
+ if (events & (1 << IPCT_DESTROY)) {
type = IPCTNL_MSG_CT_DELETE;
group = NFNLGRP_CONNTRACK_DESTROY;
- } else if (events & (IPCT_NEW | IPCT_RELATED)) {
+ } else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) {
type = IPCTNL_MSG_CT_NEW;
flags = NLM_F_CREATE|NLM_F_EXCL;
group = NFNLGRP_CONNTRACK_NEW;
- } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) {
+ } else if (events) {
type = IPCTNL_MSG_CT_NEW;
group = NFNLGRP_CONNTRACK_UPDATE;
} else
- return NOTIFY_DONE;
+ return 0;
if (!item->report && !nfnetlink_has_listeners(group))
- return NOTIFY_DONE;
+ return 0;
- skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC);
- if (!skb)
+ skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC);
+ if (skb == NULL)
goto errout;
- b = skb->tail;
-
type |= NFNL_SUBSYS_CTNETLINK << 8;
- nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
- nlh->nlmsg_flags = flags;
+ nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = nf_ct_l3num(ct);
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
@@ -529,14 +503,14 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+ if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest_parms);
@@ -546,7 +520,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
if (ctnetlink_dump_status(skb, ct) < 0)
goto nla_put_failure;
- if (events & IPCT_DESTROY) {
+ if (events & (1 << IPCT_DESTROY)) {
if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
goto nla_put_failure;
@@ -554,47 +528,51 @@ static int ctnetlink_conntrack_event(struct notifier_block *this,
if (ctnetlink_dump_timeout(skb, ct) < 0)
goto nla_put_failure;
- if (events & IPCT_PROTOINFO
+ if (events & (1 << IPCT_PROTOINFO)
&& ctnetlink_dump_protoinfo(skb, ct) < 0)
goto nla_put_failure;
- if ((events & IPCT_HELPER || nfct_help(ct))
+ if ((events & (1 << IPCT_HELPER) || nfct_help(ct))
&& ctnetlink_dump_helpinfo(skb, ct) < 0)
goto nla_put_failure;
#ifdef CONFIG_NF_CONNTRACK_SECMARK
- if ((events & IPCT_SECMARK || ct->secmark)
+ if ((events & (1 << IPCT_SECMARK) || ct->secmark)
&& ctnetlink_dump_secmark(skb, ct) < 0)
goto nla_put_failure;
#endif
- if (events & IPCT_RELATED &&
+ if (events & (1 << IPCT_RELATED) &&
ctnetlink_dump_master(skb, ct) < 0)
goto nla_put_failure;
- if (events & IPCT_NATSEQADJ &&
+ if (events & (1 << IPCT_NATSEQADJ) &&
ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
goto nla_put_failure;
}
#ifdef CONFIG_NF_CONNTRACK_MARK
- if ((events & IPCT_MARK || ct->mark)
+ if ((events & (1 << IPCT_MARK) || ct->mark)
&& ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
rcu_read_unlock();
- nlh->nlmsg_len = skb->tail - b;
- nfnetlink_send(skb, item->pid, group, item->report);
- return NOTIFY_DONE;
+ nlmsg_end(skb, nlh);
+ err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
+ if (err == -ENOBUFS || err == -EAGAIN)
+ return -ENOBUFS;
+
+ return 0;
nla_put_failure:
rcu_read_unlock();
+ nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
errout:
nfnetlink_set_err(0, group, -ENOBUFS);
- return NOTIFY_DONE;
+ return 0;
}
#endif /* CONFIG_NF_CONNTRACK_EVENTS */
@@ -611,7 +589,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
struct nf_conn *ct, *last;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
- struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
u_int8_t l3proto = nfmsg->nfgen_family;
rcu_read_lock();
@@ -637,8 +615,7 @@ restart:
}
if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW,
- 1, ct) < 0) {
+ IPCTNL_MSG_CT_NEW, ct) < 0) {
cb->args[1] = (unsigned long)ct;
goto out;
}
@@ -792,7 +769,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
- struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
int err = 0;
@@ -802,9 +779,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
else {
/* Flush the whole table */
- nf_conntrack_flush(&init_net,
- NETLINK_CB(skb).pid,
- nlmsg_report(nlh));
+ nf_conntrack_flush_report(&init_net,
+ NETLINK_CB(skb).pid,
+ nlmsg_report(nlh));
return 0;
}
@@ -825,10 +802,15 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
}
}
- nf_conntrack_event_report(IPCT_DESTROY,
- ct,
- NETLINK_CB(skb).pid,
- nlmsg_report(nlh));
+ if (nf_conntrack_event_report(IPCT_DESTROY, ct,
+ NETLINK_CB(skb).pid,
+ nlmsg_report(nlh)) < 0) {
+ nf_ct_delete_from_lists(ct);
+ /* we failed to report the event, try later */
+ nf_ct_insert_dying_list(ct);
+ nf_ct_put(ct);
+ return 0;
+ }
/* death_by_timeout would report the event again */
set_bit(IPS_DYING_BIT, &ct->status);
@@ -847,7 +829,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
struct nf_conntrack_tuple tuple;
struct nf_conn *ct;
struct sk_buff *skb2 = NULL;
- struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
int err = 0;
@@ -872,15 +854,15 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
ct = nf_ct_tuplehash_to_ctrack(h);
err = -ENOMEM;
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb2) {
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL) {
nf_ct_put(ct);
return -ENOMEM;
}
rcu_read_lock();
err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW, 1, ct);
+ IPCTNL_MSG_CT_NEW, ct);
rcu_read_unlock();
nf_ct_put(ct);
if (err <= 0)
@@ -1280,6 +1262,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
}
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
+ nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
@@ -1325,7 +1308,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
{
struct nf_conntrack_tuple otuple, rtuple;
struct nf_conntrack_tuple_hash *h = NULL;
- struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
int err = 0;
@@ -1367,13 +1350,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
else
events = IPCT_NEW;
- nf_conntrack_event_report(IPCT_STATUS |
- IPCT_HELPER |
- IPCT_PROTOINFO |
- IPCT_NATSEQADJ |
- IPCT_MARK | events,
- ct, NETLINK_CB(skb).pid,
- nlmsg_report(nlh));
+ nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
+ (1 << IPCT_HELPER) |
+ (1 << IPCT_PROTOINFO) |
+ (1 << IPCT_NATSEQADJ) |
+ (1 << IPCT_MARK) | events,
+ ct, NETLINK_CB(skb).pid,
+ nlmsg_report(nlh));
nf_ct_put(ct);
} else
spin_unlock_bh(&nf_conntrack_lock);
@@ -1392,13 +1375,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (err == 0) {
nf_conntrack_get(&ct->ct_general);
spin_unlock_bh(&nf_conntrack_lock);
- nf_conntrack_event_report(IPCT_STATUS |
- IPCT_HELPER |
- IPCT_PROTOINFO |
- IPCT_NATSEQADJ |
- IPCT_MARK,
- ct, NETLINK_CB(skb).pid,
- nlmsg_report(nlh));
+ nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
+ (1 << IPCT_HELPER) |
+ (1 << IPCT_PROTOINFO) |
+ (1 << IPCT_NATSEQADJ) |
+ (1 << IPCT_MARK),
+ ct, NETLINK_CB(skb).pid,
+ nlmsg_report(nlh));
nf_ct_put(ct);
} else
spin_unlock_bh(&nf_conntrack_lock);
@@ -1503,19 +1486,18 @@ nla_put_failure:
static int
ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
- int event,
- int nowait,
- const struct nf_conntrack_expect *exp)
+ int event, const struct nf_conntrack_expect *exp)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- unsigned char *b = skb_tail_pointer(skb);
+ unsigned int flags = pid ? NLM_F_MULTI : 0;
event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
- nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
+ nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = exp->tuple.src.l3num;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
@@ -1523,49 +1505,46 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
if (ctnetlink_exp_dump_expect(skb, exp) < 0)
goto nla_put_failure;
- nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+ nlmsg_end(skb, nlh);
return skb->len;
nlmsg_failure:
nla_put_failure:
- nlmsg_trim(skb, b);
+ nlmsg_cancel(skb, nlh);
return -1;
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
-static int ctnetlink_expect_event(struct notifier_block *this,
- unsigned long events, void *ptr)
+static int
+ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
{
struct nlmsghdr *nlh;
struct nfgenmsg *nfmsg;
- struct nf_exp_event *item = (struct nf_exp_event *)ptr;
struct nf_conntrack_expect *exp = item->exp;
struct sk_buff *skb;
unsigned int type;
- sk_buff_data_t b;
int flags = 0;
- if (events & IPEXP_NEW) {
+ if (events & (1 << IPEXP_NEW)) {
type = IPCTNL_MSG_EXP_NEW;
flags = NLM_F_CREATE|NLM_F_EXCL;
} else
- return NOTIFY_DONE;
+ return 0;
if (!item->report &&
!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW))
- return NOTIFY_DONE;
+ return 0;
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
- if (!skb)
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+ if (skb == NULL)
goto errout;
- b = skb->tail;
-
type |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
- nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
+ nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags);
+ if (nlh == NULL)
+ goto nlmsg_failure;
- nlh->nlmsg_flags = flags;
+ nfmsg = nlmsg_data(nlh);
nfmsg->nfgen_family = exp->tuple.src.l3num;
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = 0;
@@ -1575,17 +1554,19 @@ static int ctnetlink_expect_event(struct notifier_block *this,
goto nla_put_failure;
rcu_read_unlock();
- nlh->nlmsg_len = skb->tail - b;
- nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report);
- return NOTIFY_DONE;
+ nlmsg_end(skb, nlh);
+ nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW,
+ item->report, GFP_ATOMIC);
+ return 0;
nla_put_failure:
rcu_read_unlock();
+ nlmsg_cancel(skb, nlh);
nlmsg_failure:
kfree_skb(skb);
errout:
nfnetlink_set_err(0, 0, -ENOBUFS);
- return NOTIFY_DONE;
+ return 0;
}
#endif
static int ctnetlink_exp_done(struct netlink_callback *cb)
@@ -1600,7 +1581,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = &init_net;
struct nf_conntrack_expect *exp, *last;
- struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
struct hlist_node *n;
u_int8_t l3proto = nfmsg->nfgen_family;
@@ -1617,10 +1598,11 @@ restart:
continue;
cb->args[1] = 0;
}
- if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
+ if (ctnetlink_exp_fill_info(skb,
+ NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
IPCTNL_MSG_EXP_NEW,
- 1, exp) < 0) {
+ exp) < 0) {
if (!atomic_inc_not_zero(&exp->use))
continue;
cb->args[1] = (unsigned long)exp;
@@ -1652,7 +1634,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
struct sk_buff *skb2;
- struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
int err = 0;
@@ -1683,14 +1665,13 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
}
err = -ENOMEM;
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb2)
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (skb2 == NULL)
goto out;
rcu_read_lock();
err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
- nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
- 1, exp);
+ nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp);
rcu_read_unlock();
if (err <= 0)
goto free;
@@ -1713,7 +1694,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
struct nf_conntrack_expect *exp;
struct nf_conntrack_tuple tuple;
struct nf_conntrack_helper *h;
- struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
struct hlist_node *n, *next;
u_int8_t u3 = nfmsg->nfgen_family;
unsigned int i;
@@ -1854,7 +1835,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
{
struct nf_conntrack_tuple tuple;
struct nf_conntrack_expect *exp;
- struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+ struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u_int8_t u3 = nfmsg->nfgen_family;
int err = 0;
@@ -1891,12 +1872,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
}
#ifdef CONFIG_NF_CONNTRACK_EVENTS
-static struct notifier_block ctnl_notifier = {
- .notifier_call = ctnetlink_conntrack_event,
+static struct nf_ct_event_notifier ctnl_notifier = {
+ .fcn = ctnetlink_conntrack_event,
};
-static struct notifier_block ctnl_notifier_exp = {
- .notifier_call = ctnetlink_expect_event,
+static struct nf_exp_event_notifier ctnl_notifier_exp = {
+ .fcn = ctnetlink_expect_event,
};
#endif
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index aee0d6bea309..1b816a2ea813 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -25,8 +25,6 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_log.h>
-static DEFINE_RWLOCK(dccp_lock);
-
/* Timeouts are based on values from RFC4340:
*
* - REQUEST:
@@ -492,7 +490,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
return NF_ACCEPT;
}
- write_lock_bh(&dccp_lock);
+ spin_lock_bh(&ct->lock);
role = ct->proto.dccp.role[dir];
old_state = ct->proto.dccp.state;
@@ -536,13 +534,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.dccp.last_dir = dir;
ct->proto.dccp.last_pkt = type;
- write_unlock_bh(&dccp_lock);
+ spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid packet ignored ");
return NF_ACCEPT;
case CT_DCCP_INVALID:
- write_unlock_bh(&dccp_lock);
+ spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_DCCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_dccp: invalid state transition ");
@@ -552,7 +550,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.dccp.last_dir = dir;
ct->proto.dccp.last_pkt = type;
ct->proto.dccp.state = new_state;
- write_unlock_bh(&dccp_lock);
+ spin_unlock_bh(&ct->lock);
if (new_state != old_state)
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
@@ -621,36 +619,39 @@ static int dccp_print_tuple(struct seq_file *s,
ntohs(tuple->dst.u.dccp.port));
}
-static int dccp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);
}
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
- const struct nf_conn *ct)
+ struct nf_conn *ct)
{
struct nlattr *nest_parms;
- read_lock_bh(&dccp_lock);
+ spin_lock_bh(&ct->lock);
nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state);
NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE,
ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]);
+ NLA_PUT_BE64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ,
+ cpu_to_be64(ct->proto.dccp.handshake_seq));
nla_nest_end(skb, nest_parms);
- read_unlock_bh(&dccp_lock);
+ spin_unlock_bh(&ct->lock);
return 0;
nla_put_failure:
- read_unlock_bh(&dccp_lock);
+ spin_unlock_bh(&ct->lock);
return -1;
}
static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = {
[CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 },
[CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 },
+ [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 },
};
static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
@@ -674,7 +675,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
return -EINVAL;
}
- write_lock_bh(&dccp_lock);
+ spin_lock_bh(&ct->lock);
ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]);
if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) {
ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT;
@@ -683,7 +684,11 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct)
ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER;
ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT;
}
- write_unlock_bh(&dccp_lock);
+ if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) {
+ ct->proto.dccp.handshake_seq =
+ be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]));
+ }
+ spin_unlock_bh(&ct->lock);
return 0;
}
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 117b80112fcb..a54a0af0edba 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -176,7 +176,7 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple,
static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
{
- struct net *net = dev_net(skb->dev ? skb->dev : skb->dst->dev);
+ struct net *net = dev_net(skb->dev ? skb->dev : skb_dst(skb)->dev);
const struct gre_hdr_pptp *pgrehdr;
struct gre_hdr_pptp _pgrehdr;
__be16 srckey;
@@ -219,8 +219,7 @@ static int gre_print_tuple(struct seq_file *s,
}
/* print private data for conntrack */
-static int gre_print_conntrack(struct seq_file *s,
- const struct nf_conn *ct)
+static int gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
return seq_printf(s, "timeout=%u, stream_timeout=%u ",
(ct->proto.gre.timeout / HZ),
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index 101b4ad9e817..c10e6f36e31e 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -25,9 +25,6 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_ecache.h>
-/* Protects ct->proto.sctp */
-static DEFINE_RWLOCK(sctp_lock);
-
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR
@@ -164,13 +161,13 @@ static int sctp_print_tuple(struct seq_file *s,
}
/* Print out the private part of the conntrack. */
-static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+static int sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
enum sctp_conntrack state;
- read_lock_bh(&sctp_lock);
+ spin_lock_bh(&ct->lock);
state = ct->proto.sctp.state;
- read_unlock_bh(&sctp_lock);
+ spin_unlock_bh(&ct->lock);
return seq_printf(s, "%s ", sctp_conntrack_names[state]);
}
@@ -318,7 +315,7 @@ static int sctp_packet(struct nf_conn *ct,
}
old_state = new_state = SCTP_CONNTRACK_NONE;
- write_lock_bh(&sctp_lock);
+ spin_lock_bh(&ct->lock);
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
/* Special cases of Verification tag check (Sec 8.5.1) */
if (sch->type == SCTP_CID_INIT) {
@@ -371,7 +368,7 @@ static int sctp_packet(struct nf_conn *ct,
if (old_state != new_state)
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
}
- write_unlock_bh(&sctp_lock);
+ spin_unlock_bh(&ct->lock);
nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]);
@@ -386,7 +383,7 @@ static int sctp_packet(struct nf_conn *ct,
return NF_ACCEPT;
out_unlock:
- write_unlock_bh(&sctp_lock);
+ spin_unlock_bh(&ct->lock);
out:
return -NF_ACCEPT;
}
@@ -469,11 +466,11 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
#include <linux/netfilter/nfnetlink_conntrack.h>
static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
- const struct nf_conn *ct)
+ struct nf_conn *ct)
{
struct nlattr *nest_parms;
- read_lock_bh(&sctp_lock);
+ spin_lock_bh(&ct->lock);
nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
@@ -488,14 +485,14 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
CTA_PROTOINFO_SCTP_VTAG_REPLY,
ct->proto.sctp.vtag[IP_CT_DIR_REPLY]);
- read_unlock_bh(&sctp_lock);
+ spin_unlock_bh(&ct->lock);
nla_nest_end(skb, nest_parms);
return 0;
nla_put_failure:
- read_unlock_bh(&sctp_lock);
+ spin_unlock_bh(&ct->lock);
return -1;
}
@@ -527,13 +524,13 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct)
!tb[CTA_PROTOINFO_SCTP_VTAG_REPLY])
return -EINVAL;
- write_lock_bh(&sctp_lock);
+ spin_lock_bh(&ct->lock);
ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] =
nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]);
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] =
nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]);
- write_unlock_bh(&sctp_lock);
+ spin_unlock_bh(&ct->lock);
return 0;
}
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 97a6e93d742e..33fc0a443f3d 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -29,9 +29,6 @@
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
-/* Protects ct->proto.tcp */
-static DEFINE_RWLOCK(tcp_lock);
-
/* "Be conservative in what you do,
be liberal in what you accept from others."
If it's non-zero, we mark only out of window RST segments as INVALID. */
@@ -59,7 +56,7 @@ static const char *const tcp_conntrack_names[] = {
"LAST_ACK",
"TIME_WAIT",
"CLOSE",
- "LISTEN"
+ "SYN_SENT2",
};
#define SECS * HZ
@@ -82,6 +79,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
[TCP_CONNTRACK_LAST_ACK] = 30 SECS,
[TCP_CONNTRACK_TIME_WAIT] = 2 MINS,
[TCP_CONNTRACK_CLOSE] = 10 SECS,
+ [TCP_CONNTRACK_SYN_SENT2] = 2 MINS,
};
#define sNO TCP_CONNTRACK_NONE
@@ -93,7 +91,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = {
#define sLA TCP_CONNTRACK_LAST_ACK
#define sTW TCP_CONNTRACK_TIME_WAIT
#define sCL TCP_CONNTRACK_CLOSE
-#define sLI TCP_CONNTRACK_LISTEN
+#define sS2 TCP_CONNTRACK_SYN_SENT2
#define sIV TCP_CONNTRACK_MAX
#define sIG TCP_CONNTRACK_IGNORE
@@ -123,6 +121,7 @@ enum tcp_bit_set {
*
* NONE: initial state
* SYN_SENT: SYN-only packet seen
+ * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open
* SYN_RECV: SYN-ACK packet seen
* ESTABLISHED: ACK packet seen
* FIN_WAIT: FIN packet seen
@@ -131,26 +130,24 @@ enum tcp_bit_set {
* TIME_WAIT: last ACK seen
* CLOSE: closed connection (RST)
*
- * LISTEN state is not used.
- *
* Packets marked as IGNORED (sIG):
* if they may be either invalid or valid
* and the receiver may send back a connection
* closing RST or a SYN/ACK.
*
* Packets marked as INVALID (sIV):
- * if they are invalid
- * or we do not support the request (simultaneous open)
+ * if we regard them as truly invalid packets
*/
static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
/*
* sNO -> sSS Initialize a new connection
* sSS -> sSS Retransmitted SYN
- * sSR -> sIG Late retransmitted SYN?
+ * sS2 -> sS2 Late retransmitted SYN
+ * sSR -> sIG
* sES -> sIG Error: SYNs in window outside the SYN_SENT state
* are errors. Receiver will reply with RST
* and close the connection.
@@ -161,22 +158,30 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sTW -> sSS Reopened connection (RFC 1122).
* sCL -> sSS
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
/*
- * A SYN/ACK from the client is always invalid:
- * - either it tries to set up a simultaneous open, which is
- * not supported;
- * - or the firewall has just been inserted between the two hosts
- * during the session set-up. The SYN will be retransmitted
- * by the true client (or it'll time out).
+ * sNO -> sIV Too late and no reason to do anything
+ * sSS -> sIV Client can't send SYN and then SYN/ACK
+ * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open
+ * sSR -> sIG
+ * sES -> sIG Error: SYNs in window outside the SYN_SENT state
+ * are errors. Receiver will reply with RST
+ * and close the connection.
+ * Or we are not in sync and hold a dead connection.
+ * sFW -> sIG
+ * sCW -> sIG
+ * sLA -> sIG
+ * sTW -> sIG
+ * sCL -> sIG
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
* sNO -> sIV Too late and no reason to do anything...
* sSS -> sIV Client migth not send FIN in this state:
* we enforce waiting for a SYN/ACK reply first.
+ * sS2 -> sIV
* sSR -> sFW Close started.
* sES -> sFW
* sFW -> sLA FIN seen in both directions, waiting for
@@ -187,11 +192,12 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sTW -> sTW
* sCL -> sCL
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
/*
* sNO -> sES Assumed.
* sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
+ * sS2 -> sIV
* sSR -> sES Established state is reached.
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
@@ -200,29 +206,31 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sTW -> sTW Retransmitted last ACK. Remain in the same state.
* sCL -> sCL
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
},
{
/* REPLY */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 },
/*
* sNO -> sIV Never reached.
- * sSS -> sIV Simultaneous open, not supported
- * sSR -> sIV Simultaneous open, not supported.
- * sES -> sIV Server may not initiate a connection.
+ * sSS -> sS2 Simultaneous open
+ * sS2 -> sS2 Retransmitted simultaneous SYN
+ * sSR -> sIV Invalid SYN packets sent by the server
+ * sES -> sIV
* sFW -> sIV
* sCW -> sIV
* sLA -> sIV
* sTW -> sIV Reopened connection, but server may not do it.
* sCL -> sIV
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
/*
* sSS -> sSR Standard open.
+ * sS2 -> sSR Simultaneous open
* sSR -> sSR Retransmitted SYN/ACK.
* sES -> sIG Late retransmitted SYN/ACK?
* sFW -> sIG Might be SYN/ACK answering ignored SYN
@@ -231,10 +239,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sTW -> sIG
* sCL -> sIG
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
/*
* sSS -> sIV Server might not send FIN in this state.
+ * sS2 -> sIV
* sSR -> sFW Close started.
* sES -> sFW
* sFW -> sLA FIN seen in both directions.
@@ -243,10 +252,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sTW -> sTW
* sCL -> sCL
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
/*
* sSS -> sIG Might be a half-open connection.
+ * sS2 -> sIG
* sSR -> sSR Might answer late resent SYN.
* sES -> sES :-)
* sFW -> sCW Normal close request answered by ACK.
@@ -255,8 +265,8 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
* sTW -> sTW Retransmitted last ACK.
* sCL -> sCL
*/
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
+/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */
+/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
}
};
@@ -296,13 +306,13 @@ static int tcp_print_tuple(struct seq_file *s,
}
/* Print out the private part of the conntrack. */
-static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct)
+static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
{
enum tcp_conntrack state;
- read_lock_bh(&tcp_lock);
+ spin_lock_bh(&ct->lock);
state = ct->proto.tcp.state;
- read_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
return seq_printf(s, "%s ", tcp_conntrack_names[state]);
}
@@ -521,13 +531,14 @@ static bool tcp_in_window(const struct nf_conn *ct,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
receiver->td_scale);
- if (sender->td_end == 0) {
+ if (sender->td_maxwin == 0) {
/*
* Initialize sender data.
*/
- if (tcph->syn && tcph->ack) {
+ if (tcph->syn) {
/*
- * Outgoing SYN-ACK in reply to a SYN.
+ * SYN-ACK in reply to a SYN
+ * or SYN from reply direction in simultaneous open.
*/
sender->td_end =
sender->td_maxend = end;
@@ -543,6 +554,9 @@ static bool tcp_in_window(const struct nf_conn *ct,
&& receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
sender->td_scale =
receiver->td_scale = 0;
+ if (!tcph->ack)
+ /* Simultaneous open */
+ return true;
} else {
/*
* We are in the middle of a connection,
@@ -716,14 +730,14 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb,
end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph);
- write_lock_bh(&tcp_lock);
+ spin_lock_bh(&ct->lock);
/*
* We have to worry for the ack in the reply packet only...
*/
if (after(end, ct->proto.tcp.seen[dir].td_end))
ct->proto.tcp.seen[dir].td_end = end;
ct->proto.tcp.last_end = end;
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin,
@@ -832,7 +846,7 @@ static int tcp_packet(struct nf_conn *ct,
th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
BUG_ON(th == NULL);
- write_lock_bh(&tcp_lock);
+ spin_lock_bh(&ct->lock);
old_state = ct->proto.tcp.state;
dir = CTINFO2DIR(ctinfo);
index = get_conntrack_index(th);
@@ -862,7 +876,7 @@ static int tcp_packet(struct nf_conn *ct,
&& ct->proto.tcp.last_index == TCP_RST_SET)) {
/* Attempt to reopen a closed/aborted connection.
* Delete this connection and look up again. */
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
/* Only repeat if we can actually remove the timer.
* Destruction may already be in progress in process
@@ -898,7 +912,7 @@ static int tcp_packet(struct nf_conn *ct,
* that the client cannot but retransmit its SYN and
* thus initiate a clean new session.
*/
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: killing out of sync session ");
@@ -911,7 +925,7 @@ static int tcp_packet(struct nf_conn *ct,
ct->proto.tcp.last_end =
segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid packet ignored ");
@@ -920,7 +934,7 @@ static int tcp_packet(struct nf_conn *ct,
/* Invalid packet */
pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th), old_state);
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid state ");
@@ -930,7 +944,7 @@ static int tcp_packet(struct nf_conn *ct,
&& (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
&& before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
/* Invalid RST */
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
if (LOG_INVALID(net, IPPROTO_TCP))
nf_log_packet(pf, 0, skb, NULL, NULL, NULL,
"nf_ct_tcp: invalid RST ");
@@ -961,7 +975,7 @@ static int tcp_packet(struct nf_conn *ct,
if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
skb, dataoff, th, pf)) {
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
return -NF_ACCEPT;
}
in_window:
@@ -990,9 +1004,8 @@ static int tcp_packet(struct nf_conn *ct,
timeout = nf_ct_tcp_timeout_unacknowledged;
else
timeout = tcp_timeouts[new_state];
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
- nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
if (new_state != old_state)
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
@@ -1086,7 +1099,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
ct->proto.tcp.seen[1].td_end = 0;
ct->proto.tcp.seen[1].td_maxend = 0;
- ct->proto.tcp.seen[1].td_maxwin = 1;
+ ct->proto.tcp.seen[1].td_maxwin = 0;
ct->proto.tcp.seen[1].td_scale = 0;
/* tcp_packet will set them */
@@ -1108,12 +1121,12 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
#include <linux/netfilter/nfnetlink_conntrack.h>
static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
- const struct nf_conn *ct)
+ struct nf_conn *ct)
{
struct nlattr *nest_parms;
struct nf_ct_tcp_flags tmp = {};
- read_lock_bh(&tcp_lock);
+ spin_lock_bh(&ct->lock);
nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
if (!nest_parms)
goto nla_put_failure;
@@ -1133,14 +1146,14 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
tmp.flags = ct->proto.tcp.seen[1].flags;
NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
sizeof(struct nf_ct_tcp_flags), &tmp);
- read_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
nla_nest_end(skb, nest_parms);
return 0;
nla_put_failure:
- read_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
return -1;
}
@@ -1171,7 +1184,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
return -EINVAL;
- write_lock_bh(&tcp_lock);
+ spin_lock_bh(&ct->lock);
if (tb[CTA_PROTOINFO_TCP_STATE])
ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
@@ -1198,7 +1211,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
ct->proto.tcp.seen[1].td_scale =
nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
}
- write_unlock_bh(&tcp_lock);
+ spin_unlock_bh(&ct->lock);
return 0;
}
@@ -1328,6 +1341,13 @@ static struct ctl_table tcp_compat_sysctl_table[] = {
.proc_handler = proc_dointvec_jiffies,
},
{
+ .procname = "ip_conntrack_tcp_timeout_syn_sent2",
+ .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2],
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
.procname = "ip_conntrack_tcp_timeout_syn_recv",
.data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV],
.maxlen = sizeof(unsigned int),
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index beb37311e1a5..2fefe147750a 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -248,14 +248,14 @@ static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
rcu_assign_pointer(nf_loggers[tindex], logger);
mutex_unlock(&nf_log_mutex);
} else {
- rcu_read_lock();
- logger = rcu_dereference(nf_loggers[tindex]);
+ mutex_lock(&nf_log_mutex);
+ logger = nf_loggers[tindex];
if (!logger)
table->data = "NONE";
else
table->data = logger->name;
r = proc_dostring(table, write, filp, buffer, lenp, ppos);
- rcu_read_unlock();
+ mutex_unlock(&nf_log_mutex);
}
return r;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 4f2310c93e01..3a6fd77f7761 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -204,10 +204,10 @@ int nf_queue(struct sk_buff *skb,
queuenum);
switch (pf) {
- case AF_INET:
+ case NFPROTO_IPV4:
skb->protocol = htons(ETH_P_IP);
break;
- case AF_INET6:
+ case NFPROTO_IPV6:
skb->protocol = htons(ETH_P_IPV6);
break;
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index b8ab37ad7ed5..92761a988375 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -107,9 +107,10 @@ int nfnetlink_has_listeners(unsigned int group)
}
EXPORT_SYMBOL_GPL(nfnetlink_has_listeners);
-int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo)
+int nfnetlink_send(struct sk_buff *skb, u32 pid,
+ unsigned group, int echo, gfp_t flags)
{
- return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any());
+ return nlmsg_notify(nfnl, skb, pid, group, echo, flags);
}
EXPORT_SYMBOL_GPL(nfnetlink_send);
@@ -136,7 +137,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
return -EPERM;
/* All the messages must at least contain nfgenmsg */
- if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg)))
+ if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct nfgenmsg)))
return 0;
type = nlh->nlmsg_type;
@@ -160,19 +161,14 @@ replay:
{
int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg));
u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
- u_int16_t attr_count = ss->cb[cb_id].attr_count;
- struct nlattr *cda[attr_count+1];
-
- if (likely(nlh->nlmsg_len >= min_len)) {
- struct nlattr *attr = (void *)nlh + NLMSG_ALIGN(min_len);
- int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
-
- err = nla_parse(cda, attr_count, attr, attrlen,
- ss->cb[cb_id].policy);
- if (err < 0)
- return err;
- } else
- return -EINVAL;
+ struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
+ struct nlattr *attr = (void *)nlh + min_len;
+ int attrlen = nlh->nlmsg_len - min_len;
+
+ err = nla_parse(cda, ss->cb[cb_id].attr_count,
+ attr, attrlen, ss->cb[cb_id].policy);
+ if (err < 0)
+ return err;
err = nc->call(nfnl, skb, nlh, cda);
if (err == -EAGAIN)
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 8c860112ce05..71daa0934b6c 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1,6 +1,6 @@
/*
* This is a module which is used for queueing packets and communicating with
- * userspace via nfetlink.
+ * userspace via nfnetlink.
*
* (C) 2005 by Harald Welte <laforge@netfilter.org>
* (C) 2007 by Patrick McHardy <kaber@trash.net>
@@ -932,6 +932,8 @@ static void __exit nfnetlink_queue_fini(void)
#endif
nfnetlink_subsys_unregister(&nfqnl_subsys);
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
+
+ rcu_barrier(); /* Wait for completion of call_rcu()'s */
}
MODULE_DESCRIPTION("netfilter packet queue handler");
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 150e5cf62f85..025d1a0af78b 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -329,6 +329,32 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
}
EXPORT_SYMBOL_GPL(xt_find_revision);
+/*
+ * Render the hook bitmask @mask as a '/'-separated list of hook names in
+ * @buf (which holds @size bytes) and return @buf, so the call can be used
+ * directly as a printk argument.
+ *
+ * scnprintf() returns the number of characters actually stored, so a
+ * truncated write can neither move the cursor past the end of @buf nor
+ * wrap the remaining size; an over-long list is simply cut short.
+ */
+static char *textify_hooks(char *buf, size_t size, unsigned int mask)
+{
+	static const char *const names[] = {
+		"PREROUTING", "INPUT", "FORWARD",
+		"OUTPUT", "POSTROUTING", "BROUTING",
+	};
+	unsigned int i;
+	char *p = buf;
+	bool np = false;	/* set once a name was emitted: next one needs '/' */
+	int res;
+
+	/* Not even room for the terminator: leave the buffer untouched. */
+	if (size == 0)
+		return buf;
+
+	*p = '\0';
+	for (i = 0; i < ARRAY_SIZE(names); ++i) {
+		if (!(mask & (1U << i)))
+			continue;
+		res = scnprintf(p, size, "%s%s", np ? "/" : "", names[i]);
+		size -= res;
+		p += res;
+		np = true;
+	}
+
+	return buf;
+}
+
int xt_check_match(struct xt_mtchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
@@ -338,26 +364,30 @@ int xt_check_match(struct xt_mtchk_param *par,
* ebt_among is exempt from centralized matchsize checking
* because it uses a dynamic-size data set.
*/
- printk("%s_tables: %s match: invalid size %Zu != %u\n",
+ pr_err("%s_tables: %s match: invalid size %Zu != %u\n",
xt_prefix[par->family], par->match->name,
XT_ALIGN(par->match->matchsize), size);
return -EINVAL;
}
if (par->match->table != NULL &&
strcmp(par->match->table, par->table) != 0) {
- printk("%s_tables: %s match: only valid in %s table, not %s\n",
+ pr_err("%s_tables: %s match: only valid in %s table, not %s\n",
xt_prefix[par->family], par->match->name,
par->match->table, par->table);
return -EINVAL;
}
if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
- printk("%s_tables: %s match: bad hook_mask %#x/%#x\n",
+ char used[64], allow[64];
+
+ pr_err("%s_tables: %s match: used from hooks %s, but only "
+ "valid from %s\n",
xt_prefix[par->family], par->match->name,
- par->hook_mask, par->match->hooks);
+ textify_hooks(used, sizeof(used), par->hook_mask),
+ textify_hooks(allow, sizeof(allow), par->match->hooks));
return -EINVAL;
}
if (par->match->proto && (par->match->proto != proto || inv_proto)) {
- printk("%s_tables: %s match: only valid for protocol %u\n",
+ pr_err("%s_tables: %s match: only valid for protocol %u\n",
xt_prefix[par->family], par->match->name,
par->match->proto);
return -EINVAL;
@@ -484,26 +514,30 @@ int xt_check_target(struct xt_tgchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
if (XT_ALIGN(par->target->targetsize) != size) {
- printk("%s_tables: %s target: invalid size %Zu != %u\n",
+ pr_err("%s_tables: %s target: invalid size %Zu != %u\n",
xt_prefix[par->family], par->target->name,
XT_ALIGN(par->target->targetsize), size);
return -EINVAL;
}
if (par->target->table != NULL &&
strcmp(par->target->table, par->table) != 0) {
- printk("%s_tables: %s target: only valid in %s table, not %s\n",
+ pr_err("%s_tables: %s target: only valid in %s table, not %s\n",
xt_prefix[par->family], par->target->name,
par->target->table, par->table);
return -EINVAL;
}
if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
- printk("%s_tables: %s target: bad hook_mask %#x/%#x\n",
+ char used[64], allow[64];
+
+ pr_err("%s_tables: %s target: used from hooks %s, but only "
+ "usable from %s\n",
xt_prefix[par->family], par->target->name,
- par->hook_mask, par->target->hooks);
+ textify_hooks(used, sizeof(used), par->hook_mask),
+ textify_hooks(allow, sizeof(allow), par->target->hooks));
return -EINVAL;
}
if (par->target->proto && (par->target->proto != proto || inv_proto)) {
- printk("%s_tables: %s target: only valid for protocol %u\n",
+ pr_err("%s_tables: %s target: only valid for protocol %u\n",
xt_prefix[par->family], par->target->name,
par->target->proto);
return -EINVAL;
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index f9977b3311f7..498b45101df7 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -11,6 +11,10 @@
#include <linux/module.h>
#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/jhash.h>
+
#include <linux/netfilter.h>
#include <linux/netfilter_arp.h>
#include <linux/netfilter/x_tables.h>
@@ -23,6 +27,8 @@ MODULE_ALIAS("ipt_NFQUEUE");
MODULE_ALIAS("ip6t_NFQUEUE");
MODULE_ALIAS("arpt_NFQUEUE");
+static u32 jhash_initval __read_mostly;
+
static unsigned int
nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
{
@@ -31,32 +37,105 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
return NF_QUEUE_NR(tinfo->queuenum);
}
+/*
+ * Hash an IPv4 packet for queue selection.  Source and destination are
+ * XOR-ed together first, which makes the result symmetric: packets in
+ * either direction go into the same queue.
+ */
+static u32 hash_v4(const struct sk_buff *skb)
+{
+	const struct iphdr *hdr = ip_hdr(skb);
+	u32 folded = hdr->saddr ^ hdr->daddr;
+
+	return jhash_2words(folded, hdr->protocol, jhash_initval);
+}
+
+/*
+ * NFQUEUE revision 1 target, IPv4 flavour.  With a single queue the
+ * configured queue number is used as is; otherwise the packet hash picks
+ * one of queues_total consecutive queues starting at queuenum.
+ */
+static unsigned int
+nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_NFQ_info_v1 *cfg = par->targinfo;
+	u32 qnum = cfg->queuenum;
+
+	if (cfg->queues_total <= 1)
+		return NF_QUEUE_NR(qnum);
+
+	qnum += hash_v4(skb) % cfg->queues_total;
+	return NF_QUEUE_NR(qnum);
+}
+
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+/*
+ * Hash an IPv6 packet for queue selection.  Each 32-bit word of the source
+ * address is XOR-ed with the matching word of the destination, so both
+ * directions of a flow produce the same hash.
+ */
+static u32 hash_v6(const struct sk_buff *skb)
+{
+	const struct ipv6hdr *hdr = ipv6_hdr(skb);
+	u32 folded[4];
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(folded); i++)
+		folded[i] = hdr->saddr.s6_addr32[i] ^ hdr->daddr.s6_addr32[i];
+
+	return jhash2(folded, ARRAY_SIZE(folded), jhash_initval);
+}
+
+/*
+ * NFQUEUE revision 1 target, IPv6 flavour.  With a single queue the
+ * configured queue number is used as is; otherwise the packet hash picks
+ * one of queues_total consecutive queues starting at queuenum.
+ */
+static unsigned int
+nfqueue_tg6_v1(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_NFQ_info_v1 *cfg = par->targinfo;
+	u32 qnum = cfg->queuenum;
+
+	if (cfg->queues_total <= 1)
+		return NF_QUEUE_NR(qnum);
+
+	qnum += hash_v6(skb) % cfg->queues_total;
+	return NF_QUEUE_NR(qnum);
+}
+#endif
+
+/*
+ * checkentry hook for NFQUEUE revision 1: reject a rule whose queue range
+ * is empty or whose last queue id would not fit into 16 bits.
+ */
+static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_NFQ_info_v1 *cfg = par->targinfo;
+	u32 last;
+
+	if (!cfg->queues_total) {
+		pr_err("NFQUEUE: number of total queues is 0\n");
+		return false;
+	}
+
+	/* Highest queue id the balancing code can select. */
+	last = cfg->queuenum + cfg->queues_total - 1;
+	if (last <= 0xffff)
+		return true;
+
+	pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n",
+	       cfg->queues_total, last);
+	return false;
+}
+
static struct xt_target nfqueue_tg_reg[] __read_mostly = {
{
.name = "NFQUEUE",
- .family = NFPROTO_IPV4,
+ .family = NFPROTO_UNSPEC,
.target = nfqueue_tg,
.targetsize = sizeof(struct xt_NFQ_info),
.me = THIS_MODULE,
},
{
.name = "NFQUEUE",
- .family = NFPROTO_IPV6,
- .target = nfqueue_tg,
- .targetsize = sizeof(struct xt_NFQ_info),
+ .revision = 1,
+ .family = NFPROTO_IPV4,
+ .checkentry = nfqueue_tg_v1_check,
+ .target = nfqueue_tg4_v1,
+ .targetsize = sizeof(struct xt_NFQ_info_v1),
.me = THIS_MODULE,
},
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
{
.name = "NFQUEUE",
- .family = NFPROTO_ARP,
- .target = nfqueue_tg,
- .targetsize = sizeof(struct xt_NFQ_info),
+ .revision = 1,
+ .family = NFPROTO_IPV6,
+ .checkentry = nfqueue_tg_v1_check,
+ .target = nfqueue_tg6_v1,
+ .targetsize = sizeof(struct xt_NFQ_info_v1),
.me = THIS_MODULE,
},
+#endif
};
static int __init nfqueue_tg_init(void)
{
+ get_random_bytes(&jhash_initval, sizeof(jhash_initval));
return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg));
}
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 4f3b1f808795..eda64c1cb1e5 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -73,11 +73,11 @@ tcpmss_mangle_packet(struct sk_buff *skb,
}
if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
- if (dst_mtu(skb->dst) <= minlen) {
+ if (dst_mtu(skb_dst(skb)) <= minlen) {
if (net_ratelimit())
printk(KERN_ERR "xt_TCPMSS: "
"unknown or invalid path-MTU (%u)\n",
- dst_mtu(skb->dst));
+ dst_mtu(skb_dst(skb)));
return -1;
}
if (in_mtu <= minlen) {
@@ -86,7 +86,7 @@ tcpmss_mangle_packet(struct sk_buff *skb,
"invalid path-MTU (%u)\n", in_mtu);
return -1;
}
- newmss = min(dst_mtu(skb->dst), in_mtu) - minlen;
+ newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen;
} else
newmss = info->mss;
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
new file mode 100644
index 000000000000..863e40977a4d
--- /dev/null
+++ b/net/netfilter/xt_osf.c
@@ -0,0 +1,428 @@
+/*
+ * Copyright (c) 2003+ Evgeniy Polyakov <zbr@ioremap.net>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+#include <linux/if.h>
+#include <linux/inetdevice.h>
+#include <linux/ip.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/skbuff.h>
+#include <linux/slab.h>
+#include <linux/tcp.h>
+
+#include <net/ip.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_log.h>
+#include <linux/netfilter/xt_osf.h>
+
+struct xt_osf_finger { /* kernel-side list node wrapping one user-supplied fingerprint */
+ struct rcu_head rcu_head; /* passed to call_rcu() so the node is freed after a grace period */
+ struct list_head finger_entry; /* linkage in one of the xt_osf_fingers[] lists */
+ struct xt_osf_user_finger finger; /* fingerprint copied from the netlink attribute payload */
+};
+
+enum osf_fmatch_states { /* result of comparing one packet against one fingerprint */
+ /* Packet does not match the fingerprint */
+ FMATCH_WRONG = 0,
+ /* Packet matches the fingerprint */
+ FMATCH_OK,
+ /* Options do not match the fingerprint, but header does */
+ FMATCH_OPT_WRONG,
+};
+
+/*
+ * Indexed by dont-fragment bit.
+ * It is the only constant value in the fingerprint.
+ * The packet matcher walks these lists under rcu_read_lock(); the netlink
+ * callbacks change them with the RCU list primitives.
+ */
+static struct list_head xt_osf_fingers[2];
+
+static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = { /* attribute policy handed to nla_parse() for OSF messages */
+ [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) }, /* payload is sized as one struct xt_osf_user_finger */
+};
+
+/* RCU callback: free the fingerprint node that embeds @rcu_head. */
+static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head)
+{
+	kfree(container_of(rcu_head, struct xt_osf_finger, rcu_head));
+}
+
+/*
+ * nfnetlink OSF_MSG_ADD handler: store a copy of the fingerprint carried in
+ * OSF_ATTR_FINGER on the list selected by its dont-fragment bit.
+ *
+ * Returns -EINVAL when the attribute or NLM_F_CREATE is missing, -ENOMEM
+ * when the node cannot be allocated, -EEXIST when an identical fingerprint
+ * is already stored and NLM_F_EXCL was requested, and 0 otherwise (an
+ * identical fingerprint without NLM_F_EXCL is silently accepted).
+ */
+static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb,
+			       struct nlmsghdr *nlh, struct nlattr *osf_attrs[])
+{
+	struct list_head *head;
+	struct xt_osf_user_finger *ufp;
+	struct xt_osf_finger *node, *cur;
+
+	if (!osf_attrs[OSF_ATTR_FINGER] || !(nlh->nlmsg_flags & NLM_F_CREATE))
+		return -EINVAL;
+
+	ufp = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+	head = &xt_osf_fingers[!!ufp->df];
+
+	node = kmalloc(sizeof(*node), GFP_KERNEL);
+	if (node == NULL)
+		return -ENOMEM;
+	memcpy(&node->finger, ufp, sizeof(node->finger));
+
+	list_for_each_entry(cur, head, finger_entry) {
+		if (memcmp(&cur->finger, ufp, sizeof(cur->finger)) != 0)
+			continue;
+
+		/* Identical fingerprint already stored: drop our copy. */
+		kfree(node);
+		return (nlh->nlmsg_flags & NLM_F_EXCL) ? -EEXIST : 0;
+	}
+
+	/*
+	 * We are protected by nfnl mutex.
+	 */
+	list_add_tail_rcu(&node->finger_entry, head);
+	return 0;
+}
+
+/*
+ * nfnetlink OSF_MSG_REMOVE handler: unlink the stored fingerprint that is
+ * byte-for-byte equal to the one carried in OSF_ATTR_FINGER and free it
+ * after an RCU grace period.
+ *
+ * Returns 0 when a fingerprint was removed, -EINVAL when the attribute is
+ * missing and -ENOENT when no stored fingerprint matches.
+ */
+static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb,
+				  struct nlmsghdr *nlh, struct nlattr *osf_attrs[])
+{
+	struct xt_osf_user_finger *f;
+	struct xt_osf_finger *sf;
+	/* Errors are negative errno values, like every other return here. */
+	int err = -ENOENT;
+
+	if (!osf_attrs[OSF_ATTR_FINGER])
+		return -EINVAL;
+
+	f = nla_data(osf_attrs[OSF_ATTR_FINGER]);
+
+	list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) {
+		if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger)))
+			continue;
+
+		/*
+		 * We are protected by nfnl mutex.
+		 */
+		list_del_rcu(&sf->finger_entry);
+		call_rcu(&sf->rcu_head, xt_osf_finger_free_rcu);
+
+		err = 0;
+		break;
+	}
+
+	return err;
+}
+
+static const struct nfnl_callback xt_osf_nfnetlink_callbacks[OSF_MSG_MAX] = { /* per-message handlers of the OSF nfnetlink subsystem */
+ [OSF_MSG_ADD] = { /* store a new fingerprint */
+ .call = xt_osf_add_callback,
+ .attr_count = OSF_ATTR_MAX,
+ .policy = xt_osf_policy,
+ },
+ [OSF_MSG_REMOVE] = { /* delete a stored fingerprint */
+ .call = xt_osf_remove_callback,
+ .attr_count = OSF_ATTR_MAX,
+ .policy = xt_osf_policy,
+ },
+};
+
+static const struct nfnetlink_subsystem xt_osf_nfnetlink = { /* registered in xt_osf_init(), unregistered in xt_osf_fini() */
+ .name = "osf",
+ .subsys_id = NFNL_SUBSYS_OSF,
+ .cb_count = OSF_MSG_MAX,
+ .cb = xt_osf_nfnetlink_callbacks,
+};
+
+/*
+ * Compare the packet TTL against the fingerprint TTL @f_ttl according to
+ * the rule's TTL mode.  Returns non-zero when the TTL is acceptable.
+ *
+ *  - XT_OSF_TTL flag clear, or mode XT_OSF_TTL_TRUE: exact comparison.
+ *  - mode XT_OSF_TTL_NOCHECK: always accepted.
+ *  - any other mode: accepted when the packet TTL does not exceed @f_ttl;
+ *    otherwise the addresses of the receiving device are consulted.
+ *
+ * Uses __in_dev_get_rcu(), so the caller must hold rcu_read_lock().
+ */
+static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info *info,
+			    unsigned char f_ttl)
+{
+	const struct iphdr *ip = ip_hdr(skb);
+	struct in_device *in_dev;
+	int ret = 0;
+
+	if (!(info->flags & XT_OSF_TTL) || info->ttl == XT_OSF_TTL_TRUE)
+		return ip->ttl == f_ttl;
+
+	if (info->ttl == XT_OSF_TTL_NOCHECK || ip->ttl <= f_ttl)
+		return 1;
+
+	/*
+	 * A device may have no IPv4 configuration attached, in which case
+	 * there is no address list to walk and nothing can match.
+	 */
+	in_dev = __in_dev_get_rcu(skb->dev);
+	if (in_dev == NULL)
+		return 0;
+
+	/*
+	 * NOTE(review): this point is only reached with ip->ttl > f_ttl, so
+	 * the equality below cannot be true -- confirm the intended check.
+	 */
+	for_ifa(in_dev) {
+		if (inet_ifa_match(ip->saddr, ifa)) {
+			ret = (ip->ttl == f_ttl);
+			break;
+		}
+	}
+	endfor_ifa(in_dev);
+
+	return ret;
+}
+
+/*
+ * xtables match function: compare a TCP SYN packet against the stored
+ * fingerprints that share its dont-fragment bit.
+ *
+ * A fingerprint matches when the IP total length, the TTL (as judged by
+ * xt_osf_ttl()), the sequence of TCP option kinds and the window size rule
+ * all agree.  Without XT_OSF_LOG only fingerprints whose genre equals the
+ * rule's genre are considered; with XT_OSF_LOG every fingerprint is tried
+ * and each hit is logged (only the first one for XT_OSF_LOGLEVEL_FIRST).
+ *
+ * Returns true when at least one fingerprint matched.
+ */
+static bool xt_osf_match_packet(const struct sk_buff *skb,
+				const struct xt_match_param *p)
+{
+	const struct xt_osf_info *info = p->matchinfo;
+	const struct iphdr *ip = ip_hdr(skb);
+	const struct tcphdr *tcp;
+	struct tcphdr _tcph;
+	int fmatch = FMATCH_WRONG, fcount = 0;
+	unsigned int optsize = 0, check_WSS = 0;
+	u16 window, totlen, mss = 0;
+	bool df;
+	const unsigned char *optp = NULL, *_optp = NULL;
+	unsigned char opts[MAX_IPOPTLEN];
+	const struct xt_osf_finger *kf;
+	const struct xt_osf_user_finger *f;
+
+	if (!info)
+		return false;
+
+	tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph);
+	if (!tcp)
+		return false;
+
+	if (!tcp->syn)
+		return false;
+
+	totlen = ntohs(ip->tot_len);
+	df = ntohs(ip->frag_off) & IP_DF;
+	window = ntohs(tcp->window);
+
+	if (tcp->doff * 4 > sizeof(struct tcphdr)) {
+		optsize = tcp->doff * 4 - sizeof(struct tcphdr);
+
+		/* May yield NULL when the packet is shorter than it claims. */
+		_optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) +
+				sizeof(struct tcphdr), optsize, opts);
+	}
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) {
+		f = &kf->finger;
+
+		if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre))
+			continue;
+
+		/* Every fingerprint is checked from the first option again. */
+		optp = _optp;
+		fmatch = FMATCH_WRONG;
+
+		if (totlen == f->ss && xt_osf_ttl(skb, info, f->ttl)) {
+			int foptsize, optnum;
+
+			/*
+			 * Should not happen if userspace parser was written correctly.
+			 */
+			if (f->wss.wc >= OSF_WSS_MAX)
+				continue;
+
+			/* Check options */
+
+			foptsize = 0;
+			for (optnum = 0; optnum < f->opt_num; ++optnum)
+				foptsize += f->opt[optnum].length;
+
+			if (foptsize > MAX_IPOPTLEN ||
+			    optsize > MAX_IPOPTLEN ||
+			    optsize != foptsize)
+				continue;
+
+			/*
+			 * The fingerprint lists options but the packet's option
+			 * bytes are not available: nothing to compare against.
+			 */
+			if (f->opt_num && !optp)
+				continue;
+
+			check_WSS = f->wss.wc;
+
+			for (optnum = 0; optnum < f->opt_num; ++optnum) {
+				if (f->opt[optnum].kind == (*optp)) {
+					__u32 len = f->opt[optnum].length;
+
+					fmatch = FMATCH_OK;
+
+					/*
+					 * The MSS value is the 16-bit big-endian
+					 * quantity after kind and length; building
+					 * it from bytes is endian-independent.
+					 */
+					if (*optp == OSFOPT_MSS && len >= 4)
+						mss = (optp[2] << 8) | optp[3];
+
+					optp += len;
+				} else
+					fmatch = FMATCH_OPT_WRONG;
+
+				if (fmatch != FMATCH_OK)
+					break;
+			}
+
+			if (fmatch != FMATCH_OPT_WRONG) {
+				fmatch = FMATCH_WRONG;
+
+				switch (check_WSS) {
+				case OSF_WSS_PLAIN:
+					if (f->wss.val == 0 || window == f->wss.val)
+						fmatch = FMATCH_OK;
+					break;
+				case OSF_WSS_MSS:
+					/*
+					 * Some smart modems mangle the MSS down to
+					 * SMART_MSS_2, so we check the standard value,
+					 * the decreased value and the MSS actually
+					 * seen in the packet.
+					 */
+#define SMART_MSS_1	1460
+#define SMART_MSS_2	1448
+					if (window == f->wss.val * mss ||
+					    window == f->wss.val * SMART_MSS_1 ||
+					    window == f->wss.val * SMART_MSS_2)
+						fmatch = FMATCH_OK;
+					break;
+				case OSF_WSS_MTU:
+					if (window == f->wss.val * (mss + 40) ||
+					    window == f->wss.val * (SMART_MSS_1 + 40) ||
+					    window == f->wss.val * (SMART_MSS_2 + 40))
+						fmatch = FMATCH_OK;
+					break;
+				case OSF_WSS_MODULO:
+					/* A zero modulus can never match; do not divide by it. */
+					if (f->wss.val != 0 &&
+					    (window % f->wss.val) == 0)
+						fmatch = FMATCH_OK;
+					break;
+				}
+			}
+
+			if (fmatch != FMATCH_OK)
+				continue;
+
+			fcount++;
+
+			if (info->flags & XT_OSF_LOG)
+				nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL,
+					"%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n",
+					f->genre, f->version, f->subtype,
+					&ip->saddr, ntohs(tcp->source),
+					&ip->daddr, ntohs(tcp->dest),
+					f->ttl - ip->ttl);
+
+			if ((info->flags & XT_OSF_LOG) &&
+			    info->loglevel == XT_OSF_LOGLEVEL_FIRST)
+				break;
+		}
+	}
+	rcu_read_unlock();
+
+	if (!fcount && (info->flags & XT_OSF_LOG))
+		nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL,
+			"Remote OS is not known: %pi4:%u -> %pi4:%u\n",
+			&ip->saddr, ntohs(tcp->source),
+			&ip->daddr, ntohs(tcp->dest));
+
+	/* fmatch only reflects the last fingerprint tried; fcount is the verdict. */
+	if (fcount)
+		fmatch = FMATCH_OK;
+
+	return fmatch == FMATCH_OK;
+}
+
+static struct xt_match xt_osf_match = { /* xtables registration record for the "osf" match */
+ .name = "osf",
+ .revision = 0,
+ .family = NFPROTO_IPV4,
+ .proto = IPPROTO_TCP, /* rules using this match must be restricted to TCP */
+ .hooks = (1 << NF_INET_LOCAL_IN) | /* usable only from these three hooks */
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_FORWARD),
+ .match = xt_osf_match_packet,
+ .matchsize = sizeof(struct xt_osf_info),
+ .me = THIS_MODULE,
+};
+
+/*
+ * Module init: set up the two (empty) fingerprint lists, then register the
+ * nfnetlink subsystem used to load fingerprints and finally the xtables
+ * match.  If the match cannot be registered the nfnetlink subsystem is
+ * unregistered again.  Returns 0 on success or the error reported by the
+ * failing registration.
+ */
+static int __init xt_osf_init(void)
+{
+	int err;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(xt_osf_fingers); ++i)
+		INIT_LIST_HEAD(&xt_osf_fingers[i]);
+
+	err = nfnetlink_subsys_register(&xt_osf_nfnetlink);
+	if (err < 0) {
+		printk(KERN_ERR "Failed (%d) to register OSF nfnetlink helper.\n", err);
+		goto err_out_exit;
+	}
+
+	err = xt_register_match(&xt_osf_match);
+	if (err) {
+		printk(KERN_ERR "Failed (%d) to register OS fingerprint "
+				"matching module.\n", err);
+		goto err_out_remove;
+	}
+
+	return 0;
+
+err_out_remove:
+	nfnetlink_subsys_unregister(&xt_osf_nfnetlink);
+err_out_exit:
+	return err;
+}
+
+/*
+ * Module exit: unregister the nfnetlink subsystem and the match, queue
+ * every remaining fingerprint for RCU freeing and wait in rcu_barrier()
+ * until all queued callbacks have run.
+ */
+static void __exit xt_osf_fini(void)
+{
+	struct xt_osf_finger *node;
+	int idx;
+
+	nfnetlink_subsys_unregister(&xt_osf_nfnetlink);
+	xt_unregister_match(&xt_osf_match);
+
+	rcu_read_lock();
+	for (idx = 0; idx < ARRAY_SIZE(xt_osf_fingers); idx++) {
+		struct list_head *head = &xt_osf_fingers[idx];
+
+		list_for_each_entry_rcu(node, head, finger_entry) {
+			list_del_rcu(&node->finger_entry);
+			call_rcu(&node->rcu_head, xt_osf_finger_free_rcu);
+		}
+	}
+	rcu_read_unlock();
+
+	rcu_barrier();
+}
+
+module_init(xt_osf_init);
+module_exit(xt_osf_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>");
+MODULE_DESCRIPTION("Passive OS fingerprint matching.");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 328bd20ddd25..4cbfebda8fa1 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -86,7 +86,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
unsigned short family)
{
const struct xt_policy_elem *e;
- const struct dst_entry *dst = skb->dst;
+ const struct dst_entry *dst = skb_dst(skb);
int strict = info->flags & XT_POLICY_MATCH_STRICT;
int i, pos;
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 67419287bc7e..484d1689bfde 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -25,7 +25,7 @@ static bool
realm_mt(const struct sk_buff *skb, const struct xt_match_param *par)
{
const struct xt_realm_info *info = par->matchinfo;
- const struct dst_entry *dst = skb->dst;
+ const struct dst_entry *dst = skb_dst(skb);
return (info->id == (dst->tclassid & info->mask)) ^ info->invert;
}
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 1acc089be7e9..ebf00ad5b194 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -22,6 +22,8 @@
#include <net/netfilter/nf_tproxy_core.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <linux/netfilter/xt_socket.h>
+
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#define XT_SOCKET_HAVE_CONNTRACK 1
#include <net/netfilter/nf_conntrack.h>
@@ -86,7 +88,8 @@ extract_icmp_fields(const struct sk_buff *skb,
static bool
-socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
+ const struct xt_socket_mtinfo1 *info)
{
const struct iphdr *iph = ip_hdr(skb);
struct udphdr _hdr, *hp = NULL;
@@ -141,10 +144,24 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol,
saddr, daddr, sport, dport, par->in, false);
if (sk != NULL) {
- bool wildcard = (sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->rcv_saddr == 0);
+ bool wildcard;
+ bool transparent = true;
+
+ /* Ignore sockets listening on INADDR_ANY */
+ wildcard = (sk->sk_state != TCP_TIME_WAIT &&
+ inet_sk(sk)->rcv_saddr == 0);
+
+ /* Ignore non-transparent sockets,
+ if XT_SOCKET_TRANSPARENT is used */
+ if (info && info->flags & XT_SOCKET_TRANSPARENT)
+ transparent = ((sk->sk_state != TCP_TIME_WAIT &&
+ inet_sk(sk)->transparent) ||
+ (sk->sk_state == TCP_TIME_WAIT &&
+ inet_twsk(sk)->tw_transparent));
nf_tproxy_put_sock(sk);
- if (wildcard)
+
+ if (wildcard || !transparent)
sk = NULL;
}
@@ -157,23 +174,47 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par)
return (sk != NULL);
}
-static struct xt_match socket_mt_reg __read_mostly = {
- .name = "socket",
- .family = AF_INET,
- .match = socket_mt,
- .hooks = 1 << NF_INET_PRE_ROUTING,
- .me = THIS_MODULE,
+static bool
+socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+ return socket_match(skb, par, NULL); /* revision 0 has no options: a NULL info skips the XT_SOCKET_TRANSPARENT test */
+}
+
+static bool
+socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
+{
+ return socket_match(skb, par, par->matchinfo); /* revision 1 forwards the rule's match data as the info argument */
+}
+
+static struct xt_match socket_mt_reg[] __read_mostly = { /* both revisions of the "socket" match, registered together */
+ {
+ .name = "socket",
+ .revision = 0, /* no match data */
+ .family = NFPROTO_IPV4,
+ .match = socket_mt_v0,
+ .hooks = 1 << NF_INET_PRE_ROUTING,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "socket",
+ .revision = 1, /* match data is a struct xt_socket_mtinfo1 */
+ .family = NFPROTO_IPV4,
+ .match = socket_mt_v1,
+ .matchsize = sizeof(struct xt_socket_mtinfo1),
+ .hooks = 1 << NF_INET_PRE_ROUTING,
+ .me = THIS_MODULE,
+ },
};
static int __init socket_mt_init(void)
{
nf_defrag_ipv4_enable();
- return xt_register_match(&socket_mt_reg);
+ return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
}
static void __exit socket_mt_exit(void)
{
- xt_unregister_match(&socket_mt_reg);
+ xt_unregister_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg));
}
module_init(socket_mt_init);