diff options
Diffstat (limited to 'net/netfilter')
25 files changed, 1256 insertions, 490 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c26a20c58dde..634d14affc8d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -917,6 +917,19 @@ config NETFILTER_XT_MATCH_U32 Details and examples are in the kernel module source. +config NETFILTER_XT_MATCH_OSF + tristate '"osf" Passive OS fingerprint match' + depends on NETFILTER_ADVANCED && NETFILTER_NETLINK + help + This option selects the Passive OS Fingerprinting match module + that allows to passively match the remote operating system by + analyzing incoming TCP SYN packets. + + Rules and loading software can be downloaded from + http://www.ioremap.net/projects/osf + + To compile it as a module, choose M here. If unsure, say N. + endif # NETFILTER_XTABLES endmenu diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6282060fbda9..49f62ee4e9ff 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -77,6 +77,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o +obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o obj-$(CONFIG_NETFILTER_XT_MATCH_PKTTYPE) += xt_pkttype.o diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index e01061f49cdc..7c1333c67ff3 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -3345,22 +3345,8 @@ static struct genl_ops ip_vs_genl_ops[] __read_mostly = { static int __init ip_vs_genl_register(void) { - int ret, i; - - ret = genl_register_family(&ip_vs_genl_family); - if (ret) - return ret; - - for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) { - ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]); - if (ret) - goto err_out; - } - return 0; - -err_out: - genl_unregister_family(&ip_vs_genl_family); - return ret; + return genl_register_family_with_ops(&ip_vs_genl_family, + ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops)); } static void ip_vs_genl_unregister(void) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 425ab144f15d..5874657af7f2 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -260,8 +260,8 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_send_check(ip_hdr(skb)); /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -324,8 +324,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -388,8 +388,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -465,8 +465,8 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) @@ -553,8 +553,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n"); goto tx_error; } - if (skb->dst) - skb->dst->ops->update_pmtu(skb->dst, mtu); + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); df |= (old_iph->frag_off & htons(IP_DF)); @@ -596,8 +596,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* * Push down and install the IPIP header. @@ -665,8 +665,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n"); goto tx_error; } - if (skb->dst) - skb->dst->ops->update_pmtu(skb->dst, mtu); + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); @@ -702,8 +702,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* * Push down and install the IPIP header. @@ -775,8 +775,8 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_send_check(ip_hdr(skb)); /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -828,8 +828,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } /* drop old route */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; @@ -900,8 +900,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; /* drop the old route when skb is not shared */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); ip_vs_nat_icmp(skb, pp, cp, 0); @@ -975,8 +975,8 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error_put; /* drop the old route when skb is not shared */ - dst_release(skb->dst); - skb->dst = &rt->u.dst; + skb_dst_drop(skb); + skb_dst_set(skb, &rt->u.dst); ip_vs_nat_icmp_v6(skb, pp, cp, 0); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 8020db6274b8..5f72b94b4918 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -39,6 +39,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_acct.h> +#include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_nat.h> #include <net/netfilter/nf_nat_core.h> @@ -182,10 +183,6 @@ destroy_conntrack(struct nf_conntrack *nfct) NF_CT_ASSERT(atomic_read(&nfct->use) == 0); NF_CT_ASSERT(!timer_pending(&ct->timeout)); - if (!test_bit(IPS_DYING_BIT, &ct->status)) - nf_conntrack_event(IPCT_DESTROY, ct); - set_bit(IPS_DYING_BIT, &ct->status); - /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to nf_conntrack_lock!!! -HW */ @@ -219,27 +216,70 @@ destroy_conntrack(struct nf_conntrack *nfct) nf_conntrack_free(ct); } -static void death_by_timeout(unsigned long ul_conntrack) +void nf_ct_delete_from_lists(struct nf_conn *ct) { - struct nf_conn *ct = (void *)ul_conntrack; struct net *net = nf_ct_net(ct); - struct nf_conn_help *help = nfct_help(ct); - struct nf_conntrack_helper *helper; - - if (help) { - rcu_read_lock(); - helper = rcu_dereference(help->helper); - if (helper && helper->destroy) - helper->destroy(ct); - rcu_read_unlock(); - } + nf_ct_helper_destroy(ct); spin_lock_bh(&nf_conntrack_lock); /* Inside lock so preempt is disabled on module removal path. * Otherwise we can get spurious warnings. */ NF_CT_STAT_INC(net, delete_list); clean_from_lists(ct); spin_unlock_bh(&nf_conntrack_lock); +} +EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists); + +static void death_by_event(unsigned long ul_conntrack) +{ + struct nf_conn *ct = (void *)ul_conntrack; + struct net *net = nf_ct_net(ct); + + if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) { + /* bad luck, let's retry again */ + ct->timeout.expires = jiffies + + (random32() % net->ct.sysctl_events_retry_timeout); + add_timer(&ct->timeout); + return; + } + /* we've got the event delivered, now it's dying */ + set_bit(IPS_DYING_BIT, &ct->status); + spin_lock(&nf_conntrack_lock); + hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); + spin_unlock(&nf_conntrack_lock); + nf_ct_put(ct); +} + +void nf_ct_insert_dying_list(struct nf_conn *ct) +{ + struct net *net = nf_ct_net(ct); + + /* add this conntrack to the dying list */ + spin_lock_bh(&nf_conntrack_lock); + hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &net->ct.dying); + spin_unlock_bh(&nf_conntrack_lock); + /* set a new timer to retry event delivery */ + setup_timer(&ct->timeout, death_by_event, (unsigned long)ct); + ct->timeout.expires = jiffies + + (random32() % net->ct.sysctl_events_retry_timeout); + add_timer(&ct->timeout); +} +EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list); + +static void death_by_timeout(unsigned long ul_conntrack) +{ + struct nf_conn *ct = (void *)ul_conntrack; + + if (!test_bit(IPS_DYING_BIT, &ct->status) && + unlikely(nf_conntrack_event(IPCT_DESTROY, ct) < 0)) { + /* destroy event was not delivered */ + nf_ct_delete_from_lists(ct); + nf_ct_insert_dying_list(ct); + return; + } + set_bit(IPS_DYING_BIT, &ct->status); + nf_ct_delete_from_lists(ct); nf_ct_put(ct); } @@ -398,11 +438,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) help = nfct_help(ct); if (help && help->helper) nf_conntrack_event_cache(IPCT_HELPER, ct); -#ifdef CONFIG_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, ct); -#endif + nf_conntrack_event_cache(master_ct(ct) ? IPCT_RELATED : IPCT_NEW, ct); return NF_ACCEPT; @@ -523,6 +559,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, return ERR_PTR(-ENOMEM); } + spin_lock_init(&ct->lock); atomic_set(&ct->ct_general.use, 1); ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; @@ -580,6 +617,7 @@ init_conntrack(struct net *net, } nf_ct_acct_ext_add(ct, GFP_ATOMIC); + nf_ct_ecache_ext_add(ct, GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); exp = nf_ct_find_expectation(net, tuple); @@ -807,13 +845,9 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, unsigned long extra_jiffies, int do_acct) { - int event = 0; - NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); NF_CT_ASSERT(skb); - spin_lock_bh(&nf_conntrack_lock); - /* Only update if this is not a fixed timeout */ if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) goto acct; @@ -821,19 +855,14 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, /* If not in hash table, timer will not be active yet */ if (!nf_ct_is_confirmed(ct)) { ct->timeout.expires = extra_jiffies; - event = IPCT_REFRESH; } else { unsigned long newtime = jiffies + extra_jiffies; /* Only update the timeout if the new timeout is at least HZ jiffies from the old timeout. Need del_timer for race avoidance (may already be dying). */ - if (newtime - ct->timeout.expires >= HZ - && del_timer(&ct->timeout)) { - ct->timeout.expires = newtime; - add_timer(&ct->timeout); - event = IPCT_REFRESH; - } + if (newtime - ct->timeout.expires >= HZ) + mod_timer_pending(&ct->timeout, newtime); } acct: @@ -842,17 +871,13 @@ acct: acct = nf_conn_acct_find(ct); if (acct) { + spin_lock_bh(&ct->lock); acct[CTINFO2DIR(ctinfo)].packets++; acct[CTINFO2DIR(ctinfo)].bytes += skb->len - skb_network_offset(skb); + spin_unlock_bh(&ct->lock); } } - - spin_unlock_bh(&nf_conntrack_lock); - - /* must be unlocked when calling event cache */ - if (event) - nf_conntrack_event_cache(event, ct); } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); @@ -864,14 +889,14 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, if (do_acct) { struct nf_conn_counter *acct; - spin_lock_bh(&nf_conntrack_lock); acct = nf_conn_acct_find(ct); if (acct) { + spin_lock_bh(&ct->lock); acct[CTINFO2DIR(ctinfo)].packets++; acct[CTINFO2DIR(ctinfo)].bytes += skb->len - skb_network_offset(skb); + spin_unlock_bh(&ct->lock); } - spin_unlock_bh(&nf_conntrack_lock); } if (del_timer(&ct->timeout)) { @@ -1001,15 +1026,22 @@ struct __nf_ct_flush_report { int report; }; -static int kill_all(struct nf_conn *i, void *data) +static int kill_report(struct nf_conn *i, void *data) { struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data; - /* get_next_corpse sets the dying bit for us */ - nf_conntrack_event_report(IPCT_DESTROY, - i, - fr->pid, - fr->report); + /* If we fail to deliver the event, death_by_timeout() will retry */ + if (nf_conntrack_event_report(IPCT_DESTROY, i, + fr->pid, fr->report) < 0) + return 1; + + /* Avoid the delivery of the destroy event in death_by_timeout(). */ + set_bit(IPS_DYING_BIT, &i->status); + return 1; +} + +static int kill_all(struct nf_conn *i, void *data) +{ return 1; } @@ -1023,15 +1055,30 @@ void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush(struct net *net, u32 pid, int report) +void nf_conntrack_flush_report(struct net *net, u32 pid, int report) { struct __nf_ct_flush_report fr = { .pid = pid, .report = report, }; - nf_ct_iterate_cleanup(net, kill_all, &fr); + nf_ct_iterate_cleanup(net, kill_report, &fr); +} +EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); + +static void nf_ct_release_dying_list(void) +{ + struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; + struct hlist_nulls_node *n; + + spin_lock_bh(&nf_conntrack_lock); + hlist_nulls_for_each_entry(h, n, &init_net.ct.dying, hnnode) { + ct = nf_ct_tuplehash_to_ctrack(h); + /* never fails to remove them, no listeners at this point */ + nf_ct_kill(ct); + } + spin_unlock_bh(&nf_conntrack_lock); } -EXPORT_SYMBOL_GPL(nf_conntrack_flush); static void nf_conntrack_cleanup_init_net(void) { @@ -1042,10 +1089,9 @@ static void nf_conntrack_cleanup_init_net(void) static void nf_conntrack_cleanup_net(struct net *net) { - nf_ct_event_cache_flush(net); - nf_conntrack_ecache_fini(net); i_see_dead_people: - nf_conntrack_flush(net, 0, 0); + nf_ct_iterate_cleanup(net, kill_all, NULL); + nf_ct_release_dying_list(); if (atomic_read(&net->ct.count) != 0) { schedule(); goto i_see_dead_people; @@ -1056,6 +1102,7 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); + nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); free_percpu(net->ct.stat); @@ -1226,14 +1273,12 @@ static int nf_conntrack_init_net(struct net *net) atomic_set(&net->ct.count, 0); INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0); + INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0); net->ct.stat = alloc_percpu(struct ip_conntrack_stat); if (!net->ct.stat) { ret = -ENOMEM; goto err_stat; } - ret = nf_conntrack_ecache_init(net); - if (ret < 0) - goto err_ecache; net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { @@ -1247,6 +1292,9 @@ static int nf_conntrack_init_net(struct net *net) ret = nf_conntrack_acct_init(net); if (ret < 0) goto err_acct; + ret = nf_conntrack_ecache_init(net); + if (ret < 0) + goto err_ecache; /* Set up fake conntrack: - to never be deleted, not in any hashes */ @@ -1259,14 +1307,14 @@ static int nf_conntrack_init_net(struct net *net) return 0; +err_ecache: + nf_conntrack_acct_fini(net); err_acct: nf_conntrack_expect_fini(net); err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); err_hash: - nf_conntrack_ecache_fini(net); -err_ecache: free_percpu(net->ct.stat); err_stat: return ret; diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index dee4190209cc..aee560b4768d 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -16,121 +16,245 @@ #include <linux/stddef.h> #include <linux/err.h> #include <linux/percpu.h> -#include <linux/notifier.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_extend.h> -ATOMIC_NOTIFIER_HEAD(nf_conntrack_chain); -EXPORT_SYMBOL_GPL(nf_conntrack_chain); +static DEFINE_MUTEX(nf_ct_ecache_mutex); -ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain); -EXPORT_SYMBOL_GPL(nf_ct_expect_chain); +struct nf_ct_event_notifier *nf_conntrack_event_cb __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); + +struct nf_exp_event_notifier *nf_expect_event_cb __read_mostly; +EXPORT_SYMBOL_GPL(nf_expect_event_cb); /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ -static inline void -__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) +void nf_ct_deliver_cached_events(struct nf_conn *ct) { - if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct) - && ecache->events) { + unsigned long events; + struct nf_ct_event_notifier *notify; + struct nf_conntrack_ecache *e; + + rcu_read_lock(); + notify = rcu_dereference(nf_conntrack_event_cb); + if (notify == NULL) + goto out_unlock; + + e = nf_ct_ecache_find(ct); + if (e == NULL) + goto out_unlock; + + events = xchg(&e->cache, 0); + + if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct) && events) { struct nf_ct_event item = { - .ct = ecache->ct, + .ct = ct, .pid = 0, .report = 0 }; + int ret; + /* We make a copy of the missed event cache without taking + * the lock, thus we may send missed events twice. However, + * this does not harm and it happens very rarely. */ + unsigned long missed = e->missed; - atomic_notifier_call_chain(&nf_conntrack_chain, - ecache->events, - &item); + ret = notify->fcn(events | missed, &item); + if (unlikely(ret < 0 || missed)) { + spin_lock_bh(&ct->lock); + if (ret < 0) + e->missed |= events; + else + e->missed &= ~missed; + spin_unlock_bh(&ct->lock); + } } - ecache->events = 0; - nf_ct_put(ecache->ct); - ecache->ct = NULL; +out_unlock: + rcu_read_unlock(); } +EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); -/* Deliver all cached events for a particular conntrack. This is called - * by code prior to async packet handling for freeing the skb */ -void nf_ct_deliver_cached_events(const struct nf_conn *ct) +int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new) { - struct net *net = nf_ct_net(ct); - struct nf_conntrack_ecache *ecache; - - local_bh_disable(); - ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id()); - if (ecache->ct == ct) - __nf_ct_deliver_cached_events(ecache); - local_bh_enable(); + int ret = 0; + struct nf_ct_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_conntrack_event_cb); + if (notify != NULL) { + ret = -EBUSY; + goto out_unlock; + } + rcu_assign_pointer(nf_conntrack_event_cb, new); + mutex_unlock(&nf_ct_ecache_mutex); + return ret; + +out_unlock: + mutex_unlock(&nf_ct_ecache_mutex); + return ret; } -EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); +EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); -/* Deliver cached events for old pending events, if current conntrack != old */ -void __nf_ct_event_cache_init(struct nf_conn *ct) +void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new) { - struct net *net = nf_ct_net(ct); - struct nf_conntrack_ecache *ecache; - - /* take care of delivering potentially old events */ - ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id()); - BUG_ON(ecache->ct == ct); - if (ecache->ct) - __nf_ct_deliver_cached_events(ecache); - /* initialize for this conntrack/packet */ - ecache->ct = ct; - nf_conntrack_get(&ct->ct_general); + struct nf_ct_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_conntrack_event_cb); + BUG_ON(notify != new); + rcu_assign_pointer(nf_conntrack_event_cb, NULL); + mutex_unlock(&nf_ct_ecache_mutex); } -EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init); +EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); -/* flush the event cache - touches other CPU's data and must not be called - * while packets are still passing through the code */ -void nf_ct_event_cache_flush(struct net *net) +int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new) { - struct nf_conntrack_ecache *ecache; - int cpu; + int ret = 0; + struct nf_exp_event_notifier *notify; - for_each_possible_cpu(cpu) { - ecache = per_cpu_ptr(net->ct.ecache, cpu); - if (ecache->ct) - nf_ct_put(ecache->ct); + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_expect_event_cb); + if (notify != NULL) { + ret = -EBUSY; + goto out_unlock; } + rcu_assign_pointer(nf_expect_event_cb, new); + mutex_unlock(&nf_ct_ecache_mutex); + return ret; + +out_unlock: + mutex_unlock(&nf_ct_ecache_mutex); + return ret; } +EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); -int nf_conntrack_ecache_init(struct net *net) +void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new) { - net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache); - if (!net->ct.ecache) - return -ENOMEM; - return 0; + struct nf_exp_event_notifier *notify; + + mutex_lock(&nf_ct_ecache_mutex); + notify = rcu_dereference(nf_expect_event_cb); + BUG_ON(notify != new); + rcu_assign_pointer(nf_expect_event_cb, NULL); + mutex_unlock(&nf_ct_ecache_mutex); } +EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); -void nf_conntrack_ecache_fini(struct net *net) +#define NF_CT_EVENTS_DEFAULT 1 +static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT; +static int nf_ct_events_retry_timeout __read_mostly = 15*HZ; + +#ifdef CONFIG_SYSCTL +static struct ctl_table event_sysctl_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_events", + .data = &init_net.ct.sysctl_events, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nf_conntrack_events_retry_timeout", + .data = &init_net.ct.sysctl_events_retry_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + {} +}; +#endif /* CONFIG_SYSCTL */ + +static struct nf_ct_ext_type event_extend __read_mostly = { + .len = sizeof(struct nf_conntrack_ecache), + .align = __alignof__(struct nf_conntrack_ecache), + .id = NF_CT_EXT_ECACHE, +}; + +#ifdef CONFIG_SYSCTL +static int nf_conntrack_event_init_sysctl(struct net *net) { - free_percpu(net->ct.ecache); + struct ctl_table *table; + + table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table), + GFP_KERNEL); + if (!table) + goto out; + + table[0].data = &net->ct.sysctl_events; + table[1].data = &net->ct.sysctl_events_retry_timeout; + + net->ct.event_sysctl_header = + register_net_sysctl_table(net, + nf_net_netfilter_sysctl_path, table); + if (!net->ct.event_sysctl_header) { + printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n"); + goto out_register; + } + return 0; + +out_register: + kfree(table); +out: + return -ENOMEM; } -int nf_conntrack_register_notifier(struct notifier_block *nb) +static void nf_conntrack_event_fini_sysctl(struct net *net) { - return atomic_notifier_chain_register(&nf_conntrack_chain, nb); + struct ctl_table *table; + + table = net->ct.event_sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(net->ct.event_sysctl_header); + kfree(table); +} +#else +static int nf_conntrack_event_init_sysctl(struct net *net) +{ + return 0; } -EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); -int nf_conntrack_unregister_notifier(struct notifier_block *nb) +static void nf_conntrack_event_fini_sysctl(struct net *net) { - return atomic_notifier_chain_unregister(&nf_conntrack_chain, nb); } -EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); +#endif /* CONFIG_SYSCTL */ -int nf_ct_expect_register_notifier(struct notifier_block *nb) +int nf_conntrack_ecache_init(struct net *net) { - return atomic_notifier_chain_register(&nf_ct_expect_chain, nb); + int ret; + + net->ct.sysctl_events = nf_ct_events; + net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout; + + if (net_eq(net, &init_net)) { + ret = nf_ct_extend_register(&event_extend); + if (ret < 0) { + printk(KERN_ERR "nf_ct_event: Unable to register " + "event extension.\n"); + goto out_extend_register; + } + } + + ret = nf_conntrack_event_init_sysctl(net); + if (ret < 0) + goto out_sysctl; + + return 0; + +out_sysctl: + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&event_extend); +out_extend_register: + return ret; } -EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); -int nf_ct_expect_unregister_notifier(struct notifier_block *nb) +void nf_conntrack_ecache_fini(struct net *net) { - return atomic_notifier_chain_unregister(&nf_ct_expect_chain, nb); + nf_conntrack_event_fini_sysctl(net); + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&event_extend); } -EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 00fecc385f9b..5509dd1f14cf 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -338,11 +338,9 @@ static void update_nl_seq(struct nf_conn *ct, u32 nl_seq, if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } else if (oldest != NUM_SEQ_TO_REMEMBER && after(nl_seq, info->seq_aft_nl[dir][oldest])) { info->seq_aft_nl[dir][oldest] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 0fa5a422959f..65c2a7bc3afc 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -136,6 +136,20 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i, return 0; } +void nf_ct_helper_destroy(struct nf_conn *ct) +{ + struct nf_conn_help *help = nfct_help(ct); + struct nf_conntrack_helper *helper; + + if (help) { + rcu_read_lock(); + helper = rcu_dereference(help->helper); + if (helper && helper->destroy) + helper->destroy(ct); + rcu_read_unlock(); + } +} + int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { unsigned int h = helper_hash(&me->tuple); diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 8a3875e36ec2..497b2224536f 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -48,7 +48,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, { struct nf_conntrack_expect *exp; struct iphdr *iph = ip_hdr(skb); - struct rtable *rt = skb->rtable; + struct rtable *rt = skb_rtable(skb); struct in_device *in_dev; __be32 mask = 0; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index c523f0b8cee5..49479d194570 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -27,7 +27,6 @@ #include <linux/netlink.h> #include <linux/spinlock.h> #include <linux/interrupt.h> -#include <linux/notifier.h> #include <linux/netfilter.h> #include <net/netlink.h> @@ -144,7 +143,7 @@ nla_put_failure: } static inline int -ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct) +ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) { struct nf_conntrack_l4proto *l4proto; struct nlattr *nest_proto; @@ -346,23 +345,21 @@ nla_put_failure: return -1; } -#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) - static int ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, - int event, int nowait, - const struct nf_conn *ct) + int event, struct nf_conn *ct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - unsigned char *b = skb_tail_pointer(skb); + unsigned int flags = pid ? NLM_F_MULTI : 0; event |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; - nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; + nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = nf_ct_l3num(ct); nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; @@ -370,14 +367,14 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); @@ -395,132 +392,109 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, ctnetlink_dump_nat_seq_adj(skb, ct) < 0) goto nla_put_failure; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; + nlmsg_end(skb, nlh); return skb->len; nlmsg_failure: nla_put_failure: - nlmsg_trim(skb, b); + nlmsg_cancel(skb, nlh); return -1; } #ifdef CONFIG_NF_CONNTRACK_EVENTS -/* - * The general structure of a ctnetlink event is - * - * CTA_TUPLE_ORIG - * <l3/l4-proto-attributes> - * CTA_TUPLE_REPLY - * <l3/l4-proto-attributes> - * CTA_ID - * ... - * CTA_PROTOINFO - * <l4-proto-attributes> - * CTA_TUPLE_MASTER - * <l3/l4-proto-attributes> - * - * Therefore the formular is - * - * size = sizeof(headers) + sizeof(generic_nlas) + 3 * sizeof(tuple_nlas) - * + sizeof(protoinfo_nlas) - */ -static struct sk_buff * -ctnetlink_alloc_skb(const struct nf_conntrack_tuple *tuple, gfp_t gfp) +static inline size_t +ctnetlink_proto_size(const struct nf_conn *ct) { struct nf_conntrack_l3proto *l3proto; struct nf_conntrack_l4proto *l4proto; - int len; - -#define NLA_TYPE_SIZE(type) nla_total_size(sizeof(type)) - - /* proto independant part */ - len = NLMSG_SPACE(sizeof(struct nfgenmsg)) - + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */ - + 3 * nla_total_size(0) /* CTA_TUPLE_IP */ - + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */ - + 3 * NLA_TYPE_SIZE(u_int8_t) /* CTA_PROTO_NUM */ - + NLA_TYPE_SIZE(u_int32_t) /* CTA_ID */ - + NLA_TYPE_SIZE(u_int32_t) /* CTA_STATUS */ + size_t len = 0; + + rcu_read_lock(); + l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); + len += l3proto->nla_size; + + l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); + len += l4proto->nla_size; + rcu_read_unlock(); + + return len; +} + +static inline size_t +ctnetlink_nlmsg_size(const struct nf_conn *ct) +{ + return NLMSG_ALIGN(sizeof(struct nfgenmsg)) + + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */ + + 3 * nla_total_size(0) /* CTA_TUPLE_IP */ + + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */ + + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */ + + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */ + + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */ #ifdef CONFIG_NF_CT_ACCT - + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */ - + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_PACKETS */ - + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_BYTES */ + + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */ + + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */ + + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */ #endif - + NLA_TYPE_SIZE(u_int32_t) /* CTA_TIMEOUT */ - + nla_total_size(0) /* CTA_PROTOINFO */ - + nla_total_size(0) /* CTA_HELP */ - + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ + + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */ + + nla_total_size(0) /* CTA_PROTOINFO */ + + nla_total_size(0) /* CTA_HELP */ + + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ #ifdef CONFIG_NF_CONNTRACK_SECMARK - + NLA_TYPE_SIZE(u_int32_t) /* CTA_SECMARK */ + + nla_total_size(sizeof(u_int32_t)) /* CTA_SECMARK */ #endif #ifdef CONFIG_NF_NAT_NEEDED - + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ - + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_POS */ - + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_BEFORE */ - + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_AFTER */ + + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + + 6 * nla_total_size(sizeof(u_int32_t)) /* CTA_NAT_SEQ_OFFSET */ #endif #ifdef CONFIG_NF_CONNTRACK_MARK - + NLA_TYPE_SIZE(u_int32_t) /* CTA_MARK */ + + nla_total_size(sizeof(u_int32_t)) /* CTA_MARK */ #endif - ; - -#undef NLA_TYPE_SIZE - - rcu_read_lock(); - l3proto = __nf_ct_l3proto_find(tuple->src.l3num); - len += l3proto->nla_size; - - l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum); - len += l4proto->nla_size; - rcu_read_unlock(); - - return alloc_skb(len, gfp); + + ctnetlink_proto_size(ct) + ; } -static int ctnetlink_conntrack_event(struct notifier_block *this, - unsigned long events, void *ptr) +static int +ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - struct nf_ct_event *item = (struct nf_ct_event *)ptr; struct nf_conn *ct = item->ct; struct sk_buff *skb; unsigned int type; - sk_buff_data_t b; unsigned int flags = 0, group; + int err; /* ignore our fake conntrack entry */ if (ct == &nf_conntrack_untracked) - return NOTIFY_DONE; + return 0; - if (events & IPCT_DESTROY) { + if (events & (1 << IPCT_DESTROY)) { type = IPCTNL_MSG_CT_DELETE; group = NFNLGRP_CONNTRACK_DESTROY; - } else if (events & (IPCT_NEW | IPCT_RELATED)) { + } else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) { type = IPCTNL_MSG_CT_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; group = NFNLGRP_CONNTRACK_NEW; - } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { + } else if (events) { type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else - return NOTIFY_DONE; + return 0; if (!item->report && !nfnetlink_has_listeners(group)) - return NOTIFY_DONE; + return 0; - skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC); - if (!skb) + skb = nlmsg_new(ctnetlink_nlmsg_size(ct), GFP_ATOMIC); + if (skb == NULL) goto errout; - b = skb->tail; - type |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; - nlh->nlmsg_flags = flags; + nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = nf_ct_l3num(ct); nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; @@ -529,14 +503,14 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); nest_parms = nla_nest_start(skb, CTA_TUPLE_REPLY | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) + if (ctnetlink_dump_tuples(skb, nf_ct_tuple(ct, IP_CT_DIR_REPLY)) < 0) goto nla_put_failure; nla_nest_end(skb, nest_parms); @@ -546,7 +520,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, if (ctnetlink_dump_status(skb, ct) < 0) goto nla_put_failure; - if (events & IPCT_DESTROY) { + if (events & (1 << IPCT_DESTROY)) { if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) goto nla_put_failure; @@ -554,47 +528,51 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, if (ctnetlink_dump_timeout(skb, ct) < 0) goto nla_put_failure; - if (events & IPCT_PROTOINFO + if (events & (1 << IPCT_PROTOINFO) && ctnetlink_dump_protoinfo(skb, ct) < 0) goto nla_put_failure; - if ((events & IPCT_HELPER || nfct_help(ct)) + if ((events & (1 << IPCT_HELPER) || nfct_help(ct)) && ctnetlink_dump_helpinfo(skb, ct) < 0) goto nla_put_failure; #ifdef CONFIG_NF_CONNTRACK_SECMARK - if ((events & IPCT_SECMARK || ct->secmark) + if ((events & (1 << IPCT_SECMARK) || ct->secmark) && ctnetlink_dump_secmark(skb, ct) < 0) goto nla_put_failure; #endif - if (events & IPCT_RELATED && + if (events & (1 << IPCT_RELATED) && ctnetlink_dump_master(skb, ct) < 0) goto nla_put_failure; - if (events & IPCT_NATSEQADJ && + if (events & (1 << IPCT_NATSEQADJ) && ctnetlink_dump_nat_seq_adj(skb, ct) < 0) goto nla_put_failure; } #ifdef CONFIG_NF_CONNTRACK_MARK - if ((events & IPCT_MARK || ct->mark) + if ((events & (1 << IPCT_MARK) || ct->mark) && ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif rcu_read_unlock(); - nlh->nlmsg_len = skb->tail - b; - nfnetlink_send(skb, item->pid, group, item->report); - return NOTIFY_DONE; + nlmsg_end(skb, nlh); + err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC); + if (err == -ENOBUFS || err == -EAGAIN) + return -ENOBUFS; + + return 0; nla_put_failure: rcu_read_unlock(); + nlmsg_cancel(skb, nlh); nlmsg_failure: kfree_skb(skb); errout: nfnetlink_set_err(0, group, -ENOBUFS); - return NOTIFY_DONE; + return 0; } #endif /* CONFIG_NF_CONNTRACK_EVENTS */ @@ -611,7 +589,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; rcu_read_lock(); @@ -637,8 +615,7 @@ restart: } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, - 1, ct) < 0) { + IPCTNL_MSG_CT_NEW, ct) < 0) { cb->args[1] = (unsigned long)ct; goto out; } @@ -792,7 +769,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; struct nf_conn *ct; - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; int err = 0; @@ -802,9 +779,9 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ - nf_conntrack_flush(&init_net, - NETLINK_CB(skb).pid, - nlmsg_report(nlh)); + nf_conntrack_flush_report(&init_net, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); return 0; } @@ -825,10 +802,15 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, } } - nf_conntrack_event_report(IPCT_DESTROY, - ct, - NETLINK_CB(skb).pid, - nlmsg_report(nlh)); + if (nf_conntrack_event_report(IPCT_DESTROY, ct, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)) < 0) { + nf_ct_delete_from_lists(ct); + /* we failed to report the event, try later */ + nf_ct_insert_dying_list(ct); + nf_ct_put(ct); + return 0; + } /* death_by_timeout would report the event again */ set_bit(IPS_DYING_BIT, &ct->status); @@ -847,7 +829,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple tuple; struct nf_conn *ct; struct sk_buff *skb2 = NULL; - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; int err = 0; @@ -872,15 +854,15 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, ct = nf_ct_tuplehash_to_ctrack(h); err = -ENOMEM; - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb2) { + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) { nf_ct_put(ct); return -ENOMEM; } rcu_read_lock(); err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, 1, ct); + IPCTNL_MSG_CT_NEW, ct); rcu_read_unlock(); nf_ct_put(ct); if (err <= 0) @@ -1280,6 +1262,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], } nf_ct_acct_ext_add(ct, GFP_ATOMIC); + nf_ct_ecache_ext_add(ct, GFP_ATOMIC); #if defined(CONFIG_NF_CONNTRACK_MARK) if (cda[CTA_MARK]) @@ -1325,7 +1308,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, { struct nf_conntrack_tuple otuple, rtuple; struct nf_conntrack_tuple_hash *h = NULL; - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; int err = 0; @@ -1367,13 +1350,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, else events = IPCT_NEW; - nf_conntrack_event_report(IPCT_STATUS | - IPCT_HELPER | - IPCT_PROTOINFO | - IPCT_NATSEQADJ | - IPCT_MARK | events, - ct, NETLINK_CB(skb).pid, - nlmsg_report(nlh)); + nf_conntrack_eventmask_report((1 << IPCT_STATUS) | + (1 << IPCT_HELPER) | + (1 << IPCT_PROTOINFO) | + (1 << IPCT_NATSEQADJ) | + (1 << IPCT_MARK) | events, + ct, NETLINK_CB(skb).pid, + nlmsg_report(nlh)); nf_ct_put(ct); } else spin_unlock_bh(&nf_conntrack_lock); @@ -1392,13 +1375,13 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err == 0) { nf_conntrack_get(&ct->ct_general); spin_unlock_bh(&nf_conntrack_lock); - nf_conntrack_event_report(IPCT_STATUS | - IPCT_HELPER | - IPCT_PROTOINFO | - IPCT_NATSEQADJ | - IPCT_MARK, - ct, NETLINK_CB(skb).pid, - nlmsg_report(nlh)); + nf_conntrack_eventmask_report((1 << IPCT_STATUS) | + (1 << IPCT_HELPER) | + (1 << IPCT_PROTOINFO) | + (1 << IPCT_NATSEQADJ) | + (1 << IPCT_MARK), + ct, NETLINK_CB(skb).pid, + nlmsg_report(nlh)); nf_ct_put(ct); } else spin_unlock_bh(&nf_conntrack_lock); @@ -1503,19 +1486,18 @@ nla_put_failure: static int ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, - int event, - int nowait, - const struct nf_conntrack_expect *exp) + int event, const struct nf_conntrack_expect *exp) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned char *b = skb_tail_pointer(skb); + unsigned int flags = pid ? NLM_F_MULTI : 0; event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; - nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; + nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = exp->tuple.src.l3num; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; @@ -1523,49 +1505,46 @@ ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nla_put_failure; - nlh->nlmsg_len = skb_tail_pointer(skb) - b; + nlmsg_end(skb, nlh); return skb->len; nlmsg_failure: nla_put_failure: - nlmsg_trim(skb, b); + nlmsg_cancel(skb, nlh); return -1; } #ifdef CONFIG_NF_CONNTRACK_EVENTS -static int ctnetlink_expect_event(struct notifier_block *this, - unsigned long events, void *ptr) +static int +ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - struct nf_exp_event *item = (struct nf_exp_event *)ptr; struct nf_conntrack_expect *exp = item->exp; struct sk_buff *skb; unsigned int type; - sk_buff_data_t b; int flags = 0; - if (events & IPEXP_NEW) { + if (events & (1 << IPEXP_NEW)) { type = IPCTNL_MSG_EXP_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; } else - return NOTIFY_DONE; + return 0; if (!item->report && !nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) - return NOTIFY_DONE; + return 0; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); - if (!skb) + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (skb == NULL) goto errout; - b = skb->tail; - type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = NLMSG_PUT(skb, item->pid, 0, type, sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); + nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; - nlh->nlmsg_flags = flags; + nfmsg = nlmsg_data(nlh); nfmsg->nfgen_family = exp->tuple.src.l3num; nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; @@ -1575,17 +1554,19 @@ static int ctnetlink_expect_event(struct notifier_block *this, goto nla_put_failure; rcu_read_unlock(); - nlh->nlmsg_len = skb->tail - b; - nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, item->report); - return NOTIFY_DONE; + nlmsg_end(skb, nlh); + nfnetlink_send(skb, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, + item->report, GFP_ATOMIC); + return 0; nla_put_failure: rcu_read_unlock(); + nlmsg_cancel(skb, nlh); nlmsg_failure: kfree_skb(skb); errout: nfnetlink_set_err(0, 0, -ENOBUFS); - return NOTIFY_DONE; + return 0; } #endif static int ctnetlink_exp_done(struct netlink_callback *cb) @@ -1600,7 +1581,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = &init_net; struct nf_conntrack_expect *exp, *last; - struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); struct hlist_node *n; u_int8_t l3proto = nfmsg->nfgen_family; @@ -1617,10 +1598,11 @@ restart: continue; cb->args[1] = 0; } - if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (ctnetlink_exp_fill_info(skb, + NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, - 1, exp) < 0) { + exp) < 0) { if (!atomic_inc_not_zero(&exp->use)) continue; cb->args[1] = (unsigned long)exp; @@ -1652,7 +1634,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; struct sk_buff *skb2; - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; int err = 0; @@ -1683,14 +1665,13 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, } err = -ENOMEM; - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb2) + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) goto out; rcu_read_lock(); err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, - nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, - 1, exp); + nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); rcu_read_unlock(); if (err <= 0) goto free; @@ -1713,7 +1694,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nf_conntrack_helper *h; - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); struct hlist_node *n, *next; u_int8_t u3 = nfmsg->nfgen_family; unsigned int i; @@ -1854,7 +1835,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, { struct nf_conntrack_tuple tuple; struct nf_conntrack_expect *exp; - struct nfgenmsg *nfmsg = NLMSG_DATA(nlh); + struct nfgenmsg *nfmsg = nlmsg_data(nlh); u_int8_t u3 = nfmsg->nfgen_family; int err = 0; @@ -1891,12 +1872,12 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, } #ifdef CONFIG_NF_CONNTRACK_EVENTS -static struct notifier_block ctnl_notifier = { - .notifier_call = ctnetlink_conntrack_event, +static struct nf_ct_event_notifier ctnl_notifier = { + .fcn = ctnetlink_conntrack_event, }; -static struct notifier_block ctnl_notifier_exp = { - .notifier_call = ctnetlink_expect_event, +static struct nf_exp_event_notifier ctnl_notifier_exp = { + .fcn = ctnetlink_expect_event, }; #endif diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index aee0d6bea309..1b816a2ea813 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -25,8 +25,6 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_log.h> -static DEFINE_RWLOCK(dccp_lock); - /* Timeouts are based on values from RFC4340: * * - REQUEST: @@ -492,7 +490,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, return NF_ACCEPT; } - write_lock_bh(&dccp_lock); + spin_lock_bh(&ct->lock); role = ct->proto.dccp.role[dir]; old_state = ct->proto.dccp.state; @@ -536,13 +534,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; - write_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: - write_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); @@ -552,7 +550,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_dir = dir; ct->proto.dccp.last_pkt = type; ct->proto.dccp.state = new_state; - write_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); @@ -621,36 +619,39 @@ static int dccp_print_tuple(struct seq_file *s, ntohs(tuple->dst.u.dccp.port)); } -static int dccp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) +static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, - const struct nf_conn *ct) + struct nf_conn *ct) { struct nlattr *nest_parms; - read_lock_bh(&dccp_lock); + spin_lock_bh(&ct->lock); nest_parms = nla_nest_start(skb, CTA_PROTOINFO_DCCP | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_STATE, ct->proto.dccp.state); NLA_PUT_U8(skb, CTA_PROTOINFO_DCCP_ROLE, ct->proto.dccp.role[IP_CT_DIR_ORIGINAL]); + NLA_PUT_BE64(skb, CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ, + cpu_to_be64(ct->proto.dccp.handshake_seq)); nla_nest_end(skb, nest_parms); - read_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); return 0; nla_put_failure: - read_unlock_bh(&dccp_lock); + spin_unlock_bh(&ct->lock); return -1; } static const struct nla_policy dccp_nla_policy[CTA_PROTOINFO_DCCP_MAX + 1] = { [CTA_PROTOINFO_DCCP_STATE] = { .type = NLA_U8 }, [CTA_PROTOINFO_DCCP_ROLE] = { .type = NLA_U8 }, + [CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ] = { .type = NLA_U64 }, }; static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) @@ -674,7 +675,7 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) return -EINVAL; } - write_lock_bh(&dccp_lock); + spin_lock_bh(&ct->lock); ct->proto.dccp.state = nla_get_u8(tb[CTA_PROTOINFO_DCCP_STATE]); if (nla_get_u8(tb[CTA_PROTOINFO_DCCP_ROLE]) == CT_DCCP_ROLE_CLIENT) { ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_CLIENT; @@ -683,7 +684,11 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) ct->proto.dccp.role[IP_CT_DIR_ORIGINAL] = CT_DCCP_ROLE_SERVER; ct->proto.dccp.role[IP_CT_DIR_REPLY] = CT_DCCP_ROLE_CLIENT; } - write_unlock_bh(&dccp_lock); + if (tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ]) { + ct->proto.dccp.handshake_seq = + be64_to_cpu(nla_get_be64(tb[CTA_PROTOINFO_DCCP_HANDSHAKE_SEQ])); + } + spin_unlock_bh(&ct->lock); return 0; } diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 117b80112fcb..a54a0af0edba 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -176,7 +176,7 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple, static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { - struct net *net = dev_net(skb->dev ? skb->dev : skb->dst->dev); + struct net *net = dev_net(skb->dev ? skb->dev : skb_dst(skb)->dev); const struct gre_hdr_pptp *pgrehdr; struct gre_hdr_pptp _pgrehdr; __be16 srckey; @@ -219,8 +219,7 @@ static int gre_print_tuple(struct seq_file *s, } /* print private data for conntrack */ -static int gre_print_conntrack(struct seq_file *s, - const struct nf_conn *ct) +static int gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) { return seq_printf(s, "timeout=%u, stream_timeout=%u ", (ct->proto.gre.timeout / HZ), diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 101b4ad9e817..c10e6f36e31e 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -25,9 +25,6 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_ecache.h> -/* Protects ct->proto.sctp */ -static DEFINE_RWLOCK(sctp_lock); - /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. --RR @@ -164,13 +161,13 @@ static int sctp_print_tuple(struct seq_file *s, } /* Print out the private part of the conntrack. */ -static int sctp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) +static int sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { enum sctp_conntrack state; - read_lock_bh(&sctp_lock); + spin_lock_bh(&ct->lock); state = ct->proto.sctp.state; - read_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); return seq_printf(s, "%s ", sctp_conntrack_names[state]); } @@ -318,7 +315,7 @@ static int sctp_packet(struct nf_conn *ct, } old_state = new_state = SCTP_CONNTRACK_NONE; - write_lock_bh(&sctp_lock); + spin_lock_bh(&ct->lock); for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { /* Special cases of Verification tag check (Sec 8.5.1) */ if (sch->type == SCTP_CID_INIT) { @@ -371,7 +368,7 @@ static int sctp_packet(struct nf_conn *ct, if (old_state != new_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); } - write_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); nf_ct_refresh_acct(ct, ctinfo, skb, sctp_timeouts[new_state]); @@ -386,7 +383,7 @@ static int sctp_packet(struct nf_conn *ct, return NF_ACCEPT; out_unlock: - write_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); out: return -NF_ACCEPT; } @@ -469,11 +466,11 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb, #include <linux/netfilter/nfnetlink_conntrack.h> static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, - const struct nf_conn *ct) + struct nf_conn *ct) { struct nlattr *nest_parms; - read_lock_bh(&sctp_lock); + spin_lock_bh(&ct->lock); nest_parms = nla_nest_start(skb, CTA_PROTOINFO_SCTP | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; @@ -488,14 +485,14 @@ static int sctp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, CTA_PROTOINFO_SCTP_VTAG_REPLY, ct->proto.sctp.vtag[IP_CT_DIR_REPLY]); - read_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); nla_nest_end(skb, nest_parms); return 0; nla_put_failure: - read_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); return -1; } @@ -527,13 +524,13 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) !tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]) return -EINVAL; - write_lock_bh(&sctp_lock); + spin_lock_bh(&ct->lock); ct->proto.sctp.state = nla_get_u8(tb[CTA_PROTOINFO_SCTP_STATE]); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_ORIGINAL]); ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = nla_get_be32(tb[CTA_PROTOINFO_SCTP_VTAG_REPLY]); - write_unlock_bh(&sctp_lock); + spin_unlock_bh(&ct->lock); return 0; } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 97a6e93d742e..33fc0a443f3d 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -29,9 +29,6 @@ #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> -/* Protects ct->proto.tcp */ -static DEFINE_RWLOCK(tcp_lock); - /* "Be conservative in what you do, be liberal in what you accept from others." If it's non-zero, we mark only out of window RST segments as INVALID. */ @@ -59,7 +56,7 @@ static const char *const tcp_conntrack_names[] = { "LAST_ACK", "TIME_WAIT", "CLOSE", - "LISTEN" + "SYN_SENT2", }; #define SECS * HZ @@ -82,6 +79,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { [TCP_CONNTRACK_LAST_ACK] = 30 SECS, [TCP_CONNTRACK_TIME_WAIT] = 2 MINS, [TCP_CONNTRACK_CLOSE] = 10 SECS, + [TCP_CONNTRACK_SYN_SENT2] = 2 MINS, }; #define sNO TCP_CONNTRACK_NONE @@ -93,7 +91,7 @@ static unsigned int tcp_timeouts[TCP_CONNTRACK_MAX] __read_mostly = { #define sLA TCP_CONNTRACK_LAST_ACK #define sTW TCP_CONNTRACK_TIME_WAIT #define sCL TCP_CONNTRACK_CLOSE -#define sLI TCP_CONNTRACK_LISTEN +#define sS2 TCP_CONNTRACK_SYN_SENT2 #define sIV TCP_CONNTRACK_MAX #define sIG TCP_CONNTRACK_IGNORE @@ -123,6 +121,7 @@ enum tcp_bit_set { * * NONE: initial state * SYN_SENT: SYN-only packet seen + * SYN_SENT2: SYN-only packet seen from reply dir, simultaneous open * SYN_RECV: SYN-ACK packet seen * ESTABLISHED: ACK packet seen * FIN_WAIT: FIN packet seen @@ -131,26 +130,24 @@ enum tcp_bit_set { * TIME_WAIT: last ACK seen * CLOSE: closed connection (RST) * - * LISTEN state is not used. - * * Packets marked as IGNORED (sIG): * if they may be either invalid or valid * and the receiver may send back a connection * closing RST or a SYN/ACK. * * Packets marked as INVALID (sIV): - * if they are invalid - * or we do not support the request (simultaneous open) + * if we regard them as truly invalid packets */ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { { /* ORIGINAL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 }, /* * sNO -> sSS Initialize a new connection * sSS -> sSS Retransmitted SYN - * sSR -> sIG Late retransmitted SYN? + * sS2 -> sS2 Late retransmitted SYN + * sSR -> sIG * sES -> sIG Error: SYNs in window outside the SYN_SENT state * are errors. Receiver will reply with RST * and close the connection. @@ -161,22 +158,30 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sSS Reopened connection (RFC 1122). * sCL -> sSS */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*synack*/ { sIV, sIV, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, /* - * A SYN/ACK from the client is always invalid: - * - either it tries to set up a simultaneous open, which is - * not supported; - * - or the firewall has just been inserted between the two hosts - * during the session set-up. The SYN will be retransmitted - * by the true client (or it'll time out). + * sNO -> sIV Too late and no reason to do anything + * sSS -> sIV Client can't send SYN and then SYN/ACK + * sS2 -> sSR SYN/ACK sent to SYN2 in simultaneous open + * sSR -> sIG + * sES -> sIG Error: SYNs in window outside the SYN_SENT state + * are errors. Receiver will reply with RST + * and close the connection. + * Or we are not in sync and hold a dead connection. + * sFW -> sIG + * sCW -> sIG + * sLA -> sIG + * sTW -> sIG + * sCL -> sIG */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, /* * sNO -> sIV Too late and no reason to do anything... * sSS -> sIV Client migth not send FIN in this state: * we enforce waiting for a SYN/ACK reply first. + * sS2 -> sIV * sSR -> sFW Close started. * sES -> sFW * sFW -> sLA FIN seen in both directions, waiting for @@ -187,11 +192,12 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, /* * sNO -> sES Assumed. * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. + * sS2 -> sIV * sSR -> sES Established state is reached. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. @@ -200,29 +206,31 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW Retransmitted last ACK. Remain in the same state. * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } }, { /* REPLY */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*syn*/ { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sS2 }, /* * sNO -> sIV Never reached. - * sSS -> sIV Simultaneous open, not supported - * sSR -> sIV Simultaneous open, not supported. - * sES -> sIV Server may not initiate a connection. + * sSS -> sS2 Simultaneous open + * sS2 -> sS2 Retransmitted simultaneous SYN + * sSR -> sIV Invalid SYN packets sent by the server + * sES -> sIV * sFW -> sIV * sCW -> sIV * sLA -> sIV * sTW -> sIV Reopened connection, but server may not do it. * sCL -> sIV */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sSR }, /* * sSS -> sSR Standard open. + * sS2 -> sSR Simultaneous open * sSR -> sSR Retransmitted SYN/ACK. * sES -> sIG Late retransmitted SYN/ACK? * sFW -> sIG Might be SYN/ACK answering ignored SYN @@ -231,10 +239,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sIG * sCL -> sIG */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ /*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, /* * sSS -> sIV Server might not send FIN in this state. + * sS2 -> sIV * sSR -> sFW Close started. * sES -> sFW * sFW -> sLA FIN seen in both directions. @@ -243,10 +252,11 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG }, /* * sSS -> sIG Might be a half-open connection. + * sS2 -> sIG * sSR -> sSR Might answer late resent SYN. * sES -> sES :-) * sFW -> sCW Normal close request answered by ACK. @@ -255,8 +265,8 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { * sTW -> sTW Retransmitted last ACK. * sCL -> sCL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2 */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL }, /*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } } }; @@ -296,13 +306,13 @@ static int tcp_print_tuple(struct seq_file *s, } /* Print out the private part of the conntrack. */ -static int tcp_print_conntrack(struct seq_file *s, const struct nf_conn *ct) +static int tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { enum tcp_conntrack state; - read_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); state = ct->proto.tcp.state; - read_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); return seq_printf(s, "%s ", tcp_conntrack_names[state]); } @@ -521,13 +531,14 @@ static bool tcp_in_window(const struct nf_conn *ct, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); - if (sender->td_end == 0) { + if (sender->td_maxwin == 0) { /* * Initialize sender data. */ - if (tcph->syn && tcph->ack) { + if (tcph->syn) { /* - * Outgoing SYN-ACK in reply to a SYN. + * SYN-ACK in reply to a SYN + * or SYN from reply direction in simultaneous open. */ sender->td_end = sender->td_maxend = end; @@ -543,6 +554,9 @@ static bool tcp_in_window(const struct nf_conn *ct, && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) sender->td_scale = receiver->td_scale = 0; + if (!tcph->ack) + /* Simultaneous open */ + return true; } else { /* * We are in the middle of a connection, @@ -716,14 +730,14 @@ void nf_conntrack_tcp_update(const struct sk_buff *skb, end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); - write_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); /* * We have to worry for the ack in the reply packet only... */ if (after(end, ct->proto.tcp.seen[dir].td_end)) ct->proto.tcp.seen[dir].td_end = end; ct->proto.tcp.last_end = end; - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); pr_debug("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " "receiver end=%u maxend=%u maxwin=%u scale=%i\n", sender->td_end, sender->td_maxend, sender->td_maxwin, @@ -832,7 +846,7 @@ static int tcp_packet(struct nf_conn *ct, th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); BUG_ON(th == NULL); - write_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); old_state = ct->proto.tcp.state; dir = CTINFO2DIR(ctinfo); index = get_conntrack_index(th); @@ -862,7 +876,7 @@ static int tcp_packet(struct nf_conn *ct, && ct->proto.tcp.last_index == TCP_RST_SET)) { /* Attempt to reopen a closed/aborted connection. * Delete this connection and look up again. */ - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); /* Only repeat if we can actually remove the timer. * Destruction may already be in progress in process @@ -898,7 +912,7 @@ static int tcp_packet(struct nf_conn *ct, * that the client cannot but retransmit its SYN and * thus initiate a clean new session. */ - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); @@ -911,7 +925,7 @@ static int tcp_packet(struct nf_conn *ct, ct->proto.tcp.last_end = segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored "); @@ -920,7 +934,7 @@ static int tcp_packet(struct nf_conn *ct, /* Invalid packet */ pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); @@ -930,7 +944,7 @@ static int tcp_packet(struct nf_conn *ct, && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { /* Invalid RST */ - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid RST "); @@ -961,7 +975,7 @@ static int tcp_packet(struct nf_conn *ct, if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, skb, dataoff, th, pf)) { - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); return -NF_ACCEPT; } in_window: @@ -990,9 +1004,8 @@ static int tcp_packet(struct nf_conn *ct, timeout = nf_ct_tcp_timeout_unacknowledged; else timeout = tcp_timeouts[new_state]; - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); if (new_state != old_state) nf_conntrack_event_cache(IPCT_PROTOINFO, ct); @@ -1086,7 +1099,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.tcp.seen[1].td_end = 0; ct->proto.tcp.seen[1].td_maxend = 0; - ct->proto.tcp.seen[1].td_maxwin = 1; + ct->proto.tcp.seen[1].td_maxwin = 0; ct->proto.tcp.seen[1].td_scale = 0; /* tcp_packet will set them */ @@ -1108,12 +1121,12 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb, #include <linux/netfilter/nfnetlink_conntrack.h> static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, - const struct nf_conn *ct) + struct nf_conn *ct) { struct nlattr *nest_parms; struct nf_ct_tcp_flags tmp = {}; - read_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED); if (!nest_parms) goto nla_put_failure; @@ -1133,14 +1146,14 @@ static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, tmp.flags = ct->proto.tcp.seen[1].flags; NLA_PUT(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY, sizeof(struct nf_ct_tcp_flags), &tmp); - read_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); nla_nest_end(skb, nest_parms); return 0; nla_put_failure: - read_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); return -1; } @@ -1171,7 +1184,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX) return -EINVAL; - write_lock_bh(&tcp_lock); + spin_lock_bh(&ct->lock); if (tb[CTA_PROTOINFO_TCP_STATE]) ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]); @@ -1198,7 +1211,7 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) ct->proto.tcp.seen[1].td_scale = nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]); } - write_unlock_bh(&tcp_lock); + spin_unlock_bh(&ct->lock); return 0; } @@ -1328,6 +1341,13 @@ static struct ctl_table tcp_compat_sysctl_table[] = { .proc_handler = proc_dointvec_jiffies, }, { + .procname = "ip_conntrack_tcp_timeout_syn_sent2", + .data = &tcp_timeouts[TCP_CONNTRACK_SYN_SENT2], + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { .procname = "ip_conntrack_tcp_timeout_syn_recv", .data = &tcp_timeouts[TCP_CONNTRACK_SYN_RECV], .maxlen = sizeof(unsigned int), diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index beb37311e1a5..2fefe147750a 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -248,14 +248,14 @@ static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, rcu_assign_pointer(nf_loggers[tindex], logger); mutex_unlock(&nf_log_mutex); } else { - rcu_read_lock(); - logger = rcu_dereference(nf_loggers[tindex]); + mutex_lock(&nf_log_mutex); + logger = nf_loggers[tindex]; if (!logger) table->data = "NONE"; else table->data = logger->name; r = proc_dostring(table, write, filp, buffer, lenp, ppos); - rcu_read_unlock(); + mutex_unlock(&nf_log_mutex); } return r; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 4f2310c93e01..3a6fd77f7761 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -204,10 +204,10 @@ int nf_queue(struct sk_buff *skb, queuenum); switch (pf) { - case AF_INET: + case NFPROTO_IPV4: skb->protocol = htons(ETH_P_IP); break; - case AF_INET6: + case NFPROTO_IPV6: skb->protocol = htons(ETH_P_IPV6); break; } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index b8ab37ad7ed5..92761a988375 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -107,9 +107,10 @@ int nfnetlink_has_listeners(unsigned int group) } EXPORT_SYMBOL_GPL(nfnetlink_has_listeners); -int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) +int nfnetlink_send(struct sk_buff *skb, u32 pid, + unsigned group, int echo, gfp_t flags) { - return nlmsg_notify(nfnl, skb, pid, group, echo, gfp_any()); + return nlmsg_notify(nfnl, skb, pid, group, echo, flags); } EXPORT_SYMBOL_GPL(nfnetlink_send); @@ -136,7 +137,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EPERM; /* All the messages must at least contain nfgenmsg */ - if (nlh->nlmsg_len < NLMSG_SPACE(sizeof(struct nfgenmsg))) + if (nlh->nlmsg_len < NLMSG_LENGTH(sizeof(struct nfgenmsg))) return 0; type = nlh->nlmsg_type; @@ -160,19 +161,14 @@ replay: { int min_len = NLMSG_SPACE(sizeof(struct nfgenmsg)); u_int8_t cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type); - u_int16_t attr_count = ss->cb[cb_id].attr_count; - struct nlattr *cda[attr_count+1]; - - if (likely(nlh->nlmsg_len >= min_len)) { - struct nlattr *attr = (void *)nlh + NLMSG_ALIGN(min_len); - int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len); - - err = nla_parse(cda, attr_count, attr, attrlen, - ss->cb[cb_id].policy); - if (err < 0) - return err; - } else - return -EINVAL; + struct nlattr *cda[ss->cb[cb_id].attr_count + 1]; + struct nlattr *attr = (void *)nlh + min_len; + int attrlen = nlh->nlmsg_len - min_len; + + err = nla_parse(cda, ss->cb[cb_id].attr_count, + attr, attrlen, ss->cb[cb_id].policy); + if (err < 0) + return err; err = nc->call(nfnl, skb, nlh, cda); if (err == -EAGAIN) diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 8c860112ce05..71daa0934b6c 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1,6 +1,6 @@ /* * This is a module which is used for queueing packets and communicating with - * userspace via nfetlink. + * userspace via nfnetlink. * * (C) 2005 by Harald Welte <laforge@netfilter.org> * (C) 2007 by Patrick McHardy <kaber@trash.net> @@ -932,6 +932,8 @@ static void __exit nfnetlink_queue_fini(void) #endif nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); + + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } MODULE_DESCRIPTION("netfilter packet queue handler"); diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 150e5cf62f85..025d1a0af78b 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -329,6 +329,32 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target, } EXPORT_SYMBOL_GPL(xt_find_revision); +static char *textify_hooks(char *buf, size_t size, unsigned int mask) +{ + static const char *const names[] = { + "PREROUTING", "INPUT", "FORWARD", + "OUTPUT", "POSTROUTING", "BROUTING", + }; + unsigned int i; + char *p = buf; + bool np = false; + int res; + + *p = '\0'; + for (i = 0; i < ARRAY_SIZE(names); ++i) { + if (!(mask & (1 << i))) + continue; + res = snprintf(p, size, "%s%s", np ? "/" : "", names[i]); + if (res > 0) { + size -= res; + p += res; + } + np = true; + } + + return buf; +} + int xt_check_match(struct xt_mtchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { @@ -338,26 +364,30 @@ int xt_check_match(struct xt_mtchk_param *par, * ebt_among is exempt from centralized matchsize checking * because it uses a dynamic-size data set. */ - printk("%s_tables: %s match: invalid size %Zu != %u\n", + pr_err("%s_tables: %s match: invalid size %Zu != %u\n", xt_prefix[par->family], par->match->name, XT_ALIGN(par->match->matchsize), size); return -EINVAL; } if (par->match->table != NULL && strcmp(par->match->table, par->table) != 0) { - printk("%s_tables: %s match: only valid in %s table, not %s\n", + pr_err("%s_tables: %s match: only valid in %s table, not %s\n", xt_prefix[par->family], par->match->name, par->match->table, par->table); return -EINVAL; } if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) { - printk("%s_tables: %s match: bad hook_mask %#x/%#x\n", + char used[64], allow[64]; + + pr_err("%s_tables: %s match: used from hooks %s, but only " + "valid from %s\n", xt_prefix[par->family], par->match->name, - par->hook_mask, par->match->hooks); + textify_hooks(used, sizeof(used), par->hook_mask), + textify_hooks(allow, sizeof(allow), par->match->hooks)); return -EINVAL; } if (par->match->proto && (par->match->proto != proto || inv_proto)) { - printk("%s_tables: %s match: only valid for protocol %u\n", + pr_err("%s_tables: %s match: only valid for protocol %u\n", xt_prefix[par->family], par->match->name, par->match->proto); return -EINVAL; @@ -484,26 +514,30 @@ int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { if (XT_ALIGN(par->target->targetsize) != size) { - printk("%s_tables: %s target: invalid size %Zu != %u\n", + pr_err("%s_tables: %s target: invalid size %Zu != %u\n", xt_prefix[par->family], par->target->name, XT_ALIGN(par->target->targetsize), size); return -EINVAL; } if (par->target->table != NULL && strcmp(par->target->table, par->table) != 0) { - printk("%s_tables: %s target: only valid in %s table, not %s\n", + pr_err("%s_tables: %s target: only valid in %s table, not %s\n", xt_prefix[par->family], par->target->name, par->target->table, par->table); return -EINVAL; } if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) { - printk("%s_tables: %s target: bad hook_mask %#x/%#x\n", + char used[64], allow[64]; + + pr_err("%s_tables: %s target: used from hooks %s, but only " + "usable from %s\n", xt_prefix[par->family], par->target->name, - par->hook_mask, par->target->hooks); + textify_hooks(used, sizeof(used), par->hook_mask), + textify_hooks(allow, sizeof(allow), par->target->hooks)); return -EINVAL; } if (par->target->proto && (par->target->proto != proto || inv_proto)) { - printk("%s_tables: %s target: only valid for protocol %u\n", + pr_err("%s_tables: %s target: only valid for protocol %u\n", xt_prefix[par->family], par->target->name, par->target->proto); return -EINVAL; diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index f9977b3311f7..498b45101df7 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -11,6 +11,10 @@ #include <linux/module.h> #include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/jhash.h> + #include <linux/netfilter.h> #include <linux/netfilter_arp.h> #include <linux/netfilter/x_tables.h> @@ -23,6 +27,8 @@ MODULE_ALIAS("ipt_NFQUEUE"); MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); +static u32 jhash_initval __read_mostly; + static unsigned int nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) { @@ -31,32 +37,105 @@ nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) return NF_QUEUE_NR(tinfo->queuenum); } +static u32 hash_v4(const struct sk_buff *skb) +{ + const struct iphdr *iph = ip_hdr(skb); + u32 ipaddr; + + /* packets in either direction go into same queue */ + ipaddr = iph->saddr ^ iph->daddr; + + return jhash_2words(ipaddr, iph->protocol, jhash_initval); +} + +static unsigned int +nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) + queue = hash_v4(skb) % info->queues_total + queue; + return NF_QUEUE_NR(queue); +} + +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +static u32 hash_v6(const struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + u32 addr[4]; + + addr[0] = ip6h->saddr.s6_addr32[0] ^ ip6h->daddr.s6_addr32[0]; + addr[1] = ip6h->saddr.s6_addr32[1] ^ ip6h->daddr.s6_addr32[1]; + addr[2] = ip6h->saddr.s6_addr32[2] ^ ip6h->daddr.s6_addr32[2]; + addr[3] = ip6h->saddr.s6_addr32[3] ^ ip6h->daddr.s6_addr32[3]; + + return jhash2(addr, ARRAY_SIZE(addr), jhash_initval); +} + +static unsigned int +nfqueue_tg6_v1(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 queue = info->queuenum; + + if (info->queues_total > 1) + queue = hash_v6(skb) % info->queues_total + queue; + return NF_QUEUE_NR(queue); +} +#endif + +static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par) +{ + const struct xt_NFQ_info_v1 *info = par->targinfo; + u32 maxid; + + if (info->queues_total == 0) { + pr_err("NFQUEUE: number of total queues is 0\n"); + return false; + } + maxid = info->queues_total - 1 + info->queuenum; + if (maxid > 0xffff) { + pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n", + info->queues_total, maxid); + return false; + } + return true; +} + static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", - .family = NFPROTO_IPV4, + .family = NFPROTO_UNSPEC, .target = nfqueue_tg, .targetsize = sizeof(struct xt_NFQ_info), .me = THIS_MODULE, }, { .name = "NFQUEUE", - .family = NFPROTO_IPV6, - .target = nfqueue_tg, - .targetsize = sizeof(struct xt_NFQ_info), + .revision = 1, + .family = NFPROTO_IPV4, + .checkentry = nfqueue_tg_v1_check, + .target = nfqueue_tg4_v1, + .targetsize = sizeof(struct xt_NFQ_info_v1), .me = THIS_MODULE, }, +#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) { .name = "NFQUEUE", - .family = NFPROTO_ARP, - .target = nfqueue_tg, - .targetsize = sizeof(struct xt_NFQ_info), + .revision = 1, + .family = NFPROTO_IPV6, + .checkentry = nfqueue_tg_v1_check, + .target = nfqueue_tg6_v1, + .targetsize = sizeof(struct xt_NFQ_info_v1), .me = THIS_MODULE, }, +#endif }; static int __init nfqueue_tg_init(void) { + get_random_bytes(&jhash_initval, sizeof(jhash_initval)); return xt_register_targets(nfqueue_tg_reg, ARRAY_SIZE(nfqueue_tg_reg)); } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 4f3b1f808795..eda64c1cb1e5 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -73,11 +73,11 @@ tcpmss_mangle_packet(struct sk_buff *skb, } if (info->mss == XT_TCPMSS_CLAMP_PMTU) { - if (dst_mtu(skb->dst) <= minlen) { + if (dst_mtu(skb_dst(skb)) <= minlen) { if (net_ratelimit()) printk(KERN_ERR "xt_TCPMSS: " "unknown or invalid path-MTU (%u)\n", - dst_mtu(skb->dst)); + dst_mtu(skb_dst(skb))); return -1; } if (in_mtu <= minlen) { @@ -86,7 +86,7 @@ tcpmss_mangle_packet(struct sk_buff *skb, "invalid path-MTU (%u)\n", in_mtu); return -1; } - newmss = min(dst_mtu(skb->dst), in_mtu) - minlen; + newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen; } else newmss = info->mss; diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c new file mode 100644 index 000000000000..863e40977a4d --- /dev/null +++ b/net/netfilter/xt_osf.c @@ -0,0 +1,428 @@ +/* + * Copyright (c) 2003+ Evgeniy Polyakov <zbr@ioremap.net> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/module.h> +#include <linux/kernel.h> + +#include <linux/if.h> +#include <linux/inetdevice.h> +#include <linux/ip.h> +#include <linux/list.h> +#include <linux/rculist.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/tcp.h> + +#include <net/ip.h> +#include <net/tcp.h> + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/x_tables.h> +#include <net/netfilter/nf_log.h> +#include <linux/netfilter/xt_osf.h> + +struct xt_osf_finger { + struct rcu_head rcu_head; + struct list_head finger_entry; + struct xt_osf_user_finger finger; +}; + +enum osf_fmatch_states { + /* Packet does not match the fingerprint */ + FMATCH_WRONG = 0, + /* Packet matches the fingerprint */ + FMATCH_OK, + /* Options do not match the fingerprint, but header does */ + FMATCH_OPT_WRONG, +}; + +/* + * Indexed by dont-fragment bit. + * It is the only constant value in the fingerprint. + */ +static struct list_head xt_osf_fingers[2]; + +static const struct nla_policy xt_osf_policy[OSF_ATTR_MAX + 1] = { + [OSF_ATTR_FINGER] = { .len = sizeof(struct xt_osf_user_finger) }, +}; + +static void xt_osf_finger_free_rcu(struct rcu_head *rcu_head) +{ + struct xt_osf_finger *f = container_of(rcu_head, struct xt_osf_finger, rcu_head); + + kfree(f); +} + +static int xt_osf_add_callback(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) +{ + struct xt_osf_user_finger *f; + struct xt_osf_finger *kf = NULL, *sf; + int err = 0; + + if (!osf_attrs[OSF_ATTR_FINGER]) + return -EINVAL; + + if (!(nlh->nlmsg_flags & NLM_F_CREATE)) + return -EINVAL; + + f = nla_data(osf_attrs[OSF_ATTR_FINGER]); + + kf = kmalloc(sizeof(struct xt_osf_finger), GFP_KERNEL); + if (!kf) + return -ENOMEM; + + memcpy(&kf->finger, f, sizeof(struct xt_osf_user_finger)); + + list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) { + if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger))) + continue; + + kfree(kf); + kf = NULL; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + err = -EEXIST; + break; + } + + /* + * We are protected by nfnl mutex. + */ + if (kf) + list_add_tail_rcu(&kf->finger_entry, &xt_osf_fingers[!!f->df]); + + return err; +} + +static int xt_osf_remove_callback(struct sock *ctnl, struct sk_buff *skb, + struct nlmsghdr *nlh, struct nlattr *osf_attrs[]) +{ + struct xt_osf_user_finger *f; + struct xt_osf_finger *sf; + int err = ENOENT; + + if (!osf_attrs[OSF_ATTR_FINGER]) + return -EINVAL; + + f = nla_data(osf_attrs[OSF_ATTR_FINGER]); + + list_for_each_entry(sf, &xt_osf_fingers[!!f->df], finger_entry) { + if (memcmp(&sf->finger, f, sizeof(struct xt_osf_user_finger))) + continue; + + /* + * We are protected by nfnl mutex. + */ + list_del_rcu(&sf->finger_entry); + call_rcu(&sf->rcu_head, xt_osf_finger_free_rcu); + + err = 0; + break; + } + + return err; +} + +static const struct nfnl_callback xt_osf_nfnetlink_callbacks[OSF_MSG_MAX] = { + [OSF_MSG_ADD] = { + .call = xt_osf_add_callback, + .attr_count = OSF_ATTR_MAX, + .policy = xt_osf_policy, + }, + [OSF_MSG_REMOVE] = { + .call = xt_osf_remove_callback, + .attr_count = OSF_ATTR_MAX, + .policy = xt_osf_policy, + }, +}; + +static const struct nfnetlink_subsystem xt_osf_nfnetlink = { + .name = "osf", + .subsys_id = NFNL_SUBSYS_OSF, + .cb_count = OSF_MSG_MAX, + .cb = xt_osf_nfnetlink_callbacks, +}; + +static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info *info, + unsigned char f_ttl) +{ + const struct iphdr *ip = ip_hdr(skb); + + if (info->flags & XT_OSF_TTL) { + if (info->ttl == XT_OSF_TTL_TRUE) + return ip->ttl == f_ttl; + if (info->ttl == XT_OSF_TTL_NOCHECK) + return 1; + else if (ip->ttl <= f_ttl) + return 1; + else { + struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + int ret = 0; + + for_ifa(in_dev) { + if (inet_ifa_match(ip->saddr, ifa)) { + ret = (ip->ttl == f_ttl); + break; + } + } + endfor_ifa(in_dev); + + return ret; + } + } + + return ip->ttl == f_ttl; +} + +static bool xt_osf_match_packet(const struct sk_buff *skb, + const struct xt_match_param *p) +{ + const struct xt_osf_info *info = p->matchinfo; + const struct iphdr *ip = ip_hdr(skb); + const struct tcphdr *tcp; + struct tcphdr _tcph; + int fmatch = FMATCH_WRONG, fcount = 0; + unsigned int optsize = 0, check_WSS = 0; + u16 window, totlen, mss = 0; + bool df; + const unsigned char *optp = NULL, *_optp = NULL; + unsigned char opts[MAX_IPOPTLEN]; + const struct xt_osf_finger *kf; + const struct xt_osf_user_finger *f; + + if (!info) + return false; + + tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); + if (!tcp) + return false; + + if (!tcp->syn) + return false; + + totlen = ntohs(ip->tot_len); + df = ntohs(ip->frag_off) & IP_DF; + window = ntohs(tcp->window); + + if (tcp->doff * 4 > sizeof(struct tcphdr)) { + optsize = tcp->doff * 4 - sizeof(struct tcphdr); + + _optp = optp = skb_header_pointer(skb, ip_hdrlen(skb) + + sizeof(struct tcphdr), optsize, opts); + } + + rcu_read_lock(); + list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) { + f = &kf->finger; + + if (!(info->flags & XT_OSF_LOG) && strcmp(info->genre, f->genre)) + continue; + + optp = _optp; + fmatch = FMATCH_WRONG; + + if (totlen == f->ss && xt_osf_ttl(skb, info, f->ttl)) { + int foptsize, optnum; + + /* + * Should not happen if userspace parser was written correctly. + */ + if (f->wss.wc >= OSF_WSS_MAX) + continue; + + /* Check options */ + + foptsize = 0; + for (optnum = 0; optnum < f->opt_num; ++optnum) + foptsize += f->opt[optnum].length; + + if (foptsize > MAX_IPOPTLEN || + optsize > MAX_IPOPTLEN || + optsize != foptsize) + continue; + + check_WSS = f->wss.wc; + + for (optnum = 0; optnum < f->opt_num; ++optnum) { + if (f->opt[optnum].kind == (*optp)) { + __u32 len = f->opt[optnum].length; + const __u8 *optend = optp + len; + int loop_cont = 0; + + fmatch = FMATCH_OK; + + switch (*optp) { + case OSFOPT_MSS: + mss = optp[3]; + mss <<= 8; + mss |= optp[2]; + + mss = ntohs(mss); + break; + case OSFOPT_TS: + loop_cont = 1; + break; + } + + optp = optend; + } else + fmatch = FMATCH_OPT_WRONG; + + if (fmatch != FMATCH_OK) + break; + } + + if (fmatch != FMATCH_OPT_WRONG) { + fmatch = FMATCH_WRONG; + + switch (check_WSS) { + case OSF_WSS_PLAIN: + if (f->wss.val == 0 || window == f->wss.val) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MSS: + /* + * Some smart modems decrease mangle MSS to + * SMART_MSS_2, so we check standard, decreased + * and the one provided in the fingerprint MSS + * values. + */ +#define SMART_MSS_1 1460 +#define SMART_MSS_2 1448 + if (window == f->wss.val * mss || + window == f->wss.val * SMART_MSS_1 || + window == f->wss.val * SMART_MSS_2) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MTU: + if (window == f->wss.val * (mss + 40) || + window == f->wss.val * (SMART_MSS_1 + 40) || + window == f->wss.val * (SMART_MSS_2 + 40)) + fmatch = FMATCH_OK; + break; + case OSF_WSS_MODULO: + if ((window % f->wss.val) == 0) + fmatch = FMATCH_OK; + break; + } + } + + if (fmatch != FMATCH_OK) + continue; + + fcount++; + + if (info->flags & XT_OSF_LOG) + nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL, + "%s [%s:%s] : %pi4:%d -> %pi4:%d hops=%d\n", + f->genre, f->version, f->subtype, + &ip->saddr, ntohs(tcp->source), + &ip->daddr, ntohs(tcp->dest), + f->ttl - ip->ttl); + + if ((info->flags & XT_OSF_LOG) && + info->loglevel == XT_OSF_LOGLEVEL_FIRST) + break; + } + } + rcu_read_unlock(); + + if (!fcount && (info->flags & XT_OSF_LOG)) + nf_log_packet(p->hooknum, 0, skb, p->in, p->out, NULL, + "Remote OS is not known: %pi4:%u -> %pi4:%u\n", + &ip->saddr, ntohs(tcp->source), + &ip->daddr, ntohs(tcp->dest)); + + if (fcount) + fmatch = FMATCH_OK; + + return fmatch == FMATCH_OK; +} + +static struct xt_match xt_osf_match = { + .name = "osf", + .revision = 0, + .family = NFPROTO_IPV4, + .proto = IPPROTO_TCP, + .hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_FORWARD), + .match = xt_osf_match_packet, + .matchsize = sizeof(struct xt_osf_info), + .me = THIS_MODULE, +}; + +static int __init xt_osf_init(void) +{ + int err = -EINVAL; + int i; + + for (i=0; i<ARRAY_SIZE(xt_osf_fingers); ++i) + INIT_LIST_HEAD(&xt_osf_fingers[i]); + + err = nfnetlink_subsys_register(&xt_osf_nfnetlink); + if (err < 0) { + printk(KERN_ERR "Failed (%d) to register OSF nsfnetlink helper.\n", err); + goto err_out_exit; + } + + err = xt_register_match(&xt_osf_match); + if (err) { + printk(KERN_ERR "Failed (%d) to register OS fingerprint " + "matching module.\n", err); + goto err_out_remove; + } + + return 0; + +err_out_remove: + nfnetlink_subsys_unregister(&xt_osf_nfnetlink); +err_out_exit: + return err; +} + +static void __exit xt_osf_fini(void) +{ + struct xt_osf_finger *f; + int i; + + nfnetlink_subsys_unregister(&xt_osf_nfnetlink); + xt_unregister_match(&xt_osf_match); + + rcu_read_lock(); + for (i=0; i<ARRAY_SIZE(xt_osf_fingers); ++i) { + + list_for_each_entry_rcu(f, &xt_osf_fingers[i], finger_entry) { + list_del_rcu(&f->finger_entry); + call_rcu(&f->rcu_head, xt_osf_finger_free_rcu); + } + } + rcu_read_unlock(); + + rcu_barrier(); +} + +module_init(xt_osf_init); +module_exit(xt_osf_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Evgeniy Polyakov <zbr@ioremap.net>"); +MODULE_DESCRIPTION("Passive OS fingerprint matching."); +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF); diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 328bd20ddd25..4cbfebda8fa1 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -86,7 +86,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info, unsigned short family) { const struct xt_policy_elem *e; - const struct dst_entry *dst = skb->dst; + const struct dst_entry *dst = skb_dst(skb); int strict = info->flags & XT_POLICY_MATCH_STRICT; int i, pos; diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c index 67419287bc7e..484d1689bfde 100644 --- a/net/netfilter/xt_realm.c +++ b/net/netfilter/xt_realm.c @@ -25,7 +25,7 @@ static bool realm_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const struct xt_realm_info *info = par->matchinfo; - const struct dst_entry *dst = skb->dst; + const struct dst_entry *dst = skb_dst(skb); return (info->id == (dst->tclassid & info->mask)) ^ info->invert; } diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 1acc089be7e9..ebf00ad5b194 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,6 +22,8 @@ #include <net/netfilter/nf_tproxy_core.h> #include <net/netfilter/ipv4/nf_defrag_ipv4.h> +#include <linux/netfilter/xt_socket.h> + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #define XT_SOCKET_HAVE_CONNTRACK 1 #include <net/netfilter/nf_conntrack.h> @@ -86,7 +88,8 @@ extract_icmp_fields(const struct sk_buff *skb, static bool -socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) +socket_match(const struct sk_buff *skb, const struct xt_match_param *par, + const struct xt_socket_mtinfo1 *info) { const struct iphdr *iph = ip_hdr(skb); struct udphdr _hdr, *hp = NULL; @@ -141,10 +144,24 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, sport, dport, par->in, false); if (sk != NULL) { - bool wildcard = (sk->sk_state != TCP_TIME_WAIT && inet_sk(sk)->rcv_saddr == 0); + bool wildcard; + bool transparent = true; + + /* Ignore sockets listening on INADDR_ANY */ + wildcard = (sk->sk_state != TCP_TIME_WAIT && + inet_sk(sk)->rcv_saddr == 0); + + /* Ignore non-transparent sockets, + if XT_SOCKET_TRANSPARENT is used */ + if (info && info->flags & XT_SOCKET_TRANSPARENT) + transparent = ((sk->sk_state != TCP_TIME_WAIT && + inet_sk(sk)->transparent) || + (sk->sk_state == TCP_TIME_WAIT && + inet_twsk(sk)->tw_transparent)); nf_tproxy_put_sock(sk); - if (wildcard) + + if (wildcard || !transparent) sk = NULL; } @@ -157,23 +174,47 @@ socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) return (sk != NULL); } -static struct xt_match socket_mt_reg __read_mostly = { - .name = "socket", - .family = AF_INET, - .match = socket_mt, - .hooks = 1 << NF_INET_PRE_ROUTING, - .me = THIS_MODULE, +static bool +socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) +{ + return socket_match(skb, par, NULL); +} + +static bool +socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) +{ + return socket_match(skb, par, par->matchinfo); +} + +static struct xt_match socket_mt_reg[] __read_mostly = { + { + .name = "socket", + .revision = 0, + .family = NFPROTO_IPV4, + .match = socket_mt_v0, + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, + }, + { + .name = "socket", + .revision = 1, + .family = NFPROTO_IPV4, + .match = socket_mt_v1, + .matchsize = sizeof(struct xt_socket_mtinfo1), + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, + }, }; static int __init socket_mt_init(void) { nf_defrag_ipv4_enable(); - return xt_register_match(&socket_mt_reg); + return xt_register_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } static void __exit socket_mt_exit(void) { - xt_unregister_match(&socket_mt_reg); + xt_unregister_matches(socket_mt_reg, ARRAY_SIZE(socket_mt_reg)); } module_init(socket_mt_init); |