From be2cef49904b34dd5f75d96bbc8cd8341bab1bc0 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Fri, 3 Jun 2016 17:56:50 +0200 Subject: ipvs: count pre-established TCP states as active Some users observed that "least connection" distribution algorithm doesn't handle well bursts of TCP connections from reconnecting clients after a node or network failure. This is because the algorithm counts active connection as worth 256 inactive ones where for TCP, "active" only means TCP connections in ESTABLISHED state. In case of a connection burst, new connections are handled before previous ones have finished the three way handshaking so that all are still counted as "inactive", i.e. cheap ones. The become "active" quickly but at that time, all of them are already assigned to one real server (or few), resulting in highly unbalanced distribution. Address this by counting the "pre-established" states as "active". Signed-off-by: Michal Kubecek Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- net/netfilter/ipvs/ip_vs_proto_tcp.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index d7024b2ed769..5117bcb7d2f0 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -395,6 +395,20 @@ static const char *const tcp_state_name_table[IP_VS_TCP_S_LAST+1] = { [IP_VS_TCP_S_LAST] = "BUG!", }; +static const bool tcp_state_active_table[IP_VS_TCP_S_LAST] = { + [IP_VS_TCP_S_NONE] = false, + [IP_VS_TCP_S_ESTABLISHED] = true, + [IP_VS_TCP_S_SYN_SENT] = true, + [IP_VS_TCP_S_SYN_RECV] = true, + [IP_VS_TCP_S_FIN_WAIT] = false, + [IP_VS_TCP_S_TIME_WAIT] = false, + [IP_VS_TCP_S_CLOSE] = false, + [IP_VS_TCP_S_CLOSE_WAIT] = false, + [IP_VS_TCP_S_LAST_ACK] = false, + [IP_VS_TCP_S_LISTEN] = false, + [IP_VS_TCP_S_SYNACK] = true, +}; + #define sNO IP_VS_TCP_S_NONE #define sES IP_VS_TCP_S_ESTABLISHED #define sSS IP_VS_TCP_S_SYN_SENT @@ -418,6 +432,13 @@ static const char * tcp_state_name(int state) return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?"; } +static bool tcp_state_active(int state) +{ + if (state >= IP_VS_TCP_S_LAST) + return false; + return tcp_state_active_table[state]; +} + static struct tcp_states_t tcp_states [] = { /* INPUT */ /* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */ @@ -540,12 +561,12 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp, if (dest) { if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && - (new_state != IP_VS_TCP_S_ESTABLISHED)) { + !tcp_state_active(new_state)) { atomic_dec(&dest->activeconns); atomic_inc(&dest->inactconns); cp->flags |= IP_VS_CONN_F_INACTIVE; } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) && - (new_state == IP_VS_TCP_S_ESTABLISHED)) { + tcp_state_active(new_state)) { atomic_inc(&dest->activeconns); atomic_dec(&dest->inactconns); cp->flags &= ~IP_VS_CONN_F_INACTIVE; -- cgit v1.2.3-59-g8ed1b From 64b87639c9cbeb03e26bc65528416c961b1dde96 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 3 Jul 2016 13:18:43 +0800 Subject: netfilter: conntrack: fix race between nf_conntrack proc read and hash resize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we do "cat /proc/net/nf_conntrack", and meanwhile resize the conntrack hash table via /sys/module/nf_conntrack/parameters/hashsize, race will happen, because reader can observe a newly allocated hash but the old size (or vice versa). So oops will happen like follows: BUG: unable to handle kernel NULL pointer dereference at 0000000000000017 IP: [] seq_print_acct+0x11/0x50 [nf_conntrack] Call Trace: [] ? ct_seq_show+0x14e/0x340 [nf_conntrack] [] seq_read+0x2cc/0x390 [] proc_reg_read+0x42/0x70 [] __vfs_read+0x37/0x130 [] ? security_file_permission+0xa0/0xc0 [] vfs_read+0x95/0x140 [] SyS_read+0x55/0xc0 [] entry_SYSCALL_64_fastpath+0x1a/0xa4 It is very easy to reproduce this kernel crash. 1. open one shell and input the following cmds: while : ; do echo $RANDOM > /sys/module/nf_conntrack/parameters/hashsize done 2. open more shells and input the following cmds: while : ; do cat /proc/net/nf_conntrack done 3. just wait a monent, oops will happen soon. The solution in this patch is based on Florian's Commit 5e3c61f98175 ("netfilter: conntrack: fix lookup race during hash resize"). And add a wrapper function nf_conntrack_get_ht to get hash and hsize suggested by Florian Westphal. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_core.h | 2 ++ net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 14 ++++++++++---- net/netfilter/nf_conntrack_core.c | 17 +++++++++++++++++ net/netfilter/nf_conntrack_standalone.c | 14 +++++++++----- 4 files changed, 38 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 3e2f3328945c..79d7ac5c9740 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -51,6 +51,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto); +void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize); + /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * nf_conntrack_find_get(struct net *net, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index c6f3c406f707..63923710f325 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -26,6 +26,8 @@ struct ct_iter_state { struct seq_net_private p; + struct hlist_nulls_head *hash; + unsigned int htable_size; unsigned int bucket; }; @@ -35,10 +37,10 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < st->htable_size; st->bucket++) { n = rcu_dereference( - hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); + hlist_nulls_first_rcu(&st->hash[st->bucket])); if (!is_a_nulls(n)) return n; } @@ -53,11 +55,11 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(hlist_nulls_next_rcu(head)); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= st->htable_size) return NULL; } head = rcu_dereference( - hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); + hlist_nulls_first_rcu(&st->hash[st->bucket])); } return head; } @@ -75,7 +77,11 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) static void *ct_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { + struct ct_iter_state *st = seq->private; + rcu_read_lock(); + + nf_conntrack_get_ht(&st->hash, &st->htable_size); return ct_get_idx(seq, *pos); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 153e33ffeeaa..1289e7e5e0de 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -460,6 +460,23 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, net_eq(net, nf_ct_net(ct)); } +/* must be called with rcu read lock held */ +void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) +{ + struct hlist_nulls_head *hptr; + unsigned int sequence, hsz; + + do { + sequence = read_seqcount_begin(&nf_conntrack_generation); + hsz = nf_conntrack_htable_size; + hptr = nf_conntrack_hash; + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + + *hash = hptr; + *hsize = hsz; +} +EXPORT_SYMBOL_GPL(nf_conntrack_get_ht); + /* * Warning : * - Caller must take a reference on returned object diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 2aaa188ee961..958a1455ca7f 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -48,6 +48,8 @@ EXPORT_SYMBOL_GPL(print_tuple); struct ct_iter_state { struct seq_net_private p; + struct hlist_nulls_head *hash; + unsigned int htable_size; unsigned int bucket; u_int64_t time_now; }; @@ -58,9 +60,10 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < st->htable_size; st->bucket++) { - n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket])); + n = rcu_dereference( + hlist_nulls_first_rcu(&st->hash[st->bucket])); if (!is_a_nulls(n)) return n; } @@ -75,12 +78,11 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(hlist_nulls_next_rcu(head)); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= st->htable_size) return NULL; } head = rcu_dereference( - hlist_nulls_first_rcu( - &nf_conntrack_hash[st->bucket])); + hlist_nulls_first_rcu(&st->hash[st->bucket])); } return head; } @@ -102,6 +104,8 @@ static void *ct_seq_start(struct seq_file *seq, loff_t *pos) st->time_now = ktime_get_real_ns(); rcu_read_lock(); + + nf_conntrack_get_ht(&st->hash, &st->htable_size); return ct_get_idx(seq, *pos); } -- cgit v1.2.3-59-g8ed1b From 474803d37e7fb6291d22cb964014afe457ba5212 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 3 Jul 2016 13:18:44 +0800 Subject: netfilter: cttimeout: unlink timeout obj again when hash resize happen Imagine such situation, nf_conntrack_htable_size now is 4096, we are doing ctnl_untimeout, and iterate on 3000# bucket. Meanwhile, another user try to reduce hash size to 2048, then all nf_conn are removed to the new hashtable. When this hash resize operation finished, we still try to itreate ct begin from 3000# bucket, find nothing to do and just return. We may miss unlinking some timeout objects. And later we will end up with invalid references to timeout object that are already gone. So when we find that hash resize happened, try to unlink timeout objects from the 0# bucket again. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_cttimeout.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 3c84f14326f5..4cdcd969b64c 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -303,16 +303,24 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) { struct nf_conntrack_tuple_hash *h; const struct hlist_nulls_node *nn; + unsigned int last_hsize; + spinlock_t *lock; int i; local_bh_disable(); - for (i = 0; i < nf_conntrack_htable_size; i++) { - nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < nf_conntrack_htable_size) { - hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) - untimeout(h, timeout); +restart: + last_hsize = nf_conntrack_htable_size; + for (i = 0; i < last_hsize; i++) { + lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS]; + nf_conntrack_lock(lock); + if (last_hsize != nf_conntrack_htable_size) { + spin_unlock(lock); + goto restart; } - spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); + + hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) + untimeout(h, timeout); + spin_unlock(lock); } local_bh_enable(); } -- cgit v1.2.3-59-g8ed1b From 8786a9716d028083f56f944996883f7d1a05919e Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sun, 3 Jul 2016 13:18:45 +0800 Subject: netfilter: nf_ct_helper: unlink helper again when hash resize happen From: Liping Zhang Similar to ctnl_untimeout, when hash resize happened, we should try to do unhelp from the 0# bucket again. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 3a1a88b9bafa..a4294e949cdc 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -409,6 +409,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) struct nf_conntrack_expect *exp; const struct hlist_node *next; const struct hlist_nulls_node *nn; + unsigned int last_hsize; + spinlock_t *lock; struct net *net; unsigned int i; @@ -446,13 +448,18 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) rtnl_unlock(); local_bh_disable(); - for (i = 0; i < nf_conntrack_htable_size; i++) { - nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); - if (i < nf_conntrack_htable_size) { - hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) - unhelp(h, me); +restart: + last_hsize = nf_conntrack_htable_size; + for (i = 0; i < last_hsize; i++) { + lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS]; + nf_conntrack_lock(lock); + if (last_hsize != nf_conntrack_htable_size) { + spin_unlock(lock); + goto restart; } - spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); + hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode) + unhelp(h, me); + spin_unlock(lock); } local_bh_enable(); } -- cgit v1.2.3-59-g8ed1b From 242922a027176cd260c5adce4ba6bbfa3a05190c Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 3 Jul 2016 20:44:01 +0200 Subject: netfilter: conntrack: simplify early_drop We don't need to acquire the bucket lock during early drop, we can use lockless traveral just like ____nf_conntrack_find. The timer deletion serves as synchronization point, if another cpu attempts to evict same entry, only one will succeed with timer deletion. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 1 + net/netfilter/nf_conntrack_core.c | 95 ++++++++++++++++++------------------ 2 files changed, 48 insertions(+), 48 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 5d3397f34583..2a5133e214c9 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -301,6 +301,7 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl); #define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count) #define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count) +#define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v)) #define MODULE_ALIAS_NFCT_HELPER(helper) \ MODULE_ALIAS("nfct-helper-" helper) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 1289e7e5e0de..e0e9c9a0f5ba 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -834,67 +834,66 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static noinline int early_drop(struct net *net, unsigned int _hash) +static unsigned int early_drop_list(struct net *net, + struct hlist_nulls_head *head) { - /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; - struct nf_conn *tmp; struct hlist_nulls_node *n; - unsigned int i, hash, sequence; - struct nf_conn *ct = NULL; - spinlock_t *lockp; - bool ret = false; + unsigned int drops = 0; + struct nf_conn *tmp; - i = 0; + hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { + tmp = nf_ct_tuplehash_to_ctrack(h); - local_bh_disable(); -restart: - sequence = read_seqcount_begin(&nf_conntrack_generation); - for (; i < NF_CT_EVICTION_RANGE; i++) { - hash = scale_hash(_hash++); - lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS]; - nf_conntrack_lock(lockp); - if (read_seqcount_retry(&nf_conntrack_generation, sequence)) { - spin_unlock(lockp); - goto restart; - } - hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], - hnnode) { - tmp = nf_ct_tuplehash_to_ctrack(h); - - if (test_bit(IPS_ASSURED_BIT, &tmp->status) || - !net_eq(nf_ct_net(tmp), net) || - nf_ct_is_dying(tmp)) - continue; - - if (atomic_inc_not_zero(&tmp->ct_general.use)) { - ct = tmp; - break; - } - } + if (test_bit(IPS_ASSURED_BIT, &tmp->status) || + !net_eq(nf_ct_net(tmp), net) || + nf_ct_is_dying(tmp)) + continue; - spin_unlock(lockp); - if (ct) - break; + if (!atomic_inc_not_zero(&tmp->ct_general.use)) + continue; + + /* kill only if still in same netns -- might have moved due to + * SLAB_DESTROY_BY_RCU rules. + * + * We steal the timer reference. If that fails timer has + * already fired or someone else deleted it. Just drop ref + * and move to next entry. + */ + if (net_eq(nf_ct_net(tmp), net) && + nf_ct_is_confirmed(tmp) && + del_timer(&tmp->timeout) && + nf_ct_delete(tmp, 0, 0)) + drops++; + + nf_ct_put(tmp); } - local_bh_enable(); + return drops; +} - if (!ct) - return false; +static noinline int early_drop(struct net *net, unsigned int _hash) +{ + unsigned int i; - /* kill only if in same netns -- might have moved due to - * SLAB_DESTROY_BY_RCU rules - */ - if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) { - if (nf_ct_delete(ct, 0, 0)) { - NF_CT_STAT_INC_ATOMIC(net, early_drop); - ret = true; + for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { + struct hlist_nulls_head *ct_hash; + unsigned hash, sequence, drops; + + do { + sequence = read_seqcount_begin(&nf_conntrack_generation); + hash = scale_hash(_hash++); + ct_hash = nf_conntrack_hash; + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + + drops = early_drop_list(net, &ct_hash[hash]); + if (drops) { + NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops); + return true; } } - nf_ct_put(ct); - return ret; + return false; } static struct nf_conn * -- cgit v1.2.3-59-g8ed1b From 7c9664351980aaa6a4b8837a314360b3a4ad382a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Jul 2016 12:07:23 +0200 Subject: netfilter: move nat hlist_head to nf_conn The nat extension structure is 32bytes in size on x86_64: struct nf_conn_nat { struct hlist_node bysource; /* 0 16 */ struct nf_conn * ct; /* 16 8 */ union nf_conntrack_nat_help help; /* 24 4 */ int masq_index; /* 28 4 */ /* size: 32, cachelines: 1, members: 4 */ /* last cacheline: 32 bytes */ }; The hlist is needed to quickly check for possible tuple collisions when installing a new nat binding. Storing this in the extension area has two drawbacks: 1. We need ct backpointer to get the conntrack struct from the extension. 2. When reallocation of extension area occurs we need to fixup the bysource hash head via hlist_replace_rcu. We can avoid both by placing the hlist_head in nf_conn and place nf_conn in the bysource hash rather than the extenstion. We can also remove the ->move support; no other extension needs it. Moving the entire nat extension into nf_conn would be possible as well but then we have to add yet another callback for deletion from the bysource hash table rather than just using nat extension ->destroy hook for this. nf_conn size doesn't increase due to aligment, followup patch replaces hlist_node with single pointer. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +++ include/net/netfilter/nf_conntrack_extend.h | 3 --- include/net/netfilter/nf_nat.h | 2 -- net/netfilter/nf_conntrack_extend.c | 15 ++----------- net/netfilter/nf_nat_core.c | 33 ++++++----------------------- 5 files changed, 12 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 2a5133e214c9..e5135d8728b4 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -117,6 +117,9 @@ struct nf_conn { /* Extensions */ struct nf_ct_ext *ext; +#if IS_ENABLED(CONFIG_NF_NAT) + struct hlist_node nat_bysource; +#endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; }; diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h index b925395fa5ed..1c3035dda31f 100644 --- a/include/net/netfilter/nf_conntrack_extend.h +++ b/include/net/netfilter/nf_conntrack_extend.h @@ -99,9 +99,6 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, struct nf_ct_ext_type { /* Destroys relationships (can be NULL). */ void (*destroy)(struct nf_conn *ct); - /* Called when realloacted (can be NULL). - Contents has already been moved. */ - void (*move)(void *new, void *old); enum nf_ct_ext_id id; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 344b1ab19220..02515f7ed4cc 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -29,8 +29,6 @@ struct nf_conn; /* The structure embedded in the conntrack structure. */ struct nf_conn_nat { - struct hlist_node bysource; - struct nf_conn *ct; union nf_conntrack_nat_help help; #if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \ IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6) diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 1a9545965c0d..02bcf00c2492 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -73,7 +73,7 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, size_t var_alloc_len, gfp_t gfp) { struct nf_ct_ext *old, *new; - int i, newlen, newoff; + int newlen, newoff; struct nf_ct_ext_type *t; /* Conntrack must not be confirmed to avoid races on reallocation. */ @@ -99,19 +99,8 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id, return NULL; if (new != old) { - for (i = 0; i < NF_CT_EXT_NUM; i++) { - if (!__nf_ct_ext_exist(old, i)) - continue; - - rcu_read_lock(); - t = rcu_dereference(nf_ct_ext_types[i]); - if (t && t->move) - t->move((void *)new + new->offset[i], - (void *)old + old->offset[i]); - rcu_read_unlock(); - } kfree_rcu(old, rcu); - ct->ext = new; + rcu_assign_pointer(ct->ext, new); } new->offset[id] = newoff; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 6877a396f8fc..692534701426 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -198,11 +198,9 @@ find_appropriate_src(struct net *net, const struct nf_nat_range *range) { unsigned int h = hash_by_src(net, tuple); - const struct nf_conn_nat *nat; const struct nf_conn *ct; - hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) { - ct = nat->ct; + hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) { if (same_src(ct, tuple) && net_eq(net, nf_ct_net(ct)) && nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { @@ -435,8 +433,7 @@ nf_nat_setup_info(struct nf_conn *ct, spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate extension aera */ nat = nfct_nat(ct); - nat->ct = ct; - hlist_add_head_rcu(&nat->bysource, + hlist_add_head_rcu(&ct->nat_bysource, &nf_nat_bysource[srchash]); spin_unlock_bh(&nf_nat_lock); } @@ -543,7 +540,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) if (nf_nat_proto_remove(ct, data)) return 1; - if (!nat || !nat->ct) + if (!nat) return 0; /* This netns is being destroyed, and conntrack has nat null binding. @@ -556,9 +553,8 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) return 1; spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&nat->bysource); + hlist_del_rcu(&ct->nat_bysource); ct->status &= ~IPS_NAT_DONE_MASK; - nat->ct = NULL; spin_unlock_bh(&nf_nat_lock); add_timer(&ct->timeout); @@ -688,27 +684,13 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) { struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT); - if (nat == NULL || nat->ct == NULL) + if (!nat) return; - NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE); - - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&nat->bysource); - spin_unlock_bh(&nf_nat_lock); -} - -static void nf_nat_move_storage(void *new, void *old) -{ - struct nf_conn_nat *new_nat = new; - struct nf_conn_nat *old_nat = old; - struct nf_conn *ct = old_nat->ct; - - if (!ct || !(ct->status & IPS_SRC_NAT_DONE)) - return; + NF_CT_ASSERT(ct->status & IPS_SRC_NAT_DONE); spin_lock_bh(&nf_nat_lock); - hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource); + hlist_del_rcu(&ct->nat_bysource); spin_unlock_bh(&nf_nat_lock); } @@ -716,7 +698,6 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { .len = sizeof(struct nf_conn_nat), .align = __alignof__(struct nf_conn_nat), .destroy = nf_nat_cleanup_conntrack, - .move = nf_nat_move_storage, .id = NF_CT_EXT_NAT, .flags = NF_CT_EXT_F_PREALLOC, }; -- cgit v1.2.3-59-g8ed1b From 870190a9ec9075205c0fa795a09fa931694a3ff1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 5 Jul 2016 12:07:24 +0200 Subject: netfilter: nat: convert nat bysrc hash to rhashtable It did use a fixed-size bucket list plus single lock to protect add/del. Unlike the main conntrack table we only need to add and remove keys. Convert it to rhashtable to get table autosizing and per-bucket locking. The maximum number of entries is -- as before -- tied to the number of conntracks so we do not need another upperlimit. The change does not handle rhashtable_remove_fast error, only possible "error" is -ENOENT, and that is something that can happen legitimetely, e.g. because nat module was inserted at a later time and no src manip took place yet. Tested with http-client-benchmark + httpterm with DNAT and SNAT rules in place. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 3 +- include/net/netfilter/nf_nat.h | 1 + net/netfilter/nf_nat_core.c | 126 +++++++++++++++++++---------------- 3 files changed, 71 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index e5135d8728b4..a08825b7e955 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -118,7 +119,7 @@ struct nf_conn { struct nf_ct_ext *ext; #if IS_ENABLED(CONFIG_NF_NAT) - struct hlist_node nat_bysource; + struct rhash_head nat_bysource; #endif /* Storage reserved for other modules, must be the last member */ union nf_conntrack_proto proto; diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 02515f7ed4cc..c327a431a6f3 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -1,5 +1,6 @@ #ifndef _NF_NAT_H #define _NF_NAT_H +#include #include #include #include diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 692534701426..de31818417b8 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -30,17 +30,19 @@ #include #include -static DEFINE_SPINLOCK(nf_nat_lock); - static DEFINE_MUTEX(nf_nat_proto_mutex); static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] __read_mostly; static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] __read_mostly; -static struct hlist_head *nf_nat_bysource __read_mostly; -static unsigned int nf_nat_htable_size __read_mostly; -static unsigned int nf_nat_hash_rnd __read_mostly; +struct nf_nat_conn_key { + const struct net *net; + const struct nf_conntrack_tuple *tuple; + const struct nf_conntrack_zone *zone; +}; + +static struct rhashtable nf_nat_bysource_table; inline const struct nf_nat_l3proto * __nf_nat_l3proto_find(u8 family) @@ -119,19 +121,17 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) EXPORT_SYMBOL(nf_xfrm_me_harder); #endif /* CONFIG_XFRM */ -/* We keep an extra hash for each conntrack, for fast searching. */ -static inline unsigned int -hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple) +static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed) { - unsigned int hash; - - get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd)); + const struct nf_conntrack_tuple *t; + const struct nf_conn *ct = data; + t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; /* Original src, to ensure we map it consistently if poss. */ - hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), - tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n)); - return reciprocal_scale(hash, nf_nat_htable_size); + seed ^= net_hash_mix(nf_ct_net(ct)); + return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32), + t->dst.protonum ^ seed); } /* Is this tuple already taken? (not by us) */ @@ -187,6 +187,26 @@ same_src(const struct nf_conn *ct, t->src.u.all == tuple->src.u.all); } +static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg, + const void *obj) +{ + const struct nf_nat_conn_key *key = arg->key; + const struct nf_conn *ct = obj; + + return same_src(ct, key->tuple) && + net_eq(nf_ct_net(ct), key->net) && + nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL); +} + +static struct rhashtable_params nf_nat_bysource_params = { + .head_offset = offsetof(struct nf_conn, nat_bysource), + .obj_hashfn = nf_nat_bysource_hash, + .obj_cmpfn = nf_nat_bysource_cmp, + .nelem_hint = 256, + .min_size = 1024, + .nulls_base = (1U << RHT_BASE_SHIFT), +}; + /* Only called for SRC manip */ static int find_appropriate_src(struct net *net, @@ -197,23 +217,23 @@ find_appropriate_src(struct net *net, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(net, tuple); const struct nf_conn *ct; + struct nf_nat_conn_key key = { + .net = net, + .tuple = tuple, + .zone = zone + }; - hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) { - if (same_src(ct, tuple) && - net_eq(net, nf_ct_net(ct)) && - nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { - /* Copy source part from reply tuple. */ - nf_ct_invert_tuplepr(result, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); - result->dst = tuple->dst; - - if (in_range(l3proto, l4proto, result, range)) - return 1; - } - } - return 0; + ct = rhashtable_lookup_fast(&nf_nat_bysource_table, &key, + nf_nat_bysource_params); + if (!ct) + return 0; + + nf_ct_invert_tuplepr(result, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + result->dst = tuple->dst; + + return in_range(l3proto, l4proto, result, range); } /* For [FUTURE] fragmentation handling, we want the least-used @@ -385,7 +405,6 @@ nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { - struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple curr_tuple, new_tuple; struct nf_conn_nat *nat; @@ -426,16 +445,13 @@ nf_nat_setup_info(struct nf_conn *ct, } if (maniptype == NF_NAT_MANIP_SRC) { - unsigned int srchash; - - srchash = hash_by_src(net, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - spin_lock_bh(&nf_nat_lock); - /* nf_conntrack_alter_reply might re-allocate extension aera */ - nat = nfct_nat(ct); - hlist_add_head_rcu(&ct->nat_bysource, - &nf_nat_bysource[srchash]); - spin_unlock_bh(&nf_nat_lock); + int err; + + err = rhashtable_insert_fast(&nf_nat_bysource_table, + &ct->nat_bysource, + nf_nat_bysource_params); + if (err) + return NF_DROP; } /* It's done. */ @@ -552,10 +568,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) if (!del_timer(&ct->timeout)) return 1; - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&ct->nat_bysource); ct->status &= ~IPS_NAT_DONE_MASK; - spin_unlock_bh(&nf_nat_lock); + + rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, + nf_nat_bysource_params); add_timer(&ct->timeout); @@ -687,11 +703,8 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct) if (!nat) return; - NF_CT_ASSERT(ct->status & IPS_SRC_NAT_DONE); - - spin_lock_bh(&nf_nat_lock); - hlist_del_rcu(&ct->nat_bysource); - spin_unlock_bh(&nf_nat_lock); + rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, + nf_nat_bysource_params); } static struct nf_ct_ext_type nat_extend __read_mostly = { @@ -826,16 +839,13 @@ static int __init nf_nat_init(void) { int ret; - /* Leave them the same for the moment. */ - nf_nat_htable_size = nf_conntrack_htable_size; - - nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0); - if (!nf_nat_bysource) - return -ENOMEM; + ret = rhashtable_init(&nf_nat_bysource_table, &nf_nat_bysource_params); + if (ret) + return ret; ret = nf_ct_extend_register(&nat_extend); if (ret < 0) { - nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); + rhashtable_destroy(&nf_nat_bysource_table); printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); return ret; } @@ -859,7 +869,7 @@ static int __init nf_nat_init(void) return 0; cleanup_extend: - nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); + rhashtable_destroy(&nf_nat_bysource_table); nf_ct_extend_unregister(&nat_extend); return ret; } @@ -877,8 +887,8 @@ static void __exit nf_nat_cleanup(void) #endif for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); - synchronize_net(); - nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); + + rhashtable_destroy(&nf_nat_bysource_table); } MODULE_LICENSE("GPL"); -- cgit v1.2.3-59-g8ed1b From 47c74456257d2e50ace8c473d358cf4b9c0a912f Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 5 Jul 2016 20:55:36 +0800 Subject: netfilter: physdev: physdev-is-out should not work with OUTPUT chain physdev_mt() will check skb->nf_bridge first, which was alloced in br_nf_pre_routing. So if we want to use --physdev-out and physdev-is-out, we need to match it in FORWARD or POSTROUTING chain. physdev_mt_check() only checked physdev-out and missed physdev-is-out. Fix it and update the debug message to make it clearer. Signed-off-by: Hangbin Liu Reviewed-by: Marcelo R Leitner Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_physdev.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 1caaccbc306c..e5f18988aee0 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -102,14 +102,14 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) return -EINVAL; - if (info->bitmask & XT_PHYSDEV_OP_OUT && + if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { - pr_info("using --physdev-out in the OUTPUT, FORWARD and " - "POSTROUTING chains for non-bridged traffic is not " - "supported anymore.\n"); + pr_info("using --physdev-out and --physdev-is-out are only" + "supported in the FORWARD and POSTROUTING chains with" + "bridged traffic.\n"); if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return -EINVAL; } -- cgit v1.2.3-59-g8ed1b From 3f8b61b7f9aea414d162821817d89a7a6aae41c3 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 5 Jul 2016 23:23:00 +0800 Subject: netfilter: nft_ct: make byte/packet expr more friendly If we want to use ct packets expr, and add a rule like follows: # nft add rule filter input ct packets gt 1 counter We will find that no packets will hit it, because nf_conntrack_acct is disabled by default. So It will not work until we enable it manually via "echo 1 > /proc/sys/net/netfilter/nf_conntrack_acct". This is not friendly, so like xt_connbytes do, if the user want to use ct byte/packet expr, enable nf_conntrack_acct automatically. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 137e308d5b24..7ce8fd7ace78 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -355,6 +355,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (err < 0) return err; + if (priv->key == NFT_CT_BYTES || priv->key == NFT_CT_PKTS) + nf_ct_set_acct(ctx->net, true); + return 0; } -- cgit v1.2.3-59-g8ed1b From 42a55769132fdf4f44bac1471b371d7f80bcde35 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Jul 2016 14:41:49 +0200 Subject: netfilter: nf_tables: get rid of possible_net_t from set and basechain We can pass the netns pointer as parameter to the functions that need to gain access to it. From basechains, I didn't find any client for this field anymore so let's remove this too. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 21 +++++++++++---------- net/netfilter/nf_tables_api.c | 10 ++++------ net/netfilter/nft_hash.c | 20 ++++++++++---------- net/netfilter/nft_lookup.c | 2 +- net/netfilter/nft_rbtree.c | 26 ++++++++++++++------------ 5 files changed, 40 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 30c1d9489ae2..f2f13399ce44 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -236,7 +236,8 @@ struct nft_expr; * @features: features supported by the implementation */ struct nft_set_ops { - bool (*lookup)(const struct nft_set *set, + bool (*lookup)(const struct net *net, + const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext); bool (*update)(struct nft_set *set, @@ -248,11 +249,14 @@ struct nft_set_ops { struct nft_regs *regs, const struct nft_set_ext **ext); - int (*insert)(const struct nft_set *set, + int (*insert)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); - void (*activate)(const struct nft_set *set, + void (*activate)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); - void * (*deactivate)(const struct nft_set *set, + void * (*deactivate)(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem); void (*remove)(const struct nft_set *set, const struct nft_set_elem *elem); @@ -295,7 +299,6 @@ void nft_unregister_set(struct nft_set_ops *ops); * @udlen: user data length * @udata: user data * @ops: set ops - * @pnet: network namespace * @flags: set flags * @genmask: generation mask * @klen: key length @@ -318,7 +321,6 @@ struct nft_set { unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - possible_net_t pnet; u16 flags:14, genmask:2; u8 klen; @@ -804,7 +806,6 @@ struct nft_stats { * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops - * @pnet: net namespace that this chain belongs to * @type: chain type * @policy: default policy * @stats: per-cpu chain stats @@ -813,7 +814,6 @@ struct nft_stats { */ struct nft_base_chain { struct nf_hook_ops ops[NFT_HOOK_OPS_MAX]; - possible_net_t pnet; const struct nf_chain_type *type; u8 policy; u8 flags; @@ -1009,10 +1009,11 @@ static inline bool nft_set_elem_active(const struct nft_set_ext *ext, return !(ext->genmask & genmask); } -static inline void nft_set_elem_change_active(const struct nft_set *set, +static inline void nft_set_elem_change_active(const struct net *net, + const struct nft_set *set, struct nft_set_ext *ext) { - ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet)); + ext->genmask ^= nft_genmask_next(net); } /* diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 18b7f8578ee0..0211eaec9060 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1405,7 +1405,6 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, rcu_assign_pointer(basechain->stats, stats); } - write_pnet(&basechain->pnet, net); basechain->type = type; chain = &basechain->chain; @@ -2841,7 +2840,6 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, } INIT_LIST_HEAD(&set->bindings); - write_pnet(&set->pnet, net); set->ops = ops; set->ktype = ktype; set->klen = desc.klen; @@ -3520,7 +3518,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err4; ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; - err = set->ops->insert(set, &elem); + err = set->ops->insert(ctx->net, set, &elem); if (err < 0) goto err5; @@ -3644,7 +3642,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, goto err3; } - priv = set->ops->deactivate(set, &elem); + priv = set->ops->deactivate(ctx->net, set, &elem); if (priv == NULL) { err = -ENOENT; goto err4; @@ -4018,7 +4016,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWSETELEM: te = (struct nft_trans_elem *)trans->data; - te->set->ops->activate(te->set, &te->elem); + te->set->ops->activate(net, te->set, &te->elem); nf_tables_setelem_notify(&trans->ctx, te->set, &te->elem, NFT_MSG_NEWSETELEM, 0); @@ -4143,7 +4141,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; - te->set->ops->activate(te->set, &te->elem); + te->set->ops->activate(net, te->set, &te->elem); te->set->ndeact--; nft_trans_destroy(trans); diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index ea924816b7b8..564fa7929ed5 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -71,13 +71,13 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, return 0; } -static bool nft_hash_lookup(const struct nft_set *set, const u32 *key, - const struct nft_set_ext **ext) +static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) { struct nft_hash *priv = nft_set_priv(set); const struct nft_hash_elem *he; struct nft_hash_cmp_arg arg = { - .genmask = nft_genmask_cur(read_pnet(&set->pnet)), + .genmask = nft_genmask_cur(net), .set = set, .key = key, }; @@ -125,13 +125,13 @@ err1: return false; } -static int nft_hash_insert(const struct nft_set *set, +static int nft_hash_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); struct nft_hash_elem *he = elem->priv; struct nft_hash_cmp_arg arg = { - .genmask = nft_genmask_next(read_pnet(&set->pnet)), + .genmask = nft_genmask_next(net), .set = set, .key = elem->key.val.data, }; @@ -140,20 +140,20 @@ static int nft_hash_insert(const struct nft_set *set, nft_hash_params); } -static void nft_hash_activate(const struct nft_set *set, +static void nft_hash_activate(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash_elem *he = elem->priv; - nft_set_elem_change_active(set, &he->ext); + nft_set_elem_change_active(net, set, &he->ext); nft_set_elem_clear_busy(&he->ext); } -static void *nft_hash_deactivate(const struct nft_set *set, +static void *nft_hash_deactivate(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_hash *priv = nft_set_priv(set); - struct net *net = read_pnet(&set->pnet); struct nft_hash_elem *he; struct nft_hash_cmp_arg arg = { .genmask = nft_genmask_next(net), @@ -166,7 +166,7 @@ static void *nft_hash_deactivate(const struct nft_set *set, if (he != NULL) { if (!nft_set_elem_mark_busy(&he->ext) || !nft_is_active(net, &he->ext)) - nft_set_elem_change_active(set, &he->ext); + nft_set_elem_change_active(net, set, &he->ext); else he = NULL; } diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index b8d18f598569..e164325d1bc0 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -35,7 +35,7 @@ static void nft_lookup_eval(const struct nft_expr *expr, const struct nft_set_ext *ext; bool found; - found = set->ops->lookup(set, ®s->data[priv->sreg], &ext) ^ + found = set->ops->lookup(pkt->net, set, ®s->data[priv->sreg], &ext) ^ priv->invert; if (!found) { diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index c0f638745adc..6473936d05c6 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -41,13 +41,13 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this, return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; } -static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key, - const struct nft_set_ext **ext) +static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) { const struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; + u8 genmask = nft_genmask_cur(net); const struct rb_node *parent; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); const void *this; int d; @@ -93,13 +93,13 @@ out: return false; } -static int __nft_rbtree_insert(const struct nft_set *set, +static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new) { struct nft_rbtree *priv = nft_set_priv(set); + u8 genmask = nft_genmask_next(net); struct nft_rbtree_elem *rbe; struct rb_node *parent, **p; - u8 genmask = nft_genmask_next(read_pnet(&set->pnet)); int d; parent = NULL; @@ -132,14 +132,14 @@ static int __nft_rbtree_insert(const struct nft_set *set, return 0; } -static int nft_rbtree_insert(const struct nft_set *set, +static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree_elem *rbe = elem->priv; int err; spin_lock_bh(&nft_rbtree_lock); - err = __nft_rbtree_insert(set, rbe); + err = __nft_rbtree_insert(net, set, rbe); spin_unlock_bh(&nft_rbtree_lock); return err; @@ -156,21 +156,23 @@ static void nft_rbtree_remove(const struct nft_set *set, spin_unlock_bh(&nft_rbtree_lock); } -static void nft_rbtree_activate(const struct nft_set *set, +static void nft_rbtree_activate(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem) { struct nft_rbtree_elem *rbe = elem->priv; - nft_set_elem_change_active(set, &rbe->ext); + nft_set_elem_change_active(net, set, &rbe->ext); } -static void *nft_rbtree_deactivate(const struct nft_set *set, +static void *nft_rbtree_deactivate(const struct net *net, + const struct nft_set *set, const struct nft_set_elem *elem) { const struct nft_rbtree *priv = nft_set_priv(set); const struct rb_node *parent = priv->root.rb_node; struct nft_rbtree_elem *rbe, *this = elem->priv; - u8 genmask = nft_genmask_next(read_pnet(&set->pnet)); + u8 genmask = nft_genmask_next(net); int d; while (parent != NULL) { @@ -196,7 +198,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set, parent = parent->rb_right; continue; } - nft_set_elem_change_active(set, &rbe->ext); + nft_set_elem_change_active(net, set, &rbe->ext); return rbe; } } -- cgit v1.2.3-59-g8ed1b From c2b9b4fee8ab86f2bb657e5ac48d803879e92765 Mon Sep 17 00:00:00 2001 From: Toby DiPasquale Date: Mon, 11 Jul 2016 11:32:45 +0100 Subject: netfilter: nf_conntrack_h323: fix off-by-one in DecodeQ931 This patch corrects an off-by-one error in the DecodeQ931 function in the nf_conntrack_h323 module. This error could result in reading off the end of a Q.931 frame. Signed-off-by: Toby DiPasquale Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_asn1.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index bcd5ed6b7130..89b2e46925c4 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -846,9 +846,10 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931) sz -= len; /* Message Type */ - if (sz < 1) + if (sz < 2) return H323_ERROR_BOUND; q931->MessageType = *p++; + sz--; PRINT("MessageType = %02X\n", q931->MessageType); if (*p & 0x80) { p++; -- cgit v1.2.3-59-g8ed1b From 3101e0fc1f6e809d38fbb5845c6c5eb0eefeda07 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 12 Jul 2016 19:45:00 +0800 Subject: netfilter: conntrack: protect early_drop by rcu read lock User can add ct entry via nfnetlink(IPCTNL_MSG_CT_NEW), and if the total number reach the nf_conntrack_max, we will try to drop some ct entries. But in this case(the main function call path is ctnetlink_create_conntrack -> nf_conntrack_alloc -> early_drop), rcu_read_lock is not held, so race with hash resize will happen. Fixes: 242922a02717 ("netfilter: conntrack: simplify early_drop") Cc: Florian Westphal Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index e0e9c9a0f5ba..2d46225501c1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -880,6 +880,7 @@ static noinline int early_drop(struct net *net, unsigned int _hash) struct hlist_nulls_head *ct_hash; unsigned hash, sequence, drops; + rcu_read_lock(); do { sequence = read_seqcount_begin(&nf_conntrack_generation); hash = scale_hash(_hash++); @@ -887,6 +888,8 @@ static noinline int early_drop(struct net *net, unsigned int _hash) } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); drops = early_drop_list(net, &ct_hash[hash]); + rcu_read_unlock(); + if (drops) { NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops); return true; -- cgit v1.2.3-59-g8ed1b From f4dc77713f8016d2e8a3295e1c9c53a21f296def Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 14 Jul 2016 17:51:26 +0200 Subject: netfilter: x_tables: speed up jump target validation The dummy ruleset I used to test the original validation change was broken, most rules were unreachable and were not tested by mark_source_chains(). In some cases rulesets that used to load in a few seconds now require several minutes. sample ruleset that shows the behaviour: echo "*filter" for i in $(seq 0 100000);do printf ":chain_%06x - [0:0]\n" $i done for i in $(seq 0 100000);do printf -- "-A INPUT -j chain_%06x\n" $i printf -- "-A INPUT -j chain_%06x\n" $i printf -- "-A INPUT -j chain_%06x\n" $i done echo COMMIT [ pipe result into iptables-restore ] This ruleset will be about 74mbyte in size, with ~500k searches though all 500k[1] rule entries. iptables-restore will take forever (gave up after 10 minutes) Instead of always searching the entire blob for a match, fill an array with the start offsets of every single ipt_entry struct, then do a binary search to check if the jump target is present or not. After this change ruleset restore times get again close to what one gets when reverting 36472341017529e (~3 seconds on my workstation). [1] every user-defined rule gets an implicit RETURN, so we get 300k jumps + 100k userchains + 100k returns -> 500k rule entries Fixes: 36472341017529e ("netfilter: x_tables: validate targets of jumps") Reported-by: Jeff Wu Tested-by: Jeff Wu Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 4 +++ net/ipv4/netfilter/arp_tables.c | 47 ++++++++++++++++++----------------- net/ipv4/netfilter/ip_tables.c | 45 ++++++++++++++++++---------------- net/ipv6/netfilter/ip6_tables.c | 45 ++++++++++++++++++---------------- net/netfilter/x_tables.c | 50 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 127 insertions(+), 64 deletions(-) (limited to 'net') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index e94e81ab2b58..2ad1a2b289b5 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -250,6 +250,10 @@ int xt_check_entry_offsets(const void *base, const char *elems, unsigned int target_offset, unsigned int next_offset); +unsigned int *xt_alloc_entry_offsets(unsigned int size); +bool xt_find_jump_offset(const unsigned int *offsets, + unsigned int target, unsigned int size); + int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto, bool inv_proto); int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto, diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index c8dd9e26b185..b31df597fd37 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -299,23 +299,12 @@ static inline bool unconditional(const struct arpt_entry *e) memcmp(&e->arp, &uncond, sizeof(uncond)) == 0; } -static bool find_jump_target(const struct xt_table_info *t, - const struct arpt_entry *target) -{ - struct arpt_entry *iter; - - xt_entry_foreach(iter, t->entries, t->size) { - if (iter == target) - return true; - } - return false; -} - /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. */ static int mark_source_chains(const struct xt_table_info *newinfo, - unsigned int valid_hooks, void *entry0) + unsigned int valid_hooks, void *entry0, + unsigned int *offsets) { unsigned int hook; @@ -388,10 +377,11 @@ static int mark_source_chains(const struct xt_table_info *newinfo, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ + if (!xt_find_jump_offset(offsets, newpos, + newinfo->number)) + return 0; e = (struct arpt_entry *) (entry0 + newpos); - if (!find_jump_target(newinfo, e)) - return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; @@ -543,6 +533,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, const struct arpt_replace *repl) { struct arpt_entry *iter; + unsigned int *offsets; unsigned int i; int ret = 0; @@ -555,6 +546,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, newinfo->underflow[i] = 0xFFFFFFFF; } + offsets = xt_alloc_entry_offsets(newinfo->number); + if (!offsets) + return -ENOMEM; i = 0; /* Walk through entries, checking offsets. */ @@ -565,17 +559,20 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, repl->underflow, repl->valid_hooks); if (ret != 0) - break; + goto out_free; + if (i < repl->num_entries) + offsets[i] = (void *)iter - entry0; ++i; if (strcmp(arpt_get_target(iter)->u.user.name, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } if (ret != 0) - return ret; + goto out_free; + ret = -EINVAL; if (i != repl->num_entries) - return -EINVAL; + goto out_free; /* Check hooks all assigned */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { @@ -583,13 +580,16 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; if (newinfo->underflow[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; } - if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) - return -ELOOP; + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { + ret = -ELOOP; + goto out_free; + } + kvfree(offsets); /* Finally, each sanity check must pass */ i = 0; @@ -609,6 +609,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, return ret; } + return ret; + out_free: + kvfree(offsets); return ret; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f0df66f54ce6..f993545a3373 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -373,23 +373,12 @@ ipt_do_table(struct sk_buff *skb, else return verdict; } -static bool find_jump_target(const struct xt_table_info *t, - const struct ipt_entry *target) -{ - struct ipt_entry *iter; - - xt_entry_foreach(iter, t->entries, t->size) { - if (iter == target) - return true; - } - return false; -} - /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int mark_source_chains(const struct xt_table_info *newinfo, - unsigned int valid_hooks, void *entry0) + unsigned int valid_hooks, void *entry0, + unsigned int *offsets) { unsigned int hook; @@ -458,10 +447,11 @@ mark_source_chains(const struct xt_table_info *newinfo, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ + if (!xt_find_jump_offset(offsets, newpos, + newinfo->number)) + return 0; e = (struct ipt_entry *) (entry0 + newpos); - if (!find_jump_target(newinfo, e)) - return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; @@ -694,6 +684,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ipt_replace *repl) { struct ipt_entry *iter; + unsigned int *offsets; unsigned int i; int ret = 0; @@ -706,6 +697,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, newinfo->underflow[i] = 0xFFFFFFFF; } + offsets = xt_alloc_entry_offsets(newinfo->number); + if (!offsets) + return -ENOMEM; i = 0; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { @@ -715,15 +709,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, repl->underflow, repl->valid_hooks); if (ret != 0) - return ret; + goto out_free; + if (i < repl->num_entries) + offsets[i] = (void *)iter - entry0; ++i; if (strcmp(ipt_get_target(iter)->u.user.name, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } + ret = -EINVAL; if (i != repl->num_entries) - return -EINVAL; + goto out_free; /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -731,13 +728,16 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; if (newinfo->underflow[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; } - if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) - return -ELOOP; + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { + ret = -ELOOP; + goto out_free; + } + kvfree(offsets); /* Finally, each sanity check must pass */ i = 0; @@ -757,6 +757,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, return ret; } + return ret; + out_free: + kvfree(offsets); return ret; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 61ed95054efa..552fac2f390a 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -402,23 +402,12 @@ ip6t_do_table(struct sk_buff *skb, else return verdict; } -static bool find_jump_target(const struct xt_table_info *t, - const struct ip6t_entry *target) -{ - struct ip6t_entry *iter; - - xt_entry_foreach(iter, t->entries, t->size) { - if (iter == target) - return true; - } - return false; -} - /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int mark_source_chains(const struct xt_table_info *newinfo, - unsigned int valid_hooks, void *entry0) + unsigned int valid_hooks, void *entry0, + unsigned int *offsets) { unsigned int hook; @@ -487,10 +476,11 @@ mark_source_chains(const struct xt_table_info *newinfo, XT_STANDARD_TARGET) == 0 && newpos >= 0) { /* This a jump; chase it. */ + if (!xt_find_jump_offset(offsets, newpos, + newinfo->number)) + return 0; e = (struct ip6t_entry *) (entry0 + newpos); - if (!find_jump_target(newinfo, e)) - return 0; } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; @@ -724,6 +714,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ip6t_replace *repl) { struct ip6t_entry *iter; + unsigned int *offsets; unsigned int i; int ret = 0; @@ -736,6 +727,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, newinfo->underflow[i] = 0xFFFFFFFF; } + offsets = xt_alloc_entry_offsets(newinfo->number); + if (!offsets) + return -ENOMEM; i = 0; /* Walk through entries, checking offsets. */ xt_entry_foreach(iter, entry0, newinfo->size) { @@ -745,15 +739,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, repl->underflow, repl->valid_hooks); if (ret != 0) - return ret; + goto out_free; + if (i < repl->num_entries) + offsets[i] = (void *)iter - entry0; ++i; if (strcmp(ip6t_get_target(iter)->u.user.name, XT_ERROR_TARGET) == 0) ++newinfo->stacksize; } + ret = -EINVAL; if (i != repl->num_entries) - return -EINVAL; + goto out_free; /* Check hooks all assigned */ for (i = 0; i < NF_INET_NUMHOOKS; i++) { @@ -761,13 +758,16 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; if (newinfo->underflow[i] == 0xFFFFFFFF) - return -EINVAL; + goto out_free; } - if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) - return -ELOOP; + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) { + ret = -ELOOP; + goto out_free; + } + kvfree(offsets); /* Finally, each sanity check must pass */ i = 0; @@ -787,6 +787,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, return ret; } + return ret; + out_free: + kvfree(offsets); return ret; } diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index fe0e2db632c7..e0aa7c1d0224 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -702,6 +702,56 @@ int xt_check_entry_offsets(const void *base, } EXPORT_SYMBOL(xt_check_entry_offsets); +/** + * xt_alloc_entry_offsets - allocate array to store rule head offsets + * + * @size: number of entries + * + * Return: NULL or kmalloc'd or vmalloc'd array + */ +unsigned int *xt_alloc_entry_offsets(unsigned int size) +{ + unsigned int *off; + + off = kcalloc(size, sizeof(unsigned int), GFP_KERNEL | __GFP_NOWARN); + + if (off) + return off; + + if (size < (SIZE_MAX / sizeof(unsigned int))) + off = vmalloc(size * sizeof(unsigned int)); + + return off; +} +EXPORT_SYMBOL(xt_alloc_entry_offsets); + +/** + * xt_find_jump_offset - check if target is a valid jump offset + * + * @offsets: array containing all valid rule start offsets of a rule blob + * @target: the jump target to search for + * @size: entries in @offset + */ +bool xt_find_jump_offset(const unsigned int *offsets, + unsigned int target, unsigned int size) +{ + int m, low = 0, hi = size; + + while (hi > low) { + m = (low + hi) / 2u; + + if (offsets[m] > target) + hi = m; + else if (offsets[m] < target) + low = m + 1; + else + return true; + } + + return false; +} +EXPORT_SYMBOL(xt_find_jump_offset); + int xt_check_target(struct xt_tgchk_param *par, unsigned int size, u_int8_t proto, bool inv_proto) { -- cgit v1.2.3-59-g8ed1b From 590025a27fe0603e855a054c4ad57d966bd8af07 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 16 Jul 2016 14:27:21 +0800 Subject: netfilter: nft_ct: fix unpaired nf_connlabels_get/put call We only get nf_connlabels if the user add ct label set expr successfully, but we will also put nf_connlabels if the user delete ct lable get expr. This is mismathced, and will cause ct label expr cannot work properly. Also, if we init something fail, we should put nf_connlabels back. Otherwise, we may waste to alloc the memory that will never be used. Signed-off-by: Liping Zhang Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_ct.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 7ce8fd7ace78..d9e44ca34055 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -366,6 +366,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_ct *priv = nft_expr_priv(expr); + bool label_got = false; unsigned int len; int err; @@ -384,6 +385,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1); if (err) return err; + label_got = true; break; #endif default: @@ -393,17 +395,28 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]); err = nft_validate_register_load(priv->sreg, len); if (err < 0) - return err; + goto err1; err = nft_ct_l3proto_try_module_get(ctx->afi->family); if (err < 0) - return err; + goto err1; return 0; + +err1: + if (label_got) + nf_connlabels_put(ctx->net); + return err; +} + +static void nft_ct_get_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + nft_ct_l3proto_module_put(ctx->afi->family); } -static void nft_ct_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_ct_set_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) { struct nft_ct *priv = nft_expr_priv(expr); @@ -475,7 +488,7 @@ static const struct nft_expr_ops nft_ct_get_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), .eval = nft_ct_get_eval, .init = nft_ct_get_init, - .destroy = nft_ct_destroy, + .destroy = nft_ct_get_destroy, .dump = nft_ct_get_dump, }; @@ -484,7 +497,7 @@ static const struct nft_expr_ops nft_ct_set_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)), .eval = nft_ct_set_eval, .init = nft_ct_set_init, - .destroy = nft_ct_destroy, + .destroy = nft_ct_set_destroy, .dump = nft_ct_set_dump, }; -- cgit v1.2.3-59-g8ed1b From 82de0be6862cdca2e6802267bda57cfc8844d3a7 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Mon, 18 Jul 2016 11:39:23 +0800 Subject: netfilter: Add helper array register/unregister functions Add nf_ct_helper_init(), nf_conntrack_helpers_register() and nf_conntrack_helpers_unregister() functions to avoid repetitive opencoded initialization in helpers. This patch keeps an id parameter for nf_ct_helper_init() not to break helper matching by name that has been inconsistently exposed to userspace through ports, eg. ftp-2121, and through an incremental id, eg. tftp-1. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_helper.h | 15 ++++++ net/netfilter/nf_conntrack_ftp.c | 58 +++++++--------------- net/netfilter/nf_conntrack_helper.c | 57 ++++++++++++++++++++++ net/netfilter/nf_conntrack_irc.c | 36 +++++--------- net/netfilter/nf_conntrack_sane.c | 57 ++++++++-------------- net/netfilter/nf_conntrack_sip.c | 75 +++++++++++------------------ net/netfilter/nf_conntrack_tftp.c | 48 ++++++------------ 7 files changed, 165 insertions(+), 181 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 6cf614bc0029..1eaac1f4cd6a 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -58,10 +58,25 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name, struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name, u16 l3num, u8 protonum); +void nf_ct_helper_init(struct nf_conntrack_helper *helper, + u16 l3num, u16 protonum, const char *name, + u16 default_port, u16 spec_port, u32 id, + const struct nf_conntrack_expect_policy *exp_pol, + u32 expect_class_max, u32 data_len, + int (*help)(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo), + int (*from_nlattr)(struct nlattr *attr, + struct nf_conn *ct), + struct module *module); int nf_conntrack_helper_register(struct nf_conntrack_helper *); void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); +int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int); +void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *, + unsigned int); + struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, struct nf_conntrack_helper *helper, gfp_t gfp); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 19efeba02abb..43147005bea3 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -572,7 +572,7 @@ static int nf_ct_ftp_from_nlattr(struct nlattr *attr, struct nf_conn *ct) return 0; } -static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly; +static struct nf_conntrack_helper ftp[MAX_PORTS * 2] __read_mostly; static const struct nf_conntrack_expect_policy ftp_exp_policy = { .max_expected = 1, @@ -582,24 +582,13 @@ static const struct nf_conntrack_expect_policy ftp_exp_policy = { /* don't make this __exit, since it's called from __init ! */ static void nf_conntrack_ftp_fini(void) { - int i, j; - for (i = 0; i < ports_c; i++) { - for (j = 0; j < 2; j++) { - if (ftp[i][j].me == NULL) - continue; - - pr_debug("unregistering helper for pf: %d port: %d\n", - ftp[i][j].tuple.src.l3num, ports[i]); - nf_conntrack_helper_unregister(&ftp[i][j]); - } - } - + nf_conntrack_helpers_unregister(ftp, ports_c * 2); kfree(ftp_buffer); } static int __init nf_conntrack_ftp_init(void) { - int i, j = -1, ret = 0; + int i, ret = 0; ftp_buffer = kmalloc(65536, GFP_KERNEL); if (!ftp_buffer) @@ -611,32 +600,21 @@ static int __init nf_conntrack_ftp_init(void) /* FIXME should be configurable whether IPv4 and IPv6 FTP connections are tracked or not - YK */ for (i = 0; i < ports_c; i++) { - ftp[i][0].tuple.src.l3num = PF_INET; - ftp[i][1].tuple.src.l3num = PF_INET6; - for (j = 0; j < 2; j++) { - ftp[i][j].data_len = sizeof(struct nf_ct_ftp_master); - ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]); - ftp[i][j].tuple.dst.protonum = IPPROTO_TCP; - ftp[i][j].expect_policy = &ftp_exp_policy; - ftp[i][j].me = THIS_MODULE; - ftp[i][j].help = help; - ftp[i][j].from_nlattr = nf_ct_ftp_from_nlattr; - if (ports[i] == FTP_PORT) - sprintf(ftp[i][j].name, "ftp"); - else - sprintf(ftp[i][j].name, "ftp-%d", ports[i]); - - pr_debug("registering helper for pf: %d port: %d\n", - ftp[i][j].tuple.src.l3num, ports[i]); - ret = nf_conntrack_helper_register(&ftp[i][j]); - if (ret) { - pr_err("failed to register helper for pf: %d port: %d\n", - ftp[i][j].tuple.src.l3num, ports[i]); - ports_c = i; - nf_conntrack_ftp_fini(); - return ret; - } - } + nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP, "ftp", + FTP_PORT, ports[i], ports[i], &ftp_exp_policy, + 0, sizeof(struct nf_ct_ftp_master), help, + nf_ct_ftp_from_nlattr, THIS_MODULE); + nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP, "ftp", + FTP_PORT, ports[i], ports[i], &ftp_exp_policy, + 0, sizeof(struct nf_ct_ftp_master), help, + nf_ct_ftp_from_nlattr, THIS_MODULE); + } + + ret = nf_conntrack_helpers_register(ftp, ports_c * 2); + if (ret < 0) { + pr_err("failed to register helpers\n"); + kfree(ftp_buffer); + return ret; } return 0; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index a4294e949cdc..b989b81ac156 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -465,6 +465,63 @@ restart: } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); +void nf_ct_helper_init(struct nf_conntrack_helper *helper, + u16 l3num, u16 protonum, const char *name, + u16 default_port, u16 spec_port, u32 id, + const struct nf_conntrack_expect_policy *exp_pol, + u32 expect_class_max, u32 data_len, + int (*help)(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo), + int (*from_nlattr)(struct nlattr *attr, + struct nf_conn *ct), + struct module *module) +{ + helper->tuple.src.l3num = l3num; + helper->tuple.dst.protonum = protonum; + helper->tuple.src.u.all = htons(spec_port); + helper->expect_policy = exp_pol; + helper->expect_class_max = expect_class_max; + helper->data_len = data_len; + helper->help = help; + helper->from_nlattr = from_nlattr; + helper->me = module; + + if (spec_port == default_port) + snprintf(helper->name, sizeof(helper->name), "%s", name); + else + snprintf(helper->name, sizeof(helper->name), "%s-%u", name, id); +} +EXPORT_SYMBOL_GPL(nf_ct_helper_init); + +int nf_conntrack_helpers_register(struct nf_conntrack_helper *helper, + unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = nf_conntrack_helper_register(&helper[i]); + if (err < 0) + goto err; + } + + return err; +err: + if (i > 0) + nf_conntrack_helpers_unregister(helper, i); + return err; +} +EXPORT_SYMBOL_GPL(nf_conntrack_helpers_register); + +void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper, + unsigned int n) +{ + while (n-- > 0) + nf_conntrack_helper_unregister(&helper[n]); +} +EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister); + static struct nf_ct_ext_type helper_extend __read_mostly = { .len = sizeof(struct nf_conn_help), .align = __alignof__(struct nf_conn_help), diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index f97ac61d2536..1972a149f958 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -255,27 +255,18 @@ static int __init nf_conntrack_irc_init(void) ports[ports_c++] = IRC_PORT; for (i = 0; i < ports_c; i++) { - irc[i].tuple.src.l3num = AF_INET; - irc[i].tuple.src.u.tcp.port = htons(ports[i]); - irc[i].tuple.dst.protonum = IPPROTO_TCP; - irc[i].expect_policy = &irc_exp_policy; - irc[i].me = THIS_MODULE; - irc[i].help = help; - - if (ports[i] == IRC_PORT) - sprintf(irc[i].name, "irc"); - else - sprintf(irc[i].name, "irc-%u", i); - - ret = nf_conntrack_helper_register(&irc[i]); - if (ret) { - pr_err("failed to register helper for pf: %u port: %u\n", - irc[i].tuple.src.l3num, ports[i]); - ports_c = i; - nf_conntrack_irc_fini(); - return ret; - } + nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, "irc", + IRC_PORT, ports[i], i, &irc_exp_policy, + 0, 0, help, NULL, THIS_MODULE); + } + + ret = nf_conntrack_helpers_register(&irc[0], ports_c); + if (ret) { + pr_err("failed to register helpers\n"); + kfree(irc_buffer); + return ret; } + return 0; } @@ -283,10 +274,7 @@ static int __init nf_conntrack_irc_init(void) * it is needed by the init function */ static void nf_conntrack_irc_fini(void) { - int i; - - for (i = 0; i < ports_c; i++) - nf_conntrack_helper_unregister(&irc[i]); + nf_conntrack_helpers_unregister(irc, ports_c); kfree(irc_buffer); } diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 3fcbaab83b3d..9dcb9ee9b97d 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -166,7 +166,7 @@ out: return ret; } -static struct nf_conntrack_helper sane[MAX_PORTS][2] __read_mostly; +static struct nf_conntrack_helper sane[MAX_PORTS * 2] __read_mostly; static const struct nf_conntrack_expect_policy sane_exp_policy = { .max_expected = 1, @@ -176,22 +176,13 @@ static const struct nf_conntrack_expect_policy sane_exp_policy = { /* don't make this __exit, since it's called from __init ! */ static void nf_conntrack_sane_fini(void) { - int i, j; - - for (i = 0; i < ports_c; i++) { - for (j = 0; j < 2; j++) { - pr_debug("unregistering helper for pf: %d port: %d\n", - sane[i][j].tuple.src.l3num, ports[i]); - nf_conntrack_helper_unregister(&sane[i][j]); - } - } - + nf_conntrack_helpers_unregister(sane, ports_c * 2); kfree(sane_buffer); } static int __init nf_conntrack_sane_init(void) { - int i, j = -1, ret = 0; + int i, ret = 0; sane_buffer = kmalloc(65536, GFP_KERNEL); if (!sane_buffer) @@ -203,31 +194,23 @@ static int __init nf_conntrack_sane_init(void) /* FIXME should be configurable whether IPv4 and IPv6 connections are tracked or not - YK */ for (i = 0; i < ports_c; i++) { - sane[i][0].tuple.src.l3num = PF_INET; - sane[i][1].tuple.src.l3num = PF_INET6; - for (j = 0; j < 2; j++) { - sane[i][j].data_len = sizeof(struct nf_ct_sane_master); - sane[i][j].tuple.src.u.tcp.port = htons(ports[i]); - sane[i][j].tuple.dst.protonum = IPPROTO_TCP; - sane[i][j].expect_policy = &sane_exp_policy; - sane[i][j].me = THIS_MODULE; - sane[i][j].help = help; - if (ports[i] == SANE_PORT) - sprintf(sane[i][j].name, "sane"); - else - sprintf(sane[i][j].name, "sane-%d", ports[i]); - - pr_debug("registering helper for pf: %d port: %d\n", - sane[i][j].tuple.src.l3num, ports[i]); - ret = nf_conntrack_helper_register(&sane[i][j]); - if (ret) { - pr_err("failed to register helper for pf: %d port: %d\n", - sane[i][j].tuple.src.l3num, ports[i]); - ports_c = i; - nf_conntrack_sane_fini(); - return ret; - } - } + nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP, "sane", + SANE_PORT, ports[i], ports[i], + &sane_exp_policy, 0, + sizeof(struct nf_ct_sane_master), help, NULL, + THIS_MODULE); + nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP, "sane", + SANE_PORT, ports[i], ports[i], + &sane_exp_policy, 0, + sizeof(struct nf_ct_sane_master), help, NULL, + THIS_MODULE); + } + + ret = nf_conntrack_helpers_register(sane, ports_c * 2); + if (ret < 0) { + pr_err("failed to register helpers\n"); + kfree(sane_buffer); + return ret; } return 0; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index f72ba5587588..8d9db9d4702b 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1589,7 +1589,7 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff, return process_sip_msg(skb, ct, protoff, dataoff, &dptr, &datalen); } -static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly; +static struct nf_conntrack_helper sip[MAX_PORTS * 4] __read_mostly; static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = { [SIP_EXPECT_SIGNALLING] = { @@ -1616,20 +1616,12 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1 static void nf_conntrack_sip_fini(void) { - int i, j; - - for (i = 0; i < ports_c; i++) { - for (j = 0; j < ARRAY_SIZE(sip[i]); j++) { - if (sip[i][j].me == NULL) - continue; - nf_conntrack_helper_unregister(&sip[i][j]); - } - } + nf_conntrack_helpers_unregister(sip, ports_c * 4); } static int __init nf_conntrack_sip_init(void) { - int i, j, ret; + int i, ret; if (ports_c == 0) ports[ports_c++] = SIP_PORT; @@ -1637,43 +1629,32 @@ static int __init nf_conntrack_sip_init(void) for (i = 0; i < ports_c; i++) { memset(&sip[i], 0, sizeof(sip[i])); - sip[i][0].tuple.src.l3num = AF_INET; - sip[i][0].tuple.dst.protonum = IPPROTO_UDP; - sip[i][0].help = sip_help_udp; - sip[i][1].tuple.src.l3num = AF_INET; - sip[i][1].tuple.dst.protonum = IPPROTO_TCP; - sip[i][1].help = sip_help_tcp; - - sip[i][2].tuple.src.l3num = AF_INET6; - sip[i][2].tuple.dst.protonum = IPPROTO_UDP; - sip[i][2].help = sip_help_udp; - sip[i][3].tuple.src.l3num = AF_INET6; - sip[i][3].tuple.dst.protonum = IPPROTO_TCP; - sip[i][3].help = sip_help_tcp; - - for (j = 0; j < ARRAY_SIZE(sip[i]); j++) { - sip[i][j].data_len = sizeof(struct nf_ct_sip_master); - sip[i][j].tuple.src.u.udp.port = htons(ports[i]); - sip[i][j].expect_policy = sip_exp_policy; - sip[i][j].expect_class_max = SIP_EXPECT_MAX; - sip[i][j].me = THIS_MODULE; - - if (ports[i] == SIP_PORT) - sprintf(sip[i][j].name, "sip"); - else - sprintf(sip[i][j].name, "sip-%u", i); - - pr_debug("port #%u: %u\n", i, ports[i]); + nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip", + SIP_PORT, ports[i], i, sip_exp_policy, + SIP_EXPECT_MAX, + sizeof(struct nf_ct_sip_master), sip_help_udp, + NULL, THIS_MODULE); + nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP, "sip", + SIP_PORT, ports[i], i, sip_exp_policy, + SIP_EXPECT_MAX, + sizeof(struct nf_ct_sip_master), sip_help_tcp, + NULL, THIS_MODULE); + nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP, "sip", + SIP_PORT, ports[i], i, sip_exp_policy, + SIP_EXPECT_MAX, + sizeof(struct nf_ct_sip_master), sip_help_udp, + NULL, THIS_MODULE); + nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP, "sip", + SIP_PORT, ports[i], i, sip_exp_policy, + SIP_EXPECT_MAX, + sizeof(struct nf_ct_sip_master), sip_help_tcp, + NULL, THIS_MODULE); + } - ret = nf_conntrack_helper_register(&sip[i][j]); - if (ret) { - pr_err("failed to register helper for pf: %u port: %u\n", - sip[i][j].tuple.src.l3num, ports[i]); - ports_c = i; - nf_conntrack_sip_fini(); - return ret; - } - } + ret = nf_conntrack_helpers_register(sip, ports_c * 4); + if (ret < 0) { + pr_err("failed to register helpers\n"); + return ret; } return 0; } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 2e65b5430fba..b1227dc6f75e 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -97,7 +97,7 @@ static int tftp_help(struct sk_buff *skb, return ret; } -static struct nf_conntrack_helper tftp[MAX_PORTS][2] __read_mostly; +static struct nf_conntrack_helper tftp[MAX_PORTS * 2] __read_mostly; static const struct nf_conntrack_expect_policy tftp_exp_policy = { .max_expected = 1, @@ -106,47 +106,29 @@ static const struct nf_conntrack_expect_policy tftp_exp_policy = { static void nf_conntrack_tftp_fini(void) { - int i, j; - - for (i = 0; i < ports_c; i++) { - for (j = 0; j < 2; j++) - nf_conntrack_helper_unregister(&tftp[i][j]); - } + nf_conntrack_helpers_unregister(tftp, ports_c * 2); } static int __init nf_conntrack_tftp_init(void) { - int i, j, ret; + int i, ret; if (ports_c == 0) ports[ports_c++] = TFTP_PORT; for (i = 0; i < ports_c; i++) { - memset(&tftp[i], 0, sizeof(tftp[i])); - - tftp[i][0].tuple.src.l3num = AF_INET; - tftp[i][1].tuple.src.l3num = AF_INET6; - for (j = 0; j < 2; j++) { - tftp[i][j].tuple.dst.protonum = IPPROTO_UDP; - tftp[i][j].tuple.src.u.udp.port = htons(ports[i]); - tftp[i][j].expect_policy = &tftp_exp_policy; - tftp[i][j].me = THIS_MODULE; - tftp[i][j].help = tftp_help; - - if (ports[i] == TFTP_PORT) - sprintf(tftp[i][j].name, "tftp"); - else - sprintf(tftp[i][j].name, "tftp-%u", i); - - ret = nf_conntrack_helper_register(&tftp[i][j]); - if (ret) { - pr_err("failed to register helper for pf: %u port: %u\n", - tftp[i][j].tuple.src.l3num, ports[i]); - ports_c = i; - nf_conntrack_tftp_fini(); - return ret; - } - } + nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP, "tftp", + TFTP_PORT, ports[i], i, &tftp_exp_policy, + 0, 0, tftp_help, NULL, THIS_MODULE); + nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP, "tftp", + TFTP_PORT, ports[i], i, &tftp_exp_policy, + 0, 0, tftp_help, NULL, THIS_MODULE); + } + + ret = nf_conntrack_helpers_register(tftp, ports_c * 2); + if (ret < 0) { + pr_err("failed to register helpers\n"); + return ret; } return 0; } -- cgit v1.2.3-59-g8ed1b From c2d9a4293ced88d7dad7c35c893a31f49f8b64f5 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 18 Jul 2016 20:44:15 +0800 Subject: netfilter: nft_log: fix possible memory leak if log expr init fail Suppose that we specify the NFTA_LOG_PREFIX, then NFTA_LOG_LEVEL and NFTA_LOG_GROUP are specified together or nf_logger_find_get call returns fail, i.e. expr init fail, memory leak will happen. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_log.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 713d66837705..e1b34ff0ebd0 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -52,6 +52,14 @@ static int nft_log_init(const struct nft_ctx *ctx, struct nft_log *priv = nft_expr_priv(expr); struct nf_loginfo *li = &priv->loginfo; const struct nlattr *nla; + int err; + + li->type = NF_LOG_TYPE_LOG; + if (tb[NFTA_LOG_LEVEL] != NULL && + tb[NFTA_LOG_GROUP] != NULL) + return -EINVAL; + if (tb[NFTA_LOG_GROUP] != NULL) + li->type = NF_LOG_TYPE_ULOG; nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { @@ -63,13 +71,6 @@ static int nft_log_init(const struct nft_ctx *ctx, priv->prefix = (char *)nft_log_null_prefix; } - li->type = NF_LOG_TYPE_LOG; - if (tb[NFTA_LOG_LEVEL] != NULL && - tb[NFTA_LOG_GROUP] != NULL) - return -EINVAL; - if (tb[NFTA_LOG_GROUP] != NULL) - li->type = NF_LOG_TYPE_ULOG; - switch (li->type) { case NF_LOG_TYPE_LOG: if (tb[NFTA_LOG_LEVEL] != NULL) { @@ -96,7 +97,16 @@ static int nft_log_init(const struct nft_ctx *ctx, break; } - return nf_logger_find_get(ctx->afi->family, li->type); + err = nf_logger_find_get(ctx->afi->family, li->type); + if (err < 0) + goto err1; + + return 0; + +err1: + if (priv->prefix != nft_log_null_prefix) + kfree(priv->prefix); + return err; } static void nft_log_destroy(const struct nft_ctx *ctx, -- cgit v1.2.3-59-g8ed1b From 1bc4e0136cb32282d7968e11cfabc40763fdb03c Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 18 Jul 2016 20:44:16 +0800 Subject: netfilter: nft_log: check the validity of log level User can specify the log level larger than 7(debug level) via nfnetlink, this is invalid. So in this case, we should report EINVAL to the userspace. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_log.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index e1b34ff0ebd0..5f6f088ff06e 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -79,6 +79,11 @@ static int nft_log_init(const struct nft_ctx *ctx, } else { li->u.log.level = LOGLEVEL_WARNING; } + if (li->u.log.level > LOGLEVEL_DEBUG) { + err = -EINVAL; + goto err1; + } + if (tb[NFTA_LOG_FLAGS] != NULL) { li->u.log.logflags = ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS])); -- cgit v1.2.3-59-g8ed1b From cc37c1ad42ba6bc07c3d7a999f898e11d69a2580 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 18 Jul 2016 20:44:17 +0800 Subject: netfilter: nft_log: fix snaplen does not truncate packets There's a similar problem in xt_NFLOG, and was fixed by commit 7643507fe8b5 ("netfilter: xt_NFLOG: nflog-range does not truncate packets"). Only set copy_len here does not work, so we should enable NF_LOG_F_COPY_LEN also. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_log.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 5f6f088ff06e..24a73bb26e94 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -92,6 +92,7 @@ static int nft_log_init(const struct nft_ctx *ctx, case NF_LOG_TYPE_ULOG: li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP])); if (tb[NFTA_LOG_SNAPLEN] != NULL) { + li->u.ulog.flags |= NF_LOG_F_COPY_LEN; li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); } @@ -149,7 +150,7 @@ static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) goto nla_put_failure; - if (li->u.ulog.copy_len) { + if (li->u.ulog.flags & NF_LOG_F_COPY_LEN) { if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, htonl(li->u.ulog.copy_len))) goto nla_put_failure; -- cgit v1.2.3-59-g8ed1b From 6e1f760e13c75eb0c21c75c6eed918e25b54cd07 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Jul 2016 12:20:45 +0200 Subject: netfilter: nf_tables: allow to filter out rules by table and chain If the table and/or chain attributes are set in a rule dump request, we filter out the rules based on this selection. Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0211eaec9060..13d50e7cfe2f 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1857,10 +1857,16 @@ err: return err; } +struct nft_rule_dump_ctx { + char table[NFT_TABLE_MAXNAMELEN]; + char chain[NFT_CHAIN_MAXNAMELEN]; +}; + static int nf_tables_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + const struct nft_rule_dump_ctx *ctx = cb->data; const struct nft_af_info *afi; const struct nft_table *table; const struct nft_chain *chain; @@ -1877,7 +1883,15 @@ static int nf_tables_dump_rules(struct sk_buff *skb, continue; list_for_each_entry_rcu(table, &afi->tables, list) { + if (ctx && ctx->table[0] && + strcmp(ctx->table, table->name) != 0) + continue; + list_for_each_entry_rcu(chain, &table->chains, list) { + if (ctx && ctx->chain[0] && + strcmp(ctx->chain, chain->name) != 0) + continue; + list_for_each_entry_rcu(rule, &chain->rules, list) { if (!nft_is_active(net, rule)) goto cont; @@ -1907,6 +1921,12 @@ done: return skb->len; } +static int nf_tables_dump_rules_done(struct netlink_callback *cb) +{ + kfree(cb->data); + return 0; +} + static int nf_tables_getrule(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1924,7 +1944,25 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = nf_tables_dump_rules, + .done = nf_tables_dump_rules_done, }; + + if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) { + struct nft_rule_dump_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + + if (nla[NFTA_RULE_TABLE]) + nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE], + sizeof(ctx->table)); + if (nla[NFTA_RULE_CHAIN]) + nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN], + sizeof(ctx->chain)); + c.data = ctx; + } + return netlink_dump_start(nlsk, skb, nlh, &c); } -- cgit v1.2.3-59-g8ed1b From 23014011ba4209a086931ff402eac1c41abbe456 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Jul 2016 12:51:16 +0200 Subject: netfilter: conntrack: support a fixed size of 128 distinct labels The conntrack label extension is currently variable-sized, e.g. if only 2 labels are used by iptables rules then the labels->bits[] array will only contain one element. We track size of each label storage area in the 'words' member. But in nftables and openvswitch we always have to ask for worst-case since we don't know what bit will be used at configuration time. As most arches are 64bit we need to allocate 24 bytes in this case: struct nf_conn_labels { u8 words; /* 0 1 */ /* XXX 7 bytes hole, try to pack */ long unsigned bits[2]; /* 8 24 */ Make bits a fixed size and drop the words member, it simplifies the code and only increases memory requirements on x86 when less than 64bit labels are required. We still only allocate the extension if its needed. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 16 ++++------------ net/netfilter/nf_conntrack_labels.c | 13 +++---------- net/netfilter/nf_conntrack_netlink.c | 10 +++++----- net/netfilter/nft_ct.c | 13 +++---------- net/netfilter/xt_connlabel.c | 2 +- net/openvswitch/conntrack.c | 4 ++-- 6 files changed, 18 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index c5f8fc736b3d..0fd4989de836 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -10,8 +10,7 @@ #define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) struct nf_conn_labels { - u8 words; - unsigned long bits[]; + unsigned long bits[NF_CT_LABELS_MAX_SIZE / sizeof(long)]; }; static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) @@ -26,20 +25,13 @@ static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct) static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) { #ifdef CONFIG_NF_CONNTRACK_LABELS - struct nf_conn_labels *cl_ext; struct net *net = nf_ct_net(ct); - u8 words; - words = ACCESS_ONCE(net->ct.label_words); - if (words == 0) + if (net->ct.labels_used == 0) return NULL; - cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, - words * sizeof(long), GFP_ATOMIC); - if (cl_ext != NULL) - cl_ext->words = words; - - return cl_ext; + return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, + sizeof(struct nf_conn_labels), GFP_ATOMIC); #else return NULL; #endif diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 252e6a7cd2f1..7686200f9ace 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -20,7 +20,7 @@ int nf_connlabel_set(struct nf_conn *ct, u16 bit) { struct nf_conn_labels *labels = nf_ct_labels_find(ct); - if (!labels || BIT_WORD(bit) >= labels->words) + if (!labels) return -ENOSPC; if (test_bit(bit, labels->bits)) @@ -60,7 +60,7 @@ int nf_connlabels_replace(struct nf_conn *ct, if (!labels) return -ENOSPC; - size = labels->words * sizeof(long); + size = sizeof(labels->bits); if (size < (words32 * sizeof(u32))) words32 = size / sizeof(u32); @@ -80,16 +80,11 @@ EXPORT_SYMBOL_GPL(nf_connlabels_replace); int nf_connlabels_get(struct net *net, unsigned int bits) { - size_t words; - - words = BIT_WORD(bits) + 1; - if (words > NF_CT_LABELS_MAX_SIZE / sizeof(long)) + if (BIT_WORD(bits) >= NF_CT_LABELS_MAX_SIZE / sizeof(long)) return -ERANGE; spin_lock(&nf_connlabels_lock); net->ct.labels_used++; - if (words > net->ct.label_words) - net->ct.label_words = words; spin_unlock(&nf_connlabels_lock); return 0; @@ -100,8 +95,6 @@ void nf_connlabels_put(struct net *net) { spin_lock(&nf_connlabels_lock); net->ct.labels_used--; - if (net->ct.labels_used == 0) - net->ct.label_words = 0; spin_unlock(&nf_connlabels_lock); } EXPORT_SYMBOL_GPL(nf_connlabels_put); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a18d1ceabad5..050bb3420a6b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -346,25 +346,25 @@ static inline int ctnetlink_label_size(const struct nf_conn *ct) if (!labels) return 0; - return nla_total_size(labels->words * sizeof(long)); + return nla_total_size(sizeof(labels->bits)); } static int ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct) { struct nf_conn_labels *labels = nf_ct_labels_find(ct); - unsigned int len, i; + unsigned int i; if (!labels) return 0; - len = labels->words * sizeof(long); i = 0; do { if (labels->bits[i] != 0) - return nla_put(skb, CTA_LABELS, len, labels->bits); + return nla_put(skb, CTA_LABELS, sizeof(labels->bits), + labels->bits); i++; - } while (i < labels->words); + } while (i < ARRAY_SIZE(labels->bits)); return 0; } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index d9e44ca34055..2f47d5d3ae3b 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -113,18 +113,11 @@ static void nft_ct_get_eval(const struct nft_expr *expr, #ifdef CONFIG_NF_CONNTRACK_LABELS case NFT_CT_LABELS: { struct nf_conn_labels *labels = nf_ct_labels_find(ct); - unsigned int size; - if (!labels) { + if (labels) + memcpy(dest, labels->bits, NF_CT_LABELS_MAX_SIZE); + else memset(dest, 0, NF_CT_LABELS_MAX_SIZE); - return; - } - - size = labels->words * sizeof(long); - memcpy(dest, labels->bits, size); - if (size < NF_CT_LABELS_MAX_SIZE) - memset(((char *) dest) + size, 0, - NF_CT_LABELS_MAX_SIZE - size); return; } #endif diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index a79af255561a..c9fba8ade3d5 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -25,7 +25,7 @@ static bool connlabel_match(const struct nf_conn *ct, u16 bit) if (!labels) return false; - return BIT_WORD(bit) < labels->words && test_bit(bit, labels->bits); + return test_bit(bit, labels->bits); } static bool diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index b4069a90e375..c644c78ed485 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -135,7 +135,7 @@ static void ovs_ct_get_labels(const struct nf_conn *ct, struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL; if (cl) { - size_t len = cl->words * sizeof(long); + size_t len = sizeof(cl->bits); if (len > OVS_CT_LABELS_LEN) len = OVS_CT_LABELS_LEN; @@ -274,7 +274,7 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key, nf_ct_labels_ext_add(ct); cl = nf_ct_labels_find(ct); } - if (!cl || cl->words * sizeof(long) < OVS_CT_LABELS_LEN) + if (!cl || sizeof(cl->bits) < OVS_CT_LABELS_LEN) return -ENOSPC; err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask, -- cgit v1.2.3-59-g8ed1b From 857ed310c013fe0d0059f955048dab589fa7a57a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 21 Jul 2016 12:51:17 +0200 Subject: netfilter: connlabels: move set helper to xt_connlabel xt_connlabel is the only user so move it. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_labels.h | 2 -- net/netfilter/nf_conntrack_labels.c | 17 ----------------- net/netfilter/xt_connlabel.c | 29 ++++++++++++++++------------- 3 files changed, 16 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index 0fd4989de836..498814626e28 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -37,8 +37,6 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) #endif } -int nf_connlabel_set(struct nf_conn *ct, u16 bit); - int nf_connlabels_replace(struct nf_conn *ct, const u32 *data, const u32 *mask, unsigned int words); diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 7686200f9ace..bcab8bde7312 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -16,23 +16,6 @@ static spinlock_t nf_connlabels_lock; -int nf_connlabel_set(struct nf_conn *ct, u16 bit) -{ - struct nf_conn_labels *labels = nf_ct_labels_find(ct); - - if (!labels) - return -ENOSPC; - - if (test_bit(bit, labels->bits)) - return 0; - - if (!test_and_set_bit(bit, labels->bits)) - nf_conntrack_event_cache(IPCT_LABEL, ct); - - return 0; -} -EXPORT_SYMBOL_GPL(nf_connlabel_set); - static int replace_u32(u32 *address, u32 mask, u32 new) { u32 old, tmp; diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index c9fba8ade3d5..03d66f1c5e69 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -18,21 +19,12 @@ MODULE_DESCRIPTION("Xtables: add/match connection trackling labels"); MODULE_ALIAS("ipt_connlabel"); MODULE_ALIAS("ip6t_connlabel"); -static bool connlabel_match(const struct nf_conn *ct, u16 bit) -{ - struct nf_conn_labels *labels = nf_ct_labels_find(ct); - - if (!labels) - return false; - - return test_bit(bit, labels->bits); -} - static bool connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_connlabel_mtinfo *info = par->matchinfo; enum ip_conntrack_info ctinfo; + struct nf_conn_labels *labels; struct nf_conn *ct; bool invert = info->options & XT_CONNLABEL_OP_INVERT; @@ -40,10 +32,21 @@ connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par) if (ct == NULL || nf_ct_is_untracked(ct)) return invert; - if (info->options & XT_CONNLABEL_OP_SET) - return (nf_connlabel_set(ct, info->bit) == 0) ^ invert; + labels = nf_ct_labels_find(ct); + if (!labels) + return invert; + + if (test_bit(info->bit, labels->bits)) + return !invert; + + if (info->options & XT_CONNLABEL_OP_SET) { + if (!test_and_set_bit(info->bit, labels->bits)) + nf_conntrack_event_cache(IPCT_LABEL, ct); + + return !invert; + } - return connlabel_match(ct, info->bit) ^ invert; + return invert; } static int connlabel_mt_check(const struct xt_mtchk_param *par) -- cgit v1.2.3-59-g8ed1b From 96d1327ac2e3dc3ac4204fe3656dad0043fc0efd Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 22 Jul 2016 12:59:15 +0800 Subject: netfilter: h323: Use mod_timer instead of set_expect_timeout Simplify the code without any side effect. The set_expect_timeout is used to modify the timer expired time. It tries to delete timer, and add it again. So we could use mod_timer directly. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_h323_main.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 9511af04dc81..bb77a97961bf 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1272,19 +1272,6 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, return NULL; } -/****************************************************************************/ -static int set_expect_timeout(struct nf_conntrack_expect *exp, - unsigned int timeout) -{ - if (!exp || !del_timer(&exp->timeout)) - return 0; - - exp->timeout.expires = jiffies + timeout * HZ; - add_timer(&exp->timeout); - - return 1; -} - /****************************************************************************/ static int expect_q931(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, @@ -1486,7 +1473,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct, "timeout to %u seconds for", info->timeout); nf_ct_dump_tuple(&exp->tuple); - set_expect_timeout(exp, info->timeout); + mod_timer(&exp->timeout, jiffies + info->timeout * HZ); } spin_unlock_bh(&nf_conntrack_expect_lock); } -- cgit v1.2.3-59-g8ed1b From 2bf4fade54de995f84d319ae02003609dca450f3 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 23 Jul 2016 16:00:31 +0800 Subject: netfilter: nft_compat: put back match/target module if init fail If the user specify the invalid NFTA_MATCH_INFO/NFTA_TARGET_INFO attr or memory alloc fail, we should call module_put to the related match or target. Otherwise, we cannot remove the module even nobody use it. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 6228c422c766..2e07cec50ffd 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -634,6 +634,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, struct xt_match *match; char *mt_name; u32 rev, family; + int err; if (tb[NFTA_MATCH_NAME] == NULL || tb[NFTA_MATCH_REV] == NULL || @@ -660,13 +661,17 @@ nft_match_select_ops(const struct nft_ctx *ctx, if (IS_ERR(match)) return ERR_PTR(-ENOENT); - if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO])) - return ERR_PTR(-EINVAL); + if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO])) { + err = -EINVAL; + goto err; + } /* This is the first time we use this match, allocate operations */ nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); - if (nft_match == NULL) - return ERR_PTR(-ENOMEM); + if (nft_match == NULL) { + err = -ENOMEM; + goto err; + } nft_match->ops.type = &nft_match_type; nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); @@ -680,6 +685,9 @@ nft_match_select_ops(const struct nft_ctx *ctx, list_add(&nft_match->head, &nft_match_list); return &nft_match->ops; +err: + module_put(match->me); + return ERR_PTR(err); } static void nft_match_release(void) @@ -717,6 +725,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, struct xt_target *target; char *tg_name; u32 rev, family; + int err; if (tb[NFTA_TARGET_NAME] == NULL || tb[NFTA_TARGET_REV] == NULL || @@ -743,13 +752,17 @@ nft_target_select_ops(const struct nft_ctx *ctx, if (IS_ERR(target)) return ERR_PTR(-ENOENT); - if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) - return ERR_PTR(-EINVAL); + if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) { + err = -EINVAL; + goto err; + } /* This is the first time we use this target, allocate operations */ nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL); - if (nft_target == NULL) - return ERR_PTR(-ENOMEM); + if (nft_target == NULL) { + err = -ENOMEM; + goto err; + } nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); @@ -767,6 +780,9 @@ nft_target_select_ops(const struct nft_ctx *ctx, list_add(&nft_target->head, &nft_target_list); return &nft_target->ops; +err: + module_put(target->me); + return ERR_PTR(err); } static void nft_target_release(void) -- cgit v1.2.3-59-g8ed1b From 4b512e1c1f8de6b9ceb796ecef8658e0a083cab7 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 23 Jul 2016 16:00:32 +0800 Subject: netfilter: nft_compat: fix crash when related match/target module is removed We "cache" the loaded match/target modules and reuse them, but when the modules are removed, we still point to them. Then we may end up with invalid memory references when using iptables-compat to add rules later. Input the following commands will reproduce the kernel crash: # iptables-compat -A INPUT -j LOG # iptables-compat -D INPUT -j LOG # rmmod xt_LOG # iptables-compat -A INPUT -j LOG BUG: unable to handle kernel paging request at ffffffffa05a9010 IP: [] strcmp+0xe/0x30 Call Trace: [] nft_target_select_ops+0x83/0x1f0 [nft_compat] [] nf_tables_expr_parse+0x147/0x1f0 [nf_tables] [] nf_tables_newrule+0x301/0x810 [nf_tables] [] ? nla_parse+0x20/0x100 [] nfnetlink_rcv+0x33f/0x53d [nfnetlink] [] ? nfnetlink_rcv+0x1fb/0x53d [nfnetlink] [] netlink_unicast+0x178/0x220 [] netlink_sendmsg+0x2fb/0x3a0 [] sock_sendmsg+0x38/0x50 [] ___sys_sendmsg+0x28e/0x2a0 [] ? release_sock+0x1e/0xb0 [] ? _raw_spin_unlock_bh+0x35/0x40 [] ? release_sock+0x82/0xb0 [] __sys_sendmsg+0x54/0x90 [] SyS_sendmsg+0x12/0x20 [] entry_SYSCALL_64_fastpath+0x1a/0xa9 So when nobody use the related match/target module, there's no need to "cache" it. And nft_[match|target]_release are useless anymore, remove them. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_compat.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 2e07cec50ffd..c21e7eb8dce0 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -23,6 +23,20 @@ #include #include +struct nft_xt { + struct list_head head; + struct nft_expr_ops ops; + unsigned int refcnt; +}; + +static void nft_xt_put(struct nft_xt *xt) +{ + if (--xt->refcnt == 0) { + list_del(&xt->head); + kfree(xt); + } +} + static int nft_compat_chain_validate_dependency(const char *tablename, const struct nft_chain *chain) { @@ -260,6 +274,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.target->destroy != NULL) par.target->destroy(&par); + nft_xt_put(container_of(expr->ops, struct nft_xt, ops)); module_put(target->me); } @@ -442,6 +457,7 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.match->destroy != NULL) par.match->destroy(&par); + nft_xt_put(container_of(expr->ops, struct nft_xt, ops)); module_put(match->me); } @@ -612,11 +628,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = { static LIST_HEAD(nft_match_list); -struct nft_xt { - struct list_head head; - struct nft_expr_ops ops; -}; - static struct nft_expr_type nft_match_type; static bool nft_match_cmp(const struct xt_match *match, @@ -653,6 +664,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, if (!try_module_get(match->me)) return ERR_PTR(-ENOENT); + nft_match->refcnt++; return &nft_match->ops; } } @@ -673,6 +685,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, goto err; } + nft_match->refcnt = 1; nft_match->ops.type = &nft_match_type; nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); nft_match->ops.eval = nft_match_eval; @@ -690,14 +703,6 @@ err: return ERR_PTR(err); } -static void nft_match_release(void) -{ - struct nft_xt *nft_match, *tmp; - - list_for_each_entry_safe(nft_match, tmp, &nft_match_list, head) - kfree(nft_match); -} - static struct nft_expr_type nft_match_type __read_mostly = { .name = "match", .select_ops = nft_match_select_ops, @@ -744,6 +749,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, if (!try_module_get(target->me)) return ERR_PTR(-ENOENT); + nft_target->refcnt++; return &nft_target->ops; } } @@ -764,6 +770,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, goto err; } + nft_target->refcnt = 1; nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; @@ -785,14 +792,6 @@ err: return ERR_PTR(err); } -static void nft_target_release(void) -{ - struct nft_xt *nft_target, *tmp; - - list_for_each_entry_safe(nft_target, tmp, &nft_target_list, head) - kfree(nft_target); -} - static struct nft_expr_type nft_target_type __read_mostly = { .name = "target", .select_ops = nft_target_select_ops, @@ -835,8 +834,6 @@ static void __exit nft_compat_module_exit(void) nfnetlink_subsys_unregister(&nfnl_compat_subsys); nft_unregister_expr(&nft_target_type); nft_unregister_expr(&nft_match_type); - nft_match_release(); - nft_target_release(); } MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT); -- cgit v1.2.3-59-g8ed1b