aboutsummaryrefslogtreecommitdiffstats
path: root/net/netfilter/nf_nat_core.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-07-27 12:03:20 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-27 12:03:20 -0700
commit468fc7ed5537615efe671d94248446ac24679773 (patch)
tree27bc9de792e863d6ec1630927b77ac9e7dabb38a /net/netfilter/nf_nat_core.c
parentMerge tag 'for-linus-4.8-rc0-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip (diff)
parentxgene: Fix build warning with ACPI disabled. (diff)
downloadlinux-dev-468fc7ed5537615efe671d94248446ac24679773.tar.xz
linux-dev-468fc7ed5537615efe671d94248446ac24679773.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) Unified UDP encapsulation offload methods for drivers, from Alexander Duyck. 2) Make DSA binding more sane, from Andrew Lunn. 3) Support QCA9888 chips in ath10k, from Anilkumar Kolli. 4) Several workqueue usage cleanups, from Bhaktipriya Shridhar. 5) Add XDP (eXpress Data Path), essentially running BPF programs on RX packets as soon as the device sees them, with the option to mirror the packet on TX via the same interface. From Brenden Blanco and others. 6) Allow qdisc/class stats dumps to run lockless, from Eric Dumazet. 7) Add VLAN support to b53 and bcm_sf2, from Florian Fainelli. 8) Simplify netlink conntrack entry layout, from Florian Westphal. 9) Add ipv4 forwarding support to mlxsw spectrum driver, from Ido Schimmel, Yotam Gigi, and Jiri Pirko. 10) Add SKB array infrastructure and convert tun and macvtap over to it. From Michael S Tsirkin and Jason Wang. 11) Support qdisc packet injection in pktgen, from John Fastabend. 12) Add neighbour monitoring framework to TIPC, from Jon Paul Maloy. 13) Add NV congestion control support to TCP, from Lawrence Brakmo. 14) Add GSO support to SCTP, from Marcelo Ricardo Leitner. 15) Allow GRO and RPS to function on macsec devices, from Paolo Abeni. 16) Support MPLS over IPV4, from Simon Horman. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1622 commits) xgene: Fix build warning with ACPI disabled. be2net: perform temperature query in adapter regardless of its interface state l2tp: Correctly return -EBADF from pppol2tp_getname. net/mlx5_core/health: Remove deprecated create_singlethread_workqueue net: ipmr/ip6mr: update lastuse on entry change macsec: ensure rx_sa is set when validation is disabled tipc: dump monitor attributes tipc: add a function to get the bearer name tipc: get monitor threshold for the cluster tipc: make cluster size threshold for monitoring configurable tipc: introduce constants for tipc address validation net: neigh: disallow transition to NUD_STALE if lladdr is unchanged in neigh_update() MAINTAINERS: xgene: Add driver and documentation path Documentation: dtb: xgene: Add MDIO node dtb: xgene: Add MDIO node drivers: net: xgene: ethtool: Use phy_ethtool_gset and sset drivers: net: xgene: Use exported functions drivers: net: xgene: Enable MDIO driver drivers: net: xgene: Add backward compatibility drivers: net: phy: xgene: Add MDIO driver ...
Diffstat (limited to 'net/netfilter/nf_nat_core.c')
-rw-r--r--net/netfilter/nf_nat_core.c149
1 files changed, 70 insertions, 79 deletions
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 6877a396f8fc..de31818417b8 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -30,17 +30,19 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
-static DEFINE_SPINLOCK(nf_nat_lock);
-
static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
__read_mostly;
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
-static struct hlist_head *nf_nat_bysource __read_mostly;
-static unsigned int nf_nat_htable_size __read_mostly;
-static unsigned int nf_nat_hash_rnd __read_mostly;
+struct nf_nat_conn_key {
+ const struct net *net;
+ const struct nf_conntrack_tuple *tuple;
+ const struct nf_conntrack_zone *zone;
+};
+
+static struct rhashtable nf_nat_bysource_table;
inline const struct nf_nat_l3proto *
__nf_nat_l3proto_find(u8 family)
@@ -119,19 +121,17 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
EXPORT_SYMBOL(nf_xfrm_me_harder);
#endif /* CONFIG_XFRM */
-/* We keep an extra hash for each conntrack, for fast searching. */
-static inline unsigned int
-hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
+static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed)
{
- unsigned int hash;
-
- get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
+ const struct nf_conntrack_tuple *t;
+ const struct nf_conn *ct = data;
+ t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* Original src, to ensure we map it consistently if poss. */
- hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
- tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
- return reciprocal_scale(hash, nf_nat_htable_size);
+ seed ^= net_hash_mix(nf_ct_net(ct));
+ return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32),
+ t->dst.protonum ^ seed);
}
/* Is this tuple already taken? (not by us) */
@@ -187,6 +187,26 @@ same_src(const struct nf_conn *ct,
t->src.u.all == tuple->src.u.all);
}
+static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg,
+ const void *obj)
+{
+ const struct nf_nat_conn_key *key = arg->key;
+ const struct nf_conn *ct = obj;
+
+ return same_src(ct, key->tuple) &&
+ net_eq(nf_ct_net(ct), key->net) &&
+ nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL);
+}
+
+static struct rhashtable_params nf_nat_bysource_params = {
+ .head_offset = offsetof(struct nf_conn, nat_bysource),
+ .obj_hashfn = nf_nat_bysource_hash,
+ .obj_cmpfn = nf_nat_bysource_cmp,
+ .nelem_hint = 256,
+ .min_size = 1024,
+ .nulls_base = (1U << RHT_BASE_SHIFT),
+};
+
/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net,
@@ -197,25 +217,23 @@ find_appropriate_src(struct net *net,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
- unsigned int h = hash_by_src(net, tuple);
- const struct nf_conn_nat *nat;
const struct nf_conn *ct;
+ struct nf_nat_conn_key key = {
+ .net = net,
+ .tuple = tuple,
+ .zone = zone
+ };
- hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) {
- ct = nat->ct;
- if (same_src(ct, tuple) &&
- net_eq(net, nf_ct_net(ct)) &&
- nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
- /* Copy source part from reply tuple. */
- nf_ct_invert_tuplepr(result,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- result->dst = tuple->dst;
-
- if (in_range(l3proto, l4proto, result, range))
- return 1;
- }
- }
- return 0;
+ ct = rhashtable_lookup_fast(&nf_nat_bysource_table, &key,
+ nf_nat_bysource_params);
+ if (!ct)
+ return 0;
+
+ nf_ct_invert_tuplepr(result,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ result->dst = tuple->dst;
+
+ return in_range(l3proto, l4proto, result, range);
}
/* For [FUTURE] fragmentation handling, we want the least-used
@@ -387,7 +405,6 @@ nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype)
{
- struct net *net = nf_ct_net(ct);
struct nf_conntrack_tuple curr_tuple, new_tuple;
struct nf_conn_nat *nat;
@@ -428,17 +445,13 @@ nf_nat_setup_info(struct nf_conn *ct,
}
if (maniptype == NF_NAT_MANIP_SRC) {
- unsigned int srchash;
-
- srchash = hash_by_src(net,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- spin_lock_bh(&nf_nat_lock);
- /* nf_conntrack_alter_reply might re-allocate extension aera */
- nat = nfct_nat(ct);
- nat->ct = ct;
- hlist_add_head_rcu(&nat->bysource,
- &nf_nat_bysource[srchash]);
- spin_unlock_bh(&nf_nat_lock);
+ int err;
+
+ err = rhashtable_insert_fast(&nf_nat_bysource_table,
+ &ct->nat_bysource,
+ nf_nat_bysource_params);
+ if (err)
+ return NF_DROP;
}
/* It's done. */
@@ -543,7 +556,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
if (nf_nat_proto_remove(ct, data))
return 1;
- if (!nat || !nat->ct)
+ if (!nat)
return 0;
/* This netns is being destroyed, and conntrack has nat null binding.
@@ -555,11 +568,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
if (!del_timer(&ct->timeout))
return 1;
- spin_lock_bh(&nf_nat_lock);
- hlist_del_rcu(&nat->bysource);
ct->status &= ~IPS_NAT_DONE_MASK;
- nat->ct = NULL;
- spin_unlock_bh(&nf_nat_lock);
+
+ rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
+ nf_nat_bysource_params);
add_timer(&ct->timeout);
@@ -688,35 +700,17 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
- if (nat == NULL || nat->ct == NULL)
- return;
-
- NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
-
- spin_lock_bh(&nf_nat_lock);
- hlist_del_rcu(&nat->bysource);
- spin_unlock_bh(&nf_nat_lock);
-}
-
-static void nf_nat_move_storage(void *new, void *old)
-{
- struct nf_conn_nat *new_nat = new;
- struct nf_conn_nat *old_nat = old;
- struct nf_conn *ct = old_nat->ct;
-
- if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
+ if (!nat)
return;
- spin_lock_bh(&nf_nat_lock);
- hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
- spin_unlock_bh(&nf_nat_lock);
+ rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
+ nf_nat_bysource_params);
}
static struct nf_ct_ext_type nat_extend __read_mostly = {
.len = sizeof(struct nf_conn_nat),
.align = __alignof__(struct nf_conn_nat),
.destroy = nf_nat_cleanup_conntrack,
- .move = nf_nat_move_storage,
.id = NF_CT_EXT_NAT,
.flags = NF_CT_EXT_F_PREALLOC,
};
@@ -845,16 +839,13 @@ static int __init nf_nat_init(void)
{
int ret;
- /* Leave them the same for the moment. */
- nf_nat_htable_size = nf_conntrack_htable_size;
-
- nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
- if (!nf_nat_bysource)
- return -ENOMEM;
+ ret = rhashtable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
+ if (ret)
+ return ret;
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
- nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
+ rhashtable_destroy(&nf_nat_bysource_table);
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
return ret;
}
@@ -878,7 +869,7 @@ static int __init nf_nat_init(void)
return 0;
cleanup_extend:
- nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
+ rhashtable_destroy(&nf_nat_bysource_table);
nf_ct_extend_unregister(&nat_extend);
return ret;
}
@@ -896,8 +887,8 @@ static void __exit nf_nat_cleanup(void)
#endif
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
- synchronize_net();
- nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
+
+ rhashtable_destroy(&nf_nat_bysource_table);
}
MODULE_LICENSE("GPL");