From 056ff3e3bd1563969a311697323ff929df94415c Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 3 Feb 2010 12:58:06 +0100
Subject: netfilter: nf_conntrack: fix memory corruption with multiple
 namespaces

As discovered by Jon Masters <jonathan@jonmasters.org>, the "untracked"
conntrack, which is located in the data section, might be accidentally
freed when a new namespace is instantiated while the untracked conntrack
is attached to a skb because the reference count is re-initialized.

The best fix would be to use a separate untracked conntrack per
namespace since it includes a namespace pointer. Unfortunately this is
not possible without larger changes since the namespace is not easily
available everywhere we need it. For now move the untracked conntrack
initialization to the init_net setup function to make sure the reference
count is not re-initialized and handle cleanup in the init_net cleanup
function to make sure namespaces can exit properly while the untracked
conntrack is in use in other namespaces.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_core.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0e98c3282d42..37e2b88313f2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1113,6 +1113,10 @@ static void nf_ct_release_dying_list(struct net *net)
 
 static void nf_conntrack_cleanup_init_net(void)
 {
+	/* wait until all references to nf_conntrack_untracked are dropped */
+	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
+		schedule();
+
 	nf_conntrack_helper_fini();
 	nf_conntrack_proto_fini();
 	kmem_cache_destroy(nf_conntrack_cachep);
@@ -1127,9 +1131,6 @@ static void nf_conntrack_cleanup_net(struct net *net)
 		schedule();
 		goto i_see_dead_people;
 	}
-	/* wait until all references to nf_conntrack_untracked are dropped */
-	while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1)
-		schedule();
 
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
@@ -1288,6 +1289,14 @@ static int nf_conntrack_init_init_net(void)
 	if (ret < 0)
 		goto err_helper;
 
+	/* Set up fake conntrack: to never be deleted, not in any hashes */
+#ifdef CONFIG_NET_NS
+	nf_conntrack_untracked.ct_net = &init_net;
+#endif
+	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
+	/*  - and look it like as a confirmed connection */
+	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
+
 	return 0;
 
 err_helper:
@@ -1333,15 +1342,6 @@ static int nf_conntrack_init_net(struct net *net)
 	if (ret < 0)
 		goto err_ecache;
 
-	/* Set up fake conntrack:
-	    - to never be deleted, not in any hashes */
-#ifdef CONFIG_NET_NS
-	nf_conntrack_untracked.ct_net = &init_net;
-#endif
-	atomic_set(&nf_conntrack_untracked.ct_general.use, 1);
-	/*  - and look it like as a confirmed connection */
-	set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status);
-
 	return 0;
 
 err_ecache:
-- 
cgit v1.2.3-59-g8ed1b


From ab59b19be78aac65cdd599fb5002c9019885e061 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 4 Feb 2010 14:54:05 +0100
Subject: netfilter: nf_conntrack: per netns nf_conntrack_cachep

nf_conntrack_cachep is currently shared by all netns instances, but
because of SLAB_DESTROY_BY_RCU special semantics, this is wrong.

If we use a shared slab cache, one object can instantly move from
one hash table (netns ONE) to another one (netns TWO), and a concurrent
reader (doing a lookup in netns ONE, 'finding' an object of netns TWO)
can be fooled without notice, because no RCU grace period has to be
observed between object freeing and its reuse.

We don't have this problem with UDP/TCP slab caches because TCP/UDP
hashtables are global to the machine (and each object has a pointer to
its netns).

If we use per netns conntrack hash tables, we also *must* use per netns
conntrack slab caches, to guarantee an object can not escape from one
namespace to another one.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
[Patrick: added unique slab name allocation]
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netns/conntrack.h     |  2 ++
 net/netfilter/nf_conntrack_core.c | 39 +++++++++++++++++++++++----------------
 2 files changed, 25 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index ba1ba0c5efd1..aed23b6c8478 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -11,6 +11,7 @@ struct nf_conntrack_ecache;
 struct netns_ct {
 	atomic_t		count;
 	unsigned int		expect_count;
+	struct kmem_cache	*nf_conntrack_cachep;
 	struct hlist_nulls_head	*hash;
 	struct hlist_head	*expect_hash;
 	struct hlist_nulls_head	unconfirmed;
@@ -28,5 +29,6 @@ struct netns_ct {
 #endif
 	int			hash_vmalloc;
 	int			expect_vmalloc;
+	char			*slabname;
 };
 #endif
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 37e2b88313f2..9de4bd4c0dd7 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -63,8 +63,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max);
 struct nf_conn nf_conntrack_untracked __read_mostly;
 EXPORT_SYMBOL_GPL(nf_conntrack_untracked);
 
-static struct kmem_cache *nf_conntrack_cachep __read_mostly;
-
 static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
 
@@ -572,7 +570,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 	 * Do not use kmem_cache_zalloc(), as this cache uses
 	 * SLAB_DESTROY_BY_RCU.
 	 */
-	ct = kmem_cache_alloc(nf_conntrack_cachep, gfp);
+	ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp);
 	if (ct == NULL) {
 		pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n");
 		atomic_dec(&net->ct.count);
@@ -611,7 +609,7 @@ void nf_conntrack_free(struct nf_conn *ct)
 	nf_ct_ext_destroy(ct);
 	atomic_dec(&net->ct.count);
 	nf_ct_ext_free(ct);
-	kmem_cache_free(nf_conntrack_cachep, ct);
+	kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_free);
 
@@ -1119,7 +1117,6 @@ static void nf_conntrack_cleanup_init_net(void)
 
 	nf_conntrack_helper_fini();
 	nf_conntrack_proto_fini();
-	kmem_cache_destroy(nf_conntrack_cachep);
 }
 
 static void nf_conntrack_cleanup_net(struct net *net)
@@ -1137,6 +1134,8 @@ static void nf_conntrack_cleanup_net(struct net *net)
 	nf_conntrack_ecache_fini(net);
 	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
+	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
+	kfree(net->ct.slabname);
 	free_percpu(net->ct.stat);
 }
 
@@ -1272,15 +1271,6 @@ static int nf_conntrack_init_init_net(void)
 	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
 	       nf_conntrack_max);
 
-	nf_conntrack_cachep = kmem_cache_create("nf_conntrack",
-						sizeof(struct nf_conn),
-						0, SLAB_DESTROY_BY_RCU, NULL);
-	if (!nf_conntrack_cachep) {
-		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
-		ret = -ENOMEM;
-		goto err_cache;
-	}
-
 	ret = nf_conntrack_proto_init();
 	if (ret < 0)
 		goto err_proto;
@@ -1302,8 +1292,6 @@ static int nf_conntrack_init_init_net(void)
 err_helper:
 	nf_conntrack_proto_fini();
 err_proto:
-	kmem_cache_destroy(nf_conntrack_cachep);
-err_cache:
 	return ret;
 }
 
@@ -1325,6 +1313,21 @@ static int nf_conntrack_init_net(struct net *net)
 		ret = -ENOMEM;
 		goto err_stat;
 	}
+
+	net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
+	if (!net->ct.slabname) {
+		ret = -ENOMEM;
+		goto err_slabname;
+	}
+
+	net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname,
+							sizeof(struct nf_conn), 0,
+							SLAB_DESTROY_BY_RCU, NULL);
+	if (!net->ct.nf_conntrack_cachep) {
+		printk(KERN_ERR "Unable to create nf_conn slab cache\n");
+		ret = -ENOMEM;
+		goto err_cache;
+	}
 	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
 					     &net->ct.hash_vmalloc, 1);
 	if (!net->ct.hash) {
@@ -1352,6 +1355,10 @@ err_expect:
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
 			     nf_conntrack_htable_size);
 err_hash:
+	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
+err_cache:
+	kfree(net->ct.slabname);
+err_slabname:
 	free_percpu(net->ct.stat);
 err_stat:
 	return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 38c7233b287481dfb3327dde136801ce500aba58 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Thu, 4 Feb 2010 18:24:06 +0100
Subject: netfilter: nf_conntrack: restrict runtime expect hashsize
 modifications

Expectation hashtable size was simply glued to a variable with no code
to rehash expectations, so it was a bug to allow writing to it.
Make "expect_hashsize" readonly.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_expect.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index fdf5d2a1d9b4..4ad7d1d809af 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -569,7 +569,7 @@ static void exp_proc_remove(struct net *net)
 #endif /* CONFIG_PROC_FS */
 }
 
-module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
+module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
 
 int nf_conntrack_expect_init(struct net *net)
 {
-- 
cgit v1.2.3-59-g8ed1b


From dab1531a07ad7c5be4ebe715a3d08742f0c638e3 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 8 Feb 2010 15:44:07 +0100
Subject: netfilter: xtables: compat out of scope fix

As per C99 6.2.4(2) when temporary table data goes out of scope,
the behaviour is undefined:

	if (compat) {
		struct foo tmp;
		...
		private = &tmp;
	}
	[dereference private]

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/arp_tables.c | 4 ++--
 net/ipv4/netfilter/ip_tables.c  | 4 ++--
 net/ipv6/netfilter/ip6_tables.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 06632762ba5f..90203e1b9187 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -925,10 +925,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
 	if (t && !IS_ERR(t)) {
 		struct arpt_getinfo info;
 		const struct xt_table_info *private = t->private;
-
 #ifdef CONFIG_COMPAT
+		struct xt_table_info tmp;
+
 		if (compat) {
-			struct xt_table_info tmp;
 			ret = compat_table_info(private, &tmp);
 			xt_compat_flush_offsets(NFPROTO_ARP);
 			private = &tmp;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 572330a552ef..3ce53cf13d5a 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1132,10 +1132,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
 	if (t && !IS_ERR(t)) {
 		struct ipt_getinfo info;
 		const struct xt_table_info *private = t->private;
-
 #ifdef CONFIG_COMPAT
+		struct xt_table_info tmp;
+
 		if (compat) {
-			struct xt_table_info tmp;
 			ret = compat_table_info(private, &tmp);
 			xt_compat_flush_offsets(AF_INET);
 			private = &tmp;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 480d7f8c9802..8a7e0f52e177 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1164,10 +1164,10 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
 	if (t && !IS_ERR(t)) {
 		struct ip6t_getinfo info;
 		const struct xt_table_info *private = t->private;
-
 #ifdef CONFIG_COMPAT
+		struct xt_table_info tmp;
+
 		if (compat) {
-			struct xt_table_info tmp;
 			ret = compat_table_info(private, &tmp);
 			xt_compat_flush_offsets(AF_INET6);
 			private = &tmp;
-- 
cgit v1.2.3-59-g8ed1b


From 9ab48ddcb144fdee908708669448dd136cf4894a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 8 Feb 2010 17:35:23 +0100
Subject: netfilter: nf_conntrack: fix hash resizing with namespaces

As noticed by Jon Masters <jonathan@jonmasters.org>, the conntrack hash
size is global and not per namespace, but modifiable at runtime through
/sys/module/nf_conntrack/hashsize. Changing the hash size will only
resize the hash in the current namespace however, so other namespaces
will use an invalid hash size. This can cause crashes when enlarging
the hashsize, or false negative lookups when shrinking it.

Move the hash size into the per-namespace data and only use the global
hash size to initialize the per-namespace value when instantiating a
new namespace. Additionally restrict hash resizing to init_net for
now as other namespaces are not handled currently.

Cc: stable@kernel.org
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netns/conntrack.h                      |  1 +
 include/net/netns/ipv4.h                           |  1 +
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c     |  2 +-
 .../netfilter/nf_conntrack_l3proto_ipv4_compat.c   |  4 +-
 net/ipv4/netfilter/nf_nat_core.c                   | 22 ++++-----
 net/netfilter/nf_conntrack_core.c                  | 53 ++++++++++++----------
 net/netfilter/nf_conntrack_expect.c                |  2 +-
 net/netfilter/nf_conntrack_helper.c                |  2 +-
 net/netfilter/nf_conntrack_netlink.c               |  2 +-
 net/netfilter/nf_conntrack_standalone.c            |  7 +--
 10 files changed, 49 insertions(+), 47 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index aed23b6c8478..63d449807d9b 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -11,6 +11,7 @@ struct nf_conntrack_ecache;
 struct netns_ct {
 	atomic_t		count;
 	unsigned int		expect_count;
+	unsigned int		htable_size;
 	struct kmem_cache	*nf_conntrack_cachep;
 	struct hlist_nulls_head	*hash;
 	struct hlist_head	*expect_hash;
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 2eb3814d6258..9a4b8b714079 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -40,6 +40,7 @@ struct netns_ipv4 {
 	struct xt_table		*iptable_security;
 	struct xt_table		*nat_table;
 	struct hlist_head	*nat_bysource;
+	unsigned int		nat_htable_size;
 	int			nat_vmalloced;
 #endif
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index d171b123a656..d1ea38a7c490 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -210,7 +210,7 @@ static ctl_table ip_ct_sysctl_table[] = {
 	},
 	{
 		.procname	= "ip_conntrack_buckets",
-		.data		= &nf_conntrack_htable_size,
+		.data		= &init_net.ct.htable_size,
 		.maxlen		= sizeof(unsigned int),
 		.mode		= 0444,
 		.proc_handler	= proc_dointvec,
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 8668a3defda6..2fb7b76da94f 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 	struct hlist_nulls_node *n;
 
 	for (st->bucket = 0;
-	     st->bucket < nf_conntrack_htable_size;
+	     st->bucket < net->ct.htable_size;
 	     st->bucket++) {
 		n = rcu_dereference(net->ct.hash[st->bucket].first);
 		if (!is_a_nulls(n))
@@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 	head = rcu_dereference(head->next);
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
-			if (++st->bucket >= nf_conntrack_htable_size)
+			if (++st->bucket >= net->ct.htable_size)
 				return NULL;
 		}
 		head = rcu_dereference(net->ct.hash[st->bucket].first);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index fe1a64479dd0..26066a2327ad 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -35,9 +35,6 @@ static DEFINE_SPINLOCK(nf_nat_lock);
 
 static struct nf_conntrack_l3proto *l3proto __read_mostly;
 
-/* Calculated at init based on memory size */
-static unsigned int nf_nat_htable_size __read_mostly;
-
 #define MAX_IP_NAT_PROTO 256
 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
 						__read_mostly;
@@ -72,7 +69,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put);
 
 /* We keep an extra hash for each conntrack, for fast searching. */
 static inline unsigned int
-hash_by_src(const struct nf_conntrack_tuple *tuple)
+hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	unsigned int hash;
 
@@ -80,7 +77,7 @@ hash_by_src(const struct nf_conntrack_tuple *tuple)
 	hash = jhash_3words((__force u32)tuple->src.u3.ip,
 			    (__force u32)tuple->src.u.all,
 			    tuple->dst.protonum, 0);
-	return ((u64)hash * nf_nat_htable_size) >> 32;
+	return ((u64)hash * net->ipv4.nat_htable_size) >> 32;
 }
 
 /* Is this tuple already taken? (not by us) */
@@ -147,7 +144,7 @@ find_appropriate_src(struct net *net,
 		     struct nf_conntrack_tuple *result,
 		     const struct nf_nat_range *range)
 {
-	unsigned int h = hash_by_src(tuple);
+	unsigned int h = hash_by_src(net, tuple);
 	const struct nf_conn_nat *nat;
 	const struct nf_conn *ct;
 	const struct hlist_node *n;
@@ -330,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 	if (have_to_hash) {
 		unsigned int srchash;
 
-		srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+		srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
 		spin_lock_bh(&nf_nat_lock);
 		/* nf_conntrack_alter_reply might re-allocate exntension aera */
 		nat = nfct_nat(ct);
@@ -679,8 +676,10 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 
 static int __net_init nf_nat_net_init(struct net *net)
 {
-	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
-						      &net->ipv4.nat_vmalloced, 0);
+	/* Leave them the same for the moment. */
+	net->ipv4.nat_htable_size = net->ct.htable_size;
+	net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
+						       &net->ipv4.nat_vmalloced, 0);
 	if (!net->ipv4.nat_bysource)
 		return -ENOMEM;
 	return 0;
@@ -703,7 +702,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
 	nf_ct_iterate_cleanup(net, &clean_nat, NULL);
 	synchronize_rcu();
 	nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
-			     nf_nat_htable_size);
+			     net->ipv4.nat_htable_size);
 }
 
 static struct pernet_operations nf_nat_net_ops = {
@@ -724,9 +723,6 @@ static int __init nf_nat_init(void)
 		return ret;
 	}
 
-	/* Leave them the same for the moment. */
-	nf_nat_htable_size = nf_conntrack_htable_size;
-
 	ret = register_pernet_subsys(&nf_nat_net_ops);
 	if (ret < 0)
 		goto cleanup_extend;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9de4bd4c0dd7..4d79e3c1616c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -30,6 +30,7 @@
 #include <linux/netdevice.h>
 #include <linux/socket.h>
 #include <linux/mm.h>
+#include <linux/nsproxy.h>
 #include <linux/rculist_nulls.h>
 
 #include <net/netfilter/nf_conntrack.h>
@@ -84,9 +85,10 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple,
 	return ((u64)h * size) >> 32;
 }
 
-static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple)
+static inline u_int32_t hash_conntrack(const struct net *net,
+				       const struct nf_conntrack_tuple *tuple)
 {
-	return __hash_conntrack(tuple, nf_conntrack_htable_size,
+	return __hash_conntrack(tuple, net->ct.htable_size,
 				nf_conntrack_hash_rnd);
 }
 
@@ -294,7 +296,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 {
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	unsigned int hash = hash_conntrack(tuple);
+	unsigned int hash = hash_conntrack(net, tuple);
 
 	/* Disable BHs the entire time since we normally need to disable them
 	 * at least once for the stats anyway.
@@ -364,10 +366,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,
 
 void nf_conntrack_hash_insert(struct nf_conn *ct)
 {
+	struct net *net = nf_ct_net(ct);
 	unsigned int hash, repl_hash;
 
-	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	__nf_conntrack_hash_insert(ct, hash, repl_hash);
 }
@@ -395,8 +398,8 @@ __nf_conntrack_confirm(struct sk_buff *skb)
 	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
 		return NF_ACCEPT;
 
-	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+	hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 
 	/* We're not in hash table, and we refuse to set up related
 	   connections for unconfirmed conns.  But packet copies and
@@ -466,7 +469,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple,
 	struct net *net = nf_ct_net(ignored_conntrack);
 	struct nf_conntrack_tuple_hash *h;
 	struct hlist_nulls_node *n;
-	unsigned int hash = hash_conntrack(tuple);
+	unsigned int hash = hash_conntrack(net, tuple);
 
 	/* Disable BHs the entire time since we need to disable them at
 	 * least once for the stats anyway.
@@ -501,7 +504,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 	int dropped = 0;
 
 	rcu_read_lock();
-	for (i = 0; i < nf_conntrack_htable_size; i++) {
+	for (i = 0; i < net->ct.htable_size; i++) {
 		hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash],
 					 hnnode) {
 			tmp = nf_ct_tuplehash_to_ctrack(h);
@@ -521,7 +524,7 @@ static noinline int early_drop(struct net *net, unsigned int hash)
 		if (cnt >= NF_CT_EVICTION_RANGE)
 			break;
 
-		hash = (hash + 1) % nf_conntrack_htable_size;
+		hash = (hash + 1) % net->ct.htable_size;
 	}
 	rcu_read_unlock();
 
@@ -555,7 +558,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net,
 
 	if (nf_conntrack_max &&
 	    unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) {
-		unsigned int hash = hash_conntrack(orig);
+		unsigned int hash = hash_conntrack(net, orig);
 		if (!early_drop(net, hash)) {
 			atomic_dec(&net->ct.count);
 			if (net_ratelimit())
@@ -1012,7 +1015,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
 	struct hlist_nulls_node *n;
 
 	spin_lock_bh(&nf_conntrack_lock);
-	for (; *bucket < nf_conntrack_htable_size; (*bucket)++) {
+	for (; *bucket < net->ct.htable_size; (*bucket)++) {
 		hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) {
 			ct = nf_ct_tuplehash_to_ctrack(h);
 			if (iter(ct, data))
@@ -1130,7 +1133,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
 	}
 
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-			     nf_conntrack_htable_size);
+			     net->ct.htable_size);
 	nf_conntrack_ecache_fini(net);
 	nf_conntrack_acct_fini(net);
 	nf_conntrack_expect_fini(net);
@@ -1190,10 +1193,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 {
 	int i, bucket, vmalloced, old_vmalloced;
 	unsigned int hashsize, old_size;
-	int rnd;
 	struct hlist_nulls_head *hash, *old_hash;
 	struct nf_conntrack_tuple_hash *h;
 
+	if (current->nsproxy->net_ns != &init_net)
+		return -EOPNOTSUPP;
+
 	/* On boot, we can set this without any fancy locking. */
 	if (!nf_conntrack_htable_size)
 		return param_set_uint(val, kp);
@@ -1206,33 +1211,29 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
 	if (!hash)
 		return -ENOMEM;
 
-	/* We have to rehahs for the new table anyway, so we also can
-	 * use a newrandom seed */
-	get_random_bytes(&rnd, sizeof(rnd));
-
 	/* Lookups in the old hash might happen in parallel, which means we
 	 * might get false negatives during connection lookup. New connections
 	 * created because of a false negative won't make it into the hash
 	 * though since that required taking the lock.
 	 */
 	spin_lock_bh(&nf_conntrack_lock);
-	for (i = 0; i < nf_conntrack_htable_size; i++) {
+	for (i = 0; i < init_net.ct.htable_size; i++) {
 		while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
 			h = hlist_nulls_entry(init_net.ct.hash[i].first,
 					struct nf_conntrack_tuple_hash, hnnode);
 			hlist_nulls_del_rcu(&h->hnnode);
-			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
+			bucket = __hash_conntrack(&h->tuple, hashsize,
+						  nf_conntrack_hash_rnd);
 			hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]);
 		}
 	}
-	old_size = nf_conntrack_htable_size;
+	old_size = init_net.ct.htable_size;
 	old_vmalloced = init_net.ct.hash_vmalloc;
 	old_hash = init_net.ct.hash;
 
-	nf_conntrack_htable_size = hashsize;
+	init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
 	init_net.ct.hash_vmalloc = vmalloced;
 	init_net.ct.hash = hash;
-	nf_conntrack_hash_rnd = rnd;
 	spin_unlock_bh(&nf_conntrack_lock);
 
 	nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
@@ -1328,7 +1329,9 @@ static int nf_conntrack_init_net(struct net *net)
 		ret = -ENOMEM;
 		goto err_cache;
 	}
-	net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
+
+	net->ct.htable_size = nf_conntrack_htable_size;
+	net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size,
 					     &net->ct.hash_vmalloc, 1);
 	if (!net->ct.hash) {
 		ret = -ENOMEM;
@@ -1353,7 +1356,7 @@ err_acct:
 	nf_conntrack_expect_fini(net);
 err_expect:
 	nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
-			     nf_conntrack_htable_size);
+			     net->ct.htable_size);
 err_hash:
 	kmem_cache_destroy(net->ct.nf_conntrack_cachep);
 err_cache:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 4ad7d1d809af..2f25ff610982 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -577,7 +577,7 @@ int nf_conntrack_expect_init(struct net *net)
 
 	if (net_eq(net, &init_net)) {
 		if (!nf_ct_expect_hsize) {
-			nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
+			nf_ct_expect_hsize = net->ct.htable_size / 256;
 			if (!nf_ct_expect_hsize)
 				nf_ct_expect_hsize = 1;
 		}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 65c2a7bc3afc..4b1a56bd074c 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -192,7 +192,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me,
 	/* Get rid of expecteds, set helpers to NULL. */
 	hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode)
 		unhelp(h, me);
-	for (i = 0; i < nf_conntrack_htable_size; i++) {
+	for (i = 0; i < net->ct.htable_size; i++) {
 		hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
 			unhelp(h, me);
 	}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 42f21c01a93e..0ffe689dfe97 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -594,7 +594,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 
 	rcu_read_lock();
 	last = (struct nf_conn *)cb->args[1];
-	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) {
+	for (; cb->args[0] < init_net.ct.htable_size; cb->args[0]++) {
 restart:
 		hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]],
 					 hnnode) {
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 028aba667ef7..e310f1561bb2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -51,7 +51,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
 	struct hlist_nulls_node *n;
 
 	for (st->bucket = 0;
-	     st->bucket < nf_conntrack_htable_size;
+	     st->bucket < net->ct.htable_size;
 	     st->bucket++) {
 		n = rcu_dereference(net->ct.hash[st->bucket].first);
 		if (!is_a_nulls(n))
@@ -69,7 +69,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
 	head = rcu_dereference(head->next);
 	while (is_a_nulls(head)) {
 		if (likely(get_nulls_value(head) == st->bucket)) {
-			if (++st->bucket >= nf_conntrack_htable_size)
+			if (++st->bucket >= net->ct.htable_size)
 				return NULL;
 		}
 		head = rcu_dereference(net->ct.hash[st->bucket].first);
@@ -355,7 +355,7 @@ static ctl_table nf_ct_sysctl_table[] = {
 	},
 	{
 		.procname       = "nf_conntrack_buckets",
-		.data           = &nf_conntrack_htable_size,
+		.data           = &init_net.ct.htable_size,
 		.maxlen         = sizeof(unsigned int),
 		.mode           = 0444,
 		.proc_handler   = proc_dointvec,
@@ -421,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
 		goto out_kmemdup;
 
 	table[1].data = &net->ct.count;
+	table[2].data = &net->ct.htable_size;
 	table[3].data = &net->ct.sysctl_checksum;
 	table[4].data = &net->ct.sysctl_log_invalid;
 
-- 
cgit v1.2.3-59-g8ed1b


From 0898f99a267f89a7dc72cc687955f17613a711b8 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 8 Mar 2010 13:15:59 +0100
Subject: netfilter: ebt_ip6: Use ipv6_masked_addr_cmp()

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/netfilter/ebt_ip6.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index bbf2534ef026..4644cc9c0579 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -35,8 +35,6 @@ ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	struct ipv6hdr _ip6h;
 	const struct tcpudphdr *pptr;
 	struct tcpudphdr _ports;
-	struct in6_addr tmp_addr;
-	int i;
 
 	ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h);
 	if (ih6 == NULL)
@@ -44,18 +42,10 @@ ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (info->bitmask & EBT_IP6_TCLASS &&
 	   FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS))
 		return false;
-	for (i = 0; i < 4; i++)
-		tmp_addr.in6_u.u6_addr32[i] = ih6->saddr.in6_u.u6_addr32[i] &
-			info->smsk.in6_u.u6_addr32[i];
-	if (info->bitmask & EBT_IP6_SOURCE &&
-		FWINV((ipv6_addr_cmp(&tmp_addr, &info->saddr) != 0),
-			EBT_IP6_SOURCE))
-		return false;
-	for (i = 0; i < 4; i++)
-		tmp_addr.in6_u.u6_addr32[i] = ih6->daddr.in6_u.u6_addr32[i] &
-			info->dmsk.in6_u.u6_addr32[i];
-	if (info->bitmask & EBT_IP6_DEST &&
-	   FWINV((ipv6_addr_cmp(&tmp_addr, &info->daddr) != 0), EBT_IP6_DEST))
+	if (FWINV(ipv6_masked_addr_cmp(&ih6->saddr, &info->smsk,
+				       &info->saddr), EBT_IP6_SOURCE) ||
+	    FWINV(ipv6_masked_addr_cmp(&ih6->daddr, &info->dmsk,
+				       &info->daddr), EBT_IP6_DEST))
 		return false;
 	if (info->bitmask & EBT_IP6_PROTO) {
 		uint8_t nexthdr = ih6->nexthdr;
-- 
cgit v1.2.3-59-g8ed1b


From 62bb2ac5cb6c2f813e151617525ec518e2d1c649 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 17 Feb 2010 21:27:26 +0100
Subject: mac80211: deprecate RX status noise

The noise value as is won't be used, isn't
filled by most drivers and doesn't really
make a whole lot of sense on a per packet
basis -- proper cfg80211 survey support in
mac80211 will need to be different.

Mark the struct member as deprecated so it
will be removed from drivers.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h     | 4 ++--
 net/mac80211/debugfs_sta.c | 2 --
 net/mac80211/rx.c          | 9 ---------
 net/mac80211/sta_info.h    | 2 --
 4 files changed, 2 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 45d7d44d7cbe..936bc410d061 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -543,7 +543,7 @@ enum mac80211_rx_flags {
  * @signal: signal strength when receiving this frame, either in dBm, in dB or
  *	unspecified depending on the hardware capabilities flags
  *	@IEEE80211_HW_SIGNAL_*
- * @noise: noise when receiving this frame, in dBm.
+ * @noise: noise when receiving this frame, in dBm (DEPRECATED).
  * @antenna: antenna used
  * @rate_idx: index of data rate into band's supported rates or MCS index if
  *	HT rates are use (RX_FLAG_HT)
@@ -554,7 +554,7 @@ struct ieee80211_rx_status {
 	enum ieee80211_band band;
 	int freq;
 	int signal;
-	int noise;
+	int noise __deprecated;
 	int antenna;
 	int rate_idx;
 	int flag;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index d92800bb2d2f..23e720034577 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -57,7 +57,6 @@ STA_FILE(tx_filtered, tx_filtered_count, LU);
 STA_FILE(tx_retry_failed, tx_retry_failed, LU);
 STA_FILE(tx_retry_count, tx_retry_count, LU);
 STA_FILE(last_signal, last_signal, D);
-STA_FILE(last_noise, last_noise, D);
 STA_FILE(wep_weak_iv_count, wep_weak_iv_count, LU);
 
 static ssize_t sta_flags_read(struct file *file, char __user *userbuf,
@@ -289,7 +288,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta)
 	DEBUGFS_ADD(tx_retry_failed);
 	DEBUGFS_ADD(tx_retry_count);
 	DEBUGFS_ADD(last_signal);
-	DEBUGFS_ADD(last_noise);
 	DEBUGFS_ADD(wep_weak_iv_count);
 	DEBUGFS_ADD(ht_capa);
 }
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index b5c48de81d8b..1da57c8e849a 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -178,14 +178,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 		pos++;
 	}
 
-	/* IEEE80211_RADIOTAP_DBM_ANTNOISE */
-	if (local->hw.flags & IEEE80211_HW_NOISE_DBM) {
-		*pos = status->noise;
-		rthdr->it_present |=
-			cpu_to_le32(1 << IEEE80211_RADIOTAP_DBM_ANTNOISE);
-		pos++;
-	}
-
 	/* IEEE80211_RADIOTAP_LOCK_QUALITY is missing */
 
 	/* IEEE80211_RADIOTAP_ANTENNA */
@@ -1077,7 +1069,6 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx)
 	sta->rx_fragments++;
 	sta->rx_bytes += rx->skb->len;
 	sta->last_signal = status->signal;
-	sta->last_noise = status->noise;
 
 	/*
 	 * Change STA power saving mode only at the end of a frame
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 822d84522937..2b635909de5c 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -200,7 +200,6 @@ struct sta_ampdu_mlme {
  * @rx_fragments: number of received MPDUs
  * @rx_dropped: number of dropped MPDUs from this STA
  * @last_signal: signal of last received frame from this STA
- * @last_noise: noise of last received frame from this STA
  * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue)
  * @tx_filtered_count: number of frames the hardware filtered for this STA
  * @tx_retry_failed: number of frames that failed retry
@@ -267,7 +266,6 @@ struct sta_info {
 	unsigned long rx_fragments;
 	unsigned long rx_dropped;
 	int last_signal;
-	int last_noise;
 	__le16 last_seq_ctrl[NUM_RX_DATA_QUEUES];
 
 	/* Updated from TX status path only, no locking requirements */
-- 
cgit v1.2.3-59-g8ed1b


From b4d59a9317e41faec3d0b6a03f0454d1e8abb710 Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Tue, 23 Feb 2010 18:51:13 +0900
Subject: mac80211: fix rates setup on IBSS merge

When an IBSS merge happened, the supported rates for the newly added station
were left empty, causing the rate control module to be initialized with only
the basic rates.

Also, the section of the IBSS code which deals with updating supported rates
for an already existing station fails to inform the rate control module about
the new rates. As I don't know how to fix this (minstrel does not have an
update function), I have just added a comment for now.

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ibss.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index f3e942486749..b840d9072de9 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -276,6 +276,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 				    (unsigned long long) sta->sta.supp_rates[band]);
 #endif
 			rcu_read_unlock();
+
+			/* FIXME: update rate control */
 		} else {
 			rcu_read_unlock();
 			ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa,
@@ -370,6 +372,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		       sdata->name, mgmt->bssid);
 #endif
 		ieee80211_sta_join_ibss(sdata, bss);
+		supp_rates = ieee80211_sta_get_rates(local, elems, band);
 		ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa,
 				       supp_rates, GFP_KERNEL);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From df13cce53a7b28a81460e6bfc4857e9df4956141 Mon Sep 17 00:00:00 2001
From: Helmut Schaa <Helmut.Schaa@gmx.de>
Date: Wed, 24 Feb 2010 14:19:21 +0100
Subject: mac80211: Improve software scan timing

The current software scan implementation in mac80211 returns to the operating
channel after each scanned channel. However, in some situations (e.g. no
traffic) it would be nicer to scan a few channels in a row to speed up
the scan itself.

Hence, after scanning a channel, check if we have queued up any tx frames and
return to the operating channel in that case.

Unfortunately we don't know if the AP has buffered any frames for us. Hence,
scan only as many channels in a row as the pm_qos latency and the negotiated
listen interval allow us to.

Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/scan.c        | 71 ++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 66 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 241533e1bc03..b84126491ab1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -745,6 +745,7 @@ struct ieee80211_local {
 	int scan_channel_idx;
 	int scan_ies_len;
 
+	unsigned long leave_oper_channel_time;
 	enum mac80211_scan_state next_scan_state;
 	struct delayed_work scan_work;
 	struct ieee80211_sub_if_data *scan_sdata;
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index b822dce97867..75a85978c3b3 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -14,6 +14,8 @@
 
 #include <linux/if_arp.h>
 #include <linux/rtnetlink.h>
+#include <linux/pm_qos_params.h>
+#include <net/sch_generic.h>
 #include <net/mac80211.h>
 
 #include "ieee80211_i.h"
@@ -321,6 +323,7 @@ static int ieee80211_start_sw_scan(struct ieee80211_local *local)
 
 	ieee80211_offchannel_stop_beaconing(local);
 
+	local->leave_oper_channel_time = 0;
 	local->next_scan_state = SCAN_DECISION;
 	local->scan_channel_idx = 0;
 
@@ -425,11 +428,28 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 	return rc;
 }
 
+static unsigned long
+ieee80211_scan_get_channel_time(struct ieee80211_channel *chan)
+{
+	/*
+	 * TODO: channel switching also consumes quite some time,
+	 * add that delay as well to get a better estimation
+	 */
+	if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN)
+		return IEEE80211_PASSIVE_CHANNEL_TIME;
+	return IEEE80211_PROBE_DELAY + IEEE80211_CHANNEL_TIME;
+}
+
 static int ieee80211_scan_state_decision(struct ieee80211_local *local,
 					 unsigned long *next_delay)
 {
 	bool associated = false;
+	bool tx_empty = true;
+	bool bad_latency;
+	bool listen_int_exceeded;
+	unsigned long min_beacon_int = 0;
 	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_channel *next_chan;
 
 	/* if no more bands/channels left, complete scan and advance to the idle state */
 	if (local->scan_channel_idx >= local->scan_req->n_channels) {
@@ -437,7 +457,11 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
 		return 1;
 	}
 
-	/* check if at least one STA interface is associated */
+	/*
+	 * check if at least one STA interface is associated,
+	 * check if at least one STA interface has pending tx frames
+	 * and grab the lowest used beacon interval
+	 */
 	mutex_lock(&local->iflist_mtx);
 	list_for_each_entry(sdata, &local->interfaces, list) {
 		if (!ieee80211_sdata_running(sdata))
@@ -446,7 +470,16 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
 		if (sdata->vif.type == NL80211_IFTYPE_STATION) {
 			if (sdata->u.mgd.associated) {
 				associated = true;
-				break;
+
+				if (sdata->vif.bss_conf.beacon_int <
+				    min_beacon_int || min_beacon_int == 0)
+					min_beacon_int =
+						sdata->vif.bss_conf.beacon_int;
+
+				if (!qdisc_all_tx_empty(sdata->dev)) {
+					tx_empty = false;
+					break;
+				}
 			}
 		}
 	}
@@ -455,11 +488,34 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local,
 	if (local->scan_channel) {
 		/*
 		 * we're currently scanning a different channel, let's
-		 * switch back to the operating channel now if at least
-		 * one interface is associated. Otherwise just scan the
-		 * next channel
+		 * see if we can scan another channel without interfering
+		 * with the current traffic situation.
+		 *
+		 * Since we don't know if the AP has pending frames for us
+		 * we can only check for our tx queues and use the current
+		 * pm_qos requirements for rx. Hence, if no tx traffic occurs
+		 * at all we will scan as many channels in a row as the pm_qos
+		 * latency allows us to. Additionally we also check for the
+		 * currently negotiated listen interval to prevent losing
+		 * frames unnecessarily.
+		 *
+		 * Otherwise switch back to the operating channel.
 		 */
-		if (associated)
+		next_chan = local->scan_req->channels[local->scan_channel_idx];
+
+		bad_latency = time_after(jiffies +
+				ieee80211_scan_get_channel_time(next_chan),
+				local->leave_oper_channel_time +
+				usecs_to_jiffies(pm_qos_requirement(PM_QOS_NETWORK_LATENCY)));
+
+		listen_int_exceeded = time_after(jiffies +
+				ieee80211_scan_get_channel_time(next_chan),
+				local->leave_oper_channel_time +
+				usecs_to_jiffies(min_beacon_int * 1024) *
+				local->hw.conf.listen_interval);
+
+		if (associated && ( !tx_empty || bad_latency ||
+		    listen_int_exceeded))
 			local->next_scan_state = SCAN_ENTER_OPER_CHANNEL;
 		else
 			local->next_scan_state = SCAN_SET_CHANNEL;
@@ -491,6 +547,9 @@ static void ieee80211_scan_state_leave_oper_channel(struct ieee80211_local *loca
 	else
 		*next_delay = HZ / 10;
 
+	/* remember when we left the operating channel */
+	local->leave_oper_channel_time = jiffies;
+
 	/* advance to the next channel to be scanned */
 	local->next_scan_state = SCAN_SET_CHANNEL;
 }
-- 
cgit v1.2.3-59-g8ed1b


From fa9029f8c34576e121a4b6ddbbd645081fe50c74 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 25 Feb 2010 15:13:11 +0100
Subject: mac80211: use different MAC addresses for virtual interfaces

Drivers can now advertise to cfg80211 that they have
multiple MAC addresses reserved for a device, but we
don't currently make use of that in mac80211.

Change that and assign different addresses to new
virtual interfaces (if addresses are available) in
order to make it easier for users to use multiple
virtual interfaces; they no longer need to always
assign a new MAC address manually.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/iface.c | 116 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 114 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 0793d7a8d743..d5571b9420cd 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -815,6 +815,118 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata,
 	return 0;
 }
 
+static void ieee80211_assign_perm_addr(struct ieee80211_local *local,
+				       struct net_device *dev,
+				       enum nl80211_iftype type)
+{
+	struct ieee80211_sub_if_data *sdata;
+	u64 mask, start, addr, val, inc;
+	u8 *m;
+	u8 tmp_addr[ETH_ALEN];
+	int i;
+
+	/* default ... something at least */
+	memcpy(dev->perm_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
+
+	if (is_zero_ether_addr(local->hw.wiphy->addr_mask) &&
+	    local->hw.wiphy->n_addresses <= 1)
+		return;
+
+
+	mutex_lock(&local->iflist_mtx);
+
+	switch (type) {
+	case NL80211_IFTYPE_MONITOR:
+		/* doesn't matter */
+		break;
+	case NL80211_IFTYPE_WDS:
+	case NL80211_IFTYPE_AP_VLAN:
+		/* match up with an AP interface */
+		list_for_each_entry(sdata, &local->interfaces, list) {
+			if (sdata->vif.type != NL80211_IFTYPE_AP)
+				continue;
+			memcpy(dev->perm_addr, sdata->vif.addr, ETH_ALEN);
+			break;
+		}
+		/* keep default if no AP interface present */
+		break;
+	default:
+		/* assign a new address if possible -- try n_addresses first */
+		for (i = 0; i < local->hw.wiphy->n_addresses; i++) {
+			bool used = false;
+
+			list_for_each_entry(sdata, &local->interfaces, list) {
+				if (memcmp(local->hw.wiphy->addresses[i].addr,
+					   sdata->vif.addr, ETH_ALEN) == 0) {
+					used = true;
+					break;
+				}
+			}
+
+			if (!used) {
+				memcpy(dev->perm_addr,
+				       local->hw.wiphy->addresses[i].addr,
+				       ETH_ALEN);
+				break;
+			}
+		}
+
+		/* try mask if available */
+		if (is_zero_ether_addr(local->hw.wiphy->addr_mask))
+			break;
+
+		m = local->hw.wiphy->addr_mask;
+		mask =	((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) |
+			((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) |
+			((u64)m[4] << 1*8) | ((u64)m[5] << 0*8);
+
+		if (__ffs64(mask) + hweight64(mask) != fls64(mask)) {
+			/* not a contiguous mask ... not handled now! */
+			printk(KERN_DEBUG "not contiguous\n");
+			break;
+		}
+
+		m = local->hw.wiphy->perm_addr;
+		start = ((u64)m[0] << 5*8) | ((u64)m[1] << 4*8) |
+			((u64)m[2] << 3*8) | ((u64)m[3] << 2*8) |
+			((u64)m[4] << 1*8) | ((u64)m[5] << 0*8);
+
+		inc = 1ULL<<__ffs64(mask);
+		val = (start & mask);
+		addr = (start & ~mask) | (val & mask);
+		do {
+			bool used = false;
+
+			tmp_addr[5] = addr >> 0*8;
+			tmp_addr[4] = addr >> 1*8;
+			tmp_addr[3] = addr >> 2*8;
+			tmp_addr[2] = addr >> 3*8;
+			tmp_addr[1] = addr >> 4*8;
+			tmp_addr[0] = addr >> 5*8;
+
+			val += inc;
+
+			list_for_each_entry(sdata, &local->interfaces, list) {
+				if (memcmp(tmp_addr, sdata->vif.addr,
+							ETH_ALEN) == 0) {
+					used = true;
+					break;
+				}
+			}
+
+			if (!used) {
+				memcpy(dev->perm_addr, tmp_addr, ETH_ALEN);
+				break;
+			}
+			addr = (start & ~mask) | (val & mask);
+		} while (addr != start);
+
+		break;
+	}
+
+	mutex_unlock(&local->iflist_mtx);
+}
+
 int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 		     struct net_device **new_dev, enum nl80211_iftype type,
 		     struct vif_params *params)
@@ -844,8 +956,8 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
 	if (ret < 0)
 		goto fail;
 
-	memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN);
-	memcpy(ndev->perm_addr, ndev->dev_addr, ETH_ALEN);
+	ieee80211_assign_perm_addr(local, ndev, type);
+	memcpy(ndev->dev_addr, ndev->perm_addr, ETH_ALEN);
 	SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy));
 
 	/* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */
-- 
cgit v1.2.3-59-g8ed1b


From 6c26361e4be3cf0dad7083e38ca52001a987e3e6 Mon Sep 17 00:00:00 2001
From: "florian@mickler.org" <florian@mickler.org>
Date: Fri, 26 Feb 2010 12:01:34 +0100
Subject: enhance sysfs rfkill interface

This commit introduces two new sysfs knobs.

/sys/class/rfkill/rfkill[0-9]+/blocked_hw: (ro)
	hardblock kill state
/sys/class/rfkill/rfkill[0-9]+/blocked_sw: (rw)
	softblock kill state

Signed-off-by: Florian Mickler <florian@mickler.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/ABI/stable/sysfs-class-rfkill | 25 +++++++++++++
 net/rfkill/core.c                           | 58 +++++++++++++++++++++++++++++
 2 files changed, 83 insertions(+)

(limited to 'net')

diff --git a/Documentation/ABI/stable/sysfs-class-rfkill b/Documentation/ABI/stable/sysfs-class-rfkill
index 97d5064f4f84..b91c3f37453a 100644
--- a/Documentation/ABI/stable/sysfs-class-rfkill
+++ b/Documentation/ABI/stable/sysfs-class-rfkill
@@ -40,3 +40,28 @@ Description: 	Whether the soft blocked state is initialised from non-volatile
 Values: 	A numeric value.
 		0: false
 		1: true
+
+
+What:		/sys/class/rfkill/rfkill[0-9]+/blocked_hw
+Date:		23-Feb-2010
+KernelVersion:	v2.6.34
+Contact:	linux-wireless@vger.kernel.org
+Description: 	Current hardblock state. This file is read only.
+Values: 	A numeric value.
+		0: inactive
+			The transmitter is (potentially) active.
+		1: active
+			The transmitter is forced off by something outside of
+			the driver's control.
+
+
+What:		/sys/class/rfkill/rfkill[0-9]+/blocked_sw
+Date:		23-Feb-2010
+KernelVersion:	v2.6.34
+Contact:	linux-wireless@vger.kernel.org
+Description:	Current softblock state. This file is read and write.
+Values: 	A numeric value.
+		0: inactive
+			The transmitter is (potentially) active.
+		1: active
+			The transmitter is turned off by software.
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index c218e07e5caf..5f33151d70ae 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -628,6 +628,61 @@ static ssize_t rfkill_persistent_show(struct device *dev,
 	return sprintf(buf, "%d\n", rfkill->persistent);
 }
 
+static ssize_t rfkill_blocked_hw_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+	unsigned long flags;
+	u32 state;
+
+	spin_lock_irqsave(&rfkill->lock, flags);
+	state = rfkill->state;
+	spin_unlock_irqrestore(&rfkill->lock, flags);
+
+	return sprintf(buf, "%d\n", (state & RFKILL_BLOCK_HW) ? 1 : 0 );
+}
+
+static ssize_t rfkill_blocked_sw_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+	unsigned long flags;
+	u32 state;
+
+	spin_lock_irqsave(&rfkill->lock, flags);
+	state = rfkill->state;
+	spin_unlock_irqrestore(&rfkill->lock, flags);
+
+	return sprintf(buf, "%d\n", (state & RFKILL_BLOCK_SW) ? 1 : 0 );
+}
+
+static ssize_t rfkill_blocked_sw_store(struct device *dev,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
+{
+	struct rfkill *rfkill = to_rfkill(dev);
+	unsigned long state;
+	int err;
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	err = strict_strtoul(buf, 0, &state);
+	if (err)
+		return err;
+
+	if (state > 1 )
+		return -EINVAL;
+
+	mutex_lock(&rfkill_global_mutex);
+	rfkill_set_block(rfkill, state);
+	mutex_unlock(&rfkill_global_mutex);
+
+	return err ?: count;
+}
+
 static u8 user_state_from_blocked(unsigned long state)
 {
 	if (state & RFKILL_BLOCK_HW)
@@ -700,6 +755,9 @@ static struct device_attribute rfkill_dev_attrs[] = {
 	__ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL),
 	__ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store),
 	__ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store),
+	__ATTR(sw, S_IRUGO|S_IWUSR, rfkill_blocked_sw_show,
+			rfkill_blocked_sw_store),
+	__ATTR(hw, S_IRUGO, rfkill_blocked_hw_show, NULL),
 	__ATTR_NULL
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From c2ef355bf3ef0b8006b96128726684fba47ac928 Mon Sep 17 00:00:00 2001
From: Andres Salomon <dilinger@collabora.co.uk>
Date: Thu, 25 Feb 2010 19:18:47 -0500
Subject: mac80211: give warning if building w/out rate ctrl algorithm

I discovered that if EMBEDDED=y, one can accidentally build a mac80211 stack
and drivers w/ no rate control algorithm.  For drivers like RTL8187 that don't
supply their own RC algorithms, this will cause ieee80211_register_hw to
fail (making the driver unusable).

This will tell kconfig to provide a warning if no rate control algorithms
have been selected.  That'll at least warn the user; users that know that
their drivers supply a rate control algorithm can safely ignore the
warning, and those who don't know (or who expect to be using multiple
drivers) can select a default RC algorithm.

Signed-off-by: Andres Salomon <dilinger@collabora.co.uk>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/Kconfig | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index a952b7f8c648..334c359da5e8 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -15,8 +15,12 @@ comment "CFG80211 needs to be enabled for MAC80211"
 
 if MAC80211 != n
 
+config MAC80211_HAS_RC
+	def_bool n
+
 config MAC80211_RC_PID
 	bool "PID controller based rate control algorithm" if EMBEDDED
+	select MAC80211_HAS_RC
 	---help---
 	  This option enables a TX rate control algorithm for
 	  mac80211 that uses a PID controller to select the TX
@@ -24,12 +28,14 @@ config MAC80211_RC_PID
 
 config MAC80211_RC_MINSTREL
 	bool "Minstrel" if EMBEDDED
+	select MAC80211_HAS_RC
 	default y
 	---help---
 	  This option enables the 'minstrel' TX rate control algorithm
 
 choice
 	prompt "Default rate control algorithm"
+	depends on MAC80211_HAS_RC
 	default MAC80211_RC_DEFAULT_MINSTREL
 	---help---
 	  This option selects the default rate control algorithm
@@ -62,6 +68,9 @@ config MAC80211_RC_DEFAULT
 
 endif
 
+comment "Some wireless drivers require a rate control algorithm"
+	depends on MAC80211_HAS_RC=n
+
 config MAC80211_MESH
 	bool "Enable mac80211 mesh networking (pre-802.11s) support"
 	depends on MAC80211 && EXPERIMENTAL
-- 
cgit v1.2.3-59-g8ed1b


From 44ac91ea8450b0e7a27b4a1fd64aefd35a144728 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Mon, 1 Mar 2010 22:17:38 +0100
Subject: minstrel: simplify and fix debugfs code

This patch cleans up the debugfs read function for the statistics by
using simple_read_from_buffer instead of its own semi-broken hack.
Also removes a useless member of the minstrel debugfs info struct.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rc80211_minstrel.h         |  5 +++++
 net/mac80211/rc80211_minstrel_debugfs.c | 35 ++++++---------------------------
 2 files changed, 11 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index 38bf4168fc3a..9372656f3f5e 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -80,6 +80,11 @@ struct minstrel_priv {
 	unsigned int lookaround_rate_mrr;
 };
 
+struct minstrel_debugfs_info {
+	size_t len;
+	char buf[];
+};
+
 void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
 void minstrel_remove_sta_debugfs(void *priv, void *priv_sta);
 
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index a715d9454f64..3e83402ece17 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -52,21 +52,15 @@
 #include <net/mac80211.h>
 #include "rc80211_minstrel.h"
 
-struct minstrel_stats_info {
-	struct minstrel_sta_info *mi;
-	char buf[4096];
-	size_t len;
-};
-
 static int
 minstrel_stats_open(struct inode *inode, struct file *file)
 {
 	struct minstrel_sta_info *mi = inode->i_private;
-	struct minstrel_stats_info *ms;
+	struct minstrel_debugfs_info *ms;
 	unsigned int i, tp, prob, eprob;
 	char *p;
 
-	ms = kmalloc(sizeof(*ms), GFP_KERNEL);
+	ms = kmalloc(sizeof(*ms) + 4096, GFP_KERNEL);
 	if (!ms)
 		return -ENOMEM;
 
@@ -107,35 +101,18 @@ minstrel_stats_open(struct inode *inode, struct file *file)
 }
 
 static ssize_t
-minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *o)
+minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos)
 {
-	struct minstrel_stats_info *ms;
-	char *src;
+	struct minstrel_debugfs_info *ms;
 
 	ms = file->private_data;
-	src = ms->buf;
-
-	len = min(len, ms->len);
-	if (len <= *o)
-		return 0;
-
-	src += *o;
-	len -= *o;
-	*o += len;
-
-	if (copy_to_user(buf, src, len))
-		return -EFAULT;
-
-	return len;
+	return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len);
 }
 
 static int
 minstrel_stats_release(struct inode *inode, struct file *file)
 {
-	struct minstrel_stats_info *ms = file->private_data;
-
-	kfree(ms);
-
+	kfree(file->private_data);
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From eae44756d60c4e938259358090dba5df675dced0 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Mon, 1 Mar 2010 22:21:40 +0100
Subject: minstrel: make the rate control ops reusable from another rc
 implementation

This patch makes it possible to reuse the minstrel rate control ops
from another rate control module. This is useful in preparing for the
new 802.11n implementation of minstrel, which will reuse the old code
for legacy stations.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rc80211_minstrel.c         | 2 +-
 net/mac80211/rc80211_minstrel.h         | 6 ++++++
 net/mac80211/rc80211_minstrel_debugfs.c | 6 +++---
 3 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index 6e5d68b4e427..4926d929fd9f 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -541,7 +541,7 @@ minstrel_free(void *priv)
 	kfree(priv);
 }
 
-static struct rate_control_ops mac80211_minstrel = {
+struct rate_control_ops mac80211_minstrel = {
 	.name = "minstrel",
 	.tx_status = minstrel_tx_status,
 	.get_rate = minstrel_get_rate,
diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h
index 9372656f3f5e..0f5a83370aa6 100644
--- a/net/mac80211/rc80211_minstrel.h
+++ b/net/mac80211/rc80211_minstrel.h
@@ -85,7 +85,13 @@ struct minstrel_debugfs_info {
 	char buf[];
 };
 
+extern struct rate_control_ops mac80211_minstrel;
 void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir);
 void minstrel_remove_sta_debugfs(void *priv, void *priv_sta);
 
+/* debugfs */
+int minstrel_stats_open(struct inode *inode, struct file *file);
+ssize_t minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos);
+int minstrel_stats_release(struct inode *inode, struct file *file);
+
 #endif
diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c
index 3e83402ece17..56d0f24957d9 100644
--- a/net/mac80211/rc80211_minstrel_debugfs.c
+++ b/net/mac80211/rc80211_minstrel_debugfs.c
@@ -52,7 +52,7 @@
 #include <net/mac80211.h>
 #include "rc80211_minstrel.h"
 
-static int
+int
 minstrel_stats_open(struct inode *inode, struct file *file)
 {
 	struct minstrel_sta_info *mi = inode->i_private;
@@ -100,7 +100,7 @@ minstrel_stats_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static ssize_t
+ssize_t
 minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos)
 {
 	struct minstrel_debugfs_info *ms;
@@ -109,7 +109,7 @@ minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppo
 	return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len);
 }
 
-static int
+int
 minstrel_stats_release(struct inode *inode, struct file *file)
 {
 	kfree(file->private_data);
-- 
cgit v1.2.3-59-g8ed1b


From 1da05f50f6a766c7611102382f85183b4db96c2d Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 15 Mar 2010 18:03:05 +0100
Subject: netfilter: net/netfilter/ipvs/ip_vs_ftp.c: Remove use of NIPQUAD

NIPQUAD has very few uses left.

Remove this use and make the code have the identical form of the only
other use of "%u,%u,%u,%u,%u,%u" in net/ipv4/netfilter/nf_nat_ftp.c

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipvs/ip_vs_ftp.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 73f38ea98f25..9f6328303844 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -208,8 +208,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
 		 */
 		from.ip = n_cp->vaddr.ip;
 		port = n_cp->vport;
-		sprintf(buf, "%u,%u,%u,%u,%u,%u", NIPQUAD(from.ip),
-			(ntohs(port)>>8)&255, ntohs(port)&255);
+		snprintf(buf, sizeof(buf), "%u,%u,%u,%u,%u,%u",
+			 ((unsigned char *)&from.ip)[0],
+			 ((unsigned char *)&from.ip)[1],
+			 ((unsigned char *)&from.ip)[2],
+			 ((unsigned char *)&from.ip)[3],
+			 ntohs(port) >> 8,
+			 ntohs(port) & 0xFF);
+
 		buf_len = strlen(buf);
 
 		/*
-- 
cgit v1.2.3-59-g8ed1b


From eaf55530c94cb7adcd320c28ed6c7d463c9a3727 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Thu, 11 Mar 2010 16:28:24 +0100
Subject: mac80211: optimize tx status processing

When a cooked monitor interface is active, ieee80211_tx_status()
generates a radiotap header for every single frame, even if it wasn't
injected and thus won't be sent to a monitor interface.
This patch reduces cpu utilization by moving the cooked monitor check a
bit earlier, before it generates the rtap header.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/status.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 56d5b9a6ec5b..11805a3a626f 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -171,7 +171,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 	struct net_device *prev_dev = NULL;
 	struct sta_info *sta, *tmp;
 	int retry_count = -1, i;
-	bool injected;
+	bool send_to_cooked;
 
 	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
 		/* the HW cannot have attempted that rate */
@@ -296,11 +296,15 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 	/* this was a transmitted frame, but now we want to reuse it */
 	skb_orphan(skb);
 
+	/* Need to make a copy before skb->cb gets cleared */
+	send_to_cooked = !!(info->flags & IEEE80211_TX_CTL_INJECTED) ||
+			(type != IEEE80211_FTYPE_DATA);
+
 	/*
 	 * This is a bit racy but we can avoid a lot of work
 	 * with this test...
 	 */
-	if (!local->monitors && !local->cooked_mntrs) {
+	if (!local->monitors && (!send_to_cooked || !local->cooked_mntrs)) {
 		dev_kfree_skb(skb);
 		return;
 	}
@@ -345,9 +349,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 	/* for now report the total retry_count */
 	rthdr->data_retries = retry_count;
 
-	/* Need to make a copy before skb->cb gets cleared */
-	injected = !!(info->flags & IEEE80211_TX_CTL_INJECTED);
-
 	/* XXX: is this sufficient for BPF? */
 	skb_set_mac_header(skb, 0);
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -362,8 +363,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 				continue;
 
 			if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) &&
-			    !injected &&
-			    (type == IEEE80211_FTYPE_DATA))
+			    !send_to_cooked)
 				continue;
 
 			if (prev_dev) {
-- 
cgit v1.2.3-59-g8ed1b


From 819386dfc67e770b4a0b59983f7948f8ddaa357e Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Tue, 16 Mar 2010 15:02:35 -0400
Subject: Revert "mac80211: fix rates setup on IBSS merge"

I accidentally merged an incomplete version of the patch...

This reverts commit b4d59a9317e41faec3d0b6a03f0454d1e8abb710.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ibss.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index b840d9072de9..f3e942486749 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -276,8 +276,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 				    (unsigned long long) sta->sta.supp_rates[band]);
 #endif
 			rcu_read_unlock();
-
-			/* FIXME: update rate control */
 		} else {
 			rcu_read_unlock();
 			ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa,
@@ -372,7 +370,6 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		       sdata->name, mgmt->bssid);
 #endif
 		ieee80211_sta_join_ibss(sdata, bss);
-		supp_rates = ieee80211_sta_get_rates(local, elems, band);
 		ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa,
 				       supp_rates, GFP_KERNEL);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 09a08cff3d13315c948e6aee5cf912f8f1db54e7 Mon Sep 17 00:00:00 2001
From: Bruno Randolf <br1@einfach.org>
Date: Wed, 3 Mar 2010 18:45:42 +0900
Subject: mac80211: (really) fix rates setup on IBSS merge

when an IBSS merge happened, the supported rates for the newly added station
were left empty, causing the rate control module to be initialized with only
the basic rates.

the section of the ibss code which deals with updating supported rates for
an already existing station failed to inform the rate control module about the
new rates. as both minstrel and pid don't have an update function i just use
the init function.

also remove unnecessary (unsigned long long) casts and edit debug message.

Signed-off-by: Bruno Randolf <br1@einfach.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ibss.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index f3e942486749..01974c2510a8 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -264,17 +264,16 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 			sta->sta.supp_rates[band] = supp_rates |
 				ieee80211_mandatory_rates(local, band);
 
+			if (sta->sta.supp_rates[band] != prev_rates) {
 #ifdef CONFIG_MAC80211_IBSS_DEBUG
-			if (sta->sta.supp_rates[band] != prev_rates)
 				printk(KERN_DEBUG "%s: updated supp_rates set "
-				    "for %pM based on beacon info (0x%llx | "
-				    "0x%llx -> 0x%llx)\n",
-				    sdata->name,
-				    sta->sta.addr,
-				    (unsigned long long) prev_rates,
-				    (unsigned long long) supp_rates,
-				    (unsigned long long) sta->sta.supp_rates[band]);
+				    "for %pM based on beacon/probe_response "
+				    "(0x%x -> 0x%x)\n",
+				    sdata->name, sta->sta.addr,
+				    prev_rates, sta->sta.supp_rates[band]);
 #endif
+				rate_control_rate_init(sta);
+			}
 			rcu_read_unlock();
 		} else {
 			rcu_read_unlock();
@@ -370,6 +369,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 		       sdata->name, mgmt->bssid);
 #endif
 		ieee80211_sta_join_ibss(sdata, bss);
+		supp_rates = ieee80211_sta_get_rates(local, elems, band);
 		ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa,
 				       supp_rates, GFP_KERNEL);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 735f61e62611161588123930823af6e6a9fd5c2c Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 11 Mar 2010 13:49:55 +0000
Subject: RDS: Do not BUG() on error returned from ib_post_send

BUGging on a runtime error code should be avoided. This
patch also eliminates all other BUG()s that have no real
reason to exist.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib_recv.c        | 4 ++--
 net/rds/ib_send.c        | 7 +++----
 net/rds/iw_recv.c        | 4 ++--
 net/rds/iw_send.c        | 3 +--
 net/rds/rdma_transport.c | 3 +--
 5 files changed, 9 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 04dc0d3f3c95..c338881eca71 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -468,8 +468,8 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi
 		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
 
 		rds_ib_stats_inc(s_ib_ack_send_failure);
-		/* Need to finesse this later. */
-		BUG();
+
+		rds_ib_conn_error(ic->conn, "sending ack failed\n");
 	} else
 		rds_ib_stats_inc(s_ib_ack_sent);
 }
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index a10fab6886d1..f380c3f01256 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -574,8 +574,7 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 		rds_ib_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
 		adv_credits += posted;
 		BUG_ON(adv_credits > 255);
-	} else if (ic->i_rm != rm)
-		BUG();
+	}
 
 	send = &ic->i_sends[pos];
 	first = send;
@@ -714,8 +713,8 @@ add_header:
 			ic->i_rm = prev->s_rm;
 			prev->s_rm = NULL;
 		}
-		/* Finesse this later */
-		BUG();
+
+		rds_ib_conn_error(ic->conn, "ib_post_send failed\n");
 		goto out;
 	}
 
diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c
index 54af7d6b92da..337e4e5025e2 100644
--- a/net/rds/iw_recv.c
+++ b/net/rds/iw_recv.c
@@ -468,8 +468,8 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi
 		set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags);
 
 		rds_iw_stats_inc(s_iw_ack_send_failure);
-		/* Need to finesse this later. */
-		BUG();
+
+		rds_iw_conn_error(ic->conn, "sending ack failed\n");
 	} else
 		rds_iw_stats_inc(s_iw_ack_sent);
 }
diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c
index 1379e9d66a78..52182ff7519e 100644
--- a/net/rds/iw_send.c
+++ b/net/rds/iw_send.c
@@ -616,8 +616,7 @@ int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm,
 		rds_iw_send_grab_credits(ic, 0, &posted, 1, RDS_MAX_ADV_CREDIT - adv_credits);
 		adv_credits += posted;
 		BUG_ON(adv_credits > 255);
-	} else if (ic->i_rm != rm)
-		BUG();
+	}
 
 	send = &ic->i_sends[pos];
 	first = send;
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 9ece910ea394..31f9c72b8d76 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -109,8 +109,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 
 	default:
 		/* things like device disconnect? */
-		printk(KERN_ERR "unknown event %u\n", event->event);
-		BUG();
+		printk(KERN_ERR "RDS: unknown event %u!\n", event->event);
 		break;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 1123fd734df6ad82373a5a27f0f2ed3115555b9d Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 11 Mar 2010 13:49:56 +0000
Subject: RDS: sendmsg() should check sndtimeo, not rcvtimeo

Most likely cut n paste error - sendmsg() was checking sock_rcvtimeo.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/send.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rds/send.c b/net/rds/send.c
index b2fccfc20769..ad2e46947c8a 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -815,7 +815,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	int ret = 0;
 	int queued = 0, allocated_mr = 0;
 	int nonblock = msg->msg_flags & MSG_DONTWAIT;
-	long timeo = sock_rcvtimeo(sk, nonblock);
+	long timeo = sock_sndtimeo(sk, nonblock);
 
 	/* Mirror Linux UDP mirror of BSD error message compatibility */
 	/* XXX: Perhaps MSG_MORE someday */
-- 
cgit v1.2.3-59-g8ed1b


From b075cfdb666d6fa90c55c8619186398a3c4fd865 Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 11 Mar 2010 13:49:57 +0000
Subject: RDS: update copy_to_user state in tcp transport

Other transports use rds_page_copy_user, which updates our
s_copy_to_user counter. TCP doesn't, so it needs to explicitly
call rds_stats_add().

Reported-by: Richard Frank <richard.frank@oracle.com>
Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/tcp_recv.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index c00dafffbb5a..40bfcf887465 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -97,6 +97,7 @@ int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov,
 				goto out;
 			}
 
+			rds_stats_add(s_copy_to_user, to_copy);
 			size -= to_copy;
 			ret += to_copy;
 			skb_off += to_copy;
-- 
cgit v1.2.3-59-g8ed1b


From 8e82376e5f72bb576504c8c6117685e56c1b97db Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 11 Mar 2010 13:49:58 +0000
Subject: RDS/TCP: Wait to wake thread when write space available

Instead of waking the send thread whenever any send space is available,
wait until it is at least half empty. This is modeled on how
sock_def_write_space() does it, and may help to minimize context
switches.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/tcp_send.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 34fdcc059e54..a28b895ff0d1 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -240,7 +240,9 @@ void rds_tcp_write_space(struct sock *sk)
 	tc->t_last_seen_una = rds_tcp_snd_una(tc);
 	rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked);
 
-	queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+        if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+		queue_delayed_work(rds_wq, &conn->c_send_w, 0);
+
 out:
 	read_unlock(&sk->sk_callback_lock);
 
-- 
cgit v1.2.3-59-g8ed1b


From 2e7b3b994529d4760231a45a6b88950187bda877 Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 11 Mar 2010 13:49:59 +0000
Subject: RDS: Fix congestion issues for loopback

We have two kinds of loopback: software (via loop transport)
and hardware (via IB). sw is used for 127.0.0.1, and doesn't
support rdma ops. hw is used for sends to local device IPs,
and supports rdma. Both are used in different cases.

For both of these, when there is a congestion map update, we
want to call rds_cong_map_updated() but not actually send
anything -- since loopback local and foreign congestion maps
point to the same spot, they're already in sync.

The old code never called sw loop's xmit_cong_map(), so
rds_cong_map_updated() wasn't being called for it. sw loop
ports would not work right with the congestion monitor.

Fixing that meant that hw loopback now would send congestion maps
to itself. This is also undesirable (racy), so we check for this
case in the ib-specific xmit code.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/cong.c    | 2 --
 net/rds/ib_send.c | 7 +++++++
 net/rds/loop.c    | 7 -------
 3 files changed, 7 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/rds/cong.c b/net/rds/cong.c
index 6d06cac2649c..dd2711df640b 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -218,8 +218,6 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
 	spin_lock_irqsave(&rds_cong_lock, flags);
 
 	list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
-		if (conn->c_loopback)
-			continue;
 		if (!test_and_set_bit(0, &conn->c_map_queued)) {
 			rds_stats_inc(s_cong_update_queued);
 			queue_delayed_work(rds_wq, &conn->c_send_w, 0);
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index f380c3f01256..c18228aec779 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -482,6 +482,13 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
 	BUG_ON(off % RDS_FRAG_SIZE);
 	BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header));
 
+	/* Do not send cong updates to IB loopback */
+	if (conn->c_loopback
+	    && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
+		rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
+		return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
+	}
+
 	/* FIXME we may overallocate here */
 	if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0)
 		i = 1;
diff --git a/net/rds/loop.c b/net/rds/loop.c
index 4a61997f554d..93a45f1ce61f 100644
--- a/net/rds/loop.c
+++ b/net/rds/loop.c
@@ -80,16 +80,9 @@ static int rds_loop_xmit_cong_map(struct rds_connection *conn,
 				  struct rds_cong_map *map,
 				  unsigned long offset)
 {
-	unsigned long i;
-
 	BUG_ON(offset);
 	BUG_ON(map != conn->c_lcong);
 
-	for (i = 0; i < RDS_CONG_MAP_PAGES; i++) {
-		memcpy((void *)conn->c_fcong->m_page_addrs[i],
-		       (void *)map->m_page_addrs[i], PAGE_SIZE);
-	}
-
 	rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
 
 	return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
-- 
cgit v1.2.3-59-g8ed1b


From 048c15e641289d902f7ef9f1241068d8045e210c Mon Sep 17 00:00:00 2001
From: Tina Yang <Tina.Yang@oracle.com>
Date: Thu, 11 Mar 2010 13:50:00 +0000
Subject: RDS: Fix send locking issue

Fix a deadlock between rds_rdma_send_complete() and
rds_send_remove_from_sock() when rds socket lock and
rds message lock are acquired out-of-order.

Signed-off-by: Tina Yang <Tina.Yang@oracle.com>
Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/send.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/rds/send.c b/net/rds/send.c
index ad2e46947c8a..1a2ef24fbc94 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -533,14 +533,13 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
 
 		if (rs != rm->m_rs) {
 			if (rs) {
-				spin_unlock(&rs->rs_lock);
 				rds_wake_sk_sleep(rs);
 				sock_put(rds_rs_to_sk(rs));
 			}
 			rs = rm->m_rs;
-			spin_lock(&rs->rs_lock);
 			sock_hold(rds_rs_to_sk(rs));
 		}
+		spin_lock(&rs->rs_lock);
 
 		if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
 			struct rds_rdma_op *ro = rm->m_rdma_op;
@@ -560,6 +559,7 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
 			rds_message_put(rm);
 			rm->m_rs = NULL;
 		}
+		spin_unlock(&rs->rs_lock);
 
 unlock_and_drop:
 		spin_unlock(&rm->m_rs_lock);
@@ -567,7 +567,6 @@ unlock_and_drop:
 	}
 
 	if (rs) {
-		spin_unlock(&rs->rs_lock);
 		rds_wake_sk_sleep(rs);
 		sock_put(rds_rs_to_sk(rs));
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 571c02fa81e43ebb4b793f626d6c7bf0fa18902b Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 11 Mar 2010 13:50:01 +0000
Subject: RDS: Workaround for in-use MRs on close causing crash

if a machine is shut down without closing sockets properly, and
freeing all MRs, then a BUG_ON will bring it down. This patch
changes these to WARN_ONs -- leaking MRs is not fatal (although
not ideal, and there is more work to do here for a proper fix.)

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib_rdma.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 4b0da865a72c..65e668defe42 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -234,8 +234,8 @@ void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
 {
 	flush_workqueue(rds_wq);
 	rds_ib_flush_mr_pool(pool, 1);
-	BUG_ON(atomic_read(&pool->item_count));
-	BUG_ON(atomic_read(&pool->free_pinned));
+	WARN_ON(atomic_read(&pool->item_count));
+	WARN_ON(atomic_read(&pool->free_pinned));
 	kfree(pool);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 97069788d6784ac92d050557a02f6e7bf4d1f53d Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 11 Mar 2010 13:50:02 +0000
Subject: RDS: Turn down alarming reconnect messages

RDS's error messages when a connection goes down are a little
extreme. A connection may go down, and it will be re-established,
and everything is fine. This patch links these messages through
rdsdebug(), instead of to printk directly.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib_cm.c          | 3 ++-
 net/rds/iw_cm.c          | 4 +++-
 net/rds/rdma_transport.c | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 647cb8ffc39b..e1f124bf03bb 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -203,9 +203,10 @@ static void rds_ib_qp_event_handler(struct ib_event *event, void *data)
 		rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST);
 		break;
 	default:
-		rds_ib_conn_error(conn, "RDS/IB: Fatal QP Event %u "
+		rdsdebug("Fatal QP Event %u "
 			"- connection %pI4->%pI4, reconnecting\n",
 			event->event, &conn->c_laddr, &conn->c_faddr);
+		rds_conn_drop(conn);
 		break;
 	}
 }
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 394cf6b4d0aa..6bc638fd252c 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -156,9 +156,11 @@ static void rds_iw_qp_event_handler(struct ib_event *event, void *data)
 	case IB_EVENT_QP_REQ_ERR:
 	case IB_EVENT_QP_FATAL:
 	default:
-		rds_iw_conn_error(conn, "RDS/IW: Fatal QP Event %u - connection %pI4->%pI4...reconnecting\n",
+		rdsdebug("Fatal QP Event %u "
+			"- connection %pI4->%pI4, reconnecting\n",
 			event->event, &conn->c_laddr,
 			&conn->c_faddr);
+		rds_conn_drop(conn);
 		break;
 	}
 }
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 31f9c72b8d76..5ea82fc47c3e 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -101,7 +101,7 @@ int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
 		break;
 
 	case RDMA_CM_EVENT_DISCONNECTED:
-		printk(KERN_WARNING "RDS/RDMA: DISCONNECT event - dropping connection "
+		rdsdebug("DISCONNECT event - dropping connection "
 			"%pI4->%pI4\n", &conn->c_laddr,
 			 &conn->c_faddr);
 		rds_conn_drop(conn);
-- 
cgit v1.2.3-59-g8ed1b


From 550a8002e4340eaf3bc333e33b59427e9c20272d Mon Sep 17 00:00:00 2001
From: Tina Yang <tina.yang@oracle.com>
Date: Thu, 11 Mar 2010 13:50:03 +0000
Subject: RDS: Fix locking in rds_send_drop_to()

It seems rds_send_drop_to() called
__rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED)
with only rds_sock lock, but not rds_message lock. It raced with
other threads that are attempting to modify the rds_message as well,
such as from within rds_rdma_send_complete().

Signed-off-by: Tina Yang <tina.yang@oracle.com>
Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/send.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/rds/send.c b/net/rds/send.c
index 1a2ef24fbc94..192a480076d0 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -632,9 +632,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 		list_move(&rm->m_sock_item, &list);
 		rds_send_sndbuf_remove(rs, rm);
 		clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags);
-
-		/* If this is a RDMA operation, notify the app. */
-		__rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
 	}
 
 	/* order flag updates with the rs lock */
@@ -643,9 +640,6 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 
 	spin_unlock_irqrestore(&rs->rs_lock, flags);
 
-	if (wake)
-		rds_wake_sk_sleep(rs);
-
 	conn = NULL;
 
 	/* now remove the messages from the conn list as needed */
@@ -653,6 +647,10 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 		/* We do this here rather than in the loop above, so that
 		 * we don't have to nest m_rs_lock under rs->rs_lock */
 		spin_lock_irqsave(&rm->m_rs_lock, flags2);
+		/* If this is a RDMA operation, notify the app. */
+		spin_lock(&rs->rs_lock);
+		__rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED);
+		spin_unlock(&rs->rs_lock);
 		rm->m_rs = NULL;
 		spin_unlock_irqrestore(&rm->m_rs_lock, flags2);
 
@@ -681,6 +679,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest)
 	if (conn)
 		spin_unlock_irqrestore(&conn->c_lock, flags);
 
+	if (wake)
+		rds_wake_sk_sleep(rs);
+
 	while (!list_empty(&list)) {
 		rm = list_entry(list.next, struct rds_message, m_sock_item);
 		list_del_init(&rm->m_sock_item);
-- 
cgit v1.2.3-59-g8ed1b


From b98ba52f96e7cdb4dbe2b06bced83d95d94c9d02 Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 11 Mar 2010 13:50:04 +0000
Subject: RDS: only put sockets that have seen congestion on the poll_waitq

rds_poll_waitq's listeners will be awoken if we receive a congestion
notification. Bad performance may result because *all* polled sockets
contend for this single lock. However, it should not be necessary to
wake pollers when a congestion update arrives if they have never
experienced congestion, and not putting these on the waitq will
hopefully greatly reduce contention.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/af_rds.c | 7 ++++++-
 net/rds/rds.h    | 2 ++
 net/rds/send.c   | 4 +++-
 3 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 853c52be781f..937ecda4abe7 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -159,7 +159,8 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
 
 	poll_wait(file, sk->sk_sleep, wait);
 
-	poll_wait(file, &rds_poll_waitq, wait);
+	if (rs->rs_seen_congestion)
+		poll_wait(file, &rds_poll_waitq, wait);
 
 	read_lock_irqsave(&rs->rs_recv_lock, flags);
 	if (!rs->rs_cong_monitor) {
@@ -181,6 +182,10 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
 		mask |= (POLLOUT | POLLWRNORM);
 	read_unlock_irqrestore(&rs->rs_recv_lock, flags);
 
+	/* clear state any time we wake a seen-congested socket */
+	if (mask)
+		rs->rs_seen_congestion = 0;
+
 	return mask;
 }
 
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 85d6f897ecc7..4bec6e2ed495 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -388,6 +388,8 @@ struct rds_sock {
 
 	/* flag indicating we were congested or not */
 	int			rs_congested;
+	/* seen congestion (ENOBUFS) when sending? */
+	int			rs_seen_congestion;
 
 	/* rs_lock protects all these adjacent members before the newline */
 	spinlock_t		rs_lock;
diff --git a/net/rds/send.c b/net/rds/send.c
index 192a480076d0..51e2def50b12 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -894,8 +894,10 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 		queue_delayed_work(rds_wq, &conn->c_conn_w, 0);
 
 	ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
-	if (ret)
+	if (ret) {
+		rs->rs_seen_congestion = 1;
 		goto out;
+	}
 
 	while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port,
 				  dport, &queued)) {
-- 
cgit v1.2.3-59-g8ed1b


From 450d06c0208ad195ccd74a7edd11321e316791ad Mon Sep 17 00:00:00 2001
From: Sherman Pun <sherman.pun@sun.com>
Date: Thu, 11 Mar 2010 13:50:05 +0000
Subject: RDS: Properly unmap when getting a remote access error

If the RDMA op has aborted with a remote access error,
in addition to what we already do (tell userspace it has
completed with an error) also unmap it and put() the rm.

Otherwise, hangs may occur on arches that track maps and
will not exit without proper cleanup.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib_send.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index c18228aec779..17fa80803ab0 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -243,8 +243,12 @@ void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context)
 				struct rds_message *rm;
 
 				rm = rds_send_get_message(conn, send->s_op);
-				if (rm)
+				if (rm) {
+					if (rm->m_rdma_op)
+						rds_ib_send_unmap_rdma(ic, rm->m_rdma_op);
 					rds_ib_send_rdma_complete(rm, wc.status);
+					rds_message_put(rm);
+				}
 			}
 
 			oldest = (oldest + 1) % ic->i_send_ring.w_nr;
-- 
cgit v1.2.3-59-g8ed1b


From 561c7df63e259203515509a7ad075382a42bff0c Mon Sep 17 00:00:00 2001
From: Andy Grover <andy.grover@oracle.com>
Date: Thu, 11 Mar 2010 13:50:06 +0000
Subject: RDS: Do not call set_page_dirty() with irqs off

set_page_dirty() unconditionally re-enables interrupts, so
if we call it with irqs off, they will be on after the call,
and that's bad. This patch moves the call after we've re-enabled
interrupts in send_drop_to(), so it's safe.

Also, add BUG_ONs to let us know if we ever do call set_page_dirty
with interrupts off.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/ib_rdma.c |  1 +
 net/rds/rdma.c    |  4 +++-
 net/rds/send.c    | 14 ++++++++------
 3 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index 65e668defe42..cfb1d904ed00 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -440,6 +440,7 @@ static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
 
 			/* FIXME we need a way to tell a r/w MR
 			 * from a r/o MR */
+			BUG_ON(in_interrupt());
 			set_page_dirty(page);
 			put_page(page);
 		}
diff --git a/net/rds/rdma.c b/net/rds/rdma.c
index 4c64daa1f5d5..61b359d9dffd 100644
--- a/net/rds/rdma.c
+++ b/net/rds/rdma.c
@@ -438,8 +438,10 @@ void rds_rdma_free_op(struct rds_rdma_op *ro)
 		/* Mark page dirty if it was possibly modified, which
 		 * is the case for a RDMA_READ which copies from remote
 		 * to local memory */
-		if (!ro->r_write)
+		if (!ro->r_write) {
+			BUG_ON(in_interrupt());
 			set_page_dirty(page);
+		}
 		put_page(page);
 	}
 
diff --git a/net/rds/send.c b/net/rds/send.c
index 51e2def50b12..4629a0b63bbd 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -507,12 +507,13 @@ EXPORT_SYMBOL_GPL(rds_send_get_message);
  */
 void rds_send_remove_from_sock(struct list_head *messages, int status)
 {
-	unsigned long flags = 0; /* silence gcc :P */
+	unsigned long flags;
 	struct rds_sock *rs = NULL;
 	struct rds_message *rm;
 
-	local_irq_save(flags);
 	while (!list_empty(messages)) {
+		int was_on_sock = 0;
+
 		rm = list_entry(messages->next, struct rds_message,
 				m_conn_item);
 		list_del_init(&rm->m_conn_item);
@@ -527,7 +528,7 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
 		 * while we're messing with it. It does not prevent the
 		 * message from being removed from the socket, though.
 		 */
-		spin_lock(&rm->m_rs_lock);
+		spin_lock_irqsave(&rm->m_rs_lock, flags);
 		if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags))
 			goto unlock_and_drop;
 
@@ -556,21 +557,22 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
 					notifier->n_status = status;
 				rm->m_rdma_op->r_notifier = NULL;
 			}
-			rds_message_put(rm);
+			was_on_sock = 1;
 			rm->m_rs = NULL;
 		}
 		spin_unlock(&rs->rs_lock);
 
 unlock_and_drop:
-		spin_unlock(&rm->m_rs_lock);
+		spin_unlock_irqrestore(&rm->m_rs_lock, flags);
 		rds_message_put(rm);
+		if (was_on_sock)
+			rds_message_put(rm);
 	}
 
 	if (rs) {
 		rds_wake_sk_sleep(rs);
 		sock_put(rds_rs_to_sk(rs));
 	}
-	local_irq_restore(flags);
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 768bbedf9ca4cc4784eae2003f37abe0818fe0b0 Mon Sep 17 00:00:00 2001
From: Tina Yang <tina.yang@oracle.com>
Date: Thu, 11 Mar 2010 13:50:07 +0000
Subject: RDS: Enable per-cpu workqueue threads

Create per-cpu workqueue threads instead of a single
krdsd thread. This is a step towards better scalability.

Signed-off-by: Andy Grover <andy.grover@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/threads.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/rds/threads.c b/net/rds/threads.c
index 00fa10e59af8..786c20eaaf5e 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -259,7 +259,7 @@ void rds_threads_exit(void)
 
 int __init rds_threads_init(void)
 {
-	rds_wq = create_singlethread_workqueue("krdsd");
+	rds_wq = create_workqueue("krdsd");
 	if (rds_wq == NULL)
 		return -ENOMEM;
 
-- 
cgit v1.2.3-59-g8ed1b


From 0a9627f2649a02bea165cfd529d7bcb625c2fcad Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 16 Mar 2010 08:03:29 +0000
Subject: rps: Receive Packet Steering

This patch implements software receive side packet steering (RPS).  RPS
distributes the load of received packet processing across multiple CPUs.

Problem statement: Protocol processing done in the NAPI context for received
packets is serialized per device queue and becomes a bottleneck under high
packet load.  This substantially limits pps that can be achieved on a single
queue NIC and provides no scaling with multiple cores.

This solution queues packets early on in the receive path on the backlog queues
of other CPUs.   This allows protocol processing (e.g. IP and TCP) to be
performed on packets in parallel.   For each device (or each receive queue in
a multi-queue device) a mask of CPUs is set to indicate the CPUs that can
process packets. A CPU is selected on a per packet basis by hashing contents
of the packet header (e.g. the TCP or UDP 4-tuple) and using the result to index
into the CPU mask.  The IPI mechanism is used to raise networking receive
softirqs between CPUs.  This effectively emulates in software what a multi-queue
NIC can provide, but is generic requiring no device support.

Many devices now provide a hash over the 4-tuple on a per packet basis
(e.g. the Toeplitz hash).  This patch allows drivers to set the HW reported hash
in an skb field, and that value in turn is used to index into the RPS maps.
Using the HW generated hash can avoid cache misses on the packet when
steering it to a remote CPU.

The CPU mask is set on a per device and per queue basis in the sysfs variable
/sys/class/net/<device>/queues/rx-<n>/rps_cpus.  This is a set of canonical
bit maps for receive queues in the device (numbered by <n>).  If a device
does not support multi-queue, a single variable is used for the device (rx-0).

Generally, we have found this technique increases pps capabilities of a single
queue device with good CPU utilization.  Optimal settings for the CPU mask
seem to depend on architectures and cache hierarchy.  Below are some results
running 500 instances of netperf TCP_RR test with 1 byte req. and resp.
Results show cumulative transaction rate and system CPU utilization.

e1000e on 8 core Intel
   Without RPS: 108K tps at 33% CPU
   With RPS:    311K tps at 64% CPU

forcedeth on 16 core AMD
   Without RPS: 156K tps at 15% CPU
   With RPS:    404K tps at 49% CPU

bnx2x on 16 core AMD
   Without RPS  567K tps at 61% CPU (4 HW RX queues)
   Without RPS  738K tps at 96% CPU (8 HW RX queues)
   With RPS:    854K tps at 76% CPU (4 HW RX queues)

Caveats:
- The benefits of this patch are dependent on architecture and cache hierarchy.
Tuning the masks to get best performance is probably necessary.
- This patch adds overhead in the path for processing a single packet.  In
a lightly loaded server this overhead may eliminate the advantages of
increased parallelism, and possibly cause some relative performance degradation.
We have found that masks that are cache aware (share same caches with
the interrupting CPU) mitigate much of this.
- The RPS masks can be changed dynamically, however whenever the mask is changed
this introduces the possibility of generating out of order packets.  It's
probably best not to change the masks too frequently.

Signed-off-by: Tom Herbert <therbert@google.com>

 include/linux/netdevice.h |   32 ++++-
 include/linux/skbuff.h    |    3 +
 net/core/dev.c            |  335 +++++++++++++++++++++++++++++++++++++--------
 net/core/net-sysfs.c      |  225 ++++++++++++++++++++++++++++++-
 net/core/skbuff.c         |    2 +
 5 files changed, 538 insertions(+), 59 deletions(-)
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  32 ++++-
 include/linux/skbuff.h    |   3 +
 net/core/dev.c            | 335 ++++++++++++++++++++++++++++++++++++++--------
 net/core/net-sysfs.c      | 225 ++++++++++++++++++++++++++++++-
 net/core/skbuff.c         |   2 +
 5 files changed, 538 insertions(+), 59 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c79a88be7c33..de1a52bcb9e0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -223,6 +223,7 @@ struct netif_rx_stats {
 	unsigned dropped;
 	unsigned time_squeeze;
 	unsigned cpu_collision;
+	unsigned received_rps;
 };
 
 DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat);
@@ -530,6 +531,24 @@ struct netdev_queue {
 	unsigned long		tx_dropped;
 } ____cacheline_aligned_in_smp;
 
+/*
+ * This structure holds an RPS map which can be of variable length.  The
+ * map is an array of CPUs.
+ */
+struct rps_map {
+	unsigned int len;
+	struct rcu_head rcu;
+	u16 cpus[0];
+};
+#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
+
+/* This structure contains an instance of an RX queue. */
+struct netdev_rx_queue {
+	struct rps_map *rps_map;
+	struct kobject kobj;
+	struct netdev_rx_queue *first;
+	atomic_t count;
+} ____cacheline_aligned_in_smp;
 
 /*
  * This structure defines the management hooks for network devices.
@@ -878,6 +897,13 @@ struct net_device {
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
+	struct kset		*queues_kset;
+
+	struct netdev_rx_queue	*_rx;
+
+	/* Number of RX queues allocated at alloc_netdev_mq() time  */
+	unsigned int		num_rx_queues;
+
 	struct netdev_queue	rx_queue;
 
 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
@@ -1311,14 +1337,16 @@ static inline int unregister_gifconf(unsigned int family)
  */
 struct softnet_data {
 	struct Qdisc		*output_queue;
-	struct sk_buff_head	input_pkt_queue;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
 
+	/* Elements below can be accessed between CPUs for RPS */
+	struct call_single_data	csd ____cacheline_aligned_in_smp;
+	struct sk_buff_head	input_pkt_queue;
 	struct napi_struct	backlog;
 };
 
-DECLARE_PER_CPU(struct softnet_data,softnet_data);
+DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
 #define HAVE_NETIF_QUEUE
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 03f816a9b659..def10b064f29 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -300,6 +300,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@nfct_reasm: netfilter conntrack re-assembly pointer
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *	@skb_iif: ifindex of device we arrived on
+ *	@rxhash: the packet hash computed on receive
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@tc_index: Traffic control index
  *	@tc_verd: traffic control verdict
@@ -375,6 +376,8 @@ struct sk_buff {
 #endif
 #endif
 
+	__u32			rxhash;
+
 	kmemcheck_bitfield_begin(flags2);
 	__u16			queue_mapping:16;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
diff --git a/net/core/dev.c b/net/core/dev.c
index bcc490cc9452..17b168671501 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1931,7 +1931,7 @@ out_kfree_skb:
 	return rc;
 }
 
-static u32 skb_tx_hashrnd;
+static u32 hashrnd __read_mostly;
 
 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 {
@@ -1949,7 +1949,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 	else
 		hash = skb->protocol;
 
-	hash = jhash_1word(hash, skb_tx_hashrnd);
+	hash = jhash_1word(hash, hashrnd);
 
 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
 }
@@ -1959,10 +1959,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 {
 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
 		if (net_ratelimit()) {
-			WARN(1, "%s selects TX queue %d, but "
+			netdev_warn(dev, "selects TX queue %d, but "
 			     "real number of TX queues is %d\n",
-			     dev->name, queue_index,
-			     dev->real_num_tx_queues);
+			     queue_index, dev->real_num_tx_queues);
 		}
 		return 0;
 	}
@@ -2175,6 +2174,172 @@ int weight_p __read_mostly = 64;            /* old backlog weight */
 
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
+/*
+ * get_rps_cpu is called from netif_receive_skb and returns the target
+ * CPU from the RPS map of the receiving queue for a given skb.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
+{
+	struct ipv6hdr *ip6;
+	struct iphdr *ip;
+	struct netdev_rx_queue *rxqueue;
+	struct rps_map *map;
+	int cpu = -1;
+	u8 ip_proto;
+	u32 addr1, addr2, ports, ihl;
+
+	rcu_read_lock();
+
+	if (skb_rx_queue_recorded(skb)) {
+		u16 index = skb_get_rx_queue(skb);
+		if (unlikely(index >= dev->num_rx_queues)) {
+			if (net_ratelimit()) {
+				netdev_warn(dev, "received packet on queue "
+				    "%u, but number of RX queues is %u\n",
+				     index, dev->num_rx_queues);
+			}
+			goto done;
+		}
+		rxqueue = dev->_rx + index;
+	} else
+		rxqueue = dev->_rx;
+
+	if (!rxqueue->rps_map)
+		goto done;
+
+	if (skb->rxhash)
+		goto got_hash; /* Skip hash computation on packet header */
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		if (!pskb_may_pull(skb, sizeof(*ip)))
+			goto done;
+
+		ip = (struct iphdr *) skb->data;
+		ip_proto = ip->protocol;
+		addr1 = ip->saddr;
+		addr2 = ip->daddr;
+		ihl = ip->ihl;
+		break;
+	case __constant_htons(ETH_P_IPV6):
+		if (!pskb_may_pull(skb, sizeof(*ip6)))
+			goto done;
+
+		ip6 = (struct ipv6hdr *) skb->data;
+		ip_proto = ip6->nexthdr;
+		addr1 = ip6->saddr.s6_addr32[3];
+		addr2 = ip6->daddr.s6_addr32[3];
+		ihl = (40 >> 2);
+		break;
+	default:
+		goto done;
+	}
+	ports = 0;
+	switch (ip_proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_DCCP:
+	case IPPROTO_ESP:
+	case IPPROTO_AH:
+	case IPPROTO_SCTP:
+	case IPPROTO_UDPLITE:
+		if (pskb_may_pull(skb, (ihl * 4) + 4))
+			ports = *((u32 *) (skb->data + (ihl * 4)));
+		break;
+
+	default:
+		break;
+	}
+
+	skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd);
+	if (!skb->rxhash)
+		skb->rxhash = 1;
+
+got_hash:
+	map = rcu_dereference(rxqueue->rps_map);
+	if (map) {
+		u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+
+		if (cpu_online(tcpu)) {
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
+done:
+	rcu_read_unlock();
+	return cpu;
+}
+
+/*
+ * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled
+ * to be sent to kick remote softirq processing.  There are two masks since
+ * the sending of IPIs must be done with interrupts enabled.  The select field
+ * indicates the current mask that enqueue_to_backlog uses to schedule IPIs.
+ * select is flipped before net_rps_action is called while still under lock,
+ * net_rps_action then uses the non-selected mask to send the IPIs and clears
+ * it without conflicting with enqueue_to_backlog operation.
+ */
+struct rps_remote_softirq_cpus {
+	cpumask_t mask[2];
+	int select;
+};
+static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus);
+
+/* Called from hardirq (IPI) context */
+static void trigger_softirq(void *data)
+{
+	struct softnet_data *queue = data;
+	__napi_schedule(&queue->backlog);
+	__get_cpu_var(netdev_rx_stat).received_rps++;
+}
+
+/*
+ * enqueue_to_backlog is called to queue an skb to a per CPU backlog
+ * queue (may be a remote CPU queue).
+ */
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
+{
+	struct softnet_data *queue;
+	unsigned long flags;
+
+	queue = &per_cpu(softnet_data, cpu);
+
+	local_irq_save(flags);
+	__get_cpu_var(netdev_rx_stat).total++;
+
+	spin_lock(&queue->input_pkt_queue.lock);
+	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
+		if (queue->input_pkt_queue.qlen) {
+enqueue:
+			__skb_queue_tail(&queue->input_pkt_queue, skb);
+			spin_unlock_irqrestore(&queue->input_pkt_queue.lock,
+			    flags);
+			return NET_RX_SUCCESS;
+		}
+
+		/* Schedule NAPI for backlog device */
+		if (napi_schedule_prep(&queue->backlog)) {
+			if (cpu != smp_processor_id()) {
+				struct rps_remote_softirq_cpus *rcpus =
+				    &__get_cpu_var(rps_remote_softirq_cpus);
+
+				cpu_set(cpu, rcpus->mask[rcpus->select]);
+				__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+			} else
+				__napi_schedule(&queue->backlog);
+		}
+		goto enqueue;
+	}
+
+	spin_unlock(&queue->input_pkt_queue.lock);
+
+	__get_cpu_var(netdev_rx_stat).dropped++;
+	local_irq_restore(flags);
+
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
 
 /**
  *	netif_rx	-	post buffer to the network code
@@ -2193,8 +2358,7 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
 int netif_rx(struct sk_buff *skb)
 {
-	struct softnet_data *queue;
-	unsigned long flags;
+	int cpu;
 
 	/* if netpoll wants it, pretend we never saw it */
 	if (netpoll_rx(skb))
@@ -2203,31 +2367,11 @@ int netif_rx(struct sk_buff *skb)
 	if (!skb->tstamp.tv64)
 		net_timestamp(skb);
 
-	/*
-	 * The code is rearranged so that the path is the most
-	 * short when CPU is congested, but is still operating.
-	 */
-	local_irq_save(flags);
-	queue = &__get_cpu_var(softnet_data);
-
-	__get_cpu_var(netdev_rx_stat).total++;
-	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
-		if (queue->input_pkt_queue.qlen) {
-enqueue:
-			__skb_queue_tail(&queue->input_pkt_queue, skb);
-			local_irq_restore(flags);
-			return NET_RX_SUCCESS;
-		}
-
-		napi_schedule(&queue->backlog);
-		goto enqueue;
-	}
-
-	__get_cpu_var(netdev_rx_stat).dropped++;
-	local_irq_restore(flags);
+	cpu = get_rps_cpu(skb->dev, skb);
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
-	kfree_skb(skb);
-	return NET_RX_DROP;
+	return enqueue_to_backlog(skb, cpu);
 }
 EXPORT_SYMBOL(netif_rx);
 
@@ -2464,22 +2608,7 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
-/**
- *	netif_receive_skb - process receive buffer from network
- *	@skb: buffer to process
- *
- *	netif_receive_skb() is the main receive data processing function.
- *	It always succeeds. The buffer may be dropped during processing
- *	for congestion control or by the protocol layers.
- *
- *	This function may only be called from softirq context and interrupts
- *	should be enabled.
- *
- *	Return values (usually ignored):
- *	NET_RX_SUCCESS: no congestion
- *	NET_RX_DROP: packet was dropped
- */
-int netif_receive_skb(struct sk_buff *skb)
+int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
 	struct net_device *orig_dev;
@@ -2588,6 +2717,33 @@ out:
 	rcu_read_unlock();
 	return ret;
 }
+
+/**
+ *	netif_receive_skb - process receive buffer from network
+ *	@skb: buffer to process
+ *
+ *	netif_receive_skb() is the main receive data processing function.
+ *	It always succeeds. The buffer may be dropped during processing
+ *	for congestion control or by the protocol layers.
+ *
+ *	This function may only be called from softirq context and interrupts
+ *	should be enabled.
+ *
+ *	Return values (usually ignored):
+ *	NET_RX_SUCCESS: no congestion
+ *	NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb(struct sk_buff *skb)
+{
+	int cpu;
+
+	cpu = get_rps_cpu(skb->dev, skb);
+
+	if (cpu < 0)
+		return __netif_receive_skb(skb);
+	else
+		return enqueue_to_backlog(skb, cpu);
+}
 EXPORT_SYMBOL(netif_receive_skb);
 
 /* Network device is going away, flush any packets still pending  */
@@ -2914,16 +3070,16 @@ static int process_backlog(struct napi_struct *napi, int quota)
 	do {
 		struct sk_buff *skb;
 
-		local_irq_disable();
+		spin_lock_irq(&queue->input_pkt_queue.lock);
 		skb = __skb_dequeue(&queue->input_pkt_queue);
 		if (!skb) {
 			__napi_complete(napi);
-			local_irq_enable();
+			spin_unlock_irq(&queue->input_pkt_queue.lock);
 			break;
 		}
-		local_irq_enable();
+		spin_unlock_irq(&queue->input_pkt_queue.lock);
 
-		netif_receive_skb(skb);
+		__netif_receive_skb(skb);
 	} while (++work < quota && jiffies == start_time);
 
 	return work;
@@ -3012,6 +3168,22 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
+/*
+ * net_rps_action sends any pending IPI's for rps.  This is only called from
+ * softirq and interrupts must be enabled.
+ */
+static void net_rps_action(cpumask_t *mask)
+{
+	int cpu;
+
+	/* Send pending IPI's to kick RPS processing on remote cpus. */
+	for_each_cpu_mask_nr(cpu, *mask) {
+		struct softnet_data *queue = &per_cpu(softnet_data, cpu);
+		if (cpu_online(cpu))
+			__smp_call_function_single(cpu, &queue->csd, 0);
+	}
+	cpus_clear(*mask);
+}
 
 static void net_rx_action(struct softirq_action *h)
 {
@@ -3019,6 +3191,8 @@ static void net_rx_action(struct softirq_action *h)
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
+	int select;
+	struct rps_remote_softirq_cpus *rcpus;
 
 	local_irq_disable();
 
@@ -3081,8 +3255,14 @@ static void net_rx_action(struct softirq_action *h)
 		netpoll_poll_unlock(have);
 	}
 out:
+	rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
+	select = rcpus->select;
+	rcpus->select ^= 1;
+
 	local_irq_enable();
 
+	net_rps_action(&rcpus->mask[select]);
+
 #ifdef CONFIG_NET_DMA
 	/*
 	 * There may not be any more sk_buffs coming right now, so push
@@ -3327,10 +3507,10 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
 {
 	struct netif_rx_stats *s = v;
 
-	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
 		   s->total, s->dropped, s->time_squeeze, 0,
 		   0, 0, 0, 0, /* was fastroute */
-		   s->cpu_collision);
+		   s->cpu_collision, s->received_rps);
 	return 0;
 }
 
@@ -5067,6 +5247,23 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
+	if (!dev->num_rx_queues) {
+		/*
+		 * Allocate a single RX queue if driver never called
+		 * alloc_netdev_mq
+		 */
+
+		dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
+		if (!dev->_rx) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		dev->_rx->first = dev->_rx;
+		atomic_set(&dev->_rx->count, 1);
+		dev->num_rx_queues = 1;
+	}
+
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
 		ret = dev->netdev_ops->ndo_init(dev);
@@ -5424,9 +5621,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		void (*setup)(struct net_device *), unsigned int queue_count)
 {
 	struct netdev_queue *tx;
+	struct netdev_rx_queue *rx;
 	struct net_device *dev;
 	size_t alloc_size;
 	struct net_device *p;
+	int i;
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
@@ -5452,11 +5651,27 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		goto free_p;
 	}
 
+	rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+	if (!rx) {
+		printk(KERN_ERR "alloc_netdev: Unable to allocate "
+		       "rx queues.\n");
+		goto free_tx;
+	}
+
+	atomic_set(&rx->count, queue_count);
+
+	/*
+	 * Set a pointer to first element in the array which holds the
+	 * reference count.
+	 */
+	for (i = 0; i < queue_count; i++)
+		rx[i].first = rx;
+
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
 	if (dev_addr_init(dev))
-		goto free_tx;
+		goto free_rx;
 
 	dev_unicast_init(dev);
 
@@ -5466,6 +5681,9 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev->num_tx_queues = queue_count;
 	dev->real_num_tx_queues = queue_count;
 
+	dev->_rx = rx;
+	dev->num_rx_queues = queue_count;
+
 	dev->gso_max_size = GSO_MAX_SIZE;
 
 	netdev_init_queues(dev);
@@ -5480,9 +5698,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	strcpy(dev->name, name);
 	return dev;
 
+free_rx:
+	kfree(rx);
 free_tx:
 	kfree(tx);
-
 free_p:
 	kfree(p);
 	return NULL;
@@ -5985,6 +6204,10 @@ static int __init net_dev_init(void)
 		queue->completion_queue = NULL;
 		INIT_LIST_HEAD(&queue->poll_list);
 
+		queue->csd.func = trigger_softirq;
+		queue->csd.info = queue;
+		queue->csd.flags = 0;
+
 		queue->backlog.poll = process_backlog;
 		queue->backlog.weight = weight_p;
 		queue->backlog.gro_list = NULL;
@@ -6023,7 +6246,7 @@ subsys_initcall(net_dev_init);
 
 static int __init initialize_hashrnd(void)
 {
-	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
+	get_random_bytes(&hashrnd, sizeof(hashrnd));
 	return 0;
 }
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 099c753c4213..7a46343d5ae3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -466,6 +466,216 @@ static struct attribute_group wireless_group = {
 };
 #endif
 
+/*
+ * RX queue sysfs structures and functions.
+ */
+struct rx_queue_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct netdev_rx_queue *queue,
+	    struct rx_queue_attribute *attr, char *buf);
+	ssize_t (*store)(struct netdev_rx_queue *queue,
+	    struct rx_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_rx_queue_attr(_attr) container_of(_attr,		\
+    struct rx_queue_attribute, attr)
+
+#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
+
+static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
+				  char *buf)
+{
+	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+	if (!attribute->store)
+		return -EIO;
+
+	return attribute->store(queue, attribute, buf, count);
+}
+
+static struct sysfs_ops rx_queue_sysfs_ops = {
+	.show = rx_queue_attr_show,
+	.store = rx_queue_attr_store,
+};
+
+static ssize_t show_rps_map(struct netdev_rx_queue *queue,
+			    struct rx_queue_attribute *attribute, char *buf)
+{
+	struct rps_map *map;
+	cpumask_var_t mask;
+	size_t len = 0;
+	int i;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	rcu_read_lock();
+	map = rcu_dereference(queue->rps_map);
+	if (map)
+		for (i = 0; i < map->len; i++)
+			cpumask_set_cpu(map->cpus[i], mask);
+
+	len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+	if (PAGE_SIZE - len < 3) {
+		rcu_read_unlock();
+		free_cpumask_var(mask);
+		return -EINVAL;
+	}
+	rcu_read_unlock();
+
+	free_cpumask_var(mask);
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+
+static void rps_map_release(struct rcu_head *rcu)
+{
+	struct rps_map *map = container_of(rcu, struct rps_map, rcu);
+
+	kfree(map);
+}
+
+ssize_t store_rps_map(struct netdev_rx_queue *queue,
+		      struct rx_queue_attribute *attribute,
+		      const char *buf, size_t len)
+{
+	struct rps_map *old_map, *map;
+	cpumask_var_t mask;
+	int err, cpu, i;
+	static DEFINE_SPINLOCK(rps_map_lock);
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+	if (err) {
+		free_cpumask_var(mask);
+		return err;
+	}
+
+	map = kzalloc(max_t(unsigned,
+	    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
+	    GFP_KERNEL);
+	if (!map) {
+		free_cpumask_var(mask);
+		return -ENOMEM;
+	}
+
+	i = 0;
+	for_each_cpu_and(cpu, mask, cpu_online_mask)
+		map->cpus[i++] = cpu;
+
+	if (i)
+		map->len = i;
+	else {
+		kfree(map);
+		map = NULL;
+	}
+
+	spin_lock(&rps_map_lock);
+	old_map = queue->rps_map;
+	rcu_assign_pointer(queue->rps_map, map);
+	spin_unlock(&rps_map_lock);
+
+	if (old_map)
+		call_rcu(&old_map->rcu, rps_map_release);
+
+	free_cpumask_var(mask);
+	return len;
+}
+
+static struct rx_queue_attribute rps_cpus_attribute =
+	__ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+
+static struct attribute *rx_queue_default_attrs[] = {
+	&rps_cpus_attribute.attr,
+	NULL
+};
+
+static void rx_queue_release(struct kobject *kobj)
+{
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+	struct rps_map *map = queue->rps_map;
+	struct netdev_rx_queue *first = queue->first;
+
+	if (map)
+		call_rcu(&map->rcu, rps_map_release);
+
+	if (atomic_dec_and_test(&first->count))
+		kfree(first);
+}
+
+static struct kobj_type rx_queue_ktype = {
+	.sysfs_ops = &rx_queue_sysfs_ops,
+	.release = rx_queue_release,
+	.default_attrs = rx_queue_default_attrs,
+};
+
+static int rx_queue_add_kobject(struct net_device *net, int index)
+{
+	struct netdev_rx_queue *queue = net->_rx + index;
+	struct kobject *kobj = &queue->kobj;
+	int error = 0;
+
+	kobj->kset = net->queues_kset;
+	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
+	    "rx-%u", index);
+	if (error) {
+		kobject_put(kobj);
+		return error;
+	}
+
+	kobject_uevent(kobj, KOBJ_ADD);
+
+	return error;
+}
+
+static int rx_queue_register_kobjects(struct net_device *net)
+{
+	int i;
+	int error = 0;
+
+	net->queues_kset = kset_create_and_add("queues",
+	    NULL, &net->dev.kobj);
+	if (!net->queues_kset)
+		return -ENOMEM;
+	for (i = 0; i < net->num_rx_queues; i++) {
+		error = rx_queue_add_kobject(net, i);
+		if (error)
+			break;
+	}
+
+	if (error)
+		while (--i >= 0)
+			kobject_put(&net->_rx[i].kobj);
+
+	return error;
+}
+
+static void rx_queue_remove_kobjects(struct net_device *net)
+{
+	int i;
+
+	for (i = 0; i < net->num_rx_queues; i++)
+		kobject_put(&net->_rx[i].kobj);
+	kset_unregister(net->queues_kset);
+}
+
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_HOTPLUG
@@ -529,6 +739,8 @@ void netdev_unregister_kobject(struct net_device * net)
 	if (!net_eq(dev_net(net), &init_net))
 		return;
 
+	rx_queue_remove_kobjects(net);
+
 	device_del(dev);
 }
 
@@ -537,6 +749,7 @@ int netdev_register_kobject(struct net_device *net)
 {
 	struct device *dev = &(net->dev);
 	const struct attribute_group **groups = net->sysfs_groups;
+	int error = 0;
 
 	dev->class = &net_class;
 	dev->platform_data = net;
@@ -563,7 +776,17 @@ int netdev_register_kobject(struct net_device *net)
 	if (!net_eq(dev_net(net), &init_net))
 		return 0;
 
-	return device_add(dev);
+	error = device_add(dev);
+	if (error)
+		return error;
+
+	error = rx_queue_register_kobjects(net);
+	if (error) {
+		device_del(dev);
+		return error;
+	}
+
+	return error;
 }
 
 int netdev_class_create_file(struct class_attribute *class_attr)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 93c4e060c91e..bdea0efdf8cb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -534,6 +534,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->network_header	= old->network_header;
 	new->mac_header		= old->mac_header;
 	skb_dst_set(new, dst_clone(skb_dst(old)));
+	new->rxhash		= old->rxhash;
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
@@ -581,6 +582,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	C(len);
 	C(data_len);
 	C(mac_len);
+	C(rxhash);
 	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
 	n->cloned = 1;
 	n->nohdr = 0;
-- 
cgit v1.2.3-59-g8ed1b


From 14bb4789833a2e2610f30e2d3e1451701ac96ec1 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 2 Mar 2010 13:32:09 +0000
Subject: bridge: per-cpu packet statistics (v3)

The shared packet statistics are a potential source of slowdown
on bridged traffic. Convert to per-cpu array, but only keep those
statistics which change per-packet.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c  | 43 +++++++++++++++++++++++++++++++++++++++----
 net/bridge/br_if.c      |  6 ++++++
 net/bridge/br_input.c   |  6 ++++--
 net/bridge/br_private.h |  8 ++++++++
 4 files changed, 57 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 90a9024e5c1e..5b8a6e73b02f 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -26,11 +26,12 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	const unsigned char *dest = skb->data;
 	struct net_bridge_fdb_entry *dst;
 	struct net_bridge_mdb_entry *mdst;
+	struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
 
-	BR_INPUT_SKB_CB(skb)->brdev = dev;
+	brstats->tx_packets++;
+	brstats->tx_bytes += skb->len;
 
-	dev->stats.tx_packets++;
-	dev->stats.tx_bytes += skb->len;
+	BR_INPUT_SKB_CB(skb)->brdev = dev;
 
 	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
@@ -81,6 +82,31 @@ static int br_dev_stop(struct net_device *dev)
 	return 0;
 }
 
+static struct net_device_stats *br_get_stats(struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	struct br_cpu_netstats sum = { 0 };
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		const struct br_cpu_netstats *bstats
+			= per_cpu_ptr(br->stats, cpu);
+
+		sum.tx_bytes   += bstats->tx_bytes;
+		sum.tx_packets += bstats->tx_packets;
+		sum.rx_bytes   += bstats->rx_bytes;
+		sum.rx_packets += bstats->rx_packets;
+	}
+
+	stats->tx_bytes   = sum.tx_bytes;
+	stats->tx_packets = sum.tx_packets;
+	stats->rx_bytes   = sum.rx_bytes;
+	stats->rx_packets = sum.rx_packets;
+
+	return stats;
+}
+
 static int br_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct net_bridge *br = netdev_priv(dev);
@@ -180,19 +206,28 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_open		 = br_dev_open,
 	.ndo_stop		 = br_dev_stop,
 	.ndo_start_xmit		 = br_dev_xmit,
+	.ndo_get_stats		 = br_get_stats,
 	.ndo_set_mac_address	 = br_set_mac_address,
 	.ndo_set_multicast_list	 = br_dev_set_multicast_list,
 	.ndo_change_mtu		 = br_change_mtu,
 	.ndo_do_ioctl		 = br_dev_ioctl,
 };
 
+static void br_dev_free(struct net_device *dev)
+{
+	struct net_bridge *br = netdev_priv(dev);
+
+	free_percpu(br->stats);
+	free_netdev(dev);
+}
+
 void br_dev_setup(struct net_device *dev)
 {
 	random_ether_addr(dev->dev_addr);
 	ether_setup(dev);
 
 	dev->netdev_ops = &br_netdev_ops;
-	dev->destructor = free_netdev;
+	dev->destructor = br_dev_free;
 	SET_ETHTOOL_OPS(dev, &br_ethtool_ops);
 	dev->tx_queue_len = 0;
 	dev->priv_flags = IFF_EBRIDGE;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index b6a3872f5681..b7cdd2e98050 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -185,6 +185,12 @@ static struct net_device *new_bridge_dev(struct net *net, const char *name)
 	br = netdev_priv(dev);
 	br->dev = dev;
 
+	br->stats = alloc_percpu(struct br_cpu_netstats);
+	if (!br->stats) {
+		free_netdev(dev);
+		return NULL;
+	}
+
 	spin_lock_init(&br->lock);
 	INIT_LIST_HEAD(&br->port_list);
 	spin_lock_init(&br->hash_lock);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d74d570fc848..333dfb7c5886 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -23,9 +23,11 @@ const u8 br_group_address[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
 static int br_pass_frame_up(struct sk_buff *skb)
 {
 	struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
+	struct net_bridge *br = netdev_priv(brdev);
+	struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
 
-	brdev->stats.rx_packets++;
-	brdev->stats.rx_bytes += skb->len;
+	brstats->rx_packets++;
+	brstats->rx_bytes += skb->len;
 
 	indev = skb->dev;
 	skb->dev = brdev;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 846d7d1e2075..791d4ab0fd4d 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -135,6 +135,14 @@ struct net_bridge
 	spinlock_t			lock;
 	struct list_head		port_list;
 	struct net_device		*dev;
+
+	struct br_cpu_netstats __percpu {
+		unsigned long	rx_packets;
+		unsigned long	rx_bytes;
+		unsigned long	tx_packets;
+		unsigned long	tx_bytes;
+	} *stats;
+
 	spinlock_t			hash_lock;
 	struct hlist_head		hash[BR_HASH_SIZE];
 	unsigned long			feature_mask;
-- 
cgit v1.2.3-59-g8ed1b


From 2fb3573dfbca0bd853ddc1e47617eb446fa3deae Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 9 Mar 2010 20:03:38 +0000
Subject: net: remove rcu locking from fib_rules_event()

We hold RTNL at this point and don't use RCU variants of list traversals,
so we don't need rcu_read_lock()/rcu_read_unlock().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 9a24377146bf..2ff34894357a 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -108,7 +108,7 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
 	struct fib_rules_ops *ops;
 	int err;
 
-	ops = kmemdup(tmpl, sizeof (*ops), GFP_KERNEL);
+	ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL);
 	if (ops == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -123,7 +123,6 @@ fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
 
 	return ops;
 }
-
 EXPORT_SYMBOL_GPL(fib_rules_register);
 
 void fib_rules_cleanup_ops(struct fib_rules_ops *ops)
@@ -157,7 +156,6 @@ void fib_rules_unregister(struct fib_rules_ops *ops)
 
 	call_rcu(&ops->rcu, fib_rules_put_rcu);
 }
-
 EXPORT_SYMBOL_GPL(fib_rules_unregister);
 
 static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops,
@@ -220,7 +218,6 @@ out:
 
 	return err;
 }
-
 EXPORT_SYMBOL_GPL(fib_rules_lookup);
 
 static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb,
@@ -613,7 +610,7 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
 			break;
 
 		cb->args[1] = 0;
-	skip:
+skip:
 		idx++;
 	}
 	rcu_read_unlock();
@@ -685,7 +682,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
 	struct fib_rules_ops *ops;
 
 	ASSERT_RTNL();
-	rcu_read_lock();
 
 	switch (event) {
 	case NETDEV_REGISTER:
@@ -699,8 +695,6 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
 		break;
 	}
 
-	rcu_read_unlock();
-
 	return NOTIFY_DONE;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From dd2acaa7bcb150dadac0b17c8eb654b0712a62ab Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 11 Mar 2010 09:57:27 +0000
Subject: net: tcp: make hybla selectable as default congestion module

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/Kconfig | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 0c94a1ac2946..731c47ba6ad2 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -587,6 +587,9 @@ choice
 	config DEFAULT_HTCP
 		bool "Htcp" if TCP_CONG_HTCP=y
 
+	config DEFAULT_HYBLA
+		bool "Hybla" if TCP_CONG_HYBLA=y
+
 	config DEFAULT_VEGAS
 		bool "Vegas" if TCP_CONG_VEGAS=y
 
@@ -610,6 +613,7 @@ config DEFAULT_TCP_CONG
 	default "bic" if DEFAULT_BIC
 	default "cubic" if DEFAULT_CUBIC
 	default "htcp" if DEFAULT_HTCP
+	default "hybla" if DEFAULT_HYBLA
 	default "vegas" if DEFAULT_VEGAS
 	default "westwood" if DEFAULT_WESTWOOD
 	default "reno" if DEFAULT_RENO
-- 
cgit v1.2.3-59-g8ed1b


From 6ce1a6df6efbbeaa262a225a1a439ebc30a75d2e Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 11 Mar 2010 09:57:28 +0000
Subject: net: tcp: make veno selectable as default congestion module

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/Kconfig | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 731c47ba6ad2..c9a1c68767ff 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -593,6 +593,9 @@ choice
 	config DEFAULT_VEGAS
 		bool "Vegas" if TCP_CONG_VEGAS=y
 
+	config DEFAULT_VENO
+		bool "Veno" if TCP_CONG_VENO=y
+
 	config DEFAULT_WESTWOOD
 		bool "Westwood" if TCP_CONG_WESTWOOD=y
 
@@ -616,6 +619,7 @@ config DEFAULT_TCP_CONG
 	default "hybla" if DEFAULT_HYBLA
 	default "vegas" if DEFAULT_VEGAS
 	default "westwood" if DEFAULT_WESTWOOD
+	default "veno" if DEFAULT_VENO
 	default "reno" if DEFAULT_RENO
 	default "cubic"
 
-- 
cgit v1.2.3-59-g8ed1b


From 10708f37ae729baba9b67bd134c3720709d4ae62 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 11 Mar 2010 09:57:29 +0000
Subject: net: core: add IFLA_STATS64 support

`ip -s link` shows interface counters truncated to 32 bit. This is
because interface statistics are transported only in 32-bit quantity
to userspace. This commit adds a new IFLA_STATS64 attribute that
exports them in full 64 bit.

References: http://lkml.indiana.edu/hypermail/linux/kernel/0307.3/0215.html
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h | 33 +++++++++++++++++++++++++++++++++
 net/core/rtnetlink.c    | 42 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 74 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index c9bf92cd7653..cfd420ba72df 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -37,6 +37,38 @@ struct rtnl_link_stats {
 	__u32	tx_compressed;
 };
 
+struct rtnl_link_stats64 {
+	__u64	rx_packets;		/* total packets received	*/
+	__u64	tx_packets;		/* total packets transmitted	*/
+	__u64	rx_bytes;		/* total bytes received 	*/
+	__u64	tx_bytes;		/* total bytes transmitted	*/
+	__u64	rx_errors;		/* bad packets received		*/
+	__u64	tx_errors;		/* packet transmit problems	*/
+	__u64	rx_dropped;		/* no space in linux buffers	*/
+	__u64	tx_dropped;		/* no space available in linux	*/
+	__u64	multicast;		/* multicast packets received	*/
+	__u64	collisions;
+
+	/* detailed rx_errors: */
+	__u64	rx_length_errors;
+	__u64	rx_over_errors;		/* receiver ring buff overflow	*/
+	__u64	rx_crc_errors;		/* recved pkt with crc error	*/
+	__u64	rx_frame_errors;	/* recv'd frame alignment error */
+	__u64	rx_fifo_errors;		/* recv'r fifo overrun		*/
+	__u64	rx_missed_errors;	/* receiver missed packet	*/
+
+	/* detailed tx_errors */
+	__u64	tx_aborted_errors;
+	__u64	tx_carrier_errors;
+	__u64	tx_fifo_errors;
+	__u64	tx_heartbeat_errors;
+	__u64	tx_window_errors;
+
+	/* for cslip etc */
+	__u64	rx_compressed;
+	__u64	tx_compressed;
+};
+
 /* The struct should be in sync with struct ifmap */
 struct rtnl_link_ifmap {
 	__u64	mem_start;
@@ -83,6 +115,7 @@ enum {
 	IFLA_VF_VLAN,
 	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
 	IFLA_VFINFO,
+	IFLA_STATS64,
 	__IFLA_MAX
 };
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4568120d8533..e1121f0bca6a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -600,7 +600,39 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
 
 	a->rx_compressed = b->rx_compressed;
 	a->tx_compressed = b->tx_compressed;
-};
+}
+
+static void copy_rtnl_link_stats64(struct rtnl_link_stats64 *a,
+				   const struct net_device_stats *b)
+{
+	a->rx_packets = b->rx_packets;
+	a->tx_packets = b->tx_packets;
+	a->rx_bytes = b->rx_bytes;
+	a->tx_bytes = b->tx_bytes;
+	a->rx_errors = b->rx_errors;
+	a->tx_errors = b->tx_errors;
+	a->rx_dropped = b->rx_dropped;
+	a->tx_dropped = b->tx_dropped;
+
+	a->multicast = b->multicast;
+	a->collisions = b->collisions;
+
+	a->rx_length_errors = b->rx_length_errors;
+	a->rx_over_errors = b->rx_over_errors;
+	a->rx_crc_errors = b->rx_crc_errors;
+	a->rx_frame_errors = b->rx_frame_errors;
+	a->rx_fifo_errors = b->rx_fifo_errors;
+	a->rx_missed_errors = b->rx_missed_errors;
+
+	a->tx_aborted_errors = b->tx_aborted_errors;
+	a->tx_carrier_errors = b->tx_carrier_errors;
+	a->tx_fifo_errors = b->tx_fifo_errors;
+	a->tx_heartbeat_errors = b->tx_heartbeat_errors;
+	a->tx_window_errors = b->tx_window_errors;
+
+	a->rx_compressed = b->rx_compressed;
+	a->tx_compressed = b->tx_compressed;
+}
 
 static inline int rtnl_vfinfo_size(const struct net_device *dev)
 {
@@ -698,6 +730,14 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	stats = dev_get_stats(dev);
 	copy_rtnl_link_stats(nla_data(attr), stats);
 
+	attr = nla_reserve(skb, IFLA_STATS64,
+			sizeof(struct rtnl_link_stats64));
+	if (attr == NULL)
+		goto nla_put_failure;
+
+	stats = dev_get_stats(dev);
+	copy_rtnl_link_stats64(nla_data(attr), stats);
+
 	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
 		int i;
 		struct ifla_vf_info ivi;
-- 
cgit v1.2.3-59-g8ed1b


From 1a624832a06b465d0e5651901bcbc3680c78d374 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 15 Mar 2010 08:02:24 +0000
Subject: tipc: Increase frequency of load distribution over broadcast link

Forward port commit 29eb572941501c40ac6e62dbc5043bf9ee76ee56
from git://tipc.cslab.ericsson.net/pub/git/people/allan/tipc.git

Original commit message:
Increase frequency of load distribution over broadcast link

This patch enhances the behavior of TIPC's broadcast link so that it
alternates between redundant bearers (if available) after every
message sent, rather than after every 10 messages.  This change helps
to speed up delivery of retransmitted messages by ensuring that
they are not sent repeatedly over a bearer that is no longer working,
but not yet recognized as failed.

Tested by myself in the latest net-2.6 tree using the tipc sanity test suite

Originally-signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

bcast.c |   35 ++++++++++++++---------------------
1 file changed, 14 insertions(+), 21 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c | 35 ++++++++++++++---------------------
 1 file changed, 14 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a3bfd4064912..90a051912c03 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -558,10 +558,7 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 			      struct tipc_bearer *unused1,
 			      struct tipc_media_addr *unused2)
 {
-	static int send_count = 0;
-
 	int bp_index;
-	int swap_time;
 
 	/* Prepare buffer for broadcasting (if first time trying to send it) */
 
@@ -575,11 +572,6 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 		msg_set_mc_netid(msg, tipc_net_id);
 	}
 
-	/* Determine if bearer pairs should be swapped following this attempt */
-
-	if ((swap_time = (++send_count >= 10)))
-		send_count = 0;
-
 	/* Send buffer over bearers until all targets reached */
 
 	bcbearer->remains = tipc_cltr_bcast_nodes;
@@ -595,21 +587,22 @@ static int tipc_bcbearer_send(struct sk_buff *buf,
 		if (bcbearer->remains_new.count == bcbearer->remains.count)
 			continue;	/* bearer pair doesn't add anything */
 
-		if (!p->publ.blocked &&
-		    !p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) {
-			if (swap_time && s && !s->publ.blocked)
-				goto swap;
-			else
-				goto update;
+		if (p->publ.blocked ||
+		    p->media->send_msg(buf, &p->publ, &p->media->bcast_addr)) {
+			/* unable to send on primary bearer */
+			if (!s || s->publ.blocked ||
+			    s->media->send_msg(buf, &s->publ,
+					       &s->media->bcast_addr)) {
+				/* unable to send on either bearer */
+				continue;
+			}
+		}
+
+		if (s) {
+			bcbearer->bpairs[bp_index].primary = s;
+			bcbearer->bpairs[bp_index].secondary = p;
 		}
 
-		if (!s || s->publ.blocked ||
-		    s->media->send_msg(buf, &s->publ, &s->media->bcast_addr))
-			continue;	/* unable to send using bearer pair */
-swap:
-		bcbearer->bpairs[bp_index].primary = s;
-		bcbearer->bpairs[bp_index].secondary = p;
-update:
 		if (bcbearer->remains_new.count == 0)
 			return 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From ca50910185fcce694af0cc194de50d2374a7c7ef Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Mon, 15 Mar 2010 07:58:45 +0000
Subject: tipc: Allow retransmission of cloned buffers

Forward port commit
fc477e160af086f6e30c3d4fdf5f5c000d29beb5
from git://tipc.cslab.ericsson.net/pub/git/people/allan/tipc.git

Original commit message:

Allow retransmission of cloned buffers

This patch fixes an issue with TIPC's message retransmission logic
that prevented retransmission of clone sk_buffs.  Originally intended
as a means of avoiding wasted work in retransmitting messages that
were still on the driver's outbound queue, it also prevented TIPC
from retransmitting messages through other means -- such as the
secondary bearer of the broadcast link, or another interface in a
set of bonded interfaces.  This fix removes existing checks for
cloned sk_buffs that prevented such retransmission.

Originally-Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1a7e4665af80..49f2be8622a9 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1553,7 +1553,7 @@ u32 tipc_link_push_packet(struct link *l_ptr)
 
 	/* Continue retransmission now, if there is anything: */
 
-	if (r_q_size && buf && !skb_cloned(buf)) {
+	if (r_q_size && buf) {
 		msg_set_ack(buf_msg(buf), mod(l_ptr->next_in_no - 1));
 		msg_set_bcast_ack(buf_msg(buf), l_ptr->owner->bclink.last_in);
 		if (tipc_bearer_send(l_ptr->b_ptr, buf, &l_ptr->media_addr)) {
@@ -1722,15 +1722,16 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
 	dbg("Retransmitting %u in link %x\n", retransmits, l_ptr);
 
 	if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) {
-		if (!skb_cloned(buf)) {
+		if (l_ptr->retransm_queue_size == 0) {
 			msg_dbg(msg, ">NO_RETR->BCONG>");
 			dbg_print_link(l_ptr, "   ");
 			l_ptr->retransm_queue_head = msg_seqno(msg);
 			l_ptr->retransm_queue_size = retransmits;
-			return;
 		} else {
-			/* Don't retransmit if driver already has the buffer */
+			err("Unexpected retransmit on link %s (qsize=%d)\n",
+			    l_ptr->name, l_ptr->retransm_queue_size);
 		}
+		return;
 	} else {
 		/* Detect repeated retransmit failures on uncongested bearer */
 
@@ -1745,7 +1746,7 @@ void tipc_link_retransmit(struct link *l_ptr, struct sk_buff *buf,
 		}
 	}
 
-	while (retransmits && (buf != l_ptr->next_out) && buf && !skb_cloned(buf)) {
+	while (retransmits && (buf != l_ptr->next_out) && buf) {
 		msg = buf_msg(buf);
 		msg_set_ack(msg, mod(l_ptr->next_in_no - 1));
 		msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in);
-- 
cgit v1.2.3-59-g8ed1b


From ff6e2163f28a1094fb5ca5950fe2b43c3cf6bc7a Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Mon, 1 Mar 2010 05:09:14 +0000
Subject: net: convert multiple drivers to use netdev_for_each_mc_addr, part7

In mlx4, use a char * buffer to store the mc addresses in the private structure instead.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/media/dvb/dvb-core/dvb_net.c |  6 +----
 drivers/net/mlx4/en_netdev.c         | 49 +++++++++++++++---------------------
 drivers/net/mlx4/mlx4_en.h           |  3 ++-
 drivers/s390/net/qeth_l2_main.c      |  2 +-
 net/bluetooth/bnep/netdev.c          | 10 ++++----
 5 files changed, 29 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index 441c0642b30a..dba1c84058b7 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -1140,7 +1140,6 @@ static void wq_set_multicast_list (struct work_struct *work)
 		dprintk("%s: allmulti mode\n", dev->name);
 		priv->rx_mode = RX_MODE_ALL_MULTI;
 	} else if (!netdev_mc_empty(dev)) {
-		int mci;
 		struct dev_mc_list *mc;
 
 		dprintk("%s: set_mc_list, %d entries\n",
@@ -1149,11 +1148,8 @@ static void wq_set_multicast_list (struct work_struct *work)
 		priv->rx_mode = RX_MODE_MULTI;
 		priv->multi_num = 0;
 
-		for (mci = 0, mc=dev->mc_list;
-		     mci < netdev_mc_count(dev);
-		     mc = mc->next, mci++) {
+		netdev_for_each_mc_addr(mc, dev)
 			dvb_set_mc_filter(dev, mc);
-		}
 	}
 
 	netif_addr_unlock_bh(dev);
diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c
index c48b0f4b17b7..7cd0933735e2 100644
--- a/drivers/net/mlx4/en_netdev.c
+++ b/drivers/net/mlx4/en_netdev.c
@@ -160,39 +160,29 @@ static void mlx4_en_do_set_mac(struct work_struct *work)
 static void mlx4_en_clear_list(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
-	struct dev_mc_list *plist = priv->mc_list;
-	struct dev_mc_list *next;
 
-	while (plist) {
-		next = plist->next;
-		kfree(plist);
-		plist = next;
-	}
-	priv->mc_list = NULL;
+	kfree(priv->mc_addrs);
+	priv->mc_addrs_cnt = 0;
 }
 
 static void mlx4_en_cache_mclist(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct dev_mc_list *mclist;
-	struct dev_mc_list *tmp;
-	struct dev_mc_list *plist = NULL;
-
-	for (mclist = dev->mc_list; mclist; mclist = mclist->next) {
-		tmp = kmalloc(sizeof(struct dev_mc_list), GFP_ATOMIC);
-		if (!tmp) {
-			en_err(priv, "failed to allocate multicast list\n");
-			mlx4_en_clear_list(dev);
-			return;
-		}
-		memcpy(tmp, mclist, sizeof(struct dev_mc_list));
-		tmp->next = NULL;
-		if (plist)
-			plist->next = tmp;
-		else
-			priv->mc_list = tmp;
-		plist = tmp;
+	char *mc_addrs;
+	int mc_addrs_cnt = netdev_mc_count(dev);
+	int i;
+
+	mc_addrs = kmalloc(mc_addrs_cnt * ETH_ALEN, GFP_ATOMIC);
+	if (!mc_addrs) {
+		en_err(priv, "failed to allocate multicast list\n");
+		return;
 	}
+	i = 0;
+	netdev_for_each_mc_addr(mclist, dev)
+		memcpy(mc_addrs + i++ * ETH_ALEN, mclist->dmi_addr, ETH_ALEN);
+	priv->mc_addrs = mc_addrs;
+	priv->mc_addrs_cnt = mc_addrs_cnt;
 }
 
 
@@ -212,7 +202,6 @@ static void mlx4_en_do_set_multicast(struct work_struct *work)
 						 mcast_task);
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct net_device *dev = priv->dev;
-	struct dev_mc_list *mclist;
 	u64 mcast_addr = 0;
 	int err;
 
@@ -288,6 +277,8 @@ static void mlx4_en_do_set_multicast(struct work_struct *work)
 		if (err)
 			en_err(priv, "Failed disabling multicast filter\n");
 	} else {
+		int i;
+
 		err = mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0,
 					  0, MLX4_MCAST_DISABLE);
 		if (err)
@@ -302,8 +293,9 @@ static void mlx4_en_do_set_multicast(struct work_struct *work)
 		netif_tx_lock_bh(dev);
 		mlx4_en_cache_mclist(dev);
 		netif_tx_unlock_bh(dev);
-		for (mclist = priv->mc_list; mclist; mclist = mclist->next) {
-			mcast_addr = mlx4_en_mac_to_u64(mclist->dmi_addr);
+		for (i = 0; i < priv->mc_addrs_cnt; i++) {
+			mcast_addr =
+			      mlx4_en_mac_to_u64(priv->mc_addrs + i * ETH_ALEN);
 			mlx4_SET_MCAST_FLTR(mdev->dev, priv->port,
 					    mcast_addr, 0, MLX4_MCAST_CONFIG);
 		}
@@ -984,7 +976,6 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	priv->flags = prof->flags;
 	priv->tx_ring_num = prof->tx_ring_num;
 	priv->rx_ring_num = prof->rx_ring_num;
-	priv->mc_list = NULL;
 	priv->mac_index = -1;
 	priv->msg_enable = MLX4_EN_MSG_LEVEL;
 	spin_lock_init(&priv->stats_lock);
diff --git a/drivers/net/mlx4/mlx4_en.h b/drivers/net/mlx4/mlx4_en.h
index 82c3ebc584e3..b55e46c8b682 100644
--- a/drivers/net/mlx4/mlx4_en.h
+++ b/drivers/net/mlx4/mlx4_en.h
@@ -492,7 +492,8 @@ struct mlx4_en_priv {
 	struct mlx4_en_perf_stats pstats;
 	struct mlx4_en_pkt_stats pkstats;
 	struct mlx4_en_port_stats port_stats;
-	struct dev_mc_list *mc_list;
+	char *mc_addrs;
+	int mc_addrs_cnt;
 	struct mlx4_en_stat_out_mbox hw_stats;
 };
 
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 6f1e3036bafd..7576ad5a833a 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -619,7 +619,7 @@ static void qeth_l2_set_multicast_list(struct net_device *dev)
 		return;
 	qeth_l2_del_all_mc(card);
 	spin_lock_bh(&card->mclock);
-	for (dm = dev->mc_list; dm; dm = dm->next)
+	netdev_for_each_mc_addr(dm, dev)
 		qeth_l2_add_mc(card, dm->da_addr, 0);
 
 	netdev_for_each_uc_addr(ha, dev)
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index b6234b73c4cf..326ab453edb7 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -87,7 +87,7 @@ static void bnep_net_set_mc_list(struct net_device *dev)
 		memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
 		r->len = htons(ETH_ALEN * 2);
 	} else {
-		struct dev_mc_list *dmi = dev->mc_list;
+		struct dev_mc_list *dmi;
 		int i, len = skb->len;
 
 		if (dev->flags & IFF_BROADCAST) {
@@ -97,12 +97,12 @@ static void bnep_net_set_mc_list(struct net_device *dev)
 
 		/* FIXME: We should group addresses here. */
 
-		for (i = 0;
-		     i < netdev_mc_count(dev) && i < BNEP_MAX_MULTICAST_FILTERS;
-		     i++) {
+		i = 0;
+		netdev_for_each_mc_addr(dmi, dev) {
+			if (i == BNEP_MAX_MULTICAST_FILTERS)
+				break;
 			memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN);
 			memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN);
-			dmi = dmi->next;
 		}
 		r->len = htons(skb->len - len);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 076f7839dd30b379f3cd468b13cf513cdcd5cee7 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 11 Mar 2010 00:38:44 +0100
Subject: netfilter: xt_CT: par->family is an nfproto

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_CT.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 61c50fa84703..fda603edb31a 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -37,13 +37,13 @@ static unsigned int xt_ct_target(struct sk_buff *skb,
 
 static u8 xt_ct_find_proto(const struct xt_tgchk_param *par)
 {
-	if (par->family == AF_INET) {
+	if (par->family == NFPROTO_IPV4) {
 		const struct ipt_entry *e = par->entryinfo;
 
 		if (e->ip.invflags & IPT_INV_PROTO)
 			return 0;
 		return e->ip.proto;
-	} else if (par->family == AF_INET6) {
+	} else if (par->family == NFPROTO_IPV6) {
 		const struct ip6t_entry *e = par->entryinfo;
 
 		if (e->ipv6.invflags & IP6T_INV_PROTO)
-- 
cgit v1.2.3-59-g8ed1b


From f76a47c83247b453f25629618056a6d2c1e39103 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 5 Jun 2009 15:22:15 +0200
Subject: netfilter: xt_NFQUEUE: consolidate v4/v6 targets into one

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_NFQUEUE.c | 40 ++++++++++++----------------------------
 1 file changed, 12 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 12dcd7007c3e..a37e2166858e 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -49,17 +49,6 @@ static u32 hash_v4(const struct sk_buff *skb)
 	return jhash_2words((__force u32)ipaddr, iph->protocol, jhash_initval);
 }
 
-static unsigned int
-nfqueue_tg4_v1(struct sk_buff *skb, const struct xt_target_param *par)
-{
-	const struct xt_NFQ_info_v1 *info = par->targinfo;
-	u32 queue = info->queuenum;
-
-	if (info->queues_total > 1)
-		queue = hash_v4(skb) % info->queues_total + queue;
-	return NF_QUEUE_NR(queue);
-}
-
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 static u32 hash_v6(const struct sk_buff *skb)
 {
@@ -73,18 +62,24 @@ static u32 hash_v6(const struct sk_buff *skb)
 
 	return jhash2((__force u32 *)addr, ARRAY_SIZE(addr), jhash_initval);
 }
+#endif
 
 static unsigned int
-nfqueue_tg6_v1(struct sk_buff *skb, const struct xt_target_param *par)
+nfqueue_tg_v1(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	const struct xt_NFQ_info_v1 *info = par->targinfo;
 	u32 queue = info->queuenum;
 
-	if (info->queues_total > 1)
-		queue = hash_v6(skb) % info->queues_total + queue;
+	if (info->queues_total > 1) {
+		if (par->target->family == NFPROTO_IPV4)
+			queue = hash_v4(skb) % info->queues_total + queue;
+#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
+		else if (par->target->family == NFPROTO_IPV6)
+			queue = hash_v6(skb) % info->queues_total + queue;
+#endif
+	}
 	return NF_QUEUE_NR(queue);
 }
-#endif
 
 static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
 {
@@ -119,23 +114,12 @@ static struct xt_target nfqueue_tg_reg[] __read_mostly = {
 	{
 		.name		= "NFQUEUE",
 		.revision	= 1,
-		.family		= NFPROTO_IPV4,
-		.checkentry	= nfqueue_tg_v1_check,
-		.target		= nfqueue_tg4_v1,
-		.targetsize	= sizeof(struct xt_NFQ_info_v1),
-		.me		= THIS_MODULE,
-	},
-#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-	{
-		.name		= "NFQUEUE",
-		.revision	= 1,
-		.family		= NFPROTO_IPV6,
+		.family		= NFPROTO_UNSPEC,
 		.checkentry	= nfqueue_tg_v1_check,
-		.target		= nfqueue_tg6_v1,
+		.target		= nfqueue_tg_v1,
 		.targetsize	= sizeof(struct xt_NFQ_info_v1),
 		.me		= THIS_MODULE,
 	},
-#endif
 };
 
 static int __init nfqueue_tg_init(void)
-- 
cgit v1.2.3-59-g8ed1b


From 44c5873199a90a45e9d64a86a1f54b1ccab21ec8 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 26 Feb 2010 14:14:22 +0100
Subject: netfilter: xtables: add comment markers to Xtables Kconfig

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/Kconfig | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 18d77b5c351a..abf4ce6d1834 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -316,6 +316,8 @@ if NETFILTER_XTABLES
 
 # alphabetically ordered list of targets
 
+comment "Xtables targets"
+
 config NETFILTER_XT_TARGET_CLASSIFY
 	tristate '"CLASSIFY" target support'
 	depends on NETFILTER_ADVANCED
@@ -552,6 +554,10 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
 	  This option adds a "TCPOPTSTRIP" target, which allows you to strip
 	  TCP options from TCP packets.
 
+# alphabetically ordered list of matches
+
+comment "Xtables matches"
+
 config NETFILTER_XT_MATCH_CLUSTER
 	tristate '"cluster" match support'
 	depends on NF_CONNTRACK
-- 
cgit v1.2.3-59-g8ed1b


From 28b949885f80efb87d7cebdcf879c99db12c37bd Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sat, 28 Feb 2009 03:23:57 +0100
Subject: netfilter: xtables: merge xt_MARK into xt_mark

Two arguments for combining the two:
- xt_mark is pretty useless without xt_MARK
- the actual code is so small anyway that the kmod metadata and the module
  in its loaded state totally outweigh the combined actual code size.

i586-before:
-rw-r--r-- 1 jengelh users 3821 Feb 10 01:01 xt_MARK.ko
-rw-r--r-- 1 jengelh users 2592 Feb 10 00:04 xt_MARK.o
-rw-r--r-- 1 jengelh users 3274 Feb 10 01:01 xt_mark.ko
-rw-r--r-- 1 jengelh users 2108 Feb 10 00:05 xt_mark.o
   text    data     bss     dec     hex filename
    354     264       0     618     26a xt_MARK.o
    223     176       0     399     18f xt_mark.o
And the runtime size is like 14 KB.

i586-after:
-rw-r--r-- 1 jengelh users 3264 Feb 18 17:28 xt_mark.o

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/xt_MARK.h |  6 +----
 include/linux/netfilter/xt_mark.h |  4 +++
 net/netfilter/Kconfig             | 46 ++++++++++++++++++++------------
 net/netfilter/Makefile            |  5 ++--
 net/netfilter/xt_MARK.c           | 56 ---------------------------------------
 net/netfilter/xt_mark.c           | 35 ++++++++++++++++++++++--
 6 files changed, 70 insertions(+), 82 deletions(-)
 delete mode 100644 net/netfilter/xt_MARK.c

(limited to 'net')

diff --git a/include/linux/netfilter/xt_MARK.h b/include/linux/netfilter/xt_MARK.h
index bc9561bdef79..41c456deba22 100644
--- a/include/linux/netfilter/xt_MARK.h
+++ b/include/linux/netfilter/xt_MARK.h
@@ -1,10 +1,6 @@
 #ifndef _XT_MARK_H_target
 #define _XT_MARK_H_target
 
-#include <linux/types.h>
-
-struct xt_mark_tginfo2 {
-	__u32 mark, mask;
-};
+#include <linux/netfilter/xt_mark.h>
 
 #endif /*_XT_MARK_H_target */
diff --git a/include/linux/netfilter/xt_mark.h b/include/linux/netfilter/xt_mark.h
index 6607c8f38ea5..ecadc40d5cde 100644
--- a/include/linux/netfilter/xt_mark.h
+++ b/include/linux/netfilter/xt_mark.h
@@ -3,6 +3,10 @@
 
 #include <linux/types.h>
 
+struct xt_mark_tginfo2 {
+	__u32 mark, mask;
+};
+
 struct xt_mark_mtinfo1 {
 	__u32 mark, mask;
 	__u8 invert;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index abf4ce6d1834..236aa20ce5cc 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -314,6 +314,23 @@ config NETFILTER_XTABLES
 
 if NETFILTER_XTABLES
 
+comment "Xtables combined modules"
+
+config NETFILTER_XT_MARK
+	tristate 'nfmark target and match support'
+	default m if NETFILTER_ADVANCED=n
+	---help---
+	This option adds the "MARK" target and "mark" match.
+
+	Netfilter mark matching allows you to match packets based on the
+	"nfmark" value in the packet.
+	The target allows you to create rules in the "mangle" table which alter
+	the netfilter mark (nfmark) field associated with the packet.
+
+	Prior to routing, the nfmark can influence the routing method (see
+	"Use netfilter MARK value as routing key") and can also be used by
+	other subsystems to change their behavior.
+
 # alphabetically ordered list of targets
 
 comment "Xtables targets"
@@ -425,16 +442,12 @@ config NETFILTER_XT_TARGET_LED
 
 config NETFILTER_XT_TARGET_MARK
 	tristate '"MARK" target support'
-	default m if NETFILTER_ADVANCED=n
-	help
-	  This option adds a `MARK' target, which allows you to create rules
-	  in the `mangle' table which alter the netfilter mark (nfmark) field
-	  associated with the packet prior to routing. This can change
-	  the routing method (see `Use netfilter MARK value as routing
-	  key') and can also be used by other subsystems to change their
-	  behavior.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
+	depends on NETFILTER_ADVANCED
+	select NETFILTER_XT_MARK
+	---help---
+	This is a backwards-compat option for the user's convenience
+	(e.g. when running oldconfig). It selects
+	CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
 
 config NETFILTER_XT_TARGET_NFLOG
 	tristate '"NFLOG" target support'
@@ -739,13 +752,12 @@ config NETFILTER_XT_MATCH_MAC
 
 config NETFILTER_XT_MATCH_MARK
 	tristate '"mark" match support'
-	default m if NETFILTER_ADVANCED=n
-	help
-	  Netfilter mark matching allows you to match packets based on the
-	  `nfmark' value in the packet.  This can be set by the MARK target
-	  (see below).
-
-	  To compile it as a module, choose M here.  If unsure, say N.
+	depends on NETFILTER_ADVANCED
+	select NETFILTER_XT_MARK
+	---help---
+	This is a backwards-compat option for the user's convenience
+	(e.g. when running oldconfig). It selects
+	CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
 
 config NETFILTER_XT_MATCH_MULTIPORT
 	tristate '"multiport" Multiple port match support'
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index f873644f02f6..19775cc30fe5 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -40,6 +40,9 @@ obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
 # generic X tables 
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 
+# combos
+obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
+
 # targets
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
@@ -48,7 +51,6 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
-obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
@@ -76,7 +78,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
-obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
deleted file mode 100644
index 225f8d11e173..000000000000
--- a/net/netfilter/xt_MARK.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *	xt_MARK - Netfilter module to modify the NFMARK field of an skb
- *
- *	(C) 1999-2001 Marc Boucher <marc@mbsi.ca>
- *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
- *	Jan Engelhardt <jengelh@computergmbh.de>
- *
- *	This program is free software; you can redistribute it and/or modify
- *	it under the terms of the GNU General Public License version 2 as
- *	published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter/xt_MARK.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("Xtables: packet mark modification");
-MODULE_ALIAS("ipt_MARK");
-MODULE_ALIAS("ip6t_MARK");
-
-static unsigned int
-mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
-{
-	const struct xt_mark_tginfo2 *info = par->targinfo;
-
-	skb->mark = (skb->mark & ~info->mask) ^ info->mark;
-	return XT_CONTINUE;
-}
-
-static struct xt_target mark_tg_reg __read_mostly = {
-	.name           = "MARK",
-	.revision       = 2,
-	.family         = NFPROTO_UNSPEC,
-	.target         = mark_tg,
-	.targetsize     = sizeof(struct xt_mark_tginfo2),
-	.me             = THIS_MODULE,
-};
-
-static int __init mark_tg_init(void)
-{
-	return xt_register_target(&mark_tg_reg);
-}
-
-static void __exit mark_tg_exit(void)
-{
-	xt_unregister_target(&mark_tg_reg);
-}
-
-module_init(mark_tg_init);
-module_exit(mark_tg_exit);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 1db07d8125f8..035c468a0040 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -18,9 +18,20 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("Xtables: packet mark match");
+MODULE_DESCRIPTION("Xtables: packet mark operations");
 MODULE_ALIAS("ipt_mark");
 MODULE_ALIAS("ip6t_mark");
+MODULE_ALIAS("ipt_MARK");
+MODULE_ALIAS("ip6t_MARK");
+
+static unsigned int
+mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_mark_tginfo2 *info = par->targinfo;
+
+	skb->mark = (skb->mark & ~info->mask) ^ info->mark;
+	return XT_CONTINUE;
+}
 
 static bool
 mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
@@ -30,6 +41,15 @@ mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
+static struct xt_target mark_tg_reg __read_mostly = {
+	.name           = "MARK",
+	.revision       = 2,
+	.family         = NFPROTO_UNSPEC,
+	.target         = mark_tg,
+	.targetsize     = sizeof(struct xt_mark_tginfo2),
+	.me             = THIS_MODULE,
+};
+
 static struct xt_match mark_mt_reg __read_mostly = {
 	.name           = "mark",
 	.revision       = 1,
@@ -41,12 +61,23 @@ static struct xt_match mark_mt_reg __read_mostly = {
 
 static int __init mark_mt_init(void)
 {
-	return xt_register_match(&mark_mt_reg);
+	int ret;
+
+	ret = xt_register_target(&mark_tg_reg);
+	if (ret < 0)
+		return ret;
+	ret = xt_register_match(&mark_mt_reg);
+	if (ret < 0) {
+		xt_unregister_target(&mark_tg_reg);
+		return ret;
+	}
+	return 0;
 }
 
 static void __exit mark_mt_exit(void)
 {
 	xt_unregister_match(&mark_mt_reg);
+	xt_unregister_target(&mark_tg_reg);
 }
 
 module_init(mark_mt_init);
-- 
cgit v1.2.3-59-g8ed1b


From b8f00ba27e4acc4a2224360ce3aa8a22354434c5 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 26 Feb 2010 14:20:32 +0100
Subject: netfilter: xtables: merge xt_CONNMARK into xt_connmark

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/xt_CONNMARK.h |  22 +------
 include/linux/netfilter/xt_connmark.h |  11 ++++
 net/netfilter/Kconfig                 |  39 +++++++-----
 net/netfilter/Makefile                |   3 +-
 net/netfilter/xt_CONNMARK.c           | 113 ----------------------------------
 net/netfilter/xt_connmark.c           |  84 ++++++++++++++++++++++++-
 6 files changed, 116 insertions(+), 156 deletions(-)
 delete mode 100644 net/netfilter/xt_CONNMARK.c

(limited to 'net')

diff --git a/include/linux/netfilter/xt_CONNMARK.h b/include/linux/netfilter/xt_CONNMARK.h
index 0a8545866752..2f2e48ec8023 100644
--- a/include/linux/netfilter/xt_CONNMARK.h
+++ b/include/linux/netfilter/xt_CONNMARK.h
@@ -1,26 +1,6 @@
 #ifndef _XT_CONNMARK_H_target
 #define _XT_CONNMARK_H_target
 
-#include <linux/types.h>
-
-/* Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
- * by Henrik Nordstrom <hno@marasystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-enum {
-	XT_CONNMARK_SET = 0,
-	XT_CONNMARK_SAVE,
-	XT_CONNMARK_RESTORE
-};
-
-struct xt_connmark_tginfo1 {
-	__u32 ctmark, ctmask, nfmask;
-	__u8 mode;
-};
+#include <linux/netfilter/xt_connmark.h>
 
 #endif /*_XT_CONNMARK_H_target*/
diff --git a/include/linux/netfilter/xt_connmark.h b/include/linux/netfilter/xt_connmark.h
index 619e47cde01a..efc17a8305fb 100644
--- a/include/linux/netfilter/xt_connmark.h
+++ b/include/linux/netfilter/xt_connmark.h
@@ -12,6 +12,17 @@
  * (at your option) any later version.
  */
 
+enum {
+	XT_CONNMARK_SET = 0,
+	XT_CONNMARK_SAVE,
+	XT_CONNMARK_RESTORE
+};
+
+struct xt_connmark_tginfo1 {
+	__u32 ctmark, ctmask, nfmask;
+	__u8 mode;
+};
+
 struct xt_connmark_mtinfo1 {
 	__u32 mark, mask;
 	__u8 invert;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 236aa20ce5cc..8550dfde7804 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -331,6 +331,18 @@ config NETFILTER_XT_MARK
 	"Use netfilter MARK value as routing key") and can also be used by
 	other subsystems to change their behavior.
 
+config NETFILTER_XT_CONNMARK
+	tristate 'ctmark target and match support'
+	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
+	select NF_CONNTRACK_MARK
+	---help---
+	This option adds the "CONNMARK" target and "connmark" match.
+
+	Netfilter allows you to store a mark value per connection (a.k.a.
+	ctmark), similarly to the packet mark (nfmark). Using this
+	target and match, you can set and match on this mark.
+
 # alphabetically ordered list of targets
 
 comment "Xtables targets"
@@ -351,15 +363,11 @@ config NETFILTER_XT_TARGET_CONNMARK
 	tristate  '"CONNMARK" target support'
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
-	select NF_CONNTRACK_MARK
-	help
-	  This option adds a `CONNMARK' target, which allows one to manipulate
-	  the connection mark value.  Similar to the MARK target, but
-	  affects the connection mark value rather than the packet mark value.
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  The module will be called
-	  ipt_CONNMARK.  If unsure, say `N'.
+	select NETFILTER_XT_CONNMARK
+	---help---
+	This is a backwards-compat option for the user's convenience
+	(e.g. when running oldconfig). It selects
+	CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module).
 
 config NETFILTER_XT_TARGET_CONNSECMARK
 	tristate '"CONNSECMARK" target support'
@@ -621,14 +629,11 @@ config NETFILTER_XT_MATCH_CONNMARK
 	tristate  '"connmark" connection mark match support'
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
-	select NF_CONNTRACK_MARK
-	help
-	  This option adds a `connmark' match, which allows you to match the
-	  connection mark value previously set for the session by `CONNMARK'. 
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  The module will be called
-	  ipt_connmark.  If unsure, say `N'.
+	select NETFILTER_XT_CONNMARK
+	---help---
+	This is a backwards-compat option for the user's convenience
+	(e.g. when running oldconfig). It selects
+	CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module).
 
 config NETFILTER_XT_MATCH_CONNTRACK
 	tristate '"conntrack" connection tracking match support'
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 19775cc30fe5..cd31afe0692a 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -42,10 +42,10 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 
 # combos
 obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
+obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
 
 # targets
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
-obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
@@ -66,7 +66,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
-obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
deleted file mode 100644
index 593457068ae1..000000000000
--- a/net/netfilter/xt_CONNMARK.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *	xt_CONNMARK - Netfilter module to modify the connection mark values
- *
- *	Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
- *	by Henrik Nordstrom <hno@marasystems.com>
- *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
- *	Jan Engelhardt <jengelh@computergmbh.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
-MODULE_DESCRIPTION("Xtables: connection mark modification");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("ipt_CONNMARK");
-MODULE_ALIAS("ip6t_CONNMARK");
-
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter/xt_CONNMARK.h>
-#include <net/netfilter/nf_conntrack_ecache.h>
-
-static unsigned int
-connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
-{
-	const struct xt_connmark_tginfo1 *info = par->targinfo;
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct;
-	u_int32_t newmark;
-
-	ct = nf_ct_get(skb, &ctinfo);
-	if (ct == NULL)
-		return XT_CONTINUE;
-
-	switch (info->mode) {
-	case XT_CONNMARK_SET:
-		newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
-		if (ct->mark != newmark) {
-			ct->mark = newmark;
-			nf_conntrack_event_cache(IPCT_MARK, ct);
-		}
-		break;
-	case XT_CONNMARK_SAVE:
-		newmark = (ct->mark & ~info->ctmask) ^
-		          (skb->mark & info->nfmask);
-		if (ct->mark != newmark) {
-			ct->mark = newmark;
-			nf_conntrack_event_cache(IPCT_MARK, ct);
-		}
-		break;
-	case XT_CONNMARK_RESTORE:
-		newmark = (skb->mark & ~info->nfmask) ^
-		          (ct->mark & info->ctmask);
-		skb->mark = newmark;
-		break;
-	}
-
-	return XT_CONTINUE;
-}
-
-static bool connmark_tg_check(const struct xt_tgchk_param *par)
-{
-	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
-		printk(KERN_WARNING "cannot load conntrack support for "
-		       "proto=%u\n", par->family);
-		return false;
-	}
-	return true;
-}
-
-static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
-{
-	nf_ct_l3proto_module_put(par->family);
-}
-
-static struct xt_target connmark_tg_reg __read_mostly = {
-	.name           = "CONNMARK",
-	.revision       = 1,
-	.family         = NFPROTO_UNSPEC,
-	.checkentry     = connmark_tg_check,
-	.target         = connmark_tg,
-	.targetsize     = sizeof(struct xt_connmark_tginfo1),
-	.destroy        = connmark_tg_destroy,
-	.me             = THIS_MODULE,
-};
-
-static int __init connmark_tg_init(void)
-{
-	return xt_register_target(&connmark_tg_reg);
-}
-
-static void __exit connmark_tg_exit(void)
-{
-	xt_unregister_target(&connmark_tg_reg);
-}
-
-module_init(connmark_tg_init);
-module_exit(connmark_tg_exit);
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 122aa8b0147b..d184515604b6 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -1,5 +1,5 @@
 /*
- *	xt_connmark - Netfilter module to match connection mark values
+ *	xt_connmark - Netfilter module to operate on connection marks
  *
  *	Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
  *	by Henrik Nordstrom <hno@marasystems.com>
@@ -24,15 +24,71 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_connmark.h>
 
 MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
-MODULE_DESCRIPTION("Xtables: connection mark match");
+MODULE_DESCRIPTION("Xtables: connection mark operations");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_CONNMARK");
+MODULE_ALIAS("ip6t_CONNMARK");
 MODULE_ALIAS("ipt_connmark");
 MODULE_ALIAS("ip6t_connmark");
 
+static unsigned int
+connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_connmark_tginfo1 *info = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	u_int32_t newmark;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL)
+		return XT_CONTINUE;
+
+	switch (info->mode) {
+	case XT_CONNMARK_SET:
+		newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
+		if (ct->mark != newmark) {
+			ct->mark = newmark;
+			nf_conntrack_event_cache(IPCT_MARK, ct);
+		}
+		break;
+	case XT_CONNMARK_SAVE:
+		newmark = (ct->mark & ~info->ctmask) ^
+		          (skb->mark & info->nfmask);
+		if (ct->mark != newmark) {
+			ct->mark = newmark;
+			nf_conntrack_event_cache(IPCT_MARK, ct);
+		}
+		break;
+	case XT_CONNMARK_RESTORE:
+		newmark = (skb->mark & ~info->nfmask) ^
+		          (ct->mark & info->ctmask);
+		skb->mark = newmark;
+		break;
+	}
+
+	return XT_CONTINUE;
+}
+
+static bool connmark_tg_check(const struct xt_tgchk_param *par)
+{
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+		printk(KERN_WARNING "cannot load conntrack support for "
+		       "proto=%u\n", par->family);
+		return false;
+	}
+	return true;
+}
+
+static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_ct_l3proto_module_put(par->family);
+}
+
 static bool
 connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
@@ -62,6 +118,17 @@ static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
 	nf_ct_l3proto_module_put(par->family);
 }
 
+static struct xt_target connmark_tg_reg __read_mostly = {
+	.name           = "CONNMARK",
+	.revision       = 1,
+	.family         = NFPROTO_UNSPEC,
+	.checkentry     = connmark_tg_check,
+	.target         = connmark_tg,
+	.targetsize     = sizeof(struct xt_connmark_tginfo1),
+	.destroy        = connmark_tg_destroy,
+	.me             = THIS_MODULE,
+};
+
 static struct xt_match connmark_mt_reg __read_mostly = {
 	.name           = "connmark",
 	.revision       = 1,
@@ -75,12 +142,23 @@ static struct xt_match connmark_mt_reg __read_mostly = {
 
 static int __init connmark_mt_init(void)
 {
-	return xt_register_match(&connmark_mt_reg);
+	int ret;
+
+	ret = xt_register_target(&connmark_tg_reg);
+	if (ret < 0)
+		return ret;
+	ret = xt_register_match(&connmark_mt_reg);
+	if (ret < 0) {
+		xt_unregister_target(&connmark_tg_reg);
+		return ret;
+	}
+	return 0;
 }
 
 static void __exit connmark_mt_exit(void)
 {
 	xt_unregister_match(&connmark_mt_reg);
+	xt_unregister_target(&connmark_tg_reg);
 }
 
 module_init(connmark_mt_init);
-- 
cgit v1.2.3-59-g8ed1b


From 408ffaa4a11ddd6f730be520479fd5cd890c57d3 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sun, 28 Feb 2010 23:19:52 +0100
Subject: netfilter: update my email address

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 Documentation/feature-removal-schedule.txt | 2 +-
 net/bridge/netfilter/ebt_ip6.c             | 2 +-
 net/netfilter/xt_TCPOPTSTRIP.c             | 3 +--
 net/netfilter/xt_connlimit.c               | 1 -
 net/netfilter/xt_connmark.c                | 2 +-
 net/netfilter/xt_hashlimit.c               | 2 +-
 net/netfilter/xt_recent.c                  | 2 +-
 net/netfilter/xt_time.c                    | 3 +--
 net/netfilter/xt_u32.c                     | 3 +--
 9 files changed, 8 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 8843fef0fced..fa327f673a32 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -247,7 +247,7 @@ What (Why):
 
 When:	January 2009 or Linux 2.7.0, whichever comes first
 Why:	Superseded by newer revisions or modules
-Who:	Jan Engelhardt <jengelh@computergmbh.de>
+Who:	Jan Engelhardt <jengelh@medozas.de>
 
 ---------------------------
 
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index bbf2534ef026..4cb60f1ae7ad 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -4,7 +4,7 @@
  *	Authors:
  *	Manohar Castelino <manohar.r.castelino@intel.com>
  *	Kuo-Lang Tseng <kuo-lang.tseng@intel.com>
- *	Jan Engelhardt <jengelh@computergmbh.de>
+ *	Jan Engelhardt <jengelh@medozas.de>
  *
  * Summary:
  * This is just a modification of the IPv4 code written by
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 9dd8c8ef63eb..e8b57609ddc0 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -3,7 +3,6 @@
  *
  * Copyright (C) 2007 Sven Schnelle <svens@bitebene.org>
  * Copyright © CC Computer Consultants GmbH, 2007
- * Contact: Jan Engelhardt <jengelh@computergmbh.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -136,7 +135,7 @@ static void __exit tcpoptstrip_tg_exit(void)
 
 module_init(tcpoptstrip_tg_init);
 module_exit(tcpoptstrip_tg_exit);
-MODULE_AUTHOR("Sven Schnelle <svens@bitebene.org>, Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_AUTHOR("Sven Schnelle <svens@bitebene.org>, Jan Engelhardt <jengelh@medozas.de>");
 MODULE_DESCRIPTION("Xtables: TCP option stripping");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_TCPOPTSTRIP");
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 26997ce90e48..9e624af40f80 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -5,7 +5,6 @@
  *   Nov 2002: Martin Bene <martin.bene@icomedias.com>:
  *		only ignore TIME_WAIT or gone connections
  *   (C) CC Computer Consultants GmbH, 2007
- *   Contact: <jengelh@computergmbh.de>
  *
  * based on ...
  *
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index d184515604b6..97465a472344 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -4,7 +4,7 @@
  *	Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
  *	by Henrik Nordstrom <hno@marasystems.com>
  *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
- *	Jan Engelhardt <jengelh@computergmbh.de>
+ *	Jan Engelhardt <jengelh@medozas.de>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 9e9c48963942..ba9601a767aa 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -36,7 +36,7 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
 MODULE_DESCRIPTION("Xtables: per hash-bucket rate-limit match");
 MODULE_ALIAS("ipt_hashlimit");
 MODULE_ALIAS("ip6t_hashlimit");
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 7073dbb8100c..1af74dd563d5 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -34,7 +34,7 @@
 #include <linux/netfilter/xt_recent.h>
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
 MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_recent");
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 93acaa59d108..9a9c9a3b0a5d 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -1,7 +1,6 @@
 /*
  *	xt_time
  *	Copyright © CC Computer Consultants GmbH, 2007
- *	Contact: <jengelh@computergmbh.de>
  *
  *	based on ipt_time by Fabrice MARIE <fabrice@netfilter.org>
  *	This is a module which is used for time matching
@@ -264,7 +263,7 @@ static void __exit time_mt_exit(void)
 
 module_init(time_mt_init);
 module_exit(time_mt_exit);
-MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
 MODULE_DESCRIPTION("Xtables: time-based matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_time");
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index 24a527624500..d7c05f03a7e7 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -3,7 +3,6 @@
  *
  *	Original author: Don Cohen <don@isis.cs3-inc.com>
  *	(C) CC Computer Consultants GmbH, 2007
- *	Contact: <jengelh@computergmbh.de>
  */
 
 #include <linux/module.h>
@@ -117,7 +116,7 @@ static void __exit u32_mt_exit(void)
 
 module_init(u32_mt_init);
 module_exit(u32_mt_exit);
-MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
 MODULE_DESCRIPTION("Xtables: arbitrary byte matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_u32");
-- 
cgit v1.2.3-59-g8ed1b


From 8244f4baf5e1793a75a1d4409efc3e2d9d71c9cb Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sun, 28 Feb 2010 23:22:04 +0100
Subject: netfilter: ebt_ip6: add principal maintainer in a MODULE_AUTHOR tag

Cc: Kuo-Lang Tseng <kuo-lang.tseng@intel.com>
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/bridge/netfilter/ebt_ip6.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 4cb60f1ae7ad..05d0d0c4ccb9 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -139,4 +139,5 @@ static void __exit ebt_ip6_fini(void)
 module_init(ebt_ip6_init);
 module_exit(ebt_ip6_fini);
 MODULE_DESCRIPTION("Ebtables: IPv6 protocol packet match");
+MODULE_AUTHOR("Kuo-Lang Tseng <kuo-lang.tseng@intel.com>");
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-59-g8ed1b


From 06bf514e3199150c715ab72e8e0e335ae8f99666 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sun, 28 Feb 2010 23:22:35 +0100
Subject: netfilter: xt_recent: update description

It had IPv6 for quite a while already :-)

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_recent.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 1af74dd563d5..bcabfbc1cd05 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -35,7 +35,7 @@
 
 MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
 MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
-MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4");
+MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_recent");
 MODULE_ALIAS("ip6t_recent");
-- 
cgit v1.2.3-59-g8ed1b


From 5be4a4f589841ef06fd79b241de3d5353a6c5efa Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 1 Mar 2010 11:55:33 +0100
Subject: netfilter: xt_recent: remove old proc directory

The compat option was introduced in October 2008.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 Documentation/feature-removal-schedule.txt |  10 ---
 net/netfilter/Kconfig                      |   7 --
 net/netfilter/xt_recent.c                  | 105 -----------------------------
 3 files changed, 122 deletions(-)

(limited to 'net')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index fa327f673a32..ea227e3f528b 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -241,16 +241,6 @@ Who:	Thomas Gleixner <tglx@linutronix.de>
 
 ---------------------------
 
-What (Why):
-	- xt_recent: the old ipt_recent proc dir
-	  (superseded by /proc/net/xt_recent)
-
-When:	January 2009 or Linux 2.7.0, whichever comes first
-Why:	Superseded by newer revisions or modules
-Who:	Jan Engelhardt <jengelh@medozas.de>
-
----------------------------
-
 What:	GPIO autorequest on gpio_direction_{input,output}() in gpiolib
 When:	February 2010
 Why:	All callers should use explicit gpio_request()/gpio_free().
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8550dfde7804..6ac28ef9300c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -859,13 +859,6 @@ config NETFILTER_XT_MATCH_RECENT
 	Short options are available by using 'iptables -m recent -h'
 	Official Website: <http://snowman.net/projects/ipt_recent/>
 
-config NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-	bool 'Enable obsolete /proc/net/ipt_recent'
-	depends on NETFILTER_XT_MATCH_RECENT && PROC_FS
-	---help---
-	This option enables the old /proc/net/ipt_recent interface,
-	which has been obsoleted by /proc/net/xt_recent.
-
 config NETFILTER_XT_MATCH_SCTP
 	tristate  '"sctp" protocol match support (EXPERIMENTAL)'
 	depends on EXPERIMENTAL
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index bcabfbc1cd05..2ff8aae84a38 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -83,9 +83,6 @@ struct recent_net {
 	struct list_head	tables;
 #ifdef CONFIG_PROC_FS
 	struct proc_dir_entry	*xt_recent;
-#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-	struct proc_dir_entry	*ipt_recent;
-#endif
 #endif
 };
 
@@ -342,17 +339,6 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	}
 	pde->uid = ip_list_uid;
 	pde->gid = ip_list_gid;
-#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-	pde = proc_create_data(t->name, ip_list_perms, recent_net->ipt_recent,
-		      &recent_old_fops, t);
-	if (pde == NULL) {
-		remove_proc_entry(t->name, recent_net->xt_recent);
-		kfree(t);
-		goto out;
-	}
-	pde->uid = ip_list_uid;
-	pde->gid = ip_list_gid;
-#endif
 #endif
 	spin_lock_bh(&recent_lock);
 	list_add_tail(&t->list, &recent_net->tables);
@@ -376,9 +362,6 @@ static void recent_mt_destroy(const struct xt_mtdtor_param *par)
 		list_del(&t->list);
 		spin_unlock_bh(&recent_lock);
 #ifdef CONFIG_PROC_FS
-#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-		remove_proc_entry(t->name, recent_net->ipt_recent);
-#endif
 		remove_proc_entry(t->name, recent_net->xt_recent);
 #endif
 		recent_table_flush(t);
@@ -470,84 +453,6 @@ static int recent_seq_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-static int recent_old_seq_open(struct inode *inode, struct file *filp)
-{
-	static bool warned_of_old;
-
-	if (unlikely(!warned_of_old)) {
-		printk(KERN_INFO KBUILD_MODNAME ": Use of /proc/net/ipt_recent"
-		       " is deprecated; use /proc/net/xt_recent.\n");
-		warned_of_old = true;
-	}
-	return recent_seq_open(inode, filp);
-}
-
-static ssize_t recent_old_proc_write(struct file *file,
-				     const char __user *input,
-				     size_t size, loff_t *loff)
-{
-	const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
-	struct recent_table *t = pde->data;
-	struct recent_entry *e;
-	char buf[sizeof("+255.255.255.255")], *c = buf;
-	union nf_inet_addr addr = {};
-	int add;
-
-	if (size > sizeof(buf))
-		size = sizeof(buf);
-	if (copy_from_user(buf, input, size))
-		return -EFAULT;
-
-	c = skip_spaces(c);
-
-	if (size - (c - buf) < 5)
-		return c - buf;
-	if (!strncmp(c, "clear", 5)) {
-		c += 5;
-		spin_lock_bh(&recent_lock);
-		recent_table_flush(t);
-		spin_unlock_bh(&recent_lock);
-		return c - buf;
-	}
-
-	switch (*c) {
-	case '-':
-		add = 0;
-		c++;
-		break;
-	case '+':
-		c++;
-	default:
-		add = 1;
-		break;
-	}
-	addr.ip = in_aton(c);
-
-	spin_lock_bh(&recent_lock);
-	e = recent_entry_lookup(t, &addr, NFPROTO_IPV4, 0);
-	if (e == NULL) {
-		if (add)
-			recent_entry_init(t, &addr, NFPROTO_IPV4, 0);
-	} else {
-		if (add)
-			recent_entry_update(t, e);
-		else
-			recent_entry_remove(t, e);
-	}
-	spin_unlock_bh(&recent_lock);
-	return size;
-}
-
-static const struct file_operations recent_old_fops = {
-	.open		= recent_old_seq_open,
-	.read		= seq_read,
-	.write		= recent_old_proc_write,
-	.release	= seq_release_private,
-	.owner		= THIS_MODULE,
-};
-#endif
-
 static ssize_t
 recent_mt_proc_write(struct file *file, const char __user *input,
 		     size_t size, loff_t *loff)
@@ -636,21 +541,11 @@ static int __net_init recent_proc_net_init(struct net *net)
 	recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net);
 	if (!recent_net->xt_recent)
 		return -ENOMEM;
-#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-	recent_net->ipt_recent = proc_mkdir("ipt_recent", net->proc_net);
-	if (!recent_net->ipt_recent) {
-		proc_net_remove(net, "xt_recent");
-		return -ENOMEM;
-	}
-#endif
 	return 0;
 }
 
 static void __net_exit recent_proc_net_exit(struct net *net)
 {
-#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT
-	proc_net_remove(net, "ipt_recent");
-#endif
 	proc_net_remove(net, "xt_recent");
 }
 #else
-- 
cgit v1.2.3-59-g8ed1b


From 0079c5aee34880bcee7feee9960f0502c73dc5fa Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Tue, 16 Mar 2010 19:53:13 +0100
Subject: netfilter: xt_recent: add an entry reaper

One of the problems with the way xt_recent is implemented is that
there is no efficient way to remove expired entries. Of course,
one can write a rule '-m recent --remove', but you have to know
beforehand which entry to delete. This commit adds reaper
logic which checks the head of the LRU list when a rule
is invoked that has a '--seconds' value and XT_RECENT_REAP set. If an
entry ceases to accumulate time stamps, then it will eventually bubble
to the top of the LRU list where it is then reaped.

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_recent.h |  4 ++++
 net/netfilter/xt_recent.c           | 28 +++++++++++++++++++++++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/netfilter/xt_recent.h b/include/linux/netfilter/xt_recent.h
index d2c276609925..bba990ecb018 100644
--- a/include/linux/netfilter/xt_recent.h
+++ b/include/linux/netfilter/xt_recent.h
@@ -9,6 +9,7 @@ enum {
 	XT_RECENT_UPDATE   = 1 << 2,
 	XT_RECENT_REMOVE   = 1 << 3,
 	XT_RECENT_TTL      = 1 << 4,
+	XT_RECENT_REAP     = 1 << 5,
 
 	XT_RECENT_SOURCE   = 0,
 	XT_RECENT_DEST     = 1,
@@ -16,6 +17,9 @@ enum {
 	XT_RECENT_NAME_LEN = 200,
 };
 
+/* Only allowed with --rcheck and --update */
+#define XT_RECENT_MODIFIERS (XT_RECENT_TTL|XT_RECENT_REAP)
+
 struct xt_recent_mtinfo {
 	__u32 seconds;
 	__u32 hit_count;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 2ff8aae84a38..b65eca9e13a3 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -143,6 +143,25 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
 	t->entries--;
 }
 
+/*
+ * Drop the head (oldest) LRU entry if its latest timestamp is older than 'time'.
+ */
+static void recent_entry_reap(struct recent_table *t, unsigned long time)
+{
+	struct recent_entry *e;
+
+	/*
+	 * The head of the LRU list is always the oldest entry.
+	 */
+	e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
+
+	/*
+	 * The last time stamp is the most recent.
+	 */
+	if (time_after(time, e->stamps[e->index-1]))
+		recent_entry_remove(t, e);
+}
+
 static struct recent_entry *
 recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr,
 		  u_int16_t family, u_int8_t ttl)
@@ -269,6 +288,10 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 				break;
 			}
 		}
+
+		/* info->seconds must be non-zero */
+		if (info->check_set & XT_RECENT_REAP)
+			recent_entry_reap(t, time);
 	}
 
 	if (info->check_set & XT_RECENT_SET ||
@@ -301,7 +324,10 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 		      XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
 		return false;
 	if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) &&
-	    (info->seconds || info->hit_count))
+	    (info->seconds || info->hit_count ||
+	    (info->check_set & XT_RECENT_MODIFIERS)))
+		return false;
+	if ((info->check_set & XT_RECENT_REAP) && !info->seconds)
 		return false;
 	if (info->hit_count > ip_pkt_list_tot) {
 		pr_info(KBUILD_MODNAME ": hitcount (%u) is larger than "
-- 
cgit v1.2.3-59-g8ed1b


From 606a9a02633c02d0e09fc96706f041053dbc57ee Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Wed, 17 Mar 2010 16:18:56 +0100
Subject: netfilter: xt_recent: check for unsupported user space flags

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_recent.h | 3 +++
 net/netfilter/xt_recent.c           | 5 +++++
 2 files changed, 8 insertions(+)

(limited to 'net')

diff --git a/include/linux/netfilter/xt_recent.h b/include/linux/netfilter/xt_recent.h
index bba990ecb018..83318e01425e 100644
--- a/include/linux/netfilter/xt_recent.h
+++ b/include/linux/netfilter/xt_recent.h
@@ -20,6 +20,9 @@ enum {
 /* Only allowed with --rcheck and --update */
 #define XT_RECENT_MODIFIERS (XT_RECENT_TTL|XT_RECENT_REAP)
 
+#define XT_RECENT_VALID_FLAGS (XT_RECENT_CHECK|XT_RECENT_SET|XT_RECENT_UPDATE|\
+			       XT_RECENT_REMOVE|XT_RECENT_TTL|XT_RECENT_REAP)
+
 struct xt_recent_mtinfo {
 	__u32 seconds;
 	__u32 hit_count;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index b65eca9e13a3..d2e7c80cd3c3 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -319,6 +319,11 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
 		hash_rnd_inited = true;
 	}
+	if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
+		pr_info(KBUILD_MODNAME ": Unsupported user space flags "
+			"(%08x)\n", info->check_set);
+		return false;
+	}
 	if (hweight8(info->check_set &
 		     (XT_RECENT_SET | XT_RECENT_REMOVE |
 		      XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
-- 
cgit v1.2.3-59-g8ed1b


From c01ae818a3e3578747861d9a0c7fabe9641786c1 Mon Sep 17 00:00:00 2001
From: Zhitong Wang <zhitong.wangzt@alibaba-inc.com>
Date: Wed, 17 Mar 2010 16:28:25 +0100
Subject: netfilter: remove unused headers in net/netfilter/nfnetlink.c

Remove unused headers in net/netfilter/nfnetlink.c

Signed-off-by: Zhitong Wang <zhitong.wangzt@alibaba-inc.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 8eb0cc23ada3..0794f9a106ee 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -18,12 +18,9 @@
 #include <linux/types.h>
 #include <linux/socket.h>
 #include <linux/kernel.h>
-#include <linux/major.h>
-#include <linux/timer.h>
 #include <linux/string.h>
 #include <linux/sockios.h>
 #include <linux/net.h>
-#include <linux/fcntl.h>
 #include <linux/skbuff.h>
 #include <asm/uaccess.h>
 #include <asm/system.h>
-- 
cgit v1.2.3-59-g8ed1b


From 7d5f7ed802507999c22949b84686d03987fe57ec Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 9 Mar 2010 23:27:24 +0100
Subject: netfilter: xtables: do without explicit XT_ALIGN

XT_ALIGN is already applied on matchsize/targetsize in x_tables.c,
so it is not strictly needed in the extensions.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_CT.c  | 2 +-
 net/netfilter/xt_LED.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index fda603edb31a..6509e03f1e62 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -137,7 +137,7 @@ static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
 static struct xt_target xt_ct_tg __read_mostly = {
 	.name		= "CT",
 	.family		= NFPROTO_UNSPEC,
-	.targetsize	= XT_ALIGN(sizeof(struct xt_ct_target_info)),
+	.targetsize	= sizeof(struct xt_ct_target_info),
 	.checkentry	= xt_ct_tg_check,
 	.destroy	= xt_ct_tg_destroy,
 	.target		= xt_ct_target,
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 8ff7843bb921..f86dc52367b1 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -141,7 +141,7 @@ static struct xt_target led_tg_reg __read_mostly = {
 	.revision	= 0,
 	.family		= NFPROTO_UNSPEC,
 	.target		= led_tg,
-	.targetsize	= XT_ALIGN(sizeof(struct xt_led_info)),
+	.targetsize	= sizeof(struct xt_led_info),
 	.checkentry	= led_tg_check,
 	.destroy	= led_tg_destroy,
 	.me		= THIS_MODULE,
-- 
cgit v1.2.3-59-g8ed1b


From 1d1c397db95f1c0cd95f6fa633c1e68acfaacec3 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 16 Mar 2010 21:09:04 +0100
Subject: netfilter: xtables: clean up xt_mac match routine

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_mac.c | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index c2007116ce5b..2039d0741b37 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -26,14 +26,16 @@ MODULE_ALIAS("ip6t_mac");
 
 static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
-    const struct xt_mac_info *info = par->matchinfo;
-
-    /* Is mac pointer valid? */
-    return skb_mac_header(skb) >= skb->head &&
-	   skb_mac_header(skb) + ETH_HLEN <= skb->data
-	   /* If so, compare... */
-	   && ((!compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr))
-		^ info->invert);
+	const struct xt_mac_info *info = par->matchinfo;
+	bool ret;
+
+	if (skb_mac_header(skb) < skb->head)
+		return false;
+	if (skb_mac_header(skb) + ETH_HLEN > skb->data)
+		return false;
+	ret  = compare_ether_addr(eth_hdr(skb)->h_source, info->srcaddr) == 0;
+	ret ^= info->invert;
+	return ret;
 }
 
 static struct xt_match mac_mt_reg __read_mostly = {
-- 
cgit v1.2.3-59-g8ed1b


From e5042a290097b7c2fc9b6a247585b6ed07b13026 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 16 Mar 2010 21:44:44 +0100
Subject: netfilter: xtables: limit xt_mac to ethernet devices

I do not see the point of allowing the MAC module to work with devices
that don't possibly have one, e.g. various tunnel interfaces such as
tun and sit.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_mac.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 2039d0741b37..b971ce93773e 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -10,6 +10,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/etherdevice.h>
 
@@ -29,6 +30,8 @@ static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	const struct xt_mac_info *info = par->matchinfo;
 	bool ret;
 
+	if (skb->dev == NULL || skb->dev->type != ARPHRD_ETHER)
+		return false;
 	if (skb_mac_header(skb) < skb->head)
 		return false;
 	if (skb_mac_header(skb) + ETH_HLEN > skb->data)
-- 
cgit v1.2.3-59-g8ed1b


From 115bc8f2874078e6ac78d88652a91b58447d5f4d Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 16 Mar 2010 20:06:55 +0100
Subject: netfilter: xtables: resort osf kconfig text

Restore alphabetical ordering of the list and put the xt_osf option
into its 'right' place again.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/Kconfig | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6ac28ef9300c..8055786b7702 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -774,6 +774,19 @@ config NETFILTER_XT_MATCH_MULTIPORT
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_OSF
+	tristate '"osf" Passive OS fingerprint match'
+	depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
+	help
+	  This option selects the Passive OS Fingerprinting match module
+	  that allows to passively match the remote operating system by
+	  analyzing incoming TCP SYN packets.
+
+	  Rules and loading software can be downloaded from
+	  http://www.ioremap.net/projects/osf
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_OWNER
 	tristate '"owner" match support'
 	depends on NETFILTER_ADVANCED
@@ -958,19 +971,6 @@ config NETFILTER_XT_MATCH_U32
 
 	  Details and examples are in the kernel module source.
 
-config NETFILTER_XT_MATCH_OSF
-	tristate '"osf" Passive OS fingerprint match'
-	depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
-	help
-	  This option selects the Passive OS Fingerprinting match module
-	  that allows to passively match the remote operating system by
-	  analyzing incoming TCP SYN packets.
-
-	  Rules and loading software can be downloaded from
-	  http://www.ioremap.net/projects/osf
-
-	  To compile it as a module, choose M here.  If unsure, say N.
-
 endif # NETFILTER_XTABLES
 
 endmenu
-- 
cgit v1.2.3-59-g8ed1b


From aa5fa3185791aac71c9172d4fda3e8729164b5d1 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 18 Mar 2010 00:44:52 +0100
Subject: netfilter: xtables: make use of caller family rather than match
 family

The matches can have .family = NFPROTO_UNSPEC, and though that is not
the case for the touched modules, it seems better to just use the
nfproto from the caller.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_dscp.c      |  2 +-
 net/netfilter/xt_hashlimit.c | 10 +++++-----
 net/netfilter/xt_policy.c    |  4 ++--
 net/netfilter/xt_recent.c    |  6 +++---
 net/netfilter/xt_state.c     |  6 +++---
 5 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 0280d3a8c161..fe58cd01ef99 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -58,7 +58,7 @@ static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct xt_tos_match_info *info = par->matchinfo;
 
-	if (par->match->family == NFPROTO_IPV4)
+	if (par->family == NFPROTO_IPV4)
 		return ((ip_hdr(skb)->tos & info->tos_mask) ==
 		       info->tos_value) ^ !!info->invert;
 	else
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index ba9601a767aa..1fdb50a90f1c 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -703,8 +703,8 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 		return false;
 
 	mutex_lock(&hashlimit_mutex);
-	r->hinfo = htable_find_get(net, r->name, par->match->family);
-	if (!r->hinfo && htable_create_v0(net, r, par->match->family) != 0) {
+	r->hinfo = htable_find_get(net, r->name, par->family);
+	if (!r->hinfo && htable_create_v0(net, r, par->family) != 0) {
 		mutex_unlock(&hashlimit_mutex);
 		return false;
 	}
@@ -730,7 +730,7 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
 		return false;
 	if (info->name[sizeof(info->name)-1] != '\0')
 		return false;
-	if (par->match->family == NFPROTO_IPV4) {
+	if (par->family == NFPROTO_IPV4) {
 		if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32)
 			return false;
 	} else {
@@ -739,8 +739,8 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
 	}
 
 	mutex_lock(&hashlimit_mutex);
-	info->hinfo = htable_find_get(net, info->name, par->match->family);
-	if (!info->hinfo && htable_create(net, info, par->match->family) != 0) {
+	info->hinfo = htable_find_get(net, info->name, par->family);
+	if (!info->hinfo && htable_create(net, info, par->family) != 0) {
 		mutex_unlock(&hashlimit_mutex);
 		return false;
 	}
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 4cbfebda8fa1..cc033d2c35ea 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -116,9 +116,9 @@ policy_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	int ret;
 
 	if (info->flags & XT_POLICY_MATCH_IN)
-		ret = match_policy_in(skb, info, par->match->family);
+		ret = match_policy_in(skb, info, par->family);
 	else
-		ret = match_policy_out(skb, info, par->match->family);
+		ret = match_policy_out(skb, info, par->family);
 
 	if (ret < 0)
 		ret = info->flags & XT_POLICY_MATCH_NONE ? true : false;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index d2e7c80cd3c3..a0ea1a21c470 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -233,7 +233,7 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	u_int8_t ttl;
 	bool ret = info->invert;
 
-	if (par->match->family == NFPROTO_IPV4) {
+	if (par->family == NFPROTO_IPV4) {
 		const struct iphdr *iph = ip_hdr(skb);
 
 		if (info->side == XT_RECENT_DEST)
@@ -259,12 +259,12 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 
 	spin_lock_bh(&recent_lock);
 	t = recent_table_lookup(recent_net, info->name);
-	e = recent_entry_lookup(t, &addr, par->match->family,
+	e = recent_entry_lookup(t, &addr, par->family,
 				(info->check_set & XT_RECENT_TTL) ? ttl : 0);
 	if (e == NULL) {
 		if (!(info->check_set & XT_RECENT_SET))
 			goto out;
-		e = recent_entry_init(t, &addr, par->match->family, ttl);
+		e = recent_entry_init(t, &addr, par->family, ttl);
 		if (e == NULL)
 			*par->hotdrop = true;
 		ret = !ret;
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 4c946cbd731f..ef09b2e2970f 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -39,9 +39,9 @@ state_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 
 static bool state_mt_check(const struct xt_mtchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(par->match->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->match->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 	return true;
@@ -49,7 +49,7 @@ static bool state_mt_check(const struct xt_mtchk_param *par)
 
 static void state_mt_destroy(const struct xt_mtdtor_param *par)
 {
-	nf_ct_l3proto_module_put(par->match->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
 static struct xt_match state_mt_reg[] __read_mostly = {
-- 
cgit v1.2.3-59-g8ed1b


From 4f948db1915ff05e4ce0fd98e6323db6a3ec0fc0 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 18 Mar 2010 11:03:51 +0100
Subject: netfilter: xtables: remove almost-unused xt_match_param.data member

This member is taking up a "long" per match, yet is only used by one
module out of the roughly 90 modules, ip6t_hbh. ip6t_hbh can be
restructured a little to accommodate the lack of the .data member.
This variant checks the par->match address, which should avoid
having to add two extra functions, including calls, i.e.

(hbh_mt6: call hbhdst_mt6(skb, par, NEXTHDR_OPT),
dst_mt6: call hbhdst_mt6(skb, par, NEXTHDR_DEST))

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h | 3 ---
 net/ipv6/netfilter/ip6t_hbh.c      | 9 ++++++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index c68ff82366b6..cf91473624e1 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -315,9 +315,6 @@ struct xt_match {
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 
-	/* Free to use by each match */
-	unsigned long data;
-
 	const char *table;
 	unsigned int matchsize;
 #ifdef CONFIG_COMPAT
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index cbe8dec9744b..82593c8bdc3e 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -41,6 +41,8 @@ MODULE_ALIAS("ip6t_dst");
  *	5	-> RTALERT 2 x x
  */
 
+static struct xt_match hbh_mt6_reg[] __read_mostly;
+
 static bool
 hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
@@ -58,7 +60,9 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	unsigned int optlen;
 	int err;
 
-	err = ipv6_find_hdr(skb, &ptr, par->match->data, NULL);
+	err = ipv6_find_hdr(skb, &ptr,
+			    (par->match == &hbh_mt6_reg[0]) ?
+			    NEXTHDR_HOP : NEXTHDR_DEST, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
 			*par->hotdrop = true;
@@ -179,13 +183,13 @@ static bool hbh_mt6_check(const struct xt_mtchk_param *par)
 
 static struct xt_match hbh_mt6_reg[] __read_mostly = {
 	{
+		/* Note, hbh_mt6 relies on the order of hbh_mt6_reg */
 		.name		= "hbh",
 		.family		= NFPROTO_IPV6,
 		.match		= hbh_mt6,
 		.matchsize	= sizeof(struct ip6t_opts),
 		.checkentry	= hbh_mt6_check,
 		.me		= THIS_MODULE,
-		.data		= NEXTHDR_HOP,
 	},
 	{
 		.name		= "dst",
@@ -194,7 +198,6 @@ static struct xt_match hbh_mt6_reg[] __read_mostly = {
 		.matchsize	= sizeof(struct ip6t_opts),
 		.checkentry	= hbh_mt6_check,
 		.me		= THIS_MODULE,
-		.data		= NEXTHDR_DEST,
 	},
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 85bc3f38147c5d3fb1eb9ca2236536389b592cae Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 18 Mar 2010 00:27:03 +0100
Subject: netfilter: xtables: do not print any messages on ENOMEM

ENOMEM is a very obvious error code (cf. EINVAL), so I think we do not
really need a warning message. Not to mention that if the allocation
fails, the user is most likely going to get a stack trace from slab
already.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/bridge/netfilter/ebt_ulog.c | 7 ++-----
 net/netfilter/xt_LED.c          | 4 +---
 net/netfilter/xt_hashlimit.c    | 8 ++------
 net/netfilter/xt_statistic.c    | 4 +---
 4 files changed, 6 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index c6ac657074a6..84340ab30ed3 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -305,13 +305,10 @@ static int __init ebt_ulog_init(void)
 	ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG,
 					  EBT_ULOG_MAXNLGROUPS, NULL, NULL,
 					  THIS_MODULE);
-	if (!ebtulognl) {
-		printk(KERN_WARNING KBUILD_MODNAME ": out of memory trying to "
-		       "call netlink_kernel_create\n");
+	if (!ebtulognl)
 		ret = -ENOMEM;
-	} else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) {
+	else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0)
 		netlink_kernel_release(ebtulognl);
-	}
 
 	if (ret == 0)
 		nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger);
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index f86dc52367b1..0d6c2885ebd6 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -92,10 +92,8 @@ static bool led_tg_check(const struct xt_tgchk_param *par)
 	}
 
 	ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL);
-	if (!ledinternal) {
-		printk(KERN_CRIT KBUILD_MODNAME ": out of memory\n");
+	if (!ledinternal)
 		return false;
-	}
 
 	ledinternal->netfilter_led_trigger.name = ledinfo->id;
 
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 1fdb50a90f1c..52327c5c1f1d 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -215,10 +215,8 @@ static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_
 	/* FIXME: don't use vmalloc() here or anywhere else -HW */
 	hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
 			sizeof(struct list_head) * size);
-	if (!hinfo) {
-		printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n");
+	if (!hinfo)
 		return -1;
-	}
 	minfo->hinfo = hinfo;
 
 	/* copy match config into hashtable config */
@@ -288,10 +286,8 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
 	/* FIXME: don't use vmalloc() here or anywhere else -HW */
 	hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
 	                sizeof(struct list_head) * size);
-	if (hinfo == NULL) {
-		printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n");
+	if (hinfo == NULL)
 		return -1;
-	}
 	minfo->hinfo = hinfo;
 
 	/* copy match config into hashtable config */
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index d8c0f8f1a78e..51ac1bbb4f52 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -61,10 +61,8 @@ static bool statistic_mt_check(const struct xt_mtchk_param *par)
 		return false;
 
 	info->master = kzalloc(sizeof(*info->master), GFP_KERNEL);
-	if (info->master == NULL) {
-		printk(KERN_ERR KBUILD_MODNAME ": Out of memory\n");
+	if (info->master == NULL)
 		return false;
-	}
 	info->master->count = info->u.nth.count;
 
 	return true;
-- 
cgit v1.2.3-59-g8ed1b


From be91fd5e323b46450ca82f6828e933e3791fb2f2 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 18 Mar 2010 02:22:32 +0100
Subject: netfilter: xtables: replace custom duprintf with pr_debug

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/ipv4/netfilter/ipt_ah.c  | 18 ++++++------------
 net/ipv6/netfilter/ip6t_mh.c | 11 +++--------
 net/netfilter/x_tables.c     | 12 +++---------
 net/netfilter/xt_esp.c       | 18 ++++++------------
 net/netfilter/xt_multiport.c | 16 +++++-----------
 net/netfilter/xt_sctp.c      | 29 +++++++++++++----------------
 net/netfilter/xt_tcpudp.c    | 16 +++++-----------
 7 files changed, 41 insertions(+), 79 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 0104c0b399de..4f27e170c630 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -5,7 +5,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/in.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -18,21 +18,15 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
 MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match");
 
-#ifdef DEBUG_CONNTRACK
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
 /* Returns 1 if the spi is matched by the range, 0 otherwise */
 static inline bool
 spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 {
 	bool r;
-	duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
-		min,spi,max);
+	pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
+		 invert ? '!' : ' ', min, spi, max);
 	r=(spi >= min && spi <= max) ^ invert;
-	duprintf(" result %s\n",r? "PASS" : "FAILED");
+	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
 	return r;
 }
 
@@ -51,7 +45,7 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
-		duprintf("Dropping evil AH tinygram.\n");
+		pr_debug("Dropping evil AH tinygram.\n");
 		*par->hotdrop = true;
 		return 0;
 	}
@@ -67,7 +61,7 @@ static bool ah_mt_check(const struct xt_mtchk_param *par)
 
 	/* Must specify no unknown invflags */
 	if (ahinfo->invflags & ~IPT_AH_INV_MASK) {
-		duprintf("ipt_ah: unknown flags %X\n", ahinfo->invflags);
+		pr_debug("unknown flags %X\n", ahinfo->invflags);
 		return false;
 	}
 	return true;
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index aafe4e66577b..0181eb81d24b 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -11,6 +11,7 @@
  * Based on net/netfilter/xt_tcpudp.c
  *
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/types.h>
 #include <linux/module.h>
 #include <net/ip.h>
@@ -24,12 +25,6 @@
 MODULE_DESCRIPTION("Xtables: IPv6 Mobility Header match");
 MODULE_LICENSE("GPL");
 
-#ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
 /* Returns 1 if the type is matched by the range, 0 otherwise */
 static inline bool
 type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
@@ -51,13 +46,13 @@ static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (mh == NULL) {
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
-		duprintf("Dropping evil MH tinygram.\n");
+		pr_debug("Dropping evil MH tinygram.\n");
 		*par->hotdrop = true;
 		return false;
 	}
 
 	if (mh->ip6mh_proto != IPPROTO_NONE) {
-		duprintf("Dropping invalid MH Payload Proto: %u\n",
+		pr_debug("Dropping invalid MH Payload Proto: %u\n",
 			 mh->ip6mh_proto);
 		*par->hotdrop = true;
 		return false;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 0a12cedfe9e3..9a248d4a877f 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -12,7 +12,7 @@
  * published by the Free Software Foundation.
  *
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/kernel.h>
 #include <linux/socket.h>
 #include <linux/net.h>
@@ -54,12 +54,6 @@ struct xt_af {
 
 static struct xt_af *xt;
 
-#ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
 static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
 	[NFPROTO_UNSPEC] = "x",
 	[NFPROTO_IPV4]   = "ip",
@@ -720,7 +714,7 @@ xt_replace_table(struct xt_table *table,
 
 	/* Check inside lock: is the old number correct? */
 	if (num_counters != private->number) {
-		duprintf("num_counters != table->private->number (%u/%u)\n",
+		pr_debug("num_counters != table->private->number (%u/%u)\n",
 			 num_counters, private->number);
 		local_bh_enable();
 		*error = -EAGAIN;
@@ -777,7 +771,7 @@ struct xt_table *xt_register_table(struct net *net,
 		goto unlock;
 
 	private = table->private;
-	duprintf("table->private->number = %u\n", private->number);
+	pr_debug("table->private->number = %u\n", private->number);
 
 	/* save number of initial entries */
 	private->initial_entries = private->number;
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 609439967c2c..f9deecbef875 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/in.h>
@@ -24,21 +24,15 @@ MODULE_DESCRIPTION("Xtables: IPsec-ESP packet match");
 MODULE_ALIAS("ipt_esp");
 MODULE_ALIAS("ip6t_esp");
 
-#if 0
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
 /* Returns 1 if the spi is matched by the range, 0 otherwise */
 static inline bool
 spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 {
 	bool r;
-	duprintf("esp spi_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ',
-		 min, spi, max);
+	pr_debug("esp spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
+		 invert ? '!' : ' ', min, spi, max);
 	r = (spi >= min && spi <= max) ^ invert;
-	duprintf(" result %s\n", r ? "PASS" : "FAILED");
+	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
 	return r;
 }
 
@@ -57,7 +51,7 @@ static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
-		duprintf("Dropping evil ESP tinygram.\n");
+		pr_debug("Dropping evil ESP tinygram.\n");
 		*par->hotdrop = true;
 		return false;
 	}
@@ -71,7 +65,7 @@ static bool esp_mt_check(const struct xt_mtchk_param *par)
 	const struct xt_esp *espinfo = par->matchinfo;
 
 	if (espinfo->invflags & ~XT_ESP_INV_MASK) {
-		duprintf("xt_esp: unknown flags %X\n", espinfo->invflags);
+		pr_debug("unknown flags %X\n", espinfo->invflags);
 		return false;
 	}
 
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index d06bb2dd3900..4fa90c86fdb5 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -8,7 +8,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/udp.h>
@@ -26,12 +26,6 @@ MODULE_DESCRIPTION("Xtables: multiple port matching for TCP, UDP, UDP-Lite, SCTP
 MODULE_ALIAS("ipt_multiport");
 MODULE_ALIAS("ip6t_multiport");
 
-#if 0
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
 /* Returns 1 if the port is matched by the test, 0 otherwise. */
 static inline bool
 ports_match_v0(const u_int16_t *portlist, enum xt_multiport_flags flags,
@@ -63,7 +57,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
 		if (minfo->pflags[i]) {
 			/* range port matching */
 			e = minfo->ports[++i];
-			duprintf("src or dst matches with %d-%d?\n", s, e);
+			pr_debug("src or dst matches with %d-%d?\n", s, e);
 
 			if (minfo->flags == XT_MULTIPORT_SOURCE
 			    && src >= s && src <= e)
@@ -77,7 +71,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
 				return true ^ minfo->invert;
 		} else {
 			/* exact port matching */
-			duprintf("src or dst matches with %d?\n", s);
+			pr_debug("src or dst matches with %d?\n", s);
 
 			if (minfo->flags == XT_MULTIPORT_SOURCE
 			    && src == s)
@@ -109,7 +103,7 @@ multiport_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
-		duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n");
+		pr_debug("Dropping evil offset=0 tinygram.\n");
 		*par->hotdrop = true;
 		return false;
 	}
@@ -133,7 +127,7 @@ multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		/* We've been asked to examine this packet, and we
 		 * can't.  Hence, no choice but to drop.
 		 */
-		duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n");
+		pr_debug("Dropping evil offset=0 tinygram.\n");
 		*par->hotdrop = true;
 		return false;
 	}
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index a189ada9128f..43c7e1de532c 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -1,3 +1,4 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <net/ip.h>
@@ -15,12 +16,6 @@ MODULE_DESCRIPTION("Xtables: SCTP protocol packet match");
 MODULE_ALIAS("ipt_sctp");
 MODULE_ALIAS("ip6t_sctp");
 
-#ifdef DEBUG_SCTP
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
 #define SCCHECK(cond, option, flag, invflag) (!((flag) & (option)) \
 					      || (!!((invflag) & (option)) ^ (cond)))
 
@@ -52,7 +47,7 @@ match_packet(const struct sk_buff *skb,
 	const struct xt_sctp_flag_info *flag_info = info->flag_info;
 	int flag_count = info->flag_count;
 
-#ifdef DEBUG_SCTP
+#ifdef DEBUG
 	int i = 0;
 #endif
 
@@ -62,17 +57,19 @@ match_packet(const struct sk_buff *skb,
 	do {
 		sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch);
 		if (sch == NULL || sch->length == 0) {
-			duprintf("Dropping invalid SCTP packet.\n");
+			pr_debug("Dropping invalid SCTP packet.\n");
 			*hotdrop = true;
 			return false;
 		}
-
-		duprintf("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d\tflags: %x\n",
-				++i, offset, sch->type, htons(sch->length), sch->flags);
-
+#ifdef DEBUG
+		pr_debug("Chunk num: %d\toffset: %d\ttype: %d\tlength: %d"
+			 "\tflags: %x\n",
+			 ++i, offset, sch->type, htons(sch->length),
+			 sch->flags);
+#endif
 		offset += (ntohs(sch->length) + 3) & ~3;
 
-		duprintf("skb->len: %d\toffset: %d\n", skb->len, offset);
+		pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset);
 
 		if (SCTP_CHUNKMAP_IS_SET(info->chunkmap, sch->type)) {
 			switch (chunk_match_type) {
@@ -124,17 +121,17 @@ sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	sctp_sctphdr_t _sh;
 
 	if (par->fragoff != 0) {
-		duprintf("Dropping non-first fragment.. FIXME\n");
+		pr_debug("Dropping non-first fragment.. FIXME\n");
 		return false;
 	}
 
 	sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh);
 	if (sh == NULL) {
-		duprintf("Dropping evil TCP offset=0 tinygram.\n");
+		pr_debug("Dropping evil TCP offset=0 tinygram.\n");
 		*par->hotdrop = true;
 		return false;
 	}
-	duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
+	pr_debug("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
 
 	return  SCCHECK(ntohs(sh->source) >= info->spts[0]
 			&& ntohs(sh->source) <= info->spts[1],
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 1ebdc4934eed..b53887f83c44 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -1,3 +1,4 @@
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/types.h>
 #include <linux/module.h>
 #include <net/ip.h>
@@ -19,13 +20,6 @@ MODULE_ALIAS("ipt_tcp");
 MODULE_ALIAS("ip6t_udp");
 MODULE_ALIAS("ip6t_tcp");
 
-#ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
-
 /* Returns 1 if the port is matched by the range, 0 otherwise */
 static inline bool
 port_match(u_int16_t min, u_int16_t max, u_int16_t port, bool invert)
@@ -46,7 +40,7 @@ tcp_find_option(u_int8_t option,
 	u_int8_t _opt[60 - sizeof(struct tcphdr)];
 	unsigned int i;
 
-	duprintf("tcp_match: finding option\n");
+	pr_debug("finding option\n");
 
 	if (!optlen)
 		return invert;
@@ -82,7 +76,7 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		   flag overwrite to pass the direction checks.
 		*/
 		if (par->fragoff == 1) {
-			duprintf("Dropping evil TCP offset=1 frag.\n");
+			pr_debug("Dropping evil TCP offset=1 frag.\n");
 			*par->hotdrop = true;
 		}
 		/* Must not be a fragment. */
@@ -95,7 +89,7 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (th == NULL) {
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
-		duprintf("Dropping evil TCP offset=0 tinygram.\n");
+		pr_debug("Dropping evil TCP offset=0 tinygram.\n");
 		*par->hotdrop = true;
 		return false;
 	}
@@ -148,7 +142,7 @@ static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (uh == NULL) {
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
-		duprintf("Dropping evil UDP tinygram.\n");
+		pr_debug("Dropping evil UDP tinygram.\n");
 		*par->hotdrop = true;
 		return false;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 8bee4bad03c5b601bd6cea123c31025680587ccc Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 17 Mar 2010 16:04:40 +0100
Subject: netfilter: xt extensions: use pr_<level>

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_CONNSECMARK.c | 13 ++++++-------
 net/netfilter/xt_DSCP.c        |  4 ++--
 net/netfilter/xt_HL.c          | 10 ++++------
 net/netfilter/xt_LED.c         | 10 ++++------
 net/netfilter/xt_SECMARK.c     | 20 ++++++++++----------
 net/netfilter/xt_TCPMSS.c      | 18 +++++++++---------
 net/netfilter/xt_cluster.c     | 11 ++++++-----
 net/netfilter/xt_connbytes.c   |  5 +++--
 net/netfilter/xt_connlimit.c   |  5 +++--
 net/netfilter/xt_connmark.c    |  8 ++++----
 net/netfilter/xt_conntrack.c   |  6 +++---
 net/netfilter/xt_dscp.c        |  4 ++--
 net/netfilter/xt_hashlimit.c   | 18 ++++++++----------
 net/netfilter/xt_helper.c      |  6 +++---
 net/netfilter/xt_hl.c          | 12 ------------
 net/netfilter/xt_limit.c       |  6 +++---
 net/netfilter/xt_osf.c         |  8 ++++----
 net/netfilter/xt_physdev.c     |  8 ++++----
 net/netfilter/xt_policy.c      | 13 +++++--------
 net/netfilter/xt_recent.c      |  6 +++---
 net/netfilter/xt_state.c       |  4 ++--
 21 files changed, 88 insertions(+), 107 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index b54c3756fdc3..6812865488d6 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -15,6 +15,7 @@
  * published by the Free Software Foundation.
  *
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter/x_tables.h>
@@ -22,8 +23,6 @@
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_ecache.h>
 
-#define PFX "CONNSECMARK: "
-
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("James Morris <jmorris@redhat.com>");
 MODULE_DESCRIPTION("Xtables: target for copying between connection and security mark");
@@ -91,8 +90,8 @@ static bool connsecmark_tg_check(const struct xt_tgchk_param *par)
 
 	if (strcmp(par->table, "mangle") != 0 &&
 	    strcmp(par->table, "security") != 0) {
-		printk(KERN_INFO PFX "target only valid in the \'mangle\' "
-		       "or \'security\' tables, not \'%s\'.\n", par->table);
+		pr_info("target only valid in the \'mangle\' "
+			"or \'security\' tables, not \'%s\'.\n", par->table);
 		return false;
 	}
 
@@ -102,13 +101,13 @@ static bool connsecmark_tg_check(const struct xt_tgchk_param *par)
 		break;
 
 	default:
-		printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode);
+		pr_info("invalid mode: %hu\n", info->mode);
 		return false;
 	}
 
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
-		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->family);
+		pr_info("cannot load conntrack support for proto=%u\n",
+			par->family);
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 74ce89260056..bbf08a91c600 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -9,7 +9,7 @@
  *
  * See RFC2474 for a description of the DSCP field within the IP Header.
 */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
@@ -65,7 +65,7 @@ static bool dscp_tg_check(const struct xt_tgchk_param *par)
 	const struct xt_DSCP_info *info = par->targinfo;
 
 	if (info->dscp > XT_DSCP_MAX) {
-		printk(KERN_WARNING "DSCP: dscp %x out of range\n", info->dscp);
+		pr_info("dscp %x out of range\n", info->dscp);
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
index 10e789e2d12a..7004ed2ffa44 100644
--- a/net/netfilter/xt_HL.c
+++ b/net/netfilter/xt_HL.c
@@ -9,7 +9,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
@@ -106,8 +106,7 @@ static bool ttl_tg_check(const struct xt_tgchk_param *par)
 	const struct ipt_TTL_info *info = par->targinfo;
 
 	if (info->mode > IPT_TTL_MAXMODE) {
-		printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n",
-			info->mode);
+		pr_info("TTL: invalid or unknown mode %u\n", info->mode);
 		return false;
 	}
 	if (info->mode != IPT_TTL_SET && info->ttl == 0)
@@ -120,12 +119,11 @@ static bool hl_tg6_check(const struct xt_tgchk_param *par)
 	const struct ip6t_HL_info *info = par->targinfo;
 
 	if (info->mode > IP6T_HL_MAXMODE) {
-		printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n",
-			info->mode);
+		pr_info("invalid or unknown mode %u\n", info->mode);
 		return false;
 	}
 	if (info->mode != IP6T_HL_SET && info->hop_limit == 0) {
-		printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't "
+		pr_info("increment/decrement does not "
 			"make sense with value 0\n");
 		return false;
 	}
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 0d6c2885ebd6..f511bea9464a 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -18,7 +18,7 @@
  * 02110-1301 USA.
  *
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter/x_tables.h>
@@ -87,7 +87,7 @@ static bool led_tg_check(const struct xt_tgchk_param *par)
 	int err;
 
 	if (ledinfo->id[0] == '\0') {
-		printk(KERN_ERR KBUILD_MODNAME ": No 'id' parameter given.\n");
+		pr_info("No 'id' parameter given.\n");
 		return false;
 	}
 
@@ -99,11 +99,9 @@ static bool led_tg_check(const struct xt_tgchk_param *par)
 
 	err = led_trigger_register(&ledinternal->netfilter_led_trigger);
 	if (err) {
-		printk(KERN_CRIT KBUILD_MODNAME
-			": led_trigger_register() failed\n");
+		pr_warning("led_trigger_register() failed\n");
 		if (err == -EEXIST)
-			printk(KERN_ERR KBUILD_MODNAME
-				": Trigger name is already in use.\n");
+			pr_warning("Trigger name is already in use.\n");
 		goto exit_alloc;
 	}
 
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 7a6f9e6f5dfa..4855fd9d7c6f 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -12,6 +12,7 @@
  * published by the Free Software Foundation.
  *
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/selinux.h>
@@ -59,20 +60,19 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info)
 	err = selinux_string_to_sid(sel->selctx, &sel->selsid);
 	if (err) {
 		if (err == -EINVAL)
-			printk(KERN_INFO PFX "invalid SELinux context \'%s\'\n",
-			       sel->selctx);
+			pr_info("invalid SELinux context \'%s\'\n",
+				sel->selctx);
 		return false;
 	}
 
 	if (!sel->selsid) {
-		printk(KERN_INFO PFX "unable to map SELinux context \'%s\'\n",
-		       sel->selctx);
+		pr_info("unable to map SELinux context \'%s\'\n", sel->selctx);
 		return false;
 	}
 
 	err = selinux_secmark_relabel_packet_permission(sel->selsid);
 	if (err) {
-		printk(KERN_INFO PFX "unable to obtain relabeling permission\n");
+		pr_info("unable to obtain relabeling permission\n");
 		return false;
 	}
 
@@ -86,14 +86,14 @@ static bool secmark_tg_check(const struct xt_tgchk_param *par)
 
 	if (strcmp(par->table, "mangle") != 0 &&
 	    strcmp(par->table, "security") != 0) {
-		printk(KERN_INFO PFX "target only valid in the \'mangle\' "
-		       "or \'security\' tables, not \'%s\'.\n", par->table);
+		pr_info("target only valid in the \'mangle\' "
+			"or \'security\' tables, not \'%s\'.\n", par->table);
 		return false;
 	}
 
 	if (mode && mode != info->mode) {
-		printk(KERN_INFO PFX "mode already set to %hu cannot mix with "
-		       "rules for mode %hu\n", mode, info->mode);
+		pr_info("mode already set to %hu cannot mix with "
+			"rules for mode %hu\n", mode, info->mode);
 		return false;
 	}
 
@@ -104,7 +104,7 @@ static bool secmark_tg_check(const struct xt_tgchk_param *par)
 		break;
 
 	default:
-		printk(KERN_INFO PFX "invalid mode: %hu\n", info->mode);
+		pr_info("invalid mode: %hu\n", info->mode);
 		return false;
 	}
 
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 0e357ac9a2a8..2077da31c973 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -7,7 +7,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
@@ -67,14 +67,14 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
 		if (dst_mtu(skb_dst(skb)) <= minlen) {
 			if (net_ratelimit())
-				printk(KERN_ERR "xt_TCPMSS: "
+				pr_err("xt_TCPMSS: "
 				       "unknown or invalid path-MTU (%u)\n",
 				       dst_mtu(skb_dst(skb)));
 			return -1;
 		}
 		if (in_mtu <= minlen) {
 			if (net_ratelimit())
-				printk(KERN_ERR "xt_TCPMSS: unknown or "
+				pr_err("xt_TCPMSS: unknown or "
 				       "invalid path-MTU (%u)\n", in_mtu);
 			return -1;
 		}
@@ -245,14 +245,14 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
 	    (par->hook_mask & ~((1 << NF_INET_FORWARD) |
 			   (1 << NF_INET_LOCAL_OUT) |
 			   (1 << NF_INET_POST_ROUTING))) != 0) {
-		printk("xt_TCPMSS: path-MTU clamping only supported in "
-		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
+		pr_info("path-MTU clamping only supported in "
+			"FORWARD, OUTPUT and POSTROUTING hooks\n");
 		return false;
 	}
 	xt_ematch_foreach(ematch, e)
 		if (find_syn_match(ematch))
 			return true;
-	printk("xt_TCPMSS: Only works on TCP SYN packets\n");
+	pr_info("Only works on TCP SYN packets\n");
 	return false;
 }
 
@@ -267,14 +267,14 @@ static bool tcpmss_tg6_check(const struct xt_tgchk_param *par)
 	    (par->hook_mask & ~((1 << NF_INET_FORWARD) |
 			   (1 << NF_INET_LOCAL_OUT) |
 			   (1 << NF_INET_POST_ROUTING))) != 0) {
-		printk("xt_TCPMSS: path-MTU clamping only supported in "
-		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
+		pr_info("path-MTU clamping only supported in "
+			"FORWARD, OUTPUT and POSTROUTING hooks\n");
 		return false;
 	}
 	xt_ematch_foreach(ematch, e)
 		if (find_syn_match(ematch))
 			return true;
-	printk("xt_TCPMSS: Only works on TCP SYN packets\n");
+	pr_info("Only works on TCP SYN packets\n");
 	return false;
 }
 #endif
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 225ee3ecd69d..4c273e871301 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -5,6 +5,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/jhash.h>
@@ -136,14 +137,14 @@ static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
 	struct xt_cluster_match_info *info = par->matchinfo;
 
 	if (info->total_nodes > XT_CLUSTER_NODES_MAX) {
-		printk(KERN_ERR "xt_cluster: you have exceeded the maximum "
-				"number of cluster nodes (%u > %u)\n",
-				info->total_nodes, XT_CLUSTER_NODES_MAX);
+		pr_info("you have exceeded the maximum "
+			"number of cluster nodes (%u > %u)\n",
+			info->total_nodes, XT_CLUSTER_NODES_MAX);
 		return false;
 	}
 	if (info->node_mask >= (1ULL << info->total_nodes)) {
-		printk(KERN_ERR "xt_cluster: this node mask cannot be "
-				"higher than the total number of nodes\n");
+		pr_info("this node mask cannot be "
+			"higher than the total number of nodes\n");
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 955e6598a7f0..edb7bbd9ae54 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -1,6 +1,7 @@
 /* Kernel module to match connection tracking byte counter.
  * GPL (C) 2002 Martin Devera (devik@cdi.cz).
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/bitops.h>
 #include <linux/skbuff.h>
@@ -107,8 +108,8 @@ static bool connbytes_mt_check(const struct xt_mtchk_param *par)
 		return false;
 
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
-		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->family);
+		pr_info("cannot load conntrack support for proto=%u\n",
+			par->family);
 		return false;
 	}
 
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 9e624af40f80..d5b26dab9e26 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -11,6 +11,7 @@
  * Kernel module to match connection tracking information.
  * GPL (C) 1999  Rusty Russell (rusty@rustcorp.com.au).
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <linux/ip.h>
@@ -225,8 +226,8 @@ static bool connlimit_mt_check(const struct xt_mtchk_param *par)
 		connlimit_rnd_inited = true;
 	}
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
-		printk(KERN_WARNING "cannot load conntrack support for "
-		       "address family %u\n", par->family);
+		pr_info("cannot load conntrack support for "
+			"address family %u\n", par->family);
 		return false;
 	}
 
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 97465a472344..7a51ba63f545 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -77,8 +77,8 @@ connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 static bool connmark_tg_check(const struct xt_tgchk_param *par)
 {
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
-		printk(KERN_WARNING "cannot load conntrack support for "
-		       "proto=%u\n", par->family);
+		pr_info("cannot load conntrack support for proto=%u\n",
+			par->family);
 		return false;
 	}
 	return true;
@@ -106,8 +106,8 @@ connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 static bool connmark_mt_check(const struct xt_mtchk_param *par)
 {
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
-		printk(KERN_WARNING "cannot load conntrack support for "
-		       "proto=%u\n", par->family);
+		pr_info("cannot load conntrack support for proto=%u\n",
+			par->family);
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index ae66305f0fe5..387172b6b0d8 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -9,7 +9,7 @@
  *	it under the terms of the GNU General Public License version 2 as
  *	published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <net/ipv6.h>
@@ -209,8 +209,8 @@ conntrack_mt_v2(const struct sk_buff *skb, const struct xt_match_param *par)
 static bool conntrack_mt_check(const struct xt_mtchk_param *par)
 {
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
-		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->family);
+		pr_info("cannot load conntrack support for proto=%u\n",
+			par->family);
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index fe58cd01ef99..6ecedc13db0c 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
@@ -47,7 +47,7 @@ static bool dscp_mt_check(const struct xt_mtchk_param *par)
 	const struct xt_dscp_info *info = par->matchinfo;
 
 	if (info->dscp > XT_DSCP_MAX) {
-		printk(KERN_ERR "xt_dscp: dscp %x out of range\n", info->dscp);
+		pr_info("dscp %x out of range\n", info->dscp);
 		return false;
 	}
 
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 52327c5c1f1d..8f3e0c02ca54 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -7,6 +7,7 @@
  *
  * Development of this code was funded by Astaro AG, http://www.astaro.com/
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/random.h>
@@ -166,17 +167,14 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
 	if (ht->cfg.max && ht->count >= ht->cfg.max) {
 		/* FIXME: do something. question is what.. */
 		if (net_ratelimit())
-			printk(KERN_WARNING
-				"xt_hashlimit: max count of %u reached\n",
-				ht->cfg.max);
+			pr_err("max count of %u reached\n", ht->cfg.max);
 		return NULL;
 	}
 
 	ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
 	if (!ent) {
 		if (net_ratelimit())
-			printk(KERN_ERR
-				"xt_hashlimit: can't allocate dsthash_ent\n");
+			pr_err("cannot allocate dsthash_ent\n");
 		return NULL;
 	}
 	memcpy(&ent->dst, dst, sizeof(ent->dst));
@@ -681,8 +679,8 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 	/* Check for overflow. */
 	if (r->cfg.burst == 0 ||
 	    user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) {
-		printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n",
-		       r->cfg.avg, r->cfg.burst);
+		pr_info("overflow, try lower: %u/%u\n",
+			r->cfg.avg, r->cfg.burst);
 		return false;
 	}
 	if (r->cfg.mode == 0 ||
@@ -718,8 +716,8 @@ static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
 	if (info->cfg.burst == 0 ||
 	    user2credits(info->cfg.avg * info->cfg.burst) <
 	    user2credits(info->cfg.avg)) {
-		printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n",
-		       info->cfg.avg, info->cfg.burst);
+		pr_info("overflow, try lower: %u/%u\n",
+			info->cfg.avg, info->cfg.burst);
 		return false;
 	}
 	if (info->cfg.gc_interval == 0 || info->cfg.expire == 0)
@@ -1018,7 +1016,7 @@ static int __init hashlimit_mt_init(void)
 					    sizeof(struct dsthash_ent), 0, 0,
 					    NULL);
 	if (!hashlimit_cachep) {
-		printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n");
+		pr_warning("unable to create slab cache\n");
 		goto err2;
 	}
 	return 0;
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 64fc7f277221..482aff2ccf7c 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter.h>
@@ -59,8 +59,8 @@ static bool helper_mt_check(const struct xt_mtchk_param *par)
 	struct xt_helper_info *info = par->matchinfo;
 
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
-		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->family);
+		pr_info("cannot load conntrack support for proto=%u\n",
+			par->family);
 		return false;
 	}
 	info->name[29] = '\0';
diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c
index 7726154c87b2..be53f7299623 100644
--- a/net/netfilter/xt_hl.c
+++ b/net/netfilter/xt_hl.c
@@ -39,10 +39,6 @@ static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			return ttl < info->ttl;
 		case IPT_TTL_GT:
 			return ttl > info->ttl;
-		default:
-			printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
-				info->mode);
-			return false;
 	}
 
 	return false;
@@ -56,20 +52,12 @@ static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	switch (info->mode) {
 		case IP6T_HL_EQ:
 			return ip6h->hop_limit == info->hop_limit;
-			break;
 		case IP6T_HL_NE:
 			return ip6h->hop_limit != info->hop_limit;
-			break;
 		case IP6T_HL_LT:
 			return ip6h->hop_limit < info->hop_limit;
-			break;
 		case IP6T_HL_GT:
 			return ip6h->hop_limit > info->hop_limit;
-			break;
-		default:
-			printk(KERN_WARNING "ip6t_hl: unknown mode %d\n",
-				info->mode);
-			return false;
 	}
 
 	return false;
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index a0ca5339af41..b3dfca63fa52 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -5,7 +5,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
@@ -105,8 +105,8 @@ static bool limit_mt_check(const struct xt_mtchk_param *par)
 	/* Check for overflow. */
 	if (r->burst == 0
 	    || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
-		printk("Overflow in xt_limit, try lower: %u/%u\n",
-		       r->avg, r->burst);
+		pr_info("Overflow, try lower: %u/%u\n",
+			r->avg, r->burst);
 		return false;
 	}
 
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 4169e200588d..8dcde13a0781 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -16,7 +16,7 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/kernel.h>
 
@@ -382,14 +382,14 @@ static int __init xt_osf_init(void)
 
 	err = nfnetlink_subsys_register(&xt_osf_nfnetlink);
 	if (err < 0) {
-		printk(KERN_ERR "Failed (%d) to register OSF nsfnetlink helper.\n", err);
+		pr_err("Failed to register OSF nsfnetlink helper (%d)\n", err);
 		goto err_out_exit;
 	}
 
 	err = xt_register_match(&xt_osf_match);
 	if (err) {
-		printk(KERN_ERR "Failed (%d) to register OS fingerprint "
-				"matching module.\n", err);
+		pr_err("Failed to register OS fingerprint "
+		       "matching module (%d)\n", err);
 		goto err_out_remove;
 	}
 
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 8d28ca5848bc..3d42a278408f 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -7,7 +7,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter_bridge.h>
@@ -95,9 +95,9 @@ static bool physdev_mt_check(const struct xt_mtchk_param *par)
 	     info->invert & XT_PHYSDEV_OP_BRIDGED) &&
 	    par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
 	    (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
-		printk(KERN_WARNING "physdev match: using --physdev-out in the "
-		       "OUTPUT, FORWARD and POSTROUTING chains for non-bridged "
-		       "traffic is not supported anymore.\n");
+		pr_info("using --physdev-out in the OUTPUT, FORWARD and "
+			"POSTROUTING chains for non-bridged traffic is not "
+			"supported anymore.\n");
 		if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
 			return false;
 	}
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index cc033d2c35ea..de3aded6afb8 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -133,24 +133,21 @@ static bool policy_mt_check(const struct xt_mtchk_param *par)
 	const struct xt_policy_info *info = par->matchinfo;
 
 	if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) {
-		printk(KERN_ERR "xt_policy: neither incoming nor "
-				"outgoing policy selected\n");
+		pr_info("neither incoming nor outgoing policy selected\n");
 		return false;
 	}
 	if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
 	    (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) {
-		printk(KERN_ERR "xt_policy: output policy not valid in "
-				"PRE_ROUTING and INPUT\n");
+		pr_info("output policy not valid in PREROUTING and INPUT\n");
 		return false;
 	}
 	if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
 	    (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) {
-		printk(KERN_ERR "xt_policy: input policy not valid in "
-				"POST_ROUTING and OUTPUT\n");
+		pr_info("input policy not valid in POSTROUTING and OUTPUT\n");
 		return false;
 	}
 	if (info->len > XT_POLICY_MAX_ELEM) {
-		printk(KERN_ERR "xt_policy: too many policy elements\n");
+		pr_info("too many policy elements\n");
 		return false;
 	}
 	return true;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index a0ea1a21c470..aa9817e91338 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -12,6 +12,7 @@
  * Author: Stephen Frost <sfrost@snowman.net>
  * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/init.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
@@ -520,7 +521,7 @@ recent_mt_proc_write(struct file *file, const char __user *input,
 		add = true;
 		break;
 	default:
-		printk(KERN_INFO KBUILD_MODNAME ": Need +ip, -ip or /\n");
+		pr_info("Need \"+ip\", \"-ip\" or \"/\"\n");
 		return -EINVAL;
 	}
 
@@ -535,8 +536,7 @@ recent_mt_proc_write(struct file *file, const char __user *input,
 	}
 
 	if (!succ) {
-		printk(KERN_INFO KBUILD_MODNAME ": illegal address written "
-		       "to procfs\n");
+		pr_info("illegal address written to procfs\n");
 		return -EINVAL;
 	}
 
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index ef09b2e2970f..94893be80276 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -40,8 +40,8 @@ state_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 static bool state_mt_check(const struct xt_mtchk_param *par)
 {
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
-		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->family);
+		pr_info("cannot load conntrack support for proto=%u\n",
+			par->family);
 		return false;
 	}
 	return true;
-- 
cgit v1.2.3-59-g8ed1b


From 1e94d72feab025b8f7c55d07020602f82f3a97dd Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Thu, 18 Mar 2010 17:45:44 -0700
Subject: rps: Fixed build with CONFIG_SMP not enabled.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index de1a52bcb9e0..726ecd1af535 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1341,7 +1341,9 @@ struct softnet_data {
 	struct sk_buff		*completion_queue;
 
 	/* Elements below can be accessed between CPUs for RPS */
+#ifdef CONFIG_SMP
 	struct call_single_data	csd ____cacheline_aligned_in_smp;
+#endif
 	struct sk_buff_head	input_pkt_queue;
 	struct napi_struct	backlog;
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index 17b168671501..1a7e1d1d5ad9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2174,6 +2174,7 @@ int weight_p __read_mostly = 64;            /* old backlog weight */
 
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
+#ifdef CONFIG_SMP
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
@@ -2293,6 +2294,7 @@ static void trigger_softirq(void *data)
 	__napi_schedule(&queue->backlog);
 	__get_cpu_var(netdev_rx_stat).received_rps++;
 }
+#endif /* CONFIG_SMP */
 
 /*
  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
@@ -2320,6 +2322,7 @@ enqueue:
 
 		/* Schedule NAPI for backlog device */
 		if (napi_schedule_prep(&queue->backlog)) {
+#ifdef CONFIG_SMP
 			if (cpu != smp_processor_id()) {
 				struct rps_remote_softirq_cpus *rcpus =
 				    &__get_cpu_var(rps_remote_softirq_cpus);
@@ -2328,6 +2331,9 @@ enqueue:
 				__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 			} else
 				__napi_schedule(&queue->backlog);
+#else
+			__napi_schedule(&queue->backlog);
+#endif
 		}
 		goto enqueue;
 	}
@@ -2367,9 +2373,13 @@ int netif_rx(struct sk_buff *skb)
 	if (!skb->tstamp.tv64)
 		net_timestamp(skb);
 
+#ifdef CONFIG_SMP
 	cpu = get_rps_cpu(skb->dev, skb);
 	if (cpu < 0)
 		cpu = smp_processor_id();
+#else
+	cpu = smp_processor_id();
+#endif
 
 	return enqueue_to_backlog(skb, cpu);
 }
@@ -2735,6 +2745,7 @@ out:
  */
 int netif_receive_skb(struct sk_buff *skb)
 {
+#ifdef CONFIG_SMP
 	int cpu;
 
 	cpu = get_rps_cpu(skb->dev, skb);
@@ -2743,6 +2754,9 @@ int netif_receive_skb(struct sk_buff *skb)
 		return __netif_receive_skb(skb);
 	else
 		return enqueue_to_backlog(skb, cpu);
+#else
+	return __netif_receive_skb(skb);
+#endif
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
@@ -3168,6 +3182,7 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
+#ifdef CONFIG_SMP
 /*
  * net_rps_action sends any pending IPI's for rps.  This is only called from
  * softirq and interrupts must be enabled.
@@ -3184,6 +3199,7 @@ static void net_rps_action(cpumask_t *mask)
 	}
 	cpus_clear(*mask);
 }
+#endif
 
 static void net_rx_action(struct softirq_action *h)
 {
@@ -3191,8 +3207,10 @@ static void net_rx_action(struct softirq_action *h)
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
+#ifdef CONFIG_SMP
 	int select;
 	struct rps_remote_softirq_cpus *rcpus;
+#endif
 
 	local_irq_disable();
 
@@ -3255,6 +3273,7 @@ static void net_rx_action(struct softirq_action *h)
 		netpoll_poll_unlock(have);
 	}
 out:
+#ifdef CONFIG_SMP
 	rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
 	select = rcpus->select;
 	rcpus->select ^= 1;
@@ -3262,6 +3281,9 @@ out:
 	local_irq_enable();
 
 	net_rps_action(&rcpus->mask[select]);
+#else
+	local_irq_enable();
+#endif
 
 #ifdef CONFIG_NET_DMA
 	/*
@@ -6204,9 +6226,11 @@ static int __init net_dev_init(void)
 		queue->completion_queue = NULL;
 		INIT_LIST_HEAD(&queue->poll_list);
 
+#ifdef CONFIG_SMP
 		queue->csd.func = trigger_softirq;
 		queue->csd.info = queue;
 		queue->csd.flags = 0;
+#endif
 
 		queue->backlog.poll = process_backlog;
 		queue->backlog.weight = weight_p;
-- 
cgit v1.2.3-59-g8ed1b


From 93d9b7d7a85cfb4e1711d5226eba73586dd4919f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 10 Mar 2010 10:28:56 +0000
Subject: net: rename notifier defines for netdev type change

Since netdevices other than bonding can, in general, change type as
well, give these event types names that are not bonding-specific.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  6 ++++--
 include/linux/notifier.h        |  4 ++--
 net/ipv4/devinet.c              |  4 ++--
 net/ipv6/addrconf.c             | 16 ++++++++--------
 4 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 430c02267d7e..7eeb18751d67 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1480,14 +1480,16 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 				 bond_dev->name,
 				 bond_dev->type, slave_dev->type);
 
-			netdev_bonding_change(bond_dev, NETDEV_BONDING_OLDTYPE);
+			netdev_bonding_change(bond_dev,
+					      NETDEV_PRE_TYPE_CHANGE);
 
 			if (slave_dev->type != ARPHRD_ETHER)
 				bond_setup_by_slave(bond_dev, slave_dev);
 			else
 				ether_setup(bond_dev);
 
-			netdev_bonding_change(bond_dev, NETDEV_BONDING_NEWTYPE);
+			netdev_bonding_change(bond_dev,
+					      NETDEV_POST_TYPE_CHANGE);
 		}
 	} else if (bond_dev->type != slave_dev->type) {
 		pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it.\n",
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index fee6c2f68075..f3635fc6e942 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -199,8 +199,8 @@ static inline int notifier_to_errno(int ret)
 #define NETDEV_FEAT_CHANGE	0x000B
 #define NETDEV_BONDING_FAILOVER 0x000C
 #define NETDEV_PRE_UP		0x000D
-#define NETDEV_BONDING_OLDTYPE  0x000E
-#define NETDEV_BONDING_NEWTYPE  0x000F
+#define NETDEV_PRE_TYPE_CHANGE	0x000E
+#define NETDEV_POST_TYPE_CHANGE	0x000F
 #define NETDEV_POST_INIT	0x0010
 #define NETDEV_UNREGISTER_BATCH 0x0011
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 51ca946e3392..c75320ef95c2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1095,10 +1095,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 	case NETDEV_DOWN:
 		ip_mc_down(in_dev);
 		break;
-	case NETDEV_BONDING_OLDTYPE:
+	case NETDEV_PRE_TYPE_CHANGE:
 		ip_mc_unmap(in_dev);
 		break;
-	case NETDEV_BONDING_NEWTYPE:
+	case NETDEV_POST_TYPE_CHANGE:
 		ip_mc_remap(in_dev);
 		break;
 	case NETDEV_CHANGEMTU:
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3381b4317c27..8d41abc40db5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -137,8 +137,8 @@ static DEFINE_SPINLOCK(addrconf_verify_lock);
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
 
-static void addrconf_bonding_change(struct net_device *dev,
-				    unsigned long event);
+static void addrconf_type_change(struct net_device *dev,
+				 unsigned long event);
 static int addrconf_ifdown(struct net_device *dev, int how);
 
 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
@@ -2584,9 +2584,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 				return notifier_from_errno(err);
 		}
 		break;
-	case NETDEV_BONDING_OLDTYPE:
-	case NETDEV_BONDING_NEWTYPE:
-		addrconf_bonding_change(dev, event);
+	case NETDEV_PRE_TYPE_CHANGE:
+	case NETDEV_POST_TYPE_CHANGE:
+		addrconf_type_change(dev, event);
 		break;
 	}
 
@@ -2601,16 +2601,16 @@ static struct notifier_block ipv6_dev_notf = {
 	.priority = 0
 };
 
-static void addrconf_bonding_change(struct net_device *dev, unsigned long event)
+static void addrconf_type_change(struct net_device *dev, unsigned long event)
 {
 	struct inet6_dev *idev;
 	ASSERT_RTNL();
 
 	idev = __in6_dev_get(dev);
 
-	if (event == NETDEV_BONDING_NEWTYPE)
+	if (event == NETDEV_POST_TYPE_CHANGE)
 		ipv6_mc_remap(idev);
-	else if (event == NETDEV_BONDING_OLDTYPE)
+	else if (event == NETDEV_PRE_TYPE_CHANGE)
 		ipv6_mc_unmap(idev);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 3ca5b4042ecae5e73c59de62e4ac0db31c10e0f8 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 10 Mar 2010 10:29:35 +0000
Subject: bonding: check return value of nofitier when changing type

This patch makes it possible for other subsystems (for example bridge,
vlan, etc.) to refuse the bonding type change.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 11 +++++++++--
 include/linux/netdevice.h       |  2 +-
 net/core/dev.c                  |  4 ++--
 3 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 7eeb18751d67..cbe9e353d46a 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1480,8 +1480,15 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 				 bond_dev->name,
 				 bond_dev->type, slave_dev->type);
 
-			netdev_bonding_change(bond_dev,
-					      NETDEV_PRE_TYPE_CHANGE);
+			res = netdev_bonding_change(bond_dev,
+						    NETDEV_PRE_TYPE_CHANGE);
+			res = notifier_to_errno(res);
+			if (res) {
+				pr_err("%s: refused to change device type\n",
+				       bond_dev->name);
+				res = -EBUSY;
+				goto err_undo_flags;
+			}
 
 			if (slave_dev->type != ARPHRD_ETHER)
 				bond_setup_by_slave(bond_dev, slave_dev);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 726ecd1af535..813bed723f58 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2005,7 +2005,7 @@ extern void		__dev_addr_unsync(struct dev_addr_list **to, int *to_count, struct
 extern int		dev_set_promiscuity(struct net_device *dev, int inc);
 extern int		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
-extern void		netdev_bonding_change(struct net_device *dev,
+extern int		netdev_bonding_change(struct net_device *dev,
 					      unsigned long event);
 extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
diff --git a/net/core/dev.c b/net/core/dev.c
index 1a7e1d1d5ad9..d1f027c41e73 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1084,9 +1084,9 @@ void netdev_state_change(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_state_change);
 
-void netdev_bonding_change(struct net_device *dev, unsigned long event)
+int netdev_bonding_change(struct net_device *dev, unsigned long event)
 {
-	call_netdevice_notifiers(event, dev);
+	return call_netdevice_notifiers(event, dev);
 }
 EXPORT_SYMBOL(netdev_bonding_change);
 
-- 
cgit v1.2.3-59-g8ed1b


From 1c01fe14a87332cc88266fbd6e598319322eb96f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 10 Mar 2010 10:30:19 +0000
Subject: net: forbid underlaying devices to change its type

It's not desirable for underlying devices to change type. At present, it
is for example possible to have a bond whose type has changed from
Ethernet to Infiniband as a port of a bridge. This patch fixes this.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c  | 3 +++
 net/8021q/vlan.c       | 4 ++++
 net/bridge/br_notify.c | 4 ++++
 3 files changed, 11 insertions(+)

(limited to 'net')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 40faa368b07a..445e73c343ba 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -748,6 +748,9 @@ static int macvlan_device_event(struct notifier_block *unused,
 		list_for_each_entry_safe(vlan, next, &port->vlans, list)
 			vlan->dev->rtnl_link_ops->dellink(vlan->dev, NULL);
 		break;
+	case NETDEV_PRE_TYPE_CHANGE:
+		/* Forbid underlaying device to change its type. */
+		return NOTIFY_BAD;
 	}
 	return NOTIFY_DONE;
 }
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 453512266ea1..c39a5f41169c 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -530,6 +530,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		}
 		unregister_netdevice_many(&list);
 		break;
+
+	case NETDEV_PRE_TYPE_CHANGE:
+		/* Forbid underlaying device to change its type. */
+		return NOTIFY_BAD;
 	}
 
 out:
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 763a3ec292e5..1413b72acc7f 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -82,6 +82,10 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 	case NETDEV_UNREGISTER:
 		br_del_if(br, dev);
 		break;
+
+	case NETDEV_PRE_TYPE_CHANGE:
+		/* Forbid underlaying device to change its type. */
+		return NOTIFY_BAD;
 	}
 
 	/* Events that may cause spanning tree to refresh */
-- 
cgit v1.2.3-59-g8ed1b


From 1159683ef48469de71dc26f0ee1a9c30d131cf89 Mon Sep 17 00:00:00 2001
From: Zhitong Wang <zhitong.wangzt@alibaba-inc.com>
Date: Fri, 19 Mar 2010 16:01:54 +0100
Subject: netfilter: remove unused headers in net/ipv6/netfilter/ip6t_LOG.c

Remove unused headers in net/ipv6/netfilter/ip6t_LOG.c

Signed-off-by: Zhitong Wang <zhitong.wangzt@alibaba-inc.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/ip6t_LOG.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index b285fdf19050..e16c0c7d086d 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -11,7 +11,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/skbuff.h>
 #include <linux/if_arp.h>
 #include <linux/ip.h>
-- 
cgit v1.2.3-59-g8ed1b


From 2c46cd8163b25bf6f38e612e9f1d162f0357c8f1 Mon Sep 17 00:00:00 2001
From: Zhitong Wang <zhitong.wangzt@alibaba-inc.com>
Date: Fri, 19 Mar 2010 16:04:10 +0100
Subject: netfilter: remove unused headers in net/ipv4/netfilter/nf_nat_h323.c

Remove unused headers in net/ipv4/netfilter/nf_nat_h323.c

Signed-off-by: Zhitong Wang <zhitong.wangzt@alibaba-inc.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/nf_nat_h323.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 7e8e6fc75413..d4c061874f8f 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/tcp.h>
 #include <net/tcp.h>
 
-- 
cgit v1.2.3-59-g8ed1b


From 819bfecc4fc6b6e5a793f719a45b7146ce423b79 Mon Sep 17 00:00:00 2001
From: "florian@mickler.org" <florian@mickler.org>
Date: Sat, 13 Mar 2010 13:31:05 +0100
Subject: rename new rfkill sysfs knobs

This patch renames the (never officially released) sysfs-knobs
"blocked_hw" and "blocked_sw" to "hard" and "soft", as the hardware vs
software connotation is misleading.

It also gets rid of unneeded locks around u32 read access.

Signed-off-by: Florian Mickler <florian@mickler.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 Documentation/ABI/stable/sysfs-class-rfkill | 10 ++++-----
 net/rfkill/core.c                           | 35 +++++++----------------------
 2 files changed, 13 insertions(+), 32 deletions(-)

(limited to 'net')

diff --git a/Documentation/ABI/stable/sysfs-class-rfkill b/Documentation/ABI/stable/sysfs-class-rfkill
index b91c3f37453a..097f522c33bb 100644
--- a/Documentation/ABI/stable/sysfs-class-rfkill
+++ b/Documentation/ABI/stable/sysfs-class-rfkill
@@ -4,7 +4,7 @@ For details to this subsystem look at Documentation/rfkill.txt.
 
 For the deprecated /sys/class/rfkill/*/state and
 /sys/class/rfkill/*/claim knobs of this interface look in
-Documentation/ABI/obsolte/sysfs-class-rfkill.
+Documentation/ABI/obsolete/sysfs-class-rfkill.
 
 What: 		/sys/class/rfkill
 Date:		09-Jul-2007
@@ -42,8 +42,8 @@ Values: 	A numeric value.
 		1: true
 
 
-What:		/sys/class/rfkill/rfkill[0-9]+/blocked_hw
-Date:		23-Feb-2010
+What:		/sys/class/rfkill/rfkill[0-9]+/hard
+Date:		12-March-2010
 KernelVersion	v2.6.34
 Contact:	linux-wireless@vger.kernel.org
 Description: 	Current hardblock state. This file is read only.
@@ -55,8 +55,8 @@ Values: 	A numeric value.
 			the driver's control.
 
 
-What:		/sys/class/rfkill/rfkill[0-9]+/blocked_sw
-Date:		23-Feb-2010
+What:		/sys/class/rfkill/rfkill[0-9]+/soft
+Date:		12-March-2010
 KernelVersion	v2.6.34
 Contact:	linux-wireless@vger.kernel.org
 Description:	Current softblock state. This file is read and write.
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index 5f33151d70ae..7ae58b5b5a08 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -628,37 +628,25 @@ static ssize_t rfkill_persistent_show(struct device *dev,
 	return sprintf(buf, "%d\n", rfkill->persistent);
 }
 
-static ssize_t rfkill_blocked_hw_show(struct device *dev,
+static ssize_t rfkill_hard_show(struct device *dev,
 				 struct device_attribute *attr,
 				 char *buf)
 {
 	struct rfkill *rfkill = to_rfkill(dev);
-	unsigned long flags;
-	u32 state;
 
-	spin_lock_irqsave(&rfkill->lock, flags);
-	state = rfkill->state;
-	spin_unlock_irqrestore(&rfkill->lock, flags);
-
-	return sprintf(buf, "%d\n", (state & RFKILL_BLOCK_HW) ? 1 : 0 );
+	return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_HW) ? 1 : 0 );
 }
 
-static ssize_t rfkill_blocked_sw_show(struct device *dev,
+static ssize_t rfkill_soft_show(struct device *dev,
 				 struct device_attribute *attr,
 				 char *buf)
 {
 	struct rfkill *rfkill = to_rfkill(dev);
-	unsigned long flags;
-	u32 state;
-
-	spin_lock_irqsave(&rfkill->lock, flags);
-	state = rfkill->state;
-	spin_unlock_irqrestore(&rfkill->lock, flags);
 
-	return sprintf(buf, "%d\n", (state & RFKILL_BLOCK_SW) ? 1 : 0 );
+	return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0 );
 }
 
-static ssize_t rfkill_blocked_sw_store(struct device *dev,
+static ssize_t rfkill_soft_store(struct device *dev,
 				  struct device_attribute *attr,
 				  const char *buf, size_t count)
 {
@@ -698,14 +686,8 @@ static ssize_t rfkill_state_show(struct device *dev,
 				 char *buf)
 {
 	struct rfkill *rfkill = to_rfkill(dev);
-	unsigned long flags;
-	u32 state;
-
-	spin_lock_irqsave(&rfkill->lock, flags);
-	state = rfkill->state;
-	spin_unlock_irqrestore(&rfkill->lock, flags);
 
-	return sprintf(buf, "%d\n", user_state_from_blocked(state));
+	return sprintf(buf, "%d\n", user_state_from_blocked(rfkill->state));
 }
 
 static ssize_t rfkill_state_store(struct device *dev,
@@ -755,9 +737,8 @@ static struct device_attribute rfkill_dev_attrs[] = {
 	__ATTR(persistent, S_IRUGO, rfkill_persistent_show, NULL),
 	__ATTR(state, S_IRUGO|S_IWUSR, rfkill_state_show, rfkill_state_store),
 	__ATTR(claim, S_IRUGO|S_IWUSR, rfkill_claim_show, rfkill_claim_store),
-	__ATTR(sw, S_IRUGO|S_IWUSR, rfkill_blocked_sw_show,
-			rfkill_blocked_sw_store),
-	__ATTR(hw, S_IRUGO, rfkill_blocked_hw_show, NULL),
+	__ATTR(soft, S_IRUGO|S_IWUSR, rfkill_soft_show, rfkill_soft_store),
+	__ATTR(hard, S_IRUGO, rfkill_hard_show, NULL),
 	__ATTR_NULL
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 372e6c8f1f7b2bb68f9992d2e664925c73552a1d Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 17 Mar 2010 20:31:09 +0000
Subject: ipv6: convert temporary address list to list macros

Use list macros instead of open coded linked list.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h |  4 ++--
 net/ipv6/addrconf.c    | 30 ++++++++++++------------------
 2 files changed, 14 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 545d8b059bef..09d906460a43 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -58,7 +58,7 @@ struct inet6_ifaddr {
 	struct inet6_ifaddr	*if_next;       /* next addr in inet6_dev */
 
 #ifdef CONFIG_IPV6_PRIVACY
-	struct inet6_ifaddr	*tmp_next;	/* next addr in tempaddr_lst */
+	struct list_head	tmp_list;
 	struct inet6_ifaddr	*ifpub;
 	int			regen_count;
 #endif
@@ -175,7 +175,7 @@ struct inet6_dev {
 #ifdef CONFIG_IPV6_PRIVACY
 	u8			rndid[8];
 	struct timer_list	regen_timer;
-	struct inet6_ifaddr	*tempaddr_list;
+	struct list_head	tempaddr_list;
 #endif
 
 	struct neigh_parms	*nd_parms;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8d41abc40db5..f372f895cd41 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -401,6 +401,7 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 #endif
 
 #ifdef CONFIG_IPV6_PRIVACY
+	INIT_LIST_HEAD(&ndev->tempaddr_list);
 	setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);
 	if ((dev->flags&IFF_LOOPBACK) ||
 	    dev->type == ARPHRD_TUNNEL ||
@@ -679,8 +680,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 
 #ifdef CONFIG_IPV6_PRIVACY
 	if (ifa->flags&IFA_F_TEMPORARY) {
-		ifa->tmp_next = idev->tempaddr_list;
-		idev->tempaddr_list = ifa;
+		list_add(&ifa->tmp_list, &idev->tempaddr_list);
 		in6_ifa_hold(ifa);
 	}
 #endif
@@ -732,19 +732,12 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	write_lock_bh(&idev->lock);
 #ifdef CONFIG_IPV6_PRIVACY
 	if (ifp->flags&IFA_F_TEMPORARY) {
-		for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL;
-		     ifap = &ifa->tmp_next) {
-			if (ifa == ifp) {
-				*ifap = ifa->tmp_next;
-				if (ifp->ifpub) {
-					in6_ifa_put(ifp->ifpub);
-					ifp->ifpub = NULL;
-				}
-				__in6_ifa_put(ifp);
-				ifa->tmp_next = NULL;
-				break;
-			}
+		list_del(&ifp->tmp_list);
+		if (ifp->ifpub) {
+			in6_ifa_put(ifp->ifpub);
+			ifp->ifpub = NULL;
 		}
+		__in6_ifa_put(ifp);
 	}
 #endif
 
@@ -1970,7 +1963,7 @@ ok:
 #ifdef CONFIG_IPV6_PRIVACY
 			read_lock_bh(&in6_dev->lock);
 			/* update all temporary addresses in the list */
-			for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) {
+			list_for_each_entry(ift, &in6_dev->tempaddr_list, tmp_list) {
 				/*
 				 * When adjusting the lifetimes of an existing
 				 * temporary address, only lower the lifetimes.
@@ -2675,9 +2668,10 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		in6_dev_put(idev);
 
 	/* clear tempaddr list */
-	while ((ifa = idev->tempaddr_list) != NULL) {
-		idev->tempaddr_list = ifa->tmp_next;
-		ifa->tmp_next = NULL;
+	while (!list_empty(&idev->tempaddr_list)) {
+		ifa = list_first_entry(&idev->tempaddr_list,
+				       struct inet6_ifaddr, tmp_list);
+		list_del(&ifa->tmp_list);
 		ifa->dead = 1;
 		write_unlock_bh(&idev->lock);
 		spin_lock_bh(&ifa->lock);
-- 
cgit v1.2.3-59-g8ed1b


From c2e21293c054817c42eb5fa9c613d2ad51954136 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 17 Mar 2010 20:31:10 +0000
Subject: ipv6: convert addrconf list to hlist

Using hash list macros simplifies the code and helps the later RCU conversion.

This patch includes some initialization that is not strictly necessary,
since an empty hlist node/list is all zero: the list is in BSS and the
node is allocated with kzalloc.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h |   2 +-
 net/ipv6/addrconf.c    | 128 ++++++++++++++++++++-----------------------------
 2 files changed, 54 insertions(+), 76 deletions(-)

(limited to 'net')

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 09d906460a43..498401541519 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -54,7 +54,7 @@ struct inet6_ifaddr {
 	struct inet6_dev	*idev;
 	struct rt6_info		*rt;
 
-	struct inet6_ifaddr	*lst_next;      /* next addr in addr_lst */
+	struct hlist_node	addr_lst;
 	struct inet6_ifaddr	*if_next;       /* next addr in inet6_dev */
 
 #ifdef CONFIG_IPV6_PRIVACY
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f372f895cd41..0488b9f8071d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -126,7 +126,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
 /*
  *	Configured unicast address hash table
  */
-static struct inet6_ifaddr		*inet6_addr_lst[IN6_ADDR_HSIZE];
+static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
 static DEFINE_RWLOCK(addrconf_hash_lock);
 
 static void addrconf_verify(unsigned long);
@@ -528,7 +528,7 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
 void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 {
 	WARN_ON(ifp->if_next != NULL);
-	WARN_ON(ifp->lst_next != NULL);
+	WARN_ON(!hlist_unhashed(&ifp->addr_lst));
 
 #ifdef NET_REFCNT_DEBUG
 	printk(KERN_DEBUG "inet6_ifa_finish_destroy\n");
@@ -643,6 +643,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 
 	spin_lock_init(&ifa->lock);
 	init_timer(&ifa->timer);
+	INIT_HLIST_NODE(&ifa->addr_lst);
 	ifa->timer.data = (unsigned long) ifa;
 	ifa->scope = scope;
 	ifa->prefix_len = pfxlen;
@@ -669,8 +670,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	/* Add to big hash table */
 	hash = ipv6_addr_hash(addr);
 
-	ifa->lst_next = inet6_addr_lst[hash];
-	inet6_addr_lst[hash] = ifa;
+	hlist_add_head(&ifa->addr_lst, &inet6_addr_lst[hash]);
 	in6_ifa_hold(ifa);
 	write_unlock(&addrconf_hash_lock);
 
@@ -718,15 +718,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	ifp->dead = 1;
 
 	write_lock_bh(&addrconf_hash_lock);
-	for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL;
-	     ifap = &ifa->lst_next) {
-		if (ifa == ifp) {
-			*ifap = ifa->lst_next;
-			__in6_ifa_put(ifp);
-			ifa->lst_next = NULL;
-			break;
-		}
-	}
+	hlist_del_init(&ifp->addr_lst);
+	__in6_ifa_put(ifp);
 	write_unlock_bh(&addrconf_hash_lock);
 
 	write_lock_bh(&idev->lock);
@@ -1277,11 +1270,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
 int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
 		  struct net_device *dev, int strict)
 {
-	struct inet6_ifaddr * ifp;
+	struct inet6_ifaddr *ifp = NULL;
+	struct hlist_node *node;
 	u8 hash = ipv6_addr_hash(addr);
 
 	read_lock_bh(&addrconf_hash_lock);
-	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+	hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -1300,10 +1294,11 @@ static
 int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
 		       struct net_device *dev)
 {
-	struct inet6_ifaddr * ifp;
+	struct inet6_ifaddr *ifp;
+	struct hlist_node *node;
 	u8 hash = ipv6_addr_hash(addr);
 
-	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+	hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1342,11 +1337,12 @@ EXPORT_SYMBOL(ipv6_chk_prefix);
 struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
 				     struct net_device *dev, int strict)
 {
-	struct inet6_ifaddr * ifp;
+	struct inet6_ifaddr *ifp = NULL;
+	struct hlist_node *node;
 	u8 hash = ipv6_addr_hash(addr);
 
 	read_lock_bh(&addrconf_hash_lock);
-	for(ifp = inet6_addr_lst[hash]; ifp; ifp=ifp->lst_next) {
+	hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -2612,7 +2608,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	struct inet6_dev *idev;
 	struct inet6_ifaddr *ifa, *keep_list, **bifa;
 	struct net *net = dev_net(dev);
-	int i;
 
 	ASSERT_RTNL();
 
@@ -2637,25 +2632,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	}
 
-	/* Step 2: clear hash table */
-	for (i=0; i<IN6_ADDR_HSIZE; i++) {
-		bifa = &inet6_addr_lst[i];
-
-		write_lock_bh(&addrconf_hash_lock);
-		while ((ifa = *bifa) != NULL) {
-			if (ifa->idev == idev &&
-			    (how || !(ifa->flags&IFA_F_PERMANENT) ||
-			     ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
-				*bifa = ifa->lst_next;
-				ifa->lst_next = NULL;
-				__in6_ifa_put(ifa);
-				continue;
-			}
-			bifa = &ifa->lst_next;
-		}
-		write_unlock_bh(&addrconf_hash_lock);
-	}
-
 	write_lock_bh(&idev->lock);
 
 	/* Step 3: clear flags for stateless addrconf */
@@ -2721,6 +2697,12 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		}
 		write_unlock_bh(&idev->lock);
 
+		/* clear hash table */
+		write_lock_bh(&addrconf_hash_lock);
+		hlist_del_init(&ifa->addr_lst);
+		__in6_ifa_put(ifa);
+		write_unlock_bh(&addrconf_hash_lock);
+
 		__ipv6_ifa_notify(RTM_DELADDR, ifa);
 		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
 		in6_ifa_put(ifa);
@@ -2963,36 +2945,37 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
 	struct net *net = seq_file_net(seq);
 
 	for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
-		ifa = inet6_addr_lst[state->bucket];
-
-		while (ifa && !net_eq(dev_net(ifa->idev->dev), net))
-			ifa = ifa->lst_next;
-		if (ifa)
-			break;
+		struct hlist_node *n;
+		hlist_for_each_entry(ifa, n,
+				     &inet6_addr_lst[state->bucket], addr_lst) {
+			if (net_eq(dev_net(ifa->idev->dev), net))
+				return ifa;
+		}
 	}
-	return ifa;
+	return NULL;
 }
 
-static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct inet6_ifaddr *ifa)
+static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
+					 struct inet6_ifaddr *ifa)
 {
 	struct if6_iter_state *state = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct hlist_node *n = &ifa->addr_lst;
 
-	ifa = ifa->lst_next;
-try_again:
-	if (ifa) {
-		if (!net_eq(dev_net(ifa->idev->dev), net)) {
-			ifa = ifa->lst_next;
-			goto try_again;
-		}
+	hlist_for_each_entry_continue(ifa, n, addr_lst) {
+		if (net_eq(dev_net(ifa->idev->dev), net))
+			return ifa;
 	}
 
-	if (!ifa && ++state->bucket < IN6_ADDR_HSIZE) {
-		ifa = inet6_addr_lst[state->bucket];
-		goto try_again;
+	while (++state->bucket < IN6_ADDR_HSIZE) {
+		hlist_for_each_entry(ifa, n,
+				     &inet6_addr_lst[state->bucket], addr_lst) {
+			if (net_eq(dev_net(ifa->idev->dev), net))
+				return ifa;
+		}
 	}
 
-	return ifa;
+	return NULL;
 }
 
 static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
@@ -3094,10 +3077,12 @@ void if6_proc_exit(void)
 int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
 {
 	int ret = 0;
-	struct inet6_ifaddr * ifp;
+	struct inet6_ifaddr *ifp = NULL;
+	struct hlist_node *n;
 	u8 hash = ipv6_addr_hash(addr);
+
 	read_lock_bh(&addrconf_hash_lock);
-	for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) {
+	hlist_for_each_entry(ifp, n, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -3118,6 +3103,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
 static void addrconf_verify(unsigned long foo)
 {
 	struct inet6_ifaddr *ifp;
+	struct hlist_node *node;
 	unsigned long now, next;
 	int i;
 
@@ -3131,7 +3117,7 @@ static void addrconf_verify(unsigned long foo)
 
 restart:
 		read_lock(&addrconf_hash_lock);
-		for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) {
+		hlist_for_each_entry(ifp, node, &inet6_addr_lst[i], addr_lst) {
 			unsigned long age;
 #ifdef CONFIG_IPV6_PRIVACY
 			unsigned long regen_advance;
@@ -4550,7 +4536,7 @@ EXPORT_SYMBOL(unregister_inet6addr_notifier);
 
 int __init addrconf_init(void)
 {
-	int err;
+	int i, err;
 
 	if ((err = ipv6_addr_label_init()) < 0) {
 		printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n",
@@ -4585,6 +4571,9 @@ int __init addrconf_init(void)
 	if (err)
 		goto errlo;
 
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		INIT_HLIST_HEAD(&inet6_addr_lst[i]);
+
 	register_netdevice_notifier(&ipv6_dev_notf);
 
 	addrconf_verify(0);
@@ -4613,7 +4602,6 @@ errlo:
 
 void addrconf_cleanup(void)
 {
-	struct inet6_ifaddr *ifa;
 	struct net_device *dev;
 	int i;
 
@@ -4634,18 +4622,8 @@ void addrconf_cleanup(void)
 	 *	Check hash table.
 	 */
 	write_lock_bh(&addrconf_hash_lock);
-	for (i=0; i < IN6_ADDR_HSIZE; i++) {
-		for (ifa=inet6_addr_lst[i]; ifa; ) {
-			struct inet6_ifaddr *bifa;
-
-			bifa = ifa;
-			ifa = ifa->lst_next;
-			printk(KERN_DEBUG "bug: IPv6 address leakage detected: ifa=%p\n", bifa);
-			/* Do not free it; something is wrong.
-			   Now we can investigate it with debugger.
-			 */
-		}
-	}
+	for (i = 0; i < IN6_ADDR_HSIZE; i++)
+		WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
 	write_unlock_bh(&addrconf_hash_lock);
 
 	del_timer(&addr_chk_timer);
-- 
cgit v1.2.3-59-g8ed1b


From 5c578aedcb21d79eeb4e9cf04ca5b276ac82614c Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 17 Mar 2010 20:31:11 +0000
Subject: IPv6: convert addrconf hash list to RCU

Convert from reader/writer lock to RCU and spinlock for addrconf
hash list.

Adds an additional helper macro, hlist_for_each_entry_continue_rcu,
to handle the continue case.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rculist.h | 13 ++++++++
 include/net/if_inet6.h  |  1 +
 net/ipv6/addrconf.c     | 85 +++++++++++++++++++++++++------------------------
 3 files changed, 58 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 2c9b46cff3d7..004908b104d5 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -428,5 +428,18 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
 		pos = rcu_dereference_raw(pos->next))
 
+/**
+ * hlist_for_each_entry_continue_rcu - iterate over a hlist continuing after current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue_rcu(tpos, pos, member)		\
+	for (pos = rcu_dereference((pos)->next);			\
+	     pos && ({ prefetch(pos->next); 1; }) &&			\
+	     ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });  \
+	     pos = rcu_dereference(pos->next))
+
+
 #endif	/* __KERNEL__ */
 #endif
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 498401541519..22a00b1d2c38 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -64,6 +64,7 @@ struct inet6_ifaddr {
 #endif
 
 	int			dead;
+	struct rcu_head		rcu;
 };
 
 struct ip6_sf_socklist {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0488b9f8071d..7ffd5eeab967 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -127,7 +127,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
  *	Configured unicast address hash table
  */
 static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
-static DEFINE_RWLOCK(addrconf_hash_lock);
+static DEFINE_SPINLOCK(addrconf_hash_lock);
 
 static void addrconf_verify(unsigned long);
 
@@ -523,8 +523,13 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
 }
 #endif
 
-/* Nobody refers to this ifaddr, destroy it */
+static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head)
+{
+	struct inet6_ifaddr *ifp = container_of(head, struct inet6_ifaddr, rcu);
+	kfree(ifp);
+}
 
+/* Nobody refers to this ifaddr, destroy it */
 void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 {
 	WARN_ON(ifp->if_next != NULL);
@@ -545,7 +550,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 	}
 	dst_release(&ifp->rt->u.dst);
 
-	kfree(ifp);
+	call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu);
 }
 
 static void
@@ -616,7 +621,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 		goto out2;
 	}
 
-	write_lock(&addrconf_hash_lock);
+	spin_lock(&addrconf_hash_lock);
 
 	/* Ignore adding duplicate addresses on an interface */
 	if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
@@ -670,9 +675,9 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	/* Add to big hash table */
 	hash = ipv6_addr_hash(addr);
 
-	hlist_add_head(&ifa->addr_lst, &inet6_addr_lst[hash]);
+	hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
 	in6_ifa_hold(ifa);
-	write_unlock(&addrconf_hash_lock);
+	spin_unlock(&addrconf_hash_lock);
 
 	write_lock(&idev->lock);
 	/* Add to inet6_dev unicast addr list. */
@@ -699,7 +704,7 @@ out2:
 
 	return ifa;
 out:
-	write_unlock(&addrconf_hash_lock);
+	spin_unlock(&addrconf_hash_lock);
 	goto out2;
 }
 
@@ -717,10 +722,10 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 
 	ifp->dead = 1;
 
-	write_lock_bh(&addrconf_hash_lock);
-	hlist_del_init(&ifp->addr_lst);
+	spin_lock_bh(&addrconf_hash_lock);
+	hlist_del_init_rcu(&ifp->addr_lst);
 	__in6_ifa_put(ifp);
-	write_unlock_bh(&addrconf_hash_lock);
+	spin_unlock_bh(&addrconf_hash_lock);
 
 	write_lock_bh(&idev->lock);
 #ifdef CONFIG_IPV6_PRIVACY
@@ -1274,8 +1279,8 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
 	struct hlist_node *node;
 	u8 hash = ipv6_addr_hash(addr);
 
-	read_lock_bh(&addrconf_hash_lock);
-	hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+	rcu_read_lock_bh();
+	hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -1285,7 +1290,8 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
 				break;
 		}
 	}
-	read_unlock_bh(&addrconf_hash_lock);
+	rcu_read_unlock_bh();
+
 	return ifp != NULL;
 }
 EXPORT_SYMBOL(ipv6_chk_addr);
@@ -1341,8 +1347,8 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
 	struct hlist_node *node;
 	u8 hash = ipv6_addr_hash(addr);
 
-	read_lock_bh(&addrconf_hash_lock);
-	hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+	rcu_read_lock_bh();
+	hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1353,7 +1359,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
 			}
 		}
 	}
-	read_unlock_bh(&addrconf_hash_lock);
+	rcu_read_unlock_bh();
 
 	return ifp;
 }
@@ -2698,10 +2704,10 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		write_unlock_bh(&idev->lock);
 
 		/* clear hash table */
-		write_lock_bh(&addrconf_hash_lock);
-		hlist_del_init(&ifa->addr_lst);
+		spin_lock_bh(&addrconf_hash_lock);
+		hlist_del_init_rcu(&ifa->addr_lst);
 		__in6_ifa_put(ifa);
-		write_unlock_bh(&addrconf_hash_lock);
+		spin_unlock_bh(&addrconf_hash_lock);
 
 		__ipv6_ifa_notify(RTM_DELADDR, ifa);
 		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
@@ -2946,11 +2952,10 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
 
 	for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
 		struct hlist_node *n;
-		hlist_for_each_entry(ifa, n,
-				     &inet6_addr_lst[state->bucket], addr_lst) {
+		hlist_for_each_entry_rcu(ifa, n, &inet6_addr_lst[state->bucket],
+					 addr_lst)
 			if (net_eq(dev_net(ifa->idev->dev), net))
 				return ifa;
-		}
 	}
 	return NULL;
 }
@@ -2962,10 +2967,9 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct hlist_node *n = &ifa->addr_lst;
 
-	hlist_for_each_entry_continue(ifa, n, addr_lst) {
+	hlist_for_each_entry_continue_rcu(ifa, n, addr_lst)
 		if (net_eq(dev_net(ifa->idev->dev), net))
 			return ifa;
-	}
 
 	while (++state->bucket < IN6_ADDR_HSIZE) {
 		hlist_for_each_entry(ifa, n,
@@ -2989,9 +2993,9 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(addrconf_hash_lock)
+	__acquires(rcu)
 {
-	read_lock_bh(&addrconf_hash_lock);
+	rcu_read_lock_bh();
 	return if6_get_idx(seq, *pos);
 }
 
@@ -3005,9 +3009,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void if6_seq_stop(struct seq_file *seq, void *v)
-	__releases(addrconf_hash_lock)
+	__releases(rcu)
 {
-	read_unlock_bh(&addrconf_hash_lock);
+	rcu_read_unlock_bh();
 }
 
 static int if6_seq_show(struct seq_file *seq, void *v)
@@ -3081,8 +3085,8 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
 	struct hlist_node *n;
 	u8 hash = ipv6_addr_hash(addr);
 
-	read_lock_bh(&addrconf_hash_lock);
-	hlist_for_each_entry(ifp, n, &inet6_addr_lst[hash], addr_lst) {
+	rcu_read_lock_bh();
+	hlist_for_each_entry_rcu(ifp, n, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -3091,7 +3095,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
 			break;
 		}
 	}
-	read_unlock_bh(&addrconf_hash_lock);
+	rcu_read_unlock_bh();
 	return ret;
 }
 #endif
@@ -3107,7 +3111,8 @@ static void addrconf_verify(unsigned long foo)
 	unsigned long now, next;
 	int i;
 
-	spin_lock_bh(&addrconf_verify_lock);
+	rcu_read_lock_bh();
+	spin_lock(&addrconf_verify_lock);
 	now = jiffies;
 	next = now + ADDR_CHECK_FREQUENCY;
 
@@ -3116,8 +3121,8 @@ static void addrconf_verify(unsigned long foo)
 	for (i=0; i < IN6_ADDR_HSIZE; i++) {
 
 restart:
-		read_lock(&addrconf_hash_lock);
-		hlist_for_each_entry(ifp, node, &inet6_addr_lst[i], addr_lst) {
+		hlist_for_each_entry_rcu(ifp, node,
+					 &inet6_addr_lst[i], addr_lst) {
 			unsigned long age;
 #ifdef CONFIG_IPV6_PRIVACY
 			unsigned long regen_advance;
@@ -3139,7 +3144,6 @@ restart:
 			    age >= ifp->valid_lft) {
 				spin_unlock(&ifp->lock);
 				in6_ifa_hold(ifp);
-				read_unlock(&addrconf_hash_lock);
 				ipv6_del_addr(ifp);
 				goto restart;
 			} else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
@@ -3161,7 +3165,6 @@ restart:
 
 				if (deprecate) {
 					in6_ifa_hold(ifp);
-					read_unlock(&addrconf_hash_lock);
 
 					ipv6_ifa_notify(0, ifp);
 					in6_ifa_put(ifp);
@@ -3179,7 +3182,7 @@ restart:
 						in6_ifa_hold(ifp);
 						in6_ifa_hold(ifpub);
 						spin_unlock(&ifp->lock);
-						read_unlock(&addrconf_hash_lock);
+
 						spin_lock(&ifpub->lock);
 						ifpub->regen_count = 0;
 						spin_unlock(&ifpub->lock);
@@ -3199,12 +3202,12 @@ restart:
 				spin_unlock(&ifp->lock);
 			}
 		}
-		read_unlock(&addrconf_hash_lock);
 	}
 
 	addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next;
 	add_timer(&addr_chk_timer);
-	spin_unlock_bh(&addrconf_verify_lock);
+	spin_unlock(&addrconf_verify_lock);
+	rcu_read_unlock_bh();
 }
 
 static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
@@ -4621,10 +4624,10 @@ void addrconf_cleanup(void)
 	/*
 	 *	Check hash table.
 	 */
-	write_lock_bh(&addrconf_hash_lock);
+	spin_lock_bh(&addrconf_hash_lock);
 	for (i = 0; i < IN6_ADDR_HSIZE; i++)
 		WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
-	write_unlock_bh(&addrconf_hash_lock);
+	spin_unlock_bh(&addrconf_hash_lock);
 
 	del_timer(&addr_chk_timer);
 	rtnl_unlock();
-- 
cgit v1.2.3-59-g8ed1b


From 3a88a81d89c20be312b3b219b185bbdde24b8fb8 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 17 Mar 2010 20:31:12 +0000
Subject: ipv6: use better hash for addrconf

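The existing hash function has a couple of issues:
  * it is hardwired to 16 buckets instead of using IN6_ADDR_HSIZE
  * its u8 return type limits it to 256 buckets, while some callers
    store the result in an int
  * it uses some old BSD algorithm rather than jhash

Switch to jhash_2words() masked with IN6_ADDR_HSIZE - 1. No random seed
is needed since this table is local only (based on assigned addresses).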

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7ffd5eeab967..1e5e41fe92bc 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -573,23 +573,14 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
 	*ifap = ifp;
 }
 
-/*
- *	Hash function taken from net_alias.c
- */
-static u8 ipv6_addr_hash(const struct in6_addr *addr)
+static u32 ipv6_addr_hash(const struct in6_addr *addr)
 {
-	__u32 word;
-
 	/*
 	 * We perform the hash function over the last 64 bits of the address
 	 * This will include the IEEE address token on links that support it.
 	 */
-
-	word = (__force u32)(addr->s6_addr32[2] ^ addr->s6_addr32[3]);
-	word ^= (word >> 16);
-	word ^= (word >> 8);
-
-	return ((word ^ (word >> 4)) & 0x0f);
+	return jhash_2words(addr->s6_addr32[2],  addr->s6_addr32[3], 0)
+		& (IN6_ADDR_HSIZE - 1);
 }
 
 /* On success it returns ifp with increased reference count */
@@ -600,7 +591,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 {
 	struct inet6_ifaddr *ifa = NULL;
 	struct rt6_info *rt;
-	int hash;
+	unsigned int hash;
 	int err = 0;
 	int addr_type = ipv6_addr_type(addr);
 
@@ -1277,7 +1268,7 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
 {
 	struct inet6_ifaddr *ifp = NULL;
 	struct hlist_node *node;
-	u8 hash = ipv6_addr_hash(addr);
+	unsigned int hash = ipv6_addr_hash(addr);
 
 	rcu_read_lock_bh();
 	hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
@@ -1302,7 +1293,7 @@ int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
 {
 	struct inet6_ifaddr *ifp;
 	struct hlist_node *node;
-	u8 hash = ipv6_addr_hash(addr);
+	unsigned int hash = ipv6_addr_hash(addr);
 
 	hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
@@ -1345,7 +1336,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
 {
 	struct inet6_ifaddr *ifp = NULL;
 	struct hlist_node *node;
-	u8 hash = ipv6_addr_hash(addr);
+	unsigned int hash = ipv6_addr_hash(addr);
 
 	rcu_read_lock_bh();
 	hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
@@ -3083,7 +3074,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
 	int ret = 0;
 	struct inet6_ifaddr *ifp = NULL;
 	struct hlist_node *n;
-	u8 hash = ipv6_addr_hash(addr);
+	unsigned int hash = ipv6_addr_hash(addr);
 
 	rcu_read_lock_bh();
 	hlist_for_each_entry_rcu(ifp, n, &inet6_addr_lst[hash], addr_lst) {
-- 
cgit v1.2.3-59-g8ed1b


From 502a2ffd7376ae27cfde6172257db0ff9d8cfec2 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 17 Mar 2010 20:31:13 +0000
Subject: ipv6: convert idev_list to list macros

Convert the per-interface list of IPv6 addresses to use the standard
list macros.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_ipv6.c |  9 ++---
 include/net/if_inet6.h          |  6 ++--
 net/ipv6/addrconf.c             | 78 ++++++++++++++++++++---------------------
 net/sctp/ipv6.c                 |  2 +-
 4 files changed, 48 insertions(+), 47 deletions(-)

(limited to 'net')

diff --git a/drivers/net/bonding/bond_ipv6.c b/drivers/net/bonding/bond_ipv6.c
index 6dd64cf3cb76..969ffed86b9f 100644
--- a/drivers/net/bonding/bond_ipv6.c
+++ b/drivers/net/bonding/bond_ipv6.c
@@ -37,7 +37,6 @@
 static void bond_glean_dev_ipv6(struct net_device *dev, struct in6_addr *addr)
 {
 	struct inet6_dev *idev;
-	struct inet6_ifaddr *ifa;
 
 	if (!dev)
 		return;
@@ -47,10 +46,12 @@ static void bond_glean_dev_ipv6(struct net_device *dev, struct in6_addr *addr)
 		return;
 
 	read_lock_bh(&idev->lock);
-	ifa = idev->addr_list;
-	if (ifa)
+	if (!list_empty(&idev->addr_list)) {
+		struct inet6_ifaddr *ifa
+			= list_first_entry(&idev->addr_list,
+					   struct inet6_ifaddr, if_list);
 		ipv6_addr_copy(addr, &ifa->addr);
-	else
+	} else
 		ipv6_addr_set(addr, 0, 0, 0, 0);
 
 	read_unlock_bh(&idev->lock);
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 22a00b1d2c38..13f9fc086d54 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -55,7 +55,7 @@ struct inet6_ifaddr {
 	struct rt6_info		*rt;
 
 	struct hlist_node	addr_lst;
-	struct inet6_ifaddr	*if_next;       /* next addr in inet6_dev */
+	struct list_head	if_list;
 
 #ifdef CONFIG_IPV6_PRIVACY
 	struct list_head	tmp_list;
@@ -152,9 +152,9 @@ struct ipv6_devstat {
 };
 
 struct inet6_dev {
-	struct net_device		*dev;
+	struct net_device	*dev;
 
-	struct inet6_ifaddr	*addr_list;
+	struct list_head	addr_list;
 
 	struct ifmcaddr6	*mc_list;
 	struct ifmcaddr6	*mc_tomb;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1e5e41fe92bc..6dbf0f79b762 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -317,7 +317,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
 {
 	struct net_device *dev = idev->dev;
 
-	WARN_ON(idev->addr_list != NULL);
+	WARN_ON(!list_empty(&idev->addr_list));
 	WARN_ON(idev->mc_list != NULL);
 
 #ifdef NET_REFCNT_DEBUG
@@ -350,6 +350,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
 
 	rwlock_init(&ndev->lock);
 	ndev->dev = dev;
+	INIT_LIST_HEAD(&ndev->addr_list);
+
 	memcpy(&ndev->cnf, dev_net(dev)->ipv6.devconf_dflt, sizeof(ndev->cnf));
 	ndev->cnf.mtu6 = dev->mtu;
 	ndev->cnf.sysctl = NULL;
@@ -466,7 +468,8 @@ static void dev_forward_change(struct inet6_dev *idev)
 		else
 			ipv6_dev_mc_dec(dev, &in6addr_linklocal_allrouters);
 	}
-	for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) {
+
+	list_for_each_entry(ifa, &idev->addr_list, if_list) {
 		if (ifa->flags&IFA_F_TENTATIVE)
 			continue;
 		if (idev->cnf.forwarding)
@@ -532,7 +535,6 @@ static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head)
 /* Nobody refers to this ifaddr, destroy it */
 void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 {
-	WARN_ON(ifp->if_next != NULL);
 	WARN_ON(!hlist_unhashed(&ifp->addr_lst));
 
 #ifdef NET_REFCNT_DEBUG
@@ -556,21 +558,21 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 static void
 ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
 {
-	struct inet6_ifaddr *ifa, **ifap;
+	struct list_head *p;
 	int ifp_scope = ipv6_addr_src_scope(&ifp->addr);
 
 	/*
 	 * Each device address list is sorted in order of scope -
 	 * global before linklocal.
 	 */
-	for (ifap = &idev->addr_list; (ifa = *ifap) != NULL;
-	     ifap = &ifa->if_next) {
+	list_for_each(p, &idev->addr_list) {
+		struct inet6_ifaddr *ifa
+			= list_entry(p, struct inet6_ifaddr, if_list);
 		if (ifp_scope >= ipv6_addr_src_scope(&ifa->addr))
 			break;
 	}
 
-	ifp->if_next = *ifap;
-	*ifap = ifp;
+	list_add(&ifp->if_list, p);
 }
 
 static u32 ipv6_addr_hash(const struct in6_addr *addr)
@@ -703,7 +705,7 @@ out:
 
 static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 {
-	struct inet6_ifaddr *ifa, **ifap;
+	struct inet6_ifaddr *ifa, *ifn;
 	struct inet6_dev *idev = ifp->idev;
 	int hash;
 	int deleted = 0, onlink = 0;
@@ -730,11 +732,11 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 	}
 #endif
 
-	for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;) {
+	list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) {
 		if (ifa == ifp) {
-			*ifap = ifa->if_next;
+			list_del_init(&ifp->if_list);
 			__in6_ifa_put(ifp);
-			ifa->if_next = NULL;
+
 			if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0)
 				break;
 			deleted = 1;
@@ -767,7 +769,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 				}
 			}
 		}
-		ifap = &ifa->if_next;
 	}
 	write_unlock_bh(&idev->lock);
 
@@ -1146,7 +1147,7 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev,
 			continue;
 
 		read_lock_bh(&idev->lock);
-		for (score->ifa = idev->addr_list; score->ifa; score->ifa = score->ifa->if_next) {
+		list_for_each_entry(score->ifa, &idev->addr_list, if_list) {
 			int i;
 
 			/*
@@ -1238,8 +1239,9 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
 		struct inet6_ifaddr *ifp;
 
 		read_lock_bh(&idev->lock);
-		for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
-			if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) {
+		list_for_each_entry(ifp, &idev->addr_list, if_list) {
+			if (ifp->scope == IFA_LINK &&
+			    !(ifp->flags & banned_flags)) {
 				ipv6_addr_copy(addr, &ifp->addr);
 				err = 0;
 				break;
@@ -1257,7 +1259,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
 	struct inet6_ifaddr *ifp;
 
 	read_lock_bh(&idev->lock);
-	for (ifp=idev->addr_list; ifp; ifp=ifp->if_next)
+	list_for_each_entry(ifp, &idev->addr_list, if_list)
 		cnt++;
 	read_unlock_bh(&idev->lock);
 	return cnt;
@@ -1317,7 +1319,7 @@ int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev)
 	idev = __in6_dev_get(dev);
 	if (idev) {
 		read_lock_bh(&idev->lock);
-		for (ifa = idev->addr_list; ifa; ifa = ifa->if_next) {
+		list_for_each_entry(ifa, &idev->addr_list, if_list) {
 			onlink = ipv6_prefix_equal(addr, &ifa->addr,
 						   ifa->prefix_len);
 			if (onlink)
@@ -1555,7 +1557,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
 	struct inet6_ifaddr *ifp;
 
 	read_lock_bh(&idev->lock);
-	for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+	list_for_each_entry(ifp, &idev->addr_list, if_list) {
 		if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {
 			memcpy(eui, ifp->addr.s6_addr+8, 8);
 			err = 0;
@@ -2159,7 +2161,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
 		return -ENXIO;
 
 	read_lock_bh(&idev->lock);
-	for (ifp = idev->addr_list; ifp; ifp=ifp->if_next) {
+	list_for_each_entry(ifp, &idev->addr_list, if_list) {
 		if (ifp->prefix_len == plen &&
 		    ipv6_addr_equal(pfx, &ifp->addr)) {
 			in6_ifa_hold(ifp);
@@ -2170,7 +2172,7 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
 			/* If the last address is deleted administratively,
 			   disable IPv6 on this interface.
 			 */
-			if (idev->addr_list == NULL)
+			if (list_empty(&idev->addr_list))
 				addrconf_ifdown(idev->dev, 1);
 			return 0;
 		}
@@ -2602,9 +2604,10 @@ static void addrconf_type_change(struct net_device *dev, unsigned long event)
 
 static int addrconf_ifdown(struct net_device *dev, int how)
 {
-	struct inet6_dev *idev;
-	struct inet6_ifaddr *ifa, *keep_list, **bifa;
 	struct net *net = dev_net(dev);
+	struct inet6_dev *idev;
+	struct inet6_ifaddr *ifa;
+	LIST_HEAD(keep_list);
 
 	ASSERT_RTNL();
 
@@ -2658,12 +2661,10 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		write_lock_bh(&idev->lock);
 	}
 #endif
-	keep_list = NULL;
-	bifa = &keep_list;
-	while ((ifa = idev->addr_list) != NULL) {
-		idev->addr_list = ifa->if_next;
-		ifa->if_next = NULL;
 
+	while (!list_empty(&idev->addr_list)) {
+		ifa = list_first_entry(&idev->addr_list,
+				       struct inet6_ifaddr, if_list);
 		addrconf_del_timer(ifa);
 
 		/* If just doing link down, and address is permanent
@@ -2671,10 +2672,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		if (how == 0 &&
 		    (ifa->flags&IFA_F_PERMANENT) &&
 		    !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
-
-			/* Move to holding list */
-			*bifa = ifa;
-			bifa = &ifa->if_next;
+			list_move_tail(&ifa->if_list, &keep_list);
 
 			/* If not doing DAD on this address, just keep it. */
 			if ((dev->flags&(IFF_NOARP|IFF_LOOPBACK)) ||
@@ -2690,6 +2688,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 			ifa->flags |= IFA_F_TENTATIVE;
 			in6_ifa_hold(ifa);
 		} else {
+			list_del(&ifa->if_list);
 			ifa->dead = 1;
 		}
 		write_unlock_bh(&idev->lock);
@@ -2707,7 +2706,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		write_lock_bh(&idev->lock);
 	}
 
-	idev->addr_list = keep_list;
+	list_splice(&keep_list, &idev->addr_list);
 
 	write_unlock_bh(&idev->lock);
 
@@ -2917,7 +2916,7 @@ static void addrconf_dad_run(struct inet6_dev *idev) {
 	struct inet6_ifaddr *ifp;
 
 	read_lock_bh(&idev->lock);
-	for (ifp = idev->addr_list; ifp; ifp = ifp->if_next) {
+	list_for_each_entry(ifp, &idev->addr_list, if_list) {
 		spin_lock(&ifp->lock);
 		if (!(ifp->flags & IFA_F_TENTATIVE)) {
 			spin_unlock(&ifp->lock);
@@ -3500,7 +3499,6 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
 			  struct netlink_callback *cb, enum addr_type_t type,
 			  int s_ip_idx, int *p_ip_idx)
 {
-	struct inet6_ifaddr *ifa;
 	struct ifmcaddr6 *ifmca;
 	struct ifacaddr6 *ifaca;
 	int err = 1;
@@ -3508,11 +3506,12 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
 
 	read_lock_bh(&idev->lock);
 	switch (type) {
-	case UNICAST_ADDR:
+	case UNICAST_ADDR: {
+		struct inet6_ifaddr *ifa;
+
 		/* unicast address incl. temp addr */
-		for (ifa = idev->addr_list; ifa;
-		     ifa = ifa->if_next, ip_idx++) {
-			if (ip_idx < s_ip_idx)
+		list_for_each_entry(ifa, &idev->addr_list, if_list) {
+			if (++ip_idx < s_ip_idx)
 				continue;
 			err = inet6_fill_ifaddr(skb, ifa,
 						NETLINK_CB(cb->skb).pid,
@@ -3523,6 +3522,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb,
 				break;
 		}
 		break;
+	}
 	case MULTICAST_ADDR:
 		/* multicast address */
 		for (ifmca = idev->mc_list; ifmca;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 1d7ac70ba39f..240dceba06e5 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -371,7 +371,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
 	}
 
 	read_lock_bh(&in6_dev->lock);
-	for (ifp = in6_dev->addr_list; ifp; ifp = ifp->if_next) {
+	list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
 		/* Add the address to the local list.  */
 		addr = t_new(struct sctp_sockaddr_entry, GFP_ATOMIC);
 		if (addr) {
-- 
cgit v1.2.3-59-g8ed1b


From bcdd553fd3037d8700082ec4cbb6b25437ea06d6 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Sat, 20 Mar 2010 16:08:18 -0700
Subject: IPv6: addrconf cleanups

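Some minor cleanups: reformat comments, add whitespace for clarity,
renumber the step comments in addrconf_ifdown(), drop the redundant
zero .priority initializer, and pass a constant 1 instead of
"event != NETDEV_DOWN" in one addrconf_ifdown() call.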

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 51 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 28 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6dbf0f79b762..bcb55b78746f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2456,6 +2456,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 				return notifier_from_errno(-ENOMEM);
 		}
 		break;
+
 	case NETDEV_UP:
 	case NETDEV_CHANGE:
 		if (dev->flags & IFF_SLAVE)
@@ -2485,10 +2486,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			}
 
 			if (idev) {
-				if (idev->if_flags & IF_READY) {
+				if (idev->if_flags & IF_READY)
 					/* device is already configured. */
 					break;
-				}
 				idev->if_flags |= IF_READY;
 			}
 
@@ -2517,25 +2517,30 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			addrconf_dev_config(dev);
 			break;
 		}
+
 		if (idev) {
 			if (run_pending)
 				addrconf_dad_run(idev);
 
-			/* If the MTU changed during the interface down, when the
-			   interface up, the changed MTU must be reflected in the
-			   idev as well as routers.
+			/*
+			 * If the MTU changed during the interface down,
+			 * when the interface up, the changed MTU must be
+			 * reflected in the idev as well as routers.
 			 */
-			if (idev->cnf.mtu6 != dev->mtu && dev->mtu >= IPV6_MIN_MTU) {
+			if (idev->cnf.mtu6 != dev->mtu &&
+			    dev->mtu >= IPV6_MIN_MTU) {
 				rt6_mtu_change(dev, dev->mtu);
 				idev->cnf.mtu6 = dev->mtu;
 			}
 			idev->tstamp = jiffies;
 			inet6_ifinfo_notify(RTM_NEWLINK, idev);
-			/* If the changed mtu during down is lower than IPV6_MIN_MTU
-			   stop IPv6 on this interface.
+
+			/*
+			 * If the changed mtu during down is lower than
+			 * IPV6_MIN_MTU stop IPv6 on this interface.
 			 */
 			if (dev->mtu < IPV6_MIN_MTU)
-				addrconf_ifdown(dev, event != NETDEV_DOWN);
+				addrconf_ifdown(dev, 1);
 		}
 		break;
 
@@ -2552,7 +2557,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 				break;
 		}
 
-		/* MTU falled under IPV6_MIN_MTU. Stop IPv6 on this interface. */
+		/*
+		 * MTU falled under IPV6_MIN_MTU.
+		 * Stop IPv6 on this interface.
+		 */
 
 	case NETDEV_DOWN:
 	case NETDEV_UNREGISTER:
@@ -2572,6 +2580,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 				return notifier_from_errno(err);
 		}
 		break;
+
 	case NETDEV_PRE_TYPE_CHANGE:
 	case NETDEV_POST_TYPE_CHANGE:
 		addrconf_type_change(dev, event);
@@ -2586,7 +2595,6 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
  */
 static struct notifier_block ipv6_dev_notf = {
 	.notifier_call = addrconf_notify,
-	.priority = 0
 };
 
 static void addrconf_type_change(struct net_device *dev, unsigned long event)
@@ -2618,8 +2626,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	if (idev == NULL)
 		return -ENODEV;
 
-	/* Step 1: remove reference to ipv6 device from parent device.
-		   Do not dev_put!
+	/*
+	 * Step 1: remove reference to ipv6 device from parent device.
+	 *	   Do not dev_put!
 	 */
 	if (how) {
 		idev->dead = 1;
@@ -2634,16 +2643,15 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	write_lock_bh(&idev->lock);
 
-	/* Step 3: clear flags for stateless addrconf */
+	/* Step 2: clear flags for stateless addrconf */
 	if (!how)
 		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY);
 
-	/* Step 4: clear address list */
 #ifdef CONFIG_IPV6_PRIVACY
 	if (how && del_timer(&idev->regen_timer))
 		in6_dev_put(idev);
 
-	/* clear tempaddr list */
+	/* Step 3: clear tempaddr list */
 	while (!list_empty(&idev->tempaddr_list)) {
 		ifa = list_first_entry(&idev->tempaddr_list,
 				       struct inet6_ifaddr, tmp_list);
@@ -2669,7 +2677,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 		/* If just doing link down, and address is permanent
 		   and not link-local, then retain it. */
-		if (how == 0 &&
+		if (!how &&
 		    (ifa->flags&IFA_F_PERMANENT) &&
 		    !(ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL)) {
 			list_move_tail(&ifa->if_list, &keep_list);
@@ -2711,7 +2719,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	write_unlock_bh(&idev->lock);
 
 	/* Step 5: Discard multicast list */
-
 	if (how)
 		ipv6_mc_destroy_dev(idev);
 	else
@@ -2719,8 +2726,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 	idev->tstamp = jiffies;
 
-	/* Shot the device (if unregistered) */
-
+	/* Last: Shot the device (if unregistered) */
 	if (how) {
 		addrconf_sysctl_unregister(idev);
 		neigh_parms_release(&nd_tbl, idev->nd_parms);
@@ -3108,8 +3114,7 @@ static void addrconf_verify(unsigned long foo)
 
 	del_timer(&addr_chk_timer);
 
-	for (i=0; i < IN6_ADDR_HSIZE; i++) {
-
+	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
 restart:
 		hlist_for_each_entry_rcu(ifp, node,
 					 &inet6_addr_lst[i], addr_lst) {
@@ -4376,7 +4381,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
 	if (t == NULL)
 		goto out;
 
-	for (i=0; t->addrconf_vars[i].data; i++) {
+	for (i = 0; t->addrconf_vars[i].data; i++) {
 		t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
 		t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
 		t->addrconf_vars[i].extra2 = net;
-- 
cgit v1.2.3-59-g8ed1b


From e21e8467d3188a36f7f0af0d4b9aae74e23fda0e Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Sat, 20 Mar 2010 16:09:01 -0700
Subject: addrconf: checkpatch fixes

Fix some of the checkpatch complaints.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 352 ++++++++++++++++++++++++++--------------------------
 1 file changed, 179 insertions(+), 173 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index bcb55b78746f..279580eab309 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -81,7 +81,7 @@
 #include <linux/random.h>
 #endif
 
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 #include <asm/unaligned.h>
 
 #include <linux/proc_fs.h>
@@ -97,7 +97,7 @@
 #endif
 
 #define	INFINITY_LIFE_TIME	0xFFFFFFFF
-#define TIME_DELTA(a,b) ((unsigned long)((long)(a) - (long)(b)))
+#define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b)))
 
 #ifdef CONFIG_SYSCTL
 static void addrconf_sysctl_register(struct inet6_dev *idev);
@@ -249,8 +249,7 @@ static void addrconf_del_timer(struct inet6_ifaddr *ifp)
 		__in6_ifa_put(ifp);
 }
 
-enum addrconf_timer_t
-{
+enum addrconf_timer_t {
 	AC_NONE,
 	AC_DAD,
 	AC_RS,
@@ -270,7 +269,8 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp,
 	case AC_RS:
 		ifp->timer.function = addrconf_rs_timer;
 		break;
-	default:;
+	default:
+		break;
 	}
 	ifp->timer.expires = jiffies + when;
 	add_timer(&ifp->timer);
@@ -325,7 +325,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev)
 #endif
 	dev_put(dev);
 	if (!idev->dead) {
-		printk("Freeing alive inet6 device %p\n", idev);
+		pr_warning("Freeing alive inet6 device %p\n", idev);
 		return;
 	}
 	snmp6_free_dev(idev);
@@ -441,8 +441,10 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
 
 	ASSERT_RTNL();
 
-	if ((idev = __in6_dev_get(dev)) == NULL) {
-		if ((idev = ipv6_add_dev(dev)) == NULL)
+	idev = __in6_dev_get(dev);
+	if (!idev) {
+		idev = ipv6_add_dev(dev);
+		if (!idev)
 			return NULL;
 	}
 
@@ -544,10 +546,10 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 	in6_dev_put(ifp->idev);
 
 	if (del_timer(&ifp->timer))
-		printk("Timer is still running, when freeing ifa=%p\n", ifp);
+		pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
 
 	if (!ifp->dead) {
-		printk("Freeing alive inet6 address %p\n", ifp);
+		pr_warning("Freeing alive inet6 address %p\n", ifp);
 		return;
 	}
 	dst_release(&ifp->rt->u.dst);
@@ -1225,7 +1227,6 @@ try_nextdev:
 	in6_ifa_put(hiscore->ifa);
 	return 0;
 }
-
 EXPORT_SYMBOL(ipv6_dev_get_saddr);
 
 int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
@@ -1235,7 +1236,8 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
 	int err = -EADDRNOTAVAIL;
 
 	rcu_read_lock();
-	if ((idev = __in6_dev_get(dev)) != NULL) {
+	idev = __in6_dev_get(dev);
+	if (idev) {
 		struct inet6_ifaddr *ifp;
 
 		read_lock_bh(&idev->lock);
@@ -1725,7 +1727,8 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)
 
 	ASSERT_RTNL();
 
-	if ((idev = ipv6_find_idev(dev)) == NULL)
+	idev = ipv6_find_idev(dev);
+	if (!idev)
 		return NULL;
 
 	/* Add default multicast route */
@@ -2433,7 +2436,8 @@ static void addrconf_ip6_tnl_config(struct net_device *dev)
 
 	ASSERT_RTNL();
 
-	if ((idev = addrconf_add_dev(dev)) == NULL) {
+	idev = addrconf_add_dev(dev);
+	if (!idev) {
 		printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
 		return;
 	}
@@ -2448,7 +2452,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 	int run_pending = 0;
 	int err;
 
-	switch(event) {
+	switch (event) {
 	case NETDEV_REGISTER:
 		if (!idev && dev->mtu >= IPV6_MIN_MTU) {
 			idev = ipv6_add_dev(dev);
@@ -2500,7 +2504,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 			run_pending = 1;
 		}
 
-		switch(dev->type) {
+		switch (dev->type) {
 #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE)
 		case ARPHRD_SIT:
 			addrconf_sit_config(dev);
@@ -2837,7 +2841,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 	 * Optimistic nodes can start receiving
 	 * Frames right away
 	 */
-	if(ifp->flags & IFA_F_OPTIMISTIC)
+	if (ifp->flags & IFA_F_OPTIMISTIC)
 		ip6_ins_rt(ifp->rt);
 
 	addrconf_dad_kick(ifp);
@@ -2887,7 +2891,7 @@ out:
 
 static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 {
-	struct net_device *	dev = ifp->idev->dev;
+	struct net_device *dev = ifp->idev->dev;
 
 	/*
 	 *	Configure the address for reception. Now it is valid.
@@ -2918,7 +2922,8 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
 	}
 }
 
-static void addrconf_dad_run(struct inet6_dev *idev) {
+static void addrconf_dad_run(struct inet6_dev *idev)
+{
 	struct inet6_ifaddr *ifp;
 
 	read_lock_bh(&idev->lock);
@@ -2983,7 +2988,7 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
 	struct inet6_ifaddr *ifa = if6_get_first(seq);
 
 	if (ifa)
-		while(pos && (ifa = if6_get_next(seq, ifa)) != NULL)
+		while (pos && (ifa = if6_get_next(seq, ifa)) != NULL)
 			--pos;
 	return pos ? NULL : ifa;
 }
@@ -3492,8 +3497,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca,
 	return nlmsg_end(skb, nlh);
 }
 
-enum addr_type_t
-{
+enum addr_type_t {
 	UNICAST_ADDR,
 	MULTICAST_ADDR,
 	ANYCAST_ADDR,
@@ -3592,7 +3596,8 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
 			if (idx > s_idx)
 				s_ip_idx = 0;
 			ip_idx = 0;
-			if ((idev = __in6_dev_get(dev)) == NULL)
+			idev = __in6_dev_get(dev);
+			if (!idev)
 				goto cont;
 
 			if (in6_dump_addrs(idev, skb, cb, type,
@@ -3659,12 +3664,14 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh,
 	if (ifm->ifa_index)
 		dev = __dev_get_by_index(net, ifm->ifa_index);
 
-	if ((ifa = ipv6_get_ifaddr(net, addr, dev, 1)) == NULL) {
+	ifa = ipv6_get_ifaddr(net, addr, dev, 1);
+	if (!ifa) {
 		err = -EADDRNOTAVAIL;
 		goto errout;
 	}
 
-	if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) {
+	skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL);
+	if (!skb) {
 		err = -ENOBUFS;
 		goto errout_ifa;
 	}
@@ -3789,7 +3796,7 @@ static inline void __snmp6_fill_stats(u64 *stats, void __percpu **mib,
 static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
 			     int bytes)
 {
-	switch(attrtype) {
+	switch (attrtype) {
 	case IFLA_INET6_STATS:
 		__snmp6_fill_stats(stats, (void __percpu **)idev->stats.ipv6, IPSTATS_MIB_MAX, bytes);
 		break;
@@ -4141,211 +4148,211 @@ static struct addrconf_sysctl_table
 	.sysctl_header = NULL,
 	.addrconf_vars = {
 		{
-			.procname	=	"forwarding",
-			.data		=	&ipv6_devconf.forwarding,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	addrconf_sysctl_forward,
+			.procname	= "forwarding",
+			.data		= &ipv6_devconf.forwarding,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= addrconf_sysctl_forward,
 		},
 		{
-			.procname	=	"hop_limit",
-			.data		=	&ipv6_devconf.hop_limit,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "hop_limit",
+			.data		= &ipv6_devconf.hop_limit,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"mtu",
-			.data		=	&ipv6_devconf.mtu6,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "mtu",
+			.data		= &ipv6_devconf.mtu6,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"accept_ra",
-			.data		=	&ipv6_devconf.accept_ra,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "accept_ra",
+			.data		= &ipv6_devconf.accept_ra,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"accept_redirects",
-			.data		=	&ipv6_devconf.accept_redirects,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "accept_redirects",
+			.data		= &ipv6_devconf.accept_redirects,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"autoconf",
-			.data		=	&ipv6_devconf.autoconf,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "autoconf",
+			.data		= &ipv6_devconf.autoconf,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"dad_transmits",
-			.data		=	&ipv6_devconf.dad_transmits,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "dad_transmits",
+			.data		= &ipv6_devconf.dad_transmits,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"router_solicitations",
-			.data		=	&ipv6_devconf.rtr_solicits,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "router_solicitations",
+			.data		= &ipv6_devconf.rtr_solicits,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"router_solicitation_interval",
-			.data		=	&ipv6_devconf.rtr_solicit_interval,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec_jiffies,
+			.procname	= "router_solicitation_interval",
+			.data		= &ipv6_devconf.rtr_solicit_interval,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_jiffies,
 		},
 		{
-			.procname	=	"router_solicitation_delay",
-			.data		=	&ipv6_devconf.rtr_solicit_delay,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec_jiffies,
+			.procname	= "router_solicitation_delay",
+			.data		= &ipv6_devconf.rtr_solicit_delay,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_jiffies,
 		},
 		{
-			.procname	=	"force_mld_version",
-			.data		=	&ipv6_devconf.force_mld_version,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "force_mld_version",
+			.data		= &ipv6_devconf.force_mld_version,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 #ifdef CONFIG_IPV6_PRIVACY
 		{
-			.procname	=	"use_tempaddr",
-			.data		=	&ipv6_devconf.use_tempaddr,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "use_tempaddr",
+			.data		= &ipv6_devconf.use_tempaddr,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"temp_valid_lft",
-			.data		=	&ipv6_devconf.temp_valid_lft,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "temp_valid_lft",
+			.data		= &ipv6_devconf.temp_valid_lft,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"temp_prefered_lft",
-			.data		=	&ipv6_devconf.temp_prefered_lft,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "temp_prefered_lft",
+			.data		= &ipv6_devconf.temp_prefered_lft,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"regen_max_retry",
-			.data		=	&ipv6_devconf.regen_max_retry,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "regen_max_retry",
+			.data		= &ipv6_devconf.regen_max_retry,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"max_desync_factor",
-			.data		=	&ipv6_devconf.max_desync_factor,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "max_desync_factor",
+			.data		= &ipv6_devconf.max_desync_factor,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 #endif
 		{
-			.procname	=	"max_addresses",
-			.data		=	&ipv6_devconf.max_addresses,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "max_addresses",
+			.data		= &ipv6_devconf.max_addresses,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"accept_ra_defrtr",
-			.data		=	&ipv6_devconf.accept_ra_defrtr,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "accept_ra_defrtr",
+			.data		= &ipv6_devconf.accept_ra_defrtr,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"accept_ra_pinfo",
-			.data		=	&ipv6_devconf.accept_ra_pinfo,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "accept_ra_pinfo",
+			.data		= &ipv6_devconf.accept_ra_pinfo,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 #ifdef CONFIG_IPV6_ROUTER_PREF
 		{
-			.procname	=	"accept_ra_rtr_pref",
-			.data		=	&ipv6_devconf.accept_ra_rtr_pref,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "accept_ra_rtr_pref",
+			.data		= &ipv6_devconf.accept_ra_rtr_pref,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"router_probe_interval",
-			.data		=	&ipv6_devconf.rtr_probe_interval,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec_jiffies,
+			.procname	= "router_probe_interval",
+			.data		= &ipv6_devconf.rtr_probe_interval,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec_jiffies,
 		},
 #ifdef CONFIG_IPV6_ROUTE_INFO
 		{
-			.procname	=	"accept_ra_rt_info_max_plen",
-			.data		=	&ipv6_devconf.accept_ra_rt_info_max_plen,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "accept_ra_rt_info_max_plen",
+			.data		= &ipv6_devconf.accept_ra_rt_info_max_plen,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 #endif
 #endif
 		{
-			.procname	=	"proxy_ndp",
-			.data		=	&ipv6_devconf.proxy_ndp,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "proxy_ndp",
+			.data		= &ipv6_devconf.proxy_ndp,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
-			.procname	=	"accept_source_route",
-			.data		=	&ipv6_devconf.accept_source_route,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "accept_source_route",
+			.data		= &ipv6_devconf.accept_source_route,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 		{
-			.procname       =       "optimistic_dad",
-			.data           =       &ipv6_devconf.optimistic_dad,
-			.maxlen         =       sizeof(int),
-			.mode           =       0644,
-			.proc_handler   =       proc_dointvec,
+			.procname       = "optimistic_dad",
+			.data           = &ipv6_devconf.optimistic_dad,
+			.maxlen         = sizeof(int),
+			.mode           = 0644,
+			.proc_handler   = proc_dointvec,
 
 		},
 #endif
 #ifdef CONFIG_IPV6_MROUTE
 		{
-			.procname	=	"mc_forwarding",
-			.data		=	&ipv6_devconf.mc_forwarding,
-			.maxlen		=	sizeof(int),
-			.mode		=	0444,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "mc_forwarding",
+			.data		= &ipv6_devconf.mc_forwarding,
+			.maxlen		= sizeof(int),
+			.mode		= 0444,
+			.proc_handler	= proc_dointvec,
 		},
 #endif
 		{
-			.procname	=	"disable_ipv6",
-			.data		=	&ipv6_devconf.disable_ipv6,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	addrconf_sysctl_disable,
+			.procname	= "disable_ipv6",
+			.data		= &ipv6_devconf.disable_ipv6,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= addrconf_sysctl_disable,
 		},
 		{
-			.procname	=	"accept_dad",
-			.data		=	&ipv6_devconf.accept_dad,
-			.maxlen		=	sizeof(int),
-			.mode		=	0644,
-			.proc_handler	=	proc_dointvec,
+			.procname	= "accept_dad",
+			.data		= &ipv6_devconf.accept_dad,
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
+			.proc_handler	= proc_dointvec,
 		},
 		{
 			.procname       = "force_tllao",
@@ -4382,7 +4389,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name,
 		goto out;
 
 	for (i = 0; t->addrconf_vars[i].data; i++) {
-		t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
+		t->addrconf_vars[i].data += (char *)p - (char *)&ipv6_devconf;
 		t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
 		t->addrconf_vars[i].extra2 = net;
 	}
@@ -4519,14 +4526,12 @@ int register_inet6addr_notifier(struct notifier_block *nb)
 {
 	return atomic_notifier_chain_register(&inet6addr_chain, nb);
 }
-
 EXPORT_SYMBOL(register_inet6addr_notifier);
 
 int unregister_inet6addr_notifier(struct notifier_block *nb)
 {
-	return atomic_notifier_chain_unregister(&inet6addr_chain,nb);
+	return atomic_notifier_chain_unregister(&inet6addr_chain, nb);
 }
-
 EXPORT_SYMBOL(unregister_inet6addr_notifier);
 
 /*
@@ -4537,9 +4542,10 @@ int __init addrconf_init(void)
 {
 	int i, err;
 
-	if ((err = ipv6_addr_label_init()) < 0) {
-		printk(KERN_CRIT "IPv6 Addrconf: cannot initialize default policy table: %d.\n",
-			err);
+	err = ipv6_addr_label_init();
+	if (err < 0) {
+		printk(KERN_CRIT "IPv6 Addrconf:"
+		       " cannot initialize default policy table: %d.\n", err);
 		return err;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 88949cf484bfc399e1d662b5dda6892aaca21aae Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 17 Mar 2010 20:31:17 +0000
Subject: IPv6: addrconf cleanup addrconf_verify

The variable regen_advance is only used in the privacy case.
Move it to simplify code and eliminate ifdef's

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 279580eab309..36ebb4ad8a0a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3124,9 +3124,6 @@ restart:
 		hlist_for_each_entry_rcu(ifp, node,
 					 &inet6_addr_lst[i], addr_lst) {
 			unsigned long age;
-#ifdef CONFIG_IPV6_PRIVACY
-			unsigned long regen_advance;
-#endif
 
 			if (ifp->flags & IFA_F_PERMANENT)
 				continue;
@@ -3134,12 +3131,6 @@ restart:
 			spin_lock(&ifp->lock);
 			age = (now - ifp->tstamp) / HZ;
 
-#ifdef CONFIG_IPV6_PRIVACY
-			regen_advance = ifp->idev->cnf.regen_max_retry *
-					ifp->idev->cnf.dad_transmits *
-					ifp->idev->nd_parms->retrans_time / HZ;
-#endif
-
 			if (ifp->valid_lft != INFINITY_LIFE_TIME &&
 			    age >= ifp->valid_lft) {
 				spin_unlock(&ifp->lock);
@@ -3173,6 +3164,10 @@ restart:
 #ifdef CONFIG_IPV6_PRIVACY
 			} else if ((ifp->flags&IFA_F_TEMPORARY) &&
 				   !(ifp->flags&IFA_F_TENTATIVE)) {
+				unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *
+					ifp->idev->cnf.dad_transmits *
+					ifp->idev->nd_parms->retrans_time / HZ;
+
 				if (age >= ifp->prefered_lft - regen_advance) {
 					struct inet6_ifaddr *ifpub = ifp->ifpub;
 					if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
-- 
cgit v1.2.3-59-g8ed1b


From b2db756449f63f98049587f7ede4a8e85e0c79b1 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sat, 20 Mar 2010 16:11:12 -0700
Subject: ipv6: Reduce timer events for addrconf_verify().

This patch reduces timer events while keeping accuracy by rounding
our timer and/or batching several address validations in addrconf_verify().

addrconf_verify() is called at earliest timeout among interface addresses'
timeouts, but at maximum ADDR_CHECK_FREQUENCY (120 secs).

In most cases, all of the interface addresses' timeouts are long enough
(e.g. several hours or days vs 2 minutes) that this timer is usually called
every ADDR_CHECK_FREQUENCY, and it is okay to be lazy.
(Note this timer could be eliminated if all code paths which modify
variables related to timeouts called us manually, but that is another story.)

However, in the other, less common but important cases, we try to keep accuracy.

When the real interface address timeout is coming, and the timeout
is just before the rounded timeout, we accept some error.

When a timeout has been reached, we also try batching several other
events in the very near future.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 36ebb4ad8a0a..7d7d4b17c0f0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -99,6 +99,10 @@
 #define	INFINITY_LIFE_TIME	0xFFFFFFFF
 #define TIME_DELTA(a, b) ((unsigned long)((long)(a) - (long)(b)))
 
+#define ADDRCONF_TIMER_FUZZ_MINUS	(HZ > 50 ? HZ/50 : 1)
+#define ADDRCONF_TIMER_FUZZ		(HZ / 4)
+#define ADDRCONF_TIMER_FUZZ_MAX		(HZ)
+
 #ifdef CONFIG_SYSCTL
 static void addrconf_sysctl_register(struct inet6_dev *idev);
 static void addrconf_sysctl_unregister(struct inet6_dev *idev);
@@ -3107,15 +3111,15 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
 
 static void addrconf_verify(unsigned long foo)
 {
+	unsigned long now, next, next_sec, next_sched;
 	struct inet6_ifaddr *ifp;
 	struct hlist_node *node;
-	unsigned long now, next;
 	int i;
 
 	rcu_read_lock_bh();
 	spin_lock(&addrconf_verify_lock);
 	now = jiffies;
-	next = now + ADDR_CHECK_FREQUENCY;
+	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);
 
 	del_timer(&addr_chk_timer);
 
@@ -3129,7 +3133,8 @@ restart:
 				continue;
 
 			spin_lock(&ifp->lock);
-			age = (now - ifp->tstamp) / HZ;
+			/* We try to batch several events at once. */
+			age = (now - ifp->tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
 
 			if (ifp->valid_lft != INFINITY_LIFE_TIME &&
 			    age >= ifp->valid_lft) {
@@ -3199,7 +3204,21 @@ restart:
 		}
 	}
 
-	addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next;
+	next_sec = round_jiffies_up(next);
+	next_sched = next;
+
+	/* If rounded timeout is accurate enough, accept it. */
+	if (time_before(next_sec, next + ADDRCONF_TIMER_FUZZ))
+		next_sched = next_sec;
+
+	/* And minimum interval is ADDRCONF_TIMER_FUZZ_MAX. */
+	if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
+		next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
+
+	ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+	      now, next, next_sec, next_sched));
+
+	addr_chk_timer.expires = next_sched;
 	add_timer(&addr_chk_timer);
 	spin_unlock(&addrconf_verify_lock);
 	rcu_read_unlock_bh();
-- 
cgit v1.2.3-59-g8ed1b


From 3e81c6da39a265e11ef48f52bd15bf7ca0068c75 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 20 Mar 2010 16:18:00 -0700
Subject: ipv6: Fix bug in ipv6_chk_same_addr().

hlist_for_each_entry(p...) will not necessarily initialize 'p'
to anything if the hlist is empty.  GCC notices this and emits
a warning.

Just return true explicitly when we hit a match, and return
false if we fall out of the loop without one.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7d7d4b17c0f0..68e5809a2153 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -155,8 +155,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa);
 
 static void inet6_prefix_notify(int event, struct inet6_dev *idev,
 				struct prefix_info *pinfo);
-static int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
-			      struct net_device *dev);
+static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
+			       struct net_device *dev);
 
 static ATOMIC_NOTIFIER_HEAD(inet6addr_chain);
 
@@ -1295,23 +1295,22 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
 }
 EXPORT_SYMBOL(ipv6_chk_addr);
 
-static
-int ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
-		       struct net_device *dev)
+static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
+			       struct net_device *dev)
 {
+	unsigned int hash = ipv6_addr_hash(addr);
 	struct inet6_ifaddr *ifp;
 	struct hlist_node *node;
-	unsigned int hash = ipv6_addr_hash(addr);
 
 	hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
 			if (dev == NULL || ifp->idev->dev == dev)
-				break;
+				return true;
 		}
 	}
-	return ifp != NULL;
+	return false;
 }
 
 int ipv6_chk_prefix(struct in6_addr *addr, struct net_device *dev)
-- 
cgit v1.2.3-59-g8ed1b


From 755d0e77ac9c8d125388922dc33434ed5b2ebe80 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 19 Mar 2010 04:42:24 +0000
Subject: net: rtnetlink: ignore NETDEV_PRE_TYPE_CHANGE in rtnetlink_event()

Ignore the new NETDEV_PRE_TYPE_CHANGE event in rtnetlink_event() since
there have been no changes userspace needs to be notified of.

Also add a comment to the netdev notifier event definitions to remind
people to update the exclusion list when adding new event types.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/notifier.h | 5 ++++-
 net/core/rtnetlink.c     | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index f3635fc6e942..9c5d3fad01f3 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -182,7 +182,10 @@ static inline int notifier_to_errno(int ret)
  *	VC switch chains (for loadable kernel svgalib VC switch helpers) etc...
  */
  
-/* netdevice notifier chain */
+/* netdevice notifier chain. Please remember to update the rtnetlink
+ * notification exclusion list in rtnetlink_event() when adding new
+ * types.
+ */
 #define NETDEV_UP	0x0001	/* For now you can't veto a device up/down */
 #define NETDEV_DOWN	0x0002
 #define NETDEV_REBOOT	0x0003	/* Tell a protocol stack a network interface
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e1121f0bca6a..ffc6cf3495ac 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1513,6 +1513,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	case NETDEV_POST_INIT:
 	case NETDEV_REGISTER:
 	case NETDEV_CHANGE:
+	case NETDEV_PRE_TYPE_CHANGE:
 	case NETDEV_GOING_DOWN:
 	case NETDEV_UNREGISTER:
 	case NETDEV_UNREGISTER_BATCH:
-- 
cgit v1.2.3-59-g8ed1b


From 32a806c194ea112cfab00f558482dd97bee5e44e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Fri, 19 Mar 2010 04:00:23 +0000
Subject: bonding: flush unicast and multicast lists when changing type

After the type change, addresses in unicast and multicast lists wouldn't make
sense, not to mention possibly different lengths. So flush both lists here.

Note "dev_addr_discard" will very soon be replaced by "dev_mc_flush" (once
the mc_list conversion is done).

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 4 ++++
 include/linux/netdevice.h       | 2 ++
 net/core/dev.c                  | 6 ++++--
 3 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index cbe9e353d46a..c2aceaab0143 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1490,6 +1490,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 				goto err_undo_flags;
 			}
 
+			/* Flush unicast and multicast addresses */
+			dev_unicast_flush(bond_dev);
+			dev_addr_discard(bond_dev);
+
 			if (slave_dev->type != ARPHRD_ETHER)
 				bond_setup_by_slave(bond_dev, slave_dev);
 			else
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9fc6ee8e7508..c96c41e08e37 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1994,10 +1994,12 @@ extern int		dev_unicast_delete(struct net_device *dev, void *addr);
 extern int		dev_unicast_add(struct net_device *dev, void *addr);
 extern int		dev_unicast_sync(struct net_device *to, struct net_device *from);
 extern void		dev_unicast_unsync(struct net_device *to, struct net_device *from);
+extern void		dev_unicast_flush(struct net_device *dev);
 extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
 extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
 extern int		dev_mc_sync(struct net_device *to, struct net_device *from);
 extern void		dev_mc_unsync(struct net_device *to, struct net_device *from);
+extern void		dev_addr_discard(struct net_device *dev);
 extern int 		__dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all);
 extern int		__dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly);
 extern int		__dev_addr_sync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count);
diff --git a/net/core/dev.c b/net/core/dev.c
index c0e260870c0a..fe2a754238a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4457,12 +4457,13 @@ void dev_unicast_unsync(struct net_device *to, struct net_device *from)
 }
 EXPORT_SYMBOL(dev_unicast_unsync);
 
-static void dev_unicast_flush(struct net_device *dev)
+void dev_unicast_flush(struct net_device *dev)
 {
 	netif_addr_lock_bh(dev);
 	__hw_addr_flush(&dev->uc);
 	netif_addr_unlock_bh(dev);
 }
+EXPORT_SYMBOL(dev_unicast_flush);
 
 static void dev_unicast_init(struct net_device *dev)
 {
@@ -4484,7 +4485,7 @@ static void __dev_addr_discard(struct dev_addr_list **list)
 	}
 }
 
-static void dev_addr_discard(struct net_device *dev)
+void dev_addr_discard(struct net_device *dev)
 {
 	netif_addr_lock_bh(dev);
 
@@ -4493,6 +4494,7 @@ static void dev_addr_discard(struct net_device *dev)
 
 	netif_addr_unlock_bh(dev);
 }
+EXPORT_SYMBOL(dev_addr_discard);
 
 /**
  *	dev_get_flags - get flags reported to userspace
-- 
cgit v1.2.3-59-g8ed1b


From 907cdda5205b012eec7513f66713749b293188c9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 19 Mar 2010 05:37:18 +0000
Subject: tcp: Add SNMP counter for DEFER_ACCEPT

It's currently hard to diagnose when ACK frames are dropped because an
application set TCP_DEFER_ACCEPT on its listening socket.

See http://bugzilla.kernel.org/show_bug.cgi?id=15507

This patch adds an SNMP value named TCPDeferAcceptDrop.

netstat -s | grep TCPDeferAcceptDrop
    TCPDeferAcceptDrop: 0

This counter is incremented every time we drop a pure ACK frame received
by a socket in SYN_RECV state because its SYNACK retrans count is lower
than defer_accept value.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h     | 1 +
 net/ipv4/proc.c          | 1 +
 net/ipv4/tcp_minisocks.c | 1 +
 3 files changed, 3 insertions(+)

(limited to 'net')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 4435d1084755..d2a9aa3c6c88 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -227,6 +227,7 @@ enum
 	LINUX_MIB_SACKSHIFTFALLBACK,
 	LINUX_MIB_TCPBACKLOGDROP,
 	LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */
+	LINUX_MIB_TCPDEFERACCEPTDROP,
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4f1f337f4337..3dc9914c1dce 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -251,6 +251,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
 	SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP),
 	SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
+	SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4199bc6915c5..32f96278a24a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -671,6 +671,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
 	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
 		inet_rsk(req)->acked = 1;
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
 		return NULL;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 283f2fe87e980d8af5ad8aa63751e7e3258ee05a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 18 Mar 2010 13:37:40 +0000
Subject: net: speedup netdev_set_master()

We currently force a synchronize_net() in netdev_set_master()

This seems necessary only when a slave had a master and we dismantle it.

In the other case ("ifenslave bond0 eth0"), we don't need this long
delay.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index fe2a754238a9..2d01f18f303a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3757,11 +3757,10 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
 
 	slave->master = master;
 
-	synchronize_net();
-
-	if (old)
+	if (old) {
+		synchronize_net();
 		dev_put(old);
-
+	}
 	if (master)
 		slave->flags |= IFF_SLAVE;
 	else
-- 
cgit v1.2.3-59-g8ed1b


From 62c97ac04a67c120ec37a9bfd445a8d5dbbc1ed2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 18 Mar 2010 13:48:26 +0000
Subject: atm: Use kasprintf

Use kasprintf in atm_proc_dev_register()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/atm/proc.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/atm/proc.c b/net/atm/proc.c
index 7a96b2376bd7..f188a399c679 100644
--- a/net/atm/proc.c
+++ b/net/atm/proc.c
@@ -406,7 +406,6 @@ EXPORT_SYMBOL(atm_proc_root);
 
 int atm_proc_dev_register(struct atm_dev *dev)
 {
-	int digits, num;
 	int error;
 
 	/* No proc info */
@@ -414,16 +413,9 @@ int atm_proc_dev_register(struct atm_dev *dev)
 		return 0;
 
 	error = -ENOMEM;
-	digits = 0;
-	for (num = dev->number; num; num /= 10)
-		digits++;
-	if (!digits)
-		digits++;
-
-	dev->proc_name = kmalloc(strlen(dev->type) + digits + 2, GFP_KERNEL);
+	dev->proc_name = kasprintf(GFP_KERNEL, "%s:%d", dev->type, dev->number);
 	if (!dev->proc_name)
 		goto err_out;
-	sprintf(dev->proc_name, "%s:%d", dev->type, dev->number);
 
 	dev->proc_entry = proc_create_data(dev->proc_name, 0, atm_proc_root,
 					   &proc_atm_dev_ops, dev);
-- 
cgit v1.2.3-59-g8ed1b


From ec733b15a3ef0b5759141a177f8044a2f40c41e7 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 18 Mar 2010 20:36:06 +0000
Subject: net: snmp mib cleanup

There is no point in aligning or padding mibs to cache lines; they are
per-cpu allocated with an 8-byte alignment anyway.
This wastes space for no gain. This patch removes __SNMP_MIB_ALIGN__.

Since SNMP mibs contain "unsigned long" fields only, we can relax the
allocation alignment from "unsigned long long" to "unsigned long"

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/sctp.h |  2 +-
 include/net/snmp.h      | 29 +++++++----------------------
 net/dccp/dccp.h         |  2 +-
 net/ipv4/af_inet.c      |  4 ++--
 4 files changed, 11 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 78740ec57d5d..59151557406c 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -268,7 +268,7 @@ enum {
 #define SCTP_MIB_MAX    __SCTP_MIB_MAX
 struct sctp_mib {
         unsigned long   mibs[SCTP_MIB_MAX];
-} __SNMP_MIB_ALIGN__;
+};
 
 
 /* Print debugging messages.  */
diff --git a/include/net/snmp.h b/include/net/snmp.h
index 692ee0061dc4..884fdbb74b23 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -52,26 +52,11 @@ struct snmp_mib {
  * count on the 20Gb/s + networks people expect in a few years time!
  */
 
-/* 
- * The rule for padding: 
- * Best is power of two because then the right structure can be found by a 
- * simple shift. The structure should be always cache line aligned.
- * gcc needs n=alignto(cachelinesize, popcnt(sizeof(bla_mib))) shift/add 
- * instructions to emulate multiply in case it is not power-of-two. 
- * Currently n is always <=3 for all sizes so simple cache line alignment 
- * is enough. 
- * 
- * The best solution would be a global CPU local area , especially on 64 
- * and 128byte cacheline machine it makes a *lot* of sense -AK
- */ 
-
-#define __SNMP_MIB_ALIGN__	____cacheline_aligned
-
 /* IPstats */
 #define IPSTATS_MIB_MAX	__IPSTATS_MIB_MAX
 struct ipstats_mib {
 	unsigned long	mibs[IPSTATS_MIB_MAX];
-} __SNMP_MIB_ALIGN__;
+};
 
 /* ICMP */
 #define ICMP_MIB_DUMMY	__ICMP_MIB_MAX
@@ -79,36 +64,36 @@ struct ipstats_mib {
 
 struct icmp_mib {
 	unsigned long	mibs[ICMP_MIB_MAX];
-} __SNMP_MIB_ALIGN__;
+};
 
 #define ICMPMSG_MIB_MAX	__ICMPMSG_MIB_MAX
 struct icmpmsg_mib {
 	unsigned long	mibs[ICMPMSG_MIB_MAX];
-} __SNMP_MIB_ALIGN__;
+};
 
 /* ICMP6 (IPv6-ICMP) */
 #define ICMP6_MIB_MAX	__ICMP6_MIB_MAX
 struct icmpv6_mib {
 	unsigned long	mibs[ICMP6_MIB_MAX];
-} __SNMP_MIB_ALIGN__;
+};
 
 #define ICMP6MSG_MIB_MAX  __ICMP6MSG_MIB_MAX
 struct icmpv6msg_mib {
 	unsigned long	mibs[ICMP6MSG_MIB_MAX];
-} __SNMP_MIB_ALIGN__;
+};
 
 
 /* TCP */
 #define TCP_MIB_MAX	__TCP_MIB_MAX
 struct tcp_mib {
 	unsigned long	mibs[TCP_MIB_MAX];
-} __SNMP_MIB_ALIGN__;
+};
 
 /* UDP */
 #define UDP_MIB_MAX	__UDP_MIB_MAX
 struct udp_mib {
 	unsigned long	mibs[UDP_MIB_MAX];
-} __SNMP_MIB_ALIGN__;
+};
 
 /* Linux */
 #define LINUX_MIB_MAX	__LINUX_MIB_MAX
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 5ef32c2f0d6a..53f8e12d0c10 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -189,7 +189,7 @@ enum {
 #define DCCP_MIB_MAX	__DCCP_MIB_MAX
 struct dccp_mib {
 	unsigned long	mibs[DCCP_MIB_MAX];
-} __SNMP_MIB_ALIGN__;
+};
 
 DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics);
 #define DCCP_INC_STATS(field)	    SNMP_INC_STATS(dccp_statistics, field)
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 33b7dffa7732..55e11906a73a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1401,10 +1401,10 @@ EXPORT_SYMBOL_GPL(snmp_fold_field);
 int snmp_mib_init(void __percpu *ptr[2], size_t mibsize)
 {
 	BUG_ON(ptr == NULL);
-	ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
+	ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long));
 	if (!ptr[0])
 		goto err0;
-	ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long));
+	ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long));
 	if (!ptr[1])
 		goto err1;
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 99fe3c391d50d381687fd84ed0ab22d57079e41f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 18 Mar 2010 11:27:25 +0000
Subject: net: dev_getfirstbyhwtype() optimization

Use RCU to avoid RTNL use in dev_getfirstbyhwtype()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 2d01f18f303a..a03aab45e84f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -772,14 +772,17 @@ EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 
 struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
 {
-	struct net_device *dev;
+	struct net_device *dev, *ret = NULL;
 
-	rtnl_lock();
-	dev = __dev_getfirstbyhwtype(net, type);
-	if (dev)
-		dev_hold(dev);
-	rtnl_unlock();
-	return dev;
+	rcu_read_lock();
+	for_each_netdev_rcu(net, dev)
+		if (dev->type == type) {
+			dev_hold(dev);
+			ret = dev;
+			break;
+		}
+	rcu_read_unlock();
+	return ret;
 }
 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 
-- 
cgit v1.2.3-59-g8ed1b


From e99b99b471c21b071132e51bb7aa6b7a8796dc02 Mon Sep 17 00:00:00 2001
From: Robert Olsson <robert@herjulf.net>
Date: Thu, 18 Mar 2010 22:44:30 +0000
Subject: pktgen node allocation

Here is a patch to control the memory node that packets are allocated
on, and implicitly how packets are DMA'd etc.

The NODE_ALLOC flag enables node-aware allocation, which uses
numa_node_id() by default; when the flag is enabled the node can also
be selected explicitly via a new "node" parameter.

Tested this with 10 Intel 82599 ports w. TYAN S7025 E5520 CPUs.
Was able to TX/DMA ~80 Gbit/s to Ethernet wires.

Signed-off-by: Robert Olsson <robert.olsson@its.uu.se>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 53 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 43923811bd6a..2ad68da418df 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -169,7 +169,7 @@
 #include <asm/dma.h>
 #include <asm/div64.h>		/* do_div */
 
-#define VERSION 	"2.72"
+#define VERSION 	"2.73"
 #define IP_NAME_SZ 32
 #define MAX_MPLS_LABELS 16 /* This is the max label stack depth */
 #define MPLS_STACK_BOTTOM htonl(0x00000100)
@@ -190,6 +190,7 @@
 #define F_IPSEC_ON    (1<<12)	/* ipsec on for flows */
 #define F_QUEUE_MAP_RND (1<<13)	/* queue map Random */
 #define F_QUEUE_MAP_CPU (1<<14)	/* queue map mirrors smp_processor_id() */
+#define F_NODE          (1<<15)	/* Node memory alloc*/
 
 /* Thread control flag bits */
 #define T_STOP        (1<<0)	/* Stop run */
@@ -372,6 +373,7 @@ struct pktgen_dev {
 
 	u16 queue_map_min;
 	u16 queue_map_max;
+	int node;               /* Memory node */
 
 #ifdef CONFIG_XFRM
 	__u8	ipsmode;		/* IPSEC mode (config) */
@@ -607,6 +609,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	if (pkt_dev->traffic_class)
 		seq_printf(seq, "     traffic_class: 0x%02x\n", pkt_dev->traffic_class);
 
+	if (pkt_dev->node >= 0)
+		seq_printf(seq, "     node: %d\n", pkt_dev->node);
+
 	seq_printf(seq, "     Flags: ");
 
 	if (pkt_dev->flags & F_IPV6)
@@ -660,6 +665,9 @@ static int pktgen_if_show(struct seq_file *seq, void *v)
 	if (pkt_dev->flags & F_SVID_RND)
 		seq_printf(seq, "SVID_RND  ");
 
+	if (pkt_dev->flags & F_NODE)
+		seq_printf(seq, "NODE_ALLOC  ");
+
 	seq_puts(seq, "\n");
 
 	/* not really stopped, more like last-running-at */
@@ -1074,6 +1082,21 @@ static ssize_t pktgen_if_write(struct file *file,
 			pkt_dev->dst_mac_count);
 		return count;
 	}
+	if (!strcmp(name, "node")) {
+		len = num_arg(&user_buffer[i], 10, &value);
+		if (len < 0)
+			return len;
+
+		i += len;
+
+		if (node_possible(value)) {
+			pkt_dev->node = value;
+			sprintf(pg_result, "OK: node=%d", pkt_dev->node);
+		}
+		else
+			sprintf(pg_result, "ERROR: node not possible");
+		return count;
+	}
 	if (!strcmp(name, "flag")) {
 		char f[32];
 		memset(f, 0, 32);
@@ -1166,12 +1189,18 @@ static ssize_t pktgen_if_write(struct file *file,
 		else if (strcmp(f, "!IPV6") == 0)
 			pkt_dev->flags &= ~F_IPV6;
 
+		else if (strcmp(f, "NODE_ALLOC") == 0)
+			pkt_dev->flags |= F_NODE;
+
+		else if (strcmp(f, "!NODE_ALLOC") == 0)
+			pkt_dev->flags &= ~F_NODE;
+
 		else {
 			sprintf(pg_result,
 				"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
 				f,
 				"IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
-				"MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC\n");
+				"MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n");
 			return count;
 		}
 		sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
@@ -2572,9 +2601,27 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	mod_cur_headers(pkt_dev);
 
 	datalen = (odev->hard_header_len + 16) & ~0xf;
-	skb = __netdev_alloc_skb(odev,
-				 pkt_dev->cur_pkt_size + 64
-				 + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
+
+	if (pkt_dev->flags & F_NODE) {
+		int node;
+
+		if (pkt_dev->node >= 0)
+			node = pkt_dev->node;
+		else
+			node =  numa_node_id();
+
+		skb = __alloc_skb(NET_SKB_PAD + pkt_dev->cur_pkt_size + 64
+				  + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT, 0, node);
+		if (likely(skb)) {
+			skb_reserve(skb, NET_SKB_PAD);
+			skb->dev = odev;
+		}
+	}
+	else
+	  skb = __netdev_alloc_skb(odev,
+				   pkt_dev->cur_pkt_size + 64
+				   + datalen + pkt_dev->pkt_overhead, GFP_NOWAIT);
+
 	if (!skb) {
 		sprintf(pkt_dev->result, "No memory");
 		return NULL;
@@ -3674,6 +3721,7 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname)
 	pkt_dev->svlan_p = 0;
 	pkt_dev->svlan_cfi = 0;
 	pkt_dev->svlan_id = 0xffff;
+	pkt_dev->node = -1;
 
 	err = pktgen_setup_dev(pkt_dev, ifname);
 	if (err)
-- 
cgit v1.2.3-59-g8ed1b


From 7668448ea91cda36661878da54c851f8eb239d8e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Sat, 20 Mar 2010 01:20:49 +0000
Subject: bridge: cleanup: remove unused assignment

We never actually use iph again so this assignment can be removed.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 6980625537ca..9f0c4f065604 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1003,8 +1003,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 	if (!pskb_may_pull(skb2, sizeof(*ih)))
 		goto out;
 
-	iph = ip_hdr(skb2);
-
 	switch (skb2->ip_summed) {
 	case CHECKSUM_COMPLETE:
 		if (!csum_fold(skb2->csum))
-- 
cgit v1.2.3-59-g8ed1b


From e880eb6c5c9d98e389ffc0d8947f75d70785361a Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 22 Mar 2010 18:06:47 -0700
Subject: rps: Fix build with CONFIG_SYSFS not enabled

Fix build with CONFIG_SYSFS not enabled.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7a46343d5ae3..f6b6bfee72ae 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -739,7 +739,9 @@ void netdev_unregister_kobject(struct net_device * net)
 	if (!net_eq(dev_net(net), &init_net))
 		return;
 
+#ifdef CONFIG_SYSFS
 	rx_queue_remove_kobjects(net);
+#endif
 
 	device_del(dev);
 }
@@ -780,11 +782,13 @@ int netdev_register_kobject(struct net_device *net)
 	if (error)
 		return error;
 
+#ifdef CONFIG_SYSFS
 	error = rx_queue_register_kobjects(net);
 	if (error) {
 		device_del(dev);
 		return error;
 	}
+#endif
 
 	return error;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 7316ae88c43d47f6503f4c29b4973204e33c3411 Mon Sep 17 00:00:00 2001
From: Tom Goff <thomas.goff@boeing.com>
Date: Fri, 19 Mar 2010 15:40:13 +0000
Subject: net_sched: make traffic control network namespace aware

Mostly minor changes to add a net argument to various functions and
remove initial network namespace checks.

Make /proc/net/psched per network namespace.

Signed-off-by: Tom Goff <thomas.goff@boeing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c |  45 +++++++++++----------
 net/sched/cls_api.c |  30 ++++++--------
 net/sched/sch_api.c | 112 +++++++++++++++++++++++++++++++++-------------------
 3 files changed, 107 insertions(+), 80 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 64f5e328cee9..7a558da99bb6 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -667,7 +667,8 @@ nlmsg_failure:
 }
 
 static int
-act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
+act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n,
+	       struct tc_action *a, int event)
 {
 	struct sk_buff *skb;
 
@@ -679,7 +680,7 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event)
 		return -EINVAL;
 	}
 
-	return rtnl_unicast(skb, &init_net, pid);
+	return rtnl_unicast(skb, net, pid);
 }
 
 static struct tc_action *
@@ -749,7 +750,8 @@ static struct tc_action *create_a(int i)
 	return act;
 }
 
-static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
+static int tca_action_flush(struct net *net, struct nlattr *nla,
+			    struct nlmsghdr *n, u32 pid)
 {
 	struct sk_buff *skb;
 	unsigned char *b;
@@ -808,7 +810,7 @@ static int tca_action_flush(struct nlattr *nla, struct nlmsghdr *n, u32 pid)
 	nlh->nlmsg_flags |= NLM_F_ROOT;
 	module_put(a->ops->owner);
 	kfree(a);
-	err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
 	if (err > 0)
 		return 0;
 
@@ -825,7 +827,8 @@ noflush_out:
 }
 
 static int
-tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
+tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
+	      u32 pid, int event)
 {
 	int i, ret;
 	struct nlattr *tb[TCA_ACT_MAX_PRIO+1];
@@ -837,7 +840,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
 
 	if (event == RTM_DELACTION && n->nlmsg_flags&NLM_F_ROOT) {
 		if (tb[1] != NULL)
-			return tca_action_flush(tb[1], n, pid);
+			return tca_action_flush(net, tb[1], n, pid);
 		else
 			return -EINVAL;
 	}
@@ -858,7 +861,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
 	}
 
 	if (event == RTM_GETACTION)
-		ret = act_get_notify(pid, n, head, event);
+		ret = act_get_notify(net, pid, n, head, event);
 	else { /* delete */
 		struct sk_buff *skb;
 
@@ -877,7 +880,7 @@ tca_action_gd(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int event)
 
 		/* now do the delete */
 		tcf_action_destroy(head, 0);
-		ret = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC,
+		ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC,
 				     n->nlmsg_flags&NLM_F_ECHO);
 		if (ret > 0)
 			return 0;
@@ -888,8 +891,8 @@ err:
 	return ret;
 }
 
-static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
-			  u16 flags)
+static int tcf_add_notify(struct net *net, struct tc_action *a,
+			  u32 pid, u32 seq, int event, u16 flags)
 {
 	struct tcamsg *t;
 	struct nlmsghdr *nlh;
@@ -922,7 +925,7 @@ static int tcf_add_notify(struct tc_action *a, u32 pid, u32 seq, int event,
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	NETLINK_CB(skb).dst_group = RTNLGRP_TC;
 
-	err = rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
+	err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags&NLM_F_ECHO);
 	if (err > 0)
 		err = 0;
 	return err;
@@ -935,7 +938,8 @@ nlmsg_failure:
 
 
 static int
-tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr)
+tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
+	       u32 pid, int ovr)
 {
 	int ret = 0;
 	struct tc_action *act;
@@ -953,7 +957,7 @@ tcf_action_add(struct nlattr *nla, struct nlmsghdr *n, u32 pid, int ovr)
 	/* dump then free all the actions after update; inserted policy
 	 * stays intact
 	 * */
-	ret = tcf_add_notify(act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
+	ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags);
 	for (a = act; a; a = act) {
 		act = a->next;
 		kfree(a);
@@ -969,9 +973,6 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	u32 pid = skb ? NETLINK_CB(skb).pid : 0;
 	int ret = 0, ovr = 0;
 
-	if (!net_eq(net, &init_net))
-		return -EINVAL;
-
 	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL);
 	if (ret < 0)
 		return ret;
@@ -994,15 +995,17 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		if (n->nlmsg_flags&NLM_F_REPLACE)
 			ovr = 1;
 replay:
-		ret = tcf_action_add(tca[TCA_ACT_TAB], n, pid, ovr);
+		ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr);
 		if (ret == -EAGAIN)
 			goto replay;
 		break;
 	case RTM_DELACTION:
-		ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_DELACTION);
+		ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
+				    pid, RTM_DELACTION);
 		break;
 	case RTM_GETACTION:
-		ret = tca_action_gd(tca[TCA_ACT_TAB], n, pid, RTM_GETACTION);
+		ret = tca_action_gd(net, tca[TCA_ACT_TAB], n,
+				    pid, RTM_GETACTION);
 		break;
 	default:
 		BUG();
@@ -1042,7 +1045,6 @@ find_dump_kind(const struct nlmsghdr *n)
 static int
 tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct net *net = sock_net(skb->sk);
 	struct nlmsghdr *nlh;
 	unsigned char *b = skb_tail_pointer(skb);
 	struct nlattr *nest;
@@ -1052,9 +1054,6 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tcamsg *t = (struct tcamsg *) NLMSG_DATA(cb->nlh);
 	struct nlattr *kind = find_dump_kind(cb->nlh);
 
-	if (!net_eq(net, &init_net))
-		return 0;
-
 	if (kind == NULL) {
 		printk("tc_dump_action: action bad kind\n");
 		return 0;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 3725d8fa29db..4a795d966172 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -98,8 +98,9 @@ out:
 }
 EXPORT_SYMBOL(unregister_tcf_proto_ops);
 
-static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
-			  struct tcf_proto *tp, unsigned long fh, int event);
+static int tfilter_notify(struct net *net, struct sk_buff *oskb,
+			  struct nlmsghdr *n, struct tcf_proto *tp,
+			  unsigned long fh, int event);
 
 
 /* Select new prio value from the range, managed by kernel. */
@@ -137,9 +138,6 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	int err;
 	int tp_created = 0;
 
-	if (!net_eq(net, &init_net))
-		return -EINVAL;
-
 replay:
 	t = NLMSG_DATA(n);
 	protocol = TC_H_MIN(t->tcm_info);
@@ -158,7 +156,7 @@ replay:
 	/* Find head of filter chain. */
 
 	/* Find link */
-	dev = __dev_get_by_index(&init_net, t->tcm_ifindex);
+	dev = __dev_get_by_index(net, t->tcm_ifindex);
 	if (dev == NULL)
 		return -ENODEV;
 
@@ -282,7 +280,7 @@ replay:
 			*back = tp->next;
 			spin_unlock_bh(root_lock);
 
-			tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER);
+			tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
 			tcf_destroy(tp);
 			err = 0;
 			goto errout;
@@ -305,10 +303,10 @@ replay:
 		case RTM_DELTFILTER:
 			err = tp->ops->delete(tp, fh);
 			if (err == 0)
-				tfilter_notify(skb, n, tp, fh, RTM_DELTFILTER);
+				tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER);
 			goto errout;
 		case RTM_GETTFILTER:
-			err = tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
+			err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
 			goto errout;
 		default:
 			err = -EINVAL;
@@ -324,7 +322,7 @@ replay:
 			*back = tp;
 			spin_unlock_bh(root_lock);
 		}
-		tfilter_notify(skb, n, tp, fh, RTM_NEWTFILTER);
+		tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER);
 	} else {
 		if (tp_created)
 			tcf_destroy(tp);
@@ -370,8 +368,9 @@ nla_put_failure:
 	return -1;
 }
 
-static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
-			  struct tcf_proto *tp, unsigned long fh, int event)
+static int tfilter_notify(struct net *net, struct sk_buff *oskb,
+			  struct nlmsghdr *n, struct tcf_proto *tp,
+			  unsigned long fh, int event)
 {
 	struct sk_buff *skb;
 	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
@@ -385,7 +384,7 @@ static int tfilter_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 		return -EINVAL;
 	}
 
-	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC,
+	return rtnetlink_send(skb, net, pid, RTNLGRP_TC,
 			      n->nlmsg_flags & NLM_F_ECHO);
 }
 
@@ -418,12 +417,9 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	const struct Qdisc_class_ops *cops;
 	struct tcf_dump_args arg;
 
-	if (!net_eq(net, &init_net))
-		return 0;
-
 	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
 		return skb->len;
-	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
 		return skb->len;
 
 	if (!tcm->tcm_parent)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6cd491013b50..6d6fe16289f3 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -34,10 +34,12 @@
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 
-static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
+static int qdisc_notify(struct net *net, struct sk_buff *oskb,
+			struct nlmsghdr *n, u32 clid,
 			struct Qdisc *old, struct Qdisc *new);
-static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
-			 struct Qdisc *q, unsigned long cl, int event);
+static int tclass_notify(struct net *net, struct sk_buff *oskb,
+			 struct nlmsghdr *n, struct Qdisc *q,
+			 unsigned long cl, int event);
 
 /*
 
@@ -638,11 +640,12 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
 }
 EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
 
-static void notify_and_destroy(struct sk_buff *skb, struct nlmsghdr *n, u32 clid,
+static void notify_and_destroy(struct net *net, struct sk_buff *skb,
+			       struct nlmsghdr *n, u32 clid,
 			       struct Qdisc *old, struct Qdisc *new)
 {
 	if (new || old)
-		qdisc_notify(skb, n, clid, old, new);
+		qdisc_notify(net, skb, n, clid, old, new);
 
 	if (old)
 		qdisc_destroy(old);
@@ -662,6 +665,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 		       struct Qdisc *new, struct Qdisc *old)
 {
 	struct Qdisc *q = old;
+	struct net *net = dev_net(dev);
 	int err = 0;
 
 	if (parent == NULL) {
@@ -698,12 +702,13 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 		}
 
 		if (!ingress) {
-			notify_and_destroy(skb, n, classid, dev->qdisc, new);
+			notify_and_destroy(net, skb, n, classid,
+					   dev->qdisc, new);
 			if (new && !new->ops->attach)
 				atomic_inc(&new->refcnt);
 			dev->qdisc = new ? : &noop_qdisc;
 		} else {
-			notify_and_destroy(skb, n, classid, old, new);
+			notify_and_destroy(net, skb, n, classid, old, new);
 		}
 
 		if (dev->flags & IFF_UP)
@@ -721,7 +726,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
 				err = -ENOENT;
 		}
 		if (!err)
-			notify_and_destroy(skb, n, classid, old, new);
+			notify_and_destroy(net, skb, n, classid, old, new);
 	}
 	return err;
 }
@@ -947,10 +952,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	struct Qdisc *p = NULL;
 	int err;
 
-	if (!net_eq(net, &init_net))
-		return -EINVAL;
-
-	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
 		return -ENODEV;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -990,7 +992,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		if ((err = qdisc_graft(dev, p, skb, n, clid, NULL, q)) != 0)
 			return err;
 	} else {
-		qdisc_notify(skb, n, clid, NULL, q);
+		qdisc_notify(net, skb, n, clid, NULL, q);
 	}
 	return 0;
 }
@@ -1009,16 +1011,13 @@ static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	struct Qdisc *q, *p;
 	int err;
 
-	if (!net_eq(net, &init_net))
-		return -EINVAL;
-
 replay:
 	/* Reinit, just in case something touches this. */
 	tcm = NLMSG_DATA(n);
 	clid = tcm->tcm_parent;
 	q = p = NULL;
 
-	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
 		return -ENODEV;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1105,7 +1104,7 @@ replay:
 		return -EINVAL;
 	err = qdisc_change(q, tca);
 	if (err == 0)
-		qdisc_notify(skb, n, clid, NULL, q);
+		qdisc_notify(net, skb, n, clid, NULL, q);
 	return err;
 
 create_n_graft:
@@ -1195,8 +1194,9 @@ nla_put_failure:
 	return -1;
 }
 
-static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
-			u32 clid, struct Qdisc *old, struct Qdisc *new)
+static int qdisc_notify(struct net *net, struct sk_buff *oskb,
+			struct nlmsghdr *n, u32 clid,
+			struct Qdisc *old, struct Qdisc *new)
 {
 	struct sk_buff *skb;
 	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
@@ -1215,7 +1215,7 @@ static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 	}
 
 	if (skb->len)
-		return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+		return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
 
 err_out:
 	kfree_skb(skb);
@@ -1274,15 +1274,12 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
 	int s_idx, s_q_idx;
 	struct net_device *dev;
 
-	if (!net_eq(net, &init_net))
-		return 0;
-
 	s_idx = cb->args[0];
 	s_q_idx = q_idx = cb->args[1];
 
 	rcu_read_lock();
 	idx = 0;
-	for_each_netdev_rcu(&init_net, dev) {
+	for_each_netdev_rcu(net, dev) {
 		struct netdev_queue *dev_queue;
 
 		if (idx < s_idx)
@@ -1334,10 +1331,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	u32 qid = TC_H_MAJ(clid);
 	int err;
 
-	if (!net_eq(net, &init_net))
-		return -EINVAL;
-
-	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+	if ((dev = __dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
 		return -ENODEV;
 
 	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
@@ -1418,10 +1412,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 			if (cops->delete)
 				err = cops->delete(q, cl);
 			if (err == 0)
-				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
+				tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
 			goto out;
 		case RTM_GETTCLASS:
-			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
+			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
 			goto out;
 		default:
 			err = -EINVAL;
@@ -1434,7 +1428,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 	if (cops->change)
 		err = cops->change(q, clid, pid, tca, &new_cl);
 	if (err == 0)
-		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
+		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
 
 out:
 	if (cl)
@@ -1486,8 +1480,9 @@ nla_put_failure:
 	return -1;
 }
 
-static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
-			  struct Qdisc *q, unsigned long cl, int event)
+static int tclass_notify(struct net *net, struct sk_buff *oskb,
+			 struct nlmsghdr *n, struct Qdisc *q,
+			 unsigned long cl, int event)
 {
 	struct sk_buff *skb;
 	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
@@ -1501,7 +1496,7 @@ static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
 		return -EINVAL;
 	}
 
-	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
+	return rtnetlink_send(skb, net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
 }
 
 struct qdisc_dump_args
@@ -1576,12 +1571,9 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
 	struct net_device *dev;
 	int t, s_t;
 
-	if (!net_eq(net, &init_net))
-		return 0;
-
 	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
 		return 0;
-	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
+	if ((dev = dev_get_by_index(net, tcm->tcm_ifindex)) == NULL)
 		return 0;
 
 	s_t = cb->args[0];
@@ -1701,15 +1693,55 @@ static const struct file_operations psched_fops = {
 	.llseek = seq_lseek,
 	.release = single_release,
 };
+
+static int __net_init psched_net_init(struct net *net)
+{
+	struct proc_dir_entry *e;
+
+	e = proc_net_fops_create(net, "psched", 0, &psched_fops);
+	if (e == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void __net_exit psched_net_exit(struct net *net)
+{
+	proc_net_remove(net, "psched");
+
+	return;
+}
+#else
+static int __net_init psched_net_init(struct net *net)
+{
+	return 0;
+}
+
+static void __net_exit psched_net_exit(struct net *net)
+{
+}
 #endif
 
+static struct pernet_operations psched_net_ops = {
+	.init = psched_net_init,
+	.exit = psched_net_exit,
+};
+
 static int __init pktsched_init(void)
 {
+	int err;
+
+	err = register_pernet_subsys(&psched_net_ops);
+	if (err) {
+		printk(KERN_ERR "pktsched_init: "
+		       "cannot initialize per netns operations\n");
+		return err;
+	}
+
 	register_qdisc(&pfifo_qdisc_ops);
 	register_qdisc(&bfifo_qdisc_ops);
 	register_qdisc(&pfifo_head_drop_qdisc_ops);
 	register_qdisc(&mq_qdisc_ops);
-	proc_net_fops_create(&init_net, "psched", 0, &psched_fops);
 
 	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
 	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
-- 
cgit v1.2.3-59-g8ed1b


From 44608f801283f0f69d8a04d9976837748e410084 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 18 Mar 2010 18:29:35 -0700
Subject: net/wireless/wext-core.c: Use IW_IOCTL_IDX macro

There's a wireless.h macro for this, might as well use it.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/wext-core.c | 112 +++++++++++++++++++++++------------------------
 1 file changed, 56 insertions(+), 56 deletions(-)

(limited to 'net')

diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index 5e1656bdf23b..dbde22b8f30f 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -28,226 +28,226 @@ typedef int (*wext_ioctl_func)(struct net_device *, struct iwreq *,
  * know about.
  */
 static const struct iw_ioctl_description standard_ioctl[] = {
-	[SIOCSIWCOMMIT	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWCOMMIT)] = {
 		.header_type	= IW_HEADER_TYPE_NULL,
 	},
-	[SIOCGIWNAME	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWNAME)] = {
 		.header_type	= IW_HEADER_TYPE_CHAR,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWNWID	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWNWID)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 		.flags		= IW_DESCR_FLAG_EVENT,
 	},
-	[SIOCGIWNWID	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWNWID)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWFREQ	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWFREQ)] = {
 		.header_type	= IW_HEADER_TYPE_FREQ,
 		.flags		= IW_DESCR_FLAG_EVENT,
 	},
-	[SIOCGIWFREQ	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWFREQ)] = {
 		.header_type	= IW_HEADER_TYPE_FREQ,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWMODE	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWMODE)] = {
 		.header_type	= IW_HEADER_TYPE_UINT,
 		.flags		= IW_DESCR_FLAG_EVENT,
 	},
-	[SIOCGIWMODE	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWMODE)] = {
 		.header_type	= IW_HEADER_TYPE_UINT,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWSENS	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWSENS)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWSENS	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWSENS)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWRANGE	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWRANGE)] = {
 		.header_type	= IW_HEADER_TYPE_NULL,
 	},
-	[SIOCGIWRANGE	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWRANGE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= sizeof(struct iw_range),
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWPRIV	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWPRIV)] = {
 		.header_type	= IW_HEADER_TYPE_NULL,
 	},
-	[SIOCGIWPRIV	- SIOCIWFIRST] = { /* (handled directly by us) */
+	[IW_IOCTL_IDX(SIOCGIWPRIV)] = { /* (handled directly by us) */
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct iw_priv_args),
 		.max_tokens	= 16,
 		.flags		= IW_DESCR_FLAG_NOMAX,
 	},
-	[SIOCSIWSTATS	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWSTATS)] = {
 		.header_type	= IW_HEADER_TYPE_NULL,
 	},
-	[SIOCGIWSTATS	- SIOCIWFIRST] = { /* (handled directly by us) */
+	[IW_IOCTL_IDX(SIOCGIWSTATS)] = { /* (handled directly by us) */
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= sizeof(struct iw_statistics),
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWSPY	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWSPY)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct sockaddr),
 		.max_tokens	= IW_MAX_SPY,
 	},
-	[SIOCGIWSPY	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWSPY)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct sockaddr) +
 				  sizeof(struct iw_quality),
 		.max_tokens	= IW_MAX_SPY,
 	},
-	[SIOCSIWTHRSPY	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWTHRSPY)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct iw_thrspy),
 		.min_tokens	= 1,
 		.max_tokens	= 1,
 	},
-	[SIOCGIWTHRSPY	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWTHRSPY)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct iw_thrspy),
 		.min_tokens	= 1,
 		.max_tokens	= 1,
 	},
-	[SIOCSIWAP	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWAP)] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 	},
-	[SIOCGIWAP	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWAP)] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWMLME	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWMLME)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.min_tokens	= sizeof(struct iw_mlme),
 		.max_tokens	= sizeof(struct iw_mlme),
 	},
-	[SIOCGIWAPLIST	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWAPLIST)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= sizeof(struct sockaddr) +
 				  sizeof(struct iw_quality),
 		.max_tokens	= IW_MAX_AP,
 		.flags		= IW_DESCR_FLAG_NOMAX,
 	},
-	[SIOCSIWSCAN	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWSCAN)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.min_tokens	= 0,
 		.max_tokens	= sizeof(struct iw_scan_req),
 	},
-	[SIOCGIWSCAN	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWSCAN)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_SCAN_MAX_DATA,
 		.flags		= IW_DESCR_FLAG_NOMAX,
 	},
-	[SIOCSIWESSID	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWESSID)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_ESSID_MAX_SIZE,
 		.flags		= IW_DESCR_FLAG_EVENT,
 	},
-	[SIOCGIWESSID	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWESSID)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_ESSID_MAX_SIZE,
 		.flags		= IW_DESCR_FLAG_DUMP,
 	},
-	[SIOCSIWNICKN	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWNICKN)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_ESSID_MAX_SIZE,
 	},
-	[SIOCGIWNICKN	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWNICKN)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_ESSID_MAX_SIZE,
 	},
-	[SIOCSIWRATE	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWRATE)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWRATE	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWRATE)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWRTS	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWRTS)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWRTS	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWRTS)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWFRAG	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWFRAG)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWFRAG	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWFRAG)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWTXPOW	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWTXPOW)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWTXPOW	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWTXPOW)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWRETRY	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWRETRY)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWRETRY	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWRETRY)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWENCODE	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWENCODE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_ENCODING_TOKEN_MAX,
 		.flags		= IW_DESCR_FLAG_EVENT | IW_DESCR_FLAG_RESTRICT,
 	},
-	[SIOCGIWENCODE	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWENCODE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_ENCODING_TOKEN_MAX,
 		.flags		= IW_DESCR_FLAG_DUMP | IW_DESCR_FLAG_RESTRICT,
 	},
-	[SIOCSIWPOWER	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWPOWER)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWPOWER	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWPOWER)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWGENIE	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWGENIE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_GENERIC_IE_MAX,
 	},
-	[SIOCGIWGENIE	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWGENIE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_GENERIC_IE_MAX,
 	},
-	[SIOCSIWAUTH	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWAUTH)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCGIWAUTH	- SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWAUTH)] = {
 		.header_type	= IW_HEADER_TYPE_PARAM,
 	},
-	[SIOCSIWENCODEEXT - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWENCODEEXT)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.min_tokens	= sizeof(struct iw_encode_ext),
 		.max_tokens	= sizeof(struct iw_encode_ext) +
 				  IW_ENCODING_TOKEN_MAX,
 	},
-	[SIOCGIWENCODEEXT - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCGIWENCODEEXT)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.min_tokens	= sizeof(struct iw_encode_ext),
 		.max_tokens	= sizeof(struct iw_encode_ext) +
 				  IW_ENCODING_TOKEN_MAX,
 	},
-	[SIOCSIWPMKSA - SIOCIWFIRST] = {
+	[IW_IOCTL_IDX(SIOCSIWPMKSA)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.min_tokens	= sizeof(struct iw_pmksa),
@@ -449,7 +449,7 @@ void wireless_send_event(struct net_device *	dev,
 
 	/* Get the description of the Event */
 	if (cmd <= SIOCIWLAST) {
-		cmd_index = cmd - SIOCIWFIRST;
+		cmd_index = IW_IOCTL_IDX(cmd);
 		if (cmd_index < standard_ioctl_num)
 			descr = &(standard_ioctl[cmd_index]);
 	} else {
@@ -662,7 +662,7 @@ static iw_handler get_handler(struct net_device *dev, unsigned int cmd)
 		return NULL;
 
 	/* Try as a standard command */
-	index = cmd - SIOCIWFIRST;
+	index = IW_IOCTL_IDX(cmd);
 	if (index < handlers->num_standard)
 		return handlers->standard[index];
 
@@ -954,9 +954,9 @@ static int ioctl_standard_call(struct net_device *	dev,
 	int					ret = -EINVAL;
 
 	/* Get the description of the IOCTL */
-	if ((cmd - SIOCIWFIRST) >= standard_ioctl_num)
+	if (IW_IOCTL_IDX(cmd) >= standard_ioctl_num)
 		return -EOPNOTSUPP;
-	descr = &(standard_ioctl[cmd - SIOCIWFIRST]);
+	descr = &(standard_ioctl[IW_IOCTL_IDX(cmd)]);
 
 	/* Check if we have a pointer to user space data or not */
 	if (descr->header_type != IW_HEADER_TYPE_POINT) {
@@ -1012,7 +1012,7 @@ static int compat_standard_call(struct net_device	*dev,
 	struct iw_point iwp;
 	int err;
 
-	descr = standard_ioctl + (cmd - SIOCIWFIRST);
+	descr = standard_ioctl + IW_IOCTL_IDX(cmd);
 
 	if (descr->header_type != IW_HEADER_TYPE_POINT)
 		return ioctl_standard_call(dev, iwr, cmd, info, handler);
-- 
cgit v1.2.3-59-g8ed1b


From 76326f1d4c98fe01daf363e3d07f84bafed1feec Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 18 Mar 2010 18:29:36 -0700
Subject: net/wireless/wext-core.c: Use IW_EVENT_IDX macro

There's a wireless.h macro for this, might as well use it.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/wext-core.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index dbde22b8f30f..bfcbeee23f9c 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -261,44 +261,44 @@ static const unsigned standard_ioctl_num = ARRAY_SIZE(standard_ioctl);
  * we know about.
  */
 static const struct iw_ioctl_description standard_event[] = {
-	[IWEVTXDROP	- IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVTXDROP)] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 	},
-	[IWEVQUAL	- IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVQUAL)] = {
 		.header_type	= IW_HEADER_TYPE_QUAL,
 	},
-	[IWEVCUSTOM	- IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVCUSTOM)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_CUSTOM_MAX,
 	},
-	[IWEVREGISTERED	- IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVREGISTERED)] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 	},
-	[IWEVEXPIRED	- IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVEXPIRED)] = {
 		.header_type	= IW_HEADER_TYPE_ADDR,
 	},
-	[IWEVGENIE	- IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVGENIE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_GENERIC_IE_MAX,
 	},
-	[IWEVMICHAELMICFAILURE	- IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVMICHAELMICFAILURE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= sizeof(struct iw_michaelmicfailure),
 	},
-	[IWEVASSOCREQIE	- IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVASSOCREQIE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_GENERIC_IE_MAX,
 	},
-	[IWEVASSOCRESPIE	- IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVASSOCRESPIE)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= IW_GENERIC_IE_MAX,
 	},
-	[IWEVPMKIDCAND	- IWEVFIRST] = {
+	[IW_EVENT_IDX(IWEVPMKIDCAND)] = {
 		.header_type	= IW_HEADER_TYPE_POINT,
 		.token_size	= 1,
 		.max_tokens	= sizeof(struct iw_pmkid_cand),
@@ -453,7 +453,7 @@ void wireless_send_event(struct net_device *	dev,
 		if (cmd_index < standard_ioctl_num)
 			descr = &(standard_ioctl[cmd_index]);
 	} else {
-		cmd_index = cmd - IWEVFIRST;
+		cmd_index = IW_EVENT_IDX(cmd);
 		if (cmd_index < standard_event_num)
 			descr = &(standard_event[cmd_index]);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 1e4dcd012423b6a28f968a55886d2b27896a1586 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Fri, 19 Mar 2010 07:14:53 +0200
Subject: mac80211: Add support for connection monitor in hardware

This patch is based on a RFC patch by Kalle Valo.

The wl1271 has a feature which handles the connection monitor logic
in hardware, basically periodically sending nullfunc frames and reporting
to the host if the AP is lost, after attempting to recover by sending
probe-requests to the AP.

Add support to mac80211 by adding a new flag IEEE80211_HW_CONNECTION_MONITOR
which prevents conn_mon_timer from triggering during idle periods, and
prevents sending probe-requests to the AP if beacon-loss is indicated by the
hardware.

Cc: Kalle Valo <kalle.valo@nokia.com>
Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h     | 24 +++++++++++++++--
 net/mac80211/ieee80211_i.h |  4 +--
 net/mac80211/iface.c       |  2 +-
 net/mac80211/mlme.c        | 64 +++++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 82 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 936bc410d061..d14226f29ffc 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -954,6 +954,11 @@ enum ieee80211_tkip_key_type {
  *	Hardware can provide ack status reports of Tx frames to
  *	the stack.
  *
+ * @IEEE80211_HW_CONNECTION_MONITOR:
+ *      The hardware performs its own connection monitoring, including
+ *      periodic keep-alives to the AP and probing the AP on beacon loss.
+ *      When this flag is set, signaling beacon-loss will cause an immediate
+ *      change to disassociated state.
  */
 enum ieee80211_hw_flags {
 	IEEE80211_HW_HAS_RATE_CONTROL			= 1<<0,
@@ -975,6 +980,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS		= 1<<16,
 	IEEE80211_HW_SUPPORTS_UAPSD			= 1<<17,
 	IEEE80211_HW_REPORTS_TX_ACK_STATUS		= 1<<18,
+	IEEE80211_HW_CONNECTION_MONITOR			= 1<<19,
 };
 
 /**
@@ -2364,12 +2370,26 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
  *
  * @vif: &struct ieee80211_vif pointer from the add_interface callback.
  *
- * When beacon filtering is enabled with IEEE80211_HW_BEACON_FILTERING and
- * IEEE80211_CONF_PS is set, the driver needs to inform whenever the
+ * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING and
+ * %IEEE80211_CONF_PS is set, the driver needs to inform whenever the
  * hardware is not receiving beacons with this function.
  */
 void ieee80211_beacon_loss(struct ieee80211_vif *vif);
 
+/**
+ * ieee80211_connection_loss - inform hardware has lost connection to the AP
+ *
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ *
+ * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING, and
+ * %IEEE80211_CONF_PS and %IEEE80211_HW_CONNECTION_MONITOR are set, the driver
+ * needs to inform if the connection to the AP has been lost.
+ *
+ * This function will cause immediate change to disassociated state,
+ * without connection recovery attempts.
+ */
+void ieee80211_connection_loss(struct ieee80211_vif *vif);
+
 /* Rate control API */
 
 /**
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b84126491ab1..ab369e2a5282 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -327,7 +327,7 @@ struct ieee80211_if_managed {
 	struct work_struct work;
 	struct work_struct monitor_work;
 	struct work_struct chswitch_work;
-	struct work_struct beacon_loss_work;
+	struct work_struct beacon_connection_loss_work;
 
 	unsigned long probe_timeout;
 	int probe_send_count;
@@ -1156,7 +1156,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local,
 			     int powersave);
 void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
 			     struct ieee80211_hdr *hdr);
-void ieee80211_beacon_loss_work(struct work_struct *work);
+void ieee80211_beacon_connection_loss_work(struct work_struct *work);
 
 void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw,
 				     enum queue_stop_reason reason);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d5571b9420cd..b4ec59a8dc03 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -486,7 +486,7 @@ static int ieee80211_stop(struct net_device *dev)
 		cancel_work_sync(&sdata->u.mgd.work);
 		cancel_work_sync(&sdata->u.mgd.chswitch_work);
 		cancel_work_sync(&sdata->u.mgd.monitor_work);
-		cancel_work_sync(&sdata->u.mgd.beacon_loss_work);
+		cancel_work_sync(&sdata->u.mgd.beacon_connection_loss_work);
 
 		/*
 		 * When we get here, the interface is marked down.
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0ab284c32135..865ea1cfb7bb 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -851,6 +851,9 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
 	if (is_multicast_ether_addr(hdr->addr1))
 		return;
 
+	if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
+		return;
+
 	mod_timer(&sdata->u.mgd.conn_mon_timer,
 		  round_jiffies_up(jiffies + IEEE80211_CONNECTION_IDLE_TIME));
 }
@@ -928,23 +931,68 @@ static void ieee80211_mgd_probe_ap(struct ieee80211_sub_if_data *sdata,
 	mutex_unlock(&ifmgd->mtx);
 }
 
-void ieee80211_beacon_loss_work(struct work_struct *work)
+static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
+{
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	struct ieee80211_local *local = sdata->local;
+	u8 bssid[ETH_ALEN];
+
+	mutex_lock(&ifmgd->mtx);
+	if (!ifmgd->associated) {
+		mutex_unlock(&ifmgd->mtx);
+		return;
+	}
+
+	memcpy(bssid, ifmgd->associated->bssid, ETH_ALEN);
+
+	printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid);
+
+	ieee80211_set_disassoc(sdata);
+	ieee80211_recalc_idle(local);
+	mutex_unlock(&ifmgd->mtx);
+	/*
+	 * must be outside lock due to cfg80211,
+	 * but that's not a problem.
+	 */
+	ieee80211_send_deauth_disassoc(sdata, bssid,
+				       IEEE80211_STYPE_DEAUTH,
+				       WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
+				       NULL);
+}
+
+void ieee80211_beacon_connection_loss_work(struct work_struct *work)
 {
 	struct ieee80211_sub_if_data *sdata =
 		container_of(work, struct ieee80211_sub_if_data,
-			     u.mgd.beacon_loss_work);
+			     u.mgd.beacon_connection_loss_work);
 
-	ieee80211_mgd_probe_ap(sdata, true);
+	if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
+		__ieee80211_connection_loss(sdata);
+	else
+		ieee80211_mgd_probe_ap(sdata, true);
 }
 
 void ieee80211_beacon_loss(struct ieee80211_vif *vif)
 {
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_hw *hw = &sdata->local->hw;
 
-	ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_loss_work);
+	WARN_ON(hw->flags & IEEE80211_HW_CONNECTION_MONITOR);
+	ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);
 }
 EXPORT_SYMBOL(ieee80211_beacon_loss);
 
+void ieee80211_connection_loss(struct ieee80211_vif *vif)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+	struct ieee80211_hw *hw = &sdata->local->hw;
+
+	WARN_ON(!(hw->flags & IEEE80211_HW_CONNECTION_MONITOR));
+	ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);
+}
+EXPORT_SYMBOL(ieee80211_connection_loss);
+
+
 static enum rx_mgmt_action __must_check
 ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 			 struct ieee80211_mgmt *mgmt, size_t len)
@@ -1634,7 +1682,8 @@ static void ieee80211_sta_bcn_mon_timer(unsigned long data)
 	if (local->quiescing)
 		return;
 
-	ieee80211_queue_work(&sdata->local->hw, &sdata->u.mgd.beacon_loss_work);
+	ieee80211_queue_work(&sdata->local->hw,
+			     &sdata->u.mgd.beacon_connection_loss_work);
 }
 
 static void ieee80211_sta_conn_mon_timer(unsigned long data)
@@ -1686,7 +1735,7 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata)
 	 */
 
 	cancel_work_sync(&ifmgd->work);
-	cancel_work_sync(&ifmgd->beacon_loss_work);
+	cancel_work_sync(&ifmgd->beacon_connection_loss_work);
 	if (del_timer_sync(&ifmgd->timer))
 		set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running);
 
@@ -1720,7 +1769,8 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata)
 	INIT_WORK(&ifmgd->work, ieee80211_sta_work);
 	INIT_WORK(&ifmgd->monitor_work, ieee80211_sta_monitor_work);
 	INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work);
-	INIT_WORK(&ifmgd->beacon_loss_work, ieee80211_beacon_loss_work);
+	INIT_WORK(&ifmgd->beacon_connection_loss_work,
+		  ieee80211_beacon_connection_loss_work);
 	setup_timer(&ifmgd->timer, ieee80211_sta_timer,
 		    (unsigned long) sdata);
 	setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer,
-- 
cgit v1.2.3-59-g8ed1b


From e51d739ab79110c43ca03daf3ddb3c52dadd38b7 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 23 Mar 2010 13:39:19 +0000
Subject: net: Fix locking in flush_backlog

Need to take spinlocks when dequeuing from input_pkt_queue in flush_backlog.
Also, flush_backlog can now be called directly from netdev_run_todo.

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index a03aab45e84f..5e3dc28cbf5a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2766,17 +2766,19 @@ int netif_receive_skb(struct sk_buff *skb)
 EXPORT_SYMBOL(netif_receive_skb);
 
 /* Network device is going away, flush any packets still pending  */
-static void flush_backlog(void *arg)
+static void flush_backlog(struct net_device *dev, int cpu)
 {
-	struct net_device *dev = arg;
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct softnet_data *queue = &per_cpu(softnet_data, cpu);
 	struct sk_buff *skb, *tmp;
+	unsigned long flags;
 
+	spin_lock_irqsave(&queue->input_pkt_queue.lock, flags);
 	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
 		if (skb->dev == dev) {
 			__skb_unlink(skb, &queue->input_pkt_queue);
 			kfree_skb(skb);
 		}
+	spin_unlock_irqrestore(&queue->input_pkt_queue.lock, flags);
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -5545,6 +5547,7 @@ void netdev_run_todo(void)
 	while (!list_empty(&list)) {
 		struct net_device *dev
 			= list_first_entry(&list, struct net_device, todo_list);
+		int i;
 		list_del(&dev->todo_list);
 
 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
@@ -5556,7 +5559,8 @@ void netdev_run_todo(void)
 
 		dev->reg_state = NETREG_UNREGISTERED;
 
-		on_each_cpu(flush_backlog, dev, 1);
+		for_each_online_cpu(i)
+			flush_backlog(dev, i);
 
 		netdev_wait_allrefs(dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From d6dc1a386358979e12366d1f35eeb68fc181e101 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Tue, 23 Mar 2010 09:02:33 +0200
Subject: cfg80211: Add connection quality monitoring support to nl80211

Add support for basic configuration of a connection quality monitoring to the
nl80211 interface, and basic support for notifying about triggered monitoring
events.

Via this interface a user-space connection manager may configure and receive
pre-warning events of deteriorating WLAN connection quality, and start
preparing for roaming in advance, before the connection is already lost.

An example usage of such a trigger is starting scanning for nearby APs in
an attempt to find one with better connection quality, and associate to it
before the connection characteristics of the existing connection become too bad
or the association is even lost, leading to a prolonged delay in connectivity.

The interface currently supports only RSSI, but it could be later extended
to include other parameters, such as signal-to-noise ratio, if need for that
arises.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  46 +++++++++++++++++
 include/net/cfg80211.h  |  19 +++++++
 net/wireless/mlme.c     |  13 +++++
 net/wireless/nl80211.c  | 131 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.h  |   6 +++
 5 files changed, 215 insertions(+)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 28ba20fda3e2..89947597b9ce 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -323,6 +323,12 @@
  *	the TX command and %NL80211_ATTR_FRAME includes the contents of the
  *	frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged
  *	the frame.
+ * @NL80211_CMD_SET_CQM: Connection quality monitor configuration. This command
+ *	is used to configure connection quality monitoring notification trigger
+ *	levels.
+ * @NL80211_CMD_NOTIFY_CQM: Connection quality monitor notification. This
+ *	command is used as an event to indicate that a trigger level was
+ *	reached.
  *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
@@ -419,6 +425,9 @@ enum nl80211_commands {
 	NL80211_CMD_SET_POWER_SAVE,
 	NL80211_CMD_GET_POWER_SAVE,
 
+	NL80211_CMD_SET_CQM,
+	NL80211_CMD_NOTIFY_CQM,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -691,6 +700,9 @@ enum nl80211_commands {
  * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was
  *	acknowledged by the recipient.
  *
+ * @NL80211_ATTR_CQM: connection quality monitor configuration in a
+ *	nested attribute with %NL80211_ATTR_CQM_* sub-attributes.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -842,6 +854,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_PS_STATE,
 
+	NL80211_ATTR_CQM,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1583,4 +1597,36 @@ enum nl80211_ps_state {
 	NL80211_PS_ENABLED,
 };
 
+/**
+ * enum nl80211_attr_cqm - connection quality monitor attributes
+ * @__NL80211_ATTR_CQM_INVALID: invalid
+ * @NL80211_ATTR_CQM_RSSI_THOLD: RSSI threshold in dBm (zero to disable)
+ * @NL80211_ATTR_CQM_RSSI_HYST: RSSI hysteresis in dBm
+ * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event
+ * @__NL80211_ATTR_CQM_AFTER_LAST: internal
+ * @NL80211_ATTR_CQM_MAX: highest key attribute
+ */
+enum nl80211_attr_cqm {
+	__NL80211_ATTR_CQM_INVALID,
+	NL80211_ATTR_CQM_RSSI_THOLD,
+	NL80211_ATTR_CQM_RSSI_HYST,
+	NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
+
+	/* keep last */
+	__NL80211_ATTR_CQM_AFTER_LAST,
+	NL80211_ATTR_CQM_MAX = __NL80211_ATTR_CQM_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_cqm_rssi_threshold_event - RSSI threshold event
+ * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW - The RSSI level is lower than the
+ *      configured threshold
+ * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH - The RSSI is higher than the
+ *      configured threshold
+ */
+enum nl80211_cqm_rssi_threshold_event {
+	NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW,
+	NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 3d134a1fb96b..868cfd3b9724 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1007,6 +1007,7 @@ struct cfg80211_pmksa {
  *	RSN IE. It allows for faster roaming between WPA2 BSSIDs.
  * @del_pmksa: Delete a cached PMKID.
  * @flush_pmksa: Flush all cached PMKIDs.
+ * @set_cqm_rssi_config: Configure connection quality monitor RSSI threshold.
  *
  */
 struct cfg80211_ops {
@@ -1152,6 +1153,10 @@ struct cfg80211_ops {
 
 	int	(*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev,
 				  bool enabled, int timeout);
+
+	int	(*set_cqm_rssi_config)(struct wiphy *wiphy,
+				       struct net_device *dev,
+				       s32 rssi_thold, u32 rssi_hyst);
 };
 
 /*
@@ -2337,4 +2342,18 @@ bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf,
 void cfg80211_action_tx_status(struct net_device *dev, u64 cookie,
 			       const u8 *buf, size_t len, bool ack, gfp_t gfp);
 
+
+/**
+ * cfg80211_cqm_rssi_notify - connection quality monitoring rssi event
+ * @dev: network device
+ * @rssi_event: the triggered RSSI event
+ * @gfp: context flags
+ *
+ * This function is called when a configured connection quality monitoring
+ * rssi threshold reached event occurs.
+ */
+void cfg80211_cqm_rssi_notify(struct net_device *dev,
+			      enum nl80211_cqm_rssi_threshold_event rssi_event,
+			      gfp_t gfp);
+
 #endif /* __NET_CFG80211_H */
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 62bc8855e123..0855f0d32349 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -894,3 +894,16 @@ void cfg80211_action_tx_status(struct net_device *dev, u64 cookie,
 	nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp);
 }
 EXPORT_SYMBOL(cfg80211_action_tx_status);
+
+void cfg80211_cqm_rssi_notify(struct net_device *dev,
+			      enum nl80211_cqm_rssi_threshold_event rssi_event,
+			      gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+	/* Indicate roaming trigger event to user space */
+	nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp);
+}
+EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e447db04cf76..a7fc3d83f5f6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -149,6 +149,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 				 .len = IEEE80211_MAX_DATA_LEN },
 	[NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, },
 	[NL80211_ATTR_PS_STATE] = { .type = NLA_U32 },
+	[NL80211_ATTR_CQM] = { .type = NLA_NESTED, },
 };
 
 /* policy for the attributes */
@@ -4778,6 +4779,84 @@ unlock_rtnl:
 	return err;
 }
 
+static struct nla_policy
+nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] __read_mostly = {
+	[NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_U32 },
+	[NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 },
+	[NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 },
+};
+
+static int nl80211_set_cqm_rssi(struct genl_info *info,
+				s32 threshold, u32 hysteresis)
+{
+	struct cfg80211_registered_device *rdev;
+	struct wireless_dev *wdev;
+	struct net_device *dev;
+	int err;
+
+	if (threshold > 0)
+		return -EINVAL;
+
+	rtnl_lock();
+
+	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
+	if (err)
+		goto unlock_rdev;
+
+	wdev = dev->ieee80211_ptr;
+
+	if (!rdev->ops->set_cqm_rssi_config) {
+		err = -EOPNOTSUPP;
+		goto unlock_rdev;
+	}
+
+	if (wdev->iftype != NL80211_IFTYPE_STATION) {
+		err = -EOPNOTSUPP;
+		goto unlock_rdev;
+	}
+
+	err = rdev->ops->set_cqm_rssi_config(wdev->wiphy, dev,
+					     threshold, hysteresis);
+
+unlock_rdev:
+	cfg80211_unlock_rdev(rdev);
+	dev_put(dev);
+	rtnl_unlock();
+
+	return err;
+}
+
+static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *attrs[NL80211_ATTR_CQM_MAX + 1];
+	struct nlattr *cqm;
+	int err;
+
+	cqm = info->attrs[NL80211_ATTR_CQM];
+	if (!cqm) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm,
+			       nl80211_attr_cqm_policy);
+	if (err)
+		goto out;
+
+	if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] &&
+	    attrs[NL80211_ATTR_CQM_RSSI_HYST]) {
+		s32 threshold;
+		u32 hysteresis;
+		threshold = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
+		hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);
+		err = nl80211_set_cqm_rssi(info, threshold, hysteresis);
+	} else
+		err = -EINVAL;
+
+out:
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -5082,6 +5161,12 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
 	},
+	{
+		.cmd = NL80211_CMD_SET_CQM,
+		.doit = nl80211_set_cqm,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -5832,6 +5917,52 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
+void
+nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
+			     struct net_device *netdev,
+			     enum nl80211_cqm_rssi_threshold_event rssi_event,
+			     gfp_t gfp)
+{
+	struct sk_buff *msg;
+	struct nlattr *pinfoattr;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+
+	pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM);
+	if (!pinfoattr)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(msg, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
+		    rssi_event);
+
+	nla_nest_end(msg, pinfoattr);
+
+	if (genlmsg_end(msg, hdr) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+				nl80211_mlme_mcgrp.id, gfp);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
 static int nl80211_netlink_notify(struct notifier_block * nb,
 				  unsigned long state,
 				  void *_notify)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 4ca511102c6c..2ad7fbc7d9f1 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -82,4 +82,10 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
 				   const u8 *buf, size_t len, bool ack,
 				   gfp_t gfp);
 
+void
+nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
+			     struct net_device *netdev,
+			     enum nl80211_cqm_rssi_threshold_event rssi_event,
+			     gfp_t gfp);
+
 #endif /* __NET_WIRELESS_NL80211_H */
-- 
cgit v1.2.3-59-g8ed1b


From a97c13c34509be460dea23c86f31c02daa2428b5 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Tue, 23 Mar 2010 09:02:34 +0200
Subject: mac80211: Add support for connection quality monitoring

Add support for the set_cqm_rssi_config op. This op function configures the
requested connection quality monitor RSSI threshold and RSSI hysteresis
values to the hardware if the hardware supports
IEEE80211_HW_SUPPORTS_CQM_RSSI.

For unsupported hardware, -EOPNOTSUPP is currently returned, so mac80211
does not do connection quality monitoring on the host. This could be
added later, if needed.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 30 ++++++++++++++++++++++++++++++
 net/mac80211/cfg.c     | 27 +++++++++++++++++++++++++++
 net/mac80211/mlme.c    | 15 +++++++++++++++
 3 files changed, 72 insertions(+)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index d14226f29ffc..1a8f50af49a0 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -144,6 +144,7 @@ struct ieee80211_low_level_stats {
  *	new beacon (beaconing modes)
  * @BSS_CHANGED_BEACON_ENABLED: Beaconing should be
  *	enabled/disabled (beaconing modes)
+ * @BSS_CHANGED_CQM: Connection quality monitor config changed
  */
 enum ieee80211_bss_change {
 	BSS_CHANGED_ASSOC		= 1<<0,
@@ -156,6 +157,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_BSSID		= 1<<7,
 	BSS_CHANGED_BEACON		= 1<<8,
 	BSS_CHANGED_BEACON_ENABLED	= 1<<9,
+	BSS_CHANGED_CQM			= 1<<10,
 };
 
 /**
@@ -185,6 +187,9 @@ enum ieee80211_bss_change {
  * @enable_beacon: whether beaconing should be enabled or not
  * @ht_operation_mode: HT operation mode (like in &struct ieee80211_ht_info).
  *	This field is only valid when the channel type is one of the HT types.
+ * @cqm_rssi_thold: Connection quality monitor RSSI threshold, a zero value
+ *	implies disabled
+ * @cqm_rssi_hyst: Connection quality monitor RSSI hysteresis
  */
 struct ieee80211_bss_conf {
 	const u8 *bssid;
@@ -202,6 +207,8 @@ struct ieee80211_bss_conf {
 	u64 timestamp;
 	u32 basic_rates;
 	u16 ht_operation_mode;
+	s32 cqm_rssi_thold;
+	u32 cqm_rssi_hyst;
 };
 
 /**
@@ -959,6 +966,12 @@ enum ieee80211_tkip_key_type {
  *      periodic keep-alives to the AP and probing the AP on beacon loss.
  *      When this flag is set, signaling beacon-loss will cause an immediate
  *      change to disassociated state.
+ *
+ * @IEEE80211_HW_SUPPORTS_CQM_RSSI:
+ *	Hardware can do connection quality monitoring - i.e. it can monitor
+ *	connection quality related parameters, such as the RSSI level and
+ *	provide notifications if configured trigger levels are reached.
+ *
  */
 enum ieee80211_hw_flags {
 	IEEE80211_HW_HAS_RATE_CONTROL			= 1<<0,
@@ -981,6 +994,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_SUPPORTS_UAPSD			= 1<<17,
 	IEEE80211_HW_REPORTS_TX_ACK_STATUS		= 1<<18,
 	IEEE80211_HW_CONNECTION_MONITOR			= 1<<19,
+	IEEE80211_HW_SUPPORTS_CQM_RSSI			= 1<<20,
 };
 
 /**
@@ -2390,6 +2404,22 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif);
  */
 void ieee80211_connection_loss(struct ieee80211_vif *vif);
 
+/**
+ * ieee80211_cqm_rssi_notify - inform that a configured connection quality
+ *	monitoring RSSI threshold was triggered
+ *
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @rssi_event: the RSSI trigger event type
+ * @gfp: context flags
+ *
+ * When the %IEEE80211_HW_SUPPORTS_CQM_RSSI flag is set and connection quality
+ * monitoring is configured with an RSSI threshold, the driver calls this
+ * function whenever the RSSI level reaches the threshold.
+ */
+void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif,
+			       enum nl80211_cqm_rssi_threshold_event rssi_event,
+			       gfp_t gfp);
+
 /* Rate control API */
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index b7116ef84a3b..c8f520529eec 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1402,6 +1402,32 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
 	return 0;
 }
 
+static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy,
+					 struct net_device *dev,
+					 s32 rssi_thold, u32 rssi_hyst)
+{
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+	struct ieee80211_vif *vif = &sdata->vif;
+	struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
+
+	if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI))
+		return -EOPNOTSUPP;
+
+	if (rssi_thold == bss_conf->cqm_rssi_thold &&
+	    rssi_hyst == bss_conf->cqm_rssi_hyst)
+		return 0;
+
+	bss_conf->cqm_rssi_thold = rssi_thold;
+	bss_conf->cqm_rssi_hyst = rssi_hyst;
+
+	/* tell the driver upon association, unless already associated */
+	if (sdata->u.mgd.associated)
+		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_CQM);
+
+	return 0;
+}
+
 static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
 				      struct net_device *dev,
 				      const u8 *addr,
@@ -1506,4 +1532,5 @@ struct cfg80211_ops mac80211_config_ops = {
 	.remain_on_channel = ieee80211_remain_on_channel,
 	.cancel_remain_on_channel = ieee80211_cancel_remain_on_channel,
 	.action = ieee80211_action,
+	.set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
 };
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 865ea1cfb7bb..65eafda5738a 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -750,6 +750,11 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	/* And the BSSID changed - we're associated now */
 	bss_info_changed |= BSS_CHANGED_BSSID;
 
+	/* Tell the driver to monitor connection quality (if supported) */
+	if ((local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI) &&
+	    sdata->vif.bss_conf.cqm_rssi_thold)
+		bss_info_changed |= BSS_CHANGED_CQM;
+
 	ieee80211_bss_info_change_notify(sdata, bss_info_changed);
 
 	mutex_lock(&local->iflist_mtx);
@@ -2182,3 +2187,13 @@ int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata,
 	*cookie = (unsigned long) skb;
 	return 0;
 }
+
+void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif,
+			       enum nl80211_cqm_rssi_threshold_event rssi_event,
+			       gfp_t gfp)
+{
+	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+
+	cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, gfp);
+}
+EXPORT_SYMBOL(ieee80211_cqm_rssi_notify);
-- 
cgit v1.2.3-59-g8ed1b


From 14b44974d5a3c1ca59f6809b7313d7229eb55fd8 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Wed, 24 Mar 2010 14:56:41 +0300
Subject: mac80211: remove unneeded variable from ieee80211_tx_pending()

We don't need "sdata" any more after:
	d84f323477260e773d5317ad7cbe50f76115cb47
	mac80211: remove dev_hold/put calls

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index cbe53ed4fb0b..08e1f17a4226 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2010,14 +2010,12 @@ void ieee80211_tx_pending(unsigned long data)
 		while (!skb_queue_empty(&local->pending[i])) {
 			struct sk_buff *skb = __skb_dequeue(&local->pending[i]);
 			struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
-			struct ieee80211_sub_if_data *sdata;
 
 			if (WARN_ON(!info->control.vif)) {
 				kfree_skb(skb);
 				continue;
 			}
 
-			sdata = vif_to_sdata(info->control.vif);
 			spin_unlock_irqrestore(&local->queue_stop_reason_lock,
 						flags);
 
-- 
cgit v1.2.3-59-g8ed1b


From a3dcce97b285ba54810f38fe2eccc295d69a76ce Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Wed, 24 Mar 2010 01:55:10 +0000
Subject: llc: cleanup: remove dead code from llc_init()

We don't need "dev" any more after:
	a5a04819c5740cb1aa217af2cc8f5ef26f33d744
	[LLC]: station source mac address

Signed-off-by: Dan Carpenter <error27@gmail.com>
Acked-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/llc/llc_core.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'net')

diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 78167e81dfeb..2bb0ddff8c0f 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -144,12 +144,6 @@ static struct packet_type llc_tr_packet_type __read_mostly = {
 
 static int __init llc_init(void)
 {
-	struct net_device *dev;
-
-	dev = first_net_device(&init_net);
-	if (dev != NULL)
-		dev = next_net_device(dev);
-
 	dev_add_pack(&llc_packet_type);
 	dev_add_pack(&llc_tr_packet_type);
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From a424077a0a48d5b2e1bdbb8cc56fd43abfd7fd6c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Wed, 24 Mar 2010 01:56:03 +0000
Subject: wimax: remove unneeded variable

We never actually use "dev" so I removed it.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wimax/op-reset.c     | 2 --
 net/wimax/op-state-get.c | 2 --
 2 files changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c
index 4dc82a54ba30..68bedf3e5443 100644
--- a/net/wimax/op-reset.c
+++ b/net/wimax/op-reset.c
@@ -110,7 +110,6 @@ int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info)
 {
 	int result, ifindex;
 	struct wimax_dev *wimax_dev;
-	struct device *dev;
 
 	d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
 	result = -ENODEV;
@@ -123,7 +122,6 @@ int wimax_gnl_doit_reset(struct sk_buff *skb, struct genl_info *info)
 	wimax_dev = wimax_dev_get_by_genl_info(info, ifindex);
 	if (wimax_dev == NULL)
 		goto error_no_wimax_dev;
-	dev = wimax_dev_to_dev(wimax_dev);
 	/* Execute the operation and send the result back to user space */
 	result = wimax_reset(wimax_dev);
 	dev_put(wimax_dev->net_dev);
diff --git a/net/wimax/op-state-get.c b/net/wimax/op-state-get.c
index 11ad3356eb56..aff8776e2d41 100644
--- a/net/wimax/op-state-get.c
+++ b/net/wimax/op-state-get.c
@@ -53,7 +53,6 @@ int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info)
 {
 	int result, ifindex;
 	struct wimax_dev *wimax_dev;
-	struct device *dev;
 
 	d_fnstart(3, NULL, "(skb %p info %p)\n", skb, info);
 	result = -ENODEV;
@@ -66,7 +65,6 @@ int wimax_gnl_doit_state_get(struct sk_buff *skb, struct genl_info *info)
 	wimax_dev = wimax_dev_get_by_genl_info(info, ifindex);
 	if (wimax_dev == NULL)
 		goto error_no_wimax_dev;
-	dev = wimax_dev_to_dev(wimax_dev);
 	/* Execute the operation and send the result back to user space */
 	result = wimax_state_get(wimax_dev);
 	dev_put(wimax_dev->net_dev);
-- 
cgit v1.2.3-59-g8ed1b


From 18062ca94714a66e75da8a22e010d0e8e61ab4cd Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Wed, 24 Mar 2010 01:57:30 +0000
Subject: rds: cleanup: remove unneeded variable

We never use "sk" so this patch removes it.

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/af_rds.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 937ecda4abe7..c2e45e8efa20 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -451,7 +451,6 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
 			      struct rds_info_lengths *lens)
 {
 	struct rds_sock *rs;
-	struct sock *sk;
 	struct rds_incoming *inc;
 	unsigned long flags;
 	unsigned int total = 0;
@@ -461,7 +460,6 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len,
 	spin_lock_irqsave(&rds_sock_lock, flags);
 
 	list_for_each_entry(rs, &rds_sock_list, rs_item) {
-		sk = rds_rs_to_sk(rs);
 		read_lock(&rs->rs_recv_lock);
 
 		/* XXX too lazy to maintain counts.. */
-- 
cgit v1.2.3-59-g8ed1b


From b138338056fc423c61a583d45f8aa64cfad87131 Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Wed, 24 Mar 2010 07:57:28 +0000
Subject: net: remove trailing space in messages

Signed-off-by: Frans Pop <elendil@planet.nl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/ccids/ccid3.c                | 2 +-
 net/dccp/input.c                      | 2 +-
 net/ipv4/ipconfig.c                   | 2 +-
 net/ipv4/tcp_input.c                  | 2 +-
 net/ipv6/netfilter/ip6t_hbh.c         | 4 ++--
 net/irda/ircomm/ircomm_param.c        | 2 +-
 net/sched/cls_u32.c                   | 4 ++--
 net/sunrpc/auth_gss/gss_spkm3_token.c | 2 +-
 net/sunrpc/bc_svc.c                   | 2 +-
 9 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index bcd7632299f5..d3235899c7e3 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -208,7 +208,7 @@ static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 		goto restart_timer;
 	}
 
-	ccid3_pr_debug("%s(%p, state=%s) - entry \n", dccp_role(sk), sk,
+	ccid3_pr_debug("%s(%p, state=%s) - entry\n", dccp_role(sk), sk,
 		       ccid3_tx_state_name(hc->tx_state));
 
 	if (hc->tx_state == TFRC_SSTATE_FBACK)
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 7648f316310f..5daa4bdfdb10 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -414,7 +414,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 		if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq,
 			       dp->dccps_awl, dp->dccps_awh)) {
 			dccp_pr_debug("invalid ackno: S.AWL=%llu, "
-				      "P.ackno=%llu, S.AWH=%llu \n",
+				      "P.ackno=%llu, S.AWH=%llu\n",
 				      (unsigned long long)dp->dccps_awl,
 			   (unsigned long long)DCCP_SKB_CB(skb)->dccpd_ack_seq,
 				      (unsigned long long)dp->dccps_awh);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 678909281648..bf12d2a7a0c7 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -975,7 +975,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str
 	/* Is it a reply for the device we are configuring? */
 	if (b->xid != ic_dev_xid) {
 		if (net_ratelimit())
-			printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet \n");
+			printk(KERN_ERR "DHCP/BOOTP: Ignoring delayed packet\n");
 		goto drop_unlock;
 	}
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c096a4218b8f..7b31476a4063 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4318,7 +4318,7 @@ static void tcp_ofo_queue(struct sock *sk)
 		}
 
 		if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) {
-			SOCK_DEBUG(sk, "ofo packet was already received \n");
+			SOCK_DEBUG(sk, "ofo packet was already received\n");
 			__skb_unlink(skb, &tp->out_of_order_queue);
 			__kfree_skb(skb);
 			continue;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index cbe8dec9744b..e60677519e40 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -141,11 +141,11 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 			}
 
 			/* Step to the next */
-			pr_debug("len%04X \n", optlen);
+			pr_debug("len%04X\n", optlen);
 
 			if ((ptr > skb->len - optlen || hdrlen < optlen) &&
 			    temp < optinfo->optsnr - 1) {
-				pr_debug("new pointer is too large! \n");
+				pr_debug("new pointer is too large!\n");
 				break;
 			}
 			ptr += optlen;
diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c
index d57aefd9fe77..0804532d970f 100644
--- a/net/irda/ircomm/ircomm_param.c
+++ b/net/irda/ircomm/ircomm_param.c
@@ -474,7 +474,7 @@ static int ircomm_param_dce(void *instance, irda_param_t *param, int get)
 	/* Check if any of the settings have changed */
 	if (dce & 0x0f) {
 		if (dce & IRCOMM_DELTA_CTS) {
-			IRDA_DEBUG(2, "%s(), CTS \n", __func__ );
+			IRDA_DEBUG(2, "%s(), CTS\n", __func__ );
 		}
 	}
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 07372f60bee3..1ef76871a57b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -772,10 +772,10 @@ static int __init init_u32(void)
 	printk("    Performance counters on\n");
 #endif
 #ifdef CONFIG_NET_CLS_IND
-	printk("    input device check on \n");
+	printk("    input device check on\n");
 #endif
 #ifdef CONFIG_NET_CLS_ACT
-	printk("    Actions configured \n");
+	printk("    Actions configured\n");
 #endif
 	return register_tcf_proto_ops(&cls_u32_ops);
 }
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
index 3308157436d2..a99825d7caa0 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -223,7 +223,7 @@ spkm3_verify_mic_token(unsigned char **tokp, int *mic_hdrlen, unsigned char **ck
 
 	/* only support SPKM_MIC_TOK */
 	if((ptr[6] != 0x01) || (ptr[7] != 0x01)) {
-		dprintk("RPC:       ERROR unsupported SPKM3 token \n");
+		dprintk("RPC:       ERROR unsupported SPKM3 token\n");
 		goto out;
 	}
 
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
index 13f214f53120..8a610fb0cfec 100644
--- a/net/sunrpc/bc_svc.c
+++ b/net/sunrpc/bc_svc.c
@@ -75,7 +75,7 @@ int bc_send(struct rpc_rqst *req)
 		rpc_put_task(task);
 	}
 	return ret;
-	dprintk("RPC:       bc_send ret= %d \n", ret);
+	dprintk("RPC:       bc_send ret= %d\n", ret);
 }
 
 #endif /* CONFIG_NFS_V4_1 */
-- 
cgit v1.2.3-59-g8ed1b


From a570f095eac34b7439eed2df6728381708c55bdc Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Wed, 24 Mar 2010 07:57:29 +0000
Subject: tipc: remove trailing space in messages

Signed-off-by: Frans Pop <elendil@planet.nl>
Cc: Per Liden <per.liden@ericsson.com>
Cc: Jon Maloy <jon.maloy@ericsson.com>
Cc: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 8 ++++----
 net/tipc/net.c  | 2 +-
 net/tipc/node.c | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 49f2be8622a9..c76e82e5f982 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -877,7 +877,7 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 		case TIMEOUT_EVT:
 			dbg_link("TIM ");
 			if (l_ptr->next_in_no != l_ptr->checkpoint) {
-				dbg_link("-> WW \n");
+				dbg_link("-> WW\n");
 				l_ptr->state = WORKING_WORKING;
 				l_ptr->fsm_msg_cnt = 0;
 				l_ptr->checkpoint = l_ptr->next_in_no;
@@ -934,7 +934,7 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 			link_set_timer(l_ptr, cont_intv);
 			break;
 		case RESET_MSG:
-			dbg_link("RES \n");
+			dbg_link("RES\n");
 			dbg_link(" -> RR\n");
 			l_ptr->state = RESET_RESET;
 			l_ptr->fsm_msg_cnt = 0;
@@ -947,7 +947,7 @@ static void link_state_event(struct link *l_ptr, unsigned event)
 			l_ptr->started = 1;
 			/* fall through */
 		case TIMEOUT_EVT:
-			dbg_link("TIM \n");
+			dbg_link("TIM\n");
 			tipc_link_send_proto_msg(l_ptr, RESET_MSG, 0, 0, 0, 0, 0);
 			l_ptr->fsm_msg_cnt++;
 			link_set_timer(l_ptr, cont_intv);
@@ -3295,7 +3295,7 @@ static void link_dump_rec_queue(struct link *l_ptr)
 			info("buffer %x invalid\n", crs);
 			return;
 		}
-		msg_dbg(buf_msg(crs), "In rec queue: \n");
+		msg_dbg(buf_msg(crs), "In rec queue:\n");
 		crs = crs->next;
 	}
 }
diff --git a/net/tipc/net.c b/net/tipc/net.c
index f25b1cdb64eb..79ce8fa2b77a 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -291,6 +291,6 @@ void tipc_net_stop(void)
 	tipc_bclink_stop();
 	net_stop();
 	write_unlock_bh(&tipc_net_lock);
-	info("Left network mode \n");
+	info("Left network mode\n");
 }
 
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 2c24e7d6d950..17cc394f424f 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -278,7 +278,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
 			n_ptr->link_cnt++;
 			return n_ptr;
 		}
-		err("Attempt to establish second link on <%s> to %s \n",
+		err("Attempt to establish second link on <%s> to %s\n",
 		    l_ptr->b_ptr->publ.name,
 		    addr_string_fill(addr_string, l_ptr->addr));
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 0d34545563f99886b997be7da63f0e8084af3bc5 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 19 Mar 2010 18:47:51 +0100
Subject: netfilter: xtables: make use of caller family rather than target
 family

Supplement to aa5fa3185791aac71c9172d4fda3e8729164b5d1.
The semantic patch for this change is:

// <smpl>
@@
struct xt_target_param *par;
@@
-par->target->family
+par->family

@@
struct xt_tgchk_param *par;
@@
-par->target->family
+par->family

@@
struct xt_tgdtor_param *par;
@@
-par->target->family
+par->family
// </smpl>

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 6 +++---
 net/netfilter/xt_NFQUEUE.c         | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 0886f96c736b..a20bee75b02c 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -401,9 +401,9 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par)
 	}
 	cipinfo->config = config;
 
-	if (nf_ct_l3proto_try_module_get(par->target->family) < 0) {
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->target->family);
+				    "proto=%u\n", par->family);
 		return false;
 	}
 
@@ -421,7 +421,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
 
 	clusterip_config_put(cipinfo->config);
 
-	nf_ct_l3proto_module_put(par->target->family);
+	nf_ct_l3proto_module_put(par->family);
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index a37e2166858e..7cc0de63aa0f 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -71,10 +71,10 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_target_param *par)
 	u32 queue = info->queuenum;
 
 	if (info->queues_total > 1) {
-		if (par->target->family == NFPROTO_IPV4)
+		if (par->family == NFPROTO_IPV4)
 			queue = hash_v4(skb) % info->queues_total + queue;
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-		else if (par->target->family == NFPROTO_IPV6)
+		else if (par->family == NFPROTO_IPV6)
 			queue = hash_v6(skb) % info->queues_total + queue;
 #endif
 	}
-- 
cgit v1.2.3-59-g8ed1b


From ff67e4e42bd178b1179c4d8e5c1fde18758ce84f Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 19 Mar 2010 21:08:16 +0100
Subject: netfilter: xt extensions: use pr_<level> (2)

Supplement to 1159683ef48469de71dc26f0ee1a9c30d131cf89.

Downgrade the log level to INFO for most checkentry messages as they
are, IMO, just extra information accompanying the -EINVAL code that is
returned for a parameter "constraint violation". Reserve the error level
for real errors, such as being unable to create an LED trigger.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/bridge/netfilter/ebt_among.c    | 15 ++++++-------
 net/bridge/netfilter/ebt_limit.c    |  3 ++-
 net/bridge/netfilter/ebt_ulog.c     | 23 ++++++++------------
 net/bridge/netfilter/ebt_vlan.c     | 36 ++++++++++++-------------------
 net/bridge/netfilter/ebtables.c     |  5 ++---
 net/ipv4/netfilter/ip_tables.c      | 18 +++++++---------
 net/ipv4/netfilter/ipt_CLUSTERIP.c  | 42 ++++++++++++++++++-------------------
 net/ipv4/netfilter/ipt_ECN.c        | 11 ++++------
 net/ipv4/netfilter/ipt_LOG.c        |  7 +++----
 net/ipv4/netfilter/ipt_MASQUERADE.c |  8 +++----
 net/ipv4/netfilter/ipt_NETMAP.c     |  6 +++---
 net/ipv4/netfilter/ipt_REDIRECT.c   |  6 +++---
 net/ipv4/netfilter/ipt_REJECT.c     |  6 +++---
 net/ipv4/netfilter/ipt_ULOG.c       | 37 +++++++++++++-------------------
 net/ipv4/netfilter/ipt_addrtype.c   | 14 ++++++-------
 net/ipv4/netfilter/ipt_ecn.c        |  5 ++---
 net/ipv4/netfilter/nf_nat_rule.c    |  5 +++--
 net/ipv6/netfilter/ip6_tables.c     |  9 ++++----
 net/ipv6/netfilter/ip6t_LOG.c       |  7 +++----
 net/ipv6/netfilter/ip6t_REJECT.c    | 20 +++++++++---------
 net/ipv6/netfilter/ip6t_ah.c        |  6 +++---
 net/ipv6/netfilter/ip6t_frag.c      |  6 +++---
 net/ipv6/netfilter/ip6t_hbh.c       |  6 +++---
 net/ipv6/netfilter/ip6t_rt.c        |  6 +++---
 net/netfilter/xt_TCPMSS.c           |  7 +++----
 net/netfilter/xt_TPROXY.c           |  4 ++--
 net/netfilter/xt_esp.c              |  2 +-
 net/netfilter/xt_iprange.c          |  1 +
 net/netfilter/xt_recent.c           |  6 +++---
 net/netfilter/xt_socket.c           |  5 ++---
 net/netfilter/xt_time.c             |  4 ++--
 31 files changed, 151 insertions(+), 185 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index b595f091f35b..60ad6308bc1f 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -7,6 +7,7 @@
  *  August, 2003
  *
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/ip.h>
 #include <linux/if_arp.h>
 #include <linux/module.h>
@@ -186,21 +187,17 @@ static bool ebt_among_mt_check(const struct xt_mtchk_param *par)
 	expected_length += ebt_mac_wormhash_size(wh_src);
 
 	if (em->match_size != EBT_ALIGN(expected_length)) {
-		printk(KERN_WARNING
-		       "ebtables: among: wrong size: %d "
-		       "against expected %d, rounded to %Zd\n",
-		       em->match_size, expected_length,
-		       EBT_ALIGN(expected_length));
+		pr_info("wrong size: %d against expected %d, rounded to %Zd\n",
+			em->match_size, expected_length,
+			EBT_ALIGN(expected_length));
 		return false;
 	}
 	if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) {
-		printk(KERN_WARNING
-		       "ebtables: among: dst integrity fail: %x\n", -err);
+		pr_info("dst integrity fail: %x\n", -err);
 		return false;
 	}
 	if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) {
-		printk(KERN_WARNING
-		       "ebtables: among: src integrity fail: %x\n", -err);
+		pr_info("src integrity fail: %x\n", -err);
 		return false;
 	}
 	return true;
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 7a8182710eb3..5b7330b62541 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -10,6 +10,7 @@
  *  September, 2003
  *
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/spinlock.h>
@@ -71,7 +72,7 @@ static bool ebt_limit_mt_check(const struct xt_mtchk_param *par)
 	/* Check for overflow. */
 	if (info->burst == 0 ||
 	    user2credits(info->avg * info->burst) < user2credits(info->avg)) {
-		printk("Overflow in ebt_limit, try lower: %u/%u\n",
+		pr_info("overflow, try lower: %u/%u\n",
 			info->avg, info->burst);
 		return false;
 	}
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 84340ab30ed3..789ea36f1db1 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -27,7 +27,7 @@
  *   flushed even if it is not full yet.
  *
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/socket.h>
@@ -43,9 +43,6 @@
 #include <net/sock.h>
 #include "../br_private.h"
 
-#define PRINTR(format, args...) do { if (net_ratelimit()) \
-				printk(format , ## args); } while (0)
-
 static unsigned int nlbufsiz = NLMSG_GOODSIZE;
 module_param(nlbufsiz, uint, 0600);
 MODULE_PARM_DESC(nlbufsiz, "netlink buffer size (number of bytes) "
@@ -106,15 +103,14 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
 	n = max(size, nlbufsiz);
 	skb = alloc_skb(n, GFP_ATOMIC);
 	if (!skb) {
-		PRINTR(KERN_ERR "ebt_ulog: can't alloc whole buffer "
-		       "of size %ub!\n", n);
+		pr_debug("cannot alloc whole buffer of size %ub!\n", n);
 		if (n > size) {
 			/* try to allocate only as much as we need for
 			 * current packet */
 			skb = alloc_skb(size, GFP_ATOMIC);
 			if (!skb)
-				PRINTR(KERN_ERR "ebt_ulog: can't even allocate "
-				       "buffer of size %ub\n", size);
+				pr_debug("cannot even allocate "
+					 "buffer of size %ub\n", size);
 		}
 	}
 
@@ -141,8 +137,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 
 	size = NLMSG_SPACE(sizeof(*pm) + copy_len);
 	if (size > nlbufsiz) {
-		PRINTR("ebt_ulog: Size %Zd needed, but nlbufsiz=%d\n",
-		       size, nlbufsiz);
+		pr_debug("Size %Zd needed, but nlbufsiz=%d\n", size, nlbufsiz);
 		return;
 	}
 
@@ -216,8 +211,8 @@ unlock:
 	return;
 
 nlmsg_failure:
-	printk(KERN_CRIT "ebt_ulog: error during NLMSG_PUT. This should "
-	       "not happen, please report to author.\n");
+	pr_debug("error during NLMSG_PUT. This should "
+		 "not happen, please report to author.\n");
 	goto unlock;
 alloc_failure:
 	goto unlock;
@@ -291,8 +286,8 @@ static int __init ebt_ulog_init(void)
 	int i;
 
 	if (nlbufsiz >= 128*1024) {
-		printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB,"
-		       " please try a smaller nlbufsiz parameter.\n");
+		pr_warning("Netlink buffer has to be <= 128kB,"
+			   " please try a smaller nlbufsiz parameter.\n");
 		return -EINVAL;
 	}
 
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index be1dd2e1f615..5c44f51063c3 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -26,17 +26,12 @@
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_vlan.h>
 
-static int debug;
 #define MODULE_VERS "0.6"
 
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "debug=1 is turn on debug messages");
 MODULE_AUTHOR("Nick Fedchik <nick@fedchik.org.ua>");
 MODULE_DESCRIPTION("Ebtables: 802.1Q VLAN tag match");
 MODULE_LICENSE("GPL");
 
-
-#define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args)
 #define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_
 #define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; }
 
@@ -91,24 +86,23 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par)
 
 	/* Is it 802.1Q frame checked? */
 	if (e->ethproto != htons(ETH_P_8021Q)) {
-		DEBUG_MSG
-		    ("passed entry proto %2.4X is not 802.1Q (8100)\n",
-		     (unsigned short) ntohs(e->ethproto));
+		pr_debug("passed entry proto %2.4X is not 802.1Q (8100)\n",
+			 ntohs(e->ethproto));
 		return false;
 	}
 
 	/* Check for bitmask range
 	 * True if even one bit is out of mask */
 	if (info->bitmask & ~EBT_VLAN_MASK) {
-		DEBUG_MSG("bitmask %2X is out of mask (%2X)\n",
-			  info->bitmask, EBT_VLAN_MASK);
+		pr_debug("bitmask %2X is out of mask (%2X)\n",
+			 info->bitmask, EBT_VLAN_MASK);
 		return false;
 	}
 
 	/* Check for inversion flags range */
 	if (info->invflags & ~EBT_VLAN_MASK) {
-		DEBUG_MSG("inversion flags %2X is out of mask (%2X)\n",
-			  info->invflags, EBT_VLAN_MASK);
+		pr_debug("inversion flags %2X is out of mask (%2X)\n",
+			 info->invflags, EBT_VLAN_MASK);
 		return false;
 	}
 
@@ -121,9 +115,8 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par)
 	if (GET_BITMASK(EBT_VLAN_ID)) {
 		if (!!info->id) { /* if id!=0 => check vid range */
 			if (info->id > VLAN_GROUP_ARRAY_LEN) {
-				DEBUG_MSG
-				    ("id %d is out of range (1-4096)\n",
-				     info->id);
+				pr_debug("id %d is out of range (1-4096)\n",
+					 info->id);
 				return false;
 			}
 			/* Note: This is valid VLAN-tagged frame point.
@@ -137,8 +130,8 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par)
 
 	if (GET_BITMASK(EBT_VLAN_PRIO)) {
 		if ((unsigned char) info->prio > 7) {
-			DEBUG_MSG("prio %d is out of range (0-7)\n",
-			     info->prio);
+			pr_debug("prio %d is out of range (0-7)\n",
+				 info->prio);
 			return false;
 		}
 	}
@@ -147,9 +140,8 @@ static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par)
 	 * if_ether.h:  ETH_ZLEN        60   -  Min. octets in frame sans FCS */
 	if (GET_BITMASK(EBT_VLAN_ENCAP)) {
 		if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) {
-			DEBUG_MSG
-			    ("encap frame length %d is less than minimal\n",
-			     ntohs(info->encap));
+			pr_debug("encap frame length %d is less than "
+				 "minimal\n", ntohs(info->encap));
 			return false;
 		}
 	}
@@ -169,9 +161,7 @@ static struct xt_match ebt_vlan_mt_reg __read_mostly = {
 
 static int __init ebt_vlan_init(void)
 {
-	DEBUG_MSG("ebtables 802.1Q extension module v"
-		  MODULE_VERS "\n");
-	DEBUG_MSG("module debug=%d\n", !!debug);
+	pr_debug("ebtables 802.1Q extension module v" MODULE_VERS "\n");
 	return xt_register_match(&ebt_vlan_mt_reg);
 }
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index dfb58056a89a..989d72cc8148 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -14,8 +14,7 @@
  *  as published by the Free Software Foundation; either version
  *  2 of the License, or (at your option) any later version.
  */
-
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/kmod.h>
 #include <linux/module.h>
 #include <linux/vmalloc.h>
@@ -2127,7 +2126,7 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base,
 			return ret;
 		new_offset += ret;
 		if (offsets_update && new_offset) {
-			pr_debug("ebtables: change offset %d to %d\n",
+			pr_debug("change offset %d to %d\n",
 				offsets_update[i], offsets[j] + new_offset);
 			offsets_update[i] = offsets[j] + new_offset;
 		}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index b29c66df8d1f..73fdf20263ed 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -39,13 +39,13 @@ MODULE_DESCRIPTION("IPv4 packet filter");
 /*#define DEBUG_IP_FIREWALL_USER*/
 
 #ifdef DEBUG_IP_FIREWALL
-#define dprintf(format, args...)  printk(format , ## args)
+#define dprintf(format, args...) pr_info(format , ## args)
 #else
 #define dprintf(format, args...)
 #endif
 
 #ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) printk(format , ## args)
+#define duprintf(format, args...) pr_info(format , ## args)
 #else
 #define duprintf(format, args...)
 #endif
@@ -168,8 +168,7 @@ static unsigned int
 ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	if (net_ratelimit())
-		printk("ip_tables: error: `%s'\n",
-		       (const char *)par->targinfo);
+		pr_info("error: `%s'\n", (const char *)par->targinfo);
 
 	return NF_DROP;
 }
@@ -591,7 +590,7 @@ check_entry(const struct ipt_entry *e, const char *name)
 	const struct ipt_entry_target *t;
 
 	if (!ip_checkentry(&e->ip)) {
-		duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+		duprintf("ip check failed %p %s.\n", e, name);
 		return -EINVAL;
 	}
 
@@ -618,8 +617,7 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
 	ret = xt_check_match(par, m->u.match_size - sizeof(*m),
 	      ip->proto, ip->invflags & IPT_INV_PROTO);
 	if (ret < 0) {
-		duprintf("ip_tables: check failed for `%s'.\n",
-			 par.match->name);
+		duprintf("check failed for `%s'.\n", par->match->name);
 		return ret;
 	}
 	return 0;
@@ -667,7 +665,7 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name)
 	ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
 	      e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
 	if (ret < 0) {
-		duprintf("ip_tables: check failed for `%s'.\n",
+		duprintf("check failed for `%s'.\n",
 			 t->u.kernel.target->name);
 		return ret;
 	}
@@ -1311,7 +1309,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 	if (ret != 0)
 		goto free_newinfo;
 
-	duprintf("ip_tables: Translated table\n");
+	duprintf("Translated table\n");
 
 	ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
 			   tmp.num_counters, tmp.counters);
@@ -2276,7 +2274,7 @@ static int __init ip_tables_init(void)
 	if (ret < 0)
 		goto err5;
 
-	printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n");
+	pr_info("(C) 2000-2006 Netfilter Core Team\n");
 	return 0;
 
 err5:
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index a20bee75b02c..fcaa0dc8e075 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -9,6 +9,7 @@
  * published by the Free Software Foundation.
  *
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/proc_fs.h>
 #include <linux/jhash.h>
@@ -238,8 +239,7 @@ clusterip_hashfn(const struct sk_buff *skb,
 		break;
 	default:
 		if (net_ratelimit())
-			printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n",
-				iph->protocol);
+			pr_info("unknown protocol %u\n", iph->protocol);
 		sport = dport = 0;
 	}
 
@@ -261,7 +261,7 @@ clusterip_hashfn(const struct sk_buff *skb,
 		hashval = 0;
 		/* This cannot happen, unless the check function wasn't called
 		 * at rule load time */
-		printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode);
+		pr_info("unknown mode %u\n", config->hash_mode);
 		BUG();
 		break;
 	}
@@ -294,7 +294,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
 
 	ct = nf_ct_get(skb, &ctinfo);
 	if (ct == NULL) {
-		printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
+		pr_info("no conntrack!\n");
 			/* FIXME: need to drop invalid ones, since replies
 			 * to outgoing connections of other nodes will be
 			 * marked as INVALID */
@@ -357,14 +357,13 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par)
 	if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
 	    cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
 	    cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
-		printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n",
-			cipinfo->hash_mode);
+		pr_info("unknown mode %u\n", cipinfo->hash_mode);
 		return false;
 
 	}
 	if (e->ip.dmsk.s_addr != htonl(0xffffffff) ||
 	    e->ip.dst.s_addr == 0) {
-		printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n");
+		pr_info("Please specify destination IP\n");
 		return false;
 	}
 
@@ -373,26 +372,28 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par)
 	config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
 	if (!config) {
 		if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
-			printk(KERN_WARNING "CLUSTERIP: no config found for %pI4, need 'new'\n", &e->ip.dst.s_addr);
+			pr_info("no config found for %pI4, need 'new'\n",
+				&e->ip.dst.s_addr);
 			return false;
 		} else {
 			struct net_device *dev;
 
 			if (e->ip.iniface[0] == '\0') {
-				printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n");
+				pr_info("Please specify an interface name\n");
 				return false;
 			}
 
 			dev = dev_get_by_name(&init_net, e->ip.iniface);
 			if (!dev) {
-				printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface);
+				pr_info("no such interface %s\n",
+					e->ip.iniface);
 				return false;
 			}
 
 			config = clusterip_config_init(cipinfo,
 							e->ip.dst.s_addr, dev);
 			if (!config) {
-				printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n");
+				pr_info("cannot allocate config\n");
 				dev_put(dev);
 				return false;
 			}
@@ -402,8 +403,8 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par)
 	cipinfo->config = config;
 
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
-		printk(KERN_WARNING "can't load conntrack support for "
-				    "proto=%u\n", par->family);
+		pr_info("cannot load conntrack support for proto=%u\n",
+			par->family);
 		return false;
 	}
 
@@ -478,8 +479,8 @@ static void arp_print(struct arp_payload *payload)
 	}
 	hbuffer[--k]='\0';
 
-	printk("src %pI4@%s, dst %pI4\n",
-		&payload->src_ip, hbuffer, &payload->dst_ip);
+	pr_debug("src %pI4@%s, dst %pI4\n",
+		 &payload->src_ip, hbuffer, &payload->dst_ip);
 }
 #endif
 
@@ -518,7 +519,7 @@ arp_mangle(unsigned int hook,
 	 * this wouldn't work, since we didn't subscribe the mcast group on
 	 * other interfaces */
 	if (c->dev != out) {
-		pr_debug("CLUSTERIP: not mangling arp reply on different "
+		pr_debug("not mangling arp reply on different "
 			 "interface: cip'%s'-skb'%s'\n",
 			 c->dev->name, out->name);
 		clusterip_config_put(c);
@@ -529,7 +530,7 @@ arp_mangle(unsigned int hook,
 	memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
 
 #ifdef DEBUG
-	pr_debug(KERN_DEBUG "CLUSTERIP mangled arp reply: ");
+	pr_debug("mangled arp reply: ");
 	arp_print(payload);
 #endif
 
@@ -705,13 +706,13 @@ static int __init clusterip_tg_init(void)
 #ifdef CONFIG_PROC_FS
 	clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net);
 	if (!clusterip_procdir) {
-		printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n");
+		pr_err("Unable to proc dir entry\n");
 		ret = -ENOMEM;
 		goto cleanup_hook;
 	}
 #endif /* CONFIG_PROC_FS */
 
-	printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n",
+	pr_info("ClusterIP Version %s loaded successfully\n",
 		CLUSTERIP_VERSION);
 	return 0;
 
@@ -726,8 +727,7 @@ cleanup_target:
 
 static void __exit clusterip_tg_exit(void)
 {
-	printk(KERN_NOTICE "ClusterIP Version %s unloading\n",
-		CLUSTERIP_VERSION);
+	pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
 #ifdef CONFIG_PROC_FS
 	remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
 #endif
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index ea5cea2415c1..01988752547e 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
 */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/in.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -99,19 +99,16 @@ static bool ecn_tg_check(const struct xt_tgchk_param *par)
 	const struct ipt_entry *e = par->entryinfo;
 
 	if (einfo->operation & IPT_ECN_OP_MASK) {
-		printk(KERN_WARNING "ECN: unsupported ECN operation %x\n",
-			einfo->operation);
+		pr_info("unsupported ECN operation %x\n", einfo->operation);
 		return false;
 	}
 	if (einfo->ip_ect & ~IPT_ECN_IP_MASK) {
-		printk(KERN_WARNING "ECN: new ECT codepoint %x out of mask\n",
-			einfo->ip_ect);
+		pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect);
 		return false;
 	}
 	if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) &&
 	    (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) {
-		printk(KERN_WARNING "ECN: cannot use TCP operations on a "
-		       "non-tcp rule\n");
+		pr_info("cannot use TCP operations on a non-tcp rule\n");
 		return false;
 	}
 	return true;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index ee128efa1c8d..b3bf623fa222 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -9,7 +9,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/skbuff.h>
@@ -444,12 +444,11 @@ static bool log_tg_check(const struct xt_tgchk_param *par)
 	const struct ipt_log_info *loginfo = par->targinfo;
 
 	if (loginfo->level >= 8) {
-		pr_debug("LOG: level %u >= 8\n", loginfo->level);
+		pr_debug("level %u >= 8\n", loginfo->level);
 		return false;
 	}
 	if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
-		pr_debug("LOG: prefix term %i\n",
-			 loginfo->prefix[sizeof(loginfo->prefix)-1]);
+		pr_debug("prefix is not null-terminated\n");
 		return false;
 	}
 	return true;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 650b54042b01..5063ddac7c04 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -8,7 +8,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/types.h>
 #include <linux/inetdevice.h>
 #include <linux/ip.h>
@@ -33,11 +33,11 @@ static bool masquerade_tg_check(const struct xt_tgchk_param *par)
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
-		pr_debug("masquerade_check: bad MAP_IPS.\n");
+		pr_debug("bad MAP_IPS.\n");
 		return false;
 	}
 	if (mr->rangesize != 1) {
-		pr_debug("masquerade_check: bad rangesize %u\n", mr->rangesize);
+		pr_debug("bad rangesize %u\n", mr->rangesize);
 		return false;
 	}
 	return true;
@@ -72,7 +72,7 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	rt = skb_rtable(skb);
 	newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
 	if (!newsrc) {
-		printk("MASQUERADE: %s ate my IP address\n", par->out->name);
+		pr_info("%s ate my IP address\n", par->out->name);
 		return NF_DROP;
 	}
 
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 7c29582d4ec8..51ab01a0a95d 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -9,7 +9,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
@@ -27,11 +27,11 @@ static bool netmap_tg_check(const struct xt_tgchk_param *par)
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
 	if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
-		pr_debug("NETMAP:check: bad MAP_IPS.\n");
+		pr_debug("bad MAP_IPS.\n");
 		return false;
 	}
 	if (mr->rangesize != 1) {
-		pr_debug("NETMAP:check: bad rangesize %u.\n", mr->rangesize);
+		pr_debug("bad rangesize %u.\n", mr->rangesize);
 		return false;
 	}
 	return true;
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 698e5e78685b..74f1f55fd61a 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/types.h>
 #include <linux/ip.h>
 #include <linux/timer.h>
@@ -31,11 +31,11 @@ static bool redirect_tg_check(const struct xt_tgchk_param *par)
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
-		pr_debug("redirect_check: bad MAP_IPS.\n");
+		pr_debug("bad MAP_IPS.\n");
 		return false;
 	}
 	if (mr->rangesize != 1) {
-		pr_debug("redirect_check: bad rangesize %u.\n", mr->rangesize);
+		pr_debug("bad rangesize %u.\n", mr->rangesize);
 		return false;
 	}
 	return true;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 5113b8f1a379..ff32252bad59 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -9,7 +9,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
@@ -180,13 +180,13 @@ static bool reject_tg_check(const struct xt_tgchk_param *par)
 	const struct ipt_entry *e = par->entryinfo;
 
 	if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
-		printk("ipt_REJECT: ECHOREPLY no longer supported.\n");
+		pr_info("ECHOREPLY no longer supported.\n");
 		return false;
 	} else if (rejinfo->with == IPT_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
 		if (e->ip.proto != IPPROTO_TCP ||
 		    (e->ip.invflags & XT_INV_PROTO)) {
-			printk("ipt_REJECT: TCP_RESET invalid for non-tcp\n");
+			pr_info("TCP_RESET invalid for non-tcp\n");
 			return false;
 		}
 	}
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 09a5d3f7cc41..d926201560dd 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -29,7 +29,7 @@
  *   Specify, after how many hundredths of a second the queue should be
  *   flushed even if it is not full yet.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/socket.h>
@@ -56,8 +56,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
 #define ULOG_NL_EVENT		111		/* Harald's favorite number */
 #define ULOG_MAXNLGROUPS	32		/* numer of nlgroups */
 
-#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
-
 static unsigned int nlbufsiz = NLMSG_GOODSIZE;
 module_param(nlbufsiz, uint, 0400);
 MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
@@ -90,12 +88,12 @@ static void ulog_send(unsigned int nlgroupnum)
 	ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
 
 	if (timer_pending(&ub->timer)) {
-		pr_debug("ipt_ULOG: ulog_send: timer was pending, deleting\n");
+		pr_debug("ulog_send: timer was pending, deleting\n");
 		del_timer(&ub->timer);
 	}
 
 	if (!ub->skb) {
-		pr_debug("ipt_ULOG: ulog_send: nothing to send\n");
+		pr_debug("ulog_send: nothing to send\n");
 		return;
 	}
 
@@ -104,7 +102,7 @@ static void ulog_send(unsigned int nlgroupnum)
 		ub->lastnlh->nlmsg_type = NLMSG_DONE;
 
 	NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
-	pr_debug("ipt_ULOG: throwing %d packets to netlink group %u\n",
+	pr_debug("throwing %d packets to netlink group %u\n",
 		 ub->qlen, nlgroupnum + 1);
 	netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC);
 
@@ -117,7 +115,7 @@ static void ulog_send(unsigned int nlgroupnum)
 /* timer function to flush queue in flushtimeout time */
 static void ulog_timer(unsigned long data)
 {
-	pr_debug("ipt_ULOG: timer function called, calling ulog_send\n");
+	pr_debug("timer function called, calling ulog_send\n");
 
 	/* lock to protect against somebody modifying our structure
 	 * from ipt_ulog_target at the same time */
@@ -138,7 +136,7 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
 	n = max(size, nlbufsiz);
 	skb = alloc_skb(n, GFP_ATOMIC);
 	if (!skb) {
-		PRINTR("ipt_ULOG: can't alloc whole buffer %ub!\n", n);
+		pr_debug("cannot alloc whole buffer %ub!\n", n);
 
 		if (n > size) {
 			/* try to allocate only as much as we need for
@@ -146,8 +144,7 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
 
 			skb = alloc_skb(size, GFP_ATOMIC);
 			if (!skb)
-				PRINTR("ipt_ULOG: can't even allocate %ub\n",
-				       size);
+				pr_debug("cannot even allocate %ub\n", size);
 		}
 	}
 
@@ -198,8 +195,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
 			goto alloc_failure;
 	}
 
-	pr_debug("ipt_ULOG: qlen %d, qthreshold %Zu\n", ub->qlen,
-		 loginfo->qthreshold);
+	pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold);
 
 	/* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */
 	nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT,
@@ -272,11 +268,9 @@ static void ipt_ulog_packet(unsigned int hooknum,
 	return;
 
 nlmsg_failure:
-	PRINTR("ipt_ULOG: error during NLMSG_PUT\n");
-
+	pr_debug("error during NLMSG_PUT\n");
 alloc_failure:
-	PRINTR("ipt_ULOG: Error building netlink message\n");
-
+	pr_debug("Error building netlink message\n");
 	spin_unlock_bh(&ulog_lock);
 }
 
@@ -318,12 +312,11 @@ static bool ulog_tg_check(const struct xt_tgchk_param *par)
 	const struct ipt_ulog_info *loginfo = par->targinfo;
 
 	if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
-		pr_debug("ipt_ULOG: prefix term %i\n",
-			 loginfo->prefix[sizeof(loginfo->prefix) - 1]);
+		pr_debug("prefix not null-terminated\n");
 		return false;
 	}
 	if (loginfo->qthreshold > ULOG_MAX_QLEN) {
-		pr_debug("ipt_ULOG: queue threshold %Zu > MAX_QLEN\n",
+		pr_debug("queue threshold %Zu > MAX_QLEN\n",
 			 loginfo->qthreshold);
 		return false;
 	}
@@ -389,10 +382,10 @@ static int __init ulog_tg_init(void)
 {
 	int ret, i;
 
-	pr_debug("ipt_ULOG: init module\n");
+	pr_debug("init module\n");
 
 	if (nlbufsiz > 128*1024) {
-		printk("Netlink buffer has to be <= 128kB\n");
+		pr_warning("Netlink buffer has to be <= 128kB\n");
 		return -EINVAL;
 	}
 
@@ -422,7 +415,7 @@ static void __exit ulog_tg_exit(void)
 	ulog_buff_t *ub;
 	int i;
 
-	pr_debug("ipt_ULOG: cleanup_module\n");
+	pr_debug("cleanup_module\n");
 
 	if (nflog)
 		nf_log_unregister(&ipt_ulog_logger);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 3b216be3bc9f..ea4f58a46c6e 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -8,7 +8,7 @@
  *  it under the terms of the GNU General Public License version 2 as
  *  published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -76,24 +76,24 @@ static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
 
 	if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN &&
 	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
-		printk(KERN_ERR "ipt_addrtype: both incoming and outgoing "
-				"interface limitation cannot be selected\n");
+		pr_info("both incoming and outgoing "
+			"interface limitation cannot be selected\n");
 		return false;
 	}
 
 	if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
 	    (1 << NF_INET_LOCAL_IN)) &&
 	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
-		printk(KERN_ERR "ipt_addrtype: output interface limitation "
-				"not valid in PRE_ROUTING and INPUT\n");
+		pr_info("output interface limitation "
+			"not valid in PREROUTING and INPUT\n");
 		return false;
 	}
 
 	if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
 	    (1 << NF_INET_LOCAL_OUT)) &&
 	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) {
-		printk(KERN_ERR "ipt_addrtype: input interface limitation "
-				"not valid in POST_ROUTING and OUTPUT\n");
+		pr_info("input interface limitation "
+			"not valid in POSTROUTING and OUTPUT\n");
 		return false;
 	}
 
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 2a1e56b71908..e661108c73f1 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/in.h>
 #include <linux/ip.h>
 #include <net/ip.h>
@@ -98,8 +98,7 @@ static bool ecn_mt_check(const struct xt_mtchk_param *par)
 
 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
 	    ip->proto != IPPROTO_TCP) {
-		printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for"
-		       " non-tcp packets\n");
+		pr_info("cannot match TCP bits in rule for non-tcp packets\n");
 		return false;
 	}
 
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index ab74cc0535e2..7d6345e416c7 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -7,6 +7,7 @@
  */
 
 /* Everything about the rules for NAT. */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/types.h>
 #include <linux/ip.h>
 #include <linux/netfilter.h>
@@ -79,7 +80,7 @@ static bool ipt_snat_checkentry(const struct xt_tgchk_param *par)
 
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
-		printk("SNAT: multiple ranges no longer supported\n");
+		pr_info("SNAT: multiple ranges no longer supported\n");
 		return false;
 	}
 	return true;
@@ -91,7 +92,7 @@ static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par)
 
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
-		printk("DNAT: multiple ranges no longer supported\n");
+		pr_info("DNAT: multiple ranges no longer supported\n");
 		return false;
 	}
 	return true;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 9210e312edf1..1b2414e03a34 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -40,13 +40,13 @@ MODULE_DESCRIPTION("IPv6 packet filter");
 /*#define DEBUG_IP_FIREWALL_USER*/
 
 #ifdef DEBUG_IP_FIREWALL
-#define dprintf(format, args...)  printk(format , ## args)
+#define dprintf(format, args...) pr_info(format , ## args)
 #else
 #define dprintf(format, args...)
 #endif
 
 #ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) printk(format , ## args)
+#define duprintf(format, args...) pr_info(format , ## args)
 #else
 #define duprintf(format, args...)
 #endif
@@ -200,8 +200,7 @@ static unsigned int
 ip6t_error(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	if (net_ratelimit())
-		printk("ip6_tables: error: `%s'\n",
-		       (const char *)par->targinfo);
+		pr_info("error: `%s'\n", (const char *)par->targinfo);
 
 	return NF_DROP;
 }
@@ -2308,7 +2307,7 @@ static int __init ip6_tables_init(void)
 	if (ret < 0)
 		goto err5;
 
-	printk(KERN_INFO "ip6_tables: (C) 2000-2006 Netfilter Core Team\n");
+	pr_info("(C) 2000-2006 Netfilter Core Team\n");
 	return 0;
 
 err5:
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index e16c0c7d086d..5a79883220e0 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -9,7 +9,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/if_arp.h>
@@ -456,12 +456,11 @@ static bool log_tg6_check(const struct xt_tgchk_param *par)
 	const struct ip6t_log_info *loginfo = par->targinfo;
 
 	if (loginfo->level >= 8) {
-		pr_debug("LOG: level %u >= 8\n", loginfo->level);
+		pr_debug("level %u >= 8\n", loginfo->level);
 		return false;
 	}
 	if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
-		pr_debug("LOG: prefix term %i\n",
-			 loginfo->prefix[sizeof(loginfo->prefix)-1]);
+		pr_debug("prefix not null-terminated\n");
 		return false;
 	}
 	return true;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index dd8afbaf00a8..45efb9f38fcb 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -14,7 +14,7 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/icmpv6.h>
@@ -49,7 +49,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 
 	if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) ||
 	    (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) {
-		pr_debug("ip6t_REJECT: addr is not unicast.\n");
+		pr_debug("addr is not unicast.\n");
 		return;
 	}
 
@@ -57,7 +57,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 	tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto);
 
 	if ((tcphoff < 0) || (tcphoff > oldskb->len)) {
-		pr_debug("ip6t_REJECT: Can't get TCP header.\n");
+		pr_debug("Cannot get TCP header.\n");
 		return;
 	}
 
@@ -65,7 +65,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 
 	/* IP header checks: fragment, too short. */
 	if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) {
-		pr_debug("ip6t_REJECT: proto(%d) != IPPROTO_TCP, "
+		pr_debug("proto(%d) != IPPROTO_TCP, "
 			 "or too short. otcplen = %d\n",
 			 proto, otcplen);
 		return;
@@ -76,14 +76,14 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 
 	/* No RST for RST. */
 	if (otcph.rst) {
-		pr_debug("ip6t_REJECT: RST is set\n");
+		pr_debug("RST is set\n");
 		return;
 	}
 
 	/* Check checksum. */
 	if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP,
 			    skb_checksum(oldskb, tcphoff, otcplen, 0))) {
-		pr_debug("ip6t_REJECT: TCP checksum is invalid\n");
+		pr_debug("TCP checksum is invalid\n");
 		return;
 	}
 
@@ -107,7 +107,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 
 	if (!nskb) {
 		if (net_ratelimit())
-			printk("ip6t_REJECT: Can't alloc skb\n");
+			pr_debug("cannot alloc skb\n");
 		dst_release(dst);
 		return;
 	}
@@ -206,7 +206,7 @@ reject_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 		break;
 	default:
 		if (net_ratelimit())
-			printk(KERN_WARNING "ip6t_REJECT: case %u not handled yet\n", reject->with);
+			pr_info("case %u not handled yet\n", reject->with);
 		break;
 	}
 
@@ -219,13 +219,13 @@ static bool reject_tg6_check(const struct xt_tgchk_param *par)
 	const struct ip6t_entry *e = par->entryinfo;
 
 	if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) {
-		printk("ip6t_REJECT: ECHOREPLY is not supported.\n");
+		pr_info("ECHOREPLY is not supported.\n");
 		return false;
 	} else if (rejinfo->with == IP6T_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
 		if (e->ipv6.proto != IPPROTO_TCP ||
 		    (e->ipv6.invflags & XT_INV_PROTO)) {
-			printk("ip6t_REJECT: TCP_RESET illegal for non-tcp\n");
+			pr_info("TCP_RESET illegal for non-tcp\n");
 			return false;
 		}
 	}
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index ac0b7c629d78..4429bfd39e11 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
@@ -29,7 +29,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 {
 	bool r;
 
-	pr_debug("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",
+	pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
 		 invert ? '!' : ' ', min, spi, max);
 	r = (spi >= min && spi <= max) ^ invert;
 	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
@@ -92,7 +92,7 @@ static bool ah_mt6_check(const struct xt_mtchk_param *par)
 	const struct ip6t_ah *ahinfo = par->matchinfo;
 
 	if (ahinfo->invflags & ~IP6T_AH_INV_MASK) {
-		pr_debug("ip6t_ah: unknown flags %X\n", ahinfo->invflags);
+		pr_debug("unknown flags %X\n", ahinfo->invflags);
 		return false;
 	}
 	return true;
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 7b91c2598ed5..5c0da913b4ab 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ipv6.h>
@@ -27,7 +27,7 @@ static inline bool
 id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 {
 	bool r;
-	pr_debug("frag id_match:%c 0x%x <= 0x%x <= 0x%x", invert ? '!' : ' ',
+	pr_debug("id_match:%c 0x%x <= 0x%x <= 0x%x\n", invert ? '!' : ' ',
 		 min, id, max);
 	r = (id >= min && id <= max) ^ invert;
 	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
@@ -107,7 +107,7 @@ static bool frag_mt6_check(const struct xt_mtchk_param *par)
 	const struct ip6t_frag *fraginfo = par->matchinfo;
 
 	if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
-		pr_debug("ip6t_frag: unknown flags %X\n", fraginfo->invflags);
+		pr_debug("unknown flags %X\n", fraginfo->invflags);
 		return false;
 	}
 	return true;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 82593c8bdc3e..f4b73889d00a 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ipv6.h>
@@ -169,12 +169,12 @@ static bool hbh_mt6_check(const struct xt_mtchk_param *par)
 	const struct ip6t_opts *optsinfo = par->matchinfo;
 
 	if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
-		pr_debug("ip6t_opts: unknown flags %X\n", optsinfo->invflags);
+		pr_debug("unknown flags %X\n", optsinfo->invflags);
 		return false;
 	}
 
 	if (optsinfo->flags & IP6T_OPTS_NSTRICT) {
-		pr_debug("ip6t_opts: Not strict - not implemented");
+		pr_debug("Not strict - not implemented");
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index b77307fc8743..c58d65336577 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -6,7 +6,7 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ipv6.h>
@@ -29,7 +29,7 @@ static inline bool
 segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 {
 	bool r;
-	pr_debug("rt segsleft_match:%c 0x%x <= 0x%x <= 0x%x",
+	pr_debug("segsleft_match:%c 0x%x <= 0x%x <= 0x%x\n",
 		 invert ? '!' : ' ', min, id, max);
 	r = (id >= min && id <= max) ^ invert;
 	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
@@ -188,7 +188,7 @@ static bool rt_mt6_check(const struct xt_mtchk_param *par)
 	const struct ip6t_rt *rtinfo = par->matchinfo;
 
 	if (rtinfo->invflags & ~IP6T_RT_INV_MASK) {
-		pr_debug("ip6t_rt: unknown flags %X\n", rtinfo->invflags);
+		pr_debug("unknown flags %X\n", rtinfo->invflags);
 		return false;
 	}
 	if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) &&
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 2077da31c973..45161d9a9f23 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -67,15 +67,14 @@ tcpmss_mangle_packet(struct sk_buff *skb,
 	if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
 		if (dst_mtu(skb_dst(skb)) <= minlen) {
 			if (net_ratelimit())
-				pr_err("xt_TCPMSS: "
-				       "unknown or invalid path-MTU (%u)\n",
+				pr_err("unknown or invalid path-MTU (%u)\n",
 				       dst_mtu(skb_dst(skb)));
 			return -1;
 		}
 		if (in_mtu <= minlen) {
 			if (net_ratelimit())
-				pr_err("xt_TCPMSS: unknown or "
-				       "invalid path-MTU (%u)\n", in_mtu);
+				pr_err("unknown or invalid path-MTU (%u)\n",
+				       in_mtu);
 			return -1;
 		}
 		newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen;
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 1340c2fa3621..e9244fdc123a 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -9,7 +9,7 @@
  * published by the Free Software Foundation.
  *
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
@@ -67,7 +67,7 @@ static bool tproxy_tg_check(const struct xt_tgchk_param *par)
 	    && !(i->invflags & IPT_INV_PROTO))
 		return true;
 
-	pr_info("xt_TPROXY: Can be used only in combination with "
+	pr_info("Can be used only in combination with "
 		"either -p tcp or -p udp\n");
 	return false;
 }
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index f9deecbef875..1a446d626769 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -29,7 +29,7 @@ static inline bool
 spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 {
 	bool r;
-	pr_debug("esp spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
+	pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
 		 invert ? '!' : ' ', min, spi, max);
 	r = (spi >= min && spi <= max) ^ invert;
 	pr_debug(" result %s\n", r ? "PASS" : "FAILED");
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index ffc96387d556..8471d9715bde 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -8,6 +8,7 @@
  *	it under the terms of the GNU General Public License version 2 as
  *	published by the Free Software Foundation.
  */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/ip.h>
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index aa9817e91338..72cbced48a8d 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -321,8 +321,8 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 		hash_rnd_inited = true;
 	}
 	if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
-		pr_info(KBUILD_MODNAME ": Unsupported user space flags "
-			"(%08x)\n", info->check_set);
+		pr_info("Unsupported user space flags (%08x)\n",
+			info->check_set);
 		return false;
 	}
 	if (hweight8(info->check_set &
@@ -336,7 +336,7 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 	if ((info->check_set & XT_RECENT_REAP) && !info->seconds)
 		return false;
 	if (info->hit_count > ip_pkt_list_tot) {
-		pr_info(KBUILD_MODNAME ": hitcount (%u) is larger than "
+		pr_info("hitcount (%u) is larger than "
 			"packets to be remembered (%u)\n",
 			info->hit_count, ip_pkt_list_tot);
 		return false;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 6a902564d24f..a9b16867e1f7 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -9,7 +9,7 @@
  * published by the Free Software Foundation.
  *
  */
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter/x_tables.h>
@@ -165,8 +165,7 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
 			sk = NULL;
 	}
 
-	pr_debug("socket match: proto %u %08x:%u -> %08x:%u "
-		 "(orig %08x:%u) sock %p\n",
+	pr_debug("proto %u %08x:%u -> %08x:%u (orig %08x:%u) sock %p\n",
 		 protocol, ntohl(saddr), ntohs(sport),
 		 ntohl(daddr), ntohs(dport),
 		 ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk);
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 9a9c9a3b0a5d..45ed05b5161f 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -223,8 +223,8 @@ static bool time_mt_check(const struct xt_mtchk_param *par)
 
 	if (info->daytime_start > XT_TIME_MAX_DAYTIME ||
 	    info->daytime_stop > XT_TIME_MAX_DAYTIME) {
-		printk(KERN_WARNING "xt_time: invalid argument - start or "
-		       "stop time greater than 23:59:59\n");
+		pr_info("invalid argument - start or "
+			"stop time greater than 23:59:59\n");
 		return false;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From d2a7b6bad2c38e41eddb0b24d03627d9e7aa3f7b Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 10 Jul 2009 18:55:11 +0200
Subject: netfilter: xtables: make use of xt_request_find_target

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/bridge/netfilter/ebtables.c | 13 ++-----------
 net/ipv4/netfilter/arp_tables.c | 20 ++++++++------------
 net/ipv4/netfilter/ip_tables.c  | 20 ++++++++------------
 net/ipv6/netfilter/ip6_tables.c | 20 ++++++++------------
 net/netfilter/x_tables.c        |  4 +---
 net/sched/act_ipt.c             |  4 ++--
 6 files changed, 29 insertions(+), 52 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 989d72cc8148..6d3b256d2f61 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -395,13 +395,9 @@ ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par,
 	   left - sizeof(struct ebt_entry_watcher) < w->watcher_size)
 		return -EINVAL;
 
-	watcher = try_then_request_module(
-		  xt_find_target(NFPROTO_BRIDGE, w->u.name, 0),
-		  "ebt_%s", w->u.name);
+	watcher = xt_request_find_target(NFPROTO_BRIDGE, w->u.name, 0);
 	if (IS_ERR(watcher))
 		return PTR_ERR(watcher);
-	if (watcher == NULL)
-		return -ENOENT;
 	w->u.watcher = watcher;
 
 	par->target   = watcher;
@@ -714,15 +710,10 @@ ebt_check_entry(struct ebt_entry *e, struct net *net,
 	t = (struct ebt_entry_target *)(((char *)e) + e->target_offset);
 	gap = e->next_offset - e->target_offset;
 
-	target = try_then_request_module(
-		 xt_find_target(NFPROTO_BRIDGE, t->u.name, 0),
-		 "ebt_%s", t->u.name);
+	target = xt_request_find_target(NFPROTO_BRIDGE, t->u.name, 0);
 	if (IS_ERR(target)) {
 		ret = PTR_ERR(target);
 		goto cleanup_watchers;
-	} else if (target == NULL) {
-		ret = -ENOENT;
-		goto cleanup_watchers;
 	}
 
 	t->u.target = target;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index f07d77f65751..e8e363d90365 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -523,13 +523,11 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
 		return ret;
 
 	t = arpt_get_target(e);
-	target = try_then_request_module(xt_find_target(NFPROTO_ARP,
-							t->u.user.name,
-							t->u.user.revision),
-					 "arpt_%s", t->u.user.name);
-	if (IS_ERR(target) || !target) {
+	target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
+					t->u.user.revision);
+	if (IS_ERR(target)) {
 		duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
-		ret = target ? PTR_ERR(target) : -ENOENT;
+		ret = PTR_ERR(target);
 		goto out;
 	}
 	t->u.kernel.target = target;
@@ -1252,14 +1250,12 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
 	entry_offset = (void *)e - (void *)base;
 
 	t = compat_arpt_get_target(e);
-	target = try_then_request_module(xt_find_target(NFPROTO_ARP,
-							t->u.user.name,
-							t->u.user.revision),
-					 "arpt_%s", t->u.user.name);
-	if (IS_ERR(target) || !target) {
+	target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
+					t->u.user.revision);
+	if (IS_ERR(target)) {
 		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
 			 t->u.user.name);
-		ret = target ? PTR_ERR(target) : -ENOENT;
+		ret = PTR_ERR(target);
 		goto out;
 	}
 	t->u.kernel.target = target;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 73fdf20263ed..e24ec48ee8cd 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -701,13 +701,11 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
 	}
 
 	t = ipt_get_target(e);
-	target = try_then_request_module(xt_find_target(AF_INET,
-							t->u.user.name,
-							t->u.user.revision),
-					 "ipt_%s", t->u.user.name);
-	if (IS_ERR(target) || !target) {
+	target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
+					t->u.user.revision);
+	if (IS_ERR(target)) {
 		duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
-		ret = target ? PTR_ERR(target) : -ENOENT;
+		ret = PTR_ERR(target);
 		goto cleanup_matches;
 	}
 	t->u.kernel.target = target;
@@ -1547,14 +1545,12 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
 	}
 
 	t = compat_ipt_get_target(e);
-	target = try_then_request_module(xt_find_target(AF_INET,
-							t->u.user.name,
-							t->u.user.revision),
-					 "ipt_%s", t->u.user.name);
-	if (IS_ERR(target) || !target) {
+	target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
+					t->u.user.revision);
+	if (IS_ERR(target)) {
 		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
 			 t->u.user.name);
-		ret = target ? PTR_ERR(target) : -ENOENT;
+		ret = PTR_ERR(target);
 		goto release_matches;
 	}
 	t->u.kernel.target = target;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1b2414e03a34..842bef374dcc 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -733,13 +733,11 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
 	}
 
 	t = ip6t_get_target(e);
-	target = try_then_request_module(xt_find_target(AF_INET6,
-							t->u.user.name,
-							t->u.user.revision),
-					 "ip6t_%s", t->u.user.name);
-	if (IS_ERR(target) || !target) {
+	target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
+					t->u.user.revision);
+	if (IS_ERR(target)) {
 		duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
-		ret = target ? PTR_ERR(target) : -ENOENT;
+		ret = PTR_ERR(target);
 		goto cleanup_matches;
 	}
 	t->u.kernel.target = target;
@@ -1581,14 +1579,12 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
 	}
 
 	t = compat_ip6t_get_target(e);
-	target = try_then_request_module(xt_find_target(AF_INET6,
-							t->u.user.name,
-							t->u.user.revision),
-					 "ip6t_%s", t->u.user.name);
-	if (IS_ERR(target) || !target) {
+	target = xt_request_find_target(NFPROTO_IPV6, t->u.user.name,
+					t->u.user.revision);
+	if (IS_ERR(target)) {
 		duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
 			 t->u.user.name);
-		ret = target ? PTR_ERR(target) : -ENOENT;
+		ret = PTR_ERR(target);
 		goto release_matches;
 	}
 	t->u.kernel.target = target;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 9a248d4a877f..bf2806afd920 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -250,9 +250,7 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
 
 	target = try_then_request_module(xt_find_target(af, name, revision),
 					 "%st_%s", xt_prefix[af], name);
-	if (IS_ERR(target) || !target)
-		return NULL;
-	return target;
+	return (target != NULL) ? target : ERR_PTR(-ENOENT);
 }
 EXPORT_SYMBOL_GPL(xt_request_find_target);
 
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 082c520b0def..b9f79c251d75 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -46,8 +46,8 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
 
 	target = xt_request_find_target(AF_INET, t->u.user.name,
 					t->u.user.revision);
-	if (!target)
-		return -ENOENT;
+	if (IS_ERR(target))
+		return PTR_ERR(target);
 
 	t->u.kernel.target = target;
 	par.table     = table;
-- 
cgit v1.2.3-59-g8ed1b


From fd0ec0e6216baea854465bbdb177f2d1b2ccaf22 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 10 Jul 2009 19:27:47 +0200
Subject: netfilter: xtables: consolidate code into xt_request_find_match

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h |  2 ++
 net/bridge/netfilter/ebtables.c    |  5 +----
 net/ipv4/netfilter/ip_tables.c     | 18 ++++++++----------
 net/ipv6/netfilter/ip6_tables.c    | 18 ++++++++----------
 net/netfilter/x_tables.c           | 11 +++++++++++
 5 files changed, 30 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index f8f555153373..dd9d15a73a96 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -436,6 +436,8 @@ extern struct xt_table_info *xt_replace_table(struct xt_table *table,
 
 extern struct xt_match *xt_find_match(u8 af, const char *name, u8 revision);
 extern struct xt_target *xt_find_target(u8 af, const char *name, u8 revision);
+extern struct xt_match *xt_request_find_match(u8 af, const char *name,
+					      u8 revision);
 extern struct xt_target *xt_request_find_target(u8 af, const char *name,
 						u8 revision);
 extern int xt_find_revision(u8 af, const char *name, u8 revision,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 6d3b256d2f61..c41f3fad0587 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -361,12 +361,9 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
 	    left - sizeof(struct ebt_entry_match) < m->match_size)
 		return -EINVAL;
 
-	match = try_then_request_module(xt_find_match(NFPROTO_BRIDGE,
-		m->u.name, 0), "ebt_%s", m->u.name);
+	match = xt_request_find_match(NFPROTO_BRIDGE, m->u.name, 0);
 	if (IS_ERR(match))
 		return PTR_ERR(match);
-	if (match == NULL)
-		return -ENOENT;
 	m->u.match = match;
 
 	par->match     = match;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e24ec48ee8cd..09f6567a85b7 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -629,12 +629,11 @@ find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
 	struct xt_match *match;
 	int ret;
 
-	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
-						      m->u.user.revision),
-					"ipt_%s", m->u.user.name);
-	if (IS_ERR(match) || !match) {
+	match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
+				      m->u.user.revision);
+	if (IS_ERR(match)) {
 		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
-		return match ? PTR_ERR(match) : -ENOENT;
+		return PTR_ERR(match);
 	}
 	m->u.kernel.match = match;
 
@@ -1472,13 +1471,12 @@ compat_find_calc_match(struct ipt_entry_match *m,
 {
 	struct xt_match *match;
 
-	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
-						      m->u.user.revision),
-					"ipt_%s", m->u.user.name);
-	if (IS_ERR(match) || !match) {
+	match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
+				      m->u.user.revision);
+	if (IS_ERR(match)) {
 		duprintf("compat_check_calc_match: `%s' not found\n",
 			 m->u.user.name);
-		return match ? PTR_ERR(match) : -ENOENT;
+		return PTR_ERR(match);
 	}
 	m->u.kernel.match = match;
 	*size += xt_compat_match_offset(match);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 842bef374dcc..41e2429c0163 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -660,12 +660,11 @@ find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par)
 	struct xt_match *match;
 	int ret;
 
-	match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name,
-						      m->u.user.revision),
-					"ip6t_%s", m->u.user.name);
-	if (IS_ERR(match) || !match) {
+	match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
+				      m->u.user.revision);
+	if (IS_ERR(match)) {
 		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
-		return match ? PTR_ERR(match) : -ENOENT;
+		return PTR_ERR(match);
 	}
 	m->u.kernel.match = match;
 
@@ -1506,13 +1505,12 @@ compat_find_calc_match(struct ip6t_entry_match *m,
 {
 	struct xt_match *match;
 
-	match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name,
-						      m->u.user.revision),
-					"ip6t_%s", m->u.user.name);
-	if (IS_ERR(match) || !match) {
+	match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
+				      m->u.user.revision);
+	if (IS_ERR(match)) {
 		duprintf("compat_check_calc_match: `%s' not found\n",
 			 m->u.user.name);
-		return match ? PTR_ERR(match) : -ENOENT;
+		return PTR_ERR(match);
 	}
 	m->u.kernel.match = match;
 	*size += xt_compat_match_offset(match);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index bf2806afd920..ee7fe215b3e1 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -214,6 +214,17 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
 }
 EXPORT_SYMBOL(xt_find_match);
 
+struct xt_match *
+xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
+{
+	struct xt_match *match;
+
+	match = try_then_request_module(xt_find_match(nfproto, name, revision),
+					"%st_%s", xt_prefix[nfproto], name);
+	return (match != NULL) ? match : ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(xt_request_find_match);
+
 /* Find target, grabs ref.  Returns ERR_PTR() on error. */
 struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 5dc7a6d5749d3ddbf9fbea9512cb45762428512c Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 19 Mar 2010 21:29:08 +0100
Subject: netfilter: xt_recent: allow changing ip_list_[ug]id at runtime

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_recent.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 72cbced48a8d..85309448c5e7 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -51,14 +51,14 @@ module_param(ip_list_tot, uint, 0400);
 module_param(ip_pkt_list_tot, uint, 0400);
 module_param(ip_list_hash_size, uint, 0400);
 module_param(ip_list_perms, uint, 0400);
-module_param(ip_list_uid, uint, 0400);
-module_param(ip_list_gid, uint, 0400);
+module_param(ip_list_uid, uint, S_IRUGO | S_IWUSR);
+module_param(ip_list_gid, uint, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list");
 MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)");
 MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs");
 MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files");
-MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/xt_recent/* files");
-MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/xt_recent/* files");
+MODULE_PARM_DESC(ip_list_uid, "default owner of /proc/net/xt_recent/* files");
+MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* files");
 
 struct recent_entry {
 	struct list_head	list;
-- 
cgit v1.2.3-59-g8ed1b


From 713aefa3fb3929ce36305d4d1b7b4059d87ed115 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 23 Mar 2010 04:07:21 +0100
Subject: netfilter: bridge: use NFPROTO values for NF_HOOK invocation

The first argument to NF_HOOK* has been an nfproto value for quite some time.
Commit v2.6.27-2457-gfdc9314 was the first to practically start using
the new names. Do that now for the remaining NF_HOOK calls.

The semantic patch used was:
// <smpl>
@@
@@
(NF_HOOK
|NF_HOOK_THRESH
)(
-PF_BRIDGE,
+NFPROTO_BRIDGE,
 ...)

@@
@@
 NF_HOOK(
-PF_INET6,
+NFPROTO_IPV6,
 ...)

@@
@@
 NF_HOOK(
-PF_INET,
+NFPROTO_IPV4,
 ...)
// </smpl>

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/bridge/br_forward.c   | 10 +++++-----
 net/bridge/br_input.c     |  6 +++---
 net/bridge/br_multicast.c |  2 +-
 net/bridge/br_netfilter.c | 15 ++++++++-------
 net/bridge/br_stp_bpdu.c  |  2 +-
 5 files changed, 18 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 8dbec83e50ca..7ab52d07b477 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -58,7 +58,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
 
 int br_forward_finish(struct sk_buff *skb)
 {
-	return NF_HOOK(PF_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
+	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev,
 		       br_dev_queue_push_xmit);
 
 }
@@ -66,8 +66,8 @@ int br_forward_finish(struct sk_buff *skb)
 static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
 	skb->dev = to->dev;
-	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
-			br_forward_finish);
+	NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+		br_forward_finish);
 }
 
 static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
@@ -83,8 +83,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
 	skb->dev = to->dev;
 	skb_forward_csum(skb);
 
-	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
-			br_forward_finish);
+	NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev,
+		br_forward_finish);
 }
 
 /* called with rcu_read_lock */
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 333dfb7c5886..c9018fc72d24 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -32,7 +32,7 @@ static int br_pass_frame_up(struct sk_buff *skb)
 	indev = skb->dev;
 	skb->dev = brdev;
 
-	return NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
+	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL,
 		       netif_receive_skb);
 }
 
@@ -155,7 +155,7 @@ struct sk_buff *br_handle_frame(struct net_bridge_port *p, struct sk_buff *skb)
 		if (p->br->stp_enabled == BR_NO_STP && dest[5] == 0)
 			goto forward;
 
-		if (NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
+		if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev,
 			    NULL, br_handle_local_finish))
 			return NULL;	/* frame consumed by filter */
 		else
@@ -176,7 +176,7 @@ forward:
 		if (!compare_ether_addr(p->br->dev->dev_addr, dest))
 			skb->pkt_type = PACKET_HOST;
 
-		NF_HOOK(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+		NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 			br_handle_frame_finish);
 		break;
 	default:
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 6980625537ca..ed19b0a730ab 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -608,7 +608,7 @@ static void br_multicast_send_query(struct net_bridge *br,
 	if (port) {
 		__skb_push(skb, sizeof(struct ethhdr));
 		skb->dev = port->dev;
-		NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+		NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
 			dev_queue_xmit);
 	} else
 		netif_rx(skb);
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 268e2e725888..bc6b57248494 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -246,7 +246,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 
 	skb->dev = nf_bridge->physindev;
 	nf_bridge_push_encap_header(skb);
-	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 		       br_handle_frame_finish, 1);
 
 	return 0;
@@ -396,7 +396,8 @@ bridged_dnat:
 				nf_bridge->mask |= BRNF_BRIDGED_DNAT;
 				skb->dev = nf_bridge->physindev;
 				nf_bridge_push_encap_header(skb);
-				NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING,
+				NF_HOOK_THRESH(NFPROTO_BRIDGE,
+					       NF_BR_PRE_ROUTING,
 					       skb, skb->dev, NULL,
 					       br_nf_pre_routing_finish_bridge,
 					       1);
@@ -417,7 +418,7 @@ bridged_dnat:
 
 	skb->dev = nf_bridge->physindev;
 	nf_bridge_push_encap_header(skb);
-	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
+	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 		       br_handle_frame_finish, 1);
 
 	return 0;
@@ -534,7 +535,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
 	if (!setup_pre_routing(skb))
 		return NF_DROP;
 
-	NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
+	NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		br_nf_pre_routing_finish_ipv6);
 
 	return NF_STOLEN;
@@ -607,7 +608,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 		return NF_DROP;
 	store_orig_dstaddr(skb);
 
-	NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
+	NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		br_nf_pre_routing_finish);
 
 	return NF_STOLEN;
@@ -655,7 +656,7 @@ static int br_nf_forward_finish(struct sk_buff *skb)
 		in = *((struct net_device **)(skb->cb));
 	}
 	nf_bridge_push_encap_header(skb);
-	NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in,
+	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in,
 		       skb->dev, br_forward_finish, 1);
 	return 0;
 }
@@ -786,7 +787,7 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
 	}
 	nf_bridge_push_encap_header(skb);
 
-	NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
+	NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
 		br_forward_finish);
 	return NF_STOLEN;
 }
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 81ae40b3f655..11b0157f69c3 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -49,7 +49,7 @@ static void br_send_bpdu(struct net_bridge_port *p,
 
 	llc_mac_hdr_init(skb, p->dev->dev_addr, p->br->group_addr);
 
-	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
+	NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
 		dev_queue_xmit);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 9bbc768aa911a3ef336272eaa6d220abfba8ce50 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 23 Mar 2010 04:07:29 +0100
Subject: netfilter: ipv4: use NFPROTO values for NF_HOOK invocation

The semantic patch that was used:
// <smpl>
@@
@@
(NF_HOOK
|NF_HOOK_COND
|nf_hook
)(
-PF_INET,
+NFPROTO_IPV4,
 ...)
// </smpl>

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/ipv4/ip_forward.c   |  4 ++--
 net/ipv4/ip_input.c     |  4 ++--
 net/ipv4/ip_output.c    | 18 +++++++++---------
 net/ipv4/ipmr.c         |  2 +-
 net/ipv4/raw.c          |  4 ++--
 net/ipv4/xfrm4_input.c  |  2 +-
 net/ipv4/xfrm4_output.c |  2 +-
 7 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index a2991bc8e32e..9f2cd47ceeb7 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -111,8 +111,8 @@ int ip_forward(struct sk_buff *skb)
 
 	skb->priority = rt_tos2priority(iph->tos);
 
-	return NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, rt->u.dst.dev,
-		       ip_forward_finish);
+	return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev,
+		       rt->u.dst.dev, ip_forward_finish);
 
 sr_failed:
 	/*
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index c29de9879fda..091b5c7e04e1 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -265,7 +265,7 @@ int ip_local_deliver(struct sk_buff *skb)
 			return 0;
 	}
 
-	return NF_HOOK(PF_INET, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
+	return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
 		       ip_local_deliver_finish);
 }
 
@@ -443,7 +443,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
 	/* Must drop socket now because of tproxy. */
 	skb_orphan(skb);
 
-	return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL,
+	return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL,
 		       ip_rcv_finish);
 
 inhdr_error:
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3451799e3dbf..f09135e1e14f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -95,8 +95,8 @@ int __ip_local_out(struct sk_buff *skb)
 
 	iph->tot_len = htons(skb->len);
 	ip_send_check(iph);
-	return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
-		       dst_output);
+	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
+		       skb_dst(skb)->dev, dst_output);
 }
 
 int ip_local_out(struct sk_buff *skb)
@@ -271,8 +271,8 @@ int ip_mc_output(struct sk_buff *skb)
 		   ) {
 			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 			if (newskb)
-				NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb,
-					NULL, newskb->dev,
+				NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING,
+					newskb, NULL, newskb->dev,
 					ip_dev_loopback_xmit);
 		}
 
@@ -287,12 +287,12 @@ int ip_mc_output(struct sk_buff *skb)
 	if (rt->rt_flags&RTCF_BROADCAST) {
 		struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 		if (newskb)
-			NF_HOOK(PF_INET, NF_INET_POST_ROUTING, newskb, NULL,
-				newskb->dev, ip_dev_loopback_xmit);
+			NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb,
+				NULL, newskb->dev, ip_dev_loopback_xmit);
 	}
 
-	return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
-			    ip_finish_output,
+	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL,
+			    skb->dev, ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
@@ -305,7 +305,7 @@ int ip_output(struct sk_buff *skb)
 	skb->dev = dev;
 	skb->protocol = htons(ETH_P_IP);
 
-	return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, dev,
+	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev,
 			    ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 8582e12e4a62..1d42f6103c8d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1319,7 +1319,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 	 * not mrouter) cannot join to more than one interface - it will
 	 * result in receiving multiple packets.
 	 */
-	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
+	NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
 		ipmr_forward_finish);
 	return;
 
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ce154b47f1da..34d9adb83590 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -382,8 +382,8 @@ static int raw_send_hdrinc(struct sock *sk, void *from, size_t length,
 		icmp_out_count(net, ((struct icmphdr *)
 			skb_transport_header(skb))->type);
 
-	err = NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
-		      dst_output);
+	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL,
+		      rt->u.dst.dev, dst_output);
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index f9f922a0ba88..c3969e0f96c3 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -60,7 +60,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async)
 	iph->tot_len = htons(skb->len);
 	ip_send_check(iph);
 
-	NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
+	NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		xfrm4_rcv_encap_finish);
 	return 0;
 }
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index c908bd99bcba..571aa96a175c 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -86,7 +86,7 @@ static int xfrm4_output_finish(struct sk_buff *skb)
 
 int xfrm4_output(struct sk_buff *skb)
 {
-	return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb,
+	return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb,
 			    NULL, skb_dst(skb)->dev, xfrm4_output_finish,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
-- 
cgit v1.2.3-59-g8ed1b


From b2e0b385d77069031edb957839aaaa8441b47287 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 23 Mar 2010 04:09:07 +0100
Subject: netfilter: ipv6: use NFPROTO values for NF_HOOK invocation

The semantic patch that was used:
// <smpl>
@@
@@
(NF_HOOK
|NF_HOOK_THRESH
|nf_hook
)(
-PF_INET6,
+NFPROTO_IPV6,
 ...)
// </smpl>

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/ipv6/ip6_input.c                    |  4 ++--
 net/ipv6/ip6_output.c                   | 16 ++++++++--------
 net/ipv6/ip6mr.c                        |  2 +-
 net/ipv6/mcast.c                        |  4 ++--
 net/ipv6/ndisc.c                        |  4 ++--
 net/ipv6/netfilter/nf_conntrack_reasm.c |  2 +-
 net/ipv6/raw.c                          |  4 ++--
 net/ipv6/xfrm6_input.c                  |  2 +-
 net/ipv6/xfrm6_output.c                 |  4 ++--
 9 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index e28f9203deca..2c01dc65794d 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -142,7 +142,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
 	/* Must drop socket now because of tproxy. */
 	skb_orphan(skb);
 
-	return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL,
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, dev, NULL,
 		       ip6_rcv_finish);
 err:
 	IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS);
@@ -235,7 +235,7 @@ discard:
 
 int ip6_input(struct sk_buff *skb)
 {
-	return NF_HOOK(PF_INET6, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, skb, skb->dev, NULL,
 		       ip6_input_finish);
 }
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index dabf108ad811..4535b7a0169b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -66,8 +66,8 @@ int __ip6_local_out(struct sk_buff *skb)
 		len = 0;
 	ipv6_hdr(skb)->payload_len = htons(len);
 
-	return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
-		       dst_output);
+	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+		       skb_dst(skb)->dev, dst_output);
 }
 
 int ip6_local_out(struct sk_buff *skb)
@@ -134,8 +134,8 @@ static int ip6_output2(struct sk_buff *skb)
 			   is not supported in any case.
 			 */
 			if (newskb)
-				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
-					NULL, newskb->dev,
+				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
+					newskb, NULL, newskb->dev,
 					ip6_dev_loopback_xmit);
 
 			if (ipv6_hdr(skb)->hop_limit == 0) {
@@ -150,7 +150,7 @@ static int ip6_output2(struct sk_buff *skb)
 				skb->len);
 	}
 
-	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
 		       ip6_output_finish);
 }
 
@@ -260,8 +260,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
 		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
 			      IPSTATS_MIB_OUT, skb->len);
-		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
-				dst_output);
+		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+			       dst->dev, dst_output);
 	}
 
 	if (net_ratelimit())
@@ -537,7 +537,7 @@ int ip6_forward(struct sk_buff *skb)
 	hdr->hop_limit--;
 
 	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
-	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
 		       ip6_forward_finish);
 
 error:
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 52e0f74fdfe0..430372e0bf24 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1566,7 +1566,7 @@ static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 
 	IP6CB(skb)->flags |= IP6SKB_FORWARDED;
 
-	return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
 		       ip6mr_forward2_finish);
 
 out_free:
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bcd971915969..773b9d18b748 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1479,7 +1479,7 @@ static void mld_sendpack(struct sk_buff *skb)
 
 	payload_len = skb->len;
 
-	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
+	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
 		      dst_output);
 out:
 	if (!err) {
@@ -1847,7 +1847,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 		goto err_out;
 
 	skb_dst_set(skb, dst);
-	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
+	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev,
 		      dst_output);
 out:
 	if (!err) {
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 8bcc4b7db3bf..8e96a350f52f 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -535,7 +535,7 @@ void ndisc_send_skb(struct sk_buff *skb,
 	idev = in6_dev_get(dst->dev);
 	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
 
-	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
+	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
 		      dst_output);
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, type);
@@ -1617,7 +1617,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	skb_dst_set(buff, dst);
 	idev = in6_dev_get(dst->dev);
 	IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
-	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
+	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
 		      dst_output);
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT);
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index f1171b744650..8f80e245f370 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -643,7 +643,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
 		s2 = s->next;
 		s->next = NULL;
 
-		NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn,
+		NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn,
 			       NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
 		s = s2;
 	}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index ed31c37c6e39..e9e1f774b0b7 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -636,8 +636,8 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
 		goto error_fault;
 
 	IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
-	err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
-		      dst_output);
+	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
+		      rt->u.dst.dev, dst_output);
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 2bc98ede1235..f8c3cf842f53 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -42,7 +42,7 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 	ipv6_hdr(skb)->payload_len = htons(skb->len);
 	__skb_push(skb, skb->data - skb_network_header(skb));
 
-	NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
+	NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		ip6_rcv_finish);
 	return -1;
 }
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 0c92112dcba3..6434bd5ce088 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -90,6 +90,6 @@ static int xfrm6_output_finish(struct sk_buff *skb)
 
 int xfrm6_output(struct sk_buff *skb)
 {
-	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb_dst(skb)->dev,
-		       xfrm6_output_finish);
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL,
+		       skb_dst(skb)->dev, xfrm6_output_finish);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 5d877d876cfb96c0c3254184171b4767501f4f95 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 23 Mar 2010 04:09:14 +0100
Subject: netfilter: decnet: use NFPROTO values for NF_HOOK invocation

The semantic patch used was:
// <smpl>
@@
@@
 NF_HOOK(
-PF_DECnet,
+NFPROTO_DECNET,
 ...)
// </smpl>

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/decnet/dn_neigh.c  |  9 ++++++---
 net/decnet/dn_nsp_in.c |  3 ++-
 net/decnet/dn_route.c  | 28 ++++++++++++++++++++--------
 3 files changed, 28 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 794b5bf95af1..76622c0442be 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -265,7 +265,8 @@ static int dn_long_output(struct sk_buff *skb)
 
 	skb_reset_network_header(skb);
 
-	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL,
+		       neigh->dev, dn_neigh_output_packet);
 }
 
 static int dn_short_output(struct sk_buff *skb)
@@ -304,7 +305,8 @@ static int dn_short_output(struct sk_buff *skb)
 
 	skb_reset_network_header(skb);
 
-	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL,
+		       neigh->dev, dn_neigh_output_packet);
 }
 
 /*
@@ -346,7 +348,8 @@ static int dn_phase3_output(struct sk_buff *skb)
 
 	skb_reset_network_header(skb);
 
-	return NF_HOOK(PF_DECnet, NF_DN_POST_ROUTING, skb, NULL, neigh->dev, dn_neigh_output_packet);
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL,
+		       neigh->dev, dn_neigh_output_packet);
 }
 
 /*
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index 932408dca86d..65531ad96e70 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -809,7 +809,8 @@ free_out:
 
 int dn_nsp_rx(struct sk_buff *skb)
 {
-	return NF_HOOK(PF_DECnet, NF_DN_LOCAL_IN, skb, skb->dev, NULL, dn_nsp_rx_packet);
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, skb, skb->dev, NULL,
+		       dn_nsp_rx_packet);
 }
 
 /*
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index a7bf03ca0a36..86eca5f7f678 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -517,7 +517,8 @@ static int dn_route_rx_long(struct sk_buff *skb)
 	ptr++;
 	cb->hops = *ptr++; /* Visit Count */
 
-	return NF_HOOK(PF_DECnet, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, dn_route_rx_packet);
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL,
+		       dn_route_rx_packet);
 
 drop_it:
 	kfree_skb(skb);
@@ -543,7 +544,8 @@ static int dn_route_rx_short(struct sk_buff *skb)
 	ptr += 2;
 	cb->hops = *ptr & 0x3f;
 
-	return NF_HOOK(PF_DECnet, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, dn_route_rx_packet);
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL,
+		       dn_route_rx_packet);
 
 drop_it:
 	kfree_skb(skb);
@@ -645,16 +647,24 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type
 
 		switch(flags & DN_RT_CNTL_MSK) {
 			case DN_RT_PKT_HELO:
-				return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_route_ptp_hello);
+				return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
+					       skb, skb->dev, NULL,
+					       dn_route_ptp_hello);
 
 			case DN_RT_PKT_L1RT:
 			case DN_RT_PKT_L2RT:
-				return NF_HOOK(PF_DECnet, NF_DN_ROUTE, skb, skb->dev, NULL, dn_route_discard);
+				return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE,
+					       skb, skb->dev, NULL,
+					       dn_route_discard);
 			case DN_RT_PKT_ERTH:
-				return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_neigh_router_hello);
+				return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
+					       skb, skb->dev, NULL,
+					       dn_neigh_router_hello);
 
 			case DN_RT_PKT_EEDH:
-				return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_neigh_endnode_hello);
+				return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO,
+					       skb, skb->dev, NULL,
+					       dn_neigh_endnode_hello);
 		}
 	} else {
 		if (dn->parms.state != DN_DEV_S_RU)
@@ -703,7 +713,8 @@ static int dn_output(struct sk_buff *skb)
 	cb->rt_flags |= DN_RT_F_IE;
 	cb->hops = 0;
 
-	return NF_HOOK(PF_DECnet, NF_DN_LOCAL_OUT, skb, NULL, dev, neigh->output);
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, skb, NULL, dev,
+		       neigh->output);
 
 error:
 	if (net_ratelimit())
@@ -752,7 +763,8 @@ static int dn_forward(struct sk_buff *skb)
 	if (rt->rt_flags & RTCF_DOREDIRECT)
 		cb->rt_flags |= DN_RT_F_IE;
 
-	return NF_HOOK(PF_DECnet, NF_DN_FORWARD, skb, dev, skb->dev, neigh->output);
+	return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, skb, dev, skb->dev,
+		       neigh->output);
 
 drop:
 	kfree_skb(skb);
-- 
cgit v1.2.3-59-g8ed1b


From 7911b5c75b613f533b6cb6f999041dd5ea3bb004 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 23 Mar 2010 04:08:46 +0100
Subject: netfilter: ipvs: use NFPROTO values for NF_HOOK invocation

Semantic patch:
// <smpl>
@@
@@
 IP_VS_XMIT(
-PF_INET6,
+NFPROTO_IPV6,
 ...)

@@
@@
 IP_VS_XMIT(
-PF_INET,
+NFPROTO_IPV4,
 ...)
// </smpl>

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/ipvs/ip_vs_xmit.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 223b5018c7dc..d0a7b7b05ddb 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -269,7 +269,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(PF_INET, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -333,7 +333,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(PF_INET6, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -409,7 +409,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(PF_INET, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -485,7 +485,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(PF_INET6, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -784,7 +784,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(PF_INET, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -837,7 +837,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(PF_INET6, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);
 
 	LeaveFunction(10);
 	return NF_STOLEN;
@@ -911,7 +911,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(PF_INET, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV4, skb, rt);
 
 	rc = NF_STOLEN;
 	goto out;
@@ -986,7 +986,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* Another hack: avoid icmp_send in ip_fragment */
 	skb->local_df = 1;
 
-	IP_VS_XMIT(PF_INET6, skb, rt);
+	IP_VS_XMIT(NFPROTO_IPV6, skb, rt);
 
 	rc = NF_STOLEN;
 	goto out;
-- 
cgit v1.2.3-59-g8ed1b


From 9f5673174161cc026a6c87f70d9b457e7ad82a80 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 23 Mar 2010 17:40:13 +0100
Subject: netfilter: xtables: untangle spaghetti if clauses in checkentry

As I'm changing the return values soon, I want to have a clear visual
path.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_dccp.c | 10 +++++++---
 net/netfilter/xt_sctp.c | 20 ++++++++++++--------
 2 files changed, 19 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 0989f29ade2e..8f6014f7c881 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -127,9 +127,13 @@ static bool dccp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_dccp_info *info = par->matchinfo;
 
-	return !(info->flags & ~XT_DCCP_VALID_FLAGS)
-		&& !(info->invflags & ~XT_DCCP_VALID_FLAGS)
-		&& !(info->invflags & ~info->flags);
+	if (info->flags & ~XT_DCCP_VALID_FLAGS)
+		return false;
+	if (info->invflags & ~XT_DCCP_VALID_FLAGS)
+		return false;
+	if (info->invflags & ~info->flags)
+		return false;
+	return true;
 }
 
 static struct xt_match dccp_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 43c7e1de532c..977b182dea59 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -148,14 +148,18 @@ static bool sctp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_sctp_info *info = par->matchinfo;
 
-	return !(info->flags & ~XT_SCTP_VALID_FLAGS)
-		&& !(info->invflags & ~XT_SCTP_VALID_FLAGS)
-		&& !(info->invflags & ~info->flags)
-		&& ((!(info->flags & XT_SCTP_CHUNK_TYPES)) ||
-			(info->chunk_match_type &
-				(SCTP_CHUNK_MATCH_ALL
-				| SCTP_CHUNK_MATCH_ANY
-				| SCTP_CHUNK_MATCH_ONLY)));
+	if (info->flags & ~XT_SCTP_VALID_FLAGS)
+		return false;
+	if (info->invflags & ~XT_SCTP_VALID_FLAGS)
+		return false;
+	if (info->invflags & ~info->flags)
+		return false;
+	if (!(info->flags & XT_SCTP_CHUNK_TYPES))
+		return true;
+	if (info->chunk_match_type & (SCTP_CHUNK_MATCH_ALL |
+	    SCTP_CHUNK_MATCH_ANY | SCTP_CHUNK_MATCH_ONLY))
+		return true;
+	return false;
 }
 
 static struct xt_match sctp_mt_reg[] __read_mostly = {
-- 
cgit v1.2.3-59-g8ed1b


From b0f38452ff73da7e9e0ddc68cd5c6b93c897ca0d Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 19 Mar 2010 17:16:42 +0100
Subject: netfilter: xtables: change xt_match.checkentry return type

Restore function signatures from bool to int so that we can report
memory allocation failures or similar using -ENOMEM rather than
always having to pass -EINVAL back.

This semantic patch may not be too precise (checking for functions
that use xt_mtchk_param rather than functions referenced by
xt_match.checkentry), but upon review, it produced the intended result.

// <smpl>
@@
type bool;
identifier check, par;
@@
-bool check
+int check
 (struct xt_mtchk_param *par) { ... }
// </smpl>

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h   | 2 +-
 net/bridge/netfilter/ebt_802_3.c     | 2 +-
 net/bridge/netfilter/ebt_among.c     | 2 +-
 net/bridge/netfilter/ebt_arp.c       | 2 +-
 net/bridge/netfilter/ebt_ip.c        | 2 +-
 net/bridge/netfilter/ebt_ip6.c       | 2 +-
 net/bridge/netfilter/ebt_limit.c     | 2 +-
 net/bridge/netfilter/ebt_mark_m.c    | 2 +-
 net/bridge/netfilter/ebt_pkttype.c   | 2 +-
 net/bridge/netfilter/ebt_stp.c       | 2 +-
 net/bridge/netfilter/ebt_vlan.c      | 2 +-
 net/ipv4/netfilter/ip_tables.c       | 2 +-
 net/ipv4/netfilter/ipt_addrtype.c    | 2 +-
 net/ipv4/netfilter/ipt_ah.c          | 2 +-
 net/ipv4/netfilter/ipt_ecn.c         | 2 +-
 net/ipv6/netfilter/ip6_tables.c      | 2 +-
 net/ipv6/netfilter/ip6t_ah.c         | 2 +-
 net/ipv6/netfilter/ip6t_frag.c       | 2 +-
 net/ipv6/netfilter/ip6t_hbh.c        | 2 +-
 net/ipv6/netfilter/ip6t_ipv6header.c | 2 +-
 net/ipv6/netfilter/ip6t_mh.c         | 2 +-
 net/ipv6/netfilter/ip6t_rt.c         | 2 +-
 net/netfilter/xt_cluster.c           | 2 +-
 net/netfilter/xt_connbytes.c         | 2 +-
 net/netfilter/xt_connlimit.c         | 2 +-
 net/netfilter/xt_connmark.c          | 2 +-
 net/netfilter/xt_conntrack.c         | 2 +-
 net/netfilter/xt_dccp.c              | 2 +-
 net/netfilter/xt_dscp.c              | 2 +-
 net/netfilter/xt_esp.c               | 2 +-
 net/netfilter/xt_hashlimit.c         | 4 ++--
 net/netfilter/xt_helper.c            | 2 +-
 net/netfilter/xt_limit.c             | 2 +-
 net/netfilter/xt_multiport.c         | 8 ++++----
 net/netfilter/xt_physdev.c           | 2 +-
 net/netfilter/xt_policy.c            | 2 +-
 net/netfilter/xt_quota.c             | 2 +-
 net/netfilter/xt_rateest.c           | 2 +-
 net/netfilter/xt_recent.c            | 2 +-
 net/netfilter/xt_sctp.c              | 2 +-
 net/netfilter/xt_state.c             | 2 +-
 net/netfilter/xt_statistic.c         | 2 +-
 net/netfilter/xt_string.c            | 2 +-
 net/netfilter/xt_tcpudp.c            | 4 ++--
 net/netfilter/xt_time.c              | 2 +-
 45 files changed, 50 insertions(+), 50 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index dd9d15a73a96..33c1a62a0997 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -303,7 +303,7 @@ struct xt_match {
 		      const struct xt_match_param *);
 
 	/* Called when user tries to insert an entry of this type. */
-	bool (*checkentry)(const struct xt_mtchk_param *);
+	int (*checkentry)(const struct xt_mtchk_param *);
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_mtdtor_param *);
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 5d1176758ca5..7b6f4c4cccb7 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -36,7 +36,7 @@ ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_802_3_mt_check(const struct xt_mtchk_param *par)
+static int ebt_802_3_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_802_3_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 60ad6308bc1f..8a75d399b510 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -172,7 +172,7 @@ ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_among_mt_check(const struct xt_mtchk_param *par)
+static int ebt_among_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_among_info *info = par->matchinfo;
 	const struct ebt_entry_match *em =
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index e727697c5847..fc62055adb17 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -100,7 +100,7 @@ ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_arp_mt_check(const struct xt_mtchk_param *par)
+static int ebt_arp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_arp_info *info = par->matchinfo;
 	const struct ebt_entry *e = par->entryinfo;
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 5de6df6f86b8..d1a555dc8878 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -77,7 +77,7 @@ ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_ip_mt_check(const struct xt_mtchk_param *par)
+static int ebt_ip_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_ip_info *info = par->matchinfo;
 	const struct ebt_entry *e = par->entryinfo;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 63e3888d20cf..fa4ecf50fdc9 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -80,7 +80,7 @@ ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_ip6_mt_check(const struct xt_mtchk_param *par)
+static int ebt_ip6_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_entry *e = par->entryinfo;
 	struct ebt_ip6_info *info = par->matchinfo;
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 5b7330b62541..abfb0ecd7c17 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -65,7 +65,7 @@ user2credits(u_int32_t user)
 	return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE;
 }
 
-static bool ebt_limit_mt_check(const struct xt_mtchk_param *par)
+static int ebt_limit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct ebt_limit_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index 8de8c396d913..1e5b0b316fbe 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -22,7 +22,7 @@ ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
-static bool ebt_mark_mt_check(const struct xt_mtchk_param *par)
+static int ebt_mark_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_mark_m_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index e2a07e6cbef3..9b3c64516605 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -20,7 +20,7 @@ ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return (skb->pkt_type == info->pkt_type) ^ info->invert;
 }
 
-static bool ebt_pkttype_mt_check(const struct xt_mtchk_param *par)
+static int ebt_pkttype_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_pkttype_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 92a93d363765..521186fa6994 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -153,7 +153,7 @@ ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_stp_mt_check(const struct xt_mtchk_param *par)
+static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_stp_info *info = par->matchinfo;
 	const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 5c44f51063c3..04a9575389d8 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -79,7 +79,7 @@ ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par)
+static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
 {
 	struct ebt_vlan_info *info = par->matchinfo;
 	const struct ebt_entry *e = par->entryinfo;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 09f6567a85b7..771ffa7b9aff 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2176,7 +2176,7 @@ icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
 				    !!(icmpinfo->invflags&IPT_ICMP_INV));
 }
 
-static bool icmp_checkentry(const struct xt_mtchk_param *par)
+static int icmp_checkentry(const struct xt_mtchk_param *par)
 {
 	const struct ipt_icmp *icmpinfo = par->matchinfo;
 
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index ea4f58a46c6e..81197f456d7f 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -70,7 +70,7 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
+static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
 {
 	struct ipt_addrtype_info_v1 *info = par->matchinfo;
 
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 4f27e170c630..667ded16e120 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -55,7 +55,7 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			 !!(ahinfo->invflags & IPT_AH_INV_SPI));
 }
 
-static bool ah_mt_check(const struct xt_mtchk_param *par)
+static int ah_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ipt_ah *ahinfo = par->matchinfo;
 
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index e661108c73f1..d1e234fe7f1a 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -85,7 +85,7 @@ static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ecn_mt_check(const struct xt_mtchk_param *par)
+static int ecn_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ipt_ecn_info *info = par->matchinfo;
 	const struct ipt_ip *ip = par->entryinfo;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 41e2429c0163..595b45d52ff3 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2209,7 +2209,7 @@ icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par)
 }
 
 /* Called when user tries to insert an entry of this type. */
-static bool icmp6_checkentry(const struct xt_mtchk_param *par)
+static int icmp6_checkentry(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_icmp *icmpinfo = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 4429bfd39e11..3d570446deef 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -87,7 +87,7 @@ static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 		!(ahinfo->hdrres && ah->reserved);
 }
 
-static bool ah_mt6_check(const struct xt_mtchk_param *par)
+static int ah_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_ah *ahinfo = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 5c0da913b4ab..c2dba2701fa3 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -102,7 +102,7 @@ frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 		  (ntohs(fh->frag_off) & IP6_MF));
 }
 
-static bool frag_mt6_check(const struct xt_mtchk_param *par)
+static int frag_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_frag *fraginfo = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index f4b73889d00a..1b294317707b 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -164,7 +164,7 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-static bool hbh_mt6_check(const struct xt_mtchk_param *par)
+static int hbh_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_opts *optsinfo = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 91490ad9302c..90e1e04b7932 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -118,7 +118,7 @@ ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	}
 }
 
-static bool ipv6header_mt6_check(const struct xt_mtchk_param *par)
+static int ipv6header_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_ipv6header_info *info = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index 0181eb81d24b..d9408045994c 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -62,7 +62,7 @@ static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 			  !!(mhinfo->invflags & IP6T_MH_INV_TYPE));
 }
 
-static bool mh_mt6_check(const struct xt_mtchk_param *par)
+static int mh_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_mh *mhinfo = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index c58d65336577..76397f35eafd 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -183,7 +183,7 @@ static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-static bool rt_mt6_check(const struct xt_mtchk_param *par)
+static int rt_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_rt *rtinfo = par->matchinfo;
 
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 4c273e871301..1f2c35ef1427 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -132,7 +132,7 @@ xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	       !!(info->flags & XT_CLUSTER_F_INV);
 }
 
-static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
+static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
 {
 	struct xt_cluster_match_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index edb7bbd9ae54..136ef4ccdacb 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -93,7 +93,7 @@ connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		return what >= sinfo->count.from;
 }
 
-static bool connbytes_mt_check(const struct xt_mtchk_param *par)
+static int connbytes_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_connbytes_info *sinfo = par->matchinfo;
 
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index d5b26dab9e26..a9fec38ab029 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -216,7 +216,7 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-static bool connlimit_mt_check(const struct xt_mtchk_param *par)
+static int connlimit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_connlimit_info *info = par->matchinfo;
 	unsigned int i;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 7a51ba63f545..df7eaff874f1 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -103,7 +103,7 @@ connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ((ct->mark & info->mask) == info->mark) ^ info->invert;
 }
 
-static bool connmark_mt_check(const struct xt_mtchk_param *par)
+static int connmark_mt_check(const struct xt_mtchk_param *par)
 {
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 387172b6b0d8..500e0338a187 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -206,7 +206,7 @@ conntrack_mt_v2(const struct sk_buff *skb, const struct xt_match_param *par)
 	return conntrack_mt(skb, par, info->state_mask, info->status_mask);
 }
 
-static bool conntrack_mt_check(const struct xt_mtchk_param *par)
+static int conntrack_mt_check(const struct xt_mtchk_param *par)
 {
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 8f6014f7c881..da8c301d24ea 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -123,7 +123,7 @@ dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			   XT_DCCP_OPTION, info->flags, info->invflags);
 }
 
-static bool dccp_mt_check(const struct xt_mtchk_param *par)
+static int dccp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_dccp_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 6ecedc13db0c..295da4ce822c 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -42,7 +42,7 @@ dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	return (dscp == info->dscp) ^ !!info->invert;
 }
 
-static bool dscp_mt_check(const struct xt_mtchk_param *par)
+static int dscp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_dscp_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 1a446d626769..9f5da9795674 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -60,7 +60,7 @@ static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			 !!(espinfo->invflags & XT_ESP_INV_SPI));
 }
 
-static bool esp_mt_check(const struct xt_mtchk_param *par)
+static int esp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_esp *espinfo = par->matchinfo;
 
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 8f3e0c02ca54..d13800c95930 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -671,7 +671,7 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
+static int hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 {
 	struct net *net = par->net;
 	struct xt_hashlimit_info *r = par->matchinfo;
@@ -707,7 +707,7 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 	return true;
 }
 
-static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
+static int hashlimit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct net *net = par->net;
 	struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 482aff2ccf7c..6e177b279f90 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -54,7 +54,7 @@ helper_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool helper_mt_check(const struct xt_mtchk_param *par)
+static int helper_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_helper_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index b3dfca63fa52..138a324df8df 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -97,7 +97,7 @@ user2credits(u_int32_t user)
 	return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE;
 }
 
-static bool limit_mt_check(const struct xt_mtchk_param *par)
+static int limit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_rateinfo *r = par->matchinfo;
 	struct xt_limit_priv *priv;
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 4fa90c86fdb5..b446738eab1a 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -152,7 +152,7 @@ check(u_int16_t proto,
 		&& count <= XT_MULTI_PORTS;
 }
 
-static bool multiport_mt_check_v0(const struct xt_mtchk_param *par)
+static int multiport_mt_check_v0(const struct xt_mtchk_param *par)
 {
 	const struct ipt_ip *ip = par->entryinfo;
 	const struct xt_multiport *multiinfo = par->matchinfo;
@@ -161,7 +161,7 @@ static bool multiport_mt_check_v0(const struct xt_mtchk_param *par)
 		     multiinfo->count);
 }
 
-static bool multiport_mt_check(const struct xt_mtchk_param *par)
+static int multiport_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ipt_ip *ip = par->entryinfo;
 	const struct xt_multiport_v1 *multiinfo = par->matchinfo;
@@ -170,7 +170,7 @@ static bool multiport_mt_check(const struct xt_mtchk_param *par)
 		     multiinfo->count);
 }
 
-static bool multiport_mt6_check_v0(const struct xt_mtchk_param *par)
+static int multiport_mt6_check_v0(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_ip6 *ip = par->entryinfo;
 	const struct xt_multiport *multiinfo = par->matchinfo;
@@ -179,7 +179,7 @@ static bool multiport_mt6_check_v0(const struct xt_mtchk_param *par)
 		     multiinfo->count);
 }
 
-static bool multiport_mt6_check(const struct xt_mtchk_param *par)
+static int multiport_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_ip6 *ip = par->entryinfo;
 	const struct xt_multiport_v1 *multiinfo = par->matchinfo;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 3d42a278408f..850e412c83ef 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -83,7 +83,7 @@ match_outdev:
 	return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT));
 }
 
-static bool physdev_mt_check(const struct xt_mtchk_param *par)
+static int physdev_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_physdev_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index de3aded6afb8..c9965b640b16 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -128,7 +128,7 @@ policy_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool policy_mt_check(const struct xt_mtchk_param *par)
+static int policy_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_policy_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 390b7d09fe51..2861fac5f2e1 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -43,7 +43,7 @@ quota_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool quota_mt_check(const struct xt_mtchk_param *par)
+static int quota_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_quota_info *q = par->matchinfo;
 
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 4fc6a917f6de..3b5e3d613b18 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -74,7 +74,7 @@ xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
+static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
 {
 	struct xt_rateest_match_info *info = par->matchinfo;
 	struct xt_rateest *est1, *est2;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 85309448c5e7..52042c8bf7f2 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -305,7 +305,7 @@ out:
 	return ret;
 }
 
-static bool recent_mt_check(const struct xt_mtchk_param *par)
+static int recent_mt_check(const struct xt_mtchk_param *par)
 {
 	struct recent_net *recent_net = recent_pernet(par->net);
 	const struct xt_recent_mtinfo *info = par->matchinfo;
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 977b182dea59..5037a7a0059c 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -144,7 +144,7 @@ sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			   XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
 }
 
-static bool sctp_mt_check(const struct xt_mtchk_param *par)
+static int sctp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_sctp_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 94893be80276..8b15b1317f1f 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -37,7 +37,7 @@ state_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return (sinfo->statemask & statebit);
 }
 
-static bool state_mt_check(const struct xt_mtchk_param *par)
+static int state_mt_check(const struct xt_mtchk_param *par)
 {
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 51ac1bbb4f52..a577ab008f57 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -52,7 +52,7 @@ statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool statistic_mt_check(const struct xt_mtchk_param *par)
+static int statistic_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_statistic_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index b4d774111311..7d1412154e27 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -40,7 +40,7 @@ string_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 
 #define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m))
 
-static bool string_mt_check(const struct xt_mtchk_param *par)
+static int string_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_string_info *conf = par->matchinfo;
 	struct ts_config *ts_conf;
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index b53887f83c44..00728410099f 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -120,7 +120,7 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool tcp_mt_check(const struct xt_mtchk_param *par)
+static int tcp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_tcp *tcpinfo = par->matchinfo;
 
@@ -155,7 +155,7 @@ static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			      !!(udpinfo->invflags & XT_UDP_INV_DSTPT));
 }
 
-static bool udp_mt_check(const struct xt_mtchk_param *par)
+static int udp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_udp *udpinfo = par->matchinfo;
 
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 45ed05b5161f..db74f4fd57df 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -217,7 +217,7 @@ time_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool time_mt_check(const struct xt_mtchk_param *par)
+static int time_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_time_info *info = par->matchinfo;
 
-- 
cgit v1.2.3-59-g8ed1b


From 135367b8f6a18507af6b9a6910a14b5699415309 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 19 Mar 2010 17:16:42 +0100
Subject: netfilter: xtables: change xt_target.checkentry return type

Restore function signatures from bool to int so that we can report
memory allocation failures or similar using -ENOMEM rather than
always having to pass -EINVAL back.

// <smpl>
@@
type bool;
identifier check, par;
@@
-bool check
+int check
 (struct xt_tgchk_param *par) { ... }
// </smpl>

This excludes the change the semantic patch makes to xt_ct_find_proto.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h  | 4 ++--
 net/bridge/netfilter/ebt_arpreply.c | 2 +-
 net/bridge/netfilter/ebt_dnat.c     | 2 +-
 net/bridge/netfilter/ebt_log.c      | 2 +-
 net/bridge/netfilter/ebt_mark.c     | 2 +-
 net/bridge/netfilter/ebt_nflog.c    | 2 +-
 net/bridge/netfilter/ebt_redirect.c | 2 +-
 net/bridge/netfilter/ebt_snat.c     | 2 +-
 net/bridge/netfilter/ebt_ulog.c     | 2 +-
 net/ipv4/netfilter/arpt_mangle.c    | 2 +-
 net/ipv4/netfilter/ipt_CLUSTERIP.c  | 2 +-
 net/ipv4/netfilter/ipt_ECN.c        | 2 +-
 net/ipv4/netfilter/ipt_LOG.c        | 2 +-
 net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +-
 net/ipv4/netfilter/ipt_NETMAP.c     | 2 +-
 net/ipv4/netfilter/ipt_REDIRECT.c   | 2 +-
 net/ipv4/netfilter/ipt_REJECT.c     | 2 +-
 net/ipv4/netfilter/ipt_ULOG.c       | 2 +-
 net/ipv4/netfilter/nf_nat_rule.c    | 4 ++--
 net/ipv6/netfilter/ip6t_LOG.c       | 2 +-
 net/ipv6/netfilter/ip6t_REJECT.c    | 2 +-
 net/netfilter/xt_CONNSECMARK.c      | 2 +-
 net/netfilter/xt_CT.c               | 2 +-
 net/netfilter/xt_DSCP.c             | 2 +-
 net/netfilter/xt_HL.c               | 4 ++--
 net/netfilter/xt_LED.c              | 2 +-
 net/netfilter/xt_NFLOG.c            | 2 +-
 net/netfilter/xt_NFQUEUE.c          | 2 +-
 net/netfilter/xt_RATEEST.c          | 2 +-
 net/netfilter/xt_SECMARK.c          | 2 +-
 net/netfilter/xt_TCPMSS.c           | 4 ++--
 net/netfilter/xt_TPROXY.c           | 2 +-
 net/netfilter/xt_connmark.c         | 2 +-
 33 files changed, 37 insertions(+), 37 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 33c1a62a0997..1a65d45ee4f5 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -342,8 +342,8 @@ struct xt_target {
 	/* Called when user tries to insert an entry of this type:
            hook_mask is a bitmask of hooks from which it can be
            called. */
-	/* Should return true or false. */
-	bool (*checkentry)(const struct xt_tgchk_param *);
+	/* Should return true or false, or an error code (-Exxxx). */
+	int (*checkentry)(const struct xt_tgchk_param *);
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_tgdtor_param *);
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index f392e9d93f53..2491564e9e08 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -57,7 +57,7 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target;
 }
 
-static bool ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
+static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ebt_arpreply_info *info = par->targinfo;
 	const struct ebt_entry *e = par->entryinfo;
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 2bb40d728a35..5fddebea45c2 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -26,7 +26,7 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target;
 }
 
-static bool ebt_dnat_tg_check(const struct xt_tgchk_param *par)
+static int ebt_dnat_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ebt_nat_info *info = par->targinfo;
 	unsigned int hook_mask;
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index e873924ddb5d..a0aeac6176ee 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -24,7 +24,7 @@
 
 static DEFINE_SPINLOCK(ebt_log_lock);
 
-static bool ebt_log_tg_check(const struct xt_tgchk_param *par)
+static int ebt_log_tg_check(const struct xt_tgchk_param *par)
 {
 	struct ebt_log_info *info = par->targinfo;
 
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 2b5ce533d6b9..dd94dafa6155 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -36,7 +36,7 @@ ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target | ~EBT_VERDICT_BITS;
 }
 
-static bool ebt_mark_tg_check(const struct xt_tgchk_param *par)
+static int ebt_mark_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ebt_mark_t_info *info = par->targinfo;
 	int tmp;
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 40dbd248b9ae..1f2b7bbdde73 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -35,7 +35,7 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return EBT_CONTINUE;
 }
 
-static bool ebt_nflog_tg_check(const struct xt_tgchk_param *par)
+static int ebt_nflog_tg_check(const struct xt_tgchk_param *par)
 {
 	struct ebt_nflog_info *info = par->targinfo;
 
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 9be8fbcd370b..73c4d3ac6f2e 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -32,7 +32,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target;
 }
 
-static bool ebt_redirect_tg_check(const struct xt_tgchk_param *par)
+static int ebt_redirect_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ebt_redirect_info *info = par->targinfo;
 	unsigned int hook_mask;
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 9c7b520765a2..94bcecd90d74 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -42,7 +42,7 @@ out:
 	return info->target | ~EBT_VERDICT_BITS;
 }
 
-static bool ebt_snat_tg_check(const struct xt_tgchk_param *par)
+static int ebt_snat_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ebt_nat_info *info = par->targinfo;
 	int tmp;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 789ea36f1db1..f554bc2515d6 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -249,7 +249,7 @@ ebt_ulog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return EBT_CONTINUE;
 }
 
-static bool ebt_ulog_tg_check(const struct xt_tgchk_param *par)
+static int ebt_ulog_tg_check(const struct xt_tgchk_param *par)
 {
 	struct ebt_ulog_info *uloginfo = par->targinfo;
 
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index b0d5b1d0a769..4b51a027f307 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -54,7 +54,7 @@ target(struct sk_buff *skb, const struct xt_target_param *par)
 	return mangle->target;
 }
 
-static bool checkentry(const struct xt_tgchk_param *par)
+static int checkentry(const struct xt_tgchk_param *par)
 {
 	const struct arpt_mangle *mangle = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index fcaa0dc8e075..290a7b9b393e 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -347,7 +347,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool clusterip_tg_check(const struct xt_tgchk_param *par)
+static int clusterip_tg_check(const struct xt_tgchk_param *par)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
 	const struct ipt_entry *e = par->entryinfo;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 01988752547e..9d96500a4157 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -93,7 +93,7 @@ ecn_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool ecn_tg_check(const struct xt_tgchk_param *par)
+static int ecn_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_ECN_info *einfo = par->targinfo;
 	const struct ipt_entry *e = par->entryinfo;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index b3bf623fa222..c9ee5c40d1bb 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -439,7 +439,7 @@ log_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool log_tg_check(const struct xt_tgchk_param *par)
+static int log_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_log_info *loginfo = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 5063ddac7c04..5a182f6de5d5 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -28,7 +28,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 
 /* FIXME: Multiple targets. --RR */
-static bool masquerade_tg_check(const struct xt_tgchk_param *par)
+static int masquerade_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 51ab01a0a95d..cbfe5f7e082a 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -22,7 +22,7 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
 MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
 
-static bool netmap_tg_check(const struct xt_tgchk_param *par)
+static int netmap_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 74f1f55fd61a..f8daec20fb04 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -26,7 +26,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
 
 /* FIXME: Take multiple ranges --RR */
-static bool redirect_tg_check(const struct xt_tgchk_param *par)
+static int redirect_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index ff32252bad59..cf76f1bc3f10 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -174,7 +174,7 @@ reject_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_DROP;
 }
 
-static bool reject_tg_check(const struct xt_tgchk_param *par)
+static int reject_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_reject_info *rejinfo = par->targinfo;
 	const struct ipt_entry *e = par->entryinfo;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index d926201560dd..7f73bbe2193c 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -307,7 +307,7 @@ static void ipt_logfn(u_int8_t pf,
 	ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
 }
 
-static bool ulog_tg_check(const struct xt_tgchk_param *par)
+static int ulog_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_ulog_info *loginfo = par->targinfo;
 
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 7d6345e416c7..117226708738 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -74,7 +74,7 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par)
 	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
 }
 
-static bool ipt_snat_checkentry(const struct xt_tgchk_param *par)
+static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
@@ -86,7 +86,7 @@ static bool ipt_snat_checkentry(const struct xt_tgchk_param *par)
 	return true;
 }
 
-static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par)
+static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 5a79883220e0..bcc3fc19374a 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -451,7 +451,7 @@ log_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 }
 
 
-static bool log_tg6_check(const struct xt_tgchk_param *par)
+static int log_tg6_check(const struct xt_tgchk_param *par)
 {
 	const struct ip6t_log_info *loginfo = par->targinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 45efb9f38fcb..8d5141ece671 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -213,7 +213,7 @@ reject_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_DROP;
 }
 
-static bool reject_tg6_check(const struct xt_tgchk_param *par)
+static int reject_tg6_check(const struct xt_tgchk_param *par)
 {
 	const struct ip6t_reject_info *rejinfo = par->targinfo;
 	const struct ip6t_entry *e = par->entryinfo;
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 6812865488d6..3f9d0f4f852d 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -84,7 +84,7 @@ connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool connsecmark_tg_check(const struct xt_tgchk_param *par)
+static int connsecmark_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_connsecmark_target_info *info = par->targinfo;
 
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 6509e03f1e62..c1553bf06cf6 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -53,7 +53,7 @@ static u8 xt_ct_find_proto(const struct xt_tgchk_param *par)
 		return 0;
 }
 
-static bool xt_ct_tg_check(const struct xt_tgchk_param *par)
+static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 {
 	struct xt_ct_target_info *info = par->targinfo;
 	struct nf_conntrack_tuple t;
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index bbf08a91c600..1fa7b67bf225 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -60,7 +60,7 @@ dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool dscp_tg_check(const struct xt_tgchk_param *par)
+static int dscp_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_DSCP_info *info = par->targinfo;
 
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
index 7004ed2ffa44..15ba16108182 100644
--- a/net/netfilter/xt_HL.c
+++ b/net/netfilter/xt_HL.c
@@ -101,7 +101,7 @@ hl_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool ttl_tg_check(const struct xt_tgchk_param *par)
+static int ttl_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_TTL_info *info = par->targinfo;
 
@@ -114,7 +114,7 @@ static bool ttl_tg_check(const struct xt_tgchk_param *par)
 	return true;
 }
 
-static bool hl_tg6_check(const struct xt_tgchk_param *par)
+static int hl_tg6_check(const struct xt_tgchk_param *par)
 {
 	const struct ip6t_HL_info *info = par->targinfo;
 
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index f511bea9464a..1a3e3dd5a774 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -80,7 +80,7 @@ static void led_timeout_callback(unsigned long data)
 	led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
 }
 
-static bool led_tg_check(const struct xt_tgchk_param *par)
+static int led_tg_check(const struct xt_tgchk_param *par)
 {
 	struct xt_led_info *ledinfo = par->targinfo;
 	struct xt_led_info_internal *ledinternal;
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index a57c5cf018ec..13e6c0002c8a 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -37,7 +37,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool nflog_tg_check(const struct xt_tgchk_param *par)
+static int nflog_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_nflog_info *info = par->targinfo;
 
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 7cc0de63aa0f..d435579a64ca 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -81,7 +81,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_QUEUE_NR(queue);
 }
 
-static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
+static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_NFQ_info_v1 *info = par->targinfo;
 	u32 maxid;
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 87ae97e5516f..9743e50be8ef 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -85,7 +85,7 @@ xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
+static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 {
 	struct xt_rateest_target_info *info = par->targinfo;
 	struct xt_rateest *est;
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 4855fd9d7c6f..48f8e4f7ea8a 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -80,7 +80,7 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info)
 	return true;
 }
 
-static bool secmark_tg_check(const struct xt_tgchk_param *par)
+static int secmark_tg_check(const struct xt_tgchk_param *par)
 {
 	struct xt_secmark_target_info *info = par->targinfo;
 
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 45161d9a9f23..70288dc31583 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -234,7 +234,7 @@ static inline bool find_syn_match(const struct xt_entry_match *m)
 	return false;
 }
 
-static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
+static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_tcpmss_info *info = par->targinfo;
 	const struct ipt_entry *e = par->entryinfo;
@@ -256,7 +256,7 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
 }
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-static bool tcpmss_tg6_check(const struct xt_tgchk_param *par)
+static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_tcpmss_info *info = par->targinfo;
 	const struct ip6t_entry *e = par->entryinfo;
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index e9244fdc123a..189df9af4de6 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -59,7 +59,7 @@ tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_DROP;
 }
 
-static bool tproxy_tg_check(const struct xt_tgchk_param *par)
+static int tproxy_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_ip *i = par->entryinfo;
 
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index df7eaff874f1..0e69427f8cda 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -74,7 +74,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool connmark_tg_check(const struct xt_tgchk_param *par)
+static int connmark_tg_check(const struct xt_tgchk_param *par)
 {
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
-- 
cgit v1.2.3-59-g8ed1b


From bd414ee605ff3ac5fcd79f57269a897879ee4cde Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 23 Mar 2010 16:35:56 +0100
Subject: netfilter: xtables: change matches to return error code

The following semantic patch does part of the transformation:
// <smpl>
@ rule1 @
struct xt_match ops;
identifier check;
@@
 ops.checkentry = check;

@@
identifier rule1.check;
@@
 check(...) { <...
-return true;
+return 0;
 ...> }

@@
identifier rule1.check;
@@
 check(...) { <...
-return false;
+return -EINVAL;
 ...> }
// </smpl>

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/bridge/netfilter/ebt_802_3.c     |  4 ++--
 net/bridge/netfilter/ebt_among.c     |  8 ++++----
 net/bridge/netfilter/ebt_arp.c       |  6 +++---
 net/bridge/netfilter/ebt_ip.c        | 14 +++++++-------
 net/bridge/netfilter/ebt_ip6.c       | 14 +++++++-------
 net/bridge/netfilter/ebt_limit.c     |  4 ++--
 net/bridge/netfilter/ebt_mark_m.c    |  8 ++++----
 net/bridge/netfilter/ebt_pkttype.c   |  4 ++--
 net/bridge/netfilter/ebt_stp.c       |  6 +++---
 net/bridge/netfilter/ebt_vlan.c      | 14 +++++++-------
 net/ipv4/netfilter/ip_tables.c       |  2 +-
 net/ipv4/netfilter/ipt_addrtype.c    |  8 ++++----
 net/ipv4/netfilter/ipt_ah.c          |  4 ++--
 net/ipv4/netfilter/ipt_ecn.c         |  8 ++++----
 net/ipv6/netfilter/ip6_tables.c      |  2 +-
 net/ipv6/netfilter/ip6t_ah.c         |  4 ++--
 net/ipv6/netfilter/ip6t_frag.c       |  4 ++--
 net/ipv6/netfilter/ip6t_hbh.c        |  6 +++---
 net/ipv6/netfilter/ip6t_ipv6header.c |  4 ++--
 net/ipv6/netfilter/ip6t_mh.c         |  2 +-
 net/ipv6/netfilter/ip6t_rt.c         |  6 +++---
 net/netfilter/x_tables.c             | 12 ++++++++++--
 net/netfilter/xt_cluster.c           |  6 +++---
 net/netfilter/xt_connbytes.c         |  8 ++++----
 net/netfilter/xt_connlimit.c         |  6 +++---
 net/netfilter/xt_connmark.c          |  8 ++++----
 net/netfilter/xt_conntrack.c         |  4 ++--
 net/netfilter/xt_dccp.c              |  8 ++++----
 net/netfilter/xt_dscp.c              |  4 ++--
 net/netfilter/xt_esp.c               |  4 ++--
 net/netfilter/xt_hashlimit.c         | 29 ++++++++++++++---------------
 net/netfilter/xt_helper.c            |  4 ++--
 net/netfilter/xt_limit.c             |  6 +++---
 net/netfilter/xt_physdev.c           |  6 +++---
 net/netfilter/xt_policy.c            | 10 +++++-----
 net/netfilter/xt_quota.c             |  6 +++---
 net/netfilter/xt_rateest.c           |  4 ++--
 net/netfilter/xt_recent.c            | 18 +++++++++---------
 net/netfilter/xt_sctp.c              | 12 ++++++------
 net/netfilter/xt_state.c             |  4 ++--
 net/netfilter/xt_statistic.c         |  6 +++---
 net/netfilter/xt_string.c            | 13 ++++++-------
 net/netfilter/xt_tcpudp.c            |  4 ++--
 net/netfilter/xt_time.c              |  4 ++--
 44 files changed, 162 insertions(+), 156 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 7b6f4c4cccb7..f7de8dbc3422 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -41,9 +41,9 @@ static int ebt_802_3_mt_check(const struct xt_mtchk_param *par)
 	const struct ebt_802_3_info *info = par->matchinfo;
 
 	if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK)
-		return false;
+		return -EINVAL;
 
-	return true;
+	return 0;
 }
 
 static struct xt_match ebt_802_3_mt_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 8a75d399b510..20068e03fa81 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -190,17 +190,17 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par)
 		pr_info("wrong size: %d against expected %d, rounded to %Zd\n",
 			em->match_size, expected_length,
 			EBT_ALIGN(expected_length));
-		return false;
+		return -EINVAL;
 	}
 	if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) {
 		pr_info("dst integrity fail: %x\n", -err);
-		return false;
+		return -EINVAL;
 	}
 	if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) {
 		pr_info("src integrity fail: %x\n", -err);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_match ebt_among_mt_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index fc62055adb17..952150cd5e7d 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -108,10 +108,10 @@ static int ebt_arp_mt_check(const struct xt_mtchk_param *par)
 	if ((e->ethproto != htons(ETH_P_ARP) &&
 	   e->ethproto != htons(ETH_P_RARP)) ||
 	   e->invflags & EBT_IPROTO)
-		return false;
+		return -EINVAL;
 	if (info->bitmask & ~EBT_ARP_MASK || info->invflags & ~EBT_ARP_MASK)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_match ebt_arp_mt_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index d1a555dc8878..a1c76c7e5219 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -84,24 +84,24 @@ static int ebt_ip_mt_check(const struct xt_mtchk_param *par)
 
 	if (e->ethproto != htons(ETH_P_IP) ||
 	   e->invflags & EBT_IPROTO)
-		return false;
+		return -EINVAL;
 	if (info->bitmask & ~EBT_IP_MASK || info->invflags & ~EBT_IP_MASK)
-		return false;
+		return -EINVAL;
 	if (info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT)) {
 		if (info->invflags & EBT_IP_PROTO)
-			return false;
+			return -EINVAL;
 		if (info->protocol != IPPROTO_TCP &&
 		    info->protocol != IPPROTO_UDP &&
 		    info->protocol != IPPROTO_UDPLITE &&
 		    info->protocol != IPPROTO_SCTP &&
 		    info->protocol != IPPROTO_DCCP)
-			 return false;
+			 return -EINVAL;
 	}
 	if (info->bitmask & EBT_IP_DPORT && info->dport[0] > info->dport[1])
-		return false;
+		return -EINVAL;
 	if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1])
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_match ebt_ip_mt_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index fa4ecf50fdc9..33f8413f05ad 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -86,24 +86,24 @@ static int ebt_ip6_mt_check(const struct xt_mtchk_param *par)
 	struct ebt_ip6_info *info = par->matchinfo;
 
 	if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO)
-		return false;
+		return -EINVAL;
 	if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK)
-		return false;
+		return -EINVAL;
 	if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) {
 		if (info->invflags & EBT_IP6_PROTO)
-			return false;
+			return -EINVAL;
 		if (info->protocol != IPPROTO_TCP &&
 		    info->protocol != IPPROTO_UDP &&
 		    info->protocol != IPPROTO_UDPLITE &&
 		    info->protocol != IPPROTO_SCTP &&
 		    info->protocol != IPPROTO_DCCP)
-			return false;
+			return -EINVAL;
 	}
 	if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1])
-		return false;
+		return -EINVAL;
 	if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1])
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_match ebt_ip6_mt_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index abfb0ecd7c17..4b0e2e53fa57 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -74,7 +74,7 @@ static int ebt_limit_mt_check(const struct xt_mtchk_param *par)
 	    user2credits(info->avg * info->burst) < user2credits(info->avg)) {
 		pr_info("overflow, try lower: %u/%u\n",
 			info->avg, info->burst);
-		return false;
+		return -EINVAL;
 	}
 
 	/* User avg in seconds * EBT_LIMIT_SCALE: convert to jiffies * 128. */
@@ -82,7 +82,7 @@ static int ebt_limit_mt_check(const struct xt_mtchk_param *par)
 	info->credit = user2credits(info->avg * info->burst);
 	info->credit_cap = user2credits(info->avg * info->burst);
 	info->cost = user2credits(info->avg);
-	return true;
+	return 0;
 }
 
 
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index 1e5b0b316fbe..e4366c0a1a43 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -27,12 +27,12 @@ static int ebt_mark_mt_check(const struct xt_mtchk_param *par)
 	const struct ebt_mark_m_info *info = par->matchinfo;
 
 	if (info->bitmask & ~EBT_MARK_MASK)
-		return false;
+		return -EINVAL;
 	if ((info->bitmask & EBT_MARK_OR) && (info->bitmask & EBT_MARK_AND))
-		return false;
+		return -EINVAL;
 	if (!info->bitmask)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index 9b3c64516605..f34bcc3197bd 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -25,9 +25,9 @@ static int ebt_pkttype_mt_check(const struct xt_mtchk_param *par)
 	const struct ebt_pkttype_info *info = par->matchinfo;
 
 	if (info->invert != 0 && info->invert != 1)
-		return false;
+		return -EINVAL;
 	/* Allow any pkt_type value */
-	return true;
+	return 0;
 }
 
 static struct xt_match ebt_pkttype_mt_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 521186fa6994..02f28fdda393 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -162,13 +162,13 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
 
 	if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK ||
 	    !(info->bitmask & EBT_STP_MASK))
-		return false;
+		return -EINVAL;
 	/* Make sure the match only receives stp frames */
 	if (compare_ether_addr(e->destmac, bridge_ula) ||
 	    compare_ether_addr(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC))
-		return false;
+		return -EINVAL;
 
-	return true;
+	return 0;
 }
 
 static struct xt_match ebt_stp_mt_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 04a9575389d8..bf8ae5c7a0c5 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -88,7 +88,7 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
 	if (e->ethproto != htons(ETH_P_8021Q)) {
 		pr_debug("passed entry proto %2.4X is not 802.1Q (8100)\n",
 			 ntohs(e->ethproto));
-		return false;
+		return -EINVAL;
 	}
 
 	/* Check for bitmask range
@@ -96,14 +96,14 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
 	if (info->bitmask & ~EBT_VLAN_MASK) {
 		pr_debug("bitmask %2X is out of mask (%2X)\n",
 			 info->bitmask, EBT_VLAN_MASK);
-		return false;
+		return -EINVAL;
 	}
 
 	/* Check for inversion flags range */
 	if (info->invflags & ~EBT_VLAN_MASK) {
 		pr_debug("inversion flags %2X is out of mask (%2X)\n",
 			 info->invflags, EBT_VLAN_MASK);
-		return false;
+		return -EINVAL;
 	}
 
 	/* Reserved VLAN ID (VID) values
@@ -117,7 +117,7 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
 			if (info->id > VLAN_GROUP_ARRAY_LEN) {
 				pr_debug("id %d is out of range (1-4096)\n",
 					 info->id);
-				return false;
+				return -EINVAL;
 			}
 			/* Note: This is valid VLAN-tagged frame point.
 			 * Any value of user_priority are acceptable,
@@ -132,7 +132,7 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
 		if ((unsigned char) info->prio > 7) {
 			pr_debug("prio %d is out of range (0-7)\n",
 				 info->prio);
-			return false;
+			return -EINVAL;
 		}
 	}
 	/* Check for encapsulated proto range - it is possible to be
@@ -142,11 +142,11 @@ static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
 		if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) {
 			pr_debug("encap frame length %d is less than "
 				 "minimal\n", ntohs(info->encap));
-			return false;
+			return -EINVAL;
 		}
 	}
 
-	return true;
+	return 0;
 }
 
 static struct xt_match ebt_vlan_mt_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 771ffa7b9aff..18c5b1573f3e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2181,7 +2181,7 @@ static int icmp_checkentry(const struct xt_mtchk_param *par)
 	const struct ipt_icmp *icmpinfo = par->matchinfo;
 
 	/* Must specify no unknown invflags */
-	return !(icmpinfo->invflags & ~IPT_ICMP_INV);
+	return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0;
 }
 
 /* The built-in targets: standard (NULL) and error. */
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 81197f456d7f..e4b8f2bf8aaa 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -78,7 +78,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
 	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
 		pr_info("both incoming and outgoing "
 			"interface limitation cannot be selected\n");
-		return false;
+		return -EINVAL;
 	}
 
 	if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
@@ -86,7 +86,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
 	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
 		pr_info("output interface limitation "
 			"not valid in PREROUTING and INPUT\n");
-		return false;
+		return -EINVAL;
 	}
 
 	if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
@@ -94,10 +94,10 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
 	    info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) {
 		pr_info("input interface limitation "
 			"not valid in POSTROUTING and OUTPUT\n");
-		return false;
+		return -EINVAL;
 	}
 
-	return true;
+	return 0;
 }
 
 static struct xt_match addrtype_mt_reg[] __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 667ded16e120..9f9810204892 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -62,9 +62,9 @@ static int ah_mt_check(const struct xt_mtchk_param *par)
 	/* Must specify no unknown invflags */
 	if (ahinfo->invflags & ~IPT_AH_INV_MASK) {
 		pr_debug("unknown flags %X\n", ahinfo->invflags);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_match ah_mt_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index d1e234fe7f1a..32e24100d8d1 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -91,18 +91,18 @@ static int ecn_mt_check(const struct xt_mtchk_param *par)
 	const struct ipt_ip *ip = par->entryinfo;
 
 	if (info->operation & IPT_ECN_OP_MATCH_MASK)
-		return false;
+		return -EINVAL;
 
 	if (info->invert & IPT_ECN_OP_MATCH_MASK)
-		return false;
+		return -EINVAL;
 
 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
 	    ip->proto != IPPROTO_TCP) {
 		pr_info("cannot match TCP bits in rule for non-tcp packets\n");
-		return false;
+		return -EINVAL;
 	}
 
-	return true;
+	return 0;
 }
 
 static struct xt_match ecn_mt_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 595b45d52ff3..f2b815e72329 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2214,7 +2214,7 @@ static int icmp6_checkentry(const struct xt_mtchk_param *par)
 	const struct ip6t_icmp *icmpinfo = par->matchinfo;
 
 	/* Must specify no unknown invflags */
-	return !(icmpinfo->invflags & ~IP6T_ICMP_INV);
+	return (icmpinfo->invflags & ~IP6T_ICMP_INV) ? -EINVAL : 0;
 }
 
 /* The built-in targets: standard (NULL) and error. */
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 3d570446deef..1580693c86c1 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -93,9 +93,9 @@ static int ah_mt6_check(const struct xt_mtchk_param *par)
 
 	if (ahinfo->invflags & ~IP6T_AH_INV_MASK) {
 		pr_debug("unknown flags %X\n", ahinfo->invflags);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_match ah_mt6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index c2dba2701fa3..a5daf0ffb4ec 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -108,9 +108,9 @@ static int frag_mt6_check(const struct xt_mtchk_param *par)
 
 	if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) {
 		pr_debug("unknown flags %X\n", fraginfo->invflags);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_match frag_mt6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 1b294317707b..5e6acdae6d80 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -170,15 +170,15 @@ static int hbh_mt6_check(const struct xt_mtchk_param *par)
 
 	if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) {
 		pr_debug("unknown flags %X\n", optsinfo->invflags);
-		return false;
+		return -EINVAL;
 	}
 
 	if (optsinfo->flags & IP6T_OPTS_NSTRICT) {
 		pr_debug("Not strict - not implemented");
-		return false;
+		return -EINVAL;
 	}
 
-	return true;
+	return 0;
 }
 
 static struct xt_match hbh_mt6_reg[] __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 90e1e04b7932..46fbabb493fa 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -125,9 +125,9 @@ static int ipv6header_mt6_check(const struct xt_mtchk_param *par)
 	/* invflags is 0 or 0xff in hard mode */
 	if ((!info->modeflag) && info->invflags != 0x00 &&
 	    info->invflags != 0xFF)
-		return false;
+		return -EINVAL;
 
-	return true;
+	return 0;
 }
 
 static struct xt_match ipv6header_mt6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index d9408045994c..c9f443e0138f 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -67,7 +67,7 @@ static int mh_mt6_check(const struct xt_mtchk_param *par)
 	const struct ip6t_mh *mhinfo = par->matchinfo;
 
 	/* Must specify no unknown invflags */
-	return !(mhinfo->invflags & ~IP6T_MH_INV_MASK);
+	return (mhinfo->invflags & ~IP6T_MH_INV_MASK) ? -EINVAL : 0;
 }
 
 static struct xt_match mh_mt6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 76397f35eafd..09322720d2a6 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -189,17 +189,17 @@ static int rt_mt6_check(const struct xt_mtchk_param *par)
 
 	if (rtinfo->invflags & ~IP6T_RT_INV_MASK) {
 		pr_debug("unknown flags %X\n", rtinfo->invflags);
-		return false;
+		return -EINVAL;
 	}
 	if ((rtinfo->flags & (IP6T_RT_RES | IP6T_RT_FST_MASK)) &&
 	    (!(rtinfo->flags & IP6T_RT_TYP) ||
 	     (rtinfo->rt_type != 0) ||
 	     (rtinfo->invflags & IP6T_RT_INV_TYP))) {
 		pr_debug("`--rt-type 0' required before `--rt-0-*'");
-		return false;
+		return -EINVAL;
 	}
 
-	return true;
+	return 0;
 }
 
 static struct xt_match rt_mt6_reg __read_mostly = {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index ee7fe215b3e1..7ee177746172 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -363,6 +363,8 @@ static char *textify_hooks(char *buf, size_t size, unsigned int mask)
 int xt_check_match(struct xt_mtchk_param *par,
 		   unsigned int size, u_int8_t proto, bool inv_proto)
 {
+	int ret;
+
 	if (XT_ALIGN(par->match->matchsize) != size &&
 	    par->match->matchsize != -1) {
 		/*
@@ -399,8 +401,14 @@ int xt_check_match(struct xt_mtchk_param *par,
 		       par->match->proto);
 		return -EINVAL;
 	}
-	if (par->match->checkentry != NULL && !par->match->checkentry(par))
-		return -EINVAL;
+	if (par->match->checkentry != NULL) {
+		ret = par->match->checkentry(par);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			/* Flag up potential errors. */
+			return -EIO;
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xt_check_match);
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 1f2c35ef1427..30cb7762fc41 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -140,14 +140,14 @@ static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
 		pr_info("you have exceeded the maximum "
 			"number of cluster nodes (%u > %u)\n",
 			info->total_nodes, XT_CLUSTER_NODES_MAX);
-		return false;
+		return -EINVAL;
 	}
 	if (info->node_mask >= (1ULL << info->total_nodes)) {
 		pr_info("this node mask cannot be "
 			"higher than the total number of nodes\n");
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_match xt_cluster_match __read_mostly = {
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 136ef4ccdacb..bf8e286361c3 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -100,20 +100,20 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
 	if (sinfo->what != XT_CONNBYTES_PKTS &&
 	    sinfo->what != XT_CONNBYTES_BYTES &&
 	    sinfo->what != XT_CONNBYTES_AVGPKT)
-		return false;
+		return -EINVAL;
 
 	if (sinfo->direction != XT_CONNBYTES_DIR_ORIGINAL &&
 	    sinfo->direction != XT_CONNBYTES_DIR_REPLY &&
 	    sinfo->direction != XT_CONNBYTES_DIR_BOTH)
-		return false;
+		return -EINVAL;
 
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return false;
+		return -EINVAL;
 	}
 
-	return true;
+	return 0;
 }
 
 static void connbytes_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index a9fec38ab029..68e89f08140b 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -228,21 +228,21 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for "
 			"address family %u\n", par->family);
-		return false;
+		return -EINVAL;
 	}
 
 	/* init private data */
 	info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
 	if (info->data == NULL) {
 		nf_ct_l3proto_module_put(par->family);
-		return false;
+		return -EINVAL;
 	}
 
 	spin_lock_init(&info->data->lock);
 	for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i)
 		INIT_LIST_HEAD(&info->data->iphash[i]);
 
-	return true;
+	return 0;
 }
 
 static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 0e69427f8cda..e137af5559e0 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -79,9 +79,9 @@ static int connmark_tg_check(const struct xt_tgchk_param *par)
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
@@ -108,9 +108,9 @@ static int connmark_mt_check(const struct xt_mtchk_param *par)
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 500e0338a187..26e34aa7f8d1 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -211,9 +211,9 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par)
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static void conntrack_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index da8c301d24ea..f54699ca5609 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -128,12 +128,12 @@ static int dccp_mt_check(const struct xt_mtchk_param *par)
 	const struct xt_dccp_info *info = par->matchinfo;
 
 	if (info->flags & ~XT_DCCP_VALID_FLAGS)
-		return false;
+		return -EINVAL;
 	if (info->invflags & ~XT_DCCP_VALID_FLAGS)
-		return false;
+		return -EINVAL;
 	if (info->invflags & ~info->flags)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_match dccp_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 295da4ce822c..f355fb9e06fa 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -48,10 +48,10 @@ static int dscp_mt_check(const struct xt_mtchk_param *par)
 
 	if (info->dscp > XT_DSCP_MAX) {
 		pr_info("dscp %x out of range\n", info->dscp);
-		return false;
+		return -EINVAL;
 	}
 
-	return true;
+	return 0;
 }
 
 static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par)
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 9f5da9795674..143bfdc8e38f 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -66,10 +66,10 @@ static int esp_mt_check(const struct xt_mtchk_param *par)
 
 	if (espinfo->invflags & ~XT_ESP_INV_MASK) {
 		pr_debug("unknown flags %X\n", espinfo->invflags);
-		return false;
+		return -EINVAL;
 	}
 
-	return true;
+	return 0;
 }
 
 static struct xt_match esp_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index d13800c95930..0c0152902b3b 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -681,30 +681,29 @@ static int hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 	    user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) {
 		pr_info("overflow, try lower: %u/%u\n",
 			r->cfg.avg, r->cfg.burst);
-		return false;
+		return -EINVAL;
 	}
 	if (r->cfg.mode == 0 ||
 	    r->cfg.mode > (XT_HASHLIMIT_HASH_DPT |
 			   XT_HASHLIMIT_HASH_DIP |
 			   XT_HASHLIMIT_HASH_SIP |
 			   XT_HASHLIMIT_HASH_SPT))
-		return false;
+		return -EINVAL;
 	if (!r->cfg.gc_interval)
-		return false;
+		return -EINVAL;
 	if (!r->cfg.expire)
-		return false;
+		return -EINVAL;
 	if (r->name[sizeof(r->name) - 1] != '\0')
-		return false;
+		return -EINVAL;
 
 	mutex_lock(&hashlimit_mutex);
 	r->hinfo = htable_find_get(net, r->name, par->family);
 	if (!r->hinfo && htable_create_v0(net, r, par->family) != 0) {
 		mutex_unlock(&hashlimit_mutex);
-		return false;
+		return -EINVAL;
 	}
 	mutex_unlock(&hashlimit_mutex);
-
-	return true;
+	return 0;
 }
 
 static int hashlimit_mt_check(const struct xt_mtchk_param *par)
@@ -718,28 +717,28 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par)
 	    user2credits(info->cfg.avg)) {
 		pr_info("overflow, try lower: %u/%u\n",
 			info->cfg.avg, info->cfg.burst);
-		return false;
+		return -EINVAL;
 	}
 	if (info->cfg.gc_interval == 0 || info->cfg.expire == 0)
-		return false;
+		return -EINVAL;
 	if (info->name[sizeof(info->name)-1] != '\0')
-		return false;
+		return -EINVAL;
 	if (par->family == NFPROTO_IPV4) {
 		if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32)
-			return false;
+			return -EINVAL;
 	} else {
 		if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128)
-			return false;
+			return -EINVAL;
 	}
 
 	mutex_lock(&hashlimit_mutex);
 	info->hinfo = htable_find_get(net, info->name, par->family);
 	if (!info->hinfo && htable_create(net, info, par->family) != 0) {
 		mutex_unlock(&hashlimit_mutex);
-		return false;
+		return -EINVAL;
 	}
 	mutex_unlock(&hashlimit_mutex);
-	return true;
+	return 0;
 }
 
 static void
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 6e177b279f90..eb308b32bfe0 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -61,10 +61,10 @@ static int helper_mt_check(const struct xt_mtchk_param *par)
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return false;
+		return -EINVAL;
 	}
 	info->name[29] = '\0';
-	return true;
+	return 0;
 }
 
 static void helper_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 138a324df8df..5ff0580ce878 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -107,12 +107,12 @@ static int limit_mt_check(const struct xt_mtchk_param *par)
 	    || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
 		pr_info("Overflow, try lower: %u/%u\n",
 			r->avg, r->burst);
-		return false;
+		return -EINVAL;
 	}
 
 	priv = kmalloc(sizeof(*priv), GFP_KERNEL);
 	if (priv == NULL)
-		return false;
+		return -EINVAL;
 
 	/* For SMP, we only want to use one set of state. */
 	r->master = priv;
@@ -124,7 +124,7 @@ static int limit_mt_check(const struct xt_mtchk_param *par)
 		r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */
 		r->cost = user2credits(r->avg);
 	}
-	return true;
+	return 0;
 }
 
 static void limit_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 850e412c83ef..d0bdf3dd4d25 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -89,7 +89,7 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
 
 	if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
 	    info->bitmask & ~XT_PHYSDEV_OP_MASK)
-		return false;
+		return -EINVAL;
 	if (info->bitmask & XT_PHYSDEV_OP_OUT &&
 	    (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
 	     info->invert & XT_PHYSDEV_OP_BRIDGED) &&
@@ -99,9 +99,9 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
 			"POSTROUTING chains for non-bridged traffic is not "
 			"supported anymore.\n");
 		if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
-			return false;
+			return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_match physdev_mt_reg __read_mostly = {
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index c9965b640b16..1fa239c1fb93 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -134,23 +134,23 @@ static int policy_mt_check(const struct xt_mtchk_param *par)
 
 	if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) {
 		pr_info("neither incoming nor outgoing policy selected\n");
-		return false;
+		return -EINVAL;
 	}
 	if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
 	    (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) {
 		pr_info("output policy not valid in PREROUTING and INPUT\n");
-		return false;
+		return -EINVAL;
 	}
 	if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
 	    (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) {
 		pr_info("input policy not valid in POSTROUTING and OUTPUT\n");
-		return false;
+		return -EINVAL;
 	}
 	if (info->len > XT_POLICY_MAX_ELEM) {
 		pr_info("too many policy elements\n");
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_match policy_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 2861fac5f2e1..766e71c6dc55 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -48,14 +48,14 @@ static int quota_mt_check(const struct xt_mtchk_param *par)
 	struct xt_quota_info *q = par->matchinfo;
 
 	if (q->flags & ~XT_QUOTA_MASK)
-		return false;
+		return -EINVAL;
 
 	q->master = kmalloc(sizeof(*q->master), GFP_KERNEL);
 	if (q->master == NULL)
-		return false;
+		return -EINVAL;
 
 	q->master->quota = q->quota;
-	return true;
+	return 0;
 }
 
 static void quota_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 3b5e3d613b18..0b5c6122737d 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -109,12 +109,12 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
 
 	info->est1 = est1;
 	info->est2 = est2;
-	return true;
+	return 0;
 
 err2:
 	xt_rateest_put(est1);
 err1:
-	return false;
+	return -EINVAL;
 }
 
 static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 52042c8bf7f2..0994ff54a731 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -314,7 +314,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par)
 	struct proc_dir_entry *pde;
 #endif
 	unsigned i;
-	bool ret = false;
+	int ret = -EINVAL;
 
 	if (unlikely(!hash_rnd_inited)) {
 		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
@@ -323,33 +323,33 @@ static int recent_mt_check(const struct xt_mtchk_param *par)
 	if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
 		pr_info("Unsupported user space flags (%08x)\n",
 			info->check_set);
-		return false;
+		return -EINVAL;
 	}
 	if (hweight8(info->check_set &
 		     (XT_RECENT_SET | XT_RECENT_REMOVE |
 		      XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
-		return false;
+		return -EINVAL;
 	if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) &&
 	    (info->seconds || info->hit_count ||
 	    (info->check_set & XT_RECENT_MODIFIERS)))
-		return false;
+		return -EINVAL;
 	if ((info->check_set & XT_RECENT_REAP) && !info->seconds)
-		return false;
+		return -EINVAL;
 	if (info->hit_count > ip_pkt_list_tot) {
 		pr_info("hitcount (%u) is larger than "
 			"packets to be remembered (%u)\n",
 			info->hit_count, ip_pkt_list_tot);
-		return false;
+		return -EINVAL;
 	}
 	if (info->name[0] == '\0' ||
 	    strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN)
-		return false;
+		return -EINVAL;
 
 	mutex_lock(&recent_mutex);
 	t = recent_table_lookup(recent_net, info->name);
 	if (t != NULL) {
 		t->refcnt++;
-		ret = true;
+		ret = 0;
 		goto out;
 	}
 
@@ -375,7 +375,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par)
 	spin_lock_bh(&recent_lock);
 	list_add_tail(&t->list, &recent_net->tables);
 	spin_unlock_bh(&recent_lock);
-	ret = true;
+	ret = 0;
 out:
 	mutex_unlock(&recent_mutex);
 	return ret;
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 5037a7a0059c..c3694df54672 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -149,17 +149,17 @@ static int sctp_mt_check(const struct xt_mtchk_param *par)
 	const struct xt_sctp_info *info = par->matchinfo;
 
 	if (info->flags & ~XT_SCTP_VALID_FLAGS)
-		return false;
+		return -EINVAL;
 	if (info->invflags & ~XT_SCTP_VALID_FLAGS)
-		return false;
+		return -EINVAL;
 	if (info->invflags & ~info->flags)
-		return false;
+		return -EINVAL;
 	if (!(info->flags & XT_SCTP_CHUNK_TYPES))
-		return true;
+		return 0;
 	if (info->chunk_match_type & (SCTP_CHUNK_MATCH_ALL |
 	    SCTP_CHUNK_MATCH_ANY | SCTP_CHUNK_MATCH_ONLY))
-		return true;
-	return false;
+		return 0;
+	return -EINVAL;
 }
 
 static struct xt_match sctp_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 8b15b1317f1f..8e8c9df51784 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -42,9 +42,9 @@ static int state_mt_check(const struct xt_mtchk_param *par)
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static void state_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index a577ab008f57..29d76f8f1880 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -58,14 +58,14 @@ static int statistic_mt_check(const struct xt_mtchk_param *par)
 
 	if (info->mode > XT_STATISTIC_MODE_MAX ||
 	    info->flags & ~XT_STATISTIC_MASK)
-		return false;
+		return -EINVAL;
 
 	info->master = kzalloc(sizeof(*info->master), GFP_KERNEL);
 	if (info->master == NULL)
-		return false;
+		return -EINVAL;
 	info->master->count = info->u.nth.count;
 
-	return true;
+	return 0;
 }
 
 static void statistic_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index 7d1412154e27..e1f22a7a4152 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -48,26 +48,25 @@ static int string_mt_check(const struct xt_mtchk_param *par)
 
 	/* Damn, can't handle this case properly with iptables... */
 	if (conf->from_offset > conf->to_offset)
-		return false;
+		return -EINVAL;
 	if (conf->algo[XT_STRING_MAX_ALGO_NAME_SIZE - 1] != '\0')
-		return false;
+		return -EINVAL;
 	if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE)
-		return false;
+		return -EINVAL;
 	if (par->match->revision == 1) {
 		if (conf->u.v1.flags &
 		    ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT))
-			return false;
+			return -EINVAL;
 		if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE)
 			flags |= TS_IGNORECASE;
 	}
 	ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
 				     GFP_KERNEL, flags);
 	if (IS_ERR(ts_conf))
-		return false;
+		return -EINVAL;
 
 	conf->config = ts_conf;
-
-	return true;
+	return 0;
 }
 
 static void string_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 00728410099f..efa2ede24ae6 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -125,7 +125,7 @@ static int tcp_mt_check(const struct xt_mtchk_param *par)
 	const struct xt_tcp *tcpinfo = par->matchinfo;
 
 	/* Must specify no unknown invflags */
-	return !(tcpinfo->invflags & ~XT_TCP_INV_MASK);
+	return (tcpinfo->invflags & ~XT_TCP_INV_MASK) ? -EINVAL : 0;
 }
 
 static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
@@ -160,7 +160,7 @@ static int udp_mt_check(const struct xt_mtchk_param *par)
 	const struct xt_udp *udpinfo = par->matchinfo;
 
 	/* Must specify no unknown invflags */
-	return !(udpinfo->invflags & ~XT_UDP_INV_MASK);
+	return (udpinfo->invflags & ~XT_UDP_INV_MASK) ? -EINVAL : 0;
 }
 
 static struct xt_match tcpudp_mt_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index db74f4fd57df..8dde5e51ff19 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -225,10 +225,10 @@ static int time_mt_check(const struct xt_mtchk_param *par)
 	    info->daytime_stop > XT_TIME_MAX_DAYTIME) {
 		pr_info("invalid argument - start or "
 			"stop time greater than 23:59:59\n");
-		return false;
+		return -EINVAL;
 	}
 
-	return true;
+	return 0;
 }
 
 static struct xt_match xt_time_mt_reg __read_mostly = {
-- 
cgit v1.2.3-59-g8ed1b


From d6b00a5345ce4e86e8b00a88bb84a2c0c1f69ddc Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 25 Mar 2010 16:34:45 +0100
Subject: netfilter: xtables: change targets to return error code

Part of the transition is done by this semantic patch:
// <smpl>
@ rule1 @
struct xt_target ops;
identifier check;
@@
 ops.checkentry = check;

@@
identifier rule1.check;
@@
 check(...) { <...
-return true;
+return 0;
 ...> }

@@
identifier rule1.check;
@@
 check(...) { <...
-return false;
+return -EINVAL;
 ...> }
// </smpl>

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/bridge/netfilter/ebt_arpreply.c |  6 +++---
 net/bridge/netfilter/ebt_dnat.c     |  8 ++++----
 net/bridge/netfilter/ebt_log.c      |  6 +++---
 net/bridge/netfilter/ebt_mark.c     |  8 ++++----
 net/bridge/netfilter/ebt_nflog.c    |  4 ++--
 net/bridge/netfilter/ebt_redirect.c |  8 ++++----
 net/bridge/netfilter/ebt_snat.c     |  8 ++++----
 net/bridge/netfilter/ebt_ulog.c     |  4 ++--
 net/ipv4/netfilter/ipt_CLUSTERIP.c  | 16 ++++++++--------
 net/ipv4/netfilter/ipt_ECN.c        |  8 ++++----
 net/ipv4/netfilter/ipt_LOG.c        |  6 +++---
 net/ipv4/netfilter/ipt_MASQUERADE.c |  6 +++---
 net/ipv4/netfilter/ipt_NETMAP.c     |  6 +++---
 net/ipv4/netfilter/ipt_REDIRECT.c   |  6 +++---
 net/ipv4/netfilter/ipt_REJECT.c     |  6 +++---
 net/ipv4/netfilter/ipt_ULOG.c       |  6 +++---
 net/ipv4/netfilter/nf_nat_rule.c    |  8 ++++----
 net/ipv6/netfilter/ip6t_LOG.c       |  6 +++---
 net/ipv6/netfilter/ip6t_REJECT.c    |  6 +++---
 net/netfilter/x_tables.c            | 12 ++++++++++--
 net/netfilter/xt_CONNSECMARK.c      |  6 +++---
 net/netfilter/xt_CT.c               |  6 +++---
 net/netfilter/xt_DSCP.c             |  4 ++--
 net/netfilter/xt_HL.c               | 10 +++++-----
 net/netfilter/xt_LED.c              | 10 ++++------
 net/netfilter/xt_NFLOG.c            |  6 +++---
 net/netfilter/xt_NFQUEUE.c          |  6 +++---
 net/netfilter/xt_RATEEST.c          |  9 ++++-----
 net/netfilter/xt_SECMARK.c          | 10 +++++-----
 net/netfilter/xt_TCPMSS.c           | 12 ++++++------
 net/netfilter/xt_TPROXY.c           |  4 ++--
 31 files changed, 116 insertions(+), 111 deletions(-)

(limited to 'net')

diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 2491564e9e08..4581adb27583 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -63,11 +63,11 @@ static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
 	const struct ebt_entry *e = par->entryinfo;
 
 	if (BASE_CHAIN && info->target == EBT_RETURN)
-		return false;
+		return -EINVAL;
 	if (e->ethproto != htons(ETH_P_ARP) ||
 	    e->invflags & EBT_IPROTO)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_target ebt_arpreply_tg_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 5fddebea45c2..59d5b7c8a557 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -32,7 +32,7 @@ static int ebt_dnat_tg_check(const struct xt_tgchk_param *par)
 	unsigned int hook_mask;
 
 	if (BASE_CHAIN && info->target == EBT_RETURN)
-		return false;
+		return -EINVAL;
 
 	hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS);
 	if ((strcmp(par->table, "nat") != 0 ||
@@ -40,10 +40,10 @@ static int ebt_dnat_tg_check(const struct xt_tgchk_param *par)
 	    (1 << NF_BR_LOCAL_OUT)))) &&
 	    (strcmp(par->table, "broute") != 0 ||
 	    hook_mask & ~(1 << NF_BR_BROUTING)))
-		return false;
+		return -EINVAL;
 	if (INVALID_TARGET)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_target ebt_dnat_tg_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index a0aeac6176ee..c46024156539 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -29,11 +29,11 @@ static int ebt_log_tg_check(const struct xt_tgchk_param *par)
 	struct ebt_log_info *info = par->targinfo;
 
 	if (info->bitmask & ~EBT_LOG_MASK)
-		return false;
+		return -EINVAL;
 	if (info->loglevel >= 8)
-		return false;
+		return -EINVAL;
 	info->prefix[EBT_LOG_PREFIX_SIZE - 1] = '\0';
-	return true;
+	return 0;
 }
 
 struct tcpudphdr
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index dd94dafa6155..126e536ff8f4 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -43,14 +43,14 @@ static int ebt_mark_tg_check(const struct xt_tgchk_param *par)
 
 	tmp = info->target | ~EBT_VERDICT_BITS;
 	if (BASE_CHAIN && tmp == EBT_RETURN)
-		return false;
+		return -EINVAL;
 	if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
-		return false;
+		return -EINVAL;
 	tmp = info->target & ~EBT_VERDICT_BITS;
 	if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE &&
 	    tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 #ifdef CONFIG_COMPAT
 struct compat_ebt_mark_t_info {
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 1f2b7bbdde73..22e2ad5f23e8 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -40,9 +40,9 @@ static int ebt_nflog_tg_check(const struct xt_tgchk_param *par)
 	struct ebt_nflog_info *info = par->targinfo;
 
 	if (info->flags & ~EBT_NFLOG_MASK)
-		return false;
+		return -EINVAL;
 	info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0';
-	return true;
+	return 0;
 }
 
 static struct xt_target ebt_nflog_tg_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 73c4d3ac6f2e..a6044a6f2383 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -38,17 +38,17 @@ static int ebt_redirect_tg_check(const struct xt_tgchk_param *par)
 	unsigned int hook_mask;
 
 	if (BASE_CHAIN && info->target == EBT_RETURN)
-		return false;
+		return -EINVAL;
 
 	hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS);
 	if ((strcmp(par->table, "nat") != 0 ||
 	    hook_mask & ~(1 << NF_BR_PRE_ROUTING)) &&
 	    (strcmp(par->table, "broute") != 0 ||
 	    hook_mask & ~(1 << NF_BR_BROUTING)))
-		return false;
+		return -EINVAL;
 	if (INVALID_TARGET)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_target ebt_redirect_tg_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 94bcecd90d74..79caca34ae2b 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -49,14 +49,14 @@ static int ebt_snat_tg_check(const struct xt_tgchk_param *par)
 
 	tmp = info->target | ~EBT_VERDICT_BITS;
 	if (BASE_CHAIN && tmp == EBT_RETURN)
-		return false;
+		return -EINVAL;
 
 	if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
-		return false;
+		return -EINVAL;
 	tmp = info->target | EBT_VERDICT_BITS;
 	if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_target ebt_snat_tg_reg __read_mostly = {
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index f554bc2515d6..f77b42d8e87d 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -254,14 +254,14 @@ static int ebt_ulog_tg_check(const struct xt_tgchk_param *par)
 	struct ebt_ulog_info *uloginfo = par->targinfo;
 
 	if (uloginfo->nlgroup > 31)
-		return false;
+		return -EINVAL;
 
 	uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0';
 
 	if (uloginfo->qthreshold > EBT_ULOG_MAX_QLEN)
 		uloginfo->qthreshold = EBT_ULOG_MAX_QLEN;
 
-	return true;
+	return 0;
 }
 
 static struct xt_target ebt_ulog_tg_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 290a7b9b393e..1302de2ae0ae 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -358,13 +358,13 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 	    cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
 	    cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
 		pr_info("unknown mode %u\n", cipinfo->hash_mode);
-		return false;
+		return -EINVAL;
 
 	}
 	if (e->ip.dmsk.s_addr != htonl(0xffffffff) ||
 	    e->ip.dst.s_addr == 0) {
 		pr_info("Please specify destination IP\n");
-		return false;
+		return -EINVAL;
 	}
 
 	/* FIXME: further sanity checks */
@@ -374,20 +374,20 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 		if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
 			pr_info("no config found for %pI4, need 'new'\n",
 				&e->ip.dst.s_addr);
-			return false;
+			return -EINVAL;
 		} else {
 			struct net_device *dev;
 
 			if (e->ip.iniface[0] == '\0') {
 				pr_info("Please specify an interface name\n");
-				return false;
+				return -EINVAL;
 			}
 
 			dev = dev_get_by_name(&init_net, e->ip.iniface);
 			if (!dev) {
 				pr_info("no such interface %s\n",
 					e->ip.iniface);
-				return false;
+				return -EINVAL;
 			}
 
 			config = clusterip_config_init(cipinfo,
@@ -395,7 +395,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 			if (!config) {
 				pr_info("cannot allocate config\n");
 				dev_put(dev);
-				return false;
+				return -EINVAL;
 			}
 			dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
 		}
@@ -405,10 +405,10 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return false;
+		return -EINVAL;
 	}
 
-	return true;
+	return 0;
 }
 
 /* drop reference count of cluster config when rule is deleted */
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 9d96500a4157..563049f31aef 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -100,18 +100,18 @@ static int ecn_tg_check(const struct xt_tgchk_param *par)
 
 	if (einfo->operation & IPT_ECN_OP_MASK) {
 		pr_info("unsupported ECN operation %x\n", einfo->operation);
-		return false;
+		return -EINVAL;
 	}
 	if (einfo->ip_ect & ~IPT_ECN_IP_MASK) {
 		pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect);
-		return false;
+		return -EINVAL;
 	}
 	if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) &&
 	    (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) {
 		pr_info("cannot use TCP operations on a non-tcp rule\n");
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_target ecn_tg_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index c9ee5c40d1bb..a6a454b25502 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -445,13 +445,13 @@ static int log_tg_check(const struct xt_tgchk_param *par)
 
 	if (loginfo->level >= 8) {
 		pr_debug("level %u >= 8\n", loginfo->level);
-		return false;
+		return -EINVAL;
 	}
 	if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
 		pr_debug("prefix is not null-terminated\n");
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_target log_tg_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 5a182f6de5d5..02b1bc477998 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -34,13 +34,13 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
 		pr_debug("bad MAP_IPS.\n");
-		return false;
+		return -EINVAL;
 	}
 	if (mr->rangesize != 1) {
 		pr_debug("bad rangesize %u\n", mr->rangesize);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static unsigned int
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index cbfe5f7e082a..708c7f8f7eea 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -28,13 +28,13 @@ static int netmap_tg_check(const struct xt_tgchk_param *par)
 
 	if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
 		pr_debug("bad MAP_IPS.\n");
-		return false;
+		return -EINVAL;
 	}
 	if (mr->rangesize != 1) {
 		pr_debug("bad rangesize %u.\n", mr->rangesize);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static unsigned int
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index f8daec20fb04..3cf101916523 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -32,13 +32,13 @@ static int redirect_tg_check(const struct xt_tgchk_param *par)
 
 	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
 		pr_debug("bad MAP_IPS.\n");
-		return false;
+		return -EINVAL;
 	}
 	if (mr->rangesize != 1) {
 		pr_debug("bad rangesize %u.\n", mr->rangesize);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static unsigned int
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index cf76f1bc3f10..b026014e7a5b 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -181,16 +181,16 @@ static int reject_tg_check(const struct xt_tgchk_param *par)
 
 	if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
 		pr_info("ECHOREPLY no longer supported.\n");
-		return false;
+		return -EINVAL;
 	} else if (rejinfo->with == IPT_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
 		if (e->ip.proto != IPPROTO_TCP ||
 		    (e->ip.invflags & XT_INV_PROTO)) {
 			pr_info("TCP_RESET invalid for non-tcp\n");
-			return false;
+			return -EINVAL;
 		}
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_target reject_tg_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 7f73bbe2193c..04c86dc5d538 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -313,14 +313,14 @@ static int ulog_tg_check(const struct xt_tgchk_param *par)
 
 	if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
 		pr_debug("prefix not null-terminated\n");
-		return false;
+		return -EINVAL;
 	}
 	if (loginfo->qthreshold > ULOG_MAX_QLEN) {
 		pr_debug("queue threshold %Zu > MAX_QLEN\n",
 			 loginfo->qthreshold);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 #ifdef CONFIG_COMPAT
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 117226708738..b66137c80bc7 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -81,9 +81,9 @@ static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
 		pr_info("SNAT: multiple ranges no longer supported\n");
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
@@ -93,9 +93,9 @@ static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
 		pr_info("DNAT: multiple ranges no longer supported\n");
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 unsigned int
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index bcc3fc19374a..439ededd5300 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -457,13 +457,13 @@ static int log_tg6_check(const struct xt_tgchk_param *par)
 
 	if (loginfo->level >= 8) {
 		pr_debug("level %u >= 8\n", loginfo->level);
-		return false;
+		return -EINVAL;
 	}
 	if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
 		pr_debug("prefix not null-terminated\n");
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_target log_tg6_reg __read_mostly = {
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 8d5141ece671..55b9b2da1340 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -220,16 +220,16 @@ static int reject_tg6_check(const struct xt_tgchk_param *par)
 
 	if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) {
 		pr_info("ECHOREPLY is not supported.\n");
-		return false;
+		return -EINVAL;
 	} else if (rejinfo->with == IP6T_TCP_RESET) {
 		/* Must specify that it's a TCP packet */
 		if (e->ipv6.proto != IPPROTO_TCP ||
 		    (e->ipv6.invflags & XT_INV_PROTO)) {
 			pr_info("TCP_RESET illegal for non-tcp\n");
-			return false;
+			return -EINVAL;
 		}
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_target reject_tg6_reg __read_mostly = {
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 7ee177746172..8e23d8f68459 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -528,6 +528,8 @@ EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
 int xt_check_target(struct xt_tgchk_param *par,
 		    unsigned int size, u_int8_t proto, bool inv_proto)
 {
+	int ret;
+
 	if (XT_ALIGN(par->target->targetsize) != size) {
 		pr_err("%s_tables: %s.%u target: invalid size "
 		       "%u (kernel) != (user) %u\n",
@@ -559,8 +561,14 @@ int xt_check_target(struct xt_tgchk_param *par,
 		       par->target->proto);
 		return -EINVAL;
 	}
-	if (par->target->checkentry != NULL && !par->target->checkentry(par))
-		return -EINVAL;
+	if (par->target->checkentry != NULL) {
+		ret = par->target->checkentry(par);
+		if (ret < 0)
+			return ret;
+		else if (ret > 0)
+			/* Flag up potential errors. */
+			return -EIO;
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(xt_check_target);
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 3f9d0f4f852d..2287a82a0703 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -92,7 +92,7 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par)
 	    strcmp(par->table, "security") != 0) {
 		pr_info("target only valid in the \'mangle\' "
 			"or \'security\' tables, not \'%s\'.\n", par->table);
-		return false;
+		return -EINVAL;
 	}
 
 	switch (info->mode) {
@@ -108,9 +108,9 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par)
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index c1553bf06cf6..ee566e2e4534 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -62,7 +62,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 	u8 proto;
 
 	if (info->flags & ~XT_CT_NOTRACK)
-		return false;
+		return -EINVAL;
 
 	if (info->flags & XT_CT_NOTRACK) {
 		ct = &nf_conntrack_untracked;
@@ -108,14 +108,14 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 	__set_bit(IPS_CONFIRMED_BIT, &ct->status);
 out:
 	info->ct = ct;
-	return true;
+	return 0;
 
 err3:
 	nf_conntrack_free(ct);
 err2:
 	nf_ct_l3proto_module_put(par->family);
 err1:
-	return false;
+	return -EINVAL;
 }
 
 static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 1fa7b67bf225..aa263b80f8c0 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -66,9 +66,9 @@ static int dscp_tg_check(const struct xt_tgchk_param *par)
 
 	if (info->dscp > XT_DSCP_MAX) {
 		pr_info("dscp %x out of range\n", info->dscp);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static unsigned int
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
index 15ba16108182..7a47383ec723 100644
--- a/net/netfilter/xt_HL.c
+++ b/net/netfilter/xt_HL.c
@@ -110,8 +110,8 @@ static int ttl_tg_check(const struct xt_tgchk_param *par)
 		return false;
 	}
 	if (info->mode != IPT_TTL_SET && info->ttl == 0)
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static int hl_tg6_check(const struct xt_tgchk_param *par)
@@ -120,14 +120,14 @@ static int hl_tg6_check(const struct xt_tgchk_param *par)
 
 	if (info->mode > IP6T_HL_MAXMODE) {
 		pr_info("invalid or unknown mode %u\n", info->mode);
-		return false;
+		return -EINVAL;
 	}
 	if (info->mode != IP6T_HL_SET && info->hop_limit == 0) {
 		pr_info("increment/decrement does not "
 			"make sense with value 0\n");
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_target hl_tg_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 1a3e3dd5a774..22b5b7057397 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -88,12 +88,12 @@ static int led_tg_check(const struct xt_tgchk_param *par)
 
 	if (ledinfo->id[0] == '\0') {
 		pr_info("No 'id' parameter given.\n");
-		return false;
+		return -EINVAL;
 	}
 
 	ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL);
 	if (!ledinternal)
-		return false;
+		return -EINVAL;
 
 	ledinternal->netfilter_led_trigger.name = ledinfo->id;
 
@@ -111,13 +111,11 @@ static int led_tg_check(const struct xt_tgchk_param *par)
 			    (unsigned long)ledinfo);
 
 	ledinfo->internal_data = ledinternal;
-
-	return true;
+	return 0;
 
 exit_alloc:
 	kfree(ledinternal);
-
-	return false;
+	return -EINVAL;
 }
 
 static void led_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 13e6c0002c8a..42dd8747b421 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -42,10 +42,10 @@ static int nflog_tg_check(const struct xt_tgchk_param *par)
 	const struct xt_nflog_info *info = par->targinfo;
 
 	if (info->flags & ~XT_NFLOG_MASK)
-		return false;
+		return -EINVAL;
 	if (info->prefix[sizeof(info->prefix) - 1] != '\0')
-		return false;
-	return true;
+		return -EINVAL;
+	return 0;
 }
 
 static struct xt_target nflog_tg_reg __read_mostly = {
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index d435579a64ca..add1789ae4a8 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -92,15 +92,15 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
 	}
 	if (info->queues_total == 0) {
 		pr_err("NFQUEUE: number of total queues is 0\n");
-		return false;
+		return -EINVAL;
 	}
 	maxid = info->queues_total - 1 + info->queuenum;
 	if (maxid > 0xffff) {
 		pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n",
 		       info->queues_total, maxid);
-		return false;
+		return -EINVAL;
 	}
-	return true;
+	return 0;
 }
 
 static struct xt_target nfqueue_tg_reg[] __read_mostly = {
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 9743e50be8ef..7af5fba39cdd 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -109,10 +109,10 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 		    (info->interval != est->params.interval ||
 		     info->ewma_log != est->params.ewma_log)) {
 			xt_rateest_put(est);
-			return false;
+			return -EINVAL;
 		}
 		info->est = est;
-		return true;
+		return 0;
 	}
 
 	est = kzalloc(sizeof(*est), GFP_KERNEL);
@@ -136,13 +136,12 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 
 	info->est = est;
 	xt_rateest_hash_insert(est);
-
-	return true;
+	return 0;
 
 err2:
 	kfree(est);
 err1:
-	return false;
+	return -EINVAL;
 }
 
 static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 48f8e4f7ea8a..39098fc9887d 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -88,29 +88,29 @@ static int secmark_tg_check(const struct xt_tgchk_param *par)
 	    strcmp(par->table, "security") != 0) {
 		pr_info("target only valid in the \'mangle\' "
 			"or \'security\' tables, not \'%s\'.\n", par->table);
-		return false;
+		return -EINVAL;
 	}
 
 	if (mode && mode != info->mode) {
 		pr_info("mode already set to %hu cannot mix with "
 			"rules for mode %hu\n", mode, info->mode);
-		return false;
+		return -EINVAL;
 	}
 
 	switch (info->mode) {
 	case SECMARK_MODE_SEL:
 		if (!checkentry_selinux(info))
-			return false;
+			return -EINVAL;
 		break;
 
 	default:
 		pr_info("invalid mode: %hu\n", info->mode);
-		return false;
+		return -EINVAL;
 	}
 
 	if (!mode)
 		mode = info->mode;
-	return true;
+	return 0;
 }
 
 static void secmark_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 70288dc31583..385677b963d5 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -246,13 +246,13 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
 			   (1 << NF_INET_POST_ROUTING))) != 0) {
 		pr_info("path-MTU clamping only supported in "
 			"FORWARD, OUTPUT and POSTROUTING hooks\n");
-		return false;
+		return -EINVAL;
 	}
 	xt_ematch_foreach(ematch, e)
 		if (find_syn_match(ematch))
-			return true;
+			return 0;
 	pr_info("Only works on TCP SYN packets\n");
-	return false;
+	return -EINVAL;
 }
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
@@ -268,13 +268,13 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
 			   (1 << NF_INET_POST_ROUTING))) != 0) {
 		pr_info("path-MTU clamping only supported in "
 			"FORWARD, OUTPUT and POSTROUTING hooks\n");
-		return false;
+		return -EINVAL;
 	}
 	xt_ematch_foreach(ematch, e)
 		if (find_syn_match(ematch))
-			return true;
+			return 0;
 	pr_info("Only works on TCP SYN packets\n");
-	return false;
+	return -EINVAL;
 }
 #endif
 
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 189df9af4de6..4f246ddc5c48 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -65,11 +65,11 @@ static int tproxy_tg_check(const struct xt_tgchk_param *par)
 
 	if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP)
 	    && !(i->invflags & IPT_INV_PROTO))
-		return true;
+		return 0;
 
 	pr_info("Can be used only in combination with "
 		"either -p tcp or -p udp\n");
-	return false;
+	return -EINVAL;
 }
 
 static struct xt_target tproxy_tg_reg __read_mostly = {
-- 
cgit v1.2.3-59-g8ed1b


From 4a5a5c73b7cfee46a0b1411903cfa0dea532deec Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 19 Mar 2010 17:32:59 +0100
Subject: netfilter: xtables: slightly better error reporting

When a more specific status code is available, such as ENOMEM on failed
allocations or the error returned by a called function (e.g.
nf_ct_l3proto_try_module_get), passing it up to userspace seems like a
better idea than always returning EINVAL.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 11 ++++++-----
 net/netfilter/xt_CONNSECMARK.c     |  8 +++++---
 net/netfilter/xt_CT.c              | 11 +++++++++--
 net/netfilter/xt_DSCP.c            |  2 +-
 net/netfilter/xt_HL.c              |  2 +-
 net/netfilter/xt_LED.c             |  4 ++--
 net/netfilter/xt_NFQUEUE.c         |  2 +-
 net/netfilter/xt_RATEEST.c         |  9 ++++++---
 net/netfilter/xt_SECMARK.c         | 16 +++++++++-------
 net/netfilter/xt_cluster.c         |  2 +-
 net/netfilter/xt_connbytes.c       |  6 ++++--
 net/netfilter/xt_connlimit.c       |  8 +++++---
 net/netfilter/xt_connmark.c        | 14 ++++++++++----
 net/netfilter/xt_conntrack.c       |  7 +++++--
 net/netfilter/xt_dscp.c            |  2 +-
 net/netfilter/xt_hashlimit.c       | 32 ++++++++++++++++++++------------
 net/netfilter/xt_helper.c          |  6 ++++--
 net/netfilter/xt_limit.c           |  4 ++--
 net/netfilter/xt_quota.c           |  2 +-
 net/netfilter/xt_rateest.c         |  2 ++
 net/netfilter/xt_recent.c          |  5 ++++-
 net/netfilter/xt_state.c           |  7 +++++--
 net/netfilter/xt_statistic.c       |  2 +-
 net/netfilter/xt_string.c          |  2 +-
 net/netfilter/xt_time.c            |  2 +-
 25 files changed, 107 insertions(+), 61 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1302de2ae0ae..1faf5fa06ac8 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -351,8 +351,8 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
 	const struct ipt_entry *e = par->entryinfo;
-
 	struct clusterip_config *config;
+	int ret;
 
 	if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
 	    cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
@@ -387,7 +387,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 			if (!dev) {
 				pr_info("no such interface %s\n",
 					e->ip.iniface);
-				return -EINVAL;
+				return -ENOENT;
 			}
 
 			config = clusterip_config_init(cipinfo,
@@ -395,17 +395,18 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 			if (!config) {
 				pr_info("cannot allocate config\n");
 				dev_put(dev);
-				return -EINVAL;
+				return -ENOMEM;
 			}
 			dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
 		}
 	}
 	cipinfo->config = config;
 
-	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+	ret = nf_ct_l3proto_try_module_get(par->family);
+	if (ret < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return -EINVAL;
+		return ret;
 	}
 
 	return 0;
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 2287a82a0703..105a62e985d3 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -87,6 +87,7 @@ connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 static int connsecmark_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_connsecmark_target_info *info = par->targinfo;
+	int ret;
 
 	if (strcmp(par->table, "mangle") != 0 &&
 	    strcmp(par->table, "security") != 0) {
@@ -102,13 +103,14 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par)
 
 	default:
 		pr_info("invalid mode: %hu\n", info->mode);
-		return false;
+		return -EINVAL;
 	}
 
-	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+	ret = nf_ct_l3proto_try_module_get(par->family);
+	if (ret < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return -EINVAL;
+		return ret;
 	}
 	return 0;
 }
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index ee566e2e4534..65dd348ae361 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -59,6 +59,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 	struct nf_conntrack_tuple t;
 	struct nf_conn_help *help;
 	struct nf_conn *ct;
+	int ret = 0;
 	u8 proto;
 
 	if (info->flags & ~XT_CT_NOTRACK)
@@ -75,28 +76,34 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 		goto err1;
 #endif
 
-	if (nf_ct_l3proto_try_module_get(par->family) < 0)
+	ret = nf_ct_l3proto_try_module_get(par->family);
+	if (ret < 0)
 		goto err1;
 
 	memset(&t, 0, sizeof(t));
 	ct = nf_conntrack_alloc(par->net, info->zone, &t, &t, GFP_KERNEL);
+	ret = PTR_ERR(ct);
 	if (IS_ERR(ct))
 		goto err2;
 
+	ret = 0;
 	if ((info->ct_events || info->exp_events) &&
 	    !nf_ct_ecache_ext_add(ct, info->ct_events, info->exp_events,
 				  GFP_KERNEL))
 		goto err3;
 
 	if (info->helper[0]) {
+		ret = -ENOENT;
 		proto = xt_ct_find_proto(par);
 		if (!proto)
 			goto err3;
 
+		ret = -ENOMEM;
 		help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
 		if (help == NULL)
 			goto err3;
 
+		ret = -ENOENT;
 		help->helper = nf_conntrack_helper_try_module_get(info->helper,
 								  par->family,
 								  proto);
@@ -115,7 +122,7 @@ err3:
 err2:
 	nf_ct_l3proto_module_put(par->family);
 err1:
-	return -EINVAL;
+	return ret;
 }
 
 static void xt_ct_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index aa263b80f8c0..969634f293e5 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -66,7 +66,7 @@ static int dscp_tg_check(const struct xt_tgchk_param *par)
 
 	if (info->dscp > XT_DSCP_MAX) {
 		pr_info("dscp %x out of range\n", info->dscp);
-		return -EINVAL;
+		return -EDOM;
 	}
 	return 0;
 }
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
index 7a47383ec723..77b99f732711 100644
--- a/net/netfilter/xt_HL.c
+++ b/net/netfilter/xt_HL.c
@@ -107,7 +107,7 @@ static int ttl_tg_check(const struct xt_tgchk_param *par)
 
 	if (info->mode > IPT_TTL_MAXMODE) {
 		pr_info("TTL: invalid or unknown mode %u\n", info->mode);
-		return false;
+		return -EINVAL;
 	}
 	if (info->mode != IPT_TTL_SET && info->ttl == 0)
 		return -EINVAL;
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 22b5b7057397..efcf56db23e8 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -93,7 +93,7 @@ static int led_tg_check(const struct xt_tgchk_param *par)
 
 	ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL);
 	if (!ledinternal)
-		return -EINVAL;
+		return -ENOMEM;
 
 	ledinternal->netfilter_led_trigger.name = ledinfo->id;
 
@@ -115,7 +115,7 @@ static int led_tg_check(const struct xt_tgchk_param *par)
 
 exit_alloc:
 	kfree(ledinternal);
-	return -EINVAL;
+	return err;
 }
 
 static void led_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index add1789ae4a8..f9217cb56fe3 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -98,7 +98,7 @@ static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
 	if (maxid > 0xffff) {
 		pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n",
 		       info->queues_total, maxid);
-		return -EINVAL;
+		return -ERANGE;
 	}
 	return 0;
 }
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 7af5fba39cdd..40751c618e70 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -93,6 +93,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 		struct nlattr		opt;
 		struct gnet_estimator	est;
 	} cfg;
+	int ret;
 
 	if (unlikely(!rnd_inited)) {
 		get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
@@ -115,6 +116,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 		return 0;
 	}
 
+	ret = -ENOMEM;
 	est = kzalloc(sizeof(*est), GFP_KERNEL);
 	if (!est)
 		goto err1;
@@ -130,8 +132,9 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 	cfg.est.interval	= info->interval;
 	cfg.est.ewma_log	= info->ewma_log;
 
-	if (gen_new_estimator(&est->bstats, &est->rstats, &est->lock,
-			      &cfg.opt) < 0)
+	ret = gen_new_estimator(&est->bstats, &est->rstats,
+				&est->lock, &cfg.opt);
+	if (ret < 0)
 		goto err2;
 
 	info->est = est;
@@ -141,7 +144,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 err2:
 	kfree(est);
 err1:
-	return -EINVAL;
+	return ret;
 }
 
 static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 39098fc9887d..a91d4a7d5a2c 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -50,7 +50,7 @@ secmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool checkentry_selinux(struct xt_secmark_target_info *info)
+static int checkentry_selinux(struct xt_secmark_target_info *info)
 {
 	int err;
 	struct xt_secmark_target_selinux_info *sel = &info->u.sel;
@@ -62,27 +62,28 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info)
 		if (err == -EINVAL)
 			pr_info("invalid SELinux context \'%s\'\n",
 				sel->selctx);
-		return false;
+		return err;
 	}
 
 	if (!sel->selsid) {
 		pr_info("unable to map SELinux context \'%s\'\n", sel->selctx);
-		return false;
+		return -ENOENT;
 	}
 
 	err = selinux_secmark_relabel_packet_permission(sel->selsid);
 	if (err) {
 		pr_info("unable to obtain relabeling permission\n");
-		return false;
+		return err;
 	}
 
 	selinux_secmark_refcount_inc();
-	return true;
+	return 0;
 }
 
 static int secmark_tg_check(const struct xt_tgchk_param *par)
 {
 	struct xt_secmark_target_info *info = par->targinfo;
+	int err;
 
 	if (strcmp(par->table, "mangle") != 0 &&
 	    strcmp(par->table, "security") != 0) {
@@ -99,8 +100,9 @@ static int secmark_tg_check(const struct xt_tgchk_param *par)
 
 	switch (info->mode) {
 	case SECMARK_MODE_SEL:
-		if (!checkentry_selinux(info))
-			return -EINVAL;
+		err = checkentry_selinux(info);
+		if (err <= 0)
+			return err;
 		break;
 
 	default:
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 30cb7762fc41..6c941e1c6b9e 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -145,7 +145,7 @@ static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
 	if (info->node_mask >= (1ULL << info->total_nodes)) {
 		pr_info("this node mask cannot be "
 			"higher than the total number of nodes\n");
-		return -EINVAL;
+		return -EDOM;
 	}
 	return 0;
 }
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index bf8e286361c3..2ff332ecc342 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -96,6 +96,7 @@ connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 static int connbytes_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_connbytes_info *sinfo = par->matchinfo;
+	int ret;
 
 	if (sinfo->what != XT_CONNBYTES_PKTS &&
 	    sinfo->what != XT_CONNBYTES_BYTES &&
@@ -107,10 +108,11 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
 	    sinfo->direction != XT_CONNBYTES_DIR_BOTH)
 		return -EINVAL;
 
-	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+	ret = nf_ct_l3proto_try_module_get(par->family);
+	if (ret < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return -EINVAL;
+		return ret;
 	}
 
 	return 0;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 68e89f08140b..370088ec5764 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -220,22 +220,24 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_connlimit_info *info = par->matchinfo;
 	unsigned int i;
+	int ret;
 
 	if (unlikely(!connlimit_rnd_inited)) {
 		get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
 		connlimit_rnd_inited = true;
 	}
-	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+	ret = nf_ct_l3proto_try_module_get(par->family);
+	if (ret < 0) {
 		pr_info("cannot load conntrack support for "
 			"address family %u\n", par->family);
-		return -EINVAL;
+		return ret;
 	}
 
 	/* init private data */
 	info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
 	if (info->data == NULL) {
 		nf_ct_l3proto_module_put(par->family);
-		return -EINVAL;
+		return -ENOMEM;
 	}
 
 	spin_lock_init(&info->data->lock);
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index e137af5559e0..71e38a1fd656 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -76,10 +76,13 @@ connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 
 static int connmark_tg_check(const struct xt_tgchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+	int ret;
+
+	ret = nf_ct_l3proto_try_module_get(par->family);
+	if (ret < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return -EINVAL;
+		return ret;
 	}
 	return 0;
 }
@@ -105,10 +108,13 @@ connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 
 static int connmark_mt_check(const struct xt_mtchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+	int ret;
+
+	ret = nf_ct_l3proto_try_module_get(par->family);
+	if (ret < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return -EINVAL;
+		return ret;
 	}
 	return 0;
 }
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 26e34aa7f8d1..e0bcf8d2cf33 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -208,10 +208,13 @@ conntrack_mt_v2(const struct sk_buff *skb, const struct xt_match_param *par)
 
 static int conntrack_mt_check(const struct xt_mtchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+	int ret;
+
+	ret = nf_ct_l3proto_try_module_get(par->family);
+	if (ret < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return -EINVAL;
+		return ret;
 	}
 	return 0;
 }
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index f355fb9e06fa..9db51fddbdb8 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -48,7 +48,7 @@ static int dscp_mt_check(const struct xt_mtchk_param *par)
 
 	if (info->dscp > XT_DSCP_MAX) {
 		pr_info("dscp %x out of range\n", info->dscp);
-		return -EINVAL;
+		return -EDOM;
 	}
 
 	return 0;
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 0c0152902b3b..c89fde7d1234 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -214,7 +214,7 @@ static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_
 	hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
 			sizeof(struct list_head) * size);
 	if (!hinfo)
-		return -1;
+		return -ENOMEM;
 	minfo->hinfo = hinfo;
 
 	/* copy match config into hashtable config */
@@ -250,7 +250,7 @@ static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_
 		&dl_file_ops, hinfo);
 	if (!hinfo->pde) {
 		vfree(hinfo);
-		return -1;
+		return -ENOMEM;
 	}
 	hinfo->net = net;
 
@@ -285,7 +285,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
 	hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
 	                sizeof(struct list_head) * size);
 	if (hinfo == NULL)
-		return -1;
+		return -ENOMEM;
 	minfo->hinfo = hinfo;
 
 	/* copy match config into hashtable config */
@@ -311,7 +311,7 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
 		&dl_file_ops, hinfo);
 	if (hinfo->pde == NULL) {
 		vfree(hinfo);
-		return -1;
+		return -ENOMEM;
 	}
 	hinfo->net = net;
 
@@ -675,13 +675,14 @@ static int hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 {
 	struct net *net = par->net;
 	struct xt_hashlimit_info *r = par->matchinfo;
+	int ret;
 
 	/* Check for overflow. */
 	if (r->cfg.burst == 0 ||
 	    user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) {
 		pr_info("overflow, try lower: %u/%u\n",
 			r->cfg.avg, r->cfg.burst);
-		return -EINVAL;
+		return -ERANGE;
 	}
 	if (r->cfg.mode == 0 ||
 	    r->cfg.mode > (XT_HASHLIMIT_HASH_DPT |
@@ -698,9 +699,12 @@ static int hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 
 	mutex_lock(&hashlimit_mutex);
 	r->hinfo = htable_find_get(net, r->name, par->family);
-	if (!r->hinfo && htable_create_v0(net, r, par->family) != 0) {
-		mutex_unlock(&hashlimit_mutex);
-		return -EINVAL;
+	if (r->hinfo == NULL) {
+		ret = htable_create_v0(net, r, par->family);
+		if (ret < 0) {
+			mutex_unlock(&hashlimit_mutex);
+			return ret;
+		}
 	}
 	mutex_unlock(&hashlimit_mutex);
 	return 0;
@@ -710,6 +714,7 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct net *net = par->net;
 	struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+	int ret;
 
 	/* Check for overflow. */
 	if (info->cfg.burst == 0 ||
@@ -717,7 +722,7 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par)
 	    user2credits(info->cfg.avg)) {
 		pr_info("overflow, try lower: %u/%u\n",
 			info->cfg.avg, info->cfg.burst);
-		return -EINVAL;
+		return -ERANGE;
 	}
 	if (info->cfg.gc_interval == 0 || info->cfg.expire == 0)
 		return -EINVAL;
@@ -733,9 +738,12 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par)
 
 	mutex_lock(&hashlimit_mutex);
 	info->hinfo = htable_find_get(net, info->name, par->family);
-	if (!info->hinfo && htable_create(net, info, par->family) != 0) {
-		mutex_unlock(&hashlimit_mutex);
-		return -EINVAL;
+	if (info->hinfo == NULL) {
+		ret = htable_create(net, info, par->family);
+		if (ret < 0) {
+			mutex_unlock(&hashlimit_mutex);
+			return ret;
+		}
 	}
 	mutex_unlock(&hashlimit_mutex);
 	return 0;
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index eb308b32bfe0..b8b3e13dc71e 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -57,11 +57,13 @@ helper_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 static int helper_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_helper_info *info = par->matchinfo;
+	int ret;
 
-	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+	ret = nf_ct_l3proto_try_module_get(par->family);
+	if (ret < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return -EINVAL;
+		return ret;
 	}
 	info->name[29] = '\0';
 	return 0;
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 5ff0580ce878..e2a284ebb415 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -107,12 +107,12 @@ static int limit_mt_check(const struct xt_mtchk_param *par)
 	    || user2credits(r->avg * r->burst) < user2credits(r->avg)) {
 		pr_info("Overflow, try lower: %u/%u\n",
 			r->avg, r->burst);
-		return -EINVAL;
+		return -ERANGE;
 	}
 
 	priv = kmalloc(sizeof(*priv), GFP_KERNEL);
 	if (priv == NULL)
-		return -EINVAL;
+		return -ENOMEM;
 
 	/* For SMP, we only want to use one set of state. */
 	r->master = priv;
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 766e71c6dc55..3e5cbd85a65b 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -52,7 +52,7 @@ static int quota_mt_check(const struct xt_mtchk_param *par)
 
 	q->master = kmalloc(sizeof(*q->master), GFP_KERNEL);
 	if (q->master == NULL)
-		return -EINVAL;
+		return -ENOMEM;
 
 	q->master->quota = q->quota;
 	return 0;
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 0b5c6122737d..23805f8a444b 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -78,6 +78,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
 {
 	struct xt_rateest_match_info *info = par->matchinfo;
 	struct xt_rateest *est1, *est2;
+	int ret = false;
 
 	if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS |
 				     XT_RATEEST_MATCH_REL)) != 1)
@@ -95,6 +96,7 @@ static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
 		goto err1;
 	}
 
+	ret  = -ENOENT;
 	est1 = xt_rateest_lookup(info->name1);
 	if (!est1)
 		goto err1;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 0994ff54a731..0d9f80b1dd9f 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -355,8 +355,10 @@ static int recent_mt_check(const struct xt_mtchk_param *par)
 
 	t = kzalloc(sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size,
 		    GFP_KERNEL);
-	if (t == NULL)
+	if (t == NULL) {
+		ret = -ENOMEM;
 		goto out;
+	}
 	t->refcnt = 1;
 	strcpy(t->name, info->name);
 	INIT_LIST_HEAD(&t->lru_list);
@@ -367,6 +369,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par)
 		  &recent_mt_fops, t);
 	if (pde == NULL) {
 		kfree(t);
+		ret = -ENOMEM;
 		goto out;
 	}
 	pde->uid = ip_list_uid;
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 8e8c9df51784..2b75230d15ca 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -39,10 +39,13 @@ state_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 
 static int state_mt_check(const struct xt_mtchk_param *par)
 {
-	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+	int ret;
+
+	ret = nf_ct_l3proto_try_module_get(par->family);
+	if (ret < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return -EINVAL;
+		return ret;
 	}
 	return 0;
 }
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 29d76f8f1880..8ed2b2905091 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -62,7 +62,7 @@ static int statistic_mt_check(const struct xt_mtchk_param *par)
 
 	info->master = kzalloc(sizeof(*info->master), GFP_KERNEL);
 	if (info->master == NULL)
-		return -EINVAL;
+		return -ENOMEM;
 	info->master->count = info->u.nth.count;
 
 	return 0;
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index e1f22a7a4152..b0f8292db6f8 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -63,7 +63,7 @@ static int string_mt_check(const struct xt_mtchk_param *par)
 	ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
 				     GFP_KERNEL, flags);
 	if (IS_ERR(ts_conf))
-		return -EINVAL;
+		return PTR_ERR(ts_conf);
 
 	conf->config = ts_conf;
 	return 0;
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 8dde5e51ff19..d8556fdda440 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -225,7 +225,7 @@ static int time_mt_check(const struct xt_mtchk_param *par)
 	    info->daytime_stop > XT_TIME_MAX_DAYTIME) {
 		pr_info("invalid argument - start or "
 			"stop time greater than 23:59:59\n");
-		return -EINVAL;
+		return -EDOM;
 	}
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From f95c74e33eff5e3fe9798e2dc0a7749150ea3f80 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sun, 21 Mar 2010 04:05:56 +0100
Subject: netfilter: xtables: shorten up return clause

The return value of nf_ct_l3proto_try_module_get can be returned
directly, even in the case of success.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c |  7 ++-----
 net/netfilter/xt_CONNSECMARK.c     |  6 ++----
 net/netfilter/xt_connbytes.c       |  7 ++-----
 net/netfilter/xt_connmark.c        | 12 ++++--------
 net/netfilter/xt_conntrack.c       |  6 ++----
 net/netfilter/xt_state.c           |  6 ++----
 6 files changed, 14 insertions(+), 30 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 1faf5fa06ac8..5d70c43302bb 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -403,13 +403,10 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 	cipinfo->config = config;
 
 	ret = nf_ct_l3proto_try_module_get(par->family);
-	if (ret < 0) {
+	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return ret;
-	}
-
-	return 0;
+	return ret;
 }
 
 /* drop reference count of cluster config when rule is deleted */
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 105a62e985d3..e953e302141d 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -107,12 +107,10 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par)
 	}
 
 	ret = nf_ct_l3proto_try_module_get(par->family);
-	if (ret < 0) {
+	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return ret;
-	}
-	return 0;
+	return ret;
 }
 
 static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par)
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 2ff332ecc342..ff738a5f963a 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -109,13 +109,10 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par)
 		return -EINVAL;
 
 	ret = nf_ct_l3proto_try_module_get(par->family);
-	if (ret < 0) {
+	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return ret;
-	}
-
-	return 0;
+	return ret;
 }
 
 static void connbytes_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 71e38a1fd656..ae1015484ae2 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -79,12 +79,10 @@ static int connmark_tg_check(const struct xt_tgchk_param *par)
 	int ret;
 
 	ret = nf_ct_l3proto_try_module_get(par->family);
-	if (ret < 0) {
+	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return ret;
-	}
-	return 0;
+	return ret;
 }
 
 static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
@@ -111,12 +109,10 @@ static int connmark_mt_check(const struct xt_mtchk_param *par)
 	int ret;
 
 	ret = nf_ct_l3proto_try_module_get(par->family);
-	if (ret < 0) {
+	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return ret;
-	}
-	return 0;
+	return ret;
 }
 
 static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index e0bcf8d2cf33..3348706ce56d 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -211,12 +211,10 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par)
 	int ret;
 
 	ret = nf_ct_l3proto_try_module_get(par->family);
-	if (ret < 0) {
+	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return ret;
-	}
-	return 0;
+	return ret;
 }
 
 static void conntrack_mt_destroy(const struct xt_mtdtor_param *par)
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 2b75230d15ca..be00d7b1f53b 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -42,12 +42,10 @@ static int state_mt_check(const struct xt_mtchk_param *par)
 	int ret;
 
 	ret = nf_ct_l3proto_try_module_get(par->family);
-	if (ret < 0) {
+	if (ret < 0)
 		pr_info("cannot load conntrack support for proto=%u\n",
 			par->family);
-		return ret;
-	}
-	return 0;
+	return ret;
 }
 
 static void state_mt_destroy(const struct xt_mtdtor_param *par)
-- 
cgit v1.2.3-59-g8ed1b


From 779dd630d8e198a2dc5581a48efae546161cc726 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 22 Mar 2010 19:28:53 +0100
Subject: netfilter: xtables: remove xt_hashlimit revision 0

Superseded by xt_hashlimit revision 1 (linux v2.6.24-6212-g09e410d,
iptables 1.4.1-rc1).

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_hashlimit.c | 221 -------------------------------------------
 1 file changed, 221 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index c89fde7d1234..5470bb097c48 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -193,76 +193,6 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
 }
 static void htable_gc(unsigned long htlong);
 
-static int htable_create_v0(struct net *net, struct xt_hashlimit_info *minfo, u_int8_t family)
-{
-	struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
-	struct xt_hashlimit_htable *hinfo;
-	unsigned int size;
-	unsigned int i;
-
-	if (minfo->cfg.size)
-		size = minfo->cfg.size;
-	else {
-		size = ((totalram_pages << PAGE_SHIFT) / 16384) /
-		       sizeof(struct list_head);
-		if (totalram_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
-			size = 8192;
-		if (size < 16)
-			size = 16;
-	}
-	/* FIXME: don't use vmalloc() here or anywhere else -HW */
-	hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
-			sizeof(struct list_head) * size);
-	if (!hinfo)
-		return -ENOMEM;
-	minfo->hinfo = hinfo;
-
-	/* copy match config into hashtable config */
-	hinfo->cfg.mode        = minfo->cfg.mode;
-	hinfo->cfg.avg         = minfo->cfg.avg;
-	hinfo->cfg.burst       = minfo->cfg.burst;
-	hinfo->cfg.max         = minfo->cfg.max;
-	hinfo->cfg.gc_interval = minfo->cfg.gc_interval;
-	hinfo->cfg.expire      = minfo->cfg.expire;
-
-	if (family == NFPROTO_IPV4)
-		hinfo->cfg.srcmask = hinfo->cfg.dstmask = 32;
-	else
-		hinfo->cfg.srcmask = hinfo->cfg.dstmask = 128;
-
-	hinfo->cfg.size = size;
-	if (!hinfo->cfg.max)
-		hinfo->cfg.max = 8 * hinfo->cfg.size;
-	else if (hinfo->cfg.max < hinfo->cfg.size)
-		hinfo->cfg.max = hinfo->cfg.size;
-
-	for (i = 0; i < hinfo->cfg.size; i++)
-		INIT_HLIST_HEAD(&hinfo->hash[i]);
-
-	hinfo->use = 1;
-	hinfo->count = 0;
-	hinfo->family = family;
-	hinfo->rnd_initialized = false;
-	spin_lock_init(&hinfo->lock);
-	hinfo->pde = proc_create_data(minfo->name, 0,
-		(family == NFPROTO_IPV4) ?
-		hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
-		&dl_file_ops, hinfo);
-	if (!hinfo->pde) {
-		vfree(hinfo);
-		return -ENOMEM;
-	}
-	hinfo->net = net;
-
-	setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo);
-	hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
-	add_timer(&hinfo->timer);
-
-	hlist_add_head(&hinfo->node, &hashlimit_net->htables);
-
-	return 0;
-}
-
 static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
 			 u_int8_t family)
 {
@@ -570,57 +500,6 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 	return 0;
 }
 
-static bool
-hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
-{
-	const struct xt_hashlimit_info *r = par->matchinfo;
-	struct xt_hashlimit_htable *hinfo = r->hinfo;
-	unsigned long now = jiffies;
-	struct dsthash_ent *dh;
-	struct dsthash_dst dst;
-
-	if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
-		goto hotdrop;
-
-	spin_lock_bh(&hinfo->lock);
-	dh = dsthash_find(hinfo, &dst);
-	if (!dh) {
-		dh = dsthash_alloc_init(hinfo, &dst);
-		if (!dh) {
-			spin_unlock_bh(&hinfo->lock);
-			goto hotdrop;
-		}
-
-		dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
-		dh->rateinfo.prev = jiffies;
-		dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
-						   hinfo->cfg.burst);
-		dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
-						       hinfo->cfg.burst);
-		dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
-	} else {
-		/* update expiration timeout */
-		dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-		rateinfo_recalc(dh, now);
-	}
-
-	if (dh->rateinfo.credit >= dh->rateinfo.cost) {
-		/* We're underlimit. */
-		dh->rateinfo.credit -= dh->rateinfo.cost;
-		spin_unlock_bh(&hinfo->lock);
-		return true;
-	}
-
-	spin_unlock_bh(&hinfo->lock);
-
-	/* default case: we're overlimit, thus don't match */
-	return false;
-
-hotdrop:
-	*par->hotdrop = true;
-	return false;
-}
-
 static bool
 hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
@@ -671,45 +550,6 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-static int hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
-{
-	struct net *net = par->net;
-	struct xt_hashlimit_info *r = par->matchinfo;
-	int ret;
-
-	/* Check for overflow. */
-	if (r->cfg.burst == 0 ||
-	    user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) {
-		pr_info("overflow, try lower: %u/%u\n",
-			r->cfg.avg, r->cfg.burst);
-		return -ERANGE;
-	}
-	if (r->cfg.mode == 0 ||
-	    r->cfg.mode > (XT_HASHLIMIT_HASH_DPT |
-			   XT_HASHLIMIT_HASH_DIP |
-			   XT_HASHLIMIT_HASH_SIP |
-			   XT_HASHLIMIT_HASH_SPT))
-		return -EINVAL;
-	if (!r->cfg.gc_interval)
-		return -EINVAL;
-	if (!r->cfg.expire)
-		return -EINVAL;
-	if (r->name[sizeof(r->name) - 1] != '\0')
-		return -EINVAL;
-
-	mutex_lock(&hashlimit_mutex);
-	r->hinfo = htable_find_get(net, r->name, par->family);
-	if (r->hinfo == NULL) {
-		ret = htable_create_v0(net, r, par->family);
-		if (ret < 0) {
-			mutex_unlock(&hashlimit_mutex);
-			return ret;
-		}
-	}
-	mutex_unlock(&hashlimit_mutex);
-	return 0;
-}
-
 static int hashlimit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct net *net = par->net;
@@ -749,14 +589,6 @@ static int hashlimit_mt_check(const struct xt_mtchk_param *par)
 	return 0;
 }
 
-static void
-hashlimit_mt_destroy_v0(const struct xt_mtdtor_param *par)
-{
-	const struct xt_hashlimit_info *r = par->matchinfo;
-
-	htable_put(r->hinfo);
-}
-
 static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
 {
 	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
@@ -764,46 +596,7 @@ static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
 	htable_put(info->hinfo);
 }
 
-#ifdef CONFIG_COMPAT
-struct compat_xt_hashlimit_info {
-	char name[IFNAMSIZ];
-	struct hashlimit_cfg cfg;
-	compat_uptr_t hinfo;
-	compat_uptr_t master;
-};
-
-static void hashlimit_mt_compat_from_user(void *dst, const void *src)
-{
-	int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
-
-	memcpy(dst, src, off);
-	memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off);
-}
-
-static int hashlimit_mt_compat_to_user(void __user *dst, const void *src)
-{
-	int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
-
-	return copy_to_user(dst, src, off) ? -EFAULT : 0;
-}
-#endif
-
 static struct xt_match hashlimit_mt_reg[] __read_mostly = {
-	{
-		.name		= "hashlimit",
-		.revision	= 0,
-		.family		= NFPROTO_IPV4,
-		.match		= hashlimit_mt_v0,
-		.matchsize	= sizeof(struct xt_hashlimit_info),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_hashlimit_info),
-		.compat_from_user = hashlimit_mt_compat_from_user,
-		.compat_to_user	= hashlimit_mt_compat_to_user,
-#endif
-		.checkentry	= hashlimit_mt_check_v0,
-		.destroy	= hashlimit_mt_destroy_v0,
-		.me		= THIS_MODULE
-	},
 	{
 		.name           = "hashlimit",
 		.revision       = 1,
@@ -815,20 +608,6 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {
 		.me             = THIS_MODULE,
 	},
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-	{
-		.name		= "hashlimit",
-		.family		= NFPROTO_IPV6,
-		.match		= hashlimit_mt_v0,
-		.matchsize	= sizeof(struct xt_hashlimit_info),
-#ifdef CONFIG_COMPAT
-		.compatsize	= sizeof(struct compat_xt_hashlimit_info),
-		.compat_from_user = hashlimit_mt_compat_from_user,
-		.compat_to_user	= hashlimit_mt_compat_to_user,
-#endif
-		.checkentry	= hashlimit_mt_check_v0,
-		.destroy	= hashlimit_mt_destroy_v0,
-		.me		= THIS_MODULE
-	},
 	{
 		.name           = "hashlimit",
 		.revision       = 1,
-- 
cgit v1.2.3-59-g8ed1b


From 110d99ed1c846458654e2771b3ad626319f19394 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 22 Mar 2010 19:35:01 +0100
Subject: netfilter: xtables: remove xt_multiport revision 0

Superseded by xt_multiport revision 1 (introduction already predates
linux.git).

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_multiport.c | 77 --------------------------------------------
 1 file changed, 77 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index b446738eab1a..83b77ceb264f 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -26,23 +26,6 @@ MODULE_DESCRIPTION("Xtables: multiple port matching for TCP, UDP, UDP-Lite, SCTP
 MODULE_ALIAS("ipt_multiport");
 MODULE_ALIAS("ip6t_multiport");
 
-/* Returns 1 if the port is matched by the test, 0 otherwise. */
-static inline bool
-ports_match_v0(const u_int16_t *portlist, enum xt_multiport_flags flags,
-	       u_int8_t count, u_int16_t src, u_int16_t dst)
-{
-	unsigned int i;
-	for (i = 0; i < count; i++) {
-		if (flags != XT_MULTIPORT_DESTINATION && portlist[i] == src)
-			return true;
-
-		if (flags != XT_MULTIPORT_SOURCE && portlist[i] == dst)
-			return true;
-	}
-
-	return false;
-}
-
 /* Returns 1 if the port is matched by the test, 0 otherwise. */
 static inline bool
 ports_match_v1(const struct xt_multiport_v1 *minfo,
@@ -88,30 +71,6 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
 	return minfo->invert;
 }
 
-static bool
-multiport_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
-{
-	const __be16 *pptr;
-	__be16 _ports[2];
-	const struct xt_multiport *multiinfo = par->matchinfo;
-
-	if (par->fragoff != 0)
-		return false;
-
-	pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports);
-	if (pptr == NULL) {
-		/* We've been asked to examine this packet, and we
-		 * can't.  Hence, no choice but to drop.
-		 */
-		pr_debug("Dropping evil offset=0 tinygram.\n");
-		*par->hotdrop = true;
-		return false;
-	}
-
-	return ports_match_v0(multiinfo->ports, multiinfo->flags,
-	       multiinfo->count, ntohs(pptr[0]), ntohs(pptr[1]));
-}
-
 static bool
 multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
@@ -152,15 +111,6 @@ check(u_int16_t proto,
 		&& count <= XT_MULTI_PORTS;
 }
 
-static int multiport_mt_check_v0(const struct xt_mtchk_param *par)
-{
-	const struct ipt_ip *ip = par->entryinfo;
-	const struct xt_multiport *multiinfo = par->matchinfo;
-
-	return check(ip->proto, ip->invflags, multiinfo->flags,
-		     multiinfo->count);
-}
-
 static int multiport_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ipt_ip *ip = par->entryinfo;
@@ -170,15 +120,6 @@ static int multiport_mt_check(const struct xt_mtchk_param *par)
 		     multiinfo->count);
 }
 
-static int multiport_mt6_check_v0(const struct xt_mtchk_param *par)
-{
-	const struct ip6t_ip6 *ip = par->entryinfo;
-	const struct xt_multiport *multiinfo = par->matchinfo;
-
-	return check(ip->proto, ip->invflags, multiinfo->flags,
-		     multiinfo->count);
-}
-
 static int multiport_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_ip6 *ip = par->entryinfo;
@@ -189,15 +130,6 @@ static int multiport_mt6_check(const struct xt_mtchk_param *par)
 }
 
 static struct xt_match multiport_mt_reg[] __read_mostly = {
-	{
-		.name		= "multiport",
-		.family		= NFPROTO_IPV4,
-		.revision	= 0,
-		.checkentry	= multiport_mt_check_v0,
-		.match		= multiport_mt_v0,
-		.matchsize	= sizeof(struct xt_multiport),
-		.me		= THIS_MODULE,
-	},
 	{
 		.name		= "multiport",
 		.family		= NFPROTO_IPV4,
@@ -207,15 +139,6 @@ static struct xt_match multiport_mt_reg[] __read_mostly = {
 		.matchsize	= sizeof(struct xt_multiport_v1),
 		.me		= THIS_MODULE,
 	},
-	{
-		.name		= "multiport",
-		.family		= NFPROTO_IPV6,
-		.revision	= 0,
-		.checkentry	= multiport_mt6_check_v0,
-		.match		= multiport_mt_v0,
-		.matchsize	= sizeof(struct xt_multiport),
-		.me		= THIS_MODULE,
-	},
 	{
 		.name		= "multiport",
 		.family		= NFPROTO_IPV6,
-- 
cgit v1.2.3-59-g8ed1b


From d879e19e18ebc69fc20a9b95612e9dd0acf4d7aa Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 22 Mar 2010 19:39:04 +0100
Subject: netfilter: xtables: remove xt_string revision 0

Superseded by xt_string revision 1 (linux v2.6.26-rc8-1127-g4ad3f26,
iptables 1.4.2-rc1).

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 Documentation/Changes     |  2 +-
 net/netfilter/xt_string.c | 53 ++++++++++++++++-------------------------------
 2 files changed, 19 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/Documentation/Changes b/Documentation/Changes
index f08b313cd235..eca9f6e6fbe6 100644
--- a/Documentation/Changes
+++ b/Documentation/Changes
@@ -49,7 +49,7 @@ o  oprofile               0.9                     # oprofiled --version
 o  udev                   081                     # udevinfo -V
 o  grub                   0.93                    # grub --version
 o  mcelog		  0.6
-o  iptables               1.4.1                   # iptables -V
+o  iptables               1.4.2                   # iptables -V
 
 
 Kernel compilation
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index b0f8292db6f8..488e368a2c4e 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -26,12 +26,10 @@ string_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
 	const struct xt_string_info *conf = par->matchinfo;
 	struct ts_state state;
-	int invert;
+	bool invert;
 
 	memset(&state, 0, sizeof(struct ts_state));
-
-	invert = (par->match->revision == 0 ? conf->u.v0.invert :
-				    conf->u.v1.flags & XT_STRING_FLAG_INVERT);
+	invert = conf->u.v1.flags & XT_STRING_FLAG_INVERT;
 
 	return (skb_find_text((struct sk_buff *)skb, conf->from_offset,
 			     conf->to_offset, conf->config, &state)
@@ -53,13 +51,11 @@ static int string_mt_check(const struct xt_mtchk_param *par)
 		return -EINVAL;
 	if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE)
 		return -EINVAL;
-	if (par->match->revision == 1) {
-		if (conf->u.v1.flags &
-		    ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT))
-			return -EINVAL;
-		if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE)
-			flags |= TS_IGNORECASE;
-	}
+	if (conf->u.v1.flags &
+	    ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT))
+		return -EINVAL;
+	if (conf->u.v1.flags & XT_STRING_FLAG_IGNORECASE)
+		flags |= TS_IGNORECASE;
 	ts_conf = textsearch_prepare(conf->algo, conf->pattern, conf->patlen,
 				     GFP_KERNEL, flags);
 	if (IS_ERR(ts_conf))
@@ -74,38 +70,25 @@ static void string_mt_destroy(const struct xt_mtdtor_param *par)
 	textsearch_destroy(STRING_TEXT_PRIV(par->matchinfo)->config);
 }
 
-static struct xt_match xt_string_mt_reg[] __read_mostly = {
-	{
-		.name 		= "string",
-		.revision	= 0,
-		.family		= NFPROTO_UNSPEC,
-		.checkentry	= string_mt_check,
-		.match 		= string_mt,
-		.destroy 	= string_mt_destroy,
-		.matchsize	= sizeof(struct xt_string_info),
-		.me 		= THIS_MODULE
-	},
-	{
-		.name 		= "string",
-		.revision	= 1,
-		.family		= NFPROTO_UNSPEC,
-		.checkentry	= string_mt_check,
-		.match 		= string_mt,
-		.destroy 	= string_mt_destroy,
-		.matchsize	= sizeof(struct xt_string_info),
-		.me 		= THIS_MODULE
-	},
+static struct xt_match xt_string_mt_reg __read_mostly = {
+	.name       = "string",
+	.revision   = 1,
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = string_mt_check,
+	.match      = string_mt,
+	.destroy    = string_mt_destroy,
+	.matchsize  = sizeof(struct xt_string_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init string_mt_init(void)
 {
-	return xt_register_matches(xt_string_mt_reg,
-				   ARRAY_SIZE(xt_string_mt_reg));
+	return xt_register_match(&xt_string_mt_reg);
 }
 
 static void __exit string_mt_exit(void)
 {
-	xt_unregister_matches(xt_string_mt_reg, ARRAY_SIZE(xt_string_mt_reg));
+	xt_unregister_match(&xt_string_mt_reg);
 }
 
 module_init(string_mt_init);
-- 
cgit v1.2.3-59-g8ed1b


From b44672889c11e13e4f4dc0a8ee23f0e64f1e57c6 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 24 Mar 2010 22:50:01 +0100
Subject: netfilter: xtables: merge registration structure to NFPROTO_UNSPEC

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_state.c | 31 ++++++++++---------------------
 1 file changed, 10 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index be00d7b1f53b..bb1271852d50 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -53,35 +53,24 @@ static void state_mt_destroy(const struct xt_mtdtor_param *par)
 	nf_ct_l3proto_module_put(par->family);
 }
 
-static struct xt_match state_mt_reg[] __read_mostly = {
-	{
-		.name		= "state",
-		.family		= NFPROTO_IPV4,
-		.checkentry	= state_mt_check,
-		.match		= state_mt,
-		.destroy	= state_mt_destroy,
-		.matchsize	= sizeof(struct xt_state_info),
-		.me		= THIS_MODULE,
-	},
-	{
-		.name		= "state",
-		.family		= NFPROTO_IPV6,
-		.checkentry	= state_mt_check,
-		.match		= state_mt,
-		.destroy	= state_mt_destroy,
-		.matchsize	= sizeof(struct xt_state_info),
-		.me		= THIS_MODULE,
-	},
+static struct xt_match state_mt_reg __read_mostly = {
+	.name       = "state",
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = state_mt_check,
+	.match      = state_mt,
+	.destroy    = state_mt_destroy,
+	.matchsize  = sizeof(struct xt_state_info),
+	.me         = THIS_MODULE,
 };
 
 static int __init state_mt_init(void)
 {
-	return xt_register_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg));
+	return xt_register_match(&state_mt_reg);
 }
 
 static void __exit state_mt_exit(void)
 {
-	xt_unregister_matches(state_mt_reg, ARRAY_SIZE(state_mt_reg));
+	xt_unregister_match(&state_mt_reg);
 }
 
 module_init(state_mt_init);
-- 
cgit v1.2.3-59-g8ed1b


From df3345457a7a174dfb5872a070af80d456985038 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 24 Mar 2010 19:13:54 +0000
Subject: rps: add CONFIG_RPS

RPS currently depends on SMP and SYSFS

Adding a CONFIG_RPS makes sense in case this requirement changes in the
future. This patch saves about 1500 bytes of kernel text in case SMP is
on but SYSFS is off.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ++++
 net/Kconfig               |  5 +++++
 net/core/dev.c            | 29 +++++++++++++++++++----------
 3 files changed, 28 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c96c41e08e37..53c272f2a734 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -531,6 +531,7 @@ struct netdev_queue {
 	unsigned long		tx_dropped;
 } ____cacheline_aligned_in_smp;
 
+#ifdef CONFIG_RPS
 /*
  * This structure holds an RPS map which can be of variable length.  The
  * map is an array of CPUs.
@@ -549,6 +550,7 @@ struct netdev_rx_queue {
 	struct netdev_rx_queue *first;
 	atomic_t count;
 } ____cacheline_aligned_in_smp;
+#endif
 
 /*
  * This structure defines the management hooks for network devices.
@@ -897,12 +899,14 @@ struct net_device {
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
+#ifdef CONFIG_RPS
 	struct kset		*queues_kset;
 
 	struct netdev_rx_queue	*_rx;
 
 	/* Number of RX queues allocated at alloc_netdev_mq() time  */
 	unsigned int		num_rx_queues;
+#endif
 
 	struct netdev_queue	rx_queue;
 
diff --git a/net/Kconfig b/net/Kconfig
index 041c35edb763..68514644ce91 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -203,6 +203,11 @@ source "net/ieee802154/Kconfig"
 source "net/sched/Kconfig"
 source "net/dcb/Kconfig"
 
+config RPS
+	boolean
+	depends on SMP && SYSFS
+	default y
+
 menu "Network testing"
 
 config NET_PKTGEN
diff --git a/net/core/dev.c b/net/core/dev.c
index 5e3dc28cbf5a..bcb3ed26af1c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2177,7 +2177,7 @@ int weight_p __read_mostly = 64;            /* old backlog weight */
 
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
@@ -2325,7 +2325,7 @@ enqueue:
 
 		/* Schedule NAPI for backlog device */
 		if (napi_schedule_prep(&queue->backlog)) {
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 			if (cpu != smp_processor_id()) {
 				struct rps_remote_softirq_cpus *rcpus =
 				    &__get_cpu_var(rps_remote_softirq_cpus);
@@ -2376,7 +2376,7 @@ int netif_rx(struct sk_buff *skb)
 	if (!skb->tstamp.tv64)
 		net_timestamp(skb);
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 	cpu = get_rps_cpu(skb->dev, skb);
 	if (cpu < 0)
 		cpu = smp_processor_id();
@@ -2750,7 +2750,7 @@ out:
  */
 int netif_receive_skb(struct sk_buff *skb)
 {
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 	int cpu;
 
 	cpu = get_rps_cpu(skb->dev, skb);
@@ -3189,7 +3189,7 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 /*
  * net_rps_action sends any pending IPI's for rps.  This is only called from
  * softirq and interrupts must be enabled.
@@ -3214,7 +3214,7 @@ static void net_rx_action(struct softirq_action *h)
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 	int select;
 	struct rps_remote_softirq_cpus *rcpus;
 #endif
@@ -3280,7 +3280,7 @@ static void net_rx_action(struct softirq_action *h)
 		netpoll_poll_unlock(have);
 	}
 out:
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 	rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
 	select = rcpus->select;
 	rcpus->select ^= 1;
@@ -5277,6 +5277,7 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
+#ifdef CONFIG_RPS
 	if (!dev->num_rx_queues) {
 		/*
 		 * Allocate a single RX queue if driver never called
@@ -5293,7 +5294,7 @@ int register_netdevice(struct net_device *dev)
 		atomic_set(&dev->_rx->count, 1);
 		dev->num_rx_queues = 1;
 	}
-
+#endif
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
 		ret = dev->netdev_ops->ndo_init(dev);
@@ -5653,11 +5654,13 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		void (*setup)(struct net_device *), unsigned int queue_count)
 {
 	struct netdev_queue *tx;
-	struct netdev_rx_queue *rx;
 	struct net_device *dev;
 	size_t alloc_size;
 	struct net_device *p;
+#ifdef CONFIG_RPS
+	struct netdev_rx_queue *rx;
 	int i;
+#endif
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
@@ -5683,6 +5686,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		goto free_p;
 	}
 
+#ifdef CONFIG_RPS
 	rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
 	if (!rx) {
 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
@@ -5698,6 +5702,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	 */
 	for (i = 0; i < queue_count; i++)
 		rx[i].first = rx;
+#endif
 
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
@@ -5713,8 +5718,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev->num_tx_queues = queue_count;
 	dev->real_num_tx_queues = queue_count;
 
+#ifdef CONFIG_RPS
 	dev->_rx = rx;
 	dev->num_rx_queues = queue_count;
+#endif
 
 	dev->gso_max_size = GSO_MAX_SIZE;
 
@@ -5731,8 +5738,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	return dev;
 
 free_rx:
+#ifdef CONFIG_RPS
 	kfree(rx);
 free_tx:
+#endif
 	kfree(tx);
 free_p:
 	kfree(p);
@@ -6236,7 +6245,7 @@ static int __init net_dev_init(void)
 		queue->completion_queue = NULL;
 		INIT_LIST_HEAD(&queue->poll_list);
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 		queue->csd.func = trigger_softirq;
 		queue->csd.info = queue;
 		queue->csd.flags = 0;
-- 
cgit v1.2.3-59-g8ed1b


From b54c9b98bbfb4836b1f7441c5a9db24affd3c2e9 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 25 Mar 2010 21:25:30 -0700
Subject: ipv6: Preserve previous behavior in ipv6_link_dev_addr().

Use list_add_tail() to get the behavior we had before
the list_head conversion for ipv6 address lists.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 68e5809a2153..999568344721 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -578,7 +578,7 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp)
 			break;
 	}
 
-	list_add(&ifp->if_list, p);
+	list_add_tail(&ifp->if_list, p);
 }
 
 static u32 ipv6_addr_hash(const struct in6_addr *addr)
-- 
cgit v1.2.3-59-g8ed1b


From b79d1d54cf0672f764402fe4711ef5306f917bd3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 25 Mar 2010 21:39:21 -0700
Subject: ipv6: Fix result generation in ipv6_get_ifaddr().

Finishing naturally from hlist_for_each_entry(x, ...) does not result
in 'x' being NULL.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 999568344721..21b4c9e1a682 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1341,9 +1341,9 @@ EXPORT_SYMBOL(ipv6_chk_prefix);
 struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr,
 				     struct net_device *dev, int strict)
 {
-	struct inet6_ifaddr *ifp = NULL;
-	struct hlist_node *node;
+	struct inet6_ifaddr *ifp, *result = NULL;
 	unsigned int hash = ipv6_addr_hash(addr);
+	struct hlist_node *node;
 
 	rcu_read_lock_bh();
 	hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
@@ -1352,6 +1352,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
 			if (dev == NULL || ifp->idev->dev == dev ||
 			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) {
+				result = ifp;
 				in6_ifa_hold(ifp);
 				break;
 			}
@@ -1359,7 +1360,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
 	}
 	rcu_read_unlock_bh();
 
-	return ifp;
+	return result;
 }
 
 /* Gets referenced address, destroys ifaddr */
-- 
cgit v1.2.3-59-g8ed1b


From 66aa4a55fe0548c8b13a195c61774db65c5896cd Mon Sep 17 00:00:00 2001
From: Tom Goff <thomas.goff@boeing.com>
Date: Fri, 19 Mar 2010 15:38:50 +0000
Subject: netlink: use the appropriate namespace pid

This was included in OpenVZ kernels but wasn't integrated upstream.
>From git://git.openvz.org/pub/linux-2.6.24-openvz:

	commit 5c69402f18adf7276352e051ece2cf31feefab02
	Author: Alexey Dobriyan <adobriyan@openvz.org>
	Date:   Mon Dec 24 14:37:45 2007 +0300

	    netlink: fixup ->tgid to work in multiple PID namespaces

Signed-off-by: Tom Goff <thomas.goff@boeing.com>
Acked-by: Alexey Dobriyan <adobriyan@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netlink/af_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index acbbae1e89b5..274d977166b7 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -545,7 +545,7 @@ static int netlink_autobind(struct socket *sock)
 	struct hlist_head *head;
 	struct sock *osk;
 	struct hlist_node *node;
-	s32 pid = current->tgid;
+	s32 pid = task_tgid_vnr(current);
 	int err;
 	static s32 rover = -4097;
 
-- 
cgit v1.2.3-59-g8ed1b


From 14a4b42bd6082b4ce3b94bad00cd367707cc1e97 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sat, 27 Mar 2010 16:35:50 -0700
Subject: net: fix unaligned access in IFLA_STATS64

Tony Luck observes that the original IFLA_STATS64 submission causes
unaligned accesses. This is because nla_data() returns a pointer to a
memory region that is only aligned to 32 bits. Do some memcpying to
work around this.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 62 ++++++++++++++++++++++++++--------------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ffc6cf3495ac..ed0766f0181a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -602,36 +602,38 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
 	a->tx_compressed = b->tx_compressed;
 }
 
-static void copy_rtnl_link_stats64(struct rtnl_link_stats64 *a,
-				   const struct net_device_stats *b)
+static void copy_rtnl_link_stats64(void *v, const struct net_device_stats *b)
 {
-	a->rx_packets = b->rx_packets;
-	a->tx_packets = b->tx_packets;
-	a->rx_bytes = b->rx_bytes;
-	a->tx_bytes = b->tx_bytes;
-	a->rx_errors = b->rx_errors;
-	a->tx_errors = b->tx_errors;
-	a->rx_dropped = b->rx_dropped;
-	a->tx_dropped = b->tx_dropped;
-
-	a->multicast = b->multicast;
-	a->collisions = b->collisions;
-
-	a->rx_length_errors = b->rx_length_errors;
-	a->rx_over_errors = b->rx_over_errors;
-	a->rx_crc_errors = b->rx_crc_errors;
-	a->rx_frame_errors = b->rx_frame_errors;
-	a->rx_fifo_errors = b->rx_fifo_errors;
-	a->rx_missed_errors = b->rx_missed_errors;
-
-	a->tx_aborted_errors = b->tx_aborted_errors;
-	a->tx_carrier_errors = b->tx_carrier_errors;
-	a->tx_fifo_errors = b->tx_fifo_errors;
-	a->tx_heartbeat_errors = b->tx_heartbeat_errors;
-	a->tx_window_errors = b->tx_window_errors;
-
-	a->rx_compressed = b->rx_compressed;
-	a->tx_compressed = b->tx_compressed;
+	struct rtnl_link_stats64 a;
+
+	a.rx_packets = b->rx_packets;
+	a.tx_packets = b->tx_packets;
+	a.rx_bytes = b->rx_bytes;
+	a.tx_bytes = b->tx_bytes;
+	a.rx_errors = b->rx_errors;
+	a.tx_errors = b->tx_errors;
+	a.rx_dropped = b->rx_dropped;
+	a.tx_dropped = b->tx_dropped;
+
+	a.multicast = b->multicast;
+	a.collisions = b->collisions;
+
+	a.rx_length_errors = b->rx_length_errors;
+	a.rx_over_errors = b->rx_over_errors;
+	a.rx_crc_errors = b->rx_crc_errors;
+	a.rx_frame_errors = b->rx_frame_errors;
+	a.rx_fifo_errors = b->rx_fifo_errors;
+	a.rx_missed_errors = b->rx_missed_errors;
+
+	a.tx_aborted_errors = b->tx_aborted_errors;
+	a.tx_carrier_errors = b->tx_carrier_errors;
+	a.tx_fifo_errors = b->tx_fifo_errors;
+	a.tx_heartbeat_errors = b->tx_heartbeat_errors;
+	a.tx_window_errors = b->tx_window_errors;
+
+	a.rx_compressed = b->rx_compressed;
+	a.tx_compressed = b->tx_compressed;
+	memcpy(v, &a, sizeof(a));
 }
 
 static inline int rtnl_vfinfo_size(const struct net_device *dev)
@@ -734,8 +736,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			sizeof(struct rtnl_link_stats64));
 	if (attr == NULL)
 		goto nla_put_failure;
-
-	stats = dev_get_stats(dev);
 	copy_rtnl_link_stats64(nla_data(attr), stats);
 
 	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
-- 
cgit v1.2.3-59-g8ed1b


From adcfe1964e627b62fbc6e45609b1f0db2c64dd14 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sat, 27 Mar 2010 17:15:29 -0700
Subject: net: increase preallocated size of nlmsg to accommodate for
 IFLA_STATS64

When more data is stuffed into an nlmsg than initially projected, an
extra allocation needs to be done. Reserve enough for IFLA_STATS64 so
that this does not needlessly happen.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index ed0766f0181a..bf919b6acea2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -653,6 +653,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */
 	       + nla_total_size(sizeof(struct rtnl_link_ifmap))
 	       + nla_total_size(sizeof(struct rtnl_link_stats))
+	       + nla_total_size(sizeof(struct rtnl_link_stats64))
 	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */
 	       + nla_total_size(MAX_ADDR_LEN) /* IFLA_BROADCAST */
 	       + nla_total_size(4) /* IFLA_TXQLEN */
-- 
cgit v1.2.3-59-g8ed1b


From 10f744d205dde72a0016dbdb11e239da8269958b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 28 Mar 2010 23:07:20 -0700
Subject: net: __netif_receive_skb should be static

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index bcb3ed26af1c..887aa84fcd46 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2621,7 +2621,7 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
-int __netif_receive_skb(struct sk_buff *skb)
+static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
 	struct net_device *orig_dev;
-- 
cgit v1.2.3-59-g8ed1b


From 30bde1f5076a9b6bd4b6a168523930ce242c7449 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 29 Mar 2010 01:00:44 -0700
Subject: rps: fix net-sysfs build for !CONFIG_RPS

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f6b6bfee72ae..1e7fdd6029a2 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -466,6 +466,7 @@ static struct attribute_group wireless_group = {
 };
 #endif
 
+#ifdef CONFIG_RPS
 /*
  * RX queue sysfs structures and functions.
  */
@@ -675,7 +676,7 @@ static void rx_queue_remove_kobjects(struct net_device *net)
 		kobject_put(&net->_rx[i].kobj);
 	kset_unregister(net->queues_kset);
 }
-
+#endif /* CONFIG_RPS */
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_HOTPLUG
@@ -739,7 +740,7 @@ void netdev_unregister_kobject(struct net_device * net)
 	if (!net_eq(dev_net(net), &init_net))
 		return;
 
-#ifdef CONFIG_SYSFS
+#ifdef CONFIG_RPS
 	rx_queue_remove_kobjects(net);
 #endif
 
@@ -782,7 +783,7 @@ int netdev_register_kobject(struct net_device *net)
 	if (error)
 		return error;
 
-#ifdef CONFIG_SYSFS
+#ifdef CONFIG_RPS
 	error = rx_queue_register_kobjects(net);
 	if (error) {
 		device_del(dev);
-- 
cgit v1.2.3-59-g8ed1b


From b482cd2053e3b90a7b33a78c63cdb6badf2ec383 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Tue, 30 Mar 2010 13:56:23 +0000
Subject: net-caif: add CAIF core protocol stack

CAIF generic protocol implementation. This layer is
somewhat generic in order to be able to use and test it outside
the Linux Kernel.

cfctrl.c     - CAIF control protocol layer
cfdbgl.c     - CAIF debug protocol layer
cfdgml.c     - CAIF datagram protocol layer
cffrml.c     - CAIF framing protocol layer
cfmuxl.c     - CAIF mux protocol layer
cfrfml.c     - CAIF remote file manager protocol layer
cfserl.c     - CAIF serial (fragmentation) protocol layer
cfsrvl.c     - CAIF generic service layer functions
cfutill.c    - CAIF utility protocol layer
cfveil.c     - CAIF AT protocol layer
cfvidl.c     - CAIF video protocol layer

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfctrl.c  | 664 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/caif/cfdbgl.c  |  40 ++++
 net/caif/cfdgml.c  | 108 +++++++++
 net/caif/cffrml.c  | 151 ++++++++++++
 net/caif/cfmuxl.c  | 246 ++++++++++++++++++++
 net/caif/cfrfml.c  | 108 +++++++++
 net/caif/cfserl.c  | 192 ++++++++++++++++
 net/caif/cfsrvl.c  | 185 +++++++++++++++
 net/caif/cfutill.c | 115 ++++++++++
 net/caif/cfveil.c  | 107 +++++++++
 net/caif/cfvidl.c  |  65 ++++++
 11 files changed, 1981 insertions(+)
 create mode 100644 net/caif/cfctrl.c
 create mode 100644 net/caif/cfdbgl.c
 create mode 100644 net/caif/cfdgml.c
 create mode 100644 net/caif/cffrml.c
 create mode 100644 net/caif/cfmuxl.c
 create mode 100644 net/caif/cfrfml.c
 create mode 100644 net/caif/cfserl.c
 create mode 100644 net/caif/cfsrvl.c
 create mode 100644 net/caif/cfutill.c
 create mode 100644 net/caif/cfveil.c
 create mode 100644 net/caif/cfvidl.c

(limited to 'net')

diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
new file mode 100644
index 000000000000..11f80140f3cb
--- /dev/null
+++ b/net/caif/cfctrl.c
@@ -0,0 +1,664 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/stddef.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfpkt.h>
+#include <net/caif/cfctrl.h>
+
+#define container_obj(layr) container_of(layr, struct cfctrl, serv.layer)
+#define UTILITY_NAME_LENGTH 16
+#define CFPKT_CTRL_PKT_LEN 20
+
+
+#ifdef CAIF_NO_LOOP
+static int handle_loop(struct cfctrl *ctrl,
+			      int cmd, struct cfpkt *pkt){
+	return CAIF_FAILURE;
+}
+#else
+static int handle_loop(struct cfctrl *ctrl,
+		int cmd, struct cfpkt *pkt);
+#endif
+static int cfctrl_recv(struct cflayer *layr, struct cfpkt *pkt);
+static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
+			   int phyid);
+
+
+struct cflayer *cfctrl_create(void)
+{
+	struct cfctrl *this =
+		kmalloc(sizeof(struct cfctrl), GFP_ATOMIC);
+	if (!this) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return NULL;
+	}
+	caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
+	memset(this, 0, sizeof(*this));
+	spin_lock_init(&this->info_list_lock);
+	atomic_set(&this->req_seq_no, 1);
+	atomic_set(&this->rsp_seq_no, 1);
+	this->serv.dev_info.id = 0xff;
+	this->serv.layer.id = 0;
+	this->serv.layer.receive = cfctrl_recv;
+	sprintf(this->serv.layer.name, "ctrl");
+	this->serv.layer.ctrlcmd = cfctrl_ctrlcmd;
+	spin_lock_init(&this->loop_linkid_lock);
+	this->loop_linkid = 1;
+	return &this->serv.layer;
+}
+
+static bool param_eq(struct cfctrl_link_param *p1, struct cfctrl_link_param *p2)
+{
+	bool eq =
+	    p1->linktype == p2->linktype &&
+	    p1->priority == p2->priority &&
+	    p1->phyid == p2->phyid &&
+	    p1->endpoint == p2->endpoint && p1->chtype == p2->chtype;
+
+	if (!eq)
+		return false;
+
+	switch (p1->linktype) {
+	case CFCTRL_SRV_VEI:
+		return true;
+	case CFCTRL_SRV_DATAGRAM:
+		return p1->u.datagram.connid == p2->u.datagram.connid;
+	case CFCTRL_SRV_RFM:
+		return
+		    p1->u.rfm.connid == p2->u.rfm.connid &&
+		    strcmp(p1->u.rfm.volume, p2->u.rfm.volume) == 0;
+	case CFCTRL_SRV_UTIL:
+		return
+		    p1->u.utility.fifosize_kb == p2->u.utility.fifosize_kb
+		    && p1->u.utility.fifosize_bufs ==
+		    p2->u.utility.fifosize_bufs
+		    && strcmp(p1->u.utility.name, p2->u.utility.name) == 0
+		    && p1->u.utility.paramlen == p2->u.utility.paramlen
+		    && memcmp(p1->u.utility.params, p2->u.utility.params,
+			      p1->u.utility.paramlen) == 0;
+
+	case CFCTRL_SRV_VIDEO:
+		return p1->u.video.connid == p2->u.video.connid;
+	case CFCTRL_SRV_DBG:
+		return true;
+	case CFCTRL_SRV_DECM:
+		return false;
+	default:
+		return false;
+	}
+	return false;
+}
+
+bool cfctrl_req_eq(struct cfctrl_request_info *r1,
+		   struct cfctrl_request_info *r2)
+{
+	if (r1->cmd != r2->cmd)
+		return false;
+	if (r1->cmd == CFCTRL_CMD_LINK_SETUP)
+		return param_eq(&r1->param, &r2->param);
+	else
+		return r1->channel_id == r2->channel_id;
+}
+
+/* Insert request at the end */
+void cfctrl_insert_req(struct cfctrl *ctrl,
+			      struct cfctrl_request_info *req)
+{
+	struct cfctrl_request_info *p;
+	spin_lock(&ctrl->info_list_lock);
+	req->next = NULL;
+	atomic_inc(&ctrl->req_seq_no);
+	req->sequence_no = atomic_read(&ctrl->req_seq_no);
+	if (ctrl->first_req == NULL) {
+		ctrl->first_req = req;
+		spin_unlock(&ctrl->info_list_lock);
+		return;
+	}
+	p = ctrl->first_req;
+	while (p->next != NULL)
+		p = p->next;
+	p->next = req;
+	spin_unlock(&ctrl->info_list_lock);
+}
+
+static void cfctrl_insert_req2(struct cfctrl *ctrl, enum cfctrl_cmd cmd,
+			       u8 linkid, struct cflayer *user_layer)
+{
+	struct cfctrl_request_info *req = kmalloc(sizeof(*req), GFP_KERNEL);
+	if (!req) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return;
+	}
+	req->client_layer = user_layer;
+	req->cmd = cmd;
+	req->channel_id = linkid;
+	cfctrl_insert_req(ctrl, req);
+}
+
+/* Compare and remove request */
+struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
+					      struct cfctrl_request_info *req)
+{
+	struct cfctrl_request_info *p;
+	struct cfctrl_request_info *ret;
+
+	spin_lock(&ctrl->info_list_lock);
+	if (ctrl->first_req == NULL) {
+		spin_unlock(&ctrl->info_list_lock);
+		return NULL;
+	}
+
+	if (cfctrl_req_eq(req, ctrl->first_req)) {
+		ret = ctrl->first_req;
+		caif_assert(ctrl->first_req);
+		atomic_set(&ctrl->rsp_seq_no,
+				 ctrl->first_req->sequence_no);
+		ctrl->first_req = ctrl->first_req->next;
+		spin_unlock(&ctrl->info_list_lock);
+		return ret;
+	}
+
+	p = ctrl->first_req;
+
+	while (p->next != NULL) {
+		if (cfctrl_req_eq(req, p->next)) {
+			pr_warning("CAIF: %s(): Requests are not "
+					"received in order\n",
+					__func__);
+			ret = p->next;
+			atomic_set(&ctrl->rsp_seq_no,
+					p->next->sequence_no);
+			p->next = p->next->next;
+			spin_unlock(&ctrl->info_list_lock);
+			return ret;
+		}
+		p = p->next;
+	}
+	spin_unlock(&ctrl->info_list_lock);
+
+	pr_warning("CAIF: %s(): Request does not match\n",
+		   __func__);
+	return NULL;
+}
+
+struct cfctrl_rsp *cfctrl_get_respfuncs(struct cflayer *layer)
+{
+	struct cfctrl *this = container_obj(layer);
+	return &this->res;
+}
+
+void cfctrl_set_dnlayer(struct cflayer *this, struct cflayer *dn)
+{
+	this->dn = dn;
+}
+
+void cfctrl_set_uplayer(struct cflayer *this, struct cflayer *up)
+{
+	this->up = up;
+}
+
+static void init_info(struct caif_payload_info *info, struct cfctrl *cfctrl)
+{
+	info->hdr_len = 0;
+	info->channel_id = cfctrl->serv.layer.id;
+	info->dev_info = &cfctrl->serv.dev_info;
+}
+
+void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
+{
+	struct cfctrl *cfctrl = container_obj(layer);
+	int ret;
+	struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
+	if (!pkt) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return;
+	}
+	caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
+	init_info(cfpkt_info(pkt), cfctrl);
+	cfpkt_info(pkt)->dev_info->id = physlinkid;
+	cfctrl->serv.dev_info.id = physlinkid;
+	cfpkt_addbdy(pkt, CFCTRL_CMD_ENUM);
+	cfpkt_addbdy(pkt, physlinkid);
+	ret =
+	    cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
+	if (ret < 0) {
+		pr_err("CAIF: %s(): Could not transmit enum message\n",
+			__func__);
+		cfpkt_destroy(pkt);
+	}
+}
+
+/* Build a LINK_SETUP request from param, queue it and send it downwards. */
+void cfctrl_linkup_request(struct cflayer *layer,
+			   struct cfctrl_link_param *param,
+			   struct cflayer *user_layer)
+{
+	struct cfctrl *cfctrl = container_obj(layer);
+	u32 tmp32;
+	u16 tmp16;
+	u8 tmp8;
+	struct cfctrl_request_info *req;
+	int ret;
+	char utility_name[UTILITY_NAME_LENGTH];
+	struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
+	if (!pkt) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return;
+	}
+	cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
+	cfpkt_addbdy(pkt, (param->chtype << 4) + param->linktype);
+	cfpkt_addbdy(pkt, (param->priority << 3) + param->phyid);
+	cfpkt_addbdy(pkt, param->endpoint & 0x03);
+
+	switch (param->linktype) {
+	case CFCTRL_SRV_VEI:
+		break;
+	case CFCTRL_SRV_VIDEO:
+		cfpkt_addbdy(pkt, (u8) param->u.video.connid);
+		break;
+	case CFCTRL_SRV_DBG:
+		break;
+	case CFCTRL_SRV_DATAGRAM:
+		tmp32 = cpu_to_le32(param->u.datagram.connid);
+		cfpkt_add_body(pkt, &tmp32, 4);
+		break;
+	case CFCTRL_SRV_RFM:
+		/* Add the RFM connection id as a 32-bit little-endian
+		 * value, followed by the volume name.
+		 */
+		tmp32 = cpu_to_le32(param->u.rfm.connid);
+		cfpkt_add_body(pkt, &tmp32, 4);
+		/* Add volume name, including zero termination... */
+		cfpkt_add_body(pkt, param->u.rfm.volume,
+			       strlen(param->u.rfm.volume) + 1);
+		break;
+	case CFCTRL_SRV_UTIL:
+		tmp16 = cpu_to_le16(param->u.utility.fifosize_kb);
+		cfpkt_add_body(pkt, &tmp16, 2);
+		tmp16 = cpu_to_le16(param->u.utility.fifosize_bufs);
+		cfpkt_add_body(pkt, &tmp16, 2);
+		memset(utility_name, 0, sizeof(utility_name));
+		strncpy(utility_name, param->u.utility.name,
+			UTILITY_NAME_LENGTH - 1);
+		cfpkt_add_body(pkt, utility_name, UTILITY_NAME_LENGTH);
+		tmp8 = param->u.utility.paramlen;
+		cfpkt_add_body(pkt, &tmp8, 1);
+		cfpkt_add_body(pkt, param->u.utility.params,
+			       param->u.utility.paramlen);
+		break;
+	default:
+		pr_warning("CAIF: %s():Request setup of bad link type = %d\n",
+			   __func__, param->linktype);
+	}
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
+	if (!req) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		cfpkt_destroy(pkt);	/* don't leak the packet built above */
+		return;
+	}
+	req->client_layer = user_layer;
+	req->cmd = CFCTRL_CMD_LINK_SETUP;
+	req->param = *param;
+	cfctrl_insert_req(cfctrl, req);
+	init_info(cfpkt_info(pkt), cfctrl);
+	cfpkt_info(pkt)->dev_info->id = param->phyid;
+	ret = cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
+	if (ret < 0) {
+		pr_err("CAIF: %s(): Could not transmit linksetup request\n",
+			__func__);
+		cfpkt_destroy(pkt);
+	}
+}
+
+int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
+				struct cflayer *client)
+{
+	int ret;
+	struct cfctrl *cfctrl = container_obj(layer);
+	struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
+	if (!pkt) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return -ENOMEM;
+	}
+	cfctrl_insert_req2(cfctrl, CFCTRL_CMD_LINK_DESTROY, channelid, client);
+	cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY);
+	cfpkt_addbdy(pkt, channelid);
+	init_info(cfpkt_info(pkt), cfctrl);
+	ret =
+	    cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
+	if (ret < 0) {
+		pr_err("CAIF: %s(): Could not transmit link-down request\n",
+			__func__);
+		cfpkt_destroy(pkt);
+	}
+	return ret;
+}
+
+void cfctrl_sleep_req(struct cflayer *layer)
+{
+	int ret;
+	struct cfctrl *cfctrl = container_obj(layer);
+	struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
+	if (!pkt) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return;
+	}
+	cfpkt_addbdy(pkt, CFCTRL_CMD_SLEEP);
+	init_info(cfpkt_info(pkt), cfctrl);
+	ret =
+	    cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
+	if (ret < 0)
+		cfpkt_destroy(pkt);
+}
+
+void cfctrl_wake_req(struct cflayer *layer)
+{
+	int ret;
+	struct cfctrl *cfctrl = container_obj(layer);
+	struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
+	if (!pkt) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return;
+	}
+	cfpkt_addbdy(pkt, CFCTRL_CMD_WAKE);
+	init_info(cfpkt_info(pkt), cfctrl);
+	ret =
+	    cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
+	if (ret < 0)
+		cfpkt_destroy(pkt);
+}
+
+void cfctrl_getstartreason_req(struct cflayer *layer)
+{
+	int ret;
+	struct cfctrl *cfctrl = container_obj(layer);
+	struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
+	if (!pkt) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return;
+	}
+	cfpkt_addbdy(pkt, CFCTRL_CMD_START_REASON);
+	init_info(cfpkt_info(pkt), cfctrl);
+	ret =
+	    cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
+	if (ret < 0)
+		cfpkt_destroy(pkt);
+}
+
+
+static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
+{
+	u8 cmdrsp;
+	u8 cmd;
+	int ret = -1;
+	u16 tmp16;
+	u8 len;
+	u8 param[255];
+	u8 linkid;
+	struct cfctrl *cfctrl = container_obj(layer);
+	struct cfctrl_request_info rsp, *req;
+
+
+	cfpkt_extr_head(pkt, &cmdrsp, 1);
+	cmd = cmdrsp & CFCTRL_CMD_MASK;
+	if (cmd != CFCTRL_CMD_LINK_ERR
+	    && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) {
+		if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE) {
+			pr_info("CAIF: %s() CAIF Protocol error:"
+				"Response bit not set\n", __func__);
+			goto error;
+		}
+	}
+
+	switch (cmd) {
+	case CFCTRL_CMD_LINK_SETUP:
+		{
+			enum cfctrl_srv serv;
+			enum cfctrl_srv servtype;
+			u8 endpoint;
+			u8 physlinkid;
+			u8 prio;
+			u8 tmp;
+			u32 tmp32;
+			u8 *cp;
+			int i;
+			struct cfctrl_link_param linkparam;
+			memset(&linkparam, 0, sizeof(linkparam));
+
+			cfpkt_extr_head(pkt, &tmp, 1);
+
+			serv = tmp & CFCTRL_SRV_MASK;
+			linkparam.linktype = serv;
+
+			servtype = tmp >> 4;
+			linkparam.chtype = servtype;
+
+			cfpkt_extr_head(pkt, &tmp, 1);
+			physlinkid = tmp & 0x07;
+			prio = tmp >> 3;
+
+			linkparam.priority = prio;
+			linkparam.phyid = physlinkid;
+			cfpkt_extr_head(pkt, &endpoint, 1);
+			linkparam.endpoint = endpoint & 0x03;
+
+			switch (serv) {
+			case CFCTRL_SRV_VEI:
+			case CFCTRL_SRV_DBG:
+				/* Link ID */
+				cfpkt_extr_head(pkt, &linkid, 1);
+				break;
+			case CFCTRL_SRV_VIDEO:
+				cfpkt_extr_head(pkt, &tmp, 1);
+				linkparam.u.video.connid = tmp;
+				/* Link ID */
+				cfpkt_extr_head(pkt, &linkid, 1);
+				break;
+
+			case CFCTRL_SRV_DATAGRAM:
+				cfpkt_extr_head(pkt, &tmp32, 4);
+				linkparam.u.datagram.connid =
+				    le32_to_cpu(tmp32);
+				/* Link ID */
+				cfpkt_extr_head(pkt, &linkid, 1);
+				break;
+			case CFCTRL_SRV_RFM:
+				/* Construct a frame, convert
+				 * DatagramConnectionID
+				 * to network format long and copy it out...
+				 */
+				cfpkt_extr_head(pkt, &tmp32, 4);
+				linkparam.u.rfm.connid =
+				  le32_to_cpu(tmp32);
+				cp = (u8 *) linkparam.u.rfm.volume;
+				for (cfpkt_extr_head(pkt, &tmp, 1);
+				     cfpkt_more(pkt) && tmp != '\0';
+				     cfpkt_extr_head(pkt, &tmp, 1))
+					*cp++ = tmp;
+				*cp = '\0';
+
+				/* Link ID */
+				cfpkt_extr_head(pkt, &linkid, 1);
+
+				break;
+			case CFCTRL_SRV_UTIL:
+				/* Construct a frame, convert
+				 * DatagramConnectionID
+				 * to network format long and copy it out...
+				 */
+				/* Fifosize KB */
+				cfpkt_extr_head(pkt, &tmp16, 2);
+				linkparam.u.utility.fifosize_kb =
+				    le16_to_cpu(tmp16);
+				/* Fifosize bufs */
+				cfpkt_extr_head(pkt, &tmp16, 2);
+				linkparam.u.utility.fifosize_bufs =
+				    le16_to_cpu(tmp16);
+				/* name */
+				cp = (u8 *) linkparam.u.utility.name;
+				caif_assert(sizeof(linkparam.u.utility.name)
+					     >= UTILITY_NAME_LENGTH);
+				for (i = 0;
+				     i < UTILITY_NAME_LENGTH
+				     && cfpkt_more(pkt); i++) {
+					cfpkt_extr_head(pkt, &tmp, 1);
+					*cp++ = tmp;
+				}
+				/* Length */
+				cfpkt_extr_head(pkt, &len, 1);
+				linkparam.u.utility.paramlen = len;
+				/* Param Data */
+				cp = linkparam.u.utility.params;
+				while (cfpkt_more(pkt) && len--) {
+					cfpkt_extr_head(pkt, &tmp, 1);
+					*cp++ = tmp;
+				}
+				/* Link ID */
+				cfpkt_extr_head(pkt, &linkid, 1);
+				/* Length */
+				cfpkt_extr_head(pkt, &len, 1);
+				/* Param Data */
+				cfpkt_extr_head(pkt, &param, len);
+				break;
+			default:
+				pr_warning("CAIF: %s(): Request setup "
+					   "- invalid link type (%d)",
+					   __func__, serv);
+				goto error;
+			}
+
+			rsp.cmd = cmd;
+			rsp.param = linkparam;
+			req = cfctrl_remove_req(cfctrl, &rsp);
+
+			if (CFCTRL_ERR_BIT == (CFCTRL_ERR_BIT & cmdrsp) ||
+				cfpkt_erroneous(pkt)) {
+				pr_err("CAIF: %s(): Invalid O/E bit or parse "
+				       "error on CAIF control channel",
+					__func__);
+				cfctrl->res.reject_rsp(cfctrl->serv.layer.up,
+						       0,
+						       req ? req->client_layer
+						       : NULL);
+			} else {
+				cfctrl->res.linksetup_rsp(cfctrl->serv.
+							  layer.up, linkid,
+							  serv, physlinkid,
+							  req ? req->
+							  client_layer : NULL);
+			}
+
+			if (req != NULL)
+				kfree(req);
+		}
+		break;
+	case CFCTRL_CMD_LINK_DESTROY:
+		cfpkt_extr_head(pkt, &linkid, 1);
+		rsp.cmd = cmd;
+		rsp.channel_id = linkid;
+		req = cfctrl_remove_req(cfctrl, &rsp);
+		cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid,
+					    req ? req->client_layer : NULL);
+		if (req != NULL)
+			kfree(req);
+		break;
+	case CFCTRL_CMD_LINK_ERR:
+		pr_err("CAIF: %s(): Frame Error Indication received\n",
+			__func__);
+		cfctrl->res.linkerror_ind();
+		break;
+	case CFCTRL_CMD_ENUM:
+		cfctrl->res.enum_rsp();
+		break;
+	case CFCTRL_CMD_SLEEP:
+		cfctrl->res.sleep_rsp();
+		break;
+	case CFCTRL_CMD_WAKE:
+		cfctrl->res.wake_rsp();
+		break;
+	case CFCTRL_CMD_LINK_RECONF:
+		cfctrl->res.restart_rsp();
+		break;
+	case CFCTRL_CMD_RADIO_SET:
+		cfctrl->res.radioset_rsp();
+		break;
+	default:
+		pr_err("CAIF: %s(): Unrecognized Control Frame\n", __func__);
+		goto error;
+		break;
+	}
+	ret = 0;
+error:
+	cfpkt_destroy(pkt);
+	return ret;
+}
+
+static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
+			int phyid)
+{
+	struct cfctrl *this = container_obj(layr);
+	switch (ctrl) {
+	case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND:
+	case CAIF_CTRLCMD_FLOW_OFF_IND:
+		spin_lock(&this->info_list_lock);
+		if (this->first_req != NULL) {
+			pr_warning("CAIF: %s(): Received flow off in "
+				   "control layer", __func__);
+		}
+		spin_unlock(&this->info_list_lock);
+		break;
+	default:
+		break;
+	}
+}
+
+#ifndef CAIF_NO_LOOP
+static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt)
+{
+	static int last_linkid;
+	u8 linkid, linktype, tmp;
+	switch (cmd) {
+	case CFCTRL_CMD_LINK_SETUP:
+		spin_lock(&ctrl->loop_linkid_lock);
+		for (linkid = last_linkid + 1; linkid < 255; linkid++)
+			if (!ctrl->loop_linkused[linkid])
+				goto found;
+		for (linkid = last_linkid - 1; linkid > 0; linkid--)
+			if (!ctrl->loop_linkused[linkid])
+				goto found;
+		spin_unlock(&ctrl->loop_linkid_lock);
+		return -EINVAL;
+found:
+		if (!ctrl->loop_linkused[linkid])
+			ctrl->loop_linkused[linkid] = 1;
+
+		last_linkid = linkid;
+
+		cfpkt_add_trail(pkt, &linkid, 1);
+		spin_unlock(&ctrl->loop_linkid_lock);
+		cfpkt_peek_head(pkt, &linktype, 1);
+		if (linktype ==  CFCTRL_SRV_UTIL) {
+			tmp = 0x01;
+			cfpkt_add_trail(pkt, &tmp, 1);
+			cfpkt_add_trail(pkt, &tmp, 1);
+		}
+		break;
+
+	case CFCTRL_CMD_LINK_DESTROY:
+		spin_lock(&ctrl->loop_linkid_lock);
+		cfpkt_peek_head(pkt, &linkid, 1);
+		ctrl->loop_linkused[linkid] = 0;
+		spin_unlock(&ctrl->loop_linkid_lock);
+		break;
+	default:
+		break;
+	}
+	return CAIF_SUCCESS;
+}
+#endif
diff --git a/net/caif/cfdbgl.c b/net/caif/cfdbgl.c
new file mode 100644
index 000000000000..ab6b6dc34cf8
--- /dev/null
+++ b/net/caif/cfdbgl.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfsrvl.h>
+#include <net/caif/cfpkt.h>
+
+static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt);
+static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt);
+
+struct cflayer *cfdbgl_create(u8 channel_id, struct dev_info *dev_info)
+{
+	struct cfsrvl *dbg = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
+	if (!dbg) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return NULL;
+	}
+	caif_assert(offsetof(struct cfsrvl, layer) == 0);
+	memset(dbg, 0, sizeof(struct cfsrvl));
+	cfsrvl_init(dbg, channel_id, dev_info);
+	dbg->layer.receive = cfdbgl_receive;
+	dbg->layer.transmit = cfdbgl_transmit;
+	snprintf(dbg->layer.name, CAIF_LAYER_NAME_SZ - 1, "dbg%d", channel_id);
+	return &dbg->layer;
+}
+
+static int cfdbgl_receive(struct cflayer *layr, struct cfpkt *pkt)
+{
+	return layr->up->receive(layr->up, pkt);
+}
+
+static int cfdbgl_transmit(struct cflayer *layr, struct cfpkt *pkt)
+{
+	return layr->dn->transmit(layr->dn, pkt);
+}
diff --git a/net/caif/cfdgml.c b/net/caif/cfdgml.c
new file mode 100644
index 000000000000..53194840ecb6
--- /dev/null
+++ b/net/caif/cfdgml.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/stddef.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfsrvl.h>
+#include <net/caif/cfpkt.h>
+
+#define container_obj(layr) ((struct cfsrvl *) layr)
+
+#define DGM_CMD_BIT  0x80
+#define DGM_FLOW_OFF 0x81
+#define DGM_FLOW_ON  0x80
+#define DGM_CTRL_PKT_SIZE 1
+
+static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt);
+static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt);
+
+struct cflayer *cfdgml_create(u8 channel_id, struct dev_info *dev_info)
+{
+	struct cfsrvl *dgm = kmalloc(sizeof(struct cfsrvl), GFP_ATOMIC);
+	if (!dgm) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return NULL;
+	}
+	caif_assert(offsetof(struct cfsrvl, layer) == 0);
+	memset(dgm, 0, sizeof(struct cfsrvl));
+	cfsrvl_init(dgm, channel_id, dev_info);
+	dgm->layer.receive = cfdgml_receive;
+	dgm->layer.transmit = cfdgml_transmit;
+	snprintf(dgm->layer.name, CAIF_LAYER_NAME_SZ - 1, "dgm%d", channel_id);
+	dgm->layer.name[CAIF_LAYER_NAME_SZ - 1] = '\0';
+	return &dgm->layer;
+}
+
+static int cfdgml_receive(struct cflayer *layr, struct cfpkt *pkt)
+{
+	u8 cmd = -1;
+	u8 dgmhdr[3];
+	int ret;
+	caif_assert(layr->up != NULL);
+	caif_assert(layr->receive != NULL);
+	caif_assert(layr->ctrlcmd != NULL);
+
+	if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
+		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+		cfpkt_destroy(pkt);
+		return -EPROTO;
+	}
+
+	if ((cmd & DGM_CMD_BIT) == 0) {
+		if (cfpkt_extr_head(pkt, &dgmhdr, 3) < 0) {
+			pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+			cfpkt_destroy(pkt);
+			return -EPROTO;
+		}
+		ret = layr->up->receive(layr->up, pkt);
+		return ret;
+	}
+
+	switch (cmd) {
+	case DGM_FLOW_OFF:	/* FLOW OFF */
+		layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0);
+		cfpkt_destroy(pkt);
+		return 0;
+	case DGM_FLOW_ON:	/* FLOW ON */
+		layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0);
+		cfpkt_destroy(pkt);
+		return 0;
+	default:
+		cfpkt_destroy(pkt);
+		pr_info("CAIF: %s(): Unknown datagram control %d (0x%x)\n",
+			__func__, cmd, cmd);
+		return -EPROTO;
+	}
+}
+
+static int cfdgml_transmit(struct cflayer *layr, struct cfpkt *pkt)
+{
+	u32 zero = 0;
+	struct caif_payload_info *info;
+	struct cfsrvl *service = container_obj(layr);
+	int ret;
+	if (!cfsrvl_ready(service, &ret))
+		return ret;
+
+	cfpkt_add_head(pkt, &zero, 4);
+
+	/* Add info for MUX-layer to route the packet out. */
+	info = cfpkt_info(pkt);
+	info->channel_id = service->layer.id;
+	/* To optimize alignment, we add up the size of CAIF header
+	 * before payload.
+	 */
+	info->hdr_len = 4;
+	info->dev_info = &service->dev_info;
+	ret = layr->dn->transmit(layr->dn, pkt);
+	if (ret < 0) {
+		u32 tmp32;
+		cfpkt_extr_head(pkt, &tmp32, 4);
+	}
+	return ret;
+}
diff --git a/net/caif/cffrml.c b/net/caif/cffrml.c
new file mode 100644
index 000000000000..e86a4ca3b217
--- /dev/null
+++ b/net/caif/cffrml.c
@@ -0,0 +1,151 @@
+/*
+ * CAIF Framing Layer.
+ *
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/stddef.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/crc-ccitt.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfpkt.h>
+#include <net/caif/cffrml.h>
+
+#define container_obj(layr) container_of(layr, struct cffrml, layer)
+
+struct cffrml {
+	struct cflayer layer;
+	bool dofcs;		/* !< FCS active */
+};
+
+static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt);
+static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt);
+static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
+				int phyid);
+
+static u32 cffrml_rcv_error;
+static u32 cffrml_rcv_checsum_error;
+/* Allocate a zeroed framing layer for physical interface phyid. */
+struct cflayer *cffrml_create(u16 phyid, bool use_fcs)
+{
+	struct cffrml *this = kzalloc(sizeof(*this), GFP_ATOMIC);
+	if (!this) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return NULL;
+	}
+	caif_assert(offsetof(struct cffrml, layer) == 0);
+
+	this->layer.receive = cffrml_receive;
+	this->layer.transmit = cffrml_transmit;
+	this->layer.ctrlcmd = cffrml_ctrlcmd;
+	snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "frm%d", phyid);
+	this->dofcs = use_fcs;
+	this->layer.id = phyid;
+	return (struct cflayer *) this;
+}
+
+void cffrml_set_uplayer(struct cflayer *this, struct cflayer *up)
+{
+	this->up = up;
+}
+
+void cffrml_set_dnlayer(struct cflayer *this, struct cflayer *dn)
+{
+	this->dn = dn;
+}
+
+static u16 cffrml_checksum(u16 chks, void *buf, u16 len)
+{
+	/* FIXME: FCS should be moved to glue in order to use OS-Specific
+	 * solutions
+	 */
+	return crc_ccitt(chks, buf, len);
+}
+
+static int cffrml_receive(struct cflayer *layr, struct cfpkt *pkt)
+{
+	u16 tmp;
+	u16 len;
+	u16 hdrchks;
+	u16 pktchks;
+	struct cffrml *this;
+	this = container_obj(layr);
+
+	cfpkt_extr_head(pkt, &tmp, 2);
+	len = le16_to_cpu(tmp);
+
+	/* Subtract for FCS on length if FCS is not used. */
+	if (!this->dofcs)
+		len -= 2;
+
+	if (cfpkt_setlen(pkt, len) < 0) {
+		++cffrml_rcv_error;
+		pr_err("CAIF: %s():Framing length error (%d)\n", __func__, len);
+		cfpkt_destroy(pkt);
+		return -EPROTO;
+	}
+	/*
+	 * Don't do extract if FCS is false, rather do setlen - then we don't
+	 * get a cache-miss.
+	 */
+	if (this->dofcs) {
+		cfpkt_extr_trail(pkt, &tmp, 2);
+		hdrchks = le16_to_cpu(tmp);
+		pktchks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff);
+		if (pktchks != hdrchks) {
+			cfpkt_add_trail(pkt, &tmp, 2);
+			++cffrml_rcv_error;
+			++cffrml_rcv_checsum_error;
+			pr_info("CAIF: %s(): Frame checksum error "
+				"(0x%x != 0x%x)\n", __func__, hdrchks, pktchks);
+			return -EILSEQ;
+		}
+	}
+	if (cfpkt_erroneous(pkt)) {
+		++cffrml_rcv_error;
+		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+		cfpkt_destroy(pkt);
+		return -EPROTO;
+	}
+	return layr->up->receive(layr->up, pkt);
+}
+
+/* Add FCS (or 2 pad bytes) and a LE16 length header, then send pkt down. */
+static int cffrml_transmit(struct cflayer *layr, struct cfpkt *pkt)
+{
+	u16 tmp;	/* exactly 2 bytes: &tmp is copied raw into the frame */
+	u16 chks, len;
+	int ret;
+	struct cffrml *this = container_obj(layr);
+	if (this->dofcs) {
+		chks = cfpkt_iterate(pkt, cffrml_checksum, 0xffff);
+		tmp = cpu_to_le16(chks);
+		cfpkt_add_trail(pkt, &tmp, 2);
+	} else {
+		cfpkt_pad_trail(pkt, 2);
+	}
+	len = cfpkt_getlen(pkt);
+	tmp = cpu_to_le16(len);
+	cfpkt_add_head(pkt, &tmp, 2);
+	cfpkt_info(pkt)->hdr_len += 2;
+	if (cfpkt_erroneous(pkt)) {
+		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+		return -EPROTO;
+	}
+	ret = layr->dn->transmit(layr->dn, pkt);
+	if (ret < 0) {
+		/* Remove header on faulty packet. */
+		cfpkt_extr_head(pkt, &tmp, 2);
+	}
+	return ret;
+}
+
+static void cffrml_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
+					int phyid)
+{
+	if (layr->up->ctrlcmd)
+		layr->up->ctrlcmd(layr->up, ctrl, layr->id);
+}
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
new file mode 100644
index 000000000000..6fb9f9e96cf8
--- /dev/null
+++ b/net/caif/cfmuxl.c
@@ -0,0 +1,246 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#include <linux/stddef.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <net/caif/cfpkt.h>
+#include <net/caif/cfmuxl.h>
+#include <net/caif/cfsrvl.h>
+#include <net/caif/cffrml.h>
+
+#define container_obj(layr) container_of(layr, struct cfmuxl, layer)
+
+#define CAIF_CTRL_CHANNEL 0
+#define UP_CACHE_SIZE 8
+#define DN_CACHE_SIZE 8
+
+/* State of one MUX layer instance. */
+struct cfmuxl {
+	/* Generic layer embedded in this object (see container_obj()) */
+	struct cflayer layer;
+	/* Upward (service) layers, linked through cflayer.node */
+	struct list_head srvl_list;
+	/* Downward (framing) layers, linked through cflayer.node */
+	struct list_head frml_list;
+	/* Lookup caches for the two lists, indexed by id modulo cache size */
+	struct cflayer *up_cache[UP_CACHE_SIZE];
+	struct cflayer *dn_cache[DN_CACHE_SIZE];
+	/*
+	 * Set when inserting or removing downwards layers.
+	 */
+	spinlock_t transmit_lock;
+
+	/*
+	 * Set when inserting or removing upwards layers.
+	 */
+	spinlock_t receive_lock;
+
+};
+
+static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt);
+static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt);
+static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
+				int phyid);
+static struct cflayer *get_up(struct cfmuxl *muxl, u16 id);
+
+/*
+ * Allocate and initialise a MUX layer.
+ *
+ * Installs the receive/transmit/ctrlcmd handlers, initialises both layer
+ * lists and both locks and names the layer "mux".
+ *
+ * Returns a pointer to the embedded struct cflayer, or NULL if the
+ * allocation fails.
+ */
+struct cflayer *cfmuxl_create(void)
+{
+	/* kzalloc() returns zeroed memory, so no separate memset() is needed. */
+	struct cfmuxl *this = kzalloc(sizeof(*this), GFP_ATOMIC);
+	if (!this)
+		return NULL;
+	this->layer.receive = cfmuxl_receive;
+	this->layer.transmit = cfmuxl_transmit;
+	this->layer.ctrlcmd = cfmuxl_ctrlcmd;
+	INIT_LIST_HEAD(&this->srvl_list);
+	INIT_LIST_HEAD(&this->frml_list);
+	spin_lock_init(&this->transmit_lock);
+	spin_lock_init(&this->receive_lock);
+	snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "mux");
+	return &this->layer;
+}
+
+/*
+ * Register the layer "up" as an upward layer of this MUX by adding it to
+ * srvl_list while holding receive_lock. linkid is not used here.
+ * Always returns 0.
+ */
+int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid)
+{
+	struct cfmuxl *mux = container_obj(layr);
+
+	spin_lock(&mux->receive_lock);
+	list_add(&up->node, &mux->srvl_list);
+	spin_unlock(&mux->receive_lock);
+
+	return 0;
+}
+
+/*
+ * Report whether any upward layer on srvl_list matches the given phyid
+ * according to cfsrvl_phyid_match(). The list is walked under receive_lock.
+ */
+bool cfmuxl_is_phy_inuse(struct cflayer *layr, u8 phyid)
+{
+	struct cfmuxl *mux = container_obj(layr);
+	struct cflayer *srv;
+	bool found = false;
+
+	spin_lock(&mux->receive_lock);
+	list_for_each_entry(srv, &mux->srvl_list, node) {
+		if (!cfsrvl_phyid_match(srv, phyid))
+			continue;
+		/* One user is enough; stop at the first hit. */
+		found = true;
+		break;
+	}
+	spin_unlock(&mux->receive_lock);
+
+	return found;
+}
+
+/*
+ * Look up the upward layer registered for channel_id and return the value
+ * of cfsrvl_getphyid() for it, or 0 when no such layer is found. The lookup
+ * runs under receive_lock.
+ */
+u8 cfmuxl_get_phyid(struct cflayer *layr, u8 channel_id)
+{
+	struct cfmuxl *mux = container_obj(layr);
+	struct cflayer *srv;
+	int id = 0;
+
+	spin_lock(&mux->receive_lock);
+	srv = get_up(mux, channel_id);
+	if (srv)
+		id = cfsrvl_getphyid(srv);
+	spin_unlock(&mux->receive_lock);
+
+	return id;
+}
+
+/*
+ * Register the layer "dn" as a downward layer of this MUX by adding it to
+ * frml_list while holding transmit_lock. phyid is not used here.
+ * Always returns 0.
+ */
+int cfmuxl_set_dnlayer(struct cflayer *layr, struct cflayer *dn, u8 phyid)
+{
+	/* Use container_obj() like the other entry points, not a raw cast. */
+	struct cfmuxl *muxl = container_obj(layr);
+	spin_lock(&muxl->transmit_lock);
+	list_add(&dn->node, &muxl->frml_list);
+	spin_unlock(&muxl->transmit_lock);
+	return 0;
+}
+
+/*
+ * Linear search of a list of layers (linked through cflayer.node) for the
+ * first one whose id equals the given id. Returns NULL if none matches.
+ */
+static struct cflayer *get_from_id(struct list_head *list, u16 id)
+{
+	struct cflayer *cur;
+
+	list_for_each_entry(cur, list, node) {
+		if (cur->id == id)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Unlink the downward layer whose id equals phyid from frml_list.
+ *
+ * The whole dn_cache is cleared first so that the removed layer cannot be
+ * handed out from the cache afterwards. Runs under transmit_lock.
+ *
+ * Returns the removed layer, or NULL if no layer with that id is
+ * registered.
+ */
+struct cflayer *cfmuxl_remove_dnlayer(struct cflayer *layr, u8 phyid)
+{
+	struct cfmuxl *muxl = container_obj(layr);
+	struct cflayer *dn;
+	spin_lock(&muxl->transmit_lock);
+	memset(muxl->dn_cache, 0, sizeof(muxl->dn_cache));
+	dn = get_from_id(&muxl->frml_list, phyid);
+	if (dn != NULL)
+		list_del(&dn->node);
+	spin_unlock(&muxl->transmit_lock);
+	return dn;
+}
+
+/*
+ * Find the upward layer with the given id, consulting up_cache first.
+ * On a cache miss the result of the list search (possibly NULL) is stored
+ * in the cache slot before being returned.
+ *
+ * Invariant: lock is taken
+ */
+static struct cflayer *get_up(struct cfmuxl *muxl, u16 id)
+{
+	struct cflayer **slot = &muxl->up_cache[id % UP_CACHE_SIZE];
+
+	if (*slot != NULL && (*slot)->id == id)
+		return *slot;
+
+	*slot = get_from_id(&muxl->srvl_list, id);
+	return *slot;
+}
+
+/*
+ * Find the downward layer whose id equals dev_info->id, consulting
+ * dn_cache first. On a cache miss the result of the list search (possibly
+ * NULL) is stored in the cache slot before being returned.
+ *
+ * Invariant: lock is taken
+ */
+static struct cflayer *get_dn(struct cfmuxl *muxl, struct dev_info *dev_info)
+{
+	struct cflayer **slot = &muxl->dn_cache[dev_info->id % DN_CACHE_SIZE];
+
+	if (*slot != NULL && (*slot)->id == dev_info->id)
+		return *slot;
+
+	*slot = get_from_id(&muxl->frml_list, dev_info->id);
+	return *slot;
+}
+
+/*
+ * Unlink the upward layer registered under the given id from srvl_list.
+ *
+ * The whole up_cache is cleared so that the removed layer cannot be handed
+ * out from the cache afterwards. Runs under receive_lock.
+ *
+ * Returns the removed layer, or NULL if no layer with that id is
+ * registered.
+ */
+struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id)
+{
+	struct cflayer *up;
+	struct cfmuxl *muxl = container_obj(layr);
+	spin_lock(&muxl->receive_lock);
+	up = get_up(muxl, id);
+	memset(muxl->up_cache, 0, sizeof(muxl->up_cache));
+	/* get_up() returns NULL for an unknown id; do not dereference it. */
+	if (up != NULL)
+		list_del(&up->node);
+	spin_unlock(&muxl->receive_lock);
+	return up;
+}
+
+/*
+ * Demultiplex an incoming packet.
+ *
+ * Strips the 1-byte link id from the head of the packet, looks up the
+ * upward layer registered for that id (under receive_lock) and passes the
+ * packet to its receive().
+ *
+ * Returns -EPROTO (packet destroyed) if the link id cannot be extracted,
+ * 0 (packet destroyed) if no layer is registered for the id, otherwise the
+ * result of the upper layer's receive().
+ */
+static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
+{
+	int ret;
+	struct cfmuxl *muxl = container_obj(layr);
+	u8 id;
+	struct cflayer *up;
+	if (cfpkt_extr_head(pkt, &id, 1) < 0) {
+		pr_err("CAIF: %s(): erroneous Caif Packet\n", __func__);
+		cfpkt_destroy(pkt);
+		return -EPROTO;
+	}
+
+	spin_lock(&muxl->receive_lock);
+	up = get_up(muxl, id);
+	spin_unlock(&muxl->receive_lock);
+	if (up == NULL) {
+		pr_info("CAIF: %s():Received data on unknown link ID = %d "
+			"(0x%x)	 up == NULL\n", __func__, id, id);
+		cfpkt_destroy(pkt);
+		/*
+		 * Don't return ERROR, since modem misbehaves and sends out
+		 * flow on before linksetup response.
+		 */
+		return 0;
+	}
+
+	ret = up->receive(up, pkt);
+	return ret;
+}
+
+/*
+ * Route an outgoing packet to the downward layer selected by the dev_info
+ * stored in the packet info, after prepending the 1-byte channel id.
+ *
+ * Returns -ENOTCONN if no downward layer matches, otherwise the result of
+ * that layer's transmit(). On failure the channel id byte is removed
+ * again.
+ */
+static int cfmuxl_transmit(struct cflayer *layr, struct cfpkt *pkt)
+{
+	struct cfmuxl *muxl = container_obj(layr);
+	struct caif_payload_info *info = cfpkt_info(pkt);
+	struct cflayer *dn;
+	u8 linkid;
+	int ret;
+
+	/*
+	 * NOTE(review): get_dn() is commented "Invariant: lock is taken",
+	 * but no lock is held here - confirm whether this is intended.
+	 */
+	dn = get_dn(muxl, cfpkt_info(pkt)->dev_info);
+	if (dn == NULL) {
+		pr_warning("CAIF: %s(): Send data on unknown phy "
+			   "ID = %d (0x%x)\n",
+			   __func__, info->dev_info->id, info->dev_info->id);
+		return -ENOTCONN;
+	}
+
+	info->hdr_len += 1;
+	linkid = info->channel_id;
+	cfpkt_add_head(pkt, &linkid, 1);
+
+	ret = dn->transmit(dn, pkt);
+	if (ret >= 0)
+		return ret;
+
+	/* Remove MUX protocol header upon error. */
+	cfpkt_extr_head(pkt, &linkid, 1);
+	return ret;
+}
+
+/*
+ * Fan a control command out to every upward layer on srvl_list that
+ * matches phyid according to cfsrvl_phyid_match().
+ */
+static void cfmuxl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
+				int phyid)
+{
+	struct cfmuxl *mux = container_obj(layr);
+	struct cflayer *srv;
+
+	list_for_each_entry(srv, &mux->srvl_list, node) {
+		if (!cfsrvl_phyid_match(srv, phyid))
+			continue;
+		srv->ctrlcmd(srv, ctrl, phyid);
+	}
+}
diff --git a/net/caif/cfrfml.c b/net/caif/cfrfml.c
new file mode 100644
index 000000000000..cd2830fec935
--- /dev/null
+++ b/net/caif/cfrfml.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/stddef.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfsrvl.h>
+#include <net/caif/cfpkt.h>
+
+#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
+
+#define RFM_SEGMENTATION_BIT 0x01
+#define RFM_PAYLOAD  0x00
+#define RFM_CMD_BIT  0x80
+#define RFM_FLOW_OFF 0x81
+#define RFM_FLOW_ON  0x80
+#define RFM_SET_PIN  0x82
+#define RFM_CTRL_PKT_SIZE 1
+
+static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt);
+static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt);
+static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl);
+
+/*
+ * Allocate and initialise an RFM service layer for the given channel.
+ *
+ * The common service state is set up by cfsrvl_init(); the modemcmd,
+ * receive and transmit handlers are then pointed at the RFM versions and
+ * the layer is named "rfm<channel_id>".
+ *
+ * Returns a pointer to the embedded struct cflayer, or NULL if the
+ * allocation fails.
+ */
+struct cflayer *cfrfml_create(u8 channel_id, struct dev_info *dev_info)
+{
+	/* kzalloc() returns zeroed memory, so no separate memset() is needed. */
+	struct cfsrvl *rfm = kzalloc(sizeof(*rfm), GFP_ATOMIC);
+	if (!rfm) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return NULL;
+	}
+	caif_assert(offsetof(struct cfsrvl, layer) == 0);
+	cfsrvl_init(rfm, channel_id, dev_info);
+	rfm->layer.modemcmd = cfservl_modemcmd;
+	rfm->layer.receive = cfrfml_receive;
+	rfm->layer.transmit = cfrfml_transmit;
+	snprintf(rfm->layer.name, CAIF_LAYER_NAME_SZ, "rfm%d", channel_id);
+	return &rfm->layer;
+}
+
+/*
+ * modemcmd handler installed by cfrfml_create(): every modem command is
+ * rejected with -EPROTO; both arguments are ignored.
+ */
+static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
+{
+       return -EPROTO;
+}
+
+/*
+ * Receive a packet on an RFM channel.
+ *
+ * Strips the 1-byte RFM header and passes the remainder to the layer
+ * above. Segmented packets (RFM_SEGMENTATION_BIT set) are not handled;
+ * they only trip a caif_assert().
+ *
+ * Returns -EPROTO (packet destroyed) if the header cannot be extracted,
+ * otherwise the result of the upper layer's receive().
+ */
+static int cfrfml_receive(struct cflayer *layr, struct cfpkt *pkt)
+{
+	u8 tmp;
+	bool segmented;
+	int ret;
+	caif_assert(layr->up != NULL);
+	/* Check the handler that is actually called below. */
+	caif_assert(layr->up->receive != NULL);
+
+	/*
+	 * RFM is taking care of segmentation and stripping of
+	 * segmentation bit.
+	 */
+	if (cfpkt_extr_head(pkt, &tmp, 1) < 0) {
+		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+		cfpkt_destroy(pkt);
+		return -EPROTO;
+	}
+	segmented = tmp & RFM_SEGMENTATION_BIT;
+	caif_assert(!segmented);
+
+	ret = layr->up->receive(layr->up, pkt);
+	return ret;
+}
+
+/*
+ * Send a payload packet on an RFM channel.
+ *
+ * Prepends a 1-byte header of value 0, fills in the routing information
+ * (channel id, header length, device info) for the MUX layer and passes
+ * the packet to the layer below.
+ *
+ * Returns the error set by cfsrvl_ready() if the service cannot send,
+ * -EOVERFLOW if the packet is longer than CAIF_MAX_PAYLOAD_SIZE, -EPROTO
+ * if the header cannot be added, otherwise the result of the lower layer's
+ * transmit(). If the lower layer fails, the header byte is removed again.
+ */
+static int cfrfml_transmit(struct cflayer *layr, struct cfpkt *pkt)
+{
+	u8 tmp = 0;
+	int ret;
+	struct cfsrvl *service = container_obj(layr);
+
+	caif_assert(layr->dn != NULL);
+	caif_assert(layr->dn->transmit != NULL);
+
+	if (!cfsrvl_ready(service, &ret))
+		return ret;
+
+	/*
+	 * Compare the length itself: "!len > MAX" negates first and is
+	 * therefore never true.
+	 */
+	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
+		pr_err("CAIF: %s():Packet too large - size=%d\n",
+			__func__, cfpkt_getlen(pkt));
+		return -EOVERFLOW;
+	}
+	if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
+		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+		return -EPROTO;
+	}
+
+	/* Add info for MUX-layer to route the packet out. */
+	cfpkt_info(pkt)->channel_id = service->layer.id;
+	/*
+	 * To optimize alignment, we add up the size of CAIF header before
+	 * payload.
+	 */
+	cfpkt_info(pkt)->hdr_len = 1;
+	cfpkt_info(pkt)->dev_info = &service->dev_info;
+	ret = layr->dn->transmit(layr->dn, pkt);
+	if (ret < 0)
+		cfpkt_extr_head(pkt, &tmp, 1);
+	return ret;
+}
diff --git a/net/caif/cfserl.c b/net/caif/cfserl.c
new file mode 100644
index 000000000000..06029ea2da2f
--- /dev/null
+++ b/net/caif/cfserl.c
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/stddef.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfpkt.h>
+#include <net/caif/cfserl.h>
+
+#define container_obj(layr) ((struct cfserl *) layr)
+
+#define CFSERL_STX 0x02
+#define CAIF_MINIUM_PACKET_SIZE 4
+/* State of one serial layer instance. */
+struct cfserl {
+	/* Generic layer embedded in this object */
+	struct cflayer layer;
+	/* Partial frame kept between cfserl_receive() calls, or NULL */
+	struct cfpkt *incomplete_frm;
+	/* Protects parallel processing of incoming packets */
+	spinlock_t sync;
+	/* True when each frame is preceded by a CFSERL_STX byte */
+	bool usestx;
+};
+#define STXLEN(layr) (layr->usestx ? 1 : 0)
+
+static int cfserl_receive(struct cflayer *layr, struct cfpkt *pkt);
+static int cfserl_transmit(struct cflayer *layr, struct cfpkt *pkt);
+static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
+				int phyid);
+
+/*
+ * Allocate and initialise a serial layer.
+ *
+ * type is stored in the layer, use_stx selects whether frames carry a
+ * leading STX byte; instance is not used. The layer is named "ser1".
+ *
+ * Returns a pointer to the embedded struct cflayer, or NULL if the
+ * allocation fails.
+ */
+struct cflayer *cfserl_create(int type, int instance, bool use_stx)
+{
+	/* kzalloc() returns zeroed memory, so no separate memset() is needed. */
+	struct cfserl *this = kzalloc(sizeof(*this), GFP_ATOMIC);
+	if (!this) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return NULL;
+	}
+	caif_assert(offsetof(struct cfserl, layer) == 0);
+	this->layer.receive = cfserl_receive;
+	this->layer.transmit = cfserl_transmit;
+	this->layer.ctrlcmd = cfserl_ctrlcmd;
+	this->layer.type = type;
+	this->usestx = use_stx;
+	spin_lock_init(&this->sync);
+	snprintf(this->layer.name, CAIF_LAYER_NAME_SZ, "ser1");
+	return &this->layer;
+}
+
+/*
+ * Reassemble frames from incoming serial data and pass each complete
+ * frame to the layer above.
+ *
+ * newpkt is first appended to any partial frame kept from an earlier call.
+ * Then, for each frame in the data: when STX is in use, bytes are skipped
+ * up to the STX marker; the 2-byte little-endian length is peeked; if the
+ * frame is not complete yet the data is parked in incomplete_frm; else the
+ * frame is split off and delivered upward with the lock released.
+ *
+ * Returns 0 when the data was delivered or parked, -EPROTO when it was
+ * discarded (no STX found, or an invalid length while STX is not in use).
+ */
+static int cfserl_receive(struct cflayer *l, struct cfpkt *newpkt)
+{
+	struct cfserl *layr = container_obj(l);
+	u16 pkt_len;
+	struct cfpkt *pkt = NULL;
+	struct cfpkt *tail_pkt = NULL;
+	u8 tmp8;
+	u16 tmp;
+	u8 stx = CFSERL_STX;
+	int ret;
+	u16 expectlen = 0;
+	caif_assert(newpkt != NULL);
+	spin_lock(&layr->sync);
+
+	if (layr->incomplete_frm != NULL) {
+
+		/* expectlen is still 0 at this point. */
+		layr->incomplete_frm =
+		    cfpkt_append(layr->incomplete_frm, newpkt, expectlen);
+		pkt = layr->incomplete_frm;
+	} else {
+		pkt = newpkt;
+	}
+	/* pkt now owns the data; it is parked again below if still partial. */
+	layr->incomplete_frm = NULL;
+
+	do {
+		/* Search for STX at start of pkt if STX is used */
+		if (layr->usestx) {
+			cfpkt_extr_head(pkt, &tmp8, 1);
+			if (tmp8 != CFSERL_STX) {
+				while (cfpkt_more(pkt)
+				       && tmp8 != CFSERL_STX) {
+					cfpkt_extr_head(pkt, &tmp8, 1);
+				}
+				if (!cfpkt_more(pkt)) {
+					cfpkt_destroy(pkt);
+					layr->incomplete_frm = NULL;
+					spin_unlock(&layr->sync);
+					return -EPROTO;
+				}
+			}
+		}
+
+		pkt_len = cfpkt_getlen(pkt);
+
+		/*
+		 *  pkt_len is the accumulated length of the packet data
+		 *  we have received so far.
+		 *  Exit if frame doesn't hold length.
+		 */
+
+		if (pkt_len < 2) {
+			/* Put the consumed STX back before parking the data. */
+			if (layr->usestx)
+				cfpkt_add_head(pkt, &stx, 1);
+			layr->incomplete_frm = pkt;
+			spin_unlock(&layr->sync);
+			return 0;
+		}
+
+		/*
+		 *  Find length of frame.
+		 *  expectlen is the length we need for a full frame.
+		 */
+		cfpkt_peek_head(pkt, &tmp, 2);
+		expectlen = le16_to_cpu(tmp) + 2;
+		/*
+		 * Frame error handling
+		 */
+		if (expectlen < CAIF_MINIUM_PACKET_SIZE
+		    || expectlen > CAIF_MAX_FRAMESIZE) {
+			if (!layr->usestx) {
+				if (pkt != NULL)
+					cfpkt_destroy(pkt);
+				layr->incomplete_frm = NULL;
+				expectlen = 0;
+				spin_unlock(&layr->sync);
+				return -EPROTO;
+			}
+			/*
+			 * With STX: jump to the loop condition; pkt is still
+			 * set, so the next pass searches for the next STX.
+			 */
+			continue;
+		}
+
+		if (pkt_len < expectlen) {
+			/* Too little received data */
+			if (layr->usestx)
+				cfpkt_add_head(pkt, &stx, 1);
+			layr->incomplete_frm = pkt;
+			spin_unlock(&layr->sync);
+			return 0;
+		}
+
+		/*
+		 * Enough data for at least one frame.
+		 * Split the frame, if too long
+		 */
+		if (pkt_len > expectlen)
+			tail_pkt = cfpkt_split(pkt, expectlen);
+		else
+			tail_pkt = NULL;
+
+		/* Send the first part of packet upwards.*/
+		spin_unlock(&layr->sync);
+		ret = layr->layer.up->receive(layr->layer.up, pkt);
+		spin_lock(&layr->sync);
+		/*
+		 * NOTE(review): pkt is used again on -EILSEQ, so the upper
+		 * layer presumably leaves it intact for this error - confirm.
+		 */
+		if (ret == -EILSEQ) {
+			if (layr->usestx) {
+				if (tail_pkt != NULL)
+					pkt = cfpkt_append(pkt, tail_pkt, 0);
+
+				/* Start search for next STX if frame failed */
+				continue;
+			} else {
+				cfpkt_destroy(pkt);
+				pkt = NULL;
+			}
+		}
+
+		/* Continue with whatever followed the delivered frame. */
+		pkt = tail_pkt;
+
+	} while (pkt != NULL);
+
+	spin_unlock(&layr->sync);
+	return 0;
+}
+
+/*
+ * Send a frame over the serial link.
+ *
+ * When STX is in use a CFSERL_STX byte is prepended before the packet is
+ * passed to the layer below. Returns the lower layer's transmit() result;
+ * if that fails, the STX byte (when one was added) is removed again.
+ */
+static int cfserl_transmit(struct cflayer *layer, struct cfpkt *newpkt)
+{
+	struct cfserl *layr = container_obj(layer);
+	int ret;
+	u8 tmp8 = CFSERL_STX;
+	if (layr->usestx)
+		cfpkt_add_head(newpkt, &tmp8, 1);
+	ret = layer->dn->transmit(layer->dn, newpkt);
+	/* Only undo what was added: without STX there is nothing to strip. */
+	if (ret < 0 && layr->usestx)
+		cfpkt_extr_head(newpkt, &tmp8, 1);
+
+	return ret;
+}
+
+/*
+ * Pass a control command unchanged to the layer above. Neither layr->up
+ * nor its ctrlcmd handler is checked for NULL here.
+ */
+static void cfserl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
+				int phyid)
+{
+	layr->up->ctrlcmd(layr->up, ctrl, phyid);
+}
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
new file mode 100644
index 000000000000..d470c51c6431
--- /dev/null
+++ b/net/caif/cfsrvl.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfsrvl.h>
+#include <net/caif/cfpkt.h>
+
+#define SRVL_CTRL_PKT_SIZE 1
+#define SRVL_FLOW_OFF 0x81
+#define SRVL_FLOW_ON  0x80
+#define SRVL_SET_PIN  0x82
+#define SRVL_CTRL_PKT_SIZE 1
+
+#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
+
+/*
+ * Common control command handler for service layers.
+ *
+ * Tracks three flags in struct cfsrvl - open, phy_flow_on and
+ * modem_flow_on - and forwards commands to the layer above. A flow on/off
+ * indication from one source (phy or modem) is forwarded upward only while
+ * the flag of the other source is on; the flag of the reporting source is
+ * updated after the upward call.
+ */
+static void cfservl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
+				int phyid)
+{
+	struct cfsrvl *service = container_obj(layr);
+	caif_assert(layr->up != NULL);
+	caif_assert(layr->up->ctrlcmd != NULL);
+	switch (ctrl) {
+	case CAIF_CTRLCMD_INIT_RSP:
+		/* Mark the service open before telling the upper layer. */
+		service->open = true;
+		layr->up->ctrlcmd(layr->up, ctrl, phyid);
+		break;
+	case CAIF_CTRLCMD_DEINIT_RSP:
+	case CAIF_CTRLCMD_INIT_FAIL_RSP:
+		service->open = false;
+		layr->up->ctrlcmd(layr->up, ctrl, phyid);
+		break;
+	case _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND:
+		/* Ignore indications for a different physical interface. */
+		if (phyid != service->dev_info.id)
+			break;
+		if (service->modem_flow_on)
+			layr->up->ctrlcmd(layr->up,
+					  CAIF_CTRLCMD_FLOW_OFF_IND, phyid);
+		service->phy_flow_on = false;
+		break;
+	case _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND:
+		/* Ignore indications for a different physical interface. */
+		if (phyid != service->dev_info.id)
+			return;
+		if (service->modem_flow_on) {
+			layr->up->ctrlcmd(layr->up,
+					   CAIF_CTRLCMD_FLOW_ON_IND,
+					   phyid);
+		}
+		service->phy_flow_on = true;
+		break;
+	case CAIF_CTRLCMD_FLOW_OFF_IND:
+		if (service->phy_flow_on) {
+			layr->up->ctrlcmd(layr->up,
+					  CAIF_CTRLCMD_FLOW_OFF_IND, phyid);
+		}
+		service->modem_flow_on = false;
+		break;
+	case CAIF_CTRLCMD_FLOW_ON_IND:
+		if (service->phy_flow_on) {
+			layr->up->ctrlcmd(layr->up,
+					  CAIF_CTRLCMD_FLOW_ON_IND, phyid);
+		}
+		service->modem_flow_on = true;
+		break;
+	case _CAIF_CTRLCMD_PHYIF_DOWN_IND:
+		/* In case interface is down, let's fake a remove shutdown */
+		layr->up->ctrlcmd(layr->up,
+				CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, phyid);
+		break;
+	case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
+		layr->up->ctrlcmd(layr->up, ctrl, phyid);
+		break;
+	default:
+		pr_warning("CAIF: %s(): "
+			   "Unexpected ctrl in cfsrvl (%d)\n", __func__, ctrl);
+		/* We have both modem and phy flow on, send flow on */
+		/*
+		 * NOTE(review): the code forwards the unexpected command as
+		 * is and sets phy_flow_on; it does not check either flow
+		 * flag - confirm this is the intended behaviour.
+		 */
+		layr->up->ctrlcmd(layr->up, ctrl, phyid);
+		service->phy_flow_on = true;
+		break;
+	}
+}
+
+/*
+ * Send a flow control request to the modem.
+ *
+ * For CAIF_MODEMCMD_FLOW_ON_REQ / CAIF_MODEMCMD_FLOW_OFF_REQ a one-byte
+ * packet carrying SRVL_FLOW_ON / SRVL_FLOW_OFF is built, tagged with the
+ * routing information for the MUX layer and passed to the layer below.
+ *
+ * Returns -EINVAL for any other command, -ENOMEM if no packet could be
+ * created, -EPROTO (packet destroyed) if the command byte cannot be added,
+ * otherwise the result of the lower layer's transmit().
+ */
+static int cfservl_modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
+{
+	struct cfsrvl *service = container_obj(layr);
+	struct caif_payload_info *info;
+	struct cfpkt *pkt;
+	u8 flow_cmd;
+	caif_assert(layr != NULL);
+	caif_assert(layr->dn != NULL);
+	caif_assert(layr->dn->transmit != NULL);
+
+	/* Both requests differ only in the command byte that is sent. */
+	switch (ctrl) {
+	case CAIF_MODEMCMD_FLOW_ON_REQ:
+		flow_cmd = SRVL_FLOW_ON;
+		break;
+	case CAIF_MODEMCMD_FLOW_OFF_REQ:
+		flow_cmd = SRVL_FLOW_OFF;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	pkt = cfpkt_create(SRVL_CTRL_PKT_SIZE);
+	/* Checked for both requests; the flow-off path used to skip this. */
+	if (!pkt) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return -ENOMEM;
+	}
+
+	if (cfpkt_add_head(pkt, &flow_cmd, 1) < 0) {
+		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+		cfpkt_destroy(pkt);
+		return -EPROTO;
+	}
+	info = cfpkt_info(pkt);
+	info->channel_id = service->layer.id;
+	info->hdr_len = 1;
+	info->dev_info = &service->dev_info;
+	return layr->dn->transmit(layr->dn, pkt);
+}
+
+/* Release a layer with kfree(); the pointer must not be used afterwards. */
+void cfservl_destroy(struct cflayer *layer)
+{
+	kfree(layer);
+}
+
+/*
+ * Set up the state shared by all service layers: the layer id is set to
+ * channel_id, the common ctrlcmd/modemcmd handlers are installed, *dev_info
+ * is copied into the service, and the service starts closed with both flow
+ * flags on.
+ */
+void cfsrvl_init(struct cfsrvl *service,
+		 u8 channel_id,
+		 struct dev_info *dev_info)
+{
+	struct cflayer *layer = &service->layer;
+
+	caif_assert(offsetof(struct cfsrvl, layer) == 0);
+
+	/* Identity and default handlers of the embedded layer. */
+	layer->id = channel_id;
+	layer->ctrlcmd = cfservl_ctrlcmd;
+	layer->modemcmd = cfservl_modemcmd;
+
+	/* The service keeps its own copy of the device information. */
+	service->dev_info = *dev_info;
+
+	/* Not open yet; flow is on for both modem and phy. */
+	service->open = false;
+	service->modem_flow_on = true;
+	service->phy_flow_on = true;
+}
+
+/*
+ * Tell whether the service may transmit: it must be open and both the
+ * modem and phy flow flags must be on. When false is returned, *err is set
+ * to -ENOTCONN if the service is not open, else to -EAGAIN. *err is left
+ * untouched when true is returned.
+ */
+bool cfsrvl_ready(struct cfsrvl *service, int *err)
+{
+	if (!service->open) {
+		*err = -ENOTCONN;
+		return false;
+	}
+
+	if (service->modem_flow_on && service->phy_flow_on)
+		return true;
+
+	/* Open, but flow is off in at least one direction. */
+	caif_assert(!(service->modem_flow_on && service->phy_flow_on));
+	*err = -EAGAIN;
+	return false;
+}
+/* Return the dev_info.id of the service that embeds the given layer. */
+u8 cfsrvl_getphyid(struct cflayer *layer)
+{
+	return container_obj(layer)->dev_info.id;
+}
+
+/* True when the service embedding the layer has dev_info.id == phyid. */
+bool cfsrvl_phyid_match(struct cflayer *layer, int phyid)
+{
+	return container_obj(layer)->dev_info.id == phyid;
+}
diff --git a/net/caif/cfutill.c b/net/caif/cfutill.c
new file mode 100644
index 000000000000..5fd2c9ea8b42
--- /dev/null
+++ b/net/caif/cfutill.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfsrvl.h>
+#include <net/caif/cfpkt.h>
+
+#define container_obj(layr) ((struct cfsrvl *) layr)
+#define UTIL_PAYLOAD  0x00
+#define UTIL_CMD_BIT  0x80
+#define UTIL_REMOTE_SHUTDOWN 0x82
+#define UTIL_FLOW_OFF 0x81
+#define UTIL_FLOW_ON  0x80
+#define UTIL_CTRL_PKT_SIZE 1
+static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt);
+static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt);
+
+/*
+ * Allocate and initialise a utility service layer for the given channel.
+ *
+ * The common service state is set up by cfsrvl_init(); the receive and
+ * transmit handlers are then pointed at the utility versions and the layer
+ * is named "util1".
+ *
+ * Returns a pointer to the embedded struct cflayer, or NULL if the
+ * allocation fails.
+ */
+struct cflayer *cfutill_create(u8 channel_id, struct dev_info *dev_info)
+{
+	/* kzalloc() returns zeroed memory, so no separate memset() is needed. */
+	struct cfsrvl *util = kzalloc(sizeof(*util), GFP_ATOMIC);
+	if (!util) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return NULL;
+	}
+	caif_assert(offsetof(struct cfsrvl, layer) == 0);
+	cfsrvl_init(util, channel_id, dev_info);
+	util->layer.receive = cfutill_receive;
+	util->layer.transmit = cfutill_transmit;
+	snprintf(util->layer.name, CAIF_LAYER_NAME_SZ - 1, "util1");
+	return &util->layer;
+}
+
+/*
+ * Receive a packet on a utility channel.
+ *
+ * The first byte selects the action: payload is passed to the layer above;
+ * flow on/off and remote shutdown are turned into control commands on this
+ * layer and the packet is destroyed. Remote shutdown also marks the
+ * service as not open.
+ *
+ * Returns the upper layer's receive() result for payload, 0 for handled
+ * control packets, -EPROTO (packet destroyed) if the command byte cannot
+ * be extracted or is unknown.
+ */
+static int cfutill_receive(struct cflayer *layr, struct cfpkt *pkt)
+{
+	u8 cmd = -1;
+	struct cfsrvl *service = container_obj(layr);
+
+	caif_assert(layr != NULL);
+	caif_assert(layr->up != NULL);
+	caif_assert(layr->up->receive != NULL);
+	caif_assert(layr->up->ctrlcmd != NULL);
+
+	if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
+		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+		cfpkt_destroy(pkt);
+		return -EPROTO;
+	}
+
+	/* Payload keeps the packet alive and hands it to the upper layer. */
+	if (cmd == UTIL_PAYLOAD)
+		return layr->up->receive(layr->up, pkt);
+
+	switch (cmd) {
+	case UTIL_FLOW_OFF:
+		layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0);
+		break;
+	case UTIL_FLOW_ON:
+		layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0);
+		break;
+	case UTIL_REMOTE_SHUTDOWN:	/* Remote Shutdown Request */
+		pr_err("CAIF: %s(): REMOTE SHUTDOWN REQUEST RECEIVED\n",
+			__func__);
+		layr->ctrlcmd(layr, CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND, 0);
+		service->open = false;
+		break;
+	default:
+		cfpkt_destroy(pkt);
+		pr_warning("CAIF: %s(): Unknown service control %d (0x%x)\n",
+			   __func__, cmd, cmd);
+		return -EPROTO;
+	}
+
+	/* Every handled control packet ends here. */
+	cfpkt_destroy(pkt);
+	return 0;
+}
+
+/*
+ * Send a payload packet on a utility channel.
+ *
+ * Prepends a 1-byte header of value 0, fills in the routing information
+ * (channel id, header length, device info) for the MUX layer and passes
+ * the packet to the layer below.
+ *
+ * Returns the error set by cfsrvl_ready() if the service cannot send,
+ * -EOVERFLOW if the packet is longer than CAIF_MAX_PAYLOAD_SIZE, -EPROTO
+ * if the header cannot be added, otherwise the result of the lower layer's
+ * transmit(). If the lower layer fails, the header byte is removed again.
+ */
+static int cfutill_transmit(struct cflayer *layr, struct cfpkt *pkt)
+{
+	u8 zero = 0;
+	struct caif_payload_info *info;
+	int ret;
+	struct cfsrvl *service = container_obj(layr);
+	caif_assert(layr != NULL);
+	caif_assert(layr->dn != NULL);
+	caif_assert(layr->dn->transmit != NULL);
+	if (!cfsrvl_ready(service, &ret))
+		return ret;
+
+	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
+		pr_err("CAIF: %s(): packet too large size=%d\n",
+			__func__, cfpkt_getlen(pkt));
+		return -EOVERFLOW;
+	}
+
+	/* Same handling as the RFM and VEI transmit paths. */
+	if (cfpkt_add_head(pkt, &zero, 1) < 0) {
+		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+		return -EPROTO;
+	}
+	/* Add info for MUX-layer to route the packet out. */
+	info = cfpkt_info(pkt);
+	info->channel_id = service->layer.id;
+	/*
+	 * To optimize alignment, we add up the size of CAIF header before
+	 * payload.
+	 */
+	info->hdr_len = 1;
+	info->dev_info = &service->dev_info;
+	ret = layr->dn->transmit(layr->dn, pkt);
+	/* Strip exactly the one header byte added above, not four. */
+	if (ret < 0)
+		cfpkt_extr_head(pkt, &zero, 1);
+	return ret;
+}
diff --git a/net/caif/cfveil.c b/net/caif/cfveil.c
new file mode 100644
index 000000000000..0fd827f49491
--- /dev/null
+++ b/net/caif/cfveil.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/stddef.h>
+#include <linux/slab.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfsrvl.h>
+#include <net/caif/cfpkt.h>
+
+#define VEI_PAYLOAD  0x00
+#define VEI_CMD_BIT  0x80
+#define VEI_FLOW_OFF 0x81
+#define VEI_FLOW_ON  0x80
+#define VEI_SET_PIN  0x82
+#define VEI_CTRL_PKT_SIZE 1
+#define container_obj(layr) container_of(layr, struct cfsrvl, layer)
+
+static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt);
+static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt);
+
+/*
+ * Allocate and initialise a VEI service layer for the given channel.
+ *
+ * The common service state is set up by cfsrvl_init(); the receive and
+ * transmit handlers are then pointed at the VEI versions and the layer is
+ * named "vei<channel_id>".
+ *
+ * Returns a pointer to the embedded struct cflayer, or NULL if the
+ * allocation fails.
+ */
+struct cflayer *cfvei_create(u8 channel_id, struct dev_info *dev_info)
+{
+	/* kzalloc() returns zeroed memory, so no separate memset() is needed. */
+	struct cfsrvl *vei = kzalloc(sizeof(*vei), GFP_ATOMIC);
+	if (!vei) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return NULL;
+	}
+	caif_assert(offsetof(struct cfsrvl, layer) == 0);
+	cfsrvl_init(vei, channel_id, dev_info);
+	vei->layer.receive = cfvei_receive;
+	vei->layer.transmit = cfvei_transmit;
+	snprintf(vei->layer.name, CAIF_LAYER_NAME_SZ - 1, "vei%d", channel_id);
+	return &vei->layer;
+}
+
+/*
+ * Receive a packet on a VEI channel.
+ *
+ * The first byte selects the action: payload is passed to the layer above;
+ * flow on/off are turned into control commands on this layer; VEI_SET_PIN
+ * packets are dropped. The packet is destroyed in every non-payload case.
+ *
+ * Returns the upper layer's receive() result for payload, 0 for handled
+ * control packets, -EPROTO if the command byte cannot be extracted or is
+ * unknown.
+ */
+static int cfvei_receive(struct cflayer *layr, struct cfpkt *pkt)
+{
+	u8 cmd;
+	int ret;
+	caif_assert(layr->up != NULL);
+	/* Check the handlers that are actually called below. */
+	caif_assert(layr->up->receive != NULL);
+	caif_assert(layr->ctrlcmd != NULL);
+
+
+	if (cfpkt_extr_head(pkt, &cmd, 1) < 0) {
+		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+		cfpkt_destroy(pkt);
+		return -EPROTO;
+	}
+	switch (cmd) {
+	case VEI_PAYLOAD:
+		ret = layr->up->receive(layr->up, pkt);
+		return ret;
+	case VEI_FLOW_OFF:
+		layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_OFF_IND, 0);
+		cfpkt_destroy(pkt);
+		return 0;
+	case VEI_FLOW_ON:
+		layr->ctrlcmd(layr, CAIF_CTRLCMD_FLOW_ON_IND, 0);
+		cfpkt_destroy(pkt);
+		return 0;
+	case VEI_SET_PIN:	/* SET RS232 PIN */
+		cfpkt_destroy(pkt);
+		return 0;
+	default:		/* Unknown control packet */
+		pr_warning("CAIF: %s():Unknown VEI control packet %d (0x%x)!\n",
+			   __func__, cmd, cmd);
+		cfpkt_destroy(pkt);
+		return -EPROTO;
+	}
+}
+
+/*
+ * Send a payload packet on a VEI channel.
+ *
+ * Prepends a 1-byte header of value 0, fills in the routing information
+ * (channel id, header length, device info) for the MUX layer and passes
+ * the packet to the layer below.
+ *
+ * Returns the error set by cfsrvl_ready() if the service cannot send,
+ * -EOVERFLOW if the packet is longer than CAIF_MAX_PAYLOAD_SIZE, -EPROTO
+ * if the header cannot be added, otherwise the result of the lower layer's
+ * transmit(). If the lower layer fails, the header byte is removed again.
+ */
+static int cfvei_transmit(struct cflayer *layr, struct cfpkt *pkt)
+{
+	u8 tmp = 0;
+	struct caif_payload_info *info;
+	int ret;
+	struct cfsrvl *service = container_obj(layr);
+	if (!cfsrvl_ready(service, &ret))
+		return ret;
+	caif_assert(layr->dn != NULL);
+	caif_assert(layr->dn->transmit != NULL);
+	/*
+	 * Compare the length itself: "!len > MAX" negates first and is
+	 * therefore never true.
+	 */
+	if (cfpkt_getlen(pkt) > CAIF_MAX_PAYLOAD_SIZE) {
+		pr_warning("CAIF: %s(): Packet too large - size=%d\n",
+			   __func__, cfpkt_getlen(pkt));
+		return -EOVERFLOW;
+	}
+
+	if (cfpkt_add_head(pkt, &tmp, 1) < 0) {
+		pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+		return -EPROTO;
+	}
+
+	/* Add info-> for MUX-layer to route the packet out. */
+	info = cfpkt_info(pkt);
+	info->channel_id = service->layer.id;
+	info->hdr_len = 1;
+	info->dev_info = &service->dev_info;
+	ret = layr->dn->transmit(layr->dn, pkt);
+	if (ret < 0)
+		cfpkt_extr_head(pkt, &tmp, 1);
+	return ret;
+}
diff --git a/net/caif/cfvidl.c b/net/caif/cfvidl.c
new file mode 100644
index 000000000000..89ad4ea239f1
--- /dev/null
+++ b/net/caif/cfvidl.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfsrvl.h>
+#include <net/caif/cfpkt.h>
+
+#define container_obj(layr) ((struct cfsrvl *) layr)
+
+static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt);
+static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt);
+
+/*
+ * Allocate and initialise a video service layer for the given channel.
+ *
+ * The common service state is set up by cfsrvl_init(); the receive and
+ * transmit handlers are then pointed at the video versions and the layer
+ * is named "vid1".
+ *
+ * Returns a pointer to the embedded struct cflayer, or NULL if the
+ * allocation fails.
+ */
+struct cflayer *cfvidl_create(u8 channel_id, struct dev_info *dev_info)
+{
+	/* kzalloc() returns zeroed memory, so no separate memset() is needed. */
+	struct cfsrvl *vid = kzalloc(sizeof(*vid), GFP_ATOMIC);
+	if (!vid) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return NULL;
+	}
+	caif_assert(offsetof(struct cfsrvl, layer) == 0);
+
+	cfsrvl_init(vid, channel_id, dev_info);
+	vid->layer.receive = cfvidl_receive;
+	vid->layer.transmit = cfvidl_transmit;
+	snprintf(vid->layer.name, CAIF_LAYER_NAME_SZ - 1, "vid1");
+	return &vid->layer;
+}
+
+/*
+ * Receive a packet on a video channel: strip the 4-byte header (its value
+ * is not inspected) and pass the rest to the layer above.
+ *
+ * Returns -EPROTO (packet destroyed) if the header cannot be extracted,
+ * otherwise the result of the upper layer's receive().
+ */
+static int cfvidl_receive(struct cflayer *layr, struct cfpkt *pkt)
+{
+	u32 hdr;
+
+	if (cfpkt_extr_head(pkt, &hdr, 4) >= 0)
+		return layr->up->receive(layr->up, pkt);
+
+	pr_err("CAIF: %s(): Packet is erroneous!\n", __func__);
+	cfpkt_destroy(pkt);
+	return -EPROTO;
+}
+
+/*
+ * Send a packet on a video channel: prepend a 4-byte header of value 0,
+ * fill in channel id and device info for the MUX layer and pass the packet
+ * to the layer below.
+ *
+ * Returns the error set by cfsrvl_ready() if the service cannot send,
+ * otherwise the result of the lower layer's transmit(). If the lower layer
+ * fails, the 4-byte header is removed again.
+ */
+static int cfvidl_transmit(struct cflayer *layr, struct cfpkt *pkt)
+{
+	struct cfsrvl *service = container_obj(layr);
+	struct caif_payload_info *route;
+	u32 hdr = 0;
+	int err;
+
+	if (!cfsrvl_ready(service, &err))
+		return err;
+
+	cfpkt_add_head(pkt, &hdr, 4);
+
+	/*
+	 * Add info for MUX-layer to route the packet out.
+	 * NOTE(review): unlike the other service layers, hdr_len is not
+	 * set here - confirm whether that is intended.
+	 */
+	route = cfpkt_info(pkt);
+	route->channel_id = service->layer.id;
+	route->dev_info = &service->dev_info;
+
+	err = layr->dn->transmit(layr->dn, pkt);
+	if (err >= 0)
+		return err;
+
+	cfpkt_extr_head(pkt, &hdr, 4);
+	return err;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 15c9ac0c80e390df09ce5730a7b08b13e07a8dd5 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Tue, 30 Mar 2010 13:56:24 +0000
Subject: net-caif: add CAIF generic caif support functions

Support functions for the caif protocol stack:
cfcnfg.c        - CAIF Configuration Module used for
                  adding and removing drivers and connection
cfpkt_skbuff.c  - CAIF Packet layer (SKB helper functions)

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfcnfg.c       | 529 ++++++++++++++++++++++++++++++++++++++++++++
 net/caif/cfpkt_skbuff.c | 571 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1100 insertions(+)
 create mode 100644 net/caif/cfcnfg.c
 create mode 100644 net/caif/cfpkt_skbuff.c

(limited to 'net')

diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
new file mode 100644
index 000000000000..70a733d3d3da
--- /dev/null
+++ b/net/caif/cfcnfg.c
@@ -0,0 +1,529 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfpkt.h>
+#include <net/caif/cfcnfg.h>
+#include <net/caif/cfctrl.h>
+#include <net/caif/cfmuxl.h>
+#include <net/caif/cffrml.h>
+#include <net/caif/cfserl.h>
+#include <net/caif/cfsrvl.h>
+
+#include <linux/module.h>
+#include <asm/atomic.h>
+
+#define MAX_PHY_LAYERS 7
+#define PHY_NAME_LEN 20
+
+#define container_obj(layr) container_of(layr, struct cfcnfg, layer)
+
+/* Per-interface bookkeeping kept by the Config Module; one entry per slot of
+ * cfcnfg.phy_layers[], used to manage the registered physical interfaces
+ */
+struct cfcnfg_phyinfo {
+	/* Layer below the MUX (framing layer); NULL while the slot is free */
+	struct cflayer *frm_layer;
+	/* Pointer to the lowest actual physical layer */
+	struct cflayer *phy_layer;
+	/* Unique identifier of the physical interface (its slot index) */
+	unsigned int id;
+	/* Preference class of the physical interface */
+	enum cfcnfg_phy_preference pref;
+
+	/* Reference count, number of channels using the device */
+	int phy_ref_count;
+
+	/* Information about the physical device */
+	struct dev_info dev_info;
+};
+
+struct cfcnfg {
+	struct cflayer layer;	/* this layer; container_obj() maps it back */
+	struct cflayer *ctrl;	/* control layer, from cfctrl_create() */
+	struct cflayer *mux;	/* mux layer, from cfmuxl_create() */
+	u8 last_phyid;		/* next phy id to try, 1..MAX_PHY_LAYERS-1 */
+	struct cfcnfg_phyinfo phy_layers[MAX_PHY_LAYERS]; /* indexed by phy id */
+};
+
+static void cncfg_linkup_rsp(struct cflayer *layer, u8 linkid,
+			     enum cfctrl_srv serv, u8 phyid,
+			     struct cflayer *adapt_layer);
+static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid,
+				  struct cflayer *client_layer);
+static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid,
+			     struct cflayer *adapt_layer);
+static void cfctrl_resp_func(void);
+static void cfctrl_enum_resp(void);
+
+/*
+ * Allocate the CAIF configuration layer together with its mux and control
+ * layers and wire them up: ctrl becomes up-layer 0 of the mux, the mux is
+ * ctrl's down-layer and the new configuration object is ctrl's up-layer.
+ * The control layer's response table is pointed at this module's handlers.
+ *
+ * Returns the new object, or NULL if any allocation failed.
+ */
+struct cfcnfg *cfcnfg_create(void)
+{
+	struct cfcnfg *this;
+	struct cfctrl_rsp *resp;
+
+	/* Initiate this layer; kzalloc() hands the memory back zeroed */
+	this = kzalloc(sizeof(*this), GFP_ATOMIC);
+	if (!this) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return NULL;
+	}
+	this->mux = cfmuxl_create();
+	if (!this->mux)
+		goto out_of_mem;
+	this->ctrl = cfctrl_create();
+	if (!this->ctrl)
+		goto out_of_mem;
+	/* Initiate response functions */
+	resp = cfctrl_get_respfuncs(this->ctrl);
+	resp->enum_rsp = cfctrl_enum_resp;
+	resp->linkerror_ind = cfctrl_resp_func;
+	resp->linkdestroy_rsp = cncfg_linkdestroy_rsp;
+	resp->sleep_rsp = cfctrl_resp_func;
+	resp->wake_rsp = cfctrl_resp_func;
+	resp->restart_rsp = cfctrl_resp_func;
+	resp->radioset_rsp = cfctrl_resp_func;
+	resp->linksetup_rsp = cncfg_linkup_rsp;
+	resp->reject_rsp = cncfg_reject_rsp;
+
+	/* Phy id 0 is never handed out, so allocation starts at 1 */
+	this->last_phyid = 1;
+
+	cfmuxl_set_uplayer(this->mux, this->ctrl, 0);
+	layer_set_dn(this->ctrl, this->mux);
+	layer_set_up(this->ctrl, this);
+	return this;
+out_of_mem:
+	pr_warning("CAIF: %s(): Out of memory\n", __func__);
+	/* Members not yet allocated are still NULL here; kfree(NULL) is fine */
+	kfree(this->mux);
+	kfree(this->ctrl);
+	kfree(this);
+	return NULL;
+}
+EXPORT_SYMBOL(cfcnfg_create);
+
+/*
+ * Free a configuration layer created by cfcnfg_create(), including its mux
+ * and control layers. A NULL argument is ignored.
+ */
+void cfcnfg_remove(struct cfcnfg *cfg)
+{
+	if (!cfg)
+		return;
+
+	kfree(cfg->mux);
+	kfree(cfg->ctrl);
+	kfree(cfg);
+}
+
+static void cfctrl_resp_func(void)
+{
+	/* Intentionally empty: installed for responses this module ignores */
+}
+
+static void cfctrl_enum_resp(void)
+{
+	/* Intentionally empty: the enumerate response needs no action here */
+}
+
+/*
+ * Pick a physical interface for a new connection.
+ *
+ * Slots 1..MAX_PHY_LAYERS-1 are searched first for a registered interface
+ * (id equal to its slot index, framing layer present) with preference
+ * phy_pref. If none matches, the first slot whose id equals its index is
+ * used instead. Returns that slot's dev_info, or NULL if nothing was found.
+ */
+struct dev_info *cfcnfg_get_phyid(struct cfcnfg *cnfg,
+				  enum cfcnfg_phy_preference phy_pref)
+{
+	struct cfcnfg_phyinfo *phy;
+	u16 slot;
+
+	/* Try to match with specified preference */
+	for (slot = 1; slot < MAX_PHY_LAYERS; slot++) {
+		phy = &cnfg->phy_layers[slot];
+		if (phy->id != slot || phy->pref != phy_pref ||
+		    phy->frm_layer == NULL)
+			continue;
+		caif_assert(cnfg->phy_layers != NULL);
+		caif_assert(phy->id == slot);
+		return &phy->dev_info;
+	}
+
+	/* Otherwise just return something */
+	for (slot = 1; slot < MAX_PHY_LAYERS; slot++) {
+		phy = &cnfg->phy_layers[slot];
+		if (phy->id != slot)
+			continue;
+		caif_assert(cnfg->phy_layers != NULL);
+		caif_assert(phy->id == slot);
+		return &phy->dev_info;
+	}
+
+	return NULL;
+}
+
+/*
+ * Look up the table entry of a registered physical interface by its id.
+ * Only slots with a framing layer attached are considered. Returns NULL
+ * when no such slot carries phyid.
+ */
+static struct cfcnfg_phyinfo *cfcnfg_get_phyinfo(struct cfcnfg *cnfg,
+							u8 phyid)
+{
+	struct cfcnfg_phyinfo *phy;
+	int slot;
+
+	for (slot = 0; slot < MAX_PHY_LAYERS; slot++) {
+		phy = &cnfg->phy_layers[slot];
+		if (phy->frm_layer == NULL)
+			continue;
+		if (phy->id == phyid)
+			return phy;
+	}
+	return NULL;
+}
+
+/*
+ * Find a registered physical interface by the name of its physical layer.
+ * Returns the id of the matching framing layer, or 0 when no registered
+ * interface has that name.
+ */
+int cfcnfg_get_named(struct cfcnfg *cnfg, char *name)
+{
+	struct cfcnfg_phyinfo *phy;
+	int slot;
+
+	/* Try to match with specified name */
+	for (slot = 0; slot < MAX_PHY_LAYERS; slot++) {
+		phy = &cnfg->phy_layers[slot];
+		if (phy->frm_layer == NULL)
+			continue;
+		if (strcmp(phy->phy_layer->name, name) == 0)
+			return phy->frm_layer->id;
+	}
+	return 0;
+}
+
+/*
+ * NOTE: What happens on destroy failure:
+ *	 1a) No response - Too early
+ *	      This will not happen because enumerate has already
+ *	      completed.
+ *	 1b) No response - FATAL
+ *	      Not handled, but this should be a CAIF PROTOCOL ERROR
+ *	      Modem error, response is really expected -  this
+ *	      case is not really handled.
+ *	 2) O/E-bit indicate error
+ *	      Ignored - this link is destroyed anyway.
+ *	 3) Not able to match on request
+ *	      Not handled, but this should be a CAIF PROTOCOL ERROR
+ *	 4) Link-Error - (no response)
+ *	      Not handled, but this should be a CAIF PROTOCOL ERROR
+ */
+
+/*
+ * Ask the modem to tear down the channel used by adap_layer.
+ *
+ * Returns -ENOTCONN if the layer has no channel id, -ENODEV if it has no
+ * layer below it or its physical interface is not registered, -EINVAL if
+ * the registration data is inconsistent, otherwise the result of
+ * cfctrl_linkdown_req().
+ *
+ * Once the physical interface has been found its reference count is
+ * dropped, also on the -EINVAL path. When the count reaches zero the
+ * interface is told, through modemcmd() if it has one, that it is no
+ * longer in use.
+ */
+int cfcnfg_del_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
+{
+	struct cfcnfg_phyinfo *phy;
+	struct cflayer *below;
+	u8 linkid;
+	u8 phyid;
+	int err;
+
+	caif_assert(adap_layer != NULL);
+	linkid = adap_layer->id;
+	if (linkid == 0) {
+		pr_err("CAIF: %s():adap_layer->id is 0\n", __func__);
+		return -ENOTCONN;
+	}
+
+	below = adap_layer->dn;
+	if (below == NULL) {
+		pr_err("CAIF: %s():adap_layer->dn is NULL\n", __func__);
+		return -ENODEV;
+	}
+
+	phyid = cfsrvl_getphyid(below);
+	phy = cfcnfg_get_phyinfo(cnfg, phyid);
+	if (phy == NULL) {
+		pr_warning("CAIF: %s(): No interface to send disconnect to\n",
+			   __func__);
+		return -ENODEV;
+	}
+
+	if (phy->id == phyid &&
+	    phy->phy_layer->id == phyid &&
+	    phy->frm_layer->id == phyid) {
+		err = cfctrl_linkdown_req(cnfg->ctrl, linkid, adap_layer);
+	} else {
+		pr_err("CAIF: %s(): Inconsistency in phy registration\n",
+			__func__);
+		err = -EINVAL;
+	}
+
+	/* Drop the channel's reference on the interface */
+	if (--phy->phy_ref_count == 0 &&
+	    phy->phy_layer != NULL &&
+	    phy->phy_layer->modemcmd != NULL)
+		phy->phy_layer->modemcmd(phy->phy_layer,
+					 _CAIF_MODEMCMD_PHYIF_USELESS);
+	return err;
+}
+EXPORT_SYMBOL(cfcnfg_del_adapt_layer);
+
+/*
+ * Control-layer callback for a link-destroy response: detach the service
+ * layer of channel linkid from the MUX, notify it with
+ * CAIF_CTRLCMD_DEINIT_RSP and destroy it. Nothing is destroyed if the MUX
+ * has no such service, if the service does not belong to client_layer, or
+ * if it has no ctrlcmd handler.
+ */
+static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid,
+				  struct cflayer *client_layer)
+{
+	struct cfcnfg *cfg = container_obj(layer);
+	struct cflayer *srv;
+
+	/*
+	 * 1) Remove service from the MUX layer. The MUX must
+	 *    guarantee that no more payload is sent "upwards" (receive)
+	 */
+	srv = cfmuxl_remove_uplayer(cfg->mux, linkid);
+	if (srv == NULL) {
+		pr_err("CAIF: %s(): PROTOCOL ERROR "
+		       "- Error removing service_layer Linkid(%d)",
+			__func__, linkid);
+		return;
+	}
+	caif_assert(linkid == srv->id);
+
+	if (!(srv == client_layer || srv->up == client_layer)) {
+		pr_err("CAIF: %s(): Error removing service_layer "
+		       "Linkid(%d) %p %p",
+			__func__, linkid, (void *) srv,
+			(void *) client_layer);
+		return;
+	}
+
+	/*
+	 * 2) DEINIT_RSP must guarantee that no more packets are transmitted
+	 *    from client (adap_layer) when it returns.
+	 */
+	if (srv->ctrlcmd == NULL) {
+		pr_err("CAIF: %s(): Error servl->ctrlcmd == NULL", __func__);
+		return;
+	}
+	srv->ctrlcmd(srv, CAIF_CTRLCMD_DEINIT_RSP, 0);
+
+	/* 3) It is now safe to destroy the service layer. */
+	cfservl_destroy(srv);
+}
+
+/*
+ * NOTE: What happens on linksetup failure:
+ *	 1a) No response - Too early
+ *	      This will not happen because enumerate is secured
+ *	      before using interface.
+ *	 1b) No response - FATAL
+ *	      Not handled, but this should be a CAIF PROTOCOL ERROR
+ *	      Modem error, response is really expected -  this case is
+ *	      not really handled.
+ *	 2) O/E-bit indicate error
+ *	      Handled in cncfg_reject_rsp
+ *	 3) Not able to match on request
+ *	      Not handled, but this should be a CAIF PROTOCOL ERROR
+ *	 4) Link-Error - (no response)
+ *	      Not handled, but this should be a CAIF PROTOCOL ERROR
+ */
+
+/*
+ * Request a new channel towards the modem on behalf of adap_layer.
+ *
+ * @cnfg:	configuration layer
+ * @param:	link parameters; param->phyid selects the physical interface
+ * @adap_layer:	client layer, must provide receive() and ctrlcmd()
+ *
+ * Issues an enumerate request followed by a link-up request on the control
+ * layer. Returns 0 once both have been issued, -EINVAL if adap_layer is
+ * unusable and -ENODEV if param->phyid does not name a registered physical
+ * interface.
+ */
+int
+cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
+				struct cfctrl_link_param *param,
+				struct cflayer *adap_layer)
+{
+	struct cflayer *frml;
+	if (adap_layer == NULL) {
+		pr_err("CAIF: %s(): adap_layer is zero", __func__);
+		return -EINVAL;
+	}
+	if (adap_layer->receive == NULL) {
+		pr_err("CAIF: %s(): adap_layer->receive is NULL", __func__);
+		return -EINVAL;
+	}
+	if (adap_layer->ctrlcmd == NULL) {
+		pr_err("CAIF: %s(): adap_layer->ctrlcmd == NULL", __func__);
+		return -EINVAL;
+	}
+	/* phyid indexes phy_layers[]; never read outside the table */
+	if (param->phyid >= MAX_PHY_LAYERS) {
+		pr_err("CAIF: %s(): PHY id %d is out of range\n",
+			__func__, param->phyid);
+		return -ENODEV;
+	}
+	frml = cnfg->phy_layers[param->phyid].frm_layer;
+	if (frml == NULL) {
+		pr_err("CAIF: %s(): Specified PHY type does not exist!",
+			__func__);
+		return -ENODEV;
+	}
+	caif_assert(param->phyid == cnfg->phy_layers[param->phyid].id);
+	caif_assert(cnfg->phy_layers[param->phyid].frm_layer->id ==
+		     param->phyid);
+	caif_assert(cnfg->phy_layers[param->phyid].phy_layer->id ==
+		     param->phyid);
+	/* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */
+	cfctrl_enum_req(cnfg->ctrl, param->phyid);
+	cfctrl_linkup_request(cnfg->ctrl, param, adap_layer);
+	return 0;
+}
+EXPORT_SYMBOL(cfcnfg_add_adaptation_layer);
+
+/*
+ * Control-layer callback for a rejected link setup: report
+ * CAIF_CTRLCMD_INIT_FAIL_RSP to the adaptation layer, if there is one and
+ * it has a ctrlcmd handler.
+ */
+static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid,
+			     struct cflayer *adapt_layer)
+{
+	if (adapt_layer == NULL || adapt_layer->ctrlcmd == NULL)
+		return;
+
+	adapt_layer->ctrlcmd(adapt_layer, CAIF_CTRLCMD_INIT_FAIL_RSP, 0);
+}
+
+/*
+ * Control-layer callback for a successful link setup.
+ *
+ * @layer:	the configuration layer (embedded in struct cfcnfg)
+ * @linkid:	channel id assigned to the new link
+ * @serv:	service type of the link, selects which service layer to create
+ * @phyid:	physical interface the link runs on
+ * @adapt_layer: client layer that requested the link
+ *
+ * Takes a reference on the physical interface (telling it that it is in use
+ * when this is the first reference), creates the matching service layer,
+ * stacks it between the MUX and adapt_layer and signals
+ * CAIF_CTRLCMD_INIT_RSP. Unmatched responses, out-of-range phy ids, unknown
+ * service types and allocation failures are logged and otherwise ignored.
+ */
+static void
+cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, enum cfctrl_srv serv,
+		 u8 phyid, struct cflayer *adapt_layer)
+{
+	struct cfcnfg *cnfg = container_obj(layer);
+	struct cflayer *servicel = NULL;
+	struct cfcnfg_phyinfo *phyinfo;
+	if (adapt_layer == NULL) {
+		pr_err("CAIF: %s(): PROTOCOL ERROR "
+			"- LinkUp Request/Response did not match\n", __func__);
+		return;
+	}
+
+	/* phyid indexes phy_layers[]; refuse ids outside the table */
+	if (phyid >= MAX_PHY_LAYERS) {
+		pr_err("CAIF: %s(): PROTOCOL ERROR "
+			"- PHY id %d is out of range\n", __func__, phyid);
+		return;
+	}
+
+	caif_assert(cnfg != NULL);
+	caif_assert(phyid != 0);
+	phyinfo = &cnfg->phy_layers[phyid];
+	caif_assert(phyinfo->id == phyid);
+	caif_assert(phyinfo->phy_layer != NULL);
+	caif_assert(phyinfo->phy_layer->id == phyid);
+
+	/* First channel on this interface: tell the driver it is needed */
+	if (phyinfo->phy_ref_count++ == 0 &&
+	    phyinfo->phy_layer != NULL &&
+	    phyinfo->phy_layer->modemcmd != NULL) {
+		caif_assert(phyinfo->phy_layer->id == phyid);
+		phyinfo->phy_layer->modemcmd(phyinfo->phy_layer,
+					     _CAIF_MODEMCMD_PHYIF_USEFULL);
+
+	}
+	adapt_layer->id = linkid;
+
+	switch (serv) {
+	case CFCTRL_SRV_VEI:
+		servicel = cfvei_create(linkid, &phyinfo->dev_info);
+		break;
+	case CFCTRL_SRV_DATAGRAM:
+		servicel = cfdgml_create(linkid, &phyinfo->dev_info);
+		break;
+	case CFCTRL_SRV_RFM:
+		servicel = cfrfml_create(linkid, &phyinfo->dev_info);
+		break;
+	case CFCTRL_SRV_UTIL:
+		servicel = cfutill_create(linkid, &phyinfo->dev_info);
+		break;
+	case CFCTRL_SRV_VIDEO:
+		servicel = cfvidl_create(linkid, &phyinfo->dev_info);
+		break;
+	case CFCTRL_SRV_DBG:
+		servicel = cfdbgl_create(linkid, &phyinfo->dev_info);
+		break;
+	default:
+		pr_err("CAIF: %s(): Protocol error. "
+			"Link setup response - unknown channel type\n",
+			__func__);
+		return;
+	}
+	if (!servicel) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		return;
+	}
+	/* Stack: mux <-> service layer <-> adaptation layer */
+	layer_set_dn(servicel, cnfg->mux);
+	cfmuxl_set_uplayer(cnfg->mux, servicel, linkid);
+	layer_set_up(servicel, adapt_layer);
+	layer_set_dn(adapt_layer, servicel);
+	servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0);
+}
+
+/*
+ * Register a physical interface with the configuration layer.
+ *
+ * @cnfg:	configuration layer
+ * @phy_type:	CFPHYTYPE_FRAG inserts a serial layer between the framing
+ *		layer and phy_layer; CFPHYTYPE_CAIF connects them directly
+ * @dev:	stored in the interface's dev_info
+ * @phy_layer:	the physical layer being registered
+ * @phyid:	out: id assigned to the interface, 0 if no id was available
+ * @pref:	preference class later matched by cfcnfg_get_phyid()
+ * @fcs:	passed to cffrml_create()
+ * @stx:	passed to cfserl_create()
+ *
+ * A free id in 1..MAX_PHY_LAYERS-1 is chosen, a framing layer is created
+ * and connected below the MUX, and the table entry for the id is filled in.
+ * All layers are allocated before phy_layer or the table is modified, so an
+ * allocation failure leaves no half-registered interface behind.
+ */
+void
+cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
+		     void *dev, struct cflayer *phy_layer, u16 *phyid,
+		     enum cfcnfg_phy_preference pref,
+		     bool fcs, bool stx)
+{
+	struct cflayer *frml;
+	struct cflayer *phy_driver = NULL;
+	struct cfcnfg_phyinfo *phyinfo;
+	int i;
+
+
+	if (cnfg->phy_layers[cnfg->last_phyid].frm_layer == NULL) {
+		*phyid = cnfg->last_phyid;
+
+		/* range: * 1..(MAX_PHY_LAYERS-1) */
+		cnfg->last_phyid =
+		    (cnfg->last_phyid % (MAX_PHY_LAYERS - 1)) + 1;
+	} else {
+		*phyid = 0;
+		for (i = 1; i < MAX_PHY_LAYERS; i++) {
+			if (cnfg->phy_layers[i].frm_layer == NULL) {
+				*phyid = i;
+				break;
+			}
+		}
+	}
+	if (*phyid == 0) {
+		pr_err("CAIF: %s(): No Available PHY ID\n", __func__);
+		return;
+	}
+
+	switch (phy_type) {
+	case CFPHYTYPE_FRAG:
+		phy_driver =
+		    cfserl_create(CFPHYTYPE_FRAG, *phyid, stx);
+		if (!phy_driver) {
+			pr_warning("CAIF: %s(): Out of memory\n", __func__);
+			return;
+		}
+
+		break;
+	case CFPHYTYPE_CAIF:
+		phy_driver = NULL;
+		break;
+	default:
+		pr_err("CAIF: %s(): %d", __func__, phy_type);
+		return;
+	}
+
+	/* Allocate the framing layer before any state is modified */
+	frml = cffrml_create(*phyid, fcs);
+	if (!frml) {
+		pr_warning("CAIF: %s(): Out of memory\n", __func__);
+		/* NULL for CFPHYTYPE_CAIF; kfree(NULL) is a no-op */
+		kfree(phy_driver);
+		return;
+	}
+
+	phy_layer->id = *phyid;
+	phy_layer->type = phy_type;
+
+	phyinfo = &cnfg->phy_layers[*phyid];
+	phyinfo->pref = pref;
+	phyinfo->id = *phyid;
+	phyinfo->dev_info.id = *phyid;
+	phyinfo->dev_info.dev = dev;
+	phyinfo->phy_layer = phy_layer;
+	phyinfo->phy_ref_count = 0;
+	phyinfo->frm_layer = frml;
+
+	cfmuxl_set_dnlayer(cnfg->mux, frml, *phyid);
+	layer_set_up(frml, cnfg->mux);
+
+	if (phy_driver != NULL) {
+		/* mux <-> frml <-> phy_driver <-> phy_layer */
+		phy_driver->id = *phyid;
+		layer_set_dn(frml, phy_driver);
+		layer_set_up(phy_driver, frml);
+		layer_set_dn(phy_driver, phy_layer);
+		layer_set_up(phy_layer, phy_driver);
+	} else {
+		/* mux <-> frml <-> phy_layer */
+		layer_set_dn(frml, phy_layer);
+		layer_set_up(phy_layer, frml);
+	}
+}
+EXPORT_SYMBOL(cfcnfg_add_phy_layer);
+
+/*
+ * Unregister a physical interface: clear its table entry, detach its
+ * framing layer from the MUX and free it, and free the layer that sat
+ * between the framing layer and phy_layer, if there was one. phy_layer
+ * itself is only disconnected from the stack, not freed. Always returns 0.
+ */
+int cfcnfg_del_phy_layer(struct cfcnfg *cnfg, struct cflayer *phy_layer)
+{
+	struct cfcnfg_phyinfo *slot;
+	struct cflayer *framing;
+	struct cflayer *below;
+	u16 phyid;
+
+	phyid = phy_layer->id;
+	slot = &cnfg->phy_layers[phyid];
+	caif_assert(phyid == slot->id);
+	caif_assert(phy_layer == slot->phy_layer);
+	caif_assert(phy_layer->id == phyid);
+	caif_assert(slot->frm_layer->id == phyid);
+
+	memset(slot, 0, sizeof(*slot));
+
+	framing = cfmuxl_remove_dnlayer(cnfg->mux, phy_layer->id);
+	below = framing->dn;
+	cffrml_set_uplayer(framing, NULL);
+	cffrml_set_dnlayer(framing, NULL);
+	kfree(framing);
+
+	/* An intermediate layer (not phy_layer itself) is owned by us */
+	if (below != phy_layer) {
+		layer_set_up(below, NULL);
+		layer_set_dn(below, NULL);
+		kfree(below);
+	}
+	layer_set_up(phy_layer, NULL);
+	return 0;
+}
+EXPORT_SYMBOL(cfcnfg_del_phy_layer);
diff --git a/net/caif/cfpkt_skbuff.c b/net/caif/cfpkt_skbuff.c
new file mode 100644
index 000000000000..83fff2ff6658
--- /dev/null
+++ b/net/caif/cfpkt_skbuff.c
@@ -0,0 +1,571 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/string.h>
+#include <linux/skbuff.h>
+#include <linux/hardirq.h>
+#include <net/caif/cfpkt.h>
+
+#define PKT_PREFIX CAIF_NEEDED_HEADROOM	/* headroom reserved in new packets */
+#define PKT_POSTFIX CAIF_NEEDED_TAILROOM	/* extra room added at the tail */
+#define PKT_LEN_WHEN_EXTENDING 128	/* minimum growth in cfpkt_add_body() */
+#define PKT_ERROR(pkt, errmsg) do {	   \
+    cfpkt_priv(pkt)->erronous = true;	   \
+    skb_reset_tail_pointer(&pkt->skb);	   \
+    pr_warning("CAIF: " errmsg);\
+  } while (0)	/* flags pkt as erroneous, resets its tail pointer, logs errmsg */
+
+struct cfpktq {
+	struct sk_buff_head head;	/* the queued packets */
+	atomic_t count;			/* number of queued packets */
+	/* Taken around queue, peek and dequeue operations on head */
+	spinlock_t lock;
+};
+
+/*
+ * The rest of net/caif/ is generic and does not understand SKBs; it only
+ * sees struct cfpkt, which is converted to and from sk_buff by a typecast
+ */
+struct cfpkt {
+	struct sk_buff skb;
+};
+
+/* Private data kept in the SKB control buffer (skb->cb), see cfpkt_priv() */
+struct cfpkt_priv_data {
+	struct dev_info dev_info;
+	bool erronous;	/* set by PKT_ERROR(); most operations then fail */
+};
+
+/* Return the CAIF private area stored in the packet's SKB control buffer. */
+inline struct cfpkt_priv_data *cfpkt_priv(struct cfpkt *pkt)
+{
+	void *cb = pkt->skb.cb;
+
+	return cb;
+}
+
+/* True once the packet has been flagged as erroneous. */
+inline bool is_erronous(struct cfpkt *pkt)
+{
+	struct cfpkt_priv_data *priv = cfpkt_priv(pkt);
+
+	return priv->erronous;
+}
+
+/* Return the SKB embedded in a CAIF packet. */
+inline struct sk_buff *pkt_to_skb(struct cfpkt *pkt)
+{
+	struct sk_buff *skb = &pkt->skb;
+
+	return skb;
+}
+
+/* View an SKB as a CAIF packet; this is a plain pointer conversion. */
+inline struct cfpkt *skb_to_pkt(struct sk_buff *skb)
+{
+	void *raw = skb;
+
+	return raw;
+}
+
+
+/*
+ * Wrap a native packet (an SKB) as a CAIF packet and clear its error flag.
+ * The dir argument is not used.
+ */
+struct cfpkt *cfpkt_fromnative(enum caif_direction dir, void *nativepkt)
+{
+	struct cfpkt *wrapped = skb_to_pkt(nativepkt);
+	struct cfpkt_priv_data *priv = cfpkt_priv(wrapped);
+
+	priv->erronous = false;
+	return wrapped;
+}
+EXPORT_SYMBOL(cfpkt_fromnative);
+
+/* Return the native packet behind a CAIF packet (same pointer, untyped). */
+void *cfpkt_tonative(struct cfpkt *pkt)
+{
+	void *native = pkt;
+
+	return native;
+}
+EXPORT_SYMBOL(cfpkt_tonative);
+
+/*
+ * Allocate a packet with room for len bytes plus pfx bytes of reserved
+ * headroom. The allocation is atomic when called in interrupt context and
+ * may sleep otherwise. Returns NULL if the allocation fails.
+ */
+static struct cfpkt *cfpkt_create_pfx(u16 len, u16 pfx)
+{
+	gfp_t gfp = likely(in_interrupt()) ? GFP_ATOMIC : GFP_KERNEL;
+	struct sk_buff *skb = alloc_skb(len + pfx, gfp);
+
+	if (unlikely(skb == NULL))
+		return NULL;
+
+	skb_reserve(skb, pfx);
+	return skb_to_pkt(skb);
+}
+
+/*
+ * Allocate a packet for len bytes of payload, with PKT_PREFIX bytes of
+ * headroom and PKT_POSTFIX extra bytes at the tail. Returns NULL on failure.
+ */
+inline struct cfpkt *cfpkt_create(u16 len)
+{
+	struct cfpkt *pkt = cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX);
+
+	return pkt;
+}
+EXPORT_SYMBOL(cfpkt_create);
+
+/* Release a packet by dropping the reference on its SKB. */
+void cfpkt_destroy(struct cfpkt *pkt)
+{
+	kfree_skb(pkt_to_skb(pkt));
+}
+EXPORT_SYMBOL(cfpkt_destroy);
+
+/* True while the packet still holds data. */
+inline bool cfpkt_more(struct cfpkt *pkt)
+{
+	return pkt_to_skb(pkt)->len > 0;
+}
+EXPORT_SYMBOL(cfpkt_more);
+
+/*
+ * Copy the first len bytes of the packet into data without consuming them.
+ *
+ * If the bytes are available in the linear part of the SKB they are copied
+ * directly. Otherwise they are extracted with cfpkt_extr_head() and put
+ * back with cfpkt_add_head().
+ *
+ * Returns 0 on success and a negative error code on failure, on both paths.
+ */
+int cfpkt_peek_head(struct cfpkt *pkt, void *data, u16 len)
+{
+	struct sk_buff *skb = pkt_to_skb(pkt);
+	int err;
+
+	if (skb_headlen(skb) >= len) {
+		memcpy(data, skb->data, len);
+		return 0;
+	}
+
+	/* Slow path: pull the header off, then push it back */
+	err = cfpkt_extr_head(pkt, data, len);
+	if (err < 0)
+		return err;
+	return cfpkt_add_head(pkt, data, len);
+}
+EXPORT_SYMBOL(cfpkt_peek_head);
+
+/*
+ * Remove len bytes from the front of the packet and copy them to data.
+ *
+ * Returns 0 on success. Returns -EPROTO if the packet is already flagged as
+ * erroneous, if it holds fewer than len bytes, or if it had to be
+ * linearized and that failed; in the last two cases the packet is flagged
+ * as erroneous.
+ */
+int cfpkt_extr_head(struct cfpkt *pkt, void *data, u16 len)
+{
+	struct sk_buff *skb = pkt_to_skb(pkt);
+	u8 *newhead;
+
+	if (unlikely(is_erronous(pkt)))
+		return -EPROTO;
+
+	if (unlikely(len > skb->len)) {
+		PKT_ERROR(pkt, "cfpkt_extr_head read beyond end of packet\n");
+		return -EPROTO;
+	}
+
+	/* Only linearize when the linear part is too short */
+	if (unlikely(len > skb_headlen(skb)) &&
+	    unlikely(skb_linearize(skb) != 0)) {
+		PKT_ERROR(pkt, "cfpkt_extr_head linearize failed\n");
+		return -EPROTO;
+	}
+
+	/* skb_pull() returns the new start; the pulled bytes precede it */
+	newhead = skb_pull(skb, len);
+	memcpy(data, newhead - len, len);
+	return 0;
+}
+EXPORT_SYMBOL(cfpkt_extr_head);
+
+/*
+ * Remove len bytes from the end of the packet and copy them to dta.
+ *
+ * The packet is linearized first. Returns 0 on success. Returns -EPROTO if
+ * the packet is already flagged as erroneous, if linearizing fails, or if
+ * it holds fewer than len bytes; in the last two cases the packet is
+ * flagged as erroneous.
+ */
+int cfpkt_extr_trail(struct cfpkt *pkt, void *dta, u16 len)
+{
+	struct sk_buff *skb = pkt_to_skb(pkt);
+	u8 *out = dta;
+	u8 *tail;
+
+	if (unlikely(is_erronous(pkt)))
+		return -EPROTO;
+
+	if (unlikely(skb_linearize(skb) != 0)) {
+		PKT_ERROR(pkt, "cfpkt_extr_trail linearize failed\n");
+		return -EPROTO;
+	}
+
+	tail = skb_tail_pointer(skb);
+	if (unlikely(skb->data + len > tail)) {
+		PKT_ERROR(pkt, "cfpkt_extr_trail read beyond end of packet\n");
+		return -EPROTO;
+	}
+
+	/* Shorten the packet, then copy out what used to be its last bytes */
+	skb_trim(skb, skb->len - len);
+	memcpy(out, tail - len, len);
+	return 0;
+}
+EXPORT_SYMBOL(cfpkt_extr_trail);
+
+/*
+ * Extend the packet by len bytes at the tail without writing any data.
+ * Returns what cfpkt_add_body() returns: 0 on success, -EPROTO on failure.
+ */
+int cfpkt_pad_trail(struct cfpkt *pkt, u16 len)
+{
+	int err = cfpkt_add_body(pkt, NULL, len);
+
+	return err;
+}
+EXPORT_SYMBOL(cfpkt_pad_trail);
+
+/*
+ * Append len bytes to the end of the packet.
+ *
+ * @pkt:	packet to extend
+ * @data:	bytes to append, or NULL to extend without writing
+ * @len:	number of bytes
+ *
+ * If the SKB lacks tailroom, or is cloned or shared, it is first made
+ * writable with skb_cow_data(), growing the tail by at least
+ * PKT_LEN_WHEN_EXTENDING bytes when more room is needed.
+ *
+ * Returns 0 on success. Returns -EPROTO if the packet is already flagged
+ * as erroneous or if skb_cow_data() fails (which also flags the packet).
+ */
+int cfpkt_add_body(struct cfpkt *pkt, const void *data, u16 len)
+{
+	struct sk_buff *skb = pkt_to_skb(pkt);
+	struct sk_buff *tailskb = skb;
+	u16 grow = 0;
+	u8 *dst;
+
+	if (unlikely(is_erronous(pkt)))
+		return -EPROTO;
+
+	/* Check whether we need to add space at the tail */
+	if (unlikely(skb_tailroom(skb) < len))
+		grow = likely(len < PKT_LEN_WHEN_EXTENDING) ?
+			PKT_LEN_WHEN_EXTENDING : len;
+
+	/* Check whether we need to change the SKB before writing to the tail */
+	if (unlikely(grow > 0 || skb_cloned(skb) || skb_shared(skb))) {
+		/* Make sure data is writable */
+		if (unlikely(skb_cow_data(skb, grow, &tailskb) < 0)) {
+			PKT_ERROR(pkt, "cfpkt_add_body: cow failed\n");
+			return -EPROTO;
+		}
+		/*
+		 * A non-linear SKB after skb_cow_data() means the data goes
+		 * into the last SKB, so the lengths of the top SKB have to
+		 * be adjusted by hand.
+		 */
+		if (tailskb != skb) {
+			pr_warning("CAIF: %s(): Packet is non-linear\n",
+				   __func__);
+			skb->len += len;
+			skb->data_len += len;
+		}
+	}
+
+	/* All set to put the last SKB and optionally write data there. */
+	dst = skb_put(tailskb, len);
+	if (likely(data))
+		memcpy(dst, data, len);
+	return 0;
+}
+EXPORT_SYMBOL(cfpkt_add_body);
+
+/* Append a single byte to the packet; see cfpkt_add_body() for the result. */
+inline int cfpkt_addbdy(struct cfpkt *pkt, u8 data)
+{
+	u8 byte = data;
+
+	return cfpkt_add_body(pkt, &byte, 1);
+}
+EXPORT_SYMBOL(cfpkt_addbdy);
+
+/*
+ * Prepend len bytes from data2 to the packet.
+ *
+ * Returns 0 on success. Returns -EPROTO if the packet is already flagged as
+ * erroneous, if there is less than len bytes of headroom, or if the SKB
+ * cannot be made writable; in the last two cases the packet is flagged as
+ * erroneous.
+ */
+int cfpkt_add_head(struct cfpkt *pkt, const void *data2, u16 len)
+{
+	struct sk_buff *skb = pkt_to_skb(pkt);
+	struct sk_buff *tailskb;
+	const u8 *src = data2;
+	u8 *dst;
+
+	if (unlikely(is_erronous(pkt)))
+		return -EPROTO;
+
+	if (unlikely(skb_headroom(skb) < len)) {
+		PKT_ERROR(pkt, "cfpkt_add_head: no headroom\n");
+		return -EPROTO;
+	}
+
+	/* Make sure data is writable */
+	if (unlikely(skb_cow_data(skb, 0, &tailskb) < 0)) {
+		PKT_ERROR(pkt, "cfpkt_add_head: cow failed\n");
+		return -EPROTO;
+	}
+
+	dst = skb_push(skb, len);
+	memcpy(dst, src, len);
+	return 0;
+}
+EXPORT_SYMBOL(cfpkt_add_head);
+
+/* Append len bytes at the tail; identical to cfpkt_add_body(). */
+inline int cfpkt_add_trail(struct cfpkt *pkt, const void *data, u16 len)
+{
+	int err = cfpkt_add_body(pkt, data, len);
+
+	return err;
+}
+EXPORT_SYMBOL(cfpkt_add_trail);
+
+/* Return the packet length (skb->len), converted to u16. */
+inline u16 cfpkt_getlen(struct cfpkt *pkt)
+{
+	return pkt_to_skb(pkt)->len;
+}
+EXPORT_SYMBOL(cfpkt_getlen);
+
+/*
+ * Run iter_func once over the whole (linearized) packet data.
+ *
+ * @pkt:	packet to process
+ * @iter_func:	called as iter_func(data, buffer, length)
+ * @data:	initial value handed to iter_func
+ *
+ * Returns the value produced by iter_func. For a packet flagged as
+ * erroneous, or when linearizing fails, -EPROTO is returned; since the
+ * return type is u16 the caller sees it converted to an unsigned value.
+ */
+inline u16 cfpkt_iterate(struct cfpkt *pkt,
+			    u16 (*iter_func)(u16, void *, u16),
+			    u16 data)
+{
+	struct sk_buff *skb = &pkt->skb;
+
+	if (unlikely(is_erronous(pkt)))
+		return -EPROTO;
+
+	/*
+	 * Don't care about the performance hit of linearizing,
+	 * Checksum should not be used on high-speed interfaces anyway.
+	 */
+	if (unlikely(skb_linearize(skb) != 0)) {
+		PKT_ERROR(pkt, "cfpkt_iterate: linearize failed\n");
+		return -EPROTO;
+	}
+	return iter_func(data, skb->data, cfpkt_getlen(pkt));
+}
+EXPORT_SYMBOL(cfpkt_iterate);
+
+/*
+ * Set the packet length to len, trimming or padding at the tail as needed.
+ *
+ * Returns the resulting packet length, or -EPROTO if the packet is already
+ * flagged as erroneous. If padding fails the packet is flagged as
+ * erroneous and its current length is returned.
+ */
+int cfpkt_setlen(struct cfpkt *pkt, u16 len)
+{
+	struct sk_buff *skb = pkt_to_skb(pkt);
+
+
+	if (unlikely(is_erronous(pkt)))
+		return -EPROTO;
+
+	if (likely(len <= skb->len)) {
+		/* Non-linear SKBs need the paged-data aware trim */
+		if (unlikely(skb->data_len))
+			___pskb_trim(skb, len);
+		else
+			skb_trim(skb, len);
+
+		return cfpkt_getlen(pkt);
+	}
+
+	/* Need to expand SKB; cfpkt_pad_trail() returns < 0 on failure */
+	if (unlikely(cfpkt_pad_trail(pkt, len - skb->len) < 0))
+		PKT_ERROR(pkt, "cfpkt_setlen: skb_pad_trail failed\n");
+
+	return cfpkt_getlen(pkt);
+}
+EXPORT_SYMBOL(cfpkt_setlen);
+
+/*
+ * Allocate a packet for len bytes of payload and, if data is not NULL,
+ * copy len bytes from data into it. Returns the packet, or NULL if the
+ * allocation failed.
+ */
+struct cfpkt *cfpkt_create_uplink(const unsigned char *data, unsigned int len)
+{
+	struct cfpkt *pkt = cfpkt_create_pfx(len + PKT_POSTFIX, PKT_PREFIX);
+
+	/* Allocation can fail; cfpkt_add_body() must not see a NULL packet */
+	if (unlikely(pkt == NULL))
+		return NULL;
+	if (unlikely(data != NULL))
+		cfpkt_add_body(pkt, data, len);
+	return pkt;
+}
+EXPORT_SYMBOL(cfpkt_create_uplink);
+
+/*
+ * Append the linear data of addpkt to dstpkt and destroy addpkt.
+ *
+ * @dstpkt:	packet to append to
+ * @addpkt:	packet whose data is appended; consumed on success
+ * @expectlen:	tail space to make sure of if it exceeds addpkt's length
+ *
+ * If dstpkt lacks tail space, its linear data is copied into a larger
+ * packet and dstpkt is destroyed. Returns the packet holding the combined
+ * data. If either packet is flagged as erroneous, addpkt is destroyed and
+ * dstpkt is returned unchanged. Returns NULL if the larger packet cannot be
+ * allocated; neither input is destroyed in that case.
+ */
+struct cfpkt *cfpkt_append(struct cfpkt *dstpkt,
+			     struct cfpkt *addpkt,
+			     u16 expectlen)
+{
+	struct sk_buff *dst = pkt_to_skb(dstpkt);
+	struct sk_buff *add = pkt_to_skb(addpkt);
+	u16 addlen = skb_headlen(add);
+	u16 room;
+
+	if (unlikely(is_erronous(dstpkt) || is_erronous(addpkt))) {
+		cfpkt_destroy(addpkt);
+		return dstpkt;
+	}
+
+	room = (expectlen > addlen) ? expectlen : addlen;
+
+	if (dst->tail + room > dst->end) {
+		/* Create a duplicate of 'dst' with more tail space */
+		struct sk_buff *bigger;
+		u16 dstlen = skb_headlen(dst);
+		u16 createlen = dstlen + room;
+
+		bigger = pkt_to_skb(
+			cfpkt_create(createlen + PKT_PREFIX + PKT_POSTFIX));
+		if (!bigger)
+			return NULL;
+		skb_set_tail_pointer(bigger, dstlen);
+		bigger->len = dstlen;
+		memcpy(bigger->data, dst->data, dstlen);
+		cfpkt_destroy(dstpkt);
+		dst = bigger;
+	}
+
+	memcpy(skb_tail_pointer(dst), add->data, skb_headlen(add));
+	cfpkt_destroy(addpkt);
+	dst->tail += addlen;
+	dst->len += addlen;
+	return skb_to_pkt(dst);
+}
+EXPORT_SYMBOL(cfpkt_append);
+
+/*
+ * Split a packet in two at byte offset pos.
+ *
+ * The original packet is shortened to pos bytes and a new packet holding
+ * the bytes from pos up to the old tail is returned. Returns NULL if the
+ * packet is flagged as erroneous, if pos lies beyond the tail (the packet
+ * is then flagged as erroneous), or if the new packet cannot be allocated.
+ */
+struct cfpkt *cfpkt_split(struct cfpkt *pkt, u16 pos)
+{
+	struct sk_buff *skb = pkt_to_skb(pkt);
+	struct sk_buff *rest;
+	u8 *cut;
+	u16 restlen;
+
+	if (unlikely(is_erronous(pkt)))
+		return NULL;
+
+	cut = skb->data + pos;
+	if (cut > skb_tail_pointer(skb)) {
+		PKT_ERROR(pkt,
+			  "cfpkt_split: trying to split beyond end of packet");
+		return NULL;
+	}
+	restlen = skb_tail_pointer(skb) - cut;
+
+	/* Create a new packet for the second part of the data */
+	rest = pkt_to_skb(
+		cfpkt_create_pfx(restlen + PKT_PREFIX + PKT_POSTFIX,
+				 PKT_PREFIX));
+	if (rest == NULL)
+		return NULL;
+
+	/* Reduce the length of the original packet */
+	skb_set_tail_pointer(skb, pos);
+	skb->len = pos;
+
+	/* The bytes behind the cut are still in place; copy them over */
+	memcpy(rest->data, cut, restlen);
+	rest->tail += restlen;
+	rest->len += restlen;
+	return skb_to_pkt(rest);
+}
+EXPORT_SYMBOL(cfpkt_split);
+
+/*
+ * Format a one-line description of the packet into buf.
+ *
+ * @pkt:	packet to describe
+ * @buf:	output buffer
+ * @buflen:	size of buf in bytes, at least 50
+ *
+ * The line holds a header with the SKB's lengths and offsets, followed by
+ * up to 300 bytes of linear data in hex, "..." if the data did not fit, and
+ * a closing "]\n". Every write is bounded by the space left in buf, so the
+ * output is truncated rather than written past the end of a small buffer.
+ *
+ * Returns buf, or NULL if buflen is below 50.
+ */
+char *cfpkt_log_pkt(struct cfpkt *pkt, char *buf, int buflen)
+{
+	struct sk_buff *skb = pkt_to_skb(pkt);
+	char *end = buf + buflen;
+	char *p = buf;
+	int i;
+
+	/*
+	 * Sanity check buffer length, it needs to be at least as large as
+	 * the header info: ~=50+ bytes
+	 */
+	if (buflen < 50)
+		return NULL;
+
+	snprintf(buf, buflen, "%s: pkt:%p len:%ld(%ld+%ld) {%ld,%ld} data: [",
+		is_erronous(pkt) ? "ERRONOUS-SKB" :
+		 (skb->data_len != 0 ? "COMPLEX-SKB" : "SKB"),
+		 skb,
+		 (long) skb->len,
+		 (long) (skb_tail_pointer(skb) - skb->data),
+		 (long) skb->data_len,
+		 (long) (skb->data - skb->head),
+		 (long) (skb_tail_pointer(skb) - skb->head));
+	/* snprintf() terminates within buflen, so p stays below end */
+	p = buf + strlen(buf);
+
+	for (i = 0; i < skb_tail_pointer(skb) - skb->data && i < 300; i++) {
+		/* Keep room for the "..." marker and the closing "]\n" */
+		if (p > end - 10) {
+			snprintf(p, end - p, "...");
+			p += strlen(p);
+			break;
+		}
+		snprintf(p, end - p, "%02x,", skb->data[i]);
+		p += strlen(p);
+	}
+	snprintf(p, end - p, "]\n");
+	return buf;
+}
+EXPORT_SYMBOL(cfpkt_log_pkt);
+
+/*
+ * Reserve buflen bytes at the end of the packet for the caller to fill in.
+ *
+ * @pkt:	packet to extend
+ * @buf:	out: start of the reserved area
+ * @buflen:	number of bytes to reserve
+ *
+ * Returns 1 on success. Returns -EPROTO if the packet is already flagged as
+ * erroneous, cannot be made writable, cannot be linearized, or has less
+ * than buflen bytes of tailroom; except in the first case the packet is
+ * then flagged as erroneous.
+ */
+int cfpkt_raw_append(struct cfpkt *pkt, void **buf, unsigned int buflen)
+{
+	struct sk_buff *skb = pkt_to_skb(pkt);
+	struct sk_buff *tailskb;
+
+	caif_assert(buf != NULL);
+	if (unlikely(is_erronous(pkt)))
+		return -EPROTO;
+
+	/* Make sure SKB is writable */
+	if (unlikely(skb_cow_data(skb, 0, &tailskb) < 0)) {
+		PKT_ERROR(pkt, "cfpkt_raw_append: skb_cow_data failed\n");
+		return -EPROTO;
+	}
+
+	if (unlikely(skb_linearize(skb) != 0)) {
+		PKT_ERROR(pkt, "cfpkt_raw_append: linearize failed\n");
+		return -EPROTO;
+	}
+
+	if (unlikely(skb_tailroom(skb) < buflen)) {
+		PKT_ERROR(pkt, "cfpkt_raw_append: buffer too short - failed\n");
+		return -EPROTO;
+	}
+
+	*buf = skb_put(skb, buflen);
+	return 1;
+}
+EXPORT_SYMBOL(cfpkt_raw_append);
+
+/*
+ * Remove buflen bytes from the front of the packet without copying them.
+ *
+ * @pkt:	packet to read from
+ * @buf:	out: points at the removed bytes inside the SKB
+ * @buflen:	number of bytes to remove
+ *
+ * Returns 1 on success. Returns -EPROTO if the packet is already flagged as
+ * erroneous, holds fewer than buflen bytes, or had to be linearized and
+ * that failed; in the last two cases the packet is flagged as erroneous.
+ */
+int cfpkt_raw_extract(struct cfpkt *pkt, void **buf, unsigned int buflen)
+{
+	struct sk_buff *skb = pkt_to_skb(pkt);
+
+	caif_assert(buf != NULL);
+	if (unlikely(is_erronous(pkt)))
+		return -EPROTO;
+
+	if (unlikely(buflen > skb->len)) {
+		PKT_ERROR(pkt, "cfpkt_raw_extract: buflen too large "
+				"- failed\n");
+		return -EPROTO;
+	}
+
+	/* Only linearize when the linear part is too short */
+	if (unlikely(buflen > skb_headlen(skb)) &&
+	    unlikely(skb_linearize(skb) != 0)) {
+		PKT_ERROR(pkt, "cfpkt_raw_extract: linearize failed\n");
+		return -EPROTO;
+	}
+
+	*buf = skb->data;
+	skb_pull(skb, buflen);
+
+	return 1;
+}
+EXPORT_SYMBOL(cfpkt_raw_extract);
+
+inline bool cfpkt_erroneous(struct cfpkt *pkt)
+{
+	return cfpkt_priv(pkt)->erronous;
+}
+EXPORT_SYMBOL(cfpkt_erroneous);
+
+/*
+ * Allocate and initialize an empty packet queue (atomic allocation).
+ * Returns NULL if the allocation fails.
+ */
+struct cfpktq *cfpktq_create(void)
+{
+	struct cfpktq *queue;
+
+	queue = kmalloc(sizeof(struct cfpktq), GFP_ATOMIC);
+	if (queue == NULL)
+		return NULL;
+
+	skb_queue_head_init(&queue->head);
+	atomic_set(&queue->count, 0);
+	spin_lock_init(&queue->lock);
+	return queue;
+}
+EXPORT_SYMBOL(cfpktq_create);
+
+/*
+ * Add a packet to the end of the queue and bump the packet count.
+ * The prio argument is not used.
+ */
+void cfpkt_queue(struct cfpktq *pktq, struct cfpkt *pkt, unsigned short prio)
+{
+	struct sk_buff *skb = pkt_to_skb(pkt);
+
+	atomic_inc(&pktq->count);
+
+	spin_lock(&pktq->lock);
+	skb_queue_tail(&pktq->head, skb);
+	spin_unlock(&pktq->lock);
+}
+EXPORT_SYMBOL(cfpkt_queue);
+
+/*
+ * Return the first packet in the queue without removing it, or NULL if the
+ * queue is empty.
+ */
+struct cfpkt *cfpkt_qpeek(struct cfpktq *pktq)
+{
+	struct sk_buff *first;
+
+	spin_lock(&pktq->lock);
+	first = skb_peek(&pktq->head);
+	spin_unlock(&pktq->lock);
+
+	return skb_to_pkt(first);
+}
+EXPORT_SYMBOL(cfpkt_qpeek);
+
+/*
+ * Remove and return the first packet in the queue, lowering the packet
+ * count. Returns NULL if the queue is empty.
+ */
+struct cfpkt *cfpkt_dequeue(struct cfpktq *pktq)
+{
+	struct cfpkt *first;
+
+	spin_lock(&pktq->lock);
+	first = skb_to_pkt(skb_dequeue(&pktq->head));
+	if (first != NULL) {
+		atomic_dec(&pktq->count);
+		caif_assert(atomic_read(&pktq->count) >= 0);
+	}
+	spin_unlock(&pktq->lock);
+
+	return first;
+}
+EXPORT_SYMBOL(cfpkt_dequeue);
+
+/* Return the queue's current packet count. */
+int cfpkt_qcount(struct cfpktq *pktq)
+{
+	int queued = atomic_read(&pktq->count);
+
+	return queued;
+}
+EXPORT_SYMBOL(cfpkt_qcount);
+
+/*
+ * Clone a packet (atomic allocation) and destroy the original. The
+ * original is destroyed even if cloning fails, in which case NULL is
+ * returned.
+ */
+struct cfpkt *cfpkt_clone_release(struct cfpkt *pkt)
+{
+	struct sk_buff *dup = skb_clone(pkt_to_skb(pkt), GFP_ATOMIC);
+
+	/* Free original packet. */
+	cfpkt_destroy(pkt);
+
+	return skb_to_pkt(dup);
+}
+EXPORT_SYMBOL(cfpkt_clone_release);
+
+/* Return the payload info stored in the packet's SKB control buffer. */
+struct caif_payload_info *cfpkt_info(struct cfpkt *pkt)
+{
+	struct sk_buff *skb = pkt_to_skb(pkt);
+
+	return (struct caif_payload_info *)&skb->cb;
+}
+EXPORT_SYMBOL(cfpkt_info);
-- 
cgit v1.2.3-59-g8ed1b


From c72dfae2f77620e5b3fcee1beeee7e536a42b2ad Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Tue, 30 Mar 2010 13:56:25 +0000
Subject: net-caif: add CAIF device registration functionality

Registration and deregistration of CAIF Link Layer.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_config_util.c |  87 ++++++++++
 net/caif/caif_dev.c         | 413 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 500 insertions(+)
 create mode 100644 net/caif/caif_config_util.c
 create mode 100644 net/caif/caif_dev.c

(limited to 'net')

diff --git a/net/caif/caif_config_util.c b/net/caif/caif_config_util.c
new file mode 100644
index 000000000000..6f36580366f0
--- /dev/null
+++ b/net/caif/caif_config_util.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <net/caif/cfctrl.h>
+#include <net/caif/cfcnfg.h>
+#include <net/caif/caif_dev.h>
+
+/*
+ * Translate a connect request into control-layer link parameters.
+ *
+ * @cnfg:	configuration layer used to resolve the physical interface
+ * @s:		connect request to translate
+ * @l:		out: link parameters; cleared before being filled in
+ *
+ * The physical interface is looked up by name when s->link_name is not
+ * empty, otherwise by the preference implied by s->link_selector. The
+ * protocol-specific fields are then filled in from s->sockaddr and
+ * s->param.
+ *
+ * Returns 0 on success, -EINVAL for an unknown link selector or protocol,
+ * and -ENODEV if no physical interface is available for the selector.
+ */
+int connect_req_to_link_param(struct cfcnfg *cnfg,
+				struct caif_connect_request *s,
+				struct cfctrl_link_param *l)
+{
+	struct dev_info *phydev;
+	enum cfcnfg_phy_preference pref;
+
+	memset(l, 0, sizeof(*l));
+	l->priority = s->priority;
+
+	if (s->link_name[0] != '\0') {
+		l->phyid = cfcnfg_get_named(cnfg, s->link_name);
+	} else {
+		if (s->link_selector == CAIF_LINK_HIGH_BANDW)
+			pref = CFPHYPREF_HIGH_BW;
+		else if (s->link_selector == CAIF_LINK_LOW_LATENCY)
+			pref = CFPHYPREF_LOW_LAT;
+		else
+			return -EINVAL;
+
+		phydev = cfcnfg_get_phyid(cnfg, pref);
+		if (phydev == NULL)
+			return -ENODEV;
+		l->phyid = phydev->id;
+	}
+
+	switch (s->protocol) {
+	case CAIFPROTO_AT:
+		l->linktype = CFCTRL_SRV_VEI;
+		/* The plain AT type is sent as channel type 0x02 */
+		l->chtype = (s->sockaddr.u.at.type == CAIF_ATTYPE_PLAIN) ?
+				0x02 : s->sockaddr.u.at.type;
+		l->endpoint = 0x00;
+		break;
+	case CAIFPROTO_DATAGRAM:
+		l->linktype = CFCTRL_SRV_DATAGRAM;
+		l->chtype = 0x00;
+		l->u.datagram.connid = s->sockaddr.u.dgm.connection_id;
+		break;
+	case CAIFPROTO_DATAGRAM_LOOP:
+		l->linktype = CFCTRL_SRV_DATAGRAM;
+		l->chtype = 0x03;
+		l->endpoint = 0x00;
+		l->u.datagram.connid = s->sockaddr.u.dgm.connection_id;
+		break;
+	case CAIFPROTO_RFM:
+		l->linktype = CFCTRL_SRV_RFM;
+		l->u.datagram.connid = s->sockaddr.u.rfm.connection_id;
+		/* Bounded copy, terminated explicitly */
+		strncpy(l->u.rfm.volume, s->sockaddr.u.rfm.volume,
+			sizeof(l->u.rfm.volume)-1);
+		l->u.rfm.volume[sizeof(l->u.rfm.volume)-1] = 0;
+		break;
+	case CAIFPROTO_UTIL:
+		l->linktype = CFCTRL_SRV_UTIL;
+		l->endpoint = 0x00;
+		l->chtype = 0x00;
+		/* Bounded copy, terminated explicitly */
+		strncpy(l->u.utility.name, s->sockaddr.u.util.service,
+			sizeof(l->u.utility.name)-1);
+		l->u.utility.name[sizeof(l->u.utility.name)-1] = 0;
+		caif_assert(sizeof(l->u.utility.name) > 10);
+
+		/* Never copy more parameter bytes than the target can hold */
+		l->u.utility.paramlen = s->param.size;
+		if (l->u.utility.paramlen > sizeof(l->u.utility.params))
+			l->u.utility.paramlen = sizeof(l->u.utility.params);
+		memcpy(l->u.utility.params, s->param.data,
+		       l->u.utility.paramlen);
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
new file mode 100644
index 000000000000..e84837e1bc86
--- /dev/null
+++ b/net/caif/caif_dev.c
@@ -0,0 +1,413 @@
+/*
+ * CAIF Interface registration.
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ *
+ * Borrowed heavily from file: pn_dev.c. Thanks to
+ *  Remi Denis-Courmont <remi.denis-courmont@nokia.com>
+ *  and Sakari Ailus <sakari.ailus@nokia.com>
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/if_arp.h>
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <net/netns/generic.h>
+#include <net/net_namespace.h>
+#include <net/pkt_sched.h>
+#include <net/caif/caif_device.h>
+#include <net/caif/caif_dev.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfpkt.h>
+#include <net/caif/cfcnfg.h>
+
+MODULE_LICENSE("GPL");
+#define TIMEOUT (HZ*5)
+
+/*
+ * Used for local tracking of the CAIF net devices.
+ * One entry is allocated per CAIF net device (see caif_device_alloc())
+ * and linked into the per-namespace device list.
+ */
+struct caif_device_entry {
+	/* Embedded CAIF layer; container_of() maps it back to this entry */
+	struct cflayer layer;
+	/* Link in caif_device_entry_list.list */
+	struct list_head list;
+	/* Set to 1/0 by modemcmd() on PHYIF_USEFULL/PHYIF_USELESS */
+	atomic_t in_use;
+	/* Last NETDEV_* event recorded by caif_device_notify() */
+	atomic_t state;
+	/* Passed by address to cfcnfg_add_phy_layer() on NETDEV_UP */
+	u16 phyid;
+	/* The net device this entry tracks */
+	struct net_device *netdev;
+	/* Woken by modemcmd() whenever in_use changes */
+	wait_queue_head_t event;
+};
+
+/* Head of the list of caif_device_entry objects, one list per namespace. */
+struct caif_device_entry_list {
+	struct list_head list;
+	/* Protects simultaneous deletes in list */
+	spinlock_t lock;
+};
+
+/*
+ * Per network namespace CAIF state; looked up with
+ * net_generic(net, caif_net_id).
+ */
+struct caif_net {
+	struct caif_device_entry_list caifdevs;
+};
+
+static int caif_net_id;
+static struct cfcnfg *cfg;
+
+/*
+ * Return the CAIF device list that belongs to namespace @net.
+ * A missing namespace or missing per-namespace data is treated as fatal.
+ */
+static struct caif_device_entry_list *caif_device_list(struct net *net)
+{
+	struct caif_net *pernet;
+
+	BUG_ON(!net);
+
+	pernet = net_generic(net, caif_net_id);
+	BUG_ON(!pernet);
+
+	return &pernet->caifdevs;
+}
+
+/*
+ * Allocate a new CAIF device entry for @dev and link it into the device
+ * list of the namespace @dev lives in.
+ *
+ * Returns the new (zero-initialised) entry, or NULL if the allocation
+ * failed.
+ */
+static struct caif_device_entry *caif_device_alloc(struct net_device *dev)
+{
+	struct caif_device_entry_list *caifdevs;
+	struct caif_device_entry *caifd;
+
+	caifdevs = caif_device_list(dev_net(dev));
+	BUG_ON(!caifdevs);
+
+	caifd = kzalloc(sizeof(*caifd), GFP_ATOMIC);
+	if (!caifd)
+		return NULL;
+
+	/*
+	 * Finish initialising the entry before it becomes reachable through
+	 * the list, so that a lookup never sees an uninitialised wait queue.
+	 */
+	caifd->netdev = dev;
+	init_waitqueue_head(&caifd->event);
+	list_add(&caifd->list, &caifdevs->list);
+
+	return caifd;
+}
+
+/*
+ * Look up the CAIF device entry tracking @dev in the device list of
+ * @dev's namespace. Returns NULL when @dev is not tracked.
+ */
+static struct caif_device_entry *caif_get(struct net_device *dev)
+{
+	struct caif_device_entry_list *devlist;
+	struct caif_device_entry *entry;
+	struct caif_device_entry *match = NULL;
+
+	devlist = caif_device_list(dev_net(dev));
+	BUG_ON(!devlist);
+
+	list_for_each_entry(entry, &devlist->list, list) {
+		if (entry->netdev != dev)
+			continue;
+		match = entry;
+		break;
+	}
+
+	return match;
+}
+
+/*
+ * Unlink and free the CAIF device entry that tracks @dev, if any.
+ * Must be called with the RTNL lock held; non-CAIF devices are ignored.
+ */
+static void caif_device_destroy(struct net_device *dev)
+{
+	struct caif_device_entry_list *devlist;
+	struct caif_device_entry *entry;
+
+	devlist = caif_device_list(dev_net(dev));
+	ASSERT_RTNL();
+
+	if (dev->type != ARPHRD_CAIF)
+		return;
+
+	/* Look up and unlink under the list lock. */
+	spin_lock_bh(&devlist->lock);
+	entry = caif_get(dev);
+	if (entry != NULL)
+		list_del(&entry->list);
+	spin_unlock_bh(&devlist->lock);
+
+	/* kfree(NULL) is a no-op, so the "not found" case needs no branch. */
+	kfree(entry);
+}
+
+/*
+ * Transmit callback of the device layer: hand @pkt to the net device that
+ * backs @layer.
+ *
+ * Returns 0 when dev_queue_xmit() reported success, -EAGAIN when the
+ * device queue is stopped or dev_queue_xmit() returned non-zero.
+ */
+static int transmit(struct cflayer *layer, struct cfpkt *pkt)
+{
+	struct caif_device_entry *caifd =
+	    container_of(layer, struct caif_device_entry, layer);
+	struct sk_buff *skb, *skb2;
+	int ret = -EINVAL;
+	skb = cfpkt_tonative(pkt);
+	skb->dev = caifd->netdev;
+	/*
+	 * Don't allow SKB to be destroyed upon error, but signal resend
+	 * notification to clients. We can't rely on the return value as
+	 * congestion (NET_XMIT_CN) sometimes drops the packet, sometimes
+	 * doesn't.
+	 */
+	if (netif_queue_stopped(caifd->netdev))
+		return -EAGAIN;
+	/* Extra reference: the skb survives whatever dev_queue_xmit() does */
+	skb2 = skb_get(skb);
+
+	ret = dev_queue_xmit(skb2);
+
+	/*
+	 * Success: drop the reference taken by this function's caller side.
+	 * Failure: keep it and report -EAGAIN.
+	 * NOTE(review): presumably the caller then still owns the packet and
+	 * may resend it - confirm against the callers.
+	 */
+	if (!ret)
+		kfree_skb(skb);
+	else
+		return -EAGAIN;
+
+	return 0;
+}
+
+/*
+ * Modem command callback of the device layer.
+ *
+ * Records whether the physical interface is in use (PHYIF_USEFULL) or not
+ * (PHYIF_USELESS) and wakes anybody sleeping on the entry's event queue.
+ * All other commands are ignored. Always returns 0.
+ */
+static int modemcmd(struct cflayer *layr, enum caif_modemcmd ctrl)
+{
+	struct caif_device_entry *caifd;
+
+	caifd = container_of(layr, struct caif_device_entry, layer);
+
+	switch (ctrl) {
+	case _CAIF_MODEMCMD_PHYIF_USEFULL:
+		atomic_set(&caifd->in_use, 1);
+		break;
+	case _CAIF_MODEMCMD_PHYIF_USELESS:
+		atomic_set(&caifd->in_use, 0);
+		break;
+	default:
+		/* Nothing changed, nobody to wake. */
+		return 0;
+	}
+
+	wake_up_interruptible(&caifd->event);
+	return 0;
+}
+
+/*
+ * Stuff received packets to associated sockets.
+ *
+ * Packet-type handler for ETH_P_CAIF frames: passes @skb to the receive
+ * callback of the layer stacked on top of the device entry for @dev.
+ *
+ * Returns 0 on success. Returns NET_RX_DROP when @dev has no usable upper
+ * layer (the skb is released here) or when the upper layer's receive
+ * callback reports an error.
+ * NOTE(review): in the latter case the upper layer is presumably
+ * responsible for releasing the packet - confirm.
+ */
+static int receive(struct sk_buff *skb, struct net_device *dev,
+		   struct packet_type *pkttype, struct net_device *orig_dev)
+{
+	struct cfpkt *pkt;
+	struct caif_device_entry *caifd;
+
+	caifd = caif_get(dev);
+	/* Guard the callback that is actually invoked below. */
+	if (!caifd || !caifd->layer.up || !caifd->layer.up->receive) {
+		/* Nobody will consume the skb, so release it here. */
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
+	pkt = cfpkt_fromnative(CAIF_DIR_IN, skb);
+
+	if (caifd->layer.up->receive(caifd->layer.up, pkt))
+		return NET_RX_DROP;
+
+	return 0;
+}
+
+/* Packet-type hook: deliver every ETH_P_CAIF frame to receive(). */
+static struct packet_type caif_packet_type __read_mostly = {
+	.type = cpu_to_be16(ETH_P_CAIF),
+	.func = receive,
+};
+
+/*
+ * Flow-control hook installed in the CAIF device's private data.
+ * Forwards a flow ON (@on non-zero) or flow OFF (@on zero) indication to
+ * the layer above the device entry of @dev; silently does nothing when
+ * @dev is not tracked or has no upper layer with a ctrlcmd callback.
+ */
+static void dev_flowctrl(struct net_device *dev, int on)
+{
+	struct caif_device_entry *entry = caif_get(dev);
+
+	if (!entry)
+		return;
+	if (!entry->layer.up)
+		return;
+	if (!entry->layer.up->ctrlcmd)
+		return;
+
+	if (on)
+		entry->layer.up->ctrlcmd(entry->layer.up,
+					 _CAIF_CTRLCMD_PHYIF_FLOW_ON_IND,
+					 entry->layer.id);
+	else
+		entry->layer.up->ctrlcmd(entry->layer.up,
+					 _CAIF_CTRLCMD_PHYIF_FLOW_OFF_IND,
+					 entry->layer.id);
+}
+
+/*
+ * Notify CAIF of net device events.
+ *
+ * Only devices of type ARPHRD_CAIF are handled:
+ *  - NETDEV_REGISTER:   allocate the tracking entry, install dev_flowctrl.
+ *  - NETDEV_UP:         register the device as a physical layer in the
+ *                       CAIF configuration.
+ *  - NETDEV_GOING_DOWN: tell the upper layer the interface goes down and
+ *                       wait (up to TIMEOUT) until it is no longer in use.
+ *  - NETDEV_DOWN:       remove the physical layer from the configuration.
+ *  - NETDEV_UNREGISTER: free the tracking entry.
+ *
+ * Returns 0, except for -EINVAL on NETDEV_GOING_DOWN when the entry has
+ * no upper layer with a ctrlcmd callback.
+ * NOTE(review): notifier callbacks conventionally return NOTIFY_* codes;
+ * the -EINVAL return is kept unchanged here - confirm it is intended.
+ */
+static int caif_device_notify(struct notifier_block *me, unsigned long what,
+			      void *arg)
+{
+	struct net_device *dev = arg;
+	struct caif_device_entry *caifd = NULL;
+	struct caif_dev_common *caifdev;
+	enum cfcnfg_phy_preference pref;
+	enum cfcnfg_phy_type phy_type;
+
+	if (dev->type != ARPHRD_CAIF)
+		return 0;
+
+	switch (what) {
+	case NETDEV_REGISTER:
+		pr_info("CAIF: %s():register %s\n", __func__, dev->name);
+		caifd = caif_device_alloc(dev);
+		if (caifd == NULL)
+			break;
+		caifdev = netdev_priv(dev);
+		caifdev->flowctrl = dev_flowctrl;
+		atomic_set(&caifd->state, what);
+		break;
+
+	case NETDEV_UP:
+		pr_info("CAIF: %s(): up %s\n", __func__, dev->name);
+		caifd = caif_get(dev);
+		if (caifd == NULL)
+			break;
+		caifdev = netdev_priv(dev);
+		if (atomic_read(&caifd->state) == NETDEV_UP) {
+			pr_info("CAIF: %s():%s already up\n",
+				__func__, dev->name);
+			break;
+		}
+		atomic_set(&caifd->state, what);
+		caifd->layer.transmit = transmit;
+		caifd->layer.modemcmd = modemcmd;
+
+		if (caifdev->use_frag)
+			phy_type = CFPHYTYPE_FRAG;
+		else
+			phy_type = CFPHYTYPE_CAIF;
+
+		/*
+		 * Map the device's link selection to the matching physical
+		 * layer preference (same mapping as used when translating a
+		 * connect request); anything else defaults to high bandwidth.
+		 */
+		switch (caifdev->link_select) {
+		case CAIF_LINK_HIGH_BANDW:
+			pref = CFPHYPREF_HIGH_BW;
+			break;
+		case CAIF_LINK_LOW_LATENCY:
+			pref = CFPHYPREF_LOW_LAT;
+			break;
+		default:
+			pref = CFPHYPREF_HIGH_BW;
+			break;
+		}
+
+		cfcnfg_add_phy_layer(get_caif_conf(),
+				     phy_type,
+				     dev,
+				     &caifd->layer,
+				     &caifd->phyid,
+				     pref,
+				     caifdev->use_fcs,
+				     caifdev->use_stx);
+		strncpy(caifd->layer.name, dev->name,
+			sizeof(caifd->layer.name) - 1);
+		caifd->layer.name[sizeof(caifd->layer.name) - 1] = 0;
+		break;
+
+	case NETDEV_GOING_DOWN:
+		caifd = caif_get(dev);
+		if (caifd == NULL)
+			break;
+		pr_info("CAIF: %s():going down %s\n", __func__, dev->name);
+
+		if (atomic_read(&caifd->state) == NETDEV_GOING_DOWN ||
+			atomic_read(&caifd->state) == NETDEV_DOWN)
+			break;
+
+		atomic_set(&caifd->state, what);
+		if (!caifd->layer.up || !caifd->layer.up->ctrlcmd)
+			return -EINVAL;
+		caifd->layer.up->ctrlcmd(caifd->layer.up,
+					 _CAIF_CTRLCMD_PHYIF_DOWN_IND,
+					 caifd->layer.id);
+		/*
+		 * Best effort: give the stack up to TIMEOUT to stop using the
+		 * interface. A timeout or signal is not treated as an error;
+		 * NETDEV_DOWN warns if the device is still in use.
+		 */
+		(void) wait_event_interruptible_timeout(caifd->event,
+					atomic_read(&caifd->in_use) == 0,
+					TIMEOUT);
+		break;
+
+	case NETDEV_DOWN:
+		caifd = caif_get(dev);
+		if (caifd == NULL)
+			break;
+		pr_info("CAIF: %s(): down %s\n", __func__, dev->name);
+		if (atomic_read(&caifd->in_use))
+			pr_warning("CAIF: %s(): "
+				   "Unregistering an active CAIF device: %s\n",
+				   __func__, dev->name);
+		cfcnfg_del_phy_layer(get_caif_conf(), &caifd->layer);
+		atomic_set(&caifd->state, what);
+		break;
+
+	case NETDEV_UNREGISTER:
+		caifd = caif_get(dev);
+		pr_info("CAIF: %s(): unregister %s\n", __func__, dev->name);
+		/* No entry (e.g. allocation failed on register): nothing to do */
+		if (caifd == NULL)
+			break;
+		atomic_set(&caifd->state, what);
+		caif_device_destroy(dev);
+		break;
+	}
+	return 0;
+}
+
+/* Notifier block that routes net device events to caif_device_notify(). */
+static struct notifier_block caif_device_notifier = {
+	.notifier_call = caif_device_notify,
+	.priority = 0,
+};
+
+
+/*
+ * Return the CAIF configuration object created in caif_device_init().
+ * The pointer is NULL if that creation has not happened or failed.
+ */
+struct cfcnfg *get_caif_conf(void)
+{
+	return cfg;
+}
+EXPORT_SYMBOL(get_caif_conf);
+
+/*
+ * Connect a client layer to the CAIF stack.
+ *
+ * Translates @conn_req into link parameters and, on success, hooks
+ * @client_layer up as an adaptation layer.
+ *
+ * Returns the result of cfcnfg_add_adaptation_layer(), or -EINVAL when
+ * the connect request could not be translated.
+ */
+int caif_connect_client(struct caif_connect_request *conn_req,
+			   struct cflayer *client_layer)
+{
+	struct cfctrl_link_param param;
+
+	if (connect_req_to_link_param(get_caif_conf(), conn_req, &param) != 0)
+		return -EINVAL;
+
+	/* Hook up the adaptation layer. */
+	return cfcnfg_add_adaptation_layer(get_caif_conf(),
+					&param, client_layer);
+}
+EXPORT_SYMBOL(caif_connect_client);
+
+/*
+ * Disconnect a client: remove @adap_layer from the CAIF configuration and
+ * return whatever cfcnfg_del_adapt_layer() reports.
+ */
+int caif_disconnect_client(struct cflayer *adap_layer)
+{
+	struct cfcnfg *conf = get_caif_conf();
+
+	return cfcnfg_del_adapt_layer(conf, adap_layer);
+}
+EXPORT_SYMBOL(caif_disconnect_client);
+
+/*
+ * Per-namespace CAIF device handling: set up the (empty) device list and
+ * its lock for a newly created namespace. Always succeeds.
+ */
+static int caif_init_net(struct net *net)
+{
+	struct caif_net *pernet = net_generic(net, caif_net_id);
+	struct caif_device_entry_list *devlist = &pernet->caifdevs;
+
+	INIT_LIST_HEAD(&devlist->list);
+	spin_lock_init(&devlist->lock);
+
+	return 0;
+}
+
+/*
+ * Namespace teardown: close every CAIF device in @net and free its
+ * tracking entry. Runs with the RTNL lock held for the whole walk.
+ */
+static void caif_exit_net(struct net *net)
+{
+	struct net_device *dev;
+
+	rtnl_lock();
+	for_each_netdev(net, dev) {
+		if (dev->type != ARPHRD_CAIF)
+			continue;
+		/* Best effort: the entry is destroyed whatever close reports */
+		dev_close(dev);
+		caif_device_destroy(dev);
+	}
+	rtnl_unlock();
+}
+
+/*
+ * Per-namespace operations; .id/.size describe the struct caif_net
+ * storage that caif_init_net() and caif_device_list() fetch through
+ * net_generic().
+ */
+static struct pernet_operations caif_net_ops = {
+	.init = caif_init_net,
+	.exit = caif_exit_net,
+	.id   = &caif_net_id,
+	.size = sizeof(struct caif_net),
+};
+
+/*
+ * Initialize CAIF devices list.
+ *
+ * Creates the CAIF configuration object, registers the per-namespace
+ * operations, the ETH_P_CAIF packet handler and the net device notifier.
+ * Everything already set up is undone again if a later step fails.
+ *
+ * Returns 0 on success, -ENODEV if the configuration object could not be
+ * created, or the error reported by the failing registration.
+ */
+static int __init caif_device_init(void)
+{
+	int result;
+
+	cfg = cfcnfg_create();
+	if (!cfg) {
+		pr_warning("CAIF: %s(): can't create cfcnfg.\n", __func__);
+		return -ENODEV;
+	}
+
+	result = register_pernet_device(&caif_net_ops);
+	if (result)
+		goto err_remove_cfg;
+
+	dev_add_pack(&caif_packet_type);
+
+	result = register_netdevice_notifier(&caif_device_notifier);
+	if (result)
+		goto err_remove_pack;
+
+	return 0;
+
+err_remove_pack:
+	dev_remove_pack(&caif_packet_type);
+	unregister_pernet_device(&caif_net_ops);
+err_remove_cfg:
+	/* Same destructor as used by caif_device_exit() */
+	cfcnfg_remove(cfg);
+	cfg = NULL;
+	return result;
+}
+
+/*
+ * Module exit: undo caif_device_init() - remove the packet handler, the
+ * per-namespace operations and the notifier, then release the CAIF
+ * configuration object.
+ * NOTE(review): cfg is not reset to NULL afterwards, so get_caif_conf()
+ * would return a stale pointer if it were called after this point.
+ */
+static void __exit caif_device_exit(void)
+{
+	dev_remove_pack(&caif_packet_type);
+	unregister_pernet_device(&caif_net_ops);
+	unregister_netdevice_notifier(&caif_device_notifier);
+	cfcnfg_remove(cfg);
+}
+
+module_init(caif_device_init);
+module_exit(caif_device_exit);
-- 
cgit v1.2.3-59-g8ed1b


From e6f95ec8db312491235b4f06343fbd991a82ce20 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Tue, 30 Mar 2010 13:56:26 +0000
Subject: net-caif: add CAIF socket implementation

Implementation of CAIF sockets for protocol and address family
PF_CAIF and AF_CAIF.
CAIF sockets are connection oriented and implement the SOCK_SEQPACKET
and SOCK_STREAM interfaces, supporting both blocking and non-blocking mode.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_socket.c | 1391 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1391 insertions(+)
 create mode 100644 net/caif/caif_socket.c

(limited to 'net')

diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
new file mode 100644
index 000000000000..cdf62b9fefac
--- /dev/null
+++ b/net/caif/caif_socket.c
@@ -0,0 +1,1391 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland sjur.brandeland@stericsson.com
+ *		Per Sigmond per.sigmond@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <linux/tcp.h>
+#include <linux/uaccess.h>
+#include <asm/atomic.h>
+
+#include <linux/caif/caif_socket.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/caif_dev.h>
+#include <net/caif/cfpkt.h>
+
+MODULE_LICENSE("GPL");
+
+/*
+ * Receive queue watermarks, in packets: flow OFF is requested from the
+ * modem when the queue grows above HIGH, flow ON when it has dropped
+ * below LOW again.
+ */
+#define CHNL_SKT_READ_QUEUE_HIGH 200
+#define CHNL_SKT_READ_QUEUE_LOW 100
+
+static int caif_sockbuf_size = 40000;
+static atomic_t caif_nr_socks = ATOMIC_INIT(0);
+
+/* Bit numbers within caifsock.conn_state */
+#define CONN_STATE_OPEN_BIT	      1
+#define CONN_STATE_PENDING_BIT	      2
+#define CONN_STATE_PEND_DESTROY_BIT   3
+#define CONN_REMOTE_SHUTDOWN_BIT      4
+
+/* Bit numbers within caifsock.flow_state */
+#define TX_FLOW_ON_BIT		      1
+#define RX_FLOW_ON_BIT		      2
+
+/* Tests on conn_state; each takes a struct caifsock pointer */
+#define STATE_IS_OPEN(cf_sk) test_bit(CONN_STATE_OPEN_BIT,\
+				    (void *) &(cf_sk)->conn_state)
+#define STATE_IS_REMOTE_SHUTDOWN(cf_sk) test_bit(CONN_REMOTE_SHUTDOWN_BIT,\
+				    (void *) &(cf_sk)->conn_state)
+#define STATE_IS_PENDING(cf_sk) test_bit(CONN_STATE_PENDING_BIT,\
+				       (void *) &(cf_sk)->conn_state)
+#define STATE_IS_PENDING_DESTROY(cf_sk) test_bit(CONN_STATE_PEND_DESTROY_BIT,\
+				       (void *) &(cf_sk)->conn_state)
+
+/* Updates of conn_state (set_bit/clear_bit on the bits above) */
+#define SET_STATE_PENDING_DESTROY(cf_sk) set_bit(CONN_STATE_PEND_DESTROY_BIT,\
+				    (void *) &(cf_sk)->conn_state)
+#define SET_STATE_OPEN(cf_sk) set_bit(CONN_STATE_OPEN_BIT,\
+				    (void *) &(cf_sk)->conn_state)
+#define SET_STATE_CLOSED(cf_sk) clear_bit(CONN_STATE_OPEN_BIT,\
+					(void *) &(cf_sk)->conn_state)
+#define SET_PENDING_ON(cf_sk) set_bit(CONN_STATE_PENDING_BIT,\
+				    (void *) &(cf_sk)->conn_state)
+#define SET_PENDING_OFF(cf_sk) clear_bit(CONN_STATE_PENDING_BIT,\
+				       (void *) &(cf_sk)->conn_state)
+#define SET_REMOTE_SHUTDOWN(cf_sk) set_bit(CONN_REMOTE_SHUTDOWN_BIT,\
+				    (void *) &(cf_sk)->conn_state)
+
+#define SET_REMOTE_SHUTDOWN_OFF(dev) clear_bit(CONN_REMOTE_SHUTDOWN_BIT,\
+				    (void *) &(dev)->conn_state)
+/* Tests on flow_state */
+#define RX_FLOW_IS_ON(cf_sk) test_bit(RX_FLOW_ON_BIT,\
+				    (void *) &(cf_sk)->flow_state)
+#define TX_FLOW_IS_ON(cf_sk) test_bit(TX_FLOW_ON_BIT,\
+				    (void *) &(cf_sk)->flow_state)
+
+/* Updates of flow_state */
+#define SET_RX_FLOW_OFF(cf_sk) clear_bit(RX_FLOW_ON_BIT,\
+				       (void *) &(cf_sk)->flow_state)
+#define SET_RX_FLOW_ON(cf_sk) set_bit(RX_FLOW_ON_BIT,\
+				    (void *) &(cf_sk)->flow_state)
+#define SET_TX_FLOW_OFF(cf_sk) clear_bit(TX_FLOW_ON_BIT,\
+				       (void *) &(cf_sk)->flow_state)
+#define SET_TX_FLOW_ON(cf_sk) set_bit(TX_FLOW_ON_BIT,\
+				    (void *) &(cf_sk)->flow_state)
+
+#define SKT_READ_FLAG 0x01
+#define SKT_WRITE_FLAG 0x02
+static struct dentry *debugfsdir;
+#include <linux/debugfs.h>
+
+/*
+ * Event counters, only compiled in with CONFIG_DEBUG_FS. Without it the
+ * dbfs_atomic_inc()/dbfs_atomic_dec() helpers expand to nothing, so their
+ * argument is not even evaluated.
+ */
+#ifdef CONFIG_DEBUG_FS
+struct debug_fs_counter {
+	atomic_t num_open;
+	atomic_t num_close;
+	atomic_t num_init;
+	atomic_t num_init_resp;
+	atomic_t num_init_fail_resp;
+	atomic_t num_deinit;
+	atomic_t num_deinit_resp;
+	atomic_t num_remote_shutdown_ind;
+	atomic_t num_tx_flow_off_ind;
+	atomic_t num_tx_flow_on_ind;
+	atomic_t num_rx_flow_off;
+	atomic_t num_rx_flow_on;
+	atomic_t skb_in_use;
+	atomic_t skb_alloc;
+	atomic_t skb_free;
+};
+static struct debug_fs_counter cnt;
+#define	dbfs_atomic_inc(v) atomic_inc(v)
+#define	dbfs_atomic_dec(v) atomic_dec(v)
+#else
+#define	dbfs_atomic_inc(v)
+#define	dbfs_atomic_dec(v)
+#endif
+
+/* The AF_CAIF socket */
+struct caifsock {
+	/* NOTE: sk has to be the first member */
+	struct sock sk;
+	/* CAIF layer of this socket; callbacks recover the socket from it */
+	struct cflayer layer;
+	char name[CAIF_LAYER_NAME_SZ];
+	/* Connection state bits, see CONN_*_BIT */
+	u32 conn_state;
+	/* Flow control bits, see TX_FLOW_ON_BIT / RX_FLOW_ON_BIT */
+	u32 flow_state;
+	/* Queue of received packets waiting to be read */
+	struct cfpktq *pktq;
+	int file_mode;
+	/* Connect parameters, partly filled in through setsockopt() */
+	struct caif_connect_request conn_req;
+	/* Number of packets counted into pktq */
+	int read_queue_len;
+	/* protect updates of read_queue_len */
+	spinlock_t read_queue_len_lock;
+	struct dentry *debugfs_socket_dir;
+};
+
+static void drain_queue(struct caifsock *cf_sk);
+
+/*
+ * Packet Receive Callback function called from CAIF Stack.
+ *
+ * Queues @pkt on the socket's receive queue and wakes up readers. When
+ * the queue grows above CHNL_SKT_READ_QUEUE_HIGH while RX flow is on, a
+ * flow OFF request is sent down the stack. Packets arriving for a socket
+ * that is not open are destroyed.
+ *
+ * Always returns 0.
+ */
+static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt)
+{
+	struct caifsock *cf_sk;
+	int read_queue_high;
+	int queue_len;
+	cf_sk = container_of(layr, struct caifsock, layer);
+
+	if (!STATE_IS_OPEN(cf_sk)) {
+		/*FIXME: This should be allowed finally!*/
+		pr_debug("CAIF: %s(): called after close request\n", __func__);
+		cfpkt_destroy(pkt);
+		return 0;
+	}
+	/* NOTE: This function may be called in Tasklet context! */
+
+	/* The queue has its own lock */
+	cfpkt_queue(cf_sk->pktq, pkt, 0);
+
+	/* Sample the new length under the lock; it is also used for logging */
+	spin_lock(&cf_sk->read_queue_len_lock);
+	queue_len = ++cf_sk->read_queue_len;
+	spin_unlock(&cf_sk->read_queue_len_lock);
+
+	read_queue_high = (queue_len > CHNL_SKT_READ_QUEUE_HIGH);
+
+	if (RX_FLOW_IS_ON(cf_sk) && read_queue_high) {
+		dbfs_atomic_inc(&cnt.num_rx_flow_off);
+		SET_RX_FLOW_OFF(cf_sk);
+
+		/* Send flow off (NOTE: must not sleep) */
+		pr_debug("CAIF: %s():"
+			" sending flow OFF (queue len = %d)\n",
+			__func__,
+		     queue_len);
+		/* Assert the callback that is actually invoked below */
+		caif_assert(cf_sk->layer.dn);
+		caif_assert(cf_sk->layer.dn->modemcmd);
+
+		(void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
+					       CAIF_MODEMCMD_FLOW_OFF_REQ);
+	}
+
+	/* Signal reader that data is available. */
+
+	wake_up_interruptible(cf_sk->sk.sk_sleep);
+
+	return 0;
+}
+
+/*
+ * Packet Flow Control Callback function called from CAIF.
+ *
+ * Updates the socket's connection and flow state according to the
+ * control command @flow and wakes up sleepers on the socket where the
+ * change may unblock them. @phyid is not used. A NULL @layr is ignored
+ * (after the debug print).
+ */
+static void caif_sktflowctrl_cb(struct cflayer *layr,
+				enum caif_ctrlcmd flow,
+				int phyid)
+{
+	struct caifsock *cf_sk;
+
+	/* NOTE: This function may be called in Tasklet context! */
+	pr_debug("CAIF: %s(): flowctrl func called: %s.\n",
+		      __func__,
+		      flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
+		      flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
+		      flow == CAIF_CTRLCMD_INIT_RSP ? "INIT_RSP" :
+		      flow == CAIF_CTRLCMD_DEINIT_RSP ? "DEINIT_RSP" :
+		      flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "INIT_FAIL_RSP" :
+		      flow ==
+		      CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ? "REMOTE_SHUTDOWN" :
+		      "UKNOWN CTRL COMMAND");
+
+	if (layr == NULL)
+		return;
+
+	cf_sk = container_of(layr, struct caifsock, layer);
+
+	switch (flow) {
+	case CAIF_CTRLCMD_FLOW_ON_IND:
+		dbfs_atomic_inc(&cnt.num_tx_flow_on_ind);
+		/* Transmission is allowed again; wake up blocked writers. */
+		SET_TX_FLOW_ON(cf_sk);
+		wake_up_interruptible(cf_sk->sk.sk_sleep);
+		break;
+
+	case CAIF_CTRLCMD_FLOW_OFF_IND:
+		dbfs_atomic_inc(&cnt.num_tx_flow_off_ind);
+		SET_TX_FLOW_OFF(cf_sk);
+		break;
+
+	case CAIF_CTRLCMD_INIT_RSP:
+		dbfs_atomic_inc(&cnt.num_init_resp);
+		/* Connect completed: no longer pending, TX allowed. */
+		caif_assert(STATE_IS_OPEN(cf_sk));
+		SET_PENDING_OFF(cf_sk);
+		SET_TX_FLOW_ON(cf_sk);
+		wake_up_interruptible(cf_sk->sk.sk_sleep);
+		break;
+
+	case CAIF_CTRLCMD_DEINIT_RSP:
+		dbfs_atomic_inc(&cnt.num_deinit_resp);
+		caif_assert(!STATE_IS_OPEN(cf_sk));
+		SET_PENDING_OFF(cf_sk);
+		/* Only wake sleepers if the socket is not being destroyed */
+		if (!STATE_IS_PENDING_DESTROY(cf_sk)) {
+			if (cf_sk->sk.sk_sleep != NULL)
+				wake_up_interruptible(cf_sk->sk.sk_sleep);
+		}
+		dbfs_atomic_inc(&cnt.num_deinit);
+		/*
+		 * Drop a socket reference.
+		 * NOTE(review): presumably the one taken when the disconnect
+		 * was requested - confirm against the close path.
+		 */
+		sock_put(&cf_sk->sk);
+		break;
+
+	case CAIF_CTRLCMD_INIT_FAIL_RSP:
+		dbfs_atomic_inc(&cnt.num_init_fail_resp);
+		/* Connect failed: close the socket state and stop TX. */
+		caif_assert(STATE_IS_OPEN(cf_sk));
+		SET_STATE_CLOSED(cf_sk);
+		SET_PENDING_OFF(cf_sk);
+		SET_TX_FLOW_OFF(cf_sk);
+		wake_up_interruptible(cf_sk->sk.sk_sleep);
+		break;
+
+	case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
+		dbfs_atomic_inc(&cnt.num_remote_shutdown_ind);
+		SET_REMOTE_SHUTDOWN(cf_sk);
+		/* Use sk_shutdown to indicate remote shutdown indication */
+		cf_sk->sk.sk_shutdown |= RCV_SHUTDOWN;
+		cf_sk->file_mode = 0;
+		wake_up_interruptible(cf_sk->sk.sk_sleep);
+		break;
+
+	default:
+		pr_debug("CAIF: %s(): Unexpected flow command %d\n",
+			      __func__, flow);
+	}
+}
+
+/*
+ * Destructor installed on transmitted skbs by caif_sendmsg(); only updates
+ * the debugfs counters (and is empty without CONFIG_DEBUG_FS).
+ */
+static void skb_destructor(struct sk_buff *skb)
+{
+	dbfs_atomic_inc(&cnt.skb_free);
+	dbfs_atomic_dec(&cnt.skb_in_use);
+}
+
+
+/*
+ * Receive data from a CAIF socket into the single iovec of @m.
+ *
+ * Waits for a pending connect to finish and for a packet to arrive,
+ * unless MSG_DONTWAIT is set in @flags. A packet that fits into @buf_len
+ * is dequeued and copied completely. A larger packet is rejected with
+ * -EMSGSIZE on SOCK_SEQPACKET sockets; on other socket types @buf_len
+ * bytes are copied and the remainder stays at the head of the queue.
+ * When the queue has drained below CHNL_SKT_READ_QUEUE_LOW while RX flow
+ * is off, a flow ON request is sent down the stack.
+ *
+ * Returns the number of bytes copied, or a negative error:
+ *  -EBADFD      no socket data
+ *  -EOPNOTSUPP  more than one iovec entry
+ *  -EINVAL      zero length buffer
+ *  -EPIPE       socket closed
+ *  -EBADF       socket is closing
+ *  -EAGAIN      MSG_DONTWAIT and the call would block
+ *  -ERESTARTSYS interrupted by a signal
+ *  -ESHUTDOWN   remote shutdown / socket no longer connected
+ *  -EMSGSIZE    packet larger than the buffer (SOCK_SEQPACKET)
+ *  -EFAULT      copy to user space failed
+ */
+static int caif_recvmsg(struct kiocb *iocb, struct socket *sock,
+				struct msghdr *m, size_t buf_len, int flags)
+
+{
+	struct sock *sk = sock->sk;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+	struct cfpkt *pkt = NULL;
+	size_t len;
+	int result;
+	struct sk_buff *skb;
+	ssize_t ret = -EIO;
+	int read_queue_low;
+	int dequeued = 0;
+
+	if (cf_sk == NULL) {
+		pr_debug("CAIF: %s(): private_data not set!\n",
+			      __func__);
+		ret = -EBADFD;
+		/* The socket lock is not held (and there is no socket) */
+		goto read_error_no_unlock;
+	}
+
+	/* Don't do multiple iovec entries yet */
+	if (m->msg_iovlen != 1)
+		return -EOPNOTSUPP;
+
+	if (unlikely(!buf_len))
+		return -EINVAL;
+
+	lock_sock(&(cf_sk->sk));
+
+	caif_assert(cf_sk->pktq);
+
+	if (!STATE_IS_OPEN(cf_sk)) {
+		/* Socket is closed or closing. */
+		if (!STATE_IS_PENDING(cf_sk)) {
+			pr_debug("CAIF: %s(): socket is closed (by remote)\n",
+				 __func__);
+			ret = -EPIPE;
+		} else {
+			pr_debug("CAIF: %s(): socket is closing..\n", __func__);
+			ret = -EBADF;
+		}
+		goto read_error;
+	}
+	/* Socket is open or opening. */
+	if (STATE_IS_PENDING(cf_sk)) {
+		pr_debug("CAIF: %s(): socket is opening...\n", __func__);
+
+		if (flags & MSG_DONTWAIT) {
+			/* We can't block. */
+			pr_debug("CAIF: %s():state pending and MSG_DONTWAIT\n",
+				 __func__);
+			ret = -EAGAIN;
+			goto read_error;
+		}
+
+		/*
+		 * Blocking mode; state is pending and we need to wait
+		 * for its conclusion.
+		 */
+		release_sock(&cf_sk->sk);
+
+		result =
+		    wait_event_interruptible(*cf_sk->sk.sk_sleep,
+					     !STATE_IS_PENDING(cf_sk));
+
+		lock_sock(&(cf_sk->sk));
+
+		if (result == -ERESTARTSYS) {
+			pr_debug("CAIF: %s(): wait_event_interruptible"
+				 " woken by a signal (1)", __func__);
+			ret = -ERESTARTSYS;
+			goto read_error;
+		}
+	}
+
+	if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) ||
+		!STATE_IS_OPEN(cf_sk) ||
+		STATE_IS_PENDING(cf_sk)) {
+
+		pr_debug("CAIF: %s(): socket closed\n",
+			__func__);
+		ret = -ESHUTDOWN;
+		goto read_error;
+	}
+
+	/*
+	 * Block if we don't have any received buffers.
+	 * The queue has its own lock.
+	 */
+	while ((pkt = cfpkt_qpeek(cf_sk->pktq)) == NULL) {
+
+		if (flags & MSG_DONTWAIT) {
+			pr_debug("CAIF: %s(): MSG_DONTWAIT\n", __func__);
+			ret = -EAGAIN;
+			goto read_error;
+		}
+		trace_printk("CAIF: %s() wait_event\n", __func__);
+
+		/* Let writers in. */
+		release_sock(&cf_sk->sk);
+
+		/* Block reader until data arrives or socket is closed. */
+		if (wait_event_interruptible(*cf_sk->sk.sk_sleep,
+					cfpkt_qpeek(cf_sk->pktq)
+					|| STATE_IS_REMOTE_SHUTDOWN(cf_sk)
+					|| !STATE_IS_OPEN(cf_sk)) ==
+		    -ERESTARTSYS) {
+			pr_debug("CAIF: %s():"
+				" wait_event_interruptible woken by "
+				"a signal, signal_pending(current) = %d\n",
+				__func__,
+				signal_pending(current));
+			/* Socket lock already released above */
+			return -ERESTARTSYS;
+		}
+
+		trace_printk("CAIF: %s() awake\n", __func__);
+		if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) {
+			pr_debug("CAIF: %s(): "
+				 "received remote_shutdown indication\n",
+				 __func__);
+			ret = -ESHUTDOWN;
+			goto read_error_no_unlock;
+		}
+
+		/* I want to be alone on cf_sk (except status and queue). */
+		lock_sock(&(cf_sk->sk));
+
+		if (!STATE_IS_OPEN(cf_sk)) {
+			/* Someone closed the link, report error. */
+			pr_debug("CAIF: %s(): remote end shutdown!\n",
+				      __func__);
+			ret = -EPIPE;
+			goto read_error;
+		}
+	}
+
+	/* The queue has its own lock. */
+	len = cfpkt_getlen(pkt);
+
+	/* Check max length that can be copied. */
+	if (len <= buf_len) {
+		pkt = cfpkt_dequeue(cf_sk->pktq);
+		dequeued = 1;
+	} else {
+		pr_debug("CAIF: %s(): user buffer too small (%ld,%ld)\n",
+			 __func__, (long) len, (long) buf_len);
+		if (sock->type == SOCK_SEQPACKET) {
+			ret = -EMSGSIZE;
+			goto read_error;
+		}
+		/* Partial read: the packet stays at the head of the queue */
+		len = buf_len;
+	}
+
+	/*
+	 * Queue accounting and flow control only apply when a packet really
+	 * left the queue; a partial read leaves the queue length unchanged.
+	 */
+	if (dequeued) {
+		/*
+		 * The receive callback takes this lock too and may run in
+		 * tasklet context, so bottom halves must be disabled here.
+		 */
+		spin_lock_bh(&cf_sk->read_queue_len_lock);
+		cf_sk->read_queue_len--;
+		read_queue_low =
+			(cf_sk->read_queue_len < CHNL_SKT_READ_QUEUE_LOW);
+		spin_unlock_bh(&cf_sk->read_queue_len_lock);
+
+		if (!RX_FLOW_IS_ON(cf_sk) && read_queue_low) {
+			dbfs_atomic_inc(&cnt.num_rx_flow_on);
+			SET_RX_FLOW_ON(cf_sk);
+
+			/* Send flow on. */
+			pr_debug("CAIF: %s(): sending flow ON (queue len = %d)\n",
+				 __func__, cf_sk->read_queue_len);
+			/* Assert the callback that is actually invoked */
+			caif_assert(cf_sk->layer.dn);
+			caif_assert(cf_sk->layer.dn->modemcmd);
+			(void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
+						CAIF_MODEMCMD_FLOW_ON_REQ);
+
+			caif_assert(cf_sk->read_queue_len >= 0);
+		}
+	}
+
+	skb = cfpkt_tonative(pkt);
+	result = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len);
+
+	if (result) {
+		pr_debug("CAIF: %s(): copy to_iovec failed\n", __func__);
+		/*
+		 * Only a dequeued packet is ours to destroy; a partially read
+		 * packet is still linked in the queue and is left untouched.
+		 */
+		if (dequeued)
+			cfpkt_destroy(pkt);
+		ret = -EFAULT;
+		goto read_error;
+	}
+
+	/* Consume the copied bytes only once the copy has succeeded. */
+	skb_pull(skb, len);
+
+	/* Free packet and remove from queue */
+	if (skb->len == 0)
+		skb_free_datagram(sk, skb);
+
+	/* Let the others in. */
+	release_sock(&cf_sk->sk);
+	return len;
+
+read_error:
+	release_sock(&cf_sk->sk);
+read_error_no_unlock:
+	return ret;
+}
+
+/*
+ * Send a signal as a consequence of sendmsg, sendto or caif_sendmsg.
+ *
+ * Copies the single iovec of @msg into a new CAIF packet and transmits it
+ * down the stack. Waits for a pending connect to finish and for TX flow
+ * to be on, unless MSG_DONTWAIT is set. Payloads larger than
+ * CAIF_MAX_PAYLOAD_SIZE are rejected on SOCK_SEQPACKET sockets and
+ * truncated on other socket types. The packet is released on every
+ * failure path after it has been created.
+ *
+ * Returns the number of bytes sent, or a negative error:
+ *  -EBADFD      no socket data
+ *  -EINVAL      NULL buffer, oversized SOCK_SEQPACKET payload, or the
+ *               payload could not be appended/copied
+ *  -EPIPE       socket closed
+ *  -EBADF       socket is closing
+ *  -EAGAIN      MSG_DONTWAIT and the call would block
+ *  -ERESTARTSYS interrupted by a signal
+ *  -ESHUTDOWN   remote shutdown / socket no longer connected
+ *  -ENOMEM      no packet could be allocated
+ * or the error returned by the lower layer's transmit callback.
+ */
+static int caif_sendmsg(struct kiocb *kiocb, struct socket *sock,
+			struct msghdr *msg, size_t len)
+{
+
+	struct sock *sk = sock->sk;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+	size_t payload_size = msg->msg_iov->iov_len;
+	struct cfpkt *pkt = NULL;
+	unsigned char *txbuf;
+	ssize_t ret = -EIO;
+	int result;
+	struct sk_buff *skb;
+	caif_assert(msg->msg_iovlen == 1);
+
+	if (cf_sk == NULL) {
+		pr_debug("CAIF: %s(): private_data not set!\n",
+			      __func__);
+		ret = -EBADFD;
+		goto write_error_no_unlock;
+	}
+
+	if (unlikely(msg->msg_iov->iov_base == NULL)) {
+		pr_warning("CAIF: %s(): Buffer is NULL.\n", __func__);
+		ret = -EINVAL;
+		goto write_error_no_unlock;
+	}
+
+	if (payload_size > CAIF_MAX_PAYLOAD_SIZE) {
+		pr_debug("CAIF: %s(): buffer too long\n", __func__);
+		if (sock->type == SOCK_SEQPACKET) {
+			ret = -EINVAL;
+			goto write_error_no_unlock;
+		}
+		payload_size = CAIF_MAX_PAYLOAD_SIZE;
+	}
+
+	/* I want to be alone on cf_sk (except status and queue) */
+	lock_sock(&(cf_sk->sk));
+
+	caif_assert(cf_sk->pktq);
+
+	if (!STATE_IS_OPEN(cf_sk)) {
+		/* Socket is closed or closing */
+		if (!STATE_IS_PENDING(cf_sk)) {
+			pr_debug("CAIF: %s(): socket is closed (by remote)\n",
+				 __func__);
+			ret = -EPIPE;
+		} else {
+			pr_debug("CAIF: %s(): socket is closing...\n",
+				 __func__);
+			ret = -EBADF;
+		}
+		goto write_error;
+	}
+
+	/* Socket is open or opening */
+	if (STATE_IS_PENDING(cf_sk)) {
+		pr_debug("CAIF: %s(): socket is opening...\n", __func__);
+
+		if (msg->msg_flags & MSG_DONTWAIT) {
+			/* We can't block */
+			trace_printk("CAIF: %s():state pending:"
+				     "state=MSG_DONTWAIT\n", __func__);
+			ret = -EAGAIN;
+			goto write_error;
+		}
+		/* Let readers in */
+		release_sock(&cf_sk->sk);
+
+		/*
+		 * Blocking mode; state is pending and we need to wait
+		 * for its conclusion.
+		 */
+		result =
+		    wait_event_interruptible(*cf_sk->sk.sk_sleep,
+					     !STATE_IS_PENDING(cf_sk));
+		/* I want to be alone on cf_sk (except status and queue) */
+		lock_sock(&(cf_sk->sk));
+
+		if (result == -ERESTARTSYS) {
+			pr_debug("CAIF: %s(): wait_event_interruptible"
+				 " woken by a signal (1)", __func__);
+			ret = -ERESTARTSYS;
+			goto write_error;
+		}
+	}
+	if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) ||
+		!STATE_IS_OPEN(cf_sk) ||
+		STATE_IS_PENDING(cf_sk)) {
+
+		pr_debug("CAIF: %s(): socket closed\n",
+			__func__);
+		ret = -ESHUTDOWN;
+		goto write_error;
+	}
+
+	if (!TX_FLOW_IS_ON(cf_sk)) {
+
+		/* Flow is off. Check non-block flag */
+		if (msg->msg_flags & MSG_DONTWAIT) {
+			trace_printk("CAIF: %s(): MSG_DONTWAIT and tx flow off",
+				 __func__);
+			ret = -EAGAIN;
+			goto write_error;
+		}
+
+		/* release lock before waiting */
+		release_sock(&cf_sk->sk);
+
+		/* Wait until flow is on or socket is closed */
+		if (wait_event_interruptible(*cf_sk->sk.sk_sleep,
+					TX_FLOW_IS_ON(cf_sk)
+					|| !STATE_IS_OPEN(cf_sk)
+					|| STATE_IS_REMOTE_SHUTDOWN(cf_sk)
+					) == -ERESTARTSYS) {
+			pr_debug("CAIF: %s():"
+				 " wait_event_interruptible woken by a signal",
+				 __func__);
+			ret = -ERESTARTSYS;
+			goto write_error_no_unlock;
+		}
+
+		/* I want to be alone on cf_sk (except status and queue) */
+		lock_sock(&(cf_sk->sk));
+
+		if (!STATE_IS_OPEN(cf_sk)) {
+			/* someone closed the link, report error */
+			pr_debug("CAIF: %s(): remote end shutdown!\n",
+				      __func__);
+			ret = -EPIPE;
+			goto write_error;
+		}
+
+		if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) {
+			pr_debug("CAIF: %s(): "
+				 "received remote_shutdown indication\n",
+				 __func__);
+			ret = -ESHUTDOWN;
+			goto write_error;
+		}
+	}
+
+	pkt = cfpkt_create(payload_size);
+	if (pkt == NULL) {
+		/* Allocation failed; nothing to release but the lock */
+		pr_debug("CAIF: %s(): cfpkt_create failed\n", __func__);
+		ret = -ENOMEM;
+		goto write_error;
+	}
+	skb = (struct sk_buff *)pkt;
+	skb->destructor = skb_destructor;
+	skb->sk = sk;
+	dbfs_atomic_inc(&cnt.skb_alloc);
+	dbfs_atomic_inc(&cnt.skb_in_use);
+	if (cfpkt_raw_append(pkt, (void **) &txbuf, payload_size) < 0) {
+		pr_debug("CAIF: %s(): cfpkt_raw_append failed\n", __func__);
+		cfpkt_destroy(pkt);
+		ret = -EINVAL;
+		goto write_error;
+	}
+
+	/* Copy data into buffer. */
+	if (copy_from_user(txbuf, msg->msg_iov->iov_base, payload_size)) {
+		pr_debug("CAIF: %s(): copy_from_user returned non zero.\n",
+			 __func__);
+		cfpkt_destroy(pkt);
+		ret = -EINVAL;
+		goto write_error;
+	}
+
+	/* Send the packet down the stack. */
+	caif_assert(cf_sk->layer.dn);
+	caif_assert(cf_sk->layer.dn->transmit);
+
+	do {
+		ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt);
+
+		if (likely((ret >= 0) || (ret != -EAGAIN)))
+			break;
+
+		/*
+		 * EAGAIN - retry. The packet was not consumed, so it must be
+		 * released on every path that gives up instead of retrying.
+		 */
+		if (msg->msg_flags & MSG_DONTWAIT) {
+			pr_debug("CAIF: %s(): NONBLOCK and transmit failed,"
+				 " error = %ld\n", __func__, (long) ret);
+			cfpkt_destroy(pkt);
+			ret = -EAGAIN;
+			goto write_error;
+		}
+
+		/* Let readers in */
+		release_sock(&cf_sk->sk);
+
+		/* Wait until flow is on or socket is closed */
+		if (wait_event_interruptible(*cf_sk->sk.sk_sleep,
+					TX_FLOW_IS_ON(cf_sk)
+					|| !STATE_IS_OPEN(cf_sk)
+					|| STATE_IS_REMOTE_SHUTDOWN(cf_sk)
+					) == -ERESTARTSYS) {
+			pr_debug("CAIF: %s(): wait_event_interruptible"
+				 " woken by a signal", __func__);
+			cfpkt_destroy(pkt);
+			ret = -ERESTARTSYS;
+			goto write_error_no_unlock;
+		}
+
+		/* I want to be alone on cf_sk (except status and queue) */
+		lock_sock(&(cf_sk->sk));
+
+	} while (ret == -EAGAIN);
+
+	if (ret < 0) {
+		cfpkt_destroy(pkt);
+		pr_debug("CAIF: %s(): transmit failed, error = %ld\n",
+			 __func__, (long) ret);
+
+		goto write_error;
+	}
+
+	release_sock(&cf_sk->sk);
+	return payload_size;
+
+write_error:
+	release_sock(&cf_sk->sk);
+write_error_no_unlock:
+	return ret;
+}
+
+/*
+ * poll() support for CAIF sockets.
+ *
+ * An open socket reports readable when a packet is queued and writable
+ * when TX flow is on. A socket that is neither open nor pending reports
+ * POLLHUP; one that is not open but still pending reports nothing.
+ */
+static unsigned int caif_poll(struct file *file, struct socket *sock,
+						poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+	u32 events = 0;
+
+	poll_wait(file, sk->sk_sleep, wait);
+
+	lock_sock(&(cf_sk->sk));
+	if (STATE_IS_OPEN(cf_sk)) {
+		if (cfpkt_qpeek(cf_sk->pktq) != NULL)
+			events |= (POLLIN | POLLRDNORM);
+		if (TX_FLOW_IS_ON(cf_sk))
+			events |= (POLLOUT | POLLWRNORM);
+	} else if (!STATE_IS_PENDING(cf_sk)) {
+		events |= POLLHUP;
+	}
+	release_sock(&cf_sk->sk);
+
+	trace_printk("CAIF: %s(): poll mask=0x%04x\n",
+		      __func__, events);
+	return events;
+}
+
+/*
+ * Throw away every packet still waiting in the socket's receive queue
+ * and reset the queue length counter.
+ */
+static void drain_queue(struct caifsock *cf_sk)
+{
+	struct cfpkt *stale;
+
+	/* The queue has its own lock; pktq is re-checked on every round */
+	while (cf_sk->pktq) {
+		stale = cfpkt_dequeue(cf_sk->pktq);
+		if (!stale)
+			break;
+		pr_debug("CAIF: %s(): freeing packet from read queue\n",
+			 __func__);
+		cfpkt_destroy(stale);
+	}
+
+	cf_sk->read_queue_len = 0;
+}
+
+/*
+ * Set a socket option on a CAIF socket that is not yet connected.
+ *
+ * Supported options:
+ *  - SOL_CAIF/CAIFSO_LINK_SELECT:  int, stored as conn_req.link_selector
+ *  - SOL_SOCKET/SO_PRIORITY:       int, stored as conn_req.priority
+ *  - SOL_SOCKET/SO_BINDTODEVICE:   struct ifreq, its ifr_name is stored as
+ *                                  conn_req.link_name
+ *  - SOL_CAIF/CAIFSO_REQ_PARAM:    up to sizeof(conn_req.param.data) bytes
+ *                                  (CAIFPROTO_UTIL sockets only)
+ *
+ * Returns 0 on success, -ENOPROTOOPT for an open socket, a wrong level or
+ * a wrong protocol, -EINVAL for a bad size, an unknown option or (for all
+ * options but SO_BINDTODEVICE) a failed copy from user space, and -EFAULT
+ * when copying the SO_BINDTODEVICE argument fails.
+ */
+static int setsockopt(struct socket *sock,
+			int lvl, int opt, char __user *ov, unsigned int ol)
+{
+	struct sock *sk = sock->sk;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+	int prio, linksel;
+	struct ifreq ifreq;
+
+	if (STATE_IS_OPEN(cf_sk)) {
+		pr_debug("CAIF: %s(): setsockopt "
+			 "cannot be done on a connected socket\n",
+			 __func__);
+		return -ENOPROTOOPT;
+	}
+	switch (opt) {
+	case CAIFSO_LINK_SELECT:
+		if (ol < sizeof(int)) {
+			pr_debug("CAIF: %s(): setsockopt"
+				 " CAIFSO_LINK_SELECT bad size\n", __func__);
+			return -EINVAL;
+		}
+		if (lvl != SOL_CAIF)
+			goto bad_sol;
+		if (copy_from_user(&linksel, ov, sizeof(int)))
+			return -EINVAL;
+		lock_sock(&(cf_sk->sk));
+		cf_sk->conn_req.link_selector = linksel;
+		release_sock(&cf_sk->sk);
+		return 0;
+
+	case SO_PRIORITY:
+		if (lvl != SOL_SOCKET)
+			goto bad_sol;
+		if (ol < sizeof(int)) {
+			pr_debug("CAIF: %s(): setsockopt"
+				 " SO_PRIORITY bad size\n", __func__);
+			return -EINVAL;
+		}
+		if (copy_from_user(&prio, ov, sizeof(int)))
+			return -EINVAL;
+		lock_sock(&(cf_sk->sk));
+		cf_sk->conn_req.priority = prio;
+		pr_debug("CAIF: %s(): Setting sockopt priority=%d\n", __func__,
+			cf_sk->conn_req.priority);
+		release_sock(&cf_sk->sk);
+		return 0;
+
+	case SO_BINDTODEVICE:
+		if (lvl != SOL_SOCKET)
+			goto bad_sol;
+		if (ol < sizeof(struct ifreq)) {
+			pr_debug("CAIF: %s(): setsockopt"
+				 " SO_BINDTODEVICE bad size\n", __func__);
+			return -EINVAL;
+		}
+		if (copy_from_user(&ifreq, ov, sizeof(ifreq)))
+			return -EFAULT;
+		lock_sock(&(cf_sk->sk));
+		strncpy(cf_sk->conn_req.link_name, ifreq.ifr_name,
+			sizeof(cf_sk->conn_req.link_name));
+		/* strncpy() does not terminate a name that fills the buffer */
+		cf_sk->conn_req.link_name
+			[sizeof(cf_sk->conn_req.link_name)-1] = 0;
+		release_sock(&cf_sk->sk);
+		return 0;
+
+	case CAIFSO_REQ_PARAM:
+		if (lvl != SOL_CAIF)
+			goto bad_sol;
+		if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL)
+			return -ENOPROTOOPT;
+		if (ol > sizeof(cf_sk->conn_req.param.data)) {
+			pr_debug("CAIF: %s(): setsockopt"
+				 " CAIFSO_REQ_PARAM bad size\n", __func__);
+			return -EINVAL;
+		}
+
+		lock_sock(&(cf_sk->sk));
+		if (copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) {
+			/*
+			 * The data area may be partly overwritten; make sure
+			 * no stale length refers to it.
+			 */
+			cf_sk->conn_req.param.size = 0;
+			release_sock(&cf_sk->sk);
+			pr_debug("CAIF: %s(): setsockopt"
+				 " CAIFSO_REQ_PARAM copy failed\n", __func__);
+			return -EINVAL;
+		}
+		/* Commit the length only once the data is in place */
+		cf_sk->conn_req.param.size = ol;
+		release_sock(&cf_sk->sk);
+		return 0;
+
+	default:
+		pr_debug("CAIF: %s(): unhandled option %d\n", __func__, opt);
+		return -EINVAL;
+	}
+
+bad_sol:
+	pr_debug("CAIF: %s(): setsockopt bad level\n", __func__);
+	return -ENOPROTOOPT;
+
+}
+
+/*
+ * caif_connect() - connect a CAIF socket to the address in @uservaddr.
+ *
+ * @sock:         socket to connect (SOCK_SEQPACKET or SOCK_STREAM)
+ * @uservaddr:    destination; must be a struct sockaddr_caif with AF_CAIF
+ * @sockaddr_len: length of @uservaddr
+ * @flags:        file flags; O_NONBLOCK selects non-blocking operation
+ *
+ * If the socket is closed with a close still pending, the call first waits
+ * for it (or fails with -EAGAIN when non-blocking).  A closed socket is then
+ * registered through caif_connect_client() and, in blocking mode, the call
+ * sleeps until the pending state is cleared.
+ *
+ * Returns 0 once connected, -EINPROGRESS for a non-blocking connect that
+ * was started, -EINVAL / -EAFNOSUPPORT for a bad address, -EAGAIN while a
+ * close is pending on a non-blocking socket, -ERESTARTSYS when interrupted
+ * by a signal, -EPIPE when the socket is not open after the wait, and -EIO
+ * for an unsupported socket type or a failed channel registration.
+ */
+static int caif_connect(struct socket *sock, struct sockaddr *uservaddr,
+	       int sockaddr_len, int flags)
+{
+	struct caifsock *cf_sk = NULL;
+	int result = -1;
+	int mode = 0;
+	int ret = -EIO;
+	struct sock *sk = sock->sk;
+	BUG_ON(sk == NULL);
+
+	cf_sk = container_of(sk, struct caifsock, sk);
+
+	trace_printk("CAIF: %s(): cf_sk=%p OPEN=%d, TX_FLOW=%d, RX_FLOW=%d\n",
+		 __func__, cf_sk,
+		STATE_IS_OPEN(cf_sk),
+		TX_FLOW_IS_ON(cf_sk), RX_FLOW_IS_ON(cf_sk));
+
+
+	if (sock->type == SOCK_SEQPACKET || sock->type == SOCK_STREAM)
+		sock->state	= SS_CONNECTING;
+	else
+		goto out;
+
+	/* I want to be alone on cf_sk (except status and queue) */
+	lock_sock(&(cf_sk->sk));
+
+	if (sockaddr_len != sizeof(struct sockaddr_caif)) {
+		pr_debug("CAIF: %s(): Bad address len (%ld,%lu)\n",
+			 __func__, (long) sockaddr_len,
+			(long unsigned) sizeof(struct sockaddr_caif));
+		ret = -EINVAL;
+		goto open_error;
+	}
+
+	if (uservaddr->sa_family != AF_CAIF) {
+		pr_debug("CAIF: %s(): Bad address family (%d)\n",
+			 __func__, uservaddr->sa_family);
+		ret = -EAFNOSUPPORT;
+		goto open_error;
+	}
+
+	memcpy(&cf_sk->conn_req.sockaddr, uservaddr,
+		sizeof(struct sockaddr_caif));
+
+	dbfs_atomic_inc(&cnt.num_open);
+	mode = SKT_READ_FLAG | SKT_WRITE_FLAG;
+
+	/* If socket is not open, make sure socket is in fully closed state */
+	if (!STATE_IS_OPEN(cf_sk)) {
+		/* Has link close response been received (if we ever sent it)?*/
+		if (STATE_IS_PENDING(cf_sk)) {
+			/*
+			 * Still waiting for close response from remote.
+			 * If opened non-blocking, report "would block"
+			 */
+			if (flags & O_NONBLOCK) {
+				pr_debug("CAIF: %s(): O_NONBLOCK"
+					" && close pending\n", __func__);
+				ret = -EAGAIN;
+				goto open_error;
+			}
+
+			pr_debug("CAIF: %s(): Wait for close response"
+				 " from remote...\n", __func__);
+
+			release_sock(&cf_sk->sk);
+
+			/*
+			 * Blocking mode; close is pending and we need to wait
+			 * for its conclusion.
+			 */
+			result =
+			    wait_event_interruptible(*cf_sk->sk.sk_sleep,
+						     !STATE_IS_PENDING(cf_sk));
+
+			lock_sock(&(cf_sk->sk));
+			if (result == -ERESTARTSYS) {
+				pr_debug("CAIF: %s(): wait_event_interruptible"
+					 " woken by a signal (1)\n", __func__);
+				ret = -ERESTARTSYS;
+				goto open_error;
+			}
+		}
+	}
+
+	/* socket is now either closed, pending open or open */
+	if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) {
+		/* Open */
+		pr_debug("CAIF: %s(): Socket is already opened (cf_sk=%p)"
+			" check access f_flags = 0x%x file_mode = 0x%x\n",
+			 __func__, cf_sk, mode, cf_sk->file_mode);
+
+	} else {
+		/* We are closed or pending open.
+		 * If closed:	    send link setup
+		 * If pending open: link setup already sent (we could have been
+		 *		    interrupted by a signal last time)
+		 */
+		if (!STATE_IS_OPEN(cf_sk)) {
+			/* First opening of file; connect lower layers: */
+			/* Drain queue (very unlikely) */
+			drain_queue(cf_sk);
+
+			cf_sk->layer.receive = caif_sktrecv_cb;
+			SET_STATE_OPEN(cf_sk);
+			SET_PENDING_ON(cf_sk);
+
+			/* Register this channel. */
+			result =
+				caif_connect_client(&cf_sk->conn_req,
+							&cf_sk->layer);
+			if (result < 0) {
+				pr_debug("CAIF: %s(): can't register channel\n",
+					__func__);
+				ret = -EIO;
+				SET_STATE_CLOSED(cf_sk);
+				SET_PENDING_OFF(cf_sk);
+				goto open_error;
+			}
+			dbfs_atomic_inc(&cnt.num_init);
+		}
+
+		/* If opened non-blocking, report "success".
+		 */
+		if (flags & O_NONBLOCK) {
+			pr_debug("CAIF: %s(): O_NONBLOCK success\n",
+				 __func__);
+			ret = -EINPROGRESS;
+			/*
+			 * sk_err holds a positive errno; sock_error() negates
+			 * it when handing it to the caller.
+			 */
+			cf_sk->sk.sk_err = EINPROGRESS;
+			goto open_error;
+		}
+
+		trace_printk("CAIF: %s(): Wait for connect response\n",
+			     __func__);
+
+		/* release lock before waiting */
+		release_sock(&cf_sk->sk);
+
+		result =
+		    wait_event_interruptible(*cf_sk->sk.sk_sleep,
+					     !STATE_IS_PENDING(cf_sk));
+
+		lock_sock(&(cf_sk->sk));
+
+		if (result == -ERESTARTSYS) {
+			pr_debug("CAIF: %s(): wait_event_interruptible"
+				 " woken by a signal (2)\n", __func__);
+			ret = -ERESTARTSYS;
+			goto open_error;
+		}
+
+		if (!STATE_IS_OPEN(cf_sk)) {
+			/* Lower layers said "no" */
+			pr_debug("CAIF: %s(): Closed received\n", __func__);
+			ret = -EPIPE;
+			goto open_error;
+		}
+
+		trace_printk("CAIF: %s(): Connect received\n", __func__);
+	}
+	/* Open is ok */
+	cf_sk->file_mode |= mode;
+
+	trace_printk("CAIF: %s(): Connected - file mode = %x\n",
+		  __func__, cf_sk->file_mode);
+
+	release_sock(&cf_sk->sk);
+	return 0;
+open_error:
+	/* Every failure (and the non-blocking path) resets the socket state. */
+	sock->state	= SS_UNCONNECTED;
+	release_sock(&cf_sk->sk);
+out:
+	return ret;
+}
+
+/*
+ * caif_shutdown() - shut down both directions of a CAIF socket.
+ *
+ * Only SHUT_RDWR is supported.  An open, non-pending socket is marked
+ * closed/pending and a disconnect is requested from the CAIF stack; the read
+ * queue is drained and the file mode cleared.
+ *
+ * Returns 0 on success for a blocking socket, -EAGAIN on success for an
+ * O_NONBLOCK socket, -EOPNOTSUPP for any other @how, -EBADF if no CAIF
+ * socket is found, and -EIO when caif_disconnect_client() fails.
+ */
+static int caif_shutdown(struct socket *sock, int how)
+{
+	struct caifsock *cf_sk = NULL;
+	int result = 0;
+	struct sock *sk = sock->sk;
+
+	trace_printk("CAIF: %s(): enter\n", __func__);
+	pr_debug("f_flags=%x\n", sock->file->f_flags);
+
+	if (how != SHUT_RDWR)
+		return -EOPNOTSUPP;
+
+	cf_sk = container_of(sk, struct caifsock, sk);
+	if (cf_sk == NULL) {
+		pr_debug("CAIF: %s(): COULD NOT FIND SOCKET\n", __func__);
+		return -EBADF;
+	}
+
+	/* I want to be alone on cf_sk (except status queue) */
+	lock_sock(&(cf_sk->sk));
+	sock_hold(&cf_sk->sk);
+
+	/* IS_CLOSED has a double meaning:
+	 * 1) Spontaneous Remote Shutdown Request.
+	 * 2) Ack on a channel teardown (disconnect)
+	 * Must clear bit in case we previously received
+	 * remote shutdown request.
+	 */
+	if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) {
+		SET_STATE_CLOSED(cf_sk);
+		SET_PENDING_ON(cf_sk);
+		SET_TX_FLOW_OFF(cf_sk);
+
+		/* Hold the socket until DEINIT_RSP is received */
+		sock_hold(&cf_sk->sk);
+		result = caif_disconnect_client(&cf_sk->layer);
+
+		if (result < 0) {
+			pr_debug("CAIF: %s(): "
+					"caif_disconnect_client() failed\n",
+					 __func__);
+			SET_STATE_CLOSED(cf_sk);
+			SET_PENDING_OFF(cf_sk);
+			SET_TX_FLOW_OFF(cf_sk);
+			release_sock(&cf_sk->sk);
+			/*
+			 * Two references were taken above: ours and the one
+			 * kept for DEINIT_RSP.  The disconnect failed, so no
+			 * response is expected and both must be dropped.
+			 */
+			sock_put(&cf_sk->sk);
+			sock_put(&cf_sk->sk);
+			return -EIO;
+		}
+
+	}
+	if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) {
+		SET_PENDING_OFF(cf_sk);
+		SET_REMOTE_SHUTDOWN_OFF(cf_sk);
+	}
+
+	/*
+	 * Socket is no longer in state pending close,
+	 * and we can release the reference.
+	 */
+
+	dbfs_atomic_inc(&cnt.num_close);
+	drain_queue(cf_sk);
+	SET_RX_FLOW_ON(cf_sk);
+	cf_sk->file_mode = 0;
+	/* Unlock before dropping our reference, never after. */
+	release_sock(&cf_sk->sk);
+	sock_put(&cf_sk->sk);
+	if (!result && (sock->file->f_flags & O_NONBLOCK)) {
+		pr_debug("nonblocking shutdown returing -EAGAIN\n");
+		return -EAGAIN;
+	} else
+		return result;
+}
+
+/* sendpage handler for CAIF sockets: always fails with -EOPNOTSUPP. */
+static ssize_t caif_sock_no_sendpage(struct socket *sock,
+				     struct page *page,
+				     int offset, size_t size, int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+/*
+ * caif_release() - called as part of close.
+ *
+ * Removes the socket's debugfs directory, shuts the socket down, waits for
+ * the pending state to clear (blocking sockets only), then detaches the
+ * struct sock from @sock and drops a reference to it.  The detach always
+ * runs, even when the shutdown failed or is still in progress, so that the
+ * struct sock is not left attached to a socket that is going away.
+ *
+ * Returns the shutdown error if the shutdown failed, otherwise the result
+ * of the wait (blocking) or of the shutdown (non-blocking).
+ */
+static int caif_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct caifsock *cf_sk = NULL;
+	int res;
+	caif_assert(sk != NULL);
+	cf_sk = container_of(sk, struct caifsock, sk);
+
+	if (cf_sk->debugfs_socket_dir != NULL)
+		debugfs_remove_recursive(cf_sk->debugfs_socket_dir);
+
+	res = caif_shutdown(sock, SHUT_RDWR);
+	if (res && res != -EINPROGRESS && res != -EAGAIN) {
+		/*
+		 * Shutdown failed.  Nothing is pending, so skip the wait but
+		 * still run the teardown below.
+		 */
+		pr_debug("CAIF: %s(): shutdown failed (%d),"
+			 " releasing anyway\n", __func__, res);
+	} else if (!(sock->file->f_flags & O_NONBLOCK)) {
+		/*
+		 * FIXME: Shutdown should probably be possible to do async
+		 * without flushing queues, allowing reception of frames while
+		 * waiting for DEINIT_IND.
+		 * Release should always block, to allow secure decoupling of
+		 * CAIF stack.
+		 */
+		res = wait_event_interruptible(*cf_sk->sk.sk_sleep,
+						!STATE_IS_PENDING(cf_sk));
+
+		if (res == -ERESTARTSYS) {
+			pr_debug("CAIF: %s(): wait_event_interruptible"
+				" woken by a signal (1)\n", __func__);
+		}
+	}
+	/*
+	 * A non-blocking shutdown reports -EAGAIN while the close is still
+	 * in progress; that case falls through to the teardown as well.
+	 */
+	lock_sock(&(cf_sk->sk));
+
+	sock->sk = NULL;
+
+	/* Detach the socket from its process context by making it orphan. */
+	sock_orphan(sk);
+
+	/*
+	 * Setting SHUTDOWN_MASK means that both send and receive are shutdown
+	 * for the socket.
+	 */
+	sk->sk_shutdown = SHUTDOWN_MASK;
+
+	/* Flush out this sockets receive queue. */
+	drain_queue(cf_sk);
+
+	/* Finally release the socket. */
+	SET_STATE_PENDING_DESTROY(cf_sk);
+
+	release_sock(&cf_sk->sk);
+
+	sock_put(sk);
+
+	/*
+	 * The rest of the cleanup will be handled from the
+	 * caif_sock_destructor
+	 */
+	return res;
+}
+
+/*
+ * Socket operations for PF_CAIF.  Only release, connect, poll, shutdown,
+ * setsockopt, sendmsg and recvmsg have CAIF implementations; sendpage is
+ * rejected by caif_sock_no_sendpage() and the rest use the sock_no_*()
+ * stubs.
+ */
+static const struct proto_ops caif_ops = {
+	.family = PF_CAIF,
+	.owner = THIS_MODULE,
+	.release = caif_release,
+	.bind = sock_no_bind,
+	.connect = caif_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = sock_no_accept,
+	.getname = sock_no_getname,
+	.poll = caif_poll,
+	.ioctl = sock_no_ioctl,
+	.listen = sock_no_listen,
+	.shutdown = caif_shutdown,
+	.setsockopt = setsockopt,
+	.getsockopt = sock_no_getsockopt,
+	.sendmsg = caif_sendmsg,
+	.recvmsg = caif_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = caif_sock_no_sendpage,
+};
+
+/*
+ * caif_sock_destructor() - installed as sk->sk_destruct by caif_create().
+ *
+ * Drains and frees the packet queue and decrements the socket counter.
+ * Nothing is freed if the sock is not flagged SOCK_DEAD or the CAIF state is
+ * still open; those cases are only logged.
+ */
+static void caif_sock_destructor(struct sock *sk)
+{
+	struct caifsock *cf_sk = NULL;
+	cf_sk = container_of(sk, struct caifsock, sk);
+	/* Error checks. */
+	caif_assert(!atomic_read(&sk->sk_wmem_alloc));
+	caif_assert(sk_unhashed(sk));
+	caif_assert(!sk->sk_socket);
+	/* NOTE(review): this early return skips kfree(pktq) and atomic_dec. */
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		pr_debug("CAIF: %s(): 0x%p", __func__, sk);
+		return;
+	}
+
+	/* NOTE(review): same here - queue and counter are left untouched. */
+	if (STATE_IS_OPEN(cf_sk)) {
+		pr_debug("CAIF: %s(): socket is opened (cf_sk=%p)"
+			 " file_mode = 0x%x\n", __func__,
+			 cf_sk, cf_sk->file_mode);
+		return;
+	}
+	drain_queue(cf_sk);
+	kfree(cf_sk->pktq);
+
+	trace_printk("CAIF: %s(): caif_sock_destructor: Removing socket %s\n",
+		__func__, cf_sk->name);
+	atomic_dec(&caif_nr_socks);
+}
+
+/*
+ * caif_create() - create callback of the PF_CAIF protocol family.
+ *
+ * Accepts only SOCK_SEQPACKET and SOCK_STREAM sockets in init_net with
+ * 0 <= @protocol < CAIFPROTO_MAX.  Allocates the sock, names it, creates its
+ * packet queue, puts it in the closed state with default connection
+ * parameters and, when the debugfs root is usable, exposes a few fields
+ * under a per-socket debugfs directory.
+ *
+ * Returns 0 on success, -ESOCKTNOSUPPORT, -EAFNOSUPPORT or
+ * -EPROTONOSUPPORT for rejected arguments, and -ENOMEM when the sock or its
+ * packet queue cannot be allocated.
+ */
+static int caif_create(struct net *net, struct socket *sock, int protocol,
+		       int kern)
+{
+	struct sock *sk = NULL;
+	struct caifsock *cf_sk = NULL;
+	int result = 0;
+	/* obj_size makes sk_alloc() allocate a whole struct caifsock. */
+	static struct proto prot = {.name = "PF_CAIF",
+		.owner = THIS_MODULE,
+		.obj_size = sizeof(struct caifsock),
+	};
+
+	/*
+	 * The sock->type specifies the socket type to use.
+	 * in SEQPACKET mode packet boundaries are enforced.
+	 */
+	if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
+		return -ESOCKTNOSUPPORT;
+
+	if (net != &init_net)
+		return -EAFNOSUPPORT;
+
+	if (protocol < 0 || protocol >= CAIFPROTO_MAX)
+		return -EPROTONOSUPPORT;
+	/*
+	 * Set the socket state to unconnected.	 The socket state is really
+	 * not used at all in the net/core or socket.c but the
+	 * initialization makes sure that sock->state is not uninitialized.
+	 */
+	sock->state = SS_UNCONNECTED;
+
+	sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot);
+	if (!sk)
+		return -ENOMEM;
+
+	cf_sk = container_of(sk, struct caifsock, sk);
+
+	/* Store the protocol */
+	sk->sk_protocol = (unsigned char) protocol;
+
+	spin_lock_init(&cf_sk->read_queue_len_lock);
+
+	/*
+	 * Fill in some information concerning the misc socket.
+	 * NOTE(review): the counter is read here but only incremented further
+	 * down, so two concurrent creates could pick the same name.
+	 */
+	snprintf(cf_sk->name, sizeof(cf_sk->name), "cf_sk%d",
+		atomic_read(&caif_nr_socks));
+
+	/*
+	 * Lock in order to try to stop someone from opening the socket
+	 * too early.
+	 */
+	lock_sock(&(cf_sk->sk));
+
+	/* Initialize the nozero default sock structure data. */
+	sock_init_data(sock, sk);
+	sock->ops = &caif_ops;
+	sk->sk_destruct = caif_sock_destructor;
+	sk->sk_sndbuf = caif_sockbuf_size;
+	sk->sk_rcvbuf = caif_sockbuf_size;
+
+	cf_sk->pktq = cfpktq_create();
+
+	if (!cf_sk->pktq) {
+		pr_err("CAIF: %s(): queue create failed.\n", __func__);
+		result = -ENOMEM;
+		release_sock(&cf_sk->sk);
+		goto err_failed;
+	}
+	cf_sk->layer.ctrlcmd = caif_sktflowctrl_cb;
+	/* Initial state: closed, nothing pending, TX flow off, RX flow on. */
+	SET_STATE_CLOSED(cf_sk);
+	SET_PENDING_OFF(cf_sk);
+	SET_TX_FLOW_OFF(cf_sk);
+	SET_RX_FLOW_ON(cf_sk);
+
+	/* Set default options on configuration */
+	cf_sk->conn_req.priority = CAIF_PRIO_NORMAL;
+	cf_sk->conn_req.link_selector = CAIF_LINK_HIGH_BANDW;
+	cf_sk->conn_req.protocol = protocol;
+	/* Increase the number of sockets created. */
+	atomic_inc(&caif_nr_socks);
+	/* The results of the debugfs_create_*() calls are not checked. */
+	if (!IS_ERR(debugfsdir)) {
+		cf_sk->debugfs_socket_dir =
+			debugfs_create_dir(cf_sk->name, debugfsdir);
+		debugfs_create_u32("conn_state", S_IRUSR | S_IWUSR,
+				cf_sk->debugfs_socket_dir, &cf_sk->conn_state);
+		debugfs_create_u32("flow_state", S_IRUSR | S_IWUSR,
+				cf_sk->debugfs_socket_dir, &cf_sk->flow_state);
+		debugfs_create_u32("read_queue_len", S_IRUSR | S_IWUSR,
+				cf_sk->debugfs_socket_dir,
+				(u32 *) &cf_sk->read_queue_len);
+		debugfs_create_u32("identity", S_IRUSR | S_IWUSR,
+				cf_sk->debugfs_socket_dir,
+				(u32 *) &cf_sk->layer.id);
+	}
+	release_sock(&cf_sk->sk);
+	return 0;
+err_failed:
+	/* NOTE(review): sock->sk still points at sk here - confirm callers. */
+	sk_free(sk);
+	return result;
+}
+
+/* PF_CAIF family descriptor passed to sock_register() in af_caif_init(). */
+static struct net_proto_family caif_family_ops = {
+	.family = PF_CAIF,
+	.create = caif_create,
+	.owner = THIS_MODULE,
+};
+
+/*
+ * af_caif_init() - register the PF_CAIF socket family.
+ *
+ * Returns 0 on success or the negative error from sock_register().  The
+ * previous "if (!err) return err; return 0;" returned 0 on every path and
+ * hid registration failures from the module init code.
+ */
+static int af_caif_init(void)
+{
+	return sock_register(&caif_family_ops);
+}
+
+/*
+ * caif_sktinit_module() - module init for the CAIF socket layer.
+ *
+ * With CONFIG_DEBUG_FS, creates the "chnl_skt" debugfs directory and
+ * exposes the module counters in it, then registers the socket family.
+ * If registration fails the debugfs directory is removed again, because
+ * the module exit handler does not run after a failed init.
+ *
+ * Returns 0 on success or the error from af_caif_init().
+ */
+static int __init caif_sktinit_module(void)
+{
+	int stat;
+#ifdef CONFIG_DEBUG_FS
+	debugfsdir = debugfs_create_dir("chnl_skt", NULL);
+	if (!IS_ERR(debugfsdir)) {
+		debugfs_create_u32("skb_inuse", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.skb_in_use);
+		debugfs_create_u32("skb_alloc", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.skb_alloc);
+		debugfs_create_u32("skb_free", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.skb_free);
+		debugfs_create_u32("num_sockets", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &caif_nr_socks);
+		debugfs_create_u32("num_open", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.num_open);
+		debugfs_create_u32("num_close", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.num_close);
+		debugfs_create_u32("num_init", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.num_init);
+		debugfs_create_u32("num_init_resp", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.num_init_resp);
+		debugfs_create_u32("num_init_fail_resp", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.num_init_fail_resp);
+		debugfs_create_u32("num_deinit", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.num_deinit);
+		debugfs_create_u32("num_deinit_resp", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.num_deinit_resp);
+		debugfs_create_u32("num_remote_shutdown_ind",
+				S_IRUSR | S_IWUSR, debugfsdir,
+				(u32 *) &cnt.num_remote_shutdown_ind);
+		debugfs_create_u32("num_tx_flow_off_ind", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.num_tx_flow_off_ind);
+		debugfs_create_u32("num_tx_flow_on_ind", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.num_tx_flow_on_ind);
+		debugfs_create_u32("num_rx_flow_off", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.num_rx_flow_off);
+		debugfs_create_u32("num_rx_flow_on", S_IRUSR | S_IWUSR,
+				debugfsdir,
+				(u32 *) &cnt.num_rx_flow_on);
+	}
+#endif
+	stat = af_caif_init();
+	if (stat) {
+		pr_err("CAIF: %s(): Failed to initialize CAIF socket layer.\n",
+		       __func__);
+		/* Same guard as the exit handler uses for the removal. */
+		if (debugfsdir != NULL)
+			debugfs_remove_recursive(debugfsdir);
+		return stat;
+	}
+	return 0;
+}
+
+/*
+ * caif_sktexit_module() - module exit: unregister the PF_CAIF family and
+ * remove the module's debugfs directory if one was recorded.
+ */
+static void __exit caif_sktexit_module(void)
+{
+	sock_unregister(PF_CAIF);
+	if (debugfsdir != NULL)
+		debugfs_remove_recursive(debugfsdir);
+}
+
+module_init(caif_sktinit_module);
+module_exit(caif_sktexit_module);
-- 
cgit v1.2.3-59-g8ed1b


From cc36a070b5901cd54386348b4d79d2daac91ce75 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Tue, 30 Mar 2010 13:56:27 +0000
Subject: net-caif: add CAIF netdevice

Adding GPRS Net Device for PDP Contexts.
The device can be managed by RTNL as defined in if_caif.h.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/chnl_net.c | 451 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 451 insertions(+)
 create mode 100644 net/caif/chnl_net.c

(limited to 'net')

diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
new file mode 100644
index 000000000000..f622ff1d39ba
--- /dev/null
+++ b/net/caif/chnl_net.c
@@ -0,0 +1,451 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Authors:	Sjur Brendeland/sjur.brandeland@stericsson.com
+ *		Daniel Martensson / Daniel.Martensson@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/version.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/if_ether.h>
+#include <linux/moduleparam.h>
+#include <linux/ip.h>
+#include <linux/sched.h>
+#include <linux/sockios.h>
+#include <linux/caif/if_caif.h>
+#include <net/rtnetlink.h>
+#include <net/caif/caif_layer.h>
+#include <net/caif/cfcnfg.h>
+#include <net/caif/cfpkt.h>
+#include <net/caif/caif_dev.h>
+
+#define CAIF_CONNECT_TIMEOUT 30
+#define SIZE_MTU 1500
+#define SIZE_MTU_MAX 4080
+#define SIZE_MTU_MIN 68
+#define CAIF_NET_DEFAULT_QUEUE_LEN 500
+
+#undef pr_debug
+#define pr_debug pr_warning
+
+/*This list is protected by the rtnl lock. */
+static LIST_HEAD(chnl_net_list);
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("caif");
+
+/*
+ * Private data of a CAIF network device; the rtnl link ops allocate it with
+ * priv_size = sizeof(struct chnl_net) and it is reached via netdev_priv().
+ */
+struct chnl_net {
+	/* CAIF layer; the callbacks recover the chnl_net via container_of() */
+	struct cflayer chnl;
+	/* NOTE(review): not used below; counters go to netdev->stats */
+	struct net_device_stats stats;
+	/* Connection parameters handed to caif_connect_client() */
+	struct caif_connect_request conn_req;
+	/* Linkage in chnl_net_list */
+	struct list_head list_field;
+	/* Back pointer to the owning net_device */
+	struct net_device *netdev;
+	/* Copy of the interface name, filled in by chnl_net_init() */
+	char name[256];
+	/* chnl_net_open()/chnl_net_stop() sleep here waiting on flowenabled */
+	wait_queue_head_t netmgmt_wq;
+	/* Flow status to remember and control the transmission. */
+	bool flowenabled;
+	/* Set by the control callback; close_work() unregisters such devices */
+	bool pending_close;
+};
+
+/*
+ * robust_list_del() - unlink @delete_node from chnl_net_list, but only if
+ * it is actually found on the list; a node that was already removed is left
+ * untouched.  Asserts that the rtnl lock is held.
+ */
+static void robust_list_del(struct list_head *delete_node)
+{
+	struct list_head *pos;
+
+	ASSERT_RTNL();
+	/* Plain iteration is enough: we leave the loop right after deleting. */
+	list_for_each(pos, &chnl_net_list) {
+		if (pos != delete_node)
+			continue;
+		list_del(pos);
+		return;
+	}
+}
+
+/*
+ * chnl_recv_cb() - receive callback (installed as chnl.receive by
+ * ipcaif_net_setup()): pass one CAIF packet to the network stack.
+ *
+ * Converts @pkt to its native sk_buff, tags it with the device and
+ * ETH_P_IP, hands it to netif_rx()/netif_rx_ni() and updates the RX
+ * counters.  Returns 0, or -EINVAL if no chnl_net is derived from @layr.
+ */
+static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
+{
+	struct sk_buff *skb;
+	struct chnl_net *priv  = NULL;
+	int pktlen;
+	int err = 0;
+
+	priv = container_of(layr, struct chnl_net, chnl);
+
+	if (!priv)
+		return -EINVAL;
+
+	/* Get length of CAIF packet. */
+	pktlen = cfpkt_getlen(pkt);
+
+	skb = (struct sk_buff *) cfpkt_tonative(pkt);
+	/* Pass some minimum information and
+	 * send the packet to the net stack.
+	 */
+	skb->dev = priv->netdev;
+	/* Every packet is labelled IPv4, whatever its payload. */
+	skb->protocol = htons(ETH_P_IP);
+
+	/* If we change the header in loop mode, the checksum is corrupted. */
+	if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP)
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	else
+		skb->ip_summed = CHECKSUM_NONE;
+
+	/* FIXME: Drivers should call this in tasklet context. */
+	if (in_interrupt())
+		netif_rx(skb);
+	else
+		netif_rx_ni(skb);
+
+	/* Update statistics; pktlen was sampled before the skb was handed off. */
+	priv->netdev->stats.rx_packets++;
+	priv->netdev->stats.rx_bytes += pktlen;
+
+	return err;
+}
+
+/*
+ * delete_device() - unregister the net_device behind @dev, if it has one.
+ * Asserts that the rtnl lock is held.  Always returns 0.
+ */
+static int delete_device(struct chnl_net *dev)
+{
+	ASSERT_RTNL();
+	if (dev->netdev)
+		unregister_netdevice(dev->netdev);
+	return 0;
+}
+
+/*
+ * close_work() - work handler: under the rtnl lock, unlink and unregister
+ * every device on chnl_net_list that is flagged pending_close.
+ */
+static void close_work(struct work_struct *work)
+{
+	struct chnl_net *entry, *next;
+
+	rtnl_lock();
+	list_for_each_entry_safe(entry, next, &chnl_net_list, list_field) {
+		if (entry->pending_close) {
+			list_del(&entry->list_field);
+			delete_device(entry);
+		}
+	}
+	rtnl_unlock();
+}
+static DECLARE_WORK(close_worker, close_work);
+
+/*
+ * chnl_flowctrl_cb() - control callback (installed as chnl.ctrlcmd by
+ * ipcaif_net_setup()).
+ *
+ * FLOW_OFF_IND:                 stop transmitting; the device stays.
+ * DEINIT_RSP, INIT_FAIL_RSP,
+ * REMOTE_SHUTDOWN_IND:          stop transmitting, flag the device for
+ *                               close_work() and wake sleepers on
+ *                               netmgmt_wq.
+ * FLOW_ON_IND, INIT_RSP:        enable flow, wake the TX queue and sleepers
+ *                               on netmgmt_wq.
+ */
+static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
+				int phyid)
+{
+	struct chnl_net *priv;
+	pr_debug("CAIF: %s(): NET flowctrl func called flow: %s.\n",
+		__func__,
+		flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
+		flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" :
+		flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
+		flow == CAIF_CTRLCMD_DEINIT_RSP ? "CLOSE/DEINIT" :
+		flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "OPEN_FAIL" :
+		flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ?
+		 "REMOTE_SHUTDOWN" : "UKNOWN CTRL COMMAND");
+
+	priv = container_of(layr, struct chnl_net, chnl);
+
+	switch (flow) {
+	case CAIF_CTRLCMD_FLOW_OFF_IND:
+		/*
+		 * Flow-off must not tear the device down: a later
+		 * FLOW_ON_IND re-enables transmission.
+		 */
+		priv->flowenabled = false;
+		netif_tx_disable(priv->netdev);
+		break;
+	case CAIF_CTRLCMD_DEINIT_RSP:
+	case CAIF_CTRLCMD_INIT_FAIL_RSP:
+	case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
+		priv->flowenabled = false;
+		netif_tx_disable(priv->netdev);
+		pr_warning("CAIF: %s(): done\n", __func__);
+		priv->pending_close = true;
+		schedule_work(&close_worker);
+		/* chnl_net_stop() sleeps until flowenabled becomes false. */
+		wake_up_interruptible(&priv->netmgmt_wq);
+		break;
+	case CAIF_CTRLCMD_FLOW_ON_IND:
+	case CAIF_CTRLCMD_INIT_RSP:
+		priv->flowenabled = true;
+		netif_wake_queue(priv->netdev);
+		/* chnl_net_open() sleeps until flowenabled becomes true. */
+		wake_up_interruptible(&priv->netmgmt_wq);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * chnl_net_start_xmit() - ndo_start_xmit handler: send one skb over CAIF.
+ *
+ * Returns NETDEV_TX_OK when the packet was passed down, NETDEV_TX_BUSY when
+ * flow is off or the lower layer returned -EAGAIN, -ENOSPC (skb freed here)
+ * when the skb is larger than the MTU, or another error returned by the
+ * lower layer's transmit().
+ */
+static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct chnl_net *priv;
+	struct cfpkt *pkt = NULL;
+	int len;
+	int result = -1;
+	/* Get our private data. */
+	priv = netdev_priv(dev);
+
+	if (skb->len > priv->netdev->mtu) {
+		pr_warning("CAIF: %s(): Size of skb exceeded MTU\n", __func__);
+		/*
+		 * A negative return tells the core that the driver consumed
+		 * the skb, so it has to be freed here or it is leaked.
+		 */
+		kfree_skb(skb);
+		dev->stats.tx_errors++;
+		return -ENOSPC;
+	}
+
+	if (!priv->flowenabled) {
+		pr_debug("CAIF: %s(): dropping packets flow off\n", __func__);
+		return NETDEV_TX_BUSY;
+	}
+
+	/* Loopback mode: swap the addresses so the packet comes back to us. */
+	if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP)
+		swap(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
+
+	/* Store original SKB length. */
+	len = skb->len;
+
+	pkt = cfpkt_fromnative(CAIF_DIR_OUT, (void *) skb);
+
+	pr_debug("CAIF: %s(): transmit inst %s %d,%p\n",
+		__func__, dev->name, priv->chnl.dn->id, &priv->chnl.dn);
+
+	/* Send the packet down the stack. */
+	result = priv->chnl.dn->transmit(priv->chnl.dn, pkt);
+	if (result) {
+		if (result == -EAGAIN)
+			result = NETDEV_TX_BUSY;
+		return result;
+	}
+
+	/* Update statistics. */
+	dev->stats.tx_packets++;
+	dev->stats.tx_bytes += len;
+
+	return NETDEV_TX_OK;
+}
+
+/*
+ * chnl_net_open() - ndo_open handler: connect the CAIF channel and sleep
+ * until flow is enabled.
+ *
+ * Returns 0 once flow is on, -ENODEV if there is no private data or the
+ * channel cannot be connected, and -ERESTARTSYS when the wait is
+ * interrupted by a signal.
+ */
+static int chnl_net_open(struct net_device *dev)
+{
+	struct chnl_net *priv = NULL;
+	int result = -1;
+	ASSERT_RTNL();
+
+	priv = netdev_priv(dev);
+	/* Check priv before the first dereference, not after it. */
+	if (!priv) {
+		pr_debug("CAIF: %s(): chnl_net_open: no priv\n", __func__);
+		return -ENODEV;
+	}
+	pr_debug("CAIF: %s(): dev name: %s\n", __func__, priv->name);
+
+	result = caif_connect_client(&priv->conn_req, &priv->chnl);
+	if (result != 0) {
+		pr_debug("CAIF: %s(): err: "
+			 "Unable to register and open device, Err:%d\n",
+			__func__,
+			result);
+		return -ENODEV;
+	}
+	result = wait_event_interruptible(priv->netmgmt_wq, priv->flowenabled);
+
+	if (result == -ERESTARTSYS) {
+		/*
+		 * NOTE(review): the channel stays connected on this path;
+		 * confirm whether it should be disconnected again.
+		 */
+		pr_debug("CAIF: %s(): wait_event_interruptible"
+			 " woken by a signal\n", __func__);
+		return -ERESTARTSYS;
+	} else
+		pr_debug("CAIF: %s(): Flow on recieved\n", __func__);
+
+	return 0;
+}
+
+/*
+ * chnl_net_stop() - ndo_stop handler: disconnect the CAIF channel and sleep
+ * until flow is reported disabled.
+ *
+ * Returns -EBUSY when caif_disconnect_client() fails, otherwise 0 - also
+ * when the wait was interrupted by a signal, which is only logged.
+ */
+static int chnl_net_stop(struct net_device *dev)
+{
+	struct chnl_net *priv;
+	int result = -1;
+	ASSERT_RTNL();
+	priv = netdev_priv(dev);
+
+	result = caif_disconnect_client(&priv->chnl);
+	if (result != 0) {
+		pr_debug("CAIF: %s(): chnl_net_stop: err: "
+			 "Unable to STOP device, Err:%d\n",
+			 __func__, result);
+		return -EBUSY;
+	}
+	/* flowenabled is cleared by chnl_flowctrl_cb(). */
+	result = wait_event_interruptible(priv->netmgmt_wq,
+					  !priv->flowenabled);
+
+	if (result == -ERESTARTSYS) {
+		pr_debug("CAIF: %s(): wait_event_interruptible woken by"
+			 " signal, signal_pending(current) = %d\n",
+			 __func__,
+			 signal_pending(current));
+	} else {
+		pr_debug("CAIF: %s(): disconnect received\n", __func__);
+
+	}
+
+	return 0;
+}
+
+/*
+ * chnl_net_init() - ndo_init handler: copy the interface name into the
+ * private data.  Always returns 0.
+ */
+static int chnl_net_init(struct net_device *dev)
+{
+	struct chnl_net *priv;
+	ASSERT_RTNL();
+	priv = netdev_priv(dev);
+	/* assumes dev->name is shorter than priv->name[256] - TODO confirm */
+	strncpy(priv->name, dev->name, sizeof(priv->name));
+	return 0;
+}
+
+/*
+ * chnl_net_uninit() - ndo_uninit handler: take the device off chnl_net_list
+ * if it is still linked there.
+ */
+static void chnl_net_uninit(struct net_device *dev)
+{
+	struct chnl_net *priv;
+	ASSERT_RTNL();
+	priv = netdev_priv(dev);
+	/* close_work() and module exit may already have unlinked the entry. */
+	robust_list_del(&priv->list_field);
+}
+
+/* net_device operations of the CAIF interface; set in ipcaif_net_setup(). */
+static const struct net_device_ops netdev_ops = {
+	.ndo_open = chnl_net_open,
+	.ndo_stop = chnl_net_stop,
+	.ndo_init = chnl_net_init,
+	.ndo_uninit = chnl_net_uninit,
+	.ndo_start_xmit = chnl_net_start_xmit,
+};
+
+/*
+ * ipcaif_net_setup() - rtnl_link_ops setup callback.
+ *
+ * Initialises the net_device (ops, destructor, flags, head/tailroom, MTU,
+ * queue length) and its private data (CAIF callbacks, default connection
+ * request, flow disabled, wait queue), then links the device into
+ * chnl_net_list.
+ */
+static void ipcaif_net_setup(struct net_device *dev)
+{
+	struct chnl_net *priv;
+	dev->netdev_ops = &netdev_ops;
+	dev->destructor = free_netdev;
+	dev->flags |= IFF_NOARP;
+	dev->flags |= IFF_POINTOPOINT;
+	dev->needed_headroom = CAIF_NEEDED_HEADROOM;
+	dev->needed_tailroom = CAIF_NEEDED_TAILROOM;
+	dev->mtu = SIZE_MTU;
+	dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN;
+
+	priv = netdev_priv(dev);
+	priv->chnl.receive = chnl_recv_cb;
+	priv->chnl.ctrlcmd = chnl_flowctrl_cb;
+	priv->netdev = dev;
+	priv->conn_req.protocol = CAIFPROTO_DATAGRAM;
+	priv->conn_req.link_selector = CAIF_LINK_HIGH_BANDW;
+	priv->conn_req.priority = CAIF_PRIO_LOW;
+	/* Insert illegal value */
+	priv->conn_req.sockaddr.u.dgm.connection_id = -1;
+	priv->flowenabled = false;
+
+	ASSERT_RTNL();
+	init_waitqueue_head(&priv->netmgmt_wq);
+	/*
+	 * NOTE(review): the device is put on the list before it is
+	 * registered; verify that every path which frees an unregistered
+	 * device also unlinks it.
+	 */
+	list_add(&priv->list_field, &chnl_net_list);
+}
+
+
+/*
+ * ipcaif_fill_info() - rtnl_link_ops fill_info callback: report the
+ * connection id (as both the IPv4 and the IPv6 attribute) and the loopback
+ * flag.  Returns 0, or -EMSGSIZE via the nla_put_failure label (presumably
+ * reached from the NLA_PUT_*() macros when the skb is full).
+ */
+static int ipcaif_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct chnl_net *priv;
+	u8 loop;
+	priv = netdev_priv(dev);
+	NLA_PUT_U32(skb, IFLA_CAIF_IPV4_CONNID,
+		    priv->conn_req.sockaddr.u.dgm.connection_id);
+	NLA_PUT_U32(skb, IFLA_CAIF_IPV6_CONNID,
+		    priv->conn_req.sockaddr.u.dgm.connection_id);
+	loop = priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP;
+	NLA_PUT_U8(skb, IFLA_CAIF_LOOPBACK, loop);
+
+
+	return 0;
+nla_put_failure:
+	return -EMSGSIZE;
+
+}
+
+/*
+ * caif_netlink_parms() - copy the CAIF netlink attributes in @data into
+ * @conn_req.  Absent attributes leave @conn_req unchanged; a NULL @data is
+ * only warned about.
+ */
+static void caif_netlink_parms(struct nlattr *data[],
+				struct caif_connect_request *conn_req)
+{
+	if (!data) {
+		pr_warning("CAIF: %s: no params data found\n", __func__);
+		return;
+	}
+	/* Both id attributes write the same field; IPv6 wins if both are set. */
+	if (data[IFLA_CAIF_IPV4_CONNID])
+		conn_req->sockaddr.u.dgm.connection_id =
+			nla_get_u32(data[IFLA_CAIF_IPV4_CONNID]);
+	if (data[IFLA_CAIF_IPV6_CONNID])
+		conn_req->sockaddr.u.dgm.connection_id =
+			nla_get_u32(data[IFLA_CAIF_IPV6_CONNID]);
+	if (data[IFLA_CAIF_LOOPBACK]) {
+		if (nla_get_u8(data[IFLA_CAIF_LOOPBACK]))
+			conn_req->protocol = CAIFPROTO_DATAGRAM_LOOP;
+		else
+			conn_req->protocol = CAIFPROTO_DATAGRAM;
+	}
+}
+
+/*
+ * ipcaif_newlink() - rtnl_link_ops newlink callback: apply the netlink
+ * parameters and register the device.
+ *
+ * Returns 0 or the error from register_netdevice().  On failure the device
+ * is unlinked from chnl_net_list, where ipcaif_net_setup() put it, so that
+ * no list entry is left pointing at a device the caller is about to free.
+ */
+static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
+			  struct nlattr *tb[], struct nlattr *data[])
+{
+	int ret;
+	struct chnl_net *caifdev;
+	ASSERT_RTNL();
+	caifdev = netdev_priv(dev);
+	caif_netlink_parms(data, &caifdev->conn_req);
+	ret = register_netdevice(dev);
+	if (ret) {
+		pr_warning("CAIF: %s(): device rtml registration failed\n",
+			   __func__);
+		/* A no-op if the entry was already unlinked via ndo_uninit. */
+		robust_list_del(&caifdev->list_field);
+	}
+	return ret;
+}
+
+/*
+ * ipcaif_changelink() - rtnl_link_ops changelink callback: apply the
+ * netlink parameters to the stored connection request and signal a device
+ * state change.  Always returns 0.
+ */
+static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[],
+				struct nlattr *data[])
+{
+	struct chnl_net *caifdev;
+	ASSERT_RTNL();
+	caifdev = netdev_priv(dev);
+	caif_netlink_parms(data, &caifdev->conn_req);
+	netdev_state_change(dev);
+	return 0;
+}
+
+/*
+ * ipcaif_get_size() - rtnl_link_ops get_size callback: space needed for the
+ * attributes written by ipcaif_fill_info().  Two payload bytes are reserved
+ * for the loopback attribute, which fill_info emits as a u8.
+ */
+static size_t ipcaif_get_size(const struct net_device *dev)
+{
+	return
+		/* IFLA_CAIF_IPV4_CONNID */
+		nla_total_size(4) +
+		/* IFLA_CAIF_IPV6_CONNID */
+		nla_total_size(4) +
+		/* IFLA_CAIF_LOOPBACK */
+		nla_total_size(2) +
+		0;
+}
+
+/* Netlink attribute types accepted for "caif" links. */
+static const struct nla_policy ipcaif_policy[IFLA_CAIF_MAX + 1] = {
+	[IFLA_CAIF_IPV4_CONNID]	      = { .type = NLA_U32 },
+	[IFLA_CAIF_IPV6_CONNID]	      = { .type = NLA_U32 },
+	[IFLA_CAIF_LOOPBACK]	      = { .type = NLA_U8 }
+};
+
+
+/* rtnl link operations for devices of kind "caif". */
+static struct rtnl_link_ops ipcaif_link_ops __read_mostly = {
+	.kind		= "caif",
+	.priv_size	= sizeof(struct chnl_net),
+	.setup		= ipcaif_net_setup,
+	.maxtype	= IFLA_CAIF_MAX,
+	.policy		= ipcaif_policy,
+	.newlink	= ipcaif_newlink,
+	.changelink	= ipcaif_changelink,
+	.get_size	= ipcaif_get_size,
+	.fill_info	= ipcaif_fill_info,
+
+};
+
+/* Module init: register the "caif" rtnl link type. */
+static int __init chnl_init_module(void)
+{
+	return rtnl_link_register(&ipcaif_link_ops);
+}
+
+/*
+ * chnl_exit_module() - module exit: unregister the "caif" link type, then
+ * unlink and unregister every device still on chnl_net_list under the rtnl
+ * lock.
+ */
+static void __exit chnl_exit_module(void)
+{
+	struct chnl_net *entry, *next;
+
+	rtnl_link_unregister(&ipcaif_link_ops);
+	rtnl_lock();
+	list_for_each_entry_safe(entry, next, &chnl_net_list, list_field) {
+		list_del(&entry->list_field);
+		delete_device(entry);
+	}
+	rtnl_unlock();
+}
+
+module_init(chnl_init_module);
+module_exit(chnl_exit_module);
-- 
cgit v1.2.3-59-g8ed1b


From 3908c6902372206cc582ecf459af889b09a150c9 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Tue, 30 Mar 2010 13:56:28 +0000
Subject: net-caif: add CAIF Kconfig and Makefiles

Kconfig and Makefiles with options for:
CAIF:        Including caif
CAIF_DEBUG:  CAIF Debug
CAIF_NETDEV: CAIF Network Device for GPRS Contexts

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/Kconfig       |  2 ++
 net/Makefile      |  1 +
 net/caif/Kconfig  | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/caif/Makefile | 26 ++++++++++++++++++++++++++
 4 files changed, 77 insertions(+)
 create mode 100644 net/caif/Kconfig
 create mode 100644 net/caif/Makefile

(limited to 'net')

diff --git a/net/Kconfig b/net/Kconfig
index 68514644ce91..e10d55c8ee5c 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -280,5 +280,7 @@ source "net/wimax/Kconfig"
 
 source "net/rfkill/Kconfig"
 source "net/9p/Kconfig"
+source "net/caif/Kconfig"
+
 
 endif   # if NET
diff --git a/net/Makefile b/net/Makefile
index 1542e7268a7b..a5eae27aa42d 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_NETLABEL)		+= netlabel/
 obj-$(CONFIG_IUCV)		+= iucv/
 obj-$(CONFIG_RFKILL)		+= rfkill/
 obj-$(CONFIG_NET_9P)		+= 9p/
+obj-$(CONFIG_CAIF)		+= caif/
 ifneq ($(CONFIG_DCB),)
 obj-y				+= dcb/
 endif
diff --git a/net/caif/Kconfig b/net/caif/Kconfig
new file mode 100644
index 000000000000..cd1daf6008bd
--- /dev/null
+++ b/net/caif/Kconfig
@@ -0,0 +1,48 @@
+#
+# CAIF net configurations
+#
+
+#menu "CAIF Support"
+comment "CAIF Support"
+menuconfig CAIF
+	tristate "Enable CAIF support"
+	select CRC_CCITT
+	default n
+	---help---
+	The "Communication CPU to Application CPU Interface" (CAIF) is a packet
+	based connection-oriented MUX protocol developed by ST-Ericsson for use
+	with its modems. It is accessed from user space as sockets (PF_CAIF).
+
+	Say Y (or M) here if you build for a phone product (e.g. Android or
+	MeeGo) that uses CAIF as transport. If unsure, say N.
+
+	If you choose to build it as a module, then CAIF_NETDEV also needs to
+	be built as a module. You will also need to say yes to any CAIF
+	physical devices that your platform requires.
+
+	See Documentation/networking/caif for a further explanation on how to
+	use and configure CAIF.
+
+if CAIF
+
+config  CAIF_DEBUG
+	bool "Enable Debug"
+	default n
+	--- help ---
+	Enable the inclusion of debug code in the CAIF stack.
+	Be aware that doing this will impact performance.
+	If unsure say N.
+
+
+config CAIF_NETDEV
+	tristate "CAIF GPRS Network device"
+	default CAIF
+	---help---
+	Say Y if you will be using a CAIF based GPRS network device.
+	This can be either built-in or a loadable module,
+	If you select to build it as a built-in then the main CAIF device must
+	also be a built-in.
+	If unsure say Y.
+
+endif
+#endmenu
diff --git a/net/caif/Makefile b/net/caif/Makefile
new file mode 100644
index 000000000000..34852af2595e
--- /dev/null
+++ b/net/caif/Makefile
@@ -0,0 +1,26 @@
+ifeq ($(CONFIG_CAIF_DEBUG),y) # CAIF_DEBUG is a Kconfig bool: set means "y", never "1"
+CAIF_DBG_FLAGS := -DDEBUG
+endif
+
+ccflags-y := $(CAIF_FLAGS) $(CAIF_DBG_FLAGS)
+
+caif-objs := caif_dev.o \
+	cfcnfg.o cfmuxl.o cfctrl.o  \
+	cffrml.o cfveil.o cfdbgl.o\
+	cfserl.o cfdgml.o  \
+	cfrfml.o cfvidl.o cfutill.o \
+	cfsrvl.o cfpkt_skbuff.o caif_config_util.o
+clean-dirs:= .tmp_versions
+
+clean-files:= \
+	Module.symvers \
+	modules.order \
+	*.cmd \
+	*.o \
+	*~
+
+obj-$(CONFIG_CAIF) += caif.o
+obj-$(CONFIG_CAIF_NETDEV) += chnl_net.o
+obj-$(CONFIG_CAIF) += caif_socket.o
+
+export-objs := caif.o
-- 
cgit v1.2.3-59-g8ed1b


From 7e5ab157813993356f021757d0b0dcbdca7c55a1 Mon Sep 17 00:00:00 2001
From: Tom Goff <thomas.goff@boeing.com>
Date: Tue, 30 Mar 2010 19:44:56 -0700
Subject: net_sched: minor netns related cleanup

These changes were suggested by Alexey Dobriyan <adobriyan@gmail.com>:

  - psched_show() does not use any private data so just pass NULL to
    psched_open()

  - remove unnecessary return statement

Signed-off-by: Tom Goff <thomas.goff@boeing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 6d6fe16289f3..c65866da17bc 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1683,7 +1683,7 @@ static int psched_show(struct seq_file *seq, void *v)
 
 static int psched_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, psched_show, PDE(inode)->data);
+	return single_open(file, psched_show, NULL);
 }
 
 static const struct file_operations psched_fops = {
@@ -1708,8 +1708,6 @@ static int __net_init psched_net_init(struct net *net)
 static void __net_exit psched_net_exit(struct net *net)
 {
 	proc_net_remove(net, "psched");
-
-	return;
 }
 #else
 static int __net_init psched_net_init(struct net *net)
-- 
cgit v1.2.3-59-g8ed1b


From de7737e056d65ad6b0f135f7bb24d86458af0d47 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Fri, 26 Mar 2010 08:34:30 +0000
Subject: sctp: Use ipv6_addr_diff() in sctp_v6_addr_match_len().

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/ipv6.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 240dceba06e5..216d88f27236 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -276,20 +276,7 @@ static struct dst_entry *sctp_v6_get_dst(struct sctp_association *asoc,
 static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
 					 union sctp_addr *s2)
 {
-	struct in6_addr *a1 = &s1->v6.sin6_addr;
-	struct in6_addr *a2 = &s2->v6.sin6_addr;
-	int i, j;
-
-	for (i = 0; i < 4 ; i++) {
-		__be32 a1xora2;
-
-		a1xora2 = a1->s6_addr32[i] ^ a2->s6_addr32[i];
-
-		if ((j = fls(ntohl(a1xora2))))
-			return (i * 32 + 32 - j);
-	}
-
-	return (i*32);
+	return ipv6_addr_diff(&s1->v6.sin6_addr, &s2->v6.sin6_addr);
 }
 
 /* Fills in the source address(saddr) based on the destination address(daddr)
-- 
cgit v1.2.3-59-g8ed1b


From 02cdce53f3d0d3eee8188944c96150ee8c97100d Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Sat, 27 Mar 2010 01:24:16 +0000
Subject: ipv6 fib: Use "Swizzle" to optimize addr_bit_set().

addr_bit_set() is used in various places in the IPv6 routing table
subsystem.  It checks if the given fn_bit is set,
where fn_bit counts bits from MSB in words in network-order.

 fn_bit        :   0 .... 31 32 .... 63 64 .... 95 96 ....127

fn_bit >> 5 gives offset of word, and (~fn_bit & 0x1f) gives
count from LSB in the network-endian word in question.

 fn_bit >> 5   :       0          1          2          3
 ~fn_bit & 0x1f:  31 ....  0 31 ....  0 31 ....  0 31 ....  0

Thus, the mask was generated as htonl(1 << (~fn_bit & 0x1f)).
This can be optimized by "swizzle" (See include/asm-generic/bitops/le.h).

In little-endian,
  htonl(1 << bit) = 1 << (bit ^ BITOP_BE32_SWIZZLE)
where
  BITOP_BE32_SWIZZLE is (0x1f & ~7)
So,
  htonl(1 << (~fn_bit & 0x1f)) = 1 << ((~fn_bit & 0x1f) ^ (0x1f & ~7))
                               = 1 << ((~fn_bit ^ ~7) & 0x1f)
                               = 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)

In big-endian, BITOP_BE32_SWIZZLE is equal to 0.
  1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
                               = 1 << ((~fn_bit) & 0x1f)
                               = htonl(1 << (~fn_bit & 0x1f))

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 2f9847924fa5..68119ef62869 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -127,12 +127,23 @@ static __inline__ u32 fib6_new_sernum(void)
 /*
  *	test bit
  */
+#if defined(__LITTLE_ENDIAN)
+# define BITOP_BE32_SWIZZLE	(0x1F & ~7)
+#else
+# define BITOP_BE32_SWIZZLE	0
+#endif
 
 static __inline__ __be32 addr_bit_set(void *token, int fn_bit)
 {
 	__be32 *addr = token;
-
-	return htonl(1 << ((~fn_bit)&0x1F)) & addr[fn_bit>>5];
+	/*
+	 * Here,
+	 * 	1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)
+	 * is optimized version of
+	 *	htonl(1 << ((~fn_bit)&0x1F))
+	 * See include/asm-generic/bitops/le.h.
+	 */
+	return (1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & addr[fn_bit >> 5];
 }
 
 static __inline__ struct fib6_node * node_alloc(void)
-- 
cgit v1.2.3-59-g8ed1b


From b00fabb4020d17bda4bea59507e09fadf573088d Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 29 Mar 2010 14:47:27 +0000
Subject: netdev: ethtool RXHASH flag

This adds ethtool and device feature flag to allow control
of receive hashing offload.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h   | 1 +
 include/linux/netdevice.h | 1 +
 net/core/ethtool.c        | 7 ++++++-
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index b33f316bb92e..b391969a0dd9 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -310,6 +310,7 @@ struct ethtool_perm_addr {
 enum ethtool_flags {
 	ETH_FLAG_LRO		= (1 << 15),	/* LRO is enabled */
 	ETH_FLAG_NTUPLE		= (1 << 27),	/* N-tuple filters enabled */
+	ETH_FLAG_RXHASH		= (1 << 28),
 };
 
 /* The following structures are for supporting RX network flow
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 53c272f2a734..b5670ab5107c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -785,6 +785,7 @@ struct net_device {
 #define NETIF_F_SCTP_CSUM	(1 << 25) /* SCTP checksum offload */
 #define NETIF_F_FCOE_MTU	(1 << 26) /* Supports max FCoE MTU, 2158 bytes*/
 #define NETIF_F_NTUPLE		(1 << 27) /* N-tuple filters supported */
+#define NETIF_F_RXHASH		(1 << 28) /* Receive hashing offload */
 
 	/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT	16
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f4cb6b6299d9..73c81edde8d9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -121,7 +121,7 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
  * NETIF_F_xxx values in include/linux/netdevice.h
  */
 static const u32 flags_dup_features =
-	(ETH_FLAG_LRO | ETH_FLAG_NTUPLE);
+	(ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
 
 u32 ethtool_op_get_flags(struct net_device *dev)
 {
@@ -152,6 +152,11 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data)
 		features &= ~NETIF_F_NTUPLE;
 	}
 
+	if (data & ETH_FLAG_RXHASH)
+		features |= NETIF_F_RXHASH;
+	else
+		features &= ~NETIF_F_RXHASH;
+
 	dev->features = features;
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 598ed9367a36ee1fd4ae3271a54a3547a33975a5 Mon Sep 17 00:00:00 2001
From: laurent chavey <chavey@google.com>
Date: Mon, 29 Mar 2010 10:41:36 +0000
Subject: fix net/core/dst.c coding style error and warnings

Fix coding style errors and warnings output while running checkpatch.pl
on the file net/core/dst.c.

Signed-off-by: chavey <chavey@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c | 41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/core/dst.c b/net/core/dst.c
index cb1b3488b739..2076d84203d1 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -43,7 +43,7 @@ static atomic_t			 dst_total = ATOMIC_INIT(0);
  */
 static struct {
 	spinlock_t		lock;
-	struct dst_entry 	*list;
+	struct dst_entry	*list;
 	unsigned long		timer_inc;
 	unsigned long		timer_expires;
 } dst_garbage = {
@@ -51,7 +51,7 @@ static struct {
 	.timer_inc = DST_GC_MAX,
 };
 static void dst_gc_task(struct work_struct *work);
-static void ___dst_free(struct dst_entry * dst);
+static void ___dst_free(struct dst_entry *dst);
 
 static DECLARE_DELAYED_WORK(dst_gc_work, dst_gc_task);
 
@@ -135,8 +135,8 @@ loop:
 		}
 		expires = dst_garbage.timer_expires;
 		/*
-		 * if the next desired timer is more than 4 seconds in the future
-		 * then round the timer to whole seconds
+		 * if the next desired timer is more than 4 seconds in the
+		 * future then round the timer to whole seconds
 		 */
 		if (expires > 4*HZ)
 			expires = round_jiffies_relative(expires);
@@ -151,7 +151,8 @@ loop:
 		" expires: %lu elapsed: %lu us\n",
 		atomic_read(&dst_total), delayed, work_performed,
 		expires,
-		elapsed.tv_sec * USEC_PER_SEC + elapsed.tv_nsec / NSEC_PER_USEC);
+		elapsed.tv_sec * USEC_PER_SEC +
+		  elapsed.tv_nsec / NSEC_PER_USEC);
 #endif
 }
 
@@ -162,9 +163,9 @@ int dst_discard(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dst_discard);
 
-void * dst_alloc(struct dst_ops * ops)
+void *dst_alloc(struct dst_ops *ops)
 {
-	struct dst_entry * dst;
+	struct dst_entry *dst;
 
 	if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) {
 		if (ops->gc(ops))
@@ -184,19 +185,20 @@ void * dst_alloc(struct dst_ops * ops)
 	atomic_inc(&ops->entries);
 	return dst;
 }
+EXPORT_SYMBOL(dst_alloc);
 
-static void ___dst_free(struct dst_entry * dst)
+static void ___dst_free(struct dst_entry *dst)
 {
 	/* The first case (dev==NULL) is required, when
 	   protocol module is unloaded.
 	 */
-	if (dst->dev == NULL || !(dst->dev->flags&IFF_UP)) {
+	if (dst->dev == NULL || !(dst->dev->flags&IFF_UP))
 		dst->input = dst->output = dst_discard;
-	}
 	dst->obsolete = 2;
 }
+EXPORT_SYMBOL(__dst_free);
 
-void __dst_free(struct dst_entry * dst)
+void __dst_free(struct dst_entry *dst)
 {
 	spin_lock_bh(&dst_garbage.lock);
 	___dst_free(dst);
@@ -261,15 +263,16 @@ again:
 	}
 	return NULL;
 }
+EXPORT_SYMBOL(dst_destroy);
 
 void dst_release(struct dst_entry *dst)
 {
 	if (dst) {
-               int newrefcnt;
+		int newrefcnt;
 
 		smp_mb__before_atomic_dec();
-               newrefcnt = atomic_dec_return(&dst->__refcnt);
-               WARN_ON(newrefcnt < 0);
+		newrefcnt = atomic_dec_return(&dst->__refcnt);
+		WARN_ON(newrefcnt < 0);
 	}
 }
 EXPORT_SYMBOL(dst_release);
@@ -305,7 +308,8 @@ static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 	}
 }
 
-static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+static int dst_dev_event(struct notifier_block *this, unsigned long event,
+			 void *ptr)
 {
 	struct net_device *dev = ptr;
 	struct dst_entry *dst, *last = NULL;
@@ -328,9 +332,8 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, void
 			last->next = dst;
 		else
 			dst_busy_list = dst;
-		for (; dst; dst = dst->next) {
+		for (; dst; dst = dst->next)
 			dst_ifdown(dst, dev, event != NETDEV_DOWN);
-		}
 		mutex_unlock(&dst_gc_mutex);
 		break;
 	}
@@ -345,7 +348,3 @@ void __init dst_init(void)
 {
 	register_netdevice_notifier(&dst_dev_notifier);
 }
-
-EXPORT_SYMBOL(__dst_free);
-EXPORT_SYMBOL(dst_alloc);
-EXPORT_SYMBOL(dst_destroy);
-- 
cgit v1.2.3-59-g8ed1b


From 8379d07031e59a5d72bc73a6060c4d63aac956ce Mon Sep 17 00:00:00 2001
From: Hagen Paul Pfeifer <hagen@jauu.net>
Date: Tue, 30 Mar 2010 14:24:12 +0000
Subject: tipc: define needless global scoped variable static

struct _zone *tipc_zones is only used at file-local scope and
should be defined with the correct scoping.

CC: Per Liden <per.liden@nospam.ericsson.com>
Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/net.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/net.c b/net/tipc/net.c
index 79ce8fa2b77a..d7cd1e064a80 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -116,7 +116,7 @@
 */
 
 DEFINE_RWLOCK(tipc_net_lock);
-struct _zone *tipc_zones[256] = { NULL, };
+static struct _zone *tipc_zones[256] = { NULL, };
 struct network tipc_net = { tipc_zones };
 
 struct tipc_node *tipc_net_select_remote_node(u32 addr, u32 ref)
-- 
cgit v1.2.3-59-g8ed1b


From b68c92460d380c59891ba97531edbe5b01f5ea0b Mon Sep 17 00:00:00 2001
From: Hagen Paul Pfeifer <hagen@jauu.net>
Date: Tue, 30 Mar 2010 14:24:57 +0000
Subject: sctp: eliminate useless code

Remove duplicate declaration of symbol: struct hlist_node *node was
already declared, the second declaration shadows the first one.

CC: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index dfc5c127efd4..d80ee3a2f110 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5481,7 +5481,6 @@ pp_found:
 		 */
 		int reuse = sk->sk_reuse;
 		struct sock *sk2;
-		struct hlist_node *node;
 
 		SCTP_DEBUG_PRINTK("sctp_get_port() found a possible match\n");
 		if (pp->fastreuse && sk->sk_reuse &&
-- 
cgit v1.2.3-59-g8ed1b


From 55f98938b5cea8949077c79813c4f86ef0018858 Mon Sep 17 00:00:00 2001
From: Frans Pop <elendil@planet.nl>
Date: Wed, 24 Mar 2010 19:46:29 +0100
Subject: wireless: remove trailing space in messages

Also correct indentation in net/wireless/reg.c.

Signed-off-by: Frans Pop <elendil@planet.nl>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/agg-tx.c | 4 ++--
 net/wireless/reg.c    | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 5538e1b4a697..bb4ac70fc97a 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -414,7 +414,7 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local,
 					 struct sta_info *sta, u16 tid)
 {
 #ifdef CONFIG_MAC80211_HT_DEBUG
-	printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid);
+	printk(KERN_DEBUG "Aggregation is on for tid %d\n", tid);
 #endif
 
 	spin_lock(&local->ampdu_lock);
@@ -674,7 +674,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local,
 	del_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer);
 
 #ifdef CONFIG_MAC80211_HT_DEBUG
-	printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid);
+	printk(KERN_DEBUG "switched off addBA timer for tid %d\n", tid);
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 
 	if (le16_to_cpu(mgmt->u.action.u.addba_resp.status)
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index ed89c59bb431..e857d72c7e8c 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -2357,10 +2357,10 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
 					rdev->country_ie_alpha2[1]);
 			} else
 				printk(KERN_INFO "cfg80211: Current regulatory "
-					"domain intersected: \n");
+					"domain intersected:\n");
 		} else
-				printk(KERN_INFO "cfg80211: Current regulatory "
-					"domain intersected: \n");
+			printk(KERN_INFO "cfg80211: Current regulatory "
+				"domain intersected:\n");
 	} else if (is_world_regdom(rd->alpha2))
 		printk(KERN_INFO "cfg80211: World regulatory "
 			"domain updated:\n");
-- 
cgit v1.2.3-59-g8ed1b


From e3cf8b3f7b9eefbe1d39b160726d6e5c2cbb4c5d Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Mon, 29 Mar 2010 17:35:07 +0800
Subject: mac80211: support paged rx SKBs

Mac80211 drivers can now pass paged SKBs to mac80211 via
ieee80211_rx{_irqsafe}. The implementation currently uses
skb_linearize() in a few places, i.e. management frame
handling, software decryption, defragmentation and A-MSDU
processing. We will optimize them one by one later.

Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Cc: Kalle Valo <kalle.valo@iki.fi>
Cc: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h |  5 ++++-
 net/mac80211/rx.c      | 36 ++++++++++++++++++++++++++++++++----
 net/wireless/util.c    | 24 ++++++++++++++++++------
 3 files changed, 54 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 1a8f50af49a0..ecaae10426f9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1822,7 +1822,10 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw);
  * ieee80211_rx - receive frame
  *
  * Use this function to hand received frames to mac80211. The receive
- * buffer in @skb must start with an IEEE 802.11 header.
+ * buffer in @skb must start with an IEEE 802.11 header. In case a
+ * paged @skb is used, the driver is recommended to put the ieee80211
+ * header of the frame on the linear part of the @skb to avoid memory
+ * allocation and/or memcpy by the stack.
  *
  * This function may not be called in IRQ context. Calls to this function
  * for a single hardware must be synchronized against each other. Calls to
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 1da57c8e849a..11ed5aa90f83 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -38,7 +38,7 @@ static struct sk_buff *remove_monitor_info(struct ieee80211_local *local,
 {
 	if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) {
 		if (likely(skb->len > FCS_LEN))
-			skb_trim(skb, skb->len - FCS_LEN);
+			__pskb_trim(skb, skb->len - FCS_LEN);
 		else {
 			/* driver bug */
 			WARN_ON(1);
@@ -227,6 +227,12 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 	if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS)
 		present_fcs_len = FCS_LEN;
 
+	/* make sure hdr->frame_control is on the linear part */
+	if (!pskb_may_pull(origskb, 2)) {
+		dev_kfree_skb(origskb);
+		return NULL;
+	}
+
 	if (!local->monitors) {
 		if (should_drop_frame(origskb, present_fcs_len)) {
 			dev_kfree_skb(origskb);
@@ -931,6 +937,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 		return RX_DROP_MONITOR;
 	}
 
+	if (skb_linearize(rx->skb))
+		return RX_DROP_UNUSABLE;
+
 	/* Check for weak IVs if possible */
 	if (rx->sta && rx->key->conf.alg == ALG_WEP &&
 	    ieee80211_is_data(hdr->frame_control) &&
@@ -1231,6 +1240,9 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	}
 	I802_DEBUG_INC(rx->local->rx_handlers_fragments);
 
+	if (skb_linearize(rx->skb))
+		return RX_DROP_UNUSABLE;
+
 	seq = (sc & IEEE80211_SCTL_SEQ) >> 4;
 
 	if (frag == 0) {
@@ -1588,6 +1600,9 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
 	skb->dev = dev;
 	__skb_queue_head_init(&frame_list);
 
+	if (skb_linearize(skb))
+		return RX_DROP_UNUSABLE;
+
 	ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
 				 rx->sdata->vif.type,
 				 rx->local->hw.extra_tx_headroom);
@@ -2357,29 +2372,42 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_sub_if_data *sdata;
 	struct ieee80211_hdr *hdr;
+	__le16 fc;
 	struct ieee80211_rx_data rx;
 	int prepares;
 	struct ieee80211_sub_if_data *prev = NULL;
 	struct sk_buff *skb_new;
 	struct sta_info *sta, *tmp;
 	bool found_sta = false;
+	int err = 0;
 
-	hdr = (struct ieee80211_hdr *)skb->data;
+	fc = ((struct ieee80211_hdr *)skb->data)->frame_control;
 	memset(&rx, 0, sizeof(rx));
 	rx.skb = skb;
 	rx.local = local;
 
-	if (ieee80211_is_data(hdr->frame_control) || ieee80211_is_mgmt(hdr->frame_control))
+	if (ieee80211_is_data(fc) || ieee80211_is_mgmt(fc))
 		local->dot11ReceivedFragmentCount++;
 
 	if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) ||
 		     test_bit(SCAN_OFF_CHANNEL, &local->scanning)))
 		rx.flags |= IEEE80211_RX_IN_SCAN;
 
+	if (ieee80211_is_mgmt(fc))
+		err = skb_linearize(skb);
+	else
+		err = !pskb_may_pull(skb, ieee80211_hdrlen(fc));
+
+	if (err) {
+		dev_kfree_skb(skb);
+		return;
+	}
+
+	hdr = (struct ieee80211_hdr *)skb->data;
 	ieee80211_parse_qos(&rx);
 	ieee80211_verify_alignment(&rx);
 
-	if (ieee80211_is_data(hdr->frame_control)) {
+	if (ieee80211_is_data(fc)) {
 		for_each_sta_info(local, hdr->addr2, sta, tmp) {
 			rx.sta = sta;
 			found_sta = true;
diff --git a/net/wireless/util.c b/net/wireless/util.c
index be2ab8c59e3a..7acb81b9675d 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -330,11 +330,18 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
 		if (iftype == NL80211_IFTYPE_MESH_POINT) {
 			struct ieee80211s_hdr *meshdr =
 				(struct ieee80211s_hdr *) (skb->data + hdrlen);
-			hdrlen += ieee80211_get_mesh_hdrlen(meshdr);
+			/* make sure meshdr->flags is on the linear part */
+			if (!pskb_may_pull(skb, hdrlen + 1))
+				return -1;
 			if (meshdr->flags & MESH_FLAGS_AE_A5_A6) {
-				memcpy(dst, meshdr->eaddr1, ETH_ALEN);
-				memcpy(src, meshdr->eaddr2, ETH_ALEN);
+				skb_copy_bits(skb, hdrlen +
+					offsetof(struct ieee80211s_hdr, eaddr1),
+				       	dst, ETH_ALEN);
+				skb_copy_bits(skb, hdrlen +
+					offsetof(struct ieee80211s_hdr, eaddr2),
+				        src, ETH_ALEN);
 			}
+			hdrlen += ieee80211_get_mesh_hdrlen(meshdr);
 		}
 		break;
 	case cpu_to_le16(IEEE80211_FCTL_FROMDS):
@@ -346,9 +353,14 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
 		if (iftype == NL80211_IFTYPE_MESH_POINT) {
 			struct ieee80211s_hdr *meshdr =
 				(struct ieee80211s_hdr *) (skb->data + hdrlen);
-			hdrlen += ieee80211_get_mesh_hdrlen(meshdr);
+			/* make sure meshdr->flags is on the linear part */
+			if (!pskb_may_pull(skb, hdrlen + 1))
+				return -1;
 			if (meshdr->flags & MESH_FLAGS_AE_A4)
-				memcpy(src, meshdr->eaddr1, ETH_ALEN);
+				skb_copy_bits(skb, hdrlen +
+					offsetof(struct ieee80211s_hdr, eaddr1),
+					src, ETH_ALEN);
+			hdrlen += ieee80211_get_mesh_hdrlen(meshdr);
 		}
 		break;
 	case cpu_to_le16(0):
@@ -357,7 +369,7 @@ int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr,
 		break;
 	}
 
-	if (unlikely(skb->len - hdrlen < 8))
+	if (!pskb_may_pull(skb, hdrlen + 8))
 		return -1;
 
 	payload = skb->data + hdrlen;
-- 
cgit v1.2.3-59-g8ed1b


From e1b3ec1a2a336c328c336cfa5485a5f0484cc90d Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Mon, 29 Mar 2010 12:18:34 +0200
Subject: mac80211: explicitly disable/enable QoS

Add an interface to disable/enable QoS (aka WMM or WME). Currently drivers
enable it explicitly when the ->conf_tx method is called, and never disable
it. Disabling is needed for some APs which do not support QoS, so that
we do not send QoS frames to them.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 5 +++++
 net/mac80211/mlme.c    | 9 ++++++++-
 net/mac80211/util.c    | 5 +++++
 3 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ecaae10426f9..dcf3c5f23c96 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -587,11 +587,15 @@ struct ieee80211_rx_status {
  *	may turn the device off as much as possible. Typically, this flag will
  *	be set when an interface is set UP but not associated or scanning, but
  *	it can also be unset in that case when monitor interfaces are active.
+ * @IEEE80211_CONF_QOS: Enable 802.11e QoS, also known as WMM (Wireless
+ *      Multimedia). On some drivers (iwlwifi is one known example) we
+ *      have to enable/disable QoS explicitly.
  */
 enum ieee80211_conf_flags {
 	IEEE80211_CONF_MONITOR		= (1<<0),
 	IEEE80211_CONF_PS		= (1<<1),
 	IEEE80211_CONF_IDLE		= (1<<2),
+	IEEE80211_CONF_QOS		= (1<<3),
 };
 
 
@@ -616,6 +620,7 @@ enum ieee80211_conf_changed {
 	IEEE80211_CONF_CHANGE_CHANNEL		= BIT(6),
 	IEEE80211_CONF_CHANGE_RETRY_LIMITS	= BIT(7),
 	IEEE80211_CONF_CHANGE_IDLE		= BIT(8),
+	IEEE80211_CONF_CHANGE_QOS		= BIT(9),
 };
 
 /**
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 65eafda5738a..c686d1b90f9f 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -586,6 +586,9 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 	int count;
 	u8 *pos, uapsd_queues = 0;
 
+	if (!local->ops->conf_tx)
+		return;
+
 	if (local->hw.queues < 4)
 		return;
 
@@ -660,11 +663,15 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local,
 		       params.aifs, params.cw_min, params.cw_max, params.txop,
 		       params.uapsd);
 #endif
-		if (drv_conf_tx(local, queue, &params) && local->ops->conf_tx)
+		if (drv_conf_tx(local, queue, &params))
 			printk(KERN_DEBUG "%s: failed to set TX queue "
 			       "parameters for queue %d\n",
 			       wiphy_name(local->hw.wiphy), queue);
 	}
+
+	/* enable WMM or activate new settings */
+	local->hw.conf.flags |=	IEEE80211_CONF_QOS;
+	drv_config(local, IEEE80211_CONF_CHANGE_QOS);
 }
 
 static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index c453226f06b2..7b2c170af71c 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -796,6 +796,11 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata)
 
 		drv_conf_tx(local, queue, &qparam);
 	}
+
+	/* after resetting the QoS TX queue settings to their defaults,
+	 * disable QoS entirely */
+	local->hw.conf.flags &=	~IEEE80211_CONF_QOS;
+	drv_config(local, IEEE80211_CONF_CHANGE_QOS);
 }
 
 void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata,
-- 
cgit v1.2.3-59-g8ed1b


From 0af26b278bc1d747370b451595b7586cb7b3455c Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Mon, 29 Mar 2010 12:18:36 +0200
Subject: mac80211: enable QoS explicitly in AP mode

Enable QoS explicitly when a user space AP program sets up the QoS
queues. Currently this is not needed, as iwlwifi does not work in AP mode
and no other driver implements enabling/disabling QoS.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index c8f520529eec..a4ca425e4f3f 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1136,6 +1136,10 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
 		return -EINVAL;
 	}
 
+	/* enable WMM or activate new settings */
+	local->hw.conf.flags |= IEEE80211_CONF_QOS;
+	drv_config(local, IEEE80211_CONF_CHANGE_QOS);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 17e4ec147f4939ca8c81b41b4261ec7974531381 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Mon, 29 Mar 2010 23:28:30 -0700
Subject: mac80211: Track Beacon signal strength and implement cqm events

Calculate a running average of the signal strength reported for Beacon
frames and indicate cqm events if the average value moves below or
above the configured threshold value (and filter out repetitive events
by using the configured hysteresis).

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c            |  9 ++++++---
 net/mac80211/debugfs_netdev.c | 12 ++++++++++++
 net/mac80211/ieee80211_i.h    | 19 ++++++++++++++++++
 net/mac80211/mlme.c           | 45 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index a4ca425e4f3f..4edd73cbf052 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1415,9 +1415,6 @@ static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy,
 	struct ieee80211_vif *vif = &sdata->vif;
 	struct ieee80211_bss_conf *bss_conf = &vif->bss_conf;
 
-	if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI))
-		return -EOPNOTSUPP;
-
 	if (rssi_thold == bss_conf->cqm_rssi_thold &&
 	    rssi_hyst == bss_conf->cqm_rssi_hyst)
 		return 0;
@@ -1425,6 +1422,12 @@ static int ieee80211_set_cqm_rssi_config(struct wiphy *wiphy,
 	bss_conf->cqm_rssi_thold = rssi_thold;
 	bss_conf->cqm_rssi_hyst = rssi_hyst;
 
+	if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) {
+		if (sdata->vif.type != NL80211_IFTYPE_STATION)
+			return -EOPNOTSUPP;
+		return 0;
+	}
+
 	/* tell the driver upon association, unless already associated */
 	if (sdata->u.mgd.associated)
 		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_CQM);
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 9affe2cd185f..ee61a9f6fabc 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -95,6 +95,14 @@ static ssize_t ieee80211_if_fmt_##name(					\
 	return scnprintf(buf, buflen, "%pM\n", sdata->field);		\
 }
 
+#define IEEE80211_IF_FMT_DEC_DIV_16(name, field)			\
+static ssize_t ieee80211_if_fmt_##name(					\
+	const struct ieee80211_sub_if_data *sdata,			\
+	char *buf, int buflen)						\
+{									\
+	return scnprintf(buf, buflen, "%d\n", sdata->field / 16);	\
+}
+
 #define __IEEE80211_IF_FILE(name, _write)				\
 static ssize_t ieee80211_if_read_##name(struct file *file,		\
 					char __user *userbuf,		\
@@ -135,6 +143,8 @@ IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ],
 /* STA attributes */
 IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
 IEEE80211_IF_FILE(aid, u.mgd.aid, DEC);
+IEEE80211_IF_FILE(last_beacon, u.mgd.last_beacon_signal, DEC);
+IEEE80211_IF_FILE(ave_beacon, u.mgd.ave_beacon_signal, DEC_DIV_16);
 
 static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
 			      enum ieee80211_smps_mode smps_mode)
@@ -271,6 +281,8 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata)
 
 	DEBUGFS_ADD(bssid);
 	DEBUGFS_ADD(aid);
+	DEBUGFS_ADD(last_beacon);
+	DEBUGFS_ADD(ave_beacon);
 	DEBUGFS_ADD_MODE(smps, 0600);
 }
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ab369e2a5282..741fb8bbc4a0 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -317,6 +317,7 @@ enum ieee80211_sta_flags {
 	IEEE80211_STA_MFP_ENABLED	= BIT(6),
 	IEEE80211_STA_UAPSD_ENABLED	= BIT(7),
 	IEEE80211_STA_NULLFUNC_ACKED	= BIT(8),
+	IEEE80211_STA_RESET_SIGNAL_AVE	= BIT(9),
 };
 
 struct ieee80211_if_managed {
@@ -359,6 +360,24 @@ struct ieee80211_if_managed {
 	int wmm_last_param_set;
 
 	u8 use_4addr;
+
+	/* Signal strength from the last Beacon frame in the current BSS. */
+	int last_beacon_signal;
+
+	/*
+	 * Weighted average of the signal strength from Beacon frames in the
+	 * current BSS. This is in units of 1/16 of the signal unit to maintain
+	 * accuracy and to speed up calculations, i.e., the value need to be
+	 * divided by 16 to get the actual value.
+	 */
+	int ave_beacon_signal;
+
+	/*
+	 * Last Beacon frame signal strength average (ave_beacon_signal / 16)
+	 * that triggered a cqm event. 0 indicates that no event has been
+	 * generated for the current association.
+	 */
+	int last_cqm_event_signal;
 };
 
 enum ieee80211_ibss_request {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c686d1b90f9f..de7519eb2b5d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -46,6 +46,13 @@
  */
 #define IEEE80211_PROBE_WAIT		(HZ / 2)
 
+/*
+ * Weight given to the latest Beacon frame when calculating average signal
+ * strength for Beacon frames received in the current BSS. This must be
+ * between 1 and 15.
+ */
+#define IEEE80211_SIGNAL_AVE_WEIGHT	3
+
 #define TMR_RUNNING_TIMER	0
 #define TMR_RUNNING_CHANSW	1
 
@@ -732,6 +739,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	sdata->u.mgd.associated = cbss;
 	memcpy(sdata->u.mgd.bssid, cbss->bssid, ETH_ALEN);
 
+	sdata->u.mgd.flags |= IEEE80211_STA_RESET_SIGNAL_AVE;
+
 	/* just to be sure */
 	sdata->u.mgd.flags &= ~(IEEE80211_STA_CONNECTION_POLL |
 				IEEE80211_STA_BEACON_POLL);
@@ -1347,6 +1356,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 				     struct ieee80211_rx_status *rx_status)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
 	size_t baselen;
 	struct ieee802_11_elems elems;
 	struct ieee80211_local *local = sdata->local;
@@ -1382,6 +1392,41 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 	if (memcmp(bssid, mgmt->bssid, ETH_ALEN) != 0)
 		return;
 
+	/* Track average RSSI from the Beacon frames of the current AP */
+	ifmgd->last_beacon_signal = rx_status->signal;
+	if (ifmgd->flags & IEEE80211_STA_RESET_SIGNAL_AVE) {
+		ifmgd->flags &= ~IEEE80211_STA_RESET_SIGNAL_AVE;
+		ifmgd->ave_beacon_signal = rx_status->signal;
+		ifmgd->last_cqm_event_signal = 0;
+	} else {
+		ifmgd->ave_beacon_signal =
+			(IEEE80211_SIGNAL_AVE_WEIGHT * rx_status->signal * 16 +
+			 (16 - IEEE80211_SIGNAL_AVE_WEIGHT) *
+			 ifmgd->ave_beacon_signal) / 16;
+	}
+	if (bss_conf->cqm_rssi_thold &&
+	    !(local->hw.flags & IEEE80211_HW_SUPPORTS_CQM_RSSI)) {
+		int sig = ifmgd->ave_beacon_signal / 16;
+		int last_event = ifmgd->last_cqm_event_signal;
+		int thold = bss_conf->cqm_rssi_thold;
+		int hyst = bss_conf->cqm_rssi_hyst;
+		if (sig < thold &&
+		    (last_event == 0 || sig < last_event - hyst)) {
+			ifmgd->last_cqm_event_signal = sig;
+			ieee80211_cqm_rssi_notify(
+				&sdata->vif,
+				NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW,
+				GFP_KERNEL);
+		} else if (sig > thold &&
+			   (last_event == 0 || sig > last_event + hyst)) {
+			ifmgd->last_cqm_event_signal = sig;
+			ieee80211_cqm_rssi_notify(
+				&sdata->vif,
+				NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH,
+				GFP_KERNEL);
+		}
+	}
+
 	if (ifmgd->flags & IEEE80211_STA_BEACON_POLL) {
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
 		if (net_ratelimit()) {
-- 
cgit v1.2.3-59-g8ed1b


From e69e95dbecfb73f76765cdd16dadc6219a9068e3 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Mon, 29 Mar 2010 23:29:31 -0700
Subject: mac80211: Send deauth/disassoc prior to dropping STA entry

When management frame protection (IEEE 802.11w) is used, the
deauthentication and disassociation frames must be protected whenever
the encryption keys are configured. We were removing the STA entry and
with it, the keys, just before actually sending out these frames which
meant that the frames went out unprotected. The AP will drop them in
such a case. Fix this by reordering the operations a bit so that
sta_info_destroy_addr() gets called only after
ieee80211_send_deauth_disassoc().

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index de7519eb2b5d..57a3c62139e2 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -782,7 +782,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata,
 	netif_carrier_on(sdata->dev);
 }
 
-static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata)
+static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
+				   bool remove_sta)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct ieee80211_local *local = sdata->local;
@@ -855,7 +856,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata)
 	changed |= BSS_CHANGED_BSSID;
 	ieee80211_bss_info_change_notify(sdata, changed);
 
-	sta_info_destroy_addr(sdata, bssid);
+	if (remove_sta)
+		sta_info_destroy_addr(sdata, bssid);
 }
 
 void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata,
@@ -968,7 +970,7 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
 
 	printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid);
 
-	ieee80211_set_disassoc(sdata);
+	ieee80211_set_disassoc(sdata, true);
 	ieee80211_recalc_idle(local);
 	mutex_unlock(&ifmgd->mtx);
 	/*
@@ -1034,7 +1036,7 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata,
 	printk(KERN_DEBUG "%s: deauthenticated from %pM (Reason: %u)\n",
 			sdata->name, bssid, reason_code);
 
-	ieee80211_set_disassoc(sdata);
+	ieee80211_set_disassoc(sdata, true);
 	ieee80211_recalc_idle(sdata->local);
 
 	return RX_MGMT_CFG80211_DEAUTH;
@@ -1064,7 +1066,7 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
 	printk(KERN_DEBUG "%s: disassociated from %pM (Reason: %u)\n",
 			sdata->name, mgmt->sa, reason_code);
 
-	ieee80211_set_disassoc(sdata);
+	ieee80211_set_disassoc(sdata, true);
 	ieee80211_recalc_idle(sdata->local);
 	return RX_MGMT_CFG80211_DISASSOC;
 }
@@ -1712,7 +1714,7 @@ static void ieee80211_sta_work(struct work_struct *work)
 			printk(KERN_DEBUG "No probe response from AP %pM"
 				" after %dms, disconnecting.\n",
 				bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ);
-			ieee80211_set_disassoc(sdata);
+			ieee80211_set_disassoc(sdata, true);
 			ieee80211_recalc_idle(local);
 			mutex_unlock(&ifmgd->mtx);
 			/*
@@ -2014,7 +2016,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 		}
 
 		/* Trying to reassociate - clear previous association state */
-		ieee80211_set_disassoc(sdata);
+		ieee80211_set_disassoc(sdata, true);
 	}
 	mutex_unlock(&ifmgd->mtx);
 
@@ -2118,7 +2120,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
 
 	if (ifmgd->associated == req->bss) {
 		bssid = req->bss->bssid;
-		ieee80211_set_disassoc(sdata);
+		ieee80211_set_disassoc(sdata, true);
 		mutex_unlock(&ifmgd->mtx);
 	} else {
 		bool not_auth_yet = false;
@@ -2175,6 +2177,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
 			   void *cookie)
 {
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	u8 bssid[ETH_ALEN];
 
 	mutex_lock(&ifmgd->mtx);
 
@@ -2192,13 +2195,15 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
 	printk(KERN_DEBUG "%s: disassociating from %pM by local choice (reason=%d)\n",
 	       sdata->name, req->bss->bssid, req->reason_code);
 
-	ieee80211_set_disassoc(sdata);
+	memcpy(bssid, req->bss->bssid, ETH_ALEN);
+	ieee80211_set_disassoc(sdata, false);
 
 	mutex_unlock(&ifmgd->mtx);
 
 	ieee80211_send_deauth_disassoc(sdata, req->bss->bssid,
 			IEEE80211_STYPE_DISASSOC, req->reason_code,
 			cookie);
+	sta_info_destroy_addr(sdata, bssid);
 
 	ieee80211_recalc_idle(sdata->local);
 
-- 
cgit v1.2.3-59-g8ed1b


From ecbcd3243651ae8ac2b73a96c320992a4cf01c5b Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Mon, 29 Mar 2010 23:35:23 -0700
Subject: mac80211: Fix BIP to be used only with group-addressed frames

BIP (part of IEEE 802.11w) is only supposed to be used with
group-addressed frames. We ended up picking it as a default mechanism
for every management frame whenever we did not have a STA entry for the
destination (e.g., for Probe Response to a STA that is not
associated). While the extra MMIE at the end of management frames
should not break frames completely in most cases, there is no point in
doing this. Fix key selection to pick the default management key only
if the frame is sent to multicast/broadcast address and the frame is a
robust management frame.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 08e1f17a4226..350096afe79a 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -513,6 +513,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx)
 	else if (tx->sta && (key = rcu_dereference(tx->sta->key)))
 		tx->key = key;
 	else if (ieee80211_is_mgmt(hdr->frame_control) &&
+		 is_multicast_ether_addr(hdr->addr1) &&
+		 ieee80211_is_robust_mgmt_frame(hdr) &&
 		 (key = rcu_dereference(tx->sdata->default_mgmt_key)))
 		tx->key = key;
 	else if ((key = rcu_dereference(tx->sdata->default_key)))
-- 
cgit v1.2.3-59-g8ed1b


From fa83a2189870cdcd6fb4deeed391e0b988dc9a19 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Mon, 29 Mar 2010 23:36:15 -0700
Subject: mac80211: Fix dropping of unprotected robust multicast frames

When selecting the RX key for group-addressed robust management
frames, we do not actually select any BIP key if the frame is
unprotected (since we cannot find the key index from MMIE). This
results in the drop_unencrypted check failing to drop the frame. It
is enough to verify that we have a STA entry for the transmitter and
that MFP is enabled for that STA; we do not need to check rx->key
here. This fixes BIP processing for unprotected, group-addressed,
robust management frames.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 11ed5aa90f83..ea71e1abc4c8 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1421,8 +1421,7 @@ ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
 			return -EACCES;
 		/* BIP does not use Protected field, so need to check MMIE */
 		if (unlikely(ieee80211_is_multicast_robust_mgmt_frame(rx->skb) &&
-			     ieee80211_get_mmie_keyidx(rx->skb) < 0 &&
-			     rx->key))
+			     ieee80211_get_mmie_keyidx(rx->skb) < 0))
 			return -EACCES;
 		/*
 		 * When using MFP, Action frames are not allowed prior to
-- 
cgit v1.2.3-59-g8ed1b


From d5d9de024c157a3dfbab191241c5c51e4d4c069a Mon Sep 17 00:00:00 2001
From: Marco Porsch <marco.porsch@siemens.com>
Date: Tue, 30 Mar 2010 10:00:16 +0200
Subject: nl80211: reenable station del for mesh

iw dev <devname> station del <MAC address> is quite useful in mesh mode and should be possible.

Signed-off-by: Marco Porsch <marco.porsch@siemens.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/nl80211.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a7fc3d83f5f6..95149f303409 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2096,7 +2096,8 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info)
 		goto out_rtnl;
 
 	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
-	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN) {
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
+	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) {
 		err = -EINVAL;
 		goto out;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From e3efca0a63b4ac4d8849d37d082a95cf1a75162d Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Sun, 28 Mar 2010 22:31:15 -0700
Subject: mac80211: Fix drop_unencrypted for MFP with hwaccel

Commit bef5d1c70d132145c0fc75b3586a19841a9a82e4 split
ieee80211_drop_unencrypted() into separate functions that are used for
Data and Management frames. However, it did not handle the
RX_FLAG_DECRYPTED correctly for Management frames:
ieee80211_drop_unencrypted() can only return 0 for Management frames,
so there is no point in calling it here. Instead, just check the
status->flag directly.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index ea71e1abc4c8..14366d4afbed 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1408,12 +1408,15 @@ static int
 ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
 {
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
+	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
 	__le16 fc = hdr->frame_control;
-	int res;
 
-	res = ieee80211_drop_unencrypted(rx, fc);
-	if (unlikely(res))
-		return res;
+	/*
+	 * Pass through unencrypted frames if the hardware has
+	 * decrypted them already.
+	 */
+	if (status->flag & RX_FLAG_DECRYPTED)
+		return 0;
 
 	if (rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP)) {
 		if (unlikely(ieee80211_is_unicast_robust_mgmt_frame(rx->skb) &&
-- 
cgit v1.2.3-59-g8ed1b


From 6c57990696a16ae43ea9fddb131b2784292068ba Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Thu, 1 Apr 2010 00:28:49 -0700
Subject: net-caif: using kmalloc/kfree requires the include of slab.h

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfcnfg.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 70a733d3d3da..c873e3d4387c 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -5,6 +5,7 @@
  */
 #include <linux/kernel.h>
 #include <linux/stddef.h>
+#include <linux/slab.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/cfpkt.h>
 #include <net/caif/cfcnfg.h>
-- 
cgit v1.2.3-59-g8ed1b


From d26e6a02835affa8bafe09a51e37f9fbc339e415 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 1 Apr 2010 12:39:19 +0200
Subject: netfilter: ctnetlink: compute message size properly

Message size should be dependent on the presence of an accounting
extension, not on CONFIG_NF_CT_ACCT definition.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_netlink.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 2b2af631d2b8..9a0c0d99dbfd 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -425,6 +425,17 @@ ctnetlink_proto_size(const struct nf_conn *ct)
 	return len;
 }
 
+static inline size_t
+ctnetlink_counters_size(const struct nf_conn *ct)
+{
+	if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT))
+		return 0;
+	return 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
+	       + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */
+	       + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */
+	       ;
+}
+
 static inline size_t
 ctnetlink_nlmsg_size(const struct nf_conn *ct)
 {
@@ -435,11 +446,7 @@ ctnetlink_nlmsg_size(const struct nf_conn *ct)
 	       + 3 * nla_total_size(sizeof(u_int8_t)) /* CTA_PROTO_NUM */
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_ID */
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_STATUS */
-#ifdef CONFIG_NF_CT_ACCT
-	       + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
-	       + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */
-	       + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */
-#endif
+	       + ctnetlink_counters_size(ct)
 	       + nla_total_size(sizeof(u_int32_t)) /* CTA_TIMEOUT */
 	       + nla_total_size(0) /* CTA_PROTOINFO */
 	       + nla_total_size(0) /* CTA_HELP */
-- 
cgit v1.2.3-59-g8ed1b


From 902a3dd5e6b19048604ec533203d7d38a39505a2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 1 Apr 2010 12:54:09 +0200
Subject: netfilter: CLUSTERIP: clusterip_seq_stop() fix

If clusterip_seq_start() memory allocation fails, we crash later in
clusterip_seq_stop(), trying to kfree(ERR_PTR(-ENOMEM))

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 5d70c43302bb..c6be74e57264 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -599,7 +599,8 @@ static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
 
 static void clusterip_seq_stop(struct seq_file *s, void *v)
 {
-	kfree(v);
+	if (!IS_ERR(v))
+		kfree(v);
 }
 
 static int clusterip_seq_show(struct seq_file *s, void *v)
-- 
cgit v1.2.3-59-g8ed1b


From 02e4eb75912a5c8babccc1acdc9cc913989be04e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 1 Apr 2010 14:35:56 +0200
Subject: netfilter: xt_hashlimit: RCU conversion

xt_hashlimit uses a central lock per hash table and suffers from
contention on some workloads. (Multiqueue NIC or if RPS is enabled)

After RCU conversion, central lock is only used when a writer wants to
add or delete an entry.

For 'readers', updating an existing entry, they use an individual lock
per entry.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_hashlimit.c | 70 +++++++++++++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 5470bb097c48..453178d25cba 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -81,12 +81,14 @@ struct dsthash_ent {
 	struct dsthash_dst dst;
 
 	/* modified structure members in the end */
+	spinlock_t lock;
 	unsigned long expires;		/* precalculated expiry time */
 	struct {
 		unsigned long prev;	/* last modification */
 		u_int32_t credit;
 		u_int32_t credit_cap, cost;
 	} rateinfo;
+	struct rcu_head rcu;
 };
 
 struct xt_hashlimit_htable {
@@ -143,9 +145,11 @@ dsthash_find(const struct xt_hashlimit_htable *ht,
 	u_int32_t hash = hash_dst(ht, dst);
 
 	if (!hlist_empty(&ht->hash[hash])) {
-		hlist_for_each_entry(ent, pos, &ht->hash[hash], node)
-			if (dst_cmp(ent, dst))
+		hlist_for_each_entry_rcu(ent, pos, &ht->hash[hash], node)
+			if (dst_cmp(ent, dst)) {
+				spin_lock(&ent->lock);
 				return ent;
+			}
 	}
 	return NULL;
 }
@@ -157,9 +161,10 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
 {
 	struct dsthash_ent *ent;
 
+	spin_lock(&ht->lock);
 	/* initialize hash with random val at the time we allocate
 	 * the first hashtable entry */
-	if (!ht->rnd_initialized) {
+	if (unlikely(!ht->rnd_initialized)) {
 		get_random_bytes(&ht->rnd, sizeof(ht->rnd));
 		ht->rnd_initialized = true;
 	}
@@ -168,27 +173,36 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht,
 		/* FIXME: do something. question is what.. */
 		if (net_ratelimit())
 			pr_err("max count of %u reached\n", ht->cfg.max);
-		return NULL;
-	}
-
-	ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
+		ent = NULL;
+	} else
+		ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
 	if (!ent) {
 		if (net_ratelimit())
 			pr_err("cannot allocate dsthash_ent\n");
-		return NULL;
-	}
-	memcpy(&ent->dst, dst, sizeof(ent->dst));
+	} else {
+		memcpy(&ent->dst, dst, sizeof(ent->dst));
+		spin_lock_init(&ent->lock);
 
-	hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]);
-	ht->count++;
+		spin_lock(&ent->lock);
+		hlist_add_head_rcu(&ent->node, &ht->hash[hash_dst(ht, dst)]);
+		ht->count++;
+	}
+	spin_unlock(&ht->lock);
 	return ent;
 }
 
+static void dsthash_free_rcu(struct rcu_head *head)
+{
+	struct dsthash_ent *ent = container_of(head, struct dsthash_ent, rcu);
+
+	kmem_cache_free(hashlimit_cachep, ent);
+}
+
 static inline void
 dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
 {
-	hlist_del(&ent->node);
-	kmem_cache_free(hashlimit_cachep, ent);
+	hlist_del_rcu(&ent->node);
+	call_rcu_bh(&ent->rcu, dsthash_free_rcu);
 	ht->count--;
 }
 static void htable_gc(unsigned long htlong);
@@ -512,15 +526,14 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
 		goto hotdrop;
 
-	spin_lock_bh(&hinfo->lock);
+	rcu_read_lock_bh();
 	dh = dsthash_find(hinfo, &dst);
 	if (dh == NULL) {
 		dh = dsthash_alloc_init(hinfo, &dst);
 		if (dh == NULL) {
-			spin_unlock_bh(&hinfo->lock);
+			rcu_read_unlock_bh();
 			goto hotdrop;
 		}
-
 		dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
 		dh->rateinfo.prev = jiffies;
 		dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
@@ -537,11 +550,13 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	if (dh->rateinfo.credit >= dh->rateinfo.cost) {
 		/* below the limit */
 		dh->rateinfo.credit -= dh->rateinfo.cost;
-		spin_unlock_bh(&hinfo->lock);
+		spin_unlock(&dh->lock);
+		rcu_read_unlock_bh();
 		return !(info->cfg.mode & XT_HASHLIMIT_INVERT);
 	}
 
-	spin_unlock_bh(&hinfo->lock);
+	spin_unlock(&dh->lock);
+	rcu_read_unlock_bh();
 	/* default match is underlimit - so over the limit, we need to invert */
 	return info->cfg.mode & XT_HASHLIMIT_INVERT;
 
@@ -666,12 +681,15 @@ static void dl_seq_stop(struct seq_file *s, void *v)
 static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 				   struct seq_file *s)
 {
+	int res;
+
+	spin_lock(&ent->lock);
 	/* recalculate to show accurate numbers */
 	rateinfo_recalc(ent, jiffies);
 
 	switch (family) {
 	case NFPROTO_IPV4:
-		return seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n",
+		res = seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n",
 				 (long)(ent->expires - jiffies)/HZ,
 				 &ent->dst.ip.src,
 				 ntohs(ent->dst.src_port),
@@ -679,9 +697,10 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 				 ntohs(ent->dst.dst_port),
 				 ent->rateinfo.credit, ent->rateinfo.credit_cap,
 				 ent->rateinfo.cost);
+		break;
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 	case NFPROTO_IPV6:
-		return seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n",
+		res = seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n",
 				 (long)(ent->expires - jiffies)/HZ,
 				 &ent->dst.ip6.src,
 				 ntohs(ent->dst.src_port),
@@ -689,11 +708,14 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
 				 ntohs(ent->dst.dst_port),
 				 ent->rateinfo.credit, ent->rateinfo.credit_cap,
 				 ent->rateinfo.cost);
+		break;
 #endif
 	default:
 		BUG();
-		return 0;
+		res = 0;
 	}
+	spin_unlock(&ent->lock);
+	return res;
 }
 
 static int dl_seq_show(struct seq_file *s, void *v)
@@ -817,9 +839,11 @@ err1:
 
 static void __exit hashlimit_mt_exit(void)
 {
-	kmem_cache_destroy(hashlimit_cachep);
 	xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
 	unregister_pernet_subsys(&hashlimit_net_ops);
+
+	rcu_barrier_bh();
+	kmem_cache_destroy(hashlimit_cachep);
 }
 
 module_init(hashlimit_mt_init);
-- 
cgit v1.2.3-59-g8ed1b


From d4fc6dbb5ae51430e35b2005f6d68938861f8d8b Mon Sep 17 00:00:00 2001
From: Hagen Paul Pfeifer <hagen@jauu.net>
Date: Wed, 31 Mar 2010 14:54:46 +0000
Subject: ipv4: remove redundant verification code

The check if error signaling is wanted (inet->recverr != 0) is done by
the caller: raw.c:raw_err() and udp.c:__udp4_lib_err(), so there is no
need to check this condition again.

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 644dc43a55de..f4b47acbf7b6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -286,12 +286,8 @@ int ip_ra_control(struct sock *sk, unsigned char on,
 void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
 		   __be16 port, u32 info, u8 *payload)
 {
-	struct inet_sock *inet = inet_sk(sk);
 	struct sock_exterr_skb *serr;
 
-	if (!inet->recverr)
-		return;
-
 	skb = skb_clone(skb, GFP_ATOMIC);
 	if (!skb)
 		return;
-- 
cgit v1.2.3-59-g8ed1b


From 5d944c640b4ae5f37c537acf491c2f0eb89fa0d6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 31 Mar 2010 07:06:04 +0000
Subject: gen_estimator: deadlock fix

One of my test machines got a deadlock during "tc" sessions,
adding/deleting classes & filters, using traffic estimators.

After some analysis, I believe we have a potential use after free case
in est_timer() :

spin_lock(e->stats_lock); << HERE >>
read_lock(&est_lock);
if (e->bstats == NULL)   << TEST >>
	goto skip;

Test is done a bit late, because after estimator is killed, and before
rcu grace period elapsed, we might already have freed/reused memory where
e->stats_lock points to (some qdisc->q.lock)

A possible fix is to respect a rcu grace period at Qdisc dismantle time.

On 64bit, sizeof(struct Qdisc) is exactly 192 bytes. Adding 16 bytes to
it (for struct rcu_head) is a problem because it might change
performance, given QDISC_ALIGNTO is 32 bytes.

This is why I also change QDISC_ALIGNTO to 64 bytes, to satisfy most
current alignment requirements.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/pkt_sched.h   |  2 +-
 include/net/sch_generic.h |  1 +
 net/sched/sch_generic.c   | 15 +++++++++++++--
 3 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index b6cdc33b39c1..9d4d87cc970e 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -12,7 +12,7 @@ struct qdisc_walker {
 	int	(*fn)(struct Qdisc *, unsigned long cl, struct qdisc_walker *);
 };
 
-#define QDISC_ALIGNTO		32
+#define QDISC_ALIGNTO		64
 #define QDISC_ALIGN(len)	(((len) + QDISC_ALIGNTO-1) & ~(QDISC_ALIGNTO-1))
 
 static inline void *qdisc_priv(struct Qdisc *q)
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 67dc08eaaa45..03ca5d826757 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -73,6 +73,7 @@ struct Qdisc {
 	struct sk_buff_head	q;
 	struct gnet_stats_basic_packed bstats;
 	struct gnet_stats_queue	qstats;
+	struct rcu_head     rcu_head;
 };
 
 struct Qdisc_class_ops {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 5173c1e1b19c..17513252e83f 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -528,7 +528,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	unsigned int size;
 	int err = -ENOBUFS;
 
-	/* ensure that the Qdisc and the private data are 32-byte aligned */
+	/* ensure that the Qdisc and the private data are 64-byte aligned */
 	size = QDISC_ALIGN(sizeof(*sch));
 	size += ops->priv_size + (QDISC_ALIGNTO - 1);
 
@@ -590,6 +590,13 @@ void qdisc_reset(struct Qdisc *qdisc)
 }
 EXPORT_SYMBOL(qdisc_reset);
 
+static void qdisc_rcu_free(struct rcu_head *head)
+{
+	struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
+
+	kfree((char *) qdisc - qdisc->padded);
+}
+
 void qdisc_destroy(struct Qdisc *qdisc)
 {
 	const struct Qdisc_ops  *ops = qdisc->ops;
@@ -613,7 +620,11 @@ void qdisc_destroy(struct Qdisc *qdisc)
 	dev_put(qdisc_dev(qdisc));
 
 	kfree_skb(qdisc->gso_skb);
-	kfree((char *) qdisc - qdisc->padded);
+	/*
+	 * gen_estimator est_timer() might access qdisc->q.lock,
+	 * wait a RCU grace period before freeing qdisc.
+	 */
+	call_rcu(&qdisc->rcu_head, qdisc_rcu_free);
 }
 EXPORT_SYMBOL(qdisc_destroy);
 
-- 
cgit v1.2.3-59-g8ed1b


From 152102c7f2bf191690f1069bae292ea3925adf14 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 30 Mar 2010 20:16:22 +0000
Subject: rps: keep the old behavior on SMP without rps

keep the old behavior on SMP without rps

RPS introduces a lock operation to per cpu variable input_pkt_queue on
SMP whether rps is enabled or not. On SMP without RPS, this lock isn't
needed at all.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
net/core/dev.c | 42 ++++++++++++++++++++++++++++--------------
1 file changed, 28 insertions(+), 14 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 42 ++++++++++++++++++++++++++++--------------
 1 file changed, 28 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 887aa84fcd46..427cd53c118d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -206,6 +206,20 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
+static inline void rps_lock(struct softnet_data *queue)
+{
+#ifdef CONFIG_RPS
+	spin_lock(&queue->input_pkt_queue.lock);
+#endif
+}
+
+static inline void rps_unlock(struct softnet_data *queue)
+{
+#ifdef CONFIG_RPS
+	spin_unlock(&queue->input_pkt_queue.lock);
+#endif
+}
+
 /* Device list insertion */
 static int list_netdevice(struct net_device *dev)
 {
@@ -2313,13 +2327,13 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
 	local_irq_save(flags);
 	__get_cpu_var(netdev_rx_stat).total++;
 
-	spin_lock(&queue->input_pkt_queue.lock);
+	rps_lock(queue);
 	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
 		if (queue->input_pkt_queue.qlen) {
 enqueue:
 			__skb_queue_tail(&queue->input_pkt_queue, skb);
-			spin_unlock_irqrestore(&queue->input_pkt_queue.lock,
-			    flags);
+			rps_unlock(queue);
+			local_irq_restore(flags);
 			return NET_RX_SUCCESS;
 		}
 
@@ -2341,7 +2355,7 @@ enqueue:
 		goto enqueue;
 	}
 
-	spin_unlock(&queue->input_pkt_queue.lock);
+	rps_unlock(queue);
 
 	__get_cpu_var(netdev_rx_stat).dropped++;
 	local_irq_restore(flags);
@@ -2766,19 +2780,19 @@ int netif_receive_skb(struct sk_buff *skb)
 EXPORT_SYMBOL(netif_receive_skb);
 
 /* Network device is going away, flush any packets still pending  */
-static void flush_backlog(struct net_device *dev, int cpu)
+static void flush_backlog(void *arg)
 {
-	struct softnet_data *queue = &per_cpu(softnet_data, cpu);
+	struct net_device *dev = arg;
+	struct softnet_data *queue = &__get_cpu_var(softnet_data);
 	struct sk_buff *skb, *tmp;
-	unsigned long flags;
 
-	spin_lock_irqsave(&queue->input_pkt_queue.lock, flags);
+	rps_lock(queue);
 	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
 		if (skb->dev == dev) {
 			__skb_unlink(skb, &queue->input_pkt_queue);
 			kfree_skb(skb);
 		}
-	spin_unlock_irqrestore(&queue->input_pkt_queue.lock, flags);
+	rps_unlock(queue);
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -3091,14 +3105,16 @@ static int process_backlog(struct napi_struct *napi, int quota)
 	do {
 		struct sk_buff *skb;
 
-		spin_lock_irq(&queue->input_pkt_queue.lock);
+		local_irq_disable();
+		rps_lock(queue);
 		skb = __skb_dequeue(&queue->input_pkt_queue);
 		if (!skb) {
 			__napi_complete(napi);
 			spin_unlock_irq(&queue->input_pkt_queue.lock);
 			break;
 		}
-		spin_unlock_irq(&queue->input_pkt_queue.lock);
+		rps_unlock(queue);
+		local_irq_enable();
 
 		__netif_receive_skb(skb);
 	} while (++work < quota && jiffies == start_time);
@@ -5548,7 +5564,6 @@ void netdev_run_todo(void)
 	while (!list_empty(&list)) {
 		struct net_device *dev
 			= list_first_entry(&list, struct net_device, todo_list);
-		int i;
 		list_del(&dev->todo_list);
 
 		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
@@ -5560,8 +5575,7 @@ void netdev_run_todo(void)
 
 		dev->reg_state = NETREG_UNREGISTERED;
 
-		for_each_online_cpu(i)
-			flush_backlog(dev, i);
+		on_each_cpu(flush_backlog, dev, 1);
 
 		netdev_wait_allrefs(dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From 34996cb91dd72f0b0456d8fd3fef4aaee62232f2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 31 Mar 2010 01:19:49 +0000
Subject: xfrm: Remove xfrm_state_genid
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The xfrm state genid only needs to be matched against the copy
saved in xfrm_dst.  So we don't need a global genid at all.  In
fact, we don't even need to initialise it.

Based on observation by Timo Teräs.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_state.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 17d5b96f2fc8..71f8f33d637b 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -37,7 +37,6 @@
 static DEFINE_SPINLOCK(xfrm_state_lock);
 
 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
-static unsigned int xfrm_state_genid;
 
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
@@ -923,8 +922,6 @@ static void __xfrm_state_insert(struct xfrm_state *x)
 	struct net *net = xs_net(x);
 	unsigned int h;
 
-	x->genid = ++xfrm_state_genid;
-
 	list_add(&x->km.all, &net->xfrm.state_all);
 
 	h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
@@ -970,7 +967,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
 		    (mark & x->mark.m) == x->mark.v &&
 		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
 		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
-			x->genid = xfrm_state_genid;
+			x->genid++;
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From c8bf4d04f970fafb3430d332533e1cf103f2a018 Mon Sep 17 00:00:00 2001
From: Timo Teräs <timo.teras@iki.fi>
Date: Wed, 31 Mar 2010 00:17:04 +0000
Subject: xfrm_user: verify policy direction at XFRM_MSG_POLEXPIRE handler

Add the missing policy direction check. This is especially
important because without it xfrm_user may end up deleting a
per-socket policy, which is not allowed.

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 6106b72826d3..da5ba86181de 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1741,6 +1741,10 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err)
 		return err;
 
+	err = verify_policy_dir(p->dir);
+	if (err)
+		return err;
+
 	if (p->index)
 		xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, 0, &err);
 	else {
-- 
cgit v1.2.3-59-g8ed1b


From ea2dea9dacc256fe927857feb423872051642ae7 Mon Sep 17 00:00:00 2001
From: Timo Teräs <timo.teras@iki.fi>
Date: Wed, 31 Mar 2010 00:17:05 +0000
Subject: xfrm: remove policy lock when accessing policy->walk.dead

All of the code considers ->dead as a hint that the cached policy
needs to get refreshed. The read side can just drop the read lock
without any side effects.

The write side needs to make sure that it is written exactly
once. The only possible race is in xfrm_policy_kill(). This is
fixed by checking the result of __xfrm_policy_unlink() when needed.
The unlink always succeeds if the policy object was looked up from
the hash list (so some checks are removed), but the result must be
checked when unlinking a policy via a reference (appropriate
checks added).

Since policy->walk.dead is written exactly once, it no longer
needs to be protected with a write lock.

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 31 +++++++++----------------------
 net/xfrm/xfrm_user.c   |  6 +-----
 2 files changed, 10 insertions(+), 27 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 843e066649cb..82789cf1c632 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -156,7 +156,7 @@ static void xfrm_policy_timer(unsigned long data)
 
 	read_lock(&xp->lock);
 
-	if (xp->walk.dead)
+	if (unlikely(xp->walk.dead))
 		goto out;
 
 	dir = xfrm_policy_id2dir(xp->index);
@@ -297,17 +297,7 @@ static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
 
 static void xfrm_policy_kill(struct xfrm_policy *policy)
 {
-	int dead;
-
-	write_lock_bh(&policy->lock);
-	dead = policy->walk.dead;
 	policy->walk.dead = 1;
-	write_unlock_bh(&policy->lock);
-
-	if (unlikely(dead)) {
-		WARN_ON(1);
-		return;
-	}
 
 	spin_lock_bh(&xfrm_policy_gc_lock);
 	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
@@ -776,7 +766,6 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi
 int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 {
 	int dir, err = 0, cnt = 0;
-	struct xfrm_policy *dp;
 
 	write_lock_bh(&xfrm_policy_lock);
 
@@ -794,10 +783,9 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 				     &net->xfrm.policy_inexact[dir], bydst) {
 			if (pol->type != type)
 				continue;
-			dp = __xfrm_policy_unlink(pol, dir);
+			__xfrm_policy_unlink(pol, dir);
 			write_unlock_bh(&xfrm_policy_lock);
-			if (dp)
-				cnt++;
+			cnt++;
 
 			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
 						 audit_info->sessionid,
@@ -816,10 +804,9 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 					     bydst) {
 				if (pol->type != type)
 					continue;
-				dp = __xfrm_policy_unlink(pol, dir);
+				__xfrm_policy_unlink(pol, dir);
 				write_unlock_bh(&xfrm_policy_lock);
-				if (dp)
-					cnt++;
+				cnt++;
 
 				xfrm_audit_policy_delete(pol, 1,
 							 audit_info->loginuid,
@@ -1132,6 +1119,9 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
 	}
 	if (old_pol)
+		/* Unlinking succeeds always. This is the only function
+		 * allowed to delete or replace socket policy.
+		 */
 		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
 	write_unlock_bh(&xfrm_policy_lock);
 
@@ -1737,11 +1727,8 @@ restart:
 			goto error;
 		}
 
-		for (pi = 0; pi < npols; pi++) {
-			read_lock_bh(&pols[pi]->lock);
+		for (pi = 0; pi < npols; pi++)
 			pol_dead |= pols[pi]->walk.dead;
-			read_unlock_bh(&pols[pi]->lock);
-		}
 
 		write_lock_bh(&policy->lock);
 		if (unlikely(pol_dead || stale_bundle(dst))) {
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index da5ba86181de..a267fbdda525 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1770,13 +1770,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (xp == NULL)
 		return -ENOENT;
 
-	read_lock(&xp->lock);
-	if (xp->walk.dead) {
-		read_unlock(&xp->lock);
+	if (unlikely(xp->walk.dead))
 		goto out;
-	}
 
-	read_unlock(&xp->lock);
 	err = 0;
 	if (up->hard) {
 		uid_t loginuid = NETLINK_CB(skb).loginuid;
-- 
cgit v1.2.3-59-g8ed1b


From d7997fe1f4584da12e9c29fb682c18e9bdc13b73 Mon Sep 17 00:00:00 2001
From: Timo Teräs <timo.teras@iki.fi>
Date: Wed, 31 Mar 2010 00:17:06 +0000
Subject: flow: structurize flow cache

Group all per-cpu data to one structure instead of having many
globals. Also prepare the internals so that we can have multiple
instances of the flow cache if needed.

Only the kmem_cache is left as a global as all flow caches share
the same element size, and benefit from using a common cache.

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow.c | 223 ++++++++++++++++++++++++++++++--------------------------
 1 file changed, 119 insertions(+), 104 deletions(-)

(limited to 'net')

diff --git a/net/core/flow.c b/net/core/flow.c
index 96015871ecea..1d27ca6b421d 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -35,104 +35,105 @@ struct flow_cache_entry {
 	atomic_t		*object_ref;
 };
 
-atomic_t flow_cache_genid = ATOMIC_INIT(0);
-
-static u32 flow_hash_shift;
-#define flow_hash_size	(1 << flow_hash_shift)
-static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
-
-#define flow_table(cpu) (per_cpu(flow_tables, cpu))
-
-static struct kmem_cache *flow_cachep __read_mostly;
-
-static int flow_lwm, flow_hwm;
-
-struct flow_percpu_info {
-	int hash_rnd_recalc;
-	u32 hash_rnd;
-	int count;
+struct flow_cache_percpu {
+	struct flow_cache_entry **	hash_table;
+	int				hash_count;
+	u32				hash_rnd;
+	int				hash_rnd_recalc;
+	struct tasklet_struct		flush_tasklet;
 };
-static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
-
-#define flow_hash_rnd_recalc(cpu) \
-	(per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
-#define flow_hash_rnd(cpu) \
-	(per_cpu(flow_hash_info, cpu).hash_rnd)
-#define flow_count(cpu) \
-	(per_cpu(flow_hash_info, cpu).count)
-
-static struct timer_list flow_hash_rnd_timer;
-
-#define FLOW_HASH_RND_PERIOD	(10 * 60 * HZ)
 
 struct flow_flush_info {
-	atomic_t cpuleft;
-	struct completion completion;
+	struct flow_cache *		cache;
+	atomic_t			cpuleft;
+	struct completion		completion;
 };
-static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
 
-#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
+struct flow_cache {
+	u32				hash_shift;
+	unsigned long			order;
+	struct flow_cache_percpu *	percpu;
+	struct notifier_block		hotcpu_notifier;
+	int				low_watermark;
+	int				high_watermark;
+	struct timer_list		rnd_timer;
+};
+
+atomic_t flow_cache_genid = ATOMIC_INIT(0);
+static struct flow_cache flow_cache_global;
+static struct kmem_cache *flow_cachep;
+
+#define flow_cache_hash_size(cache)	(1 << (cache)->hash_shift)
+#define FLOW_HASH_RND_PERIOD		(10 * 60 * HZ)
 
 static void flow_cache_new_hashrnd(unsigned long arg)
 {
+	struct flow_cache *fc = (void *) arg;
 	int i;
 
 	for_each_possible_cpu(i)
-		flow_hash_rnd_recalc(i) = 1;
+		per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
 
-	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&flow_hash_rnd_timer);
+	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&fc->rnd_timer);
 }
 
-static void flow_entry_kill(int cpu, struct flow_cache_entry *fle)
+static void flow_entry_kill(struct flow_cache *fc,
+			    struct flow_cache_percpu *fcp,
+			    struct flow_cache_entry *fle)
 {
 	if (fle->object)
 		atomic_dec(fle->object_ref);
 	kmem_cache_free(flow_cachep, fle);
-	flow_count(cpu)--;
+	fcp->hash_count--;
 }
 
-static void __flow_cache_shrink(int cpu, int shrink_to)
+static void __flow_cache_shrink(struct flow_cache *fc,
+				struct flow_cache_percpu *fcp,
+				int shrink_to)
 {
 	struct flow_cache_entry *fle, **flp;
 	int i;
 
-	for (i = 0; i < flow_hash_size; i++) {
+	for (i = 0; i < flow_cache_hash_size(fc); i++) {
 		int k = 0;
 
-		flp = &flow_table(cpu)[i];
+		flp = &fcp->hash_table[i];
 		while ((fle = *flp) != NULL && k < shrink_to) {
 			k++;
 			flp = &fle->next;
 		}
 		while ((fle = *flp) != NULL) {
 			*flp = fle->next;
-			flow_entry_kill(cpu, fle);
+			flow_entry_kill(fc, fcp, fle);
 		}
 	}
 }
 
-static void flow_cache_shrink(int cpu)
+static void flow_cache_shrink(struct flow_cache *fc,
+			      struct flow_cache_percpu *fcp)
 {
-	int shrink_to = flow_lwm / flow_hash_size;
+	int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
 
-	__flow_cache_shrink(cpu, shrink_to);
+	__flow_cache_shrink(fc, fcp, shrink_to);
 }
 
-static void flow_new_hash_rnd(int cpu)
+static void flow_new_hash_rnd(struct flow_cache *fc,
+			      struct flow_cache_percpu *fcp)
 {
-	get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
-	flow_hash_rnd_recalc(cpu) = 0;
-
-	__flow_cache_shrink(cpu, 0);
+	get_random_bytes(&fcp->hash_rnd, sizeof(u32));
+	fcp->hash_rnd_recalc = 0;
+	__flow_cache_shrink(fc, fcp, 0);
 }
 
-static u32 flow_hash_code(struct flowi *key, int cpu)
+static u32 flow_hash_code(struct flow_cache *fc,
+			  struct flow_cache_percpu *fcp,
+			  struct flowi *key)
 {
 	u32 *k = (u32 *) key;
 
-	return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
-		(flow_hash_size - 1));
+	return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
+		& (flow_cache_hash_size(fc) - 1));
 }
 
 #if (BITS_PER_LONG == 64)
@@ -168,24 +169,25 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
 void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 			flow_resolve_t resolver)
 {
+	struct flow_cache *fc = &flow_cache_global;
+	struct flow_cache_percpu *fcp;
 	struct flow_cache_entry *fle, **head;
 	unsigned int hash;
-	int cpu;
 
 	local_bh_disable();
-	cpu = smp_processor_id();
+	fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
 
 	fle = NULL;
 	/* Packet really early in init?  Making flow_cache_init a
 	 * pre-smp initcall would solve this.  --RR */
-	if (!flow_table(cpu))
+	if (!fcp->hash_table)
 		goto nocache;
 
-	if (flow_hash_rnd_recalc(cpu))
-		flow_new_hash_rnd(cpu);
-	hash = flow_hash_code(key, cpu);
+	if (fcp->hash_rnd_recalc)
+		flow_new_hash_rnd(fc, fcp);
+	hash = flow_hash_code(fc, fcp, key);
 
-	head = &flow_table(cpu)[hash];
+	head = &fcp->hash_table[hash];
 	for (fle = *head; fle; fle = fle->next) {
 		if (fle->family == family &&
 		    fle->dir == dir &&
@@ -204,8 +206,8 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 	}
 
 	if (!fle) {
-		if (flow_count(cpu) > flow_hwm)
-			flow_cache_shrink(cpu);
+		if (fcp->hash_count > fc->high_watermark)
+			flow_cache_shrink(fc, fcp);
 
 		fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
 		if (fle) {
@@ -215,7 +217,7 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 			fle->dir = dir;
 			memcpy(&fle->key, key, sizeof(*key));
 			fle->object = NULL;
-			flow_count(cpu)++;
+			fcp->hash_count++;
 		}
 	}
 
@@ -249,14 +251,15 @@ nocache:
 static void flow_cache_flush_tasklet(unsigned long data)
 {
 	struct flow_flush_info *info = (void *)data;
+	struct flow_cache *fc = info->cache;
+	struct flow_cache_percpu *fcp;
 	int i;
-	int cpu;
 
-	cpu = smp_processor_id();
-	for (i = 0; i < flow_hash_size; i++) {
+	fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
+	for (i = 0; i < flow_cache_hash_size(fc); i++) {
 		struct flow_cache_entry *fle;
 
-		fle = flow_table(cpu)[i];
+		fle = fcp->hash_table[i];
 		for (; fle; fle = fle->next) {
 			unsigned genid = atomic_read(&flow_cache_genid);
 
@@ -272,7 +275,6 @@ static void flow_cache_flush_tasklet(unsigned long data)
 		complete(&info->completion);
 }
 
-static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
 static void flow_cache_flush_per_cpu(void *data)
 {
 	struct flow_flush_info *info = data;
@@ -280,8 +282,7 @@ static void flow_cache_flush_per_cpu(void *data)
 	struct tasklet_struct *tasklet;
 
 	cpu = smp_processor_id();
-
-	tasklet = flow_flush_tasklet(cpu);
+	tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
 	tasklet->data = (unsigned long)info;
 	tasklet_schedule(tasklet);
 }
@@ -294,6 +295,7 @@ void flow_cache_flush(void)
 	/* Don't want cpus going down or up during this. */
 	get_online_cpus();
 	mutex_lock(&flow_flush_sem);
+	info.cache = &flow_cache_global;
 	atomic_set(&info.cpuleft, num_online_cpus());
 	init_completion(&info.completion);
 
@@ -307,62 +309,75 @@ void flow_cache_flush(void)
 	put_online_cpus();
 }
 
-static void __init flow_cache_cpu_prepare(int cpu)
+static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
+					  struct flow_cache_percpu *fcp)
 {
-	struct tasklet_struct *tasklet;
-	unsigned long order;
-
-	for (order = 0;
-	     (PAGE_SIZE << order) <
-		     (sizeof(struct flow_cache_entry *)*flow_hash_size);
-	     order++)
-		/* NOTHING */;
-
-	flow_table(cpu) = (struct flow_cache_entry **)
-		__get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
-	if (!flow_table(cpu))
-		panic("NET: failed to allocate flow cache order %lu\n", order);
-
-	flow_hash_rnd_recalc(cpu) = 1;
-	flow_count(cpu) = 0;
-
-	tasklet = flow_flush_tasklet(cpu);
-	tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
+	fcp->hash_table = (struct flow_cache_entry **)
+		__get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
+	if (!fcp->hash_table)
+		panic("NET: failed to allocate flow cache order %lu\n", fc->order);
+
+	fcp->hash_rnd_recalc = 1;
+	fcp->hash_count = 0;
+	tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
 }
 
 static int flow_cache_cpu(struct notifier_block *nfb,
 			  unsigned long action,
 			  void *hcpu)
 {
+	struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
+	int cpu = (unsigned long) hcpu;
+	struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
+
 	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
-		__flow_cache_shrink((unsigned long)hcpu, 0);
+		__flow_cache_shrink(fc, fcp, 0);
 	return NOTIFY_OK;
 }
 
-static int __init flow_cache_init(void)
+static int flow_cache_init(struct flow_cache *fc)
 {
+	unsigned long order;
 	int i;
 
-	flow_cachep = kmem_cache_create("flow_cache",
-					sizeof(struct flow_cache_entry),
-					0, SLAB_PANIC,
-					NULL);
-	flow_hash_shift = 10;
-	flow_lwm = 2 * flow_hash_size;
-	flow_hwm = 4 * flow_hash_size;
+	fc->hash_shift = 10;
+	fc->low_watermark = 2 * flow_cache_hash_size(fc);
+	fc->high_watermark = 4 * flow_cache_hash_size(fc);
+
+	for (order = 0;
+	     (PAGE_SIZE << order) <
+		     (sizeof(struct flow_cache_entry *)*flow_cache_hash_size(fc));
+	     order++)
+		/* NOTHING */;
+	fc->order = order;
+	fc->percpu = alloc_percpu(struct flow_cache_percpu);
 
-	setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0);
-	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
-	add_timer(&flow_hash_rnd_timer);
+	setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
+		    (unsigned long) fc);
+	fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+	add_timer(&fc->rnd_timer);
 
 	for_each_possible_cpu(i)
-		flow_cache_cpu_prepare(i);
+		flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
+
+	fc->hotcpu_notifier = (struct notifier_block){
+		.notifier_call = flow_cache_cpu,
+	};
+	register_hotcpu_notifier(&fc->hotcpu_notifier);
 
-	hotcpu_notifier(flow_cache_cpu, 0);
 	return 0;
 }
 
-module_init(flow_cache_init);
+static int __init flow_cache_init_global(void)
+{
+	flow_cachep = kmem_cache_create("flow_cache",
+					sizeof(struct flow_cache_entry),
+					0, SLAB_PANIC, NULL);
+
+	return flow_cache_init(&flow_cache_global);
+}
+
+module_init(flow_cache_init_global);
 
 EXPORT_SYMBOL(flow_cache_genid);
 EXPORT_SYMBOL(flow_cache_lookup);
-- 
cgit v1.2.3-59-g8ed1b


From 5acbbd428db47b12f137a8a2aa96b3c0a96b744e Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Tue, 30 Mar 2010 22:35:50 +0000
Subject: net: change illegal_highdma to use dma_mask

Robert Hancock pointed out two problems with NETIF_F_HIGHDMA:

-Many drivers only set the flag when they detect they can use 64-bit DMA,
since otherwise they could receive DMA addresses that they can't handle
(which on platforms without IOMMU/SWIOTLB support is fatal). This means that if
64-bit support isn't available, even buffers located below 4GB will get copied
unnecessarily.

-Some drivers set the flag even though they can't actually handle 64-bit DMA,
which would mean that on platforms without IOMMU/SWIOTLB they would get a DMA
mapping error if the memory they received happened to be located above 4GB.

http://lkml.org/lkml/2010/3/3/530

We can use the dma_mask to decide whether bouncing is needed here.
Then we can safely fix drivers that misuse NETIF_F_HIGHDMA.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 427cd53c118d..e19cdae49fef 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,7 @@
 #include <linux/jhash.h>
 #include <linux/random.h>
 #include <trace/events/napi.h>
+#include <linux/pci.h>
 
 #include "net-sysfs.h"
 
@@ -1804,14 +1805,21 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_HIGHMEM
 	int i;
+	if (!(dev->features & NETIF_F_HIGHDMA)) {
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
+			if (PageHighMem(skb_shinfo(skb)->frags[i].page))
+				return 1;
+	}
 
-	if (dev->features & NETIF_F_HIGHDMA)
-		return 0;
-
-	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
-		if (PageHighMem(skb_shinfo(skb)->frags[i].page))
-			return 1;
+	if (PCI_DMA_BUS_IS_PHYS) {
+		struct device *pdev = dev->dev.parent;
 
+		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+			dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
+			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
+				return 1;
+		}
+	}
 #endif
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 9092c658bab215b2752fa59d2a36c05b74d1e9e9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 2 Apr 2010 13:34:49 -0700
Subject: net: illegal_highdma() fix

Followup to commit 5acbbd428db47b12f137a8a2aa96b3c0a96b744e
(net: change illegal_highdma to use dma_mask)

If dev->dev.parent is NULL, we should not try to dereference it.

Don't force inlining of illegal_highdma(), as it's pretty big now.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index e19cdae49fef..c6b52068d5ec 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1801,7 +1801,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
  * 2. No high memory really exists on this machine.
  */
 
-static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 {
 #ifdef CONFIG_HIGHMEM
 	int i;
@@ -1814,6 +1814,8 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 	if (PCI_DMA_BUS_IS_PHYS) {
 		struct device *pdev = dev->dev.parent;
 
+		if (!pdev)
+			return 0;
 		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
 			dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page);
 			if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask)
-- 
cgit v1.2.3-59-g8ed1b


From a748ee2426817a95b1f03012d8f339c45c722ae1 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 1 Apr 2010 21:22:09 +0000
Subject: net: move address list functions to a separate file

Also rename the unicast functions slightly to be consistent with the
multicast ones.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |   2 +-
 drivers/net/macvlan.c           |  10 +-
 drivers/scsi/fcoe/fcoe.c        |  14 +-
 include/linux/netdevice.h       |  15 +-
 net/8021q/vlan.c                |   4 +-
 net/8021q/vlan_dev.c            |  14 +-
 net/core/Makefile               |   3 +-
 net/core/dev.c                  | 430 +-----------------------------------
 net/core/dev_addr_lists.c       | 478 ++++++++++++++++++++++++++++++++++++++++
 net/dsa/slave.c                 |  14 +-
 net/packet/af_packet.c          |   4 +-
 11 files changed, 524 insertions(+), 464 deletions(-)
 create mode 100644 net/core/dev_addr_lists.c

(limited to 'net')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index f8ab60b914c4..d6ae63b2cf00 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1491,7 +1491,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 			}
 
 			/* Flush unicast and multicast addresses */
-			dev_unicast_flush(bond_dev);
+			dev_uc_flush(bond_dev);
 			dev_addr_discard(bond_dev);
 
 			if (slave_dev->type != ARPHRD_ETHER)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 445e73c343ba..9a939d828b47 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -282,7 +282,7 @@ static int macvlan_open(struct net_device *dev)
 	if (macvlan_addr_busy(vlan->port, dev->dev_addr))
 		goto out;
 
-	err = dev_unicast_add(lowerdev, dev->dev_addr);
+	err = dev_uc_add(lowerdev, dev->dev_addr);
 	if (err < 0)
 		goto out;
 	if (dev->flags & IFF_ALLMULTI) {
@@ -294,7 +294,7 @@ static int macvlan_open(struct net_device *dev)
 	return 0;
 
 del_unicast:
-	dev_unicast_delete(lowerdev, dev->dev_addr);
+	dev_uc_del(lowerdev, dev->dev_addr);
 out:
 	return err;
 }
@@ -308,7 +308,7 @@ static int macvlan_stop(struct net_device *dev)
 	if (dev->flags & IFF_ALLMULTI)
 		dev_set_allmulti(lowerdev, -1);
 
-	dev_unicast_delete(lowerdev, dev->dev_addr);
+	dev_uc_del(lowerdev, dev->dev_addr);
 
 	macvlan_hash_del(vlan);
 	return 0;
@@ -332,11 +332,11 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p)
 		if (macvlan_addr_busy(vlan->port, addr->sa_data))
 			return -EBUSY;
 
-		err = dev_unicast_add(lowerdev, addr->sa_data);
+		err = dev_uc_add(lowerdev, addr->sa_data);
 		if (err)
 			return err;
 
-		dev_unicast_delete(lowerdev, dev->dev_addr);
+		dev_uc_del(lowerdev, dev->dev_addr);
 
 		macvlan_hash_change_addr(vlan, addr->sa_data);
 	}
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 2f47ae7cce91..de33e38a4059 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -308,9 +308,9 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe,
 	 * for multiple unicast MACs.
 	 */
 	memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN);
-	dev_unicast_add(netdev, flogi_maddr);
+	dev_uc_add(netdev, flogi_maddr);
 	if (fip->spma)
-		dev_unicast_add(netdev, fip->ctl_src_addr);
+		dev_uc_add(netdev, fip->ctl_src_addr);
 	dev_mc_add(netdev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0);
 
 	/*
@@ -394,9 +394,9 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
 
 	/* Delete secondary MAC addresses */
 	memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN);
-	dev_unicast_delete(netdev, flogi_maddr);
+	dev_uc_del(netdev, flogi_maddr);
 	if (fip->spma)
-		dev_unicast_delete(netdev, fip->ctl_src_addr);
+		dev_uc_del(netdev, fip->ctl_src_addr);
 	dev_mc_delete(netdev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0);
 
 	/* Tell the LLD we are done w/ FCoE */
@@ -490,9 +490,9 @@ static void fcoe_update_src_mac(struct fc_lport *lport, u8 *addr)
 
 	rtnl_lock();
 	if (!is_zero_ether_addr(port->data_src_addr))
-		dev_unicast_delete(fcoe->netdev, port->data_src_addr);
+		dev_uc_del(fcoe->netdev, port->data_src_addr);
 	if (!is_zero_ether_addr(addr))
-		dev_unicast_add(fcoe->netdev, addr);
+		dev_uc_add(fcoe->netdev, addr);
 	memcpy(port->data_src_addr, addr, ETH_ALEN);
 	rtnl_unlock();
 }
@@ -819,7 +819,7 @@ static void fcoe_if_destroy(struct fc_lport *lport)
 
 	rtnl_lock();
 	if (!is_zero_ether_addr(port->data_src_addr))
-		dev_unicast_delete(netdev, port->data_src_addr);
+		dev_uc_del(netdev, port->data_src_addr);
 	rtnl_unlock();
 
 	/* receives may not be stopped until after this */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b5670ab5107c..60f0c83192fe 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1991,15 +1991,20 @@ extern int dev_addr_add_multiple(struct net_device *to_dev,
 extern int dev_addr_del_multiple(struct net_device *to_dev,
 				 struct net_device *from_dev,
 				 unsigned char addr_type);
+extern void dev_addr_flush(struct net_device *dev);
+extern int dev_addr_init(struct net_device *dev);
+
+/* Functions used for unicast addresses handling */
+extern int dev_uc_add(struct net_device *dev, unsigned char *addr);
+extern int dev_uc_del(struct net_device *dev, unsigned char *addr);
+extern int dev_uc_sync(struct net_device *to, struct net_device *from);
+extern void dev_uc_unsync(struct net_device *to, struct net_device *from);
+extern void dev_uc_flush(struct net_device *dev);
+extern void dev_uc_init(struct net_device *dev);
 
 /* Functions used for secondary unicast and multicast support */
 extern void		dev_set_rx_mode(struct net_device *dev);
 extern void		__dev_set_rx_mode(struct net_device *dev);
-extern int		dev_unicast_delete(struct net_device *dev, void *addr);
-extern int		dev_unicast_add(struct net_device *dev, void *addr);
-extern int		dev_unicast_sync(struct net_device *to, struct net_device *from);
-extern void		dev_unicast_unsync(struct net_device *to, struct net_device *from);
-extern void		dev_unicast_flush(struct net_device *dev);
 extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
 extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
 extern int		dev_mc_sync(struct net_device *to, struct net_device *from);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index c39a5f41169c..bd33f02013ec 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -356,13 +356,13 @@ static void vlan_sync_address(struct net_device *dev,
 	 * the new address */
 	if (compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
 	    !compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
-		dev_unicast_delete(dev, vlandev->dev_addr);
+		dev_uc_del(dev, vlandev->dev_addr);
 
 	/* vlan address was equal to the old address and is different from
 	 * the new address */
 	if (!compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
 	    compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
-		dev_unicast_add(dev, vlandev->dev_addr);
+		dev_uc_add(dev, vlandev->dev_addr);
 
 	memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
 }
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 9e83272fc5b0..7f4d247237e4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -461,7 +461,7 @@ static int vlan_dev_open(struct net_device *dev)
 		return -ENETDOWN;
 
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) {
-		err = dev_unicast_add(real_dev, dev->dev_addr);
+		err = dev_uc_add(real_dev, dev->dev_addr);
 		if (err < 0)
 			goto out;
 	}
@@ -490,7 +490,7 @@ clear_allmulti:
 		dev_set_allmulti(real_dev, -1);
 del_unicast:
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-		dev_unicast_delete(real_dev, dev->dev_addr);
+		dev_uc_del(real_dev, dev->dev_addr);
 out:
 	netif_carrier_off(dev);
 	return err;
@@ -505,14 +505,14 @@ static int vlan_dev_stop(struct net_device *dev)
 		vlan_gvrp_request_leave(dev);
 
 	dev_mc_unsync(real_dev, dev);
-	dev_unicast_unsync(real_dev, dev);
+	dev_uc_unsync(real_dev, dev);
 	if (dev->flags & IFF_ALLMULTI)
 		dev_set_allmulti(real_dev, -1);
 	if (dev->flags & IFF_PROMISC)
 		dev_set_promiscuity(real_dev, -1);
 
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-		dev_unicast_delete(real_dev, dev->dev_addr);
+		dev_uc_del(real_dev, dev->dev_addr);
 
 	netif_carrier_off(dev);
 	return 0;
@@ -531,13 +531,13 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
 		goto out;
 
 	if (compare_ether_addr(addr->sa_data, real_dev->dev_addr)) {
-		err = dev_unicast_add(real_dev, addr->sa_data);
+		err = dev_uc_add(real_dev, addr->sa_data);
 		if (err < 0)
 			return err;
 	}
 
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-		dev_unicast_delete(real_dev, dev->dev_addr);
+		dev_uc_del(real_dev, dev->dev_addr);
 
 out:
 	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
@@ -654,7 +654,7 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
 {
 	dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
-	dev_unicast_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
+	dev_uc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
 }
 
 /*
diff --git a/net/core/Makefile b/net/core/Makefile
index 08791ac3e05a..0a899f1aadb9 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -8,7 +8,8 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
 obj-y		     += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
-			neighbour.o rtnetlink.o utils.o link_watch.o filter.o
+			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
+			dev_addr_lists.o
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
diff --git a/net/core/dev.c b/net/core/dev.c
index c6b52068d5ec..949c62dba719 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3968,314 +3968,6 @@ void dev_set_rx_mode(struct net_device *dev)
 	netif_addr_unlock_bh(dev);
 }
 
-/* hw addresses list handling functions */
-
-static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
-{
-	struct netdev_hw_addr *ha;
-	int alloc_size;
-
-	if (addr_len > MAX_ADDR_LEN)
-		return -EINVAL;
-
-	list_for_each_entry(ha, &list->list, list) {
-		if (!memcmp(ha->addr, addr, addr_len) &&
-		    ha->type == addr_type) {
-			ha->refcount++;
-			return 0;
-		}
-	}
-
-
-	alloc_size = sizeof(*ha);
-	if (alloc_size < L1_CACHE_BYTES)
-		alloc_size = L1_CACHE_BYTES;
-	ha = kmalloc(alloc_size, GFP_ATOMIC);
-	if (!ha)
-		return -ENOMEM;
-	memcpy(ha->addr, addr, addr_len);
-	ha->type = addr_type;
-	ha->refcount = 1;
-	ha->synced = false;
-	list_add_tail_rcu(&ha->list, &list->list);
-	list->count++;
-	return 0;
-}
-
-static void ha_rcu_free(struct rcu_head *head)
-{
-	struct netdev_hw_addr *ha;
-
-	ha = container_of(head, struct netdev_hw_addr, rcu_head);
-	kfree(ha);
-}
-
-static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
-{
-	struct netdev_hw_addr *ha;
-
-	list_for_each_entry(ha, &list->list, list) {
-		if (!memcmp(ha->addr, addr, addr_len) &&
-		    (ha->type == addr_type || !addr_type)) {
-			if (--ha->refcount)
-				return 0;
-			list_del_rcu(&ha->list);
-			call_rcu(&ha->rcu_head, ha_rcu_free);
-			list->count--;
-			return 0;
-		}
-	}
-	return -ENOENT;
-}
-
-static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
-				  struct netdev_hw_addr_list *from_list,
-				  int addr_len,
-				  unsigned char addr_type)
-{
-	int err;
-	struct netdev_hw_addr *ha, *ha2;
-	unsigned char type;
-
-	list_for_each_entry(ha, &from_list->list, list) {
-		type = addr_type ? addr_type : ha->type;
-		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
-		if (err)
-			goto unroll;
-	}
-	return 0;
-
-unroll:
-	list_for_each_entry(ha2, &from_list->list, list) {
-		if (ha2 == ha)
-			break;
-		type = addr_type ? addr_type : ha2->type;
-		__hw_addr_del(to_list, ha2->addr, addr_len, type);
-	}
-	return err;
-}
-
-static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
-				   struct netdev_hw_addr_list *from_list,
-				   int addr_len,
-				   unsigned char addr_type)
-{
-	struct netdev_hw_addr *ha;
-	unsigned char type;
-
-	list_for_each_entry(ha, &from_list->list, list) {
-		type = addr_type ? addr_type : ha->type;
-		__hw_addr_del(to_list, ha->addr, addr_len, addr_type);
-	}
-}
-
-static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
-			  struct netdev_hw_addr_list *from_list,
-			  int addr_len)
-{
-	int err = 0;
-	struct netdev_hw_addr *ha, *tmp;
-
-	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
-		if (!ha->synced) {
-			err = __hw_addr_add(to_list, ha->addr,
-					    addr_len, ha->type);
-			if (err)
-				break;
-			ha->synced = true;
-			ha->refcount++;
-		} else if (ha->refcount == 1) {
-			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
-			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
-		}
-	}
-	return err;
-}
-
-static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
-			     struct netdev_hw_addr_list *from_list,
-			     int addr_len)
-{
-	struct netdev_hw_addr *ha, *tmp;
-
-	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
-		if (ha->synced) {
-			__hw_addr_del(to_list, ha->addr,
-				      addr_len, ha->type);
-			ha->synced = false;
-			__hw_addr_del(from_list, ha->addr,
-				      addr_len, ha->type);
-		}
-	}
-}
-
-static void __hw_addr_flush(struct netdev_hw_addr_list *list)
-{
-	struct netdev_hw_addr *ha, *tmp;
-
-	list_for_each_entry_safe(ha, tmp, &list->list, list) {
-		list_del_rcu(&ha->list);
-		call_rcu(&ha->rcu_head, ha_rcu_free);
-	}
-	list->count = 0;
-}
-
-static void __hw_addr_init(struct netdev_hw_addr_list *list)
-{
-	INIT_LIST_HEAD(&list->list);
-	list->count = 0;
-}
-
-/* Device addresses handling functions */
-
-static void dev_addr_flush(struct net_device *dev)
-{
-	/* rtnl_mutex must be held here */
-
-	__hw_addr_flush(&dev->dev_addrs);
-	dev->dev_addr = NULL;
-}
-
-static int dev_addr_init(struct net_device *dev)
-{
-	unsigned char addr[MAX_ADDR_LEN];
-	struct netdev_hw_addr *ha;
-	int err;
-
-	/* rtnl_mutex must be held here */
-
-	__hw_addr_init(&dev->dev_addrs);
-	memset(addr, 0, sizeof(addr));
-	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
-			    NETDEV_HW_ADDR_T_LAN);
-	if (!err) {
-		/*
-		 * Get the first (previously created) address from the list
-		 * and set dev_addr pointer to this location.
-		 */
-		ha = list_first_entry(&dev->dev_addrs.list,
-				      struct netdev_hw_addr, list);
-		dev->dev_addr = ha->addr;
-	}
-	return err;
-}
-
-/**
- *	dev_addr_add	- Add a device address
- *	@dev: device
- *	@addr: address to add
- *	@addr_type: address type
- *
- *	Add a device address to the device or increase the reference count if
- *	it already exists.
- *
- *	The caller must hold the rtnl_mutex.
- */
-int dev_addr_add(struct net_device *dev, unsigned char *addr,
-		 unsigned char addr_type)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_addr_add);
-
-/**
- *	dev_addr_del	- Release a device address.
- *	@dev: device
- *	@addr: address to delete
- *	@addr_type: address type
- *
- *	Release reference to a device address and remove it from the device
- *	if the reference count drops to zero.
- *
- *	The caller must hold the rtnl_mutex.
- */
-int dev_addr_del(struct net_device *dev, unsigned char *addr,
-		 unsigned char addr_type)
-{
-	int err;
-	struct netdev_hw_addr *ha;
-
-	ASSERT_RTNL();
-
-	/*
-	 * We can not remove the first address from the list because
-	 * dev->dev_addr points to that.
-	 */
-	ha = list_first_entry(&dev->dev_addrs.list,
-			      struct netdev_hw_addr, list);
-	if (ha->addr == dev->dev_addr && ha->refcount == 1)
-		return -ENOENT;
-
-	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
-			    addr_type);
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_addr_del);
-
-/**
- *	dev_addr_add_multiple	- Add device addresses from another device
- *	@to_dev: device to which addresses will be added
- *	@from_dev: device from which addresses will be added
- *	@addr_type: address type - 0 means type will be used from from_dev
- *
- *	Add device addresses of the one device to another.
- **
- *	The caller must hold the rtnl_mutex.
- */
-int dev_addr_add_multiple(struct net_device *to_dev,
-			  struct net_device *from_dev,
-			  unsigned char addr_type)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	if (from_dev->addr_len != to_dev->addr_len)
-		return -EINVAL;
-	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
-				     to_dev->addr_len, addr_type);
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_addr_add_multiple);
-
-/**
- *	dev_addr_del_multiple	- Delete device addresses by another device
- *	@to_dev: device where the addresses will be deleted
- *	@from_dev: device by which addresses the addresses will be deleted
- *	@addr_type: address type - 0 means type will used from from_dev
- *
- *	Deletes addresses in to device by the list of addresses in from device.
- *
- *	The caller must hold the rtnl_mutex.
- */
-int dev_addr_del_multiple(struct net_device *to_dev,
-			  struct net_device *from_dev,
-			  unsigned char addr_type)
-{
-	ASSERT_RTNL();
-
-	if (from_dev->addr_len != to_dev->addr_len)
-		return -EINVAL;
-	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
-			       to_dev->addr_len, addr_type);
-	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
-	return 0;
-}
-EXPORT_SYMBOL(dev_addr_del_multiple);
-
 /* multicast addresses handling functions */
 
 int __dev_addr_delete(struct dev_addr_list **list, int *count,
@@ -4336,57 +4028,6 @@ int __dev_addr_add(struct dev_addr_list **list, int *count,
 	return 0;
 }
 
-/**
- *	dev_unicast_delete	- Release secondary unicast address.
- *	@dev: device
- *	@addr: address to delete
- *
- *	Release reference to a secondary unicast address and remove it
- *	from the device if the reference count drops to zero.
- *
- * 	The caller must hold the rtnl_mutex.
- */
-int dev_unicast_delete(struct net_device *dev, void *addr)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	netif_addr_lock_bh(dev);
-	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
-			    NETDEV_HW_ADDR_T_UNICAST);
-	if (!err)
-		__dev_set_rx_mode(dev);
-	netif_addr_unlock_bh(dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_unicast_delete);
-
-/**
- *	dev_unicast_add		- add a secondary unicast address
- *	@dev: device
- *	@addr: address to add
- *
- *	Add a secondary unicast address to the device or increase
- *	the reference count if it already exists.
- *
- *	The caller must hold the rtnl_mutex.
- */
-int dev_unicast_add(struct net_device *dev, void *addr)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	netif_addr_lock_bh(dev);
-	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
-			    NETDEV_HW_ADDR_T_UNICAST);
-	if (!err)
-		__dev_set_rx_mode(dev);
-	netif_addr_unlock_bh(dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_unicast_add);
 
 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
 		    struct dev_addr_list **from, int *from_count)
@@ -4436,71 +4077,6 @@ void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
 }
 EXPORT_SYMBOL_GPL(__dev_addr_unsync);
 
-/**
- *	dev_unicast_sync - Synchronize device's unicast list to another device
- *	@to: destination device
- *	@from: source device
- *
- *	Add newly added addresses to the destination device and release
- *	addresses that have no users left. The source device must be
- *	locked by netif_tx_lock_bh.
- *
- *	This function is intended to be called from the dev->set_rx_mode
- *	function of layered software devices.
- */
-int dev_unicast_sync(struct net_device *to, struct net_device *from)
-{
-	int err = 0;
-
-	if (to->addr_len != from->addr_len)
-		return -EINVAL;
-
-	netif_addr_lock_bh(to);
-	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
-	if (!err)
-		__dev_set_rx_mode(to);
-	netif_addr_unlock_bh(to);
-	return err;
-}
-EXPORT_SYMBOL(dev_unicast_sync);
-
-/**
- *	dev_unicast_unsync - Remove synchronized addresses from the destination device
- *	@to: destination device
- *	@from: source device
- *
- *	Remove all addresses that were added to the destination device by
- *	dev_unicast_sync(). This function is intended to be called from the
- *	dev->stop function of layered software devices.
- */
-void dev_unicast_unsync(struct net_device *to, struct net_device *from)
-{
-	if (to->addr_len != from->addr_len)
-		return;
-
-	netif_addr_lock_bh(from);
-	netif_addr_lock(to);
-	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
-	__dev_set_rx_mode(to);
-	netif_addr_unlock(to);
-	netif_addr_unlock_bh(from);
-}
-EXPORT_SYMBOL(dev_unicast_unsync);
-
-void dev_unicast_flush(struct net_device *dev)
-{
-	netif_addr_lock_bh(dev);
-	__hw_addr_flush(&dev->uc);
-	netif_addr_unlock_bh(dev);
-}
-EXPORT_SYMBOL(dev_unicast_flush);
-
-static void dev_unicast_init(struct net_device *dev)
-{
-	__hw_addr_init(&dev->uc);
-}
-
-
 static void __dev_addr_discard(struct dev_addr_list **list)
 {
 	struct dev_addr_list *tmp;
@@ -5153,7 +4729,7 @@ static void rollback_registered_many(struct list_head *head)
 		/*
 		 *	Flush the unicast and multicast chains
 		 */
-		dev_unicast_flush(dev);
+		dev_uc_flush(dev);
 		dev_addr_discard(dev);
 
 		if (dev->netdev_ops->ndo_uninit)
@@ -5734,7 +5310,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	if (dev_addr_init(dev))
 		goto free_rx;
 
-	dev_unicast_init(dev);
+	dev_uc_init(dev);
 
 	dev_net_set(dev, &init_net);
 
@@ -5968,7 +5544,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	/*
 	 *	Flush the unicast and multicast chains
 	 */
-	dev_unicast_flush(dev);
+	dev_uc_flush(dev);
 	dev_addr_discard(dev);
 
 	netdev_unregister_kobject(dev);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
new file mode 100644
index 000000000000..7e52b6d18add
--- /dev/null
+++ b/net/core/dev_addr_lists.c
@@ -0,0 +1,478 @@
+/*
+ * net/core/dev_addr_lists.c - Functions for handling net device lists
+ * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This file contains functions for working with unicast, multicast and device
+ * addresses lists.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/list.h>
+
+/*
+ * General list handling functions
+ */
+
+static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	struct netdev_hw_addr *ha;
+	int alloc_size;
+
+	if (addr_len > MAX_ADDR_LEN)
+		return -EINVAL;
+
+	list_for_each_entry(ha, &list->list, list) {
+		if (!memcmp(ha->addr, addr, addr_len) &&
+		    ha->type == addr_type) {
+			ha->refcount++;
+			return 0;
+		}
+	}
+
+
+	alloc_size = sizeof(*ha);
+	if (alloc_size < L1_CACHE_BYTES)
+		alloc_size = L1_CACHE_BYTES;
+	ha = kmalloc(alloc_size, GFP_ATOMIC);
+	if (!ha)
+		return -ENOMEM;
+	memcpy(ha->addr, addr, addr_len);
+	ha->type = addr_type;
+	ha->refcount = 1;
+	ha->synced = false;
+	list_add_tail_rcu(&ha->list, &list->list);
+	list->count++;
+	return 0;
+}
+
+static void ha_rcu_free(struct rcu_head *head)
+{
+	struct netdev_hw_addr *ha;
+
+	ha = container_of(head, struct netdev_hw_addr, rcu_head);
+	kfree(ha);
+}
+
+static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	struct netdev_hw_addr *ha;
+
+	list_for_each_entry(ha, &list->list, list) {
+		if (!memcmp(ha->addr, addr, addr_len) &&
+		    (ha->type == addr_type || !addr_type)) {
+			if (--ha->refcount)
+				return 0;
+			list_del_rcu(&ha->list);
+			call_rcu(&ha->rcu_head, ha_rcu_free);
+			list->count--;
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+				  struct netdev_hw_addr_list *from_list,
+				  int addr_len,
+				  unsigned char addr_type)
+{
+	int err;
+	struct netdev_hw_addr *ha, *ha2;
+	unsigned char type;
+
+	list_for_each_entry(ha, &from_list->list, list) {
+		type = addr_type ? addr_type : ha->type;
+		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
+		if (err)
+			goto unroll;
+	}
+	return 0;
+
+unroll:
+	list_for_each_entry(ha2, &from_list->list, list) {
+		if (ha2 == ha)
+			break;
+		type = addr_type ? addr_type : ha2->type;
+		__hw_addr_del(to_list, ha2->addr, addr_len, type);
+	}
+	return err;
+}
+
+/* Release from @to_list one reference per address found in @from_list. */
+static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+				   struct netdev_hw_addr_list *from_list,
+				   int addr_len, unsigned char addr_type)
+{
+	struct netdev_hw_addr *ha;
+	unsigned char type;
+
+	list_for_each_entry(ha, &from_list->list, list) {
+		/* addr_type == 0 means: use each source entry's own type */
+		type = addr_type ? addr_type : ha->type;
+		__hw_addr_del(to_list, ha->addr, addr_len, type);
+	}
+}
+
+static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+			  struct netdev_hw_addr_list *from_list,
+			  int addr_len)
+{
+	int err = 0;
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+		if (!ha->synced) {
+			err = __hw_addr_add(to_list, ha->addr,
+					    addr_len, ha->type);
+			if (err)
+				break;
+			ha->synced = true;
+			ha->refcount++;
+		} else if (ha->refcount == 1) {
+			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
+			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
+		}
+	}
+	return err;
+}
+
+static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+			     struct netdev_hw_addr_list *from_list,
+			     int addr_len)
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+		if (ha->synced) {
+			__hw_addr_del(to_list, ha->addr,
+				      addr_len, ha->type);
+			ha->synced = false;
+			__hw_addr_del(from_list, ha->addr,
+				      addr_len, ha->type);
+		}
+	}
+}
+
+static void __hw_addr_flush(struct netdev_hw_addr_list *list)
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		list_del_rcu(&ha->list);
+		call_rcu(&ha->rcu_head, ha_rcu_free);
+	}
+	list->count = 0;
+}
+
+static void __hw_addr_init(struct netdev_hw_addr_list *list)
+{
+	INIT_LIST_HEAD(&list->list);
+	list->count = 0;
+}
+
+/*
+ * Device addresses handling functions
+ */
+
+/**
+ *	dev_addr_flush - Flush device address list
+ *	@dev: device
+ *
+ *	Flush device address list and reset ->dev_addr.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void dev_addr_flush(struct net_device *dev)
+{
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_flush(&dev->dev_addrs);
+	dev->dev_addr = NULL;
+}
+EXPORT_SYMBOL(dev_addr_flush);
+
+/**
+ *	dev_addr_init - Init device address list
+ *	@dev: device
+ *
+ *	Init device address list and create the first element,
+ *	used by ->dev_addr.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_init(struct net_device *dev)
+{
+	unsigned char addr[MAX_ADDR_LEN];
+	struct netdev_hw_addr *ha;
+	int err;
+
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_init(&dev->dev_addrs);
+	memset(addr, 0, sizeof(addr));
+	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
+			    NETDEV_HW_ADDR_T_LAN);
+	if (!err) {
+		/*
+		 * Get the first (previously created) address from the list
+		 * and set dev_addr pointer to this location.
+		 */
+		ha = list_first_entry(&dev->dev_addrs.list,
+				      struct netdev_hw_addr, list);
+		dev->dev_addr = ha->addr;
+	}
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_init);
+
+/**
+ *	dev_addr_add - Add a device address
+ *	@dev: device
+ *	@addr: address to add
+ *	@addr_type: address type
+ *
+ *	Add a device address to the device or increase the reference count if
+ *	it already exists.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add);
+
+/**
+ *	dev_addr_del - Release a device address.
+ *	@dev: device
+ *	@addr: address to delete
+ *	@addr_type: address type
+ *
+ *	Release reference to a device address and remove it from the device
+ *	if the reference count drops to zero.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+	struct netdev_hw_addr *ha;
+
+	ASSERT_RTNL();
+
+	/*
+	 * We can not remove the first address from the list because
+	 * dev->dev_addr points to that.
+	 */
+	ha = list_first_entry(&dev->dev_addrs.list,
+			      struct netdev_hw_addr, list);
+	if (ha->addr == dev->dev_addr && ha->refcount == 1)
+		return -ENOENT;
+
+	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
+			    addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_del);
+
+/**
+ *	dev_addr_add_multiple - Add device addresses from another device
+ *	@to_dev: device to which addresses will be added
+ *	@from_dev: device from which addresses will be added
+ *	@addr_type: address type - 0 means type will be used from from_dev
+ *
+ *	Add device addresses of the one device to another.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+				     to_dev->addr_len, addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add_multiple);
+
+/**
+ *	dev_addr_del_multiple - Delete device addresses by another device
+ *	@to_dev: device from which the addresses will be deleted
+ *	@from_dev: device whose address list selects the addresses to delete
+ *	@addr_type: address type - 0 means type will be used from from_dev
+ *
+ *	Deletes from @to_dev the addresses that are listed in @from_dev.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+			       to_dev->addr_len, addr_type);
+	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return 0;
+}
+EXPORT_SYMBOL(dev_addr_del_multiple);
+
+/*
+ * Unicast list handling functions
+ */
+
+/**
+ *	dev_uc_add - Add a secondary unicast address
+ *	@dev: device
+ *	@addr: address to add
+ *
+ *	Add a secondary unicast address to the device or increase
+ *	the reference count if it already exists.
+ */
+int dev_uc_add(struct net_device *dev, unsigned char *addr)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_add);
+
+/**
+ *	dev_uc_del - Release secondary unicast address.
+ *	@dev: device
+ *	@addr: address to delete
+ *
+ *	Release reference to a secondary unicast address and remove it
+ *	from the device if the reference count drops to zero.
+ */
+int dev_uc_del(struct net_device *dev, unsigned char *addr)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_del);
+
+/**
+ *	dev_uc_sync - Synchronize device's unicast list to another device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Add newly added addresses to the destination device and release
+ *	addresses that have no users left. The source device must be
+ *	locked by netif_tx_lock_bh.
+ *
+ *	This function is intended to be called from the dev->set_rx_mode
+ *	function of layered software devices.
+ */
+int dev_uc_sync(struct net_device *to, struct net_device *from)
+{
+	int err = 0;
+
+	if (to->addr_len != from->addr_len)
+		return -EINVAL;
+
+	netif_addr_lock_bh(to);
+	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
+	if (!err)
+		__dev_set_rx_mode(to);
+	netif_addr_unlock_bh(to);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_sync);
+
+/**
+ *	dev_uc_unsync - Remove synchronized addresses from the destination device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Remove all addresses that were added to the destination device by
+ *	dev_uc_sync(). This function is intended to be called from the
+ *	dev->stop function of layered software devices.
+ */
+void dev_uc_unsync(struct net_device *to, struct net_device *from)
+{
+	if (to->addr_len != from->addr_len)
+		return;
+
+	netif_addr_lock_bh(from);
+	netif_addr_lock(to);
+	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
+	__dev_set_rx_mode(to);
+	netif_addr_unlock(to);
+	netif_addr_unlock_bh(from);
+}
+EXPORT_SYMBOL(dev_uc_unsync);
+
+/**
+ *	dev_uc_flush - Flush unicast addresses
+ *	@dev: device
+ *
+ *	Flush unicast addresses.
+ */
+void dev_uc_flush(struct net_device *dev)
+{
+	netif_addr_lock_bh(dev);
+	__hw_addr_flush(&dev->uc);
+	netif_addr_unlock_bh(dev);
+}
+EXPORT_SYMBOL(dev_uc_flush);
+
+/**
+ *	dev_uc_init - Init unicast address list
+ *	@dev: device
+ *
+ *	Init unicast address list.
+ */
+void dev_uc_init(struct net_device *dev)
+{
+	__hw_addr_init(&dev->uc);
+}
+EXPORT_SYMBOL(dev_uc_init);
+
+/*
+ * Multicast list handling functions
+ */
+
+/* To be filled here */
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 2175e6d5cc8d..8fdca56bb08f 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -67,7 +67,7 @@ static int dsa_slave_open(struct net_device *dev)
 		return -ENETDOWN;
 
 	if (compare_ether_addr(dev->dev_addr, master->dev_addr)) {
-		err = dev_unicast_add(master, dev->dev_addr);
+		err = dev_uc_add(master, dev->dev_addr);
 		if (err < 0)
 			goto out;
 	}
@@ -90,7 +90,7 @@ clear_allmulti:
 		dev_set_allmulti(master, -1);
 del_unicast:
 	if (compare_ether_addr(dev->dev_addr, master->dev_addr))
-		dev_unicast_delete(master, dev->dev_addr);
+		dev_uc_del(master, dev->dev_addr);
 out:
 	return err;
 }
@@ -101,14 +101,14 @@ static int dsa_slave_close(struct net_device *dev)
 	struct net_device *master = p->parent->dst->master_netdev;
 
 	dev_mc_unsync(master, dev);
-	dev_unicast_unsync(master, dev);
+	dev_uc_unsync(master, dev);
 	if (dev->flags & IFF_ALLMULTI)
 		dev_set_allmulti(master, -1);
 	if (dev->flags & IFF_PROMISC)
 		dev_set_promiscuity(master, -1);
 
 	if (compare_ether_addr(dev->dev_addr, master->dev_addr))
-		dev_unicast_delete(master, dev->dev_addr);
+		dev_uc_del(master, dev->dev_addr);
 
 	return 0;
 }
@@ -130,7 +130,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev)
 	struct net_device *master = p->parent->dst->master_netdev;
 
 	dev_mc_sync(master, dev);
-	dev_unicast_sync(master, dev);
+	dev_uc_sync(master, dev);
 }
 
 static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
@@ -147,13 +147,13 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
 		goto out;
 
 	if (compare_ether_addr(addr->sa_data, master->dev_addr)) {
-		err = dev_unicast_add(master, addr->sa_data);
+		err = dev_uc_add(master, addr->sa_data);
 		if (err < 0)
 			return err;
 	}
 
 	if (compare_ether_addr(dev->dev_addr, master->dev_addr))
-		dev_unicast_delete(master, dev->dev_addr);
+		dev_uc_del(master, dev->dev_addr);
 
 out:
 	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1612d417d10c..48c1e0ae565f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1705,9 +1705,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 		if (i->alen != dev->addr_len)
 			return -EINVAL;
 		if (what > 0)
-			return dev_unicast_add(dev, i->addr);
+			return dev_uc_add(dev, i->addr);
 		else
-			return dev_unicast_delete(dev, i->addr);
+			return dev_uc_del(dev, i->addr);
 		break;
 	default:
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From 22bedad3ce112d5ca1eaf043d4990fa2ed698c87 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 1 Apr 2010 21:22:57 +0000
Subject: net: convert multicast list to list_head

Converts the multicast list, and the core code that manipulates it, to work
the same way as uc_list.

+uses two functions for adding/removing an mc address (normal and "global"
 variants) instead of a function parameter.
+removes dev_mcast.c completely.
+exposes netdev_hw_addr_list_* macros along with __hw_addr_* functions for
 manipulating lists in a sandbox (used in bonding and 80211 drivers)

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/nes/nes_nic.c            |   7 +-
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |   9 +-
 drivers/media/dvb/dvb-core/dvb_net.c           |  10 +-
 drivers/net/3c505.c                            |   7 +-
 drivers/net/3c523.c                            |   7 +-
 drivers/net/3c527.c                            |   6 +-
 drivers/net/7990.c                             |   6 +-
 drivers/net/8139cp.c                           |   6 +-
 drivers/net/8139too.c                          |   6 +-
 drivers/net/82596.c                            |   6 +-
 drivers/net/a2065.c                            |   6 +-
 drivers/net/amd8111e.c                         |   6 +-
 drivers/net/arm/am79c961a.c                    |   6 +-
 drivers/net/arm/at91_ether.c                   |   6 +-
 drivers/net/arm/ixp4xx_eth.c                   |   8 +-
 drivers/net/arm/ks8695net.c                    |  10 +-
 drivers/net/at1700.c                           |   6 +-
 drivers/net/atl1c/atl1c_main.c                 |   6 +-
 drivers/net/atl1e/atl1e_main.c                 |   6 +-
 drivers/net/atlx/atl2.c                        |   6 +-
 drivers/net/atlx/atlx.c                        |   6 +-
 drivers/net/atp.c                              |   6 +-
 drivers/net/au1000_eth.c                       |   6 +-
 drivers/net/b44.c                              |   6 +-
 drivers/net/bcm63xx_enet.c                     |   6 +-
 drivers/net/benet/be_cmds.c                    |   6 +-
 drivers/net/bfin_mac.c                         |   6 +-
 drivers/net/bmac.c                             |  12 +-
 drivers/net/bnx2.c                             |   5 +-
 drivers/net/bnx2x_main.c                       |  18 +-
 drivers/net/bonding/bond_main.c                | 157 +++++--------
 drivers/net/bonding/bonding.h                  |   2 +-
 drivers/net/cassini.c                          |  12 +-
 drivers/net/chelsio/pm3393.c                   |   7 +-
 drivers/net/cpmac.c                            |  16 +-
 drivers/net/cris/eth_v10.c                     |   6 +-
 drivers/net/cxgb3/xgmac.c                      |   8 +-
 drivers/net/davinci_emac.c                     |   7 +-
 drivers/net/declance.c                         |   6 +-
 drivers/net/defxx.c                            |   6 +-
 drivers/net/depca.c                            |   6 +-
 drivers/net/dl2k.c                             |   6 +-
 drivers/net/dm9000.c                           |   6 +-
 drivers/net/e100.c                             |   6 +-
 drivers/net/e1000/e1000_main.c                 |   7 +-
 drivers/net/e1000e/netdev.c                    |   7 +-
 drivers/net/eepro.c                            |   6 +-
 drivers/net/eexpress.c                         |   6 +-
 drivers/net/ehea/ehea_main.c                   |   6 +-
 drivers/net/enic/enic_main.c                   |   6 +-
 drivers/net/epic100.c                          |   6 +-
 drivers/net/ethoc.c                            |   6 +-
 drivers/net/ewrk3.c                            |   6 +-
 drivers/net/fealnx.c                           |   6 +-
 drivers/net/fec.c                              |  10 +-
 drivers/net/fec_mpc52xx.c                      |   6 +-
 drivers/net/forcedeth.c                        |  10 +-
 drivers/net/fs_enet/mac-fcc.c                  |   6 +-
 drivers/net/fs_enet/mac-fec.c                  |   6 +-
 drivers/net/fs_enet/mac-scc.c                  |   6 +-
 drivers/net/gianfar.c                          |   9 +-
 drivers/net/greth.c                            |   6 +-
 drivers/net/hamachi.c                          |   8 +-
 drivers/net/hp100.c                            |   6 +-
 drivers/net/ibm_newemac/core.c                 |  11 +-
 drivers/net/ibmlana.c                          |   8 +-
 drivers/net/ibmveth.c                          |   6 +-
 drivers/net/igb/igb_main.c                     |   6 +-
 drivers/net/igbvf/netdev.c                     |   6 +-
 drivers/net/ioc3-eth.c                         |   6 +-
 drivers/net/ipg.c                              |   6 +-
 drivers/net/iseries_veth.c                     |   6 +-
 drivers/net/ixgb/ixgb_main.c                   |   6 +-
 drivers/net/ixgbe/ixgbe_common.c               |   6 +-
 drivers/net/ixgbevf/vf.c                       |   6 +-
 drivers/net/jme.c                              |   6 +-
 drivers/net/korina.c                           |   6 +-
 drivers/net/ks8851.c                           |   6 +-
 drivers/net/ks8851_mll.c                       |  12 +-
 drivers/net/ksz884x.c                          |  11 +-
 drivers/net/lib82596.c                         |   6 +-
 drivers/net/lib8390.c                          |   6 +-
 drivers/net/ll_temac_main.c                    |  16 +-
 drivers/net/lp486e.c                           |   6 +-
 drivers/net/macb.c                             |   6 +-
 drivers/net/mace.c                             |   6 +-
 drivers/net/macmace.c                          |   6 +-
 drivers/net/mlx4/en_netdev.c                   |   6 +-
 drivers/net/mv643xx_eth.c                      |   6 +-
 drivers/net/myri10ge/myri10ge.c                |   8 +-
 drivers/net/natsemi.c                          |   6 +-
 drivers/net/netxen/netxen_nic_hw.c             |  12 +-
 drivers/net/ni52.c                             |   6 +-
 drivers/net/niu.c                              |   5 +-
 drivers/net/octeon/octeon_mgmt.c               |   6 +-
 drivers/net/pci-skeleton.c                     |   6 +-
 drivers/net/pcmcia/axnet_cs.c                  |   6 +-
 drivers/net/pcmcia/fmvj18x_cs.c                |   6 +-
 drivers/net/pcmcia/nmclan_cs.c                 |   6 +-
 drivers/net/pcmcia/smc91c92_cs.c               |   8 +-
 drivers/net/pcmcia/xirc2ps_cs.c                |   6 +-
 drivers/net/pcnet32.c                          |   6 +-
 drivers/net/ps3_gelic_net.c                    |   6 +-
 drivers/net/qlcnic/qlcnic_hw.c                 |   6 +-
 drivers/net/qlge/qlge_main.c                   |   6 +-
 drivers/net/r6040.c                            |  10 +-
 drivers/net/r8169.c                            |   6 +-
 drivers/net/s2io.c                             |   8 +-
 drivers/net/sb1250-mac.c                       |   6 +-
 drivers/net/sc92031.c                          |   6 +-
 drivers/net/sfc/efx.c                          |   6 +-
 drivers/net/sis190.c                           |   6 +-
 drivers/net/sis900.c                           |  10 +-
 drivers/net/skfp/skfddi.c                      |  12 +-
 drivers/net/skge.c                             |  12 +-
 drivers/net/sky2.c                             |   6 +-
 drivers/net/smc911x.c                          |  13 +-
 drivers/net/smc9194.c                          |  13 +-
 drivers/net/smc91x.c                           |   8 +-
 drivers/net/smsc911x.c                         |   6 +-
 drivers/net/smsc9420.c                         |   6 +-
 drivers/net/sonic.c                            |   6 +-
 drivers/net/spider_net.c                       |   6 +-
 drivers/net/starfire.c                         |  10 +-
 drivers/net/stmmac/dwmac100.c                  |   6 +-
 drivers/net/stmmac/dwmac1000_core.c            |   6 +-
 drivers/net/sun3_82586.c                       |   6 +-
 drivers/net/sunbmac.c                          |   6 +-
 drivers/net/sundance.c                         |   6 +-
 drivers/net/sungem.c                           |   6 +-
 drivers/net/sunhme.c                           |  12 +-
 drivers/net/sunlance.c                         |   6 +-
 drivers/net/sunqe.c                            |   6 +-
 drivers/net/sunvnet.c                          |   8 +-
 drivers/net/tc35815.c                          |   6 +-
 drivers/net/tehuti.c                           |   6 +-
 drivers/net/tg3.c                              |   6 +-
 drivers/net/tlan.c                             |   8 +-
 drivers/net/tokenring/3c359.c                  |  12 +-
 drivers/net/tokenring/ibmtr.c                  |  12 +-
 drivers/net/tokenring/lanstreamer.c            |  12 +-
 drivers/net/tokenring/olympic.c                |  12 +-
 drivers/net/tokenring/tms380tr.c               |  12 +-
 drivers/net/tsi108_eth.c                       |   6 +-
 drivers/net/tulip/de2104x.c                    |  12 +-
 drivers/net/tulip/de4x5.c                      |  10 +-
 drivers/net/tulip/dmfe.c                       |  12 +-
 drivers/net/tulip/tulip_core.c                 |  27 ++-
 drivers/net/tulip/uli526x.c                    |   6 +-
 drivers/net/tulip/winbond-840.c                |  12 +-
 drivers/net/typhoon.c                          |   6 +-
 drivers/net/ucc_geth.c                         |   8 +-
 drivers/net/usb/asix.c                         |  16 +-
 drivers/net/usb/catc.c                         |   6 +-
 drivers/net/usb/dm9601.c                       |   6 +-
 drivers/net/usb/mcs7830.c                      |   6 +-
 drivers/net/usb/smsc75xx.c                     |   6 +-
 drivers/net/usb/smsc95xx.c                     |   6 +-
 drivers/net/via-rhine.c                        |   6 +-
 drivers/net/via-velocity.c                     |   6 +-
 drivers/net/virtio_net.c                       |   5 +-
 drivers/net/vmxnet3/vmxnet3_drv.c              |   6 +-
 drivers/net/vxge/vxge-main.c                   |   6 +-
 drivers/net/wireless/adm8211.c                 |  12 +-
 drivers/net/wireless/ath/ar9170/main.c         |  14 +-
 drivers/net/wireless/ath/ath5k/base.c          |  17 +-
 drivers/net/wireless/libertas/main.c           |  12 +-
 drivers/net/wireless/libertas_tf/main.c        |  14 +-
 drivers/net/wireless/mwl8k.c                   |  24 +-
 drivers/net/wireless/orinoco/hw.c              |   6 +-
 drivers/net/wireless/orinoco/hw.h              |   1 -
 drivers/net/wireless/ray_cs.c                  |  12 +-
 drivers/net/wireless/rndis_wlan.c              |   6 +-
 drivers/net/wireless/rtl818x/rtl8180_dev.c     |   6 +-
 drivers/net/wireless/rtl818x/rtl8187_dev.c     |   4 +-
 drivers/net/wireless/wl12xx/wl1271_main.c      |  25 +-
 drivers/net/wireless/zd1201.c                  |   6 +-
 drivers/net/wireless/zd1211rw/zd_mac.c         |  13 +-
 drivers/net/yellowfin.c                        |  12 +-
 drivers/s390/net/qeth_l2_main.c                |   5 +-
 drivers/scsi/fcoe/fcoe.c                       |   4 +-
 drivers/staging/arlan/arlan-main.c             |   9 +-
 drivers/staging/et131x/et131x_netdev.c         |   6 +-
 drivers/staging/slicoss/slicoss.c              |   6 +-
 drivers/staging/vt6655/device_main.c           |   6 +-
 drivers/staging/vt6656/main_usb.c              |   6 +-
 drivers/staging/wavelan/wavelan.c              |  10 +-
 drivers/staging/wavelan/wavelan_cs.c           |  12 +-
 drivers/staging/winbond/wbusb.c                |   6 +-
 drivers/staging/wlags49_h2/wl_netdev.c         |  12 +-
 include/linux/netdevice.h                      |  82 +++----
 include/net/mac80211.h                         |   2 +-
 net/802/garp.c                                 |   4 +-
 net/appletalk/ddp.c                            |   2 +-
 net/bluetooth/bnep/netdev.c                    |   8 +-
 net/core/Makefile                              |   5 +-
 net/core/dev.c                                 | 145 +-----------
 net/core/dev_addr_lists.c                      | 305 +++++++++++++++++++++++--
 net/core/dev_mcast.c                           | 232 -------------------
 net/decnet/dn_dev.c                            |  12 +-
 net/ipv4/igmp.c                                |   4 +-
 net/ipv4/netfilter/ipt_CLUSTERIP.c             |   4 +-
 net/ipv6/mcast.c                               |   4 +-
 net/mac80211/driver-ops.h                      |   8 +-
 net/mac80211/ieee80211_i.h                     |   3 +-
 net/mac80211/iface.c                           |   6 +-
 net/mac80211/main.c                            |   2 +-
 net/packet/af_packet.c                         |   4 +-
 208 files changed, 1137 insertions(+), 1327 deletions(-)
 delete mode 100644 net/core/dev_mcast.c

(limited to 'net')

diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 91fdde382e82..ce9ef6bc865c 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -876,7 +876,7 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
 	if (!mc_all_on) {
 		char *addrs;
 		int i;
-		struct dev_mc_list *mcaddr;
+		struct netdev_hw_addr *ha;
 
 		addrs = kmalloc(ETH_ALEN * mc_count, GFP_ATOMIC);
 		if (!addrs) {
@@ -884,9 +884,8 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
 			goto unlock;
 		}
 		i = 0;
-		netdev_for_each_mc_addr(mcaddr, netdev)
-			memcpy(get_addr(addrs, i++),
-			       mcaddr->dmi_addr, ETH_ALEN);
+		netdev_for_each_mc_addr(ha, netdev)
+			memcpy(get_addr(addrs, i++), ha->addr, ETH_ALEN);
 
 		perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW +
 						pft_entries_preallocated * 0x8;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 19eba3c877cb..c8a0f7dab5b0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -783,7 +783,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 	struct ipoib_dev_priv *priv =
 		container_of(work, struct ipoib_dev_priv, restart_task);
 	struct net_device *dev = priv->dev;
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	struct ipoib_mcast *mcast, *tmcast;
 	LIST_HEAD(remove_list);
 	unsigned long flags;
@@ -808,14 +808,13 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
 
 	/* Mark all of the entries that are found or don't exist */
-	netdev_for_each_mc_addr(mclist, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		union ib_gid mgid;
 
-		if (!ipoib_mcast_addr_is_valid(mclist->dmi_addr,
-					       dev->broadcast))
+		if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast))
 			continue;
 
-		memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid);
+		memcpy(mgid.raw, ha->addr + 4, sizeof mgid);
 
 		mcast = __ipoib_mcast_find(dev, &mgid);
 		if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index dba1c84058b7..cccea412088b 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -1109,14 +1109,14 @@ static int dvb_net_feed_stop(struct net_device *dev)
 }
 
 
-static int dvb_set_mc_filter (struct net_device *dev, struct dev_mc_list *mc)
+static int dvb_set_mc_filter(struct net_device *dev, unsigned char *addr)
 {
 	struct dvb_net_priv *priv = netdev_priv(dev);
 
 	if (priv->multi_num == DVB_NET_MULTICAST_MAX)
 		return -ENOMEM;
 
-	memcpy(priv->multi_macs[priv->multi_num], mc->dmi_addr, 6);
+	memcpy(priv->multi_macs[priv->multi_num], addr, ETH_ALEN);
 
 	priv->multi_num++;
 	return 0;
@@ -1140,7 +1140,7 @@ static void wq_set_multicast_list (struct work_struct *work)
 		dprintk("%s: allmulti mode\n", dev->name);
 		priv->rx_mode = RX_MODE_ALL_MULTI;
 	} else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *mc;
+		struct netdev_hw_addr *ha;
 
 		dprintk("%s: set_mc_list, %d entries\n",
 			dev->name, netdev_mc_count(dev));
@@ -1148,8 +1148,8 @@ static void wq_set_multicast_list (struct work_struct *work)
 		priv->rx_mode = RX_MODE_MULTI;
 		priv->multi_num = 0;
 
-		netdev_for_each_mc_addr(mc, dev)
-			dvb_set_mc_filter(dev, mc);
+		netdev_for_each_mc_addr(ha, dev)
+			dvb_set_mc_filter(dev, ha->addr);
 	}
 
 	netif_addr_unlock_bh(dev);
diff --git a/drivers/net/3c505.c b/drivers/net/3c505.c
index 04b5bba19021..81c8b31e629f 100644
--- a/drivers/net/3c505.c
+++ b/drivers/net/3c505.c
@@ -1216,7 +1216,7 @@ static int elp_close(struct net_device *dev)
 static void elp_set_mc_list(struct net_device *dev)
 {
 	elp_device *adapter = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int i;
 	unsigned long flags;
 
@@ -1231,8 +1231,9 @@ static void elp_set_mc_list(struct net_device *dev)
 		adapter->tx_pcb.command = CMD_LOAD_MULTICAST_LIST;
 		adapter->tx_pcb.length = 6 * netdev_mc_count(dev);
 		i = 0;
-		netdev_for_each_mc_addr(dmi, dev)
-			memcpy(adapter->tx_pcb.data.multicast[i++], dmi->dmi_addr, 6);
+		netdev_for_each_mc_addr(ha, dev)
+			memcpy(adapter->tx_pcb.data.multicast[i++],
+			       ha->addr, 6);
 		adapter->got[CMD_LOAD_MULTICAST_LIST] = 0;
 		if (!send_pcb(dev, &adapter->tx_pcb))
 			pr_err("%s: couldn't send set_multicast command\n", dev->name);
diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c
index beed4fa10c6e..966cb12e1e0a 100644
--- a/drivers/net/3c523.c
+++ b/drivers/net/3c523.c
@@ -625,7 +625,7 @@ static int init586(struct net_device *dev)
 	volatile struct iasetup_cmd_struct *ias_cmd;
 	volatile struct tdr_cmd_struct *tdr_cmd;
 	volatile struct mcsetup_cmd_struct *mc_cmd;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int num_addrs = netdev_mc_count(dev);
 
 	ptr = (void *) ((char *) p->scb + sizeof(struct scb_struct));
@@ -788,8 +788,9 @@ static int init586(struct net_device *dev)
 			mc_cmd->cmd_link = 0xffff;
 			mc_cmd->mc_cnt = num_addrs * 6;
 			i = 0;
-			netdev_for_each_mc_addr(dmi, dev)
-				memcpy((char *) mc_cmd->mc_list[i++], dmi->dmi_addr, 6);
+			netdev_for_each_mc_addr(ha, dev)
+				memcpy((char *) mc_cmd->mc_list[i++],
+				       ha->addr, 6);
 			p->scb->cbl_offset = make16(mc_cmd);
 			p->scb->cmd = CUC_START;
 			elmc_id_attn586();
diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c
index 5c07b147ec99..38395dfa4963 100644
--- a/drivers/net/3c527.c
+++ b/drivers/net/3c527.c
@@ -1533,7 +1533,7 @@ static void do_mc32_set_multicast_list(struct net_device *dev, int retry)
 	{
 		unsigned char block[62];
 		unsigned char *bp;
-		struct dev_mc_list *dmc;
+		struct netdev_hw_addr *ha;
 
 		if(retry==0)
 			lp->mc_list_valid = 0;
@@ -1543,8 +1543,8 @@ static void do_mc32_set_multicast_list(struct net_device *dev, int retry)
 			block[0]=netdev_mc_count(dev);
 			bp=block+2;
 
-			netdev_for_each_mc_addr(dmc, dev) {
-				memcpy(bp, dmc->dmi_addr, 6);
+			netdev_for_each_mc_addr(ha, dev) {
+				memcpy(bp, ha->addr, 6);
 				bp+=6;
 			}
 			if(mc32_command_nowait(dev, 2, block,
diff --git a/drivers/net/7990.c b/drivers/net/7990.c
index 4e9a5a20b6a6..818837d8ffe7 100644
--- a/drivers/net/7990.c
+++ b/drivers/net/7990.c
@@ -595,7 +595,7 @@ static void lance_load_multicast (struct net_device *dev)
         struct lance_private *lp = netdev_priv(dev);
         volatile struct lance_init_block *ib = lp->init_block;
         volatile u16 *mcast_table = (u16 *)&ib->filter;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
         char *addrs;
         u32 crc;
 
@@ -610,8 +610,8 @@ static void lance_load_multicast (struct net_device *dev)
         ib->filter [1] = 0;
 
         /* Add addresses */
-	netdev_for_each_mc_addr(dmi, dev) {
-                addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrs = ha->addr;
 
                 /* multicast address? */
                 if (!(*addrs & 1))
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index 3d4406b16658..e4e2aef11017 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -909,11 +909,11 @@ static void __cp_set_rx_mode (struct net_device *dev)
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0xffffffff;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		rx_mode = AcceptBroadcast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
-			int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, dev) {
+			int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
 
 			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
 			rx_mode |= AcceptMulticast;
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index b4efc913978b..f61784c3c4a2 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -2502,11 +2502,11 @@ static void __set_rx_mode (struct net_device *dev)
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0xffffffff;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		rx_mode = AcceptBroadcast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
-			int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, dev) {
+			int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
 
 			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
 			rx_mode |= AcceptMulticast;
diff --git a/drivers/net/82596.c b/drivers/net/82596.c
index f94d17d78bb0..3a28b1f451d9 100644
--- a/drivers/net/82596.c
+++ b/drivers/net/82596.c
@@ -1542,7 +1542,7 @@ static void set_multicast_list(struct net_device *dev)
 	}
 
 	if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		unsigned char *cp;
 		struct mc_cmd *cmd;
 
@@ -1552,10 +1552,10 @@ static void set_multicast_list(struct net_device *dev)
 		cmd->cmd.command = CmdMulticastList;
 		cmd->mc_cnt = cnt * ETH_ALEN;
 		cp = cmd->mc_addrs;
-		netdev_for_each_mc_addr(dmi, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			if (!cnt--)
 				break;
-			memcpy(cp, dmi->dmi_addr, ETH_ALEN);
+			memcpy(cp, ha->addr, ETH_ALEN);
 			if (i596_debug > 1)
 				DEB(DEB_MULTI,printk(KERN_INFO "%s: Adding address %pM\n",
 						dev->name, cp));
diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c
index bd4d829eca12..7cce643793c3 100644
--- a/drivers/net/a2065.c
+++ b/drivers/net/a2065.c
@@ -603,7 +603,7 @@ static void lance_load_multicast (struct net_device *dev)
 	struct lance_private *lp = netdev_priv(dev);
 	volatile struct lance_init_block *ib = lp->init_block;
 	volatile u16 *mcast_table = (u16 *)&ib->filter;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	u32 crc;
 
@@ -618,8 +618,8 @@ static void lance_load_multicast (struct net_device *dev)
 	ib->filter [1] = 0;
 
 	/* Add addresses */
-	netdev_for_each_mc_addr(dmi, dev) {
-		addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrs = ha->addr;
 
 		/* multicast address? */
 		if (!(*addrs & 1))
diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c
index b8a59d255b49..d002c764a26c 100644
--- a/drivers/net/amd8111e.c
+++ b/drivers/net/amd8111e.c
@@ -1377,7 +1377,7 @@ list to the device.
 */
 static void amd8111e_set_multicast_list(struct net_device *dev)
 {
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	struct amd8111e_priv *lp = netdev_priv(dev);
 	u32 mc_filter[2] ;
 	int bit_num;
@@ -1408,8 +1408,8 @@ static void amd8111e_set_multicast_list(struct net_device *dev)
 	/* load all the multicast addresses in the logic filter */
 	lp->options |= OPTION_MULTICAST_ENABLE;
 	mc_filter[1] = mc_filter[0] = 0;
-	netdev_for_each_mc_addr(mc_ptr, dev) {
-		bit_num = (ether_crc_le(ETH_ALEN, mc_ptr->dmi_addr) >> 26) & 0x3f;
+	netdev_for_each_mc_addr(ha, dev) {
+		bit_num = (ether_crc_le(ETH_ALEN, ha->addr) >> 26) & 0x3f;
 		mc_filter[bit_num >> 5] |= 1 << (bit_num & 31);
 	}
 	amd8111e_writeq(*(u64*)mc_filter,lp->mmio+ LADRF);
diff --git a/drivers/net/arm/am79c961a.c b/drivers/net/arm/am79c961a.c
index f1f58c5e27bf..a4b5b08276f8 100644
--- a/drivers/net/arm/am79c961a.c
+++ b/drivers/net/arm/am79c961a.c
@@ -383,12 +383,12 @@ static void am79c961_setmulticastlist (struct net_device *dev)
 	} else if (dev->flags & IFF_ALLMULTI) {
 		memset(multi_hash, 0xff, sizeof(multi_hash));
 	} else {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 
 		memset(multi_hash, 0x00, sizeof(multi_hash));
 
-		netdev_for_each_mc_addr(dmi, dev)
-			am79c961_mc_hash(dmi->dmi_addr, multi_hash);
+		netdev_for_each_mc_addr(ha, dev)
+			am79c961_mc_hash(ha->addr, multi_hash);
 	}
 
 	spin_lock_irqsave(&priv->chip_lock, flags);
diff --git a/drivers/net/arm/at91_ether.c b/drivers/net/arm/at91_ether.c
index 8b23d5a175bf..f31e8b6cbf73 100644
--- a/drivers/net/arm/at91_ether.c
+++ b/drivers/net/arm/at91_ether.c
@@ -556,14 +556,14 @@ static int hash_get_index(__u8 *addr)
  */
 static void at91ether_sethashtable(struct net_device *dev)
 {
-	struct dev_mc_list *curr;
+	struct netdev_hw_addr *ha;
 	unsigned long mc_filter[2];
 	unsigned int bitnr;
 
 	mc_filter[0] = mc_filter[1] = 0;
 
-	netdev_for_each_mc_addr(curr, dev) {
-		bitnr = hash_get_index(curr->dmi_addr);
+	netdev_for_each_mc_addr(ha, dev) {
+		bitnr = hash_get_index(ha->addr);
 		mc_filter[bitnr >> 5] |= 1 << (bitnr & 31);
 	}
 
diff --git a/drivers/net/arm/ixp4xx_eth.c b/drivers/net/arm/ixp4xx_eth.c
index 6e2ae1d06df1..f9d168775d06 100644
--- a/drivers/net/arm/ixp4xx_eth.c
+++ b/drivers/net/arm/ixp4xx_eth.c
@@ -735,7 +735,7 @@ static int eth_xmit(struct sk_buff *skb, struct net_device *dev)
 static void eth_set_mcast_list(struct net_device *dev)
 {
 	struct port *port = netdev_priv(dev);
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	u8 diffs[ETH_ALEN], *addr;
 	int i;
 
@@ -748,11 +748,11 @@ static void eth_set_mcast_list(struct net_device *dev)
 	memset(diffs, 0, ETH_ALEN);
 
 	addr = NULL;
-	netdev_for_each_mc_addr(mclist, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		if (!addr)
-			addr = mclist->dmi_addr; /* first MAC address */
+			addr = ha->addr; /* first MAC address */
 		for (i = 0; i < ETH_ALEN; i++)
-			diffs[i] |= addr[i] ^ mclist->dmi_addr[i];
+			diffs[i] |= addr[i] ^ ha->addr[i];
 	}
 
 	for (i = 0; i < ETH_ALEN; i++) {
diff --git a/drivers/net/arm/ks8695net.c b/drivers/net/arm/ks8695net.c
index e7810b74f396..6ec245c6394b 100644
--- a/drivers/net/arm/ks8695net.c
+++ b/drivers/net/arm/ks8695net.c
@@ -331,16 +331,16 @@ ks8695_init_partial_multicast(struct ks8695_priv *ksp,
 {
 	u32 low, high;
 	int i;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
 	i = 0;
-	netdev_for_each_mc_addr(dmi, ndev) {
+	netdev_for_each_mc_addr(ha, ndev) {
 		/* Ran out of space in chip? */
 		BUG_ON(i == KS8695_NR_ADDRESSES);
 
-		low = (dmi->dmi_addr[2] << 24) | (dmi->dmi_addr[3] << 16) |
-		      (dmi->dmi_addr[4] << 8) | (dmi->dmi_addr[5]);
-		high = (dmi->dmi_addr[0] << 8) | (dmi->dmi_addr[1]);
+		low = (ha->addr[2] << 24) | (ha->addr[3] << 16) |
+		      (ha->addr[4] << 8) | (ha->addr[5]);
+		high = (ha->addr[0] << 8) | (ha->addr[1]);
 
 		ks8695_writereg(ksp, KS8695_AAL_(i), low);
 		ks8695_writereg(ksp, KS8695_AAH_(i), AAH_E | high);
diff --git a/drivers/net/at1700.c b/drivers/net/at1700.c
index 309843ab8869..e2a549a60e25 100644
--- a/drivers/net/at1700.c
+++ b/drivers/net/at1700.c
@@ -848,12 +848,12 @@ set_rx_mode(struct net_device *dev)
 		memset(mc_filter, 0x00, sizeof(mc_filter));
 		outb(1, ioaddr + RX_MODE);	/* Ignore almost all multicasts. */
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			unsigned int bit =
-				ether_crc_le(ETH_ALEN, mclist->dmi_addr) >> 26;
+				ether_crc_le(ETH_ALEN, ha->addr) >> 26;
 			mc_filter[bit >> 3] |= (1 << bit);
 		}
 		outb(0x02, ioaddr + RX_MODE);	/* Use normal mode. */
diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c
index a5508e1b261b..3d7051135c3a 100644
--- a/drivers/net/atl1c/atl1c_main.c
+++ b/drivers/net/atl1c/atl1c_main.c
@@ -354,7 +354,7 @@ static void atl1c_set_multi(struct net_device *netdev)
 {
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
 	struct atl1c_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u32 mac_ctrl_data;
 	u32 hash_value;
 
@@ -377,8 +377,8 @@ static void atl1c_set_multi(struct net_device *netdev)
 	AT_WRITE_REG_ARRAY(hw, REG_RX_HASH_TABLE, 1, 0);
 
 	/* comoute mc addresses' hash value ,and put it into hash table */
-	netdev_for_each_mc_addr(mc_ptr, netdev) {
-		hash_value = atl1c_hash_mc_addr(hw, mc_ptr->dmi_addr);
+	netdev_for_each_mc_addr(ha, netdev) {
+		hash_value = atl1c_hash_mc_addr(hw, ha->addr);
 		atl1c_hash_set(hw, hash_value);
 	}
 }
diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c
index 7231b5779122..b6605d433e91 100644
--- a/drivers/net/atl1e/atl1e_main.c
+++ b/drivers/net/atl1e/atl1e_main.c
@@ -284,7 +284,7 @@ static void atl1e_set_multi(struct net_device *netdev)
 {
 	struct atl1e_adapter *adapter = netdev_priv(netdev);
 	struct atl1e_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u32 mac_ctrl_data = 0;
 	u32 hash_value;
 
@@ -307,8 +307,8 @@ static void atl1e_set_multi(struct net_device *netdev)
 	AT_WRITE_REG_ARRAY(hw, REG_RX_HASH_TABLE, 1, 0);
 
 	/* comoute mc addresses' hash value ,and put it into hash table */
-	netdev_for_each_mc_addr(mc_ptr, netdev) {
-		hash_value = atl1e_hash_mc_addr(hw, mc_ptr->dmi_addr);
+	netdev_for_each_mc_addr(ha, netdev) {
+		hash_value = atl1e_hash_mc_addr(hw, ha->addr);
 		atl1e_hash_set(hw, hash_value);
 	}
 }
diff --git a/drivers/net/atlx/atl2.c b/drivers/net/atlx/atl2.c
index 199f2c9ce749..078d9d1b427c 100644
--- a/drivers/net/atlx/atl2.c
+++ b/drivers/net/atlx/atl2.c
@@ -135,7 +135,7 @@ static void atl2_set_multi(struct net_device *netdev)
 {
 	struct atl2_adapter *adapter = netdev_priv(netdev);
 	struct atl2_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u32 rctl;
 	u32 hash_value;
 
@@ -157,8 +157,8 @@ static void atl2_set_multi(struct net_device *netdev)
 	ATL2_WRITE_REG_ARRAY(hw, REG_RX_HASH_TABLE, 1, 0);
 
 	/* comoute mc addresses' hash value ,and put it into hash table */
-	netdev_for_each_mc_addr(mc_ptr, netdev) {
-		hash_value = atl2_hash_mc_addr(hw, mc_ptr->dmi_addr);
+	netdev_for_each_mc_addr(ha, netdev) {
+		hash_value = atl2_hash_mc_addr(hw, ha->addr);
 		atl2_hash_set(hw, hash_value);
 	}
 }
diff --git a/drivers/net/atlx/atlx.c b/drivers/net/atlx/atlx.c
index 72f3306352e2..f979ea2d6d3c 100644
--- a/drivers/net/atlx/atlx.c
+++ b/drivers/net/atlx/atlx.c
@@ -123,7 +123,7 @@ static void atlx_set_multi(struct net_device *netdev)
 {
 	struct atlx_adapter *adapter = netdev_priv(netdev);
 	struct atlx_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u32 rctl;
 	u32 hash_value;
 
@@ -144,8 +144,8 @@ static void atlx_set_multi(struct net_device *netdev)
 	iowrite32(0, (hw->hw_addr + REG_RX_HASH_TABLE) + (1 << 2));
 
 	/* compute mc addresses' hash value ,and put it into hash table */
-	netdev_for_each_mc_addr(mc_ptr, netdev) {
-		hash_value = atlx_hash_mc_addr(hw, mc_ptr->dmi_addr);
+	netdev_for_each_mc_addr(ha, netdev) {
+		hash_value = atlx_hash_mc_addr(hw, ha->addr);
 		atlx_hash_set(hw, hash_value);
 	}
 }
diff --git a/drivers/net/atp.c b/drivers/net/atp.c
index 6ad16205dc17..0d730c8329d4 100644
--- a/drivers/net/atp.c
+++ b/drivers/net/atp.c
@@ -883,11 +883,11 @@ static void set_rx_mode_8012(struct net_device *dev)
 		memset(mc_filter, 0xff, sizeof(mc_filter));
 		new_mode = CMR2h_Normal;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
-			int filterbit = ether_crc_le(ETH_ALEN, mclist->dmi_addr) & 0x3f;
+		netdev_for_each_mc_addr(ha, dev) {
+			int filterbit = ether_crc_le(ETH_ALEN, ha->addr) & 0x3f;
 			mc_filter[filterbit >> 5] |= 1 << (filterbit & 31);
 		}
 		new_mode = CMR2h_Normal;
diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c
index 4da191b87b0d..29631593cc23 100644
--- a/drivers/net/au1000_eth.c
+++ b/drivers/net/au1000_eth.c
@@ -957,12 +957,12 @@ static void au1000_multicast_list(struct net_device *dev)
 		aup->mac->control &= ~MAC_PROMISCUOUS;
 		printk(KERN_INFO "%s: Pass all multicast\n", dev->name);
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		u32 mc_filter[2];	/* Multicast hash filter */
 
 		mc_filter[1] = mc_filter[0] = 0;
-		netdev_for_each_mc_addr(mclist, dev)
-			set_bit(ether_crc(ETH_ALEN, mclist->dmi_addr)>>26,
+		netdev_for_each_mc_addr(ha, dev)
+			set_bit(ether_crc(ETH_ALEN, ha->addr)>>26,
 					(long *)mc_filter);
 		aup->mac->multi_hash_high = mc_filter[1];
 		aup->mac->multi_hash_low = mc_filter[0];
diff --git a/drivers/net/b44.c b/drivers/net/b44.c
index 332c60356285..b2c5fd7b63af 100644
--- a/drivers/net/b44.c
+++ b/drivers/net/b44.c
@@ -1680,15 +1680,15 @@ static struct net_device_stats *b44_get_stats(struct net_device *dev)
 
 static int __b44_load_mcast(struct b44 *bp, struct net_device *dev)
 {
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	int i, num_ents;
 
 	num_ents = min_t(int, netdev_mc_count(dev), B44_MCAST_TABLE_SIZE);
 	i = 0;
-	netdev_for_each_mc_addr(mclist, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		if (i == num_ents)
 			break;
-		__b44_cam_write(bp, mclist->dmi_addr, i++ + 1);
+		__b44_cam_write(bp, ha->addr, i++ + 1);
 	}
 	return i+1;
 }
diff --git a/drivers/net/bcm63xx_enet.c b/drivers/net/bcm63xx_enet.c
index 37eb8021de1c..51733404478e 100644
--- a/drivers/net/bcm63xx_enet.c
+++ b/drivers/net/bcm63xx_enet.c
@@ -603,7 +603,7 @@ static int bcm_enet_set_mac_address(struct net_device *dev, void *p)
 static void bcm_enet_set_multicast_list(struct net_device *dev)
 {
 	struct bcm_enet_priv *priv;
-	struct dev_mc_list *mc_list;
+	struct netdev_hw_addr *ha;
 	u32 val;
 	int i;
 
@@ -631,14 +631,14 @@ static void bcm_enet_set_multicast_list(struct net_device *dev)
 	}
 
 	i = 0;
-	netdev_for_each_mc_addr(mc_list, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		u8 *dmi_addr;
 		u32 tmp;
 
 		if (i == 3)
 			break;
 		/* update perfect match registers */
-		dmi_addr = mc_list->dmi_addr;
+		dmi_addr = ha->addr;
 		tmp = (dmi_addr[2] << 24) | (dmi_addr[3] << 16) |
 			(dmi_addr[4] << 8) | dmi_addr[5];
 		enet_writel(priv, tmp, ENET_PML_REG(i + 1));
diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c
index 9f53d9e86e2b..61a9afdb83f4 100644
--- a/drivers/net/benet/be_cmds.c
+++ b/drivers/net/benet/be_cmds.c
@@ -1159,13 +1159,13 @@ int be_cmd_multicast_set(struct be_adapter *adapter, u32 if_id,
 	req->interface_id = if_id;
 	if (netdev) {
 		int i;
-		struct dev_mc_list *mc;
+		struct netdev_hw_addr *ha;
 
 		req->num_mac = cpu_to_le16(netdev_mc_count(netdev));
 
 		i = 0;
-		netdev_for_each_mc_addr(mc, netdev)
-			memcpy(req->mac[i].byte, mc->dmi_addr, ETH_ALEN);
+		netdev_for_each_mc_addr(ha, netdev) /* one mac[] slot each */
+			memcpy(req->mac[i++].byte, ha->addr, ETH_ALEN);
 	} else {
 		req->promiscuous = 1;
 	}
diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
index 587f93cf03f6..c488cea8f455 100644
--- a/drivers/net/bfin_mac.c
+++ b/drivers/net/bfin_mac.c
@@ -812,14 +812,14 @@ static void bfin_mac_timeout(struct net_device *dev)
 static void bfin_mac_multicast_hash(struct net_device *dev)
 {
 	u32 emac_hashhi, emac_hashlo;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	u32 crc;
 
 	emac_hashhi = emac_hashlo = 0;
 
-	netdev_for_each_mc_addr(dmi, dev) {
-		addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrs = ha->addr;
 
 		/* skip non-multicast addresses */
 		if (!(*addrs & 1))
diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c
index 119468e76323..1245e9835765 100644
--- a/drivers/net/bmac.c
+++ b/drivers/net/bmac.c
@@ -971,7 +971,7 @@ bmac_remove_multi(struct net_device *dev,
  */
 static void bmac_set_multicast(struct net_device *dev)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	struct bmac_data *bp = netdev_priv(dev);
 	int num_addrs = netdev_mc_count(dev);
 	unsigned short rx_cfg;
@@ -1000,8 +1000,8 @@ static void bmac_set_multicast(struct net_device *dev)
 			rx_cfg = bmac_rx_on(dev, 0, 0);
 			XXDEBUG(("bmac: multi disabled, rx_cfg=%#08x\n", rx_cfg));
 		} else {
-			netdev_for_each_mc_addr(dmi, dev)
-				bmac_addhash(bp, dmi->dmi_addr);
+			netdev_for_each_mc_addr(ha, dev)
+				bmac_addhash(bp, ha->addr);
 			bmac_update_hash_table_mask(dev, bp);
 			rx_cfg = bmac_rx_on(dev, 1, 0);
 			XXDEBUG(("bmac: multi enabled, rx_cfg=%#08x\n", rx_cfg));
@@ -1015,7 +1015,7 @@ static void bmac_set_multicast(struct net_device *dev)
 
 static void bmac_set_multicast(struct net_device *dev)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	int i;
 	unsigned short rx_cfg;
@@ -1039,8 +1039,8 @@ static void bmac_set_multicast(struct net_device *dev)
 
 		for(i = 0; i < 4; i++) hash_table[i] = 0;
 
-		netdev_for_each_mc_addr(dmi, dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			addrs = ha->addr;
 
 			if(!(*addrs & 1))
 				continue;
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 381887ba677c..0b69ffb7951d 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -3544,7 +3544,6 @@ bnx2_set_rx_mode(struct net_device *dev)
 	}
 	else {
 		/* Accept one or more multicast(s). */
-		struct dev_mc_list *mclist;
 		u32 mc_filter[NUM_MC_HASH_REGISTERS];
 		u32 regidx;
 		u32 bit;
@@ -3552,8 +3551,8 @@ bnx2_set_rx_mode(struct net_device *dev)
 
 		memset(mc_filter, 0, 4 * NUM_MC_HASH_REGISTERS);
 
-		netdev_for_each_mc_addr(mclist, dev) {
-			crc = ether_crc_le(ETH_ALEN, mclist->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			crc = ether_crc_le(ETH_ALEN, ha->addr);
 			bit = crc & 0xff;
 			regidx = (bit & 0xe0) >> 5;
 			bit &= 0x1f;
diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index f4ea99d06c77..fa9275c2ef5c 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -11496,21 +11496,21 @@ static void bnx2x_set_rx_mode(struct net_device *dev)
 	else { /* some multicasts */
 		if (CHIP_IS_E1(bp)) {
 			int i, old, offset;
-			struct dev_mc_list *mclist;
+			struct netdev_hw_addr *ha;
 			struct mac_configuration_cmd *config =
 						bnx2x_sp(bp, mcast_config);
 
 			i = 0;
-			netdev_for_each_mc_addr(mclist, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				config->config_table[i].
 					cam_entry.msb_mac_addr =
-					swab16(*(u16 *)&mclist->dmi_addr[0]);
+					swab16(*(u16 *)&ha->addr[0]);
 				config->config_table[i].
 					cam_entry.middle_mac_addr =
-					swab16(*(u16 *)&mclist->dmi_addr[2]);
+					swab16(*(u16 *)&ha->addr[2]);
 				config->config_table[i].
 					cam_entry.lsb_mac_addr =
-					swab16(*(u16 *)&mclist->dmi_addr[4]);
+					swab16(*(u16 *)&ha->addr[4]);
 				config->config_table[i].cam_entry.flags =
 							cpu_to_le16(port);
 				config->config_table[i].
@@ -11564,18 +11564,18 @@ static void bnx2x_set_rx_mode(struct net_device *dev)
 				      0);
 		} else { /* E1H */
 			/* Accept one or more multicasts */
-			struct dev_mc_list *mclist;
+			struct netdev_hw_addr *ha;
 			u32 mc_filter[MC_HASH_SIZE];
 			u32 crc, bit, regidx;
 			int i;
 
 			memset(mc_filter, 0, 4 * MC_HASH_SIZE);
 
-			netdev_for_each_mc_addr(mclist, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n",
-				   mclist->dmi_addr);
+				   ha->addr);
 
-				crc = crc32c_le(0, mclist->dmi_addr, ETH_ALEN);
+				crc = crc32c_le(0, ha->addr, ETH_ALEN);
 				bit = (crc >> 24) & 0xff;
 				regidx = bit >> 5;
 				bit &= 0x1f;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d6ae63b2cf00..22682f1c8473 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -761,32 +761,6 @@ static int bond_check_dev_link(struct bonding *bond,
 
 /*----------------------------- Multicast list ------------------------------*/
 
-/*
- * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise
- */
-static inline int bond_is_dmi_same(const struct dev_mc_list *dmi1,
-				   const struct dev_mc_list *dmi2)
-{
-	return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 &&
-			dmi1->dmi_addrlen == dmi2->dmi_addrlen;
-}
-
-/*
- * returns dmi entry if found, NULL otherwise
- */
-static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi,
-						 struct dev_mc_list *mc_list)
-{
-	struct dev_mc_list *idmi;
-
-	for (idmi = mc_list; idmi; idmi = idmi->next) {
-		if (bond_is_dmi_same(dmi, idmi))
-			return idmi;
-	}
-
-	return NULL;
-}
-
 /*
  * Push the promiscuity flag down to appropriate slaves
  */
@@ -839,18 +813,18 @@ static int bond_set_allmulti(struct bonding *bond, int inc)
  * Add a Multicast address to slaves
  * according to mode
  */
-static void bond_mc_add(struct bonding *bond, void *addr, int alen)
+static void bond_mc_add(struct bonding *bond, void *addr)
 {
 	if (USES_PRIMARY(bond->params.mode)) {
 		/* write lock already acquired */
 		if (bond->curr_active_slave)
-			dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0);
+			dev_mc_add(bond->curr_active_slave->dev, addr);
 	} else {
 		struct slave *slave;
 		int i;
 
 		bond_for_each_slave(bond, slave, i)
-			dev_mc_add(slave->dev, addr, alen, 0);
+			dev_mc_add(slave->dev, addr);
 	}
 }
 
@@ -858,18 +832,17 @@ static void bond_mc_add(struct bonding *bond, void *addr, int alen)
  * Remove a multicast address from slave
  * according to mode
  */
-static void bond_mc_delete(struct bonding *bond, void *addr, int alen)
+static void bond_mc_del(struct bonding *bond, void *addr)
 {
 	if (USES_PRIMARY(bond->params.mode)) {
 		/* write lock already acquired */
 		if (bond->curr_active_slave)
-			dev_mc_delete(bond->curr_active_slave->dev, addr,
-				      alen, 0);
+			dev_mc_del(bond->curr_active_slave->dev, addr);
 	} else {
 		struct slave *slave;
 		int i;
 		bond_for_each_slave(bond, slave, i) {
-			dev_mc_delete(slave->dev, addr, alen, 0);
+			dev_mc_del(slave->dev, addr);
 		}
 	}
 }
@@ -895,50 +868,6 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
 	rcu_read_unlock();
 }
 
-/*
- * Totally destroys the mc_list in bond
- */
-static void bond_mc_list_destroy(struct bonding *bond)
-{
-	struct dev_mc_list *dmi;
-
-	dmi = bond->mc_list;
-	while (dmi) {
-		bond->mc_list = dmi->next;
-		kfree(dmi);
-		dmi = bond->mc_list;
-	}
-
-	bond->mc_list = NULL;
-}
-
-/*
- * Copy all the Multicast addresses from src to the bonding device dst
- */
-static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond,
-			     gfp_t gfp_flag)
-{
-	struct dev_mc_list *dmi, *new_dmi;
-
-	for (dmi = mc_list; dmi; dmi = dmi->next) {
-		new_dmi = kmalloc(sizeof(struct dev_mc_list), gfp_flag);
-
-		if (!new_dmi) {
-			/* FIXME: Potential memory leak !!! */
-			return -ENOMEM;
-		}
-
-		new_dmi->next = bond->mc_list;
-		bond->mc_list = new_dmi;
-		new_dmi->dmi_addrlen = dmi->dmi_addrlen;
-		memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen);
-		new_dmi->dmi_users = dmi->dmi_users;
-		new_dmi->dmi_gusers = dmi->dmi_gusers;
-	}
-
-	return 0;
-}
-
 /*
  * flush all members of flush->mc_list from device dev->mc_list
  */
@@ -946,16 +875,16 @@ static void bond_mc_list_flush(struct net_device *bond_dev,
 			       struct net_device *slave_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
-	for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next)
-		dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
+	netdev_for_each_mc_addr(ha, bond_dev)
+		dev_mc_del(slave_dev, ha->addr);
 
 	if (bond->params.mode == BOND_MODE_8023AD) {
 		/* del lacpdu mc addr from mc list */
 		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
 
-		dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0);
+		dev_mc_del(slave_dev, lacpdu_multicast);
 	}
 }
 
@@ -969,7 +898,7 @@ static void bond_mc_list_flush(struct net_device *bond_dev,
 static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 			 struct slave *old_active)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
 	if (!USES_PRIMARY(bond->params.mode))
 		/* nothing to do -  mc list is already up-to-date on
@@ -984,9 +913,8 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 		if (bond->dev->flags & IFF_ALLMULTI)
 			dev_set_allmulti(old_active->dev, -1);
 
-		for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next)
-			dev_mc_delete(old_active->dev, dmi->dmi_addr,
-				      dmi->dmi_addrlen, 0);
+		netdev_for_each_mc_addr(ha, bond->dev)
+			dev_mc_del(old_active->dev, ha->addr);
 	}
 
 	if (new_active) {
@@ -997,9 +925,8 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 		if (bond->dev->flags & IFF_ALLMULTI)
 			dev_set_allmulti(new_active->dev, 1);
 
-		for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next)
-			dev_mc_add(new_active->dev, dmi->dmi_addr,
-				   dmi->dmi_addrlen, 0);
+		netdev_for_each_mc_addr(ha, bond->dev)
+			dev_mc_add(new_active->dev, ha->addr);
 		bond_resend_igmp_join_requests(bond);
 	}
 }
@@ -1406,7 +1333,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	struct bonding *bond = netdev_priv(bond_dev);
 	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
 	struct slave *new_slave = NULL;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	struct sockaddr addr;
 	int link_reporting;
 	int old_features = bond_dev->features;
@@ -1492,7 +1419,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
 			/* Flush unicast and multicast addresses */
 			dev_uc_flush(bond_dev);
-			dev_addr_discard(bond_dev);
+			dev_mc_flush(bond_dev);
 
 			if (slave_dev->type != ARPHRD_ETHER)
 				bond_setup_by_slave(bond_dev, slave_dev);
@@ -1601,9 +1528,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
 		netif_addr_lock_bh(bond_dev);
 		/* upload master's mc_list to new slave */
-		for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next)
-			dev_mc_add(slave_dev, dmi->dmi_addr,
-				   dmi->dmi_addrlen, 0);
+		netdev_for_each_mc_addr(ha, bond_dev)
+			dev_mc_add(slave_dev, ha->addr);
 		netif_addr_unlock_bh(bond_dev);
 	}
 
@@ -1611,7 +1537,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 		/* add lacpdu mc addr to mc list */
 		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
 
-		dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0);
+		dev_mc_add(slave_dev, lacpdu_multicast);
 	}
 
 	bond_add_vlans_on_slave(bond, slave_dev);
@@ -3913,10 +3839,24 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
 	return res;
 }
 
+static bool bond_addr_in_mc_list(unsigned char *addr,
+				 struct netdev_hw_addr_list *list,
+				 int addrlen)
+{
+	struct netdev_hw_addr *ha;
+
+	netdev_hw_addr_list_for_each(ha, list)
+		if (!memcmp(ha->addr, addr, addrlen))
+			return true;
+
+	return false;
+}
+
 static void bond_set_multicast_list(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
+	bool found;
 
 	/*
 	 * Do promisc before checking multicast_mode
@@ -3951,20 +3891,25 @@ static void bond_set_multicast_list(struct net_device *bond_dev)
 	bond->flags = bond_dev->flags;
 
 	/* looking for addresses to add to slaves' mc list */
-	for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
-		if (!bond_mc_list_find_dmi(dmi, bond->mc_list))
-			bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen);
+	netdev_for_each_mc_addr(ha, bond_dev) {
+		found = bond_addr_in_mc_list(ha->addr, &bond->mc_list,
+					     bond_dev->addr_len);
+		if (!found)
+			bond_mc_add(bond, ha->addr);
 	}
 
 	/* looking for addresses to delete from slaves' list */
-	for (dmi = bond->mc_list; dmi; dmi = dmi->next) {
-		if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list))
-			bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen);
+	netdev_hw_addr_list_for_each(ha, &bond->mc_list) {
+		found = bond_addr_in_mc_list(ha->addr, &bond_dev->mc,
+					     bond_dev->addr_len);
+		if (!found)
+			bond_mc_del(bond, ha->addr);
 	}
 
 	/* save master's multicast list */
-	bond_mc_list_destroy(bond);
-	bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC);
+	__hw_addr_flush(&bond->mc_list);
+	__hw_addr_add_multiple(&bond->mc_list, &bond_dev->mc,
+			       bond_dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST);
 
 	read_unlock(&bond->lock);
 }
@@ -4534,9 +4479,7 @@ static void bond_uninit(struct net_device *bond_dev)
 	if (bond->wq)
 		destroy_workqueue(bond->wq);
 
-	netif_addr_lock_bh(bond_dev);
-	bond_mc_list_destroy(bond);
-	netif_addr_unlock_bh(bond_dev);
+	__hw_addr_flush(&bond->mc_list);
 }
 
 /*------------------------- Module initialization ---------------------------*/
@@ -4908,6 +4851,8 @@ static int bond_init(struct net_device *bond_dev)
 	list_add_tail(&bond->bond_list, &bn->dev_list);
 
 	bond_prepare_sysfs_group(bond);
+
+	__hw_addr_init(&bond->mc_list);
 	return 0;
 }
 
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 257a7a4dfce9..2aa336720591 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -202,7 +202,7 @@ struct bonding {
 	char     proc_file_name[IFNAMSIZ];
 #endif /* CONFIG_PROC_FS */
 	struct   list_head bond_list;
-	struct   dev_mc_list *mc_list;
+	struct   netdev_hw_addr_list mc_list;
 	int      (*xmit_hash_policy)(struct sk_buff *, int);
 	__be32   master_ip;
 	u16      flags;
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index 9bd155e4111c..bd857a20a755 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -2957,20 +2957,20 @@ static void cas_process_mc_list(struct cas *cp)
 {
 	u16 hash_table[16];
 	u32 crc;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int i = 1;
 
 	memset(hash_table, 0, sizeof(hash_table));
-	netdev_for_each_mc_addr(dmi, cp->dev) {
+	netdev_for_each_mc_addr(ha, cp->dev) {
 		if (i <= CAS_MC_EXACT_MATCH_SIZE) {
 			/* use the alternate mac address registers for the
 			 * first 15 multicast addresses
 			 */
-			writel((dmi->dmi_addr[4] << 8) | dmi->dmi_addr[5],
+			writel((ha->addr[4] << 8) | ha->addr[5],
 			       cp->regs + REG_MAC_ADDRN(i*3 + 0));
-			writel((dmi->dmi_addr[2] << 8) | dmi->dmi_addr[3],
+			writel((ha->addr[2] << 8) | ha->addr[3],
 			       cp->regs + REG_MAC_ADDRN(i*3 + 1));
-			writel((dmi->dmi_addr[0] << 8) | dmi->dmi_addr[1],
+			writel((ha->addr[0] << 8) | ha->addr[1],
 			       cp->regs + REG_MAC_ADDRN(i*3 + 2));
 			i++;
 		}
@@ -2978,7 +2978,7 @@ static void cas_process_mc_list(struct cas *cp)
 			/* use hw hash table for the next series of
 			 * multicast addresses
 			 */
-			crc = ether_crc_le(ETH_ALEN, dmi->dmi_addr);
+			crc = ether_crc_le(ETH_ALEN, ha->addr);
 			crc >>= 24;
 			hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf));
 		}
diff --git a/drivers/net/chelsio/pm3393.c b/drivers/net/chelsio/pm3393.c
index a6eb30a6e2b9..13fd9628db1d 100644
--- a/drivers/net/chelsio/pm3393.c
+++ b/drivers/net/chelsio/pm3393.c
@@ -376,12 +376,13 @@ static int pm3393_set_rx_mode(struct cmac *cmac, struct t1_rx_mode *rm)
 		rx_mode |= SUNI1x10GEXP_BITMSK_RXXG_MHASH_EN;
 	} else if (t1_rx_mode_mc_cnt(rm)) {
 		/* Accept one or more multicast(s). */
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		int bit;
 		u16 mc_filter[4] = { 0, };
 
-		netdev_for_each_mc_addr(dmi, t1_get_netdev(rm)) {
-			bit = (ether_crc(ETH_ALEN, dmi->dmi_addr) >> 23) & 0x3f; /* bit[23:28] */
+		netdev_for_each_mc_addr(ha, t1_get_netdev(rm)) {
+			/* bit[23:28] */
+			bit = (ether_crc(ETH_ALEN, ha->addr) >> 23) & 0x3f;
 			mc_filter[bit >> 4] |= 1 << (bit & 0xf);
 		}
 		pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_LOW, mc_filter[0]);
diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
index 60777fd90b33..bdfff784645c 100644
--- a/drivers/net/cpmac.c
+++ b/drivers/net/cpmac.c
@@ -328,7 +328,7 @@ static int cpmac_config(struct net_device *dev, struct ifmap *map)
 
 static void cpmac_set_multicast_list(struct net_device *dev)
 {
-	struct dev_mc_list *iter;
+	struct netdev_hw_addr *ha;
 	u8 tmp;
 	u32 mbp, bit, hash[2] = { 0, };
 	struct cpmac_priv *priv = netdev_priv(dev);
@@ -348,19 +348,19 @@ static void cpmac_set_multicast_list(struct net_device *dev)
 			 * cpmac uses some strange mac address hashing
 			 * (not crc32)
 			 */
-			netdev_for_each_mc_addr(iter, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				bit = 0;
-				tmp = iter->dmi_addr[0];
+				tmp = ha->addr[0];
 				bit  ^= (tmp >> 2) ^ (tmp << 4);
-				tmp = iter->dmi_addr[1];
+				tmp = ha->addr[1];
 				bit  ^= (tmp >> 4) ^ (tmp << 2);
-				tmp = iter->dmi_addr[2];
+				tmp = ha->addr[2];
 				bit  ^= (tmp >> 6) ^ tmp;
-				tmp = iter->dmi_addr[3];
+				tmp = ha->addr[3];
 				bit  ^= (tmp >> 2) ^ (tmp << 4);
-				tmp = iter->dmi_addr[4];
+				tmp = ha->addr[4];
 				bit  ^= (tmp >> 4) ^ (tmp << 2);
-				tmp = iter->dmi_addr[5];
+				tmp = ha->addr[5];
 				bit  ^= (tmp >> 6) ^ tmp;
 				bit &= 0x3f;
 				hash[bit / 32] |= 1 << (bit % 32);
diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index dd24aadb778c..59110bc119a8 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -1596,16 +1596,16 @@ set_multicast_list(struct net_device *dev)
 	} else {
 		/* MC mode, receive normal and MC packets */
 		char hash_ix;
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		char *baddr;
 
 		lo_bits = 0x00000000ul;
 		hi_bits = 0x00000000ul;
-		netdev_for_each_mc_addr(dmi, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			/* Calculate the hash index for the GA registers */
 
 			hash_ix = 0;
-			baddr = dmi->dmi_addr;
+			baddr = ha->addr;
 			hash_ix ^= (*baddr) & 0x3f;
 			hash_ix ^= ((*baddr) >> 6) & 0x03;
 			++baddr;
diff --git a/drivers/net/cxgb3/xgmac.c b/drivers/net/cxgb3/xgmac.c
index c142a2132e9f..3af19a550372 100644
--- a/drivers/net/cxgb3/xgmac.c
+++ b/drivers/net/cxgb3/xgmac.c
@@ -311,16 +311,16 @@ int t3_mac_set_rx_mode(struct cmac *mac, struct net_device *dev)
 	if (dev->flags & IFF_ALLMULTI)
 		hash_lo = hash_hi = 0xffffffff;
 	else {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		int exact_addr_idx = mac->nucast;
 
 		hash_lo = hash_hi = 0;
-		netdev_for_each_mc_addr(dmi, dev)
+		netdev_for_each_mc_addr(ha, dev)
 			if (exact_addr_idx < EXACT_ADDR_FILTERS)
 				set_addr_filter(mac, exact_addr_idx++,
-						dmi->dmi_addr);
+						ha->addr);
 			else {
-				int hash = hash_hw_addr(dmi->dmi_addr);
+				int hash = hash_hw_addr(ha->addr);
 
 				if (hash < 32)
 					hash_lo |= (1 << hash);
diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
index 2b8edd2efbf6..1f9df5c6a75a 100644
--- a/drivers/net/davinci_emac.c
+++ b/drivers/net/davinci_emac.c
@@ -952,13 +952,14 @@ static void emac_dev_mcast_set(struct net_device *ndev)
 			emac_add_mcast(priv, EMAC_ALL_MULTI_SET, NULL);
 		}
 		if (!netdev_mc_empty(ndev)) {
-			struct dev_mc_list *mc_ptr;
+			struct netdev_hw_addr *ha;
+
 			mbp_enable = (mbp_enable | EMAC_MBP_RXMCAST);
 			emac_add_mcast(priv, EMAC_ALL_MULTI_CLR, NULL);
 			/* program multicast address list into EMAC hardware */
-			netdev_for_each_mc_addr(mc_ptr, ndev) {
+			netdev_for_each_mc_addr(ha, ndev) {
 				emac_add_mcast(priv, EMAC_MULTICAST_ADD,
-					       (u8 *) mc_ptr->dmi_addr);
+					       (u8 *) ha->addr);
 			}
 		} else {
 			mbp_enable = (mbp_enable & ~EMAC_MBP_RXMCAST);
diff --git a/drivers/net/declance.c b/drivers/net/declance.c
index 8cf3cc6f20e2..fb3f0984c289 100644
--- a/drivers/net/declance.c
+++ b/drivers/net/declance.c
@@ -940,7 +940,7 @@ static void lance_load_multicast(struct net_device *dev)
 {
 	struct lance_private *lp = netdev_priv(dev);
 	volatile u16 *ib = (volatile u16 *)dev->mem_start;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	u32 crc;
 
@@ -959,8 +959,8 @@ static void lance_load_multicast(struct net_device *dev)
 	*lib_ptr(ib, filter[3], lp->type) = 0;
 
 	/* Add addresses */
-	netdev_for_each_mc_addr(dmi, dev) {
-		addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrs = ha->addr;
 
 		/* multicast address? */
 		if (!(*addrs & 1))
diff --git a/drivers/net/defxx.c b/drivers/net/defxx.c
index ed53a8d45f4e..e5667c55844e 100644
--- a/drivers/net/defxx.c
+++ b/drivers/net/defxx.c
@@ -2195,7 +2195,7 @@ static void dfx_ctl_set_multicast_list(struct net_device *dev)
 {
 	DFX_board_t *bp = netdev_priv(dev);
 	int					i;			/* used as index in for loop */
-	struct dev_mc_list	*dmi;		/* ptr to multicast addr entry */
+	struct netdev_hw_addr *ha;
 
 	/* Enable LLC frame promiscuous mode, if necessary */
 
@@ -2241,9 +2241,9 @@ static void dfx_ctl_set_multicast_list(struct net_device *dev)
 		/* Copy addresses to multicast address table, then update adapter CAM */
 
 		i = 0;
-		netdev_for_each_mc_addr(dmi, dev)
+		netdev_for_each_mc_addr(ha, dev)
 			memcpy(&bp->mc_table[i++ * FDDI_K_ALEN],
-			       dmi->dmi_addr, FDDI_K_ALEN);
+			       ha->addr, FDDI_K_ALEN);
 
 		if (dfx_ctl_update_cam(bp) != DFX_K_SUCCESS)
 			{
diff --git a/drivers/net/depca.c b/drivers/net/depca.c
index 744c1928dfca..a88300a0d1e8 100644
--- a/drivers/net/depca.c
+++ b/drivers/net/depca.c
@@ -1272,7 +1272,7 @@ static void set_multicast_list(struct net_device *dev)
 static void SetMulticastFilter(struct net_device *dev)
 {
 	struct depca_private *lp = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	int i, j, bit, byte;
 	u16 hashcode;
@@ -1287,8 +1287,8 @@ static void SetMulticastFilter(struct net_device *dev)
 			lp->init_block.mcast_table[i] = 0;
 		}
 		/* Add multicast addresses */
-		netdev_for_each_mc_addr(dmi, dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			addrs = ha->addr;
 			if ((*addrs & 0x01) == 1) {	/* multicast address? */
 				crc = ether_crc(ETH_ALEN, addrs);
 				hashcode = (crc & 1);	/* hashcode is 6 LSb of CRC ... */
diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c
index b05bad829827..6579225dbd91 100644
--- a/drivers/net/dl2k.c
+++ b/drivers/net/dl2k.c
@@ -1132,14 +1132,14 @@ set_multicast (struct net_device *dev)
 		/* Receive broadcast and multicast frames */
 		rx_mode = ReceiveBroadcast | ReceiveMulticast | ReceiveUnicast;
 	} else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		/* Receive broadcast frames and multicast frames filtering
 		   by Hashtable */
 		rx_mode =
 		    ReceiveBroadcast | ReceiveMulticastHash | ReceiveUnicast;
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			int bit, index = 0;
-			int crc = ether_crc_le (ETH_ALEN, mclist->dmi_addr);
+			int crc = ether_crc_le(ETH_ALEN, ha->addr);
 			/* The inverted high significant 6 bits of CRC are
 			   used as an index to hashtable */
 			for (bit = 0; bit < 6; bit++)
diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index 1c67f1138ca7..989f2beb123b 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -724,7 +724,7 @@ static void
 dm9000_hash_table(struct net_device *dev)
 {
 	board_info_t *db = netdev_priv(dev);
-	struct dev_mc_list *mcptr;
+	struct netdev_hw_addr *ha;
 	int i, oft;
 	u32 hash_val;
 	u16 hash_table[4];
@@ -752,8 +752,8 @@ dm9000_hash_table(struct net_device *dev)
 		rcr |= RCR_ALL;
 
 	/* the multicast address in Hash Table : 64 bits */
-	netdev_for_each_mc_addr(mcptr, dev) {
-		hash_val = ether_crc_le(6, mcptr->dmi_addr) & 0x3f;
+	netdev_for_each_mc_addr(ha, dev) {
+		hash_val = ether_crc_le(6, ha->addr) & 0x3f;
 		hash_table[hash_val / 16] |= (u16) 1 << (hash_val % 16);
 	}
 
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index c0cd57656681..3e8d0005540f 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -1545,16 +1545,16 @@ static int e100_hw_init(struct nic *nic)
 static void e100_multi(struct nic *nic, struct cb *cb, struct sk_buff *skb)
 {
 	struct net_device *netdev = nic->netdev;
-	struct dev_mc_list *list;
+	struct netdev_hw_addr *ha;
 	u16 i, count = min(netdev_mc_count(netdev), E100_MAX_MULTICAST_ADDRS);
 
 	cb->command = cpu_to_le16(cb_multi);
 	cb->u.multi.count = cpu_to_le16(count * ETH_ALEN);
 	i = 0;
-	netdev_for_each_mc_addr(list, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		if (i == count)
 			break;
-		memcpy(&cb->u.multi.addr[i++ * ETH_ALEN], &list->dmi_addr,
+		memcpy(&cb->u.multi.addr[i++ * ETH_ALEN], &ha->addr,
 			ETH_ALEN);
 	}
 }
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 8be6faee43e6..41330349b07a 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2101,7 +2101,6 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 	struct e1000_hw *hw = &adapter->hw;
 	struct netdev_hw_addr *ha;
 	bool use_uc = false;
-	struct dev_addr_list *mc_ptr;
 	u32 rctl;
 	u32 hash_value;
 	int i, rar_entries = E1000_RAR_ENTRIES;
@@ -2161,17 +2160,17 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 
 	WARN_ON(i == rar_entries);
 
-	netdev_for_each_mc_addr(mc_ptr, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		if (i == rar_entries) {
 			/* load any remaining addresses into the hash table */
 			u32 hash_reg, hash_bit, mta;
-			hash_value = e1000_hash_mc_addr(hw, mc_ptr->da_addr);
+			hash_value = e1000_hash_mc_addr(hw, ha->addr);
 			hash_reg = (hash_value >> 5) & 0x7F;
 			hash_bit = hash_value & 0x1F;
 			mta = (1 << hash_bit);
 			mcarray[hash_reg] |= mta;
 		} else {
-			e1000_rar_set(hw, mc_ptr->da_addr, i++);
+			e1000_rar_set(hw, ha->addr, i++);
 		}
 	}
 
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 5304959ae1f7..02f7d20f3c80 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -2567,7 +2567,7 @@ static void e1000_set_multi(struct net_device *netdev)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u8  *mta_list;
 	u32 rctl;
 	int i;
@@ -2599,9 +2599,8 @@ static void e1000_set_multi(struct net_device *netdev)
 
 		/* prepare a packed array of only addresses. */
 		i = 0;
-		netdev_for_each_mc_addr(mc_ptr, netdev)
-			memcpy(mta_list + (i++ * ETH_ALEN),
-			       mc_ptr->dmi_addr, ETH_ALEN);
+		netdev_for_each_mc_addr(ha, netdev)
+			memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
 
 		e1000_update_mc_addr_list(hw, mta_list, i);
 		kfree(mta_list);
diff --git a/drivers/net/eepro.c b/drivers/net/eepro.c
index 15d6266b80c3..b01e6997403b 100644
--- a/drivers/net/eepro.c
+++ b/drivers/net/eepro.c
@@ -1287,7 +1287,7 @@ set_multicast_list(struct net_device *dev)
 	struct eepro_local *lp = netdev_priv(dev);
 	short ioaddr = dev->base_addr;
 	unsigned short mode;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int mc_count = netdev_mc_count(dev);
 
 	if (dev->flags&(IFF_ALLMULTI|IFF_PROMISC) || mc_count > 63)
@@ -1332,8 +1332,8 @@ set_multicast_list(struct net_device *dev)
 		outw(0, ioaddr + IO_PORT);
 		outw(6 * (mc_count + 1), ioaddr + IO_PORT);
 
-		netdev_for_each_mc_addr(dmi, dev) {
-			eaddrs = (unsigned short *) dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			eaddrs = (unsigned short *) ha->addr;
 			outw(*eaddrs++, ioaddr + IO_PORT);
 			outw(*eaddrs++, ioaddr + IO_PORT);
 			outw(*eaddrs++, ioaddr + IO_PORT);
diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c
index 7013dc8a6cbc..b3882fd8db6d 100644
--- a/drivers/net/eexpress.c
+++ b/drivers/net/eexpress.c
@@ -1576,7 +1576,7 @@ static void eexp_hw_init586(struct net_device *dev)
 
 static void eexp_setup_filter(struct net_device *dev)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned short ioaddr = dev->base_addr;
 	int count = netdev_mc_count(dev);
 	int i;
@@ -1589,8 +1589,8 @@ static void eexp_setup_filter(struct net_device *dev)
 	outw(CONF_NR_MULTICAST & ~31, ioaddr+SM_PTR);
 	outw(6*count, ioaddr+SHADOW(CONF_NR_MULTICAST));
 	i = 0;
-	netdev_for_each_mc_addr(dmi, dev) {
-		unsigned short *data = (unsigned short *) dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		unsigned short *data = (unsigned short *) ha->addr;
 
 		if (i == count)
 			break;
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index b004eaba3d7b..b97411aaa77c 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -1966,7 +1966,7 @@ static void ehea_add_multicast_entry(struct ehea_port *port, u8 *mc_mac_addr)
 static void ehea_set_multicast_list(struct net_device *dev)
 {
 	struct ehea_port *port = netdev_priv(dev);
-	struct dev_mc_list *k_mcl_entry;
+	struct netdev_hw_addr *ha;
 	int ret;
 
 	if (dev->flags & IFF_PROMISC) {
@@ -1997,8 +1997,8 @@ static void ehea_set_multicast_list(struct net_device *dev)
 			goto out;
 		}
 
-		netdev_for_each_mc_addr(k_mcl_entry, dev)
-			ehea_add_multicast_entry(port, k_mcl_entry->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			ehea_add_multicast_entry(port, ha->addr);
 
 	}
 out:
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 6d70c349c954..1232887c243d 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -822,7 +822,7 @@ static int enic_set_mac_addr(struct net_device *netdev, char *addr)
 static void enic_set_multicast_list(struct net_device *netdev)
 {
 	struct enic *enic = netdev_priv(netdev);
-	struct dev_mc_list *list;
+	struct netdev_hw_addr *ha;
 	int directed = 1;
 	int multicast = (netdev->flags & IFF_MULTICAST) ? 1 : 0;
 	int broadcast = (netdev->flags & IFF_BROADCAST) ? 1 : 0;
@@ -852,10 +852,10 @@ static void enic_set_multicast_list(struct net_device *netdev)
 	 */
 
 	i = 0;
-	netdev_for_each_mc_addr(list, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		if (i == mc_count)
 			break;
-		memcpy(mc_addr[i++], list->dmi_addr, ETH_ALEN);
+		memcpy(mc_addr[i++], ha->addr, ETH_ALEN);
 	}
 
 	for (i = 0; i < enic->mc_count; i++) {
diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c
index 39c271b6be44..f6584a1ad3bc 100644
--- a/drivers/net/epic100.c
+++ b/drivers/net/epic100.c
@@ -1400,12 +1400,12 @@ static void set_rx_mode(struct net_device *dev)
 		outl(0x0004, ioaddr + RxCtrl);
 		return;
 	} else {					/* Never executed, for now. */
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			unsigned int bit_nr =
-				ether_crc_le(ETH_ALEN, mclist->dmi_addr) & 0x3f;
+				ether_crc_le(ETH_ALEN, ha->addr) & 0x3f;
 			mc_filter[bit_nr >> 3] |= (1 << bit_nr);
 		}
 	}
diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c
index 209742304e20..f6be5aeaf94c 100644
--- a/drivers/net/ethoc.c
+++ b/drivers/net/ethoc.c
@@ -755,7 +755,7 @@ static void ethoc_set_multicast_list(struct net_device *dev)
 {
 	struct ethoc *priv = netdev_priv(dev);
 	u32 mode = ethoc_read(priv, MODER);
-	struct dev_mc_list *mc;
+	struct netdev_hw_addr *ha;
 	u32 hash[2] = { 0, 0 };
 
 	/* set loopback mode if requested */
@@ -783,8 +783,8 @@ static void ethoc_set_multicast_list(struct net_device *dev)
 		hash[0] = 0xffffffff;
 		hash[1] = 0xffffffff;
 	} else {
-		netdev_for_each_mc_addr(mc, dev) {
-			u32 crc = ether_crc(ETH_ALEN, mc->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			u32 crc = ether_crc(ETH_ALEN, ha->addr);
 			int bit = (crc >> 26) & 0x3f;
 			hash[bit >> 5] |= 1 << (bit & 0x1f);
 		}
diff --git a/drivers/net/ewrk3.c b/drivers/net/ewrk3.c
index a2bade586886..11ba70f49971 100644
--- a/drivers/net/ewrk3.c
+++ b/drivers/net/ewrk3.c
@@ -1169,7 +1169,7 @@ static void set_multicast_list(struct net_device *dev)
 static void SetMulticastFilter(struct net_device *dev)
 {
 	struct ewrk3_private *lp = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	u_long iobase = dev->base_addr;
 	int i;
 	char *addrs, bit, byte;
@@ -1213,8 +1213,8 @@ static void SetMulticastFilter(struct net_device *dev)
 		}
 
 		/* Update table */
-		netdev_for_each_mc_addr(dmi, dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			addrs = ha->addr;
 			if ((*addrs & 0x01) == 1) {	/* multicast address? */
 				crc = ether_crc_le(ETH_ALEN, addrs);
 				hashcode = crc & ((1 << 9) - 1);	/* hashcode is 9 LSb of CRC */
diff --git a/drivers/net/fealnx.c b/drivers/net/fealnx.c
index 9d5ad08a119f..e8a2705237bf 100644
--- a/drivers/net/fealnx.c
+++ b/drivers/net/fealnx.c
@@ -1792,12 +1792,12 @@ static void __set_rx_mode(struct net_device *dev)
 		memset(mc_filter, 0xff, sizeof(mc_filter));
 		rx_mode = CR_W_AB | CR_W_AM;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			unsigned int bit;
-			bit = (ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26) ^ 0x3F;
+			bit = (ether_crc(ETH_ALEN, ha->addr) >> 26) ^ 0x3F;
 			mc_filter[bit >> 5] |= (1 << bit);
 		}
 		rx_mode = CR_W_AB | CR_W_AM;
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 848eb1968abf..2b1651aee13f 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -954,7 +954,7 @@ fec_enet_close(struct net_device *dev)
 static void set_multicast_list(struct net_device *dev)
 {
 	struct fec_enet_private *fep = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned int i, bit, data, crc, tmp;
 	unsigned char hash;
 
@@ -984,16 +984,16 @@ static void set_multicast_list(struct net_device *dev)
 	writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
 	writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
 
-	netdev_for_each_mc_addr(dmi, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		/* Only support group multicast for now */
-		if (!(dmi->dmi_addr[0] & 1))
+		if (!(ha->addr[0] & 1))
 			continue;
 
 		/* calculate crc32 value of mac address */
 		crc = 0xffffffff;
 
-		for (i = 0; i < dmi->dmi_addrlen; i++) {
-			data = dmi->dmi_addr[i];
+		for (i = 0; i < dev->addr_len; i++) {
+			data = ha->addr[i];
 			for (bit = 0; bit < 8; bit++, data >>= 1) {
 				crc = (crc >> 1) ^
 				(((crc ^ data) & 1) ? CRC32_POLY : 0);
diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c
index 704155e2bdd1..667ba1391b9d 100644
--- a/drivers/net/fec_mpc52xx.c
+++ b/drivers/net/fec_mpc52xx.c
@@ -574,12 +574,12 @@ static void mpc52xx_fec_set_multicast_list(struct net_device *dev)
 			out_be32(&fec->gaddr2, 0xffffffff);
 		} else {
 			u32 crc;
-			struct dev_mc_list *dmi;
+			struct netdev_hw_addr *ha;
 			u32 gaddr1 = 0x00000000;
 			u32 gaddr2 = 0x00000000;
 
-			netdev_for_each_mc_addr(dmi, dev) {
-				crc = ether_crc_le(6, dmi->dmi_addr) >> 26;
+			netdev_for_each_mc_addr(ha, dev) {
+				crc = ether_crc_le(6, ha->addr) >> 26;
 				if (crc >= 32)
 					gaddr1 |= 1 << (crc-32);
 				else
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index ca05e5662029..6a2b64f0a7db 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -3103,12 +3103,14 @@ static void nv_set_multicast(struct net_device *dev)
 			if (dev->flags & IFF_ALLMULTI) {
 				alwaysOn[0] = alwaysOn[1] = alwaysOff[0] = alwaysOff[1] = 0;
 			} else {
-				struct dev_mc_list *walk;
+				struct netdev_hw_addr *ha;
 
-				netdev_for_each_mc_addr(walk, dev) {
+				netdev_for_each_mc_addr(ha, dev) {
+					unsigned char *addr = ha->addr;
 					u32 a, b;
-					a = le32_to_cpu(*(__le32 *) walk->dmi_addr);
-					b = le16_to_cpu(*(__le16 *) (&walk->dmi_addr[4]));
+
+					a = le32_to_cpu(*(__le32 *) addr);
+					b = le16_to_cpu(*(__le16 *) (&addr[4]));
 					alwaysOn[0] &= a;
 					alwaysOff[0] &= ~a;
 					alwaysOn[1] &= b;
diff --git a/drivers/net/fs_enet/mac-fcc.c b/drivers/net/fs_enet/mac-fcc.c
index cf4f674f9e2e..b3bad7c15d02 100644
--- a/drivers/net/fs_enet/mac-fcc.c
+++ b/drivers/net/fs_enet/mac-fcc.c
@@ -231,12 +231,12 @@ static void set_multicast_finish(struct net_device *dev)
 
 static void set_multicast_list(struct net_device *dev)
 {
-	struct dev_mc_list *pmc;
+	struct netdev_hw_addr *ha;
 
 	if ((dev->flags & IFF_PROMISC) == 0) {
 		set_multicast_start(dev);
-		netdev_for_each_mc_addr(pmc, dev)
-			set_multicast_one(dev, pmc->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			set_multicast_one(dev, ha->addr);
 		set_multicast_finish(dev);
 	} else
 		set_promiscuous_mode(dev);
diff --git a/drivers/net/fs_enet/mac-fec.c b/drivers/net/fs_enet/mac-fec.c
index cd2c6cca5f24..75974c6d201b 100644
--- a/drivers/net/fs_enet/mac-fec.c
+++ b/drivers/net/fs_enet/mac-fec.c
@@ -232,12 +232,12 @@ static void set_multicast_finish(struct net_device *dev)
 
 static void set_multicast_list(struct net_device *dev)
 {
-	struct dev_mc_list *pmc;
+	struct netdev_hw_addr *ha;
 
 	if ((dev->flags & IFF_PROMISC) == 0) {
 		set_multicast_start(dev);
-		netdev_for_each_mc_addr(pmc, dev)
-			set_multicast_one(dev, pmc->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			set_multicast_one(dev, ha->addr);
 		set_multicast_finish(dev);
 	} else
 		set_promiscuous_mode(dev);
diff --git a/drivers/net/fs_enet/mac-scc.c b/drivers/net/fs_enet/mac-scc.c
index c490a466cae1..0ab6a346a193 100644
--- a/drivers/net/fs_enet/mac-scc.c
+++ b/drivers/net/fs_enet/mac-scc.c
@@ -224,12 +224,12 @@ static void set_multicast_finish(struct net_device *dev)
 
 static void set_multicast_list(struct net_device *dev)
 {
-	struct dev_mc_list *pmc;
+	struct netdev_hw_addr *ha;
 
 	if ((dev->flags & IFF_PROMISC) == 0) {
 		set_multicast_start(dev);
-		netdev_for_each_mc_addr(pmc, dev)
-			set_multicast_one(dev, pmc->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			set_multicast_one(dev, ha->addr);
 		set_multicast_finish(dev);
 	} else
 		set_promiscuous_mode(dev);
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index b6715553cf17..fdd26c2b1a2f 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -2797,7 +2797,7 @@ static void adjust_link(struct net_device *dev)
  * whenever dev->flags is changed */
 static void gfar_set_multi(struct net_device *dev)
 {
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	struct gfar_private *priv = netdev_priv(dev);
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
 	u32 tempval;
@@ -2870,13 +2870,12 @@ static void gfar_set_multi(struct net_device *dev)
 			return;
 
 		/* Parse the list, and set the appropriate bits */
-		netdev_for_each_mc_addr(mc_ptr, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			if (idx < em_num) {
-				gfar_set_mac_for_addr(dev, idx,
-						mc_ptr->dmi_addr);
+				gfar_set_mac_for_addr(dev, idx, ha->addr);
 				idx++;
 			} else
-				gfar_set_hash_for_addr(dev, mc_ptr->dmi_addr);
+				gfar_set_hash_for_addr(dev, ha->addr);
 		}
 	}
 
diff --git a/drivers/net/greth.c b/drivers/net/greth.c
index c5e0d28a6de9..fbe6ab6b919b 100644
--- a/drivers/net/greth.c
+++ b/drivers/net/greth.c
@@ -988,7 +988,7 @@ static u32 greth_hash_get_index(__u8 *addr)
 
 static void greth_set_hash_filter(struct net_device *dev)
 {
-	struct dev_mc_list *curr;
+	struct netdev_hw_addr *ha;
 	struct greth_private *greth = netdev_priv(dev);
 	struct greth_regs *regs = (struct greth_regs *) greth->regs;
 	u32 mc_filter[2];
@@ -996,8 +996,8 @@ static void greth_set_hash_filter(struct net_device *dev)
 
 	mc_filter[0] = mc_filter[1] = 0;
 
-	netdev_for_each_mc_addr(curr, dev) {
-		bitnr = greth_hash_get_index(curr->dmi_addr);
+	netdev_for_each_mc_addr(ha, dev) {
+		bitnr = greth_hash_get_index(ha->addr);
 		mc_filter[bitnr >> 5] |= 1 << (bitnr & 31);
 	}
 
diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c
index 373546dd0831..2bfcca6d180c 100644
--- a/drivers/net/hamachi.c
+++ b/drivers/net/hamachi.c
@@ -1858,12 +1858,12 @@ static void set_rx_mode(struct net_device *dev)
 		/* Too many to match, or accept all multicasts. */
 		writew(0x000B, ioaddr + AddrMode);
 	} else if (!netdev_mc_empty(dev)) { /* Must use the CAM filter. */
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		int i = 0;
 
-		netdev_for_each_mc_addr(mclist, dev) {
-			writel(*(u32*)(mclist->dmi_addr), ioaddr + 0x100 + i*8);
-			writel(0x20000 | (*(u16*)&mclist->dmi_addr[4]),
+		netdev_for_each_mc_addr(ha, dev) {
+			writel(*(u32 *)(ha->addr), ioaddr + 0x100 + i*8);
+			writel(0x20000 | (*(u16 *)&ha->addr[4]),
 				   ioaddr + 0x104 + i*8);
 			i++;
 		}
diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c
index b766a69bf0ca..86b2b4332341 100644
--- a/drivers/net/hp100.c
+++ b/drivers/net/hp100.c
@@ -2100,15 +2100,15 @@ static void hp100_set_multicast_list(struct net_device *dev)
 		} else {
 			int i, idx;
 			u_char *addrs;
-			struct dev_mc_list *dmi;
+			struct netdev_hw_addr *ha;
 
 			memset(&lp->hash_bytes, 0x00, 8);
 #ifdef HP100_DEBUG
 			printk("hp100: %s: computing hash filter - mc_count = %i\n",
 			       dev->name, netdev_mc_count(dev));
 #endif
-			netdev_for_each_mc_addr(dmi, dev) {
-				addrs = dmi->dmi_addr;
+			netdev_for_each_mc_addr(ha, dev) {
+				addrs = ha->addr;
 				if ((*addrs & 0x01) == 0x01) {	/* multicast address? */
 #ifdef HP100_DEBUG
 					printk("hp100: %s: multicast = %pM, ",
diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c
index d8533a4ef825..40c78507ef16 100644
--- a/drivers/net/ibm_newemac/core.c
+++ b/drivers/net/ibm_newemac/core.c
@@ -388,18 +388,19 @@ static void emac_hash_mc(struct emac_instance *dev)
 	const int regs = EMAC_XAHT_REGS(dev);
 	u32 *gaht_base = emac_gaht_base(dev);
 	u32 gaht_temp[regs];
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int i;
 
 	DBG(dev, "hash_mc %d" NL, netdev_mc_count(dev->ndev));
 
 	memset(gaht_temp, 0, sizeof (gaht_temp));
 
-	netdev_for_each_mc_addr(dmi, dev->ndev) {
+	netdev_for_each_mc_addr(ha, dev->ndev) {
 		int slot, reg, mask;
-		DBG2(dev, "mc %pM" NL, dmi->dmi_addr);
+		DBG2(dev, "mc %pM" NL, ha->addr);
 
-		slot = EMAC_XAHT_CRC_TO_SLOT(dev, ether_crc(ETH_ALEN, dmi->dmi_addr));
+		slot = EMAC_XAHT_CRC_TO_SLOT(dev,
+					     ether_crc(ETH_ALEN, ha->addr));
 		reg = EMAC_XAHT_SLOT_TO_REG(dev, slot);
 		mask = EMAC_XAHT_SLOT_TO_MASK(dev, slot);
 
@@ -1176,7 +1177,7 @@ static int emac_open(struct net_device *ndev)
 		netif_carrier_on(dev->ndev);
 
 	/* Required for Pause packet support in EMAC */
-	dev_mc_add(ndev, default_mcast_addr, sizeof(default_mcast_addr), 1);
+	dev_mc_add_global(ndev, default_mcast_addr);
 
 	emac_configure(dev);
 	mal_poll_add(dev->mal, &dev->commac);
diff --git a/drivers/net/ibmlana.c b/drivers/net/ibmlana.c
index b5d0f4e973f7..76949e08ee84 100644
--- a/drivers/net/ibmlana.c
+++ b/drivers/net/ibmlana.c
@@ -385,7 +385,7 @@ static void InitBoard(struct net_device *dev)
 	int camcnt;
 	camentry_t cams[16];
 	u32 cammask;
-	struct dev_mc_list *mcptr;
+	struct netdev_hw_addr *ha;
 	u16 rcrval;
 
 	/* reset the SONIC */
@@ -420,8 +420,8 @@ static void InitBoard(struct net_device *dev)
 	/* start putting the multicast addresses into the CAM list.  Stop if
 	   it is full. */
 
-	netdev_for_each_mc_addr(mcptr, dev) {
-		putcam(cams, &camcnt, mcptr->dmi_addr);
+	netdev_for_each_mc_addr(ha, dev) {
+		putcam(cams, &camcnt, ha->addr);
 		if (camcnt == 16)
 			break;
 	}
@@ -479,7 +479,7 @@ static void InitBoard(struct net_device *dev)
 	/* if still multicast addresses left or ALLMULTI is set, set the multicast
 	   enable bit */
 
-	if ((dev->flags & IFF_ALLMULTI) || (mcptr != NULL))
+	if ((dev->flags & IFF_ALLMULTI) || netdev_mc_count(dev) > camcnt)
 		rcrval |= RCREG_AMC;
 
 	/* promiscous mode ? */
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 0bc777bac9b4..f468590ed454 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -1072,7 +1072,7 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
 			ibmveth_error_printk("h_multicast_ctrl rc=%ld when entering promisc mode\n", lpar_rc);
 		}
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		/* clear the filter table & disable filtering */
 		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
 					   IbmVethMcastEnableRecv |
@@ -1083,10 +1083,10 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
 			ibmveth_error_printk("h_multicast_ctrl rc=%ld when attempting to clear filter table\n", lpar_rc);
 		}
 		/* add the addresses to the filter table */
-		netdev_for_each_mc_addr(mclist, netdev) {
+		netdev_for_each_mc_addr(ha, netdev) {
 			// add the multicast address to the filter table
 			unsigned long mcast_addr = 0;
-			memcpy(((char *)&mcast_addr)+2, mclist->dmi_addr, 6);
+			memcpy(((char *)&mcast_addr)+2, ha->addr, 6);
 			lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
 						   IbmVethMcastAddFilter,
 						   mcast_addr);
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index ea875709f053..78cc742e233f 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -2864,7 +2864,7 @@ static int igb_write_mc_addr_list(struct net_device *netdev)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u8  *mta_list;
 	int i;
 
@@ -2881,8 +2881,8 @@ static int igb_write_mc_addr_list(struct net_device *netdev)
 
 	/* The shared function expects a packed array of only addresses. */
 	i = 0;
-	netdev_for_each_mc_addr(mc_ptr, netdev)
-		memcpy(mta_list + (i++ * ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
+	netdev_for_each_mc_addr(ha, netdev)
+		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
 
 	igb_update_mc_addr_list(hw, mta_list, i);
 	kfree(mta_list);
diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c
index a77afd8a14bb..ea8abf5c1ef2 100644
--- a/drivers/net/igbvf/netdev.c
+++ b/drivers/net/igbvf/netdev.c
@@ -1399,7 +1399,7 @@ static void igbvf_set_multi(struct net_device *netdev)
 {
 	struct igbvf_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u8  *mta_list = NULL;
 	int i;
 
@@ -1414,8 +1414,8 @@ static void igbvf_set_multi(struct net_device *netdev)
 
 	/* prepare a packed array of only addresses. */
 	i = 0;
-	netdev_for_each_mc_addr(mc_ptr, netdev)
-		memcpy(mta_list + (i++ * ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
+	netdev_for_each_mc_addr(ha, netdev)
+		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
 
 	hw->mac.ops.update_mc_addr_list(hw, mta_list, i, 0, 0);
 	kfree(mta_list);
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index 70871b9b045a..57d873da9789 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -1664,7 +1664,7 @@ static int ioc3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 
 static void ioc3_set_multicast_list(struct net_device *dev)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	struct ioc3_private *ip = netdev_priv(dev);
 	struct ioc3 *ioc3 = ip->regs;
 	u64 ehar = 0;
@@ -1688,8 +1688,8 @@ static void ioc3_set_multicast_list(struct net_device *dev)
 			ip->ehar_h = 0xffffffff;
 			ip->ehar_l = 0xffffffff;
 		} else {
-			netdev_for_each_mc_addr(dmi, dev) {
-				char *addr = dmi->dmi_addr;
+			netdev_for_each_mc_addr(ha, dev) {
+				char *addr = ha->addr;
 
 				if (!(*addr & 1))
 					continue;
diff --git a/drivers/net/ipg.c b/drivers/net/ipg.c
index 0d7ad3f2d0f3..67cfc7d9d895 100644
--- a/drivers/net/ipg.c
+++ b/drivers/net/ipg.c
@@ -569,7 +569,7 @@ static int ipg_config_autoneg(struct net_device *dev)
 static void ipg_nic_set_multicast_list(struct net_device *dev)
 {
 	void __iomem *ioaddr = ipg_ioaddr(dev);
-	struct dev_mc_list *mc_list_ptr;
+	struct netdev_hw_addr *ha;
 	unsigned int hashindex;
 	u32 hashtable[2];
 	u8 receivemode;
@@ -608,9 +608,9 @@ static void ipg_nic_set_multicast_list(struct net_device *dev)
 	hashtable[1] = 0x00000000;
 
 	/* Cycle through all multicast addresses to filter. */
-	netdev_for_each_mc_addr(mc_list_ptr, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		/* Calculate CRC result for each multicast address. */
-		hashindex = crc32_le(0xffffffff, mc_list_ptr->dmi_addr,
+		hashindex = crc32_le(0xffffffff, ha->addr,
 				     ETH_ALEN);
 
 		/* Use only the least significant 6 bits. */
diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c
index e6e972d9b7ca..cd65b8629bcc 100644
--- a/drivers/net/iseries_veth.c
+++ b/drivers/net/iseries_veth.c
@@ -961,15 +961,15 @@ static void veth_set_multicast_list(struct net_device *dev)
 			(netdev_mc_count(dev) > VETH_MAX_MCAST)) {
 		port->promiscuous = 1;
 	} else {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 
 		port->promiscuous = 0;
 
 		/* Update table */
 		port->num_mcast = 0;
 
-		netdev_for_each_mc_addr(dmi, dev) {
-			u8 *addr = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			u8 *addr = ha->addr;
 			u64 xaddr = 0;
 
 			if (addr[0] & 0x01) {/* multicast address? */
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index c9fef65cb98b..912dd1d5772c 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1058,7 +1058,7 @@ ixgb_set_multi(struct net_device *netdev)
 {
 	struct ixgb_adapter *adapter = netdev_priv(netdev);
 	struct ixgb_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u32 rctl;
 	int i;
 
@@ -1089,9 +1089,9 @@ ixgb_set_multi(struct net_device *netdev)
 		IXGB_WRITE_REG(hw, RCTL, rctl);
 
 		i = 0;
-		netdev_for_each_mc_addr(mc_ptr, netdev)
+		netdev_for_each_mc_addr(ha, netdev)
 			memcpy(&mta[i++ * IXGB_ETH_LENGTH_OF_ADDRESS],
-			       mc_ptr->dmi_addr, IXGB_ETH_LENGTH_OF_ADDRESS);
+			       ha->addr, IXGB_ETH_LENGTH_OF_ADDRESS);
 
 		ixgb_mc_addr_list_update(hw, mta, netdev_mc_count(netdev), 0);
 	}
diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c
index 4d1c3a429457..6eb5814ca7da 100644
--- a/drivers/net/ixgbe/ixgbe_common.c
+++ b/drivers/net/ixgbe/ixgbe_common.c
@@ -1494,7 +1494,7 @@ static void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr)
 s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw,
 				      struct net_device *netdev)
 {
-	struct dev_addr_list *dmi;
+	struct netdev_hw_addr *ha;
 	u32 i;
 
 	/*
@@ -1510,9 +1510,9 @@ s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw,
 		IXGBE_WRITE_REG(hw, IXGBE_MTA(i), 0);
 
 	/* Add the new addresses */
-	netdev_for_each_mc_addr(dmi, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		hw_dbg(hw, " Adding the multicast addresses:\n");
-		ixgbe_set_mta(hw, dmi->dmi_addr);
+		ixgbe_set_mta(hw, ha->addr);
 	}
 
 	/* Enable mta */
diff --git a/drivers/net/ixgbevf/vf.c b/drivers/net/ixgbevf/vf.c
index f457c52b5ed4..852e9c4fd934 100644
--- a/drivers/net/ixgbevf/vf.c
+++ b/drivers/net/ixgbevf/vf.c
@@ -259,7 +259,7 @@ static s32 ixgbevf_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr,
 static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
 					  struct net_device *netdev)
 {
-	struct dev_addr_list *dmi;
+	struct netdev_hw_addr *ha;
 	struct ixgbe_mbx_info *mbx = &hw->mbx;
 	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
 	u16 *vector_list = (u16 *)&msgbuf[1];
@@ -281,10 +281,10 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
 	msgbuf[0] |= cnt << IXGBE_VT_MSGINFO_SHIFT;
 
 	i = 0;
-	netdev_for_each_mc_addr(dmi, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		if (i == cnt)
 			break;
-		vector_list[i++] = ixgbevf_mta_vector(hw, dmi->dmi_addr);
+		vector_list[i++] = ixgbevf_mta_vector(hw, ha->addr);
 	}
 
 	mbx->ops.write_posted(hw, msgbuf, IXGBE_VFMAILBOX_SIZE);
diff --git a/drivers/net/jme.c b/drivers/net/jme.c
index c0b59a555384..10e816d2caff 100644
--- a/drivers/net/jme.c
+++ b/drivers/net/jme.c
@@ -2009,12 +2009,12 @@ jme_set_multi(struct net_device *netdev)
 	} else if (netdev->flags & IFF_ALLMULTI) {
 		jme->reg_rxmcs |= RXMCS_ALLMULFRAME;
 	} else if (netdev->flags & IFF_MULTICAST) {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		int bit_nr;
 
 		jme->reg_rxmcs |= RXMCS_MULFRAME | RXMCS_MULFILTERED;
-		netdev_for_each_mc_addr(mclist, netdev) {
-			bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) & 0x3F;
+		netdev_for_each_mc_addr(ha, netdev) {
+			bit_nr = ether_crc(ETH_ALEN, ha->addr) & 0x3F;
 			mc_hash[bit_nr >> 5] |= 1 << (bit_nr & 0x1F);
 		}
 
diff --git a/drivers/net/korina.c b/drivers/net/korina.c
index edaedc7aa03f..26bf1b76b997 100644
--- a/drivers/net/korina.c
+++ b/drivers/net/korina.c
@@ -482,7 +482,7 @@ static void korina_multicast_list(struct net_device *dev)
 {
 	struct korina_private *lp = netdev_priv(dev);
 	unsigned long flags;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	u32 recognise = ETH_ARC_AB;	/* always accept broadcasts */
 	int i;
 
@@ -502,8 +502,8 @@ static void korina_multicast_list(struct net_device *dev)
 		for (i = 0; i < 4; i++)
 			hash_table[i] = 0;
 
-		netdev_for_each_mc_addr(dmi, dev) {
-			char *addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			char *addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
diff --git a/drivers/net/ks8851.c b/drivers/net/ks8851.c
index 66be4e449f02..4dcd61f81ec2 100644
--- a/drivers/net/ks8851.c
+++ b/drivers/net/ks8851.c
@@ -956,13 +956,13 @@ static void ks8851_set_rx_mode(struct net_device *dev)
 		rxctrl.rxcr1 = (RXCR1_RXME | RXCR1_RXAE |
 				RXCR1_RXPAFMA | RXCR1_RXMAFMA);
 	} else if (dev->flags & IFF_MULTICAST && !netdev_mc_empty(dev)) {
-		struct dev_mc_list *mcptr;
+		struct netdev_hw_addr *ha;
 		u32 crc;
 
 		/* accept some multicast */
 
-		netdev_for_each_mc_addr(mcptr, dev) {
-			crc = ether_crc(ETH_ALEN, mcptr->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			crc = ether_crc(ETH_ALEN, ha->addr);
 			crc >>= (32 - 6);  /* get top six bits */
 
 			rxctrl.mchash[crc >> 4] |= (1 << (crc & 0xf));
diff --git a/drivers/net/ks8851_mll.c b/drivers/net/ks8851_mll.c
index ee3fe30b2ad7..70a3d98f4bd7 100644
--- a/drivers/net/ks8851_mll.c
+++ b/drivers/net/ks8851_mll.c
@@ -362,7 +362,6 @@ static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 };
 
 #define MAX_MCAST_LST			32
 #define HW_MCAST_SIZE			8
-#define MAC_ADDR_LEN			6
 
 /**
  * union ks_tx_hdr - tx header data
@@ -450,7 +449,7 @@ struct ks_net {
 	u16			promiscuous;
 	u16			all_mcast;
 	u16			mcast_lst_size;
-	u8			mcast_lst[MAX_MCAST_LST][MAC_ADDR_LEN];
+	u8			mcast_lst[MAX_MCAST_LST][ETH_ALEN];
 	u8			mcast_bits[HW_MCAST_SIZE];
 	u8			mac_addr[6];
 	u8                      fid;
@@ -1170,7 +1169,7 @@ static void ks_set_mcast(struct ks_net *ks, u16 mcast)
 static void ks_set_rx_mode(struct net_device *netdev)
 {
 	struct ks_net *ks = netdev_priv(netdev);
-	struct dev_mc_list *ptr;
+	struct netdev_hw_addr *ha;
 
 	/* Turn on/off promiscuous mode. */
 	if ((netdev->flags & IFF_PROMISC) == IFF_PROMISC)
@@ -1187,13 +1186,12 @@ static void ks_set_rx_mode(struct net_device *netdev)
 		if (netdev_mc_count(netdev) <= MAX_MCAST_LST) {
 			int i = 0;
 
-			netdev_for_each_mc_addr(ptr, netdev) {
-				if (!(*ptr->dmi_addr & 1))
+			netdev_for_each_mc_addr(ha, netdev) {
+				if (!(*ha->addr & 1))
 					continue;
 				if (i >= MAX_MCAST_LST)
 					break;
-				memcpy(ks->mcast_lst[i++], ptr->dmi_addr,
-				MAC_ADDR_LEN);
+				memcpy(ks->mcast_lst[i++], ha->addr, ETH_ALEN);
 			}
 			ks->mcast_lst_size = (u8)i;
 			ks_set_grpaddr(ks);
diff --git a/drivers/net/ksz884x.c b/drivers/net/ksz884x.c
index b843bf7d8c17..348769521615 100644
--- a/drivers/net/ksz884x.c
+++ b/drivers/net/ksz884x.c
@@ -5763,7 +5763,7 @@ static void netdev_set_rx_mode(struct net_device *dev)
 	struct dev_priv *priv = netdev_priv(dev);
 	struct dev_info *hw_priv = priv->adapter;
 	struct ksz_hw *hw = &hw_priv->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	int multicast = (dev->flags & IFF_ALLMULTI);
 
 	dev_set_promiscuous(dev, priv, hw, (dev->flags & IFF_PROMISC));
@@ -5780,7 +5780,7 @@ static void netdev_set_rx_mode(struct net_device *dev)
 		int i = 0;
 
 		/* List too big to support so turn on all multicast mode. */
-		if (dev->mc_count > MAX_MULTICAST_LIST) {
+		if (netdev_mc_count(dev) > MAX_MULTICAST_LIST) {
 			if (MAX_MULTICAST_LIST != hw->multi_list_size) {
 				hw->multi_list_size = MAX_MULTICAST_LIST;
 				++hw->all_multi;
@@ -5789,13 +5789,12 @@ static void netdev_set_rx_mode(struct net_device *dev)
 			return;
 		}
 
-		netdev_for_each_mc_addr(mc_ptr, dev) {
-			if (!(*mc_ptr->dmi_addr & 1))
+		netdev_for_each_mc_addr(ha, dev) {
+			if (!(*ha->addr & 1))
 				continue;
 			if (i >= MAX_MULTICAST_LIST)
 				break;
-			memcpy(hw->multi_list[i++], mc_ptr->dmi_addr,
-				MAC_ADDR_LEN);
+			memcpy(hw->multi_list[i++], ha->addr, MAC_ADDR_LEN);
 		}
 		hw->multi_list_size = (u8) i;
 		hw_set_grp_addr(hw);
diff --git a/drivers/net/lib82596.c b/drivers/net/lib82596.c
index 443c39a3732f..fddaf921885e 100644
--- a/drivers/net/lib82596.c
+++ b/drivers/net/lib82596.c
@@ -1388,7 +1388,7 @@ static void set_multicast_list(struct net_device *dev)
 	}
 
 	if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		unsigned char *cp;
 		struct mc_cmd *cmd;
 
@@ -1396,10 +1396,10 @@ static void set_multicast_list(struct net_device *dev)
 		cmd->cmd.command = SWAP16(CmdMulticastList);
 		cmd->mc_cnt = SWAP16(netdev_mc_count(dev) * 6);
 		cp = cmd->mc_addrs;
-		netdev_for_each_mc_addr(dmi, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			if (!cnt--)
 				break;
-			memcpy(cp, dmi->dmi_addr, 6);
+			memcpy(cp, ha->addr, 6);
 			if (i596_debug > 1)
 				DEB(DEB_MULTI,
 				    printk(KERN_DEBUG
diff --git a/drivers/net/lib8390.c b/drivers/net/lib8390.c
index 56f66f485400..526dc9cbc3c6 100644
--- a/drivers/net/lib8390.c
+++ b/drivers/net/lib8390.c
@@ -905,10 +905,10 @@ static struct net_device_stats *__ei_get_stats(struct net_device *dev)
 
 static inline void make_mc_bits(u8 *bits, struct net_device *dev)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
-	netdev_for_each_mc_addr(dmi, dev) {
-		u32 crc = ether_crc(ETH_ALEN, dmi->dmi_addr);
+	netdev_for_each_mc_addr(ha, dev) {
+		u32 crc = ether_crc(ETH_ALEN, ha->addr);
 		/*
 		 * The 8390 uses the 6 most significant bits of the
 		 * CRC to index the multicast table.
diff --git a/drivers/net/ll_temac_main.c b/drivers/net/ll_temac_main.c
index a18e3485476e..30474d6b15c3 100644
--- a/drivers/net/ll_temac_main.c
+++ b/drivers/net/ll_temac_main.c
@@ -250,20 +250,20 @@ static void temac_set_multicast_list(struct net_device *ndev)
 		temac_indirect_out32(lp, XTE_AFM_OFFSET, XTE_AFM_EPPRM_MASK);
 		dev_info(&ndev->dev, "Promiscuous mode enabled.\n");
 	} else if (!netdev_mc_empty(ndev)) {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		i = 0;
-		netdev_for_each_mc_addr(mclist, ndev) {
+		netdev_for_each_mc_addr(ha, ndev) {
 			if (i >= MULTICAST_CAM_TABLE_NUM)
 				break;
-			multi_addr_msw = ((mclist->dmi_addr[3] << 24) |
-					  (mclist->dmi_addr[2] << 16) |
-					  (mclist->dmi_addr[1] << 8) |
-					  (mclist->dmi_addr[0]));
+			multi_addr_msw = ((ha->addr[3] << 24) |
+					  (ha->addr[2] << 16) |
+					  (ha->addr[1] << 8) |
+					  (ha->addr[0]));
 			temac_indirect_out32(lp, XTE_MAW0_OFFSET,
 					     multi_addr_msw);
-			multi_addr_lsw = ((mclist->dmi_addr[5] << 8) |
-					  (mclist->dmi_addr[4]) | (i << 16));
+			multi_addr_lsw = ((ha->addr[5] << 8) |
+					  (ha->addr[4]) | (i << 16));
 			temac_indirect_out32(lp, XTE_MAW1_OFFSET,
 					     multi_addr_lsw);
 			i++;
diff --git a/drivers/net/lp486e.c b/drivers/net/lp486e.c
index 3e3cc04defd0..72379c5439dc 100644
--- a/drivers/net/lp486e.c
+++ b/drivers/net/lp486e.c
@@ -1256,7 +1256,7 @@ static void set_multicast_list(struct net_device *dev) {
 			dev->name, netdev_mc_count(dev));
 
 	if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		char *cp;
 		cmd = kmalloc(sizeof(struct i596_cmd) + 2 +
 			      netdev_mc_count(dev) * 6, GFP_ATOMIC);
@@ -1267,8 +1267,8 @@ static void set_multicast_list(struct net_device *dev) {
 		cmd->command = CmdMulticastList;
 		*((unsigned short *) (cmd + 1)) = netdev_mc_count(dev) * 6;
 		cp = ((char *)(cmd + 1))+2;
-		netdev_for_each_mc_addr(dmi, dev) {
-			memcpy(cp, dmi->dmi_addr, 6);
+		netdev_for_each_mc_addr(ha, dev) {
+			memcpy(cp, ha->addr, 6);
 			cp += 6;
 		}
 		if (i596_debug & LOG_SRCDST)
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index c8a18a6203c8..eab121945d7c 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -882,15 +882,15 @@ static int hash_get_index(__u8 *addr)
  */
 static void macb_sethashtable(struct net_device *dev)
 {
-	struct dev_mc_list *curr;
+	struct netdev_hw_addr *ha;
 	unsigned long mc_filter[2];
 	unsigned int bitnr;
 	struct macb *bp = netdev_priv(dev);
 
 	mc_filter[0] = mc_filter[1] = 0;
 
-	netdev_for_each_mc_addr(curr, dev) {
-		bitnr = hash_get_index(curr->dmi_addr);
+	netdev_for_each_mc_addr(ha, dev) {
+		bitnr = hash_get_index(ha->addr);
 		mc_filter[bitnr >> 5] |= 1 << (bitnr & 31);
 	}
 
diff --git a/drivers/net/mace.c b/drivers/net/mace.c
index ab5f0bf6d1ae..2328a7399dcb 100644
--- a/drivers/net/mace.c
+++ b/drivers/net/mace.c
@@ -598,7 +598,7 @@ static void mace_set_multicast(struct net_device *dev)
 	mp->maccc |= PROM;
     } else {
 	unsigned char multicast_filter[8];
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
 	if (dev->flags & IFF_ALLMULTI) {
 	    for (i = 0; i < 8; i++)
@@ -606,8 +606,8 @@ static void mace_set_multicast(struct net_device *dev)
 	} else {
 	    for (i = 0; i < 8; i++)
 		multicast_filter[i] = 0;
-	    netdev_for_each_mc_addr(dmi, dev) {
-	        crc = ether_crc_le(6, dmi->dmi_addr);
+	    netdev_for_each_mc_addr(ha, dev) {
+	        crc = ether_crc_le(6, ha->addr);
 		i = crc >> 26;	/* bit number in multicast_filter */
 		multicast_filter[i >> 3] |= 1 << (i & 7);
 	    }
diff --git a/drivers/net/macmace.c b/drivers/net/macmace.c
index 13ba8f4afb7e..8a50c67e5928 100644
--- a/drivers/net/macmace.c
+++ b/drivers/net/macmace.c
@@ -508,7 +508,7 @@ static void mace_set_multicast(struct net_device *dev)
 		mb->maccc |= PROM;
 	} else {
 		unsigned char multicast_filter[8];
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 
 		if (dev->flags & IFF_ALLMULTI) {
 			for (i = 0; i < 8; i++) {
@@ -517,8 +517,8 @@ static void mace_set_multicast(struct net_device *dev)
 		} else {
 			for (i = 0; i < 8; i++)
 				multicast_filter[i] = 0;
-			netdev_for_each_mc_addr(dmi, dev) {
-				crc = ether_crc_le(6, dmi->dmi_addr);
+			netdev_for_each_mc_addr(ha, dev) {
+				crc = ether_crc_le(6, ha->addr);
 				/* bit number in multicast_filter */
 				i = crc >> 26;
 				multicast_filter[i >> 3] |= 1 << (i & 7);
diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c
index 5eb96fe6ec5c..455464223b43 100644
--- a/drivers/net/mlx4/en_netdev.c
+++ b/drivers/net/mlx4/en_netdev.c
@@ -168,7 +168,7 @@ static void mlx4_en_clear_list(struct net_device *dev)
 static void mlx4_en_cache_mclist(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	char *mc_addrs;
 	int mc_addrs_cnt = netdev_mc_count(dev);
 	int i;
@@ -179,8 +179,8 @@ static void mlx4_en_cache_mclist(struct net_device *dev)
 		return;
 	}
 	i = 0;
-	netdev_for_each_mc_addr(mclist, dev)
-		memcpy(mc_addrs + i++ * ETH_ALEN, mclist->dmi_addr, ETH_ALEN);
+	netdev_for_each_mc_addr(ha, dev)
+		memcpy(mc_addrs + i++ * ETH_ALEN, ha->addr, ETH_ALEN);
 	priv->mc_addrs = mc_addrs;
 	priv->mc_addrs_cnt = mc_addrs_cnt;
 }
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index c97b6e4365a9..d5ebe43b0e65 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1769,7 +1769,7 @@ static void mv643xx_eth_program_multicast_filter(struct net_device *dev)
 	struct mv643xx_eth_private *mp = netdev_priv(dev);
 	u32 *mc_spec;
 	u32 *mc_other;
-	struct dev_addr_list *addr;
+	struct netdev_hw_addr *ha;
 	int i;
 
 	if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
@@ -1794,8 +1794,8 @@ oom:
 	memset(mc_spec, 0, 0x100);
 	memset(mc_other, 0, 0x100);
 
-	netdev_for_each_mc_addr(addr, dev) {
-		u8 *a = addr->da_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		u8 *a = ha->addr;
 		u32 *table;
 		int entry;
 
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index e84dd3ee9c5a..6d33adf988dc 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -3001,7 +3001,7 @@ static void myri10ge_set_multicast_list(struct net_device *dev)
 {
 	struct myri10ge_priv *mgp = netdev_priv(dev);
 	struct myri10ge_cmd cmd;
-	struct dev_mc_list *mc_list;
+	struct netdev_hw_addr *ha;
 	__be32 data[2] = { 0, 0 };
 	int err;
 
@@ -3038,8 +3038,8 @@ static void myri10ge_set_multicast_list(struct net_device *dev)
 	}
 
 	/* Walk the multicast list, and add each address */
-	netdev_for_each_mc_addr(mc_list, dev) {
-		memcpy(data, &mc_list->dmi_addr, 6);
+	netdev_for_each_mc_addr(ha, dev) {
+		memcpy(data, &ha->addr, 6);
 		cmd.data0 = ntohl(data[0]);
 		cmd.data1 = ntohl(data[1]);
 		err = myri10ge_send_cmd(mgp, MXGEFW_JOIN_MULTICAST_GROUP,
@@ -3047,7 +3047,7 @@ static void myri10ge_set_multicast_list(struct net_device *dev)
 
 		if (err != 0) {
 			netdev_err(dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, error status:%d %pM\n",
-				   err, mc_list->dmi_addr);
+				   err, ha->addr);
 			goto abort;
 		}
 	}
diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c
index e52038783245..9250bf6573ec 100644
--- a/drivers/net/natsemi.c
+++ b/drivers/net/natsemi.c
@@ -2493,12 +2493,12 @@ static void __set_rx_mode(struct net_device *dev)
 		rx_mode = RxFilterEnable | AcceptBroadcast
 			| AcceptAllMulticast | AcceptMyPhys;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		int i;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
-			int b = (ether_crc(ETH_ALEN, mclist->dmi_addr) >> 23) & 0x1ff;
+		netdev_for_each_mc_addr(ha, dev) {
+			int b = (ether_crc(ETH_ALEN, ha->addr) >> 23) & 0x1ff;
 			mc_filter[b/8] |= (1 << (b & 0x07));
 		}
 		rx_mode = RxFilterEnable | AcceptBroadcast
diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c
index a945591298a8..781ca893ee0a 100644
--- a/drivers/net/netxen/netxen_nic_hw.c
+++ b/drivers/net/netxen/netxen_nic_hw.c
@@ -537,7 +537,7 @@ netxen_nic_set_mcast_addr(struct netxen_adapter *adapter,
 void netxen_p2_nic_set_multi(struct net_device *netdev)
 {
 	struct netxen_adapter *adapter = netdev_priv(netdev);
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u8 null_addr[6];
 	int i;
 
@@ -571,8 +571,8 @@ void netxen_p2_nic_set_multi(struct net_device *netdev)
 	netxen_nic_enable_mcast_filter(adapter);
 
 	i = 0;
-	netdev_for_each_mc_addr(mc_ptr, netdev)
-		netxen_nic_set_mcast_addr(adapter, i++, mc_ptr->dmi_addr);
+	netdev_for_each_mc_addr(ha, netdev)
+		netxen_nic_set_mcast_addr(adapter, i++, ha->addr);
 
 	/* Clear out remaining addresses */
 	while (i < adapter->max_mc_count)
@@ -680,7 +680,7 @@ static int nx_p3_nic_add_mac(struct netxen_adapter *adapter,
 void netxen_p3_nic_set_multi(struct net_device *netdev)
 {
 	struct netxen_adapter *adapter = netdev_priv(netdev);
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u8 bcast_addr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 	u32 mode = VPORT_MISS_MODE_DROP;
 	LIST_HEAD(del_list);
@@ -707,8 +707,8 @@ void netxen_p3_nic_set_multi(struct net_device *netdev)
 	}
 
 	if (!netdev_mc_empty(netdev)) {
-		netdev_for_each_mc_addr(mc_ptr, netdev)
-			nx_p3_nic_add_mac(adapter, mc_ptr->dmi_addr, &del_list);
+		netdev_for_each_mc_addr(ha, netdev)
+			nx_p3_nic_add_mac(adapter, ha->addr, &del_list);
 	}
 
 send_fw_cmd:
diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c
index 05c29c2cef2a..a76fabe26292 100644
--- a/drivers/net/ni52.c
+++ b/drivers/net/ni52.c
@@ -596,7 +596,7 @@ static int init586(struct net_device *dev)
 	struct iasetup_cmd_struct __iomem *ias_cmd;
 	struct tdr_cmd_struct __iomem *tdr_cmd;
 	struct mcsetup_cmd_struct __iomem *mc_cmd;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int num_addrs = netdev_mc_count(dev);
 
 	ptr = p->scb + 1;
@@ -725,8 +725,8 @@ static int init586(struct net_device *dev)
 		writew(num_addrs * 6, &mc_cmd->mc_cnt);
 
 		i = 0;
-		netdev_for_each_mc_addr(dmi, dev)
-			memcpy_toio(mc_cmd->mc_list[i++], dmi->dmi_addr, 6);
+		netdev_for_each_mc_addr(ha, dev)
+			memcpy_toio(mc_cmd->mc_list[i++], ha->addr, 6);
 
 		writew(make16(mc_cmd), &p->scb->cbl_offset);
 		writeb(CUC_START, &p->scb->cmd_cuc);
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 0678f3106cbc..7b52c466cf48 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -6313,7 +6313,6 @@ static void niu_set_rx_mode(struct net_device *dev)
 {
 	struct niu *np = netdev_priv(dev);
 	int i, alt_cnt, err;
-	struct dev_addr_list *addr;
 	struct netdev_hw_addr *ha;
 	unsigned long flags;
 	u16 hash[16] = { 0, };
@@ -6365,8 +6364,8 @@ static void niu_set_rx_mode(struct net_device *dev)
 		for (i = 0; i < 16; i++)
 			hash[i] = 0xffff;
 	} else if (!netdev_mc_empty(dev)) {
-		netdev_for_each_mc_addr(addr, dev) {
-			u32 crc = ether_crc_le(ETH_ALEN, addr->da_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			u32 crc = ether_crc_le(ETH_ALEN, ha->addr);
 
 			crc >>= 24;
 			hash[crc >> 4] |= (1 << (15 - (crc & 0xf)));
diff --git a/drivers/net/octeon/octeon_mgmt.c b/drivers/net/octeon/octeon_mgmt.c
index be368e5cbf75..ee894ed35f7f 100644
--- a/drivers/net/octeon/octeon_mgmt.c
+++ b/drivers/net/octeon/octeon_mgmt.c
@@ -474,7 +474,7 @@ static void octeon_mgmt_set_rx_filtering(struct net_device *netdev)
 	unsigned int cam_mode = 1; /* 1 - Accept on CAM match */
 	unsigned int multicast_mode = 1; /* 1 - Reject all multicast.  */
 	struct octeon_mgmt_cam_state cam_state;
-	struct dev_addr_list *list;
+	struct netdev_hw_addr *ha;
 	struct list_head *pos;
 	int available_cam_entries;
 
@@ -510,8 +510,8 @@ static void octeon_mgmt_set_rx_filtering(struct net_device *netdev)
 		}
 	}
 	if (multicast_mode == 0) {
-		netdev_for_each_mc_addr(list, netdev)
-			octeon_mgmt_cam_state_add(&cam_state, list->da_addr);
+		netdev_for_each_mc_addr(ha, netdev)
+			octeon_mgmt_cam_state_add(&cam_state, ha->addr);
 	}
 
 
diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c
index 36785853a149..dc3b4c7914fd 100644
--- a/drivers/net/pci-skeleton.c
+++ b/drivers/net/pci-skeleton.c
@@ -1813,12 +1813,12 @@ static void netdrv_set_rx_mode(struct net_device *dev)
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0xffffffff;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
-			int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, dev) {
+			int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
 
 			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
 		}
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index 09291e60d309..333cb3344d54 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -1623,11 +1623,11 @@ static struct net_device_stats *get_stats(struct net_device *dev)
  
 static inline void make_mc_bits(u8 *bits, struct net_device *dev)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	u32 crc;
 
-	netdev_for_each_mc_addr(dmi, dev) {
-		crc = ether_crc(ETH_ALEN, dmi->dmi_addr);
+	netdev_for_each_mc_addr(ha, dev) {
+		crc = ether_crc(ETH_ALEN, ha->addr);
 		/* 
 		 * The 8390 uses the 6 most significant bits of the
 		 * CRC to index the multicast table.
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index b9dc80b9d04a..6734f7d6da98 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -1196,11 +1196,11 @@ static void set_rx_mode(struct net_device *dev)
 	memset(mc_filter, 0x00, sizeof(mc_filter));
 	outb(1, ioaddr + RX_MODE);	/* Ignore almost all multicasts. */
     } else {
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 
 	memset(mc_filter, 0, sizeof(mc_filter));
-	netdev_for_each_mc_addr(mclist, dev) {
-	    unsigned int bit = ether_crc_le(ETH_ALEN, mclist->dmi_addr) >> 26;
+	netdev_for_each_mc_addr(ha, dev) {
+	    unsigned int bit = ether_crc_le(ETH_ALEN, ha->addr) >> 26;
 	    mc_filter[bit >> 3] |= (1 << (bit & 7));
 	}
 	outb(2, ioaddr + RX_MODE);	/* Use normal mode. */
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index c717b143f11a..c516c1996354 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -1475,7 +1475,7 @@ static void set_multicast_list(struct net_device *dev)
 {
   mace_private *lp = netdev_priv(dev);
   int adr[ETHER_ADDR_LEN] = {0}; /* Ethernet address */
-  struct dev_mc_list *dmi;
+  struct netdev_hw_addr *ha;
 
 #ifdef PCMCIA_DEBUG
   {
@@ -1495,8 +1495,8 @@ static void set_multicast_list(struct net_device *dev)
   if (num_addrs > 0) {
     /* Calculate multicast logical address filter */
     memset(lp->multicast_ladrf, 0, MACE_LADRF_LEN);
-    netdev_for_each_mc_addr(dmi, dev) {
-      memcpy(adr, dmi->dmi_addr, ETHER_ADDR_LEN);
+    netdev_for_each_mc_addr(ha, dev) {
+      memcpy(adr, ha->addr, ETHER_ADDR_LEN);
       BuildLAF(lp->multicast_ladrf, adr);
     }
   }
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index 5adc662c4bfb..f45c626003a4 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -1616,12 +1616,12 @@ static void set_rx_mode(struct net_device *dev)
 	rx_cfg_setting = RxStripCRC | RxEnable | RxAllMulti;
     else {
 	if (!netdev_mc_empty(dev)) {
-	    struct dev_mc_list *mc_addr;
+	    struct netdev_hw_addr *ha;
 
-	    netdev_for_each_mc_addr(mc_addr, dev) {
-		u_int position = ether_crc(6, mc_addr->dmi_addr);
+	    netdev_for_each_mc_addr(ha, dev) {
+		u_int position = ether_crc(6, ha->addr);
 #ifndef final_version		/* Verify multicast address. */
-		if ((mc_addr->dmi_addr[0] & 1) == 0)
+		if ((ha->addr[0] & 1) == 0)
 		    continue;
 #endif
 		multicast_table[position >> 29] |= 1 << ((position >> 26) & 7);
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index 4d1802e457be..656be931207a 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -1398,7 +1398,7 @@ static void set_addresses(struct net_device *dev)
 {
 	unsigned int ioaddr = dev->base_addr;
 	local_info_t *lp = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	struct set_address_info sa_info;
 	int i;
 
@@ -1413,10 +1413,10 @@ static void set_addresses(struct net_device *dev)
 
 	set_address(&sa_info, dev->dev_addr);
 	i = 0;
-	netdev_for_each_mc_addr(dmi, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		if (i++ == 9)
 			break;
-		set_address(&sa_info, dmi->dmi_addr);
+		set_address(&sa_info, ha->addr);
 	}
 	while (i++ < 9)
 		set_address(&sa_info, dev->dev_addr);
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c
index 084d78dd1637..a2254f749a9a 100644
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -2590,7 +2590,7 @@ static void pcnet32_load_multicast(struct net_device *dev)
 	struct pcnet32_private *lp = netdev_priv(dev);
 	volatile struct pcnet32_init_block *ib = lp->init_block;
 	volatile __le16 *mcast_table = (__le16 *)ib->filter;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned long ioaddr = dev->base_addr;
 	char *addrs;
 	int i;
@@ -2611,8 +2611,8 @@ static void pcnet32_load_multicast(struct net_device *dev)
 	ib->filter[1] = 0;
 
 	/* Add addresses */
-	netdev_for_each_mc_addr(dmi, dev) {
-		addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrs = ha->addr;
 
 		/* multicast address? */
 		if (!(*addrs & 1))
diff --git a/drivers/net/ps3_gelic_net.c b/drivers/net/ps3_gelic_net.c
index a602da6418c8..fed5df9c5ea1 100644
--- a/drivers/net/ps3_gelic_net.c
+++ b/drivers/net/ps3_gelic_net.c
@@ -546,7 +546,7 @@ out:
 void gelic_net_set_multi(struct net_device *netdev)
 {
 	struct gelic_card *card = netdev_card(netdev);
-	struct dev_mc_list *mc;
+	struct netdev_hw_addr *ha;
 	unsigned int i;
 	uint8_t *p;
 	u64 addr;
@@ -580,9 +580,9 @@ void gelic_net_set_multi(struct net_device *netdev)
 	}
 
 	/* set multicast addresses */
-	netdev_for_each_mc_addr(mc, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		addr = 0;
-		p = mc->dmi_addr;
+		p = ha->addr;
 		for (i = 0; i < ETH_ALEN; i++) {
 			addr <<= 8;
 			addr |= *p++;
diff --git a/drivers/net/qlcnic/qlcnic_hw.c b/drivers/net/qlcnic/qlcnic_hw.c
index 14c999ab4222..9a1daa4ebe22 100644
--- a/drivers/net/qlcnic/qlcnic_hw.c
+++ b/drivers/net/qlcnic/qlcnic_hw.c
@@ -415,7 +415,7 @@ static int qlcnic_nic_add_mac(struct qlcnic_adapter *adapter, u8 *addr)
 void qlcnic_set_multi(struct net_device *netdev)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u8 bcast_addr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 	u32 mode = VPORT_MISS_MODE_DROP;
 
@@ -434,8 +434,8 @@ void qlcnic_set_multi(struct net_device *netdev)
 	}
 
 	if (!netdev_mc_empty(netdev)) {
-		netdev_for_each_mc_addr(mc_ptr, netdev) {
-			qlcnic_nic_add_mac(adapter, mc_ptr->dmi_addr);
+		netdev_for_each_mc_addr(ha, netdev) {
+			qlcnic_nic_add_mac(adapter, ha->addr);
 		}
 	}
 
diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index fd34f266c0a8..76df96813a7b 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -4207,7 +4207,7 @@ static struct net_device_stats *qlge_get_stats(struct net_device
 static void qlge_set_multicast_list(struct net_device *ndev)
 {
 	struct ql_adapter *qdev = (struct ql_adapter *)netdev_priv(ndev);
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	int i, status;
 
 	status = ql_sem_spinlock(qdev, SEM_RT_IDX_MASK);
@@ -4271,8 +4271,8 @@ static void qlge_set_multicast_list(struct net_device *ndev)
 		if (status)
 			goto exit;
 		i = 0;
-		netdev_for_each_mc_addr(mc_ptr, ndev) {
-			if (ql_set_mac_addr_reg(qdev, (u8 *) mc_ptr->dmi_addr,
+		netdev_for_each_mc_addr(ha, ndev) {
+			if (ql_set_mac_addr_reg(qdev, (u8 *) ha->addr,
 						MAC_ADDR_TYPE_MULTI_MAC, i)) {
 				netif_err(qdev, hw, qdev->ndev,
 					  "Failed to loadmulticast address.\n");
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index 15d5373dc8f3..f5a0e963e688 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -938,7 +938,7 @@ static void r6040_multicast_list(struct net_device *dev)
 	u16 *adrp;
 	u16 reg;
 	unsigned long flags;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int i;
 
 	/* MAC Address */
@@ -973,8 +973,8 @@ static void r6040_multicast_list(struct net_device *dev)
 		for (i = 0; i < 4; i++)
 			hash_table[i] = 0;
 
-		netdev_for_each_mc_addr(dmi, dev) {
-			char *addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			char *addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
@@ -994,9 +994,9 @@ static void r6040_multicast_list(struct net_device *dev)
 	}
 	/* Multicast Address 1~4 case */
 	i = 0;
-	netdev_for_each_mc_addr(dmi, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		if (i < MCAST_MAX) {
-			adrp = (u16 *) dmi->dmi_addr;
+			adrp = (u16 *) ha->addr;
 			iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
 			iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
 			iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index f7ffa5d8ffeb..64cd250f642d 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -4785,12 +4785,12 @@ static void rtl_set_rx_mode(struct net_device *dev)
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0xffffffff;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		rx_mode = AcceptBroadcast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
-			int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, dev) {
+			int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
 			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
 			rx_mode |= AcceptMulticast;
 		}
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 2eb7f8a0d926..ad5a6a873b29 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -4964,7 +4964,7 @@ static struct net_device_stats *s2io_get_stats(struct net_device *dev)
 static void s2io_set_multicast(struct net_device *dev)
 {
 	int i, j, prev_cnt;
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	struct s2io_nic *sp = netdev_priv(dev);
 	struct XENA_dev_config __iomem *bar0 = sp->bar0;
 	u64 val64 = 0, multi_mac = 0x010203040506ULL, mask =
@@ -5093,12 +5093,12 @@ static void s2io_set_multicast(struct net_device *dev)
 
 		/* Create the new Rx filter list and update the same in H/W. */
 		i = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
-			memcpy(sp->usr_addrs[i].addr, mclist->dmi_addr,
+		netdev_for_each_mc_addr(ha, dev) {
+			memcpy(sp->usr_addrs[i].addr, ha->addr,
 			       ETH_ALEN);
 			mac_addr = 0;
 			for (j = 0; j < ETH_ALEN; j++) {
-				mac_addr |= mclist->dmi_addr[j];
+				mac_addr |= ha->addr[j];
 				mac_addr <<= 8;
 			}
 			mac_addr >>= 8;
diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c
index 9944e5d662c0..f7de960ca06f 100644
--- a/drivers/net/sb1250-mac.c
+++ b/drivers/net/sb1250-mac.c
@@ -2112,7 +2112,7 @@ static void sbmac_setmulti(struct sbmac_softc *sc)
 	uint64_t reg;
 	void __iomem *port;
 	int idx;
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	struct net_device *dev = sc->sbm_dev;
 
 	/*
@@ -2161,10 +2161,10 @@ static void sbmac_setmulti(struct sbmac_softc *sc)
 	 * XXX if the table overflows */
 
 	idx = 1;		/* skip station address */
-	netdev_for_each_mc_addr(mclist, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		if (idx == MAC_ADDR_COUNT)
 			break;
-		reg = sbmac_addr2reg(mclist->dmi_addr);
+		reg = sbmac_addr2reg(ha->addr);
 		port = sc->sbm_base + R_MAC_ADDR_BASE+(idx * sizeof(uint64_t));
 		__raw_writeq(reg, port);
 		idx++;
diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c
index d87c4787fffa..1b3260588933 100644
--- a/drivers/net/sc92031.c
+++ b/drivers/net/sc92031.c
@@ -433,13 +433,13 @@ static void _sc92031_set_mar(struct net_device *dev)
 	    (dev->flags & IFF_ALLMULTI))
 		mar0 = mar1 = 0xffffffff;
 	else if (dev->flags & IFF_MULTICAST) {
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 
-		netdev_for_each_mc_addr(mc_list, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			u32 crc;
 			unsigned bit = 0;
 
-			crc = ~ether_crc(ETH_ALEN, mc_list->dmi_addr);
+			crc = ~ether_crc(ETH_ALEN, ha->addr);
 			crc >>= 24;
 
 			if (crc & 0x01)	bit |= 0x02;
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index 88f2fb193abe..e07b82b266d9 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -1602,7 +1602,7 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data)
 static void efx_set_multicast_list(struct net_device *net_dev)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
-	struct dev_mc_list *mc_list;
+	struct netdev_hw_addr *ha;
 	union efx_multicast_hash *mc_hash = &efx->multicast_hash;
 	u32 crc;
 	int bit;
@@ -1614,8 +1614,8 @@ static void efx_set_multicast_list(struct net_device *net_dev)
 		memset(mc_hash, 0xff, sizeof(*mc_hash));
 	} else {
 		memset(mc_hash, 0x00, sizeof(*mc_hash));
-		netdev_for_each_mc_addr(mc_list, net_dev) {
-			crc = ether_crc_le(ETH_ALEN, mc_list->dmi_addr);
+		netdev_for_each_mc_addr(ha, net_dev) {
+			crc = ether_crc_le(ETH_ALEN, ha->addr);
 			bit = crc & (EFX_MCAST_HASH_ENTRIES - 1);
 			set_bit_le(bit, mc_hash->byte);
 		}
diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c
index 760d9e83a465..6a05b93ae6c9 100644
--- a/drivers/net/sis190.c
+++ b/drivers/net/sis190.c
@@ -848,13 +848,13 @@ static void sis190_set_rx_mode(struct net_device *dev)
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0xffffffff;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		rx_mode = AcceptBroadcast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			int bit_nr =
-				ether_crc(ETH_ALEN, mclist->dmi_addr) & 0x3f;
+				ether_crc(ETH_ALEN, ha->addr) & 0x3f;
 			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
 			rx_mode |= AcceptMulticast;
 		}
diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c
index bc7ff411aad5..6293592635be 100644
--- a/drivers/net/sis900.c
+++ b/drivers/net/sis900.c
@@ -2298,12 +2298,14 @@ static void set_rx_mode(struct net_device *net_dev)
 		/* Accept Broadcast packet, destination address matchs our
 		 * MAC address, use Receive Filter to reject unwanted MCAST
 		 * packets */
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		rx_mode = RFAAB;
 
-		netdev_for_each_mc_addr(mclist, net_dev) {
-			unsigned int bit_nr =
-				sis900_mcast_bitnr(mclist->dmi_addr, sis_priv->chipset_rev);
+		netdev_for_each_mc_addr(ha, net_dev) {
+			unsigned int bit_nr;
+
+			bit_nr = sis900_mcast_bitnr(ha->addr,
+						    sis_priv->chipset_rev);
 			mc_filter[bit_nr >> 4] |= (1 << (bit_nr & 0xf));
 		}
 	}
diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c
index 1921a54ea995..7e5b9f310fb2 100644
--- a/drivers/net/skfp/skfddi.c
+++ b/drivers/net/skfp/skfddi.c
@@ -852,7 +852,7 @@ static void skfp_ctl_set_multicast_list(struct net_device *dev)
 static void skfp_ctl_set_multicast_list_wo_lock(struct net_device *dev)
 {
 	struct s_smc *smc = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
 	/* Enable promiscuous mode, if necessary */
 	if (dev->flags & IFF_PROMISC) {
@@ -876,13 +876,13 @@ static void skfp_ctl_set_multicast_list_wo_lock(struct net_device *dev)
 				/* use exact filtering */
 
 				// point to first multicast addr
-				netdev_for_each_mc_addr(dmi, dev) {
-					mac_add_multicast(smc, 
-							  (struct fddi_addr *)dmi->dmi_addr, 
-							  1);
+				netdev_for_each_mc_addr(ha, dev) {
+					mac_add_multicast(smc,
+						(struct fddi_addr *)ha->addr,
+						1);
 
 					pr_debug(KERN_INFO "ENABLE MC ADDRESS: %pMF\n",
-						dmi->dmi_addr);
+						ha->addr);
 				}
 
 			} else {	// more MC addresses than HW supports
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index bd8c5e8413b1..de5ef3877baf 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -2917,7 +2917,7 @@ static void genesis_set_multicast(struct net_device *dev)
 	struct skge_port *skge = netdev_priv(dev);
 	struct skge_hw *hw = skge->hw;
 	int port = skge->port;
-	struct dev_mc_list *list;
+	struct netdev_hw_addr *ha;
 	u32 mode;
 	u8 filter[8];
 
@@ -2937,8 +2937,8 @@ static void genesis_set_multicast(struct net_device *dev)
 		    skge->flow_status == FLOW_STAT_SYMMETRIC)
 			genesis_add_filter(filter, pause_mc_addr);
 
-		netdev_for_each_mc_addr(list, dev)
-			genesis_add_filter(filter, list->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			genesis_add_filter(filter, ha->addr);
 	}
 
 	xm_write32(hw, port, XM_MODE, mode);
@@ -2956,7 +2956,7 @@ static void yukon_set_multicast(struct net_device *dev)
 	struct skge_port *skge = netdev_priv(dev);
 	struct skge_hw *hw = skge->hw;
 	int port = skge->port;
-	struct dev_mc_list *list;
+	struct netdev_hw_addr *ha;
 	int rx_pause = (skge->flow_status == FLOW_STAT_REM_SEND ||
 			skge->flow_status == FLOW_STAT_SYMMETRIC);
 	u16 reg;
@@ -2979,8 +2979,8 @@ static void yukon_set_multicast(struct net_device *dev)
 		if (rx_pause)
 			yukon_add_filter(filter, pause_mc_addr);
 
-		netdev_for_each_mc_addr(list, dev)
-			yukon_add_filter(filter, list->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			yukon_add_filter(filter, ha->addr);
 	}
 
 
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index f1c0ec308bfe..5b97edb7a35f 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -3627,7 +3627,7 @@ static void sky2_set_multicast(struct net_device *dev)
 	struct sky2_port *sky2 = netdev_priv(dev);
 	struct sky2_hw *hw = sky2->hw;
 	unsigned port = sky2->port;
-	struct dev_mc_list *list;
+	struct netdev_hw_addr *ha;
 	u16 reg;
 	u8 filter[8];
 	int rx_pause;
@@ -3651,8 +3651,8 @@ static void sky2_set_multicast(struct net_device *dev)
 		if (rx_pause)
 			sky2_add_filter(filter, pause_mc_addr);
 
-		netdev_for_each_mc_addr(list, dev)
-			sky2_add_filter(filter, list->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			sky2_add_filter(filter, ha->addr);
 	}
 
 	gma_write16(hw, port, GM_MC_ADDR_H1,
diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c
index 0f97b5af91e4..6278734c735f 100644
--- a/drivers/net/smc911x.c
+++ b/drivers/net/smc911x.c
@@ -1341,7 +1341,7 @@ static void smc911x_set_multicast_list(struct net_device *dev)
 	 * within that register.
 	 */
 	else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *cur_addr;
+		struct netdev_hw_addr *ha;
 
 		/* Set the Hash perfec mode */
 		mcr |= MAC_CR_HPFILT_;
@@ -1349,19 +1349,16 @@ static void smc911x_set_multicast_list(struct net_device *dev)
 		/* start with a table of all zeros: reject all */
 		memset(multicast_table, 0, sizeof(multicast_table));
 
-		netdev_for_each_mc_addr(cur_addr, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			u32 position;
 
-			/* do we have a pointer here? */
-			if (!cur_addr)
-				break;
 			/* make sure this is a multicast address -
 				shouldn't this be a given if we have it here ? */
-			if (!(*cur_addr->dmi_addr & 1))
-				 continue;
+			if (!(*ha->addr & 1))
+				continue;
 
 			/* upper 6 bits are used as hash index */
-			position = ether_crc(ETH_ALEN, cur_addr->dmi_addr)>>26;
+			position = ether_crc(ETH_ALEN, ha->addr)>>26;
 
 			multicast_table[position>>5] |= 1 << (position&0x1f);
 		}
diff --git a/drivers/net/smc9194.c b/drivers/net/smc9194.c
index ee1487791d55..9e5e9e792e86 100644
--- a/drivers/net/smc9194.c
+++ b/drivers/net/smc9194.c
@@ -417,7 +417,7 @@ static void smc_shutdown( int ioaddr )
 
 
 /*
- . Function: smc_setmulticast( int ioaddr, int count, dev_mc_list * adds )
+ . Function: smc_setmulticast( int ioaddr, struct net_device *dev )
  . Purpose:
  .    This sets the internal hardware table to filter out unwanted multicast
  .    packets before they take up memory.
@@ -438,26 +438,23 @@ static void smc_setmulticast(int ioaddr, struct net_device *dev)
 {
 	int			i;
 	unsigned char		multicast_table[ 8 ];
-	struct dev_mc_list *cur_addr;
+	struct netdev_hw_addr *ha;
 	/* table for flipping the order of 3 bits */
 	unsigned char invert3[] = { 0, 4, 2, 6, 1, 5, 3, 7 };
 
 	/* start with a table of all zeros: reject all */
 	memset( multicast_table, 0, sizeof( multicast_table ) );
 
-	netdev_for_each_mc_addr(cur_addr, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		int position;
 
-		/* do we have a pointer here? */
-		if ( !cur_addr )
-			break;
 		/* make sure this is a multicast address - shouldn't this
 		   be a given if we have it here ? */
-		if ( !( *cur_addr->dmi_addr & 1 ) )
+		if (!(*ha->addr & 1))
 			continue;
 
 		/* only use the low order bits */
-		position = ether_crc_le(6, cur_addr->dmi_addr) & 0x3f;
+		position = ether_crc_le(6, ha->addr) & 0x3f;
 
 		/* do some messy swapping to put the bit in the right spot */
 		multicast_table[invert3[position&7]] |=
diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c
index 2e8133809dc0..20c756577631 100644
--- a/drivers/net/smc91x.c
+++ b/drivers/net/smc91x.c
@@ -1413,7 +1413,7 @@ static void smc_set_multicast_list(struct net_device *dev)
 	 * within that register.
 	 */
 	else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *cur_addr;
+		struct netdev_hw_addr *ha;
 
 		/* table for flipping the order of 3 bits */
 		static const unsigned char invert3[] = {0, 4, 2, 6, 1, 5, 3, 7};
@@ -1421,16 +1421,16 @@ static void smc_set_multicast_list(struct net_device *dev)
 		/* start with a table of all zeros: reject all */
 		memset(multicast_table, 0, sizeof(multicast_table));
 
-		netdev_for_each_mc_addr(cur_addr, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			int position;
 
 			/* make sure this is a multicast address -
 		   	   shouldn't this be a given if we have it here ? */
-			if (!(*cur_addr->dmi_addr & 1))
+			if (!(*ha->addr & 1))
 				continue;
 
 			/* only use the low order bits */
-			position = crc32_le(~0, cur_addr->dmi_addr, 6) & 0x3f;
+			position = crc32_le(~0, ha->addr, 6) & 0x3f;
 
 			/* do some messy swapping to put the bit in the right spot */
 			multicast_table[invert3[position&7]] |=
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index 4fd1d8b38788..d6e3a12d65d3 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -1383,13 +1383,13 @@ static void smsc911x_set_multicast_list(struct net_device *dev)
 		/* Enabling specific multicast addresses */
 		unsigned int hash_high = 0;
 		unsigned int hash_low = 0;
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 
 		pdata->set_bits_mask = MAC_CR_HPFILT_;
 		pdata->clear_bits_mask = (MAC_CR_PRMS_ | MAC_CR_MCPAS_);
 
-		netdev_for_each_mc_addr(mc_list, dev) {
-			unsigned int bitnum = smsc911x_hash(mc_list->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			unsigned int bitnum = smsc911x_hash(ha->addr);
 			unsigned int mask = 0x01 << (bitnum & 0x1F);
 
 			if (bitnum & 0x20)
diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c
index 34fa10d8ad40..5409ec1a5fd7 100644
--- a/drivers/net/smsc9420.c
+++ b/drivers/net/smsc9420.c
@@ -1063,12 +1063,12 @@ static void smsc9420_set_multicast_list(struct net_device *dev)
 		mac_cr |= MAC_CR_MCPAS_;
 		mac_cr &= (~MAC_CR_HPFILT_);
 	} else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 		u32 hash_lo = 0, hash_hi = 0;
 
 		smsc_dbg(HW, "Multicast filter enabled");
-		netdev_for_each_mc_addr(mc_list, dev) {
-			u32 bit_num = smsc9420_hash(mc_list->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			u32 bit_num = smsc9420_hash(ha->addr);
 			u32 mask = 1 << (bit_num & 0x1F);
 
 			if (bit_num & 0x20)
diff --git a/drivers/net/sonic.c b/drivers/net/sonic.c
index 287c251075e5..e5d67327d707 100644
--- a/drivers/net/sonic.c
+++ b/drivers/net/sonic.c
@@ -531,7 +531,7 @@ static void sonic_multicast_list(struct net_device *dev)
 {
 	struct sonic_local *lp = netdev_priv(dev);
 	unsigned int rcr;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned char *addr;
 	int i;
 
@@ -550,8 +550,8 @@ static void sonic_multicast_list(struct net_device *dev)
 				       netdev_mc_count(dev));
 			sonic_set_cam_enable(dev, 1);  /* always enable our own address */
 			i = 1;
-			netdev_for_each_mc_addr(dmi, dev) {
-				addr = dmi->dmi_addr;
+			netdev_for_each_mc_addr(ha, dev) {
+				addr = ha->addr;
 				sonic_cda_put(dev, i, SONIC_CD_CAP0, addr[1] << 8 | addr[0]);
 				sonic_cda_put(dev, i, SONIC_CD_CAP1, addr[3] << 8 | addr[2]);
 				sonic_cda_put(dev, i, SONIC_CD_CAP2, addr[5] << 8 | addr[4]);
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index 5ba9d989f8fc..c04c8f9a2d96 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -625,7 +625,7 @@ spider_net_get_multicast_hash(struct net_device *netdev, __u8 *addr)
 static void
 spider_net_set_multi(struct net_device *netdev)
 {
-	struct dev_mc_list *mc;
+	struct netdev_hw_addr *ha;
 	u8 hash;
 	int i;
 	u32 reg;
@@ -646,8 +646,8 @@ spider_net_set_multi(struct net_device *netdev)
 	hash = spider_net_get_multicast_hash(netdev, netdev->broadcast); */
 	set_bit(0xfd, bitmask);
 
-	netdev_for_each_mc_addr(mc, netdev) {
-		hash = spider_net_get_multicast_hash(netdev, mc->dmi_addr);
+	netdev_for_each_mc_addr(ha, netdev) {
+		hash = spider_net_get_multicast_hash(netdev, ha->addr);
 		set_bit(hash, bitmask);
 	}
 
diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c
index 6dfa69899019..8a6d27cdc0bd 100644
--- a/drivers/net/starfire.c
+++ b/drivers/net/starfire.c
@@ -1766,7 +1766,7 @@ static void set_rx_mode(struct net_device *dev)
 	struct netdev_private *np = netdev_priv(dev);
 	void __iomem *ioaddr = np->base;
 	u32 rx_mode = MinVLANPrio;
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	int i;
 #ifdef VLAN_SUPPORT
 
@@ -1804,8 +1804,8 @@ static void set_rx_mode(struct net_device *dev)
 		/* Use the 16 element perfect filter, skip first two entries. */
 		void __iomem *filter_addr = ioaddr + PerfFilterTable + 2 * 16;
 		__be16 *eaddrs;
-		netdev_for_each_mc_addr(mclist, dev) {
-			eaddrs = (__be16 *)mclist->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			eaddrs = (__be16 *) ha->addr;
 			writew(be16_to_cpu(eaddrs[2]), filter_addr); filter_addr += 4;
 			writew(be16_to_cpu(eaddrs[1]), filter_addr); filter_addr += 4;
 			writew(be16_to_cpu(eaddrs[0]), filter_addr); filter_addr += 8;
@@ -1825,10 +1825,10 @@ static void set_rx_mode(struct net_device *dev)
 		__le16 mc_filter[32] __attribute__ ((aligned(sizeof(long))));	/* Multicast hash filter */
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			/* The chip uses the upper 9 CRC bits
 			   as index into the hash table */
-			int bit_nr = ether_crc_le(ETH_ALEN, mclist->dmi_addr) >> 23;
+			int bit_nr = ether_crc_le(ETH_ALEN, ha->addr) >> 23;
 			__le32 *fptr = (__le32 *) &mc_filter[(bit_nr >> 4) & ~1];
 
 			*fptr |= cpu_to_le32(1 << (bit_nr & 31));
diff --git a/drivers/net/stmmac/dwmac100.c b/drivers/net/stmmac/dwmac100.c
index a183de29c39a..c0a1c9df3ac7 100644
--- a/drivers/net/stmmac/dwmac100.c
+++ b/drivers/net/stmmac/dwmac100.c
@@ -316,7 +316,7 @@ static void dwmac100_set_filter(struct net_device *dev)
 			   MAC_CONTROL_HO | MAC_CONTROL_HP);
 	} else {
 		u32 mc_filter[2];
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		/* Perfect filter mode for physical address and Hash
 		   filter for multicast */
@@ -325,11 +325,11 @@ static void dwmac100_set_filter(struct net_device *dev)
 			   MAC_CONTROL_IF | MAC_CONTROL_HO);
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			/* The upper 6 bits of the calculated CRC are used to
 			 * index the contens of the hash table */
 			int bit_nr =
-			    ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+			    ether_crc(ETH_ALEN, ha->addr) >> 26;
 			/* The most significant bit determines the register to
 			 * use (H/L) while the other 5 bits determine the bit
 			 * within the register. */
diff --git a/drivers/net/stmmac/dwmac1000_core.c b/drivers/net/stmmac/dwmac1000_core.c
index a6538ae4694c..bf73fda6adfb 100644
--- a/drivers/net/stmmac/dwmac1000_core.c
+++ b/drivers/net/stmmac/dwmac1000_core.c
@@ -94,17 +94,17 @@ static void dwmac1000_set_filter(struct net_device *dev)
 		writel(0xffffffff, ioaddr + GMAC_HASH_LOW);
 	} else if (!netdev_mc_empty(dev)) {
 		u32 mc_filter[2];
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		/* Hash filter for multicast */
 		value = GMAC_FRAME_FILTER_HMC;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			/* The upper 6 bits of the calculated CRC are used to
 			   index the contens of the hash table */
 			int bit_nr =
-			    bitrev32(~crc32_le(~0, mclist->dmi_addr, 6)) >> 26;
+			    bitrev32(~crc32_le(~0, ha->addr, 6)) >> 26;
 			/* The most significant bit determines the register to
 			 * use (H/L) while the other 5 bits determine the bit
 			 * within the register. */
diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c
index 2f6a760e5f21..2cfa065c39c0 100644
--- a/drivers/net/sun3_82586.c
+++ b/drivers/net/sun3_82586.c
@@ -413,7 +413,7 @@ static int init586(struct net_device *dev)
 	volatile struct iasetup_cmd_struct *ias_cmd;
 	volatile struct tdr_cmd_struct *tdr_cmd;
 	volatile struct mcsetup_cmd_struct *mc_cmd;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int num_addrs=netdev_mc_count(dev);
 
 	ptr = (void *) ((char *)p->scb + sizeof(struct scb_struct));
@@ -537,9 +537,9 @@ static int init586(struct net_device *dev)
 		mc_cmd->mc_cnt = swab16(num_addrs * 6);
 
 		i = 0;
-		netdev_for_each_mc_addr(dmi, dev)
+		netdev_for_each_mc_addr(ha, dev)
 			memcpy((char *) mc_cmd->mc_list[i++],
-			       dmi->dmi_addr, ETH_ALEN);
+			       ha->addr, ETH_ALEN);
 
 		p->scb->cbl_offset = make16(mc_cmd);
 		p->scb->cmd_cuc = CUC_START;
diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c
index a0bd361d5eca..4dd159b4116b 100644
--- a/drivers/net/sunbmac.c
+++ b/drivers/net/sunbmac.c
@@ -999,7 +999,7 @@ static void bigmac_set_multicast(struct net_device *dev)
 {
 	struct bigmac *bp = netdev_priv(dev);
 	void __iomem *bregs = bp->bregs;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	int i;
 	u32 tmp, crc;
@@ -1028,8 +1028,8 @@ static void bigmac_set_multicast(struct net_device *dev)
 		for (i = 0; i < 4; i++)
 			hash_table[i] = 0;
 
-		netdev_for_each_mc_addr(dmi, dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c
index a855934dfc3b..47e8cce10d1e 100644
--- a/drivers/net/sundance.c
+++ b/drivers/net/sundance.c
@@ -1523,13 +1523,13 @@ static void set_rx_mode(struct net_device *dev)
 		memset(mc_filter, 0xff, sizeof(mc_filter));
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 	} else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		int bit;
 		int index;
 		int crc;
 		memset (mc_filter, 0, sizeof (mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
-			crc = ether_crc_le (ETH_ALEN, mclist->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			crc = ether_crc_le(ETH_ALEN, ha->addr);
 			for (index=0, bit=0; bit < 6; bit++, crc <<= 1)
 				if (crc & 0x80000000) index |= 1 << bit;
 			mc_filter[index/16] |= (1 << (index % 16));
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 70196bc5fe61..58a27541ae39 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -1846,12 +1846,12 @@ static u32 gem_setup_multicast(struct gem *gp)
 	} else {
 		u16 hash_table[16];
 		u32 crc;
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		int i;
 
 		memset(hash_table, 0, sizeof(hash_table));
-		netdev_for_each_mc_addr(dmi, gp->dev) {
-			char *addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, gp->dev) {
+			char *addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index b17dbb11bd67..20deb14e98ff 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -1523,13 +1523,13 @@ static int happy_meal_init(struct happy_meal *hp)
 		hme_write32(hp, bregs + BMAC_HTABLE3, 0xffff);
 	} else if ((hp->dev->flags & IFF_PROMISC) == 0) {
 		u16 hash_table[4];
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		char *addrs;
 		u32 crc;
 
 		memset(hash_table, 0, sizeof(hash_table));
-		netdev_for_each_mc_addr(dmi, hp->dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, hp->dev) {
+			addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
@@ -2362,7 +2362,7 @@ static void happy_meal_set_multicast(struct net_device *dev)
 {
 	struct happy_meal *hp = netdev_priv(dev);
 	void __iomem *bregs = hp->bigmacregs;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	u32 crc;
 
@@ -2380,8 +2380,8 @@ static void happy_meal_set_multicast(struct net_device *dev)
 		u16 hash_table[4];
 
 		memset(hash_table, 0, sizeof(hash_table));
-		netdev_for_each_mc_addr(dmi, dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c
index d7c73f478ef5..674570584bd5 100644
--- a/drivers/net/sunlance.c
+++ b/drivers/net/sunlance.c
@@ -1170,7 +1170,7 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 static void lance_load_multicast(struct net_device *dev)
 {
 	struct lance_private *lp = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	u32 crc;
 	u32 val;
@@ -1195,8 +1195,8 @@ static void lance_load_multicast(struct net_device *dev)
 		return;
 
 	/* Add addresses */
-	netdev_for_each_mc_addr(dmi, dev) {
-		addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrs = ha->addr;
 
 		/* multicast address? */
 		if (!(*addrs & 1))
diff --git a/drivers/net/sunqe.c b/drivers/net/sunqe.c
index be637dce944c..239f09772191 100644
--- a/drivers/net/sunqe.c
+++ b/drivers/net/sunqe.c
@@ -627,7 +627,7 @@ static int qe_start_xmit(struct sk_buff *skb, struct net_device *dev)
 static void qe_set_multicast(struct net_device *dev)
 {
 	struct sunqe *qep = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	u8 new_mconfig = qep->mconfig;
 	char *addrs;
 	int i;
@@ -651,8 +651,8 @@ static void qe_set_multicast(struct net_device *dev)
 		u8 *hbytes = (unsigned char *) &hash_table[0];
 
 		memset(hash_table, 0, sizeof(hash_table));
-		netdev_for_each_mc_addr(dmi, dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
diff --git a/drivers/net/sunvnet.c b/drivers/net/sunvnet.c
index 6b1b7cea7f6b..6cf8b06be5cd 100644
--- a/drivers/net/sunvnet.c
+++ b/drivers/net/sunvnet.c
@@ -763,12 +763,12 @@ static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
 
 static void __update_mc_list(struct vnet *vp, struct net_device *dev)
 {
-	struct dev_addr_list *p;
+	struct netdev_hw_addr *ha;
 
-	netdev_for_each_mc_addr(p, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		struct vnet_mcast_entry *m;
 
-		m = __vnet_mc_find(vp, p->dmi_addr);
+		m = __vnet_mc_find(vp, ha->addr);
 		if (m) {
 			m->hit = 1;
 			continue;
@@ -778,7 +778,7 @@ static void __update_mc_list(struct vnet *vp, struct net_device *dev)
 			m = kzalloc(sizeof(*m), GFP_ATOMIC);
 			if (!m)
 				continue;
-			memcpy(m->addr, p->dmi_addr, ETH_ALEN);
+			memcpy(m->addr, ha->addr, ETH_ALEN);
 			m->hit = 1;
 
 			m->next = vp->mcast_list;
diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index 49bd84c0d583..36149ddace46 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -1954,16 +1954,16 @@ tc35815_set_multicast_list(struct net_device *dev)
 		/* Disable promiscuous mode, use normal mode. */
 		tc_writel(CAM_CompEn | CAM_BroadAcc | CAM_GroupAcc, &tr->CAM_Ctl);
 	} else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *cur_addr;
+		struct netdev_hw_addr *ha;
 		int i;
 		int ena_bits = CAM_Ena_Bit(CAM_ENTRY_SOURCE);
 
 		tc_writel(0, &tr->CAM_Ctl);
 		/* Walk the address list, and load the filter */
 		i = 0;
-		netdev_for_each_mc_addr(cur_addr, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			/* entry 0,1 is reserved. */
-			tc35815_set_cam_entry(dev, i + 2, cur_addr->dmi_addr);
+			tc35815_set_cam_entry(dev, i + 2, ha->addr);
 			ena_bits |= CAM_Ena_Bit(i + 2);
 			i++;
 		}
diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c
index 068a47174fca..a38aede5c8d6 100644
--- a/drivers/net/tehuti.c
+++ b/drivers/net/tehuti.c
@@ -808,7 +808,7 @@ static void bdx_setmulti(struct net_device *ndev)
 			WRITE_REG(priv, regRX_MCST_HASH0 + i * 4, ~0);
 	} else if (!netdev_mc_empty(ndev)) {
 		u8 hash;
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		u32 reg, val;
 
 		/* set IMF to deny all multicast frames */
@@ -825,10 +825,10 @@ static void bdx_setmulti(struct net_device *ndev)
 		 * into RX_MAC_MCST regs. we skip this phase now and accept ALL
 		 * multicast frames throu IMF */
 		/* accept the rest of addresses throu IMF */
-		netdev_for_each_mc_addr(mclist, ndev) {
+		netdev_for_each_mc_addr(ha, ndev) {
 			hash = 0;
 			for (i = 0; i < ETH_ALEN; i++)
-				hash ^= mclist->dmi_addr[i];
+				hash ^= ha->addr[i];
 			reg = regRX_MCST_HASH0 + ((hash >> 5) << 2);
 			val = READ_REG(priv, reg);
 			val |= (1 << (hash % 32));
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 22cf1c446de3..aff11f84f525 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -9457,14 +9457,14 @@ static void __tg3_set_rx_mode(struct net_device *dev)
 		tg3_set_multi (tp, 0);
 	} else {
 		/* Accept one or more multicast(s). */
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		u32 mc_filter[4] = { 0, };
 		u32 regidx;
 		u32 bit;
 		u32 crc;
 
-		netdev_for_each_mc_addr(mclist, dev) {
-			crc = calc_crc (mclist->dmi_addr, ETH_ALEN);
+		netdev_for_each_mc_addr(ha, dev) {
+			crc = calc_crc(ha->addr, ETH_ALEN);
 			bit = ~crc & 0x7f;
 			regidx = (bit & 0x60) >> 5;
 			bit &= 0x1f;
diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c
index 05ea30a94e89..8ffec22b74bf 100644
--- a/drivers/net/tlan.c
+++ b/drivers/net/tlan.c
@@ -1314,7 +1314,7 @@ static struct net_device_stats *TLan_GetStats( struct net_device *dev )
 
 static void TLan_SetMulticastList( struct net_device *dev )
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	u32			hash1 = 0;
 	u32			hash2 = 0;
 	int			i;
@@ -1336,12 +1336,12 @@ static void TLan_SetMulticastList( struct net_device *dev )
 			TLan_DioWrite32( dev->base_addr, TLAN_HASH_2, 0xFFFFFFFF );
 		} else {
 			i = 0;
-			netdev_for_each_mc_addr(dmi, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				if ( i < 3 ) {
 					TLan_SetMac( dev, i + 1,
-						     (char *) &dmi->dmi_addr );
+						     (char *) &ha->addr);
 				} else {
-					offset = TLan_HashFunc( (u8 *) &dmi->dmi_addr );
+					offset = TLan_HashFunc((u8 *)&ha->addr);
 					if ( offset < 32 )
 						hash1 |= ( 1 << offset );
 					else
diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c
index 8c54d89e60d7..4673e38c52a9 100644
--- a/drivers/net/tokenring/3c359.c
+++ b/drivers/net/tokenring/3c359.c
@@ -1390,7 +1390,7 @@ static int xl_close(struct net_device *dev)
 static void xl_set_rx_mode(struct net_device *dev) 
 {
 	struct xl_private *xl_priv = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned char dev_mc_address[4] ; 
 	u16 options ; 
 
@@ -1407,11 +1407,11 @@ static void xl_set_rx_mode(struct net_device *dev)
 
 	dev_mc_address[0] = dev_mc_address[1] = dev_mc_address[2] = dev_mc_address[3] = 0 ;
 
-	netdev_for_each_mc_addr(dmi, dev) {
-                dev_mc_address[0] |= dmi->dmi_addr[2] ;
-                dev_mc_address[1] |= dmi->dmi_addr[3] ;
-                dev_mc_address[2] |= dmi->dmi_addr[4] ;
-                dev_mc_address[3] |= dmi->dmi_addr[5] ;
+	netdev_for_each_mc_addr(ha, dev) {
+		dev_mc_address[0] |= ha->addr[2];
+		dev_mc_address[1] |= ha->addr[3];
+		dev_mc_address[2] |= ha->addr[4];
+		dev_mc_address[3] |= ha->addr[5];
         }
 
 	if (memcmp(xl_priv->xl_functional_addr,dev_mc_address,4) != 0) { /* Options have changed, run the command */
diff --git a/drivers/net/tokenring/ibmtr.c b/drivers/net/tokenring/ibmtr.c
index 1a0967246e2f..eebdaae24328 100644
--- a/drivers/net/tokenring/ibmtr.c
+++ b/drivers/net/tokenring/ibmtr.c
@@ -986,7 +986,7 @@ static void open_sap(unsigned char type, struct net_device *dev)
 static void tok_set_multicast_list(struct net_device *dev)
 {
 	struct tok_info *ti = netdev_priv(dev);
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	unsigned char address[4];
 
 	int i;
@@ -995,11 +995,11 @@ static void tok_set_multicast_list(struct net_device *dev)
 	/*BMS ifconfig tr down or hot unplug a PCMCIA card ??hownowbrowncow*/
 	if (/*BMSHELPdev->start == 0 ||*/ ti->open_status != OPEN) return;
 	address[0] = address[1] = address[2] = address[3] = 0;
-	netdev_for_each_mc_addr(mclist, dev) {
-		address[0] |= mclist->dmi_addr[2];
-		address[1] |= mclist->dmi_addr[3];
-		address[2] |= mclist->dmi_addr[4];
-		address[3] |= mclist->dmi_addr[5];
+	netdev_for_each_mc_addr(ha, dev) {
+		address[0] |= ha->addr[2];
+		address[1] |= ha->addr[3];
+		address[2] |= ha->addr[4];
+		address[3] |= ha->addr[5];
 	}
 	SET_PAGE(ti->srb_page);
 	for (i = 0; i < sizeof(struct srb_set_funct_addr); i++)
diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c
index 01c780f25e98..88c893100c2b 100644
--- a/drivers/net/tokenring/lanstreamer.c
+++ b/drivers/net/tokenring/lanstreamer.c
@@ -1266,7 +1266,7 @@ static void streamer_set_rx_mode(struct net_device *dev)
 	    netdev_priv(dev);
 	__u8 __iomem *streamer_mmio = streamer_priv->streamer_mmio;
 	__u8 options = 0;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned char dev_mc_address[5];
 
 	writel(streamer_priv->srb, streamer_mmio + LAPA);
@@ -1302,11 +1302,11 @@ static void streamer_set_rx_mode(struct net_device *dev)
 	writel(streamer_priv->srb,streamer_mmio+LAPA);
 	dev_mc_address[0] = dev_mc_address[1] = dev_mc_address[2] = dev_mc_address[3] = 0 ; 
   
-	netdev_for_each_mc_addr(dmi, dev) {
-   	        dev_mc_address[0] |= dmi->dmi_addr[2] ; 
-		dev_mc_address[1] |= dmi->dmi_addr[3] ; 
-		dev_mc_address[2] |= dmi->dmi_addr[4] ; 
-		dev_mc_address[3] |= dmi->dmi_addr[5] ; 
+	netdev_for_each_mc_addr(ha, dev) {
+		dev_mc_address[0] |= ha->addr[2];
+		dev_mc_address[1] |= ha->addr[3];
+		dev_mc_address[2] |= ha->addr[4];
+		dev_mc_address[3] |= ha->addr[5];
 	}
   
 	writew(htons(SRB_SET_FUNC_ADDRESS << 8),streamer_mmio+LAPDINC);
diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c
index 3decaf4b6cf2..3d2fbe60b46e 100644
--- a/drivers/net/tokenring/olympic.c
+++ b/drivers/net/tokenring/olympic.c
@@ -1139,7 +1139,7 @@ static void olympic_set_rx_mode(struct net_device *dev)
    	u8 __iomem *olympic_mmio = olympic_priv->olympic_mmio ; 
 	u8 options = 0; 
 	u8 __iomem *srb;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned char dev_mc_address[4] ; 
 
 	writel(olympic_priv->srb,olympic_mmio+LAPA);
@@ -1177,11 +1177,11 @@ static void olympic_set_rx_mode(struct net_device *dev)
 
 	dev_mc_address[0] = dev_mc_address[1] = dev_mc_address[2] = dev_mc_address[3] = 0 ; 
 
-	netdev_for_each_mc_addr(dmi, dev) {
-		dev_mc_address[0] |= dmi->dmi_addr[2] ; 
-		dev_mc_address[1] |= dmi->dmi_addr[3] ; 
-		dev_mc_address[2] |= dmi->dmi_addr[4] ; 
-		dev_mc_address[3] |= dmi->dmi_addr[5] ; 
+	netdev_for_each_mc_addr(ha, dev) {
+		dev_mc_address[0] |= ha->addr[2];
+		dev_mc_address[1] |= ha->addr[3];
+		dev_mc_address[2] |= ha->addr[4];
+		dev_mc_address[3] |= ha->addr[5];
 	}
 
 	writeb(SRB_SET_FUNC_ADDRESS,srb+0);
diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c
index b545e20d2898..9fa2c36ff904 100644
--- a/drivers/net/tokenring/tms380tr.c
+++ b/drivers/net/tokenring/tms380tr.c
@@ -1212,17 +1212,17 @@ static void tms380tr_set_multicast_list(struct net_device *dev)
 		}
 		else
 		{
-			struct dev_mc_list *mclist;
+			struct netdev_hw_addr *ha;
 
-			netdev_for_each_mc_addr(mclist, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				((char *)(&tp->ocpl.FunctAddr))[0] |=
-					mclist->dmi_addr[2];
+					ha->addr[2];
 				((char *)(&tp->ocpl.FunctAddr))[1] |=
-					mclist->dmi_addr[3];
+					ha->addr[3];
 				((char *)(&tp->ocpl.FunctAddr))[2] |=
-					mclist->dmi_addr[4];
+					ha->addr[4];
 				((char *)(&tp->ocpl.FunctAddr))[3] |=
-					mclist->dmi_addr[5];
+					ha->addr[5];
 			}
 		}
 		tms380tr_exec_cmd(dev, OC_SET_FUNCT_ADDR);
diff --git a/drivers/net/tsi108_eth.c b/drivers/net/tsi108_eth.c
index 80333a4d3287..1366541c30a2 100644
--- a/drivers/net/tsi108_eth.c
+++ b/drivers/net/tsi108_eth.c
@@ -1186,15 +1186,15 @@ static void tsi108_set_rx_mode(struct net_device *dev)
 
 	if (dev->flags & IFF_ALLMULTI || !netdev_mc_empty(dev)) {
 		int i;
-		struct dev_mc_list *mc;
+		struct netdev_hw_addr *ha;
 		rxcfg |= TSI108_EC_RXCFG_MFE | TSI108_EC_RXCFG_MC_HASH;
 
 		memset(data->mc_hash, 0, sizeof(data->mc_hash));
 
-		netdev_for_each_mc_addr(mc, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			u32 hash, crc;
 
-			crc = ether_crc(6, mc->dmi_addr);
+			crc = ether_crc(6, ha->addr);
 			hash = crc >> 23;
 			__set_bit(hash, &data->mc_hash[0]);
 		}
diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c
index cb429723b2c8..a68b91764627 100644
--- a/drivers/net/tulip/de2104x.c
+++ b/drivers/net/tulip/de2104x.c
@@ -670,15 +670,15 @@ static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev)
 {
 	struct de_private *de = netdev_priv(dev);
 	u16 hash_table[32];
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	int i;
 	u16 *eaddrs;
 
 	memset(hash_table, 0, sizeof(hash_table));
 	set_bit_le(255, hash_table); 			/* Broadcast entry */
 	/* This should work on big-endian machines as well. */
-	netdev_for_each_mc_addr(mclist, dev) {
-		int index = ether_crc_le(ETH_ALEN, mclist->dmi_addr) & 0x1ff;
+	netdev_for_each_mc_addr(ha, dev) {
+		int index = ether_crc_le(ETH_ALEN, ha->addr) & 0x1ff;
 
 		set_bit_le(index, hash_table);
 	}
@@ -699,13 +699,13 @@ static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev)
 static void build_setup_frame_perfect(u16 *setup_frm, struct net_device *dev)
 {
 	struct de_private *de = netdev_priv(dev);
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	u16 *eaddrs;
 
 	/* We have <= 14 addresses so we can use the wonderful
 	   16 address perfect filtering of the Tulip. */
-	netdev_for_each_mc_addr(mclist, dev) {
-		eaddrs = (u16 *)mclist->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		eaddrs = (u16 *) ha->addr;
 		*setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
 		*setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
 		*setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c
index 55ade8949a68..cac4bcc83187 100644
--- a/drivers/net/tulip/de4x5.c
+++ b/drivers/net/tulip/de4x5.c
@@ -1951,7 +1951,7 @@ static void
 SetMulticastFilter(struct net_device *dev)
 {
     struct de4x5_private *lp = netdev_priv(dev);
-    struct dev_mc_list *dmi;
+    struct netdev_hw_addr *ha;
     u_long iobase = dev->base_addr;
     int i, bit, byte;
     u16 hashcode;
@@ -1966,8 +1966,8 @@ SetMulticastFilter(struct net_device *dev)
     if ((dev->flags & IFF_ALLMULTI) || (netdev_mc_count(dev) > 14)) {
 	omr |= OMR_PM;                       /* Pass all multicasts */
     } else if (lp->setup_f == HASH_PERF) {   /* Hash Filtering */
-	netdev_for_each_mc_addr(dmi, dev) {
-	    addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+	    addrs = ha->addr;
 	    if ((*addrs & 0x01) == 1) {      /* multicast address? */
 		crc = ether_crc_le(ETH_ALEN, addrs);
 		hashcode = crc & HASH_BITS;  /* hashcode is 9 LSb of CRC */
@@ -1983,8 +1983,8 @@ SetMulticastFilter(struct net_device *dev)
 	    }
 	}
     } else {                                 /* Perfect filtering */
-	netdev_for_each_mc_addr(dmi, dev) {
-	    addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+	    addrs = ha->addr;
 	    for (i=0; i<ETH_ALEN; i++) {
 		*(pa + (i&1)) = *addrs++;
 		if (i & 0x01) pa += 4;
diff --git a/drivers/net/tulip/dmfe.c b/drivers/net/tulip/dmfe.c
index 95b38d803e9b..a039c8ab98b2 100644
--- a/drivers/net/tulip/dmfe.c
+++ b/drivers/net/tulip/dmfe.c
@@ -1454,7 +1454,7 @@ static void update_cr6(u32 cr6_data, unsigned long ioaddr)
 
 static void dm9132_id_table(struct DEVICE *dev)
 {
-	struct dev_mc_list *mcptr;
+	struct netdev_hw_addr *ha;
 	u16 * addrptr;
 	unsigned long ioaddr = dev->base_addr+0xc0;		/* ID Table */
 	u32 hash_val;
@@ -1478,8 +1478,8 @@ static void dm9132_id_table(struct DEVICE *dev)
 	hash_table[3] = 0x8000;
 
 	/* the multicast address in Hash Table : 64 bits */
-	netdev_for_each_mc_addr(mcptr, dev) {
-		hash_val = cal_CRC((char *) mcptr->dmi_addr, 6, 0) & 0x3f;
+	netdev_for_each_mc_addr(ha, dev) {
+		hash_val = cal_CRC((char *) ha->addr, 6, 0) & 0x3f;
 		hash_table[hash_val / 16] |= (u16) 1 << (hash_val % 16);
 	}
 
@@ -1497,7 +1497,7 @@ static void dm9132_id_table(struct DEVICE *dev)
 static void send_filter_frame(struct DEVICE *dev)
 {
 	struct dmfe_board_info *db = netdev_priv(dev);
-	struct dev_mc_list *mcptr;
+	struct netdev_hw_addr *ha;
 	struct tx_desc *txptr;
 	u16 * addrptr;
 	u32 * suptr;
@@ -1520,8 +1520,8 @@ static void send_filter_frame(struct DEVICE *dev)
 	*suptr++ = 0xffff;
 
 	/* fit the multicast address */
-	netdev_for_each_mc_addr(mcptr, dev) {
-		addrptr = (u16 *) mcptr->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrptr = (u16 *) ha->addr;
 		*suptr++ = addrptr[0];
 		*suptr++ = addrptr[1];
 		*suptr++ = addrptr[2];
diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index 7f544ef2f5fc..c4b7cd726b69 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -990,15 +990,15 @@ static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev)
 {
 	struct tulip_private *tp = netdev_priv(dev);
 	u16 hash_table[32];
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	int i;
 	u16 *eaddrs;
 
 	memset(hash_table, 0, sizeof(hash_table));
 	set_bit_le(255, hash_table); 			/* Broadcast entry */
 	/* This should work on big-endian machines as well. */
-	netdev_for_each_mc_addr(mclist, dev) {
-		int index = ether_crc_le(ETH_ALEN, mclist->dmi_addr) & 0x1ff;
+	netdev_for_each_mc_addr(ha, dev) {
+		int index = ether_crc_le(ETH_ALEN, ha->addr) & 0x1ff;
 
 		set_bit_le(index, hash_table);
 	}
@@ -1018,13 +1018,13 @@ static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev)
 static void build_setup_frame_perfect(u16 *setup_frm, struct net_device *dev)
 {
 	struct tulip_private *tp = netdev_priv(dev);
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	u16 *eaddrs;
 
 	/* We have <= 14 addresses so we can use the wonderful
 	   16 address perfect filtering of the Tulip. */
-	netdev_for_each_mc_addr(mclist, dev) {
-		eaddrs = (u16 *)mclist->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		eaddrs = (u16 *) ha->addr;
 		*setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
 		*setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
 		*setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
@@ -1061,7 +1061,7 @@ static void set_rx_mode(struct net_device *dev)
 	} else	if (tp->flags & MC_HASH_ONLY) {
 		/* Some work-alikes have only a 64-entry hash filter table. */
 		/* Should verify correctness on big-endian/__powerpc__ */
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		if (netdev_mc_count(dev) > 64) {
 			/* Arbitrary non-effective limit. */
 			tp->csr6 |= AcceptAllMulticast;
@@ -1069,18 +1069,21 @@ static void set_rx_mode(struct net_device *dev)
 		} else {
 			u32 mc_filter[2] = {0, 0};		 /* Multicast hash filter */
 			int filterbit;
-			netdev_for_each_mc_addr(mclist, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				if (tp->flags & COMET_MAC_ADDR)
-					filterbit = ether_crc_le(ETH_ALEN, mclist->dmi_addr);
+					filterbit = ether_crc_le(ETH_ALEN,
+								 ha->addr);
 				else
-					filterbit = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+					filterbit = ether_crc(ETH_ALEN,
+							      ha->addr) >> 26;
 				filterbit &= 0x3f;
 				mc_filter[filterbit >> 5] |= 1 << (filterbit & 31);
 				if (tulip_debug > 2)
 					dev_info(&dev->dev,
 						 "Added filter for %pM  %08x bit %d\n",
-						 mclist->dmi_addr,
-						 ether_crc(ETH_ALEN, mclist->dmi_addr), filterbit);
+						 ha->addr,
+						 ether_crc(ETH_ALEN, ha->addr),
+						 filterbit);
 			}
 			if (mc_filter[0] == tp->mc_filter[0]  &&
 				mc_filter[1] == tp->mc_filter[1])
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
index 0ab05af237e5..b79d908fe34e 100644
--- a/drivers/net/tulip/uli526x.c
+++ b/drivers/net/tulip/uli526x.c
@@ -1392,7 +1392,7 @@ static void update_cr6(u32 cr6_data, unsigned long ioaddr)
 static void send_filter_frame(struct net_device *dev, int mc_cnt)
 {
 	struct uli526x_board_info *db = netdev_priv(dev);
-	struct dev_mc_list *mcptr;
+	struct netdev_hw_addr *ha;
 	struct tx_desc *txptr;
 	u16 * addrptr;
 	u32 * suptr;
@@ -1415,8 +1415,8 @@ static void send_filter_frame(struct net_device *dev, int mc_cnt)
 	*suptr++ = 0xffff << FLT_SHIFT;
 
 	/* fit the multicast address */
-	netdev_for_each_mc_addr(mcptr, dev) {
-		addrptr = (u16 *) mcptr->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrptr = (u16 *) ha->addr;
 		*suptr++ = addrptr[0] << FLT_SHIFT;
 		*suptr++ = addrptr[1] << FLT_SHIFT;
 		*suptr++ = addrptr[2] << FLT_SHIFT;
diff --git a/drivers/net/tulip/winbond-840.c b/drivers/net/tulip/winbond-840.c
index 304f43866c44..3e3822c98a0d 100644
--- a/drivers/net/tulip/winbond-840.c
+++ b/drivers/net/tulip/winbond-840.c
@@ -1367,13 +1367,15 @@ static u32 __set_rx_mode(struct net_device *dev)
 		memset(mc_filter, 0xff, sizeof(mc_filter));
 		rx_mode = RxAcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
-			int filterbit = (ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26) ^ 0x3F;
-			filterbit &= 0x3f;
-			mc_filter[filterbit >> 5] |= 1 << (filterbit & 31);
+		netdev_for_each_mc_addr(ha, dev) {
+			int filbit;
+
+			filbit = (ether_crc(ETH_ALEN, ha->addr) >> 26) ^ 0x3F;
+			filbit &= 0x3f;
+			mc_filter[filbit >> 5] |= 1 << (filbit & 31);
 		}
 		rx_mode = RxAcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 	}
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index cd24e5f2b2a2..3053f85aa066 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -921,11 +921,11 @@ typhoon_set_rx_mode(struct net_device *dev)
 		/* Too many to match, or accept all multicasts. */
 		filter |= TYPHOON_RX_FILTER_ALL_MCAST;
 	} else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
-			int bit = ether_crc(ETH_ALEN, mclist->dmi_addr) & 0x3f;
+		netdev_for_each_mc_addr(ha, dev) {
+			int bit = ether_crc(ETH_ALEN, ha->addr) & 0x3f;
 			mc_filter[bit >> 5] |= 1 << (bit & 0x1f);
 		}
 
diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index 75743a76bbe8..081f76bff341 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -1999,7 +1999,7 @@ static void ucc_geth_memclean(struct ucc_geth_private *ugeth)
 static void ucc_geth_set_multi(struct net_device *dev)
 {
 	struct ucc_geth_private *ugeth;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	struct ucc_fast __iomem *uf_regs;
 	struct ucc_geth_82xx_address_filtering_pram __iomem *p_82xx_addr_filt;
 
@@ -2028,16 +2028,16 @@ static void ucc_geth_set_multi(struct net_device *dev)
 			out_be32(&p_82xx_addr_filt->gaddr_h, 0x0);
 			out_be32(&p_82xx_addr_filt->gaddr_l, 0x0);
 
-			netdev_for_each_mc_addr(dmi, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				/* Only support group multicast for now.
 				 */
-				if (!(dmi->dmi_addr[0] & 1))
+				if (!(ha->addr[0] & 1))
 					continue;
 
 				/* Ask CPM to run CRC and set bit in
 				 * filter mask.
 				 */
-				hw_add_addr_in_hash(ugeth, dmi->dmi_addr);
+				hw_add_addr_in_hash(ugeth, ha->addr);
 			}
 		}
 	}
diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c
index 9e05639435f2..763364f0972e 100644
--- a/drivers/net/usb/asix.c
+++ b/drivers/net/usb/asix.c
@@ -557,16 +557,14 @@ static void asix_set_multicast(struct net_device *net)
 		 * for our 8 byte filter buffer
 		 * to avoid allocating memory that
 		 * is tricky to free later */
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 		u32 crc_bits;
 
 		memset(data->multi_filter, 0, AX_MCAST_FILTER_SIZE);
 
 		/* Build the multicast hash filter. */
-		netdev_for_each_mc_addr(mc_list, net) {
-			crc_bits =
-			    ether_crc(ETH_ALEN,
-				      mc_list->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, net) {
+			crc_bits = ether_crc(ETH_ALEN, ha->addr) >> 26;
 			data->multi_filter[crc_bits >> 3] |=
 			    1 << (crc_bits & 7);
 		}
@@ -793,16 +791,14 @@ static void ax88172_set_multicast(struct net_device *net)
 		 * for our 8 byte filter buffer
 		 * to avoid allocating memory that
 		 * is tricky to free later */
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 		u32 crc_bits;
 
 		memset(data->multi_filter, 0, AX_MCAST_FILTER_SIZE);
 
 		/* Build the multicast hash filter. */
-		netdev_for_each_mc_addr(mc_list, net) {
-			crc_bits =
-			    ether_crc(ETH_ALEN,
-				      mc_list->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, net) {
+			crc_bits = ether_crc(ETH_ALEN, ha->addr) >> 26;
 			data->multi_filter[crc_bits >> 3] |=
 			    1 << (crc_bits & 7);
 		}
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index 96f1ebe0d348..0c48ff97f47c 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -629,7 +629,7 @@ static void catc_multicast(unsigned char *addr, u8 *multicast)
 static void catc_set_multicast_list(struct net_device *netdev)
 {
 	struct catc *catc = netdev_priv(netdev);
-	struct dev_mc_list *mc;
+	struct netdev_hw_addr *ha;
 	u8 broadcast[6];
 	u8 rx = RxEnable | RxPolarity | RxMultiCast;
 
@@ -647,8 +647,8 @@ static void catc_set_multicast_list(struct net_device *netdev)
 	if (netdev->flags & IFF_ALLMULTI) {
 		memset(catc->multicast, 0xff, 64);
 	} else {
-		netdev_for_each_mc_addr(mc, netdev) {
-			u32 crc = ether_crc_le(6, mc->dmi_addr);
+		netdev_for_each_mc_addr(ha, netdev) {
+			u32 crc = ether_crc_le(6, ha->addr);
 			if (!catc->is_f5u011) {
 				catc->multicast[(crc >> 3) & 0x3f] |= 1 << (crc & 7);
 			} else {
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index 269339769f47..4eb1fb31ff0b 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -386,10 +386,10 @@ static void dm9601_set_multicast(struct net_device *net)
 		   netdev_mc_count(net) > DM_MAX_MCAST) {
 		rx_ctl |= 0x04;
 	} else if (!netdev_mc_empty(net)) {
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 
-		netdev_for_each_mc_addr(mc_list, net) {
-			u32 crc = ether_crc(ETH_ALEN, mc_list->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, net) {
+			u32 crc = ether_crc(ETH_ALEN, ha->addr) >> 26;
 			hashes[crc >> 3] |= 1 << (crc & 0x7);
 		}
 	}
diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c
index 70978219e98a..0dc92c8ba4b2 100644
--- a/drivers/net/usb/mcs7830.c
+++ b/drivers/net/usb/mcs7830.c
@@ -452,12 +452,12 @@ static void mcs7830_data_set_multicast(struct net_device *net)
 		 * for our 8 byte filter buffer
 		 * to avoid allocating memory that
 		 * is tricky to free later */
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 		u32 crc_bits;
 
 		/* Build the multicast hash filter. */
-		netdev_for_each_mc_addr(mc_list, net) {
-			crc_bits = ether_crc(ETH_ALEN, mc_list->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, net) {
+			crc_bits = ether_crc(ETH_ALEN, ha->addr) >> 26;
 			data->multi_filter[crc_bits >> 3] |= 1 << (crc_bits & 7);
 		}
 	}
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 300e3e764fa2..b8b00d06ea79 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -444,14 +444,14 @@ static void smsc75xx_set_multicast(struct net_device *netdev)
 		netif_dbg(dev, drv, dev->net, "receive all multicast enabled");
 		pdata->rfe_ctl |= RFE_CTL_AM | RFE_CTL_DPF;
 	} else if (!netdev_mc_empty(dev->net)) {
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 
 		netif_dbg(dev, drv, dev->net, "receive multicast hash filter");
 
 		pdata->rfe_ctl |= RFE_CTL_MHF | RFE_CTL_DPF;
 
-		netdev_for_each_mc_addr(mc_list, netdev) {
-			u32 bitnum = smsc75xx_hash(mc_list->dmi_addr);
+		netdev_for_each_mc_addr(ha, netdev) {
+			u32 bitnum = smsc75xx_hash(ha->addr);
 			pdata->multicast_hash_table[bitnum / 32] |=
 				(1 << (bitnum % 32));
 		}
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 73f9a31cf94d..ccd55ca3a06a 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -384,13 +384,13 @@ static void smsc95xx_set_multicast(struct net_device *netdev)
 		pdata->mac_cr |= MAC_CR_MCPAS_;
 		pdata->mac_cr &= ~(MAC_CR_PRMS_ | MAC_CR_HPFILT_);
 	} else if (!netdev_mc_empty(dev->net)) {
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 
 		pdata->mac_cr |= MAC_CR_HPFILT_;
 		pdata->mac_cr &= ~(MAC_CR_PRMS_ | MAC_CR_MCPAS_);
 
-		netdev_for_each_mc_addr(mc_list, netdev) {
-			u32 bitnum = smsc95xx_hash(mc_list->dmi_addr);
+		netdev_for_each_mc_addr(ha, netdev) {
+			u32 bitnum = smsc95xx_hash(ha->addr);
 			u32 mask = 0x01 << (bitnum & 0x1F);
 			if (bitnum & 0x20)
 				hash_hi |= mask;
diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
index 50f881aa3939..d9133c62a2f5 100644
--- a/drivers/net/via-rhine.c
+++ b/drivers/net/via-rhine.c
@@ -1704,11 +1704,11 @@ static void rhine_set_rx_mode(struct net_device *dev)
 		iowrite32(0xffffffff, ioaddr + MulticastFilter1);
 		rx_mode = 0x0C;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
-			int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, dev) {
+			int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
 
 			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
 		}
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 3a486f3bad3d..91f3b841288c 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -1126,7 +1126,7 @@ static void velocity_set_multi(struct net_device *dev)
 	struct mac_regs __iomem *regs = vptr->mac_regs;
 	u8 rx_mode;
 	int i;
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 
 	if (dev->flags & IFF_PROMISC) {	/* Set promiscuous. */
 		writel(0xffffffff, &regs->MARCAM[0]);
@@ -1142,8 +1142,8 @@ static void velocity_set_multi(struct net_device *dev)
 		mac_get_cam_mask(regs, vptr->mCAMmask);
 
 		i = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
-			mac_set_cam(regs, i + offset, mclist->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			mac_set_cam(regs, i + offset, ha->addr);
 			vptr->mCAMmask[(offset + i) / 8] |= 1 << ((offset + i) & 7);
 			i++;
 		}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3f5be35dea25..ecec9a8527f3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -721,7 +721,6 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	struct scatterlist sg[2];
 	u8 promisc, allmulti;
 	struct virtio_net_ctrl_mac *mac_data;
-	struct dev_addr_list *addr;
 	struct netdev_hw_addr *ha;
 	int uc_count;
 	int mc_count;
@@ -778,8 +777,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 
 	mac_data->entries = mc_count;
 	i = 0;
-	netdev_for_each_mc_addr(addr, dev)
-		memcpy(&mac_data->macs[i++][0], addr->da_addr, ETH_ALEN);
+	netdev_for_each_mc_addr(ha, dev)
+		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 
 	sg_set_buf(&sg[1], mac_data,
 		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index cff3485d9673..58dfa367bf86 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1675,11 +1675,11 @@ vmxnet3_copy_mc(struct net_device *netdev)
 		/* We may be called with BH disabled */
 		buf = kmalloc(sz, GFP_ATOMIC);
 		if (buf) {
-			struct dev_mc_list *mc;
+			struct netdev_hw_addr *ha;
 			int i = 0;
 
-			netdev_for_each_mc_addr(mc, netdev)
-				memcpy(buf + i++ * ETH_ALEN, mc->dmi_addr,
+			netdev_for_each_mc_addr(ha, netdev)
+				memcpy(buf + i++ * ETH_ALEN, ha->addr,
 				       ETH_ALEN);
 		}
 	}
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index e5f2d3ee0df3..37836a10d093 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -1117,7 +1117,7 @@ vxge_tx_term(void *dtrh, enum vxge_hw_txdl_state state, void *userdata)
  */
 static void vxge_set_multicast(struct net_device *dev)
 {
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	struct vxgedev *vdev;
 	int i, mcast_cnt = 0;
 	struct __vxge_hw_device  *hldev;
@@ -1217,8 +1217,8 @@ static void vxge_set_multicast(struct net_device *dev)
 		}
 
 		/* Add new ones */
-		netdev_for_each_mc_addr(mclist, dev) {
-			memcpy(mac_info.macaddr, mclist->dmi_addr, ETH_ALEN);
+		netdev_for_each_mc_addr(ha, dev) {
+			memcpy(mac_info.macaddr, ha->addr, ETH_ALEN);
 			for (vpath_idx = 0; vpath_idx < vdev->no_of_vpath;
 					vpath_idx++) {
 				mac_info.vpath_no = vpath_idx;
diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c
index 547912e6843f..2277998b7264 100644
--- a/drivers/net/wireless/adm8211.c
+++ b/drivers/net/wireless/adm8211.c
@@ -1317,21 +1317,19 @@ static void adm8211_bss_info_changed(struct ieee80211_hw *dev,
 }
 
 static u64 adm8211_prepare_multicast(struct ieee80211_hw *hw,
-				     int mc_count, struct dev_addr_list *mclist)
+				     struct netdev_hw_addr_list *mc_list)
 {
-	unsigned int bit_nr, i;
+	unsigned int bit_nr;
 	u32 mc_filter[2];
+	struct netdev_hw_addr *ha;
 
 	mc_filter[1] = mc_filter[0] = 0;
 
-	for (i = 0; i < mc_count; i++) {
-		if (!mclist)
-			break;
-		bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+	netdev_hw_addr_list_for_each(ha, mc_list) {
+		bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
 
 		bit_nr &= 0x3F;
 		mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
-		mclist = mclist->next;
 	}
 
 	return mc_filter[0] | ((u64)(mc_filter[1]) << 32);
diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c
index 257c734733d1..b43d4b006d7e 100644
--- a/drivers/net/wireless/ath/ar9170/main.c
+++ b/drivers/net/wireless/ath/ar9170/main.c
@@ -2045,21 +2045,17 @@ out:
 	return err;
 }
 
-static u64 ar9170_op_prepare_multicast(struct ieee80211_hw *hw, int mc_count,
-				       struct dev_addr_list *mclist)
+static u64 ar9170_op_prepare_multicast(struct ieee80211_hw *hw,
+				       struct netdev_hw_addr_list *mc_list)
 {
 	u64 mchash;
-	int i;
+	struct netdev_hw_addr *ha;
 
 	/* always get broadcast frames */
 	mchash = 1ULL << (0xff >> 2);
 
-	for (i = 0; i < mc_count; i++) {
-		if (WARN_ON(!mclist))
-			break;
-		mchash |= 1ULL << (mclist->dmi_addr[5] >> 2);
-		mclist = mclist->next;
-	}
+	netdev_hw_addr_list_for_each(ha, mc_list)
+		mchash |= 1ULL << (ha->addr[5] >> 2);
 
 	return mchash;
 }
diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c
index b142a78ed1e5..53a2340f52bc 100644
--- a/drivers/net/wireless/ath/ath5k/base.c
+++ b/drivers/net/wireless/ath/ath5k/base.c
@@ -230,7 +230,7 @@ static void ath5k_remove_interface(struct ieee80211_hw *hw,
 		struct ieee80211_vif *vif);
 static int ath5k_config(struct ieee80211_hw *hw, u32 changed);
 static u64 ath5k_prepare_multicast(struct ieee80211_hw *hw,
-				   int mc_count, struct dev_addr_list *mc_list);
+				   struct netdev_hw_addr_list *mc_list);
 static void ath5k_configure_filter(struct ieee80211_hw *hw,
 		unsigned int changed_flags,
 		unsigned int *new_flags,
@@ -2999,22 +2999,20 @@ unlock:
 }
 
 static u64 ath5k_prepare_multicast(struct ieee80211_hw *hw,
-				   int mc_count, struct dev_addr_list *mclist)
+				   struct netdev_hw_addr_list *mc_list)
 {
 	u32 mfilt[2], val;
-	int i;
 	u8 pos;
+	struct netdev_hw_addr *ha;
 
 	mfilt[0] = 0;
 	mfilt[1] = 1;
 
-	for (i = 0; i < mc_count; i++) {
-		if (!mclist)
-			break;
+	netdev_hw_addr_list_for_each(ha, mc_list) {
 		/* calculate XOR of eight 6-bit values */
-		val = get_unaligned_le32(mclist->dmi_addr + 0);
+		val = get_unaligned_le32(ha->addr + 0);
 		pos = (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val;
-		val = get_unaligned_le32(mclist->dmi_addr + 3);
+		val = get_unaligned_le32(ha->addr + 3);
 		pos ^= (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val;
 		pos &= 0x3f;
 		mfilt[pos / 32] |= (1 << (pos % 32));
@@ -3022,8 +3020,7 @@ static u64 ath5k_prepare_multicast(struct ieee80211_hw *hw,
 		* but not sure, needs testing, if we do use this we'd
 		* neet to inform below to not reset the mcast */
 		/* ath5k_hw_set_mcast_filterindex(ah,
-		 *      mclist->dmi_addr[5]); */
-		mclist = mclist->next;
+		 *      ha->addr[5]); */
 	}
 
 	return ((u64)(mfilt[1]) << 32) | mfilt[0];
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index 3c889f43d909..ff7b0d0cca56 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -318,7 +318,7 @@ static int lbs_add_mcast_addrs(struct cmd_ds_mac_multicast_adr *cmd,
 			       struct net_device *dev, int nr_addrs)
 {
 	int i = nr_addrs;
-	struct dev_mc_list *mc_list;
+	struct netdev_hw_addr *ha;
 	int cnt;
 
 	if ((dev->flags & (IFF_UP|IFF_MULTICAST)) != (IFF_UP|IFF_MULTICAST))
@@ -326,19 +326,19 @@ static int lbs_add_mcast_addrs(struct cmd_ds_mac_multicast_adr *cmd,
 
 	netif_addr_lock_bh(dev);
 	cnt = netdev_mc_count(dev);
-	netdev_for_each_mc_addr(mc_list, dev) {
-		if (mac_in_list(cmd->maclist, nr_addrs, mc_list->dmi_addr)) {
+	netdev_for_each_mc_addr(ha, dev) {
+		if (mac_in_list(cmd->maclist, nr_addrs, ha->addr)) {
 			lbs_deb_net("mcast address %s:%pM skipped\n", dev->name,
-				    mc_list->dmi_addr);
+				    ha->addr);
 			cnt--;
 			continue;
 		}
 
 		if (i == MRVDRV_MAX_MULTICAST_LIST_SIZE)
 			break;
-		memcpy(&cmd->maclist[6*i], mc_list->dmi_addr, ETH_ALEN);
+		memcpy(&cmd->maclist[6*i], ha->addr, ETH_ALEN);
 		lbs_deb_net("mcast address %s:%pM added to filter\n", dev->name,
-			    mc_list->dmi_addr);
+			    ha->addr);
 		i++;
 		cnt--;
 	}
diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c
index 6ab30033c26c..c20eef6b95c2 100644
--- a/drivers/net/wireless/libertas_tf/main.c
+++ b/drivers/net/wireless/libertas_tf/main.c
@@ -367,22 +367,20 @@ static int lbtf_op_config(struct ieee80211_hw *hw, u32 changed)
 }
 
 static u64 lbtf_op_prepare_multicast(struct ieee80211_hw *hw,
-				     int mc_count, struct dev_addr_list *mclist)
+				     struct netdev_hw_addr_list *mc_list)
 {
 	struct lbtf_private *priv = hw->priv;
 	int i;
+	struct netdev_hw_addr *ha;
+	int mc_count = netdev_hw_addr_list_count(mc_list);
 
 	if (!mc_count || mc_count > MRVDRV_MAX_MULTICAST_LIST_SIZE)
 		return mc_count;
 
 	priv->nr_of_multicastmacaddr = mc_count;
-	for (i = 0; i < mc_count; i++) {
-		if (!mclist)
-			break;
-		memcpy(&priv->multicastlist[i], mclist->da_addr,
-				ETH_ALEN);
-		mclist = mclist->next;
-	}
+	i = 0;
+	netdev_hw_addr_list_for_each(ha, mc_list)
+		memcpy(&priv->multicastlist[i++], ha->addr, ETH_ALEN);
 
 	return mc_count;
 }
diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c
index ac65e13eb0de..6599fd15e675 100644
--- a/drivers/net/wireless/mwl8k.c
+++ b/drivers/net/wireless/mwl8k.c
@@ -1938,11 +1938,15 @@ struct mwl8k_cmd_mac_multicast_adr {
 
 static struct mwl8k_cmd_pkt *
 __mwl8k_cmd_mac_multicast_adr(struct ieee80211_hw *hw, int allmulti,
-			      int mc_count, struct dev_addr_list *mclist)
+			      struct netdev_hw_addr_list *mc_list)
 {
 	struct mwl8k_priv *priv = hw->priv;
 	struct mwl8k_cmd_mac_multicast_adr *cmd;
 	int size;
+	int mc_count = 0;
+
+	if (mc_list)
+		mc_count = netdev_hw_addr_list_count(mc_list);
 
 	if (allmulti || mc_count > priv->num_mcaddrs) {
 		allmulti = 1;
@@ -1963,17 +1967,13 @@ __mwl8k_cmd_mac_multicast_adr(struct ieee80211_hw *hw, int allmulti,
 	if (allmulti) {
 		cmd->action |= cpu_to_le16(MWL8K_ENABLE_RX_ALL_MULTICAST);
 	} else if (mc_count) {
-		int i;
+		struct netdev_hw_addr *ha;
+		int i = 0;
 
 		cmd->action |= cpu_to_le16(MWL8K_ENABLE_RX_MULTICAST);
 		cmd->numaddr = cpu_to_le16(mc_count);
-		for (i = 0; i < mc_count && mclist; i++) {
-			if (mclist->da_addrlen != ETH_ALEN) {
-				kfree(cmd);
-				return NULL;
-			}
-			memcpy(cmd->addr[i], mclist->da_addr, ETH_ALEN);
-			mclist = mclist->next;
+		netdev_hw_addr_list_for_each(ha, mc_list) {
+			memcpy(cmd->addr[i++], ha->addr, ETH_ALEN);
 		}
 	}
 
@@ -3552,7 +3552,7 @@ mwl8k_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 }
 
 static u64 mwl8k_prepare_multicast(struct ieee80211_hw *hw,
-				   int mc_count, struct dev_addr_list *mclist)
+				   struct netdev_hw_addr_list *mc_list)
 {
 	struct mwl8k_cmd_pkt *cmd;
 
@@ -3563,7 +3563,7 @@ static u64 mwl8k_prepare_multicast(struct ieee80211_hw *hw,
 	 * we'll end up throwing this packet away and creating a new
 	 * one in mwl8k_configure_filter().
 	 */
-	cmd = __mwl8k_cmd_mac_multicast_adr(hw, 0, mc_count, mclist);
+	cmd = __mwl8k_cmd_mac_multicast_adr(hw, 0, mc_list);
 
 	return (unsigned long)cmd;
 }
@@ -3686,7 +3686,7 @@ static void mwl8k_configure_filter(struct ieee80211_hw *hw,
 	 */
 	if (*total_flags & FIF_ALLMULTI) {
 		kfree(cmd);
-		cmd = __mwl8k_cmd_mac_multicast_adr(hw, 1, 0, NULL);
+		cmd = __mwl8k_cmd_mac_multicast_adr(hw, 1, NULL);
 	}
 
 	if (cmd != NULL) {
diff --git a/drivers/net/wireless/orinoco/hw.c b/drivers/net/wireless/orinoco/hw.c
index 883b8f868626..9f657afaa3e5 100644
--- a/drivers/net/wireless/orinoco/hw.c
+++ b/drivers/net/wireless/orinoco/hw.c
@@ -1056,14 +1056,14 @@ int __orinoco_hw_set_multicast_list(struct orinoco_private *priv,
 	 * group address if either we want to multicast, or if we were
 	 * multicasting and want to stop */
 	if (!promisc && (mc_count || priv->mc_count)) {
-		struct dev_mc_list *p;
+		struct netdev_hw_addr *ha;
 		struct hermes_multicast mclist;
 		int i = 0;
 
-		netdev_for_each_mc_addr(p, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			if (i == mc_count)
 				break;
-			memcpy(mclist.addr[i++], p->dmi_addr, ETH_ALEN);
+			memcpy(mclist.addr[i++], ha->addr, ETH_ALEN);
 		}
 
 		err = hermes_write_ltv(hw, USER_BAP,
diff --git a/drivers/net/wireless/orinoco/hw.h b/drivers/net/wireless/orinoco/hw.h
index 9799a1d14a63..97af71e79950 100644
--- a/drivers/net/wireless/orinoco/hw.h
+++ b/drivers/net/wireless/orinoco/hw.h
@@ -22,7 +22,6 @@
 
 /* Forward declarations */
 struct orinoco_private;
-struct dev_addr_list;
 
 int determine_fw_capabilities(struct orinoco_private *priv, char *fw_name,
 			      size_t fw_name_len, u32 *hw_ver);
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 4f5bdb528ef7..54680a3a5acb 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -1875,17 +1875,17 @@ static void ray_update_multi_list(struct net_device *dev, int all)
 		writeb(0xff, &pccs->var);
 		local->num_multi = 0xff;
 	} else {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		int i = 0;
 
 		/* Copy the kernel's list of MC addresses to card */
-		netdev_for_each_mc_addr(dmi, dev) {
-			memcpy_toio(p, dmi->dmi_addr, ETH_ALEN);
+		netdev_for_each_mc_addr(ha, dev) {
+			memcpy_toio(p, ha->addr, ETH_ALEN);
 			dev_dbg(&link->dev,
 			      "ray_update_multi add addr %02x%02x%02x%02x%02x%02x\n",
-			      dmi->dmi_addr[0], dmi->dmi_addr[1],
-			      dmi->dmi_addr[2], dmi->dmi_addr[3],
-			      dmi->dmi_addr[4], dmi->dmi_addr[5]);
+			      ha->addr[0], ha->addr[1],
+			      ha->addr[2], ha->addr[3],
+			      ha->addr[4], ha->addr[5]);
 			p += ETH_ALEN;
 			i++;
 		}
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index aceb95ef7274..8ab9f094747b 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -1545,7 +1545,7 @@ static int remove_key(struct usbnet *usbdev, int index, const u8 *bssid)
 static void set_multicast_list(struct usbnet *usbdev)
 {
 	struct rndis_wlan_private *priv = get_rndis_wlan_priv(usbdev);
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	__le32 filter, basefilter;
 	int ret;
 	char *mc_addrs = NULL;
@@ -1584,9 +1584,9 @@ static void set_multicast_list(struct usbnet *usbdev)
 			return;
 		}
 
-		netdev_for_each_mc_addr(mclist, usbdev->net)
+		netdev_for_each_mc_addr(ha, usbdev->net)
 			memcpy(mc_addrs + i++ * ETH_ALEN,
-			       mclist->dmi_addr, ETH_ALEN);
+			       ha->addr, ETH_ALEN);
 	}
 	netif_addr_unlock_bh(usbdev->net);
 
diff --git a/drivers/net/wireless/rtl818x/rtl8180_dev.c b/drivers/net/wireless/rtl818x/rtl8180_dev.c
index 2b928ecf47bd..fb8a62f2b9b4 100644
--- a/drivers/net/wireless/rtl818x/rtl8180_dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8180_dev.c
@@ -723,10 +723,10 @@ static void rtl8180_bss_info_changed(struct ieee80211_hw *dev,
 	        priv->rf->conf_erp(dev, info);
 }
 
-static u64 rtl8180_prepare_multicast(struct ieee80211_hw *dev, int mc_count,
-				     struct dev_addr_list *mc_list)
+static u64 rtl8180_prepare_multicast(struct ieee80211_hw *dev,
+				     struct netdev_hw_addr_list *mc_list)
 {
-	return mc_count;
+	return netdev_hw_addr_list_count(mc_list);
 }
 
 static void rtl8180_configure_filter(struct ieee80211_hw *dev,
diff --git a/drivers/net/wireless/rtl818x/rtl8187_dev.c b/drivers/net/wireless/rtl818x/rtl8187_dev.c
index 0fb850e0c656..441d817ed489 100644
--- a/drivers/net/wireless/rtl818x/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8187_dev.c
@@ -1193,9 +1193,9 @@ static void rtl8187_bss_info_changed(struct ieee80211_hw *dev,
 }
 
 static u64 rtl8187_prepare_multicast(struct ieee80211_hw *dev,
-				     int mc_count, struct dev_addr_list *mc_list)
+				     struct netdev_hw_addr_list *mc_list)
 {
-	return mc_count;
+	return netdev_hw_addr_list_count(mc_list);
 }
 
 static void rtl8187_configure_filter(struct ieee80211_hw *dev,
diff --git a/drivers/net/wireless/wl12xx/wl1271_main.c b/drivers/net/wireless/wl12xx/wl1271_main.c
index 3daba6c0c77f..6036d0206fec 100644
--- a/drivers/net/wireless/wl12xx/wl1271_main.c
+++ b/drivers/net/wireless/wl12xx/wl1271_main.c
@@ -1266,11 +1266,11 @@ struct wl1271_filter_params {
 	u8 mc_list[ACX_MC_ADDRESS_GROUP_MAX][ETH_ALEN];
 };
 
-static u64 wl1271_op_prepare_multicast(struct ieee80211_hw *hw, int mc_count,
-				       struct dev_addr_list *mc_list)
+static u64 wl1271_op_prepare_multicast(struct ieee80211_hw *hw,
+				       struct netdev_hw_addr_list *mc_list)
 {
 	struct wl1271_filter_params *fp;
-	int i;
+	struct netdev_hw_addr *ha;
 
 	fp = kzalloc(sizeof(*fp), GFP_ATOMIC);
 	if (!fp) {
@@ -1279,21 +1279,16 @@ static u64 wl1271_op_prepare_multicast(struct ieee80211_hw *hw, int mc_count,
 	}
 
 	/* update multicast filtering parameters */
-	fp->enabled = true;
-	if (mc_count > ACX_MC_ADDRESS_GROUP_MAX) {
-		mc_count = 0;
-		fp->enabled = false;
-	}
-
 	fp->mc_list_length = 0;
-	for (i = 0; i < mc_count; i++) {
-		if (mc_list->da_addrlen == ETH_ALEN) {
+	if (netdev_hw_addr_list_count(mc_list) > ACX_MC_ADDRESS_GROUP_MAX) {
+		fp->enabled = false;
+	} else {
+		fp->enabled = true;
+		netdev_hw_addr_list_for_each(ha, mc_list) {
 			memcpy(fp->mc_list[fp->mc_list_length],
-			       mc_list->da_addr, ETH_ALEN);
+					ha->addr, ETH_ALEN);
 			fp->mc_list_length++;
-		} else
-			wl1271_warning("Unknown mc address length.");
-		mc_list = mc_list->next;
+		}
 	}
 
 	return (u64)(unsigned long)fp;
diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c
index 6917286edcae..74530b2d672c 100644
--- a/drivers/net/wireless/zd1201.c
+++ b/drivers/net/wireless/zd1201.c
@@ -875,7 +875,7 @@ static struct iw_statistics *zd1201_get_wireless_stats(struct net_device *dev)
 static void zd1201_set_multicast(struct net_device *dev)
 {
 	struct zd1201 *zd = netdev_priv(dev);
-	struct dev_mc_list *mc;
+	struct netdev_hw_addr *ha;
 	unsigned char reqbuf[ETH_ALEN*ZD1201_MAXMULTI];
 	int i;
 
@@ -883,8 +883,8 @@ static void zd1201_set_multicast(struct net_device *dev)
 		return;
 
 	i = 0;
-	netdev_for_each_mc_addr(mc, dev)
-		memcpy(reqbuf + i++ * ETH_ALEN, mc->dmi_addr, ETH_ALEN);
+	netdev_for_each_mc_addr(ha, dev)
+		memcpy(reqbuf + i++ * ETH_ALEN, ha->addr, ETH_ALEN);
 	zd1201_setconfig(zd, ZD1201_RID_CNFGROUPADDRESS, reqbuf,
 			 netdev_mc_count(dev) * ETH_ALEN, 0);
 }
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 00e09e26c826..6d95e4d74d7f 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -947,20 +947,17 @@ static void set_rx_filter_handler(struct work_struct *work)
 }
 
 static u64 zd_op_prepare_multicast(struct ieee80211_hw *hw,
-				   int mc_count, struct dev_addr_list *mclist)
+				   struct netdev_hw_addr_list *mc_list)
 {
 	struct zd_mac *mac = zd_hw_mac(hw);
 	struct zd_mc_hash hash;
-	int i;
+	struct netdev_hw_addr *ha;
 
 	zd_mc_clear(&hash);
 
-	for (i = 0; i < mc_count; i++) {
-		if (!mclist)
-			break;
-		dev_dbg_f(zd_mac_dev(mac), "mc addr %pM\n", mclist->dmi_addr);
-		zd_mc_add_addr(&hash, mclist->dmi_addr);
-		mclist = mclist->next;
+	netdev_hw_addr_list_for_each(ha, mc_list) {
+		dev_dbg_f(zd_mac_dev(mac), "mc addr %pM\n", ha->addr);
+		zd_mc_add_addr(&hash, ha->addr);
 	}
 
 	return hash.low | ((u64)hash.high << 32);
diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c
index 7d4107f5eeb0..34c91cf5d839 100644
--- a/drivers/net/yellowfin.c
+++ b/drivers/net/yellowfin.c
@@ -1300,25 +1300,25 @@ static void set_rx_mode(struct net_device *dev)
 		/* Too many to filter well, or accept all multicasts. */
 		iowrite16(0x000B, ioaddr + AddrMode);
 	} else if (!netdev_mc_empty(dev)) { /* Must use the multicast hash table. */
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		u16 hash_table[4];
 		int i;
 
 		memset(hash_table, 0, sizeof(hash_table));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			unsigned int bit;
 
 			/* Due to a bug in the early chip versions, multiple filter
 			   slots must be set for each address. */
 			if (yp->drv_flags & HasMulticastBug) {
-				bit = (ether_crc_le(3, mclist->dmi_addr) >> 3) & 0x3f;
+				bit = (ether_crc_le(3, ha->addr) >> 3) & 0x3f;
 				hash_table[bit >> 4] |= (1 << bit);
-				bit = (ether_crc_le(4, mclist->dmi_addr) >> 3) & 0x3f;
+				bit = (ether_crc_le(4, ha->addr) >> 3) & 0x3f;
 				hash_table[bit >> 4] |= (1 << bit);
-				bit = (ether_crc_le(5, mclist->dmi_addr) >> 3) & 0x3f;
+				bit = (ether_crc_le(5, ha->addr) >> 3) & 0x3f;
 				hash_table[bit >> 4] |= (1 << bit);
 			}
-			bit = (ether_crc_le(6, mclist->dmi_addr) >> 3) & 0x3f;
+			bit = (ether_crc_le(6, ha->addr) >> 3) & 0x3f;
 			hash_table[bit >> 4] |= (1 << bit);
 		}
 		/* Copy the hash table to the chip. */
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 7576ad5a833a..945f3e0a9f06 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -607,7 +607,6 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p)
 static void qeth_l2_set_multicast_list(struct net_device *dev)
 {
 	struct qeth_card *card = dev->ml_priv;
-	struct dev_addr_list *dm;
 	struct netdev_hw_addr *ha;
 
 	if (card->info.type == QETH_CARD_TYPE_OSN)
@@ -619,8 +618,8 @@ static void qeth_l2_set_multicast_list(struct net_device *dev)
 		return;
 	qeth_l2_del_all_mc(card);
 	spin_lock_bh(&card->mclock);
-	netdev_for_each_mc_addr(dm, dev)
-		qeth_l2_add_mc(card, dm->da_addr, 0);
+	netdev_for_each_mc_addr(ha, dev)
+		qeth_l2_add_mc(card, ha->addr, 0);
 
 	netdev_for_each_uc_addr(ha, dev)
 		qeth_l2_add_mc(card, ha->addr, 1);
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index de33e38a4059..37de40e01684 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -311,7 +311,7 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe,
 	dev_uc_add(netdev, flogi_maddr);
 	if (fip->spma)
 		dev_uc_add(netdev, fip->ctl_src_addr);
-	dev_mc_add(netdev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0);
+	dev_mc_add(netdev, FIP_ALL_ENODE_MACS);
 
 	/*
 	 * setup the receive function from ethernet driver
@@ -397,7 +397,7 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
 	dev_uc_del(netdev, flogi_maddr);
 	if (fip->spma)
 		dev_uc_del(netdev, fip->ctl_src_addr);
-	dev_mc_delete(netdev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0);
+	dev_mc_del(netdev, FIP_ALL_ENODE_MACS);
 
 	/* Tell the LLD we are done w/ FCoE */
 	ops = netdev->netdev_ops;
diff --git a/drivers/staging/arlan/arlan-main.c b/drivers/staging/arlan/arlan-main.c
index 88fdd53cf5d3..80284522c42b 100644
--- a/drivers/staging/arlan/arlan-main.c
+++ b/drivers/staging/arlan/arlan-main.c
@@ -1458,7 +1458,7 @@ static void arlan_rx_interrupt(struct net_device *dev, u_char rxStatus, u_short
 				!netdev_mc_empty(dev))
 			{
 				char hw_dst_addr[6];
-				struct dev_mc_list *dmi;
+				struct netdev_hw_addr *ha;
 				int i;
 
 				memcpy_fromio(hw_dst_addr, arlan->ultimateDestAddress, 6);
@@ -1469,12 +1469,13 @@ static void arlan_rx_interrupt(struct net_device *dev, u_char rxStatus, u_short
 							printk(KERN_ERR "%s mcast 0x0100 \n", dev->name);
 						else if (hw_dst_addr[1] == 0x40)
 							printk(KERN_ERR "%s m/bcast 0x0140 \n", dev->name);
-					netdev_for_each_mc_entry(dmi, dev) {
+					netdev_for_each_mc_entry(ha, dev) {
 						if (arlan_debug & ARLAN_DEBUG_HEADER_DUMP)
 							printk(KERN_ERR "%s mcl %pM\n",
-							       dev->name, dmi->dmi_addr);
+							       dev->name,
+							       ha->addr);
 						for (i = 0; i < 6; i++)
-							if (dmi->dmi_addr[i] != hw_dst_addr[i])
+							if (ha->addr[i] != hw_dst_addr[i])
 								break;
 						if (i == 6)
 							break;
diff --git a/drivers/staging/et131x/et131x_netdev.c b/drivers/staging/et131x/et131x_netdev.c
index 40f8954dde47..2fb89cddef16 100644
--- a/drivers/staging/et131x/et131x_netdev.c
+++ b/drivers/staging/et131x/et131x_netdev.c
@@ -405,7 +405,7 @@ void et131x_multicast(struct net_device *netdev)
 	struct et131x_adapter *adapter = netdev_priv(netdev);
 	uint32_t PacketFilter = 0;
 	unsigned long flags;
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	int i;
 
 	spin_lock_irqsave(&adapter->Lock, flags);
@@ -450,10 +450,10 @@ void et131x_multicast(struct net_device *netdev)
 
 	/* Set values in the private adapter struct */
 	i = 0;
-	netdev_for_each_mc_addr(mclist, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		if (i == NIC_MAX_MCAST_LIST)
 			break;
-		memcpy(adapter->MCList[i++], mclist->dmi_addr, ETH_ALEN);
+		memcpy(adapter->MCList[i++], ha->addr, ETH_ALEN);
 	}
 	adapter->MCAddressCount = i;
 
diff --git a/drivers/staging/slicoss/slicoss.c b/drivers/staging/slicoss/slicoss.c
index 7daeced317c4..bebf0fd2af85 100644
--- a/drivers/staging/slicoss/slicoss.c
+++ b/drivers/staging/slicoss/slicoss.c
@@ -1367,12 +1367,12 @@ static void slic_mcast_set_list(struct net_device *dev)
 	struct adapter *adapter = netdev_priv(dev);
 	int status = STATUS_SUCCESS;
 	char *addresses;
-	struct dev_mc_list *mc_list;
+	struct netdev_hw_addr *ha;
 
 	ASSERT(adapter);
 
-	netdev_for_each_mc_addr(mc_list, dev) {
-		addresses = (char *) &mc_list->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addresses = (char *) &ha->addr;
 		status = slic_mcast_add_list(adapter, addresses);
 		if (status != STATUS_SUCCESS)
 			break;
diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c
index 1d643653a7ed..b698de40a60e 100644
--- a/drivers/staging/vt6655/device_main.c
+++ b/drivers/staging/vt6655/device_main.c
@@ -3079,7 +3079,7 @@ static void device_set_multi(struct net_device *dev) {
 
     PSMgmtObject     pMgmt = pDevice->pMgmt;
     u32              mc_filter[2];
-    struct dev_mc_list *mclist;
+    struct netdev_hw_addr *ha;
 
 
     VNSvInPortB(pDevice->PortOffset + MAC_REG_RCR, &(pDevice->byRxMode));
@@ -3099,8 +3099,8 @@ static void device_set_multi(struct net_device *dev) {
     }
     else {
         memset(mc_filter, 0, sizeof(mc_filter));
-	netdev_for_each_mc_addr(mclist, dev) {
-            int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+	netdev_for_each_mc_addr(ha, dev) {
+            int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
             mc_filter[bit_nr >> 5] |= cpu_to_le32(1 << (bit_nr & 31));
         }
         MACvSelectPage1(pDevice->PortOffset);
diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c
index a8e1adbc9592..49270db98fbb 100644
--- a/drivers/staging/vt6656/main_usb.c
+++ b/drivers/staging/vt6656/main_usb.c
@@ -1596,7 +1596,7 @@ static void device_set_multi(struct net_device *dev) {
     PSMgmtObject     pMgmt = &(pDevice->sMgmtObj);
     u32              mc_filter[2];
     int              ii;
-    struct dev_mc_list *mclist;
+    struct netdev_hw_addr *ha;
     BYTE             pbyData[8] = {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff};
     BYTE             byTmpMode = 0;
     int              rc;
@@ -1632,8 +1632,8 @@ static void device_set_multi(struct net_device *dev) {
     }
     else {
         memset(mc_filter, 0, sizeof(mc_filter));
-	netdev_for_each_mc_addr(mclist, dev) {
-            int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+	netdev_for_each_mc_addr(ha, dev) {
+            int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
             mc_filter[bit_nr >> 5] |= cpu_to_le32(1 << (bit_nr & 31));
         }
         for (ii = 0; ii < 4; ii++) {
diff --git a/drivers/staging/wavelan/wavelan.c b/drivers/staging/wavelan/wavelan.c
index 54ca63196fdd..f44ef351647b 100644
--- a/drivers/staging/wavelan/wavelan.c
+++ b/drivers/staging/wavelan/wavelan.c
@@ -3419,7 +3419,7 @@ static void wv_82586_config(struct net_device * dev)
 	ac_cfg_t cfg;		/* Configure action */
 	ac_ias_t ias;		/* IA-setup action */
 	ac_mcs_t mcs;		/* Multicast setup */
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
 #ifdef DEBUG_CONFIG_TRACE
 	printk(KERN_DEBUG "%s: ->wv_82586_config()\n", dev->name);
@@ -3531,16 +3531,16 @@ static void wv_82586_config(struct net_device * dev)
 
 	/* Any address to set? */
 	if (lp->mc_count) {
-		netdev_for_each_mc_addr(dmi, dev)
-			outsw(PIOP1(ioaddr), (u16 *) dmi->dmi_addr,
+		netdev_for_each_mc_addr(ha, dev)
+			outsw(PIOP1(ioaddr), (u16 *) ha->addr,
 			      WAVELAN_ADDR_SIZE >> 1);
 
 #ifdef DEBUG_CONFIG_INFO
 		printk(KERN_DEBUG
 		       "%s: wv_82586_config(): set %d multicast addresses:\n",
 		       dev->name, lp->mc_count);
-		netdev_for_each_mc_addr(dmi, dev)
-			printk(KERN_DEBUG " %pM\n", dmi->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			printk(KERN_DEBUG " %pM\n", ha->addr);
 #endif
 	}
 
diff --git a/drivers/staging/wavelan/wavelan_cs.c b/drivers/staging/wavelan/wavelan_cs.c
index 04f691d127b4..a90132a204e6 100644
--- a/drivers/staging/wavelan/wavelan_cs.c
+++ b/drivers/staging/wavelan/wavelan_cs.c
@@ -3591,20 +3591,20 @@ wv_82593_config(struct net_device *	dev)
     /* If roaming is enabled, join the "Beacon Request" multicast group... */
     /* But only if it's not in there already! */
   if(do_roaming)
-    dev_mc_add(dev,WAVELAN_BEACON_ADDRESS, WAVELAN_ADDR_SIZE, 1);
+    dev_mc_add(dev, WAVELAN_BEACON_ADDRESS);
 #endif	/* WAVELAN_ROAMING */
 
   /* If any multicast address to set */
   if(lp->mc_count)
     {
-      struct dev_mc_list *dmi;
+      struct netdev_hw_addr *ha;
       int			addrs_len = WAVELAN_ADDR_SIZE * lp->mc_count;
 
 #ifdef DEBUG_CONFIG_INFO
       printk(KERN_DEBUG "%s: wv_hw_config(): set %d multicast addresses:\n",
 	     dev->name, lp->mc_count);
-      netdev_for_each_mc_addr(dmi, dev)
-	printk(KERN_DEBUG " %pM\n", dmi->dmi_addr);
+      netdev_for_each_mc_addr(ha, dev)
+	printk(KERN_DEBUG " %pM\n", ha->addr);
 #endif
 
       /* Initialize adapter's ethernet multicast addresses */
@@ -3612,8 +3612,8 @@ wv_82593_config(struct net_device *	dev)
       outb(((TX_BASE >> 8) & PIORH_MASK) | PIORH_SEL_TX, PIORH(base));
       outb(addrs_len & 0xff, PIOP(base));	/* byte count lsb */
       outb((addrs_len >> 8), PIOP(base));	/* byte count msb */
-      netdev_for_each_mc_addr(dmi, dev)
-	outsb(PIOP(base), dmi->dmi_addr, dmi->dmi_addrlen);
+      netdev_for_each_mc_addr(ha, dev)
+	outsb(PIOP(base), ha->addr, dev->addr_len);
 
       /* reset transmit DMA pointer */
       hacr_write_slow(base, HACR_PWR_STAT | HACR_TX_DMA_RESET);
diff --git a/drivers/staging/winbond/wbusb.c b/drivers/staging/winbond/wbusb.c
index 3482eec18651..5d9499bba9cc 100644
--- a/drivers/staging/winbond/wbusb.c
+++ b/drivers/staging/winbond/wbusb.c
@@ -92,10 +92,10 @@ static int wbsoft_get_stats(struct ieee80211_hw *hw,
 	return 0;
 }
 
-static u64 wbsoft_prepare_multicast(struct ieee80211_hw *hw, int mc_count,
-				    struct dev_addr_list *mc_list)
+static u64 wbsoft_prepare_multicast(struct ieee80211_hw *hw,
+				    struct netdev_hw_addr_list *mc_list)
 {
-	return mc_count;
+	return netdev_hw_addr_list_count(mc_list);
 }
 
 static void wbsoft_configure_filter(struct ieee80211_hw *dev,
diff --git a/drivers/staging/wlags49_h2/wl_netdev.c b/drivers/staging/wlags49_h2/wl_netdev.c
index fa082d90fcad..d7532e89f5fc 100644
--- a/drivers/staging/wlags49_h2/wl_netdev.c
+++ b/drivers/staging/wlags49_h2/wl_netdev.c
@@ -1049,7 +1049,7 @@ void wl_multicast( struct net_device *dev )
 //;?seems reasonable that even an AP-only driver could afford this small additional footprint
 
     int                 x;
-    struct dev_mc_list *mclist;
+    struct netdev_hw_addr *ha;
     struct wl_private   *lp = wl_priv(dev);
     unsigned long       flags;
     /*------------------------------------------------------------------------*/
@@ -1072,9 +1072,9 @@ void wl_multicast( struct net_device *dev )
 
         DBG_PRINT( "  mc_count: %d\n", netdev_mc_count(dev));
 
-	netdev_for_each_mc_addr(mclist, dev)
-            DBG_PRINT( "    %s (%d)\n", DbgHwAddr(mclist->dmi_addr),
-                       mclist->dmi_addrlen );
+	netdev_for_each_mc_addr(ha, dev)
+            DBG_PRINT("    %s (%d)\n", DbgHwAddr(ha->addr),
+		      dev->addr_len);
     }
 #endif /* DBG */
 
@@ -1119,9 +1119,9 @@ void wl_multicast( struct net_device *dev )
                 lp->ltvRecord.typ = CFG_GROUP_ADDR;
 
 		x = 0;
-		netdev_for_each_mc_addr(mclist, dev)
+		netdev_for_each_mc_addr(ha, dev)
                     memcpy(&(lp->ltvRecord.u.u8[x++ * ETH_ALEN]),
-                           mclist->dmi_addr, ETH_ALEN);
+			   ha->addr, ETH_ALEN);
                 DBG_PRINT( "Setting multicast list\n" );
                 hcf_put_info( &( lp->hcfCtx ), (LTVP)&( lp->ltvRecord ));
             } else {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 60f0c83192fe..a343a21ba8b9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -228,25 +228,6 @@ struct netif_rx_stats {
 
 DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat);
 
-struct dev_addr_list {
-	struct dev_addr_list	*next;
-	u8			da_addr[MAX_ADDR_LEN];
-	u8			da_addrlen;
-	u8			da_synced;
-	int			da_users;
-	int			da_gusers;
-};
-
-/*
- *	We tag multicasts with these structures.
- */
-
-#define dev_mc_list	dev_addr_list
-#define dmi_addr	da_addr
-#define dmi_addrlen	da_addrlen
-#define dmi_users	da_users
-#define dmi_gusers	da_gusers
-
 struct netdev_hw_addr {
 	struct list_head	list;
 	unsigned char		addr[MAX_ADDR_LEN];
@@ -255,8 +236,10 @@ struct netdev_hw_addr {
 #define NETDEV_HW_ADDR_T_SAN		2
 #define NETDEV_HW_ADDR_T_SLAVE		3
 #define NETDEV_HW_ADDR_T_UNICAST	4
+#define NETDEV_HW_ADDR_T_MULTICAST	5
 	int			refcount;
 	bool			synced;
+	bool			global_use;
 	struct rcu_head		rcu_head;
 };
 
@@ -265,16 +248,20 @@ struct netdev_hw_addr_list {
 	int			count;
 };
 
-#define netdev_uc_count(dev) ((dev)->uc.count)
-#define netdev_uc_empty(dev) ((dev)->uc.count == 0)
-#define netdev_for_each_uc_addr(ha, dev) \
-	list_for_each_entry(ha, &dev->uc.list, list)
+#define netdev_hw_addr_list_count(l) ((l)->count)
+#define netdev_hw_addr_list_empty(l) (netdev_hw_addr_list_count(l) == 0)
+#define netdev_hw_addr_list_for_each(ha, l) \
+	list_for_each_entry(ha, &(l)->list, list)
 
-#define netdev_mc_count(dev) ((dev)->mc_count)
-#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0)
+#define netdev_uc_count(dev) netdev_hw_addr_list_count(&(dev)->uc)
+#define netdev_uc_empty(dev) netdev_hw_addr_list_empty(&(dev)->uc)
+#define netdev_for_each_uc_addr(ha, dev) \
+	netdev_hw_addr_list_for_each(ha, &(dev)->uc)
 
+#define netdev_mc_count(dev) netdev_hw_addr_list_count(&(dev)->mc)
+#define netdev_mc_empty(dev) netdev_hw_addr_list_empty(&(dev)->mc)
 #define netdev_for_each_mc_addr(mclist, dev) \
-	for (mclist = dev->mc_list; mclist; mclist = mclist->next)
+	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
 
 struct hh_cache {
 	struct hh_cache *hh_next;	/* Next entry			     */
@@ -862,12 +849,10 @@ struct net_device {
 	unsigned char		addr_len;	/* hardware address length	*/
 	unsigned short          dev_id;		/* for shared network cards */
 
-	struct netdev_hw_addr_list	uc;	/* Secondary unicast
-						   mac addresses */
-	int			uc_promisc;
 	spinlock_t		addr_list_lock;
-	struct dev_addr_list	*mc_list;	/* Multicast mac addresses	*/
-	int			mc_count;	/* Number of installed mcasts	*/
+	struct netdev_hw_addr_list	uc;	/* Unicast mac addresses */
+	struct netdev_hw_addr_list	mc;	/* Multicast mac addresses */
+	int			uc_promisc;
 	unsigned int		promiscuity;
 	unsigned int		allmulti;
 
@@ -1980,6 +1965,22 @@ extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 extern int		register_netdev(struct net_device *dev);
 extern void		unregister_netdev(struct net_device *dev);
 
+/* General hardware address lists handling functions */
+extern int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+				  struct netdev_hw_addr_list *from_list,
+				  int addr_len, unsigned char addr_type);
+extern void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+				   struct netdev_hw_addr_list *from_list,
+				   int addr_len, unsigned char addr_type);
+extern int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+			  struct netdev_hw_addr_list *from_list,
+			  int addr_len);
+extern void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+			     struct netdev_hw_addr_list *from_list,
+			     int addr_len);
+extern void __hw_addr_flush(struct netdev_hw_addr_list *list);
+extern void __hw_addr_init(struct netdev_hw_addr_list *list);
+
 /* Functions used for device addresses handling */
 extern int dev_addr_add(struct net_device *dev, unsigned char *addr,
 			unsigned char addr_type);
@@ -2002,18 +2003,19 @@ extern void dev_uc_unsync(struct net_device *to, struct net_device *from);
 extern void dev_uc_flush(struct net_device *dev);
 extern void dev_uc_init(struct net_device *dev);
 
+/* Functions used for multicast addresses handling */
+extern int dev_mc_add(struct net_device *dev, unsigned char *addr);
+extern int dev_mc_add_global(struct net_device *dev, unsigned char *addr);
+extern int dev_mc_del(struct net_device *dev, unsigned char *addr);
+extern int dev_mc_del_global(struct net_device *dev, unsigned char *addr);
+extern int dev_mc_sync(struct net_device *to, struct net_device *from);
+extern void dev_mc_unsync(struct net_device *to, struct net_device *from);
+extern void dev_mc_flush(struct net_device *dev);
+extern void dev_mc_init(struct net_device *dev);
+
 /* Functions used for secondary unicast and multicast support */
 extern void		dev_set_rx_mode(struct net_device *dev);
 extern void		__dev_set_rx_mode(struct net_device *dev);
-extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
-extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
-extern int		dev_mc_sync(struct net_device *to, struct net_device *from);
-extern void		dev_mc_unsync(struct net_device *to, struct net_device *from);
-extern void		dev_addr_discard(struct net_device *dev);
-extern int 		__dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all);
-extern int		__dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly);
-extern int		__dev_addr_sync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count);
-extern void		__dev_addr_unsync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count);
 extern int		dev_set_promiscuity(struct net_device *dev, int inc);
 extern int		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 1a8f50af49a0..20823d04e03c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1626,7 +1626,7 @@ struct ieee80211_ops {
 				 struct ieee80211_bss_conf *info,
 				 u32 changed);
 	u64 (*prepare_multicast)(struct ieee80211_hw *hw,
-				 int mc_count, struct dev_addr_list *mc_list);
+				 struct netdev_hw_addr_list *mc_list);
 	void (*configure_filter)(struct ieee80211_hw *hw,
 				 unsigned int changed_flags,
 				 unsigned int *total_flags,
diff --git a/net/802/garp.c b/net/802/garp.c
index 1dcb0660c49d..78cff9ec2cb4 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -575,7 +575,7 @@ int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
 	if (!app)
 		goto err2;
 
-	err = dev_mc_add(dev, appl->proto.group_address, ETH_ALEN, 0);
+	err = dev_mc_add(dev, appl->proto.group_address);
 	if (err < 0)
 		goto err3;
 
@@ -615,7 +615,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
 	garp_pdu_queue(app);
 	garp_queue_xmit(app);
 
-	dev_mc_delete(dev, appl->proto.group_address, ETH_ALEN, 0);
+	dev_mc_del(dev, appl->proto.group_address);
 	kfree(app);
 	garp_release_port(dev);
 }
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 9fc4da56fb1d..1d15a60b23af 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -781,7 +781,7 @@ static int atif_ioctl(int cmd, void __user *arg)
 						atrtr_create(&rtdef, dev);
 					}
 			}
-			dev_mc_add(dev, aarp_mcast, 6, 1);
+			dev_mc_add_global(dev, aarp_mcast);
 			return 0;
 
 		case SIOCGIFADDR:
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 326ab453edb7..260a9507e542 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -87,7 +87,7 @@ static void bnep_net_set_mc_list(struct net_device *dev)
 		memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
 		r->len = htons(ETH_ALEN * 2);
 	} else {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		int i, len = skb->len;
 
 		if (dev->flags & IFF_BROADCAST) {
@@ -98,11 +98,13 @@ static void bnep_net_set_mc_list(struct net_device *dev)
 		/* FIXME: We should group addresses here. */
 
 		i = 0;
-		netdev_for_each_mc_addr(dmi, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			if (i == BNEP_MAX_MULTICAST_FILTERS)
 				break;
-			memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN);
-			memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN);
+			memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
+			memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
+			/* count the entry so the filter cap above is enforced */
+			i++;
 		}
 		r->len = htons(skb->len - len);
 	}
diff --git a/net/core/Makefile b/net/core/Makefile
index 0a899f1aadb9..51c3eec850ef 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -7,9 +7,8 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
-obj-y		     += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
-			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
-			dev_addr_lists.o
+obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
+			neighbour.o rtnetlink.o utils.o link_watch.o filter.o
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 949c62dba719..2a9b7dd0bb6e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3968,140 +3968,6 @@ void dev_set_rx_mode(struct net_device *dev)
 	netif_addr_unlock_bh(dev);
 }
 
-/* multicast addresses handling functions */
-
-int __dev_addr_delete(struct dev_addr_list **list, int *count,
-		      void *addr, int alen, int glbl)
-{
-	struct dev_addr_list *da;
-
-	for (; (da = *list) != NULL; list = &da->next) {
-		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
-		    alen == da->da_addrlen) {
-			if (glbl) {
-				int old_glbl = da->da_gusers;
-				da->da_gusers = 0;
-				if (old_glbl == 0)
-					break;
-			}
-			if (--da->da_users)
-				return 0;
-
-			*list = da->next;
-			kfree(da);
-			(*count)--;
-			return 0;
-		}
-	}
-	return -ENOENT;
-}
-
-int __dev_addr_add(struct dev_addr_list **list, int *count,
-		   void *addr, int alen, int glbl)
-{
-	struct dev_addr_list *da;
-
-	for (da = *list; da != NULL; da = da->next) {
-		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
-		    da->da_addrlen == alen) {
-			if (glbl) {
-				int old_glbl = da->da_gusers;
-				da->da_gusers = 1;
-				if (old_glbl)
-					return 0;
-			}
-			da->da_users++;
-			return 0;
-		}
-	}
-
-	da = kzalloc(sizeof(*da), GFP_ATOMIC);
-	if (da == NULL)
-		return -ENOMEM;
-	memcpy(da->da_addr, addr, alen);
-	da->da_addrlen = alen;
-	da->da_users = 1;
-	da->da_gusers = glbl ? 1 : 0;
-	da->next = *list;
-	*list = da;
-	(*count)++;
-	return 0;
-}
-
-
-int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
-		    struct dev_addr_list **from, int *from_count)
-{
-	struct dev_addr_list *da, *next;
-	int err = 0;
-
-	da = *from;
-	while (da != NULL) {
-		next = da->next;
-		if (!da->da_synced) {
-			err = __dev_addr_add(to, to_count,
-					     da->da_addr, da->da_addrlen, 0);
-			if (err < 0)
-				break;
-			da->da_synced = 1;
-			da->da_users++;
-		} else if (da->da_users == 1) {
-			__dev_addr_delete(to, to_count,
-					  da->da_addr, da->da_addrlen, 0);
-			__dev_addr_delete(from, from_count,
-					  da->da_addr, da->da_addrlen, 0);
-		}
-		da = next;
-	}
-	return err;
-}
-EXPORT_SYMBOL_GPL(__dev_addr_sync);
-
-void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
-		       struct dev_addr_list **from, int *from_count)
-{
-	struct dev_addr_list *da, *next;
-
-	da = *from;
-	while (da != NULL) {
-		next = da->next;
-		if (da->da_synced) {
-			__dev_addr_delete(to, to_count,
-					  da->da_addr, da->da_addrlen, 0);
-			da->da_synced = 0;
-			__dev_addr_delete(from, from_count,
-					  da->da_addr, da->da_addrlen, 0);
-		}
-		da = next;
-	}
-}
-EXPORT_SYMBOL_GPL(__dev_addr_unsync);
-
-static void __dev_addr_discard(struct dev_addr_list **list)
-{
-	struct dev_addr_list *tmp;
-
-	while (*list != NULL) {
-		tmp = *list;
-		*list = tmp->next;
-		if (tmp->da_users > tmp->da_gusers)
-			printk("__dev_addr_discard: address leakage! "
-			       "da_users=%d\n", tmp->da_users);
-		kfree(tmp);
-	}
-}
-
-void dev_addr_discard(struct net_device *dev)
-{
-	netif_addr_lock_bh(dev);
-
-	__dev_addr_discard(&dev->mc_list);
-	netdev_mc_count(dev) = 0;
-
-	netif_addr_unlock_bh(dev);
-}
-EXPORT_SYMBOL(dev_addr_discard);
-
 /**
  *	dev_get_flags - get flags reported to userspace
  *	@dev: device
@@ -4412,8 +4278,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 			return -EINVAL;
 		if (!netif_device_present(dev))
 			return -ENODEV;
-		return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
-				  dev->addr_len, 1);
+		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
 
 	case SIOCDELMULTI:
 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
@@ -4421,8 +4286,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 			return -EINVAL;
 		if (!netif_device_present(dev))
 			return -ENODEV;
-		return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
-				     dev->addr_len, 1);
+		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
 
 	case SIOCSIFTXQLEN:
 		if (ifr->ifr_qlen < 0)
@@ -4730,7 +4594,7 @@ static void rollback_registered_many(struct list_head *head)
 		 *	Flush the unicast and multicast chains
 		 */
 		dev_uc_flush(dev);
-		dev_addr_discard(dev);
+		dev_mc_flush(dev);
 
 		if (dev->netdev_ops->ndo_uninit)
 			dev->netdev_ops->ndo_uninit(dev);
@@ -5310,6 +5174,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	if (dev_addr_init(dev))
 		goto free_rx;
 
+	dev_mc_init(dev);
 	dev_uc_init(dev);
 
 	dev_net_set(dev, &init_net);
@@ -5545,7 +5410,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	 *	Flush the unicast and multicast chains
 	 */
 	dev_uc_flush(dev);
-	dev_addr_discard(dev);
+	dev_mc_flush(dev);
 
 	netdev_unregister_kobject(dev);
 
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 7e52b6d18add..37d5975e18a3 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -19,8 +19,9 @@
  * General list handling functions
  */
 
-static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
+static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
+			    unsigned char *addr, int addr_len,
+			    unsigned char addr_type, bool global)
 {
 	struct netdev_hw_addr *ha;
 	int alloc_size;
@@ -31,6 +32,13 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
 	list_for_each_entry(ha, &list->list, list) {
 		if (!memcmp(ha->addr, addr, addr_len) &&
 		    ha->type == addr_type) {
+			if (global) {
+				/* check if addr is already used as global */
+				if (ha->global_use)
+					return 0;
+				else
+					ha->global_use = true;
+			}
 			ha->refcount++;
 			return 0;
 		}
@@ -46,12 +54,19 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
 	memcpy(ha->addr, addr, addr_len);
 	ha->type = addr_type;
 	ha->refcount = 1;
+	ha->global_use = global;
 	ha->synced = false;
 	list_add_tail_rcu(&ha->list, &list->list);
 	list->count++;
 	return 0;
 }
 
+static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
+}
+
 static void ha_rcu_free(struct rcu_head *head)
 {
 	struct netdev_hw_addr *ha;
@@ -60,14 +75,21 @@ static void ha_rcu_free(struct rcu_head *head)
 	kfree(ha);
 }
 
-static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
+static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
+			    unsigned char *addr, int addr_len,
+			    unsigned char addr_type, bool global)
 {
 	struct netdev_hw_addr *ha;
 
 	list_for_each_entry(ha, &list->list, list) {
 		if (!memcmp(ha->addr, addr, addr_len) &&
 		    (ha->type == addr_type || !addr_type)) {
+			if (global) {
+				if (!ha->global_use)
+					break;
+				else
+					ha->global_use = false;
+			}
 			if (--ha->refcount)
 				return 0;
 			list_del_rcu(&ha->list);
@@ -79,10 +101,15 @@ static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
 	return -ENOENT;
 }
 
-static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
-				  struct netdev_hw_addr_list *from_list,
-				  int addr_len,
-				  unsigned char addr_type)
+static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
+}
+
+int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+			   struct netdev_hw_addr_list *from_list,
+			   int addr_len, unsigned char addr_type)
 {
 	int err;
 	struct netdev_hw_addr *ha, *ha2;
@@ -105,11 +132,11 @@ unroll:
 	}
 	return err;
 }
+EXPORT_SYMBOL(__hw_addr_add_multiple);
 
-static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
-				   struct netdev_hw_addr_list *from_list,
-				   int addr_len,
-				   unsigned char addr_type)
+void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+			    struct netdev_hw_addr_list *from_list,
+			    int addr_len, unsigned char addr_type)
 {
 	struct netdev_hw_addr *ha;
 	unsigned char type;
@@ -119,10 +146,11 @@ static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
 		__hw_addr_del(to_list, ha->addr, addr_len, addr_type);
 	}
 }
+EXPORT_SYMBOL(__hw_addr_del_multiple);
 
-static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
-			  struct netdev_hw_addr_list *from_list,
-			  int addr_len)
+int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+		   struct netdev_hw_addr_list *from_list,
+		   int addr_len)
 {
 	int err = 0;
 	struct netdev_hw_addr *ha, *tmp;
@@ -142,10 +170,11 @@ static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
 	}
 	return err;
 }
+EXPORT_SYMBOL(__hw_addr_sync);
 
-static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
-			     struct netdev_hw_addr_list *from_list,
-			     int addr_len)
+void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+		      struct netdev_hw_addr_list *from_list,
+		      int addr_len)
 {
 	struct netdev_hw_addr *ha, *tmp;
 
@@ -159,8 +188,9 @@ static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
 		}
 	}
 }
+EXPORT_SYMBOL(__hw_addr_unsync);
 
-static void __hw_addr_flush(struct netdev_hw_addr_list *list)
+void __hw_addr_flush(struct netdev_hw_addr_list *list)
 {
 	struct netdev_hw_addr *ha, *tmp;
 
@@ -170,12 +200,14 @@ static void __hw_addr_flush(struct netdev_hw_addr_list *list)
 	}
 	list->count = 0;
 }
+EXPORT_SYMBOL(__hw_addr_flush);
 
-static void __hw_addr_init(struct netdev_hw_addr_list *list)
+void __hw_addr_init(struct netdev_hw_addr_list *list)
 {
 	INIT_LIST_HEAD(&list->list);
 	list->count = 0;
 }
+EXPORT_SYMBOL(__hw_addr_init);
 
 /*
  * Device addresses handling functions
@@ -475,4 +507,235 @@ EXPORT_SYMBOL(dev_uc_init);
  * Multicast list handling functions
  */
 
-/* To be filled here */
+static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
+			bool global)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
+			       NETDEV_HW_ADDR_T_MULTICAST, global);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+/**
+ *	dev_mc_add - Add a multicast address
+ *	@dev: device
+ *	@addr: address to add
+ *
+ *	Add a multicast address to the device or increase
+ *	the reference count if it already exists.
+ */
+int dev_mc_add(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_add(dev, addr, false);
+}
+EXPORT_SYMBOL(dev_mc_add);
+
+/**
+ *	dev_mc_add_global - Add a global multicast address
+ *	@dev: device
+ *	@addr: address to add
+ *
+ *	Add a global multicast address to the device.
+ */
+int dev_mc_add_global(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_add(dev, addr, true);
+}
+EXPORT_SYMBOL(dev_mc_add_global);
+
+static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
+			bool global)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len,
+			       NETDEV_HW_ADDR_T_MULTICAST, global);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+
+/**
+ *	dev_mc_del - Delete a multicast address.
+ *	@dev: device
+ *	@addr: address to delete
+ *
+ *	Release reference to a multicast address and remove it
+ *	from the device if the reference count drops to zero.
+ */
+int dev_mc_del(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_del(dev, addr, false);
+}
+EXPORT_SYMBOL(dev_mc_del);
+
+/**
+ *	dev_mc_del_global - Delete a global multicast address.
+ *	@dev: device
+ *	@addr: address to delete
+ *
+ *	Release reference to a multicast address and remove it
+ *	from the device if the reference count drops to zero.
+ */
+int dev_mc_del_global(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_del(dev, addr, true);
+}
+EXPORT_SYMBOL(dev_mc_del_global);
+
+/**
+ *	dev_mc_sync - Synchronize device's multicast list to another device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Add newly added addresses to the destination device and release
+ *	addresses that have no users left. The source device must be
+ *	locked by netif_addr_lock_bh.
+ *
+ *	This function is intended to be called from the dev->set_multicast_list
+ *	or dev->set_rx_mode function of layered software devices.
+ */
+int dev_mc_sync(struct net_device *to, struct net_device *from)
+{
+	int err = 0;
+
+	if (to->addr_len != from->addr_len)
+		return -EINVAL;
+
+	netif_addr_lock_bh(to);
+	err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
+	if (!err)
+		__dev_set_rx_mode(to);
+	netif_addr_unlock_bh(to);
+	return err;
+}
+EXPORT_SYMBOL(dev_mc_sync);
+
+/**
+ *	dev_mc_unsync - Remove synchronized addresses from the destination device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Remove all addresses that were added to the destination device by
+ *	dev_mc_sync(). This function is intended to be called from the
+ *	dev->stop function of layered software devices.
+ */
+void dev_mc_unsync(struct net_device *to, struct net_device *from)
+{
+	if (to->addr_len != from->addr_len)
+		return;
+
+	netif_addr_lock_bh(from);
+	netif_addr_lock(to);
+	__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
+	__dev_set_rx_mode(to);
+	netif_addr_unlock(to);
+	netif_addr_unlock_bh(from);
+}
+EXPORT_SYMBOL(dev_mc_unsync);
+
+/**
+ *	dev_mc_flush - Flush multicast addresses
+ *	@dev: device
+ *
+ *	Flush multicast addresses.
+ */
+void dev_mc_flush(struct net_device *dev)
+{
+	netif_addr_lock_bh(dev);
+	__hw_addr_flush(&dev->mc);
+	netif_addr_unlock_bh(dev);
+}
+EXPORT_SYMBOL(dev_mc_flush);
+
+/**
+ *	dev_mc_init - Init multicast address list
+ *	@dev: device
+ *
+ *	Init multicast address list.
+ */
+void dev_mc_init(struct net_device *dev)
+{
+	__hw_addr_init(&dev->mc);
+}
+EXPORT_SYMBOL(dev_mc_init);
+
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+static int dev_mc_seq_show(struct seq_file *seq, void *v)
+{
+	struct netdev_hw_addr *ha;
+	struct net_device *dev = v;
+
+	if (v == SEQ_START_TOKEN)
+		return 0;
+
+	netif_addr_lock_bh(dev);
+	netdev_for_each_mc_addr(ha, dev) {
+		int i;
+
+		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
+			   dev->name, ha->refcount, ha->global_use);
+
+		for (i = 0; i < dev->addr_len; i++)
+			seq_printf(seq, "%02x", ha->addr[i]);
+
+		seq_putc(seq, '\n');
+	}
+	netif_addr_unlock_bh(dev);
+	return 0;
+}
+
+static const struct seq_operations dev_mc_seq_ops = {
+	.start = dev_seq_start,
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = dev_mc_seq_show,
+};
+
+static int dev_mc_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &dev_mc_seq_ops,
+			    sizeof(struct seq_net_private));
+}
+
+static const struct file_operations dev_mc_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_mc_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+
+#endif
+
+static int __net_init dev_mc_net_init(struct net *net)
+{
+	if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
+		return -ENOMEM;
+	return 0;
+}
+
+static void __net_exit dev_mc_net_exit(struct net *net)
+{
+	proc_net_remove(net, "dev_mcast");
+}
+
+static struct pernet_operations __net_initdata dev_mc_net_ops = {
+	.init = dev_mc_net_init,
+	.exit = dev_mc_net_exit,
+};
+
+void __init dev_mcast_init(void)
+{
+	register_pernet_subsys(&dev_mc_net_ops);
+}
+
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
deleted file mode 100644
index 3dc295beb483..000000000000
--- a/net/core/dev_mcast.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- *	Linux NET3:	Multicast List maintenance.
- *
- *	Authors:
- *		Tim Kordas <tjk@nostromo.eeap.cwru.edu>
- *		Richard Underwood <richard@wuzz.demon.co.uk>
- *
- *	Stir fried together from the IP multicast and CAP patches above
- *		Alan Cox <alan@lxorguk.ukuu.org.uk>
- *
- *	Fixes:
- *		Alan Cox	:	Update the device on a real delete
- *					rather than any time but...
- *		Alan Cox	:	IFF_ALLMULTI support.
- *		Alan Cox	: 	New format set_multicast_list() calls.
- *		Gleb Natapov    :       Remove dev_mc_lock.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <net/net_namespace.h>
-#include <net/ip.h>
-#include <net/route.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/arp.h>
-
-
-/*
- *	Device multicast list maintenance.
- *
- *	This is used both by IP and by the user level maintenance functions.
- *	Unlike BSD we maintain a usage count on a given multicast address so
- *	that a casual user application can add/delete multicasts used by
- *	protocols without doing damage to the protocols when it deletes the
- *	entries. It also helps IP as it tracks overlapping maps.
- *
- *	Device mc lists are changed by bh at least if IPv6 is enabled,
- *	so that it must be bh protected.
- *
- *	We block accesses to device mc filters with netif_tx_lock.
- */
-
-/*
- *	Delete a device level multicast
- */
-
-int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
-{
-	int err;
-
-	netif_addr_lock_bh(dev);
-	err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
-				addr, alen, glbl);
-	if (!err) {
-		/*
-		 *	We have altered the list, so the card
-		 *	loaded filter is now wrong. Fix it
-		 */
-
-		__dev_set_rx_mode(dev);
-	}
-	netif_addr_unlock_bh(dev);
-	return err;
-}
-
-/*
- *	Add a device level multicast
- */
-
-int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
-{
-	int err;
-
-	netif_addr_lock_bh(dev);
-	if (alen != dev->addr_len)
-		err = -EINVAL;
-	else
-		err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
-	if (!err)
-		__dev_set_rx_mode(dev);
-	netif_addr_unlock_bh(dev);
-	return err;
-}
-
-/**
- *	dev_mc_sync	- Synchronize device's multicast list to another device
- *	@to: destination device
- *	@from: source device
- *
- * 	Add newly added addresses to the destination device and release
- * 	addresses that have no users left. The source device must be
- * 	locked by netif_tx_lock_bh.
- *
- *	This function is intended to be called from the dev->set_multicast_list
- *	or dev->set_rx_mode function of layered software devices.
- */
-int dev_mc_sync(struct net_device *to, struct net_device *from)
-{
-	int err = 0;
-
-	netif_addr_lock_bh(to);
-	err = __dev_addr_sync(&to->mc_list, &to->mc_count,
-			      &from->mc_list, &from->mc_count);
-	if (!err)
-		__dev_set_rx_mode(to);
-	netif_addr_unlock_bh(to);
-
-	return err;
-}
-EXPORT_SYMBOL(dev_mc_sync);
-
-
-/**
- * 	dev_mc_unsync	- Remove synchronized addresses from the destination
- * 			  device
- *	@to: destination device
- *	@from: source device
- *
- * 	Remove all addresses that were added to the destination device by
- * 	dev_mc_sync(). This function is intended to be called from the
- * 	dev->stop function of layered software devices.
- */
-void dev_mc_unsync(struct net_device *to, struct net_device *from)
-{
-	netif_addr_lock_bh(from);
-	netif_addr_lock(to);
-
-	__dev_addr_unsync(&to->mc_list, &to->mc_count,
-			  &from->mc_list, &from->mc_count);
-	__dev_set_rx_mode(to);
-
-	netif_addr_unlock(to);
-	netif_addr_unlock_bh(from);
-}
-EXPORT_SYMBOL(dev_mc_unsync);
-
-#ifdef CONFIG_PROC_FS
-static int dev_mc_seq_show(struct seq_file *seq, void *v)
-{
-	struct dev_addr_list *m;
-	struct net_device *dev = v;
-
-	if (v == SEQ_START_TOKEN)
-		return 0;
-
-	netif_addr_lock_bh(dev);
-	for (m = dev->mc_list; m; m = m->next) {
-		int i;
-
-		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
-			   dev->name, m->dmi_users, m->dmi_gusers);
-
-		for (i = 0; i < m->dmi_addrlen; i++)
-			seq_printf(seq, "%02x", m->dmi_addr[i]);
-
-		seq_putc(seq, '\n');
-	}
-	netif_addr_unlock_bh(dev);
-	return 0;
-}
-
-static const struct seq_operations dev_mc_seq_ops = {
-	.start = dev_seq_start,
-	.next  = dev_seq_next,
-	.stop  = dev_seq_stop,
-	.show  = dev_mc_seq_show,
-};
-
-static int dev_mc_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &dev_mc_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations dev_mc_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = dev_mc_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-#endif
-
-static int __net_init dev_mc_net_init(struct net *net)
-{
-	if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
-		return -ENOMEM;
-	return 0;
-}
-
-static void __net_exit dev_mc_net_exit(struct net *net)
-{
-	proc_net_remove(net, "dev_mcast");
-}
-
-static struct pernet_operations __net_initdata dev_mc_net_ops = {
-	.init = dev_mc_net_init,
-	.exit = dev_mc_net_exit,
-};
-
-void __init dev_mcast_init(void)
-{
-	register_pernet_subsys(&dev_mc_net_ops);
-}
-
-EXPORT_SYMBOL(dev_mc_add);
-EXPORT_SYMBOL(dev_mc_delete);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 238af093495b..f3e4734d207f 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -349,7 +349,7 @@ static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int de
 	if (dn_db->dev->type == ARPHRD_ETHER) {
 		if (ifa1->ifa_local != dn_eth2dn(dev->dev_addr)) {
 			dn_dn2eth(mac_addr, ifa1->ifa_local);
-			dev_mc_delete(dev, mac_addr, ETH_ALEN, 0);
+			dev_mc_del(dev, mac_addr);
 		}
 	}
 
@@ -380,7 +380,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
 	if (dev->type == ARPHRD_ETHER) {
 		if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) {
 			dn_dn2eth(mac_addr, ifa->ifa_local);
-			dev_mc_add(dev, mac_addr, ETH_ALEN, 0);
+			dev_mc_add(dev, mac_addr);
 		}
 	}
 
@@ -1000,9 +1000,9 @@ static int dn_eth_up(struct net_device *dev)
 	struct dn_dev *dn_db = dev->dn_ptr;
 
 	if (dn_db->parms.forwarding == 0)
-		dev_mc_add(dev, dn_rt_all_end_mcast, ETH_ALEN, 0);
+		dev_mc_add(dev, dn_rt_all_end_mcast);
 	else
-		dev_mc_add(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0);
+		dev_mc_add(dev, dn_rt_all_rt_mcast);
 
 	dn_db->use_long = 1;
 
@@ -1014,9 +1014,9 @@ static void dn_eth_down(struct net_device *dev)
 	struct dn_dev *dn_db = dev->dn_ptr;
 
 	if (dn_db->parms.forwarding == 0)
-		dev_mc_delete(dev, dn_rt_all_end_mcast, ETH_ALEN, 0);
+		dev_mc_del(dev, dn_rt_all_end_mcast);
 	else
-		dev_mc_delete(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0);
+		dev_mc_del(dev, dn_rt_all_rt_mcast);
 }
 
 static void dn_dev_set_timer(struct net_device *dev);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 63bf298ca109..51824c42b775 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -997,7 +997,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr)
 	   --ANK
 	   */
 	if (arp_mc_map(addr, buf, dev, 0) == 0)
-		dev_mc_add(dev, buf, dev->addr_len, 0);
+		dev_mc_add(dev, buf);
 }
 
 /*
@@ -1010,7 +1010,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr)
 	struct net_device *dev = in_dev->dev;
 
 	if (arp_mc_map(addr, buf, dev, 0) == 0)
-		dev_mc_delete(dev, buf, dev->addr_len, 0);
+		dev_mc_del(dev, buf);
 }
 
 #ifdef CONFIG_IP_MULTICAST
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 0886f96c736b..a2208b7b313d 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -87,7 +87,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
 		list_del(&c->list);
 		write_unlock_bh(&clusterip_lock);
 
-		dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
+		dev_mc_del(c->dev, c->clustermac);
 		dev_put(c->dev);
 
 		/* In case anyone still accesses the file, the open/close
@@ -396,7 +396,7 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par)
 				dev_put(dev);
 				return false;
 			}
-			dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
+			dev_mc_add(config->dev, config->clustermac);
 		}
 	}
 	cipinfo->config = config;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bcd971915969..37d1868c0064 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -714,7 +714,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
 	if (!(mc->mca_flags&MAF_LOADED)) {
 		mc->mca_flags |= MAF_LOADED;
 		if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
-			dev_mc_add(dev, buf, dev->addr_len, 0);
+			dev_mc_add(dev, buf);
 	}
 	spin_unlock_bh(&mc->mca_lock);
 
@@ -740,7 +740,7 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
 	if (mc->mca_flags&MAF_LOADED) {
 		mc->mca_flags &= ~MAF_LOADED;
 		if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
-			dev_mc_delete(dev, buf, dev->addr_len, 0);
+			dev_mc_del(dev, buf);
 	}
 
 	if (mc->mca_flags & MAF_NOREPORT)
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index c3d844093a2f..9179196da264 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -84,16 +84,14 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local,
 }
 
 static inline u64 drv_prepare_multicast(struct ieee80211_local *local,
-					int mc_count,
-					struct dev_addr_list *mc_list)
+					struct netdev_hw_addr_list *mc_list)
 {
 	u64 ret = 0;
 
 	if (local->ops->prepare_multicast)
-		ret = local->ops->prepare_multicast(&local->hw, mc_count,
-						    mc_list);
+		ret = local->ops->prepare_multicast(&local->hw, mc_list);
 
-	trace_drv_prepare_multicast(local, mc_count, ret);
+	trace_drv_prepare_multicast(local, mc_list->count, ret);
 
 	return ret;
 }
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ab369e2a5282..7fdacf9408b1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -646,8 +646,7 @@ struct ieee80211_local {
 	struct work_struct recalc_smps;
 
 	/* aggregated multicast list */
-	struct dev_addr_list *mc_list;
-	int mc_count;
+	struct netdev_hw_addr_list mc_list;
 
 	bool tim_in_locked_section; /* see ieee80211_beacon_get() */
 
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b4ec59a8dc03..00f3a93c6b04 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -412,8 +412,7 @@ static int ieee80211_stop(struct net_device *dev)
 
 	netif_addr_lock_bh(dev);
 	spin_lock_bh(&local->filter_lock);
-	__dev_addr_unsync(&local->mc_list, &local->mc_count,
-			  &dev->mc_list, &dev->mc_count);
+	__hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len);
 	spin_unlock_bh(&local->filter_lock);
 	netif_addr_unlock_bh(dev);
 
@@ -596,8 +595,7 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
 		sdata->flags ^= IEEE80211_SDATA_PROMISC;
 	}
 	spin_lock_bh(&local->filter_lock);
-	__dev_addr_sync(&local->mc_list, &local->mc_count,
-			&dev->mc_list, &dev->mc_count);
+	__hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len);
 	spin_unlock_bh(&local->filter_lock);
 	ieee80211_queue_work(&local->hw, &local->reconfig_filter);
 }
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 06c33b68d8e5..84ad249a4e2e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -71,7 +71,7 @@ void ieee80211_configure_filter(struct ieee80211_local *local)
 	spin_lock_bh(&local->filter_lock);
 	changed_flags = local->filter_flags ^ new_flags;
 
-	mc = drv_prepare_multicast(local, local->mc_count, local->mc_list);
+	mc = drv_prepare_multicast(local, &local->mc_list);
 	spin_unlock_bh(&local->filter_lock);
 
 	/* be a bit nasty */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 48c1e0ae565f..b0f037cc899c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1691,9 +1691,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 		if (i->alen != dev->addr_len)
 			return -EINVAL;
 		if (what > 0)
-			return dev_mc_add(dev, i->addr, i->alen, 0);
+			return dev_mc_add(dev, i->addr);
 		else
-			return dev_mc_delete(dev, i->addr, i->alen, 0);
+			return dev_mc_del(dev, i->addr);
 		break;
 	case PACKET_MR_PROMISC:
 		return dev_set_promiscuity(dev, what);
-- 
cgit v1.2.3-59-g8ed1b


From 21b4aaa14329db793832e865f15000c5c0192ac3 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:18:28 +0000
Subject: l2tp: Relocate pppol2tp driver to new net/l2tp directory

This patch moves the existing pppol2tp driver from drivers/net into a
new net/l2tp directory, which is where the upcoming L2TPv3 code will
live. The existing CONFIG_PPPOL2TP config option is left in its
current place to avoid "make oldconfig" issues when an existing
pppol2tp user takes this change. (This is the same approach used for
the pppoatm driver, which moved to net/atm.)

There are no code changes. The existing drivers/net/pppol2tp.c is
simply moved to net/l2tp.

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Makefile   |    2 +-
 drivers/net/pppol2tp.c | 2680 ------------------------------------------------
 net/Makefile           |    1 +
 net/l2tp/Makefile      |    5 +
 net/l2tp/pppol2tp.c    | 2680 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 2687 insertions(+), 2681 deletions(-)
 delete mode 100644 drivers/net/pppol2tp.c
 create mode 100644 net/l2tp/Makefile
 create mode 100644 net/l2tp/pppol2tp.c

(limited to 'net')

diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index ba06c1c0db8b..f8444f439a65 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -160,7 +160,7 @@ obj-$(CONFIG_PPP_DEFLATE) += ppp_deflate.o
 obj-$(CONFIG_PPP_BSDCOMP) += bsd_comp.o
 obj-$(CONFIG_PPP_MPPE) += ppp_mppe.o
 obj-$(CONFIG_PPPOE) += pppox.o pppoe.o
-obj-$(CONFIG_PPPOL2TP) += pppox.o pppol2tp.o
+obj-$(CONFIG_PPPOL2TP) += pppox.o
 
 obj-$(CONFIG_SLIP) += slip.o
 obj-$(CONFIG_SLHC) += slhc.o
diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c
deleted file mode 100644
index 449a9825200d..000000000000
--- a/drivers/net/pppol2tp.c
+++ /dev/null
@@ -1,2680 +0,0 @@
-/*****************************************************************************
- * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets
- *
- * PPPoX    --- Generic PPP encapsulation socket family
- * PPPoL2TP --- PPP over L2TP (RFC 2661)
- *
- * Version:	1.0.0
- *
- * Authors:	Martijn van Oosterhout <kleptog@svana.org>
- *		James Chapman (jchapman@katalix.com)
- * Contributors:
- *		Michal Ostrowski <mostrows@speakeasy.net>
- *		Arnaldo Carvalho de Melo <acme@xconectiva.com.br>
- *		David S. Miller (davem@redhat.com)
- *
- * License:
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
- */
-
-/* This driver handles only L2TP data frames; control frames are handled by a
- * userspace application.
- *
- * To send data in an L2TP session, userspace opens a PPPoL2TP socket and
- * attaches it to a bound UDP socket with local tunnel_id / session_id and
- * peer tunnel_id / session_id set. Data can then be sent or received using
- * regular socket sendmsg() / recvmsg() calls. Kernel parameters of the socket
- * can be read or modified using ioctl() or [gs]etsockopt() calls.
- *
- * When a PPPoL2TP socket is connected with local and peer session_id values
- * zero, the socket is treated as a special tunnel management socket.
- *
- * Here's example userspace code to create a socket for sending/receiving data
- * over an L2TP session:-
- *
- *	struct sockaddr_pppol2tp sax;
- *	int fd;
- *	int session_fd;
- *
- *	fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
- *
- *	sax.sa_family = AF_PPPOX;
- *	sax.sa_protocol = PX_PROTO_OL2TP;
- *	sax.pppol2tp.fd = tunnel_fd;	// bound UDP socket
- *	sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
- *	sax.pppol2tp.addr.sin_port = addr->sin_port;
- *	sax.pppol2tp.addr.sin_family = AF_INET;
- *	sax.pppol2tp.s_tunnel  = tunnel_id;
- *	sax.pppol2tp.s_session = session_id;
- *	sax.pppol2tp.d_tunnel  = peer_tunnel_id;
- *	sax.pppol2tp.d_session = peer_session_id;
- *
- *	session_fd = connect(fd, (struct sockaddr *)&sax, sizeof(sax));
- *
- * A pppd plugin that allows PPP traffic to be carried over L2TP using
- * this driver is available from the OpenL2TP project at
- * http://openl2tp.sourceforge.net.
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/list.h>
-#include <asm/uaccess.h>
-
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/kthread.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <linux/jiffies.h>
-
-#include <linux/netdevice.h>
-#include <linux/net.h>
-#include <linux/inetdevice.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/if_pppox.h>
-#include <linux/if_pppol2tp.h>
-#include <net/sock.h>
-#include <linux/ppp_channel.h>
-#include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
-#include <linux/file.h>
-#include <linux/hash.h>
-#include <linux/sort.h>
-#include <linux/proc_fs.h>
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/dst.h>
-#include <net/ip.h>
-#include <net/udp.h>
-#include <net/xfrm.h>
-
-#include <asm/byteorder.h>
-#include <asm/atomic.h>
-
-
-#define PPPOL2TP_DRV_VERSION	"V1.0"
-
-/* L2TP header constants */
-#define L2TP_HDRFLAG_T	   0x8000
-#define L2TP_HDRFLAG_L	   0x4000
-#define L2TP_HDRFLAG_S	   0x0800
-#define L2TP_HDRFLAG_O	   0x0200
-#define L2TP_HDRFLAG_P	   0x0100
-
-#define L2TP_HDR_VER_MASK  0x000F
-#define L2TP_HDR_VER	   0x0002
-
-/* Space for UDP, L2TP and PPP headers */
-#define PPPOL2TP_HEADER_OVERHEAD	40
-
-/* Just some random numbers */
-#define L2TP_TUNNEL_MAGIC	0x42114DDA
-#define L2TP_SESSION_MAGIC	0x0C04EB7D
-
-#define PPPOL2TP_HASH_BITS	4
-#define PPPOL2TP_HASH_SIZE	(1 << PPPOL2TP_HASH_BITS)
-
-/* Default trace flags */
-#define PPPOL2TP_DEFAULT_DEBUG_FLAGS	0
-
-#define PRINTK(_mask, _type, _lvl, _fmt, args...)			\
-	do {								\
-		if ((_mask) & (_type))					\
-			printk(_lvl "PPPOL2TP: " _fmt, ##args);		\
-	} while(0)
-
-/* Number of bytes to build transmit L2TP headers.
- * Unfortunately the size is different depending on whether sequence numbers
- * are enabled.
- */
-#define PPPOL2TP_L2TP_HDR_SIZE_SEQ		10
-#define PPPOL2TP_L2TP_HDR_SIZE_NOSEQ		6
-
-struct pppol2tp_tunnel;
-
-/* Describes a session. It is the sk_user_data field in the PPPoL2TP
- * socket. Contains information to determine incoming packets and transmit
- * outgoing ones.
- */
-struct pppol2tp_session
-{
-	int			magic;		/* should be
-						 * L2TP_SESSION_MAGIC */
-	int			owner;		/* pid that opened the socket */
-
-	struct sock		*sock;		/* Pointer to the session
-						 * PPPoX socket */
-	struct sock		*tunnel_sock;	/* Pointer to the tunnel UDP
-						 * socket */
-
-	struct pppol2tp_addr	tunnel_addr;	/* Description of tunnel */
-
-	struct pppol2tp_tunnel	*tunnel;	/* back pointer to tunnel
-						 * context */
-
-	char			name[20];	/* "sess xxxxx/yyyyy", where
-						 * x=tunnel_id, y=session_id */
-	int			mtu;
-	int			mru;
-	int			flags;		/* accessed by PPPIOCGFLAGS.
-						 * Unused. */
-	unsigned		recv_seq:1;	/* expect receive packets with
-						 * sequence numbers? */
-	unsigned		send_seq:1;	/* send packets with sequence
-						 * numbers? */
-	unsigned		lns_mode:1;	/* behave as LNS? LAC enables
-						 * sequence numbers under
-						 * control of LNS. */
-	int			debug;		/* bitmask of debug message
-						 * categories */
-	int			reorder_timeout; /* configured reorder timeout
-						  * (in jiffies) */
-	u16			nr;		/* session NR state (receive) */
-	u16			ns;		/* session NR state (send) */
-	struct sk_buff_head	reorder_q;	/* receive reorder queue */
-	struct pppol2tp_ioc_stats stats;
-	struct hlist_node	hlist;		/* Hash list node */
-};
-
-/* The sk_user_data field of the tunnel's UDP socket. It contains info to track
- * all the associated sessions so incoming packets can be sorted out
- */
-struct pppol2tp_tunnel
-{
-	int			magic;		/* Should be L2TP_TUNNEL_MAGIC */
-	rwlock_t		hlist_lock;	/* protect session_hlist */
-	struct hlist_head	session_hlist[PPPOL2TP_HASH_SIZE];
-						/* hashed list of sessions,
-						 * hashed by id */
-	int			debug;		/* bitmask of debug message
-						 * categories */
-	char			name[12];	/* "tunl xxxxx" */
-	struct pppol2tp_ioc_stats stats;
-
-	void (*old_sk_destruct)(struct sock *);
-
-	struct sock		*sock;		/* Parent socket */
-	struct list_head	list;		/* Keep a list of all open
-						 * prepared sockets */
-	struct net		*pppol2tp_net;	/* the net we belong to */
-
-	atomic_t		ref_count;
-};
-
-/* Private data stored for received packets in the skb.
- */
-struct pppol2tp_skb_cb {
-	u16			ns;
-	u16			nr;
-	u16			has_seq;
-	u16			length;
-	unsigned long		expires;
-};
-
-#define PPPOL2TP_SKB_CB(skb)	((struct pppol2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
-
-static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
-static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel);
-
-static atomic_t pppol2tp_tunnel_count;
-static atomic_t pppol2tp_session_count;
-static struct ppp_channel_ops pppol2tp_chan_ops = { pppol2tp_xmit , NULL };
-static const struct proto_ops pppol2tp_ops;
-
-/* per-net private data for this module */
-static int pppol2tp_net_id __read_mostly;
-struct pppol2tp_net {
-	struct list_head pppol2tp_tunnel_list;
-	rwlock_t pppol2tp_tunnel_list_lock;
-};
-
-static inline struct pppol2tp_net *pppol2tp_pernet(struct net *net)
-{
-	BUG_ON(!net);
-
-	return net_generic(net, pppol2tp_net_id);
-}
-
-/* Helpers to obtain tunnel/session contexts from sockets.
- */
-static inline struct pppol2tp_session *pppol2tp_sock_to_session(struct sock *sk)
-{
-	struct pppol2tp_session *session;
-
-	if (sk == NULL)
-		return NULL;
-
-	sock_hold(sk);
-	session = (struct pppol2tp_session *)(sk->sk_user_data);
-	if (session == NULL) {
-		sock_put(sk);
-		goto out;
-	}
-
-	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
-out:
-	return session;
-}
-
-static inline struct pppol2tp_tunnel *pppol2tp_sock_to_tunnel(struct sock *sk)
-{
-	struct pppol2tp_tunnel *tunnel;
-
-	if (sk == NULL)
-		return NULL;
-
-	sock_hold(sk);
-	tunnel = (struct pppol2tp_tunnel *)(sk->sk_user_data);
-	if (tunnel == NULL) {
-		sock_put(sk);
-		goto out;
-	}
-
-	BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-out:
-	return tunnel;
-}
-
-/* Tunnel reference counts. Incremented per session that is added to
- * the tunnel.
- */
-static inline void pppol2tp_tunnel_inc_refcount(struct pppol2tp_tunnel *tunnel)
-{
-	atomic_inc(&tunnel->ref_count);
-}
-
-static inline void pppol2tp_tunnel_dec_refcount(struct pppol2tp_tunnel *tunnel)
-{
-	if (atomic_dec_and_test(&tunnel->ref_count))
-		pppol2tp_tunnel_free(tunnel);
-}
-
-/* Session hash list.
- * The session_id SHOULD be random according to RFC2661, but several
- * L2TP implementations (Cisco and Microsoft) use incrementing
- * session_ids.  So we do a real hash on the session_id, rather than a
- * simple bitmask.
- */
-static inline struct hlist_head *
-pppol2tp_session_id_hash(struct pppol2tp_tunnel *tunnel, u16 session_id)
-{
-	unsigned long hash_val = (unsigned long) session_id;
-	return &tunnel->session_hlist[hash_long(hash_val, PPPOL2TP_HASH_BITS)];
-}
-
-/* Lookup a session by id
- */
-static struct pppol2tp_session *
-pppol2tp_session_find(struct pppol2tp_tunnel *tunnel, u16 session_id)
-{
-	struct hlist_head *session_list =
-		pppol2tp_session_id_hash(tunnel, session_id);
-	struct pppol2tp_session *session;
-	struct hlist_node *walk;
-
-	read_lock_bh(&tunnel->hlist_lock);
-	hlist_for_each_entry(session, walk, session_list, hlist) {
-		if (session->tunnel_addr.s_session == session_id) {
-			read_unlock_bh(&tunnel->hlist_lock);
-			return session;
-		}
-	}
-	read_unlock_bh(&tunnel->hlist_lock);
-
-	return NULL;
-}
-
-/* Lookup a tunnel by id
- */
-static struct pppol2tp_tunnel *pppol2tp_tunnel_find(struct net *net, u16 tunnel_id)
-{
-	struct pppol2tp_tunnel *tunnel;
-	struct pppol2tp_net *pn = pppol2tp_pernet(net);
-
-	read_lock_bh(&pn->pppol2tp_tunnel_list_lock);
-	list_for_each_entry(tunnel, &pn->pppol2tp_tunnel_list, list) {
-		if (tunnel->stats.tunnel_id == tunnel_id) {
-			read_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-			return tunnel;
-		}
-	}
-	read_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-
-	return NULL;
-}
-
-/*****************************************************************************
- * Receive data handling
- *****************************************************************************/
-
-/* Queue a skb in order. We come here only if the skb has an L2TP sequence
- * number.
- */
-static void pppol2tp_recv_queue_skb(struct pppol2tp_session *session, struct sk_buff *skb)
-{
-	struct sk_buff *skbp;
-	struct sk_buff *tmp;
-	u16 ns = PPPOL2TP_SKB_CB(skb)->ns;
-
-	spin_lock_bh(&session->reorder_q.lock);
-	skb_queue_walk_safe(&session->reorder_q, skbp, tmp) {
-		if (PPPOL2TP_SKB_CB(skbp)->ns > ns) {
-			__skb_queue_before(&session->reorder_q, skbp, skb);
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-			       "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n",
-			       session->name, ns, PPPOL2TP_SKB_CB(skbp)->ns,
-			       skb_queue_len(&session->reorder_q));
-			session->stats.rx_oos_packets++;
-			goto out;
-		}
-	}
-
-	__skb_queue_tail(&session->reorder_q, skb);
-
-out:
-	spin_unlock_bh(&session->reorder_q.lock);
-}
-
-/* Dequeue a single skb.
- */
-static void pppol2tp_recv_dequeue_skb(struct pppol2tp_session *session, struct sk_buff *skb)
-{
-	struct pppol2tp_tunnel *tunnel = session->tunnel;
-	int length = PPPOL2TP_SKB_CB(skb)->length;
-	struct sock *session_sock = NULL;
-
-	/* We're about to requeue the skb, so return resources
-	 * to its current owner (a socket receive buffer).
-	 */
-	skb_orphan(skb);
-
-	tunnel->stats.rx_packets++;
-	tunnel->stats.rx_bytes += length;
-	session->stats.rx_packets++;
-	session->stats.rx_bytes += length;
-
-	if (PPPOL2TP_SKB_CB(skb)->has_seq) {
-		/* Bump our Nr */
-		session->nr++;
-		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-		       "%s: updated nr to %hu\n", session->name, session->nr);
-	}
-
-	/* If the socket is bound, send it in to PPP's input queue. Otherwise
-	 * queue it on the session socket.
-	 */
-	session_sock = session->sock;
-	if (session_sock->sk_state & PPPOX_BOUND) {
-		struct pppox_sock *po;
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: recv %d byte data frame, passing to ppp\n",
-		       session->name, length);
-
-		/* We need to forget all info related to the L2TP packet
-		 * gathered in the skb as we are going to reuse the same
-		 * skb for the inner packet.
-		 * Namely we need to:
-		 * - reset xfrm (IPSec) information as it applies to
-		 *   the outer L2TP packet and not to the inner one
-		 * - release the dst to force a route lookup on the inner
-		 *   IP packet since skb->dst currently points to the dst
-		 *   of the UDP tunnel
-		 * - reset netfilter information as it doesn't apply
-		 *   to the inner packet either
-		 */
-		secpath_reset(skb);
-		skb_dst_drop(skb);
-		nf_reset(skb);
-
-		po = pppox_sk(session_sock);
-		ppp_input(&po->chan, skb);
-	} else {
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
-		       "%s: socket not bound\n", session->name);
-
-		/* Not bound. Nothing we can do, so discard. */
-		session->stats.rx_errors++;
-		kfree_skb(skb);
-	}
-
-	sock_put(session->sock);
-}
-
-/* Dequeue skbs from the session's reorder_q, subject to packet order.
- * Skbs that have been in the queue for too long are simply discarded.
- */
-static void pppol2tp_recv_dequeue(struct pppol2tp_session *session)
-{
-	struct sk_buff *skb;
-	struct sk_buff *tmp;
-
-	/* If the pkt at the head of the queue has the nr that we
-	 * expect to send up next, dequeue it and any other
-	 * in-sequence packets behind it.
-	 */
-	spin_lock_bh(&session->reorder_q.lock);
-	skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
-		if (time_after(jiffies, PPPOL2TP_SKB_CB(skb)->expires)) {
-			session->stats.rx_seq_discards++;
-			session->stats.rx_errors++;
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-			       "%s: oos pkt %hu len %d discarded (too old), "
-			       "waiting for %hu, reorder_q_len=%d\n",
-			       session->name, PPPOL2TP_SKB_CB(skb)->ns,
-			       PPPOL2TP_SKB_CB(skb)->length, session->nr,
-			       skb_queue_len(&session->reorder_q));
-			__skb_unlink(skb, &session->reorder_q);
-			kfree_skb(skb);
-			sock_put(session->sock);
-			continue;
-		}
-
-		if (PPPOL2TP_SKB_CB(skb)->has_seq) {
-			if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) {
-				PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-				       "%s: holding oos pkt %hu len %d, "
-				       "waiting for %hu, reorder_q_len=%d\n",
-				       session->name, PPPOL2TP_SKB_CB(skb)->ns,
-				       PPPOL2TP_SKB_CB(skb)->length, session->nr,
-				       skb_queue_len(&session->reorder_q));
-				goto out;
-			}
-		}
-		__skb_unlink(skb, &session->reorder_q);
-
-		/* Process the skb. We release the queue lock while we
-		 * do so to let other contexts process the queue.
-		 */
-		spin_unlock_bh(&session->reorder_q.lock);
-		pppol2tp_recv_dequeue_skb(session, skb);
-		spin_lock_bh(&session->reorder_q.lock);
-	}
-
-out:
-	spin_unlock_bh(&session->reorder_q.lock);
-}
-
-static inline int pppol2tp_verify_udp_checksum(struct sock *sk,
-					       struct sk_buff *skb)
-{
-	struct udphdr *uh = udp_hdr(skb);
-	u16 ulen = ntohs(uh->len);
-	struct inet_sock *inet;
-	__wsum psum;
-
-	if (sk->sk_no_check || skb_csum_unnecessary(skb) || !uh->check)
-		return 0;
-
-	inet = inet_sk(sk);
-	psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, ulen,
-				  IPPROTO_UDP, 0);
-
-	if ((skb->ip_summed == CHECKSUM_COMPLETE) &&
-	    !csum_fold(csum_add(psum, skb->csum)))
-		return 0;
-
-	skb->csum = psum;
-
-	return __skb_checksum_complete(skb);
-}
-
-/* Internal receive frame. Do the real work of receiving an L2TP data frame
- * here. The skb is not on a list when we get here.
- * Returns 0 if the packet was a data packet and was successfully passed on.
- * Returns 1 if the packet was not a good data packet and could not be
- * forwarded.  All such packets are passed up to userspace to deal with.
- */
-static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb)
-{
-	struct pppol2tp_session *session = NULL;
-	struct pppol2tp_tunnel *tunnel;
-	unsigned char *ptr, *optr;
-	u16 hdrflags;
-	u16 tunnel_id, session_id;
-	int length;
-	int offset;
-
-	tunnel = pppol2tp_sock_to_tunnel(sock);
-	if (tunnel == NULL)
-		goto no_tunnel;
-
-	if (tunnel->sock && pppol2tp_verify_udp_checksum(tunnel->sock, skb))
-		goto discard_bad_csum;
-
-	/* UDP always verifies the packet length. */
-	__skb_pull(skb, sizeof(struct udphdr));
-
-	/* Short packet? */
-	if (!pskb_may_pull(skb, 12)) {
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
-		       "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
-		goto error;
-	}
-
-	/* Point to L2TP header */
-	optr = ptr = skb->data;
-
-	/* Get L2TP header flags */
-	hdrflags = ntohs(*(__be16*)ptr);
-
-	/* Trace packet contents, if enabled */
-	if (tunnel->debug & PPPOL2TP_MSG_DATA) {
-		length = min(16u, skb->len);
-		if (!pskb_may_pull(skb, length))
-			goto error;
-
-		printk(KERN_DEBUG "%s: recv: ", tunnel->name);
-
-		offset = 0;
-		do {
-			printk(" %02X", ptr[offset]);
-		} while (++offset < length);
-
-		printk("\n");
-	}
-
-	/* Get length of L2TP packet */
-	length = skb->len;
-
-	/* If type is control packet, it is handled by userspace. */
-	if (hdrflags & L2TP_HDRFLAG_T) {
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: recv control packet, len=%d\n", tunnel->name, length);
-		goto error;
-	}
-
-	/* Skip flags */
-	ptr += 2;
-
-	/* If length is present, skip it */
-	if (hdrflags & L2TP_HDRFLAG_L)
-		ptr += 2;
-
-	/* Extract tunnel and session ID */
-	tunnel_id = ntohs(*(__be16 *) ptr);
-	ptr += 2;
-	session_id = ntohs(*(__be16 *) ptr);
-	ptr += 2;
-
-	/* Find the session context */
-	session = pppol2tp_session_find(tunnel, session_id);
-	if (!session) {
-		/* Not found? Pass to userspace to deal with */
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
-		       "%s: no socket found (%hu/%hu). Passing up.\n",
-		       tunnel->name, tunnel_id, session_id);
-		goto error;
-	}
-	sock_hold(session->sock);
-
-	/* The ref count on the socket was increased by the above call since
-	 * we now hold a pointer to the session. Take care to do sock_put()
-	 * when exiting this function from now on...
-	 */
-
-	/* Handle the optional sequence numbers.  If we are the LAC,
-	 * enable/disable sequence numbers under the control of the LNS.  If
-	 * no sequence numbers present but we were expecting them, discard
-	 * frame.
-	 */
-	if (hdrflags & L2TP_HDRFLAG_S) {
-		u16 ns, nr;
-		ns = ntohs(*(__be16 *) ptr);
-		ptr += 2;
-		nr = ntohs(*(__be16 *) ptr);
-		ptr += 2;
-
-		/* Received a packet with sequence numbers. If we're the LNS,
-		 * check if we sre sending sequence numbers and if not,
-		 * configure it so.
-		 */
-		if ((!session->lns_mode) && (!session->send_seq)) {
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO,
-			       "%s: requested to enable seq numbers by LNS\n",
-			       session->name);
-			session->send_seq = -1;
-		}
-
-		/* Store L2TP info in the skb */
-		PPPOL2TP_SKB_CB(skb)->ns = ns;
-		PPPOL2TP_SKB_CB(skb)->nr = nr;
-		PPPOL2TP_SKB_CB(skb)->has_seq = 1;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-		       "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n",
-		       session->name, ns, nr, session->nr);
-	} else {
-		/* No sequence numbers.
-		 * If user has configured mandatory sequence numbers, discard.
-		 */
-		if (session->recv_seq) {
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING,
-			       "%s: recv data has no seq numbers when required. "
-			       "Discarding\n", session->name);
-			session->stats.rx_seq_discards++;
-			goto discard;
-		}
-
-		/* If we're the LAC and we're sending sequence numbers, the
-		 * LNS has requested that we no longer send sequence numbers.
-		 * If we're the LNS and we're sending sequence numbers, the
-		 * LAC is broken. Discard the frame.
-		 */
-		if ((!session->lns_mode) && (session->send_seq)) {
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO,
-			       "%s: requested to disable seq numbers by LNS\n",
-			       session->name);
-			session->send_seq = 0;
-		} else if (session->send_seq) {
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING,
-			       "%s: recv data has no seq numbers when required. "
-			       "Discarding\n", session->name);
-			session->stats.rx_seq_discards++;
-			goto discard;
-		}
-
-		/* Store L2TP info in the skb */
-		PPPOL2TP_SKB_CB(skb)->has_seq = 0;
-	}
-
-	/* If offset bit set, skip it. */
-	if (hdrflags & L2TP_HDRFLAG_O) {
-		offset = ntohs(*(__be16 *)ptr);
-		ptr += 2 + offset;
-	}
-
-	offset = ptr - optr;
-	if (!pskb_may_pull(skb, offset))
-		goto discard;
-
-	__skb_pull(skb, offset);
-
-	/* Skip PPP header, if present.	 In testing, Microsoft L2TP clients
-	 * don't send the PPP header (PPP header compression enabled), but
-	 * other clients can include the header. So we cope with both cases
-	 * here. The PPP header is always FF03 when using L2TP.
-	 *
-	 * Note that skb->data[] isn't dereferenced from a u16 ptr here since
-	 * the field may be unaligned.
-	 */
-	if (!pskb_may_pull(skb, 2))
-		goto discard;
-
-	if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
-		skb_pull(skb, 2);
-
-	/* Prepare skb for adding to the session's reorder_q.  Hold
-	 * packets for max reorder_timeout or 1 second if not
-	 * reordering.
-	 */
-	PPPOL2TP_SKB_CB(skb)->length = length;
-	PPPOL2TP_SKB_CB(skb)->expires = jiffies +
-		(session->reorder_timeout ? session->reorder_timeout : HZ);
-
-	/* Add packet to the session's receive queue. Reordering is done here, if
-	 * enabled. Saved L2TP protocol info is stored in skb->sb[].
-	 */
-	if (PPPOL2TP_SKB_CB(skb)->has_seq) {
-		if (session->reorder_timeout != 0) {
-			/* Packet reordering enabled. Add skb to session's
-			 * reorder queue, in order of ns.
-			 */
-			pppol2tp_recv_queue_skb(session, skb);
-		} else {
-			/* Packet reordering disabled. Discard out-of-sequence
-			 * packets
-			 */
-			if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) {
-				session->stats.rx_seq_discards++;
-				PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-				       "%s: oos pkt %hu len %d discarded, "
-				       "waiting for %hu, reorder_q_len=%d\n",
-				       session->name, PPPOL2TP_SKB_CB(skb)->ns,
-				       PPPOL2TP_SKB_CB(skb)->length, session->nr,
-				       skb_queue_len(&session->reorder_q));
-				goto discard;
-			}
-			skb_queue_tail(&session->reorder_q, skb);
-		}
-	} else {
-		/* No sequence numbers. Add the skb to the tail of the
-		 * reorder queue. This ensures that it will be
-		 * delivered after all previous sequenced skbs.
-		 */
-		skb_queue_tail(&session->reorder_q, skb);
-	}
-
-	/* Try to dequeue as many skbs from reorder_q as we can. */
-	pppol2tp_recv_dequeue(session);
-	sock_put(sock);
-
-	return 0;
-
-discard:
-	session->stats.rx_errors++;
-	kfree_skb(skb);
-	sock_put(session->sock);
-	sock_put(sock);
-
-	return 0;
-
-discard_bad_csum:
-	LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name);
-	UDP_INC_STATS_USER(&init_net, UDP_MIB_INERRORS, 0);
-	tunnel->stats.rx_errors++;
-	kfree_skb(skb);
-	sock_put(sock);
-
-	return 0;
-
-error:
-	/* Put UDP header back */
-	__skb_push(skb, sizeof(struct udphdr));
-	sock_put(sock);
-
-no_tunnel:
-	return 1;
-}
-
-/* UDP encapsulation receive handler. See net/ipv4/udp.c.
- * Return codes:
- * 0 : success.
- * <0: error
- * >0: skb should be passed up to userspace as UDP.
- */
-static int pppol2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
-{
-	struct pppol2tp_tunnel *tunnel;
-
-	tunnel = pppol2tp_sock_to_tunnel(sk);
-	if (tunnel == NULL)
-		goto pass_up;
-
-	PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-	       "%s: received %d bytes\n", tunnel->name, skb->len);
-
-	if (pppol2tp_recv_core(sk, skb))
-		goto pass_up_put;
-
-	sock_put(sk);
-	return 0;
-
-pass_up_put:
-	sock_put(sk);
-pass_up:
-	return 1;
-}
-
-/* Receive message. This is the recvmsg for the PPPoL2TP socket.
- */
-static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
-			    struct msghdr *msg, size_t len,
-			    int flags)
-{
-	int err;
-	struct sk_buff *skb;
-	struct sock *sk = sock->sk;
-
-	err = -EIO;
-	if (sk->sk_state & PPPOX_BOUND)
-		goto end;
-
-	msg->msg_namelen = 0;
-
-	err = 0;
-	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
-				flags & MSG_DONTWAIT, &err);
-	if (!skb)
-		goto end;
-
-	if (len > skb->len)
-		len = skb->len;
-	else if (len < skb->len)
-		msg->msg_flags |= MSG_TRUNC;
-
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
-	if (likely(err == 0))
-		err = len;
-
-	kfree_skb(skb);
-end:
-	return err;
-}
-
-/************************************************************************
- * Transmit handling
- ***********************************************************************/
-
-/* Tell how big L2TP headers are for a particular session. This
- * depends on whether sequence numbers are being used.
- */
-static inline int pppol2tp_l2tp_header_len(struct pppol2tp_session *session)
-{
-	if (session->send_seq)
-		return PPPOL2TP_L2TP_HDR_SIZE_SEQ;
-
-	return PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
-}
-
-/* Build an L2TP header for the session into the buffer provided.
- */
-static void pppol2tp_build_l2tp_header(struct pppol2tp_session *session,
-				       void *buf)
-{
-	__be16 *bufp = buf;
-	u16 flags = L2TP_HDR_VER;
-
-	if (session->send_seq)
-		flags |= L2TP_HDRFLAG_S;
-
-	/* Setup L2TP header.
-	 * FIXME: Can this ever be unaligned? Is direct dereferencing of
-	 * 16-bit header fields safe here for all architectures?
-	 */
-	*bufp++ = htons(flags);
-	*bufp++ = htons(session->tunnel_addr.d_tunnel);
-	*bufp++ = htons(session->tunnel_addr.d_session);
-	if (session->send_seq) {
-		*bufp++ = htons(session->ns);
-		*bufp++ = 0;
-		session->ns++;
-		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-		       "%s: updated ns to %hu\n", session->name, session->ns);
-	}
-}
-
-/* This is the sendmsg for the PPPoL2TP pppol2tp_session socket.  We come here
- * when a user application does a sendmsg() on the session socket. L2TP and
- * PPP headers must be inserted into the user's data.
- */
-static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
-			    size_t total_len)
-{
-	static const unsigned char ppph[2] = { 0xff, 0x03 };
-	struct sock *sk = sock->sk;
-	struct inet_sock *inet;
-	__wsum csum;
-	struct sk_buff *skb;
-	int error;
-	int hdr_len;
-	struct pppol2tp_session *session;
-	struct pppol2tp_tunnel *tunnel;
-	struct udphdr *uh;
-	unsigned int len;
-	struct sock *sk_tun;
-	u16 udp_len;
-
-	error = -ENOTCONN;
-	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
-		goto error;
-
-	/* Get session and tunnel contexts */
-	error = -EBADF;
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto error;
-
-	sk_tun = session->tunnel_sock;
-	tunnel = pppol2tp_sock_to_tunnel(sk_tun);
-	if (tunnel == NULL)
-		goto error_put_sess;
-
-	/* What header length is configured for this session? */
-	hdr_len = pppol2tp_l2tp_header_len(session);
-
-	/* Allocate a socket buffer */
-	error = -ENOMEM;
-	skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
-			   sizeof(struct udphdr) + hdr_len +
-			   sizeof(ppph) + total_len,
-			   0, GFP_KERNEL);
-	if (!skb)
-		goto error_put_sess_tun;
-
-	/* Reserve space for headers. */
-	skb_reserve(skb, NET_SKB_PAD);
-	skb_reset_network_header(skb);
-	skb_reserve(skb, sizeof(struct iphdr));
-	skb_reset_transport_header(skb);
-
-	/* Build UDP header */
-	inet = inet_sk(sk_tun);
-	udp_len = hdr_len + sizeof(ppph) + total_len;
-	uh = (struct udphdr *) skb->data;
-	uh->source = inet->inet_sport;
-	uh->dest = inet->inet_dport;
-	uh->len = htons(udp_len);
-	uh->check = 0;
-	skb_put(skb, sizeof(struct udphdr));
-
-	/* Build L2TP header */
-	pppol2tp_build_l2tp_header(session, skb->data);
-	skb_put(skb, hdr_len);
-
-	/* Add PPP header */
-	skb->data[0] = ppph[0];
-	skb->data[1] = ppph[1];
-	skb_put(skb, 2);
-
-	/* Copy user data into skb */
-	error = memcpy_fromiovec(skb->data, m->msg_iov, total_len);
-	if (error < 0) {
-		kfree_skb(skb);
-		goto error_put_sess_tun;
-	}
-	skb_put(skb, total_len);
-
-	/* Calculate UDP checksum if configured to do so */
-	if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT)
-		skb->ip_summed = CHECKSUM_NONE;
-	else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
-		skb->ip_summed = CHECKSUM_COMPLETE;
-		csum = skb_checksum(skb, 0, udp_len, 0);
-		uh->check = csum_tcpudp_magic(inet->inet_saddr,
-					      inet->inet_daddr,
-					      udp_len, IPPROTO_UDP, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
-					       inet->inet_daddr,
-					       udp_len, IPPROTO_UDP, 0);
-	}
-
-	/* Debug */
-	if (session->send_seq)
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %Zd bytes, ns=%hu\n", session->name,
-		       total_len, session->ns - 1);
-	else
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %Zd bytes\n", session->name, total_len);
-
-	if (session->debug & PPPOL2TP_MSG_DATA) {
-		int i;
-		unsigned char *datap = skb->data;
-
-		printk(KERN_DEBUG "%s: xmit:", session->name);
-		for (i = 0; i < total_len; i++) {
-			printk(" %02X", *datap++);
-			if (i == 15) {
-				printk(" ...");
-				break;
-			}
-		}
-		printk("\n");
-	}
-
-	/* Queue the packet to IP for output */
-	len = skb->len;
-	error = ip_queue_xmit(skb, 1);
-
-	/* Update stats */
-	if (error >= 0) {
-		tunnel->stats.tx_packets++;
-		tunnel->stats.tx_bytes += len;
-		session->stats.tx_packets++;
-		session->stats.tx_bytes += len;
-	} else {
-		tunnel->stats.tx_errors++;
-		session->stats.tx_errors++;
-	}
-
-	return error;
-
-error_put_sess_tun:
-	sock_put(session->tunnel_sock);
-error_put_sess:
-	sock_put(sk);
-error:
-	return error;
-}
-
-/* Automatically called when the skb is freed.
- */
-static void pppol2tp_sock_wfree(struct sk_buff *skb)
-{
-	sock_put(skb->sk);
-}
-
-/* For data skbs that we transmit, we associate with the tunnel socket
- * but don't do accounting.
- */
-static inline void pppol2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
-	sock_hold(sk);
-	skb->sk = sk;
-	skb->destructor = pppol2tp_sock_wfree;
-}
-
-/* Transmit function called by generic PPP driver.  Sends PPP frame
- * over PPPoL2TP socket.
- *
- * This is almost the same as pppol2tp_sendmsg(), but rather than
- * being called with a msghdr from userspace, it is called with a skb
- * from the kernel.
- *
- * The supplied skb from ppp doesn't have enough headroom for the
- * insertion of L2TP, UDP and IP headers so we need to allocate more
- * headroom in the skb. This will create a cloned skb. But we must be
- * careful in the error case because the caller will expect to free
- * the skb it supplied, not our cloned skb. So we take care to always
- * leave the original skb unfreed if we return an error.
- */
-static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
-{
-	static const u8 ppph[2] = { 0xff, 0x03 };
-	struct sock *sk = (struct sock *) chan->private;
-	struct sock *sk_tun;
-	int hdr_len;
-	u16 udp_len;
-	struct pppol2tp_session *session;
-	struct pppol2tp_tunnel *tunnel;
-	int rc;
-	int headroom;
-	int data_len = skb->len;
-	struct inet_sock *inet;
-	__wsum csum;
-	struct udphdr *uh;
-	unsigned int len;
-	int old_headroom;
-	int new_headroom;
-
-	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
-		goto abort;
-
-	/* Get session and tunnel contexts from the socket */
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto abort;
-
-	sk_tun = session->tunnel_sock;
-	if (sk_tun == NULL)
-		goto abort_put_sess;
-	tunnel = pppol2tp_sock_to_tunnel(sk_tun);
-	if (tunnel == NULL)
-		goto abort_put_sess;
-
-	/* What header length is configured for this session? */
-	hdr_len = pppol2tp_l2tp_header_len(session);
-
-	/* Check that there's enough headroom in the skb to insert IP,
-	 * UDP and L2TP and PPP headers. If not enough, expand it to
-	 * make room. Adjust truesize.
-	 */
-	headroom = NET_SKB_PAD + sizeof(struct iphdr) +
-		sizeof(struct udphdr) + hdr_len + sizeof(ppph);
-	old_headroom = skb_headroom(skb);
-	if (skb_cow_head(skb, headroom))
-		goto abort_put_sess_tun;
-
-	new_headroom = skb_headroom(skb);
-	skb_orphan(skb);
-	skb->truesize += new_headroom - old_headroom;
-
-	/* Setup PPP header */
-	__skb_push(skb, sizeof(ppph));
-	skb->data[0] = ppph[0];
-	skb->data[1] = ppph[1];
-
-	/* Setup L2TP header */
-	pppol2tp_build_l2tp_header(session, __skb_push(skb, hdr_len));
-
-	udp_len = sizeof(struct udphdr) + hdr_len + sizeof(ppph) + data_len;
-
-	/* Setup UDP header */
-	inet = inet_sk(sk_tun);
-	__skb_push(skb, sizeof(*uh));
-	skb_reset_transport_header(skb);
-	uh = udp_hdr(skb);
-	uh->source = inet->inet_sport;
-	uh->dest = inet->inet_dport;
-	uh->len = htons(udp_len);
-	uh->check = 0;
-
-	/* Debug */
-	if (session->send_seq)
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %d bytes, ns=%hu\n", session->name,
-		       data_len, session->ns - 1);
-	else
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %d bytes\n", session->name, data_len);
-
-	if (session->debug & PPPOL2TP_MSG_DATA) {
-		int i;
-		unsigned char *datap = skb->data;
-
-		printk(KERN_DEBUG "%s: xmit:", session->name);
-		for (i = 0; i < data_len; i++) {
-			printk(" %02X", *datap++);
-			if (i == 31) {
-				printk(" ...");
-				break;
-			}
-		}
-		printk("\n");
-	}
-
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
-			      IPSKB_REROUTED);
-	nf_reset(skb);
-
-	/* Get routing info from the tunnel socket */
-	skb_dst_drop(skb);
-	skb_dst_set(skb, dst_clone(__sk_dst_get(sk_tun)));
-	pppol2tp_skb_set_owner_w(skb, sk_tun);
-
-	/* Calculate UDP checksum if configured to do so */
-	if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT)
-		skb->ip_summed = CHECKSUM_NONE;
-	else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
-		 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
-		skb->ip_summed = CHECKSUM_COMPLETE;
-		csum = skb_checksum(skb, 0, udp_len, 0);
-		uh->check = csum_tcpudp_magic(inet->inet_saddr,
-					      inet->inet_daddr,
-					      udp_len, IPPROTO_UDP, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
-					       inet->inet_daddr,
-					       udp_len, IPPROTO_UDP, 0);
-	}
-
-	/* Queue the packet to IP for output */
-	len = skb->len;
-	rc = ip_queue_xmit(skb, 1);
-
-	/* Update stats */
-	if (rc >= 0) {
-		tunnel->stats.tx_packets++;
-		tunnel->stats.tx_bytes += len;
-		session->stats.tx_packets++;
-		session->stats.tx_bytes += len;
-	} else {
-		tunnel->stats.tx_errors++;
-		session->stats.tx_errors++;
-	}
-
-	sock_put(sk_tun);
-	sock_put(sk);
-	return 1;
-
-abort_put_sess_tun:
-	sock_put(sk_tun);
-abort_put_sess:
-	sock_put(sk);
-abort:
-	/* Free the original skb */
-	kfree_skb(skb);
-	return 1;
-}
-
-/*****************************************************************************
- * Session (and tunnel control) socket create/destroy.
- *****************************************************************************/
-
-/* When the tunnel UDP socket is closed, all the attached sockets need to go
- * too.
- */
-static void pppol2tp_tunnel_closeall(struct pppol2tp_tunnel *tunnel)
-{
-	int hash;
-	struct hlist_node *walk;
-	struct hlist_node *tmp;
-	struct pppol2tp_session *session;
-	struct sock *sk;
-
-	BUG_ON(tunnel == NULL);
-
-	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-	       "%s: closing all sessions...\n", tunnel->name);
-
-	write_lock_bh(&tunnel->hlist_lock);
-	for (hash = 0; hash < PPPOL2TP_HASH_SIZE; hash++) {
-again:
-		hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
-			struct sk_buff *skb;
-
-			session = hlist_entry(walk, struct pppol2tp_session, hlist);
-
-			sk = session->sock;
-
-			PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-			       "%s: closing session\n", session->name);
-
-			hlist_del_init(&session->hlist);
-
-			/* Since we should hold the sock lock while
-			 * doing any unbinding, we need to release the
-			 * lock we're holding before taking that lock.
-			 * Hold a reference to the sock so it doesn't
-			 * disappear as we're jumping between locks.
-			 */
-			sock_hold(sk);
-			write_unlock_bh(&tunnel->hlist_lock);
-			lock_sock(sk);
-
-			if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
-				pppox_unbind_sock(sk);
-				sk->sk_state = PPPOX_DEAD;
-				sk->sk_state_change(sk);
-			}
-
-			/* Purge any queued data */
-			skb_queue_purge(&sk->sk_receive_queue);
-			skb_queue_purge(&sk->sk_write_queue);
-			while ((skb = skb_dequeue(&session->reorder_q))) {
-				kfree_skb(skb);
-				sock_put(sk);
-			}
-
-			release_sock(sk);
-			sock_put(sk);
-
-			/* Now restart from the beginning of this hash
-			 * chain.  We always remove a session from the
-			 * list so we are guaranteed to make forward
-			 * progress.
-			 */
-			write_lock_bh(&tunnel->hlist_lock);
-			goto again;
-		}
-	}
-	write_unlock_bh(&tunnel->hlist_lock);
-}
-
-/* Really kill the tunnel.
- * Come here only when all sessions have been cleared from the tunnel.
- */
-static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel)
-{
-	struct pppol2tp_net *pn = pppol2tp_pernet(tunnel->pppol2tp_net);
-
-	/* Remove from socket list */
-	write_lock_bh(&pn->pppol2tp_tunnel_list_lock);
-	list_del_init(&tunnel->list);
-	write_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-
-	atomic_dec(&pppol2tp_tunnel_count);
-	kfree(tunnel);
-}
-
-/* Tunnel UDP socket destruct hook.
- * The tunnel context is deleted only when all session sockets have been
- * closed.
- */
-static void pppol2tp_tunnel_destruct(struct sock *sk)
-{
-	struct pppol2tp_tunnel *tunnel;
-
-	tunnel = sk->sk_user_data;
-	if (tunnel == NULL)
-		goto end;
-
-	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-	       "%s: closing...\n", tunnel->name);
-
-	/* Close all sessions */
-	pppol2tp_tunnel_closeall(tunnel);
-
-	/* No longer an encapsulation socket. See net/ipv4/udp.c */
-	(udp_sk(sk))->encap_type = 0;
-	(udp_sk(sk))->encap_rcv = NULL;
-
-	/* Remove hooks into tunnel socket */
-	tunnel->sock = NULL;
-	sk->sk_destruct = tunnel->old_sk_destruct;
-	sk->sk_user_data = NULL;
-
-	/* Call original (UDP) socket descructor */
-	if (sk->sk_destruct != NULL)
-		(*sk->sk_destruct)(sk);
-
-	pppol2tp_tunnel_dec_refcount(tunnel);
-
-end:
-	return;
-}
-
-/* Really kill the session socket. (Called from sock_put() if
- * refcnt == 0.)
- */
-static void pppol2tp_session_destruct(struct sock *sk)
-{
-	struct pppol2tp_session *session = NULL;
-
-	if (sk->sk_user_data != NULL) {
-		struct pppol2tp_tunnel *tunnel;
-
-		session = sk->sk_user_data;
-		if (session == NULL)
-			goto out;
-
-		BUG_ON(session->magic != L2TP_SESSION_MAGIC);
-
-		/* Don't use pppol2tp_sock_to_tunnel() here to
-		 * get the tunnel context because the tunnel
-		 * socket might have already been closed (its
-		 * sk->sk_user_data will be NULL) so use the
-		 * session's private tunnel ptr instead.
-		 */
-		tunnel = session->tunnel;
-		if (tunnel != NULL) {
-			BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-
-			/* If session_id is zero, this is a null
-			 * session context, which was created for a
-			 * socket that is being used only to manage
-			 * tunnels.
-			 */
-			if (session->tunnel_addr.s_session != 0) {
-				/* Delete the session socket from the
-				 * hash
-				 */
-				write_lock_bh(&tunnel->hlist_lock);
-				hlist_del_init(&session->hlist);
-				write_unlock_bh(&tunnel->hlist_lock);
-
-				atomic_dec(&pppol2tp_session_count);
-			}
-
-			/* This will delete the tunnel context if this
-			 * is the last session on the tunnel.
-			 */
-			session->tunnel = NULL;
-			session->tunnel_sock = NULL;
-			pppol2tp_tunnel_dec_refcount(tunnel);
-		}
-	}
-
-	kfree(session);
-out:
-	return;
-}
-
-/* Called when the PPPoX socket (session) is closed.
- */
-static int pppol2tp_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-	struct pppol2tp_session *session;
-	int error;
-
-	if (!sk)
-		return 0;
-
-	error = -EBADF;
-	lock_sock(sk);
-	if (sock_flag(sk, SOCK_DEAD) != 0)
-		goto error;
-
-	pppox_unbind_sock(sk);
-
-	/* Signal the death of the socket. */
-	sk->sk_state = PPPOX_DEAD;
-	sock_orphan(sk);
-	sock->sk = NULL;
-
-	session = pppol2tp_sock_to_session(sk);
-
-	/* Purge any queued data */
-	skb_queue_purge(&sk->sk_receive_queue);
-	skb_queue_purge(&sk->sk_write_queue);
-	if (session != NULL) {
-		struct sk_buff *skb;
-		while ((skb = skb_dequeue(&session->reorder_q))) {
-			kfree_skb(skb);
-			sock_put(sk);
-		}
-		sock_put(sk);
-	}
-
-	release_sock(sk);
-
-	/* This will delete the session context via
-	 * pppol2tp_session_destruct() if the socket's refcnt drops to
-	 * zero.
-	 */
-	sock_put(sk);
-
-	return 0;
-
-error:
-	release_sock(sk);
-	return error;
-}
-
-/* Internal function to prepare a tunnel (UDP) socket to have PPPoX
- * sockets attached to it.
- */
-static struct sock *pppol2tp_prepare_tunnel_socket(struct net *net,
-					int fd, u16 tunnel_id, int *error)
-{
-	int err;
-	struct socket *sock = NULL;
-	struct sock *sk;
-	struct pppol2tp_tunnel *tunnel;
-	struct pppol2tp_net *pn;
-	struct sock *ret = NULL;
-
-	/* Get the tunnel UDP socket from the fd, which was opened by
-	 * the userspace L2TP daemon.
-	 */
-	err = -EBADF;
-	sock = sockfd_lookup(fd, &err);
-	if (!sock) {
-		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
-		       "tunl %hu: sockfd_lookup(fd=%d) returned %d\n",
-		       tunnel_id, fd, err);
-		goto err;
-	}
-
-	sk = sock->sk;
-
-	/* Quick sanity checks */
-	err = -EPROTONOSUPPORT;
-	if (sk->sk_protocol != IPPROTO_UDP) {
-		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
-		       "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
-		       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
-		goto err;
-	}
-	err = -EAFNOSUPPORT;
-	if (sock->ops->family != AF_INET) {
-		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
-		       "tunl %hu: fd %d wrong family, got %d, expected %d\n",
-		       tunnel_id, fd, sock->ops->family, AF_INET);
-		goto err;
-	}
-
-	err = -ENOTCONN;
-
-	/* Check if this socket has already been prepped */
-	tunnel = (struct pppol2tp_tunnel *)sk->sk_user_data;
-	if (tunnel != NULL) {
-		/* User-data field already set */
-		err = -EBUSY;
-		BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-
-		/* This socket has already been prepped */
-		ret = tunnel->sock;
-		goto out;
-	}
-
-	/* This socket is available and needs prepping. Create a new tunnel
-	 * context and init it.
-	 */
-	sk->sk_user_data = tunnel = kzalloc(sizeof(struct pppol2tp_tunnel), GFP_KERNEL);
-	if (sk->sk_user_data == NULL) {
-		err = -ENOMEM;
-		goto err;
-	}
-
-	tunnel->magic = L2TP_TUNNEL_MAGIC;
-	sprintf(&tunnel->name[0], "tunl %hu", tunnel_id);
-
-	tunnel->stats.tunnel_id = tunnel_id;
-	tunnel->debug = PPPOL2TP_DEFAULT_DEBUG_FLAGS;
-
-	/* Hook on the tunnel socket destructor so that we can cleanup
-	 * if the tunnel socket goes away.
-	 */
-	tunnel->old_sk_destruct = sk->sk_destruct;
-	sk->sk_destruct = pppol2tp_tunnel_destruct;
-
-	tunnel->sock = sk;
-	sk->sk_allocation = GFP_ATOMIC;
-
-	/* Misc init */
-	rwlock_init(&tunnel->hlist_lock);
-
-	/* The net we belong to */
-	tunnel->pppol2tp_net = net;
-	pn = pppol2tp_pernet(net);
-
-	/* Add tunnel to our list */
-	INIT_LIST_HEAD(&tunnel->list);
-	write_lock_bh(&pn->pppol2tp_tunnel_list_lock);
-	list_add(&tunnel->list, &pn->pppol2tp_tunnel_list);
-	write_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-	atomic_inc(&pppol2tp_tunnel_count);
-
-	/* Bump the reference count. The tunnel context is deleted
-	 * only when this drops to zero.
-	 */
-	pppol2tp_tunnel_inc_refcount(tunnel);
-
-	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
-	(udp_sk(sk))->encap_type = UDP_ENCAP_L2TPINUDP;
-	(udp_sk(sk))->encap_rcv = pppol2tp_udp_encap_recv;
-
-	ret = tunnel->sock;
-
-	*error = 0;
-out:
-	if (sock)
-		sockfd_put(sock);
-
-	return ret;
-
-err:
-	*error = err;
-	goto out;
-}
-
-static struct proto pppol2tp_sk_proto = {
-	.name	  = "PPPOL2TP",
-	.owner	  = THIS_MODULE,
-	.obj_size = sizeof(struct pppox_sock),
-};
-
-/* socket() handler. Initialize a new struct sock.
- */
-static int pppol2tp_create(struct net *net, struct socket *sock)
-{
-	int error = -ENOMEM;
-	struct sock *sk;
-
-	sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto);
-	if (!sk)
-		goto out;
-
-	sock_init_data(sock, sk);
-
-	sock->state  = SS_UNCONNECTED;
-	sock->ops    = &pppol2tp_ops;
-
-	sk->sk_backlog_rcv = pppol2tp_recv_core;
-	sk->sk_protocol	   = PX_PROTO_OL2TP;
-	sk->sk_family	   = PF_PPPOX;
-	sk->sk_state	   = PPPOX_NONE;
-	sk->sk_type	   = SOCK_STREAM;
-	sk->sk_destruct	   = pppol2tp_session_destruct;
-
-	error = 0;
-
-out:
-	return error;
-}
-
-/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
- */
-static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
-			    int sockaddr_len, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
-	struct pppox_sock *po = pppox_sk(sk);
-	struct sock *tunnel_sock = NULL;
-	struct pppol2tp_session *session = NULL;
-	struct pppol2tp_tunnel *tunnel;
-	struct dst_entry *dst;
-	int error = 0;
-
-	lock_sock(sk);
-
-	error = -EINVAL;
-	if (sp->sa_protocol != PX_PROTO_OL2TP)
-		goto end;
-
-	/* Check for already bound sockets */
-	error = -EBUSY;
-	if (sk->sk_state & PPPOX_CONNECTED)
-		goto end;
-
-	/* We don't supporting rebinding anyway */
-	error = -EALREADY;
-	if (sk->sk_user_data)
-		goto end; /* socket is already attached */
-
-	/* Don't bind if s_tunnel is 0 */
-	error = -EINVAL;
-	if (sp->pppol2tp.s_tunnel == 0)
-		goto end;
-
-	/* Special case: prepare tunnel socket if s_session and
-	 * d_session is 0. Otherwise look up tunnel using supplied
-	 * tunnel id.
-	 */
-	if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) {
-		tunnel_sock = pppol2tp_prepare_tunnel_socket(sock_net(sk),
-							     sp->pppol2tp.fd,
-							     sp->pppol2tp.s_tunnel,
-							     &error);
-		if (tunnel_sock == NULL)
-			goto end;
-
-		sock_hold(tunnel_sock);
-		tunnel = tunnel_sock->sk_user_data;
-	} else {
-		tunnel = pppol2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel);
-
-		/* Error if we can't find the tunnel */
-		error = -ENOENT;
-		if (tunnel == NULL)
-			goto end;
-
-		tunnel_sock = tunnel->sock;
-	}
-
-	/* Check that this session doesn't already exist */
-	error = -EEXIST;
-	session = pppol2tp_session_find(tunnel, sp->pppol2tp.s_session);
-	if (session != NULL)
-		goto end;
-
-	/* Allocate and initialize a new session context. */
-	session = kzalloc(sizeof(struct pppol2tp_session), GFP_KERNEL);
-	if (session == NULL) {
-		error = -ENOMEM;
-		goto end;
-	}
-
-	skb_queue_head_init(&session->reorder_q);
-
-	session->magic	     = L2TP_SESSION_MAGIC;
-	session->owner	     = current->pid;
-	session->sock	     = sk;
-	session->tunnel	     = tunnel;
-	session->tunnel_sock = tunnel_sock;
-	session->tunnel_addr = sp->pppol2tp;
-	sprintf(&session->name[0], "sess %hu/%hu",
-		session->tunnel_addr.s_tunnel,
-		session->tunnel_addr.s_session);
-
-	session->stats.tunnel_id  = session->tunnel_addr.s_tunnel;
-	session->stats.session_id = session->tunnel_addr.s_session;
-
-	INIT_HLIST_NODE(&session->hlist);
-
-	/* Inherit debug options from tunnel */
-	session->debug = tunnel->debug;
-
-	/* Default MTU must allow space for UDP/L2TP/PPP
-	 * headers.
-	 */
-	session->mtu = session->mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
-
-	/* If PMTU discovery was enabled, use the MTU that was discovered */
-	dst = sk_dst_get(sk);
-	if (dst != NULL) {
-		u32 pmtu = dst_mtu(__sk_dst_get(sk));
-		if (pmtu != 0)
-			session->mtu = session->mru = pmtu -
-				PPPOL2TP_HEADER_OVERHEAD;
-		dst_release(dst);
-	}
-
-	/* Special case: if source & dest session_id == 0x0000, this socket is
-	 * being created to manage the tunnel. Don't add the session to the
-	 * session hash list, just set up the internal context for use by
-	 * ioctl() and sockopt() handlers.
-	 */
-	if ((session->tunnel_addr.s_session == 0) &&
-	    (session->tunnel_addr.d_session == 0)) {
-		error = 0;
-		sk->sk_user_data = session;
-		goto out_no_ppp;
-	}
-
-	/* Get tunnel context from the tunnel socket */
-	tunnel = pppol2tp_sock_to_tunnel(tunnel_sock);
-	if (tunnel == NULL) {
-		error = -EBADF;
-		goto end;
-	}
-
-	/* Right now, because we don't have a way to push the incoming skb's
-	 * straight through the UDP layer, the only header we need to worry
-	 * about is the L2TP header. This size is different depending on
-	 * whether sequence numbers are enabled for the data channel.
-	 */
-	po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
-
-	po->chan.private = sk;
-	po->chan.ops	 = &pppol2tp_chan_ops;
-	po->chan.mtu	 = session->mtu;
-
-	error = ppp_register_net_channel(sock_net(sk), &po->chan);
-	if (error)
-		goto end_put_tun;
-
-	/* This is how we get the session context from the socket. */
-	sk->sk_user_data = session;
-
-	/* Add session to the tunnel's hash list */
-	write_lock_bh(&tunnel->hlist_lock);
-	hlist_add_head(&session->hlist,
-		       pppol2tp_session_id_hash(tunnel,
-						session->tunnel_addr.s_session));
-	write_unlock_bh(&tunnel->hlist_lock);
-
-	atomic_inc(&pppol2tp_session_count);
-
-out_no_ppp:
-	pppol2tp_tunnel_inc_refcount(tunnel);
-	sk->sk_state = PPPOX_CONNECTED;
-	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-	       "%s: created\n", session->name);
-
-end_put_tun:
-	sock_put(tunnel_sock);
-end:
-	release_sock(sk);
-
-	if (error != 0) {
-		if (session)
-			PRINTK(session->debug,
-				PPPOL2TP_MSG_CONTROL, KERN_WARNING,
-				"%s: connect failed: %d\n",
-				session->name, error);
-		else
-			PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_WARNING,
-				"connect failed: %d\n", error);
-	}
-
-	return error;
-}
-
-/* getname() support.
- */
-static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
-			    int *usockaddr_len, int peer)
-{
-	int len = sizeof(struct sockaddr_pppol2tp);
-	struct sockaddr_pppol2tp sp;
-	int error = 0;
-	struct pppol2tp_session *session;
-
-	error = -ENOTCONN;
-	if (sock->sk->sk_state != PPPOX_CONNECTED)
-		goto end;
-
-	session = pppol2tp_sock_to_session(sock->sk);
-	if (session == NULL) {
-		error = -EBADF;
-		goto end;
-	}
-
-	sp.sa_family	= AF_PPPOX;
-	sp.sa_protocol	= PX_PROTO_OL2TP;
-	memcpy(&sp.pppol2tp, &session->tunnel_addr,
-	       sizeof(struct pppol2tp_addr));
-
-	memcpy(uaddr, &sp, len);
-
-	*usockaddr_len = len;
-
-	error = 0;
-	sock_put(sock->sk);
-
-end:
-	return error;
-}
-
-/****************************************************************************
- * ioctl() handlers.
- *
- * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
- * sockets. However, in order to control kernel tunnel features, we allow
- * userspace to create a special "tunnel" PPPoX socket which is used for
- * control only.  Tunnel PPPoX sockets have session_id == 0 and simply allow
- * the user application to issue L2TP setsockopt(), getsockopt() and ioctl()
- * calls.
- ****************************************************************************/
-
-/* Session ioctl helper.
- */
-static int pppol2tp_session_ioctl(struct pppol2tp_session *session,
-				  unsigned int cmd, unsigned long arg)
-{
-	struct ifreq ifr;
-	int err = 0;
-	struct sock *sk = session->sock;
-	int val = (int) arg;
-
-	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
-	       "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
-	       session->name, cmd, arg);
-
-	sock_hold(sk);
-
-	switch (cmd) {
-	case SIOCGIFMTU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
-			break;
-		ifr.ifr_mtu = session->mtu;
-		if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
-			break;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get mtu=%d\n", session->name, session->mtu);
-		err = 0;
-		break;
-
-	case SIOCSIFMTU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
-			break;
-
-		session->mtu = ifr.ifr_mtu;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set mtu=%d\n", session->name, session->mtu);
-		err = 0;
-		break;
-
-	case PPPIOCGMRU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (put_user(session->mru, (int __user *) arg))
-			break;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get mru=%d\n", session->name, session->mru);
-		err = 0;
-		break;
-
-	case PPPIOCSMRU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (get_user(val,(int __user *) arg))
-			break;
-
-		session->mru = val;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set mru=%d\n", session->name, session->mru);
-		err = 0;
-		break;
-
-	case PPPIOCGFLAGS:
-		err = -EFAULT;
-		if (put_user(session->flags, (int __user *) arg))
-			break;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get flags=%d\n", session->name, session->flags);
-		err = 0;
-		break;
-
-	case PPPIOCSFLAGS:
-		err = -EFAULT;
-		if (get_user(val, (int __user *) arg))
-			break;
-		session->flags = val;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set flags=%d\n", session->name, session->flags);
-		err = 0;
-		break;
-
-	case PPPIOCGL2TPSTATS:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		if (copy_to_user((void __user *) arg, &session->stats,
-				 sizeof(session->stats)))
-			break;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get L2TP stats\n", session->name);
-		err = 0;
-		break;
-
-	default:
-		err = -ENOSYS;
-		break;
-	}
-
-	sock_put(sk);
-
-	return err;
-}
-
-/* Tunnel ioctl helper.
- *
- * Note the special handling for PPPIOCGL2TPSTATS below. If the ioctl data
- * specifies a session_id, the session ioctl handler is called. This allows an
- * application to retrieve session stats via a tunnel socket.
- */
-static int pppol2tp_tunnel_ioctl(struct pppol2tp_tunnel *tunnel,
-				 unsigned int cmd, unsigned long arg)
-{
-	int err = 0;
-	struct sock *sk = tunnel->sock;
-	struct pppol2tp_ioc_stats stats_req;
-
-	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
-	       "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n", tunnel->name,
-	       cmd, arg);
-
-	sock_hold(sk);
-
-	switch (cmd) {
-	case PPPIOCGL2TPSTATS:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		if (copy_from_user(&stats_req, (void __user *) arg,
-				   sizeof(stats_req))) {
-			err = -EFAULT;
-			break;
-		}
-		if (stats_req.session_id != 0) {
-			/* resend to session ioctl handler */
-			struct pppol2tp_session *session =
-				pppol2tp_session_find(tunnel, stats_req.session_id);
-			if (session != NULL)
-				err = pppol2tp_session_ioctl(session, cmd, arg);
-			else
-				err = -EBADR;
-			break;
-		}
-#ifdef CONFIG_XFRM
-		tunnel->stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0;
-#endif
-		if (copy_to_user((void __user *) arg, &tunnel->stats,
-				 sizeof(tunnel->stats))) {
-			err = -EFAULT;
-			break;
-		}
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get L2TP stats\n", tunnel->name);
-		err = 0;
-		break;
-
-	default:
-		err = -ENOSYS;
-		break;
-	}
-
-	sock_put(sk);
-
-	return err;
-}
-
-/* Main ioctl() handler.
- * Dispatch to tunnel or session helpers depending on the socket.
- */
-static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
-			  unsigned long arg)
-{
-	struct sock *sk = sock->sk;
-	struct pppol2tp_session *session;
-	struct pppol2tp_tunnel *tunnel;
-	int err;
-
-	if (!sk)
-		return 0;
-
-	err = -EBADF;
-	if (sock_flag(sk, SOCK_DEAD) != 0)
-		goto end;
-
-	err = -ENOTCONN;
-	if ((sk->sk_user_data == NULL) ||
-	    (!(sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND))))
-		goto end;
-
-	/* Get session context from the socket */
-	err = -EBADF;
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto end;
-
-	/* Special case: if session's session_id is zero, treat ioctl as a
-	 * tunnel ioctl
-	 */
-	if ((session->tunnel_addr.s_session == 0) &&
-	    (session->tunnel_addr.d_session == 0)) {
-		err = -EBADF;
-		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
-		if (tunnel == NULL)
-			goto end_put_sess;
-
-		err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
-		sock_put(session->tunnel_sock);
-		goto end_put_sess;
-	}
-
-	err = pppol2tp_session_ioctl(session, cmd, arg);
-
-end_put_sess:
-	sock_put(sk);
-end:
-	return err;
-}
-
-/*****************************************************************************
- * setsockopt() / getsockopt() support.
- *
- * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
- * sockets. In order to control kernel tunnel features, we allow userspace to
- * create a special "tunnel" PPPoX socket which is used for control only.
- * Tunnel PPPoX sockets have session_id == 0 and simply allow the user
- * application to issue L2TP setsockopt(), getsockopt() and ioctl() calls.
- *****************************************************************************/
-
-/* Tunnel setsockopt() helper.
- */
-static int pppol2tp_tunnel_setsockopt(struct sock *sk,
-				      struct pppol2tp_tunnel *tunnel,
-				      int optname, int val)
-{
-	int err = 0;
-
-	switch (optname) {
-	case PPPOL2TP_SO_DEBUG:
-		tunnel->debug = val;
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set debug=%x\n", tunnel->name, tunnel->debug);
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-	return err;
-}
-
-/* Session setsockopt helper.
- */
-static int pppol2tp_session_setsockopt(struct sock *sk,
-				       struct pppol2tp_session *session,
-				       int optname, int val)
-{
-	int err = 0;
-
-	switch (optname) {
-	case PPPOL2TP_SO_RECVSEQ:
-		if ((val != 0) && (val != 1)) {
-			err = -EINVAL;
-			break;
-		}
-		session->recv_seq = val ? -1 : 0;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set recv_seq=%d\n", session->name,
-		       session->recv_seq);
-		break;
-
-	case PPPOL2TP_SO_SENDSEQ:
-		if ((val != 0) && (val != 1)) {
-			err = -EINVAL;
-			break;
-		}
-		session->send_seq = val ? -1 : 0;
-		{
-			struct sock *ssk      = session->sock;
-			struct pppox_sock *po = pppox_sk(ssk);
-			po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
-				PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
-		}
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set send_seq=%d\n", session->name, session->send_seq);
-		break;
-
-	case PPPOL2TP_SO_LNSMODE:
-		if ((val != 0) && (val != 1)) {
-			err = -EINVAL;
-			break;
-		}
-		session->lns_mode = val ? -1 : 0;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set lns_mode=%d\n", session->name,
-		       session->lns_mode);
-		break;
-
-	case PPPOL2TP_SO_DEBUG:
-		session->debug = val;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set debug=%x\n", session->name, session->debug);
-		break;
-
-	case PPPOL2TP_SO_REORDERTO:
-		session->reorder_timeout = msecs_to_jiffies(val);
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set reorder_timeout=%d\n", session->name,
-		       session->reorder_timeout);
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-	return err;
-}
-
-/* Main setsockopt() entry point.
- * Does API checks, then calls either the tunnel or session setsockopt
- * handler, according to whether the PPPoL2TP socket is a for a regular
- * session or the special tunnel type.
- */
-static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
-			       char __user *optval, unsigned int optlen)
-{
-	struct sock *sk = sock->sk;
-	struct pppol2tp_session *session = sk->sk_user_data;
-	struct pppol2tp_tunnel *tunnel;
-	int val;
-	int err;
-
-	if (level != SOL_PPPOL2TP)
-		return udp_prot.setsockopt(sk, level, optname, optval, optlen);
-
-	if (optlen < sizeof(int))
-		return -EINVAL;
-
-	if (get_user(val, (int __user *)optval))
-		return -EFAULT;
-
-	err = -ENOTCONN;
-	if (sk->sk_user_data == NULL)
-		goto end;
-
-	/* Get session context from the socket */
-	err = -EBADF;
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto end;
-
-	/* Special case: if session_id == 0x0000, treat as operation on tunnel
-	 */
-	if ((session->tunnel_addr.s_session == 0) &&
-	    (session->tunnel_addr.d_session == 0)) {
-		err = -EBADF;
-		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
-		if (tunnel == NULL)
-			goto end_put_sess;
-
-		err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
-		sock_put(session->tunnel_sock);
-	} else
-		err = pppol2tp_session_setsockopt(sk, session, optname, val);
-
-	err = 0;
-
-end_put_sess:
-	sock_put(sk);
-end:
-	return err;
-}
-
-/* Tunnel getsockopt helper. Called with sock locked.
- */
-static int pppol2tp_tunnel_getsockopt(struct sock *sk,
-				      struct pppol2tp_tunnel *tunnel,
-				      int optname, int *val)
-{
-	int err = 0;
-
-	switch (optname) {
-	case PPPOL2TP_SO_DEBUG:
-		*val = tunnel->debug;
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get debug=%x\n", tunnel->name, tunnel->debug);
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-	return err;
-}
-
-/* Session getsockopt helper. Called with sock locked.
- */
-static int pppol2tp_session_getsockopt(struct sock *sk,
-				       struct pppol2tp_session *session,
-				       int optname, int *val)
-{
-	int err = 0;
-
-	switch (optname) {
-	case PPPOL2TP_SO_RECVSEQ:
-		*val = session->recv_seq;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get recv_seq=%d\n", session->name, *val);
-		break;
-
-	case PPPOL2TP_SO_SENDSEQ:
-		*val = session->send_seq;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get send_seq=%d\n", session->name, *val);
-		break;
-
-	case PPPOL2TP_SO_LNSMODE:
-		*val = session->lns_mode;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get lns_mode=%d\n", session->name, *val);
-		break;
-
-	case PPPOL2TP_SO_DEBUG:
-		*val = session->debug;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get debug=%d\n", session->name, *val);
-		break;
-
-	case PPPOL2TP_SO_REORDERTO:
-		*val = (int) jiffies_to_msecs(session->reorder_timeout);
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get reorder_timeout=%d\n", session->name, *val);
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-	}
-
-	return err;
-}
-
-/* Main getsockopt() entry point.
- * Does API checks, then calls either the tunnel or session getsockopt
- * handler, according to whether the PPPoX socket is a for a regular session
- * or the special tunnel type.
- */
-static int pppol2tp_getsockopt(struct socket *sock, int level,
-			       int optname, char __user *optval, int __user *optlen)
-{
-	struct sock *sk = sock->sk;
-	struct pppol2tp_session *session = sk->sk_user_data;
-	struct pppol2tp_tunnel *tunnel;
-	int val, len;
-	int err;
-
-	if (level != SOL_PPPOL2TP)
-		return udp_prot.getsockopt(sk, level, optname, optval, optlen);
-
-	if (get_user(len, (int __user *) optlen))
-		return -EFAULT;
-
-	len = min_t(unsigned int, len, sizeof(int));
-
-	if (len < 0)
-		return -EINVAL;
-
-	err = -ENOTCONN;
-	if (sk->sk_user_data == NULL)
-		goto end;
-
-	/* Get the session context */
-	err = -EBADF;
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto end;
-
-	/* Special case: if session_id == 0x0000, treat as operation on tunnel */
-	if ((session->tunnel_addr.s_session == 0) &&
-	    (session->tunnel_addr.d_session == 0)) {
-		err = -EBADF;
-		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
-		if (tunnel == NULL)
-			goto end_put_sess;
-
-		err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
-		sock_put(session->tunnel_sock);
-	} else
-		err = pppol2tp_session_getsockopt(sk, session, optname, &val);
-
-	err = -EFAULT;
-	if (put_user(len, (int __user *) optlen))
-		goto end_put_sess;
-
-	if (copy_to_user((void __user *) optval, &val, len))
-		goto end_put_sess;
-
-	err = 0;
-
-end_put_sess:
-	sock_put(sk);
-end:
-	return err;
-}
-
-/*****************************************************************************
- * /proc filesystem for debug
- *****************************************************************************/
-
-#ifdef CONFIG_PROC_FS
-
-#include <linux/seq_file.h>
-
-struct pppol2tp_seq_data {
-	struct seq_net_private p;
-	struct pppol2tp_tunnel *tunnel;		/* current tunnel */
-	struct pppol2tp_session *session;	/* NULL means get first session in tunnel */
-};
-
-static struct pppol2tp_session *next_session(struct pppol2tp_tunnel *tunnel, struct pppol2tp_session *curr)
-{
-	struct pppol2tp_session *session = NULL;
-	struct hlist_node *walk;
-	int found = 0;
-	int next = 0;
-	int i;
-
-	read_lock_bh(&tunnel->hlist_lock);
-	for (i = 0; i < PPPOL2TP_HASH_SIZE; i++) {
-		hlist_for_each_entry(session, walk, &tunnel->session_hlist[i], hlist) {
-			if (curr == NULL) {
-				found = 1;
-				goto out;
-			}
-			if (session == curr) {
-				next = 1;
-				continue;
-			}
-			if (next) {
-				found = 1;
-				goto out;
-			}
-		}
-	}
-out:
-	read_unlock_bh(&tunnel->hlist_lock);
-	if (!found)
-		session = NULL;
-
-	return session;
-}
-
-static struct pppol2tp_tunnel *next_tunnel(struct pppol2tp_net *pn,
-					   struct pppol2tp_tunnel *curr)
-{
-	struct pppol2tp_tunnel *tunnel = NULL;
-
-	read_lock_bh(&pn->pppol2tp_tunnel_list_lock);
-	if (list_is_last(&curr->list, &pn->pppol2tp_tunnel_list)) {
-		goto out;
-	}
-	tunnel = list_entry(curr->list.next, struct pppol2tp_tunnel, list);
-out:
-	read_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-
-	return tunnel;
-}
-
-static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs)
-{
-	struct pppol2tp_seq_data *pd = SEQ_START_TOKEN;
-	struct pppol2tp_net *pn;
-	loff_t pos = *offs;
-
-	if (!pos)
-		goto out;
-
-	BUG_ON(m->private == NULL);
-	pd = m->private;
-	pn = pppol2tp_pernet(seq_file_net(m));
-
-	if (pd->tunnel == NULL) {
-		if (!list_empty(&pn->pppol2tp_tunnel_list))
-			pd->tunnel = list_entry(pn->pppol2tp_tunnel_list.next, struct pppol2tp_tunnel, list);
-	} else {
-		pd->session = next_session(pd->tunnel, pd->session);
-		if (pd->session == NULL) {
-			pd->tunnel = next_tunnel(pn, pd->tunnel);
-		}
-	}
-
-	/* NULL tunnel and session indicates end of list */
-	if ((pd->tunnel == NULL) && (pd->session == NULL))
-		pd = NULL;
-
-out:
-	return pd;
-}
-
-static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	(*pos)++;
-	return NULL;
-}
-
-static void pppol2tp_seq_stop(struct seq_file *p, void *v)
-{
-	/* nothing to do */
-}
-
-static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
-{
-	struct pppol2tp_tunnel *tunnel = v;
-
-	seq_printf(m, "\nTUNNEL '%s', %c %d\n",
-		   tunnel->name,
-		   (tunnel == tunnel->sock->sk_user_data) ? 'Y':'N',
-		   atomic_read(&tunnel->ref_count) - 1);
-	seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n",
-		   tunnel->debug,
-		   (unsigned long long)tunnel->stats.tx_packets,
-		   (unsigned long long)tunnel->stats.tx_bytes,
-		   (unsigned long long)tunnel->stats.tx_errors,
-		   (unsigned long long)tunnel->stats.rx_packets,
-		   (unsigned long long)tunnel->stats.rx_bytes,
-		   (unsigned long long)tunnel->stats.rx_errors);
-}
-
-static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
-{
-	struct pppol2tp_session *session = v;
-
-	seq_printf(m, "  SESSION '%s' %08X/%d %04X/%04X -> "
-		   "%04X/%04X %d %c\n",
-		   session->name,
-		   ntohl(session->tunnel_addr.addr.sin_addr.s_addr),
-		   ntohs(session->tunnel_addr.addr.sin_port),
-		   session->tunnel_addr.s_tunnel,
-		   session->tunnel_addr.s_session,
-		   session->tunnel_addr.d_tunnel,
-		   session->tunnel_addr.d_session,
-		   session->sock->sk_state,
-		   (session == session->sock->sk_user_data) ?
-		   'Y' : 'N');
-	seq_printf(m, "   %d/%d/%c/%c/%s %08x %u\n",
-		   session->mtu, session->mru,
-		   session->recv_seq ? 'R' : '-',
-		   session->send_seq ? 'S' : '-',
-		   session->lns_mode ? "LNS" : "LAC",
-		   session->debug,
-		   jiffies_to_msecs(session->reorder_timeout));
-	seq_printf(m, "   %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n",
-		   session->nr, session->ns,
-		   (unsigned long long)session->stats.tx_packets,
-		   (unsigned long long)session->stats.tx_bytes,
-		   (unsigned long long)session->stats.tx_errors,
-		   (unsigned long long)session->stats.rx_packets,
-		   (unsigned long long)session->stats.rx_bytes,
-		   (unsigned long long)session->stats.rx_errors);
-}
-
-static int pppol2tp_seq_show(struct seq_file *m, void *v)
-{
-	struct pppol2tp_seq_data *pd = v;
-
-	/* display header on line 1 */
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n");
-		seq_puts(m, "TUNNEL name, user-data-ok session-count\n");
-		seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
-		seq_puts(m, "  SESSION name, addr/port src-tid/sid "
-			 "dest-tid/sid state user-data-ok\n");
-		seq_puts(m, "   mtu/mru/rcvseq/sendseq/lns debug reorderto\n");
-		seq_puts(m, "   nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
-		goto out;
-	}
-
-	/* Show the tunnel or session context.
-	 */
-	if (pd->session == NULL)
-		pppol2tp_seq_tunnel_show(m, pd->tunnel);
-	else
-		pppol2tp_seq_session_show(m, pd->session);
-
-out:
-	return 0;
-}
-
-static const struct seq_operations pppol2tp_seq_ops = {
-	.start		= pppol2tp_seq_start,
-	.next		= pppol2tp_seq_next,
-	.stop		= pppol2tp_seq_stop,
-	.show		= pppol2tp_seq_show,
-};
-
-/* Called when our /proc file is opened. We allocate data for use when
- * iterating our tunnel / session contexts and store it in the private
- * data of the seq_file.
- */
-static int pppol2tp_proc_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &pppol2tp_seq_ops,
-			    sizeof(struct pppol2tp_seq_data));
-}
-
-static const struct file_operations pppol2tp_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= pppol2tp_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
-#endif /* CONFIG_PROC_FS */
-
-/*****************************************************************************
- * Init and cleanup
- *****************************************************************************/
-
-static const struct proto_ops pppol2tp_ops = {
-	.family		= AF_PPPOX,
-	.owner		= THIS_MODULE,
-	.release	= pppol2tp_release,
-	.bind		= sock_no_bind,
-	.connect	= pppol2tp_connect,
-	.socketpair	= sock_no_socketpair,
-	.accept		= sock_no_accept,
-	.getname	= pppol2tp_getname,
-	.poll		= datagram_poll,
-	.listen		= sock_no_listen,
-	.shutdown	= sock_no_shutdown,
-	.setsockopt	= pppol2tp_setsockopt,
-	.getsockopt	= pppol2tp_getsockopt,
-	.sendmsg	= pppol2tp_sendmsg,
-	.recvmsg	= pppol2tp_recvmsg,
-	.mmap		= sock_no_mmap,
-	.ioctl		= pppox_ioctl,
-};
-
-static struct pppox_proto pppol2tp_proto = {
-	.create		= pppol2tp_create,
-	.ioctl		= pppol2tp_ioctl
-};
-
-static __net_init int pppol2tp_init_net(struct net *net)
-{
-	struct pppol2tp_net *pn = pppol2tp_pernet(net);
-	struct proc_dir_entry *pde;
-
-	INIT_LIST_HEAD(&pn->pppol2tp_tunnel_list);
-	rwlock_init(&pn->pppol2tp_tunnel_list_lock);
-
-	pde = proc_net_fops_create(net, "pppol2tp", S_IRUGO, &pppol2tp_proc_fops);
-#ifdef CONFIG_PROC_FS
-	if (!pde)
-		return -ENOMEM;
-#endif
-
-	return 0;
-}
-
-static __net_exit void pppol2tp_exit_net(struct net *net)
-{
-	proc_net_remove(net, "pppol2tp");
-}
-
-static struct pernet_operations pppol2tp_net_ops = {
-	.init = pppol2tp_init_net,
-	.exit = pppol2tp_exit_net,
-	.id   = &pppol2tp_net_id,
-	.size = sizeof(struct pppol2tp_net),
-};
-
-static int __init pppol2tp_init(void)
-{
-	int err;
-
-	err = proto_register(&pppol2tp_sk_proto, 0);
-	if (err)
-		goto out;
-	err = register_pppox_proto(PX_PROTO_OL2TP, &pppol2tp_proto);
-	if (err)
-		goto out_unregister_pppol2tp_proto;
-
-	err = register_pernet_device(&pppol2tp_net_ops);
-	if (err)
-		goto out_unregister_pppox_proto;
-
-	printk(KERN_INFO "PPPoL2TP kernel driver, %s\n",
-	       PPPOL2TP_DRV_VERSION);
-
-out:
-	return err;
-out_unregister_pppox_proto:
-	unregister_pppox_proto(PX_PROTO_OL2TP);
-out_unregister_pppol2tp_proto:
-	proto_unregister(&pppol2tp_sk_proto);
-	goto out;
-}
-
-static void __exit pppol2tp_exit(void)
-{
-	unregister_pppox_proto(PX_PROTO_OL2TP);
-	unregister_pernet_device(&pppol2tp_net_ops);
-	proto_unregister(&pppol2tp_sk_proto);
-}
-
-module_init(pppol2tp_init);
-module_exit(pppol2tp_exit);
-
-MODULE_AUTHOR("Martijn van Oosterhout <kleptog@svana.org>, "
-	      "James Chapman <jchapman@katalix.com>");
-MODULE_DESCRIPTION("PPP over L2TP over UDP");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(PPPOL2TP_DRV_VERSION);
diff --git a/net/Makefile b/net/Makefile
index a5eae27aa42d..13ca77e0eb08 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_BT)		+= bluetooth/
 obj-$(CONFIG_SUNRPC)		+= sunrpc/
 obj-$(CONFIG_AF_RXRPC)		+= rxrpc/
 obj-$(CONFIG_ATM)		+= atm/
+obj-$(CONFIG_PPPOL2TP)		+= l2tp/
 obj-$(CONFIG_DECNET)		+= decnet/
 obj-$(CONFIG_ECONET)		+= econet/
 obj-$(CONFIG_PHONET)		+= phonet/
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
new file mode 100644
index 000000000000..9af41e898a04
--- /dev/null
+++ b/net/l2tp/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the L2TP.
+#
+
+obj-$(CONFIG_PPPOL2TP) += pppol2tp.o
diff --git a/net/l2tp/pppol2tp.c b/net/l2tp/pppol2tp.c
new file mode 100644
index 000000000000..449a9825200d
--- /dev/null
+++ b/net/l2tp/pppol2tp.c
@@ -0,0 +1,2680 @@
+/*****************************************************************************
+ * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets
+ *
+ * PPPoX    --- Generic PPP encapsulation socket family
+ * PPPoL2TP --- PPP over L2TP (RFC 2661)
+ *
+ * Version:	1.0.0
+ *
+ * Authors:	Martijn van Oosterhout <kleptog@svana.org>
+ *		James Chapman (jchapman@katalix.com)
+ * Contributors:
+ *		Michal Ostrowski <mostrows@speakeasy.net>
+ *		Arnaldo Carvalho de Melo <acme@xconectiva.com.br>
+ *		David S. Miller (davem@redhat.com)
+ *
+ * License:
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+
+/* This driver handles only L2TP data frames; control frames are handled by a
+ * userspace application.
+ *
+ * To send data in an L2TP session, userspace opens a PPPoL2TP socket and
+ * attaches it to a bound UDP socket with local tunnel_id / session_id and
+ * peer tunnel_id / session_id set. Data can then be sent or received using
+ * regular socket sendmsg() / recvmsg() calls. Kernel parameters of the socket
+ * can be read or modified using ioctl() or [gs]etsockopt() calls.
+ *
+ * When a PPPoL2TP socket is connected with local and peer session_id values
+ * zero, the socket is treated as a special tunnel management socket.
+ *
+ * Here's example userspace code to create a socket for sending/receiving data
+ * over an L2TP session:-
+ *
+ *	struct sockaddr_pppol2tp sax;
+ *	int fd;
+ *	int session_fd;
+ *
+ *	fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
+ *
+ *	sax.sa_family = AF_PPPOX;
+ *	sax.sa_protocol = PX_PROTO_OL2TP;
+ *	sax.pppol2tp.fd = tunnel_fd;	// bound UDP socket
+ *	sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
+ *	sax.pppol2tp.addr.sin_port = addr->sin_port;
+ *	sax.pppol2tp.addr.sin_family = AF_INET;
+ *	sax.pppol2tp.s_tunnel  = tunnel_id;
+ *	sax.pppol2tp.s_session = session_id;
+ *	sax.pppol2tp.d_tunnel  = peer_tunnel_id;
+ *	sax.pppol2tp.d_session = peer_session_id;
+ *
+ *	session_fd = connect(fd, (struct sockaddr *)&sax, sizeof(sax));
+ *
+ * A pppd plugin that allows PPP traffic to be carried over L2TP using
+ * this driver is available from the OpenL2TP project at
+ * http://openl2tp.sourceforge.net.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <asm/uaccess.h>
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+
+#include <linux/netdevice.h>
+#include <linux/net.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/if_pppox.h>
+#include <linux/if_pppol2tp.h>
+#include <net/sock.h>
+#include <linux/ppp_channel.h>
+#include <linux/ppp_defs.h>
+#include <linux/if_ppp.h>
+#include <linux/file.h>
+#include <linux/hash.h>
+#include <linux/sort.h>
+#include <linux/proc_fs.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/dst.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/xfrm.h>
+
+#include <asm/byteorder.h>
+#include <asm/atomic.h>
+
+
+#define PPPOL2TP_DRV_VERSION	"V1.0"
+
+/* L2TP header constants */
+#define L2TP_HDRFLAG_T	   0x8000
+#define L2TP_HDRFLAG_L	   0x4000
+#define L2TP_HDRFLAG_S	   0x0800
+#define L2TP_HDRFLAG_O	   0x0200
+#define L2TP_HDRFLAG_P	   0x0100
+
+#define L2TP_HDR_VER_MASK  0x000F
+#define L2TP_HDR_VER	   0x0002
+
+/* Space for UDP, L2TP and PPP headers */
+#define PPPOL2TP_HEADER_OVERHEAD	40
+
+/* Just some random numbers */
+#define L2TP_TUNNEL_MAGIC	0x42114DDA
+#define L2TP_SESSION_MAGIC	0x0C04EB7D
+
+#define PPPOL2TP_HASH_BITS	4
+#define PPPOL2TP_HASH_SIZE	(1 << PPPOL2TP_HASH_BITS)
+
+/* Default trace flags */
+#define PPPOL2TP_DEFAULT_DEBUG_FLAGS	0
+
+#define PRINTK(_mask, _type, _lvl, _fmt, args...)			\
+	do {								\
+		if ((_mask) & (_type))					\
+			printk(_lvl "PPPOL2TP: " _fmt, ##args);		\
+	} while(0)
+
+/* Number of bytes to build transmit L2TP headers.
+ * Unfortunately the size is different depending on whether sequence numbers
+ * are enabled.
+ */
+#define PPPOL2TP_L2TP_HDR_SIZE_SEQ		10
+#define PPPOL2TP_L2TP_HDR_SIZE_NOSEQ		6
+
+struct pppol2tp_tunnel;
+
+/* Describes a session. It is the sk_user_data field in the PPPoL2TP
+ * socket. Contains information to determine incoming packets and transmit
+ * outgoing ones.
+ */
+struct pppol2tp_session
+{
+	int			magic;		/* should be
+						 * L2TP_SESSION_MAGIC */
+	int			owner;		/* pid that opened the socket */
+
+	struct sock		*sock;		/* Pointer to the session
+						 * PPPoX socket */
+	struct sock		*tunnel_sock;	/* Pointer to the tunnel UDP
+						 * socket */
+
+	struct pppol2tp_addr	tunnel_addr;	/* Description of tunnel */
+
+	struct pppol2tp_tunnel	*tunnel;	/* back pointer to tunnel
+						 * context */
+
+	char			name[20];	/* "sess xxxxx/yyyyy", where
+						 * x=tunnel_id, y=session_id */
+	int			mtu;
+	int			mru;
+	int			flags;		/* accessed by PPPIOCGFLAGS.
+						 * Unused. */
+	unsigned		recv_seq:1;	/* expect receive packets with
+						 * sequence numbers? */
+	unsigned		send_seq:1;	/* send packets with sequence
+						 * numbers? */
+	unsigned		lns_mode:1;	/* behave as LNS? LAC enables
+						 * sequence numbers under
+						 * control of LNS. */
+	int			debug;		/* bitmask of debug message
+						 * categories */
+	int			reorder_timeout; /* configured reorder timeout
+						  * (in jiffies) */
+	u16			nr;		/* session NR state (receive) */
+	u16			ns;		/* session NS state (send) */
+	struct sk_buff_head	reorder_q;	/* receive reorder queue */
+	struct pppol2tp_ioc_stats stats;
+	struct hlist_node	hlist;		/* Hash list node */
+};
+
+/* The sk_user_data field of the tunnel's UDP socket. It contains info to track
+ * all the associated sessions so incoming packets can be sorted out
+ */
+struct pppol2tp_tunnel
+{
+	int			magic;		/* Should be L2TP_TUNNEL_MAGIC */
+	rwlock_t		hlist_lock;	/* protect session_hlist */
+	struct hlist_head	session_hlist[PPPOL2TP_HASH_SIZE];
+						/* hashed list of sessions,
+						 * hashed by id */
+	int			debug;		/* bitmask of debug message
+						 * categories */
+	char			name[12];	/* "tunl xxxxx" */
+	struct pppol2tp_ioc_stats stats;
+
+	void (*old_sk_destruct)(struct sock *);
+
+	struct sock		*sock;		/* Parent socket */
+	struct list_head	list;		/* Keep a list of all open
+						 * prepared sockets */
+	struct net		*pppol2tp_net;	/* the net we belong to */
+
+	atomic_t		ref_count;
+};
+
+/* Private data stored for received packets in the skb.
+ */
+struct pppol2tp_skb_cb {
+	u16			ns;
+	u16			nr;
+	u16			has_seq;
+	u16			length;
+	unsigned long		expires;
+};
+
+#define PPPOL2TP_SKB_CB(skb)	((struct pppol2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
+
+static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
+static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel);
+
+static atomic_t pppol2tp_tunnel_count;
+static atomic_t pppol2tp_session_count;
+static struct ppp_channel_ops pppol2tp_chan_ops = { pppol2tp_xmit , NULL };
+static const struct proto_ops pppol2tp_ops;
+
+/* per-net private data for this module */
+static int pppol2tp_net_id __read_mostly;
+struct pppol2tp_net {
+	struct list_head pppol2tp_tunnel_list;
+	rwlock_t pppol2tp_tunnel_list_lock;
+};
+
+static inline struct pppol2tp_net *pppol2tp_pernet(struct net *net)
+{
+	BUG_ON(!net);
+
+	return net_generic(net, pppol2tp_net_id);
+}
+
+/* Helpers to obtain tunnel/session contexts from sockets.
+ */
+static inline struct pppol2tp_session *pppol2tp_sock_to_session(struct sock *sk)
+{
+	struct pppol2tp_session *session;
+
+	if (sk == NULL)
+		return NULL;
+
+	sock_hold(sk);
+	session = (struct pppol2tp_session *)(sk->sk_user_data);
+	if (session == NULL) {
+		sock_put(sk);
+		goto out;
+	}
+
+	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+out:
+	return session;
+}
+
+static inline struct pppol2tp_tunnel *pppol2tp_sock_to_tunnel(struct sock *sk)
+{
+	struct pppol2tp_tunnel *tunnel;
+
+	if (sk == NULL)
+		return NULL;
+
+	sock_hold(sk);
+	tunnel = (struct pppol2tp_tunnel *)(sk->sk_user_data);
+	if (tunnel == NULL) {
+		sock_put(sk);
+		goto out;
+	}
+
+	BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
+out:
+	return tunnel;
+}
+
+/* Tunnel reference counts. Incremented per session that is added to
+ * the tunnel.
+ */
+static inline void pppol2tp_tunnel_inc_refcount(struct pppol2tp_tunnel *tunnel)
+{
+	atomic_inc(&tunnel->ref_count);
+}
+
+static inline void pppol2tp_tunnel_dec_refcount(struct pppol2tp_tunnel *tunnel)
+{
+	if (atomic_dec_and_test(&tunnel->ref_count))
+		pppol2tp_tunnel_free(tunnel);
+}
+
+/* Session hash list.
+ * The session_id SHOULD be random according to RFC2661, but several
+ * L2TP implementations (Cisco and Microsoft) use incrementing
+ * session_ids.  So we do a real hash on the session_id, rather than a
+ * simple bitmask.
+ */
+static inline struct hlist_head *
+pppol2tp_session_id_hash(struct pppol2tp_tunnel *tunnel, u16 session_id)
+{
+	unsigned long hash_val = (unsigned long) session_id;
+	return &tunnel->session_hlist[hash_long(hash_val, PPPOL2TP_HASH_BITS)];
+}
+
+/* Lookup a session by id
+ */
+static struct pppol2tp_session *
+pppol2tp_session_find(struct pppol2tp_tunnel *tunnel, u16 session_id)
+{
+	struct hlist_head *session_list =
+		pppol2tp_session_id_hash(tunnel, session_id);
+	struct pppol2tp_session *session;
+	struct hlist_node *walk;
+
+	read_lock_bh(&tunnel->hlist_lock);
+	hlist_for_each_entry(session, walk, session_list, hlist) {
+		if (session->tunnel_addr.s_session == session_id) {
+			read_unlock_bh(&tunnel->hlist_lock);
+			return session;
+		}
+	}
+	read_unlock_bh(&tunnel->hlist_lock);
+
+	return NULL;
+}
+
+/* Lookup a tunnel by id
+ */
+static struct pppol2tp_tunnel *pppol2tp_tunnel_find(struct net *net, u16 tunnel_id)
+{
+	struct pppol2tp_tunnel *tunnel;
+	struct pppol2tp_net *pn = pppol2tp_pernet(net);
+
+	read_lock_bh(&pn->pppol2tp_tunnel_list_lock);
+	list_for_each_entry(tunnel, &pn->pppol2tp_tunnel_list, list) {
+		if (tunnel->stats.tunnel_id == tunnel_id) {
+			read_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
+			return tunnel;
+		}
+	}
+	read_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
+
+	return NULL;
+}
+
+/*****************************************************************************
+ * Receive data handling
+ *****************************************************************************/
+
+/* Queue a skb in order. We come here only if the skb has an L2TP sequence
+ * number.
+ */
+static void pppol2tp_recv_queue_skb(struct pppol2tp_session *session, struct sk_buff *skb)
+{
+	struct sk_buff *skbp;
+	struct sk_buff *tmp;
+	u16 ns = PPPOL2TP_SKB_CB(skb)->ns;
+
+	spin_lock_bh(&session->reorder_q.lock);
+	skb_queue_walk_safe(&session->reorder_q, skbp, tmp) {
+		if (PPPOL2TP_SKB_CB(skbp)->ns > ns) {
+			__skb_queue_before(&session->reorder_q, skbp, skb);
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+			       "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n",
+			       session->name, ns, PPPOL2TP_SKB_CB(skbp)->ns,
+			       skb_queue_len(&session->reorder_q));
+			session->stats.rx_oos_packets++;
+			goto out;
+		}
+	}
+
+	__skb_queue_tail(&session->reorder_q, skb);
+
+out:
+	spin_unlock_bh(&session->reorder_q.lock);
+}
+
+/* Dequeue a single skb.
+ */
+static void pppol2tp_recv_dequeue_skb(struct pppol2tp_session *session, struct sk_buff *skb)
+{
+	struct pppol2tp_tunnel *tunnel = session->tunnel;
+	int length = PPPOL2TP_SKB_CB(skb)->length;
+	struct sock *session_sock = NULL;
+
+	/* We're about to requeue the skb, so return resources
+	 * to its current owner (a socket receive buffer).
+	 */
+	skb_orphan(skb);
+
+	tunnel->stats.rx_packets++;
+	tunnel->stats.rx_bytes += length;
+	session->stats.rx_packets++;
+	session->stats.rx_bytes += length;
+
+	if (PPPOL2TP_SKB_CB(skb)->has_seq) {
+		/* Bump our Nr */
+		session->nr++;
+		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+		       "%s: updated nr to %hu\n", session->name, session->nr);
+	}
+
+	/* If the socket is bound, send it in to PPP's input queue. Otherwise
+	 * queue it on the session socket.
+	 */
+	session_sock = session->sock;
+	if (session_sock->sk_state & PPPOX_BOUND) {
+		struct pppox_sock *po;
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: recv %d byte data frame, passing to ppp\n",
+		       session->name, length);
+
+		/* We need to forget all info related to the L2TP packet
+		 * gathered in the skb as we are going to reuse the same
+		 * skb for the inner packet.
+		 * Namely we need to:
+		 * - reset xfrm (IPSec) information as it applies to
+		 *   the outer L2TP packet and not to the inner one
+		 * - release the dst to force a route lookup on the inner
+		 *   IP packet since skb->dst currently points to the dst
+		 *   of the UDP tunnel
+		 * - reset netfilter information as it doesn't apply
+		 *   to the inner packet either
+		 */
+		secpath_reset(skb);
+		skb_dst_drop(skb);
+		nf_reset(skb);
+
+		po = pppox_sk(session_sock);
+		ppp_input(&po->chan, skb);
+	} else {
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
+		       "%s: socket not bound\n", session->name);
+
+		/* Not bound. Nothing we can do, so discard. */
+		session->stats.rx_errors++;
+		kfree_skb(skb);
+	}
+
+	sock_put(session->sock);
+}
+
+/* Dequeue skbs from the session's reorder_q, subject to packet order.
+ * Skbs that have been in the queue for too long are simply discarded.
+ */
+static void pppol2tp_recv_dequeue(struct pppol2tp_session *session)
+{
+	struct sk_buff *skb;
+	struct sk_buff *tmp;
+
+	/* If the pkt at the head of the queue has the ns that we
+	 * expect to send up next (session->nr), dequeue it and any
+	 * other in-sequence packets behind it.
+	 */
+	spin_lock_bh(&session->reorder_q.lock);
+	skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
+		if (time_after(jiffies, PPPOL2TP_SKB_CB(skb)->expires)) {
+			session->stats.rx_seq_discards++;
+			session->stats.rx_errors++;
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+			       "%s: oos pkt %hu len %d discarded (too old), "
+			       "waiting for %hu, reorder_q_len=%d\n",
+			       session->name, PPPOL2TP_SKB_CB(skb)->ns,
+			       PPPOL2TP_SKB_CB(skb)->length, session->nr,
+			       skb_queue_len(&session->reorder_q));
+			__skb_unlink(skb, &session->reorder_q);
+			kfree_skb(skb);
+			sock_put(session->sock);
+			continue;
+		}
+
+		if (PPPOL2TP_SKB_CB(skb)->has_seq) {
+			if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) {
+				PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+				       "%s: holding oos pkt %hu len %d, "
+				       "waiting for %hu, reorder_q_len=%d\n",
+				       session->name, PPPOL2TP_SKB_CB(skb)->ns,
+				       PPPOL2TP_SKB_CB(skb)->length, session->nr,
+				       skb_queue_len(&session->reorder_q));
+				goto out;
+			}
+		}
+		__skb_unlink(skb, &session->reorder_q);
+
+		/* Process the skb. We release the queue lock while we
+		 * do so to let other contexts process the queue.
+		 */
+		spin_unlock_bh(&session->reorder_q.lock);
+		pppol2tp_recv_dequeue_skb(session, skb);
+		spin_lock_bh(&session->reorder_q.lock);
+	}
+
+out:
+	spin_unlock_bh(&session->reorder_q.lock);
+}
+
+static inline int pppol2tp_verify_udp_checksum(struct sock *sk,
+					       struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+	u16 ulen = ntohs(uh->len);
+	struct inet_sock *inet;
+	__wsum psum;
+
+	if (sk->sk_no_check || skb_csum_unnecessary(skb) || !uh->check)
+		return 0;
+
+	inet = inet_sk(sk);
+	psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, ulen,
+				  IPPROTO_UDP, 0);
+
+	if ((skb->ip_summed == CHECKSUM_COMPLETE) &&
+	    !csum_fold(csum_add(psum, skb->csum)))
+		return 0;
+
+	skb->csum = psum;
+
+	return __skb_checksum_complete(skb);
+}
+
+/* Internal receive frame. Do the real work of receiving an L2TP data frame
+ * here. The skb is not on a list when we get here.
+ * Returns 0 if the packet was a data packet and was successfully passed on.
+ * Returns 1 if the packet was not a good data packet and could not be
+ * forwarded.  All such packets are passed up to userspace to deal with.
+ */
+static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb)
+{
+	struct pppol2tp_session *session = NULL;
+	struct pppol2tp_tunnel *tunnel;
+	unsigned char *ptr, *optr;
+	u16 hdrflags;
+	u16 tunnel_id, session_id;
+	int length;
+	int offset;
+
+	tunnel = pppol2tp_sock_to_tunnel(sock);
+	if (tunnel == NULL)
+		goto no_tunnel;
+
+	if (tunnel->sock && pppol2tp_verify_udp_checksum(tunnel->sock, skb))
+		goto discard_bad_csum;
+
+	/* UDP always verifies the packet length. */
+	__skb_pull(skb, sizeof(struct udphdr));
+
+	/* Short packet? */
+	if (!pskb_may_pull(skb, 12)) {
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
+		       "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
+		goto error;
+	}
+
+	/* Point to L2TP header */
+	optr = ptr = skb->data;
+
+	/* Get L2TP header flags */
+	hdrflags = ntohs(*(__be16*)ptr);
+
+	/* Trace packet contents, if enabled */
+	if (tunnel->debug & PPPOL2TP_MSG_DATA) {
+		length = min(16u, skb->len);
+		if (!pskb_may_pull(skb, length))
+			goto error;
+
+		printk(KERN_DEBUG "%s: recv: ", tunnel->name);
+
+		offset = 0;
+		do {
+			printk(" %02X", ptr[offset]);
+		} while (++offset < length);
+
+		printk("\n");
+	}
+
+	/* Get length of L2TP packet */
+	length = skb->len;
+
+	/* If type is control packet, it is handled by userspace. */
+	if (hdrflags & L2TP_HDRFLAG_T) {
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: recv control packet, len=%d\n", tunnel->name, length);
+		goto error;
+	}
+
+	/* Skip flags */
+	ptr += 2;
+
+	/* If length is present, skip it */
+	if (hdrflags & L2TP_HDRFLAG_L)
+		ptr += 2;
+
+	/* Extract tunnel and session ID */
+	tunnel_id = ntohs(*(__be16 *) ptr);
+	ptr += 2;
+	session_id = ntohs(*(__be16 *) ptr);
+	ptr += 2;
+
+	/* Find the session context */
+	session = pppol2tp_session_find(tunnel, session_id);
+	if (!session) {
+		/* Not found? Pass to userspace to deal with */
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
+		       "%s: no socket found (%hu/%hu). Passing up.\n",
+		       tunnel->name, tunnel_id, session_id);
+		goto error;
+	}
+	sock_hold(session->sock);
+
+	/* The ref count on the socket was increased by the above call since
+	 * we now hold a pointer to the session. Take care to do sock_put()
+	 * when exiting this function from now on...
+	 */
+
+	/* Handle the optional sequence numbers.  If we are the LAC,
+	 * enable/disable sequence numbers under the control of the LNS.  If
+	 * no sequence numbers present but we were expecting them, discard
+	 * frame.
+	 */
+	if (hdrflags & L2TP_HDRFLAG_S) {
+		u16 ns, nr;
+		ns = ntohs(*(__be16 *) ptr);
+		ptr += 2;
+		nr = ntohs(*(__be16 *) ptr);
+		ptr += 2;
+
+		/* Received a packet with sequence numbers. If we're the LAC,
+		 * check if we are sending sequence numbers and if not,
+		 * configure it so.
+		 */
+		if ((!session->lns_mode) && (!session->send_seq)) {
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO,
+			       "%s: requested to enable seq numbers by LNS\n",
+			       session->name);
+			session->send_seq = -1;
+		}
+
+		/* Store L2TP info in the skb */
+		PPPOL2TP_SKB_CB(skb)->ns = ns;
+		PPPOL2TP_SKB_CB(skb)->nr = nr;
+		PPPOL2TP_SKB_CB(skb)->has_seq = 1;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+		       "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n",
+		       session->name, ns, nr, session->nr);
+	} else {
+		/* No sequence numbers.
+		 * If user has configured mandatory sequence numbers, discard.
+		 */
+		if (session->recv_seq) {
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING,
+			       "%s: recv data has no seq numbers when required. "
+			       "Discarding\n", session->name);
+			session->stats.rx_seq_discards++;
+			goto discard;
+		}
+
+		/* If we're the LAC and we're sending sequence numbers, the
+		 * LNS has requested that we no longer send sequence numbers.
+		 * If we're the LNS and we're sending sequence numbers, the
+		 * LAC is broken. Discard the frame.
+		 */
+		if ((!session->lns_mode) && (session->send_seq)) {
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO,
+			       "%s: requested to disable seq numbers by LNS\n",
+			       session->name);
+			session->send_seq = 0;
+		} else if (session->send_seq) {
+			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING,
+			       "%s: recv data has no seq numbers when required. "
+			       "Discarding\n", session->name);
+			session->stats.rx_seq_discards++;
+			goto discard;
+		}
+
+		/* Store L2TP info in the skb */
+		PPPOL2TP_SKB_CB(skb)->has_seq = 0;
+	}
+
+	/* If offset bit set, skip it. */
+	if (hdrflags & L2TP_HDRFLAG_O) {
+		offset = ntohs(*(__be16 *)ptr);
+		ptr += 2 + offset;
+	}
+
+	offset = ptr - optr;
+	if (!pskb_may_pull(skb, offset))
+		goto discard;
+
+	__skb_pull(skb, offset);
+
+	/* Skip PPP header, if present.	 In testing, Microsoft L2TP clients
+	 * don't send the PPP header (PPP header compression enabled), but
+	 * other clients can include the header. So we cope with both cases
+	 * here. The PPP header is always FF03 when using L2TP.
+	 *
+	 * Note that skb->data[] isn't dereferenced from a u16 ptr here since
+	 * the field may be unaligned.
+	 */
+	if (!pskb_may_pull(skb, 2))
+		goto discard;
+
+	if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
+		skb_pull(skb, 2);
+
+	/* Prepare skb for adding to the session's reorder_q.  Hold
+	 * packets for max reorder_timeout or 1 second if not
+	 * reordering.
+	 */
+	PPPOL2TP_SKB_CB(skb)->length = length;
+	PPPOL2TP_SKB_CB(skb)->expires = jiffies +
+		(session->reorder_timeout ? session->reorder_timeout : HZ);
+
+	/* Add packet to the session's receive queue. Reordering is done here, if
+	 * enabled. Saved L2TP protocol info is stored in skb->cb[].
+	 */
+	if (PPPOL2TP_SKB_CB(skb)->has_seq) {
+		if (session->reorder_timeout != 0) {
+			/* Packet reordering enabled. Add skb to session's
+			 * reorder queue, in order of ns.
+			 */
+			pppol2tp_recv_queue_skb(session, skb);
+		} else {
+			/* Packet reordering disabled. Discard out-of-sequence
+			 * packets
+			 */
+			if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) {
+				session->stats.rx_seq_discards++;
+				PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+				       "%s: oos pkt %hu len %d discarded, "
+				       "waiting for %hu, reorder_q_len=%d\n",
+				       session->name, PPPOL2TP_SKB_CB(skb)->ns,
+				       PPPOL2TP_SKB_CB(skb)->length, session->nr,
+				       skb_queue_len(&session->reorder_q));
+				goto discard;
+			}
+			skb_queue_tail(&session->reorder_q, skb);
+		}
+	} else {
+		/* No sequence numbers. Add the skb to the tail of the
+		 * reorder queue. This ensures that it will be
+		 * delivered after all previous sequenced skbs.
+		 */
+		skb_queue_tail(&session->reorder_q, skb);
+	}
+
+	/* Try to dequeue as many skbs from reorder_q as we can. */
+	pppol2tp_recv_dequeue(session);
+	sock_put(sock);
+
+	return 0;
+
+discard:
+	session->stats.rx_errors++;
+	kfree_skb(skb);
+	sock_put(session->sock);
+	sock_put(sock);
+
+	return 0;
+
+discard_bad_csum:
+	LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name);
+	UDP_INC_STATS_USER(&init_net, UDP_MIB_INERRORS, 0);
+	tunnel->stats.rx_errors++;
+	kfree_skb(skb);
+	sock_put(sock);
+
+	return 0;
+
+error:
+	/* Put UDP header back */
+	__skb_push(skb, sizeof(struct udphdr));
+	sock_put(sock);
+
+no_tunnel:
+	return 1;
+}
+
+/* UDP encapsulation receive handler. See net/ipv4/udp.c.
+ * Return codes:
+ * 0 : success.
+ * <0: error
+ * >0: skb should be passed up to userspace as UDP.
+ */
+static int pppol2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct pppol2tp_tunnel *tunnel;
+
+	tunnel = pppol2tp_sock_to_tunnel(sk);
+	if (tunnel == NULL)
+		goto pass_up;
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+	       "%s: received %d bytes\n", tunnel->name, skb->len);
+
+	if (pppol2tp_recv_core(sk, skb))
+		goto pass_up_put;
+
+	sock_put(sk);
+	return 0;
+
+pass_up_put:
+	sock_put(sk);
+pass_up:
+	return 1;
+}
+
+/* Receive message. This is the recvmsg for the PPPoL2TP socket.
+ */
+static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
+			    struct msghdr *msg, size_t len,
+			    int flags)
+{
+	int err;
+	struct sk_buff *skb;
+	struct sock *sk = sock->sk;
+
+	err = -EIO;
+	if (sk->sk_state & PPPOX_BOUND)
+		goto end;
+
+	msg->msg_namelen = 0;
+
+	err = 0;
+	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
+				flags & MSG_DONTWAIT, &err);
+	if (!skb)
+		goto end;
+
+	if (len > skb->len)
+		len = skb->len;
+	else if (len < skb->len)
+		msg->msg_flags |= MSG_TRUNC;
+
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
+	if (likely(err == 0))
+		err = len;
+
+	kfree_skb(skb);
+end:
+	return err;
+}
+
+/************************************************************************
+ * Transmit handling
+ ***********************************************************************/
+
+/* Tell how big L2TP headers are for a particular session. This
+ * depends on whether sequence numbers are being used.
+ */
+static inline int pppol2tp_l2tp_header_len(struct pppol2tp_session *session)
+{
+	if (session->send_seq)
+		return PPPOL2TP_L2TP_HDR_SIZE_SEQ;
+
+	return PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+}
+
+/* Build an L2TP header for the session into the buffer provided.
+ */
+static void pppol2tp_build_l2tp_header(struct pppol2tp_session *session,
+				       void *buf)
+{
+	__be16 *bufp = buf;
+	u16 flags = L2TP_HDR_VER;
+
+	if (session->send_seq)
+		flags |= L2TP_HDRFLAG_S;
+
+	/* Setup L2TP header.
+	 * FIXME: Can this ever be unaligned? Is direct dereferencing of
+	 * 16-bit header fields safe here for all architectures?
+	 */
+	*bufp++ = htons(flags);
+	*bufp++ = htons(session->tunnel_addr.d_tunnel);
+	*bufp++ = htons(session->tunnel_addr.d_session);
+	if (session->send_seq) {
+		*bufp++ = htons(session->ns);
+		*bufp++ = 0;
+		session->ns++;
+		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
+		       "%s: updated ns to %hu\n", session->name, session->ns);
+	}
+}
+
+/* This is the sendmsg for the PPPoL2TP pppol2tp_session socket.  We come here
+ * when a user application does a sendmsg() on the session socket. L2TP and
+ * PPP headers must be inserted into the user's data.
+ */
+static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+			    size_t total_len)
+{
+	static const unsigned char ppph[2] = { 0xff, 0x03 };
+	struct sock *sk = sock->sk;
+	struct inet_sock *inet;
+	__wsum csum;
+	struct sk_buff *skb;
+	int error;
+	int hdr_len;
+	struct pppol2tp_session *session;
+	struct pppol2tp_tunnel *tunnel;
+	struct udphdr *uh;
+	unsigned int len;
+	struct sock *sk_tun;
+	u16 udp_len;
+
+	error = -ENOTCONN;
+	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
+		goto error;
+
+	/* Get session and tunnel contexts */
+	error = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto error;
+
+	sk_tun = session->tunnel_sock;
+	tunnel = pppol2tp_sock_to_tunnel(sk_tun);
+	if (tunnel == NULL)
+		goto error_put_sess;
+
+	/* What header length is configured for this session? */
+	hdr_len = pppol2tp_l2tp_header_len(session);
+
+	/* Allocate a socket buffer */
+	error = -ENOMEM;
+	skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
+			   sizeof(struct udphdr) + hdr_len +
+			   sizeof(ppph) + total_len,
+			   0, GFP_KERNEL);
+	if (!skb)
+		goto error_put_sess_tun;
+
+	/* Reserve space for headers. */
+	skb_reserve(skb, NET_SKB_PAD);
+	skb_reset_network_header(skb);
+	skb_reserve(skb, sizeof(struct iphdr));
+	skb_reset_transport_header(skb);
+
+	/* Build UDP header */
+	inet = inet_sk(sk_tun);
+	udp_len = hdr_len + sizeof(ppph) + total_len;
+	uh = (struct udphdr *) skb->data;
+	uh->source = inet->inet_sport;
+	uh->dest = inet->inet_dport;
+	uh->len = htons(udp_len);
+	uh->check = 0;
+	skb_put(skb, sizeof(struct udphdr));
+
+	/* Build L2TP header */
+	pppol2tp_build_l2tp_header(session, skb->data);
+	skb_put(skb, hdr_len);
+
+	/* Add PPP header */
+	skb->data[0] = ppph[0];
+	skb->data[1] = ppph[1];
+	skb_put(skb, 2);
+
+	/* Copy user data into skb */
+	error = memcpy_fromiovec(skb->data, m->msg_iov, total_len);
+	if (error < 0) {
+		kfree_skb(skb);
+		goto error_put_sess_tun;
+	}
+	skb_put(skb, total_len);
+
+	/* Calculate UDP checksum if configured to do so */
+	if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT)
+		skb->ip_summed = CHECKSUM_NONE;
+	else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
+		skb->ip_summed = CHECKSUM_COMPLETE;
+		csum = skb_checksum(skb, 0, udp_len, 0);
+		uh->check = csum_tcpudp_magic(inet->inet_saddr,
+					      inet->inet_daddr,
+					      udp_len, IPPROTO_UDP, csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+	} else {
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
+					       inet->inet_daddr,
+					       udp_len, IPPROTO_UDP, 0);
+	}
+
+	/* Debug */
+	if (session->send_seq)
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: send %Zd bytes, ns=%hu\n", session->name,
+		       total_len, session->ns - 1);
+	else
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: send %Zd bytes\n", session->name, total_len);
+
+	if (session->debug & PPPOL2TP_MSG_DATA) {
+		int i;
+		unsigned char *datap = skb->data;
+
+		printk(KERN_DEBUG "%s: xmit:", session->name);
+		for (i = 0; i < total_len; i++) {
+			printk(" %02X", *datap++);
+			if (i == 15) {
+				printk(" ...");
+				break;
+			}
+		}
+		printk("\n");
+	}
+
+	/* Queue the packet to IP for output */
+	len = skb->len;
+	error = ip_queue_xmit(skb, 1);
+
+	/* Update stats */
+	if (error >= 0) {
+		tunnel->stats.tx_packets++;
+		tunnel->stats.tx_bytes += len;
+		session->stats.tx_packets++;
+		session->stats.tx_bytes += len;
+	} else {
+		tunnel->stats.tx_errors++;
+		session->stats.tx_errors++;
+	}
+
+	return error;
+
+error_put_sess_tun:
+	sock_put(session->tunnel_sock);
+error_put_sess:
+	sock_put(sk);
+error:
+	return error;
+}
+
+/* Automatically called when the skb is freed.
+ */
+static void pppol2tp_sock_wfree(struct sk_buff *skb)
+{
+	sock_put(skb->sk);
+}
+
+/* For data skbs that we transmit, we associate with the tunnel socket
+ * but don't do accounting.
+ */
+static inline void pppol2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
+{
+	sock_hold(sk);
+	skb->sk = sk;
+	skb->destructor = pppol2tp_sock_wfree;
+}
+
+/* Transmit function called by generic PPP driver.  Sends PPP frame
+ * over PPPoL2TP socket.
+ *
+ * This is almost the same as pppol2tp_sendmsg(), but rather than
+ * being called with a msghdr from userspace, it is called with a skb
+ * from the kernel.
+ *
+ * The supplied skb from ppp may not have enough headroom for the
+ * insertion of PPP, L2TP, UDP and IP headers, so skb_cow_head() is
+ * used to make room and skb->truesize is adjusted by the headroom
+ * delta.
+ *
+ * The skb is consumed on every path: on the normal path it is handed
+ * to ip_queue_xmit(), on every abort path it is freed here. The
+ * function returns 1 in all cases; an ip_queue_xmit() failure is only
+ * recorded in the tx_errors counters, and the abort paths are not
+ * counted at all.
+ */
+static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+{
+	static const u8 ppph[2] = { 0xff, 0x03 };
+	struct sock *sk = (struct sock *) chan->private;
+	struct sock *sk_tun;
+	int hdr_len;
+	u16 udp_len;
+	struct pppol2tp_session *session;
+	struct pppol2tp_tunnel *tunnel;
+	int rc;
+	int headroom;
+	int data_len = skb->len;
+	struct inet_sock *inet;
+	__wsum csum;
+	struct udphdr *uh;
+	unsigned int len;
+	int old_headroom;
+	int new_headroom;
+
+	/* Drop the frame if the session socket is dead or not connected */
+	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
+		goto abort;
+
+	/* Get session and tunnel contexts from the socket.
+	 * NOTE(review): each lookup presumably takes a socket reference,
+	 * since both are balanced by sock_put() below - confirm against
+	 * pppol2tp_sock_to_session()/pppol2tp_sock_to_tunnel().
+	 */
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto abort;
+
+	sk_tun = session->tunnel_sock;
+	if (sk_tun == NULL)
+		goto abort_put_sess;
+	tunnel = pppol2tp_sock_to_tunnel(sk_tun);
+	if (tunnel == NULL)
+		goto abort_put_sess;
+
+	/* What header length is configured for this session? */
+	hdr_len = pppol2tp_l2tp_header_len(session);
+
+	/* Check that there's enough headroom in the skb to insert IP,
+	 * UDP and L2TP and PPP headers. If not enough, expand it to
+	 * make room. Adjust truesize.
+	 */
+	headroom = NET_SKB_PAD + sizeof(struct iphdr) +
+		sizeof(struct udphdr) + hdr_len + sizeof(ppph);
+	old_headroom = skb_headroom(skb);
+	if (skb_cow_head(skb, headroom))
+		goto abort_put_sess_tun;
+
+	new_headroom = skb_headroom(skb);
+	skb_orphan(skb);
+	skb->truesize += new_headroom - old_headroom;
+
+	/* Setup PPP header */
+	__skb_push(skb, sizeof(ppph));
+	skb->data[0] = ppph[0];
+	skb->data[1] = ppph[1];
+
+	/* Setup L2TP header */
+	pppol2tp_build_l2tp_header(session, __skb_push(skb, hdr_len));
+
+	/* UDP length covers the UDP, L2TP and PPP headers plus the payload */
+	udp_len = sizeof(struct udphdr) + hdr_len + sizeof(ppph) + data_len;
+
+	/* Setup UDP header; ports come from the tunnel socket */
+	inet = inet_sk(sk_tun);
+	__skb_push(skb, sizeof(*uh));
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+	uh->source = inet->inet_sport;
+	uh->dest = inet->inet_dport;
+	uh->len = htons(udp_len);
+	uh->check = 0;
+
+	/* Debug */
+	if (session->send_seq)
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: send %d bytes, ns=%hu\n", session->name,
+		       data_len, session->ns - 1);
+	else
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: send %d bytes\n", session->name, data_len);
+
+	/* Optional hex dump: at most 32 bytes, starting at skb->data,
+	 * which at this point is the UDP header pushed above.
+	 */
+	if (session->debug & PPPOL2TP_MSG_DATA) {
+		int i;
+		unsigned char *datap = skb->data;
+
+		printk(KERN_DEBUG "%s: xmit:", session->name);
+		for (i = 0; i < data_len; i++) {
+			printk(" %02X", *datap++);
+			if (i == 31) {
+				printk(" ...");
+				break;
+			}
+		}
+		printk("\n");
+	}
+
+	/* Clear IP control-block state and netfilter state carried by the skb */
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
+			      IPSKB_REROUTED);
+	nf_reset(skb);
+
+	/* Get routing info from the tunnel socket */
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst_clone(__sk_dst_get(sk_tun)));
+	pppol2tp_skb_set_owner_w(skb, sk_tun);
+
+	/* Calculate UDP checksum if configured to do so */
+	if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT)
+		skb->ip_summed = CHECKSUM_NONE;
+	else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
+		 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
+		/* Output device lacks NETIF_F_V4_CSUM: checksum in software */
+		skb->ip_summed = CHECKSUM_COMPLETE;
+		csum = skb_checksum(skb, 0, udp_len, 0);
+		uh->check = csum_tcpudp_magic(inet->inet_saddr,
+					      inet->inet_daddr,
+					      udp_len, IPPROTO_UDP, csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+	} else {
+		/* Store only the pseudo-header sum and record where the
+		 * checksum field lives so it can be completed later.
+		 */
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
+					       inet->inet_daddr,
+					       udp_len, IPPROTO_UDP, 0);
+	}
+
+	/* Queue the packet to IP for output. The length is sampled first
+	 * because the skb must not be touched after it is handed over.
+	 */
+	len = skb->len;
+	rc = ip_queue_xmit(skb, 1);
+
+	/* Update stats */
+	if (rc >= 0) {
+		tunnel->stats.tx_packets++;
+		tunnel->stats.tx_bytes += len;
+		session->stats.tx_packets++;
+		session->stats.tx_bytes += len;
+	} else {
+		tunnel->stats.tx_errors++;
+		session->stats.tx_errors++;
+	}
+
+	sock_put(sk_tun);
+	sock_put(sk);
+	return 1;
+
+abort_put_sess_tun:
+	sock_put(sk_tun);
+abort_put_sess:
+	sock_put(sk);
+abort:
+	/* Free the original skb */
+	kfree_skb(skb);
+	return 1;
+}
+
+/*****************************************************************************
+ * Session (and tunnel control) socket create/destroy.
+ *****************************************************************************/
+
+/* When the tunnel UDP socket is closed, all the attached sockets need to go
+ * too.
+ *
+ * Walks every bucket of the tunnel's session hash. Each session found is
+ * unhashed, its socket is unbound and marked PPPOX_DEAD (if it was
+ * connected or bound), and all of its queued skbs are purged.
+ */
+static void pppol2tp_tunnel_closeall(struct pppol2tp_tunnel *tunnel)
+{
+	int hash;
+	struct hlist_node *walk;
+	struct hlist_node *tmp;
+	struct pppol2tp_session *session;
+	struct sock *sk;
+
+	BUG_ON(tunnel == NULL);
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: closing all sessions...\n", tunnel->name);
+
+	write_lock_bh(&tunnel->hlist_lock);
+	for (hash = 0; hash < PPPOL2TP_HASH_SIZE; hash++) {
+again:
+		/* Only the first entry of the chain is ever processed per
+		 * pass: the body always ends with "goto again".
+		 */
+		hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
+			struct sk_buff *skb;
+
+			session = hlist_entry(walk, struct pppol2tp_session, hlist);
+
+			sk = session->sock;
+
+			PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+			       "%s: closing session\n", session->name);
+
+			hlist_del_init(&session->hlist);
+
+			/* Since we should hold the sock lock while
+			 * doing any unbinding, we need to release the
+			 * lock we're holding before taking that lock.
+			 * Hold a reference to the sock so it doesn't
+			 * disappear as we're jumping between locks.
+			 */
+			sock_hold(sk);
+			write_unlock_bh(&tunnel->hlist_lock);
+			lock_sock(sk);
+
+			if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
+				pppox_unbind_sock(sk);
+				sk->sk_state = PPPOX_DEAD;
+				sk->sk_state_change(sk);
+			}
+
+			/* Purge any queued data */
+			skb_queue_purge(&sk->sk_receive_queue);
+			skb_queue_purge(&sk->sk_write_queue);
+			/* NOTE(review): one sock_put() per reorder-queue skb;
+			 * presumably each queued skb pins the socket - confirm
+			 * against the code that enqueues onto reorder_q.
+			 */
+			while ((skb = skb_dequeue(&session->reorder_q))) {
+				kfree_skb(skb);
+				sock_put(sk);
+			}
+
+			release_sock(sk);
+			/* Drop the temporary reference taken above */
+			sock_put(sk);
+
+			/* Now restart from the beginning of this hash
+			 * chain.  We always remove a session from the
+			 * list so we are guaranteed to make forward
+			 * progress.
+			 */
+			write_lock_bh(&tunnel->hlist_lock);
+			goto again;
+		}
+	}
+	write_unlock_bh(&tunnel->hlist_lock);
+}
+
+/* Final teardown of a tunnel context: unlink it from the per-net tunnel
+ * list, drop the global tunnel count and release the memory.
+ * Callers must only get here once no session uses the tunnel any more.
+ */
+static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel)
+{
+	struct pppol2tp_net *net_priv;
+
+	net_priv = pppol2tp_pernet(tunnel->pppol2tp_net);
+
+	/* Unlink from the per-net tunnel list under its lock */
+	write_lock_bh(&net_priv->pppol2tp_tunnel_list_lock);
+	list_del_init(&tunnel->list);
+	write_unlock_bh(&net_priv->pppol2tp_tunnel_list_lock);
+
+	atomic_dec(&pppol2tp_tunnel_count);
+
+	kfree(tunnel);
+}
+
+/* sk_destruct hook installed on the tunnel UDP socket.
+ * Closes every session on the tunnel, undoes the encapsulation setup,
+ * restores and invokes the socket's original destructor, and finally
+ * drops one tunnel reference. The tunnel context itself is only deleted
+ * once all session sockets have been closed.
+ */
+static void pppol2tp_tunnel_destruct(struct sock *sk)
+{
+	struct pppol2tp_tunnel *tunnel = sk->sk_user_data;
+	struct udp_sock *up;
+
+	/* Socket carries no tunnel context: nothing to undo */
+	if (tunnel == NULL)
+		return;
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: closing...\n", tunnel->name);
+
+	/* Close all sessions */
+	pppol2tp_tunnel_closeall(tunnel);
+
+	/* No longer an encapsulation socket. See net/ipv4/udp.c */
+	up = udp_sk(sk);
+	up->encap_type = 0;
+	up->encap_rcv = NULL;
+
+	/* Remove hooks into tunnel socket */
+	tunnel->sock = NULL;
+	sk->sk_destruct = tunnel->old_sk_destruct;
+	sk->sk_user_data = NULL;
+
+	/* Call original (UDP) socket destructor */
+	if (sk->sk_destruct != NULL)
+		(*sk->sk_destruct)(sk);
+
+	pppol2tp_tunnel_dec_refcount(tunnel);
+}
+
+/* sk_destruct hook for session (PPPoX) sockets; runs from sock_put()
+ * once the socket's refcount reaches zero. Detaches the session from its
+ * tunnel, drops the tunnel reference and frees the session context.
+ */
+static void pppol2tp_session_destruct(struct sock *sk)
+{
+	struct pppol2tp_session *sess = sk->sk_user_data;
+	struct pppol2tp_tunnel *tun;
+
+	/* Socket was never connected: there is no context to release */
+	if (sess == NULL)
+		return;
+
+	BUG_ON(sess->magic != L2TP_SESSION_MAGIC);
+
+	/* The tunnel socket may already have been closed (its
+	 * sk_user_data would then be NULL), so the tunnel context is
+	 * taken from the session's own pointer rather than via
+	 * pppol2tp_sock_to_tunnel().
+	 */
+	tun = sess->tunnel;
+	if (tun == NULL)
+		goto free_session;
+
+	BUG_ON(tun->magic != L2TP_TUNNEL_MAGIC);
+
+	/* A zero session id marks a tunnel-management socket, which was
+	 * never hashed or counted; only real sessions are unhashed.
+	 */
+	if (sess->tunnel_addr.s_session != 0) {
+		write_lock_bh(&tun->hlist_lock);
+		hlist_del_init(&sess->hlist);
+		write_unlock_bh(&tun->hlist_lock);
+
+		atomic_dec(&pppol2tp_session_count);
+	}
+
+	/* Dropping this reference deletes the tunnel context if this
+	 * was the last session on the tunnel.
+	 */
+	sess->tunnel = NULL;
+	sess->tunnel_sock = NULL;
+	pppol2tp_tunnel_dec_refcount(tun);
+
+free_session:
+	kfree(sess);
+}
+
+/* release() handler for the PPPoX (session) socket.
+ * Unbinds the socket, marks it dead, orphans it, flushes every queue and
+ * drops the caller's reference. Returns 0 on success (or when there is
+ * no sock attached) and -EBADF if the socket is already dead.
+ */
+static int pppol2tp_release(struct socket *sock)
+{
+	struct pppol2tp_session *session;
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+
+	if (sk == NULL)
+		return 0;
+
+	lock_sock(sk);
+
+	if (sock_flag(sk, SOCK_DEAD) != 0) {
+		release_sock(sk);
+		return -EBADF;
+	}
+
+	pppox_unbind_sock(sk);
+
+	/* Signal the death of the socket. */
+	sk->sk_state = PPPOX_DEAD;
+	sock_orphan(sk);
+	sock->sk = NULL;
+
+	session = pppol2tp_sock_to_session(sk);
+
+	/* Purge any queued data */
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+
+	if (session != NULL) {
+		/* One socket reference is dropped per reorder-queue skb */
+		for (skb = skb_dequeue(&session->reorder_q); skb != NULL;
+		     skb = skb_dequeue(&session->reorder_q)) {
+			kfree_skb(skb);
+			sock_put(sk);
+		}
+
+		/* Presumably balances the reference taken by the
+		 * pppol2tp_sock_to_session() lookup above.
+		 */
+		sock_put(sk);
+	}
+
+	release_sock(sk);
+
+	/* If this was the last reference, the session context is deleted
+	 * through pppol2tp_session_destruct().
+	 */
+	sock_put(sk);
+
+	return 0;
+}
+
+/* Internal function to prepare a tunnel (UDP) socket to have PPPoX
+ * sockets attached to it.
+ *
+ * @net:       network namespace the tunnel is recorded in
+ * @fd:        userspace file descriptor of the tunnel's UDP socket
+ * @tunnel_id: id used for the tunnel name and stats
+ * @error:     set to 0 when a new tunnel context is created, or to a
+ *             negative errno on failure
+ *
+ * Returns the tunnel's struct sock, or NULL on failure. If the socket
+ * already carries a tunnel context, that tunnel's sock is returned and
+ * *error is left untouched. No reference on the returned sock is taken
+ * here; the fd reference is dropped again before returning.
+ */
+static struct sock *pppol2tp_prepare_tunnel_socket(struct net *net,
+					int fd, u16 tunnel_id, int *error)
+{
+	int err;
+	struct socket *sock = NULL;
+	struct sock *sk;
+	struct pppol2tp_tunnel *tunnel;
+	struct pppol2tp_net *pn;
+	struct sock *ret = NULL;
+
+	/* Get the tunnel UDP socket from the fd, which was opened by
+	 * the userspace L2TP daemon.
+	 */
+	err = -EBADF;
+	sock = sockfd_lookup(fd, &err);
+	if (!sock) {
+		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
+		       "tunl %hu: sockfd_lookup(fd=%d) returned %d\n",
+		       tunnel_id, fd, err);
+		goto err;
+	}
+
+	sk = sock->sk;
+
+	/* Quick sanity checks */
+	err = -EPROTONOSUPPORT;
+	if (sk->sk_protocol != IPPROTO_UDP) {
+		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
+		       "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
+		       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
+		goto err;
+	}
+	err = -EAFNOSUPPORT;
+	if (sock->ops->family != AF_INET) {
+		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
+		       "tunl %hu: fd %d wrong family, got %d, expected %d\n",
+		       tunnel_id, fd, sock->ops->family, AF_INET);
+		goto err;
+	}
+
+	/* NOTE(review): this value is never reported; every path below
+	 * either overwrites err or succeeds.
+	 */
+	err = -ENOTCONN;
+
+	/* Check if this socket has already been prepped */
+	tunnel = (struct pppol2tp_tunnel *)sk->sk_user_data;
+	if (tunnel != NULL) {
+		/* User-data field already set.
+		 * NOTE(review): -EBUSY is assigned but never stored in
+		 * *error, because this path jumps to "out", not "err".
+		 */
+		err = -EBUSY;
+		BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
+
+		/* This socket has already been prepped */
+		ret = tunnel->sock;
+		goto out;
+	}
+
+	/* This socket is available and needs prepping. Create a new tunnel
+	 * context and init it.
+	 */
+	sk->sk_user_data = tunnel = kzalloc(sizeof(struct pppol2tp_tunnel), GFP_KERNEL);
+	if (sk->sk_user_data == NULL) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	tunnel->magic = L2TP_TUNNEL_MAGIC;
+	sprintf(&tunnel->name[0], "tunl %hu", tunnel_id);
+
+	tunnel->stats.tunnel_id = tunnel_id;
+	tunnel->debug = PPPOL2TP_DEFAULT_DEBUG_FLAGS;
+
+	/* Hook on the tunnel socket destructor so that we can cleanup
+	 * if the tunnel socket goes away.
+	 */
+	tunnel->old_sk_destruct = sk->sk_destruct;
+	sk->sk_destruct = pppol2tp_tunnel_destruct;
+
+	tunnel->sock = sk;
+	sk->sk_allocation = GFP_ATOMIC;
+
+	/* Misc init */
+	rwlock_init(&tunnel->hlist_lock);
+
+	/* The net we belong to */
+	tunnel->pppol2tp_net = net;
+	pn = pppol2tp_pernet(net);
+
+	/* Add tunnel to our list */
+	INIT_LIST_HEAD(&tunnel->list);
+	write_lock_bh(&pn->pppol2tp_tunnel_list_lock);
+	list_add(&tunnel->list, &pn->pppol2tp_tunnel_list);
+	write_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
+	atomic_inc(&pppol2tp_tunnel_count);
+
+	/* Bump the reference count. The tunnel context is deleted
+	 * only when this drops to zero.
+	 */
+	pppol2tp_tunnel_inc_refcount(tunnel);
+
+	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
+	(udp_sk(sk))->encap_type = UDP_ENCAP_L2TPINUDP;
+	(udp_sk(sk))->encap_rcv = pppol2tp_udp_encap_recv;
+
+	ret = tunnel->sock;
+
+	*error = 0;
+out:
+	/* Drop the file reference obtained by sockfd_lookup() */
+	if (sock)
+		sockfd_put(sock);
+
+	return ret;
+
+err:
+	*error = err;
+	goto out;
+}
+
+/* Protocol descriptor handed to sk_alloc() by pppol2tp_create(); obj_size
+ * makes every allocated socket large enough to be a struct pppox_sock.
+ */
+static struct proto pppol2tp_sk_proto = {
+	.name	  = "PPPOL2TP",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct pppox_sock),
+};
+
+/* socket() handler: allocate a PPPoX/L2TP struct sock and bind it to the
+ * given socket. Returns 0 on success or -ENOMEM if allocation fails.
+ */
+static int pppol2tp_create(struct net *net, struct socket *sock)
+{
+	struct sock *sk;
+
+	sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto);
+	if (sk == NULL)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+
+	/* Socket-level state */
+	sock->state = SS_UNCONNECTED;
+	sock->ops = &pppol2tp_ops;
+
+	/* Sock-level identity and callbacks */
+	sk->sk_family = PF_PPPOX;
+	sk->sk_protocol = PX_PROTO_OL2TP;
+	sk->sk_type = SOCK_STREAM;
+	sk->sk_state = PPPOX_NONE;
+	sk->sk_backlog_rcv = pppol2tp_recv_core;
+	sk->sk_destruct = pppol2tp_session_destruct;
+
+	return 0;
+}
+
+/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
+ *
+ * With s_session == 0 and d_session == 0 the socket becomes a
+ * tunnel-management socket: the UDP socket named by sp->pppol2tp.fd is
+ * prepared as a tunnel and no PPP channel is registered. Otherwise the
+ * tunnel is looked up by s_tunnel, a new session context is created,
+ * registered as a PPP channel and added to the tunnel's session hash.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
+			    int sockaddr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
+	struct pppox_sock *po = pppox_sk(sk);
+	struct sock *tunnel_sock = NULL;
+	struct pppol2tp_session *session = NULL;
+	struct pppol2tp_tunnel *tunnel;
+	struct dst_entry *dst;
+	int error = 0;
+
+	lock_sock(sk);
+
+	error = -EINVAL;
+	if (sp->sa_protocol != PX_PROTO_OL2TP)
+		goto end;
+
+	/* Check for already bound sockets */
+	error = -EBUSY;
+	if (sk->sk_state & PPPOX_CONNECTED)
+		goto end;
+
+	/* We don't support rebinding anyway */
+	error = -EALREADY;
+	if (sk->sk_user_data)
+		goto end; /* socket is already attached */
+
+	/* Don't bind if s_tunnel is 0 */
+	error = -EINVAL;
+	if (sp->pppol2tp.s_tunnel == 0)
+		goto end;
+
+	/* Special case: prepare tunnel socket if s_session and
+	 * d_session is 0. Otherwise look up tunnel using supplied
+	 * tunnel id.
+	 */
+	if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) {
+		tunnel_sock = pppol2tp_prepare_tunnel_socket(sock_net(sk),
+							     sp->pppol2tp.fd,
+							     sp->pppol2tp.s_tunnel,
+							     &error);
+		if (tunnel_sock == NULL)
+			goto end;
+
+		/* NOTE(review): this reference is only dropped at
+		 * end_put_tun; the "goto end" exits below (-EEXIST,
+		 * -ENOMEM) skip that label and appear to leak it.
+		 */
+		sock_hold(tunnel_sock);
+		tunnel = tunnel_sock->sk_user_data;
+	} else {
+		tunnel = pppol2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel);
+
+		/* Error if we can't find the tunnel */
+		error = -ENOENT;
+		if (tunnel == NULL)
+			goto end;
+
+		tunnel_sock = tunnel->sock;
+	}
+
+	/* Check that this session doesn't already exist */
+	error = -EEXIST;
+	session = pppol2tp_session_find(tunnel, sp->pppol2tp.s_session);
+	if (session != NULL)
+		goto end;
+
+	/* Allocate and initialize a new session context. */
+	session = kzalloc(sizeof(struct pppol2tp_session), GFP_KERNEL);
+	if (session == NULL) {
+		error = -ENOMEM;
+		goto end;
+	}
+
+	skb_queue_head_init(&session->reorder_q);
+
+	session->magic	     = L2TP_SESSION_MAGIC;
+	session->owner	     = current->pid;
+	session->sock	     = sk;
+	session->tunnel	     = tunnel;
+	session->tunnel_sock = tunnel_sock;
+	session->tunnel_addr = sp->pppol2tp;
+	sprintf(&session->name[0], "sess %hu/%hu",
+		session->tunnel_addr.s_tunnel,
+		session->tunnel_addr.s_session);
+
+	session->stats.tunnel_id  = session->tunnel_addr.s_tunnel;
+	session->stats.session_id = session->tunnel_addr.s_session;
+
+	INIT_HLIST_NODE(&session->hlist);
+
+	/* Inherit debug options from tunnel */
+	session->debug = tunnel->debug;
+
+	/* Default MTU must allow space for UDP/L2TP/PPP
+	 * headers.
+	 */
+	session->mtu = session->mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+
+	/* If PMTU discovery was enabled, use the MTU that was discovered.
+	 * Note that the dst consulted here belongs to sk (the PPPoX
+	 * socket), and that dst_mtu() re-reads it via __sk_dst_get()
+	 * instead of using the referenced "dst".
+	 */
+	dst = sk_dst_get(sk);
+	if (dst != NULL) {
+		u32 pmtu = dst_mtu(__sk_dst_get(sk));
+		if (pmtu != 0)
+			session->mtu = session->mru = pmtu -
+				PPPOL2TP_HEADER_OVERHEAD;
+		dst_release(dst);
+	}
+
+	/* Special case: if source & dest session_id == 0x0000, this socket is
+	 * being created to manage the tunnel. Don't add the session to the
+	 * session hash list, just set up the internal context for use by
+	 * ioctl() and sockopt() handlers.
+	 */
+	if ((session->tunnel_addr.s_session == 0) &&
+	    (session->tunnel_addr.d_session == 0)) {
+		error = 0;
+		sk->sk_user_data = session;
+		goto out_no_ppp;
+	}
+
+	/* Get tunnel context from the tunnel socket.
+	 * NOTE(review): on this failure, and on the
+	 * ppp_register_net_channel() failure below, the session allocated
+	 * above is neither freed nor stored in sk->sk_user_data, so it
+	 * appears to be leaked. It cannot simply be freed here because
+	 * the exit path still reads session->debug and session->name.
+	 */
+	tunnel = pppol2tp_sock_to_tunnel(tunnel_sock);
+	if (tunnel == NULL) {
+		error = -EBADF;
+		goto end;
+	}
+
+	/* Right now, because we don't have a way to push the incoming skb's
+	 * straight through the UDP layer, the only header we need to worry
+	 * about is the L2TP header. This size is different depending on
+	 * whether sequence numbers are enabled for the data channel.
+	 */
+	po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+
+	po->chan.private = sk;
+	po->chan.ops	 = &pppol2tp_chan_ops;
+	po->chan.mtu	 = session->mtu;
+
+	error = ppp_register_net_channel(sock_net(sk), &po->chan);
+	if (error)
+		goto end_put_tun;
+
+	/* This is how we get the session context from the socket. */
+	sk->sk_user_data = session;
+
+	/* Add session to the tunnel's hash list */
+	write_lock_bh(&tunnel->hlist_lock);
+	hlist_add_head(&session->hlist,
+		       pppol2tp_session_id_hash(tunnel,
+						session->tunnel_addr.s_session));
+	write_unlock_bh(&tunnel->hlist_lock);
+
+	atomic_inc(&pppol2tp_session_count);
+
+out_no_ppp:
+	/* Every attached socket (session or management) pins the tunnel */
+	pppol2tp_tunnel_inc_refcount(tunnel);
+	sk->sk_state = PPPOX_CONNECTED;
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: created\n", session->name);
+
+end_put_tun:
+	sock_put(tunnel_sock);
+end:
+	release_sock(sk);
+
+	if (error != 0) {
+		/* On the -EEXIST path "session" is the pre-existing session
+		 * returned by pppol2tp_session_find().
+		 */
+		if (session)
+			PRINTK(session->debug,
+				PPPOL2TP_MSG_CONTROL, KERN_WARNING,
+				"%s: connect failed: %d\n",
+				session->name, error);
+		else
+			PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_WARNING,
+				"connect failed: %d\n", error);
+	}
+
+	return error;
+}
+
+/* getname() handler: report the PPPoL2TP address the socket was connected
+ * with. Returns -ENOTCONN unless the socket state is exactly
+ * PPPOX_CONNECTED, -EBADF if no session context can be obtained, and 0
+ * otherwise. The peer argument is not used.
+ */
+static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
+			    int *usockaddr_len, int peer)
+{
+	struct sock *sk = sock->sk;
+	struct pppol2tp_session *session;
+	struct sockaddr_pppol2tp addr;
+	int addr_len = sizeof(struct sockaddr_pppol2tp);
+
+	if (sk->sk_state != PPPOX_CONNECTED)
+		return -ENOTCONN;
+
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		return -EBADF;
+
+	/* Build the reply locally, then copy it out in one go */
+	addr.sa_family = AF_PPPOX;
+	addr.sa_protocol = PX_PROTO_OL2TP;
+	addr.pppol2tp = session->tunnel_addr;
+
+	memcpy(uaddr, &addr, addr_len);
+	*usockaddr_len = addr_len;
+
+	/* Done with the session context obtained above */
+	sock_put(sk);
+
+	return 0;
+}
+
+/****************************************************************************
+ * ioctl() handlers.
+ *
+ * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
+ * sockets. However, in order to control kernel tunnel features, we allow
+ * userspace to create a special "tunnel" PPPoX socket which is used for
+ * control only.  Tunnel PPPoX sockets have session_id == 0 and simply allow
+ * the user application to issue L2TP setsockopt(), getsockopt() and ioctl()
+ * calls.
+ ****************************************************************************/
+
+/* Session ioctl helper.
+ *
+ * Handles SIOCGIFMTU/SIOCSIFMTU, PPPIOCGMRU/PPPIOCSMRU,
+ * PPPIOCGFLAGS/PPPIOCSFLAGS and PPPIOCGL2TPSTATS for one session.
+ * A reference on the session's socket is held for the duration of the
+ * call.
+ *
+ * Returns 0 on success, -ENXIO if the command requires a connected
+ * socket and it is not, -EFAULT if a userspace copy fails, and -ENOSYS
+ * for any other command.
+ */
+static int pppol2tp_session_ioctl(struct pppol2tp_session *session,
+				  unsigned int cmd, unsigned long arg)
+{
+	struct ifreq ifr;
+	int err = 0;
+	struct sock *sk = session->sock;
+	int val = (int) arg;
+
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
+	       "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
+	       session->name, cmd, arg);
+
+	sock_hold(sk);
+
+	switch (cmd) {
+	case SIOCGIFMTU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		/* Read-modify-write: only ifr_mtu of the user's ifreq changes */
+		err = -EFAULT;
+		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
+			break;
+		ifr.ifr_mtu = session->mtu;
+		if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get mtu=%d\n", session->name, session->mtu);
+		err = 0;
+		break;
+
+	case SIOCSIFMTU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
+			break;
+
+		session->mtu = ifr.ifr_mtu;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set mtu=%d\n", session->name, session->mtu);
+		err = 0;
+		break;
+
+	case PPPIOCGMRU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (put_user(session->mru, (int __user *) arg))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get mru=%d\n", session->name, session->mru);
+		err = 0;
+		break;
+
+	case PPPIOCSMRU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (get_user(val,(int __user *) arg))
+			break;
+
+		session->mru = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set mru=%d\n", session->name, session->mru);
+		err = 0;
+		break;
+
+	case PPPIOCGFLAGS:
+		err = -EFAULT;
+		if (put_user(session->flags, (int __user *) arg))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get flags=%d\n", session->name, session->flags);
+		err = 0;
+		break;
+
+	case PPPIOCSFLAGS:
+		err = -EFAULT;
+		if (get_user(val, (int __user *) arg))
+			break;
+		session->flags = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set flags=%d\n", session->name, session->flags);
+		err = 0;
+		break;
+
+	case PPPIOCGL2TPSTATS:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		/* A failed copy-out is a bad user pointer, not a missing
+		 * device: report -EFAULT like every other case here.
+		 */
+		err = -EFAULT;
+		if (copy_to_user((void __user *) arg, &session->stats,
+				 sizeof(session->stats)))
+			break;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get L2TP stats\n", session->name);
+		err = 0;
+		break;
+
+	default:
+		err = -ENOSYS;
+		break;
+	}
+
+	sock_put(sk);
+
+	return err;
+}
+
+/* Tunnel ioctl helper.
+ *
+ * Only PPPIOCGL2TPSTATS is handled; any other command yields -ENOSYS.
+ * When the request names a non-zero session_id it is forwarded to the
+ * session ioctl handler, which lets an application read session stats
+ * through a tunnel socket (-EBADR if no such session exists).
+ * A reference on the tunnel socket is held for the duration of the call.
+ */
+static int pppol2tp_tunnel_ioctl(struct pppol2tp_tunnel *tunnel,
+				 unsigned int cmd, unsigned long arg)
+{
+	struct pppol2tp_ioc_stats stats_req;
+	struct pppol2tp_session *session;
+	struct sock *sk = tunnel->sock;
+	int err;
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
+	       "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n", tunnel->name,
+	       cmd, arg);
+
+	sock_hold(sk);
+
+	if (cmd != PPPIOCGL2TPSTATS) {
+		err = -ENOSYS;
+		goto done;
+	}
+
+	if (!(sk->sk_state & PPPOX_CONNECTED)) {
+		err = -ENXIO;
+		goto done;
+	}
+
+	if (copy_from_user(&stats_req, (void __user *) arg,
+			   sizeof(stats_req))) {
+		err = -EFAULT;
+		goto done;
+	}
+
+	if (stats_req.session_id != 0) {
+		/* resend to session ioctl handler */
+		session = pppol2tp_session_find(tunnel, stats_req.session_id);
+		if (session == NULL)
+			err = -EBADR;
+		else
+			err = pppol2tp_session_ioctl(session, cmd, arg);
+		goto done;
+	}
+
+#ifdef CONFIG_XFRM
+	tunnel->stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0;
+#endif
+	if (copy_to_user((void __user *) arg, &tunnel->stats,
+			 sizeof(tunnel->stats))) {
+		err = -EFAULT;
+		goto done;
+	}
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: get L2TP stats\n", tunnel->name);
+	err = 0;
+
+done:
+	sock_put(sk);
+
+	return err;
+}
+
+/* Main ioctl() handler.
+ * Validates the socket state, then routes the request to the tunnel
+ * helper (when both session ids are zero) or to the session helper.
+ * Returns 0 if no sock is attached, -EBADF for a dead socket or missing
+ * context, -ENOTCONN if the socket is neither connected nor bound, and
+ * otherwise whatever the selected helper returns.
+ */
+static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	struct pppol2tp_session *session;
+	struct pppol2tp_tunnel *tunnel;
+	int is_tunnel_sock;
+	int err;
+
+	if (sk == NULL)
+		return 0;
+
+	if (sock_flag(sk, SOCK_DEAD) != 0)
+		return -EBADF;
+
+	if (sk->sk_user_data == NULL)
+		return -ENOTCONN;
+	if ((sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) == 0)
+		return -ENOTCONN;
+
+	/* Get session context from the socket */
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		return -EBADF;
+
+	/* A session context with both ids zero denotes a tunnel socket */
+	is_tunnel_sock = (session->tunnel_addr.s_session == 0) &&
+			 (session->tunnel_addr.d_session == 0);
+
+	if (!is_tunnel_sock) {
+		err = pppol2tp_session_ioctl(session, cmd, arg);
+	} else {
+		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
+		if (tunnel == NULL) {
+			err = -EBADF;
+		} else {
+			err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
+			sock_put(session->tunnel_sock);
+		}
+	}
+
+	sock_put(sk);
+
+	return err;
+}
+
+/*****************************************************************************
+ * setsockopt() / getsockopt() support.
+ *
+ * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
+ * sockets. In order to control kernel tunnel features, we allow userspace to
+ * create a special "tunnel" PPPoX socket which is used for control only.
+ * Tunnel PPPoX sockets have session_id == 0 and simply allow the user
+ * application to issue L2TP setsockopt(), getsockopt() and ioctl() calls.
+ *****************************************************************************/
+
+/* Tunnel setsockopt() helper.
+ * PPPOL2TP_SO_DEBUG is the only tunnel-level option; anything else is
+ * rejected with -ENOPROTOOPT.
+ */
+static int pppol2tp_tunnel_setsockopt(struct sock *sk,
+				      struct pppol2tp_tunnel *tunnel,
+				      int optname, int val)
+{
+	if (optname != PPPOL2TP_SO_DEBUG)
+		return -ENOPROTOOPT;
+
+	tunnel->debug = val;
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: set debug=%x\n", tunnel->name, tunnel->debug);
+
+	return 0;
+}
+
+/* Session setsockopt helper.
+ * The RECVSEQ, SENDSEQ and LNSMODE options accept only 0 or 1 (else
+ * -EINVAL) and are stored as 0 / -1. Unknown options give -ENOPROTOOPT.
+ */
+static int pppol2tp_session_setsockopt(struct sock *sk,
+				       struct pppol2tp_session *session,
+				       int optname, int val)
+{
+	/* Evaluated up front; only consulted by the on/off options */
+	const int not_boolean = (val != 0) && (val != 1);
+
+	switch (optname) {
+	case PPPOL2TP_SO_RECVSEQ:
+		if (not_boolean)
+			return -EINVAL;
+		session->recv_seq = val ? -1 : 0;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set recv_seq=%d\n", session->name,
+		       session->recv_seq);
+		return 0;
+
+	case PPPOL2TP_SO_SENDSEQ:
+		if (not_boolean)
+			return -EINVAL;
+		session->send_seq = val ? -1 : 0;
+		/* The PPP channel header length tracks the L2TP header size */
+		pppox_sk(session->sock)->chan.hdrlen =
+			val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
+			      PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set send_seq=%d\n", session->name, session->send_seq);
+		return 0;
+
+	case PPPOL2TP_SO_LNSMODE:
+		if (not_boolean)
+			return -EINVAL;
+		session->lns_mode = val ? -1 : 0;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set lns_mode=%d\n", session->name,
+		       session->lns_mode);
+		return 0;
+
+	case PPPOL2TP_SO_DEBUG:
+		session->debug = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set debug=%x\n", session->name, session->debug);
+		return 0;
+
+	case PPPOL2TP_SO_REORDERTO:
+		/* Supplied in milliseconds, stored in jiffies */
+		session->reorder_timeout = msecs_to_jiffies(val);
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set reorder_timeout=%d\n", session->name,
+		       session->reorder_timeout);
+		return 0;
+
+	default:
+		return -ENOPROTOOPT;
+	}
+}
+
+/* Main setsockopt() entry point.
+ * Does API checks, then calls either the tunnel or session setsockopt
+ * handler, according to whether the PPPoL2TP socket is a for a regular
+ * session or the special tunnel type.
+ *
+ * Returns the helper's result, or: -EINVAL for a level other than
+ * SOL_PPPOL2TP or an optlen smaller than an int, -EFAULT if the option
+ * value cannot be read, -ENOTCONN if the socket has no context, -EBADF
+ * if the session or tunnel context cannot be obtained.
+ */
+static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct pppol2tp_session *session;
+	struct pppol2tp_tunnel *tunnel;
+	int val;
+	int err;
+
+	/* sk is a PPPoX sock, not a UDP sock, so it must never be passed
+	 * to udp_prot.setsockopt(): that would interpret pppox_sock
+	 * memory as a udp_sock. Other levels are simply not supported.
+	 */
+	if (level != SOL_PPPOL2TP)
+		return -EINVAL;
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	err = -ENOTCONN;
+	if (sk->sk_user_data == NULL)
+		goto end;
+
+	/* Get session context from the socket */
+	err = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto end;
+
+	/* Special case: if session_id == 0x0000, treat as operation on tunnel
+	 */
+	if ((session->tunnel_addr.s_session == 0) &&
+	    (session->tunnel_addr.d_session == 0)) {
+		err = -EBADF;
+		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
+		if (tunnel == NULL)
+			goto end_put_sess;
+
+		err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
+		sock_put(session->tunnel_sock);
+	} else
+		err = pppol2tp_session_setsockopt(sk, session, optname, val);
+
+	/* err now carries the helper's verdict and is returned as-is, so
+	 * -EINVAL / -ENOPROTOOPT reach the caller instead of being
+	 * replaced by 0.
+	 */
+
+end_put_sess:
+	sock_put(sk);
+end:
+	return err;
+}
+
+/* Tunnel getsockopt helper. Called with sock locked.
+ * Only PPPOL2TP_SO_DEBUG can be read; *val is left untouched and
+ * -ENOPROTOOPT is returned for any other option.
+ */
+static int pppol2tp_tunnel_getsockopt(struct sock *sk,
+				      struct pppol2tp_tunnel *tunnel,
+				      int optname, int *val)
+{
+	if (optname != PPPOL2TP_SO_DEBUG)
+		return -ENOPROTOOPT;
+
+	*val = tunnel->debug;
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: get debug=%x\n", tunnel->name, tunnel->debug);
+
+	return 0;
+}
+
+/* Session getsockopt helper. Called with sock locked.
+ * Stores the requested option in *val and returns 0; for an unknown
+ * option *val is not written and -ENOPROTOOPT is returned.
+ */
+static int pppol2tp_session_getsockopt(struct sock *sk,
+				       struct pppol2tp_session *session,
+				       int optname, int *val)
+{
+	switch (optname) {
+	case PPPOL2TP_SO_RECVSEQ:
+		*val = session->recv_seq;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get recv_seq=%d\n", session->name, *val);
+		return 0;
+
+	case PPPOL2TP_SO_SENDSEQ:
+		*val = session->send_seq;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get send_seq=%d\n", session->name, *val);
+		return 0;
+
+	case PPPOL2TP_SO_LNSMODE:
+		*val = session->lns_mode;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get lns_mode=%d\n", session->name, *val);
+		return 0;
+
+	case PPPOL2TP_SO_DEBUG:
+		*val = session->debug;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get debug=%d\n", session->name, *val);
+		return 0;
+
+	case PPPOL2TP_SO_REORDERTO:
+		/* Stored in jiffies, reported in milliseconds */
+		*val = (int) jiffies_to_msecs(session->reorder_timeout);
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get reorder_timeout=%d\n", session->name, *val);
+		return 0;
+
+	default:
+		return -ENOPROTOOPT;
+	}
+}
+
+/* Main getsockopt() entry point.
+ * Does API checks, then calls either the tunnel or session getsockopt
+ * handler, according to whether the PPPoX socket is a for a regular session
+ * or the special tunnel type.
+ *
+ * At most sizeof(int) bytes are written to optval, and the length
+ * actually used is written back to optlen.
+ *
+ * Returns 0 on success, the helper's error (e.g. -ENOPROTOOPT) if the
+ * option is unknown, -EINVAL for a level other than SOL_PPPOL2TP or a
+ * negative length, -EFAULT on a failed userspace access, -ENOTCONN if
+ * the socket has no context, -EBADF if the session or tunnel context
+ * cannot be obtained.
+ */
+static int pppol2tp_getsockopt(struct socket *sock, int level,
+			       int optname, char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct pppol2tp_session *session;
+	struct pppol2tp_tunnel *tunnel;
+	int val, len;
+	int err;
+
+	/* sk is a PPPoX sock, not a UDP sock, so it must never be passed
+	 * to udp_prot.getsockopt(): that would interpret pppox_sock
+	 * memory as a udp_sock. Other levels are simply not supported.
+	 */
+	if (level != SOL_PPPOL2TP)
+		return -EINVAL;
+
+	if (get_user(len, (int __user *) optlen))
+		return -EFAULT;
+
+	/* Reject negative lengths before the unsigned clamp below, which
+	 * would otherwise turn them into sizeof(int).
+	 */
+	if (len < 0)
+		return -EINVAL;
+
+	len = min_t(unsigned int, len, sizeof(int));
+
+	err = -ENOTCONN;
+	if (sk->sk_user_data == NULL)
+		goto end;
+
+	/* Get the session context */
+	err = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto end;
+
+	/* Special case: if session_id == 0x0000, treat as operation on tunnel */
+	if ((session->tunnel_addr.s_session == 0) &&
+	    (session->tunnel_addr.d_session == 0)) {
+		err = -EBADF;
+		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
+		if (tunnel == NULL)
+			goto end_put_sess;
+
+		err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
+		sock_put(session->tunnel_sock);
+	} else
+		err = pppol2tp_session_getsockopt(sk, session, optname, &val);
+
+	/* If the helper failed, val was never written: bail out rather
+	 * than copy uninitialised stack data to userspace.
+	 */
+	if (err)
+		goto end_put_sess;
+
+	err = -EFAULT;
+	if (put_user(len, (int __user *) optlen))
+		goto end_put_sess;
+
+	if (copy_to_user((void __user *) optval, &val, len))
+		goto end_put_sess;
+
+	err = 0;
+
+end_put_sess:
+	sock_put(sk);
+end:
+	return err;
+}
+
+/*****************************************************************************
+ * /proc filesystem for debug
+ *****************************************************************************/
+
+#ifdef CONFIG_PROC_FS
+
+#include <linux/seq_file.h>
+
+/* Iteration state for the /proc seq_file.  pppol2tp_proc_open() passes
+ * the size of this struct to seq_open_net(), and pppol2tp_seq_start()
+ * reads it back through m->private.
+ */
+struct pppol2tp_seq_data {
+	struct seq_net_private p;
+	struct pppol2tp_tunnel *tunnel;		/* current tunnel */
+	struct pppol2tp_session *session;	/* NULL means get first session in tunnel */
+};
+
+/* Find the session that follows curr in the tunnel's session hash table,
+ * walking the buckets in index order under the tunnel's hlist read lock.
+ * A NULL curr selects the first session.  Returns NULL when there is no
+ * further session (or curr is not in the table).
+ */
+static struct pppol2tp_session *next_session(struct pppol2tp_tunnel *tunnel, struct pppol2tp_session *curr)
+{
+	struct pppol2tp_session *pos;
+	struct pppol2tp_session *result = NULL;
+	struct hlist_node *node;
+	/* With no current session the very first entry is the answer */
+	int passed_curr = (curr == NULL);
+	int bucket;
+
+	read_lock_bh(&tunnel->hlist_lock);
+	for (bucket = 0; bucket < PPPOL2TP_HASH_SIZE; bucket++) {
+		hlist_for_each_entry(pos, node, &tunnel->session_hlist[bucket], hlist) {
+			if (pos == curr) {
+				passed_curr = 1;
+				continue;
+			}
+			if (passed_curr) {
+				result = pos;
+				goto unlock;
+			}
+		}
+	}
+unlock:
+	read_unlock_bh(&tunnel->hlist_lock);
+
+	return result;
+}
+
+/* Return the tunnel that follows curr on the per-net tunnel list, or
+ * NULL when curr is the last entry.  The list is read under the per-net
+ * tunnel list read lock.
+ */
+static struct pppol2tp_tunnel *next_tunnel(struct pppol2tp_net *pn,
+					   struct pppol2tp_tunnel *curr)
+{
+	struct pppol2tp_tunnel *successor;
+
+	read_lock_bh(&pn->pppol2tp_tunnel_list_lock);
+	if (list_is_last(&curr->list, &pn->pppol2tp_tunnel_list))
+		successor = NULL;
+	else
+		successor = list_entry(curr->list.next, struct pppol2tp_tunnel, list);
+	read_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
+
+	return successor;
+}
+
+/* seq_file start handler.
+ *
+ * Position 0 yields SEQ_START_TOKEN so that the header is shown.  For any
+ * later position the iterator state in m->private is advanced by one
+ * step: to the first tunnel if none is selected yet, otherwise to the
+ * next session of the current tunnel, or to the next tunnel once the
+ * sessions are exhausted.  Returns NULL at the end of the list.
+ */
+static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs)
+{
+	struct pppol2tp_seq_data *pd = SEQ_START_TOKEN;
+	struct pppol2tp_net *pn;
+	loff_t pos = *offs;
+
+	if (!pos)
+		goto out;
+
+	BUG_ON(m->private == NULL);
+	pd = m->private;
+	pn = pppol2tp_pernet(seq_file_net(m));
+
+	if (pd->tunnel == NULL) {
+		/* Read the list head under the same lock next_tunnel()
+		 * uses, so the first entry is not fetched while the list
+		 * is being modified.
+		 */
+		read_lock_bh(&pn->pppol2tp_tunnel_list_lock);
+		if (!list_empty(&pn->pppol2tp_tunnel_list))
+			pd->tunnel = list_entry(pn->pppol2tp_tunnel_list.next, struct pppol2tp_tunnel, list);
+		read_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
+	} else {
+		pd->session = next_session(pd->tunnel, pd->session);
+		if (pd->session == NULL) {
+			pd->tunnel = next_tunnel(pn, pd->tunnel);
+		}
+	}
+
+	/* NULL tunnel and session indicates end of list */
+	if ((pd->tunnel == NULL) && (pd->session == NULL))
+		pd = NULL;
+
+out:
+	return pd;
+}
+
+/* seq_file next handler.  Only bumps the position and returns NULL; the
+ * actual stepping through tunnels and sessions is done in
+ * pppol2tp_seq_start().
+ */
+static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	*pos += 1;
+
+	return NULL;
+}
+
+/* seq_file stop handler.  The iteration helpers (next_session(),
+ * next_tunnel()) release their locks before returning, so there is
+ * nothing to undo here.
+ */
+static void pppol2tp_seq_stop(struct seq_file *p, void *v)
+{
+	/* nothing to do */
+}
+
+/* Print the two /proc lines for one tunnel: name, whether the tunnel
+ * socket's sk_user_data still points back at this tunnel, the reference
+ * count minus one (labelled session-count in the header), then the debug
+ * mask and tx/rx statistics.
+ */
+static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
+{
+	struct pppol2tp_tunnel *tunnel = v;
+	char user_data_ok;
+
+	if (tunnel == tunnel->sock->sk_user_data)
+		user_data_ok = 'Y';
+	else
+		user_data_ok = 'N';
+
+	seq_printf(m, "\nTUNNEL '%s', %c %d\n",
+		   tunnel->name, user_data_ok,
+		   atomic_read(&tunnel->ref_count) - 1);
+
+	seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n",
+		   tunnel->debug,
+		   (unsigned long long)tunnel->stats.tx_packets,
+		   (unsigned long long)tunnel->stats.tx_bytes,
+		   (unsigned long long)tunnel->stats.tx_errors,
+		   (unsigned long long)tunnel->stats.rx_packets,
+		   (unsigned long long)tunnel->stats.rx_bytes,
+		   (unsigned long long)tunnel->stats.rx_errors);
+}
+
+/* Print the three /proc lines for one session: identity and addressing,
+ * configuration flags, and sequence numbers with tx/rx statistics.
+ */
+static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
+{
+	struct pppol2tp_session *session = v;
+	/* 'Y' when the session socket still points back at this session */
+	char user_data_ok = (session == session->sock->sk_user_data) ? 'Y' : 'N';
+	char rcvseq_flag = session->recv_seq ? 'R' : '-';
+	char sendseq_flag = session->send_seq ? 'S' : '-';
+	const char *mode = session->lns_mode ? "LNS" : "LAC";
+
+	seq_printf(m, "  SESSION '%s' %08X/%d %04X/%04X -> "
+		   "%04X/%04X %d %c\n",
+		   session->name,
+		   ntohl(session->tunnel_addr.addr.sin_addr.s_addr),
+		   ntohs(session->tunnel_addr.addr.sin_port),
+		   session->tunnel_addr.s_tunnel,
+		   session->tunnel_addr.s_session,
+		   session->tunnel_addr.d_tunnel,
+		   session->tunnel_addr.d_session,
+		   session->sock->sk_state,
+		   user_data_ok);
+
+	seq_printf(m, "   %d/%d/%c/%c/%s %08x %u\n",
+		   session->mtu, session->mru,
+		   rcvseq_flag, sendseq_flag, mode,
+		   session->debug,
+		   jiffies_to_msecs(session->reorder_timeout));
+
+	seq_printf(m, "   %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n",
+		   session->nr, session->ns,
+		   (unsigned long long)session->stats.tx_packets,
+		   (unsigned long long)session->stats.tx_bytes,
+		   (unsigned long long)session->stats.tx_errors,
+		   (unsigned long long)session->stats.rx_packets,
+		   (unsigned long long)session->stats.rx_bytes,
+		   (unsigned long long)session->stats.rx_errors);
+}
+
+/* seq_file show handler.  Prints the column header for SEQ_START_TOKEN;
+ * otherwise v is the iterator state and either its current session or,
+ * when no session is selected, its current tunnel is printed.
+ * Always returns 0.
+ */
+static int pppol2tp_seq_show(struct seq_file *m, void *v)
+{
+	struct pppol2tp_seq_data *pd;
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n");
+		seq_puts(m, "TUNNEL name, user-data-ok session-count\n");
+		seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+		seq_puts(m, "  SESSION name, addr/port src-tid/sid "
+			 "dest-tid/sid state user-data-ok\n");
+		seq_puts(m, "   mtu/mru/rcvseq/sendseq/lns debug reorderto\n");
+		seq_puts(m, "   nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+		return 0;
+	}
+
+	/* Show the tunnel or session context. */
+	pd = v;
+	if (pd->session != NULL)
+		pppol2tp_seq_session_show(m, pd->session);
+	else
+		pppol2tp_seq_tunnel_show(m, pd->tunnel);
+
+	return 0;
+}
+
+/* seq_file iterator callbacks for the /proc file; handed to
+ * seq_open_net() by pppol2tp_proc_open().
+ */
+static const struct seq_operations pppol2tp_seq_ops = {
+	.start		= pppol2tp_seq_start,
+	.next		= pppol2tp_seq_next,
+	.stop		= pppol2tp_seq_stop,
+	.show		= pppol2tp_seq_show,
+};
+
+/* Open handler for our /proc file.  seq_open_net() is asked to allocate
+ * a struct pppol2tp_seq_data as the seq_file's private data; it holds the
+ * position while iterating our tunnel / session contexts.
+ */
+static int pppol2tp_proc_open(struct inode *inode, struct file *file)
+{
+	const int priv_size = sizeof(struct pppol2tp_seq_data);
+
+	return seq_open_net(inode, file, &pppol2tp_seq_ops, priv_size);
+}
+
+/* File operations for the per-net "pppol2tp" /proc entry created in
+ * pppol2tp_init_net().  Reading and seeking use the generic seq_file
+ * helpers; release is paired with the seq_open_net() done at open.
+ */
+static const struct file_operations pppol2tp_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= pppol2tp_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_net,
+};
+
+#endif /* CONFIG_PROC_FS */
+
+/*****************************************************************************
+ * Init and cleanup
+ *****************************************************************************/
+
+/* Socket operations for AF_PPPOX L2TP sockets.  Operations that this
+ * socket type does not implement (bind, socketpair, accept, listen,
+ * shutdown, mmap) are wired to the generic sock_no_*() stubs.
+ */
+static const struct proto_ops pppol2tp_ops = {
+	.family		= AF_PPPOX,
+	.owner		= THIS_MODULE,
+	.release	= pppol2tp_release,
+	.bind		= sock_no_bind,
+	.connect	= pppol2tp_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.getname	= pppol2tp_getname,
+	.poll		= datagram_poll,
+	.listen		= sock_no_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= pppol2tp_setsockopt,
+	.getsockopt	= pppol2tp_getsockopt,
+	.sendmsg	= pppol2tp_sendmsg,
+	.recvmsg	= pppol2tp_recvmsg,
+	.mmap		= sock_no_mmap,
+	.ioctl		= pppox_ioctl,
+};
+
+/* PPPoX protocol hooks, registered for PX_PROTO_OL2TP in pppol2tp_init() */
+static struct pppox_proto pppol2tp_proto = {
+	.create		= pppol2tp_create,
+	.ioctl		= pppol2tp_ioctl
+};
+
+/* Per-namespace setup: initialise the tunnel list and its lock, then
+ * create the "pppol2tp" /proc entry.  Returns 0, or -ENOMEM when procfs
+ * is configured in and the entry could not be created.
+ */
+static __net_init int pppol2tp_init_net(struct net *net)
+{
+	struct pppol2tp_net *pn = pppol2tp_pernet(net);
+	struct proc_dir_entry *pde;
+	int err = 0;
+
+	INIT_LIST_HEAD(&pn->pppol2tp_tunnel_list);
+	rwlock_init(&pn->pppol2tp_tunnel_list_lock);
+
+	pde = proc_net_fops_create(net, "pppol2tp", S_IRUGO, &pppol2tp_proc_fops);
+#ifdef CONFIG_PROC_FS
+	/* The result is only checked when procfs is built in */
+	if (pde == NULL)
+		err = -ENOMEM;
+#endif
+
+	return err;
+}
+
+/* Per-namespace teardown: remove the "pppol2tp" /proc entry that
+ * pppol2tp_init_net() created.
+ */
+static __net_exit void pppol2tp_exit_net(struct net *net)
+{
+	proc_net_remove(net, "pppol2tp");
+}
+
+/* Per-net hooks, registered with register_pernet_device() in
+ * pppol2tp_init().  .id and .size describe the struct pppol2tp_net
+ * private area that pppol2tp_pernet() looks up.
+ */
+static struct pernet_operations pppol2tp_net_ops = {
+	.init = pppol2tp_init_net,
+	.exit = pppol2tp_exit_net,
+	.id   = &pppol2tp_net_id,
+	.size = sizeof(struct pppol2tp_net),
+};
+
+/* Module init.  Registers, in order, the socket proto, the PPPoX
+ * protocol and the per-net operations.  If a step fails, the steps that
+ * already succeeded are undone in reverse order and the error is
+ * returned.
+ */
+static int __init pppol2tp_init(void)
+{
+	int err;
+
+	err = proto_register(&pppol2tp_sk_proto, 0);
+	if (err)
+		return err;
+
+	err = register_pppox_proto(PX_PROTO_OL2TP, &pppol2tp_proto);
+	if (err)
+		goto unwind_sk_proto;
+
+	err = register_pernet_device(&pppol2tp_net_ops);
+	if (err)
+		goto unwind_pppox_proto;
+
+	printk(KERN_INFO "PPPoL2TP kernel driver, %s\n",
+	       PPPOL2TP_DRV_VERSION);
+
+	return 0;
+
+unwind_pppox_proto:
+	unregister_pppox_proto(PX_PROTO_OL2TP);
+unwind_sk_proto:
+	proto_unregister(&pppol2tp_sk_proto);
+
+	return err;
+}
+
+/* Module exit: drop the three registrations made by pppol2tp_init() */
+static void __exit pppol2tp_exit(void)
+{
+	unregister_pppox_proto(PX_PROTO_OL2TP);
+	unregister_pernet_device(&pppol2tp_net_ops);
+	proto_unregister(&pppol2tp_sk_proto);
+}
+
+module_init(pppol2tp_init);
+module_exit(pppol2tp_exit);
+
+MODULE_AUTHOR("Martijn van Oosterhout <kleptog@svana.org>, "
+	      "James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("PPP over L2TP over UDP");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(PPPOL2TP_DRV_VERSION);
-- 
cgit v1.2.3-59-g8ed1b


From fd558d186df2c13a22455373858bae634a4795af Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:18:33 +0000
Subject: l2tp: Split pppol2tp patch into separate l2tp and ppp parts

This patch splits the pppol2tp driver into separate L2TP and PPP parts
to prepare for L2TPv3 support. In L2TPv3, protocols other than PPP can
be carried, so this split creates a common L2TP core that will handle
the common L2TP bits which protocol support modules such as PPP will
use.

Note that the existing pppol2tp module is split into l2tp_core and
l2tp_ppp by this change.

There are no feature changes here. Internally, however, there are
significant changes, mostly to handle the separation of PPP-specific
data from the L2TP session and to provide hooks in the core for
modules like PPP to access.

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig         |    7 +-
 include/linux/if_pppol2tp.h |    2 +-
 net/Kconfig                 |    1 +
 net/Makefile                |    2 +-
 net/l2tp/Kconfig            |   28 +
 net/l2tp/Makefile           |    5 +-
 net/l2tp/l2tp_core.c        | 1227 ++++++++++++++++++++
 net/l2tp/l2tp_core.h        |  254 ++++
 net/l2tp/l2tp_ppp.c         | 1658 ++++++++++++++++++++++++++
 net/l2tp/pppol2tp.c         | 2680 -------------------------------------------
 10 files changed, 3175 insertions(+), 2689 deletions(-)
 create mode 100644 net/l2tp/Kconfig
 create mode 100644 net/l2tp/l2tp_core.c
 create mode 100644 net/l2tp/l2tp_core.h
 create mode 100644 net/l2tp/l2tp_ppp.c
 delete mode 100644 net/l2tp/pppol2tp.c

(limited to 'net')

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 2cffcb84a241..a55bff26a264 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -3158,17 +3158,12 @@ config PPPOATM
 
 config PPPOL2TP
 	tristate "PPP over L2TP (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && PPP && INET
+	depends on EXPERIMENTAL && L2TP && PPP
 	help
 	  Support for PPP-over-L2TP socket family. L2TP is a protocol
 	  used by ISPs and enterprises to tunnel PPP traffic over UDP
 	  tunnels. L2TP is replacing PPTP for VPN uses.
 
-	  This kernel component handles only L2TP data packets: a
-	  userland daemon handles L2TP the control protocol (tunnel
-	  and session setup). One such daemon is OpenL2TP
-	  (http://openl2tp.sourceforge.net/).
-
 config SLIP
 	tristate "SLIP (serial line) support"
 	---help---
diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h
index c58baea4a25b..1a1fb6e5d933 100644
--- a/include/linux/if_pppol2tp.h
+++ b/include/linux/if_pppol2tp.h
@@ -2,7 +2,7 @@
  * Linux PPP over L2TP (PPPoL2TP) Socket Implementation (RFC 2661)
  *
  * This file supplies definitions required by the PPP over L2TP driver
- * (pppol2tp.c).  All version information wrt this file is located in pppol2tp.c
+ * (l2tp_ppp.c).  All version information wrt this file is located in l2tp_ppp.c
  *
  * License:
  *		This program is free software; you can redistribute it and/or
diff --git a/net/Kconfig b/net/Kconfig
index e10d55c8ee5c..0d68b40fc0e6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -186,6 +186,7 @@ source "net/sctp/Kconfig"
 source "net/rds/Kconfig"
 source "net/tipc/Kconfig"
 source "net/atm/Kconfig"
+source "net/l2tp/Kconfig"
 source "net/802/Kconfig"
 source "net/bridge/Kconfig"
 source "net/dsa/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index 13ca77e0eb08..cb7bdc1210cb 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_BT)		+= bluetooth/
 obj-$(CONFIG_SUNRPC)		+= sunrpc/
 obj-$(CONFIG_AF_RXRPC)		+= rxrpc/
 obj-$(CONFIG_ATM)		+= atm/
-obj-$(CONFIG_PPPOL2TP)		+= l2tp/
+obj-$(CONFIG_L2TP)		+= l2tp/
 obj-$(CONFIG_DECNET)		+= decnet/
 obj-$(CONFIG_ECONET)		+= econet/
 obj-$(CONFIG_PHONET)		+= phonet/
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
new file mode 100644
index 000000000000..ec88c5cdc397
--- /dev/null
+++ b/net/l2tp/Kconfig
@@ -0,0 +1,28 @@
+#
+# Layer Two Tunneling Protocol (L2TP)
+#
+
+menuconfig L2TP
+	tristate "Layer Two Tunneling Protocol (L2TP)"
+	depends on INET
+	---help---
+	  Layer Two Tunneling Protocol
+
+	  From RFC 2661 <http://www.ietf.org/rfc/rfc2661.txt>.
+
+	  L2TP facilitates the tunneling of packets across an
+	  intervening network in a way that is as transparent as
+	  possible to both end-users and applications.
+
+	  L2TP is often used to tunnel PPP traffic over IP
+	  tunnels. One IP tunnel may carry thousands of individual PPP
+	  connections. L2TP is also used as a VPN protocol, popular
+	  with home workers to connect to their offices.
+
+	  The kernel component handles only L2TP data packets: a
+	  userland daemon handles the L2TP control protocol (tunnel
+	  and session setup). One such daemon is OpenL2TP
+	  (http://openl2tp.org/).
+
+	  If you don't need L2TP, say N. To compile all L2TP code as
+	  modules, choose M here.
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index 9af41e898a04..c91f208b1693 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -2,4 +2,7 @@
 # Makefile for the L2TP.
 #
 
-obj-$(CONFIG_PPPOL2TP) += pppol2tp.o
+obj-$(CONFIG_L2TP) += l2tp_core.o
+
+# Build l2tp as modules if L2TP is M
+obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
new file mode 100644
index 000000000000..4b6da3689893
--- /dev/null
+++ b/net/l2tp/l2tp_core.c
@@ -0,0 +1,1227 @@
+/*
+ * L2TP core.
+ *
+ * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
+ *
+ * This file contains some code of the original L2TPv2 pppol2tp
+ * driver, which has the following copyright:
+ *
+ * Authors:	Martijn van Oosterhout <kleptog@svana.org>
+ *		James Chapman (jchapman@katalix.com)
+ * Contributors:
+ *		Michal Ostrowski <mostrows@speakeasy.net>
+ *		Arnaldo Carvalho de Melo <acme@xconectiva.com.br>
+ *		David S. Miller (davem@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/uaccess.h>
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+
+#include <linux/netdevice.h>
+#include <linux/net.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/hash.h>
+#include <linux/sort.h>
+#include <linux/file.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/dst.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/xfrm.h>
+
+#include <asm/byteorder.h>
+#include <asm/atomic.h>
+
+#include "l2tp_core.h"
+
+#define L2TP_DRV_VERSION	"V2.0"
+
+/* L2TP header constants */
+#define L2TP_HDRFLAG_T	   0x8000
+#define L2TP_HDRFLAG_L	   0x4000
+#define L2TP_HDRFLAG_S	   0x0800
+#define L2TP_HDRFLAG_O	   0x0200
+#define L2TP_HDRFLAG_P	   0x0100
+
+#define L2TP_HDR_VER_MASK  0x000F
+#define L2TP_HDR_VER_2	   0x0002
+
+/* L2TPv3 default L2-specific sublayer */
+#define L2TP_SLFLAG_S	   0x40000000
+#define L2TP_SL_SEQ_MASK   0x00ffffff
+
+#define L2TP_HDR_SIZE_SEQ		10
+#define L2TP_HDR_SIZE_NOSEQ		6
+
+/* Default trace flags */
+#define L2TP_DEFAULT_DEBUG_FLAGS	0
+
+/* Conditional trace: print at log level _lvl, prefixed with "L2TP: ",
+ * only when _mask has at least one of the _type bits set.
+ */
+#define PRINTK(_mask, _type, _lvl, _fmt, args...)			\
+	do {								\
+		if ((_mask) & (_type))					\
+			printk(_lvl "L2TP: " _fmt, ##args);		\
+	} while (0)
+
+/* Private data stored for received packets in the skb.
+ * Filled in by l2tp_udp_recv_core() and read back while the skb sits on
+ * a session's reorder queue.
+ */
+struct l2tp_skb_cb {
+	u16			ns;		/* Ns from the L2TP header; valid if has_seq */
+	u16			has_seq;	/* non-zero if the header carried sequence numbers */
+	u16			length;		/* skb->len taken before the L2TP header was pulled */
+	unsigned long		expires;	/* jiffies after which a queued skb is discarded */
+};
+
+/* Locate the L2TP private data inside skb->cb[].  It is placed after the
+ * first sizeof(struct inet_skb_parm) bytes of the control buffer.  The
+ * argument is parenthesised so that any pointer expression can be passed.
+ */
+#define L2TP_SKB_CB(skb)	((struct l2tp_skb_cb *) &(skb)->cb[sizeof(struct inet_skb_parm)])
+
+static atomic_t l2tp_tunnel_count;
+static atomic_t l2tp_session_count;
+
+/* per-net private data for this module */
+static unsigned int l2tp_net_id;	/* key passed to net_generic() in l2tp_pernet() */
+struct l2tp_net {
+	/* tunnels in this namespace, linked through tunnel->list */
+	struct list_head l2tp_tunnel_list;
+	/* read-locked (bh) by the tunnel lookup functions while walking the list */
+	rwlock_t l2tp_tunnel_list_lock;
+};
+
+/* Return this module's private data for a network namespace.
+ * Passing a NULL net is treated as a bug.
+ */
+static inline struct l2tp_net *l2tp_pernet(struct net *net)
+{
+	struct l2tp_net *pn;
+
+	BUG_ON(!net);
+	pn = net_generic(net, l2tp_net_id);
+
+	return pn;
+}
+
+/* Session hash list.
+ * RFC2661 says the session_id SHOULD be random, but several L2TP
+ * implementations (Cisco and Microsoft) hand out incrementing
+ * session_ids.  A real hash of the session_id is therefore used to pick
+ * the bucket, rather than a simple bitmask.
+ */
+static inline struct hlist_head *
+l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
+{
+	u32 bucket = hash_32(session_id, L2TP_HASH_BITS);
+
+	return &tunnel->session_hlist[bucket];
+}
+
+/* Lookup a session by id in the given tunnel.
+ * The matching hash bucket is searched under the tunnel's hlist read
+ * lock.  Returns the session, or NULL if none has that id.  The lock is
+ * released before returning.
+ */
+struct l2tp_session *l2tp_session_find(struct l2tp_tunnel *tunnel, u32 session_id)
+{
+	struct hlist_head *bucket = l2tp_session_id_hash(tunnel, session_id);
+	struct l2tp_session *pos;
+	struct l2tp_session *match = NULL;
+	struct hlist_node *node;
+
+	read_lock_bh(&tunnel->hlist_lock);
+	hlist_for_each_entry(pos, node, bucket, hlist) {
+		if (pos->session_id == session_id) {
+			match = pos;
+			break;
+		}
+	}
+	read_unlock_bh(&tunnel->hlist_lock);
+
+	return match;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_find);
+
+/* Return the nth session (counting from 0) of a tunnel, in hash bucket
+ * order, or NULL if the tunnel has no more than nth sessions.  The walk
+ * is done under the tunnel's hlist read lock.
+ */
+struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
+{
+	struct l2tp_session *pos;
+	struct l2tp_session *match = NULL;
+	struct hlist_node *node;
+	int bucket;
+	int seen = 0;
+
+	read_lock_bh(&tunnel->hlist_lock);
+	for (bucket = 0; bucket < L2TP_HASH_SIZE; bucket++) {
+		hlist_for_each_entry(pos, node, &tunnel->session_hlist[bucket], hlist) {
+			if (++seen > nth) {
+				match = pos;
+				goto unlock;
+			}
+		}
+	}
+unlock:
+	read_unlock_bh(&tunnel->hlist_lock);
+
+	return match;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_find_nth);
+
+/* Lookup a tunnel by id in the given network namespace.
+ * The per-net tunnel list is searched under its read lock.  Returns the
+ * tunnel, or NULL if none has that id.
+ */
+struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
+{
+	struct l2tp_net *pn = l2tp_pernet(net);
+	struct l2tp_tunnel *pos;
+	struct l2tp_tunnel *match = NULL;
+
+	read_lock_bh(&pn->l2tp_tunnel_list_lock);
+	list_for_each_entry(pos, &pn->l2tp_tunnel_list, list) {
+		if (pos->tunnel_id == tunnel_id) {
+			match = pos;
+			break;
+		}
+	}
+	read_unlock_bh(&pn->l2tp_tunnel_list_lock);
+
+	return match;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_find);
+
+/* Return the nth tunnel (counting from 0) on the namespace's tunnel
+ * list, or NULL if the list has no more than nth entries.  The walk is
+ * done under the per-net tunnel list read lock.
+ */
+struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth)
+{
+	struct l2tp_net *pn = l2tp_pernet(net);
+	struct l2tp_tunnel *pos;
+	struct l2tp_tunnel *match = NULL;
+	int seen = 0;
+
+	read_lock_bh(&pn->l2tp_tunnel_list_lock);
+	list_for_each_entry(pos, &pn->l2tp_tunnel_list, list) {
+		if (++seen > nth) {
+			match = pos;
+			break;
+		}
+	}
+	read_unlock_bh(&pn->l2tp_tunnel_list_lock);
+
+	return match;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_find_nth);
+
+/*****************************************************************************
+ * Receive data handling
+ *****************************************************************************/
+
+/* Queue a skb in order. We come here only if the skb has an L2TP sequence
+ * number.
+ *
+ * The skb is inserted in front of the first queued skb whose ns is
+ * greater than its own, which is also counted as an out-of-sequence
+ * packet; if there is no such skb it is appended to the tail.  The queue
+ * lock is held for the whole operation.
+ */
+static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *skb)
+{
+	struct sk_buff *pos;
+	struct sk_buff *next;
+	u16 ns = L2TP_SKB_CB(skb)->ns;
+	int inserted = 0;
+
+	spin_lock_bh(&session->reorder_q.lock);
+	skb_queue_walk_safe(&session->reorder_q, pos, next) {
+		if (L2TP_SKB_CB(pos)->ns <= ns)
+			continue;
+
+		__skb_queue_before(&session->reorder_q, pos, skb);
+		PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+		       "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n",
+		       session->name, ns, L2TP_SKB_CB(pos)->ns,
+		       skb_queue_len(&session->reorder_q));
+		session->stats.rx_oos_packets++;
+		inserted = 1;
+		break;
+	}
+
+	if (!inserted)
+		__skb_queue_tail(&session->reorder_q, skb);
+
+	spin_unlock_bh(&session->reorder_q.lock);
+}
+
+/* Dequeue a single skb.
+ *
+ * Accounts the packet in the tunnel and session rx statistics, advances
+ * the session's Nr if the packet carried sequence numbers, then hands
+ * the skb to the session's recv_skb handler (or frees it when no handler
+ * is set).  Finally the session's deref hook, if any, is called.
+ */
+static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *skb)
+{
+	struct l2tp_tunnel *tunnel = session->tunnel;
+	int length = L2TP_SKB_CB(skb)->length;
+
+	/* The skb is about to be requeued, so give its resources back
+	 * to the current owner (a socket receive buffer).
+	 */
+	skb_orphan(skb);
+
+	tunnel->stats.rx_packets++;
+	tunnel->stats.rx_bytes += length;
+	session->stats.rx_packets++;
+	session->stats.rx_bytes += length;
+
+	if (L2TP_SKB_CB(skb)->has_seq) {
+		/* Bump our Nr */
+		session->nr++;
+		PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+		       "%s: updated nr to %hu\n", session->name, session->nr);
+	}
+
+	/* Deliver through the private receive handler, if there is one */
+	if (session->recv_skb == NULL)
+		kfree_skb(skb);
+	else
+		(*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length);
+
+	if (session->deref)
+		(*session->deref)(session);
+}
+
+/* Dequeue skbs from the session's reorder_q, subject to packet order.
+ * Skbs that have been in the queue for too long are simply discarded.
+ *
+ * The queue lock is dropped while each skb is delivered.  Other contexts
+ * may change the queue during that window, so the walk is restarted from
+ * the head afterwards instead of continuing from a cached next pointer.
+ */
+static void l2tp_recv_dequeue(struct l2tp_session *session)
+{
+	struct sk_buff *skb;
+	struct sk_buff *tmp;
+
+	/* If the pkt at the head of the queue has the nr that we
+	 * expect to send up next, dequeue it and any other
+	 * in-sequence packets behind it.
+	 */
+start:
+	spin_lock_bh(&session->reorder_q.lock);
+	skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
+		if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) {
+			session->stats.rx_seq_discards++;
+			session->stats.rx_errors++;
+			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+			       "%s: oos pkt %hu len %d discarded (too old), "
+			       "waiting for %hu, reorder_q_len=%d\n",
+			       session->name, L2TP_SKB_CB(skb)->ns,
+			       L2TP_SKB_CB(skb)->length, session->nr,
+			       skb_queue_len(&session->reorder_q));
+			__skb_unlink(skb, &session->reorder_q);
+			kfree_skb(skb);
+			if (session->deref)
+				(*session->deref)(session);
+			continue;
+		}
+
+		if (L2TP_SKB_CB(skb)->has_seq) {
+			if (L2TP_SKB_CB(skb)->ns != session->nr) {
+				PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+				       "%s: holding oos pkt %hu len %d, "
+				       "waiting for %hu, reorder_q_len=%d\n",
+				       session->name, L2TP_SKB_CB(skb)->ns,
+				       L2TP_SKB_CB(skb)->length, session->nr,
+				       skb_queue_len(&session->reorder_q));
+				goto out;
+			}
+		}
+		__skb_unlink(skb, &session->reorder_q);
+
+		/* Process the skb. We release the queue lock while we
+		 * do so to let other contexts process the queue.  Once
+		 * the lock has been dropped, tmp may have been unlinked
+		 * or freed by someone else, so it must not be followed:
+		 * start again from the head of the queue.
+		 */
+		spin_unlock_bh(&session->reorder_q.lock);
+		l2tp_recv_dequeue_skb(session, skb);
+		goto start;
+	}
+
+out:
+	spin_unlock_bh(&session->reorder_q.lock);
+}
+
+/* Verify the UDP checksum of a received tunnel packet.
+ * Returns 0 when no check is required (checksums disabled on the socket,
+ * already verified, or the sender used no checksum) or when a
+ * CHECKSUM_COMPLETE value folds to zero together with the pseudo-header
+ * sum.  Otherwise skb->csum is seeded with the pseudo-header sum and the
+ * result of __skb_checksum_complete() is returned.
+ */
+static inline int l2tp_verify_udp_checksum(struct sock *sk,
+					   struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+	struct inet_sock *inet = inet_sk(sk);
+	__wsum pseudo;
+
+	if (sk->sk_no_check || skb_csum_unnecessary(skb) || !uh->check)
+		return 0;
+
+	pseudo = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr,
+				    ntohs(uh->len), IPPROTO_UDP, 0);
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE) {
+		if (!csum_fold(csum_add(pseudo, skb->csum)))
+			return 0;
+	}
+
+	skb->csum = pseudo;
+
+	return __skb_checksum_complete(skb);
+}
+
+/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
+ * here. The skb is not on a list when we get here.
+ * Returns 0 if the packet was a data packet and was consumed here: either
+ * queued for delivery or discarded (bad checksum, sequence number
+ * violations, payload hook rejection).
+ * Returns 1 if the packet was not a good data packet and could not be
+ * forwarded.  All such packets are passed up to userspace to deal with;
+ * the UDP header is pushed back onto the skb first.
+ *
+ * payload_hook, if not NULL, is called on the skb after the L2TP header
+ * has been pulled; a non-zero return makes the packet be discarded.
+ */
+int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
+		       int (*payload_hook)(struct sk_buff *skb))
+{
+	struct l2tp_session *session = NULL;
+	unsigned char *ptr, *optr;
+	u16 hdrflags;
+	u32 tunnel_id, session_id;
+	int length;
+	int offset;
+	u16 version;
+	u16 ns, nr;
+
+	if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb))
+		goto discard_bad_csum;
+
+	/* UDP always verifies the packet length. */
+	__skb_pull(skb, sizeof(struct udphdr));
+
+	/* Short packet? */
+	if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
+		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+		       "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
+		goto error;
+	}
+
+	/* Trace packet contents, if enabled */
+	if (tunnel->debug & L2TP_MSG_DATA) {
+		length = min(32u, skb->len);
+		if (!pskb_may_pull(skb, length))
+			goto error;
+
+		printk(KERN_DEBUG "%s: recv: ", tunnel->name);
+
+		offset = 0;
+		do {
+			printk(" %02X", skb->data[offset]);
+		} while (++offset < length);
+
+		printk("\n");
+	}
+
+	/* Point to L2TP header.  This is done only after the
+	 * pskb_may_pull() calls above, because pulling may reallocate
+	 * the skb head and would leave earlier pointers into it stale.
+	 */
+	optr = ptr = skb->data;
+
+	/* Get L2TP header flags */
+	hdrflags = ntohs(*(__be16 *)ptr);
+
+	/* Check protocol version */
+	version = hdrflags & L2TP_HDR_VER_MASK;
+	if (version != tunnel->version) {
+		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+		       "%s: recv protocol version mismatch: got %d expected %d\n",
+		       tunnel->name, version, tunnel->version);
+		goto error;
+	}
+
+	/* Get length of L2TP packet */
+	length = skb->len;
+
+	/* If type is control packet, it is handled by userspace. */
+	if (hdrflags & L2TP_HDRFLAG_T) {
+		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: recv control packet, len=%d\n", tunnel->name, length);
+		goto error;
+	}
+
+	/* Skip flags */
+	ptr += 2;
+
+	/* If length is present, skip it */
+	if (hdrflags & L2TP_HDRFLAG_L)
+		ptr += 2;
+
+	/* Extract tunnel and session ID */
+	tunnel_id = ntohs(*(__be16 *) ptr);
+	ptr += 2;
+	session_id = ntohs(*(__be16 *) ptr);
+	ptr += 2;
+
+	/* Find the session context */
+	session = l2tp_session_find(tunnel, session_id);
+	if (!session) {
+		/* Not found? Pass to userspace to deal with */
+		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+		       "%s: no session found (%hu/%hu). Passing up.\n",
+		       tunnel->name, tunnel_id, session_id);
+		goto error;
+	}
+
+	/* The ref count is increased since we now hold a pointer to
+	 * the session. Take care to decrement the refcnt when exiting
+	 * this function from now on...
+	 */
+	l2tp_session_inc_refcount(session);
+	if (session->ref)
+		(*session->ref)(session);
+
+	/* Handle the optional sequence numbers. Sequence numbers are
+	 * in different places for L2TPv2 and L2TPv3.
+	 *
+	 * If we are the LAC, enable/disable sequence numbers under
+	 * the control of the LNS.  If no sequence numbers present but
+	 * we were expecting them, discard frame.
+	 */
+	ns = nr = 0;
+	L2TP_SKB_CB(skb)->has_seq = 0;
+	if (hdrflags & L2TP_HDRFLAG_S) {
+		ns = (u16) ntohs(*(__be16 *) ptr);
+		ptr += 2;
+		nr = ntohs(*(__be16 *) ptr);
+		ptr += 2;
+
+		/* Store L2TP info in the skb */
+		L2TP_SKB_CB(skb)->ns = ns;
+		L2TP_SKB_CB(skb)->has_seq = 1;
+
+		PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+		       "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n",
+		       session->name, ns, nr, session->nr);
+	}
+
+	if (L2TP_SKB_CB(skb)->has_seq) {
+		/* Received a packet with sequence numbers. If we're the LAC,
+		 * check if we are sending sequence numbers and if not,
+		 * configure it so.
+		 */
+		if ((!session->lns_mode) && (!session->send_seq)) {
+			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_INFO,
+			       "%s: requested to enable seq numbers by LNS\n",
+			       session->name);
+			session->send_seq = -1;
+		}
+	} else {
+		/* No sequence numbers.
+		 * If user has configured mandatory sequence numbers, discard.
+		 */
+		if (session->recv_seq) {
+			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_WARNING,
+			       "%s: recv data has no seq numbers when required. "
+			       "Discarding\n", session->name);
+			session->stats.rx_seq_discards++;
+			goto discard;
+		}
+
+		/* If we're the LAC and we're sending sequence numbers, the
+		 * LNS has requested that we no longer send sequence numbers.
+		 * If we're the LNS and we're sending sequence numbers, the
+		 * LAC is broken. Discard the frame.
+		 */
+		if ((!session->lns_mode) && (session->send_seq)) {
+			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_INFO,
+			       "%s: requested to disable seq numbers by LNS\n",
+			       session->name);
+			session->send_seq = 0;
+		} else if (session->send_seq) {
+			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_WARNING,
+			       "%s: recv data has no seq numbers when required. "
+			       "Discarding\n", session->name);
+			session->stats.rx_seq_discards++;
+			goto discard;
+		}
+	}
+
+	/* If offset bit set, skip it. */
+	if (hdrflags & L2TP_HDRFLAG_O) {
+		offset = ntohs(*(__be16 *)ptr);
+		ptr += 2 + offset;
+	}
+
+	/* ptr and optr are only used for their difference from here on,
+	 * so the pull below cannot invalidate anything we dereference.
+	 */
+	offset = ptr - optr;
+	if (!pskb_may_pull(skb, offset))
+		goto discard;
+
+	__skb_pull(skb, offset);
+
+	/* If caller wants to process the payload before we queue the
+	 * packet, do so now.
+	 */
+	if (payload_hook)
+		if ((*payload_hook)(skb))
+			goto discard;
+
+	/* Prepare skb for adding to the session's reorder_q.  Hold
+	 * packets for max reorder_timeout or 1 second if not
+	 * reordering.
+	 */
+	L2TP_SKB_CB(skb)->length = length;
+	L2TP_SKB_CB(skb)->expires = jiffies +
+		(session->reorder_timeout ? session->reorder_timeout : HZ);
+
+	/* Add packet to the session's receive queue. Reordering is done here, if
+	 * enabled. Saved L2TP protocol info is stored in skb->cb[].
+	 */
+	if (L2TP_SKB_CB(skb)->has_seq) {
+		if (session->reorder_timeout != 0) {
+			/* Packet reordering enabled. Add skb to session's
+			 * reorder queue, in order of ns.
+			 */
+			l2tp_recv_queue_skb(session, skb);
+		} else {
+			/* Packet reordering disabled. Discard out-of-sequence
+			 * packets
+			 */
+			if (L2TP_SKB_CB(skb)->ns != session->nr) {
+				session->stats.rx_seq_discards++;
+				PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+				       "%s: oos pkt %hu len %d discarded, "
+				       "waiting for %hu, reorder_q_len=%d\n",
+				       session->name, L2TP_SKB_CB(skb)->ns,
+				       L2TP_SKB_CB(skb)->length, session->nr,
+				       skb_queue_len(&session->reorder_q));
+				goto discard;
+			}
+			skb_queue_tail(&session->reorder_q, skb);
+		}
+	} else {
+		/* No sequence numbers. Add the skb to the tail of the
+		 * reorder queue. This ensures that it will be
+		 * delivered after all previous sequenced skbs.
+		 */
+		skb_queue_tail(&session->reorder_q, skb);
+	}
+
+	/* Try to dequeue as many skbs from reorder_q as we can. */
+	l2tp_recv_dequeue(session);
+
+	l2tp_session_dec_refcount(session);
+
+	return 0;
+
+discard:
+	session->stats.rx_errors++;
+	kfree_skb(skb);
+
+	if (session->deref)
+		(*session->deref)(session);
+
+	l2tp_session_dec_refcount(session);
+
+	return 0;
+
+discard_bad_csum:
+	LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name);
+	UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0);
+	tunnel->stats.rx_errors++;
+	kfree_skb(skb);
+
+	return 0;
+
+error:
+	/* Put UDP header back */
+	__skb_push(skb, sizeof(struct udphdr));
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(l2tp_udp_recv_core);
+
+/* UDP encapsulation receive handler. See net/ipv4/udp.c.
+ *
+ * Looks up the tunnel context attached to sk and passes the skb to
+ * l2tp_udp_recv_core() along with the tunnel's recv_payload_hook.
+ * l2tp_sock_to_tunnel() holds a reference on sk whenever it returns a
+ * tunnel, so both exits that follow a successful lookup call sock_put().
+ *
+ * Return codes:
+ * 0 : success.
+ * <0: error
+ * >0: skb should be passed up to userspace as UDP.
+ * As written, this function itself only returns 0 or 1.
+ */
+int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct l2tp_tunnel *tunnel;
+
+	tunnel = l2tp_sock_to_tunnel(sk);
+	if (tunnel == NULL)
+		goto pass_up;	/* no tunnel context; no sock reference is held */
+
+	PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG,
+	       "%s: received %d bytes\n", tunnel->name, skb->len);
+
+	if (l2tp_udp_recv_core(tunnel, skb, tunnel->recv_payload_hook))
+		goto pass_up_put;
+
+	sock_put(sk);
+	return 0;
+
+pass_up_put:
+	sock_put(sk);
+pass_up:
+	return 1;
+}
+EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv);
+
+/************************************************************************
+ * Transmit handling
+ ***********************************************************************/
+
+/* Build an L2TP header for the session into the buffer provided.
+ *
+ * Writes consecutive 16-bit big-endian words: the flags/version word,
+ * the peer tunnel id, the peer session id and, when session->send_seq
+ * is set, Ns followed by a zero word. That is 6 bytes without sequence
+ * numbers and 10 bytes with them; buf must have at least that much room.
+ * The u32 peer ids pass through htons(), so only their low 16 bits are
+ * written.
+ *
+ * Side effect: session->ns is incremented when send_seq is set.
+ */
+static void l2tp_build_l2tpv2_header(struct l2tp_tunnel *tunnel,
+				     struct l2tp_session *session,
+				     void *buf)
+{
+	__be16 *bufp = buf;
+	u16 flags = L2TP_HDR_VER_2;
+	u32 tunnel_id = tunnel->peer_tunnel_id;
+	u32 session_id = session->peer_session_id;
+
+	if (session->send_seq)
+		flags |= L2TP_HDRFLAG_S;
+
+	/* Setup L2TP header. */
+	*bufp++ = htons(flags);
+	*bufp++ = htons(tunnel_id);
+	*bufp++ = htons(session_id);
+	if (session->send_seq) {
+		*bufp++ = htons(session->ns);
+		*bufp++ = 0;
+		session->ns++;
+		PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+		       "%s: updated ns to %hu\n", session->name, session->ns);
+	}
+}
+
+void l2tp_build_l2tp_header(struct l2tp_session *session, void *buf)
+{	/* Exported wrapper: build the L2TP header for session into buf.
+	 * Only L2TPv2 tunnels are handled; any other tunnel->version
+	 * trips the BUG_ON() below.
+	 */
+	struct l2tp_tunnel *tunnel = session->tunnel;
+
+	BUG_ON(tunnel->version != L2TP_HDR_VER_2);
+	l2tp_build_l2tpv2_header(tunnel, session, buf);
+}
+EXPORT_SYMBOL_GPL(l2tp_build_l2tp_header);
+
+int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len)
+{	/* Log the outgoing frame, pass skb to ip_queue_xmit() and update
+	 * the tunnel and session tx counters. data_len is used only in
+	 * the debug messages; the byte counters use skb->len as sampled
+	 * before the skb is handed to IP.
+	 */
+	struct l2tp_tunnel *tunnel = session->tunnel;
+	unsigned int len = skb->len;
+	int error;
+
+	/* Debug. When called from l2tp_xmit_skb() the header builder has
+	 * already incremented session->ns, hence ns - 1 in the message.
+	 */
+	if (session->send_seq)
+		PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: send %Zd bytes, ns=%hu\n", session->name,
+		       data_len, session->ns - 1);
+	else
+		PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: send %Zd bytes\n", session->name, data_len);
+
+	if (session->debug & L2TP_MSG_DATA) {
+		int i;
+		unsigned char *datap = skb->data + sizeof(struct udphdr);
+
+		printk(KERN_DEBUG "%s: xmit:", session->name);
+		for (i = 0; i < (len - sizeof(struct udphdr)); i++) {	/* NOTE(review): unsigned subtraction; if len < sizeof(struct udphdr) it wraps and only the i == 31 break bounds the loop */
+			printk(" %02X", *datap++);
+			if (i == 31) {	/* at most 32 bytes are dumped */
+				printk(" ...");
+				break;
+			}
+		}
+		printk("\n");
+	}
+
+	/* Queue the packet to IP for output */
+	error = ip_queue_xmit(skb, 1);
+
+	/* Update stats */
+	if (error >= 0) {
+		tunnel->stats.tx_packets++;
+		tunnel->stats.tx_bytes += len;
+		session->stats.tx_packets++;
+		session->stats.tx_bytes += len;
+	} else {
+		tunnel->stats.tx_errors++;
+		session->stats.tx_errors++;
+	}
+
+	return 0;	/* always 0: a failed ip_queue_xmit() is only counted */
+}
+EXPORT_SYMBOL_GPL(l2tp_xmit_core);
+
+/* Automatically called when the skb is freed.
+ * Installed as skb->destructor by l2tp_skb_set_owner_w(); drops the
+ * socket reference that function took.
+ */
+static void l2tp_sock_wfree(struct sk_buff *skb)
+{
+	sock_put(skb->sk);
+}
+
+/* For data skbs that we transmit, we associate with the tunnel socket
+ * but don't do accounting.
+ * Takes a reference on sk; l2tp_sock_wfree(), installed here as the
+ * skb destructor, releases it.
+ */
+static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
+{
+	sock_hold(sk);
+	skb->sk = sk;
+	skb->destructor = l2tp_sock_wfree;
+}
+
+/* If caller requires the skb to have a ppp header, the header must be
+ * inserted in the skb data before calling this function.
+ *
+ * Prepends the L2TP header (hdr_len bytes) and a UDP header that uses
+ * the tunnel socket's inet_sport/inet_dport, attaches the dst returned
+ * by __sk_dst_get() for the tunnel socket, ties the skb to that socket
+ * and hands it to l2tp_xmit_core(). Always returns 0.
+ *
+ * NOTE(review): when skb_cow_head() fails the function returns 0
+ * without transmitting or freeing the skb. Confirm that the callers
+ * free it; otherwise that path leaks the skb.
+ */
+int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len)
+{
+	int data_len = skb->len;
+	struct sock *sk = session->tunnel->sock;
+	struct udphdr *uh;
+	unsigned int udp_len;
+	struct inet_sock *inet;
+	__wsum csum;
+	int old_headroom;
+	int new_headroom;
+	int headroom;
+
+	/* Check that there's enough headroom in the skb to insert IP,
+	 * UDP and L2TP headers. If not enough, expand it to
+	 * make room. Adjust truesize.
+	 */
+	headroom = NET_SKB_PAD + sizeof(struct iphdr) +
+		sizeof(struct udphdr) + hdr_len;
+	old_headroom = skb_headroom(skb);
+	if (skb_cow_head(skb, headroom))
+		goto abort;
+
+	new_headroom = skb_headroom(skb);
+	skb_orphan(skb);
+	skb->truesize += new_headroom - old_headroom;
+
+	/* Setup L2TP header */
+	l2tp_build_l2tp_header(session, __skb_push(skb, hdr_len));
+	udp_len = sizeof(struct udphdr) + hdr_len + data_len;
+
+	/* Setup UDP header */
+	inet = inet_sk(sk);
+	__skb_push(skb, sizeof(*uh));
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+	uh->source = inet->inet_sport;
+	uh->dest = inet->inet_dport;
+	uh->len = htons(udp_len);
+
+	uh->check = 0;
+
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
+			      IPSKB_REROUTED);
+	nf_reset(skb);
+
+	/* Get routing info from the tunnel socket */
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst_clone(__sk_dst_get(sk)));
+	l2tp_skb_set_owner_w(skb, sk);
+
+	/* Calculate UDP checksum if configured to do so: none when the
+	 * socket has UDP_CSUM_NOXMIT, computed here in software when the
+	 * output device lacks NETIF_F_V4_CSUM, otherwise only the
+	 * pseudo-header sum is stored and the rest is left as
+	 * CHECKSUM_PARTIAL.
+	 */
+	if (sk->sk_no_check == UDP_CSUM_NOXMIT)
+		skb->ip_summed = CHECKSUM_NONE;
+	else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
+		 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
+		skb->ip_summed = CHECKSUM_COMPLETE;
+		csum = skb_checksum(skb, 0, udp_len, 0);
+		uh->check = csum_tcpudp_magic(inet->inet_saddr,
+					      inet->inet_daddr,
+					      udp_len, IPPROTO_UDP, csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+	} else {
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
+					       inet->inet_daddr,
+					       udp_len, IPPROTO_UDP, 0);
+	}
+
+	l2tp_xmit_core(session, skb, data_len);
+
+abort:
+	return 0;
+}
+EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
+
+/*****************************************************************************
+ * Tunnel and session create/destroy.
+ *****************************************************************************/
+
+/* Tunnel socket destruct hook.
+ * The tunnel context is deleted only when all session sockets have been
+ * closed.
+ *
+ * Installed as sk->sk_destruct by l2tp_tunnel_create(). Does nothing if
+ * the socket carries no tunnel in sk_user_data. Otherwise it closes all
+ * sessions, clears the UDP encapsulation settings, detaches the tunnel
+ * from the socket, restores and invokes the destructor that was saved
+ * in old_sk_destruct, and finally drops the tunnel reference taken in
+ * l2tp_tunnel_create().
+ */
+void l2tp_tunnel_destruct(struct sock *sk)
+{
+	struct l2tp_tunnel *tunnel;
+
+	tunnel = sk->sk_user_data;
+	if (tunnel == NULL)
+		goto end;
+
+	PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: closing...\n", tunnel->name);
+
+	/* Close all sessions */
+	l2tp_tunnel_closeall(tunnel);
+
+	/* No longer an encapsulation socket. See net/ipv4/udp.c */
+	(udp_sk(sk))->encap_type = 0;
+	(udp_sk(sk))->encap_rcv = NULL;
+
+	/* Remove hooks into tunnel socket */
+	tunnel->sock = NULL;
+	sk->sk_destruct = tunnel->old_sk_destruct;
+	sk->sk_user_data = NULL;
+
+	/* Call the original destructor */
+	if (sk->sk_destruct)
+		(*sk->sk_destruct)(sk);
+
+	/* We're finished with the socket */
+	l2tp_tunnel_dec_refcount(tunnel);
+
+end:
+	return;
+}
+EXPORT_SYMBOL(l2tp_tunnel_destruct);
+
+/* When the tunnel is closed, all the attached sessions need to go too.
+ *
+ * Walks every bucket of tunnel->session_hlist. Each session found is
+ * unhashed and its optional ref() hook is called while hlist_lock is
+ * still held; the lock is then dropped around the optional
+ * session_close() and deref() hooks and re-taken before the bucket is
+ * rescanned from its head.
+ */
+void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
+{
+	int hash;
+	struct hlist_node *walk;
+	struct hlist_node *tmp;
+	struct l2tp_session *session;
+
+	BUG_ON(tunnel == NULL);
+
+	PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: closing all sessions...\n", tunnel->name);
+
+	write_lock_bh(&tunnel->hlist_lock);
+	for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
+again:
+		hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
+			session = hlist_entry(walk, struct l2tp_session, hlist);
+
+			PRINTK(session->debug, L2TP_MSG_CONTROL, KERN_INFO,
+			       "%s: closing session\n", session->name);
+
+			hlist_del_init(&session->hlist);
+
+			/* Since we should hold the sock lock while
+			 * doing any unbinding, we need to release the
+			 * lock we're holding before taking that lock.
+			 * Hold a reference to the sock so it doesn't
+			 * disappear as we're jumping between locks.
+			 */
+			if (session->ref != NULL)
+				(*session->ref)(session);
+
+			write_unlock_bh(&tunnel->hlist_lock);
+
+			if (session->session_close != NULL)
+				(*session->session_close)(session);
+
+			if (session->deref != NULL)
+				(*session->deref)(session);
+
+			write_lock_bh(&tunnel->hlist_lock);
+
+			/* Now restart from the beginning of this hash
+			 * chain.  We always remove a session from the
+			 * list so we are guaranteed to make forward
+			 * progress.
+			 */
+			goto again;
+		}
+	}
+	write_unlock_bh(&tunnel->hlist_lock);
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
+
+/* Really kill the tunnel.
+ * Come here only when all sessions have been cleared from the tunnel.
+ *
+ * Reached from l2tp_tunnel_dec_refcount_1() (l2tp_core.h) once
+ * ref_count drops to zero. Both preconditions are enforced with
+ * BUG_ON(): ref_count must be 0 and tunnel->sock must already be NULL
+ * (l2tp_tunnel_destruct() clears it). Unlinks the tunnel from the
+ * per-net list, decrements l2tp_tunnel_count and frees the context.
+ */
+void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
+{
+	struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+
+	BUG_ON(atomic_read(&tunnel->ref_count) != 0);
+	BUG_ON(tunnel->sock != NULL);
+
+	PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: free...\n", tunnel->name);
+
+	/* Remove from tunnel list */
+	write_lock_bh(&pn->l2tp_tunnel_list_lock);
+	list_del_init(&tunnel->list);
+	write_unlock_bh(&pn->l2tp_tunnel_list_lock);
+
+	atomic_dec(&l2tp_tunnel_count);
+	kfree(tunnel);
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_free);
+
+int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
+{	/* Create a tunnel context on top of the socket referred to by fd.
+	 * The socket must be IPPROTO_UDP / AF_INET and must not already
+	 * carry sk_user_data; only version L2TP_HDR_VER_2 is accepted.
+	 *
+	 * On success returns 0, with the new tunnel (ref_count raised
+	 * once, linked on the per-net tunnel list, hooked into the
+	 * socket) stored in *tunnelp. Otherwise returns a negative errno.
+	 * When tunnelp is non-NULL it is written on every exit: NULL for
+	 * most failures, but the socket's existing tunnel on -EBUSY.
+	 * The file reference from sockfd_lookup() is always released.
+	 */
+	struct l2tp_tunnel *tunnel = NULL;
+	int err;
+	struct socket *sock = NULL;
+	struct sock *sk = NULL;
+	struct l2tp_net *pn;
+
+	/* Get the tunnel socket from the fd, which was opened by
+	 * the userspace L2TP daemon.
+	 */
+	err = -EBADF;
+	sock = sockfd_lookup(fd, &err);
+	if (!sock) {
+		printk(KERN_ERR "tunl %hu: sockfd_lookup(fd=%d) returned %d\n",
+		       tunnel_id, fd, err);
+		goto err;
+	}
+
+	sk = sock->sk;
+
+	/* Quick sanity checks */
+	err = -EPROTONOSUPPORT;
+	if (sk->sk_protocol != IPPROTO_UDP) {
+		printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
+		       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
+		goto err;
+	}
+	err = -EAFNOSUPPORT;
+	if (sock->ops->family != AF_INET) {
+		printk(KERN_ERR "tunl %hu: fd %d wrong family, got %d, expected %d\n",
+		       tunnel_id, fd, sock->ops->family, AF_INET);
+		goto err;
+	}
+
+	/* Check if this socket has already been prepped */
+	tunnel = (struct l2tp_tunnel *)sk->sk_user_data;
+	if (tunnel != NULL) {
+		/* This socket has already been prepped */
+		err = -EBUSY;
+		goto err;
+	}
+
+	if (version != L2TP_HDR_VER_2)
+		goto err;	/* NOTE(review): err is still -EAFNOSUPPORT here */
+
+	tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
+	if (tunnel == NULL) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	tunnel->version = version;
+	tunnel->tunnel_id = tunnel_id;
+	tunnel->peer_tunnel_id = peer_tunnel_id;
+	tunnel->debug = L2TP_DEFAULT_DEBUG_FLAGS;
+
+	tunnel->magic = L2TP_TUNNEL_MAGIC;
+	sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
+	rwlock_init(&tunnel->hlist_lock);
+
+	/* The net we belong to */
+	tunnel->l2tp_net = net;
+	pn = l2tp_pernet(net);
+
+	if (cfg)
+		tunnel->debug = cfg->debug;	/* overrides the default set above */
+
+	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
+	udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
+	udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
+
+	sk->sk_user_data = tunnel;
+
+	/* Hook on the tunnel socket destructor so that we can cleanup
+	 * if the tunnel socket goes away.
+	 */
+	tunnel->old_sk_destruct = sk->sk_destruct;
+	sk->sk_destruct = &l2tp_tunnel_destruct;
+	tunnel->sock = sk;
+	sk->sk_allocation = GFP_ATOMIC;
+
+	/* Add tunnel to our list */
+	INIT_LIST_HEAD(&tunnel->list);
+	write_lock_bh(&pn->l2tp_tunnel_list_lock);
+	list_add(&tunnel->list, &pn->l2tp_tunnel_list);
+	write_unlock_bh(&pn->l2tp_tunnel_list_lock);
+	atomic_inc(&l2tp_tunnel_count);
+
+	/* Bump the reference count. The tunnel context is deleted
+	 * only when this drops to zero.
+	 */
+	l2tp_tunnel_inc_refcount(tunnel);
+
+	err = 0;
+err:
+	if (tunnelp)
+		*tunnelp = tunnel;
+
+	if (sock)
+		sockfd_put(sock);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
+
+/* Really kill the session.
+ *
+ * Reached from l2tp_session_dec_refcount_1() (l2tp_core.h) once
+ * ref_count drops to zero, which the BUG_ON() below asserts. If the
+ * session still points at a tunnel it is unhashed from it, the global
+ * session count is decremented (session id 0 is not counted), and the
+ * socket hold and tunnel reference taken in l2tp_session_create() are
+ * released. The session memory is then freed.
+ */
+void l2tp_session_free(struct l2tp_session *session)
+{
+	struct l2tp_tunnel *tunnel;
+
+	BUG_ON(atomic_read(&session->ref_count) != 0);
+
+	tunnel = session->tunnel;
+	if (tunnel != NULL) {
+		BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
+
+		/* Delete the session from the hash */
+		write_lock_bh(&tunnel->hlist_lock);
+		hlist_del_init(&session->hlist);
+		write_unlock_bh(&tunnel->hlist_lock);
+
+		if (session->session_id != 0)
+			atomic_dec(&l2tp_session_count);
+
+		sock_put(tunnel->sock);
+
+		/* This will delete the tunnel context if this
+		 * is the last session on the tunnel.
+		 */
+		session->tunnel = NULL;
+		l2tp_tunnel_dec_refcount(tunnel);
+	}
+
+	kfree(session);
+
+	return;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_free);
+
+struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+{	/* Allocate a zeroed session with priv_size bytes of trailing
+	 * private data and attach it to tunnel. Returns the session, or
+	 * NULL if the allocation fails.
+	 *
+	 * The new session starts with its ref_count raised once. It also
+	 * takes a tunnel reference and a hold on tunnel->sock; both are
+	 * released by l2tp_session_free(). cfg may be NULL, in which case
+	 * debug is inherited from the tunnel and the other configurable
+	 * fields stay zero.
+	 */
+	struct l2tp_session *session;
+
+	session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
+	if (session != NULL) {
+		session->magic = L2TP_SESSION_MAGIC;
+		session->tunnel = tunnel;
+
+		session->session_id = session_id;
+		session->peer_session_id = peer_session_id;
+
+		sprintf(&session->name[0], "sess %u/%u",
+			tunnel->tunnel_id, session->session_id);
+
+		skb_queue_head_init(&session->reorder_q);
+
+		INIT_HLIST_NODE(&session->hlist);
+
+		/* Inherit debug options from tunnel */
+		session->debug = tunnel->debug;
+
+		if (cfg) {
+			session->debug = cfg->debug;
+			session->hdr_len = cfg->hdr_len;
+			session->mtu = cfg->mtu;
+			session->mru = cfg->mru;
+			session->send_seq = cfg->send_seq;
+			session->recv_seq = cfg->recv_seq;
+			session->lns_mode = cfg->lns_mode;
+		}
+
+		/* Bump the reference count. The session context is deleted
+		 * only when this drops to zero.
+		 */
+		l2tp_session_inc_refcount(session);
+		l2tp_tunnel_inc_refcount(tunnel);
+
+		/* Ensure tunnel socket isn't deleted */
+		sock_hold(tunnel->sock);
+
+		/* Add session to the tunnel's hash list */
+		write_lock_bh(&tunnel->hlist_lock);
+		hlist_add_head(&session->hlist,
+			       l2tp_session_id_hash(tunnel, session_id));
+		write_unlock_bh(&tunnel->hlist_lock);
+
+		/* Ignore management session in session count value */
+		if (session->session_id != 0)
+			atomic_inc(&l2tp_session_count);
+	}
+
+	return session;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_create);
+
+/*****************************************************************************
+ * Init and cleanup
+ *****************************************************************************/
+
+static __net_init int l2tp_init_net(struct net *net)
+{	/* Per-namespace init: allocate a struct l2tp_net, initialise its
+	 * tunnel list and lock, and publish it with net_assign_generic().
+	 * Returns 0 on success or a negative errno; the allocation is
+	 * freed again if publishing fails.
+	 */
+	struct l2tp_net *pn;
+	int err;
+
+	pn = kzalloc(sizeof(*pn), GFP_KERNEL);
+	if (!pn)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
+	rwlock_init(&pn->l2tp_tunnel_list_lock);
+
+	err = net_assign_generic(net, l2tp_net_id, pn);
+	if (err)
+		goto out;
+
+	return 0;
+
+out:
+	kfree(pn);
+	return err;
+}
+
+static __net_exit void l2tp_exit_net(struct net *net)
+{	/* Per-namespace teardown: fetch the struct l2tp_net registered
+	 * under l2tp_net_id, clear the generic slot, then free it.
+	 */
+	struct l2tp_net *pn;
+
+	pn = net_generic(net, l2tp_net_id);
+	/*
+	 * if someone has cached our net then
+	 * further net_generic call will return NULL
+	 */
+	net_assign_generic(net, l2tp_net_id, NULL);
+	kfree(pn);
+}
+
+static struct pernet_operations l2tp_net_ops = {	/* registered by l2tp_init() */
+	.init = l2tp_init_net,
+	.exit = l2tp_exit_net,
+	.id   = &l2tp_net_id,
+	.size = sizeof(struct l2tp_net),	/* NOTE(review): l2tp_init_net()
+						 * also allocates and assigns its
+						 * own struct l2tp_net; confirm
+						 * that setting .id/.size does not
+						 * make the pernet core allocate a
+						 * second one. */
+};
+
+static int __init l2tp_init(void)
+{	/* Module init: register the per-net operations and, on success,
+	 * announce the driver version. Returns the result of
+	 * register_pernet_device().
+	 */
+	int rc = 0;
+
+	rc = register_pernet_device(&l2tp_net_ops);
+	if (rc)
+		goto out;
+
+	printk(KERN_INFO "L2TP core driver, %s\n", L2TP_DRV_VERSION);
+
+out:
+	return rc;
+}
+
+static void __exit l2tp_exit(void)
+{	/* Module exit: undo the registration done in l2tp_init(). */
+	unregister_pernet_device(&l2tp_net_ops);
+}
+
+module_init(l2tp_init);
+module_exit(l2tp_exit);
+
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("L2TP core");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(L2TP_DRV_VERSION);
+
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
new file mode 100644
index 000000000000..2efe1a3ada98
--- /dev/null
+++ b/net/l2tp/l2tp_core.h
@@ -0,0 +1,254 @@
+/*
+ * L2TP internal definitions.
+ *
+ * Copyright (c) 2008,2009 Katalix Systems Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _L2TP_CORE_H_
+#define _L2TP_CORE_H_
+
+/* Just some random numbers */
+#define L2TP_TUNNEL_MAGIC	0x42114DDA
+#define L2TP_SESSION_MAGIC	0x0C04EB7D
+
+#define L2TP_HASH_BITS	4
+#define L2TP_HASH_SIZE	(1 << L2TP_HASH_BITS)
+
+/* Debug message categories for the DEBUG socket option */
+enum {
+	L2TP_MSG_DEBUG		= (1 << 0),	/* verbose debug (if
+						 * compiled in) */
+	L2TP_MSG_CONTROL	= (1 << 1),	/* userspace - kernel
+						 * interface */
+	L2TP_MSG_SEQ		= (1 << 2),	/* sequence numbers */
+	L2TP_MSG_DATA		= (1 << 3),	/* data packets */
+};
+
+struct sk_buff;
+
+struct l2tp_stats {	/* counters embedded in both struct l2tp_tunnel and
+			 * struct l2tp_session */
+	u64			tx_packets;
+	u64			tx_bytes;
+	u64			tx_errors;
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			rx_seq_discards;
+	u64			rx_oos_packets;
+	u64			rx_errors;
+};
+
+struct l2tp_tunnel;
+
+/* Session configuration handed to l2tp_session_create(). That function
+ * copies debug, hdr_len, mtu, mru, send_seq, recv_seq and lns_mode into
+ * the new struct l2tp_session; data_seq, offset and reorder_timeout are
+ * not copied there. (struct l2tp_session, which follows, describes the
+ * session itself: it contains the information used to determine
+ * incoming packets and transmit outgoing ones.)
+ */
+struct l2tp_session_cfg {
+	unsigned		data_seq:2;	/* data sequencing level
+						 * 0 => none, 1 => IP only,
+						 * 2 => all
+						 */
+	unsigned		recv_seq:1;	/* expect receive packets with
+						 * sequence numbers? */
+	unsigned		send_seq:1;	/* send packets with sequence
+						 * numbers? */
+	unsigned		lns_mode:1;	/* behave as LNS? LAC enables
+						 * sequence numbers under
+						 * control of LNS. */
+	int			debug;		/* bitmask of debug message
+						 * categories */
+	int			offset;		/* offset to payload */
+	int			reorder_timeout; /* configured reorder timeout
+						  * (in jiffies) */
+	int			mtu;
+	int			mru;
+	int			hdr_len;
+};
+
+struct l2tp_session {	/* per-session state; allocated by
+			 * l2tp_session_create() with the owner's private
+			 * data appended in priv[] */
+	int			magic;		/* should be
+						 * L2TP_SESSION_MAGIC */
+
+	struct l2tp_tunnel	*tunnel;	/* back pointer to tunnel
+						 * context */
+	u32			session_id;
+	u32			peer_session_id;
+	u16			nr;		/* session NR state (receive) */
+	u16			ns;		/* session NS state (send) */
+	struct sk_buff_head	reorder_q;	/* receive reorder queue */
+	struct hlist_node	hlist;		/* Hash list node */
+	atomic_t		ref_count;	/* session is freed when this
+						 * drops to zero */
+
+	char			name[32];	/* for logging */
+	unsigned		data_seq:2;	/* data sequencing level
+						 * 0 => none, 1 => IP only,
+						 * 2 => all
+						 */
+	unsigned		recv_seq:1;	/* expect receive packets with
+						 * sequence numbers? */
+	unsigned		send_seq:1;	/* send packets with sequence
+						 * numbers? */
+	unsigned		lns_mode:1;	/* behave as LNS? LAC enables
+						 * sequence numbers under
+						 * control of LNS. */
+	int			debug;		/* bitmask of debug message
+						 * categories */
+	int			reorder_timeout; /* configured reorder timeout
+						  * (in jiffies) */
+	int			mtu;
+	int			mru;
+	int			hdr_len;
+	struct l2tp_stats	stats;
+
+	void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len);
+	void (*session_close)(struct l2tp_session *session);	/* may be NULL */
+	void (*ref)(struct l2tp_session *session);	/* may be NULL */
+	void (*deref)(struct l2tp_session *session);	/* may be NULL */
+
+	uint8_t			priv[0];	/* private data */
+};
+
+/* Tunnel configuration handed to l2tp_tunnel_create(); when supplied,
+ * its debug value replaces L2TP_DEFAULT_DEBUG_FLAGS. (struct
+ * l2tp_tunnel, which follows, describes the tunnel itself. It contains
+ * info to track all the associated sessions so incoming packets can be
+ * sorted out.)
+ */
+struct l2tp_tunnel_cfg {
+	int			debug;		/* bitmask of debug message
+						 * categories */
+};
+
+struct l2tp_tunnel {	/* per-tunnel state; created by l2tp_tunnel_create()
+			 * and stored in the tunnel socket's sk_user_data */
+	int			magic;		/* Should be L2TP_TUNNEL_MAGIC */
+	rwlock_t		hlist_lock;	/* protect session_hlist */
+	struct hlist_head	session_hlist[L2TP_HASH_SIZE];
+						/* hashed list of sessions,
+						 * hashed by id */
+	u32			tunnel_id;
+	u32			peer_tunnel_id;
+	int			version;	/* 2=>L2TPv2, 3=>L2TPv3 */
+
+	char			name[20];	/* for logging */
+	int			debug;		/* bitmask of debug message
+						 * categories */
+	int			hdr_len;	/* not set by l2tp_tunnel_create() */
+	struct l2tp_stats	stats;
+
+	struct list_head	list;		/* Keep a list of all tunnels */
+	struct net		*l2tp_net;	/* the net we belong to */
+
+	atomic_t		ref_count;	/* tunnel is freed when this
+						 * drops to zero */
+
+	int (*recv_payload_hook)(struct sk_buff *skb);
+	void (*old_sk_destruct)(struct sock *);	/* socket's destructor before
+						 * l2tp_tunnel_destruct was
+						 * installed */
+	struct sock		*sock;		/* Parent socket */
+	int			fd;		/* not set by l2tp_tunnel_create() */
+
+	uint8_t			priv[0];	/* private data */
+};
+
+static inline void *l2tp_tunnel_priv(struct l2tp_tunnel *tunnel)
+{	/* Address of the private area that trails the tunnel struct. */
+	return &tunnel->priv[0];
+}
+
+static inline void *l2tp_session_priv(struct l2tp_session *session)
+{	/* Address of the private area that trails the session struct
+	 * (priv_size bytes, as requested from l2tp_session_create()).
+	 */
+	return &session->priv[0];
+}
+
+static inline struct l2tp_tunnel *l2tp_sock_to_tunnel(struct sock *sk)
+{	/* Return the tunnel stored in sk->sk_user_data, or NULL if sk is
+	 * NULL or carries no tunnel. When a tunnel is returned, a
+	 * reference on sk is held and the caller must sock_put() it;
+	 * when NULL is returned no reference is held. A tunnel with a
+	 * bad magic value trips BUG_ON().
+	 */
+	struct l2tp_tunnel *tunnel;
+
+	if (sk == NULL)
+		return NULL;
+
+	sock_hold(sk);
+	tunnel = (struct l2tp_tunnel *)(sk->sk_user_data);
+	if (tunnel == NULL) {
+		sock_put(sk);
+		goto out;
+	}
+
+	BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
+
+out:
+	return tunnel;
+}
+
+extern struct l2tp_session *l2tp_session_find(struct l2tp_tunnel *tunnel, u32 session_id);
+extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
+extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
+extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
+
+extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp);
+extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
+extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
+extern void l2tp_session_free(struct l2tp_session *session);
+extern int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, int (*payload_hook)(struct sk_buff *skb));
+extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
+
+extern void l2tp_build_l2tp_header(struct l2tp_session *session, void *buf);
+extern int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len);
+extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len);
+extern void l2tp_tunnel_destruct(struct sock *sk);
+extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
+
+/* Tunnel reference counts. Incremented per session that is added to
+ * the tunnel.
+ * l2tp_tunnel_create() also takes one reference for the tunnel socket,
+ * which l2tp_tunnel_destruct() drops. Use the l2tp_tunnel_inc_refcount()
+ * wrapper rather than calling the _1 variant directly.
+ */
+static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
+{
+	atomic_inc(&tunnel->ref_count);
+}
+
+static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
+{	/* Drop one reference; the last one frees the tunnel. */
+	if (atomic_dec_and_test(&tunnel->ref_count))
+		l2tp_tunnel_free(tunnel);
+}
+#ifdef L2TP_REFCNT_DEBUG /* debug build: log the count before each change */
+#define l2tp_tunnel_inc_refcount(_t) do { \
+		printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&(_t)->ref_count)); \
+		l2tp_tunnel_inc_refcount_1(_t);				\
+	} while (0)
+#define l2tp_tunnel_dec_refcount(_t) do { \
+		printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&(_t)->ref_count)); \
+		l2tp_tunnel_dec_refcount_1(_t);				\
+	} while (0)
+#else
+#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
+#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
+#endif
+
+/* Session reference counts. Incremented when code obtains a reference
+ * to a session.
+ * l2tp_session_create() takes the initial reference. Use the
+ * l2tp_session_inc_refcount() wrapper rather than calling the _1
+ * variant directly.
+ */
+static inline void l2tp_session_inc_refcount_1(struct l2tp_session *session)
+{
+	atomic_inc(&session->ref_count);
+}
+
+static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session)
+{	/* Drop one reference; the last one frees the session. */
+	if (atomic_dec_and_test(&session->ref_count))
+		l2tp_session_free(session);
+}
+
+#ifdef L2TP_REFCNT_DEBUG /* debug build: log the count before each change */
+#define l2tp_session_inc_refcount(_s) do { \
+		printk(KERN_DEBUG "l2tp_session_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_s)->name, atomic_read(&(_s)->ref_count)); \
+		l2tp_session_inc_refcount_1(_s);				\
+	} while (0)
+#define l2tp_session_dec_refcount(_s) do { \
+		printk(KERN_DEBUG "l2tp_session_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_s)->name, atomic_read(&(_s)->ref_count)); \
+		l2tp_session_dec_refcount_1(_s);				\
+	} while (0)
+#else
+#define l2tp_session_inc_refcount(s) l2tp_session_inc_refcount_1(s)
+#define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s)
+#endif
+
+#endif /* _L2TP_CORE_H_ */
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
new file mode 100644
index 000000000000..baac072761aa
--- /dev/null
+++ b/net/l2tp/l2tp_ppp.c
@@ -0,0 +1,1658 @@
+/*****************************************************************************
+ * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets
+ *
+ * PPPoX    --- Generic PPP encapsulation socket family
+ * PPPoL2TP --- PPP over L2TP (RFC 2661)
+ *
+ * Version:	2.0.0
+ *
+ * Authors:	James Chapman (jchapman@katalix.com)
+ *
+ * Based on original work by Martijn van Oosterhout <kleptog@svana.org>
+ *
+ * License:
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+
+/* This driver handles only L2TP data frames; control frames are handled by a
+ * userspace application.
+ *
+ * To send data in an L2TP session, userspace opens a PPPoL2TP socket and
+ * attaches it to a bound UDP socket with local tunnel_id / session_id and
+ * peer tunnel_id / session_id set. Data can then be sent or received using
+ * regular socket sendmsg() / recvmsg() calls. Kernel parameters of the socket
+ * can be read or modified using ioctl() or [gs]etsockopt() calls.
+ *
+ * When a PPPoL2TP socket is connected with local and peer session_id values
+ * zero, the socket is treated as a special tunnel management socket.
+ *
+ * Here's example userspace code to create a socket for sending/receiving data
+ * over an L2TP session:-
+ *
+ *	struct sockaddr_pppol2tp sax;
+ *	int fd;
+ *	int session_fd;
+ *
+ *	fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
+ *
+ *	sax.sa_family = AF_PPPOX;
+ *	sax.sa_protocol = PX_PROTO_OL2TP;
+ *	sax.pppol2tp.fd = tunnel_fd;	// bound UDP socket
+ *	sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
+ *	sax.pppol2tp.addr.sin_port = addr->sin_port;
+ *	sax.pppol2tp.addr.sin_family = AF_INET;
+ *	sax.pppol2tp.s_tunnel  = tunnel_id;
+ *	sax.pppol2tp.s_session = session_id;
+ *	sax.pppol2tp.d_tunnel  = peer_tunnel_id;
+ *	sax.pppol2tp.d_session = peer_session_id;
+ *
+ *	if (connect(fd, (struct sockaddr *)&sax, sizeof(sax)) == 0)
+ *		session_fd = fd;	// connect() returns 0, not a new fd
+ *
+ * A pppd plugin that allows PPP traffic to be carried over L2TP using
+ * this driver is available from the OpenL2TP project at
+ * http://openl2tp.sourceforge.net.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/uaccess.h>
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+
+#include <linux/netdevice.h>
+#include <linux/net.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/if_pppox.h>
+#include <linux/if_pppol2tp.h>
+#include <net/sock.h>
+#include <linux/ppp_channel.h>
+#include <linux/ppp_defs.h>
+#include <linux/if_ppp.h>
+#include <linux/file.h>
+#include <linux/hash.h>
+#include <linux/sort.h>
+#include <linux/proc_fs.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/dst.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/xfrm.h>
+
+#include <asm/byteorder.h>
+#include <asm/atomic.h>
+
+#include "l2tp_core.h"
+
+#define PPPOL2TP_DRV_VERSION	"V2.0"
+
+/* Space for UDP, L2TP and PPP headers */
+#define PPPOL2TP_HEADER_OVERHEAD	40
+
+#define PRINTK(_mask, _type, _lvl, _fmt, args...)			\
+	do {								\
+		if ((_mask) & (_type))					\
+			printk(_lvl "PPPOL2TP: " _fmt, ##args);		\
+	} while (0)
+
+/* Number of bytes to build transmit L2TP headers.
+ * Unfortunately the size is different depending on whether sequence numbers
+ * are enabled.
+ */
+#define PPPOL2TP_L2TP_HDR_SIZE_SEQ		10
+#define PPPOL2TP_L2TP_HDR_SIZE_NOSEQ		6
+
+/* Private data of each session. This data lives at the end of struct
+ * l2tp_session, referenced via session->priv[].
+ * Obtain it with l2tp_session_priv(session).
+ */
+struct pppol2tp_session {
+	int			owner;		/* pid that opened the socket */
+
+	struct sock		*sock;		/* Pointer to the session
+						 * PPPoX socket */
+	struct sock		*tunnel_sock;	/* Pointer to the tunnel UDP
+						 * socket */
+	int			flags;		/* accessed by PPPIOCGFLAGS.
+						 * Unused. */
+};
+
+static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
+
+static struct ppp_channel_ops pppol2tp_chan_ops = { pppol2tp_xmit , NULL };
+static const struct proto_ops pppol2tp_ops;
+
+/* Helpers to obtain tunnel/session contexts from sockets.
+ *
+ * Returns the session stored in sk->sk_user_data, or NULL if sk is NULL
+ * or carries no session. When a session is returned, a reference on sk
+ * is held and the caller must sock_put() it; when NULL is returned no
+ * reference is held. A session with a bad magic value trips BUG_ON().
+ */
+static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
+{
+	struct l2tp_session *session;
+
+	if (sk == NULL)
+		return NULL;
+
+	sock_hold(sk);
+	session = (struct l2tp_session *)(sk->sk_user_data);
+	if (session == NULL) {
+		sock_put(sk);
+		goto out;
+	}
+
+	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+
+out:
+	return session;
+}
+
+/*****************************************************************************
+ * Receive data handling
+ *****************************************************************************/
+
+static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
+{
+	/* Skip PPP header, if present.	 In testing, Microsoft L2TP clients
+	 * don't send the PPP header (PPP header compression enabled), but
+	 * other clients can include the header. So we cope with both cases
+	 * here. The PPP header is always FF03 when using L2TP.
+	 *
+	 * Note that skb->data[] isn't dereferenced from a u16 ptr here since
+	 * the field may be unaligned.
+	 *
+	 * Returns 1 when fewer than 2 bytes can be pulled, otherwise 0.
+	 * When this is the payload_hook given to l2tp_udp_recv_core(), a
+	 * non-zero return makes that function discard the skb.
+	 */
+	if (!pskb_may_pull(skb, 2))
+		return 1;
+
+	if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
+		skb_pull(skb, 2);
+
+	return 0;
+}
+
+/* Receive message. This is the recvmsg for the PPPoL2TP socket.
+ *
+ * Fails with -EIO when the socket is in PPPOX_BOUND state. Otherwise
+ * one datagram is dequeued (non-blocking if MSG_DONTWAIT is set in
+ * flags) and at most len bytes are copied to msg->msg_iov; MSG_TRUNC
+ * is set in msg->msg_flags when the datagram is longer than len.
+ * Returns the number of bytes copied, or the error reported by
+ * skb_recv_datagram() / skb_copy_datagram_iovec().
+ */
+static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
+			    struct msghdr *msg, size_t len,
+			    int flags)
+{
+	int err;
+	struct sk_buff *skb;
+	struct sock *sk = sock->sk;
+
+	err = -EIO;
+	if (sk->sk_state & PPPOX_BOUND)
+		goto end;
+
+	msg->msg_namelen = 0;
+
+	err = 0;
+	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
+				flags & MSG_DONTWAIT, &err);
+	if (!skb)
+		goto end;
+
+	if (len > skb->len)
+		len = skb->len;
+	else if (len < skb->len)
+		msg->msg_flags |= MSG_TRUNC;
+
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
+	if (likely(err == 0))
+		err = len;
+
+	kfree_skb(skb);
+end:
+	return err;
+}
+
+static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
+{	/* Deliver one received session payload: the skb is either handed
+	 * to ppp_input() or freed here. data_len is used only for the
+	 * debug message. The signature matches session->recv_skb.
+	 */
+	struct pppol2tp_session *ps = l2tp_session_priv(session);
+	struct sock *sk = NULL;
+
+	/* If the socket is bound, send it in to PPP's input queue. Otherwise
+	 * discard it: an unbound socket counts as an rx error, a missing
+	 * socket is only logged.
+	 */
+	sk = ps->sock;
+	if (sk == NULL)
+		goto no_sock;
+
+	if (sk->sk_state & PPPOX_BOUND) {
+		struct pppox_sock *po;
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: recv %d byte data frame, passing to ppp\n",
+		       session->name, data_len);
+
+		/* We need to forget all info related to the L2TP packet
+		 * gathered in the skb as we are going to reuse the same
+		 * skb for the inner packet.
+		 * Namely we need to:
+		 * - reset xfrm (IPSec) information as it applies to
+		 *   the outer L2TP packet and not to the inner one
+		 * - release the dst to force a route lookup on the inner
+		 *   IP packet since skb->dst currently points to the dst
+		 *   of the UDP tunnel
+		 * - reset netfilter information as it doesn't apply
+		 *   to the inner packet either
+		 */
+		secpath_reset(skb);
+		skb_dst_drop(skb);
+		nf_reset(skb);
+
+		po = pppox_sk(sk);
+		ppp_input(&po->chan, skb);
+	} else {
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
+		       "%s: socket not bound\n", session->name);
+
+		/* Not bound. Nothing we can do, so discard. */
+		session->stats.rx_errors++;
+		kfree_skb(skb);
+	}
+
+	return;
+
+no_sock:
+	PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
+	       "%s: no socket\n", session->name);
+	kfree_skb(skb);
+}
+
+/* session->ref callback: take a reference on the session's PPPoX
+ * socket, if one is attached.
+ */
+static void pppol2tp_session_sock_hold(struct l2tp_session *session)
+{
+	struct pppol2tp_session *priv = l2tp_session_priv(session);
+	struct sock *sk = priv->sock;
+
+	if (sk == NULL)
+		return;
+
+	sock_hold(sk);
+}
+
+/* session->deref callback: drop a reference on the session's PPPoX
+ * socket, if one is attached.
+ */
+static void pppol2tp_session_sock_put(struct l2tp_session *session)
+{
+	struct pppol2tp_session *priv = l2tp_session_priv(session);
+	struct sock *sk = priv->sock;
+
+	if (sk == NULL)
+		return;
+
+	sock_put(sk);
+}
+
+/************************************************************************
+ * Transmit handling
+ ***********************************************************************/
+
+/* Return the L2TP header size for @session: the larger "SEQ" size
+ * when the session sends sequence numbers, the "NOSEQ" size otherwise.
+ */
+static inline int pppol2tp_l2tp_header_len(struct l2tp_session *session)
+{
+	return session->send_seq ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
+				   PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+}
+
+/* This is the sendmsg for the PPPoL2TP pppol2tp_session socket.  We come here
+ * when a user application does a sendmsg() on the session socket. L2TP and
+ * PPP headers must be inserted into the user's data.
+ *
+ * Returns the number of user bytes accepted (@total_len) on success, or a
+ * negative errno: -ENOTCONN if the socket is dead or not connected, -EBADF
+ * if no session/tunnel context can be obtained, -ENOMEM if the skb cannot
+ * be allocated, or the error from copying the user data.
+ */
+static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+			    size_t total_len)
+{
+	static const unsigned char ppph[2] = { 0xff, 0x03 };
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	int error;
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel;
+	struct pppol2tp_session *ps;
+
+	error = -ENOTCONN;
+	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
+		goto error;
+
+	/* Get session and tunnel contexts. Each successful lookup holds a
+	 * reference on the corresponding socket, which must be dropped on
+	 * every exit path below.
+	 */
+	error = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto error;
+
+	ps = l2tp_session_priv(session);
+	tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
+	if (tunnel == NULL)
+		goto error_put_sess;
+
+	/* Allocate a socket buffer */
+	error = -ENOMEM;
+	skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
+			   sizeof(struct udphdr) + session->hdr_len +
+			   sizeof(ppph) + total_len,
+			   0, GFP_KERNEL);
+	if (!skb)
+		goto error_put_sess_tun;
+
+	/* Reserve space for headers. */
+	skb_reserve(skb, NET_SKB_PAD);
+	skb_reset_network_header(skb);
+	skb_reserve(skb, sizeof(struct iphdr));
+	skb_reset_transport_header(skb);
+	skb_reserve(skb, sizeof(struct udphdr));
+
+	/* Add PPP header */
+	skb->data[0] = ppph[0];
+	skb->data[1] = ppph[1];
+	skb_put(skb, 2);
+
+	/* Copy user data into skb, after the PPP header. skb_put() returns
+	 * the old tail, so the header written above is not overwritten.
+	 */
+	error = memcpy_fromiovec(skb_put(skb, total_len), m->msg_iov,
+				 total_len);
+	if (error < 0) {
+		kfree_skb(skb);
+		goto error_put_sess_tun;
+	}
+
+	l2tp_xmit_skb(session, skb, session->hdr_len);
+
+	/* Drop both references taken above, as the error paths do */
+	sock_put(ps->tunnel_sock);
+	sock_put(sk);
+
+	return total_len;
+
+error_put_sess_tun:
+	sock_put(ps->tunnel_sock);
+error_put_sess:
+	sock_put(sk);
+error:
+	return error;
+}
+
+/* Transmit function called by generic PPP driver.  Sends PPP frame
+ * over PPPoL2TP socket.
+ *
+ * This is almost the same as pppol2tp_sendmsg(), but rather than
+ * being called with a msghdr from userspace, it is called with a skb
+ * from the kernel.
+ *
+ * The supplied skb may lack headroom for the two-byte PPP header, so
+ * skb_cow_head() is used to make room before the header is pushed;
+ * skb->truesize is adjusted by the resulting headroom change.
+ *
+ * Every path returns 1. On the abort paths the supplied skb is freed
+ * here; on success it is handed to l2tp_xmit_skb().
+ */
+static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+{
+	static const u8 ppph[2] = { 0xff, 0x03 };
+	struct sock *sk = (struct sock *) chan->private;
+	struct sock *sk_tun;
+	int hdr_len;
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel;
+	struct pppol2tp_session *ps;
+	int old_headroom;
+	int new_headroom;
+
+	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
+		goto abort;
+
+	/* Get session and tunnel contexts from the socket */
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto abort;
+
+	ps = l2tp_session_priv(session);
+	sk_tun = ps->tunnel_sock;
+	if (sk_tun == NULL)
+		goto abort_put_sess;
+	tunnel = l2tp_sock_to_tunnel(sk_tun);
+	if (tunnel == NULL)
+		goto abort_put_sess;
+
+	/* What header length is configured for this session? */
+	hdr_len = pppol2tp_l2tp_header_len(session);
+
+	old_headroom = skb_headroom(skb);
+	if (skb_cow_head(skb, sizeof(ppph)))
+		goto abort_put_sess_tun;
+
+	/* Account for any headroom added by skb_cow_head() */
+	new_headroom = skb_headroom(skb);
+	skb->truesize += new_headroom - old_headroom;
+
+	/* Setup PPP header */
+	__skb_push(skb, sizeof(ppph));
+	skb->data[0] = ppph[0];
+	skb->data[1] = ppph[1];
+
+	l2tp_xmit_skb(session, skb, hdr_len);
+
+	/* Drop the tunnel and session socket references taken above */
+	sock_put(sk_tun);
+	sock_put(sk);
+	return 1;
+
+abort_put_sess_tun:
+	sock_put(sk_tun);
+abort_put_sess:
+	sock_put(sk);
+abort:
+	/* Free the original skb */
+	kfree_skb(skb);
+	return 1;
+}
+
+/*****************************************************************************
+ * Session (and tunnel control) socket create/destroy.
+ *****************************************************************************/
+
+/* session_close callback, invoked by l2tp_core when a session socket
+ * is being closed. Does nothing for session_id 0 or when no socket is
+ * attached; otherwise unbinds the socket, marks it dead and purges
+ * all queued data.
+ */
+static void pppol2tp_session_close(struct l2tp_session *session)
+{
+	struct pppol2tp_session *priv = l2tp_session_priv(session);
+	struct sock *sk = priv->sock;
+	struct sk_buff *skb;
+
+	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+
+	if (session->session_id == 0 || sk == NULL)
+		return;
+
+	lock_sock(sk);
+
+	if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
+		pppox_unbind_sock(sk);
+		sk->sk_state = PPPOX_DEAD;
+		sk->sk_state_change(sk);
+	}
+
+	/* Purge any queued data */
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+
+	/* One socket reference is dropped per skb removed from the
+	 * reorder queue.
+	 */
+	for (;;) {
+		skb = skb_dequeue(&session->reorder_q);
+		if (skb == NULL)
+			break;
+		kfree_skb(skb);
+		sock_put(sk);
+	}
+
+	release_sock(sk);
+}
+
+/* sk_destruct callback for the session socket (called from sock_put()
+ * when the refcnt reaches zero). Detaches the session context from
+ * the socket and drops the session reference.
+ */
+static void pppol2tp_session_destruct(struct sock *sk)
+{
+	struct l2tp_session *session = sk->sk_user_data;
+
+	if (session == NULL)
+		return;
+
+	sk->sk_user_data = NULL;
+	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+	l2tp_session_dec_refcount(session);
+}
+
+/* Called when the PPPoX socket (session) is closed.
+ *
+ * Returns 0 when there is no socket or after a normal release, and
+ * -EBADF if the socket is already flagged SOCK_DEAD.
+ */
+static int pppol2tp_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct l2tp_session *session;
+	int error;
+
+	if (!sk)
+		return 0;
+
+	error = -EBADF;
+	lock_sock(sk);
+	if (sock_flag(sk, SOCK_DEAD) != 0)
+		goto error;
+
+	pppox_unbind_sock(sk);
+
+	/* Signal the death of the socket. */
+	sk->sk_state = PPPOX_DEAD;
+	sock_orphan(sk);
+	sock->sk = NULL;
+
+	/* On success this holds a reference on sk, dropped below */
+	session = pppol2tp_sock_to_session(sk);
+
+	/* Purge any queued data */
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+	if (session != NULL) {
+		struct sk_buff *skb;
+		/* One socket reference is dropped per reorder-queue skb */
+		while ((skb = skb_dequeue(&session->reorder_q))) {
+			kfree_skb(skb);
+			sock_put(sk);
+		}
+		/* Balance pppol2tp_sock_to_session() */
+		sock_put(sk);
+	}
+
+	release_sock(sk);
+
+	/* This will delete the session context via
+	 * pppol2tp_session_destruct() if the socket's refcnt drops to
+	 * zero.
+	 */
+	sock_put(sk);
+
+	return 0;
+
+error:
+	release_sock(sk);
+	return error;
+}
+
+/* Protocol descriptor passed to sk_alloc() in pppol2tp_create(); each
+ * socket is allocated with the size of a struct pppox_sock.
+ */
+static struct proto pppol2tp_sk_proto = {
+	.name	  = "PPPOL2TP",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct pppox_sock),
+};
+
+/* sk_backlog_rcv handler: feed the skb to l2tp_udp_encap_recv() and
+ * free it here if that call returns non-zero. Always reports
+ * NET_RX_SUCCESS.
+ */
+static int pppol2tp_backlog_recv(struct sock *sk, struct sk_buff *skb)
+{
+	if (l2tp_udp_encap_recv(sk, skb) != 0)
+		kfree_skb(skb);
+
+	return NET_RX_SUCCESS;
+}
+
+/* socket() handler: allocate and initialise a new PPPoL2TP struct sock
+ * in the unconnected state. Returns 0, or -ENOMEM if allocation fails.
+ */
+static int pppol2tp_create(struct net *net, struct socket *sock)
+{
+	struct sock *sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL,
+				   &pppol2tp_sk_proto);
+
+	if (sk == NULL)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+
+	sock->state = SS_UNCONNECTED;
+	sock->ops = &pppol2tp_ops;
+
+	sk->sk_backlog_rcv = pppol2tp_backlog_recv;
+	sk->sk_protocol = PX_PROTO_OL2TP;
+	sk->sk_family = PF_PPPOX;
+	sk->sk_state = PPPOX_NONE;
+	sk->sk_type = SOCK_STREAM;
+	sk->sk_destruct = pppol2tp_session_destruct;
+
+	return 0;
+}
+
+/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
+ *
+ * Creates the tunnel context when both session ids in the address are
+ * zero, otherwise looks up an existing tunnel, then creates a session
+ * context and (for non-zero session ids) registers a PPP channel.
+ *
+ * Returns 0 on success or a negative errno: -EINVAL (wrong protocol or
+ * s_tunnel == 0), -EBUSY (already connected), -EALREADY (socket already
+ * attached), -ENOENT (tunnel missing or without socket), -EEXIST
+ * (session id in use), -ENOMEM, or the error from tunnel creation or
+ * PPP channel registration.
+ */
+static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
+			    int sockaddr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
+	struct pppox_sock *po = pppox_sk(sk);
+	struct l2tp_session *session = NULL;
+	struct l2tp_tunnel *tunnel;
+	struct pppol2tp_session *ps;
+	struct dst_entry *dst;
+	struct l2tp_session_cfg cfg = { 0, };
+	int error = 0;
+
+	lock_sock(sk);
+
+	error = -EINVAL;
+	if (sp->sa_protocol != PX_PROTO_OL2TP)
+		goto end;
+
+	/* Check for already bound sockets */
+	error = -EBUSY;
+	if (sk->sk_state & PPPOX_CONNECTED)
+		goto end;
+
+	/* We don't supporting rebinding anyway */
+	error = -EALREADY;
+	if (sk->sk_user_data)
+		goto end; /* socket is already attached */
+
+	/* Don't bind if s_tunnel is 0 */
+	error = -EINVAL;
+	if (sp->pppol2tp.s_tunnel == 0)
+		goto end;
+
+	/* Special case: create tunnel context if s_session and
+	 * d_session is 0. Otherwise look up tunnel using supplied
+	 * tunnel id.
+	 */
+	if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) {
+		error = l2tp_tunnel_create(sock_net(sk), sp->pppol2tp.fd, 2, sp->pppol2tp.s_tunnel, sp->pppol2tp.d_tunnel, NULL, &tunnel);
+		if (error < 0)
+			goto end;
+	} else {
+		tunnel = l2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel);
+
+		/* Error if we can't find the tunnel */
+		error = -ENOENT;
+		if (tunnel == NULL)
+			goto end;
+
+		/* Error if socket is not prepped */
+		if (tunnel->sock == NULL)
+			goto end;
+	}
+
+	if (tunnel->recv_payload_hook == NULL)
+		tunnel->recv_payload_hook = pppol2tp_recv_payload_hook;
+
+	/* Check that this session doesn't already exist */
+	error = -EEXIST;
+	session = l2tp_session_find(tunnel, sp->pppol2tp.s_session);
+	if (session != NULL)
+		goto end;
+
+	/* Default MTU must allow space for UDP/L2TP/PPP
+	 * headers.
+	 */
+	cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+	cfg.hdr_len = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+	cfg.debug = tunnel->debug;
+
+	/* Allocate and initialize a new session context. */
+	session = l2tp_session_create(sizeof(struct pppol2tp_session),
+				      tunnel, sp->pppol2tp.s_session,
+				      sp->pppol2tp.d_session, &cfg);
+	if (session == NULL) {
+		error = -ENOMEM;
+		goto end;
+	}
+
+	ps = l2tp_session_priv(session);
+	ps->owner	     = current->pid;
+	ps->sock	     = sk;
+	ps->tunnel_sock = tunnel->sock;
+
+	session->recv_skb	= pppol2tp_recv;
+	session->session_close	= pppol2tp_session_close;
+
+	/* We need to know each time a skb is dropped from the reorder
+	 * queue.
+	 */
+	session->ref = pppol2tp_session_sock_hold;
+	session->deref = pppol2tp_session_sock_put;
+
+	/* If PMTU discovery was enabled, use the MTU that was discovered.
+	 * Use the dst we hold a reference on rather than re-reading the
+	 * socket's dst pointer, which could differ or be NULL by now.
+	 */
+	dst = sk_dst_get(sk);
+	if (dst != NULL) {
+		u32 pmtu = dst_mtu(dst);
+		if (pmtu != 0)
+			session->mtu = session->mru = pmtu -
+				PPPOL2TP_HEADER_OVERHEAD;
+		dst_release(dst);
+	}
+
+	/* Special case: if source & dest session_id == 0x0000, this
+	 * socket is being created to manage the tunnel. Just set up
+	 * the internal context for use by ioctl() and sockopt()
+	 * handlers.
+	 */
+	if ((session->session_id == 0) &&
+	    (session->peer_session_id == 0)) {
+		error = 0;
+		goto out_no_ppp;
+	}
+
+	/* The only header we need to worry about is the L2TP
+	 * header. This size is different depending on whether
+	 * sequence numbers are enabled for the data channel.
+	 */
+	po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+
+	po->chan.private = sk;
+	po->chan.ops	 = &pppol2tp_chan_ops;
+	po->chan.mtu	 = session->mtu;
+
+	/* NOTE(review): on failure the session created above is not
+	 * released on this path - confirm whether l2tp_core cleans it up.
+	 */
+	error = ppp_register_net_channel(sock_net(sk), &po->chan);
+	if (error)
+		goto end;
+
+out_no_ppp:
+	/* This is how we get the session context from the socket. */
+	sk->sk_user_data = session;
+	sk->sk_state = PPPOX_CONNECTED;
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: created\n", session->name);
+
+end:
+	release_sock(sk);
+
+	return error;
+}
+
+/* getname() support.
+ *
+ * Fills @uaddr with a struct sockaddr_pppol2tp describing the tunnel
+ * and session ids, owner pid, tunnel fd and the peer address of the
+ * tunnel socket, and stores its size in @usockaddr_len.
+ *
+ * Returns 0 on success, -ENOTCONN if the socket is missing or not in
+ * the PPPOX_CONNECTED state, or -EBADF if the session or tunnel
+ * context cannot be obtained.
+ */
+static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
+			    int *usockaddr_len, int peer)
+{
+	int len = sizeof(struct sockaddr_pppol2tp);
+	struct sockaddr_pppol2tp sp;
+	int error = 0;
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel;
+	struct sock *sk = sock->sk;
+	struct inet_sock *inet;
+	struct pppol2tp_session *pls;
+
+	error = -ENOTCONN;
+	if (sk == NULL)
+		goto end;
+	if (sk->sk_state != PPPOX_CONNECTED)
+		goto end;
+
+	/* Holds a reference on sk until end_put_sess */
+	error = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto end;
+
+	pls = l2tp_session_priv(session);
+	tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock);
+	if (tunnel == NULL) {
+		error = -EBADF;
+		goto end_put_sess;
+	}
+
+	memset(&sp, 0, len);
+	sp.sa_family	= AF_PPPOX;
+	sp.sa_protocol	= PX_PROTO_OL2TP;
+	sp.pppol2tp.fd  = tunnel->fd;
+	sp.pppol2tp.pid = pls->owner;
+	sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
+	sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
+	sp.pppol2tp.s_session = session->session_id;
+	sp.pppol2tp.d_session = session->peer_session_id;
+	/* The peer address lives in the tunnel socket; the PPPoX socket
+	 * is allocated as a struct pppox_sock, not an inet_sock.
+	 */
+	inet = inet_sk(tunnel->sock);
+	sp.pppol2tp.addr.sin_family = AF_INET;
+	sp.pppol2tp.addr.sin_port = inet->inet_dport;
+	sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
+
+	memcpy(uaddr, &sp, len);
+
+	*usockaddr_len = len;
+
+	/* Only report success once the address has been filled in */
+	error = 0;
+
+	sock_put(pls->tunnel_sock);
+end_put_sess:
+	sock_put(sk);
+
+end:
+	return error;
+}
+
+/****************************************************************************
+ * ioctl() handlers.
+ *
+ * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
+ * sockets. However, in order to control kernel tunnel features, we allow
+ * userspace to create a special "tunnel" PPPoX socket which is used for
+ * control only.  Tunnel PPPoX sockets have session_id == 0 and simply allow
+ * the user application to issue L2TP setsockopt(), getsockopt() and ioctl()
+ * calls.
+ ****************************************************************************/
+
+/* Copy the packet/byte/error counters from @stats into the ioctl
+ * result structure @dest. Other fields of @dest are left untouched.
+ */
+static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest,
+				struct l2tp_stats *stats)
+{
+	dest->tx_packets = stats->tx_packets;
+	dest->tx_bytes = stats->tx_bytes;
+	dest->tx_errors = stats->tx_errors;
+	dest->rx_packets = stats->rx_packets;
+	dest->rx_bytes = stats->rx_bytes;
+	dest->rx_seq_discards = stats->rx_seq_discards;
+	dest->rx_oos_packets = stats->rx_oos_packets;
+	dest->rx_errors = stats->rx_errors;
+}
+
+/* Session ioctl helper.
+ *
+ * Handles SIOCGIFMTU/SIOCSIFMTU, PPPIOCGMRU/PPPIOCSMRU,
+ * PPPIOCGFLAGS/PPPIOCSFLAGS and PPPIOCGL2TPSTATS for @session.
+ *
+ * Returns 0 on success, -ENXIO if the command needs a connected socket
+ * and the session socket is not PPPOX_CONNECTED, -EFAULT if user memory
+ * cannot be accessed, or -ENOSYS for an unknown command.
+ */
+static int pppol2tp_session_ioctl(struct l2tp_session *session,
+				  unsigned int cmd, unsigned long arg)
+{
+	struct ifreq ifr;
+	int err = 0;
+	struct sock *sk;
+	int val = (int) arg;
+	struct pppol2tp_session *ps = l2tp_session_priv(session);
+	struct l2tp_tunnel *tunnel = session->tunnel;
+	struct pppol2tp_ioc_stats stats;
+
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
+	       "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
+	       session->name, cmd, arg);
+
+	/* Keep the session socket alive for the duration of the call */
+	sk = ps->sock;
+	sock_hold(sk);
+
+	switch (cmd) {
+	case SIOCGIFMTU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
+			break;
+		ifr.ifr_mtu = session->mtu;
+		if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get mtu=%d\n", session->name, session->mtu);
+		err = 0;
+		break;
+
+	case SIOCSIFMTU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
+			break;
+
+		session->mtu = ifr.ifr_mtu;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set mtu=%d\n", session->name, session->mtu);
+		err = 0;
+		break;
+
+	case PPPIOCGMRU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (put_user(session->mru, (int __user *) arg))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get mru=%d\n", session->name, session->mru);
+		err = 0;
+		break;
+
+	case PPPIOCSMRU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (get_user(val, (int __user *) arg))
+			break;
+
+		session->mru = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set mru=%d\n", session->name, session->mru);
+		err = 0;
+		break;
+
+	case PPPIOCGFLAGS:
+		err = -EFAULT;
+		if (put_user(ps->flags, (int __user *) arg))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get flags=%d\n", session->name, ps->flags);
+		err = 0;
+		break;
+
+	case PPPIOCSFLAGS:
+		err = -EFAULT;
+		if (get_user(val, (int __user *) arg))
+			break;
+		ps->flags = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set flags=%d\n", session->name, ps->flags);
+		err = 0;
+		break;
+
+	case PPPIOCGL2TPSTATS:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		memset(&stats, 0, sizeof(stats));
+		stats.tunnel_id = tunnel->tunnel_id;
+		stats.session_id = session->session_id;
+		pppol2tp_copy_stats(&stats, &session->stats);
+		/* A failed copy-out is a fault, not "not connected" */
+		err = -EFAULT;
+		if (copy_to_user((void __user *) arg, &stats,
+				 sizeof(stats)))
+			break;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get L2TP stats\n", session->name);
+		err = 0;
+		break;
+
+	default:
+		err = -ENOSYS;
+		break;
+	}
+
+	sock_put(sk);
+
+	return err;
+}
+
+/* Tunnel ioctl helper.
+ *
+ * Note the special handling for PPPIOCGL2TPSTATS below. If the ioctl data
+ * specifies a session_id, the session ioctl handler is called. This allows an
+ * application to retrieve session stats via a tunnel socket.
+ *
+ * Returns 0 on success, -ENXIO if the tunnel socket state lacks
+ * PPPOX_CONNECTED, -EFAULT on a user-memory fault, -EBADR if the
+ * requested session does not exist, or -ENOSYS for any other command.
+ */
+static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
+				 unsigned int cmd, unsigned long arg)
+{
+	int err = 0;
+	struct sock *sk;
+	struct pppol2tp_ioc_stats stats;
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
+	       "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n",
+	       tunnel->name, cmd, arg);
+
+	/* Keep the tunnel socket alive for the duration of the call */
+	sk = tunnel->sock;
+	sock_hold(sk);
+
+	switch (cmd) {
+	case PPPIOCGL2TPSTATS:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		/* Read the request first: its session_id selects the target */
+		if (copy_from_user(&stats, (void __user *) arg,
+				   sizeof(stats))) {
+			err = -EFAULT;
+			break;
+		}
+		if (stats.session_id != 0) {
+			/* resend to session ioctl handler */
+			struct l2tp_session *session =
+				l2tp_session_find(tunnel, stats.session_id);
+			if (session != NULL)
+				err = pppol2tp_session_ioctl(session, cmd, arg);
+			else
+				err = -EBADR;
+			break;
+		}
+#ifdef CONFIG_XFRM
+		stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0;
+#endif
+		pppol2tp_copy_stats(&stats, &tunnel->stats);
+		if (copy_to_user((void __user *) arg, &stats, sizeof(stats))) {
+			err = -EFAULT;
+			break;
+		}
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get L2TP stats\n", tunnel->name);
+		err = 0;
+		break;
+
+	default:
+		err = -ENOSYS;
+		break;
+	}
+
+	sock_put(sk);
+
+	return err;
+}
+
+/* Main ioctl() handler.
+ * Dispatch to tunnel or session helpers depending on the socket.
+ *
+ * Returns 0 if the socket has no struct sock, -EBADF if the socket is
+ * dead or its session/tunnel context cannot be obtained, -ENOTCONN if
+ * no session is attached or the socket is neither connected nor bound,
+ * otherwise the result of the selected helper.
+ */
+static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel;
+	struct pppol2tp_session *ps;
+	int err;
+
+	if (!sk)
+		return 0;
+
+	err = -EBADF;
+	if (sock_flag(sk, SOCK_DEAD) != 0)
+		goto end;
+
+	err = -ENOTCONN;
+	if ((sk->sk_user_data == NULL) ||
+	    (!(sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND))))
+		goto end;
+
+	/* Get session context from the socket (holds a reference on sk,
+	 * dropped at end_put_sess)
+	 */
+	err = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto end;
+
+	/* Special case: if session's session_id is zero, treat ioctl as a
+	 * tunnel ioctl
+	 */
+	ps = l2tp_session_priv(session);
+	if ((session->session_id == 0) &&
+	    (session->peer_session_id == 0)) {
+		err = -EBADF;
+		tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
+		if (tunnel == NULL)
+			goto end_put_sess;
+
+		err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
+		/* Balance l2tp_sock_to_tunnel() */
+		sock_put(ps->tunnel_sock);
+		goto end_put_sess;
+	}
+
+	err = pppol2tp_session_ioctl(session, cmd, arg);
+
+end_put_sess:
+	sock_put(sk);
+end:
+	return err;
+}
+
+/*****************************************************************************
+ * setsockopt() / getsockopt() support.
+ *
+ * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
+ * sockets. In order to control kernel tunnel features, we allow userspace to
+ * create a special "tunnel" PPPoX socket which is used for control only.
+ * Tunnel PPPoX sockets have session_id == 0 and simply allow the user
+ * application to issue L2TP setsockopt(), getsockopt() and ioctl() calls.
+ *****************************************************************************/
+
+/* Tunnel setsockopt() helper. Only PPPOL2TP_SO_DEBUG is supported;
+ * any other option yields -ENOPROTOOPT.
+ */
+static int pppol2tp_tunnel_setsockopt(struct sock *sk,
+				      struct l2tp_tunnel *tunnel,
+				      int optname, int val)
+{
+	if (optname != PPPOL2TP_SO_DEBUG)
+		return -ENOPROTOOPT;
+
+	tunnel->debug = val;
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: set debug=%x\n", tunnel->name, tunnel->debug);
+
+	return 0;
+}
+
+/* Session setsockopt helper.
+ *
+ * Applies @val to the session option selected by @optname. The
+ * RECVSEQ, SENDSEQ and LNSMODE options accept only 0 or 1.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range boolean value, or
+ * -ENOPROTOOPT for an unknown option.
+ */
+static int pppol2tp_session_setsockopt(struct sock *sk,
+				       struct l2tp_session *session,
+				       int optname, int val)
+{
+	int err = 0;
+	struct pppol2tp_session *ps = l2tp_session_priv(session);
+
+	switch (optname) {
+	case PPPOL2TP_SO_RECVSEQ:
+		if ((val != 0) && (val != 1)) {
+			err = -EINVAL;
+			break;
+		}
+		session->recv_seq = val ? -1 : 0;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set recv_seq=%d\n", session->name, session->recv_seq);
+		break;
+
+	case PPPOL2TP_SO_SENDSEQ:
+		if ((val != 0) && (val != 1)) {
+			err = -EINVAL;
+			break;
+		}
+		session->send_seq = val ? -1 : 0;
+		{
+			/* Keep the PPP channel's header length in step with
+			 * the sequence-number setting.
+			 */
+			struct sock *ssk      = ps->sock;
+			struct pppox_sock *po = pppox_sk(ssk);
+			po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
+				PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+		}
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set send_seq=%d\n", session->name, session->send_seq);
+		break;
+
+	case PPPOL2TP_SO_LNSMODE:
+		if ((val != 0) && (val != 1)) {
+			err = -EINVAL;
+			break;
+		}
+		session->lns_mode = val ? -1 : 0;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set lns_mode=%d\n", session->name, session->lns_mode);
+		break;
+
+	case PPPOL2TP_SO_DEBUG:
+		session->debug = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set debug=%x\n", session->name, session->debug);
+		break;
+
+	case PPPOL2TP_SO_REORDERTO:
+		/* @val is in milliseconds; stored as jiffies */
+		session->reorder_timeout = msecs_to_jiffies(val);
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set reorder_timeout=%d\n", session->name, session->reorder_timeout);
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+		break;
+	}
+
+	return err;
+}
+
+/* Main setsockopt() entry point.
+ * Does API checks, then calls either the tunnel or session setsockopt
+ * handler, according to whether the PPPoL2TP socket is a for a regular
+ * session or the special tunnel type.
+ *
+ * Options at a level other than SOL_PPPOL2TP are passed to
+ * udp_prot.setsockopt(). Returns the helper's result, or -EINVAL if
+ * @optlen is smaller than an int, -EFAULT if @optval cannot be read,
+ * -ENOTCONN if no session is attached, -EBADF if the session or tunnel
+ * context cannot be obtained.
+ */
+static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel;
+	struct pppol2tp_session *ps;
+	int val;
+	int err;
+
+	if (level != SOL_PPPOL2TP)
+		return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	err = -ENOTCONN;
+	if (sk->sk_user_data == NULL)
+		goto end;
+
+	/* Get session context from the socket */
+	err = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto end;
+
+	/* Special case: if session_id == 0x0000, treat as operation on tunnel
+	 */
+	ps = l2tp_session_priv(session);
+	if ((session->session_id == 0) &&
+	    (session->peer_session_id == 0)) {
+		err = -EBADF;
+		tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
+		if (tunnel == NULL)
+			goto end_put_sess;
+
+		err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
+		sock_put(ps->tunnel_sock);
+	} else
+		err = pppol2tp_session_setsockopt(sk, session, optname, val);
+
+	/* err carries the helper's result (0, -EINVAL or -ENOPROTOOPT)
+	 * and must reach the caller unchanged.
+	 */
+
+end_put_sess:
+	sock_put(sk);
+end:
+	return err;
+}
+
+/* Tunnel getsockopt helper. Only PPPOL2TP_SO_DEBUG is supported; any
+ * other option yields -ENOPROTOOPT and leaves *val untouched.
+ * NOTE(review): formerly documented as "called with sock locked", but
+ * the caller in this file does not take lock_sock() - confirm.
+ */
+static int pppol2tp_tunnel_getsockopt(struct sock *sk,
+				      struct l2tp_tunnel *tunnel,
+				      int optname, int *val)
+{
+	if (optname != PPPOL2TP_SO_DEBUG)
+		return -ENOPROTOOPT;
+
+	*val = tunnel->debug;
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: get debug=%x\n", tunnel->name, tunnel->debug);
+
+	return 0;
+}
+
+/* Session getsockopt helper.
+ *
+ * Stores the value of the session option @optname in *val. Returns 0,
+ * or -ENOPROTOOPT for an unknown option, in which case *val is not
+ * written.
+ * NOTE(review): formerly documented as "called with sock locked", but
+ * the caller in this file does not take lock_sock() - confirm.
+ */
+static int pppol2tp_session_getsockopt(struct sock *sk,
+				       struct l2tp_session *session,
+				       int optname, int *val)
+{
+	int err = 0;
+
+	switch (optname) {
+	case PPPOL2TP_SO_RECVSEQ:
+		*val = session->recv_seq;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get recv_seq=%d\n", session->name, *val);
+		break;
+
+	case PPPOL2TP_SO_SENDSEQ:
+		*val = session->send_seq;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get send_seq=%d\n", session->name, *val);
+		break;
+
+	case PPPOL2TP_SO_LNSMODE:
+		*val = session->lns_mode;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get lns_mode=%d\n", session->name, *val);
+		break;
+
+	case PPPOL2TP_SO_DEBUG:
+		*val = session->debug;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get debug=%d\n", session->name, *val);
+		break;
+
+	case PPPOL2TP_SO_REORDERTO:
+		/* Stored in jiffies; reported in milliseconds */
+		*val = (int) jiffies_to_msecs(session->reorder_timeout);
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get reorder_timeout=%d\n", session->name, *val);
+		break;
+
+	default:
+		err = -ENOPROTOOPT;
+	}
+
+	return err;
+}
+
+/* Main getsockopt() entry point.
+ * Does API checks, then calls either the tunnel or session getsockopt
+ * handler, according to whether the PPPoX socket is a for a regular session
+ * or the special tunnel type.
+ *
+ * Options at a level other than SOL_PPPOL2TP are passed to
+ * udp_prot.getsockopt(). Returns 0 on success, -EFAULT on a
+ * user-memory fault, -EINVAL for a negative length, -ENOTCONN if no
+ * session is attached, -EBADF if the session or tunnel context cannot
+ * be obtained, or the helper's error (e.g. -ENOPROTOOPT).
+ */
+static int pppol2tp_getsockopt(struct socket *sock, int level,
+			       int optname, char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel;
+	int val, len;
+	int err;
+	struct pppol2tp_session *ps;
+
+	if (level != SOL_PPPOL2TP)
+		return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+
+	if (get_user(len, (int __user *) optlen))
+		return -EFAULT;
+
+	/* Reject negative lengths before the unsigned clamp below, which
+	 * would otherwise turn them into sizeof(int).
+	 */
+	if (len < 0)
+		return -EINVAL;
+
+	len = min_t(unsigned int, len, sizeof(int));
+
+	err = -ENOTCONN;
+	if (sk->sk_user_data == NULL)
+		goto end;
+
+	/* Get the session context */
+	err = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto end;
+
+	/* Special case: if session_id == 0x0000, treat as operation on tunnel */
+	ps = l2tp_session_priv(session);
+	if ((session->session_id == 0) &&
+	    (session->peer_session_id == 0)) {
+		err = -EBADF;
+		tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
+		if (tunnel == NULL)
+			goto end_put_sess;
+
+		err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
+		sock_put(ps->tunnel_sock);
+	} else
+		err = pppol2tp_session_getsockopt(sk, session, optname, &val);
+
+	/* On helper failure val was never written: report the error
+	 * instead of copying uninitialised stack data to userspace.
+	 */
+	if (err)
+		goto end_put_sess;
+
+	err = -EFAULT;
+	if (put_user(len, (int __user *) optlen))
+		goto end_put_sess;
+
+	if (copy_to_user((void __user *) optval, &val, len))
+		goto end_put_sess;
+
+	err = 0;
+
+end_put_sess:
+	sock_put(sk);
+end:
+	return err;
+}
+
+/*****************************************************************************
+ * /proc filesystem for debug
+ *****************************************************************************/
+
+static unsigned int pppol2tp_net_id;
+
+#ifdef CONFIG_PROC_FS
+
+/* Iteration state for the /proc seq_file; reached through m->private
+ * in pppol2tp_seq_start().
+ */
+struct pppol2tp_seq_data {
+	struct seq_net_private p;
+	int tunnel_idx;			/* current tunnel */
+	int session_idx;		/* index of session within current tunnel */
+	struct l2tp_tunnel *tunnel;
+	struct l2tp_session *session;	/* NULL means get next tunnel */
+};
+
+/* Load the tunnel at the current index into pd->tunnel (NULL when
+ * there is none) and advance the index.
+ */
+static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
+{
+	int idx = pd->tunnel_idx;
+
+	pd->tunnel = l2tp_tunnel_find_nth(net, idx);
+	pd->tunnel_idx = idx + 1;
+}
+
+/* Load the next session of the current tunnel into pd->session. When
+ * the tunnel has no more sessions, reset the session index and move on
+ * to the next tunnel.
+ */
+static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
+{
+	pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
+	if (pd->session != NULL) {
+		pd->session_idx++;
+		return;
+	}
+
+	pd->session_idx = 0;
+	pppol2tp_next_tunnel(net, pd);
+}
+
+/* seq_file start: position 0 yields SEQ_START_TOKEN; any later
+ * position advances the iterator by one tunnel or session and returns
+ * the iterator state, or NULL once both tunnel and session are NULL.
+ */
+static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs)
+{
+	struct pppol2tp_seq_data *pd;
+	struct net *net;
+
+	if (*offs == 0)
+		return SEQ_START_TOKEN;
+
+	BUG_ON(m->private == NULL);
+	pd = m->private;
+	net = seq_file_net(m);
+
+	if (pd->tunnel != NULL)
+		pppol2tp_next_session(net, pd);
+	else
+		pppol2tp_next_tunnel(net, pd);
+
+	/* NULL tunnel and session indicates end of list */
+	if (pd->tunnel == NULL && pd->session == NULL)
+		return NULL;
+
+	return pd;
+}
+
+/* seq_file next: only bumps the position and returns NULL; the actual
+ * advance to the next tunnel/session is done in pppol2tp_seq_start().
+ */
+static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	(*pos)++;
+	return NULL;
+}
+
+/* seq_file stop: intentionally empty. */
+static void pppol2tp_seq_stop(struct seq_file *p, void *v)
+{
+	/* nothing to do */
+}
+
+/* Print the two /proc lines for one tunnel: name, whether the tunnel
+ * socket's sk_user_data points back at the tunnel, ref_count - 1, then
+ * the debug mask and tx/rx packet, byte and error counters.
+ * @v is the struct l2tp_tunnel to show.
+ */
+static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
+{
+	struct l2tp_tunnel *tunnel = v;
+
+	seq_printf(m, "\nTUNNEL '%s', %c %d\n",
+		   tunnel->name,
+		   (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N',
+		   atomic_read(&tunnel->ref_count) - 1);
+	seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n",
+		   tunnel->debug,
+		   (unsigned long long)tunnel->stats.tx_packets,
+		   (unsigned long long)tunnel->stats.tx_bytes,
+		   (unsigned long long)tunnel->stats.tx_errors,
+		   (unsigned long long)tunnel->stats.rx_packets,
+		   (unsigned long long)tunnel->stats.rx_bytes,
+		   (unsigned long long)tunnel->stats.rx_errors);
+}
+
+/* Print the three /proc lines for one session: identity and socket
+ * state, configuration (mtu/mru, sequencing, mode, debug, reorder
+ * timeout in ms), then nr/ns and the tx/rx counters.
+ * @v is the struct l2tp_session to show.
+ */
+static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
+{
+	struct l2tp_session *session = v;
+	struct l2tp_tunnel *tunnel = session->tunnel;
+	struct pppol2tp_session *ps = l2tp_session_priv(session);
+	u32 ip = 0;
+	u16 port = 0;
+
+	/* Source address/port come from the tunnel socket; they stay 0
+	 * when the tunnel has no socket.
+	 */
+	if (tunnel->sock) {
+		struct inet_sock *inet = inet_sk(tunnel->sock);
+		ip = ntohl(inet->inet_saddr);
+		port = ntohs(inet->inet_sport);
+	}
+
+	seq_printf(m, "  SESSION '%s' %08X/%d %04X/%04X -> "
+		   "%04X/%04X %d %c\n",
+		   session->name, ip, port,
+		   tunnel->tunnel_id,
+		   session->session_id,
+		   tunnel->peer_tunnel_id,
+		   session->peer_session_id,
+		   ps->sock->sk_state,
+		   (session == ps->sock->sk_user_data) ?
+		   'Y' : 'N');
+	seq_printf(m, "   %d/%d/%c/%c/%s %08x %u\n",
+		   session->mtu, session->mru,
+		   session->recv_seq ? 'R' : '-',
+		   session->send_seq ? 'S' : '-',
+		   session->lns_mode ? "LNS" : "LAC",
+		   session->debug,
+		   jiffies_to_msecs(session->reorder_timeout));
+	seq_printf(m, "   %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n",
+		   session->nr, session->ns,
+		   (unsigned long long)session->stats.tx_packets,
+		   (unsigned long long)session->stats.tx_bytes,
+		   (unsigned long long)session->stats.tx_errors,
+		   (unsigned long long)session->stats.rx_packets,
+		   (unsigned long long)session->stats.rx_bytes,
+		   (unsigned long long)session->stats.rx_errors);
+}
+
+static int pppol2tp_seq_show(struct seq_file *m, void *v)
+{
+	struct pppol2tp_seq_data *pd = v;
+
+	/* The start token stands for line 1: print the legend and stop. */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n");
+		seq_puts(m, "TUNNEL name, user-data-ok session-count\n");
+		seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+		seq_puts(m, "  SESSION name, addr/port src-tid/sid "
+			 "dest-tid/sid state user-data-ok\n");
+		seq_puts(m, "   mtu/mru/rcvseq/sendseq/lns debug reorderto\n");
+		seq_puts(m, "   nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+		return 0;
+	}
+
+	/* A cursor that carries a session shows that session;
+	 * without one it shows the tunnel itself.
+	 */
+	if (pd->session != NULL)
+		pppol2tp_seq_session_show(m, pd->session);
+	else
+		pppol2tp_seq_tunnel_show(m, pd->tunnel);
+
+	return 0;
+}
+
+static const struct seq_operations pppol2tp_seq_ops = {
+	.start		= pppol2tp_seq_start,	/* advances the cursor itself */
+	.next		= pppol2tp_seq_next,	/* bumps *pos, returns NULL */
+	.stop		= pppol2tp_seq_stop,	/* empty */
+	.show		= pppol2tp_seq_show,	/* header, tunnel or session */
+};
+
+/* Called when our /proc file is opened. seq_open_net() is asked for a
+ * private area the size of struct pppol2tp_seq_data; the iterator reaches
+ * it through m->private to track its tunnel / session position.
+ */
+static int pppol2tp_proc_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &pppol2tp_seq_ops,
+			    sizeof(struct pppol2tp_seq_data));
+}
+
+static const struct file_operations pppol2tp_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= pppol2tp_proc_open,	/* wraps seq_open_net() */
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_net,	/* presumably pairs with seq_open_net() */
+};
+
+#endif /* CONFIG_PROC_FS */
+
+/*****************************************************************************
+ * Network namespace
+ *****************************************************************************/
+
+static __net_init int pppol2tp_init_net(struct net *net)
+{
+	struct proc_dir_entry *entry;
+
+	/* Create the read-only "pppol2tp" proc file for this namespace;
+	 * a NULL entry is reported to the caller as -ENOMEM.
+	 */
+	entry = proc_net_fops_create(net, "pppol2tp", S_IRUGO,
+				     &pppol2tp_proc_fops);
+	if (entry == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static __net_exit void pppol2tp_exit_net(struct net *net)
+{
+	proc_net_remove(net, "pppol2tp");	/* entry made in pppol2tp_init_net() */
+}
+
+static struct pernet_operations pppol2tp_net_ops = {
+	.init = pppol2tp_init_net,	/* creates the "pppol2tp" proc entry */
+	.exit = pppol2tp_exit_net,	/* removes the "pppol2tp" proc entry */
+	.id   = &pppol2tp_net_id,
+};
+
+/*****************************************************************************
+ * Init and cleanup
+ *****************************************************************************/
+
+static const struct proto_ops pppol2tp_ops = {
+	.family		= AF_PPPOX,
+	.owner		= THIS_MODULE,
+	.release	= pppol2tp_release,
+	.bind		= sock_no_bind,		/* sock_no_*: presumably generic "unsupported" stubs */
+	.connect	= pppol2tp_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.getname	= pppol2tp_getname,
+	.poll		= datagram_poll,
+	.listen		= sock_no_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= pppol2tp_setsockopt,
+	.getsockopt	= pppol2tp_getsockopt,
+	.sendmsg	= pppol2tp_sendmsg,
+	.recvmsg	= pppol2tp_recvmsg,
+	.mmap		= sock_no_mmap,
+	.ioctl		= pppox_ioctl,		/* NOTE(review): presumably reaches pppol2tp_proto.ioctl -- confirm */
+};
+
+static struct pppox_proto pppol2tp_proto = {	/* registered as PX_PROTO_OL2TP in pppol2tp_init() */
+	.create		= pppol2tp_create,
+	.ioctl		= pppol2tp_ioctl
+};
+
+static int __init pppol2tp_init(void)
+{
+	int err;
+
+	/* Register in order; on failure unwind what was already set up. */
+	err = register_pernet_device(&pppol2tp_net_ops);
+	if (err)
+		return err;
+
+	err = proto_register(&pppol2tp_sk_proto, 0);
+	if (err)
+		goto out_unregister_pppol2tp_pernet;
+
+	err = register_pppox_proto(PX_PROTO_OL2TP, &pppol2tp_proto);
+	if (err)
+		goto out_unregister_pppol2tp_proto;
+
+	printk(KERN_INFO "PPPoL2TP kernel driver, %s\n",
+	       PPPOL2TP_DRV_VERSION);
+	return 0;
+
+out_unregister_pppol2tp_proto:
+	proto_unregister(&pppol2tp_sk_proto);
+out_unregister_pppol2tp_pernet:
+	unregister_pernet_device(&pppol2tp_net_ops);
+	return err;
+}
+
+static void __exit pppol2tp_exit(void)
+{
+	unregister_pppox_proto(PX_PROTO_OL2TP);	/* reverse order of pppol2tp_init() */
+	proto_unregister(&pppol2tp_sk_proto);
+	unregister_pernet_device(&pppol2tp_net_ops);
+}
+
+module_init(pppol2tp_init);
+module_exit(pppol2tp_exit);
+
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("PPP over L2TP over UDP");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(PPPOL2TP_DRV_VERSION);
diff --git a/net/l2tp/pppol2tp.c b/net/l2tp/pppol2tp.c
deleted file mode 100644
index 449a9825200d..000000000000
--- a/net/l2tp/pppol2tp.c
+++ /dev/null
@@ -1,2680 +0,0 @@
-/*****************************************************************************
- * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets
- *
- * PPPoX    --- Generic PPP encapsulation socket family
- * PPPoL2TP --- PPP over L2TP (RFC 2661)
- *
- * Version:	1.0.0
- *
- * Authors:	Martijn van Oosterhout <kleptog@svana.org>
- *		James Chapman (jchapman@katalix.com)
- * Contributors:
- *		Michal Ostrowski <mostrows@speakeasy.net>
- *		Arnaldo Carvalho de Melo <acme@xconectiva.com.br>
- *		David S. Miller (davem@redhat.com)
- *
- * License:
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
- */
-
-/* This driver handles only L2TP data frames; control frames are handled by a
- * userspace application.
- *
- * To send data in an L2TP session, userspace opens a PPPoL2TP socket and
- * attaches it to a bound UDP socket with local tunnel_id / session_id and
- * peer tunnel_id / session_id set. Data can then be sent or received using
- * regular socket sendmsg() / recvmsg() calls. Kernel parameters of the socket
- * can be read or modified using ioctl() or [gs]etsockopt() calls.
- *
- * When a PPPoL2TP socket is connected with local and peer session_id values
- * zero, the socket is treated as a special tunnel management socket.
- *
- * Here's example userspace code to create a socket for sending/receiving data
- * over an L2TP session:-
- *
- *	struct sockaddr_pppol2tp sax;
- *	int fd;
- *	int session_fd;
- *
- *	fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
- *
- *	sax.sa_family = AF_PPPOX;
- *	sax.sa_protocol = PX_PROTO_OL2TP;
- *	sax.pppol2tp.fd = tunnel_fd;	// bound UDP socket
- *	sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
- *	sax.pppol2tp.addr.sin_port = addr->sin_port;
- *	sax.pppol2tp.addr.sin_family = AF_INET;
- *	sax.pppol2tp.s_tunnel  = tunnel_id;
- *	sax.pppol2tp.s_session = session_id;
- *	sax.pppol2tp.d_tunnel  = peer_tunnel_id;
- *	sax.pppol2tp.d_session = peer_session_id;
- *
- *	session_fd = connect(fd, (struct sockaddr *)&sax, sizeof(sax));
- *
- * A pppd plugin that allows PPP traffic to be carried over L2TP using
- * this driver is available from the OpenL2TP project at
- * http://openl2tp.sourceforge.net.
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/list.h>
-#include <asm/uaccess.h>
-
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/kthread.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <linux/jiffies.h>
-
-#include <linux/netdevice.h>
-#include <linux/net.h>
-#include <linux/inetdevice.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/if_pppox.h>
-#include <linux/if_pppol2tp.h>
-#include <net/sock.h>
-#include <linux/ppp_channel.h>
-#include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
-#include <linux/file.h>
-#include <linux/hash.h>
-#include <linux/sort.h>
-#include <linux/proc_fs.h>
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/dst.h>
-#include <net/ip.h>
-#include <net/udp.h>
-#include <net/xfrm.h>
-
-#include <asm/byteorder.h>
-#include <asm/atomic.h>
-
-
-#define PPPOL2TP_DRV_VERSION	"V1.0"
-
-/* L2TP header constants */
-#define L2TP_HDRFLAG_T	   0x8000
-#define L2TP_HDRFLAG_L	   0x4000
-#define L2TP_HDRFLAG_S	   0x0800
-#define L2TP_HDRFLAG_O	   0x0200
-#define L2TP_HDRFLAG_P	   0x0100
-
-#define L2TP_HDR_VER_MASK  0x000F
-#define L2TP_HDR_VER	   0x0002
-
-/* Space for UDP, L2TP and PPP headers */
-#define PPPOL2TP_HEADER_OVERHEAD	40
-
-/* Just some random numbers */
-#define L2TP_TUNNEL_MAGIC	0x42114DDA
-#define L2TP_SESSION_MAGIC	0x0C04EB7D
-
-#define PPPOL2TP_HASH_BITS	4
-#define PPPOL2TP_HASH_SIZE	(1 << PPPOL2TP_HASH_BITS)
-
-/* Default trace flags */
-#define PPPOL2TP_DEFAULT_DEBUG_FLAGS	0
-
-#define PRINTK(_mask, _type, _lvl, _fmt, args...)			\
-	do {								\
-		if ((_mask) & (_type))					\
-			printk(_lvl "PPPOL2TP: " _fmt, ##args);		\
-	} while(0)
-
-/* Number of bytes to build transmit L2TP headers.
- * Unfortunately the size is different depending on whether sequence numbers
- * are enabled.
- */
-#define PPPOL2TP_L2TP_HDR_SIZE_SEQ		10
-#define PPPOL2TP_L2TP_HDR_SIZE_NOSEQ		6
-
-struct pppol2tp_tunnel;
-
-/* Describes a session. It is the sk_user_data field in the PPPoL2TP
- * socket. Contains information to determine incoming packets and transmit
- * outgoing ones.
- */
-struct pppol2tp_session
-{
-	int			magic;		/* should be
-						 * L2TP_SESSION_MAGIC */
-	int			owner;		/* pid that opened the socket */
-
-	struct sock		*sock;		/* Pointer to the session
-						 * PPPoX socket */
-	struct sock		*tunnel_sock;	/* Pointer to the tunnel UDP
-						 * socket */
-
-	struct pppol2tp_addr	tunnel_addr;	/* Description of tunnel */
-
-	struct pppol2tp_tunnel	*tunnel;	/* back pointer to tunnel
-						 * context */
-
-	char			name[20];	/* "sess xxxxx/yyyyy", where
-						 * x=tunnel_id, y=session_id */
-	int			mtu;
-	int			mru;
-	int			flags;		/* accessed by PPPIOCGFLAGS.
-						 * Unused. */
-	unsigned		recv_seq:1;	/* expect receive packets with
-						 * sequence numbers? */
-	unsigned		send_seq:1;	/* send packets with sequence
-						 * numbers? */
-	unsigned		lns_mode:1;	/* behave as LNS? LAC enables
-						 * sequence numbers under
-						 * control of LNS. */
-	int			debug;		/* bitmask of debug message
-						 * categories */
-	int			reorder_timeout; /* configured reorder timeout
-						  * (in jiffies) */
-	u16			nr;		/* session NR state (receive) */
-	u16			ns;		/* session NR state (send) */
-	struct sk_buff_head	reorder_q;	/* receive reorder queue */
-	struct pppol2tp_ioc_stats stats;
-	struct hlist_node	hlist;		/* Hash list node */
-};
-
-/* The sk_user_data field of the tunnel's UDP socket. It contains info to track
- * all the associated sessions so incoming packets can be sorted out
- */
-struct pppol2tp_tunnel
-{
-	int			magic;		/* Should be L2TP_TUNNEL_MAGIC */
-	rwlock_t		hlist_lock;	/* protect session_hlist */
-	struct hlist_head	session_hlist[PPPOL2TP_HASH_SIZE];
-						/* hashed list of sessions,
-						 * hashed by id */
-	int			debug;		/* bitmask of debug message
-						 * categories */
-	char			name[12];	/* "tunl xxxxx" */
-	struct pppol2tp_ioc_stats stats;
-
-	void (*old_sk_destruct)(struct sock *);
-
-	struct sock		*sock;		/* Parent socket */
-	struct list_head	list;		/* Keep a list of all open
-						 * prepared sockets */
-	struct net		*pppol2tp_net;	/* the net we belong to */
-
-	atomic_t		ref_count;
-};
-
-/* Private data stored for received packets in the skb.
- */
-struct pppol2tp_skb_cb {
-	u16			ns;
-	u16			nr;
-	u16			has_seq;
-	u16			length;
-	unsigned long		expires;
-};
-
-#define PPPOL2TP_SKB_CB(skb)	((struct pppol2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
-
-static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
-static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel);
-
-static atomic_t pppol2tp_tunnel_count;
-static atomic_t pppol2tp_session_count;
-static struct ppp_channel_ops pppol2tp_chan_ops = { pppol2tp_xmit , NULL };
-static const struct proto_ops pppol2tp_ops;
-
-/* per-net private data for this module */
-static int pppol2tp_net_id __read_mostly;
-struct pppol2tp_net {
-	struct list_head pppol2tp_tunnel_list;
-	rwlock_t pppol2tp_tunnel_list_lock;
-};
-
-static inline struct pppol2tp_net *pppol2tp_pernet(struct net *net)
-{
-	BUG_ON(!net);
-
-	return net_generic(net, pppol2tp_net_id);
-}
-
-/* Helpers to obtain tunnel/session contexts from sockets.
- */
-static inline struct pppol2tp_session *pppol2tp_sock_to_session(struct sock *sk)
-{
-	struct pppol2tp_session *session;
-
-	if (sk == NULL)
-		return NULL;
-
-	sock_hold(sk);
-	session = (struct pppol2tp_session *)(sk->sk_user_data);
-	if (session == NULL) {
-		sock_put(sk);
-		goto out;
-	}
-
-	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
-out:
-	return session;
-}
-
-static inline struct pppol2tp_tunnel *pppol2tp_sock_to_tunnel(struct sock *sk)
-{
-	struct pppol2tp_tunnel *tunnel;
-
-	if (sk == NULL)
-		return NULL;
-
-	sock_hold(sk);
-	tunnel = (struct pppol2tp_tunnel *)(sk->sk_user_data);
-	if (tunnel == NULL) {
-		sock_put(sk);
-		goto out;
-	}
-
-	BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-out:
-	return tunnel;
-}
-
-/* Tunnel reference counts. Incremented per session that is added to
- * the tunnel.
- */
-static inline void pppol2tp_tunnel_inc_refcount(struct pppol2tp_tunnel *tunnel)
-{
-	atomic_inc(&tunnel->ref_count);
-}
-
-static inline void pppol2tp_tunnel_dec_refcount(struct pppol2tp_tunnel *tunnel)
-{
-	if (atomic_dec_and_test(&tunnel->ref_count))
-		pppol2tp_tunnel_free(tunnel);
-}
-
-/* Session hash list.
- * The session_id SHOULD be random according to RFC2661, but several
- * L2TP implementations (Cisco and Microsoft) use incrementing
- * session_ids.  So we do a real hash on the session_id, rather than a
- * simple bitmask.
- */
-static inline struct hlist_head *
-pppol2tp_session_id_hash(struct pppol2tp_tunnel *tunnel, u16 session_id)
-{
-	unsigned long hash_val = (unsigned long) session_id;
-	return &tunnel->session_hlist[hash_long(hash_val, PPPOL2TP_HASH_BITS)];
-}
-
-/* Lookup a session by id
- */
-static struct pppol2tp_session *
-pppol2tp_session_find(struct pppol2tp_tunnel *tunnel, u16 session_id)
-{
-	struct hlist_head *session_list =
-		pppol2tp_session_id_hash(tunnel, session_id);
-	struct pppol2tp_session *session;
-	struct hlist_node *walk;
-
-	read_lock_bh(&tunnel->hlist_lock);
-	hlist_for_each_entry(session, walk, session_list, hlist) {
-		if (session->tunnel_addr.s_session == session_id) {
-			read_unlock_bh(&tunnel->hlist_lock);
-			return session;
-		}
-	}
-	read_unlock_bh(&tunnel->hlist_lock);
-
-	return NULL;
-}
-
-/* Lookup a tunnel by id
- */
-static struct pppol2tp_tunnel *pppol2tp_tunnel_find(struct net *net, u16 tunnel_id)
-{
-	struct pppol2tp_tunnel *tunnel;
-	struct pppol2tp_net *pn = pppol2tp_pernet(net);
-
-	read_lock_bh(&pn->pppol2tp_tunnel_list_lock);
-	list_for_each_entry(tunnel, &pn->pppol2tp_tunnel_list, list) {
-		if (tunnel->stats.tunnel_id == tunnel_id) {
-			read_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-			return tunnel;
-		}
-	}
-	read_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-
-	return NULL;
-}
-
-/*****************************************************************************
- * Receive data handling
- *****************************************************************************/
-
-/* Queue a skb in order. We come here only if the skb has an L2TP sequence
- * number.
- */
-static void pppol2tp_recv_queue_skb(struct pppol2tp_session *session, struct sk_buff *skb)
-{
-	struct sk_buff *skbp;
-	struct sk_buff *tmp;
-	u16 ns = PPPOL2TP_SKB_CB(skb)->ns;
-
-	spin_lock_bh(&session->reorder_q.lock);
-	skb_queue_walk_safe(&session->reorder_q, skbp, tmp) {
-		if (PPPOL2TP_SKB_CB(skbp)->ns > ns) {
-			__skb_queue_before(&session->reorder_q, skbp, skb);
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-			       "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n",
-			       session->name, ns, PPPOL2TP_SKB_CB(skbp)->ns,
-			       skb_queue_len(&session->reorder_q));
-			session->stats.rx_oos_packets++;
-			goto out;
-		}
-	}
-
-	__skb_queue_tail(&session->reorder_q, skb);
-
-out:
-	spin_unlock_bh(&session->reorder_q.lock);
-}
-
-/* Dequeue a single skb.
- */
-static void pppol2tp_recv_dequeue_skb(struct pppol2tp_session *session, struct sk_buff *skb)
-{
-	struct pppol2tp_tunnel *tunnel = session->tunnel;
-	int length = PPPOL2TP_SKB_CB(skb)->length;
-	struct sock *session_sock = NULL;
-
-	/* We're about to requeue the skb, so return resources
-	 * to its current owner (a socket receive buffer).
-	 */
-	skb_orphan(skb);
-
-	tunnel->stats.rx_packets++;
-	tunnel->stats.rx_bytes += length;
-	session->stats.rx_packets++;
-	session->stats.rx_bytes += length;
-
-	if (PPPOL2TP_SKB_CB(skb)->has_seq) {
-		/* Bump our Nr */
-		session->nr++;
-		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-		       "%s: updated nr to %hu\n", session->name, session->nr);
-	}
-
-	/* If the socket is bound, send it in to PPP's input queue. Otherwise
-	 * queue it on the session socket.
-	 */
-	session_sock = session->sock;
-	if (session_sock->sk_state & PPPOX_BOUND) {
-		struct pppox_sock *po;
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: recv %d byte data frame, passing to ppp\n",
-		       session->name, length);
-
-		/* We need to forget all info related to the L2TP packet
-		 * gathered in the skb as we are going to reuse the same
-		 * skb for the inner packet.
-		 * Namely we need to:
-		 * - reset xfrm (IPSec) information as it applies to
-		 *   the outer L2TP packet and not to the inner one
-		 * - release the dst to force a route lookup on the inner
-		 *   IP packet since skb->dst currently points to the dst
-		 *   of the UDP tunnel
-		 * - reset netfilter information as it doesn't apply
-		 *   to the inner packet either
-		 */
-		secpath_reset(skb);
-		skb_dst_drop(skb);
-		nf_reset(skb);
-
-		po = pppox_sk(session_sock);
-		ppp_input(&po->chan, skb);
-	} else {
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
-		       "%s: socket not bound\n", session->name);
-
-		/* Not bound. Nothing we can do, so discard. */
-		session->stats.rx_errors++;
-		kfree_skb(skb);
-	}
-
-	sock_put(session->sock);
-}
-
-/* Dequeue skbs from the session's reorder_q, subject to packet order.
- * Skbs that have been in the queue for too long are simply discarded.
- */
-static void pppol2tp_recv_dequeue(struct pppol2tp_session *session)
-{
-	struct sk_buff *skb;
-	struct sk_buff *tmp;
-
-	/* If the pkt at the head of the queue has the nr that we
-	 * expect to send up next, dequeue it and any other
-	 * in-sequence packets behind it.
-	 */
-	spin_lock_bh(&session->reorder_q.lock);
-	skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
-		if (time_after(jiffies, PPPOL2TP_SKB_CB(skb)->expires)) {
-			session->stats.rx_seq_discards++;
-			session->stats.rx_errors++;
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-			       "%s: oos pkt %hu len %d discarded (too old), "
-			       "waiting for %hu, reorder_q_len=%d\n",
-			       session->name, PPPOL2TP_SKB_CB(skb)->ns,
-			       PPPOL2TP_SKB_CB(skb)->length, session->nr,
-			       skb_queue_len(&session->reorder_q));
-			__skb_unlink(skb, &session->reorder_q);
-			kfree_skb(skb);
-			sock_put(session->sock);
-			continue;
-		}
-
-		if (PPPOL2TP_SKB_CB(skb)->has_seq) {
-			if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) {
-				PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-				       "%s: holding oos pkt %hu len %d, "
-				       "waiting for %hu, reorder_q_len=%d\n",
-				       session->name, PPPOL2TP_SKB_CB(skb)->ns,
-				       PPPOL2TP_SKB_CB(skb)->length, session->nr,
-				       skb_queue_len(&session->reorder_q));
-				goto out;
-			}
-		}
-		__skb_unlink(skb, &session->reorder_q);
-
-		/* Process the skb. We release the queue lock while we
-		 * do so to let other contexts process the queue.
-		 */
-		spin_unlock_bh(&session->reorder_q.lock);
-		pppol2tp_recv_dequeue_skb(session, skb);
-		spin_lock_bh(&session->reorder_q.lock);
-	}
-
-out:
-	spin_unlock_bh(&session->reorder_q.lock);
-}
-
-static inline int pppol2tp_verify_udp_checksum(struct sock *sk,
-					       struct sk_buff *skb)
-{
-	struct udphdr *uh = udp_hdr(skb);
-	u16 ulen = ntohs(uh->len);
-	struct inet_sock *inet;
-	__wsum psum;
-
-	if (sk->sk_no_check || skb_csum_unnecessary(skb) || !uh->check)
-		return 0;
-
-	inet = inet_sk(sk);
-	psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, ulen,
-				  IPPROTO_UDP, 0);
-
-	if ((skb->ip_summed == CHECKSUM_COMPLETE) &&
-	    !csum_fold(csum_add(psum, skb->csum)))
-		return 0;
-
-	skb->csum = psum;
-
-	return __skb_checksum_complete(skb);
-}
-
-/* Internal receive frame. Do the real work of receiving an L2TP data frame
- * here. The skb is not on a list when we get here.
- * Returns 0 if the packet was a data packet and was successfully passed on.
- * Returns 1 if the packet was not a good data packet and could not be
- * forwarded.  All such packets are passed up to userspace to deal with.
- */
-static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb)
-{
-	struct pppol2tp_session *session = NULL;
-	struct pppol2tp_tunnel *tunnel;
-	unsigned char *ptr, *optr;
-	u16 hdrflags;
-	u16 tunnel_id, session_id;
-	int length;
-	int offset;
-
-	tunnel = pppol2tp_sock_to_tunnel(sock);
-	if (tunnel == NULL)
-		goto no_tunnel;
-
-	if (tunnel->sock && pppol2tp_verify_udp_checksum(tunnel->sock, skb))
-		goto discard_bad_csum;
-
-	/* UDP always verifies the packet length. */
-	__skb_pull(skb, sizeof(struct udphdr));
-
-	/* Short packet? */
-	if (!pskb_may_pull(skb, 12)) {
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
-		       "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
-		goto error;
-	}
-
-	/* Point to L2TP header */
-	optr = ptr = skb->data;
-
-	/* Get L2TP header flags */
-	hdrflags = ntohs(*(__be16*)ptr);
-
-	/* Trace packet contents, if enabled */
-	if (tunnel->debug & PPPOL2TP_MSG_DATA) {
-		length = min(16u, skb->len);
-		if (!pskb_may_pull(skb, length))
-			goto error;
-
-		printk(KERN_DEBUG "%s: recv: ", tunnel->name);
-
-		offset = 0;
-		do {
-			printk(" %02X", ptr[offset]);
-		} while (++offset < length);
-
-		printk("\n");
-	}
-
-	/* Get length of L2TP packet */
-	length = skb->len;
-
-	/* If type is control packet, it is handled by userspace. */
-	if (hdrflags & L2TP_HDRFLAG_T) {
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: recv control packet, len=%d\n", tunnel->name, length);
-		goto error;
-	}
-
-	/* Skip flags */
-	ptr += 2;
-
-	/* If length is present, skip it */
-	if (hdrflags & L2TP_HDRFLAG_L)
-		ptr += 2;
-
-	/* Extract tunnel and session ID */
-	tunnel_id = ntohs(*(__be16 *) ptr);
-	ptr += 2;
-	session_id = ntohs(*(__be16 *) ptr);
-	ptr += 2;
-
-	/* Find the session context */
-	session = pppol2tp_session_find(tunnel, session_id);
-	if (!session) {
-		/* Not found? Pass to userspace to deal with */
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
-		       "%s: no socket found (%hu/%hu). Passing up.\n",
-		       tunnel->name, tunnel_id, session_id);
-		goto error;
-	}
-	sock_hold(session->sock);
-
-	/* The ref count on the socket was increased by the above call since
-	 * we now hold a pointer to the session. Take care to do sock_put()
-	 * when exiting this function from now on...
-	 */
-
-	/* Handle the optional sequence numbers.  If we are the LAC,
-	 * enable/disable sequence numbers under the control of the LNS.  If
-	 * no sequence numbers present but we were expecting them, discard
-	 * frame.
-	 */
-	if (hdrflags & L2TP_HDRFLAG_S) {
-		u16 ns, nr;
-		ns = ntohs(*(__be16 *) ptr);
-		ptr += 2;
-		nr = ntohs(*(__be16 *) ptr);
-		ptr += 2;
-
-		/* Received a packet with sequence numbers. If we're the LNS,
-		 * check if we sre sending sequence numbers and if not,
-		 * configure it so.
-		 */
-		if ((!session->lns_mode) && (!session->send_seq)) {
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO,
-			       "%s: requested to enable seq numbers by LNS\n",
-			       session->name);
-			session->send_seq = -1;
-		}
-
-		/* Store L2TP info in the skb */
-		PPPOL2TP_SKB_CB(skb)->ns = ns;
-		PPPOL2TP_SKB_CB(skb)->nr = nr;
-		PPPOL2TP_SKB_CB(skb)->has_seq = 1;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-		       "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n",
-		       session->name, ns, nr, session->nr);
-	} else {
-		/* No sequence numbers.
-		 * If user has configured mandatory sequence numbers, discard.
-		 */
-		if (session->recv_seq) {
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING,
-			       "%s: recv data has no seq numbers when required. "
-			       "Discarding\n", session->name);
-			session->stats.rx_seq_discards++;
-			goto discard;
-		}
-
-		/* If we're the LAC and we're sending sequence numbers, the
-		 * LNS has requested that we no longer send sequence numbers.
-		 * If we're the LNS and we're sending sequence numbers, the
-		 * LAC is broken. Discard the frame.
-		 */
-		if ((!session->lns_mode) && (session->send_seq)) {
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO,
-			       "%s: requested to disable seq numbers by LNS\n",
-			       session->name);
-			session->send_seq = 0;
-		} else if (session->send_seq) {
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING,
-			       "%s: recv data has no seq numbers when required. "
-			       "Discarding\n", session->name);
-			session->stats.rx_seq_discards++;
-			goto discard;
-		}
-
-		/* Store L2TP info in the skb */
-		PPPOL2TP_SKB_CB(skb)->has_seq = 0;
-	}
-
-	/* If offset bit set, skip it. */
-	if (hdrflags & L2TP_HDRFLAG_O) {
-		offset = ntohs(*(__be16 *)ptr);
-		ptr += 2 + offset;
-	}
-
-	offset = ptr - optr;
-	if (!pskb_may_pull(skb, offset))
-		goto discard;
-
-	__skb_pull(skb, offset);
-
-	/* Skip PPP header, if present.	 In testing, Microsoft L2TP clients
-	 * don't send the PPP header (PPP header compression enabled), but
-	 * other clients can include the header. So we cope with both cases
-	 * here. The PPP header is always FF03 when using L2TP.
-	 *
-	 * Note that skb->data[] isn't dereferenced from a u16 ptr here since
-	 * the field may be unaligned.
-	 */
-	if (!pskb_may_pull(skb, 2))
-		goto discard;
-
-	if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
-		skb_pull(skb, 2);
-
-	/* Prepare skb for adding to the session's reorder_q.  Hold
-	 * packets for max reorder_timeout or 1 second if not
-	 * reordering.
-	 */
-	PPPOL2TP_SKB_CB(skb)->length = length;
-	PPPOL2TP_SKB_CB(skb)->expires = jiffies +
-		(session->reorder_timeout ? session->reorder_timeout : HZ);
-
-	/* Add packet to the session's receive queue. Reordering is done here, if
-	 * enabled. Saved L2TP protocol info is stored in skb->sb[].
-	 */
-	if (PPPOL2TP_SKB_CB(skb)->has_seq) {
-		if (session->reorder_timeout != 0) {
-			/* Packet reordering enabled. Add skb to session's
-			 * reorder queue, in order of ns.
-			 */
-			pppol2tp_recv_queue_skb(session, skb);
-		} else {
-			/* Packet reordering disabled. Discard out-of-sequence
-			 * packets
-			 */
-			if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) {
-				session->stats.rx_seq_discards++;
-				PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-				       "%s: oos pkt %hu len %d discarded, "
-				       "waiting for %hu, reorder_q_len=%d\n",
-				       session->name, PPPOL2TP_SKB_CB(skb)->ns,
-				       PPPOL2TP_SKB_CB(skb)->length, session->nr,
-				       skb_queue_len(&session->reorder_q));
-				goto discard;
-			}
-			skb_queue_tail(&session->reorder_q, skb);
-		}
-	} else {
-		/* No sequence numbers. Add the skb to the tail of the
-		 * reorder queue. This ensures that it will be
-		 * delivered after all previous sequenced skbs.
-		 */
-		skb_queue_tail(&session->reorder_q, skb);
-	}
-
-	/* Try to dequeue as many skbs from reorder_q as we can. */
-	pppol2tp_recv_dequeue(session);
-	sock_put(sock);
-
-	return 0;
-
-discard:
-	session->stats.rx_errors++;
-	kfree_skb(skb);
-	sock_put(session->sock);
-	sock_put(sock);
-
-	return 0;
-
-discard_bad_csum:
-	LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name);
-	UDP_INC_STATS_USER(&init_net, UDP_MIB_INERRORS, 0);
-	tunnel->stats.rx_errors++;
-	kfree_skb(skb);
-	sock_put(sock);
-
-	return 0;
-
-error:
-	/* Put UDP header back */
-	__skb_push(skb, sizeof(struct udphdr));
-	sock_put(sock);
-
-no_tunnel:
-	return 1;
-}
-
-/* UDP encapsulation receive handler. See net/ipv4/udp.c.
- * Return codes:
- * 0 : success.
- * <0: error
- * >0: skb should be passed up to userspace as UDP.
- */
-static int pppol2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
-{
-	struct pppol2tp_tunnel *tunnel;
-
-	tunnel = pppol2tp_sock_to_tunnel(sk);
-	if (tunnel == NULL)
-		goto pass_up;
-
-	PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-	       "%s: received %d bytes\n", tunnel->name, skb->len);
-
-	if (pppol2tp_recv_core(sk, skb))
-		goto pass_up_put;
-
-	sock_put(sk);
-	return 0;
-
-pass_up_put:
-	sock_put(sk);
-pass_up:
-	return 1;
-}
-
-/* Receive message. This is the recvmsg for the PPPoL2TP socket.
- */
-static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
-			    struct msghdr *msg, size_t len,
-			    int flags)
-{
-	int err;
-	struct sk_buff *skb;
-	struct sock *sk = sock->sk;
-
-	err = -EIO;
-	if (sk->sk_state & PPPOX_BOUND)
-		goto end;
-
-	msg->msg_namelen = 0;
-
-	err = 0;
-	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
-				flags & MSG_DONTWAIT, &err);
-	if (!skb)
-		goto end;
-
-	if (len > skb->len)
-		len = skb->len;
-	else if (len < skb->len)
-		msg->msg_flags |= MSG_TRUNC;
-
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
-	if (likely(err == 0))
-		err = len;
-
-	kfree_skb(skb);
-end:
-	return err;
-}
-
-/************************************************************************
- * Transmit handling
- ***********************************************************************/
-
-/* Tell how big L2TP headers are for a particular session. This
- * depends on whether sequence numbers are being used.
- */
-static inline int pppol2tp_l2tp_header_len(struct pppol2tp_session *session)
-{
-	if (session->send_seq)
-		return PPPOL2TP_L2TP_HDR_SIZE_SEQ;
-
-	return PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
-}
-
-/* Build an L2TP header for the session into the buffer provided.
- */
-static void pppol2tp_build_l2tp_header(struct pppol2tp_session *session,
-				       void *buf)
-{
-	__be16 *bufp = buf;
-	u16 flags = L2TP_HDR_VER;
-
-	if (session->send_seq)
-		flags |= L2TP_HDRFLAG_S;
-
-	/* Setup L2TP header.
-	 * FIXME: Can this ever be unaligned? Is direct dereferencing of
-	 * 16-bit header fields safe here for all architectures?
-	 */
-	*bufp++ = htons(flags);
-	*bufp++ = htons(session->tunnel_addr.d_tunnel);
-	*bufp++ = htons(session->tunnel_addr.d_session);
-	if (session->send_seq) {
-		*bufp++ = htons(session->ns);
-		*bufp++ = 0;
-		session->ns++;
-		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-		       "%s: updated ns to %hu\n", session->name, session->ns);
-	}
-}
-
-/* This is the sendmsg for the PPPoL2TP pppol2tp_session socket.  We come here
- * when a user application does a sendmsg() on the session socket. L2TP and
- * PPP headers must be inserted into the user's data.
- */
-static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
-			    size_t total_len)
-{
-	static const unsigned char ppph[2] = { 0xff, 0x03 };
-	struct sock *sk = sock->sk;
-	struct inet_sock *inet;
-	__wsum csum;
-	struct sk_buff *skb;
-	int error;
-	int hdr_len;
-	struct pppol2tp_session *session;
-	struct pppol2tp_tunnel *tunnel;
-	struct udphdr *uh;
-	unsigned int len;
-	struct sock *sk_tun;
-	u16 udp_len;
-
-	error = -ENOTCONN;
-	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
-		goto error;
-
-	/* Get session and tunnel contexts */
-	error = -EBADF;
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto error;
-
-	sk_tun = session->tunnel_sock;
-	tunnel = pppol2tp_sock_to_tunnel(sk_tun);
-	if (tunnel == NULL)
-		goto error_put_sess;
-
-	/* What header length is configured for this session? */
-	hdr_len = pppol2tp_l2tp_header_len(session);
-
-	/* Allocate a socket buffer */
-	error = -ENOMEM;
-	skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
-			   sizeof(struct udphdr) + hdr_len +
-			   sizeof(ppph) + total_len,
-			   0, GFP_KERNEL);
-	if (!skb)
-		goto error_put_sess_tun;
-
-	/* Reserve space for headers. */
-	skb_reserve(skb, NET_SKB_PAD);
-	skb_reset_network_header(skb);
-	skb_reserve(skb, sizeof(struct iphdr));
-	skb_reset_transport_header(skb);
-
-	/* Build UDP header */
-	inet = inet_sk(sk_tun);
-	udp_len = hdr_len + sizeof(ppph) + total_len;
-	uh = (struct udphdr *) skb->data;
-	uh->source = inet->inet_sport;
-	uh->dest = inet->inet_dport;
-	uh->len = htons(udp_len);
-	uh->check = 0;
-	skb_put(skb, sizeof(struct udphdr));
-
-	/* Build L2TP header */
-	pppol2tp_build_l2tp_header(session, skb->data);
-	skb_put(skb, hdr_len);
-
-	/* Add PPP header */
-	skb->data[0] = ppph[0];
-	skb->data[1] = ppph[1];
-	skb_put(skb, 2);
-
-	/* Copy user data into skb */
-	error = memcpy_fromiovec(skb->data, m->msg_iov, total_len);
-	if (error < 0) {
-		kfree_skb(skb);
-		goto error_put_sess_tun;
-	}
-	skb_put(skb, total_len);
-
-	/* Calculate UDP checksum if configured to do so */
-	if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT)
-		skb->ip_summed = CHECKSUM_NONE;
-	else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
-		skb->ip_summed = CHECKSUM_COMPLETE;
-		csum = skb_checksum(skb, 0, udp_len, 0);
-		uh->check = csum_tcpudp_magic(inet->inet_saddr,
-					      inet->inet_daddr,
-					      udp_len, IPPROTO_UDP, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
-					       inet->inet_daddr,
-					       udp_len, IPPROTO_UDP, 0);
-	}
-
-	/* Debug */
-	if (session->send_seq)
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %Zd bytes, ns=%hu\n", session->name,
-		       total_len, session->ns - 1);
-	else
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %Zd bytes\n", session->name, total_len);
-
-	if (session->debug & PPPOL2TP_MSG_DATA) {
-		int i;
-		unsigned char *datap = skb->data;
-
-		printk(KERN_DEBUG "%s: xmit:", session->name);
-		for (i = 0; i < total_len; i++) {
-			printk(" %02X", *datap++);
-			if (i == 15) {
-				printk(" ...");
-				break;
-			}
-		}
-		printk("\n");
-	}
-
-	/* Queue the packet to IP for output */
-	len = skb->len;
-	error = ip_queue_xmit(skb, 1);
-
-	/* Update stats */
-	if (error >= 0) {
-		tunnel->stats.tx_packets++;
-		tunnel->stats.tx_bytes += len;
-		session->stats.tx_packets++;
-		session->stats.tx_bytes += len;
-	} else {
-		tunnel->stats.tx_errors++;
-		session->stats.tx_errors++;
-	}
-
-	return error;
-
-error_put_sess_tun:
-	sock_put(session->tunnel_sock);
-error_put_sess:
-	sock_put(sk);
-error:
-	return error;
-}
-
-/* Automatically called when the skb is freed.
- */
-static void pppol2tp_sock_wfree(struct sk_buff *skb)
-{
-	sock_put(skb->sk);
-}
-
-/* For data skbs that we transmit, we associate with the tunnel socket
- * but don't do accounting.
- */
-static inline void pppol2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
-	sock_hold(sk);
-	skb->sk = sk;
-	skb->destructor = pppol2tp_sock_wfree;
-}
-
-/* Transmit function called by generic PPP driver.  Sends PPP frame
- * over PPPoL2TP socket.
- *
- * This is almost the same as pppol2tp_sendmsg(), but rather than
- * being called with a msghdr from userspace, it is called with a skb
- * from the kernel.
- *
- * The supplied skb from ppp doesn't have enough headroom for the
- * insertion of L2TP, UDP and IP headers so we need to allocate more
- * headroom in the skb. This will create a cloned skb. But we must be
- * careful in the error case because the caller will expect to free
- * the skb it supplied, not our cloned skb. So we take care to always
- * leave the original skb unfreed if we return an error.
- */
-static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
-{
-	static const u8 ppph[2] = { 0xff, 0x03 };
-	struct sock *sk = (struct sock *) chan->private;
-	struct sock *sk_tun;
-	int hdr_len;
-	u16 udp_len;
-	struct pppol2tp_session *session;
-	struct pppol2tp_tunnel *tunnel;
-	int rc;
-	int headroom;
-	int data_len = skb->len;
-	struct inet_sock *inet;
-	__wsum csum;
-	struct udphdr *uh;
-	unsigned int len;
-	int old_headroom;
-	int new_headroom;
-
-	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
-		goto abort;
-
-	/* Get session and tunnel contexts from the socket */
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto abort;
-
-	sk_tun = session->tunnel_sock;
-	if (sk_tun == NULL)
-		goto abort_put_sess;
-	tunnel = pppol2tp_sock_to_tunnel(sk_tun);
-	if (tunnel == NULL)
-		goto abort_put_sess;
-
-	/* What header length is configured for this session? */
-	hdr_len = pppol2tp_l2tp_header_len(session);
-
-	/* Check that there's enough headroom in the skb to insert IP,
-	 * UDP and L2TP and PPP headers. If not enough, expand it to
-	 * make room. Adjust truesize.
-	 */
-	headroom = NET_SKB_PAD + sizeof(struct iphdr) +
-		sizeof(struct udphdr) + hdr_len + sizeof(ppph);
-	old_headroom = skb_headroom(skb);
-	if (skb_cow_head(skb, headroom))
-		goto abort_put_sess_tun;
-
-	new_headroom = skb_headroom(skb);
-	skb_orphan(skb);
-	skb->truesize += new_headroom - old_headroom;
-
-	/* Setup PPP header */
-	__skb_push(skb, sizeof(ppph));
-	skb->data[0] = ppph[0];
-	skb->data[1] = ppph[1];
-
-	/* Setup L2TP header */
-	pppol2tp_build_l2tp_header(session, __skb_push(skb, hdr_len));
-
-	udp_len = sizeof(struct udphdr) + hdr_len + sizeof(ppph) + data_len;
-
-	/* Setup UDP header */
-	inet = inet_sk(sk_tun);
-	__skb_push(skb, sizeof(*uh));
-	skb_reset_transport_header(skb);
-	uh = udp_hdr(skb);
-	uh->source = inet->inet_sport;
-	uh->dest = inet->inet_dport;
-	uh->len = htons(udp_len);
-	uh->check = 0;
-
-	/* Debug */
-	if (session->send_seq)
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %d bytes, ns=%hu\n", session->name,
-		       data_len, session->ns - 1);
-	else
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %d bytes\n", session->name, data_len);
-
-	if (session->debug & PPPOL2TP_MSG_DATA) {
-		int i;
-		unsigned char *datap = skb->data;
-
-		printk(KERN_DEBUG "%s: xmit:", session->name);
-		for (i = 0; i < data_len; i++) {
-			printk(" %02X", *datap++);
-			if (i == 31) {
-				printk(" ...");
-				break;
-			}
-		}
-		printk("\n");
-	}
-
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
-			      IPSKB_REROUTED);
-	nf_reset(skb);
-
-	/* Get routing info from the tunnel socket */
-	skb_dst_drop(skb);
-	skb_dst_set(skb, dst_clone(__sk_dst_get(sk_tun)));
-	pppol2tp_skb_set_owner_w(skb, sk_tun);
-
-	/* Calculate UDP checksum if configured to do so */
-	if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT)
-		skb->ip_summed = CHECKSUM_NONE;
-	else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
-		 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
-		skb->ip_summed = CHECKSUM_COMPLETE;
-		csum = skb_checksum(skb, 0, udp_len, 0);
-		uh->check = csum_tcpudp_magic(inet->inet_saddr,
-					      inet->inet_daddr,
-					      udp_len, IPPROTO_UDP, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
-					       inet->inet_daddr,
-					       udp_len, IPPROTO_UDP, 0);
-	}
-
-	/* Queue the packet to IP for output */
-	len = skb->len;
-	rc = ip_queue_xmit(skb, 1);
-
-	/* Update stats */
-	if (rc >= 0) {
-		tunnel->stats.tx_packets++;
-		tunnel->stats.tx_bytes += len;
-		session->stats.tx_packets++;
-		session->stats.tx_bytes += len;
-	} else {
-		tunnel->stats.tx_errors++;
-		session->stats.tx_errors++;
-	}
-
-	sock_put(sk_tun);
-	sock_put(sk);
-	return 1;
-
-abort_put_sess_tun:
-	sock_put(sk_tun);
-abort_put_sess:
-	sock_put(sk);
-abort:
-	/* Free the original skb */
-	kfree_skb(skb);
-	return 1;
-}
-
-/*****************************************************************************
- * Session (and tunnel control) socket create/destroy.
- *****************************************************************************/
-
-/* When the tunnel UDP socket is closed, all the attached sockets need to go
- * too.
- */
-static void pppol2tp_tunnel_closeall(struct pppol2tp_tunnel *tunnel)
-{
-	int hash;
-	struct hlist_node *walk;
-	struct hlist_node *tmp;
-	struct pppol2tp_session *session;
-	struct sock *sk;
-
-	BUG_ON(tunnel == NULL);
-
-	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-	       "%s: closing all sessions...\n", tunnel->name);
-
-	write_lock_bh(&tunnel->hlist_lock);
-	for (hash = 0; hash < PPPOL2TP_HASH_SIZE; hash++) {
-again:
-		hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
-			struct sk_buff *skb;
-
-			session = hlist_entry(walk, struct pppol2tp_session, hlist);
-
-			sk = session->sock;
-
-			PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-			       "%s: closing session\n", session->name);
-
-			hlist_del_init(&session->hlist);
-
-			/* Since we should hold the sock lock while
-			 * doing any unbinding, we need to release the
-			 * lock we're holding before taking that lock.
-			 * Hold a reference to the sock so it doesn't
-			 * disappear as we're jumping between locks.
-			 */
-			sock_hold(sk);
-			write_unlock_bh(&tunnel->hlist_lock);
-			lock_sock(sk);
-
-			if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
-				pppox_unbind_sock(sk);
-				sk->sk_state = PPPOX_DEAD;
-				sk->sk_state_change(sk);
-			}
-
-			/* Purge any queued data */
-			skb_queue_purge(&sk->sk_receive_queue);
-			skb_queue_purge(&sk->sk_write_queue);
-			while ((skb = skb_dequeue(&session->reorder_q))) {
-				kfree_skb(skb);
-				sock_put(sk);
-			}
-
-			release_sock(sk);
-			sock_put(sk);
-
-			/* Now restart from the beginning of this hash
-			 * chain.  We always remove a session from the
-			 * list so we are guaranteed to make forward
-			 * progress.
-			 */
-			write_lock_bh(&tunnel->hlist_lock);
-			goto again;
-		}
-	}
-	write_unlock_bh(&tunnel->hlist_lock);
-}
-
-/* Really kill the tunnel.
- * Come here only when all sessions have been cleared from the tunnel.
- */
-static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel)
-{
-	struct pppol2tp_net *pn = pppol2tp_pernet(tunnel->pppol2tp_net);
-
-	/* Remove from socket list */
-	write_lock_bh(&pn->pppol2tp_tunnel_list_lock);
-	list_del_init(&tunnel->list);
-	write_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-
-	atomic_dec(&pppol2tp_tunnel_count);
-	kfree(tunnel);
-}
-
-/* Tunnel UDP socket destruct hook.
- * The tunnel context is deleted only when all session sockets have been
- * closed.
- */
-static void pppol2tp_tunnel_destruct(struct sock *sk)
-{
-	struct pppol2tp_tunnel *tunnel;
-
-	tunnel = sk->sk_user_data;
-	if (tunnel == NULL)
-		goto end;
-
-	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-	       "%s: closing...\n", tunnel->name);
-
-	/* Close all sessions */
-	pppol2tp_tunnel_closeall(tunnel);
-
-	/* No longer an encapsulation socket. See net/ipv4/udp.c */
-	(udp_sk(sk))->encap_type = 0;
-	(udp_sk(sk))->encap_rcv = NULL;
-
-	/* Remove hooks into tunnel socket */
-	tunnel->sock = NULL;
-	sk->sk_destruct = tunnel->old_sk_destruct;
-	sk->sk_user_data = NULL;
-
-	/* Call original (UDP) socket descructor */
-	if (sk->sk_destruct != NULL)
-		(*sk->sk_destruct)(sk);
-
-	pppol2tp_tunnel_dec_refcount(tunnel);
-
-end:
-	return;
-}
-
-/* Really kill the session socket. (Called from sock_put() if
- * refcnt == 0.)
- */
-static void pppol2tp_session_destruct(struct sock *sk)
-{
-	struct pppol2tp_session *session = NULL;
-
-	if (sk->sk_user_data != NULL) {
-		struct pppol2tp_tunnel *tunnel;
-
-		session = sk->sk_user_data;
-		if (session == NULL)
-			goto out;
-
-		BUG_ON(session->magic != L2TP_SESSION_MAGIC);
-
-		/* Don't use pppol2tp_sock_to_tunnel() here to
-		 * get the tunnel context because the tunnel
-		 * socket might have already been closed (its
-		 * sk->sk_user_data will be NULL) so use the
-		 * session's private tunnel ptr instead.
-		 */
-		tunnel = session->tunnel;
-		if (tunnel != NULL) {
-			BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-
-			/* If session_id is zero, this is a null
-			 * session context, which was created for a
-			 * socket that is being used only to manage
-			 * tunnels.
-			 */
-			if (session->tunnel_addr.s_session != 0) {
-				/* Delete the session socket from the
-				 * hash
-				 */
-				write_lock_bh(&tunnel->hlist_lock);
-				hlist_del_init(&session->hlist);
-				write_unlock_bh(&tunnel->hlist_lock);
-
-				atomic_dec(&pppol2tp_session_count);
-			}
-
-			/* This will delete the tunnel context if this
-			 * is the last session on the tunnel.
-			 */
-			session->tunnel = NULL;
-			session->tunnel_sock = NULL;
-			pppol2tp_tunnel_dec_refcount(tunnel);
-		}
-	}
-
-	kfree(session);
-out:
-	return;
-}
-
-/* Called when the PPPoX socket (session) is closed.
- */
-static int pppol2tp_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-	struct pppol2tp_session *session;
-	int error;
-
-	if (!sk)
-		return 0;
-
-	error = -EBADF;
-	lock_sock(sk);
-	if (sock_flag(sk, SOCK_DEAD) != 0)
-		goto error;
-
-	pppox_unbind_sock(sk);
-
-	/* Signal the death of the socket. */
-	sk->sk_state = PPPOX_DEAD;
-	sock_orphan(sk);
-	sock->sk = NULL;
-
-	session = pppol2tp_sock_to_session(sk);
-
-	/* Purge any queued data */
-	skb_queue_purge(&sk->sk_receive_queue);
-	skb_queue_purge(&sk->sk_write_queue);
-	if (session != NULL) {
-		struct sk_buff *skb;
-		while ((skb = skb_dequeue(&session->reorder_q))) {
-			kfree_skb(skb);
-			sock_put(sk);
-		}
-		sock_put(sk);
-	}
-
-	release_sock(sk);
-
-	/* This will delete the session context via
-	 * pppol2tp_session_destruct() if the socket's refcnt drops to
-	 * zero.
-	 */
-	sock_put(sk);
-
-	return 0;
-
-error:
-	release_sock(sk);
-	return error;
-}
-
-/* Internal function to prepare a tunnel (UDP) socket to have PPPoX
- * sockets attached to it.
- */
-static struct sock *pppol2tp_prepare_tunnel_socket(struct net *net,
-					int fd, u16 tunnel_id, int *error)
-{
-	int err;
-	struct socket *sock = NULL;
-	struct sock *sk;
-	struct pppol2tp_tunnel *tunnel;
-	struct pppol2tp_net *pn;
-	struct sock *ret = NULL;
-
-	/* Get the tunnel UDP socket from the fd, which was opened by
-	 * the userspace L2TP daemon.
-	 */
-	err = -EBADF;
-	sock = sockfd_lookup(fd, &err);
-	if (!sock) {
-		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
-		       "tunl %hu: sockfd_lookup(fd=%d) returned %d\n",
-		       tunnel_id, fd, err);
-		goto err;
-	}
-
-	sk = sock->sk;
-
-	/* Quick sanity checks */
-	err = -EPROTONOSUPPORT;
-	if (sk->sk_protocol != IPPROTO_UDP) {
-		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
-		       "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
-		       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
-		goto err;
-	}
-	err = -EAFNOSUPPORT;
-	if (sock->ops->family != AF_INET) {
-		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
-		       "tunl %hu: fd %d wrong family, got %d, expected %d\n",
-		       tunnel_id, fd, sock->ops->family, AF_INET);
-		goto err;
-	}
-
-	err = -ENOTCONN;
-
-	/* Check if this socket has already been prepped */
-	tunnel = (struct pppol2tp_tunnel *)sk->sk_user_data;
-	if (tunnel != NULL) {
-		/* User-data field already set */
-		err = -EBUSY;
-		BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-
-		/* This socket has already been prepped */
-		ret = tunnel->sock;
-		goto out;
-	}
-
-	/* This socket is available and needs prepping. Create a new tunnel
-	 * context and init it.
-	 */
-	sk->sk_user_data = tunnel = kzalloc(sizeof(struct pppol2tp_tunnel), GFP_KERNEL);
-	if (sk->sk_user_data == NULL) {
-		err = -ENOMEM;
-		goto err;
-	}
-
-	tunnel->magic = L2TP_TUNNEL_MAGIC;
-	sprintf(&tunnel->name[0], "tunl %hu", tunnel_id);
-
-	tunnel->stats.tunnel_id = tunnel_id;
-	tunnel->debug = PPPOL2TP_DEFAULT_DEBUG_FLAGS;
-
-	/* Hook on the tunnel socket destructor so that we can cleanup
-	 * if the tunnel socket goes away.
-	 */
-	tunnel->old_sk_destruct = sk->sk_destruct;
-	sk->sk_destruct = pppol2tp_tunnel_destruct;
-
-	tunnel->sock = sk;
-	sk->sk_allocation = GFP_ATOMIC;
-
-	/* Misc init */
-	rwlock_init(&tunnel->hlist_lock);
-
-	/* The net we belong to */
-	tunnel->pppol2tp_net = net;
-	pn = pppol2tp_pernet(net);
-
-	/* Add tunnel to our list */
-	INIT_LIST_HEAD(&tunnel->list);
-	write_lock_bh(&pn->pppol2tp_tunnel_list_lock);
-	list_add(&tunnel->list, &pn->pppol2tp_tunnel_list);
-	write_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-	atomic_inc(&pppol2tp_tunnel_count);
-
-	/* Bump the reference count. The tunnel context is deleted
-	 * only when this drops to zero.
-	 */
-	pppol2tp_tunnel_inc_refcount(tunnel);
-
-	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
-	(udp_sk(sk))->encap_type = UDP_ENCAP_L2TPINUDP;
-	(udp_sk(sk))->encap_rcv = pppol2tp_udp_encap_recv;
-
-	ret = tunnel->sock;
-
-	*error = 0;
-out:
-	if (sock)
-		sockfd_put(sock);
-
-	return ret;
-
-err:
-	*error = err;
-	goto out;
-}
-
-static struct proto pppol2tp_sk_proto = {
-	.name	  = "PPPOL2TP",
-	.owner	  = THIS_MODULE,
-	.obj_size = sizeof(struct pppox_sock),
-};
-
-/* socket() handler. Initialize a new struct sock.
- */
-static int pppol2tp_create(struct net *net, struct socket *sock)
-{
-	int error = -ENOMEM;
-	struct sock *sk;
-
-	sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto);
-	if (!sk)
-		goto out;
-
-	sock_init_data(sock, sk);
-
-	sock->state  = SS_UNCONNECTED;
-	sock->ops    = &pppol2tp_ops;
-
-	sk->sk_backlog_rcv = pppol2tp_recv_core;
-	sk->sk_protocol	   = PX_PROTO_OL2TP;
-	sk->sk_family	   = PF_PPPOX;
-	sk->sk_state	   = PPPOX_NONE;
-	sk->sk_type	   = SOCK_STREAM;
-	sk->sk_destruct	   = pppol2tp_session_destruct;
-
-	error = 0;
-
-out:
-	return error;
-}
-
-/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
- */
-static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
-			    int sockaddr_len, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
-	struct pppox_sock *po = pppox_sk(sk);
-	struct sock *tunnel_sock = NULL;
-	struct pppol2tp_session *session = NULL;
-	struct pppol2tp_tunnel *tunnel;
-	struct dst_entry *dst;
-	int error = 0;
-
-	lock_sock(sk);
-
-	error = -EINVAL;
-	if (sp->sa_protocol != PX_PROTO_OL2TP)
-		goto end;
-
-	/* Check for already bound sockets */
-	error = -EBUSY;
-	if (sk->sk_state & PPPOX_CONNECTED)
-		goto end;
-
-	/* We don't supporting rebinding anyway */
-	error = -EALREADY;
-	if (sk->sk_user_data)
-		goto end; /* socket is already attached */
-
-	/* Don't bind if s_tunnel is 0 */
-	error = -EINVAL;
-	if (sp->pppol2tp.s_tunnel == 0)
-		goto end;
-
-	/* Special case: prepare tunnel socket if s_session and
-	 * d_session is 0. Otherwise look up tunnel using supplied
-	 * tunnel id.
-	 */
-	if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) {
-		tunnel_sock = pppol2tp_prepare_tunnel_socket(sock_net(sk),
-							     sp->pppol2tp.fd,
-							     sp->pppol2tp.s_tunnel,
-							     &error);
-		if (tunnel_sock == NULL)
-			goto end;
-
-		sock_hold(tunnel_sock);
-		tunnel = tunnel_sock->sk_user_data;
-	} else {
-		tunnel = pppol2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel);
-
-		/* Error if we can't find the tunnel */
-		error = -ENOENT;
-		if (tunnel == NULL)
-			goto end;
-
-		tunnel_sock = tunnel->sock;
-	}
-
-	/* Check that this session doesn't already exist */
-	error = -EEXIST;
-	session = pppol2tp_session_find(tunnel, sp->pppol2tp.s_session);
-	if (session != NULL)
-		goto end;
-
-	/* Allocate and initialize a new session context. */
-	session = kzalloc(sizeof(struct pppol2tp_session), GFP_KERNEL);
-	if (session == NULL) {
-		error = -ENOMEM;
-		goto end;
-	}
-
-	skb_queue_head_init(&session->reorder_q);
-
-	session->magic	     = L2TP_SESSION_MAGIC;
-	session->owner	     = current->pid;
-	session->sock	     = sk;
-	session->tunnel	     = tunnel;
-	session->tunnel_sock = tunnel_sock;
-	session->tunnel_addr = sp->pppol2tp;
-	sprintf(&session->name[0], "sess %hu/%hu",
-		session->tunnel_addr.s_tunnel,
-		session->tunnel_addr.s_session);
-
-	session->stats.tunnel_id  = session->tunnel_addr.s_tunnel;
-	session->stats.session_id = session->tunnel_addr.s_session;
-
-	INIT_HLIST_NODE(&session->hlist);
-
-	/* Inherit debug options from tunnel */
-	session->debug = tunnel->debug;
-
-	/* Default MTU must allow space for UDP/L2TP/PPP
-	 * headers.
-	 */
-	session->mtu = session->mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
-
-	/* If PMTU discovery was enabled, use the MTU that was discovered */
-	dst = sk_dst_get(sk);
-	if (dst != NULL) {
-		u32 pmtu = dst_mtu(__sk_dst_get(sk));
-		if (pmtu != 0)
-			session->mtu = session->mru = pmtu -
-				PPPOL2TP_HEADER_OVERHEAD;
-		dst_release(dst);
-	}
-
-	/* Special case: if source & dest session_id == 0x0000, this socket is
-	 * being created to manage the tunnel. Don't add the session to the
-	 * session hash list, just set up the internal context for use by
-	 * ioctl() and sockopt() handlers.
-	 */
-	if ((session->tunnel_addr.s_session == 0) &&
-	    (session->tunnel_addr.d_session == 0)) {
-		error = 0;
-		sk->sk_user_data = session;
-		goto out_no_ppp;
-	}
-
-	/* Get tunnel context from the tunnel socket */
-	tunnel = pppol2tp_sock_to_tunnel(tunnel_sock);
-	if (tunnel == NULL) {
-		error = -EBADF;
-		goto end;
-	}
-
-	/* Right now, because we don't have a way to push the incoming skb's
-	 * straight through the UDP layer, the only header we need to worry
-	 * about is the L2TP header. This size is different depending on
-	 * whether sequence numbers are enabled for the data channel.
-	 */
-	po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
-
-	po->chan.private = sk;
-	po->chan.ops	 = &pppol2tp_chan_ops;
-	po->chan.mtu	 = session->mtu;
-
-	error = ppp_register_net_channel(sock_net(sk), &po->chan);
-	if (error)
-		goto end_put_tun;
-
-	/* This is how we get the session context from the socket. */
-	sk->sk_user_data = session;
-
-	/* Add session to the tunnel's hash list */
-	write_lock_bh(&tunnel->hlist_lock);
-	hlist_add_head(&session->hlist,
-		       pppol2tp_session_id_hash(tunnel,
-						session->tunnel_addr.s_session));
-	write_unlock_bh(&tunnel->hlist_lock);
-
-	atomic_inc(&pppol2tp_session_count);
-
-out_no_ppp:
-	pppol2tp_tunnel_inc_refcount(tunnel);
-	sk->sk_state = PPPOX_CONNECTED;
-	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-	       "%s: created\n", session->name);
-
-end_put_tun:
-	sock_put(tunnel_sock);
-end:
-	release_sock(sk);
-
-	if (error != 0) {
-		if (session)
-			PRINTK(session->debug,
-				PPPOL2TP_MSG_CONTROL, KERN_WARNING,
-				"%s: connect failed: %d\n",
-				session->name, error);
-		else
-			PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_WARNING,
-				"connect failed: %d\n", error);
-	}
-
-	return error;
-}
-
-/* getname() support.
- */
-static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
-			    int *usockaddr_len, int peer)
-{
-	int len = sizeof(struct sockaddr_pppol2tp);
-	struct sockaddr_pppol2tp sp;
-	int error = 0;
-	struct pppol2tp_session *session;
-
-	error = -ENOTCONN;
-	if (sock->sk->sk_state != PPPOX_CONNECTED)
-		goto end;
-
-	session = pppol2tp_sock_to_session(sock->sk);
-	if (session == NULL) {
-		error = -EBADF;
-		goto end;
-	}
-
-	sp.sa_family	= AF_PPPOX;
-	sp.sa_protocol	= PX_PROTO_OL2TP;
-	memcpy(&sp.pppol2tp, &session->tunnel_addr,
-	       sizeof(struct pppol2tp_addr));
-
-	memcpy(uaddr, &sp, len);
-
-	*usockaddr_len = len;
-
-	error = 0;
-	sock_put(sock->sk);
-
-end:
-	return error;
-}
-
-/****************************************************************************
- * ioctl() handlers.
- *
- * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
- * sockets. However, in order to control kernel tunnel features, we allow
- * userspace to create a special "tunnel" PPPoX socket which is used for
- * control only.  Tunnel PPPoX sockets have session_id == 0 and simply allow
- * the user application to issue L2TP setsockopt(), getsockopt() and ioctl()
- * calls.
- ****************************************************************************/
-
-/* Session ioctl helper.
- */
-static int pppol2tp_session_ioctl(struct pppol2tp_session *session,
-				  unsigned int cmd, unsigned long arg)
-{
-	struct ifreq ifr;
-	int err = 0;
-	struct sock *sk = session->sock;
-	int val = (int) arg;
-
-	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
-	       "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
-	       session->name, cmd, arg);
-
-	sock_hold(sk);
-
-	switch (cmd) {
-	case SIOCGIFMTU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
-			break;
-		ifr.ifr_mtu = session->mtu;
-		if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
-			break;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get mtu=%d\n", session->name, session->mtu);
-		err = 0;
-		break;
-
-	case SIOCSIFMTU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
-			break;
-
-		session->mtu = ifr.ifr_mtu;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set mtu=%d\n", session->name, session->mtu);
-		err = 0;
-		break;
-
-	case PPPIOCGMRU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (put_user(session->mru, (int __user *) arg))
-			break;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get mru=%d\n", session->name, session->mru);
-		err = 0;
-		break;
-
-	case PPPIOCSMRU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (get_user(val,(int __user *) arg))
-			break;
-
-		session->mru = val;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set mru=%d\n", session->name, session->mru);
-		err = 0;
-		break;
-
-	case PPPIOCGFLAGS:
-		err = -EFAULT;
-		if (put_user(session->flags, (int __user *) arg))
-			break;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get flags=%d\n", session->name, session->flags);
-		err = 0;
-		break;
-
-	case PPPIOCSFLAGS:
-		err = -EFAULT;
-		if (get_user(val, (int __user *) arg))
-			break;
-		session->flags = val;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set flags=%d\n", session->name, session->flags);
-		err = 0;
-		break;
-
-	case PPPIOCGL2TPSTATS:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		if (copy_to_user((void __user *) arg, &session->stats,
-				 sizeof(session->stats)))
-			break;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get L2TP stats\n", session->name);
-		err = 0;
-		break;
-
-	default:
-		err = -ENOSYS;
-		break;
-	}
-
-	sock_put(sk);
-
-	return err;
-}
-
-/* Tunnel ioctl helper.
- *
- * Note the special handling for PPPIOCGL2TPSTATS below. If the ioctl data
- * specifies a session_id, the session ioctl handler is called. This allows an
- * application to retrieve session stats via a tunnel socket.
- */
-static int pppol2tp_tunnel_ioctl(struct pppol2tp_tunnel *tunnel,
-				 unsigned int cmd, unsigned long arg)
-{
-	int err = 0;
-	struct sock *sk = tunnel->sock;
-	struct pppol2tp_ioc_stats stats_req;
-
-	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
-	       "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n", tunnel->name,
-	       cmd, arg);
-
-	sock_hold(sk);
-
-	switch (cmd) {
-	case PPPIOCGL2TPSTATS:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		if (copy_from_user(&stats_req, (void __user *) arg,
-				   sizeof(stats_req))) {
-			err = -EFAULT;
-			break;
-		}
-		if (stats_req.session_id != 0) {
-			/* resend to session ioctl handler */
-			struct pppol2tp_session *session =
-				pppol2tp_session_find(tunnel, stats_req.session_id);
-			if (session != NULL)
-				err = pppol2tp_session_ioctl(session, cmd, arg);
-			else
-				err = -EBADR;
-			break;
-		}
-#ifdef CONFIG_XFRM
-		tunnel->stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0;
-#endif
-		if (copy_to_user((void __user *) arg, &tunnel->stats,
-				 sizeof(tunnel->stats))) {
-			err = -EFAULT;
-			break;
-		}
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get L2TP stats\n", tunnel->name);
-		err = 0;
-		break;
-
-	default:
-		err = -ENOSYS;
-		break;
-	}
-
-	sock_put(sk);
-
-	return err;
-}
-
-/* Main ioctl() handler.
- * Dispatch to tunnel or session helpers depending on the socket.
- */
-static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
-			  unsigned long arg)
-{
-	struct sock *sk = sock->sk;
-	struct pppol2tp_session *session;
-	struct pppol2tp_tunnel *tunnel;
-	int err;
-
-	if (!sk)
-		return 0;
-
-	err = -EBADF;
-	if (sock_flag(sk, SOCK_DEAD) != 0)
-		goto end;
-
-	err = -ENOTCONN;
-	if ((sk->sk_user_data == NULL) ||
-	    (!(sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND))))
-		goto end;
-
-	/* Get session context from the socket */
-	err = -EBADF;
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto end;
-
-	/* Special case: if session's session_id is zero, treat ioctl as a
-	 * tunnel ioctl
-	 */
-	if ((session->tunnel_addr.s_session == 0) &&
-	    (session->tunnel_addr.d_session == 0)) {
-		err = -EBADF;
-		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
-		if (tunnel == NULL)
-			goto end_put_sess;
-
-		err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
-		sock_put(session->tunnel_sock);
-		goto end_put_sess;
-	}
-
-	err = pppol2tp_session_ioctl(session, cmd, arg);
-
-end_put_sess:
-	sock_put(sk);
-end:
-	return err;
-}
-
-/*****************************************************************************
- * setsockopt() / getsockopt() support.
- *
- * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
- * sockets. In order to control kernel tunnel features, we allow userspace to
- * create a special "tunnel" PPPoX socket which is used for control only.
- * Tunnel PPPoX sockets have session_id == 0 and simply allow the user
- * application to issue L2TP setsockopt(), getsockopt() and ioctl() calls.
- *****************************************************************************/
-
-/* Tunnel setsockopt() helper.
- */
-static int pppol2tp_tunnel_setsockopt(struct sock *sk,
-				      struct pppol2tp_tunnel *tunnel,
-				      int optname, int val)
-{
-	int err = 0;
-
-	switch (optname) {
-	case PPPOL2TP_SO_DEBUG:
-		tunnel->debug = val;
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set debug=%x\n", tunnel->name, tunnel->debug);
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-	return err;
-}
-
-/* Session setsockopt helper.
- */
-static int pppol2tp_session_setsockopt(struct sock *sk,
-				       struct pppol2tp_session *session,
-				       int optname, int val)
-{
-	int err = 0;
-
-	switch (optname) {
-	case PPPOL2TP_SO_RECVSEQ:
-		if ((val != 0) && (val != 1)) {
-			err = -EINVAL;
-			break;
-		}
-		session->recv_seq = val ? -1 : 0;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set recv_seq=%d\n", session->name,
-		       session->recv_seq);
-		break;
-
-	case PPPOL2TP_SO_SENDSEQ:
-		if ((val != 0) && (val != 1)) {
-			err = -EINVAL;
-			break;
-		}
-		session->send_seq = val ? -1 : 0;
-		{
-			struct sock *ssk      = session->sock;
-			struct pppox_sock *po = pppox_sk(ssk);
-			po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
-				PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
-		}
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set send_seq=%d\n", session->name, session->send_seq);
-		break;
-
-	case PPPOL2TP_SO_LNSMODE:
-		if ((val != 0) && (val != 1)) {
-			err = -EINVAL;
-			break;
-		}
-		session->lns_mode = val ? -1 : 0;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set lns_mode=%d\n", session->name,
-		       session->lns_mode);
-		break;
-
-	case PPPOL2TP_SO_DEBUG:
-		session->debug = val;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set debug=%x\n", session->name, session->debug);
-		break;
-
-	case PPPOL2TP_SO_REORDERTO:
-		session->reorder_timeout = msecs_to_jiffies(val);
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set reorder_timeout=%d\n", session->name,
-		       session->reorder_timeout);
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-	return err;
-}
-
-/* Main setsockopt() entry point.
- * Does API checks, then calls either the tunnel or session setsockopt
- * handler, according to whether the PPPoL2TP socket is a for a regular
- * session or the special tunnel type.
- */
-static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
-			       char __user *optval, unsigned int optlen)
-{
-	struct sock *sk = sock->sk;
-	struct pppol2tp_session *session = sk->sk_user_data;
-	struct pppol2tp_tunnel *tunnel;
-	int val;
-	int err;
-
-	if (level != SOL_PPPOL2TP)
-		return udp_prot.setsockopt(sk, level, optname, optval, optlen);
-
-	if (optlen < sizeof(int))
-		return -EINVAL;
-
-	if (get_user(val, (int __user *)optval))
-		return -EFAULT;
-
-	err = -ENOTCONN;
-	if (sk->sk_user_data == NULL)
-		goto end;
-
-	/* Get session context from the socket */
-	err = -EBADF;
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto end;
-
-	/* Special case: if session_id == 0x0000, treat as operation on tunnel
-	 */
-	if ((session->tunnel_addr.s_session == 0) &&
-	    (session->tunnel_addr.d_session == 0)) {
-		err = -EBADF;
-		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
-		if (tunnel == NULL)
-			goto end_put_sess;
-
-		err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
-		sock_put(session->tunnel_sock);
-	} else
-		err = pppol2tp_session_setsockopt(sk, session, optname, val);
-
-	err = 0;
-
-end_put_sess:
-	sock_put(sk);
-end:
-	return err;
-}
-
-/* Tunnel getsockopt helper. Called with sock locked.
- */
-static int pppol2tp_tunnel_getsockopt(struct sock *sk,
-				      struct pppol2tp_tunnel *tunnel,
-				      int optname, int *val)
-{
-	int err = 0;
-
-	switch (optname) {
-	case PPPOL2TP_SO_DEBUG:
-		*val = tunnel->debug;
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get debug=%x\n", tunnel->name, tunnel->debug);
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-	return err;
-}
-
-/* Session getsockopt helper. Called with sock locked.
- */
-static int pppol2tp_session_getsockopt(struct sock *sk,
-				       struct pppol2tp_session *session,
-				       int optname, int *val)
-{
-	int err = 0;
-
-	switch (optname) {
-	case PPPOL2TP_SO_RECVSEQ:
-		*val = session->recv_seq;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get recv_seq=%d\n", session->name, *val);
-		break;
-
-	case PPPOL2TP_SO_SENDSEQ:
-		*val = session->send_seq;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get send_seq=%d\n", session->name, *val);
-		break;
-
-	case PPPOL2TP_SO_LNSMODE:
-		*val = session->lns_mode;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get lns_mode=%d\n", session->name, *val);
-		break;
-
-	case PPPOL2TP_SO_DEBUG:
-		*val = session->debug;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get debug=%d\n", session->name, *val);
-		break;
-
-	case PPPOL2TP_SO_REORDERTO:
-		*val = (int) jiffies_to_msecs(session->reorder_timeout);
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get reorder_timeout=%d\n", session->name, *val);
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-	}
-
-	return err;
-}
-
-/* Main getsockopt() entry point.
- * Does API checks, then calls either the tunnel or session getsockopt
- * handler, according to whether the PPPoX socket is a for a regular session
- * or the special tunnel type.
- */
-static int pppol2tp_getsockopt(struct socket *sock, int level,
-			       int optname, char __user *optval, int __user *optlen)
-{
-	struct sock *sk = sock->sk;
-	struct pppol2tp_session *session = sk->sk_user_data;
-	struct pppol2tp_tunnel *tunnel;
-	int val, len;
-	int err;
-
-	if (level != SOL_PPPOL2TP)
-		return udp_prot.getsockopt(sk, level, optname, optval, optlen);
-
-	if (get_user(len, (int __user *) optlen))
-		return -EFAULT;
-
-	len = min_t(unsigned int, len, sizeof(int));
-
-	if (len < 0)
-		return -EINVAL;
-
-	err = -ENOTCONN;
-	if (sk->sk_user_data == NULL)
-		goto end;
-
-	/* Get the session context */
-	err = -EBADF;
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto end;
-
-	/* Special case: if session_id == 0x0000, treat as operation on tunnel */
-	if ((session->tunnel_addr.s_session == 0) &&
-	    (session->tunnel_addr.d_session == 0)) {
-		err = -EBADF;
-		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
-		if (tunnel == NULL)
-			goto end_put_sess;
-
-		err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
-		sock_put(session->tunnel_sock);
-	} else
-		err = pppol2tp_session_getsockopt(sk, session, optname, &val);
-
-	err = -EFAULT;
-	if (put_user(len, (int __user *) optlen))
-		goto end_put_sess;
-
-	if (copy_to_user((void __user *) optval, &val, len))
-		goto end_put_sess;
-
-	err = 0;
-
-end_put_sess:
-	sock_put(sk);
-end:
-	return err;
-}
-
-/*****************************************************************************
- * /proc filesystem for debug
- *****************************************************************************/
-
-#ifdef CONFIG_PROC_FS
-
-#include <linux/seq_file.h>
-
-struct pppol2tp_seq_data {
-	struct seq_net_private p;
-	struct pppol2tp_tunnel *tunnel;		/* current tunnel */
-	struct pppol2tp_session *session;	/* NULL means get first session in tunnel */
-};
-
-static struct pppol2tp_session *next_session(struct pppol2tp_tunnel *tunnel, struct pppol2tp_session *curr)
-{
-	struct pppol2tp_session *session = NULL;
-	struct hlist_node *walk;
-	int found = 0;
-	int next = 0;
-	int i;
-
-	read_lock_bh(&tunnel->hlist_lock);
-	for (i = 0; i < PPPOL2TP_HASH_SIZE; i++) {
-		hlist_for_each_entry(session, walk, &tunnel->session_hlist[i], hlist) {
-			if (curr == NULL) {
-				found = 1;
-				goto out;
-			}
-			if (session == curr) {
-				next = 1;
-				continue;
-			}
-			if (next) {
-				found = 1;
-				goto out;
-			}
-		}
-	}
-out:
-	read_unlock_bh(&tunnel->hlist_lock);
-	if (!found)
-		session = NULL;
-
-	return session;
-}
-
-static struct pppol2tp_tunnel *next_tunnel(struct pppol2tp_net *pn,
-					   struct pppol2tp_tunnel *curr)
-{
-	struct pppol2tp_tunnel *tunnel = NULL;
-
-	read_lock_bh(&pn->pppol2tp_tunnel_list_lock);
-	if (list_is_last(&curr->list, &pn->pppol2tp_tunnel_list)) {
-		goto out;
-	}
-	tunnel = list_entry(curr->list.next, struct pppol2tp_tunnel, list);
-out:
-	read_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-
-	return tunnel;
-}
-
-static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs)
-{
-	struct pppol2tp_seq_data *pd = SEQ_START_TOKEN;
-	struct pppol2tp_net *pn;
-	loff_t pos = *offs;
-
-	if (!pos)
-		goto out;
-
-	BUG_ON(m->private == NULL);
-	pd = m->private;
-	pn = pppol2tp_pernet(seq_file_net(m));
-
-	if (pd->tunnel == NULL) {
-		if (!list_empty(&pn->pppol2tp_tunnel_list))
-			pd->tunnel = list_entry(pn->pppol2tp_tunnel_list.next, struct pppol2tp_tunnel, list);
-	} else {
-		pd->session = next_session(pd->tunnel, pd->session);
-		if (pd->session == NULL) {
-			pd->tunnel = next_tunnel(pn, pd->tunnel);
-		}
-	}
-
-	/* NULL tunnel and session indicates end of list */
-	if ((pd->tunnel == NULL) && (pd->session == NULL))
-		pd = NULL;
-
-out:
-	return pd;
-}
-
-static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	(*pos)++;
-	return NULL;
-}
-
-static void pppol2tp_seq_stop(struct seq_file *p, void *v)
-{
-	/* nothing to do */
-}
-
-static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
-{
-	struct pppol2tp_tunnel *tunnel = v;
-
-	seq_printf(m, "\nTUNNEL '%s', %c %d\n",
-		   tunnel->name,
-		   (tunnel == tunnel->sock->sk_user_data) ? 'Y':'N',
-		   atomic_read(&tunnel->ref_count) - 1);
-	seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n",
-		   tunnel->debug,
-		   (unsigned long long)tunnel->stats.tx_packets,
-		   (unsigned long long)tunnel->stats.tx_bytes,
-		   (unsigned long long)tunnel->stats.tx_errors,
-		   (unsigned long long)tunnel->stats.rx_packets,
-		   (unsigned long long)tunnel->stats.rx_bytes,
-		   (unsigned long long)tunnel->stats.rx_errors);
-}
-
-static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
-{
-	struct pppol2tp_session *session = v;
-
-	seq_printf(m, "  SESSION '%s' %08X/%d %04X/%04X -> "
-		   "%04X/%04X %d %c\n",
-		   session->name,
-		   ntohl(session->tunnel_addr.addr.sin_addr.s_addr),
-		   ntohs(session->tunnel_addr.addr.sin_port),
-		   session->tunnel_addr.s_tunnel,
-		   session->tunnel_addr.s_session,
-		   session->tunnel_addr.d_tunnel,
-		   session->tunnel_addr.d_session,
-		   session->sock->sk_state,
-		   (session == session->sock->sk_user_data) ?
-		   'Y' : 'N');
-	seq_printf(m, "   %d/%d/%c/%c/%s %08x %u\n",
-		   session->mtu, session->mru,
-		   session->recv_seq ? 'R' : '-',
-		   session->send_seq ? 'S' : '-',
-		   session->lns_mode ? "LNS" : "LAC",
-		   session->debug,
-		   jiffies_to_msecs(session->reorder_timeout));
-	seq_printf(m, "   %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n",
-		   session->nr, session->ns,
-		   (unsigned long long)session->stats.tx_packets,
-		   (unsigned long long)session->stats.tx_bytes,
-		   (unsigned long long)session->stats.tx_errors,
-		   (unsigned long long)session->stats.rx_packets,
-		   (unsigned long long)session->stats.rx_bytes,
-		   (unsigned long long)session->stats.rx_errors);
-}
-
-static int pppol2tp_seq_show(struct seq_file *m, void *v)
-{
-	struct pppol2tp_seq_data *pd = v;
-
-	/* display header on line 1 */
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n");
-		seq_puts(m, "TUNNEL name, user-data-ok session-count\n");
-		seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
-		seq_puts(m, "  SESSION name, addr/port src-tid/sid "
-			 "dest-tid/sid state user-data-ok\n");
-		seq_puts(m, "   mtu/mru/rcvseq/sendseq/lns debug reorderto\n");
-		seq_puts(m, "   nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
-		goto out;
-	}
-
-	/* Show the tunnel or session context.
-	 */
-	if (pd->session == NULL)
-		pppol2tp_seq_tunnel_show(m, pd->tunnel);
-	else
-		pppol2tp_seq_session_show(m, pd->session);
-
-out:
-	return 0;
-}
-
-static const struct seq_operations pppol2tp_seq_ops = {
-	.start		= pppol2tp_seq_start,
-	.next		= pppol2tp_seq_next,
-	.stop		= pppol2tp_seq_stop,
-	.show		= pppol2tp_seq_show,
-};
-
-/* Called when our /proc file is opened. We allocate data for use when
- * iterating our tunnel / session contexts and store it in the private
- * data of the seq_file.
- */
-static int pppol2tp_proc_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &pppol2tp_seq_ops,
-			    sizeof(struct pppol2tp_seq_data));
-}
-
-static const struct file_operations pppol2tp_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= pppol2tp_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
-#endif /* CONFIG_PROC_FS */
-
-/*****************************************************************************
- * Init and cleanup
- *****************************************************************************/
-
-static const struct proto_ops pppol2tp_ops = {
-	.family		= AF_PPPOX,
-	.owner		= THIS_MODULE,
-	.release	= pppol2tp_release,
-	.bind		= sock_no_bind,
-	.connect	= pppol2tp_connect,
-	.socketpair	= sock_no_socketpair,
-	.accept		= sock_no_accept,
-	.getname	= pppol2tp_getname,
-	.poll		= datagram_poll,
-	.listen		= sock_no_listen,
-	.shutdown	= sock_no_shutdown,
-	.setsockopt	= pppol2tp_setsockopt,
-	.getsockopt	= pppol2tp_getsockopt,
-	.sendmsg	= pppol2tp_sendmsg,
-	.recvmsg	= pppol2tp_recvmsg,
-	.mmap		= sock_no_mmap,
-	.ioctl		= pppox_ioctl,
-};
-
-static struct pppox_proto pppol2tp_proto = {
-	.create		= pppol2tp_create,
-	.ioctl		= pppol2tp_ioctl
-};
-
-static __net_init int pppol2tp_init_net(struct net *net)
-{
-	struct pppol2tp_net *pn = pppol2tp_pernet(net);
-	struct proc_dir_entry *pde;
-
-	INIT_LIST_HEAD(&pn->pppol2tp_tunnel_list);
-	rwlock_init(&pn->pppol2tp_tunnel_list_lock);
-
-	pde = proc_net_fops_create(net, "pppol2tp", S_IRUGO, &pppol2tp_proc_fops);
-#ifdef CONFIG_PROC_FS
-	if (!pde)
-		return -ENOMEM;
-#endif
-
-	return 0;
-}
-
-static __net_exit void pppol2tp_exit_net(struct net *net)
-{
-	proc_net_remove(net, "pppol2tp");
-}
-
-static struct pernet_operations pppol2tp_net_ops = {
-	.init = pppol2tp_init_net,
-	.exit = pppol2tp_exit_net,
-	.id   = &pppol2tp_net_id,
-	.size = sizeof(struct pppol2tp_net),
-};
-
-static int __init pppol2tp_init(void)
-{
-	int err;
-
-	err = proto_register(&pppol2tp_sk_proto, 0);
-	if (err)
-		goto out;
-	err = register_pppox_proto(PX_PROTO_OL2TP, &pppol2tp_proto);
-	if (err)
-		goto out_unregister_pppol2tp_proto;
-
-	err = register_pernet_device(&pppol2tp_net_ops);
-	if (err)
-		goto out_unregister_pppox_proto;
-
-	printk(KERN_INFO "PPPoL2TP kernel driver, %s\n",
-	       PPPOL2TP_DRV_VERSION);
-
-out:
-	return err;
-out_unregister_pppox_proto:
-	unregister_pppox_proto(PX_PROTO_OL2TP);
-out_unregister_pppol2tp_proto:
-	proto_unregister(&pppol2tp_sk_proto);
-	goto out;
-}
-
-static void __exit pppol2tp_exit(void)
-{
-	unregister_pppox_proto(PX_PROTO_OL2TP);
-	unregister_pernet_device(&pppol2tp_net_ops);
-	proto_unregister(&pppol2tp_sk_proto);
-}
-
-module_init(pppol2tp_init);
-module_exit(pppol2tp_exit);
-
-MODULE_AUTHOR("Martijn van Oosterhout <kleptog@svana.org>, "
-	      "James Chapman <jchapman@katalix.com>");
-MODULE_DESCRIPTION("PPP over L2TP over UDP");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(PPPOL2TP_DRV_VERSION);
-- 
cgit v1.2.3-59-g8ed1b


From 9345471bca96d00d4196b3dcc4a5625f1bfae247 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:18:44 +0000
Subject: l2tp: Add ppp device name to L2TP ppp session data

When dumping L2TP PPP sessions using /proc/net/pppol2tp, get the
assigned PPP device name from PPP using ppp_dev_name().

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_ppp.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index baac072761aa..3ad290dd830a 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1465,6 +1465,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
 	struct l2tp_session *session = v;
 	struct l2tp_tunnel *tunnel = session->tunnel;
 	struct pppol2tp_session *ps = l2tp_session_priv(session);
+	struct pppox_sock *po = pppox_sk(ps->sock);
 	u32 ip = 0;
 	u16 port = 0;
 
@@ -1499,6 +1500,9 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
 		   (unsigned long long)session->stats.rx_packets,
 		   (unsigned long long)session->stats.rx_bytes,
 		   (unsigned long long)session->stats.rx_errors);
+
+	if (po)
+		seq_printf(m, "   interface %s\n", ppp_dev_name(&po->chan));
 }
 
 static int pppol2tp_seq_show(struct seq_file *m, void *v)
-- 
cgit v1.2.3-59-g8ed1b


From f7faffa3ff8ef6ae712ef16312b8a2aa7a1c95fe Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:18:49 +0000
Subject: l2tp: Add L2TPv3 protocol support

The L2TPv3 protocol changes the layout of the L2TP packet
header. Tunnel and session ids change from 16-bit to 32-bit values,
data sequence numbers change from 16-bit to 24-bit values and PPP-specific
fields are moved into protocol-specific subheaders.

Although this patch introduces L2TPv3 protocol support, there are no
userspace interfaces to create L2TPv3 sessions yet.

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/Kconfig     |  25 +++
 net/l2tp/l2tp_core.c | 532 ++++++++++++++++++++++++++++++++++++++-------------
 net/l2tp/l2tp_core.h |  54 +++++-
 net/l2tp/l2tp_ppp.c  |  21 +-
 4 files changed, 484 insertions(+), 148 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index ec88c5cdc397..d60758d60478 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -19,6 +19,10 @@ menuconfig L2TP
 	  connections. L2TP is also used as a VPN protocol, popular
 	  with home workers to connect to their offices.
 
+	  L2TPv3 allows other protocols as well as PPP to be carried
+	  over L2TP tunnels. L2TPv3 is defined in RFC 3931
+	  <http://www.ietf.org/rfc/rfc3931.txt>.
+
 	  The kernel component handles only L2TP data packets: a
 	  userland daemon handles L2TP the control protocol (tunnel
 	  and session setup). One such daemon is OpenL2TP
@@ -26,3 +30,24 @@ menuconfig L2TP
 
 	  If you don't need L2TP, say N. To compile all L2TP code as
 	  modules, choose M here.
+
+config L2TP_V3
+	bool "L2TPv3 support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL && L2TP
+	help
+	  Layer Two Tunneling Protocol Version 3
+
+	  From RFC 3931 <http://www.ietf.org/rfc/rfc3931.txt>.
+
+	  The Layer Two Tunneling Protocol (L2TP) provides a dynamic
+	  mechanism for tunneling Layer 2 (L2) "circuits" across a
+	  packet-oriented data network (e.g., over IP).  L2TP, as
+	  originally defined in RFC 2661, is a standard method for
+	  tunneling Point-to-Point Protocol (PPP) [RFC1661] sessions.
+	  L2TP has since been adopted for tunneling a number of other
+	  L2 protocols, including ATM, Frame Relay, HDLC and even raw
+	  Ethernet frames.
+
+	  If you are connecting to L2TPv3 equipment, or you want to
+	  tunnel raw Ethernet frames using L2TP, say Y here. If
+	  unsure, say N.
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 4b6da3689893..0eee1a65f1b1 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -65,6 +65,7 @@
 
 #define L2TP_HDR_VER_MASK  0x000F
 #define L2TP_HDR_VER_2	   0x0002
+#define L2TP_HDR_VER_3	   0x0003
 
 /* L2TPv3 default L2-specific sublayer */
 #define L2TP_SLFLAG_S	   0x40000000
@@ -85,7 +86,7 @@
 /* Private data stored for received packets in the skb.
  */
 struct l2tp_skb_cb {
-	u16			ns;
+	u32			ns;
 	u16			has_seq;
 	u16			length;
 	unsigned long		expires;
@@ -101,6 +102,8 @@ static unsigned int l2tp_net_id;
 struct l2tp_net {
 	struct list_head l2tp_tunnel_list;
 	rwlock_t l2tp_tunnel_list_lock;
+	struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
+	rwlock_t l2tp_session_hlist_lock;
 };
 
 static inline struct l2tp_net *l2tp_pernet(struct net *net)
@@ -110,6 +113,40 @@ static inline struct l2tp_net *l2tp_pernet(struct net *net)
 	return net_generic(net, l2tp_net_id);
 }
 
+/* Session hash global list for L2TPv3.
+ * The session_id SHOULD be random according to RFC3931, but several
+ * L2TP implementations use incrementing session_ids.  So we do a real
+ * hash on the session_id, rather than a simple bitmask.
+ */
+static inline struct hlist_head *
+l2tp_session_id_hash_2(struct l2tp_net *pn, u32 session_id)
+{
+	return &pn->l2tp_session_hlist[hash_32(session_id, L2TP_HASH_BITS_2)];
+
+}
+
+/* Lookup a session by id in the global session list
+ */
+static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id)
+{
+	struct l2tp_net *pn = l2tp_pernet(net);
+	struct hlist_head *session_list =
+		l2tp_session_id_hash_2(pn, session_id);
+	struct l2tp_session *session;
+	struct hlist_node *walk;
+
+	read_lock_bh(&pn->l2tp_session_hlist_lock);
+	hlist_for_each_entry(session, walk, session_list, global_hlist) {
+		if (session->session_id == session_id) {
+			read_unlock_bh(&pn->l2tp_session_hlist_lock);
+			return session;
+		}
+	}
+	read_unlock_bh(&pn->l2tp_session_hlist_lock);
+
+	return NULL;
+}
+
 /* Session hash list.
  * The session_id SHOULD be random according to RFC2661, but several
  * L2TP implementations (Cisco and Microsoft) use incrementing
@@ -124,13 +161,20 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
 
 /* Lookup a session by id
  */
-struct l2tp_session *l2tp_session_find(struct l2tp_tunnel *tunnel, u32 session_id)
+struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id)
 {
-	struct hlist_head *session_list =
-		l2tp_session_id_hash(tunnel, session_id);
+	struct hlist_head *session_list;
 	struct l2tp_session *session;
 	struct hlist_node *walk;
 
+	/* In L2TPv3, session_ids are unique over all tunnels and we
+	 * sometimes need to look them up before we know the
+	 * tunnel.
+	 */
+	if (tunnel == NULL)
+		return l2tp_session_find_2(net, session_id);
+
+	session_list = l2tp_session_id_hash(tunnel, session_id);
 	read_lock_bh(&tunnel->hlist_lock);
 	hlist_for_each_entry(session, walk, session_list, hlist) {
 		if (session->session_id == session_id) {
@@ -218,7 +262,7 @@ static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *sk
 {
 	struct sk_buff *skbp;
 	struct sk_buff *tmp;
-	u16 ns = L2TP_SKB_CB(skb)->ns;
+	u32 ns = L2TP_SKB_CB(skb)->ns;
 
 	spin_lock_bh(&session->reorder_q.lock);
 	skb_queue_walk_safe(&session->reorder_q, skbp, tmp) {
@@ -259,6 +303,11 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *
 	if (L2TP_SKB_CB(skb)->has_seq) {
 		/* Bump our Nr */
 		session->nr++;
+		if (tunnel->version == L2TP_HDR_VER_2)
+			session->nr &= 0xffff;
+		else
+			session->nr &= 0xffffff;
+
 		PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
 		       "%s: updated nr to %hu\n", session->name, session->nr);
 	}
@@ -291,8 +340,8 @@ static void l2tp_recv_dequeue(struct l2tp_session *session)
 			session->stats.rx_seq_discards++;
 			session->stats.rx_errors++;
 			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
-			       "%s: oos pkt %hu len %d discarded (too old), "
-			       "waiting for %hu, reorder_q_len=%d\n",
+			       "%s: oos pkt %u len %d discarded (too old), "
+			       "waiting for %u, reorder_q_len=%d\n",
 			       session->name, L2TP_SKB_CB(skb)->ns,
 			       L2TP_SKB_CB(skb)->length, session->nr,
 			       skb_queue_len(&session->reorder_q));
@@ -306,8 +355,8 @@ static void l2tp_recv_dequeue(struct l2tp_session *session)
 		if (L2TP_SKB_CB(skb)->has_seq) {
 			if (L2TP_SKB_CB(skb)->ns != session->nr) {
 				PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
-				       "%s: holding oos pkt %hu len %d, "
-				       "waiting for %hu, reorder_q_len=%d\n",
+				       "%s: holding oos pkt %u len %d, "
+				       "waiting for %u, reorder_q_len=%d\n",
 				       session->name, L2TP_SKB_CB(skb)->ns,
 				       L2TP_SKB_CB(skb)->length, session->nr,
 				       skb_queue_len(&session->reorder_q));
@@ -352,100 +401,73 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk,
 	return __skb_checksum_complete(skb);
 }
 
-/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
- * here. The skb is not on a list when we get here.
- * Returns 0 if the packet was a data packet and was successfully passed on.
- * Returns 1 if the packet was not a good data packet and could not be
- * forwarded.  All such packets are passed up to userspace to deal with.
+/* Do receive processing of L2TP data frames. We handle both L2TPv2
+ * and L2TPv3 data frames here.
+ *
+ * L2TPv2 Data Message Header
+ *
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |T|L|x|x|S|x|O|P|x|x|x|x|  Ver  |          Length (opt)         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |           Tunnel ID           |           Session ID          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |             Ns (opt)          |             Nr (opt)          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |      Offset Size (opt)        |    Offset pad... (opt)
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Data frames are marked by T=0. All other fields are the same as
+ * those in L2TP control frames.
+ *
+ * L2TPv3 Data Message Header
+ *
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                      L2TP Session Header                      |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                      L2-Specific Sublayer                     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                        Tunnel Payload                      ...
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * L2TPv3 Session Header Over IP
+ *
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                           Session ID                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |               Cookie (optional, maximum 64 bits)...
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *                                                                 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * L2TPv3 L2-Specific Sublayer Format
+ *
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |x|S|x|x|x|x|x|x|              Sequence Number                  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Cookie value, sublayer format and offset (pad) are negotiated with
+ * the peer when the session is set up. Unlike L2TPv2, we do not need
+ * to parse the packet header to determine if optional fields are
+ * present.
+ *
+ * Caller must already have parsed the frame and determined that it is
+ * a data (not control) frame before coming here. Fields up to the
+ * session-id have already been parsed and ptr points to the data
+ * after the session-id.
  */
-int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
-		       int (*payload_hook)(struct sk_buff *skb))
+void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
+		      unsigned char *ptr, unsigned char *optr, u16 hdrflags,
+		      int length, int (*payload_hook)(struct sk_buff *skb))
 {
-	struct l2tp_session *session = NULL;
-	unsigned char *ptr, *optr;
-	u16 hdrflags;
-	u32 tunnel_id, session_id;
-	int length;
+	struct l2tp_tunnel *tunnel = session->tunnel;
 	int offset;
-	u16 version;
-	u16 ns, nr;
-
-	if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb))
-		goto discard_bad_csum;
-
-	/* UDP always verifies the packet length. */
-	__skb_pull(skb, sizeof(struct udphdr));
-
-	/* Short packet? */
-	if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
-		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
-		       "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
-		goto error;
-	}
-
-	/* Point to L2TP header */
-	optr = ptr = skb->data;
-
-	/* Trace packet contents, if enabled */
-	if (tunnel->debug & L2TP_MSG_DATA) {
-		length = min(32u, skb->len);
-		if (!pskb_may_pull(skb, length))
-			goto error;
-
-		printk(KERN_DEBUG "%s: recv: ", tunnel->name);
-
-		offset = 0;
-		do {
-			printk(" %02X", ptr[offset]);
-		} while (++offset < length);
-
-		printk("\n");
-	}
-
-	/* Get L2TP header flags */
-	hdrflags = ntohs(*(__be16 *)ptr);
-
-	/* Check protocol version */
-	version = hdrflags & L2TP_HDR_VER_MASK;
-	if (version != tunnel->version) {
-		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
-		       "%s: recv protocol version mismatch: got %d expected %d\n",
-		       tunnel->name, version, tunnel->version);
-		goto error;
-	}
-
-	/* Get length of L2TP packet */
-	length = skb->len;
-
-	/* If type is control packet, it is handled by userspace. */
-	if (hdrflags & L2TP_HDRFLAG_T) {
-		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: recv control packet, len=%d\n", tunnel->name, length);
-		goto error;
-	}
-
-	/* Skip flags */
-	ptr += 2;
-
-	/* If length is present, skip it */
-	if (hdrflags & L2TP_HDRFLAG_L)
-		ptr += 2;
-
-	/* Extract tunnel and session ID */
-	tunnel_id = ntohs(*(__be16 *) ptr);
-	ptr += 2;
-	session_id = ntohs(*(__be16 *) ptr);
-	ptr += 2;
-
-	/* Find the session context */
-	session = l2tp_session_find(tunnel, session_id);
-	if (!session) {
-		/* Not found? Pass to userspace to deal with */
-		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
-		       "%s: no session found (%hu/%hu). Passing up.\n",
-		       tunnel->name, tunnel_id, session_id);
-		goto error;
-	}
+	u32 ns, nr;
 
 	/* The ref count is increased since we now hold a pointer to
 	 * the session. Take care to decrement the refcnt when exiting
@@ -455,6 +477,18 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 	if (session->ref)
 		(*session->ref)(session);
 
+	/* Parse and check optional cookie */
+	if (session->peer_cookie_len > 0) {
+		if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) {
+			PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+			       "%s: cookie mismatch (%u/%u). Discarding.\n",
+			       tunnel->name, tunnel->tunnel_id, session->session_id);
+			session->stats.rx_cookie_discards++;
+			goto discard;
+		}
+		ptr += session->peer_cookie_len;
+	}
+
 	/* Handle the optional sequence numbers. Sequence numbers are
 	 * in different places for L2TPv2 and L2TPv3.
 	 *
@@ -464,21 +498,40 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 	 */
 	ns = nr = 0;
 	L2TP_SKB_CB(skb)->has_seq = 0;
-	if (hdrflags & L2TP_HDRFLAG_S) {
-		ns = (u16) ntohs(*(__be16 *) ptr);
-		ptr += 2;
-		nr = ntohs(*(__be16 *) ptr);
-		ptr += 2;
+	if (tunnel->version == L2TP_HDR_VER_2) {
+		if (hdrflags & L2TP_HDRFLAG_S) {
+			ns = ntohs(*(__be16 *) ptr);
+			ptr += 2;
+			nr = ntohs(*(__be16 *) ptr);
+			ptr += 2;
 
-		/* Store L2TP info in the skb */
-		L2TP_SKB_CB(skb)->ns = ns;
-		L2TP_SKB_CB(skb)->has_seq = 1;
+			/* Store L2TP info in the skb */
+			L2TP_SKB_CB(skb)->ns = ns;
+			L2TP_SKB_CB(skb)->has_seq = 1;
 
-		PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
-		       "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n",
-		       session->name, ns, nr, session->nr);
+			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+			       "%s: recv data ns=%u, nr=%u, session nr=%u\n",
+			       session->name, ns, nr, session->nr);
+		}
+	} else if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
+		u32 l2h = ntohl(*(__be32 *) ptr);
+
+		if (l2h & 0x40000000) {
+			ns = l2h & 0x00ffffff;
+
+			/* Store L2TP info in the skb */
+			L2TP_SKB_CB(skb)->ns = ns;
+			L2TP_SKB_CB(skb)->has_seq = 1;
+
+			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+			       "%s: recv data ns=%u, session nr=%u\n",
+			       session->name, ns, session->nr);
+		}
 	}
 
+	/* Advance past L2-specific header, if present */
+	ptr += session->l2specific_len;
+
 	if (L2TP_SKB_CB(skb)->has_seq) {
 		/* Received a packet with sequence numbers. If we're the LNS,
 		 * check if we sre sending sequence numbers and if not,
@@ -489,6 +542,7 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 			       "%s: requested to enable seq numbers by LNS\n",
 			       session->name);
 			session->send_seq = -1;
+			l2tp_session_set_header_len(session, tunnel->version);
 		}
 	} else {
 		/* No sequence numbers.
@@ -512,6 +566,7 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 			       "%s: requested to disable seq numbers by LNS\n",
 			       session->name);
 			session->send_seq = 0;
+			l2tp_session_set_header_len(session, tunnel->version);
 		} else if (session->send_seq) {
 			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_WARNING,
 			       "%s: recv data has no seq numbers when required. "
@@ -521,11 +576,19 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 		}
 	}
 
-	/* If offset bit set, skip it. */
-	if (hdrflags & L2TP_HDRFLAG_O) {
-		offset = ntohs(*(__be16 *)ptr);
-		ptr += 2 + offset;
-	}
+	/* Session data offset is handled differently for L2TPv2 and
+	 * L2TPv3. For L2TPv2, there is an optional 16-bit value in
+	 * the header. For L2TPv3, the offset is negotiated using AVPs
+	 * in the session setup control protocol.
+	 */
+	if (tunnel->version == L2TP_HDR_VER_2) {
+		/* If offset bit set, skip it. */
+		if (hdrflags & L2TP_HDRFLAG_O) {
+			offset = ntohs(*(__be16 *)ptr);
+			ptr += 2 + offset;
+		}
+	} else
+		ptr += session->offset;
 
 	offset = ptr - optr;
 	if (!pskb_may_pull(skb, offset))
@@ -564,8 +627,8 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 			if (L2TP_SKB_CB(skb)->ns != session->nr) {
 				session->stats.rx_seq_discards++;
 				PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
-				       "%s: oos pkt %hu len %d discarded, "
-				       "waiting for %hu, reorder_q_len=%d\n",
+				       "%s: oos pkt %u len %d discarded, "
+				       "waiting for %u, reorder_q_len=%d\n",
 				       session->name, L2TP_SKB_CB(skb)->ns,
 				       L2TP_SKB_CB(skb)->length, session->nr,
 				       skb_queue_len(&session->reorder_q));
@@ -586,7 +649,7 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 
 	l2tp_session_dec_refcount(session);
 
-	return 0;
+	return;
 
 discard:
 	session->stats.rx_errors++;
@@ -596,6 +659,111 @@ discard:
 		(*session->deref)(session);
 
 	l2tp_session_dec_refcount(session);
+}
+EXPORT_SYMBOL(l2tp_recv_common);
+
+/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
+ * here. The skb is not on a list when we get here.
+ * Returns 0 if the packet was a data packet and was successfully passed on.
+ * Returns 1 if the packet was not a good data packet and could not be
+ * forwarded.  All such packets are passed up to userspace to deal with.
+ */
+int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
+		       int (*payload_hook)(struct sk_buff *skb))
+{
+	struct l2tp_session *session = NULL;
+	unsigned char *ptr, *optr;
+	u16 hdrflags;
+	u32 tunnel_id, session_id;
+	int offset;
+	u16 version;
+	int length;
+
+	if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb))
+		goto discard_bad_csum;
+
+	/* UDP always verifies the packet length. */
+	__skb_pull(skb, sizeof(struct udphdr));
+
+	/* Short packet? */
+	if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
+		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+		       "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
+		goto error;
+	}
+
+	/* Point to L2TP header */
+	optr = ptr = skb->data;
+
+	/* Trace packet contents, if enabled */
+	if (tunnel->debug & L2TP_MSG_DATA) {
+		length = min(32u, skb->len);
+		if (!pskb_may_pull(skb, length))
+			goto error;
+
+		printk(KERN_DEBUG "%s: recv: ", tunnel->name);
+
+		offset = 0;
+		do {
+			printk(" %02X", ptr[offset]);
+		} while (++offset < length);
+
+		printk("\n");
+	}
+
+	/* Get L2TP header flags */
+	hdrflags = ntohs(*(__be16 *) ptr);
+
+	/* Check protocol version */
+	version = hdrflags & L2TP_HDR_VER_MASK;
+	if (version != tunnel->version) {
+		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+		       "%s: recv protocol version mismatch: got %d expected %d\n",
+		       tunnel->name, version, tunnel->version);
+		goto error;
+	}
+
+	/* Get length of L2TP packet */
+	length = skb->len;
+
+	/* If type is control packet, it is handled by userspace. */
+	if (hdrflags & L2TP_HDRFLAG_T) {
+		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: recv control packet, len=%d\n", tunnel->name, length);
+		goto error;
+	}
+
+	/* Skip flags */
+	ptr += 2;
+
+	if (tunnel->version == L2TP_HDR_VER_2) {
+		/* If length is present, skip it */
+		if (hdrflags & L2TP_HDRFLAG_L)
+			ptr += 2;
+
+		/* Extract tunnel and session ID */
+		tunnel_id = ntohs(*(__be16 *) ptr);
+		ptr += 2;
+		session_id = ntohs(*(__be16 *) ptr);
+		ptr += 2;
+	} else {
+		ptr += 2;	/* skip reserved bits */
+		tunnel_id = tunnel->tunnel_id;
+		session_id = ntohl(*(__be32 *) ptr);
+		ptr += 4;
+	}
+
+	/* Find the session context */
+	session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id);
+	if (!session) {
+		/* Not found? Pass to userspace to deal with */
+		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+		       "%s: no session found (%u/%u). Passing up.\n",
+		       tunnel->name, tunnel_id, session_id);
+		goto error;
+	}
+
+	l2tp_recv_common(session, skb, ptr, optr, hdrflags, length, payload_hook);
 
 	return 0;
 
@@ -651,11 +819,11 @@ EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv);
 
 /* Build an L2TP header for the session into the buffer provided.
  */
-static void l2tp_build_l2tpv2_header(struct l2tp_tunnel *tunnel,
-				     struct l2tp_session *session,
-				     void *buf)
+static int l2tp_build_l2tpv2_header(struct l2tp_session *session, void *buf)
 {
+	struct l2tp_tunnel *tunnel = session->tunnel;
 	__be16 *bufp = buf;
+	__be16 *optr = buf;
 	u16 flags = L2TP_HDR_VER_2;
 	u32 tunnel_id = tunnel->peer_tunnel_id;
 	u32 session_id = session->peer_session_id;
@@ -671,19 +839,51 @@ static void l2tp_build_l2tpv2_header(struct l2tp_tunnel *tunnel,
 		*bufp++ = htons(session->ns);
 		*bufp++ = 0;
 		session->ns++;
+		session->ns &= 0xffff;
 		PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
-		       "%s: updated ns to %hu\n", session->name, session->ns);
+		       "%s: updated ns to %u\n", session->name, session->ns);
 	}
+
+	return bufp - optr;
 }
 
-void l2tp_build_l2tp_header(struct l2tp_session *session, void *buf)
+static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
 {
-	struct l2tp_tunnel *tunnel = session->tunnel;
+	char *bufp = buf;
+	char *optr = bufp;
+	u16 flags = L2TP_HDR_VER_3;
+
+	/* Setup L2TP header. */
+	*((__be16 *) bufp) = htons(flags);
+	bufp += 2;
+	*((__be16 *) bufp) = 0;
+	bufp += 2;
+	*((__be32 *) bufp) = htonl(session->peer_session_id);
+	bufp += 4;
+	if (session->cookie_len) {
+		memcpy(bufp, &session->cookie[0], session->cookie_len);
+		bufp += session->cookie_len;
+	}
+	if (session->l2specific_len) {
+		if (session->l2specific_type == L2TP_L2SPECTYPE_DEFAULT) {
+			u32 l2h = 0;
+			if (session->send_seq) {
+				l2h = 0x40000000 | session->ns;
+				session->ns++;
+				session->ns &= 0xffffff;
+				PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+				       "%s: updated ns to %u\n", session->name, session->ns);
+			}
+
+			*((__be32 *) bufp) = htonl(l2h);
+		}
+		bufp += session->l2specific_len;
+	}
+	if (session->offset)
+		bufp += session->offset;
 
-	BUG_ON(tunnel->version != L2TP_HDR_VER_2);
-	l2tp_build_l2tpv2_header(tunnel, session, buf);
+	return bufp - optr;
 }
-EXPORT_SYMBOL_GPL(l2tp_build_l2tp_header);
 
 int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len)
 {
@@ -694,7 +894,7 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat
 	/* Debug */
 	if (session->send_seq)
 		PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %Zd bytes, ns=%hu\n", session->name,
+		       "%s: send %Zd bytes, ns=%u\n", session->name,
 		       data_len, session->ns - 1);
 	else
 		PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG,
@@ -780,7 +980,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 	skb->truesize += new_headroom - old_headroom;
 
 	/* Setup L2TP header */
-	l2tp_build_l2tp_header(session, __skb_push(skb, hdr_len));
+	session->build_header(session, __skb_push(skb, hdr_len));
 	udp_len = sizeof(struct udphdr) + hdr_len + data_len;
 
 	/* Setup UDP header */
@@ -791,7 +991,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 	uh->source = inet->inet_sport;
 	uh->dest = inet->inet_dport;
 	uh->len = htons(udp_len);
-
 	uh->check = 0;
 
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -911,6 +1110,14 @@ again:
 
 			write_unlock_bh(&tunnel->hlist_lock);
 
+			if (tunnel->version != L2TP_HDR_VER_2) {
+				struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+
+				write_lock_bh(&pn->l2tp_session_hlist_lock);
+				hlist_del_init(&session->global_hlist);
+				write_unlock_bh(&pn->l2tp_session_hlist_lock);
+			}
+
 			if (session->session_close != NULL)
 				(*session->session_close)(session);
 
@@ -997,9 +1204,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 		goto err;
 	}
 
-	if (version != L2TP_HDR_VER_2)
-		goto err;
-
 	tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
 	if (tunnel == NULL) {
 		err = -ENOMEM;
@@ -1077,6 +1281,15 @@ void l2tp_session_free(struct l2tp_session *session)
 		hlist_del_init(&session->hlist);
 		write_unlock_bh(&tunnel->hlist_lock);
 
+		/* Unlink from the global hash if not L2TPv2 */
+		if (tunnel->version != L2TP_HDR_VER_2) {
+			struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+
+			write_lock_bh(&pn->l2tp_session_hlist_lock);
+			hlist_del_init(&session->global_hlist);
+			write_unlock_bh(&pn->l2tp_session_hlist_lock);
+		}
+
 		if (session->session_id != 0)
 			atomic_dec(&l2tp_session_count);
 
@@ -1095,6 +1308,22 @@ void l2tp_session_free(struct l2tp_session *session)
 }
 EXPORT_SYMBOL_GPL(l2tp_session_free);
 
+/* We come here whenever a session's send_seq, cookie_len or
+ * l2specific_len parameters are set.
+ */
+void l2tp_session_set_header_len(struct l2tp_session *session, int version)
+{
+	if (version == L2TP_HDR_VER_2) {
+		session->hdr_len = 6;
+		if (session->send_seq)
+			session->hdr_len += 4;
+	} else {
+		session->hdr_len = 8 + session->cookie_len + session->l2specific_len + session->offset;
+	}
+
+}
+EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
+
 struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
 {
 	struct l2tp_session *session;
@@ -1106,6 +1335,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 
 		session->session_id = session_id;
 		session->peer_session_id = peer_session_id;
+		session->nr = 1;
 
 		sprintf(&session->name[0], "sess %u/%u",
 			tunnel->tunnel_id, session->session_id);
@@ -1113,20 +1343,36 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 		skb_queue_head_init(&session->reorder_q);
 
 		INIT_HLIST_NODE(&session->hlist);
+		INIT_HLIST_NODE(&session->global_hlist);
 
 		/* Inherit debug options from tunnel */
 		session->debug = tunnel->debug;
 
 		if (cfg) {
+			session->pwtype = cfg->pw_type;
 			session->debug = cfg->debug;
-			session->hdr_len = cfg->hdr_len;
 			session->mtu = cfg->mtu;
 			session->mru = cfg->mru;
 			session->send_seq = cfg->send_seq;
 			session->recv_seq = cfg->recv_seq;
 			session->lns_mode = cfg->lns_mode;
+			session->reorder_timeout = cfg->reorder_timeout;
+			session->offset = cfg->offset;
+			session->l2specific_type = cfg->l2specific_type;
+			session->l2specific_len = cfg->l2specific_len;
+			session->cookie_len = cfg->cookie_len;
+			memcpy(&session->cookie[0], &cfg->cookie[0], cfg->cookie_len);
+			session->peer_cookie_len = cfg->peer_cookie_len;
+			memcpy(&session->peer_cookie[0], &cfg->peer_cookie[0], cfg->peer_cookie_len);
 		}
 
+		if (tunnel->version == L2TP_HDR_VER_2)
+			session->build_header = l2tp_build_l2tpv2_header;
+		else
+			session->build_header = l2tp_build_l2tpv3_header;
+
+		l2tp_session_set_header_len(session, tunnel->version);
+
 		/* Bump the reference count. The session context is deleted
 		 * only when this drops to zero.
 		 */
@@ -1142,6 +1388,16 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 			       l2tp_session_id_hash(tunnel, session_id));
 		write_unlock_bh(&tunnel->hlist_lock);
 
+		/* And to the global session list if L2TPv3 */
+		if (tunnel->version != L2TP_HDR_VER_2) {
+			struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
+
+			write_lock_bh(&pn->l2tp_session_hlist_lock);
+			hlist_add_head(&session->global_hlist,
+				       l2tp_session_id_hash_2(pn, session_id));
+			write_unlock_bh(&pn->l2tp_session_hlist_lock);
+		}
+
 		/* Ignore management session in session count value */
 		if (session->session_id != 0)
 			atomic_inc(&l2tp_session_count);
@@ -1159,6 +1415,7 @@ static __net_init int l2tp_init_net(struct net *net)
 {
 	struct l2tp_net *pn;
 	int err;
+	int hash;
 
 	pn = kzalloc(sizeof(*pn), GFP_KERNEL);
 	if (!pn)
@@ -1167,6 +1424,11 @@ static __net_init int l2tp_init_net(struct net *net)
 	INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
 	rwlock_init(&pn->l2tp_tunnel_list_lock);
 
+	for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
+		INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]);
+
+	rwlock_init(&pn->l2tp_session_hlist_lock);
+
 	err = net_assign_generic(net, l2tp_net_id, pn);
 	if (err)
 		goto out;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 2efe1a3ada98..5c53eb2a8ad9 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -15,9 +15,14 @@
 #define L2TP_TUNNEL_MAGIC	0x42114DDA
 #define L2TP_SESSION_MAGIC	0x0C04EB7D
 
+/* Per tunnel, session hash table size */
 #define L2TP_HASH_BITS	4
 #define L2TP_HASH_SIZE	(1 << L2TP_HASH_BITS)
 
+/* System-wide, session hash table size */
+#define L2TP_HASH_BITS_2	8
+#define L2TP_HASH_SIZE_2	(1 << L2TP_HASH_BITS_2)
+
 /* Debug message categories for the DEBUG socket option */
 enum {
 	L2TP_MSG_DEBUG		= (1 << 0),	/* verbose debug (if
@@ -28,6 +33,21 @@ enum {
 	L2TP_MSG_DATA		= (1 << 3),	/* data packets */
 };
 
+enum l2tp_pwtype {
+	L2TP_PWTYPE_NONE = 0x0000,
+	L2TP_PWTYPE_ETH_VLAN = 0x0004,
+	L2TP_PWTYPE_ETH = 0x0005,
+	L2TP_PWTYPE_PPP = 0x0007,
+	L2TP_PWTYPE_PPP_AC = 0x0008,
+	L2TP_PWTYPE_IP = 0x000b,
+	__L2TP_PWTYPE_MAX
+};
+
+enum l2tp_l2spec_type {
+	L2TP_L2SPECTYPE_NONE,
+	L2TP_L2SPECTYPE_DEFAULT,
+};
+
 struct sk_buff;
 
 struct l2tp_stats {
@@ -39,6 +59,7 @@ struct l2tp_stats {
 	u64			rx_seq_discards;
 	u64			rx_oos_packets;
 	u64			rx_errors;
+	u64			rx_cookie_discards;
 };
 
 struct l2tp_tunnel;
@@ -47,6 +68,7 @@ struct l2tp_tunnel;
  * packets and transmit outgoing ones.
  */
 struct l2tp_session_cfg {
+	enum l2tp_pwtype	pw_type;
 	unsigned		data_seq:2;	/* data sequencing level
 						 * 0 => none, 1 => IP only,
 						 * 2 => all
@@ -60,12 +82,17 @@ struct l2tp_session_cfg {
 						 * control of LNS. */
 	int			debug;		/* bitmask of debug message
 						 * categories */
-	int			offset;		/* offset to payload */
+	u16			offset;		/* offset to payload */
+	u16			l2specific_len;	/* Layer 2 specific length */
+	u16			l2specific_type; /* Layer 2 specific type */
+	u8			cookie[8];	/* optional cookie */
+	int			cookie_len;	/* 0, 4 or 8 bytes */
+	u8			peer_cookie[8];	/* peer's cookie */
+	int			peer_cookie_len; /* 0, 4 or 8 bytes */
 	int			reorder_timeout; /* configured reorder timeout
 						  * (in jiffies) */
 	int			mtu;
 	int			mru;
-	int			hdr_len;
 };
 
 struct l2tp_session {
@@ -76,8 +103,17 @@ struct l2tp_session {
 						 * context */
 	u32			session_id;
 	u32			peer_session_id;
-	u16			nr;		/* session NR state (receive) */
-	u16			ns;		/* session NR state (send) */
+	u8			cookie[8];
+	int			cookie_len;
+	u8			peer_cookie[8];
+	int			peer_cookie_len;
+	u16			offset;		/* offset from end of L2TP header
+						   to beginning of data */
+	u16			l2specific_len;
+	u16			l2specific_type;
+	u16			hdr_len;
+	u32			nr;		/* session NR state (receive) */
+	u32			ns;		/* session NR state (send) */
 	struct sk_buff_head	reorder_q;	/* receive reorder queue */
 	struct hlist_node	hlist;		/* Hash list node */
 	atomic_t		ref_count;
@@ -100,9 +136,11 @@ struct l2tp_session {
 						  * (in jiffies) */
 	int			mtu;
 	int			mru;
-	int			hdr_len;
+	enum l2tp_pwtype	pwtype;
 	struct l2tp_stats	stats;
+	struct hlist_node	global_hlist;	/* Global hash list node */
 
+	int (*build_header)(struct l2tp_session *session, void *buf);
 	void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len);
 	void (*session_close)(struct l2tp_session *session);
 	void (*ref)(struct l2tp_session *session);
@@ -132,7 +170,6 @@ struct l2tp_tunnel {
 	char			name[20];	/* for logging */
 	int			debug;		/* bitmask of debug message
 						 * categories */
-	int			hdr_len;
 	struct l2tp_stats	stats;
 
 	struct list_head	list;		/* Keep a list of all tunnels */
@@ -178,7 +215,7 @@ out:
 	return tunnel;
 }
 
-extern struct l2tp_session *l2tp_session_find(struct l2tp_tunnel *tunnel, u32 session_id);
+extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id);
 extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
 extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
 extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
@@ -187,14 +224,15 @@ extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_i
 extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
 extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
 extern void l2tp_session_free(struct l2tp_session *session);
+extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
 extern int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, int (*payload_hook)(struct sk_buff *skb));
 extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
 
-extern void l2tp_build_l2tp_header(struct l2tp_session *session, void *buf);
 extern int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len);
 extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len);
 extern void l2tp_tunnel_destruct(struct sock *sk);
 extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
+extern void l2tp_session_set_header_len(struct l2tp_session *session, int version);
 
 /* Tunnel reference counts. Incremented per session that is added to
  * the tunnel.
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 3ad290dd830a..bee5b1413ec0 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -670,7 +670,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 
 	/* Check that this session doesn't already exist */
 	error = -EEXIST;
-	session = l2tp_session_find(tunnel, sp->pppol2tp.s_session);
+	session = l2tp_session_find(sock_net(sk), tunnel, sp->pppol2tp.s_session);
 	if (session != NULL)
 		goto end;
 
@@ -678,7 +678,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	 * headers.
 	 */
 	cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
-	cfg.hdr_len = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
 	cfg.debug = tunnel->debug;
 
 	/* Allocate and initialize a new session context. */
@@ -999,7 +998,7 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
 		if (stats.session_id != 0) {
 			/* resend to session ioctl handler */
 			struct l2tp_session *session =
-				l2tp_session_find(tunnel, stats.session_id);
+				l2tp_session_find(sock_net(sk), tunnel, stats.session_id);
 			if (session != NULL)
 				err = pppol2tp_session_ioctl(session, cmd, arg);
 			else
@@ -1375,6 +1374,8 @@ end:
 
 /*****************************************************************************
  * /proc filesystem for debug
+ * Since the original pppol2tp driver provided /proc/net/pppol2tp for
+ * L2TPv2, we dump only L2TPv2 tunnels and sessions here.
  *****************************************************************************/
 
 static unsigned int pppol2tp_net_id;
@@ -1391,14 +1392,24 @@ struct pppol2tp_seq_data {
 
 static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
 {
-	pd->tunnel = l2tp_tunnel_find_nth(net, pd->tunnel_idx);
-	pd->tunnel_idx++;
+	for (;;) {
+		pd->tunnel = l2tp_tunnel_find_nth(net, pd->tunnel_idx);
+		pd->tunnel_idx++;
+
+		if (pd->tunnel == NULL)
+			break;
+
+		/* Ignore L2TPv3 tunnels */
+		if (pd->tunnel->version < 3)
+			break;
+	}
 }
 
 static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
 {
 	pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
 	pd->session_idx++;
+
 	if (pd->session == NULL) {
 		pd->session_idx = 0;
 		pppol2tp_next_tunnel(net, pd);
-- 
cgit v1.2.3-59-g8ed1b


From e0d4435f93905f517003cfa7328a36ea19788147 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:18:54 +0000
Subject: l2tp: Update PPP-over-L2TP driver to work over L2TPv3

This patch makes changes to the L2TP PPP code for L2TPv3.

The existing code makes some assumptions about the L2TP header which are
broken by L2TPv3. Also, the sockaddr_pppol2tp structure of the original
code is too small to hold the larger (32-bit) L2TPv3 tunnel and session
ids, so a new sockaddr_pppol2tpv3 structure is needed. In the socket
calls, the size of the sockaddr structure passed in is used to tell
whether the operation is for L2TPv2 or L2TPv3.

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_pppol2tp.h |  14 ++++++
 include/linux/if_pppox.h    |   9 ++++
 net/l2tp/l2tp_ppp.c         | 120 +++++++++++++++++++++++++++-----------------
 3 files changed, 97 insertions(+), 46 deletions(-)

(limited to 'net')

diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h
index 1a1fb6e5d933..184bc5566207 100644
--- a/include/linux/if_pppol2tp.h
+++ b/include/linux/if_pppol2tp.h
@@ -35,6 +35,20 @@ struct pppol2tp_addr {
 	__u16 d_tunnel, d_session;	/* For sending outgoing packets */
 };
 
+/* The L2TPv3 protocol changes tunnel and session ids from 16 to 32
+ * bits. So we need a different sockaddr structure.
+ */
+struct pppol2tpv3_addr {
+	pid_t	pid;			/* pid that owns the fd.
+					 * 0 => current */
+	int	fd;			/* FD of UDP or IP socket to use */
+
+	struct sockaddr_in addr;	/* IP address and port to send to */
+
+	__u32 s_tunnel, s_session;	/* For matching incoming packets */
+	__u32 d_tunnel, d_session;	/* For sending outgoing packets */
+};
+
 /* Socket options:
  * DEBUG	- bitmask of debug message categories
  * SENDSEQ	- 0 => don't send packets with sequence numbers
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 90b5fae5d714..a6577af0c4e6 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -72,6 +72,15 @@ struct sockaddr_pppol2tp {
 	struct pppol2tp_addr pppol2tp;
 }__attribute__ ((packed));
 
+/* The L2TPv3 protocol changes tunnel and session ids from 16 to 32
+ * bits. So we need a different sockaddr structure.
+ */
+struct sockaddr_pppol2tpv3 {
+	sa_family_t     sa_family;      /* address family, AF_PPPOX */
+	unsigned int    sa_protocol;    /* protocol identifier */
+	struct pppol2tpv3_addr pppol2tp;
+} __attribute__ ((packed));
+
 /*********************************************************************
  *
  * ioctl interface for defining forwarding of connections
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index bee5b1413ec0..e5b531266541 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -291,17 +291,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session)
  * Transmit handling
  ***********************************************************************/
 
-/* Tell how big L2TP headers are for a particular session. This
- * depends on whether sequence numbers are being used.
- */
-static inline int pppol2tp_l2tp_header_len(struct l2tp_session *session)
-{
-	if (session->send_seq)
-		return PPPOL2TP_L2TP_HDR_SIZE_SEQ;
-
-	return PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
-}
-
 /* This is the sendmsg for the PPPoL2TP pppol2tp_session socket.  We come here
  * when a user application does a sendmsg() on the session socket. L2TP and
  * PPP headers must be inserted into the user's data.
@@ -394,7 +383,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	static const u8 ppph[2] = { 0xff, 0x03 };
 	struct sock *sk = (struct sock *) chan->private;
 	struct sock *sk_tun;
-	int hdr_len;
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel;
 	struct pppol2tp_session *ps;
@@ -417,9 +405,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	if (tunnel == NULL)
 		goto abort_put_sess;
 
-	/* What header length is configured for this session? */
-	hdr_len = pppol2tp_l2tp_header_len(session);
-
 	old_headroom = skb_headroom(skb);
 	if (skb_cow_head(skb, sizeof(ppph)))
 		goto abort_put_sess_tun;
@@ -432,7 +417,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	skb->data[0] = ppph[0];
 	skb->data[1] = ppph[1];
 
-	l2tp_xmit_skb(session, skb, hdr_len);
+	l2tp_xmit_skb(session, skb, session->hdr_len);
 
 	sock_put(sk_tun);
 	sock_put(sk);
@@ -615,6 +600,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 {
 	struct sock *sk = sock->sk;
 	struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
+	struct sockaddr_pppol2tpv3 *sp3 = (struct sockaddr_pppol2tpv3 *) uservaddr;
 	struct pppox_sock *po = pppox_sk(sk);
 	struct l2tp_session *session = NULL;
 	struct l2tp_tunnel *tunnel;
@@ -622,6 +608,10 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	struct dst_entry *dst;
 	struct l2tp_session_cfg cfg = { 0, };
 	int error = 0;
+	u32 tunnel_id, peer_tunnel_id;
+	u32 session_id, peer_session_id;
+	int ver = 2;
+	int fd;
 
 	lock_sock(sk);
 
@@ -639,21 +629,40 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	if (sk->sk_user_data)
 		goto end; /* socket is already attached */
 
-	/* Don't bind if s_tunnel is 0 */
+	/* Get params from socket address. Handle L2TPv2 and L2TPv3 */
+	if (sockaddr_len == sizeof(struct sockaddr_pppol2tp)) {
+		fd = sp->pppol2tp.fd;
+		tunnel_id = sp->pppol2tp.s_tunnel;
+		peer_tunnel_id = sp->pppol2tp.d_tunnel;
+		session_id = sp->pppol2tp.s_session;
+		peer_session_id = sp->pppol2tp.d_session;
+	} else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpv3)) {
+		ver = 3;
+		fd = sp3->pppol2tp.fd;
+		tunnel_id = sp3->pppol2tp.s_tunnel;
+		peer_tunnel_id = sp3->pppol2tp.d_tunnel;
+		session_id = sp3->pppol2tp.s_session;
+		peer_session_id = sp3->pppol2tp.d_session;
+	} else {
+		error = -EINVAL;
+		goto end; /* bad socket address */
+	}
+
+	/* Don't bind if tunnel_id is 0 */
 	error = -EINVAL;
-	if (sp->pppol2tp.s_tunnel == 0)
+	if (tunnel_id == 0)
 		goto end;
 
-	/* Special case: create tunnel context if s_session and
-	 * d_session is 0. Otherwise look up tunnel using supplied
+	/* Special case: create tunnel context if session_id and
+	 * peer_session_id is 0. Otherwise look up tunnel using supplied
 	 * tunnel id.
 	 */
-	if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) {
-		error = l2tp_tunnel_create(sock_net(sk), sp->pppol2tp.fd, 2, sp->pppol2tp.s_tunnel, sp->pppol2tp.d_tunnel, NULL, &tunnel);
+	if ((session_id == 0) && (peer_session_id == 0)) {
+		error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, NULL, &tunnel);
 		if (error < 0)
 			goto end;
 	} else {
-		tunnel = l2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel);
+		tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
 
 		/* Error if we can't find the tunnel */
 		error = -ENOENT;
@@ -670,20 +679,21 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 
 	/* Check that this session doesn't already exist */
 	error = -EEXIST;
-	session = l2tp_session_find(sock_net(sk), tunnel, sp->pppol2tp.s_session);
+	session = l2tp_session_find(sock_net(sk), tunnel, session_id);
 	if (session != NULL)
 		goto end;
 
-	/* Default MTU must allow space for UDP/L2TP/PPP
-	 * headers.
-	 */
-	cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+	/* Default MTU values. */
+	if (cfg.mtu == 0)
+		cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+	if (cfg.mru == 0)
+		cfg.mru = cfg.mtu;
 	cfg.debug = tunnel->debug;
 
 	/* Allocate and initialize a new session context. */
 	session = l2tp_session_create(sizeof(struct pppol2tp_session),
-				      tunnel, sp->pppol2tp.s_session,
-				      sp->pppol2tp.d_session, &cfg);
+				      tunnel, session_id,
+				      peer_session_id, &cfg);
 	if (session == NULL) {
 		error = -ENOMEM;
 		goto end;
@@ -756,8 +766,7 @@ end:
 static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 			    int *usockaddr_len, int peer)
 {
-	int len = sizeof(struct sockaddr_pppol2tp);
-	struct sockaddr_pppol2tp sp;
+	int len = 0;
 	int error = 0;
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel;
@@ -783,21 +792,40 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 		goto end_put_sess;
 	}
 
-	memset(&sp, 0, len);
-	sp.sa_family	= AF_PPPOX;
-	sp.sa_protocol	= PX_PROTO_OL2TP;
-	sp.pppol2tp.fd  = tunnel->fd;
-	sp.pppol2tp.pid = pls->owner;
-	sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
-	sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
-	sp.pppol2tp.s_session = session->session_id;
-	sp.pppol2tp.d_session = session->peer_session_id;
 	inet = inet_sk(sk);
-	sp.pppol2tp.addr.sin_family = AF_INET;
-	sp.pppol2tp.addr.sin_port = inet->inet_dport;
-	sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
-
-	memcpy(uaddr, &sp, len);
+	if (tunnel->version == 2) {
+		struct sockaddr_pppol2tp sp;
+		len = sizeof(sp);
+		memset(&sp, 0, len);
+		sp.sa_family	= AF_PPPOX;
+		sp.sa_protocol	= PX_PROTO_OL2TP;
+		sp.pppol2tp.fd  = tunnel->fd;
+		sp.pppol2tp.pid = pls->owner;
+		sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
+		sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
+		sp.pppol2tp.s_session = session->session_id;
+		sp.pppol2tp.d_session = session->peer_session_id;
+		sp.pppol2tp.addr.sin_family = AF_INET;
+		sp.pppol2tp.addr.sin_port = inet->inet_dport;
+		sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
+		memcpy(uaddr, &sp, len);
+	} else if (tunnel->version == 3) {
+		struct sockaddr_pppol2tpv3 sp;
+		len = sizeof(sp);
+		memset(&sp, 0, len);
+		sp.sa_family	= AF_PPPOX;
+		sp.sa_protocol	= PX_PROTO_OL2TP;
+		sp.pppol2tp.fd  = tunnel->fd;
+		sp.pppol2tp.pid = pls->owner;
+		sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
+		sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
+		sp.pppol2tp.s_session = session->session_id;
+		sp.pppol2tp.d_session = session->peer_session_id;
+		sp.pppol2tp.addr.sin_family = AF_INET;
+		sp.pppol2tp.addr.sin_port = inet->inet_dport;
+		sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
+		memcpy(uaddr, &sp, len);
+	}
 
 	*usockaddr_len = len;
 
-- 
cgit v1.2.3-59-g8ed1b


From 0d76751fad7739014485ba5bd388d4f1b4fd4143 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:19:00 +0000
Subject: l2tp: Add L2TPv3 IP encapsulation (no UDP) support

This patch adds a new L2TPIP socket family and modifies the core to
handle the case where there is no UDP header in the L2TP
packet. L2TP/IP uses IP protocol 115. Since L2TP/UDP and L2TP/IP
packets differ in layout, the datapath packet handling code needs
changes too. Userspace uses an L2TPIP socket instead of a UDP socket
when IP encapsulation is required.

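We can't use raw sockets for this because the semantics of raw sockets
don't lend themselves to the socket-per-tunnel model - we need to
bind and connect each tunnel socket using its L2TP connection ID, so
that received control frames can be delivered to the socket of the
tunnel they belong to.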

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/l2tp.h |  38 +++
 net/l2tp/Kconfig     |  17 ++
 net/l2tp/Makefile    |   1 +
 net/l2tp/l2tp_core.c | 163 ++++++++-----
 net/l2tp/l2tp_core.h |   7 +
 net/l2tp/l2tp_ip.c   | 679 +++++++++++++++++++++++++++++++++++++++++++++++++++
 net/l2tp/l2tp_ppp.c  |   7 +-
 7 files changed, 850 insertions(+), 62 deletions(-)
 create mode 100644 include/linux/l2tp.h
 create mode 100644 net/l2tp/l2tp_ip.c

(limited to 'net')

diff --git a/include/linux/l2tp.h b/include/linux/l2tp.h
new file mode 100644
index 000000000000..deff7bca4e05
--- /dev/null
+++ b/include/linux/l2tp.h
@@ -0,0 +1,38 @@
+/*
+ * L2TP-over-IP socket for L2TPv3.
+ *
+ * Author: James Chapman <jchapman@katalix.com>
+ */
+
+#ifndef _LINUX_L2TP_H_
+#define _LINUX_L2TP_H_
+
+#include <linux/types.h>
+#ifdef __KERNEL__
+#include <linux/socket.h>
+#include <linux/in.h>
+#endif
+
+#define IPPROTO_L2TP		115
+
+/**
+ * struct sockaddr_l2tpip - the sockaddr structure for L2TP-over-IP sockets
+ * @l2tp_family:  address family number, AF_INET (layout matches sockaddr_in).
+ * @l2tp_addr:    protocol specific address information
+ * @l2tp_conn_id: connection id of tunnel
+ */
+struct sockaddr_l2tpip {
+	/* The first fields must match struct sockaddr_in */
+	sa_family_t	l2tp_family;	/* AF_INET */
+	__be16		l2tp_unused;	/* INET port number (unused) */
+	struct in_addr	l2tp_addr;	/* Internet address */
+
+	__u32		l2tp_conn_id;	/* Connection ID of tunnel */
+
+	/* Pad to size of `struct sockaddr'. */
+	unsigned char	__pad[sizeof(struct sockaddr) - sizeof(sa_family_t) -
+			      sizeof(__be16) - sizeof(struct in_addr) -
+			      sizeof(__u32)];
+};
+
+#endif
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index d60758d60478..0a11ccf2d95b 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -51,3 +51,20 @@ config L2TP_V3
 	  If you are connecting to L2TPv3 equipment, or you want to
 	  tunnel raw ethernet frames using L2TP, say Y here. If
 	  unsure, say N.
+
+config L2TP_IP
+	tristate "L2TP IP encapsulation for L2TPv3"
+	depends on L2TP_V3
+	help
+	  Support for L2TP-over-IP socket family.
+
+	  The L2TPv3 protocol defines two possible encapsulations for
+	  L2TP frames, namely UDP and plain IP (without UDP). This
+	  driver provides a new L2TPIP socket family with which
+	  userspace L2TPv3 daemons may create L2TP/IP tunnel sockets
+	  when UDP encapsulation is not required. When L2TP is carried
+	  in IP packets, it uses IP protocol number 115, so this protocol
+	  must be allowed through firewalls.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called l2tp_ip.
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index c91f208b1693..ef28b16f7d6a 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_L2TP) += l2tp_core.o
 
 # Build l2tp as modules if L2TP is M
 obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
+obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 0eee1a65f1b1..1739d04367e4 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -36,8 +36,10 @@
 #include <linux/inetdevice.h>
 #include <linux/skbuff.h>
 #include <linux/init.h>
+#include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/udp.h>
+#include <linux/l2tp.h>
 #include <linux/hash.h>
 #include <linux/sort.h>
 #include <linux/file.h>
@@ -48,6 +50,7 @@
 #include <net/ip.h>
 #include <net/udp.h>
 #include <net/xfrm.h>
+#include <net/protocol.h>
 
 #include <asm/byteorder.h>
 #include <asm/atomic.h>
@@ -849,15 +852,21 @@ static int l2tp_build_l2tpv2_header(struct l2tp_session *session, void *buf)
 
 static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
 {
+	struct l2tp_tunnel *tunnel = session->tunnel;
 	char *bufp = buf;
 	char *optr = bufp;
-	u16 flags = L2TP_HDR_VER_3;
 
-	/* Setup L2TP header. */
-	*((__be16 *) bufp) = htons(flags);
-	bufp += 2;
-	*((__be16 *) bufp) = 0;
-	bufp += 2;
+	/* Setup L2TP header. The header differs slightly for UDP and
+	 * IP encapsulations. For UDP, there is 4 bytes of flags.
+	 */
+	if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+		u16 flags = L2TP_HDR_VER_3;
+		*((__be16 *) bufp) = htons(flags);
+		bufp += 2;
+		*((__be16 *) bufp) = 0;
+		bufp += 2;
+	}
+
 	*((__be32 *) bufp) = htonl(session->peer_session_id);
 	bufp += 4;
 	if (session->cookie_len) {
@@ -902,10 +911,11 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat
 
 	if (session->debug & L2TP_MSG_DATA) {
 		int i;
-		unsigned char *datap = skb->data + sizeof(struct udphdr);
+		int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
+		unsigned char *datap = skb->data + uhlen;
 
 		printk(KERN_DEBUG "%s: xmit:", session->name);
-		for (i = 0; i < (len - sizeof(struct udphdr)); i++) {
+		for (i = 0; i < (len - uhlen); i++) {
 			printk(" %02X", *datap++);
 			if (i == 31) {
 				printk(" ...");
@@ -956,21 +966,23 @@ static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len)
 {
 	int data_len = skb->len;
-	struct sock *sk = session->tunnel->sock;
+	struct l2tp_tunnel *tunnel = session->tunnel;
+	struct sock *sk = tunnel->sock;
 	struct udphdr *uh;
-	unsigned int udp_len;
 	struct inet_sock *inet;
 	__wsum csum;
 	int old_headroom;
 	int new_headroom;
 	int headroom;
+	int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
+	int udp_len;
 
 	/* Check that there's enough headroom in the skb to insert IP,
 	 * UDP and L2TP headers. If not enough, expand it to
 	 * make room. Adjust truesize.
 	 */
 	headroom = NET_SKB_PAD + sizeof(struct iphdr) +
-		sizeof(struct udphdr) + hdr_len;
+		uhlen + hdr_len;
 	old_headroom = skb_headroom(skb);
 	if (skb_cow_head(skb, headroom))
 		goto abort;
@@ -981,18 +993,8 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 
 	/* Setup L2TP header */
 	session->build_header(session, __skb_push(skb, hdr_len));
-	udp_len = sizeof(struct udphdr) + hdr_len + data_len;
-
-	/* Setup UDP header */
-	inet = inet_sk(sk);
-	__skb_push(skb, sizeof(*uh));
-	skb_reset_transport_header(skb);
-	uh = udp_hdr(skb);
-	uh->source = inet->inet_sport;
-	uh->dest = inet->inet_dport;
-	uh->len = htons(udp_len);
-	uh->check = 0;
 
+	/* Reset skb netfilter state */
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
@@ -1001,29 +1003,48 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 	/* Get routing info from the tunnel socket */
 	skb_dst_drop(skb);
 	skb_dst_set(skb, dst_clone(__sk_dst_get(sk)));
-	l2tp_skb_set_owner_w(skb, sk);
 
-	/* Calculate UDP checksum if configured to do so */
-	if (sk->sk_no_check == UDP_CSUM_NOXMIT)
-		skb->ip_summed = CHECKSUM_NONE;
-	else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
-		 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
-		skb->ip_summed = CHECKSUM_COMPLETE;
-		csum = skb_checksum(skb, 0, udp_len, 0);
-		uh->check = csum_tcpudp_magic(inet->inet_saddr,
-					      inet->inet_daddr,
-					      udp_len, IPPROTO_UDP, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
-					       inet->inet_daddr,
-					       udp_len, IPPROTO_UDP, 0);
+	switch (tunnel->encap) {
+	case L2TP_ENCAPTYPE_UDP:
+		/* Setup UDP header */
+		inet = inet_sk(sk);
+		__skb_push(skb, sizeof(*uh));
+		skb_reset_transport_header(skb);
+		uh = udp_hdr(skb);
+		uh->source = inet->inet_sport;
+		uh->dest = inet->inet_dport;
+		udp_len = uhlen + hdr_len + data_len;
+		uh->len = htons(udp_len);
+		uh->check = 0;
+
+		/* Calculate UDP checksum if configured to do so */
+		if (sk->sk_no_check == UDP_CSUM_NOXMIT)
+			skb->ip_summed = CHECKSUM_NONE;
+		else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
+			 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
+			skb->ip_summed = CHECKSUM_COMPLETE;
+			csum = skb_checksum(skb, 0, udp_len, 0);
+			uh->check = csum_tcpudp_magic(inet->inet_saddr,
+						      inet->inet_daddr,
+						      udp_len, IPPROTO_UDP, csum);
+			if (uh->check == 0)
+				uh->check = CSUM_MANGLED_0;
+		} else {
+			skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->csum_start = skb_transport_header(skb) - skb->head;
+			skb->csum_offset = offsetof(struct udphdr, check);
+			uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
+						       inet->inet_daddr,
+						       udp_len, IPPROTO_UDP, 0);
+		}
+		break;
+
+	case L2TP_ENCAPTYPE_IP:
+		break;
 	}
 
+	l2tp_skb_set_owner_w(skb, sk);
+
 	l2tp_xmit_core(session, skb, data_len);
 
 abort:
@@ -1053,9 +1074,15 @@ void l2tp_tunnel_destruct(struct sock *sk)
 	/* Close all sessions */
 	l2tp_tunnel_closeall(tunnel);
 
-	/* No longer an encapsulation socket. See net/ipv4/udp.c */
-	(udp_sk(sk))->encap_type = 0;
-	(udp_sk(sk))->encap_rcv = NULL;
+	switch (tunnel->encap) {
+	case L2TP_ENCAPTYPE_UDP:
+		/* No longer an encapsulation socket. See net/ipv4/udp.c */
+		(udp_sk(sk))->encap_type = 0;
+		(udp_sk(sk))->encap_rcv = NULL;
+		break;
+	case L2TP_ENCAPTYPE_IP:
+		break;
+	}
 
 	/* Remove hooks into tunnel socket */
 	tunnel->sock = NULL;
@@ -1168,6 +1195,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	struct socket *sock = NULL;
 	struct sock *sk = NULL;
 	struct l2tp_net *pn;
+	enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
 
 	/* Get the tunnel socket from the fd, which was opened by
 	 * the userspace L2TP daemon.
@@ -1182,18 +1210,27 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 
 	sk = sock->sk;
 
+	if (cfg != NULL)
+		encap = cfg->encap;
+
 	/* Quick sanity checks */
-	err = -EPROTONOSUPPORT;
-	if (sk->sk_protocol != IPPROTO_UDP) {
-		printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
-		       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
-		goto err;
-	}
-	err = -EAFNOSUPPORT;
-	if (sock->ops->family != AF_INET) {
-		printk(KERN_ERR "tunl %hu: fd %d wrong family, got %d, expected %d\n",
-		       tunnel_id, fd, sock->ops->family, AF_INET);
-		goto err;
+	switch (encap) {
+	case L2TP_ENCAPTYPE_UDP:
+		err = -EPROTONOSUPPORT;
+		if (sk->sk_protocol != IPPROTO_UDP) {
+			printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
+			       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
+			goto err;
+		}
+		break;
+	case L2TP_ENCAPTYPE_IP:
+		err = -EPROTONOSUPPORT;
+		if (sk->sk_protocol != IPPROTO_L2TP) {
+			printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
+			       tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
+			goto err;
+		}
+		break;
 	}
 
 	/* Check if this socket has already been prepped */
@@ -1223,12 +1260,16 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	tunnel->l2tp_net = net;
 	pn = l2tp_pernet(net);
 
-	if (cfg)
+	if (cfg != NULL)
 		tunnel->debug = cfg->debug;
 
 	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
-	udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
-	udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
+	tunnel->encap = encap;
+	if (encap == L2TP_ENCAPTYPE_UDP) {
+		/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
+		udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
+		udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
+	}
 
 	sk->sk_user_data = tunnel;
 
@@ -1318,7 +1359,9 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
 		if (session->send_seq)
 			session->hdr_len += 4;
 	} else {
-		session->hdr_len = 8 + session->cookie_len + session->l2specific_len + session->offset;
+		session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset;
+		if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
+			session->hdr_len += 4;
 	}
 
 }
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 5c53eb2a8ad9..d2395984645e 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -48,6 +48,11 @@ enum l2tp_l2spec_type {
 	L2TP_L2SPECTYPE_DEFAULT,
 };
 
+enum l2tp_encap_type {
+	L2TP_ENCAPTYPE_UDP,
+	L2TP_ENCAPTYPE_IP,
+};
+
 struct sk_buff;
 
 struct l2tp_stats {
@@ -155,6 +160,7 @@ struct l2tp_session {
 struct l2tp_tunnel_cfg {
 	int			debug;		/* bitmask of debug message
 						 * categories */
+	enum l2tp_encap_type	encap;
 };
 
 struct l2tp_tunnel {
@@ -170,6 +176,7 @@ struct l2tp_tunnel {
 	char			name[20];	/* for logging */
 	int			debug;		/* bitmask of debug message
 						 * categories */
+	enum l2tp_encap_type	encap;
 	struct l2tp_stats	stats;
 
 	struct list_head	list;		/* Keep a list of all tunnels */
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
new file mode 100644
index 000000000000..75bf784ba18d
--- /dev/null
+++ b/net/l2tp/l2tp_ip.c
@@ -0,0 +1,679 @@
+/*
+ * L2TPv3 IP encapsulation support
+ *
+ * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/icmp.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/random.h>
+#include <linux/socket.h>
+#include <linux/l2tp.h>
+#include <linux/in.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/inet_common.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp_states.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+#include "l2tp_core.h"
+
+struct l2tp_ip_sock {
+	/* inet_sock has to be the first member of l2tp_ip_sock */
+	struct inet_sock	inet;
+
+	__u32			conn_id;
+	__u32			peer_conn_id;
+
+	__u64			tx_packets;
+	__u64			tx_bytes;
+	__u64			tx_errors;
+	__u64			rx_packets;
+	__u64			rx_bytes;
+	__u64			rx_errors;
+};
+
+static DEFINE_RWLOCK(l2tp_ip_lock);
+static struct hlist_head l2tp_ip_table;
+static struct hlist_head l2tp_ip_bind_table;
+
+static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
+{
+	return (struct l2tp_ip_sock *)sk;
+}
+
+static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
+{
+	struct hlist_node *node;
+	struct sock *sk;
+
+	sk_for_each_bound(sk, node, &l2tp_ip_bind_table) {
+		struct inet_sock *inet = inet_sk(sk);
+		struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk);
+
+		if (l2tp == NULL)
+			continue;
+
+		if ((l2tp->conn_id == tunnel_id) &&
+#ifdef CONFIG_NET_NS
+		    (sk->sk_net == net) &&
+#endif
+		    !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
+		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+			goto found;
+	}
+
+	sk = NULL;
+found:
+	return sk;
+}
+
+static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
+{
+	struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id);
+	if (sk)
+		sock_hold(sk);
+
+	return sk;
+}
+
+/* When processing receive frames, there are two cases to
+ * consider. Data frames consist of a non-zero session-id and an
+ * optional cookie. Control frames consist of a regular L2TP header
+ * preceded by 32-bits of zeros.
+ *
+ * L2TPv3 Session Header Over IP
+ *
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                           Session ID                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |               Cookie (optional, maximum 64 bits)...
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *                                                                 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * L2TPv3 Control Message Header Over IP
+ *
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                      (32 bits of zeros)                       |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |T|L|x|x|S|x|x|x|x|x|x|x|  Ver  |             Length            |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                     Control Connection ID                     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |               Ns              |               Nr              |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * All control frames are passed to userspace.
+ */
+static int l2tp_ip_recv(struct sk_buff *skb)
+{
+	struct sock *sk;
+	u32 session_id;
+	u32 tunnel_id;
+	unsigned char *ptr, *optr;
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel = NULL;
+	int length;
+	int offset;
+
+	/* Point to L2TP header */
+	optr = ptr = skb->data;
+
+	if (!pskb_may_pull(skb, 4))
+		goto discard;
+
+	session_id = ntohl(*((__be32 *) ptr));
+	ptr += 4;
+
+	/* RFC3931: L2TP/IP packets have the first 4 bytes containing
+	 * the session_id. If it is 0, the packet is a L2TP control
+	 * frame and the session_id value can be discarded.
+	 */
+	if (session_id == 0) {
+		__skb_pull(skb, 4);
+		goto pass_up;
+	}
+
+	/* Ok, this is a data packet. Lookup the session. */
+	session = l2tp_session_find(&init_net, NULL, session_id);
+	if (session == NULL)
+		goto discard;
+
+	tunnel = session->tunnel;
+	if (tunnel == NULL)
+		goto discard;
+
+	/* Trace packet contents, if enabled */
+	if (tunnel->debug & L2TP_MSG_DATA) {
+		length = min(32u, skb->len);
+		if (!pskb_may_pull(skb, length))
+			goto discard;
+
+		printk(KERN_DEBUG "%s: ip recv: ", tunnel->name);
+
+		offset = 0;
+		do {
+			printk(" %02X", ptr[offset]);
+		} while (++offset < length);
+
+		printk("\n");
+	}
+
+	l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook);
+
+	return 0;
+
+pass_up:
+	/* Get the tunnel_id from the L2TP header */
+	if (!pskb_may_pull(skb, 12))
+		goto discard;
+
+	if ((skb->data[0] & 0xc0) != 0xc0)
+		goto discard;
+
+	tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
+	tunnel = l2tp_tunnel_find(&init_net, tunnel_id);
+	if (tunnel != NULL)
+		sk = tunnel->sock;
+	else {
+		struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
+
+		read_lock_bh(&l2tp_ip_lock);
+		sk = __l2tp_ip_bind_lookup(&init_net, iph->daddr, 0, tunnel_id);
+		read_unlock_bh(&l2tp_ip_lock);
+	}
+
+	if (sk == NULL)
+		goto discard;
+
+	sock_hold(sk);
+
+	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+		goto discard_put;
+
+	nf_reset(skb);
+
+	return sk_receive_skb(sk, skb, 1);
+
+discard_put:
+	sock_put(sk);
+
+discard:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int l2tp_ip_open(struct sock *sk)
+{
+	/* Prevent autobind. We don't have ports. */
+	inet_sk(sk)->inet_num = IPPROTO_L2TP;
+
+	write_lock_bh(&l2tp_ip_lock);
+	sk_add_node(sk, &l2tp_ip_table);
+	write_unlock_bh(&l2tp_ip_lock);
+
+	return 0;
+}
+
+static void l2tp_ip_close(struct sock *sk, long timeout)
+{
+	write_lock_bh(&l2tp_ip_lock);
+	hlist_del_init(&sk->sk_bind_node);
+	hlist_del_init(&sk->sk_node);
+	write_unlock_bh(&l2tp_ip_lock);
+	sk_common_release(sk);
+}
+
+static void l2tp_ip_destroy_sock(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
+		kfree_skb(skb);
+
+	sk_refcnt_debug_dec(sk);
+}
+
+static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr;
+	int ret = -EINVAL;
+	int chk_addr_ret;
+
+	ret = -EADDRINUSE;
+	read_lock_bh(&l2tp_ip_lock);
+	if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id))
+		goto out_in_use;
+
+	read_unlock_bh(&l2tp_ip_lock);
+
+	lock_sock(sk);
+	if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_l2tpip))
+		goto out;
+
+	chk_addr_ret = inet_addr_type(&init_net, addr->l2tp_addr.s_addr);
+	ret = -EADDRNOTAVAIL;
+	if (addr->l2tp_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
+	    chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
+		goto out;
+
+	inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr;
+	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
+		inet->inet_saddr = 0;  /* Use device */
+	sk_dst_reset(sk);
+
+	l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id;
+
+	write_lock_bh(&l2tp_ip_lock);
+	sk_add_bind_node(sk, &l2tp_ip_bind_table);
+	sk_del_node_init(sk);
+	write_unlock_bh(&l2tp_ip_lock);
+	ret = 0;
+out:
+	release_sock(sk);
+
+	return ret;
+
+out_in_use:
+	read_unlock_bh(&l2tp_ip_lock);
+
+	return ret;
+}
+
+static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	int rc;
+	struct inet_sock *inet = inet_sk(sk);
+	struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
+	struct rtable *rt;
+	__be32 saddr;
+	int oif;
+
+	rc = -EINVAL;
+	if (addr_len < sizeof(*lsa))
+		goto out;
+
+	rc = -EAFNOSUPPORT;
+	if (lsa->l2tp_family != AF_INET)
+		goto out;
+
+	sk_dst_reset(sk);
+
+	oif = sk->sk_bound_dev_if;
+	saddr = inet->inet_saddr;
+
+	rc = -EINVAL;
+	if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
+		goto out;
+
+	rc = ip_route_connect(&rt, lsa->l2tp_addr.s_addr, saddr,
+			      RT_CONN_FLAGS(sk), oif,
+			      IPPROTO_L2TP,
+			      0, 0, sk, 1);
+	if (rc) {
+		if (rc == -ENETUNREACH)
+			IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES);
+		goto out;
+	}
+
+	rc = -ENETUNREACH;
+	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
+		ip_rt_put(rt);
+		goto out;
+	}
+
+	l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
+
+	if (!inet->inet_saddr)
+		inet->inet_saddr = rt->rt_src;
+	if (!inet->inet_rcv_saddr)
+		inet->inet_rcv_saddr = rt->rt_src;
+	inet->inet_daddr = rt->rt_dst;
+	sk->sk_state = TCP_ESTABLISHED;
+	inet->inet_id = jiffies;
+
+	sk_dst_set(sk, &rt->u.dst);
+
+	write_lock_bh(&l2tp_ip_lock);
+	hlist_del_init(&sk->sk_bind_node);
+	sk_add_bind_node(sk, &l2tp_ip_bind_table);
+	write_unlock_bh(&l2tp_ip_lock);
+
+	rc = 0;
+out:
+	return rc;
+}
+
+static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
+			   int *uaddr_len, int peer)
+{
+	struct sock *sk		= sock->sk;
+	struct inet_sock *inet	= inet_sk(sk);
+	struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk);
+	struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr;
+
+	memset(lsa, 0, sizeof(*lsa));
+	lsa->l2tp_family = AF_INET;
+	if (peer) {
+		if (!inet->inet_dport)
+			return -ENOTCONN;
+		lsa->l2tp_conn_id = lsk->peer_conn_id;
+		lsa->l2tp_addr.s_addr = inet->inet_daddr;
+	} else {
+		__be32 addr = inet->inet_rcv_saddr;
+		if (!addr)
+			addr = inet->inet_saddr;
+		lsa->l2tp_conn_id = lsk->conn_id;
+		lsa->l2tp_addr.s_addr = addr;
+	}
+	*uaddr_len = sizeof(*lsa);
+	return 0;
+}
+
+static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
+{
+	int rc;
+
+	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+		goto drop;
+
+	nf_reset(skb);
+
+	/* Charge it to the socket, dropping if the queue is full. */
+	rc = sock_queue_rcv_skb(sk, skb);
+	if (rc < 0)
+		goto drop;
+
+	return 0;
+
+drop:
+	IP_INC_STATS(&init_net, IPSTATS_MIB_INDISCARDS);
+	kfree_skb(skb);
+	return -1;
+}
+
+/* Userspace will call sendmsg() on the tunnel socket to send L2TP
+ * control frames.
+ */
+static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len)
+{
+	struct sk_buff *skb;
+	int rc;
+	struct l2tp_ip_sock *lsa = l2tp_ip_sk(sk);
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_options *opt = inet->opt;
+	struct rtable *rt = NULL;
+	int connected = 0;
+	__be32 daddr;
+
+	if (sock_flag(sk, SOCK_DEAD))
+		return -ENOTCONN;
+
+	/* Get and verify the address. */
+	if (msg->msg_name) {
+		struct sockaddr_l2tpip *lip = (struct sockaddr_l2tpip *) msg->msg_name;
+		if (msg->msg_namelen < sizeof(*lip))
+			return -EINVAL;
+
+		if (lip->l2tp_family != AF_INET) {
+			if (lip->l2tp_family != AF_UNSPEC)
+				return -EAFNOSUPPORT;
+		}
+
+		daddr = lip->l2tp_addr.s_addr;
+	} else {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -EDESTADDRREQ;
+
+		daddr = inet->inet_daddr;
+		connected = 1;
+	}
+
+	/* Allocate a socket buffer */
+	rc = -ENOMEM;
+	skb = sock_wmalloc(sk, 2 + NET_SKB_PAD + sizeof(struct iphdr) +
+			   4 + len, 0, GFP_KERNEL);
+	if (!skb)
+		goto error;
+
+	/* Reserve space for headers, putting IP header on 4-byte boundary. */
+	skb_reserve(skb, 2 + NET_SKB_PAD);
+	skb_reset_network_header(skb);
+	skb_reserve(skb, sizeof(struct iphdr));
+	skb_reset_transport_header(skb);
+
+	/* Insert 0 session_id */
+	*((__be32 *) skb_put(skb, 4)) = 0;
+
+	/* Copy user data into skb */
+	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+	if (rc < 0) {
+		kfree_skb(skb);
+		goto error;
+	}
+
+	if (connected)
+		rt = (struct rtable *) __sk_dst_check(sk, 0);
+
+	if (rt == NULL) {
+		/* Use correct destination address if we have options. */
+		if (opt && opt->srr)
+			daddr = opt->faddr;
+
+		{
+			struct flowi fl = { .oif = sk->sk_bound_dev_if,
+					    .nl_u = { .ip4_u = {
+							.daddr = daddr,
+							.saddr = inet->inet_saddr,
+							.tos = RT_CONN_FLAGS(sk) } },
+					    .proto = sk->sk_protocol,
+					    .flags = inet_sk_flowi_flags(sk),
+					    .uli_u = { .ports = {
+							 .sport = inet->inet_sport,
+							 .dport = inet->inet_dport } } };
+
+			/* If this fails, retransmit mechanism of transport layer will
+			 * keep trying until route appears or the connection times
+			 * itself out.
+			 */
+			security_sk_classify_flow(sk, &fl);
+			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
+				goto no_route;
+		}
+		sk_setup_caps(sk, &rt->u.dst);
+	}
+	skb_dst_set(skb, dst_clone(&rt->u.dst));
+
+	/* Queue the packet to IP for output */
+	rc = ip_queue_xmit(skb, 0);
+
+error:
+	/* Update stats */
+	if (rc >= 0) {
+		lsa->tx_packets++;
+		lsa->tx_bytes += len;
+		rc = len;
+	} else {
+		lsa->tx_errors++;
+	}
+
+	return rc;
+
+no_route:
+	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
+	kfree_skb(skb);
+	return -EHOSTUNREACH;
+}
+
+static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+			   size_t len, int noblock, int flags, int *addr_len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk);
+	size_t copied = 0;
+	int err = -EOPNOTSUPP;
+	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
+	struct sk_buff *skb;
+
+	if (flags & MSG_OOB)
+		goto out;
+
+	if (addr_len)
+		*addr_len = sizeof(*sin);
+
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb)
+		goto out;
+
+	copied = skb->len;
+	if (len < copied) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto done;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	/* Copy the address. */
+	if (sin) {
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
+		sin->sin_port = 0;
+		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+	if (inet->cmsg_flags)
+		ip_cmsg_recv(msg, skb);
+	if (flags & MSG_TRUNC)
+		copied = skb->len;
+done:
+	skb_free_datagram(sk, skb);
+out:
+	if (err) {
+		lsk->rx_errors++;
+		return err;
+	}
+
+	lsk->rx_packets++;
+	lsk->rx_bytes += copied;
+
+	return copied;
+}
+
+struct proto l2tp_ip_prot = {
+	.name		   = "L2TP/IP",
+	.owner		   = THIS_MODULE,
+	.init		   = l2tp_ip_open,
+	.close		   = l2tp_ip_close,
+	.bind		   = l2tp_ip_bind,
+	.connect	   = l2tp_ip_connect,
+	.disconnect	   = udp_disconnect,
+	.ioctl		   = udp_ioctl,
+	.destroy	   = l2tp_ip_destroy_sock,
+	.setsockopt	   = ip_setsockopt,
+	.getsockopt	   = ip_getsockopt,
+	.sendmsg	   = l2tp_ip_sendmsg,
+	.recvmsg	   = l2tp_ip_recvmsg,
+	.backlog_rcv	   = l2tp_ip_backlog_recv,
+	.hash		   = inet_hash,
+	.unhash		   = inet_unhash,
+	.obj_size	   = sizeof(struct l2tp_ip_sock),
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_ip_setsockopt,
+	.compat_getsockopt = compat_ip_getsockopt,
+#endif
+};
+
+static const struct proto_ops l2tp_ip_ops = {
+	.family		   = PF_INET,
+	.owner		   = THIS_MODULE,
+	.release	   = inet_release,
+	.bind		   = inet_bind,
+	.connect	   = inet_dgram_connect,
+	.socketpair	   = sock_no_socketpair,
+	.accept		   = sock_no_accept,
+	.getname	   = l2tp_ip_getname,
+	.poll		   = datagram_poll,
+	.ioctl		   = inet_ioctl,
+	.listen		   = sock_no_listen,
+	.shutdown	   = inet_shutdown,
+	.setsockopt	   = sock_common_setsockopt,
+	.getsockopt	   = sock_common_getsockopt,
+	.sendmsg	   = inet_sendmsg,
+	.recvmsg	   = sock_common_recvmsg,
+	.mmap		   = sock_no_mmap,
+	.sendpage	   = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_sock_common_setsockopt,
+	.compat_getsockopt = compat_sock_common_getsockopt,
+#endif
+};
+
+static struct inet_protosw l2tp_ip_protosw = {
+	.type		= SOCK_DGRAM,
+	.protocol	= IPPROTO_L2TP,
+	.prot		= &l2tp_ip_prot,
+	.ops		= &l2tp_ip_ops,
+	.no_check	= 0,
+};
+
+static struct net_protocol l2tp_ip_protocol __read_mostly = {
+	.handler	= l2tp_ip_recv,
+};
+
+static int __init l2tp_ip_init(void)
+{
+	int err;
+
+	printk(KERN_INFO "L2TP IP encapsulation support (L2TPv3)\n");
+
+	err = proto_register(&l2tp_ip_prot, 1);
+	if (err != 0)
+		goto out;
+
+	err = inet_add_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
+	if (err)
+		goto out1;
+
+	inet_register_protosw(&l2tp_ip_protosw);
+	return 0;
+
+out1:
+	proto_unregister(&l2tp_ip_prot);
+out:
+	return err;
+}
+
+static void __exit l2tp_ip_exit(void)
+{
+	inet_unregister_protosw(&l2tp_ip_protosw);
+	inet_del_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
+	proto_unregister(&l2tp_ip_prot);
+}
+
+module_init(l2tp_ip_init);
+module_exit(l2tp_ip_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("L2TP over IP");
+MODULE_VERSION("1.0");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, SOCK_DGRAM, IPPROTO_L2TP);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index e5b531266541..63fc62baeeb9 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -305,6 +305,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel;
 	struct pppol2tp_session *ps;
+	int uhlen;
 
 	error = -ENOTCONN;
 	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
@@ -321,10 +322,12 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
 	if (tunnel == NULL)
 		goto error_put_sess;
 
+	uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
+
 	/* Allocate a socket buffer */
 	error = -ENOMEM;
 	skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
-			   sizeof(struct udphdr) + session->hdr_len +
+			   uhlen + session->hdr_len +
 			   sizeof(ppph) + total_len,
 			   0, GFP_KERNEL);
 	if (!skb)
@@ -335,7 +338,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
 	skb_reset_network_header(skb);
 	skb_reserve(skb, sizeof(struct iphdr));
 	skb_reset_transport_header(skb);
-	skb_reserve(skb, sizeof(struct udphdr));
+	skb_reserve(skb, uhlen);
 
 	/* Add PPP header */
 	skb->data[0] = ppph[0];
-- 
cgit v1.2.3-59-g8ed1b


From f408e0ce40270559ef80f231843c93baa9947bc5 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:19:05 +0000
Subject: netlink: Export genl_lock() API for use by modules

This lets kernel modules which use genl netlink APIs serialize netlink
processing.

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/genetlink.h | 8 ++++++++
 net/netlink/genetlink.c   | 6 ++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index b834ef6d59fa..61549b26ad6f 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -80,4 +80,12 @@ enum {
 
 #define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1)
 
+#ifdef __KERNEL__
+
+/* All generic netlink requests are serialized by a global lock.  */
+extern void genl_lock(void);
+extern void genl_unlock(void);
+
+#endif /* __KERNEL__ */
+
 #endif	/* __LINUX_GENERIC_NETLINK_H */
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index a4b6e148c5de..a28fda7420d9 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -20,15 +20,17 @@
 
 static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */
 
-static inline void genl_lock(void)
+void genl_lock(void)
 {
 	mutex_lock(&genl_mutex);
 }
+EXPORT_SYMBOL(genl_lock);
 
-static inline void genl_unlock(void)
+void genl_unlock(void)
 {
 	mutex_unlock(&genl_mutex);
 }
+EXPORT_SYMBOL(genl_unlock);
 
 #define GENL_FAM_TAB_SIZE	16
 #define GENL_FAM_TAB_MASK	(GENL_FAM_TAB_SIZE - 1)
-- 
cgit v1.2.3-59-g8ed1b


From 309795f4bec2d69cd507a631f82065c2198a0825 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:19:10 +0000
Subject: l2tp: Add netlink control API for L2TP

In L2TPv3, we need to create/delete/modify/query L2TP tunnel and
session contexts. The number of parameters is significant. So let's
use netlink. Userspace uses this API to control L2TP tunnel/session
contexts in the kernel.

The previous pppol2tp driver was managed using [gs]etsockopt(). This
API is retained for backwards compatibility. Unlike L2TPv2 which
carries only PPP frames, L2TPv3 can carry raw ethernet frames or other
frame types and these do not always have an associated socket
family. Therefore, we need a way to use L2TP sessions that doesn't
require a socket type for each supported frame type. Hence netlink is
used.

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/l2tp.h    | 125 ++++++++
 net/l2tp/Makefile       |   1 +
 net/l2tp/l2tp_core.c    |  61 +++-
 net/l2tp/l2tp_core.h    |  34 +-
 net/l2tp/l2tp_netlink.c | 830 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/l2tp/l2tp_ppp.c     | 162 ++++++++--
 6 files changed, 1169 insertions(+), 44 deletions(-)
 create mode 100644 net/l2tp/l2tp_netlink.c

(limited to 'net')

diff --git a/include/linux/l2tp.h b/include/linux/l2tp.h
index deff7bca4e05..4bdb31df8e72 100644
--- a/include/linux/l2tp.h
+++ b/include/linux/l2tp.h
@@ -11,6 +11,8 @@
 #ifdef __KERNEL__
 #include <linux/socket.h>
 #include <linux/in.h>
+#else
+#include <netinet/in.h>
 #endif
 
 #define IPPROTO_L2TP		115
@@ -21,6 +23,7 @@
  * @l2tp_addr:    protocol specific address information
  * @l2tp_conn_id: connection id of tunnel
  */
+#define __SOCK_SIZE__	16		/* sizeof(struct sockaddr)	*/
 struct sockaddr_l2tpip {
 	/* The first fields must match struct sockaddr_in */
 	sa_family_t	l2tp_family;	/* AF_INET */
@@ -35,4 +38,126 @@ struct sockaddr_l2tpip {
 			      sizeof(__u32)];
 };
 
+/*****************************************************************************
+ *  NETLINK_GENERIC netlink family.
+ *****************************************************************************/
+
+/*
+ * Commands.
+ * Valid TLVs of each command are:-
+ * TUNNEL_CREATE	- CONN_ID, pw_type, netns, ifname, ipinfo, udpinfo, udpcsum, vlanid
+ * TUNNEL_DELETE	- CONN_ID
+ * TUNNEL_MODIFY	- CONN_ID, udpcsum
+ * TUNNEL_GETSTATS	- CONN_ID, (stats)
+ * TUNNEL_GET		- CONN_ID, (...)
+ * SESSION_CREATE	- SESSION_ID, PW_TYPE, offset, data_seq, cookie, peer_cookie, l2spec
+ * SESSION_DELETE	- SESSION_ID
+ * SESSION_MODIFY	- SESSION_ID, data_seq
+ * SESSION_GET		- SESSION_ID, (...)
+ * SESSION_GETSTATS	- SESSION_ID, (stats)
+ *
+ */
+enum {
+	L2TP_CMD_NOOP,
+	L2TP_CMD_TUNNEL_CREATE,
+	L2TP_CMD_TUNNEL_DELETE,
+	L2TP_CMD_TUNNEL_MODIFY,
+	L2TP_CMD_TUNNEL_GET,
+	L2TP_CMD_SESSION_CREATE,
+	L2TP_CMD_SESSION_DELETE,
+	L2TP_CMD_SESSION_MODIFY,
+	L2TP_CMD_SESSION_GET,
+	__L2TP_CMD_MAX,
+};
+
+#define L2TP_CMD_MAX			(__L2TP_CMD_MAX - 1)
+
+/*
+ * ATTR types defined for L2TP
+ */
+enum {
+	L2TP_ATTR_NONE,			/* no data */
+	L2TP_ATTR_PW_TYPE,		/* u16, enum l2tp_pwtype */
+	L2TP_ATTR_ENCAP_TYPE,		/* u16, enum l2tp_encap_type */
+	L2TP_ATTR_OFFSET,		/* u16 */
+	L2TP_ATTR_DATA_SEQ,		/* u8 */
+	L2TP_ATTR_L2SPEC_TYPE,		/* u8, enum l2tp_l2spec_type */
+	L2TP_ATTR_L2SPEC_LEN,		/* u8, length of the L2-specific header */
+	L2TP_ATTR_PROTO_VERSION,	/* u8 */
+	L2TP_ATTR_IFNAME,		/* string */
+	L2TP_ATTR_CONN_ID,		/* u32 */
+	L2TP_ATTR_PEER_CONN_ID,		/* u32 */
+	L2TP_ATTR_SESSION_ID,		/* u32 */
+	L2TP_ATTR_PEER_SESSION_ID,	/* u32 */
+	L2TP_ATTR_UDP_CSUM,		/* u8 */
+	L2TP_ATTR_VLAN_ID,		/* u16 */
+	L2TP_ATTR_COOKIE,		/* 0, 4 or 8 bytes */
+	L2TP_ATTR_PEER_COOKIE,		/* 0, 4 or 8 bytes */
+	L2TP_ATTR_DEBUG,		/* u32 */
+	L2TP_ATTR_RECV_SEQ,		/* u8 */
+	L2TP_ATTR_SEND_SEQ,		/* u8 */
+	L2TP_ATTR_LNS_MODE,		/* u8 */
+	L2TP_ATTR_USING_IPSEC,		/* u8 */
+	L2TP_ATTR_RECV_TIMEOUT,		/* msec */
+	L2TP_ATTR_FD,			/* int */
+	L2TP_ATTR_IP_SADDR,		/* u32 */
+	L2TP_ATTR_IP_DADDR,		/* u32 */
+	L2TP_ATTR_UDP_SPORT,		/* u16 */
+	L2TP_ATTR_UDP_DPORT,		/* u16 */
+	L2TP_ATTR_MTU,			/* u16 */
+	L2TP_ATTR_MRU,			/* u16 */
+	L2TP_ATTR_STATS,		/* nested */
+	__L2TP_ATTR_MAX,
+};
+
+#define L2TP_ATTR_MAX			(__L2TP_ATTR_MAX - 1)
+
+/* Nested in L2TP_ATTR_STATS */
+enum {
+	L2TP_ATTR_STATS_NONE,		/* no data */
+	L2TP_ATTR_TX_PACKETS,		/* u64 */
+	L2TP_ATTR_TX_BYTES,		/* u64 */
+	L2TP_ATTR_TX_ERRORS,		/* u64 */
+	L2TP_ATTR_RX_PACKETS,		/* u64 */
+	L2TP_ATTR_RX_BYTES,		/* u64 */
+	L2TP_ATTR_RX_SEQ_DISCARDS,	/* u64 */
+	L2TP_ATTR_RX_OOS_PACKETS,	/* u64 */
+	L2TP_ATTR_RX_ERRORS,		/* u64 */
+	__L2TP_ATTR_STATS_MAX,
+};
+
+#define L2TP_ATTR_STATS_MAX		(__L2TP_ATTR_STATS_MAX - 1)
+
+enum l2tp_pwtype {
+	L2TP_PWTYPE_NONE = 0x0000,
+	L2TP_PWTYPE_ETH_VLAN = 0x0004,
+	L2TP_PWTYPE_ETH = 0x0005,
+	L2TP_PWTYPE_PPP = 0x0007,
+	L2TP_PWTYPE_PPP_AC = 0x0008,
+	L2TP_PWTYPE_IP = 0x000b,
+	__L2TP_PWTYPE_MAX
+};
+
+enum l2tp_l2spec_type {
+	L2TP_L2SPECTYPE_NONE,
+	L2TP_L2SPECTYPE_DEFAULT,
+};
+
+enum l2tp_encap_type {
+	L2TP_ENCAPTYPE_UDP,
+	L2TP_ENCAPTYPE_IP,
+};
+
+enum l2tp_seqmode {
+	L2TP_SEQ_NONE = 0,
+	L2TP_SEQ_IP = 1,
+	L2TP_SEQ_ALL = 2,
+};
+
+/*
+ * NETLINK_GENERIC related info
+ */
+#define L2TP_GENL_NAME		"l2tp"
+#define L2TP_GENL_VERSION	0x1
+
 #endif
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index ef28b16f7d6a..2c4a14b673ab 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_L2TP) += l2tp_core.o
 # Build l2tp as modules if L2TP is M
 obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
 obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o
+obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_V3)) += l2tp_netlink.o
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1739d04367e4..fbd1f2119fe9 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -49,6 +49,7 @@
 #include <net/dst.h>
 #include <net/ip.h>
 #include <net/udp.h>
+#include <net/inet_common.h>
 #include <net/xfrm.h>
 #include <net/protocol.h>
 
@@ -214,6 +215,32 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
 }
 EXPORT_SYMBOL_GPL(l2tp_session_find_nth);
 
+/* Lookup a session by interface name. Returns NULL if there is no match.
+ * Scans every per-net session hash bucket with strcmp(), so this is very
+ * inefficient and is only meant for management interfaces. */
+struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
+{
+	struct l2tp_net *pn = l2tp_pernet(net);
+	int hash;
+	struct hlist_node *walk;
+	struct l2tp_session *session;
+
+	read_lock_bh(&pn->l2tp_session_hlist_lock);
+	for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
+		hlist_for_each_entry(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) {
+			if (!strcmp(session->ifname, ifname)) {
+				read_unlock_bh(&pn->l2tp_session_hlist_lock);
+				return session;	/* NOTE(review): returned without taking a reference */
+			}
+		}
+	}
+
+	read_unlock_bh(&pn->l2tp_session_hlist_lock);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname);
+
 /* Lookup a tunnel by id
  */
 struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
@@ -758,7 +785,7 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 
 	/* Find the session context */
 	session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id);
-	if (!session) {
+	if (!session || !session->recv_skb) {
 		/* Not found? Pass to userspace to deal with */
 		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
 		       "%s: no session found (%u/%u). Passing up.\n",
@@ -1305,6 +1332,23 @@ err:
 }
 EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
 
+/* Used by the netlink TUNNEL_DELETE command. Returns inet_shutdown()'s
+ * result, or 0 when the tunnel has no attached socket to shut down. */
+int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
+{
+	int err = 0;
+
+	/* Force the tunnel socket to close. This will eventually
+	 * cause the tunnel to be deleted via the normal socket close
+	 * mechanisms when userspace closes the tunnel socket.
+	 */
+	if ((tunnel->sock != NULL) && (tunnel->sock->sk_socket != NULL))
+		err = inet_shutdown(tunnel->sock->sk_socket, 2);	/* 2: presumably SHUT_RDWR - confirm */
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
+
 /* Really kill the session.
  */
 void l2tp_session_free(struct l2tp_session *session)
@@ -1349,6 +1393,21 @@ void l2tp_session_free(struct l2tp_session *session)
 }
 EXPORT_SYMBOL_GPL(l2tp_session_free);
 
+/* Used by the netlink SESSION_DELETE command and by pseudowire modules: runs
+ * the optional session_close hook, drops one session reference, returns 0.
+ */
+int l2tp_session_delete(struct l2tp_session *session)
+{
+	if (session->session_close != NULL)
+		(*session->session_close)(session);
+
+	l2tp_session_dec_refcount(session);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_delete);
+
+
 /* We come here whenever a session's send_seq, cookie_len or
  * l2specific_len parameters are set.
  */
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index d2395984645e..2974d9ade167 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -33,26 +33,6 @@ enum {
 	L2TP_MSG_DATA		= (1 << 3),	/* data packets */
 };
 
-enum l2tp_pwtype {
-	L2TP_PWTYPE_NONE = 0x0000,
-	L2TP_PWTYPE_ETH_VLAN = 0x0004,
-	L2TP_PWTYPE_ETH = 0x0005,
-	L2TP_PWTYPE_PPP = 0x0007,
-	L2TP_PWTYPE_PPP_AC = 0x0008,
-	L2TP_PWTYPE_IP = 0x000b,
-	__L2TP_PWTYPE_MAX
-};
-
-enum l2tp_l2spec_type {
-	L2TP_L2SPECTYPE_NONE,
-	L2TP_L2SPECTYPE_DEFAULT,
-};
-
-enum l2tp_encap_type {
-	L2TP_ENCAPTYPE_UDP,
-	L2TP_ENCAPTYPE_IP,
-};
-
 struct sk_buff;
 
 struct l2tp_stats {
@@ -87,6 +67,7 @@ struct l2tp_session_cfg {
 						 * control of LNS. */
 	int			debug;		/* bitmask of debug message
 						 * categories */
+	u16			vlan_id;	/* VLAN pseudowire only */
 	u16			offset;		/* offset to payload */
 	u16			l2specific_len;	/* Layer 2 specific length */
 	u16			l2specific_type; /* Layer 2 specific type */
@@ -98,6 +79,7 @@ struct l2tp_session_cfg {
 						  * (in jiffies) */
 	int			mtu;
 	int			mru;
+	char			*ifname;
 };
 
 struct l2tp_session {
@@ -124,6 +106,7 @@ struct l2tp_session {
 	atomic_t		ref_count;
 
 	char			name[32];	/* for logging */
+	char			ifname[IFNAMSIZ];
 	unsigned		data_seq:2;	/* data sequencing level
 						 * 0 => none, 1 => IP only,
 						 * 2 => all
@@ -192,6 +175,11 @@ struct l2tp_tunnel {
 	uint8_t			priv[0];	/* private data */
 };
 
+struct l2tp_nl_cmd_ops {
+	int (*session_create)(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
+	int (*session_delete)(struct l2tp_session *session);
+};
+
 static inline void *l2tp_tunnel_priv(struct l2tp_tunnel *tunnel)
 {
 	return &tunnel->priv[0];
@@ -224,11 +212,14 @@ out:
 
 extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id);
 extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
+extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
 extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
 extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
 
 extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp);
+extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
 extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
+extern int l2tp_session_delete(struct l2tp_session *session);
 extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
 extern void l2tp_session_free(struct l2tp_session *session);
 extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
@@ -241,6 +232,9 @@ extern void l2tp_tunnel_destruct(struct sock *sk);
 extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
 extern void l2tp_session_set_header_len(struct l2tp_session *session, int version);
 
+extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops);
+extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
+
 /* Tunnel reference counts. Incremented per session that is added to
  * the tunnel.
  */
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
new file mode 100644
index 000000000000..3d0f7f6f7488
--- /dev/null
+++ b/net/l2tp/l2tp_netlink.c
@@ -0,0 +1,830 @@
+/*
+ * L2TP netlink layer, for management
+ *
+ * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
+ *
+ * Partly based on the IrDA netlink implementation
+ * (see net/irda/irnetlink.c) which is:
+ * Copyright (c) 2007 Samuel Ortiz <samuel@sortiz.org>
+ * which is in turn partly based on the wireless netlink code:
+ * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <net/sock.h>
+#include <net/genetlink.h>
+#include <net/udp.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/socket.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <net/net_namespace.h>
+
+#include <linux/l2tp.h>
+
+#include "l2tp_core.h"
+
+
+static struct genl_family l2tp_nl_family = {
+	.id		= GENL_ID_GENERATE,
+	.name		= L2TP_GENL_NAME,
+	.version	= L2TP_GENL_VERSION,
+	.hdrsize	= 0,
+	.maxattr	= L2TP_ATTR_MAX,
+};
+
+/* Accessed under genl lock */
+static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
+
+static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
+{
+	u32 tunnel_id;
+	u32 session_id;
+	char *ifname;
+	struct l2tp_tunnel *tunnel;
+	struct l2tp_session *session = NULL;	/* stays NULL when nothing matches */
+	struct net *net = genl_info_net(info);
+
+	if (info->attrs[L2TP_ATTR_IFNAME]) {	/* an interface name takes precedence over the ids */
+		ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
+		session = l2tp_session_find_by_ifname(net, ifname);
+	} else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
+		   (info->attrs[L2TP_ATTR_CONN_ID])) {	/* otherwise both ids are required */
+		tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+		session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
+		tunnel = l2tp_tunnel_find(net, tunnel_id);
+		if (tunnel)
+			session = l2tp_session_find(net, tunnel, session_id);
+	}
+
+	return session;
+}
+
+static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *hdr;
+	int ret = -ENOBUFS;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	/* Reply to the requester with an attribute-less L2TP_CMD_NOOP message. */
+	hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
+			  &l2tp_nl_family, 0, L2TP_CMD_NOOP);
+	if (!hdr) {	/* genlmsg_put() reports failure with NULL, not ERR_PTR() */
+		ret = -EMSGSIZE;
+		goto err_out;
+	}
+
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
+
+err_out:
+	nlmsg_free(msg);
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info)
+{
+	u32 tunnel_id;
+	u32 peer_tunnel_id;
+	int proto_version;
+	int fd;
+	int ret = 0;
+	struct l2tp_tunnel_cfg cfg = { 0, };
+	struct l2tp_tunnel *tunnel;
+	struct net *net = genl_info_net(info);
+
+	if (!info->attrs[L2TP_ATTR_CONN_ID]) {	/* CONN_ID, PEER_CONN_ID, PROTO_VERSION, ENCAP_TYPE, FD are mandatory */
+		ret = -EINVAL;
+		goto out;
+	}
+	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+
+	if (!info->attrs[L2TP_ATTR_PEER_CONN_ID]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	peer_tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_CONN_ID]);
+
+	if (!info->attrs[L2TP_ATTR_PROTO_VERSION]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	proto_version = nla_get_u8(info->attrs[L2TP_ATTR_PROTO_VERSION]);
+
+	if (!info->attrs[L2TP_ATTR_ENCAP_TYPE]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	cfg.encap = nla_get_u16(info->attrs[L2TP_ATTR_ENCAP_TYPE]);
+
+	if (!info->attrs[L2TP_ATTR_FD]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]);	/* NOTE(review): read as u32, stored in an int */
+
+	if (info->attrs[L2TP_ATTR_DEBUG])	/* optional */
+		cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
+	if (tunnel != NULL) {	/* a tunnel with this id already exists in this netns */
+		ret = -EEXIST;
+		goto out;
+	}
+
+	ret = -EINVAL;	/* result for any encap type not listed below */
+	switch (cfg.encap) {
+	case L2TP_ENCAPTYPE_UDP:
+	case L2TP_ENCAPTYPE_IP:
+		ret = l2tp_tunnel_create(net, fd, proto_version, tunnel_id,
+					 peer_tunnel_id, &cfg, &tunnel);
+		break;
+	}
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info)
+{
+	struct l2tp_tunnel *tunnel;
+	u32 tunnel_id;
+	int ret = 0;
+	struct net *net = genl_info_net(info);
+
+	if (!info->attrs[L2TP_ATTR_CONN_ID]) {	/* the tunnel id is mandatory */
+		ret = -EINVAL;
+		goto out;
+	}
+	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
+	if (tunnel == NULL) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	(void) l2tp_tunnel_delete(tunnel);	/* its result is discarded; this command returns 0 */
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info)
+{
+	struct l2tp_tunnel *tunnel;
+	u32 tunnel_id;
+	int ret = 0;
+	struct net *net = genl_info_net(info);
+
+	if (!info->attrs[L2TP_ATTR_CONN_ID]) {	/* the tunnel id is mandatory */
+		ret = -EINVAL;
+		goto out;
+	}
+	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
+	if (tunnel == NULL) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (info->attrs[L2TP_ATTR_DEBUG])	/* debug is the only attribute applied here */
+		tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
+			       struct l2tp_tunnel *tunnel)
+{
+	void *hdr;
+	struct nlattr *nest;
+	struct sock *sk = NULL;
+	struct inet_sock *inet;
+
+	hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags,
+			  L2TP_CMD_TUNNEL_GET);
+	if (!hdr)	/* genlmsg_put() reports failure with NULL, not ERR_PTR() */
+		return -EMSGSIZE;
+	/* The NLA_PUT* macros jump to nla_put_failure if an attribute cannot be added. */
+	NLA_PUT_U8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version);
+	NLA_PUT_U32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id);
+	NLA_PUT_U32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id);
+	NLA_PUT_U32(skb, L2TP_ATTR_DEBUG, tunnel->debug);
+	NLA_PUT_U16(skb, L2TP_ATTR_ENCAP_TYPE, tunnel->encap);
+
+	nest = nla_nest_start(skb, L2TP_ATTR_STATS);
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	NLA_PUT_U64(skb, L2TP_ATTR_TX_PACKETS, tunnel->stats.tx_packets);
+	NLA_PUT_U64(skb, L2TP_ATTR_TX_BYTES, tunnel->stats.tx_bytes);
+	NLA_PUT_U64(skb, L2TP_ATTR_TX_ERRORS, tunnel->stats.tx_errors);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_PACKETS, tunnel->stats.rx_packets);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_BYTES, tunnel->stats.rx_bytes);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, tunnel->stats.rx_seq_discards);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_OOS_PACKETS, tunnel->stats.rx_oos_packets);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_ERRORS, tunnel->stats.rx_errors);
+	nla_nest_end(skb, nest);
+
+	sk = tunnel->sock;
+	if (!sk)	/* no socket: skip the address/port attributes */
+		goto out;
+
+	inet = inet_sk(sk);
+
+	switch (tunnel->encap) {
+	case L2TP_ENCAPTYPE_UDP:
+		NLA_PUT_U16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport));
+		NLA_PUT_U16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport));
+		NLA_PUT_U8(skb, L2TP_ATTR_UDP_CSUM, (sk->sk_no_check != UDP_CSUM_NOXMIT));
+		/* fall through: UDP tunnels also report their IP addresses */
+	case L2TP_ENCAPTYPE_IP:
+		NLA_PUT_BE32(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr);
+		NLA_PUT_BE32(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr);
+		break;
+	}
+
+out:
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;	/* a real errno: l2tp_nl_cmd_tunnel_get() returns this value as-is */
+}
+
+static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct l2tp_tunnel *tunnel;
+	struct sk_buff *msg;
+	u32 tunnel_id;
+	int ret = -ENOBUFS;
+	struct net *net = genl_info_net(info);
+
+	if (!info->attrs[L2TP_ATTR_CONN_ID]) {	/* the tunnel id is mandatory */
+		ret = -EINVAL;
+		goto out;
+	}
+
+	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
+	if (tunnel == NULL) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = l2tp_nl_tunnel_send(msg, info->snd_pid, info->snd_seq,
+				  NLM_F_ACK, tunnel);
+	if (ret < 0)	/* the reply could not be built */
+		goto err_out;
+
+	return genlmsg_unicast(net, msg, info->snd_pid);	/* reply goes to the requesting pid */
+
+err_out:
+	nlmsg_free(msg);
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int ti = cb->args[0];	/* index of the next tunnel to dump; saved back to cb->args[0] */
+	struct l2tp_tunnel *tunnel;
+	struct net *net = sock_net(skb->sk);
+
+	for (;;) {
+		tunnel = l2tp_tunnel_find_nth(net, ti);
+		if (tunnel == NULL)	/* no tunnel at this index: done */
+			goto out;
+
+		if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).pid,
+					cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					tunnel) <= 0)	/* stop at the first tunnel that could not be added */
+			goto out;
+
+		ti++;
+	}
+
+out:
+	cb->args[0] = ti;
+
+	return skb->len;
+}
+
+static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *info)
+{
+	u32 tunnel_id = 0;
+	u32 session_id;
+	u32 peer_session_id;
+	int ret = 0;
+	struct l2tp_tunnel *tunnel;
+	struct l2tp_session *session;
+	struct l2tp_session_cfg cfg = { 0, };
+	struct net *net = genl_info_net(info);
+	/* Mandatory attributes: CONN_ID, SESSION_ID, PEER_SESSION_ID, PW_TYPE. */
+	if (!info->attrs[L2TP_ATTR_CONN_ID]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
+	if (!tunnel) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (!info->attrs[L2TP_ATTR_SESSION_ID]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
+	session = l2tp_session_find(net, tunnel, session_id);
+	if (session) {	/* the session id is already in use */
+		ret = -EEXIST;
+		goto out;
+	}
+
+	if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	peer_session_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_SESSION_ID]);
+
+	if (!info->attrs[L2TP_ATTR_PW_TYPE]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	cfg.pw_type = nla_get_u16(info->attrs[L2TP_ATTR_PW_TYPE]);
+	if (cfg.pw_type >= __L2TP_PWTYPE_MAX) {	/* pw_type indexes l2tp_nl_cmd_ops[] below */
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (tunnel->version > 2) {	/* these attributes are only read for tunnels newer than L2TPv2 */
+		if (info->attrs[L2TP_ATTR_OFFSET])
+			cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]);
+
+		if (info->attrs[L2TP_ATTR_DATA_SEQ])
+			cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
+
+		cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT;
+		if (info->attrs[L2TP_ATTR_L2SPEC_TYPE])
+			cfg.l2specific_type = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_TYPE]);
+
+		cfg.l2specific_len = 4;
+		if (info->attrs[L2TP_ATTR_L2SPEC_LEN])
+			cfg.l2specific_len = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_LEN]);
+
+		if (info->attrs[L2TP_ATTR_COOKIE]) {
+			u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]);
+			if (len > 8) {	/* longer cookies are rejected before the copy into cfg.cookie */
+				ret = -EINVAL;
+				goto out;
+			}
+			cfg.cookie_len = len;
+			memcpy(&cfg.cookie[0], nla_data(info->attrs[L2TP_ATTR_COOKIE]), len);
+		}
+		if (info->attrs[L2TP_ATTR_PEER_COOKIE]) {
+			u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]);
+			if (len > 8) {
+				ret = -EINVAL;
+				goto out;
+			}
+			cfg.peer_cookie_len = len;
+			memcpy(&cfg.peer_cookie[0], nla_data(info->attrs[L2TP_ATTR_PEER_COOKIE]), len);
+		}
+		if (info->attrs[L2TP_ATTR_IFNAME])
+			cfg.ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);	/* points into the request */
+
+		if (info->attrs[L2TP_ATTR_VLAN_ID])
+			cfg.vlan_id = nla_get_u16(info->attrs[L2TP_ATTR_VLAN_ID]);
+	}
+
+	if (info->attrs[L2TP_ATTR_DEBUG])
+		cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+
+	if (info->attrs[L2TP_ATTR_RECV_SEQ])
+		cfg.recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
+
+	if (info->attrs[L2TP_ATTR_SEND_SEQ])
+		cfg.send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
+
+	if (info->attrs[L2TP_ATTR_LNS_MODE])
+		cfg.lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]);
+
+	if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
+		cfg.reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
+
+	if (info->attrs[L2TP_ATTR_MTU])
+		cfg.mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
+
+	if (info->attrs[L2TP_ATTR_MRU])
+		cfg.mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
+
+	if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) ||
+	    (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) {
+		ret = -EPROTONOSUPPORT;	/* no handler registered for this pseudowire type */
+		goto out;
+	}
+
+	/* Check that pseudowire-specific params are present */
+	switch (cfg.pw_type) {
+	case L2TP_PWTYPE_NONE:
+		break;
+	case L2TP_PWTYPE_ETH_VLAN:
+		if (!info->attrs[L2TP_ATTR_VLAN_ID]) {
+			ret = -EINVAL;
+			goto out;
+		}
+		break;
+	case L2TP_PWTYPE_ETH:
+		break;
+	case L2TP_PWTYPE_PPP:
+	case L2TP_PWTYPE_PPP_AC:
+		break;
+	case L2TP_PWTYPE_IP:
+	default:
+		ret = -EPROTONOSUPPORT;
+		goto out;	/* bail out: a plain break let ret be overwritten and the create proceed */
+	}
+
+	ret = -EPROTONOSUPPORT;
+	if (l2tp_nl_cmd_ops[cfg.pw_type]->session_create)
+		ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id,
+			session_id, peer_session_id, &cfg);
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret = 0;	/* also the result when no delete handler is registered */
+	struct l2tp_session *session;
+	u16 pw_type;
+
+	session = l2tp_nl_session_find(info);
+	if (session == NULL) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	pw_type = session->pwtype;
+	if (pw_type < __L2TP_PWTYPE_MAX)	/* bounds the l2tp_nl_cmd_ops[] index */
+		if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
+			ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret = 0;
+	struct l2tp_session *session;
+
+	session = l2tp_nl_session_find(info);
+	if (session == NULL) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	/* Apply only the attributes present in the request; returns 0, or
+	 * -ENODEV when the request does not identify an existing session. */
+	if (info->attrs[L2TP_ATTR_DEBUG])
+		session->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+	if (info->attrs[L2TP_ATTR_DATA_SEQ])
+		session->data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
+	if (info->attrs[L2TP_ATTR_RECV_SEQ])
+		session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
+
+	/* The session header length depends on send_seq, so recompute it. */
+	if (info->attrs[L2TP_ATTR_SEND_SEQ]) {
+		session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
+		l2tp_session_set_header_len(session, session->tunnel->version);
+	}
+
+	if (info->attrs[L2TP_ATTR_LNS_MODE])
+		session->lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]);
+	if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
+		session->reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
+	if (info->attrs[L2TP_ATTR_MTU])
+		session->mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
+	if (info->attrs[L2TP_ATTR_MRU])
+		session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
+				struct l2tp_session *session)
+{
+	void *hdr;
+	struct nlattr *nest;
+	struct l2tp_tunnel *tunnel = session->tunnel;
+	struct sock *sk = NULL;
+
+	sk = tunnel->sock;	/* may be NULL; only consulted for the IPsec flag below */
+
+	hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET);
+	if (!hdr)	/* genlmsg_put() reports failure with NULL, not ERR_PTR() */
+		return -EMSGSIZE;
+	/* The NLA_PUT* macros jump to nla_put_failure if an attribute cannot be added. */
+	NLA_PUT_U32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id);
+	NLA_PUT_U32(skb, L2TP_ATTR_SESSION_ID, session->session_id);
+	NLA_PUT_U32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id);
+	NLA_PUT_U32(skb, L2TP_ATTR_PEER_SESSION_ID, session->peer_session_id);
+	NLA_PUT_U32(skb, L2TP_ATTR_DEBUG, session->debug);
+	NLA_PUT_U16(skb, L2TP_ATTR_PW_TYPE, session->pwtype);
+	NLA_PUT_U16(skb, L2TP_ATTR_MTU, session->mtu);
+	if (session->mru)
+		NLA_PUT_U16(skb, L2TP_ATTR_MRU, session->mru);
+
+	if (session->ifname[0])	/* ifname is an array, so only its first byte can be tested */
+		NLA_PUT_STRING(skb, L2TP_ATTR_IFNAME, session->ifname);
+	if (session->cookie_len)
+		NLA_PUT(skb, L2TP_ATTR_COOKIE, session->cookie_len, &session->cookie[0]);
+	if (session->peer_cookie_len)
+		NLA_PUT(skb, L2TP_ATTR_PEER_COOKIE, session->peer_cookie_len, &session->peer_cookie[0]);
+	NLA_PUT_U8(skb, L2TP_ATTR_RECV_SEQ, session->recv_seq);
+	NLA_PUT_U8(skb, L2TP_ATTR_SEND_SEQ, session->send_seq);
+	NLA_PUT_U8(skb, L2TP_ATTR_LNS_MODE, session->lns_mode);
+#ifdef CONFIG_XFRM
+	if ((sk) && (sk->sk_policy[0] || sk->sk_policy[1]))
+		NLA_PUT_U8(skb, L2TP_ATTR_USING_IPSEC, 1);
+#endif
+	if (session->reorder_timeout)
+		NLA_PUT_MSECS(skb, L2TP_ATTR_RECV_TIMEOUT, session->reorder_timeout);
+
+	nest = nla_nest_start(skb, L2TP_ATTR_STATS);
+	if (nest == NULL)
+		goto nla_put_failure;
+	NLA_PUT_U64(skb, L2TP_ATTR_TX_PACKETS, session->stats.tx_packets);
+	NLA_PUT_U64(skb, L2TP_ATTR_TX_BYTES, session->stats.tx_bytes);
+	NLA_PUT_U64(skb, L2TP_ATTR_TX_ERRORS, session->stats.tx_errors);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_PACKETS, session->stats.rx_packets);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_BYTES, session->stats.rx_bytes);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, session->stats.rx_seq_discards);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_OOS_PACKETS, session->stats.rx_oos_packets);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_ERRORS, session->stats.rx_errors);
+	nla_nest_end(skb, nest);
+
+	return genlmsg_end(skb, hdr);
+
+ nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;	/* a real errno: l2tp_nl_cmd_session_get() returns this value as-is */
+}
+
+static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct l2tp_session *session;
+	struct sk_buff *msg;
+	int ret;
+
+	session = l2tp_nl_session_find(info);	/* by IFNAME, or by CONN_ID + SESSION_ID */
+	if (session == NULL) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = l2tp_nl_session_send(msg, info->snd_pid, info->snd_seq,
+				   0, session);
+	if (ret < 0)	/* the reply could not be built */
+		goto err_out;
+
+	return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);	/* reply goes to the requesting pid */
+
+err_out:
+	nlmsg_free(msg);
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel = NULL;
+	int ti = cb->args[0];	/* index of the tunnel being dumped; saved back to cb->args[0] */
+	int si = cb->args[1];	/* index of the next session in that tunnel; saved back to cb->args[1] */
+
+	for (;;) {
+		if (tunnel == NULL) {
+			tunnel = l2tp_tunnel_find_nth(net, ti);
+			if (tunnel == NULL)	/* no tunnel at this index: done */
+				goto out;
+		}
+
+		session = l2tp_session_find_nth(tunnel, si);
+		if (session == NULL) {	/* tunnel exhausted: restart at session 0 of the next tunnel */
+			ti++;
+			tunnel = NULL;
+			si = 0;
+			continue;
+		}
+
+		if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).pid,
+					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					 session) <= 0)	/* stop at the first session that could not be added */
+			break;
+
+		si++;
+	}
+
+out:
+	cb->args[0] = ti;
+	cb->args[1] = si;
+
+	return skb->len;
+}
+
+static struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = {
+	[L2TP_ATTR_NONE]		= { .type = NLA_UNSPEC, },
+	[L2TP_ATTR_PW_TYPE]		= { .type = NLA_U16, },
+	[L2TP_ATTR_ENCAP_TYPE]		= { .type = NLA_U16, },
+	[L2TP_ATTR_OFFSET]		= { .type = NLA_U16, },
+	[L2TP_ATTR_DATA_SEQ]		= { .type = NLA_U8, },
+	[L2TP_ATTR_L2SPEC_TYPE]		= { .type = NLA_U8, },
+	[L2TP_ATTR_L2SPEC_LEN]		= { .type = NLA_U8, },
+	[L2TP_ATTR_PROTO_VERSION]	= { .type = NLA_U8, },
+	[L2TP_ATTR_CONN_ID]		= { .type = NLA_U32, },
+	[L2TP_ATTR_PEER_CONN_ID]	= { .type = NLA_U32, },
+	[L2TP_ATTR_SESSION_ID]		= { .type = NLA_U32, },
+	[L2TP_ATTR_PEER_SESSION_ID]	= { .type = NLA_U32, },
+	[L2TP_ATTR_UDP_CSUM]		= { .type = NLA_U8, },
+	[L2TP_ATTR_VLAN_ID]		= { .type = NLA_U16, },
+	[L2TP_ATTR_DEBUG]		= { .type = NLA_U32, },
+	[L2TP_ATTR_RECV_SEQ]		= { .type = NLA_U8, },
+	[L2TP_ATTR_SEND_SEQ]		= { .type = NLA_U8, },
+	[L2TP_ATTR_LNS_MODE]		= { .type = NLA_U8, },
+	[L2TP_ATTR_USING_IPSEC]		= { .type = NLA_U8, },
+	[L2TP_ATTR_RECV_TIMEOUT]	= { .type = NLA_MSECS, },
+	[L2TP_ATTR_FD]			= { .type = NLA_U32, },
+	[L2TP_ATTR_IP_SADDR]		= { .type = NLA_U32, },
+	[L2TP_ATTR_IP_DADDR]		= { .type = NLA_U32, },
+	[L2TP_ATTR_UDP_SPORT]		= { .type = NLA_U16, },
+	[L2TP_ATTR_UDP_DPORT]		= { .type = NLA_U16, },
+	[L2TP_ATTR_MTU]			= { .type = NLA_U16, },
+	[L2TP_ATTR_MRU]			= { .type = NLA_U16, },
+	[L2TP_ATTR_STATS]		= { .type = NLA_NESTED, },
+	[L2TP_ATTR_IFNAME] = {
+		.type = NLA_NUL_STRING,
+		.len = IFNAMSIZ - 1,
+	},
+	[L2TP_ATTR_COOKIE] = {
+		.type = NLA_BINARY,
+		.len = 8,
+	},
+	[L2TP_ATTR_PEER_COOKIE] = {
+		.type = NLA_BINARY,
+		.len = 8,
+	},
+};
+
+static struct genl_ops l2tp_nl_ops[] = {
+	{
+		.cmd = L2TP_CMD_NOOP,
+		.doit = l2tp_nl_cmd_noop,
+		.policy = l2tp_nl_policy,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = L2TP_CMD_TUNNEL_CREATE,
+		.doit = l2tp_nl_cmd_tunnel_create,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_TUNNEL_DELETE,
+		.doit = l2tp_nl_cmd_tunnel_delete,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_TUNNEL_MODIFY,
+		.doit = l2tp_nl_cmd_tunnel_modify,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_TUNNEL_GET,
+		.doit = l2tp_nl_cmd_tunnel_get,
+		.dumpit = l2tp_nl_cmd_tunnel_dump,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_SESSION_CREATE,
+		.doit = l2tp_nl_cmd_session_create,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_SESSION_DELETE,
+		.doit = l2tp_nl_cmd_session_delete,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_SESSION_MODIFY,
+		.doit = l2tp_nl_cmd_session_modify,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_SESSION_GET,
+		.doit = l2tp_nl_cmd_session_get,
+		.dumpit = l2tp_nl_cmd_session_dump,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+};
+
+int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops)
+{
+	int ret;
+
+	/* Returns 0, -EINVAL (bad pw_type) or -EBUSY (slot already taken). */
+	ret = -EINVAL;
+	if (pw_type >= __L2TP_PWTYPE_MAX)
+		goto err;
+
+	genl_lock();
+	ret = -EBUSY;
+	if (l2tp_nl_cmd_ops[pw_type])
+		goto out;
+	l2tp_nl_cmd_ops[pw_type] = ops;
+	ret = 0;
+out:
+	genl_unlock();
+err:
+	return ret;
+}
+EXPORT_SYMBOL_GPL(l2tp_nl_register_ops);
+
+void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type)
+{
+	if (pw_type >= __L2TP_PWTYPE_MAX)
+		return;	/* out of table range: nothing to clear */
+	genl_lock();
+	l2tp_nl_cmd_ops[pw_type] = NULL;
+	genl_unlock();
+}
+EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops);
+
+static int l2tp_nl_init(void)
+{
+	int err;
+
+	printk(KERN_INFO "L2TP netlink interface\n");
+	err = genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops,
+					    ARRAY_SIZE(l2tp_nl_ops));
+
+	return err;
+}
+
+static void l2tp_nl_cleanup(void)
+{
+	genl_unregister_family(&l2tp_nl_family);
+}
+
+module_init(l2tp_nl_init);
+module_exit(l2tp_nl_cleanup);
+
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("L2TP netlink");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
+MODULE_ALIAS("net-pf-" __stringify(PF_NETLINK) "-proto-" \
+	     __stringify(NETLINK_GENERIC) "-type-" "l2tp");
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 63fc62baeeb9..d64f081f2b1c 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -87,6 +87,7 @@
 #include <linux/hash.h>
 #include <linux/sort.h>
 #include <linux/proc_fs.h>
+#include <linux/l2tp.h>
 #include <linux/nsproxy.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
@@ -656,17 +657,23 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	if (tunnel_id == 0)
 		goto end;
 
+	tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
+
 	/* Special case: create tunnel context if session_id and
 	 * peer_session_id is 0. Otherwise look up tunnel using supplied
 	 * tunnel id.
 	 */
 	if ((session_id == 0) && (peer_session_id == 0)) {
-		error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, NULL, &tunnel);
-		if (error < 0)
-			goto end;
+		if (tunnel == NULL) {
+			struct l2tp_tunnel_cfg tcfg = {
+				.encap = L2TP_ENCAPTYPE_UDP,
+				.debug = 0,
+			};
+			error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel);
+			if (error < 0)
+				goto end;
+		}
 	} else {
-		tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
-
 		/* Error if we can't find the tunnel */
 		error = -ENOENT;
 		if (tunnel == NULL)
@@ -680,28 +687,46 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	if (tunnel->recv_payload_hook == NULL)
 		tunnel->recv_payload_hook = pppol2tp_recv_payload_hook;
 
-	/* Check that this session doesn't already exist */
-	error = -EEXIST;
-	session = l2tp_session_find(sock_net(sk), tunnel, session_id);
-	if (session != NULL)
-		goto end;
-
-	/* Default MTU values. */
-	if (cfg.mtu == 0)
-		cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
-	if (cfg.mru == 0)
-		cfg.mru = cfg.mtu;
-	cfg.debug = tunnel->debug;
+	if (tunnel->peer_tunnel_id == 0) {
+		if (ver == 2)
+			tunnel->peer_tunnel_id = sp->pppol2tp.d_tunnel;
+		else
+			tunnel->peer_tunnel_id = sp3->pppol2tp.d_tunnel;
+	}
 
-	/* Allocate and initialize a new session context. */
-	session = l2tp_session_create(sizeof(struct pppol2tp_session),
-				      tunnel, session_id,
-				      peer_session_id, &cfg);
+	/* Create session if it doesn't already exist. We handle the
+	 * case where a session was previously created by the netlink
+	 * interface by checking that the session doesn't already have
+	 * a socket and its tunnel socket are what we expect. If any
+	 * of those checks fail, return EEXIST to the caller.
+	 */
+	session = l2tp_session_find(sock_net(sk), tunnel, session_id);
 	if (session == NULL) {
-		error = -ENOMEM;
-		goto end;
+		/* Default MTU must allow space for UDP/L2TP/PPP
+		 * headers.
+		 */
+		cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+
+		/* Allocate and initialize a new session context. */
+		session = l2tp_session_create(sizeof(struct pppol2tp_session),
+					      tunnel, session_id,
+					      peer_session_id, &cfg);
+		if (session == NULL) {
+			error = -ENOMEM;
+			goto end;
+		}
+	} else {
+		ps = l2tp_session_priv(session);
+		error = -EEXIST;
+		if (ps->sock != NULL)
+			goto end;
+
+		/* consistency checks */
+		if (ps->tunnel_sock != tunnel->sock)
+			goto end;
 	}
 
+	/* Associate session with its PPPoL2TP socket */
 	ps = l2tp_session_priv(session);
 	ps->owner	     = current->pid;
 	ps->sock	     = sk;
@@ -764,6 +789,74 @@ end:
 	return error;
 }
 
+#ifdef CONFIG_L2TP_V3
+
+/* Called when creating sessions via the netlink interface.
+ */
+static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+{
+	int error;
+	struct l2tp_tunnel *tunnel;
+	struct l2tp_session *session;
+	struct pppol2tp_session *ps;
+
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
+
+	/* Error if we can't find the tunnel */
+	error = -ENOENT;
+	if (tunnel == NULL)
+		goto out;
+
+	/* Error if tunnel socket is not prepped */
+	if (tunnel->sock == NULL)
+		goto out;
+
+	/* Check that this session doesn't already exist */
+	error = -EEXIST;
+	session = l2tp_session_find(net, tunnel, session_id);
+	if (session != NULL)
+		goto out;
+
+	/* Default MTU values. */
+	if (cfg->mtu == 0)
+		cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+	if (cfg->mru == 0)
+		cfg->mru = cfg->mtu;
+
+	/* Allocate and initialize a new session context. */
+	error = -ENOMEM;
+	session = l2tp_session_create(sizeof(struct pppol2tp_session),
+				      tunnel, session_id,
+				      peer_session_id, cfg);
+	if (session == NULL)
+		goto out;
+
+	ps = l2tp_session_priv(session);
+	ps->tunnel_sock = tunnel->sock;
+
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: created\n", session->name);
+
+	error = 0;
+
+out:
+	return error;
+}
+
+/* Called when deleting sessions via the netlink interface.
+ */
+static int pppol2tp_session_delete(struct l2tp_session *session)
+{
+	struct pppol2tp_session *ps = l2tp_session_priv(session);
+
+	if (ps->sock == NULL)
+		l2tp_session_dec_refcount(session);
+
+	return 0;
+}
+
+#endif /* CONFIG_L2TP_V3 */
+
 /* getname() support.
  */
 static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
@@ -1660,6 +1753,15 @@ static struct pppox_proto pppol2tp_proto = {
 	.ioctl		= pppol2tp_ioctl
 };
 
+#ifdef CONFIG_L2TP_V3
+
+static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = {
+	.session_create	= pppol2tp_session_create,
+	.session_delete	= pppol2tp_session_delete,
+};
+
+#endif /* CONFIG_L2TP_V3 */
+
 static int __init pppol2tp_init(void)
 {
 	int err;
@@ -1676,11 +1778,22 @@ static int __init pppol2tp_init(void)
 	if (err)
 		goto out_unregister_pppol2tp_proto;
 
+#ifdef CONFIG_L2TP_V3
+	err = l2tp_nl_register_ops(L2TP_PWTYPE_PPP, &pppol2tp_nl_cmd_ops);
+	if (err)
+		goto out_unregister_pppox;
+#endif
+
 	printk(KERN_INFO "PPPoL2TP kernel driver, %s\n",
 	       PPPOL2TP_DRV_VERSION);
 
 out:
 	return err;
+
+#ifdef CONFIG_L2TP_V3
+out_unregister_pppox:
+	unregister_pppox_proto(PX_PROTO_OL2TP);
+#endif
 out_unregister_pppol2tp_proto:
 	proto_unregister(&pppol2tp_sk_proto);
 out_unregister_pppol2tp_pernet:
@@ -1690,6 +1803,9 @@ out_unregister_pppol2tp_pernet:
 
 static void __exit pppol2tp_exit(void)
 {
+#ifdef CONFIG_L2TP_V3
+	l2tp_nl_unregister_ops(L2TP_PWTYPE_PPP);
+#endif
 	unregister_pppox_proto(PX_PROTO_OL2TP);
 	proto_unregister(&pppol2tp_sk_proto);
 	unregister_pernet_device(&pppol2tp_net_ops);
-- 
cgit v1.2.3-59-g8ed1b


From e02d494d2c60746ee6583132904ac1791f5bc9a6 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:19:16 +0000
Subject: l2tp: Convert rwlock to RCU

Reader/writer locks are discouraged because they are slower than spin
locks, so this patch converts the rwlocks used in the per-net structs
to RCU.

Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 78 ++++++++++++++++++++++++++++------------------------
 1 file changed, 42 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index fbd1f2119fe9..473cf2d63905 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -21,6 +21,7 @@
 #include <linux/module.h>
 #include <linux/string.h>
 #include <linux/list.h>
+#include <linux/rculist.h>
 #include <linux/uaccess.h>
 
 #include <linux/kernel.h>
@@ -105,9 +106,9 @@ static atomic_t l2tp_session_count;
 static unsigned int l2tp_net_id;
 struct l2tp_net {
 	struct list_head l2tp_tunnel_list;
-	rwlock_t l2tp_tunnel_list_lock;
+	spinlock_t l2tp_tunnel_list_lock;
 	struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
-	rwlock_t l2tp_session_hlist_lock;
+	spinlock_t l2tp_session_hlist_lock;
 };
 
 static inline struct l2tp_net *l2tp_pernet(struct net *net)
@@ -139,14 +140,14 @@ static struct l2tp_session *l2tp_session_find_2(struct net *net, u32 session_id)
 	struct l2tp_session *session;
 	struct hlist_node *walk;
 
-	read_lock_bh(&pn->l2tp_session_hlist_lock);
-	hlist_for_each_entry(session, walk, session_list, global_hlist) {
+	rcu_read_lock_bh();
+	hlist_for_each_entry_rcu(session, walk, session_list, global_hlist) {
 		if (session->session_id == session_id) {
-			read_unlock_bh(&pn->l2tp_session_hlist_lock);
+			rcu_read_unlock_bh();
 			return session;
 		}
 	}
-	read_unlock_bh(&pn->l2tp_session_hlist_lock);
+	rcu_read_unlock_bh();
 
 	return NULL;
 }
@@ -225,17 +226,17 @@ struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
 	struct hlist_node *walk;
 	struct l2tp_session *session;
 
-	read_lock_bh(&pn->l2tp_session_hlist_lock);
+	rcu_read_lock_bh();
 	for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
-		hlist_for_each_entry(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) {
+		hlist_for_each_entry_rcu(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) {
 			if (!strcmp(session->ifname, ifname)) {
-				read_unlock_bh(&pn->l2tp_session_hlist_lock);
+				rcu_read_unlock_bh();
 				return session;
 			}
 		}
 	}
 
-	read_unlock_bh(&pn->l2tp_session_hlist_lock);
+	rcu_read_unlock_bh();
 
 	return NULL;
 }
@@ -248,14 +249,14 @@ struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
 	struct l2tp_tunnel *tunnel;
 	struct l2tp_net *pn = l2tp_pernet(net);
 
-	read_lock_bh(&pn->l2tp_tunnel_list_lock);
-	list_for_each_entry(tunnel, &pn->l2tp_tunnel_list, list) {
+	rcu_read_lock_bh();
+	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
 		if (tunnel->tunnel_id == tunnel_id) {
-			read_unlock_bh(&pn->l2tp_tunnel_list_lock);
+			rcu_read_unlock_bh();
 			return tunnel;
 		}
 	}
-	read_unlock_bh(&pn->l2tp_tunnel_list_lock);
+	rcu_read_unlock_bh();
 
 	return NULL;
 }
@@ -267,15 +268,15 @@ struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth)
 	struct l2tp_tunnel *tunnel;
 	int count = 0;
 
-	read_lock_bh(&pn->l2tp_tunnel_list_lock);
-	list_for_each_entry(tunnel, &pn->l2tp_tunnel_list, list) {
+	rcu_read_lock_bh();
+	list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
 		if (++count > nth) {
-			read_unlock_bh(&pn->l2tp_tunnel_list_lock);
+			rcu_read_unlock_bh();
 			return tunnel;
 		}
 	}
 
-	read_unlock_bh(&pn->l2tp_tunnel_list_lock);
+	rcu_read_unlock_bh();
 
 	return NULL;
 }
@@ -1167,9 +1168,10 @@ again:
 			if (tunnel->version != L2TP_HDR_VER_2) {
 				struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
 
-				write_lock_bh(&pn->l2tp_session_hlist_lock);
-				hlist_del_init(&session->global_hlist);
-				write_unlock_bh(&pn->l2tp_session_hlist_lock);
+				spin_lock_bh(&pn->l2tp_session_hlist_lock);
+				hlist_del_init_rcu(&session->global_hlist);
+				spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+				synchronize_rcu();
 			}
 
 			if (session->session_close != NULL)
@@ -1206,9 +1208,10 @@ void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
 	       "%s: free...\n", tunnel->name);
 
 	/* Remove from tunnel list */
-	write_lock_bh(&pn->l2tp_tunnel_list_lock);
-	list_del_init(&tunnel->list);
-	write_unlock_bh(&pn->l2tp_tunnel_list_lock);
+	spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+	list_del_rcu(&tunnel->list);
+	spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+	synchronize_rcu();
 
 	atomic_dec(&l2tp_tunnel_count);
 	kfree(tunnel);
@@ -1310,9 +1313,10 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 
 	/* Add tunnel to our list */
 	INIT_LIST_HEAD(&tunnel->list);
-	write_lock_bh(&pn->l2tp_tunnel_list_lock);
-	list_add(&tunnel->list, &pn->l2tp_tunnel_list);
-	write_unlock_bh(&pn->l2tp_tunnel_list_lock);
+	spin_lock_bh(&pn->l2tp_tunnel_list_lock);
+	list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
+	spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
+	synchronize_rcu();
 	atomic_inc(&l2tp_tunnel_count);
 
 	/* Bump the reference count. The tunnel context is deleted
@@ -1370,9 +1374,10 @@ void l2tp_session_free(struct l2tp_session *session)
 		if (tunnel->version != L2TP_HDR_VER_2) {
 			struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
 
-			write_lock_bh(&pn->l2tp_session_hlist_lock);
-			hlist_del_init(&session->global_hlist);
-			write_unlock_bh(&pn->l2tp_session_hlist_lock);
+			spin_lock_bh(&pn->l2tp_session_hlist_lock);
+			hlist_del_init_rcu(&session->global_hlist);
+			spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+			synchronize_rcu();
 		}
 
 		if (session->session_id != 0)
@@ -1494,10 +1499,11 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 		if (tunnel->version != L2TP_HDR_VER_2) {
 			struct l2tp_net *pn = l2tp_pernet(tunnel->l2tp_net);
 
-			write_lock_bh(&pn->l2tp_session_hlist_lock);
-			hlist_add_head(&session->global_hlist,
-				       l2tp_session_id_hash_2(pn, session_id));
-			write_unlock_bh(&pn->l2tp_session_hlist_lock);
+			spin_lock_bh(&pn->l2tp_session_hlist_lock);
+			hlist_add_head_rcu(&session->global_hlist,
+					   l2tp_session_id_hash_2(pn, session_id));
+			spin_unlock_bh(&pn->l2tp_session_hlist_lock);
+			synchronize_rcu();
 		}
 
 		/* Ignore management session in session count value */
@@ -1524,12 +1530,12 @@ static __net_init int l2tp_init_net(struct net *net)
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
-	rwlock_init(&pn->l2tp_tunnel_list_lock);
+	spin_lock_init(&pn->l2tp_tunnel_list_lock);
 
 	for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
 		INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]);
 
-	rwlock_init(&pn->l2tp_session_hlist_lock);
+	spin_lock_init(&pn->l2tp_session_hlist_lock);
 
 	err = net_assign_generic(net, l2tp_net_id, pn);
 	if (err)
-- 
cgit v1.2.3-59-g8ed1b


From d9e31d17ceba5f0736f5a34bbc236239cd42b420 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:19:26 +0000
Subject: l2tp: Add L2TP ethernet pseudowire support

This driver presents a regular net_device for each L2TP ethernet
pseudowire instance. These interfaces are named l2tpethN by default,
though userspace can specify an alternative name when the L2TP
session is created, if preferred. When the pseudowire is established,
regular Linux networking utilities may be used to configure the
interface, i.e. give it IP address info or add it to a bridge. Any
data passed over the interface is carried over an L2TP tunnel.

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/Kconfig    |  24 ++++
 net/l2tp/Makefile   |   1 +
 net/l2tp/l2tp_eth.c | 347 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 372 insertions(+)
 create mode 100644 net/l2tp/l2tp_eth.c

(limited to 'net')

diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index 0a11ccf2d95b..a292270c0abe 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -68,3 +68,27 @@ config L2TP_IP
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called l2tp_ip.
+
+config L2TP_ETH
+	tristate "L2TP ethernet pseudowire support for L2TPv3"
+	depends on L2TP_V3
+	help
+	  Support for carrying raw ethernet frames over L2TPv3.
+
+	  From RFC 4719 <http://www.ietf.org/rfc/rfc4719.txt>.
+
+	  The Layer 2 Tunneling Protocol, Version 3 (L2TPv3) can be
+	  used as a control protocol and for data encapsulation to set
+	  up Pseudowires for transporting layer 2 Packet Data Units
+	  across an IP network [RFC3931].
+
+	  This driver provides an ethernet virtual interface for each
+	  L2TP ethernet pseudowire instance. Standard Linux tools may
+	  be used to assign an IP address to the local virtual
+	  interface, or add the interface to a bridge.
+
+	  If you are using L2TPv3, you will almost certainly want to
+	  enable this option.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called l2tp_eth.
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index 2c4a14b673ab..bddbf04f0ed3 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -8,3 +8,4 @@ obj-$(CONFIG_L2TP) += l2tp_core.o
 obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
 obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o
 obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_V3)) += l2tp_netlink.o
+obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_ETH)) += l2tp_eth.o
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
new file mode 100644
index 000000000000..755c29729b6f
--- /dev/null
+++ b/net/l2tp/l2tp_eth.c
@@ -0,0 +1,347 @@
+/*
+ * L2TPv3 ethernet pseudowire driver
+ *
+ * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/hash.h>
+#include <linux/l2tp.h>
+#include <linux/in.h>
+#include <linux/etherdevice.h>
+#include <linux/spinlock.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/inet_common.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp_states.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include "l2tp_core.h"
+
+/* Default device name. May be overridden by name specified by user */
+#define L2TP_ETH_DEV_NAME	"l2tpeth%d"
+
+/* via netdev_priv() */
+struct l2tp_eth {
+	struct net_device	*dev;
+	struct sock		*tunnel_sock;
+	struct l2tp_session	*session;
+	struct list_head	list;
+};
+
+/* via l2tp_session_priv() */
+struct l2tp_eth_sess {
+	struct net_device	*dev;
+};
+
+/* per-net private data for this module */
+static unsigned int l2tp_eth_net_id;
+struct l2tp_eth_net {
+	struct list_head l2tp_eth_dev_list;
+	spinlock_t l2tp_eth_lock;
+};
+
+static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net)
+{
+	return net_generic(net, l2tp_eth_net_id);
+}
+
+static int l2tp_eth_dev_init(struct net_device *dev)
+{
+	struct l2tp_eth *priv = netdev_priv(dev);
+
+	/* ndo_init hook: link priv to dev, pick a random MAC, set broadcast */
+	priv->dev = dev;
+	random_ether_addr(dev->dev_addr);
+	memset(dev->broadcast, 0xff, ETH_ALEN);
+	return 0;
+}
+
+static void l2tp_eth_dev_uninit(struct net_device *dev)
+{
+	struct l2tp_eth *priv = netdev_priv(dev);
+	struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev));
+
+	spin_lock(&pn->l2tp_eth_lock);
+	list_del_init(&priv->list);
+	spin_unlock(&pn->l2tp_eth_lock);
+	dev_put(dev);
+}
+
+static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct l2tp_eth *priv = netdev_priv(dev);
+	struct l2tp_session *session = priv->session;
+	/* Sample the length now: skb may be gone once it is handed off. */
+	unsigned int len = skb->len;
+
+	l2tp_xmit_skb(session, skb, session->hdr_len);
+	dev->stats.tx_bytes += len;
+	dev->stats.tx_packets++;
+	return 0;
+}
+
+static const struct net_device_ops l2tp_eth_netdev_ops = {
+	.ndo_init		= l2tp_eth_dev_init,
+	.ndo_uninit		= l2tp_eth_dev_uninit,
+	.ndo_start_xmit		= l2tp_eth_dev_xmit,
+};
+
+static void l2tp_eth_dev_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+
+	dev->netdev_ops		= &l2tp_eth_netdev_ops;
+	dev->destructor		= free_netdev;
+}
+
+static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
+{
+	struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
+	struct net_device *dev = spriv->dev;
+
+	if (session->debug & L2TP_MSG_DATA) {
+		unsigned int length;
+		unsigned int offset;
+		u8 *ptr;
+
+		length = min(32u, skb->len);
+		if (!pskb_may_pull(skb, length))
+			goto error;
+
+		/* Read skb->data only after the pull, which may move it. */
+		ptr = skb->data;
+		printk(KERN_DEBUG "%s: eth recv: ", session->name);
+		/* Hex-dump up to 32 bytes; prints nothing for an empty skb. */
+		for (offset = 0; offset < length; offset++)
+			printk(" %02X", ptr[offset]);
+
+		printk("\n");
+	}
+
+	if (data_len < ETH_HLEN)
+		goto error;
+
+	secpath_reset(skb);
+
+	/* checksums verified by L2TP */
+	skb->ip_summed = CHECKSUM_NONE;
+
+	skb_dst_drop(skb);
+	nf_reset(skb);
+
+	if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) {
+		dev->last_rx = jiffies;
+		dev->stats.rx_packets++;
+		dev->stats.rx_bytes += data_len;
+	} else
+		dev->stats.rx_errors++;
+
+	return;
+
+error:
+	dev->stats.rx_errors++;
+	kfree_skb(skb);
+}
+
+static void l2tp_eth_delete(struct l2tp_session *session)
+{
+	struct l2tp_eth_sess *spriv;
+	struct net_device *dev;
+
+	if (!session)
+		return;
+	spriv = l2tp_session_priv(session);
+	dev = spriv->dev;
+	if (!dev)
+		return;	/* no netdev attached (or already torn down) */
+	unregister_netdev(dev);
+	spriv->dev = NULL;
+}
+
+static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+{
+	struct net_device *dev;
+	char name[IFNAMSIZ];
+	struct l2tp_tunnel *tunnel;
+	struct l2tp_session *session;
+	struct l2tp_eth *priv;
+	struct l2tp_eth_sess *spriv;
+	int rc;
+	struct l2tp_eth_net *pn;
+
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
+	if (!tunnel) {
+		rc = -ENODEV;
+		goto out;
+	}
+
+	session = l2tp_session_find(net, tunnel, session_id);
+	if (session) {
+		rc = -EEXIST;
+		goto out;
+	}
+
+	if (cfg->ifname) {
+		dev = dev_get_by_name(net, cfg->ifname);
+		if (dev) {
+			dev_put(dev);
+			rc = -EEXIST;
+			goto out;
+		}
+		strlcpy(name, cfg->ifname, IFNAMSIZ);
+	} else
+		strcpy(name, L2TP_ETH_DEV_NAME);
+	/* Create the session first; the netdev is attached to it below. */
+	session = l2tp_session_create(sizeof(*spriv), tunnel, session_id,
+				      peer_session_id, cfg);
+	if (!session) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	dev = alloc_netdev(sizeof(*priv), name, l2tp_eth_dev_setup);
+	if (!dev) {
+		rc = -ENOMEM;
+		goto out_del_session;
+	}
+
+	dev_net_set(dev, net);
+	if (session->mtu == 0)
+		session->mtu = dev->mtu - session->hdr_len;
+	dev->mtu = session->mtu;
+	dev->needed_headroom += session->hdr_len;
+
+	priv = netdev_priv(dev);
+	priv->dev = dev;
+	priv->session = session;
+	INIT_LIST_HEAD(&priv->list);
+	priv->tunnel_sock = tunnel->sock;
+
+	session->recv_skb = l2tp_eth_dev_recv;
+	session->session_close = l2tp_eth_delete;
+	spriv = l2tp_session_priv(session);
+	spriv->dev = dev;
+
+	rc = register_netdev(dev);
+	if (rc < 0)
+		goto out_del_dev;
+
+	/* Must be done after register_netdev() */
+	strlcpy(session->ifname, dev->name, IFNAMSIZ);
+
+	dev_hold(dev);
+	pn = l2tp_eth_pernet(dev_net(dev));
+	spin_lock(&pn->l2tp_eth_lock);
+	list_add(&priv->list, &pn->l2tp_eth_dev_list);
+	spin_unlock(&pn->l2tp_eth_lock);
+	return 0;
+
+out_del_dev:
+	free_netdev(dev);
+	/* Keep l2tp_eth_delete() from unregistering the freed device. */
+	spriv->dev = NULL;
+out_del_session:
+	l2tp_session_delete(session);
+out:
+	return rc;
+}
+
+static __net_init int l2tp_eth_init_net(struct net *net)
+{
+	struct l2tp_eth_net *pn;
+	int err;
+
+	pn = kzalloc(sizeof(*pn), GFP_KERNEL);
+	if (!pn)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&pn->l2tp_eth_dev_list);
+	spin_lock_init(&pn->l2tp_eth_lock);
+
+	err = net_assign_generic(net, l2tp_eth_net_id, pn);
+	if (err)
+		goto out;
+
+	return 0;
+
+out:
+	kfree(pn);
+	return err;
+}
+
+static __net_exit void l2tp_eth_exit_net(struct net *net)
+{
+	struct l2tp_eth_net *pn;
+
+	pn = net_generic(net, l2tp_eth_net_id);
+	/*
+	 * if someone has cached our net then
+	 * further net_generic call will return NULL
+	 */
+	net_assign_generic(net, l2tp_eth_net_id, NULL);
+	kfree(pn);
+}
+
+static __net_initdata struct pernet_operations l2tp_eth_net_ops = {
+	.init = l2tp_eth_init_net,
+	.exit = l2tp_eth_exit_net,
+	.id   = &l2tp_eth_net_id,
+	.size = sizeof(struct l2tp_eth_net),
+};
+
+
+static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = {
+	.session_create	= l2tp_eth_create,
+	.session_delete	= l2tp_session_delete,
+};
+
+
+static int __init l2tp_eth_init(void)
+{
+	int err = 0;
+
+	err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops);
+	if (err)
+		goto out;
+
+	err = register_pernet_device(&l2tp_eth_net_ops);
+	if (err)
+		goto out_unreg;
+
+	printk(KERN_INFO "L2TP ethernet pseudowire support (L2TPv3)\n");
+
+	return 0;
+
+out_unreg:
+	l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
+out:
+	return err;
+}
+
+static void __exit l2tp_eth_exit(void)
+{
+	unregister_pernet_device(&l2tp_eth_net_ops);
+	l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH);
+}
+
+module_init(l2tp_eth_init);
+module_exit(l2tp_eth_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("L2TP ethernet pseudowire driver");
+MODULE_VERSION("1.0");
-- 
cgit v1.2.3-59-g8ed1b


From 0ad6614048cf722e4d27909665b4846805357f1b Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:19:33 +0000
Subject: l2tp: Add debugfs files for dumping l2tp debug info

The existing pppol2tp driver exports debug info to
/proc/net/pppol2tp. Rather than adding info to that file for the new
functionality added in this patch series, we add new files in debugfs,
leaving the old /proc file for backwards compatibility (L2TPv2 only).

Currently only one file is provided: l2tp/tunnels, which lists
internal debug info for all l2tp tunnels and sessions. More files may
be added later. The info is for debug and problem analysis only -
userspace apps should use netlink to obtain status about l2tp tunnels
and sessions.

Although debugfs does not support net namespaces, the tunnels and
sessions dumped in l2tp/tunnels are only those in the net namespace of
the process reading the file.

Signed-off-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/Kconfig        |  13 ++
 net/l2tp/Makefile       |   1 +
 net/l2tp/l2tp_core.h    |   8 +-
 net/l2tp/l2tp_debugfs.c | 341 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/l2tp/l2tp_eth.c     |  14 ++
 net/l2tp/l2tp_ppp.c     |  17 +++
 6 files changed, 392 insertions(+), 2 deletions(-)
 create mode 100644 net/l2tp/l2tp_debugfs.c

(limited to 'net')

diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index a292270c0abe..4b1e71751e10 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -31,6 +31,19 @@ menuconfig L2TP
 	  If you don't need L2TP, say N. To compile all L2TP code as
 	  modules, choose M here.
 
+config L2TP_DEBUGFS
+	tristate "L2TP debugfs support"
+	depends on L2TP && DEBUG_FS
+	help
+	  Support for l2tp directory in debugfs filesystem. This may be
+	  used to dump internal state of the l2tp drivers for problem
+	  analysis.
+
+	  If unsure, say 'Y'.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called l2tp_debugfs.
+
 config L2TP_V3
 	bool "L2TPv3 support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL && L2TP
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index bddbf04f0ed3..110e7bc2de5e 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -9,3 +9,4 @@ obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
 obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o
 obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_V3)) += l2tp_netlink.o
 obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_ETH)) += l2tp_eth.o
+obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_DEBUGFS)) += l2tp_debugfs.o
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 2974d9ade167..571335530c6f 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -133,7 +133,9 @@ struct l2tp_session {
 	void (*session_close)(struct l2tp_session *session);
 	void (*ref)(struct l2tp_session *session);
 	void (*deref)(struct l2tp_session *session);
-
+#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
+	void (*show)(struct seq_file *m, void *priv);
+#endif
 	uint8_t			priv[0];	/* private data */
 };
 
@@ -166,7 +168,9 @@ struct l2tp_tunnel {
 	struct net		*l2tp_net;	/* the net we belong to */
 
 	atomic_t		ref_count;
-
+#ifdef CONFIG_DEBUG_FS
+	void (*show)(struct seq_file *m, void *arg);
+#endif
 	int (*recv_payload_hook)(struct sk_buff *skb);
 	void (*old_sk_destruct)(struct sock *);
 	struct sock		*sock;		/* Parent socket */
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
new file mode 100644
index 000000000000..908f10f9720e
--- /dev/null
+++ b/net/l2tp/l2tp_debugfs.c
@@ -0,0 +1,341 @@
+/*
+ * L2TP subsystem debugfs
+ *
+ * Copyright (c) 2010 Katalix Systems Ltd
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/hash.h>
+#include <linux/l2tp.h>
+#include <linux/in.h>
+#include <linux/etherdevice.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/inet_common.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp_states.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+
+#include "l2tp_core.h"
+
+static struct dentry *rootdir;
+static struct dentry *tunnels;
+
+struct l2tp_dfs_seq_data {
+	struct net *net;
+	int tunnel_idx;			/* current tunnel */
+	int session_idx;		/* index of session within current tunnel */
+	struct l2tp_tunnel *tunnel;
+	struct l2tp_session *session;	/* NULL means get next tunnel */
+};
+
+static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd)
+{
+	pd->tunnel = l2tp_tunnel_find_nth(pd->net, pd->tunnel_idx);
+	pd->tunnel_idx++;
+}
+
+static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd)
+{
+	pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx);
+	pd->session_idx++;
+
+	if (pd->session == NULL) {
+		pd->session_idx = 0;
+		l2tp_dfs_next_tunnel(pd);
+	}
+
+}
+
+static void *l2tp_dfs_seq_start(struct seq_file *m, loff_t *offs)
+{
+	struct l2tp_dfs_seq_data *pd = SEQ_START_TOKEN;
+	loff_t pos = *offs;
+
+	if (!pos)
+		goto out;
+
+	BUG_ON(m->private == NULL);
+	pd = m->private;
+
+	if (pd->tunnel == NULL)
+		l2tp_dfs_next_tunnel(pd);
+	else
+		l2tp_dfs_next_session(pd);
+
+	/* NULL tunnel and session indicates end of list */
+	if ((pd->tunnel == NULL) && (pd->session == NULL))
+		pd = NULL;
+
+out:
+	return pd;
+}
+
+
+static void *l2tp_dfs_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	(*pos)++;
+	return NULL;
+}
+
+static void l2tp_dfs_seq_stop(struct seq_file *p, void *v)
+{
+	/* nothing to do */
+}
+
+static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
+{
+	struct l2tp_tunnel *tunnel = v;
+	int session_count = 0;
+	int hash;
+	struct hlist_node *walk;
+	struct hlist_node *tmp;
+
+	read_lock_bh(&tunnel->hlist_lock);
+	for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
+		hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
+			struct l2tp_session *session;
+
+			session = hlist_entry(walk, struct l2tp_session, hlist);
+			if (session->session_id == 0)
+				continue;
+
+			session_count++;
+		}
+	}
+	read_unlock_bh(&tunnel->hlist_lock);
+
+	seq_printf(m, "\nTUNNEL %u peer %u", tunnel->tunnel_id, tunnel->peer_tunnel_id);
+	if (tunnel->sock) {
+		struct inet_sock *inet = inet_sk(tunnel->sock);
+		seq_printf(m, " from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n",
+			   NIPQUAD(inet->inet_saddr), NIPQUAD(inet->inet_daddr));
+		if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
+			seq_printf(m, " source port %hu, dest port %hu\n",
+				   ntohs(inet->inet_sport), ntohs(inet->inet_dport));
+	}
+	seq_printf(m, " L2TPv%d, %s\n", tunnel->version,
+		   tunnel->encap == L2TP_ENCAPTYPE_UDP ? "UDP" :
+		   tunnel->encap == L2TP_ENCAPTYPE_IP ? "IP" :
+		   "");
+	seq_printf(m, " %d sessions, refcnt %d/%d\n", session_count,
+		   tunnel->sock ? atomic_read(&tunnel->sock->sk_refcnt) : 0,
+		   atomic_read(&tunnel->ref_count));
+
+	seq_printf(m, " %08x tx %llu/%llu/%llu rx %llu/%llu/%llu\n",
+		   tunnel->debug,
+		   (unsigned long long)tunnel->stats.tx_packets,
+		   (unsigned long long)tunnel->stats.tx_bytes,
+		   (unsigned long long)tunnel->stats.tx_errors,
+		   (unsigned long long)tunnel->stats.rx_packets,
+		   (unsigned long long)tunnel->stats.rx_bytes,
+		   (unsigned long long)tunnel->stats.rx_errors);
+
+	if (tunnel->show != NULL)
+		tunnel->show(m, tunnel);
+}
+
+static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
+{
+	struct l2tp_session *session = v;
+
+	seq_printf(m, "  SESSION %u, peer %u, %s\n", session->session_id,
+		   session->peer_session_id,
+		   session->pwtype == L2TP_PWTYPE_ETH ? "ETH" :
+		   session->pwtype == L2TP_PWTYPE_PPP ? "PPP" :
+		   "");
+	if (session->send_seq || session->recv_seq)
+		seq_printf(m, "   nr %hu, ns %hu\n", session->nr, session->ns);
+	seq_printf(m, "   refcnt %d\n", atomic_read(&session->ref_count));
+	seq_printf(m, "   config %d/%d/%c/%c/%s/%s %08x %u\n",
+		   session->mtu, session->mru,
+		   session->recv_seq ? 'R' : '-',
+		   session->send_seq ? 'S' : '-',
+		   session->data_seq == 1 ? "IPSEQ" :
+		   session->data_seq == 2 ? "DATASEQ" : "-",
+		   session->lns_mode ? "LNS" : "LAC",
+		   session->debug,
+		   jiffies_to_msecs(session->reorder_timeout));
+	seq_printf(m, "   offset %hu l2specific %hu/%hu\n",
+		   session->offset, session->l2specific_type, session->l2specific_len);
+	if (session->cookie_len) {
+		seq_printf(m, "   cookie %02x%02x%02x%02x",
+			   session->cookie[0], session->cookie[1],
+			   session->cookie[2], session->cookie[3]);
+		if (session->cookie_len == 8)
+			seq_printf(m, "%02x%02x%02x%02x",
+				   session->cookie[4], session->cookie[5],
+				   session->cookie[6], session->cookie[7]);
+		seq_printf(m, "\n");
+	}
+	if (session->peer_cookie_len) {
+		seq_printf(m, "   peer cookie %02x%02x%02x%02x",
+			   session->peer_cookie[0], session->peer_cookie[1],
+			   session->peer_cookie[2], session->peer_cookie[3]);
+		if (session->peer_cookie_len == 8)
+			seq_printf(m, "%02x%02x%02x%02x",
+				   session->peer_cookie[4], session->peer_cookie[5],
+				   session->peer_cookie[6], session->peer_cookie[7]);
+		seq_printf(m, "\n");
+	}
+
+	seq_printf(m, "   %hu/%hu tx %llu/%llu/%llu rx %llu/%llu/%llu\n",
+		   session->nr, session->ns,
+		   (unsigned long long)session->stats.tx_packets,
+		   (unsigned long long)session->stats.tx_bytes,
+		   (unsigned long long)session->stats.tx_errors,
+		   (unsigned long long)session->stats.rx_packets,
+		   (unsigned long long)session->stats.rx_bytes,
+		   (unsigned long long)session->stats.rx_errors);
+
+	if (session->show != NULL)
+		session->show(m, session);
+}
+
+static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
+{
+	struct l2tp_dfs_seq_data *pd = v;
+
+	/* display header on line 1 */
+	if (v == SEQ_START_TOKEN) {
+		seq_puts(m, "TUNNEL ID, peer ID from IP to IP\n");
+		seq_puts(m, " L2TPv2/L2TPv3, UDP/IP\n");
+		seq_puts(m, " sessions session-count, refcnt sk->refcnt/refcnt\n");
+		seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+		seq_puts(m, "  SESSION ID, peer ID, PWTYPE\n");
+		seq_puts(m, "   refcnt cnt\n");
+		seq_puts(m, "   offset OFFSET l2specific TYPE/LEN\n");
+		seq_puts(m, "   [ cookie ]\n");
+		seq_puts(m, "   [ peer cookie ]\n");
+		seq_puts(m, "   config mtu/mru/rcvseq/sendseq/dataseq/lns debug reorderto\n");
+		seq_puts(m, "   nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+		goto out;
+	}
+
+	/* Show the tunnel or session context */
+	if (pd->session == NULL)
+		l2tp_dfs_seq_tunnel_show(m, pd->tunnel);
+	else
+		l2tp_dfs_seq_session_show(m, pd->session);
+
+out:
+	return 0;
+}
+
+static const struct seq_operations l2tp_dfs_seq_ops = {
+	.start		= l2tp_dfs_seq_start,
+	.next		= l2tp_dfs_seq_next,
+	.stop		= l2tp_dfs_seq_stop,
+	.show		= l2tp_dfs_seq_show,
+};
+
+static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
+{
+	struct l2tp_dfs_seq_data *pd;
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (pd == NULL)
+		goto out;
+
+	/* Derive the network namespace from the pid opening the
+	 * file.
+	 */
+	pd->net = get_net_ns_by_pid(current->pid);
+	if (IS_ERR(pd->net)) {
+		rc = PTR_ERR(pd->net);
+		goto err_free_pd;
+	}
+
+	rc = seq_open(file, &l2tp_dfs_seq_ops);
+	if (rc)
+		goto err_free_net;
+
+	seq = file->private_data;
+	seq->private = pd;
+
+out:
+	return rc;
+
+err_free_net:
+	put_net(pd->net);
+err_free_pd:
+	kfree(pd);
+	goto out;
+}
+
+static int l2tp_dfs_seq_release(struct inode *inode, struct file *file)
+{
+	struct l2tp_dfs_seq_data *pd;
+	struct seq_file *seq;
+
+	seq = file->private_data;
+	pd = seq->private;
+	if (pd->net)
+		put_net(pd->net);
+	kfree(pd);
+	seq_release(inode, file);
+
+	return 0;
+}
+
+static const struct file_operations l2tp_dfs_fops = {
+	.owner		= THIS_MODULE,
+	.open		= l2tp_dfs_seq_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= l2tp_dfs_seq_release,
+};
+
+static int __init l2tp_debugfs_init(void)
+{
+	int rc = 0;
+
+	rootdir = debugfs_create_dir("l2tp", NULL);
+	if (IS_ERR(rootdir)) {
+		rc = PTR_ERR(rootdir);
+		rootdir = NULL;
+		goto out;
+	}
+
+	tunnels = debugfs_create_file("tunnels", 0600, rootdir, NULL, &l2tp_dfs_fops);
+	if (tunnels == NULL)
+		rc = -EIO;
+
+	printk(KERN_INFO "L2TP debugfs support\n");
+
+out:
+	if (rc)
+		printk(KERN_WARNING "l2tp debugfs: unable to init\n");
+
+	return rc;
+}
+
+static void __exit l2tp_debugfs_exit(void)
+{
+	debugfs_remove(tunnels);
+	debugfs_remove(rootdir);
+}
+
+module_init(l2tp_debugfs_init);
+module_exit(l2tp_debugfs_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("L2TP debugfs driver");
+MODULE_VERSION("1.0");
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 755c29729b6f..9848faa3d163 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -172,6 +172,17 @@ static void l2tp_eth_delete(struct l2tp_session *session)
 	}
 }
 
+#ifdef CONFIG_L2TP_DEBUGFS
+static void l2tp_eth_show(struct seq_file *m, void *arg)
+{
+	struct l2tp_session *session = arg;
+	struct l2tp_eth_sess *spriv = l2tp_session_priv(session);
+	struct net_device *dev = spriv->dev;
+
+	seq_printf(m, "   interface %s\n", dev->name);
+}
+#endif
+
 static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
 {
 	struct net_device *dev;
@@ -233,6 +244,9 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
 	priv->tunnel_sock = tunnel->sock;
 	session->recv_skb = l2tp_eth_dev_recv;
 	session->session_close = l2tp_eth_delete;
+#ifdef CONFIG_L2TP_DEBUGFS
+	session->show = l2tp_eth_show;
+#endif
 
 	spriv = l2tp_session_priv(session);
 	spriv->dev = dev;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index d64f081f2b1c..1ef10e4118d2 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -597,6 +597,20 @@ out:
 	return error;
 }
 
+#ifdef CONFIG_L2TP_DEBUGFS
+static void pppol2tp_show(struct seq_file *m, void *arg)
+{
+	struct l2tp_session *session = arg;
+	struct pppol2tp_session *ps = l2tp_session_priv(session);
+
+	if (ps) {
+		struct pppox_sock *po = pppox_sk(ps->sock);
+		if (po)
+			seq_printf(m, "   interface %s\n", ppp_dev_name(&po->chan));
+	}
+}
+#endif
+
 /* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
  */
 static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
@@ -734,6 +748,9 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 
 	session->recv_skb	= pppol2tp_recv;
 	session->session_close	= pppol2tp_session_close;
+#ifdef CONFIG_L2TP_DEBUGFS
+	session->show		= pppol2tp_show;
+#endif
 
 	/* We need to know each time a skb is dropped from the reorder
 	 * queue.
-- 
cgit v1.2.3-59-g8ed1b


From 789a4a2c61d843df67988d69e7c3f3a4bca97e8e Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:19:40 +0000
Subject: l2tp: Add support for static unmanaged L2TPv3 tunnels

This patch adds support for static (unmanaged) L2TPv3 tunnels, where
the tunnel socket is created by the kernel rather than being created
by userspace. This means L2TP tunnels and sessions can be created
manually, without needing an L2TP control protocol implemented in
userspace. This might be useful where the user wants a simple ethernet
over IP tunnel.

A patch to iproute2 adds a new command set under "ip l2tp" to make use
of this feature. This will be submitted separately.

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c    | 115 +++++++++++++++++++++++++++++++++++++++++++-----
 net/l2tp/l2tp_core.h    |   7 +++
 net/l2tp/l2tp_netlink.c |  18 ++++++--
 3 files changed, 126 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 473cf2d63905..13ed85baf4e9 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1218,6 +1218,82 @@ void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
 }
 EXPORT_SYMBOL_GPL(l2tp_tunnel_free);
 
+/* Create a socket for the tunnel, if one isn't set up by
+ * userspace. This is used for static tunnels where there is no
+ * managing L2TP daemon.
+ */
+static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct socket **sockp)
+{
+	int err = -EINVAL;
+	struct sockaddr_in udp_addr;
+	struct sockaddr_l2tpip ip_addr;
+	struct socket *sock;
+
+	switch (cfg->encap) {
+	case L2TP_ENCAPTYPE_UDP:
+		err = sock_create(AF_INET, SOCK_DGRAM, 0, sockp);
+		if (err < 0)
+			goto out;
+
+		sock = *sockp;
+
+		memset(&udp_addr, 0, sizeof(udp_addr));
+		udp_addr.sin_family = AF_INET;
+		udp_addr.sin_addr = cfg->local_ip;
+		udp_addr.sin_port = htons(cfg->local_udp_port);
+		err = kernel_bind(sock, (struct sockaddr *) &udp_addr, sizeof(udp_addr));
+		if (err < 0)
+			goto out;
+
+		udp_addr.sin_family = AF_INET;
+		udp_addr.sin_addr = cfg->peer_ip;
+		udp_addr.sin_port = htons(cfg->peer_udp_port);
+		err = kernel_connect(sock, (struct sockaddr *) &udp_addr, sizeof(udp_addr), 0);
+		if (err < 0)
+			goto out;
+
+		if (!cfg->use_udp_checksums)
+			sock->sk->sk_no_check = UDP_CSUM_NOXMIT;
+
+		break;
+
+	case L2TP_ENCAPTYPE_IP:
+		err = sock_create(AF_INET, SOCK_DGRAM, IPPROTO_L2TP, sockp);
+		if (err < 0)
+			goto out;
+
+		sock = *sockp;
+
+		memset(&ip_addr, 0, sizeof(ip_addr));
+		ip_addr.l2tp_family = AF_INET;
+		ip_addr.l2tp_addr = cfg->local_ip;
+		ip_addr.l2tp_conn_id = tunnel_id;
+		err = kernel_bind(sock, (struct sockaddr *) &ip_addr, sizeof(ip_addr));
+		if (err < 0)
+			goto out;
+
+		ip_addr.l2tp_family = AF_INET;
+		ip_addr.l2tp_addr = cfg->peer_ip;
+		ip_addr.l2tp_conn_id = peer_tunnel_id;
+		err = kernel_connect(sock, (struct sockaddr *) &ip_addr, sizeof(ip_addr), 0);
+		if (err < 0)
+			goto out;
+
+		break;
+
+	default:
+		goto out;
+	}
+
+out:
+	if ((err < 0) && sock) {
+		sock_release(sock);
+		*sockp = NULL;
+	}
+
+	return err;
+}
+
 int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
 {
 	struct l2tp_tunnel *tunnel = NULL;
@@ -1228,14 +1304,21 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
 
 	/* Get the tunnel socket from the fd, which was opened by
-	 * the userspace L2TP daemon.
+	 * the userspace L2TP daemon. If not specified, create a
+	 * kernel socket.
 	 */
-	err = -EBADF;
-	sock = sockfd_lookup(fd, &err);
-	if (!sock) {
-		printk(KERN_ERR "tunl %hu: sockfd_lookup(fd=%d) returned %d\n",
-		       tunnel_id, fd, err);
-		goto err;
+	if (fd < 0) {
+		err = l2tp_tunnel_sock_create(tunnel_id, peer_tunnel_id, cfg, &sock);
+		if (err < 0)
+			goto err;
+	} else {
+		err = -EBADF;
+		sock = sockfd_lookup(fd, &err);
+		if (!sock) {
+			printk(KERN_ERR "tunl %hu: sockfd_lookup(fd=%d) returned %d\n",
+			       tunnel_id, fd, err);
+			goto err;
+		}
 	}
 
 	sk = sock->sk;
@@ -1329,7 +1412,10 @@ err:
 	if (tunnelp)
 		*tunnelp = tunnel;
 
-	if (sock)
+	/* If tunnel's socket was created by the kernel, it doesn't
+	 *  have a file.
+	 */
+	if (sock && sock->file)
 		sockfd_put(sock);
 
 	return err;
@@ -1341,13 +1427,22 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
 int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
 {
 	int err = 0;
+	struct socket *sock = tunnel->sock ? tunnel->sock->sk_socket : NULL;
 
 	/* Force the tunnel socket to close. This will eventually
 	 * cause the tunnel to be deleted via the normal socket close
 	 * mechanisms when userspace closes the tunnel socket.
 	 */
-	if ((tunnel->sock != NULL) && (tunnel->sock->sk_socket != NULL))
-		err = inet_shutdown(tunnel->sock->sk_socket, 2);
+	if (sock != NULL) {
+		err = inet_shutdown(sock, 2);
+
+		/* If the tunnel's socket was created by the kernel,
+		 * close the socket here since the socket was not
+		 * created by userspace.
+		 */
+		if (sock->file == NULL)
+			err = inet_release(sock);
+	}
 
 	return err;
 }
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 571335530c6f..a961c77e0867 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -146,6 +146,13 @@ struct l2tp_tunnel_cfg {
 	int			debug;		/* bitmask of debug message
 						 * categories */
 	enum l2tp_encap_type	encap;
+
+	/* Used only for kernel-created sockets */
+	struct in_addr		local_ip;
+	struct in_addr		peer_ip;
+	u16			local_udp_port;
+	u16			peer_udp_port;
+	int			use_udp_checksums:1;
 };
 
 struct l2tp_tunnel {
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 3d0f7f6f7488..12341a6cc70e 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -129,11 +129,21 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info
 	}
 	cfg.encap = nla_get_u16(info->attrs[L2TP_ATTR_ENCAP_TYPE]);
 
-	if (!info->attrs[L2TP_ATTR_FD]) {
-		ret = -EINVAL;
-		goto out;
+	fd = -1;
+	if (info->attrs[L2TP_ATTR_FD]) {
+		fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]);
+	} else {
+		if (info->attrs[L2TP_ATTR_IP_SADDR])
+			cfg.local_ip.s_addr = nla_get_be32(info->attrs[L2TP_ATTR_IP_SADDR]);
+		if (info->attrs[L2TP_ATTR_IP_DADDR])
+			cfg.peer_ip.s_addr = nla_get_be32(info->attrs[L2TP_ATTR_IP_DADDR]);
+		if (info->attrs[L2TP_ATTR_UDP_SPORT])
+			cfg.local_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_SPORT]);
+		if (info->attrs[L2TP_ATTR_UDP_DPORT])
+			cfg.peer_udp_port = nla_get_u16(info->attrs[L2TP_ATTR_UDP_DPORT]);
+		if (info->attrs[L2TP_ATTR_UDP_CSUM])
+			cfg.use_udp_checksums = nla_get_flag(info->attrs[L2TP_ATTR_UDP_CSUM]);
 	}
-	fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]);
 
 	if (info->attrs[L2TP_ATTR_DEBUG])
 		cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
-- 
cgit v1.2.3-59-g8ed1b


From f481c0d86227156fb1691b166a11c3f0058e1cb2 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 3 Apr 2010 14:58:07 -0700
Subject: l2tp: Add missing semicolon to MODULE_ALIAS() in l2tp_netlink.c

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_netlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 12341a6cc70e..4c1e540732d7 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -837,4 +837,4 @@ MODULE_DESCRIPTION("L2TP netlink");
 MODULE_LICENSE("GPL");
 MODULE_VERSION("1.0");
 MODULE_ALIAS("net-pf-" __stringify(PF_NETLINK) "-proto-" \
-	     __stringify(NETLINK_GENERIC) "-type-" "l2tp")
+	     __stringify(NETLINK_GENERIC) "-type-" "l2tp");
-- 
cgit v1.2.3-59-g8ed1b


From f66ef2d0649b220874532dfb83b6b5b368f83591 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 3 Apr 2010 15:01:37 -0700
Subject: l2tp: Fix L2TP_DEBUGFS ifdef tests.

We have to check CONFIG_L2TP_DEBUGFS_MODULE as well as
CONFIG_L2TP_DEBUGFS.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.h | 2 +-
 net/l2tp/l2tp_eth.c  | 4 ++--
 net/l2tp/l2tp_ppp.c  | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index a961c77e0867..91b1b9ca5cfa 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -133,7 +133,7 @@ struct l2tp_session {
 	void (*session_close)(struct l2tp_session *session);
 	void (*ref)(struct l2tp_session *session);
 	void (*deref)(struct l2tp_session *session);
-#ifdef CONFIG_L2TP_DEBUGFS
+#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
 	void (*show)(struct seq_file *m, void *priv);
 #endif
 	uint8_t			priv[0];	/* private data */
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index 9848faa3d163..ca1164afeb74 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -172,7 +172,7 @@ static void l2tp_eth_delete(struct l2tp_session *session)
 	}
 }
 
-#ifdef CONFIG_L2TP_DEBUGFS
+#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
 static void l2tp_eth_show(struct seq_file *m, void *arg)
 {
 	struct l2tp_session *session = arg;
@@ -244,7 +244,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p
 	priv->tunnel_sock = tunnel->sock;
 	session->recv_skb = l2tp_eth_dev_recv;
 	session->session_close = l2tp_eth_delete;
-#ifdef CONFIG_L2TP_DEBUGFS
+#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
 	session->show = l2tp_eth_show;
 #endif
 
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 1ef10e4118d2..90d82b3f2889 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -597,7 +597,7 @@ out:
 	return error;
 }
 
-#ifdef CONFIG_L2TP_DEBUGFS
+#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
 static void pppol2tp_show(struct seq_file *m, void *arg)
 {
 	struct l2tp_session *session = arg;
@@ -748,7 +748,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 
 	session->recv_skb	= pppol2tp_recv;
 	session->session_close	= pppol2tp_session_close;
-#ifdef CONFIG_L2TP_DEBUGFS
+#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE)
 	session->show		= pppol2tp_show;
 #endif
 
-- 
cgit v1.2.3-59-g8ed1b


From 1f8438a853667d48055ad38384c63e94b32c6578 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 3 Apr 2010 15:09:04 -0700
Subject: icmp: Account for ICMP out errors

When ip_append_data() fails because of a socket limit or memory shortage,
increment ICMP_MIB_OUTERRORS counter, so that "netstat -s" can report
these errors.

LANG=C netstat -s | grep "ICMP messages failed"
    0 ICMP messages failed

For IPV6, implement ICMP6_MIB_OUTERRORS counter as well.

# grep Icmp6OutErrors /proc/net/dev_snmp6/*
/proc/net/dev_snmp6/eth0:Icmp6OutErrors                   	0
/proc/net/dev_snmp6/lo:Icmp6OutErrors                   	0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h | 1 +
 net/ipv4/icmp.c      | 5 +++--
 net/ipv6/icmp.c      | 2 ++
 net/ipv6/proc.c      | 1 +
 4 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index d2a9aa3c6c88..52797714ade7 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -100,6 +100,7 @@ enum
 	ICMP6_MIB_INMSGS,			/* InMsgs */
 	ICMP6_MIB_INERRORS,			/* InErrors */
 	ICMP6_MIB_OUTMSGS,			/* OutMsgs */
+	ICMP6_MIB_OUTERRORS,			/* OutErrors */
 	__ICMP6_MIB_MAX
 };
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4b4c2bcd15db..d2aa7438c523 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -330,9 +330,10 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 	if (ip_append_data(sk, icmp_glue_bits, icmp_param,
 			   icmp_param->data_len+icmp_param->head_len,
 			   icmp_param->head_len,
-			   ipc, rt, MSG_DONTWAIT) < 0)
+			   ipc, rt, MSG_DONTWAIT) < 0) {
+		ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS);
 		ip_flush_pending_frames(sk);
-	else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
+	} else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
 		struct icmphdr *icmph = icmp_hdr(skb);
 		__wsum csum = 0;
 		struct sk_buff *skb1;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index eb9abe24bdf0..a00c18aa6c8d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -482,6 +482,7 @@ route_done:
 			      np->tclass, NULL, &fl, (struct rt6_info*)dst,
 			      MSG_DONTWAIT);
 	if (err) {
+		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 		goto out_put;
 	}
@@ -562,6 +563,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 				(struct rt6_info*)dst, MSG_DONTWAIT);
 
 	if (err) {
+		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 		goto out_put;
 	}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 58344c0fbd13..458eabfbe130 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -97,6 +97,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = {
 	SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS),
 	SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),
 	SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
+	SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
 	SNMP_MIB_SENTINEL
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 486f50ca796a2572c42c34dd4378cdc8eeb0b137 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Sat, 3 Apr 2010 15:10:21 -0700
Subject: SCTP: Change to use ipv6_addr_copy()

Change SCTP IPv6 code to use ipv6_addr_copy()

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/ipv6.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 216d88f27236..db1c767e509e 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -364,7 +364,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
 		if (addr) {
 			addr->a.v6.sin6_family = AF_INET6;
 			addr->a.v6.sin6_port = 0;
-			addr->a.v6.sin6_addr = ifp->addr;
+			ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifp->addr);
 			addr->a.v6.sin6_scope_id = dev->ifindex;
 			addr->valid = 1;
 			INIT_LIST_HEAD(&addr->list);
@@ -405,7 +405,7 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk)
 {
 	addr->v6.sin6_family = AF_INET6;
 	addr->v6.sin6_port = 0;
-	addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr;
+	ipv6_addr_copy(&addr->v6.sin6_addr, &inet6_sk(sk)->rcv_saddr);
 }
 
 /* Initialize sk->sk_rcv_saddr from sctp_addr. */
@@ -418,7 +418,7 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
 		inet6_sk(sk)->rcv_saddr.s6_addr32[3] =
 			addr->v4.sin_addr.s_addr;
 	} else {
-		inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr;
+		ipv6_addr_copy(&inet6_sk(sk)->rcv_saddr, &addr->v6.sin6_addr);
 	}
 }
 
@@ -431,7 +431,7 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
 		inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff);
 		inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
 	} else {
-		inet6_sk(sk)->daddr = addr->v6.sin6_addr;
+		ipv6_addr_copy(&inet6_sk(sk)->daddr, &addr->v6.sin6_addr);
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 7bddd0db6248d92adb1f547fd45507af4368d6fa Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 4 Apr 2010 01:02:46 -0700
Subject: l2tp: unmanaged L2TPv3 tunnels fixes

Followup to commit 789a4a2c
(l2tp: Add support for static unmanaged L2TPv3 tunnels)

One missing init in l2tp_tunnel_sock_create() could access random kernel
memory, and a bit field should be unsigned.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 2 +-
 net/l2tp/l2tp_core.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 13ed85baf4e9..98dfcce1a5fc 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1227,7 +1227,7 @@ static int l2tp_tunnel_sock_create(u32 tunnel_id, u32 peer_tunnel_id, struct l2t
 	int err = -EINVAL;
 	struct sockaddr_in udp_addr;
 	struct sockaddr_l2tpip ip_addr;
-	struct socket *sock;
+	struct socket *sock = NULL;
 
 	switch (cfg->encap) {
 	case L2TP_ENCAPTYPE_UDP:
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 91b1b9ca5cfa..f0f318edd3f1 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -152,7 +152,7 @@ struct l2tp_tunnel_cfg {
 	struct in_addr		peer_ip;
 	u16			local_udp_port;
 	u16			peer_udp_port;
-	int			use_udp_checksums:1;
+	unsigned int		use_udp_checksums:1;
 };
 
 struct l2tp_tunnel {
-- 
cgit v1.2.3-59-g8ed1b


From 5a6d234e73d7d021c74e1aa349b3b37b81372c66 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 5 Apr 2010 14:37:19 -0700
Subject: rps: fixed missed rps_unlock

Fix spin_unlock_irq which needs to be rps_unlock.

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 2a9b7dd0bb6e..74f77ca03349 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3120,7 +3120,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		skb = __skb_dequeue(&queue->input_pkt_queue);
 		if (!skb) {
 			__napi_complete(napi);
-			spin_unlock_irq(&queue->input_pkt_queue.lock);
+			rps_unlock(queue);
 			break;
 		}
 		rps_unlock(queue);
-- 
cgit v1.2.3-59-g8ed1b


From e4008276fddd10445ff06707694a938cb7f35ed4 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 5 Apr 2010 15:42:39 -0700
Subject: net: Add a missing local_irq_enable()

As noticed by Changli Gao, we must call local_irq_enable() after
rps_unlock()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 74f77ca03349..b98ddc62a55d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3121,6 +3121,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		if (!skb) {
 			__napi_complete(napi);
 			rps_unlock(queue);
+			local_irq_enable();
 			break;
 		}
 		rps_unlock(queue);
-- 
cgit v1.2.3-59-g8ed1b


From 2f787b0b76bf5de2eaa3ca3a29d89123ae03c856 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Sun, 4 Apr 2010 17:59:30 +0000
Subject: mac80211: Ensure initializing private mc_list in prepare_multicast().

Fix kernel panic by NULL pointer dereference in the context of
ieee80211_ops->prepare_multicast().

This bug was introduced by commit 22bedad3c.. ("net: convert
multicast list to list_head").

Call __hw_addr_init() in ieee80211_alloc_hw() to initialize
list_head of private device multicast list, like we do in
bond_init().

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Reviewed-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mac80211/main.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 84ad249a4e2e..0b82cd2f781e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -388,6 +388,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len,
 	local->uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN;
 
 	INIT_LIST_HEAD(&local->interfaces);
+
+	__hw_addr_init(&local->mc_list);
+
 	mutex_init(&local->iflist_mtx);
 	mutex_init(&local->scan_mtx);
 
-- 
cgit v1.2.3-59-g8ed1b


From d211e90e28a074447584729018a39910d691d1a8 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Sun, 28 Mar 2010 22:29:52 -0700
Subject: mac80211: Fix robust management frame handling (MFP)

Commit e34e09401ee9888dd662b2fca5d607794a56daf2 incorrectly removed
use of ieee80211_has_protected() from the management frame case and in
practice, made this validation drop all Action frames when MFP is
enabled. This should have only been done for frames with Protected
field set to zero.

Signed-off-by: Jouni Malinen <j@w1.fi>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 14366d4afbed..b83d4db6ca6a 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1419,7 +1419,8 @@ ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
 		return 0;
 
 	if (rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP)) {
-		if (unlikely(ieee80211_is_unicast_robust_mgmt_frame(rx->skb) &&
+		if (unlikely(!ieee80211_has_protected(fc) &&
+			     ieee80211_is_unicast_robust_mgmt_frame(rx->skb) &&
 			     rx->key))
 			return -EACCES;
 		/* BIP does not use Protected field, so need to check MMIE */
-- 
cgit v1.2.3-59-g8ed1b


From c6537d6742985da1fbf12ae26cde6a096fd35b5c Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Tue, 6 Apr 2010 11:40:52 +0000
Subject: TIPC: Updated topology subscription protocol according to latest spec

This patch makes it explicit in the API that all fields in subscriptions and events exchanged with the Topology Server must be in
network byte order.
It also ensures that all fields of a subscription are compared when cancelling a subscription, in order to avoid inadvertent
cancelling of the wrong subscription.
Finally, the tipc module version is updated to 2.0.0, to reflect the API change.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc.h | 30 ++++++++++++------------------
 net/tipc/core.c      |  2 +-
 net/tipc/subscr.c    | 15 ++++++++++-----
 3 files changed, 23 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/include/linux/tipc.h b/include/linux/tipc.h
index 3d92396639de..9536d8aeadf1 100644
--- a/include/linux/tipc.h
+++ b/include/linux/tipc.h
@@ -127,23 +127,17 @@ static inline unsigned int tipc_node(__u32 addr)
  * TIPC topology subscription service definitions
  */
 
-#define TIPC_SUB_PORTS     	0x01  	/* filter for port availability */
-#define TIPC_SUB_SERVICE     	0x02  	/* filter for service availability */
-#define TIPC_SUB_CANCEL         0x04    /* cancel a subscription */
-#if 0
-/* The following filter options are not currently implemented */
-#define TIPC_SUB_NO_BIND_EVTS	0x04	/* filter out "publish" events */
-#define TIPC_SUB_NO_UNBIND_EVTS	0x08	/* filter out "withdraw" events */
-#define TIPC_SUB_SINGLE_EVT	0x10	/* expire after first event */
-#endif
+#define TIPC_SUB_SERVICE     	0x00  	/* Filter for service availability    */
+#define TIPC_SUB_PORTS     	0x01  	/* Filter for port availability  */
+#define TIPC_SUB_CANCEL         0x04    /* Cancel a subscription         */
 
 #define TIPC_WAIT_FOREVER	~0	/* timeout for permanent subscription */
 
 struct tipc_subscr {
-	struct tipc_name_seq seq;	/* name sequence of interest */
-	__u32 timeout;			/* subscription duration (in ms) */
-        __u32 filter;   		/* bitmask of filter options */
-	char usr_handle[8];		/* available for subscriber use */
+	struct tipc_name_seq seq;	/* NBO. Name sequence of interest */
+	__u32 timeout;			/* NBO. Subscription duration (in ms) */
+        __u32 filter;   		/* NBO. Bitmask of filter options */
+	char usr_handle[8];		/* Opaque. Available for subscriber use */
 };
 
 #define TIPC_PUBLISHED		1	/* publication event */
@@ -151,11 +145,11 @@ struct tipc_subscr {
 #define TIPC_SUBSCR_TIMEOUT	3	/* subscription timeout event */
 
 struct tipc_event {
-	__u32 event;			/* event type */
-	__u32 found_lower;		/* matching name seq instances */
-	__u32 found_upper;		/*    "      "    "     "      */
-	struct tipc_portid port;	/* associated port */
-	struct tipc_subscr s;		/* associated subscription */
+	__u32 event;			/* NBO. Event type, as defined above */
+	__u32 found_lower;		/* NBO. Matching name seq instances  */
+	__u32 found_upper;		/*  "      "       "   "    "        */
+	struct tipc_portid port;	/* NBO. Associated port              */
+	struct tipc_subscr s;		/* Original, associated subscription */
 };
 
 /*
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 52c571fedbe0..4e84c8431f32 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -49,7 +49,7 @@
 #include "config.h"
 
 
-#define TIPC_MOD_VER "1.6.4"
+#define TIPC_MOD_VER "2.0.0"
 
 #ifndef CONFIG_TIPC_ZONES
 #define CONFIG_TIPC_ZONES 3
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index ff123e56114a..ab6eab4c45e2 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -274,7 +274,7 @@ static void subscr_cancel(struct tipc_subscr *s,
 {
 	struct subscription *sub;
 	struct subscription *sub_temp;
-	__u32 type, lower, upper;
+	__u32 type, lower, upper, timeout, filter;
 	int found = 0;
 
 	/* Find first matching subscription, exit if not found */
@@ -282,12 +282,18 @@ static void subscr_cancel(struct tipc_subscr *s,
 	type = ntohl(s->seq.type);
 	lower = ntohl(s->seq.lower);
 	upper = ntohl(s->seq.upper);
+	timeout = ntohl(s->timeout);
+	filter = ntohl(s->filter) & ~TIPC_SUB_CANCEL;
 
 	list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
 				 subscription_list) {
 			if ((type == sub->seq.type) &&
 			    (lower == sub->seq.lower) &&
-			    (upper == sub->seq.upper)) {
+			    (upper == sub->seq.upper) &&
+			    (timeout == sub->timeout) &&
+                            (filter == sub->filter) &&
+                             !memcmp(s->usr_handle,sub->evt.s.usr_handle,
+				     sizeof(s->usr_handle)) ){
 				found = 1;
 				break;
 			}
@@ -304,7 +310,7 @@ static void subscr_cancel(struct tipc_subscr *s,
 		k_term_timer(&sub->timer);
 		spin_lock_bh(subscriber->lock);
 	}
-	dbg("Cancel: removing sub %u,%u,%u from subscriber %x list\n",
+	dbg("Cancel: removing sub %u,%u,%u from subscriber %p list\n",
 	    sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber);
 	subscr_del(sub);
 }
@@ -352,8 +358,7 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
 	sub->seq.upper = ntohl(s->seq.upper);
 	sub->timeout = ntohl(s->timeout);
 	sub->filter = ntohl(s->filter);
-	if ((!(sub->filter & TIPC_SUB_PORTS) ==
-	     !(sub->filter & TIPC_SUB_SERVICE)) ||
+	if ((sub->filter && (sub->filter != TIPC_SUB_PORTS)) ||
 	    (sub->seq.lower > sub->seq.upper)) {
 		warn("Subscription rejected, illegal request\n");
 		kfree(sub);
-- 
cgit v1.2.3-59-g8ed1b


From 842509b8591fd9a40f5532a5f049bd29804af6d6 Mon Sep 17 00:00:00 2001
From: Hagen Paul Pfeifer <hagen@jauu.net>
Date: Tue, 6 Apr 2010 05:39:52 +0000
Subject: socket: remove duplicate declaration of struct timespec

struct timespec ts was already defined. Reuse the previously
defined one and reduce the memory footprint on the stack by
16 bytes.

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/socket.c b/net/socket.c
index 769c386bd428..ae904b58d9f5 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -619,10 +619,9 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
 				 sizeof(tv), &tv);
 		} else {
-			struct timespec ts;
-			skb_get_timestampns(skb, &ts);
+			skb_get_timestampns(skb, &ts[0]);
 			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
-				 sizeof(ts), &ts);
+				 sizeof(ts[0]), &ts[0]);
 		}
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From fe1a5f031e76bd8761a7803d75b95ee96e84a574 Mon Sep 17 00:00:00 2001
From: Timo Teräs <timo.teras@iki.fi>
Date: Wed, 7 Apr 2010 00:30:04 +0000
Subject: flow: virtualize flow cache entry methods

This allows the cached object to be validated before it is returned.
It also allows the object to be destructed properly if the last
reference was held in the flow cache. This is also a preparation for
caching bundles in the flow cache.

In return for virtualizing the methods, we save on:
- not having to regenerate the whole flow cache on policy removal:
  each flow matching a killed policy gets refreshed as the getter
  function notices it smartly.
- we do not have to call flow_cache_flush from policy gc, since the
  flow cache now properly deletes the object if it had any references

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow.h     |  23 +++++++--
 include/net/xfrm.h     |   2 +
 net/core/flow.c        | 128 ++++++++++++++++++++++++++-----------------------
 net/xfrm/xfrm_policy.c | 112 ++++++++++++++++++++++++++++---------------
 4 files changed, 163 insertions(+), 102 deletions(-)

(limited to 'net')

diff --git a/include/net/flow.h b/include/net/flow.h
index 809970b7dfee..bb08692a20b0 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -86,11 +86,26 @@ struct flowi {
 
 struct net;
 struct sock;
-typedef int (*flow_resolve_t)(struct net *net, struct flowi *key, u16 family,
-			      u8 dir, void **objp, atomic_t **obj_refp);
+struct flow_cache_ops;
+
+struct flow_cache_object {
+	const struct flow_cache_ops *ops;
+};
+
+struct flow_cache_ops {
+	struct flow_cache_object *(*get)(struct flow_cache_object *);
+	int (*check)(struct flow_cache_object *);
+	void (*delete)(struct flow_cache_object *);
+};
+
+typedef struct flow_cache_object *(*flow_resolve_t)(
+		struct net *net, struct flowi *key, u16 family,
+		u8 dir, struct flow_cache_object *oldobj, void *ctx);
+
+extern struct flow_cache_object *flow_cache_lookup(
+		struct net *net, struct flowi *key, u16 family,
+		u8 dir, flow_resolve_t resolver, void *ctx);
 
-extern void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family,
-			       u8 dir, flow_resolve_t resolver);
 extern void flow_cache_flush(void);
 extern atomic_t flow_cache_genid;
 
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index d74e080ba6c9..35396e2dd1dc 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -19,6 +19,7 @@
 #include <net/route.h>
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
+#include <net/flow.h>
 
 #include <linux/interrupt.h>
 
@@ -481,6 +482,7 @@ struct xfrm_policy {
 	atomic_t		refcnt;
 	struct timer_list	timer;
 
+	struct flow_cache_object flo;
 	u32			priority;
 	u32			index;
 	struct xfrm_mark	mark;
diff --git a/net/core/flow.c b/net/core/flow.c
index 1d27ca6b421d..521df52a77d2 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -26,17 +26,16 @@
 #include <linux/security.h>
 
 struct flow_cache_entry {
-	struct flow_cache_entry	*next;
-	u16			family;
-	u8			dir;
-	u32			genid;
-	struct flowi		key;
-	void			*object;
-	atomic_t		*object_ref;
+	struct flow_cache_entry		*next;
+	u16				family;
+	u8				dir;
+	u32				genid;
+	struct flowi			key;
+	struct flow_cache_object	*object;
 };
 
 struct flow_cache_percpu {
-	struct flow_cache_entry **	hash_table;
+	struct flow_cache_entry		**hash_table;
 	int				hash_count;
 	u32				hash_rnd;
 	int				hash_rnd_recalc;
@@ -44,7 +43,7 @@ struct flow_cache_percpu {
 };
 
 struct flow_flush_info {
-	struct flow_cache *		cache;
+	struct flow_cache		*cache;
 	atomic_t			cpuleft;
 	struct completion		completion;
 };
@@ -52,7 +51,7 @@ struct flow_flush_info {
 struct flow_cache {
 	u32				hash_shift;
 	unsigned long			order;
-	struct flow_cache_percpu *	percpu;
+	struct flow_cache_percpu	*percpu;
 	struct notifier_block		hotcpu_notifier;
 	int				low_watermark;
 	int				high_watermark;
@@ -78,12 +77,21 @@ static void flow_cache_new_hashrnd(unsigned long arg)
 	add_timer(&fc->rnd_timer);
 }
 
+static int flow_entry_valid(struct flow_cache_entry *fle)
+{
+	if (atomic_read(&flow_cache_genid) != fle->genid)
+		return 0;
+	if (fle->object && !fle->object->ops->check(fle->object))
+		return 0;
+	return 1;
+}
+
 static void flow_entry_kill(struct flow_cache *fc,
 			    struct flow_cache_percpu *fcp,
 			    struct flow_cache_entry *fle)
 {
 	if (fle->object)
-		atomic_dec(fle->object_ref);
+		fle->object->ops->delete(fle->object);
 	kmem_cache_free(flow_cachep, fle);
 	fcp->hash_count--;
 }
@@ -96,16 +104,18 @@ static void __flow_cache_shrink(struct flow_cache *fc,
 	int i;
 
 	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		int k = 0;
+		int saved = 0;
 
 		flp = &fcp->hash_table[i];
-		while ((fle = *flp) != NULL && k < shrink_to) {
-			k++;
-			flp = &fle->next;
-		}
 		while ((fle = *flp) != NULL) {
-			*flp = fle->next;
-			flow_entry_kill(fc, fcp, fle);
+			if (saved < shrink_to &&
+			    flow_entry_valid(fle)) {
+				saved++;
+				flp = &fle->next;
+			} else {
+				*flp = fle->next;
+				flow_entry_kill(fc, fcp, fle);
+			}
 		}
 	}
 }
@@ -166,18 +176,21 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
 	return 0;
 }
 
-void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
-			flow_resolve_t resolver)
+struct flow_cache_object *
+flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+		  flow_resolve_t resolver, void *ctx)
 {
 	struct flow_cache *fc = &flow_cache_global;
 	struct flow_cache_percpu *fcp;
 	struct flow_cache_entry *fle, **head;
+	struct flow_cache_object *flo;
 	unsigned int hash;
 
 	local_bh_disable();
 	fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
 
 	fle = NULL;
+	flo = NULL;
 	/* Packet really early in init?  Making flow_cache_init a
 	 * pre-smp initcall would solve this.  --RR */
 	if (!fcp->hash_table)
@@ -185,27 +198,17 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 
 	if (fcp->hash_rnd_recalc)
 		flow_new_hash_rnd(fc, fcp);
-	hash = flow_hash_code(fc, fcp, key);
 
+	hash = flow_hash_code(fc, fcp, key);
 	head = &fcp->hash_table[hash];
 	for (fle = *head; fle; fle = fle->next) {
 		if (fle->family == family &&
 		    fle->dir == dir &&
-		    flow_key_compare(key, &fle->key) == 0) {
-			if (fle->genid == atomic_read(&flow_cache_genid)) {
-				void *ret = fle->object;
-
-				if (ret)
-					atomic_inc(fle->object_ref);
-				local_bh_enable();
-
-				return ret;
-			}
+		    flow_key_compare(key, &fle->key) == 0)
 			break;
-		}
 	}
 
-	if (!fle) {
+	if (unlikely(!fle)) {
 		if (fcp->hash_count > fc->high_watermark)
 			flow_cache_shrink(fc, fcp);
 
@@ -219,33 +222,39 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 			fle->object = NULL;
 			fcp->hash_count++;
 		}
+	} else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+		flo = fle->object;
+		if (!flo)
+			goto ret_object;
+		flo = flo->ops->get(flo);
+		if (flo)
+			goto ret_object;
+	} else if (fle->object) {
+	        flo = fle->object;
+	        flo->ops->delete(flo);
+	        fle->object = NULL;
 	}
 
 nocache:
-	{
-		int err;
-		void *obj;
-		atomic_t *obj_ref;
-
-		err = resolver(net, key, family, dir, &obj, &obj_ref);
-
-		if (fle && !err) {
-			fle->genid = atomic_read(&flow_cache_genid);
-
-			if (fle->object)
-				atomic_dec(fle->object_ref);
-
-			fle->object = obj;
-			fle->object_ref = obj_ref;
-			if (obj)
-				atomic_inc(fle->object_ref);
-		}
-		local_bh_enable();
-
-		if (err)
-			obj = ERR_PTR(err);
-		return obj;
+	flo = NULL;
+	if (fle) {
+		flo = fle->object;
+		fle->object = NULL;
+	}
+	flo = resolver(net, key, family, dir, flo, ctx);
+	if (fle) {
+		fle->genid = atomic_read(&flow_cache_genid);
+		if (!IS_ERR(flo))
+			fle->object = flo;
+		else
+			fle->genid--;
+	} else {
+		if (flo && !IS_ERR(flo))
+			flo->ops->delete(flo);
 	}
+ret_object:
+	local_bh_enable();
+	return flo;
 }
 
 static void flow_cache_flush_tasklet(unsigned long data)
@@ -261,13 +270,12 @@ static void flow_cache_flush_tasklet(unsigned long data)
 
 		fle = fcp->hash_table[i];
 		for (; fle; fle = fle->next) {
-			unsigned genid = atomic_read(&flow_cache_genid);
-
-			if (!fle->object || fle->genid == genid)
+			if (flow_entry_valid(fle))
 				continue;
 
+			if (fle->object)
+				fle->object->ops->delete(fle->object);
 			fle->object = NULL;
-			atomic_dec(fle->object_ref);
 		}
 	}
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 82789cf1c632..7722baeb140d 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -216,6 +216,35 @@ expired:
 	xfrm_pol_put(xp);
 }
 
+static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
+{
+	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
+
+	if (unlikely(pol->walk.dead))
+		flo = NULL;
+	else
+		xfrm_pol_hold(pol);
+
+	return flo;
+}
+
+static int xfrm_policy_flo_check(struct flow_cache_object *flo)
+{
+	struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
+
+	return !pol->walk.dead;
+}
+
+static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
+{
+	xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
+}
+
+static const struct flow_cache_ops xfrm_policy_fc_ops = {
+	.get = xfrm_policy_flo_get,
+	.check = xfrm_policy_flo_check,
+	.delete = xfrm_policy_flo_delete,
+};
 
 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
  * SPD calls.
@@ -236,6 +265,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
 		atomic_set(&policy->refcnt, 1);
 		setup_timer(&policy->timer, xfrm_policy_timer,
 				(unsigned long)policy);
+		policy->flo.ops = &xfrm_policy_fc_ops;
 	}
 	return policy;
 }
@@ -269,9 +299,6 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
 	if (del_timer(&policy->timer))
 		atomic_dec(&policy->refcnt);
 
-	if (atomic_read(&policy->refcnt) > 1)
-		flow_cache_flush();
-
 	xfrm_pol_put(policy);
 }
 
@@ -661,10 +688,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
 	}
 	write_unlock_bh(&xfrm_policy_lock);
 
-	if (ret && delete) {
-		atomic_inc(&flow_cache_genid);
+	if (ret && delete)
 		xfrm_policy_kill(ret);
-	}
 	return ret;
 }
 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
@@ -703,10 +728,8 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
 	}
 	write_unlock_bh(&xfrm_policy_lock);
 
-	if (ret && delete) {
-		atomic_inc(&flow_cache_genid);
+	if (ret && delete)
 		xfrm_policy_kill(ret);
-	}
 	return ret;
 }
 EXPORT_SYMBOL(xfrm_policy_byid);
@@ -822,7 +845,6 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
 	}
 	if (!cnt)
 		err = -ESRCH;
-	atomic_inc(&flow_cache_genid);
 out:
 	write_unlock_bh(&xfrm_policy_lock);
 	return err;
@@ -976,32 +998,35 @@ fail:
 	return ret;
 }
 
-static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
-			      u8 dir, void **objp, atomic_t **obj_refp)
+static struct flow_cache_object *
+xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
+		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
 {
 	struct xfrm_policy *pol;
-	int err = 0;
+
+	if (old_obj)
+		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
 
 #ifdef CONFIG_XFRM_SUB_POLICY
 	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
-	if (IS_ERR(pol)) {
-		err = PTR_ERR(pol);
-		pol = NULL;
-	}
-	if (pol || err)
-		goto end;
+	if (IS_ERR(pol))
+		return ERR_CAST(pol);
+	if (pol)
+		goto found;
 #endif
 	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
-	if (IS_ERR(pol)) {
-		err = PTR_ERR(pol);
-		pol = NULL;
-	}
-#ifdef CONFIG_XFRM_SUB_POLICY
-end:
-#endif
-	if ((*objp = (void *) pol) != NULL)
-		*obj_refp = &pol->refcnt;
-	return err;
+	if (IS_ERR(pol))
+		return ERR_CAST(pol);
+	if (pol)
+		goto found;
+	return NULL;
+
+found:
+	/* Resolver returns two references:
+	 * one for cache and one for caller of flow_cache_lookup() */
+	xfrm_pol_hold(pol);
+
+	return &pol->flo;
 }
 
 static inline int policy_to_flow_dir(int dir)
@@ -1091,8 +1116,6 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
 	pol = __xfrm_policy_unlink(pol, dir);
 	write_unlock_bh(&xfrm_policy_lock);
 	if (pol) {
-		if (dir < XFRM_POLICY_MAX)
-			atomic_inc(&flow_cache_genid);
 		xfrm_policy_kill(pol);
 		return 0;
 	}
@@ -1578,18 +1601,24 @@ restart:
 	}
 
 	if (!policy) {
+		struct flow_cache_object *flo;
+
 		/* To accelerate a bit...  */
 		if ((dst_orig->flags & DST_NOXFRM) ||
 		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
 			goto nopol;
 
-		policy = flow_cache_lookup(net, fl, dst_orig->ops->family,
-					   dir, xfrm_policy_lookup);
-		err = PTR_ERR(policy);
-		if (IS_ERR(policy)) {
+		flo = flow_cache_lookup(net, fl, dst_orig->ops->family,
+					dir, xfrm_policy_lookup, NULL);
+		err = PTR_ERR(flo);
+		if (IS_ERR(flo)) {
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
 			goto dropdst;
 		}
+		if (flo)
+			policy = container_of(flo, struct xfrm_policy, flo);
+		else
+			policy = NULL;
 	}
 
 	if (!policy)
@@ -1939,9 +1968,16 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 		}
 	}
 
-	if (!pol)
-		pol = flow_cache_lookup(net, &fl, family, fl_dir,
-					xfrm_policy_lookup);
+	if (!pol) {
+		struct flow_cache_object *flo;
+
+		flo = flow_cache_lookup(net, &fl, family, fl_dir,
+					xfrm_policy_lookup, NULL);
+		if (IS_ERR_OR_NULL(flo))
+			pol = ERR_CAST(flo);
+		else
+			pol = container_of(flo, struct xfrm_policy, flo);
+	}
 
 	if (IS_ERR(pol)) {
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
-- 
cgit v1.2.3-59-g8ed1b


From 80c802f3073e84c956846e921e8a0b02dfa3755f Mon Sep 17 00:00:00 2001
From: Timo Teräs <timo.teras@iki.fi>
Date: Wed, 7 Apr 2010 00:30:05 +0000
Subject: xfrm: cache bundles instead of policies for outgoing flows

__xfrm_lookup() is called for each packet transmitted out of the
system. xfrm_find_bundle() does a linear search, which can
kill system performance depending on how many bundles are
required per policy.

This modifies __xfrm_lookup() to store bundles directly in
the flow cache. If we did not get a hit, we just create a new
bundle instead of doing a slow search. This means that we can now
get multiple xfrm_dst's for the same flow (on a per-cpu basis).

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h      |  10 +-
 net/ipv4/xfrm4_policy.c |  22 --
 net/ipv6/xfrm6_policy.c |  31 ---
 net/xfrm/xfrm_policy.c  | 711 +++++++++++++++++++++++++-----------------------
 4 files changed, 383 insertions(+), 391 deletions(-)

(limited to 'net')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 35396e2dd1dc..625dd61ccbba 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -267,7 +267,6 @@ struct xfrm_policy_afinfo {
 					       xfrm_address_t *saddr,
 					       xfrm_address_t *daddr);
 	int			(*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr);
-	struct dst_entry	*(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy);
 	void			(*decode_session)(struct sk_buff *skb,
 						  struct flowi *fl,
 						  int reverse);
@@ -483,13 +482,13 @@ struct xfrm_policy {
 	struct timer_list	timer;
 
 	struct flow_cache_object flo;
+	atomic_t		genid;
 	u32			priority;
 	u32			index;
 	struct xfrm_mark	mark;
 	struct xfrm_selector	selector;
 	struct xfrm_lifetime_cfg lft;
 	struct xfrm_lifetime_cur curlft;
-	struct dst_entry       *bundles;
 	struct xfrm_policy_walk_entry walk;
 	u8			type;
 	u8			action;
@@ -879,11 +878,15 @@ struct xfrm_dst {
 		struct rt6_info		rt6;
 	} u;
 	struct dst_entry *route;
+	struct flow_cache_object flo;
+	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+	int num_pols, num_xfrms;
 #ifdef CONFIG_XFRM_SUB_POLICY
 	struct flowi *origin;
 	struct xfrm_selector *partner;
 #endif
-	u32 genid;
+	u32 xfrm_genid;
+	u32 policy_genid;
 	u32 route_mtu_cached;
 	u32 child_mtu_cached;
 	u32 route_cookie;
@@ -893,6 +896,7 @@ struct xfrm_dst {
 #ifdef CONFIG_XFRM
 static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
 {
+	xfrm_pols_put(xdst->pols, xdst->num_pols);
 	dst_release(xdst->route);
 	if (likely(xdst->u.dst.xfrm))
 		xfrm_state_put(xdst->u.dst.xfrm);
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index e4a1483fba77..1705476670ef 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -59,27 +59,6 @@ static int xfrm4_get_saddr(struct net *net,
 	return 0;
 }
 
-static struct dst_entry *
-__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
-{
-	struct dst_entry *dst;
-
-	read_lock_bh(&policy->lock);
-	for (dst = policy->bundles; dst; dst = dst->next) {
-		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
-		if (xdst->u.rt.fl.oif == fl->oif &&	/*XXX*/
-		    xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
-		    xdst->u.rt.fl.fl4_src == fl->fl4_src &&
-		    xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
-		    xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) {
-			dst_clone(dst);
-			break;
-		}
-	}
-	read_unlock_bh(&policy->lock);
-	return dst;
-}
-
 static int xfrm4_get_tos(struct flowi *fl)
 {
 	return fl->fl4_tos;
@@ -259,7 +238,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
 	.dst_ops =		&xfrm4_dst_ops,
 	.dst_lookup =		xfrm4_dst_lookup,
 	.get_saddr =		xfrm4_get_saddr,
-	.find_bundle = 		__xfrm4_find_bundle,
 	.decode_session =	_decode_session4,
 	.get_tos =		xfrm4_get_tos,
 	.init_path =		xfrm4_init_path,
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ae181651c75a..8c452fd5ceae 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -67,36 +67,6 @@ static int xfrm6_get_saddr(struct net *net,
 	return 0;
 }
 
-static struct dst_entry *
-__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
-{
-	struct dst_entry *dst;
-
-	/* Still not clear if we should set fl->fl6_{src,dst}... */
-	read_lock_bh(&policy->lock);
-	for (dst = policy->bundles; dst; dst = dst->next) {
-		struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
-		struct in6_addr fl_dst_prefix, fl_src_prefix;
-
-		ipv6_addr_prefix(&fl_dst_prefix,
-				 &fl->fl6_dst,
-				 xdst->u.rt6.rt6i_dst.plen);
-		ipv6_addr_prefix(&fl_src_prefix,
-				 &fl->fl6_src,
-				 xdst->u.rt6.rt6i_src.plen);
-		if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
-		    ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
-		    xfrm_bundle_ok(policy, xdst, fl, AF_INET6,
-				   (xdst->u.rt6.rt6i_dst.plen != 128 ||
-				    xdst->u.rt6.rt6i_src.plen != 128))) {
-			dst_clone(dst);
-			break;
-		}
-	}
-	read_unlock_bh(&policy->lock);
-	return dst;
-}
-
 static int xfrm6_get_tos(struct flowi *fl)
 {
 	return 0;
@@ -291,7 +261,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
 	.dst_ops =		&xfrm6_dst_ops,
 	.dst_lookup =		xfrm6_dst_lookup,
 	.get_saddr = 		xfrm6_get_saddr,
-	.find_bundle =		__xfrm6_find_bundle,
 	.decode_session =	_decode_session6,
 	.get_tos =		xfrm6_get_tos,
 	.init_path =		xfrm6_init_path,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7722baeb140d..06ccc71c871f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -37,6 +37,8 @@
 DEFINE_MUTEX(xfrm_cfg_mutex);
 EXPORT_SYMBOL(xfrm_cfg_mutex);
 
+static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
+static struct dst_entry *xfrm_policy_sk_bundles;
 static DEFINE_RWLOCK(xfrm_policy_lock);
 
 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
@@ -50,6 +52,7 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
 static void xfrm_init_pmtu(struct dst_entry *dst);
+static int stale_bundle(struct dst_entry *dst);
 
 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 						int dir);
@@ -277,8 +280,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
 {
 	BUG_ON(!policy->walk.dead);
 
-	BUG_ON(policy->bundles);
-
 	if (del_timer(&policy->timer))
 		BUG();
 
@@ -289,12 +290,7 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
 
 static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
 {
-	struct dst_entry *dst;
-
-	while ((dst = policy->bundles) != NULL) {
-		policy->bundles = dst->next;
-		dst_free(dst);
-	}
+	atomic_inc(&policy->genid);
 
 	if (del_timer(&policy->timer))
 		atomic_dec(&policy->refcnt);
@@ -572,7 +568,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	struct xfrm_policy *delpol;
 	struct hlist_head *chain;
 	struct hlist_node *entry, *newpos;
-	struct dst_entry *gc_list;
 	u32 mark = policy->mark.v & policy->mark.m;
 
 	write_lock_bh(&xfrm_policy_lock);
@@ -622,34 +617,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 	else if (xfrm_bydst_should_resize(net, dir, NULL))
 		schedule_work(&net->xfrm.policy_hash_work);
 
-	read_lock_bh(&xfrm_policy_lock);
-	gc_list = NULL;
-	entry = &policy->bydst;
-	hlist_for_each_entry_continue(policy, entry, bydst) {
-		struct dst_entry *dst;
-
-		write_lock(&policy->lock);
-		dst = policy->bundles;
-		if (dst) {
-			struct dst_entry *tail = dst;
-			while (tail->next)
-				tail = tail->next;
-			tail->next = gc_list;
-			gc_list = dst;
-
-			policy->bundles = NULL;
-		}
-		write_unlock(&policy->lock);
-	}
-	read_unlock_bh(&xfrm_policy_lock);
-
-	while (gc_list) {
-		struct dst_entry *dst = gc_list;
-
-		gc_list = dst->next;
-		dst_free(dst);
-	}
-
 	return 0;
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
@@ -998,6 +965,19 @@ fail:
 	return ret;
 }
 
+static struct xfrm_policy *
+__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)
+{
+#ifdef CONFIG_XFRM_SUB_POLICY
+	struct xfrm_policy *pol;
+
+	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
+	if (pol != NULL)
+		return pol;
+#endif
+	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+}
+
 static struct flow_cache_object *
 xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
 		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
@@ -1007,21 +987,10 @@ xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
 	if (old_obj)
 		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
 
-#ifdef CONFIG_XFRM_SUB_POLICY
-	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
-	if (IS_ERR(pol))
+	pol = __xfrm_policy_lookup(net, fl, family, dir);
+	if (IS_ERR_OR_NULL(pol))
 		return ERR_CAST(pol);
-	if (pol)
-		goto found;
-#endif
-	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
-	if (IS_ERR(pol))
-		return ERR_CAST(pol);
-	if (pol)
-		goto found;
-	return NULL;
 
-found:
 	/* Resolver returns two references:
 	 * one for cache and one for caller of flow_cache_lookup() */
 	xfrm_pol_hold(pol);
@@ -1313,18 +1282,6 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
  * still valid.
  */
 
-static struct dst_entry *
-xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
-{
-	struct dst_entry *x;
-	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
-	if (unlikely(afinfo == NULL))
-		return ERR_PTR(-EINVAL);
-	x = afinfo->find_bundle(fl, policy);
-	xfrm_policy_put_afinfo(afinfo);
-	return x;
-}
-
 static inline int xfrm_get_tos(struct flowi *fl, int family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1340,6 +1297,54 @@ static inline int xfrm_get_tos(struct flowi *fl, int family)
 	return tos;
 }
 
+static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
+{
+	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
+	struct dst_entry *dst = &xdst->u.dst;
+
+	if (xdst->route == NULL) {
+		/* Dummy bundle - if it has xfrms we were not
+		 * able to build bundle as template resolution failed.
+		 * It means we need to try again resolving. */
+		if (xdst->num_xfrms > 0)
+			return NULL;
+	} else {
+		/* Real bundle */
+		if (stale_bundle(dst))
+			return NULL;
+	}
+
+	dst_hold(dst);
+	return flo;
+}
+
+static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
+{
+	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
+	struct dst_entry *dst = &xdst->u.dst;
+
+	if (!xdst->route)
+		return 0;
+	if (stale_bundle(dst))
+		return 0;
+
+	return 1;
+}
+
+static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
+{
+	struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
+	struct dst_entry *dst = &xdst->u.dst;
+
+	dst_free(dst);
+}
+
+static const struct flow_cache_ops xfrm_bundle_fc_ops = {
+	.get = xfrm_bundle_flo_get,
+	.check = xfrm_bundle_flo_check,
+	.delete = xfrm_bundle_flo_delete,
+};
+
 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 {
 	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
@@ -1362,9 +1367,10 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 		BUG();
 	}
 	xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS);
-
 	xfrm_policy_put_afinfo(afinfo);
 
+	xdst->flo.ops = &xfrm_bundle_fc_ops;
+
 	return xdst;
 }
 
@@ -1402,6 +1408,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	return err;
 }
 
+
 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
  * all the metrics... Shortly, bundle a bundle.
  */
@@ -1465,7 +1472,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
 			dst_hold(dst);
 
 		dst1->xfrm = xfrm[i];
-		xdst->genid = xfrm[i]->genid;
+		xdst->xfrm_genid = xfrm[i]->genid;
 
 		dst1->obsolete = -1;
 		dst1->flags |= DST_HOST;
@@ -1558,7 +1565,186 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
 #endif
 }
 
-static int stale_bundle(struct dst_entry *dst);
+static int xfrm_expand_policies(struct flowi *fl, u16 family,
+				struct xfrm_policy **pols,
+				int *num_pols, int *num_xfrms)
+{
+	int i;
+
+	if (*num_pols == 0 || !pols[0]) {
+		*num_pols = 0;
+		*num_xfrms = 0;
+		return 0;
+	}
+	if (IS_ERR(pols[0]))
+		return PTR_ERR(pols[0]);
+
+	*num_xfrms = pols[0]->xfrm_nr;
+
+#ifdef CONFIG_XFRM_SUB_POLICY
+	if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
+	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
+		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
+						    XFRM_POLICY_TYPE_MAIN,
+						    fl, family,
+						    XFRM_POLICY_OUT);
+		if (pols[1]) {
+			if (IS_ERR(pols[1])) {
+				xfrm_pols_put(pols, *num_pols);
+				return PTR_ERR(pols[1]);
+			}
+			(*num_pols) ++;
+			(*num_xfrms) += pols[1]->xfrm_nr;
+		}
+	}
+#endif
+	for (i = 0; i < *num_pols; i++) {
+		if (pols[i]->action != XFRM_POLICY_ALLOW) {
+			*num_xfrms = -1;
+			break;
+		}
+	}
+
+	return 0;
+
+}
+
+static struct xfrm_dst *
+xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
+			       struct flowi *fl, u16 family,
+			       struct dst_entry *dst_orig)
+{
+	struct net *net = xp_net(pols[0]);
+	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+	struct dst_entry *dst;
+	struct xfrm_dst *xdst;
+	int err;
+
+	/* Try to instantiate a bundle */
+	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
+	if (err < 0) {
+		if (err != -EAGAIN)
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+		return ERR_PTR(err);
+	}
+
+	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
+	if (IS_ERR(dst)) {
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
+		return ERR_CAST(dst);
+	}
+
+	xdst = (struct xfrm_dst *)dst;
+	xdst->num_xfrms = err;
+	if (num_pols > 1)
+		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
+	else
+		err = xfrm_dst_update_origin(dst, fl);
+	if (unlikely(err)) {
+		dst_free(dst);
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
+		return ERR_PTR(err);
+	}
+
+	xdst->num_pols = num_pols;
+	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
+	xdst->policy_genid = atomic_read(&pols[0]->genid);
+
+	return xdst;
+}
+
+static struct flow_cache_object *
+xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,
+		   struct flow_cache_object *oldflo, void *ctx)
+{
+	struct dst_entry *dst_orig = (struct dst_entry *)ctx;
+	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+	struct xfrm_dst *xdst, *new_xdst;
+	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
+
+	/* Check if the policies from old bundle are usable */
+	xdst = NULL;
+	if (oldflo) {
+		xdst = container_of(oldflo, struct xfrm_dst, flo);
+		num_pols = xdst->num_pols;
+		num_xfrms = xdst->num_xfrms;
+		pol_dead = 0;
+		for (i = 0; i < num_pols; i++) {
+			pols[i] = xdst->pols[i];
+			pol_dead |= pols[i]->walk.dead;
+		}
+		if (pol_dead) {
+			dst_free(&xdst->u.dst);
+			xdst = NULL;
+			num_pols = 0;
+			num_xfrms = 0;
+			oldflo = NULL;
+		}
+	}
+
+	/* Resolve policies to use if we couldn't get them from
+	 * previous cache entry */
+	if (xdst == NULL) {
+		num_pols = 1;
+		pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
+		err = xfrm_expand_policies(fl, family, pols,
+					   &num_pols, &num_xfrms);
+		if (err < 0)
+			goto inc_error;
+		if (num_pols == 0)
+			return NULL;
+		if (num_xfrms <= 0)
+			goto make_dummy_bundle;
+	}
+
+	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
+	if (IS_ERR(new_xdst)) {
+		err = PTR_ERR(new_xdst);
+		if (err != -EAGAIN)
+			goto error;
+		if (oldflo == NULL)
+			goto make_dummy_bundle;
+		dst_hold(&xdst->u.dst);
+		return oldflo;
+	}
+
+	/* Kill the previous bundle */
+	if (xdst) {
+		/* The policies were stolen for newly generated bundle */
+		xdst->num_pols = 0;
+		dst_free(&xdst->u.dst);
+	}
+
+	/* Flow cache does not have reference, it dst_free()'s,
+	 * but we do need to return one reference for original caller */
+	dst_hold(&new_xdst->u.dst);
+	return &new_xdst->flo;
+
+make_dummy_bundle:
+	/* We found policies, but there's no bundles to instantiate:
+	 * either because the policy blocks, has no transformations or
+	 * we could not build template (no xfrm_states).*/
+	xdst = xfrm_alloc_dst(net, family);
+	if (IS_ERR(xdst)) {
+		xfrm_pols_put(pols, num_pols);
+		return ERR_CAST(xdst);
+	}
+	xdst->num_pols = num_pols;
+	xdst->num_xfrms = num_xfrms;
+	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
+
+	dst_hold(&xdst->u.dst);
+	return &xdst->flo;
+
+inc_error:
+	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+error:
+	if (xdst != NULL)
+		dst_free(&xdst->u.dst);
+	else
+		xfrm_pols_put(pols, num_pols);
+	return ERR_PTR(err);
+}
 
 /* Main function: finds/creates a bundle for given flow.
  *
@@ -1568,248 +1754,152 @@ static int stale_bundle(struct dst_entry *dst);
 int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
 		  struct sock *sk, int flags)
 {
-	struct xfrm_policy *policy;
 	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
-	int npols;
-	int pol_dead;
-	int xfrm_nr;
-	int pi;
-	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
-	struct dst_entry *dst, *dst_orig = *dst_p;
-	int nx = 0;
-	int err;
-	u32 genid;
-	u16 family;
+	struct flow_cache_object *flo;
+	struct xfrm_dst *xdst;
+	struct dst_entry *dst, *dst_orig = *dst_p, *route;
+	u16 family = dst_orig->ops->family;
 	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+	int i, err, num_pols, num_xfrms, drop_pols = 0;
 
 restart:
-	genid = atomic_read(&flow_cache_genid);
-	policy = NULL;
-	for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
-		pols[pi] = NULL;
-	npols = 0;
-	pol_dead = 0;
-	xfrm_nr = 0;
+	dst = NULL;
+	xdst = NULL;
+	route = NULL;
 
 	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
-		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
-		err = PTR_ERR(policy);
-		if (IS_ERR(policy)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+		num_pols = 1;
+		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+		err = xfrm_expand_policies(fl, family, pols,
+					   &num_pols, &num_xfrms);
+		if (err < 0)
 			goto dropdst;
+
+		if (num_pols) {
+			if (num_xfrms <= 0) {
+				drop_pols = num_pols;
+				goto no_transform;
+			}
+
+			xdst = xfrm_resolve_and_create_bundle(
+					pols, num_pols, fl,
+					family, dst_orig);
+			if (IS_ERR(xdst)) {
+				xfrm_pols_put(pols, num_pols);
+				err = PTR_ERR(xdst);
+				goto dropdst;
+			}
+
+			spin_lock_bh(&xfrm_policy_sk_bundle_lock);
+			xdst->u.dst.next = xfrm_policy_sk_bundles;
+			xfrm_policy_sk_bundles = &xdst->u.dst;
+			spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
+
+			route = xdst->route;
 		}
 	}
 
-	if (!policy) {
-		struct flow_cache_object *flo;
-
+	if (xdst == NULL) {
 		/* To accelerate a bit...  */
 		if ((dst_orig->flags & DST_NOXFRM) ||
 		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
 			goto nopol;
 
-		flo = flow_cache_lookup(net, fl, dst_orig->ops->family,
-					dir, xfrm_policy_lookup, NULL);
-		err = PTR_ERR(flo);
+		flo = flow_cache_lookup(net, fl, family, dir,
+					xfrm_bundle_lookup, dst_orig);
+		if (flo == NULL)
+			goto nopol;
 		if (IS_ERR(flo)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+			err = PTR_ERR(flo);
 			goto dropdst;
 		}
-		if (flo)
-			policy = container_of(flo, struct xfrm_policy, flo);
-		else
-			policy = NULL;
+		xdst = container_of(flo, struct xfrm_dst, flo);
+
+		num_pols = xdst->num_pols;
+		num_xfrms = xdst->num_xfrms;
+		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols);
+		route = xdst->route;
+	}
+
+	dst = &xdst->u.dst;
+	if (route == NULL && num_xfrms > 0) {
+		/* The only case when xfrm_bundle_lookup() returns a
+		 * bundle with null route, is when the template could
+		 * not be resolved. It means policies are there, but
+		 * bundle could not be created, since we don't yet
+		 * have the xfrm_state's. We need to wait for KM to
+		 * negotiate new SA's or bail out with error.*/
+		if (net->xfrm.sysctl_larval_drop) {
+			/* EREMOTE tells the caller to generate
+			 * a one-shot blackhole route. */
+			dst_release(dst);
+			xfrm_pols_put(pols, num_pols);
+			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
+			return -EREMOTE;
+		}
+		if (flags & XFRM_LOOKUP_WAIT) {
+			DECLARE_WAITQUEUE(wait, current);
+
+			add_wait_queue(&net->xfrm.km_waitq, &wait);
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule();
+			set_current_state(TASK_RUNNING);
+			remove_wait_queue(&net->xfrm.km_waitq, &wait);
+
+			if (!signal_pending(current)) {
+				dst_release(dst);
+				goto restart;
+			}
+
+			err = -ERESTART;
+		} else
+			err = -EAGAIN;
+
+		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
+		goto error;
 	}
 
-	if (!policy)
+no_transform:
+	if (num_pols == 0)
 		goto nopol;
 
-	family = dst_orig->ops->family;
-	pols[0] = policy;
-	npols ++;
-	xfrm_nr += pols[0]->xfrm_nr;
-
-	err = -ENOENT;
-	if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
+	if ((flags & XFRM_LOOKUP_ICMP) &&
+	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
+		err = -ENOENT;
 		goto error;
+	}
 
-	policy->curlft.use_time = get_seconds();
+	for (i = 0; i < num_pols; i++)
+		pols[i]->curlft.use_time = get_seconds();
 
-	switch (policy->action) {
-	default:
-	case XFRM_POLICY_BLOCK:
+	if (num_xfrms < 0) {
 		/* Prohibit the flow */
 		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
 		err = -EPERM;
 		goto error;
-
-	case XFRM_POLICY_ALLOW:
-#ifndef CONFIG_XFRM_SUB_POLICY
-		if (policy->xfrm_nr == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pol_put(policy);
-			return 0;
-		}
-#endif
-
-		/* Try to find matching bundle.
-		 *
-		 * LATER: help from flow cache. It is optional, this
-		 * is required only for output policy.
-		 */
-		dst = xfrm_find_bundle(fl, policy, family);
-		if (IS_ERR(dst)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-			err = PTR_ERR(dst);
-			goto error;
-		}
-
-		if (dst)
-			break;
-
-#ifdef CONFIG_XFRM_SUB_POLICY
-		if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
-			pols[1] = xfrm_policy_lookup_bytype(net,
-							    XFRM_POLICY_TYPE_MAIN,
-							    fl, family,
-							    XFRM_POLICY_OUT);
-			if (pols[1]) {
-				if (IS_ERR(pols[1])) {
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
-					err = PTR_ERR(pols[1]);
-					goto error;
-				}
-				if (pols[1]->action == XFRM_POLICY_BLOCK) {
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
-					err = -EPERM;
-					goto error;
-				}
-				npols ++;
-				xfrm_nr += pols[1]->xfrm_nr;
-			}
-		}
-
-		/*
-		 * Because neither flowi nor bundle information knows about
-		 * transformation template size. On more than one policy usage
-		 * we can realize whether all of them is bypass or not after
-		 * they are searched. See above not-transformed bypass
-		 * is surrounded by non-sub policy configuration, too.
-		 */
-		if (xfrm_nr == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pols_put(pols, npols);
-			return 0;
-		}
-
-#endif
-		nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
-
-		if (unlikely(nx<0)) {
-			err = nx;
-			if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) {
-				/* EREMOTE tells the caller to generate
-				 * a one-shot blackhole route.
-				 */
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-				xfrm_pol_put(policy);
-				return -EREMOTE;
-			}
-			if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
-				DECLARE_WAITQUEUE(wait, current);
-
-				add_wait_queue(&net->xfrm.km_waitq, &wait);
-				set_current_state(TASK_INTERRUPTIBLE);
-				schedule();
-				set_current_state(TASK_RUNNING);
-				remove_wait_queue(&net->xfrm.km_waitq, &wait);
-
-				nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
-
-				if (nx == -EAGAIN && signal_pending(current)) {
-					XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-					err = -ERESTART;
-					goto error;
-				}
-				if (nx == -EAGAIN ||
-				    genid != atomic_read(&flow_cache_genid)) {
-					xfrm_pols_put(pols, npols);
-					goto restart;
-				}
-				err = nx;
-			}
-			if (err < 0) {
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
-				goto error;
-			}
-		}
-		if (nx == 0) {
-			/* Flow passes not transformed. */
-			xfrm_pols_put(pols, npols);
-			return 0;
-		}
-
-		dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
-		err = PTR_ERR(dst);
-		if (IS_ERR(dst)) {
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
-			goto error;
-		}
-
-		for (pi = 0; pi < npols; pi++)
-			pol_dead |= pols[pi]->walk.dead;
-
-		write_lock_bh(&policy->lock);
-		if (unlikely(pol_dead || stale_bundle(dst))) {
-			/* Wow! While we worked on resolving, this
-			 * policy has gone. Retry. It is not paranoia,
-			 * we just cannot enlist new bundle to dead object.
-			 * We can't enlist stable bundles either.
-			 */
-			write_unlock_bh(&policy->lock);
-			dst_free(dst);
-
-			if (pol_dead)
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD);
-			else
-				XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-			err = -EHOSTUNREACH;
-			goto error;
-		}
-
-		if (npols > 1)
-			err = xfrm_dst_update_parent(dst, &pols[1]->selector);
-		else
-			err = xfrm_dst_update_origin(dst, fl);
-		if (unlikely(err)) {
-			write_unlock_bh(&policy->lock);
-			dst_free(dst);
-			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
-			goto error;
-		}
-
-		dst->next = policy->bundles;
-		policy->bundles = dst;
-		dst_hold(dst);
-		write_unlock_bh(&policy->lock);
+	} else if (num_xfrms > 0) {
+		/* Flow transformed */
+		*dst_p = dst;
+		dst_release(dst_orig);
+	} else {
+		/* Flow passes untransformed */
+		dst_release(dst);
 	}
-	*dst_p = dst;
-	dst_release(dst_orig);
-	xfrm_pols_put(pols, npols);
+ok:
+	xfrm_pols_put(pols, drop_pols);
 	return 0;
 
+nopol:
+	if (!(flags & XFRM_LOOKUP_ICMP))
+		goto ok;
+	err = -ENOENT;
 error:
-	xfrm_pols_put(pols, npols);
+	dst_release(dst);
 dropdst:
 	dst_release(dst_orig);
 	*dst_p = NULL;
+	xfrm_pols_put(pols, drop_pols);
 	return err;
-
-nopol:
-	err = -ENOENT;
-	if (flags & XFRM_LOOKUP_ICMP)
-		goto dropdst;
-	return 0;
 }
 EXPORT_SYMBOL(__xfrm_lookup);
 
@@ -2161,71 +2251,24 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
 	return dst;
 }
 
-static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
-{
-	struct dst_entry *dst, **dstp;
-
-	write_lock(&pol->lock);
-	dstp = &pol->bundles;
-	while ((dst=*dstp) != NULL) {
-		if (func(dst)) {
-			*dstp = dst->next;
-			dst->next = *gc_list_p;
-			*gc_list_p = dst;
-		} else {
-			dstp = &dst->next;
-		}
-	}
-	write_unlock(&pol->lock);
-}
-
-static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *))
+static void __xfrm_garbage_collect(struct net *net)
 {
-	struct dst_entry *gc_list = NULL;
-	int dir;
+	struct dst_entry *head, *next;
 
-	read_lock_bh(&xfrm_policy_lock);
-	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
-		struct xfrm_policy *pol;
-		struct hlist_node *entry;
-		struct hlist_head *table;
-		int i;
+	flow_cache_flush();
 
-		hlist_for_each_entry(pol, entry,
-				     &net->xfrm.policy_inexact[dir], bydst)
-			prune_one_bundle(pol, func, &gc_list);
+	spin_lock_bh(&xfrm_policy_sk_bundle_lock);
+	head = xfrm_policy_sk_bundles;
+	xfrm_policy_sk_bundles = NULL;
+	spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
 
-		table = net->xfrm.policy_bydst[dir].table;
-		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
-			hlist_for_each_entry(pol, entry, table + i, bydst)
-				prune_one_bundle(pol, func, &gc_list);
-		}
-	}
-	read_unlock_bh(&xfrm_policy_lock);
-
-	while (gc_list) {
-		struct dst_entry *dst = gc_list;
-		gc_list = dst->next;
-		dst_free(dst);
+	while (head) {
+		next = head->next;
+		dst_free(head);
+		head = next;
 	}
 }
 
-static int unused_bundle(struct dst_entry *dst)
-{
-	return !atomic_read(&dst->__refcnt);
-}
-
-static void __xfrm_garbage_collect(struct net *net)
-{
-	xfrm_prune_bundles(net, unused_bundle);
-}
-
-static int xfrm_flush_bundles(struct net *net)
-{
-	xfrm_prune_bundles(net, stale_bundle);
-	return 0;
-}
-
 static void xfrm_init_pmtu(struct dst_entry *dst)
 {
 	do {
@@ -2283,7 +2326,9 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
 			return 0;
 		if (dst->xfrm->km.state != XFRM_STATE_VALID)
 			return 0;
-		if (xdst->genid != dst->xfrm->genid)
+		if (xdst->xfrm_genid != dst->xfrm->genid)
+			return 0;
+		if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
 			return 0;
 
 		if (strict && fl &&
@@ -2448,7 +2493,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
 
 	switch (event) {
 	case NETDEV_DOWN:
-		xfrm_flush_bundles(dev_net(dev));
+		__xfrm_garbage_collect(dev_net(dev));
 	}
 	return NOTIFY_DONE;
 }
@@ -2780,7 +2825,6 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
 			       struct xfrm_migrate *m, int num_migrate)
 {
 	struct xfrm_migrate *mp;
-	struct dst_entry *dst;
 	int i, j, n = 0;
 
 	write_lock_bh(&pol->lock);
@@ -2805,10 +2849,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
 			       sizeof(pol->xfrm_vec[i].saddr));
 			pol->xfrm_vec[i].encap_family = mp->new_family;
 			/* flush bundles */
-			while ((dst = pol->bundles) != NULL) {
-				pol->bundles = dst->next;
-				dst_free(dst);
-			}
+			atomic_inc(&pol->genid);
 		}
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 285ead175c5dd5075cab5b6c94f35a3e6c0a3ae6 Mon Sep 17 00:00:00 2001
From: Timo Teräs <timo.teras@iki.fi>
Date: Wed, 7 Apr 2010 00:30:06 +0000
Subject: xfrm: remove policy garbage collection

Policies are now properly reference counted and destroyed from
all code paths. The delayed gc is just an overhead now and can
be removed.

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 39 +++++----------------------------------
 1 file changed, 5 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 06ccc71c871f..7430ac26ec49 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -46,9 +46,6 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
 
 static struct kmem_cache *xfrm_dst_cache __read_mostly;
 
-static HLIST_HEAD(xfrm_policy_gc_list);
-static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
-
 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
 static void xfrm_init_pmtu(struct dst_entry *dst);
@@ -288,32 +285,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
 }
 EXPORT_SYMBOL(xfrm_policy_destroy);
 
-static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
-{
-	atomic_inc(&policy->genid);
-
-	if (del_timer(&policy->timer))
-		atomic_dec(&policy->refcnt);
-
-	xfrm_pol_put(policy);
-}
-
-static void xfrm_policy_gc_task(struct work_struct *work)
-{
-	struct xfrm_policy *policy;
-	struct hlist_node *entry, *tmp;
-	struct hlist_head gc_list;
-
-	spin_lock_bh(&xfrm_policy_gc_lock);
-	gc_list.first = xfrm_policy_gc_list.first;
-	INIT_HLIST_HEAD(&xfrm_policy_gc_list);
-	spin_unlock_bh(&xfrm_policy_gc_lock);
-
-	hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
-		xfrm_policy_gc_kill(policy);
-}
-static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
-
 /* Rule must be locked. Release descentant resources, announce
  * entry dead. The rule must be unlinked from lists to the moment.
  */
@@ -322,11 +293,12 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
 {
 	policy->walk.dead = 1;
 
-	spin_lock_bh(&xfrm_policy_gc_lock);
-	hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
-	spin_unlock_bh(&xfrm_policy_gc_lock);
+	atomic_inc(&policy->genid);
 
-	schedule_work(&xfrm_policy_gc_work);
+	if (del_timer(&policy->timer))
+		xfrm_pol_put(policy);
+
+	xfrm_pol_put(policy);
 }
 
 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
@@ -2599,7 +2571,6 @@ static void xfrm_policy_fini(struct net *net)
 	audit_info.sessionid = -1;
 	audit_info.secid = 0;
 	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
-	flush_work(&xfrm_policy_gc_work);
 
 	WARN_ON(!list_empty(&net->xfrm.policy_all));
 
-- 
cgit v1.2.3-59-g8ed1b


From 8e4795605d1e1b39113818ad7c147b8a867a1f6a Mon Sep 17 00:00:00 2001
From: Timo Teräs <timo.teras@iki.fi>
Date: Wed, 7 Apr 2010 00:30:07 +0000
Subject: flow: delayed deletion of flow cache entries

Speed up lookups by freeing flow cache entries later. After
virtualizing flow cache entry operations, the flow cache may now
end up calling policy or bundle destructor which can be slowish.

As gc_list is more effective with a doubly linked list, the flow cache
is converted to use the common hlist and list macros where appropriate.

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/flow.c | 100 ++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 69 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/core/flow.c b/net/core/flow.c
index 521df52a77d2..161900674009 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -26,7 +26,10 @@
 #include <linux/security.h>
 
 struct flow_cache_entry {
-	struct flow_cache_entry		*next;
+	union {
+		struct hlist_node	hlist;
+		struct list_head	gc_list;
+	} u;
 	u16				family;
 	u8				dir;
 	u32				genid;
@@ -35,7 +38,7 @@ struct flow_cache_entry {
 };
 
 struct flow_cache_percpu {
-	struct flow_cache_entry		**hash_table;
+	struct hlist_head		*hash_table;
 	int				hash_count;
 	u32				hash_rnd;
 	int				hash_rnd_recalc;
@@ -62,6 +65,9 @@ atomic_t flow_cache_genid = ATOMIC_INIT(0);
 static struct flow_cache flow_cache_global;
 static struct kmem_cache *flow_cachep;
 
+static DEFINE_SPINLOCK(flow_cache_gc_lock);
+static LIST_HEAD(flow_cache_gc_list);
+
 #define flow_cache_hash_size(cache)	(1 << (cache)->hash_shift)
 #define FLOW_HASH_RND_PERIOD		(10 * 60 * HZ)
 
@@ -86,38 +92,66 @@ static int flow_entry_valid(struct flow_cache_entry *fle)
 	return 1;
 }
 
-static void flow_entry_kill(struct flow_cache *fc,
-			    struct flow_cache_percpu *fcp,
-			    struct flow_cache_entry *fle)
+static void flow_entry_kill(struct flow_cache_entry *fle)
 {
 	if (fle->object)
 		fle->object->ops->delete(fle->object);
 	kmem_cache_free(flow_cachep, fle);
-	fcp->hash_count--;
+}
+
+static void flow_cache_gc_task(struct work_struct *work)
+{
+	struct list_head gc_list;
+	struct flow_cache_entry *fce, *n;
+
+	INIT_LIST_HEAD(&gc_list);
+	spin_lock_bh(&flow_cache_gc_lock);
+	list_splice_tail_init(&flow_cache_gc_list, &gc_list);
+	spin_unlock_bh(&flow_cache_gc_lock);
+
+	list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
+		flow_entry_kill(fce);
+}
+static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
+
+static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
+				     int deleted, struct list_head *gc_list)
+{
+	if (deleted) {
+		fcp->hash_count -= deleted;
+		spin_lock_bh(&flow_cache_gc_lock);
+		list_splice_tail(gc_list, &flow_cache_gc_list);
+		spin_unlock_bh(&flow_cache_gc_lock);
+		schedule_work(&flow_cache_gc_work);
+	}
 }
 
 static void __flow_cache_shrink(struct flow_cache *fc,
 				struct flow_cache_percpu *fcp,
 				int shrink_to)
 {
-	struct flow_cache_entry *fle, **flp;
-	int i;
+	struct flow_cache_entry *fle;
+	struct hlist_node *entry, *tmp;
+	LIST_HEAD(gc_list);
+	int i, deleted = 0;
 
 	for (i = 0; i < flow_cache_hash_size(fc); i++) {
 		int saved = 0;
 
-		flp = &fcp->hash_table[i];
-		while ((fle = *flp) != NULL) {
+		hlist_for_each_entry_safe(fle, entry, tmp,
+					  &fcp->hash_table[i], u.hlist) {
 			if (saved < shrink_to &&
 			    flow_entry_valid(fle)) {
 				saved++;
-				flp = &fle->next;
 			} else {
-				*flp = fle->next;
-				flow_entry_kill(fc, fcp, fle);
+				deleted++;
+				hlist_del(&fle->u.hlist);
+				list_add_tail(&fle->u.gc_list, &gc_list);
 			}
 		}
 	}
+
+	flow_cache_queue_garbage(fcp, deleted, &gc_list);
 }
 
 static void flow_cache_shrink(struct flow_cache *fc,
@@ -182,7 +216,8 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 {
 	struct flow_cache *fc = &flow_cache_global;
 	struct flow_cache_percpu *fcp;
-	struct flow_cache_entry *fle, **head;
+	struct flow_cache_entry *fle, *tfle;
+	struct hlist_node *entry;
 	struct flow_cache_object *flo;
 	unsigned int hash;
 
@@ -200,12 +235,13 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 		flow_new_hash_rnd(fc, fcp);
 
 	hash = flow_hash_code(fc, fcp, key);
-	head = &fcp->hash_table[hash];
-	for (fle = *head; fle; fle = fle->next) {
-		if (fle->family == family &&
-		    fle->dir == dir &&
-		    flow_key_compare(key, &fle->key) == 0)
+	hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
+		if (tfle->family == family &&
+		    tfle->dir == dir &&
+		    flow_key_compare(key, &tfle->key) == 0) {
+			fle = tfle;
 			break;
+		}
 	}
 
 	if (unlikely(!fle)) {
@@ -214,12 +250,11 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
 
 		fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
 		if (fle) {
-			fle->next = *head;
-			*head = fle;
 			fle->family = family;
 			fle->dir = dir;
 			memcpy(&fle->key, key, sizeof(*key));
 			fle->object = NULL;
+			hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
 			fcp->hash_count++;
 		}
 	} else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
@@ -262,23 +297,26 @@ static void flow_cache_flush_tasklet(unsigned long data)
 	struct flow_flush_info *info = (void *)data;
 	struct flow_cache *fc = info->cache;
 	struct flow_cache_percpu *fcp;
-	int i;
+	struct flow_cache_entry *fle;
+	struct hlist_node *entry, *tmp;
+	LIST_HEAD(gc_list);
+	int i, deleted = 0;
 
 	fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
 	for (i = 0; i < flow_cache_hash_size(fc); i++) {
-		struct flow_cache_entry *fle;
-
-		fle = fcp->hash_table[i];
-		for (; fle; fle = fle->next) {
+		hlist_for_each_entry_safe(fle, entry, tmp,
+					  &fcp->hash_table[i], u.hlist) {
 			if (flow_entry_valid(fle))
 				continue;
 
-			if (fle->object)
-				fle->object->ops->delete(fle->object);
-			fle->object = NULL;
+			deleted++;
+			hlist_del(&fle->u.hlist);
+			list_add_tail(&fle->u.gc_list, &gc_list);
 		}
 	}
 
+	flow_cache_queue_garbage(fcp, deleted, &gc_list);
+
 	if (atomic_dec_and_test(&info->cpuleft))
 		complete(&info->completion);
 }
@@ -320,7 +358,7 @@ void flow_cache_flush(void)
 static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
 					  struct flow_cache_percpu *fcp)
 {
-	fcp->hash_table = (struct flow_cache_entry **)
+	fcp->hash_table = (struct hlist_head *)
 		__get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
 	if (!fcp->hash_table)
 		panic("NET: failed to allocate flow cache order %lu\n", fc->order);
@@ -354,7 +392,7 @@ static int flow_cache_init(struct flow_cache *fc)
 
 	for (order = 0;
 	     (PAGE_SIZE << order) <
-		     (sizeof(struct flow_cache_entry *)*flow_cache_hash_size(fc));
+		     (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
 	     order++)
 		/* NOTHING */;
 	fc->order = order;
-- 
cgit v1.2.3-59-g8ed1b


From d5cdfacb35ed886271d1ccfffbded98d3447da17 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Sun, 4 Apr 2010 09:37:19 +0300
Subject: cfg80211: Add local-state-change-only auth/deauth/disassoc

cfg80211 is quite strict on allowing authentication and association
commands only in certain states. In order to meet these requirements,
user space applications may need to clear authentication or
association state in some cases. Currently, this can be done with
deauth/disassoc command, but that ends up sending out Deauthentication
or Disassociation frame unnecessarily. Add a new nl80211 attribute to
allow this sending of the frame be skipped, but with all other
deauth/disassoc operations being completed.

A similar state change is also needed for the IEEE 802.11r FT protocol in
the FT-over-DS case which does not use Authentication frame exchange
in a transition to another BSS. For this to work with cfg80211, an
authentication entry needs to be created for the target BSS without
sending out an Authentication frame. The nl80211 authentication
command can be used for this purpose, too, with the new attribute to
indicate that the command is only for changing local state. This
enables wpa_supplicant to complete FT-over-DS transition successfully.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  8 ++++++++
 include/net/cfg80211.h  | 11 +++++++++++
 net/mac80211/mlme.c     | 23 +++++++++++++++--------
 net/wireless/core.h     | 15 ++++++++++-----
 net/wireless/mlme.c     | 39 ++++++++++++++++++++++++++++-----------
 net/wireless/nl80211.c  | 19 ++++++++++++++++---
 net/wireless/sme.c      | 15 +++++++++------
 7 files changed, 97 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index daf6a3432b92..2ea3edeee7aa 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -703,6 +703,12 @@ enum nl80211_commands {
  * @NL80211_ATTR_CQM: connection quality monitor configuration in a
  *	nested attribute with %NL80211_ATTR_CQM_* sub-attributes.
  *
+ * @NL80211_ATTR_LOCAL_STATE_CHANGE: Flag attribute to indicate that a command
+ *	is requesting a local authentication/association state change without
+ *	invoking actual management frame exchange. This can be used with
+ *	NL80211_CMD_AUTHENTICATE, NL80211_CMD_DEAUTHENTICATE,
+ *	NL80211_CMD_DISASSOCIATE.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -856,6 +862,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_CQM,
 
+	NL80211_ATTR_LOCAL_STATE_CHANGE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 868cfd3b9724..37cebd3aa0f7 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -704,6 +704,10 @@ struct cfg80211_crypto_settings {
  * @key_len: length of WEP key for shared key authentication
  * @key_idx: index of WEP key for shared key authentication
  * @key: WEP key for shared key authentication
+ * @local_state_change: This is a request for a local state only, i.e., no
+ *	Authentication frame is to be transmitted and authentication state is
+ *	to be changed without having to wait for a response from the peer STA
+ *	(AP).
  */
 struct cfg80211_auth_request {
 	struct cfg80211_bss *bss;
@@ -712,6 +716,7 @@ struct cfg80211_auth_request {
 	enum nl80211_auth_type auth_type;
 	const u8 *key;
 	u8 key_len, key_idx;
+	bool local_state_change;
 };
 
 /**
@@ -744,12 +749,15 @@ struct cfg80211_assoc_request {
  * @ie: Extra IEs to add to Deauthentication frame or %NULL
  * @ie_len: Length of ie buffer in octets
  * @reason_code: The reason code for the deauthentication
+ * @local_state_change: This is a request for a local state only, i.e., no
+ *	Deauthentication frame is to be transmitted.
  */
 struct cfg80211_deauth_request {
 	struct cfg80211_bss *bss;
 	const u8 *ie;
 	size_t ie_len;
 	u16 reason_code;
+	bool local_state_change;
 };
 
 /**
@@ -762,12 +770,15 @@ struct cfg80211_deauth_request {
  * @ie: Extra IEs to add to Disassociation frame or %NULL
  * @ie_len: Length of ie buffer in octets
  * @reason_code: The reason code for the disassociation
+ * @local_state_change: This is a request for a local state only, i.e., no
+ *	Disassociation frame is to be transmitted.
  */
 struct cfg80211_disassoc_request {
 	struct cfg80211_bss *bss;
 	const u8 *ie;
 	size_t ie_len;
 	u16 reason_code;
+	bool local_state_change;
 };
 
 /**
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 57a3c62139e2..4c189d0be4a3 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -210,7 +210,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 
 static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 					   const u8 *bssid, u16 stype, u16 reason,
-					   void *cookie)
+					   void *cookie, bool send_frame)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -247,7 +247,11 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 			cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len);
 	if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED))
 		IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
-	ieee80211_tx_skb(sdata, skb);
+
+	if (send_frame)
+		ieee80211_tx_skb(sdata, skb);
+	else
+		kfree_skb(skb);
 }
 
 void ieee80211_send_pspoll(struct ieee80211_local *local,
@@ -980,7 +984,7 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
 	ieee80211_send_deauth_disassoc(sdata, bssid,
 				       IEEE80211_STYPE_DEAUTH,
 				       WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
-				       NULL);
+				       NULL, true);
 }
 
 void ieee80211_beacon_connection_loss_work(struct work_struct *work)
@@ -1724,7 +1728,7 @@ static void ieee80211_sta_work(struct work_struct *work)
 			ieee80211_send_deauth_disassoc(sdata, bssid,
 					IEEE80211_STYPE_DEAUTH,
 					WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
-					NULL);
+					NULL, true);
 			mutex_lock(&ifmgd->mtx);
 		}
 	}
@@ -1908,6 +1912,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_work *wk;
 	u16 auth_alg;
 
+	if (req->local_state_change)
+		return 0; /* no need to update mac80211 state */
+
 	switch (req->auth_type) {
 	case NL80211_AUTHTYPE_OPEN_SYSTEM:
 		auth_alg = WLAN_AUTH_OPEN;
@@ -2163,9 +2170,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
 	printk(KERN_DEBUG "%s: deauthenticating from %pM by local choice (reason=%d)\n",
 	       sdata->name, bssid, req->reason_code);
 
-	ieee80211_send_deauth_disassoc(sdata, bssid,
-			IEEE80211_STYPE_DEAUTH, req->reason_code,
-			cookie);
+	ieee80211_send_deauth_disassoc(sdata, bssid, IEEE80211_STYPE_DEAUTH,
+				       req->reason_code, cookie,
+				       !req->local_state_change);
 
 	ieee80211_recalc_idle(sdata->local);
 
@@ -2202,7 +2209,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_send_deauth_disassoc(sdata, req->bss->bssid,
 			IEEE80211_STYPE_DISASSOC, req->reason_code,
-			cookie);
+			cookie, !req->local_state_change);
 	sta_info_destroy_addr(sdata, bssid);
 
 	ieee80211_recalc_idle(sdata->local);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index d52da913145a..b2234b436ead 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -293,13 +293,15 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 			 const u8 *bssid,
 			 const u8 *ssid, int ssid_len,
 			 const u8 *ie, int ie_len,
-			 const u8 *key, int key_len, int key_idx);
+			 const u8 *key, int key_len, int key_idx,
+			 bool local_state_change);
 int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 		       struct net_device *dev, struct ieee80211_channel *chan,
 		       enum nl80211_auth_type auth_type, const u8 *bssid,
 		       const u8 *ssid, int ssid_len,
 		       const u8 *ie, int ie_len,
-		       const u8 *key, int key_len, int key_idx);
+		       const u8 *key, int key_len, int key_idx,
+		       bool local_state_change);
 int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev,
 			  struct ieee80211_channel *chan,
@@ -315,13 +317,16 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			struct cfg80211_crypto_settings *crypt);
 int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 			   struct net_device *dev, const u8 *bssid,
-			   const u8 *ie, int ie_len, u16 reason);
+			   const u8 *ie, int ie_len, u16 reason,
+			   bool local_state_change);
 int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 			 struct net_device *dev, const u8 *bssid,
-			 const u8 *ie, int ie_len, u16 reason);
+			 const u8 *ie, int ie_len, u16 reason,
+			 bool local_state_change);
 int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
 			   struct net_device *dev, const u8 *bssid,
-			   const u8 *ie, int ie_len, u16 reason);
+			   const u8 *ie, int ie_len, u16 reason,
+			   bool local_state_change);
 void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
 			struct net_device *dev);
 void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 0855f0d32349..387dd2a27d2f 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -377,7 +377,8 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 			 const u8 *bssid,
 			 const u8 *ssid, int ssid_len,
 			 const u8 *ie, int ie_len,
-			 const u8 *key, int key_len, int key_idx)
+			 const u8 *key, int key_len, int key_idx,
+			 bool local_state_change)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_auth_request req;
@@ -407,6 +408,7 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 
 	memset(&req, 0, sizeof(req));
 
+	req.local_state_change = local_state_change;
 	req.ie = ie;
 	req.ie_len = ie_len;
 	req.auth_type = auth_type;
@@ -433,12 +435,18 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 		goto out;
 	}
 
-	wdev->authtry_bsses[slot] = bss;
+	if (local_state_change)
+		wdev->auth_bsses[slot] = bss;
+	else
+		wdev->authtry_bsses[slot] = bss;
 	cfg80211_hold_bss(bss);
 
 	err = rdev->ops->auth(&rdev->wiphy, dev, &req);
 	if (err) {
-		wdev->authtry_bsses[slot] = NULL;
+		if (local_state_change)
+			wdev->auth_bsses[slot] = NULL;
+		else
+			wdev->authtry_bsses[slot] = NULL;
 		cfg80211_unhold_bss(bss);
 	}
 
@@ -453,14 +461,15 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 		       enum nl80211_auth_type auth_type, const u8 *bssid,
 		       const u8 *ssid, int ssid_len,
 		       const u8 *ie, int ie_len,
-		       const u8 *key, int key_len, int key_idx)
+		       const u8 *key, int key_len, int key_idx,
+		       bool local_state_change)
 {
 	int err;
 
 	wdev_lock(dev->ieee80211_ptr);
 	err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
 				   ssid, ssid_len, ie, ie_len,
-				   key, key_len, key_idx);
+				   key, key_len, key_idx, local_state_change);
 	wdev_unlock(dev->ieee80211_ptr);
 
 	return err;
@@ -554,7 +563,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 
 int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 			   struct net_device *dev, const u8 *bssid,
-			   const u8 *ie, int ie_len, u16 reason)
+			   const u8 *ie, int ie_len, u16 reason,
+			   bool local_state_change)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_deauth_request req;
@@ -564,6 +574,7 @@ int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 
 	memset(&req, 0, sizeof(req));
 	req.reason_code = reason;
+	req.local_state_change = local_state_change;
 	req.ie = ie;
 	req.ie_len = ie_len;
 	if (wdev->current_bss &&
@@ -590,13 +601,15 @@ int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 
 int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 			 struct net_device *dev, const u8 *bssid,
-			 const u8 *ie, int ie_len, u16 reason)
+			 const u8 *ie, int ie_len, u16 reason,
+			 bool local_state_change)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
 
 	wdev_lock(wdev);
-	err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason);
+	err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason,
+				     local_state_change);
 	wdev_unlock(wdev);
 
 	return err;
@@ -604,7 +617,8 @@ int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 
 static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
 				    struct net_device *dev, const u8 *bssid,
-				    const u8 *ie, int ie_len, u16 reason)
+				    const u8 *ie, int ie_len, u16 reason,
+				    bool local_state_change)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_disassoc_request req;
@@ -619,6 +633,7 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
 
 	memset(&req, 0, sizeof(req));
 	req.reason_code = reason;
+	req.local_state_change = local_state_change;
 	req.ie = ie;
 	req.ie_len = ie_len;
 	if (memcmp(wdev->current_bss->pub.bssid, bssid, ETH_ALEN) == 0)
@@ -631,13 +646,15 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
 
 int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
 			   struct net_device *dev, const u8 *bssid,
-			   const u8 *ie, int ie_len, u16 reason)
+			   const u8 *ie, int ie_len, u16 reason,
+			   bool local_state_change)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
 
 	wdev_lock(wdev);
-	err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason);
+	err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason,
+				       local_state_change);
 	wdev_unlock(wdev);
 
 	return err;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 95149f303409..df5505b3930c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -150,6 +150,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, },
 	[NL80211_ATTR_PS_STATE] = { .type = NLA_U32 },
 	[NL80211_ATTR_CQM] = { .type = NLA_NESTED, },
+	[NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG },
 };
 
 /* policy for the attributes */
@@ -3393,6 +3394,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
 	int err, ssid_len, ie_len = 0;
 	enum nl80211_auth_type auth_type;
 	struct key_parse key;
+	bool local_state_change;
 
 	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
 		return -EINVAL;
@@ -3471,9 +3473,12 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
+	local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
+
 	err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
 				 ssid, ssid_len, ie, ie_len,
-				 key.p.key, key.p.key_len, key.idx);
+				 key.p.key, key.p.key_len, key.idx,
+				 local_state_change);
 
 out:
 	cfg80211_unlock_rdev(rdev);
@@ -3650,6 +3655,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
 	const u8 *ie = NULL, *bssid;
 	int err, ie_len = 0;
 	u16 reason_code;
+	bool local_state_change;
 
 	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
 		return -EINVAL;
@@ -3695,7 +3701,10 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
 		ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
 	}
 
-	err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code);
+	local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
+
+	err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code,
+				   local_state_change);
 
 out:
 	cfg80211_unlock_rdev(rdev);
@@ -3712,6 +3721,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
 	const u8 *ie = NULL, *bssid;
 	int err, ie_len = 0;
 	u16 reason_code;
+	bool local_state_change;
 
 	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
 		return -EINVAL;
@@ -3757,7 +3767,10 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
 		ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
 	}
 
-	err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code);
+	local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
+
+	err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code,
+				     local_state_change);
 
 out:
 	cfg80211_unlock_rdev(rdev);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 17fde0da1b08..17465777eb47 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -170,7 +170,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
 					    params->ssid, params->ssid_len,
 					    NULL, 0,
 					    params->key, params->key_len,
-					    params->key_idx);
+					    params->key_idx, false);
 	case CFG80211_CONN_ASSOCIATE_NEXT:
 		BUG_ON(!rdev->ops->assoc);
 		wdev->conn->state = CFG80211_CONN_ASSOCIATING;
@@ -185,12 +185,13 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
 		if (err)
 			__cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
 					       NULL, 0,
-					       WLAN_REASON_DEAUTH_LEAVING);
+					       WLAN_REASON_DEAUTH_LEAVING,
+					       false);
 		return err;
 	case CFG80211_CONN_DEAUTH_ASSOC_FAIL:
 		__cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
 				       NULL, 0,
-				       WLAN_REASON_DEAUTH_LEAVING);
+				       WLAN_REASON_DEAUTH_LEAVING, false);
 		/* return an error so that we call __cfg80211_connect_result() */
 		return -EINVAL;
 	default:
@@ -675,7 +676,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
 				continue;
 			bssid = wdev->auth_bsses[i]->pub.bssid;
 			ret = __cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0,
-						WLAN_REASON_DEAUTH_LEAVING);
+						WLAN_REASON_DEAUTH_LEAVING,
+						false);
 			WARN(ret, "deauth failed: %d\n", ret);
 		}
 	}
@@ -934,7 +936,7 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev,
 		/* wdev->conn->params.bssid must be set if > SCANNING */
 		err = __cfg80211_mlme_deauth(rdev, dev,
 					     wdev->conn->params.bssid,
-					     NULL, 0, reason);
+					     NULL, 0, reason, false);
 		if (err)
 			return err;
 	} else {
@@ -990,7 +992,8 @@ void cfg80211_sme_disassoc(struct net_device *dev, int idx)
 
 	memcpy(bssid, wdev->auth_bsses[idx]->pub.bssid, ETH_ALEN);
 	if (__cfg80211_mlme_deauth(rdev, dev, bssid,
-				   NULL, 0, WLAN_REASON_DEAUTH_LEAVING)) {
+				   NULL, 0, WLAN_REASON_DEAUTH_LEAVING,
+				   false)) {
 		/* whatever -- assume gone anyway */
 		cfg80211_unhold_bss(wdev->auth_bsses[idx]);
 		cfg80211_put_bss(&wdev->auth_bsses[idx]->pub);
-- 
cgit v1.2.3-59-g8ed1b


From e64b379574d6c92c15b4239ee0a5173317176547 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 6 Apr 2010 11:18:43 +0200
Subject: mac80211: fix station destruction problem

When a station without a key is destroyed, or when
a driver submits work for a station and thereby
references it again, it seems that we could
potentially reference the station structure while
it is being destroyed.

Wait for an RCU grace period to elapse before
finishing destroying the station after we have
removed the station from the driver and from
the hash table etc., even in the case where no
key is associated with the station.

Also, there's no point in deleting the plink
timer here since it'll be properly deleted just
a bit later.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/sta_info.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 211c475f73c6..bd11753c1525 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -632,9 +632,6 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
 		 * may mean it is removed from hardware which requires that
 		 * the key->sta pointer is still valid, so flush the key todo
 		 * list here.
-		 *
-		 * ieee80211_key_todo() will synchronize_rcu() so after this
-		 * nothing can reference this sta struct any more.
 		 */
 		ieee80211_key_todo();
 
@@ -666,11 +663,17 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
 		sdata = sta->sdata;
 	}
 
+	/*
+	 * At this point, after we wait for an RCU grace period,
+	 * neither mac80211 nor the driver can reference this
+	 * sta struct any more except by still existing timers
+	 * associated with this station that we clean up below.
+	 */
+	synchronize_rcu();
+
 #ifdef CONFIG_MAC80211_MESH
-	if (ieee80211_vif_is_mesh(&sdata->vif)) {
+	if (ieee80211_vif_is_mesh(&sdata->vif))
 		mesh_accept_plinks_update(sdata);
-		del_timer(&sta->plink_timer);
-	}
 #endif
 
 #ifdef CONFIG_MAC80211_VERBOSE_DEBUG
-- 
cgit v1.2.3-59-g8ed1b


From 2b43ae6daf26f29cec49fa3a3f18025355495500 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 6 Apr 2010 11:18:44 +0200
Subject: mac80211: remove irq disabling for sta lock

All other places that take the sta lock only disable BHs; the one
exception is in the TX path, which already runs with BHs disabled.
The lock also cannot be taken from interrupts, so disabling IRQs is
not necessary.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 350096afe79a..f7209d691c35 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1144,13 +1144,12 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 
 	if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) &&
 	    (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) {
-		unsigned long flags;
 		struct tid_ampdu_tx *tid_tx;
 
 		qc = ieee80211_get_qos_ctl(hdr);
 		tid = *qc & IEEE80211_QOS_CTL_TID_MASK;
 
-		spin_lock_irqsave(&tx->sta->lock, flags);
+		spin_lock(&tx->sta->lock);
 		/*
 		 * XXX: This spinlock could be fairly expensive, but see the
 		 *	comment in agg-tx.c:ieee80211_agg_tx_operational().
@@ -1175,7 +1174,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
 			info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
 			__skb_queue_tail(&tid_tx->pending, skb);
 		}
-		spin_unlock_irqrestore(&tx->sta->lock, flags);
+		spin_unlock(&tx->sta->lock);
 
 		if (unlikely(queued))
 			return TX_QUEUED;
-- 
cgit v1.2.3-59-g8ed1b


From 66b0470aeef10a3b0f9a6a1c60d908b5a06c62ae Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 6 Apr 2010 11:18:45 +0200
Subject: mac80211: remove ieee80211_sta_stop_rx_ba_session

All callers of ieee80211_sta_stop_rx_ba_session can
just call __ieee80211_stop_rx_ba_session instead
because they already have the station struct, so do
that and remove ieee80211_sta_stop_rx_ba_session.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/agg-rx.c      | 24 ++----------------------
 net/mac80211/ht.c          |  3 +--
 net/mac80211/ieee80211_i.h |  2 --
 net/mac80211/rx.c          |  4 ++--
 4 files changed, 5 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index a978e666ed6f..cb9f80a94002 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -79,28 +79,9 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 	spin_unlock_bh(&sta->lock);
 }
 
-void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid,
-					u16 initiator, u16 reason)
-{
-	struct sta_info *sta;
-
-	rcu_read_lock();
-
-	sta = sta_info_get(sdata, ra);
-	if (!sta) {
-		rcu_read_unlock();
-		return;
-	}
-
-	__ieee80211_stop_rx_ba_session(sta, tid, initiator, reason);
-
-	rcu_read_unlock();
-}
-
 /*
  * After accepting the AddBA Request we activated a timer,
  * resetting it after each frame that arrives from the originator.
- * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed.
  */
 static void sta_rx_agg_session_timer_expired(unsigned long data)
 {
@@ -116,9 +97,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
 #ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
 #endif
-	ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr,
-					 (u16)*ptid, WLAN_BACK_TIMER,
-					 WLAN_REASON_QSTA_TIMEOUT);
+	__ieee80211_stop_rx_ba_session(sta, *ptid, WLAN_BACK_RECIPIENT,
+				       WLAN_REASON_QSTA_TIMEOUT);
 }
 
 static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index bb677a73b7c9..2ab106a0a491 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -175,8 +175,7 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata,
 #endif /* CONFIG_MAC80211_HT_DEBUG */
 
 	if (initiator == WLAN_BACK_INITIATOR)
-		ieee80211_sta_stop_rx_ba_session(sdata, sta->sta.addr, tid,
-						 WLAN_BACK_INITIATOR, 0);
+		__ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0);
 	else { /* WLAN_BACK_RECIPIENT */
 		spin_lock_bh(&sta->lock);
 		if (sta->ampdu_mlme.tid_state_tx[tid] & HT_ADDBA_REQUESTED_MSK)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 741fb8bbc4a0..4e73660ebe99 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1098,8 +1098,6 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata,
 			       enum ieee80211_smps_mode smps, const u8 *da,
 			       const u8 *bssid);
 
-void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da,
-				u16 tid, u16 initiator, u16 reason);
 void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 				    u16 initiator, u16 reason);
 void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta);
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index b83d4db6ca6a..c02e43b50ac3 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -739,8 +739,8 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 	/* if this mpdu is fragmented - terminate rx aggregation session */
 	sc = le16_to_cpu(hdr->seq_ctrl);
 	if (sc & IEEE80211_SCTL_FRAG) {
-		ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr,
-			tid, 0, WLAN_REASON_QSTA_REQUIRE_SETUP);
+		__ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT,
+					       WLAN_REASON_QSTA_REQUIRE_SETUP);
 		dev_kfree_skb(skb);
 		return;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 618f356b95e37ca0c30b3b513898fda54abd52a6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 6 Apr 2010 11:18:46 +0200
Subject: mac80211: rename WLAN_STA_SUSPEND to WLAN_STA_BLOCK_BA

I want to use it during station destruction as well
so rename it to WLAN_STA_BLOCK_BA which is also the
only use of it now.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/agg-rx.c   | 2 +-
 net/mac80211/agg-tx.c   | 2 +-
 net/mac80211/pm.c       | 2 +-
 net/mac80211/sta_info.h | 6 +++---
 net/mac80211/util.c     | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index cb9f80a94002..7d87f446f030 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -173,7 +173,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 
 	status = WLAN_STATUS_REQUEST_DECLINED;
 
-	if (test_sta_flags(sta, WLAN_STA_SUSPEND)) {
+	if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "Suspend in progress. "
 		       "Denying ADDBA request\n");
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index bb4ac70fc97a..32d2148b5b98 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -245,7 +245,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
 		return -EINVAL;
 	}
 
-	if (test_sta_flags(sta, WLAN_STA_SUSPEND)) {
+	if (test_sta_flags(sta, WLAN_STA_BLOCK_BA)) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "Suspend in progress. "
 		       "Denying BA session request\n");
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 0e64484e861c..75202b295a4e 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -46,7 +46,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw)
 
 	if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
 		list_for_each_entry_rcu(sta, &local->sta_list, list) {
-			set_sta_flags(sta, WLAN_STA_SUSPEND);
+			set_sta_flags(sta, WLAN_STA_BLOCK_BA);
 			ieee80211_sta_tear_down_BA_sessions(sta);
 		}
 	}
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 2b635909de5c..57e81758d6f7 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -35,8 +35,8 @@
  *	IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next
  *	frame to this station is transmitted.
  * @WLAN_STA_MFP: Management frame protection is used with this STA.
- * @WLAN_STA_SUSPEND: Set/cleared during a suspend/resume cycle.
- *	Used to deny ADDBA requests (both TX and RX).
+ * @WLAN_STA_BLOCK_BA: Used to deny ADDBA requests (both TX and RX)
+ *	during suspend/resume.
  * @WLAN_STA_PS_DRIVER: driver requires keeping this station in
  *	power-save mode logically to flush frames that might still
  *	be in the queues
@@ -57,7 +57,7 @@ enum ieee80211_sta_info_flags {
 	WLAN_STA_WDS		= 1<<7,
 	WLAN_STA_CLEAR_PS_FILT	= 1<<9,
 	WLAN_STA_MFP		= 1<<10,
-	WLAN_STA_SUSPEND	= 1<<11,
+	WLAN_STA_BLOCK_BA	= 1<<11,
 	WLAN_STA_PS_DRIVER	= 1<<12,
 	WLAN_STA_PSPOLL		= 1<<13,
 	WLAN_STA_DISASSOC       = 1<<14,
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 7b2c170af71c..7614821caed5 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1140,7 +1140,7 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 
 	if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) {
 		list_for_each_entry_rcu(sta, &local->sta_list, list) {
-			clear_sta_flags(sta, WLAN_STA_SUSPEND);
+			clear_sta_flags(sta, WLAN_STA_BLOCK_BA);
 		}
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 098a607091426e79178b9a6c318d993fea131791 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 6 Apr 2010 11:18:47 +0200
Subject: mac80211: clean up/fix aggregation code

The aggregation code has a number of quirks, like
inventing an unneeded WLAN_BACK_TIMER value and
leaking memory under certain circumstances during
station destruction. Fix these issues by using
the regular aggregation session teardown code and
blocking new aggregation sessions, all before the
station is actually destroyed.

As a side effect, this gets rid of the long code
block to destroy aggregation safely.

Additionally, rename tid_state_rx which can only
have the values IDLE and OPERATIONAL to
tid_active_rx to make it easier to understand
that there is no bitwise stuff going on on the
RX side -- the TX side remains because it needs
to keep track of the driver and peer states.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  |  1 -
 net/mac80211/agg-rx.c      | 48 +++++++++++++++++---------------------
 net/mac80211/debugfs_sta.c | 10 ++++----
 net/mac80211/rx.c          |  5 ++--
 net/mac80211/sta_info.c    | 58 ++++++++--------------------------------------
 net/mac80211/sta_info.h    |  6 ++---
 6 files changed, 40 insertions(+), 88 deletions(-)

(limited to 'net')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 19984958ab7b..e9e03b02cb08 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1324,7 +1324,6 @@ enum ieee80211_back_actioncode {
 enum ieee80211_back_parties {
 	WLAN_BACK_RECIPIENT = 0,
 	WLAN_BACK_INITIATOR = 1,
-	WLAN_BACK_TIMER = 2,
 };
 
 /* SA Query action */
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 7d87f446f030..53233ab50f65 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -22,19 +22,20 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 				    u16 initiator, u16 reason)
 {
 	struct ieee80211_local *local = sta->local;
+	struct tid_ampdu_rx *tid_rx;
 	int i;
 
-	/* check if TID is in operational state */
 	spin_lock_bh(&sta->lock);
-	if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) {
+
+	/* check if TID is in operational state */
+	if (!sta->ampdu_mlme.tid_active_rx[tid]) {
 		spin_unlock_bh(&sta->lock);
 		return;
 	}
 
-	sta->ampdu_mlme.tid_state_rx[tid] =
-		HT_AGG_STATE_REQ_STOP_BA_MSK |
-		(initiator << HT_AGG_STATE_INITIATOR_SHIFT);
-	spin_unlock_bh(&sta->lock);
+	sta->ampdu_mlme.tid_active_rx[tid] = false;
+
+	tid_rx = sta->ampdu_mlme.tid_rx[tid];
 
 #ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n",
@@ -46,37 +47,30 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 		printk(KERN_DEBUG "HW problem - can not stop rx "
 				"aggregation for tid %d\n", tid);
 
-	/* shutdown timer has not expired */
-	if (initiator != WLAN_BACK_TIMER)
-		del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
-
 	/* check if this is a self generated aggregation halt */
-	if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER)
+	if (initiator == WLAN_BACK_RECIPIENT)
 		ieee80211_send_delba(sta->sdata, sta->sta.addr,
 				     tid, 0, reason);
 
 	/* free the reordering buffer */
-	for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) {
-		if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) {
+	for (i = 0; i < tid_rx->buf_size; i++) {
+		if (tid_rx->reorder_buf[i]) {
 			/* release the reordered frames */
-			dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]);
-			sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--;
-			sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL;
+			dev_kfree_skb(tid_rx->reorder_buf[i]);
+			tid_rx->stored_mpdu_num--;
+			tid_rx->reorder_buf[i] = NULL;
 		}
 	}
 
-	spin_lock_bh(&sta->lock);
 	/* free resources */
-	kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf);
-	kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_time);
-
-	if (!sta->ampdu_mlme.tid_rx[tid]->shutdown) {
-		kfree(sta->ampdu_mlme.tid_rx[tid]);
-		sta->ampdu_mlme.tid_rx[tid] = NULL;
-	}
+	kfree(tid_rx->reorder_buf);
+	kfree(tid_rx->reorder_time);
+	sta->ampdu_mlme.tid_rx[tid] = NULL;
 
-	sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE;
 	spin_unlock_bh(&sta->lock);
+
+	del_timer_sync(&tid_rx->session_timer);
+	kfree(tid_rx);
 }
 
 /*
@@ -211,7 +205,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	/* examine state machine */
 	spin_lock_bh(&sta->lock);
 
-	if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) {
+	if (sta->ampdu_mlme.tid_active_rx[tid]) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		if (net_ratelimit())
 			printk(KERN_DEBUG "unexpected AddBA Req from "
@@ -273,7 +267,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	}
 
 	/* change state and send addba resp */
-	sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL;
+	sta->ampdu_mlme.tid_active_rx[tid] = true;
 	tid_agg_rx->dialog_token = dialog_token;
 	tid_agg_rx->ssn = start_seq_num;
 	tid_agg_rx->head_seq_num = start_seq_num;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 23e720034577..740ff6c5b92c 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -119,7 +119,7 @@ STA_OPS(last_seq_ctrl);
 static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 					size_t count, loff_t *ppos)
 {
-	char buf[64 + STA_TID_NUM * 40], *p = buf;
+	char buf[71 + STA_TID_NUM * 40], *p = buf;
 	int i;
 	struct sta_info *sta = file->private_data;
 
@@ -127,16 +127,16 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 	p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n",
 			sta->ampdu_mlme.dialog_token_allocator + 1);
 	p += scnprintf(p, sizeof(buf) + buf - p,
-		       "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tSSN\tpending\n");
+		       "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tSSN\tpending\n");
 	for (i = 0; i < STA_TID_NUM; i++) {
 		p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x",
-				sta->ampdu_mlme.tid_state_rx[i]);
+				sta->ampdu_mlme.tid_active_rx[i]);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x",
-				sta->ampdu_mlme.tid_state_rx[i] ?
+				sta->ampdu_mlme.tid_active_rx[i] ?
 				sta->ampdu_mlme.tid_rx[i]->dialog_token : 0);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x",
-				sta->ampdu_mlme.tid_state_rx[i] ?
+				sta->ampdu_mlme.tid_active_rx[i] ?
 				sta->ampdu_mlme.tid_rx[i]->ssn : 0);
 
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x",
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c02e43b50ac3..62053fa711f3 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -720,7 +720,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 
 	tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
 
-	if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL)
+	if (!sta->ampdu_mlme.tid_active_rx[tid])
 		goto dont_reorder;
 
 	tid_agg_rx = sta->ampdu_mlme.tid_rx[tid];
@@ -1805,8 +1805,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
 		if (!rx->sta)
 			return RX_DROP_MONITOR;
 		tid = le16_to_cpu(bar->control) >> 12;
-		if (rx->sta->ampdu_mlme.tid_state_rx[tid]
-					!= HT_AGG_STATE_OPERATIONAL)
+		if (!rx->sta->ampdu_mlme.tid_active_rx[tid])
 			return RX_DROP_MONITOR;
 		tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid];
 
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index bd11753c1525..5bf044b92dca 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -238,9 +238,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 		 * enable session_timer's data differentiation. refer to
 		 * sta_rx_agg_session_timer_expired for useage */
 		sta->timer_to_tid[i] = i;
-		/* rx */
-		sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE;
-		sta->ampdu_mlme.tid_rx[i] = NULL;
 		/* tx */
 		sta->ampdu_mlme.tid_state_tx[i] = HT_AGG_STATE_IDLE;
 		sta->ampdu_mlme.tid_tx[i] = NULL;
@@ -606,7 +603,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
 	struct ieee80211_sub_if_data *sdata;
 	struct sk_buff *skb;
 	unsigned long flags;
-	int ret, i;
+	int ret;
 
 	might_sleep();
 
@@ -616,6 +613,15 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
 	local = sta->local;
 	sdata = sta->sdata;
 
+	/*
+	 * Before removing the station from the driver and
+	 * rate control, it might still start new aggregation
+	 * sessions -- block that to make sure the tear-down
+	 * will be sufficient.
+	 */
+	set_sta_flags(sta, WLAN_STA_BLOCK_BA);
+	ieee80211_sta_tear_down_BA_sessions(sta);
+
 	spin_lock_irqsave(&local->sta_lock, flags);
 	ret = sta_info_hash_del(local, sta);
 	/* this might still be the pending list ... which is fine */
@@ -700,50 +706,6 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
 	while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL)
 		dev_kfree_skb_any(skb);
 
-	for (i = 0; i <  STA_TID_NUM; i++) {
-		struct tid_ampdu_rx *tid_rx;
-		struct tid_ampdu_tx *tid_tx;
-
-		spin_lock_bh(&sta->lock);
-		tid_rx = sta->ampdu_mlme.tid_rx[i];
-		/* Make sure timer won't free the tid_rx struct, see below */
-		if (tid_rx)
-			tid_rx->shutdown = true;
-
-		spin_unlock_bh(&sta->lock);
-
-		/*
-		 * Outside spinlock - shutdown is true now so that the timer
-		 * won't free tid_rx, we have to do that now. Can't let the
-		 * timer do it because we have to sync the timer outside the
-		 * lock that it takes itself.
-		 */
-		if (tid_rx) {
-			del_timer_sync(&tid_rx->session_timer);
-			kfree(tid_rx);
-		}
-
-		/*
-		 * No need to do such complications for TX agg sessions, the
-		 * path leading to freeing the tid_tx struct goes via a call
-		 * from the driver, and thus needs to look up the sta struct
-		 * again, which cannot be found when we get here. Hence, we
-		 * just need to delete the timer and free the aggregation
-		 * info; we won't be telling the peer about it then but that
-		 * doesn't matter if we're not talking to it again anyway.
-		 */
-		tid_tx = sta->ampdu_mlme.tid_tx[i];
-		if (tid_tx) {
-			del_timer_sync(&tid_tx->addba_resp_timer);
-			/*
-			 * STA removed while aggregation session being
-			 * started? Bit odd, but purge frames anyway.
-			 */
-			skb_queue_purge(&tid_tx->pending);
-			kfree(tid_tx);
-		}
-	}
-
 	__sta_info_free(local, sta);
 
 	return 0;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 57e81758d6f7..48a5e80957f0 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -36,7 +36,7 @@
  *	frame to this station is transmitted.
  * @WLAN_STA_MFP: Management frame protection is used with this STA.
  * @WLAN_STA_BLOCK_BA: Used to deny ADDBA requests (both TX and RX)
- *	during suspend/resume.
+ *	during suspend/resume and station removal.
  * @WLAN_STA_PS_DRIVER: driver requires keeping this station in
  *	power-save mode logically to flush frames that might still
  *	be in the queues
@@ -106,7 +106,6 @@ struct tid_ampdu_tx {
  * @buf_size: buffer size for incoming A-MPDUs
  * @timeout: reset timer value (in TUs).
  * @dialog_token: dialog token for aggregation session
- * @shutdown: this session is being shut down due to STA removal
  */
 struct tid_ampdu_rx {
 	struct sk_buff **reorder_buf;
@@ -118,7 +117,6 @@ struct tid_ampdu_rx {
 	u16 buf_size;
 	u16 timeout;
 	u8 dialog_token;
-	bool shutdown;
 };
 
 /**
@@ -156,7 +154,7 @@ enum plink_state {
  */
 struct sta_ampdu_mlme {
 	/* rx */
-	u8 tid_state_rx[STA_TID_NUM];
+	bool tid_active_rx[STA_TID_NUM];
 	struct tid_ampdu_rx *tid_rx[STA_TID_NUM];
 	/* tx */
 	u8 tid_state_tx[STA_TID_NUM];
-- 
cgit v1.2.3-59-g8ed1b


From 54297e4d60b74e602138594c131097347d128b5a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 6 Apr 2010 11:18:48 +0200
Subject: mac80211: fix some RX aggregation locking

A few places in mac80211 do not currently acquire
the sta lock for RX aggregation, but they should.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 62053fa711f3..f42d5060a7bb 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -720,14 +720,16 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 
 	tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
 
+	spin_lock(&sta->lock);
+
 	if (!sta->ampdu_mlme.tid_active_rx[tid])
-		goto dont_reorder;
+		goto dont_reorder_unlock;
 
 	tid_agg_rx = sta->ampdu_mlme.tid_rx[tid];
 
 	/* qos null data frames are excluded */
 	if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC)))
-		goto dont_reorder;
+		goto dont_reorder_unlock;
 
 	/* new, potentially un-ordered, ampdu frame - process it */
 
@@ -739,15 +741,20 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 	/* if this mpdu is fragmented - terminate rx aggregation session */
 	sc = le16_to_cpu(hdr->seq_ctrl);
 	if (sc & IEEE80211_SCTL_FRAG) {
+		spin_unlock(&sta->lock);
 		__ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT,
 					       WLAN_REASON_QSTA_REQUIRE_SETUP);
 		dev_kfree_skb(skb);
 		return;
 	}
 
-	if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames))
+	if (ieee80211_sta_manage_reorder_buf(hw, tid_agg_rx, skb, frames)) {
+		spin_unlock(&sta->lock);
 		return;
+	}
 
+ dont_reorder_unlock:
+	spin_unlock(&sta->lock);
  dont_reorder:
 	__skb_queue_tail(frames, skb);
 }
@@ -1804,9 +1811,12 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
 	if (ieee80211_is_back_req(bar->frame_control)) {
 		if (!rx->sta)
 			return RX_DROP_MONITOR;
+		spin_lock(&rx->sta->lock);
 		tid = le16_to_cpu(bar->control) >> 12;
-		if (!rx->sta->ampdu_mlme.tid_active_rx[tid])
+		if (!rx->sta->ampdu_mlme.tid_active_rx[tid]) {
+			spin_unlock(&rx->sta->lock);
 			return RX_DROP_MONITOR;
+		}
 		tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid];
 
 		start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4;
@@ -1820,6 +1830,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
 		ieee80211_release_reorder_frames(hw, tid_agg_rx, start_seq_num,
 						 frames);
 		kfree_skb(skb);
+		spin_unlock(&rx->sta->lock);
 		return RX_QUEUED;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 8c11e4ab09ffb975a89802dde0e9aa52a53b8aa5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 7 Apr 2010 11:26:56 +0200
Subject: mac80211: fix paged RX crypto

WEP crypto was broken, but upon finding the problem
it is evident that other things were broken by the
paged RX patch as well.

To fix it, for now move the linearising in front.
This means that we linearise all frames, which is
not at all what we want, but at least it fixes the
problem for now.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index f42d5060a7bb..a33f865807f9 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -820,7 +820,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 {
 	struct sk_buff *skb = rx->skb;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	struct ieee80211_hdr *hdr;
 	int keyidx;
 	int hdrlen;
 	ieee80211_rx_result result = RX_DROP_UNUSABLE;
@@ -861,6 +861,11 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 	if (!(rx->flags & IEEE80211_RX_RA_MATCH))
 		return RX_CONTINUE;
 
+	if (skb_linearize(rx->skb))
+		return RX_DROP_UNUSABLE;
+
+	hdr = (struct ieee80211_hdr *)skb->data;
+
 	/* start without a key */
 	rx->key = NULL;
 
@@ -944,9 +949,6 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 		return RX_DROP_MONITOR;
 	}
 
-	if (skb_linearize(rx->skb))
-		return RX_DROP_UNUSABLE;
-
 	/* Check for weak IVs if possible */
 	if (rx->sta && rx->key->conf.alg == ALG_WEP &&
 	    ieee80211_is_data(hdr->frame_control) &&
-- 
cgit v1.2.3-59-g8ed1b


From 298b9e44be9592e94c0e69a5d3893cd11f5484fa Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 7 Apr 2010 16:46:36 -0700
Subject: net: include linux/proc_fs.h in dev_addr_lists.c

As pointed out by Randy Dunlap, we must include linux/proc_fs.h in
net/core/dev_addr_lists.c, regardless of CONFIG_PROC_FS.

Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev_addr_lists.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 37d5975e18a3..508f9c18992f 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -14,6 +14,7 @@
 #include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/list.h>
+#include <linux/proc_fs.h>
 
 /*
  * General list handling functions
@@ -667,7 +668,6 @@ void dev_mc_init(struct net_device *dev)
 EXPORT_SYMBOL(dev_mc_init);
 
 #ifdef CONFIG_PROC_FS
-#include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 
 static int dev_mc_seq_show(struct seq_file *seq, void *v)
-- 
cgit v1.2.3-59-g8ed1b


From 97f8aefbbfb5aa5c9944e5fa8149f1fdaf71c7b6 Mon Sep 17 00:00:00 2001
From: chavey <chavey@google.com>
Date: Wed, 7 Apr 2010 21:54:42 -0700
Subject: net: fix ethtool coding style errors and warnings

Fix the coding style errors and warnings reported by checkpatch.pl
for the files net/core/ethtool.c and include/linux/ethtool.h.

Signed-off-by: chavey <chavey@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 115 +++++++++++++++++++++------------------
 net/core/ethtool.c      | 141 +++++++++++++++++++++++++-----------------------
 2 files changed, 136 insertions(+), 120 deletions(-)

(limited to 'net')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index b391969a0dd9..276b40a16835 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -491,12 +491,12 @@ void ethtool_ntuple_flush(struct net_device *dev);
  * get_ufo: Report whether UDP fragmentation offload is enabled
  * set_ufo: Turn UDP fragmentation offload on or off
  * self_test: Run specified self-tests
- * get_strings: Return a set of strings that describe the requested objects 
+ * get_strings: Return a set of strings that describe the requested objects
  * phys_id: Identify the device
  * get_stats: Return statistics about the device
  * get_flags: get 32-bit flags bitmap
  * set_flags: set 32-bit flags bitmap
- * 
+ *
  * Description:
  *
  * get_settings:
@@ -532,14 +532,20 @@ struct ethtool_ops {
 	int	(*nway_reset)(struct net_device *);
 	u32	(*get_link)(struct net_device *);
 	int	(*get_eeprom_len)(struct net_device *);
-	int	(*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
-	int	(*set_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
+	int	(*get_eeprom)(struct net_device *,
+			      struct ethtool_eeprom *, u8 *);
+	int	(*set_eeprom)(struct net_device *,
+			      struct ethtool_eeprom *, u8 *);
 	int	(*get_coalesce)(struct net_device *, struct ethtool_coalesce *);
 	int	(*set_coalesce)(struct net_device *, struct ethtool_coalesce *);
-	void	(*get_ringparam)(struct net_device *, struct ethtool_ringparam *);
-	int	(*set_ringparam)(struct net_device *, struct ethtool_ringparam *);
-	void	(*get_pauseparam)(struct net_device *, struct ethtool_pauseparam*);
-	int	(*set_pauseparam)(struct net_device *, struct ethtool_pauseparam*);
+	void	(*get_ringparam)(struct net_device *,
+				 struct ethtool_ringparam *);
+	int	(*set_ringparam)(struct net_device *,
+				 struct ethtool_ringparam *);
+	void	(*get_pauseparam)(struct net_device *,
+				  struct ethtool_pauseparam*);
+	int	(*set_pauseparam)(struct net_device *,
+				  struct ethtool_pauseparam*);
 	u32	(*get_rx_csum)(struct net_device *);
 	int	(*set_rx_csum)(struct net_device *, u32);
 	u32	(*get_tx_csum)(struct net_device *);
@@ -551,21 +557,24 @@ struct ethtool_ops {
 	void	(*self_test)(struct net_device *, struct ethtool_test *, u64 *);
 	void	(*get_strings)(struct net_device *, u32 stringset, u8 *);
 	int	(*phys_id)(struct net_device *, u32);
-	void	(*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *);
+	void	(*get_ethtool_stats)(struct net_device *,
+				     struct ethtool_stats *, u64 *);
 	int	(*begin)(struct net_device *);
 	void	(*complete)(struct net_device *);
-	u32     (*get_ufo)(struct net_device *);
-	int     (*set_ufo)(struct net_device *, u32);
-	u32     (*get_flags)(struct net_device *);
-	int     (*set_flags)(struct net_device *, u32);
-	u32     (*get_priv_flags)(struct net_device *);
-	int     (*set_priv_flags)(struct net_device *, u32);
+	u32	(*get_ufo)(struct net_device *);
+	int	(*set_ufo)(struct net_device *, u32);
+	u32	(*get_flags)(struct net_device *);
+	int	(*set_flags)(struct net_device *, u32);
+	u32	(*get_priv_flags)(struct net_device *);
+	int	(*set_priv_flags)(struct net_device *, u32);
 	int	(*get_sset_count)(struct net_device *, int);
-	int	(*get_rxnfc)(struct net_device *, struct ethtool_rxnfc *, void *);
+	int	(*get_rxnfc)(struct net_device *,
+			     struct ethtool_rxnfc *, void *);
 	int	(*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *);
-	int     (*flash_device)(struct net_device *, struct ethtool_flash *);
+	int	(*flash_device)(struct net_device *, struct ethtool_flash *);
 	int	(*reset)(struct net_device *, u32 *);
-	int	(*set_rx_ntuple)(struct net_device *, struct ethtool_rx_ntuple *);
+	int	(*set_rx_ntuple)(struct net_device *,
+				 struct ethtool_rx_ntuple *);
 	int	(*get_rx_ntuple)(struct net_device *, u32 stringset, void *);
 };
 #endif /* __KERNEL__ */
@@ -577,29 +586,29 @@ struct ethtool_ops {
 #define ETHTOOL_GREGS		0x00000004 /* Get NIC registers. */
 #define ETHTOOL_GWOL		0x00000005 /* Get wake-on-lan options. */
 #define ETHTOOL_SWOL		0x00000006 /* Set wake-on-lan options. */
-#define ETHTOOL_GMSGLVL		0x00000007 /* Get driver message level */
-#define ETHTOOL_SMSGLVL		0x00000008 /* Set driver msg level. */
+#define ETHTOOL_GMSGLVL	0x00000007 /* Get driver message level */
+#define ETHTOOL_SMSGLVL	0x00000008 /* Set driver msg level. */
 #define ETHTOOL_NWAY_RST	0x00000009 /* Restart autonegotiation. */
 #define ETHTOOL_GLINK		0x0000000a /* Get link status (ethtool_value) */
-#define ETHTOOL_GEEPROM		0x0000000b /* Get EEPROM data */
-#define ETHTOOL_SEEPROM		0x0000000c /* Set EEPROM data. */
+#define ETHTOOL_GEEPROM	0x0000000b /* Get EEPROM data */
+#define ETHTOOL_SEEPROM	0x0000000c /* Set EEPROM data. */
 #define ETHTOOL_GCOALESCE	0x0000000e /* Get coalesce config */
 #define ETHTOOL_SCOALESCE	0x0000000f /* Set coalesce config. */
 #define ETHTOOL_GRINGPARAM	0x00000010 /* Get ring parameters */
 #define ETHTOOL_SRINGPARAM	0x00000011 /* Set ring parameters. */
 #define ETHTOOL_GPAUSEPARAM	0x00000012 /* Get pause parameters */
 #define ETHTOOL_SPAUSEPARAM	0x00000013 /* Set pause parameters. */
-#define ETHTOOL_GRXCSUM		0x00000014 /* Get RX hw csum enable (ethtool_value) */
-#define ETHTOOL_SRXCSUM		0x00000015 /* Set RX hw csum enable (ethtool_value) */
-#define ETHTOOL_GTXCSUM		0x00000016 /* Get TX hw csum enable (ethtool_value) */
-#define ETHTOOL_STXCSUM		0x00000017 /* Set TX hw csum enable (ethtool_value) */
+#define ETHTOOL_GRXCSUM	0x00000014 /* Get RX hw csum enable (ethtool_value) */
+#define ETHTOOL_SRXCSUM	0x00000015 /* Set RX hw csum enable (ethtool_value) */
+#define ETHTOOL_GTXCSUM	0x00000016 /* Get TX hw csum enable (ethtool_value) */
+#define ETHTOOL_STXCSUM	0x00000017 /* Set TX hw csum enable (ethtool_value) */
 #define ETHTOOL_GSG		0x00000018 /* Get scatter-gather enable
 					    * (ethtool_value) */
 #define ETHTOOL_SSG		0x00000019 /* Set scatter-gather enable
 					    * (ethtool_value). */
 #define ETHTOOL_TEST		0x0000001a /* execute NIC self-test. */
 #define ETHTOOL_GSTRINGS	0x0000001b /* get specified string set */
-#define ETHTOOL_PHYS_ID		0x0000001c /* identify the NIC */
+#define ETHTOOL_PHYS_ID	0x0000001c /* identify the NIC */
 #define ETHTOOL_GSTATS		0x0000001d /* get NIC-specific statistics */
 #define ETHTOOL_GTSO		0x0000001e /* Get TSO enable (ethtool_value) */
 #define ETHTOOL_STSO		0x0000001f /* Set TSO enable (ethtool_value) */
@@ -610,24 +619,24 @@ struct ethtool_ops {
 #define ETHTOOL_SGSO		0x00000024 /* Set GSO enable (ethtool_value) */
 #define ETHTOOL_GFLAGS		0x00000025 /* Get flags bitmap(ethtool_value) */
 #define ETHTOOL_SFLAGS		0x00000026 /* Set flags bitmap(ethtool_value) */
-#define ETHTOOL_GPFLAGS		0x00000027 /* Get driver-private flags bitmap */
-#define ETHTOOL_SPFLAGS		0x00000028 /* Set driver-private flags bitmap */
+#define ETHTOOL_GPFLAGS	0x00000027 /* Get driver-private flags bitmap */
+#define ETHTOOL_SPFLAGS	0x00000028 /* Set driver-private flags bitmap */
 
-#define	ETHTOOL_GRXFH		0x00000029 /* Get RX flow hash configuration */
-#define	ETHTOOL_SRXFH		0x0000002a /* Set RX flow hash configuration */
+#define ETHTOOL_GRXFH		0x00000029 /* Get RX flow hash configuration */
+#define ETHTOOL_SRXFH		0x0000002a /* Set RX flow hash configuration */
 #define ETHTOOL_GGRO		0x0000002b /* Get GRO enable (ethtool_value) */
 #define ETHTOOL_SGRO		0x0000002c /* Set GRO enable (ethtool_value) */
-#define	ETHTOOL_GRXRINGS	0x0000002d /* Get RX rings available for LB */
-#define	ETHTOOL_GRXCLSRLCNT	0x0000002e /* Get RX class rule count */
-#define	ETHTOOL_GRXCLSRULE	0x0000002f /* Get RX classification rule */
-#define	ETHTOOL_GRXCLSRLALL	0x00000030 /* Get all RX classification rule */
-#define	ETHTOOL_SRXCLSRLDEL	0x00000031 /* Delete RX classification rule */
-#define	ETHTOOL_SRXCLSRLINS	0x00000032 /* Insert RX classification rule */
-#define	ETHTOOL_FLASHDEV	0x00000033 /* Flash firmware to device */
-#define	ETHTOOL_RESET		0x00000034 /* Reset hardware */
-#define	ETHTOOL_SRXNTUPLE	0x00000035 /* Add an n-tuple filter to device */
-#define	ETHTOOL_GRXNTUPLE	0x00000036 /* Get n-tuple filters from device */
-#define	ETHTOOL_GSSET_INFO	0x00000037 /* Get string set info */
+#define ETHTOOL_GRXRINGS	0x0000002d /* Get RX rings available for LB */
+#define ETHTOOL_GRXCLSRLCNT	0x0000002e /* Get RX class rule count */
+#define ETHTOOL_GRXCLSRULE	0x0000002f /* Get RX classification rule */
+#define ETHTOOL_GRXCLSRLALL	0x00000030 /* Get all RX classification rule */
+#define ETHTOOL_SRXCLSRLDEL	0x00000031 /* Delete RX classification rule */
+#define ETHTOOL_SRXCLSRLINS	0x00000032 /* Insert RX classification rule */
+#define ETHTOOL_FLASHDEV	0x00000033 /* Flash firmware to device */
+#define ETHTOOL_RESET		0x00000034 /* Reset hardware */
+#define ETHTOOL_SRXNTUPLE	0x00000035 /* Add an n-tuple filter to device */
+#define ETHTOOL_GRXNTUPLE	0x00000036 /* Get n-tuple filters from device */
+#define ETHTOOL_GSSET_INFO	0x00000037 /* Get string set info */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
@@ -636,18 +645,18 @@ struct ethtool_ops {
 /* Indicates what features are supported by the interface. */
 #define SUPPORTED_10baseT_Half		(1 << 0)
 #define SUPPORTED_10baseT_Full		(1 << 1)
-#define SUPPORTED_100baseT_Half		(1 << 2)
-#define SUPPORTED_100baseT_Full		(1 << 3)
+#define SUPPORTED_100baseT_Half	(1 << 2)
+#define SUPPORTED_100baseT_Full	(1 << 3)
 #define SUPPORTED_1000baseT_Half	(1 << 4)
 #define SUPPORTED_1000baseT_Full	(1 << 5)
 #define SUPPORTED_Autoneg		(1 << 6)
 #define SUPPORTED_TP			(1 << 7)
 #define SUPPORTED_AUI			(1 << 8)
 #define SUPPORTED_MII			(1 << 9)
-#define SUPPORTED_FIBRE			(1 << 10)
+#define SUPPORTED_FIBRE		(1 << 10)
 #define SUPPORTED_BNC			(1 << 11)
 #define SUPPORTED_10000baseT_Full	(1 << 12)
-#define SUPPORTED_Pause			(1 << 13)
+#define SUPPORTED_Pause		(1 << 13)
 #define SUPPORTED_Asym_Pause		(1 << 14)
 #define SUPPORTED_2500baseX_Full	(1 << 15)
 #define SUPPORTED_Backplane		(1 << 16)
@@ -657,8 +666,8 @@ struct ethtool_ops {
 #define SUPPORTED_10000baseR_FEC	(1 << 20)
 
 /* Indicates what features are advertised by the interface. */
-#define ADVERTISED_10baseT_Half		(1 << 0)
-#define ADVERTISED_10baseT_Full		(1 << 1)
+#define ADVERTISED_10baseT_Half	(1 << 0)
+#define ADVERTISED_10baseT_Full	(1 << 1)
 #define ADVERTISED_100baseT_Half	(1 << 2)
 #define ADVERTISED_100baseT_Full	(1 << 3)
 #define ADVERTISED_1000baseT_Half	(1 << 4)
@@ -697,12 +706,12 @@ struct ethtool_ops {
 #define DUPLEX_FULL		0x01
 
 /* Which connector port. */
-#define PORT_TP			0x00
+#define PORT_TP		0x00
 #define PORT_AUI		0x01
 #define PORT_MII		0x02
 #define PORT_FIBRE		0x03
 #define PORT_BNC		0x04
-#define PORT_DA			0x05
+#define PORT_DA		0x05
 #define PORT_NONE		0xef
 #define PORT_OTHER		0xff
 
@@ -716,7 +725,7 @@ struct ethtool_ops {
 /* Enable or disable autonegotiation.  If this is set to enable,
  * the forced link modes above are completely ignored.
  */
-#define AUTONEG_DISABLE		0x00
+#define AUTONEG_DISABLE	0x00
 #define AUTONEG_ENABLE		0x01
 
 /* Mode MDI or MDI-X */
@@ -747,8 +756,8 @@ struct ethtool_ops {
 #define	AH_V6_FLOW	0x0b
 #define	ESP_V6_FLOW	0x0c
 #define	IP_USER_FLOW	0x0d
-#define IPV4_FLOW       0x10
-#define IPV6_FLOW       0x11
+#define	IPV4_FLOW	0x10
+#define	IPV6_FLOW	0x11
 
 /* L3-L4 network traffic flow hash options */
 #define	RXH_L2DA	(1 << 1)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 73c81edde8d9..99e9f850ea07 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -18,7 +18,7 @@
 #include <linux/ethtool.h>
 #include <linux/netdevice.h>
 #include <linux/bitops.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Some useful ethtool_ops methods that're device independent.
@@ -30,6 +30,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
 {
 	return netif_carrier_ok(dev) ? 1 : 0;
 }
+EXPORT_SYMBOL(ethtool_op_get_link);
 
 u32 ethtool_op_get_rx_csum(struct net_device *dev)
 {
@@ -62,6 +63,7 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
 
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
 
 int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
 {
@@ -72,11 +74,13 @@ int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
 
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
 
 u32 ethtool_op_get_sg(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_SG) != 0;
 }
+EXPORT_SYMBOL(ethtool_op_get_sg);
 
 int ethtool_op_set_sg(struct net_device *dev, u32 data)
 {
@@ -87,11 +91,13 @@ int ethtool_op_set_sg(struct net_device *dev, u32 data)
 
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_sg);
 
 u32 ethtool_op_get_tso(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_TSO) != 0;
 }
+EXPORT_SYMBOL(ethtool_op_get_tso);
 
 int ethtool_op_set_tso(struct net_device *dev, u32 data)
 {
@@ -102,11 +108,13 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
 
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_tso);
 
 u32 ethtool_op_get_ufo(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_UFO) != 0;
 }
+EXPORT_SYMBOL(ethtool_op_get_ufo);
 
 int ethtool_op_set_ufo(struct net_device *dev, u32 data)
 {
@@ -116,6 +124,7 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
 		dev->features &= ~NETIF_F_UFO;
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_ufo);
 
 /* the following list of flags are the same as their associated
  * NETIF_F_xxx values in include/linux/netdevice.h
@@ -132,6 +141,7 @@ u32 ethtool_op_get_flags(struct net_device *dev)
 
 	return dev->features & flags_dup_features;
 }
+EXPORT_SYMBOL(ethtool_op_get_flags);
 
 int ethtool_op_set_flags(struct net_device *dev, u32 data)
 {
@@ -160,6 +170,7 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data)
 	dev->features = features;
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_flags);
 
 void ethtool_ntuple_flush(struct net_device *dev)
 {
@@ -205,7 +216,8 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
 	return dev->ethtool_ops->set_settings(dev, &cmd);
 }
 
-static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
+						  void __user *useraddr)
 {
 	struct ethtool_drvinfo info;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -245,7 +257,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void _
 }
 
 static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
-                                          void __user *useraddr)
+						    void __user *useraddr)
 {
 	struct ethtool_sset_info info;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -304,7 +316,8 @@ out:
 	return ret;
 }
 
-static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
+						void __user *useraddr)
 {
 	struct ethtool_rxnfc cmd;
 
@@ -317,7 +330,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __u
 	return dev->ethtool_ops->set_rxnfc(dev, &cmd);
 }
 
-static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
+						void __user *useraddr)
 {
 	struct ethtool_rxnfc info;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -362,8 +376,8 @@ err_out:
 }
 
 static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
-                              struct ethtool_rx_ntuple_flow_spec *spec,
-                              struct ethtool_rx_ntuple_flow_spec_container *fsc)
+			struct ethtool_rx_ntuple_flow_spec *spec,
+			struct ethtool_rx_ntuple_flow_spec_container *fsc)
 {
 
 	/* don't add filters forever */
@@ -389,7 +403,8 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
 	list->count++;
 }
 
-static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
+						    void __user *useraddr)
 {
 	struct ethtool_rx_ntuple cmd;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -514,125 +529,125 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
 		case UDP_V4_FLOW:
 		case SCTP_V4_FLOW:
 			sprintf(p, "\tSrc IP addr: 0x%x\n",
-			        fsc->fs.h_u.tcp_ip4_spec.ip4src);
+				fsc->fs.h_u.tcp_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tSrc IP mask: 0x%x\n",
-			        fsc->fs.m_u.tcp_ip4_spec.ip4src);
+				fsc->fs.m_u.tcp_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP addr: 0x%x\n",
-			        fsc->fs.h_u.tcp_ip4_spec.ip4dst);
+				fsc->fs.h_u.tcp_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP mask: 0x%x\n",
-			        fsc->fs.m_u.tcp_ip4_spec.ip4dst);
+				fsc->fs.m_u.tcp_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.tcp_ip4_spec.psrc,
-			        fsc->fs.m_u.tcp_ip4_spec.psrc);
+				fsc->fs.h_u.tcp_ip4_spec.psrc,
+				fsc->fs.m_u.tcp_ip4_spec.psrc);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.tcp_ip4_spec.pdst,
-			        fsc->fs.m_u.tcp_ip4_spec.pdst);
+				fsc->fs.h_u.tcp_ip4_spec.pdst,
+				fsc->fs.m_u.tcp_ip4_spec.pdst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.tcp_ip4_spec.tos,
-			        fsc->fs.m_u.tcp_ip4_spec.tos);
+				fsc->fs.h_u.tcp_ip4_spec.tos,
+				fsc->fs.m_u.tcp_ip4_spec.tos);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			break;
 		case AH_ESP_V4_FLOW:
 		case ESP_V4_FLOW:
 			sprintf(p, "\tSrc IP addr: 0x%x\n",
-			        fsc->fs.h_u.ah_ip4_spec.ip4src);
+				fsc->fs.h_u.ah_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tSrc IP mask: 0x%x\n",
-			        fsc->fs.m_u.ah_ip4_spec.ip4src);
+				fsc->fs.m_u.ah_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP addr: 0x%x\n",
-			        fsc->fs.h_u.ah_ip4_spec.ip4dst);
+				fsc->fs.h_u.ah_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP mask: 0x%x\n",
-			        fsc->fs.m_u.ah_ip4_spec.ip4dst);
+				fsc->fs.m_u.ah_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tSPI: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.ah_ip4_spec.spi,
-			        fsc->fs.m_u.ah_ip4_spec.spi);
+				fsc->fs.h_u.ah_ip4_spec.spi,
+				fsc->fs.m_u.ah_ip4_spec.spi);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.ah_ip4_spec.tos,
-			        fsc->fs.m_u.ah_ip4_spec.tos);
+				fsc->fs.h_u.ah_ip4_spec.tos,
+				fsc->fs.m_u.ah_ip4_spec.tos);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			break;
 		case IP_USER_FLOW:
 			sprintf(p, "\tSrc IP addr: 0x%x\n",
-			        fsc->fs.h_u.raw_ip4_spec.ip4src);
+				fsc->fs.h_u.raw_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tSrc IP mask: 0x%x\n",
-			        fsc->fs.m_u.raw_ip4_spec.ip4src);
+				fsc->fs.m_u.raw_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP addr: 0x%x\n",
-			        fsc->fs.h_u.raw_ip4_spec.ip4dst);
+				fsc->fs.h_u.raw_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP mask: 0x%x\n",
-			        fsc->fs.m_u.raw_ip4_spec.ip4dst);
+				fsc->fs.m_u.raw_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			break;
 		case IPV4_FLOW:
 			sprintf(p, "\tSrc IP addr: 0x%x\n",
-			        fsc->fs.h_u.usr_ip4_spec.ip4src);
+				fsc->fs.h_u.usr_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tSrc IP mask: 0x%x\n",
-			        fsc->fs.m_u.usr_ip4_spec.ip4src);
+				fsc->fs.m_u.usr_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP addr: 0x%x\n",
-			        fsc->fs.h_u.usr_ip4_spec.ip4dst);
+				fsc->fs.h_u.usr_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP mask: 0x%x\n",
-			        fsc->fs.m_u.usr_ip4_spec.ip4dst);
+				fsc->fs.m_u.usr_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
-			        fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
-			        fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
+				fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
+				fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.usr_ip4_spec.tos,
-			        fsc->fs.m_u.usr_ip4_spec.tos);
+				fsc->fs.h_u.usr_ip4_spec.tos,
+				fsc->fs.m_u.usr_ip4_spec.tos);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.usr_ip4_spec.ip_ver,
-			        fsc->fs.m_u.usr_ip4_spec.ip_ver);
+				fsc->fs.h_u.usr_ip4_spec.ip_ver,
+				fsc->fs.m_u.usr_ip4_spec.ip_ver);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.usr_ip4_spec.proto,
-			        fsc->fs.m_u.usr_ip4_spec.proto);
+				fsc->fs.h_u.usr_ip4_spec.proto,
+				fsc->fs.m_u.usr_ip4_spec.proto);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			break;
 		};
 		sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
-		        fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
+			fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
 		p += ETH_GSTRING_LEN;
 		num_strings++;
 		sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
@@ -645,7 +660,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
 			sprintf(p, "\tAction: Drop\n");
 		else
 			sprintf(p, "\tAction: Direct to queue %d\n",
-			        fsc->fs.action);
+				fsc->fs.action);
 		p += ETH_GSTRING_LEN;
 		num_strings++;
 unknown_filter:
@@ -857,7 +872,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
 	return ret;
 }
 
-static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
+						   void __user *useraddr)
 {
 	struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
 
@@ -871,7 +887,8 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void
 	return 0;
 }
 
-static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
+						   void __user *useraddr)
 {
 	struct ethtool_coalesce coalesce;
 
@@ -975,6 +992,7 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
 
 	return dev->ethtool_ops->set_tx_csum(dev, edata.data);
 }
+EXPORT_SYMBOL(ethtool_op_set_tx_csum);
 
 static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
 {
@@ -1046,7 +1064,7 @@ static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
 
 	edata.data = dev->features & NETIF_F_GSO;
 	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		 return -EFAULT;
+		return -EFAULT;
 	return 0;
 }
 
@@ -1069,7 +1087,7 @@ static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
 
 	edata.data = dev->features & NETIF_F_GRO;
 	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		 return -EFAULT;
+		return -EFAULT;
 	return 0;
 }
 
@@ -1281,7 +1299,8 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
 	return actor(dev, edata.data);
 }
 
-static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr)
+static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
+						   char __user *useraddr)
 {
 	struct ethtool_flash efl;
 
@@ -1310,11 +1329,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	if (!dev->ethtool_ops)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
+	if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
 		return -EFAULT;
 
 	/* Allow some commands to be done by anyone */
-	switch(ethcmd) {
+	switch (ethcmd) {
 	case ETHTOOL_GDRVINFO:
 	case ETHTOOL_GMSGLVL:
 	case ETHTOOL_GCOALESCE:
@@ -1342,10 +1361,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 			return -EPERM;
 	}
 
-	if (dev->ethtool_ops->begin)
-		if ((rc = dev->ethtool_ops->begin(dev)) < 0)
+	if (dev->ethtool_ops->begin) {
+		rc = dev->ethtool_ops->begin(dev);
+		if (rc  < 0)
 			return rc;
-
+	}
 	old_features = dev->features;
 
 	switch (ethcmd) {
@@ -1535,16 +1555,3 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 
 	return rc;
 }
-
-EXPORT_SYMBOL(ethtool_op_get_link);
-EXPORT_SYMBOL(ethtool_op_get_sg);
-EXPORT_SYMBOL(ethtool_op_get_tso);
-EXPORT_SYMBOL(ethtool_op_set_sg);
-EXPORT_SYMBOL(ethtool_op_set_tso);
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
-EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
-EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
-EXPORT_SYMBOL(ethtool_op_set_ufo);
-EXPORT_SYMBOL(ethtool_op_get_ufo);
-EXPORT_SYMBOL(ethtool_op_set_flags);
-EXPORT_SYMBOL(ethtool_op_get_flags);
-- 
cgit v1.2.3-59-g8ed1b


From 3d91c1a848c812e0e66e7e57f076667822cb460e Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 8 Apr 2010 13:35:47 +0200
Subject: IPVS: fix potential stack overflow with overly long protocol names

When protocols use very long names, the sprintf calls might overflow
the on-stack buffer. No protocol in the kernel does this however.

Print the protocol name in the pr_debug statement directly to avoid
this.

Based on patch by Zhitong Wang <zhitong.wangzt@alibaba-inc.com>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/ipvs/ip_vs_proto.c        | 28 ++++++++++++----------------
 net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 14 ++++++--------
 2 files changed, 18 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 0e584553819d..27add971bb13 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -166,26 +166,24 @@ ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp,
 
 	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
 	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
+		sprintf(buf, "TRUNCATED");
 	else if (ih->frag_off & htons(IP_OFFSET))
-		sprintf(buf, "%s %pI4->%pI4 frag",
-			pp->name, &ih->saddr, &ih->daddr);
+		sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr);
 	else {
 		__be16 _ports[2], *pptr
 ;
 		pptr = skb_header_pointer(skb, offset + ih->ihl*4,
 					  sizeof(_ports), _ports);
 		if (pptr == NULL)
-			sprintf(buf, "%s TRUNCATED %pI4->%pI4",
-				pp->name, &ih->saddr, &ih->daddr);
+			sprintf(buf, "TRUNCATED %pI4->%pI4",
+				&ih->saddr, &ih->daddr);
 		else
-			sprintf(buf, "%s %pI4:%u->%pI4:%u",
-				pp->name,
+			sprintf(buf, "%pI4:%u->%pI4:%u",
 				&ih->saddr, ntohs(pptr[0]),
 				&ih->daddr, ntohs(pptr[1]));
 	}
 
-	pr_debug("%s: %s\n", msg, buf);
+	pr_debug("%s: %s %s\n", msg, pp->name, buf);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -200,26 +198,24 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
 
 	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
 	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
+		sprintf(buf, "TRUNCATED");
 	else if (ih->nexthdr == IPPROTO_FRAGMENT)
-		sprintf(buf, "%s %pI6->%pI6 frag",
-			pp->name, &ih->saddr, &ih->daddr);
+		sprintf(buf, "%pI6->%pI6 frag",	&ih->saddr, &ih->daddr);
 	else {
 		__be16 _ports[2], *pptr;
 
 		pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
 					  sizeof(_ports), _ports);
 		if (pptr == NULL)
-			sprintf(buf, "%s TRUNCATED %pI6->%pI6",
-				pp->name, &ih->saddr, &ih->daddr);
+			sprintf(buf, "TRUNCATED %pI6->%pI6",
+				&ih->saddr, &ih->daddr);
 		else
-			sprintf(buf, "%s %pI6:%u->%pI6:%u",
-				pp->name,
+			sprintf(buf, "%pI6:%u->%pI6:%u",
 				&ih->saddr, ntohs(pptr[0]),
 				&ih->daddr, ntohs(pptr[1]));
 	}
 
-	pr_debug("%s: %s\n", msg, buf);
+	pr_debug("%s: %s %s\n", msg, pp->name, buf);
 }
 #endif
 
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index c30b43c36cd7..1892dfc12fdd 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -136,12 +136,11 @@ ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb,
 
 	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
 	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
+		sprintf(buf, "TRUNCATED");
 	else
-		sprintf(buf, "%s %pI4->%pI4",
-			pp->name, &ih->saddr, &ih->daddr);
+		sprintf(buf, "%pI4->%pI4", &ih->saddr, &ih->daddr);
 
-	pr_debug("%s: %s\n", msg, buf);
+	pr_debug("%s: %s %s\n", msg, pp->name, buf);
 }
 
 #ifdef CONFIG_IP_VS_IPV6
@@ -154,12 +153,11 @@ ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb,
 
 	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
 	if (ih == NULL)
-		sprintf(buf, "%s TRUNCATED", pp->name);
+		sprintf(buf, "TRUNCATED");
 	else
-		sprintf(buf, "%s %pI6->%pI6",
-			pp->name, &ih->saddr, &ih->daddr);
+		sprintf(buf, "%pI6->%pI6", &ih->saddr, &ih->daddr);
 
-	pr_debug("%s: %s\n", msg, buf);
+	pr_debug("%s: %s %s\n", msg, pp->name, buf);
 }
 #endif
 
-- 
cgit v1.2.3-59-g8ed1b


From 9e56c21486f2a64473f36fa49475fd253422fbf6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 8 Apr 2010 14:52:28 +0200
Subject: netfilter: only do skb_checksum_help on CHECKSUM_PARTIAL in ip_queue

While doing yet another audit on ip_summed I noticed ip_queue
calling skb_checksum_help unnecessarily.  As we will set ip_summed
to CHECKSUM_NONE when necessary in ipq_mangle_ipv4, there is no
need to zap CHECKSUM_COMPLETE in ipq_build_packet_message.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ip_queue.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 2855f1f38cbc..d781513282d4 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -160,8 +160,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 		break;
 
 	case IPQ_COPY_PACKET:
-		if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
-		     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+		if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
 		    (*errp = skb_checksum_help(entry->skb))) {
 			read_unlock_bh(&queue_lock);
 			return NULL;
-- 
cgit v1.2.3-59-g8ed1b


From 5dd59cc99131fb4a2775c00928b0f24b9e192a76 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 8 Apr 2010 14:53:40 +0200
Subject: netfilter: only do skb_checksum_help on CHECKSUM_PARTIAL in ip6_queue

As we will set ip_summed to CHECKSUM_NONE when necessary in
ipq_mangle_ipv6, there is no need to zap CHECKSUM_COMPLETE in
ipq_build_packet_message.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter/ip6_queue.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 7854052be60b..39856a25189c 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -161,8 +161,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 		break;
 
 	case IPQ_COPY_PACKET:
-		if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
-		     entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
+		if (entry->skb->ip_summed == CHECKSUM_PARTIAL &&
 		    (*errp = skb_checksum_help(entry->skb))) {
 			read_unlock_bh(&queue_lock);
 			return NULL;
-- 
cgit v1.2.3-59-g8ed1b


From e9f13cab49f7f28b65a6f63201fca56480b2e059 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Thu, 8 Apr 2010 14:54:35 +0200
Subject: netfilter: only do skb_checksum_help on CHECKSUM_PARTIAL in
 nfnetlink_queue

As we will set ip_summed to CHECKSUM_NONE when necessary in
nfqnl_mangle, there is no need to zap CHECKSUM_COMPLETE in
nfqnl_build_packet_message.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nfnetlink_queue.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 7ba4abc405c9..08c1a33077a0 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -245,8 +245,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 		break;
 
 	case NFQNL_COPY_PACKET:
-		if ((entskb->ip_summed == CHECKSUM_PARTIAL ||
-		     entskb->ip_summed == CHECKSUM_COMPLETE) &&
+		if (entskb->ip_summed == CHECKSUM_PARTIAL &&
 		    skb_checksum_help(entskb)) {
 			spin_unlock_bh(&queue->lock);
 			return NULL;
-- 
cgit v1.2.3-59-g8ed1b


From 97ad9139fd68b5c71f44d28d3f9788d89cfd4916 Mon Sep 17 00:00:00 2001
From: Javier Cardona <javier@cozybit.com>
Date: Mon, 29 Mar 2010 11:00:21 -0700
Subject: mac80211: Moved mesh action codes to a more visible location

Grouped mesh action codes together with the other action codes in
ieee80211.h.

Signed-off-by: Javier Cardona <javier@cozybit.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 2 ++
 net/mac80211/mesh.c       | 4 ++--
 net/mac80211/mesh.h       | 2 --
 net/mac80211/mesh_hwmp.c  | 4 ++--
 net/mac80211/mesh_plink.c | 2 +-
 net/mac80211/rx.c         | 6 +++---
 6 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index e9e03b02cb08..1252ba1fbff5 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1211,6 +1211,8 @@ enum ieee80211_category {
 	WLAN_CATEGORY_SA_QUERY = 8,
 	WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9,
 	WLAN_CATEGORY_WMM = 17,
+	WLAN_CATEGORY_MESH_PLINK = 30,		/* Pending ANA approval */
+	WLAN_CATEGORY_MESH_PATH_SEL = 32,	/* Pending ANA approval */
 	WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126,
 	WLAN_CATEGORY_VENDOR_SPECIFIC = 127,
 };
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 7a6bebce7f2f..2669fbf8c812 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -600,10 +600,10 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
 					  struct ieee80211_rx_status *rx_status)
 {
 	switch (mgmt->u.action.category) {
-	case MESH_PLINK_CATEGORY:
+	case WLAN_CATEGORY_MESH_PLINK:
 		mesh_rx_plink_frame(sdata, mgmt, len, rx_status);
 		break;
-	case MESH_PATH_SEL_CATEGORY:
+	case WLAN_CATEGORY_MESH_PATH_SEL:
 		mesh_rx_path_sel_frame(sdata, mgmt, len);
 		break;
 	}
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 85562c59d7d6..c88087f1cd0f 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -209,8 +209,6 @@ struct mesh_rmc {
 #define MESH_MAX_MPATHS		1024
 
 /* Pending ANA approval */
-#define MESH_PLINK_CATEGORY	30
-#define MESH_PATH_SEL_CATEGORY	32
 #define MESH_PATH_SEL_ACTION	0
 
 /* PERR reason codes */
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index ccff6133e19a..36141d6e701b 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -131,7 +131,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	/* BSSID == SA */
 	memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
-	mgmt->u.action.category = MESH_PATH_SEL_CATEGORY;
+	mgmt->u.action.category = WLAN_CATEGORY_MESH_PATH_SEL;
 	mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION;
 
 	switch (action) {
@@ -224,7 +224,7 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn,
 	memcpy(mgmt->da, ra, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	/* BSSID is left zeroed, wildcard value */
-	mgmt->u.action.category = MESH_PATH_SEL_CATEGORY;
+	mgmt->u.action.category = WLAN_CATEGORY_MESH_PATH_SEL;
 	mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION;
 	ie_len = 15;
 	pos = skb_put(skb, 2 + ie_len);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index bc4e20e57ff5..c384154ac895 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -171,7 +171,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 	memcpy(mgmt->da, da, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	/* BSSID is left zeroed, wildcard value */
-	mgmt->u.action.category = MESH_PLINK_CATEGORY;
+	mgmt->u.action.category = WLAN_CATEGORY_MESH_PLINK;
 	mgmt->u.action.u.plink_action.action_code = action;
 
 	if (action == PLINK_CLOSE)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c0ad7e879a6e..d08ede44ac7e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -490,7 +490,7 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
 
 		if (ieee80211_is_action(hdr->frame_control)) {
 			mgmt = (struct ieee80211_mgmt *)hdr;
-			if (mgmt->u.action.category != MESH_PLINK_CATEGORY)
+			if (mgmt->u.action.category != WLAN_CATEGORY_MESH_PLINK)
 				return RX_DROP_MONITOR;
 			return RX_CONTINUE;
 		}
@@ -1994,8 +1994,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			goto handled;
 		}
 		break;
-	case MESH_PLINK_CATEGORY:
-	case MESH_PATH_SEL_CATEGORY:
+	case WLAN_CATEGORY_MESH_PLINK:
+	case WLAN_CATEGORY_MESH_PATH_SEL:
 		if (ieee80211_vif_is_mesh(&sdata->vif))
 			return ieee80211_mesh_rx_mgmt(sdata, rx->skb);
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From b5878a2dc5e7e7f031a52c3e15b571224cb6b540 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 7 Apr 2010 16:48:40 +0200
Subject: mac80211: enhance tracing

Enhance tracing by adding tracing for a variety of
callbacks that the drivers call, and also for
internal calls (currently limited to queue status).
This can aid debugging what is going on in mac80211
in interaction with drivers, since we can now see
what drivers call and not just what mac80211 calls
in the driver.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/Kconfig        |   8 +-
 net/mac80211/agg-tx.c       |   8 ++
 net/mac80211/driver-trace.h | 275 ++++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/main.c         |   2 +
 net/mac80211/mlme.c         |   6 +
 net/mac80211/scan.c         |   2 +
 net/mac80211/sta_info.c     |   2 +
 net/mac80211/util.c         |   4 +
 8 files changed, 303 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index 334c359da5e8..8a91f6c0bb18 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -221,8 +221,8 @@ config MAC80211_DRIVER_API_TRACER
 	depends on EVENT_TRACING
 	help
 	  Say Y here to make mac80211 register with the ftrace
-	  framework for the driver API -- you can see which
-	  driver methods it is calling then by looking at the
-	  trace.
+	  framework for the driver API -- you can then see which
+	  driver methods it is calling and which API functions
+	  drivers are calling by looking at the trace.
 
-	  If unsure, say N.
+	  If unsure, say Y.
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 32d2148b5b98..6bb4d0a1e5c5 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -214,6 +214,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid)
 	int ret = 0;
 	u16 start_seq_num;
 
+	trace_api_start_tx_ba_session(pubsta, tid);
+
 	if (WARN_ON(!local->ops->ampdu_action))
 		return -EINVAL;
 
@@ -440,6 +442,8 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid)
 	struct sta_info *sta;
 	u8 *state;
 
+	trace_api_start_tx_ba_cb(sdata, ra, tid);
+
 	if (tid >= STA_TID_NUM) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
@@ -541,6 +545,8 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
 	struct ieee80211_sub_if_data *sdata = sta->sdata;
 	struct ieee80211_local *local = sdata->local;
 
+	trace_api_stop_tx_ba_session(pubsta, tid, initiator);
+
 	if (!local->ops->ampdu_action)
 		return -EINVAL;
 
@@ -558,6 +564,8 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u8 tid)
 	struct sta_info *sta;
 	u8 *state;
 
+	trace_api_stop_tx_ba_cb(sdata, ra, tid);
+
 	if (tid >= STA_TID_NUM) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n",
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index 41baf730a5c7..e209cb82ff29 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -32,6 +32,10 @@ static inline void trace_ ## name(proto) {}
 #define VIF_PR_FMT	" vif:%s(%d)"
 #define VIF_PR_ARG	__get_str(vif_name), __entry->vif_type
 
+/*
+ * Tracing for driver callbacks.
+ */
+
 TRACE_EVENT(drv_start,
 	TP_PROTO(struct ieee80211_local *local, int ret),
 
@@ -766,6 +770,277 @@ TRACE_EVENT(drv_flush,
 		LOCAL_PR_ARG, __entry->drop
 	)
 );
+
+/*
+ * Tracing for API calls that drivers call.
+ */
+
+TRACE_EVENT(api_start_tx_ba_session,
+	TP_PROTO(struct ieee80211_sta *sta, u16 tid),
+
+	TP_ARGS(sta, tid),
+
+	TP_STRUCT__entry(
+		STA_ENTRY
+		__field(u16, tid)
+	),
+
+	TP_fast_assign(
+		STA_ASSIGN;
+		__entry->tid = tid;
+	),
+
+	TP_printk(
+		STA_PR_FMT " tid:%d",
+		STA_PR_ARG, __entry->tid
+	)
+);
+
+TRACE_EVENT(api_start_tx_ba_cb,
+	TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid),
+
+	TP_ARGS(sdata, ra, tid),
+
+	TP_STRUCT__entry(
+		VIF_ENTRY
+		__array(u8, ra, ETH_ALEN)
+		__field(u16, tid)
+	),
+
+	TP_fast_assign(
+		VIF_ASSIGN;
+		memcpy(__entry->ra, ra, ETH_ALEN);
+		__entry->tid = tid;
+	),
+
+	TP_printk(
+		VIF_PR_FMT " ra:%pM tid:%d",
+		VIF_PR_ARG, __entry->ra, __entry->tid
+	)
+);
+
+TRACE_EVENT(api_stop_tx_ba_session,
+	TP_PROTO(struct ieee80211_sta *sta, u16 tid, u16 initiator),
+
+	TP_ARGS(sta, tid, initiator),
+
+	TP_STRUCT__entry(
+		STA_ENTRY
+		__field(u16, tid)
+		__field(u16, initiator)
+	),
+
+	TP_fast_assign(
+		STA_ASSIGN;
+		__entry->tid = tid;
+		__entry->initiator = initiator;
+	),
+
+	TP_printk(
+		STA_PR_FMT " tid:%d initiator:%d",
+		STA_PR_ARG, __entry->tid, __entry->initiator
+	)
+);
+
+TRACE_EVENT(api_stop_tx_ba_cb,
+	TP_PROTO(struct ieee80211_sub_if_data *sdata, const u8 *ra, u16 tid),
+
+	TP_ARGS(sdata, ra, tid),
+
+	TP_STRUCT__entry(
+		VIF_ENTRY
+		__array(u8, ra, ETH_ALEN)
+		__field(u16, tid)
+	),
+
+	TP_fast_assign(
+		VIF_ASSIGN;
+		memcpy(__entry->ra, ra, ETH_ALEN);
+		__entry->tid = tid;
+	),
+
+	TP_printk(
+		VIF_PR_FMT " ra:%pM tid:%d",
+		VIF_PR_ARG, __entry->ra, __entry->tid
+	)
+);
+
+TRACE_EVENT(api_restart_hw,
+	TP_PROTO(struct ieee80211_local *local),
+
+	TP_ARGS(local),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT,
+		LOCAL_PR_ARG
+	)
+);
+
+TRACE_EVENT(api_beacon_loss,
+	TP_PROTO(struct ieee80211_sub_if_data *sdata),
+
+	TP_ARGS(sdata),
+
+	TP_STRUCT__entry(
+		VIF_ENTRY
+	),
+
+	TP_fast_assign(
+		VIF_ASSIGN;
+	),
+
+	TP_printk(
+		VIF_PR_FMT,
+		VIF_PR_ARG
+	)
+);
+
+TRACE_EVENT(api_connection_loss,
+	TP_PROTO(struct ieee80211_sub_if_data *sdata),
+
+	TP_ARGS(sdata),
+
+	TP_STRUCT__entry(
+		VIF_ENTRY
+	),
+
+	TP_fast_assign(
+		VIF_ASSIGN;
+	),
+
+	TP_printk(
+		VIF_PR_FMT,
+		VIF_PR_ARG
+	)
+);
+
+TRACE_EVENT(api_cqm_rssi_notify,
+	TP_PROTO(struct ieee80211_sub_if_data *sdata,
+		 enum nl80211_cqm_rssi_threshold_event rssi_event),
+
+	TP_ARGS(sdata, rssi_event),
+
+	TP_STRUCT__entry(
+		VIF_ENTRY
+		__field(u32, rssi_event)
+	),
+
+	TP_fast_assign(
+		VIF_ASSIGN;
+		__entry->rssi_event = rssi_event;
+	),
+
+	TP_printk(
+		VIF_PR_FMT " event:%d",
+		VIF_PR_ARG, __entry->rssi_event
+	)
+);
+
+TRACE_EVENT(api_scan_completed,
+	TP_PROTO(struct ieee80211_local *local, bool aborted),
+
+	TP_ARGS(local, aborted),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(bool, aborted)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->aborted = aborted;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " aborted:%d",
+		LOCAL_PR_ARG, __entry->aborted
+	)
+);
+
+TRACE_EVENT(api_sta_block_awake,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sta *sta, bool block),
+
+	TP_ARGS(local, sta, block),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		STA_ENTRY
+		__field(bool, block)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		STA_ASSIGN;
+		__entry->block = block;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT STA_PR_FMT " block:%d",
+		LOCAL_PR_ARG, STA_PR_ARG, __entry->block
+	)
+);
+
+/*
+ * Tracing for internal functions
+ * (which may also be called in response to driver calls)
+ */
+
+TRACE_EVENT(wake_queue,
+	TP_PROTO(struct ieee80211_local *local, u16 queue,
+		 enum queue_stop_reason reason),
+
+	TP_ARGS(local, queue, reason),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(u16, queue)
+		__field(u32, reason)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->queue = queue;
+		__entry->reason = reason;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " queue:%d, reason:%d",
+		LOCAL_PR_ARG, __entry->queue, __entry->reason
+	)
+);
+
+TRACE_EVENT(stop_queue,
+	TP_PROTO(struct ieee80211_local *local, u16 queue,
+		 enum queue_stop_reason reason),
+
+	TP_ARGS(local, queue, reason),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(u16, queue)
+		__field(u32, reason)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->queue = queue;
+		__entry->reason = reason;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " queue:%d, reason:%d",
+		LOCAL_PR_ARG, __entry->queue, __entry->reason
+	)
+);
 #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index b887e484ae04..4afe851cf8dc 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -309,6 +309,8 @@ void ieee80211_restart_hw(struct ieee80211_hw *hw)
 {
 	struct ieee80211_local *local = hw_to_local(hw);
 
+	trace_api_restart_hw(local);
+
 	/* use this reason, __ieee80211_resume will unblock it */
 	ieee80211_stop_queues_by_reason(hw,
 		IEEE80211_QUEUE_STOP_REASON_SUSPEND);
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 461167dfa42c..d11a54c289a2 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1007,6 +1007,8 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif)
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 	struct ieee80211_hw *hw = &sdata->local->hw;
 
+	trace_api_beacon_loss(sdata);
+
 	WARN_ON(hw->flags & IEEE80211_HW_CONNECTION_MONITOR);
 	ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);
 }
@@ -1017,6 +1019,8 @@ void ieee80211_connection_loss(struct ieee80211_vif *vif)
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 	struct ieee80211_hw *hw = &sdata->local->hw;
 
+	trace_api_connection_loss(sdata);
+
 	WARN_ON(!(hw->flags & IEEE80211_HW_CONNECTION_MONITOR));
 	ieee80211_queue_work(hw, &sdata->u.mgd.beacon_connection_loss_work);
 }
@@ -2261,6 +2265,8 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif,
 {
 	struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
 
+	trace_api_cqm_rssi_notify(sdata, rssi_event);
+
 	cfg80211_cqm_rssi_notify(sdata->dev, rssi_event, gfp);
 }
 EXPORT_SYMBOL(ieee80211_cqm_rssi_notify);
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 75a85978c3b3..eb86a5f6e645 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -247,6 +247,8 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 	struct ieee80211_local *local = hw_to_local(hw);
 	bool was_hw_scan;
 
+	trace_api_scan_completed(local, aborted);
+
 	mutex_lock(&local->scan_mtx);
 
 	/*
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 4de987cbda1c..ff0eb948917b 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -957,6 +957,8 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw,
 {
 	struct sta_info *sta = container_of(pubsta, struct sta_info, sta);
 
+	trace_api_sta_block_awake(sta->local, pubsta, block);
+
 	if (block)
 		set_sta_flags(sta, WLAN_STA_PS_DRIVER);
 	else
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index ad9009f717ed..2b75b4fb68f4 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -270,6 +270,8 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue,
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_sub_if_data *sdata;
 
+	trace_wake_queue(local, queue, reason);
+
 	if (WARN_ON(queue >= hw->queues))
 		return;
 
@@ -312,6 +314,8 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_sub_if_data *sdata;
 
+	trace_stop_queue(local, queue, reason);
+
 	if (WARN_ON(queue >= hw->queues))
 		return;
 
-- 
cgit v1.2.3-59-g8ed1b


From ed86308f6179d8fa6151c2d0f652aad0091548e2 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 9 Apr 2010 16:42:15 +0200
Subject: netfilter: remove invalid rcu_dereference() calls

The CONFIG_PROVE_RCU option discovered a few invalid uses of
rcu_dereference() in netfilter. In all these cases, the code code
intends to check whether a pointer is already assigned when
performing registration or whether the assigned pointer matches
when performing unregistration. The entire registration/
unregistration is protected by a mutex, so we don't need the
rcu_dereference() calls.

Reported-by: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Tested-by: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_ecache.c | 18 ++++--------------
 net/netfilter/nf_log.c              |  8 ++------
 2 files changed, 6 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index d5a9bcd7d61b..849614af2322 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -81,11 +81,9 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
 int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
 {
 	int ret = 0;
-	struct nf_ct_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference(nf_conntrack_event_cb);
-	if (notify != NULL) {
+	if (nf_conntrack_event_cb != NULL) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
@@ -101,11 +99,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
 
 void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
 {
-	struct nf_ct_event_notifier *notify;
-
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference(nf_conntrack_event_cb);
-	BUG_ON(notify != new);
+	BUG_ON(nf_conntrack_event_cb != new);
 	rcu_assign_pointer(nf_conntrack_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
 }
@@ -114,11 +109,9 @@ EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
 int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
 {
 	int ret = 0;
-	struct nf_exp_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference(nf_expect_event_cb);
-	if (notify != NULL) {
+	if (nf_expect_event_cb != NULL) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
@@ -134,11 +127,8 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
 
 void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
 {
-	struct nf_exp_event_notifier *notify;
-
 	mutex_lock(&nf_ct_ecache_mutex);
-	notify = rcu_dereference(nf_expect_event_cb);
-	BUG_ON(notify != new);
+	BUG_ON(nf_expect_event_cb != new);
 	rcu_assign_pointer(nf_expect_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
 }
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 015725a5cd50..908f59935fbb 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -35,7 +35,6 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
 /* return EEXIST if the same logger is registred, 0 on success. */
 int nf_log_register(u_int8_t pf, struct nf_logger *logger)
 {
-	const struct nf_logger *llog;
 	int i;
 
 	if (pf >= ARRAY_SIZE(nf_loggers))
@@ -52,8 +51,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
 	} else {
 		/* register at end of list to honor first register win */
 		list_add_tail(&logger->list[pf], &nf_loggers_l[pf]);
-		llog = rcu_dereference(nf_loggers[pf]);
-		if (llog == NULL)
+		if (nf_loggers[pf] == NULL)
 			rcu_assign_pointer(nf_loggers[pf], logger);
 	}
 
@@ -65,13 +63,11 @@ EXPORT_SYMBOL(nf_log_register);
 
 void nf_log_unregister(struct nf_logger *logger)
 {
-	const struct nf_logger *c_logger;
 	int i;
 
 	mutex_lock(&nf_log_mutex);
 	for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) {
-		c_logger = rcu_dereference(nf_loggers[i]);
-		if (c_logger == logger)
+		if (nf_loggers[i] == logger)
 			rcu_assign_pointer(nf_loggers[i], NULL);
 		list_del(&logger->list[i]);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From b660d0485efeff743c72f1be2185832de8477a24 Mon Sep 17 00:00:00 2001
From: Adam Nielsen <a.nielsen@shikadi.net>
Date: Fri, 9 Apr 2010 16:51:40 +0200
Subject: netfilter: xt_LED: add refcounts to LED target

Add reference counting to the netfilter LED target, to fix errors when
multiple rules point to the same target ("LED trigger already exists").

Signed-off-by: Adam Nielsen <a.nielsen@shikadi.net>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/xt_LED.c | 69 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 63 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index efcf56db23e8..bd102c77d1f0 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -31,12 +31,18 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
 MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
 
+static LIST_HEAD(xt_led_triggers);
+static DEFINE_MUTEX(xt_led_mutex);
+
 /*
  * This is declared in here (the kernel module) only, to avoid having these
  * dependencies in userspace code.  This is what xt_led_info.internal_data
  * points to.
  */
 struct xt_led_info_internal {
+	struct list_head list;
+	int refcnt;
+	char *trigger_id;
 	struct led_trigger netfilter_led_trigger;
 	struct timer_list timer;
 };
@@ -53,7 +59,7 @@ led_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	 */
 	if ((ledinfo->delay > 0) && ledinfo->always_blink &&
 	    timer_pending(&ledinternal->timer))
-		led_trigger_event(&ledinternal->netfilter_led_trigger,LED_OFF);
+		led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
 
 	led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL);
 
@@ -74,12 +80,23 @@ led_tg(struct sk_buff *skb, const struct xt_target_param *par)
 
 static void led_timeout_callback(unsigned long data)
 {
-	struct xt_led_info *ledinfo = (struct xt_led_info *)data;
-	struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
+	struct xt_led_info_internal *ledinternal = (struct xt_led_info_internal *)data;
 
 	led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
 }
 
+static struct xt_led_info_internal *led_trigger_lookup(const char *name)
+{
+	struct xt_led_info_internal *ledinternal;
+
+	list_for_each_entry(ledinternal, &xt_led_triggers, list) {
+		if (!strcmp(name, ledinternal->netfilter_led_trigger.name)) {
+			return ledinternal;
+		}
+	}
+	return NULL;
+}
+
 static int led_tg_check(const struct xt_tgchk_param *par)
 {
 	struct xt_led_info *ledinfo = par->targinfo;
@@ -91,11 +108,25 @@ static int led_tg_check(const struct xt_tgchk_param *par)
 		return -EINVAL;
 	}
 
+	mutex_lock(&xt_led_mutex);
+
+	ledinternal = led_trigger_lookup(ledinfo->id);
+	if (ledinternal) {
+		ledinternal->refcnt++;
+		goto out;
+	}
+
+	err = -ENOMEM;
 	ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL);
 	if (!ledinternal)
-		return -ENOMEM;
+		goto exit_mutex_only;
+
+	ledinternal->trigger_id = kstrdup(ledinfo->id, GFP_KERNEL);
+	if (!ledinternal->trigger_id)
+		goto exit_internal_alloc;
 
-	ledinternal->netfilter_led_trigger.name = ledinfo->id;
+	ledinternal->refcnt = 1;
+	ledinternal->netfilter_led_trigger.name = ledinternal->trigger_id;
 
 	err = led_trigger_register(&ledinternal->netfilter_led_trigger);
 	if (err) {
@@ -108,13 +139,26 @@ static int led_tg_check(const struct xt_tgchk_param *par)
 	/* See if we need to set up a timer */
 	if (ledinfo->delay > 0)
 		setup_timer(&ledinternal->timer, led_timeout_callback,
-			    (unsigned long)ledinfo);
+			    (unsigned long)ledinternal);
+
+	list_add_tail(&ledinternal->list, &xt_led_triggers);
+
+out:
+	mutex_unlock(&xt_led_mutex);
 
 	ledinfo->internal_data = ledinternal;
+
 	return 0;
 
 exit_alloc:
+	kfree(ledinternal->trigger_id);
+
+exit_internal_alloc:
 	kfree(ledinternal);
+
+exit_mutex_only:
+	mutex_unlock(&xt_led_mutex);
+
 	return err;
 }
 
@@ -123,10 +167,23 @@ static void led_tg_destroy(const struct xt_tgdtor_param *par)
 	const struct xt_led_info *ledinfo = par->targinfo;
 	struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
 
+	mutex_lock(&xt_led_mutex);
+
+	if (--ledinternal->refcnt) {
+		mutex_unlock(&xt_led_mutex);
+		return;
+	}
+
+	list_del(&ledinternal->list);
+
 	if (ledinfo->delay > 0)
 		del_timer_sync(&ledinternal->timer);
 
 	led_trigger_unregister(&ledinternal->netfilter_led_trigger);
+
+	mutex_unlock(&xt_led_mutex);
+
+	kfree(ledinternal->trigger_id);
 	kfree(ledinternal);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 39184b151cbe5ce9f1487190ac4244f69bf6a04b Mon Sep 17 00:00:00 2001
From: Zhu Yi <yi.zhu@intel.com>
Date: Thu, 8 Apr 2010 15:35:10 +0800
Subject: mac80211: delay skb linearising in rx decryption

We delay the skb linearising in ieee80211_rx_h_decrypt so that
frames that do not require software decryption are not linearized.
We are safe to do this because ieee80211_get_mmie_keyidx() only
needs to touch nonlinear data for management frames, which are
already linearized before getting here.

Cc: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Zhu Yi <yi.zhu@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index d08ede44ac7e..8ee7db193269 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -820,7 +820,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 {
 	struct sk_buff *skb = rx->skb;
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
-	struct ieee80211_hdr *hdr;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
 	int keyidx;
 	int hdrlen;
 	ieee80211_rx_result result = RX_DROP_UNUSABLE;
@@ -861,11 +861,6 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 	if (!(rx->flags & IEEE80211_RX_RA_MATCH))
 		return RX_CONTINUE;
 
-	if (skb_linearize(rx->skb))
-		return RX_DROP_UNUSABLE;
-
-	hdr = (struct ieee80211_hdr *)skb->data;
-
 	/* start without a key */
 	rx->key = NULL;
 
@@ -906,6 +901,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 			rx->key = key;
 		return RX_CONTINUE;
 	} else {
+		u8 keyid;
 		/*
 		 * The device doesn't give us the IV so we won't be
 		 * able to look up the key. That's ok though, we
@@ -928,7 +924,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 		 * no need to call ieee80211_wep_get_keyidx,
 		 * it verifies a bunch of things we've done already
 		 */
-		keyidx = rx->skb->data[hdrlen + 3] >> 6;
+		skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1);
+		keyidx = keyid >> 6;
 
 		rx->key = rcu_dereference(rx->sdata->keys[keyidx]);
 
@@ -949,6 +946,11 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 		return RX_DROP_MONITOR;
 	}
 
+	if (skb_linearize(rx->skb))
+		return RX_DROP_UNUSABLE;
+
+	hdr = (struct ieee80211_hdr *)rx->skb->data;
+
 	/* Check for weak IVs if possible */
 	if (rx->sta && rx->key->conf.alg == ALG_WEP &&
 	    ieee80211_is_data(hdr->frame_control) &&
-- 
cgit v1.2.3-59-g8ed1b


From c15cf5fcf9ea0a7749536c201965370d99c86c7f Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <lrodriguez@atheros.com>
Date: Thu, 8 Apr 2010 16:08:46 -0400
Subject: mac80211: fix typo for LDPC capability

Signed-off-by: Luis R. Rodriguez <lrodriguez@atheros.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/debugfs_sta.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 740ff6c5b92c..6bc9b07c3eda 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -176,7 +176,7 @@ static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
 	if (htc->ht_supported) {
 		p += scnprintf(p, sizeof(buf)+buf-p, "cap: %#.4x\n", htc->cap);
 
-		PRINT_HT_CAP((htc->cap & BIT(0)), "RX LDCP");
+		PRINT_HT_CAP((htc->cap & BIT(0)), "RX LDPC");
 		PRINT_HT_CAP((htc->cap & BIT(1)), "HT20/HT40");
 		PRINT_HT_CAP(!(htc->cap & BIT(1)), "HT20");
 
-- 
cgit v1.2.3-59-g8ed1b


From 68dd5b7a45d1935fcd32b786e8d3d3f7bb4bbfe7 Mon Sep 17 00:00:00 2001
From: Teemu Paasikivi <ext-teemu.3.paasikivi@nokia.com>
Date: Fri, 9 Apr 2010 13:07:55 +0300
Subject: mac80211: check whether scan is in progress before queueing scan_work

As scan_work is queued from work_work, it needs to be checked whether a
scan has been started during the execution of work_work. Otherwise, when
hw scan is used, the stack gets an error about the hw being busy with an
ongoing scan. This causes the stack to abort the scan without notifying
the driver about it. This leads to a situation where the hw is scanning
and the stack thinks it's not. Then, when the scan finishes, the stack
will complain with warnings.

Signed-off-by: Teemu Paasikivi <ext-teemu.3.paasikivi@nokia.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/work.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 1e1ea3007b06..7bd8670379de 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -919,11 +919,16 @@ static void ieee80211_work_work(struct work_struct *work)
 		run_again(local, jiffies + HZ/2);
 	}
 
-	if (list_empty(&local->work_list) && local->scan_req)
+	mutex_lock(&local->scan_mtx);
+
+	if (list_empty(&local->work_list) && local->scan_req &&
+	    !local->scanning)
 		ieee80211_queue_delayed_work(&local->hw,
 					     &local->scan_work,
 					     round_jiffies_relative(0));
 
+	mutex_unlock(&local->scan_mtx);
+
 	mutex_unlock(&local->work_mtx);
 
 	ieee80211_recalc_idle(local);
-- 
cgit v1.2.3-59-g8ed1b


From 419f9f896074ce8b21e88066e6f3515f18e5641c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 11 Apr 2010 02:15:53 +0000
Subject: tcp: Handle CHECKSUM_PARTIAL for SYNACK packets for IPv4

tcp: Handle CHECKSUM_PARTIAL for SYNACK packets for IPv4

This patch moves the common code between tcp_v4_send_check and
tcp_v4_gso_send_check into a new function __tcp_v4_send_check.

It then uses the new function in tcp_v4_send_synack so that it
handles CHECKSUM_PARTIAL properly.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Yinghai <yinghai.lu@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3c23e70885f4..aebfd28c5089 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -519,26 +519,31 @@ out:
 	sock_put(sk);
 }
 
-/* This routine computes an IPv4 TCP checksum. */
-void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
+static void __tcp_v4_send_check(struct sk_buff *skb,
+				__be32 saddr, __be32 daddr)
 {
-	struct inet_sock *inet = inet_sk(sk);
 	struct tcphdr *th = tcp_hdr(skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		th->check = ~tcp_v4_check(len, inet->inet_saddr,
-					  inet->inet_daddr, 0);
+		th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
 		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct tcphdr, check);
 	} else {
-		th->check = tcp_v4_check(len, inet->inet_saddr,
-					 inet->inet_daddr,
+		th->check = tcp_v4_check(skb->len, saddr, daddr,
 					 csum_partial(th,
 						      th->doff << 2,
 						      skb->csum));
 	}
 }
 
+/* This routine computes an IPv4 TCP checksum. */
+void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
+{
+	struct inet_sock *inet = inet_sk(sk);
+
+	__tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
+}
+
 int tcp_v4_gso_send_check(struct sk_buff *skb)
 {
 	const struct iphdr *iph;
@@ -551,10 +556,8 @@ int tcp_v4_gso_send_check(struct sk_buff *skb)
 	th = tcp_hdr(skb);
 
 	th->check = 0;
-	th->check = ~tcp_v4_check(skb->len, iph->saddr, iph->daddr, 0);
-	skb->csum_start = skb_transport_header(skb) - skb->head;
-	skb->csum_offset = offsetof(struct tcphdr, check);
 	skb->ip_summed = CHECKSUM_PARTIAL;
+	__tcp_v4_send_check(skb, iph->saddr, iph->daddr);
 	return 0;
 }
 
@@ -763,13 +766,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	skb = tcp_make_synack(sk, dst, req, rvp);
 
 	if (skb) {
-		struct tcphdr *th = tcp_hdr(skb);
-
-		th->check = tcp_v4_check(skb->len,
-					 ireq->loc_addr,
-					 ireq->rmt_addr,
-					 csum_partial(th, skb->len,
-						      skb->csum));
+		__tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr);
 
 		err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
 					    ireq->rmt_addr,
-- 
cgit v1.2.3-59-g8ed1b


From 8ad50d96db58c58ba67ec1c6f9d3dae0db52338a Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 11 Apr 2010 02:15:54 +0000
Subject: tcp: Handle CHECKSUM_PARTIAL for SYNACK packets for IPv6

tcp: Handle CHECKSUM_PARTIAL for SYNACK packets for IPv6

This patch moves the common code between tcp_v6_send_check and
tcp_v6_gso_send_check into a new function __tcp_v6_send_check.

It then uses the new function in tcp_v6_send_synack as well as
tcp_v6_send_response so that they handle CHECKSUM_PARTIAL properly.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Yinghai <yinghai.lu@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c92ebe8f80d5..f84c506c588a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -75,6 +75,9 @@ static void	tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 				      struct request_sock *req);
 
 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
+static void	__tcp_v6_send_check(struct sk_buff *skb,
+				    struct in6_addr *saddr,
+				    struct in6_addr *daddr);
 
 static const struct inet_connection_sock_af_ops ipv6_mapped;
 static const struct inet_connection_sock_af_ops ipv6_specific;
@@ -503,11 +506,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 
 	skb = tcp_make_synack(sk, dst, req, rvp);
 	if (skb) {
-		struct tcphdr *th = tcp_hdr(skb);
-
-		th->check = tcp_v6_check(skb->len,
-					 &treq->loc_addr, &treq->rmt_addr,
-					 csum_partial(th, skb->len, skb->csum));
+		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
 
 		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
 		err = ip6_xmit(sk, skb, &fl, opt, 0);
@@ -918,22 +917,29 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = {
 	.twsk_destructor= tcp_twsk_destructor,
 };
 
-static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
+static void __tcp_v6_send_check(struct sk_buff *skb,
+				struct in6_addr *saddr, struct in6_addr *daddr)
 {
-	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcphdr *th = tcp_hdr(skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
+		th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
 		skb->csum_start = skb_transport_header(skb) - skb->head;
 		skb->csum_offset = offsetof(struct tcphdr, check);
 	} else {
-		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
-					    csum_partial(th, th->doff<<2,
-							 skb->csum));
+		th->check = tcp_v6_check(skb->len, saddr, daddr,
+					 csum_partial(th, th->doff << 2,
+						      skb->csum));
 	}
 }
 
+static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+
+	__tcp_v6_send_check(skb, &np->saddr, &np->daddr);
+}
+
 static int tcp_v6_gso_send_check(struct sk_buff *skb)
 {
 	struct ipv6hdr *ipv6h;
@@ -946,11 +952,8 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb)
 	th = tcp_hdr(skb);
 
 	th->check = 0;
-	th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
-				     IPPROTO_TCP, 0);
-	skb->csum_start = skb_transport_header(skb) - skb->head;
-	skb->csum_offset = offsetof(struct tcphdr, check);
 	skb->ip_summed = CHECKSUM_PARTIAL;
+	__tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
 	return 0;
 }
 
@@ -1053,9 +1056,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
 
-	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
-				    tot_len, IPPROTO_TCP,
-				    buff->csum);
+	__tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst);
 
 	fl.proto = IPPROTO_TCP;
 	fl.oif = inet6_iif(skb);
-- 
cgit v1.2.3-59-g8ed1b


From bb29624614c2afe2873ee8ee97cf09df42701694 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 11 Apr 2010 02:15:55 +0000
Subject: inet: Remove unused send_check length argument

inet: Remove unused send_check length argument

This patch removes the unused length argument from the send_check
function in struct inet_connection_sock_af_ops.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Yinghai <yinghai.lu@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 3 +--
 include/net/tcp.h                  | 2 +-
 net/dccp/dccp.h                    | 2 +-
 net/dccp/ipv4.c                    | 2 +-
 net/dccp/ipv6.c                    | 3 +--
 net/dccp/output.c                  | 2 +-
 net/ipv4/tcp_ipv4.c                | 2 +-
 net/ipv4/tcp_output.c              | 2 +-
 net/ipv6/tcp_ipv6.c                | 2 +-
 9 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 696d6e4ce68a..52c8b8b8a0b9 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -37,8 +37,7 @@ struct tcp_congestion_ops;
  */
 struct inet_connection_sock_af_ops {
 	int	    (*queue_xmit)(struct sk_buff *skb, int ipfragok);
-	void	    (*send_check)(struct sock *sk, int len,
-				  struct sk_buff *skb);
+	void	    (*send_check)(struct sock *sk, struct sk_buff *skb);
 	int	    (*rebuild_header)(struct sock *sk);
 	int	    (*conn_request)(struct sock *sk, struct sk_buff *skb);
 	struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 75be5a28815d..70c5159f4b36 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -423,7 +423,7 @@ extern u8			*tcp_parse_md5sig_option(struct tcphdr *th);
  *	TCP v4 functions exported for the inet6 API
  */
 
-extern void		       	tcp_v4_send_check(struct sock *sk, int len,
+extern void		       	tcp_v4_send_check(struct sock *sk,
 						  struct sk_buff *skb);
 
 extern int			tcp_v4_conn_request(struct sock *sk,
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 53f8e12d0c10..a10a61a1ded2 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -223,7 +223,7 @@ static inline void dccp_csum_outgoing(struct sk_buff *skb)
 	skb->csum = skb_checksum(skb, 0, (cov > skb->len)? skb->len : cov, 0);
 }
 
-extern void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
+extern void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb);
 
 extern int  dccp_retransmit_skb(struct sock *sk);
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 52ffa1cde15a..d9b11ef8694c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -349,7 +349,7 @@ static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb,
 	return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum);
 }
 
-void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb)
+void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 {
 	const struct inet_sock *inet = inet_sk(sk);
 	struct dccp_hdr *dh = dccp_hdr(skb);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 3b11e41a2929..ab1ab95946df 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -60,8 +60,7 @@ static inline __sum16 dccp_v6_csum_finish(struct sk_buff *skb,
 	return csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_DCCP, skb->csum);
 }
 
-static inline void dccp_v6_send_check(struct sock *sk, int unused_value,
-				      struct sk_buff *skb)
+static inline void dccp_v6_send_check(struct sock *sk, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct dccp_hdr *dh = dccp_hdr(skb);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index fc3f436440b4..b8d98e3c052a 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -129,7 +129,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 			break;
 		}
 
-		icsk->icsk_af_ops->send_check(sk, 0, skb);
+		icsk->icsk_af_ops->send_check(sk, skb);
 
 		if (set_ack)
 			dccp_event_ack_sent(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index aebfd28c5089..a24995cdc4b6 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -537,7 +537,7 @@ static void __tcp_v4_send_check(struct sk_buff *skb,
 }
 
 /* This routine computes an IPv4 TCP checksum. */
-void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
+void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
 {
 	struct inet_sock *inet = inet_sk(sk);
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0dda86e72ad8..0ae7ce7a71a6 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -878,7 +878,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	}
 #endif
 
-	icsk->icsk_af_ops->send_check(sk, skb->len, skb);
+	icsk->icsk_af_ops->send_check(sk, skb);
 
 	if (likely(tcb->flags & TCPCB_FLAG_ACK))
 		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f84c506c588a..b429dfdd69dc 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -933,7 +933,7 @@ static void __tcp_v6_send_check(struct sk_buff *skb,
 	}
 }
 
-static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
+static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
 
-- 
cgit v1.2.3-59-g8ed1b


From 2e8e18ef52e7dd1af0a3bd1f7d990a1d0b249586 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 8 Apr 2010 11:32:30 -0700
Subject: tcp: Set CHECKSUM_UNNECESSARY in tcp_init_nondata_skb

Back in commit 04a0551c87363f100b04d28d7a15a632b70e18e7
("loopback: Drop obsolete ip_summed setting") we stopped
setting CHECKSUM_UNNECESSARY in the loopback xmit.

This is because such a setting was a lie since it implies that the
checksum field of the packet is properly filled in.

Instead what happens normally is that CHECKSUM_PARTIAL is set and
skb->csum is calculated as needed.

But this was only happening for TCP data packets (via the
skb->ip_summed assignment done in tcp_sendmsg()).  It doesn't
happen for non-data packets like ACKs etc.

Fix this by setting skb->ip_summed in the common non-data packet
constructor.  It already is setting skb->csum to zero.

But this reminds us that we still have things like ip_output.c's
ip_dev_loopback_xmit() which sets skb->ip_summed to the value
CHECKSUM_UNNECESSARY, which Herbert's patch teaches us is not
valid.  So we'll have to address that at some point too.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0ae7ce7a71a6..e46849989a53 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -350,6 +350,7 @@ static inline void TCP_ECN_send(struct sock *sk, struct sk_buff *skb,
  */
 static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags)
 {
+	skb->ip_summed = CHECKSUM_PARTIAL;
 	skb->csum = 0;
 
 	TCP_SKB_CB(skb)->flags = flags;
-- 
cgit v1.2.3-59-g8ed1b


From ed85b565b825566da34e55eee9ad150ed93fdda0 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Wed, 7 Apr 2010 22:41:28 +0000
Subject: packet: support for TX time stamps on RAW sockets

Enable the SO_TIMESTAMPING socket infrastructure for raw packet sockets.
We introduce PACKET_TX_TIMESTAMP for the control message cmsg_type.

Similar support for UDP and CAN sockets was added in commit
51f31cabe3ce5345b51e4a4f82138b38c4d5dc91

Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_packet.h |  1 +
 net/packet/af_packet.c    | 61 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 61 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index aa57a5f993fc..6ac23ef1801a 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -47,6 +47,7 @@ struct sockaddr_ll {
 #define PACKET_TX_RING			13
 #define PACKET_LOSS			14
 #define PACKET_VNET_HDR			15
+#define PACKET_TX_TIMESTAMP		16
 
 struct tpacket_stats {
 	unsigned int	tp_packets;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d7d0310dca9d..f162d59d8161 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -82,6 +82,7 @@
 #include <linux/mutex.h>
 #include <linux/if_vlan.h>
 #include <linux/virtio_net.h>
+#include <linux/errqueue.h>
 
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
@@ -315,6 +316,8 @@ static inline struct packet_sock *pkt_sk(struct sock *sk)
 
 static void packet_sock_destruct(struct sock *sk)
 {
+	skb_queue_purge(&sk->sk_error_queue);
+
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 
@@ -483,6 +486,9 @@ retry:
 	skb->dev = dev;
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
+	err = sock_tx_timestamp(msg, sk, skb_tx(skb));
+	if (err < 0)
+		goto out_unlock;
 
 	dev_queue_xmit(skb);
 	rcu_read_unlock();
@@ -1188,6 +1194,9 @@ static int packet_snd(struct socket *sock,
 	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
 	if (err)
 		goto out_free;
+	err = sock_tx_timestamp(msg, sk, skb_tx(skb));
+	if (err < 0)
+		goto out_free;
 
 	skb->protocol = proto;
 	skb->dev = dev;
@@ -1487,6 +1496,51 @@ out:
 	return err;
 }
 
+static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+	struct sock_exterr_skb *serr;
+	struct sk_buff *skb, *skb2;
+	int copied, err;
+
+	err = -EAGAIN;
+	skb = skb_dequeue(&sk->sk_error_queue);
+	if (skb == NULL)
+		goto out;
+
+	copied = skb->len;
+	if (copied > len) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto out_free_skb;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	serr = SKB_EXT_ERR(skb);
+	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
+		 sizeof(serr->ee), &serr->ee);
+
+	msg->msg_flags |= MSG_ERRQUEUE;
+	err = copied;
+
+	/* Reset and regenerate socket error */
+	spin_lock_bh(&sk->sk_error_queue.lock);
+	sk->sk_err = 0;
+	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
+		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
+		spin_unlock_bh(&sk->sk_error_queue.lock);
+		sk->sk_error_report(sk);
+	} else
+		spin_unlock_bh(&sk->sk_error_queue.lock);
+
+out_free_skb:
+	kfree_skb(skb);
+out:
+	return err;
+}
+
 /*
  *	Pull a packet from our receive queue and hand it to the user.
  *	If necessary we block.
@@ -1502,7 +1556,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	int vnet_hdr_len = 0;
 
 	err = -EINVAL;
-	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
+	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
 		goto out;
 
 #if 0
@@ -1511,6 +1565,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return -ENODEV;
 #endif
 
+	if (flags & MSG_ERRQUEUE) {
+		err = packet_recv_error(sk, msg, len);
+		goto out;
+	}
+
 	/*
 	 *	Call the generic datagram receiver. This handles all sorts
 	 *	of horrible races and re-entrancy so we can forget about it
-- 
cgit v1.2.3-59-g8ed1b


From 7a161ea92471087a1579239d7a58dd06eaa5601c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 8 Apr 2010 21:26:13 +0000
Subject: net: Don't use netdev_warn()

Don't use netdev_warn() in dev_cap_txqueue() and get_rps_cpu() so that we
can catch the following warnings without crashing.

bond0.2240 received packet on queue 6, but number of RX queues is 1
bond0.2240 received packet on queue 11, but number of RX queues is 1

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index a10a21619ae3..0eb79e35671f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1987,9 +1987,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 {
 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
 		if (net_ratelimit()) {
-			netdev_warn(dev, "selects TX queue %d, but "
-			     "real number of TX queues is %d\n",
-			     queue_index, dev->real_num_tx_queues);
+			pr_warning("%s selects TX queue %d, but "
+				"real number of TX queues is %d\n",
+				dev->name, queue_index, dev->real_num_tx_queues);
 		}
 		return 0;
 	}
@@ -2223,9 +2223,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 		u16 index = skb_get_rx_queue(skb);
 		if (unlikely(index >= dev->num_rx_queues)) {
 			if (net_ratelimit()) {
-				netdev_warn(dev, "received packet on queue "
-				    "%u, but number of RX queues is %u\n",
-				     index, dev->num_rx_queues);
+				pr_warning("%s received packet on queue "
+					"%u, but number of RX queues is %u\n",
+					dev->name, index, dev->num_rx_queues);
 			}
 			goto done;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From b6c6712a42ca3f9fa7f4a3d7c40e3a9dd1fd9e03 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 8 Apr 2010 23:03:29 +0000
Subject: net: sk_dst_cache RCUification

With latest CONFIG_PROVE_RCU stuff, I felt more comfortable to make this
work.

sk->sk_dst_cache is currently protected by a rwlock (sk_dst_lock)

This rwlock is readlocked for a very small amount of time, and dst
entries are already freed after RCU grace period. This calls for RCU
again :)

This patch converts sk_dst_lock to a spinlock, and uses RCU for readers.

__sk_dst_get() is supposed to be called with rcu_read_lock() held or with
the socket locked by the user, so use the appropriate rcu_dereference_check()
condition (rcu_read_lock_held() || sock_owned_by_user(sk))

This patch avoids two atomic ops per tx packet on UDP connected sockets,
for example, and permits sk_dst_lock to be much less dirtied.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h        | 15 ---------------
 include/net/ip6_route.h  |  4 ++--
 include/net/sock.h       | 47 ++++++++++++++++++++++++++++++-----------------
 net/core/dev.c           |  2 +-
 net/core/sock.c          |  8 ++++----
 net/dccp/timer.c         |  4 ++--
 net/decnet/af_decnet.c   |  6 +++---
 net/ipv4/af_inet.c       |  2 +-
 net/ipv4/tcp_input.c     |  4 ++--
 net/ipv4/tcp_timer.c     |  4 ++--
 net/ipv6/ipv6_sockglue.c | 25 +++++++++++++------------
 11 files changed, 60 insertions(+), 61 deletions(-)

(limited to 'net')

diff --git a/include/net/dst.h b/include/net/dst.h
index ce078cda6b74..aac5a5fcfda9 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -225,21 +225,6 @@ static inline void dst_confirm(struct dst_entry *dst)
 		neigh_confirm(dst->neighbour);
 }
 
-static inline void dst_negative_advice(struct dst_entry **dst_p,
-				       struct sock *sk)
-{
-	struct dst_entry * dst = *dst_p;
-	if (dst && dst->ops->negative_advice) {
-		*dst_p = dst->ops->negative_advice(dst);
-
-		if (dst != *dst_p) {
-			extern void sk_reset_txq(struct sock *sk);
-
-			sk_reset_txq(sk);
-		}
-	}
-}
-
 static inline void dst_link_failure(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 68f67836e146..278312c95f96 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -152,9 +152,9 @@ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst,
 static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst,
 				 struct in6_addr *daddr, struct in6_addr *saddr)
 {
-	write_lock(&sk->sk_dst_lock);
+	spin_lock(&sk->sk_dst_lock);
 	__ip6_dst_store(sk, dst, daddr, saddr);
-	write_unlock(&sk->sk_dst_lock);
+	spin_unlock(&sk->sk_dst_lock);
 }
 
 static inline int ipv6_unicast_destination(struct sk_buff *skb)
diff --git a/include/net/sock.h b/include/net/sock.h
index b4603cd54fcd..56df440a950b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -262,7 +262,7 @@ struct sock {
 #ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
 #endif
-	rwlock_t		sk_dst_lock;
+	spinlock_t		sk_dst_lock;
 	atomic_t		sk_rmem_alloc;
 	atomic_t		sk_wmem_alloc;
 	atomic_t		sk_omem_alloc;
@@ -1192,7 +1192,8 @@ extern unsigned long sock_i_ino(struct sock *sk);
 static inline struct dst_entry *
 __sk_dst_get(struct sock *sk)
 {
-	return sk->sk_dst_cache;
+	return rcu_dereference_check(sk->sk_dst_cache, rcu_read_lock_held() ||
+						       sock_owned_by_user(sk));
 }
 
 static inline struct dst_entry *
@@ -1200,50 +1201,62 @@ sk_dst_get(struct sock *sk)
 {
 	struct dst_entry *dst;
 
-	read_lock(&sk->sk_dst_lock);
-	dst = sk->sk_dst_cache;
+	rcu_read_lock();
+	dst = rcu_dereference(sk->sk_dst_cache);
 	if (dst)
 		dst_hold(dst);
-	read_unlock(&sk->sk_dst_lock);
+	rcu_read_unlock();
 	return dst;
 }
 
+extern void sk_reset_txq(struct sock *sk);
+
+static inline void dst_negative_advice(struct sock *sk)
+{
+	struct dst_entry *ndst, *dst = __sk_dst_get(sk);
+
+	if (dst && dst->ops->negative_advice) {
+		ndst = dst->ops->negative_advice(dst);
+
+		if (ndst != dst) {
+			rcu_assign_pointer(sk->sk_dst_cache, ndst);
+			sk_reset_txq(sk);
+		}
+	}
+}
+
 static inline void
 __sk_dst_set(struct sock *sk, struct dst_entry *dst)
 {
 	struct dst_entry *old_dst;
 
 	sk_tx_queue_clear(sk);
-	old_dst = sk->sk_dst_cache;
-	sk->sk_dst_cache = dst;
+	old_dst = rcu_dereference_check(sk->sk_dst_cache,
+					lockdep_is_held(&sk->sk_dst_lock));
+	rcu_assign_pointer(sk->sk_dst_cache, dst);
 	dst_release(old_dst);
 }
 
 static inline void
 sk_dst_set(struct sock *sk, struct dst_entry *dst)
 {
-	write_lock(&sk->sk_dst_lock);
+	spin_lock(&sk->sk_dst_lock);
 	__sk_dst_set(sk, dst);
-	write_unlock(&sk->sk_dst_lock);
+	spin_unlock(&sk->sk_dst_lock);
 }
 
 static inline void
 __sk_dst_reset(struct sock *sk)
 {
-	struct dst_entry *old_dst;
-
-	sk_tx_queue_clear(sk);
-	old_dst = sk->sk_dst_cache;
-	sk->sk_dst_cache = NULL;
-	dst_release(old_dst);
+	__sk_dst_set(sk, NULL);
 }
 
 static inline void
 sk_dst_reset(struct sock *sk)
 {
-	write_lock(&sk->sk_dst_lock);
+	spin_lock(&sk->sk_dst_lock);
 	__sk_dst_reset(sk);
-	write_unlock(&sk->sk_dst_lock);
+	spin_unlock(&sk->sk_dst_lock);
 }
 
 extern struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0eb79e35671f..ca4cdef74a1b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2015,7 +2015,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 			if (dev->real_num_tx_queues > 1)
 				queue_index = skb_tx_hash(dev, skb);
 
-			if (sk && sk->sk_dst_cache)
+			if (sk && rcu_dereference_check(sk->sk_dst_cache, 1))
 				sk_tx_queue_set(sk, queue_index);
 		}
 	}
diff --git a/net/core/sock.c b/net/core/sock.c
index c5812bbc2cc9..7effa1e689df 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -364,11 +364,11 @@ EXPORT_SYMBOL(sk_reset_txq);
 
 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
 {
-	struct dst_entry *dst = sk->sk_dst_cache;
+	struct dst_entry *dst = __sk_dst_get(sk);
 
 	if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
 		sk_tx_queue_clear(sk);
-		sk->sk_dst_cache = NULL;
+		rcu_assign_pointer(sk->sk_dst_cache, NULL);
 		dst_release(dst);
 		return NULL;
 	}
@@ -1157,7 +1157,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		skb_queue_head_init(&newsk->sk_async_wait_queue);
 #endif
 
-		rwlock_init(&newsk->sk_dst_lock);
+		spin_lock_init(&newsk->sk_dst_lock);
 		rwlock_init(&newsk->sk_callback_lock);
 		lockdep_set_class_and_name(&newsk->sk_callback_lock,
 				af_callback_keys + newsk->sk_family,
@@ -1898,7 +1898,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	} else
 		sk->sk_sleep	=	NULL;
 
-	rwlock_init(&sk->sk_dst_lock);
+	spin_lock_init(&sk->sk_dst_lock);
 	rwlock_init(&sk->sk_callback_lock);
 	lockdep_set_class_and_name(&sk->sk_callback_lock,
 			af_callback_keys + sk->sk_family,
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index bbfeb5eae46a..1a9aa05d4dc4 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -38,7 +38,7 @@ static int dccp_write_timeout(struct sock *sk)
 
 	if (sk->sk_state == DCCP_REQUESTING || sk->sk_state == DCCP_PARTOPEN) {
 		if (icsk->icsk_retransmits != 0)
-			dst_negative_advice(&sk->sk_dst_cache, sk);
+			dst_negative_advice(sk);
 		retry_until = icsk->icsk_syn_retries ?
 			    : sysctl_dccp_request_retries;
 	} else {
@@ -63,7 +63,7 @@ static int dccp_write_timeout(struct sock *sk)
 			   Golden words :-).
 		   */
 
-			dst_negative_advice(&sk->sk_dst_cache, sk);
+			dst_negative_advice(sk);
 		}
 
 		retry_until = sysctl_dccp_retries2;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 2b494fac9468..55e3b6b0061a 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -446,7 +446,7 @@ static void dn_destruct(struct sock *sk)
 	skb_queue_purge(&scp->other_xmit_queue);
 	skb_queue_purge(&scp->other_receive_queue);
 
-	dst_release(xchg(&sk->sk_dst_cache, NULL));
+	dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
 }
 
 static int dn_memory_pressure;
@@ -1105,7 +1105,7 @@ static int dn_accept(struct socket *sock, struct socket *newsock, int flags)
 	release_sock(sk);
 
 	dst = skb_dst(skb);
-	dst_release(xchg(&newsk->sk_dst_cache, dst));
+	sk_dst_set(newsk, dst);
 	skb_dst_set(skb, NULL);
 
 	DN_SK(newsk)->state        = DN_CR;
@@ -1956,7 +1956,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
 	}
 
 	if ((flags & MSG_TRYHARD) && sk->sk_dst_cache)
-		dst_negative_advice(&sk->sk_dst_cache, sk);
+		dst_negative_advice(sk);
 
 	mss = scp->segsize_rem;
 	fctype = scp->services_rem & NSP_FC_MASK;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index a0beb32beaa3..193dcd6ed64f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -154,7 +154,7 @@ void inet_sock_destruct(struct sock *sk)
 	WARN_ON(sk->sk_forward_alloc);
 
 	kfree(inet->opt);
-	dst_release(sk->sk_dst_cache);
+	dst_release(rcu_dereference_check(sk->sk_dst_cache, 1));
 	sk_refcnt_debug_dec(sk);
 }
 EXPORT_SYMBOL(inet_sock_destruct);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4000b10610b7..ae3ec15fb630 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3710,7 +3710,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
 	}
 
 	if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
-		dst_confirm(sk->sk_dst_cache);
+		dst_confirm(__sk_dst_get(sk));
 
 	return 1;
 
@@ -5833,7 +5833,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			if (tp->snd_una == tp->write_seq) {
 				tcp_set_state(sk, TCP_FIN_WAIT2);
 				sk->sk_shutdown |= SEND_SHUTDOWN;
-				dst_confirm(sk->sk_dst_cache);
+				dst_confirm(__sk_dst_get(sk));
 
 				if (!sock_flag(sk, SOCK_DEAD))
 					/* Wake up lingering close() */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8a0ab2977f1f..c732be00606b 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -172,14 +172,14 @@ static int tcp_write_timeout(struct sock *sk)
 
 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
 		if (icsk->icsk_retransmits)
-			dst_negative_advice(&sk->sk_dst_cache, sk);
+			dst_negative_advice(sk);
 		retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
 	} else {
 		if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
 			/* Black hole detection */
 			tcp_mtu_probing(icsk, sk);
 
-			dst_negative_advice(&sk->sk_dst_cache, sk);
+			dst_negative_advice(sk);
 		}
 
 		retry_until = sysctl_tcp_retries2;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 33f60fca7aa7..1160400e9dbd 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -114,9 +114,9 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
 		}
 		opt = xchg(&inet6_sk(sk)->opt, opt);
 	} else {
-		write_lock(&sk->sk_dst_lock);
+		spin_lock(&sk->sk_dst_lock);
 		opt = xchg(&inet6_sk(sk)->opt, opt);
-		write_unlock(&sk->sk_dst_lock);
+		spin_unlock(&sk->sk_dst_lock);
 	}
 	sk_dst_reset(sk);
 
@@ -971,14 +971,13 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 	case IPV6_MTU:
 	{
 		struct dst_entry *dst;
+
 		val = 0;
-		lock_sock(sk);
-		dst = sk_dst_get(sk);
-		if (dst) {
+		rcu_read_lock();
+		dst = __sk_dst_get(sk);
+		if (dst)
 			val = dst_mtu(dst);
-			dst_release(dst);
-		}
-		release_sock(sk);
+		rcu_read_unlock();
 		if (!val)
 			return -ENOTCONN;
 		break;
@@ -1066,12 +1065,14 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		else
 			val = np->mcast_hops;
 
-		dst = sk_dst_get(sk);
-		if (dst) {
-			if (val < 0)
+		if (val < 0) {
+			rcu_read_lock();
+			dst = __sk_dst_get(sk);
+			if (dst)
 				val = ip6_dst_hoplimit(dst);
-			dst_release(dst);
+			rcu_read_unlock();
 		}
+
 		if (val < 0)
 			val = sock_net(sk)->ipv6.devconf_all->hop_limit;
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From 22068311b62858ea7eb71653a07564fd73d7a9b0 Mon Sep 17 00:00:00 2001
From: Zhitong Wang <zhitong.wangzt@alibaba-inc.com>
Date: Tue, 13 Apr 2010 11:25:41 +0200
Subject: netfilter: fix some coding styles and remove moduleparam.h

Fix some coding styles and remove moduleparam.h

Signed-off-by: Zhitong Wang <zhitong.wangzt@alibaba-inc.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/nf_nat_standalone.c | 3 +--
 net/ipv4/netfilter/nf_nat_tftp.c       | 1 -
 net/netfilter/nf_conntrack_proto.c     | 2 --
 3 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 5678e9562c15..0b49248e34fa 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -137,9 +137,8 @@ nf_nat_fn(unsigned int hooknum,
 				ret = nf_nat_rule_find(skb, hooknum, in, out,
 						       ct);
 
-			if (ret != NF_ACCEPT) {
+			if (ret != NF_ACCEPT)
 				return ret;
-			}
 		} else
 			pr_debug("Already setup manip %s for ct %p\n",
 				 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
index b096e81500ae..7274a43c7a12 100644
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ b/net/ipv4/netfilter/nf_nat_tftp.c
@@ -6,7 +6,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/udp.h>
 
 #include <net/netfilter/nf_nat_helper.h>
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 1a4568bf7ea5..f71cd5da751c 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -13,12 +13,10 @@
 #include <linux/netfilter.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/skbuff.h>
 #include <linux/vmalloc.h>
 #include <linux/stddef.h>
 #include <linux/err.h>
 #include <linux/percpu.h>
-#include <linux/moduleparam.h>
 #include <linux/notifier.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
-- 
cgit v1.2.3-59-g8ed1b


From 93fa159abe50d3c55c7f83622d3f5c09b6e06f4b Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 12 Apr 2010 05:41:31 +0000
Subject: IPv6: keep route for tentative address

Recent changes preserve the IPv6 address when the link goes down (good),
but would cause the address to point to a dead dst entry (bad).
The simplest fix is to just not delete the route if the address is
being held for later use.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1b00bfef268e..a9913d23f7c7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4047,7 +4047,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 			addrconf_leave_anycast(ifp);
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
 		dst_hold(&ifp->rt->u.dst);
-		if (ip6_del_rt(ifp->rt))
+
+		if (ifp->dead && ip6_del_rt(ifp->rt))
 			dst_free(&ifp->rt->u.dst);
 		break;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 27bdb2abcc5edb3526e25407b74bf17d1872c329 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 12 Apr 2010 05:41:32 +0000
Subject: IPv6: keep tentative addresses in hash table

When the link goes down, we want the address to be preserved but in a
tentative state; therefore it has to stay in the hash list.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a9913d23f7c7..9d78c1229497 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2704,17 +2704,18 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 			/* Flag it for later restoration when link comes up */
 			ifa->flags |= IFA_F_TENTATIVE;
 			in6_ifa_hold(ifa);
+			write_unlock_bh(&idev->lock);
 		} else {
 			list_del(&ifa->if_list);
 			ifa->dead = 1;
-		}
-		write_unlock_bh(&idev->lock);
+			write_unlock_bh(&idev->lock);
 
-		/* clear hash table */
-		spin_lock_bh(&addrconf_hash_lock);
-		hlist_del_init_rcu(&ifa->addr_lst);
-		__in6_ifa_put(ifa);
-		spin_unlock_bh(&addrconf_hash_lock);
+			/* clear hash table */
+			spin_lock_bh(&addrconf_hash_lock);
+			hlist_del_init_rcu(&ifa->addr_lst);
+			__in6_ifa_put(ifa);
+			spin_unlock_bh(&addrconf_hash_lock);
+		}
 
 		__ipv6_ifa_notify(RTM_DELADDR, ifa);
 		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
-- 
cgit v1.2.3-59-g8ed1b


From d1f84c63a465d6ba16955930519b7f68c550cae1 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 12 Apr 2010 05:41:33 +0000
Subject: ipv6: additional ref count for hash list unnecessary

Since an address in hash list has to already have a ref count,
no additional ref count is needed.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 9d78c1229497..a0175edb6589 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -676,7 +676,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	hash = ipv6_addr_hash(addr);
 
 	hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
-	in6_ifa_hold(ifa);
 	spin_unlock(&addrconf_hash_lock);
 
 	write_lock(&idev->lock);
@@ -724,7 +723,6 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 
 	spin_lock_bh(&addrconf_hash_lock);
 	hlist_del_init_rcu(&ifp->addr_lst);
-	__in6_ifa_put(ifp);
 	spin_unlock_bh(&addrconf_hash_lock);
 
 	write_lock_bh(&idev->lock);
@@ -2713,7 +2711,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 			/* clear hash table */
 			spin_lock_bh(&addrconf_hash_lock);
 			hlist_del_init_rcu(&ifa->addr_lst);
-			__in6_ifa_put(ifa);
 			spin_unlock_bh(&addrconf_hash_lock);
 		}
 
-- 
cgit v1.2.3-59-g8ed1b


From 8595805aafc8b077e01804c9a3668e9aa3510e89 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 12 Apr 2010 05:41:34 +0000
Subject: IPv6: only notify protocols if address is completely gone

The notifier for address down should only be called if the address is completely
gone, not just being marked as tentative on link transition. The code
in net-next would cause bonding/sctp/s390 to see the address disappear on link
down, but they would never see it reappear on link up.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a0175edb6589..7cba8845242f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2715,7 +2715,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		}
 
 		__ipv6_ifa_notify(RTM_DELADDR, ifa);
-		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
+		if (ifa->dead)
+			atomic_notifier_call_chain(&inet6addr_chain,
+						   NETDEV_DOWN, ifa);
 		in6_ifa_put(ifa);
 
 		write_lock_bh(&idev->lock);
-- 
cgit v1.2.3-59-g8ed1b


From 8237908e145b84d4b020790df0f9315d2f71e980 Mon Sep 17 00:00:00 2001
From: Bart De Schuymer <bdschuym@pandora.be>
Date: Tue, 13 Apr 2010 11:40:41 +0200
Subject: netfilter: bridge-netfilter: cleanup br_netfilter.c

bridge-netfilter: cleanup br_netfilter.c

- remove some of the graffiti at the head of br_netfilter.c
- remove __br_dnat_complain()
- remove KERN_INFO messages when CONFIG_NETFILTER_DEBUG is defined

Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/br_netfilter.c | 58 ++---------------------------------------------
 1 file changed, 2 insertions(+), 56 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index bc6b57248494..dd6f538ba0b0 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -3,15 +3,8 @@
  *	Linux ethernet bridge
  *
  *	Authors:
- *	Lennert Buytenhek               <buytenh@gnu.org>
- *	Bart De Schuymer (maintainer)	<bdschuym@pandora.be>
- *
- *	Changes:
- *	Apr 29 2003: physdev module support (bdschuym)
- *	Jun 19 2003: let arptables see bridged ARP traffic (bdschuym)
- *	Oct 06 2003: filter encapsulated IP/ARP VLAN traffic on untagged bridge
- *	             (bdschuym)
- *	Sep 01 2004: add IPv6 filtering (bdschuym)
+ *	Lennert Buytenhek		<buytenh@gnu.org>
+ *	Bart De Schuymer		<bdschuym@pandora.be>
  *
  *	This program is free software; you can redistribute it and/or
  *	modify it under the terms of the GNU General Public License
@@ -252,17 +245,6 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 	return 0;
 }
 
-static void __br_dnat_complain(void)
-{
-	static unsigned long last_complaint;
-
-	if (jiffies - last_complaint >= 5 * HZ) {
-		printk(KERN_WARNING "Performing cross-bridge DNAT requires IP "
-		       "forwarding to be enabled\n");
-		last_complaint = jiffies;
-	}
-}
-
 /* This requires some explaining. If DNAT has taken place,
  * we will need to fix up the destination Ethernet address,
  * and this is a tricky process.
@@ -378,11 +360,6 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
 					skb_dst_set(skb, (struct dst_entry *)rt);
 					goto bridged_dnat;
 				}
-				/* we are sure that forwarding is disabled, so printing
-				 * this message is no problem. Note that the packet could
-				 * still have a martian destination address, in which case
-				 * the packet could be dropped even if forwarding were enabled */
-				__br_dnat_complain();
 				dst_release((struct dst_entry *)rt);
 			}
 free_skb:
@@ -820,17 +797,6 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 	struct net_device *realoutdev = bridge_parent(skb->dev);
 	u_int8_t pf;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	/* Be very paranoid. This probably won't happen anymore, but let's
-	 * keep the check just to be sure... */
-	if (skb_mac_header(skb) < skb->head ||
-	    skb_mac_header(skb) + ETH_HLEN > skb->data) {
-		printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: "
-		       "bad mac.raw pointer.\n");
-		goto print_error;
-	}
-#endif
-
 	if (!nf_bridge)
 		return NF_ACCEPT;
 
@@ -849,13 +815,6 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 	else
 		return NF_ACCEPT;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	if (skb_dst(skb) == NULL) {
-		printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n");
-		goto print_error;
-	}
-#endif
-
 	/* We assume any code from br_dev_queue_push_xmit onwards doesn't care
 	 * about the value of skb->pkt_type. */
 	if (skb->pkt_type == PACKET_OTHERHOST) {
@@ -870,19 +829,6 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 		br_nf_dev_queue_xmit);
 
 	return NF_STOLEN;
-
-#ifdef CONFIG_NETFILTER_DEBUG
-print_error:
-	if (skb->dev != NULL) {
-		printk("[%s]", skb->dev->name);
-		if (realoutdev)
-			printk("[%s]", realoutdev->name);
-	}
-	printk(" head:%p, raw:%p, data:%p\n", skb->head, skb_mac_header(skb),
-	       skb->data);
-	dump_stack();
-	return NF_ACCEPT;
-#endif
 }
 
 /* IP/SABOTAGE *****************************************************/
-- 
cgit v1.2.3-59-g8ed1b


From e26c28e8bffe12b27df5b828404afed9e1949191 Mon Sep 17 00:00:00 2001
From: Bart De Schuymer <bdschuym@pandora.be>
Date: Tue, 13 Apr 2010 11:41:39 +0200
Subject: netfilter: bridge-netfilter: update a comment in br_forward.c about
 ip_fragment()

ip_refrag isn't used anymore in the bridge-netfilter code

Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/br_forward.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 7ab52d07b477..15abef7349f3 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -43,7 +43,7 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
 	if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb))
 		kfree_skb(skb);
 	else {
-		/* ip_refrag calls ip_fragment, doesn't copy the MAC header. */
+		/* ip_fragment doesn't copy the MAC header */
 		if (nf_bridge_maybe_copy_header(skb))
 			kfree_skb(skb);
 		else {
-- 
cgit v1.2.3-59-g8ed1b


From acbbc07145b919248c410e1852b953d385be5c97 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 11 Apr 2010 06:56:11 +0000
Subject: net: uninline skb_bond_should_drop()

skb_bond_should_drop() is too big to be inlined.

This patch reduces kernel text size, and its compilation time as well
(shrinking include/linux/netdevice.h)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 48 ++++------------------------------------------
 net/core/dev.c            | 49 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d1a21b576a40..470f7c951afb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2089,54 +2089,14 @@ static inline void netif_set_gso_max_size(struct net_device *dev,
 	dev->gso_max_size = size;
 }
 
-static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
-					      struct net_device *master)
-{
-	if (skb->pkt_type == PACKET_HOST) {
-		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
-
-		memcpy(dest, master->dev_addr, ETH_ALEN);
-	}
-}
+extern int __skb_bond_should_drop(struct sk_buff *skb,
+				  struct net_device *master);
 
-/* On bonding slaves other than the currently active slave, suppress
- * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
- * ARP on active-backup slaves with arp_validate enabled.
- */
 static inline int skb_bond_should_drop(struct sk_buff *skb,
 				       struct net_device *master)
 {
-	if (master) {
-		struct net_device *dev = skb->dev;
-
-		if (master->priv_flags & IFF_MASTER_ARPMON)
-			dev->last_rx = jiffies;
-
-		if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
-			/* Do address unmangle. The local destination address
-			 * will be always the one master has. Provides the right
-			 * functionality in a bridge.
-			 */
-			skb_bond_set_mac_by_master(skb, master);
-		}
-
-		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
-			if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
-			    skb->protocol == __cpu_to_be16(ETH_P_ARP))
-				return 0;
-
-			if (master->priv_flags & IFF_MASTER_ALB) {
-				if (skb->pkt_type != PACKET_BROADCAST &&
-				    skb->pkt_type != PACKET_MULTICAST)
-					return 0;
-			}
-			if (master->priv_flags & IFF_MASTER_8023AD &&
-			    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
-				return 0;
-
-			return 1;
-		}
-	}
+	if (master)
+		return __skb_bond_should_drop(skb, master);
 	return 0;
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index ca4cdef74a1b..876b1112d5ba 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2646,6 +2646,55 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
+static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
+					      struct net_device *master)
+{
+	if (skb->pkt_type == PACKET_HOST) {
+		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
+
+		memcpy(dest, master->dev_addr, ETH_ALEN);
+	}
+}
+
+/* On bonding slaves other than the currently active slave, suppress
+ * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
+ * ARP on active-backup slaves with arp_validate enabled.
+ */
+int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
+{
+	struct net_device *dev = skb->dev;
+
+	if (master->priv_flags & IFF_MASTER_ARPMON)
+		dev->last_rx = jiffies;
+
+	if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
+		/* Do address unmangle. The local destination address
+		 * will be always the one master has. Provides the right
+		 * functionality in a bridge.
+		 */
+		skb_bond_set_mac_by_master(skb, master);
+	}
+
+	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
+		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
+		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
+			return 0;
+
+		if (master->priv_flags & IFF_MASTER_ALB) {
+			if (skb->pkt_type != PACKET_BROADCAST &&
+			    skb->pkt_type != PACKET_MULTICAST)
+				return 0;
+		}
+		if (master->priv_flags & IFF_MASTER_8023AD &&
+		    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
+			return 0;
+
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(__skb_bond_should_drop);
+
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
-- 
cgit v1.2.3-59-g8ed1b


From 561155110307ad304226a23272244398fa46cbae Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 12 Apr 2010 07:38:05 +0000
Subject: dst: don't inline dst_ifdown

The function dst_ifdown is called in only two places, both in a
non-performance-critical code path, so there is no reason to inline it.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dst.c b/net/core/dst.c
index b8c22f0f9373..9920722cc82b 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -286,8 +286,8 @@ EXPORT_SYMBOL(dst_release);
  *
  * Commented and originally written by Alexey.
  */
-static inline void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
-			      int unregister)
+static void dst_ifdown(struct dst_entry *dst, struct net_device *dev,
+		       int unregister)
 {
 	if (dst->ops->ifdown)
 		dst->ops->ifdown(dst, dev, unregister);
-- 
cgit v1.2.3-59-g8ed1b


From 9e50849054a4824f06c66d2b449de21b98e03770 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 13 Apr 2010 15:28:11 +0200
Subject: netfilter: ipv6: move POSTROUTING invocation before fragmentation

Patrick McHardy notes: "We used to invoke IPv4 POST_ROUTING after
fragmentation as well just to defragment the packets in conntrack
immediately afterwards, but that got changed during the
netfilter-ipsec integration. Ideally IPv6 would behave like IPv4."

This patch makes it so. Sending an oversized frame (e.g. `ping6
-s64000 -c1 ::1`) will now show up in POSTROUTING as a single skb
rather than multiple ones.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/ip6_output.c | 49 +++++++++++++++++++++++--------------------------
 1 file changed, 23 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4535b7a0169b..236ac7813744 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -82,22 +82,6 @@ int ip6_local_out(struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(ip6_local_out);
 
-static int ip6_output_finish(struct sk_buff *skb)
-{
-	struct dst_entry *dst = skb_dst(skb);
-
-	if (dst->hh)
-		return neigh_hh_output(dst->hh, skb);
-	else if (dst->neighbour)
-		return dst->neighbour->output(skb);
-
-	IP6_INC_STATS_BH(dev_net(dst->dev),
-			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
-	kfree_skb(skb);
-	return -EINVAL;
-
-}
-
 /* dev_loopback_xmit for use with netfilter. */
 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 {
@@ -111,8 +95,7 @@ static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
 	return 0;
 }
 
-
-static int ip6_output2(struct sk_buff *skb)
+static int ip6_finish_output2(struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *dev = dst->dev;
@@ -150,8 +133,15 @@ static int ip6_output2(struct sk_buff *skb)
 				skb->len);
 	}
 
-	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
-		       ip6_output_finish);
+	if (dst->hh)
+		return neigh_hh_output(dst->hh, skb);
+	else if (dst->neighbour)
+		return dst->neighbour->output(skb);
+
+	IP6_INC_STATS_BH(dev_net(dst->dev),
+			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+	kfree_skb(skb);
+	return -EINVAL;
 }
 
 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
@@ -162,21 +152,28 @@ static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 	       skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
 }
 
+static int ip6_finish_output(struct sk_buff *skb)
+{
+	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
+	    dst_allfrag(skb_dst(skb)))
+		return ip6_fragment(skb, ip6_finish_output2);
+	else
+		return ip6_finish_output2(skb);
+}
+
 int ip6_output(struct sk_buff *skb)
 {
+	struct net_device *dev = skb_dst(skb)->dev;
 	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 	if (unlikely(idev->cnf.disable_ipv6)) {
-		IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
+		IP6_INC_STATS(dev_net(dev), idev,
 			      IPSTATS_MIB_OUTDISCARDS);
 		kfree_skb(skb);
 		return 0;
 	}
 
-	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
-				dst_allfrag(skb_dst(skb)))
-		return ip6_fragment(skb, ip6_output2);
-	else
-		return ip6_output2(skb);
+	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
+		       ip6_finish_output);
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 9c6eb28aca52d562f3ffbaebaa56385df9972a43 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 13 Apr 2010 15:32:16 +0200
Subject: netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING
 invocation

Similar to how IPv4's ip_output.c works, have ip6_output also check
the IPSKB_REROUTED flag. It will be set from xt_TEE for cloned packets
since Xtables can currently only deal with a single packet in flight
at a time.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Acked-by: David S. Miller <davem@davemloft.net>
[Patrick: changed to use an IP6SKB value instead of IPSKB]
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/ipv6.h  | 1 +
 net/ipv6/ip6_output.c | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index e0cc9a7db2b5..7bdf6ffe2b49 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -250,6 +250,7 @@ struct inet6_skb_parm {
 
 #define IP6SKB_XFRM_TRANSFORMED	1
 #define IP6SKB_FORWARDED	2
+#define IP6SKB_REROUTED		4
 };
 
 #define IP6CB(skb)	((struct inet6_skb_parm*)((skb)->cb))
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 236ac7813744..c10a38a71a5e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -172,8 +172,9 @@ int ip6_output(struct sk_buff *skb)
 		return 0;
 	}
 
-	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
-		       ip6_finish_output);
+	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
+			    ip6_finish_output,
+			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From d8a566beaa75c6ad5e38cdccf0ea5294323e7866 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 13 Apr 2010 05:03:15 +0000
Subject: net: fib_rules: consolidate IPv4 and DECnet ->default_pref()
 functions.

Both functions are equivalent, consolidate them since a following patch
needs a third implementation for multicast routing.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/fib_rules.h |  1 +
 net/core/fib_rules.c    | 18 ++++++++++++++++++
 net/decnet/dn_rules.c   | 19 +------------------
 net/ipv4/fib_rules.c    | 19 +------------------
 4 files changed, 21 insertions(+), 36 deletions(-)

(limited to 'net')

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index c49086d2bc7d..52bd9e6c9141 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -114,4 +114,5 @@ extern int			fib_rules_lookup(struct fib_rules_ops *,
 extern int			fib_default_rule_add(struct fib_rules_ops *,
 						     u32 pref, u32 table,
 						     u32 flags);
+extern u32			fib_default_rule_pref(struct fib_rules_ops *ops);
 #endif
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 05cce4ec84dd..1eb32276be77 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -39,6 +39,24 @@ int fib_default_rule_add(struct fib_rules_ops *ops,
 }
 EXPORT_SYMBOL(fib_default_rule_add);
 
+u32 fib_default_rule_pref(struct fib_rules_ops *ops)
+{
+	struct list_head *pos;
+	struct fib_rule *rule;
+
+	if (!list_empty(&ops->rules_list)) {
+		pos = ops->rules_list.next;
+		if (pos->next != &ops->rules_list) {
+			rule = list_entry(pos->next, struct fib_rule, list);
+			if (rule->pref)
+				return rule->pref - 1;
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(fib_default_rule_pref);
+
 static void notify_rule_change(int event, struct fib_rule *rule,
 			       struct fib_rules_ops *ops, struct nlmsghdr *nlh,
 			       u32 pid);
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 7466c546f286..2d14093a2c3a 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -212,23 +212,6 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-static u32 dn_fib_rule_default_pref(struct fib_rules_ops *ops)
-{
-	struct list_head *pos;
-	struct fib_rule *rule;
-
-	if (!list_empty(&dn_fib_rules_ops->rules_list)) {
-		pos = dn_fib_rules_ops->rules_list.next;
-		if (pos->next != &dn_fib_rules_ops->rules_list) {
-			rule = list_entry(pos->next, struct fib_rule, list);
-			if (rule->pref)
-				return rule->pref - 1;
-		}
-	}
-
-	return 0;
-}
-
 static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
 {
 	dn_rt_cache_flush(-1);
@@ -243,7 +226,7 @@ static struct fib_rules_ops dn_fib_rules_ops_template = {
 	.configure	= dn_fib_rule_configure,
 	.compare	= dn_fib_rule_compare,
 	.fill		= dn_fib_rule_fill,
-	.default_pref	= dn_fib_rule_default_pref,
+	.default_pref	= fib_default_rule_pref,
 	.flush_cache	= dn_fib_rule_flush_cache,
 	.nlgroup	= RTNLGRP_DECnet_RULE,
 	.policy		= dn_fib_rule_policy,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index ca2d07b1c706..73b67849c5b9 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -234,23 +234,6 @@ nla_put_failure:
 	return -ENOBUFS;
 }
 
-static u32 fib4_rule_default_pref(struct fib_rules_ops *ops)
-{
-	struct list_head *pos;
-	struct fib_rule *rule;
-
-	if (!list_empty(&ops->rules_list)) {
-		pos = ops->rules_list.next;
-		if (pos->next != &ops->rules_list) {
-			rule = list_entry(pos->next, struct fib_rule, list);
-			if (rule->pref)
-				return rule->pref - 1;
-		}
-	}
-
-	return 0;
-}
-
 static size_t fib4_rule_nlmsg_payload(struct fib_rule *rule)
 {
 	return nla_total_size(4) /* dst */
@@ -272,7 +255,7 @@ static struct fib_rules_ops fib4_rules_ops_template = {
 	.configure	= fib4_rule_configure,
 	.compare	= fib4_rule_compare,
 	.fill		= fib4_rule_fill,
-	.default_pref	= fib4_rule_default_pref,
+	.default_pref	= fib_default_rule_pref,
 	.nlmsg_payload	= fib4_rule_nlmsg_payload,
 	.flush_cache	= fib4_rule_flush_cache,
 	.nlgroup	= RTNLGRP_IPV4_RULE,
-- 
cgit v1.2.3-59-g8ed1b


From 28bb17268b92b0c568f2496e5e631008f9108409 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 13 Apr 2010 05:03:16 +0000
Subject: net: fib_rules: set family in fib_rule_hdr centrally

All fib_rules implementations need to set the family in their ->fill()
functions. Since the value is available to the generic fib_nl_fill_rule()
function, set it there.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/fib_rules.c  | 1 +
 net/decnet/dn_rules.c | 1 -
 net/ipv4/fib_rules.c  | 1 -
 net/ipv6/fib6_rules.c | 1 -
 4 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 1eb32276be77..1bc66592453c 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -535,6 +535,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule,
 		return -EMSGSIZE;
 
 	frh = nlmsg_data(nlh);
+	frh->family = ops->family;
 	frh->table = rule->table;
 	NLA_PUT_U32(skb, FRA_TABLE, rule->table);
 	frh->res1 = 0;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 2d14093a2c3a..1c8cc6d5b645 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -196,7 +196,6 @@ static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 {
 	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
 
-	frh->family = AF_DECnet;
 	frh->dst_len = r->dst_len;
 	frh->src_len = r->src_len;
 	frh->tos = 0;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 73b67849c5b9..a18355e15111 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -213,7 +213,6 @@ static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 {
 	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
-	frh->family = AF_INET;
 	frh->dst_len = rule4->dst_len;
 	frh->src_len = rule4->src_len;
 	frh->tos = rule4->tos;
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 5e463c43fcc2..92b2b7fb6c3d 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -208,7 +208,6 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 {
 	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
 
-	frh->family = AF_INET6;
 	frh->dst_len = rule6->dst.plen;
 	frh->src_len = rule6->src.plen;
 	frh->tos = rule6->tclass;
-- 
cgit v1.2.3-59-g8ed1b


From 0f87b1dd01b51dc3c789f7a212656a4a87eee1bd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 13 Apr 2010 05:03:17 +0000
Subject: net: fib_rules: decouple address families from real address families

Decouple the address family values used for fib_rules from the real
address families in socket.h. This allows fib_rules to be used for
code that is not a real address family without increasing AF_MAX/NPROTO.

Values up to 127 are reserved for real address families and map directly
to the corresponding AF value, values starting from 128 are for other
uses. rtnetlink is changed to invoke the AF_UNSPEC dumpit/doit handlers
for these families.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fib_rules.h |  7 +++++++
 net/core/rtnetlink.c      | 15 ++++++++++-----
 net/decnet/dn_rules.c     |  2 +-
 net/ipv4/fib_rules.c      |  2 +-
 net/ipv6/fib6_rules.c     |  2 +-
 5 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 51da65b68b85..405e41139a4d 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -15,6 +15,13 @@
 /* try to find source address in routing lookups */
 #define FIB_RULE_FIND_SADDR	0x00010000
 
+/* fib_rules families. values up to 127 are reserved for real address
+ * families, values from 128 upwards may be used arbitrarily.
+ */
+#define FIB_RULES_IPV4		AF_INET
+#define FIB_RULES_IPV6		AF_INET6
+#define FIB_RULES_DECNET	AF_DECnet
+
 struct fib_rule_hdr {
 	__u8		family;
 	__u8		dst_len;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bf919b6acea2..78c85985cb30 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -118,7 +118,11 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
 {
 	struct rtnl_link *tab;
 
-	tab = rtnl_msg_handlers[protocol];
+	if (protocol < NPROTO)
+		tab = rtnl_msg_handlers[protocol];
+	else
+		tab = NULL;
+
 	if (tab == NULL || tab[msgindex].doit == NULL)
 		tab = rtnl_msg_handlers[PF_UNSPEC];
 
@@ -129,7 +133,11 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
 {
 	struct rtnl_link *tab;
 
-	tab = rtnl_msg_handlers[protocol];
+	if (protocol < NPROTO)
+		tab = rtnl_msg_handlers[protocol];
+	else
+		tab = NULL;
+
 	if (tab == NULL || tab[msgindex].dumpit == NULL)
 		tab = rtnl_msg_handlers[PF_UNSPEC];
 
@@ -1444,9 +1452,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return 0;
 
 	family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family;
-	if (family >= NPROTO)
-		return -EAFNOSUPPORT;
-
 	sz_idx = type>>2;
 	kind = type&3;
 
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 1c8cc6d5b645..af28dcc21844 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -217,7 +217,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
 }
 
 static struct fib_rules_ops dn_fib_rules_ops_template = {
-	.family		= AF_DECnet,
+	.family		= FIB_RULES_DECNET,
 	.rule_size	= sizeof(struct dn_fib_rule),
 	.addr_size	= sizeof(u16),
 	.action		= dn_fib_rule_action,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index a18355e15111..3ec84fea5b71 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -246,7 +246,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
 }
 
 static struct fib_rules_ops fib4_rules_ops_template = {
-	.family		= AF_INET,
+	.family		= FIB_RULES_IPV4,
 	.rule_size	= sizeof(struct fib4_rule),
 	.addr_size	= sizeof(u32),
 	.action		= fib4_rule_action,
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 92b2b7fb6c3d..8124f16f2ac2 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -238,7 +238,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
 }
 
 static struct fib_rules_ops fib6_rules_ops_template = {
-	.family			= AF_INET6,
+	.family			= FIB_RULES_IPV6,
 	.rule_size		= sizeof(struct fib6_rule),
 	.addr_size		= sizeof(struct in6_addr),
 	.action			= fib6_rule_action,
-- 
cgit v1.2.3-59-g8ed1b


From e258beb22f4d3ea3dc88586ffc9c990d0eb03380 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 13 Apr 2010 05:03:19 +0000
Subject: ipv4: ipmr: move unres_queue and timer to per-namespace data

The unres_queue is currently shared between all namespaces. Following patches
will additionally allow creating multiple multicast routing tables in each
namespace. Having a single shared queue for all these users seems excessive,
so move the queue and the cleanup timer to the per-namespace data to unshare it.

As a side-effect, this fixes a bug in the seq file iteration functions:
previously, the first entry returned was always from the current namespace,
but entries returned after that could belong to any namespace.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h |  2 ++
 net/ipv4/ipmr.c          | 70 ++++++++++++++++++++----------------------------
 2 files changed, 31 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 2764994c9136..b15e518f952a 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -60,6 +60,8 @@ struct netns_ipv4 {
 
 #ifdef CONFIG_IP_MROUTE
 	struct sock		*mroute_sk;
+	struct timer_list	ipmr_expire_timer;
+	struct mfc_cache	*mfc_unres_queue;
 	struct mfc_cache	**mfc_cache_array;
 	struct vif_device	*vif_table;
 	int			maxvif;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9d4f6d1340a4..d6aa65e2b08f 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -80,8 +80,6 @@ static DEFINE_RWLOCK(mrt_lock);
 
 #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
 
-static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
-
 /* Special spinlock for queue of unresolved entries */
 static DEFINE_SPINLOCK(mfc_unres_lock);
 
@@ -100,8 +98,6 @@ static int ipmr_cache_report(struct net *net,
 			     struct sk_buff *pkt, vifi_t vifi, int assert);
 static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
 
-static struct timer_list ipmr_expire_timer;
-
 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 
 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
@@ -364,25 +360,26 @@ static void ipmr_destroy_unres(struct mfc_cache *c)
 }
 
 
-/* Single timer process for all the unresolved queue. */
+/* Timer process for the unresolved queue. */
 
-static void ipmr_expire_process(unsigned long dummy)
+static void ipmr_expire_process(unsigned long arg)
 {
+	struct net *net = (struct net *)arg;
 	unsigned long now;
 	unsigned long expires;
 	struct mfc_cache *c, **cp;
 
 	if (!spin_trylock(&mfc_unres_lock)) {
-		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
+		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies+HZ/10);
 		return;
 	}
 
-	if (mfc_unres_queue == NULL)
+	if (net->ipv4.mfc_unres_queue == NULL)
 		goto out;
 
 	now = jiffies;
 	expires = 10*HZ;
-	cp = &mfc_unres_queue;
+	cp = &net->ipv4.mfc_unres_queue;
 
 	while ((c=*cp) != NULL) {
 		if (time_after(c->mfc_un.unres.expires, now)) {
@@ -398,8 +395,8 @@ static void ipmr_expire_process(unsigned long dummy)
 		ipmr_destroy_unres(c);
 	}
 
-	if (mfc_unres_queue != NULL)
-		mod_timer(&ipmr_expire_timer, jiffies + expires);
+	if (net->ipv4.mfc_unres_queue != NULL)
+		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + expires);
 
 out:
 	spin_unlock(&mfc_unres_lock);
@@ -708,9 +705,8 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 	const struct iphdr *iph = ip_hdr(skb);
 
 	spin_lock_bh(&mfc_unres_lock);
-	for (c=mfc_unres_queue; c; c=c->next) {
-		if (net_eq(mfc_net(c), net) &&
-		    c->mfc_mcastgrp == iph->daddr &&
+	for (c=net->ipv4.mfc_unres_queue; c; c=c->next) {
+		if (c->mfc_mcastgrp == iph->daddr &&
 		    c->mfc_origin == iph->saddr)
 			break;
 	}
@@ -751,10 +747,10 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 		}
 
 		atomic_inc(&net->ipv4.cache_resolve_queue_len);
-		c->next = mfc_unres_queue;
-		mfc_unres_queue = c;
+		c->next = net->ipv4.mfc_unres_queue;
+		net->ipv4.mfc_unres_queue = c;
 
-		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
+		mod_timer(&net->ipv4.ipmr_expire_timer, c->mfc_un.unres.expires);
 	}
 
 	/*
@@ -849,18 +845,17 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 	 *	need to send on the frames and tidy up.
 	 */
 	spin_lock_bh(&mfc_unres_lock);
-	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
+	for (cp = &net->ipv4.mfc_unres_queue; (uc=*cp) != NULL;
 	     cp = &uc->next) {
-		if (net_eq(mfc_net(uc), net) &&
-		    uc->mfc_origin == c->mfc_origin &&
+		if (uc->mfc_origin == c->mfc_origin &&
 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 			*cp = uc->next;
 			atomic_dec(&net->ipv4.cache_resolve_queue_len);
 			break;
 		}
 	}
-	if (mfc_unres_queue == NULL)
-		del_timer(&ipmr_expire_timer);
+	if (net->ipv4.mfc_unres_queue == NULL)
+		del_timer(&net->ipv4.ipmr_expire_timer);
 	spin_unlock_bh(&mfc_unres_lock);
 
 	if (uc) {
@@ -912,14 +907,9 @@ static void mroute_clean_tables(struct net *net)
 		struct mfc_cache *c, **cp;
 
 		spin_lock_bh(&mfc_unres_lock);
-		cp = &mfc_unres_queue;
+		cp = &net->ipv4.mfc_unres_queue;
 		while ((c = *cp) != NULL) {
-			if (!net_eq(mfc_net(c), net)) {
-				cp = &c->next;
-				continue;
-			}
 			*cp = c->next;
-
 			ipmr_destroy_unres(c);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
@@ -1819,11 +1809,10 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
 				return mfc;
 	read_unlock(&mrt_lock);
 
-	it->cache = &mfc_unres_queue;
+	it->cache = &net->ipv4.mfc_unres_queue;
 	spin_lock_bh(&mfc_unres_lock);
-	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
-		if (net_eq(mfc_net(mfc), net) &&
-		    pos-- == 0)
+	for (mfc = net->ipv4.mfc_unres_queue; mfc; mfc = mfc->next)
+		if (pos-- == 0)
 			return mfc;
 	spin_unlock_bh(&mfc_unres_lock);
 
@@ -1857,7 +1846,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (mfc->next)
 		return mfc->next;
 
-	if (it->cache == &mfc_unres_queue)
+	if (it->cache == &net->ipv4.mfc_unres_queue)
 		goto end_of_list;
 
 	BUG_ON(it->cache != net->ipv4.mfc_cache_array);
@@ -1870,13 +1859,11 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 	/* exhausted cache_array, show unresolved */
 	read_unlock(&mrt_lock);
-	it->cache = &mfc_unres_queue;
+	it->cache = &net->ipv4.mfc_unres_queue;
 	it->ct = 0;
 
 	spin_lock_bh(&mfc_unres_lock);
-	mfc = mfc_unres_queue;
-	while (mfc && !net_eq(mfc_net(mfc), net))
-		mfc = mfc->next;
+	mfc = net->ipv4.mfc_unres_queue;
 	if (mfc)
 		return mfc;
 
@@ -1892,7 +1879,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
 
-	if (it->cache == &mfc_unres_queue)
+	if (it->cache == &net->ipv4.mfc_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
 	else if (it->cache == net->ipv4.mfc_cache_array)
 		read_unlock(&mrt_lock);
@@ -1915,7 +1902,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 			   (unsigned long) mfc->mfc_origin,
 			   mfc->mfc_parent);
 
-		if (it->cache != &mfc_unres_queue) {
+		if (it->cache != &net->ipv4.mfc_unres_queue) {
 			seq_printf(seq, " %8lu %8lu %8lu",
 				   mfc->mfc_un.res.pkt,
 				   mfc->mfc_un.res.bytes,
@@ -1992,6 +1979,9 @@ static int __net_init ipmr_net_init(struct net *net)
 		goto fail_mfc_cache;
 	}
 
+	setup_timer(&net->ipv4.ipmr_expire_timer, ipmr_expire_process,
+		    (unsigned long)net);
+
 #ifdef CONFIG_IP_PIMSM
 	net->ipv4.mroute_reg_vif_num = -1;
 #endif
@@ -2047,7 +2037,6 @@ int __init ip_mr_init(void)
 	if (err)
 		goto reg_pernet_fail;
 
-	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
 	err = register_netdevice_notifier(&ip_mr_notifier);
 	if (err)
 		goto reg_notif_fail;
@@ -2065,7 +2054,6 @@ add_proto_fail:
 	unregister_netdevice_notifier(&ip_mr_notifier);
 #endif
 reg_notif_fail:
-	del_timer(&ipmr_expire_timer);
 	unregister_pernet_subsys(&ipmr_net_ops);
 reg_pernet_fail:
 	kmem_cache_destroy(mrt_cachep);
-- 
cgit v1.2.3-59-g8ed1b


From d658f8a0e63b6476148162aa7a3ffffc58dcad52 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 13 Apr 2010 05:03:20 +0000
Subject: ipv4: ipmr: remove net pointer from struct mfc_cache

Now that cache entries in unres_queue don't need to be distinguished by their
network namespace pointer anymore, we can remove it from struct mfc_cache
and pass the namespace as a function argument to the functions that need it.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h | 15 ------------
 net/ipv4/ipmr.c        | 65 +++++++++++++++++++++++++-------------------------
 2 files changed, 32 insertions(+), 48 deletions(-)

(limited to 'net')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index c5f3d53548e2..de7780a6dd32 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -192,9 +192,6 @@ struct vif_device {
 
 struct mfc_cache {
 	struct mfc_cache *next;			/* Next entry on cache line 	*/
-#ifdef CONFIG_NET_NS
-	struct net *mfc_net;
-#endif
 	__be32 mfc_mcastgrp;			/* Group the entry belongs to 	*/
 	__be32 mfc_origin;			/* Source of packet 		*/
 	vifi_t mfc_parent;			/* Source interface		*/
@@ -217,18 +214,6 @@ struct mfc_cache {
 	} mfc_un;
 };
 
-static inline
-struct net *mfc_net(const struct mfc_cache *mfc)
-{
-	return read_pnet(&mfc->mfc_net);
-}
-
-static inline
-void mfc_net_set(struct mfc_cache *mfc, struct net *net)
-{
-	write_pnet(&mfc->mfc_net, hold_net(net));
-}
-
 #define MFC_STATIC		1
 #define MFC_NOTIFY		2
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index d6aa65e2b08f..f8e25c8ba070 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -93,10 +93,12 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
-static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
+static int ip_mr_forward(struct net *net, struct sk_buff *skb,
+			 struct mfc_cache *cache, int local);
 static int ipmr_cache_report(struct net *net,
 			     struct sk_buff *pkt, vifi_t vifi, int assert);
-static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
+static int ipmr_fill_mroute(struct net *net, struct sk_buff *skb,
+			    struct mfc_cache *c, struct rtmsg *rtm);
 
 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 
@@ -325,7 +327,6 @@ static int vif_delete(struct net *net, int vifi, int notify,
 
 static inline void ipmr_cache_free(struct mfc_cache *c)
 {
-	release_net(mfc_net(c));
 	kmem_cache_free(mrt_cachep, c);
 }
 
@@ -333,11 +334,10 @@ static inline void ipmr_cache_free(struct mfc_cache *c)
    and reporting error to netlink readers.
  */
 
-static void ipmr_destroy_unres(struct mfc_cache *c)
+static void ipmr_destroy_unres(struct net *net, struct mfc_cache *c)
 {
 	struct sk_buff *skb;
 	struct nlmsgerr *e;
-	struct net *net = mfc_net(c);
 
 	atomic_dec(&net->ipv4.cache_resolve_queue_len);
 
@@ -392,7 +392,7 @@ static void ipmr_expire_process(unsigned long arg)
 
 		*cp = c->next;
 
-		ipmr_destroy_unres(c);
+		ipmr_destroy_unres(net, c);
 	}
 
 	if (net->ipv4.mfc_unres_queue != NULL)
@@ -404,10 +404,10 @@ out:
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
 
-static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
+static void ipmr_update_thresholds(struct net *net, struct mfc_cache *cache,
+				   unsigned char *ttls)
 {
 	int vifi;
-	struct net *net = mfc_net(cache);
 
 	cache->mfc_un.res.minvif = MAXVIFS;
 	cache->mfc_un.res.maxvif = 0;
@@ -547,24 +547,22 @@ static struct mfc_cache *ipmr_cache_find(struct net *net,
 /*
  *	Allocate a multicast cache entry
  */
-static struct mfc_cache *ipmr_cache_alloc(struct net *net)
+static struct mfc_cache *ipmr_cache_alloc(void)
 {
 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 	if (c == NULL)
 		return NULL;
 	c->mfc_un.res.minvif = MAXVIFS;
-	mfc_net_set(c, net);
 	return c;
 }
 
-static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
+static struct mfc_cache *ipmr_cache_alloc_unres(void)
 {
 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 	if (c == NULL)
 		return NULL;
 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
 	c->mfc_un.unres.expires = jiffies + 10*HZ;
-	mfc_net_set(c, net);
 	return c;
 }
 
@@ -572,7 +570,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
  *	A cache entry has gone into a resolved state from queued
  */
 
-static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
+static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc,
+			       struct mfc_cache *c)
 {
 	struct sk_buff *skb;
 	struct nlmsgerr *e;
@@ -585,7 +584,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
-			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
+			if (ipmr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) {
 				nlh->nlmsg_len = (skb_tail_pointer(skb) -
 						  (u8 *)nlh);
 			} else {
@@ -597,9 +596,9 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 				memset(&e->msg, 0, sizeof(e->msg));
 			}
 
-			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
+			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 		} else
-			ip_mr_forward(skb, c, 0);
+			ip_mr_forward(net, skb, c, 0);
 	}
 }
 
@@ -717,7 +716,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 		 */
 
 		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
-		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
+		    (c = ipmr_cache_alloc_unres()) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -814,7 +813,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 	if (c != NULL) {
 		write_lock_bh(&mrt_lock);
 		c->mfc_parent = mfc->mfcc_parent;
-		ipmr_update_thresholds(c, mfc->mfcc_ttls);
+		ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
 		if (!mrtsock)
 			c->mfc_flags |= MFC_STATIC;
 		write_unlock_bh(&mrt_lock);
@@ -824,14 +823,14 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
 		return -EINVAL;
 
-	c = ipmr_cache_alloc(net);
+	c = ipmr_cache_alloc();
 	if (c == NULL)
 		return -ENOMEM;
 
 	c->mfc_origin = mfc->mfcc_origin.s_addr;
 	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
 	c->mfc_parent = mfc->mfcc_parent;
-	ipmr_update_thresholds(c, mfc->mfcc_ttls);
+	ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
 	if (!mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
@@ -859,7 +858,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 	spin_unlock_bh(&mfc_unres_lock);
 
 	if (uc) {
-		ipmr_cache_resolve(uc, c);
+		ipmr_cache_resolve(net, uc, c);
 		ipmr_cache_free(uc);
 	}
 	return 0;
@@ -910,7 +909,7 @@ static void mroute_clean_tables(struct net *net)
 		cp = &net->ipv4.mfc_unres_queue;
 		while ((c = *cp) != NULL) {
 			*cp = c->next;
-			ipmr_destroy_unres(c);
+			ipmr_destroy_unres(net, c);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
 	}
@@ -1221,9 +1220,9 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
  *	Processing handlers for ipmr_forward
  */
 
-static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
+static void ipmr_queue_xmit(struct net *net, struct sk_buff *skb,
+			    struct mfc_cache *c, int vifi)
 {
-	struct net *net = mfc_net(c);
 	const struct iphdr *iph = ip_hdr(skb);
 	struct vif_device *vif = &net->ipv4.vif_table[vifi];
 	struct net_device *dev;
@@ -1335,11 +1334,11 @@ static int ipmr_find_vif(struct net_device *dev)
 
 /* "local" means that we should preserve one skb (for local delivery) */
 
-static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
+static int ip_mr_forward(struct net *net, struct sk_buff *skb,
+			 struct mfc_cache *cache, int local)
 {
 	int psend = -1;
 	int vif, ct;
-	struct net *net = mfc_net(cache);
 
 	vif = cache->mfc_parent;
 	cache->mfc_un.res.pkt++;
@@ -1396,7 +1395,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
-					ipmr_queue_xmit(skb2, cache, psend);
+					ipmr_queue_xmit(net, skb2, cache, psend);
 			}
 			psend = ct;
 		}
@@ -1405,9 +1404,9 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
 		if (local) {
 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 			if (skb2)
-				ipmr_queue_xmit(skb2, cache, psend);
+				ipmr_queue_xmit(net, skb2, cache, psend);
 		} else {
-			ipmr_queue_xmit(skb, cache, psend);
+			ipmr_queue_xmit(net, skb, cache, psend);
 			return 0;
 		}
 	}
@@ -1488,7 +1487,7 @@ int ip_mr_input(struct sk_buff *skb)
 		return -ENODEV;
 	}
 
-	ip_mr_forward(skb, cache, local);
+	ip_mr_forward(net, skb, cache, local);
 
 	read_unlock(&mrt_lock);
 
@@ -1602,11 +1601,11 @@ drop:
 #endif
 
 static int
-ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
+ipmr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc_cache *c,
+		 struct rtmsg *rtm)
 {
 	int ct;
 	struct rtnexthop *nhp;
-	struct net *net = mfc_net(c);
 	u8 *b = skb_tail_pointer(skb);
 	struct rtattr *mp_head;
 
@@ -1686,7 +1685,7 @@ int ipmr_get_route(struct net *net,
 
 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
 		cache->mfc_flags |= MFC_NOTIFY;
-	err = ipmr_fill_mroute(skb, cache, rtm);
+	err = ipmr_fill_mroute(net, skb, cache, rtm);
 	read_unlock(&mrt_lock);
 	return err;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 862465f2e7e90975e7bf0ecfbb171dd3adedd950 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 13 Apr 2010 05:03:21 +0000
Subject: ipv4: ipmr: convert struct mfc_cache to struct list_head

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h   |   2 +-
 include/net/netns/ipv4.h |   4 +-
 net/ipv4/ipmr.c          | 125 +++++++++++++++++++++++------------------------
 3 files changed, 64 insertions(+), 67 deletions(-)

(limited to 'net')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index de7780a6dd32..7ff6c77d6008 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -191,7 +191,7 @@ struct vif_device {
 #define VIFF_STATIC 0x8000
 
 struct mfc_cache {
-	struct mfc_cache *next;			/* Next entry on cache line 	*/
+	struct list_head list;
 	__be32 mfc_mcastgrp;			/* Group the entry belongs to 	*/
 	__be32 mfc_origin;			/* Source of packet 		*/
 	vifi_t mfc_parent;			/* Source interface		*/
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index b15e518f952a..5d06429968d5 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -61,8 +61,8 @@ struct netns_ipv4 {
 #ifdef CONFIG_IP_MROUTE
 	struct sock		*mroute_sk;
 	struct timer_list	ipmr_expire_timer;
-	struct mfc_cache	*mfc_unres_queue;
-	struct mfc_cache	**mfc_cache_array;
+	struct list_head	mfc_unres_queue;
+	struct list_head	*mfc_cache_array;
 	struct vif_device	*vif_table;
 	int			maxvif;
 	atomic_t		cache_resolve_queue_len;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index f8e25c8ba070..21b5edc2f343 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -367,35 +367,32 @@ static void ipmr_expire_process(unsigned long arg)
 	struct net *net = (struct net *)arg;
 	unsigned long now;
 	unsigned long expires;
-	struct mfc_cache *c, **cp;
+	struct mfc_cache *c, *next;
 
 	if (!spin_trylock(&mfc_unres_lock)) {
 		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies+HZ/10);
 		return;
 	}
 
-	if (net->ipv4.mfc_unres_queue == NULL)
+	if (list_empty(&net->ipv4.mfc_unres_queue))
 		goto out;
 
 	now = jiffies;
 	expires = 10*HZ;
-	cp = &net->ipv4.mfc_unres_queue;
 
-	while ((c=*cp) != NULL) {
+	list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) {
 		if (time_after(c->mfc_un.unres.expires, now)) {
 			unsigned long interval = c->mfc_un.unres.expires - now;
 			if (interval < expires)
 				expires = interval;
-			cp = &c->next;
 			continue;
 		}
 
-		*cp = c->next;
-
+		list_del(&c->list);
 		ipmr_destroy_unres(net, c);
 	}
 
-	if (net->ipv4.mfc_unres_queue != NULL)
+	if (!list_empty(&net->ipv4.mfc_unres_queue))
 		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + expires);
 
 out:
@@ -537,11 +534,11 @@ static struct mfc_cache *ipmr_cache_find(struct net *net,
 	int line = MFC_HASH(mcastgrp, origin);
 	struct mfc_cache *c;
 
-	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
-		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
-			break;
+	list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) {
+		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
+			return c;
 	}
-	return c;
+	return NULL;
 }
 
 /*
@@ -699,18 +696,21 @@ static int ipmr_cache_report(struct net *net,
 static int
 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 {
+	bool found = false;
 	int err;
 	struct mfc_cache *c;
 	const struct iphdr *iph = ip_hdr(skb);
 
 	spin_lock_bh(&mfc_unres_lock);
-	for (c=net->ipv4.mfc_unres_queue; c; c=c->next) {
+	list_for_each_entry(c, &net->ipv4.mfc_unres_queue, list) {
 		if (c->mfc_mcastgrp == iph->daddr &&
-		    c->mfc_origin == iph->saddr)
+		    c->mfc_origin == iph->saddr) {
+			found = true;
 			break;
+		}
 	}
 
-	if (c == NULL) {
+	if (!found) {
 		/*
 		 *	Create a new entry if allowable
 		 */
@@ -746,8 +746,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 		}
 
 		atomic_inc(&net->ipv4.cache_resolve_queue_len);
-		c->next = net->ipv4.mfc_unres_queue;
-		net->ipv4.mfc_unres_queue = c;
+		list_add(&c->list, &net->ipv4.mfc_unres_queue);
 
 		mod_timer(&net->ipv4.ipmr_expire_timer, c->mfc_un.unres.expires);
 	}
@@ -774,16 +773,15 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
 {
 	int line;
-	struct mfc_cache *c, **cp;
+	struct mfc_cache *c, *next;
 
 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 
-	for (cp = &net->ipv4.mfc_cache_array[line];
-	     (c = *cp) != NULL; cp = &c->next) {
+	list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[line], list) {
 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
 			write_lock_bh(&mrt_lock);
-			*cp = c->next;
+			list_del(&c->list);
 			write_unlock_bh(&mrt_lock);
 
 			ipmr_cache_free(c);
@@ -795,22 +793,24 @@ static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
 
 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 {
+	bool found = false;
 	int line;
-	struct mfc_cache *uc, *c, **cp;
+	struct mfc_cache *uc, *c;
 
 	if (mfc->mfcc_parent >= MAXVIFS)
 		return -ENFILE;
 
 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 
-	for (cp = &net->ipv4.mfc_cache_array[line];
-	     (c = *cp) != NULL; cp = &c->next) {
+	list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) {
 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
-		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
+		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
+			found = true;
 			break;
+		}
 	}
 
-	if (c != NULL) {
+	if (found) {
 		write_lock_bh(&mrt_lock);
 		c->mfc_parent = mfc->mfcc_parent;
 		ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
@@ -835,8 +835,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
 	write_lock_bh(&mrt_lock);
-	c->next = net->ipv4.mfc_cache_array[line];
-	net->ipv4.mfc_cache_array[line] = c;
+	list_add(&c->list, &net->ipv4.mfc_cache_array[line]);
 	write_unlock_bh(&mrt_lock);
 
 	/*
@@ -844,16 +843,15 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 	 *	need to send on the frames and tidy up.
 	 */
 	spin_lock_bh(&mfc_unres_lock);
-	for (cp = &net->ipv4.mfc_unres_queue; (uc=*cp) != NULL;
-	     cp = &uc->next) {
+	list_for_each_entry(uc, &net->ipv4.mfc_unres_queue, list) {
 		if (uc->mfc_origin == c->mfc_origin &&
 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
-			*cp = uc->next;
+			list_del(&uc->list);
 			atomic_dec(&net->ipv4.cache_resolve_queue_len);
 			break;
 		}
 	}
-	if (net->ipv4.mfc_unres_queue == NULL)
+	if (list_empty(&net->ipv4.mfc_unres_queue))
 		del_timer(&net->ipv4.ipmr_expire_timer);
 	spin_unlock_bh(&mfc_unres_lock);
 
@@ -872,6 +870,7 @@ static void mroute_clean_tables(struct net *net)
 {
 	int i;
 	LIST_HEAD(list);
+	struct mfc_cache *c, *next;
 
 	/*
 	 *	Shut down all active vif entries
@@ -885,17 +884,12 @@ static void mroute_clean_tables(struct net *net)
 	/*
 	 *	Wipe the cache
 	 */
-	for (i=0; i<MFC_LINES; i++) {
-		struct mfc_cache *c, **cp;
-
-		cp = &net->ipv4.mfc_cache_array[i];
-		while ((c = *cp) != NULL) {
-			if (c->mfc_flags&MFC_STATIC) {
-				cp = &c->next;
+	for (i = 0; i < MFC_LINES; i++) {
+		list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[i], list) {
+			if (c->mfc_flags&MFC_STATIC)
 				continue;
-			}
 			write_lock_bh(&mrt_lock);
-			*cp = c->next;
+			list_del(&c->list);
 			write_unlock_bh(&mrt_lock);
 
 			ipmr_cache_free(c);
@@ -903,12 +897,9 @@ static void mroute_clean_tables(struct net *net)
 	}
 
 	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
-		struct mfc_cache *c, **cp;
-
 		spin_lock_bh(&mfc_unres_lock);
-		cp = &net->ipv4.mfc_unres_queue;
-		while ((c = *cp) != NULL) {
-			*cp = c->next;
+		list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) {
+			list_del(&c->list);
 			ipmr_destroy_unres(net, c);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
@@ -1789,7 +1780,7 @@ static const struct file_operations ipmr_vif_fops = {
 
 struct ipmr_mfc_iter {
 	struct seq_net_private p;
-	struct mfc_cache **cache;
+	struct list_head *cache;
 	int ct;
 };
 
@@ -1799,18 +1790,18 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
 {
 	struct mfc_cache *mfc;
 
-	it->cache = net->ipv4.mfc_cache_array;
 	read_lock(&mrt_lock);
-	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
-		for (mfc = net->ipv4.mfc_cache_array[it->ct];
-		     mfc; mfc = mfc->next)
+	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
+		it->cache = &net->ipv4.mfc_cache_array[it->ct];
+		list_for_each_entry(mfc, it->cache, list)
 			if (pos-- == 0)
 				return mfc;
+	}
 	read_unlock(&mrt_lock);
 
-	it->cache = &net->ipv4.mfc_unres_queue;
 	spin_lock_bh(&mfc_unres_lock);
-	for (mfc = net->ipv4.mfc_unres_queue; mfc; mfc = mfc->next)
+	it->cache = &net->ipv4.mfc_unres_queue;
+	list_for_each_entry(mfc, it->cache, list)
 		if (pos-- == 0)
 			return mfc;
 	spin_unlock_bh(&mfc_unres_lock);
@@ -1842,18 +1833,19 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (v == SEQ_START_TOKEN)
 		return ipmr_mfc_seq_idx(net, seq->private, 0);
 
-	if (mfc->next)
-		return mfc->next;
+	if (mfc->list.next != it->cache)
+		return list_entry(mfc->list.next, struct mfc_cache, list);
 
 	if (it->cache == &net->ipv4.mfc_unres_queue)
 		goto end_of_list;
 
-	BUG_ON(it->cache != net->ipv4.mfc_cache_array);
+	BUG_ON(it->cache != &net->ipv4.mfc_cache_array[it->ct]);
 
 	while (++it->ct < MFC_LINES) {
-		mfc = net->ipv4.mfc_cache_array[it->ct];
-		if (mfc)
-			return mfc;
+		it->cache = &net->ipv4.mfc_cache_array[it->ct];
+		if (list_empty(it->cache))
+			continue;
+		return list_first_entry(it->cache, struct mfc_cache, list);
 	}
 
 	/* exhausted cache_array, show unresolved */
@@ -1862,9 +1854,8 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	it->ct = 0;
 
 	spin_lock_bh(&mfc_unres_lock);
-	mfc = net->ipv4.mfc_unres_queue;
-	if (mfc)
-		return mfc;
+	if (!list_empty(it->cache))
+		return list_first_entry(it->cache, struct mfc_cache, list);
 
  end_of_list:
 	spin_unlock_bh(&mfc_unres_lock);
@@ -1880,7 +1871,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 
 	if (it->cache == &net->ipv4.mfc_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
-	else if (it->cache == net->ipv4.mfc_cache_array)
+	else if (it->cache == &net->ipv4.mfc_cache_array[it->ct])
 		read_unlock(&mrt_lock);
 }
 
@@ -1960,6 +1951,7 @@ static const struct net_protocol pim_protocol = {
  */
 static int __net_init ipmr_net_init(struct net *net)
 {
+	unsigned int i;
 	int err = 0;
 
 	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
@@ -1971,13 +1963,18 @@ static int __net_init ipmr_net_init(struct net *net)
 
 	/* Forwarding cache */
 	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
-					    sizeof(struct mfc_cache *),
+					    sizeof(struct list_head),
 					    GFP_KERNEL);
 	if (!net->ipv4.mfc_cache_array) {
 		err = -ENOMEM;
 		goto fail_mfc_cache;
 	}
 
+	for (i = 0; i < MFC_LINES; i++)
+		INIT_LIST_HEAD(&net->ipv4.mfc_cache_array[i]);
+
+	INIT_LIST_HEAD(&net->ipv4.mfc_unres_queue);
+
 	setup_timer(&net->ipv4.ipmr_expire_timer, ipmr_expire_process,
 		    (unsigned long)net);
 
-- 
cgit v1.2.3-59-g8ed1b


From 0c12295a741d3186987f96f518cfbdaf01abb087 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 13 Apr 2010 05:03:22 +0000
Subject: ipv4: ipmr: move mroute data into separate structure

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h |  13 +-
 net/ipv4/ipmr.c          | 369 +++++++++++++++++++++++++----------------------
 2 files changed, 200 insertions(+), 182 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 5d06429968d5..72e762ab3e5d 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -59,18 +59,7 @@ struct netns_ipv4 {
 	atomic_t rt_genid;
 
 #ifdef CONFIG_IP_MROUTE
-	struct sock		*mroute_sk;
-	struct timer_list	ipmr_expire_timer;
-	struct list_head	mfc_unres_queue;
-	struct list_head	*mfc_cache_array;
-	struct vif_device	*vif_table;
-	int			maxvif;
-	atomic_t		cache_resolve_queue_len;
-	int			mroute_do_assert;
-	int			mroute_do_pim;
-#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
-	int			mroute_reg_vif_num;
-#endif
+	struct mr_table		*mrt;
 #endif
 };
 #endif
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 21b5edc2f343..498f4e907d52 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -68,6 +68,21 @@
 #define CONFIG_IP_PIMSM	1
 #endif
 
+struct mr_table {
+	struct sock		*mroute_sk;
+	struct timer_list	ipmr_expire_timer;
+	struct list_head	mfc_unres_queue;
+	struct list_head	mfc_cache_array[MFC_LINES];
+	struct vif_device	vif_table[MAXVIFS];
+	int			maxvif;
+	atomic_t		cache_resolve_queue_len;
+	int			mroute_do_assert;
+	int			mroute_do_pim;
+#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
+	int			mroute_reg_vif_num;
+#endif
+};
+
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
    Note that the changes are semaphored via rtnl_lock.
  */
@@ -78,7 +93,7 @@ static DEFINE_RWLOCK(mrt_lock);
  *	Multicast router control variables
  */
 
-#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL)
+#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)
 
 /* Special spinlock for queue of unresolved entries */
 static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -93,11 +108,12 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
-static int ip_mr_forward(struct net *net, struct sk_buff *skb,
-			 struct mfc_cache *cache, int local);
-static int ipmr_cache_report(struct net *net,
+static int ip_mr_forward(struct net *net, struct mr_table *mrt,
+			 struct sk_buff *skb, struct mfc_cache *cache,
+			 int local);
+static int ipmr_cache_report(struct mr_table *mrt,
 			     struct sk_buff *pkt, vifi_t vifi, int assert);
-static int ipmr_fill_mroute(struct net *net, struct sk_buff *skb,
+static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 			    struct mfc_cache *c, struct rtmsg *rtm);
 
 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
@@ -199,12 +215,12 @@ failure:
 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct net *net = dev_net(dev);
+	struct mr_table *mrt = net->ipv4.mrt;
 
 	read_lock(&mrt_lock);
 	dev->stats.tx_bytes += skb->len;
 	dev->stats.tx_packets++;
-	ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num,
-			  IGMPMSG_WHOLEPKT);
+	ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
 	read_unlock(&mrt_lock);
 	kfree_skb(skb);
 	return NETDEV_TX_OK;
@@ -274,17 +290,17 @@ failure:
  *	@notify: Set to 1, if the caller is a notifier_call
  */
 
-static int vif_delete(struct net *net, int vifi, int notify,
+static int vif_delete(struct mr_table *mrt, int vifi, int notify,
 		      struct list_head *head)
 {
 	struct vif_device *v;
 	struct net_device *dev;
 	struct in_device *in_dev;
 
-	if (vifi < 0 || vifi >= net->ipv4.maxvif)
+	if (vifi < 0 || vifi >= mrt->maxvif)
 		return -EADDRNOTAVAIL;
 
-	v = &net->ipv4.vif_table[vifi];
+	v = &mrt->vif_table[vifi];
 
 	write_lock_bh(&mrt_lock);
 	dev = v->dev;
@@ -296,17 +312,17 @@ static int vif_delete(struct net *net, int vifi, int notify,
 	}
 
 #ifdef CONFIG_IP_PIMSM
-	if (vifi == net->ipv4.mroute_reg_vif_num)
-		net->ipv4.mroute_reg_vif_num = -1;
+	if (vifi == mrt->mroute_reg_vif_num)
+		mrt->mroute_reg_vif_num = -1;
 #endif
 
-	if (vifi+1 == net->ipv4.maxvif) {
+	if (vifi+1 == mrt->maxvif) {
 		int tmp;
 		for (tmp=vifi-1; tmp>=0; tmp--) {
-			if (VIF_EXISTS(net, tmp))
+			if (VIF_EXISTS(mrt, tmp))
 				break;
 		}
-		net->ipv4.maxvif = tmp+1;
+		mrt->maxvif = tmp+1;
 	}
 
 	write_unlock_bh(&mrt_lock);
@@ -334,12 +350,13 @@ static inline void ipmr_cache_free(struct mfc_cache *c)
    and reporting error to netlink readers.
  */
 
-static void ipmr_destroy_unres(struct net *net, struct mfc_cache *c)
+static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 {
+	struct net *net = NULL; //mrt->net;
 	struct sk_buff *skb;
 	struct nlmsgerr *e;
 
-	atomic_dec(&net->ipv4.cache_resolve_queue_len);
+	atomic_dec(&mrt->cache_resolve_queue_len);
 
 	while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
 		if (ip_hdr(skb)->version == 0) {
@@ -364,23 +381,23 @@ static void ipmr_destroy_unres(struct net *net, struct mfc_cache *c)
 
 static void ipmr_expire_process(unsigned long arg)
 {
-	struct net *net = (struct net *)arg;
+	struct mr_table *mrt = (struct mr_table *)arg;
 	unsigned long now;
 	unsigned long expires;
 	struct mfc_cache *c, *next;
 
 	if (!spin_trylock(&mfc_unres_lock)) {
-		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies+HZ/10);
+		mod_timer(&mrt->ipmr_expire_timer, jiffies+HZ/10);
 		return;
 	}
 
-	if (list_empty(&net->ipv4.mfc_unres_queue))
+	if (list_empty(&mrt->mfc_unres_queue))
 		goto out;
 
 	now = jiffies;
 	expires = 10*HZ;
 
-	list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) {
+	list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
 		if (time_after(c->mfc_un.unres.expires, now)) {
 			unsigned long interval = c->mfc_un.unres.expires - now;
 			if (interval < expires)
@@ -389,11 +406,11 @@ static void ipmr_expire_process(unsigned long arg)
 		}
 
 		list_del(&c->list);
-		ipmr_destroy_unres(net, c);
+		ipmr_destroy_unres(mrt, c);
 	}
 
-	if (!list_empty(&net->ipv4.mfc_unres_queue))
-		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + expires);
+	if (!list_empty(&mrt->mfc_unres_queue))
+		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 
 out:
 	spin_unlock(&mfc_unres_lock);
@@ -401,7 +418,7 @@ out:
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
 
-static void ipmr_update_thresholds(struct net *net, struct mfc_cache *cache,
+static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
 				   unsigned char *ttls)
 {
 	int vifi;
@@ -410,8 +427,8 @@ static void ipmr_update_thresholds(struct net *net, struct mfc_cache *cache,
 	cache->mfc_un.res.maxvif = 0;
 	memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
 
-	for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) {
-		if (VIF_EXISTS(net, vifi) &&
+	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
+		if (VIF_EXISTS(mrt, vifi) &&
 		    ttls[vifi] && ttls[vifi] < 255) {
 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 			if (cache->mfc_un.res.minvif > vifi)
@@ -422,16 +439,17 @@ static void ipmr_update_thresholds(struct net *net, struct mfc_cache *cache,
 	}
 }
 
-static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
+static int vif_add(struct net *net, struct mr_table *mrt,
+		   struct vifctl *vifc, int mrtsock)
 {
 	int vifi = vifc->vifc_vifi;
-	struct vif_device *v = &net->ipv4.vif_table[vifi];
+	struct vif_device *v = &mrt->vif_table[vifi];
 	struct net_device *dev;
 	struct in_device *in_dev;
 	int err;
 
 	/* Is vif busy ? */
-	if (VIF_EXISTS(net, vifi))
+	if (VIF_EXISTS(mrt, vifi))
 		return -EADDRINUSE;
 
 	switch (vifc->vifc_flags) {
@@ -441,7 +459,7 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
 		 * Special Purpose VIF in PIM
 		 * All the packets will be sent to the daemon
 		 */
-		if (net->ipv4.mroute_reg_vif_num >= 0)
+		if (mrt->mroute_reg_vif_num >= 0)
 			return -EADDRINUSE;
 		dev = ipmr_reg_vif(net);
 		if (!dev)
@@ -519,22 +537,22 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
 	v->dev = dev;
 #ifdef CONFIG_IP_PIMSM
 	if (v->flags&VIFF_REGISTER)
-		net->ipv4.mroute_reg_vif_num = vifi;
+		mrt->mroute_reg_vif_num = vifi;
 #endif
-	if (vifi+1 > net->ipv4.maxvif)
-		net->ipv4.maxvif = vifi+1;
+	if (vifi+1 > mrt->maxvif)
+		mrt->maxvif = vifi+1;
 	write_unlock_bh(&mrt_lock);
 	return 0;
 }
 
-static struct mfc_cache *ipmr_cache_find(struct net *net,
+static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
 					 __be32 origin,
 					 __be32 mcastgrp)
 {
 	int line = MFC_HASH(mcastgrp, origin);
 	struct mfc_cache *c;
 
-	list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) {
+	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
 		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
 			return c;
 	}
@@ -567,8 +585,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(void)
  *	A cache entry has gone into a resolved state from queued
  */
 
-static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc,
-			       struct mfc_cache *c)
+static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
+			       struct mfc_cache *uc, struct mfc_cache *c)
 {
 	struct sk_buff *skb;
 	struct nlmsgerr *e;
@@ -581,7 +599,7 @@ static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc,
 		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
-			if (ipmr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) {
+			if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
 				nlh->nlmsg_len = (skb_tail_pointer(skb) -
 						  (u8 *)nlh);
 			} else {
@@ -595,7 +613,7 @@ static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc,
 
 			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 		} else
-			ip_mr_forward(net, skb, c, 0);
+			ip_mr_forward(net, mrt, skb, c, 0);
 	}
 }
 
@@ -606,7 +624,7 @@ static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc,
  *	Called under mrt_lock.
  */
 
-static int ipmr_cache_report(struct net *net,
+static int ipmr_cache_report(struct mr_table *mrt,
 			     struct sk_buff *pkt, vifi_t vifi, int assert)
 {
 	struct sk_buff *skb;
@@ -639,7 +657,7 @@ static int ipmr_cache_report(struct net *net,
 		memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
 		msg->im_msgtype = IGMPMSG_WHOLEPKT;
 		msg->im_mbz = 0;
-		msg->im_vif = net->ipv4.mroute_reg_vif_num;
+		msg->im_vif = mrt->mroute_reg_vif_num;
 		ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
 		ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
 					     sizeof(struct iphdr));
@@ -671,7 +689,7 @@ static int ipmr_cache_report(struct net *net,
 	skb->transport_header = skb->network_header;
 	}
 
-	if (net->ipv4.mroute_sk == NULL) {
+	if (mrt->mroute_sk == NULL) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -679,7 +697,7 @@ static int ipmr_cache_report(struct net *net,
 	/*
 	 *	Deliver to mrouted
 	 */
-	ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb);
+	ret = sock_queue_rcv_skb(mrt->mroute_sk, skb);
 	if (ret < 0) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
@@ -694,7 +712,7 @@ static int ipmr_cache_report(struct net *net,
  */
 
 static int
-ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
+ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
 {
 	bool found = false;
 	int err;
@@ -702,7 +720,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 	const struct iphdr *iph = ip_hdr(skb);
 
 	spin_lock_bh(&mfc_unres_lock);
-	list_for_each_entry(c, &net->ipv4.mfc_unres_queue, list) {
+	list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
 		if (c->mfc_mcastgrp == iph->daddr &&
 		    c->mfc_origin == iph->saddr) {
 			found = true;
@@ -715,7 +733,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 		 *	Create a new entry if allowable
 		 */
 
-		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
+		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
 		    (c = ipmr_cache_alloc_unres()) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
@@ -733,7 +751,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 		/*
 		 *	Reflect first query at mrouted.
 		 */
-		err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE);
+		err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
 		if (err < 0) {
 			/* If the report failed throw the cache entry
 			   out - Brad Parker
@@ -745,10 +763,10 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 			return err;
 		}
 
-		atomic_inc(&net->ipv4.cache_resolve_queue_len);
-		list_add(&c->list, &net->ipv4.mfc_unres_queue);
+		atomic_inc(&mrt->cache_resolve_queue_len);
+		list_add(&c->list, &mrt->mfc_unres_queue);
 
-		mod_timer(&net->ipv4.ipmr_expire_timer, c->mfc_un.unres.expires);
+		mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
 	}
 
 	/*
@@ -770,14 +788,14 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
  *	MFC cache manipulation by user space mroute daemon
  */
 
-static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
+static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
 {
 	int line;
 	struct mfc_cache *c, *next;
 
 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 
-	list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[line], list) {
+	list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
 			write_lock_bh(&mrt_lock);
@@ -791,7 +809,8 @@ static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
 	return -ENOENT;
 }
 
-static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
+static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
+			struct mfcctl *mfc, int mrtsock)
 {
 	bool found = false;
 	int line;
@@ -802,7 +821,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 
 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 
-	list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) {
+	list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
 			found = true;
@@ -813,7 +832,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 	if (found) {
 		write_lock_bh(&mrt_lock);
 		c->mfc_parent = mfc->mfcc_parent;
-		ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
+		ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
 		if (!mrtsock)
 			c->mfc_flags |= MFC_STATIC;
 		write_unlock_bh(&mrt_lock);
@@ -830,12 +849,12 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 	c->mfc_origin = mfc->mfcc_origin.s_addr;
 	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
 	c->mfc_parent = mfc->mfcc_parent;
-	ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
+	ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
 	if (!mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
 	write_lock_bh(&mrt_lock);
-	list_add(&c->list, &net->ipv4.mfc_cache_array[line]);
+	list_add(&c->list, &mrt->mfc_cache_array[line]);
 	write_unlock_bh(&mrt_lock);
 
 	/*
@@ -843,20 +862,20 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 	 *	need to send on the frames and tidy up.
 	 */
 	spin_lock_bh(&mfc_unres_lock);
-	list_for_each_entry(uc, &net->ipv4.mfc_unres_queue, list) {
+	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
 		if (uc->mfc_origin == c->mfc_origin &&
 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 			list_del(&uc->list);
-			atomic_dec(&net->ipv4.cache_resolve_queue_len);
+			atomic_dec(&mrt->cache_resolve_queue_len);
 			break;
 		}
 	}
-	if (list_empty(&net->ipv4.mfc_unres_queue))
-		del_timer(&net->ipv4.ipmr_expire_timer);
+	if (list_empty(&mrt->mfc_unres_queue))
+		del_timer(&mrt->ipmr_expire_timer);
 	spin_unlock_bh(&mfc_unres_lock);
 
 	if (uc) {
-		ipmr_cache_resolve(net, uc, c);
+		ipmr_cache_resolve(net, mrt, uc, c);
 		ipmr_cache_free(uc);
 	}
 	return 0;
@@ -866,7 +885,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
  *	Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct net *net)
+static void mroute_clean_tables(struct mr_table *mrt)
 {
 	int i;
 	LIST_HEAD(list);
@@ -875,9 +894,9 @@ static void mroute_clean_tables(struct net *net)
 	/*
 	 *	Shut down all active vif entries
 	 */
-	for (i = 0; i < net->ipv4.maxvif; i++) {
-		if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC))
-			vif_delete(net, i, 0, &list);
+	for (i = 0; i < mrt->maxvif; i++) {
+		if (!(mrt->vif_table[i].flags&VIFF_STATIC))
+			vif_delete(mrt, i, 0, &list);
 	}
 	unregister_netdevice_many(&list);
 
@@ -885,7 +904,7 @@ static void mroute_clean_tables(struct net *net)
 	 *	Wipe the cache
 	 */
 	for (i = 0; i < MFC_LINES; i++) {
-		list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[i], list) {
+		list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
 			if (c->mfc_flags&MFC_STATIC)
 				continue;
 			write_lock_bh(&mrt_lock);
@@ -896,11 +915,11 @@ static void mroute_clean_tables(struct net *net)
 		}
 	}
 
-	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
+	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
 		spin_lock_bh(&mfc_unres_lock);
-		list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) {
+		list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
 			list_del(&c->list);
-			ipmr_destroy_unres(net, c);
+			ipmr_destroy_unres(mrt, c);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
 	}
@@ -909,16 +928,17 @@ static void mroute_clean_tables(struct net *net)
 static void mrtsock_destruct(struct sock *sk)
 {
 	struct net *net = sock_net(sk);
+	struct mr_table *mrt = net->ipv4.mrt;
 
 	rtnl_lock();
-	if (sk == net->ipv4.mroute_sk) {
+	if (sk == mrt->mroute_sk) {
 		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
 
 		write_lock_bh(&mrt_lock);
-		net->ipv4.mroute_sk = NULL;
+		mrt->mroute_sk = NULL;
 		write_unlock_bh(&mrt_lock);
 
-		mroute_clean_tables(net);
+		mroute_clean_tables(mrt);
 	}
 	rtnl_unlock();
 }
@@ -936,9 +956,10 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 	struct vifctl vif;
 	struct mfcctl mfc;
 	struct net *net = sock_net(sk);
+	struct mr_table *mrt = net->ipv4.mrt;
 
 	if (optname != MRT_INIT) {
-		if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN))
+		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
 			return -EACCES;
 	}
 
@@ -951,7 +972,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 			return -ENOPROTOOPT;
 
 		rtnl_lock();
-		if (net->ipv4.mroute_sk) {
+		if (mrt->mroute_sk) {
 			rtnl_unlock();
 			return -EADDRINUSE;
 		}
@@ -959,7 +980,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 		ret = ip_ra_control(sk, 1, mrtsock_destruct);
 		if (ret == 0) {
 			write_lock_bh(&mrt_lock);
-			net->ipv4.mroute_sk = sk;
+			mrt->mroute_sk = sk;
 			write_unlock_bh(&mrt_lock);
 
 			IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
@@ -967,7 +988,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 		rtnl_unlock();
 		return ret;
 	case MRT_DONE:
-		if (sk != net->ipv4.mroute_sk)
+		if (sk != mrt->mroute_sk)
 			return -EACCES;
 		return ip_ra_control(sk, 0, NULL);
 	case MRT_ADD_VIF:
@@ -980,9 +1001,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 			return -ENFILE;
 		rtnl_lock();
 		if (optname == MRT_ADD_VIF) {
-			ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk);
+			ret = vif_add(net, mrt, &vif, sk == mrt->mroute_sk);
 		} else {
-			ret = vif_delete(net, vif.vifc_vifi, 0, NULL);
+			ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
 		}
 		rtnl_unlock();
 		return ret;
@@ -999,9 +1020,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 			return -EFAULT;
 		rtnl_lock();
 		if (optname == MRT_DEL_MFC)
-			ret = ipmr_mfc_delete(net, &mfc);
+			ret = ipmr_mfc_delete(mrt, &mfc);
 		else
-			ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk);
+			ret = ipmr_mfc_add(net, mrt, &mfc, sk == mrt->mroute_sk);
 		rtnl_unlock();
 		return ret;
 		/*
@@ -1012,7 +1033,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 		int v;
 		if (get_user(v,(int __user *)optval))
 			return -EFAULT;
-		net->ipv4.mroute_do_assert = (v) ? 1 : 0;
+		mrt->mroute_do_assert = (v) ? 1 : 0;
 		return 0;
 	}
 #ifdef CONFIG_IP_PIMSM
@@ -1026,9 +1047,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 
 		rtnl_lock();
 		ret = 0;
-		if (v != net->ipv4.mroute_do_pim) {
-			net->ipv4.mroute_do_pim = v;
-			net->ipv4.mroute_do_assert = v;
+		if (v != mrt->mroute_do_pim) {
+			mrt->mroute_do_pim = v;
+			mrt->mroute_do_assert = v;
 		}
 		rtnl_unlock();
 		return ret;
@@ -1052,6 +1073,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
 	int olr;
 	int val;
 	struct net *net = sock_net(sk);
+	struct mr_table *mrt = net->ipv4.mrt;
 
 	if (optname != MRT_VERSION &&
 #ifdef CONFIG_IP_PIMSM
@@ -1073,10 +1095,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
 		val = 0x0305;
 #ifdef CONFIG_IP_PIMSM
 	else if (optname == MRT_PIM)
-		val = net->ipv4.mroute_do_pim;
+		val = mrt->mroute_do_pim;
 #endif
 	else
-		val = net->ipv4.mroute_do_assert;
+		val = mrt->mroute_do_assert;
 	if (copy_to_user(optval, &val, olr))
 		return -EFAULT;
 	return 0;
@@ -1093,16 +1115,17 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	struct vif_device *vif;
 	struct mfc_cache *c;
 	struct net *net = sock_net(sk);
+	struct mr_table *mrt = net->ipv4.mrt;
 
 	switch (cmd) {
 	case SIOCGETVIFCNT:
 		if (copy_from_user(&vr, arg, sizeof(vr)))
 			return -EFAULT;
-		if (vr.vifi >= net->ipv4.maxvif)
+		if (vr.vifi >= mrt->maxvif)
 			return -EINVAL;
 		read_lock(&mrt_lock);
-		vif = &net->ipv4.vif_table[vr.vifi];
-		if (VIF_EXISTS(net, vr.vifi)) {
+		vif = &mrt->vif_table[vr.vifi];
+		if (VIF_EXISTS(mrt, vr.vifi)) {
 			vr.icount = vif->pkt_in;
 			vr.ocount = vif->pkt_out;
 			vr.ibytes = vif->bytes_in;
@@ -1120,7 +1143,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
 			return -EFAULT;
 
 		read_lock(&mrt_lock);
-		c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr);
+		c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
 		if (c) {
 			sr.pktcnt = c->mfc_un.res.pkt;
 			sr.bytecnt = c->mfc_un.res.bytes;
@@ -1143,16 +1166,17 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
 {
 	struct net_device *dev = ptr;
 	struct net *net = dev_net(dev);
+	struct mr_table *mrt = net->ipv4.mrt;
 	struct vif_device *v;
 	int ct;
 	LIST_HEAD(list);
 
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
-	v = &net->ipv4.vif_table[0];
-	for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) {
+	v = &mrt->vif_table[0];
+	for (ct = 0; ct < mrt->maxvif; ct++, v++) {
 		if (v->dev == dev)
-			vif_delete(net, ct, 1, &list);
+			vif_delete(mrt, ct, 1, &list);
 	}
 	unregister_netdevice_many(&list);
 	return NOTIFY_DONE;
@@ -1211,11 +1235,11 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
  *	Processing handlers for ipmr_forward
  */
 
-static void ipmr_queue_xmit(struct net *net, struct sk_buff *skb,
-			    struct mfc_cache *c, int vifi)
+static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
+			    struct sk_buff *skb, struct mfc_cache *c, int vifi)
 {
 	const struct iphdr *iph = ip_hdr(skb);
-	struct vif_device *vif = &net->ipv4.vif_table[vifi];
+	struct vif_device *vif = &mrt->vif_table[vifi];
 	struct net_device *dev;
 	struct rtable *rt;
 	int    encap = 0;
@@ -1229,7 +1253,7 @@ static void ipmr_queue_xmit(struct net *net, struct sk_buff *skb,
 		vif->bytes_out += skb->len;
 		vif->dev->stats.tx_bytes += skb->len;
 		vif->dev->stats.tx_packets++;
-		ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT);
+		ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
 		goto out_free;
 	}
 #endif
@@ -1312,12 +1336,12 @@ out_free:
 	return;
 }
 
-static int ipmr_find_vif(struct net_device *dev)
+static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
 {
-	struct net *net = dev_net(dev);
 	int ct;
-	for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
-		if (net->ipv4.vif_table[ct].dev == dev)
+
+	for (ct = mrt->maxvif-1; ct >= 0; ct--) {
+		if (mrt->vif_table[ct].dev == dev)
 			break;
 	}
 	return ct;
@@ -1325,8 +1349,9 @@ static int ipmr_find_vif(struct net_device *dev)
 
 /* "local" means that we should preserve one skb (for local delivery) */
 
-static int ip_mr_forward(struct net *net, struct sk_buff *skb,
-			 struct mfc_cache *cache, int local)
+static int ip_mr_forward(struct net *net, struct mr_table *mrt,
+			 struct sk_buff *skb, struct mfc_cache *cache,
+			 int local)
 {
 	int psend = -1;
 	int vif, ct;
@@ -1338,7 +1363,7 @@ static int ip_mr_forward(struct net *net, struct sk_buff *skb,
 	/*
 	 * Wrong interface: drop packet and (maybe) send PIM assert.
 	 */
-	if (net->ipv4.vif_table[vif].dev != skb->dev) {
+	if (mrt->vif_table[vif].dev != skb->dev) {
 		int true_vifi;
 
 		if (skb_rtable(skb)->fl.iif == 0) {
@@ -1357,26 +1382,26 @@ static int ip_mr_forward(struct net *net, struct sk_buff *skb,
 		}
 
 		cache->mfc_un.res.wrong_if++;
-		true_vifi = ipmr_find_vif(skb->dev);
+		true_vifi = ipmr_find_vif(mrt, skb->dev);
 
-		if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
+		if (true_vifi >= 0 && mrt->mroute_do_assert &&
 		    /* pimsm uses asserts, when switching from RPT to SPT,
 		       so that we cannot check that packet arrived on an oif.
 		       It is bad, but otherwise we would need to move pretty
 		       large chunk of pimd to kernel. Ough... --ANK
 		     */
-		    (net->ipv4.mroute_do_pim ||
+		    (mrt->mroute_do_pim ||
 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
 		    time_after(jiffies,
 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
 			cache->mfc_un.res.last_assert = jiffies;
-			ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF);
+			ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
 		}
 		goto dont_forward;
 	}
 
-	net->ipv4.vif_table[vif].pkt_in++;
-	net->ipv4.vif_table[vif].bytes_in += skb->len;
+	mrt->vif_table[vif].pkt_in++;
+	mrt->vif_table[vif].bytes_in += skb->len;
 
 	/*
 	 *	Forward the frame
@@ -1386,7 +1411,8 @@ static int ip_mr_forward(struct net *net, struct sk_buff *skb,
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
-					ipmr_queue_xmit(net, skb2, cache, psend);
+					ipmr_queue_xmit(net, mrt, skb2, cache,
+							psend);
 			}
 			psend = ct;
 		}
@@ -1395,9 +1421,9 @@ static int ip_mr_forward(struct net *net, struct sk_buff *skb,
 		if (local) {
 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 			if (skb2)
-				ipmr_queue_xmit(net, skb2, cache, psend);
+				ipmr_queue_xmit(net, mrt, skb2, cache, psend);
 		} else {
-			ipmr_queue_xmit(net, skb, cache, psend);
+			ipmr_queue_xmit(net, mrt, skb, cache, psend);
 			return 0;
 		}
 	}
@@ -1417,6 +1443,7 @@ int ip_mr_input(struct sk_buff *skb)
 {
 	struct mfc_cache *cache;
 	struct net *net = dev_net(skb->dev);
+	struct mr_table *mrt = net->ipv4.mrt;
 	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
 
 	/* Packet is looped back after forward, it should not be
@@ -1437,9 +1464,9 @@ int ip_mr_input(struct sk_buff *skb)
 			       that we can forward NO IGMP messages.
 			     */
 			    read_lock(&mrt_lock);
-			    if (net->ipv4.mroute_sk) {
+			    if (mrt->mroute_sk) {
 				    nf_reset(skb);
-				    raw_rcv(net->ipv4.mroute_sk, skb);
+				    raw_rcv(mrt->mroute_sk, skb);
 				    read_unlock(&mrt_lock);
 				    return 0;
 			    }
@@ -1448,7 +1475,7 @@ int ip_mr_input(struct sk_buff *skb)
 	}
 
 	read_lock(&mrt_lock);
-	cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
+	cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);
 
 	/*
 	 *	No usable cache entry
@@ -1466,9 +1493,9 @@ int ip_mr_input(struct sk_buff *skb)
 			skb = skb2;
 		}
 
-		vif = ipmr_find_vif(skb->dev);
+		vif = ipmr_find_vif(mrt, skb->dev);
 		if (vif >= 0) {
-			int err = ipmr_cache_unresolved(net, vif, skb);
+			int err = ipmr_cache_unresolved(mrt, vif, skb);
 			read_unlock(&mrt_lock);
 
 			return err;
@@ -1478,7 +1505,7 @@ int ip_mr_input(struct sk_buff *skb)
 		return -ENODEV;
 	}
 
-	ip_mr_forward(net, skb, cache, local);
+	ip_mr_forward(net, mrt, skb, cache, local);
 
 	read_unlock(&mrt_lock);
 
@@ -1500,6 +1527,7 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
 	struct net_device *reg_dev = NULL;
 	struct iphdr *encap;
 	struct net *net = dev_net(skb->dev);
+	struct mr_table *mrt = net->ipv4.mrt;
 
 	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
 	/*
@@ -1514,8 +1542,8 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
 		return 1;
 
 	read_lock(&mrt_lock);
-	if (net->ipv4.mroute_reg_vif_num >= 0)
-		reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev;
+	if (mrt->mroute_reg_vif_num >= 0)
+		reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
 	if (reg_dev)
 		dev_hold(reg_dev);
 	read_unlock(&mrt_lock);
@@ -1550,13 +1578,14 @@ int pim_rcv_v1(struct sk_buff * skb)
 {
 	struct igmphdr *pim;
 	struct net *net = dev_net(skb->dev);
+	struct mr_table *mrt = net->ipv4.mrt;
 
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
 		goto drop;
 
 	pim = igmp_hdr(skb);
 
-	if (!net->ipv4.mroute_do_pim ||
+	if (!mrt->mroute_do_pim ||
 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
 		goto drop;
 
@@ -1592,7 +1621,7 @@ drop:
 #endif
 
 static int
-ipmr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc_cache *c,
+ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c,
 		 struct rtmsg *rtm)
 {
 	int ct;
@@ -1604,19 +1633,19 @@ ipmr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc_cache *c,
 	if (c->mfc_parent > MAXVIFS)
 		return -ENOENT;
 
-	if (VIF_EXISTS(net, c->mfc_parent))
-		RTA_PUT(skb, RTA_IIF, 4, &net->ipv4.vif_table[c->mfc_parent].dev->ifindex);
+	if (VIF_EXISTS(mrt, c->mfc_parent))
+		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);
 
 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
 
 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
-		if (VIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
+		if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
 				goto rtattr_failure;
 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
 			nhp->rtnh_flags = 0;
 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-			nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex;
+			nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
 			nhp->rtnh_len = sizeof(*nhp);
 		}
 	}
@@ -1634,11 +1663,12 @@ int ipmr_get_route(struct net *net,
 		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 {
 	int err;
+	struct mr_table *mrt = net->ipv4.mrt;
 	struct mfc_cache *cache;
 	struct rtable *rt = skb_rtable(skb);
 
 	read_lock(&mrt_lock);
-	cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst);
+	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
 
 	if (cache == NULL) {
 		struct sk_buff *skb2;
@@ -1652,7 +1682,7 @@ int ipmr_get_route(struct net *net,
 		}
 
 		dev = skb->dev;
-		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
+		if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
 			read_unlock(&mrt_lock);
 			return -ENODEV;
 		}
@@ -1669,14 +1699,14 @@ int ipmr_get_route(struct net *net,
 		iph->saddr = rt->rt_src;
 		iph->daddr = rt->rt_dst;
 		iph->version = 0;
-		err = ipmr_cache_unresolved(net, vif, skb2);
+		err = ipmr_cache_unresolved(mrt, vif, skb2);
 		read_unlock(&mrt_lock);
 		return err;
 	}
 
 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
 		cache->mfc_flags |= MFC_NOTIFY;
-	err = ipmr_fill_mroute(net, skb, cache, rtm);
+	err = ipmr_fill_mroute(mrt, skb, cache, rtm);
 	read_unlock(&mrt_lock);
 	return err;
 }
@@ -1694,11 +1724,13 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net,
 					   struct ipmr_vif_iter *iter,
 					   loff_t pos)
 {
-	for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) {
-		if (!VIF_EXISTS(net, iter->ct))
+	struct mr_table *mrt = net->ipv4.mrt;
+
+	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+		if (!VIF_EXISTS(mrt, iter->ct))
 			continue;
 		if (pos-- == 0)
-			return &net->ipv4.vif_table[iter->ct];
+			return &mrt->vif_table[iter->ct];
 	}
 	return NULL;
 }
@@ -1717,15 +1749,16 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ipmr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr_table *mrt = net->ipv4.mrt;
 
 	++*pos;
 	if (v == SEQ_START_TOKEN)
 		return ipmr_vif_seq_idx(net, iter, 0);
 
-	while (++iter->ct < net->ipv4.maxvif) {
-		if (!VIF_EXISTS(net, iter->ct))
+	while (++iter->ct < mrt->maxvif) {
+		if (!VIF_EXISTS(mrt, iter->ct))
 			continue;
-		return &net->ipv4.vif_table[iter->ct];
+		return &mrt->vif_table[iter->ct];
 	}
 	return NULL;
 }
@@ -1739,6 +1772,7 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq_file_net(seq);
+	struct mr_table *mrt = net->ipv4.mrt;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -1749,7 +1783,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 
 		seq_printf(seq,
 			   "%2Zd %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
-			   vif - net->ipv4.vif_table,
+			   vif - mrt->vif_table,
 			   name, vif->bytes_in, vif->pkt_in,
 			   vif->bytes_out, vif->pkt_out,
 			   vif->flags, vif->local, vif->remote);
@@ -1788,11 +1822,12 @@ struct ipmr_mfc_iter {
 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
 					  struct ipmr_mfc_iter *it, loff_t pos)
 {
+	struct mr_table *mrt = net->ipv4.mrt;
 	struct mfc_cache *mfc;
 
 	read_lock(&mrt_lock);
 	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
-		it->cache = &net->ipv4.mfc_cache_array[it->ct];
+		it->cache = &mrt->mfc_cache_array[it->ct];
 		list_for_each_entry(mfc, it->cache, list)
 			if (pos-- == 0)
 				return mfc;
@@ -1800,7 +1835,7 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
 	read_unlock(&mrt_lock);
 
 	spin_lock_bh(&mfc_unres_lock);
-	it->cache = &net->ipv4.mfc_unres_queue;
+	it->cache = &mrt->mfc_unres_queue;
 	list_for_each_entry(mfc, it->cache, list)
 		if (pos-- == 0)
 			return mfc;
@@ -1827,6 +1862,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	struct mfc_cache *mfc = v;
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr_table *mrt = net->ipv4.mrt;
 
 	++*pos;
 
@@ -1836,13 +1872,13 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (mfc->list.next != it->cache)
 		return list_entry(mfc->list.next, struct mfc_cache, list);
 
-	if (it->cache == &net->ipv4.mfc_unres_queue)
+	if (it->cache == &mrt->mfc_unres_queue)
 		goto end_of_list;
 
-	BUG_ON(it->cache != &net->ipv4.mfc_cache_array[it->ct]);
+	BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);
 
 	while (++it->ct < MFC_LINES) {
-		it->cache = &net->ipv4.mfc_cache_array[it->ct];
+		it->cache = &mrt->mfc_cache_array[it->ct];
 		if (list_empty(it->cache))
 			continue;
 		return list_first_entry(it->cache, struct mfc_cache, list);
@@ -1850,7 +1886,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 	/* exhausted cache_array, show unresolved */
 	read_unlock(&mrt_lock);
-	it->cache = &net->ipv4.mfc_unres_queue;
+	it->cache = &mrt->mfc_unres_queue;
 	it->ct = 0;
 
 	spin_lock_bh(&mfc_unres_lock);
@@ -1868,10 +1904,11 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 {
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr_table *mrt = net->ipv4.mrt;
 
-	if (it->cache == &net->ipv4.mfc_unres_queue)
+	if (it->cache == &mrt->mfc_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
-	else if (it->cache == &net->ipv4.mfc_cache_array[it->ct])
+	else if (it->cache == &mrt->mfc_cache_array[it->ct])
 		read_unlock(&mrt_lock);
 }
 
@@ -1879,6 +1916,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 {
 	int n;
 	struct net *net = seq_file_net(seq);
+	struct mr_table *mrt = net->ipv4.mrt;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -1892,14 +1930,14 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 			   (unsigned long) mfc->mfc_origin,
 			   mfc->mfc_parent);
 
-		if (it->cache != &net->ipv4.mfc_unres_queue) {
+		if (it->cache != &mrt->mfc_unres_queue) {
 			seq_printf(seq, " %8lu %8lu %8lu",
 				   mfc->mfc_un.res.pkt,
 				   mfc->mfc_un.res.bytes,
 				   mfc->mfc_un.res.wrong_if);
 			for (n = mfc->mfc_un.res.minvif;
 			     n < mfc->mfc_un.res.maxvif; n++ ) {
-				if (VIF_EXISTS(net, n) &&
+				if (VIF_EXISTS(mrt, n) &&
 				    mfc->mfc_un.res.ttls[n] < 255)
 					seq_printf(seq,
 					   " %2d:%-3d",
@@ -1951,35 +1989,27 @@ static const struct net_protocol pim_protocol = {
  */
 static int __net_init ipmr_net_init(struct net *net)
 {
+	struct mr_table *mrt;
 	unsigned int i;
 	int err = 0;
 
-	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
-				      GFP_KERNEL);
-	if (!net->ipv4.vif_table) {
+	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+	if (mrt == NULL) {
 		err = -ENOMEM;
 		goto fail;
 	}
 
 	/* Forwarding cache */
-	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
-					    sizeof(struct list_head),
-					    GFP_KERNEL);
-	if (!net->ipv4.mfc_cache_array) {
-		err = -ENOMEM;
-		goto fail_mfc_cache;
-	}
-
 	for (i = 0; i < MFC_LINES; i++)
-		INIT_LIST_HEAD(&net->ipv4.mfc_cache_array[i]);
+		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
 
-	INIT_LIST_HEAD(&net->ipv4.mfc_unres_queue);
+	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
 
-	setup_timer(&net->ipv4.ipmr_expire_timer, ipmr_expire_process,
+	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
 		    (unsigned long)net);
 
 #ifdef CONFIG_IP_PIMSM
-	net->ipv4.mroute_reg_vif_num = -1;
+	mrt->mroute_reg_vif_num = -1;
 #endif
 
 #ifdef CONFIG_PROC_FS
@@ -1989,16 +2019,16 @@ static int __net_init ipmr_net_init(struct net *net)
 	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
 		goto proc_cache_fail;
 #endif
+
+	net->ipv4.mrt = mrt;
 	return 0;
 
 #ifdef CONFIG_PROC_FS
 proc_cache_fail:
 	proc_net_remove(net, "ip_mr_vif");
 proc_vif_fail:
-	kfree(net->ipv4.mfc_cache_array);
+	kfree(mrt);
 #endif
-fail_mfc_cache:
-	kfree(net->ipv4.vif_table);
 fail:
 	return err;
 }
@@ -2009,8 +2039,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
 	proc_net_remove(net, "ip_mr_cache");
 	proc_net_remove(net, "ip_mr_vif");
 #endif
-	kfree(net->ipv4.mfc_cache_array);
-	kfree(net->ipv4.vif_table);
+	kfree(net->ipv4.mrt);
 }
 
 static struct pernet_operations ipmr_net_ops = {
-- 
cgit v1.2.3-59-g8ed1b


From f0ad0860d01e47a3ffd220564c5c653b3afbe962 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 13 Apr 2010 05:03:23 +0000
Subject: ipv4: ipmr: support multiple tables

This patch adds support for multiple independent multicast routing instances,
named "tables".

Userspace multicast routing daemons can bind to a specific table instance by
issuing a setsockopt call using a new option MRT_TABLE. The table number is
stored in the raw socket data and affects all following ipmr setsockopt(),
getsockopt() and ioctl() calls. By default, a single table (RT_TABLE_DEFAULT)
is created with a default routing rule pointing to it. Newly created pimreg
devices have the table number appended ("pimregX"), with the exception of
devices created in the default table, which are named just "pimreg" for
compatibility reasons.

Packets are directed to a specific table instance using routing rules,
similar to how regular routing rules work. Currently iif, oif and mark
are supported as keys; source and destination addresses could be supported
additionally.

Example usage:

- bind pimd/xorp/... to a specific table:

uint32_t table = 123;
setsockopt(fd, IPPROTO_IP, MRT_TABLE, &table, sizeof(table));

- create routing rules directing packets to the new table:

# ip mrule add iif eth0 lookup 123
# ip mrule add oif eth0 lookup 123

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fib_rules.h |   1 +
 include/linux/mroute.h    |   3 +-
 include/net/netns/ipv4.h  |   5 +
 include/net/raw.h         |   1 +
 net/ipv4/Kconfig          |  14 ++
 net/ipv4/ipmr.c           | 399 +++++++++++++++++++++++++++++++++++++++-------
 6 files changed, 361 insertions(+), 62 deletions(-)

(limited to 'net')

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 405e41139a4d..04a397619ebe 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -21,6 +21,7 @@
 #define FIB_RULES_IPV4		AF_INET
 #define FIB_RULES_IPV6		AF_INET6
 #define FIB_RULES_DECNET	AF_DECnet
+#define FIB_RULES_IPMR		128
 
 struct fib_rule_hdr {
 	__u8		family;
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 7ff6c77d6008..fa04b246c9ae 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -27,7 +27,8 @@
 #define MRT_DEL_MFC	(MRT_BASE+5)	/* Delete a multicast forwarding entry	*/
 #define MRT_VERSION	(MRT_BASE+6)	/* Get the kernel multicast version	*/
 #define MRT_ASSERT	(MRT_BASE+7)	/* Activate PIM assert mode		*/
-#define MRT_PIM		(MRT_BASE+8)	/* enable PIM code	*/
+#define MRT_PIM		(MRT_BASE+8)	/* enable PIM code			*/
+#define MRT_TABLE	(MRT_BASE+9)	/* Specify mroute table ID		*/
 
 #define SIOCGETVIFCNT	SIOCPROTOPRIVATE	/* IP protocol privates */
 #define SIOCGETSGCNT	(SIOCPROTOPRIVATE+1)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 72e762ab3e5d..ae07feec6446 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -59,7 +59,12 @@ struct netns_ipv4 {
 	atomic_t rt_genid;
 
 #ifdef CONFIG_IP_MROUTE
+#ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES
 	struct mr_table		*mrt;
+#else
+	struct list_head	mr_tables;
+	struct fib_rules_ops	*mr_rules_ops;
+#endif
 #endif
 };
 #endif
diff --git a/include/net/raw.h b/include/net/raw.h
index 67cc64369432..43c57502659b 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -61,6 +61,7 @@ struct raw_sock {
 	/* inet_sock has to be the first member */
 	struct inet_sock   inet;
 	struct icmp_filter filter;
+	u32		   ipmr_table;
 };
 
 static inline struct raw_sock *raw_sk(const struct sock *sk)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index c9a1c68767ff..be597749c385 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -250,6 +250,20 @@ config IP_MROUTE
 	  <file:Documentation/networking/multicast.txt>. If you haven't heard
 	  about it, you don't need it.
 
+config IP_MROUTE_MULTIPLE_TABLES
+	bool "IP: multicast policy routing"
+	depends on IP_ADVANCED_ROUTER
+	select FIB_RULES
+	help
+	  Normally, a multicast router runs a userspace daemon and decides
+	  what to do with a multicast packet based on the source and
+	  destination addresses. If you say Y here, the multicast router
+	  will also be able to take interfaces and packet marks into
+	  account and run multiple instances of userspace daemons
+	  simultaneously, each one handling a single table.
+
+	  If unsure, say N.
+
 config IP_PIMSM_V1
 	bool "IP: PIM-SM version 1 support"
 	depends on IP_MROUTE
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 498f4e907d52..5df5fd74c6d1 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -63,12 +63,15 @@
 #include <net/ipip.h>
 #include <net/checksum.h>
 #include <net/netlink.h>
+#include <net/fib_rules.h>
 
 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
 #define CONFIG_IP_PIMSM	1
 #endif
 
 struct mr_table {
+	struct list_head	list;
+	u32			id;
 	struct sock		*mroute_sk;
 	struct timer_list	ipmr_expire_timer;
 	struct list_head	mfc_unres_queue;
@@ -83,6 +86,14 @@ struct mr_table {
 #endif
 };
 
+struct ipmr_rule {
+	struct fib_rule		common;
+};
+
+struct ipmr_result {
+	struct mr_table		*mrt;
+};
+
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
    Note that the changes are semaphored via rtnl_lock.
  */
@@ -108,6 +119,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
+static struct mr_table *ipmr_new_table(struct net *net, u32 id);
 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
 			 struct sk_buff *skb, struct mfc_cache *cache,
 			 int local);
@@ -115,6 +127,206 @@ static int ipmr_cache_report(struct mr_table *mrt,
 			     struct sk_buff *pkt, vifi_t vifi, int assert);
 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 			    struct mfc_cache *c, struct rtmsg *rtm);
+static void ipmr_expire_process(unsigned long arg);
+
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+#define ipmr_for_each_table(mrt, net) \
+	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+
+static struct mr_table *ipmr_get_table(struct net *net, u32 id)
+{
+	struct mr_table *mrt;
+
+	ipmr_for_each_table(mrt, net) {
+		if (mrt->id == id)
+			return mrt;
+	}
+	return NULL;
+}
+
+static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
+			   struct mr_table **mrt)
+{
+	struct ipmr_result res;
+	struct fib_lookup_arg arg = { .result = &res, };
+	int err;
+
+	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
+	if (err < 0)
+		return err;
+	*mrt = res.mrt;
+	return 0;
+}
+
+static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
+			    int flags, struct fib_lookup_arg *arg)
+{
+	struct ipmr_result *res = arg->result;
+	struct mr_table *mrt;
+
+	switch (rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+	case FR_ACT_UNREACHABLE:
+		return -ENETUNREACH;
+	case FR_ACT_PROHIBIT:
+		return -EACCES;
+	case FR_ACT_BLACKHOLE:
+	default:
+		return -EINVAL;
+	}
+
+	mrt = ipmr_get_table(rule->fr_net, rule->table);
+	if (mrt == NULL)
+		return -EAGAIN;
+	res->mrt = mrt;
+	return 0;
+}
+
+static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+{
+	return 1;
+}
+
+static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
+	FRA_GENERIC_POLICY,
+};
+
+static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+			       struct fib_rule_hdr *frh, struct nlattr **tb)
+{
+	return 0;
+}
+
+static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			     struct nlattr **tb)
+{
+	return 1;
+}
+
+static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			  struct fib_rule_hdr *frh)
+{
+	frh->dst_len = 0;
+	frh->src_len = 0;
+	frh->tos     = 0;
+	return 0;
+}
+
+static struct fib_rules_ops ipmr_rules_ops_template = {
+	.family		= FIB_RULES_IPMR,
+	.rule_size	= sizeof(struct ipmr_rule),
+	.addr_size	= sizeof(u32),
+	.action		= ipmr_rule_action,
+	.match		= ipmr_rule_match,
+	.configure	= ipmr_rule_configure,
+	.compare	= ipmr_rule_compare,
+	.default_pref	= fib_default_rule_pref,
+	.fill		= ipmr_rule_fill,
+	.nlgroup	= RTNLGRP_IPV4_RULE,
+	.policy		= ipmr_rule_policy,
+	.owner		= THIS_MODULE,
+};
+
+static int __net_init ipmr_rules_init(struct net *net)
+{
+	struct fib_rules_ops *ops;
+	struct mr_table *mrt;
+	int err;
+
+	ops = fib_rules_register(&ipmr_rules_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+
+	INIT_LIST_HEAD(&net->ipv4.mr_tables);
+
+	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
+	if (mrt == NULL) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
+	if (err < 0)
+		goto err2;
+
+	net->ipv4.mr_rules_ops = ops;
+	return 0;
+
+err2:
+	kfree(mrt);
+err1:
+	fib_rules_unregister(ops);
+	return err;
+}
+
+static void __net_exit ipmr_rules_exit(struct net *net)
+{
+	struct mr_table *mrt, *next;
+
+	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
+		kfree(mrt);
+	fib_rules_unregister(net->ipv4.mr_rules_ops);
+}
+#else
+#define ipmr_for_each_table(mrt, net) \
+	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
+
+static struct mr_table *ipmr_get_table(struct net *net, u32 id)
+{
+	return net->ipv4.mrt;
+}
+
+static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
+			   struct mr_table **mrt)
+{
+	*mrt = net->ipv4.mrt;
+	return 0;
+}
+
+static int __net_init ipmr_rules_init(struct net *net)
+{
+	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
+	return net->ipv4.mrt ? 0 : -ENOMEM;
+}
+
+static void __net_exit ipmr_rules_exit(struct net *net)
+{
+	kfree(net->ipv4.mrt);
+}
+#endif
+
+static struct mr_table *ipmr_new_table(struct net *net, u32 id)
+{
+	struct mr_table *mrt;
+	unsigned int i;
+
+	mrt = ipmr_get_table(net, id);
+	if (mrt != NULL)
+		return mrt;
+
+	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+	if (mrt == NULL)
+		return NULL;
+	mrt->id = id;
+
+	/* Forwarding cache */
+	for (i = 0; i < MFC_LINES; i++)
+		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
+
+	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+
+	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
+		    (unsigned long)mrt);
+
+#ifdef CONFIG_IP_PIMSM
+	mrt->mroute_reg_vif_num = -1;
+#endif
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+	return mrt;
+}
 
 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 
@@ -215,7 +427,17 @@ failure:
 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct net *net = dev_net(dev);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
+	struct flowi fl = {
+		.oif		= dev->ifindex,
+		.iif		= skb->skb_iif,
+		.mark		= skb->mark,
+	};
+	int err;
+
+	err = ipmr_fib_lookup(net, &fl, &mrt);
+	if (err < 0)
+		return err;
 
 	read_lock(&mrt_lock);
 	dev->stats.tx_bytes += skb->len;
@@ -240,12 +462,19 @@ static void reg_vif_setup(struct net_device *dev)
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
-static struct net_device *ipmr_reg_vif(struct net *net)
+static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
 {
 	struct net_device *dev;
 	struct in_device *in_dev;
+	char name[IFNAMSIZ];
+
+	/* "pimreg" + up to 10 digits + NUL can exceed IFNAMSIZ: bound the write */
+	if (mrt->id == RT_TABLE_DEFAULT)
+		snprintf(name, sizeof(name), "pimreg");
+	else
+		snprintf(name, sizeof(name), "pimreg%u", mrt->id);
 
-	dev = alloc_netdev(0, "pimreg", reg_vif_setup);
+	dev = alloc_netdev(0, name, reg_vif_setup);
 
 	if (dev == NULL)
 		return NULL;
@@ -461,7 +689,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
 		 */
 		if (mrt->mroute_reg_vif_num >= 0)
 			return -EADDRINUSE;
-		dev = ipmr_reg_vif(net);
+		dev = ipmr_reg_vif(net, mrt);
 		if (!dev)
 			return -ENOBUFS;
 		err = dev_set_allmulti(dev, 1);
@@ -928,17 +1156,19 @@ static void mroute_clean_tables(struct mr_table *mrt)
 static void mrtsock_destruct(struct sock *sk)
 {
 	struct net *net = sock_net(sk);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
 
 	rtnl_lock();
-	if (sk == mrt->mroute_sk) {
-		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
+	ipmr_for_each_table(mrt, net) {
+		if (sk == mrt->mroute_sk) {
+			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
 
-		write_lock_bh(&mrt_lock);
-		mrt->mroute_sk = NULL;
-		write_unlock_bh(&mrt_lock);
+			write_lock_bh(&mrt_lock);
+			mrt->mroute_sk = NULL;
+			write_unlock_bh(&mrt_lock);
 
-		mroute_clean_tables(mrt);
+			mroute_clean_tables(mrt);
+		}
 	}
 	rtnl_unlock();
 }
@@ -956,7 +1186,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 	struct vifctl vif;
 	struct mfcctl mfc;
 	struct net *net = sock_net(sk);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
+
+	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return -ENOENT;
 
 	if (optname != MRT_INIT) {
 		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
@@ -1054,6 +1288,29 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 		rtnl_unlock();
 		return ret;
 	}
+#endif
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+	case MRT_TABLE:
+	{
+		u32 v;
+
+		if (optlen != sizeof(u32))
+			return -EINVAL;
+		if (get_user(v, (u32 __user *)optval))
+			return -EFAULT;
+		if (sk == mrt->mroute_sk)
+			return -EBUSY;
+
+		rtnl_lock();
+		ret = 0;
+		/* Bind the socket to table v only if that table really exists. */
+		if (!ipmr_new_table(net, v))
+			ret = -ENOMEM;
+		else
+			raw_sk(sk)->ipmr_table = v;
+		rtnl_unlock();
+		return ret;
+	}
 #endif
 	/*
 	 *	Spurious command, or MRT_VERSION which you cannot
@@ -1073,7 +1328,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
 	int olr;
 	int val;
 	struct net *net = sock_net(sk);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
+
+	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return -ENOENT;
 
 	if (optname != MRT_VERSION &&
 #ifdef CONFIG_IP_PIMSM
@@ -1115,7 +1374,11 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	struct vif_device *vif;
 	struct mfc_cache *c;
 	struct net *net = sock_net(sk);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
+
+	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return -ENOENT;
 
 	switch (cmd) {
 	case SIOCGETVIFCNT:
@@ -1166,17 +1429,20 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
 {
 	struct net_device *dev = ptr;
 	struct net *net = dev_net(dev);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
 	struct vif_device *v;
 	int ct;
 	LIST_HEAD(list);
 
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
-	v = &mrt->vif_table[0];
-	for (ct = 0; ct < mrt->maxvif; ct++, v++) {
-		if (v->dev == dev)
-			vif_delete(mrt, ct, 1, &list);
+
+	ipmr_for_each_table(mrt, net) {
+		v = &mrt->vif_table[0];
+		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
+			if (v->dev == dev)
+				vif_delete(mrt, ct, 1, &list);
+		}
 	}
 	unregister_netdevice_many(&list);
 	return NOTIFY_DONE;
@@ -1443,8 +1709,9 @@ int ip_mr_input(struct sk_buff *skb)
 {
 	struct mfc_cache *cache;
 	struct net *net = dev_net(skb->dev);
-	struct mr_table *mrt = net->ipv4.mrt;
 	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
+	struct mr_table *mrt;
+	int err;
 
 	/* Packet is looped back after forward, it should not be
 	   forwarded second time, but still can be delivered locally.
@@ -1452,6 +1719,13 @@ int ip_mr_input(struct sk_buff *skb)
 	if (IPCB(skb)->flags&IPSKB_FORWARDED)
 		goto dont_forward;
 
+	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
+	if (err < 0) {
+		/* No table for this flow: drop the packet rather than leak it. */
+		kfree_skb(skb);
+		return err;
+	}
+
 	if (!local) {
 		    if (IPCB(skb)->opt.router_alert) {
 			    if (ip_call_ra_chain(skb))
@@ -1522,12 +1793,11 @@ dont_forward:
 }
 
 #ifdef CONFIG_IP_PIMSM
-static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
+static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
+		     unsigned int pimlen)
 {
 	struct net_device *reg_dev = NULL;
 	struct iphdr *encap;
-	struct net *net = dev_net(skb->dev);
-	struct mr_table *mrt = net->ipv4.mrt;
 
 	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
 	/*
@@ -1578,18 +1848,21 @@ int pim_rcv_v1(struct sk_buff * skb)
 {
 	struct igmphdr *pim;
 	struct net *net = dev_net(skb->dev);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
 
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
 		goto drop;
 
 	pim = igmp_hdr(skb);
 
+	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
+		goto drop;
+
 	if (!mrt->mroute_do_pim ||
 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
 		goto drop;
 
-	if (__pim_rcv(skb, sizeof(*pim))) {
+	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
 drop:
 		kfree_skb(skb);
 	}
@@ -1601,6 +1874,8 @@ drop:
 static int pim_rcv(struct sk_buff * skb)
 {
 	struct pimreghdr *pim;
+	struct net *net = dev_net(skb->dev);
+	struct mr_table *mrt;
 
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
 		goto drop;
@@ -1612,7 +1887,10 @@ static int pim_rcv(struct sk_buff * skb)
 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 		goto drop;
 
-	if (__pim_rcv(skb, sizeof(*pim))) {
+	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
+		goto drop;
+
+	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
 drop:
 		kfree_skb(skb);
 	}
@@ -1663,10 +1941,14 @@ int ipmr_get_route(struct net *net,
 		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 {
 	int err;
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
 	struct mfc_cache *cache;
 	struct rtable *rt = skb_rtable(skb);
 
+	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return -ENOENT;
+
 	read_lock(&mrt_lock);
 	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
 
@@ -1717,6 +1999,7 @@ int ipmr_get_route(struct net *net,
  */
 struct ipmr_vif_iter {
 	struct seq_net_private p;
+	struct mr_table *mrt;
 	int ct;
 };
 
@@ -1724,7 +2007,7 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net,
 					   struct ipmr_vif_iter *iter,
 					   loff_t pos)
 {
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt = iter->mrt;
 
 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
 		if (!VIF_EXISTS(mrt, iter->ct))
@@ -1738,7 +2021,15 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net,
 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(mrt_lock)
 {
+	struct ipmr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr_table *mrt;
+
+	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return ERR_PTR(-ENOENT);
+
+	iter->mrt = mrt;
 
 	read_lock(&mrt_lock);
 	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
@@ -1749,7 +2040,7 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ipmr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt = iter->mrt;
 
 	++*pos;
 	if (v == SEQ_START_TOKEN)
@@ -1771,8 +2062,8 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
 
 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 {
-	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct ipmr_vif_iter *iter = seq->private;
+	struct mr_table *mrt = iter->mrt;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -1814,6 +2105,7 @@ static const struct file_operations ipmr_vif_fops = {
 
 struct ipmr_mfc_iter {
 	struct seq_net_private p;
+	struct mr_table *mrt;
 	struct list_head *cache;
 	int ct;
 };
@@ -1822,7 +2114,7 @@ struct ipmr_mfc_iter {
 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
 					  struct ipmr_mfc_iter *it, loff_t pos)
 {
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt = it->mrt;
 	struct mfc_cache *mfc;
 
 	read_lock(&mrt_lock);
@@ -1850,7 +2142,13 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr_table *mrt;
+
+	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return ERR_PTR(-ENOENT);
 
+	it->mrt = mrt;
 	it->cache = NULL;
 	it->ct = 0;
 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
@@ -1862,7 +2160,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	struct mfc_cache *mfc = v;
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt = it->mrt;
 
 	++*pos;
 
@@ -1903,8 +2201,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 {
 	struct ipmr_mfc_iter *it = seq->private;
-	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt = it->mrt;
 
 	if (it->cache == &mrt->mfc_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
@@ -1915,8 +2212,6 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 {
 	int n;
-	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = net->ipv4.mrt;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -1924,6 +2219,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 	} else {
 		const struct mfc_cache *mfc = v;
 		const struct ipmr_mfc_iter *it = seq->private;
+		const struct mr_table *mrt = it->mrt;
 
 		seq_printf(seq, "%08lX %08lX %-3hd",
 			   (unsigned long) mfc->mfc_mcastgrp,
@@ -1989,28 +2285,11 @@ static const struct net_protocol pim_protocol = {
  */
 static int __net_init ipmr_net_init(struct net *net)
 {
-	struct mr_table *mrt;
-	unsigned int i;
-	int err = 0;
+	int err;
 
-	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
-	if (mrt == NULL) {
-		err = -ENOMEM;
+	err = ipmr_rules_init(net);
+	if (err < 0)
 		goto fail;
-	}
-
-	/* Forwarding cache */
-	for (i = 0; i < MFC_LINES; i++)
-		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
-
-	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
-	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
-		    (unsigned long)net);
-
-#ifdef CONFIG_IP_PIMSM
-	mrt->mroute_reg_vif_num = -1;
-#endif
 
 #ifdef CONFIG_PROC_FS
 	err = -ENOMEM;
@@ -2019,15 +2298,13 @@ static int __net_init ipmr_net_init(struct net *net)
 	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
 		goto proc_cache_fail;
 #endif
-
-	net->ipv4.mrt = mrt;
 	return 0;
 
 #ifdef CONFIG_PROC_FS
 proc_cache_fail:
 	proc_net_remove(net, "ip_mr_vif");
 proc_vif_fail:
-	kfree(mrt);
+	ipmr_rules_exit(net);
 #endif
 fail:
 	return err;
@@ -2039,7 +2316,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
 	proc_net_remove(net, "ip_mr_cache");
 	proc_net_remove(net, "ip_mr_vif");
 #endif
-	kfree(net->ipv4.mrt);
+	ipmr_rules_exit(net);
 }
 
 static struct pernet_operations ipmr_net_ops = {
-- 
cgit v1.2.3-59-g8ed1b


From b0e28f1effd1d840b36e961edc1def81e01b1ca1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 15 Apr 2010 00:14:07 -0700
Subject: net: netif_rx() must disable preemption

Eric Paris reported netif_rx() is calling smp_processor_id() from
preemptible context, in particular when caller is
ip_dev_loopback_xmit().

RPS commit added this smp_processor_id() call, this patch makes sure
preemption is disabled. get_rps_cpu() wants rcu_read_lock() anyway, we
can do it a bit earlier.

Reported-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 876b1112d5ba..e8041eb76ac1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2206,6 +2206,7 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
+ * rcu_read_lock must be held on entry.
  */
 static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 {
@@ -2217,8 +2218,6 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 	u8 ip_proto;
 	u32 addr1, addr2, ports, ihl;
 
-	rcu_read_lock();
-
 	if (skb_rx_queue_recorded(skb)) {
 		u16 index = skb_get_rx_queue(skb);
 		if (unlikely(index >= dev->num_rx_queues)) {
@@ -2296,7 +2295,6 @@ got_hash:
 	}
 
 done:
-	rcu_read_unlock();
 	return cpu;
 }
 
@@ -2392,7 +2390,7 @@ enqueue:
 
 int netif_rx(struct sk_buff *skb)
 {
-	int cpu;
+	int ret;
 
 	/* if netpoll wants it, pretend we never saw it */
 	if (netpoll_rx(skb))
@@ -2402,14 +2400,24 @@ int netif_rx(struct sk_buff *skb)
 		net_timestamp(skb);
 
 #ifdef CONFIG_RPS
-	cpu = get_rps_cpu(skb->dev, skb);
-	if (cpu < 0)
-		cpu = smp_processor_id();
+	{
+		int cpu;
+
+		/* rcu_read_lock() alone is preemptible with preemptible RCU */
+		preempt_disable();
+		rcu_read_lock();
+		cpu = get_rps_cpu(skb->dev, skb);
+		if (cpu < 0)
+			cpu = smp_processor_id();
+		ret = enqueue_to_backlog(skb, cpu);
+		rcu_read_unlock();
+		preempt_enable();
+	}
 #else
-	cpu = smp_processor_id();
+	ret = enqueue_to_backlog(skb, get_cpu());
+	put_cpu();
 #endif
-
-	return enqueue_to_backlog(skb, cpu);
+	return ret;
 }
 EXPORT_SYMBOL(netif_rx);
 
-- 
cgit v1.2.3-59-g8ed1b


From ea2d9b41bd418894d1ee25de1642c3325d71c397 Mon Sep 17 00:00:00 2001
From: Bart De Schuymer <bdschuym@pandora.be>
Date: Thu, 15 Apr 2010 12:14:51 +0200
Subject: netfilter: bridge-netfilter: simplify IP DNAT

Remove br_netfilter.c::br_nf_local_out(). The function
br_nf_local_out() was needed because the PF_BRIDGE::LOCAL_OUT hook
could be called when IP DNAT happens on to-be-bridged traffic. The
new scheme eliminates this mess.

Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge.h |  17 ++++--
 net/bridge/br_device.c           |   9 +++-
 net/bridge/br_netfilter.c        | 114 +++++++--------------------------------
 3 files changed, 40 insertions(+), 100 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index f8105e54716a..ffab6c423a57 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -41,9 +41,8 @@ enum nf_br_hook_priorities {
 
 #define BRNF_PKT_TYPE			0x01
 #define BRNF_BRIDGED_DNAT		0x02
-#define BRNF_DONT_TAKE_PARENT		0x04
-#define BRNF_BRIDGED			0x08
-#define BRNF_NF_BRIDGE_PREROUTING	0x10
+#define BRNF_BRIDGED			0x04
+#define BRNF_NF_BRIDGE_PREROUTING	0x08
 
 
 /* Only used in br_forward.c */
@@ -68,6 +67,18 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
 	}
 }
 
+extern int br_handle_frame_finish(struct sk_buff *skb);
+/* Only used in br_device.c */
+static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
+{
+	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+
+	skb_pull(skb, ETH_HLEN);
+	nf_bridge->mask ^= BRNF_BRIDGED_DNAT;
+	skb->dev = nf_bridge->physindev;
+	return br_handle_frame_finish(skb);
+}
+
 /* This is called by the IP fragmenting code and it ensures there is
  * enough room for the encapsulating header (if there is one). */
 static inline unsigned int nf_bridge_pad(const struct sk_buff *skb)
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 5b8a6e73b02f..007bde87415d 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -15,7 +15,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
-
+#include <linux/netfilter_bridge.h>
 #include <asm/uaccess.h>
 #include "br_private.h"
 
@@ -28,6 +28,13 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct net_bridge_mdb_entry *mdst;
 	struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
 
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
+		br_nf_pre_routing_finish_bridge_slow(skb);
+		return NETDEV_TX_OK;
+	}
+#endif
+
 	brstats->tx_packets++;
 	brstats->tx_bytes += skb->len;
 
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index dd6f538ba0b0..05dc6304992c 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -246,8 +246,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 }
 
 /* This requires some explaining. If DNAT has taken place,
- * we will need to fix up the destination Ethernet address,
- * and this is a tricky process.
+ * we will need to fix up the destination Ethernet address.
  *
  * There are two cases to consider:
  * 1. The packet was DNAT'ed to a device in the same bridge
@@ -261,52 +260,38 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
  * call ip_route_input() and to look at skb->dst->dev, which is
  * changed to the destination device if ip_route_input() succeeds.
  *
- * Let us first consider the case that ip_route_input() succeeds:
- *
- * If skb->dst->dev equals the logical bridge device the packet
- * came in on, we can consider this bridging. The packet is passed
- * through the neighbour output function to build a new destination
- * MAC address, which will make the packet enter br_nf_local_out()
- * not much later. In that function it is assured that the iptables
- * FORWARD chain is traversed for the packet.
+ * Let's first consider the case that ip_route_input() succeeds:
  *
+ * If the output device equals the logical bridge device the packet
+ * came in on, we can consider this bridging. The corresponding MAC
+ * address will be obtained in br_nf_pre_routing_finish_bridge.
  * Otherwise, the packet is considered to be routed and we just
  * change the destination MAC address so that the packet will
  * later be passed up to the IP stack to be routed. For a redirected
  * packet, ip_route_input() will give back the localhost as output device,
  * which differs from the bridge device.
  *
- * Let us now consider the case that ip_route_input() fails:
+ * Let's now consider the case that ip_route_input() fails:
  *
  * This can be because the destination address is martian, in which case
  * the packet will be dropped.
- * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input()
- * will fail, while __ip_route_output_key() will return success. The source
- * address for __ip_route_output_key() is set to zero, so __ip_route_output_key
+ * If IP forwarding is disabled, ip_route_input() will fail, while
+ * ip_route_output_key() can return success. The source
+ * address for ip_route_output_key() is set to zero, so ip_route_output_key()
  * thinks we're handling a locally generated packet and won't care
- * if IP forwarding is allowed. We send a warning message to the users's
- * log telling her to put IP forwarding on.
- *
- * ip_route_input() will also fail if there is no route available.
- * In that case we just drop the packet.
- *
- * --Lennert, 20020411
- * --Bart, 20020416 (updated)
- * --Bart, 20021007 (updated)
- * --Bart, 20062711 (updated) */
+ * if IP forwarding is enabled. If the output device equals the logical bridge
+ * device, we proceed as if ip_route_input() succeeded. If it differs from the
+ * logical bridge port or if ip_route_output_key() fails we drop the packet.
+ */
+
 static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
 {
-	if (skb->pkt_type == PACKET_OTHERHOST) {
-		skb->pkt_type = PACKET_HOST;
-		skb->nf_bridge->mask |= BRNF_PKT_TYPE;
-	}
-	skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
-
 	skb->dev = bridge_parent(skb->dev);
 	if (skb->dev) {
 		struct dst_entry *dst = skb_dst(skb);
 
 		nf_bridge_pull_encap_header(skb);
+		skb->nf_bridge->mask |= BRNF_BRIDGED_DNAT;
 
 		if (dst->hh)
 			return neigh_hh_output(dst->hh, skb);
@@ -368,9 +353,6 @@ free_skb:
 		} else {
 			if (skb_dst(skb)->dev == dev) {
 bridged_dnat:
-				/* Tell br_nf_local_out this is a
-				 * bridged frame */
-				nf_bridge->mask |= BRNF_BRIDGED_DNAT;
 				skb->dev = nf_bridge->physindev;
 				nf_bridge_push_encap_header(skb);
 				NF_HOOK_THRESH(NFPROTO_BRIDGE,
@@ -721,54 +703,6 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
-/* PF_BRIDGE/LOCAL_OUT ***********************************************
- *
- * This function sees both locally originated IP packets and forwarded
- * IP packets (in both cases the destination device is a bridge
- * device). It also sees bridged-and-DNAT'ed packets.
- *
- * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged
- * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward()
- * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority
- * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
- * will be executed.
- */
-static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    int (*okfn)(struct sk_buff *))
-{
-	struct net_device *realindev;
-	struct nf_bridge_info *nf_bridge;
-
-	if (!skb->nf_bridge)
-		return NF_ACCEPT;
-
-	/* Need exclusive nf_bridge_info since we might have multiple
-	 * different physoutdevs. */
-	if (!nf_bridge_unshare(skb))
-		return NF_DROP;
-
-	nf_bridge = skb->nf_bridge;
-	if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT))
-		return NF_ACCEPT;
-
-	/* Bridged, take PF_BRIDGE/FORWARD.
-	 * (see big note in front of br_nf_pre_routing_finish) */
-	nf_bridge->physoutdev = skb->dev;
-	realindev = nf_bridge->physindev;
-
-	if (nf_bridge->mask & BRNF_PKT_TYPE) {
-		skb->pkt_type = PACKET_OTHERHOST;
-		nf_bridge->mask ^= BRNF_PKT_TYPE;
-	}
-	nf_bridge_push_encap_header(skb);
-
-	NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
-		br_forward_finish);
-	return NF_STOLEN;
-}
-
 #if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
@@ -797,10 +731,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 	struct net_device *realoutdev = bridge_parent(skb->dev);
 	u_int8_t pf;
 
-	if (!nf_bridge)
-		return NF_ACCEPT;
-
-	if (!(nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT)))
+	if (!nf_bridge || !(nf_bridge->mask & BRNF_BRIDGED))
 		return NF_ACCEPT;
 
 	if (!realoutdev)
@@ -847,10 +778,8 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
-/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
- * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
- * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
- * ip_refrag() can return NF_STOLEN. */
+/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
+ * br_dev_queue_push_xmit is called afterwards */
 static struct nf_hook_ops br_nf_ops[] __read_mostly = {
 	{
 		.hook = br_nf_pre_routing,
@@ -880,13 +809,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
 		.hooknum = NF_BR_FORWARD,
 		.priority = NF_BR_PRI_BRNF,
 	},
-	{
-		.hook = br_nf_local_out,
-		.owner = THIS_MODULE,
-		.pf = PF_BRIDGE,
-		.hooknum = NF_BR_LOCAL_OUT,
-		.priority = NF_BR_PRI_FIRST,
-	},
 	{
 		.hook = br_nf_post_routing,
 		.owner = THIS_MODULE,
-- 
cgit v1.2.3-59-g8ed1b


From e179e6322ac334e21a3c6d669d95bc967e5d0a80 Mon Sep 17 00:00:00 2001
From: Bart De Schuymer <bdschuym@pandora.be>
Date: Thu, 15 Apr 2010 12:26:39 +0200
Subject: netfilter: bridge-netfilter: Fix MAC header handling with IP DNAT

- fix IP DNAT on vlan- or pppoe-encapsulated traffic: The functions
neigh_hh_output() or dst->neighbour->output() overwrite the complete
Ethernet header, although we only need the destination MAC address.
For encapsulated packets, they ended up overwriting the encapsulating
header. The new code copies the Ethernet source MAC address and
protocol number before calling dst->neighbour->output(). The Ethernet
source MAC and protocol number are copied back in place in
br_nf_pre_routing_finish_bridge_slow(). This also makes the IP DNAT
more transparent because in the old scheme the source MAC of the
bridge was copied into the source address in the Ethernet header. We
also let skb->protocol equal ETH_P_IP resp. ETH_P_IPV6 during the
execution of the PF_INET resp. PF_INET6 hooks.

- Speed up IP DNAT by calling neigh_hh_bridge() instead of
neigh_hh_output(): if dst->hh is available, we already know the MAC
address so we can just copy it.

Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge.h |  5 ++-
 include/net/neighbour.h          | 14 +++++++
 net/bridge/br_netfilter.c        | 90 +++++++++++++++++++++++++++++-----------
 3 files changed, 83 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index ffab6c423a57..ea0e44b90432 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -43,7 +43,8 @@ enum nf_br_hook_priorities {
 #define BRNF_BRIDGED_DNAT		0x02
 #define BRNF_BRIDGED			0x04
 #define BRNF_NF_BRIDGE_PREROUTING	0x08
-
+#define BRNF_8021Q			0x10
+#define BRNF_PPPoE			0x20
 
 /* Only used in br_forward.c */
 extern int nf_bridge_copy_header(struct sk_buff *skb);
@@ -75,6 +76,8 @@ static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
 
 	skb_pull(skb, ETH_HLEN);
 	nf_bridge->mask ^= BRNF_BRIDGED_DNAT;
+	skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN),
+				       skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
 	skb->dev = nf_bridge->physindev;
 	return br_handle_frame_finish(skb);
 }
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index da1d58be31b7..eb21340a573b 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -299,6 +299,20 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 	return 0;
 }
 
+#ifdef CONFIG_BRIDGE_NETFILTER
+static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
+{
+	unsigned seq, hh_alen;
+
+	do {
+		seq = read_seqbegin(&hh->hh_lock);
+		hh_alen = HH_DATA_ALIGN(ETH_HLEN);
+		memcpy(skb->data - hh_alen, hh->hh_data, ETH_ALEN + hh_alen - ETH_HLEN);
+	} while (read_seqretry(&hh->hh_lock, seq));
+	return 0;
+}
+#endif
+
 static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
 {
 	unsigned seq;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 05dc6304992c..b7e405dc9d1c 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -196,15 +196,24 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
 					 skb->nf_bridge->data, header_size);
 }
 
-/*
- * When forwarding bridge frames, we save a copy of the original
- * header before processing.
+static inline void nf_bridge_update_protocol(struct sk_buff *skb)
+{
+	if (skb->nf_bridge->mask & BRNF_8021Q)
+		skb->protocol = htons(ETH_P_8021Q);
+	else if (skb->nf_bridge->mask & BRNF_PPPoE)
+		skb->protocol = htons(ETH_P_PPP_SES);
+}
+
+/* Fill in the header for fragmented IP packets handled by
+ * the IPv4 connection tracking code.
  */
 int nf_bridge_copy_header(struct sk_buff *skb)
 {
 	int err;
-	int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+	unsigned int header_size;
 
+	nf_bridge_update_protocol(skb);
+	header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
 	err = skb_cow_head(skb, header_size);
 	if (err)
 		return err;
@@ -238,6 +247,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 	skb_dst_set(skb, &rt->u.dst);
 
 	skb->dev = nf_bridge->physindev;
+	nf_bridge_update_protocol(skb);
 	nf_bridge_push_encap_header(skb);
 	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 		       br_handle_frame_finish, 1);
@@ -245,6 +255,38 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 	return 0;
 }
 
+/* Obtain the correct destination MAC address, while preserving the original
+ * source MAC address. If we already know this address, we just copy it. If we
+ * don't, we use the neighbour framework to find out. In both cases, we make
+ * sure that br_handle_frame_finish() is called afterwards.
+ */
+static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
+{
+	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+	struct dst_entry *dst;
+
+	skb->dev = bridge_parent(skb->dev);
+	if (!skb->dev)
+		goto free_skb;
+	dst = skb_dst(skb);
+	if (dst->hh) {
+		neigh_hh_bridge(dst->hh, skb);
+		skb->dev = nf_bridge->physindev;
+		return br_handle_frame_finish(skb);
+	} else if (dst->neighbour) {
+		/* the neighbour function below overwrites the complete
+		 * MAC header, so we save the Ethernet source address and
+		 * protocol number. */
+		skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
+		/* tell br_dev_xmit to continue with forwarding */
+		nf_bridge->mask |= BRNF_BRIDGED_DNAT;
+		return dst->neighbour->output(skb);
+	}
+free_skb:
+	kfree_skb(skb);
+	return 0;
+}
+
 /* This requires some explaining. If DNAT has taken place,
  * we will need to fix up the destination Ethernet address.
  *
@@ -283,25 +325,6 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
  * device, we proceed as if ip_route_input() succeeded. If it differs from the
  * logical bridge port or if ip_route_output_key() fails we drop the packet.
  */
-
-static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
-{
-	skb->dev = bridge_parent(skb->dev);
-	if (skb->dev) {
-		struct dst_entry *dst = skb_dst(skb);
-
-		nf_bridge_pull_encap_header(skb);
-		skb->nf_bridge->mask |= BRNF_BRIDGED_DNAT;
-
-		if (dst->hh)
-			return neigh_hh_output(dst->hh, skb);
-		else if (dst->neighbour)
-			return dst->neighbour->output(skb);
-	}
-	kfree_skb(skb);
-	return 0;
-}
-
 static int br_nf_pre_routing_finish(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
@@ -354,6 +377,7 @@ free_skb:
 			if (skb_dst(skb)->dev == dev) {
 bridged_dnat:
 				skb->dev = nf_bridge->physindev;
+				nf_bridge_update_protocol(skb);
 				nf_bridge_push_encap_header(skb);
 				NF_HOOK_THRESH(NFPROTO_BRIDGE,
 					       NF_BR_PRE_ROUTING,
@@ -376,6 +400,7 @@ bridged_dnat:
 	}
 
 	skb->dev = nf_bridge->physindev;
+	nf_bridge_update_protocol(skb);
 	nf_bridge_push_encap_header(skb);
 	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 		       br_handle_frame_finish, 1);
@@ -396,6 +421,10 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
 	nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
 	nf_bridge->physindev = skb->dev;
 	skb->dev = bridge_parent(skb->dev);
+	if (skb->protocol == htons(ETH_P_8021Q))
+		nf_bridge->mask |= BRNF_8021Q;
+	else if (skb->protocol == htons(ETH_P_PPP_SES))
+		nf_bridge->mask |= BRNF_PPPoE;
 
 	return skb->dev;
 }
@@ -494,6 +523,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
 	if (!setup_pre_routing(skb))
 		return NF_DROP;
 
+	skb->protocol = htons(ETH_P_IPV6);
 	NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		br_nf_pre_routing_finish_ipv6);
 
@@ -566,6 +596,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 	if (!setup_pre_routing(skb))
 		return NF_DROP;
 	store_orig_dstaddr(skb);
+	skb->protocol = htons(ETH_P_IP);
 
 	NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		br_nf_pre_routing_finish);
@@ -614,7 +645,9 @@ static int br_nf_forward_finish(struct sk_buff *skb)
 	} else {
 		in = *((struct net_device **)(skb->cb));
 	}
+	nf_bridge_update_protocol(skb);
 	nf_bridge_push_encap_header(skb);
+
 	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in,
 		       skb->dev, br_forward_finish, 1);
 	return 0;
@@ -666,6 +699,10 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 	/* The physdev module checks on this */
 	nf_bridge->mask |= BRNF_BRIDGED;
 	nf_bridge->physoutdev = skb->dev;
+	if (pf == PF_INET)
+		skb->protocol = htons(ETH_P_IP);
+	else
+		skb->protocol = htons(ETH_P_IPV6);
 
 	NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent,
 		br_nf_forward_finish);
@@ -706,8 +743,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 #if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
-	if (skb->nfct != NULL &&
-	    (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) &&
+	if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
 	    skb->len > skb->dev->mtu &&
 	    !skb_is_gso(skb))
 		return ip_fragment(skb, br_dev_queue_push_xmit);
@@ -755,6 +791,10 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 
 	nf_bridge_pull_encap_header(skb);
 	nf_bridge_save_header(skb);
+	if (pf == PF_INET)
+		skb->protocol = htons(ETH_P_IP);
+	else
+		skb->protocol = htons(ETH_P_IPV6);
 
 	NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev,
 		br_nf_dev_queue_xmit);
-- 
cgit v1.2.3-59-g8ed1b


From 90348e0ede4e74f9404c4d08cce1dbb1baa05b06 Mon Sep 17 00:00:00 2001
From: Ulrich Weber <uweber@astaro.com>
Date: Thu, 15 Apr 2010 12:37:18 +0200
Subject: netfilter: ipv6: move xfrm_lookup at end of ip6_route_me_harder

xfrm_lookup should be called after ip6_route_output skb_dst_set,
otherwise skb_dst_set of xfrm_lookup is pointless

Signed-off-by: Ulrich Weber <uweber@astaro.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/netfilter.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c
index d5ed92b14346..a74951c039b6 100644
--- a/net/ipv6/netfilter.c
+++ b/net/ipv6/netfilter.c
@@ -25,20 +25,6 @@ int ip6_route_me_harder(struct sk_buff *skb)
 	};
 
 	dst = ip6_route_output(net, skb->sk, &fl);
-
-#ifdef CONFIG_XFRM
-	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
-	    xfrm_decode_session(skb, &fl, AF_INET6) == 0) {
-		struct dst_entry *dst2 = skb_dst(skb);
-
-		if (xfrm_lookup(net, &dst2, &fl, skb->sk, 0)) {
-			skb_dst_set(skb, NULL);
-			return -1;
-		}
-		skb_dst_set(skb, dst2);
-	}
-#endif
-
 	if (dst->error) {
 		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 		LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");
@@ -50,6 +36,17 @@ int ip6_route_me_harder(struct sk_buff *skb)
 	skb_dst_drop(skb);
 
 	skb_dst_set(skb, dst);
+
+#ifdef CONFIG_XFRM
+	if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+	    xfrm_decode_session(skb, &fl, AF_INET6) == 0) {
+		skb_dst_set(skb, NULL);
+		if (xfrm_lookup(net, &dst, &fl, skb->sk, 0))
+			return -1;
+		skb_dst_set(skb, dst);
+	}
+#endif
+
 	return 0;
 }
 EXPORT_SYMBOL(ip6_route_me_harder);
-- 
cgit v1.2.3-59-g8ed1b


From 66496d4973dcb848d163805fa6b485850b7555e3 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 15 Apr 2010 13:29:27 +0200
Subject: ipv4: ipmr: fix IP_MROUTE_MULTIPLE_TABLES Kconfig dependencies

IP_MROUTE_MULTIPLE_TABLES should depend on IP_MROUTE.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index be597749c385..8e3a1fd938ab 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -252,7 +252,7 @@ config IP_MROUTE
 
 config IP_MROUTE_MULTIPLE_TABLES
 	bool "IP: multicast policy routing"
-	depends on IP_ADVANCED_ROUTER
+	depends on IP_MROUTE && IP_ADVANCED_ROUTER
 	select FIB_RULES
 	help
 	  Normally, a multicast router runs a userspace daemon and decides
-- 
cgit v1.2.3-59-g8ed1b


From b0ebb739a8f68039f03e80b3476b204fe5adf0d7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 15 Apr 2010 13:29:28 +0200
Subject: ipv4: ipmr: fix invalid cache resolving when adding a non-matching
 entry

The patch to convert struct mfc_cache to list_heads (ipv4: ipmr: convert
struct mfc_cache to struct list_head) introduced a bug when adding new
cache entries that don't match any unresolved entries.

The unres queue is searched for a matching entry, which is then resolved.
When no matching entry is present, the iterator points to the head of the
list, but is treated as a matching entry. Use a separate variable to
indicate that a matching entry was found.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/ipmr.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5df5fd74c6d1..0643fb6d47c4 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1089,12 +1089,14 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 	 *	Check to see if we resolved a queued list. If so we
 	 *	need to send on the frames and tidy up.
 	 */
+	found = false;
 	spin_lock_bh(&mfc_unres_lock);
 	list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
 		if (uc->mfc_origin == c->mfc_origin &&
 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
 			list_del(&uc->list);
 			atomic_dec(&mrt->cache_resolve_queue_len);
+			found = true;
 			break;
 		}
 	}
@@ -1102,7 +1104,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
 		del_timer(&mrt->ipmr_expire_timer);
 	spin_unlock_bh(&mfc_unres_lock);
 
-	if (uc) {
+	if (found) {
 		ipmr_cache_resolve(net, mrt, uc, c);
 		ipmr_cache_free(uc);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 8de53dfbf9a0a0f7538c005137059c5c021476e1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 15 Apr 2010 13:29:28 +0200
Subject: ipv4: ipmr: fix NULL pointer deref during unres queue destruction

Fix an oversight in ipmr_destroy_unres() - the net pointer is
unconditionally initialized to NULL, resulting in a NULL pointer
dereference later on.

Fix by adding a net pointer to struct mr_table and using it in
ipmr_destroy_unres().

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/ipmr.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 0643fb6d47c4..7d8a2bcecb76 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -71,6 +71,9 @@
 
 struct mr_table {
 	struct list_head	list;
+#ifdef CONFIG_NET_NS
+	struct net		*net;
+#endif
 	u32			id;
 	struct sock		*mroute_sk;
 	struct timer_list	ipmr_expire_timer;
@@ -308,6 +311,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
 	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
 	if (mrt == NULL)
 		return NULL;
+	write_pnet(&mrt->net, net);
 	mrt->id = id;
 
 	/* Forwarding cache */
@@ -580,7 +584,7 @@ static inline void ipmr_cache_free(struct mfc_cache *c)
 
 static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
 {
-	struct net *net = NULL; //mrt->net;
+	struct net *net = read_pnet(&mrt->net);
 	struct sk_buff *skb;
 	struct nlmsgerr *e;
 
-- 
cgit v1.2.3-59-g8ed1b


From f0d57a54aa9fdf3a4d9435d44c69b20388ad0b3b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 15 Apr 2010 19:09:01 +0200
Subject: netfilter: ipt_LOG/ip6t_LOG: use more appropriate log level as
 default

Use KERN_NOTICE instead of KERN_EMERG by default. This only affects
kernel internal logging (like conntrack), user-specified logging rules
contain a separate log level.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ipt_LOG.c  | 2 +-
 net/ipv6/netfilter/ip6t_LOG.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index a6a454b25502..3bd35f370817 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -367,7 +367,7 @@ static struct nf_loginfo default_loginfo = {
 	.type	= NF_LOG_TYPE_LOG,
 	.u = {
 		.log = {
-			.level    = 0,
+			.level    = 5,
 			.logflags = NF_LOG_MASK,
 		},
 	},
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 439ededd5300..1f47a525f484 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -377,7 +377,7 @@ static struct nf_loginfo default_loginfo = {
 	.type	= NF_LOG_TYPE_LOG,
 	.u = {
 		.log = {
-			.level	  = 0,
+			.level	  = 5,
 			.logflags = NF_LOG_MASK,
 		},
 	},
-- 
cgit v1.2.3-59-g8ed1b


From a4fbf8415c462208e77251779d80dbc81914cebd Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 15 Apr 2010 15:37:13 -0700
Subject: net/l2tp/l2tp_debugfs.c: Convert NIPQUAD to %pI4

Signed-off-by: Joe Perches <joe@perches.com>
Acked-by: James Chapman <jchapman@katalix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_debugfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 908f10f9720e..104ec3b283d4 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -122,8 +122,8 @@ static void l2tp_dfs_seq_tunnel_show(struct seq_file *m, void *v)
 	seq_printf(m, "\nTUNNEL %u peer %u", tunnel->tunnel_id, tunnel->peer_tunnel_id);
 	if (tunnel->sock) {
 		struct inet_sock *inet = inet_sk(tunnel->sock);
-		seq_printf(m, " from " NIPQUAD_FMT " to " NIPQUAD_FMT "\n",
-			   NIPQUAD(inet->inet_saddr), NIPQUAD(inet->inet_daddr));
+		seq_printf(m, " from %pI4 to %pI4\n",
+			   &inet->inet_saddr, &inet->inet_daddr);
 		if (tunnel->encap == L2TP_ENCAPTYPE_UDP)
 			seq_printf(m, " source port %hu, dest port %hu\n",
 				   ntohs(inet->inet_sport), ntohs(inet->inet_dport));
-- 
cgit v1.2.3-59-g8ed1b


From 0eecb784942792863b77dfe11e0c7e286e92db85 Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Thu, 15 Apr 2010 16:39:14 +0000
Subject: ipv6: cancel to setting local_df in ip6_xmit()

commit f88037 (sctp: Drop ipfargok in sctp_xmit function)
has dropped ipfragok and set the local_df value properly.

So the change made by commit 77e2f1 (ipv6: Fix ip6_xmit to
send fragments if ipfragok is true) is no longer needed.
This patch removes it.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 16c4391f952b..f3a847e3ec88 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -231,10 +231,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 	skb_reset_network_header(skb);
 	hdr = ipv6_hdr(skb);
 
-	/* Allow local fragmentation. */
-	if (ipfragok)
-		skb->local_df = 1;
-
 	/*
 	 *	Fill in the IPv6 header
 	 */
-- 
cgit v1.2.3-59-g8ed1b


From 4e15ed4d930297c127d280ca1d0c785be870def4 Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Thu, 15 Apr 2010 16:43:08 +0000
Subject: net: replace ipfragok with skb->local_df

As Herbert Xu said: we should be able to simply replace ipfragok
with skb->local_df. Commit f88037 (sctp: Drop ipfargok in sctp_xmit function)
has dropped ipfragok and set the local_df value properly.

The patch kills the ipfragok parameter of .queue_xmit().

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet6_connection_sock.h | 2 +-
 include/net/inet_connection_sock.h  | 2 +-
 include/net/ip.h                    | 2 +-
 include/net/ipv6.h                  | 3 +--
 net/dccp/ipv6.c                     | 4 ++--
 net/dccp/output.c                   | 2 +-
 net/ipv4/ip_output.c                | 4 ++--
 net/ipv4/tcp_output.c               | 2 +-
 net/ipv6/inet6_connection_sock.c    | 4 ++--
 net/ipv6/ip6_output.c               | 2 +-
 net/ipv6/tcp_ipv6.c                 | 4 ++--
 net/l2tp/l2tp_core.c                | 3 ++-
 net/l2tp/l2tp_ip.c                  | 2 +-
 net/sctp/ipv6.c                     | 2 +-
 net/sctp/protocol.c                 | 2 +-
 15 files changed, 20 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index f13ddc2543b1..aae08f686633 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -38,5 +38,5 @@ extern void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
 
 extern void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
 
-extern int inet6_csk_xmit(struct sk_buff *skb, int ipfragok);
+extern int inet6_csk_xmit(struct sk_buff *skb);
 #endif /* _INET6_CONNECTION_SOCK_H */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 52c8b8b8a0b9..b6d3b55da19b 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -36,7 +36,7 @@ struct tcp_congestion_ops;
  * (i.e. things that depend on the address family)
  */
 struct inet_connection_sock_af_ops {
-	int	    (*queue_xmit)(struct sk_buff *skb, int ipfragok);
+	int	    (*queue_xmit)(struct sk_buff *skb);
 	void	    (*send_check)(struct sock *sk, struct sk_buff *skb);
 	int	    (*rebuild_header)(struct sock *sk);
 	int	    (*conn_request)(struct sock *sk, struct sk_buff *skb);
diff --git a/include/net/ip.h b/include/net/ip.h
index 503994a38ed1..a84ceb692687 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -101,7 +101,7 @@ extern int		ip_do_nat(struct sk_buff *skb);
 extern void		ip_send_check(struct iphdr *ip);
 extern int		__ip_local_out(struct sk_buff *skb);
 extern int		ip_local_out(struct sk_buff *skb);
-extern int		ip_queue_xmit(struct sk_buff *skb, int ipfragok);
+extern int		ip_queue_xmit(struct sk_buff *skb);
 extern void		ip_init(void);
 extern int		ip_append_data(struct sock *sk,
 				       int getfrag(void *from, char *to, int offset, int len,
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 033ddd4652a5..b1d8db90b214 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -482,8 +482,7 @@ extern int			ip6_rcv_finish(struct sk_buff *skb);
 extern int			ip6_xmit(struct sock *sk,
 					 struct sk_buff *skb,
 					 struct flowi *fl,
-					 struct ipv6_txoptions *opt,
-					 int ipfragok);
+					 struct ipv6_txoptions *opt);
 
 extern int			ip6_nd_hdr(struct sock *sk,
 					   struct sk_buff *skb,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index ab1ab95946df..091698899594 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -292,7 +292,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 							 &ireq6->loc_addr,
 							 &ireq6->rmt_addr);
 		ipv6_addr_copy(&fl.fl6_dst, &ireq6->rmt_addr);
-		err = ip6_xmit(sk, skb, &fl, opt, 0);
+		err = ip6_xmit(sk, skb, &fl, opt);
 		err = net_xmit_eval(err);
 	}
 
@@ -347,7 +347,7 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 	if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
 		if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
 			skb_dst_set(skb, dst);
-			ip6_xmit(ctl_sk, skb, &fl, NULL, 0);
+			ip6_xmit(ctl_sk, skb, &fl, NULL);
 			DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
 			DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
 			return;
diff --git a/net/dccp/output.c b/net/dccp/output.c
index b8d98e3c052a..e98b65e9569f 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -136,7 +136,7 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 
 		DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
 
-		err = icsk->icsk_af_ops->queue_xmit(skb, 0);
+		err = icsk->icsk_af_ops->queue_xmit(skb);
 		return net_xmit_eval(err);
 	}
 	return -ENOBUFS;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c65f18e0936e..512af81b750f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -311,7 +311,7 @@ int ip_output(struct sk_buff *skb)
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
 
-int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
+int ip_queue_xmit(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct inet_sock *inet = inet_sk(sk);
@@ -370,7 +370,7 @@ packet_routed:
 	skb_reset_network_header(skb);
 	iph = ip_hdr(skb);
 	*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
-	if (ip_dont_fragment(sk, &rt->u.dst) && !ipfragok)
+	if (ip_dont_fragment(sk, &rt->u.dst) && !skb->local_df)
 		iph->frag_off = htons(IP_DF);
 	else
 		iph->frag_off = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e46849989a53..2b7d71fb8439 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -890,7 +890,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
 		TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
 
-	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
+	err = icsk->icsk_af_ops->queue_xmit(skb);
 	if (likely(err <= 0))
 		return err;
 
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 628db24bcf22..0c5e3c3b7fd5 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -178,7 +178,7 @@ struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie)
 	return dst;
 }
 
-int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
+int inet6_csk_xmit(struct sk_buff *skb)
 {
 	struct sock *sk = skb->sk;
 	struct inet_sock *inet = inet_sk(sk);
@@ -234,7 +234,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
 	/* Restore final destination back after routing done */
 	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
 
-	return ip6_xmit(sk, skb, &fl, np->opt, 0);
+	return ip6_xmit(sk, skb, &fl, np->opt);
 }
 
 EXPORT_SYMBOL_GPL(inet6_csk_xmit);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index f3a847e3ec88..141819f0c6f1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -185,7 +185,7 @@ int ip6_output(struct sk_buff *skb)
  */
 
 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
-	     struct ipv6_txoptions *opt, int ipfragok)
+	     struct ipv6_txoptions *opt)
 {
 	struct net *net = sock_net(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b429dfdd69dc..bd5ef7b6e48e 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -509,7 +509,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
 
 		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
-		err = ip6_xmit(sk, skb, &fl, opt, 0);
+		err = ip6_xmit(sk, skb, &fl, opt);
 		err = net_xmit_eval(err);
 	}
 
@@ -1071,7 +1071,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 	if (!ip6_dst_lookup(ctl_sk, &dst, &fl)) {
 		if (xfrm_lookup(net, &dst, &fl, NULL, 0) >= 0) {
 			skb_dst_set(buff, dst);
-			ip6_xmit(ctl_sk, buff, &fl, NULL, 0);
+			ip6_xmit(ctl_sk, buff, &fl, NULL);
 			TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
 			if (rst)
 				TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 98dfcce1a5fc..ecc7aea9efe4 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -954,7 +954,8 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat
 	}
 
 	/* Queue the packet to IP for output */
-	error = ip_queue_xmit(skb, 1);
+	skb->local_df = 1;
+	error = ip_queue_xmit(skb);
 
 	/* Update stats */
 	if (error >= 0) {
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 75bf784ba18d..0852512d392c 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -501,7 +501,7 @@ static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
 	skb_dst_set(skb, dst_clone(&rt->u.dst));
 
 	/* Queue the packet to IP for output */
-	rc = ip_queue_xmit(skb, 0);
+	rc = ip_queue_xmit(skb);
 
 error:
 	/* Update stats */
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 14db5689fb89..732689140fb8 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -232,7 +232,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 	if (!(transport->param_flags & SPP_PMTUD_ENABLE))
 		skb->local_df = 1;
 
-	return ip6_xmit(sk, skb, &fl, np->opt, 0);
+	return ip6_xmit(sk, skb, &fl, np->opt);
 }
 
 /* Returns the dst cache entry for the given source and destination ip
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index a56f98e82f92..704298f4b284 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -854,7 +854,7 @@ static inline int sctp_v4_xmit(struct sk_buff *skb,
 			 IP_PMTUDISC_DO : IP_PMTUDISC_DONT;
 
 	SCTP_INC_STATS(SCTP_MIB_OUTSCTPPACKS);
-	return ip_queue_xmit(skb, 0);
+	return ip_queue_xmit(skb);
 }
 
 static struct sctp_af sctp_af_inet;
-- 
cgit v1.2.3-59-g8ed1b


From b5d43998234331b9c01bd2165fdbb25115f4387f Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Thu, 15 Apr 2010 16:48:48 +0000
Subject: ipv6: fix the comment of ip6_xmit()

ip6_xmit() is used by upper transport protocol.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 141819f0c6f1..5129a16f482b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -181,7 +181,7 @@ int ip6_output(struct sk_buff *skb)
 }
 
 /*
- *	xmit an sk_buff (used by TCP)
+ *	xmit an sk_buff (used by TCP, SCTP and DCCP)
  */
 
 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
-- 
cgit v1.2.3-59-g8ed1b


From 7834704be4777fc0ed67c4486ef8c5691078d135 Mon Sep 17 00:00:00 2001
From: Nishant Sarmukadam <nishants@marvell.com>
Date: Wed, 14 Apr 2010 22:03:02 -0700
Subject: cfg80211: Avoid sending IWEVASSOCREQIE and IWEVASSOCRESPIE events
 with NULL event body

In a scenario, where a cfg80211 driver (station mode) does not send assoc request
and assoc response IEs in cfg80211_connect_result after a successful association
to an AP, cfg80211 sends IWEVASSOCREQIE and IWEVASSOCRESPIE to the user space
application with NULL data. This can cause an issue at the event recipient.

An example of this is when cfg80211 sends IWEVASSOCREQIE and IWEVASSOCRESPIE
events with NULL event body to wpa_supplicant. The wpa_supplicant overwrites
the assoc request and assoc response IEs for this station with NULL data.
If the association is WPA/WPA2, the wpa_supplicant is not able to generate
EAPOL handshake messages, since the IEs are NULL.

With the patch, req_ie and resp_ie will be NULL by avoiding the
assignment if the driver has not sent the IEs to cfg80211. The event sending
code sends the events only if resp_ie and req_ie are not NULL. This
will ensure that the events are not sent with NULL event body.

Signed-off-by: Nishant Sarmukadam <nishants@marvell.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/wireless/sme.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 17465777eb47..dcd7685242f7 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -517,12 +517,16 @@ void cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
 	ev->type = EVENT_CONNECT_RESULT;
 	if (bssid)
 		memcpy(ev->cr.bssid, bssid, ETH_ALEN);
-	ev->cr.req_ie = ((u8 *)ev) + sizeof(*ev);
-	ev->cr.req_ie_len = req_ie_len;
-	memcpy((void *)ev->cr.req_ie, req_ie, req_ie_len);
-	ev->cr.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len;
-	ev->cr.resp_ie_len = resp_ie_len;
-	memcpy((void *)ev->cr.resp_ie, resp_ie, resp_ie_len);
+	if (req_ie_len) {
+		ev->cr.req_ie = ((u8 *)ev) + sizeof(*ev);
+		ev->cr.req_ie_len = req_ie_len;
+		memcpy((void *)ev->cr.req_ie, req_ie, req_ie_len);
+	}
+	if (resp_ie_len) {
+		ev->cr.resp_ie = ((u8 *)ev) + sizeof(*ev) + req_ie_len;
+		ev->cr.resp_ie_len = resp_ie_len;
+		memcpy((void *)ev->cr.resp_ie, resp_ie, resp_ie_len);
+	}
 	ev->cr.status = status;
 
 	spin_lock_irqsave(&wdev->event_lock, flags);
-- 
cgit v1.2.3-59-g8ed1b


From fec5e652e58fa6017b2c9e06466cb2a6538de5b4 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 16 Apr 2010 16:01:27 -0700
Subject: rfs: Receive Flow Steering

This patch implements receive flow steering (RFS).  RFS steers
received packets for layer 3 and 4 processing to the CPU where
the application for the corresponding flow is running.  RFS is an
extension of Receive Packet Steering (RPS).

The basic idea of RFS is that when an application calls recvmsg
(or sendmsg) the application's running CPU is stored in a hash
table that is indexed by the connection's rxhash which is stored in
the socket structure.  The rxhash is passed in skb's received on
the connection from netif_receive_skb.  For each received packet,
the associated rxhash is used to look up the CPU in the hash table,
if a valid CPU is set then the packet is steered to that CPU using
the RPS mechanisms.

The complication with this simple approach is that it would potentially
allow out-of-order (OOO) packets.  If threads are thrashing around CPUs or multiple
threads are trying to read from the same sockets, a quickly changing
CPU value in the hash table could cause rampant OOO packets--
we consider this a non-starter.

To avoid OOO packets, this solution implements two types of hash
tables: rps_sock_flow_table and rps_dev_flow_table.

rps_sock_flow_table is a global hash table.  Each entry is just a CPU
number and it is populated in recvmsg and sendmsg as described above.
This table contains the "desired" CPUs for flows.

rps_dev_flow_table is specific to each device queue.  Each entry
contains a CPU and a tail queue counter.  The CPU is the "current"
CPU for a matching flow.  The tail queue counter holds the value
of a tail queue counter for the associated CPU's backlog queue at
the time of last enqueue for a flow matching the entry.

Each backlog queue has a queue head counter which is incremented
on dequeue, and so a queue tail counter is computed as queue head
count + queue length.  When a packet is enqueued on a backlog queue,
the current value of the queue tail counter is saved in the hash
entry of the rps_dev_flow_table.

And now the trick: when selecting the CPU for RPS (get_rps_cpu)
the rps_sock_flow table and the rps_dev_flow table for the RX queue
are consulted.  When the desired CPU for the flow (found in the
rps_sock_flow table) does not match the current CPU (found in the
rps_dev_flow table), the current CPU is changed to the desired CPU
if one of the following is true:

- The current CPU is unset (equal to RPS_NO_CPU)
- Current CPU is offline
- The current CPU's queue head counter >= queue tail counter in the
rps_dev_flow table.  This checks if the queue tail has advanced
beyond the last packet that was enqueued using this table entry.
This guarantees that all packets queued using this entry have been
dequeued, thus preserving in order delivery.

Making each queue have its own rps_dev_flow table has two advantages:
1) the tail queue counters will be written on each receive, so
keeping the table local to the interrupting CPU is good for locality.  2)
this allows lockless access to the table-- the CPU number and queue
tail counter need to be accessed together under mutual exclusion
from netif_receive_skb, we assume that this is only called from
device napi_poll which is non-reentrant.

This patch implements RFS for TCP and connected UDP sockets.
It should be usable for other flow oriented protocols.

There are two configuration parameters for RFS.  The
"rps_flow_entries" kernel init parameter sets the number of
entries in the rps_sock_flow_table, the per rxqueue sysfs entry
"rps_flow_cnt" contains the number of entries in the rps_dev_flow
table for the rxqueue.  Both are rounded to power of two.

The obvious benefit of RFS (over just RPS) is that it achieves
CPU locality between the receive processing for a flow and the
applications processing; this can result in increased performance
(higher pps, lower latency).

The benefits of RFS are dependent on cache hierarchy, application
load, and other factors.  On simple benchmarks, we don't necessarily
see improvement and sometimes see degradation.  However, for more
complex benchmarks and for applications where cache pressure is
much higher this technique seems to perform very well.

Below are some benchmark results which show the potential benefit of
this patch.  The netperf test has 500 instances of netperf TCP_RR
test with 1 byte req. and resp.  The RPC test is a request/response
test similar in structure to the netperf RR test with 100 threads on
each host, but does more work in userspace than netperf.

e1000e on 8 core Intel
   No RFS or RPS		104K tps at 30% CPU
   No RFS (best RPS config):    290K tps at 63% CPU
   RFS				303K tps at 61% CPU

RPC test	tps	CPU%	50/90/99% usec latency	Latency StdDev
  No RFS/RPS	103K	48%	757/900/3185		4472.35
  RPS only:	174K	73%	415/993/2468		491.66
  RFS		223K	73%	379/651/1382		315.61

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h  |  69 +++++++++++++++++++++++++++-
 include/net/inet_sock.h    |  38 ++++++++++++++++
 net/core/dev.c             | 111 +++++++++++++++++++++++++++++++++++++--------
 net/core/net-sysfs.c       |  94 ++++++++++++++++++++++++++++++++++++--
 net/core/sysctl_net_core.c |  68 +++++++++++++++++++++++++++
 net/ipv4/af_inet.c         |  29 ++++++++++--
 net/ipv4/tcp_ipv4.c        |   2 +
 net/ipv4/udp.c             |   7 ++-
 8 files changed, 389 insertions(+), 29 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 55c2086e1f06..649a0252686e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -530,14 +530,73 @@ struct rps_map {
 };
 #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
 
+/*
+ * The rps_dev_flow structure contains the mapping of a flow to a CPU and the
+ * tail pointer for that CPU's input queue at the time of last enqueue.
+ */
+struct rps_dev_flow {
+	u16 cpu;
+	u16 fill;
+	unsigned int last_qtail;
+};
+
+/*
+ * The rps_dev_flow_table structure contains a table of flow mappings.
+ */
+struct rps_dev_flow_table {
+	unsigned int mask;
+	struct rcu_head rcu;
+	struct work_struct free_work;
+	struct rps_dev_flow flows[0];
+};
+#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
+    (_num * sizeof(struct rps_dev_flow)))
+
+/*
+ * The rps_sock_flow_table contains mappings of flows to the last CPU
+ * on which they were processed by the application (set in recvmsg).
+ */
+struct rps_sock_flow_table {
+	unsigned int mask;
+	u16 ents[0];
+};
+#define	RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \
+    (_num * sizeof(u16)))
+
+#define RPS_NO_CPU 0xffff
+
+static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
+					u32 hash)
+{
+	if (table && hash) {
+		unsigned int cpu, index = hash & table->mask;
+
+		/* We only give a hint, preemption can change cpu under us */
+		cpu = raw_smp_processor_id();
+
+		if (table->ents[index] != cpu)
+			table->ents[index] = cpu;
+	}
+}
+
+static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table,
+				       u32 hash)
+{
+	if (table && hash)
+		table->ents[hash & table->mask] = RPS_NO_CPU;
+}
+
+extern struct rps_sock_flow_table *rps_sock_flow_table;
+
 /* This structure contains an instance of an RX queue. */
 struct netdev_rx_queue {
 	struct rps_map *rps_map;
+	struct rps_dev_flow_table *rps_flow_table;
 	struct kobject kobj;
 	struct netdev_rx_queue *first;
 	atomic_t count;
 } ____cacheline_aligned_in_smp;
-#endif
+#endif /* CONFIG_RPS */
 
 /*
  * This structure defines the management hooks for network devices.
@@ -1333,11 +1392,19 @@ struct softnet_data {
 	/* Elements below can be accessed between CPUs for RPS */
 #ifdef CONFIG_RPS
 	struct call_single_data	csd ____cacheline_aligned_in_smp;
+	unsigned int		input_queue_head;
 #endif
 	struct sk_buff_head	input_pkt_queue;
 	struct napi_struct	backlog;
 };
 
+static inline void incr_input_queue_head(struct softnet_data *queue)
+{
+#ifdef CONFIG_RPS
+	queue->input_queue_head++;
+#endif
+}
+
 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
 #define HAVE_NETIF_QUEUE
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 83fd34437cf1..b487bc1b99ab 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -21,6 +21,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/jhash.h>
+#include <linux/netdevice.h>
 
 #include <net/flow.h>
 #include <net/sock.h>
@@ -101,6 +102,7 @@ struct rtable;
  * @uc_ttl - Unicast TTL
  * @inet_sport - Source port
  * @inet_id - ID counter for DF pkts
+ * @rxhash - flow hash received from netif layer
  * @tos - TOS
  * @mc_ttl - Multicasting TTL
  * @is_icsk - is this an inet_connection_sock?
@@ -124,6 +126,9 @@ struct inet_sock {
 	__u16			cmsg_flags;
 	__be16			inet_sport;
 	__u16			inet_id;
+#ifdef CONFIG_RPS
+	__u32			rxhash;
+#endif
 
 	struct ip_options	*opt;
 	__u8			tos;
@@ -219,4 +224,37 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
 	return inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0;
 }
 
+static inline void inet_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_record_sock_flow(sock_flow_table, inet_sk(sk)->rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void inet_rps_reset_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_reset_sock_flow(sock_flow_table, inet_sk(sk)->rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void inet_rps_save_rxhash(const struct sock *sk, u32 rxhash)
+{
+#ifdef CONFIG_RPS
+	if (unlikely(inet_sk(sk)->rxhash != rxhash)) {
+		inet_rps_reset_flow(sk);
+		inet_sk(sk)->rxhash = rxhash;
+	}
+#endif
+}
 #endif	/* _INET_SOCK_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index e8041eb76ac1..d7107ac835fa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2203,19 +2203,28 @@ int weight_p __read_mostly = 64;            /* old backlog weight */
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
 #ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
  * rcu_read_lock must be held on entry.
  */
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+		       struct rps_dev_flow **rflowp)
 {
 	struct ipv6hdr *ip6;
 	struct iphdr *ip;
 	struct netdev_rx_queue *rxqueue;
 	struct rps_map *map;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_sock_flow_table *sock_flow_table;
 	int cpu = -1;
 	u8 ip_proto;
+	u16 tcpu;
 	u32 addr1, addr2, ports, ihl;
 
 	if (skb_rx_queue_recorded(skb)) {
@@ -2232,7 +2241,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 	} else
 		rxqueue = dev->_rx;
 
-	if (!rxqueue->rps_map)
+	if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
 		goto done;
 
 	if (skb->rxhash)
@@ -2284,9 +2293,48 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 		skb->rxhash = 1;
 
 got_hash:
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	if (flow_table && sock_flow_table) {
+		u16 next_cpu;
+		struct rps_dev_flow *rflow;
+
+		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+		tcpu = rflow->cpu;
+
+		next_cpu = sock_flow_table->ents[skb->rxhash &
+		    sock_flow_table->mask];
+
+		/*
+		 * If the desired CPU (where last recvmsg was done) is
+		 * different from current CPU (one in the rx-queue flow
+		 * table entry), switch if one of the following holds:
+		 *   - Current CPU is unset (equal to RPS_NO_CPU).
+		 *   - Current CPU is offline.
+		 *   - The current CPU's queue tail has advanced beyond the
+		 *     last packet that was enqueued using this table entry.
+		 *     This guarantees that all previous packets for the flow
+		 *     have been dequeued, thus preserving in order delivery.
+		 */
+		if (unlikely(tcpu != next_cpu) &&
+		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+		      rflow->last_qtail)) >= 0)) {
+			tcpu = rflow->cpu = next_cpu;
+			if (tcpu != RPS_NO_CPU)
+				rflow->last_qtail = per_cpu(softnet_data,
+				    tcpu).input_queue_head;
+		}
+		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+			*rflowp = rflow;
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
 	map = rcu_dereference(rxqueue->rps_map);
 	if (map) {
-		u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
 
 		if (cpu_online(tcpu)) {
 			cpu = tcpu;
@@ -2320,13 +2368,14 @@ static void trigger_softirq(void *data)
 	__napi_schedule(&queue->backlog);
 	__get_cpu_var(netdev_rx_stat).received_rps++;
 }
-#endif /* CONFIG_SMP */
+#endif /* CONFIG_RPS */
 
 /*
  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
  * queue (may be a remote CPU queue).
  */
-static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+			      unsigned int *qtail)
 {
 	struct softnet_data *queue;
 	unsigned long flags;
@@ -2341,6 +2390,10 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
 		if (queue->input_pkt_queue.qlen) {
 enqueue:
 			__skb_queue_tail(&queue->input_pkt_queue, skb);
+#ifdef CONFIG_RPS
+			*qtail = queue->input_queue_head +
+			    queue->input_pkt_queue.qlen;
+#endif
 			rps_unlock(queue);
 			local_irq_restore(flags);
 			return NET_RX_SUCCESS;
@@ -2355,11 +2408,10 @@ enqueue:
 
 				cpu_set(cpu, rcpus->mask[rcpus->select]);
 				__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-			} else
-				__napi_schedule(&queue->backlog);
-#else
-			__napi_schedule(&queue->backlog);
+				goto enqueue;
+			}
 #endif
+			__napi_schedule(&queue->backlog);
 		}
 		goto enqueue;
 	}
@@ -2401,18 +2453,25 @@ int netif_rx(struct sk_buff *skb)
 
 #ifdef CONFIG_RPS
 	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
 		rcu_read_lock();
-		cpu = get_rps_cpu(skb->dev, skb);
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
 		if (cpu < 0)
 			cpu = smp_processor_id();
-		ret = enqueue_to_backlog(skb, cpu);
+
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+
 		rcu_read_unlock();
 	}
 #else
-	ret = enqueue_to_backlog(skb, get_cpu());
-	put_cpu();
+	{
+		unsigned int qtail;
+		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+		put_cpu();
+	}
 #endif
 	return ret;
 }
@@ -2830,14 +2889,22 @@ out:
 int netif_receive_skb(struct sk_buff *skb)
 {
 #ifdef CONFIG_RPS
-	int cpu;
+	struct rps_dev_flow voidflow, *rflow = &voidflow;
+	int cpu, ret;
+
+	rcu_read_lock();
 
-	cpu = get_rps_cpu(skb->dev, skb);
+	cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
-	if (cpu < 0)
-		return __netif_receive_skb(skb);
-	else
-		return enqueue_to_backlog(skb, cpu);
+	if (cpu >= 0) {
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+		rcu_read_unlock();
+	} else {
+		rcu_read_unlock();
+		ret = __netif_receive_skb(skb);
+	}
+
+	return ret;
 #else
 	return __netif_receive_skb(skb);
 #endif
@@ -2856,6 +2923,7 @@ static void flush_backlog(void *arg)
 		if (skb->dev == dev) {
 			__skb_unlink(skb, &queue->input_pkt_queue);
 			kfree_skb(skb);
+			incr_input_queue_head(queue);
 		}
 	rps_unlock(queue);
 }
@@ -3179,6 +3247,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 			local_irq_enable();
 			break;
 		}
+		incr_input_queue_head(queue);
 		rps_unlock(queue);
 		local_irq_enable();
 
@@ -5542,8 +5611,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	local_irq_enable();
 
 	/* Process offline CPU's input_pkt_queue */
-	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
+	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx(skb);
+		incr_input_queue_head(oldsd);
+	}
 
 	return NOTIFY_OK;
 }
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 96ed6905b823..143052a22b9b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -17,6 +17,7 @@
 #include <net/sock.h>
 #include <linux/rtnetlink.h>
 #include <linux/wireless.h>
+#include <linux/vmalloc.h>
 #include <net/wext.h>
 
 #include "net-sysfs.h"
@@ -601,22 +602,109 @@ ssize_t store_rps_map(struct netdev_rx_queue *queue,
 	return len;
 }
 
+static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
+					   struct rx_queue_attribute *attr,
+					   char *buf)
+{
+	struct rps_dev_flow_table *flow_table;
+	unsigned int val = 0;
+
+	rcu_read_lock();
+	flow_table = rcu_dereference(queue->rps_flow_table);
+	if (flow_table)
+		val = flow_table->mask + 1;
+	rcu_read_unlock();
+
+	return sprintf(buf, "%u\n", val);
+}
+
+static void rps_dev_flow_table_release_work(struct work_struct *work)
+{
+	struct rps_dev_flow_table *table = container_of(work,
+	    struct rps_dev_flow_table, free_work);
+
+	vfree(table);
+}
+
+static void rps_dev_flow_table_release(struct rcu_head *rcu)
+{
+	struct rps_dev_flow_table *table = container_of(rcu,
+	    struct rps_dev_flow_table, rcu);
+
+	INIT_WORK(&table->free_work, rps_dev_flow_table_release_work);
+	schedule_work(&table->free_work);
+}
+
+/*
+ * sysfs store handler for rps_flow_cnt: replace (or remove, when 0 is
+ * written) the RPS flow table of one rx queue.
+ *
+ * The requested count is rounded up to a power of two so that
+ * "hash & mask" can index the table. Every entry of a new table starts
+ * out with cpu == RPS_NO_CPU. The previous table, if any, is handed to
+ * call_rcu() for release.
+ *
+ * Returns @len on success, -EPERM without CAP_NET_ADMIN, -EINVAL for
+ * unparsable or too large input, -ENOMEM if the table cannot be allocated.
+ */
+ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
+				     struct rx_queue_attribute *attr,
+				     const char *buf, size_t len)
+{
+	unsigned long val;
+	unsigned int count;
+	char *endp;
+	struct rps_dev_flow_table *table, *old_table;
+	static DEFINE_SPINLOCK(rps_dev_flow_lock);
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	val = simple_strtoul(buf, &endp, 0);
+	if (endp == buf)
+		return -EINVAL;
+
+	/*
+	 * Range-check before narrowing to unsigned int so that large
+	 * values cannot be silently truncated, and so that
+	 * roundup_pow_of_two() below cannot overflow.
+	 */
+	if (val > 1UL << 30)
+		return -EINVAL;
+	count = val;
+
+	if (count) {
+		unsigned int i;
+
+		count = roundup_pow_of_two(count);
+		if (count > (ULONG_MAX - sizeof(struct rps_dev_flow_table)) /
+		    sizeof(struct rps_dev_flow)) {
+			/* Allocation size would wrap (32-bit unsigned long) */
+			return -EINVAL;
+		}
+		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
+		if (!table)
+			return -ENOMEM;
+
+		table->mask = count - 1;
+		for (i = 0; i < count; i++)
+			table->flows[i].cpu = RPS_NO_CPU;
+	} else
+		table = NULL;
+
+	/* Serialize concurrent writers swapping the table pointer. */
+	spin_lock(&rps_dev_flow_lock);
+	old_table = queue->rps_flow_table;
+	rcu_assign_pointer(queue->rps_flow_table, table);
+	spin_unlock(&rps_dev_flow_lock);
+
+	if (old_table)
+		call_rcu(&old_table->rcu, rps_dev_flow_table_release);
+
+	return len;
+}
+
 static struct rx_queue_attribute rps_cpus_attribute =
 	__ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
 
+
+static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
+	__ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+	    show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
+
 static struct attribute *rx_queue_default_attrs[] = {
 	&rps_cpus_attribute.attr,
+	&rps_dev_flow_table_cnt_attribute.attr,
 	NULL
 };
 
 static void rx_queue_release(struct kobject *kobj)
 {
 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
-	struct rps_map *map = queue->rps_map;
 	struct netdev_rx_queue *first = queue->first;
 
-	if (map)
-		call_rcu(&map->rcu, rps_map_release);
+	if (queue->rps_map)
+		call_rcu(&queue->rps_map->rcu, rps_map_release);
+
+	if (queue->rps_flow_table)
+		call_rcu(&queue->rps_flow_table->rcu,
+		    rps_dev_flow_table_release);
 
 	if (atomic_dec_and_test(&first->count))
 		kfree(first);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b7b6b8208f75..dcc7d25996ab 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -11,12 +11,72 @@
 #include <linux/socket.h>
 #include <linux/netdevice.h>
 #include <linux/ratelimit.h>
+#include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 
 #include <net/ip.h>
 #include <net/sock.h>
 
+#ifdef CONFIG_RPS
+/*
+ * sysctl handler for the "rps_sock_flow_entries" entry.
+ *
+ * Reading reports the number of entries in the global rps_sock_flow_table
+ * (0 when no table is installed). Writing N installs a table of N entries
+ * rounded up to a power of two, with every entry set to RPS_NO_CPU; a
+ * value that rounds to the current size resets the existing table in
+ * place, and writing 0 removes the table.
+ *
+ * Returns the proc_dointvec() result, -EINVAL if more than 1<<30 entries
+ * are requested, or -ENOMEM if the new table cannot be allocated.
+ */
+static int rps_sock_flow_sysctl(ctl_table *table, int write,
+				void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	unsigned int orig_size, size;
+	int ret, i;
+	ctl_table tmp = {
+		.data = &size,
+		.maxlen = sizeof(size),
+		.mode = table->mode
+	};
+	struct rps_sock_flow_table *orig_sock_table, *sock_table;
+	static DEFINE_MUTEX(sock_flow_mutex);
+
+	/* Serializes concurrent invocations of this handler. */
+	mutex_lock(&sock_flow_mutex);
+
+	orig_sock_table = rps_sock_flow_table;
+	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
+
+	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+
+	/* A failed write leaves "size" stale: do not touch the table. */
+	if (!write || ret)
+		goto out;
+
+	if (size) {
+		if (size > 1<<30) {
+			/* Enforce limit to prevent overflow */
+			ret = -EINVAL;
+			goto out;
+		}
+		size = roundup_pow_of_two(size);
+		if (size != orig_size) {
+			sock_table = vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
+			if (!sock_table) {
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			sock_table->mask = size - 1;
+		} else
+			sock_table = orig_sock_table;
+
+		for (i = 0; i < size; i++)
+			sock_table->ents[i] = RPS_NO_CPU;
+	} else
+		sock_table = NULL;
+
+	if (sock_table != orig_sock_table) {
+		rcu_assign_pointer(rps_sock_flow_table, sock_table);
+		/* Let RCU readers of the old table finish before freeing. */
+		synchronize_rcu();
+		vfree(orig_sock_table);
+	}
+
+out:
+	mutex_unlock(&sock_flow_mutex);
+
+	return ret;
+}
+#endif /* CONFIG_RPS */
+
 static struct ctl_table net_core_table[] = {
 #ifdef CONFIG_NET
 	{
@@ -82,6 +142,14 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+#ifdef CONFIG_RPS
+	{
+		.procname	= "rps_sock_flow_entries",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= rps_sock_flow_sysctl
+	},
+#endif
 #endif /* CONFIG_NET */
 	{
 		.procname	= "netdev_budget",
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 193dcd6ed64f..c5376c725503 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -419,6 +419,8 @@ int inet_release(struct socket *sock)
 	if (sk) {
 		long timeout;
 
+		inet_rps_reset_flow(sk);
+
 		/* Applications forget to leave groups before exiting */
 		ip_mc_drop_socket(sk);
 
@@ -720,6 +722,8 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 {
 	struct sock *sk = sock->sk;
 
+	inet_rps_record_flow(sk);
+
 	/* We may need to bind the socket. */
 	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
 		return -EAGAIN;
@@ -728,12 +732,13 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 }
 EXPORT_SYMBOL(inet_sendmsg);
 
-
 static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
 			     size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 
+	inet_rps_record_flow(sk);
+
 	/* We may need to bind the socket. */
 	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
 		return -EAGAIN;
@@ -743,6 +748,22 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
 	return sock_no_sendpage(sock, page, offset, size, flags);
 }
 
+int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+		 size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	int addr_len = 0;
+	int err;
+
+	inet_rps_record_flow(sk);
+
+	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
+				   flags & ~MSG_DONTWAIT, &addr_len);
+	if (err >= 0)
+		msg->msg_namelen = addr_len;
+	return err;
+}
+EXPORT_SYMBOL(inet_recvmsg);
 
 int inet_shutdown(struct socket *sock, int how)
 {
@@ -872,7 +893,7 @@ const struct proto_ops inet_stream_ops = {
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = tcp_sendmsg,
-	.recvmsg	   = sock_common_recvmsg,
+	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = tcp_sendpage,
 	.splice_read	   = tcp_splice_read,
@@ -899,7 +920,7 @@ const struct proto_ops inet_dgram_ops = {
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = inet_sendmsg,
-	.recvmsg	   = sock_common_recvmsg,
+	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
 #ifdef CONFIG_COMPAT
@@ -929,7 +950,7 @@ static const struct proto_ops inet_sockraw_ops = {
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = inet_sendmsg,
-	.recvmsg	   = sock_common_recvmsg,
+	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
 #ifdef CONFIG_COMPAT
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a24995cdc4b6..ad08392a738c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1672,6 +1672,8 @@ process:
 
 	skb->dev = NULL;
 
+	inet_rps_save_rxhash(sk, skb->rxhash);
+
 	bh_lock_sock_nested(sk);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8fef859db35d..666b963496ff 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1217,6 +1217,7 @@ int udp_disconnect(struct sock *sk, int flags)
 	sk->sk_state = TCP_CLOSE;
 	inet->inet_daddr = 0;
 	inet->inet_dport = 0;
+	inet_rps_save_rxhash(sk, 0);
 	sk->sk_bound_dev_if = 0;
 	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
 		inet_reset_saddr(sk);
@@ -1258,8 +1259,12 @@ EXPORT_SYMBOL(udp_lib_unhash);
 
 static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
-	int rc = sock_queue_rcv_skb(sk, skb);
+	int rc;
+
+	if (inet_sk(sk)->inet_daddr)
+		inet_rps_save_rxhash(sk, skb->rxhash);
 
+	rc = sock_queue_rcv_skb(sk, skb);
 	if (rc < 0) {
 		int is_udplite = IS_UDPLITE(sk);
 
-- 
cgit v1.2.3-59-g8ed1b


From 8770acf0494ae06de6abd34f951a436f8f15d1de Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 17 Apr 2010 00:54:36 -0700
Subject: rps: rps_sock_flow_table is mostly read

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index d7107ac835fa..7abf9590e3c5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2205,7 +2205,7 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 #ifdef CONFIG_RPS
 
 /* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table;
+struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 9958da0501fced47c1ac5c5a3a7731c87e45472c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 17 Apr 2010 04:17:02 +0000
Subject: net: remove time limit in process_backlog()

- There is no point in enforcing a time limit in process_backlog(), since
other napi instances don't follow the same rule. We can exit after only one
packet processed...
The normal quota of 64 packets per napi instance should be the norm, and
net_rx_action() already has its own time limit.
Note: /proc/sys/net/core/dev_weight can be used to tune this default
value of 64.

- Use DEFINE_PER_CPU_ALIGNED for softnet_data definition.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 7abf9590e3c5..8092f01713fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -264,7 +264,7 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
  *	queue in the local softnet handler.
  */
 
-DEFINE_PER_CPU(struct softnet_data, softnet_data);
+DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 EXPORT_PER_CPU_SYMBOL(softnet_data);
 
 #ifdef CONFIG_LOCKDEP
@@ -3232,7 +3232,6 @@ static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
-	unsigned long start_time = jiffies;
 
 	napi->weight = weight_p;
 	do {
@@ -3252,7 +3251,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 		local_irq_enable();
 
 		__netif_receive_skb(skb);
-	} while (++work < quota && jiffies == start_time);
+	} while (++work < quota);
 
 	return work;
 }
-- 
cgit v1.2.3-59-g8ed1b


From fc6055a5ba31e2c14e36e8939f9bf2b6d586a7f5 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 16 Apr 2010 12:18:22 +0000
Subject: net: Introduce skb_orphan_try()

Transmitted skb might be attached to a socket and a destructor, for
memory accounting purposes.

Traditionally, this destructor is called at tx completion time, when skb
is freed.

When tx completion is performed by another cpu than the sender, this
forces some cache lines to change ownership. XPS was an attempt to give
tx completion to initial cpu.

David's idea is to call the destructor right before giving the skb to the
device (call to ndo_start_xmit()). Because device queues are usually small,
orphaning the skb before tx completion is not a big deal. Some drivers
already do this; we could do it at the upper level.

There is one known exception to this early orphaning, called tx
timestamping. It needs to keep a reference to socket until device can
give a hardware or software timestamp.

This patch adds a skb_orphan_try() helper, to centralize all exceptions
to early orphaning in one spot, and use it in dev_hard_start_xmit().

"tbench 16" results on a Nehalem machine (2 X5570  @ 2.93GHz)
before: Throughput 4428.9 MB/sec 16 procs
after: Throughput 4448.14 MB/sec 16 procs

UDP should get even better results, its destructor being more complex,
since SOCK_USE_WRITE_QUEUE is not set (four atomic ops instead of one)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 8092f01713fb..8eb50e2292fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1880,6 +1880,17 @@ static int dev_gso_segment(struct sk_buff *skb)
 	return 0;
 }
 
+/*
+ * Try to orphan skb early, right before transmission by the device.
+ * We cannot orphan skb if tx timestamp is requested, since
+ * drivers need to call skb_tstamp_tx() to send the timestamp.
+ */
+static inline void skb_orphan_try(struct sk_buff *skb)
+{
+	if (!skb_tx(skb)->flags)
+		skb_orphan(skb);
+}
+
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			struct netdev_queue *txq)
 {
@@ -1904,23 +1915,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(skb);
 
+		skb_orphan_try(skb);
 		rc = ops->ndo_start_xmit(skb, dev);
 		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
-		/*
-		 * TODO: if skb_orphan() was called by
-		 * dev->hard_start_xmit() (for example, the unmodified
-		 * igb driver does that; bnx2 doesn't), then
-		 * skb_tx_software_timestamp() will be unable to send
-		 * back the time stamp.
-		 *
-		 * How can this be prevented? Always create another
-		 * reference to the socket before calling
-		 * dev->hard_start_xmit()? Prevent that skb_orphan()
-		 * does anything in dev->hard_start_xmit() by clearing
-		 * the skb destructor before the call and restoring it
-		 * afterwards, then doing the skb_orphan() ourselves?
-		 */
 		return rc;
 	}
 
@@ -1938,6 +1936,7 @@ gso:
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(nskb);
 
+		skb_orphan_try(nskb);
 		rc = ops->ndo_start_xmit(nskb, dev);
 		if (unlikely(rc != NETDEV_TX_OK)) {
 			if (rc & ~NETDEV_TX_MASK)
-- 
cgit v1.2.3-59-g8ed1b


From e281b19897dc21c1071802808d461627d747a877 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 19 Apr 2010 14:17:47 +0200
Subject: netfilter: xtables: inclusion of xt_TEE

xt_TEE can be used to clone and reroute a packet. This can for
example be used to copy traffic at a router for logging purposes
to another dedicated machine.

References: http://www.gossamer-threads.com/lists/iptables/devel/68781
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild   |   1 +
 include/linux/netfilter/xt_TEE.h |   9 ++
 net/ipv4/ip_output.c             |   1 +
 net/ipv6/ip6_output.c            |   1 +
 net/netfilter/Kconfig            |   7 ++
 net/netfilter/Makefile           |   1 +
 net/netfilter/xt_TEE.c           | 256 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 276 insertions(+)
 create mode 100644 include/linux/netfilter/xt_TEE.h
 create mode 100644 net/netfilter/xt_TEE.c

(limited to 'net')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index a5a63e41b8af..48767cd16453 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -16,6 +16,7 @@ header-y += xt_RATEEST.h
 header-y += xt_SECMARK.h
 header-y += xt_TCPMSS.h
 header-y += xt_TCPOPTSTRIP.h
+header-y += xt_TEE.h
 header-y += xt_TPROXY.h
 header-y += xt_comment.h
 header-y += xt_connbytes.h
diff --git a/include/linux/netfilter/xt_TEE.h b/include/linux/netfilter/xt_TEE.h
new file mode 100644
index 000000000000..55d4a5011523
--- /dev/null
+++ b/include/linux/netfilter/xt_TEE.h
@@ -0,0 +1,9 @@
+#ifndef _XT_TEE_TARGET_H
+#define _XT_TEE_TARGET_H
+
+struct xt_tee_tginfo {
+	union nf_inet_addr gw;
+	char oif[16];
+};
+
+#endif /* _XT_TEE_TARGET_H */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f09135e1e14f..0abfddec1e26 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -309,6 +309,7 @@ int ip_output(struct sk_buff *skb)
 			    ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
+EXPORT_SYMBOL_GPL(ip_output);
 
 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c10a38a71a5e..d09be7ff8735 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -176,6 +176,7 @@ int ip6_output(struct sk_buff *skb)
 			    ip6_finish_output,
 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
+EXPORT_SYMBOL_GPL(ip6_output);
 
 /*
  *	xmit an sk_buff (used by TCP)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8055786b7702..673a6c8f0e95 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -502,6 +502,13 @@ config NETFILTER_XT_TARGET_RATEEST
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_TEE
+	tristate '"TEE" - packet cloning to alternate destination'
+	depends on NETFILTER_ADVANCED
+	---help---
+	This option adds a "TEE" target with which a packet can be cloned and
+	this clone be rerouted to another nexthop.
+
 config NETFILTER_XT_TARGET_TPROXY
 	tristate '"TPROXY" target support (EXPERIMENTAL)'
 	depends on EXPERIMENTAL
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index cd31afe0692a..14e3a8fd8180 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
 
 # matches
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
new file mode 100644
index 000000000000..b3d730163f12
--- /dev/null
+++ b/net/netfilter/xt_TEE.c
@@ -0,0 +1,256 @@
+/*
+ *	"TEE" target extension for Xtables
+ *	Copyright © Sebastian Claßen, 2007
+ *	Jan Engelhardt, 2007-2010
+ *
+ *	based on ipt_ROUTE.c from Cédric de Launois
+ *	<delaunois@info.ucl.be>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 or later, as published by the Free Software Foundation.
+ */
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/route.h>
+#include <linux/skbuff.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/route.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_TEE.h>
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#	define WITH_CONNTRACK 1
+#	include <net/netfilter/nf_conntrack.h>
+#endif
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#	define WITH_IPV6 1
+#endif
+
+static const union nf_inet_addr tee_zero_address;
+
+static struct net *pick_net(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+	const struct dst_entry *dst;
+
+	if (skb->dev != NULL)
+		return dev_net(skb->dev);
+	dst = skb_dst(skb);
+	if (dst != NULL && dst->dev != NULL)
+		return dev_net(dst->dev);
+#endif
+	return &init_net;
+}
+
+static bool tee_tg_route_oif(struct flowi *f, struct net *net,
+			     const struct xt_tee_tginfo *info)
+{
+	struct net_device *dev;
+
+	if (*info->oif == '\0')	/* no oif configured: leave f->oif untouched */
+		return true;
+	if ((dev = dev_get_by_name(net, info->oif)) == NULL)
+		return false;	/* no such device in this namespace */
+	f->oif = dev->ifindex;
+	dev_put(dev);	/* drop the reference dev_get_by_name() took */
+	return true;
+}
+
+static bool
+tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	struct net *net = pick_net(skb);
+	struct rtable *rt;
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	if (!tee_tg_route_oif(&fl, net, info))
+		return false;
+	fl.nl_u.ip4_u.daddr = info->gw.ip;
+	fl.nl_u.ip4_u.tos   = RT_TOS(iph->tos);
+	fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
+	if (ip_route_output_key(net, &rt, &fl) != 0)
+		return false;
+
+	dst_release(skb_dst(skb));
+	skb_dst_set(skb, &rt->u.dst);
+	skb->dev      = rt->u.dst.dev;
+	skb->protocol = htons(ETH_P_IP);
+	return true;
+}
+
+static unsigned int
+tee_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_tee_tginfo *info = par->targinfo;
+	struct iphdr *iph;
+
+	/*
+	 * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
+	 * the original skb, which should continue on its way as if nothing has
+	 * happened. The copy should be independently delivered to the TEE
+	 * --gateway.
+	 */
+	skb = pskb_copy(skb, GFP_ATOMIC);
+	if (skb == NULL)
+		return XT_CONTINUE;
+
+#ifdef WITH_CONNTRACK
+	/* Avoid counting cloned packets towards the original connection. */
+	nf_conntrack_put(skb->nfct);
+	skb->nfct     = &nf_conntrack_untracked.ct_general;
+	skb->nfctinfo = IP_CT_NEW;
+	nf_conntrack_get(skb->nfct);
+#endif
+	/*
+	 * If we are in PREROUTING/INPUT, the checksum must be recalculated
+	 * since the length could have changed as a result of defragmentation.
+	 *
+	 * We also decrease the TTL to mitigate potential TEE loops
+	 * between two hosts.
+	 *
+	 * Set %IP_DF so that the original source is notified of a potentially
+	 * decreased MTU on the clone route. IPv6 does this too.
+	 */
+	iph = ip_hdr(skb);
+	iph->frag_off |= htons(IP_DF);
+	if (par->hooknum == NF_INET_PRE_ROUTING ||
+	    par->hooknum == NF_INET_LOCAL_IN)
+		--iph->ttl;
+	ip_send_check(iph);
+
+	/*
+	 * Xtables is not reentrant currently, so a choice has to be made:
+	 * 1. return absolute verdict for the original and let the cloned
+	 *    packet travel through the chains
+	 * 2. let the original continue travelling and not pass the clone
+	 *    to Xtables.
+	 * #2 is chosen. Normally, we would use ip_local_out for the clone.
+	 * Because iph->check is already correct and we don't pass it to
+	 * Xtables anyway, a shortcut to dst_output [forwards to ip_output] can
+	 * be taken. %IPSKB_REROUTED needs to be set so that ip_output does not
+	 * invoke POSTROUTING on the cloned packet.
+	 */
+	IPCB(skb)->flags |= IPSKB_REROUTED;
+	if (tee_tg_route4(skb, info))
+		ip_output(skb);
+	else
+		kfree_skb(skb);
+
+	return XT_CONTINUE;
+}
+
+#ifdef WITH_IPV6
+static bool
+tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct net *net = pick_net(skb);
+	struct dst_entry *dst;
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	if (!tee_tg_route_oif(&fl, net, info))
+		return false;
+	fl.nl_u.ip6_u.daddr = info->gw.in6;
+	fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
+				  (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
+	dst = ip6_route_output(net, NULL, &fl);
+	if (dst == NULL)
+		return false;
+
+	dst_release(skb_dst(skb));
+	skb_dst_set(skb, dst);
+	skb->dev      = dst->dev;
+	skb->protocol = htons(ETH_P_IPV6);
+	return true;
+}
+
+static unsigned int
+tee_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_tee_tginfo *info = par->targinfo;
+
+	skb = pskb_copy(skb, GFP_ATOMIC);
+	if (skb == NULL)
+		return XT_CONTINUE;
+
+#ifdef WITH_CONNTRACK
+	nf_conntrack_put(skb->nfct);
+	skb->nfct     = &nf_conntrack_untracked.ct_general;
+	skb->nfctinfo = IP_CT_NEW;
+	nf_conntrack_get(skb->nfct);
+#endif
+	if (par->hooknum == NF_INET_PRE_ROUTING ||
+	    par->hooknum == NF_INET_LOCAL_IN) {
+		struct ipv6hdr *iph = ipv6_hdr(skb);
+		--iph->hop_limit;
+	}
+	IP6CB(skb)->flags |= IP6SKB_REROUTED;
+	if (tee_tg_route6(skb, info))
+		ip6_output(skb);
+	else
+		kfree_skb(skb);
+
+	return XT_CONTINUE;
+}
+#endif /* WITH_IPV6 */
+
+static int tee_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_tee_tginfo *info = par->targinfo;
+
+	if (info->oif[sizeof(info->oif)-1] != '\0')
+		return -EINVAL;
+	/* 0.0.0.0 and :: not allowed */
+	return (memcmp(&info->gw, &tee_zero_address,
+	       sizeof(tee_zero_address)) == 0) ? -EINVAL : 0;
+}
+
+static struct xt_target tee_tg_reg[] __read_mostly = {
+	{
+		.name       = "TEE",
+		.revision   = 1,
+		.family     = NFPROTO_IPV4,
+		.target     = tee_tg4,
+		.targetsize = sizeof(struct xt_tee_tginfo),
+		.checkentry = tee_tg_check,
+		.me         = THIS_MODULE,
+	},
+#ifdef WITH_IPV6
+	{
+		.name       = "TEE",
+		.revision   = 1,
+		.family     = NFPROTO_IPV6,
+		.target     = tee_tg6,
+		.targetsize = sizeof(struct xt_tee_tginfo),
+		.checkentry = tee_tg_check,
+		.me         = THIS_MODULE,
+	},
+#endif
+};
+
+static int __init tee_tg_init(void)
+{
+	return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+}
+
+static void __exit tee_tg_exit(void)
+{
+	xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+}
+
+module_init(tee_tg_init);
+module_exit(tee_tg_exit);
+MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: Reroute packet copy");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_TEE");
+MODULE_ALIAS("ip6t_TEE");
-- 
cgit v1.2.3-59-g8ed1b


From f3c5c1bfd430858d3a05436f82c51e53104feb6b Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 19 Apr 2010 16:05:10 +0200
Subject: netfilter: xtables: make ip_tables reentrant

Currently, the table traverser stores return addresses in the ruleset
itself (struct ip6t_entry->comefrom). This has a well-known drawback:
the jumpstack is overwritten on reentry, making it necessary for
targets to return absolute verdicts. Also, the ruleset (which might
be heavy memory-wise) needs to be replicated for each CPU that can
possibly invoke ip6t_do_table.

This patch decouples the jumpstack from struct ip6t_entry and instead
puts it into xt_table_info. Not being restricted by 'comefrom'
anymore, we can set up a stack as needed. By default, there is room
allocated for two entries into the traverser.

arp_tables is not touched, though, because there are just one or two
modules and further patches seek to collapse the table traverser
anyhow.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h |  7 ++++
 net/ipv4/netfilter/arp_tables.c    |  6 ++-
 net/ipv4/netfilter/ip_tables.c     | 65 +++++++++++++++++---------------
 net/ipv6/netfilter/ip6_tables.c    | 56 +++++++++++----------------
 net/netfilter/x_tables.c           | 77 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 145 insertions(+), 66 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 26ced0c323a5..50c867256ca3 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -401,6 +401,13 @@ struct xt_table_info {
 	unsigned int hook_entry[NF_INET_NUMHOOKS];
 	unsigned int underflow[NF_INET_NUMHOOKS];
 
+	/*
+	 * Number of user chains. Since tables cannot have loops, at most
+	 * @stacksize jumps (number of user chains) can possibly be made.
+	 */
+	unsigned int stacksize;
+	unsigned int *stackptr;
+	void ***jumpstack;
 	/* ipt_entry tables: one per CPU */
 	/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
 	void *entries[1];
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e8e363d90365..07a699059390 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -649,6 +649,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
 		if (ret != 0)
 			break;
 		++i;
+		if (strcmp(arpt_get_target(iter)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 	duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
 	if (ret != 0)
@@ -1774,8 +1777,7 @@ struct xt_table *arpt_register_table(struct net *net,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
 	struct xt_table *new_table;
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 18c5b1573f3e..70900ecf88e2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -321,8 +321,6 @@ ipt_do_table(struct sk_buff *skb,
 	     const struct net_device *out,
 	     struct xt_table *table)
 {
-#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom
-
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	const struct iphdr *ip;
 	bool hotdrop = false;
@@ -330,7 +328,8 @@ ipt_do_table(struct sk_buff *skb,
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
 	const void *table_base;
-	struct ipt_entry *e, *back;
+	struct ipt_entry *e, **jumpstack;
+	unsigned int *stackptr, origptr, cpu;
 	const struct xt_table_info *private;
 	struct xt_match_param mtpar;
 	struct xt_target_param tgpar;
@@ -356,19 +355,23 @@ ipt_do_table(struct sk_buff *skb,
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 	xt_info_rdlock_bh();
 	private = table->private;
-	table_base = private->entries[smp_processor_id()];
+	cpu        = smp_processor_id();
+	table_base = private->entries[cpu];
+	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
+	stackptr   = &private->stackptr[cpu];
+	origptr    = *stackptr;
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-	/* For return from builtin chain */
-	back = get_entry(table_base, private->underflow[hook]);
+	pr_devel("Entering %s(hook %u); sp at %u (UF %p)\n",
+		 table->name, hook, origptr,
+		 get_entry(table_base, private->underflow[hook]));
 
 	do {
 		const struct ipt_entry_target *t;
 		const struct xt_entry_match *ematch;
 
 		IP_NF_ASSERT(e);
-		IP_NF_ASSERT(back);
 		if (!ip_packet_match(ip, indev, outdev,
 		    &e->ip, mtpar.fragoff)) {
  no_match:
@@ -403,17 +406,28 @@ ipt_do_table(struct sk_buff *skb,
 					verdict = (unsigned)(-v) - 1;
 					break;
 				}
-				e = back;
-				back = get_entry(table_base, back->comefrom);
+				if (*stackptr == 0) {
+					e = get_entry(table_base,
+					    private->underflow[hook]);
+					pr_devel("Underflow (this is normal) "
+						 "to %p\n", e);
+				} else {
+					e = jumpstack[--*stackptr];
+					pr_devel("Pulled %p out from pos %u\n",
+						 e, *stackptr);
+					e = ipt_next_entry(e);
+				}
 				continue;
 			}
 			if (table_base + v != ipt_next_entry(e) &&
 			    !(e->ip.flags & IPT_F_GOTO)) {
-				/* Save old back ptr in next entry */
-				struct ipt_entry *next = ipt_next_entry(e);
-				next->comefrom = (void *)back - table_base;
-				/* set back pointer to next entry */
-				back = next;
+				if (*stackptr >= private->stacksize) {
+					verdict = NF_DROP;
+					break;
+				}
+				jumpstack[(*stackptr)++] = e;
+				pr_devel("Pushed %p into pos %u\n",
+					 e, *stackptr - 1);
 			}
 
 			e = get_entry(table_base, v);
@@ -426,18 +440,7 @@ ipt_do_table(struct sk_buff *skb,
 		tgpar.targinfo = t->data;
 
 
-#ifdef CONFIG_NETFILTER_DEBUG
-		tb_comefrom = 0xeeeeeeec;
-#endif
 		verdict = t->u.kernel.target->target(skb, &tgpar);
-#ifdef CONFIG_NETFILTER_DEBUG
-		if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
-			printk("Target %s reentered!\n",
-			       t->u.kernel.target->name);
-			verdict = NF_DROP;
-		}
-		tb_comefrom = 0x57acc001;
-#endif
 		/* Target might have changed stuff. */
 		ip = ip_hdr(skb);
 		if (verdict == IPT_CONTINUE)
@@ -447,7 +450,9 @@ ipt_do_table(struct sk_buff *skb,
 			break;
 	} while (!hotdrop);
 	xt_info_rdunlock_bh();
-
+	pr_devel("Exiting %s; resetting sp from %u to %u\n",
+		 __func__, *stackptr, origptr);
+	*stackptr = origptr;
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
 #else
@@ -455,8 +460,6 @@ ipt_do_table(struct sk_buff *skb,
 		return NF_DROP;
 	else return verdict;
 #endif
-
-#undef tb_comefrom
 }
 
 /* Figures out from what hook each rule can be called: returns 0 if
@@ -838,6 +841,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		if (ret != 0)
 			return ret;
 		++i;
+		if (strcmp(ipt_get_target(iter)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 
 	if (i != repl->num_entries) {
@@ -2086,8 +2092,7 @@ struct xt_table *ipt_register_table(struct net *net,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
 	struct xt_table *new_table;
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f2b815e72329..2a2770bcd640 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -351,15 +351,14 @@ ip6t_do_table(struct sk_buff *skb,
 	      const struct net_device *out,
 	      struct xt_table *table)
 {
-#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom
-
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	bool hotdrop = false;
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
 	const void *table_base;
-	struct ip6t_entry *e, *back;
+	struct ip6t_entry *e, **jumpstack;
+	unsigned int *stackptr, origptr, cpu;
 	const struct xt_table_info *private;
 	struct xt_match_param mtpar;
 	struct xt_target_param tgpar;
@@ -383,19 +382,19 @@ ip6t_do_table(struct sk_buff *skb,
 
 	xt_info_rdlock_bh();
 	private = table->private;
-	table_base = private->entries[smp_processor_id()];
+	cpu        = smp_processor_id();
+	table_base = private->entries[cpu];
+	jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu];
+	stackptr   = &private->stackptr[cpu];
+	origptr    = *stackptr;
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-	/* For return from builtin chain */
-	back = get_entry(table_base, private->underflow[hook]);
-
 	do {
 		const struct ip6t_entry_target *t;
 		const struct xt_entry_match *ematch;
 
 		IP_NF_ASSERT(e);
-		IP_NF_ASSERT(back);
 		if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
 		    &mtpar.thoff, &mtpar.fragoff, &hotdrop)) {
  no_match:
@@ -432,17 +431,20 @@ ip6t_do_table(struct sk_buff *skb,
 					verdict = (unsigned)(-v) - 1;
 					break;
 				}
-				e = back;
-				back = get_entry(table_base, back->comefrom);
+				if (*stackptr == 0)
+					e = get_entry(table_base,
+					    private->underflow[hook]);
+				else
+					e = ip6t_next_entry(jumpstack[--*stackptr]);
 				continue;
 			}
 			if (table_base + v != ip6t_next_entry(e) &&
 			    !(e->ipv6.flags & IP6T_F_GOTO)) {
-				/* Save old back ptr in next entry */
-				struct ip6t_entry *next = ip6t_next_entry(e);
-				next->comefrom = (void *)back - table_base;
-				/* set back pointer to next entry */
-				back = next;
+				if (*stackptr >= private->stacksize) {
+					verdict = NF_DROP;
+					break;
+				}
+				jumpstack[(*stackptr)++] = e;
 			}
 
 			e = get_entry(table_base, v);
@@ -454,19 +456,7 @@ ip6t_do_table(struct sk_buff *skb,
 		tgpar.target   = t->u.kernel.target;
 		tgpar.targinfo = t->data;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-		tb_comefrom = 0xeeeeeeec;
-#endif
 		verdict = t->u.kernel.target->target(skb, &tgpar);
-
-#ifdef CONFIG_NETFILTER_DEBUG
-		if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) {
-			printk("Target %s reentered!\n",
-			       t->u.kernel.target->name);
-			verdict = NF_DROP;
-		}
-		tb_comefrom = 0x57acc001;
-#endif
 		if (verdict == IP6T_CONTINUE)
 			e = ip6t_next_entry(e);
 		else
@@ -474,10 +464,8 @@ ip6t_do_table(struct sk_buff *skb,
 			break;
 	} while (!hotdrop);
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	tb_comefrom = NETFILTER_LINK_POISON;
-#endif
 	xt_info_rdunlock_bh();
+	*stackptr = origptr;
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -486,8 +474,6 @@ ip6t_do_table(struct sk_buff *skb,
 		return NF_DROP;
 	else return verdict;
 #endif
-
-#undef tb_comefrom
 }
 
 /* Figures out from what hook each rule can be called: returns 0 if
@@ -869,6 +855,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		if (ret != 0)
 			return ret;
 		++i;
+		if (strcmp(ip6t_get_target(iter)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 
 	if (i != repl->num_entries) {
@@ -2120,8 +2109,7 @@ struct xt_table *ip6t_register_table(struct net *net,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
 	struct xt_table *new_table;
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8e23d8f68459..edde5c602890 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -62,6 +62,9 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
 	[NFPROTO_IPV6]   = "ip6",
 };
 
+/* Allow this many total (re)entries. */
+static const unsigned int xt_jumpstack_multiplier = 2;
+
 /* Registration hooks for targets. */
 int
 xt_register_target(struct xt_target *target)
@@ -680,6 +683,26 @@ void xt_free_table_info(struct xt_table_info *info)
 		else
 			vfree(info->entries[cpu]);
 	}
+
+	if (info->jumpstack != NULL) {
+		if (sizeof(void *) * info->stacksize > PAGE_SIZE) {
+			for_each_possible_cpu(cpu)
+				vfree(info->jumpstack[cpu]);
+		} else {
+			for_each_possible_cpu(cpu)
+				kfree(info->jumpstack[cpu]);
+		}
+	}
+
+	if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE)
+		vfree(info->jumpstack);
+	else
+		kfree(info->jumpstack);
+	if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE)
+		vfree(info->stackptr);
+	else
+		kfree(info->stackptr);
+
 	kfree(info);
 }
 EXPORT_SYMBOL(xt_free_table_info);
@@ -724,6 +747,49 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock);
 DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
 EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
 
+static int xt_jumpstack_alloc(struct xt_table_info *i)
+{
+	unsigned int size;
+	int cpu;
+
+	size = sizeof(unsigned int) * nr_cpu_ids;
+	if (size > PAGE_SIZE)
+		i->stackptr = vmalloc(size);
+	else
+		i->stackptr = kmalloc(size, GFP_KERNEL);
+	if (i->stackptr == NULL)
+		return -ENOMEM;
+	memset(i->stackptr, 0, size);
+
+	size = sizeof(void **) * nr_cpu_ids;
+	if (size > PAGE_SIZE)
+		i->jumpstack = vmalloc(size);
+	else
+		i->jumpstack = kmalloc(size, GFP_KERNEL);
+	if (i->jumpstack == NULL)
+		return -ENOMEM;
+	memset(i->jumpstack, 0, size);
+
+	i->stacksize *= xt_jumpstack_multiplier;
+	size = sizeof(void *) * i->stacksize;
+	for_each_possible_cpu(cpu) {
+		if (size > PAGE_SIZE)
+			i->jumpstack[cpu] = vmalloc_node(size,
+				cpu_to_node(cpu));
+		else
+			i->jumpstack[cpu] = kmalloc_node(size,
+				GFP_KERNEL, cpu_to_node(cpu));
+		if (i->jumpstack[cpu] == NULL)
+			/*
+			 * Freeing will be done later on by the callers. The
+			 * chain is: xt_replace_table -> __do_replace ->
+			 * do_replace -> xt_free_table_info.
+			 */
+			return -ENOMEM;
+	}
+
+	return 0;
+}
 
 struct xt_table_info *
 xt_replace_table(struct xt_table *table,
@@ -732,6 +798,7 @@ xt_replace_table(struct xt_table *table,
 	      int *error)
 {
 	struct xt_table_info *private;
+	int ret;
 
 	/* Do the substitution. */
 	local_bh_disable();
@@ -746,6 +813,12 @@ xt_replace_table(struct xt_table *table,
 		return NULL;
 	}
 
+	ret = xt_jumpstack_alloc(newinfo);
+	if (ret < 0) {
+		*error = ret;
+		return NULL;
+	}
+
 	table->private = newinfo;
 	newinfo->initial_entries = private->initial_entries;
 
@@ -770,6 +843,10 @@ struct xt_table *xt_register_table(struct net *net,
 	struct xt_table_info *private;
 	struct xt_table *t, *table;
 
+	ret = xt_jumpstack_alloc(newinfo);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
 	/* Don't add one object to multiple lists. */
 	table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
 	if (!table) {
-- 
cgit v1.2.3-59-g8ed1b


From cd58bcd9787ef4c16ab6e442c4f1bf3539b3ab39 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 19 Apr 2010 16:06:52 +0200
Subject: netfilter: xt_TEE: have cloned packet travel through Xtables too

Since Xtables is now reentrant/nestable, the cloned packet can also go
through Xtables and be subject to rules itself.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/ip_output.c   |  1 -
 net/ipv6/ip6_output.c  |  1 -
 net/netfilter/xt_TEE.c | 40 ++++++++++++++++++----------------------
 3 files changed, 18 insertions(+), 24 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 0abfddec1e26..f09135e1e14f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -309,7 +309,6 @@ int ip_output(struct sk_buff *skb)
 			    ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
-EXPORT_SYMBOL_GPL(ip_output);
 
 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d09be7ff8735..c10a38a71a5e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -176,7 +176,6 @@ int ip6_output(struct sk_buff *skb)
 			    ip6_finish_output,
 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
-EXPORT_SYMBOL_GPL(ip6_output);
 
 /*
  *	xmit an sk_buff (used by TCP)
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index b3d730163f12..842e7012eca7 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -12,6 +12,7 @@
  */
 #include <linux/ip.h>
 #include <linux/module.h>
+#include <linux/percpu.h>
 #include <linux/route.h>
 #include <linux/skbuff.h>
 #include <net/checksum.h>
@@ -32,6 +33,7 @@
 #endif
 
 static const union nf_inet_addr tee_zero_address;
+static DEFINE_PER_CPU(bool, tee_active);
 
 static struct net *pick_net(struct sk_buff *skb)
 {
@@ -91,6 +93,8 @@ tee_tg4(struct sk_buff *skb, const struct xt_target_param *par)
 	const struct xt_tee_tginfo *info = par->targinfo;
 	struct iphdr *iph;
 
+	if (percpu_read(tee_active))
+		return XT_CONTINUE;
 	/*
 	 * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
 	 * the original skb, which should continue on its way as if nothing has
@@ -125,24 +129,13 @@ tee_tg4(struct sk_buff *skb, const struct xt_target_param *par)
 		--iph->ttl;
 	ip_send_check(iph);
 
-	/*
-	 * Xtables is not reentrant currently, so a choice has to be made:
-	 * 1. return absolute verdict for the original and let the cloned
-	 *    packet travel through the chains
-	 * 2. let the original continue travelling and not pass the clone
-	 *    to Xtables.
-	 * #2 is chosen. Normally, we would use ip_local_out for the clone.
-	 * Because iph->check is already correct and we don't pass it to
-	 * Xtables anyway, a shortcut to dst_output [forwards to ip_output] can
-	 * be taken. %IPSKB_REROUTED needs to be set so that ip_output does not
-	 * invoke POSTROUTING on the cloned packet.
-	 */
-	IPCB(skb)->flags |= IPSKB_REROUTED;
-	if (tee_tg_route4(skb, info))
-		ip_output(skb);
-	else
+	if (tee_tg_route4(skb, info)) {
+		percpu_write(tee_active, true);
+		ip_local_out(skb);
+		percpu_write(tee_active, false);
+	} else {
 		kfree_skb(skb);
-
+	}
 	return XT_CONTINUE;
 }
 
@@ -177,6 +170,8 @@ tee_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	const struct xt_tee_tginfo *info = par->targinfo;
 
+	if (percpu_read(tee_active))
+		return XT_CONTINUE;
 	skb = pskb_copy(skb, GFP_ATOMIC);
 	if (skb == NULL)
 		return XT_CONTINUE;
@@ -192,12 +187,13 @@ tee_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 		struct ipv6hdr *iph = ipv6_hdr(skb);
 		--iph->hop_limit;
 	}
-	IP6CB(skb)->flags |= IP6SKB_REROUTED;
-	if (tee_tg_route6(skb, info))
-		ip6_output(skb);
-	else
+	if (tee_tg_route6(skb, info)) {
+		percpu_write(tee_active, true);
+		ip6_local_out(skb);
+		percpu_write(tee_active, false);
+	} else {
 		kfree_skb(skb);
-
+	}
 	return XT_CONTINUE;
 }
 #endif /* WITH_IPV6 */
-- 
cgit v1.2.3-59-g8ed1b


From 5b775eb1c04c2ef33f5e17035e368214214ef9c2 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 19 Apr 2010 16:07:47 +0200
Subject: netfilter: xtables: remove old comments about reentrancy

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ip_tables.c   | 2 --
 net/ipv4/netfilter/ipt_REJECT.c  | 3 ---
 net/ipv6/netfilter/ip6_tables.c  | 2 --
 net/ipv6/netfilter/ip6t_REJECT.c | 3 ---
 4 files changed, 10 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 70900ecf88e2..bb5e0d9b8137 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -434,8 +434,6 @@ ipt_do_table(struct sk_buff *skb,
 			continue;
 		}
 
-		/* Targets which reenter must return
-		   abs. verdicts */
 		tgpar.target   = t->u.kernel.target;
 		tgpar.targinfo = t->data;
 
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index b026014e7a5b..038fa0bb8f6b 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -139,9 +139,6 @@ reject_tg(struct sk_buff *skb, const struct xt_target_param *par)
 {
 	const struct ipt_reject_info *reject = par->targinfo;
 
-	/* WARNING: This code causes reentry within iptables.
-	   This means that the iptables jump stack is now crap.  We
-	   must return an absolute verdict. --RR */
 	switch (reject->with) {
 	case IPT_ICMP_NET_UNREACHABLE:
 		send_unreach(skb, ICMP_NET_UNREACH);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 2a2770bcd640..7afa11773164 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -451,8 +451,6 @@ ip6t_do_table(struct sk_buff *skb,
 			continue;
 		}
 
-		/* Targets which reenter must return
-		   abs. verdicts */
 		tgpar.target   = t->u.kernel.target;
 		tgpar.targinfo = t->data;
 
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 55b9b2da1340..dad97622ed72 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -179,9 +179,6 @@ reject_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 	struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
 
 	pr_debug("%s: medium point\n", __func__);
-	/* WARNING: This code causes reentry within ip6tables.
-	   This means that the ip6tables jump stack is now crap.  We
-	   must return an absolute verdict. --RR */
 	switch (reject->with) {
 	case IP6T_ICMP6_NO_ROUTE:
 		send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum);
-- 
cgit v1.2.3-59-g8ed1b


From 88751275b8e867d756e4f86ae92afe0232de129f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 19 Apr 2010 05:07:33 +0000
Subject: rps: shortcut net_rps_action()

net_rps_action() is a bit expensive on NR_CPUS=64..4096 kernels, even if
RPS is not active.

Tom Herbert used two bitmasks to hold information needed to send IPI,
but a single LIFO list seems more appropriate.

Move all RPS logic into net_rps_action() to clean up net_rx_action() code
(remove two ifdefs).

Move rps_remote_softirq_cpus into softnet_data to share its first cache
line, filling an existing hole.

In a future patch, we could call net_rps_action() from process_backlog()
to make sure we send IPI before handling this cpu backlog.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  9 ++++--
 net/core/dev.c            | 79 +++++++++++++++++++----------------------------
 2 files changed, 38 insertions(+), 50 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 649a0252686e..83ab3da149ad 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1381,17 +1381,20 @@ static inline int unregister_gifconf(unsigned int family)
 }
 
 /*
- * Incoming packets are placed on per-cpu queues so that
- * no locking is needed.
+ * Incoming packets are placed on per-cpu queues
  */
 struct softnet_data {
 	struct Qdisc		*output_queue;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
 
-	/* Elements below can be accessed between CPUs for RPS */
 #ifdef CONFIG_RPS
+	struct softnet_data	*rps_ipi_list;
+
+	/* Elements below can be accessed between CPUs for RPS */
 	struct call_single_data	csd ____cacheline_aligned_in_smp;
+	struct softnet_data	*rps_ipi_next;
+	unsigned int		cpu;
 	unsigned int		input_queue_head;
 #endif
 	struct sk_buff_head	input_pkt_queue;
diff --git a/net/core/dev.c b/net/core/dev.c
index 8eb50e2292fb..05a2b294906b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2345,21 +2345,6 @@ done:
 	return cpu;
 }
 
-/*
- * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled
- * to be sent to kick remote softirq processing.  There are two masks since
- * the sending of IPIs must be done with interrupts enabled.  The select field
- * indicates the current mask that enqueue_backlog uses to schedule IPIs.
- * select is flipped before net_rps_action is called while still under lock,
- * net_rps_action then uses the non-selected mask to send the IPIs and clears
- * it without conflicting with enqueue_backlog operation.
- */
-struct rps_remote_softirq_cpus {
-	cpumask_t mask[2];
-	int select;
-};
-static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus);
-
 /* Called from hardirq (IPI) context */
 static void trigger_softirq(void *data)
 {
@@ -2402,10 +2387,12 @@ enqueue:
 		if (napi_schedule_prep(&queue->backlog)) {
 #ifdef CONFIG_RPS
 			if (cpu != smp_processor_id()) {
-				struct rps_remote_softirq_cpus *rcpus =
-				    &__get_cpu_var(rps_remote_softirq_cpus);
+				struct softnet_data *myqueue;
+
+				myqueue = &__get_cpu_var(softnet_data);
+				queue->rps_ipi_next = myqueue->rps_ipi_list;
+				myqueue->rps_ipi_list = queue;
 
-				cpu_set(cpu, rcpus->mask[rcpus->select]);
 				__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 				goto enqueue;
 			}
@@ -2910,7 +2897,9 @@ int netif_receive_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
-/* Network device is going away, flush any packets still pending  */
+/* Network device is going away, flush any packets still pending
+ * Called with irqs disabled.
+ */
 static void flush_backlog(void *arg)
 {
 	struct net_device *dev = arg;
@@ -3338,24 +3327,33 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
-#ifdef CONFIG_RPS
 /*
- * net_rps_action sends any pending IPI's for rps.  This is only called from
- * softirq and interrupts must be enabled.
+ * net_rps_action sends any pending IPI's for rps.
+ * Note: called with local irq disabled, but exits with local irq enabled.
  */
-static void net_rps_action(cpumask_t *mask)
+static void net_rps_action(void)
 {
-	int cpu;
+#ifdef CONFIG_RPS
+	struct softnet_data *locqueue = &__get_cpu_var(softnet_data);
+	struct softnet_data *remqueue = locqueue->rps_ipi_list;
 
-	/* Send pending IPI's to kick RPS processing on remote cpus. */
-	for_each_cpu_mask_nr(cpu, *mask) {
-		struct softnet_data *queue = &per_cpu(softnet_data, cpu);
-		if (cpu_online(cpu))
-			__smp_call_function_single(cpu, &queue->csd, 0);
-	}
-	cpus_clear(*mask);
-}
+	if (remqueue) {
+		locqueue->rps_ipi_list = NULL;
+
+		local_irq_enable();
+
+		/* Send pending IPI's to kick RPS processing on remote cpus. */
+		while (remqueue) {
+			struct softnet_data *next = remqueue->rps_ipi_next;
+			if (cpu_online(remqueue->cpu))
+				__smp_call_function_single(remqueue->cpu,
+							   &remqueue->csd, 0);
+			remqueue = next;
+		}
+	} else
 #endif
+		local_irq_enable();
+}
 
 static void net_rx_action(struct softirq_action *h)
 {
@@ -3363,10 +3361,6 @@ static void net_rx_action(struct softirq_action *h)
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
-#ifdef CONFIG_RPS
-	int select;
-	struct rps_remote_softirq_cpus *rcpus;
-#endif
 
 	local_irq_disable();
 
@@ -3429,17 +3423,7 @@ static void net_rx_action(struct softirq_action *h)
 		netpoll_poll_unlock(have);
 	}
 out:
-#ifdef CONFIG_RPS
-	rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
-	select = rcpus->select;
-	rcpus->select ^= 1;
-
-	local_irq_enable();
-
-	net_rps_action(&rcpus->mask[select]);
-#else
-	local_irq_enable();
-#endif
+	net_rps_action();
 
 #ifdef CONFIG_NET_DMA
 	/*
@@ -5839,6 +5823,7 @@ static int __init net_dev_init(void)
 		queue->csd.func = trigger_softirq;
 		queue->csd.info = queue;
 		queue->csd.flags = 0;
+		queue->cpu = i;
 #endif
 
 		queue->backlog.poll = process_backlog;
-- 
cgit v1.2.3-59-g8ed1b


From 2aab4c273ad837fbcf2955aee32b9ec4706c2521 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 19 Apr 2010 11:00:24 +0200
Subject: mac80211: fix stopping RX BA session from timer

Kalle reported that his system deadlocks since my
recent work in this area. The reason quickly became
apparent: we try to del_timer_sync() a timer
from within its own handler. Fix that by making the
function aware of the context it is called from.

Reported-by: Kalle Valo <kvalo@adurom.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Tested-by: Kalle Valo <kvalo@adurom.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/agg-rx.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 53233ab50f65..1771dd9bd137 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -18,8 +18,9 @@
 #include "ieee80211_i.h"
 #include "driver-ops.h"
 
-void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
-				    u16 initiator, u16 reason)
+static void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
+					    u16 initiator, u16 reason,
+					    bool from_timer)
 {
 	struct ieee80211_local *local = sta->local;
 	struct tid_ampdu_rx *tid_rx;
@@ -69,10 +70,17 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 
 	spin_unlock_bh(&sta->lock);
 
-	del_timer_sync(&tid_rx->session_timer);
+	if (!from_timer)
+		del_timer_sync(&tid_rx->session_timer);
 	kfree(tid_rx);
 }
 
+void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
+				    u16 initiator, u16 reason)
+{
+	___ieee80211_stop_rx_ba_session(sta, tid, initiator, reason, false);
+}
+
 /*
  * After accepting the AddBA Request we activated a timer,
  * resetting it after each frame that arrives from the originator.
@@ -91,8 +99,8 @@ static void sta_rx_agg_session_timer_expired(unsigned long data)
 #ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid);
 #endif
-	__ieee80211_stop_rx_ba_session(sta, *ptid, WLAN_BACK_RECIPIENT,
-				       WLAN_REASON_QSTA_TIMEOUT);
+	___ieee80211_stop_rx_ba_session(sta, *ptid, WLAN_BACK_RECIPIENT,
+					WLAN_REASON_QSTA_TIMEOUT, true);
 }
 
 static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid,
-- 
cgit v1.2.3-59-g8ed1b


From 3393a608c4979a94d1887efc05b792849d361a65 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Mon, 19 Apr 2010 10:12:52 +0300
Subject: mac80211: Prevent running sta_cleanup timer unnecessarily
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The sta_cleanup timer is used to periodically expire buffered frames from the
tx buf. The timer is executing periodically, regardless of the need for it.
This is wasting resources.

Fix this simply by not restarting the sta_cleanup timer if the tx buffer was
empty. Restart the timer when there is some more tx-traffic.

Cc: Janne Ylälehto <janne.ylalehto@nokia.com>
Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/sta_info.c | 13 ++++++++++---
 net/mac80211/tx.c       |  7 +++++++
 2 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index ff0eb948917b..3de7a2260d65 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -575,7 +575,7 @@ static int sta_info_buffer_expired(struct sta_info *sta,
 }
 
 
-static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
+static bool sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
 					     struct sta_info *sta)
 {
 	unsigned long flags;
@@ -583,7 +583,7 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
 	struct ieee80211_sub_if_data *sdata;
 
 	if (skb_queue_empty(&sta->ps_tx_buf))
-		return;
+		return false;
 
 	for (;;) {
 		spin_lock_irqsave(&sta->ps_tx_buf.lock, flags);
@@ -608,6 +608,8 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local,
 		if (skb_queue_empty(&sta->ps_tx_buf))
 			sta_info_clear_tim_bit(sta);
 	}
+
+	return true;
 }
 
 static int __must_check __sta_info_destroy(struct sta_info *sta)
@@ -755,15 +757,20 @@ static void sta_info_cleanup(unsigned long data)
 {
 	struct ieee80211_local *local = (struct ieee80211_local *) data;
 	struct sta_info *sta;
+	bool timer_needed = false;
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(sta, &local->sta_list, list)
-		sta_info_cleanup_expire_buffered(local, sta);
+		if (sta_info_cleanup_expire_buffered(local, sta))
+			timer_needed = true;
 	rcu_read_unlock();
 
 	if (local->quiescing)
 		return;
 
+	if (!timer_needed)
+		return;
+
 	local->sta_cleanup.expires =
 		round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL);
 	add_timer(&local->sta_cleanup);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 2cb77267f733..e2aa972d584f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -429,6 +429,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 	struct sta_info *sta = tx->sta;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
+	struct ieee80211_local *local = tx->local;
 	u32 staflags;
 
 	if (unlikely(!sta ||
@@ -476,6 +477,12 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
 		info->control.vif = &tx->sdata->vif;
 		info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
 		skb_queue_tail(&sta->ps_tx_buf, tx->skb);
+
+		if (!timer_pending(&local->sta_cleanup))
+			mod_timer(&local->sta_cleanup,
+				  round_jiffies(jiffies +
+						STA_INFO_CLEANUP_INTERVAL));
+
 		return TX_QUEUED;
 	}
 #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG
-- 
cgit v1.2.3-59-g8ed1b


From 67e0f392779e35a96c43bc240ef5d30a701d153e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 19 Apr 2010 11:03:13 +0200
Subject: mac80211: add missing newline

One HT debugging printk is missing a newline,
add it.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/agg-tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 6bb4d0a1e5c5..7dfe833d9716 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -186,7 +186,7 @@ static void sta_addba_resp_timer_expired(unsigned long data)
 		*state = HT_AGG_STATE_IDLE;
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		printk(KERN_DEBUG "timer expired on tid %d but we are not "
-				"(or no longer) expecting addBA response there",
+				"(or no longer) expecting addBA response there\n",
 			tid);
 #endif
 		return;
-- 
cgit v1.2.3-59-g8ed1b


From f5acb907dc24c3822f408211bad1cd6e5d0433cf Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 19 Apr 2010 14:40:57 -0700
Subject: rps: static functions

store_rps_map() & store_rps_dev_flow_table_cnt() are static.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 143052a22b9b..c57c4b228bb5 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -550,7 +550,7 @@ static void rps_map_release(struct rcu_head *rcu)
 	kfree(map);
 }
 
-ssize_t store_rps_map(struct netdev_rx_queue *queue,
+static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 		      struct rx_queue_attribute *attribute,
 		      const char *buf, size_t len)
 {
@@ -635,7 +635,7 @@ static void rps_dev_flow_table_release(struct rcu_head *rcu)
 	schedule_work(&table->free_work);
 }
 
-ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
+static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 				     struct rx_queue_attribute *attr,
 				     const char *buf, size_t len)
 {
-- 
cgit v1.2.3-59-g8ed1b


From e36fa2f7e92f25aab2e3d787dcfe3590817f19d3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 19 Apr 2010 21:17:14 +0000
Subject: rps: cleanups

struct softnet_data holds many queues, so consistently using the name
"sd" instead of "queue" is better.

Adds an rps_ipi_queued() helper to clean up enqueue_to_backlog()

Adds a _and_irq_disable suffix to net_rps_action() name, as David
suggested.

incr_input_queue_head() becomes input_queue_head_incr()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |   4 +-
 net/core/dev.c            | 149 +++++++++++++++++++++++++---------------------
 2 files changed, 82 insertions(+), 71 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 83ab3da149ad..3c5ed5f5274e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1401,10 +1401,10 @@ struct softnet_data {
 	struct napi_struct	backlog;
 };
 
-static inline void incr_input_queue_head(struct softnet_data *queue)
+static inline void input_queue_head_incr(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-	queue->input_queue_head++;
+	sd->input_queue_head++;
 #endif
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 05a2b294906b..7f5755b0a57c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -208,17 +208,17 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
-static inline void rps_lock(struct softnet_data *queue)
+static inline void rps_lock(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-	spin_lock(&queue->input_pkt_queue.lock);
+	spin_lock(&sd->input_pkt_queue.lock);
 #endif
 }
 
-static inline void rps_unlock(struct softnet_data *queue)
+static inline void rps_unlock(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-	spin_unlock(&queue->input_pkt_queue.lock);
+	spin_unlock(&sd->input_pkt_queue.lock);
 #endif
 }
 
@@ -2346,14 +2346,37 @@ done:
 }
 
 /* Called from hardirq (IPI) context */
-static void trigger_softirq(void *data)
+static void rps_trigger_softirq(void *data)
 {
-	struct softnet_data *queue = data;
-	__napi_schedule(&queue->backlog);
+	struct softnet_data *sd = data;
+
+	__napi_schedule(&sd->backlog);
 	__get_cpu_var(netdev_rx_stat).received_rps++;
 }
+
 #endif /* CONFIG_RPS */
 
+/*
+ * Check if this softnet_data structure is another cpu one
+ * If yes, queue it to our IPI list and return 1
+ * If no, return 0
+ */
+static int rps_ipi_queued(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	struct softnet_data *mysd = &__get_cpu_var(softnet_data);
+
+	if (sd != mysd) {
+		sd->rps_ipi_next = mysd->rps_ipi_list;
+		mysd->rps_ipi_list = sd;
+
+		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+		return 1;
+	}
+#endif /* CONFIG_RPS */
+	return 0;
+}
+
 /*
  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
  * queue (may be a remote CPU queue).
@@ -2361,48 +2384,36 @@ static void trigger_softirq(void *data)
 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 			      unsigned int *qtail)
 {
-	struct softnet_data *queue;
+	struct softnet_data *sd;
 	unsigned long flags;
 
-	queue = &per_cpu(softnet_data, cpu);
+	sd = &per_cpu(softnet_data, cpu);
 
 	local_irq_save(flags);
 	__get_cpu_var(netdev_rx_stat).total++;
 
-	rps_lock(queue);
-	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
-		if (queue->input_pkt_queue.qlen) {
+	rps_lock(sd);
+	if (sd->input_pkt_queue.qlen <= netdev_max_backlog) {
+		if (sd->input_pkt_queue.qlen) {
 enqueue:
-			__skb_queue_tail(&queue->input_pkt_queue, skb);
+			__skb_queue_tail(&sd->input_pkt_queue, skb);
 #ifdef CONFIG_RPS
-			*qtail = queue->input_queue_head +
-			    queue->input_pkt_queue.qlen;
+			*qtail = sd->input_queue_head + sd->input_pkt_queue.qlen;
 #endif
-			rps_unlock(queue);
+			rps_unlock(sd);
 			local_irq_restore(flags);
 			return NET_RX_SUCCESS;
 		}
 
 		/* Schedule NAPI for backlog device */
-		if (napi_schedule_prep(&queue->backlog)) {
-#ifdef CONFIG_RPS
-			if (cpu != smp_processor_id()) {
-				struct softnet_data *myqueue;
-
-				myqueue = &__get_cpu_var(softnet_data);
-				queue->rps_ipi_next = myqueue->rps_ipi_list;
-				myqueue->rps_ipi_list = queue;
-
-				__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-				goto enqueue;
-			}
-#endif
-			__napi_schedule(&queue->backlog);
+		if (napi_schedule_prep(&sd->backlog)) {
+			if (!rps_ipi_queued(sd))
+				__napi_schedule(&sd->backlog);
 		}
 		goto enqueue;
 	}
 
-	rps_unlock(queue);
+	rps_unlock(sd);
 
 	__get_cpu_var(netdev_rx_stat).dropped++;
 	local_irq_restore(flags);
@@ -2903,17 +2914,17 @@ EXPORT_SYMBOL(netif_receive_skb);
 static void flush_backlog(void *arg)
 {
 	struct net_device *dev = arg;
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 	struct sk_buff *skb, *tmp;
 
-	rps_lock(queue);
-	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
+	rps_lock(sd);
+	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp)
 		if (skb->dev == dev) {
-			__skb_unlink(skb, &queue->input_pkt_queue);
+			__skb_unlink(skb, &sd->input_pkt_queue);
 			kfree_skb(skb);
-			incr_input_queue_head(queue);
+			input_queue_head_incr(sd);
 		}
-	rps_unlock(queue);
+	rps_unlock(sd);
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -3219,23 +3230,23 @@ EXPORT_SYMBOL(napi_gro_frags);
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 
 	napi->weight = weight_p;
 	do {
 		struct sk_buff *skb;
 
 		local_irq_disable();
-		rps_lock(queue);
-		skb = __skb_dequeue(&queue->input_pkt_queue);
+		rps_lock(sd);
+		skb = __skb_dequeue(&sd->input_pkt_queue);
 		if (!skb) {
 			__napi_complete(napi);
-			rps_unlock(queue);
+			rps_unlock(sd);
 			local_irq_enable();
 			break;
 		}
-		incr_input_queue_head(queue);
-		rps_unlock(queue);
+		input_queue_head_incr(sd);
+		rps_unlock(sd);
 		local_irq_enable();
 
 		__netif_receive_skb(skb);
@@ -3331,24 +3342,25 @@ EXPORT_SYMBOL(netif_napi_del);
  * net_rps_action sends any pending IPI's for rps.
  * Note: called with local irq disabled, but exits with local irq enabled.
  */
-static void net_rps_action(void)
+static void net_rps_action_and_irq_disable(void)
 {
 #ifdef CONFIG_RPS
-	struct softnet_data *locqueue = &__get_cpu_var(softnet_data);
-	struct softnet_data *remqueue = locqueue->rps_ipi_list;
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
+	struct softnet_data *remsd = sd->rps_ipi_list;
 
-	if (remqueue) {
-		locqueue->rps_ipi_list = NULL;
+	if (remsd) {
+		sd->rps_ipi_list = NULL;
 
 		local_irq_enable();
 
 		/* Send pending IPI's to kick RPS processing on remote cpus. */
-		while (remqueue) {
-			struct softnet_data *next = remqueue->rps_ipi_next;
-			if (cpu_online(remqueue->cpu))
-				__smp_call_function_single(remqueue->cpu,
-							   &remqueue->csd, 0);
-			remqueue = next;
+		while (remsd) {
+			struct softnet_data *next = remsd->rps_ipi_next;
+
+			if (cpu_online(remsd->cpu))
+				__smp_call_function_single(remsd->cpu,
+							   &remsd->csd, 0);
+			remsd = next;
 		}
 	} else
 #endif
@@ -3423,7 +3435,7 @@ static void net_rx_action(struct softirq_action *h)
 		netpoll_poll_unlock(have);
 	}
 out:
-	net_rps_action();
+	net_rps_action_and_irq_disable();
 
 #ifdef CONFIG_NET_DMA
 	/*
@@ -5595,7 +5607,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	/* Process offline CPU's input_pkt_queue */
 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx(skb);
-		incr_input_queue_head(oldsd);
+		input_queue_head_incr(oldsd);
 	}
 
 	return NOTIFY_OK;
@@ -5812,24 +5824,23 @@ static int __init net_dev_init(void)
 	 */
 
 	for_each_possible_cpu(i) {
-		struct softnet_data *queue;
+		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
-		queue = &per_cpu(softnet_data, i);
-		skb_queue_head_init(&queue->input_pkt_queue);
-		queue->completion_queue = NULL;
-		INIT_LIST_HEAD(&queue->poll_list);
+		skb_queue_head_init(&sd->input_pkt_queue);
+		sd->completion_queue = NULL;
+		INIT_LIST_HEAD(&sd->poll_list);
 
 #ifdef CONFIG_RPS
-		queue->csd.func = trigger_softirq;
-		queue->csd.info = queue;
-		queue->csd.flags = 0;
-		queue->cpu = i;
+		sd->csd.func = rps_trigger_softirq;
+		sd->csd.info = sd;
+		sd->csd.flags = 0;
+		sd->cpu = i;
 #endif
 
-		queue->backlog.poll = process_backlog;
-		queue->backlog.weight = weight_p;
-		queue->backlog.gro_list = NULL;
-		queue->backlog.gro_count = 0;
+		sd->backlog.poll = process_backlog;
+		sd->backlog.weight = weight_p;
+		sd->backlog.gro_list = NULL;
+		sd->backlog.gro_count = 0;
 	}
 
 	dev_boot_phase = 0;
-- 
cgit v1.2.3-59-g8ed1b


From b249dcb82d327e419d3cb45773b146ebb5faf419 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 19 Apr 2010 21:56:38 +0000
Subject: rps: consistent rxhash

In case we compute a software skb->rxhash, we can generate a consistent
hash: its value will be the same in both flow directions.

This helps some workloads, like conntracking, since the same state needs
to be accessed in both directions.

tbench + RFS + this patch gives better results than tbench with default
kernel configuration (no RPS, no RFS)

Also fixed some sparse warnings.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 7f5755b0a57c..0d78e0454a6d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1974,7 +1974,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
 	else
-		hash = skb->protocol;
+		hash = (__force u16) skb->protocol;
 
 	hash = jhash_1word(hash, hashrnd);
 
@@ -2253,8 +2253,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 
 		ip = (struct iphdr *) skb->data;
 		ip_proto = ip->protocol;
-		addr1 = ip->saddr;
-		addr2 = ip->daddr;
+		addr1 = (__force u32) ip->saddr;
+		addr2 = (__force u32) ip->daddr;
 		ihl = ip->ihl;
 		break;
 	case __constant_htons(ETH_P_IPV6):
@@ -2263,8 +2263,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 
 		ip6 = (struct ipv6hdr *) skb->data;
 		ip_proto = ip6->nexthdr;
-		addr1 = ip6->saddr.s6_addr32[3];
-		addr2 = ip6->daddr.s6_addr32[3];
+		addr1 = (__force u32) ip6->saddr.s6_addr32[3];
+		addr2 = (__force u32) ip6->daddr.s6_addr32[3];
 		ihl = (40 >> 2);
 		break;
 	default:
@@ -2279,14 +2279,25 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	case IPPROTO_AH:
 	case IPPROTO_SCTP:
 	case IPPROTO_UDPLITE:
-		if (pskb_may_pull(skb, (ihl * 4) + 4))
-			ports = *((u32 *) (skb->data + (ihl * 4)));
+		if (pskb_may_pull(skb, (ihl * 4) + 4)) {
+			__be16 *hports = (__be16 *) (skb->data + (ihl * 4));
+			u32 sport, dport;
+
+			sport = (__force u16) hports[0];
+			dport = (__force u16) hports[1];
+			if (dport < sport)
+				swap(sport, dport);
+			ports = (sport << 16) + dport;
+		}
 		break;
 
 	default:
 		break;
 	}
 
+	/* get a consistent hash (same value on both flow directions) */
+	if (addr2 < addr1)
+		swap(addr1, addr2);
 	skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd);
 	if (!skb->rxhash)
 		skb->rxhash = 1;
-- 
cgit v1.2.3-59-g8ed1b


From ab9304717f7624c41927f442e6b6d418b2d8b3e4 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Tue, 20 Apr 2010 01:45:37 -0700
Subject: net: emphasize rtnl lock required in call_netdevice_notifiers

Since netdev_chain is guarded by rtnl_lock, ASSERT_RTNL should be
present here to make sure that all callers of call_netdevice_notifiers
do the locking properly.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 0d78e0454a6d..b31d5d69a467 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1435,6 +1435,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier);
 
 int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 {
+	ASSERT_RTNL();
 	return raw_notifier_call_chain(&netdev_chain, val, dev);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 22265a5c3c103cf8c50be62e6c90d045eb649e6d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 20 Apr 2010 15:07:32 +0200
Subject: netfilter: xt_TEE: resolve oif using netdevice notifiers

Replace the runtime oif name resolving by netdevice notifier based
resolving. When an oif is given, a netdevice notifier is registered
to resolve the name on NETDEV_REGISTER or NETDEV_CHANGENAME and unresolve
it again on NETDEV_UNREGISTER or NETDEV_CHANGENAME to a different name.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_TEE.h |   3 ++
 net/netfilter/xt_TEE.c           | 103 ++++++++++++++++++++++++++++++---------
 2 files changed, 83 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/xt_TEE.h b/include/linux/netfilter/xt_TEE.h
index 55d4a5011523..5c21d5c829af 100644
--- a/include/linux/netfilter/xt_TEE.h
+++ b/include/linux/netfilter/xt_TEE.h
@@ -4,6 +4,9 @@
 struct xt_tee_tginfo {
 	union nf_inet_addr gw;
 	char oif[16];
+
+	/* used internally by the kernel */
+	struct xt_tee_priv *priv __attribute__((aligned(8)));
 };
 
 #endif /* _XT_TEE_TARGET_H */
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 842e7012eca7..49da6c05f4e0 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -15,6 +15,7 @@
 #include <linux/percpu.h>
 #include <linux/route.h>
 #include <linux/skbuff.h>
+#include <linux/notifier.h>
 #include <net/checksum.h>
 #include <net/icmp.h>
 #include <net/ip.h>
@@ -32,6 +33,12 @@
 #	define WITH_IPV6 1
 #endif
 
+struct xt_tee_priv {
+	struct notifier_block	notifier;
+	struct xt_tee_tginfo	*tginfo;
+	int			oif;
+};
+
 static const union nf_inet_addr tee_zero_address;
 static DEFINE_PER_CPU(bool, tee_active);
 
@@ -49,20 +56,6 @@ static struct net *pick_net(struct sk_buff *skb)
 	return &init_net;
 }
 
-static bool tee_tg_route_oif(struct flowi *f, struct net *net,
-			     const struct xt_tee_tginfo *info)
-{
-	const struct net_device *dev;
-
-	if (*info->oif != '\0')
-		return true;
-	dev = dev_get_by_name(net, info->oif);
-	if (dev == NULL)
-		return false;
-	f->oif = dev->ifindex;
-	return true;
-}
-
 static bool
 tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 {
@@ -72,8 +65,11 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 	struct flowi fl;
 
 	memset(&fl, 0, sizeof(fl));
-	if (!tee_tg_route_oif(&fl, net, info))
-		return false;
+	if (info->priv) {
+		if (info->priv->oif == -1)
+			return false;
+		fl.oif = info->priv->oif;
+	}
 	fl.nl_u.ip4_u.daddr = info->gw.ip;
 	fl.nl_u.ip4_u.tos   = RT_TOS(iph->tos);
 	fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
@@ -149,8 +145,11 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 	struct flowi fl;
 
 	memset(&fl, 0, sizeof(fl));
-	if (!tee_tg_route_oif(&fl, net, info))
-		return false;
+	if (info->priv) {
+		if (info->priv->oif == -1)
+			return false;
+		fl.oif = info->priv->oif;
+	}
 	fl.nl_u.ip6_u.daddr = info->gw.in6;
 	fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
 				  (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
@@ -198,15 +197,71 @@ tee_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 }
 #endif /* WITH_IPV6 */
 
+static int tee_netdev_event(struct notifier_block *this, unsigned long event,
+			    void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct xt_tee_priv *priv;
+
+	priv = container_of(this, struct xt_tee_priv, notifier);
+	switch (event) {
+	case NETDEV_REGISTER:
+		if (!strcmp(dev->name, priv->tginfo->oif))
+			priv->oif = dev->ifindex;
+		break;
+	case NETDEV_UNREGISTER:
+		if (dev->ifindex == priv->oif)
+			priv->oif = -1;
+		break;
+	case NETDEV_CHANGENAME:
+		if (!strcmp(dev->name, priv->tginfo->oif))
+			priv->oif = dev->ifindex;
+		else if (dev->ifindex == priv->oif)
+			priv->oif = -1;
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
 static int tee_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct xt_tee_tginfo *info = par->targinfo;
+	struct xt_tee_tginfo *info = par->targinfo;
+	struct xt_tee_priv *priv;
 
-	if (info->oif[sizeof(info->oif)-1] != '\0')
-		return -EINVAL;
 	/* 0.0.0.0 and :: not allowed */
-	return (memcmp(&info->gw, &tee_zero_address,
-	       sizeof(tee_zero_address)) == 0) ? -EINVAL : 0;
+	if (memcmp(&info->gw, &tee_zero_address,
+		   sizeof(tee_zero_address)) == 0)
+		return -EINVAL;
+
+	if (info->oif[0]) {
+		if (info->oif[sizeof(info->oif)-1] != '\0')
+			return -EINVAL;
+
+		priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+		if (priv == NULL)
+			return -ENOMEM;
+
+		priv->tginfo  = info;
+		priv->oif     = -1;
+		priv->notifier.notifier_call = tee_netdev_event;
+		info->priv    = priv;
+
+		register_netdevice_notifier(&priv->notifier);
+	} else
+		info->priv = NULL;
+
+	return 0;
+}
+
+static void tee_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	struct xt_tee_tginfo *info = par->targinfo;
+
+	if (info->priv) {
+		unregister_netdevice_notifier(&info->priv->notifier);
+		kfree(info->priv);
+	}
 }
 
 static struct xt_target tee_tg_reg[] __read_mostly = {
@@ -217,6 +272,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
 		.target     = tee_tg4,
 		.targetsize = sizeof(struct xt_tee_tginfo),
 		.checkentry = tee_tg_check,
+		.destroy    = tee_tg_destroy,
 		.me         = THIS_MODULE,
 	},
 #ifdef WITH_IPV6
@@ -227,6 +283,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
 		.target     = tee_tg6,
 		.targetsize = sizeof(struct xt_tee_tginfo),
 		.checkentry = tee_tg_check,
+		.destroy    = tee_tg_destroy,
 		.me         = THIS_MODULE,
 	},
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 6c79bf0f2440fd250c8fce8d9b82fcf03d4e8350 Mon Sep 17 00:00:00 2001
From: Bart De Schuymer <bdschuym@pandora.be>
Date: Tue, 20 Apr 2010 16:22:01 +0200
Subject: netfilter: bridge-netfilter: fix refragmenting IP traffic
 encapsulated in PPPoE traffic

The MTU for IP traffic encapsulated inside PPPoE traffic is smaller
than the MTU of the Ethernet device (1500). Connection tracking
gathers all IP packets and sometimes will refragment them in
ip_fragment(). We then need to subtract the length of the
encapsulating header from the mtu used in ip_fragment(). The check in
br_nf_dev_queue_xmit() which determines if ip_fragment() has to be
called is also updated for the PPPoE-encapsulated packets.
nf_bridge_copy_header() is also updated to make sure the PPPoE data
length field has the correct value.

Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge.h | 7 +++++++
 net/bridge/br_netfilter.c        | 2 +-
 net/ipv4/ip_output.c             | 4 ++++
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index ea0e44b90432..0ddd161f3b06 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -68,6 +68,13 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
 	}
 }
 
+static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
+{
+	if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
+		return PPPOE_SES_HLEN;
+	return 0;
+}
+
 extern int br_handle_frame_finish(struct sk_buff *skb);
 /* Only used in br_device.c */
 static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 6b80ebc37667..93f80fefa496 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -745,7 +745,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
 	if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
-	    skb->len > skb->dev->mtu &&
+	    skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
 	    !skb_is_gso(skb))
 		return ip_fragment(skb, br_dev_queue_push_xmit);
 	else
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b0b2e3059f11..d979710684b2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -469,6 +469,10 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 	hlen = iph->ihl * 4;
 	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (skb->nf_bridge)
+		mtu -= nf_bridge_mtu_reduction(skb);
+#endif
 	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
 
 	/* When frag_list is given, use it. First, check its validity:
-- 
cgit v1.2.3-59-g8ed1b


From 03ceedea972a82d343fa5c2528b3952fa9e615d5 Mon Sep 17 00:00:00 2001
From: Daniel Yingqiang Ma <yma.cool@gmail.com>
Date: Tue, 13 Apr 2010 15:12:07 +0800
Subject: ath9k: Group Key fix for VAPs

When I set up multiple VAPs with ath9k, I encountered an issue that
the traffic may be lost after a while.

The detailed phenomenon is
1. After a while the clients connected to one of these VAPs will get
into a state that no broadcast/multicast packets can be transferred
successfully while the unicast packets can be transferred normally.
2. Minutes later the unicast packet transfer will fail as well,
because the ARP entry has expired and it can't be refreshed due to the
broadcast trouble.

It's caused by the group key being overwritten. Someone discussed this
issue on the ath9k-devel mailing list before, but no fix was worked out.

I referred to the method in madwifi, and made a patch for ath9k.
The method is to set the high bit of the sender(AP)'s address, and
associate that mac with the group key. It requires that the hardware
supports multicast frame key search. This seems true for AR9160.

Not sure whether it's the correct way to fix this issue. But it seems
to work in my test. The patch is attached, feel free to revise it.

Signed-off-by: Daniel Yingqiang ma <yma.cool@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath9k/main.c | 28 +++++++++++++++++++++++++---
 include/net/mac80211.h                |  1 +
 net/mac80211/key.c                    |  1 +
 3 files changed, 27 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 1f4ea74bf4ca..40136cf63fa4 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -751,6 +751,7 @@ static int ath_key_config(struct ath_common *common,
 	struct ath_hw *ah = common->ah;
 	struct ath9k_keyval hk;
 	const u8 *mac = NULL;
+	u8 gmac[ETH_ALEN];
 	int ret = 0;
 	int idx;
 
@@ -774,9 +775,30 @@ static int ath_key_config(struct ath_common *common,
 	memcpy(hk.kv_val, key->key, key->keylen);
 
 	if (!(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
-		/* For now, use the default keys for broadcast keys. This may
-		 * need to change with virtual interfaces. */
-		idx = key->keyidx;
+
+		if (key->ap_addr) {
+			/*
+			 * Group keys on hardware that supports multicast frame
+			 * key search use a mac that is the sender's address with
+			 * the high bit set instead of the app-specified address.
+			 */
+			memcpy(gmac, key->ap_addr, ETH_ALEN);
+			gmac[0] |= 0x80;
+			mac = gmac;
+
+			if (key->alg == ALG_TKIP)
+				idx = ath_reserve_key_cache_slot_tkip(common);
+			else
+				idx = ath_reserve_key_cache_slot(common);
+			if (idx < 0)
+				mac = NULL; /* no free key cache entries */
+		}
+
+		if (!mac) {
+			/* For now, use the default keys for broadcast keys. This may
+			 * need to change with virtual interfaces. */
+			idx = key->keyidx;
+		}
 	} else if (key->keyidx) {
 		if (WARN_ON(!sta))
 			return -EOPNOTSUPP;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index c49e6adcd8fa..63e9d37e3e71 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -793,6 +793,7 @@ struct ieee80211_key_conf {
 	u8 iv_len;
 	u8 hw_key_idx;
 	u8 flags;
+	u8 *ap_addr;
 	s8 keyidx;
 	u8 keylen;
 	u8 key[0];
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 8160d9c5372e..75705bd41956 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -139,6 +139,7 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key)
 				     struct ieee80211_sub_if_data,
 				     u.ap);
 
+	key->conf.ap_addr = sdata->dev->dev_addr;
 	ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf);
 
 	if (!ret) {
-- 
cgit v1.2.3-59-g8ed1b


From 1289723ef238908ca8d95ff48a46ee0de970f882 Mon Sep 17 00:00:00 2001
From: Holger Schurig <holgerschurig@gmail.com>
Date: Mon, 19 Apr 2010 10:23:57 +0200
Subject: mac80211: sample survey implementation for mac80211 & hwsim

This adds the survey function to both mac80211 itself and to mac80211_hwsim.
For the latter driver, we simply invent some noise level. A real driver which
cannot determine the real channel noise MUST NOT report any noise, especially
not a magically conjured one :-)

Signed-off-by: Holger Schurig <holgerschurig@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 28 ++++++++++++++++++++++++++++
 include/net/mac80211.h                |  3 ++-
 net/mac80211/cfg.c                    | 12 ++++++++++++
 net/mac80211/driver-ops.h             |  9 +++++++++
 4 files changed, 51 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index ec8b08291790..79bb8833ddb9 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -829,6 +829,33 @@ static int mac80211_hwsim_conf_tx(
 	return 0;
 }
 
+static int mac80211_hwsim_get_survey(
+	struct ieee80211_hw *hw, int idx,
+	struct survey_info *survey)
+{
+	struct ieee80211_conf *conf = &hw->conf;
+
+	printk(KERN_DEBUG "%s:%s (idx=%d)\n",
+	       wiphy_name(hw->wiphy), __func__, idx);	/* log every call with its index */
+
+	if (idx != 0)	/* only survey entry 0 is provided */
+		return -ENOENT;
+
+	/* Current channel */
+	survey->channel = conf->channel;
+
+	/*
+	 * Magically conjured noise level --- this is only ok for simulated hardware.
+	 *
+	 * A real driver which cannot determine the real channel noise MUST NOT
+	 * report any noise, especially not a magically conjured one :-)
+	 */
+	survey->filled = SURVEY_INFO_NOISE_DBM;
+	survey->noise = -92;	/* fixed, invented value (see comment above) */
+
+	return 0;
+}
+
 #ifdef CONFIG_NL80211_TESTMODE
 /*
  * This section contains example code for using netlink
@@ -1012,6 +1039,7 @@ static struct ieee80211_ops mac80211_hwsim_ops =
 	.sta_notify = mac80211_hwsim_sta_notify,
 	.set_tim = mac80211_hwsim_set_tim,
 	.conf_tx = mac80211_hwsim_conf_tx,
+	.get_survey = mac80211_hwsim_get_survey,
 	CFG80211_TESTMODE_CMD(mac80211_hwsim_testmode_cmd)
 	.ampdu_action = mac80211_hwsim_ampdu_action,
 	.sw_scan_start = mac80211_hwsim_sw_scan,
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 63e9d37e3e71..cd9915686dd9 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1674,7 +1674,8 @@ struct ieee80211_ops {
 			    struct ieee80211_vif *vif,
 			    enum ieee80211_ampdu_mlme_action action,
 			    struct ieee80211_sta *sta, u16 tid, u16 *ssn);
-
+	int (*get_survey)(struct ieee80211_hw *hw, int idx,
+		struct survey_info *survey);
 	void (*rfkill_poll)(struct ieee80211_hw *hw);
 	void (*set_coverage_class)(struct ieee80211_hw *hw, u8 coverage_class);
 #ifdef CONFIG_NL80211_TESTMODE
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 4edd73cbf052..f97dda735cbb 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -410,6 +410,17 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
 	return ret;
 }
 
+static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev,
+				 int idx, struct survey_info *survey)
+{
+	struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
+
+	if (!local->ops->get_survey)	/* driver has no survey callback */
+		return -EOPNOTSUPP;
+
+	return drv_get_survey(local, idx, survey);	/* hand off to the driver-ops wrapper */
+}
+
 static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev,
 				 u8 *mac, struct station_info *sinfo)
 {
@@ -1507,6 +1518,7 @@ struct cfg80211_ops mac80211_config_ops = {
 	.change_station = ieee80211_change_station,
 	.get_station = ieee80211_get_station,
 	.dump_station = ieee80211_dump_station,
+	.dump_survey = ieee80211_dump_survey,
 #ifdef CONFIG_MAC80211_MESH
 	.add_mpath = ieee80211_add_mpath,
 	.del_mpath = ieee80211_del_mpath,
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index c3d844093a2f..d1f8a7c2225a 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -346,6 +346,15 @@ static inline int drv_ampdu_action(struct ieee80211_local *local,
 	return ret;
 }
 
+static inline int drv_get_survey(struct ieee80211_local *local, int idx,
+				struct survey_info *survey)
+{
+	int ret = -EOPNOTSUPP;	/* result when the driver has no get_survey op */
+	if (local->ops->get_survey)	/* test the op that is called, not conf_tx */
+		ret = local->ops->get_survey(&local->hw, idx, survey);
+	/* trace_drv_get_survey(local, idx, survey, ret); */
+	return ret;
+}
 
 static inline void drv_rfkill_poll(struct ieee80211_local *local)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 7bdfcaaff5de368a88a4f784f7283b66c17d051d Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Tue, 20 Apr 2010 13:15:56 +0300
Subject: mac80211: Fix ieee80211_sta_conn_mon_timer with hw connection
 monitoring

When IEEE80211_HW_CONNECTION_MONITOR is configured by the driver, starting
of ieee80211_sta_conn_mon_timer should be prevented, as it is then not needed.

This is currently partially the case. As it seems, when a probe-response is
received from the AP the timer is still restarted, thus restarting the host
based connection keep-alive mechanism. These probe-responses happen at least
when scanning while associated.

Fix this by preventing starting of the ieee80211_sta_conn_mon_timer in the
ieee80211_rx_mgmt_probe_resp function.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d11a54c289a2..d811e3fa1d75 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1330,12 +1330,17 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata,
 		mutex_lock(&sdata->local->iflist_mtx);
 		ieee80211_recalc_ps(sdata->local, -1);
 		mutex_unlock(&sdata->local->iflist_mtx);
+
+		if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
+			return;
+
 		/*
 		 * We've received a probe response, but are not sure whether
 		 * we have or will be receiving any beacons or data, so let's
 		 * schedule the timers again, just in case.
 		 */
 		mod_beacon_timer(sdata);
+
 		mod_timer(&ifmgd->conn_mon_timer,
 			  round_jiffies_up(jiffies +
 					   IEEE80211_CONNECTION_IDLE_TIME));
-- 
cgit v1.2.3-59-g8ed1b


From aa395145165cb06a0d0885221bbe0ce4a564391d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 20 Apr 2010 13:03:51 +0000
Subject: net: sk_sleep() helper

Define a new function to return the waitqueue of a "struct sock".

static inline wait_queue_head_t *sk_sleep(struct sock *sk)
{
	return sk->sk_sleep;
}

Change all read occurrences of sk_sleep by a call to this function.

Needed for a future RCU conversion. sk_sleep won't be a field directly
available.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/atmtcp.c            |  6 ++--
 drivers/net/macvtap.c           |  4 +--
 drivers/net/tun.c               |  4 +--
 drivers/scsi/iscsi_tcp.c        |  4 +--
 include/net/sock.h              | 10 +++++--
 include/net/tcp.h               |  2 +-
 net/atm/common.c                | 12 ++++----
 net/atm/signaling.c             |  2 +-
 net/atm/svc.c                   | 62 ++++++++++++++++++++---------------------
 net/ax25/af_ax25.c              |  8 +++---
 net/bluetooth/af_bluetooth.c    |  6 ++--
 net/bluetooth/bnep/core.c       |  8 +++---
 net/bluetooth/bnep/netdev.c     |  6 ++--
 net/bluetooth/cmtp/cmtp.h       |  2 +-
 net/bluetooth/cmtp/core.c       |  4 +--
 net/bluetooth/hidp/core.c       | 10 +++----
 net/bluetooth/hidp/hidp.h       |  4 +--
 net/bluetooth/l2cap.c           |  4 +--
 net/bluetooth/rfcomm/sock.c     |  8 +++---
 net/bluetooth/sco.c             |  4 +--
 net/caif/caif_socket.c          |  2 +-
 net/core/datagram.c             |  6 ++--
 net/core/sock.c                 | 16 +++++------
 net/core/stream.c               | 16 +++++------
 net/dccp/output.c               |  6 ++--
 net/dccp/proto.c                |  2 +-
 net/decnet/af_decnet.c          | 26 ++++++++---------
 net/ipv4/af_inet.c              |  6 ++--
 net/ipv4/inet_connection_sock.c |  4 +--
 net/ipv4/tcp.c                  |  2 +-
 net/irda/af_irda.c              | 14 +++++-----
 net/iucv/af_iucv.c              | 12 ++++----
 net/llc/af_llc.c                | 12 ++++----
 net/netfilter/ipvs/ip_vs_sync.c |  2 +-
 net/netrom/af_netrom.c          |  8 +++---
 net/rds/af_rds.c                |  2 +-
 net/rds/rds.h                   |  2 +-
 net/rds/recv.c                  |  2 +-
 net/rds/send.c                  |  2 +-
 net/rose/af_rose.c              |  8 +++---
 net/rxrpc/af_rxrpc.c            |  4 +--
 net/sctp/socket.c               | 20 ++++++-------
 net/sunrpc/svcsock.c            | 24 ++++++++--------
 net/tipc/socket.c               | 26 ++++++++---------
 net/unix/af_unix.c              | 10 +++----
 net/x25/af_x25.c                |  8 +++---
 46 files changed, 208 insertions(+), 204 deletions(-)

(limited to 'net')

diff --git a/drivers/atm/atmtcp.c b/drivers/atm/atmtcp.c
index b86712167eb8..b9101818b47b 100644
--- a/drivers/atm/atmtcp.c
+++ b/drivers/atm/atmtcp.c
@@ -68,7 +68,7 @@ static int atmtcp_send_control(struct atm_vcc *vcc,int type,
 	*(struct atm_vcc **) &new_msg->vcc = vcc;
 	old_test = test_bit(flag,&vcc->flags);
 	out_vcc->push(out_vcc,skb);
-	add_wait_queue(sk_atm(vcc)->sk_sleep, &wait);
+	add_wait_queue(sk_sleep(sk_atm(vcc)), &wait);
 	while (test_bit(flag,&vcc->flags) == old_test) {
 		mb();
 		out_vcc = PRIV(vcc->dev) ? PRIV(vcc->dev)->vcc : NULL;
@@ -80,7 +80,7 @@ static int atmtcp_send_control(struct atm_vcc *vcc,int type,
 		schedule();
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk_atm(vcc)->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk_atm(vcc)), &wait);
 	return error;
 }
 
@@ -105,7 +105,7 @@ static int atmtcp_recv_control(const struct atmtcp_control *msg)
 		    msg->type);
 		return -EINVAL;
 	}
-	wake_up(sk_atm(vcc)->sk_sleep);
+	wake_up(sk_sleep(sk_atm(vcc)));
 	return 0;
 }
 
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index abba3cc81f12..85d6420f8404 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -246,8 +246,8 @@ static void macvtap_sock_write_space(struct sock *sk)
 	    !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
 		return;
 
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND);
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND);
 }
 
 static int macvtap_open(struct inode *inode, struct file *file)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 43265207d463..20a17938c62b 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -868,8 +868,8 @@ static void tun_sock_write_space(struct sock *sk)
 	if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
 		return;
 
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 
 	tun = tun_sk(sk)->tun;
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 0ee725ced511..9eae04afa9a0 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -599,9 +599,9 @@ static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag)
 	set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
 	write_unlock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock);
 
-	if (sock->sk->sk_sleep && waitqueue_active(sock->sk->sk_sleep)) {
+	if (sk_sleep(sock->sk) && waitqueue_active(sk_sleep(sock->sk))) {
 		sock->sk->sk_err = EIO;
-		wake_up_interruptible(sock->sk->sk_sleep);
+		wake_up_interruptible(sk_sleep(sock->sk));
 	}
 
 	iscsi_conn_stop(cls_conn, flag);
diff --git a/include/net/sock.h b/include/net/sock.h
index 56df440a950b..8ab05146a447 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1160,6 +1160,10 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 	sk->sk_socket = sock;
 }
 
+static inline wait_queue_head_t *sk_sleep(struct sock *sk)
+{
+	return sk->sk_sleep;	/* accessor for the socket's wait queue pointer */
+}
 /* Detach socket from process context.
  * Announce socket dead, detach it from wait queue and inode.
  * Note that parent inode held reference count on this struct sock,
@@ -1346,8 +1350,8 @@ static inline int sk_has_allocations(const struct sock *sk)
  *   tp->rcv_nxt check   sock_def_readable
  *   ...                 {
  *   schedule               ...
- *                          if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
- *                              wake_up_interruptible(sk->sk_sleep)
+ *                          if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+ *                              wake_up_interruptible(sk_sleep(sk))
  *                          ...
  *                       }
  *
@@ -1368,7 +1372,7 @@ static inline int sk_has_sleeper(struct sock *sk)
 	 * This memory barrier is paired in the sock_poll_wait.
 	 */
 	smp_mb__after_lock();
-	return sk->sk_sleep && waitqueue_active(sk->sk_sleep);
+	return sk_sleep(sk) && waitqueue_active(sk_sleep(sk));
 }
 
 /**
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 70c5159f4b36..b7d83d204a93 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -939,7 +939,7 @@ static inline int tcp_prequeue(struct sock *sk, struct sk_buff *skb)
 
 		tp->ucopy.memory = 0;
 	} else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
-		wake_up_interruptible_sync_poll(sk->sk_sleep,
+		wake_up_interruptible_sync_poll(sk_sleep(sk),
 					   POLLIN | POLLRDNORM | POLLRDBAND);
 		if (!inet_csk_ack_scheduled(sk))
 			inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
diff --git a/net/atm/common.c b/net/atm/common.c
index 97ed94aa0cbc..e3e10e6f8628 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -92,7 +92,7 @@ static void vcc_def_wakeup(struct sock *sk)
 {
 	read_lock(&sk->sk_callback_lock);
 	if (sk_has_sleeper(sk))
-		wake_up(sk->sk_sleep);
+		wake_up(sk_sleep(sk));
 	read_unlock(&sk->sk_callback_lock);
 }
 
@@ -110,7 +110,7 @@ static void vcc_write_space(struct sock *sk)
 
 	if (vcc_writable(sk)) {
 		if (sk_has_sleeper(sk))
-			wake_up_interruptible(sk->sk_sleep);
+			wake_up_interruptible(sk_sleep(sk));
 
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
@@ -549,7 +549,7 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
 	}
 
 	eff = (size+3) & ~3; /* align to word boundary */
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	error = 0;
 	while (!(skb = alloc_tx(vcc, eff))) {
 		if (m->msg_flags & MSG_DONTWAIT) {
@@ -568,9 +568,9 @@ int vcc_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
 			send_sig(SIGPIPE, current, 0);
 			break;
 		}
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	if (error)
 		goto out;
 	skb->dev = NULL; /* for paths shared with net_device interfaces */
@@ -595,7 +595,7 @@ unsigned int vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
 	struct atm_vcc *vcc;
 	unsigned int mask;
 
-	sock_poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	vcc = ATM_SD(sock);
diff --git a/net/atm/signaling.c b/net/atm/signaling.c
index 6ba6e466ee54..509c8ac02b63 100644
--- a/net/atm/signaling.c
+++ b/net/atm/signaling.c
@@ -131,7 +131,7 @@ static int sigd_send(struct atm_vcc *vcc, struct sk_buff *skb)
 		}
 		sk->sk_ack_backlog++;
 		skb_queue_tail(&sk->sk_receive_queue, skb);
-		pr_debug("waking sk->sk_sleep 0x%p\n", sk->sk_sleep);
+		pr_debug("waking sk_sleep(sk) 0x%p\n", sk_sleep(sk));
 		sk->sk_state_change(sk);
 as_indicate_complete:
 		release_sock(sk);
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 3ba9a45a51ac..754ee4791d96 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -49,14 +49,14 @@ static void svc_disconnect(struct atm_vcc *vcc)
 
 	pr_debug("%p\n", vcc);
 	if (test_bit(ATM_VF_REGIS, &vcc->flags)) {
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 		sigd_enq(vcc, as_close, NULL, NULL, NULL);
 		while (!test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) {
 			schedule();
-			prepare_to_wait(sk->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk), &wait,
 					TASK_UNINTERRUPTIBLE);
 		}
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 	}
 	/* beware - socket is still in use by atmsigd until the last
 	   as_indicate has been answered */
@@ -125,13 +125,13 @@ static int svc_bind(struct socket *sock, struct sockaddr *sockaddr,
 	}
 	vcc->local = *addr;
 	set_bit(ATM_VF_WAITING, &vcc->flags);
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 	sigd_enq(vcc, as_bind, NULL, NULL, &vcc->local);
 	while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
 		schedule();
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	clear_bit(ATM_VF_REGIS, &vcc->flags); /* doesn't count */
 	if (!sigd) {
 		error = -EUNATCH;
@@ -201,10 +201,10 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
 		}
 		vcc->remote = *addr;
 		set_bit(ATM_VF_WAITING, &vcc->flags);
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		sigd_enq(vcc, as_connect, NULL, NULL, &vcc->remote);
 		if (flags & O_NONBLOCK) {
-			finish_wait(sk->sk_sleep, &wait);
+			finish_wait(sk_sleep(sk), &wait);
 			sock->state = SS_CONNECTING;
 			error = -EINPROGRESS;
 			goto out;
@@ -213,7 +213,7 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
 		while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
 			schedule();
 			if (!signal_pending(current)) {
-				prepare_to_wait(sk->sk_sleep, &wait,
+				prepare_to_wait(sk_sleep(sk), &wait,
 						TASK_INTERRUPTIBLE);
 				continue;
 			}
@@ -232,14 +232,14 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
 			 */
 			sigd_enq(vcc, as_close, NULL, NULL, NULL);
 			while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
-				prepare_to_wait(sk->sk_sleep, &wait,
+				prepare_to_wait(sk_sleep(sk), &wait,
 						TASK_INTERRUPTIBLE);
 				schedule();
 			}
 			if (!sk->sk_err)
 				while (!test_bit(ATM_VF_RELEASED, &vcc->flags) &&
 				       sigd) {
-					prepare_to_wait(sk->sk_sleep, &wait,
+					prepare_to_wait(sk_sleep(sk), &wait,
 							TASK_INTERRUPTIBLE);
 					schedule();
 				}
@@ -250,7 +250,7 @@ static int svc_connect(struct socket *sock, struct sockaddr *sockaddr,
 			error = -EINTR;
 			break;
 		}
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 		if (error)
 			goto out;
 		if (!sigd) {
@@ -302,13 +302,13 @@ static int svc_listen(struct socket *sock, int backlog)
 		goto out;
 	}
 	set_bit(ATM_VF_WAITING, &vcc->flags);
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 	sigd_enq(vcc, as_listen, NULL, NULL, &vcc->local);
 	while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
 		schedule();
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	if (!sigd) {
 		error = -EUNATCH;
 		goto out;
@@ -343,7 +343,7 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
 	while (1) {
 		DEFINE_WAIT(wait);
 
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		while (!(skb = skb_dequeue(&sk->sk_receive_queue)) &&
 		       sigd) {
 			if (test_bit(ATM_VF_RELEASED, &old_vcc->flags))
@@ -363,10 +363,10 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
 				error = -ERESTARTSYS;
 				break;
 			}
-			prepare_to_wait(sk->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk), &wait,
 					TASK_INTERRUPTIBLE);
 		}
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 		if (error)
 			goto out;
 		if (!skb) {
@@ -392,17 +392,17 @@ static int svc_accept(struct socket *sock, struct socket *newsock, int flags)
 		}
 		/* wait should be short, so we ignore the non-blocking flag */
 		set_bit(ATM_VF_WAITING, &new_vcc->flags);
-		prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait,
+		prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait,
 				TASK_UNINTERRUPTIBLE);
 		sigd_enq(new_vcc, as_accept, old_vcc, NULL, NULL);
 		while (test_bit(ATM_VF_WAITING, &new_vcc->flags) && sigd) {
 			release_sock(sk);
 			schedule();
 			lock_sock(sk);
-			prepare_to_wait(sk_atm(new_vcc)->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk_atm(new_vcc)), &wait,
 					TASK_UNINTERRUPTIBLE);
 		}
-		finish_wait(sk_atm(new_vcc)->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk_atm(new_vcc)), &wait);
 		if (!sigd) {
 			error = -EUNATCH;
 			goto out;
@@ -438,14 +438,14 @@ int svc_change_qos(struct atm_vcc *vcc, struct atm_qos *qos)
 	DEFINE_WAIT(wait);
 
 	set_bit(ATM_VF_WAITING, &vcc->flags);
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 	sigd_enq2(vcc, as_modify, NULL, NULL, &vcc->local, qos, 0);
 	while (test_bit(ATM_VF_WAITING, &vcc->flags) &&
 	       !test_bit(ATM_VF_RELEASED, &vcc->flags) && sigd) {
 		schedule();
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_UNINTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_UNINTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	if (!sigd)
 		return -EUNATCH;
 	return -sk->sk_err;
@@ -534,20 +534,20 @@ static int svc_addparty(struct socket *sock, struct sockaddr *sockaddr,
 
 	lock_sock(sk);
 	set_bit(ATM_VF_WAITING, &vcc->flags);
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	sigd_enq(vcc, as_addparty, NULL, NULL,
 		 (struct sockaddr_atmsvc *) sockaddr);
 	if (flags & O_NONBLOCK) {
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 		error = -EINPROGRESS;
 		goto out;
 	}
 	pr_debug("added wait queue\n");
 	while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
 		schedule();
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	error = xchg(&sk->sk_err_soft, 0);
 out:
 	release_sock(sk);
@@ -563,13 +563,13 @@ static int svc_dropparty(struct socket *sock, int ep_ref)
 
 	lock_sock(sk);
 	set_bit(ATM_VF_WAITING, &vcc->flags);
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	sigd_enq2(vcc, as_dropparty, NULL, NULL, NULL, NULL, ep_ref);
 	while (test_bit(ATM_VF_WAITING, &vcc->flags) && sigd) {
 		schedule();
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	if (!sigd) {
 		error = -EUNATCH;
 		goto out;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 65c5801261f9..cfdfd7e2a172 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1281,7 +1281,7 @@ static int __must_check ax25_connect(struct socket *sock,
 		DEFINE_WAIT(wait);
 
 		for (;;) {
-			prepare_to_wait(sk->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk), &wait,
 					TASK_INTERRUPTIBLE);
 			if (sk->sk_state != TCP_SYN_SENT)
 				break;
@@ -1294,7 +1294,7 @@ static int __must_check ax25_connect(struct socket *sock,
 			err = -ERESTARTSYS;
 			break;
 		}
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 
 		if (err)
 			goto out_release;
@@ -1346,7 +1346,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
 	 *	hooked into the SABM we saved
 	 */
 	for (;;) {
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb)
 			break;
@@ -1364,7 +1364,7 @@ static int ax25_accept(struct socket *sock, struct socket *newsock, int flags)
 		err = -ERESTARTSYS;
 		break;
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 
 	if (err)
 		goto out;
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 404a8500fd03..421c45bd1b95 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -288,7 +288,7 @@ unsigned int bt_sock_poll(struct file * file, struct socket *sock, poll_table *w
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
-	poll_wait(file, sk->sk_sleep, wait);
+	poll_wait(file, sk_sleep(sk), wait);
 
 	if (sk->sk_state == BT_LISTEN)
 		return bt_accept_poll(sk);
@@ -378,7 +378,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
 
 	BT_DBG("sk %p", sk);
 
-	add_wait_queue(sk->sk_sleep, &wait);
+	add_wait_queue(sk_sleep(sk), &wait);
 	while (sk->sk_state != state) {
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -401,7 +401,7 @@ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo)
 			break;
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 	return err;
 }
 EXPORT_SYMBOL(bt_sock_wait_state);
diff --git a/net/bluetooth/bnep/core.c b/net/bluetooth/bnep/core.c
index 8062dad6d10d..f10b41fb05a0 100644
--- a/net/bluetooth/bnep/core.c
+++ b/net/bluetooth/bnep/core.c
@@ -474,7 +474,7 @@ static int bnep_session(void *arg)
 	set_user_nice(current, -15);
 
 	init_waitqueue_entry(&wait, current);
-	add_wait_queue(sk->sk_sleep, &wait);
+	add_wait_queue(sk_sleep(sk), &wait);
 	while (!atomic_read(&s->killed)) {
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -496,7 +496,7 @@ static int bnep_session(void *arg)
 		schedule();
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 
 	/* Cleanup session */
 	down_write(&bnep_session_sem);
@@ -507,7 +507,7 @@ static int bnep_session(void *arg)
 	/* Wakeup user-space polling for socket errors */
 	s->sock->sk->sk_err = EUNATCH;
 
-	wake_up_interruptible(s->sock->sk->sk_sleep);
+	wake_up_interruptible(sk_sleep(s->sock->sk));
 
 	/* Release the socket */
 	fput(s->sock->file);
@@ -638,7 +638,7 @@ int bnep_del_connection(struct bnep_conndel_req *req)
 
 		/* Kill session thread */
 		atomic_inc(&s->killed);
-		wake_up_interruptible(s->sock->sk->sk_sleep);
+		wake_up_interruptible(sk_sleep(s->sock->sk));
 	} else
 		err = -ENOENT;
 
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index d48b33f4d4ba..0faad5ce6dc4 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -109,7 +109,7 @@ static void bnep_net_set_mc_list(struct net_device *dev)
 	}
 
 	skb_queue_tail(&sk->sk_write_queue, skb);
-	wake_up_interruptible(sk->sk_sleep);
+	wake_up_interruptible(sk_sleep(sk));
 #endif
 }
 
@@ -193,11 +193,11 @@ static netdev_tx_t bnep_net_xmit(struct sk_buff *skb,
 	/*
 	 * We cannot send L2CAP packets from here as we are potentially in a bh.
 	 * So we have to queue them and wake up session thread which is sleeping
-	 * on the sk->sk_sleep.
+	 * on the sk_sleep(sk).
 	 */
 	dev->trans_start = jiffies;
 	skb_queue_tail(&sk->sk_write_queue, skb);
-	wake_up_interruptible(sk->sk_sleep);
+	wake_up_interruptible(sk_sleep(sk));
 
 	if (skb_queue_len(&sk->sk_write_queue) >= BNEP_TX_QUEUE_LEN) {
 		BT_DBG("tx queue is full");
diff --git a/net/bluetooth/cmtp/cmtp.h b/net/bluetooth/cmtp/cmtp.h
index e4663aa14d26..785e79e953c5 100644
--- a/net/bluetooth/cmtp/cmtp.h
+++ b/net/bluetooth/cmtp/cmtp.h
@@ -125,7 +125,7 @@ static inline void cmtp_schedule(struct cmtp_session *session)
 {
 	struct sock *sk = session->sock->sk;
 
-	wake_up_interruptible(sk->sk_sleep);
+	wake_up_interruptible(sk_sleep(sk));
 }
 
 /* CMTP init defines */
diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c
index 0073ec8495da..d4c6af082d48 100644
--- a/net/bluetooth/cmtp/core.c
+++ b/net/bluetooth/cmtp/core.c
@@ -284,7 +284,7 @@ static int cmtp_session(void *arg)
 	set_user_nice(current, -15);
 
 	init_waitqueue_entry(&wait, current);
-	add_wait_queue(sk->sk_sleep, &wait);
+	add_wait_queue(sk_sleep(sk), &wait);
 	while (!atomic_read(&session->terminate)) {
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -301,7 +301,7 @@ static int cmtp_session(void *arg)
 		schedule();
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 
 	down_write(&cmtp_session_sem);
 
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 280529ad9274..bfe641b7dfaf 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -561,8 +561,8 @@ static int hidp_session(void *arg)
 
 	init_waitqueue_entry(&ctrl_wait, current);
 	init_waitqueue_entry(&intr_wait, current);
-	add_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait);
-	add_wait_queue(intr_sk->sk_sleep, &intr_wait);
+	add_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait);
+	add_wait_queue(sk_sleep(intr_sk), &intr_wait);
 	while (!atomic_read(&session->terminate)) {
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -584,8 +584,8 @@ static int hidp_session(void *arg)
 		schedule();
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(intr_sk->sk_sleep, &intr_wait);
-	remove_wait_queue(ctrl_sk->sk_sleep, &ctrl_wait);
+	remove_wait_queue(sk_sleep(intr_sk), &intr_wait);
+	remove_wait_queue(sk_sleep(ctrl_sk), &ctrl_wait);
 
 	down_write(&hidp_session_sem);
 
@@ -609,7 +609,7 @@ static int hidp_session(void *arg)
 
 	fput(session->intr_sock->file);
 
-	wait_event_timeout(*(ctrl_sk->sk_sleep),
+	wait_event_timeout(*(sk_sleep(ctrl_sk)),
 		(ctrl_sk->sk_state == BT_CLOSED), msecs_to_jiffies(500));
 
 	fput(session->ctrl_sock->file);
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index a4e215d50c10..8d934a19da0a 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -164,8 +164,8 @@ static inline void hidp_schedule(struct hidp_session *session)
 	struct sock *ctrl_sk = session->ctrl_sock->sk;
 	struct sock *intr_sk = session->intr_sock->sk;
 
-	wake_up_interruptible(ctrl_sk->sk_sleep);
-	wake_up_interruptible(intr_sk->sk_sleep);
+	wake_up_interruptible(sk_sleep(ctrl_sk));
+	wake_up_interruptible(sk_sleep(intr_sk));
 }
 
 /* HIDP init defines */
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 99d68c34e4f1..c1e60eed5a97 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1147,7 +1147,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl
 	BT_DBG("sk %p timeo %ld", sk, timeo);
 
 	/* Wait for an incoming connection. (wake-one). */
-	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	add_wait_queue_exclusive(sk_sleep(sk), &wait);
 	while (!(nsk = bt_accept_dequeue(sk, newsock))) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (!timeo) {
@@ -1170,7 +1170,7 @@ static int l2cap_sock_accept(struct socket *sock, struct socket *newsock, int fl
 		}
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 
 	if (err)
 		goto done;
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 8ed3c37684fa..43fbf6b4b4bf 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -503,7 +503,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f
 	BT_DBG("sk %p timeo %ld", sk, timeo);
 
 	/* Wait for an incoming connection. (wake-one). */
-	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	add_wait_queue_exclusive(sk_sleep(sk), &wait);
 	while (!(nsk = bt_accept_dequeue(sk, newsock))) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (!timeo) {
@@ -526,7 +526,7 @@ static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, int f
 		}
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 
 	if (err)
 		goto done;
@@ -621,7 +621,7 @@ static long rfcomm_sock_data_wait(struct sock *sk, long timeo)
 {
 	DECLARE_WAITQUEUE(wait, current);
 
-	add_wait_queue(sk->sk_sleep, &wait);
+	add_wait_queue(sk_sleep(sk), &wait);
 	for (;;) {
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -640,7 +640,7 @@ static long rfcomm_sock_data_wait(struct sock *sk, long timeo)
 	}
 
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 	return timeo;
 }
 
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index ca6b2ad1c3fc..b406d3eff53a 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -567,7 +567,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag
 	BT_DBG("sk %p timeo %ld", sk, timeo);
 
 	/* Wait for an incoming connection. (wake-one). */
-	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	add_wait_queue_exclusive(sk_sleep(sk), &wait);
 	while (!(ch = bt_accept_dequeue(sk, newsock))) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (!timeo) {
@@ -590,7 +590,7 @@ static int sco_sock_accept(struct socket *sock, struct socket *newsock, int flag
 		}
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 
 	if (err)
 		goto done;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index cdf62b9fefac..90317e7d10b4 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -689,7 +689,7 @@ static unsigned int caif_poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk;
 	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 	u32 mask = 0;
-	poll_wait(file, sk->sk_sleep, wait);
+	poll_wait(file, sk_sleep(sk), wait);
 	lock_sock(&(cf_sk->sk));
 	if (!STATE_IS_OPEN(cf_sk)) {
 		if (!STATE_IS_PENDING(cf_sk))
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 2dccd4ee591b..5574a5ddf908 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -86,7 +86,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
 	int error;
 	DEFINE_WAIT_FUNC(wait, receiver_wake_function);
 
-	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 	/* Socket errors? */
 	error = sock_error(sk);
@@ -115,7 +115,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
 	error = 0;
 	*timeo_p = schedule_timeout(*timeo_p);
 out:
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return error;
 interrupted:
 	error = sock_intr_errno(*timeo_p);
@@ -726,7 +726,7 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk;
 	unsigned int mask;
 
-	sock_poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	/* exceptional events? */
diff --git a/net/core/sock.c b/net/core/sock.c
index 7effa1e689df..58ebd146ce5a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1395,7 +1395,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
 		if (signal_pending(current))
 			break;
 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		if (atomic_read(&sk->sk_wmem_alloc) < sk->sk_sndbuf)
 			break;
 		if (sk->sk_shutdown & SEND_SHUTDOWN)
@@ -1404,7 +1404,7 @@ static long sock_wait_for_wmem(struct sock *sk, long timeo)
 			break;
 		timeo = schedule_timeout(timeo);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return timeo;
 }
 
@@ -1570,11 +1570,11 @@ int sk_wait_data(struct sock *sk, long *timeo)
 	int rc;
 	DEFINE_WAIT(wait);
 
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 	rc = sk_wait_event(sk, timeo, !skb_queue_empty(&sk->sk_receive_queue));
 	clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return rc;
 }
 EXPORT_SYMBOL(sk_wait_data);
@@ -1798,7 +1798,7 @@ static void sock_def_wakeup(struct sock *sk)
 {
 	read_lock(&sk->sk_callback_lock);
 	if (sk_has_sleeper(sk))
-		wake_up_interruptible_all(sk->sk_sleep);
+		wake_up_interruptible_all(sk_sleep(sk));
 	read_unlock(&sk->sk_callback_lock);
 }
 
@@ -1806,7 +1806,7 @@ static void sock_def_error_report(struct sock *sk)
 {
 	read_lock(&sk->sk_callback_lock);
 	if (sk_has_sleeper(sk))
-		wake_up_interruptible_poll(sk->sk_sleep, POLLERR);
+		wake_up_interruptible_poll(sk_sleep(sk), POLLERR);
 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
 	read_unlock(&sk->sk_callback_lock);
 }
@@ -1815,7 +1815,7 @@ static void sock_def_readable(struct sock *sk, int len)
 {
 	read_lock(&sk->sk_callback_lock);
 	if (sk_has_sleeper(sk))
-		wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN |
+		wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN |
 						POLLRDNORM | POLLRDBAND);
 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
 	read_unlock(&sk->sk_callback_lock);
@@ -1830,7 +1830,7 @@ static void sock_def_write_space(struct sock *sk)
 	 */
 	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
 		if (sk_has_sleeper(sk))
-			wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
+			wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 
 		/* Should agree with poll, otherwise some programs break */
diff --git a/net/core/stream.c b/net/core/stream.c
index a37debfeb1b2..7b3c3f30b107 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -32,8 +32,8 @@ void sk_stream_write_space(struct sock *sk)
 	if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
 		clear_bit(SOCK_NOSPACE, &sock->flags);
 
-		if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-			wake_up_interruptible_poll(sk->sk_sleep, POLLOUT |
+		if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+			wake_up_interruptible_poll(sk_sleep(sk), POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 		if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
 			sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
@@ -66,13 +66,13 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
 		if (signal_pending(tsk))
 			return sock_intr_errno(*timeo_p);
 
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		sk->sk_write_pending++;
 		done = sk_wait_event(sk, timeo_p,
 				     !sk->sk_err &&
 				     !((1 << sk->sk_state) &
 				       ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)));
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 		sk->sk_write_pending--;
 	} while (!done);
 	return 0;
@@ -96,13 +96,13 @@ void sk_stream_wait_close(struct sock *sk, long timeout)
 		DEFINE_WAIT(wait);
 
 		do {
-			prepare_to_wait(sk->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk), &wait,
 					TASK_INTERRUPTIBLE);
 			if (sk_wait_event(sk, &timeout, !sk_stream_closing(sk)))
 				break;
 		} while (!signal_pending(current) && timeout);
 
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 	}
 }
 
@@ -126,7 +126,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
 	while (1) {
 		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
 			goto do_error;
@@ -157,7 +157,7 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
 		*timeo_p = current_timeo;
 	}
 out:
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return err;
 
 do_error:
diff --git a/net/dccp/output.c b/net/dccp/output.c
index e98b65e9569f..2d3dcb39851f 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -198,7 +198,7 @@ void dccp_write_space(struct sock *sk)
 	read_lock(&sk->sk_callback_lock);
 
 	if (sk_has_sleeper(sk))
-		wake_up_interruptible(sk->sk_sleep);
+		wake_up_interruptible(sk_sleep(sk));
 	/* Should agree with poll, otherwise some programs break */
 	if (sock_writeable(sk))
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
@@ -225,7 +225,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
 		dccp_pr_debug("delayed send by %d msec\n", delay);
 		jiffdelay = msecs_to_jiffies(delay);
 
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 		sk->sk_write_pending++;
 		release_sock(sk);
@@ -241,7 +241,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, int delay)
 		rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb);
 	} while ((delay = rc) > 0);
 out:
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return rc;
 
 do_error:
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index a0e38d8018f5..b03ecf6b2bb0 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -312,7 +312,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
 	unsigned int mask;
 	struct sock *sk = sock->sk;
 
-	sock_poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 	if (sk->sk_state == DCCP_LISTEN)
 		return inet_csk_listen_poll(sk);
 
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 55e3b6b0061a..d6b93d19790f 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -832,7 +832,7 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
 	scp->segsize_loc = dst_metric(__sk_dst_get(sk), RTAX_ADVMSS);
 	dn_send_conn_conf(sk, allocation);
 
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	for(;;) {
 		release_sock(sk);
 		if (scp->state == DN_CC)
@@ -850,9 +850,9 @@ static int dn_confirm_accept(struct sock *sk, long *timeo, gfp_t allocation)
 		err = -EAGAIN;
 		if (!*timeo)
 			break;
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	if (err == 0) {
 		sk->sk_socket->state = SS_CONNECTED;
 	} else if (scp->state != DN_CC) {
@@ -873,7 +873,7 @@ static int dn_wait_run(struct sock *sk, long *timeo)
 	if (!*timeo)
 		return -EALREADY;
 
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	for(;;) {
 		release_sock(sk);
 		if (scp->state == DN_CI || scp->state == DN_CC)
@@ -891,9 +891,9 @@ static int dn_wait_run(struct sock *sk, long *timeo)
 		err = -ETIMEDOUT;
 		if (!*timeo)
 			break;
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 out:
 	if (err == 0) {
 		sk->sk_socket->state = SS_CONNECTED;
@@ -1040,7 +1040,7 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
 	struct sk_buff *skb = NULL;
 	int err = 0;
 
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	for(;;) {
 		release_sock(sk);
 		skb = skb_dequeue(&sk->sk_receive_queue);
@@ -1060,9 +1060,9 @@ static struct sk_buff *dn_wait_for_connect(struct sock *sk, long *timeo)
 		err = -EAGAIN;
 		if (!*timeo)
 			break;
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 
 	return skb == NULL ? ERR_PTR(err) : skb;
 }
@@ -1746,11 +1746,11 @@ static int dn_recvmsg(struct kiocb *iocb, struct socket *sock,
 			goto out;
 		}
 
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 		sk_wait_event(sk, &timeo, dn_data_ready(sk, queue, flags, target));
 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 	}
 
 	skb_queue_walk_safe(queue, skb, n) {
@@ -2003,12 +2003,12 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock,
 				goto out;
 			}
 
-			prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+			prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 			set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 			sk_wait_event(sk, &timeo,
 				      !dn_queue_too_long(scp, queue, flags));
 			clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-			finish_wait(sk->sk_sleep, &wait);
+			finish_wait(sk_sleep(sk), &wait);
 			continue;
 		}
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c5376c725503..5ca7290c2e61 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -548,7 +548,7 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
 {
 	DEFINE_WAIT(wait);
 
-	prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 	/* Basic assumption: if someone sets sk->sk_err, he _must_
 	 * change state of the socket from TCP_SYN_*.
@@ -561,9 +561,9 @@ static long inet_wait_for_connect(struct sock *sk, long timeo)
 		lock_sock(sk);
 		if (signal_pending(current) || !timeo)
 			break;
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return timeo;
 }
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8da6429269dd..e0a3e3537b14 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -234,7 +234,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
 	 * having to remove and re-insert us on the wait queue.
 	 */
 	for (;;) {
-		prepare_to_wait_exclusive(sk->sk_sleep, &wait,
+		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
 					  TASK_INTERRUPTIBLE);
 		release_sock(sk);
 		if (reqsk_queue_empty(&icsk->icsk_accept_queue))
@@ -253,7 +253,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
 		if (!timeo)
 			break;
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return err;
 }
 
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0f8caf64caa3..77208334a613 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -378,7 +378,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	struct sock *sk = sock->sk;
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	sock_poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 	if (sk->sk_state == TCP_LISTEN)
 		return inet_csk_listen_poll(sk);
 
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 2a4efcea3423..79986a674f6e 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -347,7 +347,7 @@ static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow)
 		self->tx_flow = flow;
 		IRDA_DEBUG(1, "%s(), IrTTP wants us to start again\n",
 			   __func__);
-		wake_up_interruptible(sk->sk_sleep);
+		wake_up_interruptible(sk_sleep(sk));
 		break;
 	default:
 		IRDA_DEBUG(0, "%s(), Unknown flow command!\n", __func__);
@@ -900,7 +900,7 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags)
 		if (flags & O_NONBLOCK)
 			goto out;
 
-		err = wait_event_interruptible(*(sk->sk_sleep),
+		err = wait_event_interruptible(*(sk_sleep(sk)),
 					skb_peek(&sk->sk_receive_queue));
 		if (err)
 			goto out;
@@ -1066,7 +1066,7 @@ static int irda_connect(struct socket *sock, struct sockaddr *uaddr,
 		goto out;
 
 	err = -ERESTARTSYS;
-	if (wait_event_interruptible(*(sk->sk_sleep),
+	if (wait_event_interruptible(*(sk_sleep(sk)),
 				     (sk->sk_state != TCP_SYN_SENT)))
 		goto out;
 
@@ -1318,7 +1318,7 @@ static int irda_sendmsg(struct kiocb *iocb, struct socket *sock,
 
 	/* Check if IrTTP is wants us to slow down */
 
-	if (wait_event_interruptible(*(sk->sk_sleep),
+	if (wait_event_interruptible(*(sk_sleep(sk)),
 	    (self->tx_flow != FLOW_STOP  ||  sk->sk_state != TCP_ESTABLISHED))) {
 		err = -ERESTARTSYS;
 		goto out;
@@ -1477,7 +1477,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 			if (copied >= target)
 				break;
 
-			prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+			prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 			/*
 			 *	POSIX 1003.1g mandates this order.
@@ -1497,7 +1497,7 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
 				/* Wait process until data arrives */
 				schedule();
 
-			finish_wait(sk->sk_sleep, &wait);
+			finish_wait(sk_sleep(sk), &wait);
 
 			if (err)
 				goto out;
@@ -1787,7 +1787,7 @@ static unsigned int irda_poll(struct file * file, struct socket *sock,
 	IRDA_DEBUG(4, "%s()\n", __func__);
 
 	lock_kernel();
-	poll_wait(file, sk->sk_sleep, wait);
+	poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	/* Exceptional events? */
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c18286a2167b..9636b7d27b48 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -59,7 +59,7 @@ do {									\
 	DEFINE_WAIT(__wait);						\
 	long __timeo = timeo;						\
 	ret = 0;							\
-	prepare_to_wait(sk->sk_sleep, &__wait, TASK_INTERRUPTIBLE);	\
+	prepare_to_wait(sk_sleep(sk), &__wait, TASK_INTERRUPTIBLE);	\
 	while (!(condition)) {						\
 		if (!__timeo) {						\
 			ret = -EAGAIN;					\
@@ -76,7 +76,7 @@ do {									\
 		if (ret)						\
 			break;						\
 	}								\
-	finish_wait(sk->sk_sleep, &__wait);				\
+	finish_wait(sk_sleep(sk), &__wait);				\
 } while (0)
 
 #define iucv_sock_wait(sk, condition, timeo)				\
@@ -307,7 +307,7 @@ static void iucv_sock_wake_msglim(struct sock *sk)
 {
 	read_lock(&sk->sk_callback_lock);
 	if (sk_has_sleeper(sk))
-		wake_up_interruptible_all(sk->sk_sleep);
+		wake_up_interruptible_all(sk_sleep(sk));
 	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	read_unlock(&sk->sk_callback_lock);
 }
@@ -795,7 +795,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
 	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
 
 	/* Wait for an incoming connection */
-	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	add_wait_queue_exclusive(sk_sleep(sk), &wait);
 	while (!(nsk = iucv_accept_dequeue(sk, newsock))) {
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (!timeo) {
@@ -819,7 +819,7 @@ static int iucv_sock_accept(struct socket *sock, struct socket *newsock,
 	}
 
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 
 	if (err)
 		goto done;
@@ -1269,7 +1269,7 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk;
 	unsigned int mask = 0;
 
-	sock_poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 
 	if (sk->sk_state == IUCV_LISTEN)
 		return iucv_accept_poll(sk);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 2db6a9f75913..023ba820236f 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -536,7 +536,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
 	int rc = 0;
 
 	while (1) {
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE))
 			break;
 		rc = -ERESTARTSYS;
@@ -547,7 +547,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
 			break;
 		rc = 0;
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return rc;
 }
 
@@ -556,13 +556,13 @@ static int llc_ui_wait_for_conn(struct sock *sk, long timeout)
 	DEFINE_WAIT(wait);
 
 	while (1) {
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT))
 			break;
 		if (signal_pending(current) || !timeout)
 			break;
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return timeout;
 }
 
@@ -573,7 +573,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
 	int rc;
 
 	while (1) {
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		rc = 0;
 		if (sk_wait_event(sk, &timeout,
 				  (sk->sk_shutdown & RCV_SHUTDOWN) ||
@@ -588,7 +588,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
 		if (!timeout)
 			break;
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return rc;
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 8fb0ae616761..7ba06939829f 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -802,7 +802,7 @@ static int sync_thread_backup(void *data)
 		ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
 
 	while (!kthread_should_stop()) {
-		wait_event_interruptible(*tinfo->sock->sk->sk_sleep,
+		wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
 			 !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
 			 || kthread_should_stop());
 
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index fa07f044b599..06cb02796a0e 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -739,7 +739,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 		DEFINE_WAIT(wait);
 
 		for (;;) {
-			prepare_to_wait(sk->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk), &wait,
 					TASK_INTERRUPTIBLE);
 			if (sk->sk_state != TCP_SYN_SENT)
 				break;
@@ -752,7 +752,7 @@ static int nr_connect(struct socket *sock, struct sockaddr *uaddr,
 			err = -ERESTARTSYS;
 			break;
 		}
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 		if (err)
 			goto out_release;
 	}
@@ -798,7 +798,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
 	 *	hooked into the SABM we saved
 	 */
 	for (;;) {
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb)
 			break;
@@ -816,7 +816,7 @@ static int nr_accept(struct socket *sock, struct socket *newsock, int flags)
 		err = -ERESTARTSYS;
 		break;
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	if (err)
 		goto out_release;
 
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 7919a9edb8e9..aebfecbdb841 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -158,7 +158,7 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
 	unsigned int mask = 0;
 	unsigned long flags;
 
-	poll_wait(file, sk->sk_sleep, wait);
+	poll_wait(file, sk_sleep(sk), wait);
 
 	if (rs->rs_seen_congestion)
 		poll_wait(file, &rds_poll_waitq, wait);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 4bec6e2ed495..c224b5bb3ba9 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -492,7 +492,7 @@ void rds_sock_put(struct rds_sock *rs);
 void rds_wake_sk_sleep(struct rds_sock *rs);
 static inline void __rds_wake_sk_sleep(struct sock *sk)
 {
-	wait_queue_head_t *waitq = sk->sk_sleep;
+	wait_queue_head_t *waitq = sk_sleep(sk);
 
 	if (!sock_flag(sk, SOCK_DEAD) && waitq)
 		wake_up(waitq);
diff --git a/net/rds/recv.c b/net/rds/recv.c
index e2a2b9344f7b..795a00b7f2cb 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -432,7 +432,7 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 				break;
 			}
 
-			timeo = wait_event_interruptible_timeout(*sk->sk_sleep,
+			timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
 					(!list_empty(&rs->rs_notify_queue) ||
 					 rs->rs_cong_notify ||
 					 rds_next_incoming(rs, &inc)), timeo);
diff --git a/net/rds/send.c b/net/rds/send.c
index 53d6795ac9d0..9c1c6bcaa6c9 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -915,7 +915,7 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 			goto out;
 		}
 
-		timeo = wait_event_interruptible_timeout(*sk->sk_sleep,
+		timeo = wait_event_interruptible_timeout(*sk_sleep(sk),
 					rds_send_queue_rm(rs, conn, rm,
 							  rs->rs_bound_port,
 							  dport,
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 4fb711a035f4..8e45e76a95f5 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -845,7 +845,7 @@ rose_try_next_neigh:
 		DEFINE_WAIT(wait);
 
 		for (;;) {
-			prepare_to_wait(sk->sk_sleep, &wait,
+			prepare_to_wait(sk_sleep(sk), &wait,
 					TASK_INTERRUPTIBLE);
 			if (sk->sk_state != TCP_SYN_SENT)
 				break;
@@ -858,7 +858,7 @@ rose_try_next_neigh:
 			err = -ERESTARTSYS;
 			break;
 		}
-		finish_wait(sk->sk_sleep, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 
 		if (err)
 			goto out_release;
@@ -911,7 +911,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
 	 *	hooked into the SABM we saved
 	 */
 	for (;;) {
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (skb)
@@ -930,7 +930,7 @@ static int rose_accept(struct socket *sock, struct socket *newsock, int flags)
 		err = -ERESTARTSYS;
 		break;
 	}
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	if (err)
 		goto out_release;
 
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index c060095b27ce..c432d76f415e 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk)
 	read_lock(&sk->sk_callback_lock);
 	if (rxrpc_writable(sk)) {
 		if (sk_has_sleeper(sk))
-			wake_up_interruptible(sk->sk_sleep);
+			wake_up_interruptible(sk_sleep(sk));
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 	read_unlock(&sk->sk_callback_lock);
@@ -589,7 +589,7 @@ static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
 	unsigned int mask;
 	struct sock *sk = sock->sk;
 
-	sock_poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	/* the socket is readable if there are any messages waiting on the Rx
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index c1941276f6e3..f34adcca8a8c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5702,7 +5702,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
 	struct sctp_sock *sp = sctp_sk(sk);
 	unsigned int mask;
 
-	poll_wait(file, sk->sk_sleep, wait);
+	poll_wait(file, sk_sleep(sk), wait);
 
 	/* A TCP-style listening socket becomes readable when the accept queue
 	 * is not empty.
@@ -5943,7 +5943,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p)
 	int error;
 	DEFINE_WAIT(wait);
 
-	prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+	prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 	/* Socket errors? */
 	error = sock_error(sk);
@@ -5980,14 +5980,14 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p)
 	sctp_lock_sock(sk);
 
 ready:
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	return 0;
 
 interrupted:
 	error = sock_intr_errno(*timeo_p);
 
 out:
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	*err = error;
 	return error;
 }
@@ -6061,8 +6061,8 @@ static void __sctp_write_space(struct sctp_association *asoc)
 			wake_up_interruptible(&asoc->wait);
 
 		if (sctp_writeable(sk)) {
-			if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-				wake_up_interruptible(sk->sk_sleep);
+			if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+				wake_up_interruptible(sk_sleep(sk));
 
 			/* Note that we try to include the Async I/O support
 			 * here by modeling from the current TCP/UDP code.
@@ -6296,7 +6296,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo)
 
 
 	for (;;) {
-		prepare_to_wait_exclusive(sk->sk_sleep, &wait,
+		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
 					  TASK_INTERRUPTIBLE);
 
 		if (list_empty(&ep->asocs)) {
@@ -6322,7 +6322,7 @@ static int sctp_wait_for_accept(struct sock *sk, long timeo)
 			break;
 	}
 
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 
 	return err;
 }
@@ -6332,7 +6332,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout)
 	DEFINE_WAIT(wait);
 
 	do {
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 		if (list_empty(&sctp_sk(sk)->ep->asocs))
 			break;
 		sctp_release_sock(sk);
@@ -6340,7 +6340,7 @@ static void sctp_wait_for_close(struct sock *sk, long timeout)
 		sctp_lock_sock(sk);
 	} while (!signal_pending(current) && timeout);
 
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 }
 
 static void sctp_skb_set_owner_r_frag(struct sk_buff *skb, struct sock *sk)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a29f259204e6..ce0d5b35c2ac 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -419,8 +419,8 @@ static void svc_udp_data_ready(struct sock *sk, int count)
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible(sk_sleep(sk));
 }
 
 /*
@@ -436,10 +436,10 @@ static void svc_write_space(struct sock *sk)
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
 
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) {
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) {
 		dprintk("RPC svc_write_space: someone sleeping on %p\n",
 		       svsk);
-		wake_up_interruptible(sk->sk_sleep);
+		wake_up_interruptible(sk_sleep(sk));
 	}
 }
 
@@ -757,8 +757,8 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
 			printk("svc: socket %p: no user data\n", sk);
 	}
 
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible_all(sk->sk_sleep);
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible_all(sk_sleep(sk));
 }
 
 /*
@@ -777,8 +777,8 @@ static void svc_tcp_state_change(struct sock *sk)
 		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible_all(sk->sk_sleep);
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible_all(sk_sleep(sk));
 }
 
 static void svc_tcp_data_ready(struct sock *sk, int count)
@@ -791,8 +791,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count)
 		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
 		svc_xprt_enqueue(&svsk->sk_xprt);
 	}
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible(sk_sleep(sk));
 }
 
 /*
@@ -1494,8 +1494,8 @@ static void svc_sock_detach(struct svc_xprt *xprt)
 	sk->sk_data_ready = svsk->sk_odata;
 	sk->sk_write_space = svsk->sk_owspace;
 
-	if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
+	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible(sk_sleep(sk));
 }
 
 /*
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index cfb20b80b3a1..66e889ba48fd 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -446,7 +446,7 @@ static unsigned int poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk;
 	u32 mask;
 
-	poll_wait(file, sk->sk_sleep, wait);
+	poll_wait(file, sk_sleep(sk), wait);
 
 	if (!skb_queue_empty(&sk->sk_receive_queue) ||
 	    (sock->state == SS_UNCONNECTED) ||
@@ -591,7 +591,7 @@ static int send_msg(struct kiocb *iocb, struct socket *sock,
 			break;
 		}
 		release_sock(sk);
-		res = wait_event_interruptible(*sk->sk_sleep,
+		res = wait_event_interruptible(*sk_sleep(sk),
 					       !tport->congested);
 		lock_sock(sk);
 		if (res)
@@ -650,7 +650,7 @@ static int send_packet(struct kiocb *iocb, struct socket *sock,
 			break;
 		}
 		release_sock(sk);
-		res = wait_event_interruptible(*sk->sk_sleep,
+		res = wait_event_interruptible(*sk_sleep(sk),
 			(!tport->congested || !tport->connected));
 		lock_sock(sk);
 		if (res)
@@ -931,7 +931,7 @@ restart:
 			goto exit;
 		}
 		release_sock(sk);
-		res = wait_event_interruptible(*sk->sk_sleep,
+		res = wait_event_interruptible(*sk_sleep(sk),
 			(!skb_queue_empty(&sk->sk_receive_queue) ||
 			 (sock->state == SS_DISCONNECTING)));
 		lock_sock(sk);
@@ -1064,7 +1064,7 @@ restart:
 			goto exit;
 		}
 		release_sock(sk);
-		res = wait_event_interruptible(*sk->sk_sleep,
+		res = wait_event_interruptible(*sk_sleep(sk),
 			(!skb_queue_empty(&sk->sk_receive_queue) ||
 			 (sock->state == SS_DISCONNECTING)));
 		lock_sock(sk);
@@ -1271,8 +1271,8 @@ static u32 filter_rcv(struct sock *sk, struct sk_buff *buf)
 		tipc_disconnect_port(tipc_sk_port(sk));
 	}
 
-	if (waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
+	if (waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible(sk_sleep(sk));
 	return TIPC_OK;
 }
 
@@ -1343,8 +1343,8 @@ static void wakeupdispatch(struct tipc_port *tport)
 {
 	struct sock *sk = (struct sock *)tport->usr_handle;
 
-	if (waitqueue_active(sk->sk_sleep))
-		wake_up_interruptible(sk->sk_sleep);
+	if (waitqueue_active(sk_sleep(sk)))
+		wake_up_interruptible(sk_sleep(sk));
 }
 
 /**
@@ -1426,7 +1426,7 @@ static int connect(struct socket *sock, struct sockaddr *dest, int destlen,
 	/* Wait until an 'ACK' or 'RST' arrives, or a timeout occurs */
 
 	release_sock(sk);
-	res = wait_event_interruptible_timeout(*sk->sk_sleep,
+	res = wait_event_interruptible_timeout(*sk_sleep(sk),
 			(!skb_queue_empty(&sk->sk_receive_queue) ||
 			(sock->state != SS_CONNECTING)),
 			sk->sk_rcvtimeo);
@@ -1521,7 +1521,7 @@ static int accept(struct socket *sock, struct socket *new_sock, int flags)
 			goto exit;
 		}
 		release_sock(sk);
-		res = wait_event_interruptible(*sk->sk_sleep,
+		res = wait_event_interruptible(*sk_sleep(sk),
 				(!skb_queue_empty(&sk->sk_receive_queue)));
 		lock_sock(sk);
 		if (res)
@@ -1632,8 +1632,8 @@ restart:
 		/* Discard any unreceived messages; wake up sleeping tasks */
 
 		discard_rx_queue(sk);
-		if (waitqueue_active(sk->sk_sleep))
-			wake_up_interruptible(sk->sk_sleep);
+		if (waitqueue_active(sk_sleep(sk)))
+			wake_up_interruptible(sk_sleep(sk));
 		res = 0;
 		break;
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 3d9122e78f41..87c0360eaa25 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -316,7 +316,7 @@ static void unix_write_space(struct sock *sk)
 	read_lock(&sk->sk_callback_lock);
 	if (unix_writable(sk)) {
 		if (sk_has_sleeper(sk))
-			wake_up_interruptible_sync(sk->sk_sleep);
+			wake_up_interruptible_sync(sk_sleep(sk));
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 	read_unlock(&sk->sk_callback_lock);
@@ -1736,7 +1736,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
 	unix_state_lock(sk);
 
 	for (;;) {
-		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
 		if (!skb_queue_empty(&sk->sk_receive_queue) ||
 		    sk->sk_err ||
@@ -1752,7 +1752,7 @@ static long unix_stream_data_wait(struct sock *sk, long timeo)
 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 	}
 
-	finish_wait(sk->sk_sleep, &wait);
+	finish_wait(sk_sleep(sk), &wait);
 	unix_state_unlock(sk);
 	return timeo;
 }
@@ -1991,7 +1991,7 @@ static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table
 	struct sock *sk = sock->sk;
 	unsigned int mask;
 
-	sock_poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	/* exceptional events? */
@@ -2028,7 +2028,7 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 	struct sock *sk = sock->sk, *other;
 	unsigned int mask, writable;
 
-	sock_poll_wait(file, sk->sk_sleep, wait);
+	sock_poll_wait(file, sk_sleep(sk), wait);
 	mask = 0;
 
 	/* exceptional events? */
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index cbddd0cb83f1..6cffbc4da029 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -718,7 +718,7 @@ static int x25_wait_for_connection_establishment(struct sock *sk)
 	DECLARE_WAITQUEUE(wait, current);
 	int rc;
 
-	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	add_wait_queue_exclusive(sk_sleep(sk), &wait);
 	for (;;) {
 		__set_current_state(TASK_INTERRUPTIBLE);
 		rc = -ERESTARTSYS;
@@ -738,7 +738,7 @@ static int x25_wait_for_connection_establishment(struct sock *sk)
 			break;
 	}
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 	return rc;
 }
 
@@ -838,7 +838,7 @@ static int x25_wait_for_data(struct sock *sk, long timeout)
 	DECLARE_WAITQUEUE(wait, current);
 	int rc = 0;
 
-	add_wait_queue_exclusive(sk->sk_sleep, &wait);
+	add_wait_queue_exclusive(sk_sleep(sk), &wait);
 	for (;;) {
 		__set_current_state(TASK_INTERRUPTIBLE);
 		if (sk->sk_shutdown & RCV_SHUTDOWN)
@@ -858,7 +858,7 @@ static int x25_wait_for_data(struct sock *sk, long timeout)
 			break;
 	}
 	__set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 	return rc;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 0eae88f31ca2b88911ce843452054139e028771f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 20 Apr 2010 19:06:52 -0700
Subject: net: Fix various endianness glitches

Sparse can help us find endianness bugs, but we need to make some
cleanups to be able to more easily spot real bugs.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c |  2 +-
 net/bridge/br_private.h   | 15 ++++++++-------
 net/ethernet/eth.c        |  2 +-
 net/ipv4/af_inet.c        |  8 ++++----
 net/ipv4/ipmr.c           | 10 +++++-----
 net/ipv4/route.c          | 29 ++++++++++++++---------------
 net/ipv4/tcp.c            | 15 ++++++++-------
 net/ipv4/tcp_ipv4.c       |  4 ++--
 net/ipv4/tcp_output.c     |  4 ++--
 net/ipv4/udp.c            |  8 ++++----
 net/ipv6/addrconf.c       |  3 ++-
 net/ipv6/ip6_fib.c        |  3 ++-
 net/ipv6/tcp_ipv6.c       |  4 ++--
 net/ipv6/udp.c            |  4 ++--
 net/sched/sch_sfq.c       | 10 +++++-----
 net/sunrpc/xprt.c         |  2 +-
 net/xfrm/xfrm_hash.h      |  3 ++-
 17 files changed, 65 insertions(+), 61 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 3fe86ffc069c..61e1d1094b85 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -29,7 +29,7 @@
 
 static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, __be32 ip)
 {
-	return jhash_1word(mdb->secret, (u32)ip) & (mdb->max - 1);
+	return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1);
 }
 
 static struct net_bridge_mdb_entry *__br_mdb_ip_get(
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 791d4ab0fd4d..63181e4a2a67 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -130,19 +130,20 @@ struct net_bridge_port
 #endif
 };
 
+struct br_cpu_netstats {
+	unsigned long	rx_packets;
+	unsigned long	rx_bytes;
+	unsigned long	tx_packets;
+	unsigned long	tx_bytes;
+};
+
 struct net_bridge
 {
 	spinlock_t			lock;
 	struct list_head		port_list;
 	struct net_device		*dev;
 
-	struct br_cpu_netstats __percpu {
-		unsigned long	rx_packets;
-		unsigned long	rx_bytes;
-		unsigned long	tx_packets;
-		unsigned long	tx_bytes;
-	} *stats;
-
+	struct br_cpu_netstats __percpu *stats;
 	spinlock_t			hash_lock;
 	struct hlist_head		hash[BR_HASH_SIZE];
 	unsigned long			feature_mask;
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 205a1c12f3c0..35846964082c 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -136,7 +136,7 @@ int eth_rebuild_header(struct sk_buff *skb)
 	default:
 		printk(KERN_DEBUG
 		       "%s: unable to resolve type %X addresses.\n",
-		       dev->name, (int)eth->h_proto);
+		       dev->name, (__force int)eth->h_proto);
 
 		memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
 		break;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 5ca7290c2e61..9f52880fae10 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1323,8 +1323,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 	if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
 		goto out_unlock;
 
-	id = ntohl(*(u32 *)&iph->id);
-	flush = (u16)((ntohl(*(u32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF));
+	id = ntohl(*(__be32 *)&iph->id);
+	flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF));
 	id >>= 16;
 
 	for (p = *head; p; p = p->next) {
@@ -1337,8 +1337,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head,
 
 		if ((iph->protocol ^ iph2->protocol) |
 		    (iph->tos ^ iph2->tos) |
-		    (iph->saddr ^ iph2->saddr) |
-		    (iph->daddr ^ iph2->daddr)) {
+		    ((__force u32)iph->saddr ^ (__force u32)iph2->saddr) |
+		    ((__force u32)iph->daddr ^ (__force u32)iph2->daddr)) {
 			NAPI_GRO_CB(p)->same_flow = 0;
 			continue;
 		}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 7d8a2bcecb76..a2df5012a1d0 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1772,10 +1772,10 @@ int ip_mr_input(struct sk_buff *skb)
 
 		vif = ipmr_find_vif(mrt, skb->dev);
 		if (vif >= 0) {
-			int err = ipmr_cache_unresolved(mrt, vif, skb);
+			int err2 = ipmr_cache_unresolved(mrt, vif, skb);
 			read_unlock(&mrt_lock);
 
-			return err;
+			return err2;
 		}
 		read_unlock(&mrt_lock);
 		kfree_skb(skb);
@@ -2227,9 +2227,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 		const struct ipmr_mfc_iter *it = seq->private;
 		const struct mr_table *mrt = it->mrt;
 
-		seq_printf(seq, "%08lX %08lX %-3hd",
-			   (unsigned long) mfc->mfc_mcastgrp,
-			   (unsigned long) mfc->mfc_origin,
+		seq_printf(seq, "%08X %08X %-3hd",
+			   (__force u32) mfc->mfc_mcastgrp,
+			   (__force u32) mfc->mfc_origin,
 			   mfc->mfc_parent);
 
 		if (it->cache != &mrt->mfc_unres_queue) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index cb562fdd9b9a..a947428ef0ae 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -258,10 +258,9 @@ static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
 	(__raw_get_cpu_var(rt_cache_stat).field++)
 
 static inline unsigned int rt_hash(__be32 daddr, __be32 saddr, int idx,
-		int genid)
+				   int genid)
 {
-	return jhash_3words((__force u32)(__be32)(daddr),
-			    (__force u32)(__be32)(saddr),
+	return jhash_3words((__force u32)daddr, (__force u32)saddr,
 			    idx, genid)
 		& rt_hash_mask;
 }
@@ -378,12 +377,13 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
 		struct rtable *r = v;
 		int len;
 
-		seq_printf(seq, "%s\t%08lX\t%08lX\t%8X\t%d\t%u\t%d\t"
-			      "%08lX\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
+		seq_printf(seq, "%s\t%08X\t%08X\t%8X\t%d\t%u\t%d\t"
+			      "%08X\t%d\t%u\t%u\t%02X\t%d\t%1d\t%08X%n",
 			r->u.dst.dev ? r->u.dst.dev->name : "*",
-			(unsigned long)r->rt_dst, (unsigned long)r->rt_gateway,
+			(__force u32)r->rt_dst,
+			(__force u32)r->rt_gateway,
 			r->rt_flags, atomic_read(&r->u.dst.__refcnt),
-			r->u.dst.__use, 0, (unsigned long)r->rt_src,
+			r->u.dst.__use, 0, (__force u32)r->rt_src,
 			(dst_metric(&r->u.dst, RTAX_ADVMSS) ?
 			     (int)dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0),
 			dst_metric(&r->u.dst, RTAX_WINDOW),
@@ -685,18 +685,17 @@ static inline bool rt_caching(const struct net *net)
 static inline bool compare_hash_inputs(const struct flowi *fl1,
 					const struct flowi *fl2)
 {
-	return (__force u32)(((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
-		(fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr) |
+	return ((((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
+		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
 		(fl1->iif ^ fl2->iif)) == 0);
 }
 
 static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
 {
-	return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
-		(fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) |
+	return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
+		((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
 		(fl1->mark ^ fl2->mark) |
-		(*(u16 *)&fl1->nl_u.ip4_u.tos ^
-		 *(u16 *)&fl2->nl_u.ip4_u.tos) |
+		(*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
 		(fl1->oif ^ fl2->oif) |
 		(fl1->iif ^ fl2->iif)) == 0;
 }
@@ -2319,8 +2318,8 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	rcu_read_lock();
 	for (rth = rcu_dereference(rt_hash_table[hash].chain); rth;
 	     rth = rcu_dereference(rth->u.dst.rt_next)) {
-		if (((rth->fl.fl4_dst ^ daddr) |
-		     (rth->fl.fl4_src ^ saddr) |
+		if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
+		     ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
 		     (rth->fl.iif ^ iif) |
 		     rth->fl.oif |
 		     (rth->fl.fl4_tos ^ tos)) == 0 &&
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 77208334a613..6689c61cab47 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2721,7 +2721,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 	struct tcphdr *th2;
 	unsigned int len;
 	unsigned int thlen;
-	unsigned int flags;
+	__be32 flags;
 	unsigned int mss = 1;
 	unsigned int hlen;
 	unsigned int off;
@@ -2771,10 +2771,10 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
 
 found:
 	flush = NAPI_GRO_CB(p)->flush;
-	flush |= flags & TCP_FLAG_CWR;
-	flush |= (flags ^ tcp_flag_word(th2)) &
-		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH);
-	flush |= th->ack_seq ^ th2->ack_seq;
+	flush |= (__force int)(flags & TCP_FLAG_CWR);
+	flush |= (__force int)((flags ^ tcp_flag_word(th2)) &
+		  ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH));
+	flush |= (__force int)(th->ack_seq ^ th2->ack_seq);
 	for (i = sizeof(*th); i < thlen; i += 4)
 		flush |= *(u32 *)((u8 *)th + i) ^
 			 *(u32 *)((u8 *)th2 + i);
@@ -2795,8 +2795,9 @@ found:
 
 out_check_final:
 	flush = len < mss;
-	flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST |
-			  TCP_FLAG_SYN | TCP_FLAG_FIN);
+	flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH |
+					TCP_FLAG_RST | TCP_FLAG_SYN |
+					TCP_FLAG_FIN));
 
 	if (p && (!NAPI_GRO_CB(skb)->same_flow || flush))
 		pp = head;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad08392a738c..4d6717d1e61c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1286,8 +1286,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 			goto drop_and_release;
 
 		/* Secret recipe starts with IP addresses */
-		*mess++ ^= daddr;
-		*mess++ ^= saddr;
+		*mess++ ^= (__force u32)daddr;
+		*mess++ ^= (__force u32)saddr;
 
 		/* plus variable length Initiator Cookie */
 		c = (u8 *)mess;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2b7d71fb8439..429ad9286efc 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -861,7 +861,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 			th->urg_ptr = htons(tp->snd_up - tcb->seq);
 			th->urg = 1;
 		} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
-			th->urg_ptr = 0xFFFF;
+			th->urg_ptr = htons(0xFFFF);
 			th->urg = 1;
 		}
 	}
@@ -2485,7 +2485,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 			*tail-- ^= TCP_SKB_CB(skb)->seq + 1;
 
 			/* recommended */
-			*tail-- ^= ((th->dest << 16) | th->source);
+			*tail-- ^= (((__force u32)th->dest << 16) | (__force u32)th->source);
 			*tail-- ^= (u32)(unsigned long)cvp; /* per sockopt */
 
 			sha_transform((__u32 *)&xvp->cookie_bakery[0],
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 666b963496ff..1e18f9cc9247 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -307,13 +307,13 @@ static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
 static unsigned int udp4_portaddr_hash(struct net *net, __be32 saddr,
 				       unsigned int port)
 {
-	return jhash_1word(saddr, net_hash_mix(net)) ^ port;
+	return jhash_1word((__force u32)saddr, net_hash_mix(net)) ^ port;
 }
 
 int udp_v4_get_port(struct sock *sk, unsigned short snum)
 {
 	unsigned int hash2_nulladdr =
-		udp4_portaddr_hash(sock_net(sk), INADDR_ANY, snum);
+		udp4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum);
 	unsigned int hash2_partial =
 		udp4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, 0);
 
@@ -466,14 +466,14 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
 					  daddr, hnum, dif,
 					  hslot2, slot2);
 		if (!result) {
-			hash2 = udp4_portaddr_hash(net, INADDR_ANY, hnum);
+			hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
 			slot2 = hash2 & udptable->mask;
 			hslot2 = &udptable->hash2[slot2];
 			if (hslot->count < hslot2->count)
 				goto begin;
 
 			result = udp4_lib_lookup2(net, saddr, sport,
-						  INADDR_ANY, hnum, dif,
+						  htonl(INADDR_ANY), hnum, dif,
 						  hslot2, slot2);
 		}
 		rcu_read_unlock();
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7cba8845242f..34d2d649e396 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -588,7 +588,8 @@ static u32 ipv6_addr_hash(const struct in6_addr *addr)
 	 * We perform the hash function over the last 64 bits of the address
 	 * This will include the IEEE address token on links that support it.
 	 */
-	return jhash_2words(addr->s6_addr32[2],  addr->s6_addr32[3], 0)
+	return jhash_2words((__force u32)addr->s6_addr32[2],
+			    (__force u32)addr->s6_addr32[3], 0)
 		& (IN6_ADDR_HSIZE - 1);
 }
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index dc6e0b8f260d..92a122b7795d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -144,7 +144,8 @@ static __inline__ __be32 addr_bit_set(void *token, int fn_bit)
 	 *	htonl(1 << ((~fn_bit)&0x1F))
 	 * See include/asm-generic/bitops/le.h.
 	 */
-	return (1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) & addr[fn_bit >> 5];
+	return (__force __be32)(1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)) &
+	       addr[fn_bit >> 5];
 }
 
 static __inline__ struct fib6_node * node_alloc(void)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index bd5ef7b6e48e..a92b4a5cd8bf 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1234,12 +1234,12 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 			goto drop_and_free;
 
 		/* Secret recipe starts with IP addresses */
-		d = &ipv6_hdr(skb)->daddr.s6_addr32[0];
+		d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
 		*mess++ ^= *d++;
 		*mess++ ^= *d++;
 		*mess++ ^= *d++;
 		*mess++ ^= *d++;
-		d = &ipv6_hdr(skb)->saddr.s6_addr32[0];
+		d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
 		*mess++ ^= *d++;
 		*mess++ ^= *d++;
 		*mess++ ^= *d++;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 90824852f598..92bf9033e245 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -91,9 +91,9 @@ static unsigned int udp6_portaddr_hash(struct net *net,
 	if (ipv6_addr_any(addr6))
 		hash = jhash_1word(0, mix);
 	else if (ipv6_addr_v4mapped(addr6))
-		hash = jhash_1word(addr6->s6_addr32[3], mix);
+		hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix);
 	else
-		hash = jhash2(addr6->s6_addr32, 4, mix);
+		hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix);
 
 	return hash ^ port;
 }
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index c5a9ac566007..c65762823f5e 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -123,8 +123,8 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 	case htons(ETH_P_IP):
 	{
 		const struct iphdr *iph = ip_hdr(skb);
-		h = iph->daddr;
-		h2 = iph->saddr ^ iph->protocol;
+		h = (__force u32)iph->daddr;
+		h2 = (__force u32)iph->saddr ^ iph->protocol;
 		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
 		    (iph->protocol == IPPROTO_TCP ||
 		     iph->protocol == IPPROTO_UDP ||
@@ -138,8 +138,8 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 	case htons(ETH_P_IPV6):
 	{
 		struct ipv6hdr *iph = ipv6_hdr(skb);
-		h = iph->daddr.s6_addr32[3];
-		h2 = iph->saddr.s6_addr32[3] ^ iph->nexthdr;
+		h = (__force u32)iph->daddr.s6_addr32[3];
+		h2 = (__force u32)iph->saddr.s6_addr32[3] ^ iph->nexthdr;
 		if (iph->nexthdr == IPPROTO_TCP ||
 		    iph->nexthdr == IPPROTO_UDP ||
 		    iph->nexthdr == IPPROTO_UDPLITE ||
@@ -150,7 +150,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 		break;
 	}
 	default:
-		h = (unsigned long)skb_dst(skb) ^ skb->protocol;
+		h = (unsigned long)skb_dst(skb) ^ (__force u32)skb->protocol;
 		h2 = (unsigned long)skb->sk;
 	}
 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 42f09ade0044..699ade68aac1 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -974,7 +974,7 @@ void xprt_reserve(struct rpc_task *task)
 
 static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
 {
-	return xprt->xid++;
+	return (__force __be32)xprt->xid++;
 }
 
 static inline void xprt_init_xid(struct rpc_xprt *xprt)
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index e5195c99f71e..1396572d2ade 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -16,7 +16,8 @@ static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr)
 
 static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
 {
-	return ntohl(daddr->a4 + saddr->a4);
+	u32 sum = (__force u32)daddr->a4 + (__force u32)saddr->a4;
+	return ntohl((__force __be32)sum);
 }
 
 static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr)
-- 
cgit v1.2.3-59-g8ed1b


From ccb7c7732e2ceb4e81a7806faf1670be9681ccd2 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Tue, 20 Apr 2010 22:39:53 -0700
Subject: net: Remove two unnecessary exports (skbuff).

There is no need to export skb_under_panic() and skb_over_panic() in
skbuff.c, since these functions are used only in skbuff.c; this patch
removes these two exports. It also marks these functions as 'static'
and removes their extern declarations from
include/linux/skbuff.h

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ----
 net/core/skbuff.c      | 6 ++----
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 38501d20650c..82f5116a89e4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -470,10 +470,6 @@ extern int	       skb_cow_data(struct sk_buff *skb, int tailbits,
 				    struct sk_buff **trailer);
 extern int	       skb_pad(struct sk_buff *skb, int pad);
 #define dev_kfree_skb(a)	consume_skb(a)
-extern void	      skb_over_panic(struct sk_buff *skb, int len,
-				     void *here);
-extern void	      skb_under_panic(struct sk_buff *skb, int len,
-				      void *here);
 
 extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 			int getfrag(void *from, char *to, int offset,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bdea0efdf8cb..4218ff49bf13 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -117,7 +117,7 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = {
  *
  *	Out of line support code for skb_put(). Not user callable.
  */
-void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
 			  "data:%p tail:%#lx end:%#lx dev:%s\n",
@@ -126,7 +126,6 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
-EXPORT_SYMBOL(skb_over_panic);
 
 /**
  *	skb_under_panic	- 	private function
@@ -137,7 +136,7 @@ EXPORT_SYMBOL(skb_over_panic);
  *	Out of line support code for skb_push(). Not user callable.
  */
 
-void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
 			  "data:%p tail:%#lx end:%#lx dev:%s\n",
@@ -146,7 +145,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
-EXPORT_SYMBOL(skb_under_panic);
 
 /* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  *	'private' fields and also do memory statistics to find all the
-- 
cgit v1.2.3-59-g8ed1b


From f71b70e115dd0bb34eee4d281a4fb6416e88cfff Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 21 Apr 2010 01:57:01 -0700
Subject: tcp: Fix ipv6 checksumming on response packets for real.

Commit 6651ffc8e8bdd5fb4b7d1867c6cfebb4f309512c
("ipv6: Fix tcp_v6_send_response transport header setting.")
fixed one half of why ipv6 tcp response checksums were
invalid, but it's not the whole story.

If we're going to use CHECKSUM_PARTIAL for these things (which we are
since commit 2e8e18ef52e7dd1af0a3bd1f7d990a1d0b249586 "tcp: Set
CHECKSUM_UNNECESSARY in tcp_init_nondata_skb"), we can't be setting
buff->csum as we always have been here in tcp_v6_send_response.  We
need to leave it at zero.

Kill that line and checksums are good again.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 78480f410a9b..5d2e4301e246 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1050,8 +1050,6 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 	}
 #endif
 
-	buff->csum = csum_partial(t1, tot_len, 0);
-
 	memset(&fl, 0, sizeof(fl));
 	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
-- 
cgit v1.2.3-59-g8ed1b


From d97a9e47ba148cfc41e354c5cd241f472273207c Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 21 Apr 2010 14:45:51 +0200
Subject: netfilter: x_tables: move sleeping allocation outside BH-disabled
 region

The jumpstack allocation needs to be moved out of the critical region.
Corrects this notice:

BUG: sleeping function called from invalid context at mm/slub.c:1705
[  428.295762] in_atomic(): 1, irqs_disabled(): 0, pid: 9111, name: iptables
[  428.295771] Pid: 9111, comm: iptables Not tainted 2.6.34-rc1 #2
[  428.295776] Call Trace:
[  428.295791]  [<c012138e>] __might_sleep+0xe5/0xed
[  428.295801]  [<c019e8ca>] __kmalloc+0x92/0xfc
[  428.295825]  [<f865b3bb>] ? xt_jumpstack_alloc+0x36/0xff [x_tables]

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/x_tables.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 3ae32340d4df..445de702b8b7 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -801,6 +801,12 @@ xt_replace_table(struct xt_table *table,
 	struct xt_table_info *private;
 	int ret;
 
+	ret = xt_jumpstack_alloc(newinfo);
+	if (ret < 0) {
+		*error = ret;
+		return NULL;
+	}
+
 	/* Do the substitution. */
 	local_bh_disable();
 	private = table->private;
@@ -814,12 +820,6 @@ xt_replace_table(struct xt_table *table,
 		return NULL;
 	}
 
-	ret = xt_jumpstack_alloc(newinfo);
-	if (ret < 0) {
-		*error = ret;
-		return NULL;
-	}
-
 	table->private = newinfo;
 	newinfo->initial_entries = private->initial_entries;
 
-- 
cgit v1.2.3-59-g8ed1b


From e5700aff144fbbba46be40049f0c55fb57283777 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 21 Apr 2010 14:59:20 -0700
Subject: tcp: Mark v6 response packets as CHECKSUM_PARTIAL

Otherwise we only get the checksum right for data-less TCP responses.

Noticed by Herbert Xu.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tcp_ipv6.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5d2e4301e246..1ababbb41131 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1054,6 +1054,9 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 	ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr);
 
+	buff->ip_summed = CHECKSUM_PARTIAL;
+	buff->csum = 0;
+
 	__tcp_v6_send_check(buff, &fl.fl6_src, &fl.fl6_dst);
 
 	fl.proto = IPPROTO_TCP;
-- 
cgit v1.2.3-59-g8ed1b


From 989a2979205dd34269382b357e6d4b4b6956b889 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 14 Apr 2010 09:55:35 +0000
Subject: fasync: RCU and fine grained locking

kill_fasync() uses a central rwlock, candidate for RCU conversion, to
avoid cache line ping pongs on SMP.

fasync_remove_entry() and fasync_add_entry() can disable IRQs for a short
section instead of during the whole list scan.

Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and
fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync()
doesn't need its own implementation and can use fasync_helper(), to
reduce code size and complexity.

We can remove __kill_fasync() direct use in net/socket.c, and rename it
to kill_fasync_rcu().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/fcntl.c         | 66 ++++++++++++++++++++++++++++++------------------
 include/linux/fs.h | 12 ++++-----
 net/socket.c       | 73 ++++++++----------------------------------------------
 3 files changed, 59 insertions(+), 92 deletions(-)

(limited to 'net')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 452d02f9075e..0a140741b39e 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -614,9 +614,15 @@ int send_sigurg(struct fown_struct *fown)
 	return ret;
 }
 
-static DEFINE_RWLOCK(fasync_lock);
+static DEFINE_SPINLOCK(fasync_lock);
 static struct kmem_cache *fasync_cache __read_mostly;
 
+static void fasync_free_rcu(struct rcu_head *head)
+{
+	kmem_cache_free(fasync_cache,
+			container_of(head, struct fasync_struct, fa_rcu));
+}
+
 /*
  * Remove a fasync entry. If successfully removed, return
  * positive and clear the FASYNC flag. If no entry exists,
@@ -625,8 +631,6 @@ static struct kmem_cache *fasync_cache __read_mostly;
  * NOTE! It is very important that the FASYNC flag always
  * match the state "is the filp on a fasync list".
  *
- * We always take the 'filp->f_lock', in since fasync_lock
- * needs to be irq-safe.
  */
 static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
 {
@@ -634,17 +638,22 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
 	int result = 0;
 
 	spin_lock(&filp->f_lock);
-	write_lock_irq(&fasync_lock);
+	spin_lock(&fasync_lock);
 	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 		if (fa->fa_file != filp)
 			continue;
+
+		spin_lock_irq(&fa->fa_lock);
+		fa->fa_file = NULL;
+		spin_unlock_irq(&fa->fa_lock);
+
 		*fp = fa->fa_next;
-		kmem_cache_free(fasync_cache, fa);
+		call_rcu(&fa->fa_rcu, fasync_free_rcu);
 		filp->f_flags &= ~FASYNC;
 		result = 1;
 		break;
 	}
-	write_unlock_irq(&fasync_lock);
+	spin_unlock(&fasync_lock);
 	spin_unlock(&filp->f_lock);
 	return result;
 }
@@ -666,25 +675,30 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
 		return -ENOMEM;
 
 	spin_lock(&filp->f_lock);
-	write_lock_irq(&fasync_lock);
+	spin_lock(&fasync_lock);
 	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 		if (fa->fa_file != filp)
 			continue;
+
+		spin_lock_irq(&fa->fa_lock);
 		fa->fa_fd = fd;
+		spin_unlock_irq(&fa->fa_lock);
+
 		kmem_cache_free(fasync_cache, new);
 		goto out;
 	}
 
+	spin_lock_init(&new->fa_lock);
 	new->magic = FASYNC_MAGIC;
 	new->fa_file = filp;
 	new->fa_fd = fd;
 	new->fa_next = *fapp;
-	*fapp = new;
+	rcu_assign_pointer(*fapp, new);
 	result = 1;
 	filp->f_flags |= FASYNC;
 
 out:
-	write_unlock_irq(&fasync_lock);
+	spin_unlock(&fasync_lock);
 	spin_unlock(&filp->f_lock);
 	return result;
 }
@@ -704,37 +718,41 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
 
 EXPORT_SYMBOL(fasync_helper);
 
-void __kill_fasync(struct fasync_struct *fa, int sig, int band)
+/*
+ * rcu_read_lock() is held
+ */
+static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
 {
 	while (fa) {
-		struct fown_struct * fown;
+		struct fown_struct *fown;
 		if (fa->magic != FASYNC_MAGIC) {
 			printk(KERN_ERR "kill_fasync: bad magic number in "
 			       "fasync_struct!\n");
 			return;
 		}
-		fown = &fa->fa_file->f_owner;
-		/* Don't send SIGURG to processes which have not set a
-		   queued signum: SIGURG has its own default signalling
-		   mechanism. */
-		if (!(sig == SIGURG && fown->signum == 0))
-			send_sigio(fown, fa->fa_fd, band);
-		fa = fa->fa_next;
+		spin_lock(&fa->fa_lock);
+		if (fa->fa_file) {
+			fown = &fa->fa_file->f_owner;
+			/* Don't send SIGURG to processes which have not set a
+			   queued signum: SIGURG has its own default signalling
+			   mechanism. */
+			if (!(sig == SIGURG && fown->signum == 0))
+				send_sigio(fown, fa->fa_fd, band);
+		}
+		spin_unlock(&fa->fa_lock);
+		fa = rcu_dereference(fa->fa_next);
 	}
 }
 
-EXPORT_SYMBOL(__kill_fasync);
-
 void kill_fasync(struct fasync_struct **fp, int sig, int band)
 {
 	/* First a quick test without locking: usually
 	 * the list is empty.
 	 */
 	if (*fp) {
-		read_lock(&fasync_lock);
-		/* reread *fp after obtaining the lock */
-		__kill_fasync(*fp, sig, band);
-		read_unlock(&fasync_lock);
+		rcu_read_lock();
+		kill_fasync_rcu(rcu_dereference(*fp), sig, band);
+		rcu_read_unlock();
 	}
 }
 EXPORT_SYMBOL(kill_fasync);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 39d57bc6cc71..018d382f6f92 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1280,10 +1280,12 @@ static inline int lock_may_write(struct inode *inode, loff_t start,
 
 
 struct fasync_struct {
-	int	magic;
-	int	fa_fd;
-	struct	fasync_struct	*fa_next; /* singly linked list */
-	struct	file 		*fa_file;
+	spinlock_t		fa_lock;
+	int			magic;
+	int			fa_fd;
+	struct fasync_struct	*fa_next; /* singly linked list */
+	struct file		*fa_file;
+	struct rcu_head		fa_rcu;
 };
 
 #define FASYNC_MAGIC 0x4601
@@ -1292,8 +1294,6 @@ struct fasync_struct {
 extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
 /* can be called from interrupts */
 extern void kill_fasync(struct fasync_struct **, int, int);
-/* only for net: no internal synchronization */
-extern void __kill_fasync(struct fasync_struct *, int, int);
 
 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
 extern int f_setown(struct file *filp, unsigned long arg, int force);
diff --git a/net/socket.c b/net/socket.c
index 35bc198bbf68..9822081eab38 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1067,78 +1067,27 @@ static int sock_close(struct inode *inode, struct file *filp)
  *	1. fasync_list is modified only under process context socket lock
  *	   i.e. under semaphore.
  *	2. fasync_list is used under read_lock(&sk->sk_callback_lock)
- *	   or under socket lock.
- *	3. fasync_list can be used from softirq context, so that
- *	   modification under socket lock have to be enhanced with
- *	   write_lock_bh(&sk->sk_callback_lock).
- *							--ANK (990710)
+ *	   or under socket lock
  */
 
 static int sock_fasync(int fd, struct file *filp, int on)
 {
-	struct fasync_struct *fa, *fna = NULL, **prev;
-	struct socket *sock;
-	struct sock *sk;
-
-	if (on) {
-		fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
-		if (fna == NULL)
-			return -ENOMEM;
-	}
-
-	sock = filp->private_data;
+	struct socket *sock = filp->private_data;
+	struct sock *sk = sock->sk;
 
-	sk = sock->sk;
-	if (sk == NULL) {
-		kfree(fna);
+	if (sk == NULL)
 		return -EINVAL;
-	}
 
 	lock_sock(sk);
 
-	spin_lock(&filp->f_lock);
-	if (on)
-		filp->f_flags |= FASYNC;
-	else
-		filp->f_flags &= ~FASYNC;
-	spin_unlock(&filp->f_lock);
-
-	prev = &(sock->fasync_list);
+	fasync_helper(fd, filp, on, &sock->fasync_list);
 
-	for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
-		if (fa->fa_file == filp)
-			break;
-
-	if (on) {
-		if (fa != NULL) {
-			write_lock_bh(&sk->sk_callback_lock);
-			fa->fa_fd = fd;
-			write_unlock_bh(&sk->sk_callback_lock);
-
-			kfree(fna);
-			goto out;
-		}
-		fna->fa_file = filp;
-		fna->fa_fd = fd;
-		fna->magic = FASYNC_MAGIC;
-		fna->fa_next = sock->fasync_list;
-		write_lock_bh(&sk->sk_callback_lock);
-		sock->fasync_list = fna;
+	if (!sock->fasync_list)
+		sock_reset_flag(sk, SOCK_FASYNC);
+	else
 		sock_set_flag(sk, SOCK_FASYNC);
-		write_unlock_bh(&sk->sk_callback_lock);
-	} else {
-		if (fa != NULL) {
-			write_lock_bh(&sk->sk_callback_lock);
-			*prev = fa->fa_next;
-			if (!sock->fasync_list)
-				sock_reset_flag(sk, SOCK_FASYNC);
-			write_unlock_bh(&sk->sk_callback_lock);
-			kfree(fa);
-		}
-	}
 
-out:
-	release_sock(sock->sk);
+	release_sock(sk);
 	return 0;
 }
 
@@ -1159,10 +1108,10 @@ int sock_wake_async(struct socket *sock, int how, int band)
 		/* fall through */
 	case SOCK_WAKE_IO:
 call_kill:
-		__kill_fasync(sock->fasync_list, SIGIO, band);
+		kill_fasync(&sock->fasync_list, SIGIO, band);
 		break;
 	case SOCK_WAKE_URG:
-		__kill_fasync(sock->fasync_list, SIGURG, band);
+		kill_fasync(&sock->fasync_list, SIGURG, band);
 	}
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 9a20e3197e7f6097897c6d1f18335a326ee06299 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 20 Apr 2010 20:08:36 +0000
Subject: net: Introduce skb_orphan_try()

At this point, skb->destructor is not the original one (stored in
DEV_GSO_CB(skb)->destructor), so remove the skb_orphan_try() call on
the GSO segment.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index e904c476b112..9bf1cccb067e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1937,7 +1937,6 @@ gso:
 		if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
 			skb_dst_drop(nskb);
 
-		skb_orphan_try(nskb);
 		rc = ops->ndo_start_xmit(nskb, dev);
 		if (unlikely(rc != NETDEV_TX_OK)) {
 			if (rc & ~NETDEV_TX_MASK)
-- 
cgit v1.2.3-59-g8ed1b


From b002a861092b0db128800794a116cc3acc5ec239 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 20 Apr 2010 21:06:07 +0000
Subject: ethernet: print protocol in host byte order

Eric's recent patch added __force, but this
place would seem to require actually doing
a byte order conversion so the printk is
consistent across architectures.

Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethernet/eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 35846964082c..0c0d272a9888 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -136,7 +136,7 @@ int eth_rebuild_header(struct sk_buff *skb)
 	default:
 		printk(KERN_DEBUG
 		       "%s: unable to resolve type %X addresses.\n",
-		       dev->name, (__force int)eth->h_proto);
+		       dev->name, ntohs(eth->h_proto));
 
 		memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From e326bed2f47d0365da5a8faaf8ee93ed2d86325b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 22 Apr 2010 00:22:45 -0700
Subject: rps: immediate send IPI in process_backlog()

If some skbs are queued to our backlog, we are delaying IPI sending until
the end of net_rx_action(), increasing latencies. This defeats the
queueing, since we want to quickly dispatch packets to the pool of
worker cpus, then eventually deeply process our packets.

It's better to send IPI before processing our packets in upper layers,
from process_backlog().

Change the _and_irq_disable suffix to _and_irq_enable, since we enable
local irq in net_rps_action(), sorry for the confusion.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 76 ++++++++++++++++++++++++++++++++--------------------------
 1 file changed, 42 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 9bf1cccb067e..3ba774b6091c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3242,11 +3242,48 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(napi_gro_frags);
 
+/*
+ * net_rps_action sends any pending IPI's for rps.
+ * Note: called with local irq disabled, but exits with local irq enabled.
+ */
+static void net_rps_action_and_irq_enable(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	struct softnet_data *remsd = sd->rps_ipi_list;
+
+	if (remsd) {
+		sd->rps_ipi_list = NULL;
+
+		local_irq_enable();
+
+		/* Send pending IPI's to kick RPS processing on remote cpus. */
+		while (remsd) {
+			struct softnet_data *next = remsd->rps_ipi_next;
+
+			if (cpu_online(remsd->cpu))
+				__smp_call_function_single(remsd->cpu,
+							   &remsd->csd, 0);
+			remsd = next;
+		}
+	} else
+#endif
+		local_irq_enable();
+}
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 
+#ifdef CONFIG_RPS
+	/* Check if we have pending ipi, its better to send them now,
+	 * not waiting net_rx_action() end.
+	 */
+	if (sd->rps_ipi_list) {
+		local_irq_disable();
+		net_rps_action_and_irq_enable(sd);
+	}
+#endif
 	napi->weight = weight_p;
 	do {
 		struct sk_buff *skb;
@@ -3353,45 +3390,16 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
-/*
- * net_rps_action sends any pending IPI's for rps.
- * Note: called with local irq disabled, but exits with local irq enabled.
- */
-static void net_rps_action_and_irq_disable(void)
-{
-#ifdef CONFIG_RPS
-	struct softnet_data *sd = &__get_cpu_var(softnet_data);
-	struct softnet_data *remsd = sd->rps_ipi_list;
-
-	if (remsd) {
-		sd->rps_ipi_list = NULL;
-
-		local_irq_enable();
-
-		/* Send pending IPI's to kick RPS processing on remote cpus. */
-		while (remsd) {
-			struct softnet_data *next = remsd->rps_ipi_next;
-
-			if (cpu_online(remsd->cpu))
-				__smp_call_function_single(remsd->cpu,
-							   &remsd->csd, 0);
-			remsd = next;
-		}
-	} else
-#endif
-		local_irq_enable();
-}
-
 static void net_rx_action(struct softirq_action *h)
 {
-	struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
 
 	local_irq_disable();
 
-	while (!list_empty(list)) {
+	while (!list_empty(&sd->poll_list)) {
 		struct napi_struct *n;
 		int work, weight;
 
@@ -3409,7 +3417,7 @@ static void net_rx_action(struct softirq_action *h)
 		 * entries to the tail of this list, and only ->poll()
 		 * calls can remove this head entry from the list.
 		 */
-		n = list_first_entry(list, struct napi_struct, poll_list);
+		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
 
 		have = netpoll_poll_lock(n);
 
@@ -3444,13 +3452,13 @@ static void net_rx_action(struct softirq_action *h)
 				napi_complete(n);
 				local_irq_disable();
 			} else
-				list_move_tail(&n->poll_list, list);
+				list_move_tail(&n->poll_list, &sd->poll_list);
 		}
 
 		netpoll_poll_unlock(have);
 	}
 out:
-	net_rps_action_and_irq_disable();
+	net_rps_action_and_irq_enable(sd);
 
 #ifdef CONFIG_NET_DMA
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From 9ccb8975940c4ee51161152e37058e3d9e06c62f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 22 Apr 2010 01:02:07 -0700
Subject: net: Orphan and de-dst skbs earlier in xmit path.

This way GSO packets don't get handled differently.

With help from Eric Dumazet.

Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/core/dev.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 3ba774b6091c..a4a7c36917d1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1902,13 +1902,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 		if (!list_empty(&ptype_all))
 			dev_queue_xmit_nit(skb, dev);
 
-		if (netif_needs_gso(dev, skb)) {
-			if (unlikely(dev_gso_segment(skb)))
-				goto out_kfree_skb;
-			if (skb->next)
-				goto gso;
-		}
-
 		/*
 		 * If device doesnt need skb->dst, release it right now while
 		 * its hot in this cpu cache
@@ -1917,6 +1910,14 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			skb_dst_drop(skb);
 
 		skb_orphan_try(skb);
+
+		if (netif_needs_gso(dev, skb)) {
+			if (unlikely(dev_gso_segment(skb)))
+				goto out_kfree_skb;
+			if (skb->next)
+				goto gso;
+		}
+
 		rc = ops->ndo_start_xmit(skb, dev);
 		if (rc == NETDEV_TX_OK)
 			txq_trans_update(txq);
-- 
cgit v1.2.3-59-g8ed1b


From cecc74de25d2cfb08e7702cd38e3f195950f1228 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 22 Apr 2010 13:03:24 +0200
Subject: netfilter: ip_tables: convert pr_devel() to pr_debug()

We want to be able to use CONFIG_DYNAMIC_DEBUG in netfilter code, switch
the few existing pr_devel() calls to pr_debug().

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ip_tables.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index bb5e0d9b8137..3e6af1036fbc 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -363,7 +363,7 @@ ipt_do_table(struct sk_buff *skb,
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-	pr_devel("Entering %s(hook %u); sp at %u (UF %p)\n",
+	pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n",
 		 table->name, hook, origptr,
 		 get_entry(table_base, private->underflow[hook]));
 
@@ -409,11 +409,11 @@ ipt_do_table(struct sk_buff *skb,
 				if (*stackptr == 0) {
 					e = get_entry(table_base,
 					    private->underflow[hook]);
-					pr_devel("Underflow (this is normal) "
+					pr_debug("Underflow (this is normal) "
 						 "to %p\n", e);
 				} else {
 					e = jumpstack[--*stackptr];
-					pr_devel("Pulled %p out from pos %u\n",
+					pr_debug("Pulled %p out from pos %u\n",
 						 e, *stackptr);
 					e = ipt_next_entry(e);
 				}
@@ -426,7 +426,7 @@ ipt_do_table(struct sk_buff *skb,
 					break;
 				}
 				jumpstack[(*stackptr)++] = e;
-				pr_devel("Pushed %p into pos %u\n",
+				pr_debug("Pushed %p into pos %u\n",
 					 e, *stackptr - 1);
 			}
 
@@ -448,7 +448,7 @@ ipt_do_table(struct sk_buff *skb,
 			break;
 	} while (!hotdrop);
 	xt_info_rdunlock_bh();
-	pr_devel("Exiting %s; resetting sp from %u to %u\n",
+	pr_debug("Exiting %s; resetting sp from %u to %u\n",
 		 __func__, *stackptr, origptr);
 	*stackptr = origptr;
 #ifdef DEBUG_ALLOW_ALL
-- 
cgit v1.2.3-59-g8ed1b


From e802af9cabb011f09b9c19a82faef3dd315f27eb Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 22 Apr 2010 15:24:53 -0700
Subject: IPv6: Generic TTL Security Mechanism (final version)

This patch adds IPv6 support for RFC5082 Generalized TTL Security Mechanism.

Note to users of mapped addresses: the IPv6 and IPv4 socket options are separate.
The server has to deal with both the IPv4 and IPv6 socket options,
and the client has to handle the different option for each family.

On client:
	int ttl = 255;
	getaddrinfo(argv[1], argv[2], &hint, &result);

	for (rp = result; rp != NULL; rp = rp->ai_next) {
		s = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
		if (s < 0) continue;

		if (rp->ai_family == AF_INET) {
			setsockopt(s, IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl));
		} else if (rp->ai_family == AF_INET6) {
			setsockopt(s, IPPROTO_IPV6,  IPV6_UNICAST_HOPS,
					&ttl, sizeof(ttl));
		}

		if (connect(s, rp->ai_addr, rp->ai_addrlen) == 0) {
		   ...

On server:
	int minttl = 255 - maxhops;

	getaddrinfo(NULL, port, &hints, &result);
	for (rp = result; rp != NULL; rp = rp->ai_next) {
		s = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
		if (s < 0) continue;

		if (rp->ai_family == AF_INET6)
			setsockopt(s, IPPROTO_IPV6,  IPV6_MINHOPCOUNT,
					&minttl, sizeof(minttl));
		setsockopt(s, IPPROTO_IP, IP_MINTTL, &minttl, sizeof(minttl));

		if (bind(s, rp->ai_addr, rp->ai_addrlen) == 0)
			break;
...

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in6.h      |  3 +++
 include/linux/ipv6.h     |  1 +
 net/ipv6/ipv6_sockglue.c | 12 ++++++++++++
 net/ipv6/tcp_ipv6.c      | 14 +++++++++++++-
 4 files changed, 29 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/in6.h b/include/linux/in6.h
index bd55c6e46b2e..9b90cb296eb1 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -265,6 +265,9 @@ struct in6_flowlabel_req {
 #define IPV6_PREFER_SRC_CGA		0x0008
 #define IPV6_PREFER_SRC_NONCGA		0x0800
 
+/* RFC5082: Generalized Ttl Security Mechanism */
+#define IPV6_MINHOPCOUNT		73
+
 /*
  * Multicast Routing:
  * see include/linux/mroute6.h.
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index e0cc9a7db2b5..1bdbebf08d16 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -348,6 +348,7 @@ struct ipv6_pinfo {
 						 * 010: prefer public address
 						 * 100: prefer care-of address
 						 */
+	__u8			min_hopcount;
 	__u8			tclass;
 
 	__u32			dst_cookie;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 1160400e9dbd..92295ad3487a 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -767,6 +767,14 @@ pref_skip_coa:
 
 		break;
 	    }
+	case IPV6_MINHOPCOUNT:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		if (val < 0 || val > 255)
+			goto e_inval;
+		np->min_hopcount = val;
+		retv = 0;
+		break;
 	}
 
 	release_sock(sk);
@@ -1116,6 +1124,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 			val |= IPV6_PREFER_SRC_HOME;
 		break;
 
+	case IPV6_MINHOPCOUNT:
+		val = np->min_hopcount;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1ababbb41131..6603511e3673 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -353,6 +353,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;
 
+	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+		goto out;
+	}
+
 	tp = tcp_sk(sk);
 	seq = ntohl(th->seq);
 	if (sk->sk_state != TCP_LISTEN &&
@@ -1678,6 +1683,7 @@ ipv6_pktoptions:
 static int tcp_v6_rcv(struct sk_buff *skb)
 {
 	struct tcphdr *th;
+	struct ipv6hdr *hdr;
 	struct sock *sk;
 	int ret;
 	struct net *net = dev_net(skb->dev);
@@ -1704,12 +1710,13 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 		goto bad_packet;
 
 	th = tcp_hdr(skb);
+	hdr = ipv6_hdr(skb);
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff*4);
 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
 	TCP_SKB_CB(skb)->when = 0;
-	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
+	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr);
 	TCP_SKB_CB(skb)->sacked = 0;
 
 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
@@ -1720,6 +1727,11 @@ process:
 	if (sk->sk_state == TCP_TIME_WAIT)
 		goto do_time_wait;
 
+	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+		goto discard_and_relse;
+	}
+
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 
-- 
cgit v1.2.3-59-g8ed1b


From aa2ea0586d9dbe56a334d835a43b45e8c2104e77 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Thu, 22 Apr 2010 07:00:24 +0000
Subject: tcp: fix outsegs stat for TSO segments

Account for TSO segments of an skb in TCP_MIB_OUTSEGS counter.  Without
doing this, the counter can be off by orders of magnitude from the
actual number of segments sent.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/snmp.h    | 2 ++
 include/net/tcp.h     | 1 +
 net/ipv4/tcp_output.c | 5 +++--
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/snmp.h b/include/net/snmp.h
index 884fdbb74b23..92456f1035f5 100644
--- a/include/net/snmp.h
+++ b/include/net/snmp.h
@@ -133,6 +133,8 @@ struct linux_xfrm_mib {
 			__this_cpu_add(mib[0]->mibs[field], addend)
 #define SNMP_ADD_STATS_USER(mib, field, addend)	\
 			this_cpu_add(mib[1]->mibs[field], addend)
+#define SNMP_ADD_STATS(mib, field, addend)	\
+			this_cpu_add(mib[0]->mibs[field], addend)
 /*
  * Use "__typeof__(*mib[0]) *ptr" instead of "__typeof__(mib[0]) ptr"
  * to make @ptr a non-percpu pointer.
diff --git a/include/net/tcp.h b/include/net/tcp.h
index b7d83d204a93..3f87fd87bc9c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -294,6 +294,7 @@ extern struct proto tcp_prot;
 #define TCP_INC_STATS_BH(net, field)	SNMP_INC_STATS_BH((net)->mib.tcp_statistics, field)
 #define TCP_DEC_STATS(net, field)	SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
 #define TCP_ADD_STATS_USER(net, field, val) SNMP_ADD_STATS_USER((net)->mib.tcp_statistics, field, val)
+#define TCP_ADD_STATS(net, field, val)	SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
 
 extern void			tcp_v4_err(struct sk_buff *skb, u32);
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 429ad9286efc..5db3a2c6cb33 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -888,7 +888,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		tcp_event_data_sent(tp, skb, sk);
 
 	if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
-		TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
+		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
+			      tcp_skb_pcount(skb));
 
 	err = icsk->icsk_af_ops->queue_xmit(skb);
 	if (likely(err <= 0))
@@ -2503,7 +2504,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	th->window = htons(min(req->rcv_wnd, 65535U));
 	tcp_options_write((__be32 *)(th + 1), tp, &opts);
 	th->doff = (tcp_header_size >> 2);
-	TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
+	TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, tcp_skb_pcount(skb));
 
 #ifdef CONFIG_TCP_MD5SIG
 	/* Okay, we have all we need - do the md5 hash if needed */
-- 
cgit v1.2.3-59-g8ed1b


From 40eaf96271526a9f71030dd1a199ce46c045752e Mon Sep 17 00:00:00 2001
From: Paul LeoNerd Evans <leonerd@leonerd.org.uk>
Date: Thu, 22 Apr 2010 03:32:22 +0000
Subject: net: Socket filter ancilliary data access for skb->dev->type

Add an SKF_AD_HATYPE field to the packet ancillary data area, giving
access to skb->dev->type, as reported in the sll_hatype field.

When capturing packets on a PF_PACKET/SOCK_RAW socket bound to all
interfaces, there doesn't appear to be a way for the filter program to
actually find out the underlying hardware type the packet was captured
on. This patch adds such ability.

This patch also handles the case where skb->dev can be NULL, such as on
netlink sockets.

Signed-off-by: Paul Evans <leonerd@leonerd.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 3 ++-
 net/core/filter.c      | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 29a0e3db9f43..151f5d703b7e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -123,7 +123,8 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_NLATTR_NEST	16
 #define SKF_AD_MARK 	20
 #define SKF_AD_QUEUE	24
-#define SKF_AD_MAX	28
+#define SKF_AD_HATYPE	28
+#define SKF_AD_MAX	32
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index ff943bed21af..da69fb728d32 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -302,6 +302,8 @@ load_b:
 			A = skb->pkt_type;
 			continue;
 		case SKF_AD_IFINDEX:
+			if (!skb->dev)
+				return 0;
 			A = skb->dev->ifindex;
 			continue;
 		case SKF_AD_MARK:
@@ -310,6 +312,11 @@ load_b:
 		case SKF_AD_QUEUE:
 			A = skb->queue_mapping;
 			continue;
+		case SKF_AD_HATYPE:
+			if (!skb->dev)
+				return 0;
+			A = skb->dev->type;
+			continue;
 		case SKF_AD_NLATTR: {
 			struct nlattr *nla;
 
-- 
cgit v1.2.3-59-g8ed1b


From 5ebfbc06aae941484326c9e7e9c4d85330f63591 Mon Sep 17 00:00:00 2001
From: Andrew Hendry <andrew.hendry@gmail.com>
Date: Thu, 22 Apr 2010 16:12:36 -0700
Subject: X25: Add if_x25.h and x25 to device identifiers

V2 Feedback from John Hughes.
- Add header for userspace implementations such as xot/xoe to use
- Use explicit values for interface stability
- No changes to driver patches

V1
- Use identifiers instead of magic numbers for X25 layer 3 to device interface.
- Also fixed checkpatch notes on updated code.

[ Add new user header to include/linux/Kbuild  -DaveM ]

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild    |  1 +
 include/linux/if_x25.h  | 26 ++++++++++++++++++++++++++
 include/net/x25device.h |  1 +
 net/x25/x25_dev.c       | 36 ++++++++++++++++++++----------------
 4 files changed, 48 insertions(+), 16 deletions(-)
 create mode 100644 include/linux/if_x25.h

(limited to 'net')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index e2ea0b2159cd..2fc8e14cc24a 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -94,6 +94,7 @@ header-y += if_ppp.h
 header-y += if_slip.h
 header-y += if_strip.h
 header-y += if_tun.h
+header-y += if_x25.h
 header-y += in_route.h
 header-y += ioctl.h
 header-y += ip6_tunnel.h
diff --git a/include/linux/if_x25.h b/include/linux/if_x25.h
new file mode 100644
index 000000000000..897765f5feb8
--- /dev/null
+++ b/include/linux/if_x25.h
@@ -0,0 +1,26 @@
+/*
+ *  Linux X.25 packet to device interface
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ */
+
+#ifndef _IF_X25_H
+#define _IF_X25_H
+
+#include <linux/types.h>
+
+/* Documentation/networking/x25-iface.txt */
+#define X25_IFACE_DATA		0x00
+#define X25_IFACE_CONNECT	0x01
+#define X25_IFACE_DISCONNECT	0x02
+#define X25_IFACE_PARAMS	0x03
+
+#endif /* _IF_X25_H */
diff --git a/include/net/x25device.h b/include/net/x25device.h
index 1415bcf93980..1fa08b49f1c2 100644
--- a/include/net/x25device.h
+++ b/include/net/x25device.h
@@ -3,6 +3,7 @@
 
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
+#include <linux/if_x25.h>
 #include <linux/skbuff.h>
 
 static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev)
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index b9ef682230a0..9005f6daeab5 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -24,6 +24,7 @@
 #include <net/sock.h>
 #include <linux/if_arp.h>
 #include <net/x25.h>
+#include <net/x25device.h>
 
 static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
 {
@@ -115,19 +116,22 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	switch (skb->data[0]) {
-		case 0x00:
-			skb_pull(skb, 1);
-			if (x25_receive_data(skb, nb)) {
-				x25_neigh_put(nb);
-				goto out;
-			}
-			break;
-		case 0x01:
-			x25_link_established(nb);
-			break;
-		case 0x02:
-			x25_link_terminated(nb);
-			break;
+
+	case X25_IFACE_DATA:
+		skb_pull(skb, 1);
+		if (x25_receive_data(skb, nb)) {
+			x25_neigh_put(nb);
+			goto out;
+		}
+		break;
+
+	case X25_IFACE_CONNECT:
+		x25_link_established(nb);
+		break;
+
+	case X25_IFACE_DISCONNECT:
+		x25_link_terminated(nb);
+		break;
 	}
 	x25_neigh_put(nb);
 drop:
@@ -148,7 +152,7 @@ void x25_establish_link(struct x25_neigh *nb)
 				return;
 			}
 			ptr  = skb_put(skb, 1);
-			*ptr = 0x01;
+			*ptr = X25_IFACE_CONNECT;
 			break;
 
 #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
@@ -184,7 +188,7 @@ void x25_terminate_link(struct x25_neigh *nb)
 	}
 
 	ptr  = skb_put(skb, 1);
-	*ptr = 0x02;
+	*ptr = X25_IFACE_DISCONNECT;
 
 	skb->protocol = htons(ETH_P_X25);
 	skb->dev      = nb->dev;
@@ -200,7 +204,7 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb)
 	switch (nb->dev->type) {
 		case ARPHRD_X25:
 			dptr  = skb_push(skb, 1);
-			*dptr = 0x00;
+			*dptr = X25_IFACE_DATA;
 			break;
 
 #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
-- 
cgit v1.2.3-59-g8ed1b


From 6e7cb8370760ec17e10098399822292def8d84f3 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sun, 18 Apr 2010 12:42:05 +0900
Subject: ipv6 mcast: Introduce include/net/mld.h for MLD definitions.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/mld.h |  75 ++++++++++++++++++++++++++++++
 net/ipv6/mcast.c  | 135 ++++++++++++++++--------------------------------------
 2 files changed, 115 insertions(+), 95 deletions(-)
 create mode 100644 include/net/mld.h

(limited to 'net')

diff --git a/include/net/mld.h b/include/net/mld.h
new file mode 100644
index 000000000000..467143cd4e2f
--- /dev/null
+++ b/include/net/mld.h
@@ -0,0 +1,75 @@
+#ifndef LINUX_MLD_H
+#define LINUX_MLD_H
+
+#include <linux/in6.h>
+#include <linux/icmpv6.h>
+
+/* MLDv1 Query/Report/Done */
+struct mld_msg {
+	struct icmp6hdr		mld_hdr;
+	struct in6_addr		mld_mca;
+};
+
+#define mld_type		mld_hdr.icmp6_type
+#define mld_code		mld_hdr.icmp6_code
+#define mld_cksum		mld_hdr.icmp6_cksum
+#define mld_maxdelay		mld_hdr.icmp6_maxdelay
+#define mld_reserved		mld_hdr.icmp6_dataun.un_data16[1]
+
+/* Multicast Listener Discovery version 2 headers */
+/* MLDv2 Report */
+struct mld2_grec {
+	__u8		grec_type;
+	__u8		grec_auxwords;
+	__be16		grec_nsrcs;
+	struct in6_addr	grec_mca;
+	struct in6_addr	grec_src[0];
+};
+
+struct mld2_report {
+	struct icmp6hdr		mld2r_hdr;
+	struct mld2_grec	mld2r_grec[0];
+};
+
+#define mld2r_type		mld2r_hdr.icmp6_type
+#define mld2r_resv1		mld2r_hdr.icmp6_code
+#define mld2r_cksum		mld2r_hdr.icmp6_cksum
+#define mld2r_resv2		mld2r_hdr.icmp6_dataun.un_data16[0]
+#define mld2r_ngrec		mld2r_hdr.icmp6_dataun.un_data16[1]
+
+/* MLDv2 Query */
+struct mld2_query {
+	struct icmp6hdr		mld2q_hdr;
+	struct in6_addr		mld2q_mca;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+	__u8			mld2q_qrv:3,
+				mld2q_suppress:1,
+				mld2q_resv2:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+	__u8			mld2q_resv2:4,
+				mld2q_suppress:1,
+				mld2q_qrv:3;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+	__u8			mld2q_qqic;
+	__be16			mld2q_nsrcs;
+	struct in6_addr		mld2q_srcs[0];
+};
+
+#define mld2q_type		mld2q_hdr.icmp6_type
+#define mld2q_code		mld2q_hdr.icmp6_code
+#define mld2q_cksum		mld2q_hdr.icmp6_cksum
+#define mld2q_mrc		mld2q_hdr.icmp6_maxdelay
+#define mld2q_resv1		mld2q_hdr.icmp6_dataun.un_data16[1]
+
+/* Max Response Code */
+#define MLDV2_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value))
+#define MLDV2_EXP(thresh, nbmant, nbexp, value) \
+	((value) < (thresh) ? (value) : \
+	((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \
+	(MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp))))
+
+#define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value)
+
+#endif
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 62ed08213d91..006aee683a0f 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -44,6 +44,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
+#include <net/mld.h>
 
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
@@ -71,54 +72,11 @@
 #define MDBG(x)
 #endif
 
-/*
- *  These header formats should be in a separate include file, but icmpv6.h
- *  doesn't have in6_addr defined in all cases, there is no __u128, and no
- *  other files reference these.
- *
- *  			+-DLS 4/14/03
- */
-
-/* Multicast Listener Discovery version 2 headers */
-
-struct mld2_grec {
-	__u8		grec_type;
-	__u8		grec_auxwords;
-	__be16		grec_nsrcs;
-	struct in6_addr	grec_mca;
-	struct in6_addr	grec_src[0];
-};
-
-struct mld2_report {
-	__u8	type;
-	__u8	resv1;
-	__sum16	csum;
-	__be16	resv2;
-	__be16	ngrec;
-	struct mld2_grec grec[0];
-};
-
-struct mld2_query {
-	__u8 type;
-	__u8 code;
-	__sum16 csum;
-	__be16 mrc;
-	__be16 resv1;
-	struct in6_addr mca;
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u8 qrv:3,
-	     suppress:1,
-	     resv2:4;
-#elif defined(__BIG_ENDIAN_BITFIELD)
-	__u8 resv2:4,
-	     suppress:1,
-	     qrv:3;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
-	__u8 qqic;
-	__be16 nsrcs;
-	struct in6_addr srcs[0];
+/* Ensure that we have struct in6_addr aligned on 32bit word. */
+static void *__mld2_query_bugs[] __attribute__((__unused__)) = {
+	BUILD_BUG_ON_NULL(offsetof(struct mld2_query, mld2q_srcs) % 4),
+	BUILD_BUG_ON_NULL(offsetof(struct mld2_report, mld2r_grec) % 4),
+	BUILD_BUG_ON_NULL(offsetof(struct mld2_grec, grec_mca) % 4)
 };
 
 static struct in6_addr mld2_all_mcr = MLD2_ALL_MCR_INIT;
@@ -157,14 +115,6 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
 		((idev)->mc_v1_seen && \
 		time_before(jiffies, (idev)->mc_v1_seen)))
 
-#define MLDV2_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value))
-#define MLDV2_EXP(thresh, nbmant, nbexp, value) \
-	((value) < (thresh) ? (value) : \
-	((MLDV2_MASK(value, nbmant) | (1<<(nbmant))) << \
-	(MLDV2_MASK((value) >> (nbmant), nbexp) + (nbexp))))
-
-#define MLDV2_MRC(value) MLDV2_EXP(0x8000, 12, 3, value)
-
 #define IPV6_MLD_MAX_MSF	64
 
 int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF;
@@ -1161,7 +1111,7 @@ int igmp6_event_query(struct sk_buff *skb)
 	struct in6_addr *group;
 	unsigned long max_delay;
 	struct inet6_dev *idev;
-	struct icmp6hdr *hdr;
+	struct mld_msg *mld;
 	int group_type;
 	int mark = 0;
 	int len;
@@ -1182,8 +1132,8 @@ int igmp6_event_query(struct sk_buff *skb)
 	if (idev == NULL)
 		return 0;
 
-	hdr = icmp6_hdr(skb);
-	group = (struct in6_addr *) (hdr + 1);
+	mld = (struct mld_msg *)icmp6_hdr(skb);
+	group = &mld->mld_mca;
 	group_type = ipv6_addr_type(group);
 
 	if (group_type != IPV6_ADDR_ANY &&
@@ -1197,7 +1147,7 @@ int igmp6_event_query(struct sk_buff *skb)
 		/* MLDv1 router present */
 
 		/* Translate milliseconds to jiffies */
-		max_delay = (ntohs(hdr->icmp6_maxdelay)*HZ)/1000;
+		max_delay = (ntohs(mld->mld_maxdelay)*HZ)/1000;
 
 		switchback = (idev->mc_qrv + 1) * max_delay;
 		idev->mc_v1_seen = jiffies + switchback;
@@ -1216,14 +1166,14 @@ int igmp6_event_query(struct sk_buff *skb)
 			return -EINVAL;
 		}
 		mlh2 = (struct mld2_query *)skb_transport_header(skb);
-		max_delay = (MLDV2_MRC(ntohs(mlh2->mrc))*HZ)/1000;
+		max_delay = (MLDV2_MRC(ntohs(mlh2->mld2q_mrc))*HZ)/1000;
 		if (!max_delay)
 			max_delay = 1;
 		idev->mc_maxdelay = max_delay;
-		if (mlh2->qrv)
-			idev->mc_qrv = mlh2->qrv;
+		if (mlh2->mld2q_qrv)
+			idev->mc_qrv = mlh2->mld2q_qrv;
 		if (group_type == IPV6_ADDR_ANY) { /* general query */
-			if (mlh2->nsrcs) {
+			if (mlh2->mld2q_nsrcs) {
 				in6_dev_put(idev);
 				return -EINVAL; /* no sources allowed */
 			}
@@ -1232,9 +1182,9 @@ int igmp6_event_query(struct sk_buff *skb)
 			return 0;
 		}
 		/* mark sources to include, if group & source-specific */
-		if (mlh2->nsrcs != 0) {
+		if (mlh2->mld2q_nsrcs != 0) {
 			if (!pskb_may_pull(skb, srcs_offset +
-			    ntohs(mlh2->nsrcs) * sizeof(struct in6_addr))) {
+			    ntohs(mlh2->mld2q_nsrcs) * sizeof(struct in6_addr))) {
 				in6_dev_put(idev);
 				return -EINVAL;
 			}
@@ -1270,7 +1220,7 @@ int igmp6_event_query(struct sk_buff *skb)
 					ma->mca_flags &= ~MAF_GSQUERY;
 			}
 			if (!(ma->mca_flags & MAF_GSQUERY) ||
-			    mld_marksources(ma, ntohs(mlh2->nsrcs), mlh2->srcs))
+			    mld_marksources(ma, ntohs(mlh2->mld2q_nsrcs), mlh2->mld2q_srcs))
 				igmp6_group_queried(ma, max_delay);
 			spin_unlock_bh(&ma->mca_lock);
 			break;
@@ -1286,9 +1236,8 @@ int igmp6_event_query(struct sk_buff *skb)
 int igmp6_event_report(struct sk_buff *skb)
 {
 	struct ifmcaddr6 *ma;
-	struct in6_addr *addrp;
 	struct inet6_dev *idev;
-	struct icmp6hdr *hdr;
+	struct mld_msg *mld;
 	int addr_type;
 
 	/* Our own report looped back. Ignore it. */
@@ -1300,10 +1249,10 @@ int igmp6_event_report(struct sk_buff *skb)
 	    skb->pkt_type != PACKET_BROADCAST)
 		return 0;
 
-	if (!pskb_may_pull(skb, sizeof(struct in6_addr)))
+	if (!pskb_may_pull(skb, sizeof(*mld) - sizeof(struct icmp6hdr)))
 		return -EINVAL;
 
-	hdr = icmp6_hdr(skb);
+	mld = (struct mld_msg *)icmp6_hdr(skb);
 
 	/* Drop reports with not link local source */
 	addr_type = ipv6_addr_type(&ipv6_hdr(skb)->saddr);
@@ -1311,8 +1260,6 @@ int igmp6_event_report(struct sk_buff *skb)
 	    !(addr_type&IPV6_ADDR_LINKLOCAL))
 		return -EINVAL;
 
-	addrp = (struct in6_addr *) (hdr + 1);
-
 	idev = in6_dev_get(skb->dev);
 	if (idev == NULL)
 		return -ENODEV;
@@ -1323,7 +1270,7 @@ int igmp6_event_report(struct sk_buff *skb)
 
 	read_lock_bh(&idev->lock);
 	for (ma = idev->mc_list; ma; ma=ma->next) {
-		if (ipv6_addr_equal(&ma->mca_addr, addrp)) {
+		if (ipv6_addr_equal(&ma->mca_addr, &mld->mld_mca)) {
 			spin_lock(&ma->mca_lock);
 			if (del_timer(&ma->mca_timer))
 				atomic_dec(&ma->mca_refcnt);
@@ -1432,11 +1379,11 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
 	skb_set_transport_header(skb, skb_tail_pointer(skb) - skb->data);
 	skb_put(skb, sizeof(*pmr));
 	pmr = (struct mld2_report *)skb_transport_header(skb);
-	pmr->type = ICMPV6_MLD2_REPORT;
-	pmr->resv1 = 0;
-	pmr->csum = 0;
-	pmr->resv2 = 0;
-	pmr->ngrec = 0;
+	pmr->mld2r_type = ICMPV6_MLD2_REPORT;
+	pmr->mld2r_resv1 = 0;
+	pmr->mld2r_cksum = 0;
+	pmr->mld2r_resv2 = 0;
+	pmr->mld2r_ngrec = 0;
 	return skb;
 }
 
@@ -1458,9 +1405,10 @@ static void mld_sendpack(struct sk_buff *skb)
 	mldlen = skb->tail - skb->transport_header;
 	pip6->payload_len = htons(payload_len);
 
-	pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
-		IPPROTO_ICMPV6, csum_partial(skb_transport_header(skb),
-					     mldlen, 0));
+	pmr->mld2r_cksum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
+					   IPPROTO_ICMPV6,
+					   csum_partial(skb_transport_header(skb),
+							mldlen, 0));
 
 	dst = icmp6_dst_alloc(skb->dev, NULL, &ipv6_hdr(skb)->daddr);
 
@@ -1521,7 +1469,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 	pgr->grec_nsrcs = 0;
 	pgr->grec_mca = pmc->mca_addr;	/* structure copy */
 	pmr = (struct mld2_report *)skb_transport_header(skb);
-	pmr->ngrec = htons(ntohs(pmr->ngrec)+1);
+	pmr->mld2r_ngrec = htons(ntohs(pmr->mld2r_ngrec)+1);
 	*ppgr = pgr;
 	return skb;
 }
@@ -1557,7 +1505,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
 
 	/* EX and TO_EX get a fresh packet, if needed */
 	if (truncate) {
-		if (pmr && pmr->ngrec &&
+		if (pmr && pmr->mld2r_ngrec &&
 		    AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
 			if (skb)
 				mld_sendpack(skb);
@@ -1770,9 +1718,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 	struct sock *sk = net->ipv6.igmp_sk;
 	struct inet6_dev *idev;
 	struct sk_buff *skb;
-	struct icmp6hdr *hdr;
+	struct mld_msg *hdr;
 	const struct in6_addr *snd_addr, *saddr;
-	struct in6_addr *addrp;
 	struct in6_addr addr_buf;
 	int err, len, payload_len, full_len;
 	u8 ra[8] = { IPPROTO_ICMPV6, 0,
@@ -1820,16 +1767,14 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 
 	memcpy(skb_put(skb, sizeof(ra)), ra, sizeof(ra));
 
-	hdr = (struct icmp6hdr *) skb_put(skb, sizeof(struct icmp6hdr));
-	memset(hdr, 0, sizeof(struct icmp6hdr));
-	hdr->icmp6_type = type;
+	hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg));
+	memset(hdr, 0, sizeof(struct mld_msg));
+	hdr->mld_type = type;
+	ipv6_addr_copy(&hdr->mld_mca, addr);
 
-	addrp = (struct in6_addr *) skb_put(skb, sizeof(struct in6_addr));
-	ipv6_addr_copy(addrp, addr);
-
-	hdr->icmp6_cksum = csum_ipv6_magic(saddr, snd_addr, len,
-					   IPPROTO_ICMPV6,
-					   csum_partial(hdr, len, 0));
+	hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len,
+					 IPPROTO_ICMPV6,
+					 csum_partial(hdr, len, 0));
 
 	idev = in6_dev_get(skb->dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From 8ef2a9a59854994bace13b5c4f7edc2c8d4d124e Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Sun, 18 Apr 2010 12:42:07 +0900
Subject: bridge br_multicast: Make functions less ipv4 dependent.

Introduce struct br_ip{} to store an IP address together with its
protocol, and make the functions more generic so that we can support
both IPv4 and IPv6 with less pain.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/bridge/br_multicast.c | 197 +++++++++++++++++++++++++++++++++-------------
 net/bridge/br_private.h   |  12 ++-
 2 files changed, 151 insertions(+), 58 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 81bfdfe14ce5..64a3e4f74348 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -27,48 +27,86 @@
 
 #include "br_private.h"
 
-static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb, __be32 ip)
+static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
+{
+	if (a->proto != b->proto)
+		return 0;
+	switch (a->proto) {
+	case htons(ETH_P_IP):
+		return a->u.ip4 == b->u.ip4;
+	}
+	return 0;
+}
+
+static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip)
 {
 	return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1);
 }
 
+static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb,
+			     struct br_ip *ip)
+{
+	switch (ip->proto) {
+	case htons(ETH_P_IP):
+		return __br_ip4_hash(mdb, ip->u.ip4);
+	}
+	return 0;
+}
+
 static struct net_bridge_mdb_entry *__br_mdb_ip_get(
-	struct net_bridge_mdb_htable *mdb, __be32 dst, int hash)
+	struct net_bridge_mdb_htable *mdb, struct br_ip *dst, int hash)
 {
 	struct net_bridge_mdb_entry *mp;
 	struct hlist_node *p;
 
 	hlist_for_each_entry_rcu(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
-		if (dst == mp->addr)
+		if (br_ip_equal(&mp->addr, dst))
 			return mp;
 	}
 
 	return NULL;
 }
 
-static struct net_bridge_mdb_entry *br_mdb_ip_get(
+static struct net_bridge_mdb_entry *br_mdb_ip4_get(
 	struct net_bridge_mdb_htable *mdb, __be32 dst)
 {
-	if (!mdb)
-		return NULL;
+	struct br_ip br_dst;
+
+	br_dst.u.ip4 = dst;
+	br_dst.proto = htons(ETH_P_IP);
 
+	return __br_mdb_ip_get(mdb, &br_dst, __br_ip4_hash(mdb, dst));
+}
+
+static struct net_bridge_mdb_entry *br_mdb_ip_get(
+	struct net_bridge_mdb_htable *mdb, struct br_ip *dst)
+{
 	return __br_mdb_ip_get(mdb, dst, br_ip_hash(mdb, dst));
 }
 
 struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 					struct sk_buff *skb)
 {
-	if (br->multicast_disabled)
+	struct net_bridge_mdb_htable *mdb = br->mdb;
+	struct br_ip ip;
+
+	if (!mdb || br->multicast_disabled)
+		return NULL;
+
+	if (BR_INPUT_SKB_CB(skb)->igmp)
 		return NULL;
 
+	ip.proto = skb->protocol;
+
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-		if (BR_INPUT_SKB_CB(skb)->igmp)
-			break;
-		return br_mdb_ip_get(br->mdb, ip_hdr(skb)->daddr);
+		ip.u.ip4 = ip_hdr(skb)->daddr;
+		break;
+	default:
+		return NULL;
 	}
 
-	return NULL;
+	return br_mdb_ip_get(mdb, &ip);
 }
 
 static void br_mdb_free(struct rcu_head *head)
@@ -95,7 +133,7 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new,
 	for (i = 0; i < old->max; i++)
 		hlist_for_each_entry(mp, p, &old->mhash[i], hlist[old->ver])
 			hlist_add_head(&mp->hlist[new->ver],
-				       &new->mhash[br_ip_hash(new, mp->addr)]);
+				       &new->mhash[br_ip_hash(new, &mp->addr)]);
 
 	if (!elasticity)
 		return 0;
@@ -163,7 +201,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
 	struct net_bridge_port_group *p;
 	struct net_bridge_port_group **pp;
 
-	mp = br_mdb_ip_get(mdb, pg->addr);
+	mp = br_mdb_ip_get(mdb, &pg->addr);
 	if (WARN_ON(!mp))
 		return;
 
@@ -249,8 +287,8 @@ out:
 	return 0;
 }
 
-static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
-						__be32 group)
+static struct sk_buff *br_ip4_multicast_alloc_query(struct net_bridge *br,
+						    __be32 group)
 {
 	struct sk_buff *skb;
 	struct igmphdr *ih;
@@ -314,12 +352,22 @@ out:
 	return skb;
 }
 
+static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
+						struct br_ip *addr)
+{
+	switch (addr->proto) {
+	case htons(ETH_P_IP):
+		return br_ip4_multicast_alloc_query(br, addr->u.ip4);
+	}
+	return NULL;
+}
+
 static void br_multicast_send_group_query(struct net_bridge_mdb_entry *mp)
 {
 	struct net_bridge *br = mp->br;
 	struct sk_buff *skb;
 
-	skb = br_multicast_alloc_query(br, mp->addr);
+	skb = br_multicast_alloc_query(br, &mp->addr);
 	if (!skb)
 		goto timer;
 
@@ -353,7 +401,7 @@ static void br_multicast_send_port_group_query(struct net_bridge_port_group *pg)
 	struct net_bridge *br = port->br;
 	struct sk_buff *skb;
 
-	skb = br_multicast_alloc_query(br, pg->addr);
+	skb = br_multicast_alloc_query(br, &pg->addr);
 	if (!skb)
 		goto timer;
 
@@ -383,8 +431,8 @@ out:
 }
 
 static struct net_bridge_mdb_entry *br_multicast_get_group(
-	struct net_bridge *br, struct net_bridge_port *port, __be32 group,
-	int hash)
+	struct net_bridge *br, struct net_bridge_port *port,
+	struct br_ip *group, int hash)
 {
 	struct net_bridge_mdb_htable *mdb = br->mdb;
 	struct net_bridge_mdb_entry *mp;
@@ -396,9 +444,8 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
 
 	hlist_for_each_entry(mp, p, &mdb->mhash[hash], hlist[mdb->ver]) {
 		count++;
-		if (unlikely(group == mp->addr)) {
+		if (unlikely(br_ip_equal(group, &mp->addr)))
 			return mp;
-		}
 	}
 
 	elasticity = 0;
@@ -463,7 +510,8 @@ err:
 }
 
 static struct net_bridge_mdb_entry *br_multicast_new_group(
-	struct net_bridge *br, struct net_bridge_port *port, __be32 group)
+	struct net_bridge *br, struct net_bridge_port *port,
+	struct br_ip *group)
 {
 	struct net_bridge_mdb_htable *mdb = br->mdb;
 	struct net_bridge_mdb_entry *mp;
@@ -496,7 +544,7 @@ rehash:
 		goto out;
 
 	mp->br = br;
-	mp->addr = group;
+	mp->addr = *group;
 	setup_timer(&mp->timer, br_multicast_group_expired,
 		    (unsigned long)mp);
 	setup_timer(&mp->query_timer, br_multicast_group_query_expired,
@@ -510,7 +558,8 @@ out:
 }
 
 static int br_multicast_add_group(struct net_bridge *br,
-				  struct net_bridge_port *port, __be32 group)
+				  struct net_bridge_port *port,
+				  struct br_ip *group)
 {
 	struct net_bridge_mdb_entry *mp;
 	struct net_bridge_port_group *p;
@@ -518,9 +567,6 @@ static int br_multicast_add_group(struct net_bridge *br,
 	unsigned long now = jiffies;
 	int err;
 
-	if (ipv4_is_local_multicast(group))
-		return 0;
-
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev) ||
 	    (port && port->state == BR_STATE_DISABLED))
@@ -549,7 +595,7 @@ static int br_multicast_add_group(struct net_bridge *br,
 	if (unlikely(!p))
 		goto err;
 
-	p->addr = group;
+	p->addr = *group;
 	p->port = port;
 	p->next = *pp;
 	hlist_add_head(&p->mglist, &port->mglist);
@@ -570,6 +616,21 @@ err:
 	return err;
 }
 
+static int br_ip4_multicast_add_group(struct net_bridge *br,
+				      struct net_bridge_port *port,
+				      __be32 group)
+{
+	struct br_ip br_group;
+
+	if (ipv4_is_local_multicast(group))
+		return 0;
+
+	br_group.u.ip4 = group;
+	br_group.proto = htons(ETH_P_IP);
+
+	return br_multicast_add_group(br, port, &br_group);
+}
+
 static void br_multicast_router_expired(unsigned long data)
 {
 	struct net_bridge_port *port = (void *)data;
@@ -591,19 +652,15 @@ static void br_multicast_local_router_expired(unsigned long data)
 {
 }
 
-static void br_multicast_send_query(struct net_bridge *br,
-				    struct net_bridge_port *port, u32 sent)
+static void __br_multicast_send_query(struct net_bridge *br,
+				      struct net_bridge_port *port,
+				      struct br_ip *ip)
 {
-	unsigned long time;
 	struct sk_buff *skb;
 
-	if (!netif_running(br->dev) || br->multicast_disabled ||
-	    timer_pending(&br->multicast_querier_timer))
-		return;
-
-	skb = br_multicast_alloc_query(br, 0);
+	skb = br_multicast_alloc_query(br, ip);
 	if (!skb)
-		goto timer;
+		return;
 
 	if (port) {
 		__skb_push(skb, sizeof(struct ethhdr));
@@ -612,8 +669,23 @@ static void br_multicast_send_query(struct net_bridge *br,
 			dev_queue_xmit);
 	} else
 		netif_rx(skb);
+}
+
+static void br_multicast_send_query(struct net_bridge *br,
+				    struct net_bridge_port *port, u32 sent)
+{
+	unsigned long time;
+	struct br_ip br_group;
+
+	if (!netif_running(br->dev) || br->multicast_disabled ||
+	    timer_pending(&br->multicast_querier_timer))
+		return;
+
+	br_group.u.ip4 = 0;
+	br_group.proto = htons(ETH_P_IP);
+
+	__br_multicast_send_query(br, port, &br_group);
 
-timer:
 	time = jiffies;
 	time += sent < br->multicast_startup_query_count ?
 		br->multicast_startup_query_interval :
@@ -698,9 +770,9 @@ void br_multicast_disable_port(struct net_bridge_port *port)
 	spin_unlock(&br->multicast_lock);
 }
 
-static int br_multicast_igmp3_report(struct net_bridge *br,
-				     struct net_bridge_port *port,
-				     struct sk_buff *skb)
+static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
+					 struct net_bridge_port *port,
+					 struct sk_buff *skb)
 {
 	struct igmpv3_report *ih;
 	struct igmpv3_grec *grec;
@@ -745,7 +817,7 @@ static int br_multicast_igmp3_report(struct net_bridge *br,
 			continue;
 		}
 
-		err = br_multicast_add_group(br, port, group);
+		err = br_ip4_multicast_add_group(br, port, group);
 		if (err)
 			break;
 	}
@@ -800,7 +872,7 @@ timer:
 
 static void br_multicast_query_received(struct net_bridge *br,
 					struct net_bridge_port *port,
-					__be32 saddr)
+					int saddr)
 {
 	if (saddr)
 		mod_timer(&br->multicast_querier_timer,
@@ -811,9 +883,9 @@ static void br_multicast_query_received(struct net_bridge *br,
 	br_multicast_mark_router(br, port);
 }
 
-static int br_multicast_query(struct net_bridge *br,
-			      struct net_bridge_port *port,
-			      struct sk_buff *skb)
+static int br_ip4_multicast_query(struct net_bridge *br,
+				  struct net_bridge_port *port,
+				  struct sk_buff *skb)
 {
 	struct iphdr *iph = ip_hdr(skb);
 	struct igmphdr *ih = igmp_hdr(skb);
@@ -831,7 +903,7 @@ static int br_multicast_query(struct net_bridge *br,
 	    (port && port->state == BR_STATE_DISABLED))
 		goto out;
 
-	br_multicast_query_received(br, port, iph->saddr);
+	br_multicast_query_received(br, port, !!iph->saddr);
 
 	group = ih->group;
 
@@ -859,7 +931,7 @@ static int br_multicast_query(struct net_bridge *br,
 	if (!group)
 		goto out;
 
-	mp = br_mdb_ip_get(br->mdb, group);
+	mp = br_mdb_ip4_get(br->mdb, group);
 	if (!mp)
 		goto out;
 
@@ -885,7 +957,7 @@ out:
 
 static void br_multicast_leave_group(struct net_bridge *br,
 				     struct net_bridge_port *port,
-				     __be32 group)
+				     struct br_ip *group)
 {
 	struct net_bridge_mdb_htable *mdb;
 	struct net_bridge_mdb_entry *mp;
@@ -893,9 +965,6 @@ static void br_multicast_leave_group(struct net_bridge *br,
 	unsigned long now;
 	unsigned long time;
 
-	if (ipv4_is_local_multicast(group))
-		return;
-
 	spin_lock(&br->multicast_lock);
 	if (!netif_running(br->dev) ||
 	    (port && port->state == BR_STATE_DISABLED) ||
@@ -946,6 +1015,22 @@ out:
 	spin_unlock(&br->multicast_lock);
 }
 
+static void br_ip4_multicast_leave_group(struct net_bridge *br,
+					 struct net_bridge_port *port,
+					 __be32 group)
+{
+	struct br_ip br_group;
+
+	if (ipv4_is_local_multicast(group))
+		return;
+
+	br_group.u.ip4 = group;
+	br_group.proto = htons(ETH_P_IP);
+
+	br_multicast_leave_group(br, port, &br_group);
+}
+
+
 static int br_multicast_ipv4_rcv(struct net_bridge *br,
 				 struct net_bridge_port *port,
 				 struct sk_buff *skb)
@@ -1023,16 +1108,16 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 	case IGMP_HOST_MEMBERSHIP_REPORT:
 	case IGMPV2_HOST_MEMBERSHIP_REPORT:
 		BR_INPUT_SKB_CB(skb2)->mrouters_only = 1;
-		err = br_multicast_add_group(br, port, ih->group);
+		err = br_ip4_multicast_add_group(br, port, ih->group);
 		break;
 	case IGMPV3_HOST_MEMBERSHIP_REPORT:
-		err = br_multicast_igmp3_report(br, port, skb2);
+		err = br_ip4_multicast_igmp3_report(br, port, skb2);
 		break;
 	case IGMP_HOST_MEMBERSHIP_QUERY:
-		err = br_multicast_query(br, port, skb2);
+		err = br_ip4_multicast_query(br, port, skb2);
 		break;
 	case IGMP_HOST_LEAVE_MESSAGE:
-		br_multicast_leave_group(br, port, ih->group);
+		br_ip4_multicast_leave_group(br, port, ih->group);
 		break;
 	}
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 63181e4a2a67..45d11e49fbbb 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -45,6 +45,14 @@ struct mac_addr
 	unsigned char	addr[6];
 };
 
+struct br_ip
+{
+	union {
+		__be32	ip4;
+	} u;
+	__be16		proto;
+};
+
 struct net_bridge_fdb_entry
 {
 	struct hlist_node		hlist;
@@ -64,7 +72,7 @@ struct net_bridge_port_group {
 	struct rcu_head			rcu;
 	struct timer_list		timer;
 	struct timer_list		query_timer;
-	__be32				addr;
+	struct br_ip			addr;
 	u32				queries_sent;
 };
 
@@ -77,7 +85,7 @@ struct net_bridge_mdb_entry
 	struct rcu_head			rcu;
 	struct timer_list		timer;
 	struct timer_list		query_timer;
-	__be32				addr;
+	struct br_ip			addr;
 	u32				queries_sent;
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 08b202b6726459626c73ecfa08fcdc8c3efc76c2 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Fri, 23 Apr 2010 01:54:22 +0900
Subject: bridge br_multicast: IPv6 MLD support.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/bridge/Kconfig        |   6 +-
 net/bridge/br_multicast.c | 424 +++++++++++++++++++++++++++++++++++++++++++++-
 net/bridge/br_private.h   |   3 +
 3 files changed, 429 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index d115d5cea5b6..9190ae462cb4 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -33,14 +33,14 @@ config BRIDGE
 	  If unsure, say N.
 
 config BRIDGE_IGMP_SNOOPING
-	bool "IGMP snooping"
+	bool "IGMP/MLD snooping"
 	depends on BRIDGE
 	depends on INET
 	default y
 	---help---
 	  If you say Y here, then the Ethernet bridge will be able selectively
-	  forward multicast traffic based on IGMP traffic received from each
-	  port.
+	  forward multicast traffic based on IGMP/MLD traffic received from
+	  each port.
 
 	  Say N to exclude this support and reduce the binary size.
 
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 64a3e4f74348..38d1fbde5fb8 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -24,9 +24,24 @@
 #include <linux/slab.h>
 #include <linux/timer.h>
 #include <net/ip.h>
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#include <net/ipv6.h>
+#include <net/mld.h>
+#include <net/addrconf.h>
+#endif
 
 #include "br_private.h"
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static inline int ipv6_is_local_multicast(const struct in6_addr *addr)
+{
+	if (ipv6_addr_is_multicast(addr) &&
+	    IPV6_ADDR_MC_SCOPE(addr) <= IPV6_ADDR_SCOPE_LINKLOCAL)
+		return 1;
+	return 0;
+}
+#endif
+
 static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
 {
 	if (a->proto != b->proto)
@@ -34,6 +49,10 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
 	switch (a->proto) {
 	case htons(ETH_P_IP):
 		return a->u.ip4 == b->u.ip4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case htons(ETH_P_IPV6):
+		return ipv6_addr_equal(&a->u.ip6, &b->u.ip6);
+#endif
 	}
 	return 0;
 }
@@ -43,12 +62,24 @@ static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip)
 	return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1);
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb,
+				const struct in6_addr *ip)
+{
+	return jhash2((__force u32 *)ip->s6_addr32, 4, mdb->secret) & (mdb->max - 1);
+}
+#endif
+
 static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb,
 			     struct br_ip *ip)
 {
 	switch (ip->proto) {
 	case htons(ETH_P_IP):
 		return __br_ip4_hash(mdb, ip->u.ip4);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case htons(ETH_P_IPV6):
+		return __br_ip6_hash(mdb, &ip->u.ip6);
+#endif
 	}
 	return 0;
 }
@@ -78,6 +109,19 @@ static struct net_bridge_mdb_entry *br_mdb_ip4_get(
 	return __br_mdb_ip_get(mdb, &br_dst, __br_ip4_hash(mdb, dst));
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static struct net_bridge_mdb_entry *br_mdb_ip6_get(
+	struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst)
+{
+	struct br_ip br_dst;
+
+	ipv6_addr_copy(&br_dst.u.ip6, dst);
+	br_dst.proto = htons(ETH_P_IPV6);
+
+	return __br_mdb_ip_get(mdb, &br_dst, __br_ip6_hash(mdb, dst));
+}
+#endif
+
 static struct net_bridge_mdb_entry *br_mdb_ip_get(
 	struct net_bridge_mdb_htable *mdb, struct br_ip *dst)
 {
@@ -102,6 +146,11 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 	case htons(ETH_P_IP):
 		ip.u.ip4 = ip_hdr(skb)->daddr;
 		break;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case htons(ETH_P_IPV6):
+		ipv6_addr_copy(&ip.u.ip6, &ipv6_hdr(skb)->daddr);
+		break;
+#endif
 	default:
 		return NULL;
 	}
@@ -352,12 +401,94 @@ out:
 	return skb;
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
+						    struct in6_addr *group)
+{
+	struct sk_buff *skb;
+	struct ipv6hdr *ip6h;
+	struct mld_msg *mldq;
+	struct ethhdr *eth;
+	u8 *hopopt;
+	unsigned long interval;
+
+	skb = netdev_alloc_skb_ip_align(br->dev, sizeof(*eth) + sizeof(*ip6h) +
+						 8 + sizeof(*mldq));
+	if (!skb)
+		goto out;
+
+	skb->protocol = htons(ETH_P_IPV6);
+
+	/* Ethernet header: bridge MAC as source, multicast-mapped dest */
+	skb_reset_mac_header(skb);
+	eth = eth_hdr(skb);
+
+	memcpy(eth->h_source, br->dev->dev_addr, 6);
+	ipv6_eth_mc_map(group, eth->h_dest);
+	eth->h_proto = htons(ETH_P_IPV6);
+	skb_put(skb, sizeof(*eth));
+
+	/* IPv6 header + 8-byte HbH extension header (Router Alert) */
+	skb_set_network_header(skb, skb->len);
+	ip6h = ipv6_hdr(skb);
+
+	*(__force __be32 *)ip6h = htonl(0x60000000);
+	ip6h->payload_len = htons(8 + sizeof(*mldq));	/* on-wire, big endian */
+	ip6h->nexthdr = IPPROTO_HOPOPTS;
+	ip6h->hop_limit = 1;
+	ipv6_addr_set(&ip6h->saddr, 0, 0, 0, 0);
+	ipv6_addr_set(&ip6h->daddr, htonl(0xff020000), 0, 0, htonl(1));
+
+	hopopt = (u8 *)(ip6h + 1);
+	hopopt[0] = IPPROTO_ICMPV6;		/* next hdr */
+	hopopt[1] = 0;				/* length of HbH */
+	hopopt[2] = IPV6_TLV_ROUTERALERT;	/* Router Alert */
+	hopopt[3] = 2;				/* Length of RA Option */
+	hopopt[4] = 0;				/* Type = 0x0000 (MLD) */
+	hopopt[5] = 0;
+	hopopt[6] = IPV6_TLV_PAD0;		/* Pad0 */
+	hopopt[7] = IPV6_TLV_PAD0;		/* Pad0 */
+
+	skb_put(skb, sizeof(*ip6h) + 8);
+
+	/* ICMPv6: group-specific query unless group is ::, else general */
+	skb_set_transport_header(skb, skb->len);
+	mldq = (struct mld_msg *) icmp6_hdr(skb);
+
+	interval = !ipv6_addr_any(group) ? br->multicast_last_member_interval :
+					   br->multicast_query_response_interval;
+
+	mldq->mld_type = ICMPV6_MGM_QUERY;
+	mldq->mld_code = 0;
+	mldq->mld_cksum = 0;
+	mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
+	mldq->mld_reserved = 0;
+	ipv6_addr_copy(&mldq->mld_mca, group);
+
+	/* checksum over the pseudo-header and the MLD message */
+	mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+					  sizeof(*mldq), IPPROTO_ICMPV6,
+					  csum_partial(mldq,
+						       sizeof(*mldq), 0));
+	skb_put(skb, sizeof(*mldq));
+
+	__skb_pull(skb, sizeof(*eth));
+
+out:
+	return skb;
+}
+#endif
+
 static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
 						struct br_ip *addr)
 {
 	switch (addr->proto) {
 	case htons(ETH_P_IP):
 		return br_ip4_multicast_alloc_query(br, addr->u.ip4);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case htons(ETH_P_IPV6):
+		return br_ip6_multicast_alloc_query(br, &addr->u.ip6);
+#endif
 	}
 	return NULL;
 }
@@ -631,6 +762,23 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
 	return br_multicast_add_group(br, port, &br_group);
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static int br_ip6_multicast_add_group(struct net_bridge *br,
+				      struct net_bridge_port *port,
+				      const struct in6_addr *group)
+{
+	struct br_ip br_group;
+
+	if (ipv6_is_local_multicast(group))	/* never tracked */
+		return 0;
+
+	ipv6_addr_copy(&br_group.u.ip6, group);
+	br_group.proto = htons(ETH_P_IPV6);	/* key must be tagged IPv6 */
+
+	return br_multicast_add_group(br, port, &br_group);
+}
+#endif
+
 static void br_multicast_router_expired(unsigned long data)
 {
 	struct net_bridge_port *port = (void *)data;
@@ -681,10 +829,15 @@ static void br_multicast_send_query(struct net_bridge *br,
 	    timer_pending(&br->multicast_querier_timer))
 		return;
 
-	br_group.u.ip4 = 0;
+	memset(&br_group.u, 0, sizeof(br_group.u));
+
 	br_group.proto = htons(ETH_P_IP);
+	__br_multicast_send_query(br, port, &br_group);
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	br_group.proto = htons(ETH_P_IPV6);
 	__br_multicast_send_query(br, port, &br_group);
+#endif
 
 	time = jiffies;
 	time += sent < br->multicast_startup_query_count ?
@@ -825,6 +978,66 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
 	return err;
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static int br_ip6_multicast_mld2_report(struct net_bridge *br,
+					struct net_bridge_port *port,
+					struct sk_buff *skb)
+{
+	struct icmp6hdr *icmp6h;
+	struct mld2_grec *grec;
+	int i;
+	int len;
+	int num;
+	int err = 0;
+
+	if (!pskb_may_pull(skb, sizeof(*icmp6h)))
+		return -EINVAL;
+
+	icmp6h = icmp6_hdr(skb);
+	num = ntohs(icmp6h->icmp6_dataun.un_data16[1]);	/* record count */
+	len = sizeof(*icmp6h);
+
+	for (i = 0; i < num; i++) {
+		__be16 *nsrcs, _nsrcs;
+
+		nsrcs = skb_header_pointer(skb,
+					   len + offsetof(struct mld2_grec,
+							  grec_nsrcs),
+					   sizeof(_nsrcs), &_nsrcs);
+		if (!nsrcs)
+			return -EINVAL;
+
+		if (!pskb_may_pull(skb,
+				   len + sizeof(*grec) +
+				   sizeof(struct in6_addr) * ntohs(*nsrcs)))
+			return -EINVAL;
+
+		grec = (struct mld2_grec *)(skb->data + len);
+		len += sizeof(*grec) + sizeof(struct in6_addr) * ntohs(*nsrcs);
+
+		/* We treat these as MLDv1 reports for now. */
+		switch (grec->grec_type) {
+		case MLD2_MODE_IS_INCLUDE:
+		case MLD2_MODE_IS_EXCLUDE:
+		case MLD2_CHANGE_TO_INCLUDE:
+		case MLD2_CHANGE_TO_EXCLUDE:
+		case MLD2_ALLOW_NEW_SOURCES:
+		case MLD2_BLOCK_OLD_SOURCES:
+			break;
+
+		default:
+			continue;
+		}
+
+		err = br_ip6_multicast_add_group(br, port, &grec->grec_mca);
+		if (err)	/* stop at the first record that fails */
+			break;
+	}
+
+	return err;
+}
+#endif
+
 static void br_multicast_add_router(struct net_bridge *br,
 				    struct net_bridge_port *port)
 {
@@ -955,6 +1168,75 @@ out:
 	return err;
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static int br_ip6_multicast_query(struct net_bridge *br,
+				  struct net_bridge_port *port,
+				  struct sk_buff *skb)
+{
+	struct ipv6hdr *ip6h = ipv6_hdr(skb);
+	struct mld_msg *mld = (struct mld_msg *) icmp6_hdr(skb);
+	struct net_bridge_mdb_entry *mp;
+	struct mld2_query *mld2q;
+	struct net_bridge_port_group *p, **pp;
+	unsigned long max_delay;
+	unsigned long now = jiffies;
+	struct in6_addr *group = NULL;
+	int err = 0;
+
+	spin_lock(&br->multicast_lock);
+	if (!netif_running(br->dev) ||
+	    (port && port->state == BR_STATE_DISABLED))
+		goto out;
+
+	br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr));
+
+	if (skb->len == sizeof(*mld)) {		/* MLDv1-sized query */
+		if (!pskb_may_pull(skb, sizeof(*mld))) {
+			err = -EINVAL;
+			goto out;
+		}
+		mld = (struct mld_msg *) icmp6_hdr(skb);
+		max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay));
+		if (max_delay)
+			group = &mld->mld_mca;
+	} else if (skb->len >= sizeof(*mld2q)) {	/* MLDv2-sized query */
+		if (!pskb_may_pull(skb, sizeof(*mld2q))) {
+			err = -EINVAL;
+			goto out;
+		}
+		mld2q = (struct mld2_query *)icmp6_hdr(skb);
+		if (!mld2q->mld2q_nsrcs)
+			group = &mld2q->mld2q_mca;
+		max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(mld2q->mld2q_mrc) : 1;
+	}
+
+	if (!group)	/* max_delay is only valid when group was set */
+		goto out;
+
+	mp = br_mdb_ip6_get(br->mdb, group);
+	if (!mp)
+		goto out;
+
+	max_delay *= br->multicast_last_member_count;
+	if (!hlist_unhashed(&mp->mglist) &&
+	    (timer_pending(&mp->timer) ?
+	     time_after(mp->timer.expires, now + max_delay) :
+	     try_to_del_timer_sync(&mp->timer) >= 0))
+		mod_timer(&mp->timer, now + max_delay);
+
+	for (pp = &mp->ports; (p = *pp); pp = &p->next) {
+		if (timer_pending(&p->timer) ?
+		    time_after(p->timer.expires, now + max_delay) :
+		    try_to_del_timer_sync(&p->timer) >= 0)
+			mod_timer(&p->timer, now + max_delay);	/* port timer */
+	}
+
+out:
+	spin_unlock(&br->multicast_lock);
+	return err;
+}
+#endif
+
 static void br_multicast_leave_group(struct net_bridge *br,
 				     struct net_bridge_port *port,
 				     struct br_ip *group)
@@ -1030,6 +1312,22 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
 	br_multicast_leave_group(br, port, &br_group);
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static void br_ip6_multicast_leave_group(struct net_bridge *br,
+					 struct net_bridge_port *port,
+					 const struct in6_addr *group)
+{
+	struct br_ip br_group;
+
+	if (ipv6_is_local_multicast(group))
+		return;
+
+	ipv6_addr_copy(&br_group.u.ip6, group);
+	br_group.proto = htons(ETH_P_IPV6);
+
+	br_multicast_leave_group(br, port, &br_group);
+}
+#endif
 
 static int br_multicast_ipv4_rcv(struct net_bridge *br,
 				 struct net_bridge_port *port,
@@ -1129,6 +1427,126 @@ err_out:
 	return err;
 }
 
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+static int br_multicast_ipv6_rcv(struct net_bridge *br,
+				 struct net_bridge_port *port,
+				 struct sk_buff *skb)
+{
+	struct sk_buff *skb2 = skb;
+	struct ipv6hdr *ip6h;
+	struct icmp6hdr *icmp6h;
+	u8 nexthdr;
+	unsigned len;
+	int offset;	/* signed: ipv6_skip_exthdr() result is tested < 0 */
+	int err;
+
+	BR_INPUT_SKB_CB(skb)->igmp = 0;
+	BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
+
+	if (!pskb_may_pull(skb, sizeof(*ip6h)))
+		return -EINVAL;
+
+	ip6h = ipv6_hdr(skb);
+
+	/*
+	 * We're interested in MLD messages only.
+	 *  - Version is 6
+	 *  - MLD has always Router Alert hop-by-hop option
+	 *  - But we do not support jumbograms.
+	 */
+	if (ip6h->version != 6 ||
+	    ip6h->nexthdr != IPPROTO_HOPOPTS ||
+	    ip6h->payload_len == 0)
+		return 0;
+
+	len = ntohs(ip6h->payload_len);
+	if (skb->len < len)
+		return -EINVAL;
+
+	nexthdr = ip6h->nexthdr;
+	offset = ipv6_skip_exthdr(skb, sizeof(*ip6h), &nexthdr);
+
+	if (offset < 0 || nexthdr != IPPROTO_ICMPV6)
+		return 0;
+
+	/* Okay, we found ICMPv6 header */
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+	if (!skb2)
+		return -ENOMEM;
+
+	len -= offset - skb_network_offset(skb2);
+
+	__skb_pull(skb2, offset);
+	skb_reset_transport_header(skb2);
+
+	err = -EINVAL;
+	if (!pskb_may_pull(skb2, sizeof(*icmp6h)))
+		goto out;
+
+	icmp6h = icmp6_hdr(skb2);
+
+	switch (icmp6h->icmp6_type) {
+	case ICMPV6_MGM_QUERY:
+	case ICMPV6_MGM_REPORT:
+	case ICMPV6_MGM_REDUCTION:
+	case ICMPV6_MLD2_REPORT:
+		break;
+	default:
+		err = 0;
+		goto out;
+	}
+
+	/* Okay, we found MLD message. Check further. */
+	if (skb2->len > len) {
+		err = pskb_trim_rcsum(skb2, len);
+		if (err)
+			goto out;
+	}
+
+	switch (skb2->ip_summed) {
+	case CHECKSUM_COMPLETE:
+		if (!csum_fold(skb2->csum))
+			break;
+		/*FALLTHROUGH*/
+	case CHECKSUM_NONE:
+		skb2->csum = 0;
+		if (skb_checksum_complete(skb2))
+			goto out;
+	}
+
+	err = 0;
+
+	BR_INPUT_SKB_CB(skb)->igmp = 1;
+
+	switch (icmp6h->icmp6_type) {
+	case ICMPV6_MGM_REPORT:
+	    {
+		struct mld_msg *mld = (struct mld_msg *)icmp6h;
+		BR_INPUT_SKB_CB(skb)->mrouters_only = 1; /* not the clone */
+		err = br_ip6_multicast_add_group(br, port, &mld->mld_mca);
+		break;
+	    }
+	case ICMPV6_MLD2_REPORT:
+		err = br_ip6_multicast_mld2_report(br, port, skb2);
+		break;
+	case ICMPV6_MGM_QUERY:
+		err = br_ip6_multicast_query(br, port, skb2);
+		break;
+	case ICMPV6_MGM_REDUCTION:
+	    {
+		struct mld_msg *mld = (struct mld_msg *)icmp6h;
+		br_ip6_multicast_leave_group(br, port, &mld->mld_mca);
+	    }
+	}
+
+out:
+	__skb_push(skb2, offset);
+	if (skb2 != skb)
+		kfree_skb(skb2);
+	return err;
+}
+#endif
+
 int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
 		     struct sk_buff *skb)
 {
@@ -1138,6 +1556,10 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
 		return br_multicast_ipv4_rcv(br, port, skb);
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case htons(ETH_P_IPV6):
+		return br_multicast_ipv6_rcv(br, port, skb);
+#endif
 	}
 
 	return 0;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 45d11e49fbbb..018499ebe19d 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -49,6 +49,9 @@ struct br_ip
 {
 	union {
 		__be32	ip4;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+		struct in6_addr ip6;
+#endif
 	} u;
 	__be16		proto;
 };
-- 
cgit v1.2.3-59-g8ed1b


From af740b2c8f4521e2c45698ee6040941a82d6349d Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <hawk@comx.dk>
Date: Fri, 23 Apr 2010 12:34:56 +0200
Subject: netfilter: nf_conntrack: extend with extra stat counter

I suspect an unfortunate series of events occurring under a DDoS
attack, in function __nf_conntrack_find() in nf_conntrack_core.c.

Adding a stats counter to see if the search is restarted too often.

Signed-off-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_common.h         | 1 +
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 7 ++++---
 net/netfilter/nf_conntrack_core.c                     | 4 +++-
 net/netfilter/nf_conntrack_standalone.c               | 7 ++++---
 4 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index c608677dda60..14e6d32002c4 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -113,6 +113,7 @@ struct ip_conntrack_stat {
 	unsigned int expect_new;
 	unsigned int expect_create;
 	unsigned int expect_delete;
+	unsigned int search_restart;
 };
 
 /* call to create an explicit dependency on nf_conntrack. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 2fb7b76da94f..244f7cb08d68 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -336,12 +336,12 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 	const struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete\n");
+		seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
 		return 0;
 	}
 
 	seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
-			"%08x %08x %08x %08x %08x  %08x %08x %08x \n",
+			"%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
 		   nr_conntracks,
 		   st->searched,
 		   st->found,
@@ -358,7 +358,8 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 
 		   st->expect_new,
 		   st->expect_create,
-		   st->expect_delete
+		   st->expect_delete,
+		   st->search_restart
 		);
 	return 0;
 }
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0c9bbe93cc16..3907efb97a7c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -319,8 +319,10 @@ begin:
 	 * not the expected one, we must restart lookup.
 	 * We probably met an item that was moved to another chain.
 	 */
-	if (get_nulls_value(n) != hash)
+	if (get_nulls_value(n) != hash) {
+		NF_CT_STAT_INC(net, search_restart);
 		goto begin;
+	}
 	local_bh_enable();
 
 	return NULL;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index faa8eb3722b9..ea4a8d384234 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -252,12 +252,12 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 	const struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete\n");
+		seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
 		return 0;
 	}
 
 	seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
-			"%08x %08x %08x %08x %08x  %08x %08x %08x \n",
+			"%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
 		   nr_conntracks,
 		   st->searched,
 		   st->found,
@@ -274,7 +274,8 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 
 		   st->expect_new,
 		   st->expect_create,
-		   st->expect_delete
+		   st->expect_delete,
+		   st->search_restart
 		);
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From e773aaff8295e7f3428d9cf6f8a476a33de00716 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Fri, 23 Apr 2010 00:53:39 +0000
Subject: l2tp: fix memory allocation

Since .size is set properly in "struct pernet_operations l2tp_net_ops",
allocating space for "struct l2tp_net" by hand is not correct and even
causes a memory leak.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_core.c | 29 +----------------------------
 1 file changed, 1 insertion(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index ecc7aea9efe4..1712af1c7b3f 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1617,14 +1617,9 @@ EXPORT_SYMBOL_GPL(l2tp_session_create);
 
 static __net_init int l2tp_init_net(struct net *net)
 {
-	struct l2tp_net *pn;
-	int err;
+	struct l2tp_net *pn = net_generic(net, l2tp_net_id);
 	int hash;
 
-	pn = kzalloc(sizeof(*pn), GFP_KERNEL);
-	if (!pn)
-		return -ENOMEM;
-
 	INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
 	spin_lock_init(&pn->l2tp_tunnel_list_lock);
 
@@ -1633,33 +1628,11 @@ static __net_init int l2tp_init_net(struct net *net)
 
 	spin_lock_init(&pn->l2tp_session_hlist_lock);
 
-	err = net_assign_generic(net, l2tp_net_id, pn);
-	if (err)
-		goto out;
-
 	return 0;
-
-out:
-	kfree(pn);
-	return err;
-}
-
-static __net_exit void l2tp_exit_net(struct net *net)
-{
-	struct l2tp_net *pn;
-
-	pn = net_generic(net, l2tp_net_id);
-	/*
-	 * if someone has cached our net then
-	 * further net_generic call will return NULL
-	 */
-	net_assign_generic(net, l2tp_net_id, NULL);
-	kfree(pn);
 }
 
 static struct pernet_operations l2tp_net_ops = {
 	.init = l2tp_init_net,
-	.exit = l2tp_exit_net,
 	.id   = &l2tp_net_id,
 	.size = sizeof(struct l2tp_net),
 };
-- 
cgit v1.2.3-59-g8ed1b


From 3a737028630bb3c2b9efc38b9ddef2e09b06b808 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Fri, 23 Apr 2010 01:01:52 +0000
Subject: l2tp_eth: fix memory allocation

Since .size is set properly in "struct pernet_operations l2tp_eth_net_ops",
allocating space for "struct l2tp_eth_net" by hand is not correct and even
causes a memory leak.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/l2tp/l2tp_eth.c | 29 +----------------------------
 1 file changed, 1 insertion(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index ca1164afeb74..58c6c4cda73b 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -276,43 +276,16 @@ out:
 
 static __net_init int l2tp_eth_init_net(struct net *net)
 {
-	struct l2tp_eth_net *pn;
-	int err;
-
-	pn = kzalloc(sizeof(*pn), GFP_KERNEL);
-	if (!pn)
-		return -ENOMEM;
+	struct l2tp_eth_net *pn = net_generic(net, l2tp_eth_net_id);
 
 	INIT_LIST_HEAD(&pn->l2tp_eth_dev_list);
 	spin_lock_init(&pn->l2tp_eth_lock);
 
-	err = net_assign_generic(net, l2tp_eth_net_id, pn);
-	if (err)
-		goto out;
-
 	return 0;
-
-out:
-	kfree(pn);
-	return err;
-}
-
-static __net_exit void l2tp_eth_exit_net(struct net *net)
-{
-	struct l2tp_eth_net *pn;
-
-	pn = net_generic(net, l2tp_eth_net_id);
-	/*
-	 * if someone has cached our net then
-	 * further net_generic call will return NULL
-	 */
-	net_assign_generic(net, l2tp_eth_net_id, NULL);
-	kfree(pn);
 }
 
 static __net_initdata struct pernet_operations l2tp_eth_net_ops = {
 	.init = l2tp_eth_init_net,
-	.exit = l2tp_eth_exit_net,
 	.id   = &l2tp_eth_net_id,
 	.size = sizeof(struct l2tp_eth_net),
 };
-- 
cgit v1.2.3-59-g8ed1b


From 793b14731686595a741d9f47726ad8b9a235385a Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Fri, 23 Apr 2010 11:26:07 +0000
Subject: IPv6: data structure changes for new socket options

Add underlying data structure changes and basic setsockopt()
and getsockopt() support for IPV6_RECVPATHMTU, IPV6_PATHMTU,
and IPV6_DONTFRAG.  IPV6_PATHMTU is actually fully functional
at this point.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in6.h      |  2 +-
 include/linux/ipv6.h     | 13 ++++++++++---
 net/ipv6/ipv6_sockglue.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/linux/in6.h b/include/linux/in6.h
index 9b90cb296eb1..c4bf46f764bf 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -221,10 +221,10 @@ struct in6_flowlabel_req {
 #define IPV6_RTHDR		57
 #define IPV6_RECVDSTOPTS	58
 #define IPV6_DSTOPTS		59
-#if 0	/* not yet */
 #define IPV6_RECVPATHMTU	60
 #define IPV6_PATHMTU		61
 #define IPV6_DONTFRAG		62
+#if 0	/* not yet */
 #define IPV6_USE_MIN_MTU	63
 #endif
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 1bdbebf08d16..1976942cf6f9 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -21,6 +21,10 @@ struct in6_pktinfo {
 	int		ipi6_ifindex;
 };
 
+struct ip6_mtuinfo {
+	struct sockaddr_in6	ip6m_addr;
+	__u32			ip6m_mtu;
+};
 
 struct in6_ifreq {
 	struct in6_addr	ifr6_addr;
@@ -334,22 +338,25 @@ struct ipv6_pinfo {
 				dstopts:1,
 				odstopts:1,
                                 rxflow:1,
-				rxtclass:1;
+				rxtclass:1,
+				rxpmtu:1;
 		} bits;
 		__u16		all;
 	} rxopt;
 
 	/* sockopt flags */
-	__u8			recverr:1,
+	__u16			recverr:1,
 	                        sndflow:1,
 				pmtudisc:2,
 				ipv6only:1,
-				srcprefs:3;	/* 001: prefer temporary address
+				srcprefs:3,	/* 001: prefer temporary address
 						 * 010: prefer public address
 						 * 100: prefer care-of address
 						 */
+				dontfrag:1;
 	__u8			min_hopcount;
 	__u8			tclass;
+	__u8			padding;
 
 	__u32			dst_cookie;
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 92295ad3487a..2bf9eda72788 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -337,6 +337,13 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		retv = 0;
 		break;
 
+	case IPV6_RECVPATHMTU:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		np->rxopt.bits.rxpmtu = valbool;
+		retv = 0;
+		break;
+
 	case IPV6_HOPOPTS:
 	case IPV6_RTHDRDSTOPTS:
 	case IPV6_RTHDR:
@@ -773,6 +780,10 @@ pref_skip_coa:
 		if (val < 0 || val > 255)
 			goto e_inval;
 		np->min_hopcount = val;
 		retv = 0;
 		break;
+	case IPV6_DONTFRAG:
+		np->dontfrag = valbool;
+		retv = 0;
+		break;
 	}
@@ -1063,6 +1073,38 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = np->rxopt.bits.rxflow;
 		break;
 
+	case IPV6_RECVPATHMTU:
+		val = np->rxopt.bits.rxpmtu;
+		break;
+
+	case IPV6_PATHMTU:
+	{
+		struct dst_entry *dst;
+		struct ip6_mtuinfo mtuinfo;
+
+		if (len < sizeof(mtuinfo))
+			return -EINVAL;
+
+		len = sizeof(mtuinfo);
+		memset(&mtuinfo, 0, sizeof(mtuinfo));
+
+		rcu_read_lock();
+		dst = __sk_dst_get(sk);
+		if (dst)
+			mtuinfo.ip6m_mtu = dst_mtu(dst);
+		rcu_read_unlock();
+		if (!mtuinfo.ip6m_mtu)
+			return -ENOTCONN;
+
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &mtuinfo, len))
+			return -EFAULT;
+
+		return 0;
+		break;
+	}
+
 	case IPV6_UNICAST_HOPS:
 	case IPV6_MULTICAST_HOPS:
 	{
@@ -1128,6 +1170,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = np->min_hopcount;
 		break;
 
+	case IPV6_DONTFRAG:
+		val = np->dontfrag;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 13b52cd44670e3359055e9918d0e766d89836425 Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Fri, 23 Apr 2010 11:26:08 +0000
Subject: IPv6: Add dontfrag argument to relevant functions

Add dontfrag argument to relevant functions for
IPV6_DONTFRAG support, as well as allowing the value
to be passed-in via ancillary cmsg data.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h       |  3 ++-
 include/net/transp_v6.h  |  3 ++-
 net/ipv6/datagram.c      | 21 ++++++++++++++++++++-
 net/ipv6/icmp.c          |  5 +++--
 net/ipv6/ip6_flowlabel.c |  3 ++-
 net/ipv6/ip6_output.c    |  2 +-
 net/ipv6/ipv6_sockglue.c |  3 ++-
 net/ipv6/raw.c           |  9 +++++++--
 net/ipv6/udp.c           |  9 +++++++--
 9 files changed, 46 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index b1d8db90b214..7ab6323e631e 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -503,7 +503,8 @@ extern int			ip6_append_data(struct sock *sk,
 						struct ipv6_txoptions *opt,
 						struct flowi *fl,
 						struct rt6_info *rt,
-						unsigned int flags);
+						unsigned int flags,
+						int dontfrag);
 
 extern int			ip6_push_pending_frames(struct sock *sk);
 
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index d65381cad0fc..42a0eb68b7b6 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -44,7 +44,8 @@ extern int			datagram_send_ctl(struct net *net,
 						  struct msghdr *msg,
 						  struct flowi *fl,
 						  struct ipv6_txoptions *opt,
-						  int *hlimit, int *tclass);
+						  int *hlimit, int *tclass,
+						  int *dontfrag);
 
 #define		LOOPBACK4_IPV6		cpu_to_be32(0x7f000006)
 
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 622dc7939a1b..f5076d349b18 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -497,7 +497,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 int datagram_send_ctl(struct net *net,
 		      struct msghdr *msg, struct flowi *fl,
 		      struct ipv6_txoptions *opt,
-		      int *hlimit, int *tclass)
+		      int *hlimit, int *tclass, int *dontfrag)
 {
 	struct in6_pktinfo *src_info;
 	struct cmsghdr *cmsg;
@@ -735,6 +735,25 @@ int datagram_send_ctl(struct net *net,
 			err = 0;
 			*tclass = tc;
 
+			break;
+		    }
+
+		case IPV6_DONTFRAG:
+		    {
+			int df;
+
+			err = -EINVAL;
+			if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) {
+				goto exit_f;
+			}
+
+			df = *(int *)CMSG_DATA(cmsg);
+			if (df < 0 || df > 1)
+				goto exit_f;
+
+			err = 0;
+			*dontfrag = df;
+
 			break;
 		    }
 		default:
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 12d2fa42657d..ce7992982557 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -481,7 +481,7 @@ route_done:
 			      len + sizeof(struct icmp6hdr),
 			      sizeof(struct icmp6hdr), hlimit,
 			      np->tclass, NULL, &fl, (struct rt6_info*)dst,
-			      MSG_DONTWAIT);
+			      MSG_DONTWAIT, np->dontfrag);
 	if (err) {
 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
 		ip6_flush_pending_frames(sk);
@@ -561,7 +561,8 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 
 	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
 				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl,
-				(struct rt6_info*)dst, MSG_DONTWAIT);
+				(struct rt6_info*)dst, MSG_DONTWAIT,
+				np->dontfrag);
 
 	if (err) {
 		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 14e23216eb28..13654686aeab 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -360,7 +360,8 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
 		msg.msg_control = (void*)(fl->opt+1);
 		flowi.oif = 0;
 
-		err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk);
+		err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk,
+					&junk, &junk);
 		if (err)
 			goto done;
 		err = -EINVAL;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 263d4cf5a8de..54d43dd1f085 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1105,7 +1105,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	int offset, int len, int odd, struct sk_buff *skb),
 	void *from, int length, int transhdrlen,
 	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
-	struct rt6_info *rt, unsigned int flags)
+	struct rt6_info *rt, unsigned int flags, int dontfrag)
 {
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 2bf9eda72788..bd43f0152c21 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -458,7 +458,8 @@ sticky_done:
 		msg.msg_controllen = optlen;
 		msg.msg_control = (void*)(opt+1);
 
-		retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk);
+		retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk,
+					 &junk);
 		if (retv)
 			goto done;
 update:
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8763b1a0814a..44a84ea9b3e8 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -733,6 +733,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	int addr_len = msg->msg_namelen;
 	int hlimit = -1;
 	int tclass = -1;
+	int dontfrag = -1;
 	u16 proto;
 	int err;
 
@@ -811,7 +812,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 
-		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit,
+					&tclass, &dontfrag);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -880,6 +882,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	if (tclass < 0)
 		tclass = np->tclass;
 
+	if (dontfrag < 0)
+		dontfrag = np->dontfrag;
+
 	if (msg->msg_flags&MSG_CONFIRM)
 		goto do_confirm;
 
@@ -890,7 +895,7 @@ back_from_confirm:
 		lock_sock(sk);
 		err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
 			len, 0, hlimit, tclass, opt, &fl, (struct rt6_info*)dst,
-			msg->msg_flags);
+			msg->msg_flags, dontfrag);
 
 		if (err)
 			ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 92bf9033e245..39e3665d9460 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -919,6 +919,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 	int ulen = len;
 	int hlimit = -1;
 	int tclass = -1;
+	int dontfrag = -1;
 	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
 	int err;
 	int connected = 0;
@@ -1049,7 +1050,8 @@ do_udp_sendmsg:
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(*opt);
 
-		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit,
+					&tclass, &dontfrag);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
@@ -1120,6 +1122,9 @@ do_udp_sendmsg:
 	if (tclass < 0)
 		tclass = np->tclass;
 
+	if (dontfrag < 0)
+		dontfrag = np->dontfrag;
+
 	if (msg->msg_flags&MSG_CONFIRM)
 		goto do_confirm;
 back_from_confirm:
@@ -1143,7 +1148,7 @@ do_append_data:
 	err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
 		sizeof(struct udphdr), hlimit, tclass, opt, &fl,
 		(struct rt6_info*)dst,
-		corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
+		corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
 	if (err)
 		udp_v6_flush_pending_frames(sk);
 	else if (!corkreq)
-- 
cgit v1.2.3-59-g8ed1b


From 4b340ae20d0e2366792abe70f46629e576adaf5e Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Fri, 23 Apr 2010 11:26:09 +0000
Subject: IPv6: Complete IPV6_DONTFRAG support

Finally add support to detect a local IPV6_DONTFRAG event
and return the relevant data to the user if they've enabled
IPV6_RECVPATHMTU on the socket.  The next recvmsg() will
return no data, but have an IPV6_PATHMTU as ancillary data.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h  |  2 ++
 include/net/ipv6.h    |  2 ++
 net/ipv6/af_inet6.c   |  3 ++
 net/ipv6/datagram.c   | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/ip6_output.c | 24 +++++++++-----
 net/ipv6/raw.c        |  3 ++
 net/ipv6/udp.c        |  3 ++
 7 files changed, 116 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 1976942cf6f9..2ab5509f6d49 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -257,6 +257,7 @@ struct inet6_skb_parm {
 };
 
 #define IP6CB(skb)	((struct inet6_skb_parm*)((skb)->cb))
+#define IP6CBMTU(skb)	((struct ip6_mtuinfo *)((skb)->cb))
 
 static inline int inet6_iif(const struct sk_buff *skb)
 {
@@ -366,6 +367,7 @@ struct ipv6_pinfo {
 
 	struct ipv6_txoptions	*opt;
 	struct sk_buff		*pktoptions;
+	struct sk_buff		*rxpmtu;
 	struct {
 		struct ipv6_txoptions *opt;
 		u8 hop_limit;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7ab6323e631e..eba5cc00325a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -578,9 +578,11 @@ extern int			ip6_datagram_connect(struct sock *sk,
 						     struct sockaddr *addr, int addr_len);
 
 extern int 			ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len);
+extern int 			ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len);
 extern void			ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port,
 						u32 info, u8 *payload);
 extern void			ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info);
+extern void			ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu);
 
 extern int inet6_release(struct socket *sock);
 extern int inet6_bind(struct socket *sock, struct sockaddr *uaddr, 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3192aa02ba5d..d2df3144429b 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -417,6 +417,9 @@ void inet6_destroy_sock(struct sock *sk)
 	if ((skb = xchg(&np->pktoptions, NULL)) != NULL)
 		kfree_skb(skb);
 
+	if ((skb = xchg(&np->rxpmtu, NULL)) != NULL)
+		kfree_skb(skb);
+
 	/* Free flowlabels */
 	fl6_free_socklist(sk);
 
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index f5076d349b18..5959230bc6c1 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -278,6 +278,45 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 		kfree_skb(skb);
 }
 
+void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6hdr *iph;
+	struct sk_buff *skb;
+	struct ip6_mtuinfo *mtu_info;
+
+	if (!np->rxopt.bits.rxpmtu)
+		return;
+
+	skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	skb_put(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	iph = ipv6_hdr(skb);
+	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
+
+	mtu_info = IP6CBMTU(skb);
+	if (!mtu_info) {
+		kfree_skb(skb);
+		return;
+	}
+
+	mtu_info->ip6m_mtu = mtu;
+	mtu_info->ip6m_addr.sin6_family = AF_INET6;
+	mtu_info->ip6m_addr.sin6_port = 0;
+	mtu_info->ip6m_addr.sin6_flowinfo = 0;
+	mtu_info->ip6m_addr.sin6_scope_id = fl->oif;
+	ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr);
+
+	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+	skb_reset_transport_header(skb);
+
+	skb = xchg(&np->rxpmtu, skb);
+	kfree_skb(skb);
+}
+
 /*
  *	Handle MSG_ERRQUEUE
  */
@@ -381,6 +420,54 @@ out:
 	return err;
 }
 
+/*
+ *	Handle IPV6_RECVPATHMTU
+ */
+int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff *skb;
+	struct sockaddr_in6 *sin;
+	struct ip6_mtuinfo mtu_info;
+	int err;
+	int copied;
+
+	err = -EAGAIN;
+	skb = xchg(&np->rxpmtu, NULL);
+	if (skb == NULL)
+		goto out;
+
+	copied = skb->len;
+	if (copied > len) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto out_free_skb;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info));
+
+	sin = (struct sockaddr_in6 *)msg->msg_name;
+	if (sin) {
+		sin->sin6_family = AF_INET6;
+		sin->sin6_flowinfo = 0;
+		sin->sin6_port = 0;
+		sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
+		ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr);
+	}
+
+	put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
+
+	err = copied;
+
+out_free_skb:
+	kfree_skb(skb);
+out:
+	return err;
+}
 
 
 int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 54d43dd1f085..61e2bef56090 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1219,15 +1219,23 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	 */
 
 	inet->cork.length += length;
-	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
-	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+	if (length > mtu) {
+		int proto = sk->sk_protocol;
+		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
+			ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen);
+			return -EMSGSIZE;
+		}
 
-		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
-					  fragheaderlen, transhdrlen, mtu,
-					  flags);
-		if (err)
-			goto error;
-		return 0;
+		if (proto == IPPROTO_UDP &&
+		    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+
+			err = ip6_ufo_append_data(sk, getfrag, from, length,
+						  hh_len, fragheaderlen,
+						  transhdrlen, mtu, flags);
+			if (err)
+				goto error;
+			return 0;
+		}
 	}
 
 	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 44a84ea9b3e8..85627386cb02 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -461,6 +461,9 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (flags & MSG_ERRQUEUE)
 		return ipv6_recv_error(sk, msg, len);
 
+	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
+		return ipv6_recv_rxpmtu(sk, msg, len);
+
 	skb = skb_recv_datagram(sk, flags, noblock, &err);
 	if (!skb)
 		goto out;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 39e3665d9460..2850e35cee3d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -335,6 +335,9 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (flags & MSG_ERRQUEUE)
 		return ipv6_recv_error(sk, msg, len);
 
+	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
+		return ipv6_recv_rxpmtu(sk, msg, len);
+
 try_again:
 	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
 				  &peeked, &err);
-- 
cgit v1.2.3-59-g8ed1b


From 8c52d509e84bbf26cffb8b6e75b399689af67885 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sat, 24 Apr 2010 22:50:10 -0700
Subject: rps: optimize rps_get_cpu()

optimize rps_get_cpu().

Don't initialize ports when we can get the ports; this needs one memory
access for the ports rather than two.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index a4a7c36917d1..4d43f1a80f74 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2229,7 +2229,11 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	int cpu = -1;
 	u8 ip_proto;
 	u16 tcpu;
-	u32 addr1, addr2, ports, ihl;
+	u32 addr1, addr2, ihl;
+	union {
+		u32 v32;
+		u16 v16[2];
+	} ports;
 
 	if (skb_rx_queue_recorded(skb)) {
 		u16 index = skb_get_rx_queue(skb);
@@ -2275,7 +2279,6 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	default:
 		goto done;
 	}
-	ports = 0;
 	switch (ip_proto) {
 	case IPPROTO_TCP:
 	case IPPROTO_UDP:
@@ -2285,25 +2288,20 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	case IPPROTO_SCTP:
 	case IPPROTO_UDPLITE:
 		if (pskb_may_pull(skb, (ihl * 4) + 4)) {
-			__be16 *hports = (__be16 *) (skb->data + (ihl * 4));
-			u32 sport, dport;
-
-			sport = (__force u16) hports[0];
-			dport = (__force u16) hports[1];
-			if (dport < sport)
-				swap(sport, dport);
-			ports = (sport << 16) + dport;
+			ports.v32 = * (__force u32 *) (skb->data + (ihl * 4));
+			if (ports.v16[1] < ports.v16[0])
+				swap(ports.v16[0], ports.v16[1]);
+			break;
 		}
-		break;
-
 	default:
+		ports.v32 = 0;
 		break;
 	}
 
 	/* get a consistent hash (same value on both flow directions) */
 	if (addr2 < addr1)
 		swap(addr1, addr2);
-	skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd);
+	skb->rxhash = jhash_3words(addr1, addr2, ports.v32, hashrnd);
 	if (!skb->rxhash)
 		skb->rxhash = 1;
 
-- 
cgit v1.2.3-59-g8ed1b


From b3c981d2bbbe889125169bd0bb482e64d3c028a1 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Sun, 25 Apr 2010 00:49:56 -0700
Subject: netns: rename unregister_pernet_subsys parameter

Stay consistent with other functions and with the comment, and name the
pernet_operations parameter properly.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net_namespace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index bd8c4712ea24..69a20bfc527c 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -469,10 +469,10 @@ EXPORT_SYMBOL_GPL(register_pernet_subsys);
  *	addition run the exit method for all existing network
  *	namespaces.
  */
-void unregister_pernet_subsys(struct pernet_operations *module)
+void unregister_pernet_subsys(struct pernet_operations *ops)
 {
 	mutex_lock(&net_mutex);
-	unregister_pernet_operations(module);
+	unregister_pernet_operations(ops);
 	mutex_unlock(&net_mutex);
 }
 EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
-- 
cgit v1.2.3-59-g8ed1b


From 3d0c9c4eb2dbdcc461be4084abd87a9a9e70f713 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 26 Apr 2010 16:02:04 +0200
Subject: net: fib_rules: mark arguments to fib_rules_register const and
 __net_initdata

fib_rules_register() duplicates the template passed to it without modification,
so mark the argument as const. Additionally the templates are only needed when
instantiating a new namespace, so mark them as __net_initdata, which means
they can be discarded when CONFIG_NET_NS=n.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/fib_rules.h | 2 +-
 net/core/fib_rules.c    | 2 +-
 net/decnet/dn_rules.c   | 2 +-
 net/ipv4/fib_rules.c    | 2 +-
 net/ipv4/ipmr.c         | 2 +-
 net/ipv6/fib6_rules.c   | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 52bd9e6c9141..e8923bc20f9f 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -104,7 +104,7 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla)
 	return frh->table;
 }
 
-extern struct fib_rules_ops *fib_rules_register(struct fib_rules_ops *, struct net *);
+extern struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *, struct net *);
 extern void fib_rules_unregister(struct fib_rules_ops *);
 extern void                     fib_rules_cleanup_ops(struct fib_rules_ops *);
 
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 1bc66592453c..42e84e08a1be 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -122,7 +122,7 @@ errout:
 }
 
 struct fib_rules_ops *
-fib_rules_register(struct fib_rules_ops *tmpl, struct net *net)
+fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net)
 {
 	struct fib_rules_ops *ops;
 	int err;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index af28dcc21844..1226bcad776b 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -216,7 +216,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
 	dn_rt_cache_flush(-1);
 }
 
-static struct fib_rules_ops dn_fib_rules_ops_template = {
+static const struct fib_rules_ops __net_initdata dn_fib_rules_ops_template = {
 	.family		= FIB_RULES_DECNET,
 	.rule_size	= sizeof(struct dn_fib_rule),
 	.addr_size	= sizeof(u16),
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 3ec84fea5b71..8ab62a56701c 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -245,7 +245,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
 	rt_cache_flush(ops->fro_net, -1);
 }
 
-static struct fib_rules_ops fib4_rules_ops_template = {
+static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = {
 	.family		= FIB_RULES_IPV4,
 	.rule_size	= sizeof(struct fib4_rule),
 	.addr_size	= sizeof(u32),
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a2df5012a1d0..7d3e382aed64 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -216,7 +216,7 @@ static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 	return 0;
 }
 
-static struct fib_rules_ops ipmr_rules_ops_template = {
+static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
 	.family		= FIB_RULES_IPMR,
 	.rule_size	= sizeof(struct ipmr_rule),
 	.addr_size	= sizeof(u32),
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 8124f16f2ac2..35f6949446f0 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -237,7 +237,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
 	       + nla_total_size(16); /* src */
 }
 
-static struct fib_rules_ops fib6_rules_ops_template = {
+static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = {
 	.family			= FIB_RULES_IPV6,
 	.rule_size		= sizeof(struct fib6_rule),
 	.addr_size		= sizeof(struct in6_addr),
-- 
cgit v1.2.3-59-g8ed1b


From 25239cee7e8732dbdc9f5d324f1c22a3bdec1d1f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 26 Apr 2010 16:02:05 +0200
Subject: net: rtnetlink: decouple rtnetlink address families from real address
 families

Decouple rtnetlink address families from real address families in socket.h to
be able to add rtnetlink interfaces to code that is not a real address family
without increasing AF_MAX/NPROTO.

This will be used to add support for multicast route dumping from all tables
as the proc interface can't be extended to support anything but the main table
without breaking compatibility.

This partially undoes the patch to introduce independent families for routing
rules and converts ipmr routing rules to a new rtnetlink family. Similar to
that patch, values up to 127 are reserved for real address families, values
above that may be used arbitrarily.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/fib_rules.h |  8 --------
 include/linux/rtnetlink.h |  6 ++++++
 net/core/rtnetlink.c      | 14 +++++++-------
 net/decnet/dn_rules.c     |  2 +-
 net/ipv4/fib_rules.c      |  2 +-
 net/ipv4/ipmr.c           |  2 +-
 net/ipv6/fib6_rules.c     |  2 +-
 7 files changed, 17 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 04a397619ebe..51da65b68b85 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -15,14 +15,6 @@
 /* try to find source address in routing lookups */
 #define FIB_RULE_FIND_SADDR	0x00010000
 
-/* fib_rules families. values up to 127 are reserved for real address
- * families, values above 128 may be used arbitrarily.
- */
-#define FIB_RULES_IPV4		AF_INET
-#define FIB_RULES_IPV6		AF_INET6
-#define FIB_RULES_DECNET	AF_DECnet
-#define FIB_RULES_IPMR		128
-
 struct fib_rule_hdr {
 	__u8		family;
 	__u8		dst_len;
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index d1c7c90e9cd4..5a42c36cb6aa 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -7,6 +7,12 @@
 #include <linux/if_addr.h>
 #include <linux/neighbour.h>
 
+/* rtnetlink families. Values up to 127 are reserved for real address
+ * families, values above 128 may be used arbitrarily.
+ */
+#define RTNL_FAMILY_IPMR		128
+#define RTNL_FAMILY_MAX			128
+
 /****
  *		Routing/neighbour discovery messages.
  ****/
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 78c85985cb30..fd781b62fa7f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -98,7 +98,7 @@ int lockdep_rtnl_is_held(void)
 EXPORT_SYMBOL(lockdep_rtnl_is_held);
 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 
-static struct rtnl_link *rtnl_msg_handlers[NPROTO];
+static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
 
 static inline int rtm_msgindex(int msgtype)
 {
@@ -118,7 +118,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
 {
 	struct rtnl_link *tab;
 
-	if (protocol < NPROTO)
+	if (protocol <= RTNL_FAMILY_MAX)
 		tab = rtnl_msg_handlers[protocol];
 	else
 		tab = NULL;
@@ -133,7 +133,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
 {
 	struct rtnl_link *tab;
 
-	if (protocol < NPROTO)
+	if (protocol <= RTNL_FAMILY_MAX)
 		tab = rtnl_msg_handlers[protocol];
 	else
 		tab = NULL;
@@ -167,7 +167,7 @@ int __rtnl_register(int protocol, int msgtype,
 	struct rtnl_link *tab;
 	int msgindex;
 
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 	msgindex = rtm_msgindex(msgtype);
 
 	tab = rtnl_msg_handlers[protocol];
@@ -219,7 +219,7 @@ int rtnl_unregister(int protocol, int msgtype)
 {
 	int msgindex;
 
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 	msgindex = rtm_msgindex(msgtype);
 
 	if (rtnl_msg_handlers[protocol] == NULL)
@@ -241,7 +241,7 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
  */
 void rtnl_unregister_all(int protocol)
 {
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 
 	kfree(rtnl_msg_handlers[protocol]);
 	rtnl_msg_handlers[protocol] = NULL;
@@ -1384,7 +1384,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (s_idx == 0)
 		s_idx = 1;
-	for (idx = 1; idx < NPROTO; idx++) {
+	for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
 		int type = cb->nlh->nlmsg_type-RTM_BASE;
 		if (idx < s_idx || idx == PF_PACKET)
 			continue;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 1226bcad776b..48fdf10be7a1 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -217,7 +217,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
 }
 
 static const struct fib_rules_ops __net_initdata dn_fib_rules_ops_template = {
-	.family		= FIB_RULES_DECNET,
+	.family		= AF_DECnet,
 	.rule_size	= sizeof(struct dn_fib_rule),
 	.addr_size	= sizeof(u16),
 	.action		= dn_fib_rule_action,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 8ab62a56701c..76daeb5ff564 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -246,7 +246,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
 }
 
 static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = {
-	.family		= FIB_RULES_IPV4,
+	.family		= AF_INET,
 	.rule_size	= sizeof(struct fib4_rule),
 	.addr_size	= sizeof(u32),
 	.action		= fib4_rule_action,
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 7d3e382aed64..41e8fc0ce8b3 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -217,7 +217,7 @@ static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 }
 
 static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
-	.family		= FIB_RULES_IPMR,
+	.family		= RTNL_FAMILY_IPMR,
 	.rule_size	= sizeof(struct ipmr_rule),
 	.addr_size	= sizeof(u32),
 	.action		= ipmr_rule_action,
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 35f6949446f0..8e44f8f9c188 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -238,7 +238,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
 }
 
 static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = {
-	.family			= FIB_RULES_IPV6,
+	.family			= AF_INET6,
 	.rule_size		= sizeof(struct fib6_rule),
 	.addr_size		= sizeof(struct in6_addr),
 	.action			= fib6_rule_action,
-- 
cgit v1.2.3-59-g8ed1b


From cb6a4e461fb427689920472bd7335f926d521747 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 26 Apr 2010 16:02:08 +0200
Subject: net: ipmr: add support for dumping routing tables over netlink

The ipmr /proc interface (ip_mr_cache) can't be extended to dump routes
from any tables but the main table in a backwards compatible fashion since
the output format ends in a variable number of output interfaces.

Introduce a new netlink interface to dump multicast routes from all tables,
similar to the netlink interface for regular routes.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/ipmr.c | 96 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 89 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 41e8fc0ce8b3..eddfd12f55b8 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -128,8 +128,8 @@ static int ip_mr_forward(struct net *net, struct mr_table *mrt,
 			 int local);
 static int ipmr_cache_report(struct mr_table *mrt,
 			     struct sk_buff *pkt, vifi_t vifi, int assert);
-static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
-			    struct mfc_cache *c, struct rtmsg *rtm);
+static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+			      struct mfc_cache *c, struct rtmsg *rtm);
 static void ipmr_expire_process(unsigned long arg);
 
 #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -831,7 +831,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
 		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
-			if (ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
+			if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
 				nlh->nlmsg_len = (skb_tail_pointer(skb) -
 						  (u8 *)nlh);
 			} else {
@@ -1904,9 +1904,8 @@ drop:
 }
 #endif
 
-static int
-ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, struct mfc_cache *c,
-		 struct rtmsg *rtm)
+static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+			      struct mfc_cache *c, struct rtmsg *rtm)
 {
 	int ct;
 	struct rtnexthop *nhp;
@@ -1994,11 +1993,93 @@ int ipmr_get_route(struct net *net,
 
 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
 		cache->mfc_flags |= MFC_NOTIFY;
-	err = ipmr_fill_mroute(mrt, skb, cache, rtm);
+	err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
 	read_unlock(&mrt_lock);
 	return err;
 }
 
+static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+			    u32 pid, u32 seq, struct mfc_cache *c)
+{
+	struct nlmsghdr *nlh;
+	struct rtmsg *rtm;
+
+	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	rtm = nlmsg_data(nlh);
+	rtm->rtm_family   = RTNL_FAMILY_IPMR;
+	rtm->rtm_dst_len  = 32;
+	rtm->rtm_src_len  = 32;
+	rtm->rtm_tos      = 0;
+	rtm->rtm_table    = mrt->id;
+	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
+	rtm->rtm_type     = RTN_MULTICAST;
+	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
+	rtm->rtm_protocol = RTPROT_UNSPEC;
+	rtm->rtm_flags    = 0;
+
+	NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
+	NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);
+
+	if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct mr_table *mrt;
+	struct mfc_cache *mfc;
+	unsigned int t = 0, s_t;
+	unsigned int h = 0, s_h;
+	unsigned int e = 0, s_e;
+
+	s_t = cb->args[0];
+	s_h = cb->args[1];
+	s_e = cb->args[2];
+
+	read_lock(&mrt_lock);
+	ipmr_for_each_table(mrt, net) {
+		if (t < s_t)
+			goto next_table;
+		if (t > s_t)
+			s_h = 0;
+		for (h = s_h; h < MFC_LINES; h++) {
+			list_for_each_entry(mfc, &mrt->mfc_cache_array[h], list) {
+				if (e < s_e)
+					goto next_entry;
+				if (ipmr_fill_mroute(mrt, skb,
+						     NETLINK_CB(cb->skb).pid,
+						     cb->nlh->nlmsg_seq,
+						     mfc) < 0)
+					goto done;
+next_entry:
+				e++;
+			}
+			e = s_e = 0;
+		}
+		s_h = 0;
+next_table:
+		t++;
+	}
+done:
+	read_unlock(&mrt_lock);
+
+	cb->args[2] = e;
+	cb->args[1] = h;
+	cb->args[0] = t;
+
+	return skb->len;
+}
+
 #ifdef CONFIG_PROC_FS
 /*
  *	The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
@@ -2355,6 +2436,7 @@ int __init ip_mr_init(void)
 		goto add_proto_fail;
 	}
 #endif
+	rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
 	return 0;
 
 #ifdef CONFIG_IP_PIMSM_V2
-- 
cgit v1.2.3-59-g8ed1b


From 0c86980817853e4166f66c7cd18bc5fe1adeb5f7 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Thu, 22 Apr 2010 10:27:48 +0300
Subject: mac80211: Fix sta->last_tx_rate setting with no-op rate control
 devices

The sta->last_tx_rate is traditionally updated just before transmitting a
frame based on information from the rate control algorithm. However, for
hardware drivers with IEEE80211_HW_HAS_RATE_CONTROL this is not performed,
as the rate control algorithm is not executed, and because the used rate is
not known before the frame has actually been transmitted.

This causes at least a fixed 1Mb/s to be reported to user space. A few other
instances of code also rely on this information.

Fix this by setting the sta->last_tx_rate in tx_status handling. There, look
for last rates entry set by the driver, and use that as value for
sta->last_tx_rate.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/status.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 11805a3a626f..94613af009f3 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -171,6 +171,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 	struct net_device *prev_dev = NULL;
 	struct sta_info *sta, *tmp;
 	int retry_count = -1, i;
+	int rates_idx = -1;
 	bool send_to_cooked;
 
 	for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) {
@@ -178,6 +179,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 		if (i >= hw->max_rates) {
 			info->status.rates[i].idx = -1;
 			info->status.rates[i].count = 0;
+		} else if (info->status.rates[i].idx >= 0) {
+			rates_idx = i;
 		}
 
 		retry_count += info->status.rates[i].count;
@@ -206,6 +209,10 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
 			return;
 		}
 
+		if ((local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) &&
+		    (rates_idx != -1))
+			sta->last_tx_rate = info->status.rates[rates_idx];
+
 		if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) &&
 		    (ieee80211_is_data_qos(fc))) {
 			u16 tid, ssn;
-- 
cgit v1.2.3-59-g8ed1b


From 0db3f0f49a99db33a411af1c4352839c0296eff3 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Mon, 26 Apr 2010 03:41:00 +0000
Subject: phonet: use phonet_pernet instead of directly net_generic

As in pppoe, for example, introduce phonet_pernet and use it instead of calling
net_generic directly.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/phonet/pn_dev.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c
index 9b4ced6e0968..c33da6576942 100644
--- a/net/phonet/pn_dev.c
+++ b/net/phonet/pn_dev.c
@@ -46,9 +46,16 @@ struct phonet_net {
 
 int phonet_net_id __read_mostly;
 
+static struct phonet_net *phonet_pernet(struct net *net)
+{
+	BUG_ON(!net);
+
+	return net_generic(net, phonet_net_id);
+}
+
 struct phonet_device_list *phonet_device_list(struct net *net)
 {
-	struct phonet_net *pnn = net_generic(net, phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(net);
 	return &pnn->pndevs;
 }
 
@@ -261,7 +268,7 @@ static int phonet_device_autoconf(struct net_device *dev)
 
 static void phonet_route_autodel(struct net_device *dev)
 {
-	struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(dev_net(dev));
 	unsigned i;
 	DECLARE_BITMAP(deleted, 64);
 
@@ -313,7 +320,7 @@ static struct notifier_block phonet_device_notifier = {
 /* Per-namespace Phonet devices handling */
 static int __net_init phonet_init_net(struct net *net)
 {
-	struct phonet_net *pnn = net_generic(net, phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(net);
 
 	if (!proc_net_fops_create(net, "phonet", 0, &pn_sock_seq_fops))
 		return -ENOMEM;
@@ -326,7 +333,7 @@ static int __net_init phonet_init_net(struct net *net)
 
 static void __net_exit phonet_exit_net(struct net *net)
 {
-	struct phonet_net *pnn = net_generic(net, phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(net);
 	struct net_device *dev;
 	unsigned i;
 
@@ -376,7 +383,7 @@ void phonet_device_exit(void)
 
 int phonet_route_add(struct net_device *dev, u8 daddr)
 {
-	struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(dev_net(dev));
 	struct phonet_routes *routes = &pnn->routes;
 	int err = -EEXIST;
 
@@ -393,7 +400,7 @@ int phonet_route_add(struct net_device *dev, u8 daddr)
 
 int phonet_route_del(struct net_device *dev, u8 daddr)
 {
-	struct phonet_net *pnn = net_generic(dev_net(dev), phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(dev_net(dev));
 	struct phonet_routes *routes = &pnn->routes;
 
 	daddr = daddr >> 2;
@@ -413,7 +420,7 @@ int phonet_route_del(struct net_device *dev, u8 daddr)
 
 struct net_device *phonet_route_get(struct net *net, u8 daddr)
 {
-	struct phonet_net *pnn = net_generic(net, phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(net);
 	struct phonet_routes *routes = &pnn->routes;
 	struct net_device *dev;
 
@@ -428,7 +435,7 @@ struct net_device *phonet_route_get(struct net *net, u8 daddr)
 
 struct net_device *phonet_route_output(struct net *net, u8 daddr)
 {
-	struct phonet_net *pnn = net_generic(net, phonet_net_id);
+	struct phonet_net *pnn = phonet_pernet(net);
 	struct phonet_routes *routes = &pnn->routes;
 	struct net_device *dev;
 
-- 
cgit v1.2.3-59-g8ed1b


From 4a4771a58e13b46bfdc999fe481e550f8c6937ff Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 25 Apr 2010 22:20:06 +0000
Subject: net: use sk_sleep()

Commit aa395145 (net: sk_sleep() helper) missed three files in the
conversion.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvtap.c  |  6 +++---
 net/caif/caif_socket.c | 30 +++++++++++++++---------------
 net/rxrpc/ar-recvmsg.c |  6 +++---
 3 files changed, 21 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 85d6420f8404..d97e1fd234ba 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -181,7 +181,7 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
 		return -ENOLINK;
 
 	skb_queue_tail(&q->sk.sk_receive_queue, skb);
-	wake_up_interruptible_poll(q->sk.sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND);
+	wake_up_interruptible_poll(sk_sleep(&q->sk), POLLIN | POLLRDNORM | POLLRDBAND);
 	return 0;
 }
 
@@ -562,7 +562,7 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb,
 	struct sk_buff *skb;
 	ssize_t ret = 0;
 
-	add_wait_queue(q->sk.sk_sleep, &wait);
+	add_wait_queue(sk_sleep(&q->sk), &wait);
 	while (len) {
 		current->state = TASK_INTERRUPTIBLE;
 
@@ -587,7 +587,7 @@ static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb,
 	}
 
 	current->state = TASK_RUNNING;
-	remove_wait_queue(q->sk.sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(&q->sk), &wait);
 	return ret;
 }
 
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 90317e7d10b4..d455375789fb 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -169,7 +169,7 @@ static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt)
 
 	/* Signal reader that data is available. */
 
-	wake_up_interruptible(cf_sk->sk.sk_sleep);
+	wake_up_interruptible(sk_sleep(&cf_sk->sk));
 
 	return 0;
 }
@@ -203,7 +203,7 @@ static void caif_sktflowctrl_cb(struct cflayer *layr,
 		dbfs_atomic_inc(&cnt.num_tx_flow_on_ind);
 		/* Signal reader that data is available. */
 		SET_TX_FLOW_ON(cf_sk);
-		wake_up_interruptible(cf_sk->sk.sk_sleep);
+		wake_up_interruptible(sk_sleep(&cf_sk->sk));
 		break;
 
 	case CAIF_CTRLCMD_FLOW_OFF_IND:
@@ -217,7 +217,7 @@ static void caif_sktflowctrl_cb(struct cflayer *layr,
 		caif_assert(STATE_IS_OPEN(cf_sk));
 		SET_PENDING_OFF(cf_sk);
 		SET_TX_FLOW_ON(cf_sk);
-		wake_up_interruptible(cf_sk->sk.sk_sleep);
+		wake_up_interruptible(sk_sleep(&cf_sk->sk));
 		break;
 
 	case CAIF_CTRLCMD_DEINIT_RSP:
@@ -225,8 +225,8 @@ static void caif_sktflowctrl_cb(struct cflayer *layr,
 		caif_assert(!STATE_IS_OPEN(cf_sk));
 		SET_PENDING_OFF(cf_sk);
 		if (!STATE_IS_PENDING_DESTROY(cf_sk)) {
-			if (cf_sk->sk.sk_sleep != NULL)
-				wake_up_interruptible(cf_sk->sk.sk_sleep);
+			if (sk_sleep(&cf_sk->sk) != NULL)
+				wake_up_interruptible(sk_sleep(&cf_sk->sk));
 		}
 		dbfs_atomic_inc(&cnt.num_deinit);
 		sock_put(&cf_sk->sk);
@@ -238,7 +238,7 @@ static void caif_sktflowctrl_cb(struct cflayer *layr,
 		SET_STATE_CLOSED(cf_sk);
 		SET_PENDING_OFF(cf_sk);
 		SET_TX_FLOW_OFF(cf_sk);
-		wake_up_interruptible(cf_sk->sk.sk_sleep);
+		wake_up_interruptible(sk_sleep(&cf_sk->sk));
 		break;
 
 	case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
@@ -247,7 +247,7 @@ static void caif_sktflowctrl_cb(struct cflayer *layr,
 		/* Use sk_shutdown to indicate remote shutdown indication */
 		cf_sk->sk.sk_shutdown |= RCV_SHUTDOWN;
 		cf_sk->file_mode = 0;
-		wake_up_interruptible(cf_sk->sk.sk_sleep);
+		wake_up_interruptible(sk_sleep(&cf_sk->sk));
 		break;
 
 	default:
@@ -325,7 +325,7 @@ static int caif_recvmsg(struct kiocb *iocb, struct socket *sock,
 		release_sock(&cf_sk->sk);
 
 		result =
-		    wait_event_interruptible(*cf_sk->sk.sk_sleep,
+		    wait_event_interruptible(*sk_sleep(&cf_sk->sk),
 					     !STATE_IS_PENDING(cf_sk));
 
 		lock_sock(&(cf_sk->sk));
@@ -365,7 +365,7 @@ static int caif_recvmsg(struct kiocb *iocb, struct socket *sock,
 		release_sock(&cf_sk->sk);
 
 		/* Block reader until data arrives or socket is closed. */
-		if (wait_event_interruptible(*cf_sk->sk.sk_sleep,
+		if (wait_event_interruptible(*sk_sleep(&cf_sk->sk),
 					cfpkt_qpeek(cf_sk->pktq)
 					|| STATE_IS_REMOTE_SHUTDOWN(cf_sk)
 					|| !STATE_IS_OPEN(cf_sk)) ==
@@ -537,7 +537,7 @@ static int caif_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		 * for its conclusion.
 		 */
 		result =
-		    wait_event_interruptible(*cf_sk->sk.sk_sleep,
+		    wait_event_interruptible(*sk_sleep(&cf_sk->sk),
 					     !STATE_IS_PENDING(cf_sk));
 		/* I want to be alone on cf_sk (except status and queue) */
 		lock_sock(&(cf_sk->sk));
@@ -573,7 +573,7 @@ static int caif_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		release_sock(&cf_sk->sk);
 
 		/* Wait until flow is on or socket is closed */
-		if (wait_event_interruptible(*cf_sk->sk.sk_sleep,
+		if (wait_event_interruptible(*sk_sleep(&cf_sk->sk),
 					TX_FLOW_IS_ON(cf_sk)
 					|| !STATE_IS_OPEN(cf_sk)
 					|| STATE_IS_REMOTE_SHUTDOWN(cf_sk)
@@ -650,7 +650,7 @@ static int caif_sendmsg(struct kiocb *kiocb, struct socket *sock,
 		release_sock(&cf_sk->sk);
 
 		/* Wait until flow is on or socket is closed */
-		if (wait_event_interruptible(*cf_sk->sk.sk_sleep,
+		if (wait_event_interruptible(*sk_sleep(&cf_sk->sk),
 					TX_FLOW_IS_ON(cf_sk)
 					|| !STATE_IS_OPEN(cf_sk)
 					|| STATE_IS_REMOTE_SHUTDOWN(cf_sk)
@@ -898,7 +898,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uservaddr,
 			 * for its conclusion.
 			 */
 			result =
-			    wait_event_interruptible(*cf_sk->sk.sk_sleep,
+			    wait_event_interruptible(*sk_sleep(&cf_sk->sk),
 						     !STATE_IS_PENDING(cf_sk));
 
 			lock_sock(&(cf_sk->sk));
@@ -965,7 +965,7 @@ static int caif_connect(struct socket *sock, struct sockaddr *uservaddr,
 		release_sock(&cf_sk->sk);
 
 		result =
-		    wait_event_interruptible(*cf_sk->sk.sk_sleep,
+		    wait_event_interruptible(*sk_sleep(&cf_sk->sk),
 					     !STATE_IS_PENDING(cf_sk));
 
 		lock_sock(&(cf_sk->sk));
@@ -1107,7 +1107,7 @@ static int caif_release(struct socket *sock)
 	 * CAIF stack.
 	 */
 	if (!(sock->file->f_flags & O_NONBLOCK)) {
-		res = wait_event_interruptible(*cf_sk->sk.sk_sleep,
+		res = wait_event_interruptible(*sk_sleep(&cf_sk->sk),
 						!STATE_IS_PENDING(cf_sk));
 
 		if (res == -ERESTARTSYS) {
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 60c2b94e6b54..0c65013e3bfe 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -91,7 +91,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
 
 			/* wait for a message to turn up */
 			release_sock(&rx->sk);
-			prepare_to_wait_exclusive(rx->sk.sk_sleep, &wait,
+			prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait,
 						  TASK_INTERRUPTIBLE);
 			ret = sock_error(&rx->sk);
 			if (ret)
@@ -102,7 +102,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
 					goto wait_interrupted;
 				timeo = schedule_timeout(timeo);
 			}
-			finish_wait(rx->sk.sk_sleep, &wait);
+			finish_wait(sk_sleep(&rx->sk), &wait);
 			lock_sock(&rx->sk);
 			continue;
 		}
@@ -356,7 +356,7 @@ csum_copy_error:
 wait_interrupted:
 	ret = sock_intr_errno(timeo);
 wait_error:
-	finish_wait(rx->sk.sk_sleep, &wait);
+	finish_wait(sk_sleep(&rx->sk), &wait);
 	if (continue_call)
 		rxrpc_put_call(continue_call);
 	if (copied)
-- 
cgit v1.2.3-59-g8ed1b


From 1fafc7a9353ef68e1b8d4bb130cb6402cf7dfd5a Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org>
Date: Sun, 25 Apr 2010 08:06:40 +0000
Subject: bridge br_multicast: Ensure to initialize
 BR_INPUT_SKB_CB(skb)->mrouters_only.

Even with commit 32dec5dd0233ebffa9cae25ce7ba6daeb7df4467 ("bridge
br_multicast: Don't refer to BR_INPUT_SKB_CB(skb)->mrouters_only
without IGMP snooping."), BR_INPUT_SKB_CB(skb)->mrouters_only is
not appropriately initialized if IGMP/MLD snooping support is
compiled in but disabled, so we can see garbage.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 38d1fbde5fb8..e481dbd19495 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1340,9 +1340,6 @@ static int br_multicast_ipv4_rcv(struct net_bridge *br,
 	unsigned offset;
 	int err;
 
-	BR_INPUT_SKB_CB(skb)->igmp = 0;
-	BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
-
 	/* We treat OOM as packet loss for now. */
 	if (!pskb_may_pull(skb, sizeof(*iph)))
 		return -EINVAL;
@@ -1440,9 +1437,6 @@ static int br_multicast_ipv6_rcv(struct net_bridge *br,
 	unsigned offset;
 	int err;
 
-	BR_INPUT_SKB_CB(skb)->igmp = 0;
-	BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
-
 	if (!pskb_may_pull(skb, sizeof(*ip6h)))
 		return -EINVAL;
 
@@ -1550,6 +1544,9 @@ out:
 int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
 		     struct sk_buff *skb)
 {
+	BR_INPUT_SKB_CB(skb)->igmp = 0;
+	BR_INPUT_SKB_CB(skb)->mrouters_only = 0;
+
 	if (br->multicast_disabled)
 		return 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From d4c4f07df16c767b8efbc44e7cdf795fac326b33 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 27 Apr 2010 10:16:54 -0700
Subject: bridge: Fix build of ipv6 multicast code.

Based upon a report from Stephen Rothwell:

--------------------
net/bridge/br_multicast.c: In function 'br_ip6_multicast_alloc_query':
net/bridge/br_multicast.c:469: error: implicit declaration of function 'csum_ipv6_magic'

Introduced by commit 08b202b6726459626c73ecfa08fcdc8c3efc76c2 ("bridge
br_multicast: IPv6 MLD support") from the net tree.

csum_ipv6_magic is declared in net/ip6_checksum.h ...
--------------------

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index e481dbd19495..2048ef0f9be5 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -28,6 +28,7 @@
 #include <net/ipv6.h>
 #include <net/mld.h>
 #include <net/addrconf.h>
+#include <net/ip6_checksum.h>
 #endif
 
 #include "br_private.h"
-- 
cgit v1.2.3-59-g8ed1b


From 7180f7751d5a1b6f1ef40285b5e928970cdd5306 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 27 Apr 2010 07:13:06 +0000
Subject: bridge: use is_multicast_ether_addr

Use existing inline function.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 5b8a6e73b02f..82599405dc15 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -36,7 +36,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	skb_reset_mac_header(skb);
 	skb_pull(skb, ETH_HLEN);
 
-	if (dest[0] & 1) {
+	if (is_multicast_ether_addr(dest)) {
 		if (br_multicast_rcv(br, NULL, skb))
 			goto out;
 
-- 
cgit v1.2.3-59-g8ed1b


From dcdca2c49bb6328bbc7cd8d73434c308b5dd0df2 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 27 Apr 2010 07:13:11 +0000
Subject: bridge: multicast router list manipulation

I prefer that the hlist be only accessed through the hlist macro
objects. Explicit twiddling of links (especially with RCU) exposes
the code to future bugs.

Compile tested only.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 2048ef0f9be5..fcba313f1894 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1042,21 +1042,21 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 static void br_multicast_add_router(struct net_bridge *br,
 				    struct net_bridge_port *port)
 {
-	struct hlist_node *p;
-	struct hlist_node **h;
-
-	for (h = &br->router_list.first;
-	     (p = *h) &&
-	     (unsigned long)container_of(p, struct net_bridge_port, rlist) >
-	     (unsigned long)port;
-	     h = &p->next)
-		;
-
-	port->rlist.pprev = h;
-	port->rlist.next = p;
-	rcu_assign_pointer(*h, &port->rlist);
-	if (p)
-		p->pprev = &port->rlist.next;
+	struct net_bridge_port *p;
+	struct hlist_node *n, *last = NULL;
+
+	hlist_for_each_entry(p, n, &br->router_list, rlist) {
+		if ((unsigned long) port >= (unsigned long) p) {
+			hlist_add_before_rcu(n, &port->rlist);
+			return;
+		}
+		last = n;
+	}
+
+	if (last)
+		hlist_add_after_rcu(last, &port->rlist);
+	else
+		hlist_add_head_rcu(&port->rlist, &br->router_list);
 }
 
 static void br_multicast_mark_router(struct net_bridge *br,
-- 
cgit v1.2.3-59-g8ed1b


From 6c37e5de456987f5bc80879afde05aa120784095 Mon Sep 17 00:00:00 2001
From: Flavio Leitner <fleitner@redhat.com>
Date: Mon, 26 Apr 2010 18:33:27 +0000
Subject: TCP: avoid to send keepalive probes if receiving data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

RFC 1122 says the following:
...
  Keep-alive packets MUST only be sent when no data or
  acknowledgement packets have been received for the
  connection within an interval.
...

The acknowledgement packet is resetting the keepalive
timer but the data packet isn't. This patch fixes it by
checking the timestamp of the last received data packet
too when the keepalive timer expires.

Signed-off-by: Flavio Leitner <fleitner@redhat.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    | 8 ++++++++
 net/ipv4/tcp.c       | 2 +-
 net/ipv4/tcp_timer.c | 4 ++--
 3 files changed, 11 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3f87fd87bc9c..fb5c66b2ab81 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1033,6 +1033,14 @@ static inline int keepalive_probes(const struct tcp_sock *tp)
 	return tp->keepalive_probes ? : sysctl_tcp_keepalive_probes;
 }
 
+static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
+{
+	const struct inet_connection_sock *icsk = &tp->inet_conn;
+
+	return min_t(u32, tcp_time_stamp - icsk->icsk_ack.lrcvtime,
+			  tcp_time_stamp - tp->rcv_tstamp);
+}
+
 static inline int tcp_fin_time(const struct sock *sk)
 {
 	int fin_timeout = tcp_sk(sk)->linger2 ? : sysctl_tcp_fin_timeout;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6689c61cab47..8ce29747ad9b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2298,7 +2298,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			if (sock_flag(sk, SOCK_KEEPOPEN) &&
 			    !((1 << sk->sk_state) &
 			      (TCPF_CLOSE | TCPF_LISTEN))) {
-				__u32 elapsed = tcp_time_stamp - tp->rcv_tstamp;
+				u32 elapsed = keepalive_time_elapsed(tp);
 				if (tp->keepalive_time > elapsed)
 					elapsed = tp->keepalive_time - elapsed;
 				else
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c732be00606b..440a5c6004f6 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -517,7 +517,7 @@ static void tcp_keepalive_timer (unsigned long data)
 	struct sock *sk = (struct sock *) data;
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
-	__u32 elapsed;
+	u32 elapsed;
 
 	/* Only process if socket is not in use. */
 	bh_lock_sock(sk);
@@ -554,7 +554,7 @@ static void tcp_keepalive_timer (unsigned long data)
 	if (tp->packets_out || tcp_send_head(sk))
 		goto resched;
 
-	elapsed = tcp_time_stamp - tp->rcv_tstamp;
+	elapsed = keepalive_time_elapsed(tp);
 
 	if (elapsed >= keepalive_time_when(tp)) {
 		if (icsk->icsk_probes_out >= keepalive_probes(tp)) {
-- 
cgit v1.2.3-59-g8ed1b


From f7917af92024d43bc20bc1afc92de27b0bd0f50b Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Tue, 27 Apr 2010 00:26:34 +0200
Subject: mac80211: fix handling of 4-address-mode in ieee80211_change_iface

A misplaced interface type check bails out too early if the interface
is not in monitor mode. This patch moves it to the right place, so that
it only covers changes to the monitor flags.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Cc: stable@kernel.org
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f97dda735cbb..592f07d2ae5c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -96,9 +96,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
 					    params->mesh_id_len,
 					    params->mesh_id);
 
-	if (sdata->vif.type != NL80211_IFTYPE_MONITOR || !flags)
-		return 0;
-
 	if (type == NL80211_IFTYPE_AP_VLAN &&
 	    params && params->use_4addr == 0)
 		rcu_assign_pointer(sdata->u.vlan.sta, NULL);
@@ -106,7 +103,9 @@ static int ieee80211_change_iface(struct wiphy *wiphy,
 		 params && params->use_4addr >= 0)
 		sdata->u.mgd.use_4addr = params->use_4addr;
 
-	sdata->u.mntr_flags = *flags;
+	if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags)
+		sdata->u.mntr_flags = *flags;
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From fd8aaaf3519f3fd3c82594e90bc6808072b94d54 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Tue, 27 Apr 2010 01:23:35 +0200
Subject: cfg80211: add ap isolation support

This is used to configure APs to not bridge traffic between connected stations.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 5 +++++
 include/net/cfg80211.h  | 2 ++
 net/wireless/nl80211.c  | 4 ++++
 3 files changed, 11 insertions(+)

(limited to 'net')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 2ea3edeee7aa..f8750f9a65b8 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -709,6 +709,9 @@ enum nl80211_commands {
  *	NL80211_CMD_AUTHENTICATE, NL80211_CMD_DEAUTHENTICATE,
  *	NL80211_CMD_DISASSOCIATE.
  *
+ * @NL80211_ATTR_AP_ISOLATE: (AP mode) Do not forward traffic between stations
+ *	connected to this BSS.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -864,6 +867,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_LOCAL_STATE_CHANGE,
 
+	NL80211_ATTR_AP_ISOLATE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5a4efe54cffd..f6b29bf925d0 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -511,6 +511,7 @@ struct mpath_info {
  * @basic_rates: basic rates in IEEE 802.11 format
  *	(or NULL for no change)
  * @basic_rates_len: number of basic rates
+ * @ap_isolate: do not forward packets between connected stations
  */
 struct bss_parameters {
 	int use_cts_prot;
@@ -518,6 +519,7 @@ struct bss_parameters {
 	int use_short_slot_time;
 	u8 *basic_rates;
 	u8 basic_rates_len;
+	int ap_isolate;
 };
 
 struct mesh_config {
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index df5505b3930c..c27bef8e0c11 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -151,6 +151,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_PS_STATE] = { .type = NLA_U32 },
 	[NL80211_ATTR_CQM] = { .type = NLA_NESTED, },
 	[NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG },
+	[NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 },
 };
 
 /* policy for the attributes */
@@ -2441,6 +2442,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
 	params.use_cts_prot = -1;
 	params.use_short_preamble = -1;
 	params.use_short_slot_time = -1;
+	params.ap_isolate = -1;
 
 	if (info->attrs[NL80211_ATTR_BSS_CTS_PROT])
 		params.use_cts_prot =
@@ -2457,6 +2459,8 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
 		params.basic_rates_len =
 			nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
 	}
+	if (info->attrs[NL80211_ATTR_AP_ISOLATE])
+		params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]);
 
 	rtnl_lock();
 
-- 
cgit v1.2.3-59-g8ed1b


From 7b7b5e56d7bdfdd0eb5ea13e6c9613b16b8eac46 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Tue, 27 Apr 2010 01:23:36 +0200
Subject: mac80211: implement ap isolation support

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/cfg.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 592f07d2ae5c..e13fb3a62239 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1113,6 +1113,13 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
 		changed |= BSS_CHANGED_BASIC_RATES;
 	}
 
+	if (params->ap_isolate >= 0) {
+		if (params->ap_isolate)
+			sdata->flags |= IEEE80211_SDATA_DONT_BRIDGE_PACKETS;
+		else
+			sdata->flags &= ~IEEE80211_SDATA_DONT_BRIDGE_PACKETS;
+	}
+
 	ieee80211_bss_info_change_notify(sdata, changed);
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 195e294d21e88af879da4f88db2ceeb4ec28a755 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Tue, 27 Apr 2010 12:47:40 +0300
Subject: mac80211: Determine dynamic PS timeout based on ps-qos network
 latency

Determine the dynamic PS timeout based on the configured ps-qos network
latency. For backwards wext compatibility, allow the dynamic PS timeout
configured by the cfg80211 to overrule the automatically determined value.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h |  5 ++++-
 net/mac80211/cfg.c     |  4 ++--
 net/mac80211/main.c    |  2 ++
 net/mac80211/mlme.c    | 21 +++++++++++++++++++++
 4 files changed, 29 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7dc5a6790397..c26de6cb12f8 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -668,6 +668,9 @@ enum ieee80211_smps_mode {
  * @dynamic_ps_timeout: The dynamic powersave timeout (in ms), see the
  *	powersave documentation below. This variable is valid only when
  *	the CONF_PS flag is set.
+ * @dynamic_ps_forced_timeout: The dynamic powersave timeout (in ms) configured
+ *	by cfg80211 (essentially, wext) If set, this value overrules the value
+ *	chosen by mac80211 based on ps qos network latency.
  *
  * @power_level: requested transmit power (in dBm)
  *
@@ -687,7 +690,7 @@ enum ieee80211_smps_mode {
  */
 struct ieee80211_conf {
 	u32 flags;
-	int power_level, dynamic_ps_timeout;
+	int power_level, dynamic_ps_timeout, dynamic_ps_forced_timeout;
 	int max_sleep_period;
 
 	u16 listen_interval;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e13fb3a62239..b575a5066219 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1404,11 +1404,11 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
 		return -EOPNOTSUPP;
 
 	if (enabled == sdata->u.mgd.powersave &&
-	    timeout == conf->dynamic_ps_timeout)
+	    timeout == conf->dynamic_ps_forced_timeout)
 		return 0;
 
 	sdata->u.mgd.powersave = enabled;
-	conf->dynamic_ps_timeout = timeout;
+	conf->dynamic_ps_forced_timeout = timeout;
 
 	/* no change, but if automatic follow powersave */
 	mutex_lock(&sdata->u.mgd.mtx);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 4afe851cf8dc..ebcca0eaf1dc 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -569,6 +569,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	local->hw.conf.listen_interval = local->hw.max_listen_interval;
 
+	local->hw.conf.dynamic_ps_forced_timeout = -1;
+
 	result = sta_info_start(local);
 	if (result < 0)
 		goto fail_sta_info;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d811e3fa1d75..2d1a2bef366d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -475,6 +475,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 {
 	struct ieee80211_sub_if_data *sdata, *found = NULL;
 	int count = 0;
+	int timeout;
 
 	if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) {
 		local->ps_sdata = NULL;
@@ -508,6 +509,26 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 		beaconint_us = ieee80211_tu_to_usec(
 					found->vif.bss_conf.beacon_int);
 
+		timeout = local->hw.conf.dynamic_ps_forced_timeout;
+		if (timeout < 0) {
+			/*
+			 * The 2 second value is there for compatibility until
+			 * the PM_QOS_NETWORK_LATENCY is configured with real
+			 * values.
+			 */
+			if (latency == 2000000000)
+				timeout = 100;
+			else if (latency <= 50000)
+				timeout = 300;
+			else if (latency <= 100000)
+				timeout = 100;
+			else if (latency <= 500000)
+				timeout = 50;
+			else
+				timeout = 0;
+		}
+		local->hw.conf.dynamic_ps_timeout = timeout;
+
 		if (beaconint_us > latency) {
 			local->ps_sdata = NULL;
 		} else {
-- 
cgit v1.2.3-59-g8ed1b


From 9043f3b89abebfbfe4b8d64c7b71b9ac0b9eaa0b Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Tue, 27 Apr 2010 12:47:41 +0300
Subject: cfg80211: Remove default dynamic PS timeout value

Now that the mac80211 is choosing dynamic ps timeouts based on the ps-qos
network latency configuration, configure a default value of -1 as the dynamic
ps timeout in cfg80211. This value allows the mac80211 to determine the value
to be used.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/cfg80211.h | 2 ++
 net/wireless/core.c    | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f6b29bf925d0..7d10c0182f53 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1020,6 +1020,8 @@ struct cfg80211_pmksa {
  *	RSN IE. It allows for faster roaming between WPA2 BSSIDs.
  * @del_pmksa: Delete a cached PMKID.
  * @flush_pmksa: Flush all cached PMKIDs.
+ * @set_power_mgmt: Configure WLAN power management. A timeout value of -1
+ *	allows the driver to adjust the dynamic ps timeout value.
  * @set_cqm_rssi_config: Configure connection quality monitor RSSI threshold.
  *
  */
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 7fdb9409ad2a..40cbbbfbccbf 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -704,7 +704,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb,
 			wdev->ps = true;
 		else
 			wdev->ps = false;
-		wdev->ps_timeout = 100;
+		/* allow mac80211 to determine the timeout */
+		wdev->ps_timeout = -1;
 		if (rdev->ops->set_power_mgmt)
 			if (rdev->ops->set_power_mgmt(wdev->wiphy, dev,
 						      wdev->ps,
-- 
cgit v1.2.3-59-g8ed1b


From a060bbfe4ee95d115e8f9705a66894ac34e2c475 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 27 Apr 2010 11:59:34 +0200
Subject: mac80211: give virtual interface to hw_scan

When scanning, it is somewhat important to scan
on the correct virtual interface. All drivers
that currently implement hw_scan only support a
single virtual interface, but that may change
and then we'd want to be ready.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/at76c50x-usb.c       | 1 +
 drivers/net/wireless/iwlwifi/iwl-core.h   | 4 +++-
 drivers/net/wireless/iwlwifi/iwl-scan.c   | 3 ++-
 drivers/net/wireless/mac80211_hwsim.c     | 1 +
 drivers/net/wireless/wl12xx/wl1251_main.c | 1 +
 drivers/net/wireless/wl12xx/wl1271_main.c | 1 +
 include/net/mac80211.h                    | 2 +-
 net/mac80211/driver-ops.h                 | 5 +++--
 net/mac80211/driver-trace.h               | 9 ++++++---
 net/mac80211/scan.c                       | 4 ++--
 10 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c
index 0fb419936dff..7a626d4e100f 100644
--- a/drivers/net/wireless/at76c50x-usb.c
+++ b/drivers/net/wireless/at76c50x-usb.c
@@ -1889,6 +1889,7 @@ static void at76_dwork_hw_scan(struct work_struct *work)
 }
 
 static int at76_hw_scan(struct ieee80211_hw *hw,
+			struct ieee80211_vif *vif,
 			struct cfg80211_scan_request *req)
 {
 	struct at76_priv *priv = hw->priv;
diff --git a/drivers/net/wireless/iwlwifi/iwl-core.h b/drivers/net/wireless/iwlwifi/iwl-core.h
index a0cc11ecbe93..727360944859 100644
--- a/drivers/net/wireless/iwlwifi/iwl-core.h
+++ b/drivers/net/wireless/iwlwifi/iwl-core.h
@@ -512,7 +512,9 @@ static inline __le32 iwl_hw_set_rate_n_flags(u8 rate, u32 flags)
 void iwl_init_scan_params(struct iwl_priv *priv);
 int iwl_scan_cancel(struct iwl_priv *priv);
 int iwl_scan_cancel_timeout(struct iwl_priv *priv, unsigned long ms);
-int iwl_mac_hw_scan(struct ieee80211_hw *hw, struct cfg80211_scan_request *req);
+int iwl_mac_hw_scan(struct ieee80211_hw *hw,
+		    struct ieee80211_vif *vif,
+		    struct cfg80211_scan_request *req);
 void iwl_internal_short_hw_scan(struct iwl_priv *priv);
 int iwl_force_reset(struct iwl_priv *priv, int mode);
 u16 iwl_fill_probe_req(struct iwl_priv *priv, struct ieee80211_mgmt *frame,
diff --git a/drivers/net/wireless/iwlwifi/iwl-scan.c b/drivers/net/wireless/iwlwifi/iwl-scan.c
index 1bc1d29f2a07..447c3018296d 100644
--- a/drivers/net/wireless/iwlwifi/iwl-scan.c
+++ b/drivers/net/wireless/iwlwifi/iwl-scan.c
@@ -312,7 +312,8 @@ static int iwl_scan_initiate(struct iwl_priv *priv)
 }
 
 int iwl_mac_hw_scan(struct ieee80211_hw *hw,
-		     struct cfg80211_scan_request *req)
+		    struct ieee80211_vif *vif,
+		    struct cfg80211_scan_request *req)
 {
 	struct iwl_priv *priv = hw->priv;
 	int ret;
diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 79bb8833ddb9..d016933d7573 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -973,6 +973,7 @@ static void hw_scan_done(struct work_struct *work)
 }
 
 static int mac80211_hwsim_hw_scan(struct ieee80211_hw *hw,
+				  struct ieee80211_vif *vif,
 				  struct cfg80211_scan_request *req)
 {
 	struct hw_scan_done *hsd = kzalloc(sizeof(*hsd), GFP_KERNEL);
diff --git a/drivers/net/wireless/wl12xx/wl1251_main.c b/drivers/net/wireless/wl12xx/wl1251_main.c
index 390cee7e61a1..4d3be80c220e 100644
--- a/drivers/net/wireless/wl12xx/wl1251_main.c
+++ b/drivers/net/wireless/wl12xx/wl1251_main.c
@@ -856,6 +856,7 @@ out:
 }
 
 static int wl1251_op_hw_scan(struct ieee80211_hw *hw,
+			     struct ieee80211_vif *vif,
 			     struct cfg80211_scan_request *req)
 {
 	struct wl1251 *wl = hw->priv;
diff --git a/drivers/net/wireless/wl12xx/wl1271_main.c b/drivers/net/wireless/wl12xx/wl1271_main.c
index 283d5dade1ae..08ff6447dcd9 100644
--- a/drivers/net/wireless/wl12xx/wl1271_main.c
+++ b/drivers/net/wireless/wl12xx/wl1271_main.c
@@ -1523,6 +1523,7 @@ out:
 }
 
 static int wl1271_op_hw_scan(struct ieee80211_hw *hw,
+			     struct ieee80211_vif *vif,
 			     struct cfg80211_scan_request *req)
 {
 	struct wl1271 *wl = hw->priv;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index c26de6cb12f8..a36e0df5a17c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1657,7 +1657,7 @@ struct ieee80211_ops {
 				struct ieee80211_key_conf *conf,
 				struct ieee80211_sta *sta,
 				u32 iv32, u16 *phase1key);
-	int (*hw_scan)(struct ieee80211_hw *hw,
+	int (*hw_scan)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 		       struct cfg80211_scan_request *req);
 	void (*sw_scan_start)(struct ieee80211_hw *hw);
 	void (*sw_scan_complete)(struct ieee80211_hw *hw);
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index d1f8a7c2225a..997008e236ff 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -154,14 +154,15 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local,
 }
 
 static inline int drv_hw_scan(struct ieee80211_local *local,
+			      struct ieee80211_sub_if_data *sdata,
 			      struct cfg80211_scan_request *req)
 {
 	int ret;
 
 	might_sleep();
 
-	ret = local->ops->hw_scan(&local->hw, req);
-	trace_drv_hw_scan(local, req, ret);
+	ret = local->ops->hw_scan(&local->hw, &sdata->vif, req);
+	trace_drv_hw_scan(local, sdata, req, ret);
 	return ret;
 }
 
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index e209cb82ff29..ce734b58d07a 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -363,23 +363,26 @@ TRACE_EVENT(drv_update_tkip_key,
 
 TRACE_EVENT(drv_hw_scan,
 	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_sub_if_data *sdata,
 		 struct cfg80211_scan_request *req, int ret),
 
-	TP_ARGS(local, req, ret),
+	TP_ARGS(local, sdata, req, ret),
 
 	TP_STRUCT__entry(
 		LOCAL_ENTRY
+		VIF_ENTRY
 		__field(int, ret)
 	),
 
 	TP_fast_assign(
 		LOCAL_ASSIGN;
+		VIF_ASSIGN;
 		__entry->ret = ret;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT " ret:%d",
-		LOCAL_PR_ARG, __entry->ret
+		LOCAL_PR_FMT VIF_PR_FMT " ret:%d",
+		LOCAL_PR_ARG,VIF_PR_ARG, __entry->ret
 	)
 );
 
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index eb86a5f6e645..2b1f1f3d6a58 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -410,7 +410,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 
 	if (local->ops->hw_scan) {
 		WARN_ON(!ieee80211_prep_hw_scan(local));
-		rc = drv_hw_scan(local, local->hw_scan_req);
+		rc = drv_hw_scan(local, sdata, local->hw_scan_req);
 	} else
 		rc = ieee80211_start_sw_scan(local);
 
@@ -654,7 +654,7 @@ void ieee80211_scan_work(struct work_struct *work)
 	}
 
 	if (local->hw_scan_req) {
-		int rc = drv_hw_scan(local, local->hw_scan_req);
+		int rc = drv_hw_scan(local, sdata, local->hw_scan_req);
 		mutex_unlock(&local->scan_mtx);
 		if (rc)
 			ieee80211_scan_completed(&local->hw, true);
-- 
cgit v1.2.3-59-g8ed1b


From a2c40249a36d0b4d76d1caf6bf806e4ae5b06e8a Mon Sep 17 00:00:00 2001
From: Shanyu Zhao <shanyu.zhao@intel.com>
Date: Tue, 27 Apr 2010 11:15:12 -0700
Subject: mac80211: fix rts threshold check

Currently whenever rts threshold is set, every packet will use RTS
protection whether or not its size exceeds the threshold. This is
due to a bug in the rts threshold check.
	if (len > tx->local->hw.wiphy->rts_threshold) {
		txrc.rts = rts = true;
	}
Basically it is comparing an int (len) and a u32 (rts_threshold),
and the variable len is assigned as:
	len = min_t(int, tx->skb->len + FCS_LEN,
			 tx->local->hw.wiphy->frag_threshold);
However, when frag_threshold is "-1", len is always "-1", which is
0xffffffff when compared as a u32, therefore rts is always set to true.

CC: stable@kernel.org
Signed-off-by: Shanyu Zhao <shanyu.zhao@intel.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index e2aa972d584f..f3841f43249e 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -593,7 +593,8 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 	struct ieee80211_hdr *hdr = (void *)tx->skb->data;
 	struct ieee80211_supported_band *sband;
 	struct ieee80211_rate *rate;
-	int i, len;
+	int i;
+	u32 len;
 	bool inval = false, rts = false, short_preamble = false;
 	struct ieee80211_tx_rate_control txrc;
 	u32 sta_flags;
@@ -602,7 +603,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx)
 
 	sband = tx->local->hw.wiphy->bands[tx->channel->band];
 
-	len = min_t(int, tx->skb->len + FCS_LEN,
+	len = min_t(u32, tx->skb->len + FCS_LEN,
 			 tx->local->hw.wiphy->frag_threshold);
 
 	/* set up the tx rate control struct we give the RC algo */
-- 
cgit v1.2.3-59-g8ed1b


From a9cbd588fdb71ea415754c885e2f9f03e6bf1ba0 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Mon, 26 Apr 2010 23:06:24 +0000
Subject: net: reimplement softnet_data.output_queue as a FIFO queue

reimplement softnet_data.output_queue as a FIFO queue to keep the
fairness among the qdiscs rescheduled.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
----
 include/linux/netdevice.h |    1 +
 net/core/dev.c            |   22 ++++++++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 22 ++++++++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3c5ed5f5274e..c04ca246395d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1385,6 +1385,7 @@ static inline int unregister_gifconf(unsigned int family)
  */
 struct softnet_data {
 	struct Qdisc		*output_queue;
+	struct Qdisc		**output_queue_tailp;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 4d43f1a80f74..3d314919a2cf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1557,8 +1557,9 @@ static inline void __netif_reschedule(struct Qdisc *q)
 
 	local_irq_save(flags);
 	sd = &__get_cpu_var(softnet_data);
-	q->next_sched = sd->output_queue;
-	sd->output_queue = q;
+	q->next_sched = NULL;
+	*sd->output_queue_tailp = q;
+	sd->output_queue_tailp = &q->next_sched;
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_restore(flags);
 }
@@ -2529,6 +2530,7 @@ static void net_tx_action(struct softirq_action *h)
 		local_irq_disable();
 		head = sd->output_queue;
 		sd->output_queue = NULL;
+		sd->output_queue_tailp = &sd->output_queue;
 		local_irq_enable();
 
 		while (head) {
@@ -5594,7 +5596,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 			    void *ocpu)
 {
 	struct sk_buff **list_skb;
-	struct Qdisc **list_net;
 	struct sk_buff *skb;
 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
 	struct softnet_data *sd, *oldsd;
@@ -5615,13 +5616,13 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	*list_skb = oldsd->completion_queue;
 	oldsd->completion_queue = NULL;
 
-	/* Find end of our output_queue. */
-	list_net = &sd->output_queue;
-	while (*list_net)
-		list_net = &(*list_net)->next_sched;
 	/* Append output queue from offline CPU. */
-	*list_net = oldsd->output_queue;
-	oldsd->output_queue = NULL;
+	if (oldsd->output_queue) {
+		*sd->output_queue_tailp = oldsd->output_queue;
+		sd->output_queue_tailp = oldsd->output_queue_tailp;
+		oldsd->output_queue = NULL;
+		oldsd->output_queue_tailp = &oldsd->output_queue;
+	}
 
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_enable();
@@ -5851,7 +5852,8 @@ static int __init net_dev_init(void)
 		skb_queue_head_init(&sd->input_pkt_queue);
 		sd->completion_queue = NULL;
 		INIT_LIST_HEAD(&sd->poll_list);
-
+		sd->output_queue = NULL;
+		sd->output_queue_tailp = &sd->output_queue;
 #ifdef CONFIG_RPS
 		sd->csd.func = rps_trigger_softirq;
 		sd->csd.info = sd;
-- 
cgit v1.2.3-59-g8ed1b


From c58dc01babfd58ec9e71a6ce080150dc27755d88 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 27 Apr 2010 15:05:31 -0700
Subject: net: Make RFS socket operations not be inet specific.

Idea from Eric Dumazet.

As for placement inside of struct sock, I tried to choose a place
that otherwise has a 32-bit hole on 64-bit systems.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/net/inet_sock.h | 37 -------------------------------------
 include/net/sock.h      | 38 ++++++++++++++++++++++++++++++++++++++
 net/ipv4/af_inet.c      |  8 ++++----
 net/ipv4/tcp_ipv4.c     |  2 +-
 net/ipv4/udp.c          |  4 ++--
 5 files changed, 45 insertions(+), 44 deletions(-)

(limited to 'net')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index c1d42957b86b..1653de515cee 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -102,7 +102,6 @@ struct rtable;
  * @uc_ttl - Unicast TTL
  * @inet_sport - Source port
  * @inet_id - ID counter for DF pkts
- * @rxhash - flow hash received from netif layer
  * @tos - TOS
  * @mc_ttl - Multicasting TTL
  * @is_icsk - is this an inet_connection_sock?
@@ -126,9 +125,6 @@ struct inet_sock {
 	__u16			cmsg_flags;
 	__be16			inet_sport;
 	__u16			inet_id;
-#ifdef CONFIG_RPS
-	__u32			rxhash;
-#endif
 
 	struct ip_options	*opt;
 	__u8			tos;
@@ -224,37 +220,4 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
 	return inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0;
 }
 
-static inline void inet_rps_record_flow(const struct sock *sk)
-{
-#ifdef CONFIG_RPS
-	struct rps_sock_flow_table *sock_flow_table;
-
-	rcu_read_lock();
-	sock_flow_table = rcu_dereference(rps_sock_flow_table);
-	rps_record_sock_flow(sock_flow_table, inet_sk(sk)->rxhash);
-	rcu_read_unlock();
-#endif
-}
-
-static inline void inet_rps_reset_flow(const struct sock *sk)
-{
-#ifdef CONFIG_RPS
-	struct rps_sock_flow_table *sock_flow_table;
-
-	rcu_read_lock();
-	sock_flow_table = rcu_dereference(rps_sock_flow_table);
-	rps_reset_sock_flow(sock_flow_table, inet_sk(sk)->rxhash);
-	rcu_read_unlock();
-#endif
-}
-
-static inline void inet_rps_save_rxhash(struct sock *sk, u32 rxhash)
-{
-#ifdef CONFIG_RPS
-	if (unlikely(inet_sk(sk)->rxhash != rxhash)) {
-		inet_rps_reset_flow(sk);
-		inet_sk(sk)->rxhash = rxhash;
-	}
-#endif
-}
 #endif	/* _INET_SOCK_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 4081db86a352..07822280d953 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -198,6 +198,7 @@ struct sock_common {
   *	@sk_rcvlowat: %SO_RCVLOWAT setting
   *	@sk_rcvtimeo: %SO_RCVTIMEO setting
   *	@sk_sndtimeo: %SO_SNDTIMEO setting
+  *	@sk_rxhash: flow hash received from netif layer
   *	@sk_filter: socket filtering instructions
   *	@sk_protinfo: private area, net family specific, when not using slab
   *	@sk_timer: sock cleanup timer
@@ -279,6 +280,9 @@ struct sock {
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	int			sk_rcvlowat;
+#ifdef CONFIG_RPS
+	__u32			sk_rxhash;
+#endif
 	unsigned long 		sk_flags;
 	unsigned long	        sk_lingertime;
 	struct sk_buff_head	sk_error_queue;
@@ -620,6 +624,40 @@ static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 	return sk->sk_backlog_rcv(sk, skb);
 }
 
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_reset_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void sock_rps_save_rxhash(struct sock *sk, u32 rxhash)
+{
+#ifdef CONFIG_RPS
+	if (unlikely(sk->sk_rxhash != rxhash)) {
+		sock_rps_reset_flow(sk);
+		sk->sk_rxhash = rxhash;
+	}
+#endif
+}
+
 #define sk_wait_event(__sk, __timeo, __condition)			\
 	({	int __rc;						\
 		release_sock(__sk);					\
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 9f52880fae10..c6c43bcd1c6f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -419,7 +419,7 @@ int inet_release(struct socket *sock)
 	if (sk) {
 		long timeout;
 
-		inet_rps_reset_flow(sk);
+		sock_rps_reset_flow(sk);
 
 		/* Applications forget to leave groups before exiting */
 		ip_mc_drop_socket(sk);
@@ -722,7 +722,7 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 {
 	struct sock *sk = sock->sk;
 
-	inet_rps_record_flow(sk);
+	sock_rps_record_flow(sk);
 
 	/* We may need to bind the socket. */
 	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
@@ -737,7 +737,7 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
 {
 	struct sock *sk = sock->sk;
 
-	inet_rps_record_flow(sk);
+	sock_rps_record_flow(sk);
 
 	/* We may need to bind the socket. */
 	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
@@ -755,7 +755,7 @@ int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	int addr_len = 0;
 	int err;
 
-	inet_rps_record_flow(sk);
+	sock_rps_record_flow(sk);
 
 	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
 				   flags & ~MSG_DONTWAIT, &addr_len);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4d6717d1e61c..771f8146a2e5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1672,7 +1672,7 @@ process:
 
 	skb->dev = NULL;
 
-	inet_rps_save_rxhash(sk, skb->rxhash);
+	sock_rps_save_rxhash(sk, skb->rxhash);
 
 	bh_lock_sock_nested(sk);
 	ret = 0;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1e18f9cc9247..fa3d2874db41 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1217,7 +1217,7 @@ int udp_disconnect(struct sock *sk, int flags)
 	sk->sk_state = TCP_CLOSE;
 	inet->inet_daddr = 0;
 	inet->inet_dport = 0;
-	inet_rps_save_rxhash(sk, 0);
+	sock_rps_save_rxhash(sk, 0);
 	sk->sk_bound_dev_if = 0;
 	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
 		inet_reset_saddr(sk);
@@ -1262,7 +1262,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	int rc;
 
 	if (inet_sk(sk)->inet_daddr)
-		inet_rps_save_rxhash(sk, skb->rxhash);
+		sock_rps_save_rxhash(sk, skb->rxhash);
 
 	rc = sock_queue_rcv_skb(sk, skb);
 	if (rc < 0) {
-- 
cgit v1.2.3-59-g8ed1b


From 6e7676c1a76aed6e957611d8d7a9e5592e23aeba Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 27 Apr 2010 15:07:33 -0700
Subject: net: batch skb dequeueing from softnet input_pkt_queue

batch skb dequeueing from softnet input_pkt_queue to reduce potential lock
contention when RPS is enabled.

Note: in the worst case, the number of packets in a softnet_data may
be double netdev_max_backlog.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  6 +++--
 net/core/dev.c            | 57 ++++++++++++++++++++++++++++++++---------------
 2 files changed, 43 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c04ca246395d..40d4c20d034b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1388,6 +1388,7 @@ struct softnet_data {
 	struct Qdisc		**output_queue_tailp;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
+	struct sk_buff_head	process_queue;
 
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
@@ -1402,10 +1403,11 @@ struct softnet_data {
 	struct napi_struct	backlog;
 };
 
-static inline void input_queue_head_incr(struct softnet_data *sd)
+static inline void input_queue_head_add(struct softnet_data *sd,
+					unsigned int len)
 {
 #ifdef CONFIG_RPS
-	sd->input_queue_head++;
+	sd->input_queue_head += len;
 #endif
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 3d314919a2cf..100dcbd29739 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2408,12 +2408,13 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 	__get_cpu_var(netdev_rx_stat).total++;
 
 	rps_lock(sd);
-	if (sd->input_pkt_queue.qlen <= netdev_max_backlog) {
-		if (sd->input_pkt_queue.qlen) {
+	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+		if (skb_queue_len(&sd->input_pkt_queue)) {
 enqueue:
 			__skb_queue_tail(&sd->input_pkt_queue, skb);
 #ifdef CONFIG_RPS
-			*qtail = sd->input_queue_head + sd->input_pkt_queue.qlen;
+			*qtail = sd->input_queue_head +
+					skb_queue_len(&sd->input_pkt_queue);
 #endif
 			rps_unlock(sd);
 			local_irq_restore(flags);
@@ -2934,13 +2935,21 @@ static void flush_backlog(void *arg)
 	struct sk_buff *skb, *tmp;
 
 	rps_lock(sd);
-	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp)
+	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
 		if (skb->dev == dev) {
 			__skb_unlink(skb, &sd->input_pkt_queue);
 			kfree_skb(skb);
-			input_queue_head_incr(sd);
+			input_queue_head_add(sd, 1);
 		}
+	}
 	rps_unlock(sd);
+
+	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
+		if (skb->dev == dev) {
+			__skb_unlink(skb, &sd->process_queue);
+			kfree_skb(skb);
+		}
+	}
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -3286,24 +3295,33 @@ static int process_backlog(struct napi_struct *napi, int quota)
 	}
 #endif
 	napi->weight = weight_p;
-	do {
+	local_irq_disable();
+	while (work < quota) {
 		struct sk_buff *skb;
+		unsigned int qlen;
+
+		while ((skb = __skb_dequeue(&sd->process_queue))) {
+			local_irq_enable();
+			__netif_receive_skb(skb);
+			if (++work >= quota)
+				return work;
+			local_irq_disable();
+		}
 
-		local_irq_disable();
 		rps_lock(sd);
-		skb = __skb_dequeue(&sd->input_pkt_queue);
-		if (!skb) {
+		qlen = skb_queue_len(&sd->input_pkt_queue);
+		if (qlen) {
+			input_queue_head_add(sd, qlen);
+			skb_queue_splice_tail_init(&sd->input_pkt_queue,
+						   &sd->process_queue);
+		}
+		if (qlen < quota - work) {
 			__napi_complete(napi);
-			rps_unlock(sd);
-			local_irq_enable();
-			break;
+			quota = work + qlen;
 		}
-		input_queue_head_incr(sd);
 		rps_unlock(sd);
-		local_irq_enable();
-
-		__netif_receive_skb(skb);
-	} while (++work < quota);
+	}
+	local_irq_enable();
 
 	return work;
 }
@@ -5630,8 +5648,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	/* Process offline CPU's input_pkt_queue */
 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx(skb);
-		input_queue_head_incr(oldsd);
+		input_queue_head_add(oldsd, 1);
 	}
+	while ((skb = __skb_dequeue(&oldsd->process_queue)))
+		netif_rx(skb);
 
 	return NOTIFY_OK;
 }
@@ -5850,6 +5870,7 @@ static int __init net_dev_init(void)
 		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
 		skb_queue_head_init(&sd->input_pkt_queue);
+		skb_queue_head_init(&sd->process_queue);
 		sd->completion_queue = NULL;
 		INIT_LIST_HEAD(&sd->poll_list);
 		sd->output_queue = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From c377411f2494a931ff7facdbb3a6839b1266bcf6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 27 Apr 2010 15:13:20 -0700
Subject: net: sk_add_backlog() take rmem_alloc into account

Current socket backlog limit is not enough to really stop DDOS attacks,
because the user thread spends a lot of time processing a full backlog
each round, and the user might spin heavily on the socket lock.

We should add backlog size and receive_queue size (aka rmem_alloc) to
pace writers, and let the user run without being slowed down too much.

Introduce a sk_rcvqueues_full() helper, to avoid taking socket lock in
stress situations.

Under huge stress from a multiqueue/RPS enabled NIC, a single flow udp
receiver can now process ~200.000 pps (instead of ~100 pps before the
patch) on a 8 core machine.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 13 +++++++++++--
 net/core/sock.c    |  5 ++++-
 net/ipv4/udp.c     |  4 ++++
 net/ipv6/udp.c     |  8 ++++++++
 net/sctp/socket.c  |  3 ---
 5 files changed, 27 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 07822280d953..cf12b1e61fa6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -256,7 +256,6 @@ struct sock {
 		struct sk_buff *head;
 		struct sk_buff *tail;
 		int len;
-		int limit;
 	} sk_backlog;
 	wait_queue_head_t	*sk_sleep;
 	struct dst_entry	*sk_dst_cache;
@@ -608,10 +607,20 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 	skb->next = NULL;
 }
 
+/*
+ * Take into account size of receive queue and backlog queue
+ */
+static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
+{
+	unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
+
+	return qsize + skb->truesize > sk->sk_rcvbuf;
+}
+
 /* The per-socket spinlock must be held here. */
 static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-	if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1))
+	if (sk_rcvqueues_full(sk, skb))
 		return -ENOBUFS;
 
 	__sk_add_backlog(sk, skb);
diff --git a/net/core/sock.c b/net/core/sock.c
index 58ebd146ce5a..51041759517e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -327,6 +327,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 
 	skb->dev = NULL;
 
+	if (sk_rcvqueues_full(sk, skb)) {
+		atomic_inc(&sk->sk_drops);
+		goto discard_and_relse;
+	}
 	if (nested)
 		bh_lock_sock_nested(sk);
 	else
@@ -1885,7 +1889,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_allocation	=	GFP_KERNEL;
 	sk->sk_rcvbuf		=	sysctl_rmem_default;
 	sk->sk_sndbuf		=	sysctl_wmem_default;
-	sk->sk_backlog.limit	=	sk->sk_rcvbuf << 1;
 	sk->sk_state		=	TCP_CLOSE;
 	sk_set_socket(sk, sock);
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index fa3d2874db41..63eb56b2d873 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1372,6 +1372,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 			goto drop;
 	}
 
+
+	if (sk_rcvqueues_full(sk, skb))
+		goto drop;
+
 	rc = 0;
 
 	bh_lock_sock(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 2850e35cee3d..3ead20ad9d07 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -584,6 +584,10 @@ static void flush_stack(struct sock **stack, unsigned int count,
 
 		sk = stack[i];
 		if (skb1) {
+			if (sk_rcvqueues_full(sk, skb)) {
+				kfree_skb(skb1);
+				goto drop;
+			}
 			bh_lock_sock(sk);
 			if (!sock_owned_by_user(sk))
 				udpv6_queue_rcv_skb(sk, skb1);
@@ -759,6 +763,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 
 	/* deliver */
 
+	if (sk_rcvqueues_full(sk, skb)) {
+		sock_put(sk);
+		goto discard;
+	}
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		udpv6_queue_rcv_skb(sk, skb);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index f34adcca8a8c..13d8229f3a9c 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -3721,9 +3721,6 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	SCTP_DBG_OBJCNT_INC(sock);
 	percpu_counter_inc(&sctp_sockets_allocated);
 
-	/* Set socket backlog limit. */
-	sk->sk_backlog.limit = sysctl_sctp_rmem[1];
-
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	local_bh_enable();
-- 
cgit v1.2.3-59-g8ed1b


From 05fceb4ad7e8bf809a2a97061d6273d27d1a8449 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Fri, 23 Apr 2010 01:40:47 +0000
Subject: net: disallow to use net_assign_generic externally

Now there's no need to use this function directly because it's handled by
register_pernet_device. So to make this simple and easy to understand,
make this static so as not to tempt potential users.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/generic.h |  9 +----
 net/core/net_namespace.c    | 91 ++++++++++++++++++++++-----------------------
 2 files changed, 47 insertions(+), 53 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/generic.h b/include/net/netns/generic.h
index ff4982ab84b6..81a31c0db3e7 100644
--- a/include/net/netns/generic.h
+++ b/include/net/netns/generic.h
@@ -14,11 +14,8 @@
  * The rules are simple:
  * 1. set pernet_operations->id.  After register_pernet_device you
  *    will have the id of your private pointer.
- * 2. Either set pernet_operations->size (to have the code allocate and
- *    free a private structure pointed to from struct net ) or 
- *    call net_assign_generic() to put the private data on the struct
- *    net (most preferably this should be done in the ->init callback
- *    of the ops registered);
+ * 2. set pernet_operations->size to have the code allocate and free
+ *    a private structure pointed to from struct net.
  * 3. do not change this pointer while the net is alive;
  * 4. do not try to have any private reference on the net_generic object.
  *
@@ -46,6 +43,4 @@ static inline void *net_generic(struct net *net, int id)
 
 	return ptr;
 }
-
-extern int net_assign_generic(struct net *net, int id, void *data);
 #endif
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 69a20bfc527c..c988e685433a 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -27,6 +27,51 @@ EXPORT_SYMBOL(init_net);
 
 #define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */
 
+static void net_generic_release(struct rcu_head *rcu)
+{
+	struct net_generic *ng;
+
+	ng = container_of(rcu, struct net_generic, rcu);
+	kfree(ng);
+}
+
+static int net_assign_generic(struct net *net, int id, void *data)
+{
+	struct net_generic *ng, *old_ng;
+
+	BUG_ON(!mutex_is_locked(&net_mutex));
+	BUG_ON(id == 0);
+
+	ng = old_ng = net->gen;
+	if (old_ng->len >= id)
+		goto assign;
+
+	ng = kzalloc(sizeof(struct net_generic) +
+			id * sizeof(void *), GFP_KERNEL);
+	if (ng == NULL)
+		return -ENOMEM;
+
+	/*
+	 * Some synchronisation notes:
+	 *
+	 * The net_generic explores the net->gen array inside rcu
+	 * read section. Besides once set the net->gen->ptr[x]
+	 * pointer never changes (see rules in netns/generic.h).
+	 *
+	 * That said, we simply duplicate this array and schedule
+	 * the old copy for kfree after a grace period.
+	 */
+
+	ng->len = id;
+	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
+
+	rcu_assign_pointer(net->gen, ng);
+	call_rcu(&old_ng->rcu, net_generic_release);
+assign:
+	ng->ptr[id - 1] = data;
+	return 0;
+}
+
 static int ops_init(const struct pernet_operations *ops, struct net *net)
 {
 	int err;
@@ -526,49 +571,3 @@ void unregister_pernet_device(struct pernet_operations *ops)
 	mutex_unlock(&net_mutex);
 }
 EXPORT_SYMBOL_GPL(unregister_pernet_device);
-
-static void net_generic_release(struct rcu_head *rcu)
-{
-	struct net_generic *ng;
-
-	ng = container_of(rcu, struct net_generic, rcu);
-	kfree(ng);
-}
-
-int net_assign_generic(struct net *net, int id, void *data)
-{
-	struct net_generic *ng, *old_ng;
-
-	BUG_ON(!mutex_is_locked(&net_mutex));
-	BUG_ON(id == 0);
-
-	ng = old_ng = net->gen;
-	if (old_ng->len >= id)
-		goto assign;
-
-	ng = kzalloc(sizeof(struct net_generic) +
-			id * sizeof(void *), GFP_KERNEL);
-	if (ng == NULL)
-		return -ENOMEM;
-
-	/*
-	 * Some synchronisation notes:
-	 *
-	 * The net_generic explores the net->gen array inside rcu
-	 * read section. Besides once set the net->gen->ptr[x]
-	 * pointer never changes (see rules in netns/generic.h).
-	 *
-	 * That said, we simply duplicate this array and schedule
-	 * the old copy for kfree after a grace period.
-	 */
-
-	ng->len = id;
-	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
-
-	rcu_assign_pointer(net->gen, ng);
-	call_rcu(&old_ng->rcu, net_generic_release);
-assign:
-	ng->ptr[id - 1] = data;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(net_assign_generic);
-- 
cgit v1.2.3-59-g8ed1b


From ff65e8275f6c96a5eda57493bd84c4555decf7b3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 27 Apr 2010 16:26:49 -0700
Subject: bridge: Use hlist_for_each_entry_rcu() in br_multicast_add_router()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Noticed by Michał Mirosław.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index fcba313f1894..e29c9b738796 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1045,7 +1045,7 @@ static void br_multicast_add_router(struct net_bridge *br,
 	struct net_bridge_port *p;
 	struct hlist_node *n, *last = NULL;
 
-	hlist_for_each_entry(p, n, &br->router_list, rlist) {
+	hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) {
 		if ((unsigned long) port >= (unsigned long) p) {
 			hlist_add_before_rcu(n, &port->rlist);
 			return;
-- 
cgit v1.2.3-59-g8ed1b


From 709b9326ef6fc1b7e379d47d5f39ebc27dad3f4d Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 27 Apr 2010 16:49:58 -0700
Subject: Revert "bridge: Use hlist_for_each_entry_rcu() in
 br_multicast_add_router()"

This reverts commit ff65e8275f6c96a5eda57493bd84c4555decf7b3.

As explained by Stephen Hemminger, the traversal doesn't require
RCU handling as we hold a lock.

The list addition et al. calls, on the other hand, do.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index e29c9b738796..fcba313f1894 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1045,7 +1045,7 @@ static void br_multicast_add_router(struct net_bridge *br,
 	struct net_bridge_port *p;
 	struct hlist_node *n, *last = NULL;
 
-	hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) {
+	hlist_for_each_entry(p, n, &br->router_list, rlist) {
 		if ((unsigned long) port >= (unsigned long) p) {
 			hlist_add_before_rcu(n, &port->rlist);
 			return;
-- 
cgit v1.2.3-59-g8ed1b


From 7e80c124485b73146deadce14fd4da2054581806 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 27 Apr 2010 15:01:04 +0000
Subject: bridge: simplify multicast_add_router

By coding slightly differently, there are only two cases
to deal with: add at head and add after previous entry.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_multicast.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index fcba313f1894..d63868c9b2c0 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1039,22 +1039,25 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 }
 #endif
 
+/*
+ * Add port to rotuer_list
+ *  list is maintained ordered by pointer value
+ *  and locked by br->multicast_lock and RCU
+ */
 static void br_multicast_add_router(struct net_bridge *br,
 				    struct net_bridge_port *port)
 {
 	struct net_bridge_port *p;
-	struct hlist_node *n, *last = NULL;
+	struct hlist_node *n, *slot = NULL;
 
 	hlist_for_each_entry(p, n, &br->router_list, rlist) {
-		if ((unsigned long) port >= (unsigned long) p) {
-			hlist_add_before_rcu(n, &port->rlist);
-			return;
-		}
-		last = n;
+		if ((unsigned long) port >= (unsigned long) p)
+			break;
+		slot = n;
 	}
 
-	if (last)
-		hlist_add_after_rcu(last, &port->rlist);
+	if (slot)
+		hlist_add_after_rcu(slot, &port->rlist);
 	else
 		hlist_add_head_rcu(&port->rlist, &br->router_list);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 168d40ee3d147ae20860e7916bd79b636cbe8fd5 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 27 Apr 2010 15:01:05 +0000
Subject: bridge: multicast flood

Fix unsafe usage of RCU. Would never work on Alpha SMP because
of lack of rcu_dereference()

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_forward.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 7a241c396981..5b70fc012e40 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -216,7 +216,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 
 	prev = NULL;
 
-	rp = br->router_list.first;
+	rp = rcu_dereference(br->router_list.first);
 	p = mdst ? mdst->ports : NULL;
 	while (p || rp) {
 		lport = p ? p->port : NULL;
@@ -233,7 +233,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 		if ((unsigned long)lport >= (unsigned long)port)
 			p = p->next;
 		if ((unsigned long)rport >= (unsigned long)port)
-			rp = rp->next;
+			rp = rcu_dereference(rp->next);
 	}
 
 	if (!prev)
-- 
cgit v1.2.3-59-g8ed1b


From 83f6a740b4e52f88e312223df2fc94016a208618 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 27 Apr 2010 15:01:06 +0000
Subject: bridge: multicast port group RCU fix

The recently introduced bridge multicast port group list was only
partially using RCU correctly. It was missing rcu_dereference()
and missing the necessary barrier on deletion.

The code should have used one of the standard list methods (list or hlist)
instead of open coding an RCU-based linked list.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_forward.c   | 4 ++--
 net/bridge/br_multicast.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 5b70fc012e40..5f9988a3f06a 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -217,7 +217,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 	prev = NULL;
 
 	rp = rcu_dereference(br->router_list.first);
-	p = mdst ? mdst->ports : NULL;
+	p = mdst ? rcu_dereference(mdst->ports) : NULL;
 	while (p || rp) {
 		lport = p ? p->port : NULL;
 		rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) :
@@ -231,7 +231,7 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 			goto out;
 
 		if ((unsigned long)lport >= (unsigned long)port)
-			p = p->next;
+			p = rcu_dereference(p->next);
 		if ((unsigned long)rport >= (unsigned long)port)
 			rp = rcu_dereference(rp->next);
 	}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index d63868c9b2c0..7128abdce45f 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -259,7 +259,7 @@ static void br_multicast_del_pg(struct net_bridge *br,
 		if (p != pg)
 			continue;
 
-		*pp = p->next;
+		rcu_assign_pointer(*pp, p->next);
 		hlist_del_init(&p->mglist);
 		del_timer(&p->timer);
 		del_timer(&p->query_timer);
-- 
cgit v1.2.3-59-g8ed1b


From afe0159d935ab731c682e811356914bb2be9470c Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 27 Apr 2010 15:01:07 +0000
Subject: bridge: multicast_flood cleanup

Move some declarations around to make it clearer which variables
are being used inside the loop.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_forward.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 5f9988a3f06a..396f077216a3 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -208,17 +208,15 @@ static void br_multicast_flood(struct net_bridge_mdb_entry *mdst,
 {
 	struct net_device *dev = BR_INPUT_SKB_CB(skb)->brdev;
 	struct net_bridge *br = netdev_priv(dev);
-	struct net_bridge_port *port;
-	struct net_bridge_port *lport, *rport;
-	struct net_bridge_port *prev;
+	struct net_bridge_port *prev = NULL;
 	struct net_bridge_port_group *p;
 	struct hlist_node *rp;
 
-	prev = NULL;
-
 	rp = rcu_dereference(br->router_list.first);
 	p = mdst ? rcu_dereference(mdst->ports) : NULL;
 	while (p || rp) {
+		struct net_bridge_port *port, *lport, *rport;
+
 		lport = p ? p->port : NULL;
 		rport = rp ? hlist_entry(rp, struct net_bridge_port, rlist) :
 			     NULL;
-- 
cgit v1.2.3-59-g8ed1b


From e539d83cc8a4fa581cbf8ed288fdadb19a692cb0 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Wed, 28 Apr 2010 08:54:35 +0000
Subject: caif: Rename functions in cfcnfg and caif_dev

Changes:
 o Renamed cfcnfg_del_adapt_layer to cfcnfg_disconn_adapt_layer
 o Fixed typo cfcfg to cfcnfg
 o Renamed linkid to channel_id
 o Updated documentation in caif_dev.h
 o Minor formatting changes

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/caif_dev.h |  6 ++++--
 include/net/caif/cfcnfg.h   | 10 ++++-----
 net/caif/caif_dev.c         | 15 +++++++------
 net/caif/cfcnfg.c           | 51 ++++++++++++++++++++++-----------------------
 4 files changed, 41 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h
index 42a7c7867849..3aa1ff642323 100644
--- a/include/net/caif/caif_dev.h
+++ b/include/net/caif/caif_dev.h
@@ -23,17 +23,19 @@ struct caif_param {
 };
 
 /**
- * caif_connect_request - Request data for CAIF channel setup.
+ * struct caif_connect_request - Request data for CAIF channel setup.
+ * @protocol:		Type of CAIF protocol to use (at, datagram etc)
  * @sockaddr:		Socket address to connect.
  * @priority:		Priority of the connection.
  * @link_selector:	Link selector (high bandwidth or low latency)
  * @link_name:		Name of the CAIF Link Layer to use.
+ * @param:		Connect Request parameters (CAIF_SO_REQ_PARAM).
  *
  * This struct is used when connecting a CAIF channel.
  * It contains all CAIF channel configuration options.
  */
 struct caif_connect_request {
-	int protocol;
+	enum caif_protocol_type protocol;
 	struct sockaddr_caif sockaddr;
 	enum caif_channel_priority priority;
 	enum caif_link_selector link_selector;
diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h
index 366082c5d435..f16b875acc48 100644
--- a/include/net/caif/cfcnfg.h
+++ b/include/net/caif/cfcnfg.h
@@ -87,13 +87,14 @@ cfcnfg_add_phy_layer(struct cfcnfg *cnfg, enum cfcnfg_phy_type phy_type,
 int cfcnfg_del_phy_layer(struct cfcnfg *cnfg, struct cflayer *phy_layer);
 
 /**
- * cfcnfg_del_adapt_layer - Deletes an adaptation layer from the CAIF stack.
+ * cfcnfg_disconn_adapt_layer - Disconnects an adaptation layer.
  *
  * @cnfg:	Pointer to a CAIF configuration object, created by
  *		cfcnfg_create().
  * @adap_layer: Adaptation layer to be removed.
  */
-int cfcnfg_del_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer);
+int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg,
+			struct cflayer *adap_layer);
 
 /**
  * cfcnfg_add_adaptation_layer - Add an adaptation layer to the CAIF stack.
@@ -102,14 +103,13 @@ int cfcnfg_del_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer);
  * driver functionality is implemented.
  *
  * @cnfg:		Pointer to a CAIF configuration object, created by
- *				cfcnfg_create().
+ *			cfcnfg_create().
  * @param:		Link setup parameters.
  * @adap_layer:		Specify the adaptation layer; the receive and
  *			flow-control functions MUST be set in the structure.
  *
  */
-int
-cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
+int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 			    struct cfctrl_link_param *param,
 			    struct cflayer *adap_layer);
 
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index e84837e1bc86..be1f674a3b67 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -330,20 +330,19 @@ int caif_connect_client(struct caif_connect_request *conn_req,
 			   struct cflayer *client_layer)
 {
 	struct cfctrl_link_param param;
-	if (connect_req_to_link_param(get_caif_conf(), conn_req, &param) == 0)
-		/* Hook up the adaptation layer. */
-		return cfcnfg_add_adaptation_layer(get_caif_conf(),
+	int ret;
+	ret = connect_req_to_link_param(get_caif_conf(), conn_req, &param);
+	if (ret)
+		return ret;
+	/* Hook up the adaptation layer. */
+	return cfcnfg_add_adaptation_layer(get_caif_conf(),
 						&param, client_layer);
-
-	return -EINVAL;
-
-	caif_assert(0);
 }
 EXPORT_SYMBOL(caif_connect_client);
 
 int caif_disconnect_client(struct cflayer *adap_layer)
 {
-	return cfcnfg_del_adapt_layer(get_caif_conf(), adap_layer);
+       return cfcnfg_disconn_adapt_layer(get_caif_conf(), adap_layer);
 }
 EXPORT_SYMBOL(caif_disconnect_client);
 
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index c873e3d4387c..d52f2566916e 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -51,12 +51,12 @@ struct cfcnfg {
 	struct cfcnfg_phyinfo phy_layers[MAX_PHY_LAYERS];
 };
 
-static void cncfg_linkup_rsp(struct cflayer *layer, u8 linkid,
+static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id,
 			     enum cfctrl_srv serv, u8 phyid,
 			     struct cflayer *adapt_layer);
-static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid,
+static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id,
 				  struct cflayer *client_layer);
-static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid,
+static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
 			     struct cflayer *adapt_layer);
 static void cfctrl_resp_func(void);
 static void cfctrl_enum_resp(void);
@@ -82,13 +82,13 @@ struct cfcnfg *cfcnfg_create(void)
 	resp = cfctrl_get_respfuncs(this->ctrl);
 	resp->enum_rsp = cfctrl_enum_resp;
 	resp->linkerror_ind = cfctrl_resp_func;
-	resp->linkdestroy_rsp = cncfg_linkdestroy_rsp;
+	resp->linkdestroy_rsp = cfcnfg_linkdestroy_rsp;
 	resp->sleep_rsp = cfctrl_resp_func;
 	resp->wake_rsp = cfctrl_resp_func;
 	resp->restart_rsp = cfctrl_resp_func;
 	resp->radioset_rsp = cfctrl_resp_func;
-	resp->linksetup_rsp = cncfg_linkup_rsp;
-	resp->reject_rsp = cncfg_reject_rsp;
+	resp->linksetup_rsp = cfcnfg_linkup_rsp;
+	resp->reject_rsp = cfcnfg_reject_rsp;
 
 	this->last_phyid = 1;
 
@@ -191,8 +191,7 @@ int cfcnfg_get_named(struct cfcnfg *cnfg, char *name)
  *	 4) Link-Error - (no response)
  *	      Not handled, but this should be a CAIF PROTOCOL ERROR
  */
-
-int cfcnfg_del_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
+int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
 {
 	u8 channel_id = 0;
 	int ret = 0;
@@ -246,9 +245,9 @@ end:
 	return ret;
 
 }
-EXPORT_SYMBOL(cfcnfg_del_adapt_layer);
+EXPORT_SYMBOL(cfcnfg_disconn_adapt_layer);
 
-static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid,
+static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id,
 				  struct cflayer *client_layer)
 {
 	struct cfcnfg *cnfg = container_obj(layer);
@@ -258,20 +257,20 @@ static void cncfg_linkdestroy_rsp(struct cflayer *layer, u8 linkid,
 	 * 1) Remove service from the MUX layer. The MUX must
 	 *    guarante that no more payload sent "upwards" (receive)
 	 */
-	servl = cfmuxl_remove_uplayer(cnfg->mux, linkid);
+	servl = cfmuxl_remove_uplayer(cnfg->mux, channel_id);
 
 	if (servl == NULL) {
 		pr_err("CAIF: %s(): PROTOCOL ERROR "
-		       "- Error removing service_layer Linkid(%d)",
-			__func__, linkid);
+		       "- Error removing service_layer Channel_Id(%d)",
+			__func__, channel_id);
 		return;
 	}
-	caif_assert(linkid == servl->id);
+	caif_assert(channel_id == servl->id);
 
 	if (servl != client_layer && servl->up != client_layer) {
 		pr_err("CAIF: %s(): Error removing service_layer "
-		       "Linkid(%d) %p %p",
-			__func__, linkid, (void *) servl,
+		       "Channel_Id(%d) %p %p",
+			__func__, channel_id, (void *) servl,
 			(void *) client_layer);
 		return;
 	}
@@ -345,7 +344,7 @@ cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 }
 EXPORT_SYMBOL(cfcnfg_add_adaptation_layer);
 
-static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid,
+static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
 			     struct cflayer *adapt_layer)
 {
 	if (adapt_layer != NULL && adapt_layer->ctrlcmd != NULL)
@@ -354,7 +353,7 @@ static void cncfg_reject_rsp(struct cflayer *layer, u8 linkid,
 }
 
 static void
-cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, enum cfctrl_srv serv,
+cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 		 u8 phyid, struct cflayer *adapt_layer)
 {
 	struct cfcnfg *cnfg = container_obj(layer);
@@ -383,26 +382,26 @@ cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, enum cfctrl_srv serv,
 					     _CAIF_MODEMCMD_PHYIF_USEFULL);
 
 	}
-	adapt_layer->id = linkid;
+	adapt_layer->id = channel_id;
 
 	switch (serv) {
 	case CFCTRL_SRV_VEI:
-		servicel = cfvei_create(linkid, &phyinfo->dev_info);
+		servicel = cfvei_create(channel_id, &phyinfo->dev_info);
 		break;
 	case CFCTRL_SRV_DATAGRAM:
-		servicel = cfdgml_create(linkid, &phyinfo->dev_info);
+		servicel = cfdgml_create(channel_id, &phyinfo->dev_info);
 		break;
 	case CFCTRL_SRV_RFM:
-		servicel = cfrfml_create(linkid, &phyinfo->dev_info);
+		servicel = cfrfml_create(channel_id, &phyinfo->dev_info);
 		break;
 	case CFCTRL_SRV_UTIL:
-		servicel = cfutill_create(linkid, &phyinfo->dev_info);
+		servicel = cfutill_create(channel_id, &phyinfo->dev_info);
 		break;
 	case CFCTRL_SRV_VIDEO:
-		servicel = cfvidl_create(linkid, &phyinfo->dev_info);
+		servicel = cfvidl_create(channel_id, &phyinfo->dev_info);
 		break;
 	case CFCTRL_SRV_DBG:
-		servicel = cfdbgl_create(linkid, &phyinfo->dev_info);
+		servicel = cfdbgl_create(channel_id, &phyinfo->dev_info);
 		break;
 	default:
 		pr_err("CAIF: %s(): Protocol error. "
@@ -415,7 +414,7 @@ cncfg_linkup_rsp(struct cflayer *layer, u8 linkid, enum cfctrl_srv serv,
 		return;
 	}
 	layer_set_dn(servicel, cnfg->mux);
-	cfmuxl_set_uplayer(cnfg->mux, servicel, linkid);
+	cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id);
 	layer_set_up(servicel, adapt_layer);
 	layer_set_dn(adapt_layer, servicel);
 	servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0);
-- 
cgit v1.2.3-59-g8ed1b


From 5b2086567503f9b55136642031ec0067319f58e0 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Wed, 28 Apr 2010 08:54:36 +0000
Subject: caif: Add reference counting to service layer

Changes:
o Added functions cfsrvl_get and cfsrvl_put.
o Added release_client support for use by socket and net device.
o Increase reference counting for in-flight packets from cfmuxl

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/caif_dev.h | 11 +++++++++++
 include/net/caif/cfcnfg.h   |  7 +++++++
 include/net/caif/cfsrvl.h   | 22 ++++++++++++++++++++++
 net/caif/caif_dev.c         |  6 ++++++
 net/caif/cfcnfg.c           |  7 +++++++
 net/caif/cfmuxl.c           |  7 ++++++-
 net/caif/cfsrvl.c           |  7 +++++++
 7 files changed, 66 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/net/caif/caif_dev.h b/include/net/caif/caif_dev.h
index 3aa1ff642323..318ab9478a44 100644
--- a/include/net/caif/caif_dev.h
+++ b/include/net/caif/caif_dev.h
@@ -69,6 +69,17 @@ int caif_connect_client(struct caif_connect_request *config,
  */
 int caif_disconnect_client(struct cflayer *client_layer);
 
+/**
+ * caif_release_client - Release adaptation layer reference to client.
+ *
+ * @client_layer: Client layer.
+ *
+ * Releases a client/adaptation layer use of the caif stack.
+ * This function must be used after caif_disconnect_client to
+ * decrease the reference count of the service layer.
+ */
+void caif_release_client(struct cflayer *client_layer);
+
 /**
  * connect_req_to_link_param - Translate configuration parameters
  *				from socket format to internal format.
diff --git a/include/net/caif/cfcnfg.h b/include/net/caif/cfcnfg.h
index f16b875acc48..9fc2fc20b884 100644
--- a/include/net/caif/cfcnfg.h
+++ b/include/net/caif/cfcnfg.h
@@ -96,6 +96,13 @@ int cfcnfg_del_phy_layer(struct cfcnfg *cnfg, struct cflayer *phy_layer);
 int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg,
 			struct cflayer *adap_layer);
 
+/**
+ * cfcnfg_release_adap_layer - Used by client to release the adaptation layer.
+ *
+ * @adap_layer: Adaptation layer.
+ */
+void cfcnfg_release_adap_layer(struct cflayer *adap_layer);
+
 /**
  * cfcnfg_add_adaptation_layer - Add an adaptation layer to the CAIF stack.
  *
diff --git a/include/net/caif/cfsrvl.h b/include/net/caif/cfsrvl.h
index b2a12db20cd2..2dc9eb193ecf 100644
--- a/include/net/caif/cfsrvl.h
+++ b/include/net/caif/cfsrvl.h
@@ -9,14 +9,18 @@
 #include <linux/list.h>
 #include <linux/stddef.h>
 #include <linux/types.h>
+#include <linux/kref.h>
+
 struct cfsrvl {
 	struct cflayer layer;
 	bool open;
 	bool phy_flow_on;
 	bool modem_flow_on;
 	struct dev_info dev_info;
+	struct kref ref;
 };
 
+void cfsrvl_release(struct kref *kref);
 struct cflayer *cfvei_create(u8 linkid, struct dev_info *dev_info);
 struct cflayer *cfdgml_create(u8 linkid, struct dev_info *dev_info);
 struct cflayer *cfutill_create(u8 linkid, struct dev_info *dev_info);
@@ -31,4 +35,22 @@ void cfsrvl_init(struct cfsrvl *service,
 bool cfsrvl_ready(struct cfsrvl *service, int *err);
 u8 cfsrvl_getphyid(struct cflayer *layer);
 
+static inline void cfsrvl_get(struct cflayer *layr)
+{
+	struct cfsrvl *s;
+	if (layr == NULL)
+		return;
+	s = container_of(layr, struct cfsrvl, layer);
+	kref_get(&s->ref);
+}
+
+static inline void cfsrvl_put(struct cflayer *layr)
+{
+	struct cfsrvl *s;
+	if (layr == NULL)
+		return;
+	s = container_of(layr, struct cfsrvl, layer);
+	kref_put(&s->ref, cfsrvl_release);
+}
+
 #endif				/* CFSRVL_H_ */
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index be1f674a3b67..0145bae0274f 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -346,6 +346,12 @@ int caif_disconnect_client(struct cflayer *adap_layer)
 }
 EXPORT_SYMBOL(caif_disconnect_client);
 
+void caif_release_client(struct cflayer *adap_layer)
+{
+       cfcnfg_release_adap_layer(adap_layer);
+}
+EXPORT_SYMBOL(caif_release_client);
+
 /* Per-namespace Caif devices handling */
 static int caif_init_net(struct net *net)
 {
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index d52f2566916e..f94f3dfe85c1 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -247,6 +247,13 @@ end:
 }
 EXPORT_SYMBOL(cfcnfg_disconn_adapt_layer);
 
+void cfcnfg_release_adap_layer(struct cflayer *adap_layer)
+{
+	if (adap_layer->dn)
+		cfsrvl_put(adap_layer->dn);
+}
+EXPORT_SYMBOL(cfcnfg_release_adap_layer);
+
 static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id,
 				  struct cflayer *client_layer)
 {
diff --git a/net/caif/cfmuxl.c b/net/caif/cfmuxl.c
index 6fb9f9e96cf8..7372f27f1d32 100644
--- a/net/caif/cfmuxl.c
+++ b/net/caif/cfmuxl.c
@@ -62,6 +62,7 @@ int cfmuxl_set_uplayer(struct cflayer *layr, struct cflayer *up, u8 linkid)
 {
 	struct cfmuxl *muxl = container_obj(layr);
 	spin_lock(&muxl->receive_lock);
+	cfsrvl_get(up);
 	list_add(&up->node, &muxl->srvl_list);
 	spin_unlock(&muxl->receive_lock);
 	return 0;
@@ -172,8 +173,11 @@ struct cflayer *cfmuxl_remove_uplayer(struct cflayer *layr, u8 id)
 	struct cfmuxl *muxl = container_obj(layr);
 	spin_lock(&muxl->receive_lock);
 	up = get_up(muxl, id);
+	if (up == NULL)
+		return NULL;
 	memset(muxl->up_cache, 0, sizeof(muxl->up_cache));
 	list_del(&up->node);
+	cfsrvl_put(up);
 	spin_unlock(&muxl->receive_lock);
 	return up;
 }
@@ -203,8 +207,9 @@ static int cfmuxl_receive(struct cflayer *layr, struct cfpkt *pkt)
 		 */
 		return /* CFGLU_EPROT; */ 0;
 	}
-
+	cfsrvl_get(up);
 	ret = up->receive(up, pkt);
+	cfsrvl_put(up);
 	return ret;
 }
 
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index d470c51c6431..aff31f34528f 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -158,6 +158,13 @@ void cfsrvl_init(struct cfsrvl *service,
 	service->layer.ctrlcmd = cfservl_ctrlcmd;
 	service->layer.modemcmd = cfservl_modemcmd;
 	service->dev_info = *dev_info;
+	kref_init(&service->ref);
+}
+
+void cfsrvl_release(struct kref *kref)
+{
+	struct cfsrvl *service = container_of(kref, struct cfsrvl, ref);
+	kfree(service);
 }
 
 bool cfsrvl_ready(struct cfsrvl *service, int *err)
-- 
cgit v1.2.3-59-g8ed1b


From 8d545c8f958f5f433c50a00762ce1f231ed56eee Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Wed, 28 Apr 2010 08:54:37 +0000
Subject: caif: Disconnect without waiting for response

Changes:
o Function cfcnfg_disconn_adapt_layer is changed to do asynchronous
  disconnect, not waiting for any response from the modem. Due to this
  the function cfcnfg_linkdestroy_rsp does nothing anymore.
o Because disconnect may take down a connection before a connect response
  is received the function cfcnfg_linkup_rsp is checking if the client is
  still waiting for the response, if not a disconnect request is sent to
  the modem.
o cfctrl is no longer keeping track of pending disconnect requests.
o Added function cfctrl_cancel_req, which is used for deleting a pending
  connect request if disconnect is done before connect response is received.
o Removed unused function cfctrl_insert_req2
o Added better handling of connect reject from modem.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/caif/cfctrl.h |   7 ++-
 net/caif/cfcnfg.c         | 155 ++++++++++++++--------------------------------
 net/caif/cfctrl.c         |  95 ++++++++++++++++++----------
 3 files changed, 111 insertions(+), 146 deletions(-)

(limited to 'net')

diff --git a/include/net/caif/cfctrl.h b/include/net/caif/cfctrl.h
index dee25b86caa0..997603f2bf4c 100644
--- a/include/net/caif/cfctrl.h
+++ b/include/net/caif/cfctrl.h
@@ -43,8 +43,7 @@ struct cfctrl_rsp {
 	void (*linksetup_rsp)(struct cflayer *layer, u8 linkid,
 			      enum cfctrl_srv serv, u8 phyid,
 			      struct cflayer *adapt_layer);
-	void (*linkdestroy_rsp)(struct cflayer *layer, u8 linkid,
-				struct cflayer *client_layer);
+	void (*linkdestroy_rsp)(struct cflayer *layer, u8 linkid);
 	void (*linkerror_ind)(void);
 	void (*enum_rsp)(void);
 	void (*sleep_rsp)(void);
@@ -117,7 +116,7 @@ struct cfctrl {
 };
 
 void cfctrl_enum_req(struct cflayer *cfctrl, u8 physlinkid);
-void cfctrl_linkup_request(struct cflayer *cfctrl,
+int cfctrl_linkup_request(struct cflayer *cfctrl,
 			   struct cfctrl_link_param *param,
 			   struct cflayer *user_layer);
 int  cfctrl_linkdown_req(struct cflayer *cfctrl, u8 linkid,
@@ -135,4 +134,6 @@ void cfctrl_insert_req(struct cfctrl *ctrl,
 			      struct cfctrl_request_info *req);
 struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
 					      struct cfctrl_request_info *req);
+void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer);
+
 #endif				/* CFCTRL_H_ */
diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index f94f3dfe85c1..471c62939fad 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -54,8 +54,7 @@ struct cfcnfg {
 static void cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id,
 			     enum cfctrl_srv serv, u8 phyid,
 			     struct cflayer *adapt_layer);
-static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id,
-				  struct cflayer *client_layer);
+static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id);
 static void cfcnfg_reject_rsp(struct cflayer *layer, u8 channel_id,
 			     struct cflayer *adapt_layer);
 static void cfctrl_resp_func(void);
@@ -175,73 +174,65 @@ int cfcnfg_get_named(struct cfcnfg *cnfg, char *name)
 	return 0;
 }
 
-/*
- * NOTE: What happens on destroy failure:
- *	 1a) No response - Too early
- *	      This will not happen because enumerate has already
- *	      completed.
- *	 1b) No response - FATAL
- *	      Not handled, but this should be a CAIF PROTOCOL ERROR
- *	      Modem error, response is really expected -  this
- *	      case is not really handled.
- *	 2) O/E-bit indicate error
- *	      Ignored - this link is destroyed anyway.
- *	 3) Not able to match on request
- *	      Not handled, but this should be a CAIF PROTOCOL ERROR
- *	 4) Link-Error - (no response)
- *	      Not handled, but this should be a CAIF PROTOCOL ERROR
- */
 int cfcnfg_disconn_adapt_layer(struct cfcnfg *cnfg, struct cflayer *adap_layer)
 {
 	u8 channel_id = 0;
 	int ret = 0;
+	struct cflayer *servl = NULL;
 	struct cfcnfg_phyinfo *phyinfo = NULL;
 	u8 phyid = 0;
-
 	caif_assert(adap_layer != NULL);
 	channel_id = adap_layer->id;
-	if (channel_id == 0) {
+	if (adap_layer->dn == NULL || channel_id == 0) {
 		pr_err("CAIF: %s():adap_layer->id is 0\n", __func__);
 		ret = -ENOTCONN;
 		goto end;
 	}
-
-	if (adap_layer->dn == NULL) {
-		pr_err("CAIF: %s():adap_layer->dn is NULL\n", __func__);
-		ret = -ENODEV;
-		goto end;
-	}
-
-	if (adap_layer->dn != NULL)
-		phyid = cfsrvl_getphyid(adap_layer->dn);
-
-	phyinfo = cfcnfg_get_phyinfo(cnfg, phyid);
-	if (phyinfo == NULL) {
-		pr_warning("CAIF: %s(): No interface to send disconnect to\n",
-			   __func__);
-		ret = -ENODEV;
+	servl = cfmuxl_remove_uplayer(cnfg->mux, channel_id);
+	if (servl == NULL)
 		goto end;
-	}
-
-	if (phyinfo->id != phyid
-		|| phyinfo->phy_layer->id != phyid
-		|| phyinfo->frm_layer->id != phyid) {
-
-		pr_err("CAIF: %s(): Inconsistency in phy registration\n",
-			__func__);
+	layer_set_up(servl, NULL);
+	ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer);
+	if (servl == NULL) {
+		pr_err("CAIF: %s(): PROTOCOL ERROR "
+		       "- Error removing service_layer Channel_Id(%d)",
+			__func__, channel_id);
 		ret = -EINVAL;
 		goto end;
 	}
+	caif_assert(channel_id == servl->id);
+	if (adap_layer->dn != NULL) {
+		phyid = cfsrvl_getphyid(adap_layer->dn);
 
-	ret = cfctrl_linkdown_req(cnfg->ctrl, channel_id, adap_layer);
-
-end:
+		phyinfo = cfcnfg_get_phyinfo(cnfg, phyid);
+		if (phyinfo == NULL) {
+			pr_warning("CAIF: %s(): "
+				"No interface to send disconnect to\n",
+				__func__);
+			ret = -ENODEV;
+			goto end;
+		}
+		if (phyinfo->id != phyid ||
+			phyinfo->phy_layer->id != phyid ||
+			phyinfo->frm_layer->id != phyid) {
+			pr_err("CAIF: %s(): "
+				"Inconsistency in phy registration\n",
+				__func__);
+			ret = -EINVAL;
+			goto end;
+		}
+	}
 	if (phyinfo != NULL && --phyinfo->phy_ref_count == 0 &&
 		phyinfo->phy_layer != NULL &&
 		phyinfo->phy_layer->modemcmd != NULL) {
 		phyinfo->phy_layer->modemcmd(phyinfo->phy_layer,
 					     _CAIF_MODEMCMD_PHYIF_USELESS);
 	}
+end:
+	cfsrvl_put(servl);
+	cfctrl_cancel_req(cnfg->ctrl, adap_layer);
+	if (adap_layer->ctrlcmd != NULL)
+		adap_layer->ctrlcmd(adap_layer, CAIF_CTRLCMD_DEINIT_RSP, 0);
 	return ret;
 
 }
@@ -254,69 +245,11 @@ void cfcnfg_release_adap_layer(struct cflayer *adap_layer)
 }
 EXPORT_SYMBOL(cfcnfg_release_adap_layer);
 
-static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id,
-				  struct cflayer *client_layer)
+static void cfcnfg_linkdestroy_rsp(struct cflayer *layer, u8 channel_id)
 {
-	struct cfcnfg *cnfg = container_obj(layer);
-	struct cflayer *servl;
-
-	/*
-	 * 1) Remove service from the MUX layer. The MUX must
-	 *    guarante that no more payload sent "upwards" (receive)
-	 */
-	servl = cfmuxl_remove_uplayer(cnfg->mux, channel_id);
-
-	if (servl == NULL) {
-		pr_err("CAIF: %s(): PROTOCOL ERROR "
-		       "- Error removing service_layer Channel_Id(%d)",
-			__func__, channel_id);
-		return;
-	}
-	caif_assert(channel_id == servl->id);
-
-	if (servl != client_layer && servl->up != client_layer) {
-		pr_err("CAIF: %s(): Error removing service_layer "
-		       "Channel_Id(%d) %p %p",
-			__func__, channel_id, (void *) servl,
-			(void *) client_layer);
-		return;
-	}
-
-	/*
-	 * 2) DEINIT_RSP must guarantee that no more packets are transmitted
-	 *    from client (adap_layer) when it returns.
-	 */
-
-	if (servl->ctrlcmd == NULL) {
-		pr_err("CAIF: %s(): Error servl->ctrlcmd == NULL", __func__);
-		return;
-	}
-
-	servl->ctrlcmd(servl, CAIF_CTRLCMD_DEINIT_RSP, 0);
-
-	/* 3) It is now safe to destroy the service layer. */
-	cfservl_destroy(servl);
 }
 
-/*
- * NOTE: What happens on linksetup failure:
- *	 1a) No response - Too early
- *	      This will not happen because enumerate is secured
- *	      before using interface.
- *	 1b) No response - FATAL
- *	      Not handled, but this should be a CAIF PROTOCOL ERROR
- *	      Modem error, response is really expected -  this case is
- *	      not really handled.
- *	 2) O/E-bit indicate error
- *	      Handled in cnfg_reject_rsp
- *	 3) Not able to match on request
- *	      Not handled, but this should be a CAIF PROTOCOL ERROR
- *	 4) Link-Error - (no response)
- *	      Not handled, but this should be a CAIF PROTOCOL ERROR
- */
-
-int
-cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
+int cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 				struct cfctrl_link_param *param,
 				struct cflayer *adap_layer)
 {
@@ -346,8 +279,7 @@ cfcnfg_add_adaptation_layer(struct cfcnfg *cnfg,
 		     param->phyid);
 	/* FIXME: ENUMERATE INITIALLY WHEN ACTIVATING PHYSICAL INTERFACE */
 	cfctrl_enum_req(cnfg->ctrl, param->phyid);
-	cfctrl_linkup_request(cnfg->ctrl, param, adap_layer);
-	return 0;
+	return cfctrl_linkup_request(cnfg->ctrl, param, adap_layer);
 }
 EXPORT_SYMBOL(cfcnfg_add_adaptation_layer);
 
@@ -367,8 +299,10 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 	struct cflayer *servicel = NULL;
 	struct cfcnfg_phyinfo *phyinfo;
 	if (adapt_layer == NULL) {
-		pr_err("CAIF: %s(): PROTOCOL ERROR "
-			"- LinkUp Request/Response did not match\n", __func__);
+		pr_debug("CAIF: %s(): link setup response "
+				"but no client exist, send linkdown back\n",
+				__func__);
+		cfctrl_linkdown_req(cnfg->ctrl, channel_id, NULL);
 		return;
 	}
 
@@ -424,6 +358,7 @@ cfcnfg_linkup_rsp(struct cflayer *layer, u8 channel_id, enum cfctrl_srv serv,
 	cfmuxl_set_uplayer(cnfg->mux, servicel, channel_id);
 	layer_set_up(servicel, adapt_layer);
 	layer_set_dn(adapt_layer, servicel);
+	cfsrvl_get(servicel);
 	servicel->ctrlcmd(servicel, CAIF_CTRLCMD_INIT_RSP, 0);
 }
 
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 11f80140f3cb..a521d32cfe56 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -32,6 +32,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 
 struct cflayer *cfctrl_create(void)
 {
+	struct dev_info dev_info;
 	struct cfctrl *this =
 		kmalloc(sizeof(struct cfctrl), GFP_ATOMIC);
 	if (!this) {
@@ -39,12 +40,13 @@ struct cflayer *cfctrl_create(void)
 		return NULL;
 	}
 	caif_assert(offsetof(struct cfctrl, serv.layer) == 0);
+	memset(&dev_info, 0, sizeof(dev_info));
+	dev_info.id = 0xff;
 	memset(this, 0, sizeof(*this));
+	cfsrvl_init(&this->serv, 0, &dev_info);
 	spin_lock_init(&this->info_list_lock);
 	atomic_set(&this->req_seq_no, 1);
 	atomic_set(&this->rsp_seq_no, 1);
-	this->serv.dev_info.id = 0xff;
-	this->serv.layer.id = 0;
 	this->serv.layer.receive = cfctrl_recv;
 	sprintf(this->serv.layer.name, "ctrl");
 	this->serv.layer.ctrlcmd = cfctrl_ctrlcmd;
@@ -127,20 +129,6 @@ void cfctrl_insert_req(struct cfctrl *ctrl,
 	spin_unlock(&ctrl->info_list_lock);
 }
 
-static void cfctrl_insert_req2(struct cfctrl *ctrl, enum cfctrl_cmd cmd,
-			       u8 linkid, struct cflayer *user_layer)
-{
-	struct cfctrl_request_info *req = kmalloc(sizeof(*req), GFP_KERNEL);
-	if (!req) {
-		pr_warning("CAIF: %s(): Out of memory\n", __func__);
-		return;
-	}
-	req->client_layer = user_layer;
-	req->cmd = cmd;
-	req->channel_id = linkid;
-	cfctrl_insert_req(ctrl, req);
-}
-
 /* Compare and remove request */
 struct cfctrl_request_info *cfctrl_remove_req(struct cfctrl *ctrl,
 					      struct cfctrl_request_info *req)
@@ -234,7 +222,7 @@ void cfctrl_enum_req(struct cflayer *layer, u8 physlinkid)
 	}
 }
 
-void cfctrl_linkup_request(struct cflayer *layer,
+int cfctrl_linkup_request(struct cflayer *layer,
 			   struct cfctrl_link_param *param,
 			   struct cflayer *user_layer)
 {
@@ -248,7 +236,7 @@ void cfctrl_linkup_request(struct cflayer *layer,
 	struct cfpkt *pkt = cfpkt_create(CFPKT_CTRL_PKT_LEN);
 	if (!pkt) {
 		pr_warning("CAIF: %s(): Out of memory\n", __func__);
-		return;
+		return -ENOMEM;
 	}
 	cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_SETUP);
 	cfpkt_addbdy(pkt, (param->chtype << 4) + param->linktype);
@@ -294,11 +282,12 @@ void cfctrl_linkup_request(struct cflayer *layer,
 	default:
 		pr_warning("CAIF: %s():Request setup of bad link type = %d\n",
 			   __func__, param->linktype);
+		return -EINVAL;
 	}
 	req = kmalloc(sizeof(*req), GFP_KERNEL);
 	if (!req) {
 		pr_warning("CAIF: %s(): Out of memory\n", __func__);
-		return;
+		return -ENOMEM;
 	}
 	memset(req, 0, sizeof(*req));
 	req->client_layer = user_layer;
@@ -306,6 +295,11 @@ void cfctrl_linkup_request(struct cflayer *layer,
 	req->param = *param;
 	cfctrl_insert_req(cfctrl, req);
 	init_info(cfpkt_info(pkt), cfctrl);
+	/*
+	 * NOTE:Always send linkup and linkdown request on the same
+	 *	device as the payload. Otherwise old queued up payload
+	 *	might arrive with the newly allocated channel ID.
+	 */
 	cfpkt_info(pkt)->dev_info->id = param->phyid;
 	ret =
 	    cfctrl->serv.layer.dn->transmit(cfctrl->serv.layer.dn, pkt);
@@ -313,7 +307,9 @@ void cfctrl_linkup_request(struct cflayer *layer,
 		pr_err("CAIF: %s(): Could not transmit linksetup request\n",
 			__func__);
 		cfpkt_destroy(pkt);
+		return -ENODEV;
 	}
+	return 0;
 }
 
 int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
@@ -326,7 +322,6 @@ int cfctrl_linkdown_req(struct cflayer *layer, u8 channelid,
 		pr_warning("CAIF: %s(): Out of memory\n", __func__);
 		return -ENOMEM;
 	}
-	cfctrl_insert_req2(cfctrl, CFCTRL_CMD_LINK_DESTROY, channelid, client);
 	cfpkt_addbdy(pkt, CFCTRL_CMD_LINK_DESTROY);
 	cfpkt_addbdy(pkt, channelid);
 	init_info(cfpkt_info(pkt), cfctrl);
@@ -392,6 +387,38 @@ void cfctrl_getstartreason_req(struct cflayer *layer)
 }
 
 
+void cfctrl_cancel_req(struct cflayer *layr, struct cflayer *adap_layer)
+{
+	struct cfctrl_request_info *req;
+	struct cfctrl_request_info **link;
+	struct cfctrl *ctrl = container_obj(layr);
+
+	/*
+	 * Drop every pending request owned by adap_layer. The list is
+	 * singly linked through ->next and headed by ctrl->first_req;
+	 * info_list_lock is held for the whole walk.
+	 */
+	spin_lock(&ctrl->info_list_lock);
+
+	/*
+	 * 'link' is the pointer that references the node under test, so
+	 * the head and interior nodes unlink the same way.
+	 */
+	link = &ctrl->first_req;
+	while (*link != NULL) {
+		req = *link;
+		if (req->client_layer != adap_layer) {
+			link = &req->next;
+			continue;
+		}
+		/* Unlink before freeing; *link now names the successor. */
+		*link = req->next;
+		kfree(req);
+	}
+
+	spin_unlock(&ctrl->info_list_lock);
+}
+
 static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 {
 	u8 cmdrsp;
@@ -409,11 +436,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 	cmd = cmdrsp & CFCTRL_CMD_MASK;
 	if (cmd != CFCTRL_CMD_LINK_ERR
 	    && CFCTRL_RSP_BIT != (CFCTRL_RSP_BIT & cmdrsp)) {
-		if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE) {
-			pr_info("CAIF: %s() CAIF Protocol error:"
-				"Response bit not set\n", __func__);
-			goto error;
-		}
+		if (handle_loop(cfctrl, cmd, pkt) == CAIF_FAILURE)
+			cmdrsp |= CFCTRL_ERR_BIT;
 	}
 
 	switch (cmd) {
@@ -451,12 +475,16 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 			switch (serv) {
 			case CFCTRL_SRV_VEI:
 			case CFCTRL_SRV_DBG:
+				if (CFCTRL_ERR_BIT & cmdrsp)
+					break;
 				/* Link ID */
 				cfpkt_extr_head(pkt, &linkid, 1);
 				break;
 			case CFCTRL_SRV_VIDEO:
 				cfpkt_extr_head(pkt, &tmp, 1);
 				linkparam.u.video.connid = tmp;
+				if (CFCTRL_ERR_BIT & cmdrsp)
+					break;
 				/* Link ID */
 				cfpkt_extr_head(pkt, &linkid, 1);
 				break;
@@ -465,6 +493,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 				cfpkt_extr_head(pkt, &tmp32, 4);
 				linkparam.u.datagram.connid =
 				    le32_to_cpu(tmp32);
+				if (CFCTRL_ERR_BIT & cmdrsp)
+					break;
 				/* Link ID */
 				cfpkt_extr_head(pkt, &linkid, 1);
 				break;
@@ -483,6 +513,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 					*cp++ = tmp;
 				*cp = '\0';
 
+				if (CFCTRL_ERR_BIT & cmdrsp)
+					break;
 				/* Link ID */
 				cfpkt_extr_head(pkt, &linkid, 1);
 
@@ -519,6 +551,8 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 					cfpkt_extr_head(pkt, &tmp, 1);
 					*cp++ = tmp;
 				}
+				if (CFCTRL_ERR_BIT & cmdrsp)
+					break;
 				/* Link ID */
 				cfpkt_extr_head(pkt, &linkid, 1);
 				/* Length */
@@ -560,13 +594,7 @@ static int cfctrl_recv(struct cflayer *layer, struct cfpkt *pkt)
 		break;
 	case CFCTRL_CMD_LINK_DESTROY:
 		cfpkt_extr_head(pkt, &linkid, 1);
-		rsp.cmd = cmd;
-		rsp.channel_id = linkid;
-		req = cfctrl_remove_req(cfctrl, &rsp);
-		cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid,
-					    req ? req->client_layer : NULL);
-		if (req != NULL)
-			kfree(req);
+		cfctrl->res.linkdestroy_rsp(cfctrl->serv.layer.up, linkid);
 		break;
 	case CFCTRL_CMD_LINK_ERR:
 		pr_err("CAIF: %s(): Frame Error Indication received\n",
@@ -608,7 +636,7 @@ static void cfctrl_ctrlcmd(struct cflayer *layr, enum caif_ctrlcmd ctrl,
 	case CAIF_CTRLCMD_FLOW_OFF_IND:
 		spin_lock(&this->info_list_lock);
 		if (this->first_req != NULL) {
-			pr_warning("CAIF: %s(): Received flow off in "
+			pr_debug("CAIF: %s(): Received flow off in "
 				   "control layer", __func__);
 		}
 		spin_unlock(&this->info_list_lock);
@@ -633,6 +661,7 @@ static int handle_loop(struct cfctrl *ctrl, int cmd, struct cfpkt *pkt)
 			if (!ctrl->loop_linkused[linkid])
 				goto found;
 		spin_unlock(&ctrl->loop_linkid_lock);
+		pr_err("CAIF: %s(): Out of link-ids\n", __func__);
 		return -EINVAL;
 found:
 		if (!ctrl->loop_linkused[linkid])
-- 
cgit v1.2.3-59-g8ed1b


From bece7b2398d073d11b2e352405a3ecd3a1e39c60 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Wed, 28 Apr 2010 08:54:38 +0000
Subject: caif: Rewritten socket implementation

Changes:
 This is a complete re-write of the socket layer, making the socket
 implementation more aligned with the other socket layers and using more
 of the support functions available in sock.c. Lots of code is copied
 from af_unix (and some from af_irda).
 Non-blocking mode should be working as well.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/caif/caif_socket.h |    5 +-
 net/caif/caif_socket.c           | 1795 ++++++++++++++++++--------------------
 2 files changed, 831 insertions(+), 969 deletions(-)

(limited to 'net')

diff --git a/include/linux/caif/caif_socket.h b/include/linux/caif/caif_socket.h
index 8e5c8444a3f4..2a61eb1beb85 100644
--- a/include/linux/caif/caif_socket.h
+++ b/include/linux/caif/caif_socket.h
@@ -16,7 +16,6 @@
 #include <sys/socket.h>
 #endif
 
-
 /**
  * enum caif_link_selector -    Physical Link Selection.
  * @CAIF_LINK_HIGH_BANDW:	Physical interface for high-bandwidth
@@ -59,7 +58,7 @@ enum caif_channel_priority {
 /**
  * enum caif_protocol_type  -	CAIF Channel type.
  * @CAIFPROTO_AT:		Classic AT channel.
- * @CAIFPROTO_DATAGRAM:		Datagram channel.
+ * @CAIFPROTO_DATAGRAM:	Datagram channel.
  * @CAIFPROTO_DATAGRAM_LOOP:	Datagram loopback channel, used for testing.
  * @CAIFPROTO_UTIL:		Utility (Psock) channel.
  * @CAIFPROTO_RFM:		Remote File Manager
@@ -87,6 +86,7 @@ enum caif_at_type {
 
 /**
  * struct sockaddr_caif - the sockaddr structure for CAIF sockets.
+ * @family:		     Address family number, must be AF_CAIF.
  * @u:			     Union of address data 'switched' by family.
  * :
  * @u.at:                    Applies when family = CAIFPROTO_AT.
@@ -153,6 +153,7 @@ struct sockaddr_caif {
  *
  *
  * This enum defines the CAIF Socket options to be used on a socket
+ * of type PF_CAIF.
  *
  */
 enum caif_socket_opts {
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index d455375789fb..c3a70c5c893a 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1,7 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland sjur.brandeland@stericsson.com
- *		Per Sigmond per.sigmond@stericsson.com
  * License terms: GNU General Public License (GPL) version 2
  */
 
@@ -16,91 +15,52 @@
 #include <linux/poll.h>
 #include <linux/tcp.h>
 #include <linux/uaccess.h>
-#include <asm/atomic.h>
-
+#include <linux/mutex.h>
+#include <linux/debugfs.h>
 #include <linux/caif/caif_socket.h>
+#include <asm/atomic.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/caif_dev.h>
 #include <net/caif/cfpkt.h>
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(AF_CAIF);
+
+#define CAIF_DEF_SNDBUF (CAIF_MAX_PAYLOAD_SIZE*10)
+#define CAIF_DEF_RCVBUF (CAIF_MAX_PAYLOAD_SIZE*100)
+
+/*
+ * CAIF state is re-using the TCP socket states.
+ * caif_states stored in sk_state reflect the state as reported by
+ * the CAIF stack, while sk_socket->state is the state of the socket.
+ */
+enum caif_states {
+	CAIF_CONNECTED		= TCP_ESTABLISHED,
+	CAIF_CONNECTING	= TCP_SYN_SENT,
+	CAIF_DISCONNECTED	= TCP_CLOSE
+};
+
+#define TX_FLOW_ON_BIT	1
+#define RX_FLOW_ON_BIT	2
 
-#define CHNL_SKT_READ_QUEUE_HIGH 200
-#define CHNL_SKT_READ_QUEUE_LOW 100
-
-static int caif_sockbuf_size = 40000;
-static atomic_t caif_nr_socks = ATOMIC_INIT(0);
-
-#define CONN_STATE_OPEN_BIT	      1
-#define CONN_STATE_PENDING_BIT	      2
-#define CONN_STATE_PEND_DESTROY_BIT   3
-#define CONN_REMOTE_SHUTDOWN_BIT      4
-
-#define TX_FLOW_ON_BIT		      1
-#define RX_FLOW_ON_BIT		      2
-
-#define STATE_IS_OPEN(cf_sk) test_bit(CONN_STATE_OPEN_BIT,\
-				    (void *) &(cf_sk)->conn_state)
-#define STATE_IS_REMOTE_SHUTDOWN(cf_sk) test_bit(CONN_REMOTE_SHUTDOWN_BIT,\
-				    (void *) &(cf_sk)->conn_state)
-#define STATE_IS_PENDING(cf_sk) test_bit(CONN_STATE_PENDING_BIT,\
-				       (void *) &(cf_sk)->conn_state)
-#define STATE_IS_PENDING_DESTROY(cf_sk) test_bit(CONN_STATE_PEND_DESTROY_BIT,\
-				       (void *) &(cf_sk)->conn_state)
-
-#define SET_STATE_PENDING_DESTROY(cf_sk) set_bit(CONN_STATE_PEND_DESTROY_BIT,\
-				    (void *) &(cf_sk)->conn_state)
-#define SET_STATE_OPEN(cf_sk) set_bit(CONN_STATE_OPEN_BIT,\
-				    (void *) &(cf_sk)->conn_state)
-#define SET_STATE_CLOSED(cf_sk) clear_bit(CONN_STATE_OPEN_BIT,\
-					(void *) &(cf_sk)->conn_state)
-#define SET_PENDING_ON(cf_sk) set_bit(CONN_STATE_PENDING_BIT,\
-				    (void *) &(cf_sk)->conn_state)
-#define SET_PENDING_OFF(cf_sk) clear_bit(CONN_STATE_PENDING_BIT,\
-				       (void *) &(cf_sk)->conn_state)
-#define SET_REMOTE_SHUTDOWN(cf_sk) set_bit(CONN_REMOTE_SHUTDOWN_BIT,\
-				    (void *) &(cf_sk)->conn_state)
-
-#define SET_REMOTE_SHUTDOWN_OFF(dev) clear_bit(CONN_REMOTE_SHUTDOWN_BIT,\
-				    (void *) &(dev)->conn_state)
-#define RX_FLOW_IS_ON(cf_sk) test_bit(RX_FLOW_ON_BIT,\
-				    (void *) &(cf_sk)->flow_state)
-#define TX_FLOW_IS_ON(cf_sk) test_bit(TX_FLOW_ON_BIT,\
-				    (void *) &(cf_sk)->flow_state)
-
-#define SET_RX_FLOW_OFF(cf_sk) clear_bit(RX_FLOW_ON_BIT,\
-				       (void *) &(cf_sk)->flow_state)
-#define SET_RX_FLOW_ON(cf_sk) set_bit(RX_FLOW_ON_BIT,\
-				    (void *) &(cf_sk)->flow_state)
-#define SET_TX_FLOW_OFF(cf_sk) clear_bit(TX_FLOW_ON_BIT,\
-				       (void *) &(cf_sk)->flow_state)
-#define SET_TX_FLOW_ON(cf_sk) set_bit(TX_FLOW_ON_BIT,\
-				    (void *) &(cf_sk)->flow_state)
-
-#define SKT_READ_FLAG 0x01
-#define SKT_WRITE_FLAG 0x02
 static struct dentry *debugfsdir;
-#include <linux/debugfs.h>
 
 #ifdef CONFIG_DEBUG_FS
 struct debug_fs_counter {
-	atomic_t num_open;
-	atomic_t num_close;
-	atomic_t num_init;
-	atomic_t num_init_resp;
-	atomic_t num_init_fail_resp;
-	atomic_t num_deinit;
-	atomic_t num_deinit_resp;
+	atomic_t caif_nr_socks;
+	atomic_t num_connect_req;
+	atomic_t num_connect_resp;
+	atomic_t num_connect_fail_resp;
+	atomic_t num_disconnect;
 	atomic_t num_remote_shutdown_ind;
 	atomic_t num_tx_flow_off_ind;
 	atomic_t num_tx_flow_on_ind;
 	atomic_t num_rx_flow_off;
 	atomic_t num_rx_flow_on;
-	atomic_t skb_in_use;
-	atomic_t skb_alloc;
-	atomic_t skb_free;
 };
-static struct debug_fs_counter cnt;
+struct debug_fs_counter cnt;
 #define	dbfs_atomic_inc(v) atomic_inc(v)
 #define	dbfs_atomic_dec(v) atomic_dec(v)
 #else
@@ -108,624 +68,666 @@ static struct debug_fs_counter cnt;
 #define	dbfs_atomic_dec(v)
 #endif
 
-/* The AF_CAIF socket */
 struct caifsock {
-	/* NOTE: sk has to be the first member */
-	struct sock sk;
+	struct sock sk; /* must be first member */
 	struct cflayer layer;
-	char name[CAIF_LAYER_NAME_SZ];
-	u32 conn_state;
+	char name[CAIF_LAYER_NAME_SZ]; /* Used for debugging */
 	u32 flow_state;
-	struct cfpktq *pktq;
-	int file_mode;
 	struct caif_connect_request conn_req;
-	int read_queue_len;
-	/* protect updates of read_queue_len */
-	spinlock_t read_queue_len_lock;
+	struct mutex readlock;
 	struct dentry *debugfs_socket_dir;
 };
 
-static void drain_queue(struct caifsock *cf_sk);
+static int rx_flow_is_on(struct caifsock *cf_sk)
+{
+	/* Report whether RX_FLOW_ON_BIT is set in this socket's flow_state. */
+	return test_bit(RX_FLOW_ON_BIT, (void *) &cf_sk->flow_state);
+}
+
+static int tx_flow_is_on(struct caifsock *cf_sk)
+{
+	/* Report whether TX_FLOW_ON_BIT is set in this socket's flow_state. */
+	return test_bit(TX_FLOW_ON_BIT, (void *) &cf_sk->flow_state);
+}
 
-/* Packet Receive Callback function called from CAIF Stack */
-static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt)
+static void set_rx_flow_off(struct caifsock *cf_sk)
 {
-	struct caifsock *cf_sk;
-	int read_queue_high;
-	cf_sk = container_of(layr, struct caifsock, layer);
+	 clear_bit(RX_FLOW_ON_BIT,
+		 (void *) &cf_sk->flow_state);
+}
 
-	if (!STATE_IS_OPEN(cf_sk)) {
-		/*FIXME: This should be allowed finally!*/
-		pr_debug("CAIF: %s(): called after close request\n", __func__);
-		cfpkt_destroy(pkt);
-		return 0;
-	}
-	/* NOTE: This function may be called in Tasklet context! */
+static void set_rx_flow_on(struct caifsock *cf_sk)
+{
+	 set_bit(RX_FLOW_ON_BIT,
+			(void *) &cf_sk->flow_state);
+}
 
-	/* The queue has its own lock */
-	cfpkt_queue(cf_sk->pktq, pkt, 0);
+static void set_tx_flow_off(struct caifsock *cf_sk)
+{
+	 clear_bit(TX_FLOW_ON_BIT,
+		(void *) &cf_sk->flow_state);
+}
 
-	spin_lock(&cf_sk->read_queue_len_lock);
-	cf_sk->read_queue_len++;
+static void set_tx_flow_on(struct caifsock *cf_sk)
+{
+	 set_bit(TX_FLOW_ON_BIT,
+		(void *) &cf_sk->flow_state);
+}
 
-	read_queue_high = (cf_sk->read_queue_len > CHNL_SKT_READ_QUEUE_HIGH);
-	spin_unlock(&cf_sk->read_queue_len_lock);
+static void caif_read_lock(struct sock *sk)
+{
+	/* Take the readlock mutex of the caifsock that embeds this sock. */
+	struct caifsock *owner = container_of(sk, struct caifsock, sk);
+	mutex_lock(&owner->readlock);
+}
 
-	if (RX_FLOW_IS_ON(cf_sk) && read_queue_high) {
-		dbfs_atomic_inc(&cnt.num_rx_flow_off);
-		SET_RX_FLOW_OFF(cf_sk);
+static void caif_read_unlock(struct sock *sk)
+{
+	/* Release the readlock mutex of the caifsock that embeds this sock. */
+	struct caifsock *owner = container_of(sk, struct caifsock, sk);
+	mutex_unlock(&owner->readlock);
+}
 
-		/* Send flow off (NOTE: must not sleep) */
-		pr_debug("CAIF: %s():"
-			" sending flow OFF (queue len = %d)\n",
-			__func__,
-		     cf_sk->read_queue_len);
-		caif_assert(cf_sk->layer.dn);
-		caif_assert(cf_sk->layer.dn->ctrlcmd);
+int sk_rcvbuf_lowwater(struct caifsock *cf_sk)
+{
+	/* The low water mark is a quarter of the socket's sk_rcvbuf. */
+	return cf_sk->sk.sk_rcvbuf / 4;
+}
 
-		(void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
-					       CAIF_MODEMCMD_FLOW_OFF_REQ);
-	}
+void caif_flow_ctrl(struct sock *sk, int mode)
+{
+	/* Pass 'mode' to the layer below through modemcmd, if one is set. */
+	struct caifsock *owner = container_of(sk, struct caifsock, sk);
+	if (owner->layer.dn)
+		owner->layer.dn->modemcmd(owner->layer.dn, mode);
+}
 
-	/* Signal reader that data is available. */
+/*
+ * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are
+ * not dropped, but CAIF is sending flow off instead.
+ */
+int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	int err;
+	int skb_len;
+	unsigned long flags;
+	struct sk_buff_head *list = &sk->sk_receive_queue;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 
-	wake_up_interruptible(sk_sleep(&cf_sk->sk));
+	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+		(unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) {
+		trace_printk("CAIF: %s():"
+			" sending flow OFF (queue len = %d %d)\n",
+			__func__,
+			atomic_read(&cf_sk->sk.sk_rmem_alloc),
+			sk_rcvbuf_lowwater(cf_sk));
+		set_rx_flow_off(cf_sk);
+		if (cf_sk->layer.dn)
+			cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
+						CAIF_MODEMCMD_FLOW_OFF_REQ);
+	}
 
+	err = sk_filter(sk, skb);
+	if (err)
+		return err;
+	if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
+		set_rx_flow_off(cf_sk);
+		trace_printk("CAIF: %s():"
+			" sending flow OFF due to rmem_schedule\n",
+			__func__);
+		if (cf_sk->layer.dn)
+			cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
+						CAIF_MODEMCMD_FLOW_OFF_REQ);
+	}
+	skb->dev = NULL;
+	skb_set_owner_r(skb, sk);
+	/* Cache the SKB length before we tack it onto the receive
+	 * queue. Once it is added it no longer belongs to us and
+	 * may be freed by other threads of control pulling packets
+	 * from the queue.
+	 */
+	skb_len = skb->len;
+	spin_lock_irqsave(&list->lock, flags);
+	if (!sock_flag(sk, SOCK_DEAD))
+		__skb_queue_tail(list, skb);
+	spin_unlock_irqrestore(&list->lock, flags);
+
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, skb_len);
+	else
+		kfree_skb(skb);
 	return 0;
 }
 
-/* Packet Flow Control Callback function called from CAIF */
-static void caif_sktflowctrl_cb(struct cflayer *layr,
-				enum caif_ctrlcmd flow,
-				int phyid)
+/* Packet Receive Callback function called from CAIF Stack */
+static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt)
 {
 	struct caifsock *cf_sk;
-
-	/* NOTE: This function may be called in Tasklet context! */
-	pr_debug("CAIF: %s(): flowctrl func called: %s.\n",
-		      __func__,
-		      flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
-		      flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
-		      flow == CAIF_CTRLCMD_INIT_RSP ? "INIT_RSP" :
-		      flow == CAIF_CTRLCMD_DEINIT_RSP ? "DEINIT_RSP" :
-		      flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "INIT_FAIL_RSP" :
-		      flow ==
-		      CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ? "REMOTE_SHUTDOWN" :
-		      "UKNOWN CTRL COMMAND");
-
-	if (layr == NULL)
-		return;
+	struct sk_buff *skb;
 
 	cf_sk = container_of(layr, struct caifsock, layer);
+	skb = cfpkt_tonative(pkt);
+
+	if (unlikely(cf_sk->sk.sk_state != CAIF_CONNECTED)) {
+		cfpkt_destroy(pkt);
+		return 0;
+	}
+	caif_queue_rcv_skb(&cf_sk->sk, skb);
+	return 0;
+}
 
+/* Packet Control Callback function called from CAIF */
+static void caif_ctrl_cb(struct cflayer *layr,
+				enum caif_ctrlcmd flow,
+				int phyid)
+{
+	struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);
 	switch (flow) {
 	case CAIF_CTRLCMD_FLOW_ON_IND:
+		/* OK from modem to start sending again */
 		dbfs_atomic_inc(&cnt.num_tx_flow_on_ind);
-		/* Signal reader that data is available. */
-		SET_TX_FLOW_ON(cf_sk);
-		wake_up_interruptible(sk_sleep(&cf_sk->sk));
+		set_tx_flow_on(cf_sk);
+		cf_sk->sk.sk_state_change(&cf_sk->sk);
 		break;
 
 	case CAIF_CTRLCMD_FLOW_OFF_IND:
+		/* Modem asks us to shut up */
 		dbfs_atomic_inc(&cnt.num_tx_flow_off_ind);
-		SET_TX_FLOW_OFF(cf_sk);
+		set_tx_flow_off(cf_sk);
+		cf_sk->sk.sk_state_change(&cf_sk->sk);
 		break;
 
 	case CAIF_CTRLCMD_INIT_RSP:
-		dbfs_atomic_inc(&cnt.num_init_resp);
-		/* Signal reader that data is available. */
-		caif_assert(STATE_IS_OPEN(cf_sk));
-		SET_PENDING_OFF(cf_sk);
-		SET_TX_FLOW_ON(cf_sk);
-		wake_up_interruptible(sk_sleep(&cf_sk->sk));
+		/* We're now connected */
+		dbfs_atomic_inc(&cnt.num_connect_resp);
+		cf_sk->sk.sk_state = CAIF_CONNECTED;
+		set_tx_flow_on(cf_sk);
+		cf_sk->sk.sk_state_change(&cf_sk->sk);
 		break;
 
 	case CAIF_CTRLCMD_DEINIT_RSP:
-		dbfs_atomic_inc(&cnt.num_deinit_resp);
-		caif_assert(!STATE_IS_OPEN(cf_sk));
-		SET_PENDING_OFF(cf_sk);
-		if (!STATE_IS_PENDING_DESTROY(cf_sk)) {
-			if (sk_sleep(&cf_sk->sk) != NULL)
-				wake_up_interruptible(sk_sleep(&cf_sk->sk));
-		}
-		dbfs_atomic_inc(&cnt.num_deinit);
-		sock_put(&cf_sk->sk);
+		/* We're now disconnected */
+		cf_sk->sk.sk_state = CAIF_DISCONNECTED;
+		cf_sk->sk.sk_state_change(&cf_sk->sk);
+		cfcnfg_release_adap_layer(&cf_sk->layer);
 		break;
 
 	case CAIF_CTRLCMD_INIT_FAIL_RSP:
-		dbfs_atomic_inc(&cnt.num_init_fail_resp);
-		caif_assert(STATE_IS_OPEN(cf_sk));
-		SET_STATE_CLOSED(cf_sk);
-		SET_PENDING_OFF(cf_sk);
-		SET_TX_FLOW_OFF(cf_sk);
-		wake_up_interruptible(sk_sleep(&cf_sk->sk));
+		/* Connect request failed */
+		dbfs_atomic_inc(&cnt.num_connect_fail_resp);
+		cf_sk->sk.sk_err = ECONNREFUSED;
+		cf_sk->sk.sk_state = CAIF_DISCONNECTED;
+		cf_sk->sk.sk_shutdown = SHUTDOWN_MASK;
+		/*
+		 * Socket "standards" seems to require POLLOUT to
+		 * be set at connect failure.
+		 */
+		set_tx_flow_on(cf_sk);
+		cf_sk->sk.sk_state_change(&cf_sk->sk);
 		break;
 
 	case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
+		/* Modem has closed this connection, or device is down. */
 		dbfs_atomic_inc(&cnt.num_remote_shutdown_ind);
-		SET_REMOTE_SHUTDOWN(cf_sk);
-		/* Use sk_shutdown to indicate remote shutdown indication */
-		cf_sk->sk.sk_shutdown |= RCV_SHUTDOWN;
-		cf_sk->file_mode = 0;
-		wake_up_interruptible(sk_sleep(&cf_sk->sk));
+		cf_sk->sk.sk_shutdown = SHUTDOWN_MASK;
+		cf_sk->sk.sk_err = ECONNRESET;
+		set_rx_flow_on(cf_sk);
+		cf_sk->sk.sk_error_report(&cf_sk->sk);
 		break;
 
 	default:
 		pr_debug("CAIF: %s(): Unexpected flow command %d\n",
-			      __func__, flow);
+				__func__, flow);
 	}
 }
 
-static void skb_destructor(struct sk_buff *skb)
+static void caif_check_flow_release(struct sock *sk)
 {
-	dbfs_atomic_inc(&cnt.skb_free);
-	dbfs_atomic_dec(&cnt.skb_in_use);
-}
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 
+	if (cf_sk->layer.dn == NULL || cf_sk->layer.dn->modemcmd == NULL)
+		return;
+	if (rx_flow_is_on(cf_sk))
+		return;
 
-static int caif_recvmsg(struct kiocb *iocb, struct socket *sock,
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk_rcvbuf_lowwater(cf_sk)) {
+			dbfs_atomic_inc(&cnt.num_rx_flow_on);
+			set_rx_flow_on(cf_sk);
+			cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
+						CAIF_MODEMCMD_FLOW_ON_REQ);
+	}
+}
+/*
+ * Copied from sock.c:sock_queue_rcv_skb(), and added check that user buffer
+ * has sufficient size.
+ */
+
+static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
 				struct msghdr *m, size_t buf_len, int flags)
 
 {
 	struct sock *sk = sock->sk;
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	struct cfpkt *pkt = NULL;
-	size_t len;
-	int result;
 	struct sk_buff *skb;
-	ssize_t ret = -EIO;
-	int read_queue_low;
-
-	if (cf_sk == NULL) {
-		pr_debug("CAIF: %s(): private_data not set!\n",
-			      __func__);
-		ret = -EBADFD;
-		goto read_error;
-	}
-
-	/* Don't do multiple iovec entries yet */
-	if (m->msg_iovlen != 1)
-		return -EOPNOTSUPP;
+	int ret = 0;
+	int len;
 
 	if (unlikely(!buf_len))
 		return -EINVAL;
 
-	lock_sock(&(cf_sk->sk));
-
-	caif_assert(cf_sk->pktq);
-
-	if (!STATE_IS_OPEN(cf_sk)) {
-		/* Socket is closed or closing. */
-		if (!STATE_IS_PENDING(cf_sk)) {
-			pr_debug("CAIF: %s(): socket is closed (by remote)\n",
-				 __func__);
-			ret = -EPIPE;
-		} else {
-			pr_debug("CAIF: %s(): socket is closing..\n", __func__);
-			ret = -EBADF;
-		}
+	skb = skb_recv_datagram(sk, flags, 0 , &ret);
+	if (!skb)
 		goto read_error;
-	}
-	/* Socket is open or opening. */
-	if (STATE_IS_PENDING(cf_sk)) {
-		pr_debug("CAIF: %s(): socket is opening...\n", __func__);
-
-		if (flags & MSG_DONTWAIT) {
-			/* We can't block. */
-			pr_debug("CAIF: %s():state pending and MSG_DONTWAIT\n",
-				 __func__);
-			ret = -EAGAIN;
-			goto read_error;
-		}
 
+	len = skb->len;
+
+	if (skb && skb->len > buf_len && !(flags & MSG_PEEK)) {
+		len = buf_len;
 		/*
-		 * Blocking mode; state is pending and we need to wait
-		 * for its conclusion.
+		 * Push skb back on receive queue if buffer too small.
+		 * This has a built-in race where multi-threaded receive
+		 * may get packet in wrong order, but multiple read does
+		 * not really guarantee ordered delivery anyway.
+		 * Let's optimize for speed without taking locks.
 		 */
-		release_sock(&cf_sk->sk);
-
-		result =
-		    wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-					     !STATE_IS_PENDING(cf_sk));
 
-		lock_sock(&(cf_sk->sk));
-
-		if (result == -ERESTARTSYS) {
-			pr_debug("CAIF: %s(): wait_event_interruptible"
-				 " woken by a signal (1)", __func__);
-			ret = -ERESTARTSYS;
-			goto read_error;
-		}
+		skb_queue_head(&sk->sk_receive_queue, skb);
+		ret = -EMSGSIZE;
+		goto read_error;
 	}
 
-	if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) ||
-		!STATE_IS_OPEN(cf_sk) ||
-		STATE_IS_PENDING(cf_sk)) {
-
-		pr_debug("CAIF: %s(): socket closed\n",
-			__func__);
-		ret = -ESHUTDOWN;
+	ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len);
+	if (ret)
 		goto read_error;
-	}
 
-	/*
-	 * Block if we don't have any received buffers.
-	 * The queue has its own lock.
-	 */
-	while ((pkt = cfpkt_qpeek(cf_sk->pktq)) == NULL) {
+	skb_free_datagram(sk, skb);
 
-		if (flags & MSG_DONTWAIT) {
-			pr_debug("CAIF: %s(): MSG_DONTWAIT\n", __func__);
-			ret = -EAGAIN;
-			goto read_error;
-		}
-		trace_printk("CAIF: %s() wait_event\n", __func__);
+	caif_check_flow_release(sk);
 
-		/* Let writers in. */
-		release_sock(&cf_sk->sk);
+	return len;
 
-		/* Block reader until data arrives or socket is closed. */
-		if (wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-					cfpkt_qpeek(cf_sk->pktq)
-					|| STATE_IS_REMOTE_SHUTDOWN(cf_sk)
-					|| !STATE_IS_OPEN(cf_sk)) ==
-		    -ERESTARTSYS) {
-			pr_debug("CAIF: %s():"
-				" wait_event_interruptible woken by "
-				"a signal, signal_pending(current) = %d\n",
-				__func__,
-				signal_pending(current));
-			return -ERESTARTSYS;
-		}
+read_error:
+	return ret;
+}
 
-		trace_printk("CAIF: %s() awake\n", __func__);
-		if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) {
-			pr_debug("CAIF: %s(): "
-				 "received remote_shutdown indication\n",
-				 __func__);
-			ret = -ESHUTDOWN;
-			goto read_error_no_unlock;
-		}
 
-		/* I want to be alone on cf_sk (except status and queue). */
-		lock_sock(&(cf_sk->sk));
+/* Copied from unix_stream_wait_data, identical except for lock call. */
+static long caif_stream_data_wait(struct sock *sk, long timeo)
+{
+	DEFINE_WAIT(wait);
+	lock_sock(sk);
+
+	for (;;) {
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+		if (!skb_queue_empty(&sk->sk_receive_queue) ||
+			sk->sk_err ||
+			sk->sk_state != CAIF_CONNECTED ||
+			sock_flag(sk, SOCK_DEAD) ||
+			(sk->sk_shutdown & RCV_SHUTDOWN) ||
+			signal_pending(current) ||
+			!timeo)
+			break;
 
-		if (!STATE_IS_OPEN(cf_sk)) {
-			/* Someone closed the link, report error. */
-			pr_debug("CAIF: %s(): remote end shutdown!\n",
-				      __func__);
-			ret = -EPIPE;
-			goto read_error;
-		}
+		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 	}
 
-	/* The queue has its own lock. */
-	len = cfpkt_getlen(pkt);
-
-	/* Check max length that can be copied. */
-	if (len <= buf_len)
-		pkt = cfpkt_dequeue(cf_sk->pktq);
-	else {
-		pr_debug("CAIF: %s(): user buffer too small (%ld,%ld)\n",
-			 __func__, (long) len, (long) buf_len);
-		if (sock->type == SOCK_SEQPACKET) {
-			ret = -EMSGSIZE;
-			goto read_error;
-		}
-		len = buf_len;
-	}
+	finish_wait(sk_sleep(sk), &wait);
+	release_sock(sk);
+	return timeo;
+}
 
 
-	spin_lock(&cf_sk->read_queue_len_lock);
-	cf_sk->read_queue_len--;
-	read_queue_low = (cf_sk->read_queue_len < CHNL_SKT_READ_QUEUE_LOW);
-	spin_unlock(&cf_sk->read_queue_len_lock);
+/*
+ * Copied from unix_stream_recvmsg, but removed credit checks,
+ * changed locking calls, changed address handling.
+ */
+static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
+				struct msghdr *msg, size_t size,
+				int flags)
+{
+	struct sock *sk = sock->sk;
+	int copied = 0;
+	int target;
+	int err = 0;
+	long timeo;
 
-	if (!RX_FLOW_IS_ON(cf_sk) && read_queue_low) {
-		dbfs_atomic_inc(&cnt.num_rx_flow_on);
-		SET_RX_FLOW_ON(cf_sk);
+	err = -EOPNOTSUPP;
+	if (flags&MSG_OOB)
+		goto out;
 
-		/* Send flow on. */
-		pr_debug("CAIF: %s(): sending flow ON (queue len = %d)\n",
-			 __func__, cf_sk->read_queue_len);
-		caif_assert(cf_sk->layer.dn);
-		caif_assert(cf_sk->layer.dn->ctrlcmd);
-		(void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
-					       CAIF_MODEMCMD_FLOW_ON_REQ);
+	msg->msg_namelen = 0;
 
-		caif_assert(cf_sk->read_queue_len >= 0);
-	}
+	/*
+	 * Lock the socket to prevent queue disordering
+	 * while sleeps in memcpy_tomsg
+	 */
+	err = -EAGAIN;
+	if (sk->sk_state == CAIF_CONNECTING)
+		goto out;
 
-	skb = cfpkt_tonative(pkt);
-	result = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len);
-	skb_pull(skb, len);
+	caif_read_lock(sk);
+	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
+	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
 
-	if (result) {
-		pr_debug("CAIF: %s(): copy to_iovec failed\n", __func__);
-		cfpkt_destroy(pkt);
-		ret = -EFAULT;
-		goto read_error;
-	}
+	do {
+		int chunk;
+		struct sk_buff *skb;
 
-	/* Free packet and remove from queue */
-	if (skb->len == 0)
-		skb_free_datagram(sk, skb);
+		lock_sock(sk);
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		caif_check_flow_release(sk);
 
-	/* Let the others in. */
-	release_sock(&cf_sk->sk);
-	return len;
+		if (skb == NULL) {
+			if (copied >= target)
+				goto unlock;
+			/*
+			 *	POSIX 1003.1g mandates this order.
+			 */
+			err = sock_error(sk);
+			if (err)
+				goto unlock;
+			err = -ECONNRESET;
+			if (sk->sk_shutdown & RCV_SHUTDOWN)
+				goto unlock;
 
-read_error:
-	release_sock(&cf_sk->sk);
-read_error_no_unlock:
-	return ret;
-}
+			err = -EPIPE;
+			if (sk->sk_state != CAIF_CONNECTED)
+				goto unlock;
+			if (sock_flag(sk, SOCK_DEAD))
+				goto unlock;
 
-/* Send a signal as a consequence of sendmsg, sendto or caif_sendmsg. */
-static int caif_sendmsg(struct kiocb *kiocb, struct socket *sock,
-			struct msghdr *msg, size_t len)
-{
+			release_sock(sk);
 
-	struct sock *sk = sock->sk;
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	size_t payload_size = msg->msg_iov->iov_len;
-	struct cfpkt *pkt = NULL;
-	struct caif_payload_info info;
-	unsigned char *txbuf;
-	ssize_t ret = -EIO;
-	int result;
-	struct sk_buff *skb;
-	caif_assert(msg->msg_iovlen == 1);
+			err = -EAGAIN;
+			if (!timeo)
+				break;
 
-	if (cf_sk == NULL) {
-		pr_debug("CAIF: %s(): private_data not set!\n",
-			      __func__);
-		ret = -EBADFD;
-		goto write_error_no_unlock;
-	}
+			caif_read_unlock(sk);
 
-	if (unlikely(msg->msg_iov->iov_base == NULL)) {
-		pr_warning("CAIF: %s(): Buffer is NULL.\n", __func__);
-		ret = -EINVAL;
-		goto write_error_no_unlock;
-	}
+			timeo = caif_stream_data_wait(sk, timeo);
 
-	if (payload_size > CAIF_MAX_PAYLOAD_SIZE) {
-		pr_debug("CAIF: %s(): buffer too long\n", __func__);
-		if (sock->type == SOCK_SEQPACKET) {
-			ret = -EINVAL;
-			goto write_error_no_unlock;
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeo);
+				goto out;
+			}
+			caif_read_lock(sk);
+			continue;
+unlock:
+			release_sock(sk);
+			break;
 		}
-		payload_size = CAIF_MAX_PAYLOAD_SIZE;
-	}
+		release_sock(sk);
+		chunk = min_t(unsigned int, skb->len, size);
+		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			if (copied == 0)
+				copied = -EFAULT;
+			break;
+		}
+		copied += chunk;
+		size -= chunk;
 
-	/* I want to be alone on cf_sk (except status and queue) */
-	lock_sock(&(cf_sk->sk));
+		/* Mark read part of skb as used */
+		if (!(flags & MSG_PEEK)) {
+			skb_pull(skb, chunk);
 
-	caif_assert(cf_sk->pktq);
+			/* put the skb back if we didn't use it up. */
+			if (skb->len) {
+				skb_queue_head(&sk->sk_receive_queue, skb);
+				break;
+			}
+			kfree_skb(skb);
 
-	if (!STATE_IS_OPEN(cf_sk)) {
-		/* Socket is closed or closing */
-		if (!STATE_IS_PENDING(cf_sk)) {
-			pr_debug("CAIF: %s(): socket is closed (by remote)\n",
-				 __func__);
-			ret = -EPIPE;
 		} else {
-			pr_debug("CAIF: %s(): socket is closing...\n",
-				 __func__);
-			ret = -EBADF;
-		}
-		goto write_error;
-	}
-
-	/* Socket is open or opening */
-	if (STATE_IS_PENDING(cf_sk)) {
-		pr_debug("CAIF: %s(): socket is opening...\n", __func__);
-
-		if (msg->msg_flags & MSG_DONTWAIT) {
-			/* We can't block */
-			trace_printk("CAIF: %s():state pending:"
-				     "state=MSG_DONTWAIT\n", __func__);
-			ret = -EAGAIN;
-			goto write_error;
+			/*
+			 * It is questionable, see note in unix_dgram_recvmsg.
+			 */
+			/* put message back and return */
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			break;
 		}
-		/* Let readers in */
-		release_sock(&cf_sk->sk);
-
-		/*
-		 * Blocking mode; state is pending and we need to wait
-		 * for its conclusion.
-		 */
-		result =
-		    wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-					     !STATE_IS_PENDING(cf_sk));
-		/* I want to be alone on cf_sk (except status and queue) */
-		lock_sock(&(cf_sk->sk));
+	} while (size);
+	caif_read_unlock(sk);
 
-		if (result == -ERESTARTSYS) {
-			pr_debug("CAIF: %s(): wait_event_interruptible"
-				 " woken by a signal (1)", __func__);
-			ret = -ERESTARTSYS;
-			goto write_error;
-		}
-	}
-	if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) ||
-		!STATE_IS_OPEN(cf_sk) ||
-		STATE_IS_PENDING(cf_sk)) {
+out:
+	return copied ? : err;
+}
 
-		pr_debug("CAIF: %s(): socket closed\n",
-			__func__);
-		ret = -ESHUTDOWN;
-		goto write_error;
+/*
+ * Copied from sock.c:sock_wait_for_wmem, but change to wait for
+ * CAIF flow-on and sock_writable.
+ */
+static long caif_wait_for_flow_on(struct caifsock *cf_sk,
+				int wait_writeable, long timeo, int *err)
+{
+	struct sock *sk = &cf_sk->sk;
+	DEFINE_WAIT(wait);
+	for (;;) {
+		*err = 0;
+		if (tx_flow_is_on(cf_sk) &&
+			(!wait_writeable || sock_writeable(&cf_sk->sk)))
+			break;
+		*err = -ETIMEDOUT;
+		if (!timeo)
+			break;
+		*err = -ERESTARTSYS;
+		if (signal_pending(current))
+			break;
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		*err = -ECONNRESET;
+		if (sk->sk_shutdown & SHUTDOWN_MASK)
+			break;
+		*err = -sk->sk_err;
+		if (sk->sk_err)
+			break;
+		*err = -EPIPE;
+		if (cf_sk->sk.sk_state != CAIF_CONNECTED)
+			break;
+		timeo = schedule_timeout(timeo);
 	}
+	finish_wait(sk_sleep(sk), &wait);
+	return timeo;
+}
 
-	if (!TX_FLOW_IS_ON(cf_sk)) {
+/*
+ * Transmit a SKB. The device may temporarily request re-transmission
+ * by returning EAGAIN.
+ */
+static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
+			int noblock, long timeo)
+{
+	struct cfpkt *pkt;
+	int ret, loopcnt = 0;
 
-		/* Flow is off. Check non-block flag */
-		if (msg->msg_flags & MSG_DONTWAIT) {
-			trace_printk("CAIF: %s(): MSG_DONTWAIT and tx flow off",
-				 __func__);
-			ret = -EAGAIN;
-			goto write_error;
-		}
+	pkt = cfpkt_fromnative(CAIF_DIR_OUT, skb);
+	memset(cfpkt_info(pkt), 0, sizeof(struct caif_payload_info));
+	do {
 
-		/* release lock before waiting */
-		release_sock(&cf_sk->sk);
+		ret = -ETIMEDOUT;
 
-		/* Wait until flow is on or socket is closed */
-		if (wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-					TX_FLOW_IS_ON(cf_sk)
-					|| !STATE_IS_OPEN(cf_sk)
-					|| STATE_IS_REMOTE_SHUTDOWN(cf_sk)
-					) == -ERESTARTSYS) {
-			pr_debug("CAIF: %s():"
-				 " wait_event_interruptible woken by a signal",
-				 __func__);
-			ret = -ERESTARTSYS;
-			goto write_error_no_unlock;
+		/* Slight paranoia, probably not needed. */
+		if (unlikely(loopcnt++ > 1000)) {
+			pr_warning("CAIF: %s(): transmit retries failed,"
+				" error = %d\n", __func__, ret);
+			break;
 		}
 
-		/* I want to be alone on cf_sk (except status and queue) */
-		lock_sock(&(cf_sk->sk));
-
-		if (!STATE_IS_OPEN(cf_sk)) {
-			/* someone closed the link, report error */
-			pr_debug("CAIF: %s(): remote end shutdown!\n",
-				      __func__);
-			ret = -EPIPE;
-			goto write_error;
+		if (cf_sk->layer.dn != NULL)
+			ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt);
+		if (likely(ret >= 0))
+			break;
+		/* if transmit return -EAGAIN, then retry */
+		if (noblock && ret == -EAGAIN)
+			break;
+		timeo = caif_wait_for_flow_on(cf_sk, 0, timeo, &ret);
+		if (signal_pending(current)) {
+			ret = sock_intr_errno(timeo);
+			break;
 		}
-
-		if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) {
-			pr_debug("CAIF: %s(): "
-				 "received remote_shutdown indication\n",
-				 __func__);
-			ret = -ESHUTDOWN;
-			goto write_error;
+		if (ret)
+			break;
+		if (cf_sk->sk.sk_state != CAIF_CONNECTED ||
+			sock_flag(&cf_sk->sk, SOCK_DEAD) ||
+			(cf_sk->sk.sk_shutdown & RCV_SHUTDOWN)) {
+			ret = -EPIPE;
+			cf_sk->sk.sk_err = EPIPE;
+			break;
 		}
-	}
+	} while (ret == -EAGAIN);
+	return ret;
+}
 
-	pkt = cfpkt_create(payload_size);
-	skb = (struct sk_buff *)pkt;
-	skb->destructor = skb_destructor;
-	skb->sk = sk;
-	dbfs_atomic_inc(&cnt.skb_alloc);
-	dbfs_atomic_inc(&cnt.skb_in_use);
-	if (cfpkt_raw_append(pkt, (void **) &txbuf, payload_size) < 0) {
-		pr_debug("CAIF: %s(): cfpkt_raw_append failed\n", __func__);
-		cfpkt_destroy(pkt);
-		ret = -EINVAL;
-		goto write_error;
-	}
+/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */
+static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
+			struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+	int buffer_size;
+	int ret = 0;
+	struct sk_buff *skb = NULL;
+	int noblock;
+	long timeo;
+	caif_assert(cf_sk);
+	ret = sock_error(sk);
+	if (ret)
+		goto err;
+
+	ret = -EOPNOTSUPP;
+	if (msg->msg_flags&MSG_OOB)
+		goto err;
+
+	ret = -EOPNOTSUPP;
+	if (msg->msg_namelen)
+		goto err;
+
+	ret = -EINVAL;
+	if (unlikely(msg->msg_iov->iov_base == NULL))
+		goto err;
+	noblock = msg->msg_flags & MSG_DONTWAIT;
+
+	buffer_size = len + CAIF_NEEDED_HEADROOM + CAIF_NEEDED_TAILROOM;
+
+	ret = -EMSGSIZE;
+	if (buffer_size > CAIF_MAX_PAYLOAD_SIZE)
+		goto err;
+
+	timeo = sock_sndtimeo(sk, noblock);
+	timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk),
+				1, timeo, &ret);
+
+	ret = -EPIPE;
+	if (cf_sk->sk.sk_state != CAIF_CONNECTED ||
+		sock_flag(sk, SOCK_DEAD) ||
+		(sk->sk_shutdown & RCV_SHUTDOWN))
+		goto err;
+
+	ret = -ENOMEM;
+	skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret);
+	if (!skb)
+		goto err;
+	skb_reserve(skb, CAIF_NEEDED_HEADROOM);
+
+	ret = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+
+	if (ret)
+		goto err;
+	ret = transmit_skb(skb, cf_sk, noblock, timeo);
+	if (ret < 0)
+		goto err;
+	return len;
+err:
+	kfree_skb(skb);
+	return ret;
+}
 
-	/* Copy data into buffer. */
-	if (copy_from_user(txbuf, msg->msg_iov->iov_base, payload_size)) {
-		pr_debug("CAIF: %s(): copy_from_user returned non zero.\n",
-			 __func__);
-		cfpkt_destroy(pkt);
-		ret = -EINVAL;
-		goto write_error;
-	}
-	memset(&info, 0, sizeof(info));
+/*
+ * Copied from unix_stream_sendmsg and adapted to CAIF:
+ * removed permission handling, added waiting for flow on,
+ * and made other minor adaptations.
+ */
+static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
+				struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+	int err, size;
+	struct sk_buff *skb;
+	int sent = 0;
+	long timeo;
 
-	/* Send the packet down the stack. */
-	caif_assert(cf_sk->layer.dn);
-	caif_assert(cf_sk->layer.dn->transmit);
+	err = -EOPNOTSUPP;
 
-	do {
-		ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt);
+	if (unlikely(msg->msg_flags&MSG_OOB))
+		goto out_err;
 
-		if (likely((ret >= 0) || (ret != -EAGAIN)))
-			break;
+	if (unlikely(msg->msg_namelen))
+		goto out_err;
 
-		/* EAGAIN - retry */
-		if (msg->msg_flags & MSG_DONTWAIT) {
-			pr_debug("CAIF: %s(): NONBLOCK and transmit failed,"
-				 " error = %ld\n", __func__, (long) ret);
-			ret = -EAGAIN;
-			goto write_error;
-		}
+	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+	timeo = caif_wait_for_flow_on(cf_sk, 1, timeo, &err);
 
-		/* Let readers in */
-		release_sock(&cf_sk->sk);
+	if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
+		goto pipe_err;
 
-		/* Wait until flow is on or socket is closed */
-		if (wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-					TX_FLOW_IS_ON(cf_sk)
-					|| !STATE_IS_OPEN(cf_sk)
-					|| STATE_IS_REMOTE_SHUTDOWN(cf_sk)
-					) == -ERESTARTSYS) {
-			pr_debug("CAIF: %s(): wait_event_interruptible"
-				 " woken by a signal", __func__);
-			ret = -ERESTARTSYS;
-			goto write_error_no_unlock;
-		}
+	while (sent < len) {
 
-		/* I want to be alone on cf_sk (except status and queue) */
-		lock_sock(&(cf_sk->sk));
+		size = len-sent;
 
-	} while (ret == -EAGAIN);
+		if (size > CAIF_MAX_PAYLOAD_SIZE)
+			size = CAIF_MAX_PAYLOAD_SIZE;
 
-	if (ret < 0) {
-		cfpkt_destroy(pkt);
-		pr_debug("CAIF: %s(): transmit failed, error = %ld\n",
-			 __func__, (long) ret);
+		/* If size is more than half of sndbuf, chop up message */
+		if (size > ((sk->sk_sndbuf >> 1) - 64))
+			size = (sk->sk_sndbuf >> 1) - 64;
 
-		goto write_error;
-	}
+		if (size > SKB_MAX_ALLOC)
+			size = SKB_MAX_ALLOC;
 
-	release_sock(&cf_sk->sk);
-	return payload_size;
+		skb = sock_alloc_send_skb(sk,
+					size + CAIF_NEEDED_HEADROOM
+					+ CAIF_NEEDED_TAILROOM,
+					msg->msg_flags&MSG_DONTWAIT,
+					&err);
+		if (skb == NULL)
+			goto out_err;
 
-write_error:
-	release_sock(&cf_sk->sk);
-write_error_no_unlock:
-	return ret;
-}
+		skb_reserve(skb, CAIF_NEEDED_HEADROOM);
+		/*
+		 *	If you pass two values to the sock_alloc_send_skb
+		 *	it tries to grab the large buffer with GFP_NOFS
+		 *	(which can fail easily), and if it fails grab the
+		 *	fallback size buffer which is under a page and will
+		 *	succeed. [Alan]
+		 */
+		size = min_t(int, size, skb_tailroom(skb));
 
-static unsigned int caif_poll(struct file *file, struct socket *sock,
-						poll_table *wait)
-{
-	struct sock *sk = sock->sk;
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	u32 mask = 0;
-	poll_wait(file, sk_sleep(sk), wait);
-	lock_sock(&(cf_sk->sk));
-	if (!STATE_IS_OPEN(cf_sk)) {
-		if (!STATE_IS_PENDING(cf_sk))
-			mask |= POLLHUP;
-	} else {
-		if (cfpkt_qpeek(cf_sk->pktq) != NULL)
-			mask |= (POLLIN | POLLRDNORM);
-		if (TX_FLOW_IS_ON(cf_sk))
-			mask |= (POLLOUT | POLLWRNORM);
+		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
+		if (err) {
+			kfree_skb(skb);
+			goto out_err;
+		}
+		err = transmit_skb(skb, cf_sk,
+				msg->msg_flags&MSG_DONTWAIT, timeo);
+		if (err < 0) {
+			kfree_skb(skb);
+			goto pipe_err;
+		}
+		sent += size;
 	}
-	release_sock(&cf_sk->sk);
-	trace_printk("CAIF: %s(): poll mask=0x%04x\n",
-		      __func__, mask);
-	return mask;
-}
-
-static void drain_queue(struct caifsock *cf_sk)
-{
-	struct cfpkt *pkt = NULL;
-
-	/* Empty the queue */
-	do {
-		/* The queue has its own lock */
-		if (!cf_sk->pktq)
-			break;
-
-		pkt = cfpkt_dequeue(cf_sk->pktq);
-		if (!pkt)
-			break;
-		pr_debug("CAIF: %s(): freeing packet from read queue\n",
-			 __func__);
-		cfpkt_destroy(pkt);
 
-	} while (1);
+	return sent;
 
-	cf_sk->read_queue_len = 0;
+pipe_err:
+	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
+		send_sig(SIGPIPE, current, 0);
+	err = -EPIPE;
+out_err:
+	return sent ? : err;
 }
 
 static int setsockopt(struct socket *sock,
@@ -736,19 +738,13 @@ static int setsockopt(struct socket *sock,
 	int prio, linksel;
 	struct ifreq ifreq;
 
-	if (STATE_IS_OPEN(cf_sk)) {
-		pr_debug("CAIF: %s(): setsockopt "
-			 "cannot be done on a connected socket\n",
-			 __func__);
+	if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED)
 		return -ENOPROTOOPT;
-	}
+
 	switch (opt) {
 	case CAIFSO_LINK_SELECT:
-		if (ol < sizeof(int)) {
-			pr_debug("CAIF: %s(): setsockopt"
-				 " CAIFSO_CHANNEL_CONFIG bad size\n", __func__);
+		if (ol < sizeof(int))
 			return -EINVAL;
-		}
 		if (lvl != SOL_CAIF)
 			goto bad_sol;
 		if (copy_from_user(&linksel, ov, sizeof(int)))
@@ -761,28 +757,20 @@ static int setsockopt(struct socket *sock,
 	case SO_PRIORITY:
 		if (lvl != SOL_SOCKET)
 			goto bad_sol;
-		if (ol < sizeof(int)) {
-			pr_debug("CAIF: %s(): setsockopt"
-				 " SO_PRIORITY bad size\n", __func__);
+		if (ol < sizeof(int))
 			return -EINVAL;
-		}
 		if (copy_from_user(&prio, ov, sizeof(int)))
 			return -EINVAL;
 		lock_sock(&(cf_sk->sk));
 		cf_sk->conn_req.priority = prio;
-		pr_debug("CAIF: %s(): Setting sockopt priority=%d\n", __func__,
-			cf_sk->conn_req.priority);
 		release_sock(&cf_sk->sk);
 		return 0;
 
 	case SO_BINDTODEVICE:
 		if (lvl != SOL_SOCKET)
 			goto bad_sol;
-		if (ol < sizeof(struct ifreq)) {
-			pr_debug("CAIF: %s(): setsockopt"
-				 " SO_PRIORITY bad size\n", __func__);
+		if (ol < sizeof(struct ifreq))
 			return -EINVAL;
-		}
 		if (copy_from_user(&ifreq, ov, sizeof(ifreq)))
 			return -EFAULT;
 		lock_sock(&(cf_sk->sk));
@@ -798,359 +786,275 @@ static int setsockopt(struct socket *sock,
 			goto bad_sol;
 		if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL)
 			return -ENOPROTOOPT;
-		if (ol > sizeof(cf_sk->conn_req.param.data))
-			goto req_param_bad_size;
-
 		lock_sock(&(cf_sk->sk));
 		cf_sk->conn_req.param.size = ol;
-		if (copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) {
+		if (ol > sizeof(cf_sk->conn_req.param.data) ||
+			copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) {
 			release_sock(&cf_sk->sk);
-req_param_bad_size:
-			pr_debug("CAIF: %s(): setsockopt"
-				 " CAIFSO_CHANNEL_CONFIG bad size\n", __func__);
 			return -EINVAL;
 		}
-
 		release_sock(&cf_sk->sk);
 		return 0;
 
 	default:
-		pr_debug("CAIF: %s(): unhandled option %d\n", __func__, opt);
-		return -EINVAL;
+		return -ENOPROTOOPT;
 	}
 
 	return 0;
 bad_sol:
-	pr_debug("CAIF: %s(): setsockopt bad level\n", __func__);
 	return -ENOPROTOOPT;
 
 }
 
-static int caif_connect(struct socket *sock, struct sockaddr *uservaddr,
-	       int sockaddr_len, int flags)
+/*
+ * caif_connect() - Connect a CAIF Socket
+ * Copied and modified af_irda.c:irda_connect().
+ *
+ * Note : by consulting "errno", the user space caller may learn the cause
+ * of the failure. Most of them are visible in the function, others may come
+ * from subroutines called and are listed here :
+ *  o -EAFNOSUPPORT: bad socket family or type.
+ *  o -ESOCKTNOSUPPORT: bad socket type or protocol
+ *  o -EINVAL: bad socket address, or CAIF link type
+ *  o -ECONNREFUSED: remote end refused the connection.
+ *  o -EINPROGRESS: connect request sent but timed out (or non-blocking)
+ *  o -EISCONN: already connected.
+ *  o -ETIMEDOUT: Connection timed out (send timeout)
+ *  o -ENODEV: No link layer to send request
+ *  o -ECONNRESET: Received Shutdown indication or lost link layer
+ *  o -ENOMEM: Out of memory
+ *
+ *  State Strategy:
+ *  o sk_state: holds the CAIF_* protocol state, it's updated by
+ *	caif_ctrl_cb.
+ *  o sock->state: holds the SS_* socket state and is updated by connect and
+ *	disconnect.
+ */
+static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
+			int addr_len, int flags)
 {
-	struct caifsock *cf_sk = NULL;
-	int result = -1;
-	int mode = 0;
-	int ret = -EIO;
 	struct sock *sk = sock->sk;
-	BUG_ON(sk == NULL);
-
-	cf_sk = container_of(sk, struct caifsock, sk);
-
-	trace_printk("CAIF: %s(): cf_sk=%p OPEN=%d, TX_FLOW=%d, RX_FLOW=%d\n",
-		 __func__, cf_sk,
-		STATE_IS_OPEN(cf_sk),
-		TX_FLOW_IS_ON(cf_sk), RX_FLOW_IS_ON(cf_sk));
-
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+	long timeo;
+	int err;
+	lock_sock(sk);
 
-	if (sock->type == SOCK_SEQPACKET || sock->type == SOCK_STREAM)
-		sock->state	= SS_CONNECTING;
-	else
+	err = -EAFNOSUPPORT;
+	if (uaddr->sa_family != AF_CAIF)
 		goto out;
 
-	/* I want to be alone on cf_sk (except status and queue) */
-	lock_sock(&(cf_sk->sk));
-
-	if (sockaddr_len != sizeof(struct sockaddr_caif)) {
-		pr_debug("CAIF: %s(): Bad address len (%ld,%lu)\n",
-			 __func__, (long) sockaddr_len,
-			(long unsigned) sizeof(struct sockaddr_caif));
-		ret = -EINVAL;
-		goto open_error;
+	err = -ESOCKTNOSUPPORT;
+	if (unlikely(!(sk->sk_type == SOCK_STREAM &&
+		       cf_sk->sk.sk_protocol == CAIFPROTO_AT) &&
+		       sk->sk_type != SOCK_SEQPACKET))
+		goto out;
+	switch (sock->state) {
+	case SS_UNCONNECTED:
+		/* Normal case, a fresh connect */
+		caif_assert(sk->sk_state == CAIF_DISCONNECTED);
+		break;
+	case SS_CONNECTING:
+		switch (sk->sk_state) {
+		case CAIF_CONNECTED:
+			sock->state = SS_CONNECTED;
+			err = -EISCONN;
+			goto out;
+		case CAIF_DISCONNECTED:
+			/* Reconnect allowed */
+			break;
+		case CAIF_CONNECTING:
+			err = -EALREADY;
+			if (flags & O_NONBLOCK)
+				goto out;
+			goto wait_connect;
+		}
+		break;
+	case SS_CONNECTED:
+		caif_assert(sk->sk_state == CAIF_CONNECTED ||
+				sk->sk_state == CAIF_DISCONNECTED);
+		if (sk->sk_shutdown & SHUTDOWN_MASK) {
+			/* Allow re-connect after SHUTDOWN_IND */
+			caif_disconnect_client(&cf_sk->layer);
+			break;
+		}
+		/* No reconnect on a seqpacket socket */
+		err = -EISCONN;
+		goto out;
+	case SS_DISCONNECTING:
+	case SS_FREE:
+		caif_assert(1); /*Should never happen */
+		break;
 	}
+	sk->sk_state = CAIF_DISCONNECTED;
+	sock->state = SS_UNCONNECTED;
+	sk_stream_kill_queues(&cf_sk->sk);
 
-	if (uservaddr->sa_family != AF_CAIF) {
-		pr_debug("CAIF: %s(): Bad address family (%d)\n",
-			 __func__, uservaddr->sa_family);
-		ret = -EAFNOSUPPORT;
-		goto open_error;
-	}
+	err = -EINVAL;
+	if (addr_len != sizeof(struct sockaddr_caif) ||
+		!uaddr)
+		goto out;
 
-	memcpy(&cf_sk->conn_req.sockaddr, uservaddr,
+	memcpy(&cf_sk->conn_req.sockaddr, uaddr,
 		sizeof(struct sockaddr_caif));
 
-	dbfs_atomic_inc(&cnt.num_open);
-	mode = SKT_READ_FLAG | SKT_WRITE_FLAG;
-
-	/* If socket is not open, make sure socket is in fully closed state */
-	if (!STATE_IS_OPEN(cf_sk)) {
-		/* Has link close response been received (if we ever sent it)?*/
-		if (STATE_IS_PENDING(cf_sk)) {
-			/*
-			 * Still waiting for close response from remote.
-			 * If opened non-blocking, report "would block"
-			 */
-			if (flags & O_NONBLOCK) {
-				pr_debug("CAIF: %s(): O_NONBLOCK"
-					" && close pending\n", __func__);
-				ret = -EAGAIN;
-				goto open_error;
-			}
-
-			pr_debug("CAIF: %s(): Wait for close response"
-				 " from remote...\n", __func__);
-
-			release_sock(&cf_sk->sk);
-
-			/*
-			 * Blocking mode; close is pending and we need to wait
-			 * for its conclusion.
-			 */
-			result =
-			    wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-						     !STATE_IS_PENDING(cf_sk));
-
-			lock_sock(&(cf_sk->sk));
-			if (result == -ERESTARTSYS) {
-				pr_debug("CAIF: %s(): wait_event_interruptible"
-					 "woken by a signal (1)", __func__);
-				ret = -ERESTARTSYS;
-				goto open_error;
-			}
-		}
+	/* Move to connecting socket, start sending Connect Requests */
+	sock->state = SS_CONNECTING;
+	sk->sk_state = CAIF_CONNECTING;
+
+	dbfs_atomic_inc(&cnt.num_connect_req);
+	cf_sk->layer.receive = caif_sktrecv_cb;
+	err = caif_connect_client(&cf_sk->conn_req,
+				&cf_sk->layer);
+	if (err < 0) {
+		cf_sk->sk.sk_socket->state = SS_UNCONNECTED;
+		cf_sk->sk.sk_state = CAIF_DISCONNECTED;
+		goto out;
 	}
 
-	/* socket is now either closed, pending open or open */
-	if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) {
-		/* Open */
-		pr_debug("CAIF: %s(): Socket is already opened (cf_sk=%p)"
-			" check access f_flags = 0x%x file_mode = 0x%x\n",
-			 __func__, cf_sk, mode, cf_sk->file_mode);
-
-	} else {
-		/* We are closed or pending open.
-		 * If closed:	    send link setup
-		 * If pending open: link setup already sent (we could have been
-		 *		    interrupted by a signal last time)
-		 */
-		if (!STATE_IS_OPEN(cf_sk)) {
-			/* First opening of file; connect lower layers: */
-			/* Drain queue (very unlikely) */
-			drain_queue(cf_sk);
-
-			cf_sk->layer.receive = caif_sktrecv_cb;
-			SET_STATE_OPEN(cf_sk);
-			SET_PENDING_ON(cf_sk);
-
-			/* Register this channel. */
-			result =
-				caif_connect_client(&cf_sk->conn_req,
-							&cf_sk->layer);
-			if (result < 0) {
-				pr_debug("CAIF: %s(): can't register channel\n",
-					__func__);
-				ret = -EIO;
-				SET_STATE_CLOSED(cf_sk);
-				SET_PENDING_OFF(cf_sk);
-				goto open_error;
-			}
-			dbfs_atomic_inc(&cnt.num_init);
-		}
-
-		/* If opened non-blocking, report "success".
-		 */
-		if (flags & O_NONBLOCK) {
-			pr_debug("CAIF: %s(): O_NONBLOCK success\n",
-				 __func__);
-			ret = -EINPROGRESS;
-			cf_sk->sk.sk_err = -EINPROGRESS;
-			goto open_error;
-		}
-
-		trace_printk("CAIF: %s(): Wait for connect response\n",
-			     __func__);
+	err = -EINPROGRESS;
+wait_connect:
 
-		/* release lock before waiting */
-		release_sock(&cf_sk->sk);
-
-		result =
-		    wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-					     !STATE_IS_PENDING(cf_sk));
-
-		lock_sock(&(cf_sk->sk));
-
-		if (result == -ERESTARTSYS) {
-			pr_debug("CAIF: %s(): wait_event_interruptible"
-				 "woken by a signal (2)", __func__);
-			ret = -ERESTARTSYS;
-			goto open_error;
-		}
-
-		if (!STATE_IS_OPEN(cf_sk)) {
-			/* Lower layers said "no" */
-			pr_debug("CAIF: %s(): Closed received\n", __func__);
-			ret = -EPIPE;
-			goto open_error;
-		}
+	if (sk->sk_state != CAIF_CONNECTED && (flags & O_NONBLOCK))
+		goto out;
 
-		trace_printk("CAIF: %s(): Connect received\n", __func__);
+	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
+
+	release_sock(sk);
+	err = wait_event_interruptible_timeout(*sk_sleep(sk),
+			sk->sk_state != CAIF_CONNECTING,
+			timeo);
+	lock_sock(sk);
+	if (err < 0)
+		goto out; /* -ERESTARTSYS */
+	if (err == 0 && sk->sk_state != CAIF_CONNECTED) {
+		err = -ETIMEDOUT;
+		goto out;
 	}
-	/* Open is ok */
-	cf_sk->file_mode |= mode;
 
-	trace_printk("CAIF: %s(): Connected - file mode = %x\n",
-		  __func__, cf_sk->file_mode);
-
-	release_sock(&cf_sk->sk);
-	return 0;
-open_error:
-	sock->state	= SS_UNCONNECTED;
-	release_sock(&cf_sk->sk);
+	if (sk->sk_state != CAIF_CONNECTED) {
+		sock->state = SS_UNCONNECTED;
+		err = sock_error(sk);
+		if (!err)
+			err = -ECONNREFUSED;
+		goto out;
+	}
+	sock->state = SS_CONNECTED;
+	err = 0;
 out:
-	return ret;
+	release_sock(sk);
+	return err;
 }
 
-static int caif_shutdown(struct socket *sock, int how)
+
+/*
+ * caif_release() - Disconnect a CAIF Socket
+ * Copied and modified af_irda.c:irda_release().
+ */
+static int caif_release(struct socket *sock)
 {
-	struct caifsock *cf_sk = NULL;
-	int result = 0;
-	int tx_flow_state_was_on;
 	struct sock *sk = sock->sk;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+	int res = 0;
 
-	trace_printk("CAIF: %s(): enter\n", __func__);
-	pr_debug("f_flags=%x\n", sock->file->f_flags);
-
-	if (how != SHUT_RDWR)
-		return -EOPNOTSUPP;
-
-	cf_sk = container_of(sk, struct caifsock, sk);
-	if (cf_sk == NULL) {
-		pr_debug("CAIF: %s(): COULD NOT FIND SOCKET\n", __func__);
-		return -EBADF;
-	}
-
-	/* I want to be alone on cf_sk (except status queue) */
-	lock_sock(&(cf_sk->sk));
-	sock_hold(&cf_sk->sk);
-
-	/* IS_CLOSED have double meaning:
-	 * 1) Spontanous Remote Shutdown Request.
-	 * 2) Ack on a channel teardown(disconnect)
-	 * Must clear bit in case we previously received
-	 * remote shudown request.
-	 */
-	if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) {
-		SET_STATE_CLOSED(cf_sk);
-		SET_PENDING_ON(cf_sk);
-		tx_flow_state_was_on = TX_FLOW_IS_ON(cf_sk);
-		SET_TX_FLOW_OFF(cf_sk);
-
-		/* Hold the socket until DEINIT_RSP is received */
-		sock_hold(&cf_sk->sk);
-		result = caif_disconnect_client(&cf_sk->layer);
-
-		if (result < 0) {
-			pr_debug("CAIF: %s(): "
-					"caif_disconnect_client() failed\n",
-					 __func__);
-			SET_STATE_CLOSED(cf_sk);
-			SET_PENDING_OFF(cf_sk);
-			SET_TX_FLOW_OFF(cf_sk);
-			release_sock(&cf_sk->sk);
-			sock_put(&cf_sk->sk);
-			return -EIO;
-		}
+	if (!sk)
+		return 0;
 
-	}
-	if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) {
-		SET_PENDING_OFF(cf_sk);
-		SET_REMOTE_SHUTDOWN_OFF(cf_sk);
-	}
+	set_tx_flow_off(cf_sk);
 
 	/*
-	 * Socket is no longer in state pending close,
-	 * and we can release the reference.
+	 * Ensure that packets are not queued after this point in time.
+	 * caif_queue_rcv_skb checks SOCK_DEAD holding the queue lock,
+	 * this ensures no packets when sock is dead.
 	 */
+	spin_lock(&sk->sk_receive_queue.lock);
+	sock_set_flag(sk, SOCK_DEAD);
+	spin_unlock(&sk->sk_receive_queue.lock);
+	sock->sk = NULL;
 
-	dbfs_atomic_inc(&cnt.num_close);
-	drain_queue(cf_sk);
-	SET_RX_FLOW_ON(cf_sk);
-	cf_sk->file_mode = 0;
-	sock_put(&cf_sk->sk);
-	release_sock(&cf_sk->sk);
-	if (!result && (sock->file->f_flags & O_NONBLOCK)) {
-		pr_debug("nonblocking shutdown returing -EAGAIN\n");
-		return -EAGAIN;
-	} else
-		return result;
-}
-
-static ssize_t caif_sock_no_sendpage(struct socket *sock,
-				     struct page *page,
-				     int offset, size_t size, int flags)
-{
-	return -EOPNOTSUPP;
-}
-
-/* This function is called as part of close. */
-static int caif_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-	struct caifsock *cf_sk = NULL;
-	int res;
-	caif_assert(sk != NULL);
-	cf_sk = container_of(sk, struct caifsock, sk);
+	dbfs_atomic_inc(&cnt.num_disconnect);
 
 	if (cf_sk->debugfs_socket_dir != NULL)
 		debugfs_remove_recursive(cf_sk->debugfs_socket_dir);
 
-	res = caif_shutdown(sock, SHUT_RDWR);
-	if (res && res != -EINPROGRESS)
-		return res;
-
-	/*
-	 * FIXME: Shutdown should probably be possible to do async
-	 * without flushing queues, allowing reception of frames while
-	 * waiting for DEINIT_IND.
-	 * Release should always block, to allow secure decoupling of
-	 * CAIF stack.
-	 */
-	if (!(sock->file->f_flags & O_NONBLOCK)) {
-		res = wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-						!STATE_IS_PENDING(cf_sk));
-
-		if (res == -ERESTARTSYS) {
-			pr_debug("CAIF: %s(): wait_event_interruptible"
-				"woken by a signal (1)", __func__);
-		}
-	}
 	lock_sock(&(cf_sk->sk));
+	sk->sk_state = CAIF_DISCONNECTED;
+	sk->sk_shutdown = SHUTDOWN_MASK;
 
-	sock->sk = NULL;
+	if (cf_sk->sk.sk_socket->state == SS_CONNECTED ||
+		cf_sk->sk.sk_socket->state == SS_CONNECTING)
+		res = caif_disconnect_client(&cf_sk->layer);
 
-	/* Detach the socket from its process context by making it orphan. */
-	sock_orphan(sk);
+	cf_sk->sk.sk_socket->state = SS_DISCONNECTING;
+	wake_up_interruptible_poll(sk_sleep(sk), POLLERR|POLLHUP);
 
-	/*
-	 * Setting SHUTDOWN_MASK means that both send and receive are shutdown
-	 * for the socket.
-	 */
-	sk->sk_shutdown = SHUTDOWN_MASK;
+	sock_orphan(sk);
+	cf_sk->layer.dn = NULL;
+	sk_stream_kill_queues(&cf_sk->sk);
+	release_sock(sk);
+	sock_put(sk);
+	return res;
+}
 
-	/*
-	 * Set the socket state to closed, the TCP_CLOSE macro is used when
-	 * closing any socket.
-	 */
+/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
+static unsigned int caif_poll(struct file *file,
+				struct socket *sock, poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int mask;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 
-	/* Flush out this sockets receive queue. */
-	drain_queue(cf_sk);
+	sock_poll_wait(file, sk_sleep(sk), wait);
+	mask = 0;
 
-	/* Finally release the socket. */
-	SET_STATE_PENDING_DESTROY(cf_sk);
+	/* exceptional events? */
+	if (sk->sk_err)
+		mask |= POLLERR;
+	if (sk->sk_shutdown == SHUTDOWN_MASK)
+		mask |= POLLHUP;
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		mask |= POLLRDHUP;
 
-	release_sock(&cf_sk->sk);
+	/* readable? */
+	if (!skb_queue_empty(&sk->sk_receive_queue) ||
+		(sk->sk_shutdown & RCV_SHUTDOWN))
+		mask |= POLLIN | POLLRDNORM;
 
-	sock_put(sk);
+	/* Connection-based need to check for termination and startup */
+	if (sk->sk_state == CAIF_DISCONNECTED)
+		mask |= POLLHUP;
 
 	/*
-	 * The rest of the cleanup will be handled from the
-	 * caif_sock_destructor
+	 * we set writable also when the other side has shut down the
+	 * connection. This prevents stuck sockets.
 	 */
-	return res;
+	if (sock_writeable(sk) && tx_flow_is_on(cf_sk))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+	return mask;
 }
 
-static const struct proto_ops caif_ops = {
+static const struct proto_ops caif_seqpacket_ops = {
+	.family = PF_CAIF,
+	.owner = THIS_MODULE,
+	.release = caif_release,
+	.bind = sock_no_bind,
+	.connect = caif_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = sock_no_accept,
+	.getname = sock_no_getname,
+	.poll = caif_poll,
+	.ioctl = sock_no_ioctl,
+	.listen = sock_no_listen,
+	.shutdown = sock_no_shutdown,
+	.setsockopt = setsockopt,
+	.getsockopt = sock_no_getsockopt,
+	.sendmsg = caif_seqpkt_sendmsg,
+	.recvmsg = caif_seqpkt_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = sock_no_sendpage,
+};
+
+static const struct proto_ops caif_stream_ops = {
 	.family = PF_CAIF,
 	.owner = THIS_MODULE,
 	.release = caif_release,
@@ -1162,73 +1066,62 @@ static const struct proto_ops caif_ops = {
 	.poll = caif_poll,
 	.ioctl = sock_no_ioctl,
 	.listen = sock_no_listen,
-	.shutdown = caif_shutdown,
+	.shutdown = sock_no_shutdown,
 	.setsockopt = setsockopt,
 	.getsockopt = sock_no_getsockopt,
-	.sendmsg = caif_sendmsg,
-	.recvmsg = caif_recvmsg,
+	.sendmsg = caif_stream_sendmsg,
+	.recvmsg = caif_stream_recvmsg,
 	.mmap = sock_no_mmap,
-	.sendpage = caif_sock_no_sendpage,
+	.sendpage = sock_no_sendpage,
 };
 
 /* This function is called when a socket is finally destroyed. */
 static void caif_sock_destructor(struct sock *sk)
 {
-	struct caifsock *cf_sk = NULL;
-	cf_sk = container_of(sk, struct caifsock, sk);
-	/* Error checks. */
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 	caif_assert(!atomic_read(&sk->sk_wmem_alloc));
 	caif_assert(sk_unhashed(sk));
 	caif_assert(!sk->sk_socket);
 	if (!sock_flag(sk, SOCK_DEAD)) {
-		pr_debug("CAIF: %s(): 0x%p", __func__, sk);
+		pr_info("Attempt to release alive CAIF socket: %p\n", sk);
 		return;
 	}
-
-	if (STATE_IS_OPEN(cf_sk)) {
-		pr_debug("CAIF: %s(): socket is opened (cf_sk=%p)"
-			 " file_mode = 0x%x\n", __func__,
-			 cf_sk, cf_sk->file_mode);
-		return;
-	}
-	drain_queue(cf_sk);
-	kfree(cf_sk->pktq);
-
-	trace_printk("CAIF: %s(): caif_sock_destructor: Removing socket %s\n",
-		__func__, cf_sk->name);
-	atomic_dec(&caif_nr_socks);
+	sk_stream_kill_queues(&cf_sk->sk);
+	dbfs_atomic_dec(&cnt.caif_nr_socks);
 }
 
 static int caif_create(struct net *net, struct socket *sock, int protocol,
-		       int kern)
+			int kern)
 {
 	struct sock *sk = NULL;
 	struct caifsock *cf_sk = NULL;
-	int result = 0;
 	static struct proto prot = {.name = "PF_CAIF",
 		.owner = THIS_MODULE,
 		.obj_size = sizeof(struct caifsock),
 	};
 
+	if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN))
+		return -EPERM;
 	/*
 	 * The sock->type specifies the socket type to use.
-	 * in SEQPACKET mode packet boundaries are enforced.
+	 * The CAIF socket is a packet stream in the sense
+	 * that it is packet based. CAIF trusts the reliability
+	 * of the link, no resending is implemented.
 	 */
-	if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
+	if (sock->type == SOCK_SEQPACKET)
+		sock->ops = &caif_seqpacket_ops;
+	else if (sock->type == SOCK_STREAM)
+		sock->ops = &caif_stream_ops;
+	else
 		return -ESOCKTNOSUPPORT;
 
-	if (net != &init_net)
-		return -EAFNOSUPPORT;
-
 	if (protocol < 0 || protocol >= CAIFPROTO_MAX)
 		return -EPROTONOSUPPORT;
 	/*
-	 * Set the socket state to unconnected.	 The socket state is really
-	 * not used at all in the net/core or socket.c but the
+	 * Set the socket state to unconnected.	 The socket state
+	 * is really not used at all in the net/core or socket.c but the
 	 * initialization makes sure that sock->state is not uninitialized.
 	 */
-	sock->state = SS_UNCONNECTED;
-
 	sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot);
 	if (!sk)
 		return -ENOMEM;
@@ -1238,11 +1131,9 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
 	/* Store the protocol */
 	sk->sk_protocol = (unsigned char) protocol;
 
-	spin_lock_init(&cf_sk->read_queue_len_lock);
-
-	/* Fill in some information concerning the misc socket. */
-	snprintf(cf_sk->name, sizeof(cf_sk->name), "cf_sk%d",
-		atomic_read(&caif_nr_socks));
+	/* Sendbuf dictates the amount of outbound packets not yet sent */
+	sk->sk_sndbuf = CAIF_DEF_SNDBUF;
+	sk->sk_rcvbuf = CAIF_DEF_RCVBUF;
 
 	/*
 	 * Lock in order to try to stop someone from opening the socket
@@ -1252,108 +1143,85 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
 
 	/* Initialize the nozero default sock structure data. */
 	sock_init_data(sock, sk);
-	sock->ops = &caif_ops;
 	sk->sk_destruct = caif_sock_destructor;
-	sk->sk_sndbuf = caif_sockbuf_size;
-	sk->sk_rcvbuf = caif_sockbuf_size;
 
-	cf_sk->pktq = cfpktq_create();
+	mutex_init(&cf_sk->readlock); /* single task reading lock */
+	cf_sk->layer.ctrlcmd = caif_ctrl_cb;
+	cf_sk->sk.sk_socket->state = SS_UNCONNECTED;
+	cf_sk->sk.sk_state = CAIF_DISCONNECTED;
 
-	if (!cf_sk->pktq) {
-		pr_err("CAIF: %s(): queue create failed.\n", __func__);
-		result = -ENOMEM;
-		release_sock(&cf_sk->sk);
-		goto err_failed;
-	}
-	cf_sk->layer.ctrlcmd = caif_sktflowctrl_cb;
-	SET_STATE_CLOSED(cf_sk);
-	SET_PENDING_OFF(cf_sk);
-	SET_TX_FLOW_OFF(cf_sk);
-	SET_RX_FLOW_ON(cf_sk);
+	set_tx_flow_off(cf_sk);
+	set_rx_flow_on(cf_sk);
 
 	/* Set default options on configuration */
 	cf_sk->conn_req.priority = CAIF_PRIO_NORMAL;
-	cf_sk->conn_req.link_selector = CAIF_LINK_HIGH_BANDW;
+	cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY;
 	cf_sk->conn_req.protocol = protocol;
 	/* Increase the number of sockets created. */
-	atomic_inc(&caif_nr_socks);
+	dbfs_atomic_inc(&cnt.caif_nr_socks);
+#ifdef CONFIG_DEBUG_FS
 	if (!IS_ERR(debugfsdir)) {
+		/* Fill in some information concerning the misc socket. */
+		snprintf(cf_sk->name, sizeof(cf_sk->name), "cfsk%d",
+				atomic_read(&cnt.caif_nr_socks));
+
 		cf_sk->debugfs_socket_dir =
 			debugfs_create_dir(cf_sk->name, debugfsdir);
-		debugfs_create_u32("conn_state", S_IRUSR | S_IWUSR,
-				cf_sk->debugfs_socket_dir, &cf_sk->conn_state);
+		debugfs_create_u32("sk_state", S_IRUSR | S_IWUSR,
+				cf_sk->debugfs_socket_dir,
+				(u32 *) &cf_sk->sk.sk_state);
 		debugfs_create_u32("flow_state", S_IRUSR | S_IWUSR,
 				cf_sk->debugfs_socket_dir, &cf_sk->flow_state);
-		debugfs_create_u32("read_queue_len", S_IRUSR | S_IWUSR,
+		debugfs_create_u32("sk_rmem_alloc", S_IRUSR | S_IWUSR,
+				cf_sk->debugfs_socket_dir,
+				(u32 *) &cf_sk->sk.sk_rmem_alloc);
+		debugfs_create_u32("sk_wmem_alloc", S_IRUSR | S_IWUSR,
 				cf_sk->debugfs_socket_dir,
-				(u32 *) &cf_sk->read_queue_len);
+				(u32 *) &cf_sk->sk.sk_wmem_alloc);
 		debugfs_create_u32("identity", S_IRUSR | S_IWUSR,
 				cf_sk->debugfs_socket_dir,
 				(u32 *) &cf_sk->layer.id);
 	}
+#endif
 	release_sock(&cf_sk->sk);
 	return 0;
-err_failed:
-	sk_free(sk);
-	return result;
 }
 
+
 static struct net_proto_family caif_family_ops = {
 	.family = PF_CAIF,
 	.create = caif_create,
 	.owner = THIS_MODULE,
 };
 
-static int af_caif_init(void)
+int af_caif_init(void)
 {
-	int err;
-	err = sock_register(&caif_family_ops);
-
+	int err = sock_register(&caif_family_ops);
 	if (!err)
 		return err;
-
 	return 0;
 }
 
 static int __init caif_sktinit_module(void)
 {
-	int stat;
 #ifdef CONFIG_DEBUG_FS
-	debugfsdir = debugfs_create_dir("chnl_skt", NULL);
+	debugfsdir = debugfs_create_dir("caif_sk", NULL);
 	if (!IS_ERR(debugfsdir)) {
-		debugfs_create_u32("skb_inuse", S_IRUSR | S_IWUSR,
-				debugfsdir,
-				(u32 *) &cnt.skb_in_use);
-		debugfs_create_u32("skb_alloc", S_IRUSR | S_IWUSR,
-				debugfsdir,
-				(u32 *) &cnt.skb_alloc);
-		debugfs_create_u32("skb_free", S_IRUSR | S_IWUSR,
-				debugfsdir,
-				(u32 *) &cnt.skb_free);
 		debugfs_create_u32("num_sockets", S_IRUSR | S_IWUSR,
 				debugfsdir,
-				(u32 *) &caif_nr_socks);
-		debugfs_create_u32("num_open", S_IRUSR | S_IWUSR,
+				(u32 *) &cnt.caif_nr_socks);
+		debugfs_create_u32("num_connect_req", S_IRUSR | S_IWUSR,
 				debugfsdir,
-				(u32 *) &cnt.num_open);
-		debugfs_create_u32("num_close", S_IRUSR | S_IWUSR,
+				(u32 *) &cnt.num_connect_req);
+		debugfs_create_u32("num_connect_resp", S_IRUSR | S_IWUSR,
 				debugfsdir,
-				(u32 *) &cnt.num_close);
-		debugfs_create_u32("num_init", S_IRUSR | S_IWUSR,
+				(u32 *) &cnt.num_connect_resp);
+		debugfs_create_u32("num_connect_fail_resp", S_IRUSR | S_IWUSR,
 				debugfsdir,
-				(u32 *) &cnt.num_init);
-		debugfs_create_u32("num_init_resp", S_IRUSR | S_IWUSR,
+				(u32 *) &cnt.num_connect_fail_resp);
+		debugfs_create_u32("num_disconnect", S_IRUSR | S_IWUSR,
 				debugfsdir,
-				(u32 *) &cnt.num_init_resp);
-		debugfs_create_u32("num_init_fail_resp", S_IRUSR | S_IWUSR,
-				debugfsdir,
-				(u32 *) &cnt.num_init_fail_resp);
-		debugfs_create_u32("num_deinit", S_IRUSR | S_IWUSR,
-				debugfsdir,
-				(u32 *) &cnt.num_deinit);
-		debugfs_create_u32("num_deinit_resp", S_IRUSR | S_IWUSR,
-				debugfsdir,
-				(u32 *) &cnt.num_deinit_resp);
+				(u32 *) &cnt.num_disconnect);
 		debugfs_create_u32("num_remote_shutdown_ind",
 				S_IRUSR | S_IWUSR, debugfsdir,
 				(u32 *) &cnt.num_remote_shutdown_ind);
@@ -1371,13 +1239,7 @@ static int __init caif_sktinit_module(void)
 				(u32 *) &cnt.num_rx_flow_on);
 	}
 #endif
-	stat = af_caif_init();
-	if (stat) {
-		pr_err("CAIF: %s(): Failed to initialize CAIF socket layer.",
-		       __func__);
-		return stat;
-	}
-	return 0;
+	return af_caif_init();
 }
 
 static void __exit caif_sktexit_module(void)
@@ -1386,6 +1248,5 @@ static void __exit caif_sktexit_module(void)
 	if (debugfsdir != NULL)
 		debugfs_remove_recursive(debugfsdir);
 }
-
 module_init(caif_sktinit_module);
 module_exit(caif_sktexit_module);
-- 
cgit v1.2.3-59-g8ed1b


From 8391c4aab1aa4f47a9dab2c1ec3ebd2cbf09df1b Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Wed, 28 Apr 2010 08:54:39 +0000
Subject: caif: Bugfixes in CAIF netdevice for close and flow control

Changes:
o Bugfix: Flow control was causing the device to be destroyed.
o Bugfix: Handle CAIF channel connect failures.
o If the underlying link layer is gone the net-device is no longer removed,
  but closed.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/chnl_net.c | 130 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 73 insertions(+), 57 deletions(-)

(limited to 'net')

diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index f622ff1d39ba..610966abe2dc 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -22,10 +22,10 @@
 #include <net/caif/cfpkt.h>
 #include <net/caif/caif_dev.h>
 
-#define CAIF_CONNECT_TIMEOUT 30
+/* GPRS PDP connection has MTU to 1500 */
 #define SIZE_MTU 1500
-#define SIZE_MTU_MAX 4080
-#define SIZE_MTU_MIN 68
+/* 5 sec. connect timeout */
+#define CONNECT_TIMEOUT (5 * HZ)
 #define CAIF_NET_DEFAULT_QUEUE_LEN 500
 
 #undef pr_debug
@@ -37,6 +37,13 @@ static LIST_HEAD(chnl_net_list);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_RTNL_LINK("caif");
 
+enum caif_states {
+	CAIF_CONNECTED		= 1,
+	CAIF_CONNECTING,
+	CAIF_DISCONNECTED,
+	CAIF_SHUTDOWN
+};
+
 struct chnl_net {
 	struct cflayer chnl;
 	struct net_device_stats stats;
@@ -47,7 +54,7 @@ struct chnl_net {
 	wait_queue_head_t netmgmt_wq;
 	/* Flow status to remember and control the transmission. */
 	bool flowenabled;
-	bool pending_close;
+	enum caif_states state;
 };
 
 static void robust_list_del(struct list_head *delete_node)
@@ -58,15 +65,16 @@ static void robust_list_del(struct list_head *delete_node)
 	list_for_each_safe(list_node, n, &chnl_net_list) {
 		if (list_node == delete_node) {
 			list_del(list_node);
-			break;
+			return;
 		}
 	}
+	WARN_ON(1);
 }
 
 static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
 {
 	struct sk_buff *skb;
-	struct chnl_net *priv  = NULL;
+	struct chnl_net *priv  = container_of(layr, struct chnl_net, chnl);
 	int pktlen;
 	int err = 0;
 
@@ -91,7 +99,6 @@ static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
 	else
 		skb->ip_summed = CHECKSUM_NONE;
 
-	/* FIXME: Drivers should call this in tasklet context. */
 	if (in_interrupt())
 		netif_rx(skb);
 	else
@@ -117,23 +124,25 @@ static void close_work(struct work_struct *work)
 	struct chnl_net *dev = NULL;
 	struct list_head *list_node;
 	struct list_head *_tmp;
-	rtnl_lock();
+	/* May be called with or without RTNL lock held */
+	int islocked = rtnl_is_locked();
+	if (!islocked)
+		rtnl_lock();
 	list_for_each_safe(list_node, _tmp, &chnl_net_list) {
 		dev = list_entry(list_node, struct chnl_net, list_field);
-		if (!dev->pending_close)
-			continue;
-		list_del(list_node);
-		delete_device(dev);
+		if (dev->state == CAIF_SHUTDOWN)
+			dev_close(dev->netdev);
 	}
-	rtnl_unlock();
+	if (!islocked)
+		rtnl_unlock();
 }
 static DECLARE_WORK(close_worker, close_work);
 
 static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
 				int phyid)
 {
-	struct chnl_net *priv;
-	pr_debug("CAIF: %s(): NET flowctrl func called flow: %s.\n",
+	struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
+	pr_debug("CAIF: %s(): NET flowctrl func called flow: %s\n",
 		__func__,
 		flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
 		flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" :
@@ -143,21 +152,31 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
 		flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ?
 		 "REMOTE_SHUTDOWN" : "UKNOWN CTRL COMMAND");
 
-	priv = container_of(layr, struct chnl_net, chnl);
+
 
 	switch (flow) {
 	case CAIF_CTRLCMD_FLOW_OFF_IND:
+		priv->flowenabled = false;
+		netif_stop_queue(priv->netdev);
+		break;
 	case CAIF_CTRLCMD_DEINIT_RSP:
+		priv->state = CAIF_DISCONNECTED;
+		break;
 	case CAIF_CTRLCMD_INIT_FAIL_RSP:
+		priv->state = CAIF_DISCONNECTED;
+		wake_up_interruptible(&priv->netmgmt_wq);
+		break;
 	case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
-		priv->flowenabled = false;
+		priv->state = CAIF_SHUTDOWN;
 		netif_tx_disable(priv->netdev);
-		pr_warning("CAIF: %s(): done\n", __func__);
-		priv->pending_close = 1;
 		schedule_work(&close_worker);
 		break;
 	case CAIF_CTRLCMD_FLOW_ON_IND:
+		priv->flowenabled = true;
+		netif_wake_queue(priv->netdev);
+		break;
 	case CAIF_CTRLCMD_INIT_RSP:
+		priv->state = CAIF_CONNECTED;
 		priv->flowenabled = true;
 		netif_wake_queue(priv->netdev);
 		wake_up_interruptible(&priv->netmgmt_wq);
@@ -194,9 +213,6 @@ static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	pkt = cfpkt_fromnative(CAIF_DIR_OUT, (void *) skb);
 
-	pr_debug("CAIF: %s(): transmit inst %s %d,%p\n",
-		__func__, dev->name, priv->chnl.dn->id, &priv->chnl.dn);
-
 	/* Send the packet down the stack. */
 	result = priv->chnl.dn->transmit(priv->chnl.dn, pkt);
 	if (result) {
@@ -217,61 +233,59 @@ static int chnl_net_open(struct net_device *dev)
 	struct chnl_net *priv = NULL;
 	int result = -1;
 	ASSERT_RTNL();
-
 	priv = netdev_priv(dev);
-	pr_debug("CAIF: %s(): dev name: %s\n", __func__, priv->name);
-
 	if (!priv) {
 		pr_debug("CAIF: %s(): chnl_net_open: no priv\n", __func__);
 		return -ENODEV;
 	}
-	result = caif_connect_client(&priv->conn_req, &priv->chnl);
-	if (result != 0) {
-		pr_debug("CAIF: %s(): err: "
-			 "Unable to register and open device, Err:%d\n",
-			__func__,
-			result);
-		return -ENODEV;
+
+	if (priv->state != CAIF_CONNECTING) {
+		priv->state = CAIF_CONNECTING;
+		result = caif_connect_client(&priv->conn_req, &priv->chnl);
+		if (result != 0) {
+				priv->state = CAIF_DISCONNECTED;
+				pr_debug("CAIF: %s(): err: "
+					"Unable to register and open device,"
+					" Err:%d\n",
+					__func__,
+					result);
+				return result;
+		}
 	}
-	result = wait_event_interruptible(priv->netmgmt_wq, priv->flowenabled);
+
+	result = wait_event_interruptible_timeout(priv->netmgmt_wq,
+						priv->state != CAIF_CONNECTING,
+						CONNECT_TIMEOUT);
 
 	if (result == -ERESTARTSYS) {
 		pr_debug("CAIF: %s(): wait_event_interruptible"
 			 " woken by a signal\n", __func__);
 		return -ERESTARTSYS;
-	} else
-		pr_debug("CAIF: %s(): Flow on recieved\n", __func__);
+	}
+	if (result == 0) {
+		pr_debug("CAIF: %s(): connect timeout\n", __func__);
+		caif_disconnect_client(&priv->chnl);
+		priv->state = CAIF_DISCONNECTED;
+		pr_debug("CAIF: %s(): state disconnected\n", __func__);
+		return -ETIMEDOUT;
+	}
 
+	if (priv->state != CAIF_CONNECTED) {
+		pr_debug("CAIF: %s(): connect failed\n", __func__);
+		return -ECONNREFUSED;
+	}
+	pr_debug("CAIF: %s(): CAIF Netdevice connected\n", __func__);
 	return 0;
 }
 
 static int chnl_net_stop(struct net_device *dev)
 {
 	struct chnl_net *priv;
-	int result = -1;
+
 	ASSERT_RTNL();
 	priv = netdev_priv(dev);
-
-	result = caif_disconnect_client(&priv->chnl);
-	if (result != 0) {
-		pr_debug("CAIF: %s(): chnl_net_stop: err: "
-			 "Unable to STOP device, Err:%d\n",
-			 __func__, result);
-		return -EBUSY;
-	}
-	result = wait_event_interruptible(priv->netmgmt_wq,
-					  !priv->flowenabled);
-
-	if (result == -ERESTARTSYS) {
-		pr_debug("CAIF: %s(): wait_event_interruptible woken by"
-			 " signal, signal_pending(current) = %d\n",
-			 __func__,
-			 signal_pending(current));
-	} else {
-		pr_debug("CAIF: %s(): disconnect received\n", __func__);
-
-	}
-
+	priv->state = CAIF_DISCONNECTED;
+	caif_disconnect_client(&priv->chnl);
 	return 0;
 }
 
@@ -377,6 +391,8 @@ static int ipcaif_newlink(struct net *src_net, struct net_device *dev,
 	ASSERT_RTNL();
 	caifdev = netdev_priv(dev);
 	caif_netlink_parms(data, &caifdev->conn_req);
+	dev_net_set(caifdev->netdev, src_net);
+
 	ret = register_netdevice(dev);
 	if (ret)
 		pr_warning("CAIF: %s(): device rtml registration failed\n",
-- 
cgit v1.2.3-59-g8ed1b


From 2c485209a5c73fe94f4074e381cdc1ee24ca94a8 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Wed, 28 Apr 2010 08:54:40 +0000
Subject: Bugfix: Link selection was swapped in switch.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 0145bae0274f..024fd5bb2d39 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -247,10 +247,10 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 
 		switch (caifdev->link_select) {
 		case CAIF_LINK_HIGH_BANDW:
-			pref = CFPHYPREF_LOW_LAT;
+			pref = CFPHYPREF_HIGH_BW;
 			break;
 		case CAIF_LINK_LOW_LATENCY:
-			pref = CFPHYPREF_HIGH_BW;
+			pref = CFPHYPREF_LOW_LAT;
 			break;
 		default:
 			pref = CFPHYPREF_HIGH_BW;
-- 
cgit v1.2.3-59-g8ed1b


From f0b058b61711ebf5be94d6865ca7b2c259b71d37 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 28 Apr 2010 15:17:03 +0200
Subject: mac80211: do not wip out old supported rates

Keep the old supported rates if the AP does not provide a supported rates
information element in a new management frame.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/scan.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 2b1f1f3d6a58..a9d40584e383 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -84,7 +84,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 {
 	struct cfg80211_bss *cbss;
 	struct ieee80211_bss *bss;
-	int clen;
+	int clen, srlen;
 	s32 signal = 0;
 
 	if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
@@ -113,23 +113,24 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
 		bss->dtim_period = tim_ie->dtim_period;
 	}
 
-	bss->supp_rates_len = 0;
+	/* replace old supported rates if we get new values */
+	srlen = 0;
 	if (elems->supp_rates) {
-		clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len;
+		clen = IEEE80211_MAX_SUPP_RATES;
 		if (clen > elems->supp_rates_len)
 			clen = elems->supp_rates_len;
-		memcpy(&bss->supp_rates[bss->supp_rates_len], elems->supp_rates,
-		       clen);
-		bss->supp_rates_len += clen;
+		memcpy(bss->supp_rates, elems->supp_rates, clen);
+		srlen += clen;
 	}
 	if (elems->ext_supp_rates) {
-		clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len;
+		clen = IEEE80211_MAX_SUPP_RATES - srlen;
 		if (clen > elems->ext_supp_rates_len)
 			clen = elems->ext_supp_rates_len;
-		memcpy(&bss->supp_rates[bss->supp_rates_len],
-		       elems->ext_supp_rates, clen);
-		bss->supp_rates_len += clen;
+		memcpy(bss->supp_rates + srlen, elems->ext_supp_rates, clen);
+		srlen += clen;
 	}
+	if (srlen)
+		bss->supp_rates_len = srlen;
 
 	bss->wmm_used = elems->wmm_param || elems->wmm_info;
 	bss->uapsd_supported = is_uapsd_supported(elems);
-- 
cgit v1.2.3-59-g8ed1b


From 76f273640134f3eb8257179cd5b3bc6ba5fe4a96 Mon Sep 17 00:00:00 2001
From: Stanislaw Gruszka <sgruszka@redhat.com>
Date: Wed, 28 Apr 2010 17:03:15 +0200
Subject: mac80211: fix supported rates IE if AP doesn't give us it's rates

If the AP does not provide us its supported rates before association, send
all rates we support instead of an empty information element.

v1 -> v2: Add comment.

Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/work.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 7bd8670379de..4c7de72c27e7 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -212,15 +212,25 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata,
 
 	sband = local->hw.wiphy->bands[wk->chan->band];
 
-	/*
-	 * Get all rates supported by the device and the AP as
-	 * some APs don't like getting a superset of their rates
-	 * in the association request (e.g. D-Link DAP 1353 in
-	 * b-only mode)...
-	 */
-	rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates,
-					       wk->assoc.supp_rates_len,
-					       sband, &rates);
+	if (wk->assoc.supp_rates_len) {
+		/*
+		 * Get all rates supported by the device and the AP as
+		 * some APs don't like getting a superset of their rates
+		 * in the association request (e.g. D-Link DAP 1353 in
+		 * b-only mode)...
+		 */
+		rates_len = ieee80211_compatible_rates(wk->assoc.supp_rates,
+						       wk->assoc.supp_rates_len,
+						       sband, &rates);
+	} else {
+		/*
+		 * In case AP not provide any supported rates information
+		 * before association, we send information element(s) with
+		 * all rates that we support.
+		 */
+		rates = ~0;
+		rates_len = sband->n_bitrates;
+	}
 
 	skb = alloc_skb(local->hw.extra_tx_headroom +
 			sizeof(*mgmt) + /* bit too much but doesn't matter */
-- 
cgit v1.2.3-59-g8ed1b


From 8fc214ba958648ab111a173f2db7b0e1dfed5b11 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 28 Apr 2010 17:40:43 +0200
Subject: mac80211: notify driver about IBSS status

Some drivers (e.g. iwlwifi) need to know and try
to figure it out based on other things, but making
it explicit is definitely better.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 6 +++++-
 net/mac80211/ibss.c    | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index a36e0df5a17c..2879c8ef5571 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -145,6 +145,7 @@ struct ieee80211_low_level_stats {
  * @BSS_CHANGED_BEACON_ENABLED: Beaconing should be
  *	enabled/disabled (beaconing modes)
  * @BSS_CHANGED_CQM: Connection quality monitor config changed
+ * @BSS_CHANGED_IBSS: IBSS join status changed
  */
 enum ieee80211_bss_change {
 	BSS_CHANGED_ASSOC		= 1<<0,
@@ -158,6 +159,7 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_BEACON		= 1<<8,
 	BSS_CHANGED_BEACON_ENABLED	= 1<<9,
 	BSS_CHANGED_CQM			= 1<<10,
+	BSS_CHANGED_IBSS		= 1<<11,
 };
 
 /**
@@ -167,6 +169,8 @@ enum ieee80211_bss_change {
  * to that BSS) that can change during the lifetime of the BSS.
  *
  * @assoc: association status
+ * @ibss_joined: indicates whether this station is part of an IBSS
+ *	or not
  * @aid: association ID number, valid only when @assoc is true
  * @use_cts_prot: use CTS protection
  * @use_short_preamble: use 802.11b short preamble;
@@ -194,7 +198,7 @@ enum ieee80211_bss_change {
 struct ieee80211_bss_conf {
 	const u8 *bssid;
 	/* association related data */
-	bool assoc;
+	bool assoc, ibss_joined;
 	u16 aid;
 	/* erp related data */
 	bool use_cts_prot;
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 01974c2510a8..a87e309e3b99 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -170,6 +170,8 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	bss_change |= BSS_CHANGED_BSSID;
 	bss_change |= BSS_CHANGED_BEACON;
 	bss_change |= BSS_CHANGED_BEACON_ENABLED;
+	bss_change |= BSS_CHANGED_IBSS;
+	sdata->vif.bss_conf.ibss_joined = true;
 	ieee80211_bss_info_change_notify(sdata, bss_change);
 
 	ieee80211_sta_def_wmm_params(sdata, sband->n_bitrates, supp_rates);
@@ -950,7 +952,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
 	kfree(sdata->u.ibss.ie);
 	skb = sdata->u.ibss.presp;
 	rcu_assign_pointer(sdata->u.ibss.presp, NULL);
-	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
+	sdata->vif.bss_conf.ibss_joined = false;
+	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |
+						BSS_CHANGED_IBSS);
 	synchronize_rcu();
 	kfree_skb(skb);
 
-- 
cgit v1.2.3-59-g8ed1b


From 4b0b72f7dd617b13abd1b04c947e15873e011a24 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 28 Apr 2010 14:35:48 -0700
Subject: net: speedup udp receive path

Since commit 95766fff ([UDP]: Add memory accounting.),
each received packet needs one extra lock_sock()/release_sock() pair.

This added latency because of possible backlog handling. Then later,
ticket spinlocks added yet another latency source in case of DDOS.

This patch introduces lock_sock_bh() and unlock_sock_bh()
synchronization primitives, avoiding one atomic operation and backlog
processing.

skb_free_datagram_locked() uses them instead of full blown
lock_sock()/release_sock(). skb is orphaned inside locked section for
proper socket memory reclaim, and finally freed outside of it.

The UDP receive path now takes the socket spinlock only once.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h  | 10 ++++++++++
 net/core/datagram.c | 10 +++++++---
 net/ipv4/udp.c      | 12 ++++++------
 net/ipv6/udp.c      |  4 ++--
 4 files changed, 25 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index cf12b1e61fa6..d361c7769fe0 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1021,6 +1021,16 @@ extern void release_sock(struct sock *sk);
 				SINGLE_DEPTH_NESTING)
 #define bh_unlock_sock(__sk)	spin_unlock(&((__sk)->sk_lock.slock))
 
+static inline void lock_sock_bh(struct sock *sk)
+{
+	spin_lock_bh(&sk->sk_lock.slock);
+}
+
+static inline void unlock_sock_bh(struct sock *sk)
+{
+	spin_unlock_bh(&sk->sk_lock.slock);
+}
+
 extern struct sock		*sk_alloc(struct net *net, int family,
 					  gfp_t priority,
 					  struct proto *prot);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 5574a5ddf908..95b851f3d713 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -229,9 +229,13 @@ EXPORT_SYMBOL(skb_free_datagram);
 
 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
 {
-	lock_sock(sk);
-	skb_free_datagram(sk, skb);
-	release_sock(sk);
+	lock_sock_bh(sk);
+	skb_orphan(skb);
+	sk_mem_reclaim_partial(sk);
+	unlock_sock_bh(sk);
+
+	/* skb is now orphaned, might be freed outside of locked section */
+	consume_skb(skb);
 }
 EXPORT_SYMBOL(skb_free_datagram_locked);
 
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 63eb56b2d873..1f86965ba7d7 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1062,10 +1062,10 @@ static unsigned int first_packet_length(struct sock *sk)
 	spin_unlock_bh(&rcvq->lock);
 
 	if (!skb_queue_empty(&list_kill)) {
-		lock_sock(sk);
+		lock_sock_bh(sk);
 		__skb_queue_purge(&list_kill);
 		sk_mem_reclaim_partial(sk);
-		release_sock(sk);
+		unlock_sock_bh(sk);
 	}
 	return res;
 }
@@ -1196,10 +1196,10 @@ out:
 	return err;
 
 csum_copy_err:
-	lock_sock(sk);
+	lock_sock_bh(sk);
 	if (!skb_kill_datagram(sk, skb, flags))
 		UDP_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
-	release_sock(sk);
+	unlock_sock_bh(sk);
 
 	if (noblock)
 		return -EAGAIN;
@@ -1624,9 +1624,9 @@ int udp_rcv(struct sk_buff *skb)
 
 void udp_destroy_sock(struct sock *sk)
 {
-	lock_sock(sk);
+	lock_sock_bh(sk);
 	udp_flush_pending_frames(sk);
-	release_sock(sk);
+	unlock_sock_bh(sk);
 }
 
 /*
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3ead20ad9d07..91c60f0090a4 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -424,7 +424,7 @@ out:
 	return err;
 
 csum_copy_err:
-	lock_sock(sk);
+	lock_sock_bh(sk);
 	if (!skb_kill_datagram(sk, skb, flags)) {
 		if (is_udp4)
 			UDP_INC_STATS_USER(sock_net(sk),
@@ -433,7 +433,7 @@ csum_copy_err:
 			UDP6_INC_STATS_USER(sock_net(sk),
 					UDP_MIB_INERRORS, is_udplite);
 	}
-	release_sock(sk);
+	unlock_sock_bh(sk);
 
 	if (flags & MSG_DONTWAIT)
 		return -EAGAIN;
-- 
cgit v1.2.3-59-g8ed1b


From f84af32cbca70a3c6d30463dc08c7984af11c277 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 28 Apr 2010 15:31:51 -0700
Subject: net: ip_queue_rcv_skb() helper

When queueing an skb to a socket, we can immediately release its dst if
the target socket does not use IP_CMSG_PKTINFO.

tcp_data_queue() can drop dst too.

This lets us benefit from a hot cache line and spares the receiver, possibly
on another cpu, from dirtying this cache line itself.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h       |  1 +
 net/ipv4/ip_sockglue.c | 16 ++++++++++++++++
 net/ipv4/raw.c         |  2 +-
 net/ipv4/tcp_input.c   |  1 +
 net/ipv4/udp.c         |  2 +-
 net/ipv6/raw.c         |  2 +-
 net/ipv6/udp.c         |  2 +-
 7 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/ip.h b/include/net/ip.h
index a84ceb692687..8149b77cea9b 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -393,6 +393,7 @@ extern int ip_options_rcv_srr(struct sk_buff *skb);
  *	Functions provided by ip_sockglue.c
  */
 
+extern int	ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
 extern void	ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb);
 extern int	ip_cmsg_send(struct net *net,
 			     struct msghdr *msg, struct ipcm_cookie *ipc);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b0aa0546a3b3..ce231780a2b1 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -954,6 +954,22 @@ e_inval:
 	return -EINVAL;
 }
 
+/**
+ * ip_queue_rcv_skb - Queue an skb into sock receive queue
+ * @sk: socket
+ * @skb: buffer
+ *
+ * Queues an skb into socket receive queue. If IP_CMSG_PKTINFO option
+ * is not set, we drop skb dst entry now, while dst cache line is hot.
+ */
+int ip_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	if (!(inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO))
+		skb_dst_drop(skb);
+	return sock_queue_rcv_skb(sk, skb);
+}
+EXPORT_SYMBOL(ip_queue_rcv_skb);
+
 int ip_setsockopt(struct sock *sk, int level,
 		int optname, char __user *optval, unsigned int optlen)
 {
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index cc6f097fbd5f..52ef5af78a45 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -290,7 +290,7 @@ static int raw_rcv_skb(struct sock * sk, struct sk_buff * skb)
 {
 	/* Charge it to the socket. */
 
-	if (sock_queue_rcv_skb(sk, skb) < 0) {
+	if (ip_queue_rcv_skb(sk, skb) < 0) {
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ae3ec15fb630..e82162c211bf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4367,6 +4367,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
 	if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
 		goto drop;
 
+	skb_dst_drop(skb);
 	__skb_pull(skb, th->doff * 4);
 
 	TCP_ECN_accept_cwr(tp, skb);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1f86965ba7d7..4560b291180b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1264,7 +1264,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	if (inet_sk(sk)->inet_daddr)
 		sock_rps_save_rxhash(sk, skb->rxhash);
 
-	rc = sock_queue_rcv_skb(sk, skb);
+	rc = ip_queue_rcv_skb(sk, skb);
 	if (rc < 0) {
 		int is_udplite = IS_UDPLITE(sk);
 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 85627386cb02..0e3d2dd92078 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -381,7 +381,7 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
 	}
 
 	/* Charge it to the socket. */
-	if (sock_queue_rcv_skb(sk, skb) < 0) {
+	if (ip_queue_rcv_skb(sk, skb) < 0) {
 		kfree_skb(skb);
 		return NET_RX_DROP;
 	}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 91c60f0090a4..79359c8380bc 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -514,7 +514,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
 			goto drop;
 	}
 
-	if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) {
+	if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) {
 		/* Note that an ENOMEM error is charged twice */
 		if (rc == -ENOMEM)
 			UDP6_INC_STATS_BH(sock_net(sk),
-- 
cgit v1.2.3-59-g8ed1b


From 49b5c7f473f1bbcb30275dcaee2c06dfb8ec2279 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Thu, 29 Apr 2010 21:34:01 +0200
Subject: mac80211: tell driver about IBSS merge

My previous patch "mac80211: notify driver about
IBSS status" left a problem -- when we merge with
a new BSSID, we never tell the driver that we left
the old one. Fix that.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ibss.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index a87e309e3b99..c585fced8584 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -91,6 +91,12 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	if (memcmp(ifibss->bssid, bssid, ETH_ALEN))
 		sta_info_flush(sdata->local, sdata);
 
+	/* if merging, indicate to driver that we leave the old IBSS */
+	if (sdata->vif.bss_conf.ibss_joined) {
+		sdata->vif.bss_conf.ibss_joined = false;
+		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IBSS);
+	}
+
 	memcpy(ifibss->bssid, bssid, ETH_ALEN);
 
 	sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
-- 
cgit v1.2.3-59-g8ed1b


From f5c044e53a6b319776c7140b22fee9be3bc1f758 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Fri, 30 Apr 2010 15:37:00 -0400
Subject: mac80211: remove deprecated noise field from ieee80211_rx_status

Also remove associated IEEE80211_HW_NOISE_DBM from ieee80211_hw_flags.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ar9170/main.c      | 3 +--
 drivers/net/wireless/ath/ath5k/base.c       | 3 +--
 drivers/net/wireless/b43/main.c             | 3 +--
 drivers/net/wireless/b43legacy/main.c       | 3 +--
 drivers/net/wireless/iwlwifi/iwl-agn.c      | 1 -
 drivers/net/wireless/iwlwifi/iwl3945-base.c | 1 -
 drivers/net/wireless/mwl8k.c                | 4 ++--
 drivers/net/wireless/p54/main.c             | 3 +--
 drivers/net/wireless/wl12xx/wl1251_main.c   | 1 -
 drivers/net/wireless/wl12xx/wl1271_main.c   | 1 -
 include/net/mac80211.h                      | 8 +-------
 net/mac80211/rx.c                           | 2 --
 12 files changed, 8 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c
index cfc6a35a2c6a..dfcc055a827f 100644
--- a/drivers/net/wireless/ath/ar9170/main.c
+++ b/drivers/net/wireless/ath/ar9170/main.c
@@ -2550,8 +2550,7 @@ void *ar9170_alloc(size_t priv_size)
 					 BIT(NL80211_IFTYPE_ADHOC);
 	ar->hw->flags |= IEEE80211_HW_RX_INCLUDES_FCS |
 			 IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
-			 IEEE80211_HW_SIGNAL_DBM |
-			 IEEE80211_HW_NOISE_DBM;
+			 IEEE80211_HW_SIGNAL_DBM;
 
 	if (modparam_ht) {
 		ar->hw->flags |= IEEE80211_HW_AMPDU_AGGREGATION;
diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c
index 1f3e5b0986c7..feb7b9ea80ab 100644
--- a/drivers/net/wireless/ath/ath5k/base.c
+++ b/drivers/net/wireless/ath/ath5k/base.c
@@ -547,8 +547,7 @@ ath5k_pci_probe(struct pci_dev *pdev,
 	SET_IEEE80211_DEV(hw, &pdev->dev);
 	hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
 		    IEEE80211_HW_HOST_BROADCAST_PS_BUFFERING |
-		    IEEE80211_HW_SIGNAL_DBM |
-		    IEEE80211_HW_NOISE_DBM;
+		    IEEE80211_HW_SIGNAL_DBM;
 
 	hw->wiphy->interface_modes =
 		BIT(NL80211_IFTYPE_AP) |
diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c
index 14cf3bd7ea51..e0575591828c 100644
--- a/drivers/net/wireless/b43/main.c
+++ b/drivers/net/wireless/b43/main.c
@@ -4904,8 +4904,7 @@ static int b43_wireless_init(struct ssb_device *dev)
 
 	/* fill hw info */
 	hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
-		    IEEE80211_HW_SIGNAL_DBM |
-		    IEEE80211_HW_NOISE_DBM;
+		    IEEE80211_HW_SIGNAL_DBM;
 
 	hw->wiphy->interface_modes =
 		BIT(NL80211_IFTYPE_AP) |
diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c
index 1d070be5a678..b2df432d3c63 100644
--- a/drivers/net/wireless/b43legacy/main.c
+++ b/drivers/net/wireless/b43legacy/main.c
@@ -3768,8 +3768,7 @@ static int b43legacy_wireless_init(struct ssb_device *dev)
 
 	/* fill hw info */
 	hw->flags = IEEE80211_HW_RX_INCLUDES_FCS |
-		    IEEE80211_HW_SIGNAL_DBM |
-		    IEEE80211_HW_NOISE_DBM;
+		    IEEE80211_HW_SIGNAL_DBM;
 	hw->wiphy->interface_modes =
 		BIT(NL80211_IFTYPE_AP) |
 		BIT(NL80211_IFTYPE_STATION) |
diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c
index c22d3d8c440c..0cd338631592 100644
--- a/drivers/net/wireless/iwlwifi/iwl-agn.c
+++ b/drivers/net/wireless/iwlwifi/iwl-agn.c
@@ -2653,7 +2653,6 @@ static int iwl_mac_setup_register(struct iwl_priv *priv)
 
 	/* Tell mac80211 our characteristics */
 	hw->flags = IEEE80211_HW_SIGNAL_DBM |
-		    IEEE80211_HW_NOISE_DBM |
 		    IEEE80211_HW_AMPDU_AGGREGATION |
 		    IEEE80211_HW_SPECTRUM_MGMT;
 
diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c
index e7263ed693c4..c9da39d7c414 100644
--- a/drivers/net/wireless/iwlwifi/iwl3945-base.c
+++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c
@@ -3872,7 +3872,6 @@ static int iwl3945_setup_mac(struct iwl_priv *priv)
 
 	/* Tell mac80211 our characteristics */
 	hw->flags = IEEE80211_HW_SIGNAL_DBM |
-		    IEEE80211_HW_NOISE_DBM |
 		    IEEE80211_HW_SPECTRUM_MGMT;
 
 	if (!priv->cfg->broken_powersave)
diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c
index 9af6c9454605..a90bb6d2e26b 100644
--- a/drivers/net/wireless/mwl8k.c
+++ b/drivers/net/wireless/mwl8k.c
@@ -3981,8 +3981,8 @@ static int __devinit mwl8k_probe(struct pci_dev *pdev,
 
 	hw->queues = MWL8K_TX_QUEUES;
 
-	/* Set rssi and noise values to dBm */
-	hw->flags |= IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_NOISE_DBM;
+	/* Set rssi values to dBm */
+	hw->flags |= IEEE80211_HW_SIGNAL_DBM;
 	hw->vif_data_size = sizeof(struct mwl8k_vif);
 	hw->sta_data_size = sizeof(struct mwl8k_sta);
 
diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c
index 36f4c820ad01..10a4b16f31ce 100644
--- a/drivers/net/wireless/p54/main.c
+++ b/drivers/net/wireless/p54/main.c
@@ -545,8 +545,7 @@ struct ieee80211_hw *p54_init_common(size_t priv_data_len)
 		     IEEE80211_HW_SUPPORTS_PS |
 		     IEEE80211_HW_PS_NULLFUNC_STACK |
 		     IEEE80211_HW_BEACON_FILTER |
-		     IEEE80211_HW_REPORTS_TX_ACK_STATUS |
-		     IEEE80211_HW_NOISE_DBM;
+		     IEEE80211_HW_REPORTS_TX_ACK_STATUS;
 
 	dev->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) |
 				      BIT(NL80211_IFTYPE_ADHOC) |
diff --git a/drivers/net/wireless/wl12xx/wl1251_main.c b/drivers/net/wireless/wl12xx/wl1251_main.c
index 4d3be80c220e..b70621f63cda 100644
--- a/drivers/net/wireless/wl12xx/wl1251_main.c
+++ b/drivers/net/wireless/wl12xx/wl1251_main.c
@@ -1291,7 +1291,6 @@ int wl1251_init_ieee80211(struct wl1251 *wl)
 	wl->hw->channel_change_time = 10000;
 
 	wl->hw->flags = IEEE80211_HW_SIGNAL_DBM |
-		IEEE80211_HW_NOISE_DBM |
 		IEEE80211_HW_SUPPORTS_PS |
 		IEEE80211_HW_BEACON_FILTER |
 		IEEE80211_HW_SUPPORTS_UAPSD;
diff --git a/drivers/net/wireless/wl12xx/wl1271_main.c b/drivers/net/wireless/wl12xx/wl1271_main.c
index a794d5ed8c60..b0837255de6f 100644
--- a/drivers/net/wireless/wl12xx/wl1271_main.c
+++ b/drivers/net/wireless/wl12xx/wl1271_main.c
@@ -2275,7 +2275,6 @@ int wl1271_init_ieee80211(struct wl1271 *wl)
 	wl->hw->max_listen_interval = wl->conf.conn.max_listen_interval;
 
 	wl->hw->flags = IEEE80211_HW_SIGNAL_DBM |
-		IEEE80211_HW_NOISE_DBM |
 		IEEE80211_HW_BEACON_FILTER |
 		IEEE80211_HW_SUPPORTS_PS |
 		IEEE80211_HW_SUPPORTS_UAPSD |
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2879c8ef5571..00502b1a9e38 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -560,7 +560,6 @@ enum mac80211_rx_flags {
  * @signal: signal strength when receiving this frame, either in dBm, in dB or
  *	unspecified depending on the hardware capabilities flags
  *	@IEEE80211_HW_SIGNAL_*
- * @noise: noise when receiving this frame, in dBm (DEPRECATED).
  * @antenna: antenna used
  * @rate_idx: index of data rate into band's supported rates or MCS index if
  *	HT rates are use (RX_FLAG_HT)
@@ -571,7 +570,6 @@ struct ieee80211_rx_status {
 	enum ieee80211_band band;
 	int freq;
 	int signal;
-	int noise __deprecated;
 	int antenna;
 	int rate_idx;
 	int flag;
@@ -934,10 +932,6 @@ enum ieee80211_tkip_key_type {
  *	one milliwatt. This is the preferred method since it is standardized
  *	between different devices. @max_signal does not need to be set.
  *
- * @IEEE80211_HW_NOISE_DBM:
- *	Hardware can provide noise (radio interference) values in units dBm,
- *      decibel difference from one milliwatt.
- *
  * @IEEE80211_HW_SPECTRUM_MGMT:
  * 	Hardware supports spectrum management defined in 802.11h
  * 	Measurement, Channel Switch, Quieting, TPC
@@ -1001,7 +995,7 @@ enum ieee80211_hw_flags {
 	IEEE80211_HW_2GHZ_SHORT_PREAMBLE_INCAPABLE	= 1<<4,
 	IEEE80211_HW_SIGNAL_UNSPEC			= 1<<5,
 	IEEE80211_HW_SIGNAL_DBM				= 1<<6,
-	IEEE80211_HW_NOISE_DBM				= 1<<7,
+	/* use this hole */
 	IEEE80211_HW_SPECTRUM_MGMT			= 1<<8,
 	IEEE80211_HW_AMPDU_AGGREGATION			= 1<<9,
 	IEEE80211_HW_SUPPORTS_PS			= 1<<10,
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8ee7db193269..e4f325f68fd3 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -80,8 +80,6 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local,
 		len += 8;
 	if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM)
 		len += 1;
-	if (local->hw.flags & IEEE80211_HW_NOISE_DBM)
-		len += 1;
 
 	if (len & 1) /* padding for RX_FLAGS if necessary */
 		len++;
-- 
cgit v1.2.3-59-g8ed1b


From 767dd03369ac18af58efdef0383d6eb986eab426 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 28 Apr 2010 19:14:43 +0000
Subject: net: speedup sock_recv_ts_and_drops()

sock_recv_ts_and_drops() is fat and slow (~ 4% of cpu time on some
profiles)

We can test all socket flags at once to make fast path fast again.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h | 19 ++++++++++++++++++-
 net/socket.c       |  4 ++--
 2 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index d361c7769fe0..e1777db5b9ab 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1635,7 +1635,24 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 		sk->sk_stamp = kt;
 }
 
-extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb);
+extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+				     struct sk_buff *skb);
+
+static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+					  struct sk_buff *skb)
+{
+#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL)			| \
+			   (1UL << SOCK_RCVTSTAMP)			| \
+			   (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE)	| \
+			   (1UL << SOCK_TIMESTAMPING_SOFTWARE)		| \
+			   (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE) 	| \
+			   (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE))
+
+	if (sk->sk_flags & FLAGS_TS_OR_DROPS)
+		__sock_recv_ts_and_drops(msg, sk, skb);
+	else
+		sk->sk_stamp = skb->tstamp;
+}
 
 /**
  * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
diff --git a/net/socket.c b/net/socket.c
index 9822081eab38..cb7c1f6c0d6e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -655,13 +655,13 @@ inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff
 			sizeof(__u32), &skb->dropcount);
 }
 
-void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
 	struct sk_buff *skb)
 {
 	sock_recv_timestamp(msg, sk, skb);
 	sock_recv_drops(msg, sk, skb);
 }
-EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
+EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
 
 static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock,
 				       struct msghdr *msg, size_t size, int flags)
-- 
cgit v1.2.3-59-g8ed1b


From 4b021628beb26238087812829cc080da47e4b236 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 27 Apr 2010 21:20:22 +0000
Subject: xfrm: potential uninitialized variable num_xfrms

potential uninitialized variable num_xfrms

fix compiler warning: 'num_xfrms' may be used uninitialized in this function.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 net/xfrm/xfrm_policy.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 7430ac26ec49..31f4ba43b48f 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1732,7 +1732,7 @@ int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
 	struct dst_entry *dst, *dst_orig = *dst_p, *route;
 	u16 family = dst_orig->ops->family;
 	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
-	int i, err, num_pols, num_xfrms, drop_pols = 0;
+	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
 
 restart:
 	dst = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From 83d7eb2979cd3390c375470225dd2d8f2009bc70 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Fri, 30 Apr 2010 16:42:08 -0700
Subject: ipv6: cleanup: remove unneeded null check

We dereference "sk" unconditionally elsewhere in the function.

This was left over from:  b30bd282 "ip6_xmit: remove unnecessary NULL
ptr check".  According to that commit message, "the sk argument to
ip6_xmit is never NULL nowadays since the skb->priority assigment
expects a valid socket."

Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7db09c3f5289..e7a5f17d5e95 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -218,8 +218,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 			}
 			kfree_skb(skb);
 			skb = skb2;
-			if (sk)
-				skb_set_owner_w(skb, sk);
+			skb_set_owner_w(skb, sk);
 		}
 		if (opt->opt_flen)
 			ipv6_push_frag_opts(skb, opt, &proto);
-- 
cgit v1.2.3-59-g8ed1b


From a5f4cea74f1397bb29d0bbdabeb05bd05a23a741 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 21:42:42 -0400
Subject: sctp: Use correct address family in sctp_getsockopt_peer_addrs()

The function should use the address family of the address when
trying to determine the length of the structure.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/socket.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 13d8229f3a9c..1282a0ed855e 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4384,7 +4384,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len,
 				transports) {
 		memcpy(&temp, &from->ipaddr, sizeof(temp));
 		sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp);
-		addrlen = sctp_get_af_specific(sk->sk_family)->sockaddr_len;
+		addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len;
 		if (space_left < addrlen)
 			return -ENOMEM;
 		if (copy_to_user(to, &temp, addrlen))
-- 
cgit v1.2.3-59-g8ed1b


From c17b02b38aa99ef806c7066ef19a6f51122304f1 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 21:42:43 -0400
Subject: sctp: send SHUTDOWN-ACK chunk back to the source.

SHUTDOWN-ACK is always sent to the primary path the first time, but it
is better to transmit the SHUTDOWN-ACK chunk to the same destination
transport address from which the SHUTDOWN chunk was received.
Based on the work from Wei Yongjun <yjwei@cn.fujitsu.com>.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/sm_sideeffect.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 4c5bed9af4e3..49fb9acece63 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -697,11 +697,15 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds,
 {
 	struct sctp_transport *t;
 
-	t = sctp_assoc_choose_alter_transport(asoc,
+	if (chunk->transport)
+		t = chunk->transport;
+	else {
+		t = sctp_assoc_choose_alter_transport(asoc,
 					      asoc->shutdown_last_sent_to);
+		chunk->transport = t;
+	}
 	asoc->shutdown_last_sent_to = t;
 	asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
-	chunk->transport = t;
 }
 
 /* Helper function to change the state of an association. */
-- 
cgit v1.2.3-59-g8ed1b


From bd69b981a354be40cc709f3046f0c56f00da6163 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Fri, 30 Apr 2010 21:42:43 -0400
Subject: sctp: assure at least one T3-rtx timer is running if a FORWARD TSN is
 sent

PR-SCTP extension section 3.5 Sender Side Implementation of PR-SCTP:
  C5) If a FORWARD TSN is sent, the sender MUST assure that at
      least one T3-rtx timer is running.

So this patch makes sure that at least one T3-rtx timer is running
if a FORWARD TSN has been or will be sent.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index abfc0b8dee74..16d451a62b3f 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -854,6 +854,12 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 			if (status  != SCTP_XMIT_OK) {
 				/* put the chunk back */
 				list_add(&chunk->list, &q->control_chunk_list);
+			} else if (chunk->chunk_hdr->type == SCTP_CID_FWD_TSN) {
+				/* PR-SCTP C5) If a FORWARD TSN is sent, the
+				 * sender MUST assure that at least one T3-rtx
+				 * timer is running.
+				 */
+				sctp_transport_reset_timers(transport, 0);
 			}
 			break;
 
-- 
cgit v1.2.3-59-g8ed1b


From 6429d3dc4bd6251b01c11b851e23a4d60f079e06 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Fri, 30 Apr 2010 21:42:44 -0400
Subject: sctp: missing set src and dest port while lookup output route

While looking up the output route, we do not set the src and dest
port. This causes us to get a wrong route if the outbound transport
has been set up for IPsec with a src or dst port.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/protocol.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 704298f4b284..182749867c72 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -474,13 +474,17 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 
 	memset(&fl, 0x0, sizeof(struct flowi));
 	fl.fl4_dst  = daddr->v4.sin_addr.s_addr;
+	fl.fl_ip_dport = daddr->v4.sin_port;
 	fl.proto = IPPROTO_SCTP;
 	if (asoc) {
 		fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk);
 		fl.oif = asoc->base.sk->sk_bound_dev_if;
+		fl.fl_ip_sport = htons(asoc->base.bind_addr.port);
 	}
-	if (saddr)
+	if (saddr) {
 		fl.fl4_src = saddr->v4.sin_addr.s_addr;
+		fl.fl_ip_sport = saddr->v4.sin_port;
+	}
 
 	SCTP_DEBUG_PRINTK("%s: DST:%pI4, SRC:%pI4 - ",
 			  __func__, &fl.fl4_dst, &fl.fl4_src);
@@ -528,6 +532,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc,
 		if ((laddr->state == SCTP_ADDR_SRC) &&
 		    (AF_INET == laddr->a.sa.sa_family)) {
 			fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
+			fl.fl_ip_sport = laddr->a.v4.sin_port;
 			if (!ip_route_output_key(&init_net, &rt, &fl)) {
 				dst = &rt->u.dst;
 				goto out_unlock;
-- 
cgit v1.2.3-59-g8ed1b


From bc4f841a05364b2572bcc266e9fd7e9cf5f06d5b Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Fri, 30 Apr 2010 22:38:53 -0400
Subject: sctp: fix to retranmit at least one DATA chunk

While doing a retransmit, if a control chunk exists, such as a
FORWARD TSN chunk, and the DATA chunk cannot be bundled with
this control chunk because of the PMTU limit, no DATA chunk
will be retransmitted in the current implementation. This
patch makes sure to retransmit at least one DATA chunk in this case.

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'net')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 16d451a62b3f..e333d5833616 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -598,11 +598,23 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 		if (fast_rtx && !chunk->fast_retransmit)
 			continue;
 
+redo:
 		/* Attempt to append this chunk to the packet. */
 		status = sctp_packet_append_chunk(pkt, chunk);
 
 		switch (status) {
 		case SCTP_XMIT_PMTU_FULL:
+			if (!pkt->has_data && !pkt->has_cookie_echo) {
+				/* If this packet did not contain DATA then
+				 * retransmission did not happen, so do it
+				 * again.  We'll ignore the error here since
+				 * control chunks are already freed so there
+				 * is nothing we can do.
+				 */
+				sctp_packet_transmit(pkt);
+				goto redo;
+			}
+
 			/* Send this packet.  */
 			error = sctp_packet_transmit(pkt);
 
-- 
cgit v1.2.3-59-g8ed1b


From fbdf501c9374966a56829ecca3a7f25d2b49a305 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:39:26 -0400
Subject: sctp: Do no select unconfirmed transports for retransmissions

An unconfirmed transport is one that we have not been
able to reach since the beginning.  There is no point in
trying to retransmit data on those transports.  Also, the
specification forbids it due to security issues.

Reported-by: Frank Schuster <frank.schuster01@web.de>

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/associola.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index df5abbff63e2..de830c268564 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -762,7 +762,8 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 		asoc->peer.retran_path = peer;
 	}
 
-	if (asoc->peer.active_path == asoc->peer.retran_path) {
+	if (asoc->peer.active_path == asoc->peer.retran_path &&
+	    peer->state != SCTP_UNCONFIRMED) {
 		asoc->peer.retran_path = peer;
 	}
 
@@ -1318,7 +1319,7 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
 			/* Keep track of the next transport in case
 			 * we don't find any active transport.
 			 */
-			if (!next)
+			if (t->state != SCTP_UNCONFIRMED && !next)
 				next = t;
 		}
 	}
-- 
cgit v1.2.3-59-g8ed1b


From ec7b9519509061bbc09a43284c3570aa492e07f0 Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Fri, 30 Apr 2010 22:41:09 -0400
Subject: sctp: use sctp_chunk_is_data macro to decide a chunk is data chunk

The sctp_chunk_is_data macro is defined to decide
whether a chunk is a data chunk or not.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index e333d5833616..a4fe7dee76e8 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -308,7 +308,7 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk)
 	/* If it is data, queue it up, otherwise, send it
 	 * immediately.
 	 */
-	if (SCTP_CID_DATA == chunk->chunk_hdr->type) {
+	if (sctp_chunk_is_data(chunk)) {
 		/* Is it OK to queue data chunks?  */
 		/* From 9. Termination of Association
 		 *
-- 
cgit v1.2.3-59-g8ed1b


From 787a51a0878f7bee3a9a83040077301e1556b69a Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Fri, 30 Apr 2010 22:41:09 -0400
Subject: sctp: implement sctp association probing module

This patch implements an SCTP association probing module; the module
will be called sctp_probe.

This module allows for capturing the changes to SCTP association
state in response to incoming packets. It is used for debugging
SCTP congestion control algorithms.

Usage:
  $ modprobe sctp_probe [full=n] [port=n] [bufsize=n]
  $ cat /proc/net/sctpprobe

  The output format is:
    TIME     ASSOC     LPORT RPORT MTU    RWND  UNACK <REMOTE-ADDR   STATE  CWND   SSTHRESH  INFLIGHT  PARTIAL_BYTES_ACKED MTU> ...

  The output will be like this:
    9.226086 c4064c48  9000  8000  1500    53352     1 *192.168.0.19  1     4380    54784     1252        0     1500
    9.287195 c4064c48  9000  8000  1500    45144     5 *192.168.0.19  1     5880    54784     6500        0     1500
    9.289130 c4064c48  9000  8000  1500    42724     5 *192.168.0.19  1     7380    54784     6500        0     1500
    9.620332 c4064c48  9000  8000  1500    48284     4 *192.168.0.19  1     8880    54784     5200        0     1500
    ......

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/Kconfig  |  12 +++
 net/sctp/Makefile |   3 +
 net/sctp/probe.c  | 213 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 228 insertions(+)
 create mode 100644 net/sctp/probe.c

(limited to 'net')

diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig
index 58b3e882a187..126b014eb79b 100644
--- a/net/sctp/Kconfig
+++ b/net/sctp/Kconfig
@@ -37,6 +37,18 @@ menuconfig IP_SCTP
 
 if IP_SCTP
 
+config NET_SCTPPROBE
+	tristate "SCTP: Association probing"
+        depends on PROC_FS && KPROBES
+        ---help---
+        This module allows for capturing the changes to SCTP association
+        state in response to incoming packets. It is used for debugging
+        SCTP congestion control algorithms. If you don't understand
+        what was just said, you don't need it: say N.
+
+        To compile this code as a module, choose M here: the
+        module will be called sctp_probe.
+
 config SCTP_DBG_MSG
 	bool "SCTP: Debug messages"
 	help
diff --git a/net/sctp/Makefile b/net/sctp/Makefile
index 6b794734380a..5c30b7a873df 100644
--- a/net/sctp/Makefile
+++ b/net/sctp/Makefile
@@ -3,6 +3,7 @@
 #
 
 obj-$(CONFIG_IP_SCTP) += sctp.o
+obj-$(CONFIG_NET_SCTPPROBE) += sctp_probe.o
 
 sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
 	  protocol.o endpointola.o associola.o \
@@ -11,6 +12,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \
 	  tsnmap.o bind_addr.o socket.o primitive.o \
 	  output.o input.o debug.o ssnmap.o auth.o
 
+sctp_probe-y := probe.o
+
 sctp-$(CONFIG_SCTP_DBG_OBJCNT) += objcnt.o
 sctp-$(CONFIG_PROC_FS) += proc.o
 sctp-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sctp/probe.c b/net/sctp/probe.c
new file mode 100644
index 000000000000..8f025d5831aa
--- /dev/null
+++ b/net/sctp/probe.c
@@ -0,0 +1,213 @@
+/*
+ * sctp_probe - Observe the SCTP flow with kprobes.
+ *
+ * The idea for this came from Werner Almesberger's umlsim
+ * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
+ *
+ * Modified for SCTP from Stephen Hemminger's code
+ * Copyright (C) 2010, Wei Yongjun <yjwei@cn.fujitsu.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/socket.h>
+#include <linux/sctp.h>
+#include <linux/proc_fs.h>
+#include <linux/module.h>
+#include <linux/kfifo.h>
+#include <linux/time.h>
+#include <net/net_namespace.h>
+
+#include <net/sctp/sctp.h>
+#include <net/sctp/sm.h>
+
+MODULE_AUTHOR("Wei Yongjun <yjwei@cn.fujitsu.com>");
+MODULE_DESCRIPTION("SCTP snooper");
+MODULE_LICENSE("GPL");
+
+static int port __read_mostly = 0;
+MODULE_PARM_DESC(port, "Port to match (0=all)");
+module_param(port, int, 0);
+
+static int bufsize __read_mostly = 64 * 1024;
+MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
+module_param(bufsize, int, 0);
+
+static int full __read_mostly = 1;
+MODULE_PARM_DESC(full, "Full log (1=every ack packet received,  0=only cwnd changes)");
+module_param(full, int, 0);
+
+static const char procname[] = "sctpprobe";
+
+static struct {
+	struct kfifo	  fifo;
+	spinlock_t	  lock;
+	wait_queue_head_t wait;
+	struct timespec	  tstart;
+} sctpw;
+
+static void printl(const char *fmt, ...)
+{
+	va_list args;
+	int len;
+	char tbuf[256];
+
+	va_start(args, fmt);
+	len = vscnprintf(tbuf, sizeof(tbuf), fmt, args);
+	va_end(args);
+
+	kfifo_in_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
+	wake_up(&sctpw.wait);
+}
+
+static int sctpprobe_open(struct inode *inode, struct file *file)
+{
+	kfifo_reset(&sctpw.fifo);
+	getnstimeofday(&sctpw.tstart);
+
+	return 0;
+}
+
+static ssize_t sctpprobe_read(struct file *file, char __user *buf,
+			      size_t len, loff_t *ppos)
+{
+	int error = 0, cnt = 0;
+	unsigned char *tbuf;
+
+	if (!buf)
+		return -EINVAL;
+
+	if (len == 0)
+		return 0;
+
+	tbuf = vmalloc(len);
+	if (!tbuf)
+		return -ENOMEM;
+
+	error = wait_event_interruptible(sctpw.wait,
+					 kfifo_len(&sctpw.fifo) != 0);
+	if (error)
+		goto out_free;
+
+	cnt = kfifo_out_locked(&sctpw.fifo, tbuf, len, &sctpw.lock);
+	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
+
+out_free:
+	vfree(tbuf);
+
+	return error ? error : cnt;
+}
+
+static const struct file_operations sctpprobe_fops = {
+	.owner	= THIS_MODULE,
+	.open	= sctpprobe_open,
+	.read	= sctpprobe_read,
+};
+
+sctp_disposition_t jsctp_sf_eat_sack(const struct sctp_endpoint *ep,
+				     const struct sctp_association *asoc,
+				     const sctp_subtype_t type,
+				     void *arg,
+				     sctp_cmd_seq_t *commands)
+{
+	struct sctp_transport *sp;
+	static __u32 lcwnd = 0;
+	struct timespec now;
+
+	sp = asoc->peer.primary_path;
+
+	if ((full || sp->cwnd != lcwnd) &&
+	    (!port || asoc->peer.port == port ||
+	     ep->base.bind_addr.port == port)) {
+		lcwnd = sp->cwnd;
+
+		getnstimeofday(&now);
+		now = timespec_sub(now, sctpw.tstart);
+
+		printl("%lu.%06lu ", (unsigned long) now.tv_sec,
+		       (unsigned long) now.tv_nsec / NSEC_PER_USEC);
+
+		printl("%p %5d %5d %5d %8d %5d ", asoc,
+		       ep->base.bind_addr.port, asoc->peer.port,
+		       asoc->pathmtu, asoc->peer.rwnd, asoc->unack_data);
+
+		list_for_each_entry(sp, &asoc->peer.transport_addr_list,
+					transports) {
+			if (sp == asoc->peer.primary_path)
+				printl("*");
+
+			if (sp->ipaddr.sa.sa_family == AF_INET)
+				printl("%pI4 ", &sp->ipaddr.v4.sin_addr);
+			else
+				printl("%pI6 ", &sp->ipaddr.v6.sin6_addr);
+
+			printl("%2u %8u %8u %8u %8u %8u ",
+			       sp->state, sp->cwnd, sp->ssthresh,
+			       sp->flight_size, sp->partial_bytes_acked,
+			       sp->pathmtu);
+		}
+		printl("\n");
+	}
+
+	jprobe_return();
+	return 0;
+}
+
+static struct jprobe sctp_recv_probe = {
+	.kp	= {
+		.symbol_name = "sctp_sf_eat_sack_6_2",
+	},
+	.entry	= jsctp_sf_eat_sack,
+};
+
+static __init int sctpprobe_init(void)
+{
+	int ret = -ENOMEM;
+
+	init_waitqueue_head(&sctpw.wait);
+	spin_lock_init(&sctpw.lock);
+	if (kfifo_alloc(&sctpw.fifo, bufsize, GFP_KERNEL))
+		return ret;
+
+	if (!proc_net_fops_create(&init_net, procname, S_IRUSR,
+				  &sctpprobe_fops))
+		goto free_kfifo;
+
+	ret = register_jprobe(&sctp_recv_probe);
+	if (ret)
+		goto remove_proc;
+
+	pr_info("SCTP probe registered (port=%d)\n", port);
+
+	return 0;
+
+remove_proc:
+	proc_net_remove(&init_net, procname);
+free_kfifo:
+	kfifo_free(&sctpw.fifo);
+	return ret;
+}
+
+static __exit void sctpprobe_exit(void)
+{
+	unregister_jprobe(&sctp_recv_probe);	/* stop the fifo writer first */
+	proc_net_remove(&init_net, procname);	/* then drop the reader entry */
+	kfifo_free(&sctpw.fifo);		/* last: no users of the fifo remain */
+}
+
+module_init(sctpprobe_init);
+module_exit(sctpprobe_exit);
-- 
cgit v1.2.3-59-g8ed1b


From b99a4d53a74ac25eb4b930eef6c745579149c571 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Fri, 30 Apr 2010 22:41:09 -0400
Subject: sctp: cleanup: remove duplicate assignment

This assignment isn't needed because we did it earlier already.

Also another reason to delete the assignment is because it triggers a
Smatch warning about checking for NULL pointers after a dereference.

Reported-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/sm_make_chunk.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 17cb400ecd6a..33aed1c6cbd6 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -419,10 +419,17 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 	if (!retval)
 		goto nomem_chunk;
 
-	/* Per the advice in RFC 2960 6.4, send this reply to
-	 * the source of the INIT packet.
+	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
+	 *
+	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
+	 * HEARTBEAT ACK, * etc.) to the same destination transport
+	 * address from which it received the DATA or control chunk
+	 * to which it is replying.
+	 *
+	 * [INIT ACK back to where the INIT came from.]
 	 */
 	retval->transport = chunk->transport;
+
 	retval->subh.init_hdr =
 		sctp_addto_chunk(retval, sizeof(initack), &initack);
 	retval->param_hdr.v = sctp_addto_chunk(retval, addrs_len, addrs.v);
@@ -461,18 +468,6 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc,
 	/* We need to remove the const qualifier at this point.  */
 	retval->asoc = (struct sctp_association *) asoc;
 
-	/* RFC 2960 6.4 Multi-homed SCTP Endpoints
-	 *
-	 * An endpoint SHOULD transmit reply chunks (e.g., SACK,
-	 * HEARTBEAT ACK, * etc.) to the same destination transport
-	 * address from which it received the DATA or control chunk
-	 * to which it is replying.
-	 *
-	 * [INIT ACK back to where the INIT came from.]
-	 */
-	if (chunk)
-		retval->transport = chunk->transport;
-
 nomem_chunk:
 	kfree(cookie);
 nomem_cookie:
-- 
cgit v1.2.3-59-g8ed1b


From d598b166ced20d9b9281ea3527c0e18405ddb803 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:09 -0400
Subject: sctp: Make sure we always return valid retransmit path

commit 4951feda0c60d1ef681f1a270afdd617924ab041
    sctp: Do no select unconfirmed transports for retransmissions

added code to make sure that we do not select unconfirmed paths
for data transmission.  This caused a problem when there are only
2 paths, 1 unconfirmed and 1 unreachable.  In that case, the next
retransmit path returned is NULL and that causes a kernel crash.

The solution is to only change retransmit paths if we found one to use.

Reported-by: Frank Schuster <frank.schuster01@web.de>
Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/associola.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index de830c268564..fab9cb2783a7 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1324,7 +1324,8 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc)
 		}
 	}
 
-	asoc->peer.retran_path = t;
+	if (t)
+		asoc->peer.retran_path = t;
 
 	SCTP_DEBUG_PRINTK_IPADDR("sctp_assoc_update_retran_path:association"
 				 " %p addr: ",
-- 
cgit v1.2.3-59-g8ed1b


From ae19c54866450f6c6f79223ca7d37965859a54e1 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:09 -0400
Subject: sctp: remove 'resent' bit from the chunk

The 'resent' bit is used to make sure that we don't update the
rto estimate based on retransmitted chunks.  However, we already
have the 'rto_pending' bit that we test when we need to update the
rto, so the 'resent' bit is just extra.  Additionally, we currently
have a bug in that we always set the 'resent' bit and thus the rto
estimate is only updated by Heartbeats.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/net/sctp/structs.h |  3 +--
 net/sctp/output.c          | 25 +++++++++----------------
 net/sctp/outqueue.c        |  1 -
 net/sctp/sm_make_chunk.c   |  1 -
 4 files changed, 10 insertions(+), 20 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index ff3017744711..03deffb1886b 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -757,7 +757,6 @@ struct sctp_chunk {
 #define SCTP_NEED_FRTX 0x1
 #define SCTP_DONT_FRTX 0x2
 	__u16	rtt_in_progress:1,	/* This chunk used for RTT calc? */
-		resent:1,		/* Has this chunk ever been resent. */
 		has_tsn:1,		/* Does this chunk have a TSN yet? */
 		has_ssn:1,		/* Does this chunk have a SSN yet? */
 		singleton:1,		/* Only chunk in the packet? */
@@ -1065,7 +1064,7 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
 			  struct sctp_sock *);
 void sctp_transport_pmtu(struct sctp_transport *);
 void sctp_transport_free(struct sctp_transport *);
-void sctp_transport_reset_timers(struct sctp_transport *, int);
+void sctp_transport_reset_timers(struct sctp_transport *);
 void sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
diff --git a/net/sctp/output.c b/net/sctp/output.c
index fad261d41ec2..35e49b9df4e0 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -429,24 +429,17 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) {
 		list_del_init(&chunk->list);
 		if (sctp_chunk_is_data(chunk)) {
+			/* 6.3.1 C4) When data is in flight and when allowed
+			 * by rule C5, a new RTT measurement MUST be made each
+			 * round trip.  Furthermore, new RTT measurements
+			 * SHOULD be made no more than once per round-trip
+			 * for a given destination transport address.
+			 */
 
-			if (!chunk->resent) {
-
-				/* 6.3.1 C4) When data is in flight and when allowed
-				 * by rule C5, a new RTT measurement MUST be made each
-				 * round trip.  Furthermore, new RTT measurements
-				 * SHOULD be made no more than once per round-trip
-				 * for a given destination transport address.
-				 */
-
-				if (!tp->rto_pending) {
-					chunk->rtt_in_progress = 1;
-					tp->rto_pending = 1;
-				}
+			if (!tp->rto_pending) {
+				chunk->rtt_in_progress = 1;
+				tp->rto_pending = 1;
 			}
-
-			chunk->resent = 1;
-
 			has_data = 1;
 		}
 
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index a4fe7dee76e8..4e551ba8998c 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1405,7 +1405,6 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 				 * instance).
 				 */
 				if (!tchunk->tsn_gap_acked &&
-				    !tchunk->resent &&
 				    tchunk->rtt_in_progress) {
 					tchunk->rtt_in_progress = 0;
 					rtt = jiffies - tchunk->sent_at;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 33aed1c6cbd6..24effdf471eb 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1205,7 +1205,6 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb,
 	INIT_LIST_HEAD(&retval->list);
 	retval->skb		= skb;
 	retval->asoc		= (struct sctp_association *)asoc;
-	retval->resent  	= 0;
 	retval->has_tsn		= 0;
 	retval->has_ssn         = 0;
 	retval->rtt_in_progress	= 0;
-- 
cgit v1.2.3-59-g8ed1b


From d9efc2231b28bc199f9de4dd594248b7341188e5 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:09 -0400
Subject: sctp: Do not force T3 timer on fast retransmissions.

We don't need to force the T3 timer any more and it's
actually wrong to do so, as it causes too long a delay.
The timer will be started if one is not running, but if
one is running, we leave it alone.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c  | 15 +++------------
 net/sctp/transport.c |  4 ++--
 2 files changed, 5 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 4e551ba8998c..786c4ff97ae4 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -659,14 +659,6 @@ redo:
 			if (chunk->fast_retransmit == SCTP_NEED_FRTX)
 				chunk->fast_retransmit = SCTP_DONT_FRTX;
 
-			/* Force start T3-rtx timer when fast retransmitting
-			 * the earliest outstanding TSN
-			 */
-			if (!timer && fast_rtx &&
-			    ntohl(chunk->subh.data_hdr->tsn) ==
-					     asoc->ctsn_ack_point + 1)
-				timer = 2;
-
 			q->empty = 0;
 			break;
 		}
@@ -871,7 +863,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 				 * sender MUST assure that at least one T3-rtx
 				 * timer is running.
 				 */
-				sctp_transport_reset_timers(transport, 0);
+				sctp_transport_reset_timers(transport);
 			}
 			break;
 
@@ -924,8 +916,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 						    rtx_timeout, &start_timer);
 
 			if (start_timer)
-				sctp_transport_reset_timers(transport,
-							    start_timer-1);
+				sctp_transport_reset_timers(transport);
 
 			/* This can happen on COOKIE-ECHO resend.  Only
 			 * one chunk can get bundled with a COOKIE-ECHO.
@@ -1058,7 +1049,7 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 			list_add_tail(&chunk->transmitted_list,
 				      &transport->transmitted);
 
-			sctp_transport_reset_timers(transport, 0);
+			sctp_transport_reset_timers(transport);
 
 			q->empty = 0;
 
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index be4d63d5a5cc..0ebb97fc98e0 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -195,7 +195,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport)
 /* Start T3_rtx timer if it is not already running and update the heartbeat
  * timer.  This routine is called every time a DATA chunk is sent.
  */
-void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
+void sctp_transport_reset_timers(struct sctp_transport *transport)
 {
 	/* RFC 2960 6.3.2 Retransmission Timer Rules
 	 *
@@ -205,7 +205,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
 	 * address.
 	 */
 
-	if (force || !timer_pending(&transport->T3_rtx_timer))
+	if (!timer_pending(&transport->T3_rtx_timer))
 		if (!mod_timer(&transport->T3_rtx_timer,
 			       jiffies + transport->rto))
 			sctp_transport_hold(transport);
-- 
cgit v1.2.3-59-g8ed1b


From b2cf9b6bd93af1cc047d3356f1c6cc9367fe3731 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:10 -0400
Subject: sctp: update transport initializations

Right now, sctp transports are not fully initialized and when
adding any new fields, they have to be explicitly initialized.
This is prone to mistakes.  So we switch to calling kzalloc()
which makes things much simpler.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/net/sctp/sctp.h |  2 +-
 net/sctp/associola.c    |  3 ---
 net/sctp/endpointola.c  |  2 --
 net/sctp/transport.c    | 25 -------------------------
 4 files changed, 1 insertion(+), 31 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 59151557406c..289241d31cc1 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -546,7 +546,7 @@ for (pos = chunk->subh.fwdtsn_hdr->skip;\
 #define WORD_ROUND(s) (((s)+3)&~3)
 
 /* Make a new instance of type.  */
-#define t_new(type, flags)	(type *)kmalloc(sizeof(type), flags)
+#define t_new(type, flags)	(type *)kzalloc(sizeof(type), flags)
 
 /* Compare two timevals.  */
 #define tv_lt(s, t) \
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index fab9cb2783a7..37753cd48be3 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -87,9 +87,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
 	/* Retrieve the SCTP per socket area.  */
 	sp = sctp_sk((struct sock *)sk);
 
-	/* Init all variables to a known value.  */
-	memset(asoc, 0, sizeof(struct sctp_association));
-
 	/* Discarding const is appropriate here.  */
 	asoc->ep = (struct sctp_endpoint *)ep;
 	sctp_endpoint_hold(asoc->ep);
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index 905fda582b92..2f8763bae9ed 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -70,8 +70,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
 	struct sctp_shared_key *null_key;
 	int err;
 
-	memset(ep, 0, sizeof(struct sctp_endpoint));
-
 	ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp);
 	if (!ep->digest)
 		return NULL;
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 0ebb97fc98e0..854228bf3f34 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -64,9 +64,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 	/* Copy in the address.  */
 	peer->ipaddr = *addr;
 	peer->af_specific = sctp_get_af_specific(addr->sa.sa_family);
-	peer->asoc = NULL;
-
-	peer->dst = NULL;
 	memset(&peer->saddr, 0, sizeof(union sctp_addr));
 
 	/* From 6.3.1 RTO Calculation:
@@ -76,34 +73,21 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 	 * parameter 'RTO.Initial'.
 	 */
 	peer->rto = msecs_to_jiffies(sctp_rto_initial);
-	peer->rtt = 0;
-	peer->rttvar = 0;
-	peer->srtt = 0;
-	peer->rto_pending = 0;
-	peer->hb_sent = 0;
-	peer->fast_recovery = 0;
 
 	peer->last_time_heard = jiffies;
 	peer->last_time_ecne_reduced = jiffies;
 
-	peer->init_sent_count = 0;
-
 	peer->param_flags = SPP_HB_DISABLE |
 			    SPP_PMTUD_ENABLE |
 			    SPP_SACKDELAY_ENABLE;
-	peer->hbinterval  = 0;
 
 	/* Initialize the default path max_retrans.  */
 	peer->pathmaxrxt  = sctp_max_retrans_path;
-	peer->error_count = 0;
 
 	INIT_LIST_HEAD(&peer->transmitted);
 	INIT_LIST_HEAD(&peer->send_ready);
 	INIT_LIST_HEAD(&peer->transports);
 
-	peer->T3_rtx_timer.expires = 0;
-	peer->hb_timer.expires = 0;
-
 	setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event,
 			(unsigned long)peer);
 	setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event,
@@ -113,15 +97,6 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 	get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce));
 
 	atomic_set(&peer->refcnt, 1);
-	peer->dead = 0;
-
-	peer->malloced = 0;
-
-	/* Initialize the state information for SFR-CACC */
-	peer->cacc.changeover_active = 0;
-	peer->cacc.cycling_changeover = 0;
-	peer->cacc.next_tsn_at_change = 0;
-	peer->cacc.cacc_saw_newack = 0;
 
 	return peer;
 }
-- 
cgit v1.2.3-59-g8ed1b


From cf9b4812e18aab6f86ff998bd7425a9e823269c3 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:10 -0400
Subject: sctp: fast recovery algorithm is per association.

SCTP fast recovery algorithm really applies per association
and impacts all transports.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/net/sctp/structs.h | 12 ++++++------
 net/sctp/transport.c       | 32 +++++++++++++++++---------------
 2 files changed, 23 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 9072dd67d8ad..d463296d9f79 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -895,9 +895,6 @@ struct sctp_transport {
 		 */
 		hb_sent:1,
 
-		/* Flag to track the current fast recovery state */
-		fast_recovery:1,
-
 		/* Is the Path MTU update pending on this tranport */
 		pmtu_pending:1,
 
@@ -952,9 +949,6 @@ struct sctp_transport {
 
 	__u32 burst_limited;	/* Holds old cwnd when max.burst is applied */
 
-	/* TSN marking the fast recovery exit point */
-	__u32 fast_recovery_exit;
-
 	/* Destination */
 	struct dst_entry *dst;
 	/* Source address. */
@@ -1723,6 +1717,12 @@ struct sctp_association {
 	/* Highest TSN that is acknowledged by incoming SACKs. */
 	__u32 highest_sacked;
 
+	/* TSN marking the fast recovery exit point */
+	__u32 fast_recovery_exit;
+
+	/* Flag to track the current fast recovery state */
+	__u8 fast_recovery;
+
 	/* The number of unacknowledged data chunks.  Reported through
 	 * the SCTP_STATUS sockopt.
 	 */
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 854228bf3f34..fccf4947aff1 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -378,15 +378,16 @@ void sctp_transport_update_rto(struct sctp_transport *tp, __u32 rtt)
 void sctp_transport_raise_cwnd(struct sctp_transport *transport,
 			       __u32 sack_ctsn, __u32 bytes_acked)
 {
+	struct sctp_association *asoc = transport->asoc;
 	__u32 cwnd, ssthresh, flight_size, pba, pmtu;
 
 	cwnd = transport->cwnd;
 	flight_size = transport->flight_size;
 
 	/* See if we need to exit Fast Recovery first */
-	if (transport->fast_recovery &&
-	    TSN_lte(transport->fast_recovery_exit, sack_ctsn))
-		transport->fast_recovery = 0;
+	if (asoc->fast_recovery &&
+	    TSN_lte(asoc->fast_recovery_exit, sack_ctsn))
+		asoc->fast_recovery = 0;
 
 	/* The appropriate cwnd increase algorithm is performed if, and only
 	 * if the cumulative TSN whould advanced and the congestion window is
@@ -415,7 +416,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
 		 *    2) the destination's path MTU.  This upper bound protects
 		 *    against the ACK-Splitting attack outlined in [SAVAGE99].
 		 */
-		if (transport->fast_recovery)
+		if (asoc->fast_recovery)
 			return;
 
 		if (bytes_acked > pmtu)
@@ -466,6 +467,8 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
 void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 			       sctp_lower_cwnd_t reason)
 {
+	struct sctp_association *asoc = transport->asoc;
+
 	switch (reason) {
 	case SCTP_LOWER_CWND_T3_RTX:
 		/* RFC 2960 Section 7.2.3, sctpimpguide
@@ -476,11 +479,11 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		 *      partial_bytes_acked = 0
 		 */
 		transport->ssthresh = max(transport->cwnd/2,
-					  4*transport->asoc->pathmtu);
-		transport->cwnd = transport->asoc->pathmtu;
+					  4*asoc->pathmtu);
+		transport->cwnd = asoc->pathmtu;
 
-		/* T3-rtx also clears fast recovery on the transport */
-		transport->fast_recovery = 0;
+		/* T3-rtx also clears fast recovery */
+		asoc->fast_recovery = 0;
 		break;
 
 	case SCTP_LOWER_CWND_FAST_RTX:
@@ -496,15 +499,15 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		 *      cwnd = ssthresh
 		 *      partial_bytes_acked = 0
 		 */
-		if (transport->fast_recovery)
+		if (asoc->fast_recovery)
 			return;
 
 		/* Mark Fast recovery */
-		transport->fast_recovery = 1;
-		transport->fast_recovery_exit = transport->asoc->next_tsn - 1;
+		asoc->fast_recovery = 1;
+		asoc->fast_recovery_exit = asoc->next_tsn - 1;
 
 		transport->ssthresh = max(transport->cwnd/2,
-					  4*transport->asoc->pathmtu);
+					  4*asoc->pathmtu);
 		transport->cwnd = transport->ssthresh;
 		break;
 
@@ -524,7 +527,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		if (time_after(jiffies, transport->last_time_ecne_reduced +
 					transport->rtt)) {
 			transport->ssthresh = max(transport->cwnd/2,
-						  4*transport->asoc->pathmtu);
+						  4*asoc->pathmtu);
 			transport->cwnd = transport->ssthresh;
 			transport->last_time_ecne_reduced = jiffies;
 		}
@@ -540,7 +543,7 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		 * interval.
 		 */
 		transport->cwnd = max(transport->cwnd/2,
-					 4*transport->asoc->pathmtu);
+					 4*asoc->pathmtu);
 		break;
 	}
 
@@ -625,7 +628,6 @@ void sctp_transport_reset(struct sctp_transport *t)
 	t->error_count = 0;
 	t->rto_pending = 0;
 	t->hb_sent = 0;
-	t->fast_recovery = 0;
 
 	/* Initialize the state information for SFR-CACC */
 	t->cacc.changeover_active = 0;
-- 
cgit v1.2.3-59-g8ed1b


From 65883371894be2631603d5d412f90f8c09290fef Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:10 -0400
Subject: sctp: rwnd_press should be cumulative

rwnd_press tracks the pressure on the receive window.  Every
time the receive buffer overflows, we truncate the receive
window and then grow it back.  However, if we don't track
the cumulative pressure, it's possible to reach a situation
where the receive buffer is empty, but rwnd stays truncated.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/associola.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 37753cd48be3..65f9a7cdf466 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1482,7 +1482,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned len)
 	if (asoc->rwnd >= len) {
 		asoc->rwnd -= len;
 		if (over) {
-			asoc->rwnd_press = asoc->rwnd;
+			asoc->rwnd_press += asoc->rwnd;
 			asoc->rwnd = 0;
 		}
 	} else {
-- 
cgit v1.2.3-59-g8ed1b


From ea862c8d1f4a0d193979c7412c3b946f600721ce Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:10 -0400
Subject: sctp: correctly mark missing chunks in fast recovery

According to RFC 4960 Section 7.2.4:
 					If an endpoint is in Fast
   Recovery and a SACK arrives that advances the Cumulative TSN Ack
   Point, the miss indications are incremented for all TSNs reported
   missing in the SACK.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 786c4ff97ae4..b491a1aac3e4 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -1154,6 +1154,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 	struct sctp_transport *primary = asoc->peer.primary_path;
 	int count_of_newacks = 0;
 	int gap_ack_blocks;
+	u8 accum_moved = 0;
 
 	/* Grab the association's destination address list. */
 	transport_list = &asoc->peer.transport_addr_list;
@@ -1232,16 +1233,22 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 			count_of_newacks ++;
 	}
 
+	/* Move the Cumulative TSN Ack Point if appropriate.  */
+	if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn)) {
+		asoc->ctsn_ack_point = sack_ctsn;
+		accum_moved = 1;
+	}
+
 	if (gap_ack_blocks) {
+
+		if (asoc->fast_recovery && accum_moved)
+			highest_new_tsn = highest_tsn;
+
 		list_for_each_entry(transport, transport_list, transports)
 			sctp_mark_missing(q, &transport->transmitted, transport,
 					  highest_new_tsn, count_of_newacks);
 	}
 
-	/* Move the Cumulative TSN Ack Point if appropriate.  */
-	if (TSN_lt(asoc->ctsn_ack_point, sack_ctsn))
-		asoc->ctsn_ack_point = sack_ctsn;
-
 	/* Update unack_data field in the assoc. */
 	sctp_sack_update_unack_data(asoc, sack);
 
@@ -1685,7 +1692,8 @@ static void sctp_mark_missing(struct sctp_outq *q,
 	struct sctp_chunk *chunk;
 	__u32 tsn;
 	char do_fast_retransmit = 0;
-	struct sctp_transport *primary = q->asoc->peer.primary_path;
+	struct sctp_association *asoc = q->asoc;
+	struct sctp_transport *primary = asoc->peer.primary_path;
 
 	list_for_each_entry(chunk, transmitted_queue, transmitted_list) {
 
-- 
cgit v1.2.3-59-g8ed1b


From bfa0d9843ac5feb9667990706b4524390fee4df9 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:10 -0400
Subject: sctp: Optimize computation of highest new tsn in SACK.

Right now, if the highest tsn in the SACK doesn't change, we'll
end up scanning the transmitted lists on the transports twice:
once for locating the highest _new_ tsn, and once for actually
tagging chunks as acked.  This is a waste, since we can record
the highest _new_ tsn at the same time as tagging chunks.  Long
ago this was not possible because we would try to mark chunks
as missing at the same time as tagging them acked and this approach
didn't work.  Now that the two steps are separate, we can re-use
the old approach.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 net/sctp/outqueue.c | 42 +++++++-----------------------------------
 1 file changed, 7 insertions(+), 35 deletions(-)

(limited to 'net')

diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index b491a1aac3e4..5d057178ce0c 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -62,7 +62,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 				   struct list_head *transmitted_queue,
 				   struct sctp_transport *transport,
 				   struct sctp_sackhdr *sack,
-				   __u32 highest_new_tsn);
+				   __u32 *highest_new_tsn);
 
 static void sctp_mark_missing(struct sctp_outq *q,
 			      struct list_head *transmitted_queue,
@@ -1109,32 +1109,6 @@ static void sctp_sack_update_unack_data(struct sctp_association *assoc,
 	assoc->unack_data = unack_data;
 }
 
-/* Return the highest new tsn that is acknowledged by the given SACK chunk. */
-static __u32 sctp_highest_new_tsn(struct sctp_sackhdr *sack,
-				  struct sctp_association *asoc)
-{
-	struct sctp_transport *transport;
-	struct sctp_chunk *chunk;
-	__u32 highest_new_tsn, tsn;
-	struct list_head *transport_list = &asoc->peer.transport_addr_list;
-
-	highest_new_tsn = ntohl(sack->cum_tsn_ack);
-
-	list_for_each_entry(transport, transport_list, transports) {
-		list_for_each_entry(chunk, &transport->transmitted,
-				transmitted_list) {
-			tsn = ntohl(chunk->subh.data_hdr->tsn);
-
-			if (!chunk->tsn_gap_acked &&
-			    TSN_lt(highest_new_tsn, tsn) &&
-			    sctp_acked(sack, tsn))
-				highest_new_tsn = tsn;
-		}
-	}
-
-	return highest_new_tsn;
-}
-
 /* This is where we REALLY process a SACK.
  *
  * Process the SACK against the outqueue.  Mostly, this just frees
@@ -1203,18 +1177,15 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 	if (gap_ack_blocks)
 		highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end);
 
-	if (TSN_lt(asoc->highest_sacked, highest_tsn)) {
-		highest_new_tsn = highest_tsn;
+	if (TSN_lt(asoc->highest_sacked, highest_tsn))
 		asoc->highest_sacked = highest_tsn;
-	} else {
-		highest_new_tsn = sctp_highest_new_tsn(sack, asoc);
-	}
 
+	highest_new_tsn = sack_ctsn;
 
 	/* Run through the retransmit queue.  Credit bytes received
 	 * and free those chunks that we can.
 	 */
-	sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn);
+	sctp_check_transmitted(q, &q->retransmit, NULL, sack, &highest_new_tsn);
 
 	/* Run through the transmitted queue.
 	 * Credit bytes received and free those chunks which we can.
@@ -1223,7 +1194,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack)
 	 */
 	list_for_each_entry(transport, transport_list, transports) {
 		sctp_check_transmitted(q, &transport->transmitted,
-				       transport, sack, highest_new_tsn);
+				       transport, sack, &highest_new_tsn);
 		/*
 		 * SFR-CACC algorithm:
 		 * C) Let count_of_newacks be the number of
@@ -1331,7 +1302,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 				   struct list_head *transmitted_queue,
 				   struct sctp_transport *transport,
 				   struct sctp_sackhdr *sack,
-				   __u32 highest_new_tsn_in_sack)
+				   __u32 *highest_new_tsn_in_sack)
 {
 	struct list_head *lchunk;
 	struct sctp_chunk *tchunk;
@@ -1419,6 +1390,7 @@ static void sctp_check_transmitted(struct sctp_outq *q,
 			 */
 			if (!tchunk->tsn_gap_acked) {
 				tchunk->tsn_gap_acked = 1;
+				*highest_new_tsn_in_sack = tsn;
 				bytes_acked += sctp_data_size(tchunk);
 				if (!tchunk->transport)
 					migrate_bytes += sctp_data_size(tchunk);
-- 
cgit v1.2.3-59-g8ed1b


From 0e3aef8d09a8c11e3fb83cdcb24b5bc7421b3726 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Fri, 30 Apr 2010 22:41:10 -0400
Subject: sctp: Tag messages that can be Nagle delayed at creation.

When we create the sctp_datamsg and fragment the user data,
we know exactly if we are sending full segments or not and
how they might be bundled.  During this time, we can mark
messages as Nagle capable or not.  This makes the check at
transmit time much simpler.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
---
 include/net/sctp/structs.h | 8 +++-----
 net/sctp/chunk.c           | 4 ++--
 net/sctp/output.c          | 2 +-
 3 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index d463296d9f79..9d44aef365da 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -643,17 +643,15 @@ struct sctp_pf {
 struct sctp_datamsg {
 	/* Chunks waiting to be submitted to lower layer. */
 	struct list_head chunks;
-	/* Chunks that have been transmitted. */
-	size_t msg_size;
 	/* Reference counting. */
 	atomic_t refcnt;
 	/* When is this message no longer interesting to the peer? */
 	unsigned long expires_at;
 	/* Did the messenge fail to send? */
 	int send_error;
-	char send_failed;
-	/* Control whether chunks from this message can be abandoned. */
-	char can_abandon;
+	u8 send_failed:1,
+	   can_abandon:1,   /* can chunks from this message can be abandoned. */
+	   can_delay;	    /* should this message be Nagle delayed */
 };
 
 struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *,
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 3eab6db59a37..476caaf100ed 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -58,9 +58,9 @@ static void sctp_datamsg_init(struct sctp_datamsg *msg)
 	msg->send_failed = 0;
 	msg->send_error = 0;
 	msg->can_abandon = 0;
+	msg->can_delay = 1;
 	msg->expires_at = 0;
 	INIT_LIST_HEAD(&msg->chunks);
-	msg->msg_size = 0;
 }
 
 /* Allocate and initialize datamsg. */
@@ -157,7 +157,6 @@ static void sctp_datamsg_assign(struct sctp_datamsg *msg, struct sctp_chunk *chu
 {
 	sctp_datamsg_hold(msg);
 	chunk->msg = msg;
-	msg->msg_size += chunk->skb->len;
 }
 
 
@@ -247,6 +246,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc,
 	if (msg_len >= first_len) {
 		msg_len -= first_len;
 		whole = 1;
+		msg->can_delay = 0;
 	}
 
 	/* How many full sized?  How many bytes leftover? */
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 35e49b9df4e0..a646681f5acd 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -674,7 +674,7 @@ static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet,
 		 * Don't delay large message writes that may have been
 		 * fragmeneted into small peices.
 		 */
-		if ((len < max) && (chunk->msg->msg_size < max)) {
+		if ((len < max) && chunk->msg->can_delay) {
 			retval = SCTP_XMIT_NAGLE_DELAY;
 			goto finish;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From e772c349a11de448f194d0c9f2e7eb23800e1a13 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 1 May 2010 18:29:43 +0200
Subject: netfilter: nf_ct_h323: switch "incomplete TPKT" message to pr_debug()

The message might be falsely triggered by non-H.323 traffic on port
1720.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_h323_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index a487c8038044..48bf15073a85 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -194,8 +194,7 @@ static int get_tpkt_data(struct sk_buff *skb, unsigned int protoff,
 			return 0;
 		}
 
-		if (net_ratelimit())
-			printk("nf_ct_h323: incomplete TPKT (fragmented?)\n");
+		pr_debug("nf_ct_h323: incomplete TPKT (fragmented?)\n");
 		goto clear_out;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 43815482370c510c569fd18edb57afcb0fa8cab6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 29 Apr 2010 11:01:49 +0000
Subject: net: sock_def_readable() and friends RCU conversion

sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we
need two atomic operations (and associated dirtying) per incoming
packet.

RCU conversion is pretty much needed :

1) Add a new structure, called "struct socket_wq" to hold all fields
that will need rcu_read_lock() protection (currently: a
wait_queue_head_t and a struct fasync_struct pointer).

[Future patch will add a list anchor for wakeup coalescing]

2) Attach one of such structure to each "struct socket" created in
sock_alloc_inode().

3) Respect RCU grace period when freeing a "struct socket_wq"

4) Change sk_sleep pointer in "struct sock" by sk_wq, pointer to "struct
socket_wq"

5) Change sk_sleep() function to use new sk->sk_wq instead of
sk->sk_sleep

6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside
a rcu_read_lock() section.

7) Change all sk_has_sleeper() callers to :
  - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
  - Use wq_has_sleeper() to eventually wakeup tasks.
  - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)

8) sock_wake_async() is modified to use rcu protection as well.

9) Exceptions :
  macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq"
instead of dynamically allocated ones. They don't need rcu freeing.

Some cleanups or followups are probably needed, (possible
sk_callback_lock conversion to a spinlock for example...).

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvtap.c | 13 +++++++++----
 drivers/net/tun.c     | 21 ++++++++++++---------
 include/linux/net.h   | 14 +++++++++-----
 include/net/af_unix.h | 20 +++++++++++---------
 include/net/sock.h    | 38 +++++++++++++++++++-------------------
 net/atm/common.c      | 22 ++++++++++++++--------
 net/core/sock.c       | 50 +++++++++++++++++++++++++++++++-------------------
 net/core/stream.c     | 10 +++++++---
 net/dccp/output.c     | 10 ++++++----
 net/iucv/af_iucv.c    | 11 +++++++----
 net/phonet/pep.c      |  8 ++++----
 net/phonet/socket.c   |  2 +-
 net/rxrpc/af_rxrpc.c  | 10 ++++++----
 net/sctp/socket.c     |  2 +-
 net/socket.c          | 47 ++++++++++++++++++++++++++++++++++++-----------
 net/unix/af_unix.c    | 17 ++++++++---------
 16 files changed, 181 insertions(+), 114 deletions(-)

(limited to 'net')

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index d97e1fd234ba..1c4110df343e 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -37,6 +37,7 @@
 struct macvtap_queue {
 	struct sock sk;
 	struct socket sock;
+	struct socket_wq wq;
 	struct macvlan_dev *vlan;
 	struct file *file;
 	unsigned int flags;
@@ -242,12 +243,15 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = {
 
 static void macvtap_sock_write_space(struct sock *sk)
 {
+	wait_queue_head_t *wqueue;
+
 	if (!sock_writeable(sk) ||
 	    !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
 		return;
 
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND);
+	wqueue = sk_sleep(sk);
+	if (wqueue && waitqueue_active(wqueue))
+		wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
 }
 
 static int macvtap_open(struct inode *inode, struct file *file)
@@ -272,7 +276,8 @@ static int macvtap_open(struct inode *inode, struct file *file)
 	if (!q)
 		goto out;
 
-	init_waitqueue_head(&q->sock.wait);
+	q->sock.wq = &q->wq;
+	init_waitqueue_head(&q->wq.wait);
 	q->sock.type = SOCK_RAW;
 	q->sock.state = SS_CONNECTED;
 	q->sock.file = file;
@@ -308,7 +313,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait)
 		goto out;
 
 	mask = 0;
-	poll_wait(file, &q->sock.wait, wait);
+	poll_wait(file, &q->wq.wait, wait);
 
 	if (!skb_queue_empty(&q->sk.sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 20a17938c62b..e525a6cf5587 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -109,7 +109,7 @@ struct tun_struct {
 
 	struct tap_filter       txflt;
 	struct socket		socket;
-
+	struct socket_wq	wq;
 #ifdef TUN_DEBUG
 	int debug;
 #endif
@@ -323,7 +323,7 @@ static void tun_net_uninit(struct net_device *dev)
 	/* Inform the methods they need to stop using the dev.
 	 */
 	if (tfile) {
-		wake_up_all(&tun->socket.wait);
+		wake_up_all(&tun->wq.wait);
 		if (atomic_dec_and_test(&tfile->count))
 			__tun_detach(tun);
 	}
@@ -398,7 +398,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Notify and wake up reader process */
 	if (tun->flags & TUN_FASYNC)
 		kill_fasync(&tun->fasync, SIGIO, POLL_IN);
-	wake_up_interruptible_poll(&tun->socket.wait, POLLIN |
+	wake_up_interruptible_poll(&tun->wq.wait, POLLIN |
 				   POLLRDNORM | POLLRDBAND);
 	return NETDEV_TX_OK;
 
@@ -498,7 +498,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 
 	DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name);
 
-	poll_wait(file, &tun->socket.wait, wait);
+	poll_wait(file, &tun->wq.wait, wait);
 
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
@@ -773,7 +773,7 @@ static ssize_t tun_do_read(struct tun_struct *tun,
 
 	DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
 
-	add_wait_queue(&tun->socket.wait, &wait);
+	add_wait_queue(&tun->wq.wait, &wait);
 	while (len) {
 		current->state = TASK_INTERRUPTIBLE;
 
@@ -804,7 +804,7 @@ static ssize_t tun_do_read(struct tun_struct *tun,
 	}
 
 	current->state = TASK_RUNNING;
-	remove_wait_queue(&tun->socket.wait, &wait);
+	remove_wait_queue(&tun->wq.wait, &wait);
 
 	return ret;
 }
@@ -861,6 +861,7 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = {
 static void tun_sock_write_space(struct sock *sk)
 {
 	struct tun_struct *tun;
+	wait_queue_head_t *wqueue;
 
 	if (!sock_writeable(sk))
 		return;
@@ -868,8 +869,9 @@ static void tun_sock_write_space(struct sock *sk)
 	if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
 		return;
 
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT |
+	wqueue = sk_sleep(sk);
+	if (wqueue && waitqueue_active(wqueue))
+		wake_up_interruptible_sync_poll(wqueue, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 
 	tun = tun_sk(sk)->tun;
@@ -1039,7 +1041,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		if (!sk)
 			goto err_free_dev;
 
-		init_waitqueue_head(&tun->socket.wait);
+		tun->socket.wq = &tun->wq;
+		init_waitqueue_head(&tun->wq.wait);
 		tun->socket.ops = &tun_socket_ops;
 		sock_init_data(&tun->socket, sk);
 		sk->sk_write_space = tun_sock_write_space;
diff --git a/include/linux/net.h b/include/linux/net.h
index 4157b5d42bd6..2b4deeeb8646 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -59,6 +59,7 @@ typedef enum {
 #include <linux/wait.h>
 #include <linux/fcntl.h>	/* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/kmemcheck.h>
+#include <linux/rcupdate.h>
 
 struct poll_table_struct;
 struct pipe_inode_info;
@@ -116,6 +117,12 @@ enum sock_shutdown_cmd {
 	SHUT_RDWR	= 2,
 };
 
+struct socket_wq {
+	wait_queue_head_t	wait;
+	struct fasync_struct	*fasync_list;
+	struct rcu_head		rcu;
+} ____cacheline_aligned_in_smp;
+
 /**
  *  struct socket - general BSD socket
  *  @state: socket state (%SS_CONNECTED, etc)
@@ -135,11 +142,8 @@ struct socket {
 	kmemcheck_bitfield_end(type);
 
 	unsigned long		flags;
-	/*
-	 * Please keep fasync_list & wait fields in the same cache line
-	 */
-	struct fasync_struct	*fasync_list;
-	wait_queue_head_t	wait;
+
+	struct socket_wq	*wq;
 
 	struct file		*file;
 	struct sock		*sk;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 1614d78c60ed..20725e213aee 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -30,7 +30,7 @@ struct unix_skb_parms {
 #endif
 };
 
-#define UNIXCB(skb) 	(*(struct unix_skb_parms*)&((skb)->cb))
+#define UNIXCB(skb) 	(*(struct unix_skb_parms *)&((skb)->cb))
 #define UNIXCREDS(skb)	(&UNIXCB((skb)).creds)
 #define UNIXSID(skb)	(&UNIXCB((skb)).secid)
 
@@ -45,21 +45,23 @@ struct unix_skb_parms {
 struct unix_sock {
 	/* WARNING: sk has to be the first member */
 	struct sock		sk;
-        struct unix_address     *addr;
-        struct dentry		*dentry;
-        struct vfsmount		*mnt;
+	struct unix_address     *addr;
+	struct dentry		*dentry;
+	struct vfsmount		*mnt;
 	struct mutex		readlock;
-        struct sock		*peer;
-        struct sock		*other;
+	struct sock		*peer;
+	struct sock		*other;
 	struct list_head	link;
-        atomic_long_t           inflight;
-        spinlock_t		lock;
+	atomic_long_t		inflight;
+	spinlock_t		lock;
 	unsigned int		gc_candidate : 1;
 	unsigned int		gc_maybe_cycle : 1;
-        wait_queue_head_t       peer_wait;
+	struct socket_wq	peer_wq;
 };
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
 
+#define peer_wait peer_wq.wait
+
 #ifdef CONFIG_SYSCTL
 extern int unix_sysctl_register(struct net *net);
 extern void unix_sysctl_unregister(struct net *net);
diff --git a/include/net/sock.h b/include/net/sock.h
index e1777db5b9ab..cc7f91ec972c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -159,7 +159,7 @@ struct sock_common {
   *	@sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
   *	@sk_lock:	synchronizer
   *	@sk_rcvbuf: size of receive buffer in bytes
-  *	@sk_sleep: sock wait queue
+  *	@sk_wq: sock wait queue and async head
   *	@sk_dst_cache: destination cache
   *	@sk_dst_lock: destination cache lock
   *	@sk_policy: flow policy
@@ -257,7 +257,7 @@ struct sock {
 		struct sk_buff *tail;
 		int len;
 	} sk_backlog;
-	wait_queue_head_t	*sk_sleep;
+	struct socket_wq	*sk_wq;
 	struct dst_entry	*sk_dst_cache;
 #ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
@@ -1219,7 +1219,7 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 
 static inline wait_queue_head_t *sk_sleep(struct sock *sk)
 {
-	return sk->sk_sleep;
+	return &sk->sk_wq->wait;
 }
 /* Detach socket from process context.
  * Announce socket dead, detach it from wait queue and inode.
@@ -1233,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk)
 	write_lock_bh(&sk->sk_callback_lock);
 	sock_set_flag(sk, SOCK_DEAD);
 	sk_set_socket(sk, NULL);
-	sk->sk_sleep  = NULL;
+	sk->sk_wq  = NULL;
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
 static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_sleep = &parent->wait;
+	rcu_assign_pointer(sk->sk_wq, parent->wq);
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
 	security_sock_graft(sk, parent);
@@ -1392,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk)
 }
 
 /**
- * sk_has_sleeper - check if there are any waiting processes
- * @sk: socket
+ * wq_has_sleeper - check if there are any waiting processes
+ * @wq: struct socket_wq
  *
- * Returns true if socket has waiting processes
+ * Returns true if socket_wq has waiting processes
  *
- * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
+ * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
  * barrier call. They were added due to the race found within the tcp code.
  *
  * Consider following tcp code paths:
@@ -1410,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk)
  *   ...                 ...
  *   tp->rcv_nxt check   sock_def_readable
  *   ...                 {
- *   schedule               ...
- *                          if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
- *                              wake_up_interruptible(sk_sleep(sk))
+ *   schedule               rcu_read_lock();
+ *                          wq = rcu_dereference(sk->sk_wq);
+ *                          if (wq && waitqueue_active(&wq->wait))
+ *                              wake_up_interruptible(&wq->wait)
  *                          ...
  *                       }
  *
@@ -1421,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk)
  * could then endup calling schedule and sleep forever if there are no more
  * data on the socket.
  *
- * The sk_has_sleeper is always called right after a call to read_lock, so we
- * can use smp_mb__after_lock barrier.
  */
-static inline int sk_has_sleeper(struct sock *sk)
+static inline bool wq_has_sleeper(struct socket_wq *wq)
 {
+
 	/*
 	 * We need to be sure we are in sync with the
 	 * add_wait_queue modifications to the wait queue.
 	 *
 	 * This memory barrier is paired in the sock_poll_wait.
 	 */
-	smp_mb__after_lock();
-	return sk_sleep(sk) && waitqueue_active(sk_sleep(sk));
+	smp_mb();
+	return wq && waitqueue_active(&wq->wait);
 }
 
 /**
@@ -1442,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk)
  * @wait_address:   socket wait queue
  * @p:              poll_table
  *
- * See the comments in the sk_has_sleeper function.
+ * See the comments in the wq_has_sleeper function.
  */
 static inline void sock_poll_wait(struct file *filp,
 		wait_queue_head_t *wait_address, poll_table *p)
@@ -1453,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp,
 		 * We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
-		 * This memory barrier is paired in the sk_has_sleeper.
+		 * This memory barrier is paired in the wq_has_sleeper.
 		*/
 		smp_mb();
 	}
diff --git a/net/atm/common.c b/net/atm/common.c
index e3e10e6f8628..b43feb1a3995 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -90,10 +90,13 @@ static void vcc_sock_destruct(struct sock *sk)
 
 static void vcc_def_wakeup(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up(sk_sleep(sk));
-	read_unlock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up(&wq->wait);
+	rcu_read_unlock();
 }
 
 static inline int vcc_writable(struct sock *sk)
@@ -106,16 +109,19 @@ static inline int vcc_writable(struct sock *sk)
 
 static void vcc_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
 
 	if (vcc_writable(sk)) {
-		if (sk_has_sleeper(sk))
-			wake_up_interruptible(sk_sleep(sk));
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible(&wq->wait);
 
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static struct proto vcc_proto = {
diff --git a/net/core/sock.c b/net/core/sock.c
index 51041759517e..94c4affdda9b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1211,7 +1211,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		 */
 		sk_refcnt_debug_inc(newsk);
 		sk_set_socket(newsk, NULL);
-		newsk->sk_sleep	 = NULL;
+		newsk->sk_wq = NULL;
 
 		if (newsk->sk_prot->sockets_allocated)
 			percpu_counter_inc(newsk->sk_prot->sockets_allocated);
@@ -1800,41 +1800,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
 
 static void sock_def_wakeup(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_all(sk_sleep(sk));
-	read_unlock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_all(&wq->wait);
+	rcu_read_unlock();
 }
 
 static void sock_def_error_report(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_poll(sk_sleep(sk), POLLERR);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_poll(&wq->wait, POLLERR);
 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_readable(struct sock *sk, int len)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN |
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
 						POLLRDNORM | POLLRDBAND);
 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
 
 	/* Do not wake up a writer until he can make "significant"
 	 * progress.  --DaveM
 	 */
 	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
-		if (sk_has_sleeper(sk))
-			wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT |
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 
 		/* Should agree with poll, otherwise some programs break */
@@ -1842,7 +1854,7 @@ static void sock_def_write_space(struct sock *sk)
 			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_destruct(struct sock *sk)
@@ -1896,10 +1908,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	if (sock) {
 		sk->sk_type	=	sock->type;
-		sk->sk_sleep	=	&sock->wait;
+		sk->sk_wq	=	sock->wq;
 		sock->sk	=	sk;
 	} else
-		sk->sk_sleep	=	NULL;
+		sk->sk_wq	=	NULL;
 
 	spin_lock_init(&sk->sk_dst_lock);
 	rwlock_init(&sk->sk_callback_lock);
diff --git a/net/core/stream.c b/net/core/stream.c
index 7b3c3f30b107..cc196f42b8d8 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -28,15 +28,19 @@
 void sk_stream_write_space(struct sock *sk)
 {
 	struct socket *sock = sk->sk_socket;
+	struct socket_wq *wq;
 
 	if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
 		clear_bit(SOCK_NOSPACE, &sock->flags);
 
-		if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-			wake_up_interruptible_poll(sk_sleep(sk), POLLOUT |
+		rcu_read_lock();
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_poll(&wq->wait, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
-		if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
+		if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
 			sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
+		rcu_read_unlock();
 	}
 }
 
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 2d3dcb39851f..aadbdb58758b 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -195,15 +195,17 @@ EXPORT_SYMBOL_GPL(dccp_sync_mss);
 
 void dccp_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
 
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible(sk_sleep(sk));
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible(&wq->wait);
 	/* Should agree with poll, otherwise some programs break */
 	if (sock_writeable(sk))
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 /**
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 9636b7d27b48..8be324fe08b9 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -305,11 +305,14 @@ static inline int iucv_below_msglim(struct sock *sk)
  */
 static void iucv_sock_wake_msglim(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_all(sk_sleep(sk));
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_all(&wq->wait);
 	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 /* Timers */
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index e2a95762abd3..af4d38bc3b22 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -664,12 +664,12 @@ static int pep_wait_connreq(struct sock *sk, int noblock)
 		if (signal_pending(tsk))
 			return sock_intr_errno(timeo);
 
-		prepare_to_wait_exclusive(&sk->sk_socket->wait, &wait,
+		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
 						TASK_INTERRUPTIBLE);
 		release_sock(sk);
 		timeo = schedule_timeout(timeo);
 		lock_sock(sk);
-		finish_wait(&sk->sk_socket->wait, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 	}
 
 	return 0;
@@ -910,10 +910,10 @@ disabled:
 			goto out;
 		}
 
-		prepare_to_wait(&sk->sk_socket->wait, &wait,
+		prepare_to_wait(sk_sleep(sk), &wait,
 				TASK_INTERRUPTIBLE);
 		done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits));
-		finish_wait(&sk->sk_socket->wait, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 
 		if (sk->sk_state != TCP_ESTABLISHED)
 			goto disabled;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index c785bfd0744f..6e9848bf0370 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -265,7 +265,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
 	struct pep_sock *pn = pep_sk(sk);
 	unsigned int mask = 0;
 
-	poll_wait(file, &sock->wait, wait);
+	poll_wait(file, sk_sleep(sk), wait);
 
 	switch (sk->sk_state) {
 	case TCP_LISTEN:
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index c432d76f415e..0b9bb2085ce4 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -62,13 +62,15 @@ static inline int rxrpc_writable(struct sock *sk)
 static void rxrpc_write_space(struct sock *sk)
 {
 	_enter("%p", sk);
-	read_lock(&sk->sk_callback_lock);
+	rcu_read_lock();
 	if (rxrpc_writable(sk)) {
-		if (sk_has_sleeper(sk))
-			wake_up_interruptible(sk_sleep(sk));
+		struct socket_wq *wq = rcu_dereference(sk->sk_wq);
+
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible(&wq->wait);
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 /*
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 13d8229f3a9c..d54700af927a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6065,7 +6065,7 @@ static void __sctp_write_space(struct sctp_association *asoc)
 			 * here by modeling from the current TCP/UDP code.
 			 * We have not tested with it yet.
 			 */
-			if (sock->fasync_list &&
+			if (sock->wq->fasync_list &&
 			    !(sk->sk_shutdown & SEND_SHUTDOWN))
 				sock_wake_async(sock,
 						SOCK_WAKE_SPACE, POLL_OUT);
diff --git a/net/socket.c b/net/socket.c
index cb7c1f6c0d6e..dae8c6b84a09 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -252,9 +252,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 	ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
-	init_waitqueue_head(&ei->socket.wait);
+	ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL);
+	if (!ei->socket.wq) {
+		kmem_cache_free(sock_inode_cachep, ei);
+		return NULL;
+	}
+	init_waitqueue_head(&ei->socket.wq->wait);
+	ei->socket.wq->fasync_list = NULL;
 
-	ei->socket.fasync_list = NULL;
 	ei->socket.state = SS_UNCONNECTED;
 	ei->socket.flags = 0;
 	ei->socket.ops = NULL;
@@ -264,10 +269,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
+
+static void wq_free_rcu(struct rcu_head *head)
+{
+	struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
+
+	kfree(wq);
+}
+
 static void sock_destroy_inode(struct inode *inode)
 {
-	kmem_cache_free(sock_inode_cachep,
-			container_of(inode, struct socket_alloc, vfs_inode));
+	struct socket_alloc *ei;
+
+	ei = container_of(inode, struct socket_alloc, vfs_inode);
+	call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
+	kmem_cache_free(sock_inode_cachep, ei);
 }
 
 static void init_once(void *foo)
@@ -513,7 +529,7 @@ void sock_release(struct socket *sock)
 		module_put(owner);
 	}
 
-	if (sock->fasync_list)
+	if (sock->wq->fasync_list)
 		printk(KERN_ERR "sock_release: fasync list not empty!\n");
 
 	percpu_sub(sockets_in_use, 1);
@@ -1080,9 +1096,9 @@ static int sock_fasync(int fd, struct file *filp, int on)
 
 	lock_sock(sk);
 
-	fasync_helper(fd, filp, on, &sock->fasync_list);
+	fasync_helper(fd, filp, on, &sock->wq->fasync_list);
 
-	if (!sock->fasync_list)
+	if (!sock->wq->fasync_list)
 		sock_reset_flag(sk, SOCK_FASYNC);
 	else
 		sock_set_flag(sk, SOCK_FASYNC);
@@ -1091,12 +1107,20 @@ static int sock_fasync(int fd, struct file *filp, int on)
 	return 0;
 }
 
-/* This function may be called only under socket lock or callback_lock */
+/* This function may be called only under socket lock or callback_lock or rcu_lock */
 
 int sock_wake_async(struct socket *sock, int how, int band)
 {
-	if (!sock || !sock->fasync_list)
+	struct socket_wq *wq;
+
+	if (!sock)
 		return -1;
+	rcu_read_lock();
+	wq = rcu_dereference(sock->wq);
+	if (!wq || !wq->fasync_list) {
+		rcu_read_unlock();
+		return -1;
+	}
 	switch (how) {
 	case SOCK_WAKE_WAITD:
 		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
@@ -1108,11 +1132,12 @@ int sock_wake_async(struct socket *sock, int how, int band)
 		/* fall through */
 	case SOCK_WAKE_IO:
 call_kill:
-		kill_fasync(&sock->fasync_list, SIGIO, band);
+		kill_fasync(&wq->fasync_list, SIGIO, band);
 		break;
 	case SOCK_WAKE_URG:
-		kill_fasync(&sock->fasync_list, SIGURG, band);
+		kill_fasync(&wq->fasync_list, SIGURG, band);
 	}
+	rcu_read_unlock();
 	return 0;
 }
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 87c0360eaa25..fef2cc5e9d2b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -313,13 +313,16 @@ static inline int unix_writable(struct sock *sk)
 
 static void unix_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
 	if (unix_writable(sk)) {
-		if (sk_has_sleeper(sk))
-			wake_up_interruptible_sync(sk_sleep(sk));
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_sync(&wq->wait);
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 /* When dgram socket disconnects (or changes its peer), we clear its receive
@@ -406,9 +409,7 @@ static int unix_release_sock(struct sock *sk, int embrion)
 				skpair->sk_err = ECONNRESET;
 			unix_state_unlock(skpair);
 			skpair->sk_state_change(skpair);
-			read_lock(&skpair->sk_callback_lock);
 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
-			read_unlock(&skpair->sk_callback_lock);
 		}
 		sock_put(skpair); /* It may now die */
 		unix_peer(sk) = NULL;
@@ -1142,7 +1143,7 @@ restart:
 	newsk->sk_peercred.pid	= task_tgid_vnr(current);
 	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
 	newu = unix_sk(newsk);
-	newsk->sk_sleep		= &newu->peer_wait;
+	newsk->sk_wq		= &newu->peer_wq;
 	otheru = unix_sk(other);
 
 	/* copy address information from listening to new sock*/
@@ -1931,12 +1932,10 @@ static int unix_shutdown(struct socket *sock, int mode)
 			other->sk_shutdown |= peer_mode;
 			unix_state_unlock(other);
 			other->sk_state_change(other);
-			read_lock(&other->sk_callback_lock);
 			if (peer_mode == SHUTDOWN_MASK)
 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
 			else if (peer_mode & RCV_SHUTDOWN)
 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
-			read_unlock(&other->sk_callback_lock);
 		}
 		if (other)
 			sock_put(other);
-- 
cgit v1.2.3-59-g8ed1b


From 47d29646a2c1c147d8a7598aeac2c87dd71ed638 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 2 May 2010 02:21:44 -0700
Subject: net: Inline skb_pull() in eth_type_trans().

In commit 6be8ac2f ("[NET]: uninline skb_pull, de-bloats a lot")
we uninlined skb_pull.

But in some critical paths it makes sense to inline this thing
and it helps performance significantly.

Create an skb_pull_inline() so that we can do this in a way that
serves also as annotation.

Based upon a patch by Eric Dumazet.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 +++++
 net/core/skbuff.c      | 2 +-
 net/ethernet/eth.c     | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 82f5116a89e4..746a652b9f6f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1128,6 +1128,11 @@ static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
 	return skb->data += len;
 }
 
+static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int len)
+{
+	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
+}
+
 extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
 
 static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4218ff49bf13..8b9c109166a7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1051,7 +1051,7 @@ EXPORT_SYMBOL(skb_push);
  */
 unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
 {
-	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
+	return skb_pull_inline(skb, len);
 }
 EXPORT_SYMBOL(skb_pull);
 
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 0c0d272a9888..61ec0329316c 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -162,7 +162,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
-	skb_pull(skb, ETH_HLEN);
+	skb_pull_inline(skb, ETH_HLEN);
 	eth = eth_hdr(skb);
 
 	if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
-- 
cgit v1.2.3-59-g8ed1b


From b5cad0dfd3c80501330215b9a9ae31bcffbd7306 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sun, 2 May 2010 13:55:21 +0200
Subject: netfilter: ip_tables: fix compilation when debug is enabled

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/ipv4/netfilter/ip_tables.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 3e6af1036fbc..f92818f76671 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -591,7 +591,7 @@ check_entry(const struct ipt_entry *e, const char *name)
 	const struct ipt_entry_target *t;
 
 	if (!ip_checkentry(&e->ip)) {
-		duprintf("ip check failed %p %s.\n", e, name);
+		duprintf("ip check failed %p %s.\n", e, par->match->name);
 		return -EINVAL;
 	}
 
@@ -618,7 +618,7 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
 	ret = xt_check_match(par, m->u.match_size - sizeof(*m),
 	      ip->proto, ip->invflags & IPT_INV_PROTO);
 	if (ret < 0) {
-		duprintf("check failed for `%s'.\n", par.match->name);
+		duprintf("check failed for `%s'.\n", par->match->name);
 		return ret;
 	}
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From c29c949288d343be0d5c4f3091bc87ac047e1a09 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sun, 2 May 2010 14:04:54 +0200
Subject: netfilter: xtables: fix incorrect return code

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/netfilter/xt_multiport.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 83b77ceb264f..b21f90432247 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -117,7 +117,7 @@ static int multiport_mt_check(const struct xt_mtchk_param *par)
 	const struct xt_multiport_v1 *multiinfo = par->matchinfo;
 
 	return check(ip->proto, ip->invflags, multiinfo->flags,
-		     multiinfo->count);
+		     multiinfo->count) ? 0 : -EINVAL;
 }
 
 static int multiport_mt6_check(const struct xt_mtchk_param *par)
@@ -126,7 +126,7 @@ static int multiport_mt6_check(const struct xt_mtchk_param *par)
 	const struct xt_multiport_v1 *multiinfo = par->matchinfo;
 
 	return check(ip->proto, ip->invflags, multiinfo->flags,
-		     multiinfo->count);
+		     multiinfo->count) ? 0 : -EINVAL;
 }
 
 static struct xt_match multiport_mt_reg[] __read_mostly = {
-- 
cgit v1.2.3-59-g8ed1b


From ef53d702c3614fb919e8a8291033e3dbccfd1aea Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 9 Jul 2009 19:14:18 +0200
Subject: netfilter: xtables: dissolve do_match function

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/ipv4/netfilter/ip_tables.c  | 22 +++++-----------------
 net/ipv6/netfilter/ip6_tables.c | 22 +++++-----------------
 2 files changed, 10 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f92818f76671..265cedf88660 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -173,21 +173,6 @@ ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_DROP;
 }
 
-/* Performance critical - called for every packet */
-static inline bool
-do_match(const struct ipt_entry_match *m, const struct sk_buff *skb,
-	 struct xt_match_param *par)
-{
-	par->match     = m->u.kernel.match;
-	par->matchinfo = m->data;
-
-	/* Stop iteration if it doesn't match */
-	if (!m->u.kernel.match->match(skb, par))
-		return true;
-	else
-		return false;
-}
-
 /* Performance critical */
 static inline struct ipt_entry *
 get_entry(const void *base, unsigned int offset)
@@ -379,9 +364,12 @@ ipt_do_table(struct sk_buff *skb,
 			continue;
 		}
 
-		xt_ematch_foreach(ematch, e)
-			if (do_match(ematch, skb, &mtpar) != 0)
+		xt_ematch_foreach(ematch, e) {
+			mtpar.match     = ematch->u.kernel.match;
+			mtpar.matchinfo = ematch->data;
+			if (!mtpar.match->match(skb, &mtpar))
 				goto no_match;
+		}
 
 		ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7afa11773164..f8ac4a0b5899 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -205,21 +205,6 @@ ip6t_error(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_DROP;
 }
 
-/* Performance critical - called for every packet */
-static inline bool
-do_match(const struct ip6t_entry_match *m, const struct sk_buff *skb,
-	 struct xt_match_param *par)
-{
-	par->match     = m->u.kernel.match;
-	par->matchinfo = m->data;
-
-	/* Stop iteration if it doesn't match */
-	if (!m->u.kernel.match->match(skb, par))
-		return true;
-	else
-		return false;
-}
-
 static inline struct ip6t_entry *
 get_entry(const void *base, unsigned int offset)
 {
@@ -402,9 +387,12 @@ ip6t_do_table(struct sk_buff *skb,
 			continue;
 		}
 
-		xt_ematch_foreach(ematch, e)
-			if (do_match(ematch, skb, &mtpar) != 0)
+		xt_ematch_foreach(ematch, e) {
+			mtpar.match     = ematch->u.kernel.match;
+			mtpar.matchinfo = ematch->data;
+			if (!mtpar.match->match(skb, &mtpar))
 				goto no_match;
+		}
 
 		ADD_COUNTER(e->counters,
 			    ntohs(ipv6_hdr(skb)->payload_len) +
-- 
cgit v1.2.3-59-g8ed1b


From dee42870a423ad485129f43cddfe7275479f11d8 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sun, 2 May 2010 05:42:16 +0000
Subject: net: fix softnet_stat

Per cpu variable softnet_data.total was shared between IRQ and SoftIRQ context
without any protection. And enqueue_to_backlog should update the netdev_rx_stat
of the target CPU.

This patch renames softnet_data.total to softnet_data.processed: the number of
packets processed in upper levels (IP stacks).

softnet_stat data is moved into softnet_data.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/linux/netdevice.h |   17 +++++++----------
 net/core/dev.c            |   26 ++++++++++++--------------
 net/sched/sch_generic.c   |    2 +-
 3 files changed, 20 insertions(+), 25 deletions(-)
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 17 +++++++----------
 net/core/dev.c            | 26 ++++++++++++--------------
 net/sched/sch_generic.c   |  2 +-
 3 files changed, 20 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 40d4c20d034b..c39938f8a8d8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -218,16 +218,6 @@ struct neighbour;
 struct neigh_parms;
 struct sk_buff;
 
-struct netif_rx_stats {
-	unsigned total;
-	unsigned dropped;
-	unsigned time_squeeze;
-	unsigned cpu_collision;
-	unsigned received_rps;
-};
-
-DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat);
-
 struct netdev_hw_addr {
 	struct list_head	list;
 	unsigned char		addr[MAX_ADDR_LEN];
@@ -1390,6 +1380,12 @@ struct softnet_data {
 	struct sk_buff		*completion_queue;
 	struct sk_buff_head	process_queue;
 
+	/* stats */
+	unsigned		processed;
+	unsigned		time_squeeze;
+	unsigned		cpu_collision;
+	unsigned		received_rps;
+
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
 
@@ -1399,6 +1395,7 @@ struct softnet_data {
 	unsigned int		cpu;
 	unsigned int		input_queue_head;
 #endif
+	unsigned		dropped;
 	struct sk_buff_head	input_pkt_queue;
 	struct napi_struct	backlog;
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index 100dcbd29739..36d53be4fca6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2205,8 +2205,6 @@ int netdev_max_backlog __read_mostly = 1000;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;            /* old backlog weight */
 
-DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
-
 #ifdef CONFIG_RPS
 
 /* One global table that all flow-based protocols share. */
@@ -2366,7 +2364,7 @@ static void rps_trigger_softirq(void *data)
 	struct softnet_data *sd = data;
 
 	__napi_schedule(&sd->backlog);
-	__get_cpu_var(netdev_rx_stat).received_rps++;
+	sd->received_rps++;
 }
 
 #endif /* CONFIG_RPS */
@@ -2405,7 +2403,6 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 	sd = &per_cpu(softnet_data, cpu);
 
 	local_irq_save(flags);
-	__get_cpu_var(netdev_rx_stat).total++;
 
 	rps_lock(sd);
 	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
@@ -2429,9 +2426,9 @@ enqueue:
 		goto enqueue;
 	}
 
+	sd->dropped++;
 	rps_unlock(sd);
 
-	__get_cpu_var(netdev_rx_stat).dropped++;
 	local_irq_restore(flags);
 
 	kfree_skb(skb);
@@ -2806,7 +2803,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 			skb->dev = master;
 	}
 
-	__get_cpu_var(netdev_rx_stat).total++;
+	__get_cpu_var(softnet_data).processed++;
 
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
@@ -3490,7 +3487,7 @@ out:
 	return;
 
 softnet_break:
-	__get_cpu_var(netdev_rx_stat).time_squeeze++;
+	sd->time_squeeze++;
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 	goto out;
 }
@@ -3691,17 +3688,17 @@ static int dev_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct netif_rx_stats *softnet_get_online(loff_t *pos)
+static struct softnet_data *softnet_get_online(loff_t *pos)
 {
-	struct netif_rx_stats *rc = NULL;
+	struct softnet_data *sd = NULL;
 
 	while (*pos < nr_cpu_ids)
 		if (cpu_online(*pos)) {
-			rc = &per_cpu(netdev_rx_stat, *pos);
+			sd = &per_cpu(softnet_data, *pos);
 			break;
 		} else
 			++*pos;
-	return rc;
+	return sd;
 }
 
 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
@@ -3721,12 +3718,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
 
 static int softnet_seq_show(struct seq_file *seq, void *v)
 {
-	struct netif_rx_stats *s = v;
+	struct softnet_data *sd = v;
 
 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
-		   s->total, s->dropped, s->time_squeeze, 0,
+		   sd->processed, sd->dropped, sd->time_squeeze, 0,
 		   0, 0, 0, 0, /* was fastroute */
-		   s->cpu_collision, s->received_rps);
+		   sd->cpu_collision, sd->received_rps);
 	return 0;
 }
 
@@ -5869,6 +5866,7 @@ static int __init net_dev_init(void)
 	for_each_possible_cpu(i) {
 		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
+		memset(sd, 0, sizeof(*sd));
 		skb_queue_head_init(&sd->input_pkt_queue);
 		skb_queue_head_init(&sd->process_queue);
 		sd->completion_queue = NULL;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index aeddabfb8e4e..a969b111bd76 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -94,7 +94,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 		 * Another cpu is holding lock, requeue & delay xmits for
 		 * some time.
 		 */
-		__get_cpu_var(netdev_rx_stat).cpu_collision++;
+		__get_cpu_var(softnet_data).cpu_collision++;
 		ret = dev_requeue_skb(skb, q);
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From f7c65594f7148b778f41d591a701e94bb22428e4 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Fri, 30 Apr 2010 13:48:36 +0200
Subject: mac80211: fix ieee80211_find_sta[_by_hw]

Both of these functions can currently return
a station pointer that, to the driver, is
invalid (in IBSS mode only) because adding
the station failed. Check for that, and also
make ieee80211_find_sta() properly use the
per interface station search.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/sta_info.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 3de7a2260d65..730197591ab5 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -855,8 +855,12 @@ struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw,
 	struct sta_info *sta, *nxt;
 
 	/* Just return a random station ... first in list ... */
-	for_each_sta_info(hw_to_local(hw), addr, sta, nxt)
+	for_each_sta_info(hw_to_local(hw), addr, sta, nxt) {
+		if (!sta->uploaded)
+			return NULL;
 		return &sta->sta;
+	}
+
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw);
@@ -864,14 +868,19 @@ EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw);
 struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif,
 					 const u8 *addr)
 {
-	struct ieee80211_sub_if_data *sdata;
+	struct sta_info *sta;
 
 	if (!vif)
 		return NULL;
 
-	sdata = vif_to_sdata(vif);
+	sta = sta_info_get_bss(vif_to_sdata(vif), addr);
+	if (!sta)
+		return NULL;
+
+	if (!sta->uploaded)
+		return NULL;
 
-	return ieee80211_find_sta_by_hw(&sdata->local->hw, addr);
+	return &sta->sta;
 }
 EXPORT_SYMBOL(ieee80211_find_sta);
 
-- 
cgit v1.2.3-59-g8ed1b


From a75b4363eaafa99d909da4f1192322a78b074c73 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Sat, 1 May 2010 18:53:51 +0200
Subject: mac80211: allow controlling aggregation manually

This allows enabling TX and disabling both TX and
RX aggregation sessions manually in debugfs. It is
very useful for debugging session initiation and
teardown problems since with this you don't have
to force a lot of traffic to get aggregation and
thus have less data to analyse.

Also, to debug mac80211 code itself, make hwsim
"support" aggregation sessions. It will still just
transfer the frame, but go through the setup and
teardown handshakes.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/mac80211_hwsim.c |  3 +-
 net/mac80211/debugfs_sta.c            | 65 ++++++++++++++++++++++++++++++++++-
 2 files changed, 66 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index d016933d7573..ebcf43904291 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1299,7 +1299,8 @@ static int __init init_mac80211_hwsim(void)
 		hw->flags = IEEE80211_HW_MFP_CAPABLE |
 			    IEEE80211_HW_SIGNAL_DBM |
 			    IEEE80211_HW_SUPPORTS_STATIC_SMPS |
-			    IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS;
+			    IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS |
+			    IEEE80211_HW_AMPDU_AGGREGATION;
 
 		/* ask mac80211 to reserve space for magic */
 		hw->vif_data_size = sizeof(struct hwsim_vif_priv);
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 6bc9b07c3eda..e763f1529ddb 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -39,6 +39,13 @@ static const struct file_operations sta_ ##name## _ops = {		\
 	.open = mac80211_open_file_generic,				\
 }
 
+#define STA_OPS_RW(name)						\
+static const struct file_operations sta_ ##name## _ops = {		\
+	.read = sta_##name##_read,					\
+	.write = sta_##name##_write,					\
+	.open = mac80211_open_file_generic,				\
+}
+
 #define STA_FILE(name, field, format)					\
 		STA_READ_##format(name, field)				\
 		STA_OPS(name)
@@ -156,7 +163,63 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 
 	return simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
 }
-STA_OPS(agg_status);
+
+static ssize_t sta_agg_status_write(struct file *file, const char __user *userbuf,
+				    size_t count, loff_t *ppos)
+{
+	char _buf[12], *buf = _buf;
+	struct sta_info *sta = file->private_data;
+	bool start, tx;
+	unsigned long tid;
+	int ret;
+
+	if (count > sizeof(_buf))
+		return -EINVAL;
+
+	if (copy_from_user(buf, userbuf, count))
+		return -EFAULT;
+
+	buf[sizeof(_buf) - 1] = '\0';
+
+	if (strncmp(buf, "tx ", 3) == 0) {
+		buf += 3;
+		tx = true;
+	} else if (strncmp(buf, "rx ", 3) == 0) {
+		buf += 3;
+		tx = false;
+	} else
+		return -EINVAL;
+
+	if (strncmp(buf, "start ", 6) == 0) {
+		buf += 6;
+		start = true;
+		if (!tx)
+			return -EINVAL;
+	} else if (strncmp(buf, "stop ", 5) == 0) {
+		buf += 5;
+		start = false;
+	} else
+		return -EINVAL;
+
+	tid = simple_strtoul(buf, NULL, 0);
+
+	if (tid >= STA_TID_NUM)
+		return -EINVAL;
+
+	if (tx) {
+		if (start)
+			ret = ieee80211_start_tx_ba_session(&sta->sta, tid);
+		else
+			ret = ieee80211_stop_tx_ba_session(&sta->sta, tid,
+							   WLAN_BACK_RECIPIENT);
+	} else {
+		__ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_RECIPIENT, 3);
+		ret = 0;
+	}
+
+	return ret ?: count;
+}
+STA_OPS_RW(agg_status);
 
 static ssize_t sta_ht_capa_read(struct file *file, char __user *userbuf,
 				size_t count, loff_t *ppos)
-- 
cgit v1.2.3-59-g8ed1b


From be4a4b6a5d2f76393f545a2545fbaa1b65577e13 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Mon, 3 May 2010 08:49:48 +0200
Subject: mac80211: improve IBSS scanning

When IBSS is fixed to a frequency, it can still
scan to try to find the right BSSID. This makes
sense if the BSSID isn't also fixed, but it need
not scan all channels -- just one is sufficient.
Make it do that by moving the scan setup code to
ieee80211_request_internal_scan() and include
a channel variable setting.

Note that this can be further improved to start
the IBSS right away if both frequency and BSSID
are fixed.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ibss.c        |  9 ++++++---
 net/mac80211/ieee80211_i.h |  3 ++-
 net/mac80211/main.c        | 17 +----------------
 net/mac80211/scan.c        | 28 +++++++++++++++++++++++++++-
 4 files changed, 36 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index c585fced8584..ba752362b2b2 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -488,7 +488,9 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata)
 	printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other "
 	       "IBSS networks with same SSID (merge)\n", sdata->name);
 
-	ieee80211_request_internal_scan(sdata, ifibss->ssid, ifibss->ssid_len);
+	ieee80211_request_internal_scan(sdata,
+			ifibss->ssid, ifibss->ssid_len,
+			ifibss->fixed_channel ? ifibss->channel : NULL);
 }
 
 static void ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata)
@@ -595,8 +597,9 @@ static void ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata)
 		printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to "
 		       "join\n", sdata->name);
 
-		ieee80211_request_internal_scan(sdata, ifibss->ssid,
-						ifibss->ssid_len);
+		ieee80211_request_internal_scan(sdata,
+				ifibss->ssid, ifibss->ssid_len,
+				ifibss->fixed_channel ? ifibss->channel : NULL);
 	} else {
 		int interval = IEEE80211_SCAN_INTERVAL;
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 4e73660ebe99..c8077a3647c6 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1020,7 +1020,8 @@ void ieee80211_ibss_restart(struct ieee80211_sub_if_data *sdata);
 /* scan/BSS handling */
 void ieee80211_scan_work(struct work_struct *work);
 int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
-				    const u8 *ssid, u8 ssid_len);
+				    const u8 *ssid, u8 ssid_len,
+				    struct ieee80211_channel *chan);
 int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
 			   struct cfg80211_scan_request *req);
 void ieee80211_scan_cancel(struct ieee80211_local *local);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ebcca0eaf1dc..353b6b42d9c5 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -439,7 +439,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 	struct ieee80211_local *local = hw_to_local(hw);
 	int result;
 	enum ieee80211_band band;
-	int channels, i, j, max_bitrates;
+	int channels, max_bitrates;
 	bool supp_ht;
 	static const u32 cipher_suites[] = {
 		WLAN_CIPHER_SUITE_WEP40,
@@ -605,21 +605,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
 
 	ieee80211_led_init(local);
 
-	/* alloc internal scan request */
-	i = 0;
-	local->int_scan_req->ssids = &local->scan_ssid;
-	local->int_scan_req->n_ssids = 1;
-	for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
-		if (!hw->wiphy->bands[band])
-			continue;
-		for (j = 0; j < hw->wiphy->bands[band]->n_channels; j++) {
-			local->int_scan_req->channels[i] =
-				&hw->wiphy->bands[band]->channels[j];
-			i++;
-		}
-	}
-	local->int_scan_req->n_channels = i;
-
 	local->network_latency_notifier.notifier_call =
 		ieee80211_max_network_latency;
 	result = pm_qos_add_notifier(PM_QOS_NETWORK_LATENCY,
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index a9d40584e383..414651217b49 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -728,10 +728,12 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata,
 }
 
 int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
-				    const u8 *ssid, u8 ssid_len)
+				    const u8 *ssid, u8 ssid_len,
+				    struct ieee80211_channel *chan)
 {
 	struct ieee80211_local *local = sdata->local;
 	int ret = -EBUSY;
+	enum nl80211_band band;
 
 	mutex_lock(&local->scan_mtx);
 
@@ -739,6 +741,30 @@ int ieee80211_request_internal_scan(struct ieee80211_sub_if_data *sdata,
 	if (local->scan_req)
 		goto unlock;
 
+	/* fill internal scan request */
+	if (!chan) {
+		int i, nchan = 0;
+
+		for (band = 0; band < IEEE80211_NUM_BANDS; band++) {
+			if (!local->hw.wiphy->bands[band])
+				continue;
+			for (i = 0;
+			     i < local->hw.wiphy->bands[band]->n_channels;
+			     i++) {
+				local->int_scan_req->channels[nchan] =
+				    &local->hw.wiphy->bands[band]->channels[i];
+				nchan++;
+			}
+		}
+
+		local->int_scan_req->n_channels = nchan;
+	} else {
+		local->int_scan_req->channels[0] = chan;
+		local->int_scan_req->n_channels = 1;
+	}
+
+	local->int_scan_req->ssids = &local->scan_ssid;
+	local->int_scan_req->n_ssids = 1;
 	memcpy(local->int_scan_req->ssids[0].ssid, ssid, IEEE80211_MAX_SSID_LEN);
 	local->int_scan_req->ssids[0].ssid_len = ssid_len;
 
-- 
cgit v1.2.3-59-g8ed1b


From a2f3be17c07ad9bd45ab300f79642ecb39cfb553 Mon Sep 17 00:00:00 2001
From: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Date: Mon, 3 May 2010 03:22:18 +0000
Subject: unix/garbage: kill copy of the skb queue walker
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Worse yet, it seems that its arguments were in reverse order. Also
remove one related helper which seems hardly worth keeping.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/unix/garbage.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 14c22c3768da..c8df6fda0b1f 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -153,15 +153,6 @@ void unix_notinflight(struct file *fp)
 	}
 }
 
-static inline struct sk_buff *sock_queue_head(struct sock *sk)
-{
-	return (struct sk_buff *)&sk->sk_receive_queue;
-}
-
-#define receive_queue_for_each_skb(sk, next, skb) \
-	for (skb = sock_queue_head(sk)->next, next = skb->next; \
-	     skb != sock_queue_head(sk); skb = next, next = skb->next)
-
 static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 			  struct sk_buff_head *hitlist)
 {
@@ -169,7 +160,7 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
 	struct sk_buff *next;
 
 	spin_lock(&x->sk_receive_queue.lock);
-	receive_queue_for_each_skb(x, next, skb) {
+	skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
 		/*
 		 *	Do we have file descriptors ?
 		 */
@@ -225,7 +216,7 @@ static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
 		 * and perform a scan on them as well.
 		 */
 		spin_lock(&x->sk_receive_queue.lock);
-		receive_queue_for_each_skb(x, next, skb) {
+		skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
 			u = unix_sk(skb->sk);
 
 			/*
-- 
cgit v1.2.3-59-g8ed1b


From 4f70ecca9c57731b4acbe5043eb22e4416bd2368 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 3 May 2010 10:50:14 +0000
Subject: net: rcu fixes

Add hlist_for_each_entry_rcu_bh() and
hlist_for_each_entry_continue_rcu_bh() macros, and use them in
ipv6_get_ifaddr(), if6_get_first() and if6_get_next() to fix lockdep
warnings.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rculist.h | 29 +++++++++++++++++++++++++++++
 net/ipv6/addrconf.c     | 16 ++++++++--------
 2 files changed, 37 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 004908b104d5..4ec3b38ce9c5 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -428,6 +428,23 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
 		pos = rcu_dereference_raw(pos->next))
 
+/**
+ * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock_bh().
+ */
+#define hlist_for_each_entry_rcu_bh(tpos, pos, head, member)		 \
+	for (pos = rcu_dereference_bh((head)->first);			 \
+		pos && ({ prefetch(pos->next); 1; }) &&			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
+		pos = rcu_dereference_bh(pos->next))
+
 /**
  * hlist_for_each_entry_continue_rcu - iterate over a hlist continuing after current point
  * @tpos:	the type * to use as a loop cursor.
@@ -440,6 +457,18 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 	     ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });  \
 	     pos = rcu_dereference(pos->next))
 
+/**
+ * hlist_for_each_entry_continue_rcu_bh - iterate over a hlist continuing after current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member)		\
+	for (pos = rcu_dereference_bh((pos)->next);			\
+	     pos && ({ prefetch(pos->next); 1; }) &&			\
+	     ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });  \
+	     pos = rcu_dereference_bh(pos->next))
+
 
 #endif	/* __KERNEL__ */
 #endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 34d2d649e396..3984f52181f4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1346,7 +1346,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
 	struct hlist_node *node;
 
 	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+	hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -2959,7 +2959,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
 
 	for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
 		struct hlist_node *n;
-		hlist_for_each_entry_rcu(ifa, n, &inet6_addr_lst[state->bucket],
+		hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket],
 					 addr_lst)
 			if (net_eq(dev_net(ifa->idev->dev), net))
 				return ifa;
@@ -2974,12 +2974,12 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct hlist_node *n = &ifa->addr_lst;
 
-	hlist_for_each_entry_continue_rcu(ifa, n, addr_lst)
+	hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst)
 		if (net_eq(dev_net(ifa->idev->dev), net))
 			return ifa;
 
 	while (++state->bucket < IN6_ADDR_HSIZE) {
-		hlist_for_each_entry(ifa, n,
+		hlist_for_each_entry_rcu_bh(ifa, n,
 				     &inet6_addr_lst[state->bucket], addr_lst) {
 			if (net_eq(dev_net(ifa->idev->dev), net))
 				return ifa;
@@ -3000,7 +3000,7 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(rcu)
+	__acquires(rcu_bh)
 {
 	rcu_read_lock_bh();
 	return if6_get_idx(seq, *pos);
@@ -3016,7 +3016,7 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void if6_seq_stop(struct seq_file *seq, void *v)
-	__releases(rcu)
+	__releases(rcu_bh)
 {
 	rcu_read_unlock_bh();
 }
@@ -3093,7 +3093,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
 	unsigned int hash = ipv6_addr_hash(addr);
 
 	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu(ifp, n, &inet6_addr_lst[hash], addr_lst) {
+	hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -3127,7 +3127,7 @@ static void addrconf_verify(unsigned long foo)
 
 	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
 restart:
-		hlist_for_each_entry_rcu(ifp, node,
+		hlist_for_each_entry_rcu_bh(ifp, node,
 					 &inet6_addr_lst[i], addr_lst) {
 			unsigned long age;
 
-- 
cgit v1.2.3-59-g8ed1b


From 93bb64eac10aad3dae6178d7da94765f207d121f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 3 May 2010 23:18:14 -0700
Subject: net: skb_free_datagram_locked() fix

Commit 4b0b72f7dd617b ( net: speedup udp receive path )
introduced a bug in skb_free_datagram_locked().

We should not skb_orphan() skb if we don't have the guarantee we are the
last skb user, this might happen with MSG_PEEK concurrent users.

To keep socket locked for the smallest period of time, we split
consume_skb() logic, inlined in skb_free_datagram_locked()

Reported-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/datagram.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/datagram.c b/net/core/datagram.c
index 95b851f3d713..e0097531417a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -229,13 +229,18 @@ EXPORT_SYMBOL(skb_free_datagram);
 
 void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
 {
+	if (likely(atomic_read(&skb->users) == 1))
+		smp_rmb();
+	else if (likely(!atomic_dec_and_test(&skb->users)))
+		return;
+
 	lock_sock_bh(sk);
 	skb_orphan(skb);
 	sk_mem_reclaim_partial(sk);
 	unlock_sock_bh(sk);
 
-	/* skb is now orphaned, might be freed outside of locked section */
-	consume_skb(skb);
+	/* skb is now orphaned, can be freed outside of locked section */
+	__kfree_skb(skb);
 }
 EXPORT_SYMBOL(skb_free_datagram_locked);
 
-- 
cgit v1.2.3-59-g8ed1b


From ec7d2f2cf3a1b76202986519ec4f8ec75b2de232 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 5 May 2010 01:07:37 -0700
Subject: net: __alloc_skb() speedup

With following patch I can reach maximum rate of my pktgen+udpsink
simulator :
- 'old' machine : dual quad core E5450  @3.00GHz
- 64 UDP rx flows (only differ by destination port)
- RPS enabled, NIC interrupts serviced on cpu0
- rps dispatched on 7 other cores. (~130.000 IPI per second)
- SLAB allocator (faster than SLUB in this workload)
- tg3 NIC
- 1.080.000 pps without a single drop at NIC level.

Idea is to add two prefetchw() calls in __alloc_skb(), one to prefetch
first sk_buff cache line, the second to prefetch the shinfo part.

Also using one memset() to initialize all skb_shared_info fields instead
of one by one to reduce number of instructions, using long word moves.

All skb_shared_info fields before 'dataref' are cleared in
__alloc_skb().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  7 ++++++-
 net/core/skbuff.c      | 21 +++++----------------
 2 files changed, 11 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 746a652b9f6f..88d55395a27c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -187,7 +187,6 @@ union skb_shared_tx {
  * the end of the header data, ie. at skb->end.
  */
 struct skb_shared_info {
-	atomic_t	dataref;
 	unsigned short	nr_frags;
 	unsigned short	gso_size;
 	/* Warning: this field is not always filled in (UFO)! */
@@ -197,6 +196,12 @@ struct skb_shared_info {
 	union skb_shared_tx tx_flags;
 	struct sk_buff	*frag_list;
 	struct skb_shared_hwtstamps hwtstamps;
+
+	/*
+	 * Warning : all fields before dataref are cleared in __alloc_skb()
+	 */
+	atomic_t	dataref;
+
 	skb_frag_t	frags[MAX_SKB_FRAGS];
 	/* Intermediate layers must ensure that destructor_arg
 	 * remains valid until skb destructor */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8b9c109166a7..a9b0e1f77806 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -181,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
 	if (!skb)
 		goto out;
+	prefetchw(skb);
 
 	size = SKB_DATA_ALIGN(size);
 	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
 			gfp_mask, node);
 	if (!data)
 		goto nodata;
+	prefetchw(data + size);
 
 	/*
 	 * Only clear those fields we need to clear, not those that we will
@@ -208,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags  = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
@@ -505,16 +500,10 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
 		return 0;
 
 	skb_release_head_state(skb);
+
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->data = skb->head + NET_SKB_PAD;
-- 
cgit v1.2.3-59-g8ed1b


From adfba3c7c026a6a5560d2a43fefc9b198cb74462 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 5 May 2010 15:33:55 +0200
Subject: mac80211: use fixed channel in ibss join when appropriate

"mac80211: improve IBSS scanning" was missing a hunk.
This adds that hunk as originally intended.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/ibss.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index ba752362b2b2..d5855ae387e8 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -907,6 +907,12 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 	sdata->u.ibss.channel = params->channel;
 	sdata->u.ibss.fixed_channel = params->channel_fixed;
 
+	/* fix ourselves to that channel now already */
+	if (params->channel_fixed) {
+		sdata->local->oper_channel = params->channel;
+		sdata->local->oper_channel_type = NL80211_CHAN_NO_HT;
+	}
+
 	if (params->ie) {
 		sdata->u.ibss.ie = kmemdup(params->ie, params->ie_len,
 					   GFP_KERNEL);
-- 
cgit v1.2.3-59-g8ed1b


From 0e34e93177fb1f642cab080e0bde664c06c7183a Mon Sep 17 00:00:00 2001
From: WANG Cong <amwang@redhat.com>
Date: Thu, 6 May 2010 00:47:21 -0700
Subject: netpoll: add generic support for bridge and bonding devices

This whole patchset is for adding netpoll support to bridge and bonding
devices. I already tested it for bridge, bonding, bridge over bonding,
and bonding over bridge. It looks fine now.

To make bridge and bonding support netpoll, we need to adjust
some netpoll generic code. This patch does the following things:

1) introduce two new priv_flags for struct net_device:
   IFF_IN_NETPOLL which identifies we are processing a netpoll;
   IFF_DISABLE_NETPOLL is used to disable netpoll support for a device
   at run-time;

2) introduce one new method for netdev_ops:
   ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
     removed.

3) introduce netpoll_poll_dev() which takes a struct net_device * parameter;
   export netpoll_send_skb() and netpoll_poll_dev() which will be used later;

4) hide a pointer to struct netpoll in struct netpoll_info, ditto.

5) introduce ->real_dev for struct netpoll.

6) introduce a new status NETDEV_BONDING_DESLAVE, which is used to disable
   netconsole before releasing a slave, to avoid deadlocks.

Cc: David Miller <davem@davemloft.net>
Cc: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: WANG Cong <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netconsole.c  | 15 +++++++++------
 include/linux/if.h        |  2 ++
 include/linux/netdevice.h |  1 +
 include/linux/netpoll.h   |  5 +++++
 include/linux/notifier.h  |  1 +
 net/core/netpoll.c        | 26 +++++++++++++++++++++-----
 6 files changed, 39 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index a361dea35574..ca142c47b2e4 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -665,7 +665,8 @@ static int netconsole_netdev_event(struct notifier_block *this,
 	struct netconsole_target *nt;
 	struct net_device *dev = ptr;
 
-	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER))
+	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER ||
+	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN))
 		goto done;
 
 	spin_lock_irqsave(&target_list_lock, flags);
@@ -677,19 +678,21 @@ static int netconsole_netdev_event(struct notifier_block *this,
 				strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ);
 				break;
 			case NETDEV_UNREGISTER:
-				if (!nt->enabled)
-					break;
 				netpoll_cleanup(&nt->np);
+				/* Fall through */
+			case NETDEV_GOING_DOWN:
+			case NETDEV_BONDING_DESLAVE:
 				nt->enabled = 0;
-				printk(KERN_INFO "netconsole: network logging stopped"
-					", interface %s unregistered\n",
-					dev->name);
 				break;
 			}
 		}
 		netconsole_target_put(nt);
 	}
 	spin_unlock_irqrestore(&target_list_lock, flags);
+	if (event == NETDEV_UNREGISTER || event == NETDEV_BONDING_DESLAVE)
+		printk(KERN_INFO "netconsole: network logging stopped, "
+			"interface %s %s\n",  dev->name,
+			event == NETDEV_UNREGISTER ? "unregistered" : "released slaves");
 
 done:
 	return NOTIFY_DONE;
diff --git a/include/linux/if.h b/include/linux/if.h
index 3a9f410a296b..be350e62a905 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -71,6 +71,8 @@
 					 * release skb->dst
 					 */
 #define IFF_DONT_BRIDGE 0x800		/* disallow bridging this ether dev */
+#define IFF_IN_NETPOLL	0x1000		/* whether we are processing netpoll */
+#define IFF_DISABLE_NETPOLL	0x2000	/* disable netpoll at run-time */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 98112fbddefd..69022d47d6f2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -724,6 +724,7 @@ struct net_device_ops {
 						        unsigned short vid);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
+	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
 						  int queue, u8 *mac);
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index a765ea898549..017e604d05f8 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -14,6 +14,7 @@
 
 struct netpoll {
 	struct net_device *dev;
+	struct net_device *real_dev;
 	char dev_name[IFNAMSIZ];
 	const char *name;
 	void (*rx_hook)(struct netpoll *, int, char *, int);
@@ -36,8 +37,11 @@ struct netpoll_info {
 	struct sk_buff_head txq;
 
 	struct delayed_work tx_work;
+
+	struct netpoll *netpoll;
 };
 
+void netpoll_poll_dev(struct net_device *dev);
 void netpoll_poll(struct netpoll *np);
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
 void netpoll_print_options(struct netpoll *np);
@@ -47,6 +51,7 @@ int netpoll_trap(void);
 void netpoll_set_trap(int trap);
 void netpoll_cleanup(struct netpoll *np);
 int __netpoll_rx(struct sk_buff *skb);
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
 
 
 #ifdef CONFIG_NETPOLL
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 9c5d3fad01f3..7c3609622334 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -206,6 +206,7 @@ static inline int notifier_to_errno(int ret)
 #define NETDEV_POST_TYPE_CHANGE	0x000F
 #define NETDEV_POST_INIT	0x0010
 #define NETDEV_UNREGISTER_BATCH 0x0011
+#define NETDEV_BONDING_DESLAVE  0x0012
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a58f59b97597..94825b109551 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -179,9 +179,8 @@ static void service_arp_queue(struct netpoll_info *npi)
 	}
 }
 
-void netpoll_poll(struct netpoll *np)
+void netpoll_poll_dev(struct net_device *dev)
 {
-	struct net_device *dev = np->dev;
 	const struct net_device_ops *ops;
 
 	if (!dev || !netif_running(dev))
@@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np)
 	zap_completion_queue();
 }
 
+void netpoll_poll(struct netpoll *np)
+{
+	netpoll_poll_dev(np->dev);
+}
+
 static void refill_skbs(void)
 {
 	struct sk_buff *skb;
@@ -282,7 +286,7 @@ static int netpoll_owner_active(struct net_device *dev)
 	return 0;
 }
 
-static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	int status = NETDEV_TX_BUSY;
 	unsigned long tries;
@@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
 				if (!netif_tx_queue_stopped(txq)) {
+					dev->priv_flags |= IFF_IN_NETPOLL;
 					status = ops->ndo_start_xmit(skb, dev);
+					dev->priv_flags &= ~IFF_IN_NETPOLL;
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);
 				}
@@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np)
 		atomic_inc(&npinfo->refcnt);
 	}
 
-	if (!ndev->netdev_ops->ndo_poll_controller) {
+	npinfo->netpoll = np;
+
+	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
+	    !ndev->netdev_ops->ndo_poll_controller) {
 		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
 		       np->name, np->dev_name);
 		err = -ENOTSUPP;
@@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np)
 			}
 
 			if (atomic_dec_and_test(&npinfo->refcnt)) {
+				const struct net_device_ops *ops;
 				skb_queue_purge(&npinfo->arp_tx);
 				skb_queue_purge(&npinfo->txq);
 				cancel_rearming_delayed_work(&npinfo->tx_work);
@@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np)
 				/* clean after last, unfinished work */
 				__skb_queue_purge(&npinfo->txq);
 				kfree(npinfo);
-				np->dev->npinfo = NULL;
+				ops = np->dev->netdev_ops;
+				if (ops->ndo_netpoll_cleanup)
+					ops->ndo_netpoll_cleanup(np->dev);
+				else
+					np->dev->npinfo = NULL;
 			}
 		}
 
@@ -908,6 +922,7 @@ void netpoll_set_trap(int trap)
 		atomic_dec(&trapped);
 }
 
+EXPORT_SYMBOL(netpoll_send_skb);
 EXPORT_SYMBOL(netpoll_set_trap);
 EXPORT_SYMBOL(netpoll_trap);
 EXPORT_SYMBOL(netpoll_print_options);
@@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options);
 EXPORT_SYMBOL(netpoll_setup);
 EXPORT_SYMBOL(netpoll_cleanup);
 EXPORT_SYMBOL(netpoll_send_udp);
+EXPORT_SYMBOL(netpoll_poll_dev);
 EXPORT_SYMBOL(netpoll_poll);
-- 
cgit v1.2.3-59-g8ed1b


From c06ee961d3c0e51009cbd0e123b61fbb97f37d0b Mon Sep 17 00:00:00 2001
From: WANG Cong <amwang@redhat.com>
Date: Thu, 6 May 2010 00:48:24 -0700
Subject: bridge: make bridge support netpoll

Based on the previous patch, make bridge support netpoll by:

1) implement the 2 methods to support netpoll for bridge;

2) modify the netpoll state while forwarding packets via the bridge;

3) disable netpoll support of the bridge when a device that does not
   support netpoll is added to the bridge;

4) enable netpoll support when all underlying devices support netpoll.

Cc: David Miller <davem@davemloft.net>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Stephen Hemminger <shemminger@linux-foundation.org>
Cc: Matt Mackall <mpm@selenic.com>
Signed-off-by: WANG Cong <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c  | 59 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_forward.c | 23 ++++++++++++++++++-
 net/bridge/br_if.c      | 25 +++++++++++++++++++++
 net/bridge/br_private.h |  2 ++
 4 files changed, 108 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 82599405dc15..074c59690fc5 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -13,8 +13,10 @@
 
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
+#include <linux/list.h>
 
 #include <asm/uaccess.h>
 #include "br_private.h"
@@ -188,6 +190,59 @@ static int br_set_tx_csum(struct net_device *dev, u32 data)
 	return 0;
 }
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+bool br_devices_support_netpoll(struct net_bridge *br)
+{
+	struct net_bridge_port *p;
+	bool ret = true;
+	int count = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&br->lock, flags);
+	list_for_each_entry(p, &br->port_list, list) {
+		count++;
+		if ((p->dev->priv_flags & IFF_DISABLE_NETPOLL) ||
+		    !p->dev->netdev_ops->ndo_poll_controller)
+			ret = false;
+	}
+	spin_unlock_irqrestore(&br->lock, flags);
+	return count != 0 && ret;
+}
+
+static void br_poll_controller(struct net_device *br_dev)
+{
+	struct netpoll *np = br_dev->npinfo->netpoll;
+
+	if (np->real_dev != br_dev)
+		netpoll_poll_dev(np->real_dev);
+}
+
+void br_netpoll_cleanup(struct net_device *br_dev)
+{
+	struct net_bridge *br = netdev_priv(br_dev);
+	struct net_bridge_port *p, *n;
+	const struct net_device_ops *ops;
+
+	br->dev->npinfo = NULL;
+	list_for_each_entry_safe(p, n, &br->port_list, list) {
+		if (p->dev) {
+			ops = p->dev->netdev_ops;
+			if (ops->ndo_netpoll_cleanup)
+				ops->ndo_netpoll_cleanup(p->dev);
+			else
+				p->dev->npinfo = NULL;
+		}
+	}
+}
+
+#else
+
+void br_netpoll_cleanup(struct net_device *br_dev)
+{
+}
+
+#endif
+
 static const struct ethtool_ops br_ethtool_ops = {
 	.get_drvinfo    = br_getinfo,
 	.get_link	= ethtool_op_get_link,
@@ -211,6 +266,10 @@ static const struct net_device_ops br_netdev_ops = {
 	.ndo_set_multicast_list	 = br_dev_set_multicast_list,
 	.ndo_change_mtu		 = br_change_mtu,
 	.ndo_do_ioctl		 = br_dev_ioctl,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_netpoll_cleanup	 = br_netpoll_cleanup,
+	.ndo_poll_controller	 = br_poll_controller,
+#endif
 };
 
 static void br_dev_free(struct net_device *dev)
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 396f077216a3..92ad9feb199d 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/skbuff.h>
 #include <linux/if_vlan.h>
 #include <linux/netfilter_bridge.h>
@@ -50,7 +51,13 @@ int br_dev_queue_push_xmit(struct sk_buff *skb)
 		else {
 			skb_push(skb, ETH_HLEN);
 
-			dev_queue_xmit(skb);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+			if (unlikely(skb->dev->priv_flags & IFF_IN_NETPOLL)) {
+				netpoll_send_skb(skb->dev->npinfo->netpoll, skb);
+				skb->dev->priv_flags &= ~IFF_IN_NETPOLL;
+			} else
+#endif
+				dev_queue_xmit(skb);
 		}
 	}
 
@@ -66,9 +73,23 @@ int br_forward_finish(struct sk_buff *skb)
 
 static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
 {
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	struct net_bridge *br = to->br;
+	if (unlikely(br->dev->priv_flags & IFF_IN_NETPOLL)) {
+		struct netpoll *np;
+		to->dev->npinfo = skb->dev->npinfo;
+		np = skb->dev->npinfo->netpoll;
+		np->real_dev = np->dev = to->dev;
+		to->dev->priv_flags |= IFF_IN_NETPOLL;
+	}
+#endif
 	skb->dev = to->dev;
 	NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
 			br_forward_finish);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (skb->dev->npinfo)
+		skb->dev->npinfo->netpoll->dev = br->dev;
+#endif
 }
 
 static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 521439333316..537bdd60d9b9 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -13,6 +13,7 @@
 
 #include <linux/kernel.h>
 #include <linux/netdevice.h>
+#include <linux/netpoll.h>
 #include <linux/ethtool.h>
 #include <linux/if_arp.h>
 #include <linux/module.h>
@@ -153,6 +154,14 @@ static void del_nbp(struct net_bridge_port *p)
 	kobject_uevent(&p->kobj, KOBJ_REMOVE);
 	kobject_del(&p->kobj);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (br_devices_support_netpoll(br))
+		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+	if (dev->netdev_ops->ndo_netpoll_cleanup)
+		dev->netdev_ops->ndo_netpoll_cleanup(dev);
+	else
+		dev->npinfo = NULL;
+#endif
 	call_rcu(&p->rcu, destroy_nbp_rcu);
 }
 
@@ -165,6 +174,8 @@ static void del_br(struct net_bridge *br, struct list_head *head)
 		del_nbp(p);
 	}
 
+	br_netpoll_cleanup(br->dev);
+
 	del_timer_sync(&br->gc_timer);
 
 	br_sysfs_delbr(br->dev);
@@ -444,6 +455,20 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	if (br_devices_support_netpoll(br)) {
+		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+		if (br->dev->npinfo)
+			dev->npinfo = br->dev->npinfo;
+	} else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) {
+		br->dev->priv_flags |= IFF_DISABLE_NETPOLL;
+		printk(KERN_INFO "New device %s does not support netpoll\n",
+			dev->name);
+		printk(KERN_INFO "Disabling netpoll for %s\n",
+			br->dev->name);
+	}
+#endif
+
 	return 0;
 err2:
 	br_fdb_delete_by_port(br, p, 1);
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 018499ebe19d..3d2d3fe0a97e 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -253,6 +253,8 @@ static inline int br_is_root_bridge(const struct net_bridge *br)
 extern void br_dev_setup(struct net_device *dev);
 extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
 			       struct net_device *dev);
+extern bool br_devices_support_netpoll(struct net_bridge *br);
+extern void br_netpoll_cleanup(struct net_device *br_dev);
 
 /* br_fdb.c */
 extern int br_fdb_init(void);
-- 
cgit v1.2.3-59-g8ed1b


From d6bc0149d8f2300bffa03ea6fea3ca39744277a6 Mon Sep 17 00:00:00 2001
From: Bjørn Mork <bjorn@mork.no>
Date: Thu, 6 May 2010 03:44:35 +0000
Subject: ipv6: udp: make short packet logging consistent with ipv4
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adding addresses and ports to the short packet log message,
like ipv4/udp.c does it, makes these messages a lot more useful:

[  822.182450] UDPv6: short packet: From [2001:db8:ffb4:3::1]:47839 23715/178 to [2001:db8:ffb4:3:5054:ff:feff:200]:1234

This requires us to drop logging in case pskb_may_pull() fails,
which also is consistent with ipv4/udp.c

Signed-off-by: Bjørn Mork <bjorn@mork.no>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 79359c8380bc..3d7a2c0b836a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -699,7 +699,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	u32 ulen = 0;
 
 	if (!pskb_may_pull(skb, sizeof(struct udphdr)))
-		goto short_packet;
+		goto discard;
 
 	saddr = &ipv6_hdr(skb)->saddr;
 	daddr = &ipv6_hdr(skb)->daddr;
@@ -781,9 +781,14 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 	return 0;
 
 short_packet:
-	LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n",
+	LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n",
 		       proto == IPPROTO_UDPLITE ? "-Lite" : "",
-		       ulen, skb->len);
+		       saddr,
+		       ntohs(uh->source),
+		       ulen,
+		       skb->len,
+		       daddr,
+		       ntohs(uh->dest));
 
 discard:
 	UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
-- 
cgit v1.2.3-59-g8ed1b


From eecfd7c4e36ff532d895885971d01d049bd3e014 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 6 May 2010 22:07:48 -0700
Subject: rps: Various optimizations

Introduce ____napi_schedule() helper for callers in irq disabled
contexts. rps_trigger_softirq() becomes a leaf function.

Use container_of() in process_backlog() instead of accessing per_cpu
address.

Use a custom inlined version of __napi_complete() in process_backlog()
to avoid one locked instruction :

 only the current cpu owns and manipulates this napi,
 and NAPI_STATE_SCHED is the only possible flag set on backlog,
 so we can use a plain write instead of clear_bit(),
 and we don't need an smp_mb() memory barrier, since when RPS is on
 the backlog is protected by a spinlock.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 36d53be4fca6..32611c8f1219 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2205,6 +2205,14 @@ int netdev_max_backlog __read_mostly = 1000;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;            /* old backlog weight */
 
+/* Called with irq disabled */
+static inline void ____napi_schedule(struct softnet_data *sd,
+				     struct napi_struct *napi)
+{
+	list_add_tail(&napi->poll_list, &sd->poll_list);
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+}
+
 #ifdef CONFIG_RPS
 
 /* One global table that all flow-based protocols share. */
@@ -2363,7 +2371,7 @@ static void rps_trigger_softirq(void *data)
 {
 	struct softnet_data *sd = data;
 
-	__napi_schedule(&sd->backlog);
+	____napi_schedule(sd, &sd->backlog);
 	sd->received_rps++;
 }
 
@@ -2421,7 +2429,7 @@ enqueue:
 		/* Schedule NAPI for backlog device */
 		if (napi_schedule_prep(&sd->backlog)) {
 			if (!rps_ipi_queued(sd))
-				__napi_schedule(&sd->backlog);
+				____napi_schedule(sd, &sd->backlog);
 		}
 		goto enqueue;
 	}
@@ -3280,7 +3288,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
-	struct softnet_data *sd = &__get_cpu_var(softnet_data);
+	struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
 
 #ifdef CONFIG_RPS
 	/* Check if we have pending ipi, its better to send them now,
@@ -3313,7 +3321,16 @@ static int process_backlog(struct napi_struct *napi, int quota)
 						   &sd->process_queue);
 		}
 		if (qlen < quota - work) {
-			__napi_complete(napi);
+			/*
+			 * Inline a custom version of __napi_complete().
+			 * only current cpu owns and manipulates this napi,
+			 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
+			 * we can use a plain write instead of clear_bit(),
+			 * and we dont need an smp_mb() memory barrier.
+			 */
+			list_del(&napi->poll_list);
+			napi->state = 0;
+
 			quota = work + qlen;
 		}
 		rps_unlock(sd);
@@ -3334,8 +3351,7 @@ void __napi_schedule(struct napi_struct *n)
 	unsigned long flags;
 
 	local_irq_save(flags);
-	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	____napi_schedule(&__get_cpu_var(softnet_data), n);
 	local_irq_restore(flags);
 }
 EXPORT_SYMBOL(__napi_schedule);
-- 
cgit v1.2.3-59-g8ed1b


From ac8dd506e40ee2c7fcc61654a44c32555a0a8d6c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 5 May 2010 09:44:02 +0200
Subject: mac80211: fix BSS info reconfiguration

When reconfiguring an interface due to a previous
hardware restart, mac80211 will currently include
the new IBSS flag on non-IBSS interfaces which may
confuse drivers.

Instead of doing the ~0 trick, simply spell out
which things are going to be reconfigured.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h |  2 ++
 net/mac80211/util.c    | 25 ++++++++++++++++++++-----
 2 files changed, 22 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 00502b1a9e38..0528615ac827 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -160,6 +160,8 @@ enum ieee80211_bss_change {
 	BSS_CHANGED_BEACON_ENABLED	= 1<<9,
 	BSS_CHANGED_CQM			= 1<<10,
 	BSS_CHANGED_IBSS		= 1<<11,
+
+	/* when adding here, make sure to change ieee80211_reconfig */
 };
 
 /**
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 2b75b4fb68f4..5b79d552780a 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1160,18 +1160,33 @@ int ieee80211_reconfig(struct ieee80211_local *local)
 
 	/* Finally also reconfigure all the BSS information */
 	list_for_each_entry(sdata, &local->interfaces, list) {
-		u32 changed = ~0;
+		u32 changed;
+
 		if (!ieee80211_sdata_running(sdata))
 			continue;
+
+		/* common change flags for all interface types */
+		changed = BSS_CHANGED_ERP_CTS_PROT |
+			  BSS_CHANGED_ERP_PREAMBLE |
+			  BSS_CHANGED_ERP_SLOT |
+			  BSS_CHANGED_HT |
+			  BSS_CHANGED_BASIC_RATES |
+			  BSS_CHANGED_BEACON_INT |
+			  BSS_CHANGED_BSSID |
+			  BSS_CHANGED_CQM;
+
 		switch (sdata->vif.type) {
 		case NL80211_IFTYPE_STATION:
-			/* disable beacon change bits */
-			changed &= ~(BSS_CHANGED_BEACON |
-				     BSS_CHANGED_BEACON_ENABLED);
-			/* fall through */
+			changed |= BSS_CHANGED_ASSOC;
+			ieee80211_bss_info_change_notify(sdata, changed);
+			break;
 		case NL80211_IFTYPE_ADHOC:
+			changed |= BSS_CHANGED_IBSS;
+			/* fall through */
 		case NL80211_IFTYPE_AP:
 		case NL80211_IFTYPE_MESH_POINT:
+			changed |= BSS_CHANGED_BEACON |
+				   BSS_CHANGED_BEACON_ENABLED;
 			ieee80211_bss_info_change_notify(sdata, changed);
 			break;
 		case NL80211_IFTYPE_WDS:
-- 
cgit v1.2.3-59-g8ed1b


From f444de05d20e27cdd960c13fcbcfca3099f03143 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 5 May 2010 15:25:02 +0200
Subject: cfg80211/mac80211: better channel handling

Currently (all tested with hwsim) you can do stupid
things like setting up an AP on a certain channel,
then adding another virtual interface and making
that associate on another channel -- this will make
the beaconing move to the other channel, but obviously
without the necessary update of the IE data.

In order to improve this situation, first make the
configuration APIs (cfg80211 and nl80211) aware of
multi-channel operation -- we'll eventually need
that in the future anyway. There's one userland API
change and one API addition. The API change is that
now SET_WIPHY must be called with virtual interface
index rather than only wiphy index in order to take
effect for that interface -- luckily all current
users (hostapd) do that. For monitor interfaces, the
old setting is preserved, but monitors are always
slaved to other devices anyway so no guarantees.

The userland API addition is the introduction of a
per-virtual-interface SET_CHANNEL command, which
hostapd should use going forward to make it easier
to understand what's going on (it can automatically
detect a kernel with this command).

Other than mac80211, no existing cfg80211 drivers
are affected by this change because they only allow
a single virtual interface.

mac80211, however, now needs to be aware that the
channel settings are per interface now, and needs
to disallow (for now) real multi-channel operation,
which is another important part of this patch.

One of the immediate benefits is that you can now
start hostapd to operate on a hardware that already
has a connection on another virtual interface, as
long as you specify the same channel.

Note that two things are left unhandled (this is an
improvement -- not a complete fix):

 * different HT/no-HT modes

   currently you could start an HT AP and then
   connect to a non-HT network on the same channel
   which would configure the hardware for no HT;
   that can be fixed fairly easily

 * CSA

   An AP we're connected to on a virtual interface
   might indicate switching channels, and in that
   case we would follow it, regardless of how many
   other interfaces are operating; this requires
   more effort to fix but is pretty rare after all

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/libertas/cfg.c |   1 +
 drivers/net/wireless/orinoco/cfg.c  |   1 +
 drivers/net/wireless/rndis_wlan.c   |   4 +-
 include/linux/nl80211.h             |  13 +++
 include/net/cfg80211.h              |  11 ++-
 net/mac80211/Makefile               |   3 +-
 net/mac80211/cfg.c                  |  41 +++++++++
 net/mac80211/chan.c                 |  57 ++++++++++++
 net/mac80211/ieee80211_i.h          |  11 +++
 net/wireless/chan.c                 |  56 ++++--------
 net/wireless/core.h                 |  12 +--
 net/wireless/ibss.c                 |   5 --
 net/wireless/nl80211.c              | 171 ++++++++++++++++++++++++++++--------
 net/wireless/sme.c                  |   5 --
 net/wireless/wext-compat.c          |  15 ++--
 net/wireless/wext-sme.c             |   2 +-
 16 files changed, 300 insertions(+), 108 deletions(-)
 create mode 100644 net/mac80211/chan.c

(limited to 'net')

diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c
index 82ebe1461a77..ea9d0b2ea0d7 100644
--- a/drivers/net/wireless/libertas/cfg.c
+++ b/drivers/net/wireless/libertas/cfg.c
@@ -78,6 +78,7 @@ static const u32 cipher_suites[] = {
 
 
 static int lbs_cfg_set_channel(struct wiphy *wiphy,
+	struct net_device *netdev,
 	struct ieee80211_channel *chan,
 	enum nl80211_channel_type channel_type)
 {
diff --git a/drivers/net/wireless/orinoco/cfg.c b/drivers/net/wireless/orinoco/cfg.c
index 81d228de9e5d..8c4169c227ae 100644
--- a/drivers/net/wireless/orinoco/cfg.c
+++ b/drivers/net/wireless/orinoco/cfg.c
@@ -159,6 +159,7 @@ static int orinoco_scan(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int orinoco_set_channel(struct wiphy *wiphy,
+			struct net_device *netdev,
 			struct ieee80211_channel *chan,
 			enum nl80211_channel_type channel_type)
 {
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index aceb95ef7274..99d4f0de77ca 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -534,7 +534,7 @@ static int rndis_join_ibss(struct wiphy *wiphy, struct net_device *dev,
 
 static int rndis_leave_ibss(struct wiphy *wiphy, struct net_device *dev);
 
-static int rndis_set_channel(struct wiphy *wiphy,
+static int rndis_set_channel(struct wiphy *wiphy, struct net_device *dev,
 	struct ieee80211_channel *chan, enum nl80211_channel_type channel_type);
 
 static int rndis_add_key(struct wiphy *wiphy, struct net_device *netdev,
@@ -2290,7 +2290,7 @@ static int rndis_leave_ibss(struct wiphy *wiphy, struct net_device *dev)
 	return deauthenticate(usbdev);
 }
 
-static int rndis_set_channel(struct wiphy *wiphy,
+static int rndis_set_channel(struct wiphy *wiphy, struct net_device *netdev,
 	struct ieee80211_channel *chan, enum nl80211_channel_type channel_type)
 {
 	struct rndis_wlan_private *priv = wiphy_priv(wiphy);
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index f8750f9a65b8..b7c77f9712f4 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -52,6 +52,8 @@
  *	%NL80211_ATTR_WIPHY_CHANNEL_TYPE, %NL80211_ATTR_WIPHY_RETRY_SHORT,
  *	%NL80211_ATTR_WIPHY_RETRY_LONG, %NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
  *	and/or %NL80211_ATTR_WIPHY_RTS_THRESHOLD.
+ *	However, for setting the channel, see %NL80211_CMD_SET_CHANNEL
+ *	instead, the support here is for backward compatibility only.
  * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request
  *	or rename notification. Has attributes %NL80211_ATTR_WIPHY and
  *	%NL80211_ATTR_WIPHY_NAME.
@@ -329,6 +331,15 @@
  * @NL80211_CMD_NOTIFY_CQM: Connection quality monitor notification. This
  *	command is used as an event to indicate the that a trigger level was
  *	reached.
+ * @NL80211_CMD_SET_CHANNEL: Set the channel (using %NL80211_ATTR_WIPHY_FREQ
+ *	and %NL80211_ATTR_WIPHY_CHANNEL_TYPE) the given interface (identifed
+ *	by %NL80211_ATTR_IFINDEX) shall operate on.
+ *	In case multiple channels are supported by the device, the mechanism
+ *	with which it switches channels is implementation-defined.
+ *	When a monitor interface is given, it can only switch channel while
+ *	no other interfaces are operating to avoid disturbing the operation
+ *	of any other interfaces, and other interfaces will again take
+ *	precedence when they are used.
  *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
@@ -428,6 +439,8 @@ enum nl80211_commands {
 	NL80211_CMD_SET_CQM,
 	NL80211_CMD_NOTIFY_CQM,
 
+	NL80211_CMD_SET_CHANNEL,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7d10c0182f53..b44a2e5321a3 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -966,7 +966,11 @@ struct cfg80211_pmksa {
  *
  * @set_txq_params: Set TX queue parameters
  *
- * @set_channel: Set channel
+ * @set_channel: Set channel for a given wireless interface. Some devices
+ *	may support multi-channel operation (by channel hopping) so cfg80211
+ *	doesn't verify much. Note, however, that the passed netdev may be
+ *	%NULL as well if the user requested changing the channel for the
+ *	device itself, or for a monitor interface.
  *
  * @scan: Request to do a scan. If returning zero, the scan request is given
  *	the driver, and will be valid until passed to cfg80211_scan_done().
@@ -1095,7 +1099,7 @@ struct cfg80211_ops {
 	int	(*set_txq_params)(struct wiphy *wiphy,
 				  struct ieee80211_txq_params *params);
 
-	int	(*set_channel)(struct wiphy *wiphy,
+	int	(*set_channel)(struct wiphy *wiphy, struct net_device *dev,
 			       struct ieee80211_channel *chan,
 			       enum nl80211_channel_type channel_type);
 
@@ -1461,6 +1465,8 @@ struct cfg80211_cached_keys;
  * @list: (private) Used to collect the interfaces
  * @netdev: (private) Used to reference back to the netdev
  * @current_bss: (private) Used by the internal configuration code
+ * @channel: (private) Used by the internal configuration code to track
+ *	user-set AP, monitor and WDS channels for wireless extensions
  * @bssid: (private) Used by the internal configuration code
  * @ssid: (private) Used by the internal configuration code
  * @ssid_len: (private) Used by the internal configuration code
@@ -1507,6 +1513,7 @@ struct wireless_dev {
 	struct cfg80211_internal_bss *authtry_bsses[MAX_AUTH_BSSES];
 	struct cfg80211_internal_bss *auth_bsses[MAX_AUTH_BSSES];
 	struct cfg80211_internal_bss *current_bss; /* associated / joined */
+	struct ieee80211_channel *channel;
 
 	bool ps;
 	int ps_timeout;
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 04420291e7ad..84b48ba8a77e 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -23,7 +23,8 @@ mac80211-y := \
 	key.o \
 	util.o \
 	wme.o \
-	event.o
+	event.o \
+	chan.o
 
 mac80211-$(CONFIG_MAC80211_LEDS) += led.o
 mac80211-$(CONFIG_MAC80211_DEBUGFS) += \
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index b575a5066219..414b7dd7d7fd 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1161,11 +1161,24 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
 }
 
 static int ieee80211_set_channel(struct wiphy *wiphy,
+				 struct net_device *netdev,
 				 struct ieee80211_channel *chan,
 				 enum nl80211_channel_type channel_type)
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 
+	switch (ieee80211_get_channel_mode(local, NULL)) {
+	case CHAN_MODE_HOPPING:
+		return -EBUSY;
+	case CHAN_MODE_FIXED:
+		if (local->oper_channel == chan &&
+		    local->oper_channel_type == channel_type)
+			return 0;
+		return -EBUSY;
+	case CHAN_MODE_UNDEFINED:
+		break;
+	}
+
 	local->oper_channel = chan;
 	local->oper_channel_type = channel_type;
 
@@ -1213,6 +1226,20 @@ static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev,
 static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev,
 			   struct cfg80211_assoc_request *req)
 {
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	switch (ieee80211_get_channel_mode(local, sdata)) {
+	case CHAN_MODE_HOPPING:
+		return -EBUSY;
+	case CHAN_MODE_FIXED:
+		if (local->oper_channel == req->bss->channel)
+			break;
+		return -EBUSY;
+	case CHAN_MODE_UNDEFINED:
+		break;
+	}
+
 	return ieee80211_mgd_assoc(IEEE80211_DEV_TO_SUB_IF(dev), req);
 }
 
@@ -1235,8 +1262,22 @@ static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev,
 static int ieee80211_join_ibss(struct wiphy *wiphy, struct net_device *dev,
 			       struct cfg80211_ibss_params *params)
 {
+	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
+	switch (ieee80211_get_channel_mode(local, sdata)) {
+	case CHAN_MODE_HOPPING:
+		return -EBUSY;
+	case CHAN_MODE_FIXED:
+		if (!params->channel_fixed)
+			return -EBUSY;
+		if (local->oper_channel == params->channel)
+			break;
+		return -EBUSY;
+	case CHAN_MODE_UNDEFINED:
+		break;
+	}
+
 	return ieee80211_ibss_join(sdata, params);
 }
 
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
new file mode 100644
index 000000000000..08f3832661a5
--- /dev/null
+++ b/net/mac80211/chan.c
@@ -0,0 +1,57 @@
+/*
+ * mac80211 - channel management
+ */
+
+#include "ieee80211_i.h"
+
+enum ieee80211_chan_mode
+__ieee80211_get_channel_mode(struct ieee80211_local *local,
+			     struct ieee80211_sub_if_data *ignore)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	WARN_ON(!mutex_is_locked(&local->iflist_mtx));
+
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		if (sdata == ignore)
+			continue;
+
+		if (!ieee80211_sdata_running(sdata))
+			continue;
+
+		if (sdata->vif.type == NL80211_IFTYPE_MONITOR)
+			continue;
+
+		if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+		    !sdata->u.mgd.associated)
+			continue;
+
+		if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
+			if (!sdata->u.ibss.ssid_len)
+				continue;
+			if (!sdata->u.ibss.fixed_channel)
+				return CHAN_MODE_HOPPING;
+		}
+
+		if (sdata->vif.type == NL80211_IFTYPE_AP &&
+		    !sdata->u.ap.beacon)
+			continue;
+
+		return CHAN_MODE_FIXED;
+	}
+
+	return CHAN_MODE_UNDEFINED;
+}
+
+enum ieee80211_chan_mode
+ieee80211_get_channel_mode(struct ieee80211_local *local,
+			   struct ieee80211_sub_if_data *ignore)
+{
+	enum ieee80211_chan_mode mode;
+
+	mutex_lock(&local->iflist_mtx);
+	mode = __ieee80211_get_channel_mode(local, ignore);
+	mutex_unlock(&local->iflist_mtx);
+
+	return mode;
+}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c8077a3647c6..359edff31471 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1229,6 +1229,17 @@ int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata,
 int ieee80211_wk_cancel_remain_on_channel(
 	struct ieee80211_sub_if_data *sdata, u64 cookie);
 
+/* channel management */
+enum ieee80211_chan_mode {
+	CHAN_MODE_UNDEFINED,
+	CHAN_MODE_HOPPING,
+	CHAN_MODE_FIXED,
+};
+
+enum ieee80211_chan_mode
+ieee80211_get_channel_mode(struct ieee80211_local *local,
+			   struct ieee80211_sub_if_data *ignore);
+
 #ifdef CONFIG_MAC80211_NOINLINE
 #define debug_noinline noinline
 #else
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index bf1737fc9a7e..d92d088026bf 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -9,38 +9,6 @@
 #include <net/cfg80211.h>
 #include "core.h"
 
-struct ieee80211_channel *
-rdev_fixed_channel(struct cfg80211_registered_device *rdev,
-		   struct wireless_dev *for_wdev)
-{
-	struct wireless_dev *wdev;
-	struct ieee80211_channel *result = NULL;
-
-	WARN_ON(!mutex_is_locked(&rdev->devlist_mtx));
-
-	list_for_each_entry(wdev, &rdev->netdev_list, list) {
-		if (wdev == for_wdev)
-			continue;
-
-		/*
-		 * Lock manually to tell lockdep about allowed
-		 * nesting here if for_wdev->mtx is held already.
-		 * This is ok as it's all under the rdev devlist
-		 * mutex and as such can only be done once at any
-		 * given time.
-		 */
-		mutex_lock_nested(&wdev->mtx, SINGLE_DEPTH_NESTING);
-		if (wdev->current_bss)
-			result = wdev->current_bss->pub.channel;
-		wdev_unlock(wdev);
-
-		if (result)
-			break;
-	}
-
-	return result;
-}
-
 struct ieee80211_channel *
 rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
 		  int freq, enum nl80211_channel_type channel_type)
@@ -75,15 +43,22 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
 	return chan;
 }
 
-int rdev_set_freq(struct cfg80211_registered_device *rdev,
-		  struct wireless_dev *for_wdev,
-		  int freq, enum nl80211_channel_type channel_type)
+int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
+		      struct wireless_dev *wdev, int freq,
+		      enum nl80211_channel_type channel_type)
 {
 	struct ieee80211_channel *chan;
 	int result;
 
-	if (rdev_fixed_channel(rdev, for_wdev))
-		return -EBUSY;
+	if (wdev->iftype == NL80211_IFTYPE_MONITOR)
+		wdev = NULL;
+
+	if (wdev) {
+		ASSERT_WDEV_LOCK(wdev);
+
+		if (!netif_running(wdev->netdev))
+			return -ENETDOWN;
+	}
 
 	if (!rdev->ops->set_channel)
 		return -EOPNOTSUPP;
@@ -92,11 +67,14 @@ int rdev_set_freq(struct cfg80211_registered_device *rdev,
 	if (!chan)
 		return -EINVAL;
 
-	result = rdev->ops->set_channel(&rdev->wiphy, chan, channel_type);
+	result = rdev->ops->set_channel(&rdev->wiphy,
+					wdev ? wdev->netdev : NULL,
+					chan, channel_type);
 	if (result)
 		return result;
 
-	rdev->channel = chan;
+	if (wdev)
+		wdev->channel = chan;
 
 	return 0;
 }
diff --git a/net/wireless/core.h b/net/wireless/core.h
index b2234b436ead..ae930acf75e9 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -70,9 +70,6 @@ struct cfg80211_registered_device {
 	struct work_struct conn_work;
 	struct work_struct event_work;
 
-	/* current channel */
-	struct ieee80211_channel *channel;
-
 	/* must be last because of the way we do wiphy_priv(),
 	 * and it should at least be aligned to NETDEV_ALIGN */
 	struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN)));
@@ -387,15 +384,12 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 			  u32 *flags, struct vif_params *params);
 void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
 
-struct ieee80211_channel *
-rdev_fixed_channel(struct cfg80211_registered_device *rdev,
-		   struct wireless_dev *for_wdev);
 struct ieee80211_channel *
 rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
 		  int freq, enum nl80211_channel_type channel_type);
-int rdev_set_freq(struct cfg80211_registered_device *rdev,
-		  struct wireless_dev *for_wdev,
-		  int freq, enum nl80211_channel_type channel_type);
+int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
+		      struct wireless_dev *wdev, int freq,
+		      enum nl80211_channel_type channel_type);
 
 u16 cfg80211_calculate_bitrate(struct rate_info *rate);
 
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 6ef5a491fb4b..9825317e653a 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -80,15 +80,10 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
 			 struct cfg80211_cached_keys *connkeys)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct ieee80211_channel *chan;
 	int err;
 
 	ASSERT_WDEV_LOCK(wdev);
 
-	chan = rdev_fixed_channel(rdev, wdev);
-	if (chan && chan != params->channel)
-		return -EBUSY;
-
 	if (wdev->ssid_len)
 		return -EALREADY;
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c27bef8e0c11..ec1b4a896c6e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -588,6 +588,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 		i++;
 		NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
 	}
+	CMD(set_channel, SET_CHANNEL);
 
 #undef CMD
 
@@ -688,10 +689,90 @@ static int parse_txq_params(struct nlattr *tb[],
 	return 0;
 }
 
+static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev)
+{
+	/*
+	 * You can only set the channel explicitly for AP, mesh
+	 * and WDS type interfaces; all others have their channel
+	 * managed via their respective "establish a connection"
+	 * command (connect, join, ...)
+	 *
+	 * Monitors are special as they are normally slaved to
+	 * whatever else is going on, so they behave as though
+	 * you tried setting the wiphy channel itself.
+	 */
+	return !wdev ||
+		wdev->iftype == NL80211_IFTYPE_AP ||
+		wdev->iftype == NL80211_IFTYPE_WDS ||
+		wdev->iftype == NL80211_IFTYPE_MESH_POINT ||
+		wdev->iftype == NL80211_IFTYPE_MONITOR;
+}
+
+static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
+				 struct wireless_dev *wdev,
+				 struct genl_info *info)
+{
+	enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
+	u32 freq;
+	int result;
+
+	if (!info->attrs[NL80211_ATTR_WIPHY_FREQ])
+		return -EINVAL;
+
+	if (!nl80211_can_set_dev_channel(wdev))
+		return -EOPNOTSUPP;
+
+	if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
+		channel_type = nla_get_u32(info->attrs[
+				   NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
+		if (channel_type != NL80211_CHAN_NO_HT &&
+		    channel_type != NL80211_CHAN_HT20 &&
+		    channel_type != NL80211_CHAN_HT40PLUS &&
+		    channel_type != NL80211_CHAN_HT40MINUS)
+			return -EINVAL;
+	}
+
+	freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+
+	mutex_lock(&rdev->devlist_mtx);
+	if (wdev) {
+		wdev_lock(wdev);
+		result = cfg80211_set_freq(rdev, wdev, freq, channel_type);
+		wdev_unlock(wdev);
+	} else {
+		result = cfg80211_set_freq(rdev, NULL, freq, channel_type);
+	}
+	mutex_unlock(&rdev->devlist_mtx);
+
+	return result;
+}
+
+static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev;
+	struct net_device *netdev;
+	int result;
+
+	rtnl_lock();
+
+	result = get_rdev_dev_by_info_ifindex(info, &rdev, &netdev);
+	if (result)
+		goto unlock;
+
+	result = __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info);
+
+ unlock:
+	rtnl_unlock();
+
+	return result;
+}
+
 static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev;
-	int result = 0, rem_txq_params = 0;
+	struct net_device *netdev = NULL;
+	struct wireless_dev *wdev;
+	int result, rem_txq_params = 0;
 	struct nlattr *nl_txq_params;
 	u32 changed;
 	u8 retry_short = 0, retry_long = 0;
@@ -700,16 +781,50 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 
 	rtnl_lock();
 
+	/*
+	 * Try to find the wiphy and netdev. Normally this
+	 * function shouldn't need the netdev, but this is
+	 * done for backward compatibility -- previously
+	 * setting the channel was done per wiphy, but now
+	 * it is per netdev. Previous userland like hostapd
+	 * also passed a netdev to set_wiphy, so that it is
+	 * possible to let that go to the right netdev!
+	 */
 	mutex_lock(&cfg80211_mutex);
 
-	rdev = __cfg80211_rdev_from_info(info);
-	if (IS_ERR(rdev)) {
-		mutex_unlock(&cfg80211_mutex);
-		result = PTR_ERR(rdev);
-		goto unlock;
+	if (info->attrs[NL80211_ATTR_IFINDEX]) {
+		int ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]);
+
+		netdev = dev_get_by_index(genl_info_net(info), ifindex);
+		if (netdev && netdev->ieee80211_ptr) {
+			rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy);
+			mutex_lock(&rdev->mtx);
+		} else
+			netdev = NULL;
 	}
 
-	mutex_lock(&rdev->mtx);
+	if (!netdev) {
+		rdev = __cfg80211_rdev_from_info(info);
+		if (IS_ERR(rdev)) {
+			mutex_unlock(&cfg80211_mutex);
+			result = PTR_ERR(rdev);
+			goto unlock;
+		}
+		wdev = NULL;
+		netdev = NULL;
+		result = 0;
+
+		mutex_lock(&rdev->mtx);
+	} else if (netif_running(netdev) &&
+		   nl80211_can_set_dev_channel(netdev->ieee80211_ptr))
+		wdev = netdev->ieee80211_ptr;
+	else
+		wdev = NULL;
+
+	/*
+	 * end workaround code, by now the rdev is available
+	 * and locked, and wdev may or may not be NULL.
+	 */
 
 	if (info->attrs[NL80211_ATTR_WIPHY_NAME])
 		result = cfg80211_dev_rename(
@@ -748,26 +863,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
-		enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
-		u32 freq;
-
-		result = -EINVAL;
-
-		if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
-			channel_type = nla_get_u32(info->attrs[
-					   NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
-			if (channel_type != NL80211_CHAN_NO_HT &&
-			    channel_type != NL80211_CHAN_HT20 &&
-			    channel_type != NL80211_CHAN_HT40PLUS &&
-			    channel_type != NL80211_CHAN_HT40MINUS)
-				goto bad_res;
-		}
-
-		freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
-
-		mutex_lock(&rdev->devlist_mtx);
-		result = rdev_set_freq(rdev, NULL, freq, channel_type);
-		mutex_unlock(&rdev->devlist_mtx);
+		result = __nl80211_set_channel(rdev, wdev, info);
 		if (result)
 			goto bad_res;
 	}
@@ -864,6 +960,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 
  bad_res:
 	mutex_unlock(&rdev->mtx);
+	if (netdev)
+		dev_put(netdev);
  unlock:
 	rtnl_unlock();
 	return result;
@@ -3561,9 +3659,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev;
 	struct net_device *dev;
-	struct wireless_dev *wdev;
 	struct cfg80211_crypto_settings crypto;
-	struct ieee80211_channel *chan, *fixedchan;
+	struct ieee80211_channel *chan;
 	const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL;
 	int err, ssid_len, ie_len = 0;
 	bool use_mfp = false;
@@ -3606,16 +3703,6 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
-	mutex_lock(&rdev->devlist_mtx);
-	wdev = dev->ieee80211_ptr;
-	fixedchan = rdev_fixed_channel(rdev, wdev);
-	if (fixedchan && chan != fixedchan) {
-		err = -EBUSY;
-		mutex_unlock(&rdev->devlist_mtx);
-		goto out;
-	}
-	mutex_unlock(&rdev->devlist_mtx);
-
 	ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
 	ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
 
@@ -5185,6 +5272,12 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_SET_CHANNEL,
+		.doit = nl80211_set_channel,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index dcd7685242f7..14cf8163912a 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -740,7 +740,6 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev,
 		       const u8 *prev_bssid)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct ieee80211_channel *chan;
 	struct cfg80211_bss *bss = NULL;
 	int err;
 
@@ -749,10 +748,6 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev,
 	if (wdev->sme_state != CFG80211_SME_IDLE)
 		return -EALREADY;
 
-	chan = rdev_fixed_channel(rdev, wdev);
-	if (chan && chan != connect->channel)
-		return -EBUSY;
-
 	if (WARN_ON(wdev->connect_keys)) {
 		kfree(wdev->connect_keys);
 		wdev->connect_keys = NULL;
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 9ab51838849e..75848c6cb22a 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -781,16 +781,22 @@ int cfg80211_wext_siwfreq(struct net_device *dev,
 		return cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra);
 	case NL80211_IFTYPE_ADHOC:
 		return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra);
-	default:
+	case NL80211_IFTYPE_MONITOR:
+	case NL80211_IFTYPE_WDS:
+	case NL80211_IFTYPE_MESH_POINT:
 		freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
 		if (freq < 0)
 			return freq;
 		if (freq == 0)
 			return -EINVAL;
+		wdev_lock(wdev);
 		mutex_lock(&rdev->devlist_mtx);
-		err = rdev_set_freq(rdev, NULL, freq, NL80211_CHAN_NO_HT);
+		err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
 		mutex_unlock(&rdev->devlist_mtx);
+		wdev_unlock(wdev);
 		return err;
+	default:
+		return -EOPNOTSUPP;
 	}
 }
 EXPORT_SYMBOL_GPL(cfg80211_wext_siwfreq);
@@ -800,7 +806,6 @@ int cfg80211_wext_giwfreq(struct net_device *dev,
 			  struct iw_freq *freq, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
 
 	switch (wdev->iftype) {
 	case NL80211_IFTYPE_STATION:
@@ -808,9 +813,9 @@ int cfg80211_wext_giwfreq(struct net_device *dev,
 	case NL80211_IFTYPE_ADHOC:
 		return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra);
 	default:
-		if (!rdev->channel)
+		if (!wdev->channel)
 			return -EINVAL;
-		freq->m = rdev->channel->center_freq;
+		freq->m = wdev->channel->center_freq;
 		freq->e = 6;
 		return 0;
 	}
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 5615a8802536..8e5ab4f4e9c4 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -107,7 +107,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
 
 	/* SSID is not set, we just want to switch channel */
 	if (chan && !wdev->wext.connect.ssid_len) {
-		err = rdev_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
+		err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
 		goto out;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 0aaffa9b9699894aab3266195a529baf9f96ac29 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 5 May 2010 15:28:27 +0200
Subject: mac80211: improve HT channel handling

Currently, when one interface switches HT mode,
all others will follow along. This is clearly
undesirable, since the new one might switch to
no-HT while another one is operating in HT.

Address this issue by keeping track of the HT
mode per interface, and allowing only changes
that are compatible, i.e. switching into HT40+
is not possible when another interface is in
HT40-, in that case the second one needs to
fall back to HT20.

Also, to allow drivers to know what's going on,
store the per-interface HT mode (channel type)
in the virtual interface's bss_conf.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 19 +++++-----
 include/net/mac80211.h                |  4 ++
 net/mac80211/cfg.c                    | 23 +++++++++---
 net/mac80211/chan.c                   | 70 +++++++++++++++++++++++++++++++++++
 net/mac80211/ibss.c                   |  5 ++-
 net/mac80211/ieee80211_i.h            |  5 ++-
 net/mac80211/main.c                   |  2 +-
 net/mac80211/mlme.c                   | 44 +++++++++++-----------
 8 files changed, 132 insertions(+), 40 deletions(-)

(limited to 'net')

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 62a739f6c37b..bdce71a4ba20 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -651,17 +651,17 @@ static void mac80211_hwsim_beacon(unsigned long arg)
 	add_timer(&data->beacon_timer);
 }
 
+static const char *hwsim_chantypes[] = {
+	[NL80211_CHAN_NO_HT] = "noht",
+	[NL80211_CHAN_HT20] = "ht20",
+	[NL80211_CHAN_HT40MINUS] = "ht40-",
+	[NL80211_CHAN_HT40PLUS] = "ht40+",
+};
 
 static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct mac80211_hwsim_data *data = hw->priv;
 	struct ieee80211_conf *conf = &hw->conf;
-	static const char *chantypes[4] = {
-		[NL80211_CHAN_NO_HT] = "noht",
-		[NL80211_CHAN_HT20] = "ht20",
-		[NL80211_CHAN_HT40MINUS] = "ht40-",
-		[NL80211_CHAN_HT40PLUS] = "ht40+",
-	};
 	static const char *smps_modes[IEEE80211_SMPS_NUM_MODES] = {
 		[IEEE80211_SMPS_AUTOMATIC] = "auto",
 		[IEEE80211_SMPS_OFF] = "off",
@@ -672,7 +672,7 @@ static int mac80211_hwsim_config(struct ieee80211_hw *hw, u32 changed)
 	printk(KERN_DEBUG "%s:%s (freq=%d/%s idle=%d ps=%d smps=%s)\n",
 	       wiphy_name(hw->wiphy), __func__,
 	       conf->channel->center_freq,
-	       chantypes[conf->channel_type],
+	       hwsim_chantypes[conf->channel_type],
 	       !!(conf->flags & IEEE80211_CONF_IDLE),
 	       !!(conf->flags & IEEE80211_CONF_PS),
 	       smps_modes[conf->smps_mode]);
@@ -760,9 +760,10 @@ static void mac80211_hwsim_bss_info_changed(struct ieee80211_hw *hw,
 	}
 
 	if (changed & BSS_CHANGED_HT) {
-		printk(KERN_DEBUG "  %s: HT: op_mode=0x%x\n",
+		printk(KERN_DEBUG "  %s: HT: op_mode=0x%x, chantype=%s\n",
 		       wiphy_name(hw->wiphy),
-		       info->ht_operation_mode);
+		       info->ht_operation_mode,
+		       hwsim_chantypes[info->channel_type]);
 	}
 
 	if (changed & BSS_CHANGED_BASIC_RATES) {
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 0528615ac827..9448a5b1bb15 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -191,6 +191,9 @@ enum ieee80211_bss_change {
  *	the current band.
  * @bssid: The BSSID for this BSS
  * @enable_beacon: whether beaconing should be enabled or not
+ * @channel_type: Channel type for this BSS -- the hardware might be
+ *	configured for HT40+ while this BSS only uses no-HT, for
+ *	example.
  * @ht_operation_mode: HT operation mode (like in &struct ieee80211_ht_info).
  *	This field is only valid when the channel type is one of the HT types.
  * @cqm_rssi_thold: Connection quality monitor RSSI threshold, a zero value
@@ -215,6 +218,7 @@ struct ieee80211_bss_conf {
 	u16 ht_operation_mode;
 	s32 cqm_rssi_thold;
 	u32 cqm_rssi_hyst;
+	enum nl80211_channel_type channel_type;
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 414b7dd7d7fd..ab166c6d9399 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1166,23 +1166,34 @@ static int ieee80211_set_channel(struct wiphy *wiphy,
 				 enum nl80211_channel_type channel_type)
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata = NULL;
+
+	if (netdev)
+		sdata = IEEE80211_DEV_TO_SUB_IF(netdev);
 
 	switch (ieee80211_get_channel_mode(local, NULL)) {
 	case CHAN_MODE_HOPPING:
 		return -EBUSY;
 	case CHAN_MODE_FIXED:
-		if (local->oper_channel == chan &&
-		    local->oper_channel_type == channel_type)
+		if (local->oper_channel != chan)
+			return -EBUSY;
+		if (!sdata && local->_oper_channel_type == channel_type)
 			return 0;
-		return -EBUSY;
+		break;
 	case CHAN_MODE_UNDEFINED:
 		break;
 	}
 
 	local->oper_channel = chan;
-	local->oper_channel_type = channel_type;
 
-	return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+	if (!ieee80211_set_channel_type(local, sdata, channel_type))
+		return -EBUSY;
+
+	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
+	if (sdata && sdata->vif.type != NL80211_IFTYPE_MONITOR)
+		ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_HT);
+
+	return 0;
 }
 
 #ifdef CONFIG_PM
@@ -1406,7 +1417,7 @@ int __ieee80211_request_smps(struct ieee80211_sub_if_data *sdata,
 	 * association, there's no need to send an action frame.
 	 */
 	if (!sdata->u.mgd.associated ||
-	    sdata->local->oper_channel_type == NL80211_CHAN_NO_HT) {
+	    sdata->vif.bss_conf.channel_type == NL80211_CHAN_NO_HT) {
 		mutex_lock(&sdata->local->iflist_mtx);
 		ieee80211_recalc_smps(sdata->local, sdata);
 		mutex_unlock(&sdata->local->iflist_mtx);
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 08f3832661a5..5d218c530a4e 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -2,6 +2,7 @@
  * mac80211 - channel management
  */
 
+#include <linux/nl80211.h>
 #include "ieee80211_i.h"
 
 enum ieee80211_chan_mode
@@ -55,3 +56,72 @@ ieee80211_get_channel_mode(struct ieee80211_local *local,
 
 	return mode;
 }
+
+bool ieee80211_set_channel_type(struct ieee80211_local *local,
+				struct ieee80211_sub_if_data *sdata,
+				enum nl80211_channel_type chantype)
+{
+	struct ieee80211_sub_if_data *tmp;
+	enum nl80211_channel_type superchan = NL80211_CHAN_NO_HT;
+	bool result;
+
+	mutex_lock(&local->iflist_mtx);
+
+	list_for_each_entry(tmp, &local->interfaces, list) {
+		if (tmp == sdata)
+			continue;
+
+		if (!ieee80211_sdata_running(tmp))
+			continue;
+
+		switch (tmp->vif.bss_conf.channel_type) {
+		case NL80211_CHAN_NO_HT:
+		case NL80211_CHAN_HT20:
+			superchan = tmp->vif.bss_conf.channel_type;
+			break;
+		case NL80211_CHAN_HT40PLUS:
+			WARN_ON(superchan == NL80211_CHAN_HT40MINUS);
+			superchan = NL80211_CHAN_HT40PLUS;
+			break;
+		case NL80211_CHAN_HT40MINUS:
+			WARN_ON(superchan == NL80211_CHAN_HT40PLUS);
+			superchan = NL80211_CHAN_HT40MINUS;
+			break;
+		}
+	}
+
+	switch (superchan) {
+	case NL80211_CHAN_NO_HT:
+	case NL80211_CHAN_HT20:
+		/*
+		 * allow any change that doesn't go to no-HT
+		 * (if it already is no-HT no change is needed)
+		 */
+		if (chantype == NL80211_CHAN_NO_HT)
+			break;
+		superchan = chantype;
+		break;
+	case NL80211_CHAN_HT40PLUS:
+	case NL80211_CHAN_HT40MINUS:
+		/* allow smaller bandwidth and same */
+		if (chantype == NL80211_CHAN_NO_HT)
+			break;
+		if (chantype == NL80211_CHAN_HT20)
+			break;
+		if (superchan == chantype)
+			break;
+		result = false;
+		goto out;
+	}
+
+	local->_oper_channel_type = superchan;
+
+	if (sdata)
+		sdata->vif.bss_conf.channel_type = chantype;
+
+	result = true;
+ out:
+	mutex_unlock(&local->iflist_mtx);
+
+	return result;
+}
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index d5855ae387e8..36745f494f63 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -102,7 +102,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata,
 	sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0;
 
 	local->oper_channel = chan;
-	local->oper_channel_type = NL80211_CHAN_NO_HT;
+	WARN_ON(!ieee80211_set_channel_type(local, sdata, NL80211_CHAN_NO_HT));
 	ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL);
 
 	sband = local->hw.wiphy->bands[chan->band];
@@ -910,7 +910,8 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 	/* fix ourselves to that channel now already */
 	if (params->channel_fixed) {
 		sdata->local->oper_channel = params->channel;
-		sdata->local->oper_channel_type = NL80211_CHAN_NO_HT;
+		WARN_ON(!ieee80211_set_channel_type(sdata->local, sdata,
+						    NL80211_CHAN_NO_HT));
 	}
 
 	if (params->ie) {
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 359edff31471..69e7f4131f46 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -768,7 +768,7 @@ struct ieee80211_local {
 	enum mac80211_scan_state next_scan_state;
 	struct delayed_work scan_work;
 	struct ieee80211_sub_if_data *scan_sdata;
-	enum nl80211_channel_type oper_channel_type;
+	enum nl80211_channel_type _oper_channel_type;
 	struct ieee80211_channel *oper_channel, *csa_channel;
 
 	/* Temporary remain-on-channel for off-channel operations */
@@ -1239,6 +1239,9 @@ enum ieee80211_chan_mode {
 enum ieee80211_chan_mode
 ieee80211_get_channel_mode(struct ieee80211_local *local,
 			   struct ieee80211_sub_if_data *ignore);
+bool ieee80211_set_channel_type(struct ieee80211_local *local,
+				struct ieee80211_sub_if_data *sdata,
+				enum nl80211_channel_type chantype);
 
 #ifdef CONFIG_MAC80211_NOINLINE
 #define debug_noinline noinline
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 353b6b42d9c5..d763d76e809f 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -111,7 +111,7 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed)
 		channel_type = local->tmp_channel_type;
 	} else {
 		chan = local->oper_channel;
-		channel_type = local->oper_channel_type;
+		channel_type = local->_oper_channel_type;
 	}
 
 	if (chan != local->hw.conf.channel ||
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 17cb8ae912bc..6e149b49d4f0 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -136,11 +136,14 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 	struct sta_info *sta;
 	u32 changed = 0;
 	u16 ht_opmode;
-	bool enable_ht = true, ht_changed;
+	bool enable_ht = true;
+	enum nl80211_channel_type prev_chantype;
 	enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
 
 	sband = local->hw.wiphy->bands[local->hw.conf.channel->band];
 
+	prev_chantype = sdata->vif.bss_conf.channel_type;
+
 	/* HT is not supported */
 	if (!sband->ht_cap.ht_supported)
 		enable_ht = false;
@@ -171,38 +174,37 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
-	ht_changed = conf_is_ht(&local->hw.conf) != enable_ht ||
-		     channel_type != local->hw.conf.channel_type;
-
 	if (local->tmp_channel)
 		local->tmp_channel_type = channel_type;
-	local->oper_channel_type = channel_type;
 
-	if (ht_changed) {
-                /* channel_type change automatically detected */
-		ieee80211_hw_config(local, 0);
+	if (!ieee80211_set_channel_type(local, sdata, channel_type)) {
+		/* can only fail due to HT40+/- mismatch */
+		channel_type = NL80211_CHAN_HT20;
+		WARN_ON(!ieee80211_set_channel_type(local, sdata, channel_type));
+	}
 
+	/* channel_type change automatically detected */
+	ieee80211_hw_config(local, 0);
+
+	if (prev_chantype != channel_type) {
 		rcu_read_lock();
 		sta = sta_info_get(sdata, bssid);
 		if (sta)
 			rate_control_rate_update(local, sband, sta,
 						 IEEE80211_RC_HT_CHANGED,
-						 local->oper_channel_type);
+						 channel_type);
 		rcu_read_unlock();
-        }
-
-	/* disable HT */
-	if (!enable_ht)
-		return 0;
+	}
 
 	ht_opmode = le16_to_cpu(hti->operation_mode);
 
 	/* if bss configuration changed store the new one */
-	if (!sdata->ht_opmode_valid ||
-	    sdata->vif.bss_conf.ht_operation_mode != ht_opmode) {
+	if (sdata->ht_opmode_valid != enable_ht ||
+	    sdata->vif.bss_conf.ht_operation_mode != ht_opmode ||
+	    prev_chantype != channel_type) {
 		changed |= BSS_CHANGED_HT;
 		sdata->vif.bss_conf.ht_operation_mode = ht_opmode;
-		sdata->ht_opmode_valid = true;
+		sdata->ht_opmode_valid = enable_ht;
 	}
 
 	return changed;
@@ -865,7 +867,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 	ieee80211_set_wmm_default(sdata);
 
 	/* channel(_type) changes are handled by ieee80211_hw_config */
-	local->oper_channel_type = NL80211_CHAN_NO_HT;
+	WARN_ON(!ieee80211_set_channel_type(local, sdata, NL80211_CHAN_NO_HT));
 
 	/* on the next assoc, re-program HT parameters */
 	sdata->ht_opmode_valid = false;
@@ -882,8 +884,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_hw_config(local, config_changed);
 
-	/* And the BSSID changed -- not very interesting here */
-	changed |= BSS_CHANGED_BSSID;
+	/* The BSSID (not really interesting) and HT changed */
+	changed |= BSS_CHANGED_BSSID | BSS_CHANGED_HT;
 	ieee80211_bss_info_change_notify(sdata, changed);
 
 	if (remove_sta)
@@ -2265,7 +2267,7 @@ int ieee80211_mgd_action(struct ieee80211_sub_if_data *sdata,
 	if ((chan != local->tmp_channel ||
 	     channel_type != local->tmp_channel_type) &&
 	    (chan != local->oper_channel ||
-	     channel_type != local->oper_channel_type))
+	     channel_type != local->_oper_channel_type))
 		return -EBUSY;
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom + len);
-- 
cgit v1.2.3-59-g8ed1b


From a472e71b3c71619087d2485282955c3b62ebfde9 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Thu, 6 May 2010 14:45:17 -0400
Subject: mac80211: set IEEE80211_TX_CTL_FIRST_FRAGMENT for beacons

Also simplify the flags assignment into a single statement at the
end of ieee80211_beacon_get_tim.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/tx.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index f3841f43249e..680bcb7093db 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2251,8 +2251,9 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw,
 
 	info->control.vif = vif;
 
-	info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT;
-	info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ;
+	info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT |
+			IEEE80211_TX_CTL_ASSIGN_SEQ |
+			IEEE80211_TX_CTL_FIRST_FRAGMENT;
  out:
 	rcu_read_unlock();
 	return skb;
-- 
cgit v1.2.3-59-g8ed1b


From 3ee943728fff536edaf8f59faa58aaa1aa7366e3 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Sat, 8 May 2010 01:57:52 -0700
Subject: ipv4: remove ip_rt_secret timer (v4)

A while back there was a discussion regarding the rt_secret_interval timer.
Given that we've had the ability to do emergency route cache rebuilds for a while
now, based on a statistical analysis of the various hash chain lengths in the
cache, the use of the flush timer is somewhat redundant.  This patch removes the
rt_secret_interval sysctl, allowing us to rely solely on the statistical
analysis mechanism to determine the need for route cache flushes.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h |   1 -
 kernel/sysctl_binary.c   |   1 -
 net/ipv4/route.c         | 108 ++++-------------------------------------------
 3 files changed, 8 insertions(+), 102 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index ae07feec6446..d68c3f121774 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -55,7 +55,6 @@ struct netns_ipv4 {
 	int sysctl_rt_cache_rebuild_count;
 	int current_rt_cache_rebuild_count;
 
-	struct timer_list rt_secret_timer;
 	atomic_t rt_genid;
 
 #ifdef CONFIG_IP_MROUTE
diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c
index 59030570f5ca..937d31dc8566 100644
--- a/kernel/sysctl_binary.c
+++ b/kernel/sysctl_binary.c
@@ -224,7 +224,6 @@ static const struct bin_table bin_net_ipv4_route_table[] = {
 	{ CTL_INT,	NET_IPV4_ROUTE_MTU_EXPIRES,		"mtu_expires" },
 	{ CTL_INT,	NET_IPV4_ROUTE_MIN_PMTU,		"min_pmtu" },
 	{ CTL_INT,	NET_IPV4_ROUTE_MIN_ADVMSS,		"min_adv_mss" },
-	{ CTL_INT,	NET_IPV4_ROUTE_SECRET_INTERVAL,		"secret_interval" },
 	{}
 };
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a947428ef0ae..dea3f9264250 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -129,7 +129,6 @@ static int ip_rt_gc_elasticity __read_mostly	= 8;
 static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
 static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
 static int ip_rt_min_advmss __read_mostly	= 256;
-static int ip_rt_secret_interval __read_mostly	= 10 * 60 * HZ;
 static int rt_chain_length_max __read_mostly	= 20;
 
 static struct delayed_work expires_work;
@@ -918,32 +917,11 @@ void rt_cache_flush_batch(void)
 	rt_do_flush(!in_softirq());
 }
 
-/*
- * We change rt_genid and let gc do the cleanup
- */
-static void rt_secret_rebuild(unsigned long __net)
-{
-	struct net *net = (struct net *)__net;
-	rt_cache_invalidate(net);
-	mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
-}
-
-static void rt_secret_rebuild_oneshot(struct net *net)
-{
-	del_timer_sync(&net->ipv4.rt_secret_timer);
-	rt_cache_invalidate(net);
-	if (ip_rt_secret_interval)
-		mod_timer(&net->ipv4.rt_secret_timer, jiffies + ip_rt_secret_interval);
-}
-
 static void rt_emergency_hash_rebuild(struct net *net)
 {
-	if (net_ratelimit()) {
+	if (net_ratelimit())
 		printk(KERN_WARNING "Route hash chain too long!\n");
-		printk(KERN_WARNING "Adjust your secret_interval!\n");
-	}
-
-	rt_secret_rebuild_oneshot(net);
+	rt_cache_invalidate(net);
 }
 
 /*
@@ -3101,48 +3079,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
 	return -EINVAL;
 }
 
-static void rt_secret_reschedule(int old)
-{
-	struct net *net;
-	int new = ip_rt_secret_interval;
-	int diff = new - old;
-
-	if (!diff)
-		return;
-
-	rtnl_lock();
-	for_each_net(net) {
-		int deleted = del_timer_sync(&net->ipv4.rt_secret_timer);
-		long time;
-
-		if (!new)
-			continue;
-
-		if (deleted) {
-			time = net->ipv4.rt_secret_timer.expires - jiffies;
-
-			if (time <= 0 || (time += diff) <= 0)
-				time = 0;
-		} else
-			time = new;
-
-		mod_timer(&net->ipv4.rt_secret_timer, jiffies + time);
-	}
-	rtnl_unlock();
-}
-
-static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write,
-					  void __user *buffer, size_t *lenp,
-					  loff_t *ppos)
-{
-	int old = ip_rt_secret_interval;
-	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
-
-	rt_secret_reschedule(old);
-
-	return ret;
-}
-
 static ctl_table ipv4_route_table[] = {
 	{
 		.procname	= "gc_thresh",
@@ -3251,13 +3187,6 @@ static ctl_table ipv4_route_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec,
 	},
-	{
-		.procname	= "secret_interval",
-		.data		= &ip_rt_secret_interval,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= ipv4_sysctl_rt_secret_interval,
-	},
 	{ }
 };
 
@@ -3336,34 +3265,15 @@ static __net_initdata struct pernet_operations sysctl_route_ops = {
 };
 #endif
 
-
-static __net_init int rt_secret_timer_init(struct net *net)
+static __net_init int rt_genid_init(struct net *net)
 {
-	atomic_set(&net->ipv4.rt_genid,
-			(int) ((num_physpages ^ (num_physpages>>8)) ^
-			(jiffies ^ (jiffies >> 7))));
-
-	net->ipv4.rt_secret_timer.function = rt_secret_rebuild;
-	net->ipv4.rt_secret_timer.data = (unsigned long)net;
-	init_timer_deferrable(&net->ipv4.rt_secret_timer);
-
-	if (ip_rt_secret_interval) {
-		net->ipv4.rt_secret_timer.expires =
-			jiffies + net_random() % ip_rt_secret_interval +
-			ip_rt_secret_interval;
-		add_timer(&net->ipv4.rt_secret_timer);
-	}
+	get_random_bytes(&net->ipv4.rt_genid,
+			 sizeof(net->ipv4.rt_genid));
 	return 0;
 }
 
-static __net_exit void rt_secret_timer_exit(struct net *net)
-{
-	del_timer_sync(&net->ipv4.rt_secret_timer);
-}
-
-static __net_initdata struct pernet_operations rt_secret_timer_ops = {
-	.init = rt_secret_timer_init,
-	.exit = rt_secret_timer_exit,
+static __net_initdata struct pernet_operations rt_genid_ops = {
+	.init = rt_genid_init,
 };
 
 
@@ -3424,9 +3334,6 @@ int __init ip_rt_init(void)
 	schedule_delayed_work(&expires_work,
 		net_random() % ip_rt_gc_interval + ip_rt_gc_interval);
 
-	if (register_pernet_subsys(&rt_secret_timer_ops))
-		printk(KERN_ERR "Unable to setup rt_secret_timer\n");
-
 	if (ip_rt_proc_init())
 		printk(KERN_ERR "Unable to create route proc files\n");
 #ifdef CONFIG_XFRM
@@ -3438,6 +3345,7 @@ int __init ip_rt_init(void)
 #ifdef CONFIG_SYSCTL
 	register_pernet_subsys(&sysctl_route_ops);
 #endif
+	register_pernet_subsys(&rt_genid_ops);
 	return rc;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 0d861d8b8edd139a9b291cb262d08dec8dc3922d Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:35 -0300
Subject: Bluetooth: Make hci_send_sco() void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It also removes an unneeded check for the MTU. The check is already done
earlier, in sco_send_frame().

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 2 +-
 net/bluetooth/hci_core.c         | 9 +--------
 net/bluetooth/sco.c              | 3 +--
 3 files changed, 3 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index ce3c99e5fa25..9830a88f487e 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -637,7 +637,7 @@ int hci_unregister_notifier(struct notifier_block *nb);
 
 int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param);
 int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags);
-int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb);
+void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb);
 
 void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode);
 
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 4ad23192c7a5..1c9aef97f519 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1308,18 +1308,13 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
 EXPORT_SYMBOL(hci_send_acl);
 
 /* Send SCO data */
-int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
+void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
 {
 	struct hci_dev *hdev = conn->hdev;
 	struct hci_sco_hdr hdr;
 
 	BT_DBG("%s len %d", hdev->name, skb->len);
 
-	if (skb->len > hdev->sco_mtu) {
-		kfree_skb(skb);
-		return -EINVAL;
-	}
-
 	hdr.handle = cpu_to_le16(conn->handle);
 	hdr.dlen   = skb->len;
 
@@ -1332,8 +1327,6 @@ int hci_send_sco(struct hci_conn *conn, struct sk_buff *skb)
 
 	skb_queue_tail(&conn->data_q, skb);
 	tasklet_schedule(&hdev->tx_task);
-
-	return 0;
 }
 EXPORT_SYMBOL(hci_send_sco);
 
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index b406d3eff53a..541b26efb724 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -249,8 +249,7 @@ static inline int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
 		goto fail;
 	}
 
-	if ((err = hci_send_sco(conn->hcon, skb)) < 0)
-		return err;
+	hci_send_sco(conn->hcon, skb);
 
 	return count;
 
-- 
cgit v1.2.3-59-g8ed1b


From b9dbdbc1f4404cba2e64939c30c87d59c9796e4e Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:35 -0300
Subject: Bluetooth: Trivial clean ups to SCO

Remove extra braces and labels, break lines longer than 80 columns, etc.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/sco.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 541b26efb724..4767928a93d3 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -165,11 +165,11 @@ static inline int sco_chan_add(struct sco_conn *conn, struct sock *sk, struct so
 	int err = 0;
 
 	sco_conn_lock(conn);
-	if (conn->sk) {
+	if (conn->sk)
 		err = -EBUSY;
-	} else {
+	else
 		__sco_chan_add(conn, sk, parent);
-	}
+
 	sco_conn_unlock(conn);
 	return err;
 }
@@ -241,21 +241,19 @@ static inline int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
 	BT_DBG("sk %p len %d", sk, len);
 
 	count = min_t(unsigned int, conn->mtu, len);
-	if (!(skb = bt_skb_send_alloc(sk, count, msg->msg_flags & MSG_DONTWAIT, &err)))
+	skb = bt_skb_send_alloc(sk, count,
+			msg->msg_flags & MSG_DONTWAIT, &err);
+	if (!skb)
 		return err;
 
 	if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) {
-		err = -EFAULT;
-		goto fail;
+		kfree_skb(skb);
+		return -EFAULT;
 	}
 
 	hci_send_sco(conn->hcon, skb);
 
 	return count;
-
-fail:
-	kfree_skb(skb);
-	return err;
 }
 
 static inline void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
@@ -625,7 +623,7 @@ static int sco_sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 			    struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
-	int err = 0;
+	int err;
 
 	BT_DBG("sock %p, sk %p", sock, sk);
 
@@ -850,7 +848,8 @@ static void sco_conn_ready(struct sco_conn *conn)
 
 		bh_lock_sock(parent);
 
-		sk = sco_sock_alloc(sock_net(parent), NULL, BTPROTO_SCO, GFP_ATOMIC);
+		sk = sco_sock_alloc(sock_net(parent), NULL,
+				BTPROTO_SCO, GFP_ATOMIC);
 		if (!sk) {
 			bh_unlock_sock(parent);
 			goto done;
-- 
cgit v1.2.3-59-g8ed1b


From c69163e9ed5048407cc84f439cbfecc53f6f7131 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:35 -0300
Subject: Bluetooth: Move specific Basic Mode code to the right place
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Inside "case L2CAP_MODE_BASIC:" we don't need to check for sk_type and
L2CAP mode, so the length check alone is sufficient.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 864c76f4a678..c9a848d3ef94 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1611,11 +1611,6 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 	if (msg->msg_flags & MSG_OOB)
 		return -EOPNOTSUPP;
 
-	/* Check outgoing MTU */
-	if (sk->sk_type == SOCK_SEQPACKET && pi->mode == L2CAP_MODE_BASIC &&
-	    len > pi->omtu)
-		return -EINVAL;
-
 	lock_sock(sk);
 
 	if (sk->sk_state != BT_CONNECTED) {
@@ -1635,6 +1630,12 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 
 	switch (pi->mode) {
 	case L2CAP_MODE_BASIC:
+		/* Check outgoing MTU */
+		if (len > pi->omtu) {
+			err = -EINVAL;
+			goto done;
+		}
+
 		/* Create a basic PDU */
 		skb = l2cap_create_basic_pdu(sk, msg, len);
 		if (IS_ERR(skb)) {
-- 
cgit v1.2.3-59-g8ed1b


From faaebd192ec9c3febcab98149d1309199a5b886c Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:35 -0300
Subject: Bluetooth: Fix memory leak of S-frames into L2CAP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

l2cap_data_channel() does not free the S-frame, so we free it here.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index c9a848d3ef94..46f22640a337 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3522,6 +3522,7 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str
 		break;
 	}
 
+	kfree_skb(skb);
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 7dffe4210233a2860c3f41477c40b3252edf2b7d Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:36 -0300
Subject: Bluetooth: Fix expected_tx_seq calculation on L2CAP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All operations related to the txWindow should be modulo 64.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 46f22640a337..401011a53c73 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3611,7 +3611,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		if (pi->expected_tx_seq == tx_seq)
 			pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
 		else
-			pi->expected_tx_seq = tx_seq + 1;
+			pi->expected_tx_seq = (tx_seq + 1) % 64;
 
 		l2cap_sar_reassembly_sdu(sk, skb, control);
 
-- 
cgit v1.2.3-59-g8ed1b


From d1daa091e8612f3aab14d28b5836375fafe155e1 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:36 -0300
Subject: Bluetooth: Fix ACL MTU issue
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ERTM and Streaming Modes were having problems when the ACL MTU is lower
than the MPS. The 'minus 10' is to take into account the header and FCS
lengths.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 401011a53c73..99cf1772b481 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2267,6 +2267,8 @@ done:
 		rfc.retrans_timeout = 0;
 		rfc.monitor_timeout = 0;
 		rfc.max_pdu_size    = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE);
+		if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10)
+			rfc.max_pdu_size = pi->conn->mtu - 10;
 
 		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
 					sizeof(rfc), (unsigned long) &rfc);
@@ -2288,6 +2290,8 @@ done:
 		rfc.retrans_timeout = 0;
 		rfc.monitor_timeout = 0;
 		rfc.max_pdu_size    = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE);
+		if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10)
+			rfc.max_pdu_size = pi->conn->mtu - 10;
 
 		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
 					sizeof(rfc), (unsigned long) &rfc);
-- 
cgit v1.2.3-59-g8ed1b


From e8235c6bdd1c7ffbaa7eb8dcdbb46c51f1e5d72e Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:36 -0300
Subject: Bluetooth: Use a l2cap_pinfo struct instead l2cap_pi() macro
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Trivial clean up.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 99cf1772b481..a9c152a09f0b 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1291,7 +1291,7 @@ static int l2cap_streaming_send(struct sock *sk)
 		control |= pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT;
 		put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
 
-		if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) {
+		if (pi->fcs == L2CAP_FCS_CRC16) {
 			fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2);
 			put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2);
 		}
@@ -1344,7 +1344,7 @@ static int l2cap_retransmit_frame(struct sock *sk, u8 tx_seq)
 				| (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
 		put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
 
-		if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) {
+		if (pi->fcs == L2CAP_FCS_CRC16) {
 			fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2);
 			put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2);
 		}
@@ -1388,7 +1388,7 @@ static int l2cap_ertm_send(struct sock *sk)
 		put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
 
 
-		if (l2cap_pi(sk)->fcs == L2CAP_FCS_CRC16) {
+		if (pi->fcs == L2CAP_FCS_CRC16) {
 			fcs = crc16(0, (u8 *)skb->data, tx_skb->len - 2);
 			put_unaligned_le16(fcs, skb->data + tx_skb->len - 2);
 		}
@@ -3518,10 +3518,10 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str
 		pi->expected_ack_seq = tx_seq;
 		l2cap_drop_acked_frames(sk);
 
-		del_timer(&l2cap_pi(sk)->retrans_timer);
+		del_timer(&pi->retrans_timer);
 		if (rx_control & L2CAP_CTRL_POLL) {
 			u16 control = L2CAP_CTRL_FINAL;
-			l2cap_send_rr_or_rnr(l2cap_pi(sk), control);
+			l2cap_send_rr_or_rnr(pi, control);
 		}
 		break;
 	}
@@ -3622,7 +3622,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		goto done;
 
 	default:
-		BT_DBG("sk %p: bad mode 0x%2.2x", sk, l2cap_pi(sk)->mode);
+		BT_DBG("sk %p: bad mode 0x%2.2x", sk, pi->mode);
 		break;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From d5392c8f1e9faef089bb7cb66c3314da8bddd1fe Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:36 -0300
Subject: Bluetooth: Implement 'Send IorRRorRNR' event
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After receiving an RR with the P bit set, ERTM shall use this function to
choose what type of frame to reply with, with the F bit = 1.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 20 +++++++++++---------
 net/bluetooth/l2cap.c         | 43 +++++++++++++++++++++++++++++++++++++++----
 2 files changed, 50 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 17a689f27a6a..d9c20c3d6f3d 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -320,7 +320,7 @@ struct l2cap_pinfo {
 	__u8		conf_req[64];
 	__u8		conf_len;
 	__u8		conf_state;
-	__u8		conn_state;
+	__u16		conn_state;
 
 	__u8		next_tx_seq;
 	__u8		expected_ack_seq;
@@ -328,6 +328,7 @@ struct l2cap_pinfo {
 	__u8		buffer_seq;
 	__u8		buffer_seq_srej;
 	__u8		srej_save_reqseq;
+	__u8		frames_sent;
 	__u8		unacked_frames;
 	__u8		retry_count;
 	__u8		num_to_ack;
@@ -367,14 +368,15 @@ struct l2cap_pinfo {
 #define L2CAP_CONF_MAX_CONF_REQ 2
 #define L2CAP_CONF_MAX_CONF_RSP 2
 
-#define L2CAP_CONN_SAR_SDU         0x01
-#define L2CAP_CONN_SREJ_SENT       0x02
-#define L2CAP_CONN_WAIT_F          0x04
-#define L2CAP_CONN_SREJ_ACT        0x08
-#define L2CAP_CONN_SEND_PBIT       0x10
-#define L2CAP_CONN_REMOTE_BUSY     0x20
-#define L2CAP_CONN_LOCAL_BUSY      0x40
-#define L2CAP_CONN_REJ_ACT         0x80
+#define L2CAP_CONN_SAR_SDU         0x0001
+#define L2CAP_CONN_SREJ_SENT       0x0002
+#define L2CAP_CONN_WAIT_F          0x0004
+#define L2CAP_CONN_SREJ_ACT        0x0008
+#define L2CAP_CONN_SEND_PBIT       0x0010
+#define L2CAP_CONN_REMOTE_BUSY     0x0020
+#define L2CAP_CONN_LOCAL_BUSY      0x0040
+#define L2CAP_CONN_REJ_ACT         0x0080
+#define L2CAP_CONN_SEND_FBIT       0x0100
 
 #define __mod_retrans_timer() mod_timer(&l2cap_pi(sk)->retrans_timer, \
 		jiffies +  msecs_to_jiffies(L2CAP_DEFAULT_RETRANS_TO));
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index a9c152a09f0b..06687e264703 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1383,6 +1383,10 @@ static int l2cap_ertm_send(struct sock *sk)
 		bt_cb(skb)->retries++;
 
 		control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
+		if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
+			control |= L2CAP_CTRL_FINAL;
+			pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
+		}
 		control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT)
 				| (pi->next_tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
 		put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
@@ -1404,6 +1408,7 @@ static int l2cap_ertm_send(struct sock *sk)
 		pi->next_tx_seq = (pi->next_tx_seq + 1) % 64;
 
 		pi->unacked_frames++;
+		pi->frames_sent++;
 
 		if (skb_queue_is_last(TX_QUEUE(sk), skb))
 			sk->sk_send_head = NULL;
@@ -2191,6 +2196,7 @@ static inline void l2cap_ertm_init(struct sock *sk)
 	l2cap_pi(sk)->unacked_frames = 0;
 	l2cap_pi(sk)->buffer_seq = 0;
 	l2cap_pi(sk)->num_to_ack = 0;
+	l2cap_pi(sk)->frames_sent = 0;
 
 	setup_timer(&l2cap_pi(sk)->retrans_timer,
 			l2cap_retrans_timeout, (unsigned long) sk);
@@ -3148,6 +3154,38 @@ static int l2cap_check_fcs(struct l2cap_pinfo *pi,  struct sk_buff *skb)
 	return 0;
 }
 
+static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	u16 control = 0;
+
+	pi->frames_sent = 0;
+	pi->conn_state |= L2CAP_CONN_SEND_FBIT;
+
+	control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
+
+	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
+		control |= L2CAP_SUPER_RCV_NOT_READY | L2CAP_CTRL_FINAL;
+		l2cap_send_sframe(pi, control);
+		pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
+	}
+
+	if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && pi->unacked_frames > 0)
+		__mod_retrans_timer();
+
+	l2cap_ertm_send(sk);
+
+	if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) &&
+			pi->frames_sent == 0) {
+		control |= L2CAP_SUPER_RCV_READY;
+		if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
+			control |= L2CAP_CTRL_FINAL;
+			pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
+		}
+		l2cap_send_sframe(pi, control);
+	}
+}
+
 static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar)
 {
 	struct sk_buff *next_skb;
@@ -3418,10 +3456,7 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str
 	switch (rx_control & L2CAP_CTRL_SUPERVISE) {
 	case L2CAP_SUPER_RCV_READY:
 		if (rx_control & L2CAP_CTRL_POLL) {
-			u16 control = L2CAP_CTRL_FINAL;
-			control |= L2CAP_SUPER_RCV_READY |
-				(pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT);
-			l2cap_send_sframe(l2cap_pi(sk), control);
+			l2cap_send_i_or_rr_or_rnr(sk);
 			pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
 
 		} else if (rx_control & L2CAP_CTRL_FINAL) {
-- 
cgit v1.2.3-59-g8ed1b


From 1d8f5d16913d74e428950ee02fe9ff7e6391c120 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:37 -0300
Subject: Bluetooth: Support case with F bit set under WAIT_F state.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On receipt of a frame with F=1 under the WAIT_F state, ERTM shall stop the
monitor timer and start the retransmission timer (if there are unacked frames).

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 06687e264703..36cd4e4e6ad1 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3364,6 +3364,13 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 
 	BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len);
 
+	if (L2CAP_CTRL_FINAL & rx_control) {
+		del_timer(&pi->monitor_timer);
+		if (pi->unacked_frames > 0)
+			__mod_retrans_timer();
+		pi->conn_state &= ~L2CAP_CONN_WAIT_F;
+	}
+
 	pi->expected_ack_seq = req_seq;
 	l2cap_drop_acked_frames(sk);
 
@@ -3453,6 +3460,13 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str
 
 	BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len);
 
+	if (L2CAP_CTRL_FINAL & rx_control) {
+		del_timer(&pi->monitor_timer);
+		if (pi->unacked_frames > 0)
+			__mod_retrans_timer();
+		pi->conn_state &= ~L2CAP_CONN_WAIT_F;
+	}
+
 	switch (rx_control & L2CAP_CTRL_SUPERVISE) {
 	case L2CAP_SUPER_RCV_READY:
 		if (rx_control & L2CAP_CTRL_POLL) {
@@ -3472,14 +3486,6 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str
 				l2cap_ertm_send(sk);
 			}
 
-			if (!(pi->conn_state & L2CAP_CONN_WAIT_F))
-				break;
-
-			pi->conn_state &= ~L2CAP_CONN_WAIT_F;
-			del_timer(&pi->monitor_timer);
-
-			if (pi->unacked_frames > 0)
-				__mod_retrans_timer();
 		} else {
 			pi->expected_ack_seq = tx_seq;
 			l2cap_drop_acked_frames(sk);
-- 
cgit v1.2.3-59-g8ed1b


From 277ffbe362823d18a17792fbd8e507010e666299 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:37 -0300
Subject: Bluetooth: Check the minimum {I,S}-frame size into L2CAP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All packets smaller than the specified minimum size are dropped.
Note that the sizes of the L2CAP basic header, FCS and SAR fields have
already been subtracted from len at the moment of the size check.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 36cd4e4e6ad1..ac00f5fac2d2 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3627,10 +3627,17 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		if (l2cap_check_fcs(pi, skb))
 			goto drop;
 
-		if (__is_iframe(control))
+		if (__is_iframe(control)) {
+			if (len < 4)
+				goto drop;
+
 			l2cap_data_channel_iframe(sk, control, skb);
-		else
+		} else {
+			if (len != 0)
+				goto drop;
+
 			l2cap_data_channel_sframe(sk, control, skb);
+		}
 
 		goto done;
 
@@ -3645,7 +3652,8 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		if (pi->fcs == L2CAP_FCS_CRC16)
 			len -= 2;
 
-		if (len > L2CAP_DEFAULT_MAX_PDU_SIZE || __is_sframe(control))
+		if (len > L2CAP_DEFAULT_MAX_PDU_SIZE || len < 4
+				|| __is_sframe(control))
 			goto drop;
 
 		if (l2cap_check_fcs(pi, skb))
-- 
cgit v1.2.3-59-g8ed1b


From 36f2fd585f43199f006a3b5ff84e95815102cd31 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:37 -0300
Subject: Bluetooth: Check if SDU size is greater than MTU on L2CAP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After reassembling the SDU we need to check its size. It must not exceed
the MTU size.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index ac00f5fac2d2..2e354d29f102 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3277,15 +3277,19 @@ static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 co
 		pi->conn_state &= ~L2CAP_CONN_SAR_SDU;
 		pi->partial_sdu_len += skb->len;
 
+		if (pi->partial_sdu_len > pi->imtu)
+			goto drop;
+
 		if (pi->partial_sdu_len == pi->sdu_len) {
 			_skb = skb_clone(pi->sdu, GFP_ATOMIC);
 			err = sock_queue_rcv_skb(sk, _skb);
 			if (err < 0)
 				kfree_skb(_skb);
 		}
-		kfree_skb(pi->sdu);
 		err = 0;
 
+drop:
+		kfree_skb(pi->sdu);
 		break;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 9e917af13d59182f95bbb5483dc0c4254dfb7944 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:37 -0300
Subject: Bluetooth: Implement SendAck() Action on ERTM.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Shall be used to ack received frames. It must decide the type of
acknowledgment: an RR frame, an RNR frame or transmission of
pending I-frames.
It also modifies l2cap_ertm_send() to report the number of frames sent.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 2e354d29f102..0a739ef167c2 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -352,6 +352,11 @@ static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
 	count = min_t(unsigned int, conn->mtu, hlen);
 	control |= L2CAP_CTRL_FRAME_TYPE;
 
+	if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
+		control |= L2CAP_CTRL_FINAL;
+		pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
+	}
+
 	skb = bt_skb_alloc(count, GFP_ATOMIC);
 	if (!skb)
 		return -ENOMEM;
@@ -1364,7 +1369,7 @@ static int l2cap_ertm_send(struct sock *sk)
 	struct sk_buff *skb, *tx_skb;
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	u16 control, fcs;
-	int err;
+	int err, nsent = 0;
 
 	if (pi->conn_state & L2CAP_CONN_WAIT_F)
 		return 0;
@@ -1414,8 +1419,27 @@ static int l2cap_ertm_send(struct sock *sk)
 			sk->sk_send_head = NULL;
 		else
 			sk->sk_send_head = skb_queue_next(TX_QUEUE(sk), skb);
+
+		nsent++;
 	}
 
+	return nsent;
+}
+
+static int l2cap_send_ack(struct l2cap_pinfo *pi)
+{
+	struct sock *sk = (struct sock *)pi;
+	u16 control = 0;
+
+	control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
+
+	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
+		control |= L2CAP_SUPER_RCV_NOT_READY;
+		return l2cap_send_sframe(pi, control);
+	} else if (l2cap_ertm_send(sk) == 0) {
+		control |= L2CAP_SUPER_RCV_READY;
+		return l2cap_send_sframe(pi, control);
+	}
 	return 0;
 }
 
@@ -1678,7 +1702,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 		else
 			err = l2cap_ertm_send(sk);
 
-		if (!err)
+		if (err >= 0)
 			err = len;
 		break;
 
@@ -3178,10 +3202,6 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
 	if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) &&
 			pi->frames_sent == 0) {
 		control |= L2CAP_SUPER_RCV_READY;
-		if (pi->conn_state & L2CAP_CONN_SEND_FBIT) {
-			control |= L2CAP_CTRL_FINAL;
-			pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
-		}
 		l2cap_send_sframe(pi, control);
 	}
 }
@@ -3362,7 +3382,6 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	u8 tx_seq = __get_txseq(rx_control);
 	u8 req_seq = __get_reqseq(rx_control);
-	u16 tx_control = 0;
 	u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT;
 	int err = 0;
 
@@ -3449,11 +3468,9 @@ expected:
 		return err;
 
 	pi->num_to_ack = (pi->num_to_ack + 1) % L2CAP_DEFAULT_NUM_TO_ACK;
-	if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1) {
-		tx_control |= L2CAP_SUPER_RCV_READY;
-		tx_control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
-		l2cap_send_sframe(pi, tx_control);
-	}
+	if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1)
+		l2cap_send_ack(pi);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From f0946ccfc7da403a46b7ff7cb2e3deffac108742 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:37 -0300
Subject: Bluetooth: Move set of P-bit to l2cap_send_sframe()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Abstract the sending of the P-bit and avoid code duplication, like we
did with the setting of the F-bit.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 0a739ef167c2..852c1400d069 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -357,6 +357,11 @@ static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
 		pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
 	}
 
+	if (pi->conn_state & L2CAP_CONN_SEND_PBIT) {
+		control |= L2CAP_CTRL_POLL;
+		pi->conn_state &= ~L2CAP_CONN_SEND_PBIT;
+	}
+
 	skb = bt_skb_alloc(count, GFP_ATOMIC);
 	if (!skb)
 		return -ENOMEM;
@@ -3364,10 +3369,6 @@ static void l2cap_send_srejframe(struct sock *sk, u8 tx_seq)
 	while (tx_seq != pi->expected_tx_seq) {
 		control = L2CAP_SUPER_SELECT_REJECT;
 		control |= pi->expected_tx_seq << L2CAP_CTRL_REQSEQ_SHIFT;
-		if (pi->conn_state & L2CAP_CONN_SEND_PBIT) {
-			control |= L2CAP_CTRL_POLL;
-			pi->conn_state &= ~L2CAP_CONN_SEND_PBIT;
-		}
 		l2cap_send_sframe(pi, control);
 
 		new = kzalloc(sizeof(struct srej_list), GFP_ATOMIC);
-- 
cgit v1.2.3-59-g8ed1b


From 73edaa9933219e25d6733b78d1e2c881025705e2 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:38 -0300
Subject: Bluetooth: Add Recv RR (P=0)(F=0) for SREJ_SENT state on ERTM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This finishes the implementation of Recv RR (P=0)(F=0) for the Enhanced
Retransmission Mode on L2CAP.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 852c1400d069..e5cd64ac6fb2 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3517,7 +3517,10 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str
 				__mod_retrans_timer();
 
 			pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
-			l2cap_ertm_send(sk);
+			if (pi->conn_state & L2CAP_CONN_SREJ_SENT)
+				l2cap_send_ack(pi);
+			else
+				l2cap_ertm_send(sk);
 		}
 		break;
 
-- 
cgit v1.2.3-59-g8ed1b


From e072745f4adb01b909bd08a0cfc8f79348f4d2c6 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:38 -0300
Subject: Bluetooth: Split l2cap_data_channel_sframe()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Create a function for each type of S-frame and avoid a lot of nested
code.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 204 ++++++++++++++++++++++++++++----------------------
 1 file changed, 115 insertions(+), 89 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index e5cd64ac6fb2..068edf7704bf 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3475,120 +3475,146 @@ expected:
 	return 0;
 }
 
-static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb)
+static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	u8 tx_seq = __get_reqseq(rx_control);
 
-	BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len);
-
-	if (L2CAP_CTRL_FINAL & rx_control) {
-		del_timer(&pi->monitor_timer);
-		if (pi->unacked_frames > 0)
-			__mod_retrans_timer();
-		pi->conn_state &= ~L2CAP_CONN_WAIT_F;
-	}
+	if (rx_control & L2CAP_CTRL_POLL) {
+		l2cap_send_i_or_rr_or_rnr(sk);
+		pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
 
-	switch (rx_control & L2CAP_CTRL_SUPERVISE) {
-	case L2CAP_SUPER_RCV_READY:
-		if (rx_control & L2CAP_CTRL_POLL) {
-			l2cap_send_i_or_rr_or_rnr(sk);
-			pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
-
-		} else if (rx_control & L2CAP_CTRL_FINAL) {
-			pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
-			pi->expected_ack_seq = tx_seq;
-			l2cap_drop_acked_frames(sk);
-
-			if (pi->conn_state & L2CAP_CONN_REJ_ACT)
-				pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
-			else {
-				sk->sk_send_head = TX_QUEUE(sk)->next;
-				pi->next_tx_seq = pi->expected_ack_seq;
-				l2cap_ertm_send(sk);
-			}
+	} else if (rx_control & L2CAP_CTRL_FINAL) {
+		pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
+		pi->expected_ack_seq = tx_seq;
+		l2cap_drop_acked_frames(sk);
 
-		} else {
-			pi->expected_ack_seq = tx_seq;
-			l2cap_drop_acked_frames(sk);
+		if (pi->conn_state & L2CAP_CONN_REJ_ACT)
+			pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
+		else {
+			sk->sk_send_head = TX_QUEUE(sk)->next;
+			pi->next_tx_seq = pi->expected_ack_seq;
+			l2cap_ertm_send(sk);
+		}
 
-			if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
-			    (pi->unacked_frames > 0))
-				__mod_retrans_timer();
+	} else {
+		pi->expected_ack_seq = tx_seq;
+		l2cap_drop_acked_frames(sk);
 
-			pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
-			if (pi->conn_state & L2CAP_CONN_SREJ_SENT)
-				l2cap_send_ack(pi);
-			else
-				l2cap_ertm_send(sk);
-		}
-		break;
+		if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
+				(pi->unacked_frames > 0))
+			__mod_retrans_timer();
 
-	case L2CAP_SUPER_REJECT:
 		pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
+		if (pi->conn_state & L2CAP_CONN_SREJ_SENT)
+			l2cap_send_ack(pi);
+		else
+			l2cap_ertm_send(sk);
+	}
+}
 
-		pi->expected_ack_seq = __get_reqseq(rx_control);
-		l2cap_drop_acked_frames(sk);
+static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	u8 tx_seq = __get_reqseq(rx_control);
 
-		if (rx_control & L2CAP_CTRL_FINAL) {
-			if (pi->conn_state & L2CAP_CONN_REJ_ACT)
-				pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
-			else {
-				sk->sk_send_head = TX_QUEUE(sk)->next;
-				pi->next_tx_seq = pi->expected_ack_seq;
-				l2cap_ertm_send(sk);
-			}
-		} else {
+	pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
+
+	pi->expected_ack_seq = __get_reqseq(rx_control);
+	l2cap_drop_acked_frames(sk);
+
+	if (rx_control & L2CAP_CTRL_FINAL) {
+		if (pi->conn_state & L2CAP_CONN_REJ_ACT)
+			pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
+		else {
 			sk->sk_send_head = TX_QUEUE(sk)->next;
 			pi->next_tx_seq = pi->expected_ack_seq;
 			l2cap_ertm_send(sk);
-
-			if (pi->conn_state & L2CAP_CONN_WAIT_F) {
-				pi->srej_save_reqseq = tx_seq;
-				pi->conn_state |= L2CAP_CONN_REJ_ACT;
-			}
 		}
+	} else {
+		sk->sk_send_head = TX_QUEUE(sk)->next;
+		pi->next_tx_seq = pi->expected_ack_seq;
+		l2cap_ertm_send(sk);
 
-		break;
+		if (pi->conn_state & L2CAP_CONN_WAIT_F) {
+			pi->srej_save_reqseq = tx_seq;
+			pi->conn_state |= L2CAP_CONN_REJ_ACT;
+		}
+	}
+}
+static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	u8 tx_seq = __get_reqseq(rx_control);
 
-	case L2CAP_SUPER_SELECT_REJECT:
-		pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
+	pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
 
-		if (rx_control & L2CAP_CTRL_POLL) {
-			pi->expected_ack_seq = tx_seq;
-			l2cap_drop_acked_frames(sk);
-			l2cap_retransmit_frame(sk, tx_seq);
-			l2cap_ertm_send(sk);
-			if (pi->conn_state & L2CAP_CONN_WAIT_F) {
-				pi->srej_save_reqseq = tx_seq;
-				pi->conn_state |= L2CAP_CONN_SREJ_ACT;
-			}
-		} else if (rx_control & L2CAP_CTRL_FINAL) {
-			if ((pi->conn_state & L2CAP_CONN_SREJ_ACT) &&
-					pi->srej_save_reqseq == tx_seq)
-				pi->conn_state &= ~L2CAP_CONN_SREJ_ACT;
-			else
-				l2cap_retransmit_frame(sk, tx_seq);
+	if (rx_control & L2CAP_CTRL_POLL) {
+		pi->expected_ack_seq = tx_seq;
+		l2cap_drop_acked_frames(sk);
+		l2cap_retransmit_frame(sk, tx_seq);
+		l2cap_ertm_send(sk);
+		if (pi->conn_state & L2CAP_CONN_WAIT_F) {
+			pi->srej_save_reqseq = tx_seq;
+			pi->conn_state |= L2CAP_CONN_SREJ_ACT;
 		}
-		else {
+	} else if (rx_control & L2CAP_CTRL_FINAL) {
+		if ((pi->conn_state & L2CAP_CONN_SREJ_ACT) &&
+				pi->srej_save_reqseq == tx_seq)
+			pi->conn_state &= ~L2CAP_CONN_SREJ_ACT;
+		else
 			l2cap_retransmit_frame(sk, tx_seq);
-			if (pi->conn_state & L2CAP_CONN_WAIT_F) {
-				pi->srej_save_reqseq = tx_seq;
-				pi->conn_state |= L2CAP_CONN_SREJ_ACT;
-			}
+	} else {
+		l2cap_retransmit_frame(sk, tx_seq);
+		if (pi->conn_state & L2CAP_CONN_WAIT_F) {
+			pi->srej_save_reqseq = tx_seq;
+			pi->conn_state |= L2CAP_CONN_SREJ_ACT;
 		}
+	}
+}
+
+static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	u8 tx_seq = __get_reqseq(rx_control);
+
+	pi->conn_state |= L2CAP_CONN_REMOTE_BUSY;
+	pi->expected_ack_seq = tx_seq;
+	l2cap_drop_acked_frames(sk);
+
+	del_timer(&pi->retrans_timer);
+	if (rx_control & L2CAP_CTRL_POLL) {
+		u16 control = L2CAP_CTRL_FINAL;
+		l2cap_send_rr_or_rnr(pi, control);
+	}
+}
+
+static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb)
+{
+	BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len);
+
+	if (L2CAP_CTRL_FINAL & rx_control) {
+		del_timer(&l2cap_pi(sk)->monitor_timer);
+		if (l2cap_pi(sk)->unacked_frames > 0)
+			__mod_retrans_timer();
+		l2cap_pi(sk)->conn_state &= ~L2CAP_CONN_WAIT_F;
+	}
+
+	switch (rx_control & L2CAP_CTRL_SUPERVISE) {
+	case L2CAP_SUPER_RCV_READY:
+		l2cap_data_channel_rrframe(sk, rx_control);
 		break;
 
-	case L2CAP_SUPER_RCV_NOT_READY:
-		pi->conn_state |= L2CAP_CONN_REMOTE_BUSY;
-		pi->expected_ack_seq = tx_seq;
-		l2cap_drop_acked_frames(sk);
+	case L2CAP_SUPER_REJECT:
+		l2cap_data_channel_rejframe(sk, rx_control);
+		break;
 
-		del_timer(&pi->retrans_timer);
-		if (rx_control & L2CAP_CTRL_POLL) {
-			u16 control = L2CAP_CTRL_FINAL;
-			l2cap_send_rr_or_rnr(pi, control);
-		}
+	case L2CAP_SUPER_SELECT_REJECT:
+		l2cap_data_channel_srejframe(sk, rx_control);
+		break;
+
+	case L2CAP_SUPER_RCV_NOT_READY:
+		l2cap_data_channel_rnrframe(sk, rx_control);
 		break;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 99b0d4b7b09edeacf4542bced5c01239375b51a9 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:38 -0300
Subject: Bluetooth: Handle all cases of receipt of RNR-frames into L2CAP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We weren't handling the receipt of RNR-frames in the SREJ_SENT state
table. This commit also introduces l2cap_send_srejtail(), which will be
used in the next commits too.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 068edf7704bf..8937a842347a 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1448,6 +1448,22 @@ static int l2cap_send_ack(struct l2cap_pinfo *pi)
 	return 0;
 }
 
+static int l2cap_send_srejtail(struct sock *sk)
+{
+	struct srej_list *tail;
+	u16 control;
+
+	control = L2CAP_SUPER_SELECT_REJECT;
+	control |= L2CAP_CTRL_FINAL;
+
+	tail = list_entry(SREJ_LIST(sk)->prev, struct srej_list, list);
+	control |= tail->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT;
+
+	l2cap_send_sframe(l2cap_pi(sk), control);
+
+	return 0;
+}
+
 static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, int len, int count, struct sk_buff *skb)
 {
 	struct l2cap_conn *conn = l2cap_pi(sk)->conn;
@@ -3582,11 +3598,19 @@ static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control)
 	pi->expected_ack_seq = tx_seq;
 	l2cap_drop_acked_frames(sk);
 
-	del_timer(&pi->retrans_timer);
-	if (rx_control & L2CAP_CTRL_POLL) {
-		u16 control = L2CAP_CTRL_FINAL;
-		l2cap_send_rr_or_rnr(pi, control);
+	if (!(pi->conn_state & L2CAP_CONN_SREJ_SENT)) {
+		del_timer(&pi->retrans_timer);
+		if (rx_control & L2CAP_CTRL_POLL) {
+			u16 control = L2CAP_CTRL_FINAL;
+			l2cap_send_rr_or_rnr(pi, control);
+		}
+		return;
 	}
+
+	if (rx_control & L2CAP_CTRL_POLL)
+		l2cap_send_srejtail(sk);
+	else
+		l2cap_send_sframe(pi, L2CAP_SUPER_RCV_READY);
 }
 
 static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, struct sk_buff *skb)
-- 
cgit v1.2.3-59-g8ed1b


From 6e3a59819fac19006fe4255b87928e5a12c54532 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:38 -0300
Subject: Bluetooth: Group the ack of I-frames into
 l2cap_data_channel_rrframe()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It also fixes a bug: we weren't acknowledging I-frames when P=1.
Note that when F=1 we are acknowledging packets before setting
RemoteBusy to False. The spec says we should do that in the opposite
order, but acknowledgment of packets doesn't care about the RemoteBusy
flag, so we can do that in the order we want.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 8937a842347a..d096c7c11ab5 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3494,7 +3494,9 @@ expected:
 static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
-	u8 tx_seq = __get_reqseq(rx_control);
+
+	pi->expected_ack_seq = __get_reqseq(rx_control);
+	l2cap_drop_acked_frames(sk);
 
 	if (rx_control & L2CAP_CTRL_POLL) {
 		l2cap_send_i_or_rr_or_rnr(sk);
@@ -3502,8 +3504,6 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 
 	} else if (rx_control & L2CAP_CTRL_FINAL) {
 		pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
-		pi->expected_ack_seq = tx_seq;
-		l2cap_drop_acked_frames(sk);
 
 		if (pi->conn_state & L2CAP_CONN_REJ_ACT)
 			pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
@@ -3514,9 +3514,6 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 		}
 
 	} else {
-		pi->expected_ack_seq = tx_seq;
-		l2cap_drop_acked_frames(sk);
-
 		if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
 				(pi->unacked_frames > 0))
 			__mod_retrans_timer();
-- 
cgit v1.2.3-59-g8ed1b


From 8abb52ee00c4b3f857269eb6b7145991bab869bf Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:38 -0300
Subject: Bluetooth: Remove duplicate use of __get_reqseq() macro on L2CAP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

tx_seq var already has the value of __get_reqseq().

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index d096c7c11ab5..e9ac9fb11917 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3533,7 +3533,7 @@ static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control)
 
 	pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
 
-	pi->expected_ack_seq = __get_reqseq(rx_control);
+	pi->expected_ack_seq = tx_seq;
 	l2cap_drop_acked_frames(sk);
 
 	if (rx_control & L2CAP_CTRL_FINAL) {
-- 
cgit v1.2.3-59-g8ed1b


From 05fbd89dd4153341717b33d9e8ae8bd29db6c1c8 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:39 -0300
Subject: Bluetooth: Finish implementation for Rec RR (P=1) on ERTM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now the code handles the case under SREJ_SENT state.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index e9ac9fb11917..f3869857ee9f 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3499,8 +3499,17 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 	l2cap_drop_acked_frames(sk);
 
 	if (rx_control & L2CAP_CTRL_POLL) {
-		l2cap_send_i_or_rr_or_rnr(sk);
-		pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
+		if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
+			if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
+					(pi->unacked_frames > 0))
+				__mod_retrans_timer();
+
+			pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
+			l2cap_send_srejtail(sk);
+		} else {
+			l2cap_send_i_or_rr_or_rnr(sk);
+			pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
+		}
 
 	} else if (rx_control & L2CAP_CTRL_FINAL) {
 		pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
-- 
cgit v1.2.3-59-g8ed1b


From c1b4f43be01c2a363be021485dd18cca33cfab8a Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:39 -0300
Subject: Bluetooth: Add timer to Acknowledge I-frames
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We ack I-frames on each txWindow/5 I-frames received, but if the sender
stops sending I-frames and it's not a txWindow multiple, we can leave
some frames unacked.
So I added a timer to ack I-frames in this case. The timer expires in
200ms.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h |  4 ++++
 net/bluetooth/l2cap.c         | 15 +++++++++++++++
 2 files changed, 19 insertions(+)

(limited to 'net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index d9c20c3d6f3d..48f10f46c73d 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -35,6 +35,7 @@
 #define L2CAP_DEFAULT_RETRANS_TO	1000    /* 1 second */
 #define L2CAP_DEFAULT_MONITOR_TO	12000   /* 12 seconds */
 #define L2CAP_DEFAULT_MAX_PDU_SIZE	672
+#define L2CAP_DEFAULT_ACK_TO		200
 
 #define L2CAP_CONN_TIMEOUT	(40000) /* 40 seconds */
 #define L2CAP_INFO_TIMEOUT	(4000)  /*  4 seconds */
@@ -348,6 +349,7 @@ struct l2cap_pinfo {
 
 	struct timer_list	retrans_timer;
 	struct timer_list	monitor_timer;
+	struct timer_list	ack_timer;
 	struct sk_buff_head	tx_queue;
 	struct sk_buff_head	srej_queue;
 	struct srej_list	srej_l;
@@ -382,6 +384,8 @@ struct l2cap_pinfo {
 		jiffies +  msecs_to_jiffies(L2CAP_DEFAULT_RETRANS_TO));
 #define __mod_monitor_timer() mod_timer(&l2cap_pi(sk)->monitor_timer, \
 		jiffies + msecs_to_jiffies(L2CAP_DEFAULT_MONITOR_TO));
+#define __mod_ack_timer() mod_timer(&l2cap_pi(sk)->ack_timer, \
+		jiffies + msecs_to_jiffies(L2CAP_DEFAULT_ACK_TO));
 
 static inline int l2cap_tx_window_full(struct sock *sk)
 {
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index f3869857ee9f..03006568f8a1 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2235,6 +2235,15 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val)
 	*ptr += L2CAP_CONF_OPT_SIZE + len;
 }
 
+static void l2cap_ack_timeout(unsigned long arg)
+{
+	struct sock *sk = (void *) arg;
+
+	bh_lock_sock(sk);
+	l2cap_send_ack(l2cap_pi(sk));
+	bh_unlock_sock(sk);
+}
+
 static inline void l2cap_ertm_init(struct sock *sk)
 {
 	l2cap_pi(sk)->expected_ack_seq = 0;
@@ -2247,6 +2256,8 @@ static inline void l2cap_ertm_init(struct sock *sk)
 			l2cap_retrans_timeout, (unsigned long) sk);
 	setup_timer(&l2cap_pi(sk)->monitor_timer,
 			l2cap_monitor_timeout, (unsigned long) sk);
+	setup_timer(&l2cap_pi(sk)->ack_timer,
+			l2cap_ack_timeout, (unsigned long) sk);
 
 	__skb_queue_head_init(SREJ_QUEUE(sk));
 }
@@ -2975,6 +2986,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd
 		skb_queue_purge(SREJ_QUEUE(sk));
 		del_timer(&l2cap_pi(sk)->retrans_timer);
 		del_timer(&l2cap_pi(sk)->monitor_timer);
+		del_timer(&l2cap_pi(sk)->ack_timer);
 	}
 
 	l2cap_chan_del(sk, ECONNRESET);
@@ -3005,6 +3017,7 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd
 		skb_queue_purge(SREJ_QUEUE(sk));
 		del_timer(&l2cap_pi(sk)->retrans_timer);
 		del_timer(&l2cap_pi(sk)->monitor_timer);
+		del_timer(&l2cap_pi(sk)->ack_timer);
 	}
 
 	l2cap_chan_del(sk, 0);
@@ -3484,6 +3497,8 @@ expected:
 	if (err < 0)
 		return err;
 
+	__mod_ack_timer();
+
 	pi->num_to_ack = (pi->num_to_ack + 1) % L2CAP_DEFAULT_NUM_TO_ACK;
 	if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1)
 		l2cap_send_ack(pi);
-- 
cgit v1.2.3-59-g8ed1b


From 2fb862e215e53630066c677e06d7551fa38bf235 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:39 -0300
Subject: Bluetooth: Ignore Tx Window value with Streaming mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tx Window value shall not be used with Streaming Mode and the receiver
of the config Request shall ignore its value.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 03006568f8a1..f604405fe667 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2495,7 +2495,6 @@ done:
 			break;
 
 		case L2CAP_MODE_STREAMING:
-			pi->remote_tx_win = rfc.txwin_size;
 			pi->max_pdu_size = rfc.max_pdu_size;
 
 			pi->conf_state |= L2CAP_CONF_MODE_DONE;
-- 
cgit v1.2.3-59-g8ed1b


From 7b1c0049be3aabc18831ada339dbcf41ba8c81fd Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:39 -0300
Subject: Bluetooth: Read RFC conf option on a successful Conf RSP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In Enhanced Retransmission Mode and Streaming Mode an entity can send, in
a successful Conf RSP, new values for the RFC fields. For example, the
entity can send txWindow and MPS values less than the values received in
a Conf REQ.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index f604405fe667..c50c05738fb6 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2602,6 +2602,42 @@ static int l2cap_build_conf_rsp(struct sock *sk, void *data, u16 result, u16 fla
 	return ptr - data;
 }
 
+static void l2cap_conf_rfc_get(struct sock *sk, void *rsp, int len)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	int type, olen;
+	unsigned long val;
+	struct l2cap_conf_rfc rfc;
+
+	BT_DBG("sk %p, rsp %p, len %d", sk, rsp, len);
+
+	if ((pi->mode != L2CAP_MODE_ERTM) && (pi->mode != L2CAP_MODE_STREAMING))
+		return;
+
+	while (len >= L2CAP_CONF_OPT_SIZE) {
+		len -= l2cap_get_conf_opt(&rsp, &type, &olen, &val);
+
+		switch (type) {
+		case L2CAP_CONF_RFC:
+			if (olen == sizeof(rfc))
+				memcpy(&rfc, (void *)val, olen);
+			goto done;
+		}
+	}
+
+done:
+	switch (rfc.mode) {
+	case L2CAP_MODE_ERTM:
+		pi->remote_tx_win   = rfc.txwin_size;
+		pi->retrans_timeout = rfc.retrans_timeout;
+		pi->monitor_timeout = rfc.monitor_timeout;
+		pi->mps    = le16_to_cpu(rfc.max_pdu_size);
+		break;
+	case L2CAP_MODE_STREAMING:
+		pi->mps    = le16_to_cpu(rfc.max_pdu_size);
+	}
+}
+
 static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hdr *cmd, u8 *data)
 {
 	struct l2cap_cmd_rej *rej = (struct l2cap_cmd_rej *) data;
@@ -2881,6 +2917,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 	struct l2cap_conf_rsp *rsp = (struct l2cap_conf_rsp *)data;
 	u16 scid, flags, result;
 	struct sock *sk;
+	int len = cmd->len - sizeof(*rsp);
 
 	scid   = __le16_to_cpu(rsp->scid);
 	flags  = __le16_to_cpu(rsp->flags);
@@ -2895,11 +2932,11 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hdr
 
 	switch (result) {
 	case L2CAP_CONF_SUCCESS:
+		l2cap_conf_rfc_get(sk, rsp->data, len);
 		break;
 
 	case L2CAP_CONF_UNACCEPT:
 		if (l2cap_pi(sk)->num_conf_rsp <= L2CAP_CONF_MAX_CONF_RSP) {
-			int len = cmd->len - sizeof(*rsp);
 			char req[64];
 
 			if (len > sizeof(req) - sizeof(struct l2cap_conf_req)) {
-- 
cgit v1.2.3-59-g8ed1b


From 1c7621596d11b9c3e19eb88a818758dee4901c95 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:40 -0300
Subject: Bluetooth: Fix configuration of the MPS value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We were accepting MPS values bigger than we can handle. This was leading
ERTM to drop packets because of wrong FCS checks.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h |  3 ++-
 net/bluetooth/l2cap.c         | 36 ++++++++++++++++++++----------------
 2 files changed, 22 insertions(+), 17 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 48f10f46c73d..0f4e4234c5fa 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -343,7 +343,8 @@ struct l2cap_pinfo {
 	__u8		remote_max_tx;
 	__u16		retrans_timeout;
 	__u16		monitor_timeout;
-	__u16		max_pdu_size;
+	__u16		remote_mps;
+	__u16		mps;
 
 	__le16		sport;
 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index c50c05738fb6..94be5dbb2569 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1606,21 +1606,21 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
 
 	__skb_queue_head_init(&sar_queue);
 	control = L2CAP_SDU_START;
-	skb = l2cap_create_iframe_pdu(sk, msg, pi->max_pdu_size, control, len);
+	skb = l2cap_create_iframe_pdu(sk, msg, pi->remote_mps, control, len);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
 	__skb_queue_tail(&sar_queue, skb);
-	len -= pi->max_pdu_size;
-	size +=pi->max_pdu_size;
+	len -= pi->remote_mps;
+	size += pi->remote_mps;
 	control = 0;
 
 	while (len > 0) {
 		size_t buflen;
 
-		if (len > pi->max_pdu_size) {
+		if (len > pi->remote_mps) {
 			control |= L2CAP_SDU_CONTINUE;
-			buflen = pi->max_pdu_size;
+			buflen = pi->remote_mps;
 		} else {
 			control |= L2CAP_SDU_END;
 			buflen = len;
@@ -1701,7 +1701,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 	case L2CAP_MODE_ERTM:
 	case L2CAP_MODE_STREAMING:
 		/* Entire SDU fits into one PDU */
-		if (len <= pi->max_pdu_size) {
+		if (len <= pi->remote_mps) {
 			control = L2CAP_SDU_UNSEGMENTED;
 			skb = l2cap_create_iframe_pdu(sk, msg, len, control, 0);
 			if (IS_ERR(skb)) {
@@ -2330,7 +2330,7 @@ done:
 		rfc.monitor_timeout = 0;
 		rfc.max_pdu_size    = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE);
 		if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10)
-			rfc.max_pdu_size = pi->conn->mtu - 10;
+			rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10);
 
 		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
 					sizeof(rfc), (unsigned long) &rfc);
@@ -2353,7 +2353,7 @@ done:
 		rfc.monitor_timeout = 0;
 		rfc.max_pdu_size    = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE);
 		if (L2CAP_DEFAULT_MAX_PDU_SIZE > pi->conn->mtu - 10)
-			rfc.max_pdu_size = pi->conn->mtu - 10;
+			rfc.max_pdu_size = cpu_to_le16(pi->conn->mtu - 10);
 
 		l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC,
 					sizeof(rfc), (unsigned long) &rfc);
@@ -2482,7 +2482,10 @@ done:
 		case L2CAP_MODE_ERTM:
 			pi->remote_tx_win = rfc.txwin_size;
 			pi->remote_max_tx = rfc.max_transmit;
-			pi->max_pdu_size = rfc.max_pdu_size;
+			if (rfc.max_pdu_size > pi->conn->mtu - 10)
+				rfc.max_pdu_size = le16_to_cpu(pi->conn->mtu - 10);
+
+			pi->remote_mps = le16_to_cpu(rfc.max_pdu_size);
 
 			rfc.retrans_timeout = L2CAP_DEFAULT_RETRANS_TO;
 			rfc.monitor_timeout = L2CAP_DEFAULT_MONITOR_TO;
@@ -2495,7 +2498,10 @@ done:
 			break;
 
 		case L2CAP_MODE_STREAMING:
-			pi->max_pdu_size = rfc.max_pdu_size;
+			if (rfc.max_pdu_size > pi->conn->mtu - 10)
+				rfc.max_pdu_size = le16_to_cpu(pi->conn->mtu - 10);
+
+			pi->remote_mps = le16_to_cpu(rfc.max_pdu_size);
 
 			pi->conf_state |= L2CAP_CONF_MODE_DONE;
 
@@ -2574,11 +2580,10 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data,
 			pi->remote_tx_win   = rfc.txwin_size;
 			pi->retrans_timeout = rfc.retrans_timeout;
 			pi->monitor_timeout = rfc.monitor_timeout;
-			pi->max_pdu_size    = le16_to_cpu(rfc.max_pdu_size);
+			pi->mps    = le16_to_cpu(rfc.max_pdu_size);
 			break;
 		case L2CAP_MODE_STREAMING:
-			pi->max_pdu_size    = le16_to_cpu(rfc.max_pdu_size);
-			break;
+			pi->mps    = le16_to_cpu(rfc.max_pdu_size);
 		}
 	}
 
@@ -3753,7 +3758,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		 * Receiver will miss it and start proper recovery
 		 * procedures and ask retransmission.
 		 */
-		if (len > L2CAP_DEFAULT_MAX_PDU_SIZE)
+		if (len > pi->mps)
 			goto drop;
 
 		if (l2cap_check_fcs(pi, skb))
@@ -3784,8 +3789,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		if (pi->fcs == L2CAP_FCS_CRC16)
 			len -= 2;
 
-		if (len > L2CAP_DEFAULT_MAX_PDU_SIZE || len < 4
-				|| __is_sframe(control))
+		if (len > pi->mps || len < 4 || __is_sframe(control))
 			goto drop;
 
 		if (l2cap_check_fcs(pi, skb))
-- 
cgit v1.2.3-59-g8ed1b


From 10467e9e9b89272b25b56688bb276d0830e9ab9a Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:40 -0300
Subject: Bluetooth: Add le16 macro to Retransmission and Monitor Timeouts
 values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix a possible problem with Big Endian machines.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 94be5dbb2569..0889949b6896 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2487,8 +2487,10 @@ done:
 
 			pi->remote_mps = le16_to_cpu(rfc.max_pdu_size);
 
-			rfc.retrans_timeout = L2CAP_DEFAULT_RETRANS_TO;
-			rfc.monitor_timeout = L2CAP_DEFAULT_MONITOR_TO;
+			rfc.retrans_timeout =
+				le16_to_cpu(L2CAP_DEFAULT_RETRANS_TO);
+			rfc.monitor_timeout =
+				le16_to_cpu(L2CAP_DEFAULT_MONITOR_TO);
 
 			pi->conf_state |= L2CAP_CONF_MODE_DONE;
 
@@ -2578,8 +2580,8 @@ static int l2cap_parse_conf_rsp(struct sock *sk, void *rsp, int len, void *data,
 		switch (rfc.mode) {
 		case L2CAP_MODE_ERTM:
 			pi->remote_tx_win   = rfc.txwin_size;
-			pi->retrans_timeout = rfc.retrans_timeout;
-			pi->monitor_timeout = rfc.monitor_timeout;
+			pi->retrans_timeout = le16_to_cpu(rfc.retrans_timeout);
+			pi->monitor_timeout = le16_to_cpu(rfc.monitor_timeout);
 			pi->mps    = le16_to_cpu(rfc.max_pdu_size);
 			break;
 		case L2CAP_MODE_STREAMING:
@@ -2634,8 +2636,8 @@ done:
 	switch (rfc.mode) {
 	case L2CAP_MODE_ERTM:
 		pi->remote_tx_win   = rfc.txwin_size;
-		pi->retrans_timeout = rfc.retrans_timeout;
-		pi->monitor_timeout = rfc.monitor_timeout;
+		pi->retrans_timeout = le16_to_cpu(rfc.retrans_timeout);
+		pi->monitor_timeout = le16_to_cpu(rfc.monitor_timeout);
 		pi->mps    = le16_to_cpu(rfc.max_pdu_size);
 		break;
 	case L2CAP_MODE_STREAMING:
-- 
cgit v1.2.3-59-g8ed1b


From 052897ca5019d9157ae09e5e84eee2a9ef5dccc6 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:40 -0300
Subject: Bluetooth: Check the SDU size against the MTU value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If the SDU size is greater than the MTU something is wrong, so report
an error.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
[jprvita@profusion.mobi: set err to appropriate errno value]
Signed-off-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 0889949b6896..e936913c921e 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3338,6 +3338,11 @@ static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 co
 		pi->sdu_len = get_unaligned_le16(skb->data);
 		skb_pull(skb, 2);
 
+		if (pi->sdu_len > pi->imtu) {
+			err = -EMSGSIZE;
+			break;
+		}
+
 		pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC);
 		if (!pi->sdu) {
 			err = -ENOMEM;
-- 
cgit v1.2.3-59-g8ed1b


From 855666cccc939d392316de17512e17a08b2fa05a Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:40 -0300
Subject: Bluetooth: Send Ack after clear the SREJ list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As specified by the Bluetooth 3.0 spec, we shall send an acknowledgment
using Send-Ack() after clearing the SREJ list.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index e936913c921e..c6bc1b9ed657 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3493,6 +3493,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 			if (list_empty(SREJ_LIST(sk))) {
 				pi->buffer_seq = pi->buffer_seq_srej;
 				pi->conn_state &= ~L2CAP_CONN_SREJ_SENT;
+				l2cap_send_ack(pi);
 			}
 		} else {
 			struct srej_list *l;
-- 
cgit v1.2.3-59-g8ed1b


From 14b5aa71ec506f4e38ca6a1dc02ecd668ecfd902 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:40 -0300
Subject: Bluetooth: Add sockopt configuration for txWindow on L2CAP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now we can set/get Transmission Window size via sockopt.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 2 ++
 net/bluetooth/l2cap.c         | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 0f4e4234c5fa..07ae4ccfffbd 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -56,6 +56,7 @@ struct l2cap_options {
 	__u16 flush_to;
 	__u8  mode;
 	__u8  fcs;
+	__u16 txwin_size;
 };
 
 #define L2CAP_CONNINFO	0x02
@@ -339,6 +340,7 @@ struct l2cap_pinfo {
 
 	__u8		ident;
 
+	__u8		tx_win;
 	__u8		remote_tx_win;
 	__u8		remote_max_tx;
 	__u16		retrans_timeout;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index c6bc1b9ed657..530079649b43 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -782,6 +782,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
 		pi->omtu = l2cap_pi(parent)->omtu;
 		pi->mode = l2cap_pi(parent)->mode;
 		pi->fcs  = l2cap_pi(parent)->fcs;
+		pi->tx_win = l2cap_pi(parent)->tx_win;
 		pi->sec_level = l2cap_pi(parent)->sec_level;
 		pi->role_switch = l2cap_pi(parent)->role_switch;
 		pi->force_reliable = l2cap_pi(parent)->force_reliable;
@@ -790,6 +791,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
 		pi->omtu = 0;
 		pi->mode = L2CAP_MODE_BASIC;
 		pi->fcs  = L2CAP_FCS_CRC16;
+		pi->tx_win = L2CAP_DEFAULT_TX_WINDOW;
 		pi->sec_level = BT_SECURITY_LOW;
 		pi->role_switch = 0;
 		pi->force_reliable = 0;
@@ -1782,6 +1784,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us
 		opts.flush_to = l2cap_pi(sk)->flush_to;
 		opts.mode     = l2cap_pi(sk)->mode;
 		opts.fcs      = l2cap_pi(sk)->fcs;
+		opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
 
 		len = min_t(unsigned int, sizeof(opts), optlen);
 		if (copy_from_user((char *) &opts, optval, len)) {
@@ -1793,6 +1796,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us
 		l2cap_pi(sk)->omtu = opts.omtu;
 		l2cap_pi(sk)->mode = opts.mode;
 		l2cap_pi(sk)->fcs  = opts.fcs;
+		l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size;
 		break;
 
 	case L2CAP_LM:
@@ -1907,6 +1911,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us
 		opts.flush_to = l2cap_pi(sk)->flush_to;
 		opts.mode     = l2cap_pi(sk)->mode;
 		opts.fcs      = l2cap_pi(sk)->fcs;
+		opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
 
 		len = min_t(unsigned int, len, sizeof(opts));
 		if (copy_to_user(optval, (char *) &opts, len))
@@ -2324,7 +2329,7 @@ done:
 
 	case L2CAP_MODE_ERTM:
 		rfc.mode            = L2CAP_MODE_ERTM;
-		rfc.txwin_size      = L2CAP_DEFAULT_TX_WINDOW;
+		rfc.txwin_size      = pi->tx_win;
 		rfc.max_transmit    = max_transmit;
 		rfc.retrans_timeout = 0;
 		rfc.monitor_timeout = 0;
-- 
cgit v1.2.3-59-g8ed1b


From 803020c6fa63aa738cfda3329c9675b42023e9d2 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:41 -0300
Subject: Bluetooth: Change acknowledgement to use the value of txWindow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that we can set the txWindow we need to change the acknowledgement
procedure to ack after each (pi->txWindow/6 + 1). The plus 1 is to avoid
the zero value.
It also renames pi->num_to_ack to a better name: pi->num_acked.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 3 +--
 net/bluetooth/l2cap.c         | 7 ++++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 07ae4ccfffbd..059260bed4e8 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -30,7 +30,6 @@
 #define L2CAP_DEFAULT_MIN_MTU		48
 #define L2CAP_DEFAULT_FLUSH_TO		0xffff
 #define L2CAP_DEFAULT_TX_WINDOW		63
-#define L2CAP_DEFAULT_NUM_TO_ACK        (L2CAP_DEFAULT_TX_WINDOW/5)
 #define L2CAP_DEFAULT_MAX_TX		3
 #define L2CAP_DEFAULT_RETRANS_TO	1000    /* 1 second */
 #define L2CAP_DEFAULT_MONITOR_TO	12000   /* 12 seconds */
@@ -333,7 +332,7 @@ struct l2cap_pinfo {
 	__u8		frames_sent;
 	__u8		unacked_frames;
 	__u8		retry_count;
-	__u8		num_to_ack;
+	__u8		num_acked;
 	__u16		sdu_len;
 	__u16		partial_sdu_len;
 	struct sk_buff	*sdu;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 530079649b43..0b0b237bb786 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -2254,7 +2254,7 @@ static inline void l2cap_ertm_init(struct sock *sk)
 	l2cap_pi(sk)->expected_ack_seq = 0;
 	l2cap_pi(sk)->unacked_frames = 0;
 	l2cap_pi(sk)->buffer_seq = 0;
-	l2cap_pi(sk)->num_to_ack = 0;
+	l2cap_pi(sk)->num_acked = 0;
 	l2cap_pi(sk)->frames_sent = 0;
 
 	setup_timer(&l2cap_pi(sk)->retrans_timer,
@@ -3466,6 +3466,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 	u8 tx_seq = __get_txseq(rx_control);
 	u8 req_seq = __get_reqseq(rx_control);
 	u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT;
+	int num_to_ack = (pi->tx_win/6) + 1;
 	int err = 0;
 
 	BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len);
@@ -3553,8 +3554,8 @@ expected:
 
 	__mod_ack_timer();
 
-	pi->num_to_ack = (pi->num_to_ack + 1) % L2CAP_DEFAULT_NUM_TO_ACK;
-	if (pi->num_to_ack == L2CAP_DEFAULT_NUM_TO_ACK - 1)
+	pi->num_acked = (pi->num_acked + 1) % num_to_ack;
+	if (pi->num_acked == num_to_ack - 1)
 		l2cap_send_ack(pi);
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 369ba30264826f38eefc61b93688100be8adbd4d Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:41 -0300
Subject: Bluetooth: Add module parameter for txWindow size on L2CAP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Very useful for testing purposes.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
[jprvita@profusion.mobi: improved parameter description]
Signed-off-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 0b0b237bb786..ff1466b21580 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -57,6 +57,7 @@
 
 static int enable_ertm = 0;
 static int max_transmit = L2CAP_DEFAULT_MAX_TX;
+static int tx_window = L2CAP_DEFAULT_TX_WINDOW;
 
 static u32 l2cap_feat_mask = L2CAP_FEAT_FIXED_CHAN;
 static u8 l2cap_fixed_chan[8] = { 0x02, };
@@ -791,7 +792,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
 		pi->omtu = 0;
 		pi->mode = L2CAP_MODE_BASIC;
 		pi->fcs  = L2CAP_FCS_CRC16;
-		pi->tx_win = L2CAP_DEFAULT_TX_WINDOW;
+		pi->tx_win = tx_window;
 		pi->sec_level = BT_SECURITY_LOW;
 		pi->role_switch = 0;
 		pi->force_reliable = 0;
@@ -4296,6 +4297,9 @@ MODULE_PARM_DESC(enable_ertm, "Enable enhanced retransmission mode");
 module_param(max_transmit, uint, 0644);
 MODULE_PARM_DESC(max_transmit, "Max transmit value (default = 3)");
 
+module_param(tx_window, uint, 0644);
+MODULE_PARM_DESC(tx_window, "Transmission window size value (default = 63)");
+
 MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>");
 MODULE_DESCRIPTION("Bluetooth L2CAP ver " VERSION);
 MODULE_VERSION(VERSION);
-- 
cgit v1.2.3-59-g8ed1b


From 68d7f0ce911e41e463c45911be031cdf6a096fe8 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:41 -0300
Subject: Bluetooth: Enable option to configure Max Transmission value via
 sockopt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the sockopt extension we can set a per-channel MaxTx value.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 2 ++
 net/bluetooth/l2cap.c         | 7 ++++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 059260bed4e8..8942c2ffc58b 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -55,6 +55,7 @@ struct l2cap_options {
 	__u16 flush_to;
 	__u8  mode;
 	__u8  fcs;
+	__u8  max_tx;
 	__u16 txwin_size;
 };
 
@@ -340,6 +341,7 @@ struct l2cap_pinfo {
 	__u8		ident;
 
 	__u8		tx_win;
+	__u8		max_tx;
 	__u8		remote_tx_win;
 	__u8		remote_max_tx;
 	__u16		retrans_timeout;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index ff1466b21580..f9e4da2677af 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -783,6 +783,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
 		pi->omtu = l2cap_pi(parent)->omtu;
 		pi->mode = l2cap_pi(parent)->mode;
 		pi->fcs  = l2cap_pi(parent)->fcs;
+		pi->max_tx = l2cap_pi(parent)->max_tx;
 		pi->tx_win = l2cap_pi(parent)->tx_win;
 		pi->sec_level = l2cap_pi(parent)->sec_level;
 		pi->role_switch = l2cap_pi(parent)->role_switch;
@@ -791,6 +792,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
 		pi->imtu = L2CAP_DEFAULT_MTU;
 		pi->omtu = 0;
 		pi->mode = L2CAP_MODE_BASIC;
+		pi->max_tx = max_transmit;
 		pi->fcs  = L2CAP_FCS_CRC16;
 		pi->tx_win = tx_window;
 		pi->sec_level = BT_SECURITY_LOW;
@@ -1785,6 +1787,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us
 		opts.flush_to = l2cap_pi(sk)->flush_to;
 		opts.mode     = l2cap_pi(sk)->mode;
 		opts.fcs      = l2cap_pi(sk)->fcs;
+		opts.max_tx   = l2cap_pi(sk)->max_tx;
 		opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
 
 		len = min_t(unsigned int, sizeof(opts), optlen);
@@ -1797,6 +1800,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us
 		l2cap_pi(sk)->omtu = opts.omtu;
 		l2cap_pi(sk)->mode = opts.mode;
 		l2cap_pi(sk)->fcs  = opts.fcs;
+		l2cap_pi(sk)->max_tx = opts.max_tx;
 		l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size;
 		break;
 
@@ -1912,6 +1916,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us
 		opts.flush_to = l2cap_pi(sk)->flush_to;
 		opts.mode     = l2cap_pi(sk)->mode;
 		opts.fcs      = l2cap_pi(sk)->fcs;
+		opts.max_tx   = l2cap_pi(sk)->max_tx;
 		opts.txwin_size = (__u16)l2cap_pi(sk)->tx_win;
 
 		len = min_t(unsigned int, len, sizeof(opts));
@@ -2331,7 +2336,7 @@ done:
 	case L2CAP_MODE_ERTM:
 		rfc.mode            = L2CAP_MODE_ERTM;
 		rfc.txwin_size      = pi->tx_win;
-		rfc.max_transmit    = max_transmit;
+		rfc.max_transmit    = pi->max_tx;
 		rfc.retrans_timeout = 0;
 		rfc.monitor_timeout = 0;
 		rfc.max_pdu_size    = cpu_to_le16(L2CAP_DEFAULT_MAX_PDU_SIZE);
-- 
cgit v1.2.3-59-g8ed1b


From f6e6b16823de0aff31cb8ee8c098383e3aceec58 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:41 -0300
Subject: Bluetooth: Fix bug when retransmitting I-frames
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If there were no frames to retransmit, l2cap was crashing the kernel; now
we check whether the queue is empty first.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index f9e4da2677af..4c7b2d22faa5 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3546,7 +3546,8 @@ expected:
 		if (pi->conn_state & L2CAP_CONN_REJ_ACT)
 			pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
 		else {
-			sk->sk_send_head = TX_QUEUE(sk)->next;
+			if (!skb_queue_empty(TX_QUEUE(sk)))
+				sk->sk_send_head = TX_QUEUE(sk)->next;
 			pi->next_tx_seq = pi->expected_ack_seq;
 			l2cap_ertm_send(sk);
 		}
@@ -3593,7 +3594,8 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 		if (pi->conn_state & L2CAP_CONN_REJ_ACT)
 			pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
 		else {
-			sk->sk_send_head = TX_QUEUE(sk)->next;
+			if (!skb_queue_empty(TX_QUEUE(sk)))
+				sk->sk_send_head = TX_QUEUE(sk)->next;
 			pi->next_tx_seq = pi->expected_ack_seq;
 			l2cap_ertm_send(sk);
 		}
@@ -3625,12 +3627,14 @@ static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control)
 		if (pi->conn_state & L2CAP_CONN_REJ_ACT)
 			pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
 		else {
-			sk->sk_send_head = TX_QUEUE(sk)->next;
+			if (!skb_queue_empty(TX_QUEUE(sk)))
+				sk->sk_send_head = TX_QUEUE(sk)->next;
 			pi->next_tx_seq = pi->expected_ack_seq;
 			l2cap_ertm_send(sk);
 		}
 	} else {
-		sk->sk_send_head = TX_QUEUE(sk)->next;
+		if (!skb_queue_empty(TX_QUEUE(sk)))
+			sk->sk_send_head = TX_QUEUE(sk)->next;
 		pi->next_tx_seq = pi->expected_ack_seq;
 		l2cap_ertm_send(sk);
 
-- 
cgit v1.2.3-59-g8ed1b


From 0ee0d20855ae9271de3f6695f4cafc08ab2533bb Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:41 -0300
Subject: Bluetooth: Fix crash when monitor timeout expires
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The code was crashing due to an invalid access to hci_conn after the
channel disconnect.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 4c7b2d22faa5..2f9bbad42887 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1568,6 +1568,9 @@ static struct sk_buff *l2cap_create_iframe_pdu(struct sock *sk, struct msghdr *m
 
 	BT_DBG("sk %p len %d", sk, (int)len);
 
+	if (!conn)
+		return ERR_PTR(-ENOTCONN);
+
 	if (sdulen)
 		hlen += 2;
 
-- 
cgit v1.2.3-59-g8ed1b


From 812e737e29a1d559e7bfbea675fdcfcbad9f5e1f Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:42 -0300
Subject: Bluetooth: Fix drop of acked packets on ERTM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

l2cap_drop_acked_frames() was dropping packets that had not been sent
yet, so they were never transmitted.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 2f9bbad42887..042fd967e79c 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1262,7 +1262,8 @@ static void l2cap_drop_acked_frames(struct sock *sk)
 {
 	struct sk_buff *skb;
 
-	while ((skb = skb_peek(TX_QUEUE(sk)))) {
+	while ((skb = skb_peek(TX_QUEUE(sk))) &&
+			l2cap_pi(sk)->unacked_frames) {
 		if (bt_cb(skb)->tx_seq == l2cap_pi(sk)->expected_ack_seq)
 			break;
 
-- 
cgit v1.2.3-59-g8ed1b


From 3b1a9f3fa6ad842991538da2c3b2e29e047b131f Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:42 -0300
Subject: Bluetooth: Optimize SREJ_QUEUE append
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the received I-frame is the expected one, i.e., its tx_seq is equal
to expected_tx_seq, and we are under a SREJ, we can just add it to the
tail of the list. Doing that changes the complexity from O(n) to O(1).

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 042fd967e79c..187f46dd8309 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3542,7 +3542,9 @@ expected:
 	pi->expected_tx_seq = (pi->expected_tx_seq + 1) % 64;
 
 	if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
-		l2cap_add_to_srej_queue(sk, skb, tx_seq, sar);
+		bt_cb(skb)->tx_seq = tx_seq;
+		bt_cb(skb)->sar = sar;
+		__skb_queue_tail(SREJ_QUEUE(sk), skb);
 		return 0;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 84fb0a6334af0ccad3544f6972c055d90fbb9fbe Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:42 -0300
Subject: Bluetooth: Add Kconfig option for L2CAP Extended Features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The L2CAP Extended Features are still unstable and under development,
so we are adding them under the EXPERIMENTAL flag to get more feedback
on them. The L2CAP Extended Features include the Enhanced Retransmission
and Streaming Modes, Frame Check Sequence (FCS), and Segmentation and
Reassembly (SAR).

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/Kconfig | 13 +++++++++++++
 net/bluetooth/l2cap.c |  4 ++++
 2 files changed, 17 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index ed371684c133..ee3b3049d385 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -43,6 +43,19 @@ config BT_L2CAP
 	  Say Y here to compile L2CAP support into the kernel or say M to
 	  compile it as module (l2cap).
 
+config BT_L2CAP_EXT_FEATURES
+	bool "L2CAP Extended Features support (EXPERIMENTAL)"
+	depends on BT_L2CAP && EXPERIMENTAL
+	help
+	  This option enables the L2CAP Extended Features support. These
+	  new features include the Enhanced Retransmission and Streaming
+	  Modes, the Frame Check Sequence (FCS), and Segmentation and
+	  Reassembly (SAR) for L2CAP packets. They are a required for the
+	  new Alternate MAC/PHY and the Bluetooth Medical Profile.
+
+	  You should say N unless you know what you are doing. Note that
+	  this is in an experimental state yet.
+
 config BT_SCO
 	tristate "SCO links support"
 	depends on BT
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 187f46dd8309..4c007203d66b 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -55,7 +55,11 @@
 
 #define VERSION "2.14"
 
+#ifdef CONFIG_BT_L2CAP_EXT_FEATURES
+static int enable_ertm = 1;
+#else
 static int enable_ertm = 0;
+#endif
 static int max_transmit = L2CAP_DEFAULT_MAX_TX;
 static int tx_window = L2CAP_DEFAULT_TX_WINDOW;
 
-- 
cgit v1.2.3-59-g8ed1b


From bd3c9e255e76ae232d417e3914ca5d80ca3e9485 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:42 -0300
Subject: Bluetooth: Add SOCK_STREAM support to L2CAP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If enable_ertm is true and the socket type is SOCK_STREAM, the default
mode will be ERTM; otherwise it is Basic Mode.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 40 ++++++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 4c007203d66b..1a32562adf46 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -224,7 +224,7 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so
 
 	l2cap_pi(sk)->conn = conn;
 
-	if (sk->sk_type == SOCK_SEQPACKET) {
+	if (sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM) {
 		/* Alloc CID for connection-oriented socket */
 		l2cap_pi(sk)->scid = l2cap_alloc_cid(l);
 	} else if (sk->sk_type == SOCK_DGRAM) {
@@ -452,7 +452,8 @@ static void l2cap_conn_start(struct l2cap_conn *conn)
 	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
 		bh_lock_sock(sk);
 
-		if (sk->sk_type != SOCK_SEQPACKET) {
+		if (sk->sk_type != SOCK_SEQPACKET &&
+				sk->sk_type != SOCK_STREAM) {
 			bh_unlock_sock(sk);
 			continue;
 		}
@@ -512,7 +513,8 @@ static void l2cap_conn_ready(struct l2cap_conn *conn)
 	for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) {
 		bh_lock_sock(sk);
 
-		if (sk->sk_type != SOCK_SEQPACKET) {
+		if (sk->sk_type != SOCK_SEQPACKET &&
+				sk->sk_type != SOCK_STREAM) {
 			l2cap_sock_clear_timer(sk);
 			sk->sk_state = BT_CONNECTED;
 			sk->sk_state_change(sk);
@@ -721,7 +723,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
 
 	case BT_CONNECTED:
 	case BT_CONFIG:
-		if (sk->sk_type == SOCK_SEQPACKET) {
+		if (sk->sk_type == SOCK_SEQPACKET ||
+				sk->sk_type == SOCK_STREAM) {
 			struct l2cap_conn *conn = l2cap_pi(sk)->conn;
 
 			sk->sk_state = BT_DISCONN;
@@ -732,7 +735,8 @@ static void __l2cap_sock_close(struct sock *sk, int reason)
 		break;
 
 	case BT_CONNECT2:
-		if (sk->sk_type == SOCK_SEQPACKET) {
+		if (sk->sk_type == SOCK_SEQPACKET ||
+				sk->sk_type == SOCK_STREAM) {
 			struct l2cap_conn *conn = l2cap_pi(sk)->conn;
 			struct l2cap_conn_rsp rsp;
 			__u16 result;
@@ -795,7 +799,10 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
 	} else {
 		pi->imtu = L2CAP_DEFAULT_MTU;
 		pi->omtu = 0;
-		pi->mode = L2CAP_MODE_BASIC;
+		if (enable_ertm && sk->sk_type == SOCK_STREAM)
+			pi->mode = L2CAP_MODE_ERTM;
+		else
+			pi->mode = L2CAP_MODE_BASIC;
 		pi->max_tx = max_transmit;
 		pi->fcs  = L2CAP_FCS_CRC16;
 		pi->tx_win = tx_window;
@@ -852,7 +859,7 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol,
 
 	sock->state = SS_UNCONNECTED;
 
-	if (sock->type != SOCK_SEQPACKET &&
+	if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM &&
 			sock->type != SOCK_DGRAM && sock->type != SOCK_RAW)
 		return -ESOCKTNOSUPPORT;
 
@@ -1000,7 +1007,8 @@ static int l2cap_do_connect(struct sock *sk)
 	l2cap_sock_set_timer(sk, sk->sk_sndtimeo);
 
 	if (hcon->state == BT_CONNECTED) {
-		if (sk->sk_type != SOCK_SEQPACKET) {
+		if (sk->sk_type != SOCK_SEQPACKET &&
+				sk->sk_type != SOCK_STREAM) {
 			l2cap_sock_clear_timer(sk);
 			sk->sk_state = BT_CONNECTED;
 		} else
@@ -1034,7 +1042,8 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al
 
 	lock_sock(sk);
 
-	if (sk->sk_type == SOCK_SEQPACKET && !la.l2_psm) {
+	if ((sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM)
+			&& !la.l2_psm) {
 		err = -EINVAL;
 		goto done;
 	}
@@ -1098,7 +1107,8 @@ static int l2cap_sock_listen(struct socket *sock, int backlog)
 
 	lock_sock(sk);
 
-	if (sk->sk_state != BT_BOUND || sock->type != SOCK_SEQPACKET) {
+	if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
+			|| sk->sk_state != BT_BOUND) {
 		err = -EBADFD;
 		goto done;
 	}
@@ -1857,7 +1867,8 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch
 
 	switch (optname) {
 	case BT_SECURITY:
-		if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) {
+		if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
+				&& sk->sk_type != SOCK_RAW) {
 			err = -EINVAL;
 			break;
 		}
@@ -2007,7 +2018,8 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch
 
 	switch (optname) {
 	case BT_SECURITY:
-		if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) {
+		if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM
+				&& sk->sk_type != SOCK_RAW) {
 			err = -EINVAL;
 			break;
 		}
@@ -2314,7 +2326,7 @@ static int l2cap_build_conf_req(struct sock *sk, void *data)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	struct l2cap_conf_req *req = data;
-	struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC };
+	struct l2cap_conf_rfc rfc = { .mode = pi->mode };
 	void *ptr = req->data;
 
 	BT_DBG("sk %p", sk);
@@ -3997,7 +4009,7 @@ static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason)
 
 static inline void l2cap_check_encryption(struct sock *sk, u8 encrypt)
 {
-	if (sk->sk_type != SOCK_SEQPACKET)
+	if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_STREAM)
 		return;
 
 	if (encrypt == 0x00) {
-- 
cgit v1.2.3-59-g8ed1b


From 0041ecfa3025d7612fdaab12b2f07c9c3c09f42f Mon Sep 17 00:00:00 2001
From: João Paulo Rechi Vita <jprvita@profusion.mobi>
Date: Sat, 1 May 2010 16:15:42 -0300
Subject: Bluetooth: Check if mode is supported on getsockopt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adding this check to setsockopt makes it possible to fail early instead
of waiting until listen / connect.

Signed-off-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Acked-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 1a32562adf46..bf5bb7dc6abf 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1814,9 +1814,22 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us
 			break;
 		}
 
+		l2cap_pi(sk)->mode = opts.mode;
+		switch (l2cap_pi(sk)->mode) {
+		case L2CAP_MODE_BASIC:
+			break;
+		case L2CAP_MODE_ERTM:
+		case L2CAP_MODE_STREAMING:
+			if (enable_ertm)
+				break;
+			/* fall through */
+		default:
+			err = -EINVAL;
+			break;
+		}
+
 		l2cap_pi(sk)->imtu = opts.imtu;
 		l2cap_pi(sk)->omtu = opts.omtu;
-		l2cap_pi(sk)->mode = opts.mode;
 		l2cap_pi(sk)->fcs  = opts.fcs;
 		l2cap_pi(sk)->max_tx = opts.max_tx;
 		l2cap_pi(sk)->tx_win = (__u8)opts.txwin_size;
-- 
cgit v1.2.3-59-g8ed1b


From afefdbc4cf3b9d409d07e1e5264e7ff88bc48711 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:43 -0300
Subject: Bluetooth: Fix SDU reassembly under SREJ
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The code was reusing the control variable without reinitializing it.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index bf5bb7dc6abf..478def700c7c 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3446,14 +3446,14 @@ drop:
 static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq)
 {
 	struct sk_buff *skb;
-	u16 control = 0;
+	u16 control;
 
 	while((skb = skb_peek(SREJ_QUEUE(sk)))) {
 		if (bt_cb(skb)->tx_seq != tx_seq)
 			break;
 
 		skb = skb_dequeue(SREJ_QUEUE(sk));
-		control |= bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
+		control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
 		l2cap_sar_reassembly_sdu(sk, skb, control);
 		l2cap_pi(sk)->buffer_seq_srej =
 			(l2cap_pi(sk)->buffer_seq_srej + 1) % 64;
-- 
cgit v1.2.3-59-g8ed1b


From 01760bdde9a92413b7fff928d08e19352bf09d82 Mon Sep 17 00:00:00 2001
From: João Paulo Rechi Vita <jprvita@profusion.mobi>
Date: Sat, 1 May 2010 16:15:43 -0300
Subject: Bluetooth: Close L2CAP channel on invalid ReqSeq
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Acked-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 478def700c7c..31514d8faa66 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3772,7 +3772,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 	struct sock *sk;
 	struct l2cap_pinfo *pi;
 	u16 control, len;
-	u8 tx_seq;
+	u8 tx_seq, req_seq, next_tx_seq_offset, req_seq_offset;
 
 	sk = l2cap_get_chan_by_scid(&conn->chan_list, cid);
 	if (!sk) {
@@ -3823,6 +3823,22 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		if (l2cap_check_fcs(pi, skb))
 			goto drop;
 
+		req_seq = __get_reqseq(control);
+		req_seq_offset = (req_seq - pi->expected_ack_seq) % 64;
+		if (req_seq_offset < 0)
+			req_seq_offset += 64;
+
+		next_tx_seq_offset =
+			(pi->next_tx_seq - pi->expected_ack_seq) % 64;
+		if (next_tx_seq_offset < 0)
+			next_tx_seq_offset += 64;
+
+		/* check for invalid req-seq */
+		if (req_seq_offset > next_tx_seq_offset) {
+			l2cap_send_disconn_req(pi->conn, sk);
+			goto drop;
+		}
+
 		if (__is_iframe(control)) {
 			if (len < 4)
 				goto drop;
-- 
cgit v1.2.3-59-g8ed1b


From 44651b85cc3a076147af5d181fc4833ef8debc59 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:43 -0300
Subject: Bluetooth: Don't set control bits to zero first
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We can set the SAR bits in the control field directly.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 31514d8faa66..cfb18cd97564 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1636,16 +1636,15 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
 	__skb_queue_tail(&sar_queue, skb);
 	len -= pi->remote_mps;
 	size += pi->remote_mps;
-	control = 0;
 
 	while (len > 0) {
 		size_t buflen;
 
 		if (len > pi->remote_mps) {
-			control |= L2CAP_SDU_CONTINUE;
+			control = L2CAP_SDU_CONTINUE;
 			buflen = pi->remote_mps;
 		} else {
-			control |= L2CAP_SDU_END;
+			control = L2CAP_SDU_END;
 			buflen = len;
 		}
 
@@ -1658,7 +1657,6 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
 		__skb_queue_tail(&sar_queue, skb);
 		len -= buflen;
 		size += buflen;
-		control = 0;
 	}
 	skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk));
 	if (sk->sk_send_head == NULL)
-- 
cgit v1.2.3-59-g8ed1b


From 59203a21a56c53afeb6f45e059299e6f1437f30f Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:43 -0300
Subject: Bluetooth: Fix errors reported by checkpatch.pl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index cfb18cd97564..2a981de071df 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1400,7 +1400,7 @@ static int l2cap_ertm_send(struct sock *sk)
 		return 0;
 
 	while ((skb = sk->sk_send_head) && (!l2cap_tx_window_full(sk)) &&
-	       !(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) {
+			!(pi->conn_state & L2CAP_CONN_REMOTE_BUSY)) {
 
 		if (pi->remote_max_tx &&
 				bt_cb(skb)->retries == pi->remote_max_tx) {
@@ -1490,9 +1490,8 @@ static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, in
 	struct sk_buff **frag;
 	int err, sent = 0;
 
-	if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count)) {
+	if (memcpy_fromiovec(skb_put(skb, count), msg->msg_iov, count))
 		return -EFAULT;
-	}
 
 	sent += count;
 	len  -= count;
@@ -3347,7 +3346,7 @@ static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_
 		if (skb_queue_is_last(SREJ_QUEUE(sk), next_skb))
 			break;
 
-	} while((next_skb = skb_queue_next(SREJ_QUEUE(sk), next_skb)));
+	} while ((next_skb = skb_queue_next(SREJ_QUEUE(sk), next_skb)));
 
 	__skb_queue_tail(SREJ_QUEUE(sk), skb);
 }
@@ -3446,7 +3445,7 @@ static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq)
 	struct sk_buff *skb;
 	u16 control;
 
-	while((skb = skb_peek(SREJ_QUEUE(sk)))) {
+	while ((skb = skb_peek(SREJ_QUEUE(sk)))) {
 		if (bt_cb(skb)->tx_seq != tx_seq)
 			break;
 
@@ -3465,7 +3464,7 @@ static void l2cap_resend_srejframe(struct sock *sk, u8 tx_seq)
 	struct srej_list *l, *tmp;
 	u16 control;
 
-	list_for_each_entry_safe(l,tmp, SREJ_LIST(sk), list) {
+	list_for_each_entry_safe(l, tmp, SREJ_LIST(sk), list) {
 		if (l->tx_seq == tx_seq) {
 			list_del(&l->list);
 			kfree(l);
-- 
cgit v1.2.3-59-g8ed1b


From 0301ef04b5f49a95681694fc0d75af9441faa919 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Wed, 5 May 2010 20:56:43 -0300
Subject: Bluetooth: Remove set of SrejSaveReqSeq under receipt of REJ frame
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

That action is not specified by the ERTM spec, so removing it.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 2a981de071df..d0d03302e14d 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3670,10 +3670,8 @@ static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control)
 		pi->next_tx_seq = pi->expected_ack_seq;
 		l2cap_ertm_send(sk);
 
-		if (pi->conn_state & L2CAP_CONN_WAIT_F) {
-			pi->srej_save_reqseq = tx_seq;
+		if (pi->conn_state & L2CAP_CONN_WAIT_F)
 			pi->conn_state |= L2CAP_CONN_REJ_ACT;
-		}
 	}
 }
 static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control)
-- 
cgit v1.2.3-59-g8ed1b


From a2e12a2a312f816d5970b0c809d43b399fbfe90c Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Wed, 5 May 2010 19:58:27 -0300
Subject: Bluetooth: Remove unneeded control vars
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Trivial clean up.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index d0d03302e14d..5a5203f03642 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1239,7 +1239,6 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l
 static void l2cap_monitor_timeout(unsigned long arg)
 {
 	struct sock *sk = (void *) arg;
-	u16 control;
 
 	bh_lock_sock(sk);
 	if (l2cap_pi(sk)->retry_count >= l2cap_pi(sk)->remote_max_tx) {
@@ -1251,15 +1250,13 @@ static void l2cap_monitor_timeout(unsigned long arg)
 	l2cap_pi(sk)->retry_count++;
 	__mod_monitor_timer();
 
-	control = L2CAP_CTRL_POLL;
-	l2cap_send_rr_or_rnr(l2cap_pi(sk), control);
+	l2cap_send_rr_or_rnr(l2cap_pi(sk), L2CAP_CTRL_POLL);
 	bh_unlock_sock(sk);
 }
 
 static void l2cap_retrans_timeout(unsigned long arg)
 {
 	struct sock *sk = (void *) arg;
-	u16 control;
 
 	bh_lock_sock(sk);
 	l2cap_pi(sk)->retry_count = 1;
@@ -1267,8 +1264,7 @@ static void l2cap_retrans_timeout(unsigned long arg)
 
 	l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F;
 
-	control = L2CAP_CTRL_POLL;
-	l2cap_send_rr_or_rnr(l2cap_pi(sk), control);
+	l2cap_send_rr_or_rnr(l2cap_pi(sk), L2CAP_CTRL_POLL);
 	bh_unlock_sock(sk);
 }
 
@@ -3716,10 +3712,8 @@ static inline void l2cap_data_channel_rnrframe(struct sock *sk, u16 rx_control)
 
 	if (!(pi->conn_state & L2CAP_CONN_SREJ_SENT)) {
 		del_timer(&pi->retrans_timer);
-		if (rx_control & L2CAP_CTRL_POLL) {
-			u16 control = L2CAP_CTRL_FINAL;
-			l2cap_send_rr_or_rnr(pi, control);
-		}
+		if (rx_control & L2CAP_CTRL_POLL)
+			l2cap_send_rr_or_rnr(pi, L2CAP_CTRL_FINAL);
 		return;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 9b16dc6551cbde65d0ac525af3c46efab53a2c46 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Wed, 5 May 2010 20:05:57 -0300
Subject: Bluetooth: Check if we really are in WAIT_F when F bit comes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

F-bit set should be processed only if we are in the WAIT_F state.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 5a5203f03642..eb5cb29115a7 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3503,7 +3503,8 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 
 	BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len);
 
-	if (L2CAP_CTRL_FINAL & rx_control) {
+	if (L2CAP_CTRL_FINAL & rx_control &&
+			l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) {
 		del_timer(&pi->monitor_timer);
 		if (pi->unacked_frames > 0)
 			__mod_retrans_timer();
@@ -3727,7 +3728,8 @@ static inline int l2cap_data_channel_sframe(struct sock *sk, u16 rx_control, str
 {
 	BT_DBG("sk %p rx_control 0x%4.4x len %d", sk, rx_control, skb->len);
 
-	if (L2CAP_CTRL_FINAL & rx_control) {
+	if (L2CAP_CTRL_FINAL & rx_control &&
+			l2cap_pi(sk)->conn_state & L2CAP_CONN_WAIT_F) {
 		del_timer(&l2cap_pi(sk)->monitor_timer);
 		if (l2cap_pi(sk)->unacked_frames > 0)
 			__mod_retrans_timer();
-- 
cgit v1.2.3-59-g8ed1b


From ff12fd643334071084b6145cad3793bb6c956638 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Wed, 5 May 2010 22:09:15 -0300
Subject: Bluetooth: Fix lockdep annotation on ERTM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A spin_lock_init() call was missing. :)

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index eb5cb29115a7..6b08f4d7c873 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1622,7 +1622,7 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
 	u16 control;
 	size_t size = 0;
 
-	__skb_queue_head_init(&sar_queue);
+	skb_queue_head_init(&sar_queue);
 	control = L2CAP_SDU_START;
 	skb = l2cap_create_iframe_pdu(sk, msg, pi->remote_mps, control, len);
 	if (IS_ERR(skb))
-- 
cgit v1.2.3-59-g8ed1b


From 9a9c6a34416b3743c09c00f3d6708d9df3c21629 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:43 -0300
Subject: Bluetooth: Make hci_send_acl() void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

hci_send_acl() can't fail, so we can make it void. This patch changes
that and all the functions that use hci_send_acl().
That change exposed a bug in sending connectionless data: we were not
reporting the length sent back to user space.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  2 +-
 net/bluetooth/hci_core.c         |  4 +--
 net/bluetooth/l2cap.c            | 73 +++++++++++++++-------------------------
 3 files changed, 30 insertions(+), 49 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 9830a88f487e..4511df2a0d7f 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -636,7 +636,7 @@ int hci_register_notifier(struct notifier_block *nb);
 int hci_unregister_notifier(struct notifier_block *nb);
 
 int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, void *param);
-int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags);
+void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags);
 void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb);
 
 void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 1c9aef97f519..904f1e8a7a3b 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1260,7 +1260,7 @@ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags)
 	hdr->dlen   = cpu_to_le16(len);
 }
 
-int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
+void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
 {
 	struct hci_dev *hdev = conn->hdev;
 	struct sk_buff *list;
@@ -1303,7 +1303,7 @@ int hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
 
 	tasklet_schedule(&hdev->tx_task);
 
-	return 0;
+	return;
 }
 EXPORT_SYMBOL(hci_send_acl);
 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 6b08f4d7c873..7e74d5be16e3 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -330,19 +330,19 @@ static inline u8 l2cap_get_ident(struct l2cap_conn *conn)
 	return id;
 }
 
-static inline int l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data)
+static inline void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data)
 {
 	struct sk_buff *skb = l2cap_build_cmd(conn, code, ident, len, data);
 
 	BT_DBG("code 0x%2.2x", code);
 
 	if (!skb)
-		return -ENOMEM;
+		return;
 
-	return hci_send_acl(conn->hcon, skb, 0);
+	hci_send_acl(conn->hcon, skb, 0);
 }
 
-static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
+static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
 {
 	struct sk_buff *skb;
 	struct l2cap_hdr *lh;
@@ -369,7 +369,7 @@ static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
 
 	skb = bt_skb_alloc(count, GFP_ATOMIC);
 	if (!skb)
-		return -ENOMEM;
+		return;
 
 	lh = (struct l2cap_hdr *) skb_put(skb, L2CAP_HDR_SIZE);
 	lh->len = cpu_to_le16(hlen - L2CAP_HDR_SIZE);
@@ -381,10 +381,10 @@ static inline int l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
 		put_unaligned_le16(fcs, skb_put(skb, 2));
 	}
 
-	return hci_send_acl(pi->conn->hcon, skb, 0);
+	hci_send_acl(pi->conn->hcon, skb, 0);
 }
 
-static inline int l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control)
+static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control)
 {
 	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY)
 		control |= L2CAP_SUPER_RCV_NOT_READY;
@@ -393,7 +393,7 @@ static inline int l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control)
 
 	control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
 
-	return l2cap_send_sframe(pi, control);
+	l2cap_send_sframe(pi, control);
 }
 
 static void l2cap_do_start(struct sock *sk)
@@ -1289,18 +1289,13 @@ static void l2cap_drop_acked_frames(struct sock *sk)
 	return;
 }
 
-static inline int l2cap_do_send(struct sock *sk, struct sk_buff *skb)
+static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
-	int err;
 
 	BT_DBG("sk %p, skb %p len %d", sk, skb, skb->len);
 
-	err = hci_send_acl(pi->conn->hcon, skb, 0);
-	if (err < 0)
-		kfree_skb(skb);
-
-	return err;
+	hci_send_acl(pi->conn->hcon, skb, 0);
 }
 
 static int l2cap_streaming_send(struct sock *sk)
@@ -1308,7 +1303,6 @@ static int l2cap_streaming_send(struct sock *sk)
 	struct sk_buff *skb, *tx_skb;
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	u16 control, fcs;
-	int err;
 
 	while ((skb = sk->sk_send_head)) {
 		tx_skb = skb_clone(skb, GFP_ATOMIC);
@@ -1322,11 +1316,7 @@ static int l2cap_streaming_send(struct sock *sk)
 			put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2);
 		}
 
-		err = l2cap_do_send(sk, tx_skb);
-		if (err < 0) {
-			l2cap_send_disconn_req(pi->conn, sk);
-			return err;
-		}
+		l2cap_do_send(sk, tx_skb);
 
 		pi->next_tx_seq = (pi->next_tx_seq + 1) % 64;
 
@@ -1346,7 +1336,6 @@ static int l2cap_retransmit_frame(struct sock *sk, u8 tx_seq)
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	struct sk_buff *skb, *tx_skb;
 	u16 control, fcs;
-	int err;
 
 	skb = skb_peek(TX_QUEUE(sk));
 	do {
@@ -1375,11 +1364,7 @@ static int l2cap_retransmit_frame(struct sock *sk, u8 tx_seq)
 			put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2);
 		}
 
-		err = l2cap_do_send(sk, tx_skb);
-		if (err < 0) {
-			l2cap_send_disconn_req(pi->conn, sk);
-			return err;
-		}
+		l2cap_do_send(sk, tx_skb);
 		break;
 	} while(1);
 	return 0;
@@ -1390,7 +1375,7 @@ static int l2cap_ertm_send(struct sock *sk)
 	struct sk_buff *skb, *tx_skb;
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	u16 control, fcs;
-	int err, nsent = 0;
+	int nsent = 0;
 
 	if (pi->conn_state & L2CAP_CONN_WAIT_F)
 		return 0;
@@ -1423,11 +1408,8 @@ static int l2cap_ertm_send(struct sock *sk)
 			put_unaligned_le16(fcs, skb->data + tx_skb->len - 2);
 		}
 
-		err = l2cap_do_send(sk, tx_skb);
-		if (err < 0) {
-			l2cap_send_disconn_req(pi->conn, sk);
-			return err;
-		}
+		l2cap_do_send(sk, tx_skb);
+
 		__mod_retrans_timer();
 
 		bt_cb(skb)->tx_seq = pi->next_tx_seq;
@@ -1447,7 +1429,7 @@ static int l2cap_ertm_send(struct sock *sk)
 	return nsent;
 }
 
-static int l2cap_send_ack(struct l2cap_pinfo *pi)
+static void l2cap_send_ack(struct l2cap_pinfo *pi)
 {
 	struct sock *sk = (struct sock *)pi;
 	u16 control = 0;
@@ -1456,15 +1438,15 @@ static int l2cap_send_ack(struct l2cap_pinfo *pi)
 
 	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
 		control |= L2CAP_SUPER_RCV_NOT_READY;
-		return l2cap_send_sframe(pi, control);
+		l2cap_send_sframe(pi, control);
+		return;
 	} else if (l2cap_ertm_send(sk) == 0) {
 		control |= L2CAP_SUPER_RCV_READY;
-		return l2cap_send_sframe(pi, control);
+		l2cap_send_sframe(pi, control);
 	}
-	return 0;
 }
 
-static int l2cap_send_srejtail(struct sock *sk)
+static void l2cap_send_srejtail(struct sock *sk)
 {
 	struct srej_list *tail;
 	u16 control;
@@ -1476,8 +1458,6 @@ static int l2cap_send_srejtail(struct sock *sk)
 	control |= tail->tx_seq << L2CAP_CTRL_REQSEQ_SHIFT;
 
 	l2cap_send_sframe(l2cap_pi(sk), control);
-
-	return 0;
 }
 
 static inline int l2cap_skbuff_fromiovec(struct sock *sk, struct msghdr *msg, int len, int count, struct sk_buff *skb)
@@ -1687,10 +1667,12 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 	/* Connectionless channel */
 	if (sk->sk_type == SOCK_DGRAM) {
 		skb = l2cap_create_connless_pdu(sk, msg, len);
-		if (IS_ERR(skb))
+		if (IS_ERR(skb)) {
 			err = PTR_ERR(skb);
-		else
-			err = l2cap_do_send(sk, skb);
+		} else {
+			l2cap_do_send(sk, skb);
+			err = len;
+		}
 		goto done;
 	}
 
@@ -1709,9 +1691,8 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 			goto done;
 		}
 
-		err = l2cap_do_send(sk, skb);
-		if (!err)
-			err = len;
+		l2cap_do_send(sk, skb);
+		err = len;
 		break;
 
 	case L2CAP_MODE_ERTM:
-- 
cgit v1.2.3-59-g8ed1b


From f11d676da4059c7888efca810ab300b931736a26 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:44 -0300
Subject: Bluetooth: Refactor l2cap_retransmit_frame()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Makes the code flow cleaner and changes the function to void.
It also fixes a potential NULL dereference with skb.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 53 ++++++++++++++++++++++++++-------------------------
 1 file changed, 27 insertions(+), 26 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 7e74d5be16e3..1c35c328181d 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1331,43 +1331,44 @@ static int l2cap_streaming_send(struct sock *sk)
 	return 0;
 }
 
-static int l2cap_retransmit_frame(struct sock *sk, u8 tx_seq)
+static void l2cap_retransmit_frame(struct sock *sk, u8 tx_seq)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	struct sk_buff *skb, *tx_skb;
 	u16 control, fcs;
 
 	skb = skb_peek(TX_QUEUE(sk));
-	do {
-		if (bt_cb(skb)->tx_seq != tx_seq) {
-			if (skb_queue_is_last(TX_QUEUE(sk), skb))
-				break;
-			skb = skb_queue_next(TX_QUEUE(sk), skb);
-			continue;
-		}
+	if (!skb)
+		return;
 
-		if (pi->remote_max_tx &&
-				bt_cb(skb)->retries == pi->remote_max_tx) {
-			l2cap_send_disconn_req(pi->conn, sk);
+	do {
+		if (bt_cb(skb)->tx_seq == tx_seq)
 			break;
-		}
 
-		tx_skb = skb_clone(skb, GFP_ATOMIC);
-		bt_cb(skb)->retries++;
-		control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
-		control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT)
-				| (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
-		put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
+		if (skb_queue_is_last(TX_QUEUE(sk), skb))
+			return;
 
-		if (pi->fcs == L2CAP_FCS_CRC16) {
-			fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2);
-			put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2);
-		}
+	} while ((skb = skb_queue_next(TX_QUEUE(sk), skb)));
 
-		l2cap_do_send(sk, tx_skb);
-		break;
-	} while(1);
-	return 0;
+	if (pi->remote_max_tx &&
+			bt_cb(skb)->retries == pi->remote_max_tx) {
+		l2cap_send_disconn_req(pi->conn, sk);
+		return;
+	}
+
+	tx_skb = skb_clone(skb, GFP_ATOMIC);
+	bt_cb(skb)->retries++;
+	control = get_unaligned_le16(tx_skb->data + L2CAP_HDR_SIZE);
+	control |= (pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT)
+			| (tx_seq << L2CAP_CTRL_TXSEQ_SHIFT);
+	put_unaligned_le16(control, tx_skb->data + L2CAP_HDR_SIZE);
+
+	if (pi->fcs == L2CAP_FCS_CRC16) {
+		fcs = crc16(0, (u8 *)tx_skb->data, tx_skb->len - 2);
+		put_unaligned_le16(fcs, tx_skb->data + tx_skb->len - 2);
+	}
+
+	l2cap_do_send(sk, tx_skb);
 }
 
 static int l2cap_ertm_send(struct sock *sk)
-- 
cgit v1.2.3-59-g8ed1b


From 18778a63ddc83bc89bda3b119fb02eb121512a66 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:44 -0300
Subject: Bluetooth: Implement missing parts of the Invalid Frame Detection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are plenty of situations where ERTM shall close the channel; this
commit handles the cases regarding Invalid Frame Detection.
It creates one SDU reassembly function for ERTM and another for Streaming
Mode to make the Invalid Frame Detection handling less complex.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 112 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 1c35c328181d..cfd672419315 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3329,12 +3329,111 @@ static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_
 	__skb_queue_tail(SREJ_QUEUE(sk), skb);
 }
 
-static int l2cap_sar_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control)
+static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	struct sk_buff *_skb;
+	int err = 0;
+
+	switch (control & L2CAP_CTRL_SAR) {
+	case L2CAP_SDU_UNSEGMENTED:
+		if (pi->conn_state & L2CAP_CONN_SAR_SDU)
+			goto drop;
+
+		err = sock_queue_rcv_skb(sk, skb);
+		if (!err)
+			return err;
+
+		break;
+
+	case L2CAP_SDU_START:
+		if (pi->conn_state & L2CAP_CONN_SAR_SDU)
+			goto drop;
+
+		pi->sdu_len = get_unaligned_le16(skb->data);
+		skb_pull(skb, 2);
+
+		if (pi->sdu_len > pi->imtu)
+			goto disconnect;
+
+		pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC);
+		if (!pi->sdu) {
+			err = -ENOMEM;
+			break;
+		}
+
+		memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
+
+		pi->conn_state |= L2CAP_CONN_SAR_SDU;
+		pi->partial_sdu_len = skb->len;
+		break;
+
+	case L2CAP_SDU_CONTINUE:
+		if (!(pi->conn_state & L2CAP_CONN_SAR_SDU))
+			goto disconnect;
+
+		if (!pi->sdu)
+			goto disconnect;
+
+		memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
+
+		pi->partial_sdu_len += skb->len;
+		if (pi->partial_sdu_len > pi->sdu_len)
+			goto drop;
+
+		break;
+
+	case L2CAP_SDU_END:
+		if (!(pi->conn_state & L2CAP_CONN_SAR_SDU))
+			goto disconnect;
+
+		if (!pi->sdu)
+			goto disconnect;
+
+		memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
+
+		pi->conn_state &= ~L2CAP_CONN_SAR_SDU;
+		pi->partial_sdu_len += skb->len;
+
+		if (pi->partial_sdu_len > pi->imtu)
+			goto drop;
+
+		if (pi->partial_sdu_len != pi->sdu_len)
+			goto drop;
+
+		_skb = skb_clone(pi->sdu, GFP_ATOMIC);
+		err = sock_queue_rcv_skb(sk, _skb);
+		if (err < 0)
+			kfree_skb(_skb);
+
+		kfree_skb(pi->sdu);
+		break;
+	}
+
+	kfree_skb(skb);
+	return err;
+
+drop:
+	kfree_skb(pi->sdu);
+	pi->sdu = NULL;
+
+disconnect:
+	l2cap_send_disconn_req(pi->conn, sk);
+	kfree_skb(skb);
+	return 0;
+}
+
+static int l2cap_streaming_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	struct sk_buff *_skb;
 	int err = -EINVAL;
 
+	/*
+	 * TODO: We have to notify the userland if some data is lost with the
+	 * Streaming Mode.
+	 */
+
 	switch (control & L2CAP_CTRL_SAR) {
 	case L2CAP_SDU_UNSEGMENTED:
 		if (pi->conn_state & L2CAP_CONN_SAR_SDU) {
@@ -3429,7 +3528,7 @@ static void l2cap_check_srej_gap(struct sock *sk, u8 tx_seq)
 
 		skb = skb_dequeue(SREJ_QUEUE(sk));
 		control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
-		l2cap_sar_reassembly_sdu(sk, skb, control);
+		l2cap_ertm_reassembly_sdu(sk, skb, control);
 		l2cap_pi(sk)->buffer_seq_srej =
 			(l2cap_pi(sk)->buffer_seq_srej + 1) % 64;
 		tx_seq++;
@@ -3566,7 +3665,7 @@ expected:
 
 	pi->buffer_seq = (pi->buffer_seq + 1) % 64;
 
-	err = l2cap_sar_reassembly_sdu(sk, skb, rx_control);
+	err = l2cap_ertm_reassembly_sdu(sk, skb, rx_control);
 	if (err < 0)
 		return err;
 
@@ -3790,8 +3889,10 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		 * Receiver will miss it and start proper recovery
 		 * procedures and ask retransmission.
 		 */
-		if (len > pi->mps)
+		if (len > pi->mps) {
+			l2cap_send_disconn_req(pi->conn, sk);
 			goto drop;
+		}
 
 		if (l2cap_check_fcs(pi, skb))
 			goto drop;
@@ -3813,13 +3914,17 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		}
 
 		if (__is_iframe(control)) {
-			if (len < 4)
+			if (len < 4) {
+				l2cap_send_disconn_req(pi->conn, sk);
 				goto drop;
+			}
 
 			l2cap_data_channel_iframe(sk, control, skb);
 		} else {
-			if (len != 0)
+			if (len != 0) {
+				l2cap_send_disconn_req(pi->conn, sk);
 				goto drop;
+			}
 
 			l2cap_data_channel_sframe(sk, control, skb);
 		}
@@ -3850,7 +3955,7 @@ static inline int l2cap_data_channel(struct l2cap_conn *conn, u16 cid, struct sk
 		else
 			pi->expected_tx_seq = (tx_seq + 1) % 64;
 
-		l2cap_sar_reassembly_sdu(sk, skb, control);
+		l2cap_streaming_reassembly_sdu(sk, skb, control);
 
 		goto done;
 
-- 
cgit v1.2.3-59-g8ed1b


From 9b53350d3cf5b330c3261d89b5e62a2dc25c5653 Mon Sep 17 00:00:00 2001
From: João Paulo Rechi Vita <jprvita@profusion.mobi>
Date: Sat, 1 May 2010 16:15:44 -0300
Subject: Bluetooth: Completes the I-frame tx_seq check logic on RECV
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds checks for invalid tx_seq and fixes the duplicated tx_seq check.

Signed-off-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Acked-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 40 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index cfd672419315..481cec22ef96 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3302,7 +3302,7 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
 	}
 }
 
-static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar)
+static int l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_seq, u8 sar)
 {
 	struct sk_buff *next_skb;
 
@@ -3312,13 +3312,16 @@ static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_
 	next_skb = skb_peek(SREJ_QUEUE(sk));
 	if (!next_skb) {
 		__skb_queue_tail(SREJ_QUEUE(sk), skb);
-		return;
+		return 0;
 	}
 
 	do {
+		if (bt_cb(next_skb)->tx_seq == tx_seq)
+			return -EINVAL;
+
 		if (bt_cb(next_skb)->tx_seq > tx_seq) {
 			__skb_queue_before(SREJ_QUEUE(sk), next_skb, skb);
-			return;
+			return 0;
 		}
 
 		if (skb_queue_is_last(SREJ_QUEUE(sk), next_skb))
@@ -3327,6 +3330,8 @@ static void l2cap_add_to_srej_queue(struct sock *sk, struct sk_buff *skb, u8 tx_
 	} while ((next_skb = skb_queue_next(SREJ_QUEUE(sk), next_skb)));
 
 	__skb_queue_tail(SREJ_QUEUE(sk), skb);
+
+	return 0;
 }
 
 static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control)
@@ -3579,6 +3584,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 	u8 tx_seq = __get_txseq(rx_control);
 	u8 req_seq = __get_reqseq(rx_control);
 	u8 sar = rx_control >> L2CAP_CTRL_SAR_SHIFT;
+	u8 tx_seq_offset, expected_tx_seq_offset;
 	int num_to_ack = (pi->tx_win/6) + 1;
 	int err = 0;
 
@@ -3598,6 +3604,16 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 	if (tx_seq == pi->expected_tx_seq)
 		goto expected;
 
+	tx_seq_offset = (tx_seq - pi->buffer_seq) % 64;
+	if (tx_seq_offset < 0)
+		tx_seq_offset += 64;
+
+	/* invalid tx_seq */
+	if (tx_seq_offset >= pi->tx_win) {
+		l2cap_send_disconn_req(pi->conn, sk);
+		goto drop;
+	}
+
 	if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
 		struct srej_list *first;
 
@@ -3617,7 +3633,10 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 			}
 		} else {
 			struct srej_list *l;
-			l2cap_add_to_srej_queue(sk, skb, tx_seq, sar);
+
+			/* duplicated tx_seq */
+			if (l2cap_add_to_srej_queue(sk, skb, tx_seq, sar) < 0)
+				goto drop;
 
 			list_for_each_entry(l, SREJ_LIST(sk), list) {
 				if (l->tx_seq == tx_seq) {
@@ -3628,6 +3647,15 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 			l2cap_send_srejframe(sk, tx_seq);
 		}
 	} else {
+		expected_tx_seq_offset =
+			(pi->expected_tx_seq - pi->buffer_seq) % 64;
+		if (expected_tx_seq_offset < 0)
+			expected_tx_seq_offset += 64;
+
+		/* duplicated tx_seq */
+		if (tx_seq_offset < expected_tx_seq_offset)
+			goto drop;
+
 		pi->conn_state |= L2CAP_CONN_SREJ_SENT;
 
 		INIT_LIST_HEAD(SREJ_LIST(sk));
@@ -3676,6 +3704,10 @@ expected:
 		l2cap_send_ack(pi);
 
 	return 0;
+
+drop:
+	kfree_skb(skb);
+	return 0;
 }
 
 static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
-- 
cgit v1.2.3-59-g8ed1b


From 1890d36bb556a27684ad29654a9898ab9a5f57ee Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:44 -0300
Subject: Bluetooth: Implement Local Busy Condition handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Support Local Busy condition handling through a waitqueue that wakes up
every 200ms and tries to push the packets to the upper layer. If it can
push the whole queue, it leaves the Local Busy state.

The patch modifies the behaviour of l2cap_ertm_reassembly_sdu() to
support retry of the push operation.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h |   6 ++
 net/bluetooth/l2cap.c         | 187 +++++++++++++++++++++++++++++++++++++-----
 2 files changed, 172 insertions(+), 21 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index 8942c2ffc58b..d0185cc04c14 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -35,6 +35,7 @@
 #define L2CAP_DEFAULT_MONITOR_TO	12000   /* 12 seconds */
 #define L2CAP_DEFAULT_MAX_PDU_SIZE	672
 #define L2CAP_DEFAULT_ACK_TO		200
+#define L2CAP_LOCAL_BUSY_TRIES		12
 
 #define L2CAP_CONN_TIMEOUT	(40000) /* 40 seconds */
 #define L2CAP_INFO_TIMEOUT	(4000)  /*  4 seconds */
@@ -294,6 +295,7 @@ struct l2cap_conn {
 #define l2cap_pi(sk) ((struct l2cap_pinfo *) sk)
 #define TX_QUEUE(sk) (&l2cap_pi(sk)->tx_queue)
 #define SREJ_QUEUE(sk) (&l2cap_pi(sk)->srej_queue)
+#define BUSY_QUEUE(sk) (&l2cap_pi(sk)->busy_queue)
 #define SREJ_LIST(sk) (&l2cap_pi(sk)->srej_l.list)
 
 struct srej_list {
@@ -356,6 +358,8 @@ struct l2cap_pinfo {
 	struct timer_list	ack_timer;
 	struct sk_buff_head	tx_queue;
 	struct sk_buff_head	srej_queue;
+	struct sk_buff_head	busy_queue;
+	struct work_struct	busy_work;
 	struct srej_list	srej_l;
 	struct l2cap_conn	*conn;
 	struct sock		*next_c;
@@ -383,6 +387,8 @@ struct l2cap_pinfo {
 #define L2CAP_CONN_LOCAL_BUSY      0x0040
 #define L2CAP_CONN_REJ_ACT         0x0080
 #define L2CAP_CONN_SEND_FBIT       0x0100
+#define L2CAP_CONN_RNR_SENT        0x0200
+#define L2CAP_CONN_SAR_RETRY       0x0400
 
 #define __mod_retrans_timer() mod_timer(&l2cap_pi(sk)->retrans_timer, \
 		jiffies +  msecs_to_jiffies(L2CAP_DEFAULT_RETRANS_TO));
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 481cec22ef96..103e4b54a86a 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -68,10 +68,14 @@ static u8 l2cap_fixed_chan[8] = { 0x02, };
 
 static const struct proto_ops l2cap_sock_ops;
 
+static struct workqueue_struct *_busy_wq;
+
 static struct bt_sock_list l2cap_sk_list = {
 	.lock = __RW_LOCK_UNLOCKED(l2cap_sk_list.lock)
 };
 
+static void l2cap_busy_work(struct work_struct *work);
+
 static void __l2cap_sock_close(struct sock *sk, int reason);
 static void l2cap_sock_close(struct sock *sk);
 static void l2cap_sock_kill(struct sock *sk);
@@ -386,9 +390,10 @@ static inline void l2cap_send_sframe(struct l2cap_pinfo *pi, u16 control)
 
 static inline void l2cap_send_rr_or_rnr(struct l2cap_pinfo *pi, u16 control)
 {
-	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY)
+	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
 		control |= L2CAP_SUPER_RCV_NOT_READY;
-	else
+		pi->conn_state |= L2CAP_CONN_RNR_SENT;
+	} else
 		control |= L2CAP_SUPER_RCV_READY;
 
 	control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
@@ -816,6 +821,7 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent)
 	pi->flush_to = L2CAP_DEFAULT_FLUSH_TO;
 	skb_queue_head_init(TX_QUEUE(sk));
 	skb_queue_head_init(SREJ_QUEUE(sk));
+	skb_queue_head_init(BUSY_QUEUE(sk));
 	INIT_LIST_HEAD(SREJ_LIST(sk));
 }
 
@@ -1439,6 +1445,7 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi)
 
 	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
 		control |= L2CAP_SUPER_RCV_NOT_READY;
+		pi->conn_state |= L2CAP_CONN_RNR_SENT;
 		l2cap_send_sframe(pi, control);
 		return;
 	} else if (l2cap_ertm_send(sk) == 0) {
@@ -2279,6 +2286,9 @@ static inline void l2cap_ertm_init(struct sock *sk)
 			l2cap_ack_timeout, (unsigned long) sk);
 
 	__skb_queue_head_init(SREJ_QUEUE(sk));
+	__skb_queue_head_init(BUSY_QUEUE(sk));
+
+	INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work);
 }
 
 static int l2cap_mode_supported(__u8 mode, __u32 feat_mask)
@@ -3046,6 +3056,7 @@ static inline int l2cap_disconnect_req(struct l2cap_conn *conn, struct l2cap_cmd
 
 	if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
 		skb_queue_purge(SREJ_QUEUE(sk));
+		skb_queue_purge(BUSY_QUEUE(sk));
 		del_timer(&l2cap_pi(sk)->retrans_timer);
 		del_timer(&l2cap_pi(sk)->monitor_timer);
 		del_timer(&l2cap_pi(sk)->ack_timer);
@@ -3077,6 +3088,7 @@ static inline int l2cap_disconnect_rsp(struct l2cap_conn *conn, struct l2cap_cmd
 
 	if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM) {
 		skb_queue_purge(SREJ_QUEUE(sk));
+		skb_queue_purge(BUSY_QUEUE(sk));
 		del_timer(&l2cap_pi(sk)->retrans_timer);
 		del_timer(&l2cap_pi(sk)->monitor_timer);
 		del_timer(&l2cap_pi(sk)->ack_timer);
@@ -3287,6 +3299,7 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
 	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
 		control |= L2CAP_SUPER_RCV_NOT_READY | L2CAP_CTRL_FINAL;
 		l2cap_send_sframe(pi, control);
+		pi->conn_state |= L2CAP_CONN_RNR_SENT;
 		pi->conn_state &= ~L2CAP_CONN_SEND_FBIT;
 	}
 
@@ -3338,7 +3351,7 @@ static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 c
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	struct sk_buff *_skb;
-	int err = 0;
+	int err;
 
 	switch (control & L2CAP_CTRL_SAR) {
 	case L2CAP_SDU_UNSEGMENTED:
@@ -3356,16 +3369,18 @@ static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 c
 			goto drop;
 
 		pi->sdu_len = get_unaligned_le16(skb->data);
-		skb_pull(skb, 2);
 
 		if (pi->sdu_len > pi->imtu)
 			goto disconnect;
 
 		pi->sdu = bt_skb_alloc(pi->sdu_len, GFP_ATOMIC);
-		if (!pi->sdu) {
-			err = -ENOMEM;
-			break;
-		}
+		if (!pi->sdu)
+			return -ENOMEM;
+
+		/* pull sdu_len bytes only after alloc, because of Local Busy
+		 * condition we have to be sure that this will be executed
+		 * only once, i.e., when alloc does not fail */
+		skb_pull(skb, 2);
 
 		memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
 
@@ -3395,28 +3410,40 @@ static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 c
 		if (!pi->sdu)
 			goto disconnect;
 
-		memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
+		if (!(pi->conn_state & L2CAP_CONN_SAR_RETRY)) {
+			memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
 
-		pi->conn_state &= ~L2CAP_CONN_SAR_SDU;
-		pi->partial_sdu_len += skb->len;
+			pi->partial_sdu_len += skb->len;
 
-		if (pi->partial_sdu_len > pi->imtu)
-			goto drop;
+			if (pi->partial_sdu_len > pi->imtu)
+				goto drop;
 
-		if (pi->partial_sdu_len != pi->sdu_len)
-			goto drop;
+			if (pi->partial_sdu_len != pi->sdu_len)
+				goto drop;
+		}
 
 		_skb = skb_clone(pi->sdu, GFP_ATOMIC);
+		if (!_skb) {
+			pi->conn_state |= L2CAP_CONN_SAR_RETRY;
+			return -ENOMEM;
+		}
+
 		err = sock_queue_rcv_skb(sk, _skb);
-		if (err < 0)
+		if (err < 0) {
 			kfree_skb(_skb);
+			pi->conn_state |= L2CAP_CONN_SAR_RETRY;
+			return err;
+		}
+
+		pi->conn_state &= ~L2CAP_CONN_SAR_RETRY;
+		pi->conn_state &= ~L2CAP_CONN_SAR_SDU;
 
 		kfree_skb(pi->sdu);
 		break;
 	}
 
 	kfree_skb(skb);
-	return err;
+	return 0;
 
 drop:
 	kfree_skb(pi->sdu);
@@ -3428,6 +3455,115 @@ disconnect:
 	return 0;
 }
 
+static void l2cap_busy_work(struct work_struct *work)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct l2cap_pinfo *pi =
+		container_of(work, struct l2cap_pinfo, busy_work);
+	struct sock *sk = (struct sock *)pi;
+	int n_tries = 0, timeo = HZ/5, err;
+	struct sk_buff *skb;
+	u16 control;
+
+	lock_sock(sk);
+
+	add_wait_queue(sk->sk_sleep, &wait);
+	while ((skb = skb_peek(BUSY_QUEUE(sk)))) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (n_tries++ > L2CAP_LOCAL_BUSY_TRIES) {
+			err = -EBUSY;
+			l2cap_send_disconn_req(pi->conn, sk);
+			goto done;
+		}
+
+		if (!timeo)
+			timeo = HZ/5;
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeo);
+			goto done;
+		}
+
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+
+		err = sock_error(sk);
+		if (err)
+			goto done;
+
+		while ((skb = skb_dequeue(BUSY_QUEUE(sk)))) {
+			control = bt_cb(skb)->sar << L2CAP_CTRL_SAR_SHIFT;
+			err = l2cap_ertm_reassembly_sdu(sk, skb, control);
+			if (err < 0) {
+				skb_queue_head(BUSY_QUEUE(sk), skb);
+				break;
+			}
+
+			pi->buffer_seq = (pi->buffer_seq + 1) % 64;
+		}
+
+		if (!skb)
+			break;
+	}
+
+	if (!(pi->conn_state & L2CAP_CONN_RNR_SENT))
+		goto done;
+
+	control = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
+	control |= L2CAP_SUPER_RCV_READY | L2CAP_CTRL_POLL;
+	l2cap_send_sframe(pi, control);
+	l2cap_pi(sk)->retry_count = 1;
+
+	del_timer(&pi->retrans_timer);
+	__mod_monitor_timer();
+
+	l2cap_pi(sk)->conn_state |= L2CAP_CONN_WAIT_F;
+
+done:
+	pi->conn_state &= ~L2CAP_CONN_LOCAL_BUSY;
+	pi->conn_state &= ~L2CAP_CONN_RNR_SENT;
+
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+
+	release_sock(sk);
+}
+
+static int l2cap_push_rx_skb(struct sock *sk, struct sk_buff *skb, u16 control)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	int sctrl, err;
+
+	if (pi->conn_state & L2CAP_CONN_LOCAL_BUSY) {
+		bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT;
+		__skb_queue_tail(BUSY_QUEUE(sk), skb);
+		return -EBUSY;
+	}
+
+	err = l2cap_ertm_reassembly_sdu(sk, skb, control);
+	if (err >= 0) {
+		pi->buffer_seq = (pi->buffer_seq + 1) % 64;
+		return err;
+	}
+
+	/* Busy Condition */
+	pi->conn_state |= L2CAP_CONN_LOCAL_BUSY;
+	bt_cb(skb)->sar = control >> L2CAP_CTRL_SAR_SHIFT;
+	__skb_queue_tail(BUSY_QUEUE(sk), skb);
+
+	sctrl = pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
+	sctrl |= L2CAP_SUPER_RCV_NOT_READY;
+	l2cap_send_sframe(pi, sctrl);
+
+	pi->conn_state |= L2CAP_CONN_RNR_SENT;
+
+	queue_work(_busy_wq, &pi->busy_work);
+
+	return err;
+}
+
 static int l2cap_streaming_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 control)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
@@ -3614,6 +3750,9 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 		goto drop;
 	}
 
+	if (pi->conn_state == L2CAP_CONN_LOCAL_BUSY)
+		goto drop;
+
 	if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
 		struct srej_list *first;
 
@@ -3662,6 +3801,7 @@ static inline int l2cap_data_channel_iframe(struct sock *sk, u16 rx_control, str
 		pi->buffer_seq_srej = pi->buffer_seq;
 
 		__skb_queue_head_init(SREJ_QUEUE(sk));
+		__skb_queue_head_init(BUSY_QUEUE(sk));
 		l2cap_add_to_srej_queue(sk, skb, tx_seq, sar);
 
 		pi->conn_state |= L2CAP_CONN_SEND_PBIT;
@@ -3691,11 +3831,9 @@ expected:
 		}
 	}
 
-	pi->buffer_seq = (pi->buffer_seq + 1) % 64;
-
-	err = l2cap_ertm_reassembly_sdu(sk, skb, rx_control);
+	err = l2cap_push_rx_skb(sk, skb, rx_control);
 	if (err < 0)
-		return err;
+		return 0;
 
 	__mod_ack_timer();
 
@@ -4406,6 +4544,10 @@ static int __init l2cap_init(void)
 	if (err < 0)
 		return err;
 
+	_busy_wq = create_singlethread_workqueue("l2cap");
+	if (!_busy_wq)
+		goto error;
+
 	err = bt_sock_register(BTPROTO_L2CAP, &l2cap_sock_family_ops);
 	if (err < 0) {
 		BT_ERR("L2CAP socket registration failed");
@@ -4440,6 +4582,9 @@ static void __exit l2cap_exit(void)
 {
 	debugfs_remove(l2cap_debugfs);
 
+	flush_workqueue(_busy_wq);
+	destroy_workqueue(_busy_wq);
+
 	if (bt_sock_unregister(BTPROTO_L2CAP) < 0)
 		BT_ERR("L2CAP socket unregistration failed");
 
-- 
cgit v1.2.3-59-g8ed1b


From 6161c0382bbab883a634d284f7367a88bbe88534 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:44 -0300
Subject: Bluetooth: Add wait_queue to wait ack of all sent packets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To guarantee that all packets we sent were received, we need to wait for
their acks before shutting down the socket.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 103e4b54a86a..9d514f9dbc0f 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1242,6 +1242,37 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l
 	return 0;
 }
 
+static int __l2cap_wait_ack(struct sock *sk)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	int err = 0;
+	int timeo = HZ/5;
+
+	add_wait_queue(sk->sk_sleep, &wait);
+	while ((l2cap_pi(sk)->unacked_frames > 0 && l2cap_pi(sk)->conn)) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (!timeo)
+			timeo = HZ/5;
+
+		if (signal_pending(current)) {
+			err = sock_intr_errno(timeo);
+			break;
+		}
+
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+
+		err = sock_error(sk);
+		if (err)
+			break;
+	}
+	set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk->sk_sleep, &wait);
+	return err;
+}
+
 static void l2cap_monitor_timeout(unsigned long arg)
 {
 	struct sock *sk = (void *) arg;
@@ -2059,6 +2090,9 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
 
 	lock_sock(sk);
 	if (!sk->sk_shutdown) {
+		if (l2cap_pi(sk)->mode == L2CAP_MODE_ERTM)
+			err = __l2cap_wait_ack(sk);
+
 		sk->sk_shutdown = SHUTDOWN_MASK;
 		l2cap_sock_clear_timer(sk);
 		__l2cap_sock_close(sk, 0);
-- 
cgit v1.2.3-59-g8ed1b


From dfc909befbfe967bd7f46ef33b6969c1b7f3cf42 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:45 -0300
Subject: Bluetooth: Fix race condition on l2cap_ertm_send()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

l2cap_ertm_send() can be called both from user context and bottom half
context. The socket locks for those contexts are different: the user
context uses a mutex (which can sleep) and the bottom half uses a
spinlock_bh. That creates a race condition when we have interruptions in
both contexts at the same time.

The better way to solve this is to add a new spinlock to protect
l2cap_ertm_send() and the variables it accesses. The other solution was
to defer l2cap_ertm_send() with a workqueue, but the sending process
already has one deferral in the hci layer. It's not a good idea to add
another one.

The patch refactors the code to create l2cap_retransmit_frames(), which
encapsulates taking the lock around l2cap_ertm_send() for some callers.
It also renames l2cap_retransmit_frame() to l2cap_retransmit_one_frame()
to avoid confusion.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Reviewed-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h |  1 +
 net/bluetooth/l2cap.c         | 99 ++++++++++++++++++++++++++++---------------
 2 files changed, 67 insertions(+), 33 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index d0185cc04c14..7c695bfd853c 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -353,6 +353,7 @@ struct l2cap_pinfo {
 
 	__le16		sport;
 
+	spinlock_t		send_lock;
 	struct timer_list	retrans_timer;
 	struct timer_list	monitor_timer;
 	struct timer_list	ack_timer;
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 9d514f9dbc0f..fe663e9c6684 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1368,7 +1368,7 @@ static int l2cap_streaming_send(struct sock *sk)
 	return 0;
 }
 
-static void l2cap_retransmit_frame(struct sock *sk, u8 tx_seq)
+static void l2cap_retransmit_one_frame(struct sock *sk, u8 tx_seq)
 {
 	struct l2cap_pinfo *pi = l2cap_pi(sk);
 	struct sk_buff *skb, *tx_skb;
@@ -1467,10 +1467,29 @@ static int l2cap_ertm_send(struct sock *sk)
 	return nsent;
 }
 
+static int l2cap_retransmit_frames(struct sock *sk)
+{
+	struct l2cap_pinfo *pi = l2cap_pi(sk);
+	int ret;
+
+	spin_lock_bh(&pi->send_lock);
+
+	if (!skb_queue_empty(TX_QUEUE(sk)))
+		sk->sk_send_head = TX_QUEUE(sk)->next;
+
+	pi->next_tx_seq = pi->expected_ack_seq;
+	ret = l2cap_ertm_send(sk);
+
+	spin_unlock_bh(&pi->send_lock);
+
+	return ret;
+}
+
 static void l2cap_send_ack(struct l2cap_pinfo *pi)
 {
 	struct sock *sk = (struct sock *)pi;
 	u16 control = 0;
+	int nframes;
 
 	control |= pi->buffer_seq << L2CAP_CTRL_REQSEQ_SHIFT;
 
@@ -1479,10 +1498,17 @@ static void l2cap_send_ack(struct l2cap_pinfo *pi)
 		pi->conn_state |= L2CAP_CONN_RNR_SENT;
 		l2cap_send_sframe(pi, control);
 		return;
-	} else if (l2cap_ertm_send(sk) == 0) {
-		control |= L2CAP_SUPER_RCV_READY;
-		l2cap_send_sframe(pi, control);
 	}
+
+	spin_lock_bh(&pi->send_lock);
+	nframes = l2cap_ertm_send(sk);
+	spin_unlock_bh(&pi->send_lock);
+
+	if (nframes > 0)
+		return;
+
+	control |= L2CAP_SUPER_RCV_READY;
+	l2cap_send_sframe(pi, control);
 }
 
 static void l2cap_send_srejtail(struct sock *sk)
@@ -1673,8 +1699,10 @@ static inline int l2cap_sar_segment_sdu(struct sock *sk, struct msghdr *msg, siz
 		size += buflen;
 	}
 	skb_queue_splice_tail(&sar_queue, TX_QUEUE(sk));
+	spin_lock_bh(&pi->send_lock);
 	if (sk->sk_send_head == NULL)
 		sk->sk_send_head = sar_queue.next;
+	spin_unlock_bh(&pi->send_lock);
 
 	return size;
 }
@@ -1745,8 +1773,15 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 				goto done;
 			}
 			__skb_queue_tail(TX_QUEUE(sk), skb);
+
+			if (pi->mode == L2CAP_MODE_ERTM)
+				spin_lock_bh(&pi->send_lock);
+
 			if (sk->sk_send_head == NULL)
 				sk->sk_send_head = skb;
+
+			if (pi->mode == L2CAP_MODE_ERTM)
+				spin_unlock_bh(&pi->send_lock);
 		} else {
 		/* Segment SDU into multiples PDUs */
 			err = l2cap_sar_segment_sdu(sk, msg, len);
@@ -1754,10 +1789,13 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms
 				goto done;
 		}
 
-		if (pi->mode == L2CAP_MODE_STREAMING)
+		if (pi->mode == L2CAP_MODE_STREAMING) {
 			err = l2cap_streaming_send(sk);
-		else
+		} else {
+			spin_lock_bh(&pi->send_lock);
 			err = l2cap_ertm_send(sk);
+			spin_unlock_bh(&pi->send_lock);
+		}
 
 		if (err >= 0)
 			err = len;
@@ -2321,6 +2359,7 @@ static inline void l2cap_ertm_init(struct sock *sk)
 
 	__skb_queue_head_init(SREJ_QUEUE(sk));
 	__skb_queue_head_init(BUSY_QUEUE(sk));
+	spin_lock_init(&l2cap_pi(sk)->send_lock);
 
 	INIT_WORK(&l2cap_pi(sk)->busy_work, l2cap_busy_work);
 }
@@ -3340,7 +3379,9 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
 	if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && pi->unacked_frames > 0)
 		__mod_retrans_timer();
 
+	spin_lock_bh(&pi->send_lock);
 	l2cap_ertm_send(sk);
+	spin_unlock_bh(&pi->send_lock);
 
 	if (!(pi->conn_state & L2CAP_CONN_LOCAL_BUSY) &&
 			pi->frames_sent == 0) {
@@ -3857,12 +3898,8 @@ expected:
 	if (rx_control & L2CAP_CTRL_FINAL) {
 		if (pi->conn_state & L2CAP_CONN_REJ_ACT)
 			pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
-		else {
-			if (!skb_queue_empty(TX_QUEUE(sk)))
-				sk->sk_send_head = TX_QUEUE(sk)->next;
-			pi->next_tx_seq = pi->expected_ack_seq;
-			l2cap_ertm_send(sk);
-		}
+		else
+			l2cap_retransmit_frames(sk);
 	}
 
 	err = l2cap_push_rx_skb(sk, skb, rx_control);
@@ -3907,12 +3944,8 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 
 		if (pi->conn_state & L2CAP_CONN_REJ_ACT)
 			pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
-		else {
-			if (!skb_queue_empty(TX_QUEUE(sk)))
-				sk->sk_send_head = TX_QUEUE(sk)->next;
-			pi->next_tx_seq = pi->expected_ack_seq;
-			l2cap_ertm_send(sk);
-		}
+		else
+			l2cap_retransmit_frames(sk);
 
 	} else {
 		if ((pi->conn_state & L2CAP_CONN_REMOTE_BUSY) &&
@@ -3920,10 +3953,13 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 			__mod_retrans_timer();
 
 		pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
-		if (pi->conn_state & L2CAP_CONN_SREJ_SENT)
+		if (pi->conn_state & L2CAP_CONN_SREJ_SENT) {
 			l2cap_send_ack(pi);
-		else
+		} else {
+			spin_lock_bh(&pi->send_lock);
 			l2cap_ertm_send(sk);
+			spin_unlock_bh(&pi->send_lock);
+		}
 	}
 }
 
@@ -3940,17 +3976,10 @@ static inline void l2cap_data_channel_rejframe(struct sock *sk, u16 rx_control)
 	if (rx_control & L2CAP_CTRL_FINAL) {
 		if (pi->conn_state & L2CAP_CONN_REJ_ACT)
 			pi->conn_state &= ~L2CAP_CONN_REJ_ACT;
-		else {
-			if (!skb_queue_empty(TX_QUEUE(sk)))
-				sk->sk_send_head = TX_QUEUE(sk)->next;
-			pi->next_tx_seq = pi->expected_ack_seq;
-			l2cap_ertm_send(sk);
-		}
+		else
+			l2cap_retransmit_frames(sk);
 	} else {
-		if (!skb_queue_empty(TX_QUEUE(sk)))
-			sk->sk_send_head = TX_QUEUE(sk)->next;
-		pi->next_tx_seq = pi->expected_ack_seq;
-		l2cap_ertm_send(sk);
+		l2cap_retransmit_frames(sk);
 
 		if (pi->conn_state & L2CAP_CONN_WAIT_F)
 			pi->conn_state |= L2CAP_CONN_REJ_ACT;
@@ -3966,8 +3995,12 @@ static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control)
 	if (rx_control & L2CAP_CTRL_POLL) {
 		pi->expected_ack_seq = tx_seq;
 		l2cap_drop_acked_frames(sk);
-		l2cap_retransmit_frame(sk, tx_seq);
+		l2cap_retransmit_one_frame(sk, tx_seq);
+
+		spin_lock_bh(&pi->send_lock);
 		l2cap_ertm_send(sk);
+		spin_unlock_bh(&pi->send_lock);
+
 		if (pi->conn_state & L2CAP_CONN_WAIT_F) {
 			pi->srej_save_reqseq = tx_seq;
 			pi->conn_state |= L2CAP_CONN_SREJ_ACT;
@@ -3977,9 +4010,9 @@ static inline void l2cap_data_channel_srejframe(struct sock *sk, u16 rx_control)
 				pi->srej_save_reqseq == tx_seq)
 			pi->conn_state &= ~L2CAP_CONN_SREJ_ACT;
 		else
-			l2cap_retransmit_frame(sk, tx_seq);
+			l2cap_retransmit_one_frame(sk, tx_seq);
 	} else {
-		l2cap_retransmit_frame(sk, tx_seq);
+		l2cap_retransmit_one_frame(sk, tx_seq);
 		if (pi->conn_state & L2CAP_CONN_WAIT_F) {
 			pi->srej_save_reqseq = tx_seq;
 			pi->conn_state |= L2CAP_CONN_SREJ_ACT;
-- 
cgit v1.2.3-59-g8ed1b


From 4178ba462a3e8ab5094e69606f01d9e95f2d5ea6 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Sat, 1 May 2010 16:15:45 -0300
Subject: Bluetooth: Prevents buffer overflow on l2cap_ertm_reassembly_sdu()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The checks should be done before the memcpy to avoid buffer
overflow.

Reported-by: João Paulo Rechi Vita <jprvita@profusion.mobi>
Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index fe663e9c6684..9ef01c32b3a2 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3470,12 +3470,12 @@ static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 c
 		if (!pi->sdu)
 			goto disconnect;
 
-		memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
-
 		pi->partial_sdu_len += skb->len;
 		if (pi->partial_sdu_len > pi->sdu_len)
 			goto drop;
 
+		memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
+
 		break;
 
 	case L2CAP_SDU_END:
@@ -3486,8 +3486,6 @@ static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 c
 			goto disconnect;
 
 		if (!(pi->conn_state & L2CAP_CONN_SAR_RETRY)) {
-			memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
-
 			pi->partial_sdu_len += skb->len;
 
 			if (pi->partial_sdu_len > pi->imtu)
@@ -3495,6 +3493,8 @@ static int l2cap_ertm_reassembly_sdu(struct sock *sk, struct sk_buff *skb, u16 c
 
 			if (pi->partial_sdu_len != pi->sdu_len)
 				goto drop;
+
+			memcpy(skb_put(pi->sdu, skb->len), skb->data, skb->len);
 		}
 
 		_skb = skb_clone(pi->sdu, GFP_ATOMIC);
-- 
cgit v1.2.3-59-g8ed1b


From 844c0972427ee5f661158160aaca10b22b3dda60 Mon Sep 17 00:00:00 2001
From: "Gustavo F. Padovan" <padovan@profusion.mobi>
Date: Tue, 4 May 2010 23:16:01 -0300
Subject: Bluetooth: Fix spec error in the RemoteBusy Logic

On receipt of an RR(P=1) with RemoteBusy set to TRUE (in the RECV
state table) we have to call sendIorRRorRNR(F=1) and set RemoteBusy to
FALSE just afterwards. This leads to a freeze in the sending process,
since it is not allowed to send data with RemoteBusy set to TRUE and no
one calls SendPending-I-Frames after RemoteBusy is set to FALSE (the
last action for that event).

Actually sendIorRRorRNR() calls SendPending-I-Frames, but at that moment
RemoteBusy is still TRUE and we cannot send any frame. Afterwards, no one
calls SendPending-I-Frames again and the sending process stops.

The solution here is to set RemoteBusy to FALSE inside
sendIorRRorRNR(), just before it calls SendPending-I-Frames. That will
make SendPending-I-Frames able to send frames. This solution is similar
to what RR(P=0)(F=0) in the RECV table and RR(P=1) in the SREJ_SENT
table do.

Actually, it doesn't make any sense to call SendPending-I-Frames if we
cannot send any frame, i.e., if RemoteBusy is TRUE.

Signed-off-by: Gustavo F. Padovan <padovan@profusion.mobi>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 9ef01c32b3a2..ba49f9a3579e 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -3379,6 +3379,8 @@ static inline void l2cap_send_i_or_rr_or_rnr(struct sock *sk)
 	if (pi->conn_state & L2CAP_CONN_REMOTE_BUSY && pi->unacked_frames > 0)
 		__mod_retrans_timer();
 
+	pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
+
 	spin_lock_bh(&pi->send_lock);
 	l2cap_ertm_send(sk);
 	spin_unlock_bh(&pi->send_lock);
@@ -3936,7 +3938,6 @@ static inline void l2cap_data_channel_rrframe(struct sock *sk, u16 rx_control)
 			l2cap_send_srejtail(sk);
 		} else {
 			l2cap_send_i_or_rr_or_rnr(sk);
-			pi->conn_state &= ~L2CAP_CONN_REMOTE_BUSY;
 		}
 
 	} else if (rx_control & L2CAP_CTRL_FINAL) {
-- 
cgit v1.2.3-59-g8ed1b


From f48fd9c8cd746fdb055a97249a209c77dca0f710 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Sat, 20 Mar 2010 15:20:04 +0100
Subject: Bluetooth: Create per controller workqueue

Instead of having a global workqueue for all controllers, it makes
more sense to have a workqueue per controller.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h |  2 ++
 net/bluetooth/hci_core.c         | 14 ++++++++++++++
 net/bluetooth/hci_sysfs.c        | 16 +++-------------
 3 files changed, 19 insertions(+), 13 deletions(-)

(limited to 'net')

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 4511df2a0d7f..e42f6ed5421c 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -107,6 +107,8 @@ struct hci_dev {
 	unsigned long	acl_last_tx;
 	unsigned long	sco_last_tx;
 
+	struct workqueue_struct	*workqueue;
+
 	struct tasklet_struct	cmd_task;
 	struct tasklet_struct	rx_task;
 	struct tasklet_struct	tx_task;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 904f1e8a7a3b..5e83f8e0877a 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -37,6 +37,7 @@
 #include <linux/fcntl.h>
 #include <linux/init.h>
 #include <linux/skbuff.h>
+#include <linux/workqueue.h>
 #include <linux/interrupt.h>
 #include <linux/notifier.h>
 #include <linux/rfkill.h>
@@ -928,6 +929,10 @@ int hci_register_dev(struct hci_dev *hdev)
 
 	write_unlock_bh(&hci_dev_list_lock);
 
+	hdev->workqueue = create_singlethread_workqueue(hdev->name);
+	if (!hdev->workqueue)
+		goto nomem;
+
 	hci_register_sysfs(hdev);
 
 	hdev->rfkill = rfkill_alloc(hdev->name, &hdev->dev,
@@ -942,6 +947,13 @@ int hci_register_dev(struct hci_dev *hdev)
 	hci_notify(hdev, HCI_DEV_REG);
 
 	return id;
+
+nomem:
+	write_lock_bh(&hci_dev_list_lock);
+	list_del(&hdev->list);
+	write_unlock_bh(&hci_dev_list_lock);
+
+	return -ENOMEM;
 }
 EXPORT_SYMBOL(hci_register_dev);
 
@@ -970,6 +982,8 @@ int hci_unregister_dev(struct hci_dev *hdev)
 
 	hci_unregister_sysfs(hdev);
 
+	destroy_workqueue(hdev->workqueue);
+
 	__hci_dev_put(hdev);
 
 	return 0;
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index 0e8e1a59856c..a978449ab04d 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -14,8 +14,6 @@ static struct class *bt_class;
 struct dentry *bt_debugfs = NULL;
 EXPORT_SYMBOL_GPL(bt_debugfs);
 
-static struct workqueue_struct *bt_workq;
-
 static inline char *link_typetostr(int type)
 {
 	switch (type) {
@@ -161,14 +159,14 @@ void hci_conn_add_sysfs(struct hci_conn *conn)
 {
 	BT_DBG("conn %p", conn);
 
-	queue_work(bt_workq, &conn->work_add);
+	queue_work(conn->hdev->workqueue, &conn->work_add);
 }
 
 void hci_conn_del_sysfs(struct hci_conn *conn)
 {
 	BT_DBG("conn %p", conn);
 
-	queue_work(bt_workq, &conn->work_del);
+	queue_work(conn->hdev->workqueue, &conn->work_del);
 }
 
 static inline char *host_bustostr(int bus)
@@ -487,17 +485,11 @@ void hci_unregister_sysfs(struct hci_dev *hdev)
 
 int __init bt_sysfs_init(void)
 {
-	bt_workq = create_singlethread_workqueue("bluetooth");
-	if (!bt_workq)
-		return -ENOMEM;
-
 	bt_debugfs = debugfs_create_dir("bluetooth", NULL);
 
 	bt_class = class_create(THIS_MODULE, "bluetooth");
-	if (IS_ERR(bt_class)) {
-		destroy_workqueue(bt_workq);
+	if (IS_ERR(bt_class))
 		return PTR_ERR(bt_class);
-	}
 
 	return 0;
 }
@@ -507,6 +499,4 @@ void bt_sysfs_cleanup(void)
 	class_destroy(bt_class);
 
 	debugfs_remove_recursive(bt_debugfs);
-
-	destroy_workqueue(bt_workq);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 7b767cad296d17cfe071734f71a3b7b479534c8c Mon Sep 17 00:00:00 2001
From: Tomas Winkler <tomas.winkler@intel.com>
Date: Tue, 9 Mar 2010 21:38:03 +0200
Subject: Bluetooth: Use strict_strtoul instead of simple_strtoul

Use strict_strtoul as suggested by checkpatch.pl for more strict input
checking.

Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_sysfs.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index a978449ab04d..463ffa4fe042 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -281,11 +281,9 @@ static ssize_t show_idle_timeout(struct device *dev, struct device_attribute *at
 static ssize_t store_idle_timeout(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct hci_dev *hdev = dev_get_drvdata(dev);
-	char *ptr;
-	__u32 val;
+	unsigned long val;
 
-	val = simple_strtoul(buf, &ptr, 10);
-	if (ptr == buf)
+	if (strict_strtoul(buf, 0, &val) < 0)
 		return -EINVAL;
 
 	if (val != 0 && (val < 500 || val > 3600000))
@@ -305,11 +303,9 @@ static ssize_t show_sniff_max_interval(struct device *dev, struct device_attribu
 static ssize_t store_sniff_max_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct hci_dev *hdev = dev_get_drvdata(dev);
-	char *ptr;
-	__u16 val;
+	unsigned long val;
 
-	val = simple_strtoul(buf, &ptr, 10);
-	if (ptr == buf)
+	if (strict_strtoul(buf, 0, &val) < 0)
 		return -EINVAL;
 
 	if (val < 0x0002 || val > 0xFFFE || val % 2)
@@ -332,11 +328,9 @@ static ssize_t show_sniff_min_interval(struct device *dev, struct device_attribu
 static ssize_t store_sniff_min_interval(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct hci_dev *hdev = dev_get_drvdata(dev);
-	char *ptr;
-	__u16 val;
+	unsigned long val;
 
-	val = simple_strtoul(buf, &ptr, 10);
-	if (ptr == buf)
+	if (strict_strtoul(buf, 0, &val) < 0)
 		return -EINVAL;
 
 	if (val < 0x0002 || val > 0xFFFE || val % 2)
-- 
cgit v1.2.3-59-g8ed1b


From 2b0b05ddc04b6d45e71cd36405df512075786f1e Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Mon, 10 May 2010 11:33:10 +0200
Subject: Bluetooth: Fix issues where sk_sleep() helper is needed now

There were some left-overs that used sk->sk_sleep instead of the new
sk_sleep() helper.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index ba49f9a3579e..673a36886716 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1248,7 +1248,7 @@ static int __l2cap_wait_ack(struct sock *sk)
 	int err = 0;
 	int timeo = HZ/5;
 
-	add_wait_queue(sk->sk_sleep, &wait);
+	add_wait_queue(sk_sleep(sk), &wait);
 	while ((l2cap_pi(sk)->unacked_frames > 0 && l2cap_pi(sk)->conn)) {
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -1269,7 +1269,7 @@ static int __l2cap_wait_ack(struct sock *sk)
 			break;
 	}
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 	return err;
 }
 
@@ -3544,7 +3544,7 @@ static void l2cap_busy_work(struct work_struct *work)
 
 	lock_sock(sk);
 
-	add_wait_queue(sk->sk_sleep, &wait);
+	add_wait_queue(sk_sleep(sk), &wait);
 	while ((skb = skb_peek(BUSY_QUEUE(sk)))) {
 		set_current_state(TASK_INTERRUPTIBLE);
 
@@ -3603,7 +3603,7 @@ done:
 	pi->conn_state &= ~L2CAP_CONN_RNR_SENT;
 
 	set_current_state(TASK_RUNNING);
-	remove_wait_queue(sk->sk_sleep, &wait);
+	remove_wait_queue(sk_sleep(sk), &wait);
 
 	release_sock(sk);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 3b254c54ec46eb022cb26ee6ab37fae23f5f7d6a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 10 May 2010 17:45:56 +0200
Subject: netfilter: nf_conntrack_proto: fix warning with CONFIG_PROVE_RCU

===================================================
[ INFO: suspicious rcu_dereference_check() usage. ]
---------------------------------------------------
include/net/netfilter/nf_conntrack_l3proto.h:92 invoked rcu_dereference_check()
without protection!

other info that might help us debug this:

rcu_scheduler_active = 1, debug_locks = 0
2 locks held by iptables/3197:
 #0:  (sk_lock-AF_INET){+.+.+.}, at: [<ffffffff8149bd8c>]
ip_setsockopt+0x7c/0xa0
 #1:  (&xt[i].mutex){+.+.+.}, at: [<ffffffff8148a5fe>]
xt_find_table_lock+0x3e/0x110

stack backtrace:
Pid: 3197, comm: iptables Not tainted 2.6.34-rc4 #2
Call Trace:
 [<ffffffff8105e2e8>] lockdep_rcu_dereference+0xb8/0xc0
 [<ffffffff8147fb3b>] nf_ct_l3proto_module_put+0x6b/0x70
 [<ffffffff8148d891>] state_mt_destroy+0x11/0x20
 [<ffffffff814d3baf>] cleanup_match+0x2f/0x50
 [<ffffffff814d3c63>] cleanup_entry+0x33/0x90
 [<ffffffff814d5653>] ? __do_replace+0x1a3/0x210
 [<ffffffff814d564c>] __do_replace+0x19c/0x210
 [<ffffffff814d651a>] do_ipt_set_ctl+0x16a/0x1b0
 [<ffffffff8147a610>] nf_sockopt+0x60/0xa0
...

The __nf_ct_l3proto_find() call doesn't actually need rcu read side
protection since the caller holds a reference to the protocol. Use
rcu_read_lock() anyways to avoid the warning.

Kernel bugzilla #15781: https://bugzilla.kernel.org/show_bug.cgi?id=15781

Reported-by: Christian Casteyde <casteyde.christian@free.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_proto.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index a6defc793601..5886ba1d52a0 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -117,9 +117,13 @@ void nf_ct_l3proto_module_put(unsigned short l3proto)
 {
 	struct nf_conntrack_l3proto *p;
 
-	/* rcu_read_lock not necessary since the caller holds a reference */
+	/* rcu_read_lock not necessary since the caller holds a reference, but
+	 * taken anyways to avoid lockdep warnings in __nf_ct_l3proto_find()
+	 */
+	rcu_read_lock();
 	p = __nf_ct_l3proto_find(l3proto);
 	module_put(p->me);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put);
 
-- 
cgit v1.2.3-59-g8ed1b


From b56f2d55c6c22b0c5774b3b22e336fb6cc5f4094 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 10 May 2010 18:47:57 +0200
Subject: netfilter: use rcu_dereference_protected()

Restore the rcu_dereference() calls in conntrack/expectation notifier
and logger registration/unregistration, but use the _protected variant,
which will be required by the upcoming __rcu annotations.

Based on patch by Eric Dumazet <eric.dumazet@gmail.com>

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/netfilter/nf_conntrack_ecache.c | 22 ++++++++++++++++++----
 net/netfilter/nf_log.c              | 10 ++++++++--
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index a94ac3ad02cb..cdcc7649476b 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -82,9 +82,12 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
 int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
 {
 	int ret = 0;
+	struct nf_ct_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	if (nf_conntrack_event_cb != NULL) {
+	notify = rcu_dereference_protected(nf_conntrack_event_cb,
+					   lockdep_is_held(&nf_ct_ecache_mutex));
+	if (notify != NULL) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
@@ -100,8 +103,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier);
 
 void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new)
 {
+	struct nf_ct_event_notifier *notify;
+
 	mutex_lock(&nf_ct_ecache_mutex);
-	BUG_ON(nf_conntrack_event_cb != new);
+	notify = rcu_dereference_protected(nf_conntrack_event_cb,
+					   lockdep_is_held(&nf_ct_ecache_mutex));
+	BUG_ON(notify != new);
 	rcu_assign_pointer(nf_conntrack_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
 }
@@ -110,9 +117,12 @@ EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier);
 int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new)
 {
 	int ret = 0;
+	struct nf_exp_event_notifier *notify;
 
 	mutex_lock(&nf_ct_ecache_mutex);
-	if (nf_expect_event_cb != NULL) {
+	notify = rcu_dereference_protected(nf_expect_event_cb,
+					   lockdep_is_held(&nf_ct_ecache_mutex));
+	if (notify != NULL) {
 		ret = -EBUSY;
 		goto out_unlock;
 	}
@@ -128,8 +138,12 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier);
 
 void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
 {
+	struct nf_exp_event_notifier *notify;
+
 	mutex_lock(&nf_ct_ecache_mutex);
-	BUG_ON(nf_expect_event_cb != new);
+	notify = rcu_dereference_protected(nf_expect_event_cb,
+					   lockdep_is_held(&nf_ct_ecache_mutex));
+	BUG_ON(notify != new);
 	rcu_assign_pointer(nf_expect_event_cb, NULL);
 	mutex_unlock(&nf_ct_ecache_mutex);
 }
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 908f59935fbb..7df37fd786bc 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -35,6 +35,7 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
 /* return EEXIST if the same logger is registred, 0 on success. */
 int nf_log_register(u_int8_t pf, struct nf_logger *logger)
 {
+	const struct nf_logger *llog;
 	int i;
 
 	if (pf >= ARRAY_SIZE(nf_loggers))
@@ -51,7 +52,9 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)
 	} else {
 		/* register at end of list to honor first register win */
 		list_add_tail(&logger->list[pf], &nf_loggers_l[pf]);
-		if (nf_loggers[pf] == NULL)
+		llog = rcu_dereference_protected(nf_loggers[pf],
+						 lockdep_is_held(&nf_log_mutex));
+		if (llog == NULL)
 			rcu_assign_pointer(nf_loggers[pf], logger);
 	}
 
@@ -63,11 +66,14 @@ EXPORT_SYMBOL(nf_log_register);
 
 void nf_log_unregister(struct nf_logger *logger)
 {
+	const struct nf_logger *c_logger;
 	int i;
 
 	mutex_lock(&nf_log_mutex);
 	for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) {
-		if (nf_loggers[i] == logger)
+		c_logger = rcu_dereference_protected(nf_loggers[i],
+						     lockdep_is_held(&nf_log_mutex));
+		if (c_logger == logger)
 			rcu_assign_pointer(nf_loggers[i], NULL);
 		list_del(&logger->list[i]);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From c476efbcde5ba58b81ac752f4a894d6db8e17d94 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 11 May 2010 14:40:48 +0200
Subject: ipv6: ip6mr: move unres_queue and timer to per-namespace data

The unres_queue is currently shared between all namespaces. Following patches
will additionally allow creating multiple multicast routing tables in each
namespace. Having a single shared queue for all these users seems excessive, so
move the queue and the cleanup timer to the per-namespace data to unshare it.

As a side-effect, this fixes a bug in the seq file iteration functions: the
first entry returned is always from the current namespace, entries returned
after that may belong to any namespace.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/net/netns/ipv6.h |  2 ++
 net/ipv6/ip6mr.c         | 74 +++++++++++++++++++++---------------------------
 2 files changed, 35 insertions(+), 41 deletions(-)

(limited to 'net')

diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 1f11ebc22151..43d842ab004e 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -60,6 +60,8 @@ struct netns_ipv6 {
 	struct sock             *igmp_sk;
 #ifdef CONFIG_IPV6_MROUTE
 	struct sock		*mroute6_sk;
+	struct timer_list	ipmr_expire_timer;
+	struct mfc6_cache	*mfc6_unres_queue;
 	struct mfc6_cache	**mfc6_cache_array;
 	struct mif_device	*vif6_table;
 	int			maxvif;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index e0b530ca394c..7236030e403e 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -63,8 +63,6 @@ static DEFINE_RWLOCK(mrt_lock);
 
 #define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
 
-static struct mfc6_cache *mfc_unres_queue;		/* Queue of unresolved entries */
-
 /* Special spinlock for queue of unresolved entries */
 static DEFINE_SPINLOCK(mfc_unres_lock);
 
@@ -84,8 +82,6 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
 static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
 static void mroute_clean_tables(struct net *net);
 
-static struct timer_list ipmr_expire_timer;
-
 
 #ifdef CONFIG_PROC_FS
 
@@ -110,11 +106,10 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 				return mfc;
 	read_unlock(&mrt_lock);
 
-	it->cache = &mfc_unres_queue;
+	it->cache = &net->ipv6.mfc6_unres_queue;
 	spin_lock_bh(&mfc_unres_lock);
-	for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
-		if (net_eq(mfc6_net(mfc), net) &&
-		    pos-- == 0)
+	for (mfc = net->ipv6.mfc6_unres_queue; mfc; mfc = mfc->next)
+		if (pos-- == 0)
 			return mfc;
 	spin_unlock_bh(&mfc_unres_lock);
 
@@ -244,7 +239,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (mfc->next)
 		return mfc->next;
 
-	if (it->cache == &mfc_unres_queue)
+	if (it->cache == &net->ipv6.mfc6_unres_queue)
 		goto end_of_list;
 
 	BUG_ON(it->cache != net->ipv6.mfc6_cache_array);
@@ -257,11 +252,11 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 	/* exhausted cache_array, show unresolved */
 	read_unlock(&mrt_lock);
-	it->cache = &mfc_unres_queue;
+	it->cache = &net->ipv6.mfc6_unres_queue;
 	it->ct = 0;
 
 	spin_lock_bh(&mfc_unres_lock);
-	mfc = mfc_unres_queue;
+	mfc = net->ipv6.mfc6_unres_queue;
 	if (mfc)
 		return mfc;
 
@@ -277,7 +272,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
 
-	if (it->cache == &mfc_unres_queue)
+	if (it->cache == &net->ipv6.mfc6_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
 	else if (it->cache == net->ipv6.mfc6_cache_array)
 		read_unlock(&mrt_lock);
@@ -301,7 +296,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 			   mfc->mf6c_parent);
 
-		if (it->cache != &mfc_unres_queue) {
+		if (it->cache != &net->ipv6.mfc6_unres_queue) {
 			seq_printf(seq, " %8lu %8lu %8lu",
 				   mfc->mfc_un.res.pkt,
 				   mfc->mfc_un.res.bytes,
@@ -559,15 +554,15 @@ static void ip6mr_destroy_unres(struct mfc6_cache *c)
 }
 
 
-/* Single timer process for all the unresolved queue. */
+/* Timer process for all the unresolved queue. */
 
-static void ipmr_do_expire_process(unsigned long dummy)
+static void ipmr_do_expire_process(struct net *net)
 {
 	unsigned long now = jiffies;
 	unsigned long expires = 10 * HZ;
 	struct mfc6_cache *c, **cp;
 
-	cp = &mfc_unres_queue;
+	cp = &net->ipv6.mfc6_unres_queue;
 
 	while ((c = *cp) != NULL) {
 		if (time_after(c->mfc_un.unres.expires, now)) {
@@ -583,19 +578,21 @@ static void ipmr_do_expire_process(unsigned long dummy)
 		ip6mr_destroy_unres(c);
 	}
 
-	if (mfc_unres_queue != NULL)
-		mod_timer(&ipmr_expire_timer, jiffies + expires);
+	if (net->ipv6.mfc6_unres_queue != NULL)
+		mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + expires);
 }
 
-static void ipmr_expire_process(unsigned long dummy)
+static void ipmr_expire_process(unsigned long arg)
 {
+	struct net *net = (struct net *)arg;
+
 	if (!spin_trylock(&mfc_unres_lock)) {
-		mod_timer(&ipmr_expire_timer, jiffies + 1);
+		mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + 1);
 		return;
 	}
 
-	if (mfc_unres_queue != NULL)
-		ipmr_do_expire_process(dummy);
+	if (net->ipv6.mfc6_unres_queue != NULL)
+		ipmr_do_expire_process(net);
 
 	spin_unlock(&mfc_unres_lock);
 }
@@ -880,9 +877,8 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 	struct mfc6_cache *c;
 
 	spin_lock_bh(&mfc_unres_lock);
-	for (c = mfc_unres_queue; c; c = c->next) {
-		if (net_eq(mfc6_net(c), net) &&
-		    ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
+	for (c = net->ipv6.mfc6_unres_queue; c; c = c->next) {
+		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
 			break;
 	}
@@ -923,10 +919,10 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 		}
 
 		atomic_inc(&net->ipv6.cache_resolve_queue_len);
-		c->next = mfc_unres_queue;
-		mfc_unres_queue = c;
+		c->next = net->ipv6.mfc6_unres_queue;
+		net->ipv6.mfc6_unres_queue = c;
 
-		ipmr_do_expire_process(1);
+		ipmr_do_expire_process(net);
 	}
 
 	/*
@@ -1019,6 +1015,9 @@ static int __net_init ip6mr_net_init(struct net *net)
 		goto fail_mfc6_cache;
 	}
 
+	setup_timer(&net->ipv6.ipmr_expire_timer, ipmr_expire_process,
+		    (unsigned long)net);
+
 #ifdef CONFIG_IPV6_PIMSM_V2
 	net->ipv6.mroute_reg_vif_num = -1;
 #endif
@@ -1050,6 +1049,7 @@ static void __net_exit ip6mr_net_exit(struct net *net)
 	proc_net_remove(net, "ip6_mr_cache");
 	proc_net_remove(net, "ip6_mr_vif");
 #endif
+	del_timer(&net->ipv6.ipmr_expire_timer);
 	mroute_clean_tables(net);
 	kfree(net->ipv6.mfc6_cache_array);
 	kfree(net->ipv6.vif6_table);
@@ -1075,7 +1075,6 @@ int __init ip6_mr_init(void)
 	if (err)
 		goto reg_pernet_fail;
 
-	setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
 	err = register_netdevice_notifier(&ip6_mr_notifier);
 	if (err)
 		goto reg_notif_fail;
@@ -1092,7 +1091,6 @@ add_proto_fail:
 	unregister_netdevice_notifier(&ip6_mr_notifier);
 #endif
 reg_notif_fail:
-	del_timer(&ipmr_expire_timer);
 	unregister_pernet_subsys(&ip6mr_net_ops);
 reg_pernet_fail:
 	kmem_cache_destroy(mrt_cachep);
@@ -1102,7 +1100,6 @@ reg_pernet_fail:
 void ip6_mr_cleanup(void)
 {
 	unregister_netdevice_notifier(&ip6_mr_notifier);
-	del_timer(&ipmr_expire_timer);
 	unregister_pernet_subsys(&ip6mr_net_ops);
 	kmem_cache_destroy(mrt_cachep);
 }
@@ -1167,18 +1164,17 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 	 *	need to send on the frames and tidy up.
 	 */
 	spin_lock_bh(&mfc_unres_lock);
-	for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
+	for (cp = &net->ipv6.mfc6_unres_queue; (uc = *cp) != NULL;
 	     cp = &uc->next) {
-		if (net_eq(mfc6_net(uc), net) &&
-		    ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
+		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
 			*cp = uc->next;
 			atomic_dec(&net->ipv6.cache_resolve_queue_len);
 			break;
 		}
 	}
-	if (mfc_unres_queue == NULL)
-		del_timer(&ipmr_expire_timer);
+	if (net->ipv6.mfc6_unres_queue == NULL)
+		del_timer(&net->ipv6.ipmr_expire_timer);
 	spin_unlock_bh(&mfc_unres_lock);
 
 	if (uc) {
@@ -1230,12 +1226,8 @@ static void mroute_clean_tables(struct net *net)
 		struct mfc6_cache *c, **cp;
 
 		spin_lock_bh(&mfc_unres_lock);
-		cp = &mfc_unres_queue;
+		cp = &net->ipv6.mfc6_unres_queue;
 		while ((c = *cp) != NULL) {
-			if (!net_eq(mfc6_net(c), net)) {
-				cp = &c->next;
-				continue;
-			}
 			*cp = c->next;
 			ip6mr_destroy_unres(c);
 		}
-- 
cgit v1.2.3-59-g8ed1b


From b5aa30b19121de49021fba57aa1f6e4c787fcf67 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 11 May 2010 14:40:50 +0200
Subject: ipv6: ip6mr: remove net pointer from struct mfc6_cache

Now that cache entries in unres_queue don't need to be distinguished by their
network namespace pointer anymore, we can remove it from struct mfc6_cache
and pass the namespace as a function argument to the functions that need it.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/mroute6.h | 15 ------------
 net/ipv6/ip6mr.c        | 63 ++++++++++++++++++++++++-------------------------
 2 files changed, 31 insertions(+), 47 deletions(-)

(limited to 'net')

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 2caa1a8e525d..04e2e54d0404 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -183,9 +183,6 @@ struct mif_device {
 
 struct mfc6_cache {
 	struct mfc6_cache *next;		/* Next entry on cache line 	*/
-#ifdef CONFIG_NET_NS
-	struct net *mfc6_net;
-#endif
 	struct in6_addr mf6c_mcastgrp;			/* Group the entry belongs to 	*/
 	struct in6_addr mf6c_origin;			/* Source of packet 		*/
 	mifi_t mf6c_parent;			/* Source interface		*/
@@ -208,18 +205,6 @@ struct mfc6_cache {
 	} mfc_un;
 };
 
-static inline
-struct net *mfc6_net(const struct mfc6_cache *mfc)
-{
-	return read_pnet(&mfc->mfc6_net);
-}
-
-static inline
-void mfc6_net_set(struct mfc6_cache *mfc, struct net *net)
-{
-	write_pnet(&mfc->mfc6_net, hold_net(net));
-}
-
 #define MFC_STATIC		1
 #define MFC_NOTIFY		2
 
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7236030e403e..b3783a436bbd 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -76,10 +76,12 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
-static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
+static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
+			  struct mfc6_cache *cache);
 static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
 			      mifi_t mifi, int assert);
-static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
+static int ip6mr_fill_mroute(struct net *net, struct sk_buff *skb,
+			     struct mfc6_cache *c, struct rtmsg *rtm);
 static void mroute_clean_tables(struct net *net);
 
 
@@ -523,7 +525,6 @@ static int mif6_delete(struct net *net, int vifi, struct list_head *head)
 
 static inline void ip6mr_cache_free(struct mfc6_cache *c)
 {
-	release_net(mfc6_net(c));
 	kmem_cache_free(mrt_cachep, c);
 }
 
@@ -531,10 +532,9 @@ static inline void ip6mr_cache_free(struct mfc6_cache *c)
    and reporting error to netlink readers.
  */
 
-static void ip6mr_destroy_unres(struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct net *net, struct mfc6_cache *c)
 {
 	struct sk_buff *skb;
-	struct net *net = mfc6_net(c);
 
 	atomic_dec(&net->ipv6.cache_resolve_queue_len);
 
@@ -575,7 +575,7 @@ static void ipmr_do_expire_process(struct net *net)
 		}
 
 		*cp = c->next;
-		ip6mr_destroy_unres(c);
+		ip6mr_destroy_unres(net, c);
 	}
 
 	if (net->ipv6.mfc6_unres_queue != NULL)
@@ -599,10 +599,10 @@ static void ipmr_expire_process(unsigned long arg)
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
 
-static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
+static void ip6mr_update_thresholds(struct net *net, struct mfc6_cache *cache,
+				    unsigned char *ttls)
 {
 	int vifi;
-	struct net *net = mfc6_net(cache);
 
 	cache->mfc_un.res.minvif = MAXMIFS;
 	cache->mfc_un.res.maxvif = 0;
@@ -717,24 +717,22 @@ static struct mfc6_cache *ip6mr_cache_find(struct net *net,
 /*
  *	Allocate a multicast cache entry
  */
-static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
+static struct mfc6_cache *ip6mr_cache_alloc(void)
 {
 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 	if (c == NULL)
 		return NULL;
 	c->mfc_un.res.minvif = MAXMIFS;
-	mfc6_net_set(c, net);
 	return c;
 }
 
-static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
+static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
 {
 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 	if (c == NULL)
 		return NULL;
 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
-	mfc6_net_set(c, net);
 	return c;
 }
 
@@ -742,7 +740,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
  *	A cache entry has gone into a resolved state from queued
  */
 
-static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
+static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc,
+				struct mfc6_cache *c)
 {
 	struct sk_buff *skb;
 
@@ -755,7 +754,7 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
 			int err;
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 
-			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
+			if (ip6mr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) {
 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
 			} else {
 				nlh->nlmsg_type = NLMSG_ERROR;
@@ -763,9 +762,9 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
 				skb_trim(skb, nlh->nlmsg_len);
 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
 			}
-			err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid);
+			err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 		} else
-			ip6_mr_forward(skb, c);
+			ip6_mr_forward(net, skb, c);
 	}
 }
 
@@ -889,7 +888,7 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 		 */
 
 		if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 ||
-		    (c = ip6mr_cache_alloc_unres(net)) == NULL) {
+		    (c = ip6mr_cache_alloc_unres()) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -1133,7 +1132,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 	if (c != NULL) {
 		write_lock_bh(&mrt_lock);
 		c->mf6c_parent = mfc->mf6cc_parent;
-		ip6mr_update_thresholds(c, ttls);
+		ip6mr_update_thresholds(net, c, ttls);
 		if (!mrtsock)
 			c->mfc_flags |= MFC_STATIC;
 		write_unlock_bh(&mrt_lock);
@@ -1143,14 +1142,14 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
 		return -EINVAL;
 
-	c = ip6mr_cache_alloc(net);
+	c = ip6mr_cache_alloc();
 	if (c == NULL)
 		return -ENOMEM;
 
 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
 	c->mf6c_parent = mfc->mf6cc_parent;
-	ip6mr_update_thresholds(c, ttls);
+	ip6mr_update_thresholds(net, c, ttls);
 	if (!mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
@@ -1178,7 +1177,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 	spin_unlock_bh(&mfc_unres_lock);
 
 	if (uc) {
-		ip6mr_cache_resolve(uc, c);
+		ip6mr_cache_resolve(net, uc, c);
 		ip6mr_cache_free(uc);
 	}
 	return 0;
@@ -1229,7 +1228,7 @@ static void mroute_clean_tables(struct net *net)
 		cp = &net->ipv6.mfc6_unres_queue;
 		while ((c = *cp) != NULL) {
 			*cp = c->next;
-			ip6mr_destroy_unres(c);
+			ip6mr_destroy_unres(net, c);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
 	}
@@ -1497,10 +1496,10 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb)
  *	Processing handlers for ip6mr_forward
  */
 
-static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
+static int ip6mr_forward2(struct net *net, struct sk_buff *skb,
+			  struct mfc6_cache *c, int vifi)
 {
 	struct ipv6hdr *ipv6h;
-	struct net *net = mfc6_net(c);
 	struct mif_device *vif = &net->ipv6.vif6_table[vifi];
 	struct net_device *dev;
 	struct dst_entry *dst;
@@ -1581,11 +1580,11 @@ static int ip6mr_find_vif(struct net_device *dev)
 	return ct;
 }
 
-static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
+static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
+			  struct mfc6_cache *cache)
 {
 	int psend = -1;
 	int vif, ct;
-	struct net *net = mfc6_net(cache);
 
 	vif = cache->mf6c_parent;
 	cache->mfc_un.res.pkt++;
@@ -1627,13 +1626,13 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
-					ip6mr_forward2(skb2, cache, psend);
+					ip6mr_forward2(net, skb2, cache, psend);
 			}
 			psend = ct;
 		}
 	}
 	if (psend != -1) {
-		ip6mr_forward2(skb, cache, psend);
+		ip6mr_forward2(net, skb, cache, psend);
 		return 0;
 	}
 
@@ -1674,7 +1673,7 @@ int ip6_mr_input(struct sk_buff *skb)
 		return -ENODEV;
 	}
 
-	ip6_mr_forward(skb, cache);
+	ip6_mr_forward(net, skb, cache);
 
 	read_unlock(&mrt_lock);
 
@@ -1683,11 +1682,11 @@ int ip6_mr_input(struct sk_buff *skb)
 
 
 static int
-ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
+ip6mr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc6_cache *c,
+		  struct rtmsg *rtm)
 {
 	int ct;
 	struct rtnexthop *nhp;
-	struct net *net = mfc6_net(c);
 	u8 *b = skb_tail_pointer(skb);
 	struct rtattr *mp_head;
 
@@ -1781,7 +1780,7 @@ int ip6mr_get_route(struct net *net,
 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
 		cache->mfc_flags |= MFC_NOTIFY;
 
-	err = ip6mr_fill_mroute(skb, cache, rtm);
+	err = ip6mr_fill_mroute(net, skb, cache, rtm);
 	read_unlock(&mrt_lock);
 	return err;
 }
-- 
cgit v1.2.3-59-g8ed1b


From f30a77842129b5656360cc1f5db48a3fcfb64528 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 11 May 2010 14:40:51 +0200
Subject: ipv6: ip6mr: convert struct mfc_cache to struct list_head

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/mroute6.h  |   2 +-
 include/net/netns/ipv6.h |   4 +-
 net/ipv6/ip6mr.c         | 127 +++++++++++++++++++++++------------------------
 3 files changed, 65 insertions(+), 68 deletions(-)

(limited to 'net')

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 04e2e54d0404..94a0cb521ca9 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -182,7 +182,7 @@ struct mif_device {
 #define VIFF_STATIC 0x8000
 
 struct mfc6_cache {
-	struct mfc6_cache *next;		/* Next entry on cache line 	*/
+	struct list_head list;
 	struct in6_addr mf6c_mcastgrp;			/* Group the entry belongs to 	*/
 	struct in6_addr mf6c_origin;			/* Source of packet 		*/
 	mifi_t mf6c_parent;			/* Source interface		*/
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 43d842ab004e..9cb3b5f2cd75 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -61,8 +61,8 @@ struct netns_ipv6 {
 #ifdef CONFIG_IPV6_MROUTE
 	struct sock		*mroute6_sk;
 	struct timer_list	ipmr_expire_timer;
-	struct mfc6_cache	*mfc6_unres_queue;
-	struct mfc6_cache	**mfc6_cache_array;
+	struct list_head	mfc6_unres_queue;
+	struct list_head	*mfc6_cache_array;
 	struct mif_device	*vif6_table;
 	int			maxvif;
 	atomic_t		cache_resolve_queue_len;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index b3783a436bbd..08e09042ad1c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -89,7 +89,7 @@ static void mroute_clean_tables(struct net *net);
 
 struct ipmr_mfc_iter {
 	struct seq_net_private p;
-	struct mfc6_cache **cache;
+	struct list_head *cache;
 	int ct;
 };
 
@@ -99,18 +99,18 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 {
 	struct mfc6_cache *mfc;
 
-	it->cache = net->ipv6.mfc6_cache_array;
 	read_lock(&mrt_lock);
-	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
-		for (mfc = net->ipv6.mfc6_cache_array[it->ct];
-		     mfc; mfc = mfc->next)
+	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
+		it->cache = &net->ipv6.mfc6_cache_array[it->ct];
+		list_for_each_entry(mfc, it->cache, list)
 			if (pos-- == 0)
 				return mfc;
+	}
 	read_unlock(&mrt_lock);
 
-	it->cache = &net->ipv6.mfc6_unres_queue;
 	spin_lock_bh(&mfc_unres_lock);
-	for (mfc = net->ipv6.mfc6_unres_queue; mfc; mfc = mfc->next)
+	it->cache = &net->ipv6.mfc6_unres_queue;
+	list_for_each_entry(mfc, it->cache, list)
 		if (pos-- == 0)
 			return mfc;
 	spin_unlock_bh(&mfc_unres_lock);
@@ -119,9 +119,6 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 	return NULL;
 }
 
-
-
-
 /*
  *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
  */
@@ -238,18 +235,19 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (v == SEQ_START_TOKEN)
 		return ipmr_mfc_seq_idx(net, seq->private, 0);
 
-	if (mfc->next)
-		return mfc->next;
+	if (mfc->list.next != it->cache)
+		return list_entry(mfc->list.next, struct mfc6_cache, list);
 
 	if (it->cache == &net->ipv6.mfc6_unres_queue)
 		goto end_of_list;
 
-	BUG_ON(it->cache != net->ipv6.mfc6_cache_array);
+	BUG_ON(it->cache != &net->ipv6.mfc6_cache_array[it->ct]);
 
 	while (++it->ct < MFC6_LINES) {
-		mfc = net->ipv6.mfc6_cache_array[it->ct];
-		if (mfc)
-			return mfc;
+		it->cache = &net->ipv6.mfc6_cache_array[it->ct];
+		if (list_empty(it->cache))
+			continue;
+		return list_first_entry(it->cache, struct mfc6_cache, list);
 	}
 
 	/* exhausted cache_array, show unresolved */
@@ -258,9 +256,8 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	it->ct = 0;
 
 	spin_lock_bh(&mfc_unres_lock);
-	mfc = net->ipv6.mfc6_unres_queue;
-	if (mfc)
-		return mfc;
+	if (!list_empty(it->cache))
+		return list_first_entry(it->cache, struct mfc6_cache, list);
 
  end_of_list:
 	spin_unlock_bh(&mfc_unres_lock);
@@ -560,25 +557,22 @@ static void ipmr_do_expire_process(struct net *net)
 {
 	unsigned long now = jiffies;
 	unsigned long expires = 10 * HZ;
-	struct mfc6_cache *c, **cp;
-
-	cp = &net->ipv6.mfc6_unres_queue;
+	struct mfc6_cache *c, *next;
 
-	while ((c = *cp) != NULL) {
+	list_for_each_entry_safe(c, next, &net->ipv6.mfc6_unres_queue, list) {
 		if (time_after(c->mfc_un.unres.expires, now)) {
 			/* not yet... */
 			unsigned long interval = c->mfc_un.unres.expires - now;
 			if (interval < expires)
 				expires = interval;
-			cp = &c->next;
 			continue;
 		}
 
-		*cp = c->next;
+		list_del(&c->list);
 		ip6mr_destroy_unres(net, c);
 	}
 
-	if (net->ipv6.mfc6_unres_queue != NULL)
+	if (!list_empty(&net->ipv6.mfc6_unres_queue))
 		mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + expires);
 }
 
@@ -591,7 +585,7 @@ static void ipmr_expire_process(unsigned long arg)
 		return;
 	}
 
-	if (net->ipv6.mfc6_unres_queue != NULL)
+	if (!list_empty(&net->ipv6.mfc6_unres_queue))
 		ipmr_do_expire_process(net);
 
 	spin_unlock(&mfc_unres_lock);
@@ -706,12 +700,12 @@ static struct mfc6_cache *ip6mr_cache_find(struct net *net,
 	int line = MFC6_HASH(mcastgrp, origin);
 	struct mfc6_cache *c;
 
-	for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) {
+	list_for_each_entry(c, &net->ipv6.mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
-			break;
+			return c;
 	}
-	return c;
+	return NULL;
 }
 
 /*
@@ -872,17 +866,20 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
 static int
 ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 {
+	bool found = false;
 	int err;
 	struct mfc6_cache *c;
 
 	spin_lock_bh(&mfc_unres_lock);
-	for (c = net->ipv6.mfc6_unres_queue; c; c = c->next) {
+	list_for_each_entry(c, &net->ipv6.mfc6_unres_queue, list) {
 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
-		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
+		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
+			found = true;
 			break;
+		}
 	}
 
-	if (c == NULL) {
+	if (!found) {
 		/*
 		 *	Create a new entry if allowable
 		 */
@@ -918,8 +915,7 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 		}
 
 		atomic_inc(&net->ipv6.cache_resolve_queue_len);
-		c->next = net->ipv6.mfc6_unres_queue;
-		net->ipv6.mfc6_unres_queue = c;
+		list_add(&c->list, &net->ipv6.mfc6_unres_queue);
 
 		ipmr_do_expire_process(net);
 	}
@@ -946,16 +942,15 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc)
 {
 	int line;
-	struct mfc6_cache *c, **cp;
+	struct mfc6_cache *c, *next;
 
 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
 
-	for (cp = &net->ipv6.mfc6_cache_array[line];
-	     (c = *cp) != NULL; cp = &c->next) {
+	list_for_each_entry_safe(c, next, &net->ipv6.mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
 			write_lock_bh(&mrt_lock);
-			*cp = c->next;
+			list_del(&c->list);
 			write_unlock_bh(&mrt_lock);
 
 			ip6mr_cache_free(c);
@@ -997,7 +992,9 @@ static struct notifier_block ip6_mr_notifier = {
 
 static int __net_init ip6mr_net_init(struct net *net)
 {
+	unsigned int i;
 	int err = 0;
+
 	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
 				       GFP_KERNEL);
 	if (!net->ipv6.vif6_table) {
@@ -1007,13 +1004,18 @@ static int __net_init ip6mr_net_init(struct net *net)
 
 	/* Forwarding cache */
 	net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
-					     sizeof(struct mfc6_cache *),
+					     sizeof(struct list_head),
 					     GFP_KERNEL);
 	if (!net->ipv6.mfc6_cache_array) {
 		err = -ENOMEM;
 		goto fail_mfc6_cache;
 	}
 
+	for (i = 0; i < MFC6_LINES; i++)
+		INIT_LIST_HEAD(&net->ipv6.mfc6_cache_array[i]);
+
+	INIT_LIST_HEAD(&net->ipv6.mfc6_unres_queue);
+
 	setup_timer(&net->ipv6.ipmr_expire_timer, ipmr_expire_process,
 		    (unsigned long)net);
 
@@ -1105,8 +1107,9 @@ void ip6_mr_cleanup(void)
 
 static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 {
+	bool found = false;
 	int line;
-	struct mfc6_cache *uc, *c, **cp;
+	struct mfc6_cache *uc, *c;
 	unsigned char ttls[MAXMIFS];
 	int i;
 
@@ -1122,14 +1125,15 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 
 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
 
-	for (cp = &net->ipv6.mfc6_cache_array[line];
-	     (c = *cp) != NULL; cp = &c->next) {
+	list_for_each_entry(c, &net->ipv6.mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
-		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
+		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
+			found = true;
 			break;
+		}
 	}
 
-	if (c != NULL) {
+	if (found) {
 		write_lock_bh(&mrt_lock);
 		c->mf6c_parent = mfc->mf6cc_parent;
 		ip6mr_update_thresholds(net, c, ttls);
@@ -1154,29 +1158,29 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
 	write_lock_bh(&mrt_lock);
-	c->next = net->ipv6.mfc6_cache_array[line];
-	net->ipv6.mfc6_cache_array[line] = c;
+	list_add(&c->list, &net->ipv6.mfc6_cache_array[line]);
 	write_unlock_bh(&mrt_lock);
 
 	/*
 	 *	Check to see if we resolved a queued list. If so we
 	 *	need to send on the frames and tidy up.
 	 */
+	found = false;
 	spin_lock_bh(&mfc_unres_lock);
-	for (cp = &net->ipv6.mfc6_unres_queue; (uc = *cp) != NULL;
-	     cp = &uc->next) {
+	list_for_each_entry(uc, &net->ipv6.mfc6_unres_queue, list) {
 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
-			*cp = uc->next;
+			list_del(&uc->list);
 			atomic_dec(&net->ipv6.cache_resolve_queue_len);
+			found = true;
 			break;
 		}
 	}
-	if (net->ipv6.mfc6_unres_queue == NULL)
+	if (list_empty(&net->ipv6.mfc6_unres_queue))
 		del_timer(&net->ipv6.ipmr_expire_timer);
 	spin_unlock_bh(&mfc_unres_lock);
 
-	if (uc) {
+	if (found) {
 		ip6mr_cache_resolve(net, uc, c);
 		ip6mr_cache_free(uc);
 	}
@@ -1191,6 +1195,7 @@ static void mroute_clean_tables(struct net *net)
 {
 	int i;
 	LIST_HEAD(list);
+	struct mfc6_cache *c, *next;
 
 	/*
 	 *	Shut down all active vif entries
@@ -1205,16 +1210,11 @@ static void mroute_clean_tables(struct net *net)
 	 *	Wipe the cache
 	 */
 	for (i = 0; i < MFC6_LINES; i++) {
-		struct mfc6_cache *c, **cp;
-
-		cp = &net->ipv6.mfc6_cache_array[i];
-		while ((c = *cp) != NULL) {
-			if (c->mfc_flags & MFC_STATIC) {
-				cp = &c->next;
+		list_for_each_entry_safe(c, next, &net->ipv6.mfc6_cache_array[i], list) {
+			if (c->mfc_flags & MFC_STATIC)
 				continue;
-			}
 			write_lock_bh(&mrt_lock);
-			*cp = c->next;
+			list_del(&c->list);
 			write_unlock_bh(&mrt_lock);
 
 			ip6mr_cache_free(c);
@@ -1222,12 +1222,9 @@ static void mroute_clean_tables(struct net *net)
 	}
 
 	if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) {
-		struct mfc6_cache *c, **cp;
-
 		spin_lock_bh(&mfc_unres_lock);
-		cp = &net->ipv6.mfc6_unres_queue;
-		while ((c = *cp) != NULL) {
-			*cp = c->next;
+		list_for_each_entry_safe(c, next, &net->ipv6.mfc6_unres_queue, list) {
+			list_del(&c->list);
 			ip6mr_destroy_unres(net, c);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
-- 
cgit v1.2.3-59-g8ed1b


From 6bd521433942d85e80f7a731a88cc91a327f38e0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 11 May 2010 14:40:53 +0200
Subject: ipv6: ip6mr: move mroute data into separate structure

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/mroute6.h  |   5 +-
 include/net/netns/ipv6.h |  13 +-
 net/ipv6/ip6mr.c         | 390 ++++++++++++++++++++++++++---------------------
 3 files changed, 216 insertions(+), 192 deletions(-)

(limited to 'net')

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 94a0cb521ca9..0370dd4f2389 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -229,10 +229,7 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
 			   struct rtmsg *rtm, int nowait);
 
 #ifdef CONFIG_IPV6_MROUTE
-static inline struct sock *mroute6_socket(struct net *net)
-{
-	return net->ipv6.mroute6_sk;
-}
+extern struct sock *mroute6_socket(struct net *net);
 extern int ip6mr_sk_done(struct sock *sk);
 #else
 static inline struct sock *mroute6_socket(struct net *net) { return NULL; }
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 9cb3b5f2cd75..4e2780e6d8bc 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -59,18 +59,7 @@ struct netns_ipv6 {
 	struct sock             *tcp_sk;
 	struct sock             *igmp_sk;
 #ifdef CONFIG_IPV6_MROUTE
-	struct sock		*mroute6_sk;
-	struct timer_list	ipmr_expire_timer;
-	struct list_head	mfc6_unres_queue;
-	struct list_head	*mfc6_cache_array;
-	struct mif_device	*vif6_table;
-	int			maxvif;
-	atomic_t		cache_resolve_queue_len;
-	int			mroute_do_assert;
-	int			mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
-	int			mroute_reg_vif_num;
-#endif
+	struct mr6_table	*mrt6;
 #endif
 };
 #endif
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 08e09042ad1c..9419fceeed41 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -51,6 +51,24 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/ip6_checksum.h>
 
+struct mr6_table {
+#ifdef CONFIG_NET_NS
+	struct net		*net;
+#endif
+	struct sock		*mroute6_sk;
+	struct timer_list	ipmr_expire_timer;
+	struct list_head	mfc6_unres_queue;
+	struct list_head	mfc6_cache_array[MFC6_LINES];
+	struct mif_device	vif6_table[MAXMIFS];
+	int			maxvif;
+	atomic_t		cache_resolve_queue_len;
+	int			mroute_do_assert;
+	int			mroute_do_pim;
+#ifdef CONFIG_IPV6_PIMSM_V2
+	int			mroute_reg_vif_num;
+#endif
+};
+
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
    Note that the changes are semaphored via rtnl_lock.
  */
@@ -61,7 +79,7 @@ static DEFINE_RWLOCK(mrt_lock);
  *	Multicast router control variables
  */
 
-#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
+#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
 
 /* Special spinlock for queue of unresolved entries */
 static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -76,13 +94,13 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
-static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
-			  struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
+static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+			  struct sk_buff *skb, struct mfc6_cache *cache);
+static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 			      mifi_t mifi, int assert);
-static int ip6mr_fill_mroute(struct net *net, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 			     struct mfc6_cache *c, struct rtmsg *rtm);
-static void mroute_clean_tables(struct net *net);
+static void mroute_clean_tables(struct mr6_table *mrt);
 
 
 #ifdef CONFIG_PROC_FS
@@ -97,11 +115,12 @@ struct ipmr_mfc_iter {
 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 					   struct ipmr_mfc_iter *it, loff_t pos)
 {
+	struct mr6_table *mrt = net->ipv6.mrt6;
 	struct mfc6_cache *mfc;
 
 	read_lock(&mrt_lock);
 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
-		it->cache = &net->ipv6.mfc6_cache_array[it->ct];
+		it->cache = &mrt->mfc6_cache_array[it->ct];
 		list_for_each_entry(mfc, it->cache, list)
 			if (pos-- == 0)
 				return mfc;
@@ -109,7 +128,7 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 	read_unlock(&mrt_lock);
 
 	spin_lock_bh(&mfc_unres_lock);
-	it->cache = &net->ipv6.mfc6_unres_queue;
+	it->cache = &mrt->mfc6_unres_queue;
 	list_for_each_entry(mfc, it->cache, list)
 		if (pos-- == 0)
 			return mfc;
@@ -132,11 +151,13 @@ static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 					    struct ipmr_vif_iter *iter,
 					    loff_t pos)
 {
-	for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) {
-		if (!MIF_EXISTS(net, iter->ct))
+	struct mr6_table *mrt = net->ipv6.mrt6;
+
+	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+		if (!MIF_EXISTS(mrt, iter->ct))
 			continue;
 		if (pos-- == 0)
-			return &net->ipv6.vif6_table[iter->ct];
+			return &mrt->vif6_table[iter->ct];
 	}
 	return NULL;
 }
@@ -155,15 +176,16 @@ static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ipmr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	++*pos;
 	if (v == SEQ_START_TOKEN)
 		return ip6mr_vif_seq_idx(net, iter, 0);
 
-	while (++iter->ct < net->ipv6.maxvif) {
-		if (!MIF_EXISTS(net, iter->ct))
+	while (++iter->ct < mrt->maxvif) {
+		if (!MIF_EXISTS(mrt, iter->ct))
 			continue;
-		return &net->ipv6.vif6_table[iter->ct];
+		return &mrt->vif6_table[iter->ct];
 	}
 	return NULL;
 }
@@ -177,6 +199,7 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -187,7 +210,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 
 		seq_printf(seq,
 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
-			   vif - net->ipv6.vif6_table,
+			   vif - mrt->vif6_table,
 			   name, vif->bytes_in, vif->pkt_in,
 			   vif->bytes_out, vif->pkt_out,
 			   vif->flags);
@@ -229,6 +252,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	struct mfc6_cache *mfc = v;
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	++*pos;
 
@@ -238,13 +262,13 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (mfc->list.next != it->cache)
 		return list_entry(mfc->list.next, struct mfc6_cache, list);
 
-	if (it->cache == &net->ipv6.mfc6_unres_queue)
+	if (it->cache == &mrt->mfc6_unres_queue)
 		goto end_of_list;
 
-	BUG_ON(it->cache != &net->ipv6.mfc6_cache_array[it->ct]);
+	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
 
 	while (++it->ct < MFC6_LINES) {
-		it->cache = &net->ipv6.mfc6_cache_array[it->ct];
+		it->cache = &mrt->mfc6_cache_array[it->ct];
 		if (list_empty(it->cache))
 			continue;
 		return list_first_entry(it->cache, struct mfc6_cache, list);
@@ -252,7 +276,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 	/* exhausted cache_array, show unresolved */
 	read_unlock(&mrt_lock);
-	it->cache = &net->ipv6.mfc6_unres_queue;
+	it->cache = &mrt->mfc6_unres_queue;
 	it->ct = 0;
 
 	spin_lock_bh(&mfc_unres_lock);
@@ -270,10 +294,11 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 {
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
-	if (it->cache == &net->ipv6.mfc6_unres_queue)
+	if (it->cache == &mrt->mfc6_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
-	else if (it->cache == net->ipv6.mfc6_cache_array)
+	else if (it->cache == mrt->mfc6_cache_array)
 		read_unlock(&mrt_lock);
 }
 
@@ -281,6 +306,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 {
 	int n;
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -295,14 +321,14 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 			   mfc->mf6c_parent);
 
-		if (it->cache != &net->ipv6.mfc6_unres_queue) {
+		if (it->cache != &mrt->mfc6_unres_queue) {
 			seq_printf(seq, " %8lu %8lu %8lu",
 				   mfc->mfc_un.res.pkt,
 				   mfc->mfc_un.res.bytes,
 				   mfc->mfc_un.res.wrong_if);
 			for (n = mfc->mfc_un.res.minvif;
 			     n < mfc->mfc_un.res.maxvif; n++) {
-				if (MIF_EXISTS(net, n) &&
+				if (MIF_EXISTS(mrt, n) &&
 				    mfc->mfc_un.res.ttls[n] < 255)
 					seq_printf(seq,
 						   " %2d:%-3d",
@@ -349,7 +375,8 @@ static int pim6_rcv(struct sk_buff *skb)
 	struct ipv6hdr   *encap;
 	struct net_device  *reg_dev = NULL;
 	struct net *net = dev_net(skb->dev);
-	int reg_vif_num = net->ipv6.mroute_reg_vif_num;
+	struct mr6_table *mrt = net->ipv6.mrt6;
+	int reg_vif_num = mrt->mroute_reg_vif_num;
 
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 		goto drop;
@@ -374,7 +401,7 @@ static int pim6_rcv(struct sk_buff *skb)
 
 	read_lock(&mrt_lock);
 	if (reg_vif_num >= 0)
-		reg_dev = net->ipv6.vif6_table[reg_vif_num].dev;
+		reg_dev = mrt->vif6_table[reg_vif_num].dev;
 	if (reg_dev)
 		dev_hold(reg_dev);
 	read_unlock(&mrt_lock);
@@ -411,12 +438,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
 {
 	struct net *net = dev_net(dev);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	read_lock(&mrt_lock);
 	dev->stats.tx_bytes += skb->len;
 	dev->stats.tx_packets++;
-	ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num,
-			   MRT6MSG_WHOLEPKT);
+	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 	read_unlock(&mrt_lock);
 	kfree_skb(skb);
 	return NETDEV_TX_OK;
@@ -472,15 +499,16 @@ failure:
  *	Delete a VIF entry
  */
 
-static int mif6_delete(struct net *net, int vifi, struct list_head *head)
+static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
 {
 	struct mif_device *v;
 	struct net_device *dev;
 	struct inet6_dev *in6_dev;
-	if (vifi < 0 || vifi >= net->ipv6.maxvif)
+
+	if (vifi < 0 || vifi >= mrt->maxvif)
 		return -EADDRNOTAVAIL;
 
-	v = &net->ipv6.vif6_table[vifi];
+	v = &mrt->vif6_table[vifi];
 
 	write_lock_bh(&mrt_lock);
 	dev = v->dev;
@@ -492,17 +520,17 @@ static int mif6_delete(struct net *net, int vifi, struct list_head *head)
 	}
 
 #ifdef CONFIG_IPV6_PIMSM_V2
-	if (vifi == net->ipv6.mroute_reg_vif_num)
-		net->ipv6.mroute_reg_vif_num = -1;
+	if (vifi == mrt->mroute_reg_vif_num)
+		mrt->mroute_reg_vif_num = -1;
 #endif
 
-	if (vifi + 1 == net->ipv6.maxvif) {
+	if (vifi + 1 == mrt->maxvif) {
 		int tmp;
 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
-			if (MIF_EXISTS(net, tmp))
+			if (MIF_EXISTS(mrt, tmp))
 				break;
 		}
-		net->ipv6.maxvif = tmp + 1;
+		mrt->maxvif = tmp + 1;
 	}
 
 	write_unlock_bh(&mrt_lock);
@@ -529,11 +557,12 @@ static inline void ip6mr_cache_free(struct mfc6_cache *c)
    and reporting error to netlink readers.
  */
 
-static void ip6mr_destroy_unres(struct net *net, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 {
+	struct net *net = read_pnet(&mrt->net);
 	struct sk_buff *skb;
 
-	atomic_dec(&net->ipv6.cache_resolve_queue_len);
+	atomic_dec(&mrt->cache_resolve_queue_len);
 
 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
 		if (ipv6_hdr(skb)->version == 0) {
@@ -553,13 +582,13 @@ static void ip6mr_destroy_unres(struct net *net, struct mfc6_cache *c)
 
 /* Timer process for all the unresolved queue. */
 
-static void ipmr_do_expire_process(struct net *net)
+static void ipmr_do_expire_process(struct mr6_table *mrt)
 {
 	unsigned long now = jiffies;
 	unsigned long expires = 10 * HZ;
 	struct mfc6_cache *c, *next;
 
-	list_for_each_entry_safe(c, next, &net->ipv6.mfc6_unres_queue, list) {
+	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
 		if (time_after(c->mfc_un.unres.expires, now)) {
 			/* not yet... */
 			unsigned long interval = c->mfc_un.unres.expires - now;
@@ -569,31 +598,31 @@ static void ipmr_do_expire_process(struct net *net)
 		}
 
 		list_del(&c->list);
-		ip6mr_destroy_unres(net, c);
+		ip6mr_destroy_unres(mrt, c);
 	}
 
-	if (!list_empty(&net->ipv6.mfc6_unres_queue))
-		mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + expires);
+	if (!list_empty(&mrt->mfc6_unres_queue))
+		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 }
 
 static void ipmr_expire_process(unsigned long arg)
 {
-	struct net *net = (struct net *)arg;
+	struct mr6_table *mrt = (struct mr6_table *)arg;
 
 	if (!spin_trylock(&mfc_unres_lock)) {
-		mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + 1);
+		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 		return;
 	}
 
-	if (!list_empty(&net->ipv6.mfc6_unres_queue))
-		ipmr_do_expire_process(net);
+	if (!list_empty(&mrt->mfc6_unres_queue))
+		ipmr_do_expire_process(mrt);
 
 	spin_unlock(&mfc_unres_lock);
 }
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
 
-static void ip6mr_update_thresholds(struct net *net, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
 				    unsigned char *ttls)
 {
 	int vifi;
@@ -602,8 +631,8 @@ static void ip6mr_update_thresholds(struct net *net, struct mfc6_cache *cache,
 	cache->mfc_un.res.maxvif = 0;
 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 
-	for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) {
-		if (MIF_EXISTS(net, vifi) &&
+	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
+		if (MIF_EXISTS(mrt, vifi) &&
 		    ttls[vifi] && ttls[vifi] < 255) {
 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 			if (cache->mfc_un.res.minvif > vifi)
@@ -614,16 +643,17 @@ static void ip6mr_update_thresholds(struct net *net, struct mfc6_cache *cache,
 	}
 }
 
-static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
+static int mif6_add(struct net *net, struct mr6_table *mrt,
+		    struct mif6ctl *vifc, int mrtsock)
 {
 	int vifi = vifc->mif6c_mifi;
-	struct mif_device *v = &net->ipv6.vif6_table[vifi];
+	struct mif_device *v = &mrt->vif6_table[vifi];
 	struct net_device *dev;
 	struct inet6_dev *in6_dev;
 	int err;
 
 	/* Is vif busy ? */
-	if (MIF_EXISTS(net, vifi))
+	if (MIF_EXISTS(mrt, vifi))
 		return -EADDRINUSE;
 
 	switch (vifc->mif6c_flags) {
@@ -633,7 +663,7 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
 		 * Special Purpose VIF in PIM
 		 * All the packets will be sent to the daemon
 		 */
-		if (net->ipv6.mroute_reg_vif_num >= 0)
+		if (mrt->mroute_reg_vif_num >= 0)
 			return -EADDRINUSE;
 		dev = ip6mr_reg_vif(net);
 		if (!dev)
@@ -685,22 +715,22 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
 	v->dev = dev;
 #ifdef CONFIG_IPV6_PIMSM_V2
 	if (v->flags & MIFF_REGISTER)
-		net->ipv6.mroute_reg_vif_num = vifi;
+		mrt->mroute_reg_vif_num = vifi;
 #endif
-	if (vifi + 1 > net->ipv6.maxvif)
-		net->ipv6.maxvif = vifi + 1;
+	if (vifi + 1 > mrt->maxvif)
+		mrt->maxvif = vifi + 1;
 	write_unlock_bh(&mrt_lock);
 	return 0;
 }
 
-static struct mfc6_cache *ip6mr_cache_find(struct net *net,
+static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
 					   struct in6_addr *origin,
 					   struct in6_addr *mcastgrp)
 {
 	int line = MFC6_HASH(mcastgrp, origin);
 	struct mfc6_cache *c;
 
-	list_for_each_entry(c, &net->ipv6.mfc6_cache_array[line], list) {
+	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
 			return c;
@@ -734,8 +764,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
  *	A cache entry has gone into a resolved state from queued
  */
 
-static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc,
-				struct mfc6_cache *c)
+static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+				struct mfc6_cache *uc, struct mfc6_cache *c)
 {
 	struct sk_buff *skb;
 
@@ -748,7 +778,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc,
 			int err;
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 
-			if (ip6mr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) {
+			if (ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
 			} else {
 				nlh->nlmsg_type = NLMSG_ERROR;
@@ -758,7 +788,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc,
 			}
 			err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 		} else
-			ip6_mr_forward(net, skb, c);
+			ip6_mr_forward(net, mrt, skb, c);
 	}
 }
 
@@ -769,8 +799,8 @@ static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc,
  *	Called under mrt_lock.
  */
 
-static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
-			      int assert)
+static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+			      mifi_t mifi, int assert)
 {
 	struct sk_buff *skb;
 	struct mrt6msg *msg;
@@ -806,7 +836,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
 		msg = (struct mrt6msg *)skb_transport_header(skb);
 		msg->im6_mbz = 0;
 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
-		msg->im6_mif = net->ipv6.mroute_reg_vif_num;
+		msg->im6_mif = mrt->mroute_reg_vif_num;
 		msg->im6_pad = 0;
 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
@@ -841,7 +871,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
-	if (net->ipv6.mroute6_sk == NULL) {
+	if (mrt->mroute6_sk == NULL) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -849,7 +879,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
 	/*
 	 *	Deliver to user space multicast routing algorithms
 	 */
-	ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb);
+	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
 	if (ret < 0) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
@@ -864,14 +894,14 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
  */
 
 static int
-ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
+ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
 {
 	bool found = false;
 	int err;
 	struct mfc6_cache *c;
 
 	spin_lock_bh(&mfc_unres_lock);
-	list_for_each_entry(c, &net->ipv6.mfc6_unres_queue, list) {
+	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
 			found = true;
@@ -884,7 +914,7 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 		 *	Create a new entry if allowable
 		 */
 
-		if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 ||
+		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
@@ -902,7 +932,7 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 		/*
 		 *	Reflect first query at pim6sd
 		 */
-		err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE);
+		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
 		if (err < 0) {
 			/* If the report failed throw the cache entry
 			   out - Brad Parker
@@ -914,10 +944,10 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 			return err;
 		}
 
-		atomic_inc(&net->ipv6.cache_resolve_queue_len);
-		list_add(&c->list, &net->ipv6.mfc6_unres_queue);
+		atomic_inc(&mrt->cache_resolve_queue_len);
+		list_add(&c->list, &mrt->mfc6_unres_queue);
 
-		ipmr_do_expire_process(net);
+		ipmr_do_expire_process(mrt);
 	}
 
 	/*
@@ -939,14 +969,14 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
  *	MFC6 cache manipulation by user space
  */
 
-static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc)
+static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
 {
 	int line;
 	struct mfc6_cache *c, *next;
 
 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
 
-	list_for_each_entry_safe(c, next, &net->ipv6.mfc6_cache_array[line], list) {
+	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
 			write_lock_bh(&mrt_lock);
@@ -965,6 +995,7 @@ static int ip6mr_device_event(struct notifier_block *this,
 {
 	struct net_device *dev = ptr;
 	struct net *net = dev_net(dev);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 	struct mif_device *v;
 	int ct;
 	LIST_HEAD(list);
@@ -972,10 +1003,10 @@ static int ip6mr_device_event(struct notifier_block *this,
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
 
-	v = &net->ipv6.vif6_table[0];
-	for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) {
+	v = &mrt->vif6_table[0];
+	for (ct = 0; ct < mrt->maxvif; ct++, v++) {
 		if (v->dev == dev)
-			mif6_delete(net, ct, &list);
+			mif6_delete(mrt, ct, &list);
 	}
 	unregister_netdevice_many(&list);
 
@@ -992,35 +1023,28 @@ static struct notifier_block ip6_mr_notifier = {
 
 static int __net_init ip6mr_net_init(struct net *net)
 {
+	struct mr6_table *mrt;
 	unsigned int i;
 	int err = 0;
 
-	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
-				       GFP_KERNEL);
-	if (!net->ipv6.vif6_table) {
+	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+	if (mrt == NULL) {
 		err = -ENOMEM;
 		goto fail;
 	}
 
-	/* Forwarding cache */
-	net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
-					     sizeof(struct list_head),
-					     GFP_KERNEL);
-	if (!net->ipv6.mfc6_cache_array) {
-		err = -ENOMEM;
-		goto fail_mfc6_cache;
-	}
+	write_pnet(&mrt->net, net);
 
 	for (i = 0; i < MFC6_LINES; i++)
-		INIT_LIST_HEAD(&net->ipv6.mfc6_cache_array[i]);
+		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
 
-	INIT_LIST_HEAD(&net->ipv6.mfc6_unres_queue);
+	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
 
-	setup_timer(&net->ipv6.ipmr_expire_timer, ipmr_expire_process,
-		    (unsigned long)net);
+	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
+		    (unsigned long)mrt);
 
 #ifdef CONFIG_IPV6_PIMSM_V2
-	net->ipv6.mroute_reg_vif_num = -1;
+	mrt->mroute_reg_vif_num = -1;
 #endif
 
 #ifdef CONFIG_PROC_FS
@@ -1030,30 +1054,31 @@ static int __net_init ip6mr_net_init(struct net *net)
 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
 		goto proc_cache_fail;
 #endif
+
+	net->ipv6.mrt6 = mrt;
 	return 0;
 
 #ifdef CONFIG_PROC_FS
 proc_cache_fail:
 	proc_net_remove(net, "ip6_mr_vif");
 proc_vif_fail:
-	kfree(net->ipv6.mfc6_cache_array);
+	kfree(mrt);
 #endif
-fail_mfc6_cache:
-	kfree(net->ipv6.vif6_table);
 fail:
 	return err;
 }
 
 static void __net_exit ip6mr_net_exit(struct net *net)
 {
+	struct mr6_table *mrt = net->ipv6.mrt6;
+
 #ifdef CONFIG_PROC_FS
 	proc_net_remove(net, "ip6_mr_cache");
 	proc_net_remove(net, "ip6_mr_vif");
 #endif
-	del_timer(&net->ipv6.ipmr_expire_timer);
-	mroute_clean_tables(net);
-	kfree(net->ipv6.mfc6_cache_array);
-	kfree(net->ipv6.vif6_table);
+	del_timer(&mrt->ipmr_expire_timer);
+	mroute_clean_tables(mrt);
+	kfree(mrt);
 }
 
 static struct pernet_operations ip6mr_net_ops = {
@@ -1105,7 +1130,8 @@ void ip6_mr_cleanup(void)
 	kmem_cache_destroy(mrt_cachep);
 }
 
-static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
+static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+			 struct mf6cctl *mfc, int mrtsock)
 {
 	bool found = false;
 	int line;
@@ -1125,7 +1151,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 
 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
 
-	list_for_each_entry(c, &net->ipv6.mfc6_cache_array[line], list) {
+	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
 			found = true;
@@ -1136,7 +1162,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 	if (found) {
 		write_lock_bh(&mrt_lock);
 		c->mf6c_parent = mfc->mf6cc_parent;
-		ip6mr_update_thresholds(net, c, ttls);
+		ip6mr_update_thresholds(mrt, c, ttls);
 		if (!mrtsock)
 			c->mfc_flags |= MFC_STATIC;
 		write_unlock_bh(&mrt_lock);
@@ -1153,12 +1179,12 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
 	c->mf6c_parent = mfc->mf6cc_parent;
-	ip6mr_update_thresholds(net, c, ttls);
+	ip6mr_update_thresholds(mrt, c, ttls);
 	if (!mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
 	write_lock_bh(&mrt_lock);
-	list_add(&c->list, &net->ipv6.mfc6_cache_array[line]);
+	list_add(&c->list, &mrt->mfc6_cache_array[line]);
 	write_unlock_bh(&mrt_lock);
 
 	/*
@@ -1167,21 +1193,21 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 	 */
 	found = false;
 	spin_lock_bh(&mfc_unres_lock);
-	list_for_each_entry(uc, &net->ipv6.mfc6_unres_queue, list) {
+	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
 			list_del(&uc->list);
-			atomic_dec(&net->ipv6.cache_resolve_queue_len);
+			atomic_dec(&mrt->cache_resolve_queue_len);
 			found = true;
 			break;
 		}
 	}
-	if (list_empty(&net->ipv6.mfc6_unres_queue))
-		del_timer(&net->ipv6.ipmr_expire_timer);
+	if (list_empty(&mrt->mfc6_unres_queue))
+		del_timer(&mrt->ipmr_expire_timer);
 	spin_unlock_bh(&mfc_unres_lock);
 
 	if (found) {
-		ip6mr_cache_resolve(net, uc, c);
+		ip6mr_cache_resolve(net, mrt, uc, c);
 		ip6mr_cache_free(uc);
 	}
 	return 0;
@@ -1191,7 +1217,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
  *	Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct net *net)
+static void mroute_clean_tables(struct mr6_table *mrt)
 {
 	int i;
 	LIST_HEAD(list);
@@ -1200,9 +1226,9 @@ static void mroute_clean_tables(struct net *net)
 	/*
 	 *	Shut down all active vif entries
 	 */
-	for (i = 0; i < net->ipv6.maxvif; i++) {
-		if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC))
-			mif6_delete(net, i, &list);
+	for (i = 0; i < mrt->maxvif; i++) {
+		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
+			mif6_delete(mrt, i, &list);
 	}
 	unregister_netdevice_many(&list);
 
@@ -1210,7 +1236,7 @@ static void mroute_clean_tables(struct net *net)
 	 *	Wipe the cache
 	 */
 	for (i = 0; i < MFC6_LINES; i++) {
-		list_for_each_entry_safe(c, next, &net->ipv6.mfc6_cache_array[i], list) {
+		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
 			if (c->mfc_flags & MFC_STATIC)
 				continue;
 			write_lock_bh(&mrt_lock);
@@ -1221,25 +1247,25 @@ static void mroute_clean_tables(struct net *net)
 		}
 	}
 
-	if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) {
+	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
 		spin_lock_bh(&mfc_unres_lock);
-		list_for_each_entry_safe(c, next, &net->ipv6.mfc6_unres_queue, list) {
+		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
 			list_del(&c->list);
-			ip6mr_destroy_unres(net, c);
+			ip6mr_destroy_unres(mrt, c);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
 	}
 }
 
-static int ip6mr_sk_init(struct sock *sk)
+static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
 {
 	int err = 0;
 	struct net *net = sock_net(sk);
 
 	rtnl_lock();
 	write_lock_bh(&mrt_lock);
-	if (likely(net->ipv6.mroute6_sk == NULL)) {
-		net->ipv6.mroute6_sk = sk;
+	if (likely(mrt->mroute6_sk == NULL)) {
+		mrt->mroute6_sk = sk;
 		net->ipv6.devconf_all->mc_forwarding++;
 	}
 	else
@@ -1255,15 +1281,16 @@ int ip6mr_sk_done(struct sock *sk)
 {
 	int err = 0;
 	struct net *net = sock_net(sk);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	rtnl_lock();
-	if (sk == net->ipv6.mroute6_sk) {
+	if (sk == mrt->mroute6_sk) {
 		write_lock_bh(&mrt_lock);
-		net->ipv6.mroute6_sk = NULL;
+		mrt->mroute6_sk = NULL;
 		net->ipv6.devconf_all->mc_forwarding--;
 		write_unlock_bh(&mrt_lock);
 
-		mroute_clean_tables(net);
+		mroute_clean_tables(mrt);
 	} else
 		err = -EACCES;
 	rtnl_unlock();
@@ -1271,6 +1298,13 @@ int ip6mr_sk_done(struct sock *sk)
 	return err;
 }
 
+struct sock *mroute6_socket(struct net *net)
+{
+	struct mr6_table *mrt = net->ipv6.mrt6;
+
+	return mrt->mroute6_sk;
+}
+
 /*
  *	Socket options and virtual interface manipulation. The whole
  *	virtual interface system is a complete heap, but unfortunately
@@ -1285,9 +1319,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 	struct mf6cctl mfc;
 	mifi_t mifi;
 	struct net *net = sock_net(sk);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	if (optname != MRT6_INIT) {
-		if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
+		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
 			return -EACCES;
 	}
 
@@ -1299,7 +1334,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		if (optlen < sizeof(int))
 			return -EINVAL;
 
-		return ip6mr_sk_init(sk);
+		return ip6mr_sk_init(mrt, sk);
 
 	case MRT6_DONE:
 		return ip6mr_sk_done(sk);
@@ -1312,7 +1347,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		if (vif.mif6c_mifi >= MAXMIFS)
 			return -ENFILE;
 		rtnl_lock();
-		ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk);
+		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
 		rtnl_unlock();
 		return ret;
 
@@ -1322,7 +1357,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
 			return -EFAULT;
 		rtnl_lock();
-		ret = mif6_delete(net, mifi, NULL);
+		ret = mif6_delete(mrt, mifi, NULL);
 		rtnl_unlock();
 		return ret;
 
@@ -1338,10 +1373,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 			return -EFAULT;
 		rtnl_lock();
 		if (optname == MRT6_DEL_MFC)
-			ret = ip6mr_mfc_delete(net, &mfc);
+			ret = ip6mr_mfc_delete(mrt, &mfc);
 		else
-			ret = ip6mr_mfc_add(net, &mfc,
-					    sk == net->ipv6.mroute6_sk);
+			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
 		rtnl_unlock();
 		return ret;
 
@@ -1353,7 +1387,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		int v;
 		if (get_user(v, (int __user *)optval))
 			return -EFAULT;
-		net->ipv6.mroute_do_assert = !!v;
+		mrt->mroute_do_assert = !!v;
 		return 0;
 	}
 
@@ -1366,9 +1400,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		v = !!v;
 		rtnl_lock();
 		ret = 0;
-		if (v != net->ipv6.mroute_do_pim) {
-			net->ipv6.mroute_do_pim = v;
-			net->ipv6.mroute_do_assert = v;
+		if (v != mrt->mroute_do_pim) {
+			mrt->mroute_do_pim = v;
+			mrt->mroute_do_assert = v;
 		}
 		rtnl_unlock();
 		return ret;
@@ -1394,6 +1428,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
 	int olr;
 	int val;
 	struct net *net = sock_net(sk);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	switch (optname) {
 	case MRT6_VERSION:
@@ -1401,11 +1436,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
 		break;
 #ifdef CONFIG_IPV6_PIMSM_V2
 	case MRT6_PIM:
-		val = net->ipv6.mroute_do_pim;
+		val = mrt->mroute_do_pim;
 		break;
 #endif
 	case MRT6_ASSERT:
-		val = net->ipv6.mroute_do_assert;
+		val = mrt->mroute_do_assert;
 		break;
 	default:
 		return -ENOPROTOOPT;
@@ -1436,16 +1471,17 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	struct mif_device *vif;
 	struct mfc6_cache *c;
 	struct net *net = sock_net(sk);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	switch (cmd) {
 	case SIOCGETMIFCNT_IN6:
 		if (copy_from_user(&vr, arg, sizeof(vr)))
 			return -EFAULT;
-		if (vr.mifi >= net->ipv6.maxvif)
+		if (vr.mifi >= mrt->maxvif)
 			return -EINVAL;
 		read_lock(&mrt_lock);
-		vif = &net->ipv6.vif6_table[vr.mifi];
-		if (MIF_EXISTS(net, vr.mifi)) {
+		vif = &mrt->vif6_table[vr.mifi];
+		if (MIF_EXISTS(mrt, vr.mifi)) {
 			vr.icount = vif->pkt_in;
 			vr.ocount = vif->pkt_out;
 			vr.ibytes = vif->bytes_in;
@@ -1463,7 +1499,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 			return -EFAULT;
 
 		read_lock(&mrt_lock);
-		c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr);
+		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
 		if (c) {
 			sr.pktcnt = c->mfc_un.res.pkt;
 			sr.bytecnt = c->mfc_un.res.bytes;
@@ -1493,11 +1529,11 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb)
  *	Processing handlers for ip6mr_forward
  */
 
-static int ip6mr_forward2(struct net *net, struct sk_buff *skb,
-			  struct mfc6_cache *c, int vifi)
+static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 {
 	struct ipv6hdr *ipv6h;
-	struct mif_device *vif = &net->ipv6.vif6_table[vifi];
+	struct mif_device *vif = &mrt->vif6_table[vifi];
 	struct net_device *dev;
 	struct dst_entry *dst;
 	struct flowi fl;
@@ -1511,7 +1547,7 @@ static int ip6mr_forward2(struct net *net, struct sk_buff *skb,
 		vif->bytes_out += skb->len;
 		vif->dev->stats.tx_bytes += skb->len;
 		vif->dev->stats.tx_packets++;
-		ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT);
+		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
 		goto out_free;
 	}
 #endif
@@ -1566,19 +1602,19 @@ out_free:
 	return 0;
 }
 
-static int ip6mr_find_vif(struct net_device *dev)
+static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
 {
-	struct net *net = dev_net(dev);
 	int ct;
-	for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) {
-		if (net->ipv6.vif6_table[ct].dev == dev)
+
+	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
+		if (mrt->vif6_table[ct].dev == dev)
 			break;
 	}
 	return ct;
 }
 
-static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
-			  struct mfc6_cache *cache)
+static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+			  struct sk_buff *skb, struct mfc6_cache *cache)
 {
 	int psend = -1;
 	int vif, ct;
@@ -1590,30 +1626,30 @@ static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
 	/*
 	 * Wrong interface: drop packet and (maybe) send PIM assert.
 	 */
-	if (net->ipv6.vif6_table[vif].dev != skb->dev) {
+	if (mrt->vif6_table[vif].dev != skb->dev) {
 		int true_vifi;
 
 		cache->mfc_un.res.wrong_if++;
-		true_vifi = ip6mr_find_vif(skb->dev);
+		true_vifi = ip6mr_find_vif(mrt, skb->dev);
 
-		if (true_vifi >= 0 && net->ipv6.mroute_do_assert &&
+		if (true_vifi >= 0 && mrt->mroute_do_assert &&
 		    /* pimsm uses asserts, when switching from RPT to SPT,
 		       so that we cannot check that packet arrived on an oif.
 		       It is bad, but otherwise we would need to move pretty
 		       large chunk of pimd to kernel. Ough... --ANK
 		     */
-		    (net->ipv6.mroute_do_pim ||
+		    (mrt->mroute_do_pim ||
 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
 		    time_after(jiffies,
 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
 			cache->mfc_un.res.last_assert = jiffies;
-			ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF);
+			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
 		}
 		goto dont_forward;
 	}
 
-	net->ipv6.vif6_table[vif].pkt_in++;
-	net->ipv6.vif6_table[vif].bytes_in += skb->len;
+	mrt->vif6_table[vif].pkt_in++;
+	mrt->vif6_table[vif].bytes_in += skb->len;
 
 	/*
 	 *	Forward the frame
@@ -1623,13 +1659,13 @@ static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
-					ip6mr_forward2(net, skb2, cache, psend);
+					ip6mr_forward2(net, mrt, skb2, cache, psend);
 			}
 			psend = ct;
 		}
 	}
 	if (psend != -1) {
-		ip6mr_forward2(net, skb, cache, psend);
+		ip6mr_forward2(net, mrt, skb, cache, psend);
 		return 0;
 	}
 
@@ -1647,9 +1683,10 @@ int ip6_mr_input(struct sk_buff *skb)
 {
 	struct mfc6_cache *cache;
 	struct net *net = dev_net(skb->dev);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	read_lock(&mrt_lock);
-	cache = ip6mr_cache_find(net,
+	cache = ip6mr_cache_find(mrt,
 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
 
 	/*
@@ -1658,9 +1695,9 @@ int ip6_mr_input(struct sk_buff *skb)
 	if (cache == NULL) {
 		int vif;
 
-		vif = ip6mr_find_vif(skb->dev);
+		vif = ip6mr_find_vif(mrt, skb->dev);
 		if (vif >= 0) {
-			int err = ip6mr_cache_unresolved(net, vif, skb);
+			int err = ip6mr_cache_unresolved(mrt, vif, skb);
 			read_unlock(&mrt_lock);
 
 			return err;
@@ -1670,7 +1707,7 @@ int ip6_mr_input(struct sk_buff *skb)
 		return -ENODEV;
 	}
 
-	ip6_mr_forward(net, skb, cache);
+	ip6_mr_forward(net, mrt, skb, cache);
 
 	read_unlock(&mrt_lock);
 
@@ -1679,8 +1716,8 @@ int ip6_mr_input(struct sk_buff *skb)
 
 
 static int
-ip6mr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc6_cache *c,
-		  struct rtmsg *rtm)
+ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+		  struct mfc6_cache *c, struct rtmsg *rtm)
 {
 	int ct;
 	struct rtnexthop *nhp;
@@ -1691,19 +1728,19 @@ ip6mr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc6_cache *c,
 	if (c->mf6c_parent > MAXMIFS)
 		return -ENOENT;
 
-	if (MIF_EXISTS(net, c->mf6c_parent))
-		RTA_PUT(skb, RTA_IIF, 4, &net->ipv6.vif6_table[c->mf6c_parent].dev->ifindex);
+	if (MIF_EXISTS(mrt, c->mf6c_parent))
+		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
 
 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
 
 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
-		if (MIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
+		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
 				goto rtattr_failure;
 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
 			nhp->rtnh_flags = 0;
 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-			nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex;
+			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
 			nhp->rtnh_len = sizeof(*nhp);
 		}
 	}
@@ -1721,11 +1758,12 @@ int ip6mr_get_route(struct net *net,
 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 {
 	int err;
+	struct mr6_table *mrt = net->ipv6.mrt6;
 	struct mfc6_cache *cache;
 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 
 	read_lock(&mrt_lock);
-	cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
+	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
 
 	if (!cache) {
 		struct sk_buff *skb2;
@@ -1739,7 +1777,7 @@ int ip6mr_get_route(struct net *net,
 		}
 
 		dev = skb->dev;
-		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
+		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
 			read_unlock(&mrt_lock);
 			return -ENODEV;
 		}
@@ -1768,7 +1806,7 @@ int ip6mr_get_route(struct net *net,
 		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
 		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
 
-		err = ip6mr_cache_unresolved(net, vif, skb2);
+		err = ip6mr_cache_unresolved(mrt, vif, skb2);
 		read_unlock(&mrt_lock);
 
 		return err;
@@ -1777,7 +1815,7 @@ int ip6mr_get_route(struct net *net,
 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
 		cache->mfc_flags |= MFC_NOTIFY;
 
-	err = ip6mr_fill_mroute(net, skb, cache, rtm);
+	err = ip6mr_fill_mroute(mrt, skb, cache, rtm);
 	read_unlock(&mrt_lock);
 	return err;
 }
-- 
cgit v1.2.3-59-g8ed1b


From d1db275dd3f6e4182c4c4b4a1ac6287925d60569 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 11 May 2010 14:40:55 +0200
Subject: ipv6: ip6mr: support multiple tables

This patch adds support for multiple independent multicast routing instances,
named "tables".

Userspace multicast routing daemons can bind to a specific table instance by
issuing a setsockopt call using a new option MRT6_TABLE. The table number is
stored in the raw socket data and affects all following ip6mr setsockopt(),
getsockopt() and ioctl() calls. By default, a single table (RT6_TABLE_DFLT)
is created with a default routing rule pointing to it. Newly created pim6reg
devices have the table number appended ("pim6regX"), with the exception of
devices created in the default table, which are named just "pim6reg" for
compatibility reasons.

Packets are directed to a specific table instance using routing rules,
similar to how regular routing rules work. Currently iif, oif and mark
are supported as keys, source and destination addresses could be supported
additionally.

Example usage:

- bind pimd/xorp/... to a specific table:

uint32_t table = 123;
setsockopt(fd, SOL_IPV6, MRT6_TABLE, &table, sizeof(table));

- create routing rules directing packets to the new table:

# ip -6 mrule add iif eth0 lookup 123
# ip -6 mrule add oif eth0 lookup 123

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/ipv6.h      |   1 +
 include/linux/mroute6.h   |  15 +-
 include/linux/rtnetlink.h |   3 +-
 include/net/netns/ipv6.h  |   5 +
 net/ipv6/Kconfig          |  14 ++
 net/ipv6/ip6_output.c     |   2 +-
 net/ipv6/ip6mr.c          | 428 +++++++++++++++++++++++++++++++++++++++-------
 7 files changed, 396 insertions(+), 72 deletions(-)

(limited to 'net')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 0e269038bb38..99e1ab7e3eec 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -383,6 +383,7 @@ struct raw6_sock {
 	__u32			checksum;	/* perform checksum */
 	__u32			offset;		/* checksum offset  */
 	struct icmp6_filter	filter;
+	__u32			ip6mr_table;
 	/* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */
 	struct ipv6_pinfo	inet6;
 };
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 0370dd4f2389..6091ab77f388 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -24,7 +24,8 @@
 #define MRT6_DEL_MFC	(MRT6_BASE+5)	/* Delete a multicast forwarding entry	*/
 #define MRT6_VERSION	(MRT6_BASE+6)	/* Get the kernel multicast version	*/
 #define MRT6_ASSERT	(MRT6_BASE+7)	/* Activate PIM assert mode		*/
-#define MRT6_PIM	(MRT6_BASE+8)	/* enable PIM code	*/
+#define MRT6_PIM	(MRT6_BASE+8)	/* enable PIM code			*/
+#define MRT6_TABLE	(MRT6_BASE+9)	/* Specify mroute table ID		*/
 
 #define SIOCGETMIFCNT_IN6	SIOCPROTOPRIVATE	/* IP protocol privates */
 #define SIOCGETSGCNT_IN6	(SIOCPROTOPRIVATE+1)
@@ -229,11 +230,17 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
 			   struct rtmsg *rtm, int nowait);
 
 #ifdef CONFIG_IPV6_MROUTE
-extern struct sock *mroute6_socket(struct net *net);
+extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
 extern int ip6mr_sk_done(struct sock *sk);
 #else
-static inline struct sock *mroute6_socket(struct net *net) { return NULL; }
-static inline int ip6mr_sk_done(struct sock *sk) { return 0; }
+static inline struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+{
+	return NULL;
+}
+static inline int ip6mr_sk_done(struct sock *sk)
+{
+	return 0;
+}
 #endif
 #endif
 
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 5a42c36cb6aa..fbc8cb0d48c3 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -11,7 +11,8 @@
  * families, values above 128 may be used arbitrarily.
  */
 #define RTNL_FAMILY_IPMR		128
-#define RTNL_FAMILY_MAX			128
+#define RTNL_FAMILY_IP6MR		129
+#define RTNL_FAMILY_MAX			129
 
 /****
  *		Routing/neighbour discovery messages.
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 4e2780e6d8bc..81abfcb2eb4e 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -59,7 +59,12 @@ struct netns_ipv6 {
 	struct sock             *tcp_sk;
 	struct sock             *igmp_sk;
 #ifdef CONFIG_IPV6_MROUTE
+#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 	struct mr6_table	*mrt6;
+#else
+	struct list_head	mr6_tables;
+	struct fib_rules_ops	*mr6_rules_ops;
+#endif
 #endif
 };
 #endif
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index a578096152ab..36d7437ac054 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -229,6 +229,20 @@ config IPV6_MROUTE
 	  Experimental support for IPv6 multicast forwarding.
 	  If unsure, say N.
 
+config IPV6_MROUTE_MULTIPLE_TABLES
+	bool "IPv6: multicast policy routing"
+	depends on IPV6_MROUTE
+	select FIB_RULES
+	help
+	  Normally, a multicast router runs a userspace daemon and decides
+	  what to do with a multicast packet based on the source and
+	  destination addresses. If you say Y here, the multicast router
+	  will also be able to take interfaces and packet marks into
+	  account and run multiple instances of userspace daemons
+	  simultaneously, each one handling a single table.
+
+	  If unsure, say N.
+
 config IPV6_PIMSM_V2
 	bool "IPv6: PIM-SM version 2 support (EXPERIMENTAL)"
 	depends on IPV6_MROUTE
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5173acaeb501..cd963f64e27c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -108,7 +108,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
 		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
-		    ((mroute6_socket(dev_net(dev)) &&
+		    ((mroute6_socket(dev_net(dev), skb) &&
 		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
 					 &ipv6_hdr(skb)->saddr))) {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9419fceeed41..c2920a1a6db3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -42,6 +42,7 @@
 #include <linux/if_arp.h>
 #include <net/checksum.h>
 #include <net/netlink.h>
+#include <net/fib_rules.h>
 
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
@@ -52,9 +53,11 @@
 #include <net/ip6_checksum.h>
 
 struct mr6_table {
+	struct list_head	list;
 #ifdef CONFIG_NET_NS
 	struct net		*net;
 #endif
+	u32			id;
 	struct sock		*mroute6_sk;
 	struct timer_list	ipmr_expire_timer;
 	struct list_head	mfc6_unres_queue;
@@ -69,6 +72,14 @@ struct mr6_table {
 #endif
 };
 
+struct ip6mr_rule {
+	struct fib_rule		common;
+};
+
+struct ip6mr_result {
+	struct mr6_table	*mrt;
+};
+
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
    Note that the changes are semaphored via rtnl_lock.
  */
@@ -94,6 +105,9 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
+static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr6_table *mrt);
+
 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 			  struct sk_buff *skb, struct mfc6_cache *cache);
 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
@@ -101,12 +115,220 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 			     struct mfc6_cache *c, struct rtmsg *rtm);
 static void mroute_clean_tables(struct mr6_table *mrt);
+static void ipmr_expire_process(unsigned long arg);
+
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+#define ip6mr_for_each_table(mrt, met) \
+	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
+
+static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+{
+	struct mr6_table *mrt;
 
+	ip6mr_for_each_table(mrt, net) {
+		if (mrt->id == id)
+			return mrt;
+	}
+	return NULL;
+}
+
+static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
+			    struct mr6_table **mrt)
+{
+	struct ip6mr_result res;
+	struct fib_lookup_arg arg = { .result = &res, };
+	int err;
+
+	err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg);
+	if (err < 0)
+		return err;
+	*mrt = res.mrt;
+	return 0;
+}
+
+static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
+			     int flags, struct fib_lookup_arg *arg)
+{
+	struct ip6mr_result *res = arg->result;
+	struct mr6_table *mrt;
+
+	switch (rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+	case FR_ACT_UNREACHABLE:
+		return -ENETUNREACH;
+	case FR_ACT_PROHIBIT:
+		return -EACCES;
+	case FR_ACT_BLACKHOLE:
+	default:
+		return -EINVAL;
+	}
+
+	mrt = ip6mr_get_table(rule->fr_net, rule->table);
+	if (mrt == NULL)
+		return -EAGAIN;
+	res->mrt = mrt;
+	return 0;
+}
+
+static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
+{
+	return 1;
+}
+
+static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
+	FRA_GENERIC_POLICY,
+};
+
+static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+				struct fib_rule_hdr *frh, struct nlattr **tb)
+{
+	return 0;
+}
+
+static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			      struct nlattr **tb)
+{
+	return 1;
+}
+
+static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			   struct fib_rule_hdr *frh)
+{
+	frh->dst_len = 0;
+	frh->src_len = 0;
+	frh->tos     = 0;
+	return 0;
+}
+
+static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
+	.family		= RTNL_FAMILY_IP6MR,
+	.rule_size	= sizeof(struct ip6mr_rule),
+	.addr_size	= sizeof(struct in6_addr),
+	.action		= ip6mr_rule_action,
+	.match		= ip6mr_rule_match,
+	.configure	= ip6mr_rule_configure,
+	.compare	= ip6mr_rule_compare,
+	.default_pref	= fib_default_rule_pref,
+	.fill		= ip6mr_rule_fill,
+	.nlgroup	= RTNLGRP_IPV6_RULE,
+	.policy		= ip6mr_rule_policy,
+	.owner		= THIS_MODULE,
+};
+
+static int __net_init ip6mr_rules_init(struct net *net)
+{
+	struct fib_rules_ops *ops;
+	struct mr6_table *mrt;
+	int err;
+
+	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+
+	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
+
+	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
+	if (mrt == NULL) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
+	if (err < 0)
+		goto err2;
+
+	net->ipv6.mr6_rules_ops = ops;
+	return 0;
+
+err2:
+	kfree(mrt);
+err1:
+	fib_rules_unregister(ops);
+	return err;
+}
+
+static void __net_exit ip6mr_rules_exit(struct net *net)
+{
+	struct mr6_table *mrt, *next;
+
+	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list)
+		ip6mr_free_table(mrt);
+	fib_rules_unregister(net->ipv6.mr6_rules_ops);
+}
+#else
+#define ip6mr_for_each_table(mrt, net) \
+	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
+
+static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+{
+	return net->ipv6.mrt6;
+}
+
+static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
+			    struct mr6_table **mrt)
+{
+	*mrt = net->ipv6.mrt6;
+	return 0;
+}
+
+static int __net_init ip6mr_rules_init(struct net *net)
+{
+	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
+	return net->ipv6.mrt6 ? 0 : -ENOMEM;
+}
+
+static void __net_exit ip6mr_rules_exit(struct net *net)
+{
+	ip6mr_free_table(net->ipv6.mrt6);
+}
+#endif
+
+static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+{
+	struct mr6_table *mrt;
+	unsigned int i;
+
+	mrt = ip6mr_get_table(net, id);
+	if (mrt != NULL)
+		return mrt;
+
+	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+	if (mrt == NULL)
+		return NULL;
+	mrt->id = id;
+	write_pnet(&mrt->net, net);
+
+	/* Forwarding cache */
+	for (i = 0; i < MFC6_LINES; i++)
+		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
+
+	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+
+	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
+		    (unsigned long)mrt);
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+	mrt->mroute_reg_vif_num = -1;
+#endif
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
+#endif
+	return mrt;
+}
+
+static void ip6mr_free_table(struct mr6_table *mrt)
+{
+	del_timer(&mrt->ipmr_expire_timer);
+	mroute_clean_tables(mrt);
+	kfree(mrt);
+}
 
 #ifdef CONFIG_PROC_FS
 
 struct ipmr_mfc_iter {
 	struct seq_net_private p;
+	struct mr6_table *mrt;
 	struct list_head *cache;
 	int ct;
 };
@@ -115,7 +337,7 @@ struct ipmr_mfc_iter {
 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 					   struct ipmr_mfc_iter *it, loff_t pos)
 {
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt = it->mrt;
 	struct mfc6_cache *mfc;
 
 	read_lock(&mrt_lock);
@@ -144,6 +366,7 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 
 struct ipmr_vif_iter {
 	struct seq_net_private p;
+	struct mr6_table *mrt;
 	int ct;
 };
 
@@ -151,7 +374,7 @@ static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 					    struct ipmr_vif_iter *iter,
 					    loff_t pos)
 {
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt = iter->mrt;
 
 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
 		if (!MIF_EXISTS(mrt, iter->ct))
@@ -165,7 +388,15 @@ static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(mrt_lock)
 {
+	struct ipmr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return ERR_PTR(-ENOENT);
+
+	iter->mrt = mrt;
 
 	read_lock(&mrt_lock);
 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
@@ -176,7 +407,7 @@ static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ipmr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt = iter->mrt;
 
 	++*pos;
 	if (v == SEQ_START_TOKEN)
@@ -198,8 +429,8 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 
 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 {
-	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct ipmr_vif_iter *iter = seq->private;
+	struct mr6_table *mrt = iter->mrt;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -241,8 +472,15 @@ static const struct file_operations ip6mr_vif_fops = {
 
 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
+	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return ERR_PTR(-ENOENT);
 
+	it->mrt = mrt;
 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
 		: SEQ_START_TOKEN;
 }
@@ -252,7 +490,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	struct mfc6_cache *mfc = v;
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt = it->mrt;
 
 	++*pos;
 
@@ -293,8 +531,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 {
 	struct ipmr_mfc_iter *it = seq->private;
-	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt = it->mrt;
 
 	if (it->cache == &mrt->mfc6_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
@@ -305,8 +542,6 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 {
 	int n;
-	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -316,6 +551,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 	} else {
 		const struct mfc6_cache *mfc = v;
 		const struct ipmr_mfc_iter *it = seq->private;
+		struct mr6_table *mrt = it->mrt;
 
 		seq_printf(seq, "%pI6 %pI6 %-3hd",
 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
@@ -375,8 +611,12 @@ static int pim6_rcv(struct sk_buff *skb)
 	struct ipv6hdr   *encap;
 	struct net_device  *reg_dev = NULL;
 	struct net *net = dev_net(skb->dev);
-	struct mr6_table *mrt = net->ipv6.mrt6;
-	int reg_vif_num = mrt->mroute_reg_vif_num;
+	struct mr6_table *mrt;
+	struct flowi fl = {
+		.iif	= skb->dev->ifindex,
+		.mark	= skb->mark,
+	};
+	int reg_vif_num;
 
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 		goto drop;
@@ -399,6 +639,10 @@ static int pim6_rcv(struct sk_buff *skb)
 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 		goto drop;
 
+	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
+		goto drop;
+	reg_vif_num = mrt->mroute_reg_vif_num;
+
 	read_lock(&mrt_lock);
 	if (reg_vif_num >= 0)
 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
@@ -438,7 +682,17 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
 {
 	struct net *net = dev_net(dev);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
+	struct flowi fl = {
+		.oif		= dev->ifindex,
+		.iif		= skb->skb_iif,
+		.mark		= skb->mark,
+	};
+	int err;
+
+	err = ip6mr_fib_lookup(net, &fl, &mrt);
+	if (err < 0)
+		return err;
 
 	read_lock(&mrt_lock);
 	dev->stats.tx_bytes += skb->len;
@@ -463,11 +717,17 @@ static void reg_vif_setup(struct net_device *dev)
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
-static struct net_device *ip6mr_reg_vif(struct net *net)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
 {
 	struct net_device *dev;
+	char name[IFNAMSIZ];
+
+	if (mrt->id == RT6_TABLE_DFLT)
+		sprintf(name, "pim6reg");
+	else
+		sprintf(name, "pim6reg%u", mrt->id);
 
-	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
+	dev = alloc_netdev(0, name, reg_vif_setup);
 	if (dev == NULL)
 		return NULL;
 
@@ -665,7 +925,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
 		 */
 		if (mrt->mroute_reg_vif_num >= 0)
 			return -EADDRINUSE;
-		dev = ip6mr_reg_vif(net);
+		dev = ip6mr_reg_vif(net, mrt);
 		if (!dev)
 			return -ENOBUFS;
 		err = dev_set_allmulti(dev, 1);
@@ -995,7 +1255,7 @@ static int ip6mr_device_event(struct notifier_block *this,
 {
 	struct net_device *dev = ptr;
 	struct net *net = dev_net(dev);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
 	struct mif_device *v;
 	int ct;
 	LIST_HEAD(list);
@@ -1003,10 +1263,12 @@ static int ip6mr_device_event(struct notifier_block *this,
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
 
-	v = &mrt->vif6_table[0];
-	for (ct = 0; ct < mrt->maxvif; ct++, v++) {
-		if (v->dev == dev)
-			mif6_delete(mrt, ct, &list);
+	ip6mr_for_each_table(mrt, net) {
+		v = &mrt->vif6_table[0];
+		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
+			if (v->dev == dev)
+				mif6_delete(mrt, ct, &list);
+		}
 	}
 	unregister_netdevice_many(&list);
 
@@ -1023,29 +1285,11 @@ static struct notifier_block ip6_mr_notifier = {
 
 static int __net_init ip6mr_net_init(struct net *net)
 {
-	struct mr6_table *mrt;
-	unsigned int i;
-	int err = 0;
+	int err;
 
-	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
-	if (mrt == NULL) {
-		err = -ENOMEM;
+	err = ip6mr_rules_init(net);
+	if (err < 0)
 		goto fail;
-	}
-
-	write_pnet(&mrt->net, net);
-
-	for (i = 0; i < MFC6_LINES; i++)
-		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
-
-	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
-
-	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
-		    (unsigned long)mrt);
-
-#ifdef CONFIG_IPV6_PIMSM_V2
-	mrt->mroute_reg_vif_num = -1;
-#endif
 
 #ifdef CONFIG_PROC_FS
 	err = -ENOMEM;
@@ -1055,14 +1299,13 @@ static int __net_init ip6mr_net_init(struct net *net)
 		goto proc_cache_fail;
 #endif
 
-	net->ipv6.mrt6 = mrt;
 	return 0;
 
 #ifdef CONFIG_PROC_FS
 proc_cache_fail:
 	proc_net_remove(net, "ip6_mr_vif");
 proc_vif_fail:
-	kfree(mrt);
+	ip6mr_rules_exit(net);
 #endif
 fail:
 	return err;
@@ -1070,15 +1313,11 @@ fail:
 
 static void __net_exit ip6mr_net_exit(struct net *net)
 {
-	struct mr6_table *mrt = net->ipv6.mrt6;
-
 #ifdef CONFIG_PROC_FS
 	proc_net_remove(net, "ip6_mr_cache");
 	proc_net_remove(net, "ip6_mr_vif");
 #endif
-	del_timer(&mrt->ipmr_expire_timer);
-	mroute_clean_tables(mrt);
-	kfree(mrt);
+	ip6mr_rules_exit(net);
 }
 
 static struct pernet_operations ip6mr_net_ops = {
@@ -1279,28 +1518,39 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
 
 int ip6mr_sk_done(struct sock *sk)
 {
-	int err = 0;
+	int err = -EACCES;
 	struct net *net = sock_net(sk);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
 
 	rtnl_lock();
-	if (sk == mrt->mroute6_sk) {
-		write_lock_bh(&mrt_lock);
-		mrt->mroute6_sk = NULL;
-		net->ipv6.devconf_all->mc_forwarding--;
-		write_unlock_bh(&mrt_lock);
+	ip6mr_for_each_table(mrt, net) {
+		if (sk == mrt->mroute6_sk) {
+			write_lock_bh(&mrt_lock);
+			mrt->mroute6_sk = NULL;
+			net->ipv6.devconf_all->mc_forwarding--;
+			write_unlock_bh(&mrt_lock);
 
-		mroute_clean_tables(mrt);
-	} else
-		err = -EACCES;
+			mroute_clean_tables(mrt);
+			err = 0;
+			break;
+		}
+	}
 	rtnl_unlock();
 
 	return err;
 }
 
-struct sock *mroute6_socket(struct net *net)
+struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
 {
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
+	struct flowi fl = {
+		.iif	= skb->skb_iif,
+		.oif	= skb->dev->ifindex,
+		.mark	= skb->mark,
+	};
+
+	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
+		return NULL;
 
 	return mrt->mroute6_sk;
 }
@@ -1319,7 +1569,11 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 	struct mf6cctl mfc;
 	mifi_t mifi;
 	struct net *net = sock_net(sk);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
 
 	if (optname != MRT6_INIT) {
 		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
@@ -1408,6 +1662,27 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		return ret;
 	}
 
+#endif
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+	case MRT6_TABLE:
+	{
+		u32 v;
+
+		if (optlen != sizeof(u32))
+			return -EINVAL;
+		if (get_user(v, (u32 __user *)optval))
+			return -EFAULT;
+		if (sk == mrt->mroute6_sk)
+			return -EBUSY;
+
+		rtnl_lock();
+		ret = 0;
+		if (!ip6mr_new_table(net, v))
+			ret = -ENOMEM;
+		raw6_sk(sk)->ip6mr_table = v;
+		rtnl_unlock();
+		return ret;
+	}
 #endif
 	/*
 	 *	Spurious command, or MRT6_VERSION which you cannot
@@ -1428,7 +1703,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
 	int olr;
 	int val;
 	struct net *net = sock_net(sk);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
 
 	switch (optname) {
 	case MRT6_VERSION:
@@ -1471,7 +1750,11 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	struct mif_device *vif;
 	struct mfc6_cache *c;
 	struct net *net = sock_net(sk);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
 
 	switch (cmd) {
 	case SIOCGETMIFCNT_IN6:
@@ -1683,7 +1966,16 @@ int ip6_mr_input(struct sk_buff *skb)
 {
 	struct mfc6_cache *cache;
 	struct net *net = dev_net(skb->dev);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
+	struct flowi fl = {
+		.iif	= skb->dev->ifindex,
+		.mark	= skb->mark,
+	};
+	int err;
+
+	err = ip6mr_fib_lookup(net, &fl, &mrt);
+	if (err < 0)
+		return err;
 
 	read_lock(&mrt_lock);
 	cache = ip6mr_cache_find(mrt,
@@ -1758,10 +2050,14 @@ int ip6mr_get_route(struct net *net,
 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 {
 	int err;
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
 	struct mfc6_cache *cache;
 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 
+	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
+
 	read_lock(&mrt_lock);
 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
 
-- 
cgit v1.2.3-59-g8ed1b


From 5b285cac3570a935aaa28312c1ea28f9e01c5452 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 11 May 2010 14:40:56 +0200
Subject: ipv6: ip6mr: add support for dumping routing tables over netlink

The ip6mr /proc interface (ip6_mr_cache) can't be extended to dump routes
from any tables but the main table in a backwards compatible fashion since
the output format ends in a variable number of output interfaces.

Introduce a new netlink interface to dump multicast routes from all tables,
similar to the netlink interface for regular routes.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv6/ip6mr.c | 96 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 89 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index c2920a1a6db3..163850e22b11 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -112,8 +112,10 @@ static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 			  struct sk_buff *skb, struct mfc6_cache *cache);
 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 			      mifi_t mifi, int assert);
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
-			     struct mfc6_cache *c, struct rtmsg *rtm);
+static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+			       struct mfc6_cache *c, struct rtmsg *rtm);
+static int ip6mr_rtm_dumproute(struct sk_buff *skb,
+			       struct netlink_callback *cb);
 static void mroute_clean_tables(struct mr6_table *mrt);
 static void ipmr_expire_process(unsigned long arg);
 
@@ -1038,7 +1040,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
 			int err;
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 
-			if (ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
+			if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
 			} else {
 				nlh->nlmsg_type = NLMSG_ERROR;
@@ -1350,6 +1352,7 @@ int __init ip6_mr_init(void)
 		goto add_proto_fail;
 	}
 #endif
+	rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, ip6mr_rtm_dumproute);
 	return 0;
 #ifdef CONFIG_IPV6_PIMSM_V2
 add_proto_fail:
@@ -2007,9 +2010,8 @@ int ip6_mr_input(struct sk_buff *skb)
 }
 
 
-static int
-ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
-		  struct mfc6_cache *c, struct rtmsg *rtm)
+static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+			       struct mfc6_cache *c, struct rtmsg *rtm)
 {
 	int ct;
 	struct rtnexthop *nhp;
@@ -2111,8 +2113,88 @@ int ip6mr_get_route(struct net *net,
 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
 		cache->mfc_flags |= MFC_NOTIFY;
 
-	err = ip6mr_fill_mroute(mrt, skb, cache, rtm);
+	err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
 	read_unlock(&mrt_lock);
 	return err;
 }
 
+static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+			     u32 pid, u32 seq, struct mfc6_cache *c)
+{
+	/* Build one RTM_NEWROUTE message for cache entry @c of table @mrt.
+	 * Returns nlmsg_end()'s result, or -EMSGSIZE when a step fails. */
+	struct nlmsghdr *nlh;
+	struct rtmsg *rtm;
+
+	nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	rtm = nlmsg_data(nlh);
+	rtm->rtm_family   = RTNL_FAMILY_IP6MR;	/* family the dump is registered under */
+	rtm->rtm_dst_len  = 128;
+	rtm->rtm_src_len  = 128;
+	rtm->rtm_tos      = 0;
+	rtm->rtm_table    = mrt->id;
+	NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
+	rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
+	rtm->rtm_protocol = RTPROT_UNSPEC;
+	rtm->rtm_flags    = 0;
+	NLA_PUT(skb, RTA_SRC, 16, &c->mf6c_origin);
+	NLA_PUT(skb, RTA_DST, 16, &c->mf6c_mcastgrp);
+	if (__ip6mr_fill_mroute(mrt, skb, c, rtm) < 0)
+		goto nla_put_failure;
+
+	return nlmsg_end(skb, nlh);
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
+{	/* dump handler: emits the MFC entries of every table in the namespace */
+	struct net *net = sock_net(skb->sk);
+	struct mr6_table *mrt;
+	struct mfc6_cache *mfc;
+	unsigned int t = 0, s_t;
+	unsigned int h = 0, s_h;
+	unsigned int e = 0, s_e;
+
+	s_t = cb->args[0];	/* table index to resume from */
+	s_h = cb->args[1];	/* hash line to resume from */
+	s_e = cb->args[2];	/* entry within that line to resume from */
+
+	read_lock(&mrt_lock);
+	ip6mr_for_each_table(mrt, net) {
+		if (t < s_t)
+			goto next_table;	/* before the resume point: skip */
+		if (t > s_t)
+			s_h = 0;	/* past the resumed table: start at line 0 */
+		for (h = s_h; h < MFC6_LINES; h++) {
+			list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
+				if (e < s_e)
+					goto next_entry;
+				if (ip6mr_fill_mroute(mrt, skb,
+						      NETLINK_CB(cb->skb).pid,
+						      cb->nlh->nlmsg_seq,
+						      mfc) < 0)
+					goto done;	/* stop here; position is saved below */
+next_entry:
+				e++;
+			}
+			e = s_e = 0;	/* entry offset only applies to the resumed line */
+		}
+		s_h = 0;
+next_table:
+		t++;
+	}
+done:
+	read_unlock(&mrt_lock);
+
+	cb->args[2] = e;	/* save position for the next invocation */
+	cb->args[1] = h;
+	cb->args[0] = t;
+
+	return skb->len;
+}
-- 
cgit v1.2.3-59-g8ed1b


From de74c16996287250f0d947663127f80c6beebd3c Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sun, 5 Jul 2009 18:26:37 +0200
Subject: netfilter: xtables: combine struct xt_match_param and xt_target_param

The structures carried - besides match/target - almost the same data.
It is possible to combine them, as extensions are evaluated serially,
and so, the callers end up a little smaller.

  text  data  bss  filename
-15318   740  104  net/ipv4/netfilter/ip_tables.o
+15286   740  104  net/ipv4/netfilter/ip_tables.o
-15333   540  152  net/ipv6/netfilter/ip6_tables.o
+15269   540  152  net/ipv6/netfilter/ip6_tables.o

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h | 42 +++++++++++++++-----------------------
 net/bridge/netfilter/ebtables.c    | 30 +++++++++++++--------------
 net/ipv4/netfilter/arp_tables.c    | 16 +++++++--------
 net/ipv4/netfilter/ip_tables.c     | 32 ++++++++++++++---------------
 net/ipv6/netfilter/ip6_tables.c    | 27 ++++++++++++------------
 5 files changed, 68 insertions(+), 79 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index eeb4884c30be..5efa3757d08c 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -182,13 +182,17 @@ struct xt_counters_info {
 
 #include <linux/netdevice.h>
 
+#define xt_match_param xt_action_param
+#define xt_target_param xt_action_param
 /**
- * struct xt_match_param - parameters for match extensions' match functions
+ * struct xt_action_param - parameters for matches/targets
  *
+ * @match:	the match extension
+ * @target:	the target extension
+ * @matchinfo:	per-match data
+ * @targinfo:	per-target data
  * @in:		input netdevice
  * @out:	output netdevice
- * @match:	struct xt_match through which this function was invoked
- * @matchinfo:	per-match data
  * @fragoff:	packet is a fragment, this is the data offset
  * @thoff:	position of transport header relative to skb->data
  * @hook:	hook number given packet came from
@@ -197,10 +201,15 @@ struct xt_counters_info {
  * @hotdrop:	drop packet if we had inspection problems
  * Network namespace obtainable using dev_net(in/out)
  */
-struct xt_match_param {
+struct xt_action_param {
+	union {
+		const struct xt_match *match;
+		const struct xt_target *target;
+	};
+	union {
+		const void *matchinfo, *targinfo;
+	};
 	const struct net_device *in, *out;
-	const struct xt_match *match;
-	const void *matchinfo;
 	int fragoff;
 	unsigned int thoff;
 	unsigned int hooknum;
@@ -242,23 +251,6 @@ struct xt_mtdtor_param {
 	u_int8_t family;
 };
 
-/**
- * struct xt_target_param - parameters for target extensions' target functions
- *
- * @hooknum:	hook through which this target was invoked
- * @target:	struct xt_target through which this function was invoked
- * @targinfo:	per-target data
- *
- * Other fields see above.
- */
-struct xt_target_param {
-	const struct net_device *in, *out;
-	const struct xt_target *target;
-	const void *targinfo;
-	unsigned int hooknum;
-	u_int8_t family;
-};
-
 /**
  * struct xt_tgchk_param - parameters for target extensions'
  * checkentry functions
@@ -298,7 +290,7 @@ struct xt_match {
 	   non-linear skb, using skb_header_pointer and
 	   skb_ip_make_writable. */
 	bool (*match)(const struct sk_buff *skb,
-		      const struct xt_match_param *);
+		      const struct xt_action_param *);
 
 	/* Called when user tries to insert an entry of this type. */
 	int (*checkentry)(const struct xt_mtchk_param *);
@@ -335,7 +327,7 @@ struct xt_target {
 	   must now handle non-linear skbs, using skb_copy_bits and
 	   skb_ip_make_writable. */
 	unsigned int (*target)(struct sk_buff *skb,
-			       const struct xt_target_param *);
+			       const struct xt_action_param *);
 
 	/* Called when user tries to insert an entry of this type:
            hook_mask is a bitmask of hooks from which it can be
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 1d8c2c0a7470..290d43541d46 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -86,7 +86,7 @@ static struct xt_target ebt_standard_target = {
 
 static inline int
 ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb,
-	       struct xt_target_param *par)
+	       struct xt_action_param *par)
 {
 	par->target   = w->u.watcher;
 	par->targinfo = w->data;
@@ -95,8 +95,9 @@ ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb,
 	return 0;
 }
 
-static inline int ebt_do_match (struct ebt_entry_match *m,
-   const struct sk_buff *skb, struct xt_match_param *par)
+static inline int
+ebt_do_match(struct ebt_entry_match *m, const struct sk_buff *skb,
+	     struct xt_action_param *par)
 {
 	par->match     = m->u.match;
 	par->matchinfo = m->data;
@@ -186,14 +187,13 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 	const char *base;
 	const struct ebt_table_info *private;
 	bool hotdrop = false;
-	struct xt_match_param mtpar;
-	struct xt_target_param tgpar;
+	struct xt_action_param acpar;
 
-	mtpar.family  = tgpar.family = NFPROTO_BRIDGE;
-	mtpar.in      = tgpar.in  = in;
-	mtpar.out     = tgpar.out = out;
-	mtpar.hotdrop = &hotdrop;
-	mtpar.hooknum = tgpar.hooknum = hook;
+	acpar.family  = NFPROTO_BRIDGE;
+	acpar.in      = in;
+	acpar.out     = out;
+	acpar.hotdrop = &hotdrop;
+	acpar.hooknum = hook;
 
 	read_lock_bh(&table->lock);
 	private = table->private;
@@ -214,7 +214,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 		if (ebt_basic_match(point, eth_hdr(skb), in, out))
 			goto letscontinue;
 
-		if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &mtpar) != 0)
+		if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0)
 			goto letscontinue;
 		if (hotdrop) {
 			read_unlock_bh(&table->lock);
@@ -227,7 +227,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 
 		/* these should only watch: not modify, nor tell us
 		   what to do with the packet */
-		EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &tgpar);
+		EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar);
 
 		t = (struct ebt_entry_target *)
 		   (((char *)point) + point->target_offset);
@@ -235,9 +235,9 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 		if (!t->u.target->target)
 			verdict = ((struct ebt_standard_target *)t)->verdict;
 		else {
-			tgpar.target   = t->u.target;
-			tgpar.targinfo = t->data;
-			verdict = t->u.target->target(skb, &tgpar);
+			acpar.target   = t->u.target;
+			acpar.targinfo = t->data;
+			verdict = t->u.target->target(skb, &acpar);
 		}
 		if (verdict == EBT_ACCEPT) {
 			read_unlock_bh(&table->lock);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 07a699059390..73d924b88f89 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -265,7 +265,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	const char *indev, *outdev;
 	void *table_base;
 	const struct xt_table_info *private;
-	struct xt_target_param tgpar;
+	struct xt_action_param acpar;
 
 	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
 		return NF_DROP;
@@ -280,10 +280,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
 
-	tgpar.in      = in;
-	tgpar.out     = out;
-	tgpar.hooknum = hook;
-	tgpar.family  = NFPROTO_ARP;
+	acpar.in      = in;
+	acpar.out     = out;
+	acpar.hooknum = hook;
+	acpar.family  = NFPROTO_ARP;
 
 	arp = arp_hdr(skb);
 	do {
@@ -333,9 +333,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 		/* Targets which reenter must return
 		 * abs. verdicts
 		 */
-		tgpar.target   = t->u.kernel.target;
-		tgpar.targinfo = t->data;
-		verdict = t->u.kernel.target->target(skb, &tgpar);
+		acpar.target   = t->u.kernel.target;
+		acpar.targinfo = t->data;
+		verdict = t->u.kernel.target->target(skb, &acpar);
 
 		/* Target might have changed stuff. */
 		arp = arp_hdr(skb);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 265cedf88660..e1a53c2da032 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -316,8 +316,7 @@ ipt_do_table(struct sk_buff *skb,
 	struct ipt_entry *e, **jumpstack;
 	unsigned int *stackptr, origptr, cpu;
 	const struct xt_table_info *private;
-	struct xt_match_param mtpar;
-	struct xt_target_param tgpar;
+	struct xt_action_param acpar;
 
 	/* Initialization */
 	ip = ip_hdr(skb);
@@ -329,13 +328,13 @@ ipt_do_table(struct sk_buff *skb,
 	 * things we don't know, ie. tcp syn flag or ports).  If the
 	 * rule is also a fragment-specific rule, non-fragments won't
 	 * match it. */
-	mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
-	mtpar.thoff   = ip_hdrlen(skb);
-	mtpar.hotdrop = &hotdrop;
-	mtpar.in      = tgpar.in  = in;
-	mtpar.out     = tgpar.out = out;
-	mtpar.family  = tgpar.family = NFPROTO_IPV4;
-	mtpar.hooknum = tgpar.hooknum = hook;
+	acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
+	acpar.thoff   = ip_hdrlen(skb);
+	acpar.hotdrop = &hotdrop;
+	acpar.in      = in;
+	acpar.out     = out;
+	acpar.family  = NFPROTO_IPV4;
+	acpar.hooknum = hook;
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 	xt_info_rdlock_bh();
@@ -358,16 +357,16 @@ ipt_do_table(struct sk_buff *skb,
 
 		IP_NF_ASSERT(e);
 		if (!ip_packet_match(ip, indev, outdev,
-		    &e->ip, mtpar.fragoff)) {
+		    &e->ip, acpar.fragoff)) {
  no_match:
 			e = ipt_next_entry(e);
 			continue;
 		}
 
 		xt_ematch_foreach(ematch, e) {
-			mtpar.match     = ematch->u.kernel.match;
-			mtpar.matchinfo = ematch->data;
-			if (!mtpar.match->match(skb, &mtpar))
+			acpar.match     = ematch->u.kernel.match;
+			acpar.matchinfo = ematch->data;
+			if (!acpar.match->match(skb, &acpar))
 				goto no_match;
 		}
 
@@ -422,11 +421,10 @@ ipt_do_table(struct sk_buff *skb,
 			continue;
 		}
 
-		tgpar.target   = t->u.kernel.target;
-		tgpar.targinfo = t->data;
+		acpar.target   = t->u.kernel.target;
+		acpar.targinfo = t->data;
 
-
-		verdict = t->u.kernel.target->target(skb, &tgpar);
+		verdict = t->u.kernel.target->target(skb, &acpar);
 		/* Target might have changed stuff. */
 		ip = ip_hdr(skb);
 		if (verdict == IPT_CONTINUE)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f8ac4a0b5899..076308c1acd7 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -345,8 +345,7 @@ ip6t_do_table(struct sk_buff *skb,
 	struct ip6t_entry *e, **jumpstack;
 	unsigned int *stackptr, origptr, cpu;
 	const struct xt_table_info *private;
-	struct xt_match_param mtpar;
-	struct xt_target_param tgpar;
+	struct xt_action_param acpar;
 
 	/* Initialization */
 	indev = in ? in->name : nulldevname;
@@ -357,11 +356,11 @@ ip6t_do_table(struct sk_buff *skb,
 	 * things we don't know, ie. tcp syn flag or ports).  If the
 	 * rule is also a fragment-specific rule, non-fragments won't
 	 * match it. */
-	mtpar.hotdrop = &hotdrop;
-	mtpar.in      = tgpar.in  = in;
-	mtpar.out     = tgpar.out = out;
-	mtpar.family  = tgpar.family = NFPROTO_IPV6;
-	mtpar.hooknum = tgpar.hooknum = hook;
+	acpar.hotdrop = &hotdrop;
+	acpar.in      = in;
+	acpar.out     = out;
+	acpar.family  = NFPROTO_IPV6;
+	acpar.hooknum = hook;
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 
@@ -381,16 +380,16 @@ ip6t_do_table(struct sk_buff *skb,
 
 		IP_NF_ASSERT(e);
 		if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
-		    &mtpar.thoff, &mtpar.fragoff, &hotdrop)) {
+		    &acpar.thoff, &acpar.fragoff, &hotdrop)) {
  no_match:
 			e = ip6t_next_entry(e);
 			continue;
 		}
 
 		xt_ematch_foreach(ematch, e) {
-			mtpar.match     = ematch->u.kernel.match;
-			mtpar.matchinfo = ematch->data;
-			if (!mtpar.match->match(skb, &mtpar))
+			acpar.match     = ematch->u.kernel.match;
+			acpar.matchinfo = ematch->data;
+			if (!acpar.match->match(skb, &acpar))
 				goto no_match;
 		}
 
@@ -439,10 +438,10 @@ ip6t_do_table(struct sk_buff *skb,
 			continue;
 		}
 
-		tgpar.target   = t->u.kernel.target;
-		tgpar.targinfo = t->data;
+		acpar.target   = t->u.kernel.target;
+		acpar.targinfo = t->data;
 
-		verdict = t->u.kernel.target->target(skb, &tgpar);
+		verdict = t->u.kernel.target->target(skb, &acpar);
 		if (verdict == IP6T_CONTINUE)
 			e = ip6t_next_entry(e);
 		else
-- 
cgit v1.2.3-59-g8ed1b


From 4b560b447df83368df44bd3712c0c39b1d79ba04 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sun, 5 Jul 2009 19:43:26 +0200
Subject: netfilter: xtables: substitute temporary defines by final name

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h   | 2 --
 net/bridge/netfilter/ebt_802_3.c     | 2 +-
 net/bridge/netfilter/ebt_among.c     | 2 +-
 net/bridge/netfilter/ebt_arp.c       | 2 +-
 net/bridge/netfilter/ebt_arpreply.c  | 2 +-
 net/bridge/netfilter/ebt_dnat.c      | 2 +-
 net/bridge/netfilter/ebt_ip.c        | 2 +-
 net/bridge/netfilter/ebt_ip6.c       | 2 +-
 net/bridge/netfilter/ebt_limit.c     | 2 +-
 net/bridge/netfilter/ebt_log.c       | 2 +-
 net/bridge/netfilter/ebt_mark.c      | 2 +-
 net/bridge/netfilter/ebt_mark_m.c    | 2 +-
 net/bridge/netfilter/ebt_nflog.c     | 2 +-
 net/bridge/netfilter/ebt_pkttype.c   | 2 +-
 net/bridge/netfilter/ebt_redirect.c  | 2 +-
 net/bridge/netfilter/ebt_snat.c      | 2 +-
 net/bridge/netfilter/ebt_stp.c       | 2 +-
 net/bridge/netfilter/ebt_ulog.c      | 2 +-
 net/bridge/netfilter/ebt_vlan.c      | 2 +-
 net/ipv4/netfilter/arp_tables.c      | 2 +-
 net/ipv4/netfilter/arpt_mangle.c     | 2 +-
 net/ipv4/netfilter/ip_tables.c       | 4 ++--
 net/ipv4/netfilter/ipt_CLUSTERIP.c   | 2 +-
 net/ipv4/netfilter/ipt_ECN.c         | 2 +-
 net/ipv4/netfilter/ipt_LOG.c         | 2 +-
 net/ipv4/netfilter/ipt_MASQUERADE.c  | 2 +-
 net/ipv4/netfilter/ipt_NETMAP.c      | 2 +-
 net/ipv4/netfilter/ipt_REDIRECT.c    | 2 +-
 net/ipv4/netfilter/ipt_REJECT.c      | 2 +-
 net/ipv4/netfilter/ipt_ULOG.c        | 2 +-
 net/ipv4/netfilter/ipt_addrtype.c    | 4 ++--
 net/ipv4/netfilter/ipt_ah.c          | 2 +-
 net/ipv4/netfilter/ipt_ecn.c         | 3 ++-
 net/ipv4/netfilter/nf_nat_rule.c     | 4 ++--
 net/ipv6/netfilter/ip6_tables.c      | 4 ++--
 net/ipv6/netfilter/ip6t_LOG.c        | 2 +-
 net/ipv6/netfilter/ip6t_REJECT.c     | 2 +-
 net/ipv6/netfilter/ip6t_ah.c         | 3 ++-
 net/ipv6/netfilter/ip6t_eui64.c      | 2 +-
 net/ipv6/netfilter/ip6t_frag.c       | 2 +-
 net/ipv6/netfilter/ip6t_hbh.c        | 2 +-
 net/ipv6/netfilter/ip6t_ipv6header.c | 2 +-
 net/ipv6/netfilter/ip6t_mh.c         | 3 ++-
 net/ipv6/netfilter/ip6t_rt.c         | 3 ++-
 net/netfilter/xt_CLASSIFY.c          | 2 +-
 net/netfilter/xt_CONNSECMARK.c       | 2 +-
 net/netfilter/xt_CT.c                | 2 +-
 net/netfilter/xt_DSCP.c              | 8 ++++----
 net/netfilter/xt_HL.c                | 4 ++--
 net/netfilter/xt_LED.c               | 2 +-
 net/netfilter/xt_NFLOG.c             | 2 +-
 net/netfilter/xt_NFQUEUE.c           | 4 ++--
 net/netfilter/xt_NOTRACK.c           | 2 +-
 net/netfilter/xt_RATEEST.c           | 2 +-
 net/netfilter/xt_SECMARK.c           | 2 +-
 net/netfilter/xt_TCPMSS.c            | 4 ++--
 net/netfilter/xt_TCPOPTSTRIP.c       | 4 ++--
 net/netfilter/xt_TEE.c               | 4 ++--
 net/netfilter/xt_TPROXY.c            | 2 +-
 net/netfilter/xt_TRACE.c             | 2 +-
 net/netfilter/xt_cluster.c           | 2 +-
 net/netfilter/xt_comment.c           | 2 +-
 net/netfilter/xt_connbytes.c         | 2 +-
 net/netfilter/xt_connlimit.c         | 2 +-
 net/netfilter/xt_connmark.c          | 4 ++--
 net/netfilter/xt_conntrack.c         | 6 +++---
 net/netfilter/xt_dccp.c              | 2 +-
 net/netfilter/xt_dscp.c              | 7 ++++---
 net/netfilter/xt_esp.c               | 3 ++-
 net/netfilter/xt_hashlimit.c         | 2 +-
 net/netfilter/xt_helper.c            | 2 +-
 net/netfilter/xt_hl.c                | 6 ++++--
 net/netfilter/xt_iprange.c           | 4 ++--
 net/netfilter/xt_length.c            | 4 ++--
 net/netfilter/xt_limit.c             | 2 +-
 net/netfilter/xt_mac.c               | 3 ++-
 net/netfilter/xt_mark.c              | 4 ++--
 net/netfilter/xt_multiport.c         | 2 +-
 net/netfilter/xt_osf.c               | 4 ++--
 net/netfilter/xt_owner.c             | 2 +-
 net/netfilter/xt_physdev.c           | 2 +-
 net/netfilter/xt_pkttype.c           | 2 +-
 net/netfilter/xt_policy.c            | 2 +-
 net/netfilter/xt_quota.c             | 2 +-
 net/netfilter/xt_rateest.c           | 2 +-
 net/netfilter/xt_realm.c             | 2 +-
 net/netfilter/xt_recent.c            | 2 +-
 net/netfilter/xt_sctp.c              | 2 +-
 net/netfilter/xt_socket.c            | 6 +++---
 net/netfilter/xt_state.c             | 2 +-
 net/netfilter/xt_statistic.c         | 2 +-
 net/netfilter/xt_string.c            | 2 +-
 net/netfilter/xt_tcpmss.c            | 2 +-
 net/netfilter/xt_tcpudp.c            | 6 ++++--
 net/netfilter/xt_time.c              | 2 +-
 net/netfilter/xt_u32.c               | 3 ++-
 net/sched/act_ipt.c                  | 2 +-
 97 files changed, 133 insertions(+), 123 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 5efa3757d08c..6d1f9a638056 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -182,8 +182,6 @@ struct xt_counters_info {
 
 #include <linux/netdevice.h>
 
-#define xt_match_param xt_action_param
-#define xt_target_param xt_action_param
 /**
  * struct xt_action_param - parameters for matches/targets
  *
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index f7de8dbc3422..4b0df00c82ec 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_802_3.h>
 
 static bool
-ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_802_3_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_802_3_info *info = par->matchinfo;
 	const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb);
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 20068e03fa81..c04f9461f734 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -129,7 +129,7 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr)
 }
 
 static bool
-ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_among_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_among_info *info = par->matchinfo;
 	const char *dmac, *smac;
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 952150cd5e7d..6203f4dea2e2 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter_bridge/ebt_arp.h>
 
 static bool
-ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_arp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_arp_info *info = par->matchinfo;
 	const struct arphdr *ah;
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 4581adb27583..070cf134a22f 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter_bridge/ebt_arpreply.h>
 
 static unsigned int
-ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_arpreply_info *info = par->targinfo;
 	const __be32 *siptr, *diptr;
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 59d5b7c8a557..c59f7bfae6e2 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -15,7 +15,7 @@
 #include <linux/netfilter_bridge/ebt_nat.h>
 
 static unsigned int
-ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_nat_info *info = par->targinfo;
 
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index a1c76c7e5219..a0cde7442b55 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -25,7 +25,7 @@ struct tcpudphdr {
 };
 
 static bool
-ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_ip_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_ip_info *info = par->matchinfo;
 	const struct iphdr *ih;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 33f8413f05ad..c451dc2ff822 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -28,7 +28,7 @@ struct tcpudphdr {
 };
 
 static bool
-ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_ip6_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_ip6_info *info = par->matchinfo;
 	const struct ipv6hdr *ih6;
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 4b0e2e53fa57..760923f08067 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -32,7 +32,7 @@ static DEFINE_SPINLOCK(limit_lock);
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
 static bool
-ebt_limit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_limit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct ebt_limit_info *info = (void *)par->matchinfo;
 	unsigned long now = jiffies;
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index c46024156539..6e5a8bb9b940 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -171,7 +171,7 @@ out:
 }
 
 static unsigned int
-ebt_log_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_log_info *info = par->targinfo;
 	struct nf_loginfo li;
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 126e536ff8f4..66697cbd0a8b 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -19,7 +19,7 @@
 #include <linux/netfilter_bridge/ebt_mark_t.h>
 
 static unsigned int
-ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_mark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_mark_t_info *info = par->targinfo;
 	int action = info->target & -16;
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index e4366c0a1a43..de901634fa31 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_mark_m.h>
 
 static bool
-ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_mark_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_mark_m_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 22e2ad5f23e8..5be68bbcc341 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -20,7 +20,7 @@
 #include <net/netfilter/nf_log.h>
 
 static unsigned int
-ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_nflog_info *info = par->targinfo;
 	struct nf_loginfo li;
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index f34bcc3197bd..7ba67c4b677a 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_pkttype.h>
 
 static bool
-ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_pkttype_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index a6044a6f2383..9e19166ba453 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter_bridge/ebt_redirect.h>
 
 static unsigned int
-ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_redirect_info *info = par->targinfo;
 
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 79caca34ae2b..f8f0bd1a1d51 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -17,7 +17,7 @@
 #include <linux/netfilter_bridge/ebt_nat.h>
 
 static unsigned int
-ebt_snat_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_nat_info *info = par->targinfo;
 
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 02f28fdda393..3cd6070a1137 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -120,7 +120,7 @@ static bool ebt_filter_config(const struct ebt_stp_info *info,
 }
 
 static bool
-ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_stp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_stp_info *info = par->matchinfo;
 	const struct stp_header *sp;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 852f37c27659..ae3c7cef1484 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -243,7 +243,7 @@ static void ebt_log_packet(u_int8_t pf, unsigned int hooknum,
 }
 
 static unsigned int
-ebt_ulog_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	ebt_ulog_packet(par->hooknum, skb, par->in, par->out,
 	                par->targinfo, NULL);
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index bf8ae5c7a0c5..e4ab62533c74 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -36,7 +36,7 @@ MODULE_LICENSE("GPL");
 #define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; }
 
 static bool
-ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_vlan_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_vlan_info *info = par->matchinfo;
 	const struct vlan_hdr *fp;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 73d924b88f89..9e7d089f168a 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -224,7 +224,7 @@ static inline int arp_checkentry(const struct arpt_arp *arp)
 }
 
 static unsigned int
-arpt_error(struct sk_buff *skb, const struct xt_target_param *par)
+arpt_error(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	if (net_ratelimit())
 		printk("arp_tables: error: '%s'\n",
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 4b51a027f307..e1be7dd1171b 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -9,7 +9,7 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
 MODULE_DESCRIPTION("arptables arp payload mangle target");
 
 static unsigned int
-target(struct sk_buff *skb, const struct xt_target_param *par)
+target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct arpt_mangle *mangle = par->targinfo;
 	const struct arphdr *arp;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e1a53c2da032..3ab1b81e799b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -165,7 +165,7 @@ ip_checkentry(const struct ipt_ip *ip)
 }
 
 static unsigned int
-ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
+ipt_error(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	if (net_ratelimit())
 		pr_info("error: `%s'\n", (const char *)par->targinfo);
@@ -2138,7 +2138,7 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 }
 
 static bool
-icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
+icmp_match(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct icmphdr *ic;
 	struct icmphdr _icmph;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 8815d458de46..f91c94b9a790 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -282,7 +282,7 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
  ***********************************************************************/
 
 static unsigned int
-clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
+clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
 	struct nf_conn *ct;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 563049f31aef..4bf3dc49ad1e 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -77,7 +77,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
 }
 
 static unsigned int
-ecn_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ecn_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ipt_ECN_info *einfo = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 3bd35f370817..5234f4f3499a 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -425,7 +425,7 @@ ipt_log_packet(u_int8_t pf,
 }
 
 static unsigned int
-log_tg(struct sk_buff *skb, const struct xt_target_param *par)
+log_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ipt_log_info *loginfo = par->targinfo;
 	struct nf_loginfo li;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 02b1bc477998..d2ed9dc74ebc 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -44,7 +44,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
 }
 
 static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par)
+masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	struct nf_conn_nat *nat;
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 708c7f8f7eea..f43867d1697f 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -38,7 +38,7 @@ static int netmap_tg_check(const struct xt_tgchk_param *par)
 }
 
 static unsigned int
-netmap_tg(struct sk_buff *skb, const struct xt_target_param *par)
+netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 3cf101916523..18a0656505a0 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -42,7 +42,7 @@ static int redirect_tg_check(const struct xt_tgchk_param *par)
 }
 
 static unsigned int
-redirect_tg(struct sk_buff *skb, const struct xt_target_param *par)
+redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index a86135a28058..f5f4a888e4ec 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -136,7 +136,7 @@ static inline void send_unreach(struct sk_buff *skb_in, int code)
 }
 
 static unsigned int
-reject_tg(struct sk_buff *skb, const struct xt_target_param *par)
+reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ipt_reject_info *reject = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 8f60749e87a3..446e0f467a17 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -276,7 +276,7 @@ alloc_failure:
 }
 
 static unsigned int
-ulog_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	ipt_ulog_packet(par->hooknum, skb, par->in, par->out,
 	                par->targinfo, NULL);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index e4b8f2bf8aaa..24ec548515e4 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -30,7 +30,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev,
 }
 
 static bool
-addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+addrtype_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct ipt_addrtype_info *info = par->matchinfo;
@@ -48,7 +48,7 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 }
 
 static bool
-addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
+addrtype_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct ipt_addrtype_info_v1 *info = par->matchinfo;
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 9f9810204892..48a8293bc1d1 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -30,7 +30,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool ah_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct ip_auth_hdr _ahdr;
 	const struct ip_auth_hdr *ah;
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 32e24100d8d1..744d13ee296e 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -67,7 +67,8 @@ static inline bool match_tcp(const struct sk_buff *skb,
 	return true;
 }
 
-static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool ecn_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct ipt_ecn_info *info = par->matchinfo;
 
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index b48a0fc3d9ed..98ed78281aee 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -39,7 +39,7 @@ static const struct xt_table nat_table = {
 
 /* Source NAT */
 static unsigned int
-ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par)
+ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -58,7 +58,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par)
 }
 
 static unsigned int
-ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par)
+ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 076308c1acd7..c3bc999a8bb6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -197,7 +197,7 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6)
 }
 
 static unsigned int
-ip6t_error(struct sk_buff *skb, const struct xt_target_param *par)
+ip6t_error(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	if (net_ratelimit())
 		pr_info("error: `%s'\n", (const char *)par->targinfo);
@@ -2154,7 +2154,7 @@ icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 }
 
 static bool
-icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par)
+icmp6_match(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct icmp6hdr *ic;
 	struct icmp6hdr _icmph;
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 1f47a525f484..af4ee11f2066 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -436,7 +436,7 @@ ip6t_log_packet(u_int8_t pf,
 }
 
 static unsigned int
-log_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+log_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ip6t_log_info *loginfo = par->targinfo;
 	struct nf_loginfo li;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index af1d6494ac39..47d227713758 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -175,7 +175,7 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
 }
 
 static unsigned int
-reject_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ip6t_reject_info *reject = par->targinfo;
 	struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 1580693c86c1..4fe71898381d 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -36,7 +36,8 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool ah_mt6(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	struct ip_auth_hdr _ah;
 	const struct ip_auth_hdr *ah;
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index ca287f6d2bce..2fd2be1795e0 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -20,7 +20,7 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 static bool
-eui64_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+eui64_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	unsigned char eui64[8];
 
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index a5daf0ffb4ec..8401aa82ea0b 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -35,7 +35,7 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 }
 
 static bool
-frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+frag_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct frag_hdr _frag;
 	const struct frag_hdr *fh;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index e424e7c8f824..d19d5cf47a38 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -44,7 +44,7 @@ MODULE_ALIAS("ip6t_dst");
 static struct xt_match hbh_mt6_reg[] __read_mostly;
 
 static bool
-hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+hbh_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct ipv6_opt_hdr _optsh;
 	const struct ipv6_opt_hdr *oh;
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 46fbabb493fa..8e88bb8311de 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -27,7 +27,7 @@ MODULE_DESCRIPTION("Xtables: IPv6 header types match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 static bool
-ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+ipv6header_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ip6t_ipv6header_info *info = par->matchinfo;
 	unsigned int temp;
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index c9f443e0138f..4a60788873fd 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -32,7 +32,8 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
 	return (type >= min && type <= max) ^ invert;
 }
 
-static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool mh_mt6(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	struct ip6_mh _mh;
 	const struct ip6_mh *mh;
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 09322720d2a6..793c27200881 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -36,7 +36,8 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 	return r;
 }
 
-static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool rt_mt6(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	struct ipv6_rt_hdr _route;
 	const struct ipv6_rt_hdr *rh;
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 011bc80dd2a1..c2c0e4abeb99 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -27,7 +27,7 @@ MODULE_ALIAS("ipt_CLASSIFY");
 MODULE_ALIAS("ip6t_CLASSIFY");
 
 static unsigned int
-classify_tg(struct sk_buff *skb, const struct xt_target_param *par)
+classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_classify_target_info *clinfo = par->targinfo;
 
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index e953e302141d..e04dc282e3bb 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -64,7 +64,7 @@ static void secmark_restore(struct sk_buff *skb)
 }
 
 static unsigned int
-connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+connsecmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_connsecmark_target_info *info = par->targinfo;
 
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index c8f547829bad..562bf3266e04 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -20,7 +20,7 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 
 static unsigned int xt_ct_target(struct sk_buff *skb,
-				 const struct xt_target_param *par)
+				 const struct xt_action_param *par)
 {
 	const struct xt_ct_target_info *info = par->targinfo;
 	struct nf_conn *ct = info->ct;
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 969634f293e5..0a229191e55b 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -28,7 +28,7 @@ MODULE_ALIAS("ipt_TOS");
 MODULE_ALIAS("ip6t_TOS");
 
 static unsigned int
-dscp_tg(struct sk_buff *skb, const struct xt_target_param *par)
+dscp_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_DSCP_info *dinfo = par->targinfo;
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -45,7 +45,7 @@ dscp_tg(struct sk_buff *skb, const struct xt_target_param *par)
 }
 
 static unsigned int
-dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_DSCP_info *dinfo = par->targinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -72,7 +72,7 @@ static int dscp_tg_check(const struct xt_tgchk_param *par)
 }
 
 static unsigned int
-tos_tg(struct sk_buff *skb, const struct xt_target_param *par)
+tos_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_tos_target_info *info = par->targinfo;
 	struct iphdr *iph = ip_hdr(skb);
@@ -92,7 +92,7 @@ tos_tg(struct sk_buff *skb, const struct xt_target_param *par)
 }
 
 static unsigned int
-tos_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+tos_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_tos_target_info *info = par->targinfo;
 	struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
index 77b99f732711..95b084800fcc 100644
--- a/net/netfilter/xt_HL.c
+++ b/net/netfilter/xt_HL.c
@@ -26,7 +26,7 @@ MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target");
 MODULE_LICENSE("GPL");
 
 static unsigned int
-ttl_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ttl_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct iphdr *iph;
 	const struct ipt_TTL_info *info = par->targinfo;
@@ -66,7 +66,7 @@ ttl_tg(struct sk_buff *skb, const struct xt_target_param *par)
 }
 
 static unsigned int
-hl_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+hl_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct ipv6hdr *ip6h;
 	const struct ip6t_HL_info *info = par->targinfo;
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index ab6f8ff9c9a7..a4140509eea1 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -49,7 +49,7 @@ struct xt_led_info_internal {
 };
 
 static unsigned int
-led_tg(struct sk_buff *skb, const struct xt_target_param *par)
+led_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_led_info *ledinfo = par->targinfo;
 	struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 42dd8747b421..a17dd0f589b2 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -22,7 +22,7 @@ MODULE_ALIAS("ipt_NFLOG");
 MODULE_ALIAS("ip6t_NFLOG");
 
 static unsigned int
-nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
+nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_nflog_info *info = par->targinfo;
 	struct nf_loginfo li;
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index f9217cb56fe3..039cce1bde3d 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -31,7 +31,7 @@ static u32 jhash_initval __read_mostly;
 static bool rnd_inited __read_mostly;
 
 static unsigned int
-nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
+nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_NFQ_info *tinfo = par->targinfo;
 
@@ -65,7 +65,7 @@ static u32 hash_v6(const struct sk_buff *skb)
 #endif
 
 static unsigned int
-nfqueue_tg_v1(struct sk_buff *skb, const struct xt_target_param *par)
+nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_NFQ_info_v1 *info = par->targinfo;
 	u32 queue = info->queuenum;
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index e7a0a54fd4ea..512b9123252f 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -13,7 +13,7 @@ MODULE_ALIAS("ipt_NOTRACK");
 MODULE_ALIAS("ip6t_NOTRACK");
 
 static unsigned int
-notrack_tg(struct sk_buff *skb, const struct xt_target_param *par)
+notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	/* Previously seen (loopback)? Ignore. */
 	if (skb->nfct != NULL)
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index a02193f06e39..69c01e10f8af 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -73,7 +73,7 @@ void xt_rateest_put(struct xt_rateest *est)
 EXPORT_SYMBOL_GPL(xt_rateest_put);
 
 static unsigned int
-xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par)
+xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_rateest_target_info *info = par->targinfo;
 	struct gnet_stats_basic_packed *stats = &info->est->bstats;
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index a91d4a7d5a2c..23b2d6c486b5 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_SECMARK");
 static u8 mode;
 
 static unsigned int
-secmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+secmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	u32 secmark = 0;
 	const struct xt_secmark_target_info *info = par->targinfo;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index d04606459c9d..62ec021fbd50 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -172,7 +172,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
 }
 
 static unsigned int
-tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct iphdr *iph = ip_hdr(skb);
 	__be16 newlen;
@@ -195,7 +195,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par)
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 static unsigned int
-tcpmss_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	u8 nexthdr;
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index e8b57609ddc0..9dc9ecfdd546 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -74,7 +74,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
 }
 
 static unsigned int
-tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	return tcpoptstrip_mangle_packet(skb, par->targinfo, ip_hdrlen(skb),
 	       sizeof(struct iphdr) + sizeof(struct tcphdr));
@@ -82,7 +82,7 @@ tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par)
 
 #if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
 static unsigned int
-tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	int tcphoff;
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 49da6c05f4e0..d7920d9f49e9 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -84,7 +84,7 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 }
 
 static unsigned int
-tee_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_tee_tginfo *info = par->targinfo;
 	struct iphdr *iph;
@@ -165,7 +165,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 }
 
 static unsigned int
-tee_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_tee_tginfo *info = par->targinfo;
 
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 4f246ddc5c48..e1a0dedac258 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -25,7 +25,7 @@
 #include <net/netfilter/nf_tproxy_core.h>
 
 static unsigned int
-tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par)
+tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	const struct xt_tproxy_target_info *tgi = par->targinfo;
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index fbb04b86c46b..df48967af382 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -11,7 +11,7 @@ MODULE_ALIAS("ipt_TRACE");
 MODULE_ALIAS("ip6t_TRACE");
 
 static unsigned int
-trace_tg(struct sk_buff *skb, const struct xt_target_param *par)
+trace_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	skb->nf_trace = 1;
 	return XT_CONTINUE;
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 6c941e1c6b9e..67fc317118c0 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -86,7 +86,7 @@ xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family)
 }
 
 static bool
-xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+xt_cluster_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct sk_buff *pskb = (struct sk_buff *)skb;
 	const struct xt_cluster_match_info *info = par->matchinfo;
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index e82179832acd..1dbb3e13c059 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -16,7 +16,7 @@ MODULE_ALIAS("ipt_comment");
 MODULE_ALIAS("ip6t_comment");
 
 static bool
-comment_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+comment_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	/* We always match */
 	return true;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index ff738a5f963a..5e5cf15f011f 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -18,7 +18,7 @@ MODULE_ALIAS("ipt_connbytes");
 MODULE_ALIAS("ip6t_connbytes");
 
 static bool
-connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+connbytes_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_connbytes_info *sinfo = par->matchinfo;
 	const struct nf_conn *ct;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 326bc1b81681..6eec1913298e 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -173,7 +173,7 @@ static int count_them(struct net *net,
 }
 
 static bool
-connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+connlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct xt_connlimit_info *info = par->matchinfo;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index ae1015484ae2..b43cfc7f1bb5 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ipt_connmark");
 MODULE_ALIAS("ip6t_connmark");
 
 static unsigned int
-connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_connmark_tginfo1 *info = par->targinfo;
 	enum ip_conntrack_info ctinfo;
@@ -91,7 +91,7 @@ static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
 }
 
 static bool
-connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+connmark_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_connmark_mtinfo1 *info = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 3348706ce56d..14a96f48001d 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -113,7 +113,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info,
 }
 
 static bool
-conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par,
+conntrack_mt(const struct sk_buff *skb, const struct xt_action_param *par,
              u16 state_mask, u16 status_mask)
 {
 	const struct xt_conntrack_mtinfo2 *info = par->matchinfo;
@@ -191,7 +191,7 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par,
 }
 
 static bool
-conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
+conntrack_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_conntrack_mtinfo1 *info = par->matchinfo;
 
@@ -199,7 +199,7 @@ conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
 }
 
 static bool
-conntrack_mt_v2(const struct sk_buff *skb, const struct xt_match_param *par)
+conntrack_mt_v2(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_conntrack_mtinfo2 *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 0d260aec487f..7a4d4e8edc10 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -96,7 +96,7 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
 }
 
 static bool
-dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+dccp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_dccp_info *info = par->matchinfo;
 	const struct dccp_hdr *dh;
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 9db51fddbdb8..2133b509d157 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tos");
 MODULE_ALIAS("ip6t_tos");
 
 static bool
-dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+dscp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_dscp_info *info = par->matchinfo;
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -34,7 +34,7 @@ dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 }
 
 static bool
-dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+dscp_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_dscp_info *info = par->matchinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -54,7 +54,8 @@ static int dscp_mt_check(const struct xt_mtchk_param *par)
 	return 0;
 }
 
-static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool tos_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct xt_tos_match_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 143bfdc8e38f..39caafff9485 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -36,7 +36,8 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool esp_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct ip_esp_hdr *eh;
 	struct ip_esp_hdr _esp;
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 0c366d387c8c..700c21e0804c 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -516,7 +516,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 }
 
 static bool
-hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+hashlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 	struct xt_hashlimit_htable *hinfo = info->hinfo;
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index b8b3e13dc71e..e941bd26a519 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_helper");
 
 
 static bool
-helper_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+helper_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_helper_info *info = par->matchinfo;
 	const struct nf_conn *ct;
diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c
index be53f7299623..335c34a4fd1c 100644
--- a/net/netfilter/xt_hl.c
+++ b/net/netfilter/xt_hl.c
@@ -25,7 +25,8 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_ttl");
 MODULE_ALIAS("ip6t_hl");
 
-static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool ttl_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct ipt_ttl_info *info = par->matchinfo;
 	const u8 ttl = ip_hdr(skb)->ttl;
@@ -44,7 +45,8 @@ static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool hl_mt6(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct ip6t_hl_info *info = par->matchinfo;
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 8471d9715bde..9578860a9217 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -17,7 +17,7 @@
 #include <linux/netfilter/xt_iprange.h>
 
 static bool
-iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
+iprange_mt4(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_iprange_mtinfo *info = par->matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
@@ -68,7 +68,7 @@ iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b)
 }
 
 static bool
-iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+iprange_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_iprange_mtinfo *info = par->matchinfo;
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index c4871ca6c86d..842149b4122c 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -21,7 +21,7 @@ MODULE_ALIAS("ipt_length");
 MODULE_ALIAS("ip6t_length");
 
 static bool
-length_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+length_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_length_info *info = par->matchinfo;
 	u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
@@ -30,7 +30,7 @@ length_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 }
 
 static bool
-length_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+length_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_length_info *info = par->matchinfo;
 	const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 88215dca19cb..7dcfe8602c83 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -65,7 +65,7 @@ static DEFINE_SPINLOCK(limit_lock);
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
 static bool
-limit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+limit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_rateinfo *r = par->matchinfo;
 	struct xt_limit_priv *priv = r->master;
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index b971ce93773e..36c49644ce35 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -25,7 +25,8 @@ MODULE_DESCRIPTION("Xtables: MAC address match");
 MODULE_ALIAS("ipt_mac");
 MODULE_ALIAS("ip6t_mac");
 
-static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool mac_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct xt_mac_info *info = par->matchinfo;
 	bool ret;
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 035c468a0040..3c8347076d55 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_MARK");
 MODULE_ALIAS("ip6t_MARK");
 
 static unsigned int
-mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+mark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_mark_tginfo2 *info = par->targinfo;
 
@@ -34,7 +34,7 @@ mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 }
 
 static bool
-mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+mark_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_mark_mtinfo1 *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index b21f90432247..3140fd4049fc 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -72,7 +72,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
 }
 
 static bool
-multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+multiport_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const __be16 *pptr;
 	__be16 _ports[2];
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 8dcde13a0781..37aa55860a96 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -193,8 +193,8 @@ static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info
 	return ip->ttl == f_ttl;
 }
 
-static bool xt_osf_match_packet(const struct sk_buff *skb,
-		const struct xt_match_param *p)
+static bool
+xt_osf_match_packet(const struct sk_buff *skb, const struct xt_action_param *p)
 {
 	const struct xt_osf_info *info = p->matchinfo;
 	const struct iphdr *ip = ip_hdr(skb);
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index d24c76dffee2..3dd1391d385a 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -18,7 +18,7 @@
 #include <linux/netfilter/xt_owner.h>
 
 static bool
-owner_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+owner_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_owner_match_info *info = par->matchinfo;
 	const struct file *filp;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index d0bdf3dd4d25..298cd290b06a 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -22,7 +22,7 @@ MODULE_ALIAS("ip6t_physdev");
 
 
 static bool
-physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+physdev_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	const struct xt_physdev_info *info = par->matchinfo;
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 69da1d3a1d85..d95f2149df93 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_pkttype");
 MODULE_ALIAS("ip6t_pkttype");
 
 static bool
-pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+pkttype_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_pkttype_info *info = par->matchinfo;
 	u_int8_t type;
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 1fa239c1fb93..1abfc7ad4277 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -110,7 +110,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
 }
 
 static bool
-policy_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+policy_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_policy_info *info = par->matchinfo;
 	int ret;
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 7c95d69f6f06..e79e07c75da8 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ip6t_quota");
 static DEFINE_SPINLOCK(quota_lock);
 
 static bool
-quota_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+quota_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct xt_quota_info *q = (void *)par->matchinfo;
 	struct xt_quota_priv *priv = q->master;
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 23805f8a444b..53f7a4d12e1e 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -15,7 +15,7 @@
 
 
 static bool
-xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+xt_rateest_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_rateest_match_info *info = par->matchinfo;
 	struct gnet_stats_rate_est *r;
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 484d1689bfde..b063c783901a 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -22,7 +22,7 @@ MODULE_DESCRIPTION("Xtables: Routing realm match");
 MODULE_ALIAS("ipt_realm");
 
 static bool
-realm_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+realm_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_realm_info *info = par->matchinfo;
 	const struct dst_entry *dst = skb_dst(skb);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index b88d63b9c76a..503b7f199f14 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -224,7 +224,7 @@ static void recent_table_flush(struct recent_table *t)
 }
 
 static bool
-recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+recent_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	struct recent_net *recent_net = recent_pernet(net);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index c3694df54672..da4c3cb31403 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -114,7 +114,7 @@ match_packet(const struct sk_buff *skb,
 }
 
 static bool
-sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+sctp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_sctp_info *info = par->matchinfo;
 	const sctp_sctphdr_t *sh;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index a9b16867e1f7..2665e32d5db1 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -88,7 +88,7 @@ extract_icmp_fields(const struct sk_buff *skb,
 
 
 static bool
-socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
+socket_match(const struct sk_buff *skb, const struct xt_action_param *par,
 	     const struct xt_socket_mtinfo1 *info)
 {
 	const struct iphdr *iph = ip_hdr(skb);
@@ -174,13 +174,13 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
 }
 
 static bool
-socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+socket_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	return socket_match(skb, par, NULL);
 }
 
 static bool
-socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
+socket_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	return socket_match(skb, par, par->matchinfo);
 }
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index bb1271852d50..344cca661099 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -21,7 +21,7 @@ MODULE_ALIAS("ipt_state");
 MODULE_ALIAS("ip6t_state");
 
 static bool
-state_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+state_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_state_info *sinfo = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 5aeca1d023d8..ee4540a2cf33 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_statistic");
 static DEFINE_SPINLOCK(nth_lock);
 
 static bool
-statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+statistic_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_statistic_info *info = par->matchinfo;
 	bool ret = info->flags & XT_STATISTIC_INVERT;
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index f6d5112175e6..fd5dc5016a17 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_string");
 MODULE_ALIAS("ip6t_string");
 
 static bool
-string_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+string_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_string_info *conf = par->matchinfo;
 	struct ts_state state;
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 4809b34b10f8..5c8a7b435d66 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tcpmss");
 MODULE_ALIAS("ip6t_tcpmss");
 
 static bool
-tcpmss_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+tcpmss_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_tcpmss_match_info *info = par->matchinfo;
 	const struct tcphdr *th;
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index efa2ede24ae6..19c31d4c2ba6 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -62,7 +62,8 @@ tcp_find_option(u_int8_t option,
 	return invert;
 }
 
-static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool tcp_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct tcphdr *th;
 	struct tcphdr _tcph;
@@ -128,7 +129,8 @@ static int tcp_mt_check(const struct xt_mtchk_param *par)
 	return (tcpinfo->invflags & ~XT_TCP_INV_MASK) ? -EINVAL : 0;
 }
 
-static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool udp_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct udphdr *uh;
 	struct udphdr _udph;
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index d8556fdda440..ffdb8fac0be1 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -152,7 +152,7 @@ static void localtime_3(struct xtm *r, time_t time)
 }
 
 static bool
-time_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+time_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_time_info *info = par->matchinfo;
 	unsigned int packet_time;
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index d7c05f03a7e7..f62797b1ddb5 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -86,7 +86,8 @@ static bool u32_match_it(const struct xt_u32 *data,
 	return true;
 }
 
-static bool u32_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool u32_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct xt_u32 *data = par->matchinfo;
 	bool ret;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 03f80a0fa167..1f9595467c17 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -199,7 +199,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 {
 	int ret = 0, result = 0;
 	struct tcf_ipt *ipt = a->priv;
-	struct xt_target_param par;
+	struct xt_action_param par;
 
 	if (skb_cloned(skb)) {
 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-- 
cgit v1.2.3-59-g8ed1b


From 62fc8051083a334578c3f4b3488808f210b4565f Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 7 Jul 2009 20:42:08 +0200
Subject: netfilter: xtables: deconstify struct xt_action_param for matches

In future, layer-3 matches will be an xt module of their own, and
need to set the fragoff and thoff fields. Adding more pointers would
needlessly increase memory requirements (esp. so for 64-bit, where
pointers are wider).

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h   | 2 +-
 net/bridge/netfilter/ebt_802_3.c     | 2 +-
 net/bridge/netfilter/ebt_among.c     | 2 +-
 net/bridge/netfilter/ebt_arp.c       | 2 +-
 net/bridge/netfilter/ebt_ip.c        | 2 +-
 net/bridge/netfilter/ebt_ip6.c       | 2 +-
 net/bridge/netfilter/ebt_limit.c     | 2 +-
 net/bridge/netfilter/ebt_mark_m.c    | 2 +-
 net/bridge/netfilter/ebt_pkttype.c   | 2 +-
 net/bridge/netfilter/ebt_stp.c       | 2 +-
 net/bridge/netfilter/ebt_vlan.c      | 2 +-
 net/ipv4/netfilter/ip_tables.c       | 2 +-
 net/ipv4/netfilter/ipt_addrtype.c    | 4 ++--
 net/ipv4/netfilter/ipt_ah.c          | 2 +-
 net/ipv4/netfilter/ipt_ecn.c         | 3 +--
 net/ipv6/netfilter/ip6_tables.c      | 2 +-
 net/ipv6/netfilter/ip6t_ah.c         | 3 +--
 net/ipv6/netfilter/ip6t_eui64.c      | 2 +-
 net/ipv6/netfilter/ip6t_frag.c       | 2 +-
 net/ipv6/netfilter/ip6t_hbh.c        | 2 +-
 net/ipv6/netfilter/ip6t_ipv6header.c | 2 +-
 net/ipv6/netfilter/ip6t_mh.c         | 3 +--
 net/ipv6/netfilter/ip6t_rt.c         | 3 +--
 net/netfilter/xt_cluster.c           | 2 +-
 net/netfilter/xt_comment.c           | 2 +-
 net/netfilter/xt_connbytes.c         | 2 +-
 net/netfilter/xt_connlimit.c         | 2 +-
 net/netfilter/xt_connmark.c          | 2 +-
 net/netfilter/xt_conntrack.c         | 6 +++---
 net/netfilter/xt_dccp.c              | 2 +-
 net/netfilter/xt_dscp.c              | 7 +++----
 net/netfilter/xt_esp.c               | 3 +--
 net/netfilter/xt_hashlimit.c         | 2 +-
 net/netfilter/xt_helper.c            | 2 +-
 net/netfilter/xt_hl.c                | 6 ++----
 net/netfilter/xt_iprange.c           | 4 ++--
 net/netfilter/xt_length.c            | 4 ++--
 net/netfilter/xt_limit.c             | 2 +-
 net/netfilter/xt_mac.c               | 3 +--
 net/netfilter/xt_mark.c              | 2 +-
 net/netfilter/xt_multiport.c         | 2 +-
 net/netfilter/xt_osf.c               | 2 +-
 net/netfilter/xt_owner.c             | 2 +-
 net/netfilter/xt_physdev.c           | 2 +-
 net/netfilter/xt_pkttype.c           | 2 +-
 net/netfilter/xt_policy.c            | 2 +-
 net/netfilter/xt_quota.c             | 2 +-
 net/netfilter/xt_rateest.c           | 2 +-
 net/netfilter/xt_realm.c             | 2 +-
 net/netfilter/xt_recent.c            | 2 +-
 net/netfilter/xt_sctp.c              | 2 +-
 net/netfilter/xt_socket.c            | 6 +++---
 net/netfilter/xt_state.c             | 2 +-
 net/netfilter/xt_statistic.c         | 2 +-
 net/netfilter/xt_string.c            | 2 +-
 net/netfilter/xt_tcpmss.c            | 2 +-
 net/netfilter/xt_tcpudp.c            | 6 ++----
 net/netfilter/xt_time.c              | 2 +-
 net/netfilter/xt_u32.c               | 3 +--
 59 files changed, 70 insertions(+), 82 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 6d1f9a638056..40c6a8d2a9ea 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -288,7 +288,7 @@ struct xt_match {
 	   non-linear skb, using skb_header_pointer and
 	   skb_ip_make_writable. */
 	bool (*match)(const struct sk_buff *skb,
-		      const struct xt_action_param *);
+		      struct xt_action_param *);
 
 	/* Called when user tries to insert an entry of this type. */
 	int (*checkentry)(const struct xt_mtchk_param *);
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 4b0df00c82ec..2a449b7ab8fa 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_802_3.h>
 
 static bool
-ebt_802_3_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_802_3_info *info = par->matchinfo;
 	const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb);
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index c04f9461f734..8b84c581be30 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -129,7 +129,7 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr)
 }
 
 static bool
-ebt_among_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_among_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_among_info *info = par->matchinfo;
 	const char *dmac, *smac;
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 6203f4dea2e2..cd457b891b27 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter_bridge/ebt_arp.h>
 
 static bool
-ebt_arp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_arp_info *info = par->matchinfo;
 	const struct arphdr *ah;
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index a0cde7442b55..23bca62d58d2 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -25,7 +25,7 @@ struct tcpudphdr {
 };
 
 static bool
-ebt_ip_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_ip_info *info = par->matchinfo;
 	const struct iphdr *ih;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index c451dc2ff822..50a46afc2bcc 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -28,7 +28,7 @@ struct tcpudphdr {
 };
 
 static bool
-ebt_ip6_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_ip6_info *info = par->matchinfo;
 	const struct ipv6hdr *ih6;
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 760923f08067..517e78befcb2 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -32,7 +32,7 @@ static DEFINE_SPINLOCK(limit_lock);
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
 static bool
-ebt_limit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ebt_limit_info *info = (void *)par->matchinfo;
 	unsigned long now = jiffies;
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index de901634fa31..d98baefc4c7e 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_mark_m.h>
 
 static bool
-ebt_mark_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_mark_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_mark_m_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index 7ba67c4b677a..496a56515307 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_pkttype.h>
 
 static bool
-ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_pkttype_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 3cd6070a1137..5b33a2e634a6 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -120,7 +120,7 @@ static bool ebt_filter_config(const struct ebt_stp_info *info,
 }
 
 static bool
-ebt_stp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_stp_info *info = par->matchinfo;
 	const struct stp_header *sp;
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index e4ab62533c74..87b53b3a921d 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -36,7 +36,7 @@ MODULE_LICENSE("GPL");
 #define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; }
 
 static bool
-ebt_vlan_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_vlan_info *info = par->matchinfo;
 	const struct vlan_hdr *fp;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 3ab1b81e799b..4e674f2824a7 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2138,7 +2138,7 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 }
 
 static bool
-icmp_match(const struct sk_buff *skb, const struct xt_action_param *par)
+icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct icmphdr *ic;
 	struct icmphdr _icmph;
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 24ec548515e4..db8bff0fb86d 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -30,7 +30,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev,
 }
 
 static bool
-addrtype_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par)
+addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct ipt_addrtype_info *info = par->matchinfo;
@@ -48,7 +48,7 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par)
 }
 
 static bool
-addrtype_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
+addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct ipt_addrtype_info_v1 *info = par->matchinfo;
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 48a8293bc1d1..c9d83dc2d6fa 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -30,7 +30,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool ah_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ip_auth_hdr _ahdr;
 	const struct ip_auth_hdr *ah;
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 744d13ee296e..b79dddc9edd6 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -67,8 +67,7 @@ static inline bool match_tcp(const struct sk_buff *skb,
 	return true;
 }
 
-static bool ecn_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ipt_ecn_info *info = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index c3bc999a8bb6..4549f8d6f88f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2154,7 +2154,7 @@ icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 }
 
 static bool
-icmp6_match(const struct sk_buff *skb, const struct xt_action_param *par)
+icmp6_match(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct icmp6hdr *ic;
 	struct icmp6hdr _icmph;
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 4fe71898381d..c89887f35a46 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -36,8 +36,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool ah_mt6(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ip_auth_hdr _ah;
 	const struct ip_auth_hdr *ah;
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 2fd2be1795e0..f32fce34145a 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -20,7 +20,7 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 static bool
-eui64_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+eui64_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	unsigned char eui64[8];
 
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 8401aa82ea0b..fcc8c72f218e 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -35,7 +35,7 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 }
 
 static bool
-frag_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+frag_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct frag_hdr _frag;
 	const struct frag_hdr *fh;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index d19d5cf47a38..f8aebc098d71 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -44,7 +44,7 @@ MODULE_ALIAS("ip6t_dst");
 static struct xt_match hbh_mt6_reg[] __read_mostly;
 
 static bool
-hbh_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ipv6_opt_hdr _optsh;
 	const struct ipv6_opt_hdr *oh;
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 8e88bb8311de..54bd9790603f 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -27,7 +27,7 @@ MODULE_DESCRIPTION("Xtables: IPv6 header types match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 static bool
-ipv6header_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ip6t_ipv6header_info *info = par->matchinfo;
 	unsigned int temp;
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index 4a60788873fd..eb1c3d65271a 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -32,8 +32,7 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
 	return (type >= min && type <= max) ^ invert;
 }
 
-static bool mh_mt6(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool mh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ip6_mh _mh;
 	const struct ip6_mh *mh;
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 793c27200881..ee584693ee35 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -36,8 +36,7 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 	return r;
 }
 
-static bool rt_mt6(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ipv6_rt_hdr _route;
 	const struct ipv6_rt_hdr *rh;
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 67fc317118c0..30b95a1c1c89 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -86,7 +86,7 @@ xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family)
 }
 
 static bool
-xt_cluster_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct sk_buff *pskb = (struct sk_buff *)skb;
 	const struct xt_cluster_match_info *info = par->matchinfo;
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 1dbb3e13c059..5c861d2f21ca 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -16,7 +16,7 @@ MODULE_ALIAS("ipt_comment");
 MODULE_ALIAS("ip6t_comment");
 
 static bool
-comment_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+comment_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	/* We always match */
 	return true;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 5e5cf15f011f..73517835303d 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -18,7 +18,7 @@ MODULE_ALIAS("ipt_connbytes");
 MODULE_ALIAS("ip6t_connbytes");
 
 static bool
-connbytes_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_connbytes_info *sinfo = par->matchinfo;
 	const struct nf_conn *ct;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 6eec1913298e..f130fd9817be 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -173,7 +173,7 @@ static int count_them(struct net *net,
 }
 
 static bool
-connlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct xt_connlimit_info *info = par->matchinfo;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index b43cfc7f1bb5..7278145e6a68 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -91,7 +91,7 @@ static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
 }
 
 static bool
-connmark_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+connmark_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_connmark_mtinfo1 *info = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 14a96f48001d..39681f10291c 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -113,7 +113,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info,
 }
 
 static bool
-conntrack_mt(const struct sk_buff *skb, const struct xt_action_param *par,
+conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
              u16 state_mask, u16 status_mask)
 {
 	const struct xt_conntrack_mtinfo2 *info = par->matchinfo;
@@ -191,7 +191,7 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_action_param *par,
 }
 
 static bool
-conntrack_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
+conntrack_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_conntrack_mtinfo1 *info = par->matchinfo;
 
@@ -199,7 +199,7 @@ conntrack_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
 }
 
 static bool
-conntrack_mt_v2(const struct sk_buff *skb, const struct xt_action_param *par)
+conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_conntrack_mtinfo2 *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 7a4d4e8edc10..cc2c2919439f 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -96,7 +96,7 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
 }
 
 static bool
-dccp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+dccp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_dccp_info *info = par->matchinfo;
 	const struct dccp_hdr *dh;
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 2133b509d157..64670fc5d0e1 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tos");
 MODULE_ALIAS("ip6t_tos");
 
 static bool
-dscp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+dscp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_dscp_info *info = par->matchinfo;
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -34,7 +34,7 @@ dscp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 }
 
 static bool
-dscp_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+dscp_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_dscp_info *info = par->matchinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -54,8 +54,7 @@ static int dscp_mt_check(const struct xt_mtchk_param *par)
 	return 0;
 }
 
-static bool tos_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_tos_match_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 39caafff9485..7c2d80020554 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -36,8 +36,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool esp_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool esp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ip_esp_hdr *eh;
 	struct ip_esp_hdr _esp;
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 700c21e0804c..25a6e548ec27 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -516,7 +516,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 }
 
 static bool
-hashlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 	struct xt_hashlimit_htable *hinfo = info->hinfo;
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index e941bd26a519..9f4ab00c8050 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_helper");
 
 
 static bool
-helper_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+helper_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_helper_info *info = par->matchinfo;
 	const struct nf_conn *ct;
diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c
index 335c34a4fd1c..7d12221ead89 100644
--- a/net/netfilter/xt_hl.c
+++ b/net/netfilter/xt_hl.c
@@ -25,8 +25,7 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_ttl");
 MODULE_ALIAS("ip6t_hl");
 
-static bool ttl_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ipt_ttl_info *info = par->matchinfo;
 	const u8 ttl = ip_hdr(skb)->ttl;
@@ -45,8 +44,7 @@ static bool ttl_mt(const struct sk_buff *skb,
 	return false;
 }
 
-static bool hl_mt6(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool hl_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ip6t_hl_info *info = par->matchinfo;
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 9578860a9217..88f7c3511c72 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -17,7 +17,7 @@
 #include <linux/netfilter/xt_iprange.h>
 
 static bool
-iprange_mt4(const struct sk_buff *skb, const struct xt_action_param *par)
+iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_iprange_mtinfo *info = par->matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
@@ -68,7 +68,7 @@ iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b)
 }
 
 static bool
-iprange_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_iprange_mtinfo *info = par->matchinfo;
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 842149b4122c..176e5570a999 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -21,7 +21,7 @@ MODULE_ALIAS("ipt_length");
 MODULE_ALIAS("ip6t_length");
 
 static bool
-length_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+length_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_length_info *info = par->matchinfo;
 	u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
@@ -30,7 +30,7 @@ length_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 }
 
 static bool
-length_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+length_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_length_info *info = par->matchinfo;
 	const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 7dcfe8602c83..32b7a579a032 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -65,7 +65,7 @@ static DEFINE_SPINLOCK(limit_lock);
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
 static bool
-limit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_rateinfo *r = par->matchinfo;
 	struct xt_limit_priv *priv = r->master;
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 36c49644ce35..8160f6b1435d 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -25,8 +25,7 @@ MODULE_DESCRIPTION("Xtables: MAC address match");
 MODULE_ALIAS("ipt_mac");
 MODULE_ALIAS("ip6t_mac");
 
-static bool mac_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool mac_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_mac_info *info = par->matchinfo;
 	bool ret;
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 3c8347076d55..23345238711b 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -34,7 +34,7 @@ mark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 }
 
 static bool
-mark_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+mark_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_mark_mtinfo1 *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 3140fd4049fc..52beb68256c8 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -72,7 +72,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
 }
 
 static bool
-multiport_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+multiport_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const __be16 *pptr;
 	__be16 _ports[2];
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 37aa55860a96..4327e101c047 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -194,7 +194,7 @@ static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info
 }
 
 static bool
-xt_osf_match_packet(const struct sk_buff *skb, const struct xt_action_param *p)
+xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 {
 	const struct xt_osf_info *info = p->matchinfo;
 	const struct iphdr *ip = ip_hdr(skb);
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 3dd1391d385a..772d7389b337 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -18,7 +18,7 @@
 #include <linux/netfilter/xt_owner.h>
 
 static bool
-owner_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_owner_match_info *info = par->matchinfo;
 	const struct file *filp;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 298cd290b06a..d7ca16b8b8df 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -22,7 +22,7 @@ MODULE_ALIAS("ip6t_physdev");
 
 
 static bool
-physdev_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	const struct xt_physdev_info *info = par->matchinfo;
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index d95f2149df93..5b645cb598fc 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_pkttype");
 MODULE_ALIAS("ip6t_pkttype");
 
 static bool
-pkttype_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_pkttype_info *info = par->matchinfo;
 	u_int8_t type;
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 1abfc7ad4277..f23e97bb42d7 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -110,7 +110,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
 }
 
 static bool
-policy_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+policy_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_policy_info *info = par->matchinfo;
 	int ret;
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index e79e07c75da8..b4f7dfea5980 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ip6t_quota");
 static DEFINE_SPINLOCK(quota_lock);
 
 static bool
-quota_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct xt_quota_info *q = (void *)par->matchinfo;
 	struct xt_quota_priv *priv = q->master;
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 53f7a4d12e1e..76a083184d8e 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -15,7 +15,7 @@
 
 
 static bool
-xt_rateest_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_rateest_match_info *info = par->matchinfo;
 	struct gnet_stats_rate_est *r;
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index b063c783901a..459a7b256eb2 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -22,7 +22,7 @@ MODULE_DESCRIPTION("Xtables: Routing realm match");
 MODULE_ALIAS("ipt_realm");
 
 static bool
-realm_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+realm_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_realm_info *info = par->matchinfo;
 	const struct dst_entry *dst = skb_dst(skb);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 503b7f199f14..2808a7e33947 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -224,7 +224,7 @@ static void recent_table_flush(struct recent_table *t)
 }
 
 static bool
-recent_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	struct recent_net *recent_net = recent_pernet(net);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index da4c3cb31403..94d8b5deb2d0 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -114,7 +114,7 @@ match_packet(const struct sk_buff *skb,
 }
 
 static bool
-sctp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_sctp_info *info = par->matchinfo;
 	const sctp_sctphdr_t *sh;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 2665e32d5db1..3d54c236a1ba 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -88,7 +88,7 @@ extract_icmp_fields(const struct sk_buff *skb,
 
 
 static bool
-socket_match(const struct sk_buff *skb, const struct xt_action_param *par,
+socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 	     const struct xt_socket_mtinfo1 *info)
 {
 	const struct iphdr *iph = ip_hdr(skb);
@@ -174,13 +174,13 @@ socket_match(const struct sk_buff *skb, const struct xt_action_param *par,
 }
 
 static bool
-socket_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par)
+socket_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	return socket_match(skb, par, NULL);
 }
 
 static bool
-socket_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
+socket_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	return socket_match(skb, par, par->matchinfo);
 }
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 344cca661099..e12e053d3782 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -21,7 +21,7 @@ MODULE_ALIAS("ipt_state");
 MODULE_ALIAS("ip6t_state");
 
 static bool
-state_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+state_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_state_info *sinfo = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index ee4540a2cf33..96e62b8fd6b1 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_statistic");
 static DEFINE_SPINLOCK(nth_lock);
 
 static bool
-statistic_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_statistic_info *info = par->matchinfo;
 	bool ret = info->flags & XT_STATISTIC_INVERT;
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index fd5dc5016a17..d3c48b14ab94 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_string");
 MODULE_ALIAS("ip6t_string");
 
 static bool
-string_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+string_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_string_info *conf = par->matchinfo;
 	struct ts_state state;
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 5c8a7b435d66..f90728f6b8a4 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tcpmss");
 MODULE_ALIAS("ip6t_tcpmss");
 
 static bool
-tcpmss_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_tcpmss_match_info *info = par->matchinfo;
 	const struct tcphdr *th;
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 19c31d4c2ba6..dedde33c9db6 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -62,8 +62,7 @@ tcp_find_option(u_int8_t option,
 	return invert;
 }
 
-static bool tcp_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct tcphdr *th;
 	struct tcphdr _tcph;
@@ -129,8 +128,7 @@ static int tcp_mt_check(const struct xt_mtchk_param *par)
 	return (tcpinfo->invflags & ~XT_TCP_INV_MASK) ? -EINVAL : 0;
 }
 
-static bool udp_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool udp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct udphdr *uh;
 	struct udphdr _udph;
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index ffdb8fac0be1..79234bb19d05 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -152,7 +152,7 @@ static void localtime_3(struct xtm *r, time_t time)
 }
 
 static bool
-time_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+time_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_time_info *info = par->matchinfo;
 	unsigned int packet_time;
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index f62797b1ddb5..a95b50342dbb 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -86,8 +86,7 @@ static bool u32_match_it(const struct xt_u32 *data,
 	return true;
 }
 
-static bool u32_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_u32 *data = par->matchinfo;
 	bool ret;
-- 
cgit v1.2.3-59-g8ed1b


From b4ba26119b06052888696491f614201817491a0d Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 7 Jul 2009 20:54:30 +0200
Subject: netfilter: xtables: change hotdrop pointer to direct modification

Since xt_action_param is writable, let's use it. The pointer to
'bool hotdrop' always worried me (8 bytes (64-bit) to write 1 byte!).
Surprisingly, this results in a reduction in size:

   text    data     bss filename
5457066  692730  357892 vmlinux.o-prev
5456554  692730  357892 vmlinux.o

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h |  5 ++++-
 net/bridge/netfilter/ebtables.c    |  5 ++---
 net/ipv4/netfilter/arp_tables.c    |  6 +++---
 net/ipv4/netfilter/ip_tables.c     |  9 ++++-----
 net/ipv4/netfilter/ipt_ah.c        |  2 +-
 net/ipv4/netfilter/ipt_ecn.c       |  2 +-
 net/ipv6/netfilter/ip6_tables.c    | 11 +++++------
 net/ipv6/netfilter/ip6t_ah.c       |  4 ++--
 net/ipv6/netfilter/ip6t_eui64.c    |  2 +-
 net/ipv6/netfilter/ip6t_frag.c     |  4 ++--
 net/ipv6/netfilter/ip6t_hbh.c      |  4 ++--
 net/ipv6/netfilter/ip6t_mh.c       |  4 ++--
 net/ipv6/netfilter/ip6t_rt.c       |  4 ++--
 net/netfilter/xt_connlimit.c       |  4 ++--
 net/netfilter/xt_dccp.c            |  4 ++--
 net/netfilter/xt_esp.c             |  2 +-
 net/netfilter/xt_hashlimit.c       |  2 +-
 net/netfilter/xt_multiport.c       |  2 +-
 net/netfilter/xt_recent.c          |  2 +-
 net/netfilter/xt_sctp.c            |  4 ++--
 net/netfilter/xt_tcpmss.c          |  2 +-
 net/netfilter/xt_tcpudp.c          | 10 +++++-----
 22 files changed, 47 insertions(+), 47 deletions(-)

(limited to 'net')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 40c6a8d2a9ea..c2ee5d8550cf 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -196,6 +196,9 @@ struct xt_counters_info {
  * @hook:	hook number given packet came from
  * @family:	Actual NFPROTO_* through which the function is invoked
  * 		(helpful when match->family == NFPROTO_UNSPEC)
+ *
+ * Fields written to by extensions:
+ *
  * @hotdrop:	drop packet if we had inspection problems
  * Network namespace obtainable using dev_net(in/out)
  */
@@ -212,7 +215,7 @@ struct xt_action_param {
 	unsigned int thoff;
 	unsigned int hooknum;
 	u_int8_t family;
-	bool *hotdrop;
+	bool hotdrop;
 };
 
 /**
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 290d43541d46..59ca00e40dec 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -186,13 +186,12 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 	struct ebt_entries *chaininfo;
 	const char *base;
 	const struct ebt_table_info *private;
-	bool hotdrop = false;
 	struct xt_action_param acpar;
 
 	acpar.family  = NFPROTO_BRIDGE;
 	acpar.in      = in;
 	acpar.out     = out;
-	acpar.hotdrop = &hotdrop;
+	acpar.hotdrop = false;
 	acpar.hooknum = hook;
 
 	read_lock_bh(&table->lock);
@@ -216,7 +215,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 
 		if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0)
 			goto letscontinue;
-		if (hotdrop) {
+		if (acpar.hotdrop) {
 			read_unlock_bh(&table->lock);
 			return NF_DROP;
 		}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 9e7d089f168a..8cc56d26e937 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -260,7 +260,6 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	unsigned int verdict = NF_DROP;
 	const struct arphdr *arp;
-	bool hotdrop = false;
 	struct arpt_entry *e, *back;
 	const char *indev, *outdev;
 	void *table_base;
@@ -284,6 +283,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	acpar.out     = out;
 	acpar.hooknum = hook;
 	acpar.family  = NFPROTO_ARP;
+	acpar.hotdrop = false;
 
 	arp = arp_hdr(skb);
 	do {
@@ -345,10 +345,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 		else
 			/* Verdict */
 			break;
-	} while (!hotdrop);
+	} while (!acpar.hotdrop);
 	xt_info_rdunlock_bh();
 
-	if (hotdrop)
+	if (acpar.hotdrop)
 		return NF_DROP;
 	else
 		return verdict;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4e674f2824a7..607f89f16b76 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -308,7 +308,6 @@ ipt_do_table(struct sk_buff *skb,
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	const struct iphdr *ip;
-	bool hotdrop = false;
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
@@ -330,7 +329,7 @@ ipt_do_table(struct sk_buff *skb,
 	 * match it. */
 	acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
 	acpar.thoff   = ip_hdrlen(skb);
-	acpar.hotdrop = &hotdrop;
+	acpar.hotdrop = false;
 	acpar.in      = in;
 	acpar.out     = out;
 	acpar.family  = NFPROTO_IPV4;
@@ -432,7 +431,7 @@ ipt_do_table(struct sk_buff *skb,
 		else
 			/* Verdict */
 			break;
-	} while (!hotdrop);
+	} while (!acpar.hotdrop);
 	xt_info_rdunlock_bh();
 	pr_debug("Exiting %s; resetting sp from %u to %u\n",
 		 __func__, *stackptr, origptr);
@@ -440,7 +439,7 @@ ipt_do_table(struct sk_buff *skb,
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
 #else
-	if (hotdrop)
+	if (acpar.hotdrop)
 		return NF_DROP;
 	else return verdict;
 #endif
@@ -2154,7 +2153,7 @@ icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("Dropping evil ICMP tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index c9d83dc2d6fa..14a2aa8b8a14 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -46,7 +46,7 @@ static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		 * can't.  Hence, no choice but to drop.
 		 */
 		pr_debug("Dropping evil AH tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index b79dddc9edd6..af6e9c778345 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -78,7 +78,7 @@ static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
 		if (ip_hdr(skb)->protocol != IPPROTO_TCP)
 			return false;
-		if (!match_tcp(skb, info, par->hotdrop))
+		if (!match_tcp(skb, info, &par->hotdrop))
 			return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 4549f8d6f88f..557fac9689c0 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -337,7 +337,6 @@ ip6t_do_table(struct sk_buff *skb,
 	      struct xt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
-	bool hotdrop = false;
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
@@ -356,7 +355,7 @@ ip6t_do_table(struct sk_buff *skb,
 	 * things we don't know, ie. tcp syn flag or ports).  If the
 	 * rule is also a fragment-specific rule, non-fragments won't
 	 * match it. */
-	acpar.hotdrop = &hotdrop;
+	acpar.hotdrop = false;
 	acpar.in      = in;
 	acpar.out     = out;
 	acpar.family  = NFPROTO_IPV6;
@@ -380,7 +379,7 @@ ip6t_do_table(struct sk_buff *skb,
 
 		IP_NF_ASSERT(e);
 		if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
-		    &acpar.thoff, &acpar.fragoff, &hotdrop)) {
+		    &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) {
  no_match:
 			e = ip6t_next_entry(e);
 			continue;
@@ -447,7 +446,7 @@ ip6t_do_table(struct sk_buff *skb,
 		else
 			/* Verdict */
 			break;
-	} while (!hotdrop);
+	} while (!acpar.hotdrop);
 
 	xt_info_rdunlock_bh();
 	*stackptr = origptr;
@@ -455,7 +454,7 @@ ip6t_do_table(struct sk_buff *skb,
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
 #else
-	if (hotdrop)
+	if (acpar.hotdrop)
 		return NF_DROP;
 	else return verdict;
 #endif
@@ -2170,7 +2169,7 @@ icmp6_match(const struct sk_buff *skb, struct xt_action_param *par)
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("Dropping evil ICMP tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index c89887f35a46..89cccc5a9c92 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -48,13 +48,13 @@ static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*par->hotdrop = true;
+			par->hotdrop = true;
 		return false;
 	}
 
 	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
 	if (ah == NULL) {
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index f32fce34145a..aab0706908c5 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -27,7 +27,7 @@ eui64_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 	if (!(skb_mac_header(skb) >= skb->head &&
 	      skb_mac_header(skb) + ETH_HLEN <= skb->data) &&
 	    par->fragoff != 0) {
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index fcc8c72f218e..eda898fda6ca 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -46,13 +46,13 @@ frag_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*par->hotdrop = true;
+			par->hotdrop = true;
 		return false;
 	}
 
 	fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
 	if (fh == NULL) {
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index f8aebc098d71..59df051eaef6 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -65,13 +65,13 @@ hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 			    NEXTHDR_HOP : NEXTHDR_DEST, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*par->hotdrop = true;
+			par->hotdrop = true;
 		return false;
 	}
 
 	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
 	if (oh == NULL) {
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index eb1c3d65271a..0c90c66b1992 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -47,14 +47,14 @@ static bool mh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		pr_debug("Dropping evil MH tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
 	if (mh->ip6mh_proto != IPPROTO_NONE) {
 		pr_debug("Dropping invalid MH Payload Proto: %u\n",
 			 mh->ip6mh_proto);
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index ee584693ee35..d8488c50a8e0 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -52,13 +52,13 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*par->hotdrop = true;
+			par->hotdrop = true;
 		return false;
 	}
 
 	rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
 	if (rh == NULL) {
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index f130fd9817be..5c5b6b921b84 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -206,14 +206,14 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 	if (connections < 0) {
 		/* kmalloc failed, drop it entirely */
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
 	return (connections > info->limit) ^ info->inverse;
 
  hotdrop:
-	*par->hotdrop = true;
+	par->hotdrop = true;
 	return false;
 }
 
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index cc2c2919439f..b63d2a3d80ba 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -107,7 +107,7 @@ dccp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 	dh = skb_header_pointer(skb, par->thoff, sizeof(_dh), &_dh);
 	if (dh == NULL) {
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
@@ -120,7 +120,7 @@ dccp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		&& DCCHECK(match_types(dh, info->typemask),
 			   XT_DCCP_TYPE, info->flags, info->invflags)
 		&& DCCHECK(match_option(info->option, skb, par->thoff, dh,
-					par->hotdrop),
+					&par->hotdrop),
 			   XT_DCCP_OPTION, info->flags, info->invflags);
 }
 
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 7c2d80020554..171ba82b5902 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -52,7 +52,7 @@ static bool esp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		 * can't.  Hence, no choice but to drop.
 		 */
 		pr_debug("Dropping evil ESP tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 25a6e548ec27..b46a8390896d 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -562,7 +562,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	return info->cfg.mode & XT_HASHLIMIT_INVERT;
 
  hotdrop:
-	*par->hotdrop = true;
+	par->hotdrop = true;
 	return false;
 }
 
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 52beb68256c8..ac1d3c3d09e7 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -87,7 +87,7 @@ multiport_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		 * can't.  Hence, no choice but to drop.
 		 */
 		pr_debug("Dropping evil offset=0 tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 2808a7e33947..76aec6a44762 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -268,7 +268,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			goto out;
 		e = recent_entry_init(t, &addr, par->family, ttl);
 		if (e == NULL)
-			*par->hotdrop = true;
+			par->hotdrop = true;
 		ret = !ret;
 		goto out;
 	}
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 94d8b5deb2d0..c04fcf385c59 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -128,7 +128,7 @@ sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh);
 	if (sh == NULL) {
 		pr_debug("Dropping evil TCP offset=0 tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 	pr_debug("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
@@ -140,7 +140,7 @@ sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			&& ntohs(sh->dest) <= info->dpts[1],
 			XT_SCTP_DEST_PORTS, info->flags, info->invflags)
 		&& SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t),
-					info, par->hotdrop),
+					info, &par->hotdrop),
 			   XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
 }
 
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index f90728f6b8a4..c53d4d18eadf 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -73,7 +73,7 @@ out:
 	return info->invert;
 
 dropit:
-	*par->hotdrop = true;
+	par->hotdrop = true;
 	return false;
 }
 
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index dedde33c9db6..c14d4645daa3 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -77,7 +77,7 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		*/
 		if (par->fragoff == 1) {
 			pr_debug("Dropping evil TCP offset=1 frag.\n");
-			*par->hotdrop = true;
+			par->hotdrop = true;
 		}
 		/* Must not be a fragment. */
 		return false;
@@ -90,7 +90,7 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		pr_debug("Dropping evil TCP offset=0 tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
@@ -108,13 +108,13 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		return false;
 	if (tcpinfo->option) {
 		if (th->doff * 4 < sizeof(_tcph)) {
-			*par->hotdrop = true;
+			par->hotdrop = true;
 			return false;
 		}
 		if (!tcp_find_option(tcpinfo->option, skb, par->thoff,
 				     th->doff*4 - sizeof(_tcph),
 				     tcpinfo->invflags & XT_TCP_INV_OPTION,
-				     par->hotdrop))
+				     &par->hotdrop))
 			return false;
 	}
 	return true;
@@ -143,7 +143,7 @@ static bool udp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		pr_debug("Dropping evil UDP tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 4538506be386f9736b83bf9892f829adbbb70fea Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sat, 4 Jul 2009 12:50:00 +0200
Subject: netfilter: xtables: combine built-in extension structs

Prepare the arrays for use with the multiregister function. The
future layer-3 xt matches can then be easily added to them without
needing more (un)register code.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 net/ipv4/netfilter/arp_tables.c | 41 ++++++++++++--------------
 net/ipv4/netfilter/ip_tables.c  | 65 +++++++++++++++++++----------------------
 net/ipv6/netfilter/ip6_tables.c | 64 +++++++++++++++++++---------------------
 3 files changed, 78 insertions(+), 92 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 8cc56d26e937..03352fcba172 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1828,22 +1828,23 @@ void arpt_unregister_table(struct xt_table *table)
 }
 
 /* The built-in targets: standard (NULL) and error. */
-static struct xt_target arpt_standard_target __read_mostly = {
-	.name		= ARPT_STANDARD_TARGET,
-	.targetsize	= sizeof(int),
-	.family		= NFPROTO_ARP,
+static struct xt_target arpt_builtin_tg[] __read_mostly = {
+	{
+		.name             = ARPT_STANDARD_TARGET,
+		.targetsize       = sizeof(int),
+		.family           = NFPROTO_ARP,
 #ifdef CONFIG_COMPAT
-	.compatsize	= sizeof(compat_int_t),
-	.compat_from_user = compat_standard_from_user,
-	.compat_to_user	= compat_standard_to_user,
+		.compatsize       = sizeof(compat_int_t),
+		.compat_from_user = compat_standard_from_user,
+		.compat_to_user   = compat_standard_to_user,
 #endif
-};
-
-static struct xt_target arpt_error_target __read_mostly = {
-	.name		= ARPT_ERROR_TARGET,
-	.target		= arpt_error,
-	.targetsize	= ARPT_FUNCTION_MAXNAMELEN,
-	.family		= NFPROTO_ARP,
+	},
+	{
+		.name             = ARPT_ERROR_TARGET,
+		.target           = arpt_error,
+		.targetsize       = ARPT_FUNCTION_MAXNAMELEN,
+		.family           = NFPROTO_ARP,
+	},
 };
 
 static struct nf_sockopt_ops arpt_sockopts = {
@@ -1887,12 +1888,9 @@ static int __init arp_tables_init(void)
 		goto err1;
 
 	/* Noone else will be downing sem now, so we won't sleep */
-	ret = xt_register_target(&arpt_standard_target);
+	ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
 	if (ret < 0)
 		goto err2;
-	ret = xt_register_target(&arpt_error_target);
-	if (ret < 0)
-		goto err3;
 
 	/* Register setsockopt */
 	ret = nf_register_sockopt(&arpt_sockopts);
@@ -1903,9 +1901,7 @@ static int __init arp_tables_init(void)
 	return 0;
 
 err4:
-	xt_unregister_target(&arpt_error_target);
-err3:
-	xt_unregister_target(&arpt_standard_target);
+	xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
 err2:
 	unregister_pernet_subsys(&arp_tables_net_ops);
 err1:
@@ -1915,8 +1911,7 @@ err1:
 static void __exit arp_tables_fini(void)
 {
 	nf_unregister_sockopt(&arpt_sockopts);
-	xt_unregister_target(&arpt_error_target);
-	xt_unregister_target(&arpt_standard_target);
+	xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
 	unregister_pernet_subsys(&arp_tables_net_ops);
 }
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 607f89f16b76..49b9e4fb5460 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2172,23 +2172,23 @@ static int icmp_checkentry(const struct xt_mtchk_param *par)
 	return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0;
 }
 
-/* The built-in targets: standard (NULL) and error. */
-static struct xt_target ipt_standard_target __read_mostly = {
-	.name		= IPT_STANDARD_TARGET,
-	.targetsize	= sizeof(int),
-	.family		= NFPROTO_IPV4,
+static struct xt_target ipt_builtin_tg[] __read_mostly = {
+	{
+		.name             = IPT_STANDARD_TARGET,
+		.targetsize       = sizeof(int),
+		.family           = NFPROTO_IPV4,
 #ifdef CONFIG_COMPAT
-	.compatsize	= sizeof(compat_int_t),
-	.compat_from_user = compat_standard_from_user,
-	.compat_to_user	= compat_standard_to_user,
+		.compatsize       = sizeof(compat_int_t),
+		.compat_from_user = compat_standard_from_user,
+		.compat_to_user   = compat_standard_to_user,
 #endif
-};
-
-static struct xt_target ipt_error_target __read_mostly = {
-	.name		= IPT_ERROR_TARGET,
-	.target		= ipt_error,
-	.targetsize	= IPT_FUNCTION_MAXNAMELEN,
-	.family		= NFPROTO_IPV4,
+	},
+	{
+		.name             = IPT_ERROR_TARGET,
+		.target           = ipt_error,
+		.targetsize       = IPT_FUNCTION_MAXNAMELEN,
+		.family           = NFPROTO_IPV4,
+	},
 };
 
 static struct nf_sockopt_ops ipt_sockopts = {
@@ -2208,13 +2208,15 @@ static struct nf_sockopt_ops ipt_sockopts = {
 	.owner		= THIS_MODULE,
 };
 
-static struct xt_match icmp_matchstruct __read_mostly = {
-	.name		= "icmp",
-	.match		= icmp_match,
-	.matchsize	= sizeof(struct ipt_icmp),
-	.checkentry	= icmp_checkentry,
-	.proto		= IPPROTO_ICMP,
-	.family		= NFPROTO_IPV4,
+static struct xt_match ipt_builtin_mt[] __read_mostly = {
+	{
+		.name       = "icmp",
+		.match      = icmp_match,
+		.matchsize  = sizeof(struct ipt_icmp),
+		.checkentry = icmp_checkentry,
+		.proto      = IPPROTO_ICMP,
+		.family     = NFPROTO_IPV4,
+	},
 };
 
 static int __net_init ip_tables_net_init(struct net *net)
@@ -2241,13 +2243,10 @@ static int __init ip_tables_init(void)
 		goto err1;
 
 	/* Noone else will be downing sem now, so we won't sleep */
-	ret = xt_register_target(&ipt_standard_target);
+	ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
 	if (ret < 0)
 		goto err2;
-	ret = xt_register_target(&ipt_error_target);
-	if (ret < 0)
-		goto err3;
-	ret = xt_register_match(&icmp_matchstruct);
+	ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
 	if (ret < 0)
 		goto err4;
 
@@ -2260,11 +2259,9 @@ static int __init ip_tables_init(void)
 	return 0;
 
 err5:
-	xt_unregister_match(&icmp_matchstruct);
+	xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
 err4:
-	xt_unregister_target(&ipt_error_target);
-err3:
-	xt_unregister_target(&ipt_standard_target);
+	xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
 err2:
 	unregister_pernet_subsys(&ip_tables_net_ops);
 err1:
@@ -2275,10 +2272,8 @@ static void __exit ip_tables_fini(void)
 {
 	nf_unregister_sockopt(&ipt_sockopts);
 
-	xt_unregister_match(&icmp_matchstruct);
-	xt_unregister_target(&ipt_error_target);
-	xt_unregister_target(&ipt_standard_target);
-
+	xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
+	xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
 	unregister_pernet_subsys(&ip_tables_net_ops);
 }
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 557fac9689c0..56782336474f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2190,22 +2190,23 @@ static int icmp6_checkentry(const struct xt_mtchk_param *par)
 }
 
 /* The built-in targets: standard (NULL) and error. */
-static struct xt_target ip6t_standard_target __read_mostly = {
-	.name		= IP6T_STANDARD_TARGET,
-	.targetsize	= sizeof(int),
-	.family		= NFPROTO_IPV6,
+static struct xt_target ip6t_builtin_tg[] __read_mostly = {
+	{
+		.name             = IP6T_STANDARD_TARGET,
+		.targetsize       = sizeof(int),
+		.family           = NFPROTO_IPV6,
 #ifdef CONFIG_COMPAT
-	.compatsize	= sizeof(compat_int_t),
-	.compat_from_user = compat_standard_from_user,
-	.compat_to_user	= compat_standard_to_user,
+		.compatsize       = sizeof(compat_int_t),
+		.compat_from_user = compat_standard_from_user,
+		.compat_to_user   = compat_standard_to_user,
 #endif
-};
-
-static struct xt_target ip6t_error_target __read_mostly = {
-	.name		= IP6T_ERROR_TARGET,
-	.target		= ip6t_error,
-	.targetsize	= IP6T_FUNCTION_MAXNAMELEN,
-	.family		= NFPROTO_IPV6,
+	},
+	{
+		.name             = IP6T_ERROR_TARGET,
+		.target           = ip6t_error,
+		.targetsize       = IP6T_FUNCTION_MAXNAMELEN,
+		.family           = NFPROTO_IPV6,
+	},
 };
 
 static struct nf_sockopt_ops ip6t_sockopts = {
@@ -2225,13 +2226,15 @@ static struct nf_sockopt_ops ip6t_sockopts = {
 	.owner		= THIS_MODULE,
 };
 
-static struct xt_match icmp6_matchstruct __read_mostly = {
-	.name		= "icmp6",
-	.match		= icmp6_match,
-	.matchsize	= sizeof(struct ip6t_icmp),
-	.checkentry	= icmp6_checkentry,
-	.proto		= IPPROTO_ICMPV6,
-	.family		= NFPROTO_IPV6,
+static struct xt_match ip6t_builtin_mt[] __read_mostly = {
+	{
+		.name       = "icmp6",
+		.match      = icmp6_match,
+		.matchsize  = sizeof(struct ip6t_icmp),
+		.checkentry = icmp6_checkentry,
+		.proto      = IPPROTO_ICMPV6,
+		.family     = NFPROTO_IPV6,
+	},
 };
 
 static int __net_init ip6_tables_net_init(struct net *net)
@@ -2258,13 +2261,10 @@ static int __init ip6_tables_init(void)
 		goto err1;
 
 	/* Noone else will be downing sem now, so we won't sleep */
-	ret = xt_register_target(&ip6t_standard_target);
+	ret = xt_register_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
 	if (ret < 0)
 		goto err2;
-	ret = xt_register_target(&ip6t_error_target);
-	if (ret < 0)
-		goto err3;
-	ret = xt_register_match(&icmp6_matchstruct);
+	ret = xt_register_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt));
 	if (ret < 0)
 		goto err4;
 
@@ -2277,11 +2277,9 @@ static int __init ip6_tables_init(void)
 	return 0;
 
 err5:
-	xt_unregister_match(&icmp6_matchstruct);
+	xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt));
 err4:
-	xt_unregister_target(&ip6t_error_target);
-err3:
-	xt_unregister_target(&ip6t_standard_target);
+	xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
 err2:
 	unregister_pernet_subsys(&ip6_tables_net_ops);
 err1:
@@ -2292,10 +2290,8 @@ static void __exit ip6_tables_fini(void)
 {
 	nf_unregister_sockopt(&ip6t_sockopts);
 
-	xt_unregister_match(&icmp6_matchstruct);
-	xt_unregister_target(&ip6t_error_target);
-	xt_unregister_target(&ip6t_standard_target);
-
+	xt_unregister_matches(ip6t_builtin_mt, ARRAY_SIZE(ip6t_builtin_mt));
+	xt_unregister_targets(ip6t_builtin_tg, ARRAY_SIZE(ip6t_builtin_tg));
 	unregister_pernet_subsys(&ip6_tables_net_ops);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From d3e56c0ad8a2dab7ffd6179b64f00702149e5c9a Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Thu, 22 Apr 2010 02:50:10 -0700
Subject: wimax: checking ERR_PTR vs null

stch_skb is allocated with wimax_gnl_re_state_change_alloc().  That
function returns ERR_PTRs on failure and doesn't return NULL.

Signed-off-by: Dan Carpenter <error27@gmail.com>
---
 net/wimax/stack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 1ed65dbdab03..62b1a6662209 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -315,7 +315,7 @@ void __wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state)
 		BUG();
 	}
 	__wimax_state_set(wimax_dev, new_state);
-	if (stch_skb)
+	if (!IS_ERR(stch_skb))
 		wimax_gnl_re_state_change_send(wimax_dev, stch_skb, header);
 out:
 	d_fnend(3, dev, "(wimax_dev %p new_state %u [old %u]) = void\n",
-- 
cgit v1.2.3-59-g8ed1b


From b8d92c9c141ee3dc9b3537b1f0ffb4a54ea8d9b2 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 11 May 2010 12:42:04 +0200
Subject: mac80211: don't process work item with wrong frame

When we process a frame, we currently just match it
to the work struct by the MAC addresses, and not by
the work type. This means that we can end up doing
the work for an association request item when (for
whatever reason) we receive another frame type, for
example a probe response. Processing the wrong type
of frame will lead to completely invalid data being
processed, and will lead to various problems like
thinking the association was successful even if the
AP never sent an association response.

Fix this by making each processing function check
that it is invoked for the right work struct type
only and continue processing otherwise (and drop
frames that we didn't expect).

This bug was uncovered during the debugging for
https://bugzilla.kernel.org/show_bug.cgi?id=15862
but doesn't seem to be the cause for any of the
various problems reported there.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/work.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/mac80211/work.c b/net/mac80211/work.c
index 1e1ea3007b06..b0ba58589ca3 100644
--- a/net/mac80211/work.c
+++ b/net/mac80211/work.c
@@ -32,6 +32,7 @@
 #define IEEE80211_MAX_PROBE_TRIES 5
 
 enum work_action {
+	WORK_ACT_MISMATCH,
 	WORK_ACT_NONE,
 	WORK_ACT_TIMEOUT,
 	WORK_ACT_DONE,
@@ -574,7 +575,7 @@ ieee80211_rx_mgmt_auth(struct ieee80211_work *wk,
 	u16 auth_alg, auth_transaction, status_code;
 
 	if (wk->type != IEEE80211_WORK_AUTH)
-		return WORK_ACT_NONE;
+		return WORK_ACT_MISMATCH;
 
 	if (len < 24 + 6)
 		return WORK_ACT_NONE;
@@ -625,6 +626,9 @@ ieee80211_rx_mgmt_assoc_resp(struct ieee80211_work *wk,
 	struct ieee802_11_elems elems;
 	u8 *pos;
 
+	if (wk->type != IEEE80211_WORK_ASSOC)
+		return WORK_ACT_MISMATCH;
+
 	/*
 	 * AssocResp and ReassocResp have identical structure, so process both
 	 * of them in this function.
@@ -680,6 +684,12 @@ ieee80211_rx_mgmt_probe_resp(struct ieee80211_work *wk,
 
 	ASSERT_WORK_MTX(local);
 
+	if (wk->type != IEEE80211_WORK_DIRECT_PROBE)
+		return WORK_ACT_MISMATCH;
+
+	if (len < 24 + 12)
+		return WORK_ACT_NONE;
+
 	baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt;
 	if (baselen > len)
 		return WORK_ACT_NONE;
@@ -694,7 +704,7 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
 	struct ieee80211_rx_status *rx_status;
 	struct ieee80211_mgmt *mgmt;
 	struct ieee80211_work *wk;
-	enum work_action rma = WORK_ACT_NONE;
+	enum work_action rma = WORK_ACT_MISMATCH;
 	u16 fc;
 
 	rx_status = (struct ieee80211_rx_status *) skb->cb;
@@ -741,7 +751,17 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
 			break;
 		default:
 			WARN_ON(1);
+			rma = WORK_ACT_NONE;
 		}
+
+		/*
+		 * We've either received an unexpected frame, or we have
+		 * multiple work items and need to match the frame to the
+		 * right one.
+		 */
+		if (rma == WORK_ACT_MISMATCH)
+			continue;
+
 		/*
 		 * We've processed this frame for that work, so it can't
 		 * belong to another work struct.
@@ -751,6 +771,9 @@ static void ieee80211_work_rx_queued_mgmt(struct ieee80211_local *local,
 	}
 
 	switch (rma) {
+	case WORK_ACT_MISMATCH:
+		/* ignore this unmatched frame */
+		break;
 	case WORK_ACT_NONE:
 		break;
 	case WORK_ACT_DONE:
-- 
cgit v1.2.3-59-g8ed1b


From 5ce6e438d5d9ed8ed775cd1e94f92002c8da2bad Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 11 May 2010 16:20:57 +0200
Subject: mac80211: add offload channel switch support

This adds support for offloading the channel switch
operation to devices that support it, typically
by having a specific firmware API for it. The reasons
for this could be that the firmware provides better
timing or that regulatory enforcement done by the
device requires special handling of CSAs.

In order to allow drivers to specify the timing to
the device, the new channel_switch callback will
pass through the received frame's mactime, where
available.

Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h      | 39 +++++++++++++++++++++++++++++++
 net/mac80211/driver-ops.h   | 11 +++++++++
 net/mac80211/driver-trace.h | 49 +++++++++++++++++++++++++++++++++++++++
 net/mac80211/ieee80211_i.h  |  3 ++-
 net/mac80211/mlme.c         | 56 +++++++++++++++++++++++++++++++++++++++++----
 5 files changed, 153 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 9448a5b1bb15..389e86a54fc4 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -711,6 +711,28 @@ struct ieee80211_conf {
 	enum ieee80211_smps_mode smps_mode;
 };
 
+/**
+ * struct ieee80211_channel_switch - holds the channel switch data
+ *
+ * The information provided in this structure is required for channel switch
+ * operation.
+ *
+ * @timestamp: value in microseconds of the 64-bit Time Synchronization
+ *	Function (TSF) timer when the frame containing the channel switch
+ *	announcement was received. This is simply the rx.mactime parameter
+ *	the driver passed into mac80211.
+ * @block_tx: Indicates whether transmission must be blocked before the
+ *	scheduled channel switch, as indicated by the AP.
+ * @channel: the new channel to switch to
+ * @count: the number of TBTT's until the channel switch event
+ */
+struct ieee80211_channel_switch {
+	u64 timestamp;
+	bool block_tx;
+	struct ieee80211_channel *channel;
+	u8 count;
+};
+
 /**
  * struct ieee80211_vif - per-interface data
  *
@@ -1631,6 +1653,11 @@ enum ieee80211_ampdu_mlme_action {
  * @flush: Flush all pending frames from the hardware queue, making sure
  *	that the hardware queues are empty. If the parameter @drop is set
  *	to %true, pending frames may be dropped. The callback can sleep.
+ *
+ * @channel_switch: Drivers that need (or want) to offload the channel
+ *	switch operation for CSAs received from the AP may implement this
+ *	callback. They must then call ieee80211_chswitch_done() to indicate
+ *	completion of the channel switch.
  */
 struct ieee80211_ops {
 	int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb);
@@ -1694,6 +1721,8 @@ struct ieee80211_ops {
 	int (*testmode_cmd)(struct ieee80211_hw *hw, void *data, int len);
 #endif
 	void (*flush)(struct ieee80211_hw *hw, bool drop);
+	void (*channel_switch)(struct ieee80211_hw *hw,
+			       struct ieee80211_channel_switch *ch_switch);
 };
 
 /**
@@ -2444,6 +2473,16 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif,
 			       enum nl80211_cqm_rssi_threshold_event rssi_event,
 			       gfp_t gfp);
 
+/**
+ * ieee80211_chswitch_done - Complete channel switch process
+ * @vif: &struct ieee80211_vif pointer from the add_interface callback.
+ * @success: make the channel switch successful or not
+ *
+ * Complete the channel switch post-process: set the new operational channel
+ * and wake up the suspended queues.
+ */
+void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success);
+
 /* Rate control API */
 
 /**
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 997008e236ff..5662bb5190c3 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -373,4 +373,15 @@ static inline void drv_flush(struct ieee80211_local *local, bool drop)
 	if (local->ops->flush)
 		local->ops->flush(&local->hw, drop);
 }
+
+static inline void drv_channel_switch(struct ieee80211_local *local,
+				     struct ieee80211_channel_switch *ch_switch)
+{
+	might_sleep();
+
+	local->ops->channel_switch(&local->hw, ch_switch);
+
+	trace_drv_channel_switch(local, ch_switch);
+}
+
 #endif /* __MAC80211_DRIVER_OPS */
diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h
index ce734b58d07a..6a9b2342a9c2 100644
--- a/net/mac80211/driver-trace.h
+++ b/net/mac80211/driver-trace.h
@@ -774,6 +774,34 @@ TRACE_EVENT(drv_flush,
 	)
 );
 
+TRACE_EVENT(drv_channel_switch,
+	TP_PROTO(struct ieee80211_local *local,
+		 struct ieee80211_channel_switch *ch_switch),
+
+	TP_ARGS(local, ch_switch),
+
+	TP_STRUCT__entry(
+		LOCAL_ENTRY
+		__field(u64, timestamp)
+		__field(bool, block_tx)
+		__field(u16, freq)
+		__field(u8, count)
+	),
+
+	TP_fast_assign(
+		LOCAL_ASSIGN;
+		__entry->timestamp = ch_switch->timestamp;
+		__entry->block_tx = ch_switch->block_tx;
+		__entry->freq = ch_switch->channel->center_freq;
+		__entry->count = ch_switch->count;
+	),
+
+	TP_printk(
+		LOCAL_PR_FMT " new freq:%u count:%d",
+		LOCAL_PR_ARG, __entry->freq, __entry->count
+	)
+);
+
 /*
  * Tracing for API calls that drivers call.
  */
@@ -992,6 +1020,27 @@ TRACE_EVENT(api_sta_block_awake,
 	)
 );
 
+TRACE_EVENT(api_chswitch_done,
+	TP_PROTO(struct ieee80211_sub_if_data *sdata, bool success),
+
+	TP_ARGS(sdata, success),
+
+	TP_STRUCT__entry(
+		VIF_ENTRY
+		__field(bool, success)
+	),
+
+	TP_fast_assign(
+		VIF_ASSIGN;
+		__entry->success = success;
+	),
+
+	TP_printk(
+		VIF_PR_FMT " success=%d",
+		VIF_PR_ARG, __entry->success
+	)
+);
+
 /*
  * Tracing for internal functions
  * (which may also be called in response to driver calls)
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 69e7f4131f46..1c8e24706685 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -999,7 +999,8 @@ int ieee80211_max_network_latency(struct notifier_block *nb,
 				  unsigned long data, void *dummy);
 void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 				      struct ieee80211_channel_sw_ie *sw_elem,
-				      struct ieee80211_bss *bss);
+				      struct ieee80211_bss *bss,
+				      u64 timestamp);
 void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata);
 void ieee80211_sta_restart(struct ieee80211_sub_if_data *sdata);
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 7bfb0ebaaf00..6b74489fb9c6 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -341,7 +341,11 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 		goto out;
 
 	sdata->local->oper_channel = sdata->local->csa_channel;
-	ieee80211_hw_config(sdata->local, IEEE80211_CONF_CHANGE_CHANNEL);
+	if (!sdata->local->ops->channel_switch) {
+		/* call "hw_config" only if doing sw channel switch */
+		ieee80211_hw_config(sdata->local,
+			IEEE80211_CONF_CHANGE_CHANNEL);
+	}
 
 	/* XXX: shouldn't really modify cfg80211-owned data! */
 	ifmgd->associated->channel = sdata->local->oper_channel;
@@ -353,6 +357,29 @@ static void ieee80211_chswitch_work(struct work_struct *work)
 	mutex_unlock(&ifmgd->mtx);
 }
 
+void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success)
+{
+	struct ieee80211_sub_if_data *sdata;
+	struct ieee80211_if_managed *ifmgd;
+
+	sdata = vif_to_sdata(vif);
+	ifmgd = &sdata->u.mgd;
+
+	trace_api_chswitch_done(sdata, success);
+	if (!success) {
+		/*
+		 * If the channel switch was not successful, stay
+		 * around on the old channel. We currently lack
+		 * good handling of this situation, possibly we
+		 * should just drop the association.
+		 */
+		sdata->local->csa_channel = sdata->local->oper_channel;
+	}
+
+	ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
+}
+EXPORT_SYMBOL(ieee80211_chswitch_done);
+
 static void ieee80211_chswitch_timer(unsigned long data)
 {
 	struct ieee80211_sub_if_data *sdata =
@@ -369,7 +396,8 @@ static void ieee80211_chswitch_timer(unsigned long data)
 
 void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 				      struct ieee80211_channel_sw_ie *sw_elem,
-				      struct ieee80211_bss *bss)
+				      struct ieee80211_bss *bss,
+				      u64 timestamp)
 {
 	struct cfg80211_bss *cbss =
 		container_of((void *)bss, struct cfg80211_bss, priv);
@@ -397,6 +425,24 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 
 	sdata->local->csa_channel = new_ch;
 
+	if (sdata->local->ops->channel_switch) {
+		/* use driver's channel switch callback */
+		struct ieee80211_channel_switch ch_switch;
+		memset(&ch_switch, 0, sizeof(ch_switch));
+		ch_switch.timestamp = timestamp;
+		if (sw_elem->mode) {
+			ch_switch.block_tx = true;
+			ieee80211_stop_queues_by_reason(&sdata->local->hw,
+					IEEE80211_QUEUE_STOP_REASON_CSA);
+		}
+		ch_switch.channel = new_ch;
+		ch_switch.count = sw_elem->count;
+		ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
+		drv_channel_switch(sdata->local, &ch_switch);
+		return;
+	}
+
+	/* channel switch handled in software */
 	if (sw_elem->count <= 1) {
 		ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
 	} else {
@@ -1316,7 +1362,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 							ETH_ALEN) == 0)) {
 		struct ieee80211_channel_sw_ie *sw_elem =
 			(struct ieee80211_channel_sw_ie *)elems->ch_switch_elem;
-		ieee80211_sta_process_chanswitch(sdata, sw_elem, bss);
+		ieee80211_sta_process_chanswitch(sdata, sw_elem,
+						 bss, rx_status->mactime);
 	}
 }
 
@@ -1648,7 +1695,8 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata,
 
 			ieee80211_sta_process_chanswitch(sdata,
 					&mgmt->u.action.u.chan_switch.sw_elem,
-					(void *)ifmgd->associated->priv);
+					(void *)ifmgd->associated->priv,
+					rx_status->mactime);
 			break;
 		}
 		mutex_unlock(&ifmgd->mtx);
-- 
cgit v1.2.3-59-g8ed1b


From 9feaddc77b2a2ee460f39d1420f9675db972659e Mon Sep 17 00:00:00 2001
From: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Date: Wed, 5 May 2010 20:34:02 -0700
Subject: mac80211: check channel switch mode for future frames transmit

Check the mode in the channel switch IE for either 0 or 1 on transmission.
A channel switch mode set to 1 means that the STA in a BSS to which the
frame containing the element is addressed shall transmit no further
frames within the BSS until the scheduled channel switch.

Signed-off-by: Wey-Yi Guy <wey-yi.w.guy@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/mlme.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 6b74489fb9c6..a444d03f6774 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -446,7 +446,8 @@ void ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	if (sw_elem->count <= 1) {
 		ieee80211_queue_work(&sdata->local->hw, &ifmgd->chswitch_work);
 	} else {
-		ieee80211_stop_queues_by_reason(&sdata->local->hw,
+		if (sw_elem->mode)
+			ieee80211_stop_queues_by_reason(&sdata->local->hw,
 					IEEE80211_QUEUE_STOP_REASON_CSA);
 		ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED;
 		mod_timer(&ifmgd->chswitch_timer,
-- 
cgit v1.2.3-59-g8ed1b


From 058897a4e93a6fc6d331e2ef591b2d6571431265 Mon Sep 17 00:00:00 2001
From: Abhijeet Kolekar <abhijeet.kolekar@intel.com>
Date: Tue, 11 May 2010 11:22:11 -0700
Subject: mac80211: fix paged defragmentation

The paged RX skb patch broke defragmentation. We need to read hdr again
after linearization.

This fixes the following bug:
http://bugzilla.intellinuxwireless.org/show_bug.cgi?id=2194

Signed-off-by: Zhu, Yi <yi.zhu@intel.com>
Signed-off-by: Abhijeet Kolekar <abhijeet.kolekar@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 net/mac80211/rx.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index e4f325f68fd3..8fa99554f4e2 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1252,6 +1252,12 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx)
 	if (skb_linearize(rx->skb))
 		return RX_DROP_UNUSABLE;
 
+	/*
+	 *  skb_linearize() might change the skb->data and
+	 *  previously cached variables (in this case, hdr) need to
+	 *  be refreshed with the new data.
+	 */
+	hdr = (struct ieee80211_hdr *)rx->skb->data;
 	seq = (sc & IEEE80211_SCTL_SEQ) >> 4;
 
 	if (frag == 0) {
-- 
cgit v1.2.3-59-g8ed1b


From 9b7ce2b76265b3bf133aa1919e5022302981dff6 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Wed, 12 May 2010 10:11:35 +0000
Subject: netfilter: xtables: add missing depends for xt_TEE

Avoid these link-time errors when IPV6=m, XT_TEE=y:

net/built-in.o: In function `tee_tg_route6':
xt_TEE.c:(.text+0x45ca5): undefined reference to `ip6_route_output'
net/built-in.o: In function `tee_tg6':
xt_TEE.c:(.text+0x45d79): undefined reference to `ip6_local_out'

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 673a6c8f0e95..e223f47b8bae 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -505,6 +505,7 @@ config NETFILTER_XT_TARGET_RATEEST
 config NETFILTER_XT_TARGET_TEE
 	tristate '"TEE" - packet cloning to alternate destiantion'
 	depends on NETFILTER_ADVANCED
+	depends on (IPV6 || IPV6=n)
 	---help---
 	This option adds a "TEE" target with which a packet can be cloned and
 	this clone be rerouted to another nexthop.
-- 
cgit v1.2.3-59-g8ed1b


From df4ef33716232077564024baf0e5f2c74a295dfd Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:03 +0000
Subject: tipc: Eliminate obsolete port's "congested_link" field

Eliminate a field of the TIPC port structure that is populated,
but never referenced.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 2 --
 net/tipc/port.c | 1 -
 net/tipc/port.h | 2 --
 3 files changed, 5 deletions(-)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index c76e82e5f982..0b86f6aef3d8 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -561,7 +561,6 @@ static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz)
 			goto exit;
 		if (!list_empty(&p_ptr->wait_list))
 			goto exit;
-		p_ptr->congested_link = l_ptr;
 		p_ptr->publ.congested = 1;
 		p_ptr->waiting_pkts = 1 + ((sz - 1) / link_max_pkt(l_ptr));
 		list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports);
@@ -592,7 +591,6 @@ void tipc_link_wakeup_ports(struct link *l_ptr, int all)
 		if (win <= 0)
 			break;
 		list_del_init(&p_ptr->wait_list);
-		p_ptr->congested_link = NULL;
 		spin_lock_bh(p_ptr->publ.lock);
 		p_ptr->publ.congested = 0;
 		p_ptr->wakeup(&p_ptr->publ);
diff --git a/net/tipc/port.c b/net/tipc/port.c
index e70d27ea6578..c703ecbe09d7 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -247,7 +247,6 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
 	p_ptr->sent = 1;
 	INIT_LIST_HEAD(&p_ptr->wait_list);
 	INIT_LIST_HEAD(&p_ptr->subscription.nodesub_list);
-	p_ptr->congested_link = NULL;
 	p_ptr->dispatcher = dispatcher;
 	p_ptr->wakeup = wakeup;
 	p_ptr->user_port = NULL;
diff --git a/net/tipc/port.h b/net/tipc/port.h
index ff31ee4a1dc3..8d1652aab298 100644
--- a/net/tipc/port.h
+++ b/net/tipc/port.h
@@ -75,7 +75,6 @@ struct user_port {
  * @wakeup: ptr to routine to call when port is no longer congested
  * @user_port: ptr to user port associated with port (if any)
  * @wait_list: adjacent ports in list of ports waiting on link congestion
- * @congested_link: ptr to congested link port is waiting on
  * @waiting_pkts:
  * @sent:
  * @acked:
@@ -95,7 +94,6 @@ struct port {
 	void (*wakeup)(struct tipc_port *);
 	struct user_port *user_port;
 	struct list_head wait_list;
-	struct link *congested_link;
 	u32 waiting_pkts;
 	u32 sent;
 	u32 acked;
-- 
cgit v1.2.3-59-g8ed1b


From b82834e66aabb6e26c2b792a46d44bab346c46fb Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:04 +0000
Subject: tipc: Eliminate unused argument in print statement

Eliminate an argument in a print statement that has no corresponding
format specification.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 0b86f6aef3d8..c95038f42652 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -3328,9 +3328,7 @@ static void link_print(struct link *l_ptr, struct print_buf *buf,
 		if (l_ptr->next_out)
 			tipc_printf(buf, "%u..",
 				    msg_seqno(buf_msg(l_ptr->next_out)));
-		tipc_printf(buf, "%u]",
-			    msg_seqno(buf_msg
-				      (l_ptr->last_out)), l_ptr->out_queue_size);
+		tipc_printf(buf, "%u]", msg_seqno(buf_msg(l_ptr->last_out)));
 		if ((mod(msg_seqno(buf_msg(l_ptr->last_out)) -
 			 msg_seqno(buf_msg(l_ptr->first_out)))
 		     != (l_ptr->out_queue_size - 1)) ||
-- 
cgit v1.2.3-59-g8ed1b


From 289464e4fc2ebdef20be5f6b58414136f75107e2 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:05 +0000
Subject: tipc: Prune unused data structures from configuration service

Eliminate some unused data structures in the TIPC
configuration service that relate to the handling of link
subscriptions, which were not supported when TIPC 1.5 was
introduced.  If and when support for link subscriptions is
offered in TIPC, these elements may need to be re-introduced.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/config.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/tipc/config.c b/net/tipc/config.c
index ca3544d030c7..7370241412cb 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -56,9 +56,6 @@ struct subscr_data {
 struct manager {
 	u32 user_ref;
 	u32 port_ref;
-	u32 subscr_ref;
-	u32 link_subscriptions;
-	struct list_head link_subscribers;
 };
 
 static struct manager mng = { 0};
@@ -70,12 +67,6 @@ static int req_tlv_space;		/* request message TLV area size */
 static int rep_headroom;		/* reply message headroom to use */
 
 
-void tipc_cfg_link_event(u32 addr, char *name, int up)
-{
-	/* TIPC DOESN'T HANDLE LINK EVENT SUBSCRIPTIONS AT THE MOMENT */
-}
-
-
 struct sk_buff *tipc_cfg_reply_alloc(int payload_size)
 {
 	struct sk_buff *buf;
@@ -130,12 +121,24 @@ struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string)
 }
 
 
-
-
 #if 0
 
 /* Now obsolete code for handling commands not yet implemented the new way */
 
+/*
+ * Some of this code assumed that the manager structure contains two added
+ * fields:
+ *	u32 link_subscriptions;
+ *	struct list_head link_subscribers;
+ * which are currently not present.  These fields may need to be re-introduced
+ * if and when support for link subscriptions is added.
+ */
+
+void tipc_cfg_link_event(u32 addr, char *name, int up)
+{
+	/* TIPC DOESN'T HANDLE LINK EVENT SUBSCRIPTIONS AT THE MOMENT */
+}
+
 int tipc_cfg_cmd(const struct tipc_cmd_msg * msg,
 		 char *data,
 		 u32 sz,
@@ -667,9 +670,6 @@ int tipc_cfg_init(void)
 	struct tipc_name_seq seq;
 	int res;
 
-	memset(&mng, 0, sizeof(mng));
-	INIT_LIST_HEAD(&mng.link_subscribers);
-
 	res = tipc_attach(&mng.user_ref, NULL, NULL);
 	if (res)
 		goto failed;
-- 
cgit v1.2.3-59-g8ed1b


From 9ccc2eb4e12a39bd8430952b76c56c6267018500 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:06 +0000
Subject: tipc: Eliminate unnecessary initialization in native API send
 routines

Eliminate a couple of instances where TIPC's native API send routines
were doing pointless initialization of local variables.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/port.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/tipc/port.c b/net/tipc/port.c
index c703ecbe09d7..7641db667e0b 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -1452,7 +1452,7 @@ int tipc_forward2name(u32 ref,
 	struct port *p_ptr;
 	struct tipc_msg *msg;
 	u32 destnode = domain;
-	u32 destport = 0;
+	u32 destport;
 	int res;
 
 	p_ptr = tipc_port_deref(ref);
@@ -1524,7 +1524,7 @@ int tipc_forward_buf2name(u32 ref,
 	struct port *p_ptr;
 	struct tipc_msg *msg;
 	u32 destnode = domain;
-	u32 destport = 0;
+	u32 destport;
 	int res;
 
 	p_ptr = (struct port *)tipc_ref_deref(ref);
-- 
cgit v1.2.3-59-g8ed1b


From 3aec9cc936217a30dbb45a9b6808a39571674e66 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:07 +0000
Subject: tipc: Rename "multicast-link" to "broadcast-link"

Make a cosmetic change to the name displayed for the broadcast link,
to better reflect its true nature. Since TIPC utilizes this link to
distribute name table information, in addition to multicast messages
sent by user applications, the prior name "multicast-link" is
no longer appropriate.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 90a051912c03..a18f26deb659 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -119,7 +119,7 @@ static struct bclink *bclink = NULL;
 static struct link *bcl = NULL;
 static DEFINE_SPINLOCK(bc_lock);
 
-const char tipc_bclink_name[] = "multicast-link";
+const char tipc_bclink_name[] = "broadcast-link";
 
 
 static u32 buf_seqno(struct sk_buff *buf)
-- 
cgit v1.2.3-59-g8ed1b


From 107e7be628821dcb78c43adce0331e8ddb40eabd Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:08 +0000
Subject: tipc: Add support for "-s" configuration option

Provide initial support for displaying overall TIPC status/statistics
information at runtime.  Currently, only version info for the TIPC
kernel module is displayed.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc_config.h |  1 +
 net/tipc/config.c           | 40 +++++++++++++++++++++++++++++++++++++++-
 net/tipc/core.c             |  2 --
 net/tipc/core.h             |  3 +++
 4 files changed, 43 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index 2bc6fa4adeb5..9cde86c32412 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -74,6 +74,7 @@
 #define  TIPC_CMD_SHOW_NAME_TABLE   0x0005    /* tx name_tbl_query, rx ultra_string */
 #define  TIPC_CMD_SHOW_PORTS        0x0006    /* tx none, rx ultra_string */
 #define  TIPC_CMD_SHOW_LINK_STATS   0x000B    /* tx link_name, rx ultra_string */
+#define  TIPC_CMD_SHOW_STATS        0x000F    /* tx unsigned, rx ultra_string */
 
 #if 0
 #define  TIPC_CMD_SHOW_PORT_STATS   0x0008    /* tx port_ref, rx ultra_string */
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 7370241412cb..961d1b097146 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -246,13 +246,48 @@ static void cfg_cmd_event(struct tipc_cmd_msg *msg,
 	default:
 		rv = tipc_cfg_cmd(msg, data, sz, (u32 *)&msg_sect[1].iov_len, orig);
 	}
-	exit:
+exit:
 	rmsg.result_len = htonl(msg_sect[1].iov_len);
 	rmsg.retval = htonl(rv);
 	tipc_cfg_respond(msg_sect, 2u, orig);
 }
 #endif
 
+#define MAX_STATS_INFO 2000
+
+static struct sk_buff *tipc_show_stats(void)
+{
+	struct sk_buff *buf;
+	struct tlv_desc *rep_tlv;
+	struct print_buf pb;
+	int str_len;
+	u32 value;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	value = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
+	if (value != 0)
+		return tipc_cfg_reply_error_string("unsupported argument");
+
+	buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_STATS_INFO));
+	if (buf == NULL)
+		return NULL;
+
+	rep_tlv = (struct tlv_desc *)buf->data;
+	tipc_printbuf_init(&pb, (char *)TLV_DATA(rep_tlv), MAX_STATS_INFO);
+
+	tipc_printf(&pb, "TIPC version " TIPC_MOD_VER "\n");
+
+	/* Use additional tipc_printf()'s to return more info ... */
+
+	str_len = tipc_printbuf_validate(&pb);
+	skb_put(buf, TLV_SPACE(str_len));
+	TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
+
+	return buf;
+}
+
 static struct sk_buff *cfg_enable_bearer(void)
 {
 	struct tipc_bearer_config *args;
@@ -536,6 +571,9 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_DUMP_LOG:
 		rep_tlv_buf = tipc_log_dump();
 		break;
+	case TIPC_CMD_SHOW_STATS:
+		rep_tlv_buf = tipc_show_stats();
+		break;
 	case TIPC_CMD_SET_LINK_TOL:
 	case TIPC_CMD_SET_LINK_PRI:
 	case TIPC_CMD_SET_LINK_WINDOW:
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 4e84c8431f32..b47d1842a970 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -49,8 +49,6 @@
 #include "config.h"
 
 
-#define TIPC_MOD_VER "2.0.0"
-
 #ifndef CONFIG_TIPC_ZONES
 #define CONFIG_TIPC_ZONES 3
 #endif
diff --git a/net/tipc/core.h b/net/tipc/core.h
index c58a1d16563a..1e149f55f3e2 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -59,6 +59,9 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
+
+#define TIPC_MOD_VER "2.0.0"
+
 /*
  * TIPC sanity test macros
  */
-- 
cgit v1.2.3-59-g8ed1b


From 15e979da7c9ddddd55d2eb81e962dbb2aac51ad1 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:10 +0000
Subject: tipc: remove abstraction for link_max_pkt

This is just a straight return of a field; there is no
value in the abstraction of hiding it behind a function.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

(limited to 'net')

diff --git a/net/tipc/link.c b/net/tipc/link.c
index c95038f42652..441b26a57850 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -232,11 +232,6 @@ static int link_congested(struct link *l_ptr)
 	return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]);
 }
 
-static u32 link_max_pkt(struct link *l_ptr)
-{
-	return l_ptr->max_pkt;
-}
-
 static void link_init_max_pkt(struct link *l_ptr)
 {
 	u32 max_pkt;
@@ -562,7 +557,7 @@ static int link_schedule_port(struct link *l_ptr, u32 origport, u32 sz)
 		if (!list_empty(&p_ptr->wait_list))
 			goto exit;
 		p_ptr->publ.congested = 1;
-		p_ptr->waiting_pkts = 1 + ((sz - 1) / link_max_pkt(l_ptr));
+		p_ptr->waiting_pkts = 1 + ((sz - 1) / l_ptr->max_pkt);
 		list_add_tail(&p_ptr->wait_list, &l_ptr->waiting_ports);
 		l_ptr->stats.link_congs++;
 exit:
@@ -1015,7 +1010,7 @@ static int link_bundle_buf(struct link *l_ptr,
 		return 0;
 	if (skb_tailroom(bundler) < (pad + size))
 		return 0;
-	if (link_max_pkt(l_ptr) < (to_pos + size))
+	if (l_ptr->max_pkt < (to_pos + size))
 		return 0;
 
 	skb_put(bundler, pad + size);
@@ -1062,7 +1057,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
 	u32 queue_size = l_ptr->out_queue_size;
 	u32 imp = msg_tot_importance(msg);
 	u32 queue_limit = l_ptr->queue_limit[imp];
-	u32 max_packet = link_max_pkt(l_ptr);
+	u32 max_packet = l_ptr->max_pkt;
 
 	msg_set_prevnode(msg, tipc_own_addr);	/* If routed message */
 
@@ -1193,7 +1188,7 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf,
 	int res = msg_data_sz(msg);
 
 	if (likely(!link_congested(l_ptr))) {
-		if (likely(msg_size(msg) <= link_max_pkt(l_ptr))) {
+		if (likely(msg_size(msg) <= l_ptr->max_pkt)) {
 			if (likely(list_empty(&l_ptr->b_ptr->cong_links))) {
 				link_add_to_outqueue(l_ptr, buf, msg);
 				if (likely(tipc_bearer_send(l_ptr->b_ptr, buf,
@@ -1210,7 +1205,7 @@ static int link_send_buf_fast(struct link *l_ptr, struct sk_buff *buf,
 			}
 		}
 		else
-			*used_max_pkt = link_max_pkt(l_ptr);
+			*used_max_pkt = l_ptr->max_pkt;
 	}
 	return tipc_link_send_buf(l_ptr, buf);  /* All other cases */
 }
@@ -1317,7 +1312,7 @@ exit:
 			 * then re-try fast path or fragment the message
 			 */
 
-			sender->publ.max_pkt = link_max_pkt(l_ptr);
+			sender->publ.max_pkt = l_ptr->max_pkt;
 			tipc_node_unlock(node);
 			read_unlock_bh(&tipc_net_lock);
 
@@ -1480,8 +1475,8 @@ error:
 			tipc_node_unlock(node);
 			goto reject;
 		}
-		if (link_max_pkt(l_ptr) < max_pkt) {
-			sender->publ.max_pkt = link_max_pkt(l_ptr);
+		if (l_ptr->max_pkt < max_pkt) {
+			sender->publ.max_pkt = l_ptr->max_pkt;
 			tipc_node_unlock(node);
 			for (; buf_chain; buf_chain = buf) {
 				buf = buf_chain->next;
@@ -2679,7 +2674,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
 	u32 dsz = msg_data_sz(inmsg);
 	unchar *crs = buf->data;
 	u32 rest = insize;
-	u32 pack_sz = link_max_pkt(l_ptr);
+	u32 pack_sz = l_ptr->max_pkt;
 	u32 fragm_sz = pack_sz - INT_H_SIZE;
 	u32 fragm_no = 1;
 	u32 destaddr;
@@ -3125,7 +3120,7 @@ static int tipc_link_stats(const char *name, char *buf, const u32 buf_size)
 	tipc_printf(&pb, "Link <%s>\n"
 			 "  %s  MTU:%u  Priority:%u  Tolerance:%u ms"
 			 "  Window:%u packets\n",
-		    l_ptr->name, status, link_max_pkt(l_ptr),
+		    l_ptr->name, status, l_ptr->max_pkt,
 		    l_ptr->priority, l_ptr->tolerance, l_ptr->queue_limit[0]);
 	tipc_printf(&pb, "  RX packets:%u fragments:%u/%u bundles:%u/%u\n",
 		    l_ptr->next_in_no - l_ptr->stats.recv_info,
@@ -3270,7 +3265,7 @@ u32 tipc_link_get_max_pkt(u32 dest, u32 selector)
 		tipc_node_lock(n_ptr);
 		l_ptr = n_ptr->active_links[selector & 1];
 		if (l_ptr)
-			res = link_max_pkt(l_ptr);
+			res = l_ptr->max_pkt;
 		tipc_node_unlock(n_ptr);
 	}
 	read_unlock_bh(&tipc_net_lock);
-- 
cgit v1.2.3-59-g8ed1b


From 01fee256a675f6492fc6945bbb9b59640d8705d4 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:11 +0000
Subject: tipc: Relocate trivial link status functions to header file

Rather than living in link.c, where they can be used by that file alone,
these helper routines are better placed in link.h.

Relocated are the following:

	link_working_working
	link_working_unknown
	link_reset_unknown
	link_reset_reset
	link_blocked
	link_congested

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/discover.c |  2 +-
 net/tipc/link.c     | 30 ------------------------------
 net/tipc/link.h     | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 36 insertions(+), 31 deletions(-)

(limited to 'net')

diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index 74b7d1e28aec..ce1390a0cd00 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -224,7 +224,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 			memcpy(addr, &media_addr, sizeof(*addr));
 			tipc_link_reset(link);
 		}
-		link_fully_up = (link->state == WORKING_WORKING);
+		link_fully_up = link_working_working(link);
 		spin_unlock_bh(&n_ptr->lock);
 		if ((type == DSC_RESP_MSG) || link_fully_up)
 			return;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 441b26a57850..e8320bf78d5a 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -202,36 +202,6 @@ static unsigned int align(unsigned int i)
 	return (i + 3) & ~3u;
 }
 
-static int link_working_working(struct link *l_ptr)
-{
-	return (l_ptr->state == WORKING_WORKING);
-}
-
-static int link_working_unknown(struct link *l_ptr)
-{
-	return (l_ptr->state == WORKING_UNKNOWN);
-}
-
-static int link_reset_unknown(struct link *l_ptr)
-{
-	return (l_ptr->state == RESET_UNKNOWN);
-}
-
-static int link_reset_reset(struct link *l_ptr)
-{
-	return (l_ptr->state == RESET_RESET);
-}
-
-static int link_blocked(struct link *l_ptr)
-{
-	return (l_ptr->exp_msg_count || l_ptr->blocked);
-}
-
-static int link_congested(struct link *l_ptr)
-{
-	return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]);
-}
-
 static void link_init_max_pkt(struct link *l_ptr)
 {
 	u32 max_pkt;
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 6a51e38ad25c..2e5385c47d30 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -292,4 +292,39 @@ static inline u32 lesser(u32 left, u32 right)
 	return less_eq(left, right) ? left : right;
 }
 
+
+/*
+ * Link status checking routines
+ */
+
+static inline int link_working_working(struct link *l_ptr)
+{
+	return (l_ptr->state == WORKING_WORKING);
+}
+
+static inline int link_working_unknown(struct link *l_ptr)
+{
+	return (l_ptr->state == WORKING_UNKNOWN);
+}
+
+static inline int link_reset_unknown(struct link *l_ptr)
+{
+	return (l_ptr->state == RESET_UNKNOWN);
+}
+
+static inline int link_reset_reset(struct link *l_ptr)
+{
+	return (l_ptr->state == RESET_RESET);
+}
+
+static inline int link_blocked(struct link *l_ptr)
+{
+	return (l_ptr->exp_msg_count || l_ptr->blocked);
+}
+
+static inline int link_congested(struct link *l_ptr)
+{
+	return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]);
+}
+
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From c68ca7b72017f8f52e7aed0d2a6ecfaede133b6b Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:12 +0000
Subject: tipc: add tipc_ prefix to fcns targeted for un-inlining

These functions have enough code in them such that they
seem like sensible targets for un-inlining.  Prior to doing
that, this adds the tipc_ prefix to the functions, so that
in the event of a panic dump or similar, the subsystem from
which the functions come is immediately clear.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/addr.h       |  8 ++++----
 net/tipc/bcast.c      |  2 +-
 net/tipc/bcast.h      | 12 ++++++------
 net/tipc/bearer.c     |  4 ++--
 net/tipc/cluster.c    |  2 +-
 net/tipc/discover.c   |  6 +++---
 net/tipc/link.c       | 20 ++++++++++----------
 net/tipc/msg.h        | 14 +++++++-------
 net/tipc/name_distr.c |  2 +-
 net/tipc/name_table.c |  2 +-
 net/tipc/net.c        |  4 ++--
 net/tipc/node.c       | 12 ++++++------
 net/tipc/port.c       | 22 +++++++++++-----------
 13 files changed, 55 insertions(+), 55 deletions(-)

(limited to 'net')

diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 3ba67e6ce03e..4d4aee0e4232 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -67,7 +67,7 @@ static inline int may_route(u32 addr)
 	return(addr ^ tipc_own_addr) >> 11;
 }
 
-static inline int in_scope(u32 domain, u32 addr)
+static inline int tipc_in_scope(u32 domain, u32 addr)
 {
 	if (!domain || (domain == addr))
 		return 1;
@@ -79,10 +79,10 @@ static inline int in_scope(u32 domain, u32 addr)
 }
 
 /**
- * addr_scope - convert message lookup domain to equivalent 2-bit scope value
+ * tipc_addr_scope - convert message lookup domain to a 2-bit scope value
  */
 
-static inline int addr_scope(u32 domain)
+static inline int tipc_addr_scope(u32 domain)
 {
 	if (likely(!domain))
 		return TIPC_ZONE_SCOPE;
@@ -110,7 +110,7 @@ static inline int addr_domain(int sc)
 	return tipc_addr(tipc_zone(tipc_own_addr), 0, 0);
 }
 
-static inline char *addr_string_fill(char *string, u32 addr)
+static inline char *tipc_addr_string_fill(char *string, u32 addr)
 {
 	snprintf(string, 16, "<%u.%u.%u>",
 		 tipc_zone(addr), tipc_cluster(addr), tipc_node(addr));
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a18f26deb659..a8f22e78c3f3 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -275,7 +275,7 @@ static void bclink_send_nack(struct tipc_node *n_ptr)
 	buf = buf_acquire(INT_H_SIZE);
 	if (buf) {
 		msg = buf_msg(buf);
-		msg_init(msg, BCAST_PROTOCOL, STATE_MSG,
+		tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG,
 			 INT_H_SIZE, n_ptr->addr);
 		msg_set_mc_netid(msg, tipc_net_id);
 		msg_set_bcast_ack(msg, mod(n_ptr->bclink.last_in));
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 4c1771e95c99..2b1c4a755dfa 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -74,7 +74,7 @@ extern const char tipc_bclink_name[];
 
 
 /**
- * nmap_add - add a node to a node map
+ * tipc_nmap_add - add a node to a node map
  */
 
 static inline void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
@@ -90,7 +90,7 @@ static inline void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
 }
 
 /**
- * nmap_remove - remove a node from a node map
+ * tipc_nmap_remove - remove a node from a node map
  */
 
 static inline void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
@@ -106,7 +106,7 @@ static inline void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
 }
 
 /**
- * nmap_equal - test for equality of node maps
+ * tipc_nmap_equal - test for equality of node maps
  */
 
 static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b)
@@ -115,7 +115,7 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_m
 }
 
 /**
- * nmap_diff - find differences between node maps
+ * tipc_nmap_diff - find differences between node maps
  * @nm_a: input node map A
  * @nm_b: input node map B
  * @nm_diff: output node map A-B (i.e. nodes of A that are not in B)
@@ -143,7 +143,7 @@ static inline void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_m
 }
 
 /**
- * port_list_add - add a port to a port list, ensuring no duplicates
+ * tipc_port_list_add - add a port to a port list, ensuring no duplicates
  */
 
 static inline void tipc_port_list_add(struct port_list *pl_ptr, u32 port)
@@ -176,7 +176,7 @@ static inline void tipc_port_list_add(struct port_list *pl_ptr, u32 port)
 }
 
 /**
- * port_list_free - free dynamically created entries in port_list chain
+ * tipc_port_list_free - free dynamically created entries in port_list chain
  *
  * Note: First item is on stack, so it doesn't need to be released
  */
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 78091375ca12..ccec12f0ccc9 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -493,7 +493,7 @@ int tipc_enable_bearer(const char *name, u32 bcast_scope, u32 priority)
 		return -EINVAL;
 	}
 	if (!tipc_addr_domain_valid(bcast_scope) ||
-	    !in_scope(bcast_scope, tipc_own_addr)) {
+	    !tipc_in_scope(bcast_scope, tipc_own_addr)) {
 		warn("Bearer <%s> rejected, illegal broadcast scope\n", name);
 		return -EINVAL;
 	}
@@ -571,7 +571,7 @@ restart:
 	spin_lock_init(&b_ptr->publ.lock);
 	write_unlock_bh(&tipc_net_lock);
 	info("Enabled bearer <%s>, discovery domain %s, priority %u\n",
-	     name, addr_string_fill(addr_string, bcast_scope), priority);
+	     name, tipc_addr_string_fill(addr_string, bcast_scope), priority);
 	return 0;
 failed:
 	write_unlock_bh(&tipc_net_lock);
diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c
index a7eac00cd363..e68f705381bc 100644
--- a/net/tipc/cluster.c
+++ b/net/tipc/cluster.c
@@ -238,7 +238,7 @@ static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest)
 	if (buf) {
 		msg = buf_msg(buf);
 		memset((char *)msg, 0, size);
-		msg_init(msg, ROUTE_DISTRIBUTOR, 0, INT_H_SIZE, dest);
+		tipc_msg_init(msg, ROUTE_DISTRIBUTOR, 0, INT_H_SIZE, dest);
 	}
 	return buf;
 }
diff --git a/net/tipc/discover.c b/net/tipc/discover.c
index ce1390a0cd00..fc1fcf5e6b53 100644
--- a/net/tipc/discover.c
+++ b/net/tipc/discover.c
@@ -120,7 +120,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type,
 
 	if (buf) {
 		msg = buf_msg(buf);
-		msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain);
+		tipc_msg_init(msg, LINK_CONFIG, type, DSC_H_SIZE, dest_domain);
 		msg_set_non_seq(msg, 1);
 		msg_set_req_links(msg, req_links);
 		msg_set_dest_domain(msg, dest_domain);
@@ -144,7 +144,7 @@ static void disc_dupl_alert(struct bearer *b_ptr, u32 node_addr,
 	char media_addr_str[64];
 	struct print_buf pb;
 
-	addr_string_fill(node_addr_str, node_addr);
+	tipc_addr_string_fill(node_addr_str, node_addr);
 	tipc_printbuf_init(&pb, media_addr_str, sizeof(media_addr_str));
 	tipc_media_addr_printf(&pb, media_addr);
 	tipc_printbuf_validate(&pb);
@@ -183,7 +183,7 @@ void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr)
 			disc_dupl_alert(b_ptr, tipc_own_addr, &media_addr);
 		return;
 	}
-	if (!in_scope(dest, tipc_own_addr))
+	if (!tipc_in_scope(dest, tipc_own_addr))
 		return;
 	if (is_slave(tipc_own_addr) && is_slave(orig))
 		return;
diff --git a/net/tipc/link.c b/net/tipc/link.c
index e8320bf78d5a..a3616b99529b 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -433,7 +433,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer,
 
 	l_ptr->pmsg = (struct tipc_msg *)&l_ptr->proto_msg;
 	msg = l_ptr->pmsg;
-	msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr);
+	tipc_msg_init(msg, LINK_PROTOCOL, RESET_MSG, INT_H_SIZE, l_ptr->addr);
 	msg_set_size(msg, sizeof(l_ptr->proto_msg));
 	msg_set_session(msg, (tipc_random & 0xffff));
 	msg_set_bearer_id(msg, b_ptr->identity);
@@ -1025,7 +1025,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
 	u32 size = msg_size(msg);
 	u32 dsz = msg_data_sz(msg);
 	u32 queue_size = l_ptr->out_queue_size;
-	u32 imp = msg_tot_importance(msg);
+	u32 imp = tipc_msg_tot_importance(msg);
 	u32 queue_limit = l_ptr->queue_limit[imp];
 	u32 max_packet = l_ptr->max_pkt;
 
@@ -1090,7 +1090,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf)
 			struct tipc_msg bundler_hdr;
 
 			if (bundler) {
-				msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG,
+				tipc_msg_init(&bundler_hdr, MSG_BUNDLER, OPEN_MSG,
 					 INT_H_SIZE, l_ptr->addr);
 				skb_copy_to_linear_data(bundler, &bundler_hdr,
 							INT_H_SIZE);
@@ -1243,7 +1243,7 @@ again:
 	 * (Must not hold any locks while building message.)
 	 */
 
-	res = msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt,
+	res = tipc_msg_build(hdr, msg_sect, num_sect, sender->publ.max_pkt,
 			!sender->user_port, &buf);
 
 	read_lock_bh(&tipc_net_lock);
@@ -1354,7 +1354,7 @@ again:
 	/* Prepare reusable fragment header: */
 
 	msg_dbg(hdr, ">FRAGMENTING>");
-	msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
+	tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
 		 INT_H_SIZE, msg_destnode(hdr));
 	msg_set_link_selector(&fragm_hdr, sender->publ.ref);
 	msg_set_size(&fragm_hdr, max_pkt);
@@ -1613,7 +1613,7 @@ static void link_reset_all(unsigned long addr)
 	tipc_node_lock(n_ptr);
 
 	warn("Resetting all links to %s\n",
-	     addr_string_fill(addr_string, n_ptr->addr));
+	     tipc_addr_string_fill(addr_string, n_ptr->addr));
 
 	for (i = 0; i < MAX_BEARERS; i++) {
 		if (n_ptr->links[i]) {
@@ -1655,7 +1655,7 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
 		n_ptr = l_ptr->owner->next;
 		tipc_node_lock(n_ptr);
 
-		addr_string_fill(addr_string, n_ptr->addr);
+		tipc_addr_string_fill(addr_string, n_ptr->addr);
 		tipc_printf(TIPC_OUTPUT, "Multicast link info for %s\n", addr_string);
 		tipc_printf(TIPC_OUTPUT, "Supported: %d,  ", n_ptr->bclink.supported);
 		tipc_printf(TIPC_OUTPUT, "Acked: %u\n", n_ptr->bclink.acked);
@@ -2398,7 +2398,7 @@ void tipc_link_changeover(struct link *l_ptr)
 		return;
 	}
 
-	msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
+	tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
 		 ORIGINAL_MSG, INT_H_SIZE, l_ptr->addr);
 	msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
 	msg_set_msgcnt(&tunnel_hdr, msgcount);
@@ -2453,7 +2453,7 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel)
 	struct sk_buff *iter;
 	struct tipc_msg tunnel_hdr;
 
-	msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
+	tipc_msg_init(&tunnel_hdr, CHANGEOVER_PROTOCOL,
 		 DUPLICATE_MSG, INT_H_SIZE, l_ptr->addr);
 	msg_set_msgcnt(&tunnel_hdr, l_ptr->out_queue_size);
 	msg_set_bearer_id(&tunnel_hdr, l_ptr->peer_bearer_id);
@@ -2659,7 +2659,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf)
 
 	/* Prepare reusable fragment header: */
 
-	msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
+	tipc_msg_init(&fragm_hdr, MSG_FRAGMENTER, FIRST_FRAGMENT,
 		 INT_H_SIZE, destaddr);
 	msg_set_link_selector(&fragm_hdr, msg_link_selector(inmsg));
 	msg_set_long_msgno(&fragm_hdr, mod(l_ptr->long_msg_seq_no++));
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 7ee6ae238147..fbcd46f24a9d 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -708,7 +708,7 @@ static inline void msg_set_dataoctet(struct tipc_msg *m, u32 pos)
 #define DSC_REQ_MSG          0
 #define DSC_RESP_MSG         1
 
-static inline u32 msg_tot_importance(struct tipc_msg *m)
+static inline u32 tipc_msg_tot_importance(struct tipc_msg *m)
 {
 	if (likely(msg_isdata(m))) {
 		if (likely(msg_orignode(m) == tipc_own_addr))
@@ -722,7 +722,7 @@ static inline u32 msg_tot_importance(struct tipc_msg *m)
 }
 
 
-static inline void msg_init(struct tipc_msg *m, u32 user, u32 type,
+static inline void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
 			    u32 hsize, u32 destnode)
 {
 	memset(m, 0, hsize);
@@ -739,10 +739,10 @@ static inline void msg_init(struct tipc_msg *m, u32 user, u32 type,
 }
 
 /**
- * msg_calc_data_size - determine total data size for message
+ * tipc_msg_calc_data_size - determine total data size for message
  */
 
-static inline int msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
+static inline int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
 {
 	int dsz = 0;
 	int i;
@@ -753,20 +753,20 @@ static inline int msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
 }
 
 /**
- * msg_build - create message using specified header and data
+ * tipc_msg_build - create message using specified header and data
  *
  * Note: Caller must not hold any locks in case copy_from_user() is interrupted!
  *
  * Returns message data size or errno
  */
 
-static inline int msg_build(struct tipc_msg *hdr,
+static inline int tipc_msg_build(struct tipc_msg *hdr,
 			    struct iovec const *msg_sect, u32 num_sect,
 			    int max_size, int usrmem, struct sk_buff** buf)
 {
 	int dsz, sz, hsz, pos, res, cnt;
 
-	dsz = msg_calc_data_size(msg_sect, num_sect);
+	dsz = tipc_msg_calc_data_size(msg_sect, num_sect);
 	if (unlikely(dsz > TIPC_MAX_USER_MSG_SIZE)) {
 		*buf = NULL;
 		return -EINVAL;
diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c
index 10a69894e2fd..6ac3c543250b 100644
--- a/net/tipc/name_distr.c
+++ b/net/tipc/name_distr.c
@@ -103,7 +103,7 @@ static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest)
 
 	if (buf != NULL) {
 		msg = buf_msg(buf);
-		msg_init(msg, NAME_DISTRIBUTOR, type, LONG_H_SIZE, dest);
+		tipc_msg_init(msg, NAME_DISTRIBUTOR, type, LONG_H_SIZE, dest);
 		msg_set_size(msg, LONG_H_SIZE + size);
 	}
 	return buf;
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index acab41a48d67..8ba79620db3f 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -627,7 +627,7 @@ u32 tipc_nametbl_translate(u32 type, u32 instance, u32 *destnode)
 	struct name_seq *seq;
 	u32 ref;
 
-	if (!in_scope(*destnode, tipc_own_addr))
+	if (!tipc_in_scope(*destnode, tipc_own_addr))
 		return 0;
 
 	read_lock_bh(&tipc_nametbl_lock);
diff --git a/net/tipc/net.c b/net/tipc/net.c
index d7cd1e064a80..f61b7694138b 100644
--- a/net/tipc/net.c
+++ b/net/tipc/net.c
@@ -219,7 +219,7 @@ void tipc_net_route_msg(struct sk_buff *buf)
 
 	/* Handle message for this node */
 	dnode = msg_short(msg) ? tipc_own_addr : msg_destnode(msg);
-	if (in_scope(dnode, tipc_own_addr)) {
+	if (tipc_in_scope(dnode, tipc_own_addr)) {
 		if (msg_isdata(msg)) {
 			if (msg_mcast(msg))
 				tipc_port_recv_mcast(buf, NULL);
@@ -277,7 +277,7 @@ int tipc_net_start(u32 addr)
 
 	info("Started in network mode\n");
 	info("Own node address %s, network identity %u\n",
-	     addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
+	     tipc_addr_string_fill(addr_string, tipc_own_addr), tipc_net_id);
 	return 0;
 }
 
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 17cc394f424f..b634942caba5 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -268,7 +268,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
 
 		if (n_ptr->link_cnt >= 2) {
 			err("Attempt to create third link to %s\n",
-			    addr_string_fill(addr_string, n_ptr->addr));
+			    tipc_addr_string_fill(addr_string, n_ptr->addr));
 			return NULL;
 		}
 
@@ -280,7 +280,7 @@ struct tipc_node *tipc_node_attach_link(struct link *l_ptr)
 		}
 		err("Attempt to establish second link on <%s> to %s\n",
 		    l_ptr->b_ptr->publ.name,
-		    addr_string_fill(addr_string, l_ptr->addr));
+		    tipc_addr_string_fill(addr_string, l_ptr->addr));
 	}
 	return NULL;
 }
@@ -439,7 +439,7 @@ static void node_lost_contact(struct tipc_node *n_ptr)
 		return;
 
 	info("Lost contact with %s\n",
-	     addr_string_fill(addr_string, n_ptr->addr));
+	     tipc_addr_string_fill(addr_string, n_ptr->addr));
 
 	/* Abort link changeover */
 	for (i = 0; i < MAX_BEARERS; i++) {
@@ -602,7 +602,7 @@ u32 tipc_available_nodes(const u32 domain)
 
 	read_lock_bh(&tipc_net_lock);
 	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
-		if (!in_scope(domain, n_ptr->addr))
+		if (!tipc_in_scope(domain, n_ptr->addr))
 			continue;
 		if (tipc_node_is_up(n_ptr))
 			cnt++;
@@ -651,7 +651,7 @@ struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space)
 	/* Add TLVs for all nodes in scope */
 
 	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
-		if (!in_scope(domain, n_ptr->addr))
+		if (!tipc_in_scope(domain, n_ptr->addr))
 			continue;
 		node_info.addr = htonl(n_ptr->addr);
 		node_info.up = htonl(tipc_node_is_up(n_ptr));
@@ -711,7 +711,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space)
 	for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) {
 		u32 i;
 
-		if (!in_scope(domain, n_ptr->addr))
+		if (!tipc_in_scope(domain, n_ptr->addr))
 			continue;
 		tipc_node_lock(n_ptr);
 		for (i = 0; i < MAX_BEARERS; i++) {
diff --git a/net/tipc/port.c b/net/tipc/port.c
index 7641db667e0b..0737680e9266 100644
--- a/net/tipc/port.c
+++ b/net/tipc/port.c
@@ -116,7 +116,7 @@ int tipc_multicast(u32 ref, struct tipc_name_seq const *seq, u32 domain,
 	msg_set_namelower(hdr, seq->lower);
 	msg_set_nameupper(hdr, seq->upper);
 	msg_set_hdr_sz(hdr, MCAST_H_SIZE);
-	res = msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE,
+	res = tipc_msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE,
 			!oport->user_port, &buf);
 	if (unlikely(!buf))
 		return res;
@@ -241,7 +241,7 @@ struct tipc_port *tipc_createport_raw(void *usr_handle,
 	p_ptr->publ.max_pkt = MAX_PKT_DEFAULT;
 	p_ptr->publ.ref = ref;
 	msg = &p_ptr->publ.phdr;
-	msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0);
+	tipc_msg_init(msg, importance, TIPC_NAMED_MSG, LONG_H_SIZE, 0);
 	msg_set_origport(msg, ref);
 	p_ptr->last_in_seqno = 41;
 	p_ptr->sent = 1;
@@ -395,7 +395,7 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode,
 	buf = buf_acquire(LONG_H_SIZE);
 	if (buf) {
 		msg = buf_msg(buf);
-		msg_init(msg, usr, type, LONG_H_SIZE, destnode);
+		tipc_msg_init(msg, usr, type, LONG_H_SIZE, destnode);
 		msg_set_errcode(msg, err);
 		msg_set_destport(msg, destport);
 		msg_set_origport(msg, origport);
@@ -439,7 +439,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err)
 		return data_sz;
 	}
 	rmsg = buf_msg(rbuf);
-	msg_init(rmsg, imp, msg_type(msg), hdr_sz, msg_orignode(msg));
+	tipc_msg_init(rmsg, imp, msg_type(msg), hdr_sz, msg_orignode(msg));
 	msg_set_errcode(rmsg, err);
 	msg_set_destport(rmsg, msg_origport(msg));
 	msg_set_origport(rmsg, msg_destport(msg));
@@ -480,7 +480,7 @@ int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr,
 	struct sk_buff *buf;
 	int res;
 
-	res = msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE,
+	res = tipc_msg_build(hdr, msg_sect, num_sect, MAX_MSG_SIZE,
 			!p_ptr->user_port, &buf);
 	if (!buf)
 		return res;
@@ -1343,7 +1343,7 @@ int tipc_port_recv_sections(struct port *sender, unsigned int num_sect,
 	struct sk_buff *buf;
 	int res;
 
-	res = msg_build(&sender->publ.phdr, msg_sect, num_sect,
+	res = tipc_msg_build(&sender->publ.phdr, msg_sect, num_sect,
 			MAX_MSG_SIZE, !sender->user_port, &buf);
 	if (likely(buf))
 		tipc_port_recv_msg(buf);
@@ -1383,7 +1383,7 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect)
 	if (port_unreliable(p_ptr)) {
 		p_ptr->publ.congested = 0;
 		/* Just calculate msg length and return */
-		return msg_calc_data_size(msg_sect, num_sect);
+		return tipc_msg_calc_data_size(msg_sect, num_sect);
 	}
 	return -ELINKCONG;
 }
@@ -1466,7 +1466,7 @@ int tipc_forward2name(u32 ref,
 	msg_set_hdr_sz(msg, LONG_H_SIZE);
 	msg_set_nametype(msg, name->type);
 	msg_set_nameinst(msg, name->instance);
-	msg_set_lookup_scope(msg, addr_scope(domain));
+	msg_set_lookup_scope(msg, tipc_addr_scope(domain));
 	if (importance <= TIPC_CRITICAL_IMPORTANCE)
 		msg_set_importance(msg,importance);
 	destport = tipc_nametbl_translate(name->type, name->instance, &destnode);
@@ -1483,7 +1483,7 @@ int tipc_forward2name(u32 ref,
 			return res;
 		if (port_unreliable(p_ptr)) {
 			/* Just calculate msg length and return */
-			return msg_calc_data_size(msg_sect, num_sect);
+			return tipc_msg_calc_data_size(msg_sect, num_sect);
 		}
 		return -ELINKCONG;
 	}
@@ -1539,7 +1539,7 @@ int tipc_forward_buf2name(u32 ref,
 	msg_set_origport(msg, orig->ref);
 	msg_set_nametype(msg, name->type);
 	msg_set_nameinst(msg, name->instance);
-	msg_set_lookup_scope(msg, addr_scope(domain));
+	msg_set_lookup_scope(msg, tipc_addr_scope(domain));
 	msg_set_hdr_sz(msg, LONG_H_SIZE);
 	msg_set_size(msg, LONG_H_SIZE + dsz);
 	destport = tipc_nametbl_translate(name->type, name->instance, &destnode);
@@ -1619,7 +1619,7 @@ int tipc_forward2port(u32 ref,
 		return res;
 	if (port_unreliable(p_ptr)) {
 		/* Just calculate msg length and return */
-		return msg_calc_data_size(msg_sect, num_sect);
+		return tipc_msg_calc_data_size(msg_sect, num_sect);
 	}
 	return -ELINKCONG;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 80e0c33064bd71bd5791c79f28c59a1aee898993 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:13 +0000
Subject: tipc: Reduce footprint by un-inlining address routines

Convert address-related inline routines that are more than one
line into standard functions, thereby eliminating a significant
amount of repeated code.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/addr.c | 32 ++++++++++++++++++++++++++++++++
 net/tipc/addr.h | 37 +++----------------------------------
 2 files changed, 35 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/net/tipc/addr.c b/net/tipc/addr.c
index e5207a11edf6..c048543ffbeb 100644
--- a/net/tipc/addr.c
+++ b/net/tipc/addr.c
@@ -92,3 +92,35 @@ int tipc_addr_node_valid(u32 addr)
 	return (tipc_addr_domain_valid(addr) && tipc_node(addr));
 }
 
+int tipc_in_scope(u32 domain, u32 addr)
+{
+	if (!domain || (domain == addr))
+		return 1;
+	if (domain == (addr & 0xfffff000u)) /* domain <Z.C.0> */
+		return 1;
+	if (domain == (addr & 0xff000000u)) /* domain <Z.0.0> */
+		return 1;
+	return 0;
+}
+
+/**
+ * tipc_addr_scope - convert message lookup domain to a 2-bit scope value
+ */
+
+int tipc_addr_scope(u32 domain)
+{
+	if (likely(!domain))
+		return TIPC_ZONE_SCOPE;
+	if (tipc_node(domain))
+		return TIPC_NODE_SCOPE;
+	if (tipc_cluster(domain))
+		return TIPC_CLUSTER_SCOPE;
+	return TIPC_ZONE_SCOPE;
+}
+
+char *tipc_addr_string_fill(char *string, u32 addr)
+{
+	snprintf(string, 16, "<%u.%u.%u>",
+		 tipc_zone(addr), tipc_cluster(addr), tipc_node(addr));
+	return string;
+}
diff --git a/net/tipc/addr.h b/net/tipc/addr.h
index 4d4aee0e4232..c1cc5724d8cc 100644
--- a/net/tipc/addr.h
+++ b/net/tipc/addr.h
@@ -67,32 +67,6 @@ static inline int may_route(u32 addr)
 	return(addr ^ tipc_own_addr) >> 11;
 }
 
-static inline int tipc_in_scope(u32 domain, u32 addr)
-{
-	if (!domain || (domain == addr))
-		return 1;
-	if (domain == (addr & 0xfffff000u)) /* domain <Z.C.0> */
-		return 1;
-	if (domain == (addr & 0xff000000u)) /* domain <Z.0.0> */
-		return 1;
-	return 0;
-}
-
-/**
- * tipc_addr_scope - convert message lookup domain to a 2-bit scope value
- */
-
-static inline int tipc_addr_scope(u32 domain)
-{
-	if (likely(!domain))
-		return TIPC_ZONE_SCOPE;
-	if (tipc_node(domain))
-		return TIPC_NODE_SCOPE;
-	if (tipc_cluster(domain))
-		return TIPC_CLUSTER_SCOPE;
-	return TIPC_ZONE_SCOPE;
-}
-
 /**
  * addr_domain - convert 2-bit scope value to equivalent message lookup domain
  *
@@ -110,14 +84,9 @@ static inline int addr_domain(int sc)
 	return tipc_addr(tipc_zone(tipc_own_addr), 0, 0);
 }
 
-static inline char *tipc_addr_string_fill(char *string, u32 addr)
-{
-	snprintf(string, 16, "<%u.%u.%u>",
-		 tipc_zone(addr), tipc_cluster(addr), tipc_node(addr));
-	return string;
-}
-
 int tipc_addr_domain_valid(u32);
 int tipc_addr_node_valid(u32 addr);
-
+int tipc_in_scope(u32 domain, u32 addr);
+int tipc_addr_scope(u32 domain);
+char *tipc_addr_string_fill(char *string, u32 addr);
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 3e22e62b6204414cf31c414d5a91897e2b718135 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:14 +0000
Subject: tipc: Reduce footprint by un-inlining nmap routines

Convert nmap inline routines that are more than one line into standard
functions, thereby eliminating a significant amount of repeated code.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/tipc/bcast.h | 63 ++++----------------------------------------------------
 2 files changed, 64 insertions(+), 59 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index a8f22e78c3f3..1ee6424ef3e0 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -822,3 +822,63 @@ void tipc_bclink_stop(void)
 	spin_unlock_bh(&bc_lock);
 }
 
+
+/**
+ * tipc_nmap_add - add a node to a node map
+ */
+
+void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
+{
+	int n = tipc_node(node);
+	int w = n / WSIZE;
+	u32 mask = (1 << (n % WSIZE));
+
+	if ((nm_ptr->map[w] & mask) == 0) {
+		nm_ptr->count++;
+		nm_ptr->map[w] |= mask;
+	}
+}
+
+/**
+ * tipc_nmap_remove - remove a node from a node map
+ */
+
+void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
+{
+	int n = tipc_node(node);
+	int w = n / WSIZE;
+	u32 mask = (1 << (n % WSIZE));
+
+	if ((nm_ptr->map[w] & mask) != 0) {
+		nm_ptr->map[w] &= ~mask;
+		nm_ptr->count--;
+	}
+}
+
+/**
+ * tipc_nmap_diff - find differences between node maps
+ * @nm_a: input node map A
+ * @nm_b: input node map B
+ * @nm_diff: output node map A-B (i.e. nodes of A that are not in B)
+ */
+
+void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b,
+				  struct tipc_node_map *nm_diff)
+{
+	int stop = ARRAY_SIZE(nm_a->map);
+	int w;
+	int b;
+	u32 map;
+
+	memset(nm_diff, 0, sizeof(*nm_diff));
+	for (w = 0; w < stop; w++) {
+		map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]);
+		nm_diff->map[w] = map;
+		if (map != 0) {
+			for (b = 0 ; b < WSIZE; b++) {
+				if (map & (1 << b))
+					nm_diff->count++;
+			}
+		}
+	}
+}
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 2b1c4a755dfa..cd779816383f 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -72,38 +72,8 @@ struct tipc_node;
 
 extern const char tipc_bclink_name[];
 
-
-/**
- * tipc_nmap_add - add a node to a node map
- */
-
-static inline void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
-{
-	int n = tipc_node(node);
-	int w = n / WSIZE;
-	u32 mask = (1 << (n % WSIZE));
-
-	if ((nm_ptr->map[w] & mask) == 0) {
-		nm_ptr->count++;
-		nm_ptr->map[w] |= mask;
-	}
-}
-
-/**
- * tipc_nmap_remove - remove a node from a node map
- */
-
-static inline void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
-{
-	int n = tipc_node(node);
-	int w = n / WSIZE;
-	u32 mask = (1 << (n % WSIZE));
-
-	if ((nm_ptr->map[w] & mask) != 0) {
-		nm_ptr->map[w] &= ~mask;
-		nm_ptr->count--;
-	}
-}
+void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node);
+void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node);
 
 /**
  * tipc_nmap_equal - test for equality of node maps
@@ -114,33 +84,8 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_m
 	return !memcmp(nm_a, nm_b, sizeof(*nm_a));
 }
 
-/**
- * tipc_nmap_diff - find differences between node maps
- * @nm_a: input node map A
- * @nm_b: input node map B
- * @nm_diff: output node map A-B (i.e. nodes of A that are not in B)
- */
-
-static inline void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b,
-				  struct tipc_node_map *nm_diff)
-{
-	int stop = ARRAY_SIZE(nm_a->map);
-	int w;
-	int b;
-	u32 map;
-
-	memset(nm_diff, 0, sizeof(*nm_diff));
-	for (w = 0; w < stop; w++) {
-		map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]);
-		nm_diff->map[w] = map;
-		if (map != 0) {
-			for (b = 0 ; b < WSIZE; b++) {
-				if (map & (1 << b))
-					nm_diff->count++;
-			}
-		}
-	}
-}
+void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b,
+				  struct tipc_node_map *nm_diff);
 
 /**
  * tipc_port_list_add - add a port to a port list, ensuring no duplicates
-- 
cgit v1.2.3-59-g8ed1b


From 43608edc2dbe83057544cf76b765ecdf63d59e8c Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:15 +0000
Subject: tipc: Reduce footprint by un-inlining port list routines

Convert port list inline routines that are more than one line into
standard functions, thereby eliminating a significant amount of
repeated code.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/tipc/bcast.h | 52 ++--------------------------------------------------
 2 files changed, 52 insertions(+), 50 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 1ee6424ef3e0..a008c6689305 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -882,3 +882,53 @@ void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b,
 		}
 	}
 }
+
+/**
+ * tipc_port_list_add - add a port to a port list, ensuring no duplicates
+ */
+
+void tipc_port_list_add(struct port_list *pl_ptr, u32 port)
+{
+	struct port_list *item = pl_ptr;
+	int i;
+	int item_sz = PLSIZE;
+	int cnt = pl_ptr->count;
+
+	for (; ; cnt -= item_sz, item = item->next) {
+		if (cnt < PLSIZE)
+			item_sz = cnt;
+		for (i = 0; i < item_sz; i++)
+			if (item->ports[i] == port)
+				return;
+		if (i < PLSIZE) {
+			item->ports[i] = port;
+			pl_ptr->count++;
+			return;
+		}
+		if (!item->next) {
+			item->next = kmalloc(sizeof(*item), GFP_ATOMIC);
+			if (!item->next) {
+				warn("Incomplete multicast delivery, no memory\n");
+				return;
+			}
+			item->next->next = NULL;
+		}
+	}
+}
+
+/**
+ * tipc_port_list_free - free dynamically created entries in port_list chain
+ *
+ */
+
+void tipc_port_list_free(struct port_list *pl_ptr)
+{
+	struct port_list *item;
+	struct port_list *next;
+
+	for (item = pl_ptr->next; item; item = next) {
+		next = item->next;
+		kfree(item);
+	}
+}
+
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index cd779816383f..e8c2b81658c7 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -87,56 +87,8 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_m
 void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b,
 				  struct tipc_node_map *nm_diff);
 
-/**
- * tipc_port_list_add - add a port to a port list, ensuring no duplicates
- */
-
-static inline void tipc_port_list_add(struct port_list *pl_ptr, u32 port)
-{
-	struct port_list *item = pl_ptr;
-	int i;
-	int item_sz = PLSIZE;
-	int cnt = pl_ptr->count;
-
-	for (; ; cnt -= item_sz, item = item->next) {
-		if (cnt < PLSIZE)
-			item_sz = cnt;
-		for (i = 0; i < item_sz; i++)
-			if (item->ports[i] == port)
-				return;
-		if (i < PLSIZE) {
-			item->ports[i] = port;
-			pl_ptr->count++;
-			return;
-		}
-		if (!item->next) {
-			item->next = kmalloc(sizeof(*item), GFP_ATOMIC);
-			if (!item->next) {
-				warn("Incomplete multicast delivery, no memory\n");
-				return;
-			}
-			item->next->next = NULL;
-		}
-	}
-}
-
-/**
- * tipc_port_list_free - free dynamically created entries in port_list chain
- *
- * Note: First item is on stack, so it doesn't need to be released
- */
-
-static inline void tipc_port_list_free(struct port_list *pl_ptr)
-{
-	struct port_list *item;
-	struct port_list *next;
-
-	for (item = pl_ptr->next; item; item = next) {
-		next = item->next;
-		kfree(item);
-	}
-}
-
+void tipc_port_list_add(struct port_list *pl_ptr, u32 port);
+void tipc_port_list_free(struct port_list *pl_ptr);
 
 int  tipc_bclink_init(void);
 void tipc_bclink_stop(void);
-- 
cgit v1.2.3-59-g8ed1b


From b274f4ab8e674db1757371a21e7217e0766cb574 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:16 +0000
Subject: tipc: Reduce footprint by un-inlining bearer congestion routine

Convert bearer congestion inline routine that is more than one line into
a standard function, thereby eliminating some repeated code.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bearer.c | 12 ++++++++++++
 net/tipc/bearer.h | 16 ++--------------
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'net')

diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index ccec12f0ccc9..52ae17b2583e 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -467,6 +467,18 @@ int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr)
 	return res;
 }
 
+/**
+ * tipc_bearer_congested - determines if bearer is currently congested
+ */
+
+int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr)
+{
+	if (unlikely(b_ptr->publ.blocked))
+		return 1;
+	if (likely(list_empty(&b_ptr->cong_links)))
+		return 0;
+	return !tipc_bearer_resolve_congestion(b_ptr, l_ptr);
+}
 
 /**
  * tipc_enable_bearer - enable bearer with the given name
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 000228e93f9e..a850b389663e 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -125,6 +125,7 @@ void tipc_bearer_remove_dest(struct bearer *b_ptr, u32 dest);
 void tipc_bearer_schedule(struct bearer *b_ptr, struct link *l_ptr);
 struct bearer *tipc_bearer_find_interface(const char *if_name);
 int tipc_bearer_resolve_congestion(struct bearer *b_ptr, struct link *l_ptr);
+int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr);
 int tipc_bearer_init(void);
 void tipc_bearer_stop(void);
 void tipc_bearer_lock_push(struct bearer *b_ptr);
@@ -154,17 +155,4 @@ static inline int tipc_bearer_send(struct bearer *b_ptr, struct sk_buff *buf,
 	return !b_ptr->media->send_msg(buf, &b_ptr->publ, dest);
 }
 
-/**
- * tipc_bearer_congested - determines if bearer is currently congested
- */
-
-static inline int tipc_bearer_congested(struct bearer *b_ptr, struct link *l_ptr)
-{
-	if (unlikely(b_ptr->publ.blocked))
-		return 1;
-	if (likely(list_empty(&b_ptr->cong_links)))
-		return 0;
-	return !tipc_bearer_resolve_congestion(b_ptr, l_ptr);
-}
-
-#endif
+#endif	/* _TIPC_BEARER_H */
-- 
cgit v1.2.3-59-g8ed1b


From 3032cca4d5cf885cacc78fae27ddf0c56dbf9963 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:17 +0000
Subject: tipc: Reduce footprint by un-inlining buf_acquire routine

Convert the buf_acquire inline routine, which is more than one line long,
into a standard function, thereby eliminating some repeated code.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/core.c | 24 ++++++++++++++++++++++++
 net/tipc/core.h | 24 +-----------------------
 2 files changed, 25 insertions(+), 23 deletions(-)

(limited to 'net')

diff --git a/net/tipc/core.c b/net/tipc/core.c
index b47d1842a970..696468117985 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -101,6 +101,30 @@ int tipc_get_mode(void)
 	return tipc_mode;
 }
 
+/**
+ * buf_acquire - creates a TIPC message buffer
+ * @size: message size (including TIPC header)
+ *
+ * Returns a new buffer with data pointers set to the specified size.
+ *
+ * NOTE: Headroom is reserved to allow prepending of a data link header.
+ *       There may also be unrequested tailroom present at the buffer's end.
+ */
+
+struct sk_buff *buf_acquire(u32 size)
+{
+	struct sk_buff *skb;
+	unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
+
+	skb = alloc_skb_fclone(buf_size, GFP_ATOMIC);
+	if (skb) {
+		skb_reserve(skb, BUF_HEADROOM);
+		skb_put(skb, size);
+		skb->next = NULL;
+	}
+	return skb;
+}
+
 /**
  * tipc_core_stop_net - shut down TIPC networking sub-systems
  */
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 1e149f55f3e2..188799017abd 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -328,29 +328,7 @@ static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
 	return (struct tipc_msg *)skb->data;
 }
 
-/**
- * buf_acquire - creates a TIPC message buffer
- * @size: message size (including TIPC header)
- *
- * Returns a new buffer with data pointers set to the specified size.
- *
- * NOTE: Headroom is reserved to allow prepending of a data link header.
- *       There may also be unrequested tailroom present at the buffer's end.
- */
-
-static inline struct sk_buff *buf_acquire(u32 size)
-{
-	struct sk_buff *skb;
-	unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u;
-
-	skb = alloc_skb_fclone(buf_size, GFP_ATOMIC);
-	if (skb) {
-		skb_reserve(skb, BUF_HEADROOM);
-		skb_put(skb, size);
-		skb->next = NULL;
-	}
-	return skb;
-}
+extern struct sk_buff *buf_acquire(u32 size);
 
 /**
  * buf_discard - frees a TIPC message buffer
-- 
cgit v1.2.3-59-g8ed1b


From 23461e835b3537dd395828b090fb1cb64a198f85 Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:18 +0000
Subject: tipc: Reduce footprint by un-inlining tipc_msg_* routines

Convert the tipc_msg_* inline routines that are more than one line long
into standard functions, thereby eliminating some repeated code.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/tipc/msg.h | 99 ++++------------------------------------------------------
 2 files changed, 100 insertions(+), 93 deletions(-)

(limited to 'net')

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 73dcd00d674e..381063817b41 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -40,6 +40,100 @@
 #include "msg.h"
 #include "bearer.h"
 
+u32 tipc_msg_tot_importance(struct tipc_msg *m)
+{
+	if (likely(msg_isdata(m))) {
+		if (likely(msg_orignode(m) == tipc_own_addr))
+			return msg_importance(m);
+		return msg_importance(m) + 4;
+	}
+	if ((msg_user(m) == MSG_FRAGMENTER)  &&
+	    (msg_type(m) == FIRST_FRAGMENT))
+		return msg_importance(msg_get_wrapped(m));
+	return msg_importance(m);
+}
+
+
+void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
+			    u32 hsize, u32 destnode)
+{
+	memset(m, 0, hsize);
+	msg_set_version(m);
+	msg_set_user(m, user);
+	msg_set_hdr_sz(m, hsize);
+	msg_set_size(m, hsize);
+	msg_set_prevnode(m, tipc_own_addr);
+	msg_set_type(m, type);
+	if (!msg_short(m)) {
+		msg_set_orignode(m, tipc_own_addr);
+		msg_set_destnode(m, destnode);
+	}
+}
+
+/**
+ * tipc_msg_calc_data_size - determine total data size for message
+ */
+
+int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
+{
+	int dsz = 0;
+	int i;
+
+	for (i = 0; i < num_sect; i++)
+		dsz += msg_sect[i].iov_len;
+	return dsz;
+}
+
+/**
+ * tipc_msg_build - create message using specified header and data
+ *
+ * Note: Caller must not hold any locks in case copy_from_user() is interrupted!
+ *
+ * Returns message data size or errno
+ */
+
+int tipc_msg_build(struct tipc_msg *hdr,
+			    struct iovec const *msg_sect, u32 num_sect,
+			    int max_size, int usrmem, struct sk_buff** buf)
+{
+	int dsz, sz, hsz, pos, res, cnt;
+
+	dsz = tipc_msg_calc_data_size(msg_sect, num_sect);
+	if (unlikely(dsz > TIPC_MAX_USER_MSG_SIZE)) {
+		*buf = NULL;
+		return -EINVAL;
+	}
+
+	pos = hsz = msg_hdr_sz(hdr);
+	sz = hsz + dsz;
+	msg_set_size(hdr, sz);
+	if (unlikely(sz > max_size)) {
+		*buf = NULL;
+		return dsz;
+	}
+
+	*buf = buf_acquire(sz);
+	if (!(*buf))
+		return -ENOMEM;
+	skb_copy_to_linear_data(*buf, hdr, hsz);
+	for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
+		if (likely(usrmem))
+			res = !copy_from_user((*buf)->data + pos,
+					      msg_sect[cnt].iov_base,
+					      msg_sect[cnt].iov_len);
+		else
+			skb_copy_to_linear_data_offset(*buf, pos,
+						       msg_sect[cnt].iov_base,
+						       msg_sect[cnt].iov_len);
+		pos += msg_sect[cnt].iov_len;
+	}
+	if (likely(res))
+		return dsz;
+
+	buf_discard(*buf);
+	*buf = NULL;
+	return -EFAULT;
+}
 
 #ifdef CONFIG_TIPC_DEBUG
 
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index fbcd46f24a9d..995d2da35b01 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -708,100 +708,13 @@ static inline void msg_set_dataoctet(struct tipc_msg *m, u32 pos)
 #define DSC_REQ_MSG          0
 #define DSC_RESP_MSG         1
 
-static inline u32 tipc_msg_tot_importance(struct tipc_msg *m)
-{
-	if (likely(msg_isdata(m))) {
-		if (likely(msg_orignode(m) == tipc_own_addr))
-			return msg_importance(m);
-		return msg_importance(m) + 4;
-	}
-	if ((msg_user(m) == MSG_FRAGMENTER)  &&
-	    (msg_type(m) == FIRST_FRAGMENT))
-		return msg_importance(msg_get_wrapped(m));
-	return msg_importance(m);
-}
-
-
-static inline void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
-			    u32 hsize, u32 destnode)
-{
-	memset(m, 0, hsize);
-	msg_set_version(m);
-	msg_set_user(m, user);
-	msg_set_hdr_sz(m, hsize);
-	msg_set_size(m, hsize);
-	msg_set_prevnode(m, tipc_own_addr);
-	msg_set_type(m, type);
-	if (!msg_short(m)) {
-		msg_set_orignode(m, tipc_own_addr);
-		msg_set_destnode(m, destnode);
-	}
-}
-
-/**
- * tipc_msg_calc_data_size - determine total data size for message
- */
-
-static inline int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect)
-{
-	int dsz = 0;
-	int i;
-
-	for (i = 0; i < num_sect; i++)
-		dsz += msg_sect[i].iov_len;
-	return dsz;
-}
-
-/**
- * tipc_msg_build - create message using specified header and data
- *
- * Note: Caller must not hold any locks in case copy_from_user() is interrupted!
- *
- * Returns message data size or errno
- */
-
-static inline int tipc_msg_build(struct tipc_msg *hdr,
+u32 tipc_msg_tot_importance(struct tipc_msg *m);
+void tipc_msg_init(struct tipc_msg *m, u32 user, u32 type,
+			    u32 hsize, u32 destnode);
+int tipc_msg_calc_data_size(struct iovec const *msg_sect, u32 num_sect);
+int tipc_msg_build(struct tipc_msg *hdr,
 			    struct iovec const *msg_sect, u32 num_sect,
-			    int max_size, int usrmem, struct sk_buff** buf)
-{
-	int dsz, sz, hsz, pos, res, cnt;
-
-	dsz = tipc_msg_calc_data_size(msg_sect, num_sect);
-	if (unlikely(dsz > TIPC_MAX_USER_MSG_SIZE)) {
-		*buf = NULL;
-		return -EINVAL;
-	}
-
-	pos = hsz = msg_hdr_sz(hdr);
-	sz = hsz + dsz;
-	msg_set_size(hdr, sz);
-	if (unlikely(sz > max_size)) {
-		*buf = NULL;
-		return dsz;
-	}
-
-	*buf = buf_acquire(sz);
-	if (!(*buf))
-		return -ENOMEM;
-	skb_copy_to_linear_data(*buf, hdr, hsz);
-	for (res = 1, cnt = 0; res && (cnt < num_sect); cnt++) {
-		if (likely(usrmem))
-			res = !copy_from_user((*buf)->data + pos,
-					      msg_sect[cnt].iov_base,
-					      msg_sect[cnt].iov_len);
-		else
-			skb_copy_to_linear_data_offset(*buf, pos,
-						       msg_sect[cnt].iov_base,
-						       msg_sect[cnt].iov_len);
-		pos += msg_sect[cnt].iov_len;
-	}
-	if (likely(res))
-		return dsz;
-
-	buf_discard(*buf);
-	*buf = NULL;
-	return -EFAULT;
-}
+			    int max_size, int usrmem, struct sk_buff** buf);
 
 static inline void msg_set_media_addr(struct tipc_msg *m, struct tipc_media_addr *a)
 {
-- 
cgit v1.2.3-59-g8ed1b


From e94c67436efa22af7d8b7d19c885863246042543 Mon Sep 17 00:00:00 2001
From: Bart De Schuymer <bdschuym@pandora.be>
Date: Thu, 13 May 2010 14:55:34 +0200
Subject: netfilter: bridge-netfilter: fix crash in br_nf_forward_finish()

[ 4593.956206] BUG: unable to handle kernel NULL pointer dereference at 0000000000000018
[ 4593.956219] IP: [<ffffffffa03357a4>] br_nf_forward_finish+0x154/0x170 [bridge]
[ 4593.956232] PGD 195ece067 PUD 1ba005067 PMD 0
[ 4593.956241] Oops: 0000 [#1] SMP
[ 4593.956248] last sysfs file: /sys/devices/LNXSYSTM:00/LNXSYBUS:00/PNP0A08:00/device:08/ATK0110:00/hwmon/hwmon0/temp2_label
[ 4593.956253] CPU 3
...
[ 4593.956380] Pid: 29512, comm: kvm Not tainted 2.6.34-rc7-net #195 P6T DELUXE/System Product Name
[ 4593.956384] RIP: 0010:[<ffffffffa03357a4>]  [<ffffffffa03357a4>] br_nf_forward_finish+0x154/0x170 [bridge]
[ 4593.956395] RSP: 0018:ffff880001e63b78  EFLAGS: 00010246
[ 4593.956399] RAX: 0000000000000608 RBX: ffff880057181700 RCX: ffff8801b813d000
[ 4593.956402] RDX: 0000000000000000 RSI: 0000000000000002 RDI: ffff880057181700
[ 4593.956406] RBP: ffff880001e63ba8 R08: ffff8801b9d97000 R09: ffffffffa0335650
[ 4593.956410] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801b813d000
[ 4593.956413] R13: ffffffff81ab3940 R14: ffff880057181700 R15: 0000000000000002
[ 4593.956418] FS:  00007fc40d380710(0000) GS:ffff880001e60000(0000) knlGS:0000000000000000
[ 4593.956422] CS:  0010 DS: 002b ES: 002b CR0: 000000008005003b
[ 4593.956426] CR2: 0000000000000018 CR3: 00000001ba1d7000 CR4: 00000000000026e0
[ 4593.956429] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 4593.956433] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
[ 4593.956437] Process kvm (pid: 29512, threadinfo ffff8801ba566000, task ffff8801b8003870)
[ 4593.956441] Stack:
[ 4593.956443]  0000000100000020 ffff880001e63ba0 ffff880001e63ba0 ffff880057181700
[ 4593.956451] <0> ffffffffa0335650 ffffffff81ab3940 ffff880001e63bd8 ffffffffa03350e6
[ 4593.956462] <0> ffff880001e63c40 000000000000024d ffff880057181700 0000000080000000
[ 4593.956474] Call Trace:
[ 4593.956478]  <IRQ>
[ 4593.956488]  [<ffffffffa0335650>] ? br_nf_forward_finish+0x0/0x170 [bridge]
[ 4593.956496]  [<ffffffffa03350e6>] NF_HOOK_THRESH+0x56/0x60 [bridge]
[ 4593.956504]  [<ffffffffa0335282>] br_nf_forward_arp+0x112/0x120 [bridge]
[ 4593.956511]  [<ffffffff813f7184>] nf_iterate+0x64/0xa0
[ 4593.956519]  [<ffffffffa032f920>] ? br_forward_finish+0x0/0x60 [bridge]
[ 4593.956524]  [<ffffffff813f722c>] nf_hook_slow+0x6c/0x100
[ 4593.956531]  [<ffffffffa032f920>] ? br_forward_finish+0x0/0x60 [bridge]
[ 4593.956538]  [<ffffffffa032f800>] ? __br_forward+0x0/0xc0 [bridge]
[ 4593.956545]  [<ffffffffa032f86d>] __br_forward+0x6d/0xc0 [bridge]
[ 4593.956550]  [<ffffffff813c5d8e>] ? skb_clone+0x3e/0x70
[ 4593.956557]  [<ffffffffa032f462>] deliver_clone+0x32/0x60 [bridge]
[ 4593.956564]  [<ffffffffa032f6b6>] br_flood+0xa6/0xe0 [bridge]
[ 4593.956571]  [<ffffffffa032f800>] ? __br_forward+0x0/0xc0 [bridge]

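br_nf_forward_finish() called nf_bridge_update_protocol() unconditionally,
including when reached from br_nf_forward_arp() as in the trace above.
Don't call nf_bridge_update_protocol() for ARP traffic, as skb->nf_bridge
isn't used in the ARP case.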

Reported-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/bridge/br_netfilter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 93f80fefa496..44420992f72f 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -643,10 +643,10 @@ static int br_nf_forward_finish(struct sk_buff *skb)
 			skb->pkt_type = PACKET_OTHERHOST;
 			nf_bridge->mask ^= BRNF_PKT_TYPE;
 		}
+		nf_bridge_update_protocol(skb);
 	} else {
 		in = *((struct net_device **)(skb->cb));
 	}
-	nf_bridge_update_protocol(skb);
 	nf_bridge_push_encap_header(skb);
 
 	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in,
-- 
cgit v1.2.3-59-g8ed1b


From af5676039a9479e6ff42c6aab9fac1149ac9597f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 13 May 2010 15:00:20 +0200
Subject: netfilter: change NF_ASSERT to WARN_ON

Change netfilter asserts to standard WARN_ON. This has the
benefit of backtrace info and also causes netfilter errors
to show up on kerneloops.org.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/arp_tables.c | 7 +------
 net/ipv4/netfilter/ip_tables.c  | 7 +------
 net/ipv6/netfilter/ip6_tables.c | 7 +------
 3 files changed, 3 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 03352fcba172..65f2944b5a64 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -49,12 +49,7 @@ MODULE_DESCRIPTION("arptables core");
 #endif
 
 #ifdef CONFIG_NETFILTER_DEBUG
-#define ARP_NF_ASSERT(x)					\
-do {								\
-	if (!(x))						\
-		printk("ARP_NF_ASSERT: %s:%s:%u\n",		\
-		       __func__, __FILE__, __LINE__);	\
-} while(0)
+#define ARP_NF_ASSERT(x)	WARN_ON(!(x))
 #else
 #define ARP_NF_ASSERT(x)
 #endif
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 49b9e4fb5460..ad702bc34e75 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -51,12 +51,7 @@ MODULE_DESCRIPTION("IPv4 packet filter");
 #endif
 
 #ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x)						\
-do {								\
-	if (!(x))						\
-		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
-		       __func__, __FILE__, __LINE__);	\
-} while(0)
+#define IP_NF_ASSERT(x)		WARN_ON(!(x))
 #else
 #define IP_NF_ASSERT(x)
 #endif
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 56782336474f..02a7db025d0f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -52,12 +52,7 @@ MODULE_DESCRIPTION("IPv6 packet filter");
 #endif
 
 #ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x)						\
-do {								\
-	if (!(x))						\
-		printk("IP_NF_ASSERT: %s:%s:%u\n",		\
-		       __func__, __FILE__, __LINE__);	\
-} while(0)
+#define IP_NF_ASSERT(x)	WARN_ON(!(x))
 #else
 #define IP_NF_ASSERT(x)
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 654d0fbdc8fe1041918741ed5b6abc8ad6b4c1d8 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 13 May 2010 15:02:08 +0200
Subject: netfilter: cleanup printk messages

Make sure all printk messages have a severity level.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/arp_tables.c                |  4 ++--
 net/ipv4/netfilter/ip_tables.c                 |  2 +-
 net/ipv4/netfilter/iptable_filter.c            |  2 +-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 10 +++++-----
 net/ipv4/netfilter/nf_nat_h323.c               | 16 ++++++++--------
 net/ipv4/netfilter/nf_nat_snmp_basic.c         | 16 ++++++++--------
 net/ipv4/netfilter/nf_nat_standalone.c         |  4 ++--
 net/ipv6/netfilter/ip6_tables.c                |  2 +-
 net/ipv6/netfilter/ip6table_filter.c           |  2 +-
 net/ipv6/netfilter/ip6table_mangle.c           |  2 +-
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 14 +++++++-------
 net/netfilter/nf_conntrack_amanda.c            |  2 +-
 net/netfilter/nf_conntrack_core.c              |  2 +-
 net/netfilter/nf_conntrack_ftp.c               |  4 ++--
 net/netfilter/nf_conntrack_h323_main.c         |  6 +++---
 net/netfilter/nf_conntrack_irc.c               |  4 ++--
 net/netfilter/nf_conntrack_netlink.c           | 12 ++++++------
 net/netfilter/nf_conntrack_proto_sctp.c        |  4 ++--
 net/netfilter/nf_conntrack_sip.c               |  4 ++--
 net/netfilter/nf_conntrack_standalone.c        |  2 +-
 net/netfilter/nf_conntrack_tftp.c              |  4 ++--
 net/netfilter/nf_internals.h                   |  2 +-
 net/netfilter/nfnetlink.c                      |  4 ++--
 net/netfilter/nfnetlink_log.c                  |  4 ++--
 24 files changed, 64 insertions(+), 64 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 65f2944b5a64..1ac01b128621 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -222,7 +222,7 @@ static unsigned int
 arpt_error(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	if (net_ratelimit())
-		printk("arp_tables: error: '%s'\n",
+		pr_err("arp_tables: error: '%s'\n",
 		       (const char *)par->targinfo);
 
 	return NF_DROP;
@@ -385,7 +385,7 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
 			int visited = e->comefrom & (1 << hook);
 
 			if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
-				printk("arptables: loop hook %u pos %u %08X.\n",
+				pr_notice("arptables: loop hook %u pos %u %08X.\n",
 				       hook, pos, e->comefrom);
 				return 0;
 			}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ad702bc34e75..63958f3394a5 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -466,7 +466,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
 			int visited = e->comefrom & (1 << hook);
 
 			if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
-				printk("iptables: loop hook %u pos %u %08X.\n",
+				pr_err("iptables: loop hook %u pos %u %08X.\n",
 				       hook, pos, e->comefrom);
 				return 0;
 			}
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 55392466daa4..c37641e819f2 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -89,7 +89,7 @@ static int __init iptable_filter_init(void)
 	int ret;
 
 	if (forward < 0 || forward > NF_MAX_VERDICT) {
-		printk("iptables forward must be 0 or 1\n");
+		pr_err("iptables forward must be 0 or 1\n");
 		return -EINVAL;
 	}
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 2bb1f87051c4..5a03c02af999 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -382,32 +382,32 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
 
 	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4);
 	if (ret < 0) {
-		printk("nf_conntrack_ipv4: can't register tcp.\n");
+		pr_err("nf_conntrack_ipv4: can't register tcp.\n");
 		goto cleanup_sockopt;
 	}
 
 	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
 	if (ret < 0) {
-		printk("nf_conntrack_ipv4: can't register udp.\n");
+		pr_err("nf_conntrack_ipv4: can't register udp.\n");
 		goto cleanup_tcp;
 	}
 
 	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
 	if (ret < 0) {
-		printk("nf_conntrack_ipv4: can't register icmp.\n");
+		pr_err("nf_conntrack_ipv4: can't register icmp.\n");
 		goto cleanup_udp;
 	}
 
 	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
 	if (ret < 0) {
-		printk("nf_conntrack_ipv4: can't register ipv4\n");
+		pr_err("nf_conntrack_ipv4: can't register ipv4\n");
 		goto cleanup_icmp;
 	}
 
 	ret = nf_register_hooks(ipv4_conntrack_ops,
 				ARRAY_SIZE(ipv4_conntrack_ops));
 	if (ret < 0) {
-		printk("nf_conntrack_ipv4: can't register hooks.\n");
+		pr_err("nf_conntrack_ipv4: can't register hooks.\n");
 		goto cleanup_ipv4;
 	}
 #if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index d4c061874f8f..5045196d853c 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -43,7 +43,7 @@ static int set_addr(struct sk_buff *skb,
 					      addroff, sizeof(buf),
 					      (char *) &buf, sizeof(buf))) {
 			if (net_ratelimit())
-				printk("nf_nat_h323: nf_nat_mangle_tcp_packet"
+				pr_notice("nf_nat_h323: nf_nat_mangle_tcp_packet"
 				       " error\n");
 			return -1;
 		}
@@ -59,7 +59,7 @@ static int set_addr(struct sk_buff *skb,
 					      addroff, sizeof(buf),
 					      (char *) &buf, sizeof(buf))) {
 			if (net_ratelimit())
-				printk("nf_nat_h323: nf_nat_mangle_udp_packet"
+				pr_notice("nf_nat_h323: nf_nat_mangle_udp_packet"
 				       " error\n");
 			return -1;
 		}
@@ -215,7 +215,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 	/* Run out of expectations */
 	if (i >= H323_RTP_CHANNEL_MAX) {
 		if (net_ratelimit())
-			printk("nf_nat_h323: out of expectations\n");
+			pr_notice("nf_nat_h323: out of expectations\n");
 		return 0;
 	}
 
@@ -234,7 +234,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
 
 	if (nated_port == 0) {	/* No port available */
 		if (net_ratelimit())
-			printk("nf_nat_h323: out of RTP ports\n");
+			pr_notice("nf_nat_h323: out of RTP ports\n");
 		return 0;
 	}
 
@@ -291,7 +291,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
 
 	if (nated_port == 0) {	/* No port available */
 		if (net_ratelimit())
-			printk("nf_nat_h323: out of TCP ports\n");
+			pr_notice("nf_nat_h323: out of TCP ports\n");
 		return 0;
 	}
 
@@ -341,7 +341,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 
 	if (nated_port == 0) {	/* No port available */
 		if (net_ratelimit())
-			printk("nf_nat_q931: out of TCP ports\n");
+			pr_notice("nf_nat_q931: out of TCP ports\n");
 		return 0;
 	}
 
@@ -425,7 +425,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 
 	if (nated_port == 0) {	/* No port available */
 		if (net_ratelimit())
-			printk("nf_nat_ras: out of TCP ports\n");
+			pr_notice("nf_nat_ras: out of TCP ports\n");
 		return 0;
 	}
 
@@ -507,7 +507,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
 
 	if (nated_port == 0) {	/* No port available */
 		if (net_ratelimit())
-			printk("nf_nat_q931: out of TCP ports\n");
+			pr_notice("nf_nat_q931: out of TCP ports\n");
 		return 0;
 	}
 
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 4d85b6e55f29..1679e2c0963d 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -401,7 +401,7 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
 	*octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
 	if (*octets == NULL) {
 		if (net_ratelimit())
-			printk("OOM in bsalg (%d)\n", __LINE__);
+			pr_notice("OOM in bsalg (%d)\n", __LINE__);
 		return 0;
 	}
 
@@ -452,7 +452,7 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
 	*oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
 	if (*oid == NULL) {
 		if (net_ratelimit())
-			printk("OOM in bsalg (%d)\n", __LINE__);
+			pr_notice("OOM in bsalg (%d)\n", __LINE__);
 		return 0;
 	}
 
@@ -729,7 +729,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 			if (*obj == NULL) {
 				kfree(id);
 				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
+					pr_notice("OOM in bsalg (%d)\n", __LINE__);
 				return 0;
 			}
 			(*obj)->syntax.l[0] = l;
@@ -746,7 +746,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 				kfree(p);
 				kfree(id);
 				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
+					pr_notice("OOM in bsalg (%d)\n", __LINE__);
 				return 0;
 			}
 			memcpy((*obj)->syntax.c, p, len);
@@ -761,7 +761,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 			if (*obj == NULL) {
 				kfree(id);
 				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
+					pr_notice("OOM in bsalg (%d)\n", __LINE__);
 				return 0;
 			}
 			if (!asn1_null_decode(ctx, end)) {
@@ -782,7 +782,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 				kfree(lp);
 				kfree(id);
 				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
+					pr_notice("OOM in bsalg (%d)\n", __LINE__);
 				return 0;
 			}
 			memcpy((*obj)->syntax.ul, lp, len);
@@ -803,7 +803,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 				kfree(p);
 				kfree(id);
 				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
+					pr_notice("OOM in bsalg (%d)\n", __LINE__);
 				return 0;
 			}
 			memcpy((*obj)->syntax.uc, p, len);
@@ -821,7 +821,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
 			if (*obj == NULL) {
 				kfree(id);
 				if (net_ratelimit())
-					printk("OOM in bsalg (%d)\n", __LINE__);
+					pr_notice("OOM in bsalg (%d)\n", __LINE__);
 				return 0;
 			}
 			(*obj)->syntax.ul[0] = ul;
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 84c7974f5830..beb25819c9c9 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -293,12 +293,12 @@ static int __init nf_nat_standalone_init(void)
 #endif
 	ret = nf_nat_rule_init();
 	if (ret < 0) {
-		printk("nf_nat_init: can't setup rules.\n");
+		pr_err("nf_nat_init: can't setup rules.\n");
 		goto cleanup_decode_session;
 	}
 	ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
 	if (ret < 0) {
-		printk("nf_nat_init: can't register hooks.\n");
+		pr_err("nf_nat_init: can't register hooks.\n");
 		goto cleanup_rule_init;
 	}
 	return ret;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 02a7db025d0f..6f517bd83692 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -481,7 +481,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
 			int visited = e->comefrom & (1 << hook);
 
 			if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
-				printk("iptables: loop hook %u pos %u %08X.\n",
+				pr_err("iptables: loop hook %u pos %u %08X.\n",
 				       hook, pos, e->comefrom);
 				return 0;
 			}
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index d6fc9aff3163..c9e37c8fd62c 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -81,7 +81,7 @@ static int __init ip6table_filter_init(void)
 	int ret;
 
 	if (forward < 0 || forward > NF_MAX_VERDICT) {
-		printk("iptables forward must be 0 or 1\n");
+		pr_err("iptables forward must be 0 or 1\n");
 		return -EINVAL;
 	}
 
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index 6a102b57f356..679a0a3b7b3c 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -43,7 +43,7 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)
 	if (skb->len < sizeof(struct iphdr) ||
 	    ip_hdrlen(skb) < sizeof(struct iphdr)) {
 		if (net_ratelimit())
-			printk("ip6t_hook: happy cracking.\n");
+			pr_warning("ip6t_hook: happy cracking.\n");
 		return NF_ACCEPT;
 	}
 #endif
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 996c3f41fecd..ff43461704be 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -280,7 +280,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
 	/* root is playing with raw sockets. */
 	if (skb->len < sizeof(struct ipv6hdr)) {
 		if (net_ratelimit())
-			printk("ipv6_conntrack_local: packet too short\n");
+			pr_notice("ipv6_conntrack_local: packet too short\n");
 		return NF_ACCEPT;
 	}
 	return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn);
@@ -406,37 +406,37 @@ static int __init nf_conntrack_l3proto_ipv6_init(void)
 
 	ret = nf_ct_frag6_init();
 	if (ret < 0) {
-		printk("nf_conntrack_ipv6: can't initialize frag6.\n");
+		pr_err("nf_conntrack_ipv6: can't initialize frag6.\n");
 		return ret;
 	}
 	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6);
 	if (ret < 0) {
-		printk("nf_conntrack_ipv6: can't register tcp.\n");
+		pr_err("nf_conntrack_ipv6: can't register tcp.\n");
 		goto cleanup_frag6;
 	}
 
 	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6);
 	if (ret < 0) {
-		printk("nf_conntrack_ipv6: can't register udp.\n");
+		pr_err("nf_conntrack_ipv6: can't register udp.\n");
 		goto cleanup_tcp;
 	}
 
 	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmpv6);
 	if (ret < 0) {
-		printk("nf_conntrack_ipv6: can't register icmpv6.\n");
+		pr_err("nf_conntrack_ipv6: can't register icmpv6.\n");
 		goto cleanup_udp;
 	}
 
 	ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6);
 	if (ret < 0) {
-		printk("nf_conntrack_ipv6: can't register ipv6\n");
+		pr_err("nf_conntrack_ipv6: can't register ipv6\n");
 		goto cleanup_icmpv6;
 	}
 
 	ret = nf_register_hooks(ipv6_conntrack_ops,
 				ARRAY_SIZE(ipv6_conntrack_ops));
 	if (ret < 0) {
-		printk("nf_conntrack_ipv6: can't register pre-routing defrag "
+		pr_err("nf_conntrack_ipv6: can't register pre-routing defrag "
 		       "hook.\n");
 		goto cleanup_ipv6;
 	}
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 372e80f07a81..13fd2c55e329 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -108,7 +108,7 @@ static int amanda_help(struct sk_buff *skb,
 	dataoff = protoff + sizeof(struct udphdr);
 	if (dataoff >= skb->len) {
 		if (net_ratelimit())
-			printk("amanda_help: skblen = %u\n", skb->len);
+			printk(KERN_ERR "amanda_help: skblen = %u\n", skb->len);
 		return NF_ACCEPT;
 	}
 
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 3907efb97a7c..b83c530c5e0a 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1335,7 +1335,7 @@ static int nf_conntrack_init_init_net(void)
 	}
 	nf_conntrack_max = max_factor * nf_conntrack_htable_size;
 
-	printk("nf_conntrack version %s (%u buckets, %d max)\n",
+	printk(KERN_INFO "nf_conntrack version %s (%u buckets, %d max)\n",
 	       NF_CONNTRACK_VERSION, nf_conntrack_htable_size,
 	       nf_conntrack_max);
 
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 2ae3169e7633..e17cb7c7dd8f 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -573,8 +573,8 @@ static int __init nf_conntrack_ftp_init(void)
 				 ftp[i][j].tuple.src.l3num, ports[i]);
 			ret = nf_conntrack_helper_register(&ftp[i][j]);
 			if (ret) {
-				printk("nf_ct_ftp: failed to register helper "
-				       " for pf: %d port: %d\n",
+				printk(KERN_ERR "nf_ct_ftp: failed to register"
+				       " helper for pf: %d port: %d\n",
 					ftp[i][j].tuple.src.l3num, ports[i]);
 				nf_conntrack_ftp_fini();
 				return ret;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 48bf15073a85..6eaee7c8a337 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -607,7 +607,7 @@ static int h245_help(struct sk_buff *skb, unsigned int protoff,
       drop:
 	spin_unlock_bh(&nf_h323_lock);
 	if (net_ratelimit())
-		printk("nf_ct_h245: packet dropped\n");
+		pr_info("nf_ct_h245: packet dropped\n");
 	return NF_DROP;
 }
 
@@ -1152,7 +1152,7 @@ static int q931_help(struct sk_buff *skb, unsigned int protoff,
       drop:
 	spin_unlock_bh(&nf_h323_lock);
 	if (net_ratelimit())
-		printk("nf_ct_q931: packet dropped\n");
+		pr_info("nf_ct_q931: packet dropped\n");
 	return NF_DROP;
 }
 
@@ -1727,7 +1727,7 @@ static int ras_help(struct sk_buff *skb, unsigned int protoff,
       drop:
 	spin_unlock_bh(&nf_h323_lock);
 	if (net_ratelimit())
-		printk("nf_ct_ras: packet dropped\n");
+		pr_info("nf_ct_ras: packet dropped\n");
 	return NF_DROP;
 }
 
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 7673930ca342..b394aa318776 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -235,7 +235,7 @@ static int __init nf_conntrack_irc_init(void)
 	char *tmpname;
 
 	if (max_dcc_channels < 1) {
-		printk("nf_ct_irc: max_dcc_channels must not be zero\n");
+		printk(KERN_ERR "nf_ct_irc: max_dcc_channels must not be zero\n");
 		return -EINVAL;
 	}
 
@@ -267,7 +267,7 @@ static int __init nf_conntrack_irc_init(void)
 
 		ret = nf_conntrack_helper_register(&irc[i]);
 		if (ret) {
-			printk("nf_ct_irc: failed to register helper "
+			printk(KERN_ERR "nf_ct_irc: failed to register helper "
 			       "for pf: %u port: %u\n",
 			       irc[i].tuple.src.l3num, ports[i]);
 			nf_conntrack_irc_fini();
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4e55403bf263..961fb6a85294 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2057,29 +2057,29 @@ static int __init ctnetlink_init(void)
 {
 	int ret;
 
-	printk("ctnetlink v%s: registering with nfnetlink.\n", version);
+	pr_info("ctnetlink v%s: registering with nfnetlink.\n", version);
 	ret = nfnetlink_subsys_register(&ctnl_subsys);
 	if (ret < 0) {
-		printk("ctnetlink_init: cannot register with nfnetlink.\n");
+		pr_err("ctnetlink_init: cannot register with nfnetlink.\n");
 		goto err_out;
 	}
 
 	ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
 	if (ret < 0) {
-		printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
+		pr_err("ctnetlink_init: cannot register exp with nfnetlink.\n");
 		goto err_unreg_subsys;
 	}
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 	ret = nf_conntrack_register_notifier(&ctnl_notifier);
 	if (ret < 0) {
-		printk("ctnetlink_init: cannot register notifier.\n");
+		pr_err("ctnetlink_init: cannot register notifier.\n");
 		goto err_unreg_exp_subsys;
 	}
 
 	ret = nf_ct_expect_register_notifier(&ctnl_notifier_exp);
 	if (ret < 0) {
-		printk("ctnetlink_init: cannot expect register notifier.\n");
+		pr_err("ctnetlink_init: cannot expect register notifier.\n");
 		goto err_unreg_notifier;
 	}
 #endif
@@ -2100,7 +2100,7 @@ err_out:
 
 static void __exit ctnetlink_exit(void)
 {
-	printk("ctnetlink: unregistering from nfnetlink.\n");
+	pr_info("ctnetlink: unregistering from nfnetlink.\n");
 
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 	nf_ct_expect_unregister_notifier(&ctnl_notifier_exp);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index b68ff15ed979..c6049c2d5ea8 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -717,12 +717,12 @@ static int __init nf_conntrack_proto_sctp_init(void)
 
 	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp4);
 	if (ret) {
-		printk("nf_conntrack_l4proto_sctp4: protocol register failed\n");
+		pr_err("nf_conntrack_l4proto_sctp4: protocol register failed\n");
 		goto out;
 	}
 	ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_sctp6);
 	if (ret) {
-		printk("nf_conntrack_l4proto_sctp6: protocol register failed\n");
+		pr_err("nf_conntrack_l4proto_sctp6: protocol register failed\n");
 		goto cleanup_sctp4;
 	}
 
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index c6cd1b84eddd..b20f4275893c 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1549,8 +1549,8 @@ static int __init nf_conntrack_sip_init(void)
 
 			ret = nf_conntrack_helper_register(&sip[i][j]);
 			if (ret) {
-				printk("nf_ct_sip: failed to register helper "
-				       "for pf: %u port: %u\n",
+				printk(KERN_ERR "nf_ct_sip: failed to register"
+				       " helper for pf: %u port: %u\n",
 				       sip[i][j].tuple.src.l3num, ports[i]);
 				nf_conntrack_sip_fini();
 				return ret;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index ea4a8d384234..eb973fcd67ab 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -446,7 +446,7 @@ out_kmemdup:
 	if (net_eq(net, &init_net))
 		unregister_sysctl_table(nf_ct_netfilter_header);
 out:
-	printk("nf_conntrack: can't register to sysctl.\n");
+	printk(KERN_ERR "nf_conntrack: can't register to sysctl.\n");
 	return -ENOMEM;
 }
 
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index 46e646b2e9b9..75466fd72f4f 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -138,8 +138,8 @@ static int __init nf_conntrack_tftp_init(void)
 
 			ret = nf_conntrack_helper_register(&tftp[i][j]);
 			if (ret) {
-				printk("nf_ct_tftp: failed to register helper "
-				       "for pf: %u port: %u\n",
+				printk(KERN_ERR "nf_ct_tftp: failed to register"
+				       " helper for pf: %u port: %u\n",
 					tftp[i][j].tuple.src.l3num, ports[i]);
 				nf_conntrack_tftp_fini();
 				return ret;
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index bf6609978af7..770f76432ad0 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -6,7 +6,7 @@
 #include <linux/netdevice.h>
 
 #ifdef CONFIG_NETFILTER_DEBUG
-#define NFDEBUG(format, args...)  printk(format , ## args)
+#define NFDEBUG(format, args...)  printk(KERN_DEBUG format , ## args)
 #else
 #define NFDEBUG(format, args...)
 #endif
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 39b0e3100575..b4a4532823e8 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -212,13 +212,13 @@ static struct pernet_operations nfnetlink_net_ops = {
 
 static int __init nfnetlink_init(void)
 {
-	printk("Netfilter messages via NETLINK v%s.\n", nfversion);
+	pr_info("Netfilter messages via NETLINK v%s.\n", nfversion);
 	return register_pernet_subsys(&nfnetlink_net_ops);
 }
 
 static void __exit nfnetlink_exit(void)
 {
-	printk("Removing netfilter NETLINK layer.\n");
+	pr_info("Removing netfilter NETLINK layer.\n");
 	unregister_pernet_subsys(&nfnetlink_net_ops);
 }
 module_init(nfnetlink_init);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 203643fb2c52..fc9a211e629e 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -297,7 +297,7 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)
 	n = max(inst_size, pkt_size);
 	skb = alloc_skb(n, GFP_ATOMIC);
 	if (!skb) {
-		PRINTR("nfnetlink_log: can't alloc whole buffer (%u bytes)\n",
+		pr_notice("nfnetlink_log: can't alloc whole buffer (%u bytes)\n",
 			inst_size);
 
 		if (n > pkt_size) {
@@ -306,7 +306,7 @@ nfulnl_alloc_skb(unsigned int inst_size, unsigned int pkt_size)
 
 			skb = alloc_skb(pkt_size, GFP_ATOMIC);
 			if (!skb)
-				PRINTR("nfnetlink_log: can't even alloc %u "
+				pr_err("nfnetlink_log: can't even alloc %u "
 				       "bytes\n", pkt_size);
 		}
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 736d58e3a2245ac2779fe0f278f8735bcf33ca8d Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 13 May 2010 15:16:27 +0200
Subject: netfilter: remove unnecessary returns from void function()s

This patch removes, from the netfilter files under net/,
all the unnecessary return; statements that precede the
last closing brace of void functions.

It does not remove the returns that are immediately
preceded by a label as gcc doesn't like that.

Done via:
$ grep -rP --include=*.[ch] -l "return;\n}" net/ | \
  xargs perl -i -e 'local $/ ; while (<>) { s/\n[ \t\n]+return;\n}/\n}/g; print; }'

Signed-off-by: Joe Perches <joe@perches.com>
[Patrick: changed to keep return statements in otherwise empty function bodies]
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 net/ipv4/netfilter/ip_queue.c        | 1 -
 net/ipv6/netfilter/ip6_queue.c       | 1 -
 net/netfilter/nf_conntrack_netlink.c | 1 -
 net/netfilter/nf_queue.c             | 1 -
 net/netfilter/xt_time.c              | 1 -
 5 files changed, 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index c838238104f5..a4e5fc5df4bf 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -461,7 +461,6 @@ __ipq_rcv_skb(struct sk_buff *skb)
 
 	if (flags & NLM_F_ACK)
 		netlink_ack(skb, nlh, 0);
-	return;
 }
 
 static void
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 8656eb75520c..8c201743d96d 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -462,7 +462,6 @@ __ipq_rcv_skb(struct sk_buff *skb)
 
 	if (flags & NLM_F_ACK)
 		netlink_ack(skb, nlh, 0);
-	return;
 }
 
 static void
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 961fb6a85294..c42ff6aa441d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2109,7 +2109,6 @@ static void __exit ctnetlink_exit(void)
 
 	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
 	nfnetlink_subsys_unregister(&ctnl_subsys);
-	return;
 }
 
 module_init(ctnetlink_init);
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index c49ef219899e..0b1103c0b1f3 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -279,7 +279,6 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	}
 	rcu_read_unlock();
 	kfree(entry);
-	return;
 }
 EXPORT_SYMBOL(nf_reinject);
 
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 79234bb19d05..c48975ff8ea2 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -148,7 +148,6 @@ static void localtime_3(struct xtm *r, time_t time)
 	}
 
 	r->month    = i + 1;
-	return;
 }
 
 static bool
-- 
cgit v1.2.3-59-g8ed1b


From 83827f6a891e20de7468b1181f2ae8a3cc72587b Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 14 May 2010 13:52:30 -0700
Subject: netfilter: xt_TEE depends on NF_CONNTRACK

Fix xt_TEE build for the case of NF_CONNTRACK=m and
NETFILTER_XT_TARGET_TEE=y:

xt_TEE.c:(.text+0x6df5c): undefined reference to `nf_conntrack_untracked'
4x

Built with all 4 m/y combinations.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e223f47b8bae..8593a77cfea9 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -506,6 +506,7 @@ config NETFILTER_XT_TARGET_TEE
 	tristate '"TEE" - packet cloning to alternate destiantion'
 	depends on NETFILTER_ADVANCED
 	depends on (IPV6 || IPV6=n)
+	depends on !NF_CONNTRACK || NF_CONNTRACK
 	---help---
 	This option adds a "TEE" target with which a packet can be cloned and
 	this clone be rerouted to another nexthop.
-- 
cgit v1.2.3-59-g8ed1b


From cfb478da70f2213520866c84cb0aea0d7c0e1148 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 10 May 2010 09:31:08 +0000
Subject: bridge: netpoll cleanup

Move code around so that the ifdefs for CONFIG_NET_POLL_CONTROLLER don't
have to show up in the main code path. The control functions should be in
helpers that are only compiled if needed.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c  | 30 +++++++++++++++++++++++++-----
 net/bridge/br_if.c      | 23 ++---------------------
 net/bridge/br_private.h | 14 ++++++++++++--
 3 files changed, 39 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index f15f9c4a0dd2..22b97ba7dc52 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -199,7 +199,7 @@ static int br_set_tx_csum(struct net_device *dev, u32 data)
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-bool br_devices_support_netpoll(struct net_bridge *br)
+static bool br_devices_support_netpoll(struct net_bridge *br)
 {
 	struct net_bridge_port *p;
 	bool ret = true;
@@ -225,9 +225,9 @@ static void br_poll_controller(struct net_device *br_dev)
 		netpoll_poll_dev(np->real_dev);
 }
 
-void br_netpoll_cleanup(struct net_device *br_dev)
+void br_netpoll_cleanup(struct net_device *dev)
 {
-	struct net_bridge *br = netdev_priv(br_dev);
+	struct net_bridge *br = netdev_priv(dev);
 	struct net_bridge_port *p, *n;
 	const struct net_device_ops *ops;
 
@@ -243,10 +243,30 @@ void br_netpoll_cleanup(struct net_device *br_dev)
 	}
 }
 
-#else
+void br_netpoll_disable(struct net_bridge *br,
+			struct net_device *dev)
+{
+	if (br_devices_support_netpoll(br))
+		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+	if (dev->netdev_ops->ndo_netpoll_cleanup)
+		dev->netdev_ops->ndo_netpoll_cleanup(dev);
+	else
+		dev->npinfo = NULL;
+}
 
-void br_netpoll_cleanup(struct net_device *br_dev)
+void br_netpoll_enable(struct net_bridge *br,
+		       struct net_device *dev)
 {
+	if (br_devices_support_netpoll(br)) {
+		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
+		if (br->dev->npinfo)
+			dev->npinfo = br->dev->npinfo;
+	} else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) {
+		br->dev->priv_flags |= IFF_DISABLE_NETPOLL;
+		printk(KERN_INFO "%s:new device %s"
+			" does not support netpoll (disabling)",
+			br->dev->name, dev->name);
+	}
 }
 
 #endif
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 537bdd60d9b9..45f3f8871149 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -154,14 +154,7 @@ static void del_nbp(struct net_bridge_port *p)
 	kobject_uevent(&p->kobj, KOBJ_REMOVE);
 	kobject_del(&p->kobj);
 
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	if (br_devices_support_netpoll(br))
-		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
-	if (dev->netdev_ops->ndo_netpoll_cleanup)
-		dev->netdev_ops->ndo_netpoll_cleanup(dev);
-	else
-		dev->npinfo = NULL;
-#endif
+	br_netpoll_disable(br, dev);
 	call_rcu(&p->rcu, destroy_nbp_rcu);
 }
 
@@ -455,19 +448,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
 
 	kobject_uevent(&p->kobj, KOBJ_ADD);
 
-#ifdef CONFIG_NET_POLL_CONTROLLER
-	if (br_devices_support_netpoll(br)) {
-		br->dev->priv_flags &= ~IFF_DISABLE_NETPOLL;
-		if (br->dev->npinfo)
-			dev->npinfo = br->dev->npinfo;
-	} else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) {
-		br->dev->priv_flags |= IFF_DISABLE_NETPOLL;
-		printk(KERN_INFO "New device %s does not support netpoll\n",
-			dev->name);
-		printk(KERN_INFO "Disabling netpoll for %s\n",
-			br->dev->name);
-	}
-#endif
+	br_netpoll_enable(br, dev);
 
 	return 0;
 err2:
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 3d2d3fe0a97e..8a072661a744 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -253,8 +253,18 @@ static inline int br_is_root_bridge(const struct net_bridge *br)
 extern void br_dev_setup(struct net_device *dev);
 extern netdev_tx_t br_dev_xmit(struct sk_buff *skb,
 			       struct net_device *dev);
-extern bool br_devices_support_netpoll(struct net_bridge *br);
-extern void br_netpoll_cleanup(struct net_device *br_dev);
+#ifdef CONFIG_NET_POLL_CONTROLLER
+extern void br_netpoll_cleanup(struct net_device *dev);
+extern void br_netpoll_enable(struct net_bridge *br,
+			      struct net_device *dev);
+extern void br_netpoll_disable(struct net_bridge *br,
+			       struct net_device *dev);
+#else
+#define br_netpoll_cleanup(br)
+#define br_netpoll_enable(br, dev)
+#define br_netpoll_disable(br, dev)
+
+#endif
 
 /* br_fdb.c */
 extern int br_fdb_init(void);
-- 
cgit v1.2.3-59-g8ed1b


From 28a16c97963d3bc36a2c192859f6d8025ef2967a Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 10 May 2010 09:31:09 +0000
Subject: bridge: change console message interface

Use one set of macros for all bridge messages.

Note: can't use the netdev_XXX macros because the bridge is purely
virtual and has no device parent.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br.c           |  2 +-
 net/bridge/br_device.c    |  5 ++---
 net/bridge/br_fdb.c       |  9 ++++-----
 net/bridge/br_ioctl.c     |  2 +-
 net/bridge/br_multicast.c | 32 +++++++++++++-------------------
 net/bridge/br_netlink.c   |  8 +++++---
 net/bridge/br_private.h   | 15 +++++++++++++++
 net/bridge/br_stp.c       | 11 +++++------
 net/bridge/br_stp_if.c    | 16 ++++++----------
 net/bridge/br_stp_timer.c | 24 ++++++++++--------------
 10 files changed, 62 insertions(+), 62 deletions(-)

(limited to 'net')

diff --git a/net/bridge/br.c b/net/bridge/br.c
index e1241c76239a..76357b547752 100644
--- a/net/bridge/br.c
+++ b/net/bridge/br.c
@@ -38,7 +38,7 @@ static int __init br_init(void)
 
 	err = stp_proto_register(&br_stp_proto);
 	if (err < 0) {
-		printk(KERN_ERR "bridge: can't register sap for STP\n");
+		pr_err("bridge: can't register sap for STP\n");
 		return err;
 	}
 
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 22b97ba7dc52..eedf2c94820e 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -263,9 +263,8 @@ void br_netpoll_enable(struct net_bridge *br,
 			dev->npinfo = br->dev->npinfo;
 	} else if (!(br->dev->priv_flags & IFF_DISABLE_NETPOLL)) {
 		br->dev->priv_flags |= IFF_DISABLE_NETPOLL;
-		printk(KERN_INFO "%s:new device %s"
-			" does not support netpoll (disabling)",
-			br->dev->name, dev->name);
+		br_info(br,"new device %s does not support netpoll (disabling)",
+			dev->name);
 	}
 }
 
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 9101a4e56201..26637439965b 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -353,8 +353,7 @@ static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
 		 */
 		if (fdb->is_local)
 			return 0;
-
-		printk(KERN_WARNING "%s adding interface with same address "
+		br_warn(br, "adding interface %s with same address "
 		       "as a received packet\n",
 		       source->dev->name);
 		fdb_delete(fdb);
@@ -397,9 +396,9 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
 		/* attempt to update an entry for a local interface */
 		if (unlikely(fdb->is_local)) {
 			if (net_ratelimit())
-				printk(KERN_WARNING "%s: received packet with "
-				       "own address as source address\n",
-				       source->dev->name);
+				br_warn(br, "received packet on %s with "
+					"own address as source address\n",
+					source->dev->name);
 		} else {
 			/* fastpath: update of existing entry */
 			fdb->dst = source;
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index 995afc4b04dc..cb43312b846e 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -412,6 +412,6 @@ int br_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 
 	}
 
-	pr_debug("Bridge does not support ioctl 0x%x\n", cmd);
+	br_debug(br, "Bridge does not support ioctl 0x%x\n", cmd);
 	return -EOPNOTSUPP;
 }
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index c8419e240316..9d21d98ae5fa 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -585,10 +585,9 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
 
 	if (unlikely(count > br->hash_elasticity && count)) {
 		if (net_ratelimit())
-			printk(KERN_INFO "%s: Multicast hash table "
-			       "chain limit reached: %s\n",
-			       br->dev->name, port ? port->dev->name :
-						     br->dev->name);
+			br_info(br, "Multicast hash table "
+				"chain limit reached: %s\n",
+				port ? port->dev->name : br->dev->name);
 
 		elasticity = br->hash_elasticity;
 	}
@@ -596,11 +595,9 @@ static struct net_bridge_mdb_entry *br_multicast_get_group(
 	if (mdb->size >= max) {
 		max *= 2;
 		if (unlikely(max >= br->hash_max)) {
-			printk(KERN_WARNING "%s: Multicast hash table maximum "
-			       "reached, disabling snooping: %s, %d\n",
-			       br->dev->name, port ? port->dev->name :
-						     br->dev->name,
-			       max);
+			br_warn(br, "Multicast hash table maximum "
+				"reached, disabling snooping: %s, %d\n",
+				port ? port->dev->name : br->dev->name, max);
 			err = -E2BIG;
 disable:
 			br->multicast_disabled = 1;
@@ -611,22 +608,19 @@ disable:
 	if (max > mdb->max || elasticity) {
 		if (mdb->old) {
 			if (net_ratelimit())
-				printk(KERN_INFO "%s: Multicast hash table "
-				       "on fire: %s\n",
-				       br->dev->name, port ? port->dev->name :
-							     br->dev->name);
+				br_info(br, "Multicast hash table "
+					"on fire: %s\n",
+					port ? port->dev->name : br->dev->name);
 			err = -EEXIST;
 			goto err;
 		}
 
 		err = br_mdb_rehash(&br->mdb, max, elasticity);
 		if (err) {
-			printk(KERN_WARNING "%s: Cannot rehash multicast "
-			       "hash table, disabling snooping: "
-			       "%s, %d, %d\n",
-			       br->dev->name, port ? port->dev->name :
-						     br->dev->name,
-			       mdb->size, err);
+			br_warn(br, "Cannot rehash multicast "
+				"hash table, disabling snooping: %s, %d, %d\n",
+				port ? port->dev->name : br->dev->name,
+				mdb->size, err);
 			goto disable;
 		}
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index aa56ac2c8829..fe0a79018ab2 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -42,8 +42,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por
 	struct nlmsghdr *nlh;
 	u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN;
 
-	pr_debug("br_fill_info event %d port %s master %s\n",
-		 event, dev->name, br->dev->name);
+	br_debug(br, "br_fill_info event %d port %s master %s\n",
+		     event, dev->name, br->dev->name);
 
 	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags);
 	if (nlh == NULL)
@@ -87,7 +87,9 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port)
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
-	pr_debug("bridge notify event=%d\n", event);
+	br_debug(port->br, "port %u(%s) event %d\n",
+		 (unsigned)port->port_no, port->dev->name, event);
+
 	skb = nlmsg_new(br_nlmsg_size(), GFP_ATOMIC);
 	if (skb == NULL)
 		goto errout;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 8a072661a744..79a13d1c2573 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -240,6 +240,21 @@ struct br_input_skb_cb {
 # define BR_INPUT_SKB_CB_MROUTERS_ONLY(__skb)	(0)
 #endif
 
+#define br_printk(level, br, format, args...)	\
+	printk(level "%s: " format, (br)->dev->name, ##args)
+
+#define br_err(__br, format, args...)			\
+	br_printk(KERN_ERR, __br, format, ##args)
+#define br_warn(__br, format, args...)			\
+	br_printk(KERN_WARNING, __br, format, ##args)
+#define br_notice(__br, format, args...)		\
+	br_printk(KERN_NOTICE, __br, format, ##args)
+#define br_info(__br, format, args...)			\
+	br_printk(KERN_INFO, __br, format, ##args)
+
+#define br_debug(br, format, args...)			\
+	pr_debug("%s: " format,  (br)->dev->name, ##args)
+
 extern struct notifier_block br_device_notifier;
 extern const u8 br_group_address[ETH_ALEN];
 
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index edcf14b560f6..57186d84d2bd 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -31,10 +31,9 @@ static const char *const br_port_state_names[] = {
 
 void br_log_state(const struct net_bridge_port *p)
 {
-	pr_info("%s: port %d(%s) entering %s state\n",
-		p->br->dev->name, p->port_no, p->dev->name,
+	br_info(p->br, "port %u(%s) entering %s state\n",
+		(unsigned) p->port_no, p->dev->name,
 		br_port_state_names[p->state]);
-
 }
 
 /* called under bridge lock */
@@ -300,7 +299,7 @@ void br_topology_change_detection(struct net_bridge *br)
 	if (br->stp_enabled != BR_KERNEL_STP)
 		return;
 
-	pr_info("%s: topology change detected, %s\n", br->dev->name,
+	br_info(br, "topology change detected, %s\n",
 		isroot ? "propagating" : "sending tcn bpdu");
 
 	if (isroot) {
@@ -469,8 +468,8 @@ void br_received_config_bpdu(struct net_bridge_port *p, struct br_config_bpdu *b
 void br_received_tcn_bpdu(struct net_bridge_port *p)
 {
 	if (br_is_designated_port(p)) {
-		pr_info("%s: received tcn bpdu on port %i(%s)\n",
-		       p->br->dev->name, p->port_no, p->dev->name);
+		br_info(p->br, "port %u(%s) received tcn bpdu\n",
+			(unsigned) p->port_no, p->dev->name);
 
 		br_topology_change_detection(p->br);
 		br_topology_change_acknowledge(p);
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index d527119e9f54..1d8826914cbf 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -85,17 +85,16 @@ void br_stp_enable_port(struct net_bridge_port *p)
 {
 	br_init_port(p);
 	br_port_state_selection(p->br);
+	br_log_state(p);
 }
 
 /* called under bridge lock */
 void br_stp_disable_port(struct net_bridge_port *p)
 {
-	struct net_bridge *br;
+	struct net_bridge *br = p->br;
 	int wasroot;
 
-	br = p->br;
-	printk(KERN_INFO "%s: port %i(%s) entering %s state\n",
-	       br->dev->name, p->port_no, p->dev->name, "disabled");
+	br_log_state(p);
 
 	wasroot = br_is_root_bridge(br);
 	br_become_designated_port(p);
@@ -127,11 +126,10 @@ static void br_stp_start(struct net_bridge *br)
 	r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
 	if (r == 0) {
 		br->stp_enabled = BR_USER_STP;
-		printk(KERN_INFO "%s: userspace STP started\n", br->dev->name);
+		br_debug(br, "userspace STP started\n");
 	} else {
 		br->stp_enabled = BR_KERNEL_STP;
-		printk(KERN_INFO "%s: starting userspace STP failed, "
-				"starting kernel STP\n", br->dev->name);
+		br_debug(br, "using kernel STP\n");
 
 		/* To start timers on any ports left in blocking */
 		spin_lock_bh(&br->lock);
@@ -148,9 +146,7 @@ static void br_stp_stop(struct net_bridge *br)
 
 	if (br->stp_enabled == BR_USER_STP) {
 		r = call_usermodehelper(BR_STP_PROG, argv, envp, 1);
-		printk(KERN_INFO "%s: userspace STP stopped, return code %d\n",
-			br->dev->name, r);
-
+		br_info(br, "userspace STP stopped, return code %d\n", r);
 
 		/* To start timers on any ports left in blocking */
 		spin_lock_bh(&br->lock);
diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c
index 772a140bfdf0..7b22456023c5 100644
--- a/net/bridge/br_stp_timer.c
+++ b/net/bridge/br_stp_timer.c
@@ -35,7 +35,7 @@ static void br_hello_timer_expired(unsigned long arg)
 {
 	struct net_bridge *br = (struct net_bridge *)arg;
 
-	pr_debug("%s: hello timer expired\n", br->dev->name);
+	br_debug(br, "hello timer expired\n");
 	spin_lock(&br->lock);
 	if (br->dev->flags & IFF_UP) {
 		br_config_bpdu_generation(br);
@@ -55,13 +55,9 @@ static void br_message_age_timer_expired(unsigned long arg)
 	if (p->state == BR_STATE_DISABLED)
 		return;
 
-
-	pr_info("%s: neighbor %.2x%.2x.%.2x:%.2x:%.2x:%.2x:%.2x:%.2x lost on port %d(%s)\n",
-		br->dev->name,
-		id->prio[0], id->prio[1],
-		id->addr[0], id->addr[1], id->addr[2],
-		id->addr[3], id->addr[4], id->addr[5],
-		p->port_no, p->dev->name);
+	br_info(br, "port %u(%s) neighbor %.2x%.2x.%pM lost\n",
+		(unsigned) p->port_no, p->dev->name,
+		id->prio[0], id->prio[1], &id->addr);
 
 	/*
 	 * According to the spec, the message age timer cannot be
@@ -87,8 +83,8 @@ static void br_forward_delay_timer_expired(unsigned long arg)
 	struct net_bridge_port *p = (struct net_bridge_port *) arg;
 	struct net_bridge *br = p->br;
 
-	pr_debug("%s: %d(%s) forward delay timer\n",
-		 br->dev->name, p->port_no, p->dev->name);
+	br_debug(br, "port %u(%s) forward delay timer\n",
+		 (unsigned) p->port_no, p->dev->name);
 	spin_lock(&br->lock);
 	if (p->state == BR_STATE_LISTENING) {
 		p->state = BR_STATE_LEARNING;
@@ -107,7 +103,7 @@ static void br_tcn_timer_expired(unsigned long arg)
 {
 	struct net_bridge *br = (struct net_bridge *) arg;
 
-	pr_debug("%s: tcn timer expired\n", br->dev->name);
+	br_debug(br, "tcn timer expired\n");
 	spin_lock(&br->lock);
 	if (br->dev->flags & IFF_UP) {
 		br_transmit_tcn(br);
@@ -121,7 +117,7 @@ static void br_topology_change_timer_expired(unsigned long arg)
 {
 	struct net_bridge *br = (struct net_bridge *) arg;
 
-	pr_debug("%s: topo change timer expired\n", br->dev->name);
+	br_debug(br, "topo change timer expired\n");
 	spin_lock(&br->lock);
 	br->topology_change_detected = 0;
 	br->topology_change = 0;
@@ -132,8 +128,8 @@ static void br_hold_timer_expired(unsigned long arg)
 {
 	struct net_bridge_port *p = (struct net_bridge_port *) arg;
 
-	pr_debug("%s: %d(%s) hold timer expired\n",
-		 p->br->dev->name,  p->port_no, p->dev->name);
+	br_debug(p->br, "port %u(%s) hold timer expired\n",
+		 (unsigned) p->port_no, p->dev->name);
 
 	spin_lock(&p->br->lock);
 	if (p->config_pending)
-- 
cgit v1.2.3-59-g8ed1b


From e0f43752a942b7be1bc06b9fd74e20ae337c1cca Mon Sep 17 00:00:00 2001
From: Simon Arlott <simon@fire.lp0.eu>
Date: Mon, 10 May 2010 09:31:11 +0000
Subject: bridge: update sysfs link names if port device names have changed

Links for each port are created in sysfs using the device
name, but the name could be changed after the device has
been added to the bridge.

As well as being unable to remove interfaces after this
occurs (because userspace tools don't recognise the new
name, and the kernel won't recognise the old name), adding
another interface with the old name to the bridge will
cause an error trying to create the sysfs link.

This fixes the problem by listening for NETDEV_CHANGENAME
notifications and renaming the link.

https://bugzilla.kernel.org/show_bug.cgi?id=12743

Signed-off-by: Simon Arlott <simon@fire.lp0.eu>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/sysfs/symlink.c       |  1 +
 net/bridge/br_if.c       |  2 +-
 net/bridge/br_notify.c   |  7 +++++++
 net/bridge/br_private.h  |  6 ++++++
 net/bridge/br_sysfs_if.c | 32 +++++++++++++++++++++++++++-----
 5 files changed, 42 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index b93ec51fa7ac..942f239a2132 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -261,3 +261,4 @@ const struct inode_operations sysfs_symlink_inode_operations = {
 
 EXPORT_SYMBOL_GPL(sysfs_create_link);
 EXPORT_SYMBOL_GPL(sysfs_remove_link);
+EXPORT_SYMBOL_GPL(sysfs_rename_link);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 45f3f8871149..f25e3c92bd72 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -133,7 +133,7 @@ static void del_nbp(struct net_bridge_port *p)
 	struct net_bridge *br = p->br;
 	struct net_device *dev = p->dev;
 
-	sysfs_remove_link(br->ifobj, dev->name);
+	sysfs_remove_link(br->ifobj, p->sysfs_name);
 
 	dev_set_promiscuity(dev, -1);
 
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index 1413b72acc7f..717e1fd6133c 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -34,6 +34,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 	struct net_device *dev = ptr;
 	struct net_bridge_port *p = dev->br_port;
 	struct net_bridge *br;
+	int err;
 
 	/* not a port of a bridge */
 	if (p == NULL)
@@ -83,6 +84,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
 		br_del_if(br, dev);
 		break;
 
+	case NETDEV_CHANGENAME:
+		err = br_sysfs_renameif(p);
+		if (err)
+			return notifier_from_errno(err);
+		break;
+
 	case NETDEV_PRE_TYPE_CHANGE:
 		/* Forbid underlaying device to change its type. */
 		return NOTIFY_BAD;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 79a13d1c2573..0f4a74bc6a9b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -139,6 +139,10 @@ struct net_bridge_port
 	struct hlist_head		mglist;
 	struct hlist_node		rlist;
 #endif
+
+#ifdef CONFIG_SYSFS
+	char				sysfs_name[IFNAMSIZ];
+#endif
 };
 
 struct br_cpu_netstats {
@@ -480,6 +484,7 @@ extern void br_ifinfo_notify(int event, struct net_bridge_port *port);
 /* br_sysfs_if.c */
 extern const struct sysfs_ops brport_sysfs_ops;
 extern int br_sysfs_addif(struct net_bridge_port *p);
+extern int br_sysfs_renameif(struct net_bridge_port *p);
 
 /* br_sysfs_br.c */
 extern int br_sysfs_addbr(struct net_device *dev);
@@ -488,6 +493,7 @@ extern void br_sysfs_delbr(struct net_device *dev);
 #else
 
 #define br_sysfs_addif(p)	(0)
+#define br_sysfs_renameif(p)	(0)
 #define br_sysfs_addbr(dev)	(0)
 #define br_sysfs_delbr(dev)	do { } while(0)
 #endif /* CONFIG_SYSFS */
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 0b9916489d6b..fd5799c9bc8d 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -246,7 +246,7 @@ const struct sysfs_ops brport_sysfs_ops = {
 /*
  * Add sysfs entries to ethernet device added to a bridge.
  * Creates a brport subdirectory with bridge attributes.
- * Puts symlink in bridge's brport subdirectory
+ * Puts symlink in bridge's brif subdirectory
  */
 int br_sysfs_addif(struct net_bridge_port *p)
 {
@@ -257,15 +257,37 @@ int br_sysfs_addif(struct net_bridge_port *p)
 	err = sysfs_create_link(&p->kobj, &br->dev->dev.kobj,
 				SYSFS_BRIDGE_PORT_LINK);
 	if (err)
-		goto out2;
+		return err;
 
 	for (a = brport_attrs; *a; ++a) {
 		err = sysfs_create_file(&p->kobj, &((*a)->attr));
 		if (err)
-			goto out2;
+			return err;
 	}
 
-	err = sysfs_create_link(br->ifobj, &p->kobj, p->dev->name);
-out2:
+	strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
+	return sysfs_create_link(br->ifobj, &p->kobj, p->sysfs_name);
+}
+
+/* Rename bridge's brif symlink */
+int br_sysfs_renameif(struct net_bridge_port *p)
+{
+	struct net_bridge *br = p->br;
+	int err;
+
+	/* If a rename fails, the rollback will cause another
+	 * rename call with the existing name.
+	 */
+	if (!strncmp(p->sysfs_name, p->dev->name, IFNAMSIZ))
+		return 0;
+
+	err = sysfs_rename_link(br->ifobj, &p->kobj,
+				p->sysfs_name, p->dev->name);
+	if (err)
+		netdev_notice(br->dev, "unable to rename link %s to %s",
+			      p->sysfs_name, p->dev->name);
+	else
+		strlcpy(p->sysfs_name, p->dev->name, IFNAMSIZ);
+
 	return err;
 }
-- 
cgit v1.2.3-59-g8ed1b


From e3826f1e946e7d2354943232f1457be1455a29e2 Mon Sep 17 00:00:00 2001
From: Amerigo Wang <amwang@redhat.com>
Date: Wed, 5 May 2010 00:27:06 +0000
Subject: net: reserve ports for applications using fixed port numbers

(Dropped the infiniband part, because Tetsuo modified the related code,
I will send a separate patch for it once this is accepted.)

This patch introduces /proc/sys/net/ipv4/ip_local_reserved_ports which
allows users to reserve ports for third-party applications.

The reserved ports will not be used by automatic port assignments
(e.g. when calling connect() or bind() with port number 0). Explicit
port allocation behavior is unchanged.

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 31 +++++++++++++++++++++++++++++++
 include/net/ip.h                       |  6 ++++++
 net/ipv4/af_inet.c                     |  8 +++++++-
 net/ipv4/inet_connection_sock.c        |  6 ++++++
 net/ipv4/inet_hashtables.c             |  2 ++
 net/ipv4/sysctl_net_ipv4.c             | 17 +++++++++++++++++
 net/ipv4/udp.c                         |  3 ++-
 net/sctp/socket.c                      |  2 ++
 8 files changed, 73 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 8b72c88ba213..d0536b5a4e01 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -588,6 +588,37 @@ ip_local_port_range - 2 INTEGERS
 	(i.e. by default) range 1024-4999 is enough to issue up to
 	2000 connections per second to systems supporting timestamps.
 
+ip_local_reserved_ports - list of comma separated ranges
+	Specify the ports which are reserved for known third-party
+	applications. These ports will not be used by automatic port
+	assignments (e.g. when calling connect() or bind() with port
+	number 0). Explicit port allocation behavior is unchanged.
+
+	The format used for both input and output is a comma separated
+	list of ranges (e.g. "1,2-4,10-10" for ports 1, 2, 3, 4 and
+	10). Writing to the file will clear all previously reserved
+	ports and update the current list with the one given in the
+	input.
+
+	Note that ip_local_port_range and ip_local_reserved_ports
+	settings are independent and both are considered by the kernel
+	when determining which ports are available for automatic port
+	assignments.
+
+	You can reserve ports which are not in the current
+	ip_local_port_range, e.g.:
+
+	$ cat /proc/sys/net/ipv4/ip_local_port_range
+	32000	61000
+	$ cat /proc/sys/net/ipv4/ip_local_reserved_ports
+	8080,9148
+
+	although this is redundant. However such a setting is useful
+	if later the port range is changed to a value that will
+	include the reserved ports.
+
+	Default: Empty
+
 ip_nonlocal_bind - BOOLEAN
 	If set, allows processes to bind() to non-local IP addresses,
 	which can be quite useful - but may break some applications.
diff --git a/include/net/ip.h b/include/net/ip.h
index 8149b77cea9b..63548f0a44b1 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -184,6 +184,12 @@ extern struct local_ports {
 } sysctl_local_ports;
 extern void inet_get_local_port_range(int *low, int *high);
 
+extern unsigned long *sysctl_local_reserved_ports;
+static inline int inet_is_reserved_local_port(int port)
+{
+	return test_bit(port, sysctl_local_reserved_ports);
+}
+
 extern int sysctl_ip_default_ttl;
 extern int sysctl_ip_nonlocal_bind;
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c6c43bcd1c6f..551ce564b035 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1573,9 +1573,13 @@ static int __init inet_init(void)
 
 	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));
 
+	sysctl_local_reserved_ports = kzalloc(65536 / 8, GFP_KERNEL);
+	if (!sysctl_local_reserved_ports)
+		goto out;
+
 	rc = proto_register(&tcp_prot, 1);
 	if (rc)
-		goto out;
+		goto out_free_reserved_ports;
 
 	rc = proto_register(&udp_prot, 1);
 	if (rc)
@@ -1674,6 +1678,8 @@ out_unregister_udp_proto:
 	proto_unregister(&udp_prot);
 out_unregister_tcp_proto:
 	proto_unregister(&tcp_prot);
+out_free_reserved_ports:
+	kfree(sysctl_local_reserved_ports);
 	goto out;
 }
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index e0a3e3537b14..70eb3507c406 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -37,6 +37,9 @@ struct local_ports sysctl_local_ports __read_mostly = {
 	.range = { 32768, 61000 },
 };
 
+unsigned long *sysctl_local_reserved_ports;
+EXPORT_SYMBOL(sysctl_local_reserved_ports);
+
 void inet_get_local_port_range(int *low, int *high)
 {
 	unsigned seq;
@@ -108,6 +111,8 @@ again:
 
 		smallest_size = -1;
 		do {
+			if (inet_is_reserved_local_port(rover))
+				goto next_nolock;
 			head = &hashinfo->bhash[inet_bhashfn(net, rover,
 					hashinfo->bhash_size)];
 			spin_lock(&head->lock);
@@ -130,6 +135,7 @@ again:
 			break;
 		next:
 			spin_unlock(&head->lock);
+		next_nolock:
 			if (++rover > high)
 				rover = low;
 		} while (--remaining > 0);
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 2b79377b468d..d3e160a88219 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -456,6 +456,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
 		local_bh_disable();
 		for (i = 1; i <= remaining; i++) {
 			port = low + (i + offset) % remaining;
+			if (inet_is_reserved_local_port(port))
+				continue;
 			head = &hinfo->bhash[inet_bhashfn(net, port,
 					hinfo->bhash_size)];
 			spin_lock(&head->lock);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 1cd5c15174b8..d96c1da4b17c 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -299,6 +299,13 @@ static struct ctl_table ipv4_table[] = {
 		.mode		= 0644,
 		.proc_handler	= ipv4_local_port_range,
 	},
+	{
+		.procname	= "ip_local_reserved_ports",
+		.data		= NULL, /* initialized in sysctl_ipv4_init */
+		.maxlen		= 65536,
+		.mode		= 0644,
+		.proc_handler	= proc_do_large_bitmap,
+	},
 #ifdef CONFIG_IP_MULTICAST
 	{
 		.procname	= "igmp_max_memberships",
@@ -736,6 +743,16 @@ static __net_initdata struct pernet_operations ipv4_sysctl_ops = {
 static __init int sysctl_ipv4_init(void)
 {
 	struct ctl_table_header *hdr;
+	struct ctl_table *i;
+
+	for (i = ipv4_table; i->procname; i++) {
+		if (strcmp(i->procname, "ip_local_reserved_ports") == 0) {
+			i->data = sysctl_local_reserved_ports;
+			break;
+		}
+	}
+	if (!i->procname)
+		return -EINVAL;
 
 	hdr = register_sysctl_paths(net_ipv4_ctl_path, ipv4_table);
 	if (hdr == NULL)
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f3e00c5cd1ed..9de6a698f91d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -233,7 +233,8 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
 			 */
 			do {
 				if (low <= snum && snum <= high &&
-				    !test_bit(snum >> udptable->log, bitmap))
+				    !test_bit(snum >> udptable->log, bitmap) &&
+				    !inet_is_reserved_local_port(snum))
 					goto found;
 				snum += rand;
 			} while (snum != first);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ba1add0b13c3..ca44917872d2 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -5433,6 +5433,8 @@ static long sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
 			rover++;
 			if ((rover < low) || (rover > high))
 				rover = low;
+			if (inet_is_reserved_local_port(rover))
+				continue;
 			index = sctp_phashfn(rover);
 			head = &sctp_port_hashtable[index];
 			sctp_spin_lock(&head->lock);
-- 
cgit v1.2.3-59-g8ed1b


From a14462f1bd4d3962994f518459102000438665aa Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 6 May 2010 01:33:53 +0000
Subject: net: adjust handle_macvlan to pass port struct to hook

Now there's a null check here and also again in the hook. Looking at the
bridge bits, which are similar, the port structure is rcu_dereferenced right
away in handle_bridge and passed to the hook. Looks nicer.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c      |  8 ++------
 include/linux/if_macvlan.h |  3 ++-
 net/core/dev.c             | 10 +++++++---
 3 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'net')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 9a939d828b47..1b78c0057a8d 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -145,19 +145,15 @@ static void macvlan_broadcast(struct sk_buff *skb,
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
-static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
+static struct sk_buff *macvlan_handle_frame(struct macvlan_port *port,
+					    struct sk_buff *skb)
 {
 	const struct ethhdr *eth = eth_hdr(skb);
-	const struct macvlan_port *port;
 	const struct macvlan_dev *vlan;
 	const struct macvlan_dev *src;
 	struct net_device *dev;
 	unsigned int len;
 
-	port = rcu_dereference(skb->dev->macvlan_port);
-	if (port == NULL)
-		return skb;
-
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (!src)
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index b78a712247da..9ea047aca795 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -85,6 +85,7 @@ extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev);
 
 
-extern struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *);
+extern struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *,
+						    struct sk_buff *);
 
 #endif /* _LINUX_IF_MACVLAN_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index 3daee30a7c82..5cbba0927a8e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2612,7 +2612,8 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
 #endif
 
 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
+struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
+					     struct sk_buff *skb) __read_mostly;
 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
 
 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
@@ -2620,14 +2621,17 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
 					     int *ret,
 					     struct net_device *orig_dev)
 {
-	if (skb->dev->macvlan_port == NULL)
+	struct macvlan_port *port;
+
+	port = rcu_dereference(skb->dev->macvlan_port);
+	if (!port)
 		return skb;
 
 	if (*pt_prev) {
 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
 		*pt_prev = NULL;
 	}
-	return macvlan_handle_frame_hook(skb);
+	return macvlan_handle_frame_hook(port, skb);
 }
 #else
 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
-- 
cgit v1.2.3-59-g8ed1b


From a1aa3483041bd3691c7f029272ccef4ce70bd957 Mon Sep 17 00:00:00 2001
From: Timo Teras <timo.teras@iki.fi>
Date: Sat, 15 May 2010 23:49:26 -0700
Subject: xfrm: fix policy unreferencing on larval drop

I mistakenly had the error path use num_pols to decide how
many policies we need to drop (cruft from an earlier patch set
version which did not handle socket policies right).

This is wrong since normally we do not keep explicit references
(instead we hold reference to the cache entry which holds references
to policies). drop_pols is set to num_pols if we are holding the
references, so use that. Otherwise we eventually BUG_ON inside
xfrm_policy_destroy due to premature policy deletion.

Signed-off-by: Timo Teras <timo.teras@iki.fi>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 31f4ba43b48f..f4ea3a08e5a1 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1805,7 +1805,7 @@ restart:
 			/* EREMOTE tells the caller to generate
 			 * a one-shot blackhole route. */
 			dst_release(dst);
-			xfrm_pols_put(pols, num_pols);
+			xfrm_pols_put(pols, drop_pols);
 			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
 			return -EREMOTE;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 3b098e2d7c693796cc4dffb07caa249fc0f70771 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 15 May 2010 23:57:10 -0700
Subject: net: Consistent skb timestamping

With RPS inclusion, skb timestamping is not consistent in RX path.

If netif_receive_skb() is used, it's deferred until after RPS dispatch.

If netif_rx() is used, it's done before RPS dispatch.

This can give strange tcpdump timestamps results.

I think timestamping should be done as soon as possible in the receive
path, to get meaningful values (ie timestamps taken at the time packet
was delivered by NIC driver to our stack), even if NAPI already can
defer timestamping a bit (RPS can help to reduce the gap)

Tom Herbert prefers to sample timestamps after RPS dispatch. In case
sampling is expensive (HPET/acpi_pm on x86), this makes sense.

Let admins switch from one mode to another, using a new
sysctl, /proc/sys/net/core/netdev_tstamp_prequeue

Its default value (1) means timestamps are taken as soon as possible,
before backlog queueing, giving accurate timestamps.

Setting it to 0 allows timestamps to be sampled when processing the backlog,
after RPS dispatch, to lower the load on the pre-RPS cpu.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/sysctl/net.txt | 10 +++++++++
 include/linux/netdevice.h    |  1 +
 net/core/dev.c               | 50 +++++++++++++++++++++++++++-----------------
 net/core/sysctl_net_core.c   |  7 +++++++
 4 files changed, 49 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index df38ef046f8d..cbd05ffc606b 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -84,6 +84,16 @@ netdev_max_backlog
 Maximum number  of  packets,  queued  on  the  INPUT  side, when the interface
 receives packets faster than kernel can process them.
 
+netdev_tstamp_prequeue
+----------------------
+
+If set to 0, RX packet timestamps can be sampled after RPS processing, when
+the target CPU processes packets. It might give some delay on timestamps, but
+permit to distribute the load on several cpus.
+
+If set to 1 (default), timestamps are sampled as soon as possible, before
+queueing.
+
 optmem_max
 ----------
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 69022d47d6f2..c1b2341897c2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2100,6 +2100,7 @@ extern const struct net_device_stats *dev_get_stats(struct net_device *dev);
 extern void		dev_txq_stats_fold(const struct net_device *dev, struct net_device_stats *stats);
 
 extern int		netdev_max_backlog;
+extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
 extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
 extern int skb_checksum_help(struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 5cbba0927a8e..988e42912e72 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1454,7 +1454,7 @@ void net_disable_timestamp(void)
 }
 EXPORT_SYMBOL(net_disable_timestamp);
 
-static inline void net_timestamp(struct sk_buff *skb)
+static inline void net_timestamp_set(struct sk_buff *skb)
 {
 	if (atomic_read(&netstamp_needed))
 		__net_timestamp(skb);
@@ -1462,6 +1462,12 @@ static inline void net_timestamp(struct sk_buff *skb)
 		skb->tstamp.tv64 = 0;
 }
 
+static inline void net_timestamp_check(struct sk_buff *skb)
+{
+	if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
+		__net_timestamp(skb);
+}
+
 /**
  * dev_forward_skb - loopback an skb to another netif
  *
@@ -1508,9 +1514,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
 #ifdef CONFIG_NET_CLS_ACT
 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
-		net_timestamp(skb);
+		net_timestamp_set(skb);
 #else
-	net_timestamp(skb);
+	net_timestamp_set(skb);
 #endif
 
 	rcu_read_lock();
@@ -2201,6 +2207,7 @@ EXPORT_SYMBOL(dev_queue_xmit);
   =======================================================================*/
 
 int netdev_max_backlog __read_mostly = 1000;
+int netdev_tstamp_prequeue __read_mostly = 1;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;            /* old backlog weight */
 
@@ -2465,8 +2472,8 @@ int netif_rx(struct sk_buff *skb)
 	if (netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (!skb->tstamp.tv64)
-		net_timestamp(skb);
+	if (netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
 
 #ifdef CONFIG_RPS
 	{
@@ -2791,8 +2798,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	int ret = NET_RX_DROP;
 	__be16 type;
 
-	if (!skb->tstamp.tv64)
-		net_timestamp(skb);
+	if (!netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
 
 	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
 		return NET_RX_SUCCESS;
@@ -2910,23 +2917,28 @@ out:
  */
 int netif_receive_skb(struct sk_buff *skb)
 {
+	if (netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
+
 #ifdef CONFIG_RPS
-	struct rps_dev_flow voidflow, *rflow = &voidflow;
-	int cpu, ret;
+	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
+		int cpu, ret;
 
-	rcu_read_lock();
+		rcu_read_lock();
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
-	cpu = get_rps_cpu(skb->dev, skb, &rflow);
+		if (cpu >= 0) {
+			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+			rcu_read_unlock();
+		} else {
+			rcu_read_unlock();
+			ret = __netif_receive_skb(skb);
+		}
 
-	if (cpu >= 0) {
-		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
-		rcu_read_unlock();
-	} else {
-		rcu_read_unlock();
-		ret = __netif_receive_skb(skb);
+		return ret;
 	}
-
-	return ret;
 #else
 	return __netif_receive_skb(skb);
 #endif
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index dcc7d25996ab..01eee5d984be 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -121,6 +121,13 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "netdev_tstamp_prequeue",
+		.data		= &netdev_tstamp_prequeue,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{
 		.procname	= "message_cost",
 		.data		= &net_ratelimit_state.interval,
-- 
cgit v1.2.3-59-g8ed1b


From a465419b1febb603821f924805529cff89cafeed Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 16 May 2010 00:36:33 -0700
Subject: net: Introduce sk_route_nocaps

TCP-MD5 sessions have intermittent failures, when route cache is
invalidated. ip_queue_xmit() has to find a new route, calls
sk_setup_caps(sk, &rt->u.dst), destroying the

sk->sk_route_caps &= ~NETIF_F_GSO_MASK

that MD5 desperately tries to apply all along its path (from
tcp_transmit_skb() for example)

So we send a few bad packets, and everything is fine when
tcp_transmit_skb() is called again for this socket.

Since ip_queue_xmit() is at a lower level than TCP-MD5, I chose to use a
socket field, sk_route_nocaps, containing bits to mask on sk_route_caps.

Reported-by: Bhaskar Dutta <bhaskie@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h    | 8 ++++++++
 net/core/sock.c       | 1 +
 net/ipv4/tcp_ipv4.c   | 6 +++---
 net/ipv4/tcp_output.c | 2 +-
 net/ipv6/tcp_ipv6.c   | 4 ++--
 5 files changed, 15 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/sock.h b/include/net/sock.h
index 328e03f47dd1..aed16eb9db4b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -177,6 +177,7 @@ struct sock_common {
   *		   %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
   *	@sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
   *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
+  *	@sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
   *	@sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
   *	@sk_gso_max_size: Maximum GSO segment size to build
   *	@sk_lingertime: %SO_LINGER l_linger setting
@@ -276,6 +277,7 @@ struct sock {
 	int			sk_forward_alloc;
 	gfp_t			sk_allocation;
 	int			sk_route_caps;
+	int			sk_route_nocaps;
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	int			sk_rcvlowat;
@@ -1335,6 +1337,12 @@ static inline int sk_can_gso(const struct sock *sk)
 
 extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
 
+static inline void sk_nocaps_add(struct sock *sk, int flags)
+{
+	sk->sk_route_nocaps |= flags;
+	sk->sk_route_caps &= ~flags;
+}
+
 static inline int skb_copy_to_page(struct sock *sk, char __user *from,
 				   struct sk_buff *skb, struct page *page,
 				   int off, int copy)
diff --git a/net/core/sock.c b/net/core/sock.c
index 94c4affdda9b..63530a03b8c2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1231,6 +1231,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
 	sk->sk_route_caps = dst->dev->features;
 	if (sk->sk_route_caps & NETIF_F_GSO)
 		sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
+	sk->sk_route_caps &= ~sk->sk_route_nocaps;
 	if (sk_can_gso(sk)) {
 		if (dst->header_len) {
 			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 771f8146a2e5..202cf09c4cd4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -891,7 +891,7 @@ int tcp_v4_md5_do_add(struct sock *sk, __be32 addr,
 				kfree(newkey);
 				return -ENOMEM;
 			}
-			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		}
 		if (tcp_alloc_md5sig_pool(sk) == NULL) {
 			kfree(newkey);
@@ -1021,7 +1021,7 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
 			return -EINVAL;
 
 		tp->md5sig_info = p;
-		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 	}
 
 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, sk->sk_allocation);
@@ -1462,7 +1462,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		if (newkey != NULL)
 			tcp_v4_md5_do_add(newsk, newinet->inet_daddr,
 					  newkey, key->keylen);
-		newsk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
 	}
 #endif
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5db3a2c6cb33..18a3302480cb 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -873,7 +873,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 #ifdef CONFIG_TCP_MD5SIG
 	/* Calculate the MD5 hash, as we have all we need now */
 	if (md5) {
-		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		tp->af_specific->calc_md5_hash(opts.hash_location,
 					       md5, sk, NULL, skb);
 	}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 6603511e3673..2b7c3a100e2c 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -604,7 +604,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, struct in6_addr *peer,
 				kfree(newkey);
 				return -ENOMEM;
 			}
-			sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 		}
 		if (tcp_alloc_md5sig_pool(sk) == NULL) {
 			kfree(newkey);
@@ -741,7 +741,7 @@ static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval,
 			return -ENOMEM;
 
 		tp->md5sig_info = p;
-		sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
+		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
 	}
 
 	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
-- 
cgit v1.2.3-59-g8ed1b


From 2d6c9ffcca7808f42ba6b953da0ba60e19a9cbbd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 10 May 2010 04:51:02 +0000
Subject: net: congestion notifications are not dropped packets

vlan/macvlan start_xmit() can inform the caller of congestion with the
NET_XMIT_CN return value. This doesn't mean the packet was dropped.
Increment normal stat counters instead of tx_dropped.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c | 2 +-
 net/8021q/vlan_dev.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 1b78c0057a8d..4e238afab4a3 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -239,7 +239,7 @@ netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 	int ret;
 
 	ret = macvlan_queue_xmit(skb, dev);
-	if (likely(ret == NET_XMIT_SUCCESS)) {
+	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
 		txq->tx_packets++;
 		txq->tx_bytes += len;
 	} else
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index b5249c5fd4d3..55be90826f5f 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -327,7 +327,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	len = skb->len;
 	ret = dev_queue_xmit(skb);
 
-	if (likely(ret == NET_XMIT_SUCCESS)) {
+	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
 		txq->tx_packets++;
 		txq->tx_bytes += len;
 	} else
@@ -353,7 +353,7 @@ static netdev_tx_t vlan_dev_hwaccel_hard_start_xmit(struct sk_buff *skb,
 	len = skb->len;
 	ret = dev_queue_xmit(skb);
 
-	if (likely(ret == NET_XMIT_SUCCESS)) {
+	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
 		txq->tx_packets++;
 		txq->tx_bytes += len;
 	} else
-- 
cgit v1.2.3-59-g8ed1b


From 0771275b25e84f568699a5d209c5e339ef83fa94 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 7 May 2010 11:31:33 +0000
Subject: ipv6 addrlabel: permit deletion of labels assigned to removed dev

As addrlabels with an interface index are left alone when the
interface gets removed, this results in addrlabels that can no
longer be removed.

Restrict validation of index to adding new addrlabels.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrlabel.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index ae404c9a746c..8c4348cb1950 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -422,10 +422,6 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
 	    ifal->ifal_prefixlen > 128)
 		return -EINVAL;
 
-	if (ifal->ifal_index &&
-	    !__dev_get_by_index(net, ifal->ifal_index))
-		return -EINVAL;
-
 	if (!tb[IFAL_ADDRESS])
 		return -EINVAL;
 
@@ -441,6 +437,10 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	switch(nlh->nlmsg_type) {
 	case RTM_NEWADDRLABEL:
+		if (ifal->ifal_index &&
+		    !__dev_get_by_index(net, ifal->ifal_index))
+			return -EINVAL;
+
 		err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen,
 				     ifal->ifal_index, label,
 				     nlh->nlmsg_flags & NLM_F_REPLACE);
-- 
cgit v1.2.3-59-g8ed1b


From ebda37c27d0c768947e9b058332d7ea798210cf8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 6 May 2010 23:51:21 +0000
Subject: rps: avoid one atomic in enqueue_to_backlog

If CONFIG_SMP=y, then we own a queue spinlock, we can avoid the atomic
test_and_set_bit() from napi_schedule_prep().

We now have the same number of atomic ops per netif_rx() call as with a
pre-RPS kernel.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 988e42912e72..cdcb9cbedf41 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2432,8 +2432,10 @@ enqueue:
 			return NET_RX_SUCCESS;
 		}
 
-		/* Schedule NAPI for backlog device */
-		if (napi_schedule_prep(&sd->backlog)) {
+		/* Schedule NAPI for backlog device
+		 * We can use non atomic operation since we own the queue lock
+		 */
+		if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
 			if (!rps_ipi_queued(sd))
 				____napi_schedule(sd, &sd->backlog);
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 7fee226ad2397b635e2fd565a59ca3ae08a164cd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 11 May 2010 23:19:48 +0000
Subject: net: add a noref bit on skb dst

Use low order bit of skb->_skb_dst to tell dst is not refcounted.

Change _skb_dst to _skb_refdst to make sure all uses are caught.

skb_dst() returns the dst, regardless of noref bit set or not, but
with a lockdep check to make sure a noref dst is not given if current
user is not rcu protected.

New skb_dst_set_noref() helper to set a non-refcounted dst on a skb.
(with lockdep check)

skb_dst_drop() drops a reference only if skb dst was refcounted.

skb_dst_force() helper is used to force a refcount on dst, when skb
is queued and not anymore RCU protected.

Use skb_dst_force() in __sk_add_backlog(), __dev_xmit_skb() if
!IFF_XMIT_DST_RELEASE or skb enqueued on qdisc queue, in
sock_queue_rcv_skb(), in __nf_queue().

Use skb_dst_force() in dev_requeue_skb().

Note: dst_use_noref() still dirties dst, we might transform it
later to do one dirtying per jiffies.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h   | 58 ++++++++++++++++++++++++++++++++++++++++++++----
 include/net/dst.h        | 48 ++++++++++++++++++++++++++++++++++++---
 include/net/sock.h       | 13 ++++++-----
 net/core/dev.c           |  3 +++
 net/core/skbuff.c        |  2 +-
 net/core/sock.c          |  6 +++++
 net/ipv4/icmp.c          |  6 ++---
 net/ipv4/ip_options.c    |  9 ++++----
 net/ipv4/netfilter.c     |  6 ++---
 net/ipv4/route.c         |  2 +-
 net/netfilter/nf_queue.c |  2 ++
 net/sched/sch_generic.c  |  4 +++-
 12 files changed, 134 insertions(+), 25 deletions(-)

(limited to 'net')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c9525bce80f6..7cdfb4d52847 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -264,7 +264,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@transport_header: Transport layer header
  *	@network_header: Network layer header
  *	@mac_header: Link layer header
- *	@_skb_dst: destination entry
+ *	@_skb_refdst: destination entry (with norefcount bit)
  *	@sp: the security path, used for xfrm
  *	@cb: Control buffer. Free for use by every layer. Put private vars here
  *	@len: Length of actual data
@@ -328,7 +328,7 @@ struct sk_buff {
 	 */
 	char			cb[48] __aligned(8);
 
-	unsigned long		_skb_dst;
+	unsigned long		_skb_refdst;
 #ifdef CONFIG_XFRM
 	struct	sec_path	*sp;
 #endif
@@ -419,14 +419,64 @@ struct sk_buff {
 
 #include <asm/system.h>
 
+/*
+ * skb might have a dst pointer attached, refcounted or not.
+ * _skb_refdst low order bit is set if refcount was _not_ taken
+ */
+#define SKB_DST_NOREF	1UL
+#define SKB_DST_PTRMASK	~(SKB_DST_NOREF)
+
+/**
+ * skb_dst - returns skb dst_entry
+ * @skb: buffer
+ *
+ * Returns skb dst_entry, regardless of reference taken or not.
+ */
 static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
 {
-	return (struct dst_entry *)skb->_skb_dst;
+	/* If refdst was not refcounted, check we still are in a 
+	 * rcu_read_lock section
+	 */
+	WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) &&
+		!rcu_read_lock_held() &&
+		!rcu_read_lock_bh_held());
+	return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK);
 }
 
+/**
+ * skb_dst_set - sets skb dst
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was taken on dst and should
+ * be released by skb_dst_drop()
+ */
 static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
 {
-	skb->_skb_dst = (unsigned long)dst;
+	skb->_skb_refdst = (unsigned long)dst;
+}
+
+/**
+ * skb_dst_set_noref - sets skb dst, without a reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst
+ * skb_dst_drop() should not dst_release() this dst
+ */
+static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+{
+	WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+	skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
+}
+
+/**
+ * skb_dst_is_noref - Test if skb dst isnt refcounted
+ * @skb: buffer
+ */
+static inline bool skb_dst_is_noref(const struct sk_buff *skb)
+{
+	return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb);
 }
 
 static inline struct rtable *skb_rtable(const struct sk_buff *skb)
diff --git a/include/net/dst.h b/include/net/dst.h
index aac5a5fcfda9..27207a13f2a6 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -168,6 +168,12 @@ static inline void dst_use(struct dst_entry *dst, unsigned long time)
 	dst->lastuse = time;
 }
 
+static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
+{
+	dst->__use++;
+	dst->lastuse = time;
+}
+
 static inline
 struct dst_entry * dst_clone(struct dst_entry * dst)
 {
@@ -177,11 +183,47 @@ struct dst_entry * dst_clone(struct dst_entry * dst)
 }
 
 extern void dst_release(struct dst_entry *dst);
+
+static inline void refdst_drop(unsigned long refdst)
+{
+	if (!(refdst & SKB_DST_NOREF))
+		dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK));
+}
+
+/**
+ * skb_dst_drop - drops skb dst
+ * @skb: buffer
+ *
+ * Drops dst reference count if a reference was taken.
+ */
 static inline void skb_dst_drop(struct sk_buff *skb)
 {
-	if (skb->_skb_dst)
-		dst_release(skb_dst(skb));
-	skb->_skb_dst = 0UL;
+	if (skb->_skb_refdst) {
+		refdst_drop(skb->_skb_refdst);
+		skb->_skb_refdst = 0UL;
+	}
+}
+
+static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb)
+{
+	nskb->_skb_refdst = oskb->_skb_refdst;
+	if (!(nskb->_skb_refdst & SKB_DST_NOREF))
+		dst_clone(skb_dst(nskb));
+}
+
+/**
+ * skb_dst_force - makes sure skb dst is refcounted
+ * @skb: buffer
+ *
+ * If dst is not yet refcounted, let's do it
+ */
+static inline void skb_dst_force(struct sk_buff *skb)
+{
+	if (skb_dst_is_noref(skb)) {
+		WARN_ON(!rcu_read_lock_held());
+		skb->_skb_refdst &= ~SKB_DST_NOREF;
+		dst_clone(skb_dst(skb));
+	}
 }
 
 /* Children define the path of the packet through the
diff --git a/include/net/sock.h b/include/net/sock.h
index aed16eb9db4b..5697caf8cc76 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -600,12 +600,15 @@ static inline int sk_stream_memory_free(struct sock *sk)
 /* OOB backlog add */
 static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-	if (!sk->sk_backlog.tail) {
-		sk->sk_backlog.head = sk->sk_backlog.tail = skb;
-	} else {
+	/* dont let skb dst not refcounted, we are going to leave rcu lock */
+	skb_dst_force(skb);
+
+	if (!sk->sk_backlog.tail)
+		sk->sk_backlog.head = skb;
+	else
 		sk->sk_backlog.tail->next = skb;
-		sk->sk_backlog.tail = skb;
-	}
+
+	sk->sk_backlog.tail = skb;
 	skb->next = NULL;
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index cdcb9cbedf41..6c820650b80f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2052,6 +2052,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		 * waiting to be sent out; and the qdisc is not running -
 		 * xmit the skb directly.
 		 */
+		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
+			skb_dst_force(skb);
 		__qdisc_update_bstats(q, skb->len);
 		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
 			__qdisc_run(q);
@@ -2060,6 +2062,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
+		skb_dst_force(skb);
 		rc = qdisc_enqueue_root(skb, q);
 		qdisc_run(q);
 	}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a9b0e1f77806..c543dd252433 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -520,7 +520,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->transport_header	= old->transport_header;
 	new->network_header	= old->network_header;
 	new->mac_header		= old->mac_header;
-	skb_dst_set(new, dst_clone(skb_dst(old)));
+	skb_dst_copy(new, old);
 	new->rxhash		= old->rxhash;
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
diff --git a/net/core/sock.c b/net/core/sock.c
index 63530a03b8c2..bf88a167c8f2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -307,6 +307,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	skb_len = skb->len;
 
+	/* we escape from rcu protected region, make sure we dont leak
+	 * a norefcounted dst
+	 */
+	skb_dst_force(skb);
+
 	spin_lock_irqsave(&list->lock, flags);
 	skb->dropcount = atomic_read(&sk->sk_drops);
 	__skb_queue_tail(list, skb);
@@ -1536,6 +1541,7 @@ static void __release_sock(struct sock *sk)
 		do {
 			struct sk_buff *next = skb->next;
 
+			WARN_ON_ONCE(skb_dst_is_noref(skb));
 			skb->next = NULL;
 			sk_backlog_rcv(sk, skb);
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index f3d339f728b0..d65e9215bcd7 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -587,20 +587,20 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 			err = __ip_route_output_key(net, &rt2, &fl);
 		else {
 			struct flowi fl2 = {};
-			struct dst_entry *odst;
+			unsigned long orefdst;
 
 			fl2.fl4_dst = fl.fl4_src;
 			if (ip_route_output_key(net, &rt2, &fl2))
 				goto relookup_failed;
 
 			/* Ugh! */
-			odst = skb_dst(skb_in);
+			orefdst = skb_in->_skb_refdst; /* save old refdst */
 			err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
 					     RT_TOS(tos), rt2->u.dst.dev);
 
 			dst_release(&rt2->u.dst);
 			rt2 = skb_rtable(skb_in);
-			skb_dst_set(skb_in, odst);
+			skb_in->_skb_refdst = orefdst; /* restore old refdst */
 		}
 
 		if (err)
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 4c09a31fd140..3244133c24f6 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -601,6 +601,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 	unsigned char *optptr = skb_network_header(skb) + opt->srr;
 	struct rtable *rt = skb_rtable(skb);
 	struct rtable *rt2;
+	unsigned long orefdst;
 	int err;
 
 	if (!opt->srr)
@@ -624,16 +625,16 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 		}
 		memcpy(&nexthop, &optptr[srrptr-1], 4);
 
-		rt = skb_rtable(skb);
+		orefdst = skb->_skb_refdst;
 		skb_dst_set(skb, NULL);
 		err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
 		rt2 = skb_rtable(skb);
 		if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
-			ip_rt_put(rt2);
-			skb_dst_set(skb, &rt->u.dst);
+			skb_dst_drop(skb);
+			skb->_skb_refdst = orefdst;
 			return -EINVAL;
 		}
-		ip_rt_put(rt);
+		refdst_drop(orefdst);
 		if (rt2->rt_type != RTN_LOCAL)
 			break;
 		/* Superfast 8) loopback forward */
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 82fb43c5c59e..07de855e2175 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -17,7 +17,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
 	struct flowi fl = {};
-	struct dst_entry *odst;
+	unsigned long orefdst;
 	unsigned int hh_len;
 	unsigned int type;
 
@@ -51,14 +51,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 		if (ip_route_output_key(net, &rt, &fl) != 0)
 			return -1;
 
-		odst = skb_dst(skb);
+		orefdst = skb->_skb_refdst;
 		if (ip_route_input(skb, iph->daddr, iph->saddr,
 				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
 			dst_release(&rt->u.dst);
 			return -1;
 		}
 		dst_release(&rt->u.dst);
-		dst_release(odst);
+		refdst_drop(orefdst);
 	}
 
 	if (skb_dst(skb)->error)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index dea3f9264250..705eccfb4769 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3033,7 +3033,7 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 				continue;
 			if (rt_is_expired(rt))
 				continue;
-			skb_dst_set(skb, dst_clone(&rt->u.dst));
+			skb_dst_set_noref(skb, &rt->u.dst);
 			if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
 					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					 1, NLM_F_MULTI) <= 0) {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 0b1103c0b1f3..78b3cf9c519c 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -9,6 +9,7 @@
 #include <linux/rcupdate.h>
 #include <net/protocol.h>
 #include <net/netfilter/nf_queue.h>
+#include <net/dst.h>
 
 #include "nf_internals.h"
 
@@ -170,6 +171,7 @@ static int __nf_queue(struct sk_buff *skb,
 			dev_hold(physoutdev);
 	}
 #endif
+	skb_dst_force(skb);
 	afinfo->saveroute(skb, entry);
 	status = qh->outfn(entry, queuenum);
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a969b111bd76..a63029ef3edd 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -26,6 +26,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <net/pkt_sched.h>
+#include <net/dst.h>
 
 /* Main transmission queue. */
 
@@ -40,6 +41,7 @@
 
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
+	skb_dst_force(skb);
 	q->gso_skb = skb;
 	q->qstats.requeues++;
 	q->q.qlen++;	/* it's still part of the queue */
@@ -179,7 +181,7 @@ static inline int qdisc_restart(struct Qdisc *q)
 	skb = dequeue_skb(q);
 	if (unlikely(!skb))
 		return 0;
-
+	WARN_ON_ONCE(skb_dst_is_noref(skb));
 	root_lock = qdisc_lock(q);
 	dev = qdisc_dev(q);
 	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-- 
cgit v1.2.3-59-g8ed1b


From 407eadd996dc62a827db85f1d0c286a98fd5d336 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 10 May 2010 11:32:55 +0000
Subject: net: implements ip_route_input_noref()

ip_route_input() is the version returning a refcounted dst, while
ip_route_input_noref() returns a non refcounted one.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/route.h | 17 ++++++++++++++++-
 net/ipv4/route.c    | 15 ++++++++++-----
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'net')

diff --git a/include/net/route.h b/include/net/route.h
index 2c9fba7f7731..af6cf4b4c9dc 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -112,7 +112,22 @@ extern void		rt_cache_flush_batch(void);
 extern int		__ip_route_output_key(struct net *, struct rtable **, const struct flowi *flp);
 extern int		ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
 extern int		ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
-extern int		ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin);
+
+extern int ip_route_input_common(struct sk_buff *skb, __be32 dst, __be32 src,
+				 u8 tos, struct net_device *devin, bool noref);
+
+static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
+				 u8 tos, struct net_device *devin)
+{
+	return ip_route_input_common(skb, dst, src, tos, devin, false);
+}
+
+static inline int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
+				       u8 tos, struct net_device *devin)
+{
+	return ip_route_input_common(skb, dst, src, tos, devin, true);
+}
+
 extern unsigned short	ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev);
 extern void		ip_rt_send_redirect(struct sk_buff *skb);
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 705eccfb4769..560acc677ce4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2277,8 +2277,8 @@ martian_source:
 	goto e_inval;
 }
 
-int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-		   u8 tos, struct net_device *dev)
+int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+			   u8 tos, struct net_device *dev, bool noref)
 {
 	struct rtable * rth;
 	unsigned	hash;
@@ -2304,10 +2304,15 @@ int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 		    rth->fl.mark == skb->mark &&
 		    net_eq(dev_net(rth->u.dst.dev), net) &&
 		    !rt_is_expired(rth)) {
-			dst_use(&rth->u.dst, jiffies);
+			if (noref) {
+				dst_use_noref(&rth->u.dst, jiffies);
+				skb_dst_set_noref(skb, &rth->u.dst);
+			} else {
+				dst_use(&rth->u.dst, jiffies);
+				skb_dst_set(skb, &rth->u.dst);
+			}
 			RT_CACHE_STAT_INC(in_hit);
 			rcu_read_unlock();
-			skb_dst_set(skb, &rth->u.dst);
 			return 0;
 		}
 		RT_CACHE_STAT_INC(in_hlist_search);
@@ -2350,6 +2355,7 @@ skip_cache:
 	}
 	return ip_route_input_slow(skb, daddr, saddr, tos, dev);
 }
+EXPORT_SYMBOL(ip_route_input_common);
 
 static int __mkroute_output(struct rtable **result,
 			    struct fib_result *res,
@@ -3361,5 +3367,4 @@ void __init ip_static_sysctl_init(void)
 #endif
 
 EXPORT_SYMBOL(__ip_select_ident);
-EXPORT_SYMBOL(ip_route_input);
 EXPORT_SYMBOL(ip_route_output_key);
-- 
cgit v1.2.3-59-g8ed1b


From 4a94445c9a5cf5461fb41d80040033b9a8e2a85a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 10 May 2010 11:33:06 +0000
Subject: net: Use ip_route_input_noref() in input path

Use ip_route_input_noref() in ip fast path, to avoid two atomic ops per
incoming packet.

Note: loopback is excluded from this optimization in ip_rcv_finish()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/arp.c         | 2 +-
 net/ipv4/ip_input.c    | 4 ++--
 net/ipv4/xfrm4_input.c | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 80769f1f9fab..f094b75810db 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -854,7 +854,7 @@ static int arp_process(struct sk_buff *skb)
 	}
 
 	if (arp->ar_op == htons(ARPOP_REQUEST) &&
-	    ip_route_input(skb, tip, sip, 0, dev) == 0) {
+	    ip_route_input_noref(skb, tip, sip, 0, dev) == 0) {
 
 		rt = skb_rtable(skb);
 		addr_type = rt->rt_type;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index af76de5f76de..d930dc5e4d85 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -331,8 +331,8 @@ static int ip_rcv_finish(struct sk_buff *skb)
 	 *	how the packet travels inside Linux networking.
 	 */
 	if (skb_dst(skb) == NULL) {
-		int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
-					 skb->dev);
+		int err = ip_route_input_noref(skb, iph->daddr, iph->saddr,
+					       iph->tos, skb->dev);
 		if (unlikely(err)) {
 			if (err == -EHOSTUNREACH)
 				IP_INC_STATS_BH(dev_net(skb->dev),
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index abcd7ed65db1..ad8fbb871aa0 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -27,8 +27,8 @@ static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb)
 	if (skb_dst(skb) == NULL) {
 		const struct iphdr *iph = ip_hdr(skb);
 
-		if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
-				   skb->dev))
+		if (ip_route_input_noref(skb, iph->daddr, iph->saddr,
+					 iph->tos, skb->dev))
 			goto drop;
 	}
 	return dst_input(skb);
-- 
cgit v1.2.3-59-g8ed1b


From ab6e3feba1f1bc3b9418b854da6f481408d243de Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 10 May 2010 11:31:49 +0000
Subject: net: No dst refcounting in ip_queue_xmit()

TCP outgoing packets can avoid two atomic ops, and the dirtying
of a previously highly contended cache line, by using the new refdst
infrastructure.

Note 1: loopback device excluded because of !IFF_XMIT_DST_RELEASE
Note 2: UDP packets dsts are built before ip_queue_xmit().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_output.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 252897443ef9..9a4a6c96cb0d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -318,10 +318,12 @@ int ip_queue_xmit(struct sk_buff *skb)
 	struct ip_options *opt = inet->opt;
 	struct rtable *rt;
 	struct iphdr *iph;
+	int res;
 
 	/* Skip all of this if the packet is already routed,
 	 * f.e. by something like SCTP.
 	 */
+	rcu_read_lock();
 	rt = skb_rtable(skb);
 	if (rt != NULL)
 		goto packet_routed;
@@ -359,7 +361,7 @@ int ip_queue_xmit(struct sk_buff *skb)
 		}
 		sk_setup_caps(sk, &rt->u.dst);
 	}
-	skb_dst_set(skb, dst_clone(&rt->u.dst));
+	skb_dst_set_noref(skb, &rt->u.dst);
 
 packet_routed:
 	if (opt && opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
@@ -391,9 +393,12 @@ packet_routed:
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
 
-	return ip_local_out(skb);
+	res = ip_local_out(skb);
+	rcu_read_unlock();
+	return res;
 
 no_route:
+	rcu_read_unlock();
 	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
 	kfree_skb(skb);
 	return -EHOSTUNREACH;
-- 
cgit v1.2.3-59-g8ed1b


From cb863ffd4a04f9f8619f52c01d472a64ccc716bd Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Sun, 16 May 2010 22:59:41 +0000
Subject: X25: Move qbit flag to bitfield

Moves the X25 q bit flag from char into a bitfield to allow BKL cleanup.

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/x25.h |  6 +++++-
 net/x25/af_x25.c  | 17 ++++++++++-------
 2 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/include/net/x25.h b/include/net/x25.h
index 468551ea4f1d..7b5795e1ceb2 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -113,6 +113,9 @@ enum {
 #define X25_MAX_AE_LEN 		40			/* Max num of semi-octets in AE - OSI Nw */
 #define X25_MAX_DTE_FACIL_LEN	21			/* Max length of DTE facility params */
 
+/* Bitset in x25_sock->flags for misc flags */
+#define X25_Q_BIT_FLAG		0
+
 /**
  *	struct x25_route - x25 routing entry
  *	@node - entry in x25_list_lock
@@ -146,10 +149,11 @@ struct x25_sock {
 	struct x25_address	source_addr, dest_addr;
 	struct x25_neigh	*neighbour;
 	unsigned int		lci, cudmatchlength;
-	unsigned char		state, condition, qbitincl, intflag, accptapprv;
+	unsigned char		state, condition, intflag, accptapprv;
 	unsigned short		vs, vr, va, vl;
 	unsigned long		t2, t21, t22, t23;
 	unsigned short		fraglen;
+	unsigned long		flags;
 	struct sk_buff_head	ack_queue;
 	struct sk_buff_head	fragment_queue;
 	struct sk_buff_head	interrupt_in_queue;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 296e65e01064..720534ce1fd9 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -465,7 +465,10 @@ static int x25_setsockopt(struct socket *sock, int level, int optname,
 	if (get_user(opt, (int __user *)optval))
 		goto out;
 
-	x25_sk(sk)->qbitincl = !!opt;
+	if (opt)
+		set_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags);
+	else
+		clear_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags);
 	rc = 0;
 out:
 	unlock_kernel();
@@ -496,7 +499,7 @@ static int x25_getsockopt(struct socket *sock, int level, int optname,
 	if (put_user(len, optlen))
 		goto out;
 
-	val = x25_sk(sk)->qbitincl;
+	val = test_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags);
 	rc = copy_to_user(optval, &val, len) ? -EFAULT : 0;
 out:
 	unlock_kernel();
@@ -632,8 +635,8 @@ static struct sock *x25_make_new(struct sock *osk)
 	x25->t22        = ox25->t22;
 	x25->t23        = ox25->t23;
 	x25->t2         = ox25->t2;
+	x25->flags	= ox25->flags;
 	x25->facilities = ox25->facilities;
-	x25->qbitincl   = ox25->qbitincl;
 	x25->dte_facilities = ox25->dte_facilities;
 	x25->cudmatchlength = ox25->cudmatchlength;
 	x25->accptapprv = ox25->accptapprv;
@@ -1186,7 +1189,7 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
 	 *	If the Q BIT Include socket option is in force, the first
 	 *	byte of the user data is the logical value of the Q Bit.
 	 */
-	if (x25->qbitincl) {
+	if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) {
 		qbit = skb->data[0];
 		skb_pull(skb, 1);
 	}
@@ -1242,7 +1245,7 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock,
 		len = rc;
 		if (rc < 0)
 			kfree_skb(skb);
-		else if (x25->qbitincl)
+		else if (test_bit(X25_Q_BIT_FLAG, &x25->flags))
 			len++;
 	}
 
@@ -1307,7 +1310,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
 		/*
 		 *	No Q bit information on Interrupt data.
 		 */
-		if (x25->qbitincl) {
+		if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) {
 			asmptr  = skb_push(skb, 1);
 			*asmptr = 0x00;
 		}
@@ -1325,7 +1328,7 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
 		skb_pull(skb, x25->neighbour->extended ?
 				X25_EXT_MIN_LEN : X25_STD_MIN_LEN);
 
-		if (x25->qbitincl) {
+		if (test_bit(X25_Q_BIT_FLAG, &x25->flags)) {
 			asmptr  = skb_push(skb, 1);
 			*asmptr = qbit;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From b7792e34cba641c49cd436d42fbfd2a632ff39d3 Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Sun, 16 May 2010 23:00:02 +0000
Subject: X25: Move interrupt flag to bitfield

Moves the x25 interrupt flag from char into bitfield.

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/x25.h | 3 ++-
 net/x25/af_x25.c  | 1 +
 net/x25/x25_in.c  | 2 +-
 net/x25/x25_out.c | 5 +++--
 4 files changed, 7 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/include/net/x25.h b/include/net/x25.h
index 7b5795e1ceb2..1576e92c6cec 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -115,6 +115,7 @@ enum {
 
 /* Bitset in x25_sock->flags for misc flags */
 #define X25_Q_BIT_FLAG		0
+#define X25_INTERRUPT_FLAG	1
 
 /**
  *	struct x25_route - x25 routing entry
@@ -149,7 +150,7 @@ struct x25_sock {
 	struct x25_address	source_addr, dest_addr;
 	struct x25_neigh	*neighbour;
 	unsigned int		lci, cudmatchlength;
-	unsigned char		state, condition, intflag, accptapprv;
+	unsigned char		state, condition, accptapprv;
 	unsigned short		vs, vr, va, vl;
 	unsigned long		t2, t21, t22, t23;
 	unsigned short		fraglen;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 720534ce1fd9..3d97b8caf0b1 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -641,6 +641,7 @@ static struct sock *x25_make_new(struct sock *osk)
 	x25->cudmatchlength = ox25->cudmatchlength;
 	x25->accptapprv = ox25->accptapprv;
 
+	clear_bit(X25_INTERRUPT_FLAG, &x25->flags);
 	x25_init_timers(sk);
 out:
 	return sk;
diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c
index 372ac226e648..63178961efac 100644
--- a/net/x25/x25_in.c
+++ b/net/x25/x25_in.c
@@ -273,7 +273,7 @@ static int x25_state3_machine(struct sock *sk, struct sk_buff *skb, int frametyp
 			break;
 
 		case X25_INTERRUPT_CONFIRMATION:
-			x25->intflag = 0;
+			clear_bit(X25_INTERRUPT_FLAG, &x25->flags);
 			break;
 
 		case X25_INTERRUPT:
diff --git a/net/x25/x25_out.c b/net/x25/x25_out.c
index 52351a26b6fc..d00649fb251d 100644
--- a/net/x25/x25_out.c
+++ b/net/x25/x25_out.c
@@ -148,8 +148,9 @@ void x25_kick(struct sock *sk)
 	/*
 	 *	Transmit interrupt data.
 	 */
-	if (!x25->intflag && skb_peek(&x25->interrupt_out_queue) != NULL) {
-		x25->intflag = 1;
+	if (skb_peek(&x25->interrupt_out_queue) != NULL &&
+		!test_and_set_bit(X25_INTERRUPT_FLAG, &x25->flags)) {
+
 		skb = skb_dequeue(&x25->interrupt_out_queue);
 		x25_transmit_link(skb, x25->neighbour);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 37cda78741ecdbf45dd9b64e4c99dbdb11b47b46 Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Sun, 16 May 2010 23:00:27 +0000
Subject: X25: Move accept approve flag to bitfield

Moves the x25 accept approve flag from char into bitfield.

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/x25.h |  5 ++---
 net/x25/af_x25.c  | 12 ++++++------
 2 files changed, 8 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/x25.h b/include/net/x25.h
index 1576e92c6cec..1479cb4a41fc 100644
--- a/include/net/x25.h
+++ b/include/net/x25.h
@@ -80,8 +80,6 @@ enum {
 #define	X25_DEFAULT_PACKET_SIZE	X25_PS128		/* Default Packet Size */
 #define	X25_DEFAULT_THROUGHPUT	0x0A			/* Deafult Throughput */
 #define	X25_DEFAULT_REVERSE	0x00			/* Default Reverse Charging */
-#define X25_DENY_ACCPT_APPRV   0x01			/* Default value */
-#define X25_ALLOW_ACCPT_APPRV  0x00			/* Control enabled */
 
 #define X25_SMODULUS 		8
 #define	X25_EMODULUS		128
@@ -116,6 +114,7 @@ enum {
 /* Bitset in x25_sock->flags for misc flags */
 #define X25_Q_BIT_FLAG		0
 #define X25_INTERRUPT_FLAG	1
+#define X25_ACCPT_APPRV_FLAG	2
 
 /**
  *	struct x25_route - x25 routing entry
@@ -150,7 +149,7 @@ struct x25_sock {
 	struct x25_address	source_addr, dest_addr;
 	struct x25_neigh	*neighbour;
 	unsigned int		lci, cudmatchlength;
-	unsigned char		state, condition, accptapprv;
+	unsigned char		state, condition;
 	unsigned short		vs, vr, va, vl;
 	unsigned long		t2, t21, t22, t23;
 	unsigned short		fraglen;
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 3d97b8caf0b1..e5c1e3298f8a 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -586,7 +586,7 @@ static int x25_create(struct net *net, struct socket *sock, int protocol,
 	x25->t2    = sysctl_x25_ack_holdback_timeout;
 	x25->state = X25_STATE_0;
 	x25->cudmatchlength = 0;
-	x25->accptapprv = X25_DENY_ACCPT_APPRV;		/* normally no cud  */
+	set_bit(X25_ACCPT_APPRV_FLAG, &x25->flags);	/* normally no cud  */
 							/* on call accept   */
 
 	x25->facilities.winsize_in  = X25_DEFAULT_WINDOW_SIZE;
@@ -639,7 +639,6 @@ static struct sock *x25_make_new(struct sock *osk)
 	x25->facilities = ox25->facilities;
 	x25->dte_facilities = ox25->dte_facilities;
 	x25->cudmatchlength = ox25->cudmatchlength;
-	x25->accptapprv = ox25->accptapprv;
 
 	clear_bit(X25_INTERRUPT_FLAG, &x25->flags);
 	x25_init_timers(sk);
@@ -1057,8 +1056,8 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb,
 	makex25->vc_facil_mask &= ~X25_MASK_CALLING_AE;
 	makex25->cudmatchlength = x25_sk(sk)->cudmatchlength;
 
-	/* Normally all calls are accepted immediatly */
-	if(makex25->accptapprv & X25_DENY_ACCPT_APPRV) {
+	/* Normally all calls are accepted immediately */
+	if (test_bit(X25_ACCPT_APPRV_FLAG, &makex25->flags)) {
 		x25_write_internal(make, X25_CALL_ACCEPTED);
 		makex25->state = X25_STATE_3;
 	}
@@ -1580,7 +1579,7 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			rc = -EINVAL;
 			if (sk->sk_state != TCP_CLOSE)
 				break;
-			x25->accptapprv = X25_ALLOW_ACCPT_APPRV;
+			clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags);
 			rc = 0;
 			break;
 		}
@@ -1589,7 +1588,8 @@ static int x25_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 			rc = -EINVAL;
 			if (sk->sk_state != TCP_ESTABLISHED)
 				break;
-			if (x25->accptapprv)	/* must call accptapprv above */
+			/* must call accptapprv above */
+			if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags))
 				break;
 			x25_write_internal(sk, X25_CALL_ACCEPTED);
 			x25->state = X25_STATE_3;
-- 
cgit v1.2.3-59-g8ed1b


From 935e2a26b85003c0bd52b6c92712c2f77a5f9d33 Mon Sep 17 00:00:00 2001
From: andrew hendry <andrew.hendry@gmail.com>
Date: Sun, 16 May 2010 23:00:35 +0000
Subject: X25: Remove bkl in sockopts

Removes the BKL in x25 setsock and getsockopts.

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/x25/af_x25.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index e5c1e3298f8a..5e86d4e97dce 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -453,7 +453,6 @@ static int x25_setsockopt(struct socket *sock, int level, int optname,
 	struct sock *sk = sock->sk;
 	int rc = -ENOPROTOOPT;
 
-	lock_kernel();
 	if (level != SOL_X25 || optname != X25_QBITINCL)
 		goto out;
 
@@ -471,7 +470,6 @@ static int x25_setsockopt(struct socket *sock, int level, int optname,
 		clear_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags);
 	rc = 0;
 out:
-	unlock_kernel();
 	return rc;
 }
 
@@ -481,7 +479,6 @@ static int x25_getsockopt(struct socket *sock, int level, int optname,
 	struct sock *sk = sock->sk;
 	int val, len, rc = -ENOPROTOOPT;
 
-	lock_kernel();
 	if (level != SOL_X25 || optname != X25_QBITINCL)
 		goto out;
 
@@ -502,7 +499,6 @@ static int x25_getsockopt(struct socket *sock, int level, int optname,
 	val = test_bit(X25_Q_BIT_FLAG, &x25_sk(sk)->flags);
 	rc = copy_to_user(optval, &val, len) ? -EFAULT : 0;
 out:
-	unlock_kernel();
 	return rc;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ccbd6a5a4f76e821ed36f69fdaf59817c3a7f18e Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Fri, 14 May 2010 10:58:26 +0000
Subject: net: Remove unnecessary semicolons after switch statements

Also added an explicit break; to avoid
a fallthrough in net/ipv4/tcp_input.c

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/8021q/vlan_core.c | 2 +-
 net/core/ethtool.c    | 4 ++--
 net/ipv4/tcp.c        | 2 +-
 net/ipv4/tcp_input.c  | 5 +++--
 net/rds/tcp_connect.c | 2 +-
 net/socket.c          | 2 +-
 net/xfrm/xfrm_hash.h  | 6 +++---
 7 files changed, 12 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index c584a0af77d3..bd537fc10254 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -61,7 +61,7 @@ int vlan_hwaccel_do_receive(struct sk_buff *skb)
 					dev->dev_addr))
 			skb->pkt_type = PACKET_HOST;
 		break;
-	};
+	}
 	return 0;
 }
 
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 1a7db92037fa..a0f4964033d2 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -522,7 +522,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			goto unknown_filter;
-		};
+		}
 
 		/* now the rest of the filters */
 		switch (fsc->fs.flow_type) {
@@ -646,7 +646,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			break;
-		};
+		}
 		sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
 			fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
 		p += ETH_GSTRING_LEN;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3284393d09b4..6596b4feeddc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2215,7 +2215,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	default:
 		/* fallthru */
 		break;
-	};
+	}
 
 	if (optlen < sizeof(int))
 		return -EINVAL;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e82162c211bf..3e6dafcb1071 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3845,12 +3845,13 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 					/* 16-bit multiple */
 					opt_rx->cookie_plus = opsize;
 					*hvpp = ptr;
+					break;
 				default:
 					/* ignore option */
 					break;
-				};
+				}
 				break;
-			};
+			}
 
 			ptr += opsize-2;
 			length -= opsize;
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 056256285987..c397524c039c 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -141,7 +141,7 @@ void rds_tcp_conn_shutdown(struct rds_connection *conn)
 
 		release_sock(sock->sk);
 		sock_release(sock);
-	};
+	}
 
 	if (tc->t_tinc) {
 		rds_inc_put(&tc->t_tinc->ti_inc);
diff --git a/net/socket.c b/net/socket.c
index dae8c6b84a09..f9f7d0872cac 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2615,7 +2615,7 @@ static int bond_ioctl(struct net *net, unsigned int cmd,
 		return dev_ioctl(net, cmd, uifr);
 	default:
 		return -EINVAL;
-	};
+	}
 }
 
 static int siocdevprivate_ioctl(struct net *net, unsigned int cmd,
diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h
index 1396572d2ade..8e69533d2313 100644
--- a/net/xfrm/xfrm_hash.h
+++ b/net/xfrm/xfrm_hash.h
@@ -55,7 +55,7 @@ static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr,
 	case AF_INET6:
 		h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
 		break;
-	};
+	}
 	return (h ^ (h >> 16)) & hmask;
 }
 
@@ -102,7 +102,7 @@ static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short
 
 		h = __xfrm6_daddr_saddr_hash(daddr, saddr);
 		break;
-	};
+	}
 	h ^= (h >> 16);
 	return h & hmask;
 }
@@ -119,7 +119,7 @@ static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *sa
 	case AF_INET6:
 		h = __xfrm6_daddr_saddr_hash(daddr, saddr);
 		break;
-	};
+	}
 	h ^= (h >> 16);
 	return h & hmask;
 }
-- 
cgit v1.2.3-59-g8ed1b


From f0cd15081a72075df16c45a2310e873fb9fcd82f Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Fri, 14 May 2010 14:38:59 +0000
Subject: tbf: stop wanton destruction of children (v2)

Several netem users use TBF for rate control. But every time the parameters
of TBF are changed it destroys the child qdisc, requiring reconfiguration.
Better to just keep child qdisc and just notify it of changed limit.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_tbf.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 8fb8107ab188..0991c640cd3e 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -273,7 +273,11 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt)
 	if (max_size < 0)
 		goto done;
 
-	if (qopt->limit > 0) {
+	if (q->qdisc != &noop_qdisc) {
+		err = fifo_set_limit(q->qdisc, qopt->limit);
+		if (err)
+			goto done;
+	} else if (qopt->limit > 0) {
 		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
 		if (IS_ERR(child)) {
 			err = PTR_ERR(child);
-- 
cgit v1.2.3-59-g8ed1b


From a2f79227138c71e08627af5f8961197364edbc98 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 14 May 2010 08:08:14 +0000
Subject: net_sched: sch_hfsc: fix classification loops

When attaching filters to a class pointing to a class higher up in the
hierarchy, classification may enter an endless loop. Currently this is
prevented for filters that are already resolved, but not for filters
resolved at runtime.

Only allow filters to point downwards in the hierarchy, similar to what
CBQ does.

Reported-by: Pawel Staszewski <pstaszewski@itcare.pl>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_hfsc.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index b38b39c60752..a435cf13cc27 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1155,7 +1155,7 @@ static struct hfsc_class *
 hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 {
 	struct hfsc_sched *q = qdisc_priv(sch);
-	struct hfsc_class *cl;
+	struct hfsc_class *head, *cl;
 	struct tcf_result res;
 	struct tcf_proto *tcf;
 	int result;
@@ -1166,6 +1166,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 			return cl;
 
 	*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+	head = &q->root;
 	tcf = q->root.filter_list;
 	while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
 #ifdef CONFIG_NET_CLS_ACT
@@ -1180,6 +1181,8 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 		if ((cl = (struct hfsc_class *)res.class) == NULL) {
 			if ((cl = hfsc_find_class(res.classid, sch)) == NULL)
 				break; /* filter selected invalid classid */
+			if (cl->level >= head->level)
+				break; /* filter may only point downwards */
 		}
 
 		if (cl->level == 0)
@@ -1187,6 +1190,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 
 		/* apply inner filter chain */
 		tcf = cl->filter_list;
+		head = cl;
 	}
 
 	/* classification failed, try default class */
-- 
cgit v1.2.3-59-g8ed1b


From eedf042a63ffef050ebc015de19b52dc065e830b Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Mon, 17 May 2010 22:27:12 -0700
Subject: ipv6: fix the bug of address check

The duplicate address check code got broken in the conversion
to hlist (2.6.35).  The earlier patch did not fix the case where
two addresses match the same hash value. Use two exit paths,
rather than depending on state of loop variables (from macro).

Based on earlier fix by Shan Wei.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Reviewed-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3984f52181f4..75d3b8c1e856 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1274,7 +1274,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev)
 int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
 		  struct net_device *dev, int strict)
 {
-	struct inet6_ifaddr *ifp = NULL;
+	struct inet6_ifaddr *ifp;
 	struct hlist_node *node;
 	unsigned int hash = ipv6_addr_hash(addr);
 
@@ -1283,15 +1283,16 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
-		    !(ifp->flags&IFA_F_TENTATIVE)) {
-			if (dev == NULL || ifp->idev->dev == dev ||
-			    !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))
-				break;
+		    !(ifp->flags&IFA_F_TENTATIVE) &&
+		    (dev == NULL || ifp->idev->dev == dev ||
+		     !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) {
+			rcu_read_unlock_bh();
+			return 1;
 		}
 	}
-	rcu_read_unlock_bh();
 
-	return ifp != NULL;
+	rcu_read_unlock_bh();
+	return 0;
 }
 EXPORT_SYMBOL(ipv6_chk_addr);
 
-- 
cgit v1.2.3-59-g8ed1b


From de213e5eedecdfb1b1eea7e6be28bc64cac5c078 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 17 May 2010 22:35:36 -0700
Subject: tcp: tcp_synack_options() fix

Commit 33ad798c924b4a (tcp: options clean up) introduced a problem
if MD5+SACK+timestamps were used in initial SYN message.

Some stacks (old linux for example) try to negotiate MD5+SACK+TSTAMP
sessions, but since 40 bytes of tcp options space are not enough to
store all the bits needed, we chose to disable timestamps in this case.

We send a SYN-ACK _without_ timestamp option, but socket has timestamps
enabled and all further outgoing messages contain a TS block, all with
the initial timestamp of the remote peer.

Fix is to really disable timestamps option for the whole session.

Reported-by: Bijay Singh <Bijay.Singh@guavus.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 18a3302480cb..b4ed957f201a 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -668,7 +668,6 @@ static unsigned tcp_synack_options(struct sock *sk,
 	u8 cookie_plus = (xvp != NULL && !xvp->cookie_out_never) ?
 			 xvp->cookie_plus :
 			 0;
-	bool doing_ts = ireq->tstamp_ok;
 
 #ifdef CONFIG_TCP_MD5SIG
 	*md5 = tcp_rsk(req)->af_specific->md5_lookup(sk, req);
@@ -681,7 +680,7 @@ static unsigned tcp_synack_options(struct sock *sk,
 		 * rather than TS in order to fit in better with old,
 		 * buggy kernels, but that was deemed to be unnecessary.
 		 */
-		doing_ts &= !ireq->sack_ok;
+		ireq->tstamp_ok &= !ireq->sack_ok;
 	}
 #else
 	*md5 = NULL;
@@ -696,7 +695,7 @@ static unsigned tcp_synack_options(struct sock *sk,
 		opts->options |= OPTION_WSCALE;
 		remaining -= TCPOLEN_WSCALE_ALIGNED;
 	}
-	if (likely(doing_ts)) {
+	if (likely(ireq->tstamp_ok)) {
 		opts->options |= OPTION_TS;
 		opts->tsval = TCP_SKB_CB(skb)->when;
 		opts->tsecr = req->ts_recent;
@@ -704,7 +703,7 @@ static unsigned tcp_synack_options(struct sock *sk,
 	}
 	if (likely(ireq->sack_ok)) {
 		opts->options |= OPTION_SACK_ADVERTISE;
-		if (unlikely(!doing_ts))
+		if (unlikely(!ireq->tstamp_ok))
 			remaining -= TCPOLEN_SACKPERM_ALIGNED;
 	}
 
@@ -712,7 +711,7 @@ static unsigned tcp_synack_options(struct sock *sk,
 	 * If the <SYN> options fit, the same options should fit now!
 	 */
 	if (*md5 == NULL &&
-	    doing_ts &&
+	    ireq->tstamp_ok &&
 	    cookie_plus > TCPOLEN_COOKIE_BASE) {
 		int need = cookie_plus; /* has TCPOLEN_COOKIE_BASE */
 
-- 
cgit v1.2.3-59-g8ed1b


From d19d56ddc88e7895429ef118db9c83c7bbe3ce6a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 17 May 2010 22:36:55 -0700
Subject: net: Introduce skb_tunnel_rx() helper

skb rxhash should be cleared when a skb is handled by a tunnel before
being delivered again, so that correct packet steering can take place.

There are other cleanups and accounting that we can factorize in a new
helper, skb_tunnel_rx()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h     | 20 ++++++++++++++++++++
 net/ipv4/ip_gre.c     |  9 +--------
 net/ipv4/ipip.c       |  7 ++-----
 net/ipv4/ipmr.c       |  8 +++-----
 net/ipv6/ip6_tunnel.c |  8 ++------
 net/ipv6/ip6mr.c      |  8 +++-----
 net/ipv6/sit.c        |  8 +++-----
 7 files changed, 34 insertions(+), 34 deletions(-)

(limited to 'net')

diff --git a/include/net/dst.h b/include/net/dst.h
index 27207a13f2a6..612069beda73 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -226,6 +226,26 @@ static inline void skb_dst_force(struct sk_buff *skb)
 	}
 }
 
+
+/**
+ *	skb_tunnel_rx - prepare skb for rx reinsert
+ *	@skb: buffer
+ *	@dev: tunnel device
+ *
+ *	After decapsulation, packet is going to re-enter (netif_rx()) our stack,
+ *	so make some cleanups, and perform accounting.
+ */
+static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev)
+{
+	skb->dev = dev;
+	/* TODO : stats should be SMP safe */
+	dev->stats.rx_packets++;
+	dev->stats.rx_bytes += skb->len;
+	skb->rxhash = 0;
+	skb_dst_drop(skb);
+	nf_reset(skb);
+}
+
 /* Children define the path of the packet through the
  * Linux networking.  Thus, destinations are stackable.
  */
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index fe381d12ecdd..498cf69c7977 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -538,7 +538,6 @@ static int ipgre_rcv(struct sk_buff *skb)
 	struct ip_tunnel *tunnel;
 	int    offset = 4;
 	__be16 gre_proto;
-	unsigned int len;
 
 	if (!pskb_may_pull(skb, 16))
 		goto drop_nolock;
@@ -629,8 +628,6 @@ static int ipgre_rcv(struct sk_buff *skb)
 			tunnel->i_seqno = seqno + 1;
 		}
 
-		len = skb->len;
-
 		/* Warning: All skb pointers will be invalidated! */
 		if (tunnel->dev->type == ARPHRD_ETHER) {
 			if (!pskb_may_pull(skb, ETH_HLEN)) {
@@ -644,11 +641,7 @@ static int ipgre_rcv(struct sk_buff *skb)
 			skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 		}
 
-		stats->rx_packets++;
-		stats->rx_bytes += len;
-		skb->dev = tunnel->dev;
-		skb_dst_drop(skb);
-		nf_reset(skb);
+		skb_tunnel_rx(skb, tunnel->dev);
 
 		skb_reset_network_header(skb);
 		ipgre_ecn_decapsulate(iph, skb);
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 0b27b14dcc9d..7fd636711037 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -374,11 +374,8 @@ static int ipip_rcv(struct sk_buff *skb)
 		skb->protocol = htons(ETH_P_IP);
 		skb->pkt_type = PACKET_HOST;
 
-		tunnel->dev->stats.rx_packets++;
-		tunnel->dev->stats.rx_bytes += skb->len;
-		skb->dev = tunnel->dev;
-		skb_dst_drop(skb);
-		nf_reset(skb);
+		skb_tunnel_rx(skb, tunnel->dev);
+
 		ipip_ecn_decapsulate(iph, skb);
 		netif_rx(skb);
 		rcu_read_unlock();
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 7a7ee1cc3b5a..217ebe035b34 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1831,14 +1831,12 @@ static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
 	skb->mac_header = skb->network_header;
 	skb_pull(skb, (u8*)encap - skb->data);
 	skb_reset_network_header(skb);
-	skb->dev = reg_dev;
 	skb->protocol = htons(ETH_P_IP);
 	skb->ip_summed = 0;
 	skb->pkt_type = PACKET_HOST;
-	skb_dst_drop(skb);
-	reg_dev->stats.rx_bytes += skb->len;
-	reg_dev->stats.rx_packets++;
-	nf_reset(skb);
+
+	skb_tunnel_rx(skb, reg_dev);
+
 	netif_rx(skb);
 	dev_put(reg_dev);
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2599870747ec..8f39893d8081 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -723,14 +723,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,
 		skb->protocol = htons(protocol);
 		skb->pkt_type = PACKET_HOST;
 		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
-		skb->dev = t->dev;
-		skb_dst_drop(skb);
-		nf_reset(skb);
 
-		dscp_ecn_decapsulate(t, ipv6h, skb);
+		skb_tunnel_rx(skb, t->dev);
 
-		t->dev->stats.rx_packets++;
-		t->dev->stats.rx_bytes += skb->len;
+		dscp_ecn_decapsulate(t, ipv6h, skb);
 		netif_rx(skb);
 		rcu_read_unlock();
 		return 0;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 163850e22b11..bd9e7d3e9c8e 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -658,14 +658,12 @@ static int pim6_rcv(struct sk_buff *skb)
 	skb->mac_header = skb->network_header;
 	skb_pull(skb, (u8 *)encap - skb->data);
 	skb_reset_network_header(skb);
-	skb->dev = reg_dev;
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->ip_summed = 0;
 	skb->pkt_type = PACKET_HOST;
-	skb_dst_drop(skb);
-	reg_dev->stats.rx_bytes += skb->len;
-	reg_dev->stats.rx_packets++;
-	nf_reset(skb);
+
+	skb_tunnel_rx(skb, reg_dev);
+
 	netif_rx(skb);
 	dev_put(reg_dev);
 	return 0;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 5abae10cd884..e51e650ea80b 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -566,11 +566,9 @@ static int ipip6_rcv(struct sk_buff *skb)
 			kfree_skb(skb);
 			return 0;
 		}
-		tunnel->dev->stats.rx_packets++;
-		tunnel->dev->stats.rx_bytes += skb->len;
-		skb->dev = tunnel->dev;
-		skb_dst_drop(skb);
-		nf_reset(skb);
+
+		skb_tunnel_rx(skb, tunnel->dev);
+
 		ipip6_ecn_decapsulate(iph, skb);
 		netif_rx(skb);
 		rcu_read_unlock();
-- 
cgit v1.2.3-59-g8ed1b


From 57b610805ce92dbd79fc97509f80fa5391b99623 Mon Sep 17 00:00:00 2001
From: Scott Feldman <scofeldm@cisco.com>
Date: Mon, 17 May 2010 22:49:55 -0700
Subject: net: Add netlink support for virtual port management (was iovnl)

Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface.  Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list).  These are both nested attributes
using this layout:

              [IFLA_NUM_VF]
              [IFLA_VF_PORTS]
                      [IFLA_VF_PORT]
                              [IFLA_PORT_*], ...
                      [IFLA_VF_PORT]
                              [IFLA_PORT_*], ...
                      ...
              [IFLA_PORT_SELF]
                      [IFLA_PORT_*], ...

These attributes are designed to be set and get symmetrically.  VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device.  PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.

A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev.  A port-profile is an identifier known to the switch.  How port-
profiles are installed on the switch or how available port-profiles are
made known to the host is outside the scope of this patch.

There are two types of port-profiles specs in the netlink msg.  The first spec
is for 802.1Qbg (pre-)standard, VDP protocol.  The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details.  In either case, the specs have much in common and it makes sense to
define the netlink msg as the union of the two specs.  For example, both specs
have a notion of associating/disassociating a port-profile.  And both specs
require some information from the hypervisor manager, such as client port
instance ID.

The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile.  What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h   |  75 ++++++++++++++++++++
 include/linux/netdevice.h |   8 +++
 net/core/rtnetlink.c      | 169 +++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 251 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index c3af67fce3f2..85c812db5a3f 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -113,6 +113,8 @@ enum {
 	IFLA_NUM_VF,		/* Number of VFs if device is SR-IOV PF */
 	IFLA_VFINFO_LIST,
 	IFLA_STATS64,
+	IFLA_VF_PORTS,
+	IFLA_PORT_SELF,
 	__IFLA_MAX
 };
 
@@ -274,4 +276,77 @@ struct ifla_vf_info {
 	__u32 qos;
 	__u32 tx_rate;
 };
+
+/* VF ports management section
+ *
+ *	Nested layout of set/get msg is:
+ *
+ *		[IFLA_NUM_VF]
+ *		[IFLA_VF_PORTS]
+ *			[IFLA_VF_PORT]
+ *				[IFLA_PORT_*], ...
+ *			[IFLA_VF_PORT]
+ *				[IFLA_PORT_*], ...
+ *			...
+ *		[IFLA_PORT_SELF]
+ *			[IFLA_PORT_*], ...
+ */
+
+enum {
+	IFLA_VF_PORT_UNSPEC,
+	IFLA_VF_PORT,			/* nest */
+	__IFLA_VF_PORT_MAX,
+};
+
+#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1)
+
+enum {
+	IFLA_PORT_UNSPEC,
+	IFLA_PORT_VF,			/* __u32 */
+	IFLA_PORT_PROFILE,		/* string */
+	IFLA_PORT_VSI_TYPE,		/* 802.1Qbg (pre-)standard VDP */
+	IFLA_PORT_INSTANCE_UUID,	/* binary UUID */
+	IFLA_PORT_HOST_UUID,		/* binary UUID */
+	IFLA_PORT_REQUEST,		/* __u8 */
+	IFLA_PORT_RESPONSE,		/* __u16, output only */
+	__IFLA_PORT_MAX,
+};
+
+#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1)
+
+#define PORT_PROFILE_MAX	40
+#define PORT_UUID_MAX		16
+#define PORT_SELF_VF		-1
+
+enum {
+	PORT_REQUEST_PREASSOCIATE = 0,
+	PORT_REQUEST_PREASSOCIATE_RR,
+	PORT_REQUEST_ASSOCIATE,
+	PORT_REQUEST_DISASSOCIATE,
+};
+
+enum {
+	PORT_VDP_RESPONSE_SUCCESS = 0,
+	PORT_VDP_RESPONSE_INVALID_FORMAT,
+	PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES,
+	PORT_VDP_RESPONSE_UNUSED_VTID,
+	PORT_VDP_RESPONSE_VTID_VIOLATION,
+	PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION,
+	PORT_VDP_RESPONSE_OUT_OF_SYNC,
+	/* 0x08-0xFF reserved for future VDP use */
+	PORT_PROFILE_RESPONSE_SUCCESS = 0x100,
+	PORT_PROFILE_RESPONSE_INPROGRESS,
+	PORT_PROFILE_RESPONSE_INVALID,
+	PORT_PROFILE_RESPONSE_BADSTATE,
+	PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES,
+	PORT_PROFILE_RESPONSE_ERROR,
+};
+
+struct ifla_port_vsi {
+	__u8 vsi_mgr_id;
+	__u8 vsi_type_id[3];
+	__u8 vsi_type_version;
+	__u8 pad[3];
+};
+
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c1b2341897c2..c3487a6bdf99 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -686,6 +686,9 @@ struct netdev_rx_queue {
  * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
  * int (*ndo_get_vf_config)(struct net_device *dev,
  *			    int vf, struct ifla_vf_info *ivf);
+ * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
+ *			  struct nlattr *port[]);
+ * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
  */
 #define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
@@ -735,6 +738,11 @@ struct net_device_ops {
 	int			(*ndo_get_vf_config)(struct net_device *dev,
 						     int vf,
 						     struct ifla_vf_info *ivf);
+	int			(*ndo_set_vf_port)(struct net_device *dev,
+						   int vf,
+						   struct nlattr *port[]);
+	int			(*ndo_get_vf_port)(struct net_device *dev,
+						   int vf, struct sk_buff *skb);
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
 	int			(*ndo_fcoe_disable)(struct net_device *dev);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 66db1201da9b..e4b9870e4706 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -660,6 +660,31 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev)
 		return 0;
 }
 
+static size_t rtnl_port_size(const struct net_device *dev)
+{
+	size_t port_size = nla_total_size(4)		/* PORT_VF */
+		+ nla_total_size(PORT_PROFILE_MAX)	/* PORT_PROFILE */
+		+ nla_total_size(sizeof(struct ifla_port_vsi))
+							/* PORT_VSI_TYPE */
+		+ nla_total_size(PORT_UUID_MAX)		/* PORT_INSTANCE_UUID */
+		+ nla_total_size(PORT_UUID_MAX)		/* PORT_HOST_UUID */
+		+ nla_total_size(1)			/* PROT_VDP_REQUEST */
+		+ nla_total_size(2);			/* PORT_VDP_RESPONSE */
+	size_t vf_ports_size = nla_total_size(sizeof(struct nlattr));
+	size_t vf_port_size = nla_total_size(sizeof(struct nlattr))
+		+ port_size;
+	size_t port_self_size = nla_total_size(sizeof(struct nlattr))
+		+ port_size;
+
+	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+		return 0;
+	if (dev_num_vf(dev->dev.parent))
+		return port_self_size + vf_ports_size +
+			vf_port_size * dev_num_vf(dev->dev.parent);
+	else
+		return port_self_size;
+}
+
 static inline size_t if_nlmsg_size(const struct net_device *dev)
 {
 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
@@ -680,9 +705,82 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(4) /* IFLA_NUM_VF */
 	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
+	       + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
 	       + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
 }
 
+static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nlattr *vf_ports;
+	struct nlattr *vf_port;
+	int vf;
+	int err;
+
+	vf_ports = nla_nest_start(skb, IFLA_VF_PORTS);
+	if (!vf_ports)
+		return -EMSGSIZE;
+
+	for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
+		vf_port = nla_nest_start(skb, IFLA_VF_PORT);
+		if (!vf_port) {
+			nla_nest_cancel(skb, vf_ports);
+			return -EMSGSIZE;
+		}
+		NLA_PUT_U32(skb, IFLA_PORT_VF, vf);
+		err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
+		if (err) {
+nla_put_failure:
+			nla_nest_cancel(skb, vf_port);
+			continue;
+		}
+		nla_nest_end(skb, vf_port);
+	}
+
+	nla_nest_end(skb, vf_ports);
+
+	return 0;
+}
+
+static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nlattr *port_self;
+	int err;
+
+	port_self = nla_nest_start(skb, IFLA_PORT_SELF);
+	if (!port_self)
+		return -EMSGSIZE;
+
+	err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
+	if (err) {
+		nla_nest_cancel(skb, port_self);
+		return err;
+	}
+
+	nla_nest_end(skb, port_self);
+
+	return 0;
+}
+
+static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	int err;
+
+	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+		return 0;
+
+	err = rtnl_port_self_fill(skb, dev);
+	if (err)
+		return err;
+
+	if (dev_num_vf(dev->dev.parent)) {
+		err = rtnl_vf_ports_fill(skb, dev);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    int type, u32 pid, u32 seq, u32 change,
 			    unsigned int flags)
@@ -754,13 +852,15 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		goto nla_put_failure;
 	copy_rtnl_link_stats64(nla_data(attr), stats);
 
+	if (dev->dev.parent)
+		NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
+
 	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
 		int i;
 
 		struct nlattr *vfinfo, *vf;
 		int num_vfs = dev_num_vf(dev->dev.parent);
 
-		NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs);
 		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
 		if (!vfinfo)
 			goto nla_put_failure;
@@ -788,6 +888,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		}
 		nla_nest_end(skb, vfinfo);
 	}
+
+	if (rtnl_port_fill(skb, dev))
+		goto nla_put_failure;
+
 	if (dev->rtnl_link_ops) {
 		if (rtnl_link_fill(skb, dev) < 0)
 			goto nla_put_failure;
@@ -849,6 +953,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
 	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
 	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
+	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
+	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -870,6 +976,20 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 				    .len = sizeof(struct ifla_vf_tx_rate) },
 };
 
+static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
+	[IFLA_PORT_VF]		= { .type = NLA_U32 },
+	[IFLA_PORT_PROFILE]	= { .type = NLA_STRING,
+				    .len = PORT_PROFILE_MAX },
+	[IFLA_PORT_VSI_TYPE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_port_vsi)},
+	[IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY,
+				      .len = PORT_UUID_MAX },
+	[IFLA_PORT_HOST_UUID]	= { .type = NLA_STRING,
+				    .len = PORT_UUID_MAX },
+	[IFLA_PORT_REQUEST]	= { .type = NLA_U8, },
+	[IFLA_PORT_RESPONSE]	= { .type = NLA_U16, },
+};
+
 struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
 {
 	struct net *net;
@@ -1089,6 +1209,53 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	}
 	err = 0;
 
+	if (tb[IFLA_VF_PORTS]) {
+		struct nlattr *port[IFLA_PORT_MAX+1];
+		struct nlattr *attr;
+		int vf;
+		int rem;
+
+		err = -EOPNOTSUPP;
+		if (!ops->ndo_set_vf_port)
+			goto errout;
+
+		nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) {
+			if (nla_type(attr) != IFLA_VF_PORT)
+				continue;
+			err = nla_parse_nested(port, IFLA_PORT_MAX,
+				attr, ifla_port_policy);
+			if (err < 0)
+				goto errout;
+			if (!port[IFLA_PORT_VF]) {
+				err = -EOPNOTSUPP;
+				goto errout;
+			}
+			vf = nla_get_u32(port[IFLA_PORT_VF]);
+			err = ops->ndo_set_vf_port(dev, vf, port);
+			if (err < 0)
+				goto errout;
+			modified = 1;
+		}
+	}
+	err = 0;
+
+	if (tb[IFLA_PORT_SELF]) {
+		struct nlattr *port[IFLA_PORT_MAX+1];
+
+		err = nla_parse_nested(port, IFLA_PORT_MAX,
+			tb[IFLA_PORT_SELF], ifla_port_policy);
+		if (err < 0)
+			goto errout;
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_port)
+			err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port);
+		if (err < 0)
+			goto errout;
+		modified = 1;
+	}
+	err = 0;
+
 errout:
 	if (err < 0 && modified && net_ratelimit())
 		printk(KERN_WARNING "A link change request failed with "
-- 
cgit v1.2.3-59-g8ed1b


From 2e3219b5c8a2e44e0b83ae6e04f52f20a82ac0f2 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <yjwei@cn.fujitsu.com>
Date: Mon, 17 May 2010 22:51:58 -0700
Subject: sctp: fix append error cause to ERROR chunk correctly

commit 5fa782c2f5ef6c2e4f04d3e228412c9b4a4c8809
  sctp: Fix skb_over_panic resulting from multiple invalid \
    parameter errors (CVE-2010-1173) (v4)

caused the 'error cause' to never be added to the ERROR chunk due to
a typo in the valid-length check in sctp_init_cause_fixed().

Signed-off-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Reviewed-by: Neil Horman <nhorman@tuxdriver.com>
Acked-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/sm_make_chunk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index d8261f3d7715..bd2a50b482ac 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -141,7 +141,7 @@ int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code,
 	len = sizeof(sctp_errhdr_t) + paylen;
 	err.length  = htons(len);
 
-	if (skb_tailroom(chunk->skb) >  len)
+	if (skb_tailroom(chunk->skb) < len)
 		return -ENOSPC;
 	chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk,
 						     sizeof(sctp_errhdr_t),
@@ -1415,7 +1415,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data)
 void *sctp_addto_chunk_fixed(struct sctp_chunk *chunk,
 			     int len, const void *data)
 {
-	if (skb_tailroom(chunk->skb) > len)
+	if (skb_tailroom(chunk->skb) >= len)
 		return sctp_addto_chunk(chunk, len, data);
 	else
 		return NULL;
-- 
cgit v1.2.3-59-g8ed1b


From 49afa55b5bd792cda4fca15d3238e9b6f370c856 Mon Sep 17 00:00:00 2001
From: Julia Lawall <julia@diku.dk>
Date: Thu, 13 May 2010 10:03:32 +0000
Subject: net/caif: Use kzalloc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use kzalloc rather than the combination of kmalloc and memset.

A simplified version of the semantic patch that makes this change is as
follows: (http://coccinelle.lip6.fr/)

// <smpl>
@@
expression x,size,flags;
statement S;
@@

-x = kmalloc(size,flags);
+x = kzalloc(size,flags);
 if (x == NULL) S
-memset(x, 0, size);
// </smpl>

Signed-off-by: Julia Lawall <julia@diku.dk>
Acked-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/cfcnfg.c | 3 +--
 net/caif/cfctrl.c | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/caif/cfcnfg.c b/net/caif/cfcnfg.c
index 471c62939fad..df43f264d9fb 100644
--- a/net/caif/cfcnfg.c
+++ b/net/caif/cfcnfg.c
@@ -65,12 +65,11 @@ struct cfcnfg *cfcnfg_create(void)
 	struct cfcnfg *this;
 	struct cfctrl_rsp *resp;
 	/* Initiate this layer */
-	this = kmalloc(sizeof(struct cfcnfg), GFP_ATOMIC);
+	this = kzalloc(sizeof(struct cfcnfg), GFP_ATOMIC);
 	if (!this) {
 		pr_warning("CAIF: %s(): Out of memory\n", __func__);
 		return NULL;
 	}
-	memset(this, 0, sizeof(struct cfcnfg));
 	this->mux = cfmuxl_create();
 	if (!this->mux)
 		goto out_of_mem;
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index a521d32cfe56..0ffe1e1ce901 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -284,12 +284,11 @@ int cfctrl_linkup_request(struct cflayer *layer,
 			   __func__, param->linktype);
 		return -EINVAL;
 	}
-	req = kmalloc(sizeof(*req), GFP_KERNEL);
+	req = kzalloc(sizeof(*req), GFP_KERNEL);
 	if (!req) {
 		pr_warning("CAIF: %s(): Out of memory\n", __func__);
 		return -ENOMEM;
 	}
-	memset(req, 0, sizeof(*req));
 	req->client_layer = user_layer;
 	req->cmd = CFCTRL_CMD_LINK_SETUP;
 	req->param = *param;
-- 
cgit v1.2.3-59-g8ed1b


From 6ff9c3644e72bfac20844e0155c2cc8108602820 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 12 May 2010 06:37:05 +0000
Subject: net sched: printk message severity

The previous patch encouraged me to go look at all the messages in
the network scheduler and fix them. Many messages were missing
any severity level. Some serious ones that should never happen
were turned into WARN(), and the random noise messages that were
handled changed to pr_debug().

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_api.c    | 20 +++++++++++---------
 net/sched/act_gact.c   |  4 ++--
 net/sched/act_ipt.c    |  3 ++-
 net/sched/act_mirred.c |  6 +++---
 net/sched/act_pedit.c  | 11 ++++++-----
 net/sched/act_simple.c |  4 ++--
 net/sched/cls_u32.c    | 10 +++++-----
 net/sched/ematch.c     |  3 ++-
 8 files changed, 33 insertions(+), 28 deletions(-)

(limited to 'net')

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 019045174fc3..972378f47f3c 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -153,7 +153,7 @@ int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
 	} else if (type == RTM_GETACTION) {
 		return tcf_dump_walker(skb, cb, a, hinfo);
 	} else {
-		printk("tcf_generic_walker: unknown action %d\n", type);
+		WARN(1, "tcf_generic_walker: unknown action %d\n", type);
 		return -EINVAL;
 	}
 }
@@ -403,8 +403,9 @@ void tcf_action_destroy(struct tc_action *act, int bind)
 				module_put(a->ops->owner);
 			act = act->next;
 			kfree(a);
-		} else { /*FIXME: Remove later - catch insertion bugs*/
-			printk("tcf_action_destroy: BUG? destroying NULL ops\n");
+		} else {
+			/*FIXME: Remove later - catch insertion bugs*/
+			WARN(1, "tcf_action_destroy: BUG? destroying NULL ops\n");
 			act = act->next;
 			kfree(a);
 		}
@@ -744,7 +745,7 @@ static struct tc_action *create_a(int i)
 
 	act = kzalloc(sizeof(*act), GFP_KERNEL);
 	if (act == NULL) {
-		printk("create_a: failed to alloc!\n");
+		pr_debug("create_a: failed to alloc!\n");
 		return NULL;
 	}
 	act->order = i;
@@ -766,13 +767,13 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
 	int err = -ENOMEM;
 
 	if (a == NULL) {
-		printk("tca_action_flush: couldnt create tc_action\n");
+		pr_debug("tca_action_flush: couldnt create tc_action\n");
 		return err;
 	}
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb) {
-		printk("tca_action_flush: failed skb alloc\n");
+		pr_debug("tca_action_flush: failed skb alloc\n");
 		kfree(a);
 		return err;
 	}
@@ -979,7 +980,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
 		return ret;
 
 	if (tca[TCA_ACT_TAB] == NULL) {
-		printk("tc_ctl_action: received NO action attribs\n");
+		pr_notice("tc_ctl_action: received NO action attribs\n");
 		return -EINVAL;
 	}
 
@@ -1056,7 +1057,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct nlattr *kind = find_dump_kind(cb->nlh);
 
 	if (kind == NULL) {
-		printk("tc_dump_action: action bad kind\n");
+		pr_info("tc_dump_action: action bad kind\n");
 		return 0;
 	}
 
@@ -1069,7 +1070,8 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	a.ops = a_o;
 
 	if (a_o->walk == NULL) {
-		printk("tc_dump_action: %s !capable of dumping table\n", a_o->kind);
+		WARN(1, "tc_dump_action: %s !capable of dumping table\n",
+		     a_o->kind);
 		goto nla_put_failure;
 	}
 
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index e7f796aec657..8406c6654990 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -202,9 +202,9 @@ MODULE_LICENSE("GPL");
 static int __init gact_init_module(void)
 {
 #ifdef CONFIG_GACT_PROB
-	printk("GACT probability on\n");
+	printk(KERN_INFO "GACT probability on\n");
 #else
-	printk("GACT probability NOT on\n");
+	printk(KERN_INFO "GACT probability NOT on\n");
 #endif
 	return tcf_register_action(&act_gact_ops);
 }
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 1f9595467c17..c7e59e6ec349 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -235,7 +235,8 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 		break;
 	default:
 		if (net_ratelimit())
-			printk("Bogus netfilter code %d assume ACCEPT\n", ret);
+			pr_notice("tc filter: Bogus netfilter code"
+				  " %d assume ACCEPT\n", ret);
 		result = TC_POLICE_OK;
 		break;
 	}
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c046682054eb..c0b6863e3b87 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -164,8 +164,8 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a,
 	dev = m->tcfm_dev;
 	if (!(dev->flags & IFF_UP)) {
 		if (net_ratelimit())
-			printk("mirred to Houston: device %s is gone!\n",
-			       dev->name);
+			pr_notice("tc mirred to Houston: device %s is gone!\n",
+				  dev->name);
 		goto out;
 	}
 
@@ -252,7 +252,7 @@ MODULE_LICENSE("GPL");
 
 static int __init mirred_init_module(void)
 {
-	printk("Mirror/redirect action on\n");
+	pr_info("Mirror/redirect action on\n");
 	return tcf_register_action(&act_mirred_ops);
 }
 
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index b7dcfedc802e..fdbd0b7bd840 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -158,11 +158,13 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 			}
 
 			if (offset % 4) {
-				printk("offset must be on 32 bit boundaries\n");
+				pr_info("tc filter pedit"
+					" offset must be on 32 bit boundaries\n");
 				goto bad;
 			}
 			if (offset > 0 && offset > skb->len) {
-				printk("offset %d cant exceed pkt length %d\n",
+				pr_info("tc filter pedit"
+					" offset %d cant exceed pkt length %d\n",
 				       offset, skb->len);
 				goto bad;
 			}
@@ -176,9 +178,8 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a,
 		if (munged)
 			skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
 		goto done;
-	} else {
-		printk("pedit BUG: index %d\n", p->tcf_index);
-	}
+	} else
+		WARN(1, "pedit BUG: index %d\n", p->tcf_index);
 
 bad:
 	p->tcf_qstats.overlimits++;
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 622ca809c15c..1b4bc691d7d1 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -49,7 +49,7 @@ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result
 	 * Example if this was the 3rd packet and the string was "hello"
 	 * then it would look like "hello_3" (without quotes)
 	 **/
-	printk("simple: %s_%d\n",
+	pr_info("simple: %s_%d\n",
 	       (char *)d->tcfd_defdata, d->tcf_bstats.packets);
 	spin_unlock(&d->tcf_lock);
 	return d->tcf_action;
@@ -205,7 +205,7 @@ static int __init simp_init_module(void)
 {
 	int ret = tcf_register_action(&act_simp_ops);
 	if (!ret)
-		printk("Simple TC action Loaded\n");
+		pr_info("Simple TC action Loaded\n");
 	return ret;
 }
 
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 593eac056e8d..96275422c619 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -211,7 +211,7 @@ check_terminal:
 
 deadloop:
 	if (net_ratelimit())
-		printk("cls_u32: dead loop\n");
+		printk(KERN_WARNING "cls_u32: dead loop\n");
 	return -1;
 }
 
@@ -768,15 +768,15 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = {
 
 static int __init init_u32(void)
 {
-	printk("u32 classifier\n");
+	pr_info("u32 classifier\n");
 #ifdef CONFIG_CLS_U32_PERF
-	printk("    Performance counters on\n");
+	pr_info("    Performance counters on\n");
 #endif
 #ifdef CONFIG_NET_CLS_IND
-	printk("    input device check on\n");
+	pr_info("    input device check on\n");
 #endif
 #ifdef CONFIG_NET_CLS_ACT
-	printk("    Actions configured\n");
+	pr_info("    Actions configured\n");
 #endif
 	return register_tcf_proto_ops(&cls_u32_ops);
 }
diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index e782bdeedc58..5e37da961f80 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -527,7 +527,8 @@ pop_stack:
 
 stack_overflow:
 	if (net_ratelimit())
-		printk("Local stack overflow, increase NET_EMATCH_STACK\n");
+		printk(KERN_WARNING "tc ematch: local stack overflow,"
+			" increase NET_EMATCH_STACK\n");
 	return -1;
 }
 EXPORT_SYMBOL(__tcf_em_tree_match);
-- 
cgit v1.2.3-59-g8ed1b


From 62db5cfd70b1ef53aa21f144a806fe3b78c84fab Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 12 May 2010 06:37:06 +0000
Subject: xfrm: add severity to printk

Serious oh sh*t messages converted to WARN().
Add KERN_NOTICE severity to the unknown policy type messages.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/xfrm/xfrm_user.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index a267fbdda525..ba59983aaffe 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -1783,7 +1783,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	} else {
 		// reset the timers here?
-		printk("Dont know what to do with soft policy expire\n");
+		WARN(1, "Dont know what to do with soft policy expire\n");
 	}
 	km_policy_expired(xp, p->dir, up->hard, current->pid);
 
@@ -1883,7 +1883,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return 0;
 
 bad_policy:
-	printk("BAD policy passed\n");
+	WARN(1, "BAD policy passed\n");
 free_state:
 	kfree(x);
 nomem:
@@ -2385,8 +2385,9 @@ static int xfrm_send_state_notify(struct xfrm_state *x, struct km_event *c)
 	case XFRM_MSG_FLUSHSA:
 		return xfrm_notify_sa_flush(c);
 	default:
-		 printk("xfrm_user: Unknown SA event %d\n", c->event);
-		 break;
+		printk(KERN_NOTICE "xfrm_user: Unknown SA event %d\n",
+		       c->event);
+		break;
 	}
 
 	return 0;
@@ -2676,7 +2677,8 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev
 	case XFRM_MSG_POLEXPIRE:
 		return xfrm_exp_policy_notify(xp, dir, c);
 	default:
-		printk("xfrm_user: Unknown Policy event %d\n", c->event);
+		printk(KERN_NOTICE "xfrm_user: Unknown Policy event %d\n",
+		       c->event);
 	}
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 207024b9477e253859abfc30f1ff314cd6008b24 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 12 May 2010 06:37:07 +0000
Subject: pfkey: add severity to printk

Put severity level on pfkey printk messages

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/key/af_key.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/key/af_key.c b/net/key/af_key.c
index ba9a3fcc2fed..43040e97c474 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -99,7 +99,7 @@ static void pfkey_sock_destruct(struct sock *sk)
 	skb_queue_purge(&sk->sk_receive_queue);
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
-		printk("Attempt to release alive pfkey socket: %p\n", sk);
+		pr_err("Attempt to release alive pfkey socket: %p\n", sk);
 		return;
 	}
 
@@ -1402,7 +1402,7 @@ static inline int event2poltype(int event)
 	case XFRM_MSG_POLEXPIRE:
 	//	return SADB_X_SPDEXPIRE;
 	default:
-		printk("pfkey: Unknown policy event %d\n", event);
+		pr_err("pfkey: Unknown policy event %d\n", event);
 		break;
 	}
 
@@ -1421,7 +1421,7 @@ static inline int event2keytype(int event)
 	case XFRM_MSG_EXPIRE:
 		return SADB_EXPIRE;
 	default:
-		printk("pfkey: Unknown SA event %d\n", event);
+		pr_err("pfkey: Unknown SA event %d\n", event);
 		break;
 	}
 
@@ -2969,7 +2969,7 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c)
 	case XFRM_MSG_NEWAE: /* not yet supported */
 		break;
 	default:
-		printk("pfkey: Unknown SA event %d\n", c->event);
+		pr_err("pfkey: Unknown SA event %d\n", c->event);
 		break;
 	}
 
@@ -2993,7 +2993,7 @@ static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_e
 			break;
 		return key_notify_policy_flush(c);
 	default:
-		printk("pfkey: Unknown policy event %d\n", c->event);
+		pr_err("pfkey: Unknown policy event %d\n", c->event);
 		break;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From b60b6592baa69c43a5a0f55d6300a7feaab15338 Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Tue, 11 May 2010 14:24:12 +0000
Subject: net sched: cleanup and rate limit warning

If the user has a bad classification configuration and gets a packet
that goes through too many steps, chances are more packets will arrive,
and the message spew will overrun syslog because it is not rate limited.
And because it is not tagged with an appropriate priority, it cannot be screened.

Added the qdisc to the message to try and give some more context when
the message does arrive.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_api.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 9839b26674f4..fe35c1f338c2 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1637,9 +1637,12 @@ reclassify:
 		tp = otp;
 
 		if (verd++ >= MAX_REC_LOOP) {
-			printk("rule prio %u protocol %02x reclassify loop, "
-			       "packet dropped\n",
-			       tp->prio&0xffff, ntohs(tp->protocol));
+			if (net_ratelimit())
+				printk(KERN_NOTICE
+				       "%s: packet reclassify loop"
+					  " rule prio %u protocol %02x\n",
+				       tp->q->ops->id,
+				       tp->prio & 0xffff, ntohs(tp->protocol));
 			return TC_ACT_SHOT;
 		}
 		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
-- 
cgit v1.2.3-59-g8ed1b


From 3fa21e07e6acefa31f974d57fba2b6920a7ebd1a Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Mon, 17 May 2010 23:08:21 -0700
Subject: net: Remove unnecessary returns from void function()s

This patch removes from net/ (but not any netfilter files)
all the unnecessary return; statements that precede the
last closing brace of void functions.

It does not remove the returns that are immediately
preceded by a label as gcc doesn't like that.

Done via:
$ grep -rP --include=*.[ch] -l "return;\n}" net/ | \
  xargs perl -i -e 'local $/ ; while (<>) { s/\n[ \t\n]+return;\n}/\n}/g; print; }'

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/9p/trans_rdma.c               |  1 -
 net/atm/br2684.c                  |  1 -
 net/atm/lec.c                     |  6 ------
 net/atm/mpc.c                     | 32 --------------------------------
 net/atm/mpoa_caches.c             | 20 --------------------
 net/bluetooth/hci_core.c          |  2 --
 net/bluetooth/l2cap.c             |  3 ---
 net/bluetooth/rfcomm/tty.c        |  2 --
 net/bluetooth/sco.c               |  1 -
 net/caif/caif_dev.c               |  1 -
 net/can/bcm.c                     |  2 --
 net/decnet/dn_dev.c               |  3 ---
 net/decnet/dn_route.c             |  1 -
 net/ipv4/cipso_ipv4.c             |  2 --
 net/ipv4/fib_trie.c               |  2 --
 net/ipv4/ip_gre.c                 |  1 -
 net/ipv4/ip_options.c             |  1 -
 net/ipv4/ipmr.c                   |  1 -
 net/ipv6/ndisc.c                  |  2 --
 net/ipv6/proc.c                   |  1 -
 net/ipv6/route.c                  |  2 --
 net/irda/iriap.c                  |  2 --
 net/irda/irnet/irnet_irda.c       |  3 ---
 net/iucv/af_iucv.c                |  1 -
 net/mac80211/debugfs.h            |  1 -
 net/mac80211/mesh.c               |  2 --
 net/mac80211/mesh_hwmp.c          |  1 -
 net/netlabel/netlabel_addrlist.h  |  2 --
 net/netlabel/netlabel_unlabeled.c |  1 -
 net/sched/cls_flow.c              |  1 -
 net/sched/sch_hfsc.c              |  1 -
 net/sched/sch_ingress.c           |  1 -
 net/sched/sch_mq.c                |  1 -
 net/sched/sch_multiq.c            |  1 -
 net/sched/sch_prio.c              |  1 -
 net/sched/sch_red.c               |  1 -
 net/sctp/associola.c              |  2 --
 net/sctp/outqueue.c               |  2 --
 net/sctp/proc.c                   |  3 ---
 net/sctp/sm_sideeffect.c          |  4 ----
 net/sctp/ulpqueue.c               |  2 --
 net/sunrpc/clnt.c                 |  1 -
 net/sunrpc/svcsock.c              |  1 -
 net/sunrpc/xprt.c                 |  1 -
 net/sunrpc/xprtsock.c             |  4 ----
 net/sysctl_net.c                  |  1 -
 net/wimax/stack.c                 |  2 --
 net/xfrm/xfrm_policy.c            |  1 -
 48 files changed, 131 deletions(-)

(limited to 'net')

diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 041101ab4aa5..0ea20c30466c 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -308,7 +308,6 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
 		   req, err, status);
 	rdma->state = P9_RDMA_FLUSHING;
 	client->status = Disconnected;
-	return;
 }
 
 static void
diff --git a/net/atm/br2684.c b/net/atm/br2684.c
index d6c7ceaf13e9..6719af6a59fa 100644
--- a/net/atm/br2684.c
+++ b/net/atm/br2684.c
@@ -446,7 +446,6 @@ error:
 	net_dev->stats.rx_errors++;
 free_skb:
 	dev_kfree_skb(skb);
-	return;
 }
 
 /*
diff --git a/net/atm/lec.c b/net/atm/lec.c
index feeaf5718472..d98bde1a0ac8 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -161,8 +161,6 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev)
 		skb_queue_tail(&sk->sk_receive_queue, skb2);
 		sk->sk_data_ready(sk, skb2->len);
 	}
-
-	return;
 }
 #endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */
 
@@ -640,7 +638,6 @@ static void lec_set_multicast_list(struct net_device *dev)
 	 * by default, all multicast frames arrive over the bus.
 	 * eventually support selective multicast service
 	 */
-	return;
 }
 
 static const struct net_device_ops lec_netdev_ops = {
@@ -1199,8 +1196,6 @@ static void __exit lane_module_cleanup(void)
 			dev_lec[i] = NULL;
 		}
 	}
-
-	return;
 }
 
 module_init(lane_module_init);
@@ -1334,7 +1329,6 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr,
 		priv->lane2_ops->associate_indicator(dev, mac_addr,
 						     tlvs, sizeoftlvs);
 	}
-	return;
 }
 
 /*
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 436f2e177657..622b471e14e0 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -455,7 +455,6 @@ static void lane2_assoc_ind(struct net_device *dev, const u8 *mac_addr,
 	if (end_of_tlvs - tlvs != 0)
 		pr_info("(%s) ignoring %Zd bytes of trailing TLV garbage\n",
 			dev->name, end_of_tlvs - tlvs);
-	return;
 }
 
 /*
@@ -684,8 +683,6 @@ static void mpc_vcc_close(struct atm_vcc *vcc, struct net_device *dev)
 
 	if (in_entry == NULL && eg_entry == NULL)
 		dprintk("(%s) unused vcc closed\n", dev->name);
-
-	return;
 }
 
 static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
@@ -783,8 +780,6 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb)
 
 	memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data));
 	netif_rx(new_skb);
-
-	return;
 }
 
 static struct atmdev_ops mpc_ops = { /* only send is required */
@@ -873,8 +868,6 @@ static void send_set_mps_ctrl_addr(const char *addr, struct mpoa_client *mpc)
 	mesg.type = SET_MPS_CTRL_ADDR;
 	memcpy(mesg.MPS_ctrl, addr, ATM_ESA_LEN);
 	msg_to_mpoad(&mesg, mpc);
-
-	return;
 }
 
 static void mpoad_close(struct atm_vcc *vcc)
@@ -911,8 +904,6 @@ static void mpoad_close(struct atm_vcc *vcc)
 	pr_info("(%s) going down\n",
 		(mpc->dev) ? mpc->dev->name : "<unknown>");
 	module_put(THIS_MODULE);
-
-	return;
 }
 
 /*
@@ -1122,7 +1113,6 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc)
 	pr_info("(%s) entry already in resolving state\n",
 		(mpc->dev) ? mpc->dev->name : "<unknown>");
 	mpc->in_ops->put(entry);
-	return;
 }
 
 /*
@@ -1166,7 +1156,6 @@ static void check_qos_and_open_shortcut(struct k_message *msg,
 	} else
 		memset(&msg->qos, 0, sizeof(struct atm_qos));
 	msg_to_mpoad(msg, client);
-	return;
 }
 
 static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc)
@@ -1240,8 +1229,6 @@ static void ingress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc)
 		mpc->in_ops->put(entry);
 		entry = mpc->in_ops->get_with_mask(dst_ip, mpc, mask);
 	} while (entry != NULL);
-
-	return;
 }
 
 static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc)
@@ -1260,8 +1247,6 @@ static void egress_purge_rcvd(struct k_message *msg, struct mpoa_client *mpc)
 	write_unlock_irq(&mpc->egress_lock);
 
 	mpc->eg_ops->put(entry);
-
-	return;
 }
 
 static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry)
@@ -1295,8 +1280,6 @@ static void purge_egress_shortcut(struct atm_vcc *vcc, eg_cache_entry *entry)
 	skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_data_ready(sk, skb->len);
 	dprintk("exiting\n");
-
-	return;
 }
 
 /*
@@ -1325,8 +1308,6 @@ static void mps_death(struct k_message *msg, struct mpoa_client *mpc)
 
 	mpc->in_ops->destroy_cache(mpc);
 	mpc->eg_ops->destroy_cache(mpc);
-
-	return;
 }
 
 static void MPOA_cache_impos_rcvd(struct k_message *msg,
@@ -1353,8 +1334,6 @@ static void MPOA_cache_impos_rcvd(struct k_message *msg,
 	write_unlock_irq(&mpc->egress_lock);
 
 	mpc->eg_ops->put(entry);
-
-	return;
 }
 
 static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg,
@@ -1392,8 +1371,6 @@ static void set_mpc_ctrl_addr_rcvd(struct k_message *mesg,
 			pr_info("(%s) targetless LE_ARP request failed\n",
 				mpc->dev->name);
 	}
-
-	return;
 }
 
 static void set_mps_mac_addr_rcvd(struct k_message *msg,
@@ -1409,8 +1386,6 @@ static void set_mps_mac_addr_rcvd(struct k_message *msg,
 		return;
 	}
 	client->number_of_mps_macs = 1;
-
-	return;
 }
 
 /*
@@ -1436,7 +1411,6 @@ static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action)
 
 	msg->type = action;
 	msg_to_mpoad(msg, mpc);
-	return;
 }
 
 static void mpc_timer_refresh(void)
@@ -1445,8 +1419,6 @@ static void mpc_timer_refresh(void)
 	mpc_timer.data = mpc_timer.expires;
 	mpc_timer.function = mpc_cache_check;
 	add_timer(&mpc_timer);
-
-	return;
 }
 
 static void mpc_cache_check(unsigned long checking_time)
@@ -1471,8 +1443,6 @@ static void mpc_cache_check(unsigned long checking_time)
 		mpc = mpc->next;
 	}
 	mpc_timer_refresh();
-
-	return;
 }
 
 static int atm_mpoa_ioctl(struct socket *sock, unsigned int cmd,
@@ -1561,8 +1531,6 @@ static void __exit atm_mpoa_cleanup(void)
 		kfree(qos);
 		qos = nextqos;
 	}
-
-	return;
 }
 
 module_init(atm_mpoa_init);
diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c
index e773d8336918..d1b2d9a03144 100644
--- a/net/atm/mpoa_caches.c
+++ b/net/atm/mpoa_caches.c
@@ -182,8 +182,6 @@ static void in_cache_put(in_cache_entry *entry)
 		memset(entry, 0, sizeof(in_cache_entry));
 		kfree(entry);
 	}
-
-	return;
 }
 
 /*
@@ -221,8 +219,6 @@ static void in_cache_remove_entry(in_cache_entry *entry,
 		}
 		vcc_release_async(vcc, -EPIPE);
 	}
-
-	return;
 }
 
 /* Call this every MPC-p2 seconds... Not exactly correct solution,
@@ -248,8 +244,6 @@ static void clear_count_and_expired(struct mpoa_client *client)
 		entry = next_entry;
 	}
 	write_unlock_bh(&client->ingress_lock);
-
-	return;
 }
 
 /* Call this every MPC-p4 seconds. */
@@ -334,8 +328,6 @@ static void in_destroy_cache(struct mpoa_client *mpc)
 	while (mpc->in_cache != NULL)
 		mpc->in_ops->remove_entry(mpc->in_cache, mpc);
 	write_unlock_irq(&mpc->ingress_lock);
-
-	return;
 }
 
 static eg_cache_entry *eg_cache_get_by_cache_id(__be32 cache_id,
@@ -427,8 +419,6 @@ static void eg_cache_put(eg_cache_entry *entry)
 		memset(entry, 0, sizeof(eg_cache_entry));
 		kfree(entry);
 	}
-
-	return;
 }
 
 /*
@@ -463,8 +453,6 @@ static void eg_cache_remove_entry(eg_cache_entry *entry,
 		}
 		vcc_release_async(vcc, -EPIPE);
 	}
-
-	return;
 }
 
 static eg_cache_entry *eg_cache_add_entry(struct k_message *msg,
@@ -509,8 +497,6 @@ static void update_eg_cache_entry(eg_cache_entry *entry, uint16_t holding_time)
 	do_gettimeofday(&(entry->tv));
 	entry->entry_state = EGRESS_RESOLVED;
 	entry->ctrl_info.holding_time = holding_time;
-
-	return;
 }
 
 static void clear_expired(struct mpoa_client *client)
@@ -537,8 +523,6 @@ static void clear_expired(struct mpoa_client *client)
 		entry = next_entry;
 	}
 	write_unlock_irq(&client->egress_lock);
-
-	return;
 }
 
 static void eg_destroy_cache(struct mpoa_client *mpc)
@@ -547,8 +531,6 @@ static void eg_destroy_cache(struct mpoa_client *mpc)
 	while (mpc->eg_cache != NULL)
 		mpc->eg_ops->remove_entry(mpc->eg_cache, mpc);
 	write_unlock_irq(&mpc->egress_lock);
-
-	return;
 }
 
 
@@ -584,6 +566,4 @@ void atm_mpoa_init_cache(struct mpoa_client *mpc)
 {
 	mpc->in_ops = &ingress_ops;
 	mpc->eg_ops = &egress_ops;
-
-	return;
 }
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 5e83f8e0877a..2f768de87011 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1316,8 +1316,6 @@ void hci_send_acl(struct hci_conn *conn, struct sk_buff *skb, __u16 flags)
 	}
 
 	tasklet_schedule(&hdev->tx_task);
-
-	return;
 }
 EXPORT_SYMBOL(hci_send_acl);
 
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 673a36886716..1b682a5aa061 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -1322,8 +1322,6 @@ static void l2cap_drop_acked_frames(struct sock *sk)
 
 	if (!l2cap_pi(sk)->unacked_frames)
 		del_timer(&l2cap_pi(sk)->retrans_timer);
-
-	return;
 }
 
 static inline void l2cap_do_send(struct sock *sk, struct sk_buff *skb)
@@ -4667,7 +4665,6 @@ void l2cap_load(void)
 	/* Dummy function to trigger automatic L2CAP module loading by
 	 * other modules that use L2CAP sockets but don't use any other
 	 * symbols from it. */
-	return;
 }
 EXPORT_SYMBOL(l2cap_load);
 
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index cab71ea2796d..309b6c261b25 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -1014,8 +1014,6 @@ static void rfcomm_tty_set_termios(struct tty_struct *tty, struct ktermios *old)
 		rfcomm_send_rpn(dev->dlc->session, 1, dev->dlc->dlci, baud,
 				data_bits, stop_bits, parity,
 				RFCOMM_RPN_FLOW_NONE, x_on, x_off, changes);
-
-	return;
 }
 
 static void rfcomm_tty_throttle(struct tty_struct *tty)
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 4767928a93d3..d0927d1fdada 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -273,7 +273,6 @@ static inline void sco_recv_frame(struct sco_conn *conn, struct sk_buff *skb)
 
 drop:
 	kfree_skb(skb);
-	return;
 }
 
 /* -------- Socket interface ---------- */
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 024fd5bb2d39..e2b86f1f5a47 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -112,7 +112,6 @@ static void caif_device_destroy(struct net_device *dev)
 	spin_unlock_bh(&caifdevs->lock);
 
 	kfree(caifd);
-	return;
 }
 
 static int transmit(struct cflayer *layer, struct cfpkt *pkt)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 907dc871fac8..9c65e9deb9c3 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -713,8 +713,6 @@ static void bcm_remove_op(struct bcm_op *op)
 		kfree(op->last_frames);
 
 	kfree(op);
-
-	return;
 }
 
 static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 615dbe3b43f9..4c409b46aa35 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -1220,17 +1220,14 @@ void dn_dev_down(struct net_device *dev)
 
 void dn_dev_init_pkt(struct sk_buff *skb)
 {
-	return;
 }
 
 void dn_dev_veri_pkt(struct sk_buff *skb)
 {
-	return;
 }
 
 void dn_dev_hello(struct sk_buff *skb)
 {
-	return;
 }
 
 void dn_dev_devices_off(void)
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index a8432e399545..812e6dff6067 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -264,7 +264,6 @@ static struct dst_entry *dn_dst_negative_advice(struct dst_entry *dst)
 
 static void dn_dst_link_failure(struct sk_buff *skb)
 {
-	return;
 }
 
 static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index c97cd9ff697e..3a92a76ae41d 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -290,8 +290,6 @@ void cipso_v4_cache_invalidate(void)
 		cipso_v4_cache[iter].size = 0;
 		spin_unlock_bh(&cipso_v4_cache[iter].lock);
 	}
-
-	return;
 }
 
 /**
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c98f115fb0fd..79d057a939ba 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1022,8 +1022,6 @@ static void trie_rebalance(struct trie *t, struct tnode *tn)
 
 	rcu_assign_pointer(t->trie, (struct node *)tn);
 	tnode_free_flush();
-
-	return;
 }
 
 /* only used from updater-side */
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 498cf69c7977..32618e11076d 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -502,7 +502,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info)
 	t->err_time = jiffies;
 out:
 	rcu_read_unlock();
-	return;
 }
 
 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 3244133c24f6..ba9836c488ed 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -238,7 +238,6 @@ void ip_options_fragment(struct sk_buff * skb)
 	opt->rr_needaddr = 0;
 	opt->ts_needaddr = 0;
 	opt->ts_needtime = 0;
-	return;
 }
 
 /*
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 217ebe035b34..7bcacf627b46 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1606,7 +1606,6 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
 
 out_free:
 	kfree_skb(skb);
-	return;
 }
 
 static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3f7c12b70a26..0abdc242ddb7 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -890,8 +890,6 @@ out:
 		in6_ifa_put(ifp);
 	else
 		in6_dev_put(idev);
-
-	return;
 }
 
 static void ndisc_recv_na(struct sk_buff *skb)
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 458eabfbe130..566798d69f37 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -168,7 +168,6 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void __percpu **mib)
 			i & 0x100 ?  "Out" : "In", i & 0xff);
 		seq_printf(seq, "%-32s\t%lu\n", name, val);
 	}
-	return;
 }
 
 static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **mib,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 05ebd7833043..294cbe8b0725 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -316,7 +316,6 @@ static void rt6_probe(struct rt6_info *rt)
 #else
 static inline void rt6_probe(struct rt6_info *rt)
 {
-	return;
 }
 #endif
 
@@ -1553,7 +1552,6 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 
 out:
 	dst_release(&rt->u.dst);
-	return;
 }
 
 /*
diff --git a/net/irda/iriap.c b/net/irda/iriap.c
index 79a1e5a23e10..fce364c6c71a 100644
--- a/net/irda/iriap.c
+++ b/net/irda/iriap.c
@@ -685,8 +685,6 @@ static void iriap_getvaluebyclass_indication(struct iriap_cb *self,
 	/* We have a match; send the value.  */
 	iriap_getvaluebyclass_response(self, obj->id, IAS_SUCCESS,
 				       attrib->value);
-
-	return;
 }
 
 /*
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
index df18ab4b6c5e..e98e40d76f4f 100644
--- a/net/irda/irnet/irnet_irda.c
+++ b/net/irda/irnet/irnet_irda.c
@@ -678,7 +678,6 @@ irda_irnet_destroy(irnet_socket *	self)
   self->stsap_sel = 0;
 
   DEXIT(IRDA_SOCK_TRACE, "\n");
-  return;
 }
 
 
@@ -928,7 +927,6 @@ irnet_disconnect_server(irnet_socket *	self,
   irttp_listen(self->tsap);
 
   DEXIT(IRDA_SERV_TRACE, "\n");
-  return;
 }
 
 /*------------------------------------------------------------------*/
@@ -1013,7 +1011,6 @@ irnet_destroy_server(void)
   irda_irnet_destroy(&irnet_server.s);
 
   DEXIT(IRDA_SERV_TRACE, "\n");
-  return;
 }
 
 
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 8be324fe08b9..c8b4599a752e 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -136,7 +136,6 @@ static void afiucv_pm_complete(struct device *dev)
 #ifdef CONFIG_PM_DEBUG
 	printk(KERN_WARNING "afiucv_pm_complete\n");
 #endif
-	return;
 }
 
 /**
diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h
index 68e6a2050f9a..09cc9be34796 100644
--- a/net/mac80211/debugfs.h
+++ b/net/mac80211/debugfs.h
@@ -7,7 +7,6 @@ extern int mac80211_open_file_generic(struct inode *inode, struct file *file);
 #else
 static inline void debugfs_hw_add(struct ieee80211_local *local)
 {
-	return;
 }
 #endif
 
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 7e93524459fc..bde81031727a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -287,8 +287,6 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata)
 	*pos++ |= sdata->u.mesh.accepting_plinks ?
 	    MESHCONF_CAPAB_ACCEPT_PLINKS : 0x00;
 	*pos++ = 0x00;
-
-	return;
 }
 
 u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl)
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index d89ed7f2592b..0705018d8d1e 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -624,7 +624,6 @@ static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata,
 fail:
 	rcu_read_unlock();
 	sdata->u.mesh.mshstats.dropped_frames_no_route++;
-	return;
 }
 
 static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata,
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index 07ae7fd82be1..1c1c093cf279 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -130,7 +130,6 @@ static inline void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
 					     int src, const char *dev,
 					     __be32 addr, __be32 mask)
 {
-	return;
 }
 #endif
 
@@ -203,7 +202,6 @@ static inline void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf,
 					     const struct in6_addr *addr,
 					     const struct in6_addr *mask)
 {
-	return;
 }
 #endif
 #endif /* IPV6 */
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index a3d64aabe2f7..e2b0a680dd56 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -670,7 +670,6 @@ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface)
 
 unlhsh_condremove_failure:
 	spin_unlock(&netlbl_unlhsh_lock);
-	return;
 }
 
 /**
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 6ed61b10e002..f73542d2cdd0 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -602,7 +602,6 @@ static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
 
 static void flow_put(struct tcf_proto *tp, unsigned long f)
 {
-	return;
 }
 
 static int flow_dump(struct tcf_proto *tp, unsigned long fh,
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index a435cf13cc27..abd904be4287 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -617,7 +617,6 @@ rtsc_min(struct runtime_sc *rtsc, struct internal_sc *isc, u64 x, u64 y)
 	rtsc->y = y;
 	rtsc->dx = dx;
 	rtsc->dy = dy;
-	return;
 }
 
 static void
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index a9e646bdb605..f10e34a68445 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -44,7 +44,6 @@ static void ingress_put(struct Qdisc *sch, unsigned long cl)
 
 static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker)
 {
-	return;
 }
 
 static struct tcf_proto **ingress_find_tcf(struct Qdisc *sch, unsigned long cl)
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index b2aba3f5e6fa..fe91e50f9d98 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -174,7 +174,6 @@ static unsigned long mq_get(struct Qdisc *sch, u32 classid)
 
 static void mq_put(struct Qdisc *sch, unsigned long cl)
 {
-	return;
 }
 
 static int mq_dump_class(struct Qdisc *sch, unsigned long cl,
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index c50876cd8704..6ae251279fc2 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -340,7 +340,6 @@ static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
 
 static void multiq_put(struct Qdisc *q, unsigned long cl)
 {
-	return;
 }
 
 static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 81672e0c1b25..0748fb1e3a49 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -303,7 +303,6 @@ static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 clas
 
 static void prio_put(struct Qdisc *q, unsigned long cl)
 {
-	return;
 }
 
 static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb,
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 072cdf442f8e..8d42bb3ba540 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -303,7 +303,6 @@ static unsigned long red_get(struct Qdisc *sch, u32 classid)
 
 static void red_put(struct Qdisc *sch, unsigned long arg)
 {
-	return;
 }
 
 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 3912420cedcc..e41feff19e43 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -816,8 +816,6 @@ void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc,
 		if (t != primary)
 			sctp_assoc_rm_peer(asoc, t);
 	}
-
-	return;
 }
 
 /* Engage in transport control operations.
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 5d057178ce0c..c04b2eb59186 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -80,7 +80,6 @@ static inline void sctp_outq_head_data(struct sctp_outq *q,
 {
 	list_add(&ch->list, &q->out_chunk_list);
 	q->out_qlen += ch->skb->len;
-	return;
 }
 
 /* Take data from the front of the queue. */
@@ -103,7 +102,6 @@ static inline void sctp_outq_tail_data(struct sctp_outq *q,
 {
 	list_add_tail(&ch->list, &q->out_chunk_list);
 	q->out_qlen += ch->skb->len;
-	return;
 }
 
 /*
diff --git a/net/sctp/proc.c b/net/sctp/proc.c
index 784bcc9a979d..61aacfbbaa92 100644
--- a/net/sctp/proc.c
+++ b/net/sctp/proc.c
@@ -181,7 +181,6 @@ static void * sctp_eps_seq_start(struct seq_file *seq, loff_t *pos)
 
 static void sctp_eps_seq_stop(struct seq_file *seq, void *v)
 {
-	return;
 }
 
 
@@ -286,7 +285,6 @@ static void * sctp_assocs_seq_start(struct seq_file *seq, loff_t *pos)
 
 static void sctp_assocs_seq_stop(struct seq_file *seq, void *v)
 {
-	return;
 }
 
 
@@ -409,7 +407,6 @@ static void *sctp_remaddr_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 static void sctp_remaddr_seq_stop(struct seq_file *seq, void *v)
 {
-	return;
 }
 
 static int sctp_remaddr_seq_show(struct seq_file *seq, void *v)
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index 22e670200449..f5e5e27cac5e 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -892,8 +892,6 @@ static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq,
 	sctp_walk_fwdtsn(skip, chunk) {
 		sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn));
 	}
-
-	return;
 }
 
 /* Helper function to remove the association non-primary peer
@@ -912,8 +910,6 @@ static void sctp_cmd_del_non_primary(struct sctp_association *asoc)
 			sctp_assoc_del_peer(asoc, &t->ipaddr);
 		}
 	}
-
-	return;
 }
 
 /* Helper function to set sk_err on a 1-1 style socket. */
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 3a448536f0b6..c7f7e49609cb 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -955,7 +955,6 @@ void sctp_ulpq_skip(struct sctp_ulpq *ulpq, __u16 sid, __u16 ssn)
 	 * ordering and deliver them if needed.
 	 */
 	sctp_ulpq_reap_ordered(ulpq, sid);
-	return;
 }
 
 static __u16 sctp_ulpq_renege_list(struct sctp_ulpq *ulpq,
@@ -1064,7 +1063,6 @@ void sctp_ulpq_renege(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
 	}
 
 	sk_mem_reclaim(asoc->base.sk);
-	return;
 }
 
 
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 19c9983d5360..462462eaf296 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1518,7 +1518,6 @@ call_refreshresult(struct rpc_task *task)
 	task->tk_action = call_refresh;
 	if (status != -ETIMEDOUT)
 		rpc_delay(task, 3*HZ);
-	return;
 }
 
 static __be32 *
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index ce0d5b35c2ac..76e504bf74d0 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -150,7 +150,6 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 		}
 		break;
 	}
-	return;
 }
 
 /*
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 699ade68aac1..2e3d502ffe87 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -716,7 +716,6 @@ void xprt_connect(struct rpc_task *task)
 		xprt->stat.connect_start = jiffies;
 		xprt->ops->connect(task);
 	}
-	return;
 }
 
 static void xprt_connect_status(struct rpc_task *task)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 9847c30b5001..6e0df664b255 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1050,8 +1050,6 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
 		if (transport->tcp_flags & TCP_RCV_LAST_FRAG)
 			transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
 	}
-
-	return;
 }
 
 /*
@@ -2210,7 +2208,6 @@ static int bc_send_request(struct rpc_task *task)
 
 static void bc_close(struct rpc_xprt *xprt)
 {
-	return;
 }
 
 /*
@@ -2220,7 +2217,6 @@ static void bc_close(struct rpc_xprt *xprt)
 
 static void bc_destroy(struct rpc_xprt *xprt)
 {
-	return;
 }
 
 static struct rpc_xprt_ops xs_udp_ops = {
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 53196009160a..ca84212cfbfe 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -82,7 +82,6 @@ static int __net_init sysctl_net_init(struct net *net)
 static void __net_exit sysctl_net_exit(struct net *net)
 {
 	WARN_ON(!list_empty(&net->sysctls.list));
-	return;
 }
 
 static struct pernet_operations sysctl_pernet_ops = {
diff --git a/net/wimax/stack.c b/net/wimax/stack.c
index 62b1a6662209..ee99e7dfcdba 100644
--- a/net/wimax/stack.c
+++ b/net/wimax/stack.c
@@ -320,7 +320,6 @@ void __wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state)
 out:
 	d_fnend(3, dev, "(wimax_dev %p new_state %u [old %u]) = void\n",
 		wimax_dev, new_state, old_state);
-	return;
 }
 
 
@@ -362,7 +361,6 @@ void wimax_state_change(struct wimax_dev *wimax_dev, enum wimax_st new_state)
 	if (wimax_dev->state > __WIMAX_ST_NULL)
 		__wimax_state_change(wimax_dev, new_state);
 	mutex_unlock(&wimax_dev->mutex);
-	return;
 }
 EXPORT_SYMBOL_GPL(wimax_state_change);
 
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f4ea3a08e5a1..d965a2bad8d3 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2209,7 +2209,6 @@ EXPORT_SYMBOL(xfrm_dst_ifdown);
 static void xfrm_link_failure(struct sk_buff *skb)
 {
 	/* Impossible. Such dst must be popped before reaches point of failure. */
-	return;
 }
 
 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
-- 
cgit v1.2.3-59-g8ed1b


From b3bcb72edb29c52fb0a065d5b1c7cf40ed9287f4 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Tue, 18 May 2010 12:26:27 -0700
Subject: bridge: fix build for CONFIG_SYSFS disabled

Fix build when CONFIG_SYSFS is not enabled:
net/bridge/br_if.c:136: error: 'struct net_bridge_port' has no member named 'sysfs_name'

Note: dev->name == sysfs_name except while a name change is in
progress, and we are protected from that by the RTNL mutex.

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_if.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f25e3c92bd72..18b245e2c00e 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -133,7 +133,7 @@ static void del_nbp(struct net_bridge_port *p)
 	struct net_bridge *br = p->br;
 	struct net_device *dev = p->dev;
 
-	sysfs_remove_link(br->ifobj, p->sysfs_name);
+	sysfs_remove_link(br->ifobj, p->dev->name);
 
 	dev_set_promiscuity(dev, -1);
 
-- 
cgit v1.2.3-59-g8ed1b


From e9d3e084975869754d16f639378675c353560be9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 18 May 2010 15:36:06 -0700
Subject: ipv6: Replace inet6_ifaddr->dead with state

This patch replaces the boolean dead flag on inet6_ifaddr with
a state enum.  This allows us to roll back changes when deleting
an address according to whether DAD has completed or not.

This patch only adds the state field and does not change the logic.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h | 12 ++++++++++--
 net/ipv6/addrconf.c    | 16 +++++++++-------
 2 files changed, 19 insertions(+), 9 deletions(-)

(limited to 'net')

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 13f9fc086d54..f95ff8d9aa47 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -32,6 +32,13 @@
 
 #ifdef __KERNEL__
 
+enum {
+	INET6_IFADDR_STATE_DAD,
+	INET6_IFADDR_STATE_POSTDAD,
+	INET6_IFADDR_STATE_UP,
+	INET6_IFADDR_STATE_DEAD,
+};
+
 struct inet6_ifaddr {
 	struct in6_addr		addr;
 	__u32			prefix_len;
@@ -40,6 +47,9 @@ struct inet6_ifaddr {
 	__u32			prefered_lft;
 	atomic_t		refcnt;
 	spinlock_t		lock;
+	spinlock_t		state_lock;
+
+	int			state;
 
 	__u8			probes;
 	__u8			flags;
@@ -62,8 +72,6 @@ struct inet6_ifaddr {
 	struct inet6_ifaddr	*ifpub;
 	int			regen_count;
 #endif
-
-	int			dead;
 	struct rcu_head		rcu;
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 75d3b8c1e856..4e5ad9de1679 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -553,7 +553,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 	if (del_timer(&ifp->timer))
 		pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
 
-	if (!ifp->dead) {
+	if (ifp->state != INET6_IFADDR_STATE_DEAD) {
 		pr_warning("Freeing alive inet6 address %p\n", ifp);
 		return;
 	}
@@ -648,6 +648,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	ipv6_addr_copy(&ifa->addr, addr);
 
 	spin_lock_init(&ifa->lock);
+	spin_lock_init(&ifa->state_lock);
 	init_timer(&ifa->timer);
 	INIT_HLIST_NODE(&ifa->addr_lst);
 	ifa->timer.data = (unsigned long) ifa;
@@ -720,7 +721,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 
 	hash = ipv6_addr_hash(&ifp->addr);
 
-	ifp->dead = 1;
+	ifp->state = INET6_IFADDR_STATE_DEAD;
 
 	spin_lock_bh(&addrconf_hash_lock);
 	hlist_del_init_rcu(&ifp->addr_lst);
@@ -2665,7 +2666,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		ifa = list_first_entry(&idev->tempaddr_list,
 				       struct inet6_ifaddr, tmp_list);
 		list_del(&ifa->tmp_list);
-		ifa->dead = 1;
+		ifa->state = INET6_IFADDR_STATE_DEAD;
 		write_unlock_bh(&idev->lock);
 		spin_lock_bh(&ifa->lock);
 
@@ -2707,7 +2708,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 			write_unlock_bh(&idev->lock);
 		} else {
 			list_del(&ifa->if_list);
-			ifa->dead = 1;
+			ifa->state = INET6_IFADDR_STATE_DEAD;
 			write_unlock_bh(&idev->lock);
 
 			/* clear hash table */
@@ -2717,7 +2718,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		}
 
 		__ipv6_ifa_notify(RTM_DELADDR, ifa);
-		if (ifa->dead)
+		if (ifa->state == INET6_IFADDR_STATE_DEAD)
 			atomic_notifier_call_chain(&inet6addr_chain,
 						   NETDEV_DOWN, ifa);
 		in6_ifa_put(ifa);
@@ -2815,7 +2816,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 	net_srandom(ifp->addr.s6_addr32[3]);
 
 	read_lock_bh(&idev->lock);
-	if (ifp->dead)
+	if (ifp->state == INET6_IFADDR_STATE_DEAD)
 		goto out;
 
 	spin_lock(&ifp->lock);
@@ -4050,7 +4051,8 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 		addrconf_leave_solict(ifp->idev, &ifp->addr);
 		dst_hold(&ifp->rt->u.dst);
 
-		if (ifp->dead && ip6_del_rt(ifp->rt))
+		if (ifp->state == INET6_IFADDR_STATE_DEAD &&
+		    ip6_del_rt(ifp->rt))
 			dst_free(&ifp->rt->u.dst);
 		break;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 4c5ff6a6fe794f102479db998c69054319279e3c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 18 May 2010 15:54:18 -0700
Subject: ipv6: Use state_lock to protect ifa state

This patch makes use of the new state_lock to synchronise between
updates to the ifa state.  This fixes the issue where a remotely
triggered address deletion (through DAD failure) coincides with a
local administrative address deletion, causing certain actions to
be performed twice incorrectly.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4e5ad9de1679..2e42162c9042 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -715,13 +715,20 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 {
 	struct inet6_ifaddr *ifa, *ifn;
 	struct inet6_dev *idev = ifp->idev;
+	int state;
 	int hash;
 	int deleted = 0, onlink = 0;
 	unsigned long expires = jiffies;
 
 	hash = ipv6_addr_hash(&ifp->addr);
 
+	spin_lock_bh(&ifp->state_lock);
+	state = ifp->state;
 	ifp->state = INET6_IFADDR_STATE_DEAD;
+	spin_unlock_bh(&ifp->state_lock);
+
+	if (state == INET6_IFADDR_STATE_DEAD)
+		goto out;
 
 	spin_lock_bh(&addrconf_hash_lock);
 	hlist_del_init_rcu(&ifp->addr_lst);
@@ -819,6 +826,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 		dst_release(&rt->u.dst);
 	}
 
+out:
 	in6_ifa_put(ifp);
 }
 
@@ -2626,6 +2634,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 	struct inet6_dev *idev;
 	struct inet6_ifaddr *ifa;
 	LIST_HEAD(keep_list);
+	int state;
 
 	ASSERT_RTNL();
 
@@ -2666,7 +2675,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		ifa = list_first_entry(&idev->tempaddr_list,
 				       struct inet6_ifaddr, tmp_list);
 		list_del(&ifa->tmp_list);
-		ifa->state = INET6_IFADDR_STATE_DEAD;
 		write_unlock_bh(&idev->lock);
 		spin_lock_bh(&ifa->lock);
 
@@ -2704,23 +2712,34 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 			/* Flag it for later restoration when link comes up */
 			ifa->flags |= IFA_F_TENTATIVE;
-			in6_ifa_hold(ifa);
+
 			write_unlock_bh(&idev->lock);
+
+			in6_ifa_hold(ifa);
 		} else {
 			list_del(&ifa->if_list);
-			ifa->state = INET6_IFADDR_STATE_DEAD;
-			write_unlock_bh(&idev->lock);
 
 			/* clear hash table */
 			spin_lock_bh(&addrconf_hash_lock);
 			hlist_del_init_rcu(&ifa->addr_lst);
 			spin_unlock_bh(&addrconf_hash_lock);
+
+			write_unlock_bh(&idev->lock);
+			spin_lock_bh(&ifa->state_lock);
+			state = ifa->state;
+			ifa->state = INET6_IFADDR_STATE_DEAD;
+			spin_unlock_bh(&ifa->state_lock);
+
+			if (state == INET6_IFADDR_STATE_DEAD)
+				goto put_ifa;
 		}
 
 		__ipv6_ifa_notify(RTM_DELADDR, ifa);
 		if (ifa->state == INET6_IFADDR_STATE_DEAD)
 			atomic_notifier_call_chain(&inet6addr_chain,
 						   NETDEV_DOWN, ifa);
+
+put_ifa:
 		in6_ifa_put(ifa);
 
 		write_lock_bh(&idev->lock);
-- 
cgit v1.2.3-59-g8ed1b


From f2344a131bccdbfc5338e17fa71a807dee7944fa Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 18 May 2010 15:55:27 -0700
Subject: ipv6: Use POSTDAD state

This patch makes use of the new POSTDAD state.  This prevents
a race between DAD completion and failure.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2e42162c9042..7c769fa81d97 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1406,10 +1406,27 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 		ipv6_del_addr(ifp);
 }
 
+static int addrconf_dad_end(struct inet6_ifaddr *ifp)
+{
+	int err = -ENOENT;
+
+	spin_lock(&ifp->state_lock);
+	if (ifp->state == INET6_IFADDR_STATE_DAD) {
+		ifp->state = INET6_IFADDR_STATE_POSTDAD;
+		err = 0;
+	}
+	spin_unlock(&ifp->state_lock);
+
+	return err;
+}
+
 void addrconf_dad_failure(struct inet6_ifaddr *ifp)
 {
 	struct inet6_dev *idev = ifp->idev;
 
+	if (addrconf_dad_end(ifp))
+		return;
+
 	if (net_ratelimit())
 		printk(KERN_INFO "%s: IPv6 duplicate address %pI6c detected!\n",
 			ifp->idev->dev->name, &ifp->addr);
@@ -2712,6 +2729,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 
 			/* Flag it for later restoration when link comes up */
 			ifa->flags |= IFA_F_TENTATIVE;
+			ifa->state = INET6_IFADDR_STATE_DAD;
 
 			write_unlock_bh(&idev->lock);
 
@@ -2883,6 +2901,9 @@ static void addrconf_dad_timer(unsigned long data)
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr mcaddr;
 
+	if (!ifp->probes && addrconf_dad_end(ifp))
+		goto out;
+
 	read_lock(&idev->lock);
 	if (idev->dead || !(idev->if_flags & IF_READY)) {
 		read_unlock(&idev->lock);
@@ -2956,12 +2977,10 @@ static void addrconf_dad_run(struct inet6_dev *idev)
 	read_lock_bh(&idev->lock);
 	list_for_each_entry(ifp, &idev->addr_list, if_list) {
 		spin_lock(&ifp->lock);
-		if (!(ifp->flags & IFA_F_TENTATIVE)) {
-			spin_unlock(&ifp->lock);
-			continue;
-		}
+		if (ifp->flags & IFA_F_TENTATIVE &&
+		    ifp->state == INET6_IFADDR_STATE_DAD)
+			addrconf_dad_kick(ifp);
 		spin_unlock(&ifp->lock);
-		addrconf_dad_kick(ifp);
 	}
 	read_unlock_bh(&idev->lock);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 622ccdf107bcb49c4d8fb65512652566d4c8928a Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 18 May 2010 15:56:06 -0700
Subject: ipv6: Never schedule DAD timer on dead address

This patch ensures that all places that schedule the DAD timer
look at the address state in a safe manner before scheduling the
timer.  This ensures that we don't end up with pending timers
after deleting an address.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7c769fa81d97..e1a698df5706 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2853,10 +2853,10 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 	net_srandom(ifp->addr.s6_addr32[3]);
 
 	read_lock_bh(&idev->lock);
+	spin_lock(&ifp->lock);
 	if (ifp->state == INET6_IFADDR_STATE_DEAD)
 		goto out;
 
-	spin_lock(&ifp->lock);
 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
 	    idev->cnf.accept_dad < 1 ||
 	    !(ifp->flags&IFA_F_TENTATIVE) ||
@@ -2890,8 +2890,8 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
 		ip6_ins_rt(ifp->rt);
 
 	addrconf_dad_kick(ifp);
-	spin_unlock(&ifp->lock);
 out:
+	spin_unlock(&ifp->lock);
 	read_unlock_bh(&idev->lock);
 }
 
@@ -2911,6 +2911,12 @@ static void addrconf_dad_timer(unsigned long data)
 	}
 
 	spin_lock(&ifp->lock);
+	if (ifp->state == INET6_IFADDR_STATE_DEAD) {
+		spin_unlock(&ifp->lock);
+		read_unlock(&idev->lock);
+		goto out;
+	}
+
 	if (ifp->probes == 0) {
 		/*
 		 * DAD was successful
-- 
cgit v1.2.3-59-g8ed1b