 net/core/dev.c           | 140
 net/core/net_namespace.c | 164
 net/core/netpoll.c       |  37
 net/core/skbuff.c        |  16
 net/core/sock.c          | 105
 5 files changed, 281 insertions(+), 181 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 853c8b575f1d..be6cedab5aa8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1751,9 +1751,6 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
*
* return values:
* NET_RX_SUCCESS (no congestion)
- * NET_RX_CN_LOW (low congestion)
- * NET_RX_CN_MOD (moderate congestion)
- * NET_RX_CN_HIGH (high congestion)
* NET_RX_DROP (packet was dropped)
*
*/
@@ -2001,6 +1998,21 @@ out:
}
#endif
+/**
+ * netif_receive_skb - process receive buffer from network
+ * @skb: buffer to process
+ *
+ * netif_receive_skb() is the main receive data processing function.
+ * It always succeeds. The buffer may be dropped during processing
+ * for congestion control or by the protocol layers.
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ *
+ * Return values (usually ignored):
+ * NET_RX_SUCCESS: no congestion
+ * NET_RX_DROP: packet was dropped
+ */
int netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
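For context, netif_receive_skb() is normally called from a driver's NAPI ->poll() handler, which satisfies the softirq requirement stated in the kdoc above. A minimal sketch under that assumption (the mydrv_* names and helpers are illustrative stand-ins, not real kernel APIs; netif_rx_complete() is the 2.6.24-era completion call):

	/* Hypothetical NAPI poll routine: runs in softirq context with
	 * interrupts enabled, as netif_receive_skb() requires. */
	static int mydrv_poll(struct napi_struct *napi, int budget)
	{
		struct mydrv_priv *priv =
			container_of(napi, struct mydrv_priv, napi);
		int work_done = 0;

		while (work_done < budget && mydrv_rx_pending(priv)) {
			struct sk_buff *skb = mydrv_next_skb(priv);

			/* Return value is usually ignored, per the kdoc. */
			netif_receive_skb(skb);
			work_done++;
		}

		if (work_done < budget)
			netif_rx_complete(priv->netdev, napi);

		return work_done;
	}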
@@ -2172,7 +2184,15 @@ static void net_rx_action(struct softirq_action *h)
weight = n->weight;
- work = n->poll(n, weight);
+ /* This NAPI_STATE_SCHED test is for avoiding a race
+ * with netpoll's poll_napi(). Only the entity which
+ * obtains the lock and sees NAPI_STATE_SCHED set will
+ * actually make the ->poll() call. Therefore we avoid
+ * accidentally calling ->poll() when NAPI is not scheduled.
+ */
+ work = 0;
+ if (test_bit(NAPI_STATE_SCHED, &n->state))
+ work = n->poll(n, weight);
WARN_ON_ONCE(work > weight);
@@ -3488,6 +3508,60 @@ static void net_set_todo(struct net_device *dev)
spin_unlock(&net_todo_list_lock);
}
+static void rollback_registered(struct net_device *dev)
+{
+ BUG_ON(dev_boot_phase);
+ ASSERT_RTNL();
+
+ /* Some devices call without registering for initialization unwind. */
+ if (dev->reg_state == NETREG_UNINITIALIZED) {
+ printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
+ "was registered\n", dev->name, dev);
+
+ WARN_ON(1);
+ return;
+ }
+
+ BUG_ON(dev->reg_state != NETREG_REGISTERED);
+
+ /* If device is running, close it first. */
+ dev_close(dev);
+
+ /* And unlink it from device chain. */
+ unlist_netdevice(dev);
+
+ dev->reg_state = NETREG_UNREGISTERING;
+
+ synchronize_net();
+
+ /* Shutdown queueing discipline. */
+ dev_shutdown(dev);
+
+
+ /* Notify protocols that we are about to destroy
+ this device. They should clean all the things.
+ */
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+
+ /*
+ * Flush the unicast and multicast chains
+ */
+ dev_addr_discard(dev);
+
+ if (dev->uninit)
+ dev->uninit(dev);
+
+ /* Notifier chain MUST detach us from master device. */
+ BUG_TRAP(!dev->master);
+
+ /* Remove entries from kobject tree */
+ netdev_unregister_kobject(dev);
+
+ synchronize_net();
+
+ dev_put(dev);
+}
+
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -3625,8 +3699,10 @@ int register_netdevice(struct net_device *dev)
/* Notify protocols, that a new device appeared. */
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
ret = notifier_to_errno(ret);
- if (ret)
- unregister_netdevice(dev);
+ if (ret) {
+ rollback_registered(dev);
+ dev->reg_state = NETREG_UNREGISTERED;
+ }
out:
return ret;
@@ -3903,59 +3979,9 @@ void synchronize_net(void)
void unregister_netdevice(struct net_device *dev)
{
- BUG_ON(dev_boot_phase);
- ASSERT_RTNL();
-
- /* Some devices call without registering for initialization unwind. */
- if (dev->reg_state == NETREG_UNINITIALIZED) {
- printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
- "was registered\n", dev->name, dev);
-
- WARN_ON(1);
- return;
- }
-
- BUG_ON(dev->reg_state != NETREG_REGISTERED);
-
- /* If device is running, close it first. */
- dev_close(dev);
-
- /* And unlink it from device chain. */
- unlist_netdevice(dev);
-
- dev->reg_state = NETREG_UNREGISTERING;
-
- synchronize_net();
-
- /* Shutdown queueing discipline. */
- dev_shutdown(dev);
-
-
- /* Notify protocols, that we are about to destroy
- this device. They should clean all the things.
- */
- call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
-
- /*
- * Flush the unicast and multicast chains
- */
- dev_addr_discard(dev);
-
- if (dev->uninit)
- dev->uninit(dev);
-
- /* Notifier chain MUST detach us from master device. */
- BUG_TRAP(!dev->master);
-
- /* Remove entries from kobject tree */
- netdev_unregister_kobject(dev);
-
+ rollback_registered(dev);
/* Finish processing unregister after unlock */
net_set_todo(dev);
-
- synchronize_net();
-
- dev_put(dev);
}
/**
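The refactor above matters most when a NETDEV_REGISTER notifier vetoes a device: register_netdevice() now unwinds via rollback_registered() itself and leaves reg_state at NETREG_UNREGISTERED. A hedged sketch of the calling convention this protects (the mydrv_* names are hypothetical):

	/* Hypothetical probe path.  On failure the core has already
	 * rolled back; the driver only frees what it allocated itself. */
	static int mydrv_probe(void)
	{
		struct net_device *dev;
		int err;

		dev = alloc_netdev(sizeof(struct mydrv_priv), "myeth%d",
				   mydrv_setup);
		if (!dev)
			return -ENOMEM;

		rtnl_lock();
		err = register_netdevice(dev);
		rtnl_unlock();
		if (err) {
			free_netdev(dev); /* no unregister_netdevice() needed */
			return err;
		}
		return 0;
	}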
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6f71db8c4428..e9f0964ce70b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -17,74 +17,13 @@ static DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
-static struct kmem_cache *net_cachep;
-
struct net init_net;
EXPORT_SYMBOL_GPL(init_net);
-static struct net *net_alloc(void)
-{
- return kmem_cache_zalloc(net_cachep, GFP_KERNEL);
-}
-
-static void net_free(struct net *net)
-{
- if (!net)
- return;
-
- if (unlikely(atomic_read(&net->use_count) != 0)) {
- printk(KERN_EMERG "network namespace not free! Usage: %d\n",
- atomic_read(&net->use_count));
- return;
- }
-
- kmem_cache_free(net_cachep, net);
-}
-
-static void cleanup_net(struct work_struct *work)
-{
- struct pernet_operations *ops;
- struct net *net;
-
- net = container_of(work, struct net, work);
-
- mutex_lock(&net_mutex);
-
- /* Don't let anyone else find us. */
- rtnl_lock();
- list_del(&net->list);
- rtnl_unlock();
-
- /* Run all of the network namespace exit methods */
- list_for_each_entry_reverse(ops, &pernet_list, list) {
- if (ops->exit)
- ops->exit(net);
- }
-
- mutex_unlock(&net_mutex);
-
- /* Ensure there are no outstanding rcu callbacks using this
- * network namespace.
- */
- rcu_barrier();
-
- /* Finally it is safe to free my network namespace structure */
- net_free(net);
-}
-
-
-void __put_net(struct net *net)
-{
- /* Cleanup the network namespace in process context */
- INIT_WORK(&net->work, cleanup_net);
- schedule_work(&net->work);
-}
-EXPORT_SYMBOL_GPL(__put_net);
-
/*
* setup_net runs the initializers for the network namespace object.
*/
-static int setup_net(struct net *net)
+static __net_init int setup_net(struct net *net)
{
/* Must be called with net_mutex held */
struct pernet_operations *ops;
@@ -112,9 +51,19 @@ out_undo:
if (ops->exit)
ops->exit(net);
}
+
+ rcu_barrier();
goto out;
}
+#ifdef CONFIG_NET_NS
+static struct kmem_cache *net_cachep;
+
+static struct net *net_alloc(void)
+{
+ return kmem_cache_zalloc(net_cachep, GFP_KERNEL);
+}
+
struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
struct net *new_net = NULL;
@@ -125,10 +74,6 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
if (!(flags & CLONE_NEWNET))
return old_net;
-#ifndef CONFIG_NET_NS
- return ERR_PTR(-EINVAL);
-#endif
-
err = -ENOMEM;
new_net = net_alloc();
if (!new_net)
@@ -155,14 +100,78 @@ out:
return new_net;
}
+static void net_free(struct net *net)
+{
+ if (!net)
+ return;
+
+ if (unlikely(atomic_read(&net->use_count) != 0)) {
+ printk(KERN_EMERG "network namespace not free! Usage: %d\n",
+ atomic_read(&net->use_count));
+ return;
+ }
+
+ kmem_cache_free(net_cachep, net);
+}
+
+static void cleanup_net(struct work_struct *work)
+{
+ struct pernet_operations *ops;
+ struct net *net;
+
+ net = container_of(work, struct net, work);
+
+ mutex_lock(&net_mutex);
+
+ /* Don't let anyone else find us. */
+ rtnl_lock();
+ list_del(&net->list);
+ rtnl_unlock();
+
+ /* Run all of the network namespace exit methods */
+ list_for_each_entry_reverse(ops, &pernet_list, list) {
+ if (ops->exit)
+ ops->exit(net);
+ }
+
+ mutex_unlock(&net_mutex);
+
+ /* Ensure there are no outstanding rcu callbacks using this
+ * network namespace.
+ */
+ rcu_barrier();
+
+ /* Finally it is safe to free my network namespace structure */
+ net_free(net);
+}
+
+void __put_net(struct net *net)
+{
+ /* Cleanup the network namespace in process context */
+ INIT_WORK(&net->work, cleanup_net);
+ schedule_work(&net->work);
+}
+EXPORT_SYMBOL_GPL(__put_net);
+
+#else
+struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+{
+ if (flags & CLONE_NEWNET)
+ return ERR_PTR(-EINVAL);
+ return old_net;
+}
+#endif
+
static int __init net_ns_init(void)
{
int err;
printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
+#ifdef CONFIG_NET_NS
net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
SMP_CACHE_BYTES,
SLAB_PANIC, NULL);
+#endif
mutex_lock(&net_mutex);
err = setup_net(&init_net);
@@ -185,29 +194,28 @@ static int register_pernet_operations(struct list_head *list,
struct net *net, *undo_net;
int error;
- error = 0;
list_add_tail(&ops->list, list);
- for_each_net(net) {
- if (ops->init) {
+ if (ops->init) {
+ for_each_net(net) {
error = ops->init(net);
if (error)
goto out_undo;
}
}
-out:
- return error;
+ return 0;
out_undo:
/* If I have an error cleanup all namespaces I initialized */
list_del(&ops->list);
- for_each_net(undo_net) {
- if (undo_net == net)
- goto undone;
- if (ops->exit)
+ if (ops->exit) {
+ for_each_net(undo_net) {
+ if (undo_net == net)
+ goto undone;
ops->exit(undo_net);
+ }
}
undone:
- goto out;
+ return error;
}
static void unregister_pernet_operations(struct pernet_operations *ops)
@@ -215,8 +223,8 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
struct net *net;
list_del(&ops->list);
- for_each_net(net)
- if (ops->exit)
+ if (ops->exit)
+ for_each_net(net)
ops->exit(net);
}
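For reference, subsystems reach register_pernet_operations() through the register_pernet_subsys() wrapper; a minimal consumer sketch (the mydrv_net_* names are hypothetical):

	static int __net_init mydrv_net_init(struct net *net)
	{
		/* Set up per-namespace state; called once per namespace. */
		return 0;
	}

	static void __net_exit mydrv_net_exit(struct net *net)
	{
		/* Tear down per-namespace state. */
	}

	static struct pernet_operations mydrv_net_ops = {
		.init = mydrv_net_init,
		.exit = mydrv_net_exit,
	};

	static int __init mydrv_init(void)
	{
		/* With this patch the .init/.exit NULL checks are hoisted
		 * outside the for_each_net() loops, so operations without
		 * callbacks cost nothing per namespace. */
		return register_pernet_subsys(&mydrv_net_ops);
	}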
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index bf8d18f1b013..c499b5c69bed 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -116,6 +116,29 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
* network adapter, forcing superfluous retries and possibly timeouts.
* Thus, we set our budget to greater than 1.
*/
+static int poll_one_napi(struct netpoll_info *npinfo,
+ struct napi_struct *napi, int budget)
+{
+ int work;
+
+ /* net_rx_action's ->poll() invocations and ours are
+ * synchronized by this test which is only made while
+ * holding the napi->poll_lock.
+ */
+ if (!test_bit(NAPI_STATE_SCHED, &napi->state))
+ return budget;
+
+ npinfo->rx_flags |= NETPOLL_RX_DROP;
+ atomic_inc(&trapped);
+
+ work = napi->poll(napi, budget);
+
+ atomic_dec(&trapped);
+ npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+
+ return budget - work;
+}
+
static void poll_napi(struct netpoll *np)
{
struct netpoll_info *npinfo = np->dev->npinfo;
@@ -123,17 +146,13 @@ static void poll_napi(struct netpoll *np)
int budget = 16;
list_for_each_entry(napi, &np->dev->napi_list, dev_list) {
- if (test_bit(NAPI_STATE_SCHED, &napi->state) &&
- napi->poll_owner != smp_processor_id() &&
+ if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
- npinfo->rx_flags |= NETPOLL_RX_DROP;
- atomic_inc(&trapped);
-
- napi->poll(napi, budget);
-
- atomic_dec(&trapped);
- npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+ budget = poll_one_napi(npinfo, napi, budget);
spin_unlock(&napi->poll_lock);
+
+ if (!budget)
+ break;
}
}
}
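Together with the net_rx_action() change in dev.c above, both pollers now follow the same rule; an illustrative condensation (not code from the patch, and netpoll actually uses a trylock):

	/* Only the caller that holds poll_lock *and* observes
	 * NAPI_STATE_SCHED set may invoke ->poll().  This is what keeps
	 * net_rx_action() and netpoll's poll_one_napi() from calling
	 * ->poll() concurrently or when NAPI is not scheduled. */
	static int napi_poll_if_scheduled(struct napi_struct *napi, int budget)
	{
		int work = 0;

		spin_lock(&napi->poll_lock);
		if (test_bit(NAPI_STATE_SCHED, &napi->state))
			work = napi->poll(napi, budget);
		spin_unlock(&napi->poll_lock);

		return work;
	}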
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 573e17240197..64b50ff7a413 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2028,8 +2028,8 @@ void __init skb_init(void)
* Fill the specified scatter-gather list with mappings/pointers into a
* region of the buffer space attached to a socket buffer.
*/
-int
-skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+static int
+__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
@@ -2078,7 +2078,8 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
- elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
+ elt += __skb_to_sgvec(list, sg+elt, offset - start,
+ copy);
if ((len -= copy) == 0)
return elt;
offset += copy;
@@ -2090,6 +2091,15 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
return elt;
}
+int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+ int nsg = __skb_to_sgvec(skb, sg, offset, len);
+
+ __sg_mark_end(&sg[nsg - 1]);
+
+ return nsg;
+}
+
/**
* skb_cow_data - Check that a socket buffer's data buffers are writable
* @skb: The socket buffer to check.
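With the wrapper above, callers no longer terminate the scatterlist themselves: skb_to_sgvec() marks the final entry. A hedged usage sketch against the 2.6.24-era crypto hash API (mydrv_hash_skb is hypothetical; the sizing assumes an skb without a frag list):

	static int mydrv_hash_skb(struct hash_desc *desc, struct sk_buff *skb,
				  u8 *out)
	{
		struct scatterlist sg[MAX_SKB_FRAGS + 1];
		int nsg;

		nsg = skb_to_sgvec(skb, sg, 0, skb->len);
		/* sg[nsg - 1] is already marked as the list end, so the
		 * crypto layer can walk it without an explicit end marker. */
		return crypto_hash_digest(desc, sg, skb->len, out);
	}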
diff --git a/net/core/sock.c b/net/core/sock.c
index bba9949681ff..12ad2067a988 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -857,46 +857,43 @@ static inline void sock_lock_init(struct sock *sk)
af_family_keys + sk->sk_family);
}
-/**
- * sk_alloc - All socket objects are allocated here
- * @net: the applicable net namespace
- * @family: protocol family
- * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
- * @prot: struct proto associated with this new sock instance
- * @zero_it: if we should zero the newly allocated sock
- */
-struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
- struct proto *prot, int zero_it)
+static void sock_copy(struct sock *nsk, const struct sock *osk)
+{
+#ifdef CONFIG_SECURITY_NETWORK
+ void *sptr = nsk->sk_security;
+#endif
+
+ memcpy(nsk, osk, osk->sk_prot->obj_size);
+#ifdef CONFIG_SECURITY_NETWORK
+ nsk->sk_security = sptr;
+ security_sk_clone(osk, nsk);
+#endif
+}
+
+static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
+ int family)
{
- struct sock *sk = NULL;
- struct kmem_cache *slab = prot->slab;
+ struct sock *sk;
+ struct kmem_cache *slab;
+ slab = prot->slab;
if (slab != NULL)
sk = kmem_cache_alloc(slab, priority);
else
sk = kmalloc(prot->obj_size, priority);
- if (sk) {
- if (zero_it) {
- memset(sk, 0, prot->obj_size);
- sk->sk_family = family;
- /*
- * See comment in struct sock definition to understand
- * why we need sk_prot_creator -acme
- */
- sk->sk_prot = sk->sk_prot_creator = prot;
- sock_lock_init(sk);
- sk->sk_net = get_net(net);
- }
-
+ if (sk != NULL) {
if (security_sk_alloc(sk, family, priority))
goto out_free;
if (!try_module_get(prot->owner))
- goto out_free;
+ goto out_free_sec;
}
+
return sk;
+out_free_sec:
+ security_sk_free(sk);
out_free:
if (slab != NULL)
kmem_cache_free(slab, sk);
@@ -905,10 +902,53 @@ out_free:
return NULL;
}
+static void sk_prot_free(struct proto *prot, struct sock *sk)
+{
+ struct kmem_cache *slab;
+ struct module *owner;
+
+ owner = prot->owner;
+ slab = prot->slab;
+
+ security_sk_free(sk);
+ if (slab != NULL)
+ kmem_cache_free(slab, sk);
+ else
+ kfree(sk);
+ module_put(owner);
+}
+
+/**
+ * sk_alloc - All socket objects are allocated here
+ * @net: the applicable net namespace
+ * @family: protocol family
+ * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ * @prot: struct proto associated with this new sock instance
+ */
+struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
+ struct proto *prot)
+{
+ struct sock *sk;
+
+ sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
+ if (sk) {
+ sk->sk_family = family;
+ /*
+ * See comment in struct sock definition to understand
+ * why we need sk_prot_creator -acme
+ */
+ sk->sk_prot = sk->sk_prot_creator = prot;
+ sock_lock_init(sk);
+ sk->sk_net = get_net(net);
+ }
+
+ return sk;
+}
+
void sk_free(struct sock *sk)
{
struct sk_filter *filter;
- struct module *owner = sk->sk_prot_creator->owner;
if (sk->sk_destruct)
sk->sk_destruct(sk);
@@ -925,25 +965,22 @@ void sk_free(struct sock *sk)
printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
__FUNCTION__, atomic_read(&sk->sk_omem_alloc));
- security_sk_free(sk);
put_net(sk->sk_net);
- if (sk->sk_prot_creator->slab != NULL)
- kmem_cache_free(sk->sk_prot_creator->slab, sk);
- else
- kfree(sk);
- module_put(owner);
+ sk_prot_free(sk->sk_prot_creator, sk);
}
struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
- struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0);
+ struct sock *newsk;
+ newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
if (newsk != NULL) {
struct sk_filter *filter;
sock_copy(newsk, sk);
/* SANITY */
+ get_net(newsk->sk_net);
sk_node_init(&newsk->sk_node);
sock_lock_init(newsk);
bh_lock_sock(newsk);
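Finally, the sk_alloc() signature change ripples to every protocol: the zero_it argument is gone because sk_alloc() now always passes __GFP_ZERO to sk_prot_alloc(), while sk_clone() takes the non-zeroed path and sock_copy()s the parent socket instead. A hedged sketch of an updated caller (myproto_create and my_proto are hypothetical):

	static int myproto_create(struct net *net, struct socket *sock)
	{
		struct sock *sk;

		/* No zero_it flag: the object arrives zeroed via __GFP_ZERO. */
		sk = sk_alloc(net, PF_INET, GFP_KERNEL, &my_proto);
		if (!sk)
			return -ENOBUFS;

		sock_init_data(sock, sk);
		return 0;
	}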