 net/core/dev.c           | 140
 net/core/net_namespace.c | 164
 net/core/netpoll.c       |  37
 net/core/skbuff.c        |  16
 net/core/sock.c          | 105
 5 files changed, 281 insertions(+), 181 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 853c8b575f1d..be6cedab5aa8 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1751,9 +1751,6 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
*
* return values:
* NET_RX_SUCCESS (no congestion)
- * NET_RX_CN_LOW (low congestion)
- * NET_RX_CN_MOD (moderate congestion)
- * NET_RX_CN_HIGH (high congestion)
* NET_RX_DROP (packet was dropped)
*
*/
@@ -2001,6 +1998,21 @@ out:
}
#endif
+/**
+ * netif_receive_skb - process receive buffer from network
+ * @skb: buffer to process
+ *
+ * netif_receive_skb() is the main receive data processing function.
+ * It always succeeds. The buffer may be dropped during processing
+ * for congestion control or by the protocol layers.
+ *
+ * This function may only be called from softirq context and interrupts
+ * should be enabled.
+ *
+ * Return values (usually ignored):
+ * NET_RX_SUCCESS: no congestion
+ * NET_RX_DROP: packet was dropped
+ */
int netif_receive_skb(struct sk_buff *skb)
{
struct packet_type *ptype, *pt_prev;
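For context, netif_receive_skb() is normally called from a driver's NAPI ->poll() handler, which satisfies the softirq requirement stated in the kdoc above. A minimal sketch under that assumption (the mydrv_* names and helpers are illustrative stand-ins, not real kernel APIs; netif_rx_complete() is the 2.6.24-era completion call):

	/* Hypothetical NAPI poll routine: runs in softirq context with
	 * interrupts enabled, as netif_receive_skb() requires. */
	static int mydrv_poll(struct napi_struct *napi, int budget)
	{
		struct mydrv_priv *priv =
			container_of(napi, struct mydrv_priv, napi);
		int work_done = 0;

		while (work_done < budget && mydrv_rx_pending(priv)) {
			struct sk_buff *skb = mydrv_next_skb(priv);

			/* Return value is usually ignored, per the kdoc. */
			netif_receive_skb(skb);
			work_done++;
		}

		if (work_done < budget)
			netif_rx_complete(priv->netdev, napi);

		return work_done;
	}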
@@ -2172,7 +2184,15 @@ static void net_rx_action(struct softirq_action *h)
weight = n->weight;
- work = n->poll(n, weight);
+ /* This NAPI_STATE_SCHED test is for avoiding a race
+ * with netpoll's poll_napi(). Only the entity which
+ * obtains the lock and sees NAPI_STATE_SCHED set will
+ * actually make the ->poll() call. Therefore we avoid
+ * accidentally calling ->poll() when NAPI is not scheduled.
+ */
+ work = 0;
+ if (test_bit(NAPI_STATE_SCHED, &n->state))
+ work = n->poll(n, weight);
WARN_ON_ONCE(work > weight);
@@ -3488,6 +3508,60 @@ static void net_set_todo(struct net_device *dev)
spin_unlock(&net_todo_list_lock);
}
+static void rollback_registered(struct net_device *dev)
+{
+ BUG_ON(dev_boot_phase);
+ ASSERT_RTNL();
+
+ /* Some devices call without registering for initialization unwind. */
+ if (dev->reg_state == NETREG_UNINITIALIZED) {
+ printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
+ "was registered\n", dev->name, dev);
+
+ WARN_ON(1);
+ return;
+ }
+
+ BUG_ON(dev->reg_state != NETREG_REGISTERED);
+
+ /* If device is running, close it first. */
+ dev_close(dev);
+
+ /* And unlink it from device chain. */
+ unlist_netdevice(dev);
+
+ dev->reg_state = NETREG_UNREGISTERING;
+
+ synchronize_net();
+
+ /* Shutdown queueing discipline. */
+ dev_shutdown(dev);
+
+
+ /* Notify protocols that we are about to destroy
+ this device. They should clean all the things.
+ */
+ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
+
+ /*
+ * Flush the unicast and multicast chains
+ */
+ dev_addr_discard(dev);
+
+ if (dev->uninit)
+ dev->uninit(dev);
+
+ /* Notifier chain MUST detach us from master device. */
+ BUG_TRAP(!dev->master);
+
+ /* Remove entries from kobject tree */
+ netdev_unregister_kobject(dev);
+
+ synchronize_net();
+
+ dev_put(dev);
+}
+
/**
* register_netdevice - register a network device
* @dev: device to register
@@ -3625,8 +3699,10 @@ int register_netdevice(struct net_device *dev)
/* Notify protocols, that a new device appeared. */
ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
ret = notifier_to_errno(ret);
- if (ret)
- unregister_netdevice(dev);
+ if (ret) {
+ rollback_registered(dev);
+ dev->reg_state = NETREG_UNREGISTERED;
+ }
out:
return ret;
@@ -3903,59 +3979,9 @@ void synchronize_net(void)
void unregister_netdevice(struct net_device *dev)
{
- BUG_ON(dev_boot_phase);
- ASSERT_RTNL();
-
- /* Some devices call without registering for initialization unwind. */
- if (dev->reg_state == NETREG_UNINITIALIZED) {
- printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
- "was registered\n", dev->name, dev);
-
- WARN_ON(1);
- return;
- }
-
- BUG_ON(dev->reg_state != NETREG_REGISTERED);
-
- /* If device is running, close it first. */
- dev_close(dev);
-
- /* And unlink it from device chain. */
- unlist_netdevice(dev);
-
- dev->reg_state = NETREG_UNREGISTERING;
-
- synchronize_net();
-
- /* Shutdown queueing discipline. */
- dev_shutdown(dev);
-
-
- /* Notify protocols, that we are about to destroy
- this device. They should clean all the things.
- */
- call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
-
- /*
- * Flush the unicast and multicast chains
- */
- dev_addr_discard(dev);
-
- if (dev->uninit)
- dev->uninit(dev);
-
- /* Notifier chain MUST detach us from master device. */
- BUG_TRAP(!dev->master);
-
- /* Remove entries from kobject tree */
- netdev_unregister_kobject(dev);
-
+ rollback_registered(dev);
/* Finish processing unregister after unlock */
net_set_todo(dev);
-
- synchronize_net();
-
- dev_put(dev);
}
/**
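The refactor above matters most when a NETDEV_REGISTER notifier vetoes a device: register_netdevice() now unwinds via rollback_registered() itself and leaves reg_state at NETREG_UNREGISTERED. A hedged sketch of the calling convention this protects (the mydrv_* names are hypothetical):

	/* Hypothetical probe path.  On failure the core has already
	 * rolled back; the driver only frees what it allocated itself. */
	static int mydrv_probe(void)
	{
		struct net_device *dev;
		int err;

		dev = alloc_netdev(sizeof(struct mydrv_priv), "myeth%d",
				   mydrv_setup);
		if (!dev)
			return -ENOMEM;

		rtnl_lock();
		err = register_netdevice(dev);
		rtnl_unlock();
		if (err) {
			free_netdev(dev); /* no unregister_netdevice() needed */
			return err;
		}
		return 0;
	}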
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6f71db8c4428..e9f0964ce70b 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -17,74 +17,13 @@ static DEFINE_MUTEX(net_mutex);
LIST_HEAD(net_namespace_list);
-static struct kmem_cache *net_cachep;
-
struct net init_net;
EXPORT_SYMBOL_GPL(init_net);
-static struct net *net_alloc(void)
-{
- return kmem_cache_zalloc(net_cachep, GFP_KERNEL);
-}
-
-static void net_free(struct net *net)
-{
- if (!net)
- return;
-
- if (unlikely(atomic_read(&net->use_count) != 0)) {
- printk(KERN_EMERG "network namespace not free! Usage: %d\n",
- atomic_read(&net->use_count));
- return;
- }
-
- kmem_cache_free(net_cachep, net);
-}
-
-static void cleanup_net(struct work_struct *work)
-{
- struct pernet_operations *ops;
- struct net *net;
-
- net = container_of(work, struct net, work);
-
- mutex_lock(&net_mutex);
-
- /* Don't let anyone else find us. */
- rtnl_lock();
- list_del(&net->list);
- rtnl_unlock();
-
- /* Run all of the network namespace exit methods */
- list_for_each_entry_reverse(ops, &pernet_list, list) {
- if (ops->exit)
- ops->exit(net);
- }
-
- mutex_unlock(&net_mutex);
-
- /* Ensure there are no outstanding rcu callbacks using this
- * network namespace.
- */
- rcu_barrier();
-
- /* Finally it is safe to free my network namespace structure */
- net_free(net);
-}
-
-
-void __put_net(struct net *net)
-{
- /* Cleanup the network namespace in process context */
- INIT_WORK(&net->work, cleanup_net);
- schedule_work(&net->work);
-}
-EXPORT_SYMBOL_GPL(__put_net);
-
/*
* setup_net runs the initializers for the network namespace object.
*/
-static int setup_net(struct net *net)
+static __net_init int setup_net(struct net *net)
{
/* Must be called with net_mutex held */
struct pernet_operations *ops;
@@ -112,9 +51,19 @@ out_undo:
if (ops->exit)
ops->exit(net);
}
+
+ rcu_barrier();
goto out;
}
+#ifdef CONFIG_NET_NS
+static struct kmem_cache *net_cachep;
+
+static struct net *net_alloc(void)
+{
+ return kmem_cache_zalloc(net_cachep, GFP_KERNEL);
+}
+
struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
struct net *new_net = NULL;
@@ -125,10 +74,6 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
if (!(flags & CLONE_NEWNET))
return old_net;
-#ifndef CONFIG_NET_NS
- return ERR_PTR(-EINVAL);
-#endif
-
err = -ENOMEM;
new_net = net_alloc();
if (!new_net)
@@ -155,14 +100,78 @@ out:
return new_net;
}
+static void net_free(struct net *net)
+{
+ if (!net)
+ return;
+
+ if (unlikely(atomic_read(&net->use_count) != 0)) {
+ printk(KERN_EMERG "network namespace not free! Usage: %d\n",
+ atomic_read(&net->use_count));
+ return;
+ }
+
+ kmem_cache_free(net_cachep, net);
+}
+
+static void cleanup_net(struct work_struct *work)
+{
+ struct pernet_operations *ops;
+ struct net *net;
+
+ net = container_of(work, struct net, work);
+
+ mutex_lock(&net_mutex);
+
+ /* Don't let anyone else find us. */
+ rtnl_lock();
+ list_del(&net->list);
+ rtnl_unlock();
+
+ /* Run all of the network namespace exit methods */
+ list_for_each_entry_reverse(ops, &pernet_list, list) {
+ if (ops->exit)
+ ops->exit(net);
+ }
+
+ mutex_unlock(&net_mutex);
+
+ /* Ensure there are no outstanding rcu callbacks using this
+ * network namespace.
+ */
+ rcu_barrier();
+
+ /* Finally it is safe to free my network namespace structure */
+ net_free(net);
+}
+
+void __put_net(struct net *net)
+{
+ /* Cleanup the network namespace in process context */
+ INIT_WORK(&net->work, cleanup_net);
+ schedule_work(&net->work);
+}
+EXPORT_SYMBOL_GPL(__put_net);
+
+#else
+struct net *copy_net_ns(unsigned long flags, struct net *old_net)
+{
+ if (flags & CLONE_NEWNET)
+ return ERR_PTR(-EINVAL);
+ return old_net;
+}
+#endif
+
static int __init net_ns_init(void)
{
int err;
printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
+#ifdef CONFIG_NET_NS
net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
SMP_CACHE_BYTES,
SLAB_PANIC, NULL);
+#endif
mutex_lock(&net_mutex);
err = setup_net(&init_net);
@@ -185,29 +194,28 @@ static int register_pernet_operations(struct list_head *list,
struct net *net, *undo_net;
int error;
- error = 0;
list_add_tail(&ops->list, list);
- for_each_net(net) {
- if (ops->init) {
+ if (ops->init) {
+ for_each_net(net) {
error = ops->init(net);
if (error)
goto out_undo;
}
}
-out:
- return error;
+ return 0;
out_undo:
/* If I have an error cleanup all namespaces I initialized */
list_del(&ops->list);
- for_each_net(undo_net) {
- if (undo_net == net)
- goto undone;
- if (ops->exit)
+ if (ops->exit) {
+ for_each_net(undo_net) {
+ if (undo_net == net)
+ goto undone;
ops->exit(undo_net);
+ }
}
undone:
- goto out;
+ return error;
}
static void unregister_pernet_operations(struct pernet_operations *ops)
@@ -215,8 +223,8 @@ static void unregister_pernet_operations(struct pernet_operations *ops)
struct net *net;
list_del(&ops->list);
- for_each_net(net)
- if (ops->exit)
+ if (ops->exit)
+ for_each_net(net)
ops->exit(net);
}
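For reference, subsystems reach register_pernet_operations() through the register_pernet_subsys() wrapper; a minimal consumer sketch (the mydrv_net_* names are hypothetical):

	static int __net_init mydrv_net_init(struct net *net)
	{
		/* Set up per-namespace state; called once per namespace. */
		return 0;
	}

	static void __net_exit mydrv_net_exit(struct net *net)
	{
		/* Tear down per-namespace state. */
	}

	static struct pernet_operations mydrv_net_ops = {
		.init = mydrv_net_init,
		.exit = mydrv_net_exit,
	};

	static int __init mydrv_init(void)
	{
		/* With this patch the .init/.exit NULL checks are hoisted
		 * outside the for_each_net() loops, so operations without
		 * callbacks cost nothing per namespace. */
		return register_pernet_subsys(&mydrv_net_ops);
	}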
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index bf8d18f1b013..c499b5c69bed 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -116,6 +116,29 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
* network adapter, forcing superfluous retries and possibly timeouts.
* Thus, we set our budget to greater than 1.
*/
+static int poll_one_napi(struct netpoll_info *npinfo,
+ struct napi_struct *napi, int budget)
+{
+ int work;
+
+ /* net_rx_action's ->poll() invocations and ours are
+ * synchronized by this test which is only made while
+ * holding the napi->poll_lock.
+ */
+ if (!test_bit(NAPI_STATE_SCHED, &napi->state))
+ return budget;
+
+ npinfo->rx_flags |= NETPOLL_RX_DROP;
+ atomic_inc(&trapped);
+
+ work = napi->poll(napi, budget);
+
+ atomic_dec(&trapped);
+ npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+
+ return budget - work;
+}
+
static void poll_napi(struct netpoll *np)
{
struct netpoll_info *npinfo = np->dev->npinfo;
@@ -123,17 +146,13 @@ static void poll_napi(struct netpoll *np)
int budget = 16;
list_for_each_entry(napi, &np->dev->napi_list, dev_list) {
- if (test_bit(NAPI_STATE_SCHED, &napi->state) &&
- napi->poll_owner != smp_processor_id() &&
+ if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
- npinfo->rx_flags |= NETPOLL_RX_DROP;
- atomic_inc(&trapped);
-
- napi->poll(napi, budget);
-
- atomic_dec(&trapped);
- npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+ budget = poll_one_napi(npinfo, napi, budget);
spin_unlock(&napi->poll_lock);
+
+ if (!budget)
+ break;
}
}
}
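Together with the net_rx_action() change in dev.c above, both pollers now follow the same rule; an illustrative condensation (not code from the patch, and netpoll actually uses a trylock):

	/* Only the caller that holds poll_lock *and* observes
	 * NAPI_STATE_SCHED set may invoke ->poll().  This is what keeps
	 * net_rx_action() and netpoll's poll_one_napi() from calling
	 * ->poll() concurrently or when NAPI is not scheduled. */
	static int napi_poll_if_scheduled(struct napi_struct *napi, int budget)
	{
		int work = 0;

		spin_lock(&napi->poll_lock);
		if (test_bit(NAPI_STATE_SCHED, &napi->state))
			work = napi->poll(napi, budget);
		spin_unlock(&napi->poll_lock);

		return work;
	}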
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 573e17240197..64b50ff7a413 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2028,8 +2028,8 @@ void __init skb_init(void)
* Fill the specified scatter-gather list with mappings/pointers into a
* region of the buffer space attached to a socket buffer.
*/
-int
-skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+static int
+__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
{
int start = skb_headlen(skb);
int i, copy = start - offset;
@@ -2078,7 +2078,8 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
if ((copy = end - offset) > 0) {
if (copy > len)
copy = len;
- elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
+ elt += __skb_to_sgvec(list, sg+elt, offset - start,
+ copy);
if ((len -= copy) == 0)
return elt;
offset += copy;
@@ -2090,6 +2091,15 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
return elt;
}
+int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
+{
+ int nsg = __skb_to_sgvec(skb, sg, offset, len);
+
+ __sg_mark_end(&sg[nsg - 1]);
+
+ return nsg;
+}
+
/**
* skb_cow_data - Check that a socket buffer's data buffers are writable
* @skb: The socket buffer to check.
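With the wrapper above, callers no longer terminate the scatterlist themselves: skb_to_sgvec() marks the final entry. A hedged usage sketch against the 2.6.24-era crypto hash API (mydrv_hash_skb is hypothetical; the sizing assumes an skb without a frag list):

	static int mydrv_hash_skb(struct hash_desc *desc, struct sk_buff *skb,
				  u8 *out)
	{
		struct scatterlist sg[MAX_SKB_FRAGS + 1];
		int nsg;

		nsg = skb_to_sgvec(skb, sg, 0, skb->len);
		/* sg[nsg - 1] is already marked as the list end, so the
		 * crypto layer can walk it without an explicit end marker. */
		return crypto_hash_digest(desc, sg, skb->len, out);
	}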
diff --git a/net/core/sock.c b/net/core/sock.c
index bba9949681ff..12ad2067a988 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -857,46 +857,43 @@ static inline void sock_lock_init(struct sock *sk)
af_family_keys + sk->sk_family);
}
-/**
- * sk_alloc - All socket objects are allocated here
- * @net: the applicable net namespace
- * @family: protocol family
- * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
- * @prot: struct proto associated with this new sock instance
- * @zero_it: if we should zero the newly allocated sock
- */
-struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
- struct proto *prot, int zero_it)
+static void sock_copy(struct sock *nsk, const struct sock *osk)
+{
+#ifdef CONFIG_SECURITY_NETWORK
+ void *sptr = nsk->sk_security;
+#endif
+
+ memcpy(nsk, osk, osk->sk_prot->obj_size);
+#ifdef CONFIG_SECURITY_NETWORK
+ nsk->sk_security = sptr;
+ security_sk_clone(osk, nsk);
+#endif
+}
+
+static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
+ int family)
{
- struct sock *sk = NULL;
- struct kmem_cache *slab = prot->slab;
+ struct sock *sk;
+ struct kmem_cache *slab;
+ slab = prot->slab;
if (slab != NULL)
sk = kmem_cache_alloc(slab, priority);
else
sk = kmalloc(prot->obj_size, priority);
- if (sk) {
- if (zero_it) {
- memset(sk, 0, prot->obj_size);
- sk->sk_family = family;
- /*
- * See comment in struct sock definition to understand
- * why we need sk_prot_creator -acme
- */
- sk->sk_prot = sk->sk_prot_creator = prot;
- sock_lock_init(sk);
- sk->sk_net = get_net(net);
- }
-
+ if (sk != NULL) {
if (security_sk_alloc(sk, family, priority))
goto out_free;
if (!try_module_get(prot->owner))
- goto out_free;
+ goto out_free_sec;
}
+
return sk;
+out_free_sec:
+ security_sk_free(sk);
out_free:
if (slab != NULL)
kmem_cache_free(slab, sk);
@@ -905,10 +902,53 @@ out_free:
return NULL;
}
+static void sk_prot_free(struct proto *prot, struct sock *sk)
+{
+ struct kmem_cache *slab;
+ struct module *owner;
+
+ owner = prot->owner;
+ slab = prot->slab;
+
+ security_sk_free(sk);
+ if (slab != NULL)
+ kmem_cache_free(slab, sk);
+ else
+ kfree(sk);
+ module_put(owner);
+}
+
+/**
+ * sk_alloc - All socket objects are allocated here
+ * @net: the applicable net namespace
+ * @family: protocol family
+ * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ * @prot: struct proto associated with this new sock instance
+ */
+struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
+ struct proto *prot)
+{
+ struct sock *sk;
+
+ sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
+ if (sk) {
+ sk->sk_family = family;
+ /*
+ * See comment in struct sock definition to understand
+ * why we need sk_prot_creator -acme
+ */
+ sk->sk_prot = sk->sk_prot_creator = prot;
+ sock_lock_init(sk);
+ sk->sk_net = get_net(net);
+ }
+
+ return sk;
+}
+
void sk_free(struct sock *sk)
{
struct sk_filter *filter;
- struct module *owner = sk->sk_prot_creator->owner;
if (sk->sk_destruct)
sk->sk_destruct(sk);
@@ -925,25 +965,22 @@ void sk_free(struct sock *sk)
printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
__FUNCTION__, atomic_read(&sk->sk_omem_alloc));
- security_sk_free(sk);
put_net(sk->sk_net);
- if (sk->sk_prot_creator->slab != NULL)
- kmem_cache_free(sk->sk_prot_creator->slab, sk);
- else
- kfree(sk);
- module_put(owner);
+ sk_prot_free(sk->sk_prot_creator, sk);
}
struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
{
- struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0);
+ struct sock *newsk;
+ newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family);
if (newsk != NULL) {
struct sk_filter *filter;
sock_copy(newsk, sk);
/* SANITY */
+ get_net(newsk->sk_net);
sk_node_init(&newsk->sk_node);
sock_lock_init(newsk);
bh_lock_sock(newsk);
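Finally, the sk_alloc() signature change ripples to every protocol: the zero_it argument is gone because sk_alloc() now always passes __GFP_ZERO to sk_prot_alloc(), while sk_clone() takes the non-zeroed path and sock_copy()s the parent socket instead. A hedged sketch of an updated caller (myproto_create and my_proto are hypothetical):

	static int myproto_create(struct net *net, struct socket *sock)
	{
		struct sock *sk;

		/* No zero_it flag: the object arrives zeroed via __GFP_ZERO. */
		sk = sk_alloc(net, PF_INET, GFP_KERNEL, &my_proto);
		if (!sk)
			return -ENOBUFS;

		sock_init_data(sock, sk);
		return 0;
	}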