diff options
Diffstat (limited to '')
-rw-r--r-- | net/core/dev.c | 140 | ||||
-rw-r--r-- | net/core/net_namespace.c | 164 | ||||
-rw-r--r-- | net/core/netpoll.c | 37 | ||||
-rw-r--r-- | net/core/skbuff.c | 16 | ||||
-rw-r--r-- | net/core/sock.c | 105 |
5 files changed, 281 insertions, 181 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 853c8b575f1d..be6cedab5aa8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1751,9 +1751,6 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; * * return values: * NET_RX_SUCCESS (no congestion) - * NET_RX_CN_LOW (low congestion) - * NET_RX_CN_MOD (moderate congestion) - * NET_RX_CN_HIGH (high congestion) * NET_RX_DROP (packet was dropped) * */ @@ -2001,6 +1998,21 @@ out: } #endif +/** + * netif_receive_skb - process receive buffer from network + * @skb: buffer to process + * + * netif_receive_skb() is the main receive data processing function. + * It always succeeds. The buffer may be dropped during processing + * for congestion control or by the protocol layers. + * + * This function may only be called from softirq context and interrupts + * should be enabled. + * + * Return values (usually ignored): + * NET_RX_SUCCESS: no congestion + * NET_RX_DROP: packet was dropped + */ int netif_receive_skb(struct sk_buff *skb) { struct packet_type *ptype, *pt_prev; @@ -2172,7 +2184,15 @@ static void net_rx_action(struct softirq_action *h) weight = n->weight; - work = n->poll(n, weight); + /* This NAPI_STATE_SCHED test is for avoiding a race + * with netpoll's poll_napi(). Only the entity which + * obtains the lock and sees NAPI_STATE_SCHED set will + * actually make the ->poll() call. Therefore we avoid + * accidently calling ->poll() when NAPI is not scheduled. + */ + work = 0; + if (test_bit(NAPI_STATE_SCHED, &n->state)) + work = n->poll(n, weight); WARN_ON_ONCE(work > weight); @@ -3488,6 +3508,60 @@ static void net_set_todo(struct net_device *dev) spin_unlock(&net_todo_list_lock); } +static void rollback_registered(struct net_device *dev) +{ + BUG_ON(dev_boot_phase); + ASSERT_RTNL(); + + /* Some devices call without registering for initialization unwind. */ + if (dev->reg_state == NETREG_UNINITIALIZED) { + printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " + "was registered\n", dev->name, dev); + + WARN_ON(1); + return; + } + + BUG_ON(dev->reg_state != NETREG_REGISTERED); + + /* If device is running, close it first. */ + dev_close(dev); + + /* And unlink it from device chain. */ + unlist_netdevice(dev); + + dev->reg_state = NETREG_UNREGISTERING; + + synchronize_net(); + + /* Shutdown queueing discipline. */ + dev_shutdown(dev); + + + /* Notify protocols, that we are about to destroy + this device. They should clean all the things. + */ + call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + + /* + * Flush the unicast and multicast chains + */ + dev_addr_discard(dev); + + if (dev->uninit) + dev->uninit(dev); + + /* Notifier chain MUST detach us from master device. */ + BUG_TRAP(!dev->master); + + /* Remove entries from kobject tree */ + netdev_unregister_kobject(dev); + + synchronize_net(); + + dev_put(dev); +} + /** * register_netdevice - register a network device * @dev: device to register @@ -3625,8 +3699,10 @@ int register_netdevice(struct net_device *dev) /* Notify protocols, that a new device appeared. */ ret = call_netdevice_notifiers(NETDEV_REGISTER, dev); ret = notifier_to_errno(ret); - if (ret) - unregister_netdevice(dev); + if (ret) { + rollback_registered(dev); + dev->reg_state = NETREG_UNREGISTERED; + } out: return ret; @@ -3903,59 +3979,9 @@ void synchronize_net(void) void unregister_netdevice(struct net_device *dev) { - BUG_ON(dev_boot_phase); - ASSERT_RTNL(); - - /* Some devices call without registering for initialization unwind. */ - if (dev->reg_state == NETREG_UNINITIALIZED) { - printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " - "was registered\n", dev->name, dev); - - WARN_ON(1); - return; - } - - BUG_ON(dev->reg_state != NETREG_REGISTERED); - - /* If device is running, close it first. */ - dev_close(dev); - - /* And unlink it from device chain. */ - unlist_netdevice(dev); - - dev->reg_state = NETREG_UNREGISTERING; - - synchronize_net(); - - /* Shutdown queueing discipline. */ - dev_shutdown(dev); - - - /* Notify protocols, that we are about to destroy - this device. They should clean all the things. - */ - call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - - /* - * Flush the unicast and multicast chains - */ - dev_addr_discard(dev); - - if (dev->uninit) - dev->uninit(dev); - - /* Notifier chain MUST detach us from master device. */ - BUG_TRAP(!dev->master); - - /* Remove entries from kobject tree */ - netdev_unregister_kobject(dev); - + rollback_registered(dev); /* Finish processing unregister after unlock */ net_set_todo(dev); - - synchronize_net(); - - dev_put(dev); } /** diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6f71db8c4428..e9f0964ce70b 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -17,74 +17,13 @@ static DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); -static struct kmem_cache *net_cachep; - struct net init_net; EXPORT_SYMBOL_GPL(init_net); -static struct net *net_alloc(void) -{ - return kmem_cache_zalloc(net_cachep, GFP_KERNEL); -} - -static void net_free(struct net *net) -{ - if (!net) - return; - - if (unlikely(atomic_read(&net->use_count) != 0)) { - printk(KERN_EMERG "network namespace not free! Usage: %d\n", - atomic_read(&net->use_count)); - return; - } - - kmem_cache_free(net_cachep, net); -} - -static void cleanup_net(struct work_struct *work) -{ - struct pernet_operations *ops; - struct net *net; - - net = container_of(work, struct net, work); - - mutex_lock(&net_mutex); - - /* Don't let anyone else find us. */ - rtnl_lock(); - list_del(&net->list); - rtnl_unlock(); - - /* Run all of the network namespace exit methods */ - list_for_each_entry_reverse(ops, &pernet_list, list) { - if (ops->exit) - ops->exit(net); - } - - mutex_unlock(&net_mutex); - - /* Ensure there are no outstanding rcu callbacks using this - * network namespace. - */ - rcu_barrier(); - - /* Finally it is safe to free my network namespace structure */ - net_free(net); -} - - -void __put_net(struct net *net) -{ - /* Cleanup the network namespace in process context */ - INIT_WORK(&net->work, cleanup_net); - schedule_work(&net->work); -} -EXPORT_SYMBOL_GPL(__put_net); - /* * setup_net runs the initializers for the network namespace object. */ -static int setup_net(struct net *net) +static __net_init int setup_net(struct net *net) { /* Must be called with net_mutex held */ struct pernet_operations *ops; @@ -112,9 +51,19 @@ out_undo: if (ops->exit) ops->exit(net); } + + rcu_barrier(); goto out; } +#ifdef CONFIG_NET_NS +static struct kmem_cache *net_cachep; + +static struct net *net_alloc(void) +{ + return kmem_cache_zalloc(net_cachep, GFP_KERNEL); +} + struct net *copy_net_ns(unsigned long flags, struct net *old_net) { struct net *new_net = NULL; @@ -125,10 +74,6 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) if (!(flags & CLONE_NEWNET)) return old_net; -#ifndef CONFIG_NET_NS - return ERR_PTR(-EINVAL); -#endif - err = -ENOMEM; new_net = net_alloc(); if (!new_net) @@ -155,14 +100,78 @@ out: return new_net; } +static void net_free(struct net *net) +{ + if (!net) + return; + + if (unlikely(atomic_read(&net->use_count) != 0)) { + printk(KERN_EMERG "network namespace not free! Usage: %d\n", + atomic_read(&net->use_count)); + return; + } + + kmem_cache_free(net_cachep, net); +} + +static void cleanup_net(struct work_struct *work) +{ + struct pernet_operations *ops; + struct net *net; + + net = container_of(work, struct net, work); + + mutex_lock(&net_mutex); + + /* Don't let anyone else find us. */ + rtnl_lock(); + list_del(&net->list); + rtnl_unlock(); + + /* Run all of the network namespace exit methods */ + list_for_each_entry_reverse(ops, &pernet_list, list) { + if (ops->exit) + ops->exit(net); + } + + mutex_unlock(&net_mutex); + + /* Ensure there are no outstanding rcu callbacks using this + * network namespace. + */ + rcu_barrier(); + + /* Finally it is safe to free my network namespace structure */ + net_free(net); +} + +void __put_net(struct net *net) +{ + /* Cleanup the network namespace in process context */ + INIT_WORK(&net->work, cleanup_net); + schedule_work(&net->work); +} +EXPORT_SYMBOL_GPL(__put_net); + +#else +struct net *copy_net_ns(unsigned long flags, struct net *old_net) +{ + if (flags & CLONE_NEWNET) + return ERR_PTR(-EINVAL); + return old_net; +} +#endif + static int __init net_ns_init(void) { int err; printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net)); +#ifdef CONFIG_NET_NS net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), SMP_CACHE_BYTES, SLAB_PANIC, NULL); +#endif mutex_lock(&net_mutex); err = setup_net(&init_net); @@ -185,29 +194,28 @@ static int register_pernet_operations(struct list_head *list, struct net *net, *undo_net; int error; - error = 0; list_add_tail(&ops->list, list); - for_each_net(net) { - if (ops->init) { + if (ops->init) { + for_each_net(net) { error = ops->init(net); if (error) goto out_undo; } } -out: - return error; + return 0; out_undo: /* If I have an error cleanup all namespaces I initialized */ list_del(&ops->list); - for_each_net(undo_net) { - if (undo_net == net) - goto undone; - if (ops->exit) + if (ops->exit) { + for_each_net(undo_net) { + if (undo_net == net) + goto undone; ops->exit(undo_net); + } } undone: - goto out; + return error; } static void unregister_pernet_operations(struct pernet_operations *ops) @@ -215,8 +223,8 @@ static void unregister_pernet_operations(struct pernet_operations *ops) struct net *net; list_del(&ops->list); - for_each_net(net) - if (ops->exit) + if (ops->exit) + for_each_net(net) ops->exit(net); } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index bf8d18f1b013..c499b5c69bed 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -116,6 +116,29 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, * network adapter, forcing superfluous retries and possibly timeouts. * Thus, we set our budget to greater than 1. */ +static int poll_one_napi(struct netpoll_info *npinfo, + struct napi_struct *napi, int budget) +{ + int work; + + /* net_rx_action's ->poll() invocations and our's are + * synchronized by this test which is only made while + * holding the napi->poll_lock. + */ + if (!test_bit(NAPI_STATE_SCHED, &napi->state)) + return budget; + + npinfo->rx_flags |= NETPOLL_RX_DROP; + atomic_inc(&trapped); + + work = napi->poll(napi, budget); + + atomic_dec(&trapped); + npinfo->rx_flags &= ~NETPOLL_RX_DROP; + + return budget - work; +} + static void poll_napi(struct netpoll *np) { struct netpoll_info *npinfo = np->dev->npinfo; @@ -123,17 +146,13 @@ static void poll_napi(struct netpoll *np) int budget = 16; list_for_each_entry(napi, &np->dev->napi_list, dev_list) { - if (test_bit(NAPI_STATE_SCHED, &napi->state) && - napi->poll_owner != smp_processor_id() && + if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - npinfo->rx_flags |= NETPOLL_RX_DROP; - atomic_inc(&trapped); - - napi->poll(napi, budget); - - atomic_dec(&trapped); - npinfo->rx_flags &= ~NETPOLL_RX_DROP; + budget = poll_one_napi(npinfo, napi, budget); spin_unlock(&napi->poll_lock); + + if (!budget) + break; } } } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 573e17240197..64b50ff7a413 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2028,8 +2028,8 @@ void __init skb_init(void) * Fill the specified scatter-gather list with mappings/pointers into a * region of the buffer space attached to a socket buffer. */ -int -skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) +static int +__skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { int start = skb_headlen(skb); int i, copy = start - offset; @@ -2078,7 +2078,8 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) if ((copy = end - offset) > 0) { if (copy > len) copy = len; - elt += skb_to_sgvec(list, sg+elt, offset - start, copy); + elt += __skb_to_sgvec(list, sg+elt, offset - start, + copy); if ((len -= copy) == 0) return elt; offset += copy; @@ -2090,6 +2091,15 @@ skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) return elt; } +int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) +{ + int nsg = __skb_to_sgvec(skb, sg, offset, len); + + __sg_mark_end(&sg[nsg - 1]); + + return nsg; +} + /** * skb_cow_data - Check that a socket buffer's data buffers are writable * @skb: The socket buffer to check. diff --git a/net/core/sock.c b/net/core/sock.c index bba9949681ff..12ad2067a988 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -857,46 +857,43 @@ static inline void sock_lock_init(struct sock *sk) af_family_keys + sk->sk_family); } -/** - * sk_alloc - All socket objects are allocated here - * @net: the applicable net namespace - * @family: protocol family - * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) - * @prot: struct proto associated with this new sock instance - * @zero_it: if we should zero the newly allocated sock - */ -struct sock *sk_alloc(struct net *net, int family, gfp_t priority, - struct proto *prot, int zero_it) +static void sock_copy(struct sock *nsk, const struct sock *osk) +{ +#ifdef CONFIG_SECURITY_NETWORK + void *sptr = nsk->sk_security; +#endif + + memcpy(nsk, osk, osk->sk_prot->obj_size); +#ifdef CONFIG_SECURITY_NETWORK + nsk->sk_security = sptr; + security_sk_clone(osk, nsk); +#endif +} + +static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, + int family) { - struct sock *sk = NULL; - struct kmem_cache *slab = prot->slab; + struct sock *sk; + struct kmem_cache *slab; + slab = prot->slab; if (slab != NULL) sk = kmem_cache_alloc(slab, priority); else sk = kmalloc(prot->obj_size, priority); - if (sk) { - if (zero_it) { - memset(sk, 0, prot->obj_size); - sk->sk_family = family; - /* - * See comment in struct sock definition to understand - * why we need sk_prot_creator -acme - */ - sk->sk_prot = sk->sk_prot_creator = prot; - sock_lock_init(sk); - sk->sk_net = get_net(net); - } - + if (sk != NULL) { if (security_sk_alloc(sk, family, priority)) goto out_free; if (!try_module_get(prot->owner)) - goto out_free; + goto out_free_sec; } + return sk; +out_free_sec: + security_sk_free(sk); out_free: if (slab != NULL) kmem_cache_free(slab, sk); @@ -905,10 +902,53 @@ out_free: return NULL; } +static void sk_prot_free(struct proto *prot, struct sock *sk) +{ + struct kmem_cache *slab; + struct module *owner; + + owner = prot->owner; + slab = prot->slab; + + security_sk_free(sk); + if (slab != NULL) + kmem_cache_free(slab, sk); + else + kfree(sk); + module_put(owner); +} + +/** + * sk_alloc - All socket objects are allocated here + * @net: the applicable net namespace + * @family: protocol family + * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc) + * @prot: struct proto associated with this new sock instance + * @zero_it: if we should zero the newly allocated sock + */ +struct sock *sk_alloc(struct net *net, int family, gfp_t priority, + struct proto *prot) +{ + struct sock *sk; + + sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family); + if (sk) { + sk->sk_family = family; + /* + * See comment in struct sock definition to understand + * why we need sk_prot_creator -acme + */ + sk->sk_prot = sk->sk_prot_creator = prot; + sock_lock_init(sk); + sk->sk_net = get_net(net); + } + + return sk; +} + void sk_free(struct sock *sk) { struct sk_filter *filter; - struct module *owner = sk->sk_prot_creator->owner; if (sk->sk_destruct) sk->sk_destruct(sk); @@ -925,25 +965,22 @@ void sk_free(struct sock *sk) printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", __FUNCTION__, atomic_read(&sk->sk_omem_alloc)); - security_sk_free(sk); put_net(sk->sk_net); - if (sk->sk_prot_creator->slab != NULL) - kmem_cache_free(sk->sk_prot_creator->slab, sk); - else - kfree(sk); - module_put(owner); + sk_prot_free(sk->sk_prot_creator, sk); } struct sock *sk_clone(const struct sock *sk, const gfp_t priority) { - struct sock *newsk = sk_alloc(sk->sk_net, sk->sk_family, priority, sk->sk_prot, 0); + struct sock *newsk; + newsk = sk_prot_alloc(sk->sk_prot, priority, sk->sk_family); if (newsk != NULL) { struct sk_filter *filter; sock_copy(newsk, sk); /* SANITY */ + get_net(newsk->sk_net); sk_node_init(&newsk->sk_node); sock_lock_init(newsk); bh_lock_sock(newsk); |