diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 53 | ||||
-rw-r--r-- | net/core/filter.c | 2 | ||||
-rw-r--r-- | net/core/flow.c | 132 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 24 | ||||
-rw-r--r-- | net/core/neighbour.c | 9 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 11 | ||||
-rw-r--r-- | net/core/netpoll.c | 585 | ||||
-rw-r--r-- | net/core/pktgen.c | 32 | ||||
-rw-r--r-- | net/core/request_sock.c | 1 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 107 | ||||
-rw-r--r-- | net/core/skbuff.c | 166 |
11 files changed, 321 insertions, 801 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 45fa2f11f84d..cf92139b229c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1245,7 +1245,7 @@ static int __dev_open(struct net_device *dev) * If we don't do this there is a chance ndo_poll_controller * or ndo_poll may be running while we open the device */ - netpoll_rx_disable(dev); + netpoll_poll_disable(dev); ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev); ret = notifier_to_errno(ret); @@ -1260,7 +1260,7 @@ static int __dev_open(struct net_device *dev) if (!ret && ops->ndo_open) ret = ops->ndo_open(dev); - netpoll_rx_enable(dev); + netpoll_poll_enable(dev); if (ret) clear_bit(__LINK_STATE_START, &dev->state); @@ -1313,6 +1313,9 @@ static int __dev_close_many(struct list_head *head) might_sleep(); list_for_each_entry(dev, head, close_list) { + /* Temporarily disable netpoll until the interface is down */ + netpoll_poll_disable(dev); + call_netdevice_notifiers(NETDEV_GOING_DOWN, dev); clear_bit(__LINK_STATE_START, &dev->state); @@ -1343,6 +1346,7 @@ static int __dev_close_many(struct list_head *head) dev->flags &= ~IFF_UP; net_dmaengine_put(); + netpoll_poll_enable(dev); } return 0; @@ -1353,14 +1357,10 @@ static int __dev_close(struct net_device *dev) int retval; LIST_HEAD(single); - /* Temporarily disable netpoll until the interface is down */ - netpoll_rx_disable(dev); - list_add(&dev->close_list, &single); retval = __dev_close_many(&single); list_del(&single); - netpoll_rx_enable(dev); return retval; } @@ -1398,14 +1398,9 @@ int dev_close(struct net_device *dev) if (dev->flags & IFF_UP) { LIST_HEAD(single); - /* Block netpoll rx while the interface is going down */ - netpoll_rx_disable(dev); - list_add(&dev->close_list, &single); dev_close_many(&single); list_del(&single); - - netpoll_rx_enable(dev); } return 0; } @@ -2885,6 +2880,7 @@ recursion_alert: rc = -ENETDOWN; rcu_read_unlock_bh(); + atomic_long_inc(&dev->tx_dropped); kfree_skb(skb); return rc; out: @@ -2957,7 +2953,7 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, flow_table = rcu_dereference(rxqueue->rps_flow_table); if (!flow_table) goto out; - flow_id = skb->rxhash & flow_table->mask; + flow_id = skb_get_hash(skb) & flow_table->mask; rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id); if (rc < 0) @@ -2991,6 +2987,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_sock_flow_table *sock_flow_table; int cpu = -1; u16 tcpu; + u32 hash; if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); @@ -3019,7 +3016,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } skb_reset_network_header(skb); - if (!skb_get_hash(skb)) + hash = skb_get_hash(skb); + if (!hash) goto done; flow_table = rcu_dereference(rxqueue->rps_flow_table); @@ -3028,11 +3026,10 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, u16 next_cpu; struct rps_dev_flow *rflow; - rflow = &flow_table->flows[skb->rxhash & flow_table->mask]; + rflow = &flow_table->flows[hash & flow_table->mask]; tcpu = rflow->cpu; - next_cpu = sock_flow_table->ents[skb->rxhash & - sock_flow_table->mask]; + next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask]; /* * If the desired CPU (where last recvmsg was done) is @@ -3061,7 +3058,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } if (map) { - tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32]; + tcpu = map->cpus[((u64) hash * map->len) >> 32]; if (cpu_online(tcpu)) { cpu = tcpu; @@ -3236,10 +3233,6 @@ static int netif_rx_internal(struct sk_buff *skb) { int ret; - /* if netpoll wants it, pretend we never saw it */ - if (netpoll_rx(skb)) - return NET_RX_DROP; - net_timestamp_check(netdev_tstamp_prequeue, skb); trace_netif_rx(skb); @@ -3500,11 +3493,11 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); static bool skb_pfmemalloc_protocol(struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_ARP): - case __constant_htons(ETH_P_IP): - case __constant_htons(ETH_P_IPV6): - case __constant_htons(ETH_P_8021Q): - case __constant_htons(ETH_P_8021AD): + case htons(ETH_P_ARP): + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + case htons(ETH_P_8021Q): + case htons(ETH_P_8021AD): return true; default: return false; @@ -3525,10 +3518,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) trace_netif_receive_skb(skb); - /* if we've gotten here through NAPI, check netpoll */ - if (netpoll_receive_skb(skb)) - goto out; - orig_dev = skb->dev; skb_reset_network_header(skb); @@ -3655,7 +3644,6 @@ drop: unlock: rcu_read_unlock(); -out: return ret; } @@ -3880,7 +3868,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff int same_flow; enum gro_result ret; - if (!(skb->dev->features & NETIF_F_GRO) || netpoll_rx_on(skb)) + if (!(skb->dev->features & NETIF_F_GRO)) goto normal; if (skb_is_gso(skb) || skb_has_frag_list(skb)) @@ -6251,6 +6239,7 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, netdev_stats_to_stats64(storage, &dev->stats); } storage->rx_dropped += atomic_long_read(&dev->rx_dropped); + storage->tx_dropped += atomic_long_read(&dev->tx_dropped); return storage; } EXPORT_SYMBOL(dev_get_stats); diff --git a/net/core/filter.c b/net/core/filter.c index ad30d626a5bd..65b75966e206 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -336,7 +336,7 @@ load_b: A = skb->dev->type; continue; case BPF_S_ANC_RXHASH: - A = skb->rxhash; + A = skb->hash; continue; case BPF_S_ANC_CPU: A = raw_smp_processor_id(); diff --git a/net/core/flow.c b/net/core/flow.c index dfa602ceb8cd..31cfb365e0c6 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -24,6 +24,7 @@ #include <net/flow.h> #include <linux/atomic.h> #include <linux/security.h> +#include <net/net_namespace.h> struct flow_cache_entry { union { @@ -38,37 +39,14 @@ struct flow_cache_entry { struct flow_cache_object *object; }; -struct flow_cache_percpu { - struct hlist_head *hash_table; - int hash_count; - u32 hash_rnd; - int hash_rnd_recalc; - struct tasklet_struct flush_tasklet; -}; - struct flow_flush_info { struct flow_cache *cache; atomic_t cpuleft; struct completion completion; }; -struct flow_cache { - u32 hash_shift; - struct flow_cache_percpu __percpu *percpu; - struct notifier_block hotcpu_notifier; - int low_watermark; - int high_watermark; - struct timer_list rnd_timer; -}; - -atomic_t flow_cache_genid = ATOMIC_INIT(0); -EXPORT_SYMBOL(flow_cache_genid); -static struct flow_cache flow_cache_global; static struct kmem_cache *flow_cachep __read_mostly; -static DEFINE_SPINLOCK(flow_cache_gc_lock); -static LIST_HEAD(flow_cache_gc_list); - #define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) @@ -84,16 +62,18 @@ static void flow_cache_new_hashrnd(unsigned long arg) add_timer(&fc->rnd_timer); } -static int flow_entry_valid(struct flow_cache_entry *fle) +static int flow_entry_valid(struct flow_cache_entry *fle, + struct netns_xfrm *xfrm) { - if (atomic_read(&flow_cache_genid) != fle->genid) + if (atomic_read(&xfrm->flow_cache_genid) != fle->genid) return 0; if (fle->object && !fle->object->ops->check(fle->object)) return 0; return 1; } -static void flow_entry_kill(struct flow_cache_entry *fle) +static void flow_entry_kill(struct flow_cache_entry *fle, + struct netns_xfrm *xfrm) { if (fle->object) fle->object->ops->delete(fle->object); @@ -104,26 +84,28 @@ static void flow_cache_gc_task(struct work_struct *work) { struct list_head gc_list; struct flow_cache_entry *fce, *n; + struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, + flow_cache_gc_work); INIT_LIST_HEAD(&gc_list); - spin_lock_bh(&flow_cache_gc_lock); - list_splice_tail_init(&flow_cache_gc_list, &gc_list); - spin_unlock_bh(&flow_cache_gc_lock); + spin_lock_bh(&xfrm->flow_cache_gc_lock); + list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); + spin_unlock_bh(&xfrm->flow_cache_gc_lock); list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) - flow_entry_kill(fce); + flow_entry_kill(fce, xfrm); } -static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task); static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, - int deleted, struct list_head *gc_list) + int deleted, struct list_head *gc_list, + struct netns_xfrm *xfrm) { if (deleted) { fcp->hash_count -= deleted; - spin_lock_bh(&flow_cache_gc_lock); - list_splice_tail(gc_list, &flow_cache_gc_list); - spin_unlock_bh(&flow_cache_gc_lock); - schedule_work(&flow_cache_gc_work); + spin_lock_bh(&xfrm->flow_cache_gc_lock); + list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); + spin_unlock_bh(&xfrm->flow_cache_gc_lock); + schedule_work(&xfrm->flow_cache_gc_work); } } @@ -135,6 +117,8 @@ static void __flow_cache_shrink(struct flow_cache *fc, struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; + struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, + flow_cache_global); for (i = 0; i < flow_cache_hash_size(fc); i++) { int saved = 0; @@ -142,7 +126,7 @@ static void __flow_cache_shrink(struct flow_cache *fc, hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { if (saved < shrink_to && - flow_entry_valid(fle)) { + flow_entry_valid(fle, xfrm)) { saved++; } else { deleted++; @@ -152,7 +136,7 @@ static void __flow_cache_shrink(struct flow_cache *fc, } } - flow_cache_queue_garbage(fcp, deleted, &gc_list); + flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); } static void flow_cache_shrink(struct flow_cache *fc, @@ -208,7 +192,7 @@ struct flow_cache_object * flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver, void *ctx) { - struct flow_cache *fc = &flow_cache_global; + struct flow_cache *fc = &net->xfrm.flow_cache_global; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle, *tfle; struct flow_cache_object *flo; @@ -258,7 +242,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); fcp->hash_count++; } - } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) { + } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) { flo = fle->object; if (!flo) goto ret_object; @@ -279,7 +263,7 @@ nocache: } flo = resolver(net, key, family, dir, flo, ctx); if (fle) { - fle->genid = atomic_read(&flow_cache_genid); + fle->genid = atomic_read(&net->xfrm.flow_cache_genid); if (!IS_ERR(flo)) fle->object = flo; else @@ -303,12 +287,14 @@ static void flow_cache_flush_tasklet(unsigned long data) struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; + struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, + flow_cache_global); fcp = this_cpu_ptr(fc->percpu); for (i = 0; i < flow_cache_hash_size(fc); i++) { hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { - if (flow_entry_valid(fle)) + if (flow_entry_valid(fle, xfrm)) continue; deleted++; @@ -317,7 +303,7 @@ static void flow_cache_flush_tasklet(unsigned long data) } } - flow_cache_queue_garbage(fcp, deleted, &gc_list); + flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); if (atomic_dec_and_test(&info->cpuleft)) complete(&info->completion); @@ -351,10 +337,9 @@ static void flow_cache_flush_per_cpu(void *data) tasklet_schedule(tasklet); } -void flow_cache_flush(void) +void flow_cache_flush(struct net *net) { struct flow_flush_info info; - static DEFINE_MUTEX(flow_flush_sem); cpumask_var_t mask; int i, self; @@ -365,8 +350,8 @@ void flow_cache_flush(void) /* Don't want cpus going down or up during this. */ get_online_cpus(); - mutex_lock(&flow_flush_sem); - info.cache = &flow_cache_global; + mutex_lock(&net->xfrm.flow_flush_sem); + info.cache = &net->xfrm.flow_cache_global; for_each_online_cpu(i) if (!flow_cache_percpu_empty(info.cache, i)) cpumask_set_cpu(i, mask); @@ -386,21 +371,23 @@ void flow_cache_flush(void) wait_for_completion(&info.completion); done: - mutex_unlock(&flow_flush_sem); + mutex_unlock(&net->xfrm.flow_flush_sem); put_online_cpus(); free_cpumask_var(mask); } static void flow_cache_flush_task(struct work_struct *work) { - flow_cache_flush(); -} + struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, + flow_cache_gc_work); + struct net *net = container_of(xfrm, struct net, xfrm); -static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task); + flow_cache_flush(net); +} -void flow_cache_flush_deferred(void) +void flow_cache_flush_deferred(struct net *net) { - schedule_work(&flow_cache_flush_work); + schedule_work(&net->xfrm.flow_cache_flush_work); } static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) @@ -425,7 +412,8 @@ static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) { - struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); + struct flow_cache *fc = container_of(nfb, struct flow_cache, + hotcpu_notifier); int res, cpu = (unsigned long) hcpu; struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); @@ -444,9 +432,20 @@ static int flow_cache_cpu(struct notifier_block *nfb, return NOTIFY_OK; } -static int __init flow_cache_init(struct flow_cache *fc) +int flow_cache_init(struct net *net) { int i; + struct flow_cache *fc = &net->xfrm.flow_cache_global; + + if (!flow_cachep) + flow_cachep = kmem_cache_create("flow_cache", + sizeof(struct flow_cache_entry), + 0, SLAB_PANIC, NULL); + spin_lock_init(&net->xfrm.flow_cache_gc_lock); + INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list); + INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); + INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); + mutex_init(&net->xfrm.flow_flush_sem); fc->hash_shift = 10; fc->low_watermark = 2 * flow_cache_hash_size(fc); @@ -484,14 +483,23 @@ err: return -ENOMEM; } +EXPORT_SYMBOL(flow_cache_init); -static int __init flow_cache_init_global(void) +void flow_cache_fini(struct net *net) { - flow_cachep = kmem_cache_create("flow_cache", - sizeof(struct flow_cache_entry), - 0, SLAB_PANIC, NULL); + int i; + struct flow_cache *fc = &net->xfrm.flow_cache_global; - return flow_cache_init(&flow_cache_global); -} + del_timer_sync(&fc->rnd_timer); + unregister_hotcpu_notifier(&fc->hotcpu_notifier); -module_init(flow_cache_init_global); + for_each_possible_cpu(i) { + struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); + kfree(fcp->hash_table); + fcp->hash_table = NULL; + } + + free_percpu(fc->percpu); + fc->percpu = NULL; +} +EXPORT_SYMBOL(flow_cache_fini); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index e29e810663d7..107ed12a5323 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -61,7 +61,7 @@ bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) again: switch (proto) { - case __constant_htons(ETH_P_IP): { + case htons(ETH_P_IP): { const struct iphdr *iph; struct iphdr _iph; ip: @@ -77,7 +77,7 @@ ip: iph_to_flow_copy_addrs(flow, iph); break; } - case __constant_htons(ETH_P_IPV6): { + case htons(ETH_P_IPV6): { const struct ipv6hdr *iph; struct ipv6hdr _iph; ipv6: @@ -91,8 +91,8 @@ ipv6: nhoff += sizeof(struct ipv6hdr); break; } - case __constant_htons(ETH_P_8021AD): - case __constant_htons(ETH_P_8021Q): { + case htons(ETH_P_8021AD): + case htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; struct vlan_hdr _vlan; @@ -104,7 +104,7 @@ ipv6: nhoff += sizeof(*vlan); goto again; } - case __constant_htons(ETH_P_PPP_SES): { + case htons(ETH_P_PPP_SES): { struct { struct pppoe_hdr hdr; __be16 proto; @@ -115,9 +115,9 @@ ipv6: proto = hdr->proto; nhoff += PPPOE_SES_HLEN; switch (proto) { - case __constant_htons(PPP_IP): + case htons(PPP_IP): goto ip; - case __constant_htons(PPP_IPV6): + case htons(PPP_IPV6): goto ipv6; default: return false; @@ -203,8 +203,8 @@ static __always_inline u32 __flow_hash_1word(u32 a) /* * __skb_get_hash: calculate a flow hash based on src/dst addresses - * and src/dst port numbers. Sets rxhash in skb to non-zero hash value - * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * and src/dst port numbers. Sets hash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_hash in skb * if hash is a canonical 4-tuple hash over transport ports. */ void __skb_get_hash(struct sk_buff *skb) @@ -216,7 +216,7 @@ void __skb_get_hash(struct sk_buff *skb) return; if (keys.ports) - skb->l4_rxhash = 1; + skb->l4_hash = 1; /* get a consistent hash (same value on both flow directions) */ if (((__force u32)keys.dst < (__force u32)keys.src) || @@ -232,7 +232,7 @@ void __skb_get_hash(struct sk_buff *skb) if (!hash) hash = 1; - skb->rxhash = hash; + skb->hash = hash; } EXPORT_SYMBOL(__skb_get_hash); @@ -344,7 +344,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) hash = skb->sk->sk_hash; else hash = (__force u16) skb->protocol ^ - skb->rxhash; + skb->hash; hash = __flow_hash_1word(hash); queue_index = map->queues[ ((u64)hash * map->len) >> 32]; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e16129019c66..8f8a96ef9f3f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -836,10 +836,10 @@ out: static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; - return (n->nud_state & NUD_PROBE) ? - NEIGH_VAR(p, UCAST_PROBES) : - NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) + - NEIGH_VAR(p, MCAST_PROBES); + int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES); + if (!(n->nud_state & NUD_PROBE)) + max_probes += NEIGH_VAR(p, MCAST_PROBES); + return max_probes; } static void neigh_invalidate(struct neighbour *neigh) @@ -945,6 +945,7 @@ static void neigh_timer_handler(unsigned long arg) neigh->nud_state = NUD_FAILED; notify = 1; neigh_invalidate(neigh); + goto out; } if (neigh->nud_state & NUD_IN_TIMER) { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 93886246a0b4..daed9a64c6f6 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -104,6 +104,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, } NETDEVICE_SHOW_RO(dev_id, fmt_hex); +NETDEVICE_SHOW_RO(dev_port, fmt_dec); NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec); NETDEVICE_SHOW_RO(addr_len, fmt_dec); NETDEVICE_SHOW_RO(iflink, fmt_dec); @@ -373,6 +374,7 @@ static struct attribute *net_class_attrs[] = { &dev_attr_netdev_group.attr, &dev_attr_type.attr, &dev_attr_dev_id.attr, + &dev_attr_dev_port.attr, &dev_attr_iflink.attr, &dev_attr_ifindex.attr, &dev_attr_addr_assign_type.attr, @@ -996,15 +998,12 @@ static struct attribute_group dql_group = { #endif /* CONFIG_BQL */ #ifdef CONFIG_XPS -static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue) +static unsigned int get_netdev_queue_index(struct netdev_queue *queue) { struct net_device *dev = queue->dev; - int i; - - for (i = 0; i < dev->num_tx_queues; i++) - if (queue == &dev->_tx[i]) - break; + unsigned int i; + i = queue - dev->_tx; BUG_ON(i >= dev->num_tx_queues); return i; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index df9e6b1a9759..ed7740f7a94d 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -46,13 +46,9 @@ static struct sk_buff_head skb_pool; -static atomic_t trapped; - DEFINE_STATIC_SRCU(netpoll_srcu); #define USEC_PER_POLL 50 -#define NETPOLL_RX_ENABLED 1 -#define NETPOLL_RX_DROP 2 #define MAX_SKB_SIZE \ (sizeof(struct ethhdr) + \ @@ -61,7 +57,6 @@ DEFINE_STATIC_SRCU(netpoll_srcu); MAX_UDP_CHUNK) static void zap_completion_queue(void); -static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo); static void netpoll_async_cleanup(struct work_struct *work); static unsigned int carrier_timeout = 4; @@ -74,6 +69,37 @@ module_param(carrier_timeout, uint, 0644); #define np_notice(np, fmt, ...) \ pr_notice("%s: " fmt, np->name, ##__VA_ARGS__) +static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq) +{ + const struct net_device_ops *ops = dev->netdev_ops; + int status = NETDEV_TX_OK; + netdev_features_t features; + + features = netif_skb_features(skb); + + if (vlan_tx_tag_present(skb) && + !vlan_hw_offload_capable(features, skb->vlan_proto)) { + skb = __vlan_put_tag(skb, skb->vlan_proto, + vlan_tx_tag_get(skb)); + if (unlikely(!skb)) { + /* This is actually a packet drop, but we + * don't want the code that calls this + * function to try and operate on a NULL skb. + */ + goto out; + } + skb->vlan_tci = 0; + } + + status = ops->ndo_start_xmit(skb, dev); + if (status == NETDEV_TX_OK) + txq_trans_update(txq); + +out: + return status; +} + static void queue_process(struct work_struct *work) { struct netpoll_info *npinfo = @@ -83,51 +109,31 @@ static void queue_process(struct work_struct *work) while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; - const struct net_device_ops *ops = dev->netdev_ops; struct netdev_queue *txq; if (!netif_device_present(dev) || !netif_running(dev)) { - __kfree_skb(skb); + kfree_skb(skb); continue; } txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); local_irq_save(flags); - __netif_tx_lock(txq, smp_processor_id()); + HARD_TX_LOCK(dev, txq, smp_processor_id()); if (netif_xmit_frozen_or_stopped(txq) || - ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) { + netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) { skb_queue_head(&npinfo->txq, skb); - __netif_tx_unlock(txq); + HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } - __netif_tx_unlock(txq); + HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); } } -static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, - unsigned short ulen, __be32 saddr, __be32 daddr) -{ - __wsum psum; - - if (uh->check == 0 || skb_csum_unnecessary(skb)) - return 0; - - psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); - - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_fold(csum_add(psum, skb->csum))) - return 0; - - skb->csum = psum; - - return __skb_checksum_complete(skb); -} - /* * Check whether delayed processing was scheduled for our NIC. If so, * we attempt to grab the poll lock and use ->poll() to pump the card. @@ -138,14 +144,8 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh, * trylock here and interrupts are already disabled in the softirq * case. Further, we test the poll_owner to avoid recursion on UP * systems where the lock doesn't exist. - * - * In cases where there is bi-directional communications, reading only - * one message at a time can lead to packets being dropped by the - * network adapter, forcing superfluous retries and possibly timeouts. - * Thus, we set our budget to greater than 1. */ -static int poll_one_napi(struct netpoll_info *npinfo, - struct napi_struct *napi, int budget) +static int poll_one_napi(struct napi_struct *napi, int budget) { int work; @@ -156,52 +156,35 @@ static int poll_one_napi(struct netpoll_info *npinfo, if (!test_bit(NAPI_STATE_SCHED, &napi->state)) return budget; - npinfo->rx_flags |= NETPOLL_RX_DROP; - atomic_inc(&trapped); set_bit(NAPI_STATE_NPSVC, &napi->state); work = napi->poll(napi, budget); + WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll); trace_napi_poll(napi); clear_bit(NAPI_STATE_NPSVC, &napi->state); - atomic_dec(&trapped); - npinfo->rx_flags &= ~NETPOLL_RX_DROP; return budget - work; } -static void poll_napi(struct net_device *dev) +static void poll_napi(struct net_device *dev, int budget) { struct napi_struct *napi; - int budget = 16; list_for_each_entry(napi, &dev->napi_list, dev_list) { if (napi->poll_owner != smp_processor_id() && spin_trylock(&napi->poll_lock)) { - budget = poll_one_napi(rcu_dereference_bh(dev->npinfo), - napi, budget); + budget = poll_one_napi(napi, budget); spin_unlock(&napi->poll_lock); - - if (!budget) - break; } } } -static void service_neigh_queue(struct netpoll_info *npi) -{ - if (npi) { - struct sk_buff *skb; - - while ((skb = skb_dequeue(&npi->neigh_tx))) - netpoll_neigh_reply(skb, npi); - } -} - static void netpoll_poll_dev(struct net_device *dev) { const struct net_device_ops *ops; struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); + int budget = 0; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity @@ -224,31 +207,14 @@ static void netpoll_poll_dev(struct net_device *dev) /* Process pending work on NIC */ ops->ndo_poll_controller(dev); - poll_napi(dev); + poll_napi(dev, budget); up(&ni->dev_lock); - if (dev->flags & IFF_SLAVE) { - if (ni) { - struct net_device *bond_dev; - struct sk_buff *skb; - struct netpoll_info *bond_ni; - - bond_dev = netdev_master_upper_dev_get_rcu(dev); - bond_ni = rcu_dereference_bh(bond_dev->npinfo); - while ((skb = skb_dequeue(&ni->neigh_tx))) { - skb->dev = bond_dev; - skb_queue_tail(&bond_ni->neigh_tx, skb); - } - } - } - - service_neigh_queue(ni); - zap_completion_queue(); } -void netpoll_rx_disable(struct net_device *dev) +void netpoll_poll_disable(struct net_device *dev) { struct netpoll_info *ni; int idx; @@ -259,9 +225,9 @@ void netpoll_rx_disable(struct net_device *dev) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); } -EXPORT_SYMBOL(netpoll_rx_disable); +EXPORT_SYMBOL(netpoll_poll_disable); -void netpoll_rx_enable(struct net_device *dev) +void netpoll_poll_enable(struct net_device *dev) { struct netpoll_info *ni; rcu_read_lock(); @@ -270,7 +236,7 @@ void netpoll_rx_enable(struct net_device *dev) up(&ni->dev_lock); rcu_read_unlock(); } -EXPORT_SYMBOL(netpoll_rx_enable); +EXPORT_SYMBOL(netpoll_poll_enable); static void refill_skbs(void) { @@ -359,7 +325,6 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, { int status = NETDEV_TX_BUSY; unsigned long tries; - const struct net_device_ops *ops = dev->netdev_ops; /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; @@ -367,7 +332,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, npinfo = rcu_dereference_bh(np->dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { - __kfree_skb(skb); + dev_kfree_skb_irq(skb); return; } @@ -380,29 +345,11 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { - if (__netif_tx_trylock(txq)) { - if (!netif_xmit_stopped(txq)) { - if (vlan_tx_tag_present(skb) && - !vlan_hw_offload_capable(netif_skb_features(skb), - skb->vlan_proto)) { - skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)); - if (unlikely(!skb)) { - /* This is actually a packet drop, but we - * don't want the code at the end of this - * function to try and re-queue a NULL skb. - */ - status = NETDEV_TX_OK; - goto unlock_txq; - } - skb->vlan_tci = 0; - } - - status = ops->ndo_start_xmit(skb, dev); - if (status == NETDEV_TX_OK) - txq_trans_update(txq); - } - unlock_txq: - __netif_tx_unlock(txq); + if (HARD_TX_TRYLOCK(dev, txq)) { + if (!netif_xmit_stopped(txq)) + status = netpoll_start_xmit(skb, dev, txq); + + HARD_TX_UNLOCK(dev, txq); if (status == NETDEV_TX_OK) break; @@ -417,7 +364,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, WARN_ONCE(!irqs_disabled(), "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n", - dev->name, ops->ndo_start_xmit); + dev->name, dev->netdev_ops->ndo_start_xmit); } @@ -529,384 +476,6 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) } EXPORT_SYMBOL(netpoll_send_udp); -static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo) -{ - int size, type = ARPOP_REPLY; - __be32 sip, tip; - unsigned char *sha; - struct sk_buff *send_skb; - struct netpoll *np, *tmp; - unsigned long flags; - int hlen, tlen; - int hits = 0, proto; - - if (list_empty(&npinfo->rx_np)) - return; - - /* Before checking the packet, we do some early - inspection whether this is interesting at all */ - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->dev == skb->dev) - hits++; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - - /* No netpoll struct is using this dev */ - if (!hits) - return; - - proto = ntohs(eth_hdr(skb)->h_proto); - if (proto == ETH_P_ARP) { - struct arphdr *arp; - unsigned char *arp_ptr; - /* No arp on this interface */ - if (skb->dev->flags & IFF_NOARP) - return; - - if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) - return; - - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - arp = arp_hdr(skb); - - if ((arp->ar_hrd != htons(ARPHRD_ETHER) && - arp->ar_hrd != htons(ARPHRD_IEEE802)) || - arp->ar_pro != htons(ETH_P_IP) || - arp->ar_op != htons(ARPOP_REQUEST)) - return; - - arp_ptr = (unsigned char *)(arp+1); - /* save the location of the src hw addr */ - sha = arp_ptr; - arp_ptr += skb->dev->addr_len; - memcpy(&sip, arp_ptr, 4); - arp_ptr += 4; - /* If we actually cared about dst hw addr, - it would get copied here */ - arp_ptr += skb->dev->addr_len; - memcpy(&tip, arp_ptr, 4); - - /* Should we ignore arp? */ - if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) - return; - - size = arp_hdr_len(skb->dev); - - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (tip != np->local_ip.ip) - continue; - - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; - - skb_reset_network_header(send_skb); - arp = (struct arphdr *) skb_put(send_skb, size); - send_skb->dev = skb->dev; - send_skb->protocol = htons(ETH_P_ARP); - - /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP, - sha, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; - } - - /* - * Fill out the arp protocol part. - * - * we only support ethernet device type, - * which (according to RFC 1390) should - * always equal 1 (Ethernet). - */ - - arp->ar_hrd = htons(np->dev->type); - arp->ar_pro = htons(ETH_P_IP); - arp->ar_hln = np->dev->addr_len; - arp->ar_pln = 4; - arp->ar_op = htons(type); - - arp_ptr = (unsigned char *)(arp + 1); - memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &tip, 4); - arp_ptr += 4; - memcpy(arp_ptr, sha, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &sip, 4); - - netpoll_send_skb(np, send_skb); - - /* If there are several rx_skb_hooks for the same - * address we're fine by sending a single reply - */ - break; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } else if( proto == ETH_P_IPV6) { -#if IS_ENABLED(CONFIG_IPV6) - struct nd_msg *msg; - u8 *lladdr = NULL; - struct ipv6hdr *hdr; - struct icmp6hdr *icmp6h; - const struct in6_addr *saddr; - const struct in6_addr *daddr; - struct inet6_dev *in6_dev = NULL; - struct in6_addr *target; - - in6_dev = in6_dev_get(skb->dev); - if (!in6_dev || !in6_dev->cnf.accept_ra) - return; - - if (!pskb_may_pull(skb, skb->len)) - return; - - msg = (struct nd_msg *)skb_transport_header(skb); - - __skb_push(skb, skb->data - skb_transport_header(skb)); - - if (ipv6_hdr(skb)->hop_limit != 255) - return; - if (msg->icmph.icmp6_code != 0) - return; - if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) - return; - - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; - - size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr); - - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (!ipv6_addr_equal(daddr, &np->local_ip.in6)) - continue; - - hlen = LL_RESERVED_SPACE(np->dev); - tlen = np->dev->needed_tailroom; - send_skb = find_skb(np, size + hlen + tlen, hlen); - if (!send_skb) - continue; - - send_skb->protocol = htons(ETH_P_IPV6); - send_skb->dev = skb->dev; - - skb_reset_network_header(send_skb); - hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr)); - *(__be32*)hdr = htonl(0x60000000); - hdr->payload_len = htons(size); - hdr->nexthdr = IPPROTO_ICMPV6; - hdr->hop_limit = 255; - hdr->saddr = *saddr; - hdr->daddr = *daddr; - - icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr)); - icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; - icmp6h->icmp6_router = 0; - icmp6h->icmp6_solicited = 1; - - target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr)); - *target = msg->target; - icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size, - IPPROTO_ICMPV6, - csum_partial(icmp6h, - size, 0)); - - if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6, - lladdr, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - continue; - } - - netpoll_send_skb(np, send_skb); - - /* If there are several rx_skb_hooks for the same - * address, we're fine by sending a single reply - */ - break; - } - spin_unlock_irqrestore(&npinfo->rx_lock, flags); -#endif - } -} - -static bool pkt_is_ns(struct sk_buff *skb) -{ - struct nd_msg *msg; - struct ipv6hdr *hdr; - - if (skb->protocol != htons(ETH_P_IPV6)) - return false; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg))) - return false; - - msg = (struct nd_msg *)skb_transport_header(skb); - __skb_push(skb, skb->data - skb_transport_header(skb)); - hdr = ipv6_hdr(skb); - - if (hdr->nexthdr != IPPROTO_ICMPV6) - return false; - if (hdr->hop_limit != 255) - return false; - if (msg->icmph.icmp6_code != 0) - return false; - if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) - return false; - - return true; -} - -int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo) -{ - int proto, len, ulen, data_len; - int hits = 0, offset; - const struct iphdr *iph; - struct udphdr *uh; - struct netpoll *np, *tmp; - uint16_t source; - - if (list_empty(&npinfo->rx_np)) - goto out; - - if (skb->dev->type != ARPHRD_ETHER) - goto out; - - /* check if netpoll clients need ARP */ - if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { - skb_queue_tail(&npinfo->neigh_tx, skb); - return 1; - } else if (pkt_is_ns(skb) && atomic_read(&trapped)) { - skb_queue_tail(&npinfo->neigh_tx, skb); - return 1; - } - - if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) { - skb = vlan_untag(skb); - if (unlikely(!skb)) - goto out; - } - - proto = ntohs(eth_hdr(skb)->h_proto); - if (proto != ETH_P_IP && proto != ETH_P_IPV6) - goto out; - if (skb->pkt_type == PACKET_OTHERHOST) - goto out; - if (skb_shared(skb)) - goto out; - - if (proto == ETH_P_IP) { - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - goto out; - iph = (struct iphdr *)skb->data; - if (iph->ihl < 5 || iph->version != 4) - goto out; - if (!pskb_may_pull(skb, iph->ihl*4)) - goto out; - iph = (struct iphdr *)skb->data; - if (ip_fast_csum((u8 *)iph, iph->ihl) != 0) - goto out; - - len = ntohs(iph->tot_len); - if (skb->len < len || len < iph->ihl*4) - goto out; - - /* - * Our transport medium may have padded the buffer out. - * Now We trim to the true length of the frame. - */ - if (pskb_trim_rcsum(skb, len)) - goto out; - - iph = (struct iphdr *)skb->data; - if (iph->protocol != IPPROTO_UDP) - goto out; - - len -= iph->ihl*4; - uh = (struct udphdr *)(((char *)iph) + iph->ihl*4); - offset = (unsigned char *)(uh + 1) - skb->data; - ulen = ntohs(uh->len); - data_len = skb->len - offset; - source = ntohs(uh->source); - - if (ulen != len) - goto out; - if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) - goto out; - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (np->local_ip.ip && np->local_ip.ip != iph->daddr) - continue; - if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; - - np->rx_skb_hook(np, source, skb, offset, data_len); - hits++; - } - } else { -#if IS_ENABLED(CONFIG_IPV6) - const struct ipv6hdr *ip6h; - - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto out; - ip6h = (struct ipv6hdr *)skb->data; - if (ip6h->version != 6) - goto out; - len = ntohs(ip6h->payload_len); - if (!len) - goto out; - if (len + sizeof(struct ipv6hdr) > skb->len) - goto out; - if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr))) - goto out; - ip6h = ipv6_hdr(skb); - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto out; - uh = udp_hdr(skb); - offset = (unsigned char *)(uh + 1) - skb->data; - ulen = ntohs(uh->len); - data_len = skb->len - offset; - source = ntohs(uh->source); - if (ulen != skb->len) - goto out; - if (udp6_csum_init(skb, uh, IPPROTO_UDP)) - goto out; - list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { - if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr)) - continue; - if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr)) - continue; - if (np->local_port && np->local_port != ntohs(uh->dest)) - continue; - - np->rx_skb_hook(np, source, skb, offset, data_len); - hits++; - } -#endif - } - - if (!hits) - goto out; - - kfree_skb(skb); - return 1; - -out: - if (atomic_read(&trapped)) { - kfree_skb(skb); - return 1; - } - - return 0; -} - void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); @@ -1026,11 +595,10 @@ int netpoll_parse_options(struct netpoll *np, char *opt) } EXPORT_SYMBOL(netpoll_parse_options); -int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) +int __netpoll_setup(struct netpoll *np, struct net_device *ndev) { struct netpoll_info *npinfo; const struct net_device_ops *ops; - unsigned long flags; int err; np->dev = ndev; @@ -1046,18 +614,13 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) } if (!ndev->npinfo) { - npinfo = kmalloc(sizeof(*npinfo), gfp); + npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; goto out; } - npinfo->rx_flags = 0; - INIT_LIST_HEAD(&npinfo->rx_np); - - spin_lock_init(&npinfo->rx_lock); sema_init(&npinfo->dev_lock, 1); - skb_queue_head_init(&npinfo->neigh_tx); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); @@ -1065,7 +628,7 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) ops = np->dev->netdev_ops; if (ops->ndo_netpoll_setup) { - err = ops->ndo_netpoll_setup(ndev, npinfo, gfp); + err = ops->ndo_netpoll_setup(ndev, npinfo); if (err) goto free_npinfo; } @@ -1076,13 +639,6 @@ int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp) npinfo->netpoll = np; - if (np->rx_skb_hook) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - npinfo->rx_flags |= NETPOLL_RX_ENABLED; - list_add_tail(&np->rx, &npinfo->rx_np); - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } - /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); @@ -1204,7 +760,7 @@ int netpoll_setup(struct netpoll *np) /* fill up the skb queue */ refill_skbs(); - err = __netpoll_setup(np, ndev, GFP_KERNEL); + err = __netpoll_setup(np, ndev); if (err) goto put; @@ -1231,7 +787,6 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); - skb_queue_purge(&npinfo->neigh_tx); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ @@ -1247,7 +802,6 @@ static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; - unsigned long flags; /* rtnl_dereference would be preferable here but * rcu_cleanup_netpoll path can put us in here safely without @@ -1257,14 +811,6 @@ void __netpoll_cleanup(struct netpoll *np) if (!npinfo) return; - if (!list_empty(&npinfo->rx_np)) { - spin_lock_irqsave(&npinfo->rx_lock, flags); - list_del(&np->rx); - if (list_empty(&npinfo->rx_np)) - npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; - spin_unlock_irqrestore(&npinfo->rx_lock, flags); - } - synchronize_srcu(&netpoll_srcu); if (atomic_dec_and_test(&npinfo->refcnt)) { @@ -1274,7 +820,7 @@ void __netpoll_cleanup(struct netpoll *np) if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); - rcu_assign_pointer(np->dev->npinfo, NULL); + RCU_INIT_POINTER(np->dev->npinfo, NULL); call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); } } @@ -1308,18 +854,3 @@ out: rtnl_unlock(); } EXPORT_SYMBOL(netpoll_cleanup); - -int netpoll_trap(void) -{ - return atomic_read(&trapped); -} -EXPORT_SYMBOL(netpoll_trap); - -void netpoll_set_trap(int trap) -{ - if (trap) - atomic_inc(&trapped); - else - atomic_dec(&trapped); -} -EXPORT_SYMBOL(netpoll_set_trap); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index fdac61cac1bd..d0dac57291af 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -476,23 +476,22 @@ static int pgctrl_show(struct seq_file *seq, void *v) static ssize_t pgctrl_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - int err = 0; char data[128]; struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id); - if (!capable(CAP_NET_ADMIN)) { - err = -EPERM; - goto out; - } + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (count == 0) + return -EINVAL; if (count > sizeof(data)) count = sizeof(data); - if (copy_from_user(data, buf, count)) { - err = -EFAULT; - goto out; - } - data[count - 1] = 0; /* Make string */ + if (copy_from_user(data, buf, count)) + return -EFAULT; + + data[count - 1] = 0; /* Strip trailing '\n' and terminate string */ if (!strcmp(data, "stop")) pktgen_stop_all_threads_ifs(pn); @@ -506,10 +505,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf, else pr_warning("Unknown command: %s\n", data); - err = count; - -out: - return err; + return count; } static int pgctrl_open(struct inode *inode, struct file *file) @@ -1251,7 +1247,13 @@ static ssize_t pktgen_if_write(struct file *file, "Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s", f, "IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, " - "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n"); + "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, " + "MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, " + "QUEUE_MAP_RND, QUEUE_MAP_CPU, UDPCSUM, " +#ifdef CONFIG_XFRM + "IPSEC, " +#endif + "NODE_ALLOC\n"); return count; } sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags); diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 4425148d2b51..467f326126e0 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -221,5 +221,4 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, out: spin_unlock_bh(&fastopenq->lock); sock_put(lsk); - return; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 120eecc0f5a4..e7c6006bc3ea 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1121,56 +1121,7 @@ nla_put_failure: return -EMSGSIZE; } -static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct net *net = sock_net(skb->sk); - int h, s_h; - int idx = 0, s_idx; - struct net_device *dev; - struct hlist_head *head; - struct nlattr *tb[IFLA_MAX+1]; - u32 ext_filter_mask = 0; - - s_h = cb->args[0]; - s_idx = cb->args[1]; - - rcu_read_lock(); - cb->seq = net->dev_base_seq; - - if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, - ifla_policy) >= 0) { - - if (tb[IFLA_EXT_MASK]) - ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); - } - - for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { - idx = 0; - head = &net->dev_index_head[h]; - hlist_for_each_entry_rcu(dev, head, index_hlist) { - if (idx < s_idx) - goto cont; - if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, - NLM_F_MULTI, - ext_filter_mask) <= 0) - goto out; - - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); -cont: - idx++; - } - } -out: - rcu_read_unlock(); - cb->args[1] = idx; - cb->args[0] = h; - - return skb->len; -} - -const struct nla_policy ifla_policy[IFLA_MAX+1] = { +static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, [IFLA_ADDRESS] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, [IFLA_BROADCAST] = { .type = NLA_BINARY, .len = MAX_ADDR_LEN }, @@ -1197,7 +1148,6 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_NUM_RX_QUEUES] = { .type = NLA_U32 }, [IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_PORT_ID_LEN }, }; -EXPORT_SYMBOL(ifla_policy); static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { [IFLA_INFO_KIND] = { .type = NLA_STRING }, @@ -1235,6 +1185,61 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, }; +static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + int h, s_h; + int idx = 0, s_idx; + struct net_device *dev; + struct hlist_head *head; + struct nlattr *tb[IFLA_MAX+1]; + u32 ext_filter_mask = 0; + + s_h = cb->args[0]; + s_idx = cb->args[1]; + + rcu_read_lock(); + cb->seq = net->dev_base_seq; + + if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, + ifla_policy) >= 0) { + + if (tb[IFLA_EXT_MASK]) + ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + } + + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry_rcu(dev, head, index_hlist) { + if (idx < s_idx) + goto cont; + if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, 0, + NLM_F_MULTI, + ext_filter_mask) <= 0) + goto out; + + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); +cont: + idx++; + } + } +out: + rcu_read_unlock(); + cb->args[1] = idx; + cb->args[0] = h; + + return skb->len; +} + +int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len) +{ + return nla_parse(tb, IFLA_MAX, head, len, ifla_policy); +} +EXPORT_SYMBOL(rtnl_nla_parse_ifla); + struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[]) { struct net *net; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 90b96a11b974..30c7d35dd862 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3300,6 +3300,32 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) return elt; } +/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given + * sglist without mark the sg which contain last skb data as the end. + * So the caller can mannipulate sg list as will when padding new data after + * the first call without calling sg_unmark_end to expend sg list. + * + * Scenario to use skb_to_sgvec_nomark: + * 1. sg_init_table + * 2. skb_to_sgvec_nomark(payload1) + * 3. skb_to_sgvec_nomark(payload2) + * + * This is equivalent to: + * 1. sg_init_table + * 2. skb_to_sgvec(payload1) + * 3. sg_unmark_end + * 4. skb_to_sgvec(payload2) + * + * When mapping mutilple payload conditionally, skb_to_sgvec_nomark + * is more preferable. + */ +int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, + int offset, int len) +{ + return __skb_to_sgvec(skb, sg, offset, len); +} +EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark); + int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) { int nsg = __skb_to_sgvec(skb, sg, offset, len); @@ -3562,15 +3588,47 @@ static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len, return 0; } +#define MAX_TCP_HDR_LEN (15 * 4) + +static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb, + typeof(IPPROTO_IP) proto, + unsigned int off) +{ + switch (proto) { + int err; + + case IPPROTO_TCP: + err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr), + off + MAX_TCP_HDR_LEN); + if (!err && !skb_partial_csum_set(skb, off, + offsetof(struct tcphdr, + check))) + err = -EPROTO; + return err ? ERR_PTR(err) : &tcp_hdr(skb)->check; + + case IPPROTO_UDP: + err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr), + off + sizeof(struct udphdr)); + if (!err && !skb_partial_csum_set(skb, off, + offsetof(struct udphdr, + check))) + err = -EPROTO; + return err ? ERR_PTR(err) : &udp_hdr(skb)->check; + } + + return ERR_PTR(-EPROTO); +} + /* This value should be large enough to cover a tagged ethernet header plus * maximally sized IP and TCP or UDP headers. */ #define MAX_IP_HDR_LEN 128 -static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate) +static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate) { unsigned int off; bool fragment; + __sum16 *csum; int err; fragment = false; @@ -3591,51 +3649,15 @@ static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate) if (fragment) goto out; - switch (ip_hdr(skb)->protocol) { - case IPPROTO_TCP: - err = skb_maybe_pull_tail(skb, - off + sizeof(struct tcphdr), - MAX_IP_HDR_LEN); - if (err < 0) - goto out; - - if (!skb_partial_csum_set(skb, off, - offsetof(struct tcphdr, check))) { - err = -EPROTO; - goto out; - } - - if (recalculate) - tcp_hdr(skb)->check = - ~csum_tcpudp_magic(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, - skb->len - off, - IPPROTO_TCP, 0); - break; - case IPPROTO_UDP: - err = skb_maybe_pull_tail(skb, - off + sizeof(struct udphdr), - MAX_IP_HDR_LEN); - if (err < 0) - goto out; - - if (!skb_partial_csum_set(skb, off, - offsetof(struct udphdr, check))) { - err = -EPROTO; - goto out; - } - - if (recalculate) - udp_hdr(skb)->check = - ~csum_tcpudp_magic(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, - skb->len - off, - IPPROTO_UDP, 0); - break; - default: - goto out; - } + csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off); + if (IS_ERR(csum)) + return PTR_ERR(csum); + if (recalculate) + *csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - off, + ip_hdr(skb)->protocol, 0); err = 0; out: @@ -3658,6 +3680,7 @@ static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate) unsigned int len; bool fragment; bool done; + __sum16 *csum; fragment = false; done = false; @@ -3735,51 +3758,14 @@ static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate) if (!done || fragment) goto out; - switch (nexthdr) { - case IPPROTO_TCP: - err = skb_maybe_pull_tail(skb, - off + sizeof(struct tcphdr), - MAX_IPV6_HDR_LEN); - if (err < 0) - goto out; - - if (!skb_partial_csum_set(skb, off, - offsetof(struct tcphdr, check))) { - err = -EPROTO; - goto out; - } - - if (recalculate) - tcp_hdr(skb)->check = - ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len - off, - IPPROTO_TCP, 0); - break; - case IPPROTO_UDP: - err = skb_maybe_pull_tail(skb, - off + sizeof(struct udphdr), - MAX_IPV6_HDR_LEN); - if (err < 0) - goto out; - - if (!skb_partial_csum_set(skb, off, - offsetof(struct udphdr, check))) { - err = -EPROTO; - goto out; - } - - if (recalculate) - udp_hdr(skb)->check = - ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len - off, - IPPROTO_UDP, 0); - break; - default: - goto out; - } + csum = skb_checksum_setup_ip(skb, nexthdr, off); + if (IS_ERR(csum)) + return PTR_ERR(csum); + if (recalculate) + *csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - off, nexthdr, 0); err = 0; out: @@ -3797,7 +3783,7 @@ int skb_checksum_setup(struct sk_buff *skb, bool recalculate) switch (skb->protocol) { case htons(ETH_P_IP): - err = skb_checksum_setup_ip(skb, recalculate); + err = skb_checksum_setup_ipv4(skb, recalculate); break; case htons(ETH_P_IPV6): |