Diffstat (limited to 'net/core/dev.c')
-rw-r--r--   net/core/dev.c   743
1 file changed, 35 insertions(+), 708 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index 2a352e668d10..84a0d9542fe9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -153,16 +153,10 @@
#include "net-sysfs.h"
-#define MAX_GRO_SKBS 8
-
-/* This should be increased if a protocol with a bigger head is added. */
-#define GRO_MAX_HEAD (MAX_HEADER + 128)
static DEFINE_SPINLOCK(ptype_lock);
-static DEFINE_SPINLOCK(offload_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly; /* Taps */
-static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
@@ -371,12 +365,12 @@ static void list_netdevice(struct net_device *dev)
ASSERT_RTNL();
- write_lock_bh(&dev_base_lock);
+ write_lock(&dev_base_lock);
list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
netdev_name_node_add(net, dev->name_node);
hlist_add_head_rcu(&dev->index_hlist,
dev_index_hash(net, dev->ifindex));
- write_unlock_bh(&dev_base_lock);
+ write_unlock(&dev_base_lock);
dev_base_seq_inc(net);
}
@@ -389,11 +383,11 @@ static void unlist_netdevice(struct net_device *dev)
ASSERT_RTNL();
/* Unlink dev from the device chain */
- write_lock_bh(&dev_base_lock);
+ write_lock(&dev_base_lock);
list_del_rcu(&dev->dev_list);
netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
- write_unlock_bh(&dev_base_lock);
+ write_unlock(&dev_base_lock);
dev_base_seq_inc(dev_net(dev));
}
@@ -604,84 +598,6 @@ void dev_remove_pack(struct packet_type *pt)
EXPORT_SYMBOL(dev_remove_pack);
-/**
- * dev_add_offload - register offload handlers
- * @po: protocol offload declaration
- *
- * Add protocol offload handlers to the networking stack. The passed
- * &proto_offload is linked into kernel lists and may not be freed until
- * it has been removed from the kernel lists.
- *
- * This call does not sleep, therefore it cannot guarantee that all
- * CPUs that are in the middle of receiving packets will see the new
- * offload handlers (until the next received packet).
- */
-void dev_add_offload(struct packet_offload *po)
-{
- struct packet_offload *elem;
-
- spin_lock(&offload_lock);
- list_for_each_entry(elem, &offload_base, list) {
- if (po->priority < elem->priority)
- break;
- }
- list_add_rcu(&po->list, elem->list.prev);
- spin_unlock(&offload_lock);
-}
-EXPORT_SYMBOL(dev_add_offload);
-
-/**
- * __dev_remove_offload - remove offload handler
- * @po: packet offload declaration
- *
- * Remove a protocol offload handler that was previously added to the
- * kernel offload handlers by dev_add_offload(). The passed &offload_type
- * is removed from the kernel lists and can be freed or reused once this
- * function returns.
- *
- * The packet type might still be in use by receivers
- * and must not be freed until after all the CPUs have gone
- * through a quiescent state.
- */
-static void __dev_remove_offload(struct packet_offload *po)
-{
- struct list_head *head = &offload_base;
- struct packet_offload *po1;
-
- spin_lock(&offload_lock);
-
- list_for_each_entry(po1, head, list) {
- if (po == po1) {
- list_del_rcu(&po->list);
- goto out;
- }
- }
-
- pr_warn("dev_remove_offload: %p not found\n", po);
-out:
- spin_unlock(&offload_lock);
-}
-
-/**
- * dev_remove_offload - remove packet offload handler
- * @po: packet offload declaration
- *
- * Remove a packet offload handler that was previously added to the kernel
- * offload handlers by dev_add_offload(). The passed &offload_type is
- * removed from the kernel lists and can be freed or reused once this
- * function returns.
- *
- * This call sleeps to guarantee that no CPU is looking at the packet
- * type after return.
- */
-void dev_remove_offload(struct packet_offload *po)
-{
- __dev_remove_offload(po);
-
- synchronize_net();
-}
-EXPORT_SYMBOL(dev_remove_offload);
-
/*******************************************************************************
*
* Device Interface Subroutines
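For reference, the registration API removed above is used by protocols roughly as IPv4 does in net/ipv4/af_inet.c; a minimal sketch (close to, but not verbatim, the kernel source):

/* Sketch of a protocol registering its GSO/GRO callbacks, modeled on
 * the IPv4 registration in net/ipv4/af_inet.c.
 */
static struct packet_offload ip_packet_offload __read_mostly = {
	.type = cpu_to_be16(ETH_P_IP),
	.callbacks = {
		.gso_segment = inet_gso_segment,
		.gro_receive = inet_gro_receive,
		.gro_complete = inet_gro_complete,
	},
};

static int __init ipv4_offload_init(void)
{
	dev_add_offload(&ip_packet_offload);
	return 0;
}

Since this diff is limited to net/core/dev.c, the helpers are presumably relocated rather than dropped, so the registration API itself stays available to such callers.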
@@ -1272,15 +1188,15 @@ rollback:
netdev_adjacent_rename_links(dev, oldname);
- write_lock_bh(&dev_base_lock);
+ write_lock(&dev_base_lock);
netdev_name_node_del(dev->name_node);
- write_unlock_bh(&dev_base_lock);
+ write_unlock(&dev_base_lock);
synchronize_rcu();
- write_lock_bh(&dev_base_lock);
+ write_lock(&dev_base_lock);
netdev_name_node_add(net, dev->name_node);
- write_unlock_bh(&dev_base_lock);
+ write_unlock(&dev_base_lock);
ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
ret = notifier_to_errno(ret);
@@ -1461,6 +1377,7 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
int ret;
ASSERT_RTNL();
+ dev_addr_check(dev);
if (!netif_device_present(dev)) {
/* may be detached because parent is runtime-suspended */
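The new dev_addr_check() call guards against drivers writing dev->dev_addr directly instead of going through the address helpers. A rough sketch of the idea (the shadow-copy comparison is an assumption about the helper added elsewhere in this series):

/* Sketch: compare the address against a shadow copy kept by the core
 * to catch direct writes to dev->dev_addr (assumed implementation).
 */
static void dev_addr_check(struct net_device *dev)
{
	if (!memcmp(dev->dev_addr, dev->dev_addr_shadow, MAX_ADDR_LEN))
		return;

	netdev_warn(dev, "netdev->dev_addr was modified directly\n");
}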
@@ -3315,40 +3232,6 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
return __vlan_get_protocol(skb, type, depth);
}
-/**
- * skb_mac_gso_segment - mac layer segmentation handler.
- * @skb: buffer to segment
- * @features: features for the output path (see dev->features)
- */
-struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
- netdev_features_t features)
-{
- struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
- struct packet_offload *ptype;
- int vlan_depth = skb->mac_len;
- __be16 type = skb_network_protocol(skb, &vlan_depth);
-
- if (unlikely(!type))
- return ERR_PTR(-EINVAL);
-
- __skb_pull(skb, vlan_depth);
-
- rcu_read_lock();
- list_for_each_entry_rcu(ptype, &offload_base, list) {
- if (ptype->type == type && ptype->callbacks.gso_segment) {
- segs = ptype->callbacks.gso_segment(skb, features);
- break;
- }
- }
- rcu_read_unlock();
-
- __skb_push(skb, skb->data - skb_mac_header(skb));
-
- return segs;
-}
-EXPORT_SYMBOL(skb_mac_gso_segment);
-
-
/* openvswitch calls this on rx path, so we need a different check.
*/
static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
@@ -3513,7 +3396,7 @@ static netdev_features_t gso_features_check(const struct sk_buff *skb,
{
u16 gso_segs = skb_shinfo(skb)->gso_segs;
- if (gso_segs > dev->gso_max_segs)
+ if (gso_segs > READ_ONCE(dev->gso_max_segs))
return features & ~NETIF_F_GSO_MASK;
if (!skb_shinfo(skb)->gso_type) {
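The READ_ONCE() marks a lockless read: gso_max_segs can be rewritten concurrently (e.g. via rtnetlink), so the writer side is expected to carry the matching annotation, roughly like this (a sketch; the helper name follows the stack's netif_set_* convention and is an assumption):

/* Writer side pairing with the lockless reader above (sketch). */
static inline void netif_set_gso_max_segs(struct net_device *dev,
					  unsigned int segs)
{
	/* dev->gso_max_segs is read locklessly from the TX path */
	WRITE_ONCE(dev->gso_max_segs, segs);
}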
@@ -3836,8 +3719,12 @@ no_lock_out:
* separate lock before trying to get qdisc main lock.
* This permits qdisc->running owner to get the lock more
* often and dequeue packets faster.
+ * On PREEMPT_RT it is possible to preempt the qdisc owner during xmit
+ * and then other tasks will only enqueue packets. The packets will be
+ * sent after the qdisc owner is scheduled again. To prevent this
+ * scenario, the tasks always serialize on the lock.
*/
- contended = qdisc_is_running(q);
+ contended = qdisc_is_running(q) || IS_ENABLED(CONFIG_PREEMPT_RT);
if (unlikely(contended))
spin_lock(&q->busylock);
@@ -3941,8 +3828,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
return skb;
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
- qdisc_skb_cb(skb)->mru = 0;
- qdisc_skb_cb(skb)->post_ct = false;
+ tc_skb_cb(skb)->mru = 0;
+ tc_skb_cb(skb)->post_ct = false;
mini_qdisc_bstats_cpu_update(miniq, skb);
switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
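tc_skb_cb() layers a TC-private control block on top of qdisc_skb_cb, which is where mru and post_ct now live; roughly (a sketch of the layout this change relies on, not necessarily verbatim):

/* Sketch of the TC control block layered on qdisc_skb_cb. */
struct tc_skb_cb {
	struct qdisc_skb_cb qdisc_cb;	/* must stay first */
	u16 mru;
	bool post_ct;
};

static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
{
	struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;

	/* the whole block must still fit in skb->cb[] */
	BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
	return cb;
}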
@@ -4323,8 +4210,6 @@ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */
int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */
int dev_rx_weight __read_mostly = 64;
int dev_tx_weight __read_mostly = 64;
-/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
-int gro_normal_batch __read_mostly = 8;
/* Called with irq disabled */
static inline void ____napi_schedule(struct softnet_data *sd,
@@ -4827,7 +4712,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
case XDP_PASS:
break;
default:
- bpf_warn_invalid_xdp_action(act);
+ bpf_warn_invalid_xdp_action(skb->dev, xdp_prog, act);
fallthrough;
case XDP_ABORTED:
trace_xdp_exception(skb->dev, xdp_prog, act);
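The warning helper now takes the device and program so the report can name the offender; the updated prototype, as implied by the call site above:

/* Prototype implied by the updated call site. */
void bpf_warn_invalid_xdp_action(struct net_device *dev,
				 struct bpf_prog *prog, u32 act);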
@@ -5014,7 +4899,8 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
trace_consume_skb(skb);
else
- trace_kfree_skb(skb, net_tx_action);
+ trace_kfree_skb(skb, net_tx_action,
+ SKB_DROP_REASON_NOT_SPECIFIED);
if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
__kfree_skb(skb);
@@ -5103,8 +4989,8 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
}
qdisc_skb_cb(skb)->pkt_len = skb->len;
- qdisc_skb_cb(skb)->mru = 0;
- qdisc_skb_cb(skb)->post_ct = false;
+ tc_skb_cb(skb)->mru = 0;
+ tc_skb_cb(skb)->post_ct = false;
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
@@ -5667,7 +5553,7 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
return ret;
}
-static void netif_receive_skb_list_internal(struct list_head *head)
+void netif_receive_skb_list_internal(struct list_head *head)
{
struct sk_buff *skb, *next;
struct list_head sublist;
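Dropping static here matches the GRO batching helpers deleted below: their new home outside dev.c still needs this list-RX entry point, so a declaration presumably moves to a shared header, along these lines:

/* Shared declaration (sketch), e.g. in include/net/gro.h */
void netif_receive_skb_list_internal(struct list_head *head);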
@@ -5845,550 +5731,6 @@ static void flush_all_backlogs(void)
cpus_read_unlock();
}
-/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */
-static void gro_normal_list(struct napi_struct *napi)
-{
- if (!napi->rx_count)
- return;
- netif_receive_skb_list_internal(&napi->rx_list);
- INIT_LIST_HEAD(&napi->rx_list);
- napi->rx_count = 0;
-}
-
-/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded,
- * pass the whole batch up to the stack.
- */
-static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb, int segs)
-{
- list_add_tail(&skb->list, &napi->rx_list);
- napi->rx_count += segs;
- if (napi->rx_count >= gro_normal_batch)
- gro_normal_list(napi);
-}
-
-static void napi_gro_complete(struct napi_struct *napi, struct sk_buff *skb)
-{
- struct packet_offload *ptype;
- __be16 type = skb->protocol;
- struct list_head *head = &offload_base;
- int err = -ENOENT;
-
- BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb));
-
- if (NAPI_GRO_CB(skb)->count == 1) {
- skb_shinfo(skb)->gso_size = 0;
- goto out;
- }
-
- rcu_read_lock();
- list_for_each_entry_rcu(ptype, head, list) {
- if (ptype->type != type || !ptype->callbacks.gro_complete)
- continue;
-
- err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
- ipv6_gro_complete, inet_gro_complete,
- skb, 0);
- break;
- }
- rcu_read_unlock();
-
- if (err) {
- WARN_ON(&ptype->list == head);
- kfree_skb(skb);
- return;
- }
-
-out:
- gro_normal_one(napi, skb, NAPI_GRO_CB(skb)->count);
-}
-
-static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
- bool flush_old)
-{
- struct list_head *head = &napi->gro_hash[index].list;
- struct sk_buff *skb, *p;
-
- list_for_each_entry_safe_reverse(skb, p, head, list) {
- if (flush_old && NAPI_GRO_CB(skb)->age == jiffies)
- return;
- skb_list_del_init(skb);
- napi_gro_complete(napi, skb);
- napi->gro_hash[index].count--;
- }
-
- if (!napi->gro_hash[index].count)
- __clear_bit(index, &napi->gro_bitmask);
-}
-
-/* napi->gro_hash[].list contains packets ordered by age, with the
- * youngest packets at its head.
- * Complete skbs in reverse order to reduce latencies.
- */
-void napi_gro_flush(struct napi_struct *napi, bool flush_old)
-{
- unsigned long bitmask = napi->gro_bitmask;
- unsigned int i, base = ~0U;
-
- while ((i = ffs(bitmask)) != 0) {
- bitmask >>= i;
- base += i;
- __napi_gro_flush_chain(napi, base, flush_old);
- }
-}
-EXPORT_SYMBOL(napi_gro_flush);
-
-static void gro_list_prepare(const struct list_head *head,
- const struct sk_buff *skb)
-{
- unsigned int maclen = skb->dev->hard_header_len;
- u32 hash = skb_get_hash_raw(skb);
- struct sk_buff *p;
-
- list_for_each_entry(p, head, list) {
- unsigned long diffs;
-
- NAPI_GRO_CB(p)->flush = 0;
-
- if (hash != skb_get_hash_raw(p)) {
- NAPI_GRO_CB(p)->same_flow = 0;
- continue;
- }
-
- diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
- diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
- if (skb_vlan_tag_present(p))
- diffs |= skb_vlan_tag_get(p) ^ skb_vlan_tag_get(skb);
- diffs |= skb_metadata_differs(p, skb);
- if (maclen == ETH_HLEN)
- diffs |= compare_ether_header(skb_mac_header(p),
- skb_mac_header(skb));
- else if (!diffs)
- diffs = memcmp(skb_mac_header(p),
- skb_mac_header(skb),
- maclen);
-
- /* in most common scenarios 'slow_gro' is 0;
- * otherwise we are already on some slower paths,
- * so either skip all the infrequent tests altogether or
- * avoid trying too hard to skip each of them individually
- */
- if (!diffs && unlikely(skb->slow_gro | p->slow_gro)) {
-#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- struct tc_skb_ext *skb_ext;
- struct tc_skb_ext *p_ext;
-#endif
-
- diffs |= p->sk != skb->sk;
- diffs |= skb_metadata_dst_cmp(p, skb);
- diffs |= skb_get_nfct(p) ^ skb_get_nfct(skb);
-
-#if IS_ENABLED(CONFIG_SKB_EXTENSIONS) && IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
- skb_ext = skb_ext_find(skb, TC_SKB_EXT);
- p_ext = skb_ext_find(p, TC_SKB_EXT);
-
- diffs |= (!!p_ext) ^ (!!skb_ext);
- if (!diffs && unlikely(skb_ext))
- diffs |= p_ext->chain ^ skb_ext->chain;
-#endif
- }
-
- NAPI_GRO_CB(p)->same_flow = !diffs;
- }
-}
-
-static inline void skb_gro_reset_offset(struct sk_buff *skb, u32 nhoff)
-{
- const struct skb_shared_info *pinfo = skb_shinfo(skb);
- const skb_frag_t *frag0 = &pinfo->frags[0];
-
- NAPI_GRO_CB(skb)->data_offset = 0;
- NAPI_GRO_CB(skb)->frag0 = NULL;
- NAPI_GRO_CB(skb)->frag0_len = 0;
-
- if (!skb_headlen(skb) && pinfo->nr_frags &&
- !PageHighMem(skb_frag_page(frag0)) &&
- (!NET_IP_ALIGN || !((skb_frag_off(frag0) + nhoff) & 3))) {
- NAPI_GRO_CB(skb)->frag0 = skb_frag_address(frag0);
- NAPI_GRO_CB(skb)->frag0_len = min_t(unsigned int,
- skb_frag_size(frag0),
- skb->end - skb->tail);
- }
-}
-
-static void gro_pull_from_frag0(struct sk_buff *skb, int grow)
-{
- struct skb_shared_info *pinfo = skb_shinfo(skb);
-
- BUG_ON(skb->end - skb->tail < grow);
-
- memcpy(skb_tail_pointer(skb), NAPI_GRO_CB(skb)->frag0, grow);
-
- skb->data_len -= grow;
- skb->tail += grow;
-
- skb_frag_off_add(&pinfo->frags[0], grow);
- skb_frag_size_sub(&pinfo->frags[0], grow);
-
- if (unlikely(!skb_frag_size(&pinfo->frags[0]))) {
- skb_frag_unref(skb, 0);
- memmove(pinfo->frags, pinfo->frags + 1,
- --pinfo->nr_frags * sizeof(pinfo->frags[0]));
- }
-}
-
-static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head)
-{
- struct sk_buff *oldest;
-
- oldest = list_last_entry(head, struct sk_buff, list);
-
- /* We are called with head length >= MAX_GRO_SKBS, so the list is
- * non-empty and 'oldest' cannot be NULL.
- */
- if (WARN_ON_ONCE(!oldest))
- return;
-
- /* Do not adjust napi->gro_hash[].count, caller is adding a new
- * SKB to the chain.
- */
- skb_list_del_init(oldest);
- napi_gro_complete(napi, oldest);
-}
-
-static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
-{
- u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
- struct gro_list *gro_list = &napi->gro_hash[bucket];
- struct list_head *head = &offload_base;
- struct packet_offload *ptype;
- __be16 type = skb->protocol;
- struct sk_buff *pp = NULL;
- enum gro_result ret;
- int same_flow;
- int grow;
-
- if (netif_elide_gro(skb->dev))
- goto normal;
-
- gro_list_prepare(&gro_list->list, skb);
-
- rcu_read_lock();
- list_for_each_entry_rcu(ptype, head, list) {
- if (ptype->type != type || !ptype->callbacks.gro_receive)
- continue;
-
- skb_set_network_header(skb, skb_gro_offset(skb));
- skb_reset_mac_len(skb);
- NAPI_GRO_CB(skb)->same_flow = 0;
- NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
- NAPI_GRO_CB(skb)->free = 0;
- NAPI_GRO_CB(skb)->encap_mark = 0;
- NAPI_GRO_CB(skb)->recursion_counter = 0;
- NAPI_GRO_CB(skb)->is_fou = 0;
- NAPI_GRO_CB(skb)->is_atomic = 1;
- NAPI_GRO_CB(skb)->gro_remcsum_start = 0;
-
- /* Setup for GRO checksum validation */
- switch (skb->ip_summed) {
- case CHECKSUM_COMPLETE:
- NAPI_GRO_CB(skb)->csum = skb->csum;
- NAPI_GRO_CB(skb)->csum_valid = 1;
- NAPI_GRO_CB(skb)->csum_cnt = 0;
- break;
- case CHECKSUM_UNNECESSARY:
- NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1;
- NAPI_GRO_CB(skb)->csum_valid = 0;
- break;
- default:
- NAPI_GRO_CB(skb)->csum_cnt = 0;
- NAPI_GRO_CB(skb)->csum_valid = 0;
- }
-
- pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
- ipv6_gro_receive, inet_gro_receive,
- &gro_list->list, skb);
- break;
- }
- rcu_read_unlock();
-
- if (&ptype->list == head)
- goto normal;
-
- if (PTR_ERR(pp) == -EINPROGRESS) {
- ret = GRO_CONSUMED;
- goto ok;
- }
-
- same_flow = NAPI_GRO_CB(skb)->same_flow;
- ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
-
- if (pp) {
- skb_list_del_init(pp);
- napi_gro_complete(napi, pp);
- gro_list->count--;
- }
-
- if (same_flow)
- goto ok;
-
- if (NAPI_GRO_CB(skb)->flush)
- goto normal;
-
- if (unlikely(gro_list->count >= MAX_GRO_SKBS))
- gro_flush_oldest(napi, &gro_list->list);
- else
- gro_list->count++;
-
- NAPI_GRO_CB(skb)->count = 1;
- NAPI_GRO_CB(skb)->age = jiffies;
- NAPI_GRO_CB(skb)->last = skb;
- skb_shinfo(skb)->gso_size = skb_gro_len(skb);
- list_add(&skb->list, &gro_list->list);
- ret = GRO_HELD;
-
-pull:
- grow = skb_gro_offset(skb) - skb_headlen(skb);
- if (grow > 0)
- gro_pull_from_frag0(skb, grow);
-ok:
- if (gro_list->count) {
- if (!test_bit(bucket, &napi->gro_bitmask))
- __set_bit(bucket, &napi->gro_bitmask);
- } else if (test_bit(bucket, &napi->gro_bitmask)) {
- __clear_bit(bucket, &napi->gro_bitmask);
- }
-
- return ret;
-
-normal:
- ret = GRO_NORMAL;
- goto pull;
-}
-
-struct packet_offload *gro_find_receive_by_type(__be16 type)
-{
- struct list_head *offload_head = &offload_base;
- struct packet_offload *ptype;
-
- list_for_each_entry_rcu(ptype, offload_head, list) {
- if (ptype->type != type || !ptype->callbacks.gro_receive)
- continue;
- return ptype;
- }
- return NULL;
-}
-EXPORT_SYMBOL(gro_find_receive_by_type);
-
-struct packet_offload *gro_find_complete_by_type(__be16 type)
-{
- struct list_head *offload_head = &offload_base;
- struct packet_offload *ptype;
-
- list_for_each_entry_rcu(ptype, offload_head, list) {
- if (ptype->type != type || !ptype->callbacks.gro_complete)
- continue;
- return ptype;
- }
- return NULL;
-}
-EXPORT_SYMBOL(gro_find_complete_by_type);
-
-static gro_result_t napi_skb_finish(struct napi_struct *napi,
- struct sk_buff *skb,
- gro_result_t ret)
-{
- switch (ret) {
- case GRO_NORMAL:
- gro_normal_one(napi, skb, 1);
- break;
-
- case GRO_MERGED_FREE:
- if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
- napi_skb_free_stolen_head(skb);
- else if (skb->fclone != SKB_FCLONE_UNAVAILABLE)
- __kfree_skb(skb);
- else
- __kfree_skb_defer(skb);
- break;
-
- case GRO_HELD:
- case GRO_MERGED:
- case GRO_CONSUMED:
- break;
- }
-
- return ret;
-}
-
-gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
-{
- gro_result_t ret;
-
- skb_mark_napi_id(skb, napi);
- trace_napi_gro_receive_entry(skb);
-
- skb_gro_reset_offset(skb, 0);
-
- ret = napi_skb_finish(napi, skb, dev_gro_receive(napi, skb));
- trace_napi_gro_receive_exit(ret);
-
- return ret;
-}
-EXPORT_SYMBOL(napi_gro_receive);
-
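napi_gro_receive() (removed above) is the entry point a driver calls from its NAPI poll loop; for context, a minimal hypothetical driver fragment (all mydrv_* names are illustrative, not kernel APIs):

/* Hypothetical driver poll loop feeding GRO (mydrv_* is illustrative;
 * mydrv_next_rx_skb() stands in for a driver-specific ring dequeue).
 */
static int mydrv_poll(struct napi_struct *napi, int budget)
{
	struct mydrv_ring *ring = container_of(napi, struct mydrv_ring, napi);
	int work_done = 0;

	while (work_done < budget) {
		struct sk_buff *skb = mydrv_next_rx_skb(ring);

		if (!skb)
			break;
		napi_gro_receive(napi, skb);	/* GRO merge or batched list-RX */
		work_done++;
	}

	if (work_done < budget)
		napi_complete_done(napi, work_done);
	return work_done;
}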
-static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
-{
- if (unlikely(skb->pfmemalloc)) {
- consume_skb(skb);
- return;
- }
- __skb_pull(skb, skb_headlen(skb));
- /* restore the reserve we had after netdev_alloc_skb_ip_align() */
- skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
- __vlan_hwaccel_clear_tag(skb);
- skb->dev = napi->dev;
- skb->skb_iif = 0;
-
- /* eth_type_trans() assumes pkt_type is PACKET_HOST */
- skb->pkt_type = PACKET_HOST;
-
- skb->encapsulation = 0;
- skb_shinfo(skb)->gso_type = 0;
- skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
- if (unlikely(skb->slow_gro)) {
- skb_orphan(skb);
- skb_ext_reset(skb);
- nf_reset_ct(skb);
- skb->slow_gro = 0;
- }
-
- napi->skb = skb;
-}
-
-struct sk_buff *napi_get_frags(struct napi_struct *napi)
-{
- struct sk_buff *skb = napi->skb;
-
- if (!skb) {
- skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
- if (skb) {
- napi->skb = skb;
- skb_mark_napi_id(skb, napi);
- }
- }
- return skb;
-}
-EXPORT_SYMBOL(napi_get_frags);
-
-static gro_result_t napi_frags_finish(struct napi_struct *napi,
- struct sk_buff *skb,
- gro_result_t ret)
-{
- switch (ret) {
- case GRO_NORMAL:
- case GRO_HELD:
- __skb_push(skb, ETH_HLEN);
- skb->protocol = eth_type_trans(skb, skb->dev);
- if (ret == GRO_NORMAL)
- gro_normal_one(napi, skb, 1);
- break;
-
- case GRO_MERGED_FREE:
- if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
- napi_skb_free_stolen_head(skb);
- else
- napi_reuse_skb(napi, skb);
- break;
-
- case GRO_MERGED:
- case GRO_CONSUMED:
- break;
- }
-
- return ret;
-}
-
-/* Upper GRO stack assumes network header starts at gro_offset=0.
- * Drivers could call both napi_gro_frags() and napi_gro_receive().
- * We copy the Ethernet header into skb->data to have a common layout.
- */
-static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
-{
- struct sk_buff *skb = napi->skb;
- const struct ethhdr *eth;
- unsigned int hlen = sizeof(*eth);
-
- napi->skb = NULL;
-
- skb_reset_mac_header(skb);
- skb_gro_reset_offset(skb, hlen);
-
- if (unlikely(skb_gro_header_hard(skb, hlen))) {
- eth = skb_gro_header_slow(skb, hlen, 0);
- if (unlikely(!eth)) {
- net_warn_ratelimited("%s: dropping impossible skb from %s\n",
- __func__, napi->dev->name);
- napi_reuse_skb(napi, skb);
- return NULL;
- }
- } else {
- eth = (const struct ethhdr *)skb->data;
- gro_pull_from_frag0(skb, hlen);
- NAPI_GRO_CB(skb)->frag0 += hlen;
- NAPI_GRO_CB(skb)->frag0_len -= hlen;
- }
- __skb_pull(skb, hlen);
-
- /*
- * This works because the only protocols we care about don't require
- * special handling.
- * We'll fix it up properly in napi_frags_finish()
- */
- skb->protocol = eth->h_proto;
-
- return skb;
-}
-
-gro_result_t napi_gro_frags(struct napi_struct *napi)
-{
- gro_result_t ret;
- struct sk_buff *skb = napi_frags_skb(napi);
-
- trace_napi_gro_frags_entry(skb);
-
- ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
- trace_napi_gro_frags_exit(ret);
-
- return ret;
-}
-EXPORT_SYMBOL(napi_gro_frags);
-
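napi_get_frags()/napi_gro_frags() (removed above) serve drivers that receive into page fragments with no linear header; the calling pattern is roughly the following sketch, where page/offset/len are assumed to come from the driver's RX ring:

/* Sketch of the frag-based RX pattern (page/offset/len from the ring). */
struct sk_buff *skb = napi_get_frags(napi);

if (unlikely(!skb))
	return;

skb_fill_page_desc(skb, 0, page, offset, len);
skb->len += len;
skb->data_len += len;
skb->truesize += len;

napi_gro_frags(napi);	/* consumes napi->skb, pulls the Ethernet header */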
-/* Compute the checksum from gro_offset and return the folded value
- * after adding in any pseudo checksum.
- */
-__sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
-{
- __wsum wsum;
- __sum16 sum;
-
- wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0);
-
- /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */
- sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
- /* See comments in __skb_checksum_complete(). */
- if (likely(!sum)) {
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
- !skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev, skb);
- }
-
- NAPI_GRO_CB(skb)->csum = wsum;
- NAPI_GRO_CB(skb)->csum_valid = 1;
-
- return sum;
-}
-EXPORT_SYMBOL(__skb_gro_checksum_complete);
-
static void net_rps_send_ipi(struct softnet_data *remsd)
{
#ifdef CONFIG_RPS
@@ -7200,6 +6542,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
struct netdev_adjacent {
struct net_device *dev;
+ netdevice_tracker dev_tracker;
/* upper master flag, there can only be one master device per list */
bool master;
@@ -7964,7 +7307,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
adj->ref_nr = 1;
adj->private = private;
adj->ignore = false;
- dev_hold(adj_dev);
+ dev_hold_track(adj_dev, &adj->dev_tracker, GFP_KERNEL);
pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);
@@ -7993,8 +7336,8 @@ remove_symlinks:
if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list))
netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list);
free_adj:
+ dev_put_track(adj_dev, &adj->dev_tracker);
kfree(adj);
- dev_put(adj_dev);
return ret;
}
@@ -8035,7 +7378,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
list_del_rcu(&adj->list);
pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n",
adj_dev->name, dev->name, adj_dev->name);
- dev_put(adj_dev);
+ dev_put_track(adj_dev, &adj->dev_tracker);
kfree_rcu(adj, rcu);
}
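dev_hold_track()/dev_put_track() pair a plain reference with a netdevice_tracker entry, so a leaked hold can be attributed when netdev_wait_allrefs() later calls ref_tracker_dir_print() (see the hunk further down). The general pairing, as a sketch with an illustrative mydrv_* holder:

/* General pairing of the tracked refcount helpers (sketch). */
struct mydrv_bind {			/* hypothetical holder */
	struct net_device *lower;
	netdevice_tracker lower_tracker;
};

static void mydrv_bind_dev(struct mydrv_bind *b, struct net_device *lower)
{
	b->lower = lower;
	dev_hold_track(lower, &b->lower_tracker, GFP_KERNEL);
}

static void mydrv_unbind_dev(struct mydrv_bind *b)
{
	dev_put_track(b->lower, &b->lower_tracker);
	b->lower = NULL;
}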
@@ -9224,35 +8567,17 @@ bool netdev_port_same_parent_id(struct net_device *a, struct net_device *b)
EXPORT_SYMBOL(netdev_port_same_parent_id);
/**
- * dev_change_proto_down - update protocol port state information
+ * dev_change_proto_down - set carrier according to proto_down.
+ *
* @dev: device
* @proto_down: new value
- *
- * This info can be used by switch drivers to set the phys state of the
- * port.
*/
int dev_change_proto_down(struct net_device *dev, bool proto_down)
{
- const struct net_device_ops *ops = dev->netdev_ops;
-
- if (!ops->ndo_change_proto_down)
+ if (!(dev->priv_flags & IFF_CHANGE_PROTO_DOWN))
return -EOPNOTSUPP;
if (!netif_device_present(dev))
return -ENODEV;
- return ops->ndo_change_proto_down(dev, proto_down);
-}
-EXPORT_SYMBOL(dev_change_proto_down);
-
-/**
- * dev_change_proto_down_generic - generic implementation for
- * ndo_change_proto_down that sets carrier according to
- * proto_down.
- *
- * @dev: device
- * @proto_down: new value
- */
-int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
-{
if (proto_down)
netif_carrier_off(dev);
else
@@ -9260,7 +8585,7 @@ int dev_change_proto_down_generic(struct net_device *dev, bool proto_down)
dev->proto_down = proto_down;
return 0;
}
-EXPORT_SYMBOL(dev_change_proto_down_generic);
+EXPORT_SYMBOL(dev_change_proto_down);
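With the ndo_change_proto_down hook gone, a driver that wants this generic carrier-toggling behaviour opts in via the flag checked above, typically at probe time:

/* Driver opt-in replacing the old ndo_change_proto_down hook. */
dev->priv_flags |= IFF_CHANGE_PROTO_DOWN;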
/**
* dev_change_proto_down_reason - proto down reason
@@ -10545,6 +9870,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
netdev_unregister_timeout_secs * HZ)) {
pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n",
dev->name, refcnt);
+ ref_tracker_dir_print(&dev->refcnt_tracker, 10);
warning_time = jiffies;
}
}
@@ -10835,6 +10161,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev = PTR_ALIGN(p, NETDEV_ALIGN);
dev->padded = (char *)dev - (char *)p;
+ ref_tracker_dir_init(&dev->refcnt_tracker, 128);
#ifdef CONFIG_PCPU_DEV_REFCNT
dev->pcpu_refcnt = alloc_percpu(int);
if (!dev->pcpu_refcnt)
@@ -10854,6 +10181,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->gso_max_size = GSO_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
+ dev->gro_max_size = GRO_MAX_SIZE;
dev->upper_level = 1;
dev->lower_level = 1;
#ifdef CONFIG_LOCKDEP
@@ -10951,6 +10279,7 @@ void free_netdev(struct net_device *dev)
list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
netif_napi_del(p);
+ ref_tracker_dir_exit(&dev->refcnt_tracker);
#ifdef CONFIG_PCPU_DEV_REFCNT
free_percpu(dev->pcpu_refcnt);
dev->pcpu_refcnt = NULL;
@@ -11643,8 +10972,6 @@ static int __init net_dev_init(void)
for (i = 0; i < PTYPE_HASH_SIZE; i++)
INIT_LIST_HEAD(&ptype_base[i]);
- INIT_LIST_HEAD(&offload_base);
-
if (register_pernet_subsys(&netdev_net_ops))
goto out;