aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c232
1 files changed, 119 insertions, 113 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index f411c28d0a66..7fe82929f509 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1694,6 +1694,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
skb_scrub_packet(skb, true);
skb->protocol = eth_type_trans(skb, dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
return 0;
}
@@ -2351,7 +2352,6 @@ EXPORT_SYMBOL(skb_checksum_help);
__be16 skb_network_protocol(struct sk_buff *skb, int *depth)
{
- unsigned int vlan_depth = skb->mac_len;
__be16 type = skb->protocol;
/* Tunnel gso handlers can set protocol to ethernet. */
@@ -2365,35 +2365,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
type = eth->h_proto;
}
- /* if skb->protocol is 802.1Q/AD then the header should already be
- * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
- * ETH_HLEN otherwise
- */
- if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
- if (vlan_depth) {
- if (WARN_ON(vlan_depth < VLAN_HLEN))
- return 0;
- vlan_depth -= VLAN_HLEN;
- } else {
- vlan_depth = ETH_HLEN;
- }
- do {
- struct vlan_hdr *vh;
-
- if (unlikely(!pskb_may_pull(skb,
- vlan_depth + VLAN_HLEN)))
- return 0;
-
- vh = (struct vlan_hdr *)(skb->data + vlan_depth);
- type = vh->h_vlan_encapsulated_proto;
- vlan_depth += VLAN_HLEN;
- } while (type == htons(ETH_P_8021Q) ||
- type == htons(ETH_P_8021AD));
- }
-
- *depth = vlan_depth;
-
- return type;
+ return __vlan_get_protocol(skb, type, depth);
}
/**
@@ -2522,7 +2494,7 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
/* If MPLS offload request, verify we are testing hardware MPLS features
* instead of standard features for the netdev.
*/
-#ifdef CONFIG_NET_MPLS_GSO
+#if IS_ENABLED(CONFIG_NET_MPLS_GSO)
static netdev_features_t net_mpls_features(struct sk_buff *skb,
netdev_features_t features,
__be16 type)
@@ -2562,7 +2534,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
netdev_features_t netif_skb_features(struct sk_buff *skb)
{
- const struct net_device *dev = skb->dev;
+ struct net_device *dev = skb->dev;
netdev_features_t features = dev->features;
u16 gso_segs = skb_shinfo(skb)->gso_segs;
__be16 protocol = skb->protocol;
@@ -2570,11 +2542,21 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
features &= ~NETIF_F_GSO_MASK;
- if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
- struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
- protocol = veh->h_vlan_encapsulated_proto;
- } else if (!vlan_tx_tag_present(skb)) {
- return harmonize_features(skb, features);
+ /* If encapsulation offload request, verify we are testing
+ * hardware encapsulation features instead of standard
+ * features for the netdev
+ */
+ if (skb->encapsulation)
+ features &= dev->hw_enc_features;
+
+ if (!vlan_tx_tag_present(skb)) {
+ if (unlikely(protocol == htons(ETH_P_8021Q) ||
+ protocol == htons(ETH_P_8021AD))) {
+ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+ protocol = veh->h_vlan_encapsulated_proto;
+ } else {
+ goto finalize;
+ }
}
features = netdev_intersect_features(features,
@@ -2591,6 +2573,11 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
+finalize:
+ if (dev->netdev_ops->ndo_features_check)
+ features &= dev->netdev_ops->ndo_features_check(skb, dev,
+ features);
+
return harmonize_features(skb, features);
}
EXPORT_SYMBOL(netif_skb_features);
@@ -2661,19 +2648,12 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
if (unlikely(!skb))
goto out_null;
- /* If encapsulation offload request, verify we are testing
- * hardware encapsulation features instead of standard
- * features for the netdev
- */
- if (skb->encapsulation)
- features &= dev->hw_enc_features;
-
if (netif_needs_gso(dev, skb, features)) {
struct sk_buff *segs;
segs = skb_gso_segment(skb, features);
if (IS_ERR(segs)) {
- segs = NULL;
+ goto out_kfree_skb;
} else if (segs) {
consume_skb(skb);
skb = segs;
@@ -4557,6 +4537,68 @@ void netif_napi_del(struct napi_struct *napi)
}
EXPORT_SYMBOL(netif_napi_del);
+static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+{
+ void *have;
+ int work, weight;
+
+ list_del_init(&n->poll_list);
+
+ have = netpoll_poll_lock(n);
+
+ weight = n->weight;
+
+ /* This NAPI_STATE_SCHED test is for avoiding a race
+ * with netpoll's poll_napi(). Only the entity which
+ * obtains the lock and sees NAPI_STATE_SCHED set will
+ * actually make the ->poll() call. Therefore we avoid
+ * accidentally calling ->poll() when NAPI is not scheduled.
+ */
+ work = 0;
+ if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+ work = n->poll(n, weight);
+ trace_napi_poll(n);
+ }
+
+ WARN_ON_ONCE(work > weight);
+
+ if (likely(work < weight))
+ goto out_unlock;
+
+ /* Drivers must not modify the NAPI state if they
+ * consume the entire weight. In such cases this code
+ * still "owns" the NAPI instance and therefore can
+ * move the instance around on the list at-will.
+ */
+ if (unlikely(napi_disable_pending(n))) {
+ napi_complete(n);
+ goto out_unlock;
+ }
+
+ if (n->gro_list) {
+ /* flush too old packets
+ * If HZ < 1000, flush all packets.
+ */
+ napi_gro_flush(n, HZ >= 1000);
+ }
+
+ /* Some drivers may have called napi_schedule
+ * prior to exhausting their budget.
+ */
+ if (unlikely(!list_empty(&n->poll_list))) {
+ pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
+ n->dev ? n->dev->name : "backlog");
+ goto out_unlock;
+ }
+
+ list_add_tail(&n->poll_list, repoll);
+
+out_unlock:
+ netpoll_poll_unlock(have);
+
+ return work;
+}
+
static void net_rx_action(struct softirq_action *h)
{
struct softnet_data *sd = this_cpu_ptr(&softnet_data);
@@ -4564,74 +4606,34 @@ static void net_rx_action(struct softirq_action *h)
int budget = netdev_budget;
LIST_HEAD(list);
LIST_HEAD(repoll);
- void *have;
local_irq_disable();
list_splice_init(&sd->poll_list, &list);
local_irq_enable();
- while (!list_empty(&list)) {
+ for (;;) {
struct napi_struct *n;
- int work, weight;
-
- /* If softirq window is exhausted then punt.
- * Allow this to run for 2 jiffies since which will allow
- * an average latency of 1.5/HZ.
- */
- if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
- goto softnet_break;
-
-
- n = list_first_entry(&list, struct napi_struct, poll_list);
- list_del_init(&n->poll_list);
-
- have = netpoll_poll_lock(n);
- weight = n->weight;
-
- /* This NAPI_STATE_SCHED test is for avoiding a race
- * with netpoll's poll_napi(). Only the entity which
- * obtains the lock and sees NAPI_STATE_SCHED set will
- * actually make the ->poll() call. Therefore we avoid
- * accidentally calling ->poll() when NAPI is not scheduled.
- */
- work = 0;
- if (test_bit(NAPI_STATE_SCHED, &n->state)) {
- work = n->poll(n, weight);
- trace_napi_poll(n);
+ if (list_empty(&list)) {
+ if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
+ return;
+ break;
}
- WARN_ON_ONCE(work > weight);
-
- budget -= work;
+ n = list_first_entry(&list, struct napi_struct, poll_list);
+ budget -= napi_poll(n, &repoll);
- /* Drivers must not modify the NAPI state if they
- * consume the entire weight. In such cases this code
- * still "owns" the NAPI instance and therefore can
- * move the instance around on the list at-will.
+ /* If softirq window is exhausted then punt.
+ * Allow this to run for 2 jiffies since which will allow
+ * an average latency of 1.5/HZ.
*/
- if (unlikely(work == weight)) {
- if (unlikely(napi_disable_pending(n))) {
- napi_complete(n);
- } else {
- if (n->gro_list) {
- /* flush too old packets
- * If HZ < 1000, flush all packets.
- */
- napi_gro_flush(n, HZ >= 1000);
- }
- list_add_tail(&n->poll_list, &repoll);
- }
+ if (unlikely(budget <= 0 ||
+ time_after_eq(jiffies, time_limit))) {
+ sd->time_squeeze++;
+ break;
}
-
- netpoll_poll_unlock(have);
}
- if (!sd_has_rps_ipi_waiting(sd) &&
- list_empty(&list) &&
- list_empty(&repoll))
- return;
-out:
local_irq_disable();
list_splice_tail_init(&sd->poll_list, &list);
@@ -4641,12 +4643,6 @@ out:
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
net_rps_action_and_irq_enable(sd);
-
- return;
-
-softnet_break:
- sd->time_squeeze++;
- goto out;
}
struct netdev_adjacent {
@@ -5298,7 +5294,7 @@ void netdev_upper_dev_unlink(struct net_device *dev,
}
EXPORT_SYMBOL(netdev_upper_dev_unlink);
-void netdev_adjacent_add_links(struct net_device *dev)
+static void netdev_adjacent_add_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
@@ -5323,7 +5319,7 @@ void netdev_adjacent_add_links(struct net_device *dev)
}
}
-void netdev_adjacent_del_links(struct net_device *dev)
+static void netdev_adjacent_del_links(struct net_device *dev)
{
struct netdev_adjacent *iter;
@@ -6631,7 +6627,7 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
if (!queue)
return NULL;
netdev_init_one_queue(dev, queue, NULL);
- queue->qdisc = &noop_qdisc;
+ RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
queue->qdisc_sleeping = &noop_qdisc;
rcu_assign_pointer(dev->ingress_queue, queue);
#endif
@@ -7047,10 +7043,20 @@ static int dev_cpu_callback(struct notifier_block *nfb,
oldsd->output_queue = NULL;
oldsd->output_queue_tailp = &oldsd->output_queue;
}
- /* Append NAPI poll list from offline CPU. */
- if (!list_empty(&oldsd->poll_list)) {
- list_splice_init(&oldsd->poll_list, &sd->poll_list);
- raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ /* Append NAPI poll list from offline CPU, with one exception :
+ * process_backlog() must be called by cpu owning percpu backlog.
+ * We properly handle process_queue & input_pkt_queue later.
+ */
+ while (!list_empty(&oldsd->poll_list)) {
+ struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
+ struct napi_struct,
+ poll_list);
+
+ list_del_init(&napi->poll_list);
+ if (napi->poll == process_backlog)
+ napi->state = 0;
+ else
+ ____napi_schedule(sd, napi);
}
raise_softirq_irqoff(NET_TX_SOFTIRQ);
@@ -7061,7 +7067,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
netif_rx_internal(skb);
input_queue_head_incr(oldsd);
}
- while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
+ while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
netif_rx_internal(skb);
input_queue_head_incr(oldsd);
}