aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c230
1 files changed, 165 insertions, 65 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 77d43ae2a7bb..1b5a4410be0e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
#include <linux/sctp.h>
#include <net/udp_tunnel.h>
#include <linux/net_namespace.h>
+#include <linux/indirect_call_wrapper.h>
#include "net-sysfs.h"
@@ -162,6 +163,9 @@ static struct list_head offload_base __read_mostly;
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
struct netdev_notifier_info *info);
+static int call_netdevice_notifiers_extack(unsigned long val,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack);
static struct napi_struct *napi_by_id(unsigned int napi_id);
/*
@@ -1361,7 +1365,7 @@ void netdev_notify_peers(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_notify_peers);
-static int __dev_open(struct net_device *dev)
+static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
int ret;
@@ -1377,7 +1381,7 @@ static int __dev_open(struct net_device *dev)
*/
netpoll_poll_disable(dev);
- ret = call_netdevice_notifiers(NETDEV_PRE_UP, dev);
+ ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack);
ret = notifier_to_errno(ret);
if (ret)
return ret;
@@ -1406,7 +1410,8 @@ static int __dev_open(struct net_device *dev)
/**
* dev_open - prepare an interface for use.
- * @dev: device to open
+ * @dev: device to open
+ * @extack: netlink extended ack
*
* Takes a device from down to up state. The device's private open
* function is invoked and then the multicast lists are loaded. Finally
@@ -1416,14 +1421,14 @@ static int __dev_open(struct net_device *dev)
* Calling this function on an active interface is a nop. On a failure
* a negative errno code is returned.
*/
-int dev_open(struct net_device *dev)
+int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
int ret;
if (dev->flags & IFF_UP)
return 0;
- ret = __dev_open(dev);
+ ret = __dev_open(dev, extack);
if (ret < 0)
return ret;
@@ -1585,6 +1590,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
+ N(PRE_CHANGEADDR)
}
#undef N
return "UNKNOWN_NETDEV_EVENT";
@@ -1733,6 +1739,18 @@ static int call_netdevice_notifiers_info(unsigned long val,
return raw_notifier_call_chain(&netdev_chain, val, info);
}
+static int call_netdevice_notifiers_extack(unsigned long val,
+ struct net_device *dev,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_info info = {
+ .dev = dev,
+ .extack = extack,
+ };
+
+ return call_netdevice_notifiers_info(val, &info);
+}
+
/**
* call_netdevice_notifiers - call all network notifier blocks
* @val: value passed unmodified to notifier function
@@ -1744,11 +1762,7 @@ static int call_netdevice_notifiers_info(unsigned long val,
int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
- struct netdev_notifier_info info = {
- .dev = dev,
- };
-
- return call_netdevice_notifiers_info(val, &info);
+ return call_netdevice_notifiers_extack(val, dev, NULL);
}
EXPORT_SYMBOL(call_netdevice_notifiers);
@@ -2175,6 +2189,20 @@ static bool remove_xps_queue_cpu(struct net_device *dev,
return active;
}
+static void reset_xps_maps(struct net_device *dev,
+ struct xps_dev_maps *dev_maps,
+ bool is_rxqs_map)
+{
+ if (is_rxqs_map) {
+ static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
+ RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
+ } else {
+ RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+ }
+ static_key_slow_dec_cpuslocked(&xps_needed);
+ kfree_rcu(dev_maps, rcu);
+}
+
static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
struct xps_dev_maps *dev_maps, unsigned int nr_ids,
u16 offset, u16 count, bool is_rxqs_map)
@@ -2186,18 +2214,15 @@ static void clean_xps_maps(struct net_device *dev, const unsigned long *mask,
j < nr_ids;)
active |= remove_xps_queue_cpu(dev, dev_maps, j, offset,
count);
- if (!active) {
- if (is_rxqs_map) {
- RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
- } else {
- RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
+ if (!active)
+ reset_xps_maps(dev, dev_maps, is_rxqs_map);
- for (i = offset + (count - 1); count--; i--)
- netdev_queue_numa_node_write(
- netdev_get_tx_queue(dev, i),
- NUMA_NO_NODE);
+ if (!is_rxqs_map) {
+ for (i = offset + (count - 1); count--; i--) {
+ netdev_queue_numa_node_write(
+ netdev_get_tx_queue(dev, i),
+ NUMA_NO_NODE);
}
- kfree_rcu(dev_maps, rcu);
}
}
@@ -2234,10 +2259,6 @@ static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
false);
out_no_maps:
- if (static_key_enabled(&xps_rxqs_needed))
- static_key_slow_dec_cpuslocked(&xps_rxqs_needed);
-
- static_key_slow_dec_cpuslocked(&xps_needed);
mutex_unlock(&xps_map_mutex);
cpus_read_unlock();
}
@@ -2355,9 +2376,12 @@ int __netif_set_xps_queue(struct net_device *dev, const unsigned long *mask,
if (!new_dev_maps)
goto out_no_new_maps;
- static_key_slow_inc_cpuslocked(&xps_needed);
- if (is_rxqs_map)
- static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
+ if (!dev_maps) {
+ /* Increment static keys at most once per type */
+ static_key_slow_inc_cpuslocked(&xps_needed);
+ if (is_rxqs_map)
+ static_key_slow_inc_cpuslocked(&xps_rxqs_needed);
+ }
for (j = -1; j = netif_attrmask_next(j, possible_mask, nr_ids),
j < nr_ids;) {
@@ -2455,13 +2479,8 @@ out_no_new_maps:
}
/* free map if not active */
- if (!active) {
- if (is_rxqs_map)
- RCU_INIT_POINTER(dev->xps_rxqs_map, NULL);
- else
- RCU_INIT_POINTER(dev->xps_cpus_map, NULL);
- kfree_rcu(dev_maps, rcu);
- }
+ if (!active)
+ reset_xps_maps(dev, dev_maps, is_rxqs_map);
out_no_maps:
mutex_unlock(&xps_map_mutex);
@@ -3091,10 +3110,17 @@ EXPORT_SYMBOL(__skb_gso_segment);
/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
-void netdev_rx_csum_fault(struct net_device *dev)
+void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
{
if (net_ratelimit()) {
pr_err("%s: hw csum failure\n", dev ? dev->name : "<unknown>");
+ if (dev)
+ pr_err("dev features: %pNF\n", &dev->features);
+ pr_err("skb len=%u data_len=%u pkt_type=%u gso_size=%u gso_type=%u nr_frags=%u ip_summed=%u csum=%x csum_complete_sw=%d csum_valid=%d csum_level=%u\n",
+ skb->len, skb->data_len, skb->pkt_type,
+ skb_shinfo(skb)->gso_size, skb_shinfo(skb)->gso_type,
+ skb_shinfo(skb)->nr_frags, skb->ip_summed, skb->csum,
+ skb->csum_complete_sw, skb->csum_valid, skb->csum_level);
dump_stack();
}
}
@@ -3272,7 +3298,7 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *first, struct net_device *de
}
skb = next;
- if (netif_xmit_stopped(txq) && skb) {
+ if (netif_tx_queue_stopped(txq) && skb) {
rc = NETDEV_TX_BUSY;
break;
}
@@ -4520,9 +4546,14 @@ static int netif_rx_internal(struct sk_buff *skb)
int netif_rx(struct sk_buff *skb)
{
+ int ret;
+
trace_netif_rx_entry(skb);
- return netif_rx_internal(skb);
+ ret = netif_rx_internal(skb);
+ trace_netif_rx_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(netif_rx);
@@ -4537,6 +4568,7 @@ int netif_rx_ni(struct sk_buff *skb)
if (local_softirq_pending())
do_softirq();
preempt_enable();
+ trace_netif_rx_ni_exit(err);
return err;
}
@@ -4889,7 +4921,7 @@ skip_classify:
* and set skb->priority like in vlan_do_receive()
* For the time being, just ignore Priority Code Point
*/
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
}
type = skb->protocol;
@@ -5009,7 +5041,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo
struct net_device *orig_dev = skb->dev;
struct packet_type *pt_prev = NULL;
- list_del(&skb->list);
+ skb_list_del_init(skb);
__netif_receive_skb_core(skb, pfmemalloc, &pt_prev);
if (!pt_prev)
continue;
@@ -5165,7 +5197,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
INIT_LIST_HEAD(&sublist);
list_for_each_entry_safe(skb, next, head, list) {
net_timestamp_check(netdev_tstamp_prequeue, skb);
- list_del(&skb->list);
+ skb_list_del_init(skb);
if (!skb_defer_rx_timestamp(skb))
list_add_tail(&skb->list, &sublist);
}
@@ -5176,7 +5208,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
rcu_read_lock();
list_for_each_entry_safe(skb, next, head, list) {
xdp_prog = rcu_dereference(skb->dev->xdp_prog);
- list_del(&skb->list);
+ skb_list_del_init(skb);
if (do_xdp_generic(xdp_prog, skb) == XDP_PASS)
list_add_tail(&skb->list, &sublist);
}
@@ -5195,7 +5227,7 @@ static void netif_receive_skb_list_internal(struct list_head *head)
if (cpu >= 0) {
/* Will be handled, remove from list */
- list_del(&skb->list);
+ skb_list_del_init(skb);
enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
}
}
@@ -5222,9 +5254,14 @@ static void netif_receive_skb_list_internal(struct list_head *head)
*/
int netif_receive_skb(struct sk_buff *skb)
{
+ int ret;
+
trace_netif_receive_skb_entry(skb);
- return netif_receive_skb_internal(skb);
+ ret = netif_receive_skb_internal(skb);
+ trace_netif_receive_skb_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(netif_receive_skb);
@@ -5244,9 +5281,12 @@ void netif_receive_skb_list(struct list_head *head)
if (list_empty(head))
return;
- list_for_each_entry(skb, head, list)
- trace_netif_receive_skb_list_entry(skb);
+ if (trace_netif_receive_skb_list_entry_enabled()) {
+ list_for_each_entry(skb, head, list)
+ trace_netif_receive_skb_list_entry(skb);
+ }
netif_receive_skb_list_internal(head);
+ trace_netif_receive_skb_list_exit(0);
}
EXPORT_SYMBOL(netif_receive_skb_list);
@@ -5299,6 +5339,8 @@ static void flush_all_backlogs(void)
put_online_cpus();
}
+INDIRECT_CALLABLE_DECLARE(int inet_gro_complete(struct sk_buff *, int));
+INDIRECT_CALLABLE_DECLARE(int ipv6_gro_complete(struct sk_buff *, int));
static int napi_gro_complete(struct sk_buff *skb)
{
struct packet_offload *ptype;
@@ -5318,7 +5360,9 @@ static int napi_gro_complete(struct sk_buff *skb)
if (ptype->type != type || !ptype->callbacks.gro_complete)
continue;
- err = ptype->callbacks.gro_complete(skb, 0);
+ err = INDIRECT_CALL_INET(ptype->callbacks.gro_complete,
+ ipv6_gro_complete, inet_gro_complete,
+ skb, 0);
break;
}
rcu_read_unlock();
@@ -5357,11 +5401,13 @@ static void __napi_gro_flush_chain(struct napi_struct *napi, u32 index,
*/
void napi_gro_flush(struct napi_struct *napi, bool flush_old)
{
- u32 i;
+ unsigned long bitmask = napi->gro_bitmask;
+ unsigned int i, base = ~0U;
- for (i = 0; i < GRO_HASH_BUCKETS; i++) {
- if (test_bit(i, &napi->gro_bitmask))
- __napi_gro_flush_chain(napi, i, flush_old);
+ while ((i = ffs(bitmask)) != 0) {
+ bitmask >>= i;
+ base += i;
+ __napi_gro_flush_chain(napi, base, flush_old);
}
}
EXPORT_SYMBOL(napi_gro_flush);
@@ -5386,7 +5432,9 @@ static struct list_head *gro_list_prepare(struct napi_struct *napi,
}
diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
- diffs |= p->vlan_tci ^ skb->vlan_tci;
+ diffs |= skb_vlan_tag_present(p) ^ skb_vlan_tag_present(skb);
+ if (skb_vlan_tag_present(p))
+ diffs |= p->vlan_tci ^ skb->vlan_tci;
diffs |= skb_metadata_dst_cmp(p, skb);
diffs |= skb_metadata_differs(p, skb);
if (maclen == ETH_HLEN)
@@ -5461,6 +5509,10 @@ static void gro_flush_oldest(struct list_head *head)
napi_gro_complete(oldest);
}
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *inet_gro_receive(struct list_head *,
+ struct sk_buff *));
+INDIRECT_CALLABLE_DECLARE(struct sk_buff *ipv6_gro_receive(struct list_head *,
+ struct sk_buff *));
static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
u32 hash = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1);
@@ -5510,7 +5562,9 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff
NAPI_GRO_CB(skb)->csum_valid = 0;
}
- pp = ptype->callbacks.gro_receive(gro_head, skb);
+ pp = INDIRECT_CALL_INET(ptype->callbacks.gro_receive,
+ ipv6_gro_receive, inet_gro_receive,
+ gro_head, skb);
break;
}
rcu_read_unlock();
@@ -5634,12 +5688,17 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
+ gro_result_t ret;
+
skb_mark_napi_id(skb, napi);
trace_napi_gro_receive_entry(skb);
skb_gro_reset_offset(skb);
- return napi_skb_finish(dev_gro_receive(napi, skb), skb);
+ ret = napi_skb_finish(dev_gro_receive(napi, skb), skb);
+ trace_napi_gro_receive_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(napi_gro_receive);
@@ -5652,9 +5711,13 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
__skb_pull(skb, skb_headlen(skb));
/* restore the reserve we had after netdev_alloc_skb_ip_align() */
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN - skb_headroom(skb));
- skb->vlan_tci = 0;
+ __vlan_hwaccel_clear_tag(skb);
skb->dev = napi->dev;
skb->skb_iif = 0;
+
+ /* eth_type_trans() assumes pkt_type is PACKET_HOST */
+ skb->pkt_type = PACKET_HOST;
+
skb->encapsulation = 0;
skb_shinfo(skb)->gso_type = 0;
skb->truesize = SKB_TRUESIZE(skb_end_offset(skb));
@@ -5753,6 +5816,7 @@ static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
gro_result_t napi_gro_frags(struct napi_struct *napi)
{
+ gro_result_t ret;
struct sk_buff *skb = napi_frags_skb(napi);
if (!skb)
@@ -5760,7 +5824,10 @@ gro_result_t napi_gro_frags(struct napi_struct *napi)
trace_napi_gro_frags_entry(skb);
- return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
+ ret = napi_frags_finish(napi, skb, dev_gro_receive(napi, skb));
+ trace_napi_gro_frags_exit(ret);
+
+ return ret;
}
EXPORT_SYMBOL(napi_gro_frags);
@@ -5776,10 +5843,11 @@ __sum16 __skb_gro_checksum_complete(struct sk_buff *skb)
/* NAPI_GRO_CB(skb)->csum holds pseudo checksum */
sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum));
+ /* See comments in __skb_checksum_complete(). */
if (likely(!sum)) {
if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
!skb->csum_complete_sw)
- netdev_rx_csum_fault(skb->dev);
+ netdev_rx_csum_fault(skb->dev, skb);
}
NAPI_GRO_CB(skb)->csum = wsum;
@@ -5966,11 +6034,14 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
if (work_done)
timeout = n->dev->gro_flush_timeout;
+ /* When the NAPI instance uses a timeout and keeps postponing
+ * it, we need to bound somehow the time packets are kept in
+ * the GRO layer
+ */
+ napi_gro_flush(n, !!timeout);
if (timeout)
hrtimer_start(&n->timer, ns_to_ktime(timeout),
HRTIMER_MODE_REL_PINNED);
- else
- napi_gro_flush(n, false);
}
if (unlikely(!list_empty(&n->poll_list))) {
/* If n->poll_list is not empty, we need to mask irqs */
@@ -6197,8 +6268,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
napi->skb = NULL;
napi->poll = poll;
if (weight > NAPI_POLL_WEIGHT)
- pr_err_once("netif_napi_add() called with weight %d on device %s\n",
- weight, dev->name);
+ netdev_err_once(dev, "%s() called with weight %d\n", __func__,
+ weight);
napi->weight = weight;
list_add(&napi->dev_list, &dev->napi_list);
napi->dev = dev;
@@ -7455,7 +7526,8 @@ unsigned int dev_get_flags(const struct net_device *dev)
}
EXPORT_SYMBOL(dev_get_flags);
-int __dev_change_flags(struct net_device *dev, unsigned int flags)
+int __dev_change_flags(struct net_device *dev, unsigned int flags,
+ struct netlink_ext_ack *extack)
{
unsigned int old_flags = dev->flags;
int ret;
@@ -7492,7 +7564,7 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags)
if (old_flags & IFF_UP)
__dev_close(dev);
else
- ret = __dev_open(dev);
+ ret = __dev_open(dev, extack);
}
if ((flags ^ dev->gflags) & IFF_PROMISC) {
@@ -7552,16 +7624,18 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
* dev_change_flags - change device settings
* @dev: device
* @flags: device state flags
+ * @extack: netlink extended ack
*
* Change settings on device based state flags. The flags are
* in the userspace exported format.
*/
-int dev_change_flags(struct net_device *dev, unsigned int flags)
+int dev_change_flags(struct net_device *dev, unsigned int flags,
+ struct netlink_ext_ack *extack)
{
int ret;
unsigned int changes, old_flags = dev->flags, old_gflags = dev->gflags;
- ret = __dev_change_flags(dev, flags);
+ ret = __dev_change_flags(dev, flags, extack);
if (ret < 0)
return ret;
@@ -7694,13 +7768,36 @@ void dev_set_group(struct net_device *dev, int new_group)
EXPORT_SYMBOL(dev_set_group);
/**
+ * dev_pre_changeaddr_notify - Call NETDEV_PRE_CHANGEADDR.
+ * @dev: device
+ * @addr: new address
+ * @extack: netlink extended ack
+ */
+int dev_pre_changeaddr_notify(struct net_device *dev, const char *addr,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_notifier_pre_changeaddr_info info = {
+ .info.dev = dev,
+ .info.extack = extack,
+ .dev_addr = addr,
+ };
+ int rc;
+
+ rc = call_netdevice_notifiers_info(NETDEV_PRE_CHANGEADDR, &info.info);
+ return notifier_to_errno(rc);
+}
+EXPORT_SYMBOL(dev_pre_changeaddr_notify);
+
+/**
* dev_set_mac_address - Change Media Access Control Address
* @dev: device
* @sa: new address
+ * @extack: netlink extended ack
*
* Change the hardware (MAC) address of the device
*/
-int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
+int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa,
+ struct netlink_ext_ack *extack)
{
const struct net_device_ops *ops = dev->netdev_ops;
int err;
@@ -7711,6 +7808,9 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
return -EINVAL;
if (!netif_device_present(dev))
return -ENODEV;
+ err = dev_pre_changeaddr_notify(dev, sa->sa_data, extack);
+ if (err)
+ return err;
err = ops->ndo_set_mac_address(dev, sa);
if (err)
return err;