aboutsummaryrefslogtreecommitdiffstats
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c186
1 files changed, 152 insertions, 34 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 904ff431d570..2a9c39f8824e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -94,6 +94,7 @@
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
+#include <linux/bpf.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/busy_poll.h>
@@ -139,6 +140,7 @@
#include <linux/hrtimer.h>
#include <linux/netfilter_ingress.h>
#include <linux/sctp.h>
+#include <linux/crash_dump.h>
#include "net-sysfs.h"
@@ -2249,11 +2251,12 @@ EXPORT_SYMBOL(netif_set_real_num_rx_queues);
*/
int netif_get_num_default_rss_queues(void)
{
- return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
+ return is_kdump_kernel() ?
+ 1 : min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
}
EXPORT_SYMBOL(netif_get_num_default_rss_queues);
-static inline void __netif_reschedule(struct Qdisc *q)
+static void __netif_reschedule(struct Qdisc *q)
{
struct softnet_data *sd;
unsigned long flags;
@@ -2420,7 +2423,7 @@ EXPORT_SYMBOL(__skb_tx_hash);
static void skb_warn_bad_offload(const struct sk_buff *skb)
{
- static const netdev_features_t null_features = 0;
+ static const netdev_features_t null_features;
struct net_device *dev = skb->dev;
const char *name = "";
@@ -3068,6 +3071,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
struct netdev_queue *txq)
{
spinlock_t *root_lock = qdisc_lock(q);
+ struct sk_buff *to_free = NULL;
bool contended;
int rc;
@@ -3075,7 +3079,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
/*
* Heuristic to force contended enqueues to serialize on a
* separate lock before trying to get qdisc main lock.
- * This permits __QDISC___STATE_RUNNING owner to get the lock more
+ * This permits qdisc->running owner to get the lock more
* often and dequeue packets faster.
*/
contended = qdisc_is_running(q);
@@ -3084,7 +3088,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
spin_lock(root_lock);
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
- kfree_skb(skb);
+ __qdisc_drop(skb, &to_free);
rc = NET_XMIT_DROP;
} else if ((q->flags & TCQ_F_CAN_BYPASS) && !qdisc_qlen(q) &&
qdisc_run_begin(q)) {
@@ -3107,7 +3111,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
rc = NET_XMIT_SUCCESS;
} else {
- rc = q->enqueue(skb, q) & NET_XMIT_MASK;
+ rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK;
if (qdisc_run_begin(q)) {
if (unlikely(contended)) {
spin_unlock(&q->busylock);
@@ -3117,6 +3121,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
}
}
spin_unlock(root_lock);
+ if (unlikely(to_free))
+ kfree_skb_list(to_free);
if (unlikely(contended))
spin_unlock(&q->busylock);
return rc;
@@ -3142,8 +3148,6 @@ static void skb_update_prio(struct sk_buff *skb)
DEFINE_PER_CPU(int, xmit_recursion);
EXPORT_SYMBOL(xmit_recursion);
-#define RECURSION_LIMIT 10
-
/**
* dev_loopback_xmit - loop back @skb
* @net: network namespace this loopback is happening in
@@ -3386,8 +3390,8 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
int cpu = smp_processor_id(); /* ok because BHs are off */
if (txq->xmit_lock_owner != cpu) {
-
- if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+ if (unlikely(__this_cpu_read(xmit_recursion) >
+ XMIT_RECURSION_LIMIT))
goto recursion_alert;
skb = validate_xmit_skb(skb, dev);
@@ -3898,22 +3902,14 @@ static void net_tx_action(struct softirq_action *h)
head = head->next_sched;
root_lock = qdisc_lock(q);
- if (spin_trylock(root_lock)) {
- smp_mb__before_atomic();
- clear_bit(__QDISC_STATE_SCHED,
- &q->state);
- qdisc_run(q);
- spin_unlock(root_lock);
- } else {
- if (!test_bit(__QDISC_STATE_DEACTIVATED,
- &q->state)) {
- __netif_reschedule(q);
- } else {
- smp_mb__before_atomic();
- clear_bit(__QDISC_STATE_SCHED,
- &q->state);
- }
- }
+ spin_lock(root_lock);
+ /* We need to make sure head->next_sched is read
+ * before clearing __QDISC_STATE_SCHED
+ */
+ smp_mb__before_atomic();
+ clear_bit(__QDISC_STATE_SCHED, &q->state);
+ qdisc_run(q);
+ spin_unlock(root_lock);
}
}
}
@@ -4977,7 +4973,7 @@ bool sk_busy_loop(struct sock *sk, int nonblock)
if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
rc = napi->poll(napi, BUSY_POLL_BUDGET);
- trace_napi_poll(napi);
+ trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
if (rc == BUSY_POLL_BUDGET) {
napi_complete_done(napi, rc);
napi_schedule(napi);
@@ -5133,7 +5129,7 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
work = 0;
if (test_bit(NAPI_STATE_SCHED, &n->state)) {
work = n->poll(n, weight);
- trace_napi_poll(n);
+ trace_napi_poll(n, work, weight);
}
WARN_ON_ONCE(work > weight);
@@ -5450,6 +5446,52 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
EXPORT_SYMBOL(netdev_lower_get_next);
/**
+ * netdev_all_lower_get_next - Get the next device from all lower neighbour list
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RTNL lock or
+ * its own locking that guarantees that the neighbour all lower
+ * list will remain unchanged.
+ */
+struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_entry(*iter, struct netdev_adjacent, list);
+
+ if (&lower->list == &dev->all_adj_list.lower)
+ return NULL;
+
+ *iter = lower->list.next;
+
+ return lower->dev;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next);
+
+/**
+ * netdev_all_lower_get_next_rcu - Get the next device from all
+ * lower neighbour list, RCU variant
+ * @dev: device
+ * @iter: list_head ** of the current position
+ *
+ * Gets the next netdev_adjacent from the dev's all lower neighbour
+ * list, starting from iter position. The caller must hold RCU read lock.
+ */
+struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
+ struct list_head **iter)
+{
+ struct netdev_adjacent *lower;
+
+ lower = list_first_or_null_rcu(&dev->all_adj_list.lower,
+ struct netdev_adjacent, list);
+
+ return lower ? lower->dev : NULL;
+}
+EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
+
+/**
* netdev_lower_get_first_private_rcu - Get the first ->private from the
* lower neighbour list, RCU
* variant
@@ -5919,7 +5961,7 @@ static void netdev_adjacent_add_links(struct net_device *dev)
struct net *net = dev_net(dev);
list_for_each_entry(iter, &dev->adj_list.upper, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_add(iter->dev, dev,
&iter->dev->adj_list.lower);
@@ -5928,7 +5970,7 @@ static void netdev_adjacent_add_links(struct net_device *dev)
}
list_for_each_entry(iter, &dev->adj_list.lower, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_add(iter->dev, dev,
&iter->dev->adj_list.upper);
@@ -5944,7 +5986,7 @@ static void netdev_adjacent_del_links(struct net_device *dev)
struct net *net = dev_net(dev);
list_for_each_entry(iter, &dev->adj_list.upper, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_del(iter->dev, dev->name,
&iter->dev->adj_list.lower);
@@ -5953,7 +5995,7 @@ static void netdev_adjacent_del_links(struct net_device *dev)
}
list_for_each_entry(iter, &dev->adj_list.lower, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_del(iter->dev, dev->name,
&iter->dev->adj_list.upper);
@@ -5969,7 +6011,7 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
struct net *net = dev_net(dev);
list_for_each_entry(iter, &dev->adj_list.upper, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_del(iter->dev, oldname,
&iter->dev->adj_list.lower);
@@ -5978,7 +6020,7 @@ void netdev_adjacent_rename_links(struct net_device *dev, char *oldname)
}
list_for_each_entry(iter, &dev->adj_list.lower, list) {
- if (!net_eq(net,dev_net(iter->dev)))
+ if (!net_eq(net, dev_net(iter->dev)))
continue;
netdev_adjacent_sysfs_del(iter->dev, oldname,
&iter->dev->adj_list.upper);
@@ -6046,6 +6088,50 @@ void netdev_lower_state_changed(struct net_device *lower_dev,
}
EXPORT_SYMBOL(netdev_lower_state_changed);
+int netdev_default_l2upper_neigh_construct(struct net_device *dev,
+ struct neighbour *n)
+{
+ struct net_device *lower_dev, *stop_dev;
+ struct list_head *iter;
+ int err;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ if (!lower_dev->netdev_ops->ndo_neigh_construct)
+ continue;
+ err = lower_dev->netdev_ops->ndo_neigh_construct(lower_dev, n);
+ if (err) {
+ stop_dev = lower_dev;
+ goto rollback;
+ }
+ }
+ return 0;
+
+rollback:
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ if (lower_dev == stop_dev)
+ break;
+ if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+ continue;
+ lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+ }
+ return err;
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_construct);
+
+void netdev_default_l2upper_neigh_destroy(struct net_device *dev,
+ struct neighbour *n)
+{
+ struct net_device *lower_dev;
+ struct list_head *iter;
+
+ netdev_for_each_lower_dev(dev, lower_dev, iter) {
+ if (!lower_dev->netdev_ops->ndo_neigh_destroy)
+ continue;
+ lower_dev->netdev_ops->ndo_neigh_destroy(lower_dev, n);
+ }
+}
+EXPORT_SYMBOL_GPL(netdev_default_l2upper_neigh_destroy);
+
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -6530,6 +6616,38 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down)
EXPORT_SYMBOL(dev_change_proto_down);
/**
+ * dev_change_xdp_fd - set or clear a bpf program for a device rx path
+ * @dev: device
+ * @fd: new program fd or negative value to clear
+ *
+ * Set or clear a bpf program for a device
+ */
+int dev_change_xdp_fd(struct net_device *dev, int fd)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ struct bpf_prog *prog = NULL;
+ struct netdev_xdp xdp = {};
+ int err;
+
+ if (!ops->ndo_xdp)
+ return -EOPNOTSUPP;
+ if (fd >= 0) {
+ prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP);
+ if (IS_ERR(prog))
+ return PTR_ERR(prog);
+ }
+
+ xdp.command = XDP_SETUP_PROG;
+ xdp.prog = prog;
+ err = ops->ndo_xdp(dev, &xdp);
+ if (err < 0 && prog)
+ bpf_prog_put(prog);
+
+ return err;
+}
+EXPORT_SYMBOL(dev_change_xdp_fd);
+
+/**
* dev_new_index - allocate an ifindex
* @net: the applicable net namespace
*