Diffstat (limited to 'net/core/dev.c')
 -rw-r--r--  net/core/dev.c | 171
 1 file changed, 132 insertions(+), 39 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index ae00b894e675..d1706e88fbeb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -96,6 +96,7 @@
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
+#include <net/busy_poll.h>
#include <linux/rtnetlink.h>
#include <linux/stat.h>
#include <net/dst.h>
@@ -182,8 +183,8 @@ EXPORT_SYMBOL(dev_base_lock);
/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);
-static unsigned int napi_gen_id;
-static DEFINE_HASHTABLE(napi_hash, 8);
+static unsigned int napi_gen_id = NR_CPUS;
+static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
static seqcount_t devnet_rename_seq;
@@ -3021,7 +3022,9 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
int queue_index = 0;
#ifdef CONFIG_XPS
- if (skb->sender_cpu == 0)
+ u32 sender_cpu = skb->sender_cpu - 1;
+
+ if (sender_cpu >= (u32)NR_CPUS)
skb->sender_cpu = raw_smp_processor_id() + 1;
#endif
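The replaced check folds two tests into one unsigned comparison. In this era's struct sk_buff, sender_cpu shares a union with napi_id, so the field can legitimately hold 0 (never set) or a NAPI id, which after this series is always greater than NR_CPUS. Subtracting 1 in u32 arithmetic wraps 0 to UINT_MAX, so a single >= NR_CPUS test catches both stale cases. A minimal userspace sketch of the trick, with an illustrative NR_CPUS value:

#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 64	/* illustrative; the kernel constant is config-dependent */

/* Mirrors the netdev_pick_tx() check: true when the field must be
 * refreshed with the current CPU number.
 */
static int sender_cpu_stale(uint32_t field)
{
	return (uint32_t)(field - 1) >= (uint32_t)NR_CPUS;
}

int main(void)
{
	printf("%d\n", sender_cpu_stale(0));     /* 1: unset, 0 - 1 wraps to UINT32_MAX */
	printf("%d\n", sender_cpu_stale(5));     /* 0: valid value, CPU 4 + 1 */
	printf("%d\n", sender_cpu_stale(4097));  /* 1: a napi_id left in the union */
	return 0;
}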
@@ -4353,6 +4356,7 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
+ skb_mark_napi_id(skb, napi);
trace_napi_gro_receive_entry(skb);
skb_gro_reset_offset(skb);
@@ -4386,7 +4390,10 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi)
if (!skb) {
skb = napi_alloc_skb(napi, GRO_MAX_HEAD);
- napi->skb = skb;
+ if (skb) {
+ napi->skb = skb;
+ skb_mark_napi_id(skb, napi);
+ }
}
return skb;
}
@@ -4661,7 +4668,7 @@ void napi_complete_done(struct napi_struct *n, int work_done)
EXPORT_SYMBOL(napi_complete_done);
/* must be called under rcu_read_lock(), as we don't take a reference */
-struct napi_struct *napi_by_id(unsigned int napi_id)
+static struct napi_struct *napi_by_id(unsigned int napi_id)
{
unsigned int hash = napi_id % HASH_SIZE(napi_hash);
struct napi_struct *napi;
@@ -4672,43 +4679,101 @@ struct napi_struct *napi_by_id(unsigned int napi_id)
return NULL;
}
-EXPORT_SYMBOL_GPL(napi_by_id);
-void napi_hash_add(struct napi_struct *napi)
+#if defined(CONFIG_NET_RX_BUSY_POLL)
+#define BUSY_POLL_BUDGET 8
+bool sk_busy_loop(struct sock *sk, int nonblock)
{
- if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
+ unsigned long end_time = !nonblock ? sk_busy_loop_end_time(sk) : 0;
+ int (*busy_poll)(struct napi_struct *dev);
+ struct napi_struct *napi;
+ int rc = false;
- spin_lock(&napi_hash_lock);
+ rcu_read_lock();
- /* 0 is not a valid id, we also skip an id that is taken
- * we expect both events to be extremely rare
- */
- napi->napi_id = 0;
- while (!napi->napi_id) {
- napi->napi_id = ++napi_gen_id;
- if (napi_by_id(napi->napi_id))
- napi->napi_id = 0;
+ napi = napi_by_id(sk->sk_napi_id);
+ if (!napi)
+ goto out;
+
+ /* Note: ndo_busy_poll method is optional in linux-4.5 */
+ busy_poll = napi->dev->netdev_ops->ndo_busy_poll;
+
+ do {
+ rc = 0;
+ local_bh_disable();
+ if (busy_poll) {
+ rc = busy_poll(napi);
+ } else if (napi_schedule_prep(napi)) {
+ void *have = netpoll_poll_lock(napi);
+
+ if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+ rc = napi->poll(napi, BUSY_POLL_BUDGET);
+ trace_napi_poll(napi);
+ if (rc == BUSY_POLL_BUDGET) {
+ napi_complete_done(napi, rc);
+ napi_schedule(napi);
+ }
+ }
+ netpoll_poll_unlock(have);
}
+ if (rc > 0)
+ NET_ADD_STATS_BH(sock_net(sk),
+ LINUX_MIB_BUSYPOLLRXPACKETS, rc);
+ local_bh_enable();
- hlist_add_head_rcu(&napi->napi_hash_node,
- &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+ if (rc == LL_FLUSH_FAILED)
+ break; /* permanent failure */
- spin_unlock(&napi_hash_lock);
- }
+ cpu_relax();
+ } while (!nonblock && skb_queue_empty(&sk->sk_receive_queue) &&
+ !need_resched() && !busy_loop_timeout(end_time));
+
+ rc = !skb_queue_empty(&sk->sk_receive_queue);
+out:
+ rcu_read_unlock();
+ return rc;
+}
+EXPORT_SYMBOL(sk_busy_loop);
+
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
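With sk_busy_loop() made generic, any protocol's receive path can spin on the NAPI context recorded in sk->sk_napi_id, whether or not the driver implements ndo_busy_poll. A hedged sketch of a caller, loosely modelled on the datagram receive path of this period; the surrounding function and its dequeue logic are illustrative, while sk_can_busy_loop() is the real helper from net/busy_poll.h:

#include <net/busy_poll.h>

/* Illustrative only: hand back queued data if any, otherwise busy poll
 * the socket's NAPI context until data arrives, the busy-poll budget
 * expires, or the caller asked for nonblocking I/O.
 */
static struct sk_buff *recv_or_busy_poll(struct sock *sk, int flags)
{
	struct sk_buff *skb;

	do {
		skb = skb_dequeue(&sk->sk_receive_queue);
		if (skb)
			return skb;
	} while (sk_can_busy_loop(sk) &&
		 sk_busy_loop(sk, flags & MSG_DONTWAIT));

	return NULL;
}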
+void napi_hash_add(struct napi_struct *napi)
+{
+ if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
+ test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
+ return;
+
+ spin_lock(&napi_hash_lock);
+
+ /* 0..NR_CPUS+1 range is reserved for sender_cpu use */
+ do {
+ if (unlikely(++napi_gen_id < NR_CPUS + 1))
+ napi_gen_id = NR_CPUS + 1;
+ } while (napi_by_id(napi_gen_id));
+ napi->napi_id = napi_gen_id;
+
+ hlist_add_head_rcu(&napi->napi_hash_node,
+ &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+
+ spin_unlock(&napi_hash_lock);
}
EXPORT_SYMBOL_GPL(napi_hash_add);
/* Warning : caller is responsible to make sure rcu grace period
* is respected before freeing memory containing @napi
*/
-void napi_hash_del(struct napi_struct *napi)
+bool napi_hash_del(struct napi_struct *napi)
{
+ bool rcu_sync_needed = false;
+
spin_lock(&napi_hash_lock);
- if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state))
+ if (test_and_clear_bit(NAPI_STATE_HASHED, &napi->state)) {
+ rcu_sync_needed = true;
hlist_del_rcu(&napi->napi_hash_node);
-
+ }
spin_unlock(&napi_hash_lock);
+ return rcu_sync_needed;
}
EXPORT_SYMBOL_GPL(napi_hash_del);
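Returning a bool from napi_hash_del() moves the RCU grace-period wait out to callers, so a driver tearing down many queues can batch one synchronize_net() instead of paying one grace period per napi. The netif_napi_del() hunk below shows the single-napi pattern; this is a hedged sketch of the batched case, with a hypothetical priv layout:

/* Hypothetical multiqueue teardown: unhash everything first, then wait
 * once for readers of napi_hash to drain.
 */
bool sync_needed = false;
int i;

for (i = 0; i < priv->num_queues; i++)
	sync_needed |= napi_hash_del(&priv->queues[i].napi);

if (sync_needed)
	synchronize_net();	/* one grace period covers all queues */

for (i = 0; i < priv->num_queues; i++)
	netif_napi_del(&priv->queues[i].napi);	/* already unhashed: no extra wait */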
@@ -4744,6 +4809,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
napi->poll_owner = -1;
#endif
set_bit(NAPI_STATE_SCHED, &napi->state);
+ napi_hash_add(napi);
}
EXPORT_SYMBOL(netif_napi_add);
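Since netif_napi_add() now hashes every NAPI instance itself, busy-poll support no longer depends on drivers opting in, and explicit napi_hash_add() calls become redundant (the NAPI_STATE_HASHED test-and-set makes a second call a no-op). A hedged before/after sketch with hypothetical driver names:

/* before: only busy-poll-aware drivers opted in */
netif_napi_add(dev, &priv->napi, example_poll, NAPI_POLL_WEIGHT);
napi_hash_add(&priv->napi);

/* after: registration hashes implicitly, so this suffices */
netif_napi_add(dev, &priv->napi, example_poll, NAPI_POLL_WEIGHT);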
@@ -4763,8 +4829,12 @@ void napi_disable(struct napi_struct *n)
}
EXPORT_SYMBOL(napi_disable);
+/* Must be called in process context */
void netif_napi_del(struct napi_struct *napi)
{
+ might_sleep();
+ if (napi_hash_del(napi))
+ synchronize_net();
list_del_init(&napi->dev_list);
napi_free_frags(napi);
@@ -5351,7 +5421,7 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev,
static int __netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev, bool master,
- void *private)
+ void *upper_priv, void *upper_info)
{
struct netdev_notifier_changeupper_info changeupper_info;
struct netdev_adjacent *i, *j, *to_i, *to_j;
@@ -5375,6 +5445,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
changeupper_info.upper_dev = upper_dev;
changeupper_info.master = master;
changeupper_info.linking = true;
+ changeupper_info.upper_info = upper_info;
ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
&changeupper_info.info);
@@ -5382,7 +5453,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (ret)
return ret;
- ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
+ ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, upper_priv,
master);
if (ret)
return ret;
@@ -5420,8 +5491,12 @@ static int __netdev_upper_dev_link(struct net_device *dev,
goto rollback_lower_mesh;
}
- call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
- &changeupper_info.info);
+ ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
+ &changeupper_info.info);
+ ret = notifier_to_errno(ret);
+ if (ret)
+ goto rollback_lower_mesh;
+
return 0;
rollback_lower_mesh:
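Because the NETDEV_CHANGEUPPER notifier's return value is now honored, a subscriber can veto an upper-device link even after the adjacency lists are built, and __netdev_upper_dev_link() will unwind through rollback_lower_mesh. A hedged sketch of such a notifier; example_upper_supported() is hypothetical:

static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct netdev_notifier_changeupper_info *info = ptr;

	if (event != NETDEV_CHANGEUPPER)
		return NOTIFY_DONE;

	/* hypothetical policy: refuse to sit under this upper device */
	if (info->linking && !example_upper_supported(info->upper_dev))
		return notifier_from_errno(-EOPNOTSUPP);

	return NOTIFY_DONE;
}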
@@ -5475,7 +5550,7 @@ rollback_mesh:
int netdev_upper_dev_link(struct net_device *dev,
struct net_device *upper_dev)
{
- return __netdev_upper_dev_link(dev, upper_dev, false, NULL);
+ return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL);
}
EXPORT_SYMBOL(netdev_upper_dev_link);
@@ -5483,6 +5558,8 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
* netdev_master_upper_dev_link - Add a master link to the upper device
* @dev: device
* @upper_dev: new upper device
+ * @upper_priv: upper device private
+ * @upper_info: upper info to be passed down via notifier
*
* Adds a link to device which is upper to this one. In this case, only
* one master upper device can be linked, although other non-master devices
@@ -5491,20 +5568,14 @@ EXPORT_SYMBOL(netdev_upper_dev_link);
* counts are adjusted and the function returns zero.
*/
int netdev_master_upper_dev_link(struct net_device *dev,
- struct net_device *upper_dev)
+ struct net_device *upper_dev,
+ void *upper_priv, void *upper_info)
{
- return __netdev_upper_dev_link(dev, upper_dev, true, NULL);
+ return __netdev_upper_dev_link(dev, upper_dev, true,
+ upper_priv, upper_info);
}
EXPORT_SYMBOL(netdev_master_upper_dev_link);
-int netdev_master_upper_dev_link_private(struct net_device *dev,
- struct net_device *upper_dev,
- void *private)
-{
- return __netdev_upper_dev_link(dev, upper_dev, true, private);
-}
-EXPORT_SYMBOL(netdev_master_upper_dev_link_private);
-
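Callers of the master variant now supply both the private pointer and the opaque info blob in a single call, replacing the _private helper removed above. A hedged example of a new-style call site using the 4.5-era LAG info type; the bonding-flavored variable names are illustrative:

struct netdev_lag_upper_info lag_upper_info = {
	.tx_type = NETDEV_LAG_TX_TYPE_HASH,	/* illustrative mode */
};
int err;

err = netdev_master_upper_dev_link(slave_dev, bond_dev,
				   slave,		/* upper_priv */
				   &lag_upper_info);	/* upper_info */
if (err)
	return err;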
/**
* netdev_upper_dev_unlink - Removes a link to upper device
* @dev: device
@@ -5685,6 +5756,26 @@ int dev_get_nest_level(struct net_device *dev,
}
EXPORT_SYMBOL(dev_get_nest_level);
+/**
+ * netdev_lower_state_changed - Dispatch event about lower device state change
+ * @lower_dev: device
+ * @lower_state_info: state to dispatch
+ *
+ * Send NETDEV_CHANGELOWERSTATE to netdev notifiers with info.
+ * The caller must hold the RTNL lock.
+ */
+void netdev_lower_state_changed(struct net_device *lower_dev,
+ void *lower_state_info)
+{
+ struct netdev_notifier_changelowerstate_info changelowerstate_info;
+
+ ASSERT_RTNL();
+ changelowerstate_info.lower_state_info = lower_state_info;
+ call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev,
+ &changelowerstate_info.info);
+}
+EXPORT_SYMBOL(netdev_lower_state_changed);
+
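A hedged usage sketch for the new helper: a LAG master reporting a lower device's state change via the companion struct netdev_lag_lower_state_info from the same series, with illustrative field values and a hypothetical slave_dev:

struct netdev_lag_lower_state_info info = {
	.link_up = true,
	.tx_enabled = true,
};

/* RTNL must already be held by the caller */
netdev_lower_state_changed(slave_dev, &info);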
static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
@@ -7164,11 +7255,13 @@ EXPORT_SYMBOL(alloc_netdev_mqs);
* This function does the last stage of destroying an allocated device
* interface. The reference to the device object is released.
* If this is the last reference then it will be freed.
+ * Must be called in process context.
*/
void free_netdev(struct net_device *dev)
{
struct napi_struct *p, *n;
+ might_sleep();
netif_free_tx_queues(dev);
#ifdef CONFIG_SYSFS
kvfree(dev->_rx);