aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched/sch_generic.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched/sch_generic.c')
-rw-r--r--net/sched/sch_generic.c545
1 files changed, 369 insertions, 176 deletions
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6c9595f1048a..a9aadc4e6858 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -35,6 +35,27 @@
const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
EXPORT_SYMBOL(default_qdisc_ops);
+static void qdisc_maybe_clear_missed(struct Qdisc *q,
+ const struct netdev_queue *txq)
+{
+ clear_bit(__QDISC_STATE_MISSED, &q->state);
+
+ /* Make sure the below netif_xmit_frozen_or_stopped()
+ * checking happens after clearing STATE_MISSED.
+ */
+ smp_mb__after_atomic();
+
+ /* Checking netif_xmit_frozen_or_stopped() again to
+ * make sure STATE_MISSED is set if the STATE_MISSED
+ * set by netif_tx_wake_queue()'s rescheduling of
+ * net_tx_action() is cleared by the above clear_bit().
+ */
+ if (!netif_xmit_frozen_or_stopped(txq))
+ set_bit(__QDISC_STATE_MISSED, &q->state);
+ else
+ set_bit(__QDISC_STATE_DRAINING, &q->state);
+}
+
/* Main transmission queue. */
/* Modifications to data participating in scheduling must be protected with
@@ -74,6 +95,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
}
} else {
skb = SKB_XOFF_MAGIC;
+ qdisc_maybe_clear_missed(q, txq);
}
}
@@ -144,9 +166,13 @@ static inline void dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
skb = next;
}
- if (lock)
+
+ if (lock) {
spin_unlock(lock);
- __netif_schedule(q);
+ set_bit(__QDISC_STATE_MISSED, &q->state);
+ } else {
+ __netif_schedule(q);
+ }
}
static void try_bulk_dequeue_skb(struct Qdisc *q,
@@ -242,6 +268,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
}
} else {
skb = NULL;
+ qdisc_maybe_clear_missed(q, txq);
}
if (lock)
spin_unlock(lock);
@@ -251,8 +278,10 @@ validate:
*validate = true;
if ((q->flags & TCQ_F_ONETXQUEUE) &&
- netif_xmit_frozen_or_stopped(txq))
+ netif_xmit_frozen_or_stopped(txq)) {
+ qdisc_maybe_clear_missed(q, txq);
return skb;
+ }
skb = qdisc_dequeue_skb_bad_txq(q);
if (unlikely(skb)) {
@@ -275,8 +304,8 @@ trace:
/*
* Transmit possibly several skbs, and handle the return status as
- * required. Owning running seqcount bit guarantees that
- * only one CPU can execute this function.
+ * required. Owning qdisc running bit guarantees that only one CPU
+ * can execute this function.
*
* Returns to the caller:
* false - hardware queue frozen backoff
@@ -311,6 +340,8 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_stopped(txq))
skb = dev_hard_start_xmit(skb, dev, txq, &ret);
+ else
+ qdisc_maybe_clear_missed(q, txq);
HARD_TX_UNLOCK(dev, txq);
} else {
@@ -378,13 +409,17 @@ static inline bool qdisc_restart(struct Qdisc *q, int *packets)
void __qdisc_run(struct Qdisc *q)
{
- int quota = dev_tx_weight;
+ int quota = READ_ONCE(dev_tx_weight);
int packets;
while (qdisc_restart(q, &packets)) {
quota -= packets;
if (quota <= 0) {
- __netif_schedule(q);
+ if (q->flags & TCQ_F_NOLOCK)
+ set_bit(__QDISC_STATE_MISSED, &q->state);
+ else
+ __netif_schedule(q);
+
break;
}
}
@@ -392,16 +427,12 @@ void __qdisc_run(struct Qdisc *q)
unsigned long dev_trans_start(struct net_device *dev)
{
- unsigned long val, res;
+ unsigned long res = READ_ONCE(netdev_get_tx_queue(dev, 0)->trans_start);
+ unsigned long val;
unsigned int i;
- if (is_vlan_dev(dev))
- dev = vlan_dev_real_dev(dev);
- else if (netif_is_macvlan(dev))
- dev = macvlan_dev_real_dev(dev);
- res = netdev_get_tx_queue(dev, 0)->trans_start;
for (i = 1; i < dev->num_tx_queues; i++) {
- val = netdev_get_tx_queue(dev, i)->trans_start;
+ val = READ_ONCE(netdev_get_tx_queue(dev, i)->trans_start);
if (val && time_after(val, res))
res = val;
}
@@ -410,11 +441,63 @@ unsigned long dev_trans_start(struct net_device *dev)
}
EXPORT_SYMBOL(dev_trans_start);
+static void netif_freeze_queues(struct net_device *dev)
+{
+ unsigned int i;
+ int cpu;
+
+ cpu = smp_processor_id();
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+ /* We are the only thread of execution doing a
+ * freeze, but we have to grab the _xmit_lock in
+ * order to synchronize with threads which are in
+ * the ->hard_start_xmit() handler and already
+ * checked the frozen bit.
+ */
+ __netif_tx_lock(txq, cpu);
+ set_bit(__QUEUE_STATE_FROZEN, &txq->state);
+ __netif_tx_unlock(txq);
+ }
+}
+
+void netif_tx_lock(struct net_device *dev)
+{
+ spin_lock(&dev->tx_global_lock);
+ netif_freeze_queues(dev);
+}
+EXPORT_SYMBOL(netif_tx_lock);
+
+static void netif_unfreeze_queues(struct net_device *dev)
+{
+ unsigned int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+
+ /* No need to grab the _xmit_lock here. If the
+ * queue is not stopped for another reason, we
+ * force a schedule.
+ */
+ clear_bit(__QUEUE_STATE_FROZEN, &txq->state);
+ netif_schedule_queue(txq);
+ }
+}
+
+void netif_tx_unlock(struct net_device *dev)
+{
+ netif_unfreeze_queues(dev);
+ spin_unlock(&dev->tx_global_lock);
+}
+EXPORT_SYMBOL(netif_tx_unlock);
+
static void dev_watchdog(struct timer_list *t)
{
struct net_device *dev = from_timer(dev, t, watchdog_timer);
+ bool release = true;
- netif_tx_lock(dev);
+ spin_lock(&dev->tx_global_lock);
if (!qdisc_tx_is_noop(dev)) {
if (netif_device_present(dev) &&
netif_running(dev) &&
@@ -427,31 +510,34 @@ static void dev_watchdog(struct timer_list *t)
struct netdev_queue *txq;
txq = netdev_get_tx_queue(dev, i);
- trans_start = txq->trans_start;
+ trans_start = READ_ONCE(txq->trans_start);
if (netif_xmit_stopped(txq) &&
time_after(jiffies, (trans_start +
dev->watchdog_timeo))) {
some_queue_timedout = 1;
- txq->trans_timeout++;
+ atomic_long_inc(&txq->trans_timeout);
break;
}
}
- if (some_queue_timedout) {
+ if (unlikely(some_queue_timedout)) {
trace_net_dev_xmit_timeout(dev, i);
WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
dev->name, netdev_drivername(dev), i);
+ netif_freeze_queues(dev);
dev->netdev_ops->ndo_tx_timeout(dev, i);
+ netif_unfreeze_queues(dev);
}
if (!mod_timer(&dev->watchdog_timer,
round_jiffies(jiffies +
dev->watchdog_timeo)))
- dev_hold(dev);
+ release = false;
}
}
- netif_tx_unlock(dev);
+ spin_unlock(&dev->tx_global_lock);
- dev_put(dev);
+ if (release)
+ netdev_put(dev, &dev->watchdog_dev_tracker);
}
void __netdev_watchdog_up(struct net_device *dev)
@@ -461,9 +547,11 @@ void __netdev_watchdog_up(struct net_device *dev)
dev->watchdog_timeo = 5*HZ;
if (!mod_timer(&dev->watchdog_timer,
round_jiffies(jiffies + dev->watchdog_timeo)))
- dev_hold(dev);
+ netdev_hold(dev, &dev->watchdog_dev_tracker,
+ GFP_ATOMIC);
}
}
+EXPORT_SYMBOL_GPL(__netdev_watchdog_up);
static void dev_watchdog_up(struct net_device *dev)
{
@@ -474,7 +562,7 @@ static void dev_watchdog_down(struct net_device *dev)
{
netif_tx_lock_bh(dev);
if (del_timer(&dev->watchdog_timer))
- dev_put(dev);
+ netdev_put(dev, &dev->watchdog_dev_tracker);
netif_tx_unlock_bh(dev);
}
@@ -514,6 +602,24 @@ void netif_carrier_off(struct net_device *dev)
}
EXPORT_SYMBOL(netif_carrier_off);
+/**
+ * netif_carrier_event - report carrier state event
+ * @dev: network device
+ *
+ * Device has detected a carrier event but the carrier state wasn't changed.
+ * Use in drivers when querying carrier state asynchronously, to avoid missing
+ * events (link flaps) if link recovers before it's queried.
+ */
+void netif_carrier_event(struct net_device *dev)
+{
+ if (dev->reg_state == NETREG_UNINITIALIZED)
+ return;
+ atomic_inc(&dev->carrier_up_count);
+ atomic_inc(&dev->carrier_down_count);
+ linkwatch_fire_event(dev);
+}
+EXPORT_SYMBOL_GPL(netif_carrier_event);
+
/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
under all circumstances. It is difficult to invent anything faster or
cheaper.
@@ -552,7 +658,6 @@ struct Qdisc noop_qdisc = {
.ops = &noop_qdisc_ops,
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.dev_queue = &noop_netdev_queue,
- .running = SEQCNT_ZERO(noop_qdisc.running),
.busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
.gso_skb = {
.next = (struct sk_buff *)&noop_qdisc.gso_skb,
@@ -639,8 +744,10 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
struct sk_buff *skb = NULL;
+ bool need_retry = true;
int band;
+retry:
for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
struct skb_array *q = band2list(priv, band);
@@ -651,8 +758,24 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
}
if (likely(skb)) {
qdisc_update_stats_at_dequeue(qdisc, skb);
- } else {
- WRITE_ONCE(qdisc->empty, true);
+ } else if (need_retry &&
+ READ_ONCE(qdisc->state) & QDISC_STATE_NON_EMPTY) {
+ /* Delay clearing the STATE_MISSED here to reduce
+ * the overhead of the second spin_trylock() in
+ * qdisc_run_begin() and __netif_schedule() calling
+ * in qdisc_run_end().
+ */
+ clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
+
+ /* Make sure dequeuing happens after clearing
+ * STATE_MISSED.
+ */
+ smp_mb__after_atomic();
+
+ need_retry = false;
+
+ goto retry;
}
return skb;
@@ -794,13 +917,14 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
};
EXPORT_SYMBOL(pfifo_fast_ops);
+static struct lock_class_key qdisc_tx_busylock;
+
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
const struct Qdisc_ops *ops,
struct netlink_ext_ack *extack)
{
- void *p;
struct Qdisc *sch;
- unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
+ unsigned int size = sizeof(*sch) + ops->priv_size;
int err = -ENOBUFS;
struct net_device *dev;
@@ -811,30 +935,18 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
}
dev = dev_queue->dev;
- p = kzalloc_node(size, GFP_KERNEL,
- netdev_queue_numa_node_read(dev_queue));
+ sch = kzalloc_node(size, GFP_KERNEL, netdev_queue_numa_node_read(dev_queue));
- if (!p)
+ if (!sch)
goto errout;
- sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
- /* if we got non aligned memory, ask more and do alignment ourself */
- if (sch != p) {
- kfree(p);
- p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
- netdev_queue_numa_node_read(dev_queue));
- if (!p)
- goto errout;
- sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
- sch->padded = (char *) sch - (char *) p;
- }
__skb_queue_head_init(&sch->gso_skb);
__skb_queue_head_init(&sch->skb_bad_txq);
- qdisc_skb_head_init(&sch->q);
+ gnet_stats_basic_sync_init(&sch->bstats);
spin_lock_init(&sch->q.lock);
if (ops->static_flags & TCQ_F_CPUSTATS) {
sch->cpu_bstats =
- netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+ netdev_alloc_pcpu_stats(struct gnet_stats_basic_sync);
if (!sch->cpu_bstats)
goto errout1;
@@ -846,28 +958,25 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
}
spin_lock_init(&sch->busylock);
+ lockdep_set_class(&sch->busylock,
+ dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+
/* seqlock has the same scope of busylock, for NOLOCK qdisc */
spin_lock_init(&sch->seqlock);
- seqcount_init(&sch->running);
+ lockdep_set_class(&sch->seqlock,
+ dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
sch->ops = ops;
sch->flags = ops->static_flags;
sch->enqueue = ops->enqueue;
sch->dequeue = ops->dequeue;
sch->dev_queue = dev_queue;
- sch->empty = true;
- dev_hold(dev);
+ netdev_hold(dev, &sch->dev_tracker, GFP_KERNEL);
refcount_set(&sch->refcnt, 1);
- if (sch != &noop_qdisc) {
- lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key);
- lockdep_set_class(&sch->seqlock, &dev->qdisc_tx_busylock_key);
- lockdep_set_class(&sch->running, &dev->qdisc_running_key);
- }
-
return sch;
errout1:
- kfree(p);
+ kfree(sch);
errout:
return ERR_PTR(err);
}
@@ -891,8 +1000,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
}
sch->parent = parentid;
- if (!ops->init || ops->init(sch, NULL, extack) == 0)
+ if (!ops->init || ops->init(sch, NULL, extack) == 0) {
+ trace_qdisc_create(ops, dev_queue->dev, parentid);
return sch;
+ }
qdisc_put(sch);
return NULL;
@@ -904,20 +1015,14 @@ EXPORT_SYMBOL(qdisc_create_dflt);
void qdisc_reset(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
- struct sk_buff *skb, *tmp;
+
+ trace_qdisc_reset(qdisc);
if (ops->reset)
ops->reset(qdisc);
- skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
- __skb_unlink(skb, &qdisc->gso_skb);
- kfree_skb_list(skb);
- }
-
- skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
- __skb_unlink(skb, &qdisc->skb_bad_txq);
- kfree_skb_list(skb);
- }
+ __skb_queue_purge(&qdisc->gso_skb);
+ __skb_queue_purge(&qdisc->skb_bad_txq);
qdisc->q.qlen = 0;
qdisc->qstats.backlog = 0;
@@ -931,7 +1036,7 @@ void qdisc_free(struct Qdisc *qdisc)
free_percpu(qdisc->cpu_qstats);
}
- kfree((char *) qdisc - qdisc->padded);
+ kfree(qdisc);
}
static void qdisc_free_cb(struct rcu_head *head)
@@ -944,7 +1049,6 @@ static void qdisc_free_cb(struct rcu_head *head)
static void qdisc_destroy(struct Qdisc *qdisc)
{
const struct Qdisc_ops *ops = qdisc->ops;
- struct sk_buff *skb, *tmp;
#ifdef CONFIG_NET_SCHED
qdisc_hash_del(qdisc);
@@ -952,23 +1056,16 @@ static void qdisc_destroy(struct Qdisc *qdisc)
qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
gen_kill_estimator(&qdisc->rate_est);
- if (ops->reset)
- ops->reset(qdisc);
+
+ qdisc_reset(qdisc);
+
if (ops->destroy)
ops->destroy(qdisc);
module_put(ops->owner);
- dev_put(qdisc_dev(qdisc));
-
- skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
- __skb_unlink(skb, &qdisc->gso_skb);
- kfree_skb_list(skb);
- }
+ netdev_put(qdisc_dev(qdisc), &qdisc->dev_tracker);
- skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
- __skb_unlink(skb, &qdisc->skb_bad_txq);
- kfree_skb_list(skb);
- }
+ trace_qdisc_destroy(qdisc);
call_rcu(&qdisc->rcu, qdisc_free_cb);
}
@@ -1024,6 +1121,21 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
}
EXPORT_SYMBOL(dev_graft_qdisc);
+static void shutdown_scheduler_queue(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_qdisc_default)
+{
+ struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
+ struct Qdisc *qdisc_default = _qdisc_default;
+
+ if (qdisc) {
+ rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
+ dev_queue->qdisc_sleeping = qdisc_default;
+
+ qdisc_put(qdisc);
+ }
+}
+
static void attach_one_default_qdisc(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_unused)
@@ -1037,10 +1149,9 @@ static void attach_one_default_qdisc(struct net_device *dev,
ops = &pfifo_fast_ops;
qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
- if (!qdisc) {
- netdev_info(dev, "activation failed\n");
+ if (!qdisc)
return;
- }
+
if (!netif_is_multiqueue(dev))
qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
dev_queue->qdisc_sleeping = qdisc;
@@ -1056,18 +1167,34 @@ static void attach_default_qdiscs(struct net_device *dev)
if (!netif_is_multiqueue(dev) ||
dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
- dev->qdisc = txq->qdisc_sleeping;
- qdisc_refcount_inc(dev->qdisc);
+ qdisc = txq->qdisc_sleeping;
+ rcu_assign_pointer(dev->qdisc, qdisc);
+ qdisc_refcount_inc(qdisc);
} else {
qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT, NULL);
if (qdisc) {
- dev->qdisc = qdisc;
+ rcu_assign_pointer(dev->qdisc, qdisc);
qdisc->ops->attach(qdisc);
}
}
+ qdisc = rtnl_dereference(dev->qdisc);
+
+ /* Detect default qdisc setup/init failed and fallback to "noqueue" */
+ if (qdisc == &noop_qdisc) {
+ netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
+ default_qdisc_ops->id, noqueue_qdisc_ops.id);
+ netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
+ dev->priv_flags |= IFF_NO_QUEUE;
+ netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
+ qdisc = txq->qdisc_sleeping;
+ rcu_assign_pointer(dev->qdisc, qdisc);
+ qdisc_refcount_inc(qdisc);
+ dev->priv_flags ^= IFF_NO_QUEUE;
+ }
+
#ifdef CONFIG_NET_SCHED
- if (dev->qdisc != &noop_qdisc)
- qdisc_hash_add(dev->qdisc, false);
+ if (qdisc != &noop_qdisc)
+ qdisc_hash_add(qdisc, false);
#endif
}
@@ -1083,7 +1210,7 @@ static void transition_one_qdisc(struct net_device *dev,
rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
if (need_watchdog_p) {
- dev_queue->trans_start = 0;
+ WRITE_ONCE(dev_queue->trans_start, 0);
*need_watchdog_p = 1;
}
}
@@ -1097,7 +1224,7 @@ void dev_activate(struct net_device *dev)
* and noqueue_qdisc for virtual interfaces
*/
- if (dev->qdisc == &noop_qdisc)
+ if (rtnl_dereference(dev->qdisc) == &noop_qdisc)
attach_default_qdiscs(dev);
if (!netif_carrier_ok(dev))
@@ -1116,6 +1243,14 @@ void dev_activate(struct net_device *dev)
}
EXPORT_SYMBOL(dev_activate);
+static void qdisc_deactivate(struct Qdisc *qdisc)
+{
+ if (qdisc->flags & TCQ_F_BUILTIN)
+ return;
+
+ set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
+}
+
static void dev_deactivate_queue(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_qdisc_default)
@@ -1125,21 +1260,35 @@ static void dev_deactivate_queue(struct net_device *dev,
qdisc = rtnl_dereference(dev_queue->qdisc);
if (qdisc) {
- bool nolock = qdisc->flags & TCQ_F_NOLOCK;
+ qdisc_deactivate(qdisc);
+ rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
+ }
+}
- if (nolock)
- spin_lock_bh(&qdisc->seqlock);
- spin_lock_bh(qdisc_lock(qdisc));
+static void dev_reset_queue(struct net_device *dev,
+ struct netdev_queue *dev_queue,
+ void *_unused)
+{
+ struct Qdisc *qdisc;
+ bool nolock;
- if (!(qdisc->flags & TCQ_F_BUILTIN))
- set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
+ qdisc = dev_queue->qdisc_sleeping;
+ if (!qdisc)
+ return;
- rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
- qdisc_reset(qdisc);
+ nolock = qdisc->flags & TCQ_F_NOLOCK;
- spin_unlock_bh(qdisc_lock(qdisc));
- if (nolock)
- spin_unlock_bh(&qdisc->seqlock);
+ if (nolock)
+ spin_lock_bh(&qdisc->seqlock);
+ spin_lock_bh(qdisc_lock(qdisc));
+
+ qdisc_reset(qdisc);
+
+ spin_unlock_bh(qdisc_lock(qdisc));
+ if (nolock) {
+ clear_bit(__QDISC_STATE_MISSED, &qdisc->state);
+ clear_bit(__QDISC_STATE_DRAINING, &qdisc->state);
+ spin_unlock_bh(&qdisc->seqlock);
}
}
@@ -1170,16 +1319,6 @@ static bool some_qdisc_is_busy(struct net_device *dev)
return false;
}
-static void dev_qdisc_reset(struct net_device *dev,
- struct netdev_queue *dev_queue,
- void *none)
-{
- struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
-
- if (qdisc)
- qdisc_reset(qdisc);
-}
-
/**
* dev_deactivate_many - deactivate transmissions on several devices
* @head: list of devices to deactivate
@@ -1201,12 +1340,20 @@ void dev_deactivate_many(struct list_head *head)
dev_watchdog_down(dev);
}
- /* Wait for outstanding qdisc-less dev_queue_xmit calls.
+ /* Wait for outstanding qdisc-less dev_queue_xmit calls or
+ * outstanding qdisc enqueuing calls.
* This is avoided if all devices are in dismantle phase :
* Caller will call synchronize_net() for us
*/
synchronize_net();
+ list_for_each_entry(dev, head, close_list) {
+ netdev_for_each_tx_queue(dev, dev_reset_queue, NULL);
+
+ if (dev_ingress_queue(dev))
+ dev_reset_queue(dev, dev_ingress_queue(dev), NULL);
+ }
+
/* Wait for outstanding qdisc_run calls. */
list_for_each_entry(dev, head, close_list) {
while (some_qdisc_is_busy(dev)) {
@@ -1216,12 +1363,6 @@ void dev_deactivate_many(struct list_head *head)
*/
schedule_timeout_uninterruptible(1);
}
- /* The new qdisc is assigned at this point so we can safely
- * unwind stale skb lists and qdisc statistics
- */
- netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
- if (dev_ingress_queue(dev))
- dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
}
}
@@ -1246,6 +1387,39 @@ static int qdisc_change_tx_queue_len(struct net_device *dev,
return 0;
}
+void dev_qdisc_change_real_num_tx(struct net_device *dev,
+ unsigned int new_real_tx)
+{
+ struct Qdisc *qdisc = rtnl_dereference(dev->qdisc);
+
+ if (qdisc->ops->change_real_num_tx)
+ qdisc->ops->change_real_num_tx(qdisc, new_real_tx);
+}
+
+void mq_change_real_num_tx(struct Qdisc *sch, unsigned int new_real_tx)
+{
+#ifdef CONFIG_NET_SCHED
+ struct net_device *dev = qdisc_dev(sch);
+ struct Qdisc *qdisc;
+ unsigned int i;
+
+ for (i = new_real_tx; i < dev->real_num_tx_queues; i++) {
+ qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+ /* Only update the default qdiscs we created,
+ * qdiscs with handles are always hashed.
+ */
+ if (qdisc != &noop_qdisc && !qdisc->handle)
+ qdisc_hash_del(qdisc);
+ }
+ for (i = dev->real_num_tx_queues; i < new_real_tx; i++) {
+ qdisc = netdev_get_tx_queue(dev, i)->qdisc_sleeping;
+ if (qdisc != &noop_qdisc && !qdisc->handle)
+ qdisc_hash_add(qdisc, false);
+ }
+#endif
+}
+EXPORT_SYMBOL(mq_change_real_num_tx);
+
int dev_qdisc_change_tx_queue_len(struct net_device *dev)
{
bool up = dev->flags & IFF_UP;
@@ -1280,7 +1454,7 @@ static void dev_init_scheduler_queue(struct net_device *dev,
void dev_init_scheduler(struct net_device *dev)
{
- dev->qdisc = &noop_qdisc;
+ rcu_assign_pointer(dev->qdisc, &noop_qdisc);
netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
@@ -1288,71 +1462,78 @@ void dev_init_scheduler(struct net_device *dev)
timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
}
-static void shutdown_scheduler_queue(struct net_device *dev,
- struct netdev_queue *dev_queue,
- void *_qdisc_default)
-{
- struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
- struct Qdisc *qdisc_default = _qdisc_default;
-
- if (qdisc) {
- rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
- dev_queue->qdisc_sleeping = qdisc_default;
-
- qdisc_put(qdisc);
- }
-}
-
void dev_shutdown(struct net_device *dev)
{
netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
if (dev_ingress_queue(dev))
shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
- qdisc_put(dev->qdisc);
- dev->qdisc = &noop_qdisc;
+ qdisc_put(rtnl_dereference(dev->qdisc));
+ rcu_assign_pointer(dev->qdisc, &noop_qdisc);
WARN_ON(timer_pending(&dev->watchdog_timer));
}
+/**
+ * psched_ratecfg_precompute__() - Pre-compute values for reciprocal division
+ * @rate: Rate to compute reciprocal division values of
+ * @mult: Multiplier for reciprocal division
+ * @shift: Shift for reciprocal division
+ *
+ * The multiplier and shift for reciprocal division by rate are stored
+ * in mult and shift.
+ *
+ * The deal here is to replace a divide by a reciprocal one
+ * in fast path (a reciprocal divide is a multiply and a shift)
+ *
+ * Normal formula would be :
+ * time_in_ns = (NSEC_PER_SEC * len) / rate_bps
+ *
+ * We compute mult/shift to use instead :
+ * time_in_ns = (len * mult) >> shift;
+ *
+ * We try to get the highest possible mult value for accuracy,
+ * but have to make sure no overflows will ever happen.
+ *
+ * reciprocal_value() is not used here it doesn't handle 64-bit values.
+ */
+static void psched_ratecfg_precompute__(u64 rate, u32 *mult, u8 *shift)
+{
+ u64 factor = NSEC_PER_SEC;
+
+ *mult = 1;
+ *shift = 0;
+
+ if (rate <= 0)
+ return;
+
+ for (;;) {
+ *mult = div64_u64(factor, rate);
+ if (*mult & (1U << 31) || factor & (1ULL << 63))
+ break;
+ factor <<= 1;
+ (*shift)++;
+ }
+}
+
void psched_ratecfg_precompute(struct psched_ratecfg *r,
const struct tc_ratespec *conf,
u64 rate64)
{
memset(r, 0, sizeof(*r));
r->overhead = conf->overhead;
+ r->mpu = conf->mpu;
r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
- r->mult = 1;
- /*
- * The deal here is to replace a divide by a reciprocal one
- * in fast path (a reciprocal divide is a multiply and a shift)
- *
- * Normal formula would be :
- * time_in_ns = (NSEC_PER_SEC * len) / rate_bps
- *
- * We compute mult/shift to use instead :
- * time_in_ns = (len * mult) >> shift;
- *
- * We try to get the highest possible mult value for accuracy,
- * but have to make sure no overflows will ever happen.
- */
- if (r->rate_bytes_ps > 0) {
- u64 factor = NSEC_PER_SEC;
-
- for (;;) {
- r->mult = div64_u64(factor, r->rate_bytes_ps);
- if (r->mult & (1U << 31) || factor & (1ULL << 63))
- break;
- factor <<= 1;
- r->shift++;
- }
- }
+ psched_ratecfg_precompute__(r->rate_bytes_ps, &r->mult, &r->shift);
}
EXPORT_SYMBOL(psched_ratecfg_precompute);
-static void mini_qdisc_rcu_func(struct rcu_head *head)
+void psched_ppscfg_precompute(struct psched_pktrate *r, u64 pktrate64)
{
+ r->rate_pkts_ps = pktrate64;
+ psched_ratecfg_precompute__(r->rate_pkts_ps, &r->mult, &r->shift);
}
+EXPORT_SYMBOL(psched_ppscfg_precompute);
void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
struct tcf_proto *tp_head)
@@ -1366,31 +1547,41 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
if (!tp_head) {
RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
- /* Wait for flying RCU callback before it is freed. */
- rcu_barrier();
- return;
- }
+ } else {
+ miniq = miniq_old != &miniqp->miniq1 ?
+ &miniqp->miniq1 : &miniqp->miniq2;
- miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
- &miniqp->miniq1 : &miniqp->miniq2;
+ /* We need to make sure that readers won't see the miniq
+ * we are about to modify. So ensure that at least one RCU
+ * grace period has elapsed since the miniq was made
+ * inactive.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ cond_synchronize_rcu(miniq->rcu_state);
+ else if (!poll_state_synchronize_rcu(miniq->rcu_state))
+ synchronize_rcu_expedited();
- /* We need to make sure that readers won't see the miniq
- * we are about to modify. So wait until previous call_rcu callback
- * is done.
- */
- rcu_barrier();
- miniq->filter_list = tp_head;
- rcu_assign_pointer(*miniqp->p_miniq, miniq);
+ miniq->filter_list = tp_head;
+ rcu_assign_pointer(*miniqp->p_miniq, miniq);
+ }
if (miniq_old)
- /* This is counterpart of the rcu barriers above. We need to
+ /* This is counterpart of the rcu sync above. We need to
* block potential new user of miniq_old until all readers
* are not seeing it.
*/
- call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func);
+ miniq_old->rcu_state = start_poll_synchronize_rcu();
}
EXPORT_SYMBOL(mini_qdisc_pair_swap);
+void mini_qdisc_pair_block_init(struct mini_Qdisc_pair *miniqp,
+ struct tcf_block *block)
+{
+ miniqp->miniq1.block = block;
+ miniqp->miniq2.block = block;
+}
+EXPORT_SYMBOL(mini_qdisc_pair_block_init);
+
void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
struct mini_Qdisc __rcu **p_miniq)
{
@@ -1398,6 +1589,8 @@ void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
+ miniqp->miniq1.rcu_state = get_state_synchronize_rcu();
+ miniqp->miniq2.rcu_state = miniqp->miniq1.rcu_state;
miniqp->p_miniq = p_miniq;
}
EXPORT_SYMBOL(mini_qdisc_pair_init);