From 5772e9a3463b264cee5a4e73ef586ad482d7ba48 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 1 Oct 2014 22:35:59 +0200 Subject: qdisc: bulk dequeue support for qdiscs with TCQ_F_ONETXQUEUE Based on DaveM's recent API work on dev_hard_start_xmit(), that allows sending/processing an entire skb list. This patch implements qdisc bulk dequeue, by allowing multiple packets to be dequeued in dequeue_skb(). The optimization principle for this is two fold, (1) to amortize locking cost and (2) avoid expensive tailptr update for notifying HW. (1) Several packets are dequeued while holding the qdisc root_lock, amortizing locking cost over several packet. The dequeued SKB list is processed under the TXQ lock in dev_hard_start_xmit(), thus also amortizing the cost of the TXQ lock. (2) Further more, dev_hard_start_xmit() will utilize the skb->xmit_more API to delay HW tailptr update, which also reduces the cost per packet. One restriction of the new API is that every SKB must belong to the same TXQ. This patch takes the easy way out, by restricting bulk dequeue to qdisc's with the TCQ_F_ONETXQUEUE flag, that specifies the qdisc only have attached a single TXQ. Some detail about the flow; dev_hard_start_xmit() will process the skb list, and transmit packets individually towards the driver (see xmit_one()). In case the driver stops midway in the list, the remaining skb list is returned by dev_hard_start_xmit(). In sch_direct_xmit() this returned list is requeued by dev_requeue_skb(). To avoid overshooting the HW limits, which results in requeuing, the patch limits the amount of bytes dequeued, based on the drivers BQL limits. In-effect bulking will only happen for BQL enabled drivers. Small amounts for extra HoL blocking (2x MTU/0.24ms) were measured at 100Mbit/s, with bulking 8 packets, but the oscillating nature of the measurement indicate something, like sched latency might be causing this effect. More comparisons show, that this oscillation goes away occationally. Thus, we disregard this artifact completely and remove any "magic" bulking limit. For now, as a conservative approach, stop bulking when seeing TSO and segmented GSO packets. They already benefit from bulking on their own. A followup patch add this, to allow easier bisect-ability for finding regressions. Jointed work with Hannes, Daniel and Florian. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/sch_generic.h | 16 ++++++++++++++++ net/sched/sch_generic.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f12669819d1a..d17ed6fb2f70 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -119,6 +120,21 @@ static inline void qdisc_run_end(struct Qdisc *qdisc) qdisc->__state &= ~__QDISC___STATE_RUNNING; } +static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) +{ + return qdisc->flags & TCQ_F_ONETXQUEUE; +} + +static inline int qdisc_avail_bulklimit(const struct netdev_queue *txq) +{ +#ifdef CONFIG_BQL + /* Non-BQL migrated drivers will return 0, too. */ + return dql_avail(&txq->dql); +#else + return 0; +#endif +} + static inline bool qdisc_is_throttled(const struct Qdisc *qdisc) { return test_bit(__QDISC_STATE_THROTTLED, &qdisc->state) ? true : false; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 7c8e5d73d433..c2e87e63b832 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -56,6 +56,41 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) return 0; } +static struct sk_buff *try_bulk_dequeue_skb(struct Qdisc *q, + struct sk_buff *head_skb, + int bytelimit) +{ + struct sk_buff *skb, *tail_skb = head_skb; + + while (bytelimit > 0) { + /* For now, don't bulk dequeue GSO (or GSO segmented) pkts */ + if (tail_skb->next || skb_is_gso(tail_skb)) + break; + + skb = q->dequeue(q); + if (!skb) + break; + + bytelimit -= skb->len; /* covers GSO len */ + skb = validate_xmit_skb(skb, qdisc_dev(q)); + if (!skb) + break; + + /* "skb" can be a skb list after validate call above + * (GSO segmented), but it is okay to append it to + * current tail_skb->next, because next round will exit + * in-case "tail_skb->next" is a skb list. + */ + tail_skb->next = skb; + tail_skb = skb; + } + + return head_skb; +} + +/* Note that dequeue_skb can possibly return a SKB list (via skb->next). + * A requeued skb (via q->gso_skb) can also be a SKB list. + */ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { struct sk_buff *skb = q->gso_skb; @@ -70,10 +105,17 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) } else skb = NULL; } else { - if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) { + if (!(q->flags & TCQ_F_ONETXQUEUE) || + !netif_xmit_frozen_or_stopped(txq)) { + int bytelimit = qdisc_avail_bulklimit(txq); + skb = q->dequeue(q); - if (skb) + if (skb) { + bytelimit -= skb->len; skb = validate_xmit_skb(skb, qdisc_dev(q)); + } + if (skb && qdisc_may_bulk(q)) + skb = try_bulk_dequeue_skb(q, skb, bytelimit); } } -- cgit v1.2.3-59-g8ed1b From 808e7ac0bdef31204184904f6b3ea356a30a9ed5 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Wed, 1 Oct 2014 22:36:09 +0200 Subject: qdisc: dequeue bulking also pickup GSO/TSO packets The TSO and GSO segmented packets already benefit from bulking on their own. The TSO packets have always taken advantage of the only updating the tailptr once for a large packet. The GSO segmented packets have recently taken advantage of bulking xmit_more API, via merge commit 53fda7f7f9e8 ("Merge branch 'xmit_list'"), specifically via commit 7f2e870f2a4 ("net: Move main gso loop out of dev_hard_start_xmit() into helper.") allowing qdisc requeue of remaining list. And via commit ce93718fb7cd ("net: Don't keep around original SKB when we software segment GSO frames."). This patch allow further bulking of TSO/GSO packets together, when dequeueing from the qdisc. Testing: Measuring HoL (Head-of-Line) blocking for TSO and GSO, with netperf-wrapper. Bulking several TSO show no performance regressions (requeues were in the area 32 requeues/sec). Bulking several GSOs does show small regression or very small improvement (requeues were in the area 8000 requeues/sec). Using ixgbe 10Gbit/s with GSO bulking, we can measure some additional latency. Base-case, which is "normal" GSO bulking, sees varying high-prio queue delay between 0.38ms to 0.47ms. Bulking several GSOs together, result in a stable high-prio queue delay of 0.50ms. Using igb at 100Mbit/s with GSO bulking, shows an improvement. Base-case sees varying high-prio queue delay between 2.23ms to 2.35ms Signed-off-by: David S. Miller --- net/sched/sch_generic.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index c2e87e63b832..797ebef73642 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -63,10 +63,6 @@ static struct sk_buff *try_bulk_dequeue_skb(struct Qdisc *q, struct sk_buff *skb, *tail_skb = head_skb; while (bytelimit > 0) { - /* For now, don't bulk dequeue GSO (or GSO segmented) pkts */ - if (tail_skb->next || skb_is_gso(tail_skb)) - break; - skb = q->dequeue(q); if (!skb) break; @@ -76,11 +72,9 @@ static struct sk_buff *try_bulk_dequeue_skb(struct Qdisc *q, if (!skb) break; - /* "skb" can be a skb list after validate call above - * (GSO segmented), but it is okay to append it to - * current tail_skb->next, because next round will exit - * in-case "tail_skb->next" is a skb list. - */ + while (tail_skb->next) /* GSO list goto tail */ + tail_skb = tail_skb->next; + tail_skb->next = skb; tail_skb = skb; } -- cgit v1.2.3-59-g8ed1b