Diffstat (limited to 'net/sched')
-rw-r--r--  net/sched/Kconfig       |   2
-rw-r--r--  net/sched/act_api.c     |   2
-rw-r--r--  net/sched/cls_basic.c   |   3
-rw-r--r--  net/sched/em_meta.c     | 295
-rw-r--r--  net/sched/sch_dsmark.c  |  16
-rw-r--r--  net/sched/sch_netem.c   | 209
6 files changed, 381 insertions, 146 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index b0941186f867..b22c9beb604d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -405,7 +405,7 @@ config NET_EMATCH_STACK
---help---
Size of the local stack variable used while evaluating the tree of
ematches. Limits the depth of the tree, i.e. the number of
- encapsulated precedences. Every level requires 4 bytes of addtional
+ encapsulated precedences. Every level requires 4 bytes of additional
stack space.
config NET_EMATCH_CMP
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index cafcb084098d..914c85ff8fe6 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -881,7 +881,7 @@ static int __init tc_action_init(void)
link_p[RTM_GETACTION-RTM_BASE].dumpit = tc_dump_action;
}
- printk("TC classifier action (bugs to netdev@oss.sgi.com cc "
+ printk("TC classifier action (bugs to netdev@vger.kernel.org cc "
"hadi@cyberus.ca)\n");
return 0;
}
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 0d2d4415f334..dfb300bb6baa 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -261,6 +261,9 @@ static int basic_dump(struct tcf_proto *tp, unsigned long fh,
rta = (struct rtattr *) b;
RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+ if (f->res.classid)
+ RTA_PUT(skb, TCA_BASIC_CLASSID, sizeof(u32), &f->res.classid);
+
if (tcf_exts_dump(skb, &f->exts, &basic_ext_map) < 0 ||
tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
goto rtattr_failure;
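The hunk above makes cls_basic report the configured classid back to userspace; previously TCA_BASIC_CLASSID was accepted on change but silently missing from dumps. For illustration only (none of this is in the patch), a stand-alone user-space sketch of how a dump consumer would pull that attribute out of a TCA_OPTIONS payload with the stock rtattr macros; the helper name is made up:

#include <stdio.h>
#include <string.h>
#include <linux/rtnetlink.h>
#include <linux/pkt_cls.h>

/* scan an rtattr run for the first u32 attribute of the given type */
static int find_u32_attr(struct rtattr *rta, int len,
			 unsigned short type, __u32 *out)
{
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		if (rta->rta_type == type &&
		    RTA_PAYLOAD(rta) >= sizeof(__u32)) {
			memcpy(out, RTA_DATA(rta), sizeof(__u32));
			return 0;
		}
	}
	return -1;
}

int main(void)
{
	/* hand-built one-attribute payload standing in for TCA_OPTIONS */
	struct {
		struct rtattr rta;
		__u32 val;
	} attr = { { RTA_LENGTH(sizeof(__u32)), TCA_BASIC_CLASSID }, 0x10002 };
	__u32 classid = 0;

	if (find_u32_attr(&attr.rta, sizeof(attr), TCA_BASIC_CLASSID, &classid) == 0)
		printf("classid %x:%x\n", classid >> 16, classid & 0xffff);
	return 0;
}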
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index f1eeaf65cee5..48bb23c2a35a 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -32,7 +32,7 @@
* +-----------+ +-----------+
* | |
* ---> meta_ops[INT][INDEV](...) |
- * | |
+ * | |
* ----------- |
* V V
* +-----------+ +-----------+
@@ -70,6 +70,7 @@
#include <net/dst.h>
#include <net/route.h>
#include <net/pkt_cls.h>
+#include <net/sock.h>
struct meta_obj
{
@@ -284,6 +285,214 @@ META_COLLECTOR(int_rtiif)
}
/**************************************************************************
+ * Socket Attributes
+ **************************************************************************/
+
+#define SKIP_NONLOCAL(skb) \
+ if (unlikely(skb->sk == NULL)) { \
+ *err = -1; \
+ return; \
+ }
+
+META_COLLECTOR(int_sk_family)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_family;
+}
+
+META_COLLECTOR(int_sk_state)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_state;
+}
+
+META_COLLECTOR(int_sk_reuse)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_reuse;
+}
+
+META_COLLECTOR(int_sk_bound_if)
+{
+ SKIP_NONLOCAL(skb);
+ /* No error if bound_dev_if is 0, legal userspace check */
+ dst->value = skb->sk->sk_bound_dev_if;
+}
+
+META_COLLECTOR(var_sk_bound_if)
+{
+ SKIP_NONLOCAL(skb);
+
+ if (skb->sk->sk_bound_dev_if == 0) {
+ dst->value = (unsigned long) "any";
+ dst->len = 3;
+ } else {
+ struct net_device *dev;
+
+ dev = dev_get_by_index(skb->sk->sk_bound_dev_if);
+ *err = var_dev(dev, dst);
+ if (dev)
+ dev_put(dev);
+ }
+}
+
+META_COLLECTOR(int_sk_refcnt)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = atomic_read(&skb->sk->sk_refcnt);
+}
+
+META_COLLECTOR(int_sk_rcvbuf)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_rcvbuf;
+}
+
+META_COLLECTOR(int_sk_shutdown)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_shutdown;
+}
+
+META_COLLECTOR(int_sk_proto)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_protocol;
+}
+
+META_COLLECTOR(int_sk_type)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_type;
+}
+
+META_COLLECTOR(int_sk_rmem_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = atomic_read(&skb->sk->sk_rmem_alloc);
+}
+
+META_COLLECTOR(int_sk_wmem_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = atomic_read(&skb->sk->sk_wmem_alloc);
+}
+
+META_COLLECTOR(int_sk_omem_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = atomic_read(&skb->sk->sk_omem_alloc);
+}
+
+META_COLLECTOR(int_sk_rcv_qlen)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_receive_queue.qlen;
+}
+
+META_COLLECTOR(int_sk_snd_qlen)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_write_queue.qlen;
+}
+
+META_COLLECTOR(int_sk_wmem_queued)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_wmem_queued;
+}
+
+META_COLLECTOR(int_sk_fwd_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_forward_alloc;
+}
+
+META_COLLECTOR(int_sk_sndbuf)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_sndbuf;
+}
+
+META_COLLECTOR(int_sk_alloc)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_allocation;
+}
+
+META_COLLECTOR(int_sk_route_caps)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_route_caps;
+}
+
+META_COLLECTOR(int_sk_hashent)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_hashent;
+}
+
+META_COLLECTOR(int_sk_lingertime)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_lingertime / HZ;
+}
+
+META_COLLECTOR(int_sk_err_qlen)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_error_queue.qlen;
+}
+
+META_COLLECTOR(int_sk_ack_bl)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_ack_backlog;
+}
+
+META_COLLECTOR(int_sk_max_ack_bl)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_max_ack_backlog;
+}
+
+META_COLLECTOR(int_sk_prio)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_priority;
+}
+
+META_COLLECTOR(int_sk_rcvlowat)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_rcvlowat;
+}
+
+META_COLLECTOR(int_sk_rcvtimeo)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_rcvtimeo / HZ;
+}
+
+META_COLLECTOR(int_sk_sndtimeo)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_sndtimeo / HZ;
+}
+
+META_COLLECTOR(int_sk_sendmsg_off)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_sndmsg_off;
+}
+
+META_COLLECTOR(int_sk_write_pend)
+{
+ SKIP_NONLOCAL(skb);
+ dst->value = skb->sk->sk_write_pending;
+}
+
+/**************************************************************************
* Meta value collectors assignment table
**************************************************************************/
@@ -293,41 +502,75 @@ struct meta_ops
struct meta_value *, struct meta_obj *, int *);
};
+#define META_ID(name) TCF_META_ID_##name
+#define META_FUNC(name) { .get = meta_##name }
+
/* Meta value operations table listing all meta value collectors and
* assigns them to a type and meta id. */
static struct meta_ops __meta_ops[TCF_META_TYPE_MAX+1][TCF_META_ID_MAX+1] = {
[TCF_META_TYPE_VAR] = {
- [TCF_META_ID_DEV] = { .get = meta_var_dev },
- [TCF_META_ID_INDEV] = { .get = meta_var_indev },
- [TCF_META_ID_REALDEV] = { .get = meta_var_realdev }
+ [META_ID(DEV)] = META_FUNC(var_dev),
+ [META_ID(INDEV)] = META_FUNC(var_indev),
+ [META_ID(REALDEV)] = META_FUNC(var_realdev),
+ [META_ID(SK_BOUND_IF)] = META_FUNC(var_sk_bound_if),
},
[TCF_META_TYPE_INT] = {
- [TCF_META_ID_RANDOM] = { .get = meta_int_random },
- [TCF_META_ID_LOADAVG_0] = { .get = meta_int_loadavg_0 },
- [TCF_META_ID_LOADAVG_1] = { .get = meta_int_loadavg_1 },
- [TCF_META_ID_LOADAVG_2] = { .get = meta_int_loadavg_2 },
- [TCF_META_ID_DEV] = { .get = meta_int_dev },
- [TCF_META_ID_INDEV] = { .get = meta_int_indev },
- [TCF_META_ID_REALDEV] = { .get = meta_int_realdev },
- [TCF_META_ID_PRIORITY] = { .get = meta_int_priority },
- [TCF_META_ID_PROTOCOL] = { .get = meta_int_protocol },
- [TCF_META_ID_SECURITY] = { .get = meta_int_security },
- [TCF_META_ID_PKTTYPE] = { .get = meta_int_pkttype },
- [TCF_META_ID_PKTLEN] = { .get = meta_int_pktlen },
- [TCF_META_ID_DATALEN] = { .get = meta_int_datalen },
- [TCF_META_ID_MACLEN] = { .get = meta_int_maclen },
+ [META_ID(RANDOM)] = META_FUNC(int_random),
+ [META_ID(LOADAVG_0)] = META_FUNC(int_loadavg_0),
+ [META_ID(LOADAVG_1)] = META_FUNC(int_loadavg_1),
+ [META_ID(LOADAVG_2)] = META_FUNC(int_loadavg_2),
+ [META_ID(DEV)] = META_FUNC(int_dev),
+ [META_ID(INDEV)] = META_FUNC(int_indev),
+ [META_ID(REALDEV)] = META_FUNC(int_realdev),
+ [META_ID(PRIORITY)] = META_FUNC(int_priority),
+ [META_ID(PROTOCOL)] = META_FUNC(int_protocol),
+ [META_ID(SECURITY)] = META_FUNC(int_security),
+ [META_ID(PKTTYPE)] = META_FUNC(int_pkttype),
+ [META_ID(PKTLEN)] = META_FUNC(int_pktlen),
+ [META_ID(DATALEN)] = META_FUNC(int_datalen),
+ [META_ID(MACLEN)] = META_FUNC(int_maclen),
#ifdef CONFIG_NETFILTER
- [TCF_META_ID_NFMARK] = { .get = meta_int_nfmark },
+ [META_ID(NFMARK)] = META_FUNC(int_nfmark),
#endif
- [TCF_META_ID_TCINDEX] = { .get = meta_int_tcindex },
+ [META_ID(TCINDEX)] = META_FUNC(int_tcindex),
#ifdef CONFIG_NET_CLS_ACT
- [TCF_META_ID_TCVERDICT] = { .get = meta_int_tcverd },
- [TCF_META_ID_TCCLASSID] = { .get = meta_int_tcclassid },
+ [META_ID(TCVERDICT)] = META_FUNC(int_tcverd),
+ [META_ID(TCCLASSID)] = META_FUNC(int_tcclassid),
#endif
#ifdef CONFIG_NET_CLS_ROUTE
- [TCF_META_ID_RTCLASSID] = { .get = meta_int_rtclassid },
+ [META_ID(RTCLASSID)] = META_FUNC(int_rtclassid),
#endif
- [TCF_META_ID_RTIIF] = { .get = meta_int_rtiif }
+ [META_ID(RTIIF)] = META_FUNC(int_rtiif),
+ [META_ID(SK_FAMILY)] = META_FUNC(int_sk_family),
+ [META_ID(SK_STATE)] = META_FUNC(int_sk_state),
+ [META_ID(SK_REUSE)] = META_FUNC(int_sk_reuse),
+ [META_ID(SK_BOUND_IF)] = META_FUNC(int_sk_bound_if),
+ [META_ID(SK_REFCNT)] = META_FUNC(int_sk_refcnt),
+ [META_ID(SK_RCVBUF)] = META_FUNC(int_sk_rcvbuf),
+ [META_ID(SK_SNDBUF)] = META_FUNC(int_sk_sndbuf),
+ [META_ID(SK_SHUTDOWN)] = META_FUNC(int_sk_shutdown),
+ [META_ID(SK_PROTO)] = META_FUNC(int_sk_proto),
+ [META_ID(SK_TYPE)] = META_FUNC(int_sk_type),
+ [META_ID(SK_RMEM_ALLOC)] = META_FUNC(int_sk_rmem_alloc),
+ [META_ID(SK_WMEM_ALLOC)] = META_FUNC(int_sk_wmem_alloc),
+ [META_ID(SK_OMEM_ALLOC)] = META_FUNC(int_sk_omem_alloc),
+ [META_ID(SK_WMEM_QUEUED)] = META_FUNC(int_sk_wmem_queued),
+ [META_ID(SK_RCV_QLEN)] = META_FUNC(int_sk_rcv_qlen),
+ [META_ID(SK_SND_QLEN)] = META_FUNC(int_sk_snd_qlen),
+ [META_ID(SK_ERR_QLEN)] = META_FUNC(int_sk_err_qlen),
+ [META_ID(SK_FORWARD_ALLOCS)] = META_FUNC(int_sk_fwd_alloc),
+ [META_ID(SK_ALLOCS)] = META_FUNC(int_sk_alloc),
+ [META_ID(SK_ROUTE_CAPS)] = META_FUNC(int_sk_route_caps),
+ [META_ID(SK_HASHENT)] = META_FUNC(int_sk_hashent),
+ [META_ID(SK_LINGERTIME)] = META_FUNC(int_sk_lingertime),
+ [META_ID(SK_ACK_BACKLOG)] = META_FUNC(int_sk_ack_bl),
+ [META_ID(SK_MAX_ACK_BACKLOG)] = META_FUNC(int_sk_max_ack_bl),
+ [META_ID(SK_PRIO)] = META_FUNC(int_sk_prio),
+ [META_ID(SK_RCVLOWAT)] = META_FUNC(int_sk_rcvlowat),
+ [META_ID(SK_RCVTIMEO)] = META_FUNC(int_sk_rcvtimeo),
+ [META_ID(SK_SNDTIMEO)] = META_FUNC(int_sk_sndtimeo),
+ [META_ID(SK_SENDMSG_OFF)] = META_FUNC(int_sk_sendmsg_off),
+ [META_ID(SK_WRITE_PENDING)] = META_FUNC(int_sk_write_pend),
}
};
@@ -396,9 +639,9 @@ static int meta_int_compare(struct meta_obj *a, struct meta_obj *b)
/* Let gcc optimize it, the unlikely is not really based on
* some numbers but jump free code for mismatches seems
* more logical. */
- if (unlikely(a == b))
+ if (unlikely(a->value == b->value))
return 0;
- else if (a < b)
+ else if (a->value < b->value)
return -1;
else
return 1;
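Two notes on this file. First, with a matching iproute2 the new socket collectors become usable from filters along the lines of `basic match 'meta(sk_sndbuf gt 65535)'` (illustrative meta ematch syntax, not part of this patch). Second, the meta_int_compare hunk is a genuine bug fix: the old code compared the two struct pointers rather than the collected values, so distinct objects never compared equal. A stand-alone sketch of the corrected three-way compare (struct trimmed to the one relevant field, unlikely() hint dropped):

#include <assert.h>

struct meta_obj {
	unsigned long value;
};

/* three-way compare on the collected values, as in the fixed hunk */
static int meta_int_compare(const struct meta_obj *a, const struct meta_obj *b)
{
	if (a->value == b->value)
		return 0;
	return a->value < b->value ? -1 : 1;
}

int main(void)
{
	struct meta_obj x = { .value = 7 }, y = { .value = 7 };

	/* distinct pointers with equal values: the old pointer compare got this wrong */
	assert(meta_int_compare(&x, &y) == 0);
	return 0;
}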
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 8a3db9d95bab..d8bd2a569c7c 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -18,7 +18,7 @@
#include <asm/byteorder.h>
-#if 1 /* control */
+#if 0 /* control */
#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define DPRINTK(format,args...)
@@ -73,8 +73,13 @@ static int dsmark_graft(struct Qdisc *sch,unsigned long arg,
DPRINTK("dsmark_graft(sch %p,[qdisc %p],new %p,old %p)\n",sch,p,new,
old);
- if (!new)
- new = &noop_qdisc;
+
+ if (new == NULL) {
+ new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
+ if (new == NULL)
+ new = &noop_qdisc;
+ }
+
sch_tree_lock(sch);
*old = xchg(&p->q,new);
if (*old)
@@ -163,14 +168,15 @@ static void dsmark_walk(struct Qdisc *sch,struct qdisc_walker *walker)
return;
for (i = 0; i < p->indices; i++) {
if (p->mask[i] == 0xff && !p->value[i])
- continue;
+ goto ignore;
if (walker->count >= walker->skip) {
if (walker->fn(sch, i+1, walker) < 0) {
walker->stop = 1;
break;
}
}
- walker->count++;
+ignore:
+ walker->count++;
}
}
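The dsmark_walk change is subtle: the old `continue` skipped walker->count++ for unused entries, so the skip/count bookkeeping drifted and a resumed dump could revisit or miss classes. Every visited index has to advance count, whether or not the callback runs. A stand-alone sketch of that walker protocol over a plain array (names here are illustrative, not kernel API):

#include <stdio.h>

struct walker {
	int skip;	/* entries to pass over before calling fn */
	int count;	/* total entries visited so far */
	int stop;
};

static void walk(const int *tbl, int n, struct walker *w,
		 int (*fn)(int idx, struct walker *w))
{
	for (int i = 0; i < n; i++) {
		if (tbl[i] == 0)
			goto ignore;	/* unused slot: no callback, but it still counts */
		if (w->count >= w->skip && fn(i, w) < 0) {
			w->stop = 1;
			break;
		}
ignore:
		w->count++;
	}
}

static int print_fn(int idx, struct walker *w)
{
	printf("visit index %d (count=%d)\n", idx, w->count);
	return 0;
}

int main(void)
{
	int tbl[] = { 5, 0, 7, 0, 9 };
	struct walker w = { .skip = 2 };

	walk(tbl, 5, &w, print_fn);	/* passes slots 0 and 1, visits 2 and 4 */
	return 0;
}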
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index e0c9fbe73b15..bb9bf8d5003c 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -53,7 +53,6 @@
struct netem_sched_data {
struct Qdisc *qdisc;
- struct sk_buff_head delayed;
struct timer_list timer;
u32 latency;
@@ -63,11 +62,12 @@ struct netem_sched_data {
u32 gap;
u32 jitter;
u32 duplicate;
+ u32 reorder;
struct crndstate {
unsigned long last;
unsigned long rho;
- } delay_cor, loss_cor, dup_cor;
+ } delay_cor, loss_cor, dup_cor, reorder_cor;
struct disttable {
u32 size;
@@ -137,122 +137,68 @@ static long tabledist(unsigned long mu, long sigma,
return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
}
-/* Put skb in the private delayed queue. */
-static int netem_delay(struct Qdisc *sch, struct sk_buff *skb)
-{
- struct netem_sched_data *q = qdisc_priv(sch);
- psched_tdiff_t td;
- psched_time_t now;
-
- PSCHED_GET_TIME(now);
- td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
-
- /* Always queue at tail to keep packets in order */
- if (likely(q->delayed.qlen < q->limit)) {
- struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
-
- PSCHED_TADD2(now, td, cb->time_to_send);
-
- pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb,
- now, cb->time_to_send);
-
- __skb_queue_tail(&q->delayed, skb);
- return NET_XMIT_SUCCESS;
- }
-
- pr_debug("netem_delay: queue over limit %d\n", q->limit);
- sch->qstats.overlimits++;
- kfree_skb(skb);
- return NET_XMIT_DROP;
-}
-
/*
- * Move a packet that is ready to send from the delay holding
- * list to the underlying qdisc.
+ * Insert one skb into qdisc.
+ * Note: parent depends on return value to account for queue length.
+ * NET_XMIT_DROP: queue length didn't change.
+ * NET_XMIT_SUCCESS: one skb was queued.
*/
-static int netem_run(struct Qdisc *sch)
-{
- struct netem_sched_data *q = qdisc_priv(sch);
- struct sk_buff *skb;
- psched_time_t now;
-
- PSCHED_GET_TIME(now);
-
- skb = skb_peek(&q->delayed);
- if (skb) {
- const struct netem_skb_cb *cb
- = (const struct netem_skb_cb *)skb->cb;
- long delay
- = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
- pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
-
- /* if more time remaining? */
- if (delay > 0) {
- mod_timer(&q->timer, jiffies + delay);
- return 1;
- }
-
- __skb_unlink(skb, &q->delayed);
-
- if (q->qdisc->enqueue(skb, q->qdisc)) {
- sch->q.qlen--;
- sch->qstats.drops++;
- }
- }
-
- return 0;
-}
-
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
+ struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
+ struct sk_buff *skb2;
int ret;
+ int count = 1;
pr_debug("netem_enqueue skb=%p\n", skb);
+ /* Random duplication */
+ if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
+ ++count;
+
/* Random packet drop 0 => none, ~0 => all */
- if (q->loss && q->loss >= get_crandom(&q->loss_cor)) {
- pr_debug("netem_enqueue: random loss\n");
+ if (q->loss && q->loss >= get_crandom(&q->loss_cor))
+ --count;
+
+ if (count == 0) {
sch->qstats.drops++;
kfree_skb(skb);
- return 0; /* lie about loss so TCP doesn't know */
+ return NET_XMIT_DROP;
}
- /* Random duplication */
- if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor)) {
- struct sk_buff *skb2;
-
- skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2 && netem_delay(sch, skb2) == NET_XMIT_SUCCESS) {
- struct Qdisc *qp;
-
- /* Since one packet can generate two packets in the
- * queue, the parent's qlen accounting gets confused,
- * so fix it.
- */
- qp = qdisc_lookup(sch->dev, TC_H_MAJ(sch->parent));
- if (qp)
- qp->q.qlen++;
-
- sch->q.qlen++;
- sch->bstats.bytes += skb2->len;
- sch->bstats.packets++;
- } else
- sch->qstats.drops++;
+ /*
+ * If we need to duplicate packet, then re-insert at top of the
+ * qdisc tree, since parent queuer expects that only one
+ * skb will be queued.
+ */
+ if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) {
+ struct Qdisc *rootq = sch->dev->qdisc;
+ u32 dupsave = q->duplicate; /* prevent duplicating a dup... */
+ q->duplicate = 0;
+
+ rootq->enqueue(skb2, rootq);
+ q->duplicate = dupsave;
}
- /* If doing simple delay then gap == 0 so all packets
- * go into the delayed holding queue
- * otherwise if doing out of order only "1 out of gap"
- * packets will be delayed.
- */
- if (q->counter < q->gap) {
+ if (q->gap == 0 /* not doing reordering */
+ || q->counter < q->gap /* inside last reordering gap */
+ || q->reorder < get_crandom(&q->reorder_cor)) {
+ psched_time_t now;
+ PSCHED_GET_TIME(now);
+ PSCHED_TADD2(now, tabledist(q->latency, q->jitter,
+ &q->delay_cor, q->delay_dist),
+ cb->time_to_send);
++q->counter;
ret = q->qdisc->enqueue(skb, q->qdisc);
} else {
+ /*
+ * Do re-ordering by putting one out of N packets at the front
+ * of the queue.
+ */
+ PSCHED_GET_TIME(cb->time_to_send);
q->counter = 0;
- ret = netem_delay(sch, skb);
- netem_run(sch);
+ ret = q->qdisc->ops->requeue(skb, q->qdisc);
}
if (likely(ret == NET_XMIT_SUCCESS)) {
@@ -296,22 +242,33 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
- int pending;
-
- pending = netem_run(sch);
skb = q->qdisc->dequeue(q->qdisc);
if (skb) {
- pr_debug("netem_dequeue: return skb=%p\n", skb);
- sch->q.qlen--;
- sch->flags &= ~TCQ_F_THROTTLED;
- }
- else if (pending) {
- pr_debug("netem_dequeue: throttling\n");
+ const struct netem_skb_cb *cb
+ = (const struct netem_skb_cb *)skb->cb;
+ psched_time_t now;
+ long delay;
+
+ /* is there delay time still remaining before time_to_send? */
+ PSCHED_GET_TIME(now);
+ delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
+ pr_debug("netem_dequeue: skb=%p delay=%ld\n", skb, delay);
+ if (delay <= 0) {
+ pr_debug("netem_dequeue: return skb=%p\n", skb);
+ sch->q.qlen--;
+ sch->flags &= ~TCQ_F_THROTTLED;
+ return skb;
+ }
+
+ mod_timer(&q->timer, jiffies + delay);
sch->flags |= TCQ_F_THROTTLED;
- }
- return skb;
+ if (q->qdisc->ops->requeue(skb, q->qdisc) != 0)
+ sch->qstats.drops++;
+ }
+
+ return NULL;
}
static void netem_watchdog(unsigned long arg)
@@ -328,8 +285,6 @@ static void netem_reset(struct Qdisc *sch)
struct netem_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc);
- skb_queue_purge(&q->delayed);
-
sch->q.qlen = 0;
sch->flags &= ~TCQ_F_THROTTLED;
del_timer_sync(&q->timer);
@@ -397,6 +352,19 @@ static int get_correlation(struct Qdisc *sch, const struct rtattr *attr)
return 0;
}
+static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
+{
+ struct netem_sched_data *q = qdisc_priv(sch);
+ const struct tc_netem_reorder *r = RTA_DATA(attr);
+
+ if (RTA_PAYLOAD(attr) != sizeof(*r))
+ return -EINVAL;
+
+ q->reorder = r->probability;
+ init_crandom(&q->reorder_cor, r->correlation);
+ return 0;
+}
+
static int netem_change(struct Qdisc *sch, struct rtattr *opt)
{
struct netem_sched_data *q = qdisc_priv(sch);
@@ -417,9 +385,15 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
q->jitter = qopt->jitter;
q->limit = qopt->limit;
q->gap = qopt->gap;
+ q->counter = 0;
q->loss = qopt->loss;
q->duplicate = qopt->duplicate;
+ /* for compatibility with earlier versions.
+ * if gap is set, need to assume 100% probability
+ */
+ q->reorder = ~0;
+
/* Handle nested options after initial queue options.
* Should have put all options in nested format but too late now.
*/
@@ -441,6 +415,11 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
if (ret)
return ret;
}
+ if (tb[TCA_NETEM_REORDER-1]) {
+ ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
+ if (ret)
+ return ret;
+ }
}
@@ -455,11 +434,9 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
if (!opt)
return -EINVAL;
- skb_queue_head_init(&q->delayed);
init_timer(&q->timer);
q->timer.function = netem_watchdog;
q->timer.data = (unsigned long) sch;
- q->counter = 0;
q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
if (!q->qdisc) {
@@ -491,6 +468,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
struct rtattr *rta = (struct rtattr *) b;
struct tc_netem_qopt qopt;
struct tc_netem_corr cor;
+ struct tc_netem_reorder reorder;
qopt.latency = q->latency;
qopt.jitter = q->jitter;
@@ -504,6 +482,11 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
cor.loss_corr = q->loss_cor.rho;
cor.dup_corr = q->dup_cor.rho;
RTA_PUT(skb, TCA_NETEM_CORR, sizeof(cor), &cor);
+
+ reorder.probability = q->reorder;
+ reorder.correlation = q->reorder_cor.rho;
+ RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
+
rta->rta_len = skb->tail - b;
return skb->len;
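Taken together, the netem changes fold the old private delay queue into the child qdisc: every packet gets a time_to_send stamp at enqueue, and dequeue holds packets (via requeue plus the watchdog timer) until that stamp is due. Reordering then falls out of the three-way test in netem_enqueue: a packet is delayed normally unless gap is set, the counter has reached it, and the reorder probability fires, in which case it is stamped "now" and requeued at the head so it overtakes the delayed packets. A stand-alone sketch of just that per-packet decision, with the kernel's correlated generator get_crandom() replaced by plain rand() for illustration:

#include <stdio.h>
#include <stdlib.h>

struct netem {
	unsigned int gap;	/* reorder every gap-th packet (0 = off) */
	unsigned int counter;	/* packets since the last reorder */
	unsigned int reorder;	/* probability, 0..~0u as in the patch */
};

/* returns 1 if the packet is sent immediately (reordered), 0 if delayed */
static int reorder_decision(struct netem *q)
{
	if (q->gap == 0				/* not doing reordering */
	    || q->counter < q->gap		/* inside last reordering gap */
	    || q->reorder < (unsigned int) rand()) {
		q->counter++;
		return 0;			/* gets the usual delay stamp */
	}
	q->counter = 0;
	return 1;				/* stamped "now", goes to the head */
}

int main(void)
{
	/* gap=4 with 100% probability: the compatibility default from netem_change */
	struct netem q = { .gap = 4, .reorder = ~0u };

	for (int i = 0; i < 10; i++)
		printf("pkt %d: %s\n", i, reorder_decision(&q) ? "reorder" : "delay");
	return 0;
}

With this input every fifth packet overtakes the four delayed before it. Note that rand() spans only 31 bits, so intermediate probabilities would need rescaling against the kernel's 32-bit generator; only the all-or-nothing cases map exactly.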