aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/act_csum.c31
-rw-r--r--net/sched/act_tunnel_key.c18
-rw-r--r--net/sched/cls_api.c29
-rw-r--r--net/sched/cls_fw.c5
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sched/sch_pie.c110
6 files changed, 126 insertions, 69 deletions
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 945fb34ae721..c79aca29505e 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -559,8 +559,11 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_csum *p = to_tcf_csum(a);
+ bool orig_vlan_tag_present = false;
+ unsigned int vlan_hdr_count = 0;
struct tcf_csum_params *params;
u32 update_flags;
+ __be16 protocol;
int action;
params = rcu_dereference_bh(p->params);
@@ -573,7 +576,9 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
goto drop;
update_flags = params->update_flags;
- switch (tc_skb_protocol(skb)) {
+ protocol = tc_skb_protocol(skb);
+again:
+ switch (protocol) {
case cpu_to_be16(ETH_P_IP):
if (!tcf_csum_ipv4(skb, update_flags))
goto drop;
@@ -582,13 +587,35 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
if (!tcf_csum_ipv6(skb, update_flags))
goto drop;
break;
+ case cpu_to_be16(ETH_P_8021AD): /* fall through */
+ case cpu_to_be16(ETH_P_8021Q):
+ if (skb_vlan_tag_present(skb) && !orig_vlan_tag_present) {
+ protocol = skb->protocol;
+ orig_vlan_tag_present = true;
+ } else {
+ struct vlan_hdr *vlan = (struct vlan_hdr *)skb->data;
+
+ protocol = vlan->h_vlan_encapsulated_proto;
+ skb_pull(skb, VLAN_HLEN);
+ skb_reset_network_header(skb);
+ vlan_hdr_count++;
+ }
+ goto again;
+ }
+
+out:
+ /* Restore the skb for the pulled VLAN tags */
+ while (vlan_hdr_count--) {
+ skb_push(skb, VLAN_HLEN);
+ skb_reset_network_header(skb);
}
return action;
drop:
qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
- return TC_ACT_SHOT;
+ action = TC_ACT_SHOT;
+ goto out;
}
static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 3404af72b4c1..fc2b884b170c 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -201,8 +201,14 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
{
if (!p)
return;
- if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
+ if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET) {
+#ifdef CONFIG_DST_CACHE
+ struct ip_tunnel_info *info = &p->tcft_enc_metadata->u.tun_info;
+
+ dst_cache_destroy(&info->dst_cache);
+#endif
dst_release(&p->tcft_enc_metadata->dst);
+ }
kfree_rcu(p, rcu);
}
@@ -384,7 +390,8 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
release_dst_cache:
#ifdef CONFIG_DST_CACHE
- dst_cache_destroy(&metadata->u.tun_info.dst_cache);
+ if (metadata)
+ dst_cache_destroy(&metadata->u.tun_info.dst_cache);
#endif
release_tun_meta:
dst_release(&metadata->dst);
@@ -401,15 +408,8 @@ static void tunnel_key_release(struct tc_action *a)
{
struct tcf_tunnel_key *t = to_tunnel_key(a);
struct tcf_tunnel_key_params *params;
-#ifdef CONFIG_DST_CACHE
- struct ip_tunnel_info *info;
-#endif
params = rcu_dereference_protected(t->params, 1);
-#ifdef CONFIG_DST_CACHE
- info = &params->tcft_enc_metadata->u.tun_info;
- dst_cache_destroy(&info->dst_cache);
-#endif
tunnel_key_release_params(params);
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 6593c245f714..478095d50f95 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -238,18 +238,23 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
tcf_proto_destroy(tp, rtnl_held, extack);
}
-static int walker_noop(struct tcf_proto *tp, void *d, struct tcf_walker *arg)
+static int walker_check_empty(struct tcf_proto *tp, void *fh,
+ struct tcf_walker *arg)
{
- return -1;
+ if (fh) {
+ arg->nonempty = true;
+ return -1;
+ }
+ return 0;
}
static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
{
- struct tcf_walker walker = { .fn = walker_noop, };
+ struct tcf_walker walker = { .fn = walker_check_empty, };
if (tp->ops->walk) {
tp->ops->walk(tp, &walker, rtnl_held);
- return !walker.stop;
+ return !walker.nonempty;
}
return true;
}
@@ -2907,12 +2912,12 @@ errout_block_locked:
/* called with RTNL */
static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct tcf_chain *chain, *chain_prev;
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
struct Qdisc *q = NULL;
struct tcf_block *block;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ struct tcf_chain *chain;
long index_start;
long index;
u32 parent;
@@ -2975,11 +2980,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
index_start = cb->args[0];
index = 0;
- for (chain = __tcf_get_next_chain(block, NULL);
- chain;
- chain_prev = chain,
- chain = __tcf_get_next_chain(block, chain),
- tcf_chain_put(chain_prev)) {
+ mutex_lock(&block->lock);
+ list_for_each_entry(chain, &block->chain_list, list) {
if ((tca[TCA_CHAIN] &&
nla_get_u32(tca[TCA_CHAIN]) != chain->index))
continue;
@@ -2987,17 +2989,18 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
index++;
continue;
}
+ if (tcf_chain_held_by_acts_only(chain))
+ continue;
err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
chain->index, net, skb, block,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWCHAIN);
- if (err <= 0) {
- tcf_chain_put(chain);
+ if (err <= 0)
break;
- }
index++;
}
+ mutex_unlock(&block->lock);
if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
tcf_block_refcnt_put(block, true);
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 456ee6e62dfa..ad036b00427d 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -363,7 +363,10 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg,
struct fw_head *head = rtnl_dereference(tp->root);
int h;
- if (head == NULL || arg->stop)
+ if (head == NULL)
+ arg->stop = 1;
+
+ if (arg->stop)
return;
for (h = 0; h < HTSIZE; h++) {
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 38e5add14fab..7bcee8b8f803 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -559,7 +559,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
};
static struct netdev_queue noop_netdev_queue = {
- .qdisc = &noop_qdisc,
+ RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc),
.qdisc_sleeping = &noop_qdisc,
};
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index d1429371592f..1cc0c7b74aa3 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -17,9 +17,7 @@
* University of Oslo, Norway.
*
* References:
- * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00
- * IEEE Conference on High Performance Switching and Routing 2013 :
- * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem"
+ * RFC 8033: https://tools.ietf.org/html/rfc8033
*/
#include <linux/module.h>
@@ -31,9 +29,9 @@
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
-#define QUEUE_THRESHOLD 10000
+#define QUEUE_THRESHOLD 16384
#define DQCOUNT_INVALID -1
-#define MAX_PROB 0xffffffff
+#define MAX_PROB 0xffffffffffffffff
#define PIE_SCALE 8
/* parameters used */
@@ -49,14 +47,16 @@ struct pie_params {
/* variables used */
struct pie_vars {
- u32 prob; /* probability but scaled by u32 limit. */
+ u64 prob; /* probability but scaled by u64 limit. */
psched_time_t burst_time;
psched_time_t qdelay;
psched_time_t qdelay_old;
u64 dq_count; /* measured in bytes */
psched_time_t dq_tstamp; /* drain rate */
+ u64 accu_prob; /* accumulated drop probability */
u32 avg_dq_rate; /* bytes per pschedtime tick,scaled */
u32 qlen_old; /* in bytes */
+ u8 accu_prob_overflows; /* overflows of accu_prob */
};
/* statistics gathering */
@@ -81,9 +81,9 @@ static void pie_params_init(struct pie_params *params)
{
params->alpha = 2;
params->beta = 20;
- params->tupdate = usecs_to_jiffies(30 * USEC_PER_MSEC); /* 30 ms */
+ params->tupdate = usecs_to_jiffies(15 * USEC_PER_MSEC); /* 15 ms */
params->limit = 1000; /* default of 1000 packets */
- params->target = PSCHED_NS2TICKS(20 * NSEC_PER_MSEC); /* 20 ms */
+ params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC); /* 15 ms */
params->ecn = false;
params->bytemode = false;
}
@@ -91,16 +91,18 @@ static void pie_params_init(struct pie_params *params)
static void pie_vars_init(struct pie_vars *vars)
{
vars->dq_count = DQCOUNT_INVALID;
+ vars->accu_prob = 0;
vars->avg_dq_rate = 0;
- /* default of 100 ms in pschedtime */
- vars->burst_time = PSCHED_NS2TICKS(100 * NSEC_PER_MSEC);
+ /* default of 150 ms in pschedtime */
+ vars->burst_time = PSCHED_NS2TICKS(150 * NSEC_PER_MSEC);
+ vars->accu_prob_overflows = 0;
}
static bool drop_early(struct Qdisc *sch, u32 packet_size)
{
struct pie_sched_data *q = qdisc_priv(sch);
- u32 rnd;
- u32 local_prob = q->vars.prob;
+ u64 rnd;
+ u64 local_prob = q->vars.prob;
u32 mtu = psched_mtu(qdisc_dev(sch));
/* If there is still burst allowance left skip random early drop */
@@ -124,13 +126,33 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size)
* probablity. Smaller packets will have lower drop prob in this case
*/
if (q->params.bytemode && packet_size <= mtu)
- local_prob = (local_prob / mtu) * packet_size;
+ local_prob = (u64)packet_size * div_u64(local_prob, mtu);
else
local_prob = q->vars.prob;
- rnd = prandom_u32();
- if (rnd < local_prob)
+ if (local_prob == 0) {
+ q->vars.accu_prob = 0;
+ q->vars.accu_prob_overflows = 0;
+ }
+
+ if (local_prob > MAX_PROB - q->vars.accu_prob)
+ q->vars.accu_prob_overflows++;
+
+ q->vars.accu_prob += local_prob;
+
+ if (q->vars.accu_prob_overflows == 0 &&
+ q->vars.accu_prob < (MAX_PROB / 100) * 85)
+ return false;
+ if (q->vars.accu_prob_overflows == 8 &&
+ q->vars.accu_prob >= MAX_PROB / 2)
+ return true;
+
+ prandom_bytes(&rnd, 8);
+ if (rnd < local_prob) {
+ q->vars.accu_prob = 0;
+ q->vars.accu_prob_overflows = 0;
return true;
+ }
return false;
}
@@ -168,6 +190,8 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
out:
q->stats.dropped++;
+ q->vars.accu_prob = 0;
+ q->vars.accu_prob_overflows = 0;
return qdisc_drop(skb, sch, to_free);
}
@@ -317,9 +341,10 @@ static void calculate_probability(struct Qdisc *sch)
u32 qlen = sch->qstats.backlog; /* queue size in bytes */
psched_time_t qdelay = 0; /* in pschedtime */
psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */
- s32 delta = 0; /* determines the change in probability */
- u32 oldprob;
- u32 alpha, beta;
+ s64 delta = 0; /* determines the change in probability */
+ u64 oldprob;
+ u64 alpha, beta;
+ u32 power;
bool update_prob = true;
q->vars.qdelay_old = q->vars.qdelay;
@@ -339,38 +364,36 @@ static void calculate_probability(struct Qdisc *sch)
* value for alpha as 0.125. In this implementation, we use values 0-32
* passed from user space to represent this. Also, alpha and beta have
* unit of HZ and need to be scaled before they can used to update
- * probability. alpha/beta are updated locally below by 1) scaling them
- * appropriately 2) scaling down by 16 to come to 0-2 range.
- * Please see paper for details.
- *
- * We scale alpha and beta differently depending on whether we are in
- * light, medium or high dropping mode.
+ * probability. alpha/beta are updated locally below by scaling down
+ * by 16 to come to 0-2 range.
*/
- if (q->vars.prob < MAX_PROB / 100) {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
- } else if (q->vars.prob < MAX_PROB / 10) {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
- } else {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+
+ /* We scale alpha and beta differently depending on how heavy the
+ * congestion is. Please see RFC 8033 for details.
+ */
+ if (q->vars.prob < MAX_PROB / 10) {
+ alpha >>= 1;
+ beta >>= 1;
+
+ power = 100;
+ while (q->vars.prob < div_u64(MAX_PROB, power) &&
+ power <= 1000000) {
+ alpha >>= 2;
+ beta >>= 2;
+ power *= 10;
+ }
}
/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
- delta += alpha * ((qdelay - q->params.target));
- delta += beta * ((qdelay - qdelay_old));
+ delta += alpha * (u64)(qdelay - q->params.target);
+ delta += beta * (u64)(qdelay - qdelay_old);
oldprob = q->vars.prob;
/* to ensure we increase probability in steps of no more than 2% */
- if (delta > (s32)(MAX_PROB / (100 / 2)) &&
+ if (delta > (s64)(MAX_PROB / (100 / 2)) &&
q->vars.prob >= MAX_PROB / 10)
delta = (MAX_PROB / 100) * 2;
@@ -406,7 +429,8 @@ static void calculate_probability(struct Qdisc *sch)
*/
if (qdelay == 0 && qdelay_old == 0 && update_prob)
- q->vars.prob = (q->vars.prob * 98) / 100;
+ /* Reduce drop probability to 98.4% */
+ q->vars.prob -= q->vars.prob / 64u;
q->vars.qdelay = qdelay;
q->vars.qlen_old = qlen;