aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/act_api.c58
-rw-r--r--net/sched/act_bpf.c129
-rw-r--r--net/sched/act_connmark.c9
-rw-r--r--net/sched/act_csum.c3
-rw-r--r--net/sched/act_gact.c44
-rw-r--r--net/sched/act_ipt.c2
-rw-r--r--net/sched/act_mirred.c60
-rw-r--r--net/sched/act_nat.c10
-rw-r--r--net/sched/act_pedit.c8
-rw-r--r--net/sched/act_simple.c3
-rw-r--r--net/sched/act_skbedit.c3
-rw-r--r--net/sched/act_vlan.c3
-rw-r--r--net/sched/cls_bpf.c2
-rw-r--r--net/sched/cls_cgroup.c23
-rw-r--r--net/sched/cls_flow.c7
-rw-r--r--net/sched/cls_flower.c4
-rw-r--r--net/sched/cls_fw.c30
-rw-r--r--net/sched/cls_rsvp.h18
-rw-r--r--net/sched/cls_tcindex.c29
-rw-r--r--net/sched/cls_u32.c13
-rw-r--r--net/sched/sch_api.c55
-rw-r--r--net/sched/sch_atm.c2
-rw-r--r--net/sched/sch_cbq.c2
-rw-r--r--net/sched/sch_choke.c19
-rw-r--r--net/sched/sch_drr.c2
-rw-r--r--net/sched/sch_dsmark.c2
-rw-r--r--net/sched/sch_fifo.c2
-rw-r--r--net/sched/sch_fq_codel.c37
-rw-r--r--net/sched/sch_generic.c56
-rw-r--r--net/sched/sch_gred.c8
-rw-r--r--net/sched/sch_hfsc.c2
-rw-r--r--net/sched/sch_htb.c8
-rw-r--r--net/sched/sch_multiq.c2
-rw-r--r--net/sched/sch_plug.c9
-rw-r--r--net/sched/sch_prio.c2
-rw-r--r--net/sched/sch_qfq.c3
-rw-r--r--net/sched/sch_sfb.c4
-rw-r--r--net/sched/sch_sfq.c4
38 files changed, 397 insertions, 280 deletions
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index af427a3dbcba..06e7c4a37245 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -27,7 +27,16 @@
#include <net/act_api.h>
#include <net/netlink.h>
-void tcf_hash_destroy(struct tc_action *a)
+static void free_tcf(struct rcu_head *head)
+{
+ struct tcf_common *p = container_of(head, struct tcf_common, tcfc_rcu);
+
+ free_percpu(p->cpu_bstats);
+ free_percpu(p->cpu_qstats);
+ kfree(p);
+}
+
+static void tcf_hash_destroy(struct tc_action *a)
{
struct tcf_common *p = a->priv;
struct tcf_hashinfo *hinfo = a->ops->hinfo;
@@ -41,11 +50,10 @@ void tcf_hash_destroy(struct tc_action *a)
* gen_estimator est_timer() might access p->tcfc_lock
* or bstats, wait a RCU grace period before freeing p
*/
- kfree_rcu(p, tcfc_rcu);
+ call_rcu(&p->tcfc_rcu, free_tcf);
}
-EXPORT_SYMBOL(tcf_hash_destroy);
-int tcf_hash_release(struct tc_action *a, int bind)
+int __tcf_hash_release(struct tc_action *a, bool bind, bool strict)
{
struct tcf_common *p = a->priv;
int ret = 0;
@@ -53,7 +61,7 @@ int tcf_hash_release(struct tc_action *a, int bind)
if (p) {
if (bind)
p->tcfc_bindcnt--;
- else if (p->tcfc_bindcnt > 0)
+ else if (strict && p->tcfc_bindcnt > 0)
return -EPERM;
p->tcfc_refcnt--;
@@ -64,9 +72,10 @@ int tcf_hash_release(struct tc_action *a, int bind)
ret = 1;
}
}
+
return ret;
}
-EXPORT_SYMBOL(tcf_hash_release);
+EXPORT_SYMBOL(__tcf_hash_release);
static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb,
struct tc_action *a)
@@ -136,7 +145,7 @@ static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a)
head = &hinfo->htab[tcf_hash(i, hinfo->hmask)];
hlist_for_each_entry_safe(p, n, head, tcfc_head) {
a->priv = p;
- ret = tcf_hash_release(a, 0);
+ ret = __tcf_hash_release(a, false, true);
if (ret == ACT_P_DELETED) {
module_put(a->ops->owner);
n_i++;
@@ -230,15 +239,16 @@ void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est)
if (est)
gen_kill_estimator(&pc->tcfc_bstats,
&pc->tcfc_rate_est);
- kfree_rcu(pc, tcfc_rcu);
+ call_rcu(&pc->tcfc_rcu, free_tcf);
}
EXPORT_SYMBOL(tcf_hash_cleanup);
int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
- int size, int bind)
+ int size, int bind, bool cpustats)
{
struct tcf_hashinfo *hinfo = a->ops->hinfo;
struct tcf_common *p = kzalloc(size, GFP_KERNEL);
+ int err = -ENOMEM;
if (unlikely(!p))
return -ENOMEM;
@@ -246,18 +256,32 @@ int tcf_hash_create(u32 index, struct nlattr *est, struct tc_action *a,
if (bind)
p->tcfc_bindcnt = 1;
+ if (cpustats) {
+ p->cpu_bstats = netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
+ if (!p->cpu_bstats) {
+err1:
+ kfree(p);
+ return err;
+ }
+ p->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
+ if (!p->cpu_qstats) {
+err2:
+ free_percpu(p->cpu_bstats);
+ goto err1;
+ }
+ }
spin_lock_init(&p->tcfc_lock);
INIT_HLIST_NODE(&p->tcfc_head);
p->tcfc_index = index ? index : tcf_hash_new_index(hinfo);
p->tcfc_tm.install = jiffies;
p->tcfc_tm.lastuse = jiffies;
if (est) {
- int err = gen_new_estimator(&p->tcfc_bstats, NULL,
- &p->tcfc_rate_est,
- &p->tcfc_lock, est);
+ err = gen_new_estimator(&p->tcfc_bstats, p->cpu_bstats,
+ &p->tcfc_rate_est,
+ &p->tcfc_lock, est);
if (err) {
- kfree(p);
- return err;
+ free_percpu(p->cpu_qstats);
+ goto err2;
}
}
@@ -408,7 +432,7 @@ int tcf_action_destroy(struct list_head *actions, int bind)
int ret = 0;
list_for_each_entry_safe(a, tmp, actions, list) {
- ret = tcf_hash_release(a, bind);
+ ret = __tcf_hash_release(a, bind, true);
if (ret == ACT_P_DELETED)
module_put(a->ops->owner);
else if (ret < 0)
@@ -615,10 +639,10 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a,
if (err < 0)
goto errout;
- if (gnet_stats_copy_basic(&d, NULL, &p->tcfc_bstats) < 0 ||
+ if (gnet_stats_copy_basic(&d, p->cpu_bstats, &p->tcfc_bstats) < 0 ||
gnet_stats_copy_rate_est(&d, &p->tcfc_bstats,
&p->tcfc_rate_est) < 0 ||
- gnet_stats_copy_queue(&d, NULL,
+ gnet_stats_copy_queue(&d, p->cpu_qstats,
&p->tcfc_qstats,
p->tcfc_qstats.qlen) < 0)
goto errout;
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 1d56903fd4c7..559bfa011bda 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -27,34 +27,34 @@
struct tcf_bpf_cfg {
struct bpf_prog *filter;
struct sock_filter *bpf_ops;
- char *bpf_name;
+ const char *bpf_name;
u32 bpf_fd;
u16 bpf_num_ops;
+ bool is_ebpf;
};
static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
struct tcf_result *res)
{
struct tcf_bpf *prog = act->priv;
+ struct bpf_prog *filter;
int action, filter_res;
bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS;
if (unlikely(!skb_mac_header_was_set(skb)))
return TC_ACT_UNSPEC;
- spin_lock(&prog->tcf_lock);
-
- prog->tcf_tm.lastuse = jiffies;
- bstats_update(&prog->tcf_bstats, skb);
+ tcf_lastuse_update(&prog->tcf_tm);
+ bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb);
- /* Needed here for accessing maps. */
rcu_read_lock();
+ filter = rcu_dereference(prog->filter);
if (at_ingress) {
__skb_push(skb, skb->mac_len);
- filter_res = BPF_PROG_RUN(prog->filter, skb);
+ filter_res = BPF_PROG_RUN(filter, skb);
__skb_pull(skb, skb->mac_len);
} else {
- filter_res = BPF_PROG_RUN(prog->filter, skb);
+ filter_res = BPF_PROG_RUN(filter, skb);
}
rcu_read_unlock();
@@ -76,7 +76,7 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
break;
case TC_ACT_SHOT:
action = filter_res;
- prog->tcf_qstats.drops++;
+ qstats_drop_inc(this_cpu_ptr(prog->common.cpu_qstats));
break;
case TC_ACT_UNSPEC:
action = prog->tcf_action;
@@ -86,7 +86,6 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
break;
}
- spin_unlock(&prog->tcf_lock);
return action;
}
@@ -207,6 +206,7 @@ static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
cfg->bpf_ops = bpf_ops;
cfg->bpf_num_ops = bpf_num_ops;
cfg->filter = fp;
+ cfg->is_ebpf = false;
return 0;
}
@@ -241,20 +241,45 @@ static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
cfg->bpf_fd = bpf_fd;
cfg->bpf_name = name;
cfg->filter = fp;
+ cfg->is_ebpf = true;
return 0;
}
+static void tcf_bpf_cfg_cleanup(const struct tcf_bpf_cfg *cfg)
+{
+ if (cfg->is_ebpf)
+ bpf_prog_put(cfg->filter);
+ else
+ bpf_prog_destroy(cfg->filter);
+
+ kfree(cfg->bpf_ops);
+ kfree(cfg->bpf_name);
+}
+
+static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
+ struct tcf_bpf_cfg *cfg)
+{
+ cfg->is_ebpf = tcf_bpf_is_ebpf(prog);
+ /* updates to prog->filter are prevented, since it's called either
+ * with rtnl lock or during final cleanup in rcu callback
+ */
+ cfg->filter = rcu_dereference_protected(prog->filter, 1);
+
+ cfg->bpf_ops = prog->bpf_ops;
+ cfg->bpf_name = prog->bpf_name;
+}
+
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action *act,
int replace, int bind)
{
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+ struct tcf_bpf_cfg cfg, old;
struct tc_act_bpf *parm;
struct tcf_bpf *prog;
- struct tcf_bpf_cfg cfg;
bool is_bpf, is_ebpf;
- int ret;
+ int ret, res = 0;
if (!nla)
return -EINVAL;
@@ -263,43 +288,48 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (ret < 0)
return ret;
- is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
- is_ebpf = tb[TCA_ACT_BPF_FD];
-
- if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
- !tb[TCA_ACT_BPF_PARMS])
+ if (!tb[TCA_ACT_BPF_PARMS])
return -EINVAL;
parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
- memset(&cfg, 0, sizeof(cfg));
-
- ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
- tcf_bpf_init_from_efd(tb, &cfg);
- if (ret < 0)
- return ret;
-
if (!tcf_hash_check(parm->index, act, bind)) {
ret = tcf_hash_create(parm->index, est, act,
- sizeof(*prog), bind);
+ sizeof(*prog), bind, true);
if (ret < 0)
- goto destroy_fp;
+ return ret;
- ret = ACT_P_CREATED;
+ res = ACT_P_CREATED;
} else {
/* Don't override defaults. */
if (bind)
- goto destroy_fp;
+ return 0;
tcf_hash_release(act, bind);
- if (!replace) {
- ret = -EEXIST;
- goto destroy_fp;
- }
+ if (!replace)
+ return -EEXIST;
+ }
+
+ is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
+ is_ebpf = tb[TCA_ACT_BPF_FD];
+
+ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
+ ret = -EINVAL;
+ goto out;
}
+ memset(&cfg, 0, sizeof(cfg));
+
+ ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
+ tcf_bpf_init_from_efd(tb, &cfg);
+ if (ret < 0)
+ goto out;
+
prog = to_bpf(act);
- spin_lock_bh(&prog->tcf_lock);
+ ASSERT_RTNL();
+
+ if (res != ACT_P_CREATED)
+ tcf_bpf_prog_fill_cfg(prog, &old);
prog->bpf_ops = cfg.bpf_ops;
prog->bpf_name = cfg.bpf_name;
@@ -310,35 +340,30 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
prog->bpf_fd = cfg.bpf_fd;
prog->tcf_action = parm->action;
- prog->filter = cfg.filter;
-
- spin_unlock_bh(&prog->tcf_lock);
+ rcu_assign_pointer(prog->filter, cfg.filter);
- if (ret == ACT_P_CREATED)
+ if (res == ACT_P_CREATED) {
tcf_hash_insert(act);
+ } else {
+ /* make sure the program being replaced is no longer executing */
+ synchronize_rcu();
+ tcf_bpf_cfg_cleanup(&old);
+ }
- return ret;
-
-destroy_fp:
- if (is_ebpf)
- bpf_prog_put(cfg.filter);
- else
- bpf_prog_destroy(cfg.filter);
-
- kfree(cfg.bpf_ops);
- kfree(cfg.bpf_name);
+ return res;
+out:
+ if (res == ACT_P_CREATED)
+ tcf_hash_cleanup(act, est);
return ret;
}
static void tcf_bpf_cleanup(struct tc_action *act, int bind)
{
- const struct tcf_bpf *prog = act->priv;
+ struct tcf_bpf_cfg tmp;
- if (tcf_bpf_is_ebpf(prog))
- bpf_prog_put(prog->filter);
- else
- bpf_prog_destroy(prog->filter);
+ tcf_bpf_prog_fill_cfg(act->priv, &tmp);
+ tcf_bpf_cfg_cleanup(&tmp);
}
static struct tc_action_ops act_bpf_ops __read_mostly = {
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 295d14bd6c67..5019a47b9270 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -37,6 +37,7 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct tcf_connmark_info *ca = a->priv;
+ struct nf_conntrack_zone zone;
struct nf_conn *c;
int proto;
@@ -70,7 +71,10 @@ static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
proto, &tuple))
goto out;
- thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple);
+ zone.id = ca->zone;
+ zone.dir = NF_CT_DEFAULT_ZONE_DIR;
+
+ thash = nf_conntrack_find_get(dev_net(skb->dev), &zone, &tuple);
if (!thash)
goto out;
@@ -108,7 +112,8 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*ci),
+ bind, false);
if (ret)
return ret;
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 4cd5cf1aedf8..b07c535ba8e7 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -62,7 +62,8 @@ static int tcf_csum_init(struct net *n, struct nlattr *nla, struct nlattr *est,
parm = nla_data(tb[TCA_CSUM_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+ bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 7fffc2272701..5c1b05170736 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -28,14 +28,18 @@
#ifdef CONFIG_GACT_PROB
static int gact_net_rand(struct tcf_gact *gact)
{
- if (!gact->tcfg_pval || prandom_u32() % gact->tcfg_pval)
+ smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */
+ if (prandom_u32() % gact->tcfg_pval)
return gact->tcf_action;
return gact->tcfg_paction;
}
static int gact_determ(struct tcf_gact *gact)
{
- if (!gact->tcfg_pval || gact->tcf_bstats.packets % gact->tcfg_pval)
+ u32 pack = atomic_inc_return(&gact->packets);
+
+ smp_rmb(); /* coupled with smp_wmb() in tcf_gact_init() */
+ if (pack % gact->tcfg_pval)
return gact->tcf_action;
return gact->tcfg_paction;
}
@@ -85,7 +89,8 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
#endif
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*gact), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*gact),
+ bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -99,16 +104,19 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
gact = to_gact(a);
- spin_lock_bh(&gact->tcf_lock);
+ ASSERT_RTNL();
gact->tcf_action = parm->action;
#ifdef CONFIG_GACT_PROB
if (p_parm) {
gact->tcfg_paction = p_parm->paction;
- gact->tcfg_pval = p_parm->pval;
+ gact->tcfg_pval = max_t(u16, 1, p_parm->pval);
+ /* Make sure tcfg_pval is written before tcfg_ptype
+ * coupled with smp_rmb() in gact_net_rand() & gact_determ()
+ */
+ smp_wmb();
gact->tcfg_ptype = p_parm->ptype;
}
#endif
- spin_unlock_bh(&gact->tcf_lock);
if (ret == ACT_P_CREATED)
tcf_hash_insert(a);
return ret;
@@ -118,23 +126,21 @@ static int tcf_gact(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_gact *gact = a->priv;
- int action = TC_ACT_SHOT;
+ int action = READ_ONCE(gact->tcf_action);
- spin_lock(&gact->tcf_lock);
#ifdef CONFIG_GACT_PROB
- if (gact->tcfg_ptype)
- action = gact_rand[gact->tcfg_ptype](gact);
- else
- action = gact->tcf_action;
-#else
- action = gact->tcf_action;
+ {
+ u32 ptype = READ_ONCE(gact->tcfg_ptype);
+
+ if (ptype)
+ action = gact_rand[ptype](gact);
+ }
#endif
- gact->tcf_bstats.bytes += qdisc_pkt_len(skb);
- gact->tcf_bstats.packets++;
+ bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), skb);
if (action == TC_ACT_SHOT)
- gact->tcf_qstats.drops++;
- gact->tcf_tm.lastuse = jiffies;
- spin_unlock(&gact->tcf_lock);
+ qstats_drop_inc(this_cpu_ptr(gact->common.cpu_qstats));
+
+ tcf_lastuse_update(&gact->tcf_tm);
return action;
}
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index cbc8dd7dd48a..99c9cc1c7af9 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -114,7 +114,7 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est,
index = nla_get_u32(tb[TCA_IPT_INDEX]);
if (!tcf_hash_check(index, a, bind) ) {
- ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind);
+ ret = tcf_hash_create(index, est, a, sizeof(*ipt), bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index a42a3b257226..2d1be4a760fd 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -35,9 +35,11 @@ static LIST_HEAD(mirred_list);
static void tcf_mirred_release(struct tc_action *a, int bind)
{
struct tcf_mirred *m = to_mirred(a);
+ struct net_device *dev = rcu_dereference_protected(m->tcfm_dev, 1);
+
list_del(&m->tcfm_list);
- if (m->tcfm_dev)
- dev_put(m->tcfm_dev);
+ if (dev)
+ dev_put(dev);
}
static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = {
@@ -93,11 +95,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
if (!tcf_hash_check(parm->index, a, bind)) {
if (dev == NULL)
return -EINVAL;
- ret = tcf_hash_create(parm->index, est, a, sizeof(*m), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*m),
+ bind, true);
if (ret)
return ret;
ret = ACT_P_CREATED;
} else {
+ if (bind)
+ return 0;
if (!ovr) {
tcf_hash_release(a, bind);
return -EEXIST;
@@ -105,18 +110,18 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
m = to_mirred(a);
- spin_lock_bh(&m->tcf_lock);
+ ASSERT_RTNL();
m->tcf_action = parm->action;
m->tcfm_eaction = parm->eaction;
if (dev != NULL) {
m->tcfm_ifindex = parm->ifindex;
if (ret != ACT_P_CREATED)
- dev_put(m->tcfm_dev);
+ dev_put(rcu_dereference_protected(m->tcfm_dev, 1));
dev_hold(dev);
- m->tcfm_dev = dev;
+ rcu_assign_pointer(m->tcfm_dev, dev);
m->tcfm_ok_push = ok_push;
}
- spin_unlock_bh(&m->tcf_lock);
+
if (ret == ACT_P_CREATED) {
list_add(&m->tcfm_list, &mirred_list);
tcf_hash_insert(a);
@@ -131,20 +136,22 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
struct tcf_mirred *m = a->priv;
struct net_device *dev;
struct sk_buff *skb2;
+ int retval, err;
u32 at;
- int retval, err = 1;
- spin_lock(&m->tcf_lock);
- m->tcf_tm.lastuse = jiffies;
- bstats_update(&m->tcf_bstats, skb);
+ tcf_lastuse_update(&m->tcf_tm);
+
+ bstats_cpu_update(this_cpu_ptr(m->common.cpu_bstats), skb);
- dev = m->tcfm_dev;
- if (!dev) {
- printk_once(KERN_NOTICE "tc mirred: target device is gone\n");
+ rcu_read_lock();
+ retval = READ_ONCE(m->tcf_action);
+ dev = rcu_dereference(m->tcfm_dev);
+ if (unlikely(!dev)) {
+ pr_notice_once("tc mirred: target device is gone\n");
goto out;
}
- if (!(dev->flags & IFF_UP)) {
+ if (unlikely(!(dev->flags & IFF_UP))) {
net_notice_ratelimited("tc mirred to Houston: device %s is down\n",
dev->name);
goto out;
@@ -152,7 +159,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
at = G_TC_AT(skb->tc_verd);
skb2 = skb_clone(skb, GFP_ATOMIC);
- if (skb2 == NULL)
+ if (!skb2)
goto out;
if (!(at & AT_EGRESS)) {
@@ -168,16 +175,13 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
skb2->dev = dev;
err = dev_queue_xmit(skb2);
-out:
if (err) {
- m->tcf_qstats.overlimits++;
+out:
+ qstats_overlimit_inc(this_cpu_ptr(m->common.cpu_qstats));
if (m->tcfm_eaction != TCA_EGRESS_MIRROR)
retval = TC_ACT_SHOT;
- else
- retval = m->tcf_action;
- } else
- retval = m->tcf_action;
- spin_unlock(&m->tcf_lock);
+ }
+ rcu_read_unlock();
return retval;
}
@@ -216,14 +220,16 @@ static int mirred_device_event(struct notifier_block *unused,
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct tcf_mirred *m;
+ ASSERT_RTNL();
if (event == NETDEV_UNREGISTER)
list_for_each_entry(m, &mirred_list, tcfm_list) {
- spin_lock_bh(&m->tcf_lock);
- if (m->tcfm_dev == dev) {
+ if (rcu_access_pointer(m->tcfm_dev) == dev) {
dev_put(dev);
- m->tcfm_dev = NULL;
+ /* Note : no rcu grace period necessary, as
+ * net_device are already rcu protected.
+ */
+ RCU_INIT_POINTER(m->tcfm_dev, NULL);
}
- spin_unlock_bh(&m->tcf_lock);
}
return NOTIFY_DONE;
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 270a030d5fd0..b7c4ead8b5a8 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -55,7 +55,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
parm = nla_data(tb[TCA_NAT_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+ bind, false);
if (ret)
return ret;
ret = ACT_P_CREATED;
@@ -161,7 +162,8 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
goto drop;
tcph = (void *)(skb_network_header(skb) + ihl);
- inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, 1);
+ inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr,
+ true);
break;
}
case IPPROTO_UDP:
@@ -177,7 +179,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
udph = (void *)(skb_network_header(skb) + ihl);
if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
inet_proto_csum_replace4(&udph->check, skb, addr,
- new_addr, 1);
+ new_addr, true);
if (!udph->check)
udph->check = CSUM_MANGLED_0;
}
@@ -230,7 +232,7 @@ static int tcf_nat(struct sk_buff *skb, const struct tc_action *a,
iph->saddr = new_addr;
inet_proto_csum_replace4(&icmph->checksum, skb, addr, new_addr,
- 0);
+ false);
break;
}
default:
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 17e6d6669c7f..e38a7701f154 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -57,7 +57,8 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (!tcf_hash_check(parm->index, a, bind)) {
if (!parm->nkeys)
return -EINVAL;
- ret = tcf_hash_create(parm->index, est, a, sizeof(*p), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*p),
+ bind, false);
if (ret)
return ret;
p = to_pedit(a);
@@ -68,13 +69,12 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
}
ret = ACT_P_CREATED;
} else {
- p = to_pedit(a);
- tcf_hash_release(a, bind);
if (bind)
return 0;
+ tcf_hash_release(a, bind);
if (!ovr)
return -EEXIST;
-
+ p = to_pedit(a);
if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) {
keys = kmalloc(ksize, GFP_KERNEL);
if (keys == NULL)
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 6a8d9488613a..d6b708d6afdf 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -103,7 +103,8 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
defdata = nla_data(tb[TCA_DEF_DATA]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d),
+ bind, false);
if (ret)
return ret;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index fcfeeaf838be..6751b5f8c046 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -99,7 +99,8 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*d), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*d),
+ bind, false);
if (ret)
return ret;
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index d735ecf0b1a7..796785e0bf96 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -116,7 +116,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
action = parm->v_action;
if (!tcf_hash_check(parm->index, a, bind)) {
- ret = tcf_hash_create(parm->index, est, a, sizeof(*v), bind);
+ ret = tcf_hash_create(parm->index, est, a, sizeof(*v),
+ bind, false);
if (ret)
return ret;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index c79ecfd36e0f..e5168f8b9640 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -378,7 +378,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
goto errout;
if (oldprog) {
- list_replace_rcu(&prog->link, &oldprog->link);
+ list_replace_rcu(&oldprog->link, &prog->link);
tcf_unbind_filter(tp, &oldprog->res);
call_rcu(&oldprog->rcu, __cls_bpf_delete_prog);
} else {
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index ea611b216412..4c85bd3a750c 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -30,35 +30,16 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
struct cls_cgroup_head *head = rcu_dereference_bh(tp->root);
- u32 classid;
-
- classid = task_cls_state(current)->classid;
-
- /*
- * Due to the nature of the classifier it is required to ignore all
- * packets originating from softirq context as accessing `current'
- * would lead to false results.
- *
- * This test assumes that all callers of dev_queue_xmit() explicitely
- * disable bh. Knowing this, it is possible to detect softirq based
- * calls by looking at the number of nested bh disable calls because
- * softirqs always disables bh.
- */
- if (in_serving_softirq()) {
- /* If there is an sk_classid we'll use that. */
- if (!skb->sk)
- return -1;
- classid = skb->sk->sk_classid;
- }
+ u32 classid = task_get_classid(skb);
if (!classid)
return -1;
-
if (!tcf_em_tree_match(skb, &head->ematches, NULL))
return -1;
res->classid = classid;
res->class = 0;
+
return tcf_exts_exec(skb, &head->exts, res);
}
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 76bc3a20ffdb..536838b657bf 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -301,7 +301,7 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp,
keymask = f->keymask;
if (keymask & FLOW_KEYS_NEEDED)
- skb_flow_dissect_flow_keys(skb, &flow_keys);
+ skb_flow_dissect_flow_keys(skb, &flow_keys, 0);
for (n = 0; n < f->nkeys; n++) {
key = ffs(keymask) - 1;
@@ -425,6 +425,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (!fnew)
goto err2;
+ tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+
fold = (struct flow_filter *)*arg;
if (fold) {
err = -EINVAL;
@@ -486,7 +488,6 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
fnew->mask = ~0U;
fnew->tp = tp;
get_random_bytes(&fnew->hashrnd, 4);
- tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
}
fnew->perturb_timer.function = flow_perturbation;
@@ -526,7 +527,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (*arg == 0)
list_add_tail_rcu(&fnew->list, &head->filters);
else
- list_replace_rcu(&fnew->list, &fold->list);
+ list_replace_rcu(&fold->list, &fnew->list);
*arg = (unsigned long)fnew;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9d37ccd95062..57692947ebbe 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -129,7 +129,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
* so do it rather here.
*/
skb_key.basic.n_proto = skb->protocol;
- skb_flow_dissect(skb, &head->dissector, &skb_key);
+ skb_flow_dissect(skb, &head->dissector, &skb_key, 0);
fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
@@ -499,7 +499,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
*arg = (unsigned long) fnew;
if (fold) {
- list_replace_rcu(&fnew->list, &fold->list);
+ list_replace_rcu(&fold->list, &fnew->list);
tcf_unbind_filter(tp, &fold->res);
call_rcu(&fold->rcu, fl_destroy_filter);
} else {
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 715e01e5910a..f23a3b68bba6 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -33,7 +33,6 @@
struct fw_head {
u32 mask;
- bool mask_set;
struct fw_filter __rcu *ht[HTSIZE];
struct rcu_head rcu;
};
@@ -84,7 +83,7 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp,
}
}
} else {
- /* old method */
+ /* Old method: classify the packet using its skb mark. */
if (id && (TC_H_MAJ(id) == 0 ||
!(TC_H_MAJ(id ^ tp->q->handle)))) {
res->classid = id;
@@ -114,14 +113,9 @@ static unsigned long fw_get(struct tcf_proto *tp, u32 handle)
static int fw_init(struct tcf_proto *tp)
{
- struct fw_head *head;
-
- head = kzalloc(sizeof(struct fw_head), GFP_KERNEL);
- if (head == NULL)
- return -ENOBUFS;
-
- head->mask_set = false;
- rcu_assign_pointer(tp->root, head);
+ /* We don't allocate fw_head here, because in the old method
+ * we don't need it at all.
+ */
return 0;
}
@@ -252,7 +246,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
int err;
if (!opt)
- return handle ? -EINVAL : 0;
+ return handle ? -EINVAL : 0; /* Succeed if it is old method. */
err = nla_parse_nested(tb, TCA_FW_MAX, opt, fw_policy);
if (err < 0)
@@ -302,11 +296,17 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (!handle)
return -EINVAL;
- if (!head->mask_set) {
- head->mask = 0xFFFFFFFF;
+ if (!head) {
+ u32 mask = 0xFFFFFFFF;
if (tb[TCA_FW_MASK])
- head->mask = nla_get_u32(tb[TCA_FW_MASK]);
- head->mask_set = true;
+ mask = nla_get_u32(tb[TCA_FW_MASK]);
+
+ head = kzalloc(sizeof(*head), GFP_KERNEL);
+ if (!head)
+ return -ENOBUFS;
+ head->mask = mask;
+
+ rcu_assign_pointer(tp->root, head);
}
f = kzalloc(sizeof(struct fw_filter), GFP_KERNEL);
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 02fa82792dab..f9c9fc075fe6 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -283,12 +283,22 @@ static int rsvp_init(struct tcf_proto *tp)
return -ENOBUFS;
}
-static void
-rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
+static void rsvp_delete_filter_rcu(struct rcu_head *head)
{
- tcf_unbind_filter(tp, &f->res);
+ struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu);
+
tcf_exts_destroy(&f->exts);
- kfree_rcu(f, rcu);
+ kfree(f);
+}
+
+static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
+{
+ tcf_unbind_filter(tp, &f->res);
+ /* all classifiers are required to call tcf_exts_destroy() after rcu
+ * grace period, since converted-to-rcu actions are relying on that
+ * in cleanup() callback
+ */
+ call_rcu(&f->rcu, rsvp_delete_filter_rcu);
}
static bool rsvp_destroy(struct tcf_proto *tp, bool force)
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index a557dbaf5afe..944c8ff45055 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -27,6 +27,7 @@
struct tcindex_filter_result {
struct tcf_exts exts;
struct tcf_result res;
+ struct rcu_head rcu;
};
struct tcindex_filter {
@@ -133,8 +134,23 @@ static int tcindex_init(struct tcf_proto *tp)
return 0;
}
-static int
-tcindex_delete(struct tcf_proto *tp, unsigned long arg)
+static void tcindex_destroy_rexts(struct rcu_head *head)
+{
+ struct tcindex_filter_result *r;
+
+ r = container_of(head, struct tcindex_filter_result, rcu);
+ tcf_exts_destroy(&r->exts);
+}
+
+static void tcindex_destroy_fexts(struct rcu_head *head)
+{
+ struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu);
+
+ tcf_exts_destroy(&f->result.exts);
+ kfree(f);
+}
+
+static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg;
@@ -162,9 +178,14 @@ found:
rcu_assign_pointer(*walk, rtnl_dereference(f->next));
}
tcf_unbind_filter(tp, &r->res);
- tcf_exts_destroy(&r->exts);
+ /* all classifiers are required to call tcf_exts_destroy() after rcu
+ * grace period, since converted-to-rcu actions are relying on that
+ * in cleanup() callback
+ */
if (f)
- kfree_rcu(f, rcu);
+ call_rcu(&f->rcu, tcindex_destroy_fexts);
+ else
+ call_rcu(&r->rcu, tcindex_destroy_rexts);
return 0;
}
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index cab9e9b43967..4fbb67430ce4 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -490,6 +490,19 @@ static bool u32_destroy(struct tcf_proto *tp, bool force)
return false;
}
}
+
+ if (tp_c->refcnt > 1)
+ return false;
+
+ if (tp_c->refcnt == 1) {
+ struct tc_u_hnode *ht;
+
+ for (ht = rtnl_dereference(tp_c->hlist);
+ ht;
+ ht = rtnl_dereference(ht->next))
+ if (!ht_empty(ht))
+ return false;
+ }
}
if (root_ht && --root_ht->refcnt == 0)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index f06aa01d60fd..f43c8f33f09e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1806,51 +1806,45 @@ done:
* to this qdisc, (optionally) tests for protocol and asks
* specific classifiers.
*/
-int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
+int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res, bool compat_mode)
{
__be16 protocol = tc_skb_protocol(skb);
- int err;
+#ifdef CONFIG_NET_CLS_ACT
+ const struct tcf_proto *old_tp = tp;
+ int limit = 0;
+reclassify:
+#endif
for (; tp; tp = rcu_dereference_bh(tp->next)) {
+ int err;
+
if (tp->protocol != protocol &&
tp->protocol != htons(ETH_P_ALL))
continue;
- err = tp->classify(skb, tp, res);
+ err = tp->classify(skb, tp, res);
+#ifdef CONFIG_NET_CLS_ACT
+ if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode))
+ goto reset;
+#endif
if (err >= 0)
return err;
}
- return -1;
-}
-EXPORT_SYMBOL(tc_classify_compat);
-int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
-{
- int err = 0;
-#ifdef CONFIG_NET_CLS_ACT
- const struct tcf_proto *otp = tp;
- int limit = 0;
-reclassify:
-#endif
-
- err = tc_classify_compat(skb, tp, res);
+ return -1;
#ifdef CONFIG_NET_CLS_ACT
- if (err == TC_ACT_RECLASSIFY) {
- tp = otp;
-
- if (unlikely(limit++ >= MAX_REC_LOOP)) {
- net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
- tp->q->ops->id,
- tp->prio & 0xffff,
- ntohs(tp->protocol));
- return TC_ACT_SHOT;
- }
- goto reclassify;
+reset:
+ if (unlikely(limit++ >= MAX_REC_LOOP)) {
+ net_notice_ratelimited("%s: reclassify loop, rule prio %u, protocol %02x\n",
+ tp->q->ops->id, tp->prio & 0xffff,
+ ntohs(tp->protocol));
+ return TC_ACT_SHOT;
}
+
+ tp = old_tp;
+ goto reclassify;
#endif
- return err;
}
EXPORT_SYMBOL(tc_classify);
@@ -1947,6 +1941,7 @@ static int __init pktsched_init(void)
register_qdisc(&bfifo_qdisc_ops);
register_qdisc(&pfifo_head_drop_qdisc_ops);
register_qdisc(&mq_qdisc_ops);
+ register_qdisc(&noqueue_qdisc_ops);
rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index e3e2cc5fd068..1911af3ca7c0 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -375,7 +375,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
list_for_each_entry(flow, &p->flows, list) {
fl = rcu_dereference_bh(flow->filter_list);
if (fl) {
- result = tc_classify_compat(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, true);
if (result < 0)
continue;
flow = (struct atm_flow_data *)res.class;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index beeb75f80fdb..c538d9e4a8f6 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -240,7 +240,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
/*
* Step 2+n. Apply classifier.
*/
- result = tc_classify_compat(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, true);
if (!fl || result < 0)
goto fallback;
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 93d5742dc7e0..02bfd3d1c4f0 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -170,13 +170,13 @@ static bool choke_match_flow(struct sk_buff *skb1,
if (!choke_skb_cb(skb1)->keys_valid) {
choke_skb_cb(skb1)->keys_valid = 1;
- skb_flow_dissect_flow_keys(skb1, &temp);
+ skb_flow_dissect_flow_keys(skb1, &temp, 0);
make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
}
if (!choke_skb_cb(skb2)->keys_valid) {
choke_skb_cb(skb2)->keys_valid = 1;
- skb_flow_dissect_flow_keys(skb2, &temp);
+ skb_flow_dissect_flow_keys(skb2, &temp, 0);
make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
}
@@ -201,7 +201,7 @@ static bool choke_classify(struct sk_buff *skb,
int result;
fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -385,6 +385,19 @@ static void choke_reset(struct Qdisc *sch)
{
struct choke_sched_data *q = qdisc_priv(sch);
+ while (q->head != q->tail) {
+ struct sk_buff *skb = q->tab[q->head];
+
+ q->head = (q->head + 1) & q->tab_mask;
+ if (!skb)
+ continue;
+ qdisc_qstats_backlog_dec(sch, skb);
+ --sch->q.qlen;
+ qdisc_drop(skb, sch);
+ }
+
+ memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
+ q->head = q->tail = 0;
red_restart(&q->vars);
}
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 338706092c27..f26bdea875c1 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -331,7 +331,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 66700a6116aa..c4d45fd8c551 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -230,7 +230,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
else {
struct tcf_result res;
struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
- int result = tc_classify(skb, fl, &res);
+ int result = tc_classify(skb, fl, &res, false);
pr_debug("result %d class 0x%04x\n", result, res.classid);
diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c
index 2e2398cfc694..2177eac0a61e 100644
--- a/net/sched/sch_fifo.c
+++ b/net/sched/sch_fifo.c
@@ -54,7 +54,7 @@ static int fifo_init(struct Qdisc *sch, struct nlattr *opt)
bool is_bfifo = sch->ops == &bfifo_qdisc_ops;
if (opt == NULL) {
- u32 limit = qdisc_dev(sch)->tx_queue_len ? : 1;
+ u32 limit = qdisc_dev(sch)->tx_queue_len;
if (is_bfifo)
limit *= psched_mtu(qdisc_dev(sch));
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index d75993f89fac..4c834e93dafb 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -92,7 +92,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch,
return fq_codel_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, filter, &res);
+ result = tc_classify(skb, filter, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -155,14 +155,23 @@ static unsigned int fq_codel_drop(struct Qdisc *sch)
skb = dequeue_head(flow);
len = qdisc_pkt_len(skb);
q->backlogs[idx] -= len;
- kfree_skb(skb);
sch->q.qlen--;
qdisc_qstats_drop(sch);
qdisc_qstats_backlog_dec(sch, skb);
+ kfree_skb(skb);
flow->dropped++;
return idx;
}
+static unsigned int fq_codel_qdisc_drop(struct Qdisc *sch)
+{
+ unsigned int prev_backlog;
+
+ prev_backlog = sch->qstats.backlog;
+ fq_codel_drop(sch);
+ return prev_backlog - sch->qstats.backlog;
+}
+
static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
@@ -279,10 +288,26 @@ begin:
static void fq_codel_reset(struct Qdisc *sch)
{
- struct sk_buff *skb;
+ struct fq_codel_sched_data *q = qdisc_priv(sch);
+ int i;
- while ((skb = fq_codel_dequeue(sch)) != NULL)
- kfree_skb(skb);
+ INIT_LIST_HEAD(&q->new_flows);
+ INIT_LIST_HEAD(&q->old_flows);
+ for (i = 0; i < q->flows_cnt; i++) {
+ struct fq_codel_flow *flow = q->flows + i;
+
+ while (flow->head) {
+ struct sk_buff *skb = dequeue_head(flow);
+
+ qdisc_qstats_backlog_dec(sch, skb);
+ kfree_skb(skb);
+ }
+
+ INIT_LIST_HEAD(&flow->flowchain);
+ codel_vars_init(&flow->cvars);
+ }
+ memset(q->backlogs, 0, q->flows_cnt * sizeof(u32));
+ sch->q.qlen = 0;
}
static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
@@ -604,7 +629,7 @@ static struct Qdisc_ops fq_codel_qdisc_ops __read_mostly = {
.enqueue = fq_codel_enqueue,
.dequeue = fq_codel_dequeue,
.peek = qdisc_peek_dequeued,
- .drop = fq_codel_drop,
+ .drop = fq_codel_qdisc_drop,
.init = fq_codel_init,
.reset = fq_codel_reset,
.destroy = fq_codel_destroy,
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 6efca30894aa..cb5d4ad32946 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -416,33 +416,25 @@ struct Qdisc noop_qdisc = {
};
EXPORT_SYMBOL(noop_qdisc);
-static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
+static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt)
+{
+ /* register_qdisc() assigns a default of noop_enqueue if unset,
+ * but __dev_queue_xmit() treats noqueue only as such
+ * if this is NULL - so clear it here. */
+ qdisc->enqueue = NULL;
+ return 0;
+}
+
+struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
.id = "noqueue",
.priv_size = 0,
+ .init = noqueue_init,
.enqueue = noop_enqueue,
.dequeue = noop_dequeue,
.peek = noop_dequeue,
.owner = THIS_MODULE,
};
-static struct Qdisc noqueue_qdisc;
-static struct netdev_queue noqueue_netdev_queue = {
- .qdisc = &noqueue_qdisc,
- .qdisc_sleeping = &noqueue_qdisc,
-};
-
-static struct Qdisc noqueue_qdisc = {
- .enqueue = NULL,
- .dequeue = noop_dequeue,
- .flags = TCQ_F_BUILTIN,
- .ops = &noqueue_qdisc_ops,
- .list = LIST_HEAD_INIT(noqueue_qdisc.list),
- .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
- .dev_queue = &noqueue_netdev_queue,
- .busylock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.busylock),
-};
-
-
static const u8 prio2band[TC_PRIO_MAX + 1] = {
1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
};
@@ -733,18 +725,19 @@ static void attach_one_default_qdisc(struct net_device *dev,
struct netdev_queue *dev_queue,
void *_unused)
{
- struct Qdisc *qdisc = &noqueue_qdisc;
+ struct Qdisc *qdisc;
+ const struct Qdisc_ops *ops = default_qdisc_ops;
- if (dev->tx_queue_len) {
- qdisc = qdisc_create_dflt(dev_queue,
- default_qdisc_ops, TC_H_ROOT);
- if (!qdisc) {
- netdev_info(dev, "activation failed\n");
- return;
- }
- if (!netif_is_multiqueue(dev))
- qdisc->flags |= TCQ_F_ONETXQUEUE;
+ if (dev->priv_flags & IFF_NO_QUEUE)
+ ops = &noqueue_qdisc_ops;
+
+ qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT);
+ if (!qdisc) {
+ netdev_info(dev, "activation failed\n");
+ return;
}
+ if (!netif_is_multiqueue(dev))
+ qdisc->flags |= TCQ_F_ONETXQUEUE;
dev_queue->qdisc_sleeping = qdisc;
}
@@ -755,7 +748,8 @@ static void attach_default_qdiscs(struct net_device *dev)
txq = netdev_get_tx_queue(dev, 0);
- if (!netif_is_multiqueue(dev) || dev->tx_queue_len == 0) {
+ if (!netif_is_multiqueue(dev) ||
+ dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
dev->qdisc = txq->qdisc_sleeping;
atomic_inc(&dev->qdisc->refcnt);
@@ -779,7 +773,7 @@ static void transition_one_qdisc(struct net_device *dev,
clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);
rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
- if (need_watchdog_p && new_qdisc != &noqueue_qdisc) {
+ if (need_watchdog_p) {
dev_queue->trans_start = 0;
*need_watchdog_p = 1;
}
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index abb9f2fec28f..80105109f756 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -512,11 +512,9 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_GRED_LIMIT])
sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
- else {
- u32 qlen = qdisc_dev(sch)->tx_queue_len ? : 1;
-
- sch->limit = qlen * psched_mtu(qdisc_dev(sch));
- }
+ else
+ sch->limit = qdisc_dev(sch)->tx_queue_len
+ * psched_mtu(qdisc_dev(sch));
return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index e6c7416d0332..b7ebe2c87586 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1165,7 +1165,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
head = &q->root;
tcf = rcu_dereference_bh(q->root.filter_list);
- while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
+ while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index f1acb0f60dc3..15ccd7f8fb2a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -229,7 +229,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
}
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
+ while (tcf && (result = tc_classify(skb, tcf, &res, false)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
case TC_ACT_QUEUED:
@@ -1048,11 +1048,9 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
if (tb[TCA_HTB_DIRECT_QLEN])
q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]);
- else {
+ else
q->direct_qlen = qdisc_dev(sch)->tx_queue_len;
- if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */
- q->direct_qlen = 2;
- }
+
if ((q->rate2quantum = gopt->rate2quantum) < 1)
q->rate2quantum = 1;
q->defcls = gopt->defcls;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 42dd218871e0..4e904ca0af9d 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -46,7 +46,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
int err;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- err = tc_classify(skb, fl, &res);
+ err = tc_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
index 89f8fcf73f18..5abfe44678d4 100644
--- a/net/sched/sch_plug.c
+++ b/net/sched/sch_plug.c
@@ -130,12 +130,8 @@ static int plug_init(struct Qdisc *sch, struct nlattr *opt)
q->unplug_indefinite = false;
if (opt == NULL) {
- /* We will set a default limit of 100 pkts (~150kB)
- * in case tx_queue_len is not available. The
- * default value is completely arbitrary.
- */
- u32 pkt_limit = qdisc_dev(sch)->tx_queue_len ? : 100;
- q->limit = pkt_limit * psched_mtu(qdisc_dev(sch));
+ q->limit = qdisc_dev(sch)->tx_queue_len
+ * psched_mtu(qdisc_dev(sch));
} else {
struct tc_plug_qopt *ctl = nla_data(opt);
@@ -216,6 +212,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
.peek = qdisc_peek_head,
.init = plug_init,
.change = plug_change,
+ .reset = qdisc_reset_queue,
.owner = THIS_MODULE,
};
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 8e5cd34aaa74..ba6487f2741f 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -42,7 +42,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
if (TC_H_MAJ(skb->priority) != sch->handle) {
fl = rcu_dereference_bh(q->filter_list);
- err = tc_classify(skb, fl, &res);
+ err = tc_classify(skb, fl, &res, false);
#ifdef CONFIG_NET_CLS_ACT
switch (err) {
case TC_ACT_STOLEN:
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index b8d73bca683c..3dc3a6e56052 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -186,7 +186,6 @@ struct qfq_sched {
u64 oldV, V; /* Precise virtual times. */
struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */
- u32 num_active_agg; /* Num. of active aggregates */
u32 wsum; /* weight sum */
u32 iwsum; /* inverse weight sum */
@@ -718,7 +717,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch,
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
fl = rcu_dereference_bh(q->filter_list);
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 4b815193326c..5bbb6332ec57 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -258,7 +258,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl,
struct tcf_result res;
int result;
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -502,7 +502,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
limit = ctl->limit;
if (limit == 0)
- limit = max_t(u32, qdisc_dev(sch)->tx_queue_len, 1);
+ limit = qdisc_dev(sch)->tx_queue_len;
child = fifo_create_dflt(sch, &pfifo_qdisc_ops, limit);
if (IS_ERR(child))
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 7d1492663360..3abab534eb5c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -179,7 +179,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
return sfq_hash(q, skb) + 1;
*qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- result = tc_classify(skb, fl, &res);
+ result = tc_classify(skb, fl, &res, false);
if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
@@ -306,10 +306,10 @@ drop:
len = qdisc_pkt_len(skb);
slot->backlog -= len;
sfq_dec(q, x);
- kfree_skb(skb);
sch->q.qlen--;
qdisc_qstats_drop(sch);
qdisc_qstats_backlog_dec(sch, skb);
+ kfree_skb(skb);
return len;
}