aboutsummaryrefslogtreecommitdiffstats
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig3
-rw-r--r--net/sched/act_api.c103
-rw-r--r--net/sched/act_bpf.c27
-rw-r--r--net/sched/act_connmark.c24
-rw-r--r--net/sched/act_csum.c55
-rw-r--r--net/sched/act_gact.c17
-rw-r--r--net/sched/act_ife.c37
-rw-r--r--net/sched/act_ipt.c15
-rw-r--r--net/sched/act_mirred.c27
-rw-r--r--net/sched/act_nat.c17
-rw-r--r--net/sched/act_pedit.c22
-rw-r--r--net/sched/act_police.c15
-rw-r--r--net/sched/act_sample.c33
-rw-r--r--net/sched/act_simple.c58
-rw-r--r--net/sched/act_skbedit.c22
-rw-r--r--net/sched/act_skbmod.c22
-rw-r--r--net/sched/act_tunnel_key.c28
-rw-r--r--net/sched/act_vlan.c24
-rw-r--r--net/sched/cls_api.c1354
-rw-r--r--net/sched/cls_basic.c41
-rw-r--r--net/sched/cls_bpf.c17
-rw-r--r--net/sched/cls_cgroup.c17
-rw-r--r--net/sched/cls_flow.c17
-rw-r--r--net/sched/cls_flower.c140
-rw-r--r--net/sched/cls_fw.c20
-rw-r--r--net/sched/cls_matchall.c50
-rw-r--r--net/sched/cls_route.c21
-rw-r--r--net/sched/cls_rsvp.h23
-rw-r--r--net/sched/cls_tcindex.c34
-rw-r--r--net/sched/cls_u32.c22
-rw-r--r--net/sched/sch_api.c41
-rw-r--r--net/sched/sch_cake.c167
-rw-r--r--net/sched/sch_cbq.c10
-rw-r--r--net/sched/sch_drr.c16
-rw-r--r--net/sched/sch_generic.c21
-rw-r--r--net/sched/sch_hfsc.c19
-rw-r--r--net/sched/sch_htb.c22
-rw-r--r--net/sched/sch_mq.c2
-rw-r--r--net/sched/sch_mqprio.c3
-rw-r--r--net/sched/sch_multiq.c10
-rw-r--r--net/sched/sch_pie.c110
-rw-r--r--net/sched/sch_prio.c10
-rw-r--r--net/sched/sch_qfq.c14
-rw-r--r--net/sched/sch_red.c3
-rw-r--r--net/sched/sch_sfb.c3
-rw-r--r--net/sched/sch_taprio.c2
-rw-r--r--net/sched/sch_tbf.c3
47 files changed, 1977 insertions, 784 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 1b9afdee5ba9..5c02ad97ef23 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -358,8 +358,7 @@ config NET_SCH_PIE
help
Say Y here if you want to use the Proportional Integral controller
Enhanced scheduler packet scheduling algorithm.
- For more information, please see
- http://tools.ietf.org/html/draft-pan-tsvwg-pie-00
+ For more information, please see https://tools.ietf.org/html/rfc8033
To compile this driver as a module, choose M here: the module
will be called sch_pie.
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index d4b8355737d8..5a87e271d35a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -28,27 +28,10 @@
#include <net/act_api.h>
#include <net/netlink.h>
-static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
-{
- u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK;
-
- if (!tp)
- return -EINVAL;
- a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index);
- if (!a->goto_chain)
- return -ENOMEM;
- return 0;
-}
-
-static void tcf_action_goto_chain_fini(struct tc_action *a)
-{
- tcf_chain_put_by_act(a->goto_chain);
-}
-
static void tcf_action_goto_chain_exec(const struct tc_action *a,
struct tcf_result *res)
{
- const struct tcf_chain *chain = a->goto_chain;
+ const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain);
res->goto_tp = rcu_dereference_bh(chain->filter_chain);
}
@@ -71,6 +54,51 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
call_rcu(&old->rcu, tcf_free_cookie_rcu);
}
+int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
+ struct tcf_chain **newchain,
+ struct netlink_ext_ack *extack)
+{
+ int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL;
+ u32 chain_index;
+
+ if (!opcode)
+ ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0;
+ else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC)
+ ret = 0;
+ if (ret) {
+ NL_SET_ERR_MSG(extack, "invalid control action");
+ goto end;
+ }
+
+ if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) {
+ chain_index = action & TC_ACT_EXT_VAL_MASK;
+ if (!tp || !newchain) {
+ ret = -EINVAL;
+ NL_SET_ERR_MSG(extack,
+ "can't goto NULL proto/chain");
+ goto end;
+ }
+ *newchain = tcf_chain_get_by_act(tp->chain->block, chain_index);
+ if (!*newchain) {
+ ret = -ENOMEM;
+ NL_SET_ERR_MSG(extack,
+ "can't allocate goto_chain");
+ }
+ }
+end:
+ return ret;
+}
+EXPORT_SYMBOL(tcf_action_check_ctrlact);
+
+struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
+ struct tcf_chain *goto_chain)
+{
+ a->tcfa_action = action;
+ rcu_swap_protected(a->goto_chain, goto_chain, 1);
+ return goto_chain;
+}
+EXPORT_SYMBOL(tcf_action_set_ctrlact);
+
/* XXX: For standalone actions, we don't need a RCU grace period either, because
* actions are always connected to filters and filters are already destroyed in
* RCU callbacks, so after a RCU grace period actions are already disconnected
@@ -78,13 +106,15 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
*/
static void free_tcf(struct tc_action *p)
{
+ struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1);
+
free_percpu(p->cpu_bstats);
free_percpu(p->cpu_bstats_hw);
free_percpu(p->cpu_qstats);
tcf_set_action_cookie(&p->act_cookie, NULL);
- if (p->goto_chain)
- tcf_action_goto_chain_fini(p);
+ if (chain)
+ tcf_chain_put_by_act(chain);
kfree(p);
}
@@ -543,7 +573,7 @@ int tcf_register_action(struct tc_action_ops *act,
write_lock(&act_mod_lock);
list_for_each_entry(a, &act_base, head) {
- if (act->type == a->type || (strcmp(act->kind, a->kind) == 0)) {
+ if (act->id == a->id || (strcmp(act->kind, a->kind) == 0)) {
write_unlock(&act_mod_lock);
unregister_pernet_subsys(ops);
return -EEXIST;
@@ -654,6 +684,10 @@ repeat:
return TC_ACT_OK;
}
} else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) {
+ if (unlikely(!rcu_access_pointer(a->goto_chain))) {
+ net_warn_ratelimited("can't go to NULL chain!\n");
+ return TC_ACT_SHOT;
+ }
tcf_action_goto_chain_exec(a, res);
}
@@ -800,15 +834,6 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
return c;
}
-static bool tcf_action_valid(int action)
-{
- int opcode = TC_ACT_EXT_OPCODE(action);
-
- if (!opcode)
- return action <= TC_ACT_VALUE_MAX;
- return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC;
-}
-
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind,
@@ -890,10 +915,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
/* backward compatibility for policer */
if (name == NULL)
err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
- rtnl_held, extack);
+ rtnl_held, tp, extack);
else
err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
- extack);
+ tp, extack);
if (err < 0)
goto err_mod;
@@ -907,18 +932,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
if (err != ACT_P_CREATED)
module_put(a_o->owner);
- if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
- err = tcf_action_goto_chain_init(a, tp);
- if (err) {
- tcf_action_destroy_1(a, bind);
- NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
- return ERR_PTR(err);
- }
- }
-
- if (!tcf_action_valid(a->tcfa_action)) {
+ if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) &&
+ !rcu_access_pointer(a->goto_chain)) {
tcf_action_destroy_1(a, bind);
- NL_SET_ERR_MSG(extack, "Invalid control action value");
+ NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain");
return ERR_PTR(-EINVAL);
}
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index c7633843e223..3841156aa09f 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -17,6 +17,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_bpf.h>
#include <net/tc_act/tc_bpf.h>
@@ -278,10 +279,11 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
static int tcf_bpf_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **act,
int replace, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, bpf_net_id);
struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tcf_bpf_cfg cfg, old;
struct tc_act_bpf *parm;
struct tcf_bpf *prog;
@@ -323,12 +325,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
return ret;
}
+ ret = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (ret < 0)
+ goto release_idr;
+
is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
is_ebpf = tb[TCA_ACT_BPF_FD];
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
ret = -EINVAL;
- goto out;
+ goto put_chain;
}
memset(&cfg, 0, sizeof(cfg));
@@ -336,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
tcf_bpf_init_from_efd(tb, &cfg);
if (ret < 0)
- goto out;
+ goto put_chain;
prog = to_bpf(*act);
@@ -350,10 +356,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
if (cfg.bpf_num_ops)
prog->bpf_num_ops = cfg.bpf_num_ops;
- prog->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*act, parm->action, goto_ch);
rcu_assign_pointer(prog->filter, cfg.filter);
spin_unlock_bh(&prog->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
if (res == ACT_P_CREATED) {
tcf_idr_insert(tn, *act);
} else {
@@ -363,9 +372,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
}
return res;
-out:
- tcf_idr_release(*act, bind);
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
+release_idr:
+ tcf_idr_release(*act, bind);
return ret;
}
@@ -396,7 +409,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_bpf_ops __read_mostly = {
.kind = "bpf",
- .type = TCA_ACT_BPF,
+ .id = TCA_ID_BPF,
.owner = THIS_MODULE,
.act = tcf_bpf_act,
.dump = tcf_bpf_dump,
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 8475913f2070..32ae0cd6e31c 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -21,6 +21,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/act_api.h>
+#include <net/pkt_cls.h>
#include <uapi/linux/tc_act/tc_connmark.h>
#include <net/tc_act/tc_connmark.h>
@@ -97,13 +98,15 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, connmark_net_id);
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tcf_connmark_info *ci;
struct tc_connmark *parm;
- int ret = 0;
+ int ret = 0, err;
if (!nla)
return -EINVAL;
@@ -128,7 +131,11 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
}
ci = to_connmark(*a);
- ci->tcf_action = parm->action;
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
+ extack);
+ if (err < 0)
+ goto release_idr;
+ tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ci->net = net;
ci->zone = parm->zone;
@@ -142,15 +149,24 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
+ extack);
+ if (err < 0)
+ goto release_idr;
/* replacing action and zone */
spin_lock_bh(&ci->tcf_lock);
- ci->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ci->zone = parm->zone;
spin_unlock_bh(&ci->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
ret = 0;
}
return ret;
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
@@ -204,7 +220,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_connmark_ops = {
.kind = "connmark",
- .type = TCA_ACT_CONNMARK,
+ .id = TCA_ID_CONNMARK,
.owner = THIS_MODULE,
.act = tcf_connmark_act,
.dump = tcf_connmark_dump,
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 3dc25b7806d7..0c77e7bdf6d5 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -33,6 +33,7 @@
#include <net/sctp/checksum.h>
#include <net/act_api.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_csum.h>
#include <net/tc_act/tc_csum.h>
@@ -46,12 +47,13 @@ static struct tc_action_ops act_csum_ops;
static int tcf_csum_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held,
+ int bind, bool rtnl_held, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, csum_net_id);
struct tcf_csum_params *params_new;
struct nlattr *tb[TCA_CSUM_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_csum *parm;
struct tcf_csum *p;
int ret = 0, err;
@@ -87,21 +89,27 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
return err;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
p = to_tcf_csum(*a);
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
params_new->update_flags = parm->update_flags;
spin_lock_bh(&p->tcf_lock);
- p->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(p->params, params_new,
lockdep_is_held(&p->tcf_lock));
spin_unlock_bh(&p->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (params_new)
kfree_rcu(params_new, rcu);
@@ -109,6 +117,12 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla,
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
/**
@@ -559,8 +573,11 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
struct tcf_result *res)
{
struct tcf_csum *p = to_tcf_csum(a);
+ bool orig_vlan_tag_present = false;
+ unsigned int vlan_hdr_count = 0;
struct tcf_csum_params *params;
u32 update_flags;
+ __be16 protocol;
int action;
params = rcu_dereference_bh(p->params);
@@ -573,7 +590,9 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
goto drop;
update_flags = params->update_flags;
- switch (tc_skb_protocol(skb)) {
+ protocol = tc_skb_protocol(skb);
+again:
+ switch (protocol) {
case cpu_to_be16(ETH_P_IP):
if (!tcf_csum_ipv4(skb, update_flags))
goto drop;
@@ -582,13 +601,35 @@ static int tcf_csum_act(struct sk_buff *skb, const struct tc_action *a,
if (!tcf_csum_ipv6(skb, update_flags))
goto drop;
break;
+ case cpu_to_be16(ETH_P_8021AD): /* fall through */
+ case cpu_to_be16(ETH_P_8021Q):
+ if (skb_vlan_tag_present(skb) && !orig_vlan_tag_present) {
+ protocol = skb->protocol;
+ orig_vlan_tag_present = true;
+ } else {
+ struct vlan_hdr *vlan = (struct vlan_hdr *)skb->data;
+
+ protocol = vlan->h_vlan_encapsulated_proto;
+ skb_pull(skb, VLAN_HLEN);
+ skb_reset_network_header(skb);
+ vlan_hdr_count++;
+ }
+ goto again;
+ }
+
+out:
+ /* Restore the skb for the pulled VLAN tags */
+ while (vlan_hdr_count--) {
+ skb_push(skb, VLAN_HLEN);
+ skb_reset_network_header(skb);
}
return action;
drop:
qstats_drop_inc(this_cpu_ptr(p->common.cpu_qstats));
- return TC_ACT_SHOT;
+ action = TC_ACT_SHOT;
+ goto out;
}
static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind,
@@ -660,7 +701,7 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act)
static struct tc_action_ops act_csum_ops = {
.kind = "csum",
- .type = TCA_ACT_CSUM,
+ .id = TCA_ID_CSUM,
.owner = THIS_MODULE,
.act = tcf_csum_act,
.dump = tcf_csum_dump,
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index b61c20ebb314..e540e31069d7 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -20,6 +20,7 @@
#include <linux/init.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_gact.h>
#include <net/tc_act/tc_gact.h>
@@ -57,10 +58,11 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
static int tcf_gact_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, gact_net_id);
struct nlattr *tb[TCA_GACT_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_gact *parm;
struct tcf_gact *gact;
int ret = 0;
@@ -116,10 +118,13 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
return err;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
gact = to_gact(*a);
spin_lock_bh(&gact->tcf_lock);
- gact->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
#ifdef CONFIG_GACT_PROB
if (p_parm) {
gact->tcfg_paction = p_parm->paction;
@@ -133,9 +138,15 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla,
#endif
spin_unlock_bh(&gact->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a,
@@ -253,7 +264,7 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
- .type = TCA_ACT_GACT,
+ .id = TCA_ID_GACT,
.owner = THIS_MODULE,
.act = tcf_gact_act,
.stats_update = tcf_gact_stats_update,
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 30b63fa23ee2..31c6ffb6abe7 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -29,6 +29,7 @@
#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <uapi/linux/tc_act/tc_ife.h>
#include <net/tc_act/tc_ife.h>
#include <linux/etherdevice.h>
@@ -469,11 +470,12 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
static int tcf_ife_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, ife_net_id);
struct nlattr *tb[TCA_IFE_MAX + 1];
struct nlattr *tb2[IFE_META_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tcf_ife_params *p;
struct tcf_ife_info *ife;
u16 ife_type = ETH_P_IFE;
@@ -531,6 +533,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
}
ife = to_ife(*a);
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
p->flags = parm->flags;
if (parm->flags & IFE_ENCODE) {
@@ -563,13 +569,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
if (tb[TCA_IFE_METALST]) {
err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST],
NULL, NULL);
- if (err) {
-metadata_parse_err:
- tcf_idr_release(*a, bind);
- kfree(p);
- return err;
- }
-
+ if (err)
+ goto metadata_parse_err;
err = populate_metalist(ife, tb2, exists, rtnl_held);
if (err)
goto metadata_parse_err;
@@ -581,21 +582,20 @@ metadata_parse_err:
* going to bail out
*/
err = use_all_metadata(ife, exists);
- if (err) {
- tcf_idr_release(*a, bind);
- kfree(p);
- return err;
- }
+ if (err)
+ goto metadata_parse_err;
}
if (exists)
spin_lock_bh(&ife->tcf_lock);
- ife->tcf_action = parm->action;
/* protected by tcf_lock when modifying existing action */
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(ife->params, p, 1);
if (exists)
spin_unlock_bh(&ife->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (p)
kfree_rcu(p, rcu);
@@ -603,6 +603,13 @@ metadata_parse_err:
tcf_idr_insert(tn, *a);
return ret;
+metadata_parse_err:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ kfree(p);
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind,
@@ -864,7 +871,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_ife_ops = {
.kind = "ife",
- .type = TCA_ACT_IFE,
+ .id = TCA_ID_IFE,
.owner = THIS_MODULE,
.act = tcf_ife_act,
.dump = tcf_ife_dump,
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index faa1addf89b3..04a0b5c61194 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -97,7 +97,8 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
- const struct tc_action_ops *ops, int ovr, int bind)
+ const struct tc_action_ops *ops, int ovr, int bind,
+ struct tcf_proto *tp)
{
struct tc_action_net *tn = net_generic(net, id);
struct nlattr *tb[TCA_IPT_MAX + 1];
@@ -205,20 +206,20 @@ err1:
static int tcf_ipt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held,
+ int bind, bool rtnl_held, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
- bind);
+ bind, tp);
}
static int tcf_xt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool unlocked,
+ int bind, bool unlocked, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
- bind);
+ bind, tp);
}
static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
@@ -337,7 +338,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_ipt_ops = {
.kind = "ipt",
- .type = TCA_ACT_IPT,
+ .id = TCA_ID_IPT,
.owner = THIS_MODULE,
.act = tcf_ipt_act,
.dump = tcf_ipt_dump,
@@ -386,7 +387,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_xt_ops = {
.kind = "xt",
- .type = TCA_ACT_XT,
+ .id = TCA_ID_XT,
.owner = THIS_MODULE,
.act = tcf_ipt_act,
.dump = tcf_ipt_dump,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index c8cf4d10c435..17cc6bd4c57c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -94,10 +94,12 @@ static struct tc_action_ops act_mirred_ops;
static int tcf_mirred_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, mirred_net_id);
struct nlattr *tb[TCA_MIRRED_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
bool mac_header_xmit = false;
struct tc_mirred *parm;
struct tcf_mirred *m;
@@ -157,18 +159,23 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+
m = to_mirred(*a);
+ if (ret == ACT_P_CREATED)
+ INIT_LIST_HEAD(&m->tcfm_list);
+
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
spin_lock_bh(&m->tcf_lock);
- m->tcf_action = parm->action;
- m->tcfm_eaction = parm->eaction;
if (parm->ifindex) {
dev = dev_get_by_index(net, parm->ifindex);
if (!dev) {
spin_unlock_bh(&m->tcf_lock);
- tcf_idr_release(*a, bind);
- return -ENODEV;
+ err = -ENODEV;
+ goto put_chain;
}
mac_header_xmit = dev_is_mac_header_xmit(dev);
rcu_swap_protected(m->tcfm_dev, dev,
@@ -177,7 +184,11 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
dev_put(dev);
m->tcfm_mac_header_xmit = mac_header_xmit;
}
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ m->tcfm_eaction = parm->eaction;
spin_unlock_bh(&m->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED) {
spin_lock(&mirred_list_lock);
@@ -188,6 +199,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla,
}
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a,
@@ -400,7 +417,7 @@ static void tcf_mirred_put_dev(struct net_device *dev)
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
- .type = TCA_ACT_MIRRED,
+ .id = TCA_ID_MIRRED,
.owner = THIS_MODULE,
.act = tcf_mirred_act,
.stats_update = tcf_stats_update,
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index c5c1e23add77..e91bb8eb81ec 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -21,6 +21,7 @@
#include <linux/string.h>
#include <linux/tc_act/tc_nat.h>
#include <net/act_api.h>
+#include <net/pkt_cls.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/netlink.h>
@@ -38,10 +39,12 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
struct tc_action **a, int ovr, int bind,
- bool rtnl_held, struct netlink_ext_ack *extack)
+ bool rtnl_held, struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, nat_net_id);
struct nlattr *tb[TCA_NAT_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_nat *parm;
int ret = 0, err;
struct tcf_nat *p;
@@ -76,6 +79,9 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
} else {
return err;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
p = to_tcf_nat(*a);
spin_lock_bh(&p->tcf_lock);
@@ -84,13 +90,18 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
p->mask = parm->mask;
p->flags = parm->flags;
- p->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
spin_unlock_bh(&p->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a,
@@ -304,7 +315,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
- .type = TCA_ACT_NAT,
+ .id = TCA_ID_NAT,
.owner = THIS_MODULE,
.act = tcf_nat_act,
.dump = tcf_nat_dump,
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 2b372a06b432..287793abfaf9 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -23,6 +23,7 @@
#include <linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_pedit.h>
#include <uapi/linux/tc_act/tc_pedit.h>
+#include <net/pkt_cls.h>
static unsigned int pedit_net_id;
static struct tc_action_ops act_pedit_ops;
@@ -138,10 +139,11 @@ nla_failure:
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, pedit_net_id);
struct nlattr *tb[TCA_PEDIT_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_pedit_key *keys = NULL;
struct tcf_pedit_key_ex *keys_ex;
struct tc_pedit *parm;
@@ -205,6 +207,11 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
goto out_free;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0) {
+ ret = err;
+ goto out_release;
+ }
p = to_pedit(*a);
spin_lock_bh(&p->tcf_lock);
@@ -214,7 +221,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
if (!keys) {
spin_unlock_bh(&p->tcf_lock);
ret = -ENOMEM;
- goto out_release;
+ goto put_chain;
}
kfree(p->tcfp_keys);
p->tcfp_keys = keys;
@@ -223,16 +230,21 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
memcpy(p->tcfp_keys, parm->keys, ksize);
p->tcfp_flags = parm->flags;
- p->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
kfree(p->tcfp_keys_ex);
p->tcfp_keys_ex = keys_ex;
spin_unlock_bh(&p->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
out_release:
tcf_idr_release(*a, bind);
out_free:
@@ -406,7 +418,7 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
struct tcf_t t;
int s;
- s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key);
+ s = struct_size(opt, keys, p->tcfp_nkeys);
/* netlink spinlocks held above us - must use ATOMIC */
opt = kzalloc(s, GFP_ATOMIC);
@@ -470,7 +482,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_pedit_ops = {
.kind = "pedit",
- .type = TCA_ACT_PEDIT,
+ .id = TCA_ID_PEDIT,
.owner = THIS_MODULE,
.act = tcf_pedit_act,
.dump = tcf_pedit_dump,
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index ec8ec55e0fe8..2b8581f6ab51 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -21,6 +21,7 @@
#include <linux/slab.h>
#include <net/act_api.h>
#include <net/netlink.h>
+#include <net/pkt_cls.h>
struct tcf_police_params {
int tcfp_result;
@@ -83,10 +84,12 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
static int tcf_police_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
int ret = 0, tcfp_result = TC_ACT_OK, err, size;
struct nlattr *tb[TCA_POLICE_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_police *parm;
struct tcf_police *police;
struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
@@ -128,6 +131,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
police = to_police(*a);
if (parm->rate.rate) {
@@ -213,12 +219,14 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
if (new->peak_present)
police->tcfp_ptoks = new->tcfp_mtu_ptoks;
spin_unlock_bh(&police->tcfp_lock);
- police->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(police->params,
new,
lockdep_is_held(&police->tcf_lock));
spin_unlock_bh(&police->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (new)
kfree_rcu(new, rcu);
@@ -229,6 +237,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla,
failure:
qdisc_put_rtab(P_tab);
qdisc_put_rtab(R_tab);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
tcf_idr_release(*a, bind);
return err;
}
@@ -366,7 +377,7 @@ MODULE_LICENSE("GPL");
static struct tc_action_ops act_police_ops = {
.kind = "police",
- .type = TCA_ID_POLICE,
+ .id = TCA_ID_POLICE,
.owner = THIS_MODULE,
.act = tcf_police_act,
.dump = tcf_police_dump,
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 1a0c682fd734..0f82d50ea232 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -22,6 +22,7 @@
#include <linux/tc_act/tc_sample.h>
#include <net/tc_act/tc_sample.h>
#include <net/psample.h>
+#include <net/pkt_cls.h>
#include <linux/if_arp.h>
@@ -37,14 +38,15 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
static int tcf_sample_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
- int bind, bool rtnl_held,
+ int bind, bool rtnl_held, struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, sample_net_id);
struct nlattr *tb[TCA_SAMPLE_MAX + 1];
struct psample_group *psample_group;
+ struct tcf_chain *goto_ch = NULL;
+ u32 psample_group_num, rate;
struct tc_sample *parm;
- u32 psample_group_num;
struct tcf_sample *s;
bool exists = false;
int ret, err;
@@ -79,19 +81,28 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+ rate = nla_get_u32(tb[TCA_SAMPLE_RATE]);
+ if (!rate) {
+ NL_SET_ERR_MSG(extack, "invalid sample rate");
+ err = -EINVAL;
+ goto put_chain;
+ }
psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]);
psample_group = psample_group_get(net, psample_group_num);
if (!psample_group) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
s = to_sample(*a);
spin_lock_bh(&s->tcf_lock);
- s->tcf_action = parm->action;
- s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]);
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ s->rate = rate;
s->psample_group_num = psample_group_num;
RCU_INIT_POINTER(s->psample_group, psample_group);
@@ -100,10 +111,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]);
}
spin_unlock_bh(&s->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static void tcf_sample_cleanup(struct tc_action *a)
@@ -233,7 +252,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_sample_ops = {
.kind = "sample",
- .type = TCA_ACT_SAMPLE,
+ .id = TCA_ID_SAMPLE,
.owner = THIS_MODULE,
.act = tcf_sample_act,
.dump = tcf_sample_dump,
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 902957beceb3..23c8ca5615e5 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -18,8 +18,7 @@
#include <linux/rtnetlink.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
-
-#define TCA_ACT_SIMP 22
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_defact.h>
#include <net/tc_act/tc_defact.h>
@@ -62,14 +61,26 @@ static int alloc_defdata(struct tcf_defact *d, const struct nlattr *defdata)
return 0;
}
-static void reset_policy(struct tcf_defact *d, const struct nlattr *defdata,
- struct tc_defact *p)
+static int reset_policy(struct tc_action *a, const struct nlattr *defdata,
+ struct tc_defact *p, struct tcf_proto *tp,
+ struct netlink_ext_ack *extack)
{
+ struct tcf_chain *goto_ch = NULL;
+ struct tcf_defact *d;
+ int err;
+
+ err = tcf_action_check_ctrlact(p->action, tp, &goto_ch, extack);
+ if (err < 0)
+ return err;
+ d = to_defact(a);
spin_lock_bh(&d->tcf_lock);
- d->tcf_action = p->action;
+ goto_ch = tcf_action_set_ctrlact(a, p->action, goto_ch);
memset(d->tcfd_defdata, 0, SIMP_MAX_DATA);
nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
spin_unlock_bh(&d->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+ return 0;
}
static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
@@ -80,10 +91,11 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
static int tcf_simp_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, simp_net_id);
struct nlattr *tb[TCA_DEF_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_defact *parm;
struct tcf_defact *d;
bool exists = false;
@@ -124,27 +136,37 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla,
}
d = to_defact(*a);
- ret = alloc_defdata(d, tb[TCA_DEF_DATA]);
- if (ret < 0) {
- tcf_idr_release(*a, bind);
- return ret;
- }
- d->tcf_action = parm->action;
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
+ extack);
+ if (err < 0)
+ goto release_idr;
+
+ err = alloc_defdata(d, tb[TCA_DEF_DATA]);
+ if (err < 0)
+ goto put_chain;
+
+ tcf_action_set_ctrlact(*a, parm->action, goto_ch);
ret = ACT_P_CREATED;
} else {
- d = to_defact(*a);
-
if (!ovr) {
- tcf_idr_release(*a, bind);
- return -EEXIST;
+ err = -EEXIST;
+ goto release_idr;
}
- reset_policy(d, tb[TCA_DEF_DATA], parm);
+ err = reset_policy(*a, tb[TCA_DEF_DATA], parm, tp, extack);
+ if (err)
+ goto release_idr;
}
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
@@ -197,7 +219,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_simp_ops = {
.kind = "simple",
- .type = TCA_ACT_SIMP,
+ .id = TCA_ID_SIMP,
.owner = THIS_MODULE,
.act = tcf_simp_act,
.dump = tcf_simp_dump,
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index cfceed28c333..7e1d261a31d2 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -26,6 +26,7 @@
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/dsfield.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_skbedit.h>
#include <net/tc_act/tc_skbedit.h>
@@ -96,11 +97,13 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbedit_net_id);
struct tcf_skbedit_params *params_new;
struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tc_skbedit *parm;
struct tcf_skbedit *d;
u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL;
@@ -186,11 +189,14 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
return -EEXIST;
}
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
if (unlikely(!params_new)) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
params_new->flags = flags;
@@ -208,16 +214,24 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
params_new->mask = *mask;
spin_lock_bh(&d->tcf_lock);
- d->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(d->params, params_new,
lockdep_is_held(&d->tcf_lock));
spin_unlock_bh(&d->tcf_lock);
if (params_new)
kfree_rcu(params_new, rcu);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
@@ -304,7 +318,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_skbedit_ops = {
.kind = "skbedit",
- .type = TCA_ACT_SKBEDIT,
+ .id = TCA_ID_SKBEDIT,
.owner = THIS_MODULE,
.act = tcf_skbedit_act,
.dump = tcf_skbedit_dump,
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 59710a183bd3..1d4c324d0a42 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -16,6 +16,7 @@
#include <linux/rtnetlink.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_skbmod.h>
#include <net/tc_act/tc_skbmod.h>
@@ -82,11 +83,13 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, skbmod_net_id);
struct nlattr *tb[TCA_SKBMOD_MAX + 1];
struct tcf_skbmod_params *p, *p_old;
+ struct tcf_chain *goto_ch = NULL;
struct tc_skbmod *parm;
struct tcf_skbmod *d;
bool exists = false;
@@ -153,21 +156,24 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
d = to_skbmod(*a);
p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL);
if (unlikely(!p)) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
p->flags = lflags;
- d->tcf_action = parm->action;
if (ovr)
spin_lock_bh(&d->tcf_lock);
/* Protected by tcf_lock if overwriting existing action. */
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
p_old = rcu_dereference_protected(d->skbmod_p, 1);
if (lflags & SKBMOD_F_DMAC)
@@ -183,10 +189,18 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
if (p_old)
kfree_rcu(p_old, rcu);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static void tcf_skbmod_cleanup(struct tc_action *a)
@@ -260,7 +274,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_skbmod_ops = {
.kind = "skbmod",
- .type = TCA_ACT_SKBMOD,
+ .id = TCA_ACT_SKBMOD,
.owner = THIS_MODULE,
.act = tcf_skbmod_act,
.dump = tcf_skbmod_dump,
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 3f943de9a2c9..d5aaf90a3971 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -17,6 +17,7 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_tunnel_key.h>
#include <net/tc_act/tc_tunnel_key.h>
@@ -203,18 +204,21 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
return;
if (p->tcft_action == TCA_TUNNEL_KEY_ACT_SET)
dst_release(&p->tcft_enc_metadata->dst);
+
kfree_rcu(p, rcu);
}
static int tunnel_key_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
+ struct tcf_proto *tp,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1];
struct tcf_tunnel_key_params *params_new;
struct metadata_dst *metadata = NULL;
+ struct tcf_chain *goto_ch = NULL;
struct tc_tunnel_key *parm;
struct tcf_tunnel_key *t;
bool exists = false;
@@ -321,6 +325,12 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
goto err_out;
}
+#ifdef CONFIG_DST_CACHE
+ ret = dst_cache_init(&metadata->u.tun_info.dst_cache, GFP_KERNEL);
+ if (ret)
+ goto release_tun_meta;
+#endif
+
if (opts_len) {
ret = tunnel_key_opts_set(tb[TCA_TUNNEL_KEY_ENC_OPTS],
&metadata->u.tun_info,
@@ -352,6 +362,12 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
goto release_tun_meta;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0) {
+ ret = err;
+ exists = true;
+ goto release_tun_meta;
+ }
t = to_tunnel_key(*a);
params_new = kzalloc(sizeof(*params_new), GFP_KERNEL);
@@ -359,23 +375,29 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla,
NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters");
ret = -ENOMEM;
exists = true;
- goto release_tun_meta;
+ goto put_chain;
}
params_new->tcft_action = parm->t_action;
params_new->tcft_enc_metadata = metadata;
spin_lock_bh(&t->tcf_lock);
- t->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(t->params, params_new,
lockdep_is_held(&t->tcf_lock));
spin_unlock_bh(&t->tcf_lock);
tunnel_key_release_params(params_new);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
release_tun_meta:
if (metadata)
dst_release(&metadata->dst);
@@ -564,7 +586,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_tunnel_key_ops = {
.kind = "tunnel_key",
- .type = TCA_ACT_TUNNEL_KEY,
+ .id = TCA_ID_TUNNEL_KEY,
.owner = THIS_MODULE,
.act = tunnel_key_act,
.dump = tunnel_key_dump,
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 93fdaf707313..0f40d0a74423 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -15,6 +15,7 @@
#include <linux/if_vlan.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
+#include <net/pkt_cls.h>
#include <linux/tc_act/tc_vlan.h>
#include <net/tc_act/tc_vlan.h>
@@ -105,10 +106,11 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
static int tcf_vlan_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
int ovr, int bind, bool rtnl_held,
- struct netlink_ext_ack *extack)
+ struct tcf_proto *tp, struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, vlan_net_id);
struct nlattr *tb[TCA_VLAN_MAX + 1];
+ struct tcf_chain *goto_ch = NULL;
struct tcf_vlan_params *p;
struct tc_vlan *parm;
struct tcf_vlan *v;
@@ -200,12 +202,16 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
return -EEXIST;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
v = to_vlan(*a);
p = kzalloc(sizeof(*p), GFP_KERNEL);
if (!p) {
- tcf_idr_release(*a, bind);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto put_chain;
}
p->tcfv_action = action;
@@ -214,16 +220,24 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla,
p->tcfv_push_proto = push_proto;
spin_lock_bh(&v->tcf_lock);
- v->tcf_action = parm->action;
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
rcu_swap_protected(v->vlan_p, p, lockdep_is_held(&v->tcf_lock));
spin_unlock_bh(&v->tcf_lock);
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
if (p)
kfree_rcu(p, rcu);
if (ret == ACT_P_CREATED)
tcf_idr_insert(tn, *a);
return ret;
+put_chain:
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+release_idr:
+ tcf_idr_release(*a, bind);
+ return err;
}
static void tcf_vlan_cleanup(struct tc_action *a)
@@ -297,7 +311,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index)
static struct tc_action_ops act_vlan_ops = {
.kind = "vlan",
- .type = TCA_ACT_VLAN,
+ .id = TCA_ID_VLAN,
.owner = THIS_MODULE,
.act = tcf_vlan_act,
.dump = tcf_vlan_dump,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index e2b5cb2eb34e..99ae30c177c7 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -31,6 +31,13 @@
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
+#include <net/tc_act/tc_pedit.h>
+#include <net/tc_act/tc_mirred.h>
+#include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
+#include <net/tc_act/tc_csum.h>
+#include <net/tc_act/tc_gact.h>
+#include <net/tc_act/tc_skbedit.h>
extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
@@ -61,7 +68,8 @@ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
}
static const struct tcf_proto_ops *
-tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
+tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
const struct tcf_proto_ops *ops;
@@ -69,9 +77,11 @@ tcf_proto_lookup_ops(const char *kind, struct netlink_ext_ack *extack)
if (ops)
return ops;
#ifdef CONFIG_MODULES
- rtnl_unlock();
+ if (rtnl_held)
+ rtnl_unlock();
request_module("cls_%s", kind);
- rtnl_lock();
+ if (rtnl_held)
+ rtnl_lock();
ops = __tcf_proto_lookup_ops(kind);
/* We dropped the RTNL semaphore in order to perform
* the module load. So, even if we succeeded in loading
@@ -152,8 +162,26 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp)
return TC_H_MAJ(first);
}
+static bool tcf_proto_is_unlocked(const char *kind)
+{
+ const struct tcf_proto_ops *ops;
+ bool ret;
+
+ ops = tcf_proto_lookup_ops(kind, false, NULL);
+ /* On error return false to take rtnl lock. Proto lookup/create
+ * functions will perform lookup again and properly handle errors.
+ */
+ if (IS_ERR(ops))
+ return false;
+
+ ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
+ module_put(ops->owner);
+ return ret;
+}
+
static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
u32 prio, struct tcf_chain *chain,
+ bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tcf_proto *tp;
@@ -163,7 +191,7 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
if (!tp)
return ERR_PTR(-ENOBUFS);
- tp->ops = tcf_proto_lookup_ops(kind, extack);
+ tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
if (IS_ERR(tp->ops)) {
err = PTR_ERR(tp->ops);
goto errout;
@@ -172,6 +200,8 @@ static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
tp->protocol = protocol;
tp->prio = prio;
tp->chain = chain;
+ spin_lock_init(&tp->lock);
+ refcount_set(&tp->refcnt, 1);
err = tp->ops->init(tp);
if (err) {
@@ -185,14 +215,80 @@ errout:
return ERR_PTR(err);
}
-static void tcf_proto_destroy(struct tcf_proto *tp,
+static void tcf_proto_get(struct tcf_proto *tp)
+{
+ refcount_inc(&tp->refcnt);
+}
+
+static void tcf_chain_put(struct tcf_chain *chain);
+
+static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
- tp->ops->destroy(tp, extack);
+ tp->ops->destroy(tp, rtnl_held, extack);
+ tcf_chain_put(tp->chain);
module_put(tp->ops->owner);
kfree_rcu(tp, rcu);
}
+static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
+{
+ if (refcount_dec_and_test(&tp->refcnt))
+ tcf_proto_destroy(tp, rtnl_held, extack);
+}
+
+static int walker_check_empty(struct tcf_proto *tp, void *fh,
+ struct tcf_walker *arg)
+{
+ if (fh) {
+ arg->nonempty = true;
+ return -1;
+ }
+ return 0;
+}
+
+static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held)
+{
+ struct tcf_walker walker = { .fn = walker_check_empty, };
+
+ if (tp->ops->walk) {
+ tp->ops->walk(tp, &walker, rtnl_held);
+ return !walker.nonempty;
+ }
+ return true;
+}
+
+static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held)
+{
+ spin_lock(&tp->lock);
+ if (tcf_proto_is_empty(tp, rtnl_held))
+ tp->deleting = true;
+ spin_unlock(&tp->lock);
+ return tp->deleting;
+}
+
+static void tcf_proto_mark_delete(struct tcf_proto *tp)
+{
+ spin_lock(&tp->lock);
+ tp->deleting = true;
+ spin_unlock(&tp->lock);
+}
+
+static bool tcf_proto_is_deleting(struct tcf_proto *tp)
+{
+ bool deleting;
+
+ spin_lock(&tp->lock);
+ deleting = tp->deleting;
+ spin_unlock(&tp->lock);
+
+ return deleting;
+}
+
+#define ASSERT_BLOCK_LOCKED(block) \
+ lockdep_assert_held(&(block)->lock)
+
struct tcf_filter_chain_list_item {
struct list_head list;
tcf_chain_head_change_t *chain_head_change;
@@ -204,10 +300,13 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
{
struct tcf_chain *chain;
+ ASSERT_BLOCK_LOCKED(block);
+
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (!chain)
return NULL;
list_add_tail(&chain->list, &block->chain_list);
+ mutex_init(&chain->filter_chain_lock);
chain->block = block;
chain->index = chain_index;
chain->refcnt = 1;
@@ -231,29 +330,59 @@ static void tcf_chain0_head_change(struct tcf_chain *chain,
if (chain->index)
return;
+
+ mutex_lock(&block->lock);
list_for_each_entry(item, &block->chain0.filter_chain_list, list)
tcf_chain_head_change_item(item, tp_head);
+ mutex_unlock(&block->lock);
}
-static void tcf_chain_destroy(struct tcf_chain *chain)
+/* Returns true if block can be safely freed. */
+
+static bool tcf_chain_detach(struct tcf_chain *chain)
{
struct tcf_block *block = chain->block;
+ ASSERT_BLOCK_LOCKED(block);
+
list_del(&chain->list);
if (!chain->index)
block->chain0.chain = NULL;
- kfree(chain);
- if (list_empty(&block->chain_list) && !refcount_read(&block->refcnt))
- kfree_rcu(block, rcu);
+
+ if (list_empty(&block->chain_list) &&
+ refcount_read(&block->refcnt) == 0)
+ return true;
+
+ return false;
+}
+
+static void tcf_block_destroy(struct tcf_block *block)
+{
+ mutex_destroy(&block->lock);
+ kfree_rcu(block, rcu);
+}
+
+static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
+{
+ struct tcf_block *block = chain->block;
+
+ mutex_destroy(&chain->filter_chain_lock);
+ kfree_rcu(chain, rcu);
+ if (free_block)
+ tcf_block_destroy(block);
}
static void tcf_chain_hold(struct tcf_chain *chain)
{
+ ASSERT_BLOCK_LOCKED(chain->block);
+
++chain->refcnt;
}
static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
{
+ ASSERT_BLOCK_LOCKED(chain->block);
+
/* In case all the references are action references, this
* chain should not be shown to the user.
*/
@@ -265,6 +394,8 @@ static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
{
struct tcf_chain *chain;
+ ASSERT_BLOCK_LOCKED(block);
+
list_for_each_entry(chain, &block->chain_list, list) {
if (chain->index == chain_index)
return chain;
@@ -279,31 +410,40 @@ static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
u32 chain_index, bool create,
bool by_act)
{
- struct tcf_chain *chain = tcf_chain_lookup(block, chain_index);
+ struct tcf_chain *chain = NULL;
+ bool is_first_reference;
+ mutex_lock(&block->lock);
+ chain = tcf_chain_lookup(block, chain_index);
if (chain) {
tcf_chain_hold(chain);
} else {
if (!create)
- return NULL;
+ goto errout;
chain = tcf_chain_create(block, chain_index);
if (!chain)
- return NULL;
+ goto errout;
}
if (by_act)
++chain->action_refcnt;
+ is_first_reference = chain->refcnt - chain->action_refcnt == 1;
+ mutex_unlock(&block->lock);
/* Send notification only in case we got the first
* non-action reference. Until then, the chain acts only as
* a placeholder for actions pointing to it and user ought
* not know about them.
*/
- if (chain->refcnt - chain->action_refcnt == 1 && !by_act)
+ if (is_first_reference && !by_act)
tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
RTM_NEWCHAIN, false);
return chain;
+
+errout:
+ mutex_unlock(&block->lock);
+ return chain;
}
static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
@@ -318,51 +458,91 @@ struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
}
EXPORT_SYMBOL(tcf_chain_get_by_act);
-static void tc_chain_tmplt_del(struct tcf_chain *chain);
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv);
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv, u32 chain_index,
+ struct tcf_block *block, struct sk_buff *oskb,
+ u32 seq, u16 flags, bool unicast);
-static void __tcf_chain_put(struct tcf_chain *chain, bool by_act)
+static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
+ bool explicitly_created)
{
+ struct tcf_block *block = chain->block;
+ const struct tcf_proto_ops *tmplt_ops;
+ bool free_block = false;
+ unsigned int refcnt;
+ void *tmplt_priv;
+
+ mutex_lock(&block->lock);
+ if (explicitly_created) {
+ if (!chain->explicitly_created) {
+ mutex_unlock(&block->lock);
+ return;
+ }
+ chain->explicitly_created = false;
+ }
+
if (by_act)
chain->action_refcnt--;
- chain->refcnt--;
+
+ /* tc_chain_notify_delete can't be called while holding block lock.
+ * However, when block is unlocked chain can be changed concurrently, so
+ * save these to temporary variables.
+ */
+ refcnt = --chain->refcnt;
+ tmplt_ops = chain->tmplt_ops;
+ tmplt_priv = chain->tmplt_priv;
/* The last dropped non-action reference will trigger notification. */
- if (chain->refcnt - chain->action_refcnt == 0 && !by_act)
- tc_chain_notify(chain, NULL, 0, 0, RTM_DELCHAIN, false);
+ if (refcnt - chain->action_refcnt == 0 && !by_act) {
+ tc_chain_notify_delete(tmplt_ops, tmplt_priv, chain->index,
+ block, NULL, 0, 0, false);
+ /* Last reference to chain, no need to lock. */
+ chain->flushing = false;
+ }
- if (chain->refcnt == 0) {
- tc_chain_tmplt_del(chain);
- tcf_chain_destroy(chain);
+ if (refcnt == 0)
+ free_block = tcf_chain_detach(chain);
+ mutex_unlock(&block->lock);
+
+ if (refcnt == 0) {
+ tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
+ tcf_chain_destroy(chain, free_block);
}
}
static void tcf_chain_put(struct tcf_chain *chain)
{
- __tcf_chain_put(chain, false);
+ __tcf_chain_put(chain, false, false);
}
void tcf_chain_put_by_act(struct tcf_chain *chain)
{
- __tcf_chain_put(chain, true);
+ __tcf_chain_put(chain, true, false);
}
EXPORT_SYMBOL(tcf_chain_put_by_act);
static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
{
- if (chain->explicitly_created)
- tcf_chain_put(chain);
+ __tcf_chain_put(chain, false, true);
}
-static void tcf_chain_flush(struct tcf_chain *chain)
+static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
{
- struct tcf_proto *tp = rtnl_dereference(chain->filter_chain);
+ struct tcf_proto *tp, *tp_next;
+ mutex_lock(&chain->filter_chain_lock);
+ tp = tcf_chain_dereference(chain->filter_chain, chain);
+ RCU_INIT_POINTER(chain->filter_chain, NULL);
tcf_chain0_head_change(chain, NULL);
+ chain->flushing = true;
+ mutex_unlock(&chain->filter_chain_lock);
+
while (tp) {
- RCU_INIT_POINTER(chain->filter_chain, tp->next);
- tcf_proto_destroy(tp, NULL);
- tp = rtnl_dereference(chain->filter_chain);
- tcf_chain_put(chain);
+ tp_next = rcu_dereference_protected(tp->next, 1);
+ tcf_proto_put(tp, rtnl_held, NULL);
+ tp = tp_next;
}
}
@@ -684,8 +864,8 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block,
struct tcf_block_ext_info *ei,
struct netlink_ext_ack *extack)
{
- struct tcf_chain *chain0 = block->chain0.chain;
struct tcf_filter_chain_list_item *item;
+ struct tcf_chain *chain0;
item = kmalloc(sizeof(*item), GFP_KERNEL);
if (!item) {
@@ -694,9 +874,32 @@ tcf_chain0_head_change_cb_add(struct tcf_block *block,
}
item->chain_head_change = ei->chain_head_change;
item->chain_head_change_priv = ei->chain_head_change_priv;
- if (chain0 && chain0->filter_chain)
- tcf_chain_head_change_item(item, chain0->filter_chain);
- list_add(&item->list, &block->chain0.filter_chain_list);
+
+ mutex_lock(&block->lock);
+ chain0 = block->chain0.chain;
+ if (chain0)
+ tcf_chain_hold(chain0);
+ else
+ list_add(&item->list, &block->chain0.filter_chain_list);
+ mutex_unlock(&block->lock);
+
+ if (chain0) {
+ struct tcf_proto *tp_head;
+
+ mutex_lock(&chain0->filter_chain_lock);
+
+ tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
+ if (tp_head)
+ tcf_chain_head_change_item(item, tp_head);
+
+ mutex_lock(&block->lock);
+ list_add(&item->list, &block->chain0.filter_chain_list);
+ mutex_unlock(&block->lock);
+
+ mutex_unlock(&chain0->filter_chain_lock);
+ tcf_chain_put(chain0);
+ }
+
return 0;
}
@@ -704,20 +907,23 @@ static void
tcf_chain0_head_change_cb_del(struct tcf_block *block,
struct tcf_block_ext_info *ei)
{
- struct tcf_chain *chain0 = block->chain0.chain;
struct tcf_filter_chain_list_item *item;
+ mutex_lock(&block->lock);
list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
(item->chain_head_change == ei->chain_head_change &&
item->chain_head_change_priv == ei->chain_head_change_priv)) {
- if (chain0)
+ if (block->chain0.chain)
tcf_chain_head_change_item(item, NULL);
list_del(&item->list);
+ mutex_unlock(&block->lock);
+
kfree(item);
return;
}
}
+ mutex_unlock(&block->lock);
WARN_ON(1);
}
@@ -764,6 +970,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
return ERR_PTR(-ENOMEM);
}
+ mutex_init(&block->lock);
INIT_LIST_HEAD(&block->chain_list);
INIT_LIST_HEAD(&block->cb_list);
INIT_LIST_HEAD(&block->owner_list);
@@ -799,157 +1006,241 @@ static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
return block;
}
-static void tcf_block_flush_all_chains(struct tcf_block *block)
+static struct tcf_chain *
+__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
- struct tcf_chain *chain;
+ mutex_lock(&block->lock);
+ if (chain)
+ chain = list_is_last(&chain->list, &block->chain_list) ?
+ NULL : list_next_entry(chain, list);
+ else
+ chain = list_first_entry_or_null(&block->chain_list,
+ struct tcf_chain, list);
- /* Hold a refcnt for all chains, so that they don't disappear
- * while we are iterating.
- */
- list_for_each_entry(chain, &block->chain_list, list)
+ /* skip all action-only chains */
+ while (chain && tcf_chain_held_by_acts_only(chain))
+ chain = list_is_last(&chain->list, &block->chain_list) ?
+ NULL : list_next_entry(chain, list);
+
+ if (chain)
tcf_chain_hold(chain);
+ mutex_unlock(&block->lock);
- list_for_each_entry(chain, &block->chain_list, list)
- tcf_chain_flush(chain);
+ return chain;
}
-static void tcf_block_put_all_chains(struct tcf_block *block)
+/* Function to be used by all clients that want to iterate over all chains on
+ * block. It properly obtains block->lock and takes reference to chain before
+ * returning it. Users of this function must be tolerant to concurrent chain
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that all netlink dump callbacks cannot guarantee to provide
+ * consistent dump because rtnl lock is released each time skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_chain *
+tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
{
- struct tcf_chain *chain, *tmp;
+ struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);
- /* At this point, all the chains should have refcnt >= 1. */
- list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
- tcf_chain_put_explicitly_created(chain);
+ if (chain)
tcf_chain_put(chain);
- }
+
+ return chain_next;
}
+EXPORT_SYMBOL(tcf_get_next_chain);
-static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
- struct tcf_block_ext_info *ei)
+static struct tcf_proto *
+__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
{
- if (refcount_dec_and_test(&block->refcnt)) {
- /* Flushing/putting all chains will cause the block to be
- * deallocated when last chain is freed. However, if chain_list
- * is empty, block has to be manually deallocated. After block
- * reference counter reached 0, it is no longer possible to
- * increment it or add new chains to block.
- */
- bool free_block = list_empty(&block->chain_list);
+ u32 prio = 0;
- if (tcf_block_shared(block))
- tcf_block_remove(block, block->net);
- if (!free_block)
- tcf_block_flush_all_chains(block);
+ ASSERT_RTNL();
+ mutex_lock(&chain->filter_chain_lock);
- if (q)
- tcf_block_offload_unbind(block, q, ei);
+ if (!tp) {
+ tp = tcf_chain_dereference(chain->filter_chain, chain);
+ } else if (tcf_proto_is_deleting(tp)) {
+ /* 'deleting' flag is set and chain->filter_chain_lock was
+ * unlocked, which means next pointer could be invalid. Restart
+ * search.
+ */
+ prio = tp->prio + 1;
+ tp = tcf_chain_dereference(chain->filter_chain, chain);
- if (free_block)
- kfree_rcu(block, rcu);
- else
- tcf_block_put_all_chains(block);
- } else if (q) {
- tcf_block_offload_unbind(block, q, ei);
+ for (; tp; tp = tcf_chain_dereference(tp->next, chain))
+ if (!tp->deleting && tp->prio >= prio)
+ break;
+ } else {
+ tp = tcf_chain_dereference(tp->next, chain);
}
+
+ if (tp)
+ tcf_proto_get(tp);
+
+ mutex_unlock(&chain->filter_chain_lock);
+
+ return tp;
}
-static void tcf_block_refcnt_put(struct tcf_block *block)
+/* Function to be used by all clients that want to iterate over all tp's on
+ * chain. Users of this function must be tolerant to concurrent tp
+ * insertion/deletion or ensure that no concurrent chain modification is
+ * possible. Note that all netlink dump callbacks cannot guarantee to provide
+ * consistent dump because rtnl lock is released each time skb is filled with
+ * data and sent to user-space.
+ */
+
+struct tcf_proto *
+tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
+ bool rtnl_held)
{
- __tcf_block_put(block, NULL, NULL);
+ struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);
+
+ if (tp)
+ tcf_proto_put(tp, rtnl_held, NULL);
+
+ return tp_next;
}
+EXPORT_SYMBOL(tcf_get_next_proto);
-/* Find tcf block.
- * Set q, parent, cl when appropriate.
+static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
+{
+ struct tcf_chain *chain;
+
+ /* Last reference to block. At this point chains cannot be added or
+ * removed concurrently.
+ */
+ for (chain = tcf_get_next_chain(block, NULL);
+ chain;
+ chain = tcf_get_next_chain(block, chain)) {
+ tcf_chain_put_explicitly_created(chain);
+ tcf_chain_flush(chain, rtnl_held);
+ }
+}
+
+/* Lookup Qdisc and increments its reference counter.
+ * Set parent, if necessary.
*/
-static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
- u32 *parent, unsigned long *cl,
- int ifindex, u32 block_index,
- struct netlink_ext_ack *extack)
+static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
+ u32 *parent, int ifindex, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
- struct tcf_block *block;
+ const struct Qdisc_class_ops *cops;
+ struct net_device *dev;
int err = 0;
- if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
- block = tcf_block_refcnt_get(net, block_index);
- if (!block) {
- NL_SET_ERR_MSG(extack, "Block of given index was not found");
- return ERR_PTR(-EINVAL);
- }
- } else {
- const struct Qdisc_class_ops *cops;
- struct net_device *dev;
-
- rcu_read_lock();
+ if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+ return 0;
- /* Find link */
- dev = dev_get_by_index_rcu(net, ifindex);
- if (!dev) {
- rcu_read_unlock();
- return ERR_PTR(-ENODEV);
- }
+ rcu_read_lock();
- /* Find qdisc */
- if (!*parent) {
- *q = dev->qdisc;
- *parent = (*q)->handle;
- } else {
- *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
- if (!*q) {
- NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
- err = -EINVAL;
- goto errout_rcu;
- }
- }
+ /* Find link */
+ dev = dev_get_by_index_rcu(net, ifindex);
+ if (!dev) {
+ rcu_read_unlock();
+ return -ENODEV;
+ }
- *q = qdisc_refcount_inc_nz(*q);
+ /* Find qdisc */
+ if (!*parent) {
+ *q = dev->qdisc;
+ *parent = (*q)->handle;
+ } else {
+ *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
if (!*q) {
NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
err = -EINVAL;
goto errout_rcu;
}
+ }
- /* Is it classful? */
- cops = (*q)->ops->cl_ops;
- if (!cops) {
- NL_SET_ERR_MSG(extack, "Qdisc not classful");
- err = -EINVAL;
- goto errout_rcu;
- }
+ *q = qdisc_refcount_inc_nz(*q);
+ if (!*q) {
+ NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
+ err = -EINVAL;
+ goto errout_rcu;
+ }
- if (!cops->tcf_block) {
- NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
- err = -EOPNOTSUPP;
- goto errout_rcu;
- }
+ /* Is it classful? */
+ cops = (*q)->ops->cl_ops;
+ if (!cops) {
+ NL_SET_ERR_MSG(extack, "Qdisc not classful");
+ err = -EINVAL;
+ goto errout_qdisc;
+ }
- /* At this point we know that qdisc is not noop_qdisc,
- * which means that qdisc holds a reference to net_device
- * and we hold a reference to qdisc, so it is safe to release
- * rcu read lock.
- */
- rcu_read_unlock();
+ if (!cops->tcf_block) {
+ NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
+ err = -EOPNOTSUPP;
+ goto errout_qdisc;
+ }
- /* Do we search for filter, attached to class? */
- if (TC_H_MIN(*parent)) {
- *cl = cops->find(*q, *parent);
- if (*cl == 0) {
- NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
- err = -ENOENT;
- goto errout_qdisc;
- }
+errout_rcu:
+ /* At this point we know that qdisc is not noop_qdisc,
+ * which means that qdisc holds a reference to net_device
+ * and we hold a reference to qdisc, so it is safe to release
+ * rcu read lock.
+ */
+ rcu_read_unlock();
+ return err;
+
+errout_qdisc:
+ rcu_read_unlock();
+
+ if (rtnl_held)
+ qdisc_put(*q);
+ else
+ qdisc_put_unlocked(*q);
+ *q = NULL;
+
+ return err;
+}
+
+static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
+ int ifindex, struct netlink_ext_ack *extack)
+{
+ if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
+ return 0;
+
+ /* Do we search for filter, attached to class? */
+ if (TC_H_MIN(parent)) {
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+ *cl = cops->find(q, parent);
+ if (*cl == 0) {
+ NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
+ return -ENOENT;
}
+ }
+
+ return 0;
+}
+
+static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
+ unsigned long cl, int ifindex,
+ u32 block_index,
+ struct netlink_ext_ack *extack)
+{
+ struct tcf_block *block;
- /* And the last stroke */
- block = cops->tcf_block(*q, *cl, extack);
+ if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+ block = tcf_block_refcnt_get(net, block_index);
if (!block) {
- err = -EINVAL;
- goto errout_qdisc;
+ NL_SET_ERR_MSG(extack, "Block of given index was not found");
+ return ERR_PTR(-EINVAL);
}
+ } else {
+ const struct Qdisc_class_ops *cops = q->ops->cl_ops;
+
+ block = cops->tcf_block(q, cl, extack);
+ if (!block)
+ return ERR_PTR(-EINVAL);
+
if (tcf_block_shared(block)) {
NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
- err = -EOPNOTSUPP;
- goto errout_qdisc;
+ return ERR_PTR(-EOPNOTSUPP);
}
/* Always take reference to block in order to support execution
@@ -962,24 +1253,91 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
}
return block;
+}
+
+static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
+ struct tcf_block_ext_info *ei, bool rtnl_held)
+{
+ if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
+ /* Flushing/putting all chains will cause the block to be
+ * deallocated when last chain is freed. However, if chain_list
+ * is empty, block has to be manually deallocated. After block
+ * reference counter reached 0, it is no longer possible to
+ * increment it or add new chains to block.
+ */
+ bool free_block = list_empty(&block->chain_list);
+
+ mutex_unlock(&block->lock);
+ if (tcf_block_shared(block))
+ tcf_block_remove(block, block->net);
+
+ if (q)
+ tcf_block_offload_unbind(block, q, ei);
+
+ if (free_block)
+ tcf_block_destroy(block);
+ else
+ tcf_block_flush_all_chains(block, rtnl_held);
+ } else if (q) {
+ tcf_block_offload_unbind(block, q, ei);
+ }
+}
+
+static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
+{
+ __tcf_block_put(block, NULL, NULL, rtnl_held);
+}
+
+/* Find tcf block.
+ * Set q, parent, cl when appropriate.
+ */
+
+static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
+ u32 *parent, unsigned long *cl,
+ int ifindex, u32 block_index,
+ struct netlink_ext_ack *extack)
+{
+ struct tcf_block *block;
+ int err = 0;
+
+ ASSERT_RTNL();
+
+ err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
+ if (err)
+ goto errout;
+
+ err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
+ if (err)
+ goto errout_qdisc;
+
+ block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
+ if (IS_ERR(block)) {
+ err = PTR_ERR(block);
+ goto errout_qdisc;
+ }
+
+ return block;
-errout_rcu:
- rcu_read_unlock();
errout_qdisc:
- if (*q) {
+ if (*q)
qdisc_put(*q);
- *q = NULL;
- }
+errout:
+ *q = NULL;
return ERR_PTR(err);
}
-static void tcf_block_release(struct Qdisc *q, struct tcf_block *block)
+static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
+ bool rtnl_held)
{
if (!IS_ERR_OR_NULL(block))
- tcf_block_refcnt_put(block);
+ tcf_block_refcnt_put(block, rtnl_held);
- if (q)
- qdisc_put(q);
+ if (q) {
+ if (rtnl_held)
+ qdisc_put(q);
+ else
+ qdisc_put_unlocked(q);
+ }
}
struct tcf_block_owner_item {
@@ -1087,7 +1445,7 @@ err_chain0_head_change_cb_add:
tcf_block_owner_del(block, q, ei->binder_type);
err_block_owner_add:
err_block_insert:
- tcf_block_refcnt_put(block);
+ tcf_block_refcnt_put(block, true);
return err;
}
EXPORT_SYMBOL(tcf_block_get_ext);
@@ -1124,7 +1482,7 @@ void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
tcf_chain0_head_change_cb_del(block, ei);
tcf_block_owner_del(block, q, ei->binder_type);
- __tcf_block_put(block, q, ei);
+ __tcf_block_put(block, q, ei, true);
}
EXPORT_SYMBOL(tcf_block_put_ext);
@@ -1181,13 +1539,19 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
void *cb_priv, bool add, bool offload_in_use,
struct netlink_ext_ack *extack)
{
- struct tcf_chain *chain;
- struct tcf_proto *tp;
+ struct tcf_chain *chain, *chain_prev;
+ struct tcf_proto *tp, *tp_prev;
int err;
- list_for_each_entry(chain, &block->chain_list, list) {
- for (tp = rtnl_dereference(chain->filter_chain); tp;
- tp = rtnl_dereference(tp->next)) {
+ for (chain = __tcf_get_next_chain(block, NULL);
+ chain;
+ chain_prev = chain,
+ chain = __tcf_get_next_chain(block, chain),
+ tcf_chain_put(chain_prev)) {
+ for (tp = __tcf_get_next_proto(chain, NULL); tp;
+ tp_prev = tp,
+ tp = __tcf_get_next_proto(chain, tp),
+ tcf_proto_put(tp_prev, true, NULL)) {
if (tp->ops->reoffload) {
err = tp->ops->reoffload(tp, add, cb, cb_priv,
extack);
@@ -1204,6 +1568,8 @@ tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
return 0;
err_playback_remove:
+ tcf_proto_put(tp, true, NULL);
+ tcf_chain_put(chain);
tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
extack);
return err;
@@ -1329,32 +1695,116 @@ struct tcf_chain_info {
struct tcf_proto __rcu *next;
};
-static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain_info *chain_info)
+static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info)
{
- return rtnl_dereference(*chain_info->pprev);
+ return tcf_chain_dereference(*chain_info->pprev, chain);
}
-static void tcf_chain_tp_insert(struct tcf_chain *chain,
- struct tcf_chain_info *chain_info,
- struct tcf_proto *tp)
+static int tcf_chain_tp_insert(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info,
+ struct tcf_proto *tp)
{
+ if (chain->flushing)
+ return -EAGAIN;
+
if (*chain_info->pprev == chain->filter_chain)
tcf_chain0_head_change(chain, tp);
- RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info));
+ tcf_proto_get(tp);
+ RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
rcu_assign_pointer(*chain_info->pprev, tp);
- tcf_chain_hold(chain);
+
+ return 0;
}
static void tcf_chain_tp_remove(struct tcf_chain *chain,
struct tcf_chain_info *chain_info,
struct tcf_proto *tp)
{
- struct tcf_proto *next = rtnl_dereference(chain_info->next);
+ struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
+ tcf_proto_mark_delete(tp);
if (tp == chain->filter_chain)
tcf_chain0_head_change(chain, next);
RCU_INIT_POINTER(*chain_info->pprev, next);
- tcf_chain_put(chain);
+}
+
+static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
+ struct tcf_chain_info *chain_info,
+ u32 protocol, u32 prio,
+ bool prio_allocate);
+
+/* Try to insert new proto.
+ * If proto with specified priority already exists, free new proto
+ * and return existing one.
+ */
+
+static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
+ struct tcf_proto *tp_new,
+ u32 protocol, u32 prio,
+ bool rtnl_held)
+{
+ struct tcf_chain_info chain_info;
+ struct tcf_proto *tp;
+ int err = 0;
+
+ mutex_lock(&chain->filter_chain_lock);
+
+ tp = tcf_chain_tp_find(chain, &chain_info,
+ protocol, prio, false);
+ if (!tp)
+ err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
+ mutex_unlock(&chain->filter_chain_lock);
+
+ if (tp) {
+ tcf_proto_destroy(tp_new, rtnl_held, NULL);
+ tp_new = tp;
+ } else if (err) {
+ tcf_proto_destroy(tp_new, rtnl_held, NULL);
+ tp_new = ERR_PTR(err);
+ }
+
+ return tp_new;
+}
+
+static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
+ struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
+{
+ struct tcf_chain_info chain_info;
+ struct tcf_proto *tp_iter;
+ struct tcf_proto **pprev;
+ struct tcf_proto *next;
+
+ mutex_lock(&chain->filter_chain_lock);
+
+ /* Atomically find and remove tp from chain. */
+ for (pprev = &chain->filter_chain;
+ (tp_iter = tcf_chain_dereference(*pprev, chain));
+ pprev = &tp_iter->next) {
+ if (tp_iter == tp) {
+ chain_info.pprev = pprev;
+ chain_info.next = tp_iter->next;
+ WARN_ON(tp_iter->deleting);
+ break;
+ }
+ }
+ /* Verify that tp still exists and no new filters were inserted
+ * concurrently.
+ * Mark tp for deletion if it is empty.
+ */
+ if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) {
+ mutex_unlock(&chain->filter_chain_lock);
+ return;
+ }
+
+ next = tcf_chain_dereference(chain_info.next, chain);
+ if (tp == chain->filter_chain)
+ tcf_chain0_head_change(chain, next);
+ RCU_INIT_POINTER(*chain_info.pprev, next);
+ mutex_unlock(&chain->filter_chain_lock);
+
+ tcf_proto_put(tp, rtnl_held, extack);
}
static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
@@ -1367,7 +1817,8 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
/* Check the chain for existence of proto-tcf with this priority */
for (pprev = &chain->filter_chain;
- (tp = rtnl_dereference(*pprev)); pprev = &tp->next) {
+ (tp = tcf_chain_dereference(*pprev, chain));
+ pprev = &tp->next) {
if (tp->prio >= prio) {
if (tp->prio == prio) {
if (prio_allocate ||
@@ -1380,14 +1831,20 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
}
}
chain_info->pprev = pprev;
- chain_info->next = tp ? tp->next : NULL;
+ if (tp) {
+ chain_info->next = tp->next;
+ tcf_proto_get(tp);
+ } else {
+ chain_info->next = NULL;
+ }
return tp;
}
static int tcf_fill_node(struct net *net, struct sk_buff *skb,
struct tcf_proto *tp, struct tcf_block *block,
struct Qdisc *q, u32 parent, void *fh,
- u32 portid, u32 seq, u16 flags, int event)
+ u32 portid, u32 seq, u16 flags, int event,
+ bool rtnl_held)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1415,7 +1872,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
if (!fh) {
tcm->tcm_handle = 0;
} else {
- if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0)
+ if (tp->ops->dump &&
+ tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
goto nla_put_failure;
}
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -1430,33 +1888,40 @@ nla_put_failure:
static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
struct tcf_block *block, struct Qdisc *q,
- u32 parent, void *fh, int event, bool unicast)
+ u32 parent, void *fh, int event, bool unicast,
+ bool rtnl_held)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ int err = 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
- n->nlmsg_seq, n->nlmsg_flags, event) <= 0) {
+ n->nlmsg_seq, n->nlmsg_flags, event,
+ rtnl_held) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
if (unicast)
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ else
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ if (err > 0)
+ err = 0;
+ return err;
}
static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
struct tcf_block *block, struct Qdisc *q,
u32 parent, void *fh, bool unicast, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1467,39 +1932,50 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
return -ENOBUFS;
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
- n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER) <= 0) {
+ n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
+ rtnl_held) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to build del event notification");
kfree_skb(skb);
return -EINVAL;
}
- err = tp->ops->delete(tp, fh, last, extack);
+ err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
if (err) {
kfree_skb(skb);
return err;
}
if (unicast)
- return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
-
- err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ else
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
+
+ if (err > 0)
+ err = 0;
return err;
}
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
struct tcf_block *block, struct Qdisc *q,
u32 parent, struct nlmsghdr *n,
- struct tcf_chain *chain, int event)
+ struct tcf_chain *chain, int event,
+ bool rtnl_held)
{
struct tcf_proto *tp;
- for (tp = rtnl_dereference(chain->filter_chain);
- tp; tp = rtnl_dereference(tp->next))
+ for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
+ tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
tfilter_notify(net, oskb, n, tp, block,
- q, parent, NULL, event, false);
+ q, parent, NULL, event, false, rtnl_held);
+}
+
+static void tfilter_put(struct tcf_proto *tp, void *fh)
+{
+ if (tp->ops->put && fh)
+ tp->ops->put(tp, fh);
}
static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1522,6 +1998,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
void *fh;
int err;
int tp_created;
+ bool rtnl_held = false;
if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -1538,7 +2015,9 @@ replay:
prio = TC_H_MAJ(t->tcm_info);
prio_allocate = false;
parent = t->tcm_parent;
+ tp = NULL;
cl = 0;
+ block = NULL;
if (prio == 0) {
/* If no priority is provided by the user,
@@ -1555,8 +2034,27 @@ replay:
/* Find head of filter chain. */
- block = tcf_block_find(net, &q, &parent, &cl,
- t->tcm_ifindex, t->tcm_block_index, extack);
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+ if (err)
+ return err;
+
+ /* Take rtnl mutex if rtnl_held was set to true on previous iteration,
+ * block is shared (no qdisc found), qdisc is not unlocked, classifier
+ * type is not specified, classifier is not unlocked.
+ */
+ if (rtnl_held ||
+ (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+ !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ rtnl_held = true;
+ rtnl_lock();
+ }
+
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+ if (err)
+ goto errout;
+
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+ extack);
if (IS_ERR(block)) {
err = PTR_ERR(block);
goto errout;
@@ -1575,40 +2073,62 @@ replay:
goto errout;
}
+ mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
prio, prio_allocate);
if (IS_ERR(tp)) {
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = PTR_ERR(tp);
- goto errout;
+ goto errout_locked;
}
if (tp == NULL) {
+ struct tcf_proto *tp_new = NULL;
+
+ if (chain->flushing) {
+ err = -EAGAIN;
+ goto errout_locked;
+ }
+
/* Proto-tcf does not exist, create new one */
if (tca[TCA_KIND] == NULL || !protocol) {
NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
err = -EINVAL;
- goto errout;
+ goto errout_locked;
}
if (!(n->nlmsg_flags & NLM_F_CREATE)) {
NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
err = -ENOENT;
- goto errout;
+ goto errout_locked;
}
if (prio_allocate)
- prio = tcf_auto_prio(tcf_chain_tp_prev(&chain_info));
+ prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
+ &chain_info));
- tp = tcf_proto_create(nla_data(tca[TCA_KIND]),
- protocol, prio, chain, extack);
+ mutex_unlock(&chain->filter_chain_lock);
+ tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]),
+ protocol, prio, chain, rtnl_held,
+ extack);
+ if (IS_ERR(tp_new)) {
+ err = PTR_ERR(tp_new);
+ goto errout_tp;
+ }
+
+ tp_created = 1;
+ tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
+ rtnl_held);
if (IS_ERR(tp)) {
err = PTR_ERR(tp);
- goto errout;
+ goto errout_tp;
}
- tp_created = 1;
- } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
+ } else {
+ mutex_unlock(&chain->filter_chain_lock);
+ }
+
+ if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
err = -EINVAL;
goto errout;
@@ -1623,6 +2143,7 @@ replay:
goto errout;
}
} else if (n->nlmsg_flags & NLM_F_EXCL) {
+ tfilter_put(tp, fh);
NL_SET_ERR_MSG(extack, "Filter already exists");
err = -EEXIST;
goto errout;
@@ -1636,25 +2157,41 @@ replay:
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
- extack);
+ rtnl_held, extack);
if (err == 0) {
- if (tp_created)
- tcf_chain_tp_insert(chain, &chain_info, tp);
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
- RTM_NEWTFILTER, false);
- } else {
- if (tp_created)
- tcf_proto_destroy(tp, NULL);
+ RTM_NEWTFILTER, false, rtnl_held);
+ tfilter_put(tp, fh);
}
errout:
- if (chain)
- tcf_chain_put(chain);
- tcf_block_release(q, block);
- if (err == -EAGAIN)
+ if (err && tp_created)
+ tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
+errout_tp:
+ if (chain) {
+ if (tp && !IS_ERR(tp))
+ tcf_proto_put(tp, rtnl_held, NULL);
+ if (!tp_created)
+ tcf_chain_put(chain);
+ }
+ tcf_block_release(q, block, rtnl_held);
+
+ if (rtnl_held)
+ rtnl_unlock();
+
+ if (err == -EAGAIN) {
+ /* Take rtnl lock in case EAGAIN is caused by concurrent flush
+ * of target chain.
+ */
+ rtnl_held = true;
/* Replay the request. */
goto replay;
+ }
return err;
+
+errout_locked:
+ mutex_unlock(&chain->filter_chain_lock);
+ goto errout;
}
static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1670,11 +2207,12 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct Qdisc *q = NULL;
struct tcf_chain_info chain_info;
struct tcf_chain *chain = NULL;
- struct tcf_block *block;
+ struct tcf_block *block = NULL;
struct tcf_proto *tp = NULL;
unsigned long cl = 0;
void *fh = NULL;
int err;
+ bool rtnl_held = false;
if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
@@ -1695,8 +2233,27 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
/* Find head of filter chain. */
- block = tcf_block_find(net, &q, &parent, &cl,
- t->tcm_ifindex, t->tcm_block_index, extack);
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+ if (err)
+ return err;
+
+ /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc
+ * found), qdisc is not unlocked, classifier type is not specified,
+ * classifier is not unlocked.
+ */
+ if (!prio ||
+ (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+ !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ rtnl_held = true;
+ rtnl_lock();
+ }
+
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+ if (err)
+ goto errout;
+
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+ extack);
if (IS_ERR(block)) {
err = PTR_ERR(block);
goto errout;
@@ -1724,56 +2281,69 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
if (prio == 0) {
tfilter_notify_chain(net, skb, block, q, parent, n,
- chain, RTM_DELTFILTER);
- tcf_chain_flush(chain);
+ chain, RTM_DELTFILTER, rtnl_held);
+ tcf_chain_flush(chain, rtnl_held);
err = 0;
goto errout;
}
+ mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
prio, false);
if (!tp || IS_ERR(tp)) {
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = tp ? PTR_ERR(tp) : -ENOENT;
- goto errout;
+ goto errout_locked;
} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
err = -EINVAL;
+ goto errout_locked;
+ } else if (t->tcm_handle == 0) {
+ tcf_chain_tp_remove(chain, &chain_info, tp);
+ mutex_unlock(&chain->filter_chain_lock);
+
+ tcf_proto_put(tp, rtnl_held, NULL);
+ tfilter_notify(net, skb, n, tp, block, q, parent, fh,
+ RTM_DELTFILTER, false, rtnl_held);
+ err = 0;
goto errout;
}
+ mutex_unlock(&chain->filter_chain_lock);
fh = tp->ops->get(tp, t->tcm_handle);
if (!fh) {
- if (t->tcm_handle == 0) {
- tcf_chain_tp_remove(chain, &chain_info, tp);
- tfilter_notify(net, skb, n, tp, block, q, parent, fh,
- RTM_DELTFILTER, false);
- tcf_proto_destroy(tp, extack);
- err = 0;
- } else {
- NL_SET_ERR_MSG(extack, "Specified filter handle not found");
- err = -ENOENT;
- }
+ NL_SET_ERR_MSG(extack, "Specified filter handle not found");
+ err = -ENOENT;
} else {
bool last;
err = tfilter_del_notify(net, skb, n, tp, block,
q, parent, fh, false, &last,
- extack);
+ rtnl_held, extack);
+
if (err)
goto errout;
- if (last) {
- tcf_chain_tp_remove(chain, &chain_info, tp);
- tcf_proto_destroy(tp, extack);
- }
+ if (last)
+ tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
}
errout:
- if (chain)
+ if (chain) {
+ if (tp && !IS_ERR(tp))
+ tcf_proto_put(tp, rtnl_held, NULL);
tcf_chain_put(chain);
- tcf_block_release(q, block);
+ }
+ tcf_block_release(q, block, rtnl_held);
+
+ if (rtnl_held)
+ rtnl_unlock();
+
return err;
+
+errout_locked:
+ mutex_unlock(&chain->filter_chain_lock);
+ goto errout;
}
static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1789,11 +2359,12 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct Qdisc *q = NULL;
struct tcf_chain_info chain_info;
struct tcf_chain *chain = NULL;
- struct tcf_block *block;
+ struct tcf_block *block = NULL;
struct tcf_proto *tp = NULL;
unsigned long cl = 0;
void *fh = NULL;
int err;
+ bool rtnl_held = false;
err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, rtm_tca_policy, extack);
if (err < 0)
@@ -1811,8 +2382,26 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
/* Find head of filter chain. */
- block = tcf_block_find(net, &q, &parent, &cl,
- t->tcm_ifindex, t->tcm_block_index, extack);
+ err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
+ if (err)
+ return err;
+
+ /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not
+ * unlocked, classifier type is not specified, classifier is not
+ * unlocked.
+ */
+ if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
+ !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) {
+ rtnl_held = true;
+ rtnl_lock();
+ }
+
+ err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
+ if (err)
+ goto errout;
+
+ block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
+ extack);
if (IS_ERR(block)) {
err = PTR_ERR(block);
goto errout;
@@ -1831,8 +2420,10 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
goto errout;
}
+ mutex_lock(&chain->filter_chain_lock);
tp = tcf_chain_tp_find(chain, &chain_info, protocol,
prio, false);
+ mutex_unlock(&chain->filter_chain_lock);
if (!tp || IS_ERR(tp)) {
NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
err = tp ? PTR_ERR(tp) : -ENOENT;
@@ -1850,15 +2441,23 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
err = -ENOENT;
} else {
err = tfilter_notify(net, skb, n, tp, block, q, parent,
- fh, RTM_NEWTFILTER, true);
+ fh, RTM_NEWTFILTER, true, rtnl_held);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
}
+ tfilter_put(tp, fh);
errout:
- if (chain)
+ if (chain) {
+ if (tp && !IS_ERR(tp))
+ tcf_proto_put(tp, rtnl_held, NULL);
tcf_chain_put(chain);
- tcf_block_release(q, block);
+ }
+ tcf_block_release(q, block, rtnl_held);
+
+ if (rtnl_held)
+ rtnl_unlock();
+
return err;
}
@@ -1879,7 +2478,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER);
+ RTM_NEWTFILTER, true);
}
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
@@ -1889,11 +2488,15 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
struct net *net = sock_net(skb->sk);
struct tcf_block *block = chain->block;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ struct tcf_proto *tp, *tp_prev;
struct tcf_dump_args arg;
- struct tcf_proto *tp;
- for (tp = rtnl_dereference(chain->filter_chain);
- tp; tp = rtnl_dereference(tp->next), (*p_index)++) {
+ for (tp = __tcf_get_next_proto(chain, NULL);
+ tp;
+ tp_prev = tp,
+ tp = __tcf_get_next_proto(chain, tp),
+ tcf_proto_put(tp_prev, true, NULL),
+ (*p_index)++) {
if (*p_index < index_start)
continue;
if (TC_H_MAJ(tcm->tcm_info) &&
@@ -1909,9 +2512,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER) <= 0)
- return false;
-
+ RTM_NEWTFILTER, true) <= 0)
+ goto errout;
cb->args[1] = 1;
}
if (!tp->ops->walk)
@@ -1926,23 +2528,27 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
arg.w.skip = cb->args[1] - 1;
arg.w.count = 0;
arg.w.cookie = cb->args[2];
- tp->ops->walk(tp, &arg.w);
+ tp->ops->walk(tp, &arg.w, true);
cb->args[2] = arg.w.cookie;
cb->args[1] = arg.w.count + 1;
if (arg.w.stop)
- return false;
+ goto errout;
}
return true;
+
+errout:
+ tcf_proto_put(tp, true, NULL);
+ return false;
}
/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct tcf_chain *chain, *chain_prev;
struct net *net = sock_net(skb->sk);
struct nlattr *tca[TCA_MAX + 1];
struct Qdisc *q = NULL;
struct tcf_block *block;
- struct tcf_chain *chain;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
long index_start;
long index;
@@ -2006,19 +2612,24 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
index_start = cb->args[0];
index = 0;
- list_for_each_entry(chain, &block->chain_list, list) {
+ for (chain = __tcf_get_next_chain(block, NULL);
+ chain;
+ chain_prev = chain,
+ chain = __tcf_get_next_chain(block, chain),
+ tcf_chain_put(chain_prev)) {
if (tca[TCA_CHAIN] &&
nla_get_u32(tca[TCA_CHAIN]) != chain->index)
continue;
if (!tcf_chain_dump(chain, q, parent, skb, cb,
index_start, &index)) {
+ tcf_chain_put(chain);
err = -EMSGSIZE;
break;
}
}
if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
- tcf_block_refcnt_put(block);
+ tcf_block_refcnt_put(block, true);
cb->args[0] = index;
out:
@@ -2028,8 +2639,10 @@ out:
return skb->len;
}
-static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
- struct sk_buff *skb, struct tcf_block *block,
+static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv, u32 chain_index,
+ struct net *net, struct sk_buff *skb,
+ struct tcf_block *block,
u32 portid, u32 seq, u16 flags, int event)
{
unsigned char *b = skb_tail_pointer(skb);
@@ -2038,8 +2651,8 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
struct tcmsg *tcm;
void *priv;
- ops = chain->tmplt_ops;
- priv = chain->tmplt_priv;
+ ops = tmplt_ops;
+ priv = tmplt_priv;
nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
if (!nlh)
@@ -2057,7 +2670,7 @@ static int tc_chain_fill_node(struct tcf_chain *chain, struct net *net,
tcm->tcm_block_index = block->index;
}
- if (nla_put_u32(skb, TCA_CHAIN, chain->index))
+ if (nla_put_u32(skb, TCA_CHAIN, chain_index))
goto nla_put_failure;
if (ops) {
@@ -2083,18 +2696,50 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
struct tcf_block *block = chain->block;
struct net *net = block->net;
struct sk_buff *skb;
+ int err = 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
return -ENOBUFS;
- if (tc_chain_fill_node(chain, net, skb, block, portid,
+ if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
+ chain->index, net, skb, block, portid,
seq, flags, event) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
if (unicast)
+ err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
+ else
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ flags & NLM_F_ECHO);
+
+ if (err > 0)
+ err = 0;
+ return err;
+}
+
+static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv, u32 chain_index,
+ struct tcf_block *block, struct sk_buff *oskb,
+ u32 seq, u16 flags, bool unicast)
+{
+ u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ struct net *net = block->net;
+ struct sk_buff *skb;
+
+ skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
+ block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (unicast)
return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
@@ -2111,7 +2756,7 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
if (!tca[TCA_KIND])
return 0;
- ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), extack);
+ ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack);
if (IS_ERR(ops))
return PTR_ERR(ops);
if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
@@ -2129,16 +2774,15 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
return 0;
}
-static void tc_chain_tmplt_del(struct tcf_chain *chain)
+static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
+ void *tmplt_priv)
{
- const struct tcf_proto_ops *ops = chain->tmplt_ops;
-
/* If template ops are set, no work to do for us. */
- if (!ops)
+ if (!tmplt_ops)
return;
- ops->tmplt_destroy(chain->tmplt_priv);
- module_put(ops->owner);
+ tmplt_ops->tmplt_destroy(tmplt_priv);
+ module_put(tmplt_ops->owner);
}
/* Add/delete/get a chain */
@@ -2181,6 +2825,8 @@ replay:
err = -EINVAL;
goto errout_block;
}
+
+ mutex_lock(&block->lock);
chain = tcf_chain_lookup(block, chain_index);
if (n->nlmsg_type == RTM_NEWCHAIN) {
if (chain) {
@@ -2192,54 +2838,61 @@ replay:
} else {
NL_SET_ERR_MSG(extack, "Filter chain already exists");
err = -EEXIST;
- goto errout_block;
+ goto errout_block_locked;
}
} else {
if (!(n->nlmsg_flags & NLM_F_CREATE)) {
NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
err = -ENOENT;
- goto errout_block;
+ goto errout_block_locked;
}
chain = tcf_chain_create(block, chain_index);
if (!chain) {
NL_SET_ERR_MSG(extack, "Failed to create filter chain");
err = -ENOMEM;
- goto errout_block;
+ goto errout_block_locked;
}
}
} else {
if (!chain || tcf_chain_held_by_acts_only(chain)) {
NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
err = -EINVAL;
- goto errout_block;
+ goto errout_block_locked;
}
tcf_chain_hold(chain);
}
+ if (n->nlmsg_type == RTM_NEWCHAIN) {
+ /* Modifying chain requires holding parent block lock. In case
+ * the chain was successfully added, take a reference to the
+ * chain. This ensures that an empty chain does not disappear at
+ * the end of this function.
+ */
+ tcf_chain_hold(chain);
+ chain->explicitly_created = true;
+ }
+ mutex_unlock(&block->lock);
+
switch (n->nlmsg_type) {
case RTM_NEWCHAIN:
err = tc_chain_tmplt_add(chain, net, tca, extack);
- if (err)
+ if (err) {
+ tcf_chain_put_explicitly_created(chain);
goto errout;
- /* In case the chain was successfully added, take a reference
- * to the chain. This ensures that an empty chain
- * does not disappear at the end of this function.
- */
- tcf_chain_hold(chain);
- chain->explicitly_created = true;
+ }
+
tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
RTM_NEWCHAIN, false);
break;
case RTM_DELCHAIN:
tfilter_notify_chain(net, skb, block, q, parent, n,
- chain, RTM_DELTFILTER);
+ chain, RTM_DELTFILTER, true);
/* Flush the chain first as the user requested chain removal. */
- tcf_chain_flush(chain);
+ tcf_chain_flush(chain, true);
/* In case the chain was successfully deleted, put a reference
* to the chain previously taken during addition.
*/
tcf_chain_put_explicitly_created(chain);
- chain->explicitly_created = false;
break;
case RTM_GETCHAIN:
err = tc_chain_notify(chain, skb, n->nlmsg_seq,
@@ -2256,11 +2909,15 @@ replay:
errout:
tcf_chain_put(chain);
errout_block:
- tcf_block_release(q, block);
+ tcf_block_release(q, block, true);
if (err == -EAGAIN)
/* Replay the request. */
goto replay;
return err;
+
+errout_block_locked:
+ mutex_unlock(&block->lock);
+ goto errout_block;
}
/* called with RTNL */
@@ -2270,8 +2927,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
struct nlattr *tca[TCA_MAX + 1];
struct Qdisc *q = NULL;
struct tcf_block *block;
- struct tcf_chain *chain;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ struct tcf_chain *chain;
long index_start;
long index;
u32 parent;
@@ -2334,6 +2991,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
index_start = cb->args[0];
index = 0;
+ mutex_lock(&block->lock);
list_for_each_entry(chain, &block->chain_list, list) {
if ((tca[TCA_CHAIN] &&
nla_get_u32(tca[TCA_CHAIN]) != chain->index))
@@ -2344,7 +3002,8 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
}
if (tcf_chain_held_by_acts_only(chain))
continue;
- err = tc_chain_fill_node(chain, net, skb, block,
+ err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
+ chain->index, net, skb, block,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
RTM_NEWCHAIN);
@@ -2352,9 +3011,10 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
break;
index++;
}
+ mutex_unlock(&block->lock);
if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
- tcf_block_refcnt_put(block);
+ tcf_block_refcnt_put(block, true);
cb->args[0] = index;
out:
@@ -2376,7 +3036,7 @@ EXPORT_SYMBOL(tcf_exts_destroy);
int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
{
@@ -2386,7 +3046,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
if (exts->police && tb[exts->police]) {
act = tcf_action_init_1(net, tp, tb[exts->police],
rate_tlv, "police", ovr,
- TCA_ACT_BIND, true, extack);
+ TCA_ACT_BIND, rtnl_held,
+ extack);
if (IS_ERR(act))
return PTR_ERR(act);
@@ -2398,13 +3059,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
err = tcf_action_init(net, tp, tb[exts->action],
rate_tlv, NULL, ovr, TCA_ACT_BIND,
- exts->actions, &attr_size, true,
- extack);
+ exts->actions, &attr_size,
+ rtnl_held, extack);
if (err < 0)
return err;
exts->nr_actions = err;
}
- exts->net = net;
}
#else
if ((exts->action && tb[exts->action]) ||
@@ -2515,6 +3175,114 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
}
EXPORT_SYMBOL(tc_setup_cb_call);
+int tc_setup_flow_action(struct flow_action *flow_action,
+ const struct tcf_exts *exts)
+{
+ const struct tc_action *act;
+ int i, j, k;
+
+ if (!exts)
+ return 0;
+
+ j = 0;
+ tcf_exts_for_each_action(i, act, exts) {
+ struct flow_action_entry *entry;
+
+ entry = &flow_action->entries[j];
+ if (is_tcf_gact_ok(act)) {
+ entry->id = FLOW_ACTION_ACCEPT;
+ } else if (is_tcf_gact_shot(act)) {
+ entry->id = FLOW_ACTION_DROP;
+ } else if (is_tcf_gact_trap(act)) {
+ entry->id = FLOW_ACTION_TRAP;
+ } else if (is_tcf_gact_goto_chain(act)) {
+ entry->id = FLOW_ACTION_GOTO;
+ entry->chain_index = tcf_gact_goto_chain_index(act);
+ } else if (is_tcf_mirred_egress_redirect(act)) {
+ entry->id = FLOW_ACTION_REDIRECT;
+ entry->dev = tcf_mirred_dev(act);
+ } else if (is_tcf_mirred_egress_mirror(act)) {
+ entry->id = FLOW_ACTION_MIRRED;
+ entry->dev = tcf_mirred_dev(act);
+ } else if (is_tcf_vlan(act)) {
+ switch (tcf_vlan_action(act)) {
+ case TCA_VLAN_ACT_PUSH:
+ entry->id = FLOW_ACTION_VLAN_PUSH;
+ entry->vlan.vid = tcf_vlan_push_vid(act);
+ entry->vlan.proto = tcf_vlan_push_proto(act);
+ entry->vlan.prio = tcf_vlan_push_prio(act);
+ break;
+ case TCA_VLAN_ACT_POP:
+ entry->id = FLOW_ACTION_VLAN_POP;
+ break;
+ case TCA_VLAN_ACT_MODIFY:
+ entry->id = FLOW_ACTION_VLAN_MANGLE;
+ entry->vlan.vid = tcf_vlan_push_vid(act);
+ entry->vlan.proto = tcf_vlan_push_proto(act);
+ entry->vlan.prio = tcf_vlan_push_prio(act);
+ break;
+ default:
+ goto err_out;
+ }
+ } else if (is_tcf_tunnel_set(act)) {
+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
+ entry->tunnel = tcf_tunnel_info(act);
+ } else if (is_tcf_tunnel_release(act)) {
+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
+ entry->tunnel = tcf_tunnel_info(act);
+ } else if (is_tcf_pedit(act)) {
+ for (k = 0; k < tcf_pedit_nkeys(act); k++) {
+ switch (tcf_pedit_cmd(act, k)) {
+ case TCA_PEDIT_KEY_EX_CMD_SET:
+ entry->id = FLOW_ACTION_MANGLE;
+ break;
+ case TCA_PEDIT_KEY_EX_CMD_ADD:
+ entry->id = FLOW_ACTION_ADD;
+ break;
+ default:
+ goto err_out;
+ }
+ entry->mangle.htype = tcf_pedit_htype(act, k);
+ entry->mangle.mask = tcf_pedit_mask(act, k);
+ entry->mangle.val = tcf_pedit_val(act, k);
+ entry->mangle.offset = tcf_pedit_offset(act, k);
+ entry = &flow_action->entries[++j];
+ }
+ } else if (is_tcf_csum(act)) {
+ entry->id = FLOW_ACTION_CSUM;
+ entry->csum_flags = tcf_csum_update_flags(act);
+ } else if (is_tcf_skbedit_mark(act)) {
+ entry->id = FLOW_ACTION_MARK;
+ entry->mark = tcf_skbedit_mark(act);
+ } else {
+ goto err_out;
+ }
+
+ if (!is_tcf_pedit(act))
+ j++;
+ }
+ return 0;
+err_out:
+ return -EOPNOTSUPP;
+}
+EXPORT_SYMBOL(tc_setup_flow_action);
+
+unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
+{
+ unsigned int num_acts = 0;
+ struct tc_action *act;
+ int i;
+
+ tcf_exts_for_each_action(i, act, exts) {
+ if (is_tcf_pedit(act))
+ num_acts += tcf_pedit_nkeys(act);
+ else
+ num_acts++;
+ }
+ return num_acts;
+}
+EXPORT_SYMBOL(tcf_exts_num_actions);
+
static __net_init int tcf_net_init(struct net *net)
{
struct tcf_net *tn = net_generic(net, tcf_net_id);
@@ -2555,10 +3323,12 @@ static int __init tc_filter_init(void)
if (err)
goto err_rhash_setup_block_ht;
- rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED);
+ rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
+ RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
- tc_dump_tfilter, 0);
+ tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c
index 6a5dce8baf19..687b0af67878 100644
--- a/net/sched/cls_basic.c
+++ b/net/sched/cls_basic.c
@@ -18,6 +18,7 @@
#include <linux/rtnetlink.h>
#include <linux/skbuff.h>
#include <linux/idr.h>
+#include <linux/percpu.h>
#include <net/netlink.h>
#include <net/act_api.h>
#include <net/pkt_cls.h>
@@ -35,6 +36,7 @@ struct basic_filter {
struct tcf_result res;
struct tcf_proto *tp;
struct list_head link;
+ struct tc_basic_pcnt __percpu *pf;
struct rcu_work rwork;
};
@@ -46,8 +48,10 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct basic_filter *f;
list_for_each_entry_rcu(f, &head->flist, link) {
+ __this_cpu_inc(f->pf->rcnt);
if (!tcf_em_tree_match(skb, &f->ematches, NULL))
continue;
+ __this_cpu_inc(f->pf->rhit);
*res = f->res;
r = tcf_exts_exec(skb, &f->exts, res);
if (r < 0)
@@ -89,6 +93,7 @@ static void __basic_delete_filter(struct basic_filter *f)
tcf_exts_destroy(&f->exts);
tcf_em_tree_destroy(&f->ematches);
tcf_exts_put_net(&f->exts);
+ free_percpu(f->pf);
kfree(f);
}
@@ -102,7 +107,8 @@ static void basic_delete_filter_work(struct work_struct *work)
rtnl_unlock();
}
-static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void basic_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f, *n;
@@ -121,7 +127,7 @@ static void basic_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int basic_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f = arg;
@@ -148,7 +154,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
if (err < 0)
return err;
@@ -168,7 +174,7 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp,
static int basic_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, bool ovr,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
int err;
struct basic_head *head = rtnl_dereference(tp->root);
@@ -193,7 +199,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (!fnew)
return -ENOBUFS;
- err = tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE);
+ err = tcf_exts_init(&fnew->exts, net, TCA_BASIC_ACT, TCA_BASIC_POLICE);
if (err < 0)
goto errout;
@@ -208,6 +214,11 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
if (err)
goto errout;
fnew->handle = handle;
+ fnew->pf = alloc_percpu(struct tc_basic_pcnt);
+ if (!fnew->pf) {
+ err = -ENOMEM;
+ goto errout;
+ }
err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr,
extack);
@@ -231,12 +242,14 @@ static int basic_change(struct net *net, struct sk_buff *in_skb,
return 0;
errout:
+ free_percpu(fnew->pf);
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
return err;
}
-static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct basic_head *head = rtnl_dereference(tp->root);
struct basic_filter *f;
@@ -263,10 +276,12 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl)
}
static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
+ struct tc_basic_pcnt gpf = {};
struct basic_filter *f = fh;
struct nlattr *nest;
+ int cpu;
if (f == NULL)
return skb->len;
@@ -281,6 +296,18 @@ static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh,
nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid))
goto nla_put_failure;
+ for_each_possible_cpu(cpu) {
+ struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu);
+
+ gpf.rcnt += pf->rcnt;
+ gpf.rhit += pf->rhit;
+ }
+
+ if (nla_put_64bit(skb, TCA_BASIC_PCNT,
+ sizeof(struct tc_basic_pcnt),
+ &gpf, TCA_BASIC_PAD))
+ goto nla_put_failure;
+
if (tcf_exts_dump(skb, &f->exts) < 0 ||
tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0)
goto nla_put_failure;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index a95cb240a606..b4ac58039cb1 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -298,7 +298,7 @@ static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog,
}
static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -307,7 +307,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last,
return 0;
}
-static void cls_bpf_destroy(struct tcf_proto *tp,
+static void cls_bpf_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
@@ -417,7 +417,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
return -EINVAL;
- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, extack);
+ ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr, true,
+ extack);
if (ret < 0)
return ret;
@@ -455,7 +456,8 @@ static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *oldprog = *arg;
@@ -475,7 +477,7 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
if (!prog)
return -ENOBUFS;
- ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+ ret = tcf_exts_init(&prog->exts, net, TCA_BPF_ACT, TCA_BPF_POLICE);
if (ret < 0)
goto errout;
@@ -575,7 +577,7 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
}
static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *tm)
+ struct sk_buff *skb, struct tcmsg *tm, bool rtnl_held)
{
struct cls_bpf_prog *prog = fh;
struct nlattr *nest;
@@ -635,7 +637,8 @@ static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl)
prog->res.class = cl;
}
-static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
struct cls_bpf_prog *prog;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 3bc01bdde165..4c1567854f95 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -78,7 +78,7 @@ static void cls_cgroup_destroy_work(struct work_struct *work)
static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr,
+ void **arg, bool ovr, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_CGROUP_MAX + 1];
@@ -99,7 +99,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
if (!new)
return -ENOBUFS;
- err = tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
+ err = tcf_exts_init(&new->exts, net, TCA_CGROUP_ACT, TCA_CGROUP_POLICE);
if (err < 0)
goto errout;
new->handle = handle;
@@ -110,7 +110,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb,
goto errout;
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr,
- extack);
+ true, extack);
if (err < 0)
goto errout;
@@ -130,7 +130,7 @@ errout:
return err;
}
-static void cls_cgroup_destroy(struct tcf_proto *tp,
+static void cls_cgroup_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
@@ -145,18 +145,21 @@ static void cls_cgroup_destroy(struct tcf_proto *tp,
}
static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
-static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
if (arg->count < arg->skip)
goto skip;
+ if (!head)
+ return;
if (arg->fn(tp, head, arg) < 0) {
arg->stop = 1;
return;
@@ -166,7 +169,7 @@ skip:
}
static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct cls_cgroup_head *head = rtnl_dereference(tp->root);
struct nlattr *nest;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 2bb043cd436b..eece1ee26930 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -391,7 +391,8 @@ static void flow_destroy_filter_work(struct work_struct *work)
static int flow_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *fold, *fnew;
@@ -440,12 +441,12 @@ static int flow_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
goto err1;
- err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE);
+ err = tcf_exts_init(&fnew->exts, net, TCA_FLOW_ACT, TCA_FLOW_POLICE);
if (err < 0)
goto err2;
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr,
- extack);
+ true, extack);
if (err < 0)
goto err2;
@@ -566,7 +567,7 @@ err1:
}
static int flow_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f = arg;
@@ -590,7 +591,8 @@ static int flow_init(struct tcf_proto *tp)
return 0;
}
-static void flow_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void flow_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f, *next;
@@ -617,7 +619,7 @@ static void *flow_get(struct tcf_proto *tp, u32 handle)
}
static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct flow_filter *f = fh;
struct nlattr *nest;
@@ -677,7 +679,8 @@ nla_put_failure:
return -1;
}
-static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct flow_head *head = rtnl_dereference(tp->root);
struct flow_filter *f;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 12ca9d13db83..c04247b403ed 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -381,16 +381,31 @@ static int fl_hw_replace_filter(struct tcf_proto *tp,
bool skip_sw = tc_skip_sw(f->flags);
int err;
+ cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
+ if (!cls_flower.rule)
+ return -ENOMEM;
+
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack);
cls_flower.command = TC_CLSFLOWER_REPLACE;
cls_flower.cookie = (unsigned long) f;
- cls_flower.dissector = &f->mask->dissector;
- cls_flower.mask = &f->mask->key;
- cls_flower.key = &f->mkey;
- cls_flower.exts = &f->exts;
+ cls_flower.rule->match.dissector = &f->mask->dissector;
+ cls_flower.rule->match.mask = &f->mask->key;
+ cls_flower.rule->match.key = &f->mkey;
cls_flower.classid = f->res.classid;
+ err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts);
+ if (err) {
+ kfree(cls_flower.rule);
+ if (skip_sw) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
+ return err;
+ }
+ return 0;
+ }
+
err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw);
+ kfree(cls_flower.rule);
+
if (err < 0) {
fl_hw_destroy_filter(tp, f, NULL);
return err;
@@ -413,10 +428,13 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f)
tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL);
cls_flower.command = TC_CLSFLOWER_STATS;
cls_flower.cookie = (unsigned long) f;
- cls_flower.exts = &f->exts;
cls_flower.classid = f->res.classid;
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+
+ tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes,
+ cls_flower.stats.pkts,
+ cls_flower.stats.lastused);
}
static bool __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
@@ -451,7 +469,8 @@ static void fl_destroy_sleepable(struct work_struct *work)
module_put(THIS_MODULE);
}
-static void fl_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void fl_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct fl_flow_mask *mask, *next_mask;
@@ -1258,7 +1277,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true,
+ extack);
if (err < 0)
return err;
@@ -1285,7 +1305,8 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp,
static int fl_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *fold = *arg;
@@ -1323,50 +1344,50 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
goto errout_tb;
}
- err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+ err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
if (err < 0)
goto errout;
- if (!handle) {
- handle = 1;
- err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
- INT_MAX, GFP_KERNEL);
- } else if (!fold) {
- /* user specifies a handle and it doesn't exist */
- err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
- handle, GFP_KERNEL);
- }
- if (err)
- goto errout;
- fnew->handle = handle;
-
if (tb[TCA_FLOWER_FLAGS]) {
fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
if (!tc_flags_valid(fnew->flags)) {
err = -EINVAL;
- goto errout_idr;
+ goto errout;
}
}
err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE], ovr,
tp->chain->tmplt_priv, extack);
if (err)
- goto errout_idr;
+ goto errout;
err = fl_check_assign_mask(head, fnew, fold, mask);
if (err)
- goto errout_idr;
+ goto errout;
+
+ if (!handle) {
+ handle = 1;
+ err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+ INT_MAX, GFP_KERNEL);
+ } else if (!fold) {
+ /* user specifies a handle and it doesn't exist */
+ err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+ handle, GFP_KERNEL);
+ }
+ if (err)
+ goto errout_mask;
+ fnew->handle = handle;
if (!fold && __fl_lookup(fnew->mask, &fnew->mkey)) {
err = -EEXIST;
- goto errout_mask;
+ goto errout_idr;
}
err = rhashtable_insert_fast(&fnew->mask->ht, &fnew->ht_node,
fnew->mask->filter_ht_params);
if (err)
- goto errout_mask;
+ goto errout_idr;
if (!tc_skip_hw(fnew->flags)) {
err = fl_hw_replace_filter(tp, fnew, extack);
@@ -1405,12 +1426,13 @@ errout_mask_ht:
rhashtable_remove_fast(&fnew->mask->ht, &fnew->ht_node,
fnew->mask->filter_ht_params);
-errout_mask:
- fl_mask_put(head, fnew->mask, false);
-
errout_idr:
if (!fold)
idr_remove(&head->handle_idr, fnew->handle);
+
+errout_mask:
+ fl_mask_put(head, fnew->mask, false);
+
errout:
tcf_exts_destroy(&fnew->exts);
kfree(fnew);
@@ -1422,7 +1444,7 @@ errout_mask_alloc:
}
static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f = arg;
@@ -1434,7 +1456,8 @@ static int fl_delete(struct tcf_proto *tp, void *arg, bool *last,
return 0;
}
-static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct cls_fl_head *head = rtnl_dereference(tp->root);
struct cls_fl_filter *f;
@@ -1467,18 +1490,36 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
if (tc_skip_hw(f->flags))
continue;
+ cls_flower.rule =
+ flow_rule_alloc(tcf_exts_num_actions(&f->exts));
+ if (!cls_flower.rule)
+ return -ENOMEM;
+
tc_cls_common_offload_init(&cls_flower.common, tp,
f->flags, extack);
cls_flower.command = add ?
TC_CLSFLOWER_REPLACE : TC_CLSFLOWER_DESTROY;
cls_flower.cookie = (unsigned long)f;
- cls_flower.dissector = &mask->dissector;
- cls_flower.mask = &mask->key;
- cls_flower.key = &f->mkey;
- cls_flower.exts = &f->exts;
+ cls_flower.rule->match.dissector = &mask->dissector;
+ cls_flower.rule->match.mask = &mask->key;
+ cls_flower.rule->match.key = &f->mkey;
+
+ err = tc_setup_flow_action(&cls_flower.rule->action,
+ &f->exts);
+ if (err) {
+ kfree(cls_flower.rule);
+ if (tc_skip_sw(f->flags)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action");
+ return err;
+ }
+ continue;
+ }
+
cls_flower.classid = f->res.classid;
err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+ kfree(cls_flower.rule);
+
if (err) {
if (add && tc_skip_sw(f->flags))
return err;
@@ -1493,25 +1534,30 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
return 0;
}
-static void fl_hw_create_tmplt(struct tcf_chain *chain,
- struct fl_flow_tmplt *tmplt)
+static int fl_hw_create_tmplt(struct tcf_chain *chain,
+ struct fl_flow_tmplt *tmplt)
{
struct tc_cls_flower_offload cls_flower = {};
struct tcf_block *block = chain->block;
- struct tcf_exts dummy_exts = { 0, };
+
+ cls_flower.rule = flow_rule_alloc(0);
+ if (!cls_flower.rule)
+ return -ENOMEM;
cls_flower.common.chain_index = chain->index;
cls_flower.command = TC_CLSFLOWER_TMPLT_CREATE;
cls_flower.cookie = (unsigned long) tmplt;
- cls_flower.dissector = &tmplt->dissector;
- cls_flower.mask = &tmplt->mask;
- cls_flower.key = &tmplt->dummy_key;
- cls_flower.exts = &dummy_exts;
+ cls_flower.rule->match.dissector = &tmplt->dissector;
+ cls_flower.rule->match.mask = &tmplt->mask;
+ cls_flower.rule->match.key = &tmplt->dummy_key;
/* We don't care if driver (any of them) fails to handle this
* call. It serves just as a hint for it.
*/
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false);
+ kfree(cls_flower.rule);
+
+ return 0;
}
static void fl_hw_destroy_tmplt(struct tcf_chain *chain,
@@ -1555,12 +1601,14 @@ static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain,
err = fl_set_key(net, tb, &tmplt->dummy_key, &tmplt->mask, extack);
if (err)
goto errout_tmplt;
- kfree(tb);
fl_init_dissector(&tmplt->dissector, &tmplt->mask);
- fl_hw_create_tmplt(chain, tmplt);
+ err = fl_hw_create_tmplt(chain, tmplt);
+ if (err)
+ goto errout_tmplt;
+ kfree(tb);
return tmplt;
errout_tmplt:
@@ -2008,7 +2056,7 @@ nla_put_failure:
}
static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct cls_fl_filter *f = fh;
struct nlattr *nest;
diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
index 29eeeaf3ea44..ad036b00427d 100644
--- a/net/sched/cls_fw.c
+++ b/net/sched/cls_fw.c
@@ -139,7 +139,8 @@ static void fw_delete_filter_work(struct work_struct *work)
rtnl_unlock();
}
-static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void fw_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f;
@@ -163,7 +164,7 @@ static void fw_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int fw_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = arg;
@@ -217,7 +218,7 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
int err;
err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr,
- extack);
+ true, extack);
if (err < 0)
return err;
@@ -250,7 +251,8 @@ static int fw_set_parms(struct net *net, struct tcf_proto *tp,
static int fw_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca, void **arg,
- bool ovr, struct netlink_ext_ack *extack)
+ bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = *arg;
@@ -283,7 +285,8 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
#endif /* CONFIG_NET_CLS_IND */
fnew->tp = f->tp;
- err = tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE);
+ err = tcf_exts_init(&fnew->exts, net, TCA_FW_ACT,
+ TCA_FW_POLICE);
if (err < 0) {
kfree(fnew);
return err;
@@ -332,7 +335,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
return -ENOBUFS;
- err = tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE);
+ err = tcf_exts_init(&f->exts, net, TCA_FW_ACT, TCA_FW_POLICE);
if (err < 0)
goto errout;
f->id = handle;
@@ -354,7 +357,8 @@ errout:
return err;
}
-static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct fw_head *head = rtnl_dereference(tp->root);
int h;
@@ -384,7 +388,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct fw_head *head = rtnl_dereference(tp->root);
struct fw_filter *f = fh;
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 0e408ee9dcec..a13bc351a414 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -12,6 +12,7 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
+#include <linux/percpu.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
@@ -22,6 +23,7 @@ struct cls_mall_head {
u32 handle;
u32 flags;
unsigned int in_hw_count;
+ struct tc_matchall_pcnt __percpu *pf;
struct rcu_work rwork;
};
@@ -34,6 +36,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp,
return -1;
*res = head->res;
+ __this_cpu_inc(head->pf->rhit);
return tcf_exts_exec(skb, &head->exts, res);
}
@@ -46,6 +49,7 @@ static void __mall_destroy(struct cls_mall_head *head)
{
tcf_exts_destroy(&head->exts);
tcf_exts_put_net(&head->exts);
+ free_percpu(head->pf);
kfree(head);
}
@@ -105,7 +109,8 @@ static int mall_replace_hw_filter(struct tcf_proto *tp,
return 0;
}
-static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void mall_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
@@ -125,6 +130,11 @@ static void mall_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
static void *mall_get(struct tcf_proto *tp, u32 handle)
{
+ struct cls_mall_head *head = rtnl_dereference(tp->root);
+
+ if (head && head->handle == handle)
+ return head;
+
return NULL;
}
@@ -141,7 +151,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr, true,
+ extack);
if (err < 0)
return err;
@@ -155,7 +166,8 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp,
static int mall_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle, struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
struct nlattr *tb[TCA_MATCHALL_MAX + 1];
@@ -184,7 +196,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
if (!new)
return -ENOBUFS;
- err = tcf_exts_init(&new->exts, TCA_MATCHALL_ACT, 0);
+ err = tcf_exts_init(&new->exts, net, TCA_MATCHALL_ACT, 0);
if (err)
goto err_exts_init;
@@ -192,6 +204,11 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
handle = 1;
new->handle = handle;
new->flags = flags;
+ new->pf = alloc_percpu(struct tc_matchall_pcnt);
+ if (!new->pf) {
+ err = -ENOMEM;
+ goto err_alloc_percpu;
+ }
err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE], ovr,
extack);
@@ -214,6 +231,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
err_replace_hw_filter:
err_set_parms:
+ free_percpu(new->pf);
+err_alloc_percpu:
tcf_exts_destroy(&new->exts);
err_exts_init:
kfree(new);
@@ -221,17 +240,21 @@ err_exts_init:
}
static int mall_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
return -EOPNOTSUPP;
}
-static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
if (arg->count < arg->skip)
goto skip;
+
+ if (!head)
+ return;
if (arg->fn(tp, head, arg) < 0)
arg->stop = 1;
skip:
@@ -268,10 +291,12 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
}
static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
+ struct tc_matchall_pcnt gpf = {};
struct cls_mall_head *head = fh;
struct nlattr *nest;
+ int cpu;
if (!head)
return skb->len;
@@ -289,6 +314,17 @@ static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
if (head->flags && nla_put_u32(skb, TCA_MATCHALL_FLAGS, head->flags))
goto nla_put_failure;
+ for_each_possible_cpu(cpu) {
+ struct tc_matchall_pcnt *pf = per_cpu_ptr(head->pf, cpu);
+
+ gpf.rhit += pf->rhit;
+ }
+
+ if (nla_put_64bit(skb, TCA_MATCHALL_PCNT,
+ sizeof(struct tc_matchall_pcnt),
+ &gpf, TCA_MATCHALL_PAD))
+ goto nla_put_failure;
+
if (tcf_exts_dump(skb, &head->exts))
goto nla_put_failure;
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index 0404aa5fa7cb..f006af23b64a 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -276,7 +276,8 @@ static void route4_queue_work(struct route4_filter *f)
tcf_queue_work(&f->rwork, route4_delete_filter_work);
}
-static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void route4_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
int h1, h2;
@@ -312,7 +313,7 @@ static void route4_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int route4_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter *f = arg;
@@ -393,7 +394,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
struct route4_bucket *b;
int err;
- err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr, true, extack);
if (err < 0)
return err;
@@ -468,7 +469,7 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp,
static int route4_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, bool ovr,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct route4_head *head = rtnl_dereference(tp->root);
struct route4_filter __rcu **fp;
@@ -496,7 +497,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
if (!f)
goto errout;
- err = tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
+ err = tcf_exts_init(&f->exts, net, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE);
if (err < 0)
goto errout;
@@ -560,15 +561,13 @@ errout:
return err;
}
-static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct route4_head *head = rtnl_dereference(tp->root);
unsigned int h, h1;
- if (head == NULL)
- arg->stop = 1;
-
- if (arg->stop)
+ if (head == NULL || arg->stop)
return;
for (h = 0; h <= 256; h++) {
@@ -597,7 +596,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct route4_filter *f = fh;
struct nlattr *nest;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index e9ccf7daea7d..0719a21d9c41 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -312,7 +312,8 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
__rsvp_delete_filter(f);
}
-static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
int h1, h2;
@@ -341,7 +342,7 @@ static void rsvp_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
struct rsvp_filter *nfp, *f = arg;
@@ -477,7 +478,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base,
u32 handle,
struct nlattr **tca,
- void **arg, bool ovr, struct netlink_ext_ack *extack)
+ void **arg, bool ovr, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct rsvp_head *data = rtnl_dereference(tp->root);
struct rsvp_filter *f, *nfp;
@@ -499,10 +501,11 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (err < 0)
return err;
- err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+ err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr, true,
+ extack);
if (err < 0)
goto errout2;
@@ -520,7 +523,8 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
goto errout2;
}
- err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+ err = tcf_exts_init(&n->exts, net, TCA_RSVP_ACT,
+ TCA_RSVP_POLICE);
if (err < 0) {
kfree(n);
goto errout2;
@@ -548,7 +552,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb,
if (f == NULL)
goto errout2;
- err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE);
+ err = tcf_exts_init(&f->exts, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
if (err < 0)
goto errout;
h2 = 16;
@@ -654,7 +658,8 @@ errout2:
return err;
}
-static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct rsvp_head *head = rtnl_dereference(tp->root);
unsigned int h, h1;
@@ -688,7 +693,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
}
static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct rsvp_filter *f = fh;
struct rsvp_session *s;
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
index 38bb882bb958..24e0a62a65cc 100644
--- a/net/sched/cls_tcindex.c
+++ b/net/sched/cls_tcindex.c
@@ -173,7 +173,7 @@ static void tcindex_destroy_fexts_work(struct work_struct *work)
}
static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = arg;
@@ -246,10 +246,12 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
[TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
};
-static int tcindex_filter_result_init(struct tcindex_filter_result *r)
+static int tcindex_filter_result_init(struct tcindex_filter_result *r,
+ struct net *net)
{
memset(r, 0, sizeof(*r));
- return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+ return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
+ TCA_TCINDEX_POLICE);
}
static void tcindex_partial_destroy_work(struct work_struct *work)
@@ -281,13 +283,10 @@ static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
return -ENOMEM;
for (i = 0; i < cp->hash; i++) {
- err = tcf_exts_init(&cp->perfect[i].exts,
+ err = tcf_exts_init(&cp->perfect[i].exts, net,
TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
goto errout;
-#ifdef CONFIG_NET_CLS_ACT
- cp->perfect[i].exts.net = net;
-#endif
}
return 0;
@@ -310,10 +309,10 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
int err, balloc = 0;
struct tcf_exts e;
- err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
+ err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
if (err < 0)
return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &e, ovr, true, extack);
if (err < 0)
goto errout;
@@ -344,7 +343,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
}
cp->h = p->h;
- err = tcindex_filter_result_init(&new_filter_result);
+ err = tcindex_filter_result_init(&new_filter_result, net);
if (err < 0)
goto errout1;
if (old_r)
@@ -431,7 +430,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
goto errout_alloc;
f->key = handle;
f->next = NULL;
- err = tcindex_filter_result_init(&f->result);
+ err = tcindex_filter_result_init(&f->result, net);
if (err < 0) {
kfree(f);
goto errout_alloc;
@@ -444,7 +443,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
}
if (old_r && old_r != r) {
- err = tcindex_filter_result_init(old_r);
+ err = tcindex_filter_result_init(old_r, net);
if (err < 0) {
kfree(f);
goto errout_alloc;
@@ -496,7 +495,7 @@ static int
tcindex_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
struct nlattr **tca, void **arg, bool ovr,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct nlattr *opt = tca[TCA_OPTIONS];
struct nlattr *tb[TCA_TCINDEX_MAX + 1];
@@ -519,7 +518,8 @@ tcindex_change(struct net *net, struct sk_buff *in_skb,
tca[TCA_RATE], ovr, extack);
}
-static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
+static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
+ bool rtnl_held)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter *f, *next;
@@ -555,7 +555,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker)
}
}
-static void tcindex_destroy(struct tcf_proto *tp,
+static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
@@ -582,7 +582,7 @@ static void tcindex_destroy(struct tcf_proto *tp,
for (f = rtnl_dereference(p->h[i]); f; f = next) {
next = rtnl_dereference(f->next);
- tcindex_delete(tp, &f->result, &last, NULL);
+ tcindex_delete(tp, &f->result, &last, rtnl_held, NULL);
}
}
@@ -591,7 +591,7 @@ static void tcindex_destroy(struct tcf_proto *tp,
static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct tcindex_data *p = rtnl_dereference(tp->root);
struct tcindex_filter_result *r = fh;
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index dcea21004604..48e76a3acf8a 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -629,7 +629,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
return -ENOENT;
}
-static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
+static void u32_destroy(struct tcf_proto *tp, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *root_ht = rtnl_dereference(tp->root);
@@ -663,7 +664,7 @@ static void u32_destroy(struct tcf_proto *tp, struct netlink_ext_ack *extack)
}
static int u32_delete(struct tcf_proto *tp, void *arg, bool *last,
- struct netlink_ext_ack *extack)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct tc_u_hnode *ht = arg;
struct tc_u_common *tp_c = tp->data;
@@ -726,7 +727,7 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
{
int err;
- err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, extack);
+ err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr, true, extack);
if (err < 0)
return err;
@@ -803,7 +804,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c,
rcu_assign_pointer(*ins, n);
}
-static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
+static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
struct tc_u_knode *n)
{
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
@@ -848,7 +849,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
#endif
memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key));
- if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) {
+ if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) {
kfree(new);
return NULL;
}
@@ -858,7 +859,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp,
static int u32_change(struct net *net, struct sk_buff *in_skb,
struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, bool ovr,
+ struct nlattr **tca, void **arg, bool ovr, bool rtnl_held,
struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
@@ -910,7 +911,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
return -EINVAL;
}
- new = u32_init_knode(tp, n);
+ new = u32_init_knode(net, tp, n);
if (!new)
return -ENOMEM;
@@ -1060,7 +1061,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0;
n->flags = flags;
- err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE);
+ err = tcf_exts_init(&n->exts, net, TCA_U32_ACT, TCA_U32_POLICE);
if (err < 0)
goto errout;
@@ -1123,7 +1124,8 @@ erridr:
return err;
}
-static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg,
+ bool rtnl_held)
{
struct tc_u_common *tp_c = tp->data;
struct tc_u_hnode *ht;
@@ -1281,7 +1283,7 @@ static void u32_bind_class(void *fh, u32 classid, unsigned long cl)
}
static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t)
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
{
struct tc_u_knode *n = fh;
struct tc_u_hnode *ht_up, *ht_down;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 7e4d1ccf4c87..fb8f138b9776 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -526,11 +526,6 @@ static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt,
return stab;
}
-static void stab_kfree_rcu(struct rcu_head *head)
-{
- kfree(container_of(head, struct qdisc_size_table, rcu));
-}
-
void qdisc_put_stab(struct qdisc_size_table *tab)
{
if (!tab)
@@ -538,7 +533,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab)
if (--tab->refcnt == 0) {
list_del(&tab->list);
- call_rcu(&tab->rcu, stab_kfree_rcu);
+ kfree_rcu(tab, rcu);
}
}
EXPORT_SYMBOL(qdisc_put_stab);
@@ -758,8 +753,7 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
return 0;
}
-void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
- unsigned int len)
+void qdisc_tree_reduce_backlog(struct Qdisc *sch, int n, int len)
{
bool qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
const struct Qdisc_class_ops *cops;
@@ -1202,9 +1196,11 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
} else {
if (handle == 0) {
handle = qdisc_alloc_handle(dev);
- err = -ENOMEM;
- if (handle == 0)
+ if (handle == 0) {
+ NL_SET_ERR_MSG(extack, "Maximum number of qdisc handles was exceeded");
+ err = -ENOSPC;
goto err_out3;
+ }
}
if (!netif_is_multiqueue(dev))
sch->flags |= TCQ_F_ONETXQUEUE;
@@ -1828,6 +1824,7 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
+ int err = 0;
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb)
@@ -1838,8 +1835,11 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
return -EINVAL;
}
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+ if (err > 0)
+ err = 0;
+ return err;
}
static int tclass_del_notify(struct net *net,
@@ -1870,8 +1870,11 @@ static int tclass_del_notify(struct net *net,
return err;
}
- return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
- n->nlmsg_flags & NLM_F_ECHO);
+ err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
+ n->nlmsg_flags & NLM_F_ECHO);
+ if (err > 0)
+ err = 0;
+ return err;
}
#ifdef CONFIG_NET_CLS
@@ -1910,17 +1913,19 @@ static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid,
block = cops->tcf_block(q, cl, NULL);
if (!block)
return;
- list_for_each_entry(chain, &block->chain_list, list) {
+ for (chain = tcf_get_next_chain(block, NULL);
+ chain;
+ chain = tcf_get_next_chain(block, chain)) {
struct tcf_proto *tp;
- for (tp = rtnl_dereference(chain->filter_chain);
- tp; tp = rtnl_dereference(tp->next)) {
+ for (tp = tcf_get_next_proto(chain, NULL, true);
+ tp; tp = tcf_get_next_proto(chain, tp, true)) {
struct tcf_bind_args arg = {};
arg.w.fn = tcf_node_bind;
arg.classid = clid;
arg.cl = new_cl;
- tp->ops->walk(tp, &arg.w);
+ tp->ops->walk(tp, &arg.w, true);
}
}
}
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 73940293700d..259d97bc2abd 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -138,8 +138,8 @@ struct cake_flow {
struct cake_host {
u32 srchost_tag;
u32 dsthost_tag;
- u16 srchost_refcnt;
- u16 dsthost_refcnt;
+ u16 srchost_bulk_flow_count;
+ u16 dsthost_bulk_flow_count;
};
struct cake_heap_entry {
@@ -211,6 +211,9 @@ struct cake_sched_data {
u8 ack_filter;
u8 atm_mode;
+ u32 fwmark_mask;
+ u16 fwmark_shft;
+
/* time_next = time_this + ((len * rate_ns) >> rate_shft) */
u16 rate_shft;
ktime_t time_next_packet;
@@ -746,8 +749,10 @@ skip_hash:
* queue, accept the collision, update the host tags.
*/
q->way_collisions++;
- q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
- q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
+ if (q->flows[outer_hash + k].set == CAKE_SET_BULK) {
+ q->hosts[q->flows[reduced_hash].srchost].srchost_bulk_flow_count--;
+ q->hosts[q->flows[reduced_hash].dsthost].dsthost_bulk_flow_count--;
+ }
allocate_src = cake_dsrc(flow_mode);
allocate_dst = cake_ddst(flow_mode);
found:
@@ -767,13 +772,14 @@ found:
}
for (i = 0; i < CAKE_SET_WAYS;
i++, k = (k + 1) % CAKE_SET_WAYS) {
- if (!q->hosts[outer_hash + k].srchost_refcnt)
+ if (!q->hosts[outer_hash + k].srchost_bulk_flow_count)
break;
}
q->hosts[outer_hash + k].srchost_tag = srchost_hash;
found_src:
srchost_idx = outer_hash + k;
- q->hosts[srchost_idx].srchost_refcnt++;
+ if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+ q->hosts[srchost_idx].srchost_bulk_flow_count++;
q->flows[reduced_hash].srchost = srchost_idx;
}
@@ -789,13 +795,14 @@ found_src:
}
for (i = 0; i < CAKE_SET_WAYS;
i++, k = (k + 1) % CAKE_SET_WAYS) {
- if (!q->hosts[outer_hash + k].dsthost_refcnt)
+ if (!q->hosts[outer_hash + k].dsthost_bulk_flow_count)
break;
}
q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
found_dst:
dsthost_idx = outer_hash + k;
- q->hosts[dsthost_idx].dsthost_refcnt++;
+ if (q->flows[reduced_hash].set == CAKE_SET_BULK)
+ q->hosts[dsthost_idx].dsthost_bulk_flow_count++;
q->flows[reduced_hash].dsthost = dsthost_idx;
}
}
@@ -1508,32 +1515,29 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
return idx + (tin << 16);
}
-static void cake_wash_diffserv(struct sk_buff *skb)
-{
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
- break;
- case htons(ETH_P_IPV6):
- ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
- break;
- default:
- break;
- }
-}
-
static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
{
+ int wlen = skb_network_offset(skb);
u8 dscp;
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
+ wlen += sizeof(struct iphdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
if (wash && dscp)
ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
return dscp;
case htons(ETH_P_IPV6):
+ wlen += sizeof(struct ipv6hdr);
+ if (!pskb_may_pull(skb, wlen) ||
+ skb_try_make_writable(skb, wlen))
+ return 0;
+
dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
if (wash && dscp)
ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
@@ -1552,26 +1556,32 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
struct sk_buff *skb)
{
struct cake_sched_data *q = qdisc_priv(sch);
- u32 tin;
+ u32 tin, mark;
+ u8 dscp;
- if (TC_H_MAJ(skb->priority) == sch->handle &&
- TC_H_MIN(skb->priority) > 0 &&
- TC_H_MIN(skb->priority) <= q->tin_cnt) {
+ /* Tin selection: Default to diffserv-based selection, allow overriding
+ * using firewall marks or skb->priority.
+ */
+ dscp = cake_handle_diffserv(skb,
+ q->rate_flags & CAKE_FLAG_WASH);
+ mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft;
+
+ if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT)
+ tin = 0;
+
+ else if (mark && mark <= q->tin_cnt)
+ tin = q->tin_order[mark - 1];
+
+ else if (TC_H_MAJ(skb->priority) == sch->handle &&
+ TC_H_MIN(skb->priority) > 0 &&
+ TC_H_MIN(skb->priority) <= q->tin_cnt)
tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
- if (q->rate_flags & CAKE_FLAG_WASH)
- cake_wash_diffserv(skb);
- } else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) {
- /* extract the Diffserv Precedence field, if it exists */
- /* and clear DSCP bits if washing */
- tin = q->tin_index[cake_handle_diffserv(skb,
- q->rate_flags & CAKE_FLAG_WASH)];
+ else {
+ tin = q->tin_index[dscp];
+
if (unlikely(tin >= q->tin_cnt))
tin = 0;
- } else {
- tin = 0;
- if (q->rate_flags & CAKE_FLAG_WASH)
- cake_wash_diffserv(skb);
}
return &q->tins[tin];
@@ -1794,20 +1804,30 @@ static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
b->sparse_flow_count++;
if (cake_dsrc(q->flow_mode))
- host_load = max(host_load, srchost->srchost_refcnt);
+ host_load = max(host_load, srchost->srchost_bulk_flow_count);
if (cake_ddst(q->flow_mode))
- host_load = max(host_load, dsthost->dsthost_refcnt);
+ host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
flow->deficit = (b->flow_quantum *
quantum_div[host_load]) >> 16;
} else if (flow->set == CAKE_SET_SPARSE_WAIT) {
+ struct cake_host *srchost = &b->hosts[flow->srchost];
+ struct cake_host *dsthost = &b->hosts[flow->dsthost];
+
/* this flow was empty, accounted as a sparse flow, but actually
* in the bulk rotation.
*/
flow->set = CAKE_SET_BULK;
b->sparse_flow_count--;
b->bulk_flow_count++;
+
+ if (cake_dsrc(q->flow_mode))
+ srchost->srchost_bulk_flow_count++;
+
+ if (cake_ddst(q->flow_mode))
+ dsthost->dsthost_bulk_flow_count++;
+
}
if (q->buffer_used > q->buffer_max_used)
@@ -1975,23 +1995,8 @@ retry:
dsthost = &b->hosts[flow->dsthost];
host_load = 1;
- if (cake_dsrc(q->flow_mode))
- host_load = max(host_load, srchost->srchost_refcnt);
-
- if (cake_ddst(q->flow_mode))
- host_load = max(host_load, dsthost->dsthost_refcnt);
-
- WARN_ON(host_load > CAKE_QUEUES);
-
/* flow isolation (DRR++) */
if (flow->deficit <= 0) {
- /* The shifted prandom_u32() is a way to apply dithering to
- * avoid accumulating roundoff errors
- */
- flow->deficit += (b->flow_quantum * quantum_div[host_load] +
- (prandom_u32() >> 16)) >> 16;
- list_move_tail(&flow->flowchain, &b->old_flows);
-
/* Keep all flows with deficits out of the sparse and decaying
* rotations. No non-empty flow can go into the decaying
* rotation, so they can't get deficits
@@ -2000,6 +2005,13 @@ retry:
if (flow->head) {
b->sparse_flow_count--;
b->bulk_flow_count++;
+
+ if (cake_dsrc(q->flow_mode))
+ srchost->srchost_bulk_flow_count++;
+
+ if (cake_ddst(q->flow_mode))
+ dsthost->dsthost_bulk_flow_count++;
+
flow->set = CAKE_SET_BULK;
} else {
/* we've moved it to the bulk rotation for
@@ -2009,6 +2021,22 @@ retry:
flow->set = CAKE_SET_SPARSE_WAIT;
}
}
+
+ if (cake_dsrc(q->flow_mode))
+ host_load = max(host_load, srchost->srchost_bulk_flow_count);
+
+ if (cake_ddst(q->flow_mode))
+ host_load = max(host_load, dsthost->dsthost_bulk_flow_count);
+
+ WARN_ON(host_load > CAKE_QUEUES);
+
+ /* The shifted prandom_u32() is a way to apply dithering to
+ * avoid accumulating roundoff errors
+ */
+ flow->deficit += (b->flow_quantum * quantum_div[host_load] +
+ (prandom_u32() >> 16)) >> 16;
+ list_move_tail(&flow->flowchain, &b->old_flows);
+
goto retry;
}
@@ -2029,6 +2057,13 @@ retry:
&b->decaying_flows);
if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--;
+
+ if (cake_dsrc(q->flow_mode))
+ srchost->srchost_bulk_flow_count--;
+
+ if (cake_ddst(q->flow_mode))
+ dsthost->dsthost_bulk_flow_count--;
+
b->decaying_flow_count++;
} else if (flow->set == CAKE_SET_SPARSE ||
flow->set == CAKE_SET_SPARSE_WAIT) {
@@ -2042,14 +2077,19 @@ retry:
if (flow->set == CAKE_SET_SPARSE ||
flow->set == CAKE_SET_SPARSE_WAIT)
b->sparse_flow_count--;
- else if (flow->set == CAKE_SET_BULK)
+ else if (flow->set == CAKE_SET_BULK) {
b->bulk_flow_count--;
- else
+
+ if (cake_dsrc(q->flow_mode))
+ srchost->srchost_bulk_flow_count--;
+
+ if (cake_ddst(q->flow_mode))
+ dsthost->dsthost_bulk_flow_count--;
+
+ } else
b->decaying_flow_count--;
flow->set = CAKE_SET_NONE;
- srchost->srchost_refcnt--;
- dsthost->dsthost_refcnt--;
}
goto begin;
}
@@ -2144,6 +2184,7 @@ static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = {
[TCA_CAKE_MPU] = { .type = NLA_U32 },
[TCA_CAKE_INGRESS] = { .type = NLA_U32 },
[TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 },
+ [TCA_CAKE_FWMARK] = { .type = NLA_U32 },
};
static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
@@ -2590,6 +2631,11 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt,
q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
}
+ if (tb[TCA_CAKE_FWMARK]) {
+ q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]);
+ q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0;
+ }
+
if (q->tins) {
sch_tree_lock(sch);
cake_reconfigure(sch);
@@ -2749,6 +2795,9 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
!!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask))
+ goto nla_put_failure;
+
return nla_nest_end(skb, opts);
nla_put_failure:
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 4dc05409e3fb..114b9048ea7e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1358,9 +1358,11 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
+ __u32 qlen;
cl->xstats.avgidle = cl->avgidle;
cl->xstats.undertime = 0;
+ qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog);
if (cl->undertime != PSCHED_PASTPERFECT)
cl->xstats.undertime = cl->undertime - q->now;
@@ -1368,7 +1370,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
+ gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
return -1;
return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
@@ -1665,17 +1667,13 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
- unsigned int qlen, backlog;
if (cl->filters || cl->children || cl == &q->link)
return -EBUSY;
sch_tree_lock(sch);
- qlen = cl->q->q.qlen;
- backlog = cl->q->qstats.backlog;
- qdisc_reset(cl->q);
- qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
+ qdisc_purge_queue(cl->q);
if (cl->next_alive)
cbq_deactivate_class(cl);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 09b800991065..430df9a55ec4 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -50,15 +50,6 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
return container_of(clc, struct drr_class, common);
}
-static void drr_purge_queue(struct drr_class *cl)
-{
- unsigned int len = cl->qdisc->q.qlen;
- unsigned int backlog = cl->qdisc->qstats.backlog;
-
- qdisc_reset(cl->qdisc);
- qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
-}
-
static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
[TCA_DRR_QUANTUM] = { .type = NLA_U32 },
};
@@ -167,7 +158,7 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
- drr_purge_queue(cl);
+ qdisc_purge_queue(cl->qdisc);
qdisc_class_hash_remove(&q->clhash, &cl->common);
sch_tree_unlock(sch);
@@ -269,7 +260,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
struct gnet_dump *d)
{
struct drr_class *cl = (struct drr_class *)arg;
- __u32 qlen = cl->qdisc->q.qlen;
+ __u32 qlen = qdisc_qlen_sum(cl->qdisc);
+ struct Qdisc *cl_q = cl->qdisc;
struct tc_drr_stats xstats;
memset(&xstats, 0, sizeof(xstats));
@@ -279,7 +271,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0)
+ gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0)
return -1;
return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 968a85fe4d4a..a117d9260558 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -68,7 +68,7 @@ static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
skb = __skb_dequeue(&q->skb_bad_txq);
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_dec(q, skb);
- qdisc_qstats_cpu_qlen_dec(q);
+ qdisc_qstats_atomic_qlen_dec(q);
} else {
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
@@ -108,7 +108,7 @@ static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_inc(q, skb);
- qdisc_qstats_cpu_qlen_inc(q);
+ qdisc_qstats_atomic_qlen_inc(q);
} else {
qdisc_qstats_backlog_inc(q, skb);
q->q.qlen++;
@@ -147,7 +147,7 @@ static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
qdisc_qstats_cpu_requeues_inc(q);
qdisc_qstats_cpu_backlog_inc(q, skb);
- qdisc_qstats_cpu_qlen_inc(q);
+ qdisc_qstats_atomic_qlen_inc(q);
skb = next;
}
@@ -252,7 +252,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
skb = __skb_dequeue(&q->gso_skb);
if (qdisc_is_percpu_stats(q)) {
qdisc_qstats_cpu_backlog_dec(q, skb);
- qdisc_qstats_cpu_qlen_dec(q);
+ qdisc_qstats_atomic_qlen_dec(q);
} else {
qdisc_qstats_backlog_dec(q, skb);
q->q.qlen--;
@@ -559,7 +559,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = {
};
static struct netdev_queue noop_netdev_queue = {
- .qdisc = &noop_qdisc,
+ RCU_POINTER_INITIALIZER(qdisc, &noop_qdisc),
.qdisc_sleeping = &noop_qdisc,
};
@@ -645,7 +645,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
if (unlikely(err))
return qdisc_drop_cpu(skb, qdisc, to_free);
- qdisc_qstats_cpu_qlen_inc(qdisc);
+ qdisc_qstats_atomic_qlen_inc(qdisc);
/* Note: skb can not be used after skb_array_produce(),
* so we better not use qdisc_qstats_cpu_backlog_inc()
*/
@@ -670,7 +670,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
if (likely(skb)) {
qdisc_qstats_cpu_backlog_dec(qdisc, skb);
qdisc_bstats_cpu_update(qdisc, skb);
- qdisc_qstats_cpu_qlen_dec(qdisc);
+ qdisc_qstats_atomic_qlen_dec(qdisc);
}
return skb;
@@ -714,7 +714,6 @@ static void pfifo_fast_reset(struct Qdisc *qdisc)
struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);
q->backlog = 0;
- q->qlen = 0;
}
}
@@ -1366,7 +1365,11 @@ static void mini_qdisc_rcu_func(struct rcu_head *head)
void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
struct tcf_proto *tp_head)
{
- struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
+ /* Protected with chain0->filter_chain_lock.
+ * Can't access chain directly because tp_head can be NULL.
+ */
+ struct mini_Qdisc *miniq_old =
+ rcu_dereference_protected(*miniqp->p_miniq, 1);
struct mini_Qdisc *miniq;
if (!tp_head) {
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 24cc220a3218..d2ab463f22ae 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -845,16 +845,6 @@ qdisc_peek_len(struct Qdisc *sch)
}
static void
-hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
-{
- unsigned int len = cl->qdisc->q.qlen;
- unsigned int backlog = cl->qdisc->qstats.backlog;
-
- qdisc_reset(cl->qdisc);
- qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
-}
-
-static void
hfsc_adjust_levels(struct hfsc_class *cl)
{
struct hfsc_class *p;
@@ -1076,7 +1066,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
qdisc_class_hash_insert(&q->clhash, &cl->cl_common);
list_add_tail(&cl->siblings, &parent->children);
if (parent->level == 0)
- hfsc_purge_queue(sch, parent);
+ qdisc_purge_queue(parent->qdisc);
hfsc_adjust_levels(parent);
sch_tree_unlock(sch);
@@ -1112,7 +1102,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
list_del(&cl->siblings);
hfsc_adjust_levels(cl->cl_parent);
- hfsc_purge_queue(sch, cl);
+ qdisc_purge_queue(cl->qdisc);
qdisc_class_hash_remove(&q->clhash, &cl->cl_common);
sch_tree_unlock(sch);
@@ -1328,8 +1318,9 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
{
struct hfsc_class *cl = (struct hfsc_class *)arg;
struct tc_hfsc_stats xstats;
+ __u32 qlen;
- cl->qstats.backlog = cl->qdisc->qstats.backlog;
+ qdisc_qstats_qlen_backlog(cl->qdisc, &qlen, &cl->qstats.backlog);
xstats.level = cl->level;
xstats.period = cl->cl_vtperiod;
xstats.work = cl->cl_total;
@@ -1337,7 +1328,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0)
+ gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
return -1;
return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 30f9da7e1076..2f9883b196e8 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1127,10 +1127,9 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
};
__u32 qlen = 0;
- if (!cl->level && cl->leaf.q) {
- qlen = cl->leaf.q->q.qlen;
- qs.backlog = cl->leaf.q->qstats.backlog;
- }
+ if (!cl->level && cl->leaf.q)
+ qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
+
cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
INT_MIN, INT_MAX);
cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
@@ -1270,13 +1269,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
- if (!cl->level) {
- unsigned int qlen = cl->leaf.q->q.qlen;
- unsigned int backlog = cl->leaf.q->qstats.backlog;
-
- qdisc_reset(cl->leaf.q);
- qdisc_tree_reduce_backlog(cl->leaf.q, qlen, backlog);
- }
+ if (!cl->level)
+ qdisc_purge_queue(cl->leaf.q);
/* delete from hash and active; remainder in destroy_class */
qdisc_class_hash_remove(&q->clhash, &cl->common);
@@ -1404,12 +1398,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
classid, NULL);
sch_tree_lock(sch);
if (parent && !parent->level) {
- unsigned int qlen = parent->leaf.q->q.qlen;
- unsigned int backlog = parent->leaf.q->qstats.backlog;
-
/* turn parent into inner node */
- qdisc_reset(parent->leaf.q);
- qdisc_tree_reduce_backlog(parent->leaf.q, qlen, backlog);
+ qdisc_purge_queue(parent->leaf.q);
qdisc_put(parent->leaf.q);
if (parent->prio_activity)
htb_deactivate(q, parent);
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 203659bc3906..3a3312467692 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -249,7 +249,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
sch = dev_queue->qdisc_sleeping;
if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
- gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
+ qdisc_qstats_copy(d, sch) < 0)
return -1;
return 0;
}
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index d364e63c396d..ea0dc112b38d 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -561,8 +561,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
sch = dev_queue->qdisc_sleeping;
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &sch->bstats) < 0 ||
- gnet_stats_copy_queue(d, NULL,
- &sch->qstats, sch->q.qlen) < 0)
+ qdisc_qstats_copy(d, sch) < 0)
return -1;
}
return 0;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 7410ce4d0321..35b03ae08e0f 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -201,9 +201,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
for (i = q->bands; i < q->max_bands; i++) {
if (q->queues[i] != &noop_qdisc) {
struct Qdisc *child = q->queues[i];
+
q->queues[i] = &noop_qdisc;
- qdisc_tree_reduce_backlog(child, child->q.qlen,
- child->qstats.backlog);
+ qdisc_tree_flush_backlog(child);
qdisc_put(child);
}
}
@@ -225,9 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
qdisc_hash_add(child, true);
if (old != &noop_qdisc) {
- qdisc_tree_reduce_backlog(old,
- old->q.qlen,
- old->qstats.backlog);
+ qdisc_tree_flush_backlog(old);
qdisc_put(old);
}
sch_tree_unlock(sch);
@@ -344,7 +342,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
cl_q = q->queues[cl - 1];
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl_q->bstats) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
+ qdisc_qstats_copy(d, cl_q) < 0)
return -1;
return 0;
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index d1429371592f..1cc0c7b74aa3 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -17,9 +17,7 @@
* University of Oslo, Norway.
*
* References:
- * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00
- * IEEE Conference on High Performance Switching and Routing 2013 :
- * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem"
+ * RFC 8033: https://tools.ietf.org/html/rfc8033
*/
#include <linux/module.h>
@@ -31,9 +29,9 @@
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
-#define QUEUE_THRESHOLD 10000
+#define QUEUE_THRESHOLD 16384
#define DQCOUNT_INVALID -1
-#define MAX_PROB 0xffffffff
+#define MAX_PROB 0xffffffffffffffff
#define PIE_SCALE 8
/* parameters used */
@@ -49,14 +47,16 @@ struct pie_params {
/* variables used */
struct pie_vars {
- u32 prob; /* probability but scaled by u32 limit. */
+ u64 prob; /* probability but scaled by u64 limit. */
psched_time_t burst_time;
psched_time_t qdelay;
psched_time_t qdelay_old;
u64 dq_count; /* measured in bytes */
psched_time_t dq_tstamp; /* drain rate */
+ u64 accu_prob; /* accumulated drop probability */
u32 avg_dq_rate; /* bytes per pschedtime tick,scaled */
u32 qlen_old; /* in bytes */
+ u8 accu_prob_overflows; /* overflows of accu_prob */
};
/* statistics gathering */
@@ -81,9 +81,9 @@ static void pie_params_init(struct pie_params *params)
{
params->alpha = 2;
params->beta = 20;
- params->tupdate = usecs_to_jiffies(30 * USEC_PER_MSEC); /* 30 ms */
+ params->tupdate = usecs_to_jiffies(15 * USEC_PER_MSEC); /* 15 ms */
params->limit = 1000; /* default of 1000 packets */
- params->target = PSCHED_NS2TICKS(20 * NSEC_PER_MSEC); /* 20 ms */
+ params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC); /* 15 ms */
params->ecn = false;
params->bytemode = false;
}
@@ -91,16 +91,18 @@ static void pie_params_init(struct pie_params *params)
static void pie_vars_init(struct pie_vars *vars)
{
vars->dq_count = DQCOUNT_INVALID;
+ vars->accu_prob = 0;
vars->avg_dq_rate = 0;
- /* default of 100 ms in pschedtime */
- vars->burst_time = PSCHED_NS2TICKS(100 * NSEC_PER_MSEC);
+ /* default of 150 ms in pschedtime */
+ vars->burst_time = PSCHED_NS2TICKS(150 * NSEC_PER_MSEC);
+ vars->accu_prob_overflows = 0;
}
static bool drop_early(struct Qdisc *sch, u32 packet_size)
{
struct pie_sched_data *q = qdisc_priv(sch);
- u32 rnd;
- u32 local_prob = q->vars.prob;
+ u64 rnd;
+ u64 local_prob = q->vars.prob;
u32 mtu = psched_mtu(qdisc_dev(sch));
/* If there is still burst allowance left skip random early drop */
@@ -124,13 +126,33 @@ static bool drop_early(struct Qdisc *sch, u32 packet_size)
* probability. Smaller packets will have lower drop prob in this case
*/
if (q->params.bytemode && packet_size <= mtu)
- local_prob = (local_prob / mtu) * packet_size;
+ local_prob = (u64)packet_size * div_u64(local_prob, mtu);
else
local_prob = q->vars.prob;
- rnd = prandom_u32();
- if (rnd < local_prob)
+ if (local_prob == 0) {
+ q->vars.accu_prob = 0;
+ q->vars.accu_prob_overflows = 0;
+ }
+
+ if (local_prob > MAX_PROB - q->vars.accu_prob)
+ q->vars.accu_prob_overflows++;
+
+ q->vars.accu_prob += local_prob;
+
+ if (q->vars.accu_prob_overflows == 0 &&
+ q->vars.accu_prob < (MAX_PROB / 100) * 85)
+ return false;
+ if (q->vars.accu_prob_overflows == 8 &&
+ q->vars.accu_prob >= MAX_PROB / 2)
+ return true;
+
+ prandom_bytes(&rnd, 8);
+ if (rnd < local_prob) {
+ q->vars.accu_prob = 0;
+ q->vars.accu_prob_overflows = 0;
return true;
+ }
return false;
}
@@ -168,6 +190,8 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
out:
q->stats.dropped++;
+ q->vars.accu_prob = 0;
+ q->vars.accu_prob_overflows = 0;
return qdisc_drop(skb, sch, to_free);
}
@@ -317,9 +341,10 @@ static void calculate_probability(struct Qdisc *sch)
u32 qlen = sch->qstats.backlog; /* queue size in bytes */
psched_time_t qdelay = 0; /* in pschedtime */
psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */
- s32 delta = 0; /* determines the change in probability */
- u32 oldprob;
- u32 alpha, beta;
+ s64 delta = 0; /* determines the change in probability */
+ u64 oldprob;
+ u64 alpha, beta;
+ u32 power;
bool update_prob = true;
q->vars.qdelay_old = q->vars.qdelay;
@@ -339,38 +364,36 @@ static void calculate_probability(struct Qdisc *sch)
* value for alpha as 0.125. In this implementation, we use values 0-32
* passed from user space to represent this. Also, alpha and beta have
* unit of HZ and need to be scaled before they can be used to update
- * probability. alpha/beta are updated locally below by 1) scaling them
- * appropriately 2) scaling down by 16 to come to 0-2 range.
- * Please see paper for details.
- *
- * We scale alpha and beta differently depending on whether we are in
- * light, medium or high dropping mode.
+ * probability. alpha/beta are updated locally below by scaling down
+ * by 16 to come to 0-2 range.
*/
- if (q->vars.prob < MAX_PROB / 100) {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 7;
- } else if (q->vars.prob < MAX_PROB / 10) {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 5;
- } else {
- alpha =
- (q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
- beta =
- (q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ alpha = ((u64)q->params.alpha * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+ beta = ((u64)q->params.beta * (MAX_PROB / PSCHED_TICKS_PER_SEC)) >> 4;
+
+ /* We scale alpha and beta differently depending on how heavy the
+ * congestion is. Please see RFC 8033 for details.
+ */
+ if (q->vars.prob < MAX_PROB / 10) {
+ alpha >>= 1;
+ beta >>= 1;
+
+ power = 100;
+ while (q->vars.prob < div_u64(MAX_PROB, power) &&
+ power <= 1000000) {
+ alpha >>= 2;
+ beta >>= 2;
+ power *= 10;
+ }
}
/* alpha and beta should be between 0 and 32, in multiples of 1/16 */
- delta += alpha * ((qdelay - q->params.target));
- delta += beta * ((qdelay - qdelay_old));
+ delta += alpha * (u64)(qdelay - q->params.target);
+ delta += beta * (u64)(qdelay - qdelay_old);
oldprob = q->vars.prob;
/* to ensure we increase probability in steps of no more than 2% */
- if (delta > (s32)(MAX_PROB / (100 / 2)) &&
+ if (delta > (s64)(MAX_PROB / (100 / 2)) &&
q->vars.prob >= MAX_PROB / 10)
delta = (MAX_PROB / 100) * 2;
@@ -406,7 +429,8 @@ static void calculate_probability(struct Qdisc *sch)
*/
if (qdelay == 0 && qdelay_old == 0 && update_prob)
- q->vars.prob = (q->vars.prob * 98) / 100;
+ /* Reduce drop probability to 98.4% */
+ q->vars.prob -= q->vars.prob / 64u;
q->vars.qdelay = qdelay;
q->vars.qlen_old = qlen;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 847141cd900f..d519b21535b3 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -216,12 +216,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
q->bands = qopt->bands;
memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
- for (i = q->bands; i < oldbands; i++) {
- struct Qdisc *child = q->queues[i];
-
- qdisc_tree_reduce_backlog(child, child->q.qlen,
- child->qstats.backlog);
- }
+ for (i = q->bands; i < oldbands; i++)
+ qdisc_tree_flush_backlog(q->queues[i]);
for (i = oldbands; i < q->bands; i++) {
q->queues[i] = queues[i];
@@ -365,7 +361,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
cl_q = q->queues[cl - 1];
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl_q->bstats) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
+ qdisc_qstats_copy(d, cl_q) < 0)
return -1;
return 0;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 29f5c4a24688..1589364b54da 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -217,15 +217,6 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
return container_of(clc, struct qfq_class, common);
}
-static void qfq_purge_queue(struct qfq_class *cl)
-{
- unsigned int len = cl->qdisc->q.qlen;
- unsigned int backlog = cl->qdisc->qstats.backlog;
-
- qdisc_reset(cl->qdisc);
- qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
-}
-
static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
[TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
[TCA_QFQ_LMAX] = { .type = NLA_U32 },
@@ -551,7 +542,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
- qfq_purge_queue(cl);
+ qdisc_purge_queue(cl->qdisc);
qdisc_class_hash_remove(&q->clhash, &cl->common);
sch_tree_unlock(sch);
@@ -655,8 +646,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
d, NULL, &cl->bstats) < 0 ||
gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, NULL,
- &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
+ qdisc_qstats_copy(d, cl->qdisc) < 0)
return -1;
return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 9df9942340ea..4e8c0abf6194 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -233,8 +233,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
q->flags = ctl->flags;
q->limit = ctl->limit;
if (child) {
- qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
- q->qdisc->qstats.backlog);
+ qdisc_tree_flush_backlog(q->qdisc);
old_child = q->qdisc;
q->qdisc = child;
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index bab506b01a32..2419fdb75966 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -521,8 +521,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
qdisc_hash_add(child, true);
sch_tree_lock(sch);
- qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
- q->qdisc->qstats.backlog);
+ qdisc_tree_flush_backlog(q->qdisc);
qdisc_put(q->qdisc);
q->qdisc = child;
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 206e4dbed12f..c7041999eb5d 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -895,7 +895,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
sch = dev_queue->qdisc_sleeping;
if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
- gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
+ qdisc_qstats_copy(d, sch) < 0)
return -1;
return 0;
}
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 7f272a9070c5..f71578dbb9e3 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -391,8 +391,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
sch_tree_lock(sch);
if (child) {
- qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
- q->qdisc->qstats.backlog);
+ qdisc_tree_flush_backlog(q->qdisc);
qdisc_put(q->qdisc);
q->qdisc = child;
}