aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2020-05-15 10:23:11 -0700
committer: David S. Miller <davem@davemloft.net> 2020-05-15 10:42:51 -0700
commit: cd2809cca24356c4d7c47030532d880b462fc8f9 (patch)
tree: be0a3826d07f1b0fc73bed86c04541782ae88684
parent: net: core: recursively find netdev by device node (diff)
parent: selftests: implement flower classifier terse dump tests (diff)
download: wireguard-linux-cd2809cca24356c4d7c47030532d880b462fc8f9.tar.xz
download: wireguard-linux-cd2809cca24356c4d7c47030532d880b462fc8f9.zip
Merge branch 'Implement-classifier-action-terse-dump-mode'
Vlad Buslov says:

====================
Implement classifier-action terse dump mode

The output rate of the current upstream kernel TC filter dump implementation is relatively low (~100k rules/sec depending on configuration). This constraint impacts the performance of software switch implementations that rely on TC for their datapath implementation and periodically call TC filter dump to update rule stats. Moreover, TC filter dump outputs a lot of static data that doesn't change during the filter lifecycle (filter key, specific action details, etc.), which constitutes a significant portion of the payload of the resulting netlink packets and increases the number of syscalls necessary to dump all filters on a particular Qdisc.

In order to significantly improve the filter dump rate, this patch set implements a new mode of TC filter dump operation named "terse dump" mode. In this mode only the parameters necessary to identify the filter (handle, action cookie, etc.) and the data that can change during the filter lifecycle (filter flags, action stats, etc.) are preserved in the dump output, while everything else is omitted.

The userspace API is implemented using a new TCA_DUMP_FLAGS tlv whose only available flag value is TCA_DUMP_FLAGS_TERSE. Internally, the new API requires individual classifier support (new tcf_proto_ops->terse_dump() callback). Support for action terse dump is implemented in the act API and doesn't require changing individual action implementations.

The following table provides a performance comparison between regular filter dump and the new terse dump mode for two classifier-action profiles: one minimal config with an L2 flower classifier and a single gact action, and another, heavier config with an L2+5tuple flower classifier with tunnel_key+mirred actions.
 Classifier-action type      |        dump |  terse dump | X improvement
                             | (rules/sec) | (rules/sec) |
-----------------------------+-------------+-------------+---------------
 L2 with gact                |       141.8 |       293.2 |          2.07
 L2+5tuple tunnel_key+mirred |        76.4 |       198.8 |          2.60

Benchmark details: to measure the rate, the tc filter dump and terse dump commands are invoked on an ingress Qdisc that has one million filters configured, using the following commands.

> time sudo tc -s filter show dev ens1f0 ingress >/dev/null

> time sudo tc -s filter show terse dev ens1f0 ingress >/dev/null

The value in the results table is calculated by dividing 1000000 total rules by the "real" time reported by the time command.

Setup details: 2x Intel(R) Xeon(R) CPU E5-2620 v3 @ 2.40GHz, 32GB memory
====================

Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/act_api.h | 2
-rw-r--r-- include/net/pkt_cls.h | 1
-rw-r--r-- include/net/sch_generic.h | 4
-rw-r--r-- include/uapi/linux/rtnetlink.h | 6
-rw-r--r-- net/sched/act_api.c | 30
-rw-r--r-- net/sched/cls_api.c | 67
-rw-r--r-- net/sched/cls_flower.c | 43
-rw-r--r-- tools/testing/selftests/tc-testing/tc-tests/filters/tests.json | 38
8 files changed, 174 insertions, 17 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h
index c24d7643548e..1b4bfc4437be 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -193,7 +193,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
bool rtnl_held,
struct netlink_ext_ack *extack);
int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
- int ref);
+ int ref, bool terse);
int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 04aa0649f3b0..ed65619cbc47 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -325,6 +325,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
void tcf_exts_destroy(struct tcf_exts *exts);
void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
+int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts);
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
/**
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ab87a8b86a32..c510b03b9751 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -330,6 +330,10 @@ struct tcf_proto_ops {
int (*dump)(struct net*, struct tcf_proto*, void *,
struct sk_buff *skb, struct tcmsg*,
bool);
+ int (*terse_dump)(struct net *net,
+ struct tcf_proto *tp, void *fh,
+ struct sk_buff *skb,
+ struct tcmsg *t, bool rtnl_held);
int (*tmplt_dump)(struct sk_buff *skb,
struct net *net,
void *tmplt_priv);
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 4a8c5b745157..073e71ef6bdd 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -609,11 +609,17 @@ enum {
TCA_HW_OFFLOAD,
TCA_INGRESS_BLOCK,
TCA_EGRESS_BLOCK,
+ TCA_DUMP_FLAGS,
__TCA_MAX
};
#define TCA_MAX (__TCA_MAX - 1)
+#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic
+ * data necessary to identify the objects
+ * (handle, cookie, etc.) and stats.
+ */
+
#define TCA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg))))
#define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg))
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index fbbec2e562f5..8ac7eb0a8309 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -766,12 +766,10 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
return a->ops->dump(skb, a, bind, ref);
}
-int
-tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int
+tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a)
{
- int err = -EINVAL;
unsigned char *b = skb_tail_pointer(skb);
- struct nlattr *nest;
struct tc_cookie *cookie;
if (nla_put_string(skb, TCA_KIND, a->ops->kind))
@@ -789,6 +787,23 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
}
rcu_read_unlock();
+ return 0;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+int
+tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+ int err = -EINVAL;
+ unsigned char *b = skb_tail_pointer(skb);
+ struct nlattr *nest;
+
+ if (tcf_action_dump_terse(skb, a))
+ goto nla_put_failure;
+
if (a->hw_stats != TCA_ACT_HW_STATS_ANY &&
nla_put_bitfield32(skb, TCA_ACT_HW_STATS,
a->hw_stats, TCA_ACT_HW_STATS_ANY))
@@ -820,7 +835,7 @@ nla_put_failure:
EXPORT_SYMBOL(tcf_action_dump_1);
int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
- int bind, int ref)
+ int bind, int ref, bool terse)
{
struct tc_action *a;
int err = -EINVAL, i;
@@ -831,7 +846,8 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
nest = nla_nest_start_noflag(skb, i + 1);
if (nest == NULL)
goto nla_put_failure;
- err = tcf_action_dump_1(skb, a, bind, ref);
+ err = terse ? tcf_action_dump_terse(skb, a) :
+ tcf_action_dump_1(skb, a, bind, ref);
if (err < 0)
goto errout;
nla_nest_end(skb, nest);
@@ -1133,7 +1149,7 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
if (!nest)
goto out_nlmsg_trim;
- if (tcf_action_dump(skb, actions, bind, ref) < 0)
+ if (tcf_action_dump(skb, actions, bind, ref, false) < 0)
goto out_nlmsg_trim;
nla_nest_end(skb, nest);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 299b963c796e..752d608f4442 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1851,7 +1851,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
struct tcf_proto *tp, struct tcf_block *block,
struct Qdisc *q, u32 parent, void *fh,
u32 portid, u32 seq, u16 flags, int event,
- bool rtnl_held)
+ bool terse_dump, bool rtnl_held)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1878,6 +1878,14 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
goto nla_put_failure;
if (!fh) {
tcm->tcm_handle = 0;
+ } else if (terse_dump) {
+ if (tp->ops->terse_dump) {
+ if (tp->ops->terse_dump(net, tp, fh, skb, tcm,
+ rtnl_held) < 0)
+ goto nla_put_failure;
+ } else {
+ goto cls_op_not_supp;
+ }
} else {
if (tp->ops->dump &&
tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
@@ -1888,6 +1896,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
out_nlmsg_trim:
nla_put_failure:
+cls_op_not_supp:
nlmsg_trim(skb, b);
return -1;
}
@@ -1908,7 +1917,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
n->nlmsg_seq, n->nlmsg_flags, event,
- rtnl_held) <= 0) {
+ false, rtnl_held) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1940,7 +1949,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
- rtnl_held) <= 0) {
+ false, rtnl_held) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to build del event notification");
kfree_skb(skb);
return -EINVAL;
@@ -2501,6 +2510,7 @@ struct tcf_dump_args {
struct tcf_block *block;
struct Qdisc *q;
u32 parent;
+ bool terse_dump;
};
static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
@@ -2511,12 +2521,12 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER, true);
+ RTM_NEWTFILTER, a->terse_dump, true);
}
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
struct sk_buff *skb, struct netlink_callback *cb,
- long index_start, long *p_index)
+ long index_start, long *p_index, bool terse)
{
struct net *net = sock_net(skb->sk);
struct tcf_block *block = chain->block;
@@ -2545,7 +2555,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER, true) <= 0)
+ RTM_NEWTFILTER, false, true) <= 0)
goto errout;
cb->args[1] = 1;
}
@@ -2561,6 +2571,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
arg.w.skip = cb->args[1] - 1;
arg.w.count = 0;
arg.w.cookie = cb->args[2];
+ arg.terse_dump = terse;
tp->ops->walk(tp, &arg.w, true);
cb->args[2] = arg.w.cookie;
cb->args[1] = arg.w.count + 1;
@@ -2574,6 +2585,10 @@ errout:
return false;
}
+static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
+ [TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
+};
+
/* called with RTNL */
static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
{
@@ -2583,6 +2598,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
struct Qdisc *q = NULL;
struct tcf_block *block;
struct tcmsg *tcm = nlmsg_data(cb->nlh);
+ bool terse_dump = false;
long index_start;
long index;
u32 parent;
@@ -2592,10 +2608,17 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
- NULL, cb->extack);
+ tcf_tfilter_dump_policy, cb->extack);
if (err)
return err;
+ if (tca[TCA_DUMP_FLAGS]) {
+ struct nla_bitfield32 flags =
+ nla_get_bitfield32(tca[TCA_DUMP_FLAGS]);
+
+ terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE;
+ }
+
if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
if (!block)
@@ -2653,7 +2676,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
nla_get_u32(tca[TCA_CHAIN]) != chain->index)
continue;
if (!tcf_chain_dump(chain, q, parent, skb, cb,
- index_start, &index)) {
+ index_start, &index, terse_dump)) {
tcf_chain_put(chain);
err = -EMSGSIZE;
break;
@@ -3156,7 +3179,8 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
if (nest == NULL)
goto nla_put_failure;
- if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
+ if (tcf_action_dump(skb, exts->actions, 0, 0, false)
+ < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
} else if (exts->police) {
@@ -3180,6 +3204,31 @@ nla_put_failure:
}
EXPORT_SYMBOL(tcf_exts_dump);
+int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+ struct nlattr *nest;
+
+ if (!exts->action || !tcf_exts_has_actions(exts))
+ return 0;
+
+ nest = nla_nest_start_noflag(skb, exts->action);
+ if (!nest)
+ goto nla_put_failure;
+
+ if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0)
+ goto nla_put_failure;
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+#else
+ return 0;
+#endif
+}
+EXPORT_SYMBOL(tcf_exts_terse_dump);
int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
{
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 74a0febcafb8..0c574700da75 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2768,6 +2768,48 @@ nla_put_failure:
return -1;
}
+static int fl_terse_dump(struct net *net, struct tcf_proto *tp, void *fh,
+ struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
+{
+ struct cls_fl_filter *f = fh;
+ struct nlattr *nest;
+ bool skip_hw;
+
+ if (!f)
+ return skb->len;
+
+ t->tcm_handle = f->handle;
+
+ nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
+ if (!nest)
+ goto nla_put_failure;
+
+ spin_lock(&tp->lock);
+
+ skip_hw = tc_skip_hw(f->flags);
+
+ if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
+ goto nla_put_failure_locked;
+
+ spin_unlock(&tp->lock);
+
+ if (!skip_hw)
+ fl_hw_update_stats(tp, f, rtnl_held);
+
+ if (tcf_exts_terse_dump(skb, &f->exts))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+
+ return skb->len;
+
+nla_put_failure_locked:
+ spin_unlock(&tp->lock);
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -1;
+}
+
static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv)
{
struct fl_flow_tmplt *tmplt = tmplt_priv;
@@ -2832,6 +2874,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.hw_add = fl_hw_add,
.hw_del = fl_hw_del,
.dump = fl_dump,
+ .terse_dump = fl_terse_dump,
.bind_class = fl_bind_class,
.tmplt_create = fl_tmplt_create,
.tmplt_destroy = fl_tmplt_destroy,
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index 12aa4bc1f6a0..bb543bf69d69 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -87,5 +87,43 @@
"teardown": [
"$TC qdisc del dev $DEV2 ingress"
]
+ },
+ {
+ "id": "7c65",
+ "name": "Add flower filter and then terse dump it",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+ "matchPattern": "filter protocol ip pref 1 flower.*handle",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress"
+ ]
+ },
+ {
+ "id": "d45e",
+ "name": "Add flower filter and verify that terse dump doesn't output filter key",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+ "matchPattern": " dst_mac e4:11:22:11:4a:51",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress"
+ ]
}
]