author     2022-01-04 12:12:56 +0000
committer  2022-01-04 12:12:56 +0000
commit     dfb55f9984f53eb2fd2b6df64196e6eb0edf0b65
tree       066fc0c341cbdaf052ac2ab06f5d56fd81829e2a /net
parent     Merge tag 'batadv-next-pullrequest-20220103' of git://git.open-mesh.org/linux-merge
parent     net/mlx5: CT: Set flow source hint from provided tuple device
Merge branch 'act_tc-offload-originating-device'
Paul Blakey says:
====================
net/sched: Pass originating device to drivers offloading ct connection
Currently, drivers register to a ct zone that can be shared by
multiple devices. This can be inefficient for the driver to offload,
as it needs to handle all the cases where the tuple can come from,
instead of only where it is most likely to arrive from.
For example, consider the following tc rules:
   tc filter add dev dev1 ... flower action ct commit zone 5 \
      action mirred egress redirect dev dev2
   tc filter add dev dev2 ... flower action ct zone 5 \
      action goto chain 2
   tc filter add dev dev2 ... flower ct_state +trk+est ... \
      action mirred egress redirect dev dev1
Both dev1 and dev2 register to the zone 5 flow table (created
by act_ct). A tuple originating on dev1 and going to dev2 will
be offloaded to both devices, and both will need to offload
both directions, resulting in 4 rules in total. Yet traffic
will only hit the originating tuple on dev1, and the reply
tuple on dev2.
By passing the originating device that created the connection
along with the tuple, dev1 can choose to offload only the
originating tuple, and dev2 only the reply tuple, resulting in
a more efficient offload, as spelled out below.
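Spelling out the rule counts for the example above:

   without the originating device:
     dev1 offloads originating + reply tuples
     dev2 offloads originating + reply tuples   -> 4 rules
   with it:
     dev1 offloads only the originating tuple
     dev2 offloads only the reply tuple         -> 2 rules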
The first patch adds an act_ct nf conntrack extension that
temporarily stores the originating device from the skb until
the connection is established and offloaded. Once the connection
is sent to offload, it fills in the tuple's originating device.
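The extension's header is added by the first patch and is outside
the 'net'-limited diff below; as a minimal sketch consistent with
the helpers used in the hunks that follow (the exact layout of
include/net/netfilter/nf_conntrack_act_ct.h may differ), it looks
roughly like:

   struct nf_conn_act_ct_ext {
   	/* originating device, recorded per conntrack direction */
   	int ifindex[IP_CT_DIR_MAX];
   };

   /* Record the device an skb arrived on, for the direction it matched. */
   static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb,
   					   struct nf_conn *ct,
   					   enum ip_conntrack_info ctinfo)
   {
   	struct nf_conn_act_ct_ext *act_ct_ext = nf_conn_act_ct_ext_find(ct);

   	if (act_ct_ext && skb->dev)
   		act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex;
   }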
The second patch gets this information from tuples
that pass through openvswitch.
The third patch is the Mellanox driver's ct offload
implementation, which uses this information to hint to firmware
where packets for this offloaded tuple will arrive from (the
LOCAL or UPLINK port), and thus increases the insertion rate.
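The mlx5 patch itself is also outside the 'net'-limited diff below.
Purely as an illustration (the example_* names are hypothetical, not
the mlx5 API), a driver consuming an offloaded tuple could turn the
recorded ifindex into such a hint:

   /* Hypothetical driver callback: map the tuple's originating
    * ifindex to a firmware flow-source hint (LOCAL vs UPLINK).
    */
   static void example_offload_tuple(struct example_priv *priv,
   				  struct flow_offload *flow,
   				  enum flow_offload_tuple_dir dir)
   {
   	int iifidx = flow->tuplehash[dir].tuple.iifidx;

   	if (iifidx == priv->uplink_ifindex)
   		example_set_flow_source(priv, EXAMPLE_FLOW_SOURCE_UPLINK);
   	else
   		example_set_flow_source(priv, EXAMPLE_FLOW_SOURCE_LOCAL_PORT);
   }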
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
 net/netfilter/nf_conntrack_core.c |  6 +++++-
 net/openvswitch/conntrack.c       |  6 ++++++
 net/sched/act_ct.c                | 27 +++++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d7e313548066..01d6589fba6e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -47,6 +47,7 @@
 #include <net/netfilter/nf_conntrack_timeout.h>
 #include <net/netfilter/nf_conntrack_labels.h>
 #include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_act_ct.h>
 #include <net/netfilter/nf_nat.h>
 #include <net/netfilter/nf_nat_helper.h>
 #include <net/netns/hash.h>
@@ -2626,7 +2627,7 @@ int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp)
 static __always_inline unsigned int total_extension_size(void)
 {
 	/* remember to add new extensions below */
-	BUILD_BUG_ON(NF_CT_EXT_NUM > 9);
+	BUILD_BUG_ON(NF_CT_EXT_NUM > 10);
 
 	return sizeof(struct nf_ct_ext) +
 	       sizeof(struct nf_conn_help)
@@ -2650,6 +2651,9 @@ static __always_inline unsigned int total_extension_size(void)
 #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
 		+ sizeof(struct nf_conn_synproxy)
 #endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+		+ sizeof(struct nf_conn_act_ct_ext)
+#endif
 	;
 };
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 1b5eae57bc90..13294a55073a 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -25,6 +25,8 @@
 #include <net/netfilter/nf_nat.h>
 #endif
 
+#include <net/netfilter/nf_conntrack_act_ct.h>
+
 #include "datapath.h"
 #include "conntrack.h"
 #include "flow.h"
@@ -1045,6 +1047,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
 			 */
 			nf_ct_set_tcp_be_liberal(ct);
 		}
+
+		nf_conn_act_ct_ext_fill(skb, ct, ctinfo);
 	}
 
 	return 0;
@@ -1245,6 +1249,8 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
 					 &info->labels.mask);
 		if (err)
 			return err;
+
+		nf_conn_act_ct_ext_add(ct);
 	} else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
 		   labels_nonzero(&info->labels.mask)) {
 		err = ovs_ct_set_labels(ct, key, &info->labels.value,
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f9afb5abff21..ebdf7caf7084 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -32,6 +32,7 @@
 #include <net/netfilter/nf_conntrack_helper.h>
 #include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/netfilter/nf_conntrack_act_ct.h>
 #include <uapi/linux/netfilter/nf_nat.h>
 
 static struct workqueue_struct *act_ct_wq;
@@ -56,6 +57,12 @@ static const struct rhashtable_params zones_params = {
 	.automatic_shrinking = true,
 };
 
+static struct nf_ct_ext_type act_ct_extend __read_mostly = {
+	.len = sizeof(struct nf_conn_act_ct_ext),
+	.align = __alignof__(struct nf_conn_act_ct_ext),
+	.id = NF_CT_EXT_ACT_CT,
+};
+
 static struct flow_action_entry *
 tcf_ct_flow_table_flow_action_get_next(struct flow_action *flow_action)
 {
@@ -358,6 +365,7 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
 				  struct nf_conn *ct,
 				  bool tcp)
 {
+	struct nf_conn_act_ct_ext *act_ct_ext;
 	struct flow_offload *entry;
 	int err;
 
@@ -375,6 +383,14 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
 		ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
 	}
 
+	act_ct_ext = nf_conn_act_ct_ext_find(ct);
+	if (act_ct_ext) {
+		entry->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
+			act_ct_ext->ifindex[IP_CT_DIR_ORIGINAL];
+		entry->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
+			act_ct_ext->ifindex[IP_CT_DIR_REPLY];
+	}
+
 	err = flow_offload_add(&ct_ft->nf_ft, entry);
 	if (err)
 		goto err_add;
@@ -1027,6 +1043,7 @@ do_nat:
 		if (!ct)
 			goto out_push;
 		nf_ct_deliver_cached_events(ct);
+		nf_conn_act_ct_ext_fill(skb, ct, ctinfo);
 
 		err = tcf_ct_act_nat(skb, ct, ctinfo, p->ct_action, &p->range, commit);
 		if (err != NF_ACCEPT)
@@ -1036,6 +1053,9 @@ do_nat:
 			tcf_ct_act_set_mark(ct, p->mark, p->mark_mask);
 			tcf_ct_act_set_labels(ct, p->labels, p->labels_mask);
 
+			if (!nf_ct_is_confirmed(ct))
+				nf_conn_act_ct_ext_add(ct);
+
 			/* This will take care of sending queued events
 			 * even if the connection is already confirmed.
 			 */
@@ -1583,10 +1603,16 @@ static int __init ct_init_module(void)
 	if (err)
 		goto err_register;
 
+	err = nf_ct_extend_register(&act_ct_extend);
+	if (err)
+		goto err_register_extend;
+
 	static_branch_inc(&tcf_frag_xmit_count);
 
 	return 0;
 
+err_register_extend:
+	tcf_unregister_action(&act_ct_ops, &ct_net_ops);
 err_register:
 	tcf_ct_flow_tables_uninit();
 err_tbl_init:
@@ -1597,6 +1623,7 @@ err_tbl_init:
 static void __exit ct_cleanup_module(void)
 {
 	static_branch_dec(&tcf_frag_xmit_count);
+	nf_ct_extend_unregister(&act_ct_extend);
 	tcf_unregister_action(&act_ct_ops, &ct_net_ops);
 	tcf_ct_flow_tables_uninit();
 	destroy_workqueue(act_ct_wq);