aboutsummaryrefslogtreecommitdiffstatshomepage
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2025-05-26 18:53:40 +0200
committerPaolo Abeni <pabeni@redhat.com>2025-05-26 18:53:41 +0200
commitf5b60d6a575a7573a15e08aad129382aa39c228c (patch)
treec1772462a9ed99597523e254c57a77ed27aa9c50
parentMerge tag 'ipsec-next-2025-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec-next (diff)
parentselftests: netfilter: Torture nftables netdev hooks (diff)
downloadwireguard-linux-f5b60d6a575a7573a15e08aad129382aa39c228c.tar.xz
wireguard-linux-f5b60d6a575a7573a15e08aad129382aa39c228c.zip
Merge tag 'nf-next-25-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following batch contains Netfilter updates for net-next, specifically 26 patches: 5 patches adding/updating selftests, 4 fixes, 3 PREEMPT_RT fixes, and 14 patches to enhance nf_tables): 1) Improve selftest coverage for pipapo 4 bit group format, from Florian Westphal. 2) Fix incorrect dependencies when compiling a kernel without legacy ip{6}tables support, also from Florian. 3) Two patches to fix nft_fib vrf issues, including selftest updates to improve coverage, also from Florian Westphal. 4) Fix incorrect nesting in nft_tunnel's GENEVE support, from Fernando F. Mancera. 5) Three patches to fix PREEMPT_RT issues with nf_dup infrastructure and nft_inner to match in inner headers, from Sebastian Andrzej Siewior. 6) Integrate conntrack information into nft trace infrastructure, from Florian Westphal. 7) A series of 13 patches to allow to specify wildcard netdevice in netdev basechain and flowtables, eg. table netdev filter { chain ingress { type filter hook ingress devices = { eth0, eth1, vlan* } priority 0; policy accept; } } This also allows for runtime hook registration on NETDEV_{UN}REGISTER event, from Phil Sutter. netfilter pull request 25-05-23 * tag 'nf-next-25-05-23' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf-next: (26 commits) selftests: netfilter: Torture nftables netdev hooks netfilter: nf_tables: Add notifications for hook changes netfilter: nf_tables: Support wildcard netdev hook specs netfilter: nf_tables: Sort labels in nft_netdev_hook_alloc() netfilter: nf_tables: Handle NETDEV_CHANGENAME events netfilter: nf_tables: Wrap netdev notifiers netfilter: nf_tables: Respect NETDEV_REGISTER events netfilter: nf_tables: Prepare for handling NETDEV_REGISTER events netfilter: nf_tables: Have a list of nf_hook_ops in nft_hook netfilter: nf_tables: Pass nf_hook_ops to nft_unregister_flowtable_hook() netfilter: nf_tables: Introduce nft_register_flowtable_ops() netfilter: nf_tables: Introduce nft_hook_find_ops{,_rcu}() netfilter: nf_tables: Introduce functions freeing nft_hook objects netfilter: nf_tables: add packets conntrack state to debug trace info netfilter: conntrack: make nf_conntrack_id callable without a module dependency netfilter: nf_dup_netdev: Move the recursion counter struct netdev_xmit netfilter: nft_inner: Use nested-BH locking for nft_pcpu_tun_ctx netfilter: nf_dup{4, 6}: Move duplication check to task_struct netfilter: nft_tunnel: fix geneve_opt dump selftests: netfilter: nft_fib.sh: add type and oif tests with and without VRFs ... ==================== Link: https://patch.msgid.link/20250523132712.458507-1-pablo@netfilter.org Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-rw-r--r--include/linux/netdevice_xmit.h3
-rw-r--r--include/linux/netfilter.h15
-rw-r--r--include/linux/sched.h1
-rw-r--r--include/net/netfilter/nf_tables.h12
-rw-r--r--include/net/netfilter/nft_fib.h9
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h18
-rw-r--r--include/uapi/linux/netfilter/nfnetlink.h2
-rw-r--r--net/ipv4/netfilter/ip_tables.c2
-rw-r--r--net/ipv4/netfilter/nf_dup_ipv4.c6
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c11
-rw-r--r--net/ipv6/netfilter/ip6_tables.c2
-rw-r--r--net/ipv6/netfilter/nf_dup_ipv6.c6
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c17
-rw-r--r--net/netfilter/core.c3
-rw-r--r--net/netfilter/nf_conntrack_core.c6
-rw-r--r--net/netfilter/nf_dup_netdev.c22
-rw-r--r--net/netfilter/nf_tables_api.c402
-rw-r--r--net/netfilter/nf_tables_offload.c51
-rw-r--r--net/netfilter/nf_tables_trace.c54
-rw-r--r--net/netfilter/nfnetlink.c1
-rw-r--r--net/netfilter/nft_chain_filter.c94
-rw-r--r--net/netfilter/nft_flow_offload.c2
-rw-r--r--net/netfilter/nft_inner.c18
-rw-r--r--net/netfilter/nft_tunnel.c8
-rw-r--r--net/netfilter/xt_TCPOPTSTRIP.c4
-rw-r--r--net/netfilter/xt_mark.c2
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile1
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_vrf.sh34
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh165
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh612
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_interface_stress.sh151
31 files changed, 1504 insertions, 230 deletions
diff --git a/include/linux/netdevice_xmit.h b/include/linux/netdevice_xmit.h
index 848735b3a7c0..813a19122ebb 100644
--- a/include/linux/netdevice_xmit.h
+++ b/include/linux/netdevice_xmit.h
@@ -11,6 +11,9 @@ struct netdev_xmit {
#if IS_ENABLED(CONFIG_NET_ACT_MIRRED)
u8 sched_mirred_nest;
#endif
+#if IS_ENABLED(CONFIG_NF_DUP_NETDEV)
+ u8 nf_dup_skb_recursion;
+#endif
};
#endif
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 2b8aac2c70ad..5f896fcc074d 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -95,6 +95,9 @@ enum nf_hook_ops_type {
};
struct nf_hook_ops {
+ struct list_head list;
+ struct rcu_head rcu;
+
/* User fills in from here down. */
nf_hookfn *hook;
struct net_device *dev;
@@ -470,6 +473,7 @@ struct nf_ct_hook {
void (*attach)(struct sk_buff *nskb, const struct sk_buff *skb);
void (*set_closing)(struct nf_conntrack *nfct);
int (*confirm)(struct sk_buff *skb);
+ u32 (*get_id)(const struct nf_conntrack *nfct);
};
extern const struct nf_ct_hook __rcu *nf_ct_hook;
@@ -498,17 +502,6 @@ extern const struct nf_defrag_hook __rcu *nf_defrag_v4_hook;
extern const struct nf_defrag_hook __rcu *nf_defrag_v6_hook;
/*
- * nf_skb_duplicated - TEE target has sent a packet
- *
- * When a xtables target sends a packet, the OUTPUT and POSTROUTING
- * hooks are traversed again, i.e. nft and xtables are invoked recursively.
- *
- * This is used by xtables TEE target to prevent the duplicated skb from
- * being duplicated again.
- */
-DECLARE_PER_CPU(bool, nf_skb_duplicated);
-
-/*
* Contains bitmask of ctnetlink event subscribers, if any.
* Can't be pernet due to NETLINK_LISTEN_ALL_NSID setsockopt flag.
*/
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f96ac1982893..52d9c52dc8f2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1044,6 +1044,7 @@ struct task_struct {
/* delay due to memory thrashing */
unsigned in_thrashing:1;
#endif
+ unsigned in_nf_duplicate:1;
#ifdef CONFIG_PREEMPT_RT
struct netdev_xmit net_xmit;
#endif
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 803d5f1601f9..e4d8e451e935 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1142,6 +1142,11 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set);
int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain);
+struct nft_hook;
+void nf_tables_chain_device_notify(const struct nft_chain *chain,
+ const struct nft_hook *hook,
+ const struct net_device *dev, int event);
+
enum nft_chain_types {
NFT_CHAIN_T_DEFAULT = 0,
NFT_CHAIN_T_ROUTE,
@@ -1199,12 +1204,17 @@ struct nft_stats {
struct nft_hook {
struct list_head list;
- struct nf_hook_ops ops;
+ struct list_head ops_list;
struct rcu_head rcu;
char ifname[IFNAMSIZ];
u8 ifnamelen;
};
+struct nf_hook_ops *nft_hook_find_ops(const struct nft_hook *hook,
+ const struct net_device *dev);
+struct nf_hook_ops *nft_hook_find_ops_rcu(const struct nft_hook *hook,
+ const struct net_device *dev);
+
/**
* struct nft_base_chain - nf_tables base chain
*
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
index 6e202ed5e63f..7370fba844ef 100644
--- a/include/net/netfilter/nft_fib.h
+++ b/include/net/netfilter/nft_fib.h
@@ -2,6 +2,7 @@
#ifndef _NFT_FIB_H_
#define _NFT_FIB_H_
+#include <net/l3mdev.h>
#include <net/netfilter/nf_tables.h>
struct nft_fib {
@@ -39,6 +40,14 @@ static inline bool nft_fib_can_skip(const struct nft_pktinfo *pkt)
return nft_fib_is_loopback(pkt->skb, indev);
}
+static inline int nft_fib_l3mdev_master_ifindex_rcu(const struct nft_pktinfo *pkt,
+ const struct net_device *iif)
+{
+ const struct net_device *dev = iif ? iif : pkt->skb->dev;
+
+ return l3mdev_master_ifindex_rcu(dev);
+}
+
int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset);
int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[]);
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 7d6bc19a0153..518ba144544c 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -142,6 +142,8 @@ enum nf_tables_msg_types {
NFT_MSG_DESTROYOBJ,
NFT_MSG_DESTROYFLOWTABLE,
NFT_MSG_GETSETELEM_RESET,
+ NFT_MSG_NEWDEV,
+ NFT_MSG_DELDEV,
NFT_MSG_MAX,
};
@@ -1784,10 +1786,18 @@ enum nft_synproxy_attributes {
* enum nft_device_attributes - nf_tables device netlink attributes
*
* @NFTA_DEVICE_NAME: name of this device (NLA_STRING)
+ * @NFTA_DEVICE_TABLE: table containing the flowtable or chain hooking into the device (NLA_STRING)
+ * @NFTA_DEVICE_FLOWTABLE: flowtable hooking into the device (NLA_STRING)
+ * @NFTA_DEVICE_CHAIN: chain hooking into the device (NLA_STRING)
+ * @NFTA_DEVICE_SPEC: hook spec matching the device (NLA_STRING)
*/
enum nft_devices_attributes {
NFTA_DEVICE_UNSPEC,
NFTA_DEVICE_NAME,
+ NFTA_DEVICE_TABLE,
+ NFTA_DEVICE_FLOWTABLE,
+ NFTA_DEVICE_CHAIN,
+ NFTA_DEVICE_SPEC,
__NFTA_DEVICE_MAX
};
#define NFTA_DEVICE_MAX (__NFTA_DEVICE_MAX - 1)
@@ -1841,6 +1851,10 @@ enum nft_xfrm_keys {
* @NFTA_TRACE_MARK: nfmark (NLA_U32)
* @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32)
* @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32)
+ * @NFTA_TRACE_CT_ID: conntrack id (NLA_U32)
+ * @NFTA_TRACE_CT_DIRECTION: packets direction (NLA_U8)
+ * @NFTA_TRACE_CT_STATUS: conntrack status (NLA_U32)
+ * @NFTA_TRACE_CT_STATE: packet state (new, established, ...) (NLA_U32)
*/
enum nft_trace_attributes {
NFTA_TRACE_UNSPEC,
@@ -1861,6 +1875,10 @@ enum nft_trace_attributes {
NFTA_TRACE_NFPROTO,
NFTA_TRACE_POLICY,
NFTA_TRACE_PAD,
+ NFTA_TRACE_CT_ID,
+ NFTA_TRACE_CT_DIRECTION,
+ NFTA_TRACE_CT_STATUS,
+ NFTA_TRACE_CT_STATE,
__NFTA_TRACE_MAX
};
#define NFTA_TRACE_MAX (__NFTA_TRACE_MAX - 1)
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 6cd58cd2a6f0..50d807af2649 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -25,6 +25,8 @@ enum nfnetlink_groups {
#define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA
NFNLGRP_NFTRACE,
#define NFNLGRP_NFTRACE NFNLGRP_NFTRACE
+ NFNLGRP_NFT_DEV,
+#define NFNLGRP_NFT_DEV NFNLGRP_NFT_DEV
__NFNLGRP_MAX,
};
#define NFNLGRP_MAX (__NFNLGRP_MAX - 1)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 3d101613f27f..23c8deff8095 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -270,7 +270,7 @@ ipt_do_table(void *priv,
* but it is no problem since absolute verdict is issued by these.
*/
if (static_key_false(&xt_tee_enabled))
- jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
+ jumpstack += private->stacksize * current->in_nf_duplicate;
e = get_entry(table_base, private->hook_entry[hook]);
diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c
index 25e1e8eb18dd..ed08fb78cfa8 100644
--- a/net/ipv4/netfilter/nf_dup_ipv4.c
+++ b/net/ipv4/netfilter/nf_dup_ipv4.c
@@ -54,7 +54,7 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
struct iphdr *iph;
local_bh_disable();
- if (this_cpu_read(nf_skb_duplicated))
+ if (current->in_nf_duplicate)
goto out;
/*
* Copy the skb, and route the copy. Will later return %XT_CONTINUE for
@@ -86,9 +86,9 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum,
--iph->ttl;
if (nf_dup_ipv4_route(net, skb, gw, oif)) {
- __this_cpu_write(nf_skb_duplicated, true);
+ current->in_nf_duplicate = true;
ip_local_out(net, skb->sk, skb);
- __this_cpu_write(nf_skb_duplicated, false);
+ current->in_nf_duplicate = false;
} else {
kfree_skb(skb);
}
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 9082ca17e845..7e7c49535e3f 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -50,7 +50,12 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
else
addr = iph->saddr;
- *dst = inet_dev_addr_type(nft_net(pkt), dev, addr);
+ if (priv->flags & (NFTA_FIB_F_IIF | NFTA_FIB_F_OIF)) {
+ *dst = inet_dev_addr_type(nft_net(pkt), dev, addr);
+ return;
+ }
+
+ *dst = inet_addr_type_dev_table(nft_net(pkt), pkt->skb->dev, addr);
}
EXPORT_SYMBOL_GPL(nft_fib4_eval_type);
@@ -65,8 +70,8 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
struct flowi4 fl4 = {
.flowi4_scope = RT_SCOPE_UNIVERSE,
.flowi4_iif = LOOPBACK_IFINDEX,
+ .flowi4_proto = pkt->tprot,
.flowi4_uid = sock_net_uid(nft_net(pkt), NULL),
- .flowi4_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)),
};
const struct net_device *oif;
const struct net_device *found;
@@ -90,6 +95,8 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
else
oif = NULL;
+ fl4.flowi4_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, oif);
+
iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph);
if (!iph) {
regs->verdict.code = NFT_BREAK;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 7d5602950ae7..d585ac3c1113 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -292,7 +292,7 @@ ip6t_do_table(void *priv, struct sk_buff *skb,
* but it is no problem since absolute verdict is issued by these.
*/
if (static_key_false(&xt_tee_enabled))
- jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
+ jumpstack += private->stacksize * current->in_nf_duplicate;
e = get_entry(table_base, private->hook_entry[hook]);
diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c
index 0c39c77fe8a8..b903c62c00c9 100644
--- a/net/ipv6/netfilter/nf_dup_ipv6.c
+++ b/net/ipv6/netfilter/nf_dup_ipv6.c
@@ -48,7 +48,7 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
const struct in6_addr *gw, int oif)
{
local_bh_disable();
- if (this_cpu_read(nf_skb_duplicated))
+ if (current->in_nf_duplicate)
goto out;
skb = pskb_copy(skb, GFP_ATOMIC);
if (skb == NULL)
@@ -64,9 +64,9 @@ void nf_dup_ipv6(struct net *net, struct sk_buff *skb, unsigned int hooknum,
--iph->hop_limit;
}
if (nf_dup_ipv6_route(net, skb, gw, oif)) {
- __this_cpu_write(nf_skb_duplicated, true);
+ current->in_nf_duplicate = true;
ip6_local_out(net, skb->sk, skb);
- __this_cpu_write(nf_skb_duplicated, false);
+ current->in_nf_duplicate = false;
} else {
kfree_skb(skb);
}
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 7fd9d7b21cd4..421036a3605b 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -50,6 +50,7 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv,
fl6->flowi6_mark = pkt->skb->mark;
fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK;
+ fl6->flowi6_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, dev);
return lookup_flags;
}
@@ -73,8 +74,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv,
else if (priv->flags & NFTA_FIB_F_OIF)
dev = nft_out(pkt);
- fl6.flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev);
-
nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph);
if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true))
@@ -158,6 +157,7 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
{
const struct nft_fib *priv = nft_expr_priv(expr);
int noff = skb_network_offset(pkt->skb);
+ const struct net_device *found = NULL;
const struct net_device *oif = NULL;
u32 *dest = &regs->data[priv->dreg];
struct ipv6hdr *iph, _iph;
@@ -165,7 +165,6 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
.flowi6_iif = LOOPBACK_IFINDEX,
.flowi6_proto = pkt->tprot,
.flowi6_uid = sock_net_uid(nft_net(pkt), NULL),
- .flowi6_l3mdev = l3mdev_master_ifindex_rcu(nft_in(pkt)),
};
struct rt6_info *rt;
int lookup_flags;
@@ -203,11 +202,15 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
goto put_rt_err;
- if (oif && oif != rt->rt6i_idev->dev &&
- l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex)
- goto put_rt_err;
+ if (!oif) {
+ found = rt->rt6i_idev->dev;
+ } else {
+ if (oif == rt->rt6i_idev->dev ||
+ l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == oif->ifindex)
+ found = oif;
+ }
- nft_fib_store_result(dest, priv, rt->rt6i_idev->dev);
+ nft_fib_store_result(dest, priv, found);
put_rt_err:
ip6_rt_put(rt);
}
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index b9f551f02c81..11a702065bab 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -31,9 +31,6 @@
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);
-DEFINE_PER_CPU(bool, nf_skb_duplicated);
-EXPORT_SYMBOL_GPL(nf_skb_duplicated);
-
#ifdef CONFIG_JUMP_LABEL
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index de8d50af9b5b..201d3c4ec623 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -505,6 +505,11 @@ u32 nf_ct_get_id(const struct nf_conn *ct)
}
EXPORT_SYMBOL_GPL(nf_ct_get_id);
+static u32 nf_conntrack_get_id(const struct nf_conntrack *nfct)
+{
+ return nf_ct_get_id(nf_ct_to_nf_conn(nfct));
+}
+
static void
clean_from_lists(struct nf_conn *ct)
{
@@ -2710,6 +2715,7 @@ static const struct nf_ct_hook nf_conntrack_hook = {
.attach = nf_conntrack_attach,
.set_closing = nf_conntrack_set_closing,
.confirm = __nf_conntrack_confirm,
+ .get_id = nf_conntrack_get_id,
};
void nf_conntrack_init_end(void)
diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c
index a8e2425e43b0..fab8b9011098 100644
--- a/net/netfilter/nf_dup_netdev.c
+++ b/net/netfilter/nf_dup_netdev.c
@@ -15,12 +15,26 @@
#define NF_RECURSION_LIMIT 2
-static DEFINE_PER_CPU(u8, nf_dup_skb_recursion);
+#ifndef CONFIG_PREEMPT_RT
+static u8 *nf_get_nf_dup_skb_recursion(void)
+{
+ return this_cpu_ptr(&softnet_data.xmit.nf_dup_skb_recursion);
+}
+#else
+
+static u8 *nf_get_nf_dup_skb_recursion(void)
+{
+ return &current->net_xmit.nf_dup_skb_recursion;
+}
+
+#endif
static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev,
enum nf_dev_hooks hook)
{
- if (__this_cpu_read(nf_dup_skb_recursion) > NF_RECURSION_LIMIT)
+ u8 *nf_dup_skb_recursion = nf_get_nf_dup_skb_recursion();
+
+ if (*nf_dup_skb_recursion > NF_RECURSION_LIMIT)
goto err;
if (hook == NF_NETDEV_INGRESS && skb_mac_header_was_set(skb)) {
@@ -32,9 +46,9 @@ static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev,
skb->dev = dev;
skb_clear_tstamp(skb);
- __this_cpu_inc(nf_dup_skb_recursion);
+ (*nf_dup_skb_recursion)++;
dev_queue_xmit(skb);
- __this_cpu_dec(nf_dup_skb_recursion);
+ (*nf_dup_skb_recursion)--;
return;
err:
kfree_skb(skb);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index b28f6730e26d..24c71ecb2179 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -300,40 +300,75 @@ void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
static int nft_netdev_register_hooks(struct net *net,
struct list_head *hook_list)
{
+ struct nf_hook_ops *ops;
struct nft_hook *hook;
int err, j;
j = 0;
list_for_each_entry(hook, hook_list, list) {
- err = nf_register_net_hook(net, &hook->ops);
- if (err < 0)
- goto err_register;
+ list_for_each_entry(ops, &hook->ops_list, list) {
+ err = nf_register_net_hook(net, ops);
+ if (err < 0)
+ goto err_register;
- j++;
+ j++;
+ }
}
return 0;
err_register:
list_for_each_entry(hook, hook_list, list) {
- if (j-- <= 0)
- break;
+ list_for_each_entry(ops, &hook->ops_list, list) {
+ if (j-- <= 0)
+ break;
- nf_unregister_net_hook(net, &hook->ops);
+ nf_unregister_net_hook(net, ops);
+ }
}
return err;
}
+static void nft_netdev_hook_free_ops(struct nft_hook *hook)
+{
+ struct nf_hook_ops *ops, *next;
+
+ list_for_each_entry_safe(ops, next, &hook->ops_list, list) {
+ list_del(&ops->list);
+ kfree(ops);
+ }
+}
+
+static void nft_netdev_hook_free(struct nft_hook *hook)
+{
+ nft_netdev_hook_free_ops(hook);
+ kfree(hook);
+}
+
+static void __nft_netdev_hook_free_rcu(struct rcu_head *rcu)
+{
+ struct nft_hook *hook = container_of(rcu, struct nft_hook, rcu);
+
+ nft_netdev_hook_free(hook);
+}
+
+static void nft_netdev_hook_free_rcu(struct nft_hook *hook)
+{
+ call_rcu(&hook->rcu, __nft_netdev_hook_free_rcu);
+}
+
static void nft_netdev_unregister_hooks(struct net *net,
struct list_head *hook_list,
bool release_netdev)
{
struct nft_hook *hook, *next;
+ struct nf_hook_ops *ops;
list_for_each_entry_safe(hook, next, hook_list, list) {
- nf_unregister_net_hook(net, &hook->ops);
+ list_for_each_entry(ops, &hook->ops_list, list)
+ nf_unregister_net_hook(net, ops);
if (release_netdev) {
list_del(&hook->list);
- kfree_rcu(hook, rcu);
+ nft_netdev_hook_free_rcu(hook);
}
}
}
@@ -2253,7 +2288,7 @@ void nf_tables_chain_destroy(struct nft_chain *chain)
list_for_each_entry_safe(hook, next,
&basechain->hook_list, list) {
list_del_rcu(&hook->list);
- kfree_rcu(hook, rcu);
+ nft_netdev_hook_free_rcu(hook);
}
}
module_put(basechain->type->owner);
@@ -2274,19 +2309,20 @@ void nf_tables_chain_destroy(struct nft_chain *chain)
static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
const struct nlattr *attr)
{
+ struct nf_hook_ops *ops;
struct net_device *dev;
struct nft_hook *hook;
int err;
hook = kzalloc(sizeof(struct nft_hook), GFP_KERNEL_ACCOUNT);
- if (!hook) {
- err = -ENOMEM;
- goto err_hook_alloc;
- }
+ if (!hook)
+ return ERR_PTR(-ENOMEM);
+
+ INIT_LIST_HEAD(&hook->ops_list);
err = nla_strscpy(hook->ifname, attr, IFNAMSIZ);
if (err < 0)
- goto err_hook_dev;
+ goto err_hook_free;
hook->ifnamelen = nla_len(attr);
@@ -2294,18 +2330,22 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
* indirectly serializing all the other holders of the commit_mutex with
* the rtnl_mutex.
*/
- dev = __dev_get_by_name(net, hook->ifname);
- if (!dev) {
- err = -ENOENT;
- goto err_hook_dev;
- }
- hook->ops.dev = dev;
+ for_each_netdev(net, dev) {
+ if (strncmp(dev->name, hook->ifname, hook->ifnamelen))
+ continue;
+ ops = kzalloc(sizeof(struct nf_hook_ops), GFP_KERNEL_ACCOUNT);
+ if (!ops) {
+ err = -ENOMEM;
+ goto err_hook_free;
+ }
+ ops->dev = dev;
+ list_add_tail(&ops->list, &hook->ops_list);
+ }
return hook;
-err_hook_dev:
- kfree(hook);
-err_hook_alloc:
+err_hook_free:
+ nft_netdev_hook_free(hook);
return ERR_PTR(err);
}
@@ -2315,7 +2355,8 @@ static struct nft_hook *nft_hook_list_find(struct list_head *hook_list,
struct nft_hook *hook;
list_for_each_entry(hook, hook_list, list) {
- if (!strcmp(hook->ifname, this->ifname))
+ if (!strncmp(hook->ifname, this->ifname,
+ min(hook->ifnamelen, this->ifnamelen)))
return hook;
}
@@ -2345,7 +2386,7 @@ static int nf_tables_parse_netdev_hooks(struct net *net,
}
if (nft_hook_list_find(hook_list, hook)) {
NL_SET_BAD_ATTR(extack, tmp);
- kfree(hook);
+ nft_netdev_hook_free(hook);
err = -EEXIST;
goto err_hook;
}
@@ -2363,7 +2404,7 @@ static int nf_tables_parse_netdev_hooks(struct net *net,
err_hook:
list_for_each_entry_safe(hook, next, hook_list, list) {
list_del(&hook->list);
- kfree(hook);
+ nft_netdev_hook_free(hook);
}
return err;
}
@@ -2506,7 +2547,7 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
list_for_each_entry_safe(h, next, &hook->list, list) {
list_del(&h->list);
- kfree(h);
+ nft_netdev_hook_free(h);
}
module_put(hook->type->owner);
}
@@ -2559,6 +2600,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
struct nft_chain_hook *hook, u32 flags)
{
struct nft_chain *chain;
+ struct nf_hook_ops *ops;
struct nft_hook *h;
basechain->type = hook->type;
@@ -2567,8 +2609,10 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
if (nft_base_chain_netdev(family, hook->num)) {
list_splice_init(&hook->list, &basechain->hook_list);
- list_for_each_entry(h, &basechain->hook_list, list)
- nft_basechain_hook_init(&h->ops, family, hook, chain);
+ list_for_each_entry(h, &basechain->hook_list, list) {
+ list_for_each_entry(ops, &h->ops_list, list)
+ nft_basechain_hook_init(ops, family, hook, chain);
+ }
}
nft_basechain_hook_init(&basechain->ops, family, hook, chain);
@@ -2787,15 +2831,17 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) {
list_for_each_entry_safe(h, next, &hook.list, list) {
- h->ops.pf = basechain->ops.pf;
- h->ops.hooknum = basechain->ops.hooknum;
- h->ops.priority = basechain->ops.priority;
- h->ops.priv = basechain->ops.priv;
- h->ops.hook = basechain->ops.hook;
+ list_for_each_entry(ops, &h->ops_list, list) {
+ ops->pf = basechain->ops.pf;
+ ops->hooknum = basechain->ops.hooknum;
+ ops->priority = basechain->ops.priority;
+ ops->priv = basechain->ops.priv;
+ ops->hook = basechain->ops.hook;
+ }
if (nft_hook_list_find(&basechain->hook_list, h)) {
list_del(&h->list);
- kfree(h);
+ nft_netdev_hook_free(h);
}
}
} else {
@@ -2913,10 +2959,12 @@ err_trans:
err_hooks:
if (nla[NFTA_CHAIN_HOOK]) {
list_for_each_entry_safe(h, next, &hook.list, list) {
- if (unregister)
- nf_unregister_net_hook(ctx->net, &h->ops);
+ if (unregister) {
+ list_for_each_entry(ops, &h->ops_list, list)
+ nf_unregister_net_hook(ctx->net, ops);
+ }
list_del(&h->list);
- kfree_rcu(h, rcu);
+ nft_netdev_hook_free_rcu(h);
}
module_put(hook.type->owner);
}
@@ -8785,6 +8833,7 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
struct netlink_ext_ack *extack, bool add)
{
struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
+ struct nf_hook_ops *ops;
struct nft_hook *hook;
int hooknum, priority;
int err;
@@ -8839,11 +8888,13 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
}
list_for_each_entry(hook, &flowtable_hook->list, list) {
- hook->ops.pf = NFPROTO_NETDEV;
- hook->ops.hooknum = flowtable_hook->num;
- hook->ops.priority = flowtable_hook->priority;
- hook->ops.priv = &flowtable->data;
- hook->ops.hook = flowtable->data.type->hook;
+ list_for_each_entry(ops, &hook->ops_list, list) {
+ ops->pf = NFPROTO_NETDEV;
+ ops->hooknum = flowtable_hook->num;
+ ops->priority = flowtable_hook->priority;
+ ops->priv = &flowtable->data;
+ ops->hook = flowtable->data.type->hook;
+ }
}
return err;
@@ -8885,12 +8936,12 @@ nft_flowtable_type_get(struct net *net, u8 family)
}
/* Only called from error and netdev event paths. */
-static void nft_unregister_flowtable_hook(struct net *net,
- struct nft_flowtable *flowtable,
- struct nft_hook *hook)
+static void nft_unregister_flowtable_ops(struct net *net,
+ struct nft_flowtable *flowtable,
+ struct nf_hook_ops *ops)
{
- nf_unregister_net_hook(net, &hook->ops);
- flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
+ nf_unregister_net_hook(net, ops);
+ flowtable->data.type->setup(&flowtable->data, ops->dev,
FLOW_BLOCK_UNBIND);
}
@@ -8900,14 +8951,14 @@ static void __nft_unregister_flowtable_net_hooks(struct net *net,
bool release_netdev)
{
struct nft_hook *hook, *next;
+ struct nf_hook_ops *ops;
list_for_each_entry_safe(hook, next, hook_list, list) {
- nf_unregister_net_hook(net, &hook->ops);
- flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
- FLOW_BLOCK_UNBIND);
+ list_for_each_entry(ops, &hook->ops_list, list)
+ nft_unregister_flowtable_ops(net, flowtable, ops);
if (release_netdev) {
list_del(&hook->list);
- kfree_rcu(hook, rcu);
+ nft_netdev_hook_free_rcu(hook);
}
}
}
@@ -8919,6 +8970,26 @@ static void nft_unregister_flowtable_net_hooks(struct net *net,
__nft_unregister_flowtable_net_hooks(net, flowtable, hook_list, false);
}
+static int nft_register_flowtable_ops(struct net *net,
+ struct nft_flowtable *flowtable,
+ struct nf_hook_ops *ops)
+{
+ int err;
+
+ err = flowtable->data.type->setup(&flowtable->data,
+ ops->dev, FLOW_BLOCK_BIND);
+ if (err < 0)
+ return err;
+
+ err = nf_register_net_hook(net, ops);
+ if (!err)
+ return 0;
+
+ flowtable->data.type->setup(&flowtable->data,
+ ops->dev, FLOW_BLOCK_UNBIND);
+ return err;
+}
+
static int nft_register_flowtable_net_hooks(struct net *net,
struct nft_table *table,
struct list_head *hook_list,
@@ -8926,6 +8997,7 @@ static int nft_register_flowtable_net_hooks(struct net *net,
{
struct nft_hook *hook, *next;
struct nft_flowtable *ft;
+ struct nf_hook_ops *ops;
int err, i = 0;
list_for_each_entry(hook, hook_list, list) {
@@ -8939,33 +9011,27 @@ static int nft_register_flowtable_net_hooks(struct net *net,
}
}
- err = flowtable->data.type->setup(&flowtable->data,
- hook->ops.dev,
- FLOW_BLOCK_BIND);
- if (err < 0)
- goto err_unregister_net_hooks;
+ list_for_each_entry(ops, &hook->ops_list, list) {
+ err = nft_register_flowtable_ops(net, flowtable, ops);
+ if (err < 0)
+ goto err_unregister_net_hooks;
- err = nf_register_net_hook(net, &hook->ops);
- if (err < 0) {
- flowtable->data.type->setup(&flowtable->data,
- hook->ops.dev,
- FLOW_BLOCK_UNBIND);
- goto err_unregister_net_hooks;
+ i++;
}
-
- i++;
}
return 0;
err_unregister_net_hooks:
list_for_each_entry_safe(hook, next, hook_list, list) {
- if (i-- <= 0)
- break;
+ list_for_each_entry(ops, &hook->ops_list, list) {
+ if (i-- <= 0)
+ break;
- nft_unregister_flowtable_hook(net, flowtable, hook);
+ nft_unregister_flowtable_ops(net, flowtable, ops);
+ }
list_del_rcu(&hook->list);
- kfree_rcu(hook, rcu);
+ nft_netdev_hook_free_rcu(hook);
}
return err;
@@ -8977,7 +9043,7 @@ static void nft_hooks_destroy(struct list_head *hook_list)
list_for_each_entry_safe(hook, next, hook_list, list) {
list_del_rcu(&hook->list);
- kfree_rcu(hook, rcu);
+ nft_netdev_hook_free_rcu(hook);
}
}
@@ -8988,6 +9054,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
const struct nlattr * const *nla = ctx->nla;
struct nft_flowtable_hook flowtable_hook;
struct nft_hook *hook, *next;
+ struct nf_hook_ops *ops;
struct nft_trans *trans;
bool unregister = false;
u32 flags;
@@ -9001,7 +9068,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
list_for_each_entry_safe(hook, next, &flowtable_hook.list, list) {
if (nft_hook_list_find(&flowtable->hook_list, hook)) {
list_del(&hook->list);
- kfree(hook);
+ nft_netdev_hook_free(hook);
}
}
@@ -9045,10 +9112,13 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
err_flowtable_update_hook:
list_for_each_entry_safe(hook, next, &flowtable_hook.list, list) {
- if (unregister)
- nft_unregister_flowtable_hook(ctx->net, flowtable, hook);
+ if (unregister) {
+ list_for_each_entry(ops, &hook->ops_list, list)
+ nft_unregister_flowtable_ops(ctx->net,
+ flowtable, ops);
+ }
list_del_rcu(&hook->list);
- kfree_rcu(hook, rcu);
+ nft_netdev_hook_free_rcu(hook);
}
return err;
@@ -9194,7 +9264,7 @@ static void nft_flowtable_hook_release(struct nft_flowtable_hook *flowtable_hook
list_for_each_entry_safe(this, next, &flowtable_hook->list, list) {
list_del(&this->list);
- kfree(this);
+ nft_netdev_hook_free(this);
}
}
@@ -9557,7 +9627,7 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
flowtable->data.type->free(&flowtable->data);
list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
list_del_rcu(&hook->list);
- kfree_rcu(hook, rcu);
+ nft_netdev_hook_free_rcu(hook);
}
kfree(flowtable->name);
module_put(flowtable->data.type->owner);
@@ -9590,46 +9660,190 @@ nla_put_failure:
return -EMSGSIZE;
}
-static void nft_flowtable_event(unsigned long event, struct net_device *dev,
- struct nft_flowtable *flowtable)
+struct nf_hook_ops *nft_hook_find_ops(const struct nft_hook *hook,
+ const struct net_device *dev)
+{
+ struct nf_hook_ops *ops;
+
+ list_for_each_entry(ops, &hook->ops_list, list) {
+ if (ops->dev == dev)
+ return ops;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(nft_hook_find_ops);
+
+struct nf_hook_ops *nft_hook_find_ops_rcu(const struct nft_hook *hook,
+ const struct net_device *dev)
{
+ struct nf_hook_ops *ops;
+
+ list_for_each_entry_rcu(ops, &hook->ops_list, list) {
+ if (ops->dev == dev)
+ return ops;
+ }
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(nft_hook_find_ops_rcu);
+
+static void
+nf_tables_device_notify(const struct nft_table *table, int attr,
+ const char *name, const struct nft_hook *hook,
+ const struct net_device *dev, int event)
+{
+ struct net *net = dev_net(dev);
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ u16 flags = 0;
+
+ if (!nfnetlink_has_listeners(net, NFNLGRP_NFT_DEV))
+ return;
+
+ skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ goto err;
+
+ event = event == NETDEV_REGISTER ? NFT_MSG_NEWDEV : NFT_MSG_DELDEV;
+ event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
+ nlh = nfnl_msg_put(skb, 0, 0, event, flags, table->family,
+ NFNETLINK_V0, nft_base_seq(net));
+ if (!nlh)
+ goto err;
+
+ if (nla_put_string(skb, NFTA_DEVICE_TABLE, table->name) ||
+ nla_put_string(skb, attr, name) ||
+ nla_put(skb, NFTA_DEVICE_SPEC, hook->ifnamelen, hook->ifname) ||
+ nla_put_string(skb, NFTA_DEVICE_NAME, dev->name))
+ goto err;
+
+ nlmsg_end(skb, nlh);
+ nfnetlink_send(skb, net, 0, NFNLGRP_NFT_DEV,
+ nlmsg_report(nlh), GFP_KERNEL);
+ return;
+err:
+ if (skb)
+ kfree_skb(skb);
+ nfnetlink_set_err(net, 0, NFNLGRP_NFT_DEV, -ENOBUFS);
+}
+
+void
+nf_tables_chain_device_notify(const struct nft_chain *chain,
+ const struct nft_hook *hook,
+ const struct net_device *dev, int event)
+{
+ nf_tables_device_notify(chain->table, NFTA_DEVICE_CHAIN,
+ chain->name, hook, dev, event);
+}
+
+static void
+nf_tables_flowtable_device_notify(const struct nft_flowtable *ft,
+ const struct nft_hook *hook,
+ const struct net_device *dev, int event)
+{
+ nf_tables_device_notify(ft->table, NFTA_DEVICE_FLOWTABLE,
+ ft->name, hook, dev, event);
+}
+
+static int nft_flowtable_event(unsigned long event, struct net_device *dev,
+ struct nft_flowtable *flowtable, bool changename)
+{
+ struct nf_hook_ops *ops;
struct nft_hook *hook;
+ bool match;
list_for_each_entry(hook, &flowtable->hook_list, list) {
- if (hook->ops.dev != dev)
- continue;
+ ops = nft_hook_find_ops(hook, dev);
+ match = !strncmp(hook->ifname, dev->name, hook->ifnamelen);
- /* flow_offload_netdev_event() cleans up entries for us. */
- nft_unregister_flowtable_hook(dev_net(dev), flowtable, hook);
- list_del_rcu(&hook->list);
- kfree_rcu(hook, rcu);
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ /* NOP if not found or new name still matching */
+ if (!ops || (changename && match))
+ continue;
+
+ /* flow_offload_netdev_event() cleans up entries for us. */
+ nft_unregister_flowtable_ops(dev_net(dev),
+ flowtable, ops);
+ list_del_rcu(&ops->list);
+ kfree_rcu(ops, rcu);
+ break;
+ case NETDEV_REGISTER:
+ /* NOP if not matching or already registered */
+ if (!match || (changename && ops))
+ continue;
+
+ ops = kzalloc(sizeof(struct nf_hook_ops),
+ GFP_KERNEL_ACCOUNT);
+ if (!ops)
+ return 1;
+
+ ops->pf = NFPROTO_NETDEV;
+ ops->hooknum = flowtable->hooknum;
+ ops->priority = flowtable->data.priority;
+ ops->priv = &flowtable->data;
+ ops->hook = flowtable->data.type->hook;
+ ops->dev = dev;
+ if (nft_register_flowtable_ops(dev_net(dev),
+ flowtable, ops)) {
+ kfree(ops);
+ return 1;
+ }
+ list_add_tail_rcu(&ops->list, &hook->ops_list);
+ break;
+ }
+ nf_tables_flowtable_device_notify(flowtable, hook, dev, event);
break;
}
+ return 0;
+}
+
+static int __nf_tables_flowtable_event(unsigned long event,
+ struct net_device *dev,
+ bool changename)
+{
+ struct nftables_pernet *nft_net = nft_pernet(dev_net(dev));
+ struct nft_flowtable *flowtable;
+ struct nft_table *table;
+
+ list_for_each_entry(table, &nft_net->tables, list) {
+ list_for_each_entry(flowtable, &table->flowtables, list) {
+ if (nft_flowtable_event(event, dev,
+ flowtable, changename))
+ return 1;
+ }
+ }
+ return 0;
}
static int nf_tables_flowtable_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct nft_flowtable *flowtable;
struct nftables_pernet *nft_net;
- struct nft_table *table;
+ int ret = NOTIFY_DONE;
struct net *net;
- if (event != NETDEV_UNREGISTER)
- return 0;
+ if (event != NETDEV_REGISTER &&
+ event != NETDEV_UNREGISTER &&
+ event != NETDEV_CHANGENAME)
+ return NOTIFY_DONE;
net = dev_net(dev);
nft_net = nft_pernet(net);
mutex_lock(&nft_net->commit_mutex);
- list_for_each_entry(table, &nft_net->tables, list) {
- list_for_each_entry(flowtable, &table->flowtables, list) {
- nft_flowtable_event(event, dev, flowtable);
+
+ if (event == NETDEV_CHANGENAME) {
+ if (__nf_tables_flowtable_event(NETDEV_REGISTER, dev, true)) {
+ ret = NOTIFY_BAD;
+ goto out_unlock;
}
+ __nf_tables_flowtable_event(NETDEV_UNREGISTER, dev, true);
+ } else if (__nf_tables_flowtable_event(event, dev, false)) {
+ ret = NOTIFY_BAD;
}
+out_unlock:
mutex_unlock(&nft_net->commit_mutex);
-
- return NOTIFY_DONE;
+ return ret;
}
static struct notifier_block nf_tables_flowtable_notifier = {
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 64675f1c7f29..fd30e205de84 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -220,6 +220,7 @@ static int nft_chain_offload_priority(const struct nft_base_chain *basechain)
bool nft_chain_offload_support(const struct nft_base_chain *basechain)
{
+ struct nf_hook_ops *ops;
struct net_device *dev;
struct nft_hook *hook;
@@ -227,13 +228,16 @@ bool nft_chain_offload_support(const struct nft_base_chain *basechain)
return false;
list_for_each_entry(hook, &basechain->hook_list, list) {
- if (hook->ops.pf != NFPROTO_NETDEV ||
- hook->ops.hooknum != NF_NETDEV_INGRESS)
- return false;
-
- dev = hook->ops.dev;
- if (!dev->netdev_ops->ndo_setup_tc && !flow_indr_dev_exists())
- return false;
+ list_for_each_entry(ops, &hook->ops_list, list) {
+ if (ops->pf != NFPROTO_NETDEV ||
+ ops->hooknum != NF_NETDEV_INGRESS)
+ return false;
+
+ dev = ops->dev;
+ if (!dev->netdev_ops->ndo_setup_tc &&
+ !flow_indr_dev_exists())
+ return false;
+ }
}
return true;
@@ -455,34 +459,37 @@ static int nft_flow_block_chain(struct nft_base_chain *basechain,
const struct net_device *this_dev,
enum flow_block_command cmd)
{
- struct net_device *dev;
+ struct nf_hook_ops *ops;
struct nft_hook *hook;
int err, i = 0;
list_for_each_entry(hook, &basechain->hook_list, list) {
- dev = hook->ops.dev;
- if (this_dev && this_dev != dev)
- continue;
+ list_for_each_entry(ops, &hook->ops_list, list) {
+ if (this_dev && this_dev != ops->dev)
+ continue;
- err = nft_chain_offload_cmd(basechain, dev, cmd);
- if (err < 0 && cmd == FLOW_BLOCK_BIND) {
- if (!this_dev)
- goto err_flow_block;
+ err = nft_chain_offload_cmd(basechain, ops->dev, cmd);
+ if (err < 0 && cmd == FLOW_BLOCK_BIND) {
+ if (!this_dev)
+ goto err_flow_block;
- return err;
+ return err;
+ }
+ i++;
}
- i++;
}
return 0;
err_flow_block:
list_for_each_entry(hook, &basechain->hook_list, list) {
- if (i-- <= 0)
- break;
+ list_for_each_entry(ops, &hook->ops_list, list) {
+ if (i-- <= 0)
+ break;
- dev = hook->ops.dev;
- nft_chain_offload_cmd(basechain, dev, FLOW_BLOCK_UNBIND);
+ nft_chain_offload_cmd(basechain, ops->dev,
+ FLOW_BLOCK_UNBIND);
+ }
}
return err;
}
@@ -638,7 +645,7 @@ static struct nft_chain *__nft_offload_get_chain(const struct nftables_pernet *n
found = NULL;
basechain = nft_base_chain(chain);
list_for_each_entry(hook, &basechain->hook_list, list) {
- if (hook->ops.dev != dev)
+ if (!nft_hook_find_ops(hook, dev))
continue;
found = hook;
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index 580c55268f65..ae3fe87195ab 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -15,6 +15,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
@@ -90,6 +91,49 @@ static int nf_trace_fill_dev_info(struct sk_buff *nlskb,
return 0;
}
+static int nf_trace_fill_ct_info(struct sk_buff *nlskb,
+ const struct sk_buff *skb)
+{
+ const struct nf_ct_hook *ct_hook;
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ u32 state;
+
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (!ct_hook)
+ return 0;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct) {
+ if (ctinfo != IP_CT_UNTRACKED) /* not seen by conntrack or invalid */
+ return 0;
+
+ state = NF_CT_STATE_UNTRACKED_BIT;
+ } else {
+ state = NF_CT_STATE_BIT(ctinfo);
+ }
+
+ if (nla_put_be32(nlskb, NFTA_TRACE_CT_STATE, htonl(state)))
+ return -1;
+
+ if (ct) {
+ u32 id = ct_hook->get_id(&ct->ct_general);
+ u32 status = READ_ONCE(ct->status);
+ u8 dir = CTINFO2DIR(ctinfo);
+
+ if (nla_put_u8(nlskb, NFTA_TRACE_CT_DIRECTION, dir))
+ return -1;
+
+ if (nla_put_be32(nlskb, NFTA_TRACE_CT_ID, (__force __be32)id))
+ return -1;
+
+ if (status && nla_put_be32(nlskb, NFTA_TRACE_CT_STATUS, htonl(status)))
+ return -1;
+ }
+
+ return 0;
+}
+
static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
const struct nft_pktinfo *pkt)
{
@@ -210,7 +254,11 @@ void nft_trace_notify(const struct nft_pktinfo *pkt,
nla_total_size(sizeof(__be32)) + /* trace type */
nla_total_size(0) + /* VERDICT, nested */
nla_total_size(sizeof(u32)) + /* verdict code */
- nla_total_size(sizeof(u32)) + /* id */
+ nla_total_size(sizeof(u32)) + /* ct id */
+ nla_total_size(sizeof(u8)) + /* ct direction */
+ nla_total_size(sizeof(u32)) + /* ct state */
+ nla_total_size(sizeof(u32)) + /* ct status */
+ nla_total_size(sizeof(u32)) + /* trace id */
nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) +
@@ -291,6 +339,10 @@ void nft_trace_notify(const struct nft_pktinfo *pkt,
if (nf_trace_fill_pkt_info(skb, pkt))
goto nla_put_failure;
+
+ if (nf_trace_fill_ct_info(skb, pkt->skb))
+ goto nla_put_failure;
+
info->packet_dumped = true;
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index e598a2a252b0..ac77fc21632d 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -86,6 +86,7 @@ static const int nfnl_group2type[NFNLGRP_MAX+1] = {
[NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES,
[NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT,
[NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES,
+ [NFNLGRP_NFT_DEV] = NFNL_SUBSYS_NFTABLES,
};
static struct nfnl_net *nfnl_pernet(struct net *net)
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 19a553550c76..846d48ba8965 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -318,38 +318,68 @@ static const struct nft_chain_type nft_chain_filter_netdev = {
},
};
-static void nft_netdev_event(unsigned long event, struct net_device *dev,
- struct nft_base_chain *basechain)
+static int nft_netdev_event(unsigned long event, struct net_device *dev,
+ struct nft_base_chain *basechain, bool changename)
{
+ struct nft_table *table = basechain->chain.table;
+ struct nf_hook_ops *ops;
struct nft_hook *hook;
+ bool match;
list_for_each_entry(hook, &basechain->hook_list, list) {
- if (hook->ops.dev != dev)
- continue;
+ ops = nft_hook_find_ops(hook, dev);
+ match = !strncmp(hook->ifname, dev->name, hook->ifnamelen);
+
+ switch (event) {
+ case NETDEV_UNREGISTER:
+ /* NOP if not found or new name still matching */
+ if (!ops || (changename && match))
+ continue;
+
+ if (!(table->flags & NFT_TABLE_F_DORMANT))
+ nf_unregister_net_hook(dev_net(dev), ops);
- if (!(basechain->chain.table->flags & NFT_TABLE_F_DORMANT))
- nf_unregister_net_hook(dev_net(dev), &hook->ops);
+ list_del_rcu(&ops->list);
+ kfree_rcu(ops, rcu);
+ break;
+ case NETDEV_REGISTER:
+ /* NOP if not matching or already registered */
+ if (!match || (changename && ops))
+ continue;
- list_del_rcu(&hook->list);
- kfree_rcu(hook, rcu);
+ ops = kmemdup(&basechain->ops,
+ sizeof(struct nf_hook_ops),
+ GFP_KERNEL_ACCOUNT);
+ if (!ops)
+ return 1;
+
+ ops->dev = dev;
+
+ if (!(table->flags & NFT_TABLE_F_DORMANT) &&
+ nf_register_net_hook(dev_net(dev), ops)) {
+ kfree(ops);
+ return 1;
+ }
+ list_add_tail_rcu(&ops->list, &hook->ops_list);
+ break;
+ }
+ nf_tables_chain_device_notify(&basechain->chain,
+ hook, dev, event);
break;
}
+ return 0;
}
-static int nf_tables_netdev_event(struct notifier_block *this,
- unsigned long event, void *ptr)
+static int __nf_tables_netdev_event(unsigned long event,
+ struct net_device *dev,
+ bool changename)
{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct nft_base_chain *basechain;
struct nftables_pernet *nft_net;
struct nft_chain *chain;
struct nft_table *table;
- if (event != NETDEV_UNREGISTER)
- return NOTIFY_DONE;
-
nft_net = nft_pernet(dev_net(dev));
- mutex_lock(&nft_net->commit_mutex);
list_for_each_entry(table, &nft_net->tables, list) {
if (table->family != NFPROTO_NETDEV &&
table->family != NFPROTO_INET)
@@ -364,12 +394,40 @@ static int nf_tables_netdev_event(struct notifier_block *this,
basechain->ops.hooknum != NF_INET_INGRESS)
continue;
- nft_netdev_event(event, dev, basechain);
+ if (nft_netdev_event(event, dev, basechain, changename))
+ return 1;
}
}
- mutex_unlock(&nft_net->commit_mutex);
+ return 0;
+}
+
+static int nf_tables_netdev_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nftables_pernet *nft_net;
+ int ret = NOTIFY_DONE;
- return NOTIFY_DONE;
+ if (event != NETDEV_REGISTER &&
+ event != NETDEV_UNREGISTER &&
+ event != NETDEV_CHANGENAME)
+ return NOTIFY_DONE;
+
+ nft_net = nft_pernet(dev_net(dev));
+ mutex_lock(&nft_net->commit_mutex);
+
+ if (event == NETDEV_CHANGENAME) {
+ if (__nf_tables_netdev_event(NETDEV_REGISTER, dev, true)) {
+ ret = NOTIFY_BAD;
+ goto out_unlock;
+ }
+ __nf_tables_netdev_event(NETDEV_UNREGISTER, dev, true);
+ } else if (__nf_tables_netdev_event(event, dev, false)) {
+ ret = NOTIFY_BAD;
+ }
+out_unlock:
+ mutex_unlock(&nft_net->commit_mutex);
+ return ret;
}
static struct notifier_block nf_tables_netdev_notifier = {
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 221d50223018..225ff293cd50 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -175,7 +175,7 @@ static bool nft_flowtable_find_dev(const struct net_device *dev,
bool found = false;
list_for_each_entry_rcu(hook, &ft->hook_list, list) {
- if (hook->ops.dev != dev)
+ if (!nft_hook_find_ops_rcu(hook, dev))
continue;
found = true;
diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c
index 817ab978d24a..c4569d4b9228 100644
--- a/net/netfilter/nft_inner.c
+++ b/net/netfilter/nft_inner.c
@@ -23,7 +23,14 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
-static DEFINE_PER_CPU(struct nft_inner_tun_ctx, nft_pcpu_tun_ctx);
+struct nft_inner_tun_ctx_locked {
+ struct nft_inner_tun_ctx ctx;
+ local_lock_t bh_lock;
+};
+
+static DEFINE_PER_CPU(struct nft_inner_tun_ctx_locked, nft_pcpu_tun_ctx) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+};
/* Same layout as nft_expr but it embeds the private expression data area. */
struct __nft_expr {
@@ -237,12 +244,15 @@ static bool nft_inner_restore_tun_ctx(const struct nft_pktinfo *pkt,
struct nft_inner_tun_ctx *this_cpu_tun_ctx;
local_bh_disable();
- this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx);
+ local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
+ this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx);
if (this_cpu_tun_ctx->cookie != (unsigned long)pkt->skb) {
local_bh_enable();
+ local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
return false;
}
*tun_ctx = *this_cpu_tun_ctx;
+ local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
local_bh_enable();
return true;
@@ -254,9 +264,11 @@ static void nft_inner_save_tun_ctx(const struct nft_pktinfo *pkt,
struct nft_inner_tun_ctx *this_cpu_tun_ctx;
local_bh_disable();
- this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx);
+ local_lock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
+ this_cpu_tun_ctx = this_cpu_ptr(&nft_pcpu_tun_ctx.ctx);
if (this_cpu_tun_ctx->cookie != tun_ctx->cookie)
*this_cpu_tun_ctx = *tun_ctx;
+ local_unlock_nested_bh(&nft_pcpu_tun_ctx.bh_lock);
local_bh_enable();
}
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 0c63d1367cf7..a12486ae089d 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -621,10 +621,10 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
struct geneve_opt *opt;
int offset = 0;
- inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE);
- if (!inner)
- goto failure;
while (opts->len > offset) {
+ inner = nla_nest_start_noflag(skb, NFTA_TUNNEL_KEY_OPTS_GENEVE);
+ if (!inner)
+ goto failure;
opt = (struct geneve_opt *)(opts->u.data + offset);
if (nla_put_be16(skb, NFTA_TUNNEL_KEY_GENEVE_CLASS,
opt->opt_class) ||
@@ -634,8 +634,8 @@ static int nft_tunnel_opts_dump(struct sk_buff *skb,
opt->length * 4, opt->opt_data))
goto inner_failure;
offset += sizeof(*opt) + opt->length * 4;
+ nla_nest_end(skb, inner);
}
- nla_nest_end(skb, inner);
}
nla_nest_end(skb, nest);
return 0;
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index 30e99464171b..93f064306901 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -91,7 +91,7 @@ tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
return tcpoptstrip_mangle_packet(skb, par, ip_hdrlen(skb));
}
-#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
static unsigned int
tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -119,7 +119,7 @@ static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {
.targetsize = sizeof(struct xt_tcpoptstrip_target_info),
.me = THIS_MODULE,
},
-#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)
+#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
{
.name = "TCPOPTSTRIP",
.family = NFPROTO_IPV6,
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 65b965ca40ea..59b9d04400ca 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -48,7 +48,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = {
.targetsize = sizeof(struct xt_mark_tginfo2),
.me = THIS_MODULE,
},
-#if IS_ENABLED(CONFIG_IP_NF_ARPTABLES)
+#if IS_ENABLED(CONFIG_IP_NF_ARPTABLES) || IS_ENABLED(CONFIG_NFT_COMPAT_ARP)
{
.name = "MARK",
.revision = 2,
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index 3bdcbbdba925..e9b2f553588d 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -24,6 +24,7 @@ TEST_PROGS += nft_concat_range.sh
TEST_PROGS += nft_conntrack_helper.sh
TEST_PROGS += nft_fib.sh
TEST_PROGS += nft_flowtable.sh
+TEST_PROGS += nft_interface_stress.sh
TEST_PROGS += nft_meta.sh
TEST_PROGS += nft_nat.sh
TEST_PROGS += nft_nat_zones.sh
diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
index 025b58f2ae91..207b79932d91 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
@@ -32,7 +32,6 @@ source lib.sh
IP0=172.30.30.1
IP1=172.30.30.2
-DUMMYNET=10.9.9
PFXL=30
ret=0
@@ -52,8 +51,6 @@ trap cleanup EXIT
setup_ns ns0 ns1
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1
-
if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
echo "SKIP: Could not add veth device"
exit $ksft_skip
@@ -64,18 +61,13 @@ if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
exit $ksft_skip
fi
-ip -net "$ns0" link add dummy0 type dummy
-
ip -net "$ns0" li set veth0 master tvrf
-ip -net "$ns0" li set dummy0 master tvrf
ip -net "$ns0" li set tvrf up
ip -net "$ns0" li set veth0 up
-ip -net "$ns0" li set dummy0 up
ip -net "$ns1" li set veth0 up
ip -net "$ns0" addr add $IP0/$PFXL dev veth0
ip -net "$ns1" addr add $IP1/$PFXL dev veth0
-ip -net "$ns0" addr add $DUMMYNET.1/$PFXL dev dummy0
listener_ready()
{
@@ -216,35 +208,9 @@ EOF
fi
}
-test_fib()
-{
-ip netns exec "$ns0" nft -f - <<EOF
-flush ruleset
-table ip t {
- counter fibcount { }
-
- chain prerouting {
- type filter hook prerouting priority 0;
- meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack
- }
-}
-EOF
- ip -net "$ns1" route add 10.9.9.0/24 via "$IP0" dev veth0
- ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET".2 > /dev/null
-
- if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then
- echo "PASS: fib lookup returned exepected output interface"
- else
- echo "FAIL: fib lookup did not return exepected output interface"
- ret=1
- return
- fi
-}
-
test_ct_zone_in
test_masquerade_vrf "default"
test_masquerade_vrf "pfifo"
test_masquerade_veth
-test_fib
exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 1f5979c1510c..efea93cf23d4 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -15,10 +15,12 @@ source lib.sh
# Available test groups:
# - reported_issues: check for issues that were reported in the past
# - correctness: check that packets match given entries, and only those
+# - correctness_large: same but with additional non-matching entries
# - concurrency: attempt races between insertion, deletion and lookup
# - timeout: check that packets match entries until they expire
# - performance: estimate matching rate, compare with rbtree and hash baselines
-TESTS="reported_issues correctness concurrency timeout"
+TESTS="reported_issues correctness correctness_large concurrency timeout"
+
[ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}"
# Set types, defined by TYPE_ variables below
@@ -1257,9 +1259,7 @@ send_nomatch() {
# - add ranged element, check that packets match it
# - check that packets outside range don't match it
# - remove some elements, check that packets don't match anymore
-test_correctness() {
- setup veth send_"${proto}" set || return ${ksft_skip}
-
+test_correctness_main() {
range_size=1
for i in $(seq "${start}" $((start + count))); do
end=$((start + range_size))
@@ -1293,6 +1293,163 @@ test_correctness() {
done
}
+test_correctness() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ test_correctness_main
+}
+
+# Repeat the correctness tests, but add extra non-matching entries.
+# This exercises the more compact '4 bit group' representation that
+# gets picked when the default 8-bit representation exceed
+# NFT_PIPAPO_LT_SIZE_HIGH bytes of memory.
+# See usage of NFT_PIPAPO_LT_SIZE_HIGH in pipapo_lt_bits_adjust().
+#
+# The format() helper is way too slow when generating lots of
+# entries so its not used here.
+test_correctness_large() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+ # number of dummy (filler) entries to add.
+ local dcount=16385
+
+ (
+ echo -n "add element inet filter test { "
+
+ case "$type_spec" in
+ "ether_addr . ipv4_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_mac $((1000000 + i))
+ printf ". 172.%i.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "inet_proto . ipv6_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . " $((RANDOM%256))
+ format_addr6 $((1000000 + i))
+ done
+ ;;
+ "inet_service . inet_proto")
+ # smaller key sizes, need more entries to hit the
+ # 4-bit threshold.
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ local proto=$((RANDOM%256))
+
+ # Test uses UDP to match, as it also fails when matching
+ # an entry that doesn't exist, so skip 'udp' entries
+ # to not trigger a wrong failure.
+ [ $proto -eq 17 ] && proto=18
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . %i " $(((i%65534) + 1)) $((proto))
+ done
+ ;;
+ "inet_service . ipv4_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . 172.%i.%i.%i " $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "ipv4_addr . ether_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ format_mac $((1000000 + i))
+ done
+ ;;
+ "ipv4_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+ done
+ ;;
+ "ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr")
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+ format_mac $((1000000 + i))
+ printf ". %i . 192.168.%i.%i" $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . inet_proto")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . %i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . inet_proto . ipv4_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((RANDOM%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . ipv4_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256))
+ done
+ ;;
+ "ipv6_addr . ether_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . "
+ format_mac $((1000000 + i))
+ done
+ ;;
+ "ipv6_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1))"
+ done
+ ;;
+ "ipv6_addr . inet_service . ether_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_mac $((i + 1000000))
+ done
+ ;;
+ "ipv6_addr . inet_service . ether_addr . inet_proto")
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_mac $((i + 1000000))
+ echo -n " . $((RANDOM%256))"
+ done
+ ;;
+ "ipv6_addr . inet_service . ipv6_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_addr6 $((i + 2123456))
+ echo -n " . $((RANDOM%256))"
+ done
+ ;;
+ *)
+ "Unhandled $type_spec"
+ return 1
+ esac
+ echo -n "}"
+
+ ) | nft -f - || return 1
+
+ test_correctness_main
+}
+
# Concurrency test template:
# - add all the elements
# - start a thread for each physical thread that:
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
index 82780b39277c..9929a9ffef65 100755
--- a/tools/testing/selftests/net/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -3,6 +3,10 @@
# This tests the fib expression.
#
# Kselftest framework requirement - SKIP code is 4.
+#
+# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99
+# dead:1::99 dead:1::1 dead:2::1 dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
source lib.sh
@@ -72,6 +76,89 @@ table inet filter {
EOF
}
+load_type_ruleset() {
+ local netns=$1
+
+ for family in ip ip6;do
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table $family filter {
+ chain type_match_in {
+ fib daddr type local counter comment "daddr configured on other iface"
+ fib daddr . iif type local counter comment "daddr configured on iif"
+ fib daddr type unicast counter comment "daddr not local"
+ fib daddr . iif type unicast counter comment "daddr not configured on iif"
+ }
+
+ chain type_match_out {
+ fib daddr type unicast counter
+ fib daddr . oif type unicast counter
+ fib daddr type local counter
+ fib daddr . oif type local counter
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain input {
+ type filter hook input priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain forward {
+ type filter hook forward priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain output {
+ type filter hook output priority 0;
+ icmp type echo-request counter jump type_match_out
+ icmpv6 type echo-request counter jump type_match_out
+ }
+
+ chain postrouting {
+ type filter hook postrouting priority 0;
+ icmp type echo-request counter jump type_match_out
+ icmpv6 type echo-request counter jump type_match_out
+ }
+}
+EOF
+done
+}
+
+reload_type_ruleset() {
+ ip netns exec "$1" nft flush table ip filter
+ ip netns exec "$1" nft flush table ip6 filter
+ load_type_ruleset "$1"
+}
+
+check_fib_type_counter_family() {
+ local family="$1"
+ local want="$2"
+ local ns="$3"
+ local chain="$4"
+ local what="$5"
+ local errmsg="$6"
+
+ if ! ip netns exec "$ns" nft list chain "$family" filter "$chain" | grep "$what" | grep -q "packets $want";then
+ echo "Netns $ns $family fib type counter doesn't match expected packet count of $want for $what $errmsg" 1>&2
+ ip netns exec "$ns" nft list chain "$family" filter "$chain"
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+check_fib_type_counter() {
+ check_fib_type_counter_family "ip" "$@" || return 1
+ check_fib_type_counter_family "ip6" "$@" || return 1
+}
+
load_ruleset_count() {
local netns=$1
@@ -90,6 +177,7 @@ check_drops() {
if dmesg | grep -q ' nft_rpfilter: ';then
dmesg | grep ' nft_rpfilter: '
echo "FAIL: rpfilter did drop packets"
+ ret=1
return 1
fi
@@ -164,17 +252,496 @@ test_ping() {
return 0
}
+test_ping_unreachable() {
+ local daddr4=$1
+ local daddr6=$2
+
+ if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr4" > /dev/null; then
+ echo "FAIL: ${ns1} could reach $daddr4" 1>&2
+ return 1
+ fi
+
+ if ip netns exec "$ns1" ping -c 1 -w 1 -q "$daddr6" > /dev/null; then
+ echo "FAIL: ${ns1} could reach $daddr6" 1>&2
+ return 1
+ fi
+
+ return 0
+}
+
+test_fib_type() {
+ local notice="$1"
+ local errmsg="addr-on-if"
+ local lret=0
+
+ if ! load_type_ruleset "$nsrouter";then
+ echo "SKIP: Could not load fib type ruleset"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return
+ fi
+
+ # makes router receive packet for addresses configured on incoming
+ # interface.
+ test_ping 10.0.1.1 dead:1::1 || return 1
+
+ # expectation: triggers all 'local' in prerouting/input.
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type local" "$errmsg" || lret=1
+
+ reload_type_ruleset "$nsrouter"
+ # makes router receive packet for address configured on a different (but local)
+ # interface.
+ test_ping 10.0.2.1 dead:2::1 || return 1
+
+ # expectation: triggers 'unicast' in prerouting/input for daddr . iif and local for 'daddr'.
+ errmsg="addr-on-host"
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+ reload_type_ruleset "$nsrouter"
+ test_ping 10.0.2.99 dead:2::99 || return 1
+ errmsg="addr-on-otherhost"
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type unicast" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: fib expression address types match ($notice)"
+ else
+ echo "FAIL: fib expression address types match ($notice)"
+ ret=1
+ fi
+}
+
+test_fib_vrf_dev_add_dummy()
+{
+ if ! ip -net "$nsrouter" link add dummy0 type dummy ;then
+ echo "SKIP: VRF tests: dummy device type not supported"
+ return 1
+ fi
+
+ if ! ip -net "$nsrouter" link add tvrf type vrf table 9876;then
+ echo "SKIP: VRF tests: vrf device type not supported"
+ return 1
+ fi
+
+ ip -net "$nsrouter" link set dummy0 master tvrf
+ ip -net "$nsrouter" link set dummy0 up
+ ip -net "$nsrouter" link set tvrf up
+}
+
+load_ruleset_vrf()
+{
+# Due to the many different possible combinations using named counters
+# or one-rule-per-expected-result is complex.
+#
+# Instead, add dynamic sets for the fib modes
+# (fib address type, fib output interface lookup .. ),
+# and then add the obtained fib results to them.
+#
+# The test is successful if the sets contain the expected results
+# and no unexpected extra entries existed.
+ip netns exec "$nsrouter" nft -f - <<EOF
+flush ruleset
+table inet t {
+ set fibif4 {
+ typeof meta iif . ip daddr . fib daddr oif
+ flags dynamic
+ counter
+ }
+
+ set fibif4iif {
+ typeof meta iif . ip daddr . fib daddr . iif oif
+ flags dynamic
+ counter
+ }
+
+ set fibif6 {
+ typeof meta iif . ip6 daddr . fib daddr oif
+ flags dynamic
+ counter
+ }
+
+ set fibif6iif {
+ typeof meta iif . ip6 daddr . fib daddr . iif oif
+ flags dynamic
+ counter
+ }
+
+ set fibtype4 {
+ typeof meta iif . ip daddr . fib daddr type
+ flags dynamic
+ counter
+ }
+
+ set fibtype4iif {
+ typeof meta iif . ip daddr . fib daddr . iif type
+ flags dynamic
+ counter
+ }
+
+ set fibtype6 {
+ typeof meta iif . ip6 daddr . fib daddr type
+ flags dynamic
+ counter
+ }
+
+ set fibtype6iif {
+ typeof meta iif . ip6 daddr . fib daddr . iif type
+ flags dynamic
+ counter
+ }
+
+ chain fib_test {
+ meta nfproto ipv4 jump {
+ add @fibif4 { meta iif . ip daddr . fib daddr oif }
+ add @fibif4iif { meta iif . ip daddr . fib daddr . iif oif }
+ add @fibtype4 { meta iif . ip daddr . fib daddr type }
+ add @fibtype4iif { meta iif . ip daddr . fib daddr . iif type }
+
+ add @fibif4 { meta iif . ip saddr . fib saddr oif }
+ add @fibif4iif { meta iif . ip saddr . fib saddr . iif oif }
+ }
+
+ meta nfproto ipv6 jump {
+ add @fibif6 { meta iif . ip6 daddr . fib daddr oif }
+ add @fibif6iif { meta iif . ip6 daddr . fib daddr . iif oif }
+ add @fibtype6 { meta iif . ip6 daddr . fib daddr type }
+ add @fibtype6iif { meta iif . ip6 daddr . fib daddr . iif type }
+
+ add @fibif6 { meta iif . ip6 saddr . fib saddr oif }
+ add @fibif6iif { meta iif . ip6 saddr . fib saddr . iif oif }
+ }
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority 0;
+ icmp type echo-request counter jump fib_test
+
+ # neighbour discovery to be ignored.
+ icmpv6 type echo-request counter jump fib_test
+ }
+}
+EOF
+
+if [ $? -ne 0 ] ;then
+ echo "SKIP: Could not load ruleset for fib vrf test"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return 1
+fi
+}
+
+check_type()
+{
+ local setname="$1"
+ local iifname="$2"
+ local addr="$3"
+ local type="$4"
+ local count="$5"
+
+ [ -z "$count" ] && count=1
+
+ if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -q "counter packets $count";then
+ echo "FAIL: did not find $iifname . $addr . $type in $setname"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ return 1
+ fi
+
+ # delete the entry, this allows to check if anything unexpected appeared
+ # at the end of the test run: all dynamic sets should be empty by then.
+ if ! ip netns exec "$nsrouter" nft delete element inet t "$setname" { "$iifname" . "$addr" . "$type" } ; then
+ echo "FAIL: can't delete $iifname . $addr . $type in $setname"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+check_local()
+{
+ check_type $@ "local" 1
+}
+
+check_unicast()
+{
+ check_type $@ "unicast" 1
+}
+
+check_rpf()
+{
+ check_type $@
+}
+
+check_fib_vrf_sets_empty()
+{
+ local setname=""
+ local lret=0
+
+ # A non-empty set means that we have seen unexpected packets OR
+ # that a fib lookup provided unexpected results.
+ for setname in "fibif4" "fibif4iif" "fibif6" "fibif6iif" \
+ "fibtype4" "fibtype4iif" "fibtype6" "fibtype6iif";do
+ if ip netns exec "$nsrouter" nft list set inet t "$setname" | grep -q elements;then
+ echo "FAIL: $setname not empty"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ lret=1
+ fi
+ done
+
+ return $lret
+}
+
+check_fib_vrf_type()
+{
+ local msg="$1"
+
+ local addr
+ # the incoming interface is always veth0. As its not linked to a VRF,
+ # the 'tvrf' device should NOT show up anywhere.
+ local ifname="veth0"
+ local lret=0
+
+ # local_veth0, local_veth1
+ for addr in "10.0.1.1" "10.0.2.1"; do
+ check_local fibtype4 "$ifname" "$addr" || lret=1
+ check_type fibif4 "$ifname" "$addr" "0" || lret=1
+ done
+ for addr in "dead:1::1" "dead:2::1";do
+ check_local fibtype6 "$ifname" "$addr" || lret=1
+ check_type fibif6 "$ifname" "$addr" "0" || lret=1
+ done
+
+ # when restricted to the incoming interface, 10.0.1.1 should
+ # be 'local', but 10.0.2.1 unicast.
+ check_local fibtype4iif "$ifname" "10.0.1.1" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.1" || lret=1
+
+ # same for the ipv6 addresses.
+ check_local fibtype6iif "$ifname" "dead:1::1" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::1" || lret=1
+
+ # None of these addresses should find a valid route when restricting
+ # to the incoming interface (we ask for daddr - 10.0.1.1/2.1 are
+ # reachable via 'lo'.
+ for addr in "10.0.1.1" "10.0.2.1" "10.9.9.1" "10.9.9.2";do
+ check_type fibif4iif "$ifname" "$addr" "0" || lret=1
+ done
+
+ # expect default route (veth1), dummy0 is part of VRF but iif isn't.
+ for addr in "10.9.9.1" "10.9.9.2";do
+ check_unicast fibtype4 "$ifname" "$addr" || lret=1
+ check_unicast fibtype4iif "$ifname" "$addr" || lret=1
+ check_type fibif4 "$ifname" "$addr" "veth1" || lret=1
+ done
+ for addr in "dead:9::1" "dead:9::2";do
+ check_unicast fibtype6 "$ifname" "$addr" || lret=1
+ check_unicast fibtype6iif "$ifname" "$addr" || lret=1
+ check_type fibif6 "$ifname" "$addr" "veth1" || lret=1
+ done
+
+ # same for the IPv6 equivalent addresses.
+ for addr in "dead:1::1" "dead:2::1" "dead:9::1" "dead:9::2";do
+ check_type fibif6iif "$ifname" "$addr" "0" || lret=1
+ done
+
+ check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+ check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1
+ check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1
+ check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1
+ check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+ check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 5 || lret=1
+ check_rpf fibif4iif "$ifname" "10.0.1.99" "veth0" 5 || lret=1
+ check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 5 || lret=1
+ check_rpf fibif6iif "$ifname" "dead:1::99" "veth0" 5 || lret=1
+
+ check_fib_vrf_sets_empty || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "FAIL: $msg"
+ ret=1
+ fi
+}
+
+check_fib_veth_vrf_type()
+{
+ local msg="$1"
+
+ local addr
+ local ifname
+ local setname
+ local lret=0
+
+ # as veth0 is now part of tvrf interface, packets will be seen
+ # twice, once with iif veth0, then with iif tvrf.
+
+ for ifname in "veth0" "tvrf"; do
+ for addr in "10.0.1.1" "10.9.9.1"; do
+ check_local fibtype4 "$ifname" "$addr" || lret=1
+ # addr local, but nft_fib doesn't return routes with RTN_LOCAL.
+ check_type fibif4 "$ifname" "$addr" 0 || lret=1
+ check_type fibif4iif "$ifname" "$addr" 0 || lret=1
+ done
+
+ for addr in "dead:1::1" "dead:9::1"; do
+ check_local fibtype6 "$ifname" "$addr" || lret=1
+ # same, address is local but no route is returned for lo.
+ check_type fibif6 "$ifname" "$addr" 0 || lret=1
+ check_type fibif6iif "$ifname" "$addr" 0 || lret=1
+ done
+
+ for t in fibtype4 fibtype4iif; do
+ check_unicast "$t" "$ifname" 10.9.9.2 || lret=1
+ done
+ for t in fibtype6 fibtype6iif; do
+ check_unicast "$t" "$ifname" dead:9::2 || lret=1
+ done
+
+ check_unicast fibtype4iif "$ifname" "10.9.9.1" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:9::1" || lret=1
+
+ check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+
+ check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+ check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1
+ check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1
+ check_type fibif4 "$ifname" "10.9.9.2" "dummy0" || lret=1
+ check_type fibif6 "$ifname" "dead:9::2" "dummy0" || lret=1
+
+ # restricted to iif -- MUST NOT provide result, its != $ifname.
+ check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1
+ check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+ check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 4 || lret=1
+ check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 4 || lret=1
+ check_rpf fibif4iif "$ifname" "10.0.1.99" "$ifname" 4 || lret=1
+ check_rpf fibif6iif "$ifname" "dead:1::99" "$ifname" 4 || lret=1
+ done
+
+ check_local fibtype4iif "veth0" "10.0.1.1" || lret=1
+ check_local fibtype6iif "veth0" "dead:1::1" || lret=1
+
+ check_unicast fibtype4iif "tvrf" "10.0.1.1" || lret=1
+ check_unicast fibtype6iif "tvrf" "dead:1::1" || lret=1
+
+ # 10.9.9.2 should not provide a result for iif veth, but
+ # should when iif is tvrf.
+ # This is because its reachable via dummy0 which is part of
+ # tvrf. iif veth0 MUST conceal the dummy0 result (i.e. return oif 0).
+ check_type fibif4iif "veth0" "10.9.9.2" 0 || lret=1
+ check_type fibif6iif "veth0" "dead:9::2" 0 || lret=1
+
+ check_type fibif4iif "tvrf" "10.9.9.2" "tvrf" || lret=1
+ check_type fibif6iif "tvrf" "dead:9::2" "tvrf" || lret=1
+
+ check_fib_vrf_sets_empty || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "FAIL: $msg"
+ ret=1
+ fi
+}
+
+# Extends nsrouter config by adding dummy0+vrf.
+#
+# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99
+# dead:1::99 dead:1::1 dead:2::1 dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
+# [dummy0]
+# 10.9.9.1
+# dead:9::1
+# [tvrf]
+test_fib_vrf()
+{
+ local cntname=""
+
+ if ! test_fib_vrf_dev_add_dummy; then
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return
+ fi
+
+ ip -net "$nsrouter" addr add "10.9.9.1"/24 dev dummy0
+ ip -net "$nsrouter" addr add "dead:9::1"/64 dev dummy0 nodad
+
+ ip -net "$nsrouter" route add default via 10.0.2.99
+ ip -net "$nsrouter" route add default via dead:2::99
+
+ load_ruleset_vrf || return
+
+ # no echo reply for these addresses: The dummy interface is part of tvrf,
+ # but veth0 (incoming interface) isn't linked to it.
+ test_ping_unreachable "10.9.9.1" "dead:9::1" &
+ test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+ # expect replies from these.
+ test_ping "10.0.1.1" "dead:1::1"
+ test_ping "10.0.2.1" "dead:2::1"
+ test_ping "10.0.2.99" "dead:2::99"
+
+ wait
+
+ check_fib_vrf_type "fib expression address types match (iif not in vrf)"
+
+ # second round: this time, make veth0 (rx interface) part of the vrf.
+ # 10.9.9.1 / dead:9::1 become reachable from ns1, while ns2
+ # becomes unreachable.
+ ip -net "$nsrouter" link set veth0 master tvrf
+ ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ # this reload should not be needed, but in case
+ # there is some error (missing or unexpected entry) this will prevent them
+ # from leaking into round 2.
+ load_ruleset_vrf || return
+
+ test_ping "10.0.1.1" "dead:1::1"
+ test_ping "10.9.9.1" "dead:9::1"
+
+ # ns2 should no longer be reachable (veth1 not in vrf)
+ test_ping_unreachable "10.0.2.99" "dead:2::99" &
+
+ # vrf via dummy0, but host doesn't exist
+ test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+ wait
+
+ check_fib_veth_vrf_type "fib expression address types match (iif in vrf)"
+}
+
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
test_ping 10.0.2.1 dead:2::1 || exit 1
-check_drops || exit 1
+check_drops
test_ping 10.0.2.99 dead:2::99 || exit 1
-check_drops || exit 1
+check_drops
+
+[ $ret -eq 0 ] && echo "PASS: fib expression did not cause unwanted packet drops"
+
+load_input_ruleset "$ns1"
+
+test_ping 127.0.0.1 ::1
+check_drops
+
+test_ping 10.0.1.99 dead:1::99
+check_drops
-echo "PASS: fib expression did not cause unwanted packet drops"
+[ $ret -eq 0 ] && echo "PASS: fib expression did not discard loopback packets"
load_input_ruleset "$ns1"
@@ -234,7 +801,7 @@ ip -net "$nsrouter" addr del dead:2::1/64 dev veth0
# ... pbr ruleset for the router, check iif+oif.
if ! load_pbr_ruleset "$nsrouter";then
echo "SKIP: Could not load fib forward ruleset"
- exit $ksft_skip
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
fi
ip -net "$nsrouter" rule add from all table 128
@@ -245,11 +812,36 @@ ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1
# drop main ipv4 table
ip -net "$nsrouter" -4 rule delete table main
-if ! test_ping 10.0.2.99 dead:2::99;then
- ip -net "$nsrouter" nft list ruleset
- echo "FAIL: fib mismatch in pbr setup"
- exit 1
+if test_ping 10.0.2.99 dead:2::99;then
+ echo "PASS: fib expression forward check with policy based routing"
+else
+ echo "FAIL: fib expression forward check with policy based routing"
+ ret=1
fi
-echo "PASS: fib expression forward check with policy based routing"
-exit 0
+test_fib_type "policy routing"
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+# Un-do policy routing changes
+ip -net "$nsrouter" rule del from all table 128
+ip -net "$nsrouter" rule del from all iif veth0 table 129
+
+ip -net "$nsrouter" route del table 128 to 10.0.1.0/24 dev veth0
+ip -net "$nsrouter" route del table 129 to 10.0.2.0/24 dev veth1
+
+ip -net "$ns1" -4 route del default
+ip -net "$ns1" -6 route del default
+
+ip -net "$ns1" -4 route add default via 10.0.1.1
+ip -net "$ns1" -6 route add default via dead:1::1
+
+ip -net "$nsrouter" -4 rule add from all table main priority 32766
+
+test_fib_type "default table"
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+test_fib_vrf
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
new file mode 100755
index 000000000000..11d82d11495e
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
@@ -0,0 +1,151 @@
+#!/bin/bash -e
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+# Torture nftables' netdevice notifier callbacks and related code by frequent
+# renaming of interfaces which netdev-family chains and flowtables hook into.
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+checktool "iperf3 --version" "run test without iperf3 tool"
+
+# how many seconds to torture the kernel?
+# default to 80% of max run time but don't exceed 48s
+TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10))
+[[ $TEST_RUNTIME -gt 48 ]] && TEST_RUNTIME=48
+
+trap "cleanup_all_ns" EXIT
+
+setup_ns nsc nsr nss
+
+ip -net $nsc link add cr0 type veth peer name rc0 netns $nsr
+ip -net $nsc addr add 10.0.0.1/24 dev cr0
+ip -net $nsc link set cr0 up
+ip -net $nsc route add default via 10.0.0.2
+
+ip -net $nss link add sr0 type veth peer name rs0 netns $nsr
+ip -net $nss addr add 10.1.0.1/24 dev sr0
+ip -net $nss link set sr0 up
+ip -net $nss route add default via 10.1.0.2
+
+ip -net $nsr addr add 10.0.0.2/24 dev rc0
+ip -net $nsr link set rc0 up
+ip -net $nsr addr add 10.1.0.2/24 dev rs0
+ip -net $nsr link set rs0 up
+ip netns exec $nsr sysctl -q net.ipv4.ip_forward=1
+ip netns exec $nsr sysctl -q net.ipv4.conf.all.forwarding=1
+
+{
+ echo "table netdev t {"
+ for ((i = 0; i < 10; i++)); do
+ cat <<-EOF
+ chain chain_rc$i {
+ type filter hook ingress device rc$i priority 0
+ counter
+ }
+ chain chain_rs$i {
+ type filter hook ingress device rs$i priority 0
+ counter
+ }
+ EOF
+ done
+ echo "}"
+ echo "table ip t {"
+ for ((i = 0; i < 10; i++)); do
+ cat <<-EOF
+ flowtable ft_${i} {
+ hook ingress priority 0
+ devices = { rc$i, rs$i }
+ }
+ EOF
+ done
+ echo "chain c {"
+ echo "type filter hook forward priority 0"
+ for ((i = 0; i < 10; i++)); do
+ echo -n "iifname rc$i oifname rs$i "
+ echo "ip protocol tcp counter flow add @ft_${i}"
+ done
+ echo "counter"
+ echo "}"
+ echo "}"
+} | ip netns exec $nsr nft -f - || {
+ echo "SKIP: Could not load nft ruleset"
+ exit $ksft_skip
+}
+
+for ((o=0, n=1; ; o=n, n++, n %= 10)); do
+ ip -net $nsr link set rc$o name rc$n
+ ip -net $nsr link set rs$o name rs$n
+done &
+rename_loop_pid=$!
+
+while true; do ip netns exec $nsr nft list ruleset >/dev/null 2>&1; done &
+nft_list_pid=$!
+
+ip netns exec $nsr nft monitor >/dev/null &
+nft_monitor_pid=$!
+
+ip netns exec $nss iperf3 --server --daemon -1
+summary_expr='s,^\[SUM\] .* \([0-9\.]\+\) Kbits/sec .* receiver,\1,p'
+rate=$(ip netns exec $nsc iperf3 \
+ --format k -c 10.1.0.1 --time $TEST_RUNTIME \
+ --length 56 --parallel 10 -i 0 | sed -n "$summary_expr")
+
+kill $nft_list_pid
+kill $nft_monitor_pid
+kill $rename_loop_pid
+wait
+
+ip netns exec $nsr nft -f - <<EOF
+table ip t {
+ flowtable ft_wild {
+ hook ingress priority 0
+ devices = { wild* }
+ }
+}
+EOF
+if [[ $? -ne 0 ]]; then
+ echo "SKIP wildcard tests: not supported by host's nft?"
+else
+ for ((i = 0; i < 100; i++)); do
+ ip -net $nsr link add wild$i type dummy &
+ done
+ wait
+ for ((i = 80; i < 100; i++)); do
+ ip -net $nsr link del wild$i &
+ done
+ for ((i = 0; i < 80; i++)); do
+ ip -net $nsr link del wild$i &
+ done
+ wait
+ for ((i = 0; i < 100; i += 10)); do
+ (
+ for ((j = 0; j < 10; j++)); do
+ ip -net $nsr link add wild$((i + j)) type dummy
+ done
+ for ((j = 0; j < 10; j++)); do
+ ip -net $nsr link del wild$((i + j))
+ done
+ ) &
+ done
+ wait
+fi
+
+[[ $(</proc/sys/kernel/tainted) -eq 0 ]] || {
+ echo "FAIL: Kernel is tainted!"
+ exit $ksft_fail
+}
+
+[[ $rate -gt 0 ]] || {
+ echo "FAIL: Zero throughput in iperf3"
+ exit $ksft_fail
+}
+
+[[ -f /sys/kernel/debug/kmemleak && \
+ -n $(</sys/kernel/debug/kmemleak) ]] && {
+ echo "FAIL: non-empty kmemleak report"
+ exit $ksft_fail
+}
+
+exit $ksft_pass