From 0604628bb03ae695f61b872cc59f1933a8379625 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 29 Jan 2019 09:15:02 +0100 Subject: netfilter: nf_tables: add NFTA_RULE_POSITION_ID to nla_policy Fixes: 75dd48e2e420a ("netfilter: nf_tables: Support RULE_ID reference in new rule") Reported-by: Cong Wang Signed-off-by: Florian Westphal Acked-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e92bedd09cde..7495f29d24e8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2239,6 +2239,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = { [NFTA_RULE_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, [NFTA_RULE_ID] = { .type = NLA_U32 }, + [NFTA_RULE_POSITION_ID] = { .type = NLA_U32 }, }; static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, -- cgit v1.2.3-59-g8ed1b From a46c52d9f2659498f0c0871f7f2333a692c243fe Mon Sep 17 00:00:00 2001 From: wenxu Date: Tue, 29 Jan 2019 15:51:17 +0800 Subject: netfilter: nft_tunnel: Add NFTA_TUNNEL_MODE options nft "tunnel" expr match both the tun_info of RX and TX. This patch provide the NFTA_TUNNEL_MODE to individually match the tun_info of RX or TX. Signed-off-by: wenxu Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 9 +++++++++ net/netfilter/nft_tunnel.c | 34 ++++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 2 deletions(-) (limited to 'net/netfilter') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 030302893d96..a66c8de006cc 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1727,10 +1727,19 @@ enum nft_tunnel_keys { }; #define NFT_TUNNEL_MAX (__NFT_TUNNEL_MAX - 1) +enum nft_tunnel_mode { + NFT_TUNNEL_MODE_NONE, + NFT_TUNNEL_MODE_RX, + NFT_TUNNEL_MODE_TX, + __NFT_TUNNEL_MODE_MAX +}; +#define NFT_TUNNEL_MODE_MAX (__NFT_TUNNEL_MODE_MAX - 1) + enum nft_tunnel_attributes { NFTA_TUNNEL_UNSPEC, NFTA_TUNNEL_KEY, NFTA_TUNNEL_DREG, + NFTA_TUNNEL_MODE, __NFTA_TUNNEL_MAX }; #define NFTA_TUNNEL_MAX (__NFTA_TUNNEL_MAX - 1) diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index 3a15f219e4e7..ea28588c5eed 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -15,6 +15,7 @@ struct nft_tunnel { enum nft_tunnel_keys key:8; enum nft_registers dreg:8; + enum nft_tunnel_mode mode:8; }; static void nft_tunnel_get_eval(const struct nft_expr *expr, @@ -29,14 +30,32 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_TUNNEL_PATH: - nft_reg_store8(dest, !!tun_info); + if (!tun_info) { + nft_reg_store8(dest, false); + return; + } + if (priv->mode == NFT_TUNNEL_MODE_NONE || + (priv->mode == NFT_TUNNEL_MODE_RX && + !(tun_info->mode & IP_TUNNEL_INFO_TX)) || + (priv->mode == NFT_TUNNEL_MODE_TX && + (tun_info->mode & IP_TUNNEL_INFO_TX))) + nft_reg_store8(dest, true); + else + nft_reg_store8(dest, false); break; case NFT_TUNNEL_ID: if (!tun_info) { regs->verdict.code = NFT_BREAK; return; } - *dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id)); + if (priv->mode == NFT_TUNNEL_MODE_NONE || + (priv->mode == NFT_TUNNEL_MODE_RX && + !(tun_info->mode & IP_TUNNEL_INFO_TX)) || + (priv->mode == NFT_TUNNEL_MODE_TX && + (tun_info->mode & IP_TUNNEL_INFO_TX))) + *dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id)); + else + regs->verdict.code = NFT_BREAK; break; default: WARN_ON(1); @@ -47,6 +66,7 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr, static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = { [NFTA_TUNNEL_KEY] = { .type = NLA_U32 }, [NFTA_TUNNEL_DREG] = { .type = NLA_U32 }, + [NFTA_TUNNEL_MODE] = { .type = NLA_U32 }, }; static int nft_tunnel_get_init(const struct nft_ctx *ctx, @@ -74,6 +94,14 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx, priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]); + if (tb[NFTA_TUNNEL_MODE]) { + priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE])); + if (priv->mode > NFT_TUNNEL_MODE_MAX) + return -EOPNOTSUPP; + } else { + priv->mode = NFT_TUNNEL_MODE_NONE; + } + return nft_validate_register_store(ctx, priv->dreg, NULL, NFT_DATA_VALUE, len); } @@ -87,6 +115,8 @@ static int nft_tunnel_get_dump(struct sk_buff *skb, goto nla_put_failure; if (nft_dump_register(skb, NFTA_TUNNEL_DREG, priv->dreg)) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_TUNNEL_MODE, htonl(priv->mode))) + goto nla_put_failure; return 0; nla_put_failure: -- cgit v1.2.3-59-g8ed1b From ac02bcf9cc1e4aefb0a7156a2ae26e8396b15f24 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 2 Feb 2019 10:17:00 +0100 Subject: netfilter: ipv6: avoid indirect calls for IPV6=y case indirect calls are only needed if ipv6 is a module. Add helpers to abstract the v6ops indirections and use them instead. fragment, reroute and route_input are kept as indirect calls. The first two are not not used in hot path and route_input is only used by bridge netfilter. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 64 +++++++++++++++++++++++++++++++-------- net/ipv6/netfilter.c | 15 ++++----- net/ipv6/netfilter/nft_fib_ipv6.c | 9 ++---- net/netfilter/utils.c | 6 ++-- net/netfilter/xt_addrtype.c | 16 +++------- 5 files changed, 68 insertions(+), 42 deletions(-) (limited to 'net/netfilter') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index ad4223c10488..471e9467105b 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -25,29 +25,24 @@ struct nf_queue_entry; * if IPv6 is a module. */ struct nf_ipv6_ops { +#if IS_MODULE(CONFIG_IPV6) int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); - void (*route_input)(struct sk_buff *skb); - int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, - int (*output)(struct net *, struct sock *, struct sk_buff *)); - int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, - bool strict); - int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); -#if IS_MODULE(CONFIG_IPV6) int (*route_me_harder)(struct net *net, struct sk_buff *skb); int (*dev_get_saddr)(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); + int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, + bool strict); #endif + void (*route_input)(struct sk_buff *skb); + int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)); + int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); }; #ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct net *net, struct sk_buff *skb); -__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, - unsigned int dataoff, u_int8_t protocol); - -int ipv6_netfilter_init(void); -void ipv6_netfilter_fini(void); +#include extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) @@ -55,6 +50,49 @@ static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) return rcu_dereference(nf_ipv6_ops); } +static inline int nf_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); + + if (!v6_ops) + return 1; + + return v6_ops->chk_addr(net, addr, dev, strict); +#else + return ipv6_chk_addr(net, addr, dev, strict); +#endif +} + +int __nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict); + +static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict) +{ +#if IS_MODULE(CONFIG_IPV6) + const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); + + if (v6ops) + return v6ops->route(net, dst, fl, strict); + + return -EHOSTUNREACH; +#endif +#if IS_BUILTIN(CONFIG_IPV6) + return __nf_ip6_route(net, dst, fl, strict); +#else + return -EHOSTUNREACH; +#endif +} + +int ip6_route_me_harder(struct net *net, struct sk_buff *skb); +__sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, + unsigned int dataoff, u_int8_t protocol); + +int ipv6_netfilter_init(void); +void ipv6_netfilter_fini(void); + #else /* CONFIG_NETFILTER */ static inline int ipv6_netfilter_init(void) { return 0; } static inline void ipv6_netfilter_fini(void) { return; } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 0a5caf263889..a8263031f3a6 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -84,8 +84,8 @@ static int nf_ip6_reroute(struct sk_buff *skb, return 0; } -static int nf_ip6_route(struct net *net, struct dst_entry **dst, - struct flowi *fl, bool strict) +int __nf_ip6_route(struct net *net, struct dst_entry **dst, + struct flowi *fl, bool strict) { static const struct ipv6_pinfo fake_pinfo; static const struct inet_sock fake_sk = { @@ -105,17 +105,18 @@ static int nf_ip6_route(struct net *net, struct dst_entry **dst, *dst = result; return err; } +EXPORT_SYMBOL_GPL(__nf_ip6_route); static const struct nf_ipv6_ops ipv6ops = { - .chk_addr = ipv6_chk_addr, - .route_input = ip6_route_input, - .fragment = ip6_fragment, - .route = nf_ip6_route, - .reroute = nf_ip6_reroute, #if IS_MODULE(CONFIG_IPV6) + .chk_addr = ipv6_chk_addr, .route_me_harder = ip6_route_me_harder, .dev_get_saddr = ipv6_dev_get_saddr, + .route = __nf_ip6_route, #endif + .route_input = ip6_route_input, + .fragment = ip6_fragment, + .reroute = nf_ip6_reroute, }; int __init ipv6_netfilter_init(void) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 36be3cf0adef..73cdc0bc63f7 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -59,7 +59,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, struct ipv6hdr *iph) { const struct net_device *dev = NULL; - const struct nf_ipv6_ops *v6ops; int route_err, addrtype; struct rt6_info *rt; struct flowi6 fl6 = { @@ -68,10 +67,6 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, }; u32 ret = 0; - v6ops = nf_get_ipv6_ops(); - if (!v6ops) - return RTN_UNREACHABLE; - if (priv->flags & NFTA_FIB_F_IIF) dev = nft_in(pkt); else if (priv->flags & NFTA_FIB_F_OIF) @@ -79,10 +74,10 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); - if (dev && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) + if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; - route_err = v6ops->route(nft_net(pkt), (struct dst_entry **)&rt, + route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt, flowi6_to_flowi(&fl6), false); if (route_err) goto err; diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 55af9f247993..06dc55590441 100644 --- a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(nf_checksum_partial); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family) { - const struct nf_ipv6_ops *v6ops; + const struct nf_ipv6_ops *v6ops __maybe_unused; int ret = 0; switch (family) { @@ -170,9 +170,7 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, ret = nf_ip_route(net, dst, fl, strict); break; case AF_INET6: - v6ops = rcu_dereference(nf_ipv6_ops); - if (v6ops) - ret = v6ops->route(net, dst, fl, strict); + ret = nf_ip6_route(net, dst, fl, strict); break; } diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 89e281b3bfc2..29987ff03621 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -36,7 +36,6 @@ MODULE_ALIAS("ip6t_addrtype"); static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, const struct in6_addr *addr, u16 mask) { - const struct nf_ipv6_ops *v6ops; struct flowi6 flow; struct rt6_info *rt; u32 ret = 0; @@ -47,18 +46,13 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, if (dev) flow.flowi6_oif = dev->ifindex; - v6ops = nf_get_ipv6_ops(); - if (v6ops) { - if (dev && (mask & XT_ADDRTYPE_LOCAL)) { - if (v6ops->chk_addr(net, addr, dev, true)) - ret = XT_ADDRTYPE_LOCAL; - } - route_err = v6ops->route(net, (struct dst_entry **)&rt, - flowi6_to_flowi(&flow), false); - } else { - route_err = 1; + if (dev && (mask & XT_ADDRTYPE_LOCAL)) { + if (nf_ipv6_chk_addr(net, addr, dev, true)) + ret = XT_ADDRTYPE_LOCAL; } + route_err = nf_ip6_route(net, (struct dst_entry **)&rt, + flowi6_to_flowi(&flow), false); if (route_err) return XT_ADDRTYPE_UNREACHABLE; -- cgit v1.2.3-59-g8ed1b From 48ab807c792fa9074a46cfdc837023fadb46fb73 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 7 Feb 2019 13:13:11 +0000 Subject: netfilter: conntrack: fix indentation issue A statement in an if block is not indented correctly. Fix this. Signed-off-by: Colin Ian King Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 8071bb04a849..349b42a65c8a 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2675,7 +2675,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb, ret = ctnetlink_dump_tuples_ip(skb, &m); if (ret >= 0) { l4proto = nf_ct_l4proto_find(tuple->dst.protonum); - ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); + ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto); } rcu_read_unlock(); -- cgit v1.2.3-59-g8ed1b From 6fde9df6b76eaf2395d158628d5f915857981a44 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 18:44:56 -0600 Subject: ipvs: Use struct_size() helper One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; size = sizeof(struct foo) + count * sizeof(struct boo); instance = alloc(size, GFP_KERNEL) Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: size = struct_size(instance, entry, count); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- net/netfilter/ipvs/ip_vs_ctl.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 432141f04af3..446beeb5e7b2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2722,8 +2722,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) int size; get = (struct ip_vs_get_services *)arg; - size = sizeof(*get) + - sizeof(struct ip_vs_service_entry) * get->num_services; + size = struct_size(get, entrytable, get->num_services); if (*len != size) { pr_err("length: %u != %u\n", *len, size); ret = -EINVAL; @@ -2764,8 +2763,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) int size; get = (struct ip_vs_get_dests *)arg; - size = sizeof(*get) + - sizeof(struct ip_vs_dest_entry) * get->num_dests; + size = struct_size(get, entrytable, get->num_dests); if (*len != size) { pr_err("length: %u != %u\n", *len, size); ret = -EINVAL; -- cgit v1.2.3-59-g8ed1b From 6ca64ef37da930ad0fd0f2c3dac1a4607e0af494 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 Feb 2019 18:56:08 -0600 Subject: netfilter: xt_recent: Use struct_size() in kvzalloc() One of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; void *entry[]; }; size = sizeof(struct foo) + count * sizeof(void *); instance = alloc(size, GFP_KERNEL) Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: size = struct_size(instance, entry, count); instance = alloc(size, GFP_KERNEL) Notice that, in this case, variable sz is not necessary, hence it is removed. This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_recent.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index f44de4bc2100..1664d2ec8b2f 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -337,7 +337,6 @@ static int recent_mt_check(const struct xt_mtchk_param *par, unsigned int nstamp_mask; unsigned int i; int ret = -EINVAL; - size_t sz; net_get_random_once(&hash_rnd, sizeof(hash_rnd)); @@ -387,8 +386,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par, goto out; } - sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size; - t = kvzalloc(sz, GFP_KERNEL); + t = kvzalloc(struct_size(t, iphash, ip_list_hash_size), GFP_KERNEL); if (t == NULL) { ret = -ENOMEM; goto out; -- cgit v1.2.3-59-g8ed1b From 13f5251fd17088170c18844534682d9cab5ff5aa Mon Sep 17 00:00:00 2001 From: Chieh-Min Wang Date: Tue, 12 Feb 2019 00:59:55 +0100 Subject: netfilter: conntrack: fix cloned unconfirmed skb->_nfct race in __nf_conntrack_confirm For bridge(br_flood) or broadcast/multicast packets, they could clone skb with unconfirmed conntrack which break the rule that unconfirmed skb->_nfct is never shared. With nfqueue running on my system, the race can be easily reproduced with following warning calltrace: [13257.707525] CPU: 0 PID: 12132 Comm: main Tainted: P W 4.4.60 #7744 [13257.707568] Hardware name: Qualcomm (Flattened Device Tree) [13257.714700] [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [13257.720253] [] (show_stack) from [] (dump_stack+0x94/0xa8) [13257.728240] [] (dump_stack) from [] (warn_slowpath_common+0x94/0xb0) [13257.735268] [] (warn_slowpath_common) from [] (warn_slowpath_null+0x1c/0x24) [13257.743519] [] (warn_slowpath_null) from [] (__nf_conntrack_confirm+0xa8/0x618) [13257.752284] [] (__nf_conntrack_confirm) from [] (ipv4_confirm+0xb8/0xfc) [13257.761049] [] (ipv4_confirm) from [] (nf_iterate+0x48/0xa8) [13257.769725] [] (nf_iterate) from [] (nf_hook_slow+0x30/0xb0) [13257.777108] [] (nf_hook_slow) from [] (br_nf_post_routing+0x274/0x31c) [13257.784486] [] (br_nf_post_routing) from [] (nf_iterate+0x48/0xa8) [13257.792556] [] (nf_iterate) from [] (nf_hook_slow+0x30/0xb0) [13257.800458] [] (nf_hook_slow) from [] (br_forward_finish+0x94/0xa4) [13257.808010] [] (br_forward_finish) from [] (br_nf_forward_finish+0x150/0x1ac) [13257.815736] [] (br_nf_forward_finish) from [] (nf_reinject+0x108/0x170) [13257.824762] [] (nf_reinject) from [] (nfqnl_recv_verdict+0x3d8/0x420) [13257.832924] [] (nfqnl_recv_verdict) from [] (nfnetlink_rcv_msg+0x158/0x248) [13257.841256] [] (nfnetlink_rcv_msg) from [] (netlink_rcv_skb+0x54/0xb0) [13257.849762] [] (netlink_rcv_skb) from [] (netlink_unicast+0x148/0x23c) [13257.858093] [] (netlink_unicast) from [] (netlink_sendmsg+0x2ec/0x368) [13257.866348] [] (netlink_sendmsg) from [] (sock_sendmsg+0x34/0x44) [13257.874590] [] (sock_sendmsg) from [] (___sys_sendmsg+0x1ec/0x200) [13257.882489] [] (___sys_sendmsg) from [] (__sys_sendmsg+0x3c/0x64) [13257.890300] [] (__sys_sendmsg) from [] (ret_fast_syscall+0x0/0x34) The original code just triggered the warning but do nothing. It will caused the shared conntrack moves to the dying list and the packet be droppped (nf_ct_resolve_clash returns NF_DROP for dying conntrack). - Reproduce steps: +----------------------------+ | br0(bridge) | | | +-+---------+---------+------+ | eth0| | eth1| | eth2| | | | | | | +--+--+ +--+--+ +---+-+ | | | | | | +--+-+ +-+--+ +--+-+ | PC1| | PC2| | PC3| +----+ +----+ +----+ iptables -A FORWARD -m mark --mark 0x1000000/0x1000000 -j NFQUEUE --queue-num 100 --queue-bypass ps: Our nfq userspace program will set mark on packets whose connection has already been processed. PC1 sends broadcast packets simulated by hping3: hping3 --rand-source --udp 192.168.1.255 -i u100 - Broadcast racing flow chart is as follow: br_handle_frame BR_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, br_handle_frame_finish) // skb->_nfct (unconfirmed conntrack) is constructed at PRE_ROUTING stage br_handle_frame_finish // check if this packet is broadcast br_flood_forward br_flood list_for_each_entry_rcu(p, &br->port_list, list) // iterate through each port maybe_deliver deliver_clone skb = skb_clone(skb) __br_forward BR_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD,...) // queue in our nfq and received by our userspace program // goto __nf_conntrack_confirm with process context on CPU 1 br_pass_frame_up BR_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN,...) // goto __nf_conntrack_confirm with softirq context on CPU 0 Because conntrack confirm can happen at both INPUT and POSTROUTING stage. So with NFQUEUE running, skb->_nfct with the same unconfirmed conntrack could race on different core. This patch fixes a repeating kernel splat, now it is only displayed once. Signed-off-by: Chieh-Min Wang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 815956ac5a76..85de2a7b0ede 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -936,10 +936,18 @@ __nf_conntrack_confirm(struct sk_buff *skb) * REJECT will give spurious warnings here. */ - /* No external references means no one else could have - * confirmed us. + /* Another skb with the same unconfirmed conntrack may + * win the race. This may happen for bridge(br_flood) + * or broadcast/multicast packets do skb_clone with + * unconfirmed conntrack. */ - WARN_ON(nf_ct_is_confirmed(ct)); + if (unlikely(nf_ct_is_confirmed(ct))) { + WARN_ON_ONCE(1); + nf_conntrack_double_unlock(hash, reply_hash); + local_bh_enable(); + return NF_DROP; + } + pr_debug("Confirming conntrack %p\n", ct); /* We have to check the DYING flag after unlink to prevent * a race against nf_ct_get_next_corpse() possibly called from -- cgit v1.2.3-59-g8ed1b From a3419ce3356cf1fdc69a0524eced84cef730b3bf Mon Sep 17 00:00:00 2001 From: Alin Nastac Date: Sat, 16 Feb 2019 10:49:12 +0100 Subject: netfilter: nf_conntrack_sip: add sip_external_media logic When enabled, the sip_external_media logic will leave SDP payload untouched when it detects that interface towards INVITEd party is the same with the one towards media endpoint. The typical scenario for this logic is when a LAN SIP agent has more than one IP address (uses a different address for media streams than the one used on signalling stream) and it also forwards calls to a voice mailbox located on the WAN side. In such case sip_direct_media must be disabled (so normal calls could be handled by the SIP helper), but media streams that are not traversing this router must also be excluded from address translation (e.g. call forwards). Signed-off-by: Alin Nastac Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 42 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'net/netfilter') diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index c8d2b6688a2a..f067c6b50857 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -21,6 +21,8 @@ #include #include +#include +#include #include #include #include @@ -54,6 +56,11 @@ module_param(sip_direct_media, int, 0600); MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling " "endpoints only (default 1)"); +static int sip_external_media __read_mostly = 0; +module_param(sip_external_media, int, 0600); +MODULE_PARM_DESC(sip_external_media, "Expect Media streams between external " + "endpoints (default 0)"); + const struct nf_nat_sip_hooks *nf_nat_sip_hooks; EXPORT_SYMBOL_GPL(nf_nat_sip_hooks); @@ -861,6 +868,41 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, if (!nf_inet_addr_cmp(daddr, &ct->tuplehash[dir].tuple.src.u3)) return NF_ACCEPT; saddr = &ct->tuplehash[!dir].tuple.src.u3; + } else if (sip_external_media) { + struct net_device *dev = skb_dst(skb)->dev; + struct net *net = dev_net(dev); + struct rtable *rt; + struct flowi4 fl4 = {}; +#if IS_ENABLED(CONFIG_IPV6) + struct flowi6 fl6 = {}; +#endif + struct dst_entry *dst = NULL; + + switch (nf_ct_l3num(ct)) { + case NFPROTO_IPV4: + fl4.daddr = daddr->ip; + rt = ip_route_output_key(net, &fl4); + if (!IS_ERR(rt)) + dst = &rt->dst; + break; + +#if IS_ENABLED(CONFIG_IPV6) + case NFPROTO_IPV6: + fl6.daddr = daddr->in6; + dst = ip6_route_output(net, NULL, &fl6); + if (dst->error) { + dst_release(dst); + dst = NULL; + } + break; +#endif + } + + /* Don't predict any conntracks when media endpoint is reachable + * through the same interface as the signalling peer. + */ + if (dst && dst->dev == dev) + return NF_ACCEPT; } /* We need to check whether the registration exists before attempting -- cgit v1.2.3-59-g8ed1b