Diffstat (limited to 'net/netfilter')
-rw-r--r--  net/netfilter/Kconfig | 16
-rw-r--r--  net/netfilter/Makefile | 6
-rw-r--r--  net/netfilter/ipvs/ip_vs_core.c | 49
-rw-r--r--  net/netfilter/ipvs/ip_vs_ctl.c | 18
-rw-r--r--  net/netfilter/ipvs/ip_vs_ftp.c | 4
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 2
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_sctp.c | 15
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_tcp.c | 23
-rw-r--r--  net/netfilter/ipvs/ip_vs_proto_udp.c | 23
-rw-r--r--  net/netfilter/ipvs/ip_vs_xmit.c | 4
-rw-r--r--  net/netfilter/nf_conntrack_amanda.c | 9
-rw-r--r--  net/netfilter/nf_conntrack_core.c | 235
-rw-r--r--  net/netfilter/nf_conntrack_expect.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_netlink.c | 18
-rw-r--r--  net/netfilter/nf_conntrack_pptp.c | 2
-rw-r--r--  net/netfilter/nf_conntrack_proto.c | 514
-rw-r--r--  net/netfilter/nf_conntrack_proto_dccp.c | 134
-rw-r--r--  net/netfilter/nf_conntrack_proto_generic.c | 85
-rw-r--r--  net/netfilter/nf_conntrack_proto_gre.c | 196
-rw-r--r--  net/netfilter/nf_conntrack_proto_icmp.c | 67
-rw-r--r--  net/netfilter/nf_conntrack_proto_icmpv6.c | 69
-rw-r--r--  net/netfilter/nf_conntrack_proto_sctp.c | 128
-rw-r--r--  net/netfilter/nf_conntrack_proto_tcp.c | 210
-rw-r--r--  net/netfilter/nf_conntrack_proto_udp.c | 80
-rw-r--r--  net/netfilter/nf_conntrack_sip.c | 42
-rw-r--r--  net/netfilter/nf_conntrack_standalone.c | 427
-rw-r--r--  net/netfilter/nf_flow_table_core.c | 2
-rw-r--r--  net/netfilter/nf_nat_core.c | 209
-rw-r--r--  net/netfilter/nf_nat_helper.c | 15
-rw-r--r--  net/netfilter/nf_nat_masquerade.c | 365
-rw-r--r--  net/netfilter/nf_nat_proto.c | 744
-rw-r--r--  net/netfilter/nf_tables_api.c | 199
-rw-r--r--  net/netfilter/nf_tables_core.c | 40
-rw-r--r--  net/netfilter/nfnetlink_cttimeout.c | 19
-rw-r--r--  net/netfilter/nft_bitwise.c | 5
-rw-r--r--  net/netfilter/nft_byteorder.c | 6
-rw-r--r--  net/netfilter/nft_chain_nat.c | 108
-rw-r--r--  net/netfilter/nft_cmp.c | 6
-rw-r--r--  net/netfilter/nft_compat.c | 281
-rw-r--r--  net/netfilter/nft_counter.c | 2
-rw-r--r--  net/netfilter/nft_ct.c | 4
-rw-r--r--  net/netfilter/nft_dynset.c | 18
-rw-r--r--  net/netfilter/nft_hash.c | 121
-rw-r--r--  net/netfilter/nft_immediate.c | 6
-rw-r--r--  net/netfilter/nft_lookup.c | 13
-rw-r--r--  net/netfilter/nft_masq.c | 180
-rw-r--r--  net/netfilter/nft_meta.c | 12
-rw-r--r--  net/netfilter/nft_nat.c | 2
-rw-r--r--  net/netfilter/nft_objref.c | 18
-rw-r--r--  net/netfilter/nft_payload.c | 6
-rw-r--r--  net/netfilter/nft_quota.c | 2
-rw-r--r--  net/netfilter/nft_range.c | 5
-rw-r--r--  net/netfilter/nft_redir.c | 154
-rw-r--r--  net/netfilter/nft_rt.c | 6
-rw-r--r--  net/netfilter/nft_set_hash.c | 38
-rw-r--r--  net/netfilter/nft_tunnel.c | 41
-rw-r--r--  net/netfilter/utils.c | 25
-rw-r--r--  net/netfilter/x_tables.c | 4
-rw-r--r--  net/netfilter/xt_CT.c | 2
-rw-r--r--  net/netfilter/xt_IDLETIMER.c | 14
-rw-r--r--  net/netfilter/xt_addrtype.c | 16
-rw-r--r--  net/netfilter/xt_nat.c | 2
-rw-r--r--  net/netfilter/xt_physdev.c | 9
-rw-r--r--  net/netfilter/xt_recent.c | 4
64 files changed, 3017 insertions(+), 2064 deletions(-)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index beb3a69ce1d4..d43ffb09939b 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -174,7 +174,7 @@ config NF_CT_PROTO_DCCP
If unsure, say Y.
config NF_CT_PROTO_GRE
- tristate
+ bool
config NF_CT_PROTO_SCTP
bool 'SCTP protocol connection tracking support'
@@ -396,7 +396,13 @@ config NETFILTER_NETLINK_GLUE_CT
the enqueued via NFNETLINK.
config NF_NAT
- tristate
+ tristate "Network Address Translation support"
+ depends on NF_CONNTRACK
+ default m if NETFILTER_ADVANCED=n
+ help
+ The NAT option allows masquerading, port forwarding and other
+ forms of full Network Address Port Translation. This can be
+ controlled by iptables, ip6tables or nft.
config NF_NAT_NEEDED
bool
@@ -431,6 +437,9 @@ config NF_NAT_TFTP
config NF_NAT_REDIRECT
bool
+config NF_NAT_MASQUERADE
+ bool
+
config NETFILTER_SYNPROXY
tristate
@@ -523,6 +532,7 @@ config NFT_LIMIT
config NFT_MASQ
depends on NF_CONNTRACK
depends on NF_NAT
+ select NF_NAT_MASQUERADE
tristate "Netfilter nf_tables masquerade support"
help
This option adds the "masquerade" expression that you can use
@@ -532,6 +542,7 @@ config NFT_REDIR
depends on NF_CONNTRACK
depends on NF_NAT
tristate "Netfilter nf_tables redirect support"
+ select NF_NAT_REDIRECT
help
This option adds the "redirect" expression that you can use
to perform NAT in the redirect flavour.
@@ -539,6 +550,7 @@ config NFT_REDIR
config NFT_NAT
depends on NF_CONNTRACK
select NF_NAT
+ depends on NF_TABLES_IPV4 || NF_TABLES_IPV6
tristate "Netfilter nf_tables nat module"
help
This option adds the "nat" expression that you can use to perform
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1ae65a314d7a..4894a85cdd0b 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -13,6 +13,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
+nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
obj-$(CONFIG_NETFILTER) = netfilter.o
@@ -25,8 +26,6 @@ obj-$(CONFIG_NETFILTER_NETLINK_OSF) += nfnetlink_osf.o
# connection tracking
obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o
-obj-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
-
# netlink interface for nf_conntrack
obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o
obj-$(CONFIG_NF_CT_NETLINK_TIMEOUT) += nfnetlink_cttimeout.o
@@ -57,6 +56,7 @@ obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o
obj-$(CONFIG_NF_NAT) += nf_nat.o
nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
+nf_nat-$(CONFIG_NF_NAT_MASQUERADE) += nf_nat_masquerade.o
# NAT helpers
obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
@@ -110,6 +110,8 @@ obj-$(CONFIG_NFT_OSF) += nft_osf.o
obj-$(CONFIG_NFT_TPROXY) += nft_tproxy.o
obj-$(CONFIG_NFT_XFRM) += nft_xfrm.o
+obj-$(CONFIG_NFT_NAT) += nft_chain_nat.o
+
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 235205c93e14..43bbaa32b1d6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -53,6 +53,7 @@
#endif
#include <net/ip_vs.h>
+#include <linux/indirect_call_wrapper.h>
EXPORT_SYMBOL(register_ip_vs_scheduler);
@@ -70,6 +71,29 @@ EXPORT_SYMBOL(ip_vs_get_debug_level);
#endif
EXPORT_SYMBOL(ip_vs_new_conn_out);
+#ifdef CONFIG_IP_VS_PROTO_TCP
+INDIRECT_CALLABLE_DECLARE(int
+ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph));
+#endif
+
+#ifdef CONFIG_IP_VS_PROTO_UDP
+INDIRECT_CALLABLE_DECLARE(int
+ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, struct ip_vs_iphdr *iph));
+#endif
+
+#if defined(CONFIG_IP_VS_PROTO_TCP) && defined(CONFIG_IP_VS_PROTO_UDP)
+#define SNAT_CALL(f, ...) \
+ INDIRECT_CALL_2(f, tcp_snat_handler, udp_snat_handler, __VA_ARGS__)
+#elif defined(CONFIG_IP_VS_PROTO_TCP)
+#define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, tcp_snat_handler, __VA_ARGS__)
+#elif defined(CONFIG_IP_VS_PROTO_UDP)
+#define SNAT_CALL(f, ...) INDIRECT_CALL_1(f, udp_snat_handler, __VA_ARGS__)
+#else
+#define SNAT_CALL(f, ...) f(__VA_ARGS__)
+#endif
+
static unsigned int ip_vs_net_id __read_mostly;
/* netns cnt used for uniqueness */
static atomic_t ipvs_netns_cnt = ATOMIC_INIT(0);
@@ -478,7 +502,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
*/
if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK)) {
iph->hdr_flags ^= IP_VS_HDR_INVERSE;
- cp = pp->conn_in_get(svc->ipvs, svc->af, skb, iph);
+ cp = INDIRECT_CALL_1(pp->conn_in_get,
+ ip_vs_conn_in_get_proto, svc->ipvs,
+ svc->af, skb, iph);
iph->hdr_flags ^= IP_VS_HDR_INVERSE;
if (cp) {
@@ -972,7 +998,8 @@ static int ip_vs_out_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb,
ip_vs_fill_iph_skb_icmp(AF_INET, skb, offset, true, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(ipvs, AF_INET, skb, &ciph);
+ cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto,
+ ipvs, AF_INET, skb, &ciph);
if (!cp)
return NF_ACCEPT;
@@ -1028,7 +1055,8 @@ static int ip_vs_out_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
return NF_ACCEPT;
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(ipvs, AF_INET6, skb, &ciph);
+ cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto,
+ ipvs, AF_INET6, skb, &ciph);
if (!cp)
return NF_ACCEPT;
@@ -1263,7 +1291,8 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
goto drop;
/* mangle the packet */
- if (pp->snat_handler && !pp->snat_handler(skb, pp, cp, iph))
+ if (pp->snat_handler &&
+ !SNAT_CALL(pp->snat_handler, skb, pp, cp, iph))
goto drop;
#ifdef CONFIG_IP_VS_IPV6
@@ -1389,7 +1418,8 @@ ip_vs_out(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, in
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(ipvs, af, skb, &iph);
+ cp = INDIRECT_CALL_1(pp->conn_out_get, ip_vs_conn_out_get_proto,
+ ipvs, af, skb, &iph);
if (likely(cp)) {
if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
@@ -1642,7 +1672,8 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related,
/* The embedded headers contain source and dest in reverse order.
* For IPIP this is error for request, not for reply.
*/
- cp = pp->conn_in_get(ipvs, AF_INET, skb, &ciph);
+ cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
+ ipvs, AF_INET, skb, &ciph);
if (!cp) {
int v;
@@ -1794,7 +1825,8 @@ static int ip_vs_in_icmp_v6(struct netns_ipvs *ipvs, struct sk_buff *skb,
/* The embedded headers contain source and dest in reverse order
* if not from localhost
*/
- cp = pp->conn_in_get(ipvs, AF_INET6, skb, &ciph);
+ cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
+ ipvs, AF_INET6, skb, &ciph);
if (!cp) {
int v;
@@ -1923,7 +1955,8 @@ ip_vs_in(struct netns_ipvs *ipvs, unsigned int hooknum, struct sk_buff *skb, int
/*
* Check if the packet belongs to an existing connection entry
*/
- cp = pp->conn_in_get(ipvs, af, skb, &iph);
+ cp = INDIRECT_CALL_1(pp->conn_in_get, ip_vs_conn_in_get_proto,
+ ipvs, af, skb, &iph);
conn_reuse_mode = sysctl_conn_reuse_mode(ipvs);
if (conn_reuse_mode && !iph.fragoffs && is_new_conn(skb, &iph) && cp) {
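Aside: a minimal userspace sketch of the INDIRECT_CALL_1()/INDIRECT_CALL_2() pattern that the new SNAT_CALL() macro above relies on. The idea is to compare the function pointer against its likely targets and call them directly, so the common TCP/UDP case avoids a retpoline'd indirect branch. The macro bodies and the tcp_snat/udp_snat/sctp_snat helpers are simplified stand-ins, not the kernel's <linux/indirect_call_wrapper.h>.

/* Simplified re-implementation, for illustration only. */
#include <stdio.h>

#define INDIRECT_CALL_1(f, f1, ...) \
        ((f) == (f1) ? f1(__VA_ARGS__) : (f)(__VA_ARGS__))
#define INDIRECT_CALL_2(f, f2, f1, ...) \
        ((f) == (f2) ? f2(__VA_ARGS__) : INDIRECT_CALL_1(f, f1, __VA_ARGS__))

static int tcp_snat(int skb)  { return printf("tcp snat on skb %d\n", skb); }
static int udp_snat(int skb)  { return printf("udp snat on skb %d\n", skb); }
static int sctp_snat(int skb) { return printf("sctp snat on skb %d\n", skb); }

int main(void)
{
        int (*snat_handler)(int) = udp_snat;

        /* Mirrors SNAT_CALL(): TCP and UDP are compared for and called
         * directly, anything else (here SCTP) still goes through the
         * function pointer.
         */
        INDIRECT_CALL_2(snat_handler, tcp_snat, udp_snat, 1);

        snat_handler = sctp_snat;
        INDIRECT_CALL_2(snat_handler, tcp_snat, udp_snat, 2);
        return 0;
}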
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ac8d848d7624..053cd96b9c76 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2745,8 +2745,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
int size;
get = (struct ip_vs_get_services *)arg;
- size = sizeof(*get) +
- sizeof(struct ip_vs_service_entry) * get->num_services;
+ size = struct_size(get, entrytable, get->num_services);
if (*len != size) {
pr_err("length: %u != %u\n", *len, size);
ret = -EINVAL;
@@ -2787,8 +2786,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
int size;
get = (struct ip_vs_get_dests *)arg;
- size = sizeof(*get) +
- sizeof(struct ip_vs_dest_entry) * get->num_dests;
+ size = struct_size(get, entrytable, get->num_dests);
if (*len != size) {
pr_err("length: %u != %u\n", *len, size);
ret = -EINVAL;
@@ -3088,7 +3086,7 @@ static bool ip_vs_is_af_valid(int af)
static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs,
struct ip_vs_service_user_kern *usvc,
- struct nlattr *nla, int full_entry,
+ struct nlattr *nla, bool full_entry,
struct ip_vs_service **ret_svc)
{
struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
@@ -3175,7 +3173,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs,
struct ip_vs_service *svc;
int ret;
- ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc);
+ ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, false, &svc);
return ret ? ERR_PTR(ret) : svc;
}
@@ -3285,7 +3283,7 @@ out_err:
}
static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
- struct nlattr *nla, int full_entry)
+ struct nlattr *nla, bool full_entry)
{
struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
struct nlattr *nla_addr, *nla_port;
@@ -3547,11 +3545,11 @@ out:
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
{
+ bool need_full_svc = false, need_full_dest = false;
struct ip_vs_service *svc = NULL;
struct ip_vs_service_user_kern usvc;
struct ip_vs_dest_user_kern udest;
int ret = 0, cmd;
- int need_full_svc = 0, need_full_dest = 0;
struct net *net = sock_net(skb->sk);
struct netns_ipvs *ipvs = net_ipvs(net);
@@ -3575,7 +3573,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
* received a valid one. We need a full service specification when
* adding / editing a service. Only identifying members otherwise. */
if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
- need_full_svc = 1;
+ need_full_svc = true;
ret = ip_vs_genl_parse_service(ipvs, &usvc,
info->attrs[IPVS_CMD_ATTR_SERVICE],
@@ -3595,7 +3593,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
cmd == IPVS_CMD_DEL_DEST) {
if (cmd != IPVS_CMD_DEL_DEST)
- need_full_dest = 1;
+ need_full_dest = true;
ret = ip_vs_genl_parse_dest(&udest,
info->attrs[IPVS_CMD_ATTR_DEST],
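Aside: the two length checks above switch from open-coded arithmetic to struct_size(). A rough userspace illustration of what that helper buys for a flexible-array struct such as ip_vs_get_dests: the same sizeof(*get) + n * sizeof(entry) computation, but saturating instead of wrapping on overflow. struct get_req, struct entry and struct_size_sketch() below are made up for this sketch; the real helper lives in include/linux/overflow.h.

/* Simplified stand-in for the kernel's struct_size()/overflow helpers. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct entry { uint32_t a, b; };

struct get_req {
        uint32_t num;
        struct entry entrytable[];      /* flexible array, like ip_vs_get_dests */
};

static size_t struct_size_sketch(size_t base, size_t elem, size_t n)
{
        if (n && elem > (SIZE_MAX - base) / n)
                return SIZE_MAX;        /* saturate instead of wrapping */
        return base + n * elem;
}

int main(void)
{
        size_t n = 3;
        size_t sz = struct_size_sketch(sizeof(struct get_req),
                                       sizeof(struct entry), n);

        printf("need %zu bytes for %zu entries\n", sz, n);
        free(malloc(sz));       /* one overflow-safe size for the allocation */
        return 0;
}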
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 4398a72edec5..fe69d46ff779 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -124,7 +124,7 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
}
s = data + plen;
if (skip) {
- int found = 0;
+ bool found = false;
for (;; s++) {
if (s == data_limit)
@@ -136,7 +136,7 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
if (!ext && isdigit(*s))
break;
if (*s == skip)
- found = 1;
+ found = true;
} else if (*s != skip) {
break;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 5320d39976e1..480598cb0f05 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -129,7 +129,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
.conn_out_get = ah_esp_conn_out_get,
.snat_handler = NULL,
.dnat_handler = NULL,
- .csum_check = NULL,
.state_transition = NULL,
.register_app = NULL,
.unregister_app = NULL,
@@ -152,7 +151,6 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
.conn_out_get = ah_esp_conn_out_get,
.snat_handler = NULL,
.dnat_handler = NULL,
- .csum_check = NULL,
.state_transition = NULL,
.register_app = NULL,
.unregister_app = NULL,
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index b0cd7d08f2a7..b58ddb7dffd1 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -10,6 +10,9 @@
#include <net/ip_vs.h>
static int
+sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
+
+static int
sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
@@ -105,7 +108,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
int ret;
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+ if (!sctp_csum_check(cp->af, skb, pp))
return 0;
/* Call application helper if needed */
@@ -152,7 +155,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
int ret;
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+ if (!sctp_csum_check(cp->af, skb, pp))
return 0;
/* Call application helper if needed */
@@ -183,7 +186,7 @@ static int
sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
{
unsigned int sctphoff;
- struct sctphdr *sh, _sctph;
+ struct sctphdr *sh;
__le32 cmp, val;
#ifdef CONFIG_IP_VS_IPV6
@@ -193,10 +196,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
#endif
sctphoff = ip_hdrlen(skb);
- sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph);
- if (sh == NULL)
- return 0;
-
+ sh = (struct sctphdr *)(skb->data + sctphoff);
cmp = sh->checksum;
val = sctp_compute_cksum(skb, sctphoff);
@@ -587,7 +587,6 @@ struct ip_vs_protocol ip_vs_protocol_sctp = {
.conn_out_get = ip_vs_conn_out_get_proto,
.snat_handler = sctp_snat_handler,
.dnat_handler = sctp_dnat_handler,
- .csum_check = sctp_csum_check,
.state_name = sctp_state_name,
.state_transition = sctp_state_transition,
.app_conn_bind = sctp_app_conn_bind,
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 1770fc6ce960..00ce07dda980 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -28,10 +28,14 @@
#include <net/ip6_checksum.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/ip_vs.h>
static int
+tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
+
+static int
tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
@@ -143,14 +147,14 @@ tcp_partial_csum_update(int af, struct tcphdr *tcph,
}
-static int
+INDIRECT_CALLABLE_SCOPE int
tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
{
struct tcphdr *tcph;
unsigned int tcphoff = iph->len;
+ bool payload_csum = false;
int oldlen;
- int payload_csum = 0;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6 && iph->fragoffs)
@@ -166,7 +170,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
int ret;
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+ if (!tcp_csum_check(cp->af, skb, pp))
return 0;
/* Call application helper if needed */
@@ -176,7 +180,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
if (ret == 1)
oldlen = skb->len - tcphoff;
else
- payload_csum = 1;
+ payload_csum = true;
}
tcph = (void *)skb_network_header(skb) + tcphoff;
@@ -192,7 +196,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
cp->dport, cp->vport);
if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = (cp->app && pp->csum_check) ?
+ skb->ip_summed = cp->app ?
CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
} else {
/* full checksum calculation */
@@ -227,8 +231,8 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
{
struct tcphdr *tcph;
unsigned int tcphoff = iph->len;
+ bool payload_csum = false;
int oldlen;
- int payload_csum = 0;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6 && iph->fragoffs)
@@ -244,7 +248,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
int ret;
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+ if (!tcp_csum_check(cp->af, skb, pp))
return 0;
/*
@@ -257,7 +261,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
if (ret == 1)
oldlen = skb->len - tcphoff;
else
- payload_csum = 1;
+ payload_csum = true;
}
tcph = (void *)skb_network_header(skb) + tcphoff;
@@ -275,7 +279,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
cp->vport, cp->dport);
if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = (cp->app && pp->csum_check) ?
+ skb->ip_summed = cp->app ?
CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
} else {
/* full checksum calculation */
@@ -736,7 +740,6 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
.conn_out_get = ip_vs_conn_out_get_proto,
.snat_handler = tcp_snat_handler,
.dnat_handler = tcp_dnat_handler,
- .csum_check = tcp_csum_check,
.state_name = tcp_state_name,
.state_transition = tcp_state_transition,
.app_conn_bind = tcp_app_conn_bind,
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 0f53c49025f8..92c078abcb3e 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -23,12 +23,16 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/udp.h>
+#include <linux/indirect_call_wrapper.h>
#include <net/ip_vs.h>
#include <net/ip.h>
#include <net/ip6_checksum.h>
static int
+udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp);
+
+static int
udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp,
@@ -133,14 +137,14 @@ udp_partial_csum_update(int af, struct udphdr *uhdr,
}
-static int
+INDIRECT_CALLABLE_SCOPE int
udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
{
struct udphdr *udph;
unsigned int udphoff = iph->len;
+ bool payload_csum = false;
int oldlen;
- int payload_csum = 0;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6 && iph->fragoffs)
@@ -156,7 +160,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
int ret;
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+ if (!udp_csum_check(cp->af, skb, pp))
return 0;
/*
@@ -168,7 +172,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
if (ret == 1)
oldlen = skb->len - udphoff;
else
- payload_csum = 1;
+ payload_csum = true;
}
udph = (void *)skb_network_header(skb) + udphoff;
@@ -186,7 +190,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr,
cp->dport, cp->vport);
if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = (cp->app && pp->csum_check) ?
+ skb->ip_summed = cp->app ?
CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
} else {
/* full checksum calculation */
@@ -222,8 +226,8 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
{
struct udphdr *udph;
unsigned int udphoff = iph->len;
+ bool payload_csum = false;
int oldlen;
- int payload_csum = 0;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6 && iph->fragoffs)
@@ -239,7 +243,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
int ret;
/* Some checks before mangling */
- if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
+ if (!udp_csum_check(cp->af, skb, pp))
return 0;
/*
@@ -252,7 +256,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
if (ret == 1)
oldlen = skb->len - udphoff;
else
- payload_csum = 1;
+ payload_csum = true;
}
udph = (void *)skb_network_header(skb) + udphoff;
@@ -270,7 +274,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr,
cp->vport, cp->dport);
if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = (cp->app && pp->csum_check) ?
+ skb->ip_summed = cp->app ?
CHECKSUM_UNNECESSARY : CHECKSUM_NONE;
} else {
/* full checksum calculation */
@@ -494,7 +498,6 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
.conn_out_get = ip_vs_conn_out_get_proto,
.snat_handler = udp_snat_handler,
.dnat_handler = udp_dnat_handler,
- .csum_check = udp_csum_check,
.state_transition = udp_state_transition,
.state_name = udp_state_name,
.register_app = udp_register_app,
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 473cce2a5231..175349fcf91f 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -126,7 +126,7 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
{
struct flowi4 fl4;
struct rtable *rt;
- int loop = 0;
+ bool loop = false;
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr;
@@ -149,7 +149,7 @@ retry:
ip_rt_put(rt);
*saddr = fl4.saddr;
flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
- loop++;
+ loop = true;
goto retry;
}
*saddr = fl4.saddr;
diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c
index 20edd589fe06..f2681ec5b5f6 100644
--- a/net/netfilter/nf_conntrack_amanda.c
+++ b/net/netfilter/nf_conntrack_amanda.c
@@ -54,6 +54,7 @@ enum amanda_strings {
SEARCH_DATA,
SEARCH_MESG,
SEARCH_INDEX,
+ SEARCH_STATE,
};
static struct {
@@ -81,6 +82,10 @@ static struct {
.string = "INDEX ",
.len = 6,
},
+ [SEARCH_STATE] = {
+ .string = "STATE ",
+ .len = 6,
+ },
};
static int amanda_help(struct sk_buff *skb,
@@ -124,7 +129,7 @@ static int amanda_help(struct sk_buff *skb,
goto out;
stop += start;
- for (i = SEARCH_DATA; i <= SEARCH_INDEX; i++) {
+ for (i = SEARCH_DATA; i <= SEARCH_STATE; i++) {
off = skb_find_text(skb, start, stop, search[i].ts);
if (off == UINT_MAX)
continue;
@@ -168,7 +173,7 @@ out:
}
static const struct nf_conntrack_expect_policy amanda_exp_policy = {
- .max_expected = 3,
+ .max_expected = 4,
.timeout = 180,
};
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index db4d46332e86..82bfbeef46af 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -51,7 +51,6 @@
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netns/hash.h>
#include <net/ip.h>
@@ -222,6 +221,24 @@ static u32 hash_conntrack(const struct net *net,
return scale_hash(hash_conntrack_raw(tuple, net));
}
+static bool nf_ct_get_tuple_ports(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct nf_conntrack_tuple *tuple)
+{ struct {
+ __be16 sport;
+ __be16 dport;
+ } _inet_hdr, *inet_hdr;
+
+ /* Actually only need first 4 bytes to get ports. */
+ inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
+ if (!inet_hdr)
+ return false;
+
+ tuple->src.u.udp.port = inet_hdr->sport;
+ tuple->dst.u.udp.port = inet_hdr->dport;
+ return true;
+}
+
static bool
nf_ct_get_tuple(const struct sk_buff *skb,
unsigned int nhoff,
@@ -229,16 +246,11 @@ nf_ct_get_tuple(const struct sk_buff *skb,
u_int16_t l3num,
u_int8_t protonum,
struct net *net,
- struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_l4proto *l4proto)
+ struct nf_conntrack_tuple *tuple)
{
unsigned int size;
const __be32 *ap;
__be32 _addrs[8];
- struct {
- __be16 sport;
- __be16 dport;
- } _inet_hdr, *inet_hdr;
memset(tuple, 0, sizeof(*tuple));
@@ -274,16 +286,36 @@ nf_ct_get_tuple(const struct sk_buff *skb,
tuple->dst.protonum = protonum;
tuple->dst.dir = IP_CT_DIR_ORIGINAL;
- if (unlikely(l4proto->pkt_to_tuple))
- return l4proto->pkt_to_tuple(skb, dataoff, net, tuple);
-
- /* Actually only need first 4 bytes to get ports. */
- inet_hdr = skb_header_pointer(skb, dataoff, sizeof(_inet_hdr), &_inet_hdr);
- if (!inet_hdr)
- return false;
+ switch (protonum) {
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ICMPV6:
+ return icmpv6_pkt_to_tuple(skb, dataoff, net, tuple);
+#endif
+ case IPPROTO_ICMP:
+ return icmp_pkt_to_tuple(skb, dataoff, net, tuple);
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE:
+ return gre_pkt_to_tuple(skb, dataoff, net, tuple);
+#endif
+ case IPPROTO_TCP:
+ case IPPROTO_UDP: /* fallthrough */
+ return nf_ct_get_tuple_ports(skb, dataoff, tuple);
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+ case IPPROTO_UDPLITE:
+ return nf_ct_get_tuple_ports(skb, dataoff, tuple);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ case IPPROTO_SCTP:
+ return nf_ct_get_tuple_ports(skb, dataoff, tuple);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ case IPPROTO_DCCP:
+ return nf_ct_get_tuple_ports(skb, dataoff, tuple);
+#endif
+ default:
+ break;
+ }
- tuple->src.u.udp.port = inet_hdr->sport;
- tuple->dst.u.udp.port = inet_hdr->dport;
return true;
}
@@ -366,33 +398,20 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff,
u_int16_t l3num,
struct net *net, struct nf_conntrack_tuple *tuple)
{
- const struct nf_conntrack_l4proto *l4proto;
u8 protonum;
int protoff;
- int ret;
-
- rcu_read_lock();
protoff = get_l4proto(skb, nhoff, l3num, &protonum);
- if (protoff <= 0) {
- rcu_read_unlock();
+ if (protoff <= 0)
return false;
- }
- l4proto = __nf_ct_l4proto_find(protonum);
-
- ret = nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple,
- l4proto);
-
- rcu_read_unlock();
- return ret;
+ return nf_ct_get_tuple(skb, nhoff, protoff, l3num, protonum, net, tuple);
}
EXPORT_SYMBOL_GPL(nf_ct_get_tuplepr);
bool
nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
- const struct nf_conntrack_tuple *orig,
- const struct nf_conntrack_l4proto *l4proto)
+ const struct nf_conntrack_tuple *orig)
{
memset(inverse, 0, sizeof(*inverse));
@@ -415,8 +434,14 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
inverse->dst.protonum = orig->dst.protonum;
- if (unlikely(l4proto->invert_tuple))
- return l4proto->invert_tuple(inverse, orig);
+ switch (orig->dst.protonum) {
+ case IPPROTO_ICMP:
+ return nf_conntrack_invert_icmp_tuple(inverse, orig);
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ICMPV6:
+ return nf_conntrack_invert_icmpv6_tuple(inverse, orig);
+#endif
+ }
inverse->src.u.all = orig->dst.u.all;
inverse->dst.u.all = orig->src.u.all;
@@ -526,11 +551,20 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl)
}
EXPORT_SYMBOL_GPL(nf_ct_tmpl_free);
+static void destroy_gre_conntrack(struct nf_conn *ct)
+{
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ struct nf_conn *master = ct->master;
+
+ if (master)
+ nf_ct_gre_keymap_destroy(master);
+#endif
+}
+
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
- const struct nf_conntrack_l4proto *l4proto;
pr_debug("destroy_conntrack(%p)\n", ct);
WARN_ON(atomic_read(&nfct->use) != 0);
@@ -539,9 +573,9 @@ destroy_conntrack(struct nf_conntrack *nfct)
nf_ct_tmpl_free(ct);
return;
}
- l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
- if (l4proto->destroy)
- l4proto->destroy(ct);
+
+ if (unlikely(nf_ct_protonum(ct) == IPPROTO_GRE))
+ destroy_gre_conntrack(ct);
local_bh_disable();
/* Expectations will have been removed in clean_from_lists,
@@ -840,7 +874,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
enum ip_conntrack_info oldinfo;
struct nf_conn *loser_ct = nf_ct_get(skb, &oldinfo);
- l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
+ l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
if (l4proto->allow_clash &&
!nf_ct_is_dying(ct) &&
atomic_inc_not_zero(&ct->ct_general.use)) {
@@ -901,10 +935,18 @@ __nf_conntrack_confirm(struct sk_buff *skb)
* REJECT will give spurious warnings here.
*/
- /* No external references means no one else could have
- * confirmed us.
+ /* Another skb with the same unconfirmed conntrack may
+ * win the race. This may happen for bridge(br_flood)
+ * or broadcast/multicast packets do skb_clone with
+ * unconfirmed conntrack.
*/
- WARN_ON(nf_ct_is_confirmed(ct));
+ if (unlikely(nf_ct_is_confirmed(ct))) {
+ WARN_ON_ONCE(1);
+ nf_conntrack_double_unlock(hash, reply_hash);
+ local_bh_enable();
+ return NF_DROP;
+ }
+
pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
* a race against nf_ct_get_next_corpse() possibly called from
@@ -1128,7 +1170,7 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
return true;
- l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
+ l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
return true;
@@ -1358,7 +1400,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free);
static noinline struct nf_conntrack_tuple_hash *
init_conntrack(struct net *net, struct nf_conn *tmpl,
const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_l4proto *l4proto,
struct sk_buff *skb,
unsigned int dataoff, u32 hash)
{
@@ -1371,7 +1412,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
struct nf_conn_timeout *timeout_ext;
struct nf_conntrack_zone tmp;
- if (!nf_ct_invert_tuple(&repl_tuple, tuple, l4proto)) {
+ if (!nf_ct_invert_tuple(&repl_tuple, tuple)) {
pr_debug("Can't invert tuple.\n");
return NULL;
}
@@ -1453,7 +1494,6 @@ resolve_normal_ct(struct nf_conn *tmpl,
struct sk_buff *skb,
unsigned int dataoff,
u_int8_t protonum,
- const struct nf_conntrack_l4proto *l4proto,
const struct nf_hook_state *state)
{
const struct nf_conntrack_zone *zone;
@@ -1466,7 +1506,7 @@ resolve_normal_ct(struct nf_conn *tmpl,
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, state->pf, protonum, state->net,
- &tuple, l4proto)) {
+ &tuple)) {
pr_debug("Can't get tuple\n");
return 0;
}
@@ -1476,7 +1516,7 @@ resolve_normal_ct(struct nf_conn *tmpl,
hash = hash_conntrack_raw(&tuple, state->net);
h = __nf_conntrack_find_get(state->net, zone, &tuple, hash);
if (!h) {
- h = init_conntrack(state->net, tmpl, &tuple, l4proto,
+ h = init_conntrack(state->net, tmpl, &tuple,
skb, dataoff, hash);
if (!h)
return 0;
@@ -1538,10 +1578,66 @@ nf_conntrack_handle_icmp(struct nf_conn *tmpl,
return ret;
}
+static int generic_packet(struct nf_conn *ct, struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo)
+{
+ const unsigned int *timeout = nf_ct_timeout_lookup(ct);
+
+ if (!timeout)
+ timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout;
+
+ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
+ return NF_ACCEPT;
+}
+
+/* Returns verdict for packet, or -1 for invalid. */
+static int nf_conntrack_handle_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
+{
+ switch (nf_ct_protonum(ct)) {
+ case IPPROTO_TCP:
+ return nf_conntrack_tcp_packet(ct, skb, dataoff,
+ ctinfo, state);
+ case IPPROTO_UDP:
+ return nf_conntrack_udp_packet(ct, skb, dataoff,
+ ctinfo, state);
+ case IPPROTO_ICMP:
+ return nf_conntrack_icmp_packet(ct, skb, ctinfo, state);
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ICMPV6:
+ return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+ case IPPROTO_UDPLITE:
+ return nf_conntrack_udplite_packet(ct, skb, dataoff,
+ ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ case IPPROTO_SCTP:
+ return nf_conntrack_sctp_packet(ct, skb, dataoff,
+ ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ case IPPROTO_DCCP:
+ return nf_conntrack_dccp_packet(ct, skb, dataoff,
+ ctinfo, state);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE:
+ return nf_conntrack_gre_packet(ct, skb, dataoff,
+ ctinfo, state);
+#endif
+ }
+
+ return generic_packet(ct, skb, ctinfo);
+}
+
unsigned int
nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
{
- const struct nf_conntrack_l4proto *l4proto;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct, *tmpl;
u_int8_t protonum;
@@ -1568,8 +1664,6 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
goto out;
}
- l4proto = __nf_ct_l4proto_find(protonum);
-
if (protonum == IPPROTO_ICMP || protonum == IPPROTO_ICMPV6) {
ret = nf_conntrack_handle_icmp(tmpl, skb, dataoff,
protonum, state);
@@ -1583,7 +1677,7 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
}
repeat:
ret = resolve_normal_ct(tmpl, skb, dataoff,
- protonum, l4proto, state);
+ protonum, state);
if (ret < 0) {
/* Too stressed to deal. */
NF_CT_STAT_INC_ATOMIC(state->net, drop);
@@ -1599,7 +1693,7 @@ repeat:
goto out;
}
- ret = l4proto->packet(ct, skb, dataoff, ctinfo, state);
+ ret = nf_conntrack_handle_packet(ct, skb, dataoff, ctinfo, state);
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
@@ -1630,19 +1724,6 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_in);
-bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
- const struct nf_conntrack_tuple *orig)
-{
- bool ret;
-
- rcu_read_lock();
- ret = nf_ct_invert_tuple(inverse, orig,
- __nf_ct_l4proto_find(orig->dst.protonum));
- rcu_read_unlock();
- return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_invert_tuplepr);
-
/* Alter reply tuple (maybe alter helper). This is for NAT, and is
implicitly racy: see __nf_conntrack_confirm */
void nf_conntrack_alter_reply(struct nf_conn *ct,
@@ -1670,11 +1751,9 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply);
void __nf_ct_refresh_acct(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct sk_buff *skb,
- unsigned long extra_jiffies,
- int do_acct)
+ u32 extra_jiffies,
+ bool do_acct)
{
- WARN_ON(!skb);
-
/* Only update if this is not a fixed timeout */
if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
goto acct;
@@ -1683,7 +1762,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
if (nf_ct_is_confirmed(ct))
extra_jiffies += nfct_time_stamp;
- ct->timeout = extra_jiffies;
+ if (ct->timeout != extra_jiffies)
+ ct->timeout = extra_jiffies;
acct:
if (do_acct)
nf_ct_acct_update(ct, ctinfo, skb->len);
@@ -1773,7 +1853,6 @@ static void nf_conntrack_attach(struct sk_buff *nskb, const struct sk_buff *skb)
static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
{
- const struct nf_conntrack_l4proto *l4proto;
struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
@@ -1794,10 +1873,8 @@ static int nf_conntrack_update(struct net *net, struct sk_buff *skb)
if (dataoff <= 0)
return -1;
- l4proto = nf_ct_l4proto_find_get(l4num);
-
if (!nf_ct_get_tuple(skb, skb_network_offset(skb), dataoff, l3num,
- l4num, net, &tuple, l4proto))
+ l4num, net, &tuple))
return -1;
if (ct->status & IPS_SRC_NAT) {
@@ -2403,6 +2480,7 @@ int nf_conntrack_init_net(struct net *net)
int cpu;
BUILD_BUG_ON(IP_CT_UNTRACKED == IP_CT_NUMBER);
+ BUILD_BUG_ON_NOT_POWER_OF_2(CONNTRACK_LOCKS);
atomic_set(&net->ct.count, 0);
net->ct.pcpu_lists = alloc_percpu(struct ct_pcpu);
@@ -2429,15 +2507,10 @@ int nf_conntrack_init_net(struct net *net)
nf_conntrack_tstamp_pernet_init(net);
nf_conntrack_ecache_pernet_init(net);
nf_conntrack_helper_pernet_init(net);
+ nf_conntrack_proto_pernet_init(net);
- ret = nf_conntrack_proto_pernet_init(net);
- if (ret < 0)
- goto err_proto;
return 0;
-err_proto:
- nf_conntrack_ecache_pernet_fini(net);
- nf_conntrack_expect_pernet_fini(net);
err_expect:
free_percpu(net->ct.stat);
err_pcpu_lists:
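Aside: the nf_conntrack_core.c changes replace the indirect l4proto->packet() call with a switch on the protocol number, so handlers are called directly and trackers that are not configured in compile out entirely. A standalone sketch of that dispatch shape, with illustrative handler bodies (return 1 stands in for NF_ACCEPT):

/* Handler bodies are illustrative; return 1 stands in for NF_ACCEPT. */
#include <netinet/in.h>
#include <stdio.h>

static int tcp_packet(int skb)     { printf("tcp %d\n", skb); return 1; }
static int udp_packet(int skb)     { printf("udp %d\n", skb); return 1; }
static int icmp_packet(int skb)    { printf("icmp %d\n", skb); return 1; }
static int generic_packet(int skb) { printf("generic %d\n", skb); return 1; }

static int handle_packet(int protonum, int skb)
{
        switch (protonum) {
        case IPPROTO_TCP:
                return tcp_packet(skb);
        case IPPROTO_UDP:
                return udp_packet(skb);
        case IPPROTO_ICMP:
                return icmp_packet(skb);
        }
        /* Anything not switched on falls back to the generic tracker. */
        return generic_packet(skb);
}

int main(void)
{
        handle_packet(IPPROTO_UDP, 1);
        handle_packet(IPPROTO_SCTP, 2); /* no case above, goes generic */
        return 0;
}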
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 3034038bfdf0..334d6e5b7762 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -610,7 +610,7 @@ static int exp_seq_show(struct seq_file *s, void *v)
expect->tuple.src.l3num,
expect->tuple.dst.protonum);
print_tuple(s, &expect->tuple,
- __nf_ct_l4proto_find(expect->tuple.dst.protonum));
+ nf_ct_l4proto_find(expect->tuple.dst.protonum));
if (expect->flags & NF_CT_EXPECT_PERMANENT) {
seq_puts(s, "PERMANENT");
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1213beb5a714..66c596d287a5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -46,7 +46,7 @@
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
#ifdef CONFIG_NF_NAT_NEEDED
-#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
#endif
@@ -134,7 +134,7 @@ static int ctnetlink_dump_tuples(struct sk_buff *skb,
ret = ctnetlink_dump_tuples_ip(skb, tuple);
if (ret >= 0) {
- l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
+ l4proto = nf_ct_l4proto_find(tuple->dst.protonum);
ret = ctnetlink_dump_tuples_proto(skb, tuple, l4proto);
}
rcu_read_unlock();
@@ -182,7 +182,7 @@ static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct)
struct nlattr *nest_proto;
int ret;
- l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
+ l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
if (!l4proto->to_nlattr)
return 0;
@@ -590,7 +590,7 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct)
len = nla_policy_len(cta_ip_nla_policy, CTA_IP_MAX + 1);
len *= 3u; /* ORIG, REPLY, MASTER */
- l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
+ l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
len += l4proto->nlattr_size;
if (l4proto->nlattr_tuple_size) {
len4 = l4proto->nlattr_tuple_size();
@@ -1059,7 +1059,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
rcu_read_lock();
- l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
+ l4proto = nf_ct_l4proto_find(tuple->dst.protonum);
if (likely(l4proto->nlattr_to_tuple)) {
ret = nla_validate_nested(attr, CTA_PROTO_MAX,
@@ -1722,11 +1722,9 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct,
if (err < 0)
return err;
- rcu_read_lock();
- l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
+ l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
if (l4proto->from_nlattr)
err = l4proto->from_nlattr(tb, ct);
- rcu_read_unlock();
return err;
}
@@ -2676,8 +2674,8 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
rcu_read_lock();
ret = ctnetlink_dump_tuples_ip(skb, &m);
if (ret >= 0) {
- l4proto = __nf_ct_l4proto_find(tuple->dst.protonum);
- ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto);
+ l4proto = nf_ct_l4proto_find(tuple->dst.protonum);
+ ret = ctnetlink_dump_tuples_proto(skb, &m, l4proto);
}
rcu_read_unlock();
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 11562f2a08bb..976f1dcb97f0 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -121,7 +121,7 @@ static void pptp_expectfn(struct nf_conn *ct,
struct nf_conntrack_expect *exp_other;
/* obviously this tuple inversion only works until you do NAT */
- nf_ct_invert_tuplepr(&inv_t, &exp->tuple);
+ nf_ct_invert_tuple(&inv_t, &exp->tuple);
pr_debug("trying to unexpect other dir: ");
nf_ct_dump_tuple(&inv_t);
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 859f5d07a915..b9403a266a2e 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -43,40 +43,9 @@
extern unsigned int nf_conntrack_net_id;
-static struct nf_conntrack_l4proto __rcu *nf_ct_protos[MAX_NF_CT_PROTO + 1] __read_mostly;
-
static DEFINE_MUTEX(nf_ct_proto_mutex);
#ifdef CONFIG_SYSCTL
-static int
-nf_ct_register_sysctl(struct net *net,
- struct ctl_table_header **header,
- const char *path,
- struct ctl_table *table)
-{
- if (*header == NULL) {
- *header = register_net_sysctl(net, path, table);
- if (*header == NULL)
- return -ENOMEM;
- }
-
- return 0;
-}
-
-static void
-nf_ct_unregister_sysctl(struct ctl_table_header **header,
- struct ctl_table **table,
- unsigned int users)
-{
- if (users > 0)
- return;
-
- unregister_net_sysctl_table(*header);
- kfree(*table);
- *header = NULL;
- *table = NULL;
-}
-
__printf(5, 6)
void nf_l4proto_log_invalid(const struct sk_buff *skb,
struct net *net,
@@ -124,295 +93,82 @@ void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
#endif
-const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto)
-{
- if (unlikely(l4proto >= ARRAY_SIZE(nf_ct_protos)))
- return &nf_conntrack_l4proto_generic;
-
- return rcu_dereference(nf_ct_protos[l4proto]);
-}
-EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
-
-const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4num)
-{
- const struct nf_conntrack_l4proto *p;
-
- rcu_read_lock();
- p = __nf_ct_l4proto_find(l4num);
- if (!try_module_get(p->me))
- p = &nf_conntrack_l4proto_generic;
- rcu_read_unlock();
-
- return p;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
-
-void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
-{
- module_put(p->me);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
-
-static int kill_l4proto(struct nf_conn *i, void *data)
-{
- const struct nf_conntrack_l4proto *l4proto;
- l4proto = data;
- return nf_ct_protonum(i) == l4proto->l4proto;
-}
-
-static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
- const struct nf_conntrack_l4proto *l4proto)
-{
- if (l4proto->get_net_proto) {
- /* statically built-in protocols use static per-net */
- return l4proto->get_net_proto(net);
- } else if (l4proto->net_id) {
- /* ... and loadable protocols use dynamic per-net */
- return net_generic(net, *l4proto->net_id);
- }
- return NULL;
-}
-
-static
-int nf_ct_l4proto_register_sysctl(struct net *net,
- struct nf_proto_net *pn)
-{
- int err = 0;
-
-#ifdef CONFIG_SYSCTL
- if (pn->ctl_table != NULL) {
- err = nf_ct_register_sysctl(net,
- &pn->ctl_table_header,
- "net/netfilter",
- pn->ctl_table);
- if (err < 0) {
- if (!pn->users) {
- kfree(pn->ctl_table);
- pn->ctl_table = NULL;
- }
- }
- }
-#endif /* CONFIG_SYSCTL */
- return err;
-}
-
-static
-void nf_ct_l4proto_unregister_sysctl(struct nf_proto_net *pn)
-{
-#ifdef CONFIG_SYSCTL
- if (pn->ctl_table_header != NULL)
- nf_ct_unregister_sysctl(&pn->ctl_table_header,
- &pn->ctl_table,
- pn->users);
-#endif /* CONFIG_SYSCTL */
-}
-
-/* FIXME: Allow NULL functions and sub in pointers to generic for
- them. --RR */
-int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
-{
- int ret = 0;
-
- if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) ||
- (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
- return -EINVAL;
-
- mutex_lock(&nf_ct_proto_mutex);
- if (rcu_dereference_protected(
- nf_ct_protos[l4proto->l4proto],
- lockdep_is_held(&nf_ct_proto_mutex)
- ) != &nf_conntrack_l4proto_generic) {
- ret = -EBUSY;
- goto out_unlock;
- }
-
- rcu_assign_pointer(nf_ct_protos[l4proto->l4proto], l4proto);
-out_unlock:
- mutex_unlock(&nf_ct_proto_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
-
-int nf_ct_l4proto_pernet_register_one(struct net *net,
- const struct nf_conntrack_l4proto *l4proto)
-{
- int ret = 0;
- struct nf_proto_net *pn = NULL;
-
- if (l4proto->init_net) {
- ret = l4proto->init_net(net);
- if (ret < 0)
- goto out;
- }
-
- pn = nf_ct_l4proto_net(net, l4proto);
- if (pn == NULL)
- goto out;
-
- ret = nf_ct_l4proto_register_sysctl(net, pn);
- if (ret < 0)
- goto out;
-
- pn->users++;
-out:
- return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
-
-static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
-
-{
- BUG_ON(l4proto->l4proto >= ARRAY_SIZE(nf_ct_protos));
-
- BUG_ON(rcu_dereference_protected(
- nf_ct_protos[l4proto->l4proto],
- lockdep_is_held(&nf_ct_proto_mutex)
- ) != l4proto);
- rcu_assign_pointer(nf_ct_protos[l4proto->l4proto],
- &nf_conntrack_l4proto_generic);
-}
-
-void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
-{
- mutex_lock(&nf_ct_proto_mutex);
- __nf_ct_l4proto_unregister_one(l4proto);
- mutex_unlock(&nf_ct_proto_mutex);
-
- synchronize_net();
- /* Remove all contrack entries for this protocol */
- nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
-
-void nf_ct_l4proto_pernet_unregister_one(struct net *net,
- const struct nf_conntrack_l4proto *l4proto)
-{
- struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
-
- if (pn == NULL)
- return;
-
- pn->users--;
- nf_ct_l4proto_unregister_sysctl(pn);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
-
-static void
-nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
- unsigned int num_proto)
-{
- int i;
-
- mutex_lock(&nf_ct_proto_mutex);
- for (i = 0; i < num_proto; i++)
- __nf_ct_l4proto_unregister_one(l4proto[i]);
- mutex_unlock(&nf_ct_proto_mutex);
-
- synchronize_net();
-
- for (i = 0; i < num_proto; i++)
- nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto[i]);
-}
-
-static int
-nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
- unsigned int num_proto)
-{
- int ret = -EINVAL;
- unsigned int i;
-
- for (i = 0; i < num_proto; i++) {
- ret = nf_ct_l4proto_register_one(l4proto[i]);
- if (ret < 0)
- break;
- }
- if (i != num_proto) {
- pr_err("nf_conntrack: can't register l4 %d proto.\n",
- l4proto[i]->l4proto);
- nf_ct_l4proto_unregister(l4proto, i);
- }
- return ret;
-}
-
-int nf_ct_l4proto_pernet_register(struct net *net,
- const struct nf_conntrack_l4proto *const l4proto[],
- unsigned int num_proto)
+const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto)
{
- int ret = -EINVAL;
- unsigned int i;
-
- for (i = 0; i < num_proto; i++) {
- ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]);
- if (ret < 0)
- break;
- }
- if (i != num_proto) {
- pr_err("nf_conntrack %d: pernet registration failed\n",
- l4proto[i]->l4proto);
- nf_ct_l4proto_pernet_unregister(net, l4proto, i);
+ switch (l4proto) {
+ case IPPROTO_UDP: return &nf_conntrack_l4proto_udp;
+ case IPPROTO_TCP: return &nf_conntrack_l4proto_tcp;
+ case IPPROTO_ICMP: return &nf_conntrack_l4proto_icmp;
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ case IPPROTO_DCCP: return &nf_conntrack_l4proto_dccp;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+ case IPPROTO_UDPLITE: return &nf_conntrack_l4proto_udplite;
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ case IPPROTO_GRE: return &nf_conntrack_l4proto_gre;
+#endif
+#if IS_ENABLED(CONFIG_IPV6)
+ case IPPROTO_ICMPV6: return &nf_conntrack_l4proto_icmpv6;
+#endif /* CONFIG_IPV6 */
}
- return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
-void nf_ct_l4proto_pernet_unregister(struct net *net,
- const struct nf_conntrack_l4proto *const l4proto[],
- unsigned int num_proto)
-{
- while (num_proto-- != 0)
- nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]);
-}
-EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
+ return &nf_conntrack_l4proto_generic;
+};
+EXPORT_SYMBOL_GPL(nf_ct_l4proto_find);
-static unsigned int ipv4_helper(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
+static unsigned int nf_confirm(struct sk_buff *skb,
+ unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo)
{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
const struct nf_conn_help *help;
- const struct nf_conntrack_helper *helper;
-
- /* This is where we call the helper: as the packet goes out. */
- ct = nf_ct_get(skb, &ctinfo);
- if (!ct || ctinfo == IP_CT_RELATED_REPLY)
- return NF_ACCEPT;
help = nfct_help(ct);
- if (!help)
- return NF_ACCEPT;
+ if (help) {
+ const struct nf_conntrack_helper *helper;
+ int ret;
+
+ /* rcu_read_lock()ed by nf_hook_thresh */
+ helper = rcu_dereference(help->helper);
+ if (helper) {
+ ret = helper->help(skb,
+ protoff,
+ ct, ctinfo);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ }
- /* rcu_read_lock()ed by nf_hook_thresh */
- helper = rcu_dereference(help->helper);
- if (!helper)
- return NF_ACCEPT;
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_is_loopback_packet(skb)) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
+ NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+ return NF_DROP;
+ }
+ }
- return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
- ct, ctinfo);
+ /* We've seen it coming out the other side: confirm it */
+ return nf_conntrack_confirm(skb);
}
static unsigned int ipv4_confirm(void *priv,
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
- goto out;
+ return nf_conntrack_confirm(skb);
- /* adjust seqs for loopback traffic only in outgoing direction */
- if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
- !nf_is_loopback_packet(skb)) {
- if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
- NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
- return NF_DROP;
- }
- }
-out:
- /* We've seen it coming out the other side: confirm it */
- return nf_conntrack_confirm(skb);
+ return nf_confirm(skb,
+ skb_network_offset(skb) + ip_hdrlen(skb),
+ ct, ctinfo);
}
static unsigned int ipv4_conntrack_in(void *priv,
@@ -461,24 +217,12 @@ static const struct nf_hook_ops ipv4_conntrack_ops[] = {
.priority = NF_IP_PRI_CONNTRACK,
},
{
- .hook = ipv4_helper,
- .pf = NFPROTO_IPV4,
- .hooknum = NF_INET_POST_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_HELPER,
- },
- {
.hook = ipv4_confirm,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
{
- .hook = ipv4_helper,
- .pf = NFPROTO_IPV4,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = NF_IP_PRI_CONNTRACK_HELPER,
- },
- {
.hook = ipv4_confirm,
.pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
@@ -623,31 +367,21 @@ static unsigned int ipv6_confirm(void *priv,
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
unsigned char pnum = ipv6_hdr(skb)->nexthdr;
- int protoff;
__be16 frag_off;
+ int protoff;
ct = nf_ct_get(skb, &ctinfo);
if (!ct || ctinfo == IP_CT_RELATED_REPLY)
- goto out;
+ return nf_conntrack_confirm(skb);
protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
&frag_off);
if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("proto header not found\n");
- goto out;
+ return nf_conntrack_confirm(skb);
}
- /* adjust seqs for loopback traffic only in outgoing direction */
- if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
- !nf_is_loopback_packet(skb)) {
- if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
- NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
- return NF_DROP;
- }
- }
-out:
- /* We've seen it coming out the other side: confirm it */
- return nf_conntrack_confirm(skb);
+ return nf_confirm(skb, protoff, ct, ctinfo);
}
static unsigned int ipv6_conntrack_in(void *priv,
@@ -664,42 +398,6 @@ static unsigned int ipv6_conntrack_local(void *priv,
return nf_conntrack_in(skb, state);
}
-static unsigned int ipv6_helper(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nf_conn *ct;
- const struct nf_conn_help *help;
- const struct nf_conntrack_helper *helper;
- enum ip_conntrack_info ctinfo;
- __be16 frag_off;
- int protoff;
- u8 nexthdr;
-
- /* This is where we call the helper: as the packet goes out. */
- ct = nf_ct_get(skb, &ctinfo);
- if (!ct || ctinfo == IP_CT_RELATED_REPLY)
- return NF_ACCEPT;
-
- help = nfct_help(ct);
- if (!help)
- return NF_ACCEPT;
- /* rcu_read_lock()ed by nf_hook_thresh */
- helper = rcu_dereference(help->helper);
- if (!helper)
- return NF_ACCEPT;
-
- nexthdr = ipv6_hdr(skb)->nexthdr;
- protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
- &frag_off);
- if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
- pr_debug("proto header not found\n");
- return NF_ACCEPT;
- }
-
- return helper->help(skb, protoff, ct, ctinfo);
-}
-
static const struct nf_hook_ops ipv6_conntrack_ops[] = {
{
.hook = ipv6_conntrack_in,
@@ -714,24 +412,12 @@ static const struct nf_hook_ops ipv6_conntrack_ops[] = {
.priority = NF_IP6_PRI_CONNTRACK,
},
{
- .hook = ipv6_helper,
- .pf = NFPROTO_IPV6,
- .hooknum = NF_INET_POST_ROUTING,
- .priority = NF_IP6_PRI_CONNTRACK_HELPER,
- },
- {
.hook = ipv6_confirm,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP6_PRI_LAST,
},
{
- .hook = ipv6_helper,
- .pf = NFPROTO_IPV6,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = NF_IP6_PRI_CONNTRACK_HELPER,
- },
- {
.hook = ipv6_confirm,
.pf = NFPROTO_IPV6,
.hooknum = NF_INET_LOCAL_IN,
@@ -874,27 +560,9 @@ void nf_ct_netns_put(struct net *net, uint8_t nfproto)
}
EXPORT_SYMBOL_GPL(nf_ct_netns_put);
-static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
- &nf_conntrack_l4proto_tcp,
- &nf_conntrack_l4proto_udp,
- &nf_conntrack_l4proto_icmp,
-#ifdef CONFIG_NF_CT_PROTO_DCCP
- &nf_conntrack_l4proto_dccp,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_SCTP
- &nf_conntrack_l4proto_sctp,
-#endif
-#ifdef CONFIG_NF_CT_PROTO_UDPLITE
- &nf_conntrack_l4proto_udplite,
-#endif
-#if IS_ENABLED(CONFIG_IPV6)
- &nf_conntrack_l4proto_icmpv6,
-#endif /* CONFIG_IPV6 */
-};
-
int nf_conntrack_proto_init(void)
{
- int ret = 0, i;
+ int ret;
ret = nf_register_sockopt(&so_getorigdst);
if (ret < 0)
@@ -906,18 +574,8 @@ int nf_conntrack_proto_init(void)
goto cleanup_sockopt;
#endif
- for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
- RCU_INIT_POINTER(nf_ct_protos[i],
- &nf_conntrack_l4proto_generic);
-
- ret = nf_ct_l4proto_register(builtin_l4proto,
- ARRAY_SIZE(builtin_l4proto));
- if (ret < 0)
- goto cleanup_sockopt2;
-
return ret;
-cleanup_sockopt2:
- nf_unregister_sockopt(&so_getorigdst);
+
#if IS_ENABLED(CONFIG_IPV6)
cleanup_sockopt:
nf_unregister_sockopt(&so_getorigdst6);
@@ -933,43 +591,33 @@ void nf_conntrack_proto_fini(void)
#endif
}
-int nf_conntrack_proto_pernet_init(struct net *net)
+void nf_conntrack_proto_pernet_init(struct net *net)
{
- int err;
- struct nf_proto_net *pn = nf_ct_l4proto_net(net,
- &nf_conntrack_l4proto_generic);
-
- err = nf_conntrack_l4proto_generic.init_net(net);
- if (err < 0)
- return err;
- err = nf_ct_l4proto_register_sysctl(net,
- pn);
- if (err < 0)
- return err;
-
- err = nf_ct_l4proto_pernet_register(net, builtin_l4proto,
- ARRAY_SIZE(builtin_l4proto));
- if (err < 0) {
- nf_ct_l4proto_unregister_sysctl(pn);
- return err;
- }
-
- pn->users++;
- return 0;
+ nf_conntrack_generic_init_net(net);
+ nf_conntrack_udp_init_net(net);
+ nf_conntrack_tcp_init_net(net);
+ nf_conntrack_icmp_init_net(net);
+#if IS_ENABLED(CONFIG_IPV6)
+ nf_conntrack_icmpv6_init_net(net);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ nf_conntrack_dccp_init_net(net);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ nf_conntrack_sctp_init_net(net);
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ nf_conntrack_gre_init_net(net);
+#endif
}
void nf_conntrack_proto_pernet_fini(struct net *net)
{
- struct nf_proto_net *pn = nf_ct_l4proto_net(net,
- &nf_conntrack_l4proto_generic);
-
- nf_ct_l4proto_pernet_unregister(net, builtin_l4proto,
- ARRAY_SIZE(builtin_l4proto));
- pn->users--;
- nf_ct_l4proto_unregister_sysctl(pn);
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ nf_ct_gre_keymap_flush(net);
+#endif
}
-
module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
&nf_conntrack_htable_size, 0600);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index 023c1445bc39..6fca80587505 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -472,9 +472,10 @@ out_invalid:
return true;
}
-static int dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
- unsigned int dataoff, enum ip_conntrack_info ctinfo,
- const struct nf_hook_state *state)
+int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
struct dccp_hdr _dh, *dh;
@@ -723,123 +724,28 @@ dccp_timeout_nla_policy[CTA_TIMEOUT_DCCP_MAX+1] = {
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-#ifdef CONFIG_SYSCTL
-/* template, data assigned later */
-static struct ctl_table dccp_sysctl_table[] = {
- {
- .procname = "nf_conntrack_dccp_timeout_request",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_dccp_timeout_respond",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_dccp_timeout_partopen",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_dccp_timeout_open",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_dccp_timeout_closereq",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_dccp_timeout_closing",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_dccp_timeout_timewait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_dccp_loose",
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- { }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int dccp_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *pn,
- struct nf_dccp_net *dn)
-{
-#ifdef CONFIG_SYSCTL
- if (pn->ctl_table)
- return 0;
-
- pn->ctl_table = kmemdup(dccp_sysctl_table,
- sizeof(dccp_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_table)
- return -ENOMEM;
-
- pn->ctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST];
- pn->ctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND];
- pn->ctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN];
- pn->ctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN];
- pn->ctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ];
- pn->ctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING];
- pn->ctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT];
- pn->ctl_table[7].data = &dn->dccp_loose;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- pn->ctl_table[0].procname = NULL;
-#endif
- return 0;
-}
-
-static int dccp_init_net(struct net *net)
+void nf_conntrack_dccp_init_net(struct net *net)
{
struct nf_dccp_net *dn = nf_dccp_pernet(net);
- struct nf_proto_net *pn = &dn->pn;
-
- if (!pn->users) {
- /* default values */
- dn->dccp_loose = 1;
- dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL;
- dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL;
- dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL;
- dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ;
- dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ;
- dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ;
- dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL;
-
- /* timeouts[0] is unused, make it same as SYN_SENT so
- * ->timeouts[0] contains 'new' timeout, like udp or icmp.
- */
- dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST];
- }
- return dccp_kmemdup_sysctl_table(net, pn, dn);
-}
-
-static struct nf_proto_net *dccp_get_net_proto(struct net *net)
-{
- return &net->ct.nf_ct_proto.dccp.pn;
+ /* default values */
+ dn->dccp_loose = 1;
+ dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL;
+ dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL;
+ dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL;
+ dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ;
+ dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ;
+ dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ;
+ dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL;
+
+ /* timeouts[0] is unused, make it same as SYN_SENT so
+ * ->timeouts[0] contains 'new' timeout, like udp or icmp.
+ */
+ dn->dccp_timeout[CT_DCCP_NONE] = dn->dccp_timeout[CT_DCCP_REQUEST];
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = {
.l4proto = IPPROTO_DCCP,
- .packet = dccp_packet,
.can_early_drop = dccp_can_early_drop,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = dccp_print_conntrack,
@@ -862,6 +768,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp = {
.nla_policy = dccp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .init_net = dccp_init_net,
- .get_net_proto = dccp_get_net_proto,
};
diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c
index 5da19d5fbc76..0f526fafecae 100644
--- a/net/netfilter/nf_conntrack_proto_generic.c
+++ b/net/netfilter/nf_conntrack_proto_generic.c
@@ -15,50 +15,6 @@
static const unsigned int nf_ct_generic_timeout = 600*HZ;
-static bool nf_generic_should_process(u8 proto)
-{
- switch (proto) {
-#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE
- case IPPROTO_GRE:
- return false;
-#endif
- default:
- return true;
- }
-}
-
-static bool generic_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct net *net, struct nf_conntrack_tuple *tuple)
-{
- tuple->src.u.all = 0;
- tuple->dst.u.all = 0;
-
- return true;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int generic_packet(struct nf_conn *ct,
- struct sk_buff *skb,
- unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- const struct nf_hook_state *state)
-{
- const unsigned int *timeout = nf_ct_timeout_lookup(ct);
-
- if (!nf_generic_should_process(nf_ct_protonum(ct))) {
- pr_warn_once("conntrack: generic helper won't handle protocol %d. Please consider loading the specific helper module.\n",
- nf_ct_protonum(ct));
- return -NF_ACCEPT;
- }
-
- if (!timeout)
- timeout = &nf_generic_pernet(nf_ct_net(ct))->timeout;
-
- nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
- return NF_ACCEPT;
-}
-
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
#include <linux/netfilter/nfnetlink.h>
@@ -104,53 +60,16 @@ generic_timeout_nla_policy[CTA_TIMEOUT_GENERIC_MAX+1] = {
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-#ifdef CONFIG_SYSCTL
-static struct ctl_table generic_sysctl_table[] = {
- {
- .procname = "nf_conntrack_generic_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn,
- struct nf_generic_net *gn)
-{
-#ifdef CONFIG_SYSCTL
- pn->ctl_table = kmemdup(generic_sysctl_table,
- sizeof(generic_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_table)
- return -ENOMEM;
-
- pn->ctl_table[0].data = &gn->timeout;
-#endif
- return 0;
-}
-
-static int generic_init_net(struct net *net)
+void nf_conntrack_generic_init_net(struct net *net)
{
struct nf_generic_net *gn = nf_generic_pernet(net);
- struct nf_proto_net *pn = &gn->pn;
gn->timeout = nf_ct_generic_timeout;
-
- return generic_kmemdup_sysctl_table(pn, gn);
-}
-
-static struct nf_proto_net *generic_get_net_proto(struct net *net)
-{
- return &net->ct.nf_ct_proto.generic.pn;
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
{
.l4proto = 255,
- .pkt_to_tuple = generic_pkt_to_tuple,
- .packet = generic_packet,
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
.ctnl_timeout = {
.nlattr_to_obj = generic_timeout_nlattr_to_obj,
@@ -160,6 +79,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_generic =
.nla_policy = generic_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .init_net = generic_init_net,
- .get_net_proto = generic_get_net_proto,
};
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 8899b51aad44..ee9ab10a32e4 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -48,24 +48,25 @@ static const unsigned int gre_timeouts[GRE_CT_MAX] = {
[GRE_CT_REPLIED] = 180*HZ,
};
-static unsigned int proto_gre_net_id __read_mostly;
+/* used when expectation is added */
+static DEFINE_SPINLOCK(keymap_lock);
-static inline struct netns_proto_gre *gre_pernet(struct net *net)
+static inline struct nf_gre_net *gre_pernet(struct net *net)
{
- return net_generic(net, proto_gre_net_id);
+ return &net->ct.nf_ct_proto.gre;
}
-static void nf_ct_gre_keymap_flush(struct net *net)
+void nf_ct_gre_keymap_flush(struct net *net)
{
- struct netns_proto_gre *net_gre = gre_pernet(net);
+ struct nf_gre_net *net_gre = gre_pernet(net);
struct nf_ct_gre_keymap *km, *tmp;
- write_lock_bh(&net_gre->keymap_lock);
+ spin_lock_bh(&keymap_lock);
list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) {
- list_del(&km->list);
- kfree(km);
+ list_del_rcu(&km->list);
+ kfree_rcu(km, rcu);
}
- write_unlock_bh(&net_gre->keymap_lock);
+ spin_unlock_bh(&keymap_lock);
}
static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
@@ -81,18 +82,16 @@ static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km,
/* look up the source key for a given tuple */
static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t)
{
- struct netns_proto_gre *net_gre = gre_pernet(net);
+ struct nf_gre_net *net_gre = gre_pernet(net);
struct nf_ct_gre_keymap *km;
__be16 key = 0;
- read_lock_bh(&net_gre->keymap_lock);
- list_for_each_entry(km, &net_gre->keymap_list, list) {
+ list_for_each_entry_rcu(km, &net_gre->keymap_list, list) {
if (gre_key_cmpfn(km, t)) {
key = km->tuple.src.u.gre.key;
break;
}
}
- read_unlock_bh(&net_gre->keymap_lock);
pr_debug("lookup src key 0x%x for ", key);
nf_ct_dump_tuple(t);
@@ -105,21 +104,17 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
struct nf_conntrack_tuple *t)
{
struct net *net = nf_ct_net(ct);
- struct netns_proto_gre *net_gre = gre_pernet(net);
+ struct nf_gre_net *net_gre = gre_pernet(net);
struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct);
struct nf_ct_gre_keymap **kmp, *km;
kmp = &ct_pptp_info->keymap[dir];
if (*kmp) {
/* check whether it's a retransmission */
- read_lock_bh(&net_gre->keymap_lock);
- list_for_each_entry(km, &net_gre->keymap_list, list) {
- if (gre_key_cmpfn(km, t) && km == *kmp) {
- read_unlock_bh(&net_gre->keymap_lock);
+ list_for_each_entry_rcu(km, &net_gre->keymap_list, list) {
+ if (gre_key_cmpfn(km, t) && km == *kmp)
return 0;
- }
}
- read_unlock_bh(&net_gre->keymap_lock);
pr_debug("trying to override keymap_%s for ct %p\n",
dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct);
return -EEXIST;
@@ -134,9 +129,9 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
pr_debug("adding new entry %p: ", km);
nf_ct_dump_tuple(&km->tuple);
- write_lock_bh(&net_gre->keymap_lock);
+ spin_lock_bh(&keymap_lock);
list_add_tail(&km->list, &net_gre->keymap_list);
- write_unlock_bh(&net_gre->keymap_lock);
+ spin_unlock_bh(&keymap_lock);
return 0;
}
@@ -145,32 +140,30 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add);
/* destroy the keymap entries associated with specified master ct */
void nf_ct_gre_keymap_destroy(struct nf_conn *ct)
{
- struct net *net = nf_ct_net(ct);
- struct netns_proto_gre *net_gre = gre_pernet(net);
struct nf_ct_pptp_master *ct_pptp_info = nfct_help_data(ct);
enum ip_conntrack_dir dir;
pr_debug("entering for ct %p\n", ct);
- write_lock_bh(&net_gre->keymap_lock);
+ spin_lock_bh(&keymap_lock);
for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) {
if (ct_pptp_info->keymap[dir]) {
pr_debug("removing %p from list\n",
ct_pptp_info->keymap[dir]);
- list_del(&ct_pptp_info->keymap[dir]->list);
- kfree(ct_pptp_info->keymap[dir]);
+ list_del_rcu(&ct_pptp_info->keymap[dir]->list);
+ kfree_rcu(ct_pptp_info->keymap[dir], rcu);
ct_pptp_info->keymap[dir] = NULL;
}
}
- write_unlock_bh(&net_gre->keymap_lock);
+ spin_unlock_bh(&keymap_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy);
/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
/* gre hdr info to tuple */
-static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct net *net, struct nf_conntrack_tuple *tuple)
+bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
const struct pptp_gre_header *pgrehdr;
struct pptp_gre_header _pgrehdr;
@@ -216,15 +209,15 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct)
static unsigned int *gre_get_timeouts(struct net *net)
{
- return gre_pernet(net)->gre_timeouts;
+ return gre_pernet(net)->timeouts;
}
/* Returns verdict for packet, and may modify conntrack */
-static int gre_packet(struct nf_conn *ct,
- struct sk_buff *skb,
- unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- const struct nf_hook_state *state)
+int nf_conntrack_gre_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
if (state->pf != NFPROTO_IPV4)
return -NF_ACCEPT;
@@ -256,19 +249,6 @@ static int gre_packet(struct nf_conn *ct,
return NF_ACCEPT;
}
-/* Called when a conntrack entry has already been removed from the hashes
- * and is about to be deleted from memory */
-static void gre_destroy(struct nf_conn *ct)
-{
- struct nf_conn *master = ct->master;
- pr_debug(" entering\n");
-
- if (!master)
- pr_debug("no master !?!\n");
- else
- nf_ct_gre_keymap_destroy(master);
-}
-
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
#include <linux/netfilter/nfnetlink.h>
@@ -278,13 +258,13 @@ static int gre_timeout_nlattr_to_obj(struct nlattr *tb[],
struct net *net, void *data)
{
unsigned int *timeouts = data;
- struct netns_proto_gre *net_gre = gre_pernet(net);
+ struct nf_gre_net *net_gre = gre_pernet(net);
if (!timeouts)
timeouts = gre_get_timeouts(net);
/* set default timeouts for GRE. */
- timeouts[GRE_CT_UNREPLIED] = net_gre->gre_timeouts[GRE_CT_UNREPLIED];
- timeouts[GRE_CT_REPLIED] = net_gre->gre_timeouts[GRE_CT_REPLIED];
+ timeouts[GRE_CT_UNREPLIED] = net_gre->timeouts[GRE_CT_UNREPLIED];
+ timeouts[GRE_CT_REPLIED] = net_gre->timeouts[GRE_CT_REPLIED];
if (tb[CTA_TIMEOUT_GRE_UNREPLIED]) {
timeouts[GRE_CT_UNREPLIED] =
@@ -320,69 +300,22 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = {
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-#ifdef CONFIG_SYSCTL
-static struct ctl_table gre_sysctl_table[] = {
- {
- .procname = "nf_conntrack_gre_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_gre_timeout_stream",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {}
-};
-#endif
-
-static int gre_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *nf,
- struct netns_proto_gre *net_gre)
-{
-#ifdef CONFIG_SYSCTL
- int i;
-
- if (nf->ctl_table)
- return 0;
-
- nf->ctl_table = kmemdup(gre_sysctl_table,
- sizeof(gre_sysctl_table),
- GFP_KERNEL);
- if (!nf->ctl_table)
- return -ENOMEM;
-
- for (i = 0; i < GRE_CT_MAX; i++)
- nf->ctl_table[i].data = &net_gre->gre_timeouts[i];
-#endif
- return 0;
-}
-
-static int gre_init_net(struct net *net)
+void nf_conntrack_gre_init_net(struct net *net)
{
- struct netns_proto_gre *net_gre = gre_pernet(net);
- struct nf_proto_net *nf = &net_gre->nf;
+ struct nf_gre_net *net_gre = gre_pernet(net);
int i;
- rwlock_init(&net_gre->keymap_lock);
INIT_LIST_HEAD(&net_gre->keymap_list);
for (i = 0; i < GRE_CT_MAX; i++)
- net_gre->gre_timeouts[i] = gre_timeouts[i];
-
- return gre_kmemdup_sysctl_table(net, nf, net_gre);
+ net_gre->timeouts[i] = gre_timeouts[i];
}
/* protocol helper struct */
-static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
+const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
.l4proto = IPPROTO_GRE,
- .pkt_to_tuple = gre_pkt_to_tuple,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
#endif
- .packet = gre_packet,
- .destroy = gre_destroy,
- .me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,
@@ -398,61 +331,4 @@ static const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 = {
.nla_policy = gre_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .net_id = &proto_gre_net_id,
- .init_net = gre_init_net,
};
-
-static int proto_gre_net_init(struct net *net)
-{
- int ret = 0;
-
- ret = nf_ct_l4proto_pernet_register_one(net,
- &nf_conntrack_l4proto_gre4);
- if (ret < 0)
- pr_err("nf_conntrack_gre4: pernet registration failed.\n");
- return ret;
-}
-
-static void proto_gre_net_exit(struct net *net)
-{
- nf_ct_l4proto_pernet_unregister_one(net, &nf_conntrack_l4proto_gre4);
- nf_ct_gre_keymap_flush(net);
-}
-
-static struct pernet_operations proto_gre_net_ops = {
- .init = proto_gre_net_init,
- .exit = proto_gre_net_exit,
- .id = &proto_gre_net_id,
- .size = sizeof(struct netns_proto_gre),
-};
-
-static int __init nf_ct_proto_gre_init(void)
-{
- int ret;
-
- BUILD_BUG_ON(offsetof(struct netns_proto_gre, nf) != 0);
-
- ret = register_pernet_subsys(&proto_gre_net_ops);
- if (ret < 0)
- goto out_pernet;
- ret = nf_ct_l4proto_register_one(&nf_conntrack_l4proto_gre4);
- if (ret < 0)
- goto out_gre4;
-
- return 0;
-out_gre4:
- unregister_pernet_subsys(&proto_gre_net_ops);
-out_pernet:
- return ret;
-}
-
-static void __exit nf_ct_proto_gre_fini(void)
-{
- nf_ct_l4proto_unregister_one(&nf_conntrack_l4proto_gre4);
- unregister_pernet_subsys(&proto_gre_net_ops);
-}
-
-module_init(nf_ct_proto_gre_init);
-module_exit(nf_ct_proto_gre_fini);
-
-MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index de64d8a5fdfd..7df477996b16 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -25,8 +25,8 @@
static const unsigned int nf_ct_icmp_timeout = 30*HZ;
-static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
- struct net *net, struct nf_conntrack_tuple *tuple)
+bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+ struct net *net, struct nf_conntrack_tuple *tuple)
{
const struct icmphdr *hp;
struct icmphdr _hdr;
@@ -54,8 +54,8 @@ static const u_int8_t invmap[] = {
[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
};
-static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
+bool nf_conntrack_invert_icmp_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
{
if (orig->dst.u.icmp.type >= sizeof(invmap) ||
!invmap[orig->dst.u.icmp.type])
@@ -68,11 +68,10 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
}
/* Returns verdict for packet, or -1 for invalid. */
-static int icmp_packet(struct nf_conn *ct,
- struct sk_buff *skb,
- unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- const struct nf_hook_state *state)
+int nf_conntrack_icmp_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
/* Do not immediately delete the connection after the first
successful reply to avoid excessive conntrackd traffic
@@ -110,7 +109,6 @@ icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb,
const struct nf_hook_state *state)
{
struct nf_conntrack_tuple innertuple, origtuple;
- const struct nf_conntrack_l4proto *innerproto;
const struct nf_conntrack_tuple_hash *h;
const struct nf_conntrack_zone *zone;
enum ip_conntrack_info ctinfo;
@@ -128,12 +126,9 @@ icmp_error_message(struct nf_conn *tmpl, struct sk_buff *skb,
return -NF_ACCEPT;
}
- /* rcu_read_lock()ed by nf_hook_thresh */
- innerproto = __nf_ct_l4proto_find(origtuple.dst.protonum);
-
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
- if (!nf_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
+ if (!nf_ct_invert_tuple(&innertuple, &origtuple)) {
pr_debug("icmp_error_message: no match\n");
return -NF_ACCEPT;
}
@@ -303,56 +298,16 @@ icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-#ifdef CONFIG_SYSCTL
-static struct ctl_table icmp_sysctl_table[] = {
- {
- .procname = "nf_conntrack_icmp_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
- struct nf_icmp_net *in)
-{
-#ifdef CONFIG_SYSCTL
- pn->ctl_table = kmemdup(icmp_sysctl_table,
- sizeof(icmp_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_table)
- return -ENOMEM;
-
- pn->ctl_table[0].data = &in->timeout;
-#endif
- return 0;
-}
-
-static int icmp_init_net(struct net *net)
+void nf_conntrack_icmp_init_net(struct net *net)
{
struct nf_icmp_net *in = nf_icmp_pernet(net);
- struct nf_proto_net *pn = &in->pn;
in->timeout = nf_ct_icmp_timeout;
-
- return icmp_kmemdup_sysctl_table(pn, in);
-}
-
-static struct nf_proto_net *icmp_get_net_proto(struct net *net)
-{
- return &net->ct.nf_ct_proto.icmp.pn;
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
{
.l4proto = IPPROTO_ICMP,
- .pkt_to_tuple = icmp_pkt_to_tuple,
- .invert_tuple = icmp_invert_tuple,
- .packet = icmp_packet,
- .destroy = NULL,
- .me = NULL,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = icmp_tuple_to_nlattr,
.nlattr_tuple_size = icmp_nlattr_tuple_size,
@@ -368,6 +323,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp =
.nla_policy = icmp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .init_net = icmp_init_net,
- .get_net_proto = icmp_get_net_proto,
};
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index a15eefb8e317..bec4a3211658 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -30,10 +30,10 @@
static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
-static bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct net *net,
- struct nf_conntrack_tuple *tuple)
+bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct net *net,
+ struct nf_conntrack_tuple *tuple)
{
const struct icmp6hdr *hp;
struct icmp6hdr _hdr;
@@ -67,8 +67,8 @@ static const u_int8_t noct_valid_new[] = {
[ICMPV6_MLD2_REPORT - 130] = 1
};
-static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
+bool nf_conntrack_invert_icmpv6_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
{
int type = orig->dst.u.icmp.type - 128;
if (type < 0 || type >= sizeof(invmap) || !invmap[type])
@@ -86,11 +86,10 @@ static unsigned int *icmpv6_get_timeouts(struct net *net)
}
/* Returns verdict for packet, or -1 for invalid. */
-static int icmpv6_packet(struct nf_conn *ct,
- struct sk_buff *skb,
- unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- const struct nf_hook_state *state)
+int nf_conntrack_icmpv6_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
unsigned int *timeout = nf_ct_timeout_lookup(ct);
static const u8 valid_new[] = {
@@ -131,7 +130,6 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
{
struct nf_conntrack_tuple intuple, origtuple;
const struct nf_conntrack_tuple_hash *h;
- const struct nf_conntrack_l4proto *inproto;
enum ip_conntrack_info ctinfo;
struct nf_conntrack_zone tmp;
@@ -147,12 +145,9 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl,
return -NF_ACCEPT;
}
- /* rcu_read_lock()ed by nf_hook_thresh */
- inproto = __nf_ct_l4proto_find(origtuple.dst.protonum);
-
/* Ordinarily, we'd expect the inverted tupleproto, but it's
been preserved inside the ICMP. */
- if (!nf_ct_invert_tuple(&intuple, &origtuple, inproto)) {
+ if (!nf_ct_invert_tuple(&intuple, &origtuple)) {
pr_debug("icmpv6_error: Can't invert tuple\n");
return -NF_ACCEPT;
}
@@ -314,54 +309,16 @@ icmpv6_timeout_nla_policy[CTA_TIMEOUT_ICMPV6_MAX+1] = {
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-#ifdef CONFIG_SYSCTL
-static struct ctl_table icmpv6_sysctl_table[] = {
- {
- .procname = "nf_conntrack_icmpv6_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int icmpv6_kmemdup_sysctl_table(struct nf_proto_net *pn,
- struct nf_icmp_net *in)
-{
-#ifdef CONFIG_SYSCTL
- pn->ctl_table = kmemdup(icmpv6_sysctl_table,
- sizeof(icmpv6_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_table)
- return -ENOMEM;
-
- pn->ctl_table[0].data = &in->timeout;
-#endif
- return 0;
-}
-
-static int icmpv6_init_net(struct net *net)
+void nf_conntrack_icmpv6_init_net(struct net *net)
{
struct nf_icmp_net *in = nf_icmpv6_pernet(net);
- struct nf_proto_net *pn = &in->pn;
in->timeout = nf_ct_icmpv6_timeout;
-
- return icmpv6_kmemdup_sysctl_table(pn, in);
-}
-
-static struct nf_proto_net *icmpv6_get_net_proto(struct net *net)
-{
- return &net->ct.nf_ct_proto.icmpv6.pn;
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
{
.l4proto = IPPROTO_ICMPV6,
- .pkt_to_tuple = icmpv6_pkt_to_tuple,
- .invert_tuple = icmpv6_invert_tuple,
- .packet = icmpv6_packet,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = icmpv6_tuple_to_nlattr,
.nlattr_tuple_size = icmpv6_nlattr_tuple_size,
@@ -377,6 +334,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 =
.nla_policy = icmpv6_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .init_net = icmpv6_init_net,
- .get_net_proto = icmpv6_get_net_proto,
};
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index d53e3e78f605..a7818101ad80 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -357,11 +357,11 @@ out_invalid:
}
/* Returns verdict for packet, or -NF_ACCEPT for invalid. */
-static int sctp_packet(struct nf_conn *ct,
- struct sk_buff *skb,
- unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- const struct nf_hook_state *state)
+int nf_conntrack_sctp_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
enum sctp_conntrack new_state, old_state;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
@@ -642,116 +642,18 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = {
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table sctp_sysctl_table[] = {
- {
- .procname = "nf_conntrack_sctp_timeout_closed",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_sctp_timeout_cookie_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_sctp_timeout_cookie_echoed",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_sctp_timeout_established",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_sctp_timeout_shutdown_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_sctp_timeout_shutdown_recd",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_sctp_timeout_heartbeat_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_sctp_timeout_heartbeat_acked",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif
-
-static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn,
- struct nf_sctp_net *sn)
-{
-#ifdef CONFIG_SYSCTL
- if (pn->ctl_table)
- return 0;
-
- pn->ctl_table = kmemdup(sctp_sysctl_table,
- sizeof(sctp_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_table)
- return -ENOMEM;
-
- pn->ctl_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED];
- pn->ctl_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT];
- pn->ctl_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED];
- pn->ctl_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED];
- pn->ctl_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT];
- pn->ctl_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD];
- pn->ctl_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT];
- pn->ctl_table[7].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_SENT];
- pn->ctl_table[8].data = &sn->timeouts[SCTP_CONNTRACK_HEARTBEAT_ACKED];
-#endif
- return 0;
-}
-
-static int sctp_init_net(struct net *net)
+void nf_conntrack_sctp_init_net(struct net *net)
{
struct nf_sctp_net *sn = nf_sctp_pernet(net);
- struct nf_proto_net *pn = &sn->pn;
-
- if (!pn->users) {
- int i;
-
- for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
- sn->timeouts[i] = sctp_timeouts[i];
-
- /* timeouts[0] is unused, init it so ->timeouts[0] contains
- * 'new' timeout, like udp or icmp.
- */
- sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED];
- }
+ int i;
- return sctp_kmemdup_sysctl_table(pn, sn);
-}
+ for (i = 0; i < SCTP_CONNTRACK_MAX; i++)
+ sn->timeouts[i] = sctp_timeouts[i];
-static struct nf_proto_net *sctp_get_net_proto(struct net *net)
-{
- return &net->ct.nf_ct_proto.sctp.pn;
+ /* timeouts[0] is unused, init it so ->timeouts[0] contains
+ * 'new' timeout, like udp or icmp.
+ */
+ sn->timeouts[0] = sctp_timeouts[SCTP_CONNTRACK_CLOSED];
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = {
@@ -759,9 +661,7 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = {
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = sctp_print_conntrack,
#endif
- .packet = sctp_packet,
.can_early_drop = sctp_can_early_drop,
- .me = THIS_MODULE,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.nlattr_size = SCTP_NLATTR_SIZE,
.to_nlattr = sctp_to_nlattr,
@@ -780,6 +680,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp = {
.nla_policy = sctp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .init_net = sctp_init_net,
- .get_net_proto = sctp_get_net_proto,
};
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 4dcbd51a8e97..a06875a466a4 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -828,12 +828,18 @@ static noinline bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
return true;
}
+static bool nf_conntrack_tcp_established(const struct nf_conn *ct)
+{
+ return ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED &&
+ test_bit(IPS_ASSURED_BIT, &ct->status);
+}
+
/* Returns verdict for packet, or -1 for invalid. */
-static int tcp_packet(struct nf_conn *ct,
- struct sk_buff *skb,
- unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- const struct nf_hook_state *state)
+int nf_conntrack_tcp_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = nf_tcp_pernet(net);
@@ -1030,16 +1036,38 @@ static int tcp_packet(struct nf_conn *ct,
new_state = TCP_CONNTRACK_ESTABLISHED;
break;
case TCP_CONNTRACK_CLOSE:
- if (index == TCP_RST_SET
- && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
- && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
- /* Invalid RST */
- spin_unlock_bh(&ct->lock);
- nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
- return -NF_ACCEPT;
+ if (index != TCP_RST_SET)
+ break;
+
+ if (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) {
+ u32 seq = ntohl(th->seq);
+
+ if (before(seq, ct->proto.tcp.seen[!dir].td_maxack)) {
+ /* Invalid RST */
+ spin_unlock_bh(&ct->lock);
+ nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
+ return -NF_ACCEPT;
+ }
+
+ if (!nf_conntrack_tcp_established(ct) ||
+ seq == ct->proto.tcp.seen[!dir].td_maxack)
+ break;
+
+ /* Check if rst is part of train, such as
+ * foo:80 > bar:4379: P, 235946583:235946602(19) ack 42
+ * foo:80 > bar:4379: R, 235946602:235946602(0) ack 42
+ */
+ if (ct->proto.tcp.last_index == TCP_ACK_SET &&
+ ct->proto.tcp.last_dir == dir &&
+ seq == ct->proto.tcp.last_end)
+ break;
+
+ /* ... RST sequence number doesn't match exactly, keep
+ * established state to allow a possible challenge ACK.
+ */
+ new_state = old_state;
}
- if (index == TCP_RST_SET
- && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
+ if (((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
&& ct->proto.tcp.last_index == TCP_SYN_SET)
|| (!test_bit(IPS_ASSURED_BIT, &ct->status)
&& ct->proto.tcp.last_index == TCP_ACK_SET))
@@ -1055,7 +1083,7 @@ static int tcp_packet(struct nf_conn *ct,
* segments we ignored. */
goto in_window;
}
- /* Just fall through */
+ break;
default:
/* Keep compilers happy. */
break;
@@ -1090,6 +1118,8 @@ static int tcp_packet(struct nf_conn *ct,
if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
timeout = timeouts[TCP_CONNTRACK_RETRANS];
+ else if (unlikely(index == TCP_RST_SET))
+ timeout = timeouts[TCP_CONNTRACK_CLOSE];
else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
@@ -1387,146 +1417,21 @@ static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-#ifdef CONFIG_SYSCTL
-static struct ctl_table tcp_sysctl_table[] = {
- {
- .procname = "nf_conntrack_tcp_timeout_syn_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_tcp_timeout_syn_recv",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_tcp_timeout_established",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_tcp_timeout_fin_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_tcp_timeout_close_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_tcp_timeout_last_ack",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_tcp_timeout_time_wait",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_tcp_timeout_close",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_tcp_timeout_max_retrans",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_tcp_timeout_unacknowledged",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_tcp_loose",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "nf_conntrack_tcp_be_liberal",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "nf_conntrack_tcp_max_retrans",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- { }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
- struct nf_tcp_net *tn)
-{
-#ifdef CONFIG_SYSCTL
- if (pn->ctl_table)
- return 0;
-
- pn->ctl_table = kmemdup(tcp_sysctl_table,
- sizeof(tcp_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_table)
- return -ENOMEM;
-
- pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
- pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
- pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
- pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
- pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
- pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
- pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
- pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
- pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
- pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];
- pn->ctl_table[10].data = &tn->tcp_loose;
- pn->ctl_table[11].data = &tn->tcp_be_liberal;
- pn->ctl_table[12].data = &tn->tcp_max_retrans;
-#endif
- return 0;
-}
-
-static int tcp_init_net(struct net *net)
+void nf_conntrack_tcp_init_net(struct net *net)
{
struct nf_tcp_net *tn = nf_tcp_pernet(net);
- struct nf_proto_net *pn = &tn->pn;
-
- if (!pn->users) {
- int i;
-
- for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
- tn->timeouts[i] = tcp_timeouts[i];
+ int i;
- /* timeouts[0] is unused, make it same as SYN_SENT so
- * ->timeouts[0] contains 'new' timeout, like udp or icmp.
- */
- tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
- tn->tcp_loose = nf_ct_tcp_loose;
- tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
- tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
- }
+ for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
+ tn->timeouts[i] = tcp_timeouts[i];
- return tcp_kmemdup_sysctl_table(pn, tn);
-}
-
-static struct nf_proto_net *tcp_get_net_proto(struct net *net)
-{
- return &net->ct.nf_ct_proto.tcp.pn;
+ /* timeouts[0] is unused, make it same as SYN_SENT so
+ * ->timeouts[0] contains 'new' timeout, like udp or icmp.
+ */
+ tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
+ tn->tcp_loose = nf_ct_tcp_loose;
+ tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
+ tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
@@ -1535,7 +1440,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = tcp_print_conntrack,
#endif
- .packet = tcp_packet,
.can_early_drop = tcp_can_early_drop,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.to_nlattr = tcp_to_nlattr,
@@ -1556,6 +1460,4 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
.nla_policy = tcp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .init_net = tcp_init_net,
- .get_net_proto = tcp_get_net_proto,
};
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index b4f5d5e82031..951366dfbec3 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -85,11 +85,11 @@ static bool udp_error(struct sk_buff *skb,
}
/* Returns verdict for packet, and may modify conntracktype */
-static int udp_packet(struct nf_conn *ct,
- struct sk_buff *skb,
- unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- const struct nf_hook_state *state)
+int nf_conntrack_udp_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
unsigned int *timeouts;
@@ -177,11 +177,11 @@ static bool udplite_error(struct sk_buff *skb,
}
/* Returns verdict for packet, and may modify conntracktype */
-static int udplite_packet(struct nf_conn *ct,
- struct sk_buff *skb,
- unsigned int dataoff,
- enum ip_conntrack_info ctinfo,
- const struct nf_hook_state *state)
+int nf_conntrack_udplite_packet(struct nf_conn *ct,
+ struct sk_buff *skb,
+ unsigned int dataoff,
+ enum ip_conntrack_info ctinfo,
+ const struct nf_hook_state *state)
{
unsigned int *timeouts;
@@ -260,66 +260,19 @@ udp_timeout_nla_policy[CTA_TIMEOUT_UDP_MAX+1] = {
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
-#ifdef CONFIG_SYSCTL
-static struct ctl_table udp_sysctl_table[] = {
- {
- .procname = "nf_conntrack_udp_timeout",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- {
- .procname = "nf_conntrack_udp_timeout_stream",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- { }
-};
-#endif /* CONFIG_SYSCTL */
-
-static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn,
- struct nf_udp_net *un)
-{
-#ifdef CONFIG_SYSCTL
- if (pn->ctl_table)
- return 0;
- pn->ctl_table = kmemdup(udp_sysctl_table,
- sizeof(udp_sysctl_table),
- GFP_KERNEL);
- if (!pn->ctl_table)
- return -ENOMEM;
- pn->ctl_table[0].data = &un->timeouts[UDP_CT_UNREPLIED];
- pn->ctl_table[1].data = &un->timeouts[UDP_CT_REPLIED];
-#endif
- return 0;
-}
-
-static int udp_init_net(struct net *net)
+void nf_conntrack_udp_init_net(struct net *net)
{
struct nf_udp_net *un = nf_udp_pernet(net);
- struct nf_proto_net *pn = &un->pn;
+ int i;
- if (!pn->users) {
- int i;
-
- for (i = 0; i < UDP_CT_MAX; i++)
- un->timeouts[i] = udp_timeouts[i];
- }
-
- return udp_kmemdup_sysctl_table(pn, un);
-}
-
-static struct nf_proto_net *udp_get_net_proto(struct net *net)
-{
- return &net->ct.nf_ct_proto.udp.pn;
+ for (i = 0; i < UDP_CT_MAX; i++)
+ un->timeouts[i] = udp_timeouts[i];
}
const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
{
.l4proto = IPPROTO_UDP,
.allow_clash = true,
- .packet = udp_packet,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
@@ -335,8 +288,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
.nla_policy = udp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .init_net = udp_init_net,
- .get_net_proto = udp_get_net_proto,
};
#ifdef CONFIG_NF_CT_PROTO_UDPLITE
@@ -344,7 +295,6 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite =
{
.l4proto = IPPROTO_UDPLITE,
.allow_clash = true,
- .packet = udplite_packet,
#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,
.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple,
@@ -360,7 +310,5 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite =
.nla_policy = udp_timeout_nla_policy,
},
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
- .init_net = udp_init_net,
- .get_net_proto = udp_get_net_proto,
};
#endif
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index c8d2b6688a2a..f067c6b50857 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -21,6 +21,8 @@
#include <linux/tcp.h>
#include <linux/netfilter.h>
+#include <net/route.h>
+#include <net/ip6_route.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
@@ -54,6 +56,11 @@ module_param(sip_direct_media, int, 0600);
MODULE_PARM_DESC(sip_direct_media, "Expect Media streams between signalling "
"endpoints only (default 1)");
+static int sip_external_media __read_mostly = 0;
+module_param(sip_external_media, int, 0600);
+MODULE_PARM_DESC(sip_external_media, "Expect Media streams between external "
+ "endpoints (default 0)");
+
const struct nf_nat_sip_hooks *nf_nat_sip_hooks;
EXPORT_SYMBOL_GPL(nf_nat_sip_hooks);
@@ -861,6 +868,41 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff,
if (!nf_inet_addr_cmp(daddr, &ct->tuplehash[dir].tuple.src.u3))
return NF_ACCEPT;
saddr = &ct->tuplehash[!dir].tuple.src.u3;
+ } else if (sip_external_media) {
+ struct net_device *dev = skb_dst(skb)->dev;
+ struct net *net = dev_net(dev);
+ struct rtable *rt;
+ struct flowi4 fl4 = {};
+#if IS_ENABLED(CONFIG_IPV6)
+ struct flowi6 fl6 = {};
+#endif
+ struct dst_entry *dst = NULL;
+
+ switch (nf_ct_l3num(ct)) {
+ case NFPROTO_IPV4:
+ fl4.daddr = daddr->ip;
+ rt = ip_route_output_key(net, &fl4);
+ if (!IS_ERR(rt))
+ dst = &rt->dst;
+ break;
+
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6:
+ fl6.daddr = daddr->in6;
+ dst = ip6_route_output(net, NULL, &fl6);
+ if (dst->error) {
+ dst_release(dst);
+ dst = NULL;
+ }
+ break;
+#endif
+ }
+
+ /* Don't predict any conntracks when media endpoint is reachable
+ * through the same interface as the signalling peer.
+ */
+ if (dst && dst->dev == dev)
+ return NF_ACCEPT;
}
/* We need to check whether the registration exists before attempting
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index b6177fd73304..c2ae14c720b4 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -24,6 +24,10 @@
#include <net/netfilter/nf_conntrack_timestamp.h>
#include <linux/rculist_nulls.h>
+static bool enable_hooks __read_mostly;
+MODULE_PARM_DESC(enable_hooks, "Always enable conntrack hooks");
+module_param(enable_hooks, bool, 0000);
+
unsigned int nf_conntrack_net_id __read_mostly;
#ifdef CONFIG_NF_CONNTRACK_PROCFS
@@ -310,8 +314,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (!net_eq(nf_ct_net(ct), net))
goto release;
- l4proto = __nf_ct_l4proto_find(nf_ct_protonum(ct));
- WARN_ON(!l4proto);
+ l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
ret = -ENOSPC;
seq_printf(s, "%-8s %u %-8s %u ",
@@ -547,8 +550,55 @@ enum nf_ct_sysctl_index {
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
NF_SYSCTL_CT_TIMESTAMP,
#endif
+ NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ESTABLISHED,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_FIN_WAIT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE_WAIT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_LAST_ACK,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_TIME_WAIT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
+ NF_SYSCTL_CT_PROTO_TCP_LOOSE,
+ NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
+ NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_CLOSED,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_WAIT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_ECHOED,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ESTABLISHED,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_SENT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT,
+ NF_SYSCTL_CT_PROTO_DCCP_LOOSE,
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ NF_SYSCTL_CT_PROTO_TIMEOUT_GRE,
+ NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM,
+#endif
+
+ __NF_SYSCTL_CT_LAST_SYSCTL,
};
+#define NF_SYSCTL_CT_LAST_SYSCTL (__NF_SYSCTL_CT_LAST_SYSCTL + 1)
+
static struct ctl_table nf_ct_sysctl_table[] = {
[NF_SYSCTL_CT_MAX] = {
.procname = "nf_conntrack_max",
@@ -626,7 +676,235 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.proc_handler = proc_dointvec,
},
#endif
- { }
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = {
+ .procname = "nf_conntrack_generic_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_SENT] = {
+ .procname = "nf_conntrack_tcp_timeout_syn_sent",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_SYN_RECV] = {
+ .procname = "nf_conntrack_tcp_timeout_syn_recv",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ESTABLISHED] = {
+ .procname = "nf_conntrack_tcp_timeout_established",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_FIN_WAIT] = {
+ .procname = "nf_conntrack_tcp_timeout_fin_wait",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE_WAIT] = {
+ .procname = "nf_conntrack_tcp_timeout_close_wait",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_LAST_ACK] = {
+ .procname = "nf_conntrack_tcp_timeout_last_ack",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_TIME_WAIT] = {
+ .procname = "nf_conntrack_tcp_timeout_time_wait",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE] = {
+ .procname = "nf_conntrack_tcp_timeout_close",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS] = {
+ .procname = "nf_conntrack_tcp_timeout_max_retrans",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK] = {
+ .procname = "nf_conntrack_tcp_timeout_unacknowledged",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
+ .procname = "nf_conntrack_tcp_loose",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ [NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = {
+ .procname = "nf_conntrack_tcp_be_liberal",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = {
+ .procname = "nf_conntrack_tcp_max_retrans",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP] = {
+ .procname = "nf_conntrack_udp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM] = {
+ .procname = "nf_conntrack_udp_timeout_stream",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
+ .procname = "nf_conntrack_icmp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6] = {
+ .procname = "nf_conntrack_icmpv6_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_CLOSED] = {
+ .procname = "nf_conntrack_sctp_timeout_closed",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_WAIT] = {
+ .procname = "nf_conntrack_sctp_timeout_cookie_wait",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_COOKIE_ECHOED] = {
+ .procname = "nf_conntrack_sctp_timeout_cookie_echoed",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ESTABLISHED] = {
+ .procname = "nf_conntrack_sctp_timeout_established",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_SENT] = {
+ .procname = "nf_conntrack_sctp_timeout_shutdown_sent",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD] = {
+ .procname = "nf_conntrack_sctp_timeout_shutdown_recd",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = {
+ .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT] = {
+ .procname = "nf_conntrack_sctp_timeout_heartbeat_sent",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = {
+ .procname = "nf_conntrack_sctp_timeout_heartbeat_acked",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#endif
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = {
+ .procname = "nf_conntrack_dccp_timeout_request",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_RESPOND] = {
+ .procname = "nf_conntrack_dccp_timeout_respond",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_PARTOPEN] = {
+ .procname = "nf_conntrack_dccp_timeout_partopen",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_OPEN] = {
+ .procname = "nf_conntrack_dccp_timeout_open",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSEREQ] = {
+ .procname = "nf_conntrack_dccp_timeout_closereq",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_CLOSING] = {
+ .procname = "nf_conntrack_dccp_timeout_closing",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_TIMEWAIT] = {
+ .procname = "nf_conntrack_dccp_timeout_timewait",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_DCCP_LOOSE] = {
+ .procname = "nf_conntrack_dccp_loose",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_GRE] = {
+ .procname = "nf_conntrack_gre_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ [NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM] = {
+ .procname = "nf_conntrack_gre_timeout_stream",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+#endif
+ {}
};
static struct ctl_table nf_ct_netfilter_table[] = {
@@ -640,14 +918,103 @@ static struct ctl_table nf_ct_netfilter_table[] = {
{ }
};
+static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
+ struct ctl_table *table)
+{
+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
+
+#define XASSIGN(XNAME, tn) \
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_ ## XNAME].data = \
+ &(tn)->timeouts[TCP_CONNTRACK_ ## XNAME]
+
+ XASSIGN(SYN_SENT, tn);
+ XASSIGN(SYN_RECV, tn);
+ XASSIGN(ESTABLISHED, tn);
+ XASSIGN(FIN_WAIT, tn);
+ XASSIGN(CLOSE_WAIT, tn);
+ XASSIGN(LAST_ACK, tn);
+ XASSIGN(TIME_WAIT, tn);
+ XASSIGN(CLOSE, tn);
+ XASSIGN(RETRANS, tn);
+ XASSIGN(UNACK, tn);
+#undef XASSIGN
+#define XASSIGN(XNAME, rval) \
+ table[NF_SYSCTL_CT_PROTO_TCP_ ## XNAME].data = (rval)
+
+ XASSIGN(LOOSE, &tn->tcp_loose);
+ XASSIGN(LIBERAL, &tn->tcp_be_liberal);
+ XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
+#undef XASSIGN
+}
+
+static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
+ struct ctl_table *table)
+{
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ struct nf_sctp_net *sn = nf_sctp_pernet(net);
+
+#define XASSIGN(XNAME, sn) \
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_ ## XNAME].data = \
+ &(sn)->timeouts[SCTP_CONNTRACK_ ## XNAME]
+
+ XASSIGN(CLOSED, sn);
+ XASSIGN(COOKIE_WAIT, sn);
+ XASSIGN(COOKIE_ECHOED, sn);
+ XASSIGN(ESTABLISHED, sn);
+ XASSIGN(SHUTDOWN_SENT, sn);
+ XASSIGN(SHUTDOWN_RECD, sn);
+ XASSIGN(SHUTDOWN_ACK_SENT, sn);
+ XASSIGN(HEARTBEAT_SENT, sn);
+ XASSIGN(HEARTBEAT_ACKED, sn);
+#undef XASSIGN
+#endif
+}
+
+static void nf_conntrack_standalone_init_dccp_sysctl(struct net *net,
+ struct ctl_table *table)
+{
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ struct nf_dccp_net *dn = nf_dccp_pernet(net);
+
+#define XASSIGN(XNAME, dn) \
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_ ## XNAME].data = \
+ &(dn)->dccp_timeout[CT_DCCP_ ## XNAME]
+
+ XASSIGN(REQUEST, dn);
+ XASSIGN(RESPOND, dn);
+ XASSIGN(PARTOPEN, dn);
+ XASSIGN(OPEN, dn);
+ XASSIGN(CLOSEREQ, dn);
+ XASSIGN(CLOSING, dn);
+ XASSIGN(TIMEWAIT, dn);
+#undef XASSIGN
+
+ table[NF_SYSCTL_CT_PROTO_DCCP_LOOSE].data = &dn->dccp_loose;
+#endif
+}
+
+static void nf_conntrack_standalone_init_gre_sysctl(struct net *net,
+ struct ctl_table *table)
+{
+#ifdef CONFIG_NF_CT_PROTO_GRE
+ struct nf_gre_net *gn = nf_gre_pernet(net);
+
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE].data = &gn->timeouts[GRE_CT_UNREPLIED];
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_GRE_STREAM].data = &gn->timeouts[GRE_CT_REPLIED];
+#endif
+}
+
static int nf_conntrack_standalone_init_sysctl(struct net *net)
{
+ struct nf_udp_net *un = nf_udp_pernet(net);
struct ctl_table *table;
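+ /* The NF_SYSCTL_CT_* indices and this table must stay in sync; catch any drift at build time. */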
+ BUILD_BUG_ON(ARRAY_SIZE(nf_ct_sysctl_table) != NF_SYSCTL_CT_LAST_SYSCTL);
+
table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table),
GFP_KERNEL);
if (!table)
- goto out_kmemdup;
+ return -ENOMEM;
table[NF_SYSCTL_CT_COUNT].data = &net->ct.count;
table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum;
@@ -655,6 +1022,16 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
#ifdef CONFIG_NF_CONNTRACK_EVENTS
table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events;
#endif
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC].data = &nf_generic_pernet(net)->timeout;
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP].data = &nf_icmp_pernet(net)->timeout;
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
+
+ nf_conntrack_standalone_init_tcp_sysctl(net, table);
+ nf_conntrack_standalone_init_sctp_sysctl(net, table);
+ nf_conntrack_standalone_init_dccp_sysctl(net, table);
+ nf_conntrack_standalone_init_gre_sysctl(net, table);
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns) {
@@ -680,7 +1057,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
out_unregister_netfilter:
kfree(table);
-out_kmemdup:
return -ENOMEM;
}
@@ -703,31 +1079,47 @@ static void nf_conntrack_standalone_fini_sysctl(struct net *net)
}
#endif /* CONFIG_SYSCTL */
+static void nf_conntrack_fini_net(struct net *net)
+{
+ if (enable_hooks)
+ nf_ct_netns_put(net, NFPROTO_INET);
+
+ nf_conntrack_standalone_fini_proc(net);
+ nf_conntrack_standalone_fini_sysctl(net);
+}
+
static int nf_conntrack_pernet_init(struct net *net)
{
int ret;
- ret = nf_conntrack_init_net(net);
+ net->ct.sysctl_checksum = 1;
+
+ ret = nf_conntrack_standalone_init_sysctl(net);
if (ret < 0)
- goto out_init;
+ return ret;
ret = nf_conntrack_standalone_init_proc(net);
if (ret < 0)
goto out_proc;
- net->ct.sysctl_checksum = 1;
- net->ct.sysctl_log_invalid = 0;
- ret = nf_conntrack_standalone_init_sysctl(net);
+ ret = nf_conntrack_init_net(net);
if (ret < 0)
- goto out_sysctl;
+ goto out_init_net;
+
+ if (enable_hooks) {
+ ret = nf_ct_netns_get(net, NFPROTO_INET);
+ if (ret < 0)
+ goto out_hooks;
+ }
return 0;
-out_sysctl:
+out_hooks:
+ nf_conntrack_cleanup_net(net);
+out_init_net:
nf_conntrack_standalone_fini_proc(net);
out_proc:
- nf_conntrack_cleanup_net(net);
-out_init:
+ nf_conntrack_standalone_fini_sysctl(net);
return ret;
}
@@ -735,10 +1127,9 @@ static void nf_conntrack_pernet_exit(struct list_head *net_exit_list)
{
struct net *net;
- list_for_each_entry(net, net_exit_list, exit_list) {
- nf_conntrack_standalone_fini_sysctl(net);
- nf_conntrack_standalone_fini_proc(net);
- }
+ list_for_each_entry(net, net_exit_list, exit_list)
+ nf_conntrack_fini_net(net);
+
nf_conntrack_cleanup_net_list(net_exit_list);
}
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index c0c72ae9df42..7aabfd4b1e50 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -121,7 +121,7 @@ static void flow_offload_fixup_ct_state(struct nf_conn *ct)
if (l4num == IPPROTO_TCP)
flow_offload_fixup_tcp(&ct->proto.tcp);
- l4proto = __nf_ct_l4proto_find(l4num);
+ l4proto = nf_ct_l4proto_find(l4num);
if (!l4proto)
return;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index d159e9e7835b..af7dc6537758 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -22,8 +22,6 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
@@ -35,8 +33,6 @@
static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];
static DEFINE_MUTEX(nf_nat_proto_mutex);
-static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
- __read_mostly;
static unsigned int nat_net_id __read_mostly;
static struct hlist_head *nf_nat_bysource __read_mostly;
@@ -58,16 +54,75 @@ struct nat_net {
struct nf_nat_hooks_net nat_proto_net[NFPROTO_NUMPROTO];
};
-inline const struct nf_nat_l3proto *
-__nf_nat_l3proto_find(u8 family)
+#ifdef CONFIG_XFRM
+static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ unsigned long statusbit,
+ struct flowi *fl)
{
- return rcu_dereference(nf_nat_l3protos[family]);
+ const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+ struct flowi4 *fl4 = &fl->u.ip4;
+
+ if (ct->status & statusbit) {
+ fl4->daddr = t->dst.u3.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl4->fl4_dport = t->dst.u.all;
+ }
+
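+ /* IPS_NAT_MASK is IPS_SRC_NAT | IPS_DST_NAT; the XOR flips to the other manipulation for the source side. */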
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ fl4->saddr = t->src.u3.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl4->fl4_sport = t->src.u.all;
+ }
+}
+
+static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ unsigned long statusbit,
+ struct flowi *fl)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+ struct flowi6 *fl6 = &fl->u.ip6;
+
+ if (ct->status & statusbit) {
+ fl6->daddr = t->dst.u3.in6;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl6->fl6_dport = t->dst.u.all;
+ }
+
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ fl6->saddr = t->src.u3.in6;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl6->fl6_sport = t->src.u.all;
+ }
+#endif
}
-#ifdef CONFIG_XFRM
static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
{
- const struct nf_nat_l3proto *l3proto;
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
enum ip_conntrack_dir dir;
@@ -79,17 +134,20 @@ static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
return;
family = nf_ct_l3num(ct);
- l3proto = __nf_nat_l3proto_find(family);
- if (l3proto == NULL)
- return;
-
dir = CTINFO2DIR(ctinfo);
if (dir == IP_CT_DIR_ORIGINAL)
statusbit = IPS_DST_NAT;
else
statusbit = IPS_SRC_NAT;
- l3proto->decode_session(skb, ct, dir, statusbit, fl);
+ switch (family) {
+ case NFPROTO_IPV4:
+ nf_nat_ipv4_decode_session(skb, ct, dir, statusbit, fl);
+ return;
+ case NFPROTO_IPV6:
+ nf_nat_ipv6_decode_session(skb, ct, dir, statusbit, fl);
+ return;
+ }
}
int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
@@ -146,7 +204,7 @@ hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
}
/* Is this tuple already taken? (not by us) */
-int
+static int
nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
const struct nf_conn *ignored_conntrack)
{
@@ -158,10 +216,9 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
*/
struct nf_conntrack_tuple reply;
- nf_ct_invert_tuplepr(&reply, tuple);
+ nf_ct_invert_tuple(&reply, tuple);
return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
}
-EXPORT_SYMBOL(nf_nat_used_tuple);
static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
const struct nf_nat_range2 *range)
@@ -183,7 +240,7 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
__be16 port;
switch (tuple->dst.protonum) {
- case IPPROTO_ICMP: /* fallthrough */
+ case IPPROTO_ICMP:
case IPPROTO_ICMPV6:
return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
@@ -253,7 +310,7 @@ find_appropriate_src(struct net *net,
net_eq(net, nf_ct_net(ct)) &&
nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
/* Copy source part from reply tuple. */
- nf_ct_invert_tuplepr(result,
+ nf_ct_invert_tuple(result,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
result->dst = tuple->dst;
@@ -560,8 +617,8 @@ nf_nat_setup_info(struct nf_conn *ct,
* manipulations (future optimization: if num_manips == 0,
* orig_tp = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)
*/
- nf_ct_invert_tuplepr(&curr_tuple,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ nf_ct_invert_tuple(&curr_tuple,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
@@ -569,7 +626,7 @@ nf_nat_setup_info(struct nf_conn *ct,
struct nf_conntrack_tuple reply;
/* Alter conntrack table so will recognize replies. */
- nf_ct_invert_tuplepr(&reply, &new_tuple);
+ nf_ct_invert_tuple(&reply, &new_tuple);
nf_conntrack_alter_reply(ct, &reply);
/* Non-atomic: we own this at the moment. */
@@ -632,23 +689,6 @@ nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
}
EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);
-static unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
- enum nf_nat_manip_type mtype,
- enum ip_conntrack_dir dir)
-{
- const struct nf_nat_l3proto *l3proto;
- struct nf_conntrack_tuple target;
-
- /* We are aiming to look like inverse of other direction. */
- nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-
- l3proto = __nf_nat_l3proto_find(target.src.l3num);
- if (!l3proto->manip_pkt(skb, 0, &target, mtype))
- return NF_DROP;
-
- return NF_ACCEPT;
-}
-
/* Do packet manipulations according to nf_nat_setup_info. */
unsigned int nf_nat_packet(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
@@ -799,33 +839,6 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
return 0;
}
-static void nf_nat_l3proto_clean(u8 l3proto)
-{
- struct nf_nat_proto_clean clean = {
- .l3proto = l3proto,
- };
-
- nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
-}
-
-int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
-{
- RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
- return 0;
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_register);
-
-void nf_nat_l3proto_unregister(const struct nf_nat_l3proto *l3proto)
-{
- mutex_lock(&nf_nat_proto_mutex);
- RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], NULL);
- mutex_unlock(&nf_nat_proto_mutex);
- synchronize_rcu();
-
- nf_nat_l3proto_clean(l3proto->l3proto);
-}
-EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
-
/* No one using conntrack by the time this called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
@@ -888,10 +901,43 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
[CTA_NAT_PROTO] = { .type = NLA_NESTED },
};
+static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range2 *range)
+{
+ if (tb[CTA_NAT_V4_MINIP]) {
+ range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
+ range->flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+ if (tb[CTA_NAT_V4_MAXIP])
+ range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
+ else
+ range->max_addr.ip = range->min_addr.ip;
+
+ return 0;
+}
+
+static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range2 *range)
+{
+ if (tb[CTA_NAT_V6_MINIP]) {
+ nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP],
+ sizeof(struct in6_addr));
+ range->flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+ if (tb[CTA_NAT_V6_MAXIP])
+ nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP],
+ sizeof(struct in6_addr));
+ else
+ range->max_addr = range->min_addr;
+
+ return 0;
+}
+
static int
nfnetlink_parse_nat(const struct nlattr *nat,
- const struct nf_conn *ct, struct nf_nat_range2 *range,
- const struct nf_nat_l3proto *l3proto)
+ const struct nf_conn *ct, struct nf_nat_range2 *range)
{
struct nlattr *tb[CTA_NAT_MAX+1];
int err;
@@ -902,8 +948,19 @@ nfnetlink_parse_nat(const struct nlattr *nat,
if (err < 0)
return err;
- err = l3proto->nlattr_to_range(tb, range);
- if (err < 0)
+ switch (nf_ct_l3num(ct)) {
+ case NFPROTO_IPV4:
+ err = nf_nat_ipv4_nlattr_to_range(tb, range);
+ break;
+ case NFPROTO_IPV6:
+ err = nf_nat_ipv6_nlattr_to_range(tb, range);
+ break;
+ default:
+ err = -EPROTONOSUPPORT;
+ break;
+ }
+
+ if (err)
return err;
if (!tb[CTA_NAT_PROTO])
@@ -919,7 +976,6 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
const struct nlattr *attr)
{
struct nf_nat_range2 range;
- const struct nf_nat_l3proto *l3proto;
int err;
/* Should not happen, restricted to creating new conntracks
@@ -928,18 +984,11 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
if (WARN_ON_ONCE(nf_nat_initialized(ct, manip)))
return -EEXIST;
- /* Make sure that L3 NAT is there by when we call nf_nat_setup_info to
- * attach the null binding, otherwise this may oops.
- */
- l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
- if (l3proto == NULL)
- return -EAGAIN;
-
/* No NAT information has been passed, allocate the null-binding */
if (attr == NULL)
return __nf_nat_alloc_null_binding(ct, manip) == NF_DROP ? -ENOMEM : 0;
- err = nfnetlink_parse_nat(attr, ct, &range, l3proto);
+ err = nfnetlink_parse_nat(attr, ct, &range);
if (err < 0)
return err;
@@ -1036,7 +1085,6 @@ int nf_nat_register_fn(struct net *net, const struct nf_hook_ops *ops,
mutex_unlock(&nf_nat_proto_mutex);
return ret;
}
-EXPORT_SYMBOL_GPL(nf_nat_register_fn);
void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops,
unsigned int ops_count)
@@ -1085,7 +1133,6 @@ void nf_nat_unregister_fn(struct net *net, const struct nf_hook_ops *ops,
unlock:
mutex_unlock(&nf_nat_proto_mutex);
}
-EXPORT_SYMBOL_GPL(nf_nat_unregister_fn);
static struct pernet_operations nat_net_ops = {
.id = &nat_net_id,
diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c
index 38793b95d9bc..ccc06f7539d7 100644
--- a/net/netfilter/nf_nat_helper.c
+++ b/net/netfilter/nf_nat_helper.c
@@ -22,9 +22,6 @@
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
/* Frobs data inside this packet, which is linear. */
@@ -98,7 +95,6 @@ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
const char *rep_buffer,
unsigned int rep_len, bool adjust)
{
- const struct nf_nat_l3proto *l3proto;
struct tcphdr *tcph;
int oldlen, datalen;
@@ -118,9 +114,8 @@ bool __nf_nat_mangle_tcp_packet(struct sk_buff *skb,
datalen = skb->len - protoff;
- l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
- l3proto->csum_recalc(skb, IPPROTO_TCP, tcph, &tcph->check,
- datalen, oldlen);
+ nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_TCP,
+ tcph, &tcph->check, datalen, oldlen);
if (adjust && rep_len != match_len)
nf_ct_seqadj_set(ct, ctinfo, tcph->seq,
@@ -150,7 +145,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
const char *rep_buffer,
unsigned int rep_len)
{
- const struct nf_nat_l3proto *l3proto;
struct udphdr *udph;
int datalen, oldlen;
@@ -176,9 +170,8 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb,
if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
return true;
- l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
- l3proto->csum_recalc(skb, IPPROTO_UDP, udph, &udph->check,
- datalen, oldlen);
+ nf_nat_csum_recalc(skb, nf_ct_l3num(ct), IPPROTO_UDP,
+ udph, &udph->check, datalen, oldlen);
return true;
}
diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c
new file mode 100644
index 000000000000..d85c4d902e7b
--- /dev/null
+++ b/net/netfilter/nf_nat_masquerade.c
@@ -0,0 +1,365 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/types.h>
+#include <linux/atomic.h>
+#include <linux/inetdevice.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv6.h>
+
+#include <net/netfilter/ipv4/nf_nat_masquerade.h>
+#include <net/netfilter/ipv6/nf_nat_masquerade.h>
+
+static DEFINE_MUTEX(masq_mutex);
+static unsigned int masq_refcnt4 __read_mostly;
+static unsigned int masq_refcnt6 __read_mostly;
+
+unsigned int
+nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
+ const struct nf_nat_range2 *range,
+ const struct net_device *out)
+{
+ struct nf_conn *ct;
+ struct nf_conn_nat *nat;
+ enum ip_conntrack_info ctinfo;
+ struct nf_nat_range2 newrange;
+ const struct rtable *rt;
+ __be32 newsrc, nh;
+
+ WARN_ON(hooknum != NF_INET_POST_ROUTING);
+
+ ct = nf_ct_get(skb, &ctinfo);
+
+ WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
+
+ /* Source address is 0.0.0.0 - locally generated packet that is
+ * probably not supposed to be masqueraded.
+ */
+ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
+ return NF_ACCEPT;
+
+ rt = skb_rtable(skb);
+ nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
+ newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
+ if (!newsrc) {
+ pr_info("%s ate my IP address\n", out->name);
+ return NF_DROP;
+ }
+
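+ /* Remember the output interface; masq_device_event() matches masq_index to flush these conntracks on NETDEV_DOWN. */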
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat)
+ nat->masq_index = out->ifindex;
+
+ /* Transfer from original range. */
+ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+ memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.ip = newsrc;
+ newrange.max_addr.ip = newsrc;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ /* Hand modified range to generic setup. */
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);
+
+static int device_cmp(struct nf_conn *i, void *ifindex)
+{
+ const struct nf_conn_nat *nat = nfct_nat(i);
+
+ if (!nat)
+ return 0;
+ return nat->masq_index == (int)(long)ifindex;
+}
+
+static int masq_device_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+
+ if (event == NETDEV_DOWN) {
+ /* Device was downed. Search entire table for
+ * conntracks which were associated with that device,
+ * and forget them.
+ */
+
+ nf_ct_iterate_cleanup_net(net, device_cmp,
+ (void *)(long)dev->ifindex, 0, 0);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int inet_cmp(struct nf_conn *ct, void *ptr)
+{
+ struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
+ struct net_device *dev = ifa->ifa_dev->dev;
+ struct nf_conntrack_tuple *tuple;
+
+ if (!device_cmp(ct, (void *)(long)dev->ifindex))
+ return 0;
+
+ tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+ return ifa->ifa_address == tuple->dst.u3.ip;
+}
+
+static int masq_inet_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
+ struct net *net = dev_net(idev->dev);
+
+ /* The masq_dev_notifier will catch the case of the device going
+ * down. So if the inetdev is dead and being destroyed we have
+ * no work to do. Otherwise this is an individual address removal
+ * and we have to perform the flush.
+ */
+ if (idev->dead)
+ return NOTIFY_DONE;
+
+ if (event == NETDEV_DOWN)
+ nf_ct_iterate_cleanup_net(net, inet_cmp, ptr, 0, 0);
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_dev_notifier = {
+ .notifier_call = masq_device_event,
+};
+
+static struct notifier_block masq_inet_notifier = {
+ .notifier_call = masq_inet_event,
+};
+
+int nf_nat_masquerade_ipv4_register_notifier(void)
+{
+ int ret = 0;
+
+ mutex_lock(&masq_mutex);
+ if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) {
+ ret = -EOVERFLOW;
+ goto out_unlock;
+ }
+
+ /* check if the notifier was already set */
+ if (++masq_refcnt4 > 1)
+ goto out_unlock;
+
+ /* Register for device down reports */
+ ret = register_netdevice_notifier(&masq_dev_notifier);
+ if (ret)
+ goto err_dec;
+ /* Register IP address change reports */
+ ret = register_inetaddr_notifier(&masq_inet_notifier);
+ if (ret)
+ goto err_unregister;
+
+ mutex_unlock(&masq_mutex);
+ return ret;
+
+err_unregister:
+ unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+ masq_refcnt4--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
+
+void nf_nat_masquerade_ipv4_unregister_notifier(void)
+{
+ mutex_lock(&masq_mutex);
+ /* check if the notifier still has clients */
+ if (--masq_refcnt4 > 0)
+ goto out_unlock;
+
+ unregister_netdevice_notifier(&masq_dev_notifier);
+ unregister_inetaddr_notifier(&masq_inet_notifier);
+out_unlock:
+ mutex_unlock(&masq_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
+
+#if IS_ENABLED(CONFIG_IPV6)
+static atomic_t v6_worker_count __read_mostly;
+
+static int
+nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
+ const struct in6_addr *daddr, unsigned int srcprefs,
+ struct in6_addr *saddr)
+{
+#ifdef CONFIG_IPV6_MODULE
+ const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+ if (!v6_ops)
+ return -EHOSTUNREACH;
+
+ return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#else
+ return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
+#endif
+}
+
+unsigned int
+nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
+ const struct net_device *out)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ struct in6_addr src;
+ struct nf_conn *ct;
+ struct nf_nat_range2 newrange;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY)));
+
+ if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
+ &ipv6_hdr(skb)->daddr, 0, &src) < 0)
+ return NF_DROP;
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat)
+ nat->masq_index = out->ifindex;
+
+ newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.in6 = src;
+ newrange.max_addr.in6 = src;
+ newrange.min_proto = range->min_proto;
+ newrange.max_proto = range->max_proto;
+
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);
+
+struct masq_dev_work {
+ struct work_struct work;
+ struct net *net;
+ struct in6_addr addr;
+ int ifindex;
+};
+
+static int inet6_cmp(struct nf_conn *ct, void *work)
+{
+ struct masq_dev_work *w = (struct masq_dev_work *)work;
+ struct nf_conntrack_tuple *tuple;
+
+ if (!device_cmp(ct, (void *)(long)w->ifindex))
+ return 0;
+
+ tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
+
+ return ipv6_addr_equal(&w->addr, &tuple->dst.u3.in6);
+}
+
+static void iterate_cleanup_work(struct work_struct *work)
+{
+ struct masq_dev_work *w;
+
+ w = container_of(work, struct masq_dev_work, work);
+
+ nf_ct_iterate_cleanup_net(w->net, inet6_cmp, (void *)w, 0, 0);
+
+ put_net(w->net);
+ kfree(w);
+ atomic_dec(&v6_worker_count);
+ module_put(THIS_MODULE);
+}
+
+/* atomic notifier; can't call nf_ct_iterate_cleanup_net (it can sleep).
+ *
+ * Defer it to the system workqueue.
+ *
+ * As we can have 'a lot' of inet_events (depending on the number of ipv6
+ * addresses being deleted), we also need to limit the work item queue.
+ */
+static int masq_inet6_event(struct notifier_block *this,
+ unsigned long event, void *ptr)
+{
+ struct inet6_ifaddr *ifa = ptr;
+ const struct net_device *dev;
+ struct masq_dev_work *w;
+ struct net *net;
+
+ if (event != NETDEV_DOWN || atomic_read(&v6_worker_count) >= 16)
+ return NOTIFY_DONE;
+
+ dev = ifa->idev->dev;
+ net = maybe_get_net(dev_net(dev));
+ if (!net)
+ return NOTIFY_DONE;
+
+ if (!try_module_get(THIS_MODULE))
+ goto err_module;
+
+ w = kmalloc(sizeof(*w), GFP_ATOMIC);
+ if (w) {
+ atomic_inc(&v6_worker_count);
+
+ INIT_WORK(&w->work, iterate_cleanup_work);
+ w->ifindex = dev->ifindex;
+ w->net = net;
+ w->addr = ifa->addr;
+ schedule_work(&w->work);
+
+ return NOTIFY_DONE;
+ }
+
+ module_put(THIS_MODULE);
+ err_module:
+ put_net(net);
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block masq_inet6_notifier = {
+ .notifier_call = masq_inet6_event,
+};
+
+int nf_nat_masquerade_ipv6_register_notifier(void)
+{
+ int ret = 0;
+
+ mutex_lock(&masq_mutex);
+ if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) {
+ ret = -EOVERFLOW;
+ goto out_unlock;
+ }
+
+ /* check if the notifier is already set */
+ if (++masq_refcnt6 > 1)
+ goto out_unlock;
+
+ ret = register_inet6addr_notifier(&masq_inet6_notifier);
+ if (ret)
+ goto err_dec;
+
+ mutex_unlock(&masq_mutex);
+ return ret;
+err_dec:
+ masq_refcnt6--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_register_notifier);
+
+void nf_nat_masquerade_ipv6_unregister_notifier(void)
+{
+ mutex_lock(&masq_mutex);
+ /* check if the notifier still has clients */
+ if (--masq_refcnt6 > 0)
+ goto out_unlock;
+
+ unregister_inet6addr_notifier(&masq_inet6_notifier);
+out_unlock:
+ mutex_unlock(&masq_mutex);
+}
+EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
+#endif
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
index f83bf9d8c9f5..62743da3004f 100644
--- a/net/netfilter/nf_nat_proto.c
+++ b/net/netfilter/nf_nat_proto.c
@@ -20,13 +20,26 @@
#include <linux/netfilter.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
+
+#include <linux/ipv6.h>
+#include <linux/netfilter_ipv6.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <net/ip6_route.h>
+#include <net/xfrm.h>
+#include <net/ipv6.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static void nf_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype);
static void
__udp_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, struct udphdr *hdr,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype, bool do_csum)
@@ -43,8 +56,7 @@ __udp_manip_pkt(struct sk_buff *skb,
portptr = &hdr->dest;
}
if (do_csum) {
- l3proto->csum_update(skb, iphdroff, &hdr->check,
- tuple, maniptype);
+ nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
false);
if (!hdr->check)
@@ -54,7 +66,6 @@ __udp_manip_pkt(struct sk_buff *skb,
}
static bool udp_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
@@ -68,12 +79,11 @@ static bool udp_manip_pkt(struct sk_buff *skb,
hdr = (struct udphdr *)(skb->data + hdroff);
do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
- __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum);
+ __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum);
return true;
}
static bool udplite_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
@@ -85,14 +95,13 @@ static bool udplite_manip_pkt(struct sk_buff *skb,
return false;
hdr = (struct udphdr *)(skb->data + hdroff);
- __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true);
+ __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true);
#endif
return true;
}
static bool
sctp_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
@@ -135,7 +144,6 @@ sctp_manip_pkt(struct sk_buff *skb,
static bool
tcp_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
@@ -171,14 +179,13 @@ tcp_manip_pkt(struct sk_buff *skb,
if (hdrsize < sizeof(*hdr))
return true;
- l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
+ nf_csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
return true;
}
static bool
dccp_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
@@ -210,8 +217,7 @@ dccp_manip_pkt(struct sk_buff *skb,
if (hdrsize < sizeof(*hdr))
return true;
- l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum,
- tuple, maniptype);
+ nf_csum_update(skb, iphdroff, &hdr->dccph_checksum, tuple, maniptype);
inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
false);
#endif
@@ -220,7 +226,6 @@ dccp_manip_pkt(struct sk_buff *skb,
static bool
icmp_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
@@ -239,7 +244,6 @@ icmp_manip_pkt(struct sk_buff *skb,
static bool
icmpv6_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
@@ -250,8 +254,7 @@ icmpv6_manip_pkt(struct sk_buff *skb,
return false;
hdr = (struct icmp6hdr *)(skb->data + hdroff);
- l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
- tuple, maniptype);
+ nf_csum_update(skb, iphdroff, &hdr->icmp6_cksum, tuple, maniptype);
if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
@@ -265,7 +268,6 @@ icmpv6_manip_pkt(struct sk_buff *skb,
/* manipulate a GRE packet according to maniptype */
static bool
gre_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
@@ -304,40 +306,718 @@ gre_manip_pkt(struct sk_buff *skb,
return true;
}
-bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
+static bool l4proto_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
switch (tuple->dst.protonum) {
case IPPROTO_TCP:
- return tcp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ return tcp_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_UDP:
- return udp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ return udp_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_UDPLITE:
- return udplite_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ return udplite_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_SCTP:
- return sctp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ return sctp_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_ICMP:
- return icmp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ return icmp_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_ICMPV6:
- return icmpv6_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ return icmpv6_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_DCCP:
- return dccp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ return dccp_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
case IPPROTO_GRE:
- return gre_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ return gre_manip_pkt(skb, iphdroff, hdroff,
tuple, maniptype);
}
/* If we don't know protocol -- no error, pass it unmodified. */
return true;
}
-EXPORT_SYMBOL_GPL(nf_nat_l4proto_manip_pkt);
+
+static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff,
+ const struct nf_conntrack_tuple *target,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph;
+ unsigned int hdroff;
+
+ if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
+ return false;
+
+ iph = (void *)skb->data + iphdroff;
+ hdroff = iphdroff + iph->ihl * 4;
+
+ if (!l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
+ return false;
+ iph = (void *)skb->data + iphdroff;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
+ iph->saddr = target->src.u3.ip;
+ } else {
+ csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
+ iph->daddr = target->dst.u3.ip;
+ }
+ return true;
+}
+
+static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff,
+ const struct nf_conntrack_tuple *target,
+ enum nf_nat_manip_type maniptype)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6hdr *ipv6h;
+ __be16 frag_off;
+ int hdroff;
+ u8 nexthdr;
+
+ if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h)))
+ return false;
+
+ ipv6h = (void *)skb->data + iphdroff;
+ nexthdr = ipv6h->nexthdr;
+ hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h),
+ &nexthdr, &frag_off);
+ if (hdroff < 0)
+ goto manip_addr;
+
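+ /* Only the first fragment (offset 0) carries the transport header to mangle. */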
+ if ((frag_off & htons(~0x7)) == 0 &&
+ !l4proto_manip_pkt(skb, iphdroff, hdroff, target, maniptype))
+ return false;
+
+ /* must reload, offset might have changed */
+ ipv6h = (void *)skb->data + iphdroff;
+
+manip_addr:
+ if (maniptype == NF_NAT_MANIP_SRC)
+ ipv6h->saddr = target->src.u3.in6;
+ else
+ ipv6h->daddr = target->dst.u3.in6;
+
+#endif
+ return true;
+}
+
+unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
+ enum nf_nat_manip_type mtype,
+ enum ip_conntrack_dir dir)
+{
+ struct nf_conntrack_tuple target;
+
+ /* We are aiming to look like inverse of other direction. */
+ nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+
+ switch (target.src.l3num) {
+ case NFPROTO_IPV6:
+ if (nf_nat_ipv6_manip_pkt(skb, 0, &target, mtype))
+ return NF_ACCEPT;
+ break;
+ case NFPROTO_IPV4:
+ if (nf_nat_ipv4_manip_pkt(skb, 0, &target, mtype))
+ return NF_ACCEPT;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ return NF_DROP;
+}
+
+static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+ __be32 oldip, newip;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ oldip = iph->saddr;
+ newip = t->src.u3.ip;
+ } else {
+ oldip = iph->daddr;
+ newip = t->dst.u3.ip;
+ }
+ inet_proto_csum_replace4(check, skb, oldip, newip, true);
+}
+
+static void nf_nat_ipv6_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff);
+ const struct in6_addr *oldip, *newip;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ oldip = &ipv6h->saddr;
+ newip = &t->src.u3.in6;
+ } else {
+ oldip = &ipv6h->daddr;
+ newip = &t->dst.u3.in6;
+ }
+ inet_proto_csum_replace16(check, skb, oldip->s6_addr32,
+ newip->s6_addr32, true);
+#endif
+}
+
+static void nf_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype)
+{
+ switch (t->src.l3num) {
+ case NFPROTO_IPV4:
+ nf_nat_ipv4_csum_update(skb, iphdroff, check, t, maniptype);
+ return;
+ case NFPROTO_IPV6:
+ nf_nat_ipv6_csum_update(skb, iphdroff, check, t, maniptype);
+ return;
+ }
+}
+
+static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
+ u8 proto, void *data, __sum16 *check,
+ int datalen, int oldlen)
+{
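+ /* Either prime the skb for deferred checksumming with a fresh pseudo-header,
+ * or patch the existing checksum for the payload length change.
+ */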
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
+ ip_hdrlen(skb);
+ skb->csum_offset = (void *)check - data;
+ *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, datalen,
+ proto, 0);
+ } else {
+ inet_proto_csum_replace2(check, skb,
+ htons(oldlen), htons(datalen), true);
+ }
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb,
+ u8 proto, void *data, __sum16 *check,
+ int datalen, int oldlen)
+{
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) + skb_network_offset(skb) +
+ (data - (void *)skb->data);
+ skb->csum_offset = (void *)check - data;
+ *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+ datalen, proto, 0);
+ } else {
+ inet_proto_csum_replace2(check, skb,
+ htons(oldlen), htons(datalen), true);
+ }
+}
+#endif
+
+void nf_nat_csum_recalc(struct sk_buff *skb,
+ u8 nfproto, u8 proto, void *data, __sum16 *check,
+ int datalen, int oldlen)
+{
+ switch (nfproto) {
+ case NFPROTO_IPV4:
+ nf_nat_ipv4_csum_recalc(skb, proto, data, check,
+ datalen, oldlen);
+ return;
+#if IS_ENABLED(CONFIG_IPV6)
+ case NFPROTO_IPV6:
+ nf_nat_ipv6_csum_recalc(skb, proto, data, check,
+ datalen, oldlen);
+ return;
+#endif
+ }
+
+ WARN_ON_ONCE(1);
+}
+
+int nf_nat_icmp_reply_translation(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum)
+{
+ struct {
+ struct icmphdr icmp;
+ struct iphdr ip;
+ } *inside;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+ unsigned int hdrlen = ip_hdrlen(skb);
+ struct nf_conntrack_tuple target;
+ unsigned long statusbit;
+
+ WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
+
+ if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ return 0;
+ if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
+ return 0;
+
+ inside = (void *)skb->data + hdrlen;
+ if (inside->icmp.type == ICMP_REDIRECT) {
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+ if (ct->status & IPS_NAT_MASK)
+ return 0;
+ }
+
+ if (manip == NF_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is reply direction */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ if (!(ct->status & statusbit))
+ return 1;
+
+ if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
+ &ct->tuplehash[!dir].tuple, !manip))
+ return 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ /* Reloading "inside" here since manip_pkt may reallocate */
+ inside = (void *)skb->data + hdrlen;
+ inside->icmp.checksum = 0;
+ inside->icmp.checksum =
+ csum_fold(skb_checksum(skb, hdrlen,
+ skb->len - hdrlen, 0));
+ }
+
+ /* Change outer to look like the reply to an incoming packet */
+ nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+ target.dst.protonum = IPPROTO_ICMP;
+ if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
+
+static unsigned int
+nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct)
+ return NF_ACCEPT;
+
+ if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+ state->hook))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ }
+
+ return nf_nat_inet_fn(priv, skb, state);
+}
+
+static unsigned int
+nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ unsigned int ret;
+ __be32 daddr = ip_hdr(skb)->daddr;
+
+ ret = nf_nat_ipv4_fn(priv, skb, state);
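+ /* If DNAT rewrote the destination, any dst cached on the skb is stale; drop it so routing is redone. */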
+ if (ret == NF_ACCEPT && daddr != ip_hdr(skb)->daddr)
+ skb_dst_drop(skb);
+
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+#ifdef CONFIG_XFRM
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ int err;
+#endif
+ unsigned int ret;
+
+ ret = nf_nat_ipv4_fn(priv, skb, state);
+#ifdef CONFIG_XFRM
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ if (IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED)
+ return ret;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.src.u3.ip !=
+ ct->tuplehash[!dir].tuple.dst.u3.ip ||
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
+#endif
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+ int err;
+
+ ret = nf_nat_ipv4_fn(priv, skb, state);
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+ ct->tuplehash[!dir].tuple.src.u3.ip) {
+ err = ip_route_me_harder(state->net, skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#endif
+ }
+ return ret;
+}
+
+static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv4_in,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv4_out,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv4_local_fn,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv4_fn,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+};
+
+int nf_nat_ipv4_register_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+ return nf_nat_register_fn(net, ops, nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_register_fn);
+
+void nf_nat_ipv4_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+ nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv4_unregister_fn);
+
+#if IS_ENABLED(CONFIG_IPV6)
+int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ unsigned int hdrlen)
+{
+ struct {
+ struct icmp6hdr icmp6;
+ struct ipv6hdr ip6;
+ } *inside;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+ struct nf_conntrack_tuple target;
+ unsigned long statusbit;
+
+ WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY);
+
+ if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ return 0;
+ if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6))
+ return 0;
+
+ inside = (void *)skb->data + hdrlen;
+ if (inside->icmp6.icmp6_type == NDISC_REDIRECT) {
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+ if (ct->status & IPS_NAT_MASK)
+ return 0;
+ }
+
+ if (manip == NF_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is reply direction */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ if (!(ct->status & statusbit))
+ return 1;
+
+ if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
+ &ct->tuplehash[!dir].tuple, !manip))
+ return 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+
+ inside = (void *)skb->data + hdrlen;
+ inside->icmp6.icmp6_cksum = 0;
+ inside->icmp6.icmp6_cksum =
+ csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+ skb->len - hdrlen, IPPROTO_ICMPV6,
+ skb_checksum(skb, hdrlen,
+ skb->len - hdrlen, 0));
+ }
+
+ nf_ct_invert_tuple(&target, &ct->tuplehash[!dir].tuple);
+ target.dst.protonum = IPPROTO_ICMPV6;
+ if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation);
+
+static unsigned int
+nf_nat_ipv6_fn(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ __be16 frag_off;
+ int hdrlen;
+ u8 nexthdr;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ * have dropped it. Hence it's the user's responsibility to
+ * packet filter it out, or implement conntrack/NAT for that
+ * protocol. 8) --RR
+ */
+ if (!ct)
+ return NF_ACCEPT;
+
+ if (ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY) {
+ nexthdr = ipv6_hdr(skb)->nexthdr;
+ hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
+ &nexthdr, &frag_off);
+
+ if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {
+ if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo,
+ state->hook,
+ hdrlen))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ }
+
+ return nf_nat_inet_fn(priv, skb, state);
+}
+
+static unsigned int
+nf_nat_ipv6_in(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ unsigned int ret;
+ struct in6_addr daddr = ipv6_hdr(skb)->daddr;
+
+ ret = nf_nat_ipv6_fn(priv, skb, state);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))
+ skb_dst_drop(skb);
+
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv6_out(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+#ifdef CONFIG_XFRM
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ int err;
+#endif
+ unsigned int ret;
+
+ ret = nf_nat_ipv6_fn(priv, skb, state);
+#ifdef CONFIG_XFRM
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ if (IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED)
+ return ret;
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3,
+ &ct->tuplehash[!dir].tuple.dst.u3) ||
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
+#endif
+
+ return ret;
+}
+
+static int nat_route_me_harder(struct net *net, struct sk_buff *skb)
+{
+#ifdef CONFIG_IPV6_MODULE
+ const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();
+
+ if (!v6_ops)
+ return -EHOSTUNREACH;
+
+ return v6_ops->route_me_harder(net, skb);
+#else
+ return ip6_route_me_harder(net, skb);
+#endif
+}
+
+static unsigned int
+nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+ int err;
+
+ ret = nf_nat_ipv6_fn(priv, skb, state);
+ if (ret != NF_ACCEPT)
+ return ret;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3,
+ &ct->tuplehash[!dir].tuple.src.u3)) {
+ err = nat_route_me_harder(state->net, skb);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMPV6 &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(state->net, skb, AF_INET6);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#endif
+ }
+
+ return ret;
+}
+
+static const struct nf_hook_ops nf_nat_ipv6_ops[] = {
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv6_in,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP6_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv6_out,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP6_PRI_NAT_SRC,
+ },
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv6_local_fn,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP6_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv6_fn,
+ .pf = NFPROTO_IPV6,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP6_PRI_NAT_SRC,
+ },
+};
+
+int nf_nat_ipv6_register_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+ return nf_nat_register_fn(net, ops, nf_nat_ipv6_ops,
+ ARRAY_SIZE(nf_nat_ipv6_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_register_fn);
+
+void nf_nat_ipv6_unregister_fn(struct net *net, const struct nf_hook_ops *ops)
+{
+ nf_nat_unregister_fn(net, ops, ARRAY_SIZE(nf_nat_ipv6_ops));
+}
+EXPORT_SYMBOL_GPL(nf_nat_ipv6_unregister_fn);
+#endif /* CONFIG_IPV6 */
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4893f248dfdc..513f93118604 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -37,10 +37,16 @@ enum {
NFT_VALIDATE_DO,
};
+static struct rhltable nft_objname_ht;
+
static u32 nft_chain_hash(const void *data, u32 len, u32 seed);
static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed);
static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *);
+static u32 nft_objname_hash(const void *data, u32 len, u32 seed);
+static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed);
+static int nft_objname_hash_cmp(struct rhashtable_compare_arg *, const void *);
+
static const struct rhashtable_params nft_chain_ht_params = {
.head_offset = offsetof(struct nft_chain, rhlhead),
.key_offset = offsetof(struct nft_chain, name),
@@ -51,6 +57,15 @@ static const struct rhashtable_params nft_chain_ht_params = {
.automatic_shrinking = true,
};
+static const struct rhashtable_params nft_objname_ht_params = {
+ .head_offset = offsetof(struct nft_object, rhlhead),
+ .key_offset = offsetof(struct nft_object, key),
+ .hashfn = nft_objname_hash,
+ .obj_hashfn = nft_objname_hash_obj,
+ .obj_cmpfn = nft_objname_hash_cmp,
+ .automatic_shrinking = true,
+};
+
static void nft_validate_state_update(struct net *net, u8 new_validate_state)
{
switch (net->nft.validate_state) {
@@ -127,7 +142,7 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
list_for_each_entry_reverse(trans, &net->nft.commit_list, list) {
if (trans->msg_type == NFT_MSG_NEWSET &&
nft_trans_set(trans) == set) {
- nft_trans_set_bound(trans) = true;
+ set->bound = true;
break;
}
}
@@ -823,6 +838,34 @@ static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg,
return strcmp(chain->name, name);
}
+static u32 nft_objname_hash(const void *data, u32 len, u32 seed)
+{
+ const struct nft_object_hash_key *k = data;
+
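+ /* Mix the table pointer into the seed so identical object names in different tables hash differently. */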
+ seed ^= hash_ptr(k->table, 32);
+
+ return jhash(k->name, strlen(k->name), seed);
+}
+
+static u32 nft_objname_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct nft_object *obj = data;
+
+ return nft_objname_hash(&obj->key, 0, seed);
+}
+
+static int nft_objname_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct nft_object_hash_key *k = arg->key;
+ const struct nft_object *obj = ptr;
+
+ if (obj->key.table != k->table)
+ return -1;
+
+ return strcmp(obj->key.name, k->name);
+}
+
static int nf_tables_newtable(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -1079,7 +1122,7 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
return ERR_PTR(-ENOENT);
}
-static bool lockdep_commit_lock_is_held(struct net *net)
+static bool lockdep_commit_lock_is_held(const struct net *net)
{
#ifdef CONFIG_PROVE_LOCKING
return lockdep_is_held(&net->nft.commit_mutex);
@@ -2119,9 +2162,11 @@ err1:
static void nf_tables_expr_destroy(const struct nft_ctx *ctx,
struct nft_expr *expr)
{
+ const struct nft_expr_type *type = expr->ops->type;
+
if (expr->ops->destroy)
expr->ops->destroy(ctx, expr);
- module_put(expr->ops->type->owner);
+ module_put(type->owner);
}
struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
@@ -2129,6 +2174,7 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
{
struct nft_expr_info info;
struct nft_expr *expr;
+ struct module *owner;
int err;
err = nf_tables_expr_parse(ctx, nla, &info);
@@ -2148,7 +2194,11 @@ struct nft_expr *nft_expr_init(const struct nft_ctx *ctx,
err3:
kfree(expr);
err2:
- module_put(info.ops->type->owner);
+ owner = info.ops->type->owner;
+ if (info.ops->type->release_ops)
+ info.ops->type->release_ops(info.ops);
+
+ module_put(owner);
err1:
return ERR_PTR(err);
}
@@ -2198,6 +2248,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
[NFTA_RULE_USERDATA] = { .type = NLA_BINARY,
.len = NFT_USERDATA_MAXLEN },
[NFTA_RULE_ID] = { .type = NLA_U32 },
+ [NFTA_RULE_POSITION_ID] = { .type = NLA_U32 },
};
static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
@@ -2567,6 +2618,9 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
return 0;
}
+static struct nft_rule *nft_rule_lookup_byid(const struct net *net,
+ const struct nlattr *nla);
+
#define NFT_RULE_MAXEXPRS 128
static int nf_tables_newrule(struct net *net, struct sock *nlsk,
@@ -2636,6 +2690,12 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
return PTR_ERR(old_rule);
}
+ } else if (nla[NFTA_RULE_POSITION_ID]) {
+ old_rule = nft_rule_lookup_byid(net, nla[NFTA_RULE_POSITION_ID]);
+ if (IS_ERR(old_rule)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION_ID]);
+ return PTR_ERR(old_rule);
+ }
}
}
@@ -3614,6 +3674,9 @@ err1:
static void nft_set_destroy(struct nft_set *set)
{
+ if (WARN_ON(set->use > 0))
+ return;
+
set->ops->destroy(set);
module_put(to_set_type(set->ops)->owner);
kfree(set->name);
@@ -3654,7 +3717,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk,
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(set);
}
- if (!list_empty(&set->bindings) ||
+ if (set->use ||
(nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) {
NL_SET_BAD_ATTR(extack, attr);
return -EBUSY;
@@ -3684,6 +3747,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *i;
struct nft_set_iter iter;
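+ /* set->use counts bindings on this set; refuse a new binding if the counter would overflow. */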
+ if (set->use == UINT_MAX)
+ return -EOVERFLOW;
+
if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
return -EBUSY;
@@ -3711,6 +3777,7 @@ bind:
binding->chain = ctx->chain;
list_add_tail_rcu(&binding->list, &set->bindings);
nft_set_trans_bind(ctx, set);
+ set->use++;
return 0;
}
@@ -3730,6 +3797,25 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
EXPORT_SYMBOL_GPL(nf_tables_unbind_set);
+void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
+ struct nft_set_binding *binding,
+ enum nft_trans_phase phase)
+{
+ switch (phase) {
+ case NFT_TRANS_PREPARE:
+ set->use--;
+ return;
+ case NFT_TRANS_ABORT:
+ case NFT_TRANS_RELEASE:
+ set->use--;
+ /* fall through */
+ default:
+ nf_tables_unbind_set(ctx, set, binding,
+ phase == NFT_TRANS_COMMIT);
+ }
+}
+EXPORT_SYMBOL_GPL(nf_tables_deactivate_set);
+
void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set)
{
if (list_empty(&set->bindings) && nft_set_is_anonymous(set))
@@ -3844,7 +3930,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) &&
nla_put_string(skb, NFTA_SET_ELEM_OBJREF,
- (*nft_set_ext_obj(ext))->name) < 0)
+ (*nft_set_ext_obj(ext))->key.name) < 0)
goto nla_put_failure;
if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
@@ -4377,7 +4463,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
err = -EINVAL;
goto err2;
}
- obj = nft_obj_lookup(ctx->table, nla[NFTA_SET_ELEM_OBJREF],
+ obj = nft_obj_lookup(ctx->net, ctx->table,
+ nla[NFTA_SET_ELEM_OBJREF],
set->objtype, genmask);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
@@ -4812,18 +4899,36 @@ void nft_unregister_obj(struct nft_object_type *obj_type)
}
EXPORT_SYMBOL_GPL(nft_unregister_obj);
-struct nft_object *nft_obj_lookup(const struct nft_table *table,
+struct nft_object *nft_obj_lookup(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla, u32 objtype,
u8 genmask)
{
+ struct nft_object_hash_key k = { .table = table };
+ char search[NFT_OBJ_MAXNAMELEN];
+ struct rhlist_head *tmp, *list;
struct nft_object *obj;
- list_for_each_entry_rcu(obj, &table->objects, list) {
- if (!nla_strcmp(nla, obj->name) &&
- objtype == obj->ops->type->type &&
- nft_active_genmask(obj, genmask))
+ nla_strlcpy(search, nla, sizeof(search));
+ k.name = search;
+
+ WARN_ON_ONCE(!rcu_read_lock_held() &&
+ !lockdep_commit_lock_is_held(net));
+
+ rcu_read_lock();
+ list = rhltable_lookup(&nft_objname_ht, &k, nft_objname_ht_params);
+ if (!list)
+ goto out;
+
+ rhl_for_each_entry_rcu(obj, tmp, list, rhlhead) {
+ if (objtype == obj->ops->type->type &&
+ nft_active_genmask(obj, genmask)) {
+ rcu_read_unlock();
return obj;
+ }
}
+out:
+ rcu_read_unlock();
return ERR_PTR(-ENOENT);
}
EXPORT_SYMBOL_GPL(nft_obj_lookup);
@@ -4981,7 +5086,7 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
}
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
- obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+ obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
if (err != -ENOENT) {
@@ -5007,11 +5112,11 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
err = PTR_ERR(obj);
goto err1;
}
- obj->table = table;
+ obj->key.table = table;
obj->handle = nf_tables_alloc_handle(table);
- obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
- if (!obj->name) {
+ obj->key.name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL);
+ if (!obj->key.name) {
err = -ENOMEM;
goto err2;
}
@@ -5020,11 +5125,20 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk,
if (err < 0)
goto err3;
+ err = rhltable_insert(&nft_objname_ht, &obj->rhlhead,
+ nft_objname_ht_params);
+ if (err < 0)
+ goto err4;
+
list_add_tail_rcu(&obj->list, &table->objects);
table->use++;
return 0;
+err4:
+ /* queued in transaction log */
+ INIT_LIST_HEAD(&obj->list);
+ return err;
err3:
- kfree(obj->name);
+ kfree(obj->key.name);
err2:
if (obj->ops->destroy)
obj->ops->destroy(&ctx, obj);
@@ -5053,7 +5167,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
nfmsg->res_id = htons(net->nft.base_seq & 0xffff);
if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) ||
- nla_put_string(skb, NFTA_OBJ_NAME, obj->name) ||
+ nla_put_string(skb, NFTA_OBJ_NAME, obj->key.name) ||
nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) ||
nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) ||
nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset) ||
@@ -5208,7 +5322,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
}
objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE]));
- obj = nft_obj_lookup(table, nla[NFTA_OBJ_NAME], objtype, genmask);
+ obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask);
if (IS_ERR(obj)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]);
return PTR_ERR(obj);
@@ -5239,7 +5353,7 @@ static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
obj->ops->destroy(ctx, obj);
module_put(obj->ops->type->owner);
- kfree(obj->name);
+ kfree(obj->key.name);
kfree(obj);
}
@@ -5273,7 +5387,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
obj = nft_obj_lookup_byhandle(table, attr, objtype, genmask);
} else {
attr = nla[NFTA_OBJ_NAME];
- obj = nft_obj_lookup(table, attr, objtype, genmask);
+ obj = nft_obj_lookup(net, table, attr, objtype, genmask);
}
if (IS_ERR(obj)) {
@@ -5290,7 +5404,7 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
return nft_delobj(&ctx, obj);
}
-void nft_obj_notify(struct net *net, struct nft_table *table,
+void nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq, int event,
int family, int report, gfp_t gfp)
{
@@ -6397,6 +6511,12 @@ static void nf_tables_commit_chain(struct net *net, struct nft_chain *chain)
nf_tables_commit_chain_free_rules_old(g0);
}
+static void nft_obj_del(struct nft_object *obj)
+{
+ rhltable_remove(&nft_objname_ht, &obj->rhlhead, nft_objname_ht_params);
+ list_del_rcu(&obj->list);
+}
+
static void nft_chain_del(struct nft_chain *chain)
{
struct nft_table *table = chain->table;
@@ -6444,6 +6564,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
struct nft_chain *chain;
struct nft_table *table;
+ if (list_empty(&net->nft.commit_list)) {
+ mutex_unlock(&net->nft.commit_mutex);
+ return 0;
+ }
+
/* 0. Validate ruleset, otherwise roll back for error reporting. */
if (nf_tables_validate(net) < 0)
return -EAGAIN;
@@ -6576,7 +6701,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELOBJ:
- list_del_rcu(&nft_trans_obj(trans)->list);
+ nft_obj_del(nft_trans_obj(trans));
nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
NFT_MSG_DELOBJ);
break;
@@ -6617,8 +6742,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
break;
case NFT_MSG_NEWSET:
- if (!nft_trans_set_bound(trans))
- nft_set_destroy(nft_trans_set(trans));
+ nft_set_destroy(nft_trans_set(trans));
break;
case NFT_MSG_NEWSETELEM:
nft_set_elem_destroy(nft_trans_elem_set(trans),
@@ -6691,8 +6815,11 @@ static int __nf_tables_abort(struct net *net)
break;
case NFT_MSG_NEWSET:
trans->ctx.table->use--;
- if (!nft_trans_set_bound(trans))
- list_del_rcu(&nft_trans_set(trans)->list);
+ if (nft_trans_set(trans)->bound) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ list_del_rcu(&nft_trans_set(trans)->list);
break;
case NFT_MSG_DELSET:
trans->ctx.table->use++;
@@ -6700,8 +6827,11 @@ static int __nf_tables_abort(struct net *net)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
+ if (nft_trans_elem_set(trans)->bound) {
+ nft_trans_destroy(trans);
+ break;
+ }
te = (struct nft_trans_elem *)trans->data;
-
te->set->ops->remove(net, te->set, &te->elem);
atomic_dec(&te->set->nelems);
break;
@@ -6716,7 +6846,7 @@ static int __nf_tables_abort(struct net *net)
break;
case NFT_MSG_NEWOBJ:
trans->ctx.table->use--;
- list_del_rcu(&nft_trans_obj(trans)->list);
+ nft_obj_del(nft_trans_obj(trans));
break;
case NFT_MSG_DELOBJ:
trans->ctx.table->use++;
@@ -7330,7 +7460,7 @@ static void __nft_release_tables(struct net *net)
nft_set_destroy(set);
}
list_for_each_entry_safe(obj, ne, &table->objects, list) {
- list_del(&obj->list);
+ nft_obj_del(obj);
table->use--;
nft_obj_destroy(&ctx, obj);
}
@@ -7392,12 +7522,18 @@ static int __init nf_tables_module_init(void)
if (err < 0)
goto err3;
+ err = rhltable_init(&nft_objname_ht, &nft_objname_ht_params);
+ if (err < 0)
+ goto err4;
+
/* must be last */
err = nfnetlink_subsys_register(&nf_tables_subsys);
if (err < 0)
- goto err4;
+ goto err5;
return err;
+err5:
+ rhltable_destroy(&nft_objname_ht);
err4:
unregister_netdevice_notifier(&nf_tables_flowtable_notifier);
err3:
@@ -7417,6 +7553,7 @@ static void __exit nf_tables_module_exit(void)
unregister_pernet_subsys(&nf_tables_net_ops);
cancel_work_sync(&trans_destroy_work);
rcu_barrier();
+ rhltable_destroy(&nft_objname_ht);
nf_tables_core_module_exit();
}
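
nf_tables_deactivate_set() above centralizes how set-referencing expressions drop the new set->use counter across transaction phases: PREPARE only decrements, ABORT and RELEASE decrement and then unbind, COMMIT only unbinds. A rough userspace model of that discipline, with invented names:

/* Sketch of the use-count discipline across transaction phases. Only
 * the ordering mirrors the patch: PREPARE drops the counter, ABORT and
 * RELEASE drop it and unbind, COMMIT just unbinds.
 */
#include <assert.h>
#include <stdio.h>

enum demo_phase { DEMO_PREPARE, DEMO_ABORT, DEMO_RELEASE, DEMO_COMMIT };

struct demo_set {
	unsigned int use;   /* expressions currently referencing the set */
	unsigned int bound; /* bindings still on the list */
};

static void demo_bind(struct demo_set *set)
{
	set->use++;
	set->bound++;
}

static void demo_unbind(struct demo_set *set)
{
	set->bound--;
}

static void demo_deactivate(struct demo_set *set, enum demo_phase phase)
{
	switch (phase) {
	case DEMO_PREPARE:
		set->use--;	/* may be re-activated if the batch aborts */
		return;
	case DEMO_ABORT:
	case DEMO_RELEASE:
		set->use--;
		/* fall through to the unbind below */
	default:
		demo_unbind(set);
	}
}

int main(void)
{
	struct demo_set set = { 0, 0 };

	demo_bind(&set);
	demo_deactivate(&set, DEMO_PREPARE);	/* removal prepared */
	set.use++;				/* removal aborted: activate again */
	demo_deactivate(&set, DEMO_RELEASE);	/* final removal */
	assert(set.use == 0 && set.bound == 0);
	printf("use=%u bound=%u\n", set.use, set.bound);
	return 0;
}
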
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index a50500232b0a..d0f168c2670f 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -98,21 +98,23 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain,
const struct nft_pktinfo *pkt)
{
struct nft_base_chain *base_chain;
+ struct nft_stats __percpu *pstats;
struct nft_stats *stats;
base_chain = nft_base_chain(chain);
- if (!rcu_access_pointer(base_chain->stats))
- return;
- local_bh_disable();
- stats = this_cpu_ptr(rcu_dereference(base_chain->stats));
- if (stats) {
+ rcu_read_lock();
+ pstats = READ_ONCE(base_chain->stats);
+ if (pstats) {
+ local_bh_disable();
+ stats = this_cpu_ptr(pstats);
u64_stats_update_begin(&stats->syncp);
stats->pkts++;
stats->bytes += pkt->skb->len;
u64_stats_update_end(&stats->syncp);
+ local_bh_enable();
}
- local_bh_enable();
+ rcu_read_unlock();
}
struct nft_jumpstack {
@@ -124,14 +126,25 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
struct nft_regs *regs,
struct nft_pktinfo *pkt)
{
+#ifdef CONFIG_RETPOLINE
unsigned long e = (unsigned long)expr->ops->eval;
-
- if (e == (unsigned long)nft_meta_get_eval)
- nft_meta_get_eval(expr, regs, pkt);
- else if (e == (unsigned long)nft_lookup_eval)
- nft_lookup_eval(expr, regs, pkt);
- else
- expr->ops->eval(expr, regs, pkt);
+#define X(e, fun) \
+ do { if ((e) == (unsigned long)(fun)) \
+ return fun(expr, regs, pkt); } while (0)
+
+ X(e, nft_payload_eval);
+ X(e, nft_cmp_eval);
+ X(e, nft_meta_get_eval);
+ X(e, nft_lookup_eval);
+ X(e, nft_range_eval);
+ X(e, nft_immediate_eval);
+ X(e, nft_byteorder_eval);
+ X(e, nft_dynset_eval);
+ X(e, nft_rt_get_eval);
+ X(e, nft_bitwise_eval);
+#undef X
+#endif /* CONFIG_RETPOLINE */
+ expr->ops->eval(expr, regs, pkt);
}
unsigned int
@@ -210,7 +223,6 @@ next_rule:
chain = regs.verdict.chain;
goto do_chain;
case NFT_CONTINUE:
- /* fall through */
case NFT_RETURN:
nft_trace_packet(&info, chain, rule,
NFT_TRACETYPE_RETURN);
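
With CONFIG_RETPOLINE, every indirect call goes through a retpoline thunk, so expr_call_ops_eval() now compares ops->eval against the built-in evaluators and calls the match directly. The standalone sketch below shows the same devirtualization pattern with made-up handlers:

/* Devirtualization sketch: compare the stored function pointer against
 * known handlers and call them directly, so the common cases avoid an
 * indirect (retpolined) call. Names are illustrative only.
 */
#include <stdio.h>

struct ctx { int n; };

static void eval_a(struct ctx *c) { c->n += 1; }
static void eval_b(struct ctx *c) { c->n += 2; }

struct ops { void (*eval)(struct ctx *c); };

static void call_eval(const struct ops *ops, struct ctx *c)
{
#define X(ptr, fun)							\
	do { if ((ptr) == (fun)) { fun(c); return; } } while (0)

	X(ops->eval, eval_a);	/* direct call for a known handler */
	X(ops->eval, eval_b);
#undef X
	ops->eval(c);		/* fallback: plain indirect call */
}

int main(void)
{
	struct ops known = { .eval = eval_a };
	struct ctx c = { 0 };

	call_eval(&known, &c);
	printf("%d\n", c.n);	/* prints 1 */
	return 0;
}
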
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 109b0d27345a..c69b11ca5aad 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -122,7 +122,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
return -EBUSY;
}
- l4proto = nf_ct_l4proto_find_get(l4num);
+ l4proto = nf_ct_l4proto_find(l4num);
	/* This protocol is not supported, skip. */
if (l4proto->l4proto != l4num) {
@@ -152,7 +152,6 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl,
err:
kfree(timeout);
err_proto_put:
- nf_ct_l4proto_put(l4proto);
return ret;
}
@@ -302,7 +301,6 @@ static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
if (refcount_dec_if_one(&timeout->refcnt)) {
/* We are protected by nfnl mutex. */
list_del_rcu(&timeout->head);
- nf_ct_l4proto_put(timeout->timeout.l4proto);
nf_ct_untimeout(net, &timeout->timeout);
kfree_rcu(timeout, rcu_head);
} else {
@@ -359,7 +357,7 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
return -EINVAL;
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
- l4proto = nf_ct_l4proto_find_get(l4num);
+ l4proto = nf_ct_l4proto_find(l4num);
/* This protocol is not supported, skip. */
if (l4proto->l4proto != l4num) {
@@ -372,10 +370,8 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl,
if (ret < 0)
goto err;
- nf_ct_l4proto_put(l4proto);
return 0;
err:
- nf_ct_l4proto_put(l4proto);
return ret;
}
@@ -442,7 +438,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
- l4proto = nf_ct_l4proto_find_get(l4num);
+ l4proto = nf_ct_l4proto_find(l4num);
err = -EOPNOTSUPP;
if (l4proto->l4proto != l4num)
@@ -474,12 +470,7 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
break;
case IPPROTO_GRE:
#ifdef CONFIG_NF_CT_PROTO_GRE
- if (l4proto->net_id) {
- struct netns_proto_gre *net_gre;
-
- net_gre = net_generic(net, *l4proto->net_id);
- timeouts = net_gre->gre_timeouts;
- }
+ timeouts = nf_gre_pernet(net)->timeouts;
#endif
break;
case 255:
@@ -516,7 +507,6 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl,
/* this avoids a loop in nfnetlink. */
return ret == -EAGAIN ? -ENOBUFS : ret;
err:
- nf_ct_l4proto_put(l4proto);
return err;
}
@@ -597,7 +587,6 @@ static void __net_exit cttimeout_net_exit(struct net *net)
list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
list_del_rcu(&cur->head);
- nf_ct_l4proto_put(cur->timeout.l4proto);
if (refcount_dec_and_test(&cur->refcnt))
kfree_rcu(cur, rcu_head);
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index fff8073e2a56..2c75b9e0474e 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -25,9 +25,8 @@ struct nft_bitwise {
struct nft_data xor;
};
-static void nft_bitwise_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_bitwise_eval(const struct nft_expr *expr,
+ struct nft_regs *regs, const struct nft_pktinfo *pkt)
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
const u32 *src = &regs->data[priv->sreg];
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 13d4e421a6b3..19dbc34cc75e 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -26,9 +26,9 @@ struct nft_byteorder {
u8 size;
};
-static void nft_byteorder_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_byteorder_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
const struct nft_byteorder *priv = nft_expr_priv(expr);
u32 *src = &regs->data[priv->sreg];
diff --git a/net/netfilter/nft_chain_nat.c b/net/netfilter/nft_chain_nat.c
new file mode 100644
index 000000000000..ee4852088d50
--- /dev/null
+++ b/net/netfilter/nft_chain_nat.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/module.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_tables_ipv6.h>
+
+static unsigned int nft_nat_do_chain(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, skb, state);
+
+ switch (state->pf) {
+#ifdef CONFIG_NF_TABLES_IPV4
+ case NFPROTO_IPV4:
+ nft_set_pktinfo_ipv4(&pkt, skb);
+ break;
+#endif
+#ifdef CONFIG_NF_TABLES_IPV6
+ case NFPROTO_IPV6:
+ nft_set_pktinfo_ipv6(&pkt, skb);
+ break;
+#endif
+ default:
+ break;
+ }
+
+ return nft_do_chain(&pkt, priv);
+}
+
+#ifdef CONFIG_NF_TABLES_IPV4
+static const struct nft_chain_type nft_chain_nat_ipv4 = {
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .hooks = {
+ [NF_INET_PRE_ROUTING] = nft_nat_do_chain,
+ [NF_INET_POST_ROUTING] = nft_nat_do_chain,
+ [NF_INET_LOCAL_OUT] = nft_nat_do_chain,
+ [NF_INET_LOCAL_IN] = nft_nat_do_chain,
+ },
+ .ops_register = nf_nat_ipv4_register_fn,
+ .ops_unregister = nf_nat_ipv4_unregister_fn,
+};
+#endif
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static const struct nft_chain_type nft_chain_nat_ipv6 = {
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .family = NFPROTO_IPV6,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .hooks = {
+ [NF_INET_PRE_ROUTING] = nft_nat_do_chain,
+ [NF_INET_POST_ROUTING] = nft_nat_do_chain,
+ [NF_INET_LOCAL_OUT] = nft_nat_do_chain,
+ [NF_INET_LOCAL_IN] = nft_nat_do_chain,
+ },
+ .ops_register = nf_nat_ipv6_register_fn,
+ .ops_unregister = nf_nat_ipv6_unregister_fn,
+};
+#endif
+
+static int __init nft_chain_nat_init(void)
+{
+#ifdef CONFIG_NF_TABLES_IPV6
+ nft_register_chain_type(&nft_chain_nat_ipv6);
+#endif
+#ifdef CONFIG_NF_TABLES_IPV4
+ nft_register_chain_type(&nft_chain_nat_ipv4);
+#endif
+
+ return 0;
+}
+
+static void __exit nft_chain_nat_exit(void)
+{
+#ifdef CONFIG_NF_TABLES_IPV4
+ nft_unregister_chain_type(&nft_chain_nat_ipv4);
+#endif
+#ifdef CONFIG_NF_TABLES_IPV6
+ nft_unregister_chain_type(&nft_chain_nat_ipv6);
+#endif
+}
+
+module_init(nft_chain_nat_init);
+module_exit(nft_chain_nat_exit);
+
+MODULE_LICENSE("GPL");
+#ifdef CONFIG_NF_TABLES_IPV4
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
+#endif
+#ifdef CONFIG_NF_TABLES_IPV6
+MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
+#endif
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 79d48c1d06f4..f9f1fa66a16e 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -24,9 +24,9 @@ struct nft_cmp_expr {
enum nft_cmp_ops op:8;
};
-static void nft_cmp_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_cmp_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
const struct nft_cmp_expr *priv = nft_expr_priv(expr);
int d;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 0a4bad55a8aa..469f9da5073b 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -22,23 +22,6 @@
#include <linux/netfilter_bridge/ebtables.h>
#include <linux/netfilter_arp/arp_tables.h>
#include <net/netfilter/nf_tables.h>
-#include <net/netns/generic.h>
-
-struct nft_xt {
- struct list_head head;
- struct nft_expr_ops ops;
- refcount_t refcnt;
-
- /* used only when transaction mutex is locked */
- unsigned int listcnt;
-
- /* Unlike other expressions, ops doesn't have static storage duration.
- * nft core assumes they do. We use kfree_rcu so that nft core can
- * can check expr->ops->size even after nft_compat->destroy() frees
- * the nft_xt struct that holds the ops structure.
- */
- struct rcu_head rcu_head;
-};
/* Used for matches where *info is larger than X byte */
#define NFT_MATCH_LARGE_THRESH 192
@@ -47,46 +30,6 @@ struct nft_xt_match_priv {
void *info;
};
-struct nft_compat_net {
- struct list_head nft_target_list;
- struct list_head nft_match_list;
-};
-
-static unsigned int nft_compat_net_id __read_mostly;
-static struct nft_expr_type nft_match_type;
-static struct nft_expr_type nft_target_type;
-
-static struct nft_compat_net *nft_compat_pernet(struct net *net)
-{
- return net_generic(net, nft_compat_net_id);
-}
-
-static void nft_xt_get(struct nft_xt *xt)
-{
- /* refcount_inc() warns on 0 -> 1 transition, but we can't
- * init the reference count to 1 in .select_ops -- we can't
- * undo such an increase when another expression inside the same
- * rule fails afterwards.
- */
- if (xt->listcnt == 0)
- refcount_set(&xt->refcnt, 1);
- else
- refcount_inc(&xt->refcnt);
-
- xt->listcnt++;
-}
-
-static bool nft_xt_put(struct nft_xt *xt)
-{
- if (refcount_dec_and_test(&xt->refcnt)) {
- WARN_ON_ONCE(!list_empty(&xt->head));
- kfree_rcu(xt, rcu_head);
- return true;
- }
-
- return false;
-}
-
static int nft_compat_chain_validate_dependency(const struct nft_ctx *ctx,
const char *tablename)
{
@@ -281,7 +224,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
struct xt_target *target = expr->ops->data;
struct xt_tgchk_param par;
size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
- struct nft_xt *nft_xt;
u16 proto = 0;
bool inv = false;
union nft_entry e = {};
@@ -305,8 +247,6 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (!target->target)
return -EINVAL;
- nft_xt = container_of(expr->ops, struct nft_xt, ops);
- nft_xt_get(nft_xt);
return 0;
}
@@ -325,8 +265,8 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
if (par.target->destroy != NULL)
par.target->destroy(&par);
- if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
- module_put(me);
+ module_put(me);
+ kfree(expr->ops);
}
static int nft_extension_dump_info(struct sk_buff *skb, int attr,
@@ -499,7 +439,6 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
struct xt_match *match = expr->ops->data;
struct xt_mtchk_param par;
size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
- struct nft_xt *nft_xt;
u16 proto = 0;
bool inv = false;
union nft_entry e = {};
@@ -515,13 +454,7 @@ __nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
- ret = xt_check_match(&par, size, proto, inv);
- if (ret < 0)
- return ret;
-
- nft_xt = container_of(expr->ops, struct nft_xt, ops);
- nft_xt_get(nft_xt);
- return 0;
+ return xt_check_match(&par, size, proto, inv);
}
static int
@@ -564,8 +497,8 @@ __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
if (par.match->destroy != NULL)
par.match->destroy(&par);
- if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
- module_put(me);
+ module_put(me);
+ kfree(expr->ops);
}
static void
@@ -574,18 +507,6 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
__nft_match_destroy(ctx, expr, nft_expr_priv(expr));
}
-static void nft_compat_deactivate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- enum nft_trans_phase phase)
-{
- struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops);
-
- if (phase == NFT_TRANS_ABORT || phase == NFT_TRANS_COMMIT) {
- if (--xt->listcnt == 0)
- list_del_init(&xt->head);
- }
-}
-
static void
nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
{
@@ -780,19 +701,13 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = {
.cb = nfnl_nft_compat_cb,
};
-static bool nft_match_cmp(const struct xt_match *match,
- const char *name, u32 rev, u32 family)
-{
- return strcmp(match->name, name) == 0 && match->revision == rev &&
- (match->family == NFPROTO_UNSPEC || match->family == family);
-}
+static struct nft_expr_type nft_match_type;
static const struct nft_expr_ops *
nft_match_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
- struct nft_compat_net *cn;
- struct nft_xt *nft_match;
+ struct nft_expr_ops *ops;
struct xt_match *match;
unsigned int matchsize;
char *mt_name;
@@ -808,16 +723,6 @@ nft_match_select_ops(const struct nft_ctx *ctx,
rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV]));
family = ctx->family;
- cn = nft_compat_pernet(ctx->net);
-
- /* Re-use the existing match if it's already loaded. */
- list_for_each_entry(nft_match, &cn->nft_match_list, head) {
- struct xt_match *match = nft_match->ops.data;
-
- if (nft_match_cmp(match, mt_name, rev, family))
- return &nft_match->ops;
- }
-
match = xt_request_find_match(family, mt_name, rev);
if (IS_ERR(match))
return ERR_PTR(-ENOENT);
@@ -827,65 +732,62 @@ nft_match_select_ops(const struct nft_ctx *ctx,
goto err;
}
- /* This is the first time we use this match, allocate operations */
- nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
- if (nft_match == NULL) {
+ ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
+ if (!ops) {
err = -ENOMEM;
goto err;
}
- refcount_set(&nft_match->refcnt, 0);
- nft_match->ops.type = &nft_match_type;
- nft_match->ops.eval = nft_match_eval;
- nft_match->ops.init = nft_match_init;
- nft_match->ops.destroy = nft_match_destroy;
- nft_match->ops.deactivate = nft_compat_deactivate;
- nft_match->ops.dump = nft_match_dump;
- nft_match->ops.validate = nft_match_validate;
- nft_match->ops.data = match;
+ ops->type = &nft_match_type;
+ ops->eval = nft_match_eval;
+ ops->init = nft_match_init;
+ ops->destroy = nft_match_destroy;
+ ops->dump = nft_match_dump;
+ ops->validate = nft_match_validate;
+ ops->data = match;
matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
if (matchsize > NFT_MATCH_LARGE_THRESH) {
matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv));
- nft_match->ops.eval = nft_match_large_eval;
- nft_match->ops.init = nft_match_large_init;
- nft_match->ops.destroy = nft_match_large_destroy;
- nft_match->ops.dump = nft_match_large_dump;
+ ops->eval = nft_match_large_eval;
+ ops->init = nft_match_large_init;
+ ops->destroy = nft_match_large_destroy;
+ ops->dump = nft_match_large_dump;
}
- nft_match->ops.size = matchsize;
+ ops->size = matchsize;
- nft_match->listcnt = 0;
- list_add(&nft_match->head, &cn->nft_match_list);
-
- return &nft_match->ops;
+ return ops;
err:
module_put(match->me);
return ERR_PTR(err);
}
+static void nft_match_release_ops(const struct nft_expr_ops *ops)
+{
+ struct xt_match *match = ops->data;
+
+ module_put(match->me);
+ kfree(ops);
+}
+
static struct nft_expr_type nft_match_type __read_mostly = {
.name = "match",
.select_ops = nft_match_select_ops,
+ .release_ops = nft_match_release_ops,
.policy = nft_match_policy,
.maxattr = NFTA_MATCH_MAX,
.owner = THIS_MODULE,
};
-static bool nft_target_cmp(const struct xt_target *tg,
- const char *name, u32 rev, u32 family)
-{
- return strcmp(tg->name, name) == 0 && tg->revision == rev &&
- (tg->family == NFPROTO_UNSPEC || tg->family == family);
-}
+static struct nft_expr_type nft_target_type;
static const struct nft_expr_ops *
nft_target_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
- struct nft_compat_net *cn;
- struct nft_xt *nft_target;
+ struct nft_expr_ops *ops;
struct xt_target *target;
char *tg_name;
u32 rev, family;
@@ -905,18 +807,6 @@ nft_target_select_ops(const struct nft_ctx *ctx,
strcmp(tg_name, "standard") == 0)
return ERR_PTR(-EINVAL);
- cn = nft_compat_pernet(ctx->net);
- /* Re-use the existing target if it's already loaded. */
- list_for_each_entry(nft_target, &cn->nft_target_list, head) {
- struct xt_target *target = nft_target->ops.data;
-
- if (!target->target)
- continue;
-
- if (nft_target_cmp(target, tg_name, rev, family))
- return &nft_target->ops;
- }
-
target = xt_request_find_target(family, tg_name, rev);
if (IS_ERR(target))
return ERR_PTR(-ENOENT);
@@ -931,113 +821,55 @@ nft_target_select_ops(const struct nft_ctx *ctx,
goto err;
}
- /* This is the first time we use this target, allocate operations */
- nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
- if (nft_target == NULL) {
+ ops = kzalloc(sizeof(struct nft_expr_ops), GFP_KERNEL);
+ if (!ops) {
err = -ENOMEM;
goto err;
}
- refcount_set(&nft_target->refcnt, 0);
- nft_target->ops.type = &nft_target_type;
- nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
- nft_target->ops.init = nft_target_init;
- nft_target->ops.destroy = nft_target_destroy;
- nft_target->ops.deactivate = nft_compat_deactivate;
- nft_target->ops.dump = nft_target_dump;
- nft_target->ops.validate = nft_target_validate;
- nft_target->ops.data = target;
+ ops->type = &nft_target_type;
+ ops->size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
+ ops->init = nft_target_init;
+ ops->destroy = nft_target_destroy;
+ ops->dump = nft_target_dump;
+ ops->validate = nft_target_validate;
+ ops->data = target;
if (family == NFPROTO_BRIDGE)
- nft_target->ops.eval = nft_target_eval_bridge;
+ ops->eval = nft_target_eval_bridge;
else
- nft_target->ops.eval = nft_target_eval_xt;
-
- nft_target->listcnt = 0;
- list_add(&nft_target->head, &cn->nft_target_list);
+ ops->eval = nft_target_eval_xt;
- return &nft_target->ops;
+ return ops;
err:
module_put(target->me);
return ERR_PTR(err);
}
+static void nft_target_release_ops(const struct nft_expr_ops *ops)
+{
+ struct xt_target *target = ops->data;
+
+ module_put(target->me);
+ kfree(ops);
+}
+
static struct nft_expr_type nft_target_type __read_mostly = {
.name = "target",
.select_ops = nft_target_select_ops,
+ .release_ops = nft_target_release_ops,
.policy = nft_target_policy,
.maxattr = NFTA_TARGET_MAX,
.owner = THIS_MODULE,
};
-static int __net_init nft_compat_init_net(struct net *net)
-{
- struct nft_compat_net *cn = nft_compat_pernet(net);
-
- INIT_LIST_HEAD(&cn->nft_target_list);
- INIT_LIST_HEAD(&cn->nft_match_list);
-
- return 0;
-}
-
-static void __net_exit nft_compat_exit_net(struct net *net)
-{
- struct nft_compat_net *cn = nft_compat_pernet(net);
- struct nft_xt *xt, *next;
-
- if (list_empty(&cn->nft_match_list) &&
- list_empty(&cn->nft_target_list))
- return;
-
- /* If there was an error that caused nft_xt expr to not be initialized
- * fully and noone else requested the same expression later, the lists
- * contain 0-refcount entries that still hold module reference.
- *
- * Clean them here.
- */
- mutex_lock(&net->nft.commit_mutex);
- list_for_each_entry_safe(xt, next, &cn->nft_target_list, head) {
- struct xt_target *target = xt->ops.data;
-
- list_del_init(&xt->head);
-
- if (refcount_read(&xt->refcnt))
- continue;
- module_put(target->me);
- kfree(xt);
- }
-
- list_for_each_entry_safe(xt, next, &cn->nft_match_list, head) {
- struct xt_match *match = xt->ops.data;
-
- list_del_init(&xt->head);
-
- if (refcount_read(&xt->refcnt))
- continue;
- module_put(match->me);
- kfree(xt);
- }
- mutex_unlock(&net->nft.commit_mutex);
-}
-
-static struct pernet_operations nft_compat_net_ops = {
- .init = nft_compat_init_net,
- .exit = nft_compat_exit_net,
- .id = &nft_compat_net_id,
- .size = sizeof(struct nft_compat_net),
-};
-
static int __init nft_compat_module_init(void)
{
int ret;
- ret = register_pernet_subsys(&nft_compat_net_ops);
- if (ret < 0)
- goto err_target;
-
ret = nft_register_expr(&nft_match_type);
if (ret < 0)
- goto err_pernet;
+ return ret;
ret = nft_register_expr(&nft_target_type);
if (ret < 0)
@@ -1054,8 +886,6 @@ err_target:
nft_unregister_expr(&nft_target_type);
err_match:
nft_unregister_expr(&nft_match_type);
-err_pernet:
- unregister_pernet_subsys(&nft_compat_net_ops);
return ret;
}
@@ -1064,7 +894,6 @@ static void __exit nft_compat_module_exit(void)
nfnetlink_subsys_unregister(&nfnl_compat_subsys);
nft_unregister_expr(&nft_target_type);
nft_unregister_expr(&nft_match_type);
- unregister_pernet_subsys(&nft_compat_net_ops);
}
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT);
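
With the shared, refcounted nft_xt cache removed, each compat expression owns a freshly allocated nft_expr_ops: the new release_ops callback frees it (and drops the module reference) if the expression never finishes init, while destroy frees it once the expression is live. A hedged userspace sketch of that ownership split, with illustrative names:

/* Ownership sketch: ops are allocated per expression in select_ops().
 * If init never succeeds, release_ops() frees them; once the expression
 * is live, destroy() does. All names are invented for the example.
 */
#include <stdio.h>
#include <stdlib.h>

struct demo_ops {
	void (*destroy)(struct demo_ops *ops);
	void *backend;		/* e.g. the xt match/target being wrapped */
};

static void demo_destroy(struct demo_ops *ops)
{
	/* a real implementation would also drop the backend reference */
	free(ops);
}

static struct demo_ops *demo_select_ops(void *backend)
{
	struct demo_ops *ops = calloc(1, sizeof(*ops));

	if (!ops)
		return NULL;
	ops->destroy = demo_destroy;
	ops->backend = backend;
	return ops;
}

static void demo_release_ops(struct demo_ops *ops)
{
	/* error path: the expression was never initialized */
	free(ops);
}

int main(void)
{
	int backend;
	struct demo_ops *ops = demo_select_ops(&backend);
	int init_failed = 1;	/* pretend expression init failed */

	if (!ops)
		return 1;
	if (init_failed)
		demo_release_ops(ops);
	else
		ops->destroy(ops);
	puts("ops freed exactly once");
	return 0;
}
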
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index a61d7edfc290..1a6b06ce6b5b 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -104,7 +104,7 @@ static void nft_counter_obj_destroy(const struct nft_ctx *ctx,
nft_counter_do_destroy(priv);
}
-static void nft_counter_reset(struct nft_counter_percpu_priv __percpu *priv,
+static void nft_counter_reset(struct nft_counter_percpu_priv *priv,
struct nft_counter *total)
{
struct nft_counter *this_cpu;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 586627c361df..7b717fad6cdc 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -870,7 +870,7 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
l4num = nla_get_u8(tb[NFTA_CT_TIMEOUT_L4PROTO]);
priv->l4proto = l4num;
- l4proto = nf_ct_l4proto_find_get(l4num);
+ l4proto = nf_ct_l4proto_find(l4num);
if (l4proto->l4proto != l4num) {
ret = -EOPNOTSUPP;
@@ -902,7 +902,6 @@ static int nft_ct_timeout_obj_init(const struct nft_ctx *ctx,
err_free_timeout:
kfree(timeout);
err_proto_put:
- nf_ct_l4proto_put(l4proto);
return ret;
}
@@ -913,7 +912,6 @@ static void nft_ct_timeout_obj_destroy(const struct nft_ctx *ctx,
struct nf_ct_timeout *timeout = priv->timeout;
nf_ct_untimeout(ctx->net, timeout);
- nf_ct_l4proto_put(timeout->l4proto);
nf_ct_netns_put(ctx->net, ctx->family);
kfree(priv->timeout);
}
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index f1172f99752b..e461007558e8 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -62,9 +62,8 @@ err1:
return NULL;
}
-static void nft_dynset_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_dynset_eval(const struct nft_expr *expr,
+ struct nft_regs *regs, const struct nft_pktinfo *pkt)
{
const struct nft_dynset *priv = nft_expr_priv(expr);
struct nft_set *set = priv->set;
@@ -241,11 +240,15 @@ static void nft_dynset_deactivate(const struct nft_ctx *ctx,
{
struct nft_dynset *priv = nft_expr_priv(expr);
- if (phase == NFT_TRANS_PREPARE)
- return;
+ nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_dynset_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_dynset *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding,
- phase == NFT_TRANS_COMMIT);
+ priv->set->use++;
}
static void nft_dynset_destroy(const struct nft_ctx *ctx,
@@ -293,6 +296,7 @@ static const struct nft_expr_ops nft_dynset_ops = {
.eval = nft_dynset_eval,
.init = nft_dynset_init,
.destroy = nft_dynset_destroy,
+ .activate = nft_dynset_activate,
.deactivate = nft_dynset_deactivate,
.dump = nft_dynset_dump,
};
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index c2d237144f74..ea658e6c53e3 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -25,7 +25,6 @@ struct nft_jhash {
u32 modulus;
u32 seed;
u32 offset;
- struct nft_set *map;
};
static void nft_jhash_eval(const struct nft_expr *expr,
@@ -42,33 +41,10 @@ static void nft_jhash_eval(const struct nft_expr *expr,
regs->data[priv->dreg] = h + priv->offset;
}
-static void nft_jhash_map_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
-{
- struct nft_jhash *priv = nft_expr_priv(expr);
- const void *data = &regs->data[priv->sreg];
- const struct nft_set *map = priv->map;
- const struct nft_set_ext *ext;
- u32 result;
- bool found;
-
- result = reciprocal_scale(jhash(data, priv->len, priv->seed),
- priv->modulus) + priv->offset;
-
- found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
- if (!found)
- return;
-
- nft_data_copy(&regs->data[priv->dreg],
- nft_set_ext_data(ext), map->dlen);
-}
-
struct nft_symhash {
enum nft_registers dreg:8;
u32 modulus;
u32 offset;
- struct nft_set *map;
};
static void nft_symhash_eval(const struct nft_expr *expr,
@@ -84,28 +60,6 @@ static void nft_symhash_eval(const struct nft_expr *expr,
regs->data[priv->dreg] = h + priv->offset;
}
-static void nft_symhash_map_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
-{
- struct nft_symhash *priv = nft_expr_priv(expr);
- struct sk_buff *skb = pkt->skb;
- const struct nft_set *map = priv->map;
- const struct nft_set_ext *ext;
- u32 result;
- bool found;
-
- result = reciprocal_scale(__skb_get_hash_symmetric(skb),
- priv->modulus) + priv->offset;
-
- found = map->ops->lookup(nft_net(pkt), map, &result, &ext);
- if (!found)
- return;
-
- nft_data_copy(&regs->data[priv->dreg],
- nft_set_ext_data(ext), map->dlen);
-}
-
static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
[NFTA_HASH_SREG] = { .type = NLA_U32 },
[NFTA_HASH_DREG] = { .type = NLA_U32 },
@@ -114,9 +68,6 @@ static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
[NFTA_HASH_SEED] = { .type = NLA_U32 },
[NFTA_HASH_OFFSET] = { .type = NLA_U32 },
[NFTA_HASH_TYPE] = { .type = NLA_U32 },
- [NFTA_HASH_SET_NAME] = { .type = NLA_STRING,
- .len = NFT_SET_MAXNAMELEN - 1 },
- [NFTA_HASH_SET_ID] = { .type = NLA_U32 },
};
static int nft_jhash_init(const struct nft_ctx *ctx,
@@ -166,20 +117,6 @@ static int nft_jhash_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, sizeof(u32));
}
-static int nft_jhash_map_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
-{
- struct nft_jhash *priv = nft_expr_priv(expr);
- u8 genmask = nft_genmask_next(ctx->net);
-
- nft_jhash_init(ctx, expr, tb);
- priv->map = nft_set_lookup_global(ctx->net, ctx->table,
- tb[NFTA_HASH_SET_NAME],
- tb[NFTA_HASH_SET_ID], genmask);
- return PTR_ERR_OR_ZERO(priv->map);
-}
-
static int nft_symhash_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -206,20 +143,6 @@ static int nft_symhash_init(const struct nft_ctx *ctx,
NFT_DATA_VALUE, sizeof(u32));
}
-static int nft_symhash_map_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
-{
- struct nft_jhash *priv = nft_expr_priv(expr);
- u8 genmask = nft_genmask_next(ctx->net);
-
- nft_symhash_init(ctx, expr, tb);
- priv->map = nft_set_lookup_global(ctx->net, ctx->table,
- tb[NFTA_HASH_SET_NAME],
- tb[NFTA_HASH_SET_ID], genmask);
- return PTR_ERR_OR_ZERO(priv->map);
-}
-
static int nft_jhash_dump(struct sk_buff *skb,
const struct nft_expr *expr)
{
@@ -247,18 +170,6 @@ nla_put_failure:
return -1;
}
-static int nft_jhash_map_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
-{
- const struct nft_jhash *priv = nft_expr_priv(expr);
-
- if (nft_jhash_dump(skb, expr) ||
- nla_put_string(skb, NFTA_HASH_SET_NAME, priv->map->name))
- return -1;
-
- return 0;
-}
-
static int nft_symhash_dump(struct sk_buff *skb,
const struct nft_expr *expr)
{
@@ -279,18 +190,6 @@ nla_put_failure:
return -1;
}
-static int nft_symhash_map_dump(struct sk_buff *skb,
- const struct nft_expr *expr)
-{
- const struct nft_symhash *priv = nft_expr_priv(expr);
-
- if (nft_symhash_dump(skb, expr) ||
- nla_put_string(skb, NFTA_HASH_SET_NAME, priv->map->name))
- return -1;
-
- return 0;
-}
-
static struct nft_expr_type nft_hash_type;
static const struct nft_expr_ops nft_jhash_ops = {
.type = &nft_hash_type,
@@ -300,14 +199,6 @@ static const struct nft_expr_ops nft_jhash_ops = {
.dump = nft_jhash_dump,
};
-static const struct nft_expr_ops nft_jhash_map_ops = {
- .type = &nft_hash_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_jhash)),
- .eval = nft_jhash_map_eval,
- .init = nft_jhash_map_init,
- .dump = nft_jhash_map_dump,
-};
-
static const struct nft_expr_ops nft_symhash_ops = {
.type = &nft_hash_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)),
@@ -316,14 +207,6 @@ static const struct nft_expr_ops nft_symhash_ops = {
.dump = nft_symhash_dump,
};
-static const struct nft_expr_ops nft_symhash_map_ops = {
- .type = &nft_hash_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_symhash)),
- .eval = nft_symhash_map_eval,
- .init = nft_symhash_map_init,
- .dump = nft_symhash_map_dump,
-};
-
static const struct nft_expr_ops *
nft_hash_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -336,12 +219,8 @@ nft_hash_select_ops(const struct nft_ctx *ctx,
type = ntohl(nla_get_be32(tb[NFTA_HASH_TYPE]));
switch (type) {
case NFT_HASH_SYM:
- if (tb[NFTA_HASH_SET_NAME])
- return &nft_symhash_map_ops;
return &nft_symhash_ops;
case NFT_HASH_JENKINS:
- if (tb[NFTA_HASH_SET_NAME])
- return &nft_jhash_map_ops;
return &nft_jhash_ops;
default:
break;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 3f6d1d2a6281..5ec43124cbca 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -17,9 +17,9 @@
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
-static void nft_immediate_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_immediate_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 14496da5141d..161c3451a747 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -127,11 +127,15 @@ static void nft_lookup_deactivate(const struct nft_ctx *ctx,
{
struct nft_lookup *priv = nft_expr_priv(expr);
- if (phase == NFT_TRANS_PREPARE)
- return;
+ nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_lookup_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_lookup *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding,
- phase == NFT_TRANS_COMMIT);
+ priv->set->use++;
}
static void nft_lookup_destroy(const struct nft_ctx *ctx,
@@ -222,6 +226,7 @@ static const struct nft_expr_ops nft_lookup_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
.eval = nft_lookup_eval,
.init = nft_lookup_init,
+ .activate = nft_lookup_activate,
.deactivate = nft_lookup_deactivate,
.destroy = nft_lookup_destroy,
.dump = nft_lookup_dump,
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 9d8655bc1bea..bee156eaa400 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -14,18 +14,24 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nft_masq.h>
+#include <net/netfilter/ipv4/nf_nat_masquerade.h>
+#include <net/netfilter/ipv6/nf_nat_masquerade.h>
-const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
+struct nft_masq {
+ u32 flags;
+ enum nft_registers sreg_proto_min:8;
+ enum nft_registers sreg_proto_max:8;
+};
+
+static const struct nla_policy nft_masq_policy[NFTA_MASQ_MAX + 1] = {
[NFTA_MASQ_FLAGS] = { .type = NLA_U32 },
[NFTA_MASQ_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_MASQ_REG_PROTO_MAX] = { .type = NLA_U32 },
};
-EXPORT_SYMBOL_GPL(nft_masq_policy);
-int nft_masq_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+static int nft_masq_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
{
int err;
@@ -36,11 +42,10 @@ int nft_masq_validate(const struct nft_ctx *ctx,
return nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_POST_ROUTING));
}
-EXPORT_SYMBOL_GPL(nft_masq_validate);
-int nft_masq_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_masq_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
u32 plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all);
struct nft_masq *priv = nft_expr_priv(expr);
@@ -75,9 +80,8 @@ int nft_masq_init(const struct nft_ctx *ctx,
return nf_ct_netns_get(ctx->net, ctx->family);
}
-EXPORT_SYMBOL_GPL(nft_masq_init);
-int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_masq *priv = nft_expr_priv(expr);
@@ -98,7 +102,157 @@ int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr)
nla_put_failure:
return -1;
}
-EXPORT_SYMBOL_GPL(nft_masq_dump);
+
+static void nft_masq_ipv4_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_masq *priv = nft_expr_priv(expr);
+ struct nf_nat_range2 range;
+
+ memset(&range, 0, sizeof(range));
+ range.flags = priv->flags;
+ if (priv->sreg_proto_min) {
+ range.min_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_min]);
+ range.max_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_max]);
+ }
+ regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, nft_hook(pkt),
+ &range, nft_out(pkt));
+}
+
+static void
+nft_masq_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
+}
+
+static struct nft_expr_type nft_masq_ipv4_type;
+static const struct nft_expr_ops nft_masq_ipv4_ops = {
+ .type = &nft_masq_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+ .eval = nft_masq_ipv4_eval,
+ .init = nft_masq_init,
+ .destroy = nft_masq_ipv4_destroy,
+ .dump = nft_masq_dump,
+ .validate = nft_masq_validate,
+};
+
+static struct nft_expr_type nft_masq_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "masq",
+ .ops = &nft_masq_ipv4_ops,
+ .policy = nft_masq_policy,
+ .maxattr = NFTA_MASQ_MAX,
+ .owner = THIS_MODULE,
+};
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static void nft_masq_ipv6_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_masq *priv = nft_expr_priv(expr);
+ struct nf_nat_range2 range;
+
+ memset(&range, 0, sizeof(range));
+ range.flags = priv->flags;
+ if (priv->sreg_proto_min) {
+ range.min_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_min]);
+ range.max_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_max]);
+ }
+ regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
+ nft_out(pkt));
+}
+
+static void
+nft_masq_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
+}
+
+static struct nft_expr_type nft_masq_ipv6_type;
+static const struct nft_expr_ops nft_masq_ipv6_ops = {
+ .type = &nft_masq_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_masq)),
+ .eval = nft_masq_ipv6_eval,
+ .init = nft_masq_init,
+ .destroy = nft_masq_ipv6_destroy,
+ .dump = nft_masq_dump,
+ .validate = nft_masq_validate,
+};
+
+static struct nft_expr_type nft_masq_ipv6_type __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .name = "masq",
+ .ops = &nft_masq_ipv6_ops,
+ .policy = nft_masq_policy,
+ .maxattr = NFTA_MASQ_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_masq_module_init_ipv6(void)
+{
+ int ret = nft_register_expr(&nft_masq_ipv6_type);
+
+ if (ret)
+ return ret;
+
+ ret = nf_nat_masquerade_ipv6_register_notifier();
+ if (ret < 0)
+ nft_unregister_expr(&nft_masq_ipv6_type);
+
+ return ret;
+}
+
+static void nft_masq_module_exit_ipv6(void)
+{
+ nft_unregister_expr(&nft_masq_ipv6_type);
+ nf_nat_masquerade_ipv6_unregister_notifier();
+}
+#else
+static inline int nft_masq_module_init_ipv6(void) { return 0; }
+static inline void nft_masq_module_exit_ipv6(void) {}
+#endif
+
+static int __init nft_masq_module_init(void)
+{
+ int ret;
+
+ ret = nft_masq_module_init_ipv6();
+ if (ret < 0)
+ return ret;
+
+ ret = nft_register_expr(&nft_masq_ipv4_type);
+ if (ret < 0) {
+ nft_masq_module_exit_ipv6();
+ return ret;
+ }
+
+ ret = nf_nat_masquerade_ipv4_register_notifier();
+ if (ret < 0) {
+ nft_masq_module_exit_ipv6();
+ nft_unregister_expr(&nft_masq_ipv4_type);
+ return ret;
+ }
+
+ return ret;
+}
+
+static void __exit nft_masq_module_exit(void)
+{
+ nft_masq_module_exit_ipv6();
+ nft_unregister_expr(&nft_masq_ipv4_type);
+ nf_nat_masquerade_ipv4_unregister_notifier();
+}
+
+module_init(nft_masq_module_init);
+module_exit(nft_masq_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "masq");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "masq");
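
nft_masq_module_init() registers several components in order and unwinds them in reverse when a later step fails. The shape of that error handling, reduced to userspace stubs with invented names:

/* Register-in-order / unwind-in-reverse sketch with stub components.
 * Component names are invented; only the error-handling shape matters.
 */
#include <stdio.h>

static int register_a(void)    { puts("A registered");   return 0; }
static void unregister_a(void) { puts("A unregistered"); }
static int register_b(void)    { puts("B registered");   return 0; }
static void unregister_b(void) { puts("B unregistered"); }
static int register_c(void)    { puts("C failed");       return -1; }

static int demo_init(void)
{
	int ret;

	ret = register_a();
	if (ret < 0)
		return ret;

	ret = register_b();
	if (ret < 0)
		goto err_a;

	ret = register_c();
	if (ret < 0)
		goto err_b;

	return 0;

err_b:
	unregister_b();
err_a:
	unregister_a();
	return ret;
}

int main(void)
{
	return demo_init() ? 1 : 0;
}
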
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 6df486c5ebd3..987d2d6ce624 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -244,6 +244,16 @@ void nft_meta_get_eval(const struct nft_expr *expr,
strncpy((char *)dest, p->br->dev->name, IFNAMSIZ);
return;
#endif
+ case NFT_META_IIFKIND:
+ if (in == NULL || in->rtnl_link_ops == NULL)
+ goto err;
+ strncpy((char *)dest, in->rtnl_link_ops->kind, IFNAMSIZ);
+ break;
+ case NFT_META_OIFKIND:
+ if (out == NULL || out->rtnl_link_ops == NULL)
+ goto err;
+ strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ);
+ break;
default:
WARN_ON(1);
goto err;
@@ -340,6 +350,8 @@ static int nft_meta_get_init(const struct nft_ctx *ctx,
break;
case NFT_META_IIFNAME:
case NFT_META_OIFNAME:
+ case NFT_META_IIFKIND:
+ case NFT_META_OIFKIND:
len = IFNAMSIZ;
break;
case NFT_META_PRANDOM:
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index c15807d10b91..e93aed9bda88 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -21,9 +21,7 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_nat_l3proto.h>
#include <net/ip.h>
struct nft_nat {
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index ae178e914486..457a9ceb46af 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -38,7 +38,8 @@ static int nft_objref_init(const struct nft_ctx *ctx,
return -EINVAL;
objtype = ntohl(nla_get_be32(tb[NFTA_OBJREF_IMM_TYPE]));
- obj = nft_obj_lookup(ctx->table, tb[NFTA_OBJREF_IMM_NAME], objtype,
+ obj = nft_obj_lookup(ctx->net, ctx->table,
+ tb[NFTA_OBJREF_IMM_NAME], objtype,
genmask);
if (IS_ERR(obj))
return -ENOENT;
@@ -53,7 +54,7 @@ static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_object *obj = nft_objref_priv(expr);
- if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) ||
+ if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->key.name) ||
nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE,
htonl(obj->ops->type->type)))
goto nla_put_failure;
@@ -161,11 +162,15 @@ static void nft_objref_map_deactivate(const struct nft_ctx *ctx,
{
struct nft_objref_map *priv = nft_expr_priv(expr);
- if (phase == NFT_TRANS_PREPARE)
- return;
+ nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase);
+}
+
+static void nft_objref_map_activate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_objref_map *priv = nft_expr_priv(expr);
- nf_tables_unbind_set(ctx, priv->set, &priv->binding,
- phase == NFT_TRANS_COMMIT);
+ priv->set->use++;
}
static void nft_objref_map_destroy(const struct nft_ctx *ctx,
@@ -182,6 +187,7 @@ static const struct nft_expr_ops nft_objref_map_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)),
.eval = nft_objref_map_eval,
.init = nft_objref_map_init,
+ .activate = nft_objref_map_activate,
.deactivate = nft_objref_map_deactivate,
.destroy = nft_objref_map_destroy,
.dump = nft_objref_map_dump,
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index e110b0ebbf58..54e15de4b79a 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -70,9 +70,9 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
}
-static void nft_payload_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_payload_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
const struct nft_payload *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 0ed124a93fcf..354cde67bca9 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -61,7 +61,7 @@ static void nft_quota_obj_eval(struct nft_object *obj,
if (overquota &&
!test_and_set_bit(NFT_QUOTA_DEPLETED_BIT, &priv->flags))
- nft_obj_notify(nft_net(pkt), obj->table, obj, 0, 0,
+ nft_obj_notify(nft_net(pkt), obj->key.table, obj, 0, 0,
NFT_MSG_NEWOBJ, nft_pf(pkt), 0, GFP_ATOMIC);
}
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index cedb96c3619f..529ac8acb19d 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -23,9 +23,8 @@ struct nft_range_expr {
enum nft_range_ops op:8;
};
-static void nft_range_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_range_eval(const struct nft_expr *expr,
+ struct nft_regs *regs, const struct nft_pktinfo *pkt)
{
const struct nft_range_expr *priv = nft_expr_priv(expr);
int d1, d2;
diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c
index c64cbe78dee7..f8092926f704 100644
--- a/net/netfilter/nft_redir.c
+++ b/net/netfilter/nft_redir.c
@@ -13,19 +13,24 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_redirect.h>
#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nft_redir.h>
-const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
+struct nft_redir {
+ enum nft_registers sreg_proto_min:8;
+ enum nft_registers sreg_proto_max:8;
+ u16 flags;
+};
+
+static const struct nla_policy nft_redir_policy[NFTA_REDIR_MAX + 1] = {
[NFTA_REDIR_REG_PROTO_MIN] = { .type = NLA_U32 },
[NFTA_REDIR_REG_PROTO_MAX] = { .type = NLA_U32 },
[NFTA_REDIR_FLAGS] = { .type = NLA_U32 },
};
-EXPORT_SYMBOL_GPL(nft_redir_policy);
-int nft_redir_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nft_data **data)
+static int nft_redir_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
{
int err;
@@ -37,11 +42,10 @@ int nft_redir_validate(const struct nft_ctx *ctx,
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_OUT));
}
-EXPORT_SYMBOL_GPL(nft_redir_validate);
-int nft_redir_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+static int nft_redir_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_redir *priv = nft_expr_priv(expr);
unsigned int plen;
@@ -77,7 +81,6 @@ int nft_redir_init(const struct nft_ctx *ctx,
return nf_ct_netns_get(ctx->net, ctx->family);
}
-EXPORT_SYMBOL_GPL(nft_redir_init);
int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
@@ -101,7 +104,134 @@ int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr)
nla_put_failure:
return -1;
}
-EXPORT_SYMBOL_GPL(nft_redir_dump);
+
+static void nft_redir_ipv4_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_redir *priv = nft_expr_priv(expr);
+ struct nf_nat_ipv4_multi_range_compat mr;
+
+ memset(&mr, 0, sizeof(mr));
+ if (priv->sreg_proto_min) {
+ mr.range[0].min.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_min]);
+ mr.range[0].max.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_max]);
+ mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ }
+
+ mr.range[0].flags |= priv->flags;
+
+ regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, nft_hook(pkt));
+}
+
+static void
+nft_redir_ipv4_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_IPV4);
+}
+
+static struct nft_expr_type nft_redir_ipv4_type;
+static const struct nft_expr_ops nft_redir_ipv4_ops = {
+ .type = &nft_redir_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)),
+ .eval = nft_redir_ipv4_eval,
+ .init = nft_redir_init,
+ .destroy = nft_redir_ipv4_destroy,
+ .dump = nft_redir_dump,
+ .validate = nft_redir_validate,
+};
+
+static struct nft_expr_type nft_redir_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "redir",
+ .ops = &nft_redir_ipv4_ops,
+ .policy = nft_redir_policy,
+ .maxattr = NFTA_REDIR_MAX,
+ .owner = THIS_MODULE,
+};
+
+#ifdef CONFIG_NF_TABLES_IPV6
+static void nft_redir_ipv6_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_redir *priv = nft_expr_priv(expr);
+ struct nf_nat_range2 range;
+
+ memset(&range, 0, sizeof(range));
+ if (priv->sreg_proto_min) {
+ range.min_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_min]);
+ range.max_proto.all = (__force __be16)nft_reg_load16(
+ &regs->data[priv->sreg_proto_max]);
+ range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ }
+
+ range.flags |= priv->flags;
+
+ regs->verdict.code =
+ nf_nat_redirect_ipv6(pkt->skb, &range, nft_hook(pkt));
+}
+
+static void
+nft_redir_ipv6_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+ nf_ct_netns_put(ctx->net, NFPROTO_IPV6);
+}
+
+static struct nft_expr_type nft_redir_ipv6_type;
+static const struct nft_expr_ops nft_redir_ipv6_ops = {
+ .type = &nft_redir_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_redir)),
+ .eval = nft_redir_ipv6_eval,
+ .init = nft_redir_init,
+ .destroy = nft_redir_ipv6_destroy,
+ .dump = nft_redir_dump,
+ .validate = nft_redir_validate,
+};
+
+static struct nft_expr_type nft_redir_ipv6_type __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .name = "redir",
+ .ops = &nft_redir_ipv6_ops,
+ .policy = nft_redir_policy,
+ .maxattr = NFTA_REDIR_MAX,
+ .owner = THIS_MODULE,
+};
+#endif
+
+static int __init nft_redir_module_init(void)
+{
+ int ret = nft_register_expr(&nft_redir_ipv4_type);
+
+ if (ret)
+ return ret;
+
+#ifdef CONFIG_NF_TABLES_IPV6
+ ret = nft_register_expr(&nft_redir_ipv6_type);
+ if (ret) {
+ nft_unregister_expr(&nft_redir_ipv4_type);
+ return ret;
+ }
+#endif
+
+ return ret;
+}
+
+static void __exit nft_redir_module_exit(void)
+{
+ nft_unregister_expr(&nft_redir_ipv4_type);
+#ifdef CONFIG_NF_TABLES_IPV6
+ nft_unregister_expr(&nft_redir_ipv6_type);
+#endif
+}
+
+module_init(nft_redir_module_init);
+module_exit(nft_redir_module_exit);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir");
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index f35fa33913ae..c48daed5c46b 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -53,9 +53,9 @@ static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skb
return mtu - minlen;
}
-static void nft_rt_get_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_rt_get_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
const struct nft_rt *priv = nft_expr_priv(expr);
const struct sk_buff *skb = pkt->skb;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 339a9dd1c832..03df08801e28 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -442,15 +442,6 @@ static void *nft_hash_get(const struct net *net, const struct nft_set *set,
return ERR_PTR(-ENOENT);
}
-/* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */
-static inline u32 nft_hash_key(const u32 *key, u32 klen)
-{
- if (klen == 4)
- return *key;
-
- return *(u16 *)key;
-}
-
static bool nft_hash_lookup_fast(const struct net *net,
const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
@@ -460,11 +451,11 @@ static bool nft_hash_lookup_fast(const struct net *net,
const struct nft_hash_elem *he;
u32 hash, k1, k2;
- k1 = nft_hash_key(key, set->klen);
+ k1 = *key;
hash = jhash_1word(k1, priv->seed);
hash = reciprocal_scale(hash, priv->buckets);
hlist_for_each_entry_rcu(he, &priv->table[hash], node) {
- k2 = nft_hash_key(nft_set_ext_key(&he->ext)->data, set->klen);
+ k2 = *(u32 *)nft_set_ext_key(&he->ext)->data;
if (k1 == k2 &&
nft_set_elem_active(&he->ext, genmask)) {
*ext = &he->ext;
@@ -474,6 +465,23 @@ static bool nft_hash_lookup_fast(const struct net *net,
return false;
}
+static u32 nft_jhash(const struct nft_set *set, const struct nft_hash *priv,
+ const struct nft_set_ext *ext)
+{
+ const struct nft_data *key = nft_set_ext_key(ext);
+ u32 hash, k1;
+
+ if (set->klen == 4) {
+ k1 = *(u32 *)key;
+ hash = jhash_1word(k1, priv->seed);
+ } else {
+ hash = jhash(key, set->klen, priv->seed);
+ }
+ hash = reciprocal_scale(hash, priv->buckets);
+
+ return hash;
+}
+
static int nft_hash_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem,
struct nft_set_ext **ext)
@@ -483,8 +491,7 @@ static int nft_hash_insert(const struct net *net, const struct nft_set *set,
u8 genmask = nft_genmask_next(net);
u32 hash;
- hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed);
- hash = reciprocal_scale(hash, priv->buckets);
+ hash = nft_jhash(set, priv, &this->ext);
hlist_for_each_entry(he, &priv->table[hash], node) {
if (!memcmp(nft_set_ext_key(&this->ext),
nft_set_ext_key(&he->ext), set->klen) &&
@@ -523,10 +530,9 @@ static void *nft_hash_deactivate(const struct net *net,
u8 genmask = nft_genmask_next(net);
u32 hash;
- hash = jhash(nft_set_ext_key(&this->ext), set->klen, priv->seed);
- hash = reciprocal_scale(hash, priv->buckets);
+ hash = nft_jhash(set, priv, &this->ext);
hlist_for_each_entry(he, &priv->table[hash], node) {
- if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val,
+ if (!memcmp(nft_set_ext_key(&he->ext), &elem->key.val,
set->klen) &&
nft_set_elem_active(&he->ext, genmask)) {
nft_set_elem_change_active(net, set, &he->ext);
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 3a15f219e4e7..b113fcac94e1 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -15,6 +15,7 @@
struct nft_tunnel {
enum nft_tunnel_keys key:8;
enum nft_registers dreg:8;
+ enum nft_tunnel_mode mode:8;
};
static void nft_tunnel_get_eval(const struct nft_expr *expr,
@@ -29,14 +30,32 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr,
switch (priv->key) {
case NFT_TUNNEL_PATH:
- nft_reg_store8(dest, !!tun_info);
+ if (!tun_info) {
+ nft_reg_store8(dest, false);
+ return;
+ }
+ if (priv->mode == NFT_TUNNEL_MODE_NONE ||
+ (priv->mode == NFT_TUNNEL_MODE_RX &&
+ !(tun_info->mode & IP_TUNNEL_INFO_TX)) ||
+ (priv->mode == NFT_TUNNEL_MODE_TX &&
+ (tun_info->mode & IP_TUNNEL_INFO_TX)))
+ nft_reg_store8(dest, true);
+ else
+ nft_reg_store8(dest, false);
break;
case NFT_TUNNEL_ID:
if (!tun_info) {
regs->verdict.code = NFT_BREAK;
return;
}
- *dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id));
+ if (priv->mode == NFT_TUNNEL_MODE_NONE ||
+ (priv->mode == NFT_TUNNEL_MODE_RX &&
+ !(tun_info->mode & IP_TUNNEL_INFO_TX)) ||
+ (priv->mode == NFT_TUNNEL_MODE_TX &&
+ (tun_info->mode & IP_TUNNEL_INFO_TX)))
+ *dest = ntohl(tunnel_id_to_key32(tun_info->key.tun_id));
+ else
+ regs->verdict.code = NFT_BREAK;
break;
default:
WARN_ON(1);
@@ -47,6 +66,7 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr,
static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = {
[NFTA_TUNNEL_KEY] = { .type = NLA_U32 },
[NFTA_TUNNEL_DREG] = { .type = NLA_U32 },
+ [NFTA_TUNNEL_MODE] = { .type = NLA_U32 },
};
static int nft_tunnel_get_init(const struct nft_ctx *ctx,
@@ -74,6 +94,14 @@ static int nft_tunnel_get_init(const struct nft_ctx *ctx,
priv->dreg = nft_parse_register(tb[NFTA_TUNNEL_DREG]);
+ if (tb[NFTA_TUNNEL_MODE]) {
+ priv->mode = ntohl(nla_get_be32(tb[NFTA_TUNNEL_MODE]));
+ if (priv->mode > NFT_TUNNEL_MODE_MAX)
+ return -EOPNOTSUPP;
+ } else {
+ priv->mode = NFT_TUNNEL_MODE_NONE;
+ }
+
return nft_validate_register_store(ctx, priv->dreg, NULL,
NFT_DATA_VALUE, len);
}
@@ -87,6 +115,8 @@ static int nft_tunnel_get_dump(struct sk_buff *skb,
goto nla_put_failure;
if (nft_dump_register(skb, NFTA_TUNNEL_DREG, priv->dreg))
goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_TUNNEL_MODE, htonl(priv->mode)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -376,6 +406,13 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
return -ENOMEM;
memcpy(&md->u.tun_info, &info, sizeof(info));
+#ifdef CONFIG_DST_CACHE
+ err = dst_cache_init(&md->u.tun_info.dst_cache, GFP_KERNEL);
+ if (err < 0) {
+ metadata_dst_free(md);
+ return err;
+ }
+#endif
ip_tunnel_info_opts_set(&md->u.tun_info, &priv->opts.u, priv->opts.len,
priv->opts.flags);
priv->md = md;
diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c
index e8da9a9bba73..06dc55590441 100644
--- a/net/netfilter/utils.c
+++ b/net/netfilter/utils.c
@@ -162,7 +162,7 @@ EXPORT_SYMBOL_GPL(nf_checksum_partial);
int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
bool strict, unsigned short family)
{
- const struct nf_ipv6_ops *v6ops;
+ const struct nf_ipv6_ops *v6ops __maybe_unused;
int ret = 0;
switch (family) {
@@ -170,9 +170,7 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
ret = nf_ip_route(net, dst, fl, strict);
break;
case AF_INET6:
- v6ops = rcu_dereference(nf_ipv6_ops);
- if (v6ops)
- ret = v6ops->route(net, dst, fl, strict);
+ ret = nf_ip6_route(net, dst, fl, strict);
break;
}
@@ -180,6 +178,25 @@ int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl,
}
EXPORT_SYMBOL_GPL(nf_route);
+static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry)
+{
+#ifdef CONFIG_INET
+ const struct ip_rt_info *rt_info = nf_queue_entry_reroute(entry);
+
+ if (entry->state.hook == NF_INET_LOCAL_OUT) {
+ const struct iphdr *iph = ip_hdr(skb);
+
+ if (!(iph->tos == rt_info->tos &&
+ skb->mark == rt_info->mark &&
+ iph->daddr == rt_info->daddr &&
+ iph->saddr == rt_info->saddr))
+ return ip_route_me_harder(entry->state.net, skb,
+ RTN_UNSPEC);
+ }
+#endif
+ return 0;
+}
+
int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry)
{
const struct nf_ipv6_ops *v6ops;
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 13e1ac333fa4..e5e5c64df8d1 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -461,7 +461,7 @@ int xt_check_proc_name(const char *name, unsigned int size)
EXPORT_SYMBOL(xt_check_proc_name);
int xt_check_match(struct xt_mtchk_param *par,
- unsigned int size, u_int8_t proto, bool inv_proto)
+ unsigned int size, u16 proto, bool inv_proto)
{
int ret;
@@ -984,7 +984,7 @@ bool xt_find_jump_offset(const unsigned int *offsets,
EXPORT_SYMBOL(xt_find_jump_offset);
int xt_check_target(struct xt_tgchk_param *par,
- unsigned int size, u_int8_t proto, bool inv_proto)
+ unsigned int size, u16 proto, bool inv_proto)
{
int ret;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 2c7a4b80206f..0fa863f57575 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -159,7 +159,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par,
/* Make sure the timeout policy matches any existing protocol tracker,
* otherwise default to generic.
*/
- l4proto = __nf_ct_l4proto_find(proto);
+ l4proto = nf_ct_l4proto_find(proto);
if (timeout->l4proto->l4proto != l4proto->l4proto) {
ret = -EINVAL;
pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n",
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index eb4cbd244c3d..5f9b37e12801 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -41,19 +41,13 @@
#include <linux/workqueue.h>
#include <linux/sysfs.h>
-struct idletimer_tg_attr {
- struct attribute attr;
- ssize_t (*show)(struct kobject *kobj,
- struct attribute *attr, char *buf);
-};
-
struct idletimer_tg {
struct list_head entry;
struct timer_list timer;
struct work_struct work;
struct kobject *kobj;
- struct idletimer_tg_attr attr;
+ struct device_attribute attr;
unsigned int refcnt;
};
@@ -76,15 +70,15 @@ struct idletimer_tg *__idletimer_tg_find_by_label(const char *label)
return NULL;
}
-static ssize_t idletimer_tg_show(struct kobject *kobj, struct attribute *attr,
- char *buf)
+static ssize_t idletimer_tg_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
struct idletimer_tg *timer;
unsigned long expires = 0;
mutex_lock(&list_mutex);
- timer = __idletimer_tg_find_by_label(attr->name);
+ timer = __idletimer_tg_find_by_label(attr->attr.name);
if (timer)
expires = timer->timer.expires;
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index 89e281b3bfc2..29987ff03621 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -36,7 +36,6 @@ MODULE_ALIAS("ip6t_addrtype");
static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
const struct in6_addr *addr, u16 mask)
{
- const struct nf_ipv6_ops *v6ops;
struct flowi6 flow;
struct rt6_info *rt;
u32 ret = 0;
@@ -47,18 +46,13 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
if (dev)
flow.flowi6_oif = dev->ifindex;
- v6ops = nf_get_ipv6_ops();
- if (v6ops) {
- if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
- if (v6ops->chk_addr(net, addr, dev, true))
- ret = XT_ADDRTYPE_LOCAL;
- }
- route_err = v6ops->route(net, (struct dst_entry **)&rt,
- flowi6_to_flowi(&flow), false);
- } else {
- route_err = 1;
+ if (dev && (mask & XT_ADDRTYPE_LOCAL)) {
+ if (nf_ipv6_chk_addr(net, addr, dev, true))
+ ret = XT_ADDRTYPE_LOCAL;
}
+ route_err = nf_ip6_route(net, (struct dst_entry **)&rt,
+ flowi6_to_flowi(&flow), false);
if (route_err)
return XT_ADDRTYPE_UNREACHABLE;
diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c
index ac91170fc8c8..61eabd171186 100644
--- a/net/netfilter/xt_nat.c
+++ b/net/netfilter/xt_nat.c
@@ -14,7 +14,7 @@
#include <linux/skbuff.h>
#include <linux/netfilter.h>
#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat.h>
static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par)
{
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 4034d70bff39..b2e39cb6a590 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -96,8 +96,7 @@ match_outdev:
static int physdev_mt_check(const struct xt_mtchk_param *par)
{
const struct xt_physdev_info *info = par->matchinfo;
-
- br_netfilter_enable();
+ static bool brnf_probed __read_mostly;
if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
info->bitmask & ~XT_PHYSDEV_OP_MASK)
@@ -111,6 +110,12 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
return -EINVAL;
}
+
+ if (!brnf_probed) {
+ brnf_probed = true;
+ request_module("br_netfilter");
+ }
+
return 0;
}
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index f44de4bc2100..1664d2ec8b2f 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -337,7 +337,6 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
unsigned int nstamp_mask;
unsigned int i;
int ret = -EINVAL;
- size_t sz;
net_get_random_once(&hash_rnd, sizeof(hash_rnd));
@@ -387,8 +386,7 @@ static int recent_mt_check(const struct xt_mtchk_param *par,
goto out;
}
- sz = sizeof(*t) + sizeof(t->iphash[0]) * ip_list_hash_size;
- t = kvzalloc(sz, GFP_KERNEL);
+ t = kvzalloc(struct_size(t, iphash, ip_list_hash_size), GFP_KERNEL);
if (t == NULL) {
ret = -ENOMEM;
goto out;