Diffstat (limited to 'net/ipv4')
 net/ipv4/bpfilter/sockopt.c                 |   5
 net/ipv4/devinet.c                          |  15
 net/ipv4/fib_frontend.c                     |  53
 net/ipv4/fib_semantics.c                    |   2
 net/ipv4/ip_tunnel.c                        |   8
 net/ipv4/ipmr_base.c                        |   8
 net/ipv4/metrics.c                          |   2
 net/ipv4/netfilter/Kconfig                  |  10
 net/ipv4/netfilter/Makefile                 |   5
 net/ipv4/netfilter/nf_nat_masquerade_ipv4.c |   4
 net/ipv4/netfilter/nf_tproxy_ipv4.c         | 147
 net/ipv4/sysctl_net_ipv4.c                  |   5
 net/ipv4/tcp_input.c                        |  32
 net/ipv4/tcp_ipv4.c                         |  37
 net/ipv4/tcp_minisocks.c                    |   1
 net/ipv4/udp.c                              |   4
 16 files changed, 282 insertions(+), 56 deletions(-)
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
index 42a96d2d8d05..5e04ed25bc0e 100644
--- a/net/ipv4/bpfilter/sockopt.c
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -10,8 +10,9 @@ int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
unsigned int optlen, bool is_set);
EXPORT_SYMBOL_GPL(bpfilter_process_sockopt);
-int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval,
- unsigned int optlen, bool is_set)
+static int bpfilter_mbox_request(struct sock *sk, int optname,
+ char __user *optval,
+ unsigned int optlen, bool is_set)
{
if (!bpfilter_process_sockopt) {
int err = request_module("bpfilter");
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 40f001782c1b..d7585ab1a77a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -99,6 +99,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
[IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
[IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
[IFA_FLAGS] = { .type = NLA_U32 },
+ [IFA_RT_PRIORITY] = { .type = NLA_U32 },
};
#define IN4_ADDR_HSIZE_SHIFT 8
@@ -835,6 +836,9 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+ if (tb[IFA_RT_PRIORITY])
+ ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
+
if (tb[IFA_CACHEINFO]) {
struct ifa_cacheinfo *ci;
@@ -906,12 +910,20 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
extack);
} else {
+ u32 new_metric = ifa->ifa_rt_priority;
+
inet_free_ifa(ifa);
if (nlh->nlmsg_flags & NLM_F_EXCL ||
!(nlh->nlmsg_flags & NLM_F_REPLACE))
return -EEXIST;
ifa = ifa_existing;
+
+ if (ifa->ifa_rt_priority != new_metric) {
+ fib_modify_prefix_metric(ifa, new_metric);
+ ifa->ifa_rt_priority = new_metric;
+ }
+
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
cancel_delayed_work(&check_lifetime_work);
queue_delayed_work(system_power_efficient_wq,
@@ -1549,6 +1561,7 @@ static size_t inet_nlmsg_size(void)
+ nla_total_size(4) /* IFA_BROADCAST */
+ nla_total_size(IFNAMSIZ) /* IFA_LABEL */
+ nla_total_size(4) /* IFA_FLAGS */
+ + nla_total_size(4) /* IFA_RT_PRIORITY */
+ nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
}
@@ -1618,6 +1631,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
(ifa->ifa_label[0] &&
nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
+ (ifa->ifa_rt_priority &&
+ nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
preferred, valid))
goto nla_put_failure;
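
(Editorial aside, not part of the patch: the devinet.c hunks above accept a new IFA_RT_PRIORITY netlink attribute on RTM_NEWADDR and, when an existing address is replaced with a different value, call fib_modify_prefix_metric() to move the prefix route to the new metric. A minimal, hypothetical userspace sketch that exercises this path follows; the interface index, address, prefix length and metric are invented for illustration, and it assumes a uapi if_addr.h that already defines IFA_RT_PRIORITY.)

/* Hypothetical sketch only: replace 192.0.2.1/24 on a given interface and
 * request a prefix-route metric of 100 via the new IFA_RT_PRIORITY attribute.
 */
#include <string.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/if_addr.h>

static void add_rtattr(struct nlmsghdr *nh, unsigned short type,
		       const void *data, unsigned int len)
{
	struct rtattr *rta;

	rta = (struct rtattr *)((char *)nh + NLMSG_ALIGN(nh->nlmsg_len));
	rta->rta_type = type;
	rta->rta_len = RTA_LENGTH(len);
	memcpy(RTA_DATA(rta), data, len);
	nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
}

int set_addr_metric(int ifindex)
{
	struct sockaddr_nl dst = { .nl_family = AF_NETLINK };
	struct {
		struct nlmsghdr nh;
		struct ifaddrmsg ifa;
		char attrs[64];
	} req;
	__u32 metric = 100;	/* invented value */
	struct in_addr addr;
	int fd, ret;

	memset(&req, 0, sizeof(req));
	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
	req.nh.nlmsg_type = RTM_NEWADDR;
	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_REPLACE;
	req.ifa.ifa_family = AF_INET;
	req.ifa.ifa_prefixlen = 24;
	req.ifa.ifa_index = ifindex;

	inet_pton(AF_INET, "192.0.2.1", &addr);
	add_rtattr(&req.nh, IFA_LOCAL, &addr, sizeof(addr));
	/* New attribute from this patch: metric of the address's prefix route */
	add_rtattr(&req.nh, IFA_RT_PRIORITY, &metric, sizeof(metric));

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
	if (fd < 0)
		return -1;
	ret = sendto(fd, &req, req.nh.nlmsg_len, 0,
		     (struct sockaddr *)&dst, sizeof(dst));
	close(fd);
	return ret < 0 ? -1 : 0;
}
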
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b69e2824c761..63aa39b3af03 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -847,7 +847,8 @@ out_err:
* to fib engine. It is legal, because all events occur
* only when netlink is already locked.
*/
-static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
+static void fib_magic(int cmd, int type, __be32 dst, int dst_len,
+ struct in_ifaddr *ifa, u32 rt_priority)
{
struct net *net = dev_net(ifa->ifa_dev->dev);
u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
@@ -857,6 +858,7 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
.fc_type = type,
.fc_dst = dst,
.fc_dst_len = dst_len,
+ .fc_priority = rt_priority,
.fc_prefsrc = ifa->ifa_local,
.fc_oif = ifa->ifa_dev->dev->ifindex,
.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
@@ -902,31 +904,57 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
}
}
- fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim, 0);
if (!(dev->flags & IFF_UP))
return;
/* Add broadcast address, if it is explicitly assigned. */
if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
- fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
+ prim, 0);
if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
fib_magic(RTM_NEWROUTE,
dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- prefix, ifa->ifa_prefixlen, prim);
+ prefix, ifa->ifa_prefixlen, prim,
+ ifa->ifa_rt_priority);
/* Add network specific broadcasts, when it takes a sense */
if (ifa->ifa_prefixlen < 31) {
- fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32,
+ prim, 0);
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
- 32, prim);
+ 32, prim, 0);
}
}
}
+void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric)
+{
+ __be32 prefix = ifa->ifa_address & ifa->ifa_mask;
+ struct in_device *in_dev = ifa->ifa_dev;
+ struct net_device *dev = in_dev->dev;
+
+ if (!(dev->flags & IFF_UP) ||
+ ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) ||
+ ipv4_is_zeronet(prefix) ||
+ prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32)
+ return;
+
+ /* add the new */
+ fib_magic(RTM_NEWROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ prefix, ifa->ifa_prefixlen, ifa, new_metric);
+
+ /* delete the old */
+ fib_magic(RTM_DELROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ prefix, ifa->ifa_prefixlen, ifa, ifa->ifa_rt_priority);
+}
+
/* Delete primary or secondary address.
* Optionally, on secondary address promotion consider the addresses
* from subnet iprim as deleted, even if they are in device list.
@@ -968,7 +996,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
fib_magic(RTM_DELROUTE,
dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- any, ifa->ifa_prefixlen, prim);
+ any, ifa->ifa_prefixlen, prim, 0);
subnet = 1;
}
@@ -1052,17 +1080,20 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
no_promotions:
if (!(ok & BRD_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
+ prim, 0);
if (subnet && ifa->ifa_prefixlen < 31) {
if (!(ok & BRD1_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32,
+ prim, 0);
if (!(ok & BRD0_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32,
+ prim, 0);
}
if (!(ok & LOCAL_OK)) {
unsigned int addr_type;
- fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim, 0);
/* Check, that this local address finally disappeared. */
addr_type = inet_addr_type_dev_table(dev_net(dev), dev,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 6608db23f54b..f3c89ccf14c5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -717,6 +717,8 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
nla_strlcpy(tmp, nla, sizeof(tmp));
val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
} else {
+ if (nla_len(nla) != sizeof(u32))
+ return false;
val = nla_get_u32(nla);
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 6b0e362cc99b..38d906baf1df 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -328,7 +328,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
if (tdev) {
hlen = tdev->hard_header_len + tdev->needed_headroom;
- mtu = tdev->mtu;
+ mtu = min(tdev->mtu, IP_MAX_MTU);
}
dev->needed_headroom = t_hlen + hlen;
@@ -362,7 +362,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
nt = netdev_priv(dev);
t_hlen = nt->hlen + sizeof(struct iphdr);
dev->min_mtu = ETH_MIN_MTU;
- dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
+ dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
ip_tunnel_add(itn, nt);
return nt;
@@ -930,7 +930,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
+ int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
@@ -1107,7 +1107,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
mtu = ip_tunnel_bind_dev(dev);
if (tb[IFLA_MTU]) {
- unsigned int max = 0xfff8 - dev->hard_header_len - nt->hlen;
+ unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;
mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
(unsigned int)(max - sizeof(struct iphdr)));
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 30221701614c..cafb0506c8c9 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -35,17 +35,19 @@ mr_table_alloc(struct net *net, u32 id,
struct net *net))
{
struct mr_table *mrt;
+ int err;
mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
if (!mrt)
- return NULL;
+ return ERR_PTR(-ENOMEM);
mrt->id = id;
write_pnet(&mrt->net, net);
mrt->ops = *ops;
- if (rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params)) {
+ err = rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+ if (err) {
kfree(mrt);
- return NULL;
+ return ERR_PTR(err);
}
INIT_LIST_HEAD(&mrt->mfc_cache_list);
INIT_LIST_HEAD(&mrt->mfc_unres_queue);
diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c
index 5121c6475e6b..04311f7067e2 100644
--- a/net/ipv4/metrics.c
+++ b/net/ipv4/metrics.c
@@ -32,6 +32,8 @@ int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
if (val == TCP_CA_UNSPEC)
return -EINVAL;
} else {
+ if (nla_len(nla) != sizeof(u32))
+ return -EINVAL;
val = nla_get_u32(nla);
}
if (type == RTAX_ADVMSS && val > 65535 - 40)
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 280048e1e395..bbfc356cb1b5 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -29,7 +29,10 @@ config NF_SOCKET_IPV4
tristate "IPv4 socket lookup support"
help
This option enables the IPv4 socket lookup infrastructure. This is
- is required by the iptables socket match.
+ is required by the {ip,nf}tables socket match.
+
+config NF_TPROXY_IPV4
+ tristate "IPv4 tproxy support"
if NF_TABLES
@@ -129,10 +132,7 @@ config NFT_CHAIN_NAT_IPV4
source and destination ports.
config NF_NAT_MASQUERADE_IPV4
- tristate "IPv4 masquerade support"
- help
- This is the kernel functionality to provide NAT in the masquerade
- flavour (automatic source address selection).
+ bool
config NFT_MASQ_IPV4
tristate "IPv4 masquerading support for nf_tables"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 0e5edd0c7926..8394c17c269f 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -10,12 +10,14 @@ nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
+nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
# defrag
obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o
+obj-$(CONFIG_NF_TPROXY_IPV4) += nf_tproxy_ipv4.o
# logging
obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
@@ -32,9 +34,6 @@ nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o
$(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
-obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
-
-
# NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index f538c5001547..ad3aeff152ed 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -7,7 +7,6 @@
*/
#include <linux/types.h>
-#include <linux/module.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
@@ -157,6 +156,3 @@ void nf_nat_masquerade_ipv4_unregister_notifier(void)
unregister_inetaddr_notifier(&masq_inet_notifier);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
new file mode 100644
index 000000000000..805e83ec3ad9
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2007-2008 BalaBit IT Ltd.
+ * Author: Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <net/netfilter/nf_tproxy.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <linux/inetdevice.h>
+
+struct sock *
+nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
+ __be32 laddr, __be16 lport, struct sock *sk)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct tcphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
+ if (hp == NULL) {
+ inet_twsk_put(inet_twsk(sk));
+ return NULL;
+ }
+
+ if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
+ /* SYN to a TIME_WAIT socket, we'd rather redirect it
+ * to a listener socket if there's one */
+ struct sock *sk2;
+
+ sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+ iph->saddr, laddr ? laddr : iph->daddr,
+ hp->source, lport ? lport : hp->dest,
+ skb->dev, NF_TPROXY_LOOKUP_LISTENER);
+ if (sk2) {
+ inet_twsk_deschedule_put(inet_twsk(sk));
+ sk = sk2;
+ }
+ }
+
+ return sk;
+}
+EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait4);
+
+__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
+{
+ struct in_device *indev;
+ __be32 laddr;
+
+ if (user_laddr)
+ return user_laddr;
+
+ laddr = 0;
+ indev = __in_dev_get_rcu(skb->dev);
+ for_primary_ifa(indev) {
+ laddr = ifa->ifa_local;
+ break;
+ } endfor_ifa(indev);
+
+ return laddr ? laddr : daddr;
+}
+EXPORT_SYMBOL_GPL(nf_tproxy_laddr4);
+
+struct sock *
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+ const u8 protocol,
+ const __be32 saddr, const __be32 daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in,
+ const enum nf_tproxy_lookup_t lookup_type)
+{
+ struct sock *sk;
+ struct tcphdr *tcph;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ switch (lookup_type) {
+ case NF_TPROXY_LOOKUP_LISTENER:
+ tcph = hp;
+ sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
+ ip_hdrlen(skb) +
+ __tcp_hdrlen(tcph),
+ saddr, sport,
+ daddr, dport,
+ in->ifindex, 0);
+
+ if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ break;
+ case NF_TPROXY_LOOKUP_ESTABLISHED:
+ sk = inet_lookup_established(net, &tcp_hashinfo,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ break;
+ default:
+ BUG();
+ }
+ break;
+ case IPPROTO_UDP:
+ sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ if (sk) {
+ int connected = (sk->sk_state == TCP_ESTABLISHED);
+ int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
+
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ if ((lookup_type == NF_TPROXY_LOOKUP_ESTABLISHED &&
+ (!connected || wildcard)) ||
+ (lookup_type == NF_TPROXY_LOOKUP_LISTENER && connected)) {
+ sock_put(sk);
+ sk = NULL;
+ }
+ }
+ break;
+ default:
+ WARN_ON(1);
+ sk = NULL;
+ }
+
+ pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
+ protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
+
+ return sk;
+}
+EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
+MODULE_DESCRIPTION("Netfilter IPv4 transparent proxy support");
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d2eed3ddcb0a..d06247ba08b2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -30,6 +30,7 @@
static int zero;
static int one = 1;
+static int two = 2;
static int four = 4;
static int thousand = 1000;
static int gso_max_segs = GSO_MAX_SEGS;
@@ -845,7 +846,9 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_tcp_tw_reuse,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
},
{
.procname = "tcp_max_tw_buckets",
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1191cac72109..355d3dffd021 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -254,8 +254,10 @@ static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}
-static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
+static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
case INET_ECN_NOT_ECT:
/* Funny extension: if ECT is not set on a segment,
@@ -263,31 +265,31 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
* it is probably a retransmit.
*/
if (tp->ecn_flags & TCP_ECN_SEEN)
- tcp_enter_quickack_mode((struct sock *)tp, 1);
+ tcp_enter_quickack_mode(sk, 1);
break;
case INET_ECN_CE:
- if (tcp_ca_needs_ecn((struct sock *)tp))
- tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE);
+ if (tcp_ca_needs_ecn(sk))
+ tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
/* Better not delay acks, sender can have a very low cwnd */
- tcp_enter_quickack_mode((struct sock *)tp, 1);
+ tcp_enter_quickack_mode(sk, 1);
tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
}
tp->ecn_flags |= TCP_ECN_SEEN;
break;
default:
- if (tcp_ca_needs_ecn((struct sock *)tp))
- tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE);
+ if (tcp_ca_needs_ecn(sk))
+ tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
tp->ecn_flags |= TCP_ECN_SEEN;
break;
}
}
-static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
+static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
- if (tp->ecn_flags & TCP_ECN_OK)
- __tcp_ecn_check_ce(tp, skb);
+ if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
+ __tcp_ecn_check_ce(sk, skb);
}
static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
@@ -710,7 +712,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
}
icsk->icsk_ack.lrcvtime = now;
- tcp_ecn_check_ce(tp, skb);
+ tcp_ecn_check_ce(sk, skb);
if (skb->len >= 128)
tcp_grow_window(sk, skb);
@@ -4434,7 +4436,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
u32 seq, end_seq;
bool fragstolen;
- tcp_ecn_check_ce(tp, skb);
+ tcp_ecn_check_ce(sk, skb);
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
@@ -5390,11 +5392,11 @@ discard:
* the rest is checked inline. Fast processing is turned on in
* tcp_data_queue when everything is OK.
*/
-void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct tcphdr *th)
+void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
{
- unsigned int len = skb->len;
+ const struct tcphdr *th = (const struct tcphdr *)skb->data;
struct tcp_sock *tp = tcp_sk(sk);
+ unsigned int len = skb->len;
/* TCP congestion window tracking */
trace_tcp_probe(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index adbdb503db0c..633963e228bc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -110,8 +110,38 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
+ const struct inet_timewait_sock *tw = inet_twsk(sktw);
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
struct tcp_sock *tp = tcp_sk(sk);
+ int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
+
+ if (reuse == 2) {
+ /* Still does not detect *everything* that goes through
+ * lo, since we require a loopback src or dst address
+ * or direct binding to 'lo' interface.
+ */
+ bool loopback = false;
+ if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
+ loopback = true;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (tw->tw_family == AF_INET6) {
+ if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
+ (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
+ (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
+ ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
+ (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
+ (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
+ loopback = true;
+ } else
+#endif
+ {
+ if (ipv4_is_loopback(tw->tw_daddr) ||
+ ipv4_is_loopback(tw->tw_rcv_saddr))
+ loopback = true;
+ }
+ if (!loopback)
+ reuse = 0;
+ }
/* With PAWS, it is safe from the viewpoint
of data integrity. Even without PAWS it is safe provided sequence
@@ -125,8 +155,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
and use initial timestamp retrieved from peer table.
*/
if (tcptw->tw_ts_recent_stamp &&
- (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
- get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
+ (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
if (tp->write_seq == 0)
tp->write_seq = 1;
@@ -1486,7 +1515,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
sk->sk_rx_dst = NULL;
}
}
- tcp_rcv_established(sk, skb, tcp_hdr(skb));
+ tcp_rcv_established(sk, skb);
return 0;
}
@@ -2529,7 +2558,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_orphan_retries = 0;
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
- net->ipv4.sysctl_tcp_tw_reuse = 0;
+ net->ipv4.sysctl_tcp_tw_reuse = 2;
cnt = tcp_hashinfo.ehash_mask + 1;
net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
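
(Editorial aside, not part of the patch: together, the sysctl_net_ipv4.c and tcp_ipv4.c hunks turn net.ipv4.tcp_tw_reuse into a three-way switch: 0 disables TIME_WAIT reuse, 1 enables it globally, and the new default of 2 enables it only when the source or destination is a loopback address or the socket is bound to the loopback interface. A tiny, hypothetical probe of the current mode could look like this.)

/* Hypothetical sketch only: report the tcp_tw_reuse mode described above
 * (0 = off, 1 = global, 2 = loopback-only, the new default). */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_tw_reuse", "r");
	int mode;

	if (!f || fscanf(f, "%d", &mode) != 1) {
		if (f)
			fclose(f);
		return 1;
	}
	fclose(f);
	printf("net.ipv4.tcp_tw_reuse = %d\n", mode);
	return 0;
}
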
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f867658b4b30..1dda1341a223 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -307,7 +307,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (timeo < rto)
timeo = rto;
- tw->tw_timeout = TCP_TIMEWAIT_LEN;
if (state == TCP_TIME_WAIT)
timeo = TCP_TIMEWAIT_LEN;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3c27d00b5730..9c41a45a9d39 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -544,9 +544,7 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);
/* Must be called under rcu_read_lock().
* Does increment socket refcount.
*/
-#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
- IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \
- IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
+#if IS_ENABLED(CONFIG_NF_TPROXY_IPV4) || IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif)
{