aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/Kconfig2
-rw-r--r--net/Kconfig13
-rw-r--r--net/Makefile4
-rw-r--r--net/bluetooth/hci_core.c23
-rw-r--r--net/bluetooth/hci_debugfs.c24
-rw-r--r--net/bluetooth/smp.c12
-rw-r--r--net/bpfilter/Makefile4
-rw-r--r--net/bpfilter/bpfilter_kern.c7
-rw-r--r--net/bridge/br_private.h13
-rw-r--r--net/bridge/br_switchdev.c25
-rw-r--r--net/bridge/br_vlan.c103
-rw-r--r--net/bridge/netfilter/ebtables.c3
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/devlink.c22
-rw-r--r--net/core/failover.c315
-rw-r--r--net/core/flow_dissector.c2
-rw-r--r--net/core/net-sysfs.c6
-rw-r--r--net/core/rtnetlink.c18
-rw-r--r--net/core/sock.c15
-rw-r--r--net/dccp/minisocks.c1
-rw-r--r--net/dsa/port.c6
-rw-r--r--net/ipv4/bpfilter/sockopt.c5
-rw-r--r--net/ipv4/devinet.c15
-rw-r--r--net/ipv4/fib_frontend.c53
-rw-r--r--net/ipv4/fib_semantics.c2
-rw-r--r--net/ipv4/ip_tunnel.c8
-rw-r--r--net/ipv4/ipmr_base.c8
-rw-r--r--net/ipv4/metrics.c2
-rw-r--r--net/ipv4/netfilter/Kconfig10
-rw-r--r--net/ipv4/netfilter/Makefile5
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c4
-rw-r--r--net/ipv4/netfilter/nf_tproxy_ipv4.c147
-rw-r--r--net/ipv4/sysctl_net_ipv4.c5
-rw-r--r--net/ipv4/tcp_input.c32
-rw-r--r--net/ipv4/tcp_ipv4.c37
-rw-r--r--net/ipv4/tcp_minisocks.c1
-rw-r--r--net/ipv4/udp.c4
-rw-r--r--net/ipv6/addrconf.c363
-rw-r--r--net/ipv6/ip6_output.c3
-rw-r--r--net/ipv6/ip6_tunnel.c11
-rw-r--r--net/ipv6/ip6mr.c21
-rw-r--r--net/ipv6/ndisc.c6
-rw-r--r--net/ipv6/netfilter/Kconfig10
-rw-r--r--net/ipv6/netfilter/Makefile3
-rw-r--r--net/ipv6/netfilter/nf_nat_masquerade_ipv6.c4
-rw-r--r--net/ipv6/netfilter/nf_tproxy_ipv6.c146
-rw-r--r--net/ipv6/route.c16
-rw-r--r--net/ipv6/seg6.c1
-rw-r--r--net/ipv6/seg6_iptunnel.c4
-rw-r--r--net/ipv6/sit.c5
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/ipv6/udp.c4
-rw-r--r--net/ipv6/xfrm6_policy.c2
-rw-r--r--net/kcm/kcmsock.c2
-rw-r--r--net/l2tp/l2tp_ppp.c35
-rw-r--r--net/ncsi/ncsi-netlink.c3
-rw-r--r--net/ncsi/ncsi-rsp.c4
-rw-r--r--net/netfilter/Kconfig26
-rw-r--r--net/netfilter/Makefile4
-rw-r--r--net/netfilter/ipvs/ip_vs_app.c24
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c21
-rw-r--r--net/netfilter/ipvs/ip_vs_ftp.c467
-rw-r--r--net/netfilter/ipvs/ip_vs_nfct.c101
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_sctp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c4
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_udp.c4
-rw-r--r--net/netfilter/nf_conncount.c36
-rw-r--r--net/netfilter/nf_flow_table_ip.c6
-rw-r--r--net/netfilter/nf_nat_core.c2
-rw-r--r--net/netfilter/nf_nat_redirect.c4
-rw-r--r--net/netfilter/nf_tables_api.c626
-rw-r--r--net/netfilter/nf_tables_core.c44
-rw-r--r--net/netfilter/nfnetlink.c44
-rw-r--r--net/netfilter/nfnetlink_acct.c2
-rw-r--r--net/netfilter/nfnetlink_cthelper.c4
-rw-r--r--net/netfilter/nft_compat.c29
-rw-r--r--net/netfilter/nft_connlimit.c297
-rw-r--r--net/netfilter/nft_counter.c4
-rw-r--r--net/netfilter/nft_ct.c23
-rw-r--r--net/netfilter/nft_dynset.c9
-rw-r--r--net/netfilter/nft_fwd_netdev.c146
-rw-r--r--net/netfilter/nft_hash.c10
-rw-r--r--net/netfilter/nft_immediate.c27
-rw-r--r--net/netfilter/nft_limit.c38
-rw-r--r--net/netfilter/nft_log.c92
-rw-r--r--net/netfilter/nft_lookup.c47
-rw-r--r--net/netfilter/nft_meta.c14
-rw-r--r--net/netfilter/nft_numgen.c5
-rw-r--r--net/netfilter/nft_set_hash.c21
-rw-r--r--net/netfilter/nft_socket.c144
-rw-r--r--net/netfilter/xt_TPROXY.c366
-rw-r--r--net/netfilter/xt_socket.c4
-rw-r--r--net/nfc/netlink.c17
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/rds/Kconfig2
-rw-r--r--net/rxrpc/ar-internal.h2
-rw-r--r--net/rxrpc/call_event.c8
-rw-r--r--net/rxrpc/input.c10
-rw-r--r--net/sched/cls_api.c464
-rw-r--r--net/sched/cls_flower.c6
-rw-r--r--net/sched/sch_generic.c3
-rw-r--r--net/sched/sch_mq.c37
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/smc/af_smc.c2
-rw-r--r--net/sunrpc/Kconfig2
-rw-r--r--net/xfrm/xfrm_policy.c5
106 files changed, 3496 insertions, 1367 deletions
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index 46c39f7da444..e6014e0e51f7 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -32,7 +32,7 @@ config NET_9P_XEN
config NET_9P_RDMA
- depends on INET && INFINIBAND_ADDR_TRANS
+ depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS
tristate "9P RDMA Transport (Experimental)"
help
This builds support for an RDMA transport.
diff --git a/net/Kconfig b/net/Kconfig
index ba554cedb615..f738a6f27665 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -432,6 +432,19 @@ config MAY_USE_DEVLINK
config PAGE_POOL
bool
+config FAILOVER
+ tristate "Generic failover module"
+ help
+ The failover module provides a generic interface for paravirtual
+ drivers to register a netdev and a set of ops with a failover
+ instance. The ops are used as event handlers that get called to
+ handle netdev register/unregister/link change/name change events
+ on slave pci ethernet devices with the same mac address as the
+ failover netdev. This enables paravirtual drivers to use a
+ VF as an accelerated low latency datapath. It also allows live
+ migration of VMs with direct attached VFs by failing over to the
+ paravirtual datapath when the VF is unplugged.
+
endif # if NET
# Used by archs to tell that they support BPF JIT compiler plus which flavour.
diff --git a/net/Makefile b/net/Makefile
index bdaf53925acd..13ec0d5415c7 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -20,7 +20,11 @@ obj-$(CONFIG_TLS) += tls/
obj-$(CONFIG_XFRM) += xfrm/
obj-$(CONFIG_UNIX) += unix/
obj-$(CONFIG_NET) += ipv6/
+ifneq ($(CC_CAN_LINK),y)
+$(warning CC cannot link executables. Skipping bpfilter.)
+else
obj-$(CONFIG_BPFILTER) += bpfilter/
+endif
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_BRIDGE) += bridge/
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index b0ee9edaae35..1dec33790198 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -76,19 +76,15 @@ static ssize_t dut_mode_write(struct file *file, const char __user *user_buf,
{
struct hci_dev *hdev = file->private_data;
struct sk_buff *skb;
- char buf[32];
- size_t buf_size = min(count, (sizeof(buf)-1));
bool enable;
+ int err;
if (!test_bit(HCI_UP, &hdev->flags))
return -ENETDOWN;
- if (copy_from_user(buf, user_buf, buf_size))
- return -EFAULT;
-
- buf[buf_size] = '\0';
- if (strtobool(buf, &enable))
- return -EINVAL;
+ err = kstrtobool_from_user(user_buf, count, &enable);
+ if (err)
+ return err;
if (enable == hci_dev_test_flag(hdev, HCI_DUT_MODE))
return -EALREADY;
@@ -135,17 +131,12 @@ static ssize_t vendor_diag_write(struct file *file, const char __user *user_buf,
size_t count, loff_t *ppos)
{
struct hci_dev *hdev = file->private_data;
- char buf[32];
- size_t buf_size = min(count, (sizeof(buf)-1));
bool enable;
int err;
- if (copy_from_user(buf, user_buf, buf_size))
- return -EFAULT;
-
- buf[buf_size] = '\0';
- if (strtobool(buf, &enable))
- return -EINVAL;
+ err = kstrtobool_from_user(user_buf, count, &enable);
+ if (err)
+ return err;
/* When the diagnostic flags are not persistent and the transport
* is not active or in user channel operation, then there is no need
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 418b76e557b0..0d8ab5b3c177 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -47,19 +47,15 @@ static ssize_t __name ## _write(struct file *file, \
size_t count, loff_t *ppos) \
{ \
struct hci_dev *hdev = file->private_data; \
- char buf[32]; \
- size_t buf_size = min(count, (sizeof(buf) - 1)); \
bool enable; \
+ int err; \
\
if (test_bit(HCI_UP, &hdev->flags)) \
return -EBUSY; \
\
- if (copy_from_user(buf, user_buf, buf_size)) \
- return -EFAULT; \
- \
- buf[buf_size] = '\0'; \
- if (strtobool(buf, &enable)) \
- return -EINVAL; \
+ err = kstrtobool_from_user(user_buf, count, &enable); \
+ if (err) \
+ return err; \
\
if (enable == test_bit(__quirk, &hdev->quirks)) \
return -EALREADY; \
@@ -658,19 +654,15 @@ static ssize_t force_static_address_write(struct file *file,
size_t count, loff_t *ppos)
{
struct hci_dev *hdev = file->private_data;
- char buf[32];
- size_t buf_size = min(count, (sizeof(buf)-1));
bool enable;
+ int err;
if (test_bit(HCI_UP, &hdev->flags))
return -EBUSY;
- if (copy_from_user(buf, user_buf, buf_size))
- return -EFAULT;
-
- buf[buf_size] = '\0';
- if (strtobool(buf, &enable))
- return -EINVAL;
+ err = kstrtobool_from_user(user_buf, count, &enable);
+ if (err)
+ return err;
if (enable == hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR))
return -EALREADY;
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a2ddae2f37d7..ae91e2d40056 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -3315,16 +3315,12 @@ static ssize_t force_bredr_smp_write(struct file *file,
size_t count, loff_t *ppos)
{
struct hci_dev *hdev = file->private_data;
- char buf[32];
- size_t buf_size = min(count, (sizeof(buf)-1));
bool enable;
+ int err;
- if (copy_from_user(buf, user_buf, buf_size))
- return -EFAULT;
-
- buf[buf_size] = '\0';
- if (strtobool(buf, &enable))
- return -EINVAL;
+ err = kstrtobool_from_user(user_buf, count, &enable);
+ if (err)
+ return err;
if (enable == hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP))
return -EALREADY;
diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index 2af752c8ef5e..aafa72001fcd 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -5,7 +5,9 @@
hostprogs-y := bpfilter_umh
bpfilter_umh-objs := main.o
-HOSTCFLAGS += -I. -Itools/include/
+HOSTCFLAGS += -I. -Itools/include/ -Itools/include/uapi
+HOSTCC := $(CC)
+
ifeq ($(CONFIG_BPFILTER_UMH), y)
# builtin bpfilter_umh should be compiled with -static
# since rootfs isn't mounted at the time of __init
diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c
index 7596314b61c7..b13d058f8c34 100644
--- a/net/bpfilter/bpfilter_kern.c
+++ b/net/bpfilter/bpfilter_kern.c
@@ -33,7 +33,8 @@ static void shutdown_umh(struct umh_info *info)
static void __stop_umh(void)
{
- if (bpfilter_process_sockopt) {
+ if (IS_ENABLED(CONFIG_INET) &&
+ bpfilter_process_sockopt) {
bpfilter_process_sockopt = NULL;
shutdown_umh(&info);
}
@@ -98,7 +99,9 @@ static int __init load_umh(void)
stop_umh();
return -EFAULT;
}
- bpfilter_process_sockopt = &__bpfilter_process_sockopt;
+ if (IS_ENABLED(CONFIG_INET))
+ bpfilter_process_sockopt = &__bpfilter_process_sockopt;
+
return 0;
}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 11520ed528b0..5216a524b537 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1139,6 +1139,8 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
unsigned long mask);
void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb,
int type);
+int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags);
+int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid);
static inline void br_switchdev_frame_unmark(struct sk_buff *skb)
{
@@ -1168,6 +1170,17 @@ static inline int br_switchdev_set_port_flag(struct net_bridge_port *p,
return 0;
}
+static inline int br_switchdev_port_vlan_add(struct net_device *dev,
+ u16 vid, u16 flags)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
+{
+ return -EOPNOTSUPP;
+}
+
static inline void
br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
{
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 35474d49555d..d77f807420c4 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -136,3 +136,28 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
break;
}
}
+
+int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags)
+{
+ struct switchdev_obj_port_vlan v = {
+ .obj.orig_dev = dev,
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+ .flags = flags,
+ .vid_begin = vid,
+ .vid_end = vid,
+ };
+
+ return switchdev_port_obj_add(dev, &v.obj);
+}
+
+int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
+{
+ struct switchdev_obj_port_vlan v = {
+ .obj.orig_dev = dev,
+ .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
+ .vid_begin = vid,
+ .vid_end = vid,
+ };
+
+ return switchdev_port_obj_del(dev, &v.obj);
+}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index dc832c0934c6..7df269092103 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -82,19 +82,12 @@ static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags)
static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br,
u16 vid, u16 flags)
{
- struct switchdev_obj_port_vlan v = {
- .obj.orig_dev = dev,
- .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- .flags = flags,
- .vid_begin = vid,
- .vid_end = vid,
- };
int err;
/* Try switchdev op first. In case it is not supported, fallback to
* 8021q add.
*/
- err = switchdev_port_obj_add(dev, &v.obj);
+ err = br_switchdev_port_vlan_add(dev, vid, flags);
if (err == -EOPNOTSUPP)
return vlan_vid_add(dev, br->vlan_proto, vid);
return err;
@@ -130,18 +123,12 @@ static void __vlan_del_list(struct net_bridge_vlan *v)
static int __vlan_vid_del(struct net_device *dev, struct net_bridge *br,
u16 vid)
{
- struct switchdev_obj_port_vlan v = {
- .obj.orig_dev = dev,
- .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- .vid_begin = vid,
- .vid_end = vid,
- };
int err;
/* Try switchdev op first. In case it is not supported, fallback to
* 8021q del.
*/
- err = switchdev_port_obj_del(dev, &v.obj);
+ err = br_switchdev_port_vlan_del(dev, vid);
if (err == -EOPNOTSUPP) {
vlan_vid_del(dev, br->vlan_proto, vid);
return 0;
@@ -259,6 +246,10 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags)
goto out_filt;
v->brvlan = masterv;
v->stats = masterv->stats;
+ } else {
+ err = br_switchdev_port_vlan_add(dev, v->vid, flags);
+ if (err && err != -EOPNOTSUPP)
+ goto out;
}
/* Add the dev mac and count the vlan only if it's usable */
@@ -294,6 +285,8 @@ out_filt:
br_vlan_put_master(masterv);
v->brvlan = NULL;
}
+ } else {
+ br_switchdev_port_vlan_del(dev, v->vid);
}
goto out;
@@ -319,6 +312,11 @@ static int __vlan_del(struct net_bridge_vlan *v)
err = __vlan_vid_del(p->dev, p->br, v->vid);
if (err)
goto out;
+ } else {
+ err = br_switchdev_port_vlan_del(v->br->dev, v->vid);
+ if (err && err != -EOPNOTSUPP)
+ goto out;
+ err = 0;
}
if (br_vlan_should_use(v)) {
@@ -564,6 +562,48 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid)
return false;
}
+static int br_vlan_add_existing(struct net_bridge *br,
+ struct net_bridge_vlan_group *vg,
+ struct net_bridge_vlan *vlan,
+ u16 flags, bool *changed)
+{
+ int err;
+
+ err = br_switchdev_port_vlan_add(br->dev, vlan->vid, flags);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ if (!br_vlan_is_brentry(vlan)) {
+ /* Trying to change flags of non-existent bridge vlan */
+ if (!(flags & BRIDGE_VLAN_INFO_BRENTRY)) {
+ err = -EINVAL;
+ goto err_flags;
+ }
+ /* It was only kept for port vlans, now make it real */
+ err = br_fdb_insert(br, NULL, br->dev->dev_addr,
+ vlan->vid);
+ if (err) {
+ br_err(br, "failed to insert local address into bridge forwarding table\n");
+ goto err_fdb_insert;
+ }
+
+ refcount_inc(&vlan->refcnt);
+ vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
+ vg->num_vlans++;
+ *changed = true;
+ }
+
+ if (__vlan_add_flags(vlan, flags))
+ *changed = true;
+
+ return 0;
+
+err_fdb_insert:
+err_flags:
+ br_switchdev_port_vlan_del(br->dev, vlan->vid);
+ return err;
+}
+
/* Must be protected by RTNL.
* Must be called with vid in range from 1 to 4094 inclusive.
* changed must be true only if the vlan was created or updated
@@ -579,28 +619,8 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed)
*changed = false;
vg = br_vlan_group(br);
vlan = br_vlan_find(vg, vid);
- if (vlan) {
- if (!br_vlan_is_brentry(vlan)) {
- /* Trying to change flags of non-existent bridge vlan */
- if (!(flags & BRIDGE_VLAN_INFO_BRENTRY))
- return -EINVAL;
- /* It was only kept for port vlans, now make it real */
- ret = br_fdb_insert(br, NULL, br->dev->dev_addr,
- vlan->vid);
- if (ret) {
- br_err(br, "failed insert local address into bridge forwarding table\n");
- return ret;
- }
- refcount_inc(&vlan->refcnt);
- vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY;
- vg->num_vlans++;
- *changed = true;
- }
- if (__vlan_add_flags(vlan, flags))
- *changed = true;
-
- return 0;
- }
+ if (vlan)
+ return br_vlan_add_existing(br, vg, vlan, flags, changed);
vlan = kzalloc(sizeof(*vlan), GFP_KERNEL);
if (!vlan)
@@ -1053,13 +1073,6 @@ err_vlan_enabled:
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
bool *changed)
{
- struct switchdev_obj_port_vlan v = {
- .obj.orig_dev = port->dev,
- .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN,
- .flags = flags,
- .vid_begin = vid,
- .vid_end = vid,
- };
struct net_bridge_vlan *vlan;
int ret;
@@ -1069,7 +1082,7 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags,
vlan = br_vlan_find(nbp_vlan_group(port), vid);
if (vlan) {
/* Pass the flags to the hardware bridge */
- ret = switchdev_port_obj_add(port->dev, &v.obj);
+ ret = br_switchdev_port_vlan_add(port->dev, vid, flags);
if (ret && ret != -EOPNOTSUPP)
return ret;
*changed = __vlan_add_flags(vlan, flags);
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index b286ed5596c3..c2138e7e8263 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1949,7 +1949,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
int off, pad = 0;
unsigned int size_kern, match_size = mwt->match_size;
- strlcpy(name, mwt->u.name, sizeof(name));
+ if (strscpy(name, mwt->u.name, sizeof(name)) < 0)
+ return -EINVAL;
if (state->buf_kern_start)
dst = state->buf_kern_start + state->buf_kern_offset;
diff --git a/net/core/Makefile b/net/core/Makefile
index 7080417f8bc8..80175e6a2eb8 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -31,3 +31,4 @@ obj-$(CONFIG_DST_CACHE) += dst_cache.o
obj-$(CONFIG_HWBM) += hwbm.o
obj-$(CONFIG_NET_DEVLINK) += devlink.o
obj-$(CONFIG_GRO_CELLS) += gro_cells.o
+obj-$(CONFIG_FAILOVER) += failover.o
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 475246b355f0..22099705cc41 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -702,12 +702,13 @@ static int devlink_nl_cmd_port_set_doit(struct sk_buff *skb,
return 0;
}
-static int devlink_port_split(struct devlink *devlink,
- u32 port_index, u32 count)
+static int devlink_port_split(struct devlink *devlink, u32 port_index,
+ u32 count, struct netlink_ext_ack *extack)
{
if (devlink->ops && devlink->ops->port_split)
- return devlink->ops->port_split(devlink, port_index, count);
+ return devlink->ops->port_split(devlink, port_index, count,
+ extack);
return -EOPNOTSUPP;
}
@@ -724,14 +725,15 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
count = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_SPLIT_COUNT]);
- return devlink_port_split(devlink, port_index, count);
+ return devlink_port_split(devlink, port_index, count, info->extack);
}
-static int devlink_port_unsplit(struct devlink *devlink, u32 port_index)
+static int devlink_port_unsplit(struct devlink *devlink, u32 port_index,
+ struct netlink_ext_ack *extack)
{
if (devlink->ops && devlink->ops->port_unsplit)
- return devlink->ops->port_unsplit(devlink, port_index);
+ return devlink->ops->port_unsplit(devlink, port_index, extack);
return -EOPNOTSUPP;
}
@@ -745,7 +747,7 @@ static int devlink_nl_cmd_port_unsplit_doit(struct sk_buff *skb,
return -EINVAL;
port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
- return devlink_port_unsplit(devlink, port_index);
+ return devlink_port_unsplit(devlink, port_index, info->extack);
}
static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
@@ -1826,7 +1828,6 @@ send_done:
nla_put_failure:
err = -EMSGSIZE;
err_table_put:
- genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
}
@@ -2032,7 +2033,6 @@ int devlink_dpipe_entry_ctx_prepare(struct devlink_dpipe_dump_ctx *dump_ctx)
return 0;
nla_put_failure:
- genlmsg_cancel(dump_ctx->skb, dump_ctx->hdr);
nlmsg_free(dump_ctx->skb);
return -EMSGSIZE;
}
@@ -2249,7 +2249,6 @@ send_done:
nla_put_failure:
err = -EMSGSIZE;
err_table_put:
- genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
}
@@ -2551,7 +2550,6 @@ nla_put_failure:
err = -EMSGSIZE;
err_resource_put:
err_skb_send_alloc:
- genlmsg_cancel(skb, hdr);
nlmsg_free(skb);
return err;
}
@@ -2603,7 +2601,7 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
return err;
}
- return devlink->ops->reload(devlink);
+ return devlink->ops->reload(devlink, info->extack);
}
static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
diff --git a/net/core/failover.c b/net/core/failover.c
new file mode 100644
index 000000000000..4a92a98ccce9
--- /dev/null
+++ b/net/core/failover.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018, Intel Corporation. */
+
+/* A common module to handle registrations and notifications for paravirtual
+ * drivers to enable accelerated datapath and support VF live migration.
+ *
+ * The notifier and event handling code is based on netvsc driver.
+ */
+
+#include <linux/module.h>
+#include <linux/etherdevice.h>
+#include <uapi/linux/if_arp.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_vlan.h>
+#include <net/failover.h>
+
+static LIST_HEAD(failover_list);
+static DEFINE_SPINLOCK(failover_lock);
+
+static struct net_device *failover_get_bymac(u8 *mac, struct failover_ops **ops)
+{
+ struct net_device *failover_dev;
+ struct failover *failover;
+
+ spin_lock(&failover_lock);
+ list_for_each_entry(failover, &failover_list, list) {
+ failover_dev = rtnl_dereference(failover->failover_dev);
+ if (ether_addr_equal(failover_dev->perm_addr, mac)) {
+ *ops = rtnl_dereference(failover->ops);
+ spin_unlock(&failover_lock);
+ return failover_dev;
+ }
+ }
+ spin_unlock(&failover_lock);
+ return NULL;
+}
+
+/**
+ * failover_slave_register - Register a slave netdev
+ *
+ * @slave_dev: slave netdev that is being registered
+ *
+ * Registers a slave device to a failover instance. Only ethernet devices
+ * are supported.
+ */
+static int failover_slave_register(struct net_device *slave_dev)
+{
+ struct netdev_lag_upper_info lag_upper_info;
+ struct net_device *failover_dev;
+ struct failover_ops *fops;
+ int err;
+
+ if (slave_dev->type != ARPHRD_ETHER)
+ goto done;
+
+ ASSERT_RTNL();
+
+ failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
+ if (!failover_dev)
+ goto done;
+
+ if (fops && fops->slave_pre_register &&
+ fops->slave_pre_register(slave_dev, failover_dev))
+ goto done;
+
+ err = netdev_rx_handler_register(slave_dev, fops->slave_handle_frame,
+ failover_dev);
+ if (err) {
+ netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n",
+ err);
+ goto done;
+ }
+
+ lag_upper_info.tx_type = NETDEV_LAG_TX_TYPE_ACTIVEBACKUP;
+ err = netdev_master_upper_dev_link(slave_dev, failover_dev, NULL,
+ &lag_upper_info, NULL);
+ if (err) {
+ netdev_err(slave_dev, "can not set failover device %s (err = %d)\n",
+ failover_dev->name, err);
+ goto err_upper_link;
+ }
+
+ slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
+
+ if (fops && fops->slave_register &&
+ !fops->slave_register(slave_dev, failover_dev))
+ return NOTIFY_OK;
+
+ netdev_upper_dev_unlink(slave_dev, failover_dev);
+ slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+err_upper_link:
+ netdev_rx_handler_unregister(slave_dev);
+done:
+ return NOTIFY_DONE;
+}
+
+/**
+ * failover_slave_unregister - Unregister a slave netdev
+ *
+ * @slave_dev: slave netdev that is being unregistered
+ *
+ * Unregisters a slave device from a failover instance.
+ */
+int failover_slave_unregister(struct net_device *slave_dev)
+{
+ struct net_device *failover_dev;
+ struct failover_ops *fops;
+
+ if (!netif_is_failover_slave(slave_dev))
+ goto done;
+
+ ASSERT_RTNL();
+
+ failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
+ if (!failover_dev)
+ goto done;
+
+ if (fops && fops->slave_pre_unregister &&
+ fops->slave_pre_unregister(slave_dev, failover_dev))
+ goto done;
+
+ netdev_rx_handler_unregister(slave_dev);
+ netdev_upper_dev_unlink(slave_dev, failover_dev);
+ slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
+
+ if (fops && fops->slave_unregister &&
+ !fops->slave_unregister(slave_dev, failover_dev))
+ return NOTIFY_OK;
+
+done:
+ return NOTIFY_DONE;
+}
+EXPORT_SYMBOL_GPL(failover_slave_unregister);
+
+static int failover_slave_link_change(struct net_device *slave_dev)
+{
+ struct net_device *failover_dev;
+ struct failover_ops *fops;
+
+ if (!netif_is_failover_slave(slave_dev))
+ goto done;
+
+ ASSERT_RTNL();
+
+ failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
+ if (!failover_dev)
+ goto done;
+
+ if (!netif_running(failover_dev))
+ goto done;
+
+ if (fops && fops->slave_link_change &&
+ !fops->slave_link_change(slave_dev, failover_dev))
+ return NOTIFY_OK;
+
+done:
+ return NOTIFY_DONE;
+}
+
+static int failover_slave_name_change(struct net_device *slave_dev)
+{
+ struct net_device *failover_dev;
+ struct failover_ops *fops;
+
+ if (!netif_is_failover_slave(slave_dev))
+ goto done;
+
+ ASSERT_RTNL();
+
+ failover_dev = failover_get_bymac(slave_dev->perm_addr, &fops);
+ if (!failover_dev)
+ goto done;
+
+ if (!netif_running(failover_dev))
+ goto done;
+
+ if (fops && fops->slave_name_change &&
+ !fops->slave_name_change(slave_dev, failover_dev))
+ return NOTIFY_OK;
+
+done:
+ return NOTIFY_DONE;
+}
+
+static int
+failover_event(struct notifier_block *this, unsigned long event, void *ptr)
+{
+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
+
+ /* Skip parent events */
+ if (netif_is_failover(event_dev))
+ return NOTIFY_DONE;
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ return failover_slave_register(event_dev);
+ case NETDEV_UNREGISTER:
+ return failover_slave_unregister(event_dev);
+ case NETDEV_UP:
+ case NETDEV_DOWN:
+ case NETDEV_CHANGE:
+ return failover_slave_link_change(event_dev);
+ case NETDEV_CHANGENAME:
+ return failover_slave_name_change(event_dev);
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block failover_notifier = {
+ .notifier_call = failover_event,
+};
+
+static void
+failover_existing_slave_register(struct net_device *failover_dev)
+{
+ struct net *net = dev_net(failover_dev);
+ struct net_device *dev;
+
+ rtnl_lock();
+ for_each_netdev(net, dev) {
+ if (netif_is_failover(dev))
+ continue;
+ if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr))
+ failover_slave_register(dev);
+ }
+ rtnl_unlock();
+}
+
+/**
+ * failover_register - Register a failover instance
+ *
+ * @dev: failover netdev
+ * @ops: failover ops
+ *
+ * Allocate and register a failover instance for a failover netdev. ops
+ * provides handlers for slave device register/unregister/link change/
+ * name change events.
+ *
+ * Return: pointer to failover instance
+ */
+struct failover *failover_register(struct net_device *dev,
+ struct failover_ops *ops)
+{
+ struct failover *failover;
+
+ if (dev->type != ARPHRD_ETHER)
+ return ERR_PTR(-EINVAL);
+
+ failover = kzalloc(sizeof(*failover), GFP_KERNEL);
+ if (!failover)
+ return ERR_PTR(-ENOMEM);
+
+ rcu_assign_pointer(failover->ops, ops);
+ dev_hold(dev);
+ dev->priv_flags |= IFF_FAILOVER;
+ rcu_assign_pointer(failover->failover_dev, dev);
+
+ spin_lock(&failover_lock);
+ list_add_tail(&failover->list, &failover_list);
+ spin_unlock(&failover_lock);
+
+ netdev_info(dev, "failover master:%s registered\n", dev->name);
+
+ failover_existing_slave_register(dev);
+
+ return failover;
+}
+EXPORT_SYMBOL_GPL(failover_register);
+
+/**
+ * failover_unregister - Unregister a failover instance
+ *
+ * @failover: pointer to failover instance
+ *
+ * Unregisters and frees a failover instance.
+ */
+void failover_unregister(struct failover *failover)
+{
+ struct net_device *failover_dev;
+
+ failover_dev = rcu_dereference(failover->failover_dev);
+
+ netdev_info(failover_dev, "failover master:%s unregistered\n",
+ failover_dev->name);
+
+ failover_dev->priv_flags &= ~IFF_FAILOVER;
+ dev_put(failover_dev);
+
+ spin_lock(&failover_lock);
+ list_del(&failover->list);
+ spin_unlock(&failover_lock);
+
+ kfree(failover);
+}
+EXPORT_SYMBOL_GPL(failover_unregister);
+
+static __init int
+failover_init(void)
+{
+ register_netdevice_notifier(&failover_notifier);
+
+ return 0;
+}
+module_init(failover_init);
+
+static __exit
+void failover_exit(void)
+{
+ unregister_netdevice_notifier(&failover_notifier);
+}
+module_exit(failover_exit);
+
+MODULE_DESCRIPTION("Generic failover infrastructure/interface");
+MODULE_LICENSE("GPL v2");
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 4fc1e84d77ec..53f96e4f7bf5 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1334,7 +1334,7 @@ __u32 __get_hash_from_flowi6(const struct flowi6 *fl6, struct flow_keys *keys)
keys->ports.src = fl6->fl6_sport;
keys->ports.dst = fl6->fl6_dport;
keys->keyid.keyid = fl6->fl6_gre_key;
- keys->tags.flow_label = (__force u32)fl6->flowlabel;
+ keys->tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
keys->basic.ip_proto = fl6->flowi6_proto;
return flow_hash_from_keys(keys);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c476f0794132..bb7e80f4ced3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1214,9 +1214,6 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
cpumask_var_t mask;
unsigned long index;
- if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
- return -ENOMEM;
-
index = get_netdev_queue_index(queue);
if (dev->num_tc) {
@@ -1226,6 +1223,9 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue,
return -EINVAL;
}
+ if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
rcu_read_lock();
dev_maps = rcu_dereference(dev->xps_maps);
if (dev_maps) {
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 80802546c279..9a1ba2015ad8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -59,6 +59,9 @@
#include <net/rtnetlink.h>
#include <net/net_namespace.h>
+#define RTNL_MAX_TYPE 48
+#define RTNL_SLAVE_MAX_TYPE 36
+
struct rtnl_link {
rtnl_doit_func doit;
rtnl_dumpit_func dumpit;
@@ -389,6 +392,11 @@ int rtnl_link_register(struct rtnl_link_ops *ops)
{
int err;
+ /* Sanity-check max sizes to avoid stack buffer overflow. */
+ if (WARN_ON(ops->maxtype > RTNL_MAX_TYPE ||
+ ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE))
+ return -EINVAL;
+
rtnl_lock();
err = __rtnl_link_register(ops);
rtnl_unlock();
@@ -2902,13 +2910,16 @@ replay:
}
if (1) {
- struct nlattr *attr[ops ? ops->maxtype + 1 : 1];
- struct nlattr *slave_attr[m_ops ? m_ops->slave_maxtype + 1 : 1];
+ struct nlattr *attr[RTNL_MAX_TYPE + 1];
+ struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
struct nlattr **data = NULL;
struct nlattr **slave_data = NULL;
struct net *dest_net, *link_net = NULL;
if (ops) {
+ if (ops->maxtype > RTNL_MAX_TYPE)
+ return -EINVAL;
+
if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
err = nla_parse_nested(attr, ops->maxtype,
linkinfo[IFLA_INFO_DATA],
@@ -2925,6 +2936,9 @@ replay:
}
if (m_ops) {
+ if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
+ return -EINVAL;
+
if (m_ops->slave_maxtype &&
linkinfo[IFLA_INFO_SLAVE_DATA]) {
err = nla_parse_nested(slave_attr,
diff --git a/net/core/sock.c b/net/core/sock.c
index 435a0ba85e52..feca4c98f8a0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -728,9 +728,22 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
sock_valbool_flag(sk, SOCK_DBG, valbool);
break;
case SO_REUSEADDR:
- sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
+ val = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
+ if ((sk->sk_family == PF_INET || sk->sk_family == PF_INET6) &&
+ inet_sk(sk)->inet_num &&
+ (sk->sk_reuse != val)) {
+ ret = (sk->sk_state == TCP_ESTABLISHED) ? -EISCONN : -EUCLEAN;
+ break;
+ }
+ sk->sk_reuse = val;
break;
case SO_REUSEPORT:
+ if ((sk->sk_family == PF_INET || sk->sk_family == PF_INET6) &&
+ inet_sk(sk)->inet_num &&
+ (sk->sk_reuseport != valbool)) {
+ ret = (sk->sk_state == TCP_ESTABLISHED) ? -EISCONN : -EUCLEAN;
+ break;
+ }
sk->sk_reuseport = valbool;
break;
case SO_TYPE:
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 37ccbe62eb1a..ba6fc3c1186b 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -53,7 +53,6 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
if (timeo < rto)
timeo = rto;
- tw->tw_timeout = DCCP_TIMEWAIT_LEN;
if (state == DCCP_TIME_WAIT)
timeo = DCCP_TIMEWAIT_LEN;
diff --git a/net/dsa/port.c b/net/dsa/port.c
index 2413beb995be..ed0595459df1 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -252,6 +252,9 @@ int dsa_port_vlan_add(struct dsa_port *dp,
.vlan = vlan,
};
+ if (netif_is_bridge_master(vlan->obj.orig_dev))
+ return -EOPNOTSUPP;
+
if (br_vlan_enabled(dp->bridge_dev))
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info);
@@ -267,6 +270,9 @@ int dsa_port_vlan_del(struct dsa_port *dp,
.vlan = vlan,
};
+ if (netif_is_bridge_master(vlan->obj.orig_dev))
+ return -EOPNOTSUPP;
+
if (br_vlan_enabled(dp->bridge_dev))
return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info);
diff --git a/net/ipv4/bpfilter/sockopt.c b/net/ipv4/bpfilter/sockopt.c
index 42a96d2d8d05..5e04ed25bc0e 100644
--- a/net/ipv4/bpfilter/sockopt.c
+++ b/net/ipv4/bpfilter/sockopt.c
@@ -10,8 +10,9 @@ int (*bpfilter_process_sockopt)(struct sock *sk, int optname,
unsigned int optlen, bool is_set);
EXPORT_SYMBOL_GPL(bpfilter_process_sockopt);
-int bpfilter_mbox_request(struct sock *sk, int optname, char __user *optval,
- unsigned int optlen, bool is_set)
+static int bpfilter_mbox_request(struct sock *sk, int optname,
+ char __user *optval,
+ unsigned int optlen, bool is_set)
{
if (!bpfilter_process_sockopt) {
int err = request_module("bpfilter");
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 40f001782c1b..d7585ab1a77a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -99,6 +99,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
[IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
[IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
[IFA_FLAGS] = { .type = NLA_U32 },
+ [IFA_RT_PRIORITY] = { .type = NLA_U32 },
};
#define IN4_ADDR_HSIZE_SHIFT 8
@@ -835,6 +836,9 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
else
memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+ if (tb[IFA_RT_PRIORITY])
+ ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
+
if (tb[IFA_CACHEINFO]) {
struct ifa_cacheinfo *ci;
@@ -906,12 +910,20 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
extack);
} else {
+ u32 new_metric = ifa->ifa_rt_priority;
+
inet_free_ifa(ifa);
if (nlh->nlmsg_flags & NLM_F_EXCL ||
!(nlh->nlmsg_flags & NLM_F_REPLACE))
return -EEXIST;
ifa = ifa_existing;
+
+ if (ifa->ifa_rt_priority != new_metric) {
+ fib_modify_prefix_metric(ifa, new_metric);
+ ifa->ifa_rt_priority = new_metric;
+ }
+
set_ifa_lifetime(ifa, valid_lft, prefered_lft);
cancel_delayed_work(&check_lifetime_work);
queue_delayed_work(system_power_efficient_wq,
@@ -1549,6 +1561,7 @@ static size_t inet_nlmsg_size(void)
+ nla_total_size(4) /* IFA_BROADCAST */
+ nla_total_size(IFNAMSIZ) /* IFA_LABEL */
+ nla_total_size(4) /* IFA_FLAGS */
+ + nla_total_size(4) /* IFA_RT_PRIORITY */
+ nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
}
@@ -1618,6 +1631,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
(ifa->ifa_label[0] &&
nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
+ (ifa->ifa_rt_priority &&
+ nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
preferred, valid))
goto nla_put_failure;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b69e2824c761..63aa39b3af03 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -847,7 +847,8 @@ out_err:
* to fib engine. It is legal, because all events occur
* only when netlink is already locked.
*/
-static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
+static void fib_magic(int cmd, int type, __be32 dst, int dst_len,
+ struct in_ifaddr *ifa, u32 rt_priority)
{
struct net *net = dev_net(ifa->ifa_dev->dev);
u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
@@ -857,6 +858,7 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
.fc_type = type,
.fc_dst = dst,
.fc_dst_len = dst_len,
+ .fc_priority = rt_priority,
.fc_prefsrc = ifa->ifa_local,
.fc_oif = ifa->ifa_dev->dev->ifindex,
.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
@@ -902,31 +904,57 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
}
}
- fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim, 0);
if (!(dev->flags & IFF_UP))
return;
/* Add broadcast address, if it is explicitly assigned. */
if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
- fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
+ prim, 0);
if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
(prefix != addr || ifa->ifa_prefixlen < 32)) {
if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
fib_magic(RTM_NEWROUTE,
dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- prefix, ifa->ifa_prefixlen, prim);
+ prefix, ifa->ifa_prefixlen, prim,
+ ifa->ifa_rt_priority);
/* Add network specific broadcasts, when it takes a sense */
if (ifa->ifa_prefixlen < 31) {
- fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
+ fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32,
+ prim, 0);
fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
- 32, prim);
+ 32, prim, 0);
}
}
}
+void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric)
+{
+ __be32 prefix = ifa->ifa_address & ifa->ifa_mask;
+ struct in_device *in_dev = ifa->ifa_dev;
+ struct net_device *dev = in_dev->dev;
+
+ if (!(dev->flags & IFF_UP) ||
+ ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) ||
+ ipv4_is_zeronet(prefix) ||
+ prefix == ifa->ifa_local || ifa->ifa_prefixlen == 32)
+ return;
+
+ /* add the new */
+ fib_magic(RTM_NEWROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ prefix, ifa->ifa_prefixlen, ifa, new_metric);
+
+ /* delete the old */
+ fib_magic(RTM_DELROUTE,
+ dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+ prefix, ifa->ifa_prefixlen, ifa, ifa->ifa_rt_priority);
+}
+
/* Delete primary or secondary address.
* Optionally, on secondary address promotion consider the addresses
* from subnet iprim as deleted, even if they are in device list.
@@ -968,7 +996,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
fib_magic(RTM_DELROUTE,
dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
- any, ifa->ifa_prefixlen, prim);
+ any, ifa->ifa_prefixlen, prim, 0);
subnet = 1;
}
@@ -1052,17 +1080,20 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
no_promotions:
if (!(ok & BRD_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
+ prim, 0);
if (subnet && ifa->ifa_prefixlen < 31) {
if (!(ok & BRD1_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32,
+ prim, 0);
if (!(ok & BRD0_OK))
- fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32,
+ prim, 0);
}
if (!(ok & LOCAL_OK)) {
unsigned int addr_type;
- fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
+ fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim, 0);
/* Check, that this local address finally disappeared. */
addr_type = inet_addr_type_dev_table(dev_net(dev), dev,
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 6608db23f54b..f3c89ccf14c5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -717,6 +717,8 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
nla_strlcpy(tmp, nla, sizeof(tmp));
val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca);
} else {
+ if (nla_len(nla) != sizeof(u32))
+ return false;
val = nla_get_u32(nla);
}
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 6b0e362cc99b..38d906baf1df 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -328,7 +328,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
if (tdev) {
hlen = tdev->hard_header_len + tdev->needed_headroom;
- mtu = tdev->mtu;
+ mtu = min(tdev->mtu, IP_MAX_MTU);
}
dev->needed_headroom = t_hlen + hlen;
@@ -362,7 +362,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
nt = netdev_priv(dev);
t_hlen = nt->hlen + sizeof(struct iphdr);
dev->min_mtu = ETH_MIN_MTU;
- dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
+ dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
ip_tunnel_add(itn, nt);
return nt;
@@ -930,7 +930,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
+ int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
@@ -1107,7 +1107,7 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
mtu = ip_tunnel_bind_dev(dev);
if (tb[IFLA_MTU]) {
- unsigned int max = 0xfff8 - dev->hard_header_len - nt->hlen;
+ unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;
mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
(unsigned int)(max - sizeof(struct iphdr)));
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 30221701614c..cafb0506c8c9 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -35,17 +35,19 @@ mr_table_alloc(struct net *net, u32 id,
struct net *net))
{
struct mr_table *mrt;
+ int err;
mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
if (!mrt)
- return NULL;
+ return ERR_PTR(-ENOMEM);
mrt->id = id;
write_pnet(&mrt->net, net);
mrt->ops = *ops;
- if (rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params)) {
+ err = rhltable_init(&mrt->mfc_hash, mrt->ops.rht_params);
+ if (err) {
kfree(mrt);
- return NULL;
+ return ERR_PTR(err);
}
INIT_LIST_HEAD(&mrt->mfc_cache_list);
INIT_LIST_HEAD(&mrt->mfc_unres_queue);
diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c
index 5121c6475e6b..04311f7067e2 100644
--- a/net/ipv4/metrics.c
+++ b/net/ipv4/metrics.c
@@ -32,6 +32,8 @@ int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, int fc_mx_len,
if (val == TCP_CA_UNSPEC)
return -EINVAL;
} else {
+ if (nla_len(nla) != sizeof(u32))
+ return -EINVAL;
val = nla_get_u32(nla);
}
if (type == RTAX_ADVMSS && val > 65535 - 40)
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 280048e1e395..bbfc356cb1b5 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -29,7 +29,10 @@ config NF_SOCKET_IPV4
tristate "IPv4 socket lookup support"
help
This option enables the IPv4 socket lookup infrastructure. This is
- is required by the iptables socket match.
+ is required by the {ip,nf}tables socket match.
+
+config NF_TPROXY_IPV4
+ tristate "IPv4 tproxy support"
if NF_TABLES
@@ -129,10 +132,7 @@ config NFT_CHAIN_NAT_IPV4
source and destination ports.
config NF_NAT_MASQUERADE_IPV4
- tristate "IPv4 masquerade support"
- help
- This is the kernel functionality to provide NAT in the masquerade
- flavour (automatic source address selection).
+ bool
config NFT_MASQ_IPV4
tristate "IPv4 masquerading support for nf_tables"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 0e5edd0c7926..8394c17c269f 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -10,12 +10,14 @@ nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
+nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
# defrag
obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
obj-$(CONFIG_NF_SOCKET_IPV4) += nf_socket_ipv4.o
+obj-$(CONFIG_NF_TPROXY_IPV4) += nf_tproxy_ipv4.o
# logging
obj-$(CONFIG_NF_LOG_ARP) += nf_log_arp.o
@@ -32,9 +34,6 @@ nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o
$(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
-obj-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
-
-
# NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index f538c5001547..ad3aeff152ed 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -7,7 +7,6 @@
*/
#include <linux/types.h>
-#include <linux/module.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
@@ -157,6 +156,3 @@ void nf_nat_masquerade_ipv4_unregister_notifier(void)
unregister_inetaddr_notifier(&masq_inet_notifier);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
new file mode 100644
index 000000000000..805e83ec3ad9
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
@@ -0,0 +1,147 @@
+/*
+ * Copyright (C) 2007-2008 BalaBit IT Ltd.
+ * Author: Krisztian Kovacs
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <net/netfilter/nf_tproxy.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/inet_sock.h>
+#include <linux/ip.h>
+#include <net/checksum.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+#include <linux/inetdevice.h>
+
+struct sock *
+nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
+ __be32 laddr, __be16 lport, struct sock *sk)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct tcphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
+ if (hp == NULL) {
+ inet_twsk_put(inet_twsk(sk));
+ return NULL;
+ }
+
+ if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
+ /* SYN to a TIME_WAIT socket, we'd rather redirect it
+ * to a listener socket if there's one */
+ struct sock *sk2;
+
+ sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
+ iph->saddr, laddr ? laddr : iph->daddr,
+ hp->source, lport ? lport : hp->dest,
+ skb->dev, NF_TPROXY_LOOKUP_LISTENER);
+ if (sk2) {
+ inet_twsk_deschedule_put(inet_twsk(sk));
+ sk = sk2;
+ }
+ }
+
+ return sk;
+}
+EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait4);
+
+__be32 nf_tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
+{
+ struct in_device *indev;
+ __be32 laddr;
+
+ if (user_laddr)
+ return user_laddr;
+
+ laddr = 0;
+ indev = __in_dev_get_rcu(skb->dev);
+ for_primary_ifa(indev) {
+ laddr = ifa->ifa_local;
+ break;
+ } endfor_ifa(indev);
+
+ return laddr ? laddr : daddr;
+}
+EXPORT_SYMBOL_GPL(nf_tproxy_laddr4);
+
+struct sock *
+nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
+ const u8 protocol,
+ const __be32 saddr, const __be32 daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in,
+ const enum nf_tproxy_lookup_t lookup_type)
+{
+ struct sock *sk;
+ struct tcphdr *tcph;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ switch (lookup_type) {
+ case NF_TPROXY_LOOKUP_LISTENER:
+ tcph = hp;
+ sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
+ ip_hdrlen(skb) +
+ __tcp_hdrlen(tcph),
+ saddr, sport,
+ daddr, dport,
+ in->ifindex, 0);
+
+ if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ break;
+ case NF_TPROXY_LOOKUP_ESTABLISHED:
+ sk = inet_lookup_established(net, &tcp_hashinfo,
+ saddr, sport, daddr, dport,
+ in->ifindex);
+ break;
+ default:
+ BUG();
+ }
+ break;
+ case IPPROTO_UDP:
+ sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ if (sk) {
+ int connected = (sk->sk_state == TCP_ESTABLISHED);
+ int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
+
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ if ((lookup_type == NF_TPROXY_LOOKUP_ESTABLISHED &&
+ (!connected || wildcard)) ||
+ (lookup_type == NF_TPROXY_LOOKUP_LISTENER && connected)) {
+ sock_put(sk);
+ sk = NULL;
+ }
+ }
+ break;
+ default:
+ WARN_ON(1);
+ sk = NULL;
+ }
+
+ pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
+ protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
+
+ return sk;
+}
+EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
+MODULE_DESCRIPTION("Netfilter IPv4 transparent proxy support");
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index d2eed3ddcb0a..d06247ba08b2 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -30,6 +30,7 @@
static int zero;
static int one = 1;
+static int two = 2;
static int four = 4;
static int thousand = 1000;
static int gso_max_segs = GSO_MAX_SEGS;
@@ -845,7 +846,9 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_tcp_tw_reuse,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &two,
},
{
.procname = "tcp_max_tw_buckets",
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 1191cac72109..355d3dffd021 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -254,8 +254,10 @@ static void tcp_ecn_withdraw_cwr(struct tcp_sock *tp)
tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;
}
-static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
+static void __tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
+ struct tcp_sock *tp = tcp_sk(sk);
+
switch (TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK) {
case INET_ECN_NOT_ECT:
/* Funny extension: if ECT is not set on a segment,
@@ -263,31 +265,31 @@ static void __tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
* it is probably a retransmit.
*/
if (tp->ecn_flags & TCP_ECN_SEEN)
- tcp_enter_quickack_mode((struct sock *)tp, 1);
+ tcp_enter_quickack_mode(sk, 1);
break;
case INET_ECN_CE:
- if (tcp_ca_needs_ecn((struct sock *)tp))
- tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_IS_CE);
+ if (tcp_ca_needs_ecn(sk))
+ tcp_ca_event(sk, CA_EVENT_ECN_IS_CE);
if (!(tp->ecn_flags & TCP_ECN_DEMAND_CWR)) {
/* Better not delay acks, sender can have a very low cwnd */
- tcp_enter_quickack_mode((struct sock *)tp, 1);
+ tcp_enter_quickack_mode(sk, 1);
tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
}
tp->ecn_flags |= TCP_ECN_SEEN;
break;
default:
- if (tcp_ca_needs_ecn((struct sock *)tp))
- tcp_ca_event((struct sock *)tp, CA_EVENT_ECN_NO_CE);
+ if (tcp_ca_needs_ecn(sk))
+ tcp_ca_event(sk, CA_EVENT_ECN_NO_CE);
tp->ecn_flags |= TCP_ECN_SEEN;
break;
}
}
-static void tcp_ecn_check_ce(struct tcp_sock *tp, const struct sk_buff *skb)
+static void tcp_ecn_check_ce(struct sock *sk, const struct sk_buff *skb)
{
- if (tp->ecn_flags & TCP_ECN_OK)
- __tcp_ecn_check_ce(tp, skb);
+ if (tcp_sk(sk)->ecn_flags & TCP_ECN_OK)
+ __tcp_ecn_check_ce(sk, skb);
}
static void tcp_ecn_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th)
@@ -710,7 +712,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
}
icsk->icsk_ack.lrcvtime = now;
- tcp_ecn_check_ce(tp, skb);
+ tcp_ecn_check_ce(sk, skb);
if (skb->len >= 128)
tcp_grow_window(sk, skb);
@@ -4434,7 +4436,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
u32 seq, end_seq;
bool fragstolen;
- tcp_ecn_check_ce(tp, skb);
+ tcp_ecn_check_ce(sk, skb);
if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFODROP);
@@ -5390,11 +5392,11 @@ discard:
* the rest is checked inline. Fast processing is turned on in
* tcp_data_queue when everything is OK.
*/
-void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
- const struct tcphdr *th)
+void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
{
- unsigned int len = skb->len;
+ const struct tcphdr *th = (const struct tcphdr *)skb->data;
struct tcp_sock *tp = tcp_sk(sk);
+ unsigned int len = skb->len;
/* TCP congestion window tracking */
trace_tcp_probe(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index adbdb503db0c..633963e228bc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -110,8 +110,38 @@ static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb)
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
+ const struct inet_timewait_sock *tw = inet_twsk(sktw);
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
struct tcp_sock *tp = tcp_sk(sk);
+ int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
+
+ if (reuse == 2) {
+ /* Still does not detect *everything* that goes through
+ * lo, since we require a loopback src or dst address
+ * or direct binding to 'lo' interface.
+ */
+ bool loopback = false;
+ if (tw->tw_bound_dev_if == LOOPBACK_IFINDEX)
+ loopback = true;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (tw->tw_family == AF_INET6) {
+ if (ipv6_addr_loopback(&tw->tw_v6_daddr) ||
+ (ipv6_addr_v4mapped(&tw->tw_v6_daddr) &&
+ (tw->tw_v6_daddr.s6_addr[12] == 127)) ||
+ ipv6_addr_loopback(&tw->tw_v6_rcv_saddr) ||
+ (ipv6_addr_v4mapped(&tw->tw_v6_rcv_saddr) &&
+ (tw->tw_v6_rcv_saddr.s6_addr[12] == 127)))
+ loopback = true;
+ } else
+#endif
+ {
+ if (ipv4_is_loopback(tw->tw_daddr) ||
+ ipv4_is_loopback(tw->tw_rcv_saddr))
+ loopback = true;
+ }
+ if (!loopback)
+ reuse = 0;
+ }
/* With PAWS, it is safe from the viewpoint
of data integrity. Even without PAWS it is safe provided sequence
@@ -125,8 +155,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
and use initial timestamp retrieved from peer table.
*/
if (tcptw->tw_ts_recent_stamp &&
- (!twp || (sock_net(sk)->ipv4.sysctl_tcp_tw_reuse &&
- get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
+ (!twp || (reuse && get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
if (tp->write_seq == 0)
tp->write_seq = 1;
@@ -1486,7 +1515,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
sk->sk_rx_dst = NULL;
}
}
- tcp_rcv_established(sk, skb, tcp_hdr(skb));
+ tcp_rcv_established(sk, skb);
return 0;
}
@@ -2529,7 +2558,7 @@ static int __net_init tcp_sk_init(struct net *net)
net->ipv4.sysctl_tcp_orphan_retries = 0;
net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
- net->ipv4.sysctl_tcp_tw_reuse = 0;
+ net->ipv4.sysctl_tcp_tw_reuse = 2;
cnt = tcp_hashinfo.ehash_mask + 1;
net->ipv4.tcp_death_row.sysctl_max_tw_buckets = (cnt + 1) / 2;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index f867658b4b30..1dda1341a223 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -307,7 +307,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
if (timeo < rto)
timeo = rto;
- tw->tw_timeout = TCP_TIMEWAIT_LEN;
if (state == TCP_TIME_WAIT)
timeo = TCP_TIMEWAIT_LEN;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 3c27d00b5730..9c41a45a9d39 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -544,9 +544,7 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup_skb);
/* Must be called under rcu_read_lock().
* Does increment socket refcount.
*/
-#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
- IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \
- IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
+#if IS_ENABLED(CONFIG_NF_TPROXY_IPV4) || IS_ENABLED(CONFIG_NF_SOCKET_IPV4)
struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport,
__be32 daddr, __be16 dport, int dif)
{
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fbfd71a2d9c8..5596d87952d4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -986,17 +986,15 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
/* On success it returns ifp with increased reference count */
static struct inet6_ifaddr *
-ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
- const struct in6_addr *peer_addr, int pfxlen,
- int scope, u32 flags, u32 valid_lft, u32 prefered_lft,
+ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
bool can_block, struct netlink_ext_ack *extack)
{
gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC;
+ int addr_type = ipv6_addr_type(cfg->pfx);
struct net *net = dev_net(idev->dev);
struct inet6_ifaddr *ifa = NULL;
struct fib6_info *f6i = NULL;
int err = 0;
- int addr_type = ipv6_addr_type(addr);
if (addr_type == IPV6_ADDR_ANY ||
addr_type & IPV6_ADDR_MULTICAST ||
@@ -1019,7 +1017,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
*/
if (can_block) {
struct in6_validator_info i6vi = {
- .i6vi_addr = *addr,
+ .i6vi_addr = *cfg->pfx,
.i6vi_dev = idev,
.extack = extack,
};
@@ -1036,7 +1034,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
goto out;
}
- f6i = addrconf_f6i_alloc(net, idev, addr, false, gfp_flags);
+ f6i = addrconf_f6i_alloc(net, idev, cfg->pfx, false, gfp_flags);
if (IS_ERR(f6i)) {
err = PTR_ERR(f6i);
f6i = NULL;
@@ -1049,21 +1047,22 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
neigh_parms_data_state_setall(idev->nd_parms);
- ifa->addr = *addr;
- if (peer_addr)
- ifa->peer_addr = *peer_addr;
+ ifa->addr = *cfg->pfx;
+ if (cfg->peer_pfx)
+ ifa->peer_addr = *cfg->peer_pfx;
spin_lock_init(&ifa->lock);
INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);
INIT_HLIST_NODE(&ifa->addr_lst);
- ifa->scope = scope;
- ifa->prefix_len = pfxlen;
- ifa->flags = flags;
+ ifa->scope = cfg->scope;
+ ifa->prefix_len = cfg->plen;
+ ifa->rt_priority = cfg->rt_priority;
+ ifa->flags = cfg->ifa_flags;
/* No need to add the TENTATIVE flag for addresses with NODAD */
- if (!(flags & IFA_F_NODAD))
+ if (!(cfg->ifa_flags & IFA_F_NODAD))
ifa->flags |= IFA_F_TENTATIVE;
- ifa->valid_lft = valid_lft;
- ifa->prefered_lft = prefered_lft;
+ ifa->valid_lft = cfg->valid_lft;
+ ifa->prefered_lft = cfg->preferred_lft;
ifa->cstamp = ifa->tstamp = jiffies;
ifa->tokenized = false;
@@ -1260,11 +1259,10 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
{
struct inet6_dev *idev = ifp->idev;
struct in6_addr addr, *tmpaddr;
- unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_tstamp, age;
+ unsigned long tmp_tstamp, age;
unsigned long regen_advance;
- int tmp_plen;
+ struct ifa6_config cfg;
int ret = 0;
- u32 addr_flags;
unsigned long now = jiffies;
long max_desync_factor;
s32 cnf_temp_preferred_lft;
@@ -1326,13 +1324,12 @@ retry:
}
}
- tmp_valid_lft = min_t(__u32,
- ifp->valid_lft,
+ cfg.valid_lft = min_t(__u32, ifp->valid_lft,
idev->cnf.temp_valid_lft + age);
- tmp_prefered_lft = cnf_temp_preferred_lft + age -
- idev->desync_factor;
- tmp_prefered_lft = min_t(__u32, ifp->prefered_lft, tmp_prefered_lft);
- tmp_plen = ifp->prefix_len;
+ cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor;
+ cfg.preferred_lft = min_t(__u32, ifp->prefered_lft, cfg.preferred_lft);
+
+ cfg.plen = ifp->prefix_len;
tmp_tstamp = ifp->tstamp;
spin_unlock_bh(&ifp->lock);
@@ -1346,21 +1343,23 @@ retry:
* temporary addresses being generated.
*/
age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
- if (tmp_prefered_lft <= regen_advance + age) {
+ if (cfg.preferred_lft <= regen_advance + age) {
in6_ifa_put(ifp);
in6_dev_put(idev);
ret = -1;
goto out;
}
- addr_flags = IFA_F_TEMPORARY;
+ cfg.ifa_flags = IFA_F_TEMPORARY;
/* set in addrconf_prefix_rcv() */
if (ifp->flags & IFA_F_OPTIMISTIC)
- addr_flags |= IFA_F_OPTIMISTIC;
+ cfg.ifa_flags |= IFA_F_OPTIMISTIC;
+
+ cfg.pfx = &addr;
+ cfg.scope = ipv6_addr_scope(cfg.pfx);
+ cfg.rt_priority = 0;
- ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
- ipv6_addr_scope(&addr), addr_flags,
- tmp_valid_lft, tmp_prefered_lft, block, NULL);
+ ift = ipv6_add_addr(idev, &cfg, block, NULL);
if (IS_ERR(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
@@ -2031,13 +2030,17 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
spin_lock_bh(&ifp->lock);
if (ifp->flags & IFA_F_STABLE_PRIVACY) {
- int scope = ifp->scope;
- u32 flags = ifp->flags;
struct in6_addr new_addr;
struct inet6_ifaddr *ifp2;
- u32 valid_lft, preferred_lft;
- int pfxlen = ifp->prefix_len;
int retries = ifp->stable_privacy_retry + 1;
+ struct ifa6_config cfg = {
+ .pfx = &new_addr,
+ .plen = ifp->prefix_len,
+ .ifa_flags = ifp->flags,
+ .valid_lft = ifp->valid_lft,
+ .preferred_lft = ifp->prefered_lft,
+ .scope = ifp->scope,
+ };
if (retries > net->ipv6.sysctl.idgen_retries) {
net_info_ratelimited("%s: privacy stable address generation failed because of DAD conflicts!\n",
@@ -2050,9 +2053,6 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
idev))
goto errdad;
- valid_lft = ifp->valid_lft;
- preferred_lft = ifp->prefered_lft;
-
spin_unlock_bh(&ifp->lock);
if (idev->cnf.max_addresses &&
@@ -2063,9 +2063,7 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n",
ifp->idev->dev->name);
- ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen,
- scope, flags, valid_lft,
- preferred_lft, false, NULL);
+ ifp2 = ipv6_add_addr(idev, &cfg, false, NULL);
if (IS_ERR(ifp2))
goto lock_errdad;
@@ -2253,6 +2251,7 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
return addrconf_ifid_ieee1394(eui, dev);
case ARPHRD_TUNNEL6:
case ARPHRD_IP6GRE:
+ case ARPHRD_RAWIP:
return addrconf_ifid_ip6tnl(eui, dev);
}
return -1;
@@ -2318,12 +2317,13 @@ static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad
*/
static void
-addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
- unsigned long expires, u32 flags, gfp_t gfp_flags)
+addrconf_prefix_route(struct in6_addr *pfx, int plen, u32 metric,
+ struct net_device *dev, unsigned long expires,
+ u32 flags, gfp_t gfp_flags)
{
struct fib6_config cfg = {
.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
- .fc_metric = IP6_RT_PRIO_ADDRCONF,
+ .fc_metric = metric ? : IP6_RT_PRIO_ADDRCONF,
.fc_ifindex = dev->ifindex,
.fc_expires = expires,
.fc_dst_len = plen,
@@ -2507,12 +2507,20 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
if (!ifp && valid_lft) {
int max_addresses = in6_dev->cnf.max_addresses;
+ struct ifa6_config cfg = {
+ .pfx = addr,
+ .plen = pinfo->prefix_len,
+ .ifa_flags = addr_flags,
+ .valid_lft = valid_lft,
+ .preferred_lft = prefered_lft,
+ .scope = addr_type & IPV6_ADDR_SCOPE_MASK,
+ };
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
if ((net->ipv6.devconf_all->optimistic_dad ||
in6_dev->cnf.optimistic_dad) &&
!net->ipv6.devconf_all->forwarding && sllao)
- addr_flags |= IFA_F_OPTIMISTIC;
+ cfg.ifa_flags |= IFA_F_OPTIMISTIC;
#endif
/* Do not allow to create too much of autoconfigured
@@ -2520,11 +2528,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
*/
if (!max_addresses ||
ipv6_count_addresses(in6_dev) < max_addresses)
- ifp = ipv6_add_addr(in6_dev, addr, NULL,
- pinfo->prefix_len,
- addr_type&IPV6_ADDR_SCOPE_MASK,
- addr_flags, valid_lft,
- prefered_lft, false, NULL);
+ ifp = ipv6_add_addr(in6_dev, &cfg, false, NULL);
if (IS_ERR_OR_NULL(ifp))
return -1;
@@ -2683,7 +2687,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
expires = jiffies_to_clock_t(rt_expires);
}
addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
- dev, expires, flags, GFP_ATOMIC);
+ 0, dev, expires, flags,
+ GFP_ATOMIC);
}
fib6_info_release(rt);
}
@@ -2830,10 +2835,7 @@ static int ipv6_mc_config(struct sock *sk, bool join,
* Manual configuration of address on an interface
*/
static int inet6_addr_add(struct net *net, int ifindex,
- const struct in6_addr *pfx,
- const struct in6_addr *peer_pfx,
- unsigned int plen, __u32 ifa_flags,
- __u32 prefered_lft, __u32 valid_lft,
+ struct ifa6_config *cfg,
struct netlink_ext_ack *extack)
{
struct inet6_ifaddr *ifp;
@@ -2841,19 +2843,18 @@ static int inet6_addr_add(struct net *net, int ifindex,
struct net_device *dev;
unsigned long timeout;
clock_t expires;
- int scope;
u32 flags;
ASSERT_RTNL();
- if (plen > 128)
+ if (cfg->plen > 128)
return -EINVAL;
/* check the lifetime */
- if (!valid_lft || prefered_lft > valid_lft)
+ if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft)
return -EINVAL;
- if (ifa_flags & IFA_F_MANAGETEMPADDR && plen != 64)
+ if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64)
return -EINVAL;
dev = __dev_get_by_index(net, ifindex);
@@ -2864,41 +2865,40 @@ static int inet6_addr_add(struct net *net, int ifindex,
if (IS_ERR(idev))
return PTR_ERR(idev);
- if (ifa_flags & IFA_F_MCAUTOJOIN) {
+ if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk,
- true, pfx, ifindex);
+ true, cfg->pfx, ifindex);
if (ret < 0)
return ret;
}
- scope = ipv6_addr_scope(pfx);
+ cfg->scope = ipv6_addr_scope(cfg->pfx);
- timeout = addrconf_timeout_fixup(valid_lft, HZ);
+ timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
expires = jiffies_to_clock_t(timeout * HZ);
- valid_lft = timeout;
+ cfg->valid_lft = timeout;
flags = RTF_EXPIRES;
} else {
expires = 0;
flags = 0;
- ifa_flags |= IFA_F_PERMANENT;
+ cfg->ifa_flags |= IFA_F_PERMANENT;
}
- timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+ timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
if (timeout == 0)
- ifa_flags |= IFA_F_DEPRECATED;
- prefered_lft = timeout;
+ cfg->ifa_flags |= IFA_F_DEPRECATED;
+ cfg->preferred_lft = timeout;
}
- ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags,
- valid_lft, prefered_lft, true, extack);
-
+ ifp = ipv6_add_addr(idev, cfg, true, extack);
if (!IS_ERR(ifp)) {
- if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
- expires, flags, GFP_KERNEL);
+ if (!(cfg->ifa_flags & IFA_F_NOPREFIXROUTE)) {
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+ ifp->rt_priority, dev, expires,
+ flags, GFP_KERNEL);
}
/* Send a netlink notification if DAD is enabled and
@@ -2912,15 +2912,15 @@ static int inet6_addr_add(struct net *net, int ifindex,
* manually configured addresses
*/
addrconf_dad_start(ifp);
- if (ifa_flags & IFA_F_MANAGETEMPADDR)
- manage_tempaddrs(idev, ifp, valid_lft, prefered_lft,
- true, jiffies);
+ if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR)
+ manage_tempaddrs(idev, ifp, cfg->valid_lft,
+ cfg->preferred_lft, true, jiffies);
in6_ifa_put(ifp);
addrconf_verify_rtnl();
return 0;
- } else if (ifa_flags & IFA_F_MCAUTOJOIN) {
- ipv6_mc_config(net->ipv6.mc_autojoin_sk,
- false, pfx, ifindex);
+ } else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
+ ipv6_mc_config(net->ipv6.mc_autojoin_sk, false,
+ cfg->pfx, ifindex);
}
return PTR_ERR(ifp);
@@ -2971,6 +2971,11 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
int addrconf_add_ifaddr(struct net *net, void __user *arg)
{
+ struct ifa6_config cfg = {
+ .ifa_flags = IFA_F_PERMANENT,
+ .preferred_lft = INFINITY_LIFE_TIME,
+ .valid_lft = INFINITY_LIFE_TIME,
+ };
struct in6_ifreq ireq;
int err;
@@ -2980,10 +2985,11 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
return -EFAULT;
+ cfg.pfx = &ireq.ifr6_addr;
+ cfg.plen = ireq.ifr6_prefixlen;
+
rtnl_lock();
- err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL,
- ireq.ifr6_prefixlen, IFA_F_PERMANENT,
- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, NULL);
+ err = inet6_addr_add(net, ireq.ifr6_ifindex, &cfg, NULL);
rtnl_unlock();
return err;
}
@@ -3010,11 +3016,16 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
int plen, int scope)
{
struct inet6_ifaddr *ifp;
+ struct ifa6_config cfg = {
+ .pfx = addr,
+ .plen = plen,
+ .ifa_flags = IFA_F_PERMANENT,
+ .valid_lft = INFINITY_LIFE_TIME,
+ .preferred_lft = INFINITY_LIFE_TIME,
+ .scope = scope
+ };
- ifp = ipv6_add_addr(idev, addr, NULL, plen,
- scope, IFA_F_PERMANENT,
- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME,
- true, NULL);
+ ifp = ipv6_add_addr(idev, &cfg, true, NULL);
if (!IS_ERR(ifp)) {
spin_lock_bh(&ifp->lock);
ifp->flags &= ~IFA_F_TENTATIVE;
@@ -3051,7 +3062,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
if (addr.s6_addr32[3]) {
add_addr(idev, &addr, plen, scope);
- addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags,
+ addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags,
GFP_ATOMIC);
return;
}
@@ -3076,8 +3087,8 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
}
add_addr(idev, &addr, plen, flag);
- addrconf_prefix_route(&addr, plen, idev->dev, 0,
- pflags, GFP_ATOMIC);
+ addrconf_prefix_route(&addr, plen, 0, idev->dev,
+ 0, pflags, GFP_ATOMIC);
}
}
}
@@ -3104,20 +3115,26 @@ static void init_loopback(struct net_device *dev)
void addrconf_add_linklocal(struct inet6_dev *idev,
const struct in6_addr *addr, u32 flags)
{
+ struct ifa6_config cfg = {
+ .pfx = addr,
+ .plen = 64,
+ .ifa_flags = flags | IFA_F_PERMANENT,
+ .valid_lft = INFINITY_LIFE_TIME,
+ .preferred_lft = INFINITY_LIFE_TIME,
+ .scope = IFA_LINK
+ };
struct inet6_ifaddr *ifp;
- u32 addr_flags = flags | IFA_F_PERMANENT;
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad ||
idev->cnf.optimistic_dad) &&
!dev_net(idev->dev)->ipv6.devconf_all->forwarding)
- addr_flags |= IFA_F_OPTIMISTIC;
+ cfg.ifa_flags |= IFA_F_OPTIMISTIC;
#endif
- ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
- INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL);
+ ifp = ipv6_add_addr(idev, &cfg, true, NULL);
if (!IS_ERR(ifp)) {
- addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev,
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len, 0, idev->dev,
0, 0, GFP_ATOMIC);
addrconf_dad_start(ifp);
in6_ifa_put(ifp);
@@ -3233,7 +3250,7 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
addrconf_add_linklocal(idev, &addr,
IFA_F_STABLE_PRIVACY);
else if (prefix_route)
- addrconf_prefix_route(&addr, 64, idev->dev,
+ addrconf_prefix_route(&addr, 64, 0, idev->dev,
0, 0, GFP_KERNEL);
break;
case IN6_ADDR_GEN_MODE_EUI64:
@@ -3244,7 +3261,7 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0)
addrconf_add_linklocal(idev, &addr, 0);
else if (prefix_route)
- addrconf_prefix_route(&addr, 64, idev->dev,
+ addrconf_prefix_route(&addr, 64, 0, idev->dev,
0, 0, GFP_KERNEL);
break;
case IN6_ADDR_GEN_MODE_NONE:
@@ -3270,7 +3287,8 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_IP6GRE) &&
(dev->type != ARPHRD_IPGRE) &&
(dev->type != ARPHRD_TUNNEL) &&
- (dev->type != ARPHRD_NONE)) {
+ (dev->type != ARPHRD_NONE) &&
+ (dev->type != ARPHRD_RAWIP)) {
/* Alas, we support only Ethernet autoconfiguration. */
return;
}
@@ -3364,7 +3382,8 @@ static int fixup_permanent_addr(struct net *net,
if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
- idev->dev, 0, 0, GFP_ATOMIC);
+ ifp->rt_priority, idev->dev, 0, 0,
+ GFP_ATOMIC);
}
if (ifp->state == INET6_IFADDR_STATE_PREDAD)
@@ -4484,6 +4503,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
[IFA_LOCAL] = { .len = sizeof(struct in6_addr) },
[IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) },
[IFA_FLAGS] = { .len = sizeof(u32) },
+ [IFA_RT_PRIORITY] = { .len = sizeof(u32) },
};
static int
@@ -4516,8 +4536,38 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
ifm->ifa_prefixlen);
}
-static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
- u32 prefered_lft, u32 valid_lft)
+static int modify_prefix_route(struct inet6_ifaddr *ifp,
+ unsigned long expires, u32 flags)
+{
+ struct fib6_info *f6i;
+
+ f6i = addrconf_get_prefix_route(&ifp->addr,
+ ifp->prefix_len,
+ ifp->idev->dev,
+ 0, RTF_GATEWAY | RTF_DEFAULT);
+ if (!f6i)
+ return -ENOENT;
+
+ if (f6i->fib6_metric != ifp->rt_priority) {
+ /* add new one */
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+ ifp->rt_priority, ifp->idev->dev,
+ expires, flags, GFP_KERNEL);
+ /* delete old one */
+ ip6_del_rt(dev_net(ifp->idev->dev), f6i);
+ } else {
+ if (!expires)
+ fib6_clean_expires(f6i);
+ else
+ fib6_set_expires(f6i, expires);
+
+ fib6_info_release(f6i);
+ }
+
+ return 0;
+}
+
+static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
{
u32 flags;
clock_t expires;
@@ -4527,32 +4577,32 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
ASSERT_RTNL();
- if (!valid_lft || (prefered_lft > valid_lft))
+ if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft)
return -EINVAL;
- if (ifa_flags & IFA_F_MANAGETEMPADDR &&
+ if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR &&
(ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
return -EINVAL;
if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
- ifa_flags &= ~IFA_F_OPTIMISTIC;
+ cfg->ifa_flags &= ~IFA_F_OPTIMISTIC;
- timeout = addrconf_timeout_fixup(valid_lft, HZ);
+ timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
expires = jiffies_to_clock_t(timeout * HZ);
- valid_lft = timeout;
+ cfg->valid_lft = timeout;
flags = RTF_EXPIRES;
} else {
expires = 0;
flags = 0;
- ifa_flags |= IFA_F_PERMANENT;
+ cfg->ifa_flags |= IFA_F_PERMANENT;
}
- timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+ timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
if (timeout == 0)
- ifa_flags |= IFA_F_DEPRECATED;
- prefered_lft = timeout;
+ cfg->ifa_flags |= IFA_F_DEPRECATED;
+ cfg->preferred_lft = timeout;
}
spin_lock_bh(&ifp->lock);
@@ -4562,19 +4612,30 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
ifp->flags &= ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD |
IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
IFA_F_NOPREFIXROUTE);
- ifp->flags |= ifa_flags;
+ ifp->flags |= cfg->ifa_flags;
ifp->tstamp = jiffies;
- ifp->valid_lft = valid_lft;
- ifp->prefered_lft = prefered_lft;
+ ifp->valid_lft = cfg->valid_lft;
+ ifp->prefered_lft = cfg->preferred_lft;
+
+ if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority)
+ ifp->rt_priority = cfg->rt_priority;
spin_unlock_bh(&ifp->lock);
if (!(ifp->flags&IFA_F_TENTATIVE))
ipv6_ifa_notify(0, ifp);
- if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
- addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
- ifp->idev->dev, expires, flags,
- GFP_KERNEL);
+ if (!(cfg->ifa_flags & IFA_F_NOPREFIXROUTE)) {
+ int rc = -ENOENT;
+
+ if (had_prefixroute)
+ rc = modify_prefix_route(ifp, expires, flags);
+
+ /* prefix route could have been deleted; if so restore it */
+ if (rc == -ENOENT) {
+ addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+ ifp->rt_priority, ifp->idev->dev,
+ expires, flags, GFP_KERNEL);
+ }
} else if (had_prefixroute) {
enum cleanup_prefix_rt_t action;
unsigned long rt_expires;
@@ -4590,10 +4651,14 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
}
if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) {
- if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR))
- valid_lft = prefered_lft = 0;
- manage_tempaddrs(ifp->idev, ifp, valid_lft, prefered_lft,
- !was_managetempaddr, jiffies);
+ if (was_managetempaddr &&
+ !(ifp->flags & IFA_F_MANAGETEMPADDR)) {
+ cfg->valid_lft = 0;
+ cfg->preferred_lft = 0;
+ }
+ manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft,
+ cfg->preferred_lft, !was_managetempaddr,
+ jiffies);
}
addrconf_verify_rtnl();
@@ -4608,12 +4673,11 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct net *net = sock_net(skb->sk);
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
- struct in6_addr *pfx, *peer_pfx;
+ struct in6_addr *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
struct inet6_dev *idev;
- u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
- u32 ifa_flags;
+ struct ifa6_config cfg;
int err;
err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
@@ -4621,60 +4685,70 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
return err;
+ memset(&cfg, 0, sizeof(cfg));
+
ifm = nlmsg_data(nlh);
- pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
- if (!pfx)
+ cfg.pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
+ if (!cfg.pfx)
return -EINVAL;
+ cfg.peer_pfx = peer_pfx;
+ cfg.plen = ifm->ifa_prefixlen;
+ if (tb[IFA_RT_PRIORITY])
+ cfg.rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
+
+ cfg.valid_lft = INFINITY_LIFE_TIME;
+ cfg.preferred_lft = INFINITY_LIFE_TIME;
+
if (tb[IFA_CACHEINFO]) {
struct ifa_cacheinfo *ci;
ci = nla_data(tb[IFA_CACHEINFO]);
- valid_lft = ci->ifa_valid;
- preferred_lft = ci->ifa_prefered;
- } else {
- preferred_lft = INFINITY_LIFE_TIME;
- valid_lft = INFINITY_LIFE_TIME;
+ cfg.valid_lft = ci->ifa_valid;
+ cfg.preferred_lft = ci->ifa_prefered;
}
dev = __dev_get_by_index(net, ifm->ifa_index);
if (!dev)
return -ENODEV;
- ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+ if (tb[IFA_FLAGS])
+ cfg.ifa_flags = nla_get_u32(tb[IFA_FLAGS]);
+ else
+ cfg.ifa_flags = ifm->ifa_flags;
/* We ignore other flags so far. */
- ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
- IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+ cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS |
+ IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE |
+ IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
idev = ipv6_find_idev(dev);
if (IS_ERR(idev))
return PTR_ERR(idev);
if (!ipv6_allow_optimistic_dad(net, idev))
- ifa_flags &= ~IFA_F_OPTIMISTIC;
+ cfg.ifa_flags &= ~IFA_F_OPTIMISTIC;
- if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+ if (cfg.ifa_flags & IFA_F_NODAD &&
+ cfg.ifa_flags & IFA_F_OPTIMISTIC) {
NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
return -EINVAL;
}
- ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
+ ifa = ipv6_get_ifaddr(net, cfg.pfx, dev, 1);
if (!ifa) {
/*
* It would be best to check for !NLM_F_CREATE here but
* userspace already relies on not having to provide this.
*/
- return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
- ifm->ifa_prefixlen, ifa_flags,
- preferred_lft, valid_lft, extack);
+ return inet6_addr_add(net, ifm->ifa_index, &cfg, extack);
}
if (nlh->nlmsg_flags & NLM_F_EXCL ||
!(nlh->nlmsg_flags & NLM_F_REPLACE))
err = -EEXIST;
else
- err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft);
+ err = inet6_addr_modify(ifa, &cfg);
in6_ifa_put(ifa);
@@ -4725,7 +4799,8 @@ static inline int inet6_ifaddr_msgsize(void)
+ nla_total_size(16) /* IFA_LOCAL */
+ nla_total_size(16) /* IFA_ADDRESS */
+ nla_total_size(sizeof(struct ifa_cacheinfo))
- + nla_total_size(4) /* IFA_FLAGS */;
+ + nla_total_size(4) /* IFA_FLAGS */
+ + nla_total_size(4) /* IFA_RT_PRIORITY */;
}
static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
@@ -4771,6 +4846,10 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0)
goto error;
+ if (ifa->rt_priority &&
+ nla_put_u32(skb, IFA_RT_PRIORITY, ifa->rt_priority))
+ goto error;
+
if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
goto error;
@@ -5615,7 +5694,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
if (!ipv6_addr_any(&ifp->peer_addr))
- addrconf_prefix_route(&ifp->peer_addr, 128,
+ addrconf_prefix_route(&ifp->peer_addr, 128, 0,
ifp->idev->dev, 0, 0,
GFP_ATOMIC);
break;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 60b0d1652448..021e5aef6ba3 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -482,7 +482,8 @@ int ip6_forward(struct sk_buff *skb)
send redirects to source routed frames.
We don't send redirects to frames decapsulated from IPsec.
*/
- if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
+ if (IP6CB(skb)->iif == dst->dev->ifindex &&
+ opt->srcrt == 0 && !skb_sec_path(skb)) {
struct in6_addr *target = NULL;
struct inet_peer *peer;
struct rt6_info *rt;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index da66aaac51ce..00e138a44cbb 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1692,8 +1692,13 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
}
- if (new_mtu > 0xFFF8 - dev->hard_header_len)
- return -EINVAL;
+ if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) {
+ if (new_mtu > IP6_MAX_MTU - dev->hard_header_len)
+ return -EINVAL;
+ } else {
+ if (new_mtu > IP_MAX_MTU - dev->hard_header_len)
+ return -EINVAL;
+ }
dev->mtu = new_mtu;
return 0;
}
@@ -1841,7 +1846,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)
if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
dev->mtu -= 8;
dev->min_mtu = ETH_MIN_MTU;
- dev->max_mtu = 0xFFF8 - dev->hard_header_len;
+ dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len;
return 0;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 20a419ee8000..058fc05e5708 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -228,8 +228,8 @@ static int __net_init ip6mr_rules_init(struct net *net)
INIT_LIST_HEAD(&net->ipv6.mr6_tables);
mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
- if (!mrt) {
- err = -ENOMEM;
+ if (IS_ERR(mrt)) {
+ err = PTR_ERR(mrt);
goto err1;
}
@@ -302,8 +302,13 @@ static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
static int __net_init ip6mr_rules_init(struct net *net)
{
- net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
- return net->ipv6.mrt6 ? 0 : -ENOMEM;
+ struct mr_table *mrt;
+
+ mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
+ if (IS_ERR(mrt))
+ return PTR_ERR(mrt);
+ net->ipv6.mrt6 = mrt;
+ return 0;
}
static void __net_exit ip6mr_rules_exit(struct net *net)
@@ -1758,9 +1763,11 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
rtnl_lock();
ret = 0;
- if (!ip6mr_new_table(net, v))
- ret = -ENOMEM;
- raw6_sk(sk)->ip6mr_table = v;
+ mrt = ip6mr_new_table(net, v);
+ if (IS_ERR(mrt))
+ ret = PTR_ERR(mrt);
+ else
+ raw6_sk(sk)->ip6mr_table = v;
rtnl_unlock();
return ret;
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 9ac5366064e3..e640d2f3c55c 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1578,6 +1578,12 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL;
bool ret;
+ if (netif_is_l3_master(skb->dev)) {
+ dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif);
+ if (!dev)
+ return;
+ }
+
if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n",
dev->name);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index ce77bcc2490c..37b14dc9d863 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -29,7 +29,10 @@ config NF_SOCKET_IPV6
tristate "IPv6 socket lookup support"
help
This option enables the IPv6 socket lookup infrastructure. This
- is used by the ip6tables socket match.
+ is used by the {ip6,nf}tables socket match.
+
+config NF_TPROXY_IPV6
+ tristate "IPv6 tproxy support"
if NF_TABLES
@@ -136,10 +139,7 @@ config NF_NAT_IPV6
if NF_NAT_IPV6
config NF_NAT_MASQUERADE_IPV6
- tristate "IPv6 masquerade support"
- help
- This is the kernel functionality to provide NAT in the masquerade
- flavour (automatic source address selection) for IPv6.
+ bool
endif # NF_NAT_IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 44273d6f03a5..10a5a1c87320 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -18,14 +18,15 @@ nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o
obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o
nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
+nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
-obj-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
# defrag
nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o
obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
obj-$(CONFIG_NF_SOCKET_IPV6) += nf_socket_ipv6.o
+obj-$(CONFIG_NF_TPROXY_IPV6) += nf_tproxy_ipv6.o
# logging
obj-$(CONFIG_NF_LOG_IPV6) += nf_log_ipv6.o
diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
index 9dfc2b90c362..e6eb7cf9b54f 100644
--- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c
@@ -10,7 +10,6 @@
*/
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/atomic.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
@@ -186,6 +185,3 @@ void nf_nat_masquerade_ipv6_unregister_notifier(void)
unregister_netdevice_notifier(&masq_dev_notifier);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6_unregister_notifier);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
new file mode 100644
index 000000000000..bf1d6c421e3b
--- /dev/null
+++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
@@ -0,0 +1,146 @@
+#include <net/netfilter/nf_tproxy.h>
+#include <linux/module.h>
+#include <net/inet6_hashtables.h>
+#include <net/addrconf.h>
+#include <net/udp.h>
+#include <net/tcp.h>
+
+const struct in6_addr *
+nf_tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
+ const struct in6_addr *daddr)
+{
+ struct inet6_dev *indev;
+ struct inet6_ifaddr *ifa;
+ struct in6_addr *laddr;
+
+ if (!ipv6_addr_any(user_laddr))
+ return user_laddr;
+ laddr = NULL;
+
+ indev = __in6_dev_get(skb->dev);
+ if (indev) {
+ read_lock_bh(&indev->lock);
+ list_for_each_entry(ifa, &indev->addr_list, if_list) {
+ if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
+ continue;
+
+ laddr = &ifa->addr;
+ break;
+ }
+ read_unlock_bh(&indev->lock);
+ }
+
+ return laddr ? laddr : daddr;
+}
+EXPORT_SYMBOL_GPL(nf_tproxy_laddr6);
+
+struct sock *
+nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
+ struct net *net,
+ const struct in6_addr *laddr,
+ const __be16 lport,
+ struct sock *sk)
+{
+ const struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct tcphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
+ if (hp == NULL) {
+ inet_twsk_put(inet_twsk(sk));
+ return NULL;
+ }
+
+ if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
+ /* SYN to a TIME_WAIT socket, we'd rather redirect it
+ * to a listener socket if there's one */
+ struct sock *sk2;
+
+ sk2 = nf_tproxy_get_sock_v6(net, skb, thoff, hp, tproto,
+ &iph->saddr,
+ nf_tproxy_laddr6(skb, laddr, &iph->daddr),
+ hp->source,
+ lport ? lport : hp->dest,
+ skb->dev, NF_TPROXY_LOOKUP_LISTENER);
+ if (sk2) {
+ inet_twsk_deschedule_put(inet_twsk(sk));
+ sk = sk2;
+ }
+ }
+
+ return sk;
+}
+EXPORT_SYMBOL_GPL(nf_tproxy_handle_time_wait6);
+
+struct sock *
+nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
+ const u8 protocol,
+ const struct in6_addr *saddr, const struct in6_addr *daddr,
+ const __be16 sport, const __be16 dport,
+ const struct net_device *in,
+ const enum nf_tproxy_lookup_t lookup_type)
+{
+ struct sock *sk;
+ struct tcphdr *tcph;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ switch (lookup_type) {
+ case NF_TPROXY_LOOKUP_LISTENER:
+ tcph = hp;
+ sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
+ thoff + __tcp_hdrlen(tcph),
+ saddr, sport,
+ daddr, ntohs(dport),
+ in->ifindex, 0);
+
+ if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
+ sk = NULL;
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ break;
+ case NF_TPROXY_LOOKUP_ESTABLISHED:
+ sk = __inet6_lookup_established(net, &tcp_hashinfo,
+ saddr, sport, daddr, ntohs(dport),
+ in->ifindex, 0);
+ break;
+ default:
+ BUG();
+ }
+ break;
+ case IPPROTO_UDP:
+ sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
+ in->ifindex);
+ if (sk) {
+ int connected = (sk->sk_state == TCP_ESTABLISHED);
+ int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr);
+
+ /* NOTE: we return listeners even if bound to
+ * 0.0.0.0, those are filtered out in
+ * xt_socket, since xt_TPROXY needs 0 bound
+ * listeners too
+ */
+ if ((lookup_type == NF_TPROXY_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
+ (lookup_type == NF_TPROXY_LOOKUP_LISTENER && connected)) {
+ sock_put(sk);
+ sk = NULL;
+ }
+ }
+ break;
+ default:
+ WARN_ON(1);
+ sk = NULL;
+ }
+
+ pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
+ protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
+
+ return sk;
+}
+EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v6);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Balazs Scheidler, Krisztian Kovacs");
+MODULE_DESCRIPTION("Netfilter IPv4 transparent proxy support");
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 22c4de2317d0..1dc98715c78b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1981,7 +1981,7 @@ out:
} else {
keys->addrs.v6addrs.src = key_iph->saddr;
keys->addrs.v6addrs.dst = key_iph->daddr;
- keys->tags.flow_label = ip6_flowinfo(key_iph);
+ keys->tags.flow_label = ip6_flowlabel(key_iph);
keys->basic.ip_proto = key_iph->nexthdr;
}
}
@@ -2002,7 +2002,7 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
} else {
hash_keys.addrs.v6addrs.src = fl6->saddr;
hash_keys.addrs.v6addrs.dst = fl6->daddr;
- hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+ hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6);
hash_keys.basic.ip_proto = fl6->flowi6_proto;
}
break;
@@ -4412,13 +4412,17 @@ static int ip6_route_multipath_add(struct fib6_config *cfg,
err_nh = NULL;
list_for_each_entry(nh, &rt6_nh_list, next) {
- rt_last = nh->fib6_info;
err = __ip6_ins_rt(nh->fib6_info, info, extack);
fib6_info_release(nh->fib6_info);
- /* save reference to first route for notification */
- if (!rt_notif && !err)
- rt_notif = nh->fib6_info;
+ if (!err) {
+ /* save reference to last route successfully inserted */
+ rt_last = nh->fib6_info;
+
+ /* save reference to first route for notification */
+ if (!rt_notif)
+ rt_notif = nh->fib6_info;
+ }
/* nh->fib6_info is used or freed at this point, reset to NULL*/
nh->fib6_info = NULL;
diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c
index 7f5621d09571..0fdf2a55e746 100644
--- a/net/ipv6/seg6.c
+++ b/net/ipv6/seg6.c
@@ -226,7 +226,6 @@ static int seg6_genl_get_tunsrc(struct sk_buff *skb, struct genl_info *info)
nla_put_failure:
rcu_read_unlock();
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
return -ENOMEM;
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index eab39bd91548..19ccf0dc996c 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -122,7 +122,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
hdrlen = (osrh->hdrlen + 1) << 3;
tot_len = hdrlen + sizeof(*hdr);
- err = skb_cow_head(skb, tot_len);
+ err = skb_cow_head(skb, tot_len + skb->mac_len);
if (unlikely(err))
return err;
@@ -181,7 +181,7 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen);
+ err = skb_cow_head(skb, hdrlen + skb->mac_len);
if (unlikely(err))
return err;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 2afce37a7177..e9400ffa7875 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1371,7 +1371,7 @@ static void ipip6_tunnel_setup(struct net_device *dev)
dev->hard_header_len = LL_MAX_HEADER + t_hlen;
dev->mtu = ETH_DATA_LEN - t_hlen;
dev->min_mtu = IPV6_MIN_MTU;
- dev->max_mtu = 0xFFF8 - t_hlen;
+ dev->max_mtu = IP6_MAX_MTU - t_hlen;
dev->flags = IFF_NOARP;
netif_keep_dst(dev);
dev->addr_len = 4;
@@ -1583,7 +1583,8 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
if (tb[IFLA_MTU]) {
u32 mtu = nla_get_u32(tb[IFLA_MTU]);
- if (mtu >= IPV6_MIN_MTU && mtu <= 0xFFF8 - dev->hard_header_len)
+ if (mtu >= IPV6_MIN_MTU &&
+ mtu <= IP6_MAX_MTU - dev->hard_header_len)
dev->mtu = mtu;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7d47c2b550a9..8764a63abd91 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1322,7 +1322,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
}
}
- tcp_rcv_established(sk, skb, tcp_hdr(skb));
+ tcp_rcv_established(sk, skb);
if (opt_skb)
goto ipv6_pktoptions;
return 0;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 9f729a7b8cf0..19d7d4c24dfb 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -285,9 +285,7 @@ EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb);
/* Must be called under rcu_read_lock().
* Does increment socket refcount.
*/
-#if IS_ENABLED(CONFIG_NETFILTER_XT_MATCH_SOCKET) || \
- IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TPROXY) || \
- IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
+#if IS_ENABLED(CONFIG_NF_TPROXY_IPV6) || IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport,
const struct in6_addr *daddr, __be16 dport, int dif)
{
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 2cff209d0fc1..ef3defaf43b9 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -124,7 +124,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
struct flowi6 *fl6 = &fl->u.ip6;
int onlyproto = 0;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
- u16 offset = sizeof(*hdr);
+ u32 offset = sizeof(*hdr);
struct ipv6_opt_hdr *exthdr;
const unsigned char *nh = skb_network_header(skb);
u16 nhoff = IP6CB(skb)->nhoff;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index dc76bc346829..d3601d421571 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1671,7 +1671,7 @@ static struct file *kcm_clone(struct socket *osock)
__module_get(newsock->ops->owner);
newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
- &kcm_proto, true);
+ &kcm_proto, false);
if (!newsk) {
sock_release(newsock);
return ERR_PTR(-ENOMEM);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index f951c768dcf2..f1c9c327f674 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -428,16 +428,6 @@ static void pppol2tp_put_sk(struct rcu_head *head)
*/
static void pppol2tp_session_close(struct l2tp_session *session)
{
- struct pppol2tp_session *ps;
-
- ps = l2tp_session_priv(session);
- mutex_lock(&ps->sk_lock);
- ps->__sk = rcu_dereference_protected(ps->sk,
- lockdep_is_held(&ps->sk_lock));
- RCU_INIT_POINTER(ps->sk, NULL);
- if (ps->__sk)
- call_rcu(&ps->rcu, pppol2tp_put_sk);
- mutex_unlock(&ps->sk_lock);
}
/* Really kill the session socket. (Called from sock_put() if
@@ -480,15 +470,24 @@ static int pppol2tp_release(struct socket *sock)
sock_orphan(sk);
sock->sk = NULL;
- /* If the socket is associated with a session,
- * l2tp_session_delete will call pppol2tp_session_close which
- * will drop the session's ref on the socket.
- */
session = pppol2tp_sock_to_session(sk);
if (session) {
+ struct pppol2tp_session *ps;
+
l2tp_session_delete(session);
- /* drop the ref obtained by pppol2tp_sock_to_session */
- sock_put(sk);
+
+ ps = l2tp_session_priv(session);
+ mutex_lock(&ps->sk_lock);
+ ps->__sk = rcu_dereference_protected(ps->sk,
+ lockdep_is_held(&ps->sk_lock));
+ RCU_INIT_POINTER(ps->sk, NULL);
+ mutex_unlock(&ps->sk_lock);
+ call_rcu(&ps->rcu, pppol2tp_put_sk);
+
+ /* Rely on the sock_put() call at the end of the function for
+ * dropping the reference held by pppol2tp_sock_to_session().
+ * The last reference will be dropped by pppol2tp_put_sk().
+ */
}
release_sock(sk);
@@ -742,7 +741,8 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
*/
mutex_lock(&ps->sk_lock);
if (rcu_dereference_protected(ps->sk,
- lockdep_is_held(&ps->sk_lock))) {
+ lockdep_is_held(&ps->sk_lock)) ||
+ ps->__sk) {
mutex_unlock(&ps->sk_lock);
error = -EEXIST;
goto end;
@@ -803,7 +803,6 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
out_no_ppp:
/* This is how we get the session context from the socket. */
- sock_hold(sk);
sk->sk_user_data = session;
rcu_assign_pointer(ps->sk, sk);
mutex_unlock(&ps->sk_lock);
diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c
index b09ef77bf4cd..82e6edf9c5d9 100644
--- a/net/ncsi/ncsi-netlink.c
+++ b/net/ncsi/ncsi-netlink.c
@@ -201,7 +201,6 @@ static int ncsi_pkg_info_nl(struct sk_buff *msg, struct genl_info *info)
return genlmsg_reply(skb, info);
err:
- genlmsg_cancel(skb, hdr);
kfree_skb(skb);
return rc;
}
@@ -209,7 +208,7 @@ err:
static int ncsi_pkg_info_all_nl(struct sk_buff *skb,
struct netlink_callback *cb)
{
- struct nlattr *attrs[NCSI_ATTR_MAX];
+ struct nlattr *attrs[NCSI_ATTR_MAX + 1];
struct ncsi_package *np, *package;
struct ncsi_dev_priv *ndp;
unsigned int package_id;
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index a6b7c7d5c829..930c1d3796f0 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -652,7 +652,7 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
NCSI_CAP_VLAN_MASK;
size = (rsp->uc_cnt + rsp->mc_cnt + rsp->mixed_cnt) * ETH_ALEN;
- nc->mac_filter.addrs = kzalloc(size, GFP_KERNEL);
+ nc->mac_filter.addrs = kzalloc(size, GFP_ATOMIC);
if (!nc->mac_filter.addrs)
return -ENOMEM;
nc->mac_filter.n_uc = rsp->uc_cnt;
@@ -661,7 +661,7 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr)
nc->vlan_filter.vids = kcalloc(rsp->vlan_cnt,
sizeof(*nc->vlan_filter.vids),
- GFP_KERNEL);
+ GFP_ATOMIC);
if (!nc->vlan_filter.vids)
return -ENOMEM;
/* Set VLAN filters active so they are cleared in the first
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index a5b60e6a983e..dbd7d1fad277 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -433,11 +433,7 @@ config NF_NAT_TFTP
default NF_NAT && NF_CONNTRACK_TFTP
config NF_NAT_REDIRECT
- tristate "IPv4/IPv6 redirect support"
- depends on NF_NAT
- help
- This is the kernel functionality to redirect packets to local
- machine through NAT.
+ bool
config NETFILTER_SYNPROXY
tristate
@@ -521,6 +517,15 @@ config NFT_COUNTER
This option adds the "counter" expression that you can use to
include packet and byte counters in a rule.
+config NFT_CONNLIMIT
+ tristate "Netfilter nf_tables connlimit module"
+ depends on NF_CONNTRACK
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_CONNCOUNT
+ help
+ This option adds the "connlimit" expression that you can use to
+ ratelimit rule matchings per connections.
+
config NFT_LOG
tristate "Netfilter nf_tables log module"
help
@@ -617,6 +622,15 @@ config NFT_FIB_INET
The lookup will be delegated to the IPv4 or IPv6 FIB depending
on the protocol of the packet.
+config NFT_SOCKET
+ tristate "Netfilter nf_tables socket match support"
+ depends on IPV6 || IPV6=n
+ select NF_SOCKET_IPV4
+ select NF_SOCKET_IPV6 if IPV6
+ help
+ This option allows matching for the presence or absence of a
+ corresponding socket and its attributes.
+
if NF_TABLES_NETDEV
config NF_DUP_NETDEV
@@ -984,6 +998,8 @@ config NETFILTER_XT_TARGET_TPROXY
depends on IP_NF_MANGLE
select NF_DEFRAG_IPV4
select NF_DEFRAG_IPV6 if IP6_NF_IPTABLES != n
+ select NF_TPROXY_IPV4
+ select NF_TPROXY_IPV6 if IP6_NF_IPTABLES
help
This option adds a `TPROXY' target, which is somewhat similar to
REDIRECT. It can only be used in the mangle table and is useful
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1aa710b5d384..44449389e527 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -55,7 +55,7 @@ obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
obj-$(CONFIG_NF_LOG_NETDEV) += nf_log_netdev.o
obj-$(CONFIG_NF_NAT) += nf_nat.o
-obj-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
+nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o
# NAT helpers
obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
@@ -80,6 +80,7 @@ nf_tables-objs := nf_tables_core.o nf_tables_api.o nft_chain_filter.o \
obj-$(CONFIG_NF_TABLES) += nf_tables.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
+obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o
obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o
obj-$(CONFIG_NFT_CT) += nft_ct.o
obj-$(CONFIG_NFT_FLOW_OFFLOAD) += nft_flow_offload.o
@@ -102,6 +103,7 @@ obj-$(CONFIG_NFT_FIB) += nft_fib.o
obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o
obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o
obj-$(CONFIG_NF_OSF) += nf_osf.o
+obj-$(CONFIG_NFT_SOCKET) += nft_socket.o
# nf_tables netdev
obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 1c98c907bc63..12d74896556a 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -355,7 +355,8 @@ static inline void vs_seq_update(struct ip_vs_conn *cp, struct ip_vs_seq *vseq,
}
static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
- struct ip_vs_app *app)
+ struct ip_vs_app *app,
+ struct ip_vs_iphdr *ipvsh)
{
int diff;
const unsigned int tcp_offset = ip_hdrlen(skb);
@@ -386,7 +387,7 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
if (app->pkt_out == NULL)
return 1;
- if (!app->pkt_out(app, cp, skb, &diff))
+ if (!app->pkt_out(app, cp, skb, &diff, ipvsh))
return 0;
/*
@@ -404,7 +405,8 @@ static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
* called by ipvs packet handler, assumes previously checked cp!=NULL
* returns false if it can't handle packet (oom)
*/
-int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
+int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
+ struct ip_vs_iphdr *ipvsh)
{
struct ip_vs_app *app;
@@ -417,7 +419,7 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
/* TCP is complicated */
if (cp->protocol == IPPROTO_TCP)
- return app_tcp_pkt_out(cp, skb, app);
+ return app_tcp_pkt_out(cp, skb, app, ipvsh);
/*
* Call private output hook function
@@ -425,12 +427,13 @@ int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
if (app->pkt_out == NULL)
return 1;
- return app->pkt_out(app, cp, skb, NULL);
+ return app->pkt_out(app, cp, skb, NULL, ipvsh);
}
static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
- struct ip_vs_app *app)
+ struct ip_vs_app *app,
+ struct ip_vs_iphdr *ipvsh)
{
int diff;
const unsigned int tcp_offset = ip_hdrlen(skb);
@@ -461,7 +464,7 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
if (app->pkt_in == NULL)
return 1;
- if (!app->pkt_in(app, cp, skb, &diff))
+ if (!app->pkt_in(app, cp, skb, &diff, ipvsh))
return 0;
/*
@@ -479,7 +482,8 @@ static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
* called by ipvs packet handler, assumes previously checked cp!=NULL.
* returns false if can't handle packet (oom).
*/
-int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
+int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
+ struct ip_vs_iphdr *ipvsh)
{
struct ip_vs_app *app;
@@ -492,7 +496,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
/* TCP is complicated */
if (cp->protocol == IPPROTO_TCP)
- return app_tcp_pkt_in(cp, skb, app);
+ return app_tcp_pkt_in(cp, skb, app, ipvsh);
/*
* Call private input hook function
@@ -500,7 +504,7 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
if (app->pkt_in == NULL)
return 1;
- return app->pkt_in(app, cp, skb, NULL);
+ return app->pkt_in(app, cp, skb, NULL, ipvsh);
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index d4f68d0f7df7..af89bb5ffac7 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2385,8 +2385,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
struct ipvs_sync_daemon_cfg cfg;
memset(&cfg, 0, sizeof(cfg));
- strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
- sizeof(cfg.mcast_ifn));
+ ret = -EINVAL;
+ if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
+ sizeof(cfg.mcast_ifn)) <= 0)
+ goto out_dec;
cfg.syncid = dm->syncid;
ret = start_sync_thread(ipvs, &cfg, dm->state);
} else {
@@ -2424,12 +2426,19 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
}
}
+ if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
+ strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
+ IP_VS_SCHEDNAME_MAXLEN) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
usvc.protocol != IPPROTO_SCTP) {
- pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
+ pr_err("set_ctl: invalid protocol: %d %pI4:%d\n",
usvc.protocol, &usvc.addr.ip,
- ntohs(usvc.port), usvc.sched_name);
+ ntohs(usvc.port));
ret = -EFAULT;
goto out_unlock;
}
@@ -2851,7 +2860,7 @@ static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
[IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
[IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
- .len = IP_VS_IFNAME_MAXLEN },
+ .len = IP_VS_IFNAME_MAXLEN - 1 },
[IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
[IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 },
[IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 },
@@ -2869,7 +2878,7 @@ static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
[IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
[IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
[IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
- .len = IP_VS_SCHEDNAME_MAXLEN },
+ .len = IP_VS_SCHEDNAME_MAXLEN - 1 },
[IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
.len = IP_VS_PENAME_MAXLEN },
[IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 58d5d05aec24..4398a72edec5 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -29,6 +29,8 @@
#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
+#include <linux/ctype.h>
+#include <linux/inet.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
@@ -44,9 +46,18 @@
#include <net/ip_vs.h>
-#define SERVER_STRING "227 "
-#define CLIENT_STRING "PORT"
+#define SERVER_STRING_PASV "227 "
+#define CLIENT_STRING_PORT "PORT"
+#define SERVER_STRING_EPSV "229 "
+#define CLIENT_STRING_EPRT "EPRT"
+enum {
+ IP_VS_FTP_ACTIVE = 0,
+ IP_VS_FTP_PORT = 0,
+ IP_VS_FTP_PASV,
+ IP_VS_FTP_EPRT,
+ IP_VS_FTP_EPSV,
+};
/*
* List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper
@@ -58,9 +69,15 @@ module_param_array(ports, ushort, &ports_count, 0444);
MODULE_PARM_DESC(ports, "Ports to monitor for FTP control commands");
-/* Dummy variable */
-static int ip_vs_ftp_pasv;
+static char *ip_vs_ftp_data_ptr(struct sk_buff *skb, struct ip_vs_iphdr *ipvsh)
+{
+ struct tcphdr *th = (struct tcphdr *)((char *)skb->data + ipvsh->len);
+
+ if ((th->doff << 2) < sizeof(struct tcphdr))
+ return NULL;
+ return (char *)th + (th->doff << 2);
+}
static int
ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
@@ -78,20 +95,20 @@ ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp)
}
-/*
- * Get <addr,port> from the string "xxx.xxx.xxx.xxx,ppp,ppp", started
- * with the "pattern", ignoring before "skip" and terminated with
- * the "term" character.
- * <addr,port> is in network order.
+/* Get <addr,port> from the string "xxx.xxx.xxx.xxx,ppp,ppp", started
+ * with the "pattern". <addr,port> is in network order.
+ * Parse extended format depending on ext. In this case addr can be pre-set.
*/
static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
const char *pattern, size_t plen,
- char skip, char term,
- __be32 *addr, __be16 *port,
- char **start, char **end)
+ char skip, bool ext, int mode,
+ union nf_inet_addr *addr, __be16 *port,
+ __u16 af, char **start, char **end)
{
char *s, c;
unsigned char p[6];
+ char edelim;
+ __u16 hport;
int i = 0;
if (data_limit - data < plen) {
@@ -113,6 +130,11 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
if (s == data_limit)
return -1;
if (!found) {
+ /* "(" is optional for non-extended format,
+ * so catch the start of IPv4 address
+ */
+ if (!ext && isdigit(*s))
+ break;
if (*s == skip)
found = 1;
} else if (*s != skip) {
@@ -120,41 +142,102 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
}
}
}
+ /* Old IPv4-only format? */
+ if (!ext) {
+ p[0] = 0;
+ for (data = s; ; data++) {
+ if (data == data_limit)
+ return -1;
+ c = *data;
+ if (isdigit(c)) {
+ p[i] = p[i]*10 + c - '0';
+ } else if (c == ',' && i < 5) {
+ i++;
+ p[i] = 0;
+ } else {
+ /* unexpected character or terminator */
+ break;
+ }
+ }
- for (data = s; ; data++) {
- if (data == data_limit)
+ if (i != 5)
return -1;
- if (*data == term)
- break;
+
+ *start = s;
+ *end = data;
+ addr->ip = get_unaligned((__be32 *) p);
+ *port = get_unaligned((__be16 *) (p + 4));
+ return 1;
}
- *end = data;
+ if (s == data_limit)
+ return -1;
+ *start = s;
+ edelim = *s++;
+ if (edelim < 33 || edelim > 126)
+ return -1;
+ if (s == data_limit)
+ return -1;
+ if (*s == edelim) {
+ /* Address family is usually missing for EPSV response */
+ if (mode != IP_VS_FTP_EPSV)
+ return -1;
+ s++;
+ if (s == data_limit)
+ return -1;
+ /* Then address should be missing too */
+ if (*s != edelim)
+ return -1;
+ /* Caller can pre-set addr, if needed */
+ s++;
+ } else {
+ const char *ep;
- memset(p, 0, sizeof(p));
- for (data = s; ; data++) {
- c = *data;
- if (c == term)
- break;
- if (c >= '0' && c <= '9') {
- p[i] = p[i]*10 + c - '0';
- } else if (c == ',' && i < 5) {
- i++;
- } else {
- /* unexpected character */
+ /* We allow address only from same family */
+ if (af == AF_INET6 && *s != '2')
return -1;
+ if (af == AF_INET && *s != '1')
+ return -1;
+ s++;
+ if (s == data_limit)
+ return -1;
+ if (*s != edelim)
+ return -1;
+ s++;
+ if (s == data_limit)
+ return -1;
+ if (af == AF_INET6) {
+ if (in6_pton(s, data_limit - s, (u8 *)addr, edelim,
+ &ep) <= 0)
+ return -1;
+ } else {
+ if (in4_pton(s, data_limit - s, (u8 *)addr, edelim,
+ &ep) <= 0)
+ return -1;
}
+ s = (char *) ep;
+ if (s == data_limit)
+ return -1;
+ if (*s != edelim)
+ return -1;
+ s++;
}
-
- if (i != 5)
+ for (hport = 0; ; s++)
+ {
+ if (s == data_limit)
+ return -1;
+ if (!isdigit(*s))
+ break;
+ hport = hport * 10 + *s - '0';
+ }
+ if (s == data_limit || !hport || *s != edelim)
return -1;
-
- *start = s;
- *addr = get_unaligned((__be32 *) p);
- *port = get_unaligned((__be16 *) (p + 4));
+ s++;
+ *end = s;
+ *port = htons(hport);
return 1;
}
-/*
- * Look at outgoing ftp packets to catch the response to a PASV command
+/* Look at outgoing ftp packets to catch the response to a PASV/EPSV command
* from the server (inside-to-outside).
* When we see one, we build a connection entry with the client address,
* client port 0 (unknown at the moment), the server address and the
@@ -165,12 +248,13 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit,
* The outgoing packet should be something like
* "227 Entering Passive Mode (xxx,xxx,xxx,xxx,ppp,ppp)".
* xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
+ * The extended format for EPSV response provides usually only port:
+ * "229 Entering Extended Passive Mode (|||ppp|)"
*/
static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
- struct sk_buff *skb, int *diff)
+ struct sk_buff *skb, int *diff,
+ struct ip_vs_iphdr *ipvsh)
{
- struct iphdr *iph;
- struct tcphdr *th;
char *data, *data_limit;
char *start, *end;
union nf_inet_addr from;
@@ -184,14 +268,6 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
*diff = 0;
-#ifdef CONFIG_IP_VS_IPV6
- /* This application helper doesn't work with IPv6 yet,
- * so turn this into a no-op for IPv6 packets
- */
- if (cp->af == AF_INET6)
- return 1;
-#endif
-
/* Only useful for established sessions */
if (cp->state != IP_VS_TCP_S_ESTABLISHED)
return 1;
@@ -200,53 +276,77 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
if (!skb_make_writable(skb, skb->len))
return 0;
- if (cp->app_data == &ip_vs_ftp_pasv) {
- iph = ip_hdr(skb);
- th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
- data = (char *)th + (th->doff << 2);
+ if (cp->app_data == (void *) IP_VS_FTP_PASV) {
+ data = ip_vs_ftp_data_ptr(skb, ipvsh);
data_limit = skb_tail_pointer(skb);
+ if (!data || data >= data_limit)
+ return 1;
+
if (ip_vs_ftp_get_addrport(data, data_limit,
- SERVER_STRING,
- sizeof(SERVER_STRING)-1,
- '(', ')',
- &from.ip, &port,
+ SERVER_STRING_PASV,
+ sizeof(SERVER_STRING_PASV)-1,
+ '(', false, IP_VS_FTP_PASV,
+ &from, &port, cp->af,
&start, &end) != 1)
return 1;
- IP_VS_DBG(7, "PASV response (%pI4:%d) -> %pI4:%d detected\n",
+ IP_VS_DBG(7, "PASV response (%pI4:%u) -> %pI4:%u detected\n",
&from.ip, ntohs(port), &cp->caddr.ip, 0);
+ } else if (cp->app_data == (void *) IP_VS_FTP_EPSV) {
+ data = ip_vs_ftp_data_ptr(skb, ipvsh);
+ data_limit = skb_tail_pointer(skb);
- /*
- * Now update or create an connection entry for it
+ if (!data || data >= data_limit)
+ return 1;
+
+ /* Usually, data address is not specified but
+ * we support different address, so pre-set it.
*/
- {
- struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(cp->ipvs, AF_INET,
- iph->protocol, &from, port,
- &cp->caddr, 0, &p);
- n_cp = ip_vs_conn_out_get(&p);
- }
- if (!n_cp) {
- struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(cp->ipvs,
- AF_INET, IPPROTO_TCP, &cp->caddr,
- 0, &cp->vaddr, port, &p);
- /* As above, this is ipv4 only */
- n_cp = ip_vs_conn_new(&p, AF_INET, &from, port,
- IP_VS_CONN_F_NO_CPORT |
- IP_VS_CONN_F_NFCT,
- cp->dest, skb->mark);
- if (!n_cp)
- return 0;
+ from = cp->daddr;
+ if (ip_vs_ftp_get_addrport(data, data_limit,
+ SERVER_STRING_EPSV,
+ sizeof(SERVER_STRING_EPSV)-1,
+ '(', true, IP_VS_FTP_EPSV,
+ &from, &port, cp->af,
+ &start, &end) != 1)
+ return 1;
- /* add its controller */
- ip_vs_control_add(n_cp, cp);
- }
+ IP_VS_DBG_BUF(7, "EPSV response (%s:%u) -> %s:%u detected\n",
+ IP_VS_DBG_ADDR(cp->af, &from), ntohs(port),
+ IP_VS_DBG_ADDR(cp->af, &cp->caddr), 0);
+ } else {
+ return 1;
+ }
- /*
- * Replace the old passive address with the new one
- */
+ /* Now update or create a connection entry for it */
+ {
+ struct ip_vs_conn_param p;
+
+ ip_vs_conn_fill_param(cp->ipvs, cp->af,
+ ipvsh->protocol, &from, port,
+ &cp->caddr, 0, &p);
+ n_cp = ip_vs_conn_out_get(&p);
+ }
+ if (!n_cp) {
+ struct ip_vs_conn_param p;
+
+ ip_vs_conn_fill_param(cp->ipvs,
+ cp->af, ipvsh->protocol, &cp->caddr,
+ 0, &cp->vaddr, port, &p);
+ n_cp = ip_vs_conn_new(&p, cp->af, &from, port,
+ IP_VS_CONN_F_NO_CPORT |
+ IP_VS_CONN_F_NFCT,
+ cp->dest, skb->mark);
+ if (!n_cp)
+ return 0;
+
+ /* add its controller */
+ ip_vs_control_add(n_cp, cp);
+ }
+
+ /* Replace the old passive address with the new one */
+ if (cp->app_data == (void *) IP_VS_FTP_PASV) {
from.ip = n_cp->vaddr.ip;
port = n_cp->vport;
snprintf(buf, sizeof(buf), "%u,%u,%u,%u,%u,%u",
@@ -256,50 +356,54 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
((unsigned char *)&from.ip)[3],
ntohs(port) >> 8,
ntohs(port) & 0xFF);
+ } else if (cp->app_data == (void *) IP_VS_FTP_EPSV) {
+ from = n_cp->vaddr;
+ port = n_cp->vport;
+ /* Only port, client will use VIP for the data connection */
+ snprintf(buf, sizeof(buf), "|||%u|",
+ ntohs(port));
+ } else {
+ *buf = 0;
+ }
+ buf_len = strlen(buf);
- buf_len = strlen(buf);
-
- ct = nf_ct_get(skb, &ctinfo);
- if (ct) {
- bool mangled;
-
- /* If mangling fails this function will return 0
- * which will cause the packet to be dropped.
- * Mangling can only fail under memory pressure,
- * hopefully it will succeed on the retransmitted
- * packet.
- */
- mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
- iph->ihl * 4,
- start - data,
- end - start,
- buf, buf_len);
- if (mangled) {
- ip_vs_nfct_expect_related(skb, ct, n_cp,
- IPPROTO_TCP, 0, 0);
- if (skb->ip_summed == CHECKSUM_COMPLETE)
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- /* csum is updated */
- ret = 1;
- }
- }
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct) {
+ bool mangled;
- /*
- * Not setting 'diff' is intentional, otherwise the sequence
- * would be adjusted twice.
+ /* If mangling fails this function will return 0
+ * which will cause the packet to be dropped.
+ * Mangling can only fail under memory pressure,
+ * hopefully it will succeed on the retransmitted
+ * packet.
*/
-
- cp->app_data = NULL;
- ip_vs_tcp_conn_listen(n_cp);
- ip_vs_conn_put(n_cp);
- return ret;
+ mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+ ipvsh->len,
+ start - data,
+ end - start,
+ buf, buf_len);
+ if (mangled) {
+ ip_vs_nfct_expect_related(skb, ct, n_cp,
+ ipvsh->protocol, 0, 0);
+ if (skb->ip_summed == CHECKSUM_COMPLETE)
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ /* csum is updated */
+ ret = 1;
+ }
}
- return 1;
+
+ /* Not setting 'diff' is intentional, otherwise the sequence
+ * would be adjusted twice.
+ */
+
+ cp->app_data = (void *) IP_VS_FTP_ACTIVE;
+ ip_vs_tcp_conn_listen(n_cp);
+ ip_vs_conn_put(n_cp);
+ return ret;
}
-/*
- * Look at incoming ftp packets to catch the PASV/PORT command
+/* Look at incoming ftp packets to catch the PASV/PORT/EPRT/EPSV command
* (outside-to-inside).
*
* The incoming packet having the PORT command should be something like
@@ -308,12 +412,19 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* In this case, we create a connection entry using the client address and
* port, so that the active ftp data connection from the server can reach
* the client.
+ * Extended format:
+ * "EPSV\r\n" when client requests server address from same family
+ * "EPSV 1\r\n" when client requests IPv4 server address
+ * "EPSV 2\r\n" when client requests IPv6 server address
+ * "EPSV ALL\r\n" - not supported
+ * EPRT with specified delimiter (ASCII 33..126), "|" by default:
+ * "EPRT |1|IPv4ADDR|PORT|\r\n" when client provides IPv4 addrport
+ * "EPRT |2|IPv6ADDR|PORT|\r\n" when client provides IPv6 addrport
*/
static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
- struct sk_buff *skb, int *diff)
+ struct sk_buff *skb, int *diff,
+ struct ip_vs_iphdr *ipvsh)
{
- struct iphdr *iph;
- struct tcphdr *th;
char *data, *data_start, *data_limit;
char *start, *end;
union nf_inet_addr to;
@@ -323,14 +434,6 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
/* no diff required for incoming packets */
*diff = 0;
-#ifdef CONFIG_IP_VS_IPV6
- /* This application helper doesn't work with IPv6 yet,
- * so turn this into a no-op for IPv6 packets
- */
- if (cp->af == AF_INET6)
- return 1;
-#endif
-
/* Only useful for established sessions */
if (cp->state != IP_VS_TCP_S_ESTABLISHED)
return 1;
@@ -339,27 +442,48 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
if (!skb_make_writable(skb, skb->len))
return 0;
- /*
- * Detecting whether it is passive
- */
- iph = ip_hdr(skb);
- th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
-
- /* Since there may be OPTIONS in the TCP packet and the HLEN is
- the length of the header in 32-bit multiples, it is accurate
- to calculate data address by th+HLEN*4 */
- data = data_start = (char *)th + (th->doff << 2);
+ data = data_start = ip_vs_ftp_data_ptr(skb, ipvsh);
data_limit = skb_tail_pointer(skb);
+ if (!data || data >= data_limit)
+ return 1;
while (data <= data_limit - 6) {
- if (strncasecmp(data, "PASV\r\n", 6) == 0) {
+ if (cp->af == AF_INET &&
+ strncasecmp(data, "PASV\r\n", 6) == 0) {
/* Passive mode on */
IP_VS_DBG(7, "got PASV at %td of %td\n",
data - data_start,
data_limit - data_start);
- cp->app_data = &ip_vs_ftp_pasv;
+ cp->app_data = (void *) IP_VS_FTP_PASV;
return 1;
}
+
+ /* EPSV or EPSV<space><net-prt> */
+ if (strncasecmp(data, "EPSV", 4) == 0 &&
+ (data[4] == ' ' || data[4] == '\r')) {
+ if (data[4] == ' ') {
+ char proto = data[5];
+
+ if (data > data_limit - 7 || data[6] != '\r')
+ return 1;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6 && proto == '2') {
+ } else
+#endif
+ if (cp->af == AF_INET && proto == '1') {
+ } else {
+ return 1;
+ }
+ }
+ /* Extended Passive mode on */
+ IP_VS_DBG(7, "got EPSV at %td of %td\n",
+ data - data_start,
+ data_limit - data_start);
+ cp->app_data = (void *) IP_VS_FTP_EPSV;
+ return 1;
+ }
+
data++;
}
@@ -370,33 +494,52 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
* then create a new connection entry for the coming data
* connection.
*/
- if (ip_vs_ftp_get_addrport(data_start, data_limit,
- CLIENT_STRING, sizeof(CLIENT_STRING)-1,
- ' ', '\r', &to.ip, &port,
- &start, &end) != 1)
+ if (cp->af == AF_INET &&
+ ip_vs_ftp_get_addrport(data_start, data_limit,
+ CLIENT_STRING_PORT,
+ sizeof(CLIENT_STRING_PORT)-1,
+ ' ', false, IP_VS_FTP_PORT,
+ &to, &port, cp->af,
+ &start, &end) == 1) {
+
+ IP_VS_DBG(7, "PORT %pI4:%u detected\n", &to.ip, ntohs(port));
+
+ /* Now update or create a connection entry for it */
+ IP_VS_DBG(7, "protocol %s %pI4:%u %pI4:%u\n",
+ ip_vs_proto_name(ipvsh->protocol),
+ &to.ip, ntohs(port), &cp->vaddr.ip,
+ ntohs(cp->vport)-1);
+ } else if (ip_vs_ftp_get_addrport(data_start, data_limit,
+ CLIENT_STRING_EPRT,
+ sizeof(CLIENT_STRING_EPRT)-1,
+ ' ', true, IP_VS_FTP_EPRT,
+ &to, &port, cp->af,
+ &start, &end) == 1) {
+
+ IP_VS_DBG_BUF(7, "EPRT %s:%u detected\n",
+ IP_VS_DBG_ADDR(cp->af, &to), ntohs(port));
+
+ /* Now update or create a connection entry for it */
+ IP_VS_DBG_BUF(7, "protocol %s %s:%u %s:%u\n",
+ ip_vs_proto_name(ipvsh->protocol),
+ IP_VS_DBG_ADDR(cp->af, &to), ntohs(port),
+ IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
+ ntohs(cp->vport)-1);
+ } else {
return 1;
-
- IP_VS_DBG(7, "PORT %pI4:%d detected\n", &to.ip, ntohs(port));
+ }
/* Passive mode off */
- cp->app_data = NULL;
-
- /*
- * Now update or create a connection entry for it
- */
- IP_VS_DBG(7, "protocol %s %pI4:%d %pI4:%d\n",
- ip_vs_proto_name(iph->protocol),
- &to.ip, ntohs(port), &cp->vaddr.ip, 0);
+ cp->app_data = (void *) IP_VS_FTP_ACTIVE;
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(cp->ipvs, AF_INET,
- iph->protocol, &to, port, &cp->vaddr,
+ ip_vs_conn_fill_param(cp->ipvs, cp->af,
+ ipvsh->protocol, &to, port, &cp->vaddr,
htons(ntohs(cp->vport)-1), &p);
n_cp = ip_vs_conn_in_get(&p);
if (!n_cp) {
- /* This is ipv4 only */
- n_cp = ip_vs_conn_new(&p, AF_INET, &cp->daddr,
+ n_cp = ip_vs_conn_new(&p, cp->af, &cp->daddr,
htons(ntohs(cp->dport)-1),
IP_VS_CONN_F_NFCT, cp->dest,
skb->mark);
@@ -454,7 +597,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
ret = register_ip_vs_app_inc(ipvs, app, app->protocol, ports[i]);
if (ret)
goto err_unreg;
- pr_info("%s: loaded support on port[%d] = %d\n",
+ pr_info("%s: loaded support on port[%d] = %u\n",
app->name, i, ports[i]);
}
return 0;
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 6cf3fd81a5ec..eb8b9c883889 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -67,15 +67,20 @@
#include <net/netfilter/nf_conntrack_zones.h>
-#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u"
-#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \
- &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \
+#define FMT_TUPLE "%s:%u->%s:%u/%u"
+#define ARG_TUPLE(T) IP_VS_DBG_ADDR((T)->src.l3num, &(T)->src.u3), \
+ ntohs((T)->src.u.all), \
+ IP_VS_DBG_ADDR((T)->src.l3num, &(T)->dst.u3), \
+ ntohs((T)->dst.u.all), \
(T)->dst.protonum
-#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u"
-#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \
- &((C)->vaddr.ip), ntohs((C)->vport), \
- &((C)->daddr.ip), ntohs((C)->dport), \
+#define FMT_CONN "%s:%u->%s:%u->%s:%u/%u:%u"
+#define ARG_CONN(C) IP_VS_DBG_ADDR((C)->af, &((C)->caddr)), \
+ ntohs((C)->cport), \
+ IP_VS_DBG_ADDR((C)->af, &((C)->vaddr)), \
+ ntohs((C)->vport), \
+ IP_VS_DBG_ADDR((C)->daf, &((C)->daddr)), \
+ ntohs((C)->dport), \
(C)->protocol, (C)->state
void
@@ -127,13 +132,17 @@ ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin)
new_tuple.dst.protonum != IPPROTO_ICMPV6)
new_tuple.dst.u.tcp.port = cp->vport;
}
- IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
- "ctinfo=%d, old reply=" FMT_TUPLE
- ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n",
- __func__, ct, ct->status, ctinfo,
- ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple),
- ARG_TUPLE(&new_tuple), ARG_CONN(cp));
+ IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
+ "ctinfo=%d, old reply=" FMT_TUPLE "\n",
+ __func__, ct, ct->status, ctinfo,
+ ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple));
+ IP_VS_DBG_BUF(7, "%s: Updating conntrack ct=%p, status=0x%lX, "
+ "ctinfo=%d, new reply=" FMT_TUPLE "\n",
+ __func__, ct, ct->status, ctinfo,
+ ARG_TUPLE(&new_tuple));
nf_conntrack_alter_reply(ct, &new_tuple);
+ IP_VS_DBG_BUF(7, "%s: Updated conntrack ct=%p for cp=" FMT_CONN "\n",
+ __func__, ct, ARG_CONN(cp));
}
int ip_vs_confirm_conntrack(struct sk_buff *skb)
@@ -152,9 +161,6 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
struct ip_vs_conn_param p;
struct net *net = nf_ct_net(ct);
- if (exp->tuple.src.l3num != PF_INET)
- return;
-
/*
* We assume that no NF locks are held before this callback.
* ip_vs_conn_out_get and ip_vs_conn_in_get should match their
@@ -171,19 +177,15 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
cp = ip_vs_conn_out_get(&p);
if (cp) {
/* Change reply CLIENT->RS to CLIENT->VS */
+ IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found inout cp="
+ FMT_CONN "\n",
+ __func__, ct, ct->status, ARG_CONN(cp));
new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
- FMT_TUPLE ", found inout cp=" FMT_CONN "\n",
- __func__, ct, ct->status,
- ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
- ARG_CONN(cp));
+ IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple="
+ FMT_TUPLE "\n",
+ __func__, ct, ARG_TUPLE(&new_reply));
new_reply.dst.u3 = cp->vaddr;
new_reply.dst.u.tcp.port = cp->vport;
- IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
- ", inout cp=" FMT_CONN "\n",
- __func__, ct,
- ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
- ARG_CONN(cp));
goto alter;
}
@@ -191,25 +193,21 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
cp = ip_vs_conn_in_get(&p);
if (cp) {
/* Change reply VS->CLIENT to RS->CLIENT */
+ IP_VS_DBG_BUF(7, "%s: for ct=%p, status=0x%lX found outin cp="
+ FMT_CONN "\n",
+ __func__, ct, ct->status, ARG_CONN(cp));
new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", "
- FMT_TUPLE ", found outin cp=" FMT_CONN "\n",
- __func__, ct, ct->status,
- ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
- ARG_CONN(cp));
+ IP_VS_DBG_BUF(7, "%s: ct=%p before alter: reply tuple="
+ FMT_TUPLE "\n",
+ __func__, ct, ARG_TUPLE(&new_reply));
new_reply.src.u3 = cp->daddr;
new_reply.src.u.tcp.port = cp->dport;
- IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", "
- FMT_TUPLE ", outin cp=" FMT_CONN "\n",
- __func__, ct,
- ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
- ARG_CONN(cp));
goto alter;
}
- IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
- " - unknown expect\n",
- __func__, ct, ct->status, ARG_TUPLE(orig));
+ IP_VS_DBG_BUF(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE
+ " - unknown expect\n",
+ __func__, ct, ct->status, ARG_TUPLE(orig));
return;
alter:
@@ -247,8 +245,8 @@ void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct,
exp->expectfn = ip_vs_nfct_expect_callback;
- IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
- __func__, ct, ARG_TUPLE(&exp->tuple));
+ IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
+ __func__, ct, ARG_TUPLE(&exp->tuple));
nf_ct_expect_related(exp);
nf_ct_expect_put(exp);
}
@@ -274,26 +272,25 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
tuple.dst.u3 = cp->vaddr;
tuple.dst.u.all = cp->vport;
- IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
- " for conn " FMT_CONN "\n",
- __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
+ IP_VS_DBG_BUF(7, "%s: dropping conntrack for conn " FMT_CONN "\n",
+ __func__, ARG_CONN(cp));
h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (nf_ct_kill(ct)) {
- IP_VS_DBG(7, "%s: ct=%p, deleted conntrack for tuple="
- FMT_TUPLE "\n",
- __func__, ct, ARG_TUPLE(&tuple));
+ IP_VS_DBG_BUF(7, "%s: ct=%p deleted for tuple="
+ FMT_TUPLE "\n",
+ __func__, ct, ARG_TUPLE(&tuple));
} else {
- IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
- FMT_TUPLE "\n",
- __func__, ct, ARG_TUPLE(&tuple));
+ IP_VS_DBG_BUF(7, "%s: ct=%p, no conntrack for tuple="
+ FMT_TUPLE "\n",
+ __func__, ct, ARG_TUPLE(&tuple));
}
nf_ct_put(ct);
} else {
- IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
- __func__, ARG_TUPLE(&tuple));
+ IP_VS_DBG_BUF(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
+ __func__, ARG_TUPLE(&tuple));
}
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index eff7569824e5..3250c4a1111e 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -109,7 +109,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0;
/* Call application helper if needed */
- ret = ip_vs_app_pkt_out(cp, skb);
+ ret = ip_vs_app_pkt_out(cp, skb, iph);
if (ret == 0)
return 0;
/* ret=2: csum update is needed after payload mangling */
@@ -156,7 +156,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0;
/* Call application helper if needed */
- ret = ip_vs_app_pkt_in(cp, skb);
+ ret = ip_vs_app_pkt_in(cp, skb, iph);
if (ret == 0)
return 0;
/* ret=2: csum update is needed after payload mangling */
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 569631d2b2a1..80d10ad12a15 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -170,7 +170,7 @@ tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0;
/* Call application helper if needed */
- if (!(ret = ip_vs_app_pkt_out(cp, skb)))
+ if (!(ret = ip_vs_app_pkt_out(cp, skb, iph)))
return 0;
/* ret=2: csum update is needed after payload mangling */
if (ret == 1)
@@ -251,7 +251,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
* Attempt ip_vs_app call.
* It will fix ip_vs_conn and iph ack_seq stuff
*/
- if (!(ret = ip_vs_app_pkt_in(cp, skb)))
+ if (!(ret = ip_vs_app_pkt_in(cp, skb, iph)))
return 0;
/* ret=2: csum update is needed after payload mangling */
if (ret == 1)
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index c15ef7c2a1fa..e0ef11c3691e 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -162,7 +162,7 @@ udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
/*
* Call application helper if needed
*/
- if (!(ret = ip_vs_app_pkt_out(cp, skb)))
+ if (!(ret = ip_vs_app_pkt_out(cp, skb, iph)))
return 0;
/* ret=2: csum update is needed after payload mangling */
if (ret == 1)
@@ -246,7 +246,7 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
* Attempt ip_vs_app call.
* It will fix ip_vs_conn
*/
- if (!(ret = ip_vs_app_pkt_in(cp, skb)))
+ if (!(ret = ip_vs_app_pkt_in(cp, skb, iph)))
return 0;
/* ret=2: csum update is needed after payload mangling */
if (ret == 1)
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index 153e690e2893..3b5059a8dcdd 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -79,7 +79,7 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen)
return memcmp(a, b, klen * sizeof(u32));
}
-static bool add_hlist(struct hlist_head *head,
+bool nf_conncount_add(struct hlist_head *head,
const struct nf_conntrack_tuple *tuple)
{
struct nf_conncount_tuple *conn;
@@ -91,12 +91,12 @@ static bool add_hlist(struct hlist_head *head,
hlist_add_head(&conn->node, head);
return true;
}
+EXPORT_SYMBOL_GPL(nf_conncount_add);
-static unsigned int check_hlist(struct net *net,
- struct hlist_head *head,
- const struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_zone *zone,
- bool *addit)
+unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head,
+ const struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_zone *zone,
+ bool *addit)
{
const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn;
@@ -141,6 +141,7 @@ static unsigned int check_hlist(struct net *net,
return length;
}
+EXPORT_SYMBOL_GPL(nf_conncount_lookup);
static void tree_nodes_free(struct rb_root *root,
struct nf_conncount_rb *gc_nodes[],
@@ -187,13 +188,15 @@ count_tree(struct net *net, struct rb_root *root,
} else {
/* same source network -> be counted! */
unsigned int count;
- count = check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
+
+ count = nf_conncount_lookup(net, &rbconn->hhead, tuple,
+ zone, &addit);
tree_nodes_free(root, gc_nodes, gc_count);
if (!addit)
return count;
- if (!add_hlist(&rbconn->hhead, tuple))
+ if (!nf_conncount_add(&rbconn->hhead, tuple))
return 0; /* hotdrop */
return count + 1;
@@ -203,7 +206,7 @@ count_tree(struct net *net, struct rb_root *root,
continue;
/* only used for GC on hhead, retval and 'addit' ignored */
- check_hlist(net, &rbconn->hhead, tuple, zone, &addit);
+ nf_conncount_lookup(net, &rbconn->hhead, tuple, zone, &addit);
if (hlist_empty(&rbconn->hhead))
gc_nodes[gc_count++] = rbconn;
}
@@ -303,11 +306,19 @@ struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int family
}
EXPORT_SYMBOL_GPL(nf_conncount_init);
-static void destroy_tree(struct rb_root *r)
+void nf_conncount_cache_free(struct hlist_head *hhead)
{
struct nf_conncount_tuple *conn;
- struct nf_conncount_rb *rbconn;
struct hlist_node *n;
+
+ hlist_for_each_entry_safe(conn, n, hhead, node)
+ kmem_cache_free(conncount_conn_cachep, conn);
+}
+EXPORT_SYMBOL_GPL(nf_conncount_cache_free);
+
+static void destroy_tree(struct rb_root *r)
+{
+ struct nf_conncount_rb *rbconn;
struct rb_node *node;
while ((node = rb_first(r)) != NULL) {
@@ -315,8 +326,7 @@ static void destroy_tree(struct rb_root *r)
rb_erase(node, r);
- hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
- kmem_cache_free(conncount_conn_cachep, conn);
+ nf_conncount_cache_free(&rbconn->hhead);
kmem_cache_free(conncount_rb_cachep, rbconn);
}
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 82451b7e0acb..15ed91309992 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -220,7 +220,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
struct net_device *outdev;
- const struct rtable *rt;
+ struct rtable *rt;
unsigned int thoff;
struct iphdr *iph;
__be32 nexthop;
@@ -241,7 +241,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- rt = (const struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
+ rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)) &&
(ip_hdr(skb)->frag_off & htons(IP_DF)) != 0)
@@ -264,6 +264,7 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
skb->dev = outdev;
nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
+ skb_dst_set_noref(skb, &rt->dst);
neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
return NF_STOLEN;
@@ -480,6 +481,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
skb->dev = outdev;
nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
+ skb_dst_set_noref(skb, &rt->dst);
neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
return NF_STOLEN;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 821f8d835f7a..b7df32a56e7e 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1036,7 +1036,7 @@ static struct pernet_operations nat_net_ops = {
.size = sizeof(struct nat_net),
};
-struct nf_nat_hook nat_hook = {
+static struct nf_nat_hook nat_hook = {
.parse_nat_setup = nfnetlink_parse_nat_setup,
#ifdef CONFIG_XFRM
.decode_session = __nf_nat_decode_session,
diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c
index 7c4bb0a773ca..adee04af8d43 100644
--- a/net/netfilter/nf_nat_redirect.c
+++ b/net/netfilter/nf_nat_redirect.c
@@ -15,7 +15,6 @@
#include <linux/inetdevice.h>
#include <linux/ip.h>
#include <linux/kernel.h>
-#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
#include <linux/types.h>
@@ -124,6 +123,3 @@ nf_nat_redirect_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST);
}
EXPORT_SYMBOL_GPL(nf_nat_redirect_ipv6);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 87b2a77add65..ca4c4d994ddb 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -28,6 +28,42 @@ static LIST_HEAD(nf_tables_objects);
static LIST_HEAD(nf_tables_flowtables);
static u64 table_handle;
+enum {
+ NFT_VALIDATE_SKIP = 0,
+ NFT_VALIDATE_NEED,
+ NFT_VALIDATE_DO,
+};
+
+static u32 nft_chain_hash(const void *data, u32 len, u32 seed);
+static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed);
+static int nft_chain_hash_cmp(struct rhashtable_compare_arg *, const void *);
+
+static const struct rhashtable_params nft_chain_ht_params = {
+ .head_offset = offsetof(struct nft_chain, rhlhead),
+ .key_offset = offsetof(struct nft_chain, name),
+ .hashfn = nft_chain_hash,
+ .obj_hashfn = nft_chain_hash_obj,
+ .obj_cmpfn = nft_chain_hash_cmp,
+ .locks_mul = 1,
+ .automatic_shrinking = true,
+};
+
+static void nft_validate_state_update(struct net *net, u8 new_validate_state)
+{
+ switch (net->nft.validate_state) {
+ case NFT_VALIDATE_SKIP:
+ WARN_ON_ONCE(new_validate_state == NFT_VALIDATE_DO);
+ break;
+ case NFT_VALIDATE_NEED:
+ break;
+ case NFT_VALIDATE_DO:
+ if (new_validate_state == NFT_VALIDATE_NEED)
+ return;
+ }
+
+ net->nft.validate_state = new_validate_state;
+}
+
static void nft_ctx_init(struct nft_ctx *ctx,
struct net *net,
const struct sk_buff *skb,
@@ -373,7 +409,7 @@ static struct nft_table *nft_table_lookup(const struct net *net,
if (nla == NULL)
return ERR_PTR(-EINVAL);
- list_for_each_entry(table, &net->nft.tables, list) {
+ list_for_each_entry_rcu(table, &net->nft.tables, list) {
if (!nla_strcmp(nla, table->name) &&
table->family == family &&
nft_active_genmask(table, genmask))
@@ -546,6 +582,24 @@ done:
return skb->len;
}
+static int nft_netlink_dump_start_rcu(struct sock *nlsk, struct sk_buff *skb,
+ const struct nlmsghdr *nlh,
+ struct netlink_dump_control *c)
+{
+ int err;
+
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+
+ rcu_read_unlock();
+ err = netlink_dump_start(nlsk, skb, nlh, c);
+ rcu_read_lock();
+ module_put(THIS_MODULE);
+
+ return err;
+}
+
+/* called with rcu_read_lock held */
static int nf_tables_gettable(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -561,8 +615,10 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nf_tables_dump_tables,
+ .module = THIS_MODULE,
};
- return netlink_dump_start(nlsk, skb, nlh, &c);
+
+ return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
table = nft_table_lookup(net, nla[NFTA_TABLE_NAME], family, genmask);
@@ -571,7 +627,7 @@ static int nf_tables_gettable(struct net *net, struct sock *nlsk,
return PTR_ERR(table);
}
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
return -ENOMEM;
@@ -678,6 +734,29 @@ err:
return ret;
}
+static u32 nft_chain_hash(const void *data, u32 len, u32 seed)
+{
+ const char *name = data;
+
+ return jhash(name, strlen(name), seed);
+}
+
+static u32 nft_chain_hash_obj(const void *data, u32 len, u32 seed)
+{
+ const struct nft_chain *chain = data;
+
+ return nft_chain_hash(chain->name, 0, seed);
+}
+
+static int nft_chain_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct nft_chain *chain = ptr;
+ const char *name = arg->key;
+
+ return strcmp(chain->name, name);
+}
+
static int nf_tables_newtable(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -724,6 +803,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
if (table->name == NULL)
goto err_strdup;
+ err = rhltable_init(&table->chains_ht, &nft_chain_ht_params);
+ if (err)
+ goto err_chain_ht;
+
INIT_LIST_HEAD(&table->chains);
INIT_LIST_HEAD(&table->sets);
INIT_LIST_HEAD(&table->objects);
@@ -740,6 +823,8 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk,
list_add_tail_rcu(&table->list, &net->nft.tables);
return 0;
err_trans:
+ rhltable_destroy(&table->chains_ht);
+err_chain_ht:
kfree(table->name);
err_strdup:
kfree(table);
@@ -880,6 +965,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
{
BUG_ON(ctx->table->use > 0);
+ rhltable_destroy(&ctx->table->chains_ht);
kfree(ctx->table->name);
kfree(ctx->table);
}
@@ -925,21 +1011,35 @@ nft_chain_lookup_byhandle(const struct nft_table *table, u64 handle, u8 genmask)
return ERR_PTR(-ENOENT);
}
-static struct nft_chain *nft_chain_lookup(const struct nft_table *table,
+static struct nft_chain *nft_chain_lookup(struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
+ char search[NFT_CHAIN_MAXNAMELEN + 1];
+ struct rhlist_head *tmp, *list;
struct nft_chain *chain;
if (nla == NULL)
return ERR_PTR(-EINVAL);
- list_for_each_entry(chain, &table->chains, list) {
- if (!nla_strcmp(nla, chain->name) &&
- nft_active_genmask(chain, genmask))
- return chain;
- }
+ nla_strlcpy(search, nla, sizeof(search));
- return ERR_PTR(-ENOENT);
+ WARN_ON(!rcu_read_lock_held() &&
+ !lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+
+ chain = ERR_PTR(-ENOENT);
+ rcu_read_lock();
+ list = rhltable_lookup(&table->chains_ht, search, nft_chain_ht_params);
+ if (!list)
+ goto out_unlock;
+
+ rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) {
+ if (nft_active_genmask(chain, genmask))
+ goto out_unlock;
+ }
+ chain = ERR_PTR(-ENOENT);
+out_unlock:
+ rcu_read_unlock();
+ return chain;
}
static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
@@ -1135,6 +1235,7 @@ done:
return skb->len;
}
+/* called with rcu_read_lock held */
static int nf_tables_getchain(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -1142,8 +1243,8 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
- const struct nft_table *table;
const struct nft_chain *chain;
+ struct nft_table *table;
struct sk_buff *skb2;
int family = nfmsg->nfgen_family;
int err;
@@ -1151,8 +1252,10 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nf_tables_dump_chains,
+ .module = THIS_MODULE,
};
- return netlink_dump_start(nlsk, skb, nlh, &c);
+
+ return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
table = nft_table_lookup(net, nla[NFTA_CHAIN_TABLE], family, genmask);
@@ -1167,7 +1270,7 @@ static int nf_tables_getchain(struct net *net, struct sock *nlsk,
return PTR_ERR(chain);
}
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
return -ENOMEM;
@@ -1233,8 +1336,24 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain,
rcu_assign_pointer(chain->stats, newstats);
synchronize_rcu();
free_percpu(oldstats);
- } else
+ } else {
rcu_assign_pointer(chain->stats, newstats);
+ static_branch_inc(&nft_counters_enabled);
+ }
+}
+
+static void nf_tables_chain_free_chain_rules(struct nft_chain *chain)
+{
+ struct nft_rule **g0 = rcu_dereference_raw(chain->rules_gen_0);
+ struct nft_rule **g1 = rcu_dereference_raw(chain->rules_gen_1);
+
+ if (g0 != g1)
+ kvfree(g1);
+ kvfree(g0);
+
+ /* should be NULL either via abort or via successful commit */
+ WARN_ON_ONCE(chain->rules_next);
+ kvfree(chain->rules_next);
}
static void nf_tables_chain_destroy(struct nft_ctx *ctx)
@@ -1243,6 +1362,9 @@ static void nf_tables_chain_destroy(struct nft_ctx *ctx)
BUG_ON(chain->use > 0);
+ /* no concurrent access possible anymore */
+ nf_tables_chain_free_chain_rules(chain);
+
if (nft_is_base_chain(chain)) {
struct nft_base_chain *basechain = nft_base_chain(chain);
@@ -1335,6 +1457,27 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook)
module_put(hook->type->owner);
}
+struct nft_rules_old {
+ struct rcu_head h;
+ struct nft_rule **start;
+};
+
+static struct nft_rule **nf_tables_chain_alloc_rules(const struct nft_chain *chain,
+ unsigned int alloc)
+{
+ if (alloc > INT_MAX)
+ return NULL;
+
+ alloc += 1; /* NULL, ends rules */
+ if (sizeof(struct nft_rule *) > INT_MAX / alloc)
+ return NULL;
+
+ alloc *= sizeof(struct nft_rule *);
+ alloc += sizeof(struct nft_rules_old);
+
+ return kvmalloc(alloc, GFP_KERNEL);
+}
+
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
u8 policy, bool create)
{
@@ -1344,6 +1487,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_stats __percpu *stats;
struct net *net = ctx->net;
struct nft_chain *chain;
+ struct nft_rule **rules;
int err;
if (table->use == UINT_MAX)
@@ -1406,13 +1550,31 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
goto err1;
}
+ rules = nf_tables_chain_alloc_rules(chain, 0);
+ if (!rules) {
+ err = -ENOMEM;
+ goto err1;
+ }
+
+ *rules = NULL;
+ rcu_assign_pointer(chain->rules_gen_0, rules);
+ rcu_assign_pointer(chain->rules_gen_1, rules);
+
err = nf_tables_register_hook(net, table, chain);
if (err < 0)
goto err1;
+ err = rhltable_insert_key(&table->chains_ht, chain->name,
+ &chain->rhlhead, nft_chain_ht_params);
+ if (err)
+ goto err2;
+
err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
- if (err < 0)
+ if (err < 0) {
+ rhltable_remove(&table->chains_ht, &chain->rhlhead,
+ nft_chain_ht_params);
goto err2;
+ }
table->use++;
list_add_tail_rcu(&chain->list, &table->chains);
@@ -1849,19 +2011,7 @@ static int nf_tables_newexpr(const struct nft_ctx *ctx,
goto err1;
}
- if (ops->validate) {
- const struct nft_data *data = NULL;
-
- err = ops->validate(ctx, expr, &data);
- if (err < 0)
- goto err2;
- }
-
return 0;
-
-err2:
- if (ops->destroy)
- ops->destroy(ctx, expr);
err1:
expr->ops = NULL;
return err;
@@ -1920,7 +2070,7 @@ static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain,
struct nft_rule *rule;
// FIXME: this sucks
- list_for_each_entry(rule, &chain->rules, list) {
+ list_for_each_entry_rcu(rule, &chain->rules, list) {
if (handle == rule->handle)
return rule;
}
@@ -2116,6 +2266,7 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
return 0;
}
+/* called with rcu_read_lock held */
static int nf_tables_getrule(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -2123,9 +2274,9 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
{
const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
u8 genmask = nft_genmask_cur(net);
- const struct nft_table *table;
const struct nft_chain *chain;
const struct nft_rule *rule;
+ struct nft_table *table;
struct sk_buff *skb2;
int family = nfmsg->nfgen_family;
int err;
@@ -2134,18 +2285,19 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
struct netlink_dump_control c = {
.dump = nf_tables_dump_rules,
.done = nf_tables_dump_rules_done,
+ .module = THIS_MODULE,
};
if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) {
struct nft_rule_dump_ctx *ctx;
- ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
if (!ctx)
return -ENOMEM;
if (nla[NFTA_RULE_TABLE]) {
ctx->table = nla_strdup(nla[NFTA_RULE_TABLE],
- GFP_KERNEL);
+ GFP_ATOMIC);
if (!ctx->table) {
kfree(ctx);
return -ENOMEM;
@@ -2153,7 +2305,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
}
if (nla[NFTA_RULE_CHAIN]) {
ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN],
- GFP_KERNEL);
+ GFP_ATOMIC);
if (!ctx->chain) {
kfree(ctx->table);
kfree(ctx);
@@ -2163,7 +2315,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
c.data = ctx;
}
- return netlink_dump_start(nlsk, skb, nlh, &c);
+ return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
table = nft_table_lookup(net, nla[NFTA_RULE_TABLE], family, genmask);
@@ -2184,7 +2336,7 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
return PTR_ERR(rule);
}
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
return -ENOMEM;
@@ -2225,6 +2377,53 @@ static void nf_tables_rule_release(const struct nft_ctx *ctx,
nf_tables_rule_destroy(ctx, rule);
}
+int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
+{
+ struct nft_expr *expr, *last;
+ const struct nft_data *data;
+ struct nft_rule *rule;
+ int err;
+
+ list_for_each_entry(rule, &chain->rules, list) {
+ if (!nft_is_active_next(ctx->net, rule))
+ continue;
+
+ nft_rule_for_each_expr(expr, last, rule) {
+ if (!expr->ops->validate)
+ continue;
+
+ err = expr->ops->validate(ctx, expr, &data);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_chain_validate);
+
+static int nft_table_validate(struct net *net, const struct nft_table *table)
+{
+ struct nft_chain *chain;
+ struct nft_ctx ctx = {
+ .net = net,
+ .family = table->family,
+ };
+ int err;
+
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_base_chain(chain))
+ continue;
+
+ ctx.chain = chain;
+ err = nft_chain_validate(&ctx, chain);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
#define NFT_RULE_MAXEXPRS 128
static struct nft_expr_info *info;
@@ -2352,6 +2551,10 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
err = nf_tables_newexpr(&ctx, &info[i], expr);
if (err < 0)
goto err2;
+
+ if (info[i].ops->validate)
+ nft_validate_state_update(net, NFT_VALIDATE_NEED);
+
info[i].ops = NULL;
expr = nft_expr_next(expr);
}
@@ -2395,8 +2598,11 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
}
}
chain->use++;
- return 0;
+ if (net->nft.validate_state == NFT_VALIDATE_DO)
+ return nft_table_validate(net, table);
+
+ return 0;
err2:
nf_tables_rule_release(&ctx, rule);
err1:
@@ -2655,7 +2861,7 @@ static struct nft_set *nft_set_lookup(const struct nft_table *table,
if (nla == NULL)
return ERR_PTR(-EINVAL);
- list_for_each_entry(set, &table->sets, list) {
+ list_for_each_entry_rcu(set, &table->sets, list) {
if (!nla_strcmp(nla, set->name) &&
nft_active_genmask(set, genmask))
return set;
@@ -2781,7 +2987,7 @@ static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result)
return 0;
}
-static u64 nf_jiffies64_to_msecs(u64 input)
+static __be64 nf_jiffies64_to_msecs(u64 input)
{
u64 ms = jiffies64_to_nsecs(input);
@@ -2960,6 +3166,7 @@ static int nf_tables_dump_sets_done(struct netlink_callback *cb)
return 0;
}
+/* called with rcu_read_lock held */
static int nf_tables_getset(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -2982,17 +3189,18 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
struct netlink_dump_control c = {
.dump = nf_tables_dump_sets,
.done = nf_tables_dump_sets_done,
+ .module = THIS_MODULE,
};
struct nft_ctx *ctx_dump;
- ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_KERNEL);
+ ctx_dump = kmalloc(sizeof(*ctx_dump), GFP_ATOMIC);
if (ctx_dump == NULL)
return -ENOMEM;
*ctx_dump = ctx;
c.data = ctx_dump;
- return netlink_dump_start(nlsk, skb, nlh, &c);
+ return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
/* Only accept unspec with dump */
@@ -3005,7 +3213,7 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk,
if (IS_ERR(set))
return PTR_ERR(set);
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (skb2 == NULL)
return -ENOMEM;
@@ -3219,6 +3427,8 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
}
INIT_LIST_HEAD(&set->bindings);
+ set->table = table;
+ write_pnet(&set->net, net);
set->ops = ops;
set->ktype = ktype;
set->klen = desc.klen;
@@ -3746,7 +3956,7 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
ext = nft_set_elem_ext(set, &elem);
err = -ENOMEM;
- skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
if (skb == NULL)
goto err1;
@@ -3768,6 +3978,7 @@ err1:
return err == -EAGAIN ? -ENOBUFS : err;
}
+/* called with rcu_read_lock held */
static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -3792,10 +4003,11 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
struct netlink_dump_control c = {
.dump = nf_tables_dump_set,
.done = nf_tables_dump_set_done,
+ .module = THIS_MODULE,
};
struct nft_set_dump_ctx *dump_ctx;
- dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL);
+ dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_ATOMIC);
if (!dump_ctx)
return -ENOMEM;
@@ -3803,7 +4015,7 @@ static int nf_tables_getsetelem(struct net *net, struct sock *nlsk,
dump_ctx->ctx = ctx;
c.data = dump_ctx;
- return netlink_dump_start(nlsk, skb, nlh, &c);
+ return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
@@ -3894,12 +4106,24 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
+ struct nft_ctx ctx = {
+ .net = read_pnet(&set->net),
+ .family = set->table->family,
+ };
nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
- if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
- nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
+ if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) {
+ struct nft_expr *expr = nft_set_ext_expr(ext);
+
+ if (expr->ops->destroy_clone) {
+ expr->ops->destroy_clone(&ctx, expr);
+ module_put(expr->ops->type->owner);
+ } else {
+ nf_tables_expr_destroy(&ctx, expr);
+ }
+ }
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
(*nft_set_ext_obj(ext))->use--;
kfree(elem);
@@ -3909,12 +4133,13 @@ EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
/* Only called from commit path, nft_set_elem_deactivate() already deals with
* the refcounting from the preparation phase.
*/
-static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem)
+static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set, void *elem)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
- nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
+ nf_tables_expr_destroy(ctx, nft_set_ext_expr(ext));
kfree(elem);
}
@@ -4034,6 +4259,12 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
d2.type, d2.len);
if (err < 0)
goto err3;
+
+ if (d2.type == NFT_DATA_VERDICT &&
+ (data.verdict.code == NFT_GOTO ||
+ data.verdict.code == NFT_JUMP))
+ nft_validate_state_update(ctx->net,
+ NFT_VALIDATE_NEED);
}
nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len);
@@ -4133,7 +4364,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
const struct nlattr *attr;
struct nft_set *set;
struct nft_ctx ctx;
- int rem, err = 0;
+ int rem, err;
if (nla[NFTA_SET_ELEM_LIST_ELEMENTS] == NULL)
return -EINVAL;
@@ -4154,9 +4385,13 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags);
if (err < 0)
- break;
+ return err;
}
- return err;
+
+ if (net->nft.validate_state == NFT_VALIDATE_DO)
+ return nft_table_validate(net, ctx.table);
+
+ return 0;
}
/**
@@ -4426,7 +4661,7 @@ struct nft_object *nft_obj_lookup(const struct nft_table *table,
{
struct nft_object *obj;
- list_for_each_entry(obj, &table->objects, list) {
+ list_for_each_entry_rcu(obj, &table->objects, list) {
if (!nla_strcmp(nla, obj->name) &&
objtype == obj->ops->type->type &&
nft_active_genmask(obj, genmask))
@@ -4635,7 +4870,7 @@ err3:
kfree(obj->name);
err2:
if (obj->ops->destroy)
- obj->ops->destroy(obj);
+ obj->ops->destroy(&ctx, obj);
kfree(obj);
err1:
module_put(type->owner);
@@ -4711,7 +4946,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (filter && filter->table[0] &&
+ if (filter && filter->table &&
strcmp(filter->table, table->name))
goto cont;
if (filter &&
@@ -4756,12 +4991,12 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
{
struct nft_obj_filter *filter;
- filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
if (!filter)
return ERR_PTR(-ENOMEM);
if (nla[NFTA_OBJ_TABLE]) {
- filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_KERNEL);
+ filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_ATOMIC);
if (!filter->table) {
kfree(filter);
return ERR_PTR(-ENOMEM);
@@ -4773,6 +5008,7 @@ nft_obj_filter_alloc(const struct nlattr * const nla[])
return filter;
}
+/* called with rcu_read_lock held */
static int nf_tables_getobj(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[],
@@ -4792,6 +5028,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
struct netlink_dump_control c = {
.dump = nf_tables_dump_obj,
.done = nf_tables_dump_obj_done,
+ .module = THIS_MODULE,
};
if (nla[NFTA_OBJ_TABLE] ||
@@ -4804,7 +5041,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
c.data = filter;
}
- return netlink_dump_start(nlsk, skb, nlh, &c);
+ return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
if (!nla[NFTA_OBJ_NAME] ||
@@ -4824,7 +5061,7 @@ static int nf_tables_getobj(struct net *net, struct sock *nlsk,
return PTR_ERR(obj);
}
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
return -ENOMEM;
@@ -4843,10 +5080,10 @@ err:
return err;
}
-static void nft_obj_destroy(struct nft_object *obj)
+static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj)
{
if (obj->ops->destroy)
- obj->ops->destroy(obj);
+ obj->ops->destroy(ctx, obj);
module_put(obj->ops->type->owner);
kfree(obj->name);
@@ -4969,7 +5206,7 @@ struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
{
struct nft_flowtable *flowtable;
- list_for_each_entry(flowtable, &table->flowtables, list) {
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
if (!nla_strcmp(nla, flowtable->name) &&
nft_active_genmask(flowtable, genmask))
return flowtable;
@@ -5389,7 +5626,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
if (idx > s_idx)
memset(&cb->args[1], 0,
sizeof(cb->args) - sizeof(cb->args[0]));
- if (filter && filter->table[0] &&
+ if (filter && filter->table &&
strcmp(filter->table, table->name))
goto cont;
@@ -5430,13 +5667,13 @@ nft_flowtable_filter_alloc(const struct nlattr * const nla[])
{
struct nft_flowtable_filter *filter;
- filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ filter = kzalloc(sizeof(*filter), GFP_ATOMIC);
if (!filter)
return ERR_PTR(-ENOMEM);
if (nla[NFTA_FLOWTABLE_TABLE]) {
filter->table = nla_strdup(nla[NFTA_FLOWTABLE_TABLE],
- GFP_KERNEL);
+ GFP_ATOMIC);
if (!filter->table) {
kfree(filter);
return ERR_PTR(-ENOMEM);
@@ -5445,6 +5682,7 @@ nft_flowtable_filter_alloc(const struct nlattr * const nla[])
return filter;
}
+/* called with rcu_read_lock held */
static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
struct sk_buff *skb,
const struct nlmsghdr *nlh,
@@ -5463,6 +5701,7 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
struct netlink_dump_control c = {
.dump = nf_tables_dump_flowtable,
.done = nf_tables_dump_flowtable_done,
+ .module = THIS_MODULE,
};
if (nla[NFTA_FLOWTABLE_TABLE]) {
@@ -5474,7 +5713,7 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
c.data = filter;
}
- return netlink_dump_start(nlsk, skb, nlh, &c);
+ return nft_netlink_dump_start_rcu(nlsk, skb, nlh, &c);
}
if (!nla[NFTA_FLOWTABLE_NAME])
@@ -5490,7 +5729,7 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
if (IS_ERR(flowtable))
return PTR_ERR(flowtable);
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (!skb2)
return -ENOMEM;
@@ -5654,7 +5893,7 @@ static int nf_tables_getgen(struct net *net, struct sock *nlsk,
struct sk_buff *skb2;
int err;
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+ skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
if (skb2 == NULL)
return -ENOMEM;
@@ -5676,7 +5915,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_table_policy,
},
[NFT_MSG_GETTABLE] = {
- .call = nf_tables_gettable,
+ .call_rcu = nf_tables_gettable,
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
@@ -5691,7 +5930,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_chain_policy,
},
[NFT_MSG_GETCHAIN] = {
- .call = nf_tables_getchain,
+ .call_rcu = nf_tables_getchain,
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
@@ -5706,7 +5945,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_rule_policy,
},
[NFT_MSG_GETRULE] = {
- .call = nf_tables_getrule,
+ .call_rcu = nf_tables_getrule,
.attr_count = NFTA_RULE_MAX,
.policy = nft_rule_policy,
},
@@ -5721,7 +5960,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_set_policy,
},
[NFT_MSG_GETSET] = {
- .call = nf_tables_getset,
+ .call_rcu = nf_tables_getset,
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
@@ -5736,7 +5975,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_GETSETELEM] = {
- .call = nf_tables_getsetelem,
+ .call_rcu = nf_tables_getsetelem,
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
@@ -5746,7 +5985,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_set_elem_list_policy,
},
[NFT_MSG_GETGEN] = {
- .call = nf_tables_getgen,
+ .call_rcu = nf_tables_getgen,
},
[NFT_MSG_NEWOBJ] = {
.call_batch = nf_tables_newobj,
@@ -5754,7 +5993,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_obj_policy,
},
[NFT_MSG_GETOBJ] = {
- .call = nf_tables_getobj,
+ .call_rcu = nf_tables_getobj,
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
@@ -5764,7 +6003,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_obj_policy,
},
[NFT_MSG_GETOBJ_RESET] = {
- .call = nf_tables_getobj,
+ .call_rcu = nf_tables_getobj,
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
@@ -5774,7 +6013,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.policy = nft_flowtable_policy,
},
[NFT_MSG_GETFLOWTABLE] = {
- .call = nf_tables_getflowtable,
+ .call_rcu = nf_tables_getflowtable,
.attr_count = NFTA_FLOWTABLE_MAX,
.policy = nft_flowtable_policy,
},
@@ -5785,12 +6024,41 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
},
};
+static int nf_tables_validate(struct net *net)
+{
+ struct nft_table *table;
+
+ switch (net->nft.validate_state) {
+ case NFT_VALIDATE_SKIP:
+ break;
+ case NFT_VALIDATE_NEED:
+ nft_validate_state_update(net, NFT_VALIDATE_DO);
+ /* fall through */
+ case NFT_VALIDATE_DO:
+ list_for_each_entry(table, &net->nft.tables, list) {
+ if (nft_table_validate(net, table) < 0)
+ return -EAGAIN;
+ }
+ break;
+ }
+
+ return 0;
+}
+
static void nft_chain_commit_update(struct nft_trans *trans)
{
struct nft_base_chain *basechain;
- if (nft_trans_chain_name(trans))
+ if (nft_trans_chain_name(trans)) {
+ rhltable_remove(&trans->ctx.table->chains_ht,
+ &trans->ctx.chain->rhlhead,
+ nft_chain_ht_params);
swap(trans->ctx.chain->name, nft_trans_chain_name(trans));
+ rhltable_insert_key(&trans->ctx.table->chains_ht,
+ trans->ctx.chain->name,
+ &trans->ctx.chain->rhlhead,
+ nft_chain_ht_params);
+ }
if (!nft_is_base_chain(trans->ctx.chain))
return;
@@ -5822,11 +6090,12 @@ static void nft_commit_release(struct nft_trans *trans)
nft_set_destroy(nft_trans_set(trans));
break;
case NFT_MSG_DELSETELEM:
- nf_tables_set_elem_destroy(nft_trans_elem_set(trans),
+ nf_tables_set_elem_destroy(&trans->ctx,
+ nft_trans_elem_set(trans),
nft_trans_elem(trans).priv);
break;
case NFT_MSG_DELOBJ:
- nft_obj_destroy(nft_trans_obj(trans));
+ nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
break;
case NFT_MSG_DELFLOWTABLE:
nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
@@ -5850,21 +6119,175 @@ static void nf_tables_commit_release(struct net *net)
}
}
+static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain)
+{
+ struct nft_rule *rule;
+ unsigned int alloc = 0;
+ int i;
+
+ /* already handled or inactive chain? */
+ if (chain->rules_next || !nft_is_active_next(net, chain))
+ return 0;
+
+ rule = list_entry(&chain->rules, struct nft_rule, list);
+ i = 0;
+
+ list_for_each_entry_continue(rule, &chain->rules, list) {
+ if (nft_is_active_next(net, rule))
+ alloc++;
+ }
+
+ chain->rules_next = nf_tables_chain_alloc_rules(chain, alloc);
+ if (!chain->rules_next)
+ return -ENOMEM;
+
+ list_for_each_entry_continue(rule, &chain->rules, list) {
+ if (nft_is_active_next(net, rule))
+ chain->rules_next[i++] = rule;
+ }
+
+ chain->rules_next[i] = NULL;
+ return 0;
+}
+
+static void nf_tables_commit_chain_prepare_cancel(struct net *net)
+{
+ struct nft_trans *trans, *next;
+
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ struct nft_chain *chain = trans->ctx.chain;
+
+ if (trans->msg_type == NFT_MSG_NEWRULE ||
+ trans->msg_type == NFT_MSG_DELRULE) {
+ kvfree(chain->rules_next);
+ chain->rules_next = NULL;
+ }
+ }
+}
+
+static void __nf_tables_commit_chain_free_rules_old(struct rcu_head *h)
+{
+ struct nft_rules_old *o = container_of(h, struct nft_rules_old, h);
+
+ kvfree(o->start);
+}
+
+static void nf_tables_commit_chain_free_rules_old(struct nft_rule **rules)
+{
+ struct nft_rule **r = rules;
+ struct nft_rules_old *old;
+
+ while (*r)
+ r++;
+
+ r++; /* rcu_head is after end marker */
+ old = (void *) r;
+ old->start = rules;
+
+ call_rcu(&old->h, __nf_tables_commit_chain_free_rules_old);
+}
+
+static void nf_tables_commit_chain_active(struct net *net, struct nft_chain *chain)
+{
+ struct nft_rule **g0, **g1;
+ bool next_genbit;
+
+ next_genbit = nft_gencursor_next(net);
+
+ g0 = rcu_dereference_protected(chain->rules_gen_0,
+ lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+ g1 = rcu_dereference_protected(chain->rules_gen_1,
+ lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES));
+
+ /* No changes to this chain? */
+ if (chain->rules_next == NULL) {
+ /* chain had no change in last or next generation */
+ if (g0 == g1)
+ return;
+ /*
+ * chain had no change in this generation; make sure next
+ * one uses same rules as current generation.
+ */
+ if (next_genbit) {
+ rcu_assign_pointer(chain->rules_gen_1, g0);
+ nf_tables_commit_chain_free_rules_old(g1);
+ } else {
+ rcu_assign_pointer(chain->rules_gen_0, g1);
+ nf_tables_commit_chain_free_rules_old(g0);
+ }
+
+ return;
+ }
+
+ if (next_genbit)
+ rcu_assign_pointer(chain->rules_gen_1, chain->rules_next);
+ else
+ rcu_assign_pointer(chain->rules_gen_0, chain->rules_next);
+
+ chain->rules_next = NULL;
+
+ if (g0 == g1)
+ return;
+
+ if (next_genbit)
+ nf_tables_commit_chain_free_rules_old(g1);
+ else
+ nf_tables_commit_chain_free_rules_old(g0);
+}
+
+static void nft_chain_del(struct nft_chain *chain)
+{
+ struct nft_table *table = chain->table;
+
+ WARN_ON_ONCE(rhltable_remove(&table->chains_ht, &chain->rhlhead,
+ nft_chain_ht_params));
+ list_del_rcu(&chain->list);
+}
+
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
+ struct nft_chain *chain;
+ struct nft_table *table;
- /* Bump generation counter, invalidate any dump in progress */
- while (++net->nft.base_seq == 0);
+ /* 0. Validate ruleset, otherwise roll back for error reporting. */
+ if (nf_tables_validate(net) < 0)
+ return -EAGAIN;
- /* A new generation has just started */
- net->nft.gencursor = nft_gencursor_next(net);
+ /* 1. Allocate space for next generation rules_gen_X[] */
+ list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
+ int ret;
+
+ if (trans->msg_type == NFT_MSG_NEWRULE ||
+ trans->msg_type == NFT_MSG_DELRULE) {
+ chain = trans->ctx.chain;
+
+ ret = nf_tables_commit_chain_prepare(net, chain);
+ if (ret < 0) {
+ nf_tables_commit_chain_prepare_cancel(net);
+ return ret;
+ }
+ }
+ }
- /* Make sure all packets have left the previous generation before
- * purging old rules.
+ /* step 2. Make rules_gen_X visible to packet path */
+ list_for_each_entry(table, &net->nft.tables, list) {
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_active_next(net, chain))
+ continue;
+ nf_tables_commit_chain_active(net, chain);
+ }
+ }
+
+ /*
+ * Bump generation counter, invalidate any dump in progress.
+ * Cannot fail after this point.
*/
- synchronize_rcu();
+ while (++net->nft.base_seq == 0);
+
+ /* step 3. Start new generation, rules_gen_X now in use. */
+ net->nft.gencursor = nft_gencursor_next(net);
list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) {
switch (trans->msg_type) {
@@ -5895,7 +6318,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELCHAIN:
- list_del_rcu(&trans->ctx.chain->list);
+ nft_chain_del(trans->ctx.chain);
nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
nf_tables_unregister_hook(trans->ctx.net,
trans->ctx.table,
@@ -6006,7 +6429,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
nft_trans_elem(trans).priv, true);
break;
case NFT_MSG_NEWOBJ:
- nft_obj_destroy(nft_trans_obj(trans));
+ nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
break;
case NFT_MSG_NEWFLOWTABLE:
nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
@@ -6046,7 +6469,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
} else {
trans->ctx.table->use--;
- list_del_rcu(&trans->ctx.chain->list);
+ nft_chain_del(trans->ctx.chain);
nf_tables_unregister_hook(trans->ctx.net,
trans->ctx.table,
trans->ctx.chain);
@@ -6126,6 +6549,11 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
return 0;
}
+static void nf_tables_cleanup(struct net *net)
+{
+ nft_validate_state_update(net, NFT_VALIDATE_SKIP);
+}
+
static bool nf_tables_valid_genid(struct net *net, u32 genid)
{
return net->nft.base_seq == genid;
@@ -6138,6 +6566,7 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
.cb = nf_tables_cb,
.commit = nf_tables_commit,
.abort = nf_tables_abort,
+ .cleanup = nf_tables_cleanup,
.valid_genid = nf_tables_valid_genid,
};
@@ -6221,19 +6650,18 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
list_for_each_entry(rule, &chain->rules, list) {
nft_rule_for_each_expr(expr, last, rule) {
- const struct nft_data *data = NULL;
+ struct nft_immediate_expr *priv;
+ const struct nft_data *data;
int err;
- if (!expr->ops->validate)
+ if (strcmp(expr->ops->type->name, "immediate"))
continue;
- err = expr->ops->validate(ctx, expr, &data);
- if (err < 0)
- return err;
-
- if (data == NULL)
+ priv = nft_expr_priv(expr);
+ if (priv->dreg != NFT_REG_VERDICT)
continue;
+ data = &priv->data;
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
@@ -6643,7 +7071,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
ctx->chain->use--;
nf_tables_rule_release(ctx, rule);
}
- list_del(&ctx->chain->list);
+ nft_chain_del(ctx->chain);
ctx->table->use--;
nf_tables_chain_destroy(ctx);
@@ -6695,11 +7123,11 @@ static void __nft_release_tables(struct net *net)
list_for_each_entry_safe(obj, ne, &table->objects, list) {
list_del(&obj->list);
table->use--;
- nft_obj_destroy(obj);
+ nft_obj_destroy(&ctx, obj);
}
list_for_each_entry_safe(chain, nc, &table->chains, list) {
ctx.chain = chain;
- list_del(&chain->list);
+ nft_chain_del(chain);
table->use--;
nf_tables_chain_destroy(&ctx);
}
@@ -6713,6 +7141,8 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&net->nft.tables);
INIT_LIST_HEAD(&net->nft.commit_list);
net->nft.base_seq = 1;
+ net->nft.validate_state = NFT_VALIDATE_SKIP;
+
return 0;
}
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 4f46d2f4e167..deff10adef9c 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -23,22 +23,6 @@
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h>
-static const char *const comments[__NFT_TRACETYPE_MAX] = {
- [NFT_TRACETYPE_POLICY] = "policy",
- [NFT_TRACETYPE_RETURN] = "return",
- [NFT_TRACETYPE_RULE] = "rule",
-};
-
-static const struct nf_loginfo trace_loginfo = {
- .type = NF_LOG_TYPE_LOG,
- .u = {
- .log = {
- .level = LOGLEVEL_WARNING,
- .logflags = NF_LOG_DEFAULT_MASK,
- },
- },
-};
-
static noinline void __nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
enum nft_trace_types type)
@@ -120,20 +104,20 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain,
if (!base_chain->stats)
return;
+ local_bh_disable();
stats = this_cpu_ptr(rcu_dereference(base_chain->stats));
if (stats) {
- local_bh_disable();
u64_stats_update_begin(&stats->syncp);
stats->pkts++;
stats->bytes += pkt->skb->len;
u64_stats_update_end(&stats->syncp);
- local_bh_enable();
}
+ local_bh_enable();
}
struct nft_jumpstack {
const struct nft_chain *chain;
- const struct nft_rule *rule;
+ struct nft_rule *const *rules;
};
unsigned int
@@ -141,27 +125,29 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv)
{
const struct nft_chain *chain = priv, *basechain = chain;
const struct net *net = nft_net(pkt);
+ struct nft_rule *const *rules;
const struct nft_rule *rule;
const struct nft_expr *expr, *last;
struct nft_regs regs;
unsigned int stackptr = 0;
struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE];
- unsigned int gencursor = nft_genmask_cur(net);
+ bool genbit = READ_ONCE(net->nft.gencursor);
struct nft_traceinfo info;
info.trace = false;
if (static_branch_unlikely(&nft_trace_enabled))
nft_trace_init(&info, pkt, &regs.verdict, basechain);
do_chain:
- rule = list_entry(&chain->rules, struct nft_rule, list);
+ if (genbit)
+ rules = rcu_dereference(chain->rules_gen_1);
+ else
+ rules = rcu_dereference(chain->rules_gen_0);
+
next_rule:
+ rule = *rules;
regs.verdict.code = NFT_CONTINUE;
- list_for_each_entry_continue_rcu(rule, &chain->rules, list) {
-
- /* This rule is not active, skip. */
- if (unlikely(rule->genmask & gencursor))
- continue;
-
+ for (; *rules ; rules++) {
+ rule = *rules;
nft_rule_for_each_expr(expr, last, rule) {
if (expr->ops == &nft_cmp_fast_ops)
nft_cmp_fast_eval(expr, &regs);
@@ -199,7 +185,7 @@ next_rule:
case NFT_JUMP:
BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
jumpstack[stackptr].chain = chain;
- jumpstack[stackptr].rule = rule;
+ jumpstack[stackptr].rules = rules + 1;
stackptr++;
/* fall through */
case NFT_GOTO:
@@ -221,7 +207,7 @@ next_rule:
if (stackptr > 0) {
stackptr--;
chain = jumpstack[stackptr].chain;
- rule = jumpstack[stackptr].rule;
+ rules = jumpstack[stackptr].rules;
goto next_rule;
}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 03ead8a9e90c..4d0da7042aff 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -25,6 +25,7 @@
#include <linux/uaccess.h>
#include <net/sock.h>
#include <linux/init.h>
+#include <linux/sched/signal.h>
#include <net/netlink.h>
#include <linux/netfilter/nfnetlink.h>
@@ -37,6 +38,8 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
rcu_dereference_protected(table[(id)].subsys, \
lockdep_nfnl_is_held((id)))
+#define NFNL_MAX_ATTR_COUNT 32
+
static struct {
struct mutex mutex;
const struct nfnetlink_subsystem __rcu *subsys;
@@ -76,6 +79,13 @@ EXPORT_SYMBOL_GPL(lockdep_nfnl_is_held);
int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)
{
+ u8 cb_id;
+
+ /* Sanity-check attr_count size to avoid stack buffer overflow. */
+ for (cb_id = 0; cb_id < n->cb_count; cb_id++)
+ if (WARN_ON(n->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT))
+ return -EINVAL;
+
nfnl_lock(n->subsys_id);
if (table[n->subsys_id].subsys) {
nfnl_unlock(n->subsys_id);
@@ -185,11 +195,17 @@ replay:
{
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
- struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
+ struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1];
struct nlattr *attr = (void *)nlh + min_len;
int attrlen = nlh->nlmsg_len - min_len;
__u8 subsys_id = NFNL_SUBSYS_ID(type);
+ /* Sanity-check NFNL_MAX_ATTR_COUNT */
+ if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) {
+ rcu_read_unlock();
+ return -ENOMEM;
+ }
+
err = nla_parse(cda, ss->cb[cb_id].attr_count, attr, attrlen,
ss->cb[cb_id].policy, extack);
if (err < 0) {
@@ -330,6 +346,13 @@ replay:
while (skb->len >= nlmsg_total_size(0)) {
int msglen, type;
+ if (fatal_signal_pending(current)) {
+ nfnl_err_reset(&err_list);
+ err = -EINTR;
+ status = NFNL_BATCH_FAILURE;
+ goto done;
+ }
+
memset(&extack, 0, sizeof(extack));
nlh = nlmsg_hdr(skb);
err = 0;
@@ -379,10 +402,16 @@ replay:
{
int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
u8 cb_id = NFNL_MSG_TYPE(nlh->nlmsg_type);
- struct nlattr *cda[ss->cb[cb_id].attr_count + 1];
+ struct nlattr *cda[NFNL_MAX_ATTR_COUNT + 1];
struct nlattr *attr = (void *)nlh + min_len;
int attrlen = nlh->nlmsg_len - min_len;
+ /* Sanity-check NFTA_MAX_ATTR */
+ if (ss->cb[cb_id].attr_count > NFNL_MAX_ATTR_COUNT) {
+ err = -ENOMEM;
+ goto ack;
+ }
+
err = nla_parse(cda, ss->cb[cb_id].attr_count, attr,
attrlen, ss->cb[cb_id].policy, NULL);
if (err < 0)
@@ -441,10 +470,19 @@ done:
kfree_skb(skb);
goto replay;
} else if (status == NFNL_BATCH_DONE) {
- ss->commit(net, oskb);
+ err = ss->commit(net, oskb);
+ if (err == -EAGAIN) {
+ status |= NFNL_BATCH_REPLAY;
+ goto done;
+ } else if (err) {
+ ss->abort(net, oskb);
+ netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL);
+ }
} else {
ss->abort(net, oskb);
}
+ if (ss->cleanup)
+ ss->cleanup(net);
nfnl_err_deliver(&err_list, oskb);
nfnl_unlock(subsys_id);
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
index 6ddf89183e7b..a0e5adf0b3b6 100644
--- a/net/netfilter/nfnetlink_acct.c
+++ b/net/netfilter/nfnetlink_acct.c
@@ -115,7 +115,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl,
nfacct->flags = flags;
}
- nla_strlcpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
+ nla_strlcpy(nfacct->name, tb[NFACCT_NAME], NFACCT_NAME_MAX);
if (tb[NFACCT_BYTES]) {
atomic64_set(&nfacct->bytes,
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index fa026b269b36..cb5b5f207777 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -150,7 +150,7 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy,
return -EINVAL;
nla_strlcpy(expect_policy->name,
- nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN);
+ tb[NFCTH_POLICY_NAME], NF_CT_HELPER_NAME_LEN);
expect_policy->max_expected =
ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX]));
if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
@@ -235,7 +235,7 @@ nfnl_cthelper_create(const struct nlattr * const tb[],
goto err1;
nla_strlcpy(helper->name,
- nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN);
+ tb[NFCTH_NAME], NF_CT_HELPER_NAME_LEN);
size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN]));
if (size > FIELD_SIZEOF(struct nf_conn_help, data)) {
ret = -ENOMEM;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 1d99a1efdafc..8d1ff654e5af 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -611,10 +611,10 @@ nla_put_failure:
return -1;
}
-static int nfnl_compat_get(struct net *net, struct sock *nfnl,
- struct sk_buff *skb, const struct nlmsghdr *nlh,
- const struct nlattr * const tb[],
- struct netlink_ext_ack *extack)
+static int nfnl_compat_get_rcu(struct net *net, struct sock *nfnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const tb[],
+ struct netlink_ext_ack *extack)
{
int ret = 0, target;
struct nfgenmsg *nfmsg;
@@ -653,16 +653,21 @@ static int nfnl_compat_get(struct net *net, struct sock *nfnl,
return -EINVAL;
}
+ if (!try_module_get(THIS_MODULE))
+ return -EINVAL;
+
+ rcu_read_unlock();
try_then_request_module(xt_find_revision(nfmsg->nfgen_family, name,
rev, target, &ret),
fmt, name);
-
if (ret < 0)
- return ret;
+ goto out_put;
skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (skb2 == NULL)
- return -ENOMEM;
+ if (skb2 == NULL) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
/* include the best revision for this extension in the message */
if (nfnl_compat_fill_info(skb2, NETLINK_CB(skb).portid,
@@ -672,14 +677,16 @@ static int nfnl_compat_get(struct net *net, struct sock *nfnl,
nfmsg->nfgen_family,
name, ret, target) <= 0) {
kfree_skb(skb2);
- return -ENOSPC;
+ goto out_put;
}
ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
MSG_DONTWAIT);
if (ret > 0)
ret = 0;
-
+out_put:
+ rcu_read_lock();
+ module_put(THIS_MODULE);
return ret == -EAGAIN ? -ENOBUFS : ret;
}
@@ -691,7 +698,7 @@ static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
};
static const struct nfnl_callback nfnl_nft_compat_cb[NFNL_MSG_COMPAT_MAX] = {
- [NFNL_MSG_COMPAT_GET] = { .call = nfnl_compat_get,
+ [NFNL_MSG_COMPAT_GET] = { .call_rcu = nfnl_compat_get_rcu,
.attr_count = NFTA_COMPAT_MAX,
.policy = nfnl_compat_policy_get },
};
diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c
new file mode 100644
index 000000000000..50c068d660e5
--- /dev/null
+++ b/net/netfilter/nft_connlimit.c
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_count.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_tuple.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+
+struct nft_connlimit {
+ spinlock_t lock;
+ struct hlist_head hhead;
+ u32 limit;
+ bool invert;
+};
+
+static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt,
+ const struct nft_set_ext *ext)
+{
+ const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
+ const struct nf_conntrack_tuple *tuple_ptr;
+ struct nf_conntrack_tuple tuple;
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int count;
+ bool addit;
+
+ tuple_ptr = &tuple;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+ if (ct != NULL) {
+ tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+ zone = nf_ct_zone(ct);
+ } else if (!nf_ct_get_tuplepr(pkt->skb, skb_network_offset(pkt->skb),
+ nft_pf(pkt), nft_net(pkt), &tuple)) {
+ regs->verdict.code = NF_DROP;
+ return;
+ }
+
+ spin_lock_bh(&priv->lock);
+ count = nf_conncount_lookup(nft_net(pkt), &priv->hhead, tuple_ptr, zone,
+ &addit);
+
+ if (!addit)
+ goto out;
+
+ if (!nf_conncount_add(&priv->hhead, tuple_ptr)) {
+ regs->verdict.code = NF_DROP;
+ spin_unlock_bh(&priv->lock);
+ return;
+ }
+ count++;
+out:
+ spin_unlock_bh(&priv->lock);
+
+ if ((count > priv->limit) ^ priv->invert) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+}
+
+static int nft_connlimit_do_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_connlimit *priv)
+{
+ bool invert = false;
+ u32 flags, limit;
+
+ if (!tb[NFTA_CONNLIMIT_COUNT])
+ return -EINVAL;
+
+ limit = ntohl(nla_get_be32(tb[NFTA_CONNLIMIT_COUNT]));
+
+ if (tb[NFTA_CONNLIMIT_FLAGS]) {
+ flags = ntohl(nla_get_be32(tb[NFTA_CONNLIMIT_FLAGS]));
+ if (flags & ~NFT_CONNLIMIT_F_INV)
+ return -EOPNOTSUPP;
+ if (flags & NFT_CONNLIMIT_F_INV)
+ invert = true;
+ }
+
+ spin_lock_init(&priv->lock);
+ INIT_HLIST_HEAD(&priv->hhead);
+ priv->limit = limit;
+ priv->invert = invert;
+
+ return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_connlimit_do_destroy(const struct nft_ctx *ctx,
+ struct nft_connlimit *priv)
+{
+ nf_ct_netns_put(ctx->net, ctx->family);
+ nf_conncount_cache_free(&priv->hhead);
+}
+
+static int nft_connlimit_do_dump(struct sk_buff *skb,
+ struct nft_connlimit *priv)
+{
+ if (nla_put_be32(skb, NFTA_CONNLIMIT_COUNT, htonl(priv->limit)))
+ goto nla_put_failure;
+ if (priv->invert &&
+ nla_put_be32(skb, NFTA_CONNLIMIT_FLAGS, htonl(NFT_CONNLIMIT_F_INV)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static inline void nft_connlimit_obj_eval(struct nft_object *obj,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_connlimit *priv = nft_obj_data(obj);
+
+ nft_connlimit_do_eval(priv, regs, pkt, NULL);
+}
+
+static int nft_connlimit_obj_init(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[],
+ struct nft_object *obj)
+{
+ struct nft_connlimit *priv = nft_obj_data(obj);
+
+ return nft_connlimit_do_init(ctx, tb, priv);
+}
+
+static void nft_connlimit_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
+{
+ struct nft_connlimit *priv = nft_obj_data(obj);
+
+ nft_connlimit_do_destroy(ctx, priv);
+}
+
+static int nft_connlimit_obj_dump(struct sk_buff *skb,
+ struct nft_object *obj, bool reset)
+{
+ struct nft_connlimit *priv = nft_obj_data(obj);
+
+ return nft_connlimit_do_dump(skb, priv);
+}
+
+static const struct nla_policy nft_connlimit_policy[NFTA_CONNLIMIT_MAX + 1] = {
+ [NFTA_CONNLIMIT_COUNT] = { .type = NLA_U32 },
+ [NFTA_CONNLIMIT_FLAGS] = { .type = NLA_U32 },
+};
+
+static struct nft_object_type nft_connlimit_obj_type;
+static const struct nft_object_ops nft_connlimit_obj_ops = {
+ .type = &nft_connlimit_obj_type,
+ .size = sizeof(struct nft_connlimit),
+ .eval = nft_connlimit_obj_eval,
+ .init = nft_connlimit_obj_init,
+ .destroy = nft_connlimit_obj_destroy,
+ .dump = nft_connlimit_obj_dump,
+};
+
+static struct nft_object_type nft_connlimit_obj_type __read_mostly = {
+ .type = NFT_OBJECT_CONNLIMIT,
+ .ops = &nft_connlimit_obj_ops,
+ .maxattr = NFTA_CONNLIMIT_MAX,
+ .policy = nft_connlimit_policy,
+ .owner = THIS_MODULE,
+};
+
+static void nft_connlimit_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_connlimit *priv = nft_expr_priv(expr);
+
+ nft_connlimit_do_eval(priv, regs, pkt, NULL);
+}
+
+static int nft_connlimit_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_connlimit *priv = nft_expr_priv(expr);
+
+ return nft_connlimit_do_dump(skb, priv);
+}
+
+static int nft_connlimit_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_connlimit *priv = nft_expr_priv(expr);
+
+ return nft_connlimit_do_init(ctx, tb, priv);
+}
+
+static void nft_connlimit_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_connlimit *priv = nft_expr_priv(expr);
+
+ nft_connlimit_do_destroy(ctx, priv);
+}
+
+static int nft_connlimit_clone(struct nft_expr *dst, const struct nft_expr *src)
+{
+ struct nft_connlimit *priv_dst = nft_expr_priv(dst);
+ struct nft_connlimit *priv_src = nft_expr_priv(src);
+
+ spin_lock_init(&priv_dst->lock);
+ INIT_HLIST_HEAD(&priv_dst->hhead);
+ priv_dst->limit = priv_src->limit;
+ priv_dst->invert = priv_src->invert;
+
+ return 0;
+}
+
+static void nft_connlimit_destroy_clone(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_connlimit *priv = nft_expr_priv(expr);
+
+ nf_conncount_cache_free(&priv->hhead);
+}
+
+static bool nft_connlimit_gc(struct net *net, const struct nft_expr *expr)
+{
+ struct nft_connlimit *priv = nft_expr_priv(expr);
+ bool addit, ret;
+
+ spin_lock_bh(&priv->lock);
+ nf_conncount_lookup(net, &priv->hhead, NULL, &nf_ct_zone_dflt, &addit);
+
+ ret = hlist_empty(&priv->hhead);
+ spin_unlock_bh(&priv->lock);
+
+ return ret;
+}
+
+static struct nft_expr_type nft_connlimit_type;
+static const struct nft_expr_ops nft_connlimit_ops = {
+ .type = &nft_connlimit_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_connlimit)),
+ .eval = nft_connlimit_eval,
+ .init = nft_connlimit_init,
+ .destroy = nft_connlimit_destroy,
+ .clone = nft_connlimit_clone,
+ .destroy_clone = nft_connlimit_destroy_clone,
+ .dump = nft_connlimit_dump,
+ .gc = nft_connlimit_gc,
+};
+
+static struct nft_expr_type nft_connlimit_type __read_mostly = {
+ .name = "connlimit",
+ .ops = &nft_connlimit_ops,
+ .policy = nft_connlimit_policy,
+ .maxattr = NFTA_CONNLIMIT_MAX,
+ .flags = NFT_EXPR_STATEFUL | NFT_EXPR_GC,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_connlimit_module_init(void)
+{
+ int err;
+
+ err = nft_register_obj(&nft_connlimit_obj_type);
+ if (err < 0)
+ return err;
+
+ err = nft_register_expr(&nft_connlimit_type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+err1:
+ nft_unregister_obj(&nft_connlimit_obj_type);
+ return err;
+}
+
+static void __exit nft_connlimit_module_exit(void)
+{
+ nft_unregister_expr(&nft_connlimit_type);
+ nft_unregister_obj(&nft_connlimit_obj_type);
+}
+
+module_init(nft_connlimit_module_init);
+module_exit(nft_connlimit_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso");
+MODULE_ALIAS_NFT_EXPR("connlimit");
+MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_CONNLIMIT);
diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c
index eefe3b409925..a61d7edfc290 100644
--- a/net/netfilter/nft_counter.c
+++ b/net/netfilter/nft_counter.c
@@ -96,7 +96,8 @@ static void nft_counter_do_destroy(struct nft_counter_percpu_priv *priv)
free_percpu(priv->counter);
}
-static void nft_counter_obj_destroy(struct nft_object *obj)
+static void nft_counter_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
{
struct nft_counter_percpu_priv *priv = nft_obj_data(obj);
@@ -257,6 +258,7 @@ static const struct nft_expr_ops nft_counter_ops = {
.eval = nft_counter_eval,
.init = nft_counter_init,
.destroy = nft_counter_destroy,
+ .destroy_clone = nft_counter_destroy,
.dump = nft_counter_dump,
.clone = nft_counter_clone,
};
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index ea737fd789e8..1435ffc5f57e 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -826,7 +826,8 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx,
return 0;
}
-static void nft_ct_helper_obj_destroy(struct nft_object *obj)
+static void nft_ct_helper_obj_destroy(const struct nft_ctx *ctx,
+ struct nft_object *obj)
{
struct nft_ct_helper_obj *priv = nft_obj_data(obj);
@@ -880,22 +881,26 @@ static int nft_ct_helper_obj_dump(struct sk_buff *skb,
struct nft_object *obj, bool reset)
{
const struct nft_ct_helper_obj *priv = nft_obj_data(obj);
- const struct nf_conntrack_helper *helper = priv->helper4;
+ const struct nf_conntrack_helper *helper;
u16 family;
+ if (priv->helper4 && priv->helper6) {
+ family = NFPROTO_INET;
+ helper = priv->helper4;
+ } else if (priv->helper6) {
+ family = NFPROTO_IPV6;
+ helper = priv->helper6;
+ } else {
+ family = NFPROTO_IPV4;
+ helper = priv->helper4;
+ }
+
if (nla_put_string(skb, NFTA_CT_HELPER_NAME, helper->name))
return -1;
if (nla_put_u8(skb, NFTA_CT_HELPER_L4PROTO, priv->l4proto))
return -1;
- if (priv->helper4 && priv->helper6)
- family = NFPROTO_INET;
- else if (priv->helper6)
- family = NFPROTO_IPV6;
- else
- family = NFPROTO_IPV4;
-
if (nla_put_be16(skb, NFTA_CT_HELPER_L3PROTO, htons(family)))
return -1;
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index b07a3fd9eeea..4d49529cff61 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -195,6 +195,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
err = -EOPNOTSUPP;
if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL))
goto err1;
+
+ if (priv->expr->ops->type->flags & NFT_EXPR_GC) {
+ if (set->flags & NFT_SET_TIMEOUT)
+ goto err1;
+ if (!set->ops->gc_init)
+ goto err1;
+ set->ops->gc_init(set);
+ }
+
} else if (set->flags & NFT_SET_EVAL)
return -EINVAL;
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index ce13a50b9189..8abb9891cdf2 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -12,8 +12,12 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_dup_netdev.h>
+#include <net/neighbour.h>
+#include <net/ip.h>
struct nft_fwd_netdev {
enum nft_registers sreg_dev:8;
@@ -32,6 +36,8 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr,
static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = {
[NFTA_FWD_SREG_DEV] = { .type = NLA_U32 },
+ [NFTA_FWD_SREG_ADDR] = { .type = NLA_U32 },
+ [NFTA_FWD_NFPROTO] = { .type = NLA_U32 },
};
static int nft_fwd_netdev_init(const struct nft_ctx *ctx,
@@ -62,7 +68,133 @@ nla_put_failure:
return -1;
}
+struct nft_fwd_neigh {
+ enum nft_registers sreg_dev:8;
+ enum nft_registers sreg_addr:8;
+ u8 nfproto;
+};
+
+static void nft_fwd_neigh_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_fwd_neigh *priv = nft_expr_priv(expr);
+ void *addr = &regs->data[priv->sreg_addr];
+ int oif = regs->data[priv->sreg_dev];
+ unsigned int verdict = NF_STOLEN;
+ struct sk_buff *skb = pkt->skb;
+ struct net_device *dev;
+ int neigh_table;
+
+ switch (priv->nfproto) {
+ case NFPROTO_IPV4: {
+ struct iphdr *iph;
+
+ if (skb->protocol != htons(ETH_P_IP)) {
+ verdict = NFT_BREAK;
+ goto out;
+ }
+ if (skb_try_make_writable(skb, sizeof(*iph))) {
+ verdict = NF_DROP;
+ goto out;
+ }
+ iph = ip_hdr(skb);
+ ip_decrease_ttl(iph);
+ neigh_table = NEIGH_ARP_TABLE;
+ break;
+ }
+ case NFPROTO_IPV6: {
+ struct ipv6hdr *ip6h;
+
+ if (skb->protocol != htons(ETH_P_IPV6)) {
+ verdict = NFT_BREAK;
+ goto out;
+ }
+ if (skb_try_make_writable(skb, sizeof(*ip6h))) {
+ verdict = NF_DROP;
+ goto out;
+ }
+ ip6h = ipv6_hdr(skb);
+ ip6h->hop_limit--;
+ neigh_table = NEIGH_ND_TABLE;
+ break;
+ }
+ default:
+ verdict = NFT_BREAK;
+ goto out;
+ }
+
+ dev = dev_get_by_index_rcu(nft_net(pkt), oif);
+ if (dev == NULL)
+ return;
+
+ skb->dev = dev;
+ neigh_xmit(neigh_table, dev, addr, skb);
+out:
+ regs->verdict.code = verdict;
+}
+
+static int nft_fwd_neigh_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_fwd_neigh *priv = nft_expr_priv(expr);
+ unsigned int addr_len;
+ int err;
+
+ if (!tb[NFTA_FWD_SREG_DEV] ||
+ !tb[NFTA_FWD_SREG_ADDR] ||
+ !tb[NFTA_FWD_NFPROTO])
+ return -EINVAL;
+
+ priv->sreg_dev = nft_parse_register(tb[NFTA_FWD_SREG_DEV]);
+ priv->sreg_addr = nft_parse_register(tb[NFTA_FWD_SREG_ADDR]);
+ priv->nfproto = ntohl(nla_get_be32(tb[NFTA_FWD_NFPROTO]));
+
+ switch (priv->nfproto) {
+ case NFPROTO_IPV4:
+ addr_len = sizeof(struct in_addr);
+ break;
+ case NFPROTO_IPV6:
+ addr_len = sizeof(struct in6_addr);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ err = nft_validate_register_load(priv->sreg_dev, sizeof(int));
+ if (err < 0)
+ return err;
+
+ return nft_validate_register_load(priv->sreg_addr, addr_len);
+}
+
+static const struct nft_expr_ops nft_fwd_netdev_ingress_ops;
+
+static int nft_fwd_neigh_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ struct nft_fwd_neigh *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_FWD_SREG_DEV, priv->sreg_dev) ||
+ nft_dump_register(skb, NFTA_FWD_SREG_ADDR, priv->sreg_addr) ||
+ nla_put_be32(skb, NFTA_FWD_NFPROTO, htonl(priv->nfproto)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
static struct nft_expr_type nft_fwd_netdev_type;
+static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = {
+ .type = &nft_fwd_netdev_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_neigh)),
+ .eval = nft_fwd_neigh_eval,
+ .init = nft_fwd_neigh_init,
+ .dump = nft_fwd_neigh_dump,
+};
+
static const struct nft_expr_ops nft_fwd_netdev_ops = {
.type = &nft_fwd_netdev_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_fwd_netdev)),
@@ -71,10 +203,22 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = {
.dump = nft_fwd_netdev_dump,
};
+static const struct nft_expr_ops *
+nft_fwd_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_FWD_SREG_ADDR])
+ return &nft_fwd_neigh_netdev_ops;
+ if (tb[NFTA_FWD_SREG_DEV])
+ return &nft_fwd_netdev_ops;
+
+ return ERR_PTR(-EOPNOTSUPP);
+}
+
static struct nft_expr_type nft_fwd_netdev_type __read_mostly = {
.family = NFPROTO_NETDEV,
.name = "fwd",
- .ops = &nft_fwd_netdev_ops,
+ .select_ops = nft_fwd_select_ops,
.policy = nft_fwd_netdev_policy,
.maxattr = NFTA_FWD_MAX,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index f0fc21f88775..c2d237144f74 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -177,10 +177,7 @@ static int nft_jhash_map_init(const struct nft_ctx *ctx,
priv->map = nft_set_lookup_global(ctx->net, ctx->table,
tb[NFTA_HASH_SET_NAME],
tb[NFTA_HASH_SET_ID], genmask);
- if (IS_ERR(priv->map))
- return PTR_ERR(priv->map);
-
- return 0;
+ return PTR_ERR_OR_ZERO(priv->map);
}
static int nft_symhash_init(const struct nft_ctx *ctx,
@@ -220,10 +217,7 @@ static int nft_symhash_map_init(const struct nft_ctx *ctx,
priv->map = nft_set_lookup_global(ctx->net, ctx->table,
tb[NFTA_HASH_SET_NAME],
tb[NFTA_HASH_SET_ID], genmask);
- if (IS_ERR(priv->map))
- return PTR_ERR(priv->map);
-
- return 0;
+ return PTR_ERR_OR_ZERO(priv->map);
}
static int nft_jhash_dump(struct sk_buff *skb,
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index aa87ff8beae8..15adf8ca82c3 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -17,12 +17,6 @@
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
-struct nft_immediate_expr {
- struct nft_data data;
- enum nft_registers dreg:8;
- u8 dlen;
-};
-
static void nft_immediate_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -101,12 +95,27 @@ nla_put_failure:
static int nft_immediate_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
- const struct nft_data **data)
+ const struct nft_data **d)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ const struct nft_data *data;
+ int err;
- if (priv->dreg == NFT_REG_VERDICT)
- *data = &priv->data;
+ if (priv->dreg != NFT_REG_VERDICT)
+ return 0;
+
+ data = &priv->data;
+
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ err = nft_chain_validate(ctx, data->verdict.chain);
+ if (err < 0)
+ return err;
+ break;
+ default:
+ break;
+ }
return 0;
}
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index a9fc298ef4c3..72f13a1144dd 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -51,10 +51,13 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
return !limit->invert;
}
+/* Use same default as in iptables. */
+#define NFT_LIMIT_PKT_BURST_DEFAULT 5
+
static int nft_limit_init(struct nft_limit *limit,
- const struct nlattr * const tb[])
+ const struct nlattr * const tb[], bool pkts)
{
- u64 unit;
+ u64 unit, tokens;
if (tb[NFTA_LIMIT_RATE] == NULL ||
tb[NFTA_LIMIT_UNIT] == NULL)
@@ -68,18 +71,25 @@ static int nft_limit_init(struct nft_limit *limit,
if (tb[NFTA_LIMIT_BURST])
limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
- else
- limit->burst = 0;
+
+ if (pkts && limit->burst == 0)
+ limit->burst = NFT_LIMIT_PKT_BURST_DEFAULT;
if (limit->rate + limit->burst < limit->rate)
return -EOVERFLOW;
- /* The token bucket size limits the number of tokens can be
- * accumulated. tokens_max specifies the bucket size.
- * tokens_max = unit * (rate + burst) / rate.
- */
- limit->tokens = div_u64(limit->nsecs * (limit->rate + limit->burst),
- limit->rate);
+ if (pkts) {
+ tokens = div_u64(limit->nsecs, limit->rate) * limit->burst;
+ } else {
+ /* The token bucket size limits the number of tokens can be
+ * accumulated. tokens_max specifies the bucket size.
+ * tokens_max = unit * (rate + burst) / rate.
+ */
+ tokens = div_u64(limit->nsecs * (limit->rate + limit->burst),
+ limit->rate);
+ }
+
+ limit->tokens = tokens;
limit->tokens_max = limit->tokens;
if (tb[NFTA_LIMIT_FLAGS]) {
@@ -144,7 +154,7 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx,
struct nft_limit_pkts *priv = nft_expr_priv(expr);
int err;
- err = nft_limit_init(&priv->limit, tb);
+ err = nft_limit_init(&priv->limit, tb, true);
if (err < 0)
return err;
@@ -185,7 +195,7 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx,
{
struct nft_limit *priv = nft_expr_priv(expr);
- return nft_limit_init(priv, tb);
+ return nft_limit_init(priv, tb, false);
}
static int nft_limit_bytes_dump(struct sk_buff *skb,
@@ -246,7 +256,7 @@ static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx,
struct nft_limit_pkts *priv = nft_obj_data(obj);
int err;
- err = nft_limit_init(&priv->limit, tb);
+ err = nft_limit_init(&priv->limit, tb, true);
if (err < 0)
return err;
@@ -289,7 +299,7 @@ static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx,
{
struct nft_limit *priv = nft_obj_data(obj);
- return nft_limit_init(priv, tb);
+ return nft_limit_init(priv, tb, false);
}
static int nft_limit_obj_bytes_dump(struct sk_buff *skb,
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index a27be36dc0af..7eef1cffbf1b 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -9,12 +9,15 @@
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*/
+#include <linux/audit.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <net/ipv6.h>
+#include <net/ip.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h>
#include <linux/netdevice.h>
@@ -26,12 +29,93 @@ struct nft_log {
char *prefix;
};
+static bool audit_ip4(struct audit_buffer *ab, struct sk_buff *skb)
+{
+ struct iphdr _iph;
+ const struct iphdr *ih;
+
+ ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_iph), &_iph);
+ if (!ih)
+ return false;
+
+ audit_log_format(ab, " saddr=%pI4 daddr=%pI4 proto=%hhu",
+ &ih->saddr, &ih->daddr, ih->protocol);
+
+ return true;
+}
+
+static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
+{
+ struct ipv6hdr _ip6h;
+ const struct ipv6hdr *ih;
+ u8 nexthdr;
+ __be16 frag_off;
+
+ ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h);
+ if (!ih)
+ return false;
+
+ nexthdr = ih->nexthdr;
+ ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), &nexthdr, &frag_off);
+
+ audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",
+ &ih->saddr, &ih->daddr, nexthdr);
+
+ return true;
+}
+
+static void nft_log_eval_audit(const struct nft_pktinfo *pkt)
+{
+ struct sk_buff *skb = pkt->skb;
+ struct audit_buffer *ab;
+ int fam = -1;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
+ if (!ab)
+ return;
+
+ audit_log_format(ab, "mark=%#x", skb->mark);
+
+ switch (nft_pf(pkt)) {
+ case NFPROTO_BRIDGE:
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1;
+ break;
+ case htons(ETH_P_IPV6):
+ fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1;
+ break;
+ }
+ break;
+ case NFPROTO_IPV4:
+ fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1;
+ break;
+ case NFPROTO_IPV6:
+ fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1;
+ break;
+ }
+
+ if (fam == -1)
+ audit_log_format(ab, " saddr=? daddr=? proto=-1");
+
+ audit_log_end(ab);
+}
+
static void nft_log_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
const struct nft_log *priv = nft_expr_priv(expr);
+ if (priv->loginfo.type == NF_LOG_TYPE_LOG &&
+ priv->loginfo.u.log.level == LOGLEVEL_AUDIT) {
+ nft_log_eval_audit(pkt);
+ return;
+ }
+
nf_log_packet(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb,
nft_in(pkt), nft_out(pkt), &priv->loginfo, "%s",
priv->prefix);
@@ -84,7 +168,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
} else {
li->u.log.level = LOGLEVEL_WARNING;
}
- if (li->u.log.level > LOGLEVEL_DEBUG) {
+ if (li->u.log.level > LOGLEVEL_AUDIT) {
err = -EINVAL;
goto err1;
}
@@ -112,6 +196,9 @@ static int nft_log_init(const struct nft_ctx *ctx,
break;
}
+ if (li->u.log.level == LOGLEVEL_AUDIT)
+ return 0;
+
err = nf_logger_find_get(ctx->family, li->type);
if (err < 0)
goto err1;
@@ -133,6 +220,9 @@ static void nft_log_destroy(const struct nft_ctx *ctx,
if (priv->prefix != nft_log_null_prefix)
kfree(priv->prefix);
+ if (li->u.log.level == LOGLEVEL_AUDIT)
+ return;
+
nf_logger_put(ctx->family, li->type);
}
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index f52da5e2199f..42e6fadf1417 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -149,6 +149,52 @@ nla_put_failure:
return -1;
}
+static int nft_lookup_validate_setelem(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_data *data;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
+ *nft_set_ext_flags(ext) & NFT_SET_ELEM_INTERVAL_END)
+ return 0;
+
+ data = nft_set_ext_data(ext);
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ return nft_chain_validate(ctx, data->verdict.chain);
+ default:
+ return 0;
+ }
+}
+
+static int nft_lookup_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **d)
+{
+ const struct nft_lookup *priv = nft_expr_priv(expr);
+ struct nft_set_iter iter;
+
+ if (!(priv->set->flags & NFT_SET_MAP) ||
+ priv->set->dtype != NFT_DATA_VERDICT)
+ return 0;
+
+ iter.genmask = nft_genmask_next(ctx->net);
+ iter.skip = 0;
+ iter.count = 0;
+ iter.err = 0;
+ iter.fn = nft_lookup_validate_setelem;
+
+ priv->set->ops->walk(ctx, priv->set, &iter);
+ if (iter.err < 0)
+ return iter.err;
+
+ return 0;
+}
+
static const struct nft_expr_ops nft_lookup_ops = {
.type = &nft_lookup_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)),
@@ -156,6 +202,7 @@ static const struct nft_expr_ops nft_lookup_ops = {
.init = nft_lookup_init,
.destroy = nft_lookup_destroy,
.dump = nft_lookup_dump,
+ .validate = nft_lookup_validate,
};
struct nft_expr_type nft_lookup_type __read_mostly = {
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 5348bd058c88..1105a23bda5e 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -259,7 +259,7 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
struct sk_buff *skb = pkt->skb;
u32 *sreg = &regs->data[meta->sreg];
u32 value = *sreg;
- u8 pkt_type;
+ u8 value8;
switch (meta->key) {
case NFT_META_MARK:
@@ -269,15 +269,17 @@ static void nft_meta_set_eval(const struct nft_expr *expr,
skb->priority = value;
break;
case NFT_META_PKTTYPE:
- pkt_type = nft_reg_load8(sreg);
+ value8 = nft_reg_load8(sreg);
- if (skb->pkt_type != pkt_type &&
- skb_pkt_type_ok(pkt_type) &&
+ if (skb->pkt_type != value8 &&
+ skb_pkt_type_ok(value8) &&
skb_pkt_type_ok(skb->pkt_type))
- skb->pkt_type = pkt_type;
+ skb->pkt_type = value8;
break;
case NFT_META_NFTRACE:
- skb->nf_trace = !!value;
+ value8 = nft_reg_load8(sreg);
+
+ skb->nf_trace = !!value8;
break;
default:
WARN_ON(1);
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index cdbc62a53933..1f4d0854cf70 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -114,10 +114,7 @@ static int nft_ng_inc_map_init(const struct nft_ctx *ctx,
tb[NFTA_NG_SET_NAME],
tb[NFTA_NG_SET_ID], genmask);
- if (IS_ERR(priv->map))
- return PTR_ERR(priv->map);
-
- return 0;
+ return PTR_ERR_OR_ZERO(priv->map);
}
static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index dbf1f4ad077c..6f9a1365a09f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -311,8 +311,16 @@ static void nft_rhash_gc(struct work_struct *work)
continue;
}
+ if (nft_set_ext_exists(&he->ext, NFT_SET_EXT_EXPR)) {
+ struct nft_expr *expr = nft_set_ext_expr(&he->ext);
+
+ if (expr->ops->gc &&
+ expr->ops->gc(read_pnet(&set->net), expr))
+ goto gc;
+ }
if (!nft_set_elem_expired(&he->ext))
continue;
+gc:
if (nft_set_elem_mark_busy(&he->ext))
continue;
@@ -339,6 +347,14 @@ static unsigned int nft_rhash_privsize(const struct nlattr * const nla[],
return sizeof(struct nft_rhash);
}
+static void nft_rhash_gc_init(const struct nft_set *set)
+{
+ struct nft_rhash *priv = nft_set_priv(set);
+
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+}
+
static int nft_rhash_init(const struct nft_set *set,
const struct nft_set_desc *desc,
const struct nlattr * const tb[])
@@ -356,8 +372,8 @@ static int nft_rhash_init(const struct nft_set *set,
INIT_DEFERRABLE_WORK(&priv->gc_work, nft_rhash_gc);
if (set->flags & NFT_SET_TIMEOUT)
- queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
- nft_set_gc_interval(set));
+ nft_rhash_gc_init(set);
+
return 0;
}
@@ -647,6 +663,7 @@ static struct nft_set_type nft_rhash_type __read_mostly = {
.elemsize = offsetof(struct nft_rhash_elem, ext),
.estimate = nft_rhash_estimate,
.init = nft_rhash_init,
+ .gc_init = nft_rhash_gc_init,
.destroy = nft_rhash_destroy,
.insert = nft_rhash_insert,
.activate = nft_rhash_activate,
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
new file mode 100644
index 000000000000..f28a0b944087
--- /dev/null
+++ b/net/netfilter/nft_socket.c
@@ -0,0 +1,144 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/module.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_socket.h>
+#include <net/inet_sock.h>
+#include <net/tcp.h>
+
+struct nft_socket {
+ enum nft_socket_keys key:8;
+ union {
+ enum nft_registers dreg:8;
+ };
+};
+
+static void nft_socket_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_socket *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ struct sock *sk = skb->sk;
+ u32 *dest = &regs->data[priv->dreg];
+
+ if (!sk)
+ switch(nft_pf(pkt)) {
+ case NFPROTO_IPV4:
+ sk = nf_sk_lookup_slow_v4(nft_net(pkt), skb, nft_in(pkt));
+ break;
+#if IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
+ case NFPROTO_IPV6:
+ sk = nf_sk_lookup_slow_v6(nft_net(pkt), skb, nft_in(pkt));
+ break;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ if(!sk) {
+ nft_reg_store8(dest, 0);
+ return;
+ }
+
+ /* So that subsequent socket matching not to require other lookups. */
+ skb->sk = sk;
+
+ switch(priv->key) {
+ case NFT_SOCKET_TRANSPARENT:
+ nft_reg_store8(dest, inet_sk_transparent(sk));
+ break;
+ default:
+ WARN_ON(1);
+ regs->verdict.code = NFT_BREAK;
+ }
+}
+
+static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = {
+ [NFTA_SOCKET_KEY] = { .type = NLA_U32 },
+ [NFTA_SOCKET_DREG] = { .type = NLA_U32 },
+};
+
+static int nft_socket_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_socket *priv = nft_expr_priv(expr);
+ unsigned int len;
+
+ if (!tb[NFTA_SOCKET_DREG] || !tb[NFTA_SOCKET_KEY])
+ return -EINVAL;
+
+ switch(ctx->family) {
+ case NFPROTO_IPV4:
+#if IS_ENABLED(CONFIG_NF_SOCKET_IPV6)
+ case NFPROTO_IPV6:
+#endif
+ case NFPROTO_INET:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ priv->key = ntohl(nla_get_u32(tb[NFTA_SOCKET_KEY]));
+ switch(priv->key) {
+ case NFT_SOCKET_TRANSPARENT:
+ len = sizeof(u8);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ priv->dreg = nft_parse_register(tb[NFTA_SOCKET_DREG]);
+ return nft_validate_register_store(ctx, priv->dreg, NULL,
+ NFT_DATA_VALUE, len);
+}
+
+static int nft_socket_dump(struct sk_buff *skb,
+ const struct nft_expr *expr)
+{
+ const struct nft_socket *priv = nft_expr_priv(expr);
+
+ if (nla_put_u32(skb, NFTA_SOCKET_KEY, htonl(priv->key)))
+ return -1;
+ if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
+ return -1;
+ return 0;
+}
+
+static struct nft_expr_type nft_socket_type;
+static const struct nft_expr_ops nft_socket_ops = {
+ .type = &nft_socket_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_socket)),
+ .eval = nft_socket_eval,
+ .init = nft_socket_init,
+ .dump = nft_socket_dump,
+};
+
+static struct nft_expr_type nft_socket_type __read_mostly = {
+ .name = "socket",
+ .ops = &nft_socket_ops,
+ .policy = nft_socket_policy,
+ .maxattr = NFTA_SOCKET_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_socket_module_init(void)
+{
+ return nft_register_expr(&nft_socket_type);
+}
+
+static void __exit nft_socket_module_exit(void)
+{
+ nft_unregister_expr(&nft_socket_type);
+}
+
+module_init(nft_socket_module_init);
+module_exit(nft_socket_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Máté Eckl");
+MODULE_DESCRIPTION("nf_tables socket match module");
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 8c89323c06af..58fce4e749a9 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -33,264 +33,9 @@
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
+#include <net/netfilter/nf_tproxy.h>
#include <linux/netfilter/xt_TPROXY.h>
-enum nf_tproxy_lookup_t {
- NFT_LOOKUP_LISTENER,
- NFT_LOOKUP_ESTABLISHED,
-};
-
-static bool tproxy_sk_is_transparent(struct sock *sk)
-{
- switch (sk->sk_state) {
- case TCP_TIME_WAIT:
- if (inet_twsk(sk)->tw_transparent)
- return true;
- break;
- case TCP_NEW_SYN_RECV:
- if (inet_rsk(inet_reqsk(sk))->no_srccheck)
- return true;
- break;
- default:
- if (inet_sk(sk)->transparent)
- return true;
- }
-
- sock_gen_put(sk);
- return false;
-}
-
-static inline __be32
-tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr)
-{
- struct in_device *indev;
- __be32 laddr;
-
- if (user_laddr)
- return user_laddr;
-
- laddr = 0;
- indev = __in_dev_get_rcu(skb->dev);
- for_primary_ifa(indev) {
- laddr = ifa->ifa_local;
- break;
- } endfor_ifa(indev);
-
- return laddr ? laddr : daddr;
-}
-
-/*
- * This is used when the user wants to intercept a connection matching
- * an explicit iptables rule. In this case the sockets are assumed
- * matching in preference order:
- *
- * - match: if there's a fully established connection matching the
- * _packet_ tuple, it is returned, assuming the redirection
- * already took place and we process a packet belonging to an
- * established connection
- *
- * - match: if there's a listening socket matching the redirection
- * (e.g. on-port & on-ip of the connection), it is returned,
- * regardless if it was bound to 0.0.0.0 or an explicit
- * address. The reasoning is that if there's an explicit rule, it
- * does not really matter if the listener is bound to an interface
- * or to 0. The user already stated that he wants redirection
- * (since he added the rule).
- *
- * Please note that there's an overlap between what a TPROXY target
- * and a socket match will match. Normally if you have both rules the
- * "socket" match will be the first one, effectively all packets
- * belonging to established connections going through that one.
- */
-static inline struct sock *
-nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp,
- const u8 protocol,
- const __be32 saddr, const __be32 daddr,
- const __be16 sport, const __be16 dport,
- const struct net_device *in,
- const enum nf_tproxy_lookup_t lookup_type)
-{
- struct sock *sk;
- struct tcphdr *tcph;
-
- switch (protocol) {
- case IPPROTO_TCP:
- switch (lookup_type) {
- case NFT_LOOKUP_LISTENER:
- tcph = hp;
- sk = inet_lookup_listener(net, &tcp_hashinfo, skb,
- ip_hdrlen(skb) +
- __tcp_hdrlen(tcph),
- saddr, sport,
- daddr, dport,
- in->ifindex, 0);
-
- if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
- sk = NULL;
- /* NOTE: we return listeners even if bound to
- * 0.0.0.0, those are filtered out in
- * xt_socket, since xt_TPROXY needs 0 bound
- * listeners too
- */
- break;
- case NFT_LOOKUP_ESTABLISHED:
- sk = inet_lookup_established(net, &tcp_hashinfo,
- saddr, sport, daddr, dport,
- in->ifindex);
- break;
- default:
- BUG();
- }
- break;
- case IPPROTO_UDP:
- sk = udp4_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
- if (sk) {
- int connected = (sk->sk_state == TCP_ESTABLISHED);
- int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0);
-
- /* NOTE: we return listeners even if bound to
- * 0.0.0.0, those are filtered out in
- * xt_socket, since xt_TPROXY needs 0 bound
- * listeners too
- */
- if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
- (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
- sock_put(sk);
- sk = NULL;
- }
- }
- break;
- default:
- WARN_ON(1);
- sk = NULL;
- }
-
- pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n",
- protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk);
-
- return sk;
-}
-
-#ifdef XT_TPROXY_HAVE_IPV6
-static inline struct sock *
-nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp,
- const u8 protocol,
- const struct in6_addr *saddr, const struct in6_addr *daddr,
- const __be16 sport, const __be16 dport,
- const struct net_device *in,
- const enum nf_tproxy_lookup_t lookup_type)
-{
- struct sock *sk;
- struct tcphdr *tcph;
-
- switch (protocol) {
- case IPPROTO_TCP:
- switch (lookup_type) {
- case NFT_LOOKUP_LISTENER:
- tcph = hp;
- sk = inet6_lookup_listener(net, &tcp_hashinfo, skb,
- thoff + __tcp_hdrlen(tcph),
- saddr, sport,
- daddr, ntohs(dport),
- in->ifindex, 0);
-
- if (sk && !refcount_inc_not_zero(&sk->sk_refcnt))
- sk = NULL;
- /* NOTE: we return listeners even if bound to
- * 0.0.0.0, those are filtered out in
- * xt_socket, since xt_TPROXY needs 0 bound
- * listeners too
- */
- break;
- case NFT_LOOKUP_ESTABLISHED:
- sk = __inet6_lookup_established(net, &tcp_hashinfo,
- saddr, sport, daddr, ntohs(dport),
- in->ifindex, 0);
- break;
- default:
- BUG();
- }
- break;
- case IPPROTO_UDP:
- sk = udp6_lib_lookup(net, saddr, sport, daddr, dport,
- in->ifindex);
- if (sk) {
- int connected = (sk->sk_state == TCP_ESTABLISHED);
- int wildcard = ipv6_addr_any(&sk->sk_v6_rcv_saddr);
-
- /* NOTE: we return listeners even if bound to
- * 0.0.0.0, those are filtered out in
- * xt_socket, since xt_TPROXY needs 0 bound
- * listeners too
- */
- if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) ||
- (lookup_type == NFT_LOOKUP_LISTENER && connected)) {
- sock_put(sk);
- sk = NULL;
- }
- }
- break;
- default:
- WARN_ON(1);
- sk = NULL;
- }
-
- pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n",
- protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk);
-
- return sk;
-}
-#endif
-
-/**
- * tproxy_handle_time_wait4 - handle IPv4 TCP TIME_WAIT reopen redirections
- * @skb: The skb being processed.
- * @laddr: IPv4 address to redirect to or zero.
- * @lport: TCP port to redirect to or zero.
- * @sk: The TIME_WAIT TCP socket found by the lookup.
- *
- * We have to handle SYN packets arriving to TIME_WAIT sockets
- * differently: instead of reopening the connection we should rather
- * redirect the new connection to the proxy if there's a listener
- * socket present.
- *
- * tproxy_handle_time_wait4() consumes the socket reference passed in.
- *
- * Returns the listener socket if there's one, the TIME_WAIT socket if
- * no such listener is found, or NULL if the TCP header is incomplete.
- */
-static struct sock *
-tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
- __be32 laddr, __be16 lport, struct sock *sk)
-{
- const struct iphdr *iph = ip_hdr(skb);
- struct tcphdr _hdr, *hp;
-
- hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr);
- if (hp == NULL) {
- inet_twsk_put(inet_twsk(sk));
- return NULL;
- }
-
- if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
- /* SYN to a TIME_WAIT socket, we'd rather redirect it
- * to a listener socket if there's one */
- struct sock *sk2;
-
- sk2 = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
- iph->saddr, laddr ? laddr : iph->daddr,
- hp->source, lport ? lport : hp->dest,
- skb->dev, NFT_LOOKUP_LISTENER);
- if (sk2) {
- inet_twsk_deschedule_put(inet_twsk(sk));
- sk = sk2;
- }
- }
-
- return sk;
-}
-
/* assign a socket to the skb -- consumes sk */
static void
nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
@@ -319,26 +64,26 @@ tproxy_tg4(struct net *net, struct sk_buff *skb, __be32 laddr, __be16 lport,
sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, iph->daddr,
hp->source, hp->dest,
- skb->dev, NFT_LOOKUP_ESTABLISHED);
+ skb->dev, NF_TPROXY_LOOKUP_ESTABLISHED);
- laddr = tproxy_laddr4(skb, laddr, iph->daddr);
+ laddr = nf_tproxy_laddr4(skb, laddr, iph->daddr);
if (!lport)
lport = hp->dest;
/* UDP has no TCP_TIME_WAIT state, so we never enter here */
if (sk && sk->sk_state == TCP_TIME_WAIT)
/* reopening a TIME_WAIT connection needs special handling */
- sk = tproxy_handle_time_wait4(net, skb, laddr, lport, sk);
+ sk = nf_tproxy_handle_time_wait4(net, skb, laddr, lport, sk);
else if (!sk)
/* no, there's no established connection, check if
* there's a listener on the redirected addr/port */
sk = nf_tproxy_get_sock_v4(net, skb, hp, iph->protocol,
iph->saddr, laddr,
hp->source, lport,
- skb->dev, NFT_LOOKUP_LISTENER);
+ skb->dev, NF_TPROXY_LOOKUP_LISTENER);
/* NOTE: assign_sock consumes our sk reference */
- if (sk && tproxy_sk_is_transparent(sk)) {
+ if (sk && nf_tproxy_sk_is_transparent(sk)) {
/* This should be in a separate target, but we don't do multiple
targets on the same rule yet */
skb->mark = (skb->mark & ~mark_mask) ^ mark_value;
@@ -377,87 +122,6 @@ tproxy_tg4_v1(struct sk_buff *skb, const struct xt_action_param *par)
#ifdef XT_TPROXY_HAVE_IPV6
-static inline const struct in6_addr *
-tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr,
- const struct in6_addr *daddr)
-{
- struct inet6_dev *indev;
- struct inet6_ifaddr *ifa;
- struct in6_addr *laddr;
-
- if (!ipv6_addr_any(user_laddr))
- return user_laddr;
- laddr = NULL;
-
- indev = __in6_dev_get(skb->dev);
- if (indev) {
- read_lock_bh(&indev->lock);
- list_for_each_entry(ifa, &indev->addr_list, if_list) {
- if (ifa->flags & (IFA_F_TENTATIVE | IFA_F_DEPRECATED))
- continue;
-
- laddr = &ifa->addr;
- break;
- }
- read_unlock_bh(&indev->lock);
- }
-
- return laddr ? laddr : daddr;
-}
-
-/**
- * tproxy_handle_time_wait6 - handle IPv6 TCP TIME_WAIT reopen redirections
- * @skb: The skb being processed.
- * @tproto: Transport protocol.
- * @thoff: Transport protocol header offset.
- * @par: Iptables target parameters.
- * @sk: The TIME_WAIT TCP socket found by the lookup.
- *
- * We have to handle SYN packets arriving to TIME_WAIT sockets
- * differently: instead of reopening the connection we should rather
- * redirect the new connection to the proxy if there's a listener
- * socket present.
- *
- * tproxy_handle_time_wait6() consumes the socket reference passed in.
- *
- * Returns the listener socket if there's one, the TIME_WAIT socket if
- * no such listener is found, or NULL if the TCP header is incomplete.
- */
-static struct sock *
-tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
- const struct xt_action_param *par,
- struct sock *sk)
-{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
- struct tcphdr _hdr, *hp;
- const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
-
- hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
- if (hp == NULL) {
- inet_twsk_put(inet_twsk(sk));
- return NULL;
- }
-
- if (hp->syn && !hp->rst && !hp->ack && !hp->fin) {
- /* SYN to a TIME_WAIT socket, we'd rather redirect it
- * to a listener socket if there's one */
- struct sock *sk2;
-
- sk2 = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
- &iph->saddr,
- tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr),
- hp->source,
- tgi->lport ? tgi->lport : hp->dest,
- skb->dev, NFT_LOOKUP_LISTENER);
- if (sk2) {
- inet_twsk_deschedule_put(inet_twsk(sk));
- sk = sk2;
- }
- }
-
- return sk;
-}
-
static unsigned int
tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -489,25 +153,31 @@ tproxy_tg6_v1(struct sk_buff *skb, const struct xt_action_param *par)
sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp, tproto,
&iph->saddr, &iph->daddr,
hp->source, hp->dest,
- xt_in(par), NFT_LOOKUP_ESTABLISHED);
+ xt_in(par), NF_TPROXY_LOOKUP_ESTABLISHED);
- laddr = tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
+ laddr = nf_tproxy_laddr6(skb, &tgi->laddr.in6, &iph->daddr);
lport = tgi->lport ? tgi->lport : hp->dest;
/* UDP has no TCP_TIME_WAIT state, so we never enter here */
- if (sk && sk->sk_state == TCP_TIME_WAIT)
+ if (sk && sk->sk_state == TCP_TIME_WAIT) {
+ const struct xt_tproxy_target_info_v1 *tgi = par->targinfo;
/* reopening a TIME_WAIT connection needs special handling */
- sk = tproxy_handle_time_wait6(skb, tproto, thoff, par, sk);
+ sk = nf_tproxy_handle_time_wait6(skb, tproto, thoff,
+ xt_net(par),
+ &tgi->laddr.in6,
+ tgi->lport,
+ sk);
+ }
else if (!sk)
/* no there's no established connection, check if
* there's a listener on the redirected addr/port */
sk = nf_tproxy_get_sock_v6(xt_net(par), skb, thoff, hp,
tproto, &iph->saddr, laddr,
hp->source, lport,
- xt_in(par), NFT_LOOKUP_LISTENER);
+ xt_in(par), NF_TPROXY_LOOKUP_LISTENER);
/* NOTE: assign_sock consumes our sk reference */
- if (sk && tproxy_sk_is_transparent(sk)) {
+ if (sk && nf_tproxy_sk_is_transparent(sk)) {
/* This should be in a separate target, but we don't do multiple
targets on the same rule yet */
skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 2ac7f674d19b..5c0779c4fa3c 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -73,7 +73,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
* if XT_SOCKET_TRANSPARENT is used
*/
if (info->flags & XT_SOCKET_TRANSPARENT)
- transparent = nf_sk_is_transparent(sk);
+ transparent = inet_sk_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
@@ -130,7 +130,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
* if XT_SOCKET_TRANSPARENT is used
*/
if (info->flags & XT_SOCKET_TRANSPARENT)
- transparent = nf_sk_is_transparent(sk);
+ transparent = inet_sk_transparent(sk);
if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
transparent && sk_fullsock(sk))
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index f018eafc2a0d..376181cc1def 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -206,7 +206,6 @@ int nfc_genl_targets_found(struct nfc_dev *dev)
return genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -237,7 +236,6 @@ int nfc_genl_target_lost(struct nfc_dev *dev, u32 target_idx)
return 0;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -269,7 +267,6 @@ int nfc_genl_tm_activated(struct nfc_dev *dev, u32 protocol)
return 0;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -299,7 +296,6 @@ int nfc_genl_tm_deactivated(struct nfc_dev *dev)
return 0;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -340,7 +336,6 @@ int nfc_genl_device_added(struct nfc_dev *dev)
return 0;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -370,7 +365,6 @@ int nfc_genl_device_removed(struct nfc_dev *dev)
return 0;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -434,8 +428,6 @@ int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list)
return genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_ATOMIC);
nla_put_failure:
- genlmsg_cancel(msg, hdr);
-
free_msg:
nlmsg_free(msg);
@@ -470,7 +462,6 @@ int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type)
return 0;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -501,7 +492,6 @@ int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx)
return 0;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -546,7 +536,6 @@ int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx,
return 0;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
/* evt_transaction is no more used */
devm_kfree(&dev->dev, evt_transaction);
@@ -585,7 +574,6 @@ int nfc_genl_se_connectivity(struct nfc_dev *dev, u8 se_idx)
return 0;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -703,7 +691,6 @@ int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx,
return 0;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -735,7 +722,6 @@ int nfc_genl_dep_link_down_event(struct nfc_dev *dev)
return 0;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -1030,7 +1016,6 @@ static int nfc_genl_send_params(struct sk_buff *msg,
return 0;
nla_put_failure:
-
genlmsg_cancel(msg, hdr);
return -EMSGSIZE;
}
@@ -1290,7 +1275,6 @@ int nfc_genl_fw_download_done(struct nfc_dev *dev, const char *firmware_name,
return 0;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
return -EMSGSIZE;
@@ -1507,7 +1491,6 @@ static void se_io_cb(void *context, u8 *apdu, size_t apdu_len, int err)
return;
nla_put_failure:
- genlmsg_cancel(msg, hdr);
free_msg:
nlmsg_free(msg);
kfree(ctx);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b00aa959727d..082f981795f5 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4250,7 +4250,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
goto out;
if (po->tp_version >= TPACKET_V3 &&
req->tp_block_size <=
- BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv))
+ BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + sizeof(struct tpacket3_hdr))
goto out;
if (unlikely(req->tp_frame_size < po->tp_hdrlen +
po->tp_reserve))
diff --git a/net/rds/Kconfig b/net/rds/Kconfig
index 1a31502ee7db..bffde4b46c5d 100644
--- a/net/rds/Kconfig
+++ b/net/rds/Kconfig
@@ -8,7 +8,7 @@ config RDS
config RDS_RDMA
tristate "RDS over Infiniband"
- depends on RDS && INFINIBAND_ADDR_TRANS
+ depends on RDS && INFINIBAND && INFINIBAND_ADDR_TRANS
---help---
Allow RDS to use Infiniband as a transport.
This transport supports RDMA operations.
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 19975d2ca9a2..b2393113a251 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -477,6 +477,7 @@ enum rxrpc_call_flag {
RXRPC_CALL_PINGING, /* Ping in process */
RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */
RXRPC_CALL_BEGAN_RX_TIMER, /* We began the expect_rx_by timer */
+ RXRPC_CALL_RX_HEARD, /* The peer responded at least once to this call */
};
/*
@@ -624,6 +625,7 @@ struct rxrpc_call {
*/
rxrpc_seq_t rx_top; /* Highest Rx slot allocated. */
rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */
+ rxrpc_serial_t rx_serial; /* Highest serial received for this call */
u8 rx_winsize; /* Size of Rx window */
u8 tx_winsize; /* Maximum size of Tx window */
bool tx_phase; /* T if transmission phase, F if receive phase */
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 6e0d788b4dc4..20210418904b 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -392,7 +392,13 @@ recheck_state:
/* Process events */
if (test_and_clear_bit(RXRPC_CALL_EV_EXPIRED, &call->events)) {
- rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
+ if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
+ (int)call->conn->hi_serial - (int)call->rx_serial > 0) {
+ trace_rxrpc_call_reset(call);
+ rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ECONNRESET);
+ } else {
+ rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME);
+ }
set_bit(RXRPC_CALL_EV_ABORT, &call->events);
goto recheck_state;
}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index b5fd6381313d..608d078a4981 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1278,8 +1278,14 @@ void rxrpc_data_ready(struct sock *udp_sk)
call = NULL;
}
- if (call && sp->hdr.serviceId != call->service_id)
- call->service_id = sp->hdr.serviceId;
+ if (call) {
+ if (sp->hdr.serviceId != call->service_id)
+ call->service_id = sp->hdr.serviceId;
+ if ((int)sp->hdr.serial - (int)call->rx_serial > 0)
+ call->rx_serial = sp->hdr.serial;
+ if (!test_bit(RXRPC_CALL_RX_HEARD, &call->flags))
+ set_bit(RXRPC_CALL_RX_HEARD, &call->flags);
+ }
} else {
skew = 0;
call = NULL;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 76303c45db19..29fb4d68a144 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -437,6 +437,78 @@ static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
return idr_find(&tn->idr, block_index);
}
+/* Find tcf block.
+ * Set q, parent, cl when appropriate.
+ */
+
+static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
+ u32 *parent, unsigned long *cl,
+ int ifindex, u32 block_index,
+ struct netlink_ext_ack *extack)
+{
+ struct tcf_block *block;
+
+ if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
+ block = tcf_block_lookup(net, block_index);
+ if (!block) {
+ NL_SET_ERR_MSG(extack, "Block of given index was not found");
+ return ERR_PTR(-EINVAL);
+ }
+ } else {
+ const struct Qdisc_class_ops *cops;
+ struct net_device *dev;
+
+ /* Find link */
+ dev = __dev_get_by_index(net, ifindex);
+ if (!dev)
+ return ERR_PTR(-ENODEV);
+
+ /* Find qdisc */
+ if (!*parent) {
+ *q = dev->qdisc;
+ *parent = (*q)->handle;
+ } else {
+ *q = qdisc_lookup(dev, TC_H_MAJ(*parent));
+ if (!*q) {
+ NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
+ return ERR_PTR(-EINVAL);
+ }
+ }
+
+ /* Is it classful? */
+ cops = (*q)->ops->cl_ops;
+ if (!cops) {
+ NL_SET_ERR_MSG(extack, "Qdisc not classful");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!cops->tcf_block) {
+ NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+
+ /* Do we search for filter, attached to class? */
+ if (TC_H_MIN(*parent)) {
+ *cl = cops->find(*q, *parent);
+ if (*cl == 0) {
+ NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
+ return ERR_PTR(-ENOENT);
+ }
+ }
+
+ /* And the last stroke */
+ block = cops->tcf_block(*q, *cl, extack);
+ if (!block)
+ return ERR_PTR(-EINVAL);
+ if (tcf_block_shared(block)) {
+ NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ }
+
+ return block;
+}
+
static struct tcf_chain *tcf_block_chain_zero(struct tcf_block *block)
{
return list_first_entry(&block->chain_list, struct tcf_chain, list);
@@ -735,10 +807,6 @@ static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type,
int ok_count = 0;
int err;
- /* Make sure all netdevs sharing this block are offload-capable. */
- if (block->nooffloaddevcnt && err_stop)
- return -EOPNOTSUPP;
-
list_for_each_entry(block_cb, &block->cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err) {
@@ -984,9 +1052,7 @@ static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
q, parent, 0, event, false);
}
-/* Add/change/delete/get a filter node */
-
-static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
+static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
struct netlink_ext_ack *extack)
{
struct net *net = sock_net(skb->sk);
@@ -1007,8 +1073,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
int err;
int tp_created;
- if ((n->nlmsg_type != RTM_GETTFILTER) &&
- !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
return -EPERM;
replay:
@@ -1026,24 +1091,13 @@ replay:
cl = 0;
if (prio == 0) {
- switch (n->nlmsg_type) {
- case RTM_DELTFILTER:
- if (protocol || t->tcm_handle || tca[TCA_KIND]) {
- NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
- return -ENOENT;
- }
- break;
- case RTM_NEWTFILTER:
- /* If no priority is provided by the user,
- * we allocate one.
- */
- if (n->nlmsg_flags & NLM_F_CREATE) {
- prio = TC_H_MAKE(0x80000000U, 0U);
- prio_allocate = true;
- break;
- }
- /* fall-through */
- default:
+ /* If no priority is provided by the user,
+ * we allocate one.
+ */
+ if (n->nlmsg_flags & NLM_F_CREATE) {
+ prio = TC_H_MAKE(0x80000000U, 0U);
+ prio_allocate = true;
+ } else {
NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
return -ENOENT;
}
@@ -1051,66 +1105,11 @@ replay:
/* Find head of filter chain. */
- if (t->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
- block = tcf_block_lookup(net, t->tcm_block_index);
- if (!block) {
- NL_SET_ERR_MSG(extack, "Block of given index was not found");
- err = -EINVAL;
- goto errout;
- }
- } else {
- const struct Qdisc_class_ops *cops;
- struct net_device *dev;
-
- /* Find link */
- dev = __dev_get_by_index(net, t->tcm_ifindex);
- if (!dev)
- return -ENODEV;
-
- /* Find qdisc */
- if (!parent) {
- q = dev->qdisc;
- parent = q->handle;
- } else {
- q = qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent));
- if (!q) {
- NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists");
- return -EINVAL;
- }
- }
-
- /* Is it classful? */
- cops = q->ops->cl_ops;
- if (!cops) {
- NL_SET_ERR_MSG(extack, "Qdisc not classful");
- return -EINVAL;
- }
-
- if (!cops->tcf_block) {
- NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
- return -EOPNOTSUPP;
- }
-
- /* Do we search for filter, attached to class? */
- if (TC_H_MIN(parent)) {
- cl = cops->find(q, parent);
- if (cl == 0) {
- NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
- return -ENOENT;
- }
- }
-
- /* And the last stroke */
- block = cops->tcf_block(q, cl, extack);
- if (!block) {
- err = -EINVAL;
- goto errout;
- }
- if (tcf_block_shared(block)) {
- NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
- err = -EOPNOTSUPP;
- goto errout;
- }
+ block = tcf_block_find(net, &q, &parent, &cl,
+ t->tcm_ifindex, t->tcm_block_index, extack);
+ if (IS_ERR(block)) {
+ err = PTR_ERR(block);
+ goto errout;
}
chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
@@ -1119,19 +1118,10 @@ replay:
err = -EINVAL;
goto errout;
}
- chain = tcf_chain_get(block, chain_index,
- n->nlmsg_type == RTM_NEWTFILTER);
+ chain = tcf_chain_get(block, chain_index, true);
if (!chain) {
NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
- err = n->nlmsg_type == RTM_NEWTFILTER ? -ENOMEM : -EINVAL;
- goto errout;
- }
-
- if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) {
- tfilter_notify_chain(net, skb, block, q, parent, n,
- chain, RTM_DELTFILTER);
- tcf_chain_flush(chain);
- err = 0;
+ err = -ENOMEM;
goto errout;
}
@@ -1152,8 +1142,7 @@ replay:
goto errout;
}
- if (n->nlmsg_type != RTM_NEWTFILTER ||
- !(n->nlmsg_flags & NLM_F_CREATE)) {
+ if (!(n->nlmsg_flags & NLM_F_CREATE)) {
NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
err = -ENOENT;
goto errout;
@@ -1178,56 +1167,15 @@ replay:
fh = tp->ops->get(tp, t->tcm_handle);
if (!fh) {
- if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
- tcf_chain_tp_remove(chain, &chain_info, tp);
- tfilter_notify(net, skb, n, tp, block, q, parent, fh,
- RTM_DELTFILTER, false);
- tcf_proto_destroy(tp, extack);
- err = 0;
- goto errout;
- }
-
- if (n->nlmsg_type != RTM_NEWTFILTER ||
- !(n->nlmsg_flags & NLM_F_CREATE)) {
+ if (!(n->nlmsg_flags & NLM_F_CREATE)) {
NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
err = -ENOENT;
goto errout;
}
- } else {
- bool last;
-
- switch (n->nlmsg_type) {
- case RTM_NEWTFILTER:
- if (n->nlmsg_flags & NLM_F_EXCL) {
- if (tp_created)
- tcf_proto_destroy(tp, NULL);
- NL_SET_ERR_MSG(extack, "Filter already exists");
- err = -EEXIST;
- goto errout;
- }
- break;
- case RTM_DELTFILTER:
- err = tfilter_del_notify(net, skb, n, tp, block,
- q, parent, fh, false, &last,
- extack);
- if (err)
- goto errout;
- if (last) {
- tcf_chain_tp_remove(chain, &chain_info, tp);
- tcf_proto_destroy(tp, extack);
- }
- goto errout;
- case RTM_GETTFILTER:
- err = tfilter_notify(net, skb, n, tp, block, q, parent,
- fh, RTM_NEWTFILTER, true);
- if (err < 0)
- NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
- goto errout;
- default:
- NL_SET_ERR_MSG(extack, "Invalid netlink message type");
- err = -EINVAL;
- goto errout;
- }
+ } else if (n->nlmsg_flags & NLM_F_EXCL) {
+ NL_SET_ERR_MSG(extack, "Filter already exists");
+ err = -EEXIST;
+ goto errout;
}
err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
@@ -1252,6 +1200,202 @@ errout:
return err;
}
+static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tca[TCA_MAX + 1];
+ struct tcmsg *t;
+ u32 protocol;
+ u32 prio;
+ u32 parent;
+ u32 chain_index;
+ struct Qdisc *q = NULL;
+ struct tcf_chain_info chain_info;
+ struct tcf_chain *chain = NULL;
+ struct tcf_block *block;
+ struct tcf_proto *tp = NULL;
+ unsigned long cl = 0;
+ void *fh = NULL;
+ int err;
+
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+ if (err < 0)
+ return err;
+
+ t = nlmsg_data(n);
+ protocol = TC_H_MIN(t->tcm_info);
+ prio = TC_H_MAJ(t->tcm_info);
+ parent = t->tcm_parent;
+
+ if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
+ NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
+ return -ENOENT;
+ }
+
+ /* Find head of filter chain. */
+
+ block = tcf_block_find(net, &q, &parent, &cl,
+ t->tcm_ifindex, t->tcm_block_index, extack);
+ if (IS_ERR(block)) {
+ err = PTR_ERR(block);
+ goto errout;
+ }
+
+ chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+ if (chain_index > TC_ACT_EXT_VAL_MASK) {
+ NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
+ err = -EINVAL;
+ goto errout;
+ }
+ chain = tcf_chain_get(block, chain_index, false);
+ if (!chain) {
+ NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (prio == 0) {
+ tfilter_notify_chain(net, skb, block, q, parent, n,
+ chain, RTM_DELTFILTER);
+ tcf_chain_flush(chain);
+ err = 0;
+ goto errout;
+ }
+
+ tp = tcf_chain_tp_find(chain, &chain_info, protocol,
+ prio, false);
+ if (!tp || IS_ERR(tp)) {
+ NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
+ err = tp ? PTR_ERR(tp) : -ENOENT;
+ goto errout;
+ } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
+ NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
+ err = -EINVAL;
+ goto errout;
+ }
+
+ fh = tp->ops->get(tp, t->tcm_handle);
+
+ if (!fh) {
+ if (t->tcm_handle == 0) {
+ tcf_chain_tp_remove(chain, &chain_info, tp);
+ tfilter_notify(net, skb, n, tp, block, q, parent, fh,
+ RTM_DELTFILTER, false);
+ tcf_proto_destroy(tp, extack);
+ err = 0;
+ } else {
+ NL_SET_ERR_MSG(extack, "Specified filter handle not found");
+ err = -ENOENT;
+ }
+ } else {
+ bool last;
+
+ err = tfilter_del_notify(net, skb, n, tp, block,
+ q, parent, fh, false, &last,
+ extack);
+ if (err)
+ goto errout;
+ if (last) {
+ tcf_chain_tp_remove(chain, &chain_info, tp);
+ tcf_proto_destroy(tp, extack);
+ }
+ }
+
+errout:
+ if (chain)
+ tcf_chain_put(chain);
+ return err;
+}
+
+static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
+ struct netlink_ext_ack *extack)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tca[TCA_MAX + 1];
+ struct tcmsg *t;
+ u32 protocol;
+ u32 prio;
+ u32 parent;
+ u32 chain_index;
+ struct Qdisc *q = NULL;
+ struct tcf_chain_info chain_info;
+ struct tcf_chain *chain = NULL;
+ struct tcf_block *block;
+ struct tcf_proto *tp = NULL;
+ unsigned long cl = 0;
+ void *fh = NULL;
+ int err;
+
+ err = nlmsg_parse(n, sizeof(*t), tca, TCA_MAX, NULL, extack);
+ if (err < 0)
+ return err;
+
+ t = nlmsg_data(n);
+ protocol = TC_H_MIN(t->tcm_info);
+ prio = TC_H_MAJ(t->tcm_info);
+ parent = t->tcm_parent;
+
+ if (prio == 0) {
+ NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
+ return -ENOENT;
+ }
+
+ /* Find head of filter chain. */
+
+ block = tcf_block_find(net, &q, &parent, &cl,
+ t->tcm_ifindex, t->tcm_block_index, extack);
+ if (IS_ERR(block)) {
+ err = PTR_ERR(block);
+ goto errout;
+ }
+
+ chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
+ if (chain_index > TC_ACT_EXT_VAL_MASK) {
+ NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
+ err = -EINVAL;
+ goto errout;
+ }
+ chain = tcf_chain_get(block, chain_index, false);
+ if (!chain) {
+ NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
+ err = -EINVAL;
+ goto errout;
+ }
+
+ tp = tcf_chain_tp_find(chain, &chain_info, protocol,
+ prio, false);
+ if (!tp || IS_ERR(tp)) {
+ NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
+ err = tp ? PTR_ERR(tp) : -ENOENT;
+ goto errout;
+ } else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
+ NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
+ err = -EINVAL;
+ goto errout;
+ }
+
+ fh = tp->ops->get(tp, t->tcm_handle);
+
+ if (!fh) {
+ NL_SET_ERR_MSG(extack, "Specified filter handle not found");
+ err = -ENOENT;
+ } else {
+ err = tfilter_notify(net, skb, n, tp, block, q, parent,
+ fh, RTM_NEWTFILTER, true);
+ if (err < 0)
+ NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
+ }
+
+errout:
+ if (chain)
+ tcf_chain_put(chain);
+ return err;
+}
+
struct tcf_dump_args {
struct tcf_walker w;
struct sk_buff *skb;
@@ -1581,21 +1725,31 @@ static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts,
int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts,
enum tc_setup_type type, void *type_data, bool err_stop)
{
- int ok_count;
+ int ok_count = 0;
int ret;
- ret = tcf_block_cb_call(block, type, type_data, err_stop);
- if (ret < 0)
- return ret;
- ok_count = ret;
+ if (!block->nooffloaddevcnt) {
+ ret = tcf_block_cb_call(block, type, type_data, err_stop);
+ if (ret < 0)
+ return ret;
+ ok_count = ret;
+ }
if (!exts || ok_count)
- return ok_count;
+ goto skip_egress;
+
ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop);
if (ret < 0)
return ret;
ok_count += ret;
+skip_egress:
+ /* if one of the netdevs sharing this block are not offload-capable
+ * make sure we succeeded in egress instead.
+ */
+ if (block->nooffloaddevcnt && !ok_count && err_stop)
+ return -EOPNOTSUPP;
+
return ok_count;
}
EXPORT_SYMBOL(tc_setup_cb_call);
@@ -1634,9 +1788,9 @@ static int __init tc_filter_init(void)
if (err)
goto err_register_pernet_subsys;
- rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0);
- rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter,
+ rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL, 0);
+ rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
tc_dump_tfilter, 0);
return 0;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 4e74508515f4..2b5be42a9f1c 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -326,6 +326,8 @@ static void fl_destroy_sleepable(struct work_struct *work)
struct cls_fl_head *head = container_of(to_rcu_work(work),
struct cls_fl_head,
rwork);
+
+ rhashtable_destroy(&head->ht);
kfree(head);
module_put(THIS_MODULE);
}
@@ -875,7 +877,7 @@ static int fl_check_assign_mask(struct cls_fl_head *head,
return PTR_ERR(newmask);
fnew->mask = newmask;
- } else if (fold && fold->mask == fnew->mask) {
+ } else if (fold && fold->mask != fnew->mask) {
return -EINVAL;
}
@@ -1028,7 +1030,7 @@ errout_mask:
fl_mask_put(head, fnew->mask, false);
errout_idr:
- if (fnew->handle)
+ if (!fold)
idr_remove(&head->handle_idr, fnew->handle);
errout:
tcf_exts_destroy(&fnew->exts);
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 760ab1b09f8b..69078c82963e 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -346,9 +346,6 @@ bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
return false;
}
- if (ret && netif_xmit_frozen_or_stopped(txq))
- return false;
-
return true;
}
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index f062a18e9162..d6b8ae4ed7a3 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -16,6 +16,7 @@
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <net/netlink.h>
+#include <net/pkt_cls.h>
#include <net/pkt_sched.h>
#include <net/sch_generic.h>
@@ -23,12 +24,44 @@ struct mq_sched {
struct Qdisc **qdiscs;
};
+static int mq_offload(struct Qdisc *sch, enum tc_mq_command cmd)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_mq_qopt_offload opt = {
+ .command = cmd,
+ .handle = sch->handle,
+ };
+
+ if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
+ return -EOPNOTSUPP;
+
+ return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt);
+}
+
+static void mq_offload_stats(struct Qdisc *sch)
+{
+ struct net_device *dev = qdisc_dev(sch);
+ struct tc_mq_qopt_offload opt = {
+ .command = TC_MQ_STATS,
+ .handle = sch->handle,
+ .stats = {
+ .bstats = &sch->bstats,
+ .qstats = &sch->qstats,
+ },
+ };
+
+ if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc)
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQ, &opt);
+}
+
static void mq_destroy(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
struct mq_sched *priv = qdisc_priv(sch);
unsigned int ntx;
+ mq_offload(sch, TC_MQ_DESTROY);
+
if (!priv->qdiscs)
return;
for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
@@ -70,6 +103,8 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt,
}
sch->flags |= TCQ_F_MQROOT;
+
+ mq_offload(sch, TC_MQ_CREATE);
return 0;
}
@@ -127,6 +162,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
sch->q.qlen += qdisc->q.qlen;
sch->bstats.bytes += qdisc->bstats.bytes;
sch->bstats.packets += qdisc->bstats.packets;
+ sch->qstats.qlen += qdisc->qstats.qlen;
sch->qstats.backlog += qdisc->qstats.backlog;
sch->qstats.drops += qdisc->qstats.drops;
sch->qstats.requeues += qdisc->qstats.requeues;
@@ -135,6 +171,7 @@ static int mq_dump(struct Qdisc *sch, struct sk_buff *skb)
spin_unlock_bh(qdisc_lock(qdisc));
}
+ mq_offload_stats(sch);
return 0;
}
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 4a95e260b674..445b7ef61677 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -637,7 +637,7 @@ unsigned long sctp_transport_timeout(struct sctp_transport *trans)
trans->state != SCTP_PF)
timeout += trans->hbinterval;
- return timeout;
+ return max_t(unsigned long, timeout, HZ / 5);
}
/* Reset transport variables to their initial values */
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 2c369d4bb1c1..973b4471b532 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1420,7 +1420,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
return rc;
if (optlen < sizeof(int))
- return rc;
+ return -EINVAL;
get_user(val, (int __user *)optval);
lock_sock(sk);
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index 6358e5271070..ac09ca803296 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -50,7 +50,7 @@ config SUNRPC_DEBUG
config SUNRPC_XPRT_RDMA
tristate "RPC-over-RDMA transport"
- depends on SUNRPC && INFINIBAND_ADDR_TRANS
+ depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS
default SUNRPC && INFINIBAND
select SG_POOL
help
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 40b54cc64243..5f48251c1319 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1658,7 +1658,6 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len;
}
-out:
return &xdst0->u.dst;
put_states:
@@ -1667,8 +1666,8 @@ put_states:
free_dst:
if (xdst0)
dst_release_immediate(&xdst0->u.dst);
- xdst0 = ERR_PTR(err);
- goto out;
+
+ return ERR_PTR(err);
}
static int xfrm_expand_policies(const struct flowi *fl, u16 family,